Rub*_*mez 3 sql-server deadlock parallelism
我有一个应用程序,需要对一张表执行记录的删除和插入操作;该表存储访问控制(门禁)的历史记录,以及系统中的签入和签出日期时间。
问题是这样的:我有一个线程池,其中每个线程负责处理某个时间段(通常是一个月)内的一天。这个过程一启动,就会出现一些死锁。我做了锁跟踪,但死锁图中只显示有一个进程被阻塞,没有说明涉及的是哪条语句或哪个事务;死锁日志中则显示 Parallel query worker thread was involved in a deadlock。
如何进行更详细的跟踪或查询,以了解是什么导致了我的应用程序和数据库阻塞?
谢谢
编辑
这是我在过去几天中所做的跟踪的 XML:
<deadlock-list>
<deadlock victim="process45f9fb048">
<process-list>
<process id="process45f9fb048" taskpriority="0" logused="0" waitresource="PAGE: 4:1:98644" waittime="5369" ownerId="1282041063" transactionname="DELETE" lasttranstarted="2014-06-27T03:15:01.157" XDES="0xbeb91620" lockMode="U" schedulerid="7" kpid="1984" status="suspended" spid="204" sbid="0" ecid="4" priority="0" trancount="0" lastbatchstarted="2014-06-27T03:15:00.620" lastbatchcompleted="2014-06-27T03:15:00.620" clientapp="SQLAgent - TSQL JobStep (Job 0xF408BE64B781AB40B05180C1B2EB9DC8 : Step 1)" hostname="REPORTESCRYSTAL" hostpid="28848" isolationlevel="read committed (2)" xactid="1282041063" currentdb="4" lockTimeout="4294967295" clientoption1="673185824" clientoption2="128056">
<executionStack>
<frame procname="msdb.dbo.sp_syscollector_purge_collection_logs" line="31" stmtstart="2152" stmtend="2720" sqlhandle="0x03000400af551732fc7ab30044a000000100000000000000">
DELETE FROM dbo.sysssislog
FROM dbo.sysssislog AS s
INNER JOIN dbo.syscollector_execution_log_internal AS l ON (l.package_execution_id = s.executionid)
INNER JOIN #purged_log_ids AS i ON i.log_id = l.log_id
-- Then delete the actual logs </frame>
<frame procname="adhoc" line="2" stmtstart="28" sqlhandle="0x0100040034b4980c80c0a260030000000000000000000000">
EXEC [dbo].[sp_syscollector_purge_collection_logs] </frame>
</executionStack>
<inputbuf>
</inputbuf>
</process>
<process id="process961c988" taskpriority="0" logused="0" waitresource="PAGE: 4:1:98644" waittime="5324" ownerId="1282041063" transactionname="DELETE" lasttranstarted="2014-06-27T03:15:01.157" XDES="0xaece7620" lockMode="U" schedulerid="3" kpid="27920" status="suspended" spid="204" sbid="0" ecid="2" priority="0" trancount="0" lastbatchstarted="2014-06-27T03:15:00.620" lastbatchcompleted="2014-06-27T03:15:00.620" clientapp="SQLAgent - TSQL JobStep (Job 0xF408BE64B781AB40B05180C1B2EB9DC8 : Step 1)" hostname="REPORTESCRYSTAL" hostpid="28848" isolationlevel="read committed (2)" xactid="1282041063" currentdb="4" lockTimeout="4294967295" clientoption1="673185824" clientoption2="128056">
<executionStack>
<frame procname="msdb.dbo.sp_syscollector_purge_collection_logs" line="31" stmtstart="2152" stmtend="2720" sqlhandle="0x03000400af551732fc7ab30044a000000100000000000000">
DELETE FROM dbo.sysssislog
FROM dbo.sysssislog AS s
INNER JOIN dbo.syscollector_execution_log_internal AS l ON (l.package_execution_id = s.executionid)
INNER JOIN #purged_log_ids AS i ON i.log_id = l.log_id
-- Then delete the actual logs </frame>
<frame procname="adhoc" line="2" stmtstart="28" sqlhandle="0x0100040034b4980c80c0a260030000000000000000000000">
EXEC [dbo].[sp_syscollector_purge_collection_logs] </frame>
</executionStack>
<inputbuf>
</inputbuf>
</process>
<process id="processb543dc8" taskpriority="0" logused="0" waitresource="PAGE: 4:1:108775" waittime="5367" ownerId="1282041011" transactionname="DELETE" lasttranstarted="2014-06-27T03:15:01.153" XDES="0x6771b32d0" lockMode="U" schedulerid="4" kpid="27392" status="suspended" spid="197" sbid="0" ecid="2" priority="0" trancount="0" lastbatchstarted="2014-06-27T03:15:00.617" lastbatchcompleted="2014-06-27T03:15:00.617" clientapp="SQLAgent - TSQL JobStep (Job 0xE0A214220ED4114D8DD04385F151A9B2 : Step 1)" hostname="REPORTESCRYSTAL" hostpid="28848" isolationlevel="read committed (2)" xactid="1282041011" currentdb="4" lockTimeout="4294967295" clientoption1="673185824" clientoption2="128056">
<executionStack>
<frame procname="msdb.dbo.sp_syscollector_purge_collection_logs" line="31" stmtstart="2152" stmtend="2720" sqlhandle="0x03000400af551732fc7ab30044a000000100000000000000">
DELETE FROM dbo.sysssislog
FROM dbo.sysssislog AS s
INNER JOIN dbo.syscollector_execution_log_internal AS l ON (l.package_execution_id = s.executionid)
INNER JOIN #purged_log_ids AS i ON i.log_id = l.log_id
-- Then delete the actual logs </frame>
<frame procname="adhoc" line="2" stmtstart="28" sqlhandle="0x0100040034b4980c80c0a260030000000000000000000000">
EXEC [dbo].[sp_syscollector_purge_collection_logs] </frame>
</executionStack>
<inputbuf>
</inputbuf>
</process>
<process id="process9631948" taskpriority="0" logused="0" waitresource="PAGE: 4:1:108775" waittime="5301" ownerId="1282041011" transactionname="DELETE" lasttranstarted="2014-06-27T03:15:01.153" XDES="0x506b8f300" lockMode="U" schedulerid="5" kpid="23904" status="suspended" spid="197" sbid="0" ecid="1" priority="0" trancount="0" lastbatchstarted="2014-06-27T03:15:00.617" lastbatchcompleted="2014-06-27T03:15:00.617" clientapp="SQLAgent - TSQL JobStep (Job 0xE0A214220ED4114D8DD04385F151A9B2 : Step 1)" hostname="REPORTESCRYSTAL" hostpid="28848" isolationlevel="read committed (2)" xactid="1282041011" currentdb="4" lockTimeout="4294967295" clientoption1="673185824" clientoption2="128056">
<executionStack>
<frame procname="msdb.dbo.sp_syscollector_purge_collection_logs" line="31" stmtstart="2152" stmtend="2720" sqlhandle="0x03000400af551732fc7ab30044a000000100000000000000">
DELETE FROM dbo.sysssislog
FROM dbo.sysssislog AS s
INNER JOIN dbo.syscollector_execution_log_internal AS l ON (l.package_execution_id = s.executionid)
INNER JOIN #purged_log_ids AS i ON i.log_id = l.log_id
-- Then delete the actual logs </frame>
<frame procname="adhoc" line="2" stmtstart="28" sqlhandle="0x0100040034b4980c80c0a260030000000000000000000000">
EXEC [dbo].[sp_syscollector_purge_collection_logs] </frame>
</executionStack>
<inputbuf>
</inputbuf>
</process>
<process id="process964fdc8" taskpriority="0" logused="10000" waittime="5745" schedulerid="8" kpid="32756" status="suspended" spid="204" sbid="0" ecid="0" priority="0" trancount="2" lastbatchstarted="2014-06-27T03:15:00.620" lastbatchcompleted="2014-06-27T03:15:00.620" clientapp="SQLAgent - TSQL JobStep (Job 0xF408BE64B781AB40B05180C1B2EB9DC8 : Step 1)" hostname="REPORTESCRYSTAL" hostpid="28848" loginname="SALUDTOTAL\bo_agent" isolationlevel="read committed (2)" xactid="1282041063" currentdb="4" lockTimeout="4294967295" clientoption1="673185824" clientoption2="128056">
<executionStack>
<frame procname="msdb.dbo.sp_syscollector_purge_collection_logs" line="31" stmtstart="2152" stmtend="2720" sqlhandle="0x03000400af551732fc7ab30044a000000100000000000000">
DELETE FROM dbo.sysssislog
FROM dbo.sysssislog AS s
INNER JOIN dbo.syscollector_execution_log_internal AS l ON (l.package_execution_id = s.executionid)
INNER JOIN #purged_log_ids AS i ON i.log_id = l.log_id
-- Then delete the actual logs </frame>
<frame procname="adhoc" line="2" stmtstart="28" sqlhandle="0x0100040034b4980c80c0a260030000000000000000000000">
EXEC [dbo].[sp_syscollector_purge_collection_logs] </frame>
</executionStack>
<inputbuf>
EXEC [dbo].[sp_syscollector_purge_collection_logs]
</inputbuf>
</process>
<process id="process5fc0b0508" taskpriority="0" logused="10000" waittime="2896" schedulerid="2" kpid="8248" status="suspended" spid="204" sbid="0" ecid="5" priority="0" trancount="0" lastbatchstarted="2014-06-27T03:15:00.620" lastbatchcompleted="2014-06-27T03:15:00.620" clientapp="SQLAgent - TSQL JobStep (Job 0xF408BE64B781AB40B05180C1B2EB9DC8 : Step 1)" hostname="REPORTESCRYSTAL" hostpid="28848" isolationlevel="read committed (2)" xactid="1282041063" currentdb="4" lockTimeout="4294967295" clientoption1="673185824" clientoption2="128056">
<executionStack>
<frame procname="msdb.dbo.sp_syscollector_purge_collection_logs" line="31" stmtstart="2152" stmtend="2720" sqlhandle="0x03000400af551732fc7ab30044a000000100000000000000">
DELETE FROM dbo.sysssislog
FROM dbo.sysssislog AS s
INNER JOIN dbo.syscollector_execution_log_internal AS l ON (l.package_execution_id = s.executionid)
INNER JOIN #purged_log_ids AS i ON i.log_id = l.log_id
-- Then delete the actual logs </frame>
<frame procname="adhoc" line="2" stmtstart="28" sqlhandle="0x0100040034b4980c80c0a260030000000000000000000000">
EXEC [dbo].[sp_syscollector_purge_collection_logs] </frame>
</executionStack>
<inputbuf>
</inputbuf>
</process>
</process-list>
<resource-list>
<pagelock fileid="1" pageid="98644" dbid="4" objectname="msdb.dbo.sysssislog" id="lock544471d00" mode="U" associatedObjectId="72057594047037440">
<owner-list>
<owner id="process9631948" mode="U"/>
</owner-list>
<waiter-list>
<waiter id="process45f9fb048" mode="U" requestType="wait"/>
</waiter-list>
</pagelock>
<pagelock fileid="1" pageid="98644" dbid="4" objectname="msdb.dbo.sysssislog" id="lock544471d00" mode="U" associatedObjectId="72057594047037440">
<owner-list/>
<waiter-list>
<waiter id="process961c988" mode="U" requestType="wait"/>
</waiter-list>
</pagelock>
<pagelock fileid="1" pageid="108775" dbid="4" objectname="msdb.dbo.sysssislog" id="lock6a3143280" mode="U" associatedObjectId="72057594047037440">
<owner-list>
<owner id="process964fdc8" mode="U"/>
</owner-list>
<waiter-list>
<waiter id="processb543dc8" mode="U" requestType="wait"/>
</waiter-list>
</pagelock>
<pagelock fileid="1" pageid="108775" dbid="4" objectname="msdb.dbo.sysssislog" id="lock6a3143280" mode="U" associatedObjectId="72057594047037440">
<owner-list/>
<waiter-list>
<waiter id="process9631948" mode="U" requestType="wait"/>
</waiter-list>
</pagelock>
<exchangeEvent id="Port80185c00" WaitType="e_waitPortOpen" nodeId="2">
<owner-list>
<owner id="process5fc0b0508"/>
</owner-list>
<waiter-list>
<waiter id="process964fdc8"/>
</waiter-list>
</exchangeEvent>
<exchangeEvent id="Pipe1937d8680" WaitType="e_waitPipeGetRow" nodeId="4">
<owner-list>
<owner id="process45f9fb048"/>
<owner id="process961c988"/>
</owner-list>
<waiter-list>
<waiter id="process5fc0b0508"/>
</waiter-list>
</exchangeEvent>
</resource-list>
</deadlock>
</deadlock-list>
Run Code Online (Sandbox Code Playgroud)
编辑 2
我已经在另一个 SQL 实例中复制了死锁,这是图表
<deadlock-list>
<deadlock victim="process9ada4508">
<process-list>
<process id="process9ada4508" taskpriority="0" logused="0" waitresource="PAGE: 7:1:595136" waittime="2411" ownerId="391631459" transactionname="DELETE" lasttranstarted="2014-07-08T10:51:37.907" XDES="0x9a5c9b00" lockMode="U" schedulerid="2" kpid="7308" status="suspended" spid="93" sbid="0" ecid="2" priority="0" trancount="0" lastbatchstarted="2014-07-08T10:51:37.907" lastbatchcompleted="2014-07-08T10:51:37.907" clientapp=".Net SqlClient Data Provider" hostname="CENTROCONTROL" hostpid="9016" isolationlevel="read committed (2)" xactid="391631459" currentdb="7" lockTimeout="4294967295" clientoption1="671088672" clientoption2="128056">
<executionStack>
<frame procname="adhoc" line="1" stmtstart="54" sqlhandle="0x020000000826022c8d837edd59a697e6b8f2b323fc72898d">
DELETE FROM TBL_ITAS_USER_REGISTRY WHERE (URE_DATE_IN >= @p0 AND URE_DATE_IN < @p1) OR (URE_DATE_OUT >= @p0 AND URE_DATE_OUT < @p1) </frame>
<frame procname="unknown" line="1" sqlhandle="0x000000000000000000000000000000000000000000000000">
unknown </frame>
</executionStack>
<inputbuf>
</inputbuf>
</process>
<process id="process43fdc8" taskpriority="0" logused="0" waitresource="PAGE: 7:1:595136" waittime="5046" ownerId="391632197" transactionname="DELETE" lasttranstarted="2014-07-08T10:51:39.943" XDES="0x2388fc080" lockMode="U" schedulerid="1" kpid="7944" status="suspended" spid="84" sbid="0" ecid="3" priority="0" trancount="0" lastbatchstarted="2014-07-08T10:51:39.943" lastbatchcompleted="2014-07-08T10:51:39.943" clientapp=".Net SqlClient Data Provider" hostname="CENTROCONTROL" hostpid="9016" isolationlevel="read committed (2)" xactid="391632197" currentdb="7" lockTimeout="4294967295" clientoption1="671088672" clientoption2="128056">
<executionStack>
<frame procname="adhoc" line="1" stmtstart="54" sqlhandle="0x020000000826022c8d837edd59a697e6b8f2b323fc72898d">
DELETE FROM TBL_ITAS_USER_REGISTRY WHERE (URE_DATE_IN >= @p0 AND URE_DATE_IN < @p1) OR (URE_DATE_OUT >= @p0 AND URE_DATE_OUT < @p1) </frame>
<frame procname="unknown" line="1" sqlhandle="0x000000000000000000000000000000000000000000000000">
unknown </frame>
</executionStack>
<inputbuf>
</inputbuf>
</process>
<process id="process9ada5288" taskpriority="0" logused="10000" waittime="1946" schedulerid="2" kpid="4680" status="suspended" spid="93" sbid="0" ecid="0" priority="0" trancount="2" lastbatchstarted="2014-07-08T10:51:37.907" lastbatchcompleted="2014-07-08T10:51:37.907" clientapp=".Net SqlClient Data Provider" hostname="CENTROCONTROL" hostpid="9016" loginname="Robotec" isolationlevel="read committed (2)" xactid="391631459" currentdb="7" lockTimeout="4294967295" clientoption1="671088672" clientoption2="128056">
<executionStack>
<frame procname="adhoc" line="1" stmtstart="54" sqlhandle="0x020000000826022c8d837edd59a697e6b8f2b323fc72898d">
DELETE FROM TBL_ITAS_USER_REGISTRY WHERE (URE_DATE_IN >= @p0 AND URE_DATE_IN < @p1) OR (URE_DATE_OUT >= @p0 AND URE_DATE_OUT < @p1) </frame>
<frame procname="unknown" line="1" sqlhandle="0x000000000000000000000000000000000000000000000000">
unknown </frame>
</executionStack>
<inputbuf>
(@p0 datetime,@p1 datetime)DELETE FROM TBL_ITAS_USER_REGISTRY WHERE (URE_DATE_IN >= @p0 AND URE_DATE_IN < @p1) OR (URE_DATE_OUT >= @p0 AND URE_DATE_OUT < @p1) </inputbuf>
</process>
</process-list>
<resource-list>
<pagelock fileid="1" pageid="595136" dbid="7" objectname="MFEnterprise_v2.dbo.TBL_ITAS_USER_REGISTRY" id="lock800f1200" mode="U" associatedObjectId="72057594063224832">
<owner-list/>
<waiter-list>
<waiter id="process9ada4508" mode="U" requestType="wait"/>
</waiter-list>
</pagelock>
<pagelock fileid="1" pageid="595136" dbid="7" objectname="MFEnterprise_v2.dbo.TBL_ITAS_USER_REGISTRY" id="lock800f1200" mode="U" associatedObjectId="72057594063224832">
<owner-list>
<owner id="process9ada5288" mode="U"/>
</owner-list>
<waiter-list>
<waiter id="process43fdc8" mode="U" requestType="wait"/>
</waiter-list>
</pagelock>
<exchangeEvent id="Pipe188f61be0" WaitType="e_waitPipeGetRow" nodeId="2">
<owner-list>
<owner id="process9ada4508"/>
</owner-list>
<waiter-list>
<waiter id="process9ada5288"/>
</waiter-list>
</exchangeEvent>
</resource-list>
</deadlock>
</deadlock-list>
Run Code Online (Sandbox Code Playgroud)
根据我在 SQL Sentry Plan Explorer PRO* 中看到的内容,看起来是两个不同的作业在争抢删除某些行(点击放大):
也许存在一些重叠,因为微软在 msdb.dbo.sp_syscollector_purge_collection_logs 中确实做了一些不太理想的事情,比如让该过程在每次调用时抓取 TOP (@delete_batch_size) 行,却没有使用 ORDER BY 或任何排除手段。这意味着两个会话同时调用该存储过程时,完全有可能最终试图删除同一页上的行,甚至可能是同一批行。
所以最简单的解决方案:
不要让两个作业运行相同的清理程序。
如果您确实需要同时运行这两个作业(再问一次,请解释为什么?),我的第一个想法是将该存储过程中删除查询的 MAXDOP 设置为 1(是的,您可以修改它,只是要把它纳入源代码管理,因为您的更改可能会被服务包、升级等撤消)。
DELETE ... OPTION (MAXDOP 1);
Run Code Online (Sandbox Code Playgroud)
不过,我不确定这就是原因。在作业上下文之外手动调用该过程并查看实际执行计划,会很有意思。
如果您不想修改存储过程,并且运行的是企业版,那么至少还有另一个选择:资源调控器(Resource Governor)。只需让执行作业所用的登录名归属于 max_dop 设置为 1 的工作负载组即可。我的白皮书 Using the Resource Governor 中有相关操作说明(虽然是以维护作业为背景来写的,但概念相同)。
* 免责声明:我为 SQL Sentry 工作。