Qua*_*ary 7 performance sql-server-2008 sql-server
问题:我正在将以下查询(按外键依赖项列出表)移植到 PostGreSql。
WITH Fkeys AS (
SELECT DISTINCT
OnTable = OnTable.name
,AgainstTable = AgainstTable.name
FROM sysforeignkeys fk
INNER JOIN sysobjects onTable
ON fk.fkeyid = onTable.id
INNER JOIN sysobjects againstTable
ON fk.rkeyid = againstTable.id
WHERE 1=1
AND AgainstTable.TYPE = 'U'
AND OnTable.TYPE = 'U'
-- ignore self joins; they cause an infinite recursion
AND OnTable.Name <> AgainstTable.Name
)
,MyData AS (
SELECT
OnTable = o.name
,AgainstTable = FKeys.againstTable
FROM sys.objects o
LEFT JOIN FKeys
ON o.name = FKeys.onTable
WHERE (1=1)
AND o.type = 'U'
AND o.name NOT LIKE 'sys%'
)
,MyRecursion AS (
-- base case
SELECT
TableName = OnTable
,Lvl = 1
FROM MyData
WHERE 1=1
AND AgainstTable IS NULL
-- recursive case
UNION ALL
SELECT
TableName = OnTable
,Lvl = r.Lvl + 1
FROM MyData d
INNER JOIN MyRecursion r
ON d.AgainstTable = r.TableName
)
SELECT
Lvl = MAX(Lvl)
,TableName
--,strSql = 'delete from [' + tablename + ']'
FROM
MyRecursion
GROUP BY
TableName
ORDER BY lvl
/*
ORDER BY
2 ASC
,1 ASC
*/
Run Code Online (Sandbox Code Playgroud)
使用 information_schema,查询如下所示:
WITH Fkeys AS
(
SELECT DISTINCT
KCU1.TABLE_NAME AS OnTable
,KCU2.TABLE_NAME AS AgainstTable
FROM INFORMATION_SCHEMA.REFERENTIAL_CONSTRAINTS RC
LEFT JOIN INFORMATION_SCHEMA.KEY_COLUMN_USAGE KCU1
ON KCU1.CONSTRAINT_CATALOG = RC.CONSTRAINT_CATALOG
AND KCU1.CONSTRAINT_SCHEMA = RC.CONSTRAINT_SCHEMA
AND KCU1.CONSTRAINT_NAME = RC.CONSTRAINT_NAME
LEFT JOIN INFORMATION_SCHEMA.KEY_COLUMN_USAGE KCU2
ON KCU2.CONSTRAINT_CATALOG = RC.UNIQUE_CONSTRAINT_CATALOG
AND KCU2.CONSTRAINT_SCHEMA = RC.UNIQUE_CONSTRAINT_SCHEMA
AND KCU2.CONSTRAINT_NAME = RC.UNIQUE_CONSTRAINT_NAME
AND KCU2.ORDINAL_POSITION = KCU1.ORDINAL_POSITION
WHERE (1=1)
AND KCU1.TABLE_NAME <> KCU2.TABLE_NAME
)
,MyData AS
(
SELECT
TABLE_NAME AS OnTable
,FKeys.againstTable AS AgainstTable
FROM INFORMATION_SCHEMA.TABLES
LEFT JOIN FKeys
ON TABLE_NAME = FKeys.onTable
WHERE (1=1)
AND TABLE_TYPE = 'BASE TABLE'
AND TABLE_NAME NOT IN ('sysdiagrams', 'dtproperties')
)
,MyRecursion AS
(
-- base case
SELECT
OnTable AS TableName
,1 AS Lvl
FROM MyData
WHERE 1=1
AND AgainstTable IS NULL
-- recursive case
UNION ALL
SELECT
OnTable AS TableName
,r.Lvl + 1 AS Lvl
FROM MyData d
INNER JOIN MyRecursion r
ON d.AgainstTable = r.TableName
)
SELECT
MAX(Lvl) AS Lvl
,TableName
--,strSql = 'delete from [' + tablename + ']'
FROM
MyRecursion
GROUP BY
TableName
ORDER BY lvl
/*
ORDER BY
2 ASC
,1 ASC
*/
Run Code Online (Sandbox Code Playgroud)
我现在的问题是:
在 SQL Server 中(在 2008 R2 上测试):为什么我替换时查询会从 1 秒跳到 11 分钟
SELECT DISTINCT
OnTable = OnTable.name
,AgainstTable = AgainstTable.name
FROM sysforeignkeys fk
INNER JOIN sysobjects onTable
ON fk.fkeyid = onTable.id
INNER JOIN sysobjects againstTable
ON fk.rkeyid = againstTable.id
WHERE 1=1
AND AgainstTable.TYPE = 'U'
AND OnTable.TYPE = 'U'
-- ignore self joins; they cause an infinite recursion
AND OnTable.Name <> AgainstTable.Name
Run Code Online (Sandbox Code Playgroud)
和
SELECT DISTINCT
KCU1.TABLE_NAME AS OnTable
,KCU2.TABLE_NAME AS AgainstTable
FROM INFORMATION_SCHEMA.REFERENTIAL_CONSTRAINTS RC
LEFT JOIN INFORMATION_SCHEMA.KEY_COLUMN_USAGE KCU1
ON KCU1.CONSTRAINT_CATALOG = RC.CONSTRAINT_CATALOG
AND KCU1.CONSTRAINT_SCHEMA = RC.CONSTRAINT_SCHEMA
AND KCU1.CONSTRAINT_NAME = RC.CONSTRAINT_NAME
LEFT JOIN INFORMATION_SCHEMA.KEY_COLUMN_USAGE KCU2
ON KCU2.CONSTRAINT_CATALOG = RC.UNIQUE_CONSTRAINT_CATALOG
AND KCU2.CONSTRAINT_SCHEMA = RC.UNIQUE_CONSTRAINT_SCHEMA
AND KCU2.CONSTRAINT_NAME = RC.UNIQUE_CONSTRAINT_NAME
AND KCU2.ORDINAL_POSITION = KCU1.ORDINAL_POSITION
WHERE (1=1)
AND KCU1.TABLE_NAME <> KCU2.TABLE_NAME
Run Code Online (Sandbox Code Playgroud)
???
据我所知,单独运行部分查询时确实没有显着的速度差异。结果集也完全相同(我检查了 Excel 中的每一行),但顺序不同。
低于工作 PostGreSQL 版本(在 35 毫秒内完成完全相同的数据库内容 [75 个表]...)
--没有任何保证--
WITH RECURSIVE Fkeys AS
(
SELECT DISTINCT
KCU1.TABLE_NAME AS OnTable
,KCU2.TABLE_NAME AS AgainstTable
FROM INFORMATION_SCHEMA.REFERENTIAL_CONSTRAINTS RC
LEFT JOIN INFORMATION_SCHEMA.KEY_COLUMN_USAGE KCU1
ON KCU1.CONSTRAINT_CATALOG = RC.CONSTRAINT_CATALOG
AND KCU1.CONSTRAINT_SCHEMA = RC.CONSTRAINT_SCHEMA
AND KCU1.CONSTRAINT_NAME = RC.CONSTRAINT_NAME
LEFT JOIN INFORMATION_SCHEMA.KEY_COLUMN_USAGE KCU2
ON KCU2.CONSTRAINT_CATALOG = RC.UNIQUE_CONSTRAINT_CATALOG
AND KCU2.CONSTRAINT_SCHEMA = RC.UNIQUE_CONSTRAINT_SCHEMA
AND KCU2.CONSTRAINT_NAME = RC.UNIQUE_CONSTRAINT_NAME
AND KCU2.ORDINAL_POSITION = KCU1.ORDINAL_POSITION
)
,MyData AS
(
SELECT
TABLE_NAME AS OnTable
,FKeys.againstTable AS AgainstTable
FROM INFORMATION_SCHEMA.TABLES
LEFT JOIN FKeys
ON TABLE_NAME = FKeys.onTable
WHERE (1=1)
AND TABLE_TYPE = 'BASE TABLE'
AND TABLE_SCHEMA = 'public'
--AND TABLE_NAME NOT IN ('sysdiagrams', 'dtproperties')
)
,MyRecursion AS
(
-- base case
SELECT
OnTable AS TableName
,1 AS Lvl
FROM MyData
WHERE 1=1
AND AgainstTable IS NULL
-- recursive case
UNION ALL
SELECT
OnTable AS TableName
,r.Lvl + 1 AS Lvl
FROM MyData d
INNER JOIN MyRecursion r
ON d.AgainstTable = r.TableName
)
SELECT
MAX(Lvl) AS Lvl
,TableName
--,strSql = 'delete from [' + tablename + ']'
FROM
MyRecursion
GROUP BY
TableName
ORDER BY lvl
/*
ORDER BY
2 ASC
,1 ASC
*/
Run Code Online (Sandbox Code Playgroud)
似乎也是
AND KCU1.TABLE_NAME <> KCU2.TABLE_NAME
Run Code Online (Sandbox Code Playgroud)
使用 information_schema 时是多余的,所以它实际上应该更快。
Mar*_*ith 12
我可能会放弃INFORMATION_SCHEMA这里的视图并使用新sys.视图(而不是向后兼容的视图),或者至少首先将结果物化JOIN到索引表中。
递归 CTE 总是在 SQL Server 中获得相同的基本计划,其中每一行都被添加到堆栈假脱机并逐一处理。这意味着两者之间的连接REFERENTIAL_CONSTRAINTS RC, KEY_COLUMN_USAGE KCU1, KEY_COLUMN_USAGE KCU2发生的次数与以下查询的结果一样多SELECT COUNT(*) FROM MyRecursion。
我假设在您的情况下(从 11 分钟的执行时间开始)可能是数千次,因此您需要递归部分尽可能高效。您的查询将执行以下类型的事情数千次。
SELECT
KCU1.TABLE_CATALOG,
KCU1.TABLE_SCHEMA,
KCU1.TABLE_NAME
FROM INFORMATION_SCHEMA.REFERENTIAL_CONSTRAINTS RC
INNER JOIN INFORMATION_SCHEMA.KEY_COLUMN_USAGE KCU1
ON KCU1.CONSTRAINT_CATALOG = RC.CONSTRAINT_CATALOG
AND KCU1.CONSTRAINT_SCHEMA = RC.CONSTRAINT_SCHEMA
AND KCU1.CONSTRAINT_NAME = RC.CONSTRAINT_NAME
INNER JOIN INFORMATION_SCHEMA.KEY_COLUMN_USAGE KCU2
ON KCU2.CONSTRAINT_CATALOG = RC.UNIQUE_CONSTRAINT_CATALOG
AND KCU2.CONSTRAINT_SCHEMA = RC.UNIQUE_CONSTRAINT_SCHEMA
AND KCU2.CONSTRAINT_NAME = RC.UNIQUE_CONSTRAINT_NAME
AND KCU2.ORDINAL_POSITION = KCU1.ORDINAL_POSITION
WHERE KCU2.TABLE_NAME = 'FOO'
Run Code Online (Sandbox Code Playgroud)
(旁注:如果不同模式中的相同表名,您的查询的两个版本都将返回不正确的结果)
正如你所看到的,这个计划非常可怕。

将此与您的sys查询计划进行比较,后者稍微简单一些。
SELECT OnTable = OnTable.name,
AgainstTable = AgainstTable.name
FROM sysforeignkeys fk
INNER JOIN sysobjects OnTable
ON fk.fkeyid = OnTable.id
INNER JOIN sysobjects AgainstTable
ON fk.rkeyid = AgainstTable.id
WHERE AgainstTable.name = 'FOO'
Run Code Online (Sandbox Code Playgroud)

您可以#temp通过更改MyDatato的定义来鼓励中间物化,而无需显式创建表
MyData AS
(
SELECT TOP 99.999999 PERCENT
TABLE_NAME AS OnTable
,Fkeys.AgainstTable AS AgainstTable
FROM INFORMATION_SCHEMA.TABLES
LEFT JOIN Fkeys
ON TABLE_NAME = Fkeys.OnTable
WHERE (1=1)
AND TABLE_TYPE = 'BASE TABLE'
AND TABLE_NAME NOT IN ('sysdiagrams', 'dtproperties')
ORDER BY TABLE_NAME
)
Run Code Online (Sandbox Code Playgroud)
Adventureworks2008在我的机器上进行测试,这使运行时间从大约 10 秒降低到 250 毫秒(在第一次运行之后,因为计划需要 2 秒来编译)。它向计划中添加了一个 Eager spool,在第一个递归调用中实现 Join 的结果,然后在后续调用中重放它。但是,不能保证此行为,您可能希望对 Connect 项请求进行投票提供提示以强制 CTE 或派生表的中间物化
我会觉得更安全地创建如下#temp表,而不是依赖于这种行为。
CREATE TABLE #MyData
(
OnTable SYSNAME,
AgainstTable NVARCHAR(128) NULL,
UNIQUE CLUSTERED (AgainstTable, OnTable)
);
WITH Fkeys AS
(
SELECT DISTINCT
KCU1.TABLE_NAME AS OnTable
,KCU2.TABLE_NAME AS AgainstTable
FROM INFORMATION_SCHEMA.REFERENTIAL_CONSTRAINTS RC
LEFT JOIN INFORMATION_SCHEMA.KEY_COLUMN_USAGE KCU1
ON KCU1.CONSTRAINT_CATALOG = RC.CONSTRAINT_CATALOG
AND KCU1.CONSTRAINT_SCHEMA = RC.CONSTRAINT_SCHEMA
AND KCU1.CONSTRAINT_NAME = RC.CONSTRAINT_NAME
LEFT JOIN INFORMATION_SCHEMA.KEY_COLUMN_USAGE KCU2
ON KCU2.CONSTRAINT_CATALOG = RC.UNIQUE_CONSTRAINT_CATALOG
AND KCU2.CONSTRAINT_SCHEMA = RC.UNIQUE_CONSTRAINT_SCHEMA
AND KCU2.CONSTRAINT_NAME = RC.UNIQUE_CONSTRAINT_NAME
AND KCU2.ORDINAL_POSITION = KCU1.ORDINAL_POSITION
WHERE (1=1)
AND KCU1.TABLE_NAME <> KCU2.TABLE_NAME
)
,MyData AS
(
SELECT
TABLE_NAME AS OnTable
,Fkeys.AgainstTable AS AgainstTable
FROM INFORMATION_SCHEMA.TABLES
LEFT JOIN Fkeys
ON TABLE_NAME = Fkeys.OnTable
WHERE (1=1)
AND TABLE_TYPE = 'BASE TABLE'
AND TABLE_NAME NOT IN ('sysdiagrams', 'dtproperties')
)
INSERT INTO #MyData
SELECT *
FROM MyData;
WITH MyRecursion AS
(
-- base case
SELECT
OnTable AS TableName
,1 AS Lvl
FROM #MyData
WHERE 1=1
AND AgainstTable IS NULL
-- recursive case
UNION ALL
SELECT
OnTable AS TableName
,r.Lvl + 1 AS Lvl
FROM #MyData d
INNER JOIN MyRecursion r
ON d.AgainstTable = r.TableName
)
SELECT
MAX(Lvl) AS Lvl
,TableName
--,strSql = 'delete from [' + tablename + ']'
FROM
MyRecursion
GROUP BY
TableName
ORDER BY Lvl
DROP TABLE #MyData
Run Code Online (Sandbox Code Playgroud)
或者