SQL 查询从 1 秒减慢到 11 分钟 - 为什么?

Qua*_*ary 7 performance sql-server-2008 sql-server

问题:我正在将以下查询(按外键依赖项列出表)移植到 PostGreSql。

WITH Fkeys AS (

    SELECT DISTINCT 
         OnTable       = OnTable.name
        ,AgainstTable  = AgainstTable.name 
    FROM sysforeignkeys fk 

        INNER JOIN sysobjects onTable 
            ON fk.fkeyid = onTable.id 

        INNER JOIN sysobjects againstTable  
            ON fk.rkeyid = againstTable.id 

    WHERE 1=1
        AND AgainstTable.TYPE = 'U'
        AND OnTable.TYPE = 'U'
        -- ignore self joins; they cause an infinite recursion
        AND OnTable.Name <> AgainstTable.Name
    )

,MyData AS (

    SELECT 
         OnTable = o.name 
        ,AgainstTable = FKeys.againstTable 
    FROM sys.objects o 

    LEFT JOIN FKeys
        ON o.name = FKeys.onTable 

    WHERE (1=1) 
        AND o.type = 'U' 
        AND o.name NOT LIKE 'sys%' 
    )

,MyRecursion AS (

    -- base case
    SELECT  
         TableName    = OnTable
        ,Lvl        = 1
    FROM MyData
    WHERE 1=1
        AND AgainstTable IS NULL 

    -- recursive case
    UNION ALL 

    SELECT 
         TableName = OnTable 
        ,Lvl       = r.Lvl + 1 
    FROM MyData d 
        INNER JOIN MyRecursion r 
            ON d.AgainstTable = r.TableName 
)
SELECT 
     Lvl = MAX(Lvl)
    ,TableName
    --,strSql = 'delete from [' + tablename + ']'
FROM 
    MyRecursion
GROUP BY
    TableName

ORDER BY lvl

/*
ORDER BY 

     2 ASC
    ,1 ASC

*/
Run Code Online (Sandbox Code Playgroud)

使用 information_schema,查询如下所示:

WITH Fkeys AS 
(
    SELECT DISTINCT 
         KCU1.TABLE_NAME AS OnTable 
        ,KCU2.TABLE_NAME AS AgainstTable 
    FROM INFORMATION_SCHEMA.REFERENTIAL_CONSTRAINTS RC 

    LEFT JOIN INFORMATION_SCHEMA.KEY_COLUMN_USAGE KCU1 
        ON KCU1.CONSTRAINT_CATALOG = RC.CONSTRAINT_CATALOG  
        AND KCU1.CONSTRAINT_SCHEMA = RC.CONSTRAINT_SCHEMA 
        AND KCU1.CONSTRAINT_NAME = RC.CONSTRAINT_NAME 

    LEFT JOIN INFORMATION_SCHEMA.KEY_COLUMN_USAGE KCU2 
        ON KCU2.CONSTRAINT_CATALOG =  RC.UNIQUE_CONSTRAINT_CATALOG  
        AND KCU2.CONSTRAINT_SCHEMA = RC.UNIQUE_CONSTRAINT_SCHEMA 
        AND KCU2.CONSTRAINT_NAME = RC.UNIQUE_CONSTRAINT_NAME 
        AND KCU2.ORDINAL_POSITION = KCU1.ORDINAL_POSITION 

    WHERE (1=1)
    AND KCU1.TABLE_NAME <> KCU2.TABLE_NAME 
)

,MyData AS 
( 
    SELECT 
         TABLE_NAME AS OnTable  
        ,FKeys.againstTable AS AgainstTable
    FROM INFORMATION_SCHEMA.TABLES 

    LEFT JOIN FKeys
        ON TABLE_NAME = FKeys.onTable  

    WHERE (1=1) 
        AND TABLE_TYPE = 'BASE TABLE'
        AND TABLE_NAME NOT IN ('sysdiagrams', 'dtproperties') 
)

,MyRecursion AS 
(
    -- base case
    SELECT  
         OnTable AS TableName 
        ,1 AS Lvl 
    FROM MyData
    WHERE 1=1
    AND AgainstTable IS NULL 

    -- recursive case
    UNION ALL 

    SELECT 
         OnTable AS TableName
        ,r.Lvl + 1 AS Lvl 
    FROM MyData d 

    INNER JOIN MyRecursion r 
        ON d.AgainstTable = r.TableName 
)

SELECT 
     MAX(Lvl) AS Lvl 
    ,TableName
    --,strSql = 'delete from [' + tablename + ']'
FROM 
    MyRecursion
GROUP BY
    TableName

ORDER BY lvl

/*
ORDER BY 

     2 ASC
    ,1 ASC

*/
Run Code Online (Sandbox Code Playgroud)

我现在的问题是:

在 SQL Server 中(在 2008 R2 上测试):为什么我替换时查询会从 1 秒跳到 11 分钟

SELECT DISTINCT 
     OnTable       = OnTable.name
    ,AgainstTable  = AgainstTable.name 
FROM sysforeignkeys fk 

    INNER JOIN sysobjects onTable 
        ON fk.fkeyid = onTable.id 

    INNER JOIN sysobjects againstTable  
        ON fk.rkeyid = againstTable.id 

WHERE 1=1
    AND AgainstTable.TYPE = 'U'
    AND OnTable.TYPE = 'U'
    -- ignore self joins; they cause an infinite recursion
    AND OnTable.Name <> AgainstTable.Name
Run Code Online (Sandbox Code Playgroud)

SELECT DISTINCT 
     KCU1.TABLE_NAME AS OnTable 
    ,KCU2.TABLE_NAME AS AgainstTable 
FROM INFORMATION_SCHEMA.REFERENTIAL_CONSTRAINTS RC 

LEFT JOIN INFORMATION_SCHEMA.KEY_COLUMN_USAGE KCU1 
    ON KCU1.CONSTRAINT_CATALOG = RC.CONSTRAINT_CATALOG  
    AND KCU1.CONSTRAINT_SCHEMA = RC.CONSTRAINT_SCHEMA 
    AND KCU1.CONSTRAINT_NAME = RC.CONSTRAINT_NAME 

LEFT JOIN INFORMATION_SCHEMA.KEY_COLUMN_USAGE KCU2 
    ON KCU2.CONSTRAINT_CATALOG =  RC.UNIQUE_CONSTRAINT_CATALOG  
    AND KCU2.CONSTRAINT_SCHEMA = RC.UNIQUE_CONSTRAINT_SCHEMA 
    AND KCU2.CONSTRAINT_NAME = RC.UNIQUE_CONSTRAINT_NAME 
    AND KCU2.ORDINAL_POSITION = KCU1.ORDINAL_POSITION 

WHERE (1=1)
AND KCU1.TABLE_NAME <> KCU2.TABLE_NAME 
Run Code Online (Sandbox Code Playgroud)

???

据我所知,单独运行部分查询时确实没有显着的速度差异。结果集也完全相同(我检查了 Excel 中的每一行),但顺序不同。

低于工作 PostGreSQL 版本(在 35 毫秒内完成完全相同的数据库内容 [75 个表]...)
--没有任何保证--

WITH RECURSIVE Fkeys AS 
(
    SELECT DISTINCT 
         KCU1.TABLE_NAME AS OnTable 
        ,KCU2.TABLE_NAME AS AgainstTable 
    FROM INFORMATION_SCHEMA.REFERENTIAL_CONSTRAINTS RC 

    LEFT JOIN INFORMATION_SCHEMA.KEY_COLUMN_USAGE KCU1 
        ON KCU1.CONSTRAINT_CATALOG = RC.CONSTRAINT_CATALOG  
        AND KCU1.CONSTRAINT_SCHEMA = RC.CONSTRAINT_SCHEMA 
        AND KCU1.CONSTRAINT_NAME = RC.CONSTRAINT_NAME 

    LEFT JOIN INFORMATION_SCHEMA.KEY_COLUMN_USAGE KCU2 
        ON KCU2.CONSTRAINT_CATALOG =  RC.UNIQUE_CONSTRAINT_CATALOG  
        AND KCU2.CONSTRAINT_SCHEMA = RC.UNIQUE_CONSTRAINT_SCHEMA 
        AND KCU2.CONSTRAINT_NAME = RC.UNIQUE_CONSTRAINT_NAME 
        AND KCU2.ORDINAL_POSITION = KCU1.ORDINAL_POSITION 
)

,MyData AS 
( 
    SELECT 
         TABLE_NAME AS OnTable  
        ,FKeys.againstTable AS AgainstTable
    FROM INFORMATION_SCHEMA.TABLES 

    LEFT JOIN FKeys
        ON TABLE_NAME = FKeys.onTable  

    WHERE (1=1) 
        AND TABLE_TYPE = 'BASE TABLE'
        AND TABLE_SCHEMA = 'public'
        --AND TABLE_NAME NOT IN ('sysdiagrams', 'dtproperties') 
)


,MyRecursion AS 
(
    -- base case
    SELECT  
         OnTable AS TableName 
        ,1 AS Lvl 
    FROM MyData
    WHERE 1=1
    AND AgainstTable IS NULL 

    -- recursive case
    UNION ALL 

    SELECT 
         OnTable AS TableName
        ,r.Lvl + 1 AS Lvl 
    FROM MyData d 

    INNER JOIN MyRecursion r 
        ON d.AgainstTable = r.TableName 
)

SELECT 
     MAX(Lvl) AS Lvl 
    ,TableName
    --,strSql = 'delete from [' + tablename + ']'
FROM 
    MyRecursion
GROUP BY
    TableName

ORDER BY lvl


/*
ORDER BY 

     2 ASC
    ,1 ASC

*/
Run Code Online (Sandbox Code Playgroud)

似乎也是

AND KCU1.TABLE_NAME <> KCU2.TABLE_NAME
Run Code Online (Sandbox Code Playgroud)

使用 information_schema 时是多余的,所以它实际上应该更快。

Mar*_*ith 12

我可能会放弃INFORMATION_SCHEMA这里的视图并使用新sys.视图(而不是向后兼容的视图),或者至少首先将结果物化JOIN到索引表中。

递归 CTE 总是在 SQL Server 中获得相同的基本计划,其中每一行都被添加到堆栈假脱机并逐一处理。这意味着两者之间的连接REFERENTIAL_CONSTRAINTS RC, KEY_COLUMN_USAGE KCU1, KEY_COLUMN_USAGE KCU2发生的次数与以下查询的结果一样多SELECT COUNT(*) FROM MyRecursion

我假设在您的情况下(从 11 分钟的执行时间开始)可能是数千次,因此您需要递归部分尽可能高效。您的查询将执行以下类型的事情数千次。

   SELECT  
           KCU1.TABLE_CATALOG,
           KCU1.TABLE_SCHEMA,
           KCU1.TABLE_NAME
    FROM INFORMATION_SCHEMA.REFERENTIAL_CONSTRAINTS RC 
    INNER JOIN INFORMATION_SCHEMA.KEY_COLUMN_USAGE KCU1 
        ON KCU1.CONSTRAINT_CATALOG = RC.CONSTRAINT_CATALOG  
        AND KCU1.CONSTRAINT_SCHEMA = RC.CONSTRAINT_SCHEMA 
        AND KCU1.CONSTRAINT_NAME = RC.CONSTRAINT_NAME 
    INNER JOIN INFORMATION_SCHEMA.KEY_COLUMN_USAGE KCU2 
        ON KCU2.CONSTRAINT_CATALOG =  RC.UNIQUE_CONSTRAINT_CATALOG  
        AND KCU2.CONSTRAINT_SCHEMA = RC.UNIQUE_CONSTRAINT_SCHEMA 
        AND KCU2.CONSTRAINT_NAME = RC.UNIQUE_CONSTRAINT_NAME 
        AND KCU2.ORDINAL_POSITION = KCU1.ORDINAL_POSITION 
    WHERE KCU2.TABLE_NAME = 'FOO' 
Run Code Online (Sandbox Code Playgroud)

(旁注:如果不同模式中的相同表名,您的查询的两个版本都将返回不正确的结果)

正如你所看到的,这个计划非常可怕。

计划

将此与您的sys查询计划进行比较,后者稍微简单一些。

SELECT OnTable = OnTable.name, 
       AgainstTable = AgainstTable.name 
FROM   sysforeignkeys fk 
       INNER JOIN sysobjects OnTable 
         ON fk.fkeyid = OnTable.id 
       INNER JOIN sysobjects AgainstTable 
         ON fk.rkeyid = AgainstTable.id 
WHERE  AgainstTable.name = 'FOO' 
Run Code Online (Sandbox Code Playgroud)

方案二

您可以#temp通过更改MyDatato的定义来鼓励中间物化,而无需显式创建表

MyData AS 
( 
    SELECT TOP 99.999999 PERCENT
         TABLE_NAME AS OnTable  
        ,Fkeys.AgainstTable AS AgainstTable
    FROM INFORMATION_SCHEMA.TABLES 

    LEFT JOIN Fkeys
        ON TABLE_NAME = Fkeys.OnTable  

    WHERE (1=1) 
        AND TABLE_TYPE = 'BASE TABLE'
        AND TABLE_NAME NOT IN ('sysdiagrams', 'dtproperties') 
        ORDER BY TABLE_NAME
)
Run Code Online (Sandbox Code Playgroud)

Adventureworks2008在我的机器上进行测试,这使运行时间从大约 10 秒降低到 250 毫秒(在第一次运行之后,因为计划需要 2 秒来编译)。它向计划中添加了一个 Eager spool,在第一个递归调用中实现 Join 的结果,然后在后续调用中重放它。但是,不能保证此行为,您可能希望对 Connect 项请求进行投票提供提示以强制 CTE 或派生表的中间物化

我会觉得更安全地创建如下#temp表,而不是依赖于这种行为。

CREATE TABLE #MyData
(
OnTable SYSNAME,
AgainstTable NVARCHAR(128) NULL,
UNIQUE CLUSTERED (AgainstTable, OnTable)
);

WITH Fkeys AS 
(
    SELECT DISTINCT 
         KCU1.TABLE_NAME AS OnTable 
        ,KCU2.TABLE_NAME AS AgainstTable 
    FROM INFORMATION_SCHEMA.REFERENTIAL_CONSTRAINTS RC 

    LEFT JOIN INFORMATION_SCHEMA.KEY_COLUMN_USAGE KCU1 
        ON KCU1.CONSTRAINT_CATALOG = RC.CONSTRAINT_CATALOG  
        AND KCU1.CONSTRAINT_SCHEMA = RC.CONSTRAINT_SCHEMA 
        AND KCU1.CONSTRAINT_NAME = RC.CONSTRAINT_NAME 

    LEFT JOIN INFORMATION_SCHEMA.KEY_COLUMN_USAGE KCU2 
        ON KCU2.CONSTRAINT_CATALOG =  RC.UNIQUE_CONSTRAINT_CATALOG  
        AND KCU2.CONSTRAINT_SCHEMA = RC.UNIQUE_CONSTRAINT_SCHEMA 
        AND KCU2.CONSTRAINT_NAME = RC.UNIQUE_CONSTRAINT_NAME 
        AND KCU2.ORDINAL_POSITION = KCU1.ORDINAL_POSITION 

    WHERE (1=1)
    AND KCU1.TABLE_NAME <> KCU2.TABLE_NAME 
)

,MyData AS 
( 
    SELECT 
         TABLE_NAME AS OnTable  
        ,Fkeys.AgainstTable AS AgainstTable
    FROM INFORMATION_SCHEMA.TABLES 

    LEFT JOIN Fkeys
        ON TABLE_NAME = Fkeys.OnTable  

    WHERE (1=1) 
        AND TABLE_TYPE = 'BASE TABLE'
        AND TABLE_NAME NOT IN ('sysdiagrams', 'dtproperties') 
)
INSERT INTO #MyData
SELECT *
FROM MyData;


WITH MyRecursion AS 
(
    -- base case
    SELECT  
         OnTable AS TableName 
        ,1 AS Lvl 
    FROM #MyData
    WHERE 1=1
    AND AgainstTable IS NULL 

    -- recursive case
    UNION ALL 

    SELECT 
         OnTable AS TableName
        ,r.Lvl + 1 AS Lvl 
    FROM #MyData d 

    INNER JOIN MyRecursion r 
        ON d.AgainstTable = r.TableName 
)

SELECT 
     MAX(Lvl) AS Lvl 
    ,TableName
    --,strSql = 'delete from [' + tablename + ']'
FROM 
    MyRecursion
GROUP BY
    TableName

ORDER BY Lvl

DROP TABLE #MyData
Run Code Online (Sandbox Code Playgroud)

或者