对某个日期范围内的计数列求和的性能问题

Mar*_*own 4 sql-server aggregate sum

在我们的应用程序中,有一个 API 端点背后的查询,用于返回在指定客户或事项(一个或多个,由 ID 指定)上有“活动”的用户。当活动表有 3000 万行时,这个查询大约需要 15 秒才能返回(注意:“资产”表中还有约 60 万行,另有约 2700 个“用户”)。

表格的粗略架构可以在我的问题的底部找到。下面是我们查找“活动超过阈值的用户”的查询。为简洁起见,我也在下面放置了视图定义。

当此端点返回一页数据时,还会运行第二个类似的查询,以获取分页响应所需的元素总数——也就是说,端点的耗时大约是单次查询耗时的 2 倍。

我的问题本质上是,我应该应用哪些技术来提高此查询的性能?我们试图坚持的“基准”是端点响应的“亚秒级”。

查询计划可以在这里找到。

-- Returns one page of trustees whose summed document activity exceeds @P4.
-- Aggregation is per (trustee, asset), so a trustee can qualify more than once;
-- the outer DISTINCT collapses those repeats to a single trustee row.
WITH activity AS (
    -- One row per billing record, tagged with the asset it was billed against.
    SELECT
        billing.trustee_sid   AS sid,
        0                     AS hoursBilled,
        billing.recordedValue AS docsAccessed,
        a.type                AS asset_type,
        a.displayId,
        a.displayName
    FROM sec._DocumentsBilling AS billing
    INNER JOIN sec.SessionSid AS s
        ON s.sid = billing.client_sid
    INNER JOIN sec.Asset AS a
        ON a.sid = billing.client_sid
    WHERE s.setID = @P0
      AND billing.recordedDate > @P1          -- exclusive lower bound
      AND billing.client_sid IN (@P2)
),
trusteeData AS (
    -- Keep only (trustee, asset) groups whose total activity is above @P4.
    SELECT
        activity.sid,
        SUM(activity.hoursBilled)  AS hoursBilled,
        SUM(activity.docsAccessed) AS docsAccessed,
        activity.asset_type        AS asset_type,
        activity.displayId         AS displayId,
        activity.displayName       AS displayName
    FROM activity
    GROUP BY
        activity.sid,
        activity.asset_type,
        activity.displayId,
        activity.displayName
    HAVING SUM(activity.docsAccessed) > @P4
)
SELECT DISTINCT
    t.type,
    t.sid,
    t.name,
    t.emailAddress,
    t.jobTitle
FROM sec.Trustee AS t
INNER JOIN trusteeData
    ON trusteeData.sid = t.sid
ORDER BY t.sid
OFFSET @P6 ROWS               -- paging: rows to skip
FETCH NEXT @P7 ROWS ONLY      -- paging: page size
Run Code Online (Sandbox Code Playgroud)

最大的性能开销来自把 'Asset' 表连接到活动数据上:产品需求要求我们通过 displayId 来匹配,而不是通过 sid(sid 是 Asset 表的主键)。

  • 客户和事项统称为“资产”,用 type 列作为鉴别器来区分二者。
  • 如果帐单表中的 matterId 设置为空字符串,我们将其计为“客户”帐单。
  • 不太可能需要所有“用户匹配器”(例如 email/'foreignid'/'domainUser'/'unqualifiedDomainUser')——我们可以删除不需要的,只保留给定租户(客户)实际需要的那些。
-- Resolves raw document activity to (trustee, client, optional matter) rows.
-- The activity's userString is matched to a trustee through one of four
-- identifiers, selected by userType; the four matches are concatenated.
-- clientId / matterId are matched to Asset rows by displayId.
CREATE VIEW sec._DocumentsBilling AS
    SELECT
        data.billingType       AS billingType,
        data.trustee_sid       AS trustee_sid,
        data.recordedValue,
        client.sid             AS client_sid,
        client.clientGroup_sid AS clientGroup_sid,
        client.securityType    AS clientSecurityType,
        matter.sid             AS matter_sid,
        matter.securityType    AS matterSecurityType,
        matter.matterGroup_sid AS matterGroup_sid,
        data.recordedDate
    FROM (
        -- matched by e-mail address
        SELECT N'DOCUMENTS' AS billingType, t.sid AS trustee_sid,
               d.recordedValue, d.recordedDate, d.clientId, d.matterId
        FROM sec.userActivityDocuments AS d
        INNER JOIN sec.Trustee AS t
            ON t.emailAddress = d.userString
        WHERE d.userType = N'EMAIL'

        UNION ALL

        -- matched by foreign id
        SELECT N'DOCUMENTS' AS billingType, t.sid AS trustee_sid,
               d.recordedValue, d.recordedDate, d.clientId, d.matterId
        FROM sec.userActivityDocuments AS d
        INNER JOIN sec.Trustee AS t
            ON t.foreignId = d.userString
        WHERE d.userType = N'FOREIGNID'

        UNION ALL

        -- matched by domain user
        SELECT N'DOCUMENTS' AS billingType, t.sid AS trustee_sid,
               d.recordedValue, d.recordedDate, d.clientId, d.matterId
        FROM sec.userActivityDocuments AS d
        INNER JOIN sec.Trustee AS t
            ON t.domainUser = d.userString
        WHERE d.userType = N'DOMAIN'

        UNION ALL

        -- matched by unqualified domain user
        SELECT N'DOCUMENTS' AS billingType, t.sid AS trustee_sid,
               d.recordedValue, d.recordedDate, d.clientId, d.matterId
        FROM sec.userActivityDocuments AS d
        INNER JOIN sec.Trustee AS t
            ON t.unqualifiedDomainUser = d.userString
        WHERE d.userType = N'UNQUALIFIED_DOMAIN'
    ) AS data
    INNER JOIN sec.Asset AS client
        ON client.displayId = data.clientId
    LEFT JOIN sec.Asset AS matter
        ON matter.client_sid = client.sid
       AND matter.displayId = data.matterId
    -- Keep client-level billing (empty matterId, no matter row) and any row
    -- whose matter was found; drop rows naming a matter that does not exist.
    WHERE (matter.sid IS NULL AND data.matterId = '') OR matter.sid IS NOT NULL
GO

-- Clients and matters ("assets"); [type] is the discriminator column.
-- The views join matter rows to their client through [client_sid].
CREATE TABLE [tenant].[Asset] (
    [type]            nvarchar(100)         NOT NULL,
    [sid]             bigint IDENTITY(1, 1) NOT NULL,
    [displayId]       nvarchar(32)          NOT NULL,  -- matched against activity clientId / matterId
    [tenant]          bigint                NOT NULL,
    [client_sid]      bigint                NULL,
    [clientGroup_sid] bigint                NULL,
    [matterGroup_sid] bigint                NULL
)

-- Users ("trustees"). The four nullable identifier columns (emailAddress,
-- foreignId, domainUser, unqualifiedDomainUser) are the keys the billing view
-- uses to match an activity row's userString to a trustee.
CREATE TABLE [tenant].[Trustee] (
    [type]                  nvarchar(32)          NOT NULL,
    [sid]                   bigint IDENTITY(1, 1) NOT NULL,
    [emailAddress]          nvarchar(255)         NULL,
    [name]                  nvarchar(255)         NULL,
    [jobTitle]              nvarchar(255)         NULL,
    [foreignId]             nvarchar(255)         NULL,
    [tenant]                bigint                NOT NULL,
    [domainUser]            nvarchar(255)         NULL,
    [unqualifiedDomainUser] nvarchar(255)         NULL
)

-- Raw document-activity records. [userType] says which trustee identifier
-- [userString] holds; [matterId] = '' is treated as client-level billing.
CREATE TABLE [tenant].[userActivityDocuments] (
    [id]               binary(16)    NOT NULL,
    [tenant]           bigint        NOT NULL,
    [userType]         nvarchar(64)  NOT NULL,
    [systemName]       nvarchar(200) NOT NULL,
    [clientId]         nvarchar(32)  NOT NULL,
    [matterId]         nvarchar(32)  NOT NULL,
    [recordedValue]    int           NOT NULL,  -- the count that the query sums
    [recordedDate]     date          NOT NULL,
    [recordedDateTime] datetime2(7)  NOT NULL,
    [userString]       nvarchar(255) NOT NULL,
    [collectionTime]   bigint        NOT NULL
)

-- Clustered primary key: an asset is identified by (tenant, sid).
ALTER TABLE [tenant].[Asset]
    ADD CONSTRAINT [PK_Asset]
    PRIMARY KEY CLUSTERED ([tenant] ASC, [sid] ASC)
    WITH (
        PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF,
        IGNORE_DUP_KEY = OFF, ONLINE = OFF,
        ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON, FILLFACTOR = 80
    )
    ON [PRIMARY]
GO

-- displayId is unique only in combination with tenant, type and owning client.
ALTER TABLE [tenant].[Asset]
    ADD CONSTRAINT [UNQ_ASSET_NAME_1]
    UNIQUE NONCLUSTERED ([tenant] ASC, [displayId] ASC, [type] ASC, [client_sid] ASC)
    WITH (
        PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF,
        IGNORE_DUP_KEY = OFF, ONLINE = OFF,
        ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON, FILLFACTOR = 80
    )
    ON [PRIMARY]
GO

-- Clustered primary key: a trustee is identified by (tenant, sid).
ALTER TABLE [tenant].[Trustee]
    ADD CONSTRAINT [PK_Trustee]
    PRIMARY KEY CLUSTERED ([tenant] ASC, [sid] ASC)
    WITH (
        PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF,
        IGNORE_DUP_KEY = OFF, ONLINE = OFF,
        ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON, FILLFACTOR = 80
    )
    ON [PRIMARY]
GO

-- One filtered index per trustee identifier column; each covers only rows
-- where that identifier is present. domainUser and foreignId are unique per
-- tenant; emailAddress and unqualifiedDomainUser are not.
-- NOTE(review): [isDeleting] is INCLUDEd below but does not appear in the
-- abbreviated Trustee definition shown with the question.
CREATE UNIQUE NONCLUSTERED INDEX [FIDX_Trustee_1]
    ON [tenant].[Trustee] ([tenant] ASC, [domainUser] ASC)
    INCLUDE ([isDeleting])
    WHERE ([domainUser] IS NOT NULL)
    WITH (
        PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF,
        IGNORE_DUP_KEY = OFF, DROP_EXISTING = OFF, ONLINE = OFF,
        ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON, FILLFACTOR = 80
    )
    ON [PRIMARY]
GO

CREATE UNIQUE NONCLUSTERED INDEX [FIDX_Trustee_2]
    ON [tenant].[Trustee] ([tenant] ASC, [foreignId] ASC)
    INCLUDE ([isDeleting])
    WHERE ([foreignId] IS NOT NULL)
    WITH (
        PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF,
        IGNORE_DUP_KEY = OFF, DROP_EXISTING = OFF, ONLINE = OFF,
        ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON, FILLFACTOR = 80
    )
    ON [PRIMARY]
GO
SET ANSI_PADDING ON
GO

CREATE NONCLUSTERED INDEX [FIDX_Trustee_3]
    ON [tenant].[Trustee] ([tenant] ASC, [emailAddress] ASC)
    INCLUDE ([isDeleting])
    WHERE ([emailAddress] IS NOT NULL)
    WITH (
        PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF,
        DROP_EXISTING = OFF, ONLINE = OFF,
        ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON, FILLFACTOR = 80
    )
    ON [PRIMARY]
GO

CREATE NONCLUSTERED INDEX [FIDX_Trustee_5]
    ON [tenant].[Trustee] ([tenant] ASC, [unqualifiedDomainUser] ASC)
    INCLUDE ([isDeleting])
    WHERE ([unqualifiedDomainUser] IS NOT NULL)
    WITH (
        PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF,
        DROP_EXISTING = OFF, ONLINE = OFF,
        ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON, FILLFACTOR = 80
    )
    ON [PRIMARY]
GO

-- Clustered primary key on (id, tenant); id is a 16-byte binary value.
ALTER TABLE [tenant].[userActivityDocuments]
    ADD CONSTRAINT [PK_userActivityDocuments]
    PRIMARY KEY CLUSTERED ([id] ASC, [tenant] ASC)
    WITH (
        PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF,
        IGNORE_DUP_KEY = OFF, ONLINE = OFF,
        ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON, FILLFACTOR = 80
    )
    ON [PRIMARY]
GO
SET ANSI_PADDING ON
GO

-- Covers every activity column the billing view reads: keyed by
-- client/matter/userType/date, with userString and recordedValue included.
CREATE NONCLUSTERED INDEX [IDX_UserActivityDocuments_1]
    ON [tenant].[userActivityDocuments] (
        [tenant] ASC,
        [clientId] ASC,
        [matterId] ASC,
        [userType] ASC,
        [recordedDate] ASC
    )
    INCLUDE ([userString], [recordedValue])
    WITH (
        PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF,
        DROP_EXISTING = OFF, ONLINE = OFF,
        ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON, FILLFACTOR = 100
    )
    ON [PRIMARY]
GO
Run Code Online (Sandbox Code Playgroud)

Ran*_*gen 5

我无法评价访问该视图的查询,但就您提供的示例而言,视图本身的性能可以很快得到一些改善。

在所有连接(INNER 和 LEFT OUTER)完成之后,执行计划的末尾会出现一个过滤器(Filter)运算符,用来满足 where 子句:

WHERE (matter.sid IS NULL AND data.matterId = '') OR matter.sid IS NOT NULL
Run Code Online (Sandbox Code Playgroud)

在此处输入图片说明

使用 UNION 进一步拆分视图,会使过滤器运算符在执行计划中出现得更早一些,但还不够早。(呵呵)

在此处输入图片说明

拆分OR是提高视图性能的良好开端,但还有其他部分需要改进。

我们可以更改过滤器的第一部分:

 WHERE (matter.sid IS NULL AND data.matterId = '')
Run Code Online (Sandbox Code Playgroud)

通过删除 AND data.matterId = '')

并将其添加到 UNION 子查询的每个部分:

-- Trustee-matching union restricted to client-level activity: the
-- matterId = '' test is applied inside every branch, before the Asset joins.
SELECT
    N'DOCUMENTS' AS billingType,
    t.sid        AS trustee_sid,
    d.recordedValue,
    d.recordedDate,
    d.clientId,
    d.matterId
FROM [tenant].userActivityDocuments AS d
INNER JOIN [tenant].Trustee AS t
    ON t.emailAddress = d.userString
   AND d.userType = N'EMAIL'
WHERE d.matterId = ''

UNION ALL

SELECT
    N'DOCUMENTS' AS billingType,
    t.sid        AS trustee_sid,
    d.recordedValue,
    d.recordedDate,
    d.clientId,
    d.matterId
FROM [tenant].userActivityDocuments AS d
INNER JOIN [tenant].Trustee AS t
    ON t.foreignId = d.userString
   AND d.userType = N'FOREIGNID'
WHERE d.matterId = ''

UNION ALL

SELECT
    N'DOCUMENTS' AS billingType,
    t.sid        AS trustee_sid,
    d.recordedValue,
    d.recordedDate,
    d.clientId,
    d.matterId
FROM [tenant].userActivityDocuments AS d
INNER JOIN [tenant].Trustee AS t
    ON t.domainUser = d.userString
   AND d.userType = N'DOMAIN'
WHERE d.matterId = ''

UNION ALL

SELECT
    N'DOCUMENTS' AS billingType,
    t.sid        AS trustee_sid,
    d.recordedValue,
    d.recordedDate,
    d.clientId,
    d.matterId
FROM [tenant].userActivityDocuments AS d
INNER JOIN [tenant].Trustee AS t
    ON t.unqualifiedDomainUser = d.userString
   AND d.userType = N'UNQUALIFIED_DOMAIN'
WHERE d.matterId = ''
Run Code Online (Sandbox Code Playgroud)

但是,由于 LEFT JOIN + IS NULL 这种过滤方式,执行计划中仍然留有过滤器运算符:

在此处输入图片说明

 LEFT JOIN [tenant].Asset matter ON matter.client_sid = client.sid AND
 matter.displayId = data.matterId
     WHERE (matter.sid IS NULL)
Run Code Online (Sandbox Code Playgroud)

我们可以通过将其更改为NOT EXISTS语句来改进这种类型的过滤。

这给我们留下了:

-- [tenant]._DocumentsBilling: document activity resolved to trustee, client
-- and (when one exists) matter.
--
-- The single-SELECT form of this view filtered on
--     (matter.sid IS NULL AND data.matterId = '') OR matter.sid IS NOT NULL
-- after all joins. Here the OR is split into two mutually exclusive branches
-- so each predicate can be applied as early as possible:
--   1. client-level rows: matterId = '' and the client has no matter with
--      that displayId (matter_sid / matterGroup_sid are NULL);
--   2. matter-level rows: a matching matter exists under the client.
--
-- The branches cannot overlap, so they are combined with UNION ALL. A plain
-- UNION would additionally de-duplicate identical activity rows (same
-- trustee, client, matter, date and value) and under-count any SUM taken over
-- recordedValue, which the single-SELECT form never did.
CREATE VIEW [tenant]._DocumentsBilling AS
    -- Branch 1: client-level billing (no matter).
    SELECT
        data.billingType       AS billingType,
        data.trustee_sid       AS trustee_sid,
        data.recordedValue,
        client.sid             AS client_sid,
        client.clientGroup_sid AS clientGroup_sid,
        CAST(NULL AS bigint)   AS matter_sid,       -- typed to match Asset.sid
        CAST(NULL AS bigint)   AS matterGroup_sid,  -- typed to match Asset.matterGroup_sid
        data.recordedDate
    FROM (
        SELECT N'DOCUMENTS' AS billingType, t.sid AS trustee_sid, recordedValue, recordedDate, clientId, matterId
        FROM [tenant].userActivityDocuments d
        INNER JOIN [tenant].Trustee t ON t.emailAddress = d.userString AND d.userType = N'EMAIL'
        WHERE matterId = ''
        UNION ALL
        SELECT N'DOCUMENTS' AS billingType, t.sid AS trustee_sid, recordedValue, recordedDate, clientId, matterId
        FROM [tenant].userActivityDocuments d
        INNER JOIN [tenant].Trustee t ON t.foreignId = d.userString AND d.userType = N'FOREIGNID'
        WHERE matterId = ''
        UNION ALL
        SELECT N'DOCUMENTS' AS billingType, t.sid AS trustee_sid, recordedValue, recordedDate, clientId, matterId
        FROM [tenant].userActivityDocuments d
        INNER JOIN [tenant].Trustee t ON t.domainUser = d.userString AND d.userType = N'DOMAIN'
        WHERE matterId = ''
        UNION ALL
        SELECT N'DOCUMENTS' AS billingType, t.sid AS trustee_sid, recordedValue, recordedDate, clientId, matterId
        FROM [tenant].userActivityDocuments d
        INNER JOIN [tenant].Trustee t ON t.unqualifiedDomainUser = d.userString AND d.userType = N'UNQUALIFIED_DOMAIN'
        WHERE matterId = ''
    ) data
    INNER JOIN [tenant].Asset client ON client.displayId = data.clientId
    -- Anti-join replaces LEFT JOIN ... WHERE matter.sid IS NULL.
    WHERE NOT EXISTS (
        SELECT 1
        FROM [tenant].Asset matter
        WHERE matter.client_sid = client.sid
          AND matter.displayId = data.matterId
    )

    UNION ALL

    -- Branch 2: matter-level billing (matter row found).
    SELECT
        data.billingType       AS billingType,
        data.trustee_sid       AS trustee_sid,
        data.recordedValue,
        client.sid             AS client_sid,
        client.clientGroup_sid AS clientGroup_sid,
        matter.sid             AS matter_sid,
        matter.matterGroup_sid AS matterGroup_sid,
        data.recordedDate
    FROM (
        SELECT N'DOCUMENTS' AS billingType, t.sid AS trustee_sid, recordedValue, recordedDate, clientId, matterId
        FROM [tenant].userActivityDocuments d
        INNER JOIN [tenant].Trustee t ON t.emailAddress = d.userString AND d.userType = N'EMAIL'
        UNION ALL
        SELECT N'DOCUMENTS' AS billingType, t.sid AS trustee_sid, recordedValue, recordedDate, clientId, matterId
        FROM [tenant].userActivityDocuments d
        INNER JOIN [tenant].Trustee t ON t.foreignId = d.userString AND d.userType = N'FOREIGNID'
        UNION ALL
        SELECT N'DOCUMENTS' AS billingType, t.sid AS trustee_sid, recordedValue, recordedDate, clientId, matterId
        FROM [tenant].userActivityDocuments d
        INNER JOIN [tenant].Trustee t ON t.domainUser = d.userString AND d.userType = N'DOMAIN'
        UNION ALL
        SELECT N'DOCUMENTS' AS billingType, t.sid AS trustee_sid, recordedValue, recordedDate, clientId, matterId
        FROM [tenant].userActivityDocuments d
        INNER JOIN [tenant].Trustee t ON t.unqualifiedDomainUser = d.userString AND d.userType = N'UNQUALIFIED_DOMAIN'
    ) data
    INNER JOIN [tenant].Asset client ON client.displayId = data.clientId
    -- Asset.sid is NOT NULL, so LEFT JOIN + "matter.sid IS NOT NULL" is
    -- exactly an inner join; written as one.
    INNER JOIN [tenant].Asset matter
        ON matter.client_sid = client.sid
       AND matter.displayId = data.matterId;
Run Code Online (Sandbox Code Playgroud)

这应该为您提供更早的过滤和更好的性能。

最终执行计划在这里。