Mar*_*own 4 sql-server aggregate sum
在我们的应用程序中,有一个通过 API 端点调用的查询,它返回对指定客户(client)或事项(matter)(一个或多个,由 ID 指定)存在“活动”的用户。当活动表有 3000 万行时,这个查询大约需要 15 秒才能返回(注意“资产”表中还有约 60 万行,“用户”约有 2700 个)。
表格的粗略架构可以在我的问题的底部找到。下面是我们查找“活动超过阈值的用户”的查询。为简洁起见,我也在下面放置了视图定义。
当此端点返回一页数据时,还会运行第二个类似的查询,以获取分页响应所需的元素总数——也就是说,端点的总耗时大约是单次查询耗时的 2 倍。
我的问题本质上是,我应该应用哪些技术来提高此查询的性能?我们试图坚持的“基准”是端点响应的“亚秒级”。
查询计划可以在这里找到。
-- Returns one page of distinct trustees that have at least one asset group
-- whose summed docsAccessed exceeds @P4. Activity is restricted by the
-- session set (@P0), the recorded-date lower bound (@P1) and the client
-- sid list (@P2); paging is driven by @P6 (offset) and @P7 (page size).
SELECT DISTINCT
    t.type,
    t.sid,
    t.name,
    t.emailAddress,
    t.jobTitle
FROM sec.Trustee AS t
INNER JOIN (
    -- One row per (trustee, asset type, displayId, displayName); only groups
    -- above the docsAccessed threshold are kept.
    SELECT
        activity.sid,
        SUM(activity.hoursBilled)  AS hoursBilled,
        SUM(activity.docsAccessed) AS docsAccessed,
        activity.asset_type        AS asset_type,
        activity.displayId         AS displayId,
        activity.displayName       AS displayName
    FROM (
        SELECT
            billing.trustee_sid   AS sid,
            0                     AS hoursBilled,   -- constant 0 in this query
            billing.recordedValue AS docsAccessed,
            a.type                AS asset_type,
            a.displayId,
            a.displayName
        FROM sec._DocumentsBilling AS billing
        INNER JOIN sec.SessionSid AS s
            ON s.sid = billing.client_sid
           AND s.setID = @P0
        INNER JOIN sec.Asset AS a
            ON a.sid = billing.client_sid
        -- Row filters on the billing view; for inner joins these are
        -- equivalent to placing them in the ON clause.
        WHERE billing.recordedDate > @P1
          AND billing.client_sid IN (@P2)
    ) AS activity
    GROUP BY
        activity.sid,
        activity.asset_type,
        activity.displayId,
        activity.displayName
    HAVING SUM(activity.docsAccessed) > @P4
) AS qualifying
    ON qualifying.sid = t.sid
ORDER BY t.sid
OFFSET @P6 ROWS
FETCH NEXT @P7 ROWS ONLY
Run Code Online (Sandbox Code Playgroud)
最大的性能成本是将 'Asset' 表联接到这个活动的结果中:产品需求要求我们通过 displayId 来匹配,而不是 sid(sid 是 Asset 表的主键)。
- `type` 列(来自 Asset 表)。
- 帐单表中的 `matterId` 设置为空字符串时,我们将其计为“客户”帐单。
- 用户可以通过多种标识进行匹配('email'/'foreignid'/'domainUser'/'unqualifiedDomainUser')——我们可以去掉这种做法,只选择给定租户(客户)所需的那些。

CREATE VIEW sec._DocumentsBilling AS
-- Maps each document-activity row to a trustee (by the identifier kind named
-- in userType), to its client asset (by displayId) and, when present, to a
-- matter asset under that client.
SELECT
    activity.billingType   AS billingType,
    activity.trustee_sid   AS trustee_sid,
    activity.recordedValue,
    client.sid             AS client_sid,
    client.clientGroup_sid AS clientGroup_sid,
    client.securityType    AS clientSecurityType,
    matter.sid             AS matter_sid,
    matter.securityType    AS matterSecurityType,
    matter.matterGroup_sid AS matterGroup_sid,
    activity.recordedDate
FROM (
    -- userString is compared with a different Trustee column per userType.
    SELECT N'DOCUMENTS' AS billingType, t.sid AS trustee_sid,
           d.recordedValue, d.recordedDate, d.clientId, d.matterId
    FROM sec.userActivityDocuments AS d
    INNER JOIN sec.Trustee AS t
        ON t.emailAddress = d.userString
       AND d.userType = N'EMAIL'
    UNION ALL
    SELECT N'DOCUMENTS' AS billingType, t.sid AS trustee_sid,
           d.recordedValue, d.recordedDate, d.clientId, d.matterId
    FROM sec.userActivityDocuments AS d
    INNER JOIN sec.Trustee AS t
        ON t.foreignId = d.userString
       AND d.userType = N'FOREIGNID'
    UNION ALL
    SELECT N'DOCUMENTS' AS billingType, t.sid AS trustee_sid,
           d.recordedValue, d.recordedDate, d.clientId, d.matterId
    FROM sec.userActivityDocuments AS d
    INNER JOIN sec.Trustee AS t
        ON t.domainUser = d.userString
       AND d.userType = N'DOMAIN'
    UNION ALL
    SELECT N'DOCUMENTS' AS billingType, t.sid AS trustee_sid,
           d.recordedValue, d.recordedDate, d.clientId, d.matterId
    FROM sec.userActivityDocuments AS d
    INNER JOIN sec.Trustee AS t
        ON t.unqualifiedDomainUser = d.userString
       AND d.userType = N'UNQUALIFIED_DOMAIN'
) AS activity
INNER JOIN sec.Asset AS client
    ON client.displayId = activity.clientId
LEFT JOIN sec.Asset AS matter
    ON matter.client_sid = client.sid
   AND matter.displayId = activity.matterId
-- Keep rows whose matter resolved, plus unresolved rows only when the
-- activity carries an empty matterId.
WHERE matter.sid IS NOT NULL
   OR activity.matterId = ''
GO
-- Asset rows as shown in the question (abridged schema). sid is an identity
-- column; client_sid and the two group columns are nullable bigint columns.
CREATE TABLE [tenant].[Asset] (
    [type]            [nvarchar](100) NOT NULL,
    [sid]             [bigint]        IDENTITY(1,1) NOT NULL,
    [displayId]       [nvarchar](32)  NOT NULL,
    [tenant]          [bigint]        NOT NULL,
    [client_sid]      [bigint]        NULL,
    [clientGroup_sid] [bigint]        NULL,
    [matterGroup_sid] [bigint]        NULL
);
-- Trustee rows as shown in the question (abridged schema). The four nullable
-- identifier columns (emailAddress, foreignId, domainUser,
-- unqualifiedDomainUser) each have a filtered index defined further down.
CREATE TABLE [tenant].[Trustee] (
    [type]                  [nvarchar](32)  NOT NULL,
    [sid]                   [bigint]        IDENTITY(1,1) NOT NULL,
    [emailAddress]          [nvarchar](255) NULL,
    [name]                  [nvarchar](255) NULL,
    [jobTitle]              [nvarchar](255) NULL,
    [foreignId]             [nvarchar](255) NULL,
    [tenant]                [bigint]        NOT NULL,
    [domainUser]            [nvarchar](255) NULL,
    [unqualifiedDomainUser] [nvarchar](255) NULL
);
-- Document-activity rows. userType/userString identify the acting user,
-- clientId/matterId identify the asset by display id, and recordedValue is
-- the integer measure recorded for recordedDate.
CREATE TABLE [tenant].[userActivityDocuments] (
    [id]               [binary](16)    NOT NULL,
    [tenant]           [bigint]        NOT NULL,
    [userType]         [nvarchar](64)  NOT NULL,
    [systemName]       [nvarchar](200) NOT NULL,
    [clientId]         [nvarchar](32)  NOT NULL,
    [matterId]         [nvarchar](32)  NOT NULL,
    [recordedValue]    [int]           NOT NULL,
    [recordedDate]     [date]          NOT NULL,
    [recordedDateTime] [datetime2](7)  NOT NULL,
    [userString]       [nvarchar](255) NOT NULL,
    [collectionTime]   [bigint]        NOT NULL
);
-- Clustered primary key for Asset on (tenant, sid).
ALTER TABLE [tenant].[Asset]
    ADD CONSTRAINT [PK_Asset] PRIMARY KEY CLUSTERED
    (
        [tenant] ASC,
        [sid] ASC
    )
    WITH (
        PAD_INDEX = OFF,
        STATISTICS_NORECOMPUTE = OFF,
        SORT_IN_TEMPDB = OFF,
        IGNORE_DUP_KEY = OFF,
        ONLINE = OFF,
        ALLOW_ROW_LOCKS = ON,
        ALLOW_PAGE_LOCKS = ON,
        FILLFACTOR = 80
    )
    ON [PRIMARY];
GO
-- Unique nonclustered constraint on Asset over
-- (tenant, displayId, type, client_sid).
ALTER TABLE [tenant].[Asset]
    ADD CONSTRAINT [UNQ_ASSET_NAME_1] UNIQUE NONCLUSTERED
    (
        [tenant] ASC,
        [displayId] ASC,
        [type] ASC,
        [client_sid] ASC
    )
    WITH (
        PAD_INDEX = OFF,
        STATISTICS_NORECOMPUTE = OFF,
        SORT_IN_TEMPDB = OFF,
        IGNORE_DUP_KEY = OFF,
        ONLINE = OFF,
        ALLOW_ROW_LOCKS = ON,
        ALLOW_PAGE_LOCKS = ON,
        FILLFACTOR = 80
    )
    ON [PRIMARY];
GO
-- Clustered primary key for Trustee on (tenant, sid).
ALTER TABLE [tenant].[Trustee]
    ADD CONSTRAINT [PK_Trustee] PRIMARY KEY CLUSTERED
    (
        [tenant] ASC,
        [sid] ASC
    )
    WITH (
        PAD_INDEX = OFF,
        STATISTICS_NORECOMPUTE = OFF,
        SORT_IN_TEMPDB = OFF,
        IGNORE_DUP_KEY = OFF,
        ONLINE = OFF,
        ALLOW_ROW_LOCKS = ON,
        ALLOW_PAGE_LOCKS = ON,
        FILLFACTOR = 80
    )
    ON [PRIMARY];
GO
-- Unique filtered index: (tenant, domainUser) for rows where domainUser is
-- not NULL; isDeleting is carried as an included column.
CREATE UNIQUE NONCLUSTERED INDEX [FIDX_Trustee_1]
    ON [tenant].[Trustee]
    (
        [tenant] ASC,
        [domainUser] ASC
    )
    INCLUDE ([isDeleting])
    WHERE ([domainUser] IS NOT NULL)
    WITH (
        PAD_INDEX = OFF,
        STATISTICS_NORECOMPUTE = OFF,
        SORT_IN_TEMPDB = OFF,
        IGNORE_DUP_KEY = OFF,
        DROP_EXISTING = OFF,
        ONLINE = OFF,
        ALLOW_ROW_LOCKS = ON,
        ALLOW_PAGE_LOCKS = ON,
        FILLFACTOR = 80
    )
    ON [PRIMARY];
GO
-- Unique filtered index: (tenant, foreignId) for rows where foreignId is
-- not NULL; isDeleting is carried as an included column.
CREATE UNIQUE NONCLUSTERED INDEX [FIDX_Trustee_2]
    ON [tenant].[Trustee]
    (
        [tenant] ASC,
        [foreignId] ASC
    )
    INCLUDE ([isDeleting])
    WHERE ([foreignId] IS NOT NULL)
    WITH (
        PAD_INDEX = OFF,
        STATISTICS_NORECOMPUTE = OFF,
        SORT_IN_TEMPDB = OFF,
        IGNORE_DUP_KEY = OFF,
        DROP_EXISTING = OFF,
        ONLINE = OFF,
        ALLOW_ROW_LOCKS = ON,
        ALLOW_PAGE_LOCKS = ON,
        FILLFACTOR = 80
    )
    ON [PRIMARY];
GO
SET ANSI_PADDING ON
GO
-- Non-unique filtered index: (tenant, emailAddress) for rows where
-- emailAddress is not NULL; isDeleting is carried as an included column.
CREATE NONCLUSTERED INDEX [FIDX_Trustee_3]
    ON [tenant].[Trustee]
    (
        [tenant] ASC,
        [emailAddress] ASC
    )
    INCLUDE ([isDeleting])
    WHERE ([emailAddress] IS NOT NULL)
    WITH (
        PAD_INDEX = OFF,
        STATISTICS_NORECOMPUTE = OFF,
        SORT_IN_TEMPDB = OFF,
        DROP_EXISTING = OFF,
        ONLINE = OFF,
        ALLOW_ROW_LOCKS = ON,
        ALLOW_PAGE_LOCKS = ON,
        FILLFACTOR = 80
    )
    ON [PRIMARY];
GO
-- Non-unique filtered index: (tenant, unqualifiedDomainUser) for rows where
-- unqualifiedDomainUser is not NULL; isDeleting is an included column.
CREATE NONCLUSTERED INDEX [FIDX_Trustee_5]
    ON [tenant].[Trustee]
    (
        [tenant] ASC,
        [unqualifiedDomainUser] ASC
    )
    INCLUDE ([isDeleting])
    WHERE ([unqualifiedDomainUser] IS NOT NULL)
    WITH (
        PAD_INDEX = OFF,
        STATISTICS_NORECOMPUTE = OFF,
        SORT_IN_TEMPDB = OFF,
        DROP_EXISTING = OFF,
        ONLINE = OFF,
        ALLOW_ROW_LOCKS = ON,
        ALLOW_PAGE_LOCKS = ON,
        FILLFACTOR = 80
    )
    ON [PRIMARY];
GO
-- Clustered primary key for userActivityDocuments on (id, tenant).
ALTER TABLE [tenant].[userActivityDocuments]
    ADD CONSTRAINT [PK_userActivityDocuments] PRIMARY KEY CLUSTERED
    (
        [id] ASC,
        [tenant] ASC
    )
    WITH (
        PAD_INDEX = OFF,
        STATISTICS_NORECOMPUTE = OFF,
        SORT_IN_TEMPDB = OFF,
        IGNORE_DUP_KEY = OFF,
        ONLINE = OFF,
        ALLOW_ROW_LOCKS = ON,
        ALLOW_PAGE_LOCKS = ON,
        FILLFACTOR = 80
    )
    ON [PRIMARY];
GO
SET ANSI_PADDING ON
GO
-- Nonclustered index keyed on (tenant, clientId, matterId, userType,
-- recordedDate) with userString and recordedValue as included columns.
CREATE NONCLUSTERED INDEX [IDX_UserActivityDocuments_1]
    ON [tenant].[userActivityDocuments]
    (
        [tenant] ASC,
        [clientId] ASC,
        [matterId] ASC,
        [userType] ASC,
        [recordedDate] ASC
    )
    INCLUDE ([userString], [recordedValue])
    WITH (
        PAD_INDEX = OFF,
        STATISTICS_NORECOMPUTE = OFF,
        SORT_IN_TEMPDB = OFF,
        DROP_EXISTING = OFF,
        ONLINE = OFF,
        ALLOW_ROW_LOCKS = ON,
        ALLOW_PAGE_LOCKS = ON,
        FILLFACTOR = 100
    )
    ON [PRIMARY];
GO
Run Code Online (Sandbox Code Playgroud)
访问该视图的查询我不好评价,但从您提供的示例可以很快看出视图本身的性能问题。
为了满足 WHERE 子句,执行计划会在所有联接(INNER 和 LEFT OUTER)完成之后才出现一个过滤器(Filter)运算符:
WHERE (matter.sid IS NULL AND data.matterId = '') OR matter.sid IS NOT NULL
Run Code Online (Sandbox Code Playgroud)
使用 UNION 进一步拆分视图,会让过滤器运算符在执行计划中出现得更早一些,但还不够早。(呵呵)
拆分 OR 是提高视图性能的良好开端,但还有其他部分需要改进。
我们可以更改过滤器的第一部分:
WHERE (matter.sid IS NULL AND data.matterId = '')
Run Code Online (Sandbox Code Playgroud)
做法是去掉其中的 `AND data.matterId = ''`,并把该条件添加到 UNION ALL 子查询的每个分支中:
-- Activity-to-trustee subquery with the empty-matterId filter applied inside
-- every identifier branch.
SELECT N'DOCUMENTS' AS billingType, t.sid AS trustee_sid,
       d.recordedValue, d.recordedDate, d.clientId, d.matterId
FROM [tenant].userActivityDocuments AS d
INNER JOIN [tenant].Trustee AS t
    ON t.emailAddress = d.userString
   AND d.userType = N'EMAIL'
WHERE d.matterId = ''
UNION ALL
SELECT N'DOCUMENTS' AS billingType, t.sid AS trustee_sid,
       d.recordedValue, d.recordedDate, d.clientId, d.matterId
FROM [tenant].userActivityDocuments AS d
INNER JOIN [tenant].Trustee AS t
    ON t.foreignId = d.userString
   AND d.userType = N'FOREIGNID'
WHERE d.matterId = ''
UNION ALL
SELECT N'DOCUMENTS' AS billingType, t.sid AS trustee_sid,
       d.recordedValue, d.recordedDate, d.clientId, d.matterId
FROM [tenant].userActivityDocuments AS d
INNER JOIN [tenant].Trustee AS t
    ON t.domainUser = d.userString
   AND d.userType = N'DOMAIN'
WHERE d.matterId = ''
UNION ALL
SELECT N'DOCUMENTS' AS billingType, t.sid AS trustee_sid,
       d.recordedValue, d.recordedDate, d.clientId, d.matterId
FROM [tenant].userActivityDocuments AS d
INNER JOIN [tenant].Trustee AS t
    ON t.unqualifiedDomainUser = d.userString
   AND d.userType = N'UNQUALIFIED_DOMAIN'
WHERE d.matterId = ''
Run Code Online (Sandbox Code Playgroud)
但是,由于使用了 LEFT JOIN + IS NULL 这种过滤方式,执行计划中仍然会留下过滤器运算符:
LEFT JOIN [tenant].Asset matter ON matter.client_sid = client.sid AND
matter.displayId = data.matterId
WHERE (matter.sid IS NULL)
Run Code Online (Sandbox Code Playgroud)
我们可以把它改写为 NOT EXISTS 语句来改进这种类型的过滤。
这给我们留下了:
-- _DocumentsBilling, split into two disjoint halves so each filter can be
-- applied before the joins rather than after them:
--   1. client-level activity: matterId is empty and no matter asset exists
--      for that client/displayId (matter columns are NULL);
--   2. matter-level activity: the matter asset resolves (matter columns set).
-- The halves are combined with UNION ALL, not UNION. They cannot overlap
-- (matter_sid is NULL in one and NOT NULL in the other), and UNION would
-- collapse identical activity rows, making downstream SUM(recordedValue)
-- undercount while adding a sort/distinct over the whole result.
-- NOTE(review): the original view also exposed clientSecurityType and
-- matterSecurityType; they are not selected here. Re-add them if consumers
-- of the view read those columns.
CREATE VIEW [tenant]._DocumentsBilling AS
SELECT
    data.billingType       AS billingType,
    data.trustee_sid       AS trustee_sid,
    data.recordedValue,
    client.sid             AS client_sid,
    client.clientGroup_sid AS clientGroup_sid,
    CAST(NULL AS bigint)   AS matter_sid,       -- typed to match Asset.sid
    CAST(NULL AS bigint)   AS matterGroup_sid,  -- typed to match Asset.matterGroup_sid
    data.recordedDate
FROM (
    -- The empty-matterId filter sits inside every branch so it is applied
    -- while reading userActivityDocuments.
    SELECT N'DOCUMENTS' AS billingType, t.sid AS trustee_sid,
           d.recordedValue, d.recordedDate, d.clientId, d.matterId
    FROM [tenant].userActivityDocuments AS d
    INNER JOIN [tenant].Trustee AS t
        ON t.emailAddress = d.userString
       AND d.userType = N'EMAIL'
    WHERE d.matterId = ''
    UNION ALL
    SELECT N'DOCUMENTS' AS billingType, t.sid AS trustee_sid,
           d.recordedValue, d.recordedDate, d.clientId, d.matterId
    FROM [tenant].userActivityDocuments AS d
    INNER JOIN [tenant].Trustee AS t
        ON t.foreignId = d.userString
       AND d.userType = N'FOREIGNID'
    WHERE d.matterId = ''
    UNION ALL
    SELECT N'DOCUMENTS' AS billingType, t.sid AS trustee_sid,
           d.recordedValue, d.recordedDate, d.clientId, d.matterId
    FROM [tenant].userActivityDocuments AS d
    INNER JOIN [tenant].Trustee AS t
        ON t.domainUser = d.userString
       AND d.userType = N'DOMAIN'
    WHERE d.matterId = ''
    UNION ALL
    SELECT N'DOCUMENTS' AS billingType, t.sid AS trustee_sid,
           d.recordedValue, d.recordedDate, d.clientId, d.matterId
    FROM [tenant].userActivityDocuments AS d
    INNER JOIN [tenant].Trustee AS t
        ON t.unqualifiedDomainUser = d.userString
       AND d.userType = N'UNQUALIFIED_DOMAIN'
    WHERE d.matterId = ''
) AS data
INNER JOIN [tenant].Asset AS client
    ON client.displayId = data.clientId
-- Anti-join: keep the row only when no matter asset matches.
WHERE NOT EXISTS (
    SELECT *
    FROM [tenant].Asset AS matter
    WHERE matter.client_sid = client.sid
      AND matter.displayId = data.matterId
)
UNION ALL
SELECT
    data.billingType       AS billingType,
    data.trustee_sid       AS trustee_sid,
    data.recordedValue,
    client.sid             AS client_sid,
    client.clientGroup_sid AS clientGroup_sid,
    matter.sid             AS matter_sid,
    matter.matterGroup_sid AS matterGroup_sid,
    data.recordedDate
FROM (
    SELECT N'DOCUMENTS' AS billingType, t.sid AS trustee_sid,
           d.recordedValue, d.recordedDate, d.clientId, d.matterId
    FROM [tenant].userActivityDocuments AS d
    INNER JOIN [tenant].Trustee AS t
        ON t.emailAddress = d.userString
       AND d.userType = N'EMAIL'
    UNION ALL
    SELECT N'DOCUMENTS' AS billingType, t.sid AS trustee_sid,
           d.recordedValue, d.recordedDate, d.clientId, d.matterId
    FROM [tenant].userActivityDocuments AS d
    INNER JOIN [tenant].Trustee AS t
        ON t.foreignId = d.userString
       AND d.userType = N'FOREIGNID'
    UNION ALL
    SELECT N'DOCUMENTS' AS billingType, t.sid AS trustee_sid,
           d.recordedValue, d.recordedDate, d.clientId, d.matterId
    FROM [tenant].userActivityDocuments AS d
    INNER JOIN [tenant].Trustee AS t
        ON t.domainUser = d.userString
       AND d.userType = N'DOMAIN'
    UNION ALL
    SELECT N'DOCUMENTS' AS billingType, t.sid AS trustee_sid,
           d.recordedValue, d.recordedDate, d.clientId, d.matterId
    FROM [tenant].userActivityDocuments AS d
    INNER JOIN [tenant].Trustee AS t
        ON t.unqualifiedDomainUser = d.userString
       AND d.userType = N'UNQUALIFIED_DOMAIN'
) AS data
INNER JOIN [tenant].Asset AS client
    ON client.displayId = data.clientId
-- Asset.sid is NOT NULL, so the former LEFT JOIN + "matter.sid IS NOT NULL"
-- is exactly an inner join.
INNER JOIN [tenant].Asset AS matter
    ON matter.client_sid = client.sid
   AND matter.displayId = data.matterId;
Run Code Online (Sandbox Code Playgroud)
这应该为您提供更早的过滤和更好的性能。