Leg*_*end 1 sql-server-2008 sql-server query
首先,我可以描述表的架构,但我的表本身大约是 80GB,所以我希望一些专家的眼睛可以看到可以执行的任何明显的优化。我正在进行一些基于活动的分析,以了解来自内部虚拟游戏环境的一组用户。
我有两个基表:
EventTable: Moderately big at 10GB
Activity_ID UserName ActivityStart ActivityEnd Badge
ActivityTable: Super big at 100GB
UserName ActivityTime Game1Points Game2Points
Run Code Online (Sandbox Code Playgroud)
简而言之,我正在尝试以下操作:
- For each activity in the EventTable:
- Compute the median of Game1Points, Game2Points
for two cases: for all entries in the ActivityTable
that were present since 1 hour before the activity
and for all entries stored during the activity itself
Run Code Online (Sandbox Code Playgroud)
因此,我期望得到的最终结果表如下:
ResultTable:
Activity_ID Badge Game1_Before Game1_During Game2_Before Game2_During
Run Code Online (Sandbox Code Playgroud)
这显然看起来像是 CURSOR 的工作,所以我首先使用这种方法编写了我的查询。它正在运行,我可以看到进度,但在阅读了这么多恐怖故事之后,我决定用基于集合的方法重写我那 200 行基于 CURSOR 的代码。这对小表非常有效。但是,对于我这种规模的表,它似乎停滞不前(或者至少我不确定它应该运行多长时间)。我运行了 Display Estimated Plan 并添加了相关索引。现在我的问题是,我甚至不确定这个查询需要多长时间才能运行(不像 CURSOR 方法那样,我可以打印一些消息并查看进度)。
有人可以告诉我是否有更好的方法来做到这一点?我当前的查询如下。虽然查询本身看起来很长,但结构非常简单:首先,我定义了一个 CTE,它获取与特定事件关联的所有点。使用这个 CTE,我计算了两个单独的中位数。
-- Median of Game1Points and Game2Points per qualifying activity, reported
-- separately for the hour before the rounded activity start ("Before") and
-- for the activity itself ("During").
-- Output columns: Activity_ID, Badge, Game1_Before, Game1_During,
--                 Game2_Before, Game2_During.
WITH EV AS
(
    -- Qualifying events. The rounded window boundaries are named here so the
    -- join filter and the period classification below use the same expressions.
    -- NOTE(review): the schema description lists this column as ActivityEnd,
    -- while the query references ActivityFinish - confirm the real name.
    SELECT E.Activity_ID
         , E.Badge
         , E.UserName
         , DATEADD(HOUR, -1, dbo.ROUNDTIME(E.ActivityStart)) AS WindowStart
         , dbo.ROUNDTIME(E.ActivityStart)                    AS PeriodSplit
         , dbo.ROUNDTIME(E.ActivityFinish)                   AS WindowEnd
    FROM dbo.EventTable E
    WHERE E.Badge LIKE 'GREEN%'
       OR E.Badge LIKE 'RED%'
)
, ST AS
(
    -- Every ActivityTable entry that falls inside an event's window, tagged
    -- with its period. The WHERE clause already restricts ActivityTime to
    -- [WindowStart, WindowEnd], so a single comparison against PeriodSplit is
    -- enough; an entry exactly at PeriodSplit counts as 'Before'.
    SELECT EV.Activity_ID
         , EV.Badge
         , CASE WHEN Y.ActivityTime <= EV.PeriodSplit
                THEN 'Before'
                ELSE 'During'
           END                           AS Period
         , CAST(Y.Game1Points AS bigint) AS Game1Points
         , CAST(Y.Game2Points AS bigint) AS Game2Points
    FROM EV
    INNER JOIN dbo.ActivityTable Y
        ON Y.UserName = EV.UserName
    WHERE Y.ActivityTime BETWEEN EV.WindowStart AND EV.WindowEnd
)
, RANKED AS
(
    -- Position of each row within its (activity, period) group, per game,
    -- plus the group size. Ties need no tie-breaker: tied rows carry the same
    -- value, so whichever of them lands on the middle position is equivalent.
    -- NULL point values are not excluded from the positions.
    SELECT ST.Activity_ID
         , ST.Badge
         , ST.Period
         , ST.Game1Points
         , ST.Game2Points
         , ROW_NUMBER() OVER (PARTITION BY ST.Activity_ID, ST.Badge, ST.Period
                              ORDER BY ST.Game1Points) AS Game1Row
         , ROW_NUMBER() OVER (PARTITION BY ST.Activity_ID, ST.Badge, ST.Period
                              ORDER BY ST.Game2Points) AS Game2Row
         , COUNT(*)     OVER (PARTITION BY ST.Activity_ID, ST.Badge, ST.Period) AS PeriodRows
    FROM ST
)
, MIDDLE AS
(
    -- Keep only the middle row(s) of each group. For n rows the middle
    -- positions are (n + 1) / 2 and (n + 2) / 2 (integer division): the same
    -- position when n is odd, the two central positions when n is even.
    -- Each game's value is kept only on that game's own middle rows, so a row
    -- that is central for one game cannot leak into the other game's average.
    -- The cast to DECIMAL keeps the .5 when two central values are averaged.
    SELECT R.Activity_ID
         , R.Badge
         , R.Period
         , CASE WHEN R.Game1Row IN ((R.PeriodRows + 1) / 2, (R.PeriodRows + 2) / 2)
                THEN CAST(R.Game1Points AS DECIMAL(19, 0))
           END AS Game1Mid
         , CASE WHEN R.Game2Row IN ((R.PeriodRows + 1) / 2, (R.PeriodRows + 2) / 2)
                THEN CAST(R.Game2Points AS DECIMAL(19, 0))
           END AS Game2Mid
    FROM RANKED R
    WHERE R.Game1Row IN ((R.PeriodRows + 1) / 2, (R.PeriodRows + 2) / 2)
       OR R.Game2Row IN ((R.PeriodRows + 1) / 2, (R.PeriodRows + 2) / 2)
)
-- One row per activity; AVG ignores the NULLs produced by the CASE
-- expressions, so each column averages only its own middle value(s).
SELECT M.Activity_ID
     , M.Badge
     , AVG(CASE WHEN M.Period = 'Before' THEN M.Game1Mid END) AS Game1_Before
     , AVG(CASE WHEN M.Period = 'During' THEN M.Game1Mid END) AS Game1_During
     , AVG(CASE WHEN M.Period = 'Before' THEN M.Game2Mid END) AS Game2_Before
     , AVG(CASE WHEN M.Period = 'During' THEN M.Game2Mid END) AS Game2_During
FROM MIDDLE M
GROUP BY M.Activity_ID, M.Badge;
Run Code Online (Sandbox Code Playgroud)
更新:采纳建议之后的第 1 次尝试
-- Stage 1: materialize every ActivityTable entry that falls inside the window
-- of a GREEN*/RED* event (one hour before the rounded start until the rounded
-- finish), tagged per game as "before" or "during" the activity.
IF OBJECT_ID('tempdb..#ST') IS NOT NULL
    DROP TABLE #ST;

CREATE TABLE #ST
(
    ID          BIGINT          -- ROW_NUMBER() returns bigint
  , Activity_ID INT
  , Badge       VARCHAR(255)    -- selected below; was missing from the table
  , Game1Type   VARCHAR(19)     -- 'Game1ActivityBefore' is 19 characters long
  , Game2Type   VARCHAR(19)
  , Game1Points DECIMAL(19, 0)  -- wide enough for any bigint value
  , Game2Points DECIMAL(19, 0)
);

INSERT INTO #ST
    (ID, Activity_ID, Badge, Game1Type, Game2Type, Game1Points, Game2Points)
SELECT ROW_NUMBER() OVER (ORDER BY X.Activity_ID) AS ID
     , X.Activity_ID
     , X.Badge
     -- The WHERE clause already restricts ActivityTime to
     -- [WindowStart, WindowEnd], so one comparison against PeriodSplit decides
     -- the period; an entry exactly at PeriodSplit counts as "before".
     , CASE WHEN Y.ActivityTime <= X.PeriodSplit
            THEN 'Game1ActivityBefore'
            ELSE 'Game1ActivityDuring'
       END AS Game1Type
     , CASE WHEN Y.ActivityTime <= X.PeriodSplit
            THEN 'Game2ActivityBefore'
            ELSE 'Game2ActivityDuring'
       END AS Game2Type
     , CAST(Y.Game1Points AS bigint) AS Game1Points
     , CAST(Y.Game2Points AS bigint) AS Game2Points
FROM
(
    -- Qualifying events with the rounded window boundaries named once.
    -- NOTE(review): the schema description lists this column as ActivityEnd,
    -- while the query references ActivityFinish - confirm the real name.
    SELECT E.Activity_ID
         , E.Badge
         , E.UserName
         , DATEADD(HOUR, -1, dbo.ROUNDTIME(E.ActivityStart)) AS WindowStart
         , dbo.ROUNDTIME(E.ActivityStart)                    AS PeriodSplit
         , dbo.ROUNDTIME(E.ActivityFinish)                   AS WindowEnd
    FROM dbo.EventTable E
    WHERE E.Badge LIKE 'GREEN%'
       OR E.Badge LIKE 'RED%'
) X
INNER JOIN dbo.ActivityTable Y
    ON Y.UserName = X.UserName
WHERE Y.ActivityTime BETWEEN X.WindowStart AND X.WindowEnd;
-- Stage 2: number the rows of #ST inside each (activity, badge, period) group
-- in ascending and descending point order, once per game. The unique #ST.ID
-- breaks ties so the two numberings mirror each other; the row(s) whose
-- ascending and descending numbers differ by at most 1 are the median rows.
IF OBJECT_ID('tempdb..#INTERMEDIATE') IS NOT NULL
    DROP TABLE #INTERMEDIATE;

CREATE TABLE #INTERMEDIATE
(
    Activity_ID  INT
  , Badge        VARCHAR(255)
  , Game1Type    VARCHAR(19)     -- 'Game1ActivityBefore' is 19 characters long
  , Game2Type    VARCHAR(19)
  , Game1Points  DECIMAL(19, 0)
  , Game1RowAsc  BIGINT          -- ROW_NUMBER() returns bigint
  , Game1RowDesc BIGINT
  , Game2Points  DECIMAL(19, 0)
  , Game2RowAsc  BIGINT
  , Game2RowDesc BIGINT
);

INSERT INTO #INTERMEDIATE
    ( Activity_ID, Badge, Game1Type, Game2Type
    , Game1Points, Game1RowAsc, Game1RowDesc
    , Game2Points, Game2RowAsc, Game2RowDesc )
SELECT S.Activity_ID
     , S.Badge
     , S.Game1Type
     , S.Game2Type
     , S.Game1Points
     , ROW_NUMBER() OVER (PARTITION BY S.Activity_ID, S.Badge, S.Game1Type
                          ORDER BY S.Game1Points ASC,  S.ID ASC)  AS Game1RowAsc
     , ROW_NUMBER() OVER (PARTITION BY S.Activity_ID, S.Badge, S.Game1Type
                          ORDER BY S.Game1Points DESC, S.ID DESC) AS Game1RowDesc
     , S.Game2Points
     , ROW_NUMBER() OVER (PARTITION BY S.Activity_ID, S.Badge, S.Game2Type
                          ORDER BY S.Game2Points ASC,  S.ID ASC)  AS Game2RowAsc
     , ROW_NUMBER() OVER (PARTITION BY S.Activity_ID, S.Badge, S.Game2Type
                          ORDER BY S.Game2Points DESC, S.ID DESC) AS Game2RowDesc
FROM #ST S;

-- Indexes are created after the load so the insert does not have to maintain them.
CREATE CLUSTERED INDEX [TT1] ON #INTERMEDIATE (Activity_ID, Badge);

CREATE NONCLUSTERED INDEX [TT2] ON #INTERMEDIATE
    (Game1RowAsc, Game1RowDesc, Game2RowAsc, Game2RowDesc);
-- Stage 3: one row per activity with the four medians.
-- The row-number columns live in #INTERMEDIATE (not #ST), so that is the table
-- read here.
SELECT M.Activity_ID
     , M.Badge
     -- AVG ignores the NULLs produced by the CASE expressions, so each column
     -- averages only the middle value(s) of its own game and period.
     , AVG(CASE WHEN M.Game1Type = 'Game1ActivityBefore' THEN M.Game1Mid END) AS Game1_Before
     , AVG(CASE WHEN M.Game1Type = 'Game1ActivityDuring' THEN M.Game1Mid END) AS Game1_During
     , AVG(CASE WHEN M.Game2Type = 'Game2ActivityBefore' THEN M.Game2Mid END) AS Game2_Before
     , AVG(CASE WHEN M.Game2Type = 'Game2ActivityDuring' THEN M.Game2Mid END) AS Game2_During
FROM
(
    -- Keep only rows that are a median row for at least one game, and expose
    -- each game's value only on that game's own median rows. Filtering with OR
    -- alone would let a row that is central for Game2 be averaged into the
    -- Game1 median (and vice versa).
    SELECT I.Activity_ID
         , I.Badge
         , I.Game1Type
         , I.Game2Type
         , CASE WHEN I.Game1RowAsc IN (I.Game1RowDesc, I.Game1RowDesc - 1, I.Game1RowDesc + 1)
                THEN I.Game1Points
           END AS Game1Mid
         , CASE WHEN I.Game2RowAsc IN (I.Game2RowDesc, I.Game2RowDesc - 1, I.Game2RowDesc + 1)
                THEN I.Game2Points
           END AS Game2Mid
    FROM #INTERMEDIATE I
    WHERE I.Game1RowAsc IN (I.Game1RowDesc, I.Game1RowDesc - 1, I.Game1RowDesc + 1)
       OR I.Game2RowAsc IN (I.Game2RowDesc, I.Game2RowDesc - 1, I.Game2RowDesc + 1)
) M
GROUP BY M.Activity_ID, M.Badge;

-- Clean up the staging tables.
DROP TABLE #INTERMEDIATE;
DROP TABLE #ST;
Run Code Online (Sandbox Code Playgroud)
更新 2:执行计划
会不会是 Display Execution Plan 选项没有显示正在使用的索引?
1 - 不要为了提高性能而使用 CTE。CTE 基本上是一次性视图,它们没有任何性能优势。如果性能很重要并且您的数据量足够大,带索引的 #temp 表可能效果更好。
2 - 一定要使用基于集合的方法。我想不出任何 CURSOR 比基于集合的查询更快的场景。
我不会完全重写您的查询,但我确实看到了一些应该会产生巨大差异的事情:
- 将数据放入 #temp 表中并为其编制索引。现在您的过滤条件是动态计算出来的,不是 SARGable 的(无法利用索引查找)!您对 ROW_NUMBER 的输出进行了 4 次过滤,因此每一行都要做一次表扫描。
归档时间: |
|
查看次数: |
631 次 |
最近记录: |