ck1*_*123 6 sql-server stored-procedures optimization t-sql cross-apply
我用一些测试数据更新了这篇文章。
我正在为我的电影数据库创建一个报告,我希望最终用户能够按类型筛选电影。然而,有些电影有多种类型,因此我对数据库做了规范化:一部具有多个类型的电影会拆分成多行,每一行都指向相应的类型/类型 ID。(对导演也做了类似的处理。)
标准化前
| 电影 | 类型 |
|---|---|
| 弗兰肯斯坦的新娘 | 恐怖、剧情 |
标准化后
| 电影 | 类型 |
|---|---|
| 弗兰肯斯坦的新娘 | 恐怖 |
| 弗兰肯斯坦的新娘 | 剧情 |
我遇到的问题是:在这份报告中,我希望电影即使有多种类型也不会重复出现。也就是说,每部电影只占一行,类型字段被连接起来,在同一行内显示该电影的所有类型(类似于规范化之前的样子)。我最终的做法是创建一个视图,对电影查询的输出按电影 ID 交叉应用(CROSS APPLY)与之匹配的类型。我觉得我把事情弄得有点过于复杂了,而且我的存储过程运行得相当慢,因为我还有其他几个允许用户筛选的字段。
下面是交叉应用视图。
-- View: dbo.vwMoviesJoinedGenres
-- Returns one row per (movie, genre) pair -- GenreID stays in the output so
-- callers can filter on it -- and adds Genres, a ", "-separated list of every
-- genre linked to the movie through GenresMovies.
ALTER VIEW [dbo].[vwMoviesJoinedGenres] AS
WITH genreMovies_CTE AS (
    -- Movie attributes joined to each of the movie's genres and its title type.
    SELECT M.MovieID
         , M.MovieTitle
         , G.GenreName
         , G.GenreID
         , M.TitleTypeID
         , TT.TitleType
         , M.MediaID
         , M.IMDBLink
         , M.IMDBRating
         , M.ReleaseDate
         , M.Runtime
         , M.ImageURL
         , M.MovieYear
    FROM [dbo].[Movies] AS M
    INNER JOIN GenresMovies AS GM
        ON GM.MovieID = M.MovieID
    INNER JOIN Genres AS G
        ON G.GenreID = GM.GenreID
    INNER JOIN TitleType AS TT
        ON TT.TitleTypeID = M.TitleTypeID
)
SELECT DISTINCT
       Src.MovieID
     , Src.ImageURL
     , Src.MovieYear
     , Src.MovieTitle
     , Src.TitleType
     , GL.Genres
     , Src.GenreID
     , Src.TitleTypeID
     , Src.MediaID
     , Src.IMDBLink
     , Src.IMDBRating
     , Src.ReleaseDate
     , Src.Runtime
FROM genreMovies_CTE AS Src
CROSS APPLY
(
    -- Build the list with a LEADING ", " and strip it with STUFF; this does not
    -- depend on how LEN treats trailing spaces.
    SELECT STUFF(
        (
            SELECT ', ' + G2.GenreName
            FROM Genres AS G2
            INNER JOIN GenresMovies AS GM2
                ON GM2.GenreID = G2.GenreID
            WHERE GM2.MovieID = Src.MovieID
            GROUP BY G2.GenreName      -- one entry per distinct genre name
            ORDER BY G2.GenreName      -- deterministic list order
            -- TYPE + .value() returns plain text; without them FOR XML would
            -- hand back XML-escaped text (e.g. '&' as '&amp;').
            FOR XML PATH(''), TYPE
        ).value('.', 'NVARCHAR(MAX)')
      , 1, 2, '')
) AS GL(Genres)
GO
Run Code Online (Sandbox Code Playgroud)
然后,我在下面的存储过程中使用此视图(以及一个针对导演的类似视图)。
USE [Movies]
GO
SET ANSI_NULLS ON
GO
SET QUOTED_IDENTIFIER ON
GO
--====================================================
-- Procedure: dbo.usp_MovieByGenreUsingViews
-- Lists movies (title, genre list, director list, title type, media type)
-- that match the selected genres, media and title types.
--
-- Parameters (each a comma-separated list of integer IDs):
--   @GenreID     - genre IDs to include
--   @MediaID     - media IDs to include
--   @TitleTypeID - title-type IDs to include
--
-- Result: one row per distinct movie/genre-list/director-list combination,
--         ordered by MovieTitle.
--====================================================
ALTER PROCEDURE [dbo].[usp_MovieByGenreUsingViews]
      @GenreID     nvarchar(255)
    , @MediaID     nvarchar(255)
    , @TitleTypeID nvarchar(255)
WITH RECOMPILE
AS
BEGIN
    SET NOCOUNT ON;

    -- Parse each ID list exactly once into an indexed temp table instead of
    -- calling the multi-statement split function inside the filter predicates.
    CREATE TABLE #GenreFilter     (GenreID     int NOT NULL PRIMARY KEY);
    CREATE TABLE #MediaFilter     (MediaID     int NOT NULL PRIMARY KEY);
    CREATE TABLE #TitleTypeFilter (TitleTypeID int NOT NULL PRIMARY KEY);

    INSERT INTO #GenreFilter (GenreID)
    SELECT DISTINCT Number
    FROM [fn_SplitInt](@GenreID, ',')
    WHERE Number IS NOT NULL;

    INSERT INTO #MediaFilter (MediaID)
    SELECT DISTINCT Number
    FROM [fn_SplitInt](@MediaID, ',')
    WHERE Number IS NOT NULL;

    INSERT INTO #TitleTypeFilter (TitleTypeID)
    SELECT DISTINCT Number
    FROM [fn_SplitInt](@TitleTypeID, ',')
    WHERE Number IS NOT NULL;

    WITH genresMovies_CTE AS
    (
        -- The genre view returns one row per movie/genre pair, so DISTINCT
        -- collapses movies that match more than one selected genre.
        SELECT DISTINCT
               JG.[MovieID]
             , JG.[MovieTitle]
             , JG.Genres
             , JG.[TitleType]      -- vwMoviesJoinedGenres exposes TitleType, not TitleTypeName
             , JG.[TitleTypeID]
             , JG.[MediaID]        -- vwMoviesJoinedGenres exposes MediaID, not MediaTypeID
             , Me.MediaType
             , JD.Directors
        FROM [dbo].[vwMoviesJoinedGenres] AS JG --JoinedGenres
        INNER JOIN [dbo].[vwMoviesJoinedDirectors] AS JD
            ON JG.MovieID = JD.movieID
        -- NOTE(review): the Media table definition is not visible here; its key
        -- is kept as MediaTypeID as originally written -- confirm the column name.
        INNER JOIN Media AS Me
            ON Me.MediaTypeID = JG.MediaID
        WHERE JG.GenreID     IN (SELECT GF.GenreID     FROM #GenreFilter     AS GF)
          AND JG.MediaID     IN (SELECT MF.MediaID     FROM #MediaFilter     AS MF)
          AND JG.TitleTypeID IN (SELECT TF.TitleTypeID FROM #TitleTypeFilter AS TF)
    )
    SELECT MovieTitle, Genres, Directors, TitleType, MediaType
    FROM genresMovies_CTE
    ORDER BY MovieTitle;
END
Run Code Online (Sandbox Code Playgroud)
更新:应要求补充了一个非常小的样本数据集。为了节省时间,我做了很多简化,并决定只关注类型(流派)。
创建表并插入数据
USE [Test Movies];
GO

-- Minimal sample schema: movies, genres, and the junction table linking them.

-- To rebuild from scratch, first run: DROP TABLE IF EXISTS MovieDetails;
CREATE TABLE MovieDetails
(
    MovieID    INT NOT NULL,
    MovieTitle NVARCHAR(255),
    CONSTRAINT MovieID PRIMARY KEY (MovieID)
);

-- To rebuild from scratch, first run: DROP TABLE IF EXISTS Genres;
CREATE TABLE Genres
(
    GenreID   TINYINT     NOT NULL IDENTITY(1,1),
    GenreName VARCHAR(50) NOT NULL,
    CONSTRAINT GenreID PRIMARY KEY (GenreID)
);

-- To rebuild from scratch, first run: DROP TABLE IF EXISTS MovieGenre;
CREATE TABLE MovieGenre
(
    MovieID INT     NOT NULL,
    GenreID TINYINT NOT NULL,
    CONSTRAINT GenresMovies PRIMARY KEY (MovieID, GenreID)
);

-- Ten sample movies.
INSERT INTO MovieDetails (MovieID, MovieTitle)
VALUES
    (1,  'Forest Gump'),
    (2,  'Eyes Wide Shut'),
    (3,  'Kelly''s Heroes'),
    (4,  'Where Eagles Dare'),
    (5,  'Star Trek: First Contact'),
    (6,  'The Ten Commandments'),
    (7,  'Clash of the Titans'),
    (8,  'AVP: Alien vs. Predator'),
    (9,  'Batman Returns'),
    (10, 'Crash');

-- GenreID is an IDENTITY(1,1) column, so the insertion order below fixes the
-- IDs: 1 = Drama, 2 = Adventure, 3 = Family, 4 = Horror, 5 = Crime.
INSERT INTO Genres (GenreName)
VALUES
    ('Drama'),
    ('Adventure'),
    ('Family'),
    ('Horror'),
    ('Crime');

-- Movie-to-genre links; movies 5, 6 and 7 carry two genres each.
INSERT INTO MovieGenre (MovieID, GenreID)
VALUES
    (1, 1),
    (2, 1),
    (3, 2),
    (4, 2),
    (5, 1),
    (5, 2),
    (6, 1),
    (6, 2),
    (7, 2),
    (7, 3),
    (8, 4),
    (9, 5),
    (10, 1);
Run Code Online (Sandbox Code Playgroud)
网上找到的 splitInt 函数的代码
-- Function: dbo.fn_SplitInt
-- Splits a delimited list of integers into a table.
--
-- Parameters:
--   @List      - delimited list, e.g. N'2,3'
--   @Delimiter - single-character separator (defaults to ',')
--
-- Returns: one row per non-blank item, in list order.
--   Position - 1-based sequence of the item among the returned rows
--   Number   - the item converted to int
--
-- Notes:
--   * NULL input and blank items (empty or whitespace only) produce no row.
--   * A non-numeric item still fails the CAST, so bad input is not hidden.
--   * The list is scanned by position; it is never trimmed with RIGHT/LEN,
--     because LEN ignores trailing spaces and that made lists ending in a
--     space lose or corrupt items.
ALTER FUNCTION [dbo].[fn_SplitInt]
(
    @List nvarchar(4000),
    @Delimiter char(1) = ','
)
RETURNS @Values TABLE
(
    Position int IDENTITY PRIMARY KEY,
    Number int
)
AS
BEGIN
    DECLARE @Start int = 1;            -- first character of the current item
    DECLARE @DelimiterAt int;          -- position of the delimiter ending the current item
    DECLARE @ItemValue nvarchar(100);
    -- DATALENGTH counts trailing spaces (2 bytes per nvarchar character); LEN does not.
    DECLARE @ListLength int = DATALENGTH(@List) / 2;

    -- A NULL @List makes @ListLength NULL, so the loop body never runs.
    WHILE @Start <= @ListLength
    BEGIN
        SET @DelimiterAt = CHARINDEX(@Delimiter, @List, @Start);

        -- No further delimiter (or a NULL delimiter): the rest of the list is the last item.
        IF ISNULL(@DelimiterAt, 0) = 0
            SET @DelimiterAt = @ListLength + 1;

        SET @ItemValue = LTRIM(RTRIM(SUBSTRING(@List, @Start, @DelimiterAt - @Start)));

        -- Skip blank items so '', '1,,2' or a trailing delimiter add no phantom 0.
        IF @ItemValue <> N''
            INSERT INTO @Values (Number) VALUES (CAST(@ItemValue AS int));

        SET @Start = @DelimiterAt + 1;
    END

    RETURN
END
Run Code Online (Sandbox Code Playgroud)
使用 STRING_AGG 连接流派
-- Procedure: dbo.usp_MovieByGenreStrAgg
-- Returns every movie that has at least one of the selected genres. The genres
-- column lists ALL genres of such a movie, not only the selected ones.
--
-- Parameters:
--   @GenreID - comma-separated list of genre IDs, e.g. '2,3'
--
-- Result columns: movieID, movietitle, genres (", "-separated, in GenreID order).
CREATE OR ALTER PROCEDURE [dbo].[usp_MovieByGenreStrAgg]
    @GenreID nvarchar(255)
WITH RECOMPILE
AS
BEGIN
    SET NOCOUNT ON;

    SELECT
          movieTitleID.movieID
        , movieTitleID.movietitle
        , movieTitleID.genres
    FROM
        (
            SELECT
                  MD.MovieID
                , MD.MovieTitle
                -- WITHIN GROUP makes the order of the concatenated names deterministic.
                , STRING_AGG(G.GenreName, ', ') WITHIN GROUP (ORDER BY G.GenreID) AS Genres
            FROM MovieDetails AS MD
            INNER JOIN MovieGenre AS MG
                ON MG.MovieID = MD.MovieID
            INNER JOIN Genres AS G
                ON G.GenreID = MG.GenreID
            -- Keep a movie when any of its genres was selected. Filtering on the
            -- movie here (rather than re-joining MovieGenre after aggregation)
            -- never multiplies rows, so no DISTINCT is needed.
            WHERE MD.MovieID IN
                  (
                      SELECT Picked.MovieID
                      FROM MovieGenre AS Picked
                      WHERE Picked.GenreID IN (SELECT Number FROM [fn_SplitInt](@GenreID, ','))
                  )
            GROUP BY MD.MovieID, MD.MovieTitle
        ) AS movieTitleID;
END
Run Code Online (Sandbox Code Playgroud)
使用交叉应用加入流派
-- Procedure: dbo.usp_MovieByGenreCrsApply
-- Returns every movie that has at least one of the selected genres. The genres
-- column lists ONLY the selected genres the movie has.
--
-- Parameters:
--   @GenreID - comma-separated list of genre IDs, e.g. '2,3'
--
-- Result columns: MovieID, MovieTitle, genres (", "-separated, ordered by name).
CREATE PROCEDURE [dbo].[usp_MovieByGenreCrsApply]
    @GenreID nvarchar(255)
WITH RECOMPILE
AS
BEGIN
    SET NOCOUNT ON;

    -- Parse the ID list once. Calling the split function inside the correlated
    -- APPLY allows it to be re-evaluated for every movie row.
    CREATE TABLE #SelectedGenres (GenreID int NOT NULL PRIMARY KEY);

    INSERT INTO #SelectedGenres (GenreID)
    SELECT DISTINCT Number
    FROM [fn_SplitInt](@GenreID, ',')
    WHERE Number IS NOT NULL;

    SELECT
          MD.MovieID
        , MD.MovieTitle
        , GL.genres AS genres
    FROM MovieDetails AS MD
    CROSS APPLY
    (
        -- Build the list with a LEADING ", " and strip it with STUFF; this does
        -- not depend on how LEN treats trailing spaces.
        SELECT STUFF(
            (
                SELECT ', ' + G.GenreName
                FROM Genres AS G
                INNER JOIN MovieGenre AS MG
                    ON MG.GenreID = G.GenreID
                INNER JOIN #SelectedGenres AS SG
                    ON SG.GenreID = G.GenreID
                WHERE MG.MovieID = MD.MovieID
                GROUP BY G.GenreName      -- one entry per distinct genre name
                ORDER BY G.GenreName      -- deterministic list order
                -- TYPE + .value() returns plain text; without them FOR XML would
                -- hand back XML-escaped text (e.g. '&' as '&amp;').
                FOR XML PATH(''), TYPE
            ).value('.', 'NVARCHAR(MAX)')
          , 1, 2, '')
    ) AS GL(genres)
    -- The list is NULL when the movie has none of the selected genres.
    WHERE GL.genres IS NOT NULL;
END
Run Code Online (Sandbox Code Playgroud)
当我执行单独的语句时,我注意到了一些我以前没有注意到的东西。
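exec [usp_MovieByGenreStrAgg] '2,3' -- 其中 2 是 Adventure(冒险),3 是 Family(家庭)
| 电影ID | 电影标题 | 流派 |
|---|---|---|
| 3 | Kelly's Heroes | Adventure |
| 4 | Where Eagles Dare | Adventure |
| 5 | Star Trek: First Contact | Drama, Adventure |
| 6 | The Ten Commandments | Drama, Adventure |
| 7 | Clash of the Titans | Adventure, Family |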
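exec [usp_MovieByGenreCrsApply] '2,3' -- 其中 2 是 Adventure(冒险),3 是 Family(家庭)
| 电影ID | 电影标题 | 流派 |
|---|---|---|
| 3 | Kelly's Heroes | Adventure |
| 4 | Where Eagles Dare | Adventure |
| 5 | Star Trek: First Contact | Adventure |
| 6 | The Ten Commandments | Adventure |
| 7 | Clash of the Titans | Adventure, Family |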
String_Agg 返回已选择流派的给定电影的所有流派,即使仅选择其中一种流派作为参数值。交叉应用仅返回所选的流派。我意识到我更喜欢它只显示我感兴趣的类型。
但是,我注意到,当我先把交叉应用保存在视图中、然后再应用参数时,它的行为就和我的 STRING_AGG 过程一样。我认为我更喜欢交叉应用(不先保存为视图)的行为方式,但我很确定不使用视图时这种方式要慢得多,所以又回到了最初速度慢的问题上。希望我说清楚了。
小智 1
将 split 函数放在 APPLY 中是一个坏主意。它可能会被一遍又一遍地求值,从而严重拖慢性能。您应该先将这些值放入临时表或表变量中。
您的 STRING_AGG 返回了您没有选择的类型,因为您是先构建列表,然后才进行过滤(通过额外的联接和 WHERE 子句)。将 WHERE 子句移到分组查询内部,您就只会得到所选的类型。
无论哪种情况,您都可以相当程度地简化查询。以下是任一样式(“仅选择”或“全部”)的一些 POC 代码:
-- Proof of concept: build the sample data in temp tables, parse a
-- comma-separated list of genre IDs once, then show two listing styles:
--   1) "selected only" - each movie lists just the selected genres it has
--   2) "all"           - each matching movie lists every one of its genres
DROP TABLE IF EXISTS #MovieDetails;
DROP TABLE IF EXISTS #Genres;
DROP TABLE IF EXISTS #MovieGenre;

-- Primary keys are left unnamed: a named constraint on a temp table is a
-- tempdb-wide object name, so two sessions running this script would collide.
CREATE TABLE #MovieDetails
(
    MovieID    int NOT NULL PRIMARY KEY
  , MovieTitle nvarchar(255)
);

CREATE TABLE #Genres
(
    GenreID   tinyint     NOT NULL IDENTITY(1,1) PRIMARY KEY
  , GenreName varchar(50) NOT NULL
);

CREATE TABLE #MovieGenre
(
    MovieID int     NOT NULL
  , GenreID tinyint NOT NULL
  , PRIMARY KEY (MovieID, GenreID)
);

INSERT INTO #MovieDetails (MovieID, MovieTitle)
VALUES
  (1, 'Forest Gump')
, (2, 'Eyes Wide Shut')
, (3, 'Kelly''s Heroes')
, (4, 'Where Eagles Dare')
, (5, 'Star Trek: First Contact')
, (6, 'The Ten Commandments')
, (7, 'Clash of the Titans')
, (8, 'AVP: Alien vs. Predator')
, (9, 'Batman Returns')
, (10, 'Crash');

-- IDENTITY assigns 1..5 in insertion order (Drama = 1 ... Crime = 5).
INSERT INTO #Genres (GenreName)
VALUES ('Drama'), ('Adventure'), ('Family'), ('Horror'), ('Crime');

INSERT INTO #MovieGenre (MovieID, GenreID)
VALUES (1,1), (2,1), (3,2), (4,2),
       (5,1), (5,2), (6,1), (6,2), (7,2),
       (7,3), (8,4), (9,5), (10,1);

-- Parse the ID list once, via XML: 'a,b' becomes <i>a</i><i>b</i>.
DECLARE @ids NVARCHAR(MAX) = '2,3';
DECLARE @idsXML XML = TRY_CAST('<i>' + REPLACE(ISNULL(@ids, ''), ',', '</i><i>') + '</i>' AS XML);
DECLARE @idTable AS TABLE (id INT NOT NULL PRIMARY KEY);

-- DISTINCT keeps a repeated ID from duplicating genre names in the aggregates
-- below; NULLIF turns a blank item into NULL (an empty string would otherwise
-- convert to 0), and items that are not integers are dropped by TRY_CAST.
INSERT INTO @idTable (id)
SELECT DISTINCT parsed.id
FROM @idsXML.nodes('/i') AS X(i)
CROSS APPLY
(
    SELECT TRY_CAST(NULLIF(LTRIM(RTRIM(X.i.value('.', 'NVARCHAR(MAX)'))), N'') AS INT) AS id
) AS parsed
WHERE parsed.id IS NOT NULL;

-- Style 1 ("selected only"): filter BEFORE aggregating, so each movie lists
-- only the selected genres it has.
SELECT
    m.MovieID, m.MovieTitle, STRING_AGG(g.GenreName, ',') AS Genres
FROM #MovieDetails AS m
INNER JOIN #MovieGenre AS mg ON m.MovieID = mg.MovieID
INNER JOIN #Genres AS g ON g.GenreID = mg.GenreID
INNER JOIN @idTable AS i ON g.GenreID = i.id
GROUP BY m.MovieID, m.MovieTitle;

-- Style 2 ("all"): keep movies having any selected genre, but aggregate every
-- genre of those movies.
SELECT
    m.MovieID, m.MovieTitle, STRING_AGG(g.GenreName, ',') AS Genres
FROM #MovieDetails AS m
INNER JOIN #MovieGenre AS mg ON m.MovieID = mg.MovieID
INNER JOIN #Genres AS g ON g.GenreID = mg.GenreID
WHERE m.MovieID IN
      (
          SELECT picked.MovieID
          FROM #MovieGenre AS picked
          INNER JOIN @idTable AS sel ON picked.GenreID = sel.id
      )
GROUP BY m.MovieID, m.MovieTitle;
Run Code Online (Sandbox Code Playgroud)
你也可以想象...
-- Single query covering both listing styles, switched by @allMode:
--   @allMode = 1 -> movies having any selected genre, listing ALL their genres
--   @allMode = 0 -> the same movies, listing ONLY the selected genres
-- Relies on #MovieDetails, #MovieGenre, #Genres and @idTable from the same batch.
DECLARE @allMode BIT = 0;

SELECT
    m.MovieID, m.MovieTitle, STRING_AGG(g.GenreName, ',') AS Genres
FROM #MovieDetails AS m
INNER JOIN #MovieGenre AS mg ON m.MovieID = mg.MovieID
INNER JOIN #Genres AS g ON g.GenreID = mg.GenreID
-- EXISTS checks replace the LEFT JOIN to @idTable: they never multiply rows,
-- so a repeated ID in @idTable cannot duplicate a genre name in the list.
WHERE (
          @allMode = 1
          AND EXISTS
              (
                  SELECT 1
                  FROM #MovieGenre AS picked
                  INNER JOIN @idTable AS sel ON picked.GenreID = sel.id
                  WHERE picked.MovieID = m.MovieID
              )
      )
   OR (
          @allMode = 0
          AND EXISTS (SELECT 1 FROM @idTable AS sel WHERE sel.id = g.GenreID)
      )
GROUP BY m.MovieID, m.MovieTitle;
Run Code Online (Sandbox Code Playgroud)