是否有更优化的方法来创建此视图或存储过程?目前使用交叉应用,但速度很慢

ck1*_*123 6 sql-server stored-procedures optimization t-sql cross-apply

我用一些测试数据更新了这篇文章。

我正在为我的电影数据库创建一个报告,我希望最终用户能够选择某种类型的电影。然而,有些电影有多种类型,我已经规范化了数据库,使具有多个类型的电影记录拆分为多行,每一行都指向相应的类型/类型 ID。(对导演也做了类似的处理。)

标准化前

电影 类型
弗兰肯斯坦的新娘 恐怖、戏剧

标准化后

电影 类型
弗兰肯斯坦的新娘 恐怖
弗兰肯斯坦的新娘 戏剧

我遇到的问题是,在这份报告中,我希望电影即使有多种类型也不会重复出现。相反,每部电影只占一行,流派字段被连接起来,在一行内显示所有流派(类似于规范化之前的视图)。我最终的做法是创建一个视图,在其中通过交叉应用(CROSS APPLY)把与电影 ID 匹配的流派拼接到电影查询的输出上。我觉得我把事情弄得有点过于复杂了,而且我的存储过程运行得相当慢,因为我还有其他几个字段允许用户进行过滤。

下面是交叉应用视图。

-- vwMoviesJoinedGenres
-- Returns one row per (movie, genre) pair. Every row of a movie carries the
-- same Genres value: the movie's complete genre list as a single
-- ', '-separated string. GenreID stays in the output so callers can filter
-- on an individual genre and still display the full list.
--
-- The XML methods used below require the view to be created with
-- SET QUOTED_IDENTIFIER ON.
ALTER VIEW [dbo].[vwMoviesJoinedGenres] AS

SELECT DISTINCT
      M.MovieID
    , M.ImageURL
    , M.MovieYear
    , M.MovieTitle
    , TT.TitleType
    , GL.Genres
    , G.GenreID
    , M.TitleTypeID
    , M.MediaID
    , M.IMDBLink
    , M.IMDBRating
    , M.ReleaseDate
    , M.Runtime
FROM [dbo].[Movies] AS M
INNER JOIN GenresMovies AS GM
    ON GM.MovieID = M.MovieID
INNER JOIN Genres AS G
    ON G.GenreID = GM.GenreID
INNER JOIN TitleType AS TT
    ON TT.TitleTypeID = M.TitleTypeID
CROSS APPLY
(
    -- Build the genre list for the current movie.
    --  * GROUP BY removes duplicate names; ORDER BY makes the list order
    --    explicit instead of relying on how DISTINCT happens to sort.
    --  * TYPE + .value() returns the text un-escaped, so a name such as
    --    'Sci-Fi & Fantasy' is not rendered as 'Sci-Fi &amp; Fantasy'.
    --  * STUFF(..., 1, 2, '') drops the leading ', ' separator.
    SELECT STUFF(
        (
            SELECT ', ' + G2.GenreName
            FROM Genres AS G2
            INNER JOIN GenresMovies AS GM2
                ON GM2.GenreID = G2.GenreID
            WHERE GM2.MovieID = M.MovieID
            GROUP BY G2.GenreName
            ORDER BY G2.GenreName
            FOR XML PATH(''), TYPE
        ).value('.', 'nvarchar(max)')
        , 1, 2, '') AS Genres
) AS GL
GO
Run Code Online (Sandbox Code Playgroud)

然后,我在下面的存储过程中使用此视图(以及一个针对导演的类似视图)。

USE [Movies]
GO

SET ANSI_NULLS ON
GO
SET QUOTED_IDENTIFIER ON
GO

--====================================================
-- usp_MovieByGenreUsingViews
-- Lists movies that match the selected genres, media and title types.
--
-- Parameters (each a comma-separated list of integer ids, parsed by
-- fn_SplitInt):
--   @GenreID      genre ids to include
--   @MediaID      media ids to include
--   @TitleTypeID  title type ids to include
--
-- Result: MovieTitle, Genres, Directors, TitleType, MediaType,
-- ordered by MovieTitle.
--====================================================
ALTER   PROCEDURE [dbo].[usp_MovieByGenreUsingViews]
    @GenreID nvarchar(255)
    , @MediaID nvarchar(255)
    , @TitleTypeID nvarchar(255)
WITH RECOMPILE

AS
BEGIN

    SET NOCOUNT ON;

    -- vwMoviesJoinedGenres yields one row per (movie, genre); once the
    -- genre filter is applied, DISTINCT collapses the rows of a movie that
    -- matched on more than one selected genre.
    WITH genresMovies_CTE AS
    (
        SELECT DISTINCT
              JG.[MovieID]
            , JG.[MovieTitle]
            , JG.Genres
            -- The view exposes TitleType (not TitleTypeName), and the final
            -- SELECT below reads TitleType from this CTE.
            , JG.[TitleType]
            , JG.[TitleTypeID]
            , JG.[MediaID]
            , Me.MediaType
            , JD.Directors
        FROM [dbo].[vwMoviesJoinedGenres] AS JG --JoinedGenres
        INNER JOIN [dbo].[vwMoviesJoinedDirectors] AS JD
            ON JG.MovieID = JD.movieID
        -- The view exposes the movie's media key as MediaID.
        -- NOTE(review): Media.MediaTypeID is taken from the original join;
        -- the Media table definition is not visible here - confirm it is
        -- the key that Movies.MediaID refers to.
        INNER JOIN Media AS Me
            ON Me.MediaTypeID = JG.MediaID
        WHERE JG.GenreID IN (SELECT Number FROM [fn_SplitInt](@GenreID, ','))
            AND JG.MediaID IN (SELECT Number FROM [fn_SplitInt](@MediaID, ','))
            AND JG.TitleTypeID IN (SELECT Number FROM [fn_SplitInt](@TitleTypeID, ','))
    )
    SELECT MovieTitle, Genres, Directors, TitleType, MediaType
    FROM genresMovies_CTE
    ORDER BY MovieTitle

END
Run Code Online (Sandbox Code Playgroud)

**根据要求使用非常小的样本数据集进行更新。为了节省时间,我做了很多简化,并决定只关注流派。

创建表并插入数据

-- Minimal sample schema and seed data for the genre-filter examples.
USE [Test Movies];
GO

-- One row per movie.
CREATE TABLE MovieDetails
(
    MovieID    int NOT NULL,
    MovieTitle nvarchar(255),
    CONSTRAINT MovieID PRIMARY KEY (MovieID)
);

-- Genre lookup; GenreID is generated by the identity column.
CREATE TABLE Genres
(
    GenreID   tinyint     NOT NULL IDENTITY(1,1),
    GenreName varchar(50) NOT NULL,
    CONSTRAINT GenreID PRIMARY KEY (GenreID)
);

-- Junction table: a movie has one row per genre it belongs to.
CREATE TABLE MovieGenre
(
    MovieID int     NOT NULL,
    GenreID tinyint NOT NULL,
    CONSTRAINT GenresMovies PRIMARY KEY (MovieID, GenreID)
);

INSERT INTO MovieDetails (MovieID, MovieTitle)
VALUES
    (1,  'Forest Gump'),
    (2,  'Eyes Wide Shut'),
    (3,  'Kelly''s Heroes'),
    (4,  'Where Eagles Dare'),
    (5,  'Star Trek: First Contact'),
    (6,  'The Ten Commandments'),
    (7,  'Clash of the Titans'),
    (8,  'AVP: Alien vs. Predator'),
    (9,  'Batman Returns'),
    (10, 'Crash');

-- The MovieGenre rows below assume these names receive GenreID 1-5 in the
-- order listed.
INSERT INTO Genres (GenreName)
VALUES
    ('Drama'),
    ('Adventure'),
    ('Family'),
    ('Horror'),
    ('Crime');

-- (MovieID, GenreID) pairs, grouped by movie.
INSERT INTO MovieGenre (MovieID, GenreID)
VALUES
    (1, 1),
    (2, 1),
    (3, 2),
    (4, 2),
    (5, 1), (5, 2),
    (6, 1), (6, 2),
    (7, 2), (7, 3),
    (8, 4),
    (9, 5),
    (10, 1);
Run Code Online (Sandbox Code Playgroud)

网上找到的 splitInt 函数的代码

-- fn_SplitInt
-- Splits a delimited list of integers into a table.
--
-- Parameters:
--   @List       the delimited text, e.g. N'2,3'
--   @Delimiter  single-character separator (default ',')
--
-- Returns @Values:
--   Position  1-based sequence number of the value, in list order
--   Number    the token converted to int
--
-- Behaviour:
--   * A NULL or empty list returns no rows.
--   * Empty or blank tokens (leading, trailing or doubled delimiters) are
--     skipped instead of being converted to a spurious 0.
--   * A non-numeric token still raises a conversion error.
ALTER FUNCTION [dbo].[fn_SplitInt]
(
    @List       nvarchar(4000),
    @Delimiter  char(1)= ','
)
RETURNS @Values TABLE
(
    Position int IDENTITY PRIMARY KEY,
    Number int
)

AS

  BEGIN

  DECLARE @Start int = 1            -- first character of the current token
  DECLARE @Index int                -- position of the next delimiter, 0 if none
  DECLARE @ItemValue nvarchar(100)

  IF @List IS NULL
    RETURN

  WHILE 1 = 1

    BEGIN

      -- Scan forward from @Start rather than trimming the list with
      -- RIGHT/LEN: LEN ignores trailing spaces, which made the trimmed
      -- remainder wrong (and dropped values) for input such as '1,2 '.
      -- COALESCE guards against a NULL delimiter, which would otherwise
      -- leave @Index NULL and the loop without an exit.
      SET @Index = COALESCE(CHARINDEX(@Delimiter, @List, @Start), 0)

      IF @Index > 0      -- token ends just before the delimiter
        SET @ItemValue = SUBSTRING(@List, @Start, @Index - @Start)
      ELSE               -- no delimiter left: the remainder is the last token
        SET @ItemValue = SUBSTRING(@List, @Start, 4000)

      -- '' and all-space tokens compare equal to N'' and are skipped.
      IF @ItemValue <> N''
        INSERT INTO @Values (Number) VALUES (CAST(@ItemValue AS int))

      IF @Index = 0 BREAK

      SET @Start = @Index + 1

    END

  RETURN

  END

Run Code Online (Sandbox Code Playgroud)

使用 STRING_AGG 连接流派

-- usp_MovieByGenreStrAgg
-- Returns every movie that has at least one of the selected genres, together
-- with the movie's COMPLETE genre list (not only the selected genres) as a
-- ', '-separated string.
--
-- Parameters:
--   @GenreID  comma-separated list of genre ids, e.g. '2,3'
--
-- Result columns: movieID, movietitle, genres (one row per movie).
CREATE PROCEDURE [dbo].[usp_MovieByGenreStrAgg]
    @GenreID nvarchar(255)
WITH RECOMPILE

AS
BEGIN
    SET NOCOUNT ON;

    -- Parse the id list once, rather than referencing the split function
    -- from inside a per-movie subquery.
    DECLARE @SelectedGenres TABLE (GenreID int NOT NULL PRIMARY KEY);

    INSERT INTO @SelectedGenres (GenreID)
    SELECT DISTINCT Number
    FROM [fn_SplitInt](@GenreID, ',')
    WHERE Number IS NOT NULL;

    SELECT
          -- Aliases keep the result column names exactly as before.
          MD.MovieID    AS movieID
        , MD.MovieTitle AS movietitle
        -- WITHIN GROUP fixes the order of the names; without it the order
        -- of the concatenated values is unspecified.
        , STRING_AGG(G.GenreName, ', ') WITHIN GROUP (ORDER BY G.GenreID) AS genres
    FROM MovieDetails AS MD
    INNER JOIN MovieGenre AS MG
        ON MG.MovieID = MD.MovieID
    INNER JOIN Genres AS G
        ON G.GenreID = MG.GenreID
    -- EXISTS selects the qualifying movies without multiplying rows, so no
    -- DISTINCT is needed afterwards. Genres is joined here as well so that a
    -- MovieGenre row pointing at a missing genre cannot qualify a movie.
    WHERE EXISTS
    (
        SELECT 1
        FROM MovieGenre AS Picked
        INNER JOIN Genres AS PickedGenre
            ON PickedGenre.GenreID = Picked.GenreID
        INNER JOIN @SelectedGenres AS SG
            ON SG.GenreID = Picked.GenreID
        WHERE Picked.MovieID = MD.MovieID
    )
    GROUP BY MD.MovieID, MD.MovieTitle;

END
Run Code Online (Sandbox Code Playgroud)

使用交叉应用加入流派

-- usp_MovieByGenreCrsApply
-- Returns every movie that has at least one of the selected genres; the
-- genres column lists ONLY the selected genres the movie belongs to, as a
-- ', '-separated string.
--
-- Parameters:
--   @GenreID  comma-separated list of genre ids, e.g. '2,3'
--
-- Result columns: MovieID, MovieTitle, genres (one row per movie).
--
-- The XML methods used below require the procedure to be created with
-- SET QUOTED_IDENTIFIER ON.
CREATE PROCEDURE [dbo].[usp_MovieByGenreCrsApply]
    @GenreID nvarchar(255)
WITH RECOMPILE

AS
BEGIN
    SET NOCOUNT ON;

    -- Parse the id list once up front. Calling the split function inside
    -- the APPLY lets it be re-evaluated for every movie.
    DECLARE @SelectedGenres TABLE (GenreID int NOT NULL PRIMARY KEY);

    INSERT INTO @SelectedGenres (GenreID)
    SELECT DISTINCT Number
    FROM [fn_SplitInt](@GenreID, ',')
    WHERE Number IS NOT NULL;

    SELECT
          MD.MovieID
        , MD.MovieTitle
        , Agg.genres
    FROM MovieDetails AS MD
    CROSS APPLY
    (
        -- Concatenate the selected genres of the current movie.
        --  * GROUP BY removes duplicate names; ORDER BY makes the list
        --    order explicit.
        --  * TYPE + .value() returns the text un-escaped ('&' stays '&').
        --  * STUFF(..., 1, 2, '') drops the leading ', ' separator.
        -- The result is NULL when the movie has none of the selected genres.
        SELECT STUFF(
            (
                SELECT ', ' + G.GenreName
                FROM Genres AS G
                INNER JOIN MovieGenre AS MG
                    ON MG.GenreID = G.GenreID
                INNER JOIN @SelectedGenres AS SG
                    ON SG.GenreID = G.GenreID
                WHERE MG.MovieID = MD.MovieID
                GROUP BY G.GenreName
                ORDER BY G.GenreName
                FOR XML PATH(''), TYPE
            ).value('.', 'nvarchar(max)')
            , 1, 2, '') AS genres
    ) AS Agg
    WHERE Agg.genres IS NOT NULL;
END
Run Code Online (Sandbox Code Playgroud)

当我执行单独的语句时,我注意到了一些我以前没有注意到的东西。

exec [usp_MovieByGenreStrAgg] '2,3' -- 其中 2 是冒险,3 是家庭

电影ID 电影标题 流派
3 凯利的英雄 冒险
4 老鹰敢于冒险的地方 冒险
5 星际迷航:第一次接触 戏剧、冒险
6 十诫 戏剧、冒险
7 泰坦之战 冒险、家庭

exec [usp_MovieByGenreCrsApply] '2,3' -- 其中 2 是冒险,3 是家庭

电影ID 电影标题 流派
3 凯利的英雄 冒险
4 老鹰敢于冒险的地方 冒险
5 星际迷航:第一次接触 冒险
6 十诫 冒险
7 泰坦之战 冒险、家庭

String_Agg 返回已选择流派的给定电影的所有流派,即使仅选择其中一种流派作为参数值。交叉应用仅返回所选的流派。我意识到我更喜欢它只显示我感兴趣的类型。

但是,我注意到,当我首先将交叉应用保存在视图中然后应用参数时,它的行为就像我的 String_Agg 过程一样。我认为我更喜欢交叉应用的行为方式(无需先将其保存在视图中),但我很确定在没有视图的情况下这种方式要慢得多,所以又回到了最初速度慢的问题。我希望这说得通。

小智 1

将 split 函数放在 APPLY 中是一个坏主意。它可能会被一遍又一遍地求值,从而严重拖慢性能。您应该先将这些值放入临时表或表变量中。

STRING_AGG 返回了您未选择的流派,因为您是先构建列表,然后才进行过滤(通过附加的联接和 WHERE 子句)。将 WHERE 子句移动到分组内,您将只获得您选择的流派。

无论哪种情况,您都可以相当程度地简化查询。以下是任一样式(“仅选择”或“全部”)的一些 POC 代码:

-- Proof-of-concept fixture: session-local copies of the sample tables.
-- Re-runnable: existing copies are dropped first.
DROP TABLE IF EXISTS #MovieDetails;
DROP TABLE IF EXISTS #Genres;
DROP TABLE IF EXISTS #MovieGenre;

-- The primary keys are deliberately left unnamed. A constraint name given
-- to a #temp table is not private to the session, so a second session
-- running this script at the same time would fail on the name collision.
CREATE TABLE #MovieDetails
(
    MovieID int NOT NULL
    , MovieTitle nvarchar(255)
    , PRIMARY KEY (MovieID)
);

CREATE TABLE #Genres
(
      GenreID tinyint NOT NULL Identity(1,1)
    , GenreName varchar(50) NOT NULL
    , PRIMARY KEY (GenreID)
);

CREATE TABLE #MovieGenre
(
    MovieID int NOT NULL
    , GenreID tinyint NOT NULL
    , PRIMARY KEY (MovieID, GenreID)
);

INSERT INTO #MovieDetails (MovieID, MovieTitle)
VALUES
(1, 'Forest Gump')
, (2, 'Eyes Wide Shut')
, (3, 'Kelly''s Heroes')
, (4, 'Where Eagles Dare')
, (5, 'Star Trek: First Contact')
, (6, 'The Ten Commandments')
, (7, 'Clash of the Titans')
, (8, 'AVP: Alien vs. Predator')
, (9, 'Batman Returns')
, (10, 'Crash');

-- The #MovieGenre rows below assume these names receive GenreID 1-5 in the
-- order listed.
INSERT INTO #Genres (GenreName)
VALUES ('Drama'), ('Adventure'), ('Family'), ('Horror'), ('Crime');

INSERT INTO #MovieGenre (MovieID, GenreID)
VALUES (1,1), (2,1), (3,2), (4,2),
(5,1), (5,2), (6,1), (6,2), (7,2),
(7,3), (8,4), (9,5), (10,1);


-- Comma-separated genre ids to filter on.
DECLARE @ids NVARCHAR(MAX) = '2,3';

-- Turn 'a,b,c' into <i>a</i><i>b</i><i>c</i> so the items can be shredded
-- with nodes(). TRY_CAST yields NULL (and therefore no ids) when the text
-- does not form valid XML.
DECLARE @idsXML XML = TRY_CAST('<i>' + REPLACE(ISNULL(@ids, ''), ',', '</i><i>') + '</i>' AS XML);

-- Parsed ids. The primary key guarantees each id appears once, so joining
-- to this table can never multiply rows (a list such as '2,2' would
-- otherwise repeat genre names in the aggregated output).
DECLARE @idTable AS TABLE (id INT NOT NULL PRIMARY KEY);

INSERT INTO @idTable (id)
SELECT DISTINCT parsed.id
FROM @idsXML.nodes('//i') AS X(i)
CROSS APPLY
(
    -- NULLIF turns empty/blank items into NULL; TRY_CAST('' AS INT) would
    -- otherwise produce a spurious id 0. Non-numeric items also become NULL.
    SELECT TRY_CAST(NULLIF(X.i.value('.', 'NVARCHAR(MAX)'), '') AS INT) AS id
) AS parsed
WHERE parsed.id IS NOT NULL;

-- "Selected only": each matching movie with just the selected genres.
SELECT
    m.MovieID,
    m.MovieTitle,
    STRING_AGG(g.GenreName, ',') WITHIN GROUP (ORDER BY g.GenreID) AS Genres
FROM #MovieDetails AS m
INNER JOIN #MovieGenre AS mg
    ON mg.MovieID = m.MovieID
INNER JOIN #Genres AS g
    ON g.GenreID = mg.GenreID
INNER JOIN @idTable AS i
    ON i.id = g.GenreID
GROUP BY m.MovieID, m.MovieTitle;

-- "All": each matching movie with its complete genre list. The movie
-- qualifies when at least one of its genres is in the selected set.
SELECT
    m.MovieID,
    m.MovieTitle,
    STRING_AGG(g.GenreName, ',') WITHIN GROUP (ORDER BY g.GenreID) AS Genres
FROM #MovieDetails AS m
INNER JOIN #MovieGenre AS mg
    ON mg.MovieID = m.MovieID
INNER JOIN #Genres AS g
    ON g.GenreID = mg.GenreID
WHERE EXISTS
(
    SELECT 1
    FROM #MovieGenre AS picked
    INNER JOIN @idTable AS sel
        ON sel.id = picked.GenreID
    WHERE picked.MovieID = m.MovieID
)
GROUP BY m.MovieID, m.MovieTitle;
Run Code Online (Sandbox Code Playgroud)

你也可以想象...

-- Switch between the two output styles with a single query:
--   @allMode = 1  each matching movie with its complete genre list
--   @allMode = 0  each matching movie with only the selected genres
DECLARE @allMode  BIT = 0;

SELECT
    m.MovieID,
    m.MovieTitle,
    STRING_AGG(g.GenreName, ',') WITHIN GROUP (ORDER BY g.GenreID) AS Genres
FROM #MovieDetails AS m
INNER JOIN #MovieGenre AS mg
    ON mg.MovieID = m.MovieID
INNER JOIN #Genres AS g
    ON g.GenreID = mg.GenreID
-- EXISTS is used instead of joining @idTable so that a repeated id in the
-- list cannot multiply rows and repeat genre names in the aggregate.
WHERE
    (
        -- Keep every genre row of a movie that has any selected genre.
        @allMode = 1
        AND EXISTS
        (
            SELECT 1
            FROM #MovieGenre AS picked
            INNER JOIN @idTable AS sel
                ON sel.id = picked.GenreID
            WHERE picked.MovieID = m.MovieID
        )
    )
    OR
    (
        -- Keep only the genre rows that are themselves selected.
        @allMode = 0
        AND EXISTS (SELECT 1 FROM @idTable AS sel WHERE sel.id = g.GenreID)
    )
GROUP BY m.MovieID, m.MovieTitle;
Run Code Online (Sandbox Code Playgroud)