QUERY - 透视多列,可变行数

tma*_*lse 6 sql-server pivot sql-server-2008-r2

我有一张看起来像这样的表:

RECIPE     VERSION_ID     INGREDIENT    PERCENTAGE
4000       100            Ing_1          23,0
4000       100            Ing_100         0,1
4000       200            Ing_1          20,0
4000       200            Ing_100         0,7
4000       300            Ing_1          22,3
4000       300            Ing_100         0,9
4001       900            Ing_1           8,3
4001       900            Ing_100        72,4
4001       901            Ing_1           9,3
4001       901            Ing_100        70,5
5012       871            Ing_1          45,1
5012       871            Ing_100         0,9
5012       877            Ing_1          47,2
5012       877            Ing_100         0,8
5012       879            Ing_1          46,6
5012       879            Ing_100         0,9
5012       880            Ing_1          43,6
5012       880            Ing_100         1,2
Run Code Online (Sandbox Code Playgroud)

每个配方/版本有 100 种成分。我想像这样显示这个表中的数据:

RECIPE     INGREDIENT_Vxxx     PERCENTAGE_Vxxx     INGREDIENT_Vyyy     INGREDIENT_Vyyy (ETC)
4000       Ing_1               23,0                Ing_1               20,0
4000       Ing_100             0,1                 Ing_100              0,7
Run Code Online (Sandbox Code Playgroud)

因为在不同版本的食谱中,可以删除或添加成分,我想显示每个版本每个食谱的成分和百分比。还有一个困难是不同的食谱有不同数量的版本。

我什至不确定这是否可能或从哪里开始。也许与PIVOT功能?

有人可以指出我正确的方向吗?

Ava*_*rkx 7

在我看来,这里的问题主要是范围界定问题 - 由于需求定义不够好,您可能很难解决这个问题。根据提供的描述和示例数据,至少有三个部分解决方案,其中没有一个可能适用于您的特定用例。测试数据设置如下,

IF NOT EXISTS ( SELECT  1
                FROM    sys.objects
                WHERE   name = 'Recipe'
                    AND type = 'U' )
BEGIN
    --DROP TABLE dbo.Recipe;
    CREATE TABLE dbo.Recipe
    (
        Recipe          INTEGER NOT NULL,
        VersionID       INTEGER NOT NULL,
        Ingredient      VARCHAR( 8 ) NOT NULL,
        Percentage      DECIMAL( 5, 2 )
    );

    INSERT INTO dbo.Recipe ( Recipe, VersionID, Ingredient, Percentage )
                SELECT  4000, 100, 'Ing_1', 23.0
    UNION ALL   SELECT  4000, 100, 'Ing_100', 0.1
    UNION ALL   SELECT  4000, 200, 'Ing_1', 20.0
    UNION ALL   SELECT  4000, 200, 'Ing_100', 0.7
    UNION ALL   SELECT  4000, 300, 'Ing_1', 22.3
    UNION ALL   SELECT  4000, 300, 'Ing_100', 0.9
    UNION ALL   SELECT  4001, 900, 'Ing_1', 8.3
    UNION ALL   SELECT  4001, 900, 'Ing_100', 72.4
    UNION ALL   SELECT  4001, 901, 'Ing_1', 9.3
    UNION ALL   SELECT  4001, 901, 'Ing_100', 70.5
    UNION ALL   SELECT  5012, 871, 'Ing_1', 45.1
    UNION ALL   SELECT  5012, 871, 'Ing_100', 0.9
    UNION ALL   SELECT  5012, 877, 'Ing_1', 47.2
    UNION ALL   SELECT  5012, 877, 'Ing_100', 0.8
    UNION ALL   SELECT  5012, 879, 'Ing_1', 46.6
    UNION ALL   SELECT  5012, 879, 'Ing_100', 0.9
    UNION ALL   SELECT  5012, 880, 'Ing_1', 43.6
    UNION ALL   SELECT  5012, 880, 'Ing_100', 1.2;

    ALTER TABLE dbo.Recipe
    ADD CONSTRAINT PK__Recipe
        PRIMARY KEY CLUSTERED ( Recipe, VersionID, Ingredient );

    CREATE NONCLUSTERED INDEX IX__Recipe__Recipe__VersionID
        ON  dbo.Recipe ( Recipe, VersionID )
    INCLUDE ( Percentage );
END;
GO
Run Code Online (Sandbox Code Playgroud)

我们可以使用我们的新表来探索一些可能的解决方案。扩展示例输出,我们将下一个配方添加到结果集中以说明问题的难度。

RECIPE --- INGREDIENT_V100 --- PERCENTAGE_V100 --- INGREDIENT_V200 --- INGREDIENT_V200 
4000       Ing_1               23,0                Ing_1               20,0
4000       Ing_100              0,1                Ing_100              0,7
4001       Ing_1                8,3                Ing_1                9,3
4001       Ing_100             72,4                Ing_100             70,5
Run Code Online (Sandbox Code Playgroud)

%_V100%_V2004000食谱的情况下非常有意义,但随着添加其他食谱,它们很快就会失去意义。该4001配方需要新的和单独的列来按版本正确标记数据,但由于每个配方的版本号不同,该路径导致我们得到一个非常稀疏的结果集,使用起来非常烦人,或者我们必须使用别名列,丢失版本号数据。

解决方案1:

从我认为最糟糕的方法开始,让我们看看稀疏结果集。对于示例数据,我们将尝试生成一个类似于以下几行的查询:

SELECT  p.Recipe,
        [Ingredient_v100] = CASE WHEN p.[100] IS NULL THEN NULL ELSE p.[Ingredient] END, [Percentage_v100] = p.[100], 
        [Ingredient_v200] = CASE WHEN p.[200] IS NULL THEN NULL ELSE p.[Ingredient] END, [Percentage_v200] = p.[200], 
        [Ingredient_v300] = CASE WHEN p.[300] IS NULL THEN NULL ELSE p.[Ingredient] END, [Percentage_v300] = p.[300], 
        [Ingredient_v871] = CASE WHEN p.[871] IS NULL THEN NULL ELSE p.[Ingredient] END, [Percentage_v871] = p.[871], 
        [Ingredient_v877] = CASE WHEN p.[877] IS NULL THEN NULL ELSE p.[Ingredient] END, [Percentage_v877] = p.[877], 
        [Ingredient_v879] = CASE WHEN p.[879] IS NULL THEN NULL ELSE p.[Ingredient] END, [Percentage_v879] = p.[879], 
        [Ingredient_v880] = CASE WHEN p.[880] IS NULL THEN NULL ELSE p.[Ingredient] END, [Percentage_v880] = p.[880], 
        [Ingredient_v900] = CASE WHEN p.[900] IS NULL THEN NULL ELSE p.[Ingredient] END, [Percentage_v900] = p.[900], 
        [Ingredient_v901] = CASE WHEN p.[901] IS NULL THEN NULL ELSE p.[Ingredient] END, [Percentage_v901] = p.[901]
FROM (  SELECT  r.Recipe, 
                r.VersionID, 
                r.Ingredient,
                r.Percentage 
        FROM    dbo.Recipe r ) s
PIVOT ( MAX( s.Percentage )
        FOR s.VersionID IN ( [100], [200], [300], [871], [877], [879], [880], [900], [901] ) ) p
ORDER BY p.Recipe;
Run Code Online (Sandbox Code Playgroud)

由于版本数量可变,我们可以使用一些动态 SQL 来生成和执行查询。

DECLARE @Piv            NVARCHAR( MAX ),
        @Col            NVARCHAR( MAX ),
        @SQL            NVARCHAR( MAX );

SELECT  @Piv = LEFT( b.Piv, LEN( b.Piv ) - 1 )
FROM (  SELECT  N'[' + CONVERT( VARCHAR( 8 ), a.VersionID ) + '], '
        FROM (  SELECT  DISTINCT r.VersionID 
                FROM    dbo.Recipe r ) a
        ORDER BY a.VersionID
        FOR XML PATH ( '' ) ) b ( Piv );

SELECT  @Col = LEFT( b.Piv, LEN( b.Piv ) - 1 )
FROM (  SELECT  N'[Ingredient_v' + CONVERT( VARCHAR( 8 ), a.VersionID ) + '] = CASE'
                    + ' WHEN p.[' + CONVERT( VARCHAR( 8 ), a.VersionID ) + '] IS NULL THEN NULL'
                    + ' ELSE p.[Ingredient] END, ' 
                    + '[Percentage_v' + CONVERT( VARCHAR( 8 ), a.VersionID ) + '] = p.[' 
                    + CONVERT( VARCHAR( 8 ), a.VersionID ) + '], ' 
        FROM (  SELECT  DISTINCT r.VersionID 
                FROM    dbo.Recipe r ) a
        ORDER BY a.VersionID
        FOR XML PATH ( '' ) ) b ( Piv );

SET @SQL = N'
        SELECT  p.Recipe, ' + @Col + '
        FROM (  SELECT  r.Recipe, 
                        r.VersionID, 
                        r.Ingredient,
                        r.Percentage 
                FROM    dbo.Recipe r ) s
        PIVOT ( MAX( s.Percentage )
                FOR s.VersionID IN ( ' + @Piv + ' ) ) p
        ORDER BY p.Recipe;';
EXECUTE dbo.sp_executesql @statement = @SQL;
GO
Run Code Online (Sandbox Code Playgroud)

这个结果集显然很糟糕。这是一个显示结果的SQL Fiddle,看一看,让我们继续。

解决方案2:

由于稀疏结果集被证明几乎无用,我们可以接受丢失配方的版本号并简单地按版本号升序对其进行排序。对于这个例子的目的,我们会按字母顺序的别名,所以版本100200300配方4000会收到ABC命名,而版本900901将得到公正AB。我们要为此生成的查询应该类似于以下内容:

SELECT  p.Recipe, 
        [Ingredient_vA] = p.[Ingredient], [Percentage_vA] = ISNULL( p.[Percentage_vA], 0 ),
        [Ingredient_vB] = p.[Ingredient], [Percentage_vB] = ISNULL( p.[Percentage_vB], 0 ),
        [Ingredient_vC] = p.[Ingredient], [Percentage_vC] = ISNULL( p.[Percentage_vC], 0 ),
        [Ingredient_vD] = p.[Ingredient], [Percentage_vD] = ISNULL( p.[Percentage_vD], 0 )
FROM (  SELECT  Lvl = 'Percentage_v' + CHAR( 64 + 
                    DENSE_RANK() OVER ( 
                        PARTITION BY r.Recipe
                        ORDER BY r.VersionID ) ), 
                r.Recipe, 
                r.Ingredient,
                r.Percentage 
        FROM    dbo.Recipe r ) s
PIVOT ( MAX( s.Percentage )
        FOR s.Lvl IN ( [Percentage_vA], [Percentage_vB], [Percentage_vC], [Percentage_vD] ) ) p
ORDER BY p.Recipe;
Run Code Online (Sandbox Code Playgroud)

与解决方案一类似,可以利用动态 SQL 来实现这一点。

DECLARE @Piv            NVARCHAR( MAX ),
        @Col            NVARCHAR( MAX ),
        @SQL            NVARCHAR( MAX );

SELECT  @Piv = LEFT( b.Piv, LEN( b.Piv ) - 1 )
FROM (  SELECT  N'[Percentage_v' + CHAR( 64 + a.Lvl ) + '], '
        FROM (  SELECT  DISTINCT Lvl = DENSE_RANK() 
                            OVER (  PARTITION BY r.Recipe
                                    ORDER BY r.VersionID )
                FROM    dbo.Recipe r ) a
        ORDER BY a.Lvl
        FOR XML PATH ( '' ) ) b ( Piv );

SELECT  @Col = LEFT( b.Col, LEN( b.Col ) - 1 )
FROM (  SELECT  N'[Ingredient_v' + CHAR( 64 + a.Lvl ) + '] = p.[Ingredient], '
                    + '[Percentage_v' + CHAR( 64 + a.Lvl ) + '] = ISNULL( p.[Percentage_v'
                    + CHAR( 64 + a.Lvl ) + '], 0 ),'
        FROM (  SELECT  DISTINCT Lvl = DENSE_RANK() 
                            OVER (  PARTITION BY r.Recipe
                                    ORDER BY r.VersionID )
                FROM    dbo.Recipe r ) a
        ORDER BY a.Lvl
        FOR XML PATH ( '' ) ) b ( Col );

SET @SQL = N'
        SELECT  p.Recipe, ' + @Col + '
        FROM (  SELECT  Lvl = ''Percentage_v'' + CHAR( 64 + 
                            DENSE_RANK() OVER ( 
                                PARTITION BY r.Recipe
                                ORDER BY r.VersionID ) ), 
                        r.Recipe, 
                        r.Ingredient,
                        r.Percentage 
                FROM    dbo.Recipe r ) s
        PIVOT ( MAX( s.Percentage )
                FOR s.Lvl IN ( ' + @Piv + ' ) ) p
        ORDER BY p.Recipe;';
EXECUTE dbo.sp_executesql @statement = @SQL;
GO
Run Code Online (Sandbox Code Playgroud)

尽管丢失了每个配方的特定版本号,但这最终会得到一个更漂亮的结果集,正如在此 SQL Fiddle 中所见。

解决方案3:

如果不能容忍版本号的丢失,则可以实施混合方法,但每次调用的结果将仅限于单个配方。实际上,我们的目标 SQL 将类似于第一个解决方案,但具有明确定义的Recipe数字。

SELECT  p.Recipe, 
        [Ingredient_v100] = CASE WHEN p.[100] IS NULL THEN NULL ELSE p.[Ingredient] END, [Percentage_v100] = p.[100], 
        [Ingredient_v200] = CASE WHEN p.[200] IS NULL THEN NULL ELSE p.[Ingredient] END, [Percentage_v200] = p.[200], 
        [Ingredient_v300] = CASE WHEN p.[300] IS NULL THEN NULL ELSE p.[Ingredient] END, [Percentage_v300] = p.[300]
FROM (  SELECT  r.Recipe, 
                r.VersionID, 
                r.Ingredient,
                r.Percentage 
        FROM    dbo.Recipe r
        WHERE   r.Recipe = @Recipe ) s
PIVOT ( MAX( s.Percentage )
        FOR s.VersionID IN ( [100], [200], [300] ) ) p
ORDER BY p.Recipe;
Run Code Online (Sandbox Code Playgroud)

可以按如下方式处理生成:

DECLARE @Piv            NVARCHAR( MAX ),
        @Col            NVARCHAR( MAX ),
        @Param          NVARCHAR( MAX ),
        @SQL            NVARCHAR( MAX ),
        @Recipe         INTEGER = 4000;

SELECT  @Piv = LEFT( b.Piv, LEN( b.Piv ) - 1 )
FROM (  SELECT  N'[' + CONVERT( VARCHAR( 8 ), a.VersionID ) + '], '
        FROM (  SELECT  DISTINCT r.VersionID 
                FROM    dbo.Recipe r
                WHERE   Recipe = @Recipe ) a
        ORDER BY a.VersionID
        FOR XML PATH ( '' ) ) b ( Piv );

SELECT  @Col = LEFT( b.Piv, LEN( b.Piv ) - 1 )
FROM (  SELECT  N'[Ingredient_v' + CONVERT( VARCHAR( 8 ), a.VersionID ) + '] = CASE'
                + ' WHEN p.[' + CONVERT( VARCHAR( 8 ), a.VersionID ) + '] IS NULL THEN NULL'
                + ' ELSE p.[Ingredient] END, ' 
                + '[Percentage_v' + CONVERT( VARCHAR( 8 ), a.VersionID ) + '] = p.[' 
                    + CONVERT( VARCHAR( 8 ), a.VersionID ) + '], ' 
        FROM (  SELECT  DISTINCT r.VersionID 
                FROM    dbo.Recipe r
                WHERE   Recipe = @Recipe ) a
        ORDER BY a.VersionID
        FOR XML PATH ( '' ) ) b ( Piv );

SET @Param = N'@Recipe  INTEGER';

SET @SQL = N'
        SELECT  p.Recipe, ' + @Col + '
        FROM (  SELECT  r.Recipe, 
                        r.VersionID, 
                        r.Ingredient,
                        r.Percentage 
                FROM    dbo.Recipe r
                WHERE   r.Recipe = @Recipe ) s
        PIVOT ( MAX( s.Percentage )
                FOR s.VersionID IN ( ' + @Piv + ' ) ) p
        ORDER BY p.Recipe;';
EXECUTE dbo.sp_executesql @statement = @SQL, @param = @Param, @Recipe = @Recipe;
GO
Run Code Online (Sandbox Code Playgroud)

然后可以按配方访问结果,如这个SQL Fiddle这个所示。