如何设置派生列转换以获取 SSIS 2014 中的行号?

gak*_*ife 4 ssis

我正在尝试导入一个右侧参差不齐(ragged right)格式的平面文件。我想把每一行整体作为单独一列导入,并使用派生列转换为每一行添加行号。最终我希望得到两列:一列名为 RowNum,另一列名为 EntireRow。我该怎么做呢?

bil*_*nkc 10

派生列转换无法生成行号。当然,它可以添加一个名为 RowNum 的列,但它的表达式语言不支持逐行动态更新该列的值。因此,您需要改用脚本组件。

您可以搜索“SSIS 生成代理键”(SSIS generate surrogate key),会找到大量参考实现。在这个回答中,我借用 Joost 的帖子《Create a Row Id》中的代码:

// C# code: surrogate key script
using System;
using System.Data;
using Microsoft.SqlServer.Dts.Pipeline.Wrapper;
using Microsoft.SqlServer.Dts.Runtime.Wrapper;

[Microsoft.SqlServer.Dts.Pipeline.SSISScriptComponentEntryPointAttribute]
public class ScriptMain : UserComponent
{
    /// <summary>
    /// Number given to the most recently processed row (0 before any row has been seen).
    /// </summary>
    private int lastRowNumber;

    /// <summary>
    /// Invoked for each record in the data flow; writes the next sequential
    /// number into the row's RowNum column.
    /// </summary>
    /// <param name="Row">The row whose RowNum column is filled.</param>
    public override void Input0_ProcessInputRow(Input0Buffer Row)
    {
        // Advance the counter first, so the first row receives 1.
        Row.RowNum = ++lastRowNumber;
    }
}
Run Code Online (Sandbox Code Playgroud)

对于仍在使用 SSIS 2005 的用户,同样的方法用 VB.NET 写出来如下:

Imports System
Imports System.Data
Imports System.Math
Imports Microsoft.SqlServer.Dts.Pipeline.Wrapper
Imports Microsoft.SqlServer.Dts.Runtime.Wrapper

<Microsoft.SqlServer.Dts.Pipeline.SSISScriptComponentEntryPointAttribute> _
<CLSCompliant(False)> _
Public Class ScriptMain
    Inherits UserComponent

    ' Number given to the most recently processed row (0 before any row has been seen)
    Private lastRowNumber As Integer

    ' Invoked for each record in the data flow: stamps the row with the next sequential number
    Public Overrides Sub Input0_ProcessInputRow(ByVal Row As Input0Buffer)
        ' Advance the running count first, so the first row receives 1
        lastRowNumber += 1

        ' Write the current count into the RowNum column
        Row.RowNum = lastRowNumber
    End Sub
End Class
Run Code Online (Sandbox Code Playgroud)

因为我喜欢提供基于 Biml 的答案,您也可以使用下面的代码,它同样来自 Joost 的帖子《Creating BIML Script Component Transformation (rownumber)》:

<Biml xmlns="http://schemas.varigence.com/biml.xsd">
 <Annotations>
  <Annotation>
   File: Script Component Transformation RowNumber.biml
   Description: Example of using the Script Component as
   a transformation to add a rownumber to the destination.
   Note: Example has an OLE DB Destination that supports
   an identity column. Use your own Flat File, Excel or
   PDW destination that doesn't support an identity.
   VS2012 BIDS Helper 1.6.6.0
   By Joost van Rossum http://microsoft-ssis.blogspot.com
  </Annotation>
 </Annotations>

 <!--Package connection managers-->
    <Connections>
            <OleDbConnection
                Name="Source"
                ConnectionString="Data Source=.;Initial Catalog=ssisjoostS;Provider=SQLNCLI11.1;Integrated Security=SSPI;Auto Translate=False;">
            </OleDbConnection>
            <OleDbConnection
                Name="Destination"
                ConnectionString="Data Source=.;Initial Catalog=ssisjoostD;Provider=SQLNCLI11.1;Integrated Security=SSPI;Auto Translate=False;">
            </OleDbConnection>
       </Connections>

       <ScriptProjects>
             <ScriptComponentProject ProjectCoreName="sc_c253bef215bf4d6b85dbe3919c35c167.csproj" Name="SCR - Rownumber">
                    <AssemblyReferences>
                           <AssemblyReference AssemblyPath="Microsoft.SqlServer.DTSPipelineWrap" />
                           <AssemblyReference AssemblyPath="Microsoft.SqlServer.DTSRuntimeWrap" />
                           <AssemblyReference AssemblyPath="Microsoft.SqlServer.PipelineHost" />
                           <AssemblyReference AssemblyPath="Microsoft.SqlServer.TxScript" />
                           <AssemblyReference AssemblyPath="System.dll" />
                           <AssemblyReference AssemblyPath="System.AddIn.dll" />
                           <AssemblyReference AssemblyPath="System.Data.dll" />
                           <AssemblyReference AssemblyPath="System.Xml.dll" />
                    </AssemblyReferences>
                    <ReadOnlyVariables>
                           <Variable VariableName="maxrownumber" Namespace="User" DataType="Int32"></Variable>
                    </ReadOnlyVariables>
                    <Files>
       <!-- Left alignment of .Net script to get a neat layout in package-->
                           <File Path="AssemblyInfo.cs">
using System.Reflection;
using System.Runtime.CompilerServices;

//
// General Information about an assembly is controlled through the following 
// set of attributes. Change these attribute values to modify the information
// associated with an assembly.
//
[assembly: AssemblyTitle("SC_977e21e288ea4faaaa4e6b2ad2cd125d")]
[assembly: AssemblyDescription("")]
[assembly: AssemblyConfiguration("")]
[assembly: AssemblyCompany("SSISJoost")]
[assembly: AssemblyProduct("SC_977e21e288ea4faaaa4e6b2ad2cd125d")]
[assembly: AssemblyCopyright("Copyright @ SSISJoost 2015")]
[assembly: AssemblyTrademark("")]
[assembly: AssemblyCulture("")]
//
// Version information for an assembly consists of the following four values:
//
//      Major Version
//      Minor Version 
//      Build Number
//      Revision
//
// You can specify all the values or you can default the Revision and Build Numbers 
// by using the '*' as shown below:

[assembly: AssemblyVersion("1.0.*")]
                           </File>
       <!-- Replaced greater/less than by &gt; and &lt; -->
                           <File Path="main.cs">#region Namespaces
using System;
using System.Data;
using Microsoft.SqlServer.Dts.Pipeline.Wrapper;
using Microsoft.SqlServer.Dts.Runtime.Wrapper;
#endregion

/// &lt;summary&gt;
/// Rownumber transformation to create an identity column
/// &lt;/summary&gt;
[Microsoft.SqlServer.Dts.Pipeline.SSISScriptComponentEntryPointAttribute]
public class ScriptMain : UserComponent
{
 /// &lt;summary&gt;
 /// Last rownumber handed out; 0 until PreExecute seeds it from the maxrownumber variable
 /// &lt;/summary&gt;
 int lastNumber;

 /// &lt;summary&gt;
 /// Seed the counter with the value of the maxrownumber variable
 /// &lt;/summary&gt;
 public override void PreExecute()
 {
  lastNumber = Variables.maxrownumber;
 }

 /// &lt;summary&gt;
 /// Advance the counter and store the new value in the rownumber column
 /// &lt;/summary&gt;
 /// &lt;param name="Row"&gt;The row that is currently passing through the component&lt;/param&gt;
 public override void Input0_ProcessInputRow(Input0Buffer Row)
 {
  // Pre-increment: the first row gets the seed value plus one
  Row.rownumber = ++lastNumber;
 }
}
                           </File>
                    </Files>
                    <InputBuffer Name="Input0">
                           <Columns>
                           </Columns>
                    </InputBuffer>
                    <OutputBuffers>
                           <OutputBuffer Name="Output0">
                                  <Columns>
                                        <Column Name="rownumber" DataType="Int32"></Column>
                                  </Columns> 
                           </OutputBuffer>
                    </OutputBuffers>
             </ScriptComponentProject>
       </ScriptProjects>

       <Packages>
             <!--A query to get all tables from a certain database and loop through that collection-->
             <# string sConn = @"Provider=SQLNCLI11.1;Server=.;Initial Catalog=ssisjoostS;Integrated Security=SSPI;";#>
             <# string sSQL  = "SELECT name as TableName FROM dbo.sysobjects where xtype = 'U' and category = 0 ORDER BY name";#>
             <# DataTable tblAllTables = ExternalDataAccess.GetDataTable(sConn,sSQL);#>
             <# foreach (DataRow row in tblAllTables.Rows) { #>

             <!--Create a package for each table and use the tablename in the packagename-->
             <Package ProtectionLevel="DontSaveSensitive" ConstraintMode="Parallel" AutoCreateConfigurationsType="None" Name="ssisjoost_<#=row["TableName"]#>"> 
                    <Variables>
                           <Variable Name="maxrownumber" DataType="Int32">0</Variable>
                    </Variables>

                    <!--The tasks of my control flow: get max rownumber and a data flow task-->
                    <Tasks>
                    <!--Execute SQL Task to get max rownumber from destination-->
                    <ExecuteSQL
                           Name="SQL - Get max rownumber <#=row["TableName"]#>"
                           ConnectionName="Destination"
                           ResultSet="SingleRow">
                           <DirectInput>SELECT ISNULL(max([rownumber]),0) as maxrownumber FROM  <#=row["TableName"]#></DirectInput>
                           <Results> 
                           <Result Name="0" VariableName="User.maxrownumber" /> 
                           </Results> 
                    </ExecuteSQL>

                    <!--Data Flow Task to fill the destination table-->
                    <Dataflow Name="DFT - Process <#=row["TableName"]#>">
                    <!--Connect it to the preceding Execute SQL Task-->
                    <PrecedenceConstraints>
                           <Inputs>
                                  <Input OutputPathName="SQL - Get max rownumber <#=row["TableName"]#>.Output"></Input>
                           </Inputs>
                    </PrecedenceConstraints>

                    <Transformations>
                    <!--My source with a dynamic, but ugly, * which could be replaced by some .NET/SQL code retrieving the column names-->
                    <OleDbSource Name="OLE_SRC - <#=row["TableName"]#>" ConnectionName="Source">
                           <DirectInput>SELECT * FROM <#=row["TableName"]#></DirectInput>
                    </OleDbSource>

                    <ScriptComponentTransformation Name="SCR - Rownumber">
                           <ScriptComponentProjectReference ScriptComponentProjectName="SCR - Rownumber" />
                    </ScriptComponentTransformation>

                    <!--My destination with no column mapping because all source columns exist in destination table-->                       
                    <OleDbDestination Name="OLE_DST - <#=row["TableName"]#>" ConnectionName="Destination">
                           <ExternalTableOutput Table="<#=row["TableName"]#>"></ExternalTableOutput>
                    </OleDbDestination>
                    </Transformations>
                    </Dataflow>
                    </Tasks>
             </Package>
             <# } #>
       </Packages>
       </Biml>

<!--Includes/Imports for C#-->
<#@ template language="C#" hostspecific="true"#>
<#@ import namespace="System.Data"#>
<#@ import namespace="System.Data.SqlClient"#>
Run Code Online (Sandbox Code Playgroud)