SQL Server相当于MySQL中的substring_index函数

Ank*_*kit 7 mysql porting sql-server-2012

我正在尝试将查询从MySQL移植到SQL SERVER 2012.

如何为MySQL的substring_index()编写等效项?

MySQL SUBSTRING_INDEX()在指定的分隔符出现次数之前返回给定字符串的子字符串.

SUBSTRING_INDEX(str,delim,count)

SELECT SUBSTRING_INDEX('www.somewebsite.com','.',2);
Run Code Online (Sandbox Code Playgroud)

输出: 'www.somewebsite'

Sco*_*ner 7

我最近需要这个,所以我写了以下存储函数。最后是一系列测试,以确保它的运行与 MySql 函数完全相同(在运行相同的测试后,预期结果是从 MySql 复制的):

-- Function to reproduce the useful functionality of SUBSTRING_INDEX from MySql
CREATE FUNCTION dbo.SUBSTRING_INDEX(@InString  NVARCHAR(Max),
                                    @Delimiter NVARCHAR(Max),
                                    @Count     INT)
RETURNS NVARCHAR(200)
AS
BEGIN
    DECLARE @Pos INT;
    DECLARE @DelimiterOffsets TABLE
    (
         i      INT IDENTITY(1, 1) NOT NULL,
         offset INT NOT NULL
    );

    -- If @Count is zero, we return '' as per spec
    IF @Count = 0
    BEGIN
        RETURN '';
    END;

    DECLARE @OrigLength      INT = LEN(@InString);
    DECLARE @DelimiterLength INT = LEN(@Delimiter);

    -- Prime the pump.
    SET @Pos = Charindex(@Delimiter, @InString, 1);

    -- If the delimiter does not exist in @InString, return the whole string
    IF @Pos = 0
    BEGIN
        RETURN @InString;
    END;

    -- Put all delimiter offsets into @DelimiterOffsets, they get numbered automatically.
    DECLARE @CurrentOffset INT = 0;
    WHILE @Pos > 0
    BEGIN
        SET @CurrentOffset = @Pos;

        INSERT INTO @DelimiterOffsets
                    (offset)
             VALUES (@CurrentOffset);

        SET @Pos = Charindex(@Delimiter, @InString, @CurrentOffset + @DelimiterLength);
    END;

    -- This number is guaranteed to be > 0.
    DECLARE @DelimitersFound INT = (SELECT Count(*) FROM @DelimiterOffsets);

    -- If they requested more delimiters than were found, return the whole string, as per spec.
    IF Abs(@Count) > @DelimitersFound
    BEGIN
        RETURN @InString;
    END;

    DECLARE @StartSubstring INT = 0;
    DECLARE @EndSubstring   INT = @OrigLength;

    -- OK, now return the part they requested
    IF @Count > 0
    BEGIN
        SET @EndSubstring = (SELECT offset 
                               FROM @DelimiterOffsets 
                              WHERE i = @Count);
    END
    ELSE
    BEGIN
        SET @StartSubstring = (SELECT offset + @DelimiterLength 
                                 FROM @DelimiterOffsets 
                                WHERE i = (@DelimitersFound + @Count + 1));
    END;

    RETURN Substring(@InString, @StartSubstring, @EndSubstring);
END; 

Go 

GRANT EXECUTE ON [dbo].SUBSTRING_INDEX TO PUBLIC;

-- Tests
DECLARE @TestResults TABLE (i int, answer nVarChar(MAX), expected nVarChar(MAX));

insert into @TestResults
select * from  
(
    (SELECT  1 as i, [dbo].SUBSTRING_INDEX(N'www.somewebsite.com', N'.', 2)    as r, 'www.somewebsite'     as e) UNION
    (SELECT  2 as i, [dbo].SUBSTRING_INDEX(N'www.yahoo.com', N'.', 2)          as r, 'www.yahoo'           as e) UNION
    (SELECT  3 as i, [dbo].SUBSTRING_INDEX(N'www.outlook.com', N'.', 2)        as r, 'www.outlook'         as e) UNION
    (SELECT  4 as i, [dbo].SUBSTRING_INDEX(N'www.somewebsite.com', N'.', -2)   as r, 'somewebsite.com'     as e) UNION
    (SELECT  5 as i, [dbo].SUBSTRING_INDEX(N'www.yahoo.com', N'.', -2)         as r, 'yahoo.com'           as e) UNION
    (SELECT  6 as i, [dbo].SUBSTRING_INDEX(N'www.outlook.com', N'.', -2)       as r, 'outlook.com'         as e) UNION
    (select  7 as i, [dbo].SUBSTRING_INDEX('hi.you.com','.',2)                 as r, 'hi.you'              as e) UNION
    (select  8 as i, [dbo].SUBSTRING_INDEX('hi.you.com','.',-1)                as r, 'com'                 as e) UNION
    (select  9 as i, [dbo].SUBSTRING_INDEX(N'prueba','ue',1)                   as r, 'pr'                  as e) UNION
    (select 10 as i, [dbo].SUBSTRING_INDEX(N'prueba','ue',-1)                  as r, 'ba'                  as e) UNION
    (select 11 as i, [dbo].SUBSTRING_INDEX(N'prueba','ue',0)                   as r, ''                    as e) UNION
    (SELECT 12 as i, [dbo].SUBSTRING_INDEX(N'wwwxxxoutlookxxxcom', N'xxx', 2)  as r, 'wwwxxxoutlook'       as e) UNION
    (SELECT 13 as i, [dbo].SUBSTRING_INDEX(N'wwwxxxoutlookxxxcom', N'xxx', -2) as r, 'outlookxxxcom'       as e) UNION
    (SELECT 14 as i, [dbo].SUBSTRING_INDEX(N'wwwxxxoutlookxxxcom', N'xxx', 5)  as r, 'wwwxxxoutlookxxxcom' as e) UNION
    (SELECT 15 as i, [dbo].SUBSTRING_INDEX(N'wwwxxxoutlookxxxcom', N'xxx', -5) as r, 'wwwxxxoutlookxxxcom' as e)
) as results;

select tr.i,
       tr.answer,
       tr.expected,
       CASE WHEN tr.answer = tr.expected THEN 'Test Succeeded' ELSE 'Test Failed' END testState
  from @TestResults tr
 order by i;
Run Code Online (Sandbox Code Playgroud)

这是一个受Bogdan Sahlean的答案启发的版本,使用 SQL Server 的 XML 功能进行解析和组合:

CREATE FUNCTION dbo.SUBSTRING_INDEX(@InString  NVARCHAR(Max),
                                    @Delimiter NVARCHAR(Max),
                                    @Count     INT)
RETURNS NVARCHAR(200)
AS
BEGIN
    -- If @Count is zero, we return '' as per spec
    IF @Count = 0
    BEGIN
        RETURN '';
    END;

    -- First we let the XML parser break up the string by @Delimiter.
    -- Each parsed value will be <piece>[text]</piece>.
    DECLARE @XmlSourceString XML = (select N'<piece>' + REPLACE( (SELECT @InString AS '*' FOR XML PATH('')) , @Delimiter, N'</piece><piece>' ) + N'</piece>');

    -- This will contain the final requested string.
    DECLARE @Results nVarChar(MAX);

    ;WITH Pieces(RowNumber, Piece) as
    (
        -- Take each node in @XmlSourceString, and return it with row numbers
        -- which will identify each piece and give us a handle to change the
        -- order, depending on the direction of search.
        SELECT  row_number() over(order by x.XmlCol) as RowNumber,
                @Delimiter + x.XmlCol.value(N'(text())[1]', N'NVARCHAR(MAX)') AS '*'
          FROM  @XmlSourceString.nodes(N'(piece)') x(XmlCol)
    ), orderedPieces(RowNumber, Piece) as
    (
        -- Order the pieces normally or reversed depending on whether they want
        -- the first @Count pieces or the last @Count pieces.
        select TOP (ABS(@Count)) 
               RowNumber, 
               Piece
          from Pieces
         ORDER BY CASE WHEN @Count < 0 THEN RowNumber END DESC ,
                  CASE WHEN @Count > 0 THEN RowNumber END ASC
    ), combinedPieces(result) as
    (
        -- Now combine the pieces back together, ordering them by
        -- the original order.  There will always
        -- be an extra @Delimiter on the front of the string.
        select CAST(Piece AS VARCHAR(100))
          from OrderedPieces
         order by RowNumber
           FOR XML PATH(N'')
    )
    -- Finally, strip off the extra delimiter using STUFF and store the string in @Results.
    select @Results = STUFF(result, 1, LEN(@Delimiter), '') from combinedPieces;

    return @Results;
END;
Run Code Online (Sandbox Code Playgroud)

运行测试会产生以下结果:

i  answer              expected             testState
1  www.somewebsite     www.somewebsite      Test Succeeded
2  www.yahoo           www.yahoo            Test Succeeded
3  www.outlook         www.outlook          Test Succeeded
4  somewebsite.com     somewebsite.com      Test Succeeded
5  yahoo.com           yahoo.com            Test Succeeded
6  outlook.com         outlook.com          Test Succeeded
7  hi.you              hi.you               Test Succeeded
8  com                 com                  Test Succeeded
9  pr                  pr                   Test Succeeded
10 ba                  ba                   Test Succeeded
11                                          Test Succeeded
12 wwwxxxoutlook       wwwxxxoutlook        Test Succeeded
13 outlookxxxcom       outlookxxxcom        Test Succeeded
14 wwwxxxoutlookxxxcom wwwxxxoutlookxxxcom  Test Succeeded
15 wwwxxxoutlookxxxcom wwwxxxoutlookxxxcom  Test Succeeded
Run Code Online (Sandbox Code Playgroud)


Bog*_*ean 6

尝试基于T-SQL和XQuery((root/row)[position() <= sql:variable("@count")])的此解决方案:

T-SQL标量函数:

CREATE FUNCTION dbo.SUBSTRING_INDEX
(
    @str NVARCHAR(4000),
    @delim NVARCHAR(1),
    @count INT
)
RETURNS NVARCHAR(4000)
WITH SCHEMABINDING
BEGIN
    DECLARE @XmlSourceString XML;
    SET @XmlSourceString = (SELECT N'<root><row>' + REPLACE( (SELECT @str AS '*' FOR XML PATH('')) , @delim, N'</row><row>' ) + N'</row></root>');

    RETURN STUFF
    (
        ((
            SELECT  @delim + x.XmlCol.value(N'(text())[1]', N'NVARCHAR(4000)') AS '*'
            FROM    @XmlSourceString.nodes(N'(root/row)[position() <= sql:variable("@count")]') x(XmlCol)
            FOR XML PATH(N''), TYPE
        ).value(N'.', N'NVARCHAR(4000)')), 
        1, 1, N''
    );
END
GO

SELECT dbo.SUBSTRING_INDEX(N'www.somewebsite.com', N'.', 2) AS Result;
Run Code Online (Sandbox Code Playgroud)

输出:

/*
Result
---------------
www.somewebsite
*/
Run Code Online (Sandbox Code Playgroud)

要么

TSQL内联表值函数:

CREATE FUNCTION dbo.SUBSTRING_INDEX
(
    @str NVARCHAR(4000),
    @delim NVARCHAR(1),
    @count INT
)
RETURNS TABLE
AS 
RETURN
    WITH Base
    AS 
    (
        SELECT XmlSourceString = CONVERT(XML, (SELECT N'<root><row>' + REPLACE( (SELECT @str AS '*' FOR XML PATH('')) , @delim, N'</row><row>' ) + N'</row></root>'))
    )   
    SELECT STUFF
    (
        ((
            SELECT  @delim + x.XmlCol.value(N'(text())[1]', N'NVARCHAR(4000)') AS '*'
            FROM    Base b 
            CROSS APPLY b.XmlSourceString.nodes(N'(root/row)[position() <= sql:variable("@count")]') x(XmlCol)
            FOR XML PATH(N''), TYPE
        ).value(N'.', N'NVARCHAR(4000)')), 
        1, 1, N''
    ) AS Result;
GO

SELECT  *
FROM    (
    SELECT N'www.somewebsite.com' UNION ALL 
    SELECT N'www.yahoo.com' UNION ALL 
    SELECT N'www.outlook.com'
) a(Value)
CROSS APPLY dbo.SUBSTRING_INDEX(a.Value, N'.', 2) b;
Run Code Online (Sandbox Code Playgroud)

输出:

/*
Value               Result
------------------- ---------------
www.somewebsite.com www.somewebsite
www.yahoo.com       www.yahoo
www.outlook.com     www.outlook
*/
Run Code Online (Sandbox Code Playgroud)

  • 此解决方案不完整,因为它不支持@count为负数(从字符串的末尾而不是从开头搜索). (2认同)