Skip to content Skip to sidebar Skip to footer

Sql Split Function That Handles String With Delimeter Appearing Between Text Qualifiers?

There are several SQL split functions, from loop driven, to using xml commands, and even using a numbers table. I haven't found one that supports text qualifiers. Using the exampl

Solution 1:

Here is my solution:

CREATEFUNCTION fnSplitString
(
    @input nvarchar(MAX) 
)
RETURNS@emailsTABLE
(
    email nvarchar(MAX) 
)
ASBEGINDECLARE@lenint= LEN(@input)
DECLARE@posint=1;
DECLARE@startint=1;
DECLARE@ignore bit =0;
WHILE(@pos<=@len)
BEGINDECLARE@chnchar(1) =SUBSTRING(@input, @pos, 1);

    IF ( @ch='"'or@ch='''')
    BEGINSET@ignore=1-@ignore;
    END

    IF (@ch=','AND@ignore=0)
    BEGININSERT@emailsVALUES (SUBSTRING(@input, @start, @pos-@start));
        SET@start=@pos+1;
    ENDSET@pos=@pos+1;
END

IF (@start<>@pos)
BEGININSERT@emailsVALUES (SUBSTRING(@input, @start, @pos-@start));
ENDRETURNEND
GO

DECLARE@input nvarchar(max) ='jsmith@anywhere.com, "Sally \"Heat\" Jones" <sally@anywhere.com>, "Mark Jones" <mjones@anywhere.com>, "Stone, Ron" <rstone@anywhere.com>';

select*from fnSplitString(@input)

Solution 2:

CREATEFUNCTION [dbo].[udfSplit]
(
    @nvcString nvarchar(max),
    @nvcDelimiter nvarchar(1),
    @nvcTQ nvarchar(1)
)
RETURNS@tblTokensTABLE (
                            Token nvarchar(max)
                            )
ASBEGINDECLARE@intCounterintDECLARE@nvcToken nvarchar(4000)
    DECLARE@nvcCurrentChar nvarchar(1)
    DECLARE@intStartint

    IF @nvcString<>''BEGINSET@intCounter=1SET@nvcToken=''SET@intStart=0--Loop through each character of the string
            WHILE @intCounter<= LEN(@nvcString)
                BEGINSET@nvcCurrentChar=SUBSTRING(@nvcString, @intCounter, 1)

                    --If current char is TQ
                    IF @nvcCurrentChar=@nvcTQBEGIN--Concatonate to tokenSET@nvcToken=@nvcToken+@nvcCurrentChar--If this is the end TQ
                            IF @intStart<>0BEGIN--Fix TQSET@nvcToken= dbo.udfRemoveTQFromToken(@nvcToken, @nvcTQ)

                                    IF @nvcToken<>''BEGININSERTINTO@tblTokens (Token) VALUES (@nvcToken)
                                            SET@nvcToken=''END--Reset TQSET@intStart=0ENDELSEBEGINSET@nvcToken= dbo.udfRemoveTQFromToken(@nvcToken, @nvcTQ)

                                    IF @nvcToken<>''BEGININSERTINTO@tblTokens (Token) VALUES (@nvcToken)
                                            SET@nvcToken=''END--Mark TQ start positionSET@intStart=@intCounterENDENDELSE IF @intStart=0AND@nvcCurrentChar=@nvcDelimiterBEGIN--If not inside TQ, and char is DelimiterSET@nvcToken= dbo.udfRemoveTQFromToken(@nvcToken, @nvcTQ)

                            IF @nvcToken<>''BEGININSERTINTO@tblTokens (Token) VALUES (@nvcToken)
                                    SET@nvcToken=''ENDENDELSEBEGIN--Current char is not TQ or Delim, add to current tokenSET@nvcToken=@nvcToken+@nvcCurrentCharENDSET@intCounter=@intCounter+1ENDENDSET@nvcToken= dbo.udfRemoveTQFromToken(@nvcToken, @nvcTQ)

    IF @nvcToken<>''BEGIN--Current Token has not been added to tableINSERTINTO@tblTokens (Token) VALUES (@nvcToken)
        ENDRETURNEND

GO


CREATEFUNCTION [dbo].[udfRemoveTQFromToken]
(
    @nvcToken nvarchar(4000),
    @nvcTQ nvarchar(1)
)
RETURNS nvarchar(4000) ASBEGINDECLARE@nvcReturn nvarchar(4000)

    --Trim token, needs to be done first, --as we dont want to trim any spaces within the TQ--unless it was malformedSET@nvcReturn= LTRIM(RTRIM(@nvcToken))

    --If Left char is TQ
    IF LEFT(@nvcReturn, 1) =@nvcTQBEGIN--Though both cases perform the removal of the left most char (opening TQ)--We need to perform a trim after removal ONLY if it was malformed
            IF RIGHT(@nvcReturn, 1) <>@nvcTQBEGIN--But no matching end TQ, malformed--fix by removing left most char (the opening TQ)SET@nvcReturn=RIGHT(@nvcReturn, LEN(@nvcReturn) -1)
                    --Reapply the LTRIM, incase there were spaces after the opening TQSET@nvcReturn= LTRIM(@nvcReturn)
                ENDELSEBEGIN--has matching end TQ, well-formed--fix by removing left most char (the opening TQ)SET@nvcReturn=RIGHT(@nvcReturn, LEN(@nvcReturn) -1)
                ENDEND--Remove the right most char (the closing TQ)
    IF RIGHT(@nvcReturn, 1) =@nvcTQSET@nvcReturn=LEFT(@nvcReturn, LEN(@nvcReturn) -1)

    RETURN@nvcReturnEND

Solution 3:

This is a quick solution, and it is less than perfect, it has no stack, so it will treat the comma inside the quotes as the delimiter.

alterfunction fnSplit
(
    @Delimchar(1),
    @List nvarchar(4000)
)
returnstableasreturnwith 
    Strings(PosIdx) as 
    (
        select1unionallselect PosIdx +1from Strings where PosIdx <4000
    )
    select
        ltrim(rtrim(substring(@List, PosIdx, charindex(@Delim, @List+@Delim, PosIdx) - PosIdx))) asvaluefrom   
        Strings
    where  
        PosIdx <=convert(int, len(@List))
    andsubstring(@Delim+@List, PosIdx, 1) =@Delim 
go
select*from fnSplit(',', 'jsmith@anywhere.com, "Sally \"Heat\" Jones" <sally@anywhere.com>, "Mark Jones" <mjones@anywhere.com>, "Stone, Ron" <rstone@anywhere.com>') 
    option (maxrecursion 0)

Post a Comment for "Sql Split Function That Handles String With Delimeter Appearing Between Text Qualifiers?"