I use several different ways to split strings by a delimiter: TStringList, the string helper Split, or System.StrUtils.SplitString (most of the time TStringList). To refactor and choose the fastest one, I ran some tests and also wrote two custom split functions.
I'm just looking for the fastest approach, so the type of the result is less important than the performance:
Interestingly, with short strings, pre-counting the delimiters to size the array correctly is faster, but with longer strings the pre-counting pass slows things down.
UsingStringList
Short str: 509
Medium str: 1107
Long str: 3473
UsingSplitHelper
Short str: 293
Medium str: 692
Long str: 2116
UsingSplitString
Short str: 476
Medium str: 1413
Long str: 5909
winners:
* CustomSplitWithPrecount - count delimiters to initialize array
Short str: 178
Medium str: 474
Long str: 1659
* CustomSplitNoPrecount - no counting of delimiters, resize array at the end
Short str: 184
Medium str: 457
Long str: 1477
program Project1;
{$APPTYPE CONSOLE}
{$R *.res}
uses
System.SysUtils, System.StrUtils, System.Classes, System.Diagnostics, System.Generics.Collections, System.Generics.Defaults, System.Types;
// Benchmark configuration: iteration count, delimiter and the three test inputs.
const
// Number of times each split call is repeated inside one timed loop.
cMaxLoop = 1000000;
// Delimiter passed to every split function.
cDelimiter : char = ',';
// Test inputs of increasing length (3, 10 and 32 comma-separated tokens).
cShortStr : string = 'word,string,character';
cMediumStr : string = 'cat,dog,mouse,horse,pigeon,tiger,worm,lion,turtle,fish';
cLongStr : string = 'black,maroon,green,dark green,light green,olive,navy,purple,teal,silver,grey,red,ligh blue,dark blue,navy blue,cyan,grey,white,aqua,teal,silver,orange,violet,blue violet,dark red,deep pink,steel blue,sea blue,aquamarine,medium turquoise,violet,last colorX';
// Globals used by the main block to receive results and measure time.
var
// Receives the result of UsingStringList; freed by the main block after each call.
xStrList: TStringList;
// Receives the results of UsingSplitHelper and both CustomSplit* functions.
xArray: TArray<string>;
// Receives the result of UsingSplitString.
xStrDynArray: TStringDynArray;
// Stopwatch restarted before every timed loop.
xSW: TStopWatch;
// Loop counter for the timed loops.
i: integer;
// Splits aString at every aDelimiter by assigning it to the DelimitedText
// property of a newly created TStringList.
// StrictDelimiter is enabled before the text is assigned.
// Returns the new list; the caller owns it and must free it.
function UsingStringList(const aString: string; const aDelimiter: char): TStringList;
var
  vList: TStringList;
begin
  vList := TStringList.Create;
  // Both parsing options must be in place before DelimitedText is assigned.
  vList.Delimiter := aDelimiter;
  vList.StrictDelimiter := True;
  vList.DelimitedText := aString;
  Result := vList;
end;
// Splits aString at every aDelimiter using the string helper's Split method,
// passing the delimiter as a one-element separator array.
// Returns whatever array Split produces.
function UsingSplitHelper(const aString: string; const aDelimiter: char): TArray<string>;
begin
  Exit(aString.Split([aDelimiter]));
end;
// Splits aString at every aDelimiter by delegating to
// System.StrUtils.SplitString; the single character is passed as the
// delimiter argument.
// Returns whatever array SplitString produces.
function UsingSplitString(const aString: string; const aDelimiter: char): TStringDynArray;
begin
  Exit(System.StrUtils.SplitString(aString, aDelimiter));
end;
// Splits aString at every aDelimiter in two passes: the first pass counts the
// delimiters so the result array can be allocated once with its final size,
// the second pass copies the tokens.
//
// Returns:
//   - an empty array when aString is empty;
//   - a one-element array holding aString when it contains no delimiter;
//   - otherwise (number of delimiters + 1) tokens; tokens between adjacent
//     delimiters and after a trailing delimiter are empty strings.
function CustomSplitWithPrecount(const aString: string; const aDelimiter: Char): TArray<string>;
var
  i, c: Integer;
  vCurrPos, vCurrTokenStart: PChar;
begin
  // Always start from a defined value: Result is a managed type and must not
  // be left unassigned on the early-exit paths.
  Result := nil;
  if aString = '' then
    Exit;

  // Pass 1: count delimiters to set the array size.
  vCurrPos := PChar(aString);
  c := 0;
  for i := 1 to Length(aString) do
  begin
    if vCurrPos^ = aDelimiter then
      Inc(c);
    Inc(vCurrPos);
  end;

  SetLength(Result, c + 1); // tokens = number of delimiters + 1

  // No delimiter: the whole input is the single token.
  if c = 0 then
  begin
    Result[0] := aString;
    Exit;
  end;

  // Pass 2: parse.
  c := 0;
  vCurrPos := PChar(aString);
  vCurrTokenStart := vCurrPos;
  for i := 1 to Length(aString) do
  begin
    if vCurrPos^ = aDelimiter then
    begin
      // Save the token that ends just before this delimiter.
      SetString(Result[c], vCurrTokenStart, vCurrPos - vCurrTokenStart);
      Inc(c);
      Inc(vCurrPos);
      vCurrTokenStart := vCurrPos;
      // After the last delimiter the rest of the string is the final token,
      // so jump straight to the end instead of scanning it.
      if c = Length(Result) - 1 then
      begin
        Inc(vCurrPos, Length(aString) - i);
        Break;
      end;
    end
    else
      Inc(vCurrPos);
  end;

  // Save the last token.
  SetString(Result[c], vCurrTokenStart, vCurrPos - vCurrTokenStart);
end;
// Splits aString at every aDelimiter in a single pass: the result array is
// first allocated with the largest size the input could need and is trimmed
// to the real token count at the end.
//
// Returns:
//   - an empty array when aString is empty;
//   - otherwise (number of delimiters + 1) tokens; tokens between adjacent
//     delimiters and after a trailing delimiter are empty strings.
function CustomSplitNoPrecount(const aString: string; const aDelimiter: Char): TArray<string>;
var
  i, c: Integer;
  vCurrPos, vCurrTokenStart: PChar;
begin
  Result := nil;
  if aString = '' then
    Exit;

  // Preset the array to the maximum possible size: a string of n characters
  // holds at most n delimiters and therefore at most n + 1 tokens.
  SetLength(Result, Length(aString) + 1);

  // Parse.
  c := 0;
  vCurrPos := PChar(aString);
  vCurrTokenStart := vCurrPos;
  for i := 1 to Length(aString) do
  begin
    if vCurrPos^ = aDelimiter then
    begin
      // Save the token that ends just before this delimiter.
      SetString(Result[c], vCurrTokenStart, vCurrPos - vCurrTokenStart);
      Inc(c);
      Inc(vCurrPos);
      vCurrTokenStart := vCurrPos;
    end
    else
      Inc(vCurrPos);
  end;

  // Save the last token (empty when the string ends with a delimiter).
  SetString(Result[c], vCurrTokenStart, vCurrPos - vCurrTokenStart);

  // Trim the array to its final size.
  SetLength(Result, c + 1);
end;
// Benchmark driver: for each split implementation, runs cMaxLoop calls on the
// short, medium and long test strings and prints the elapsed milliseconds of
// each loop. The stopwatch is restarted before every loop.
begin
// --- TStringList.DelimitedText ---
// Each iteration also frees the returned list, so list creation and
// destruction are part of the measured time.
Writeln('UsingStringList');
xSW := TStopWatch.StartNew;
for i := 1 to cMaxLoop do
begin
xStrList := UsingStringList(cShortStr, cDelimiter);
xStrList.Free;
end;
Writeln('Short str: ' + xSW.ElapsedMilliseconds.ToString);
xSW := TStopWatch.StartNew;
for i := 1 to cMaxLoop do
begin
xStrList := UsingStringList(cMediumStr, cDelimiter);
xStrList.Free;
end;
Writeln('Medium str: ' + xSW.ElapsedMilliseconds.ToString);
xSW := TStopWatch.StartNew;
for i := 1 to cMaxLoop do
begin
xStrList := UsingStringList(cLongStr, cDelimiter);
xStrList.Free;
end;
Writeln('Long str: ' + xSW.ElapsedMilliseconds.ToString);
writeln;
// --- string helper Split ---
Writeln('UsingSplitHelper');
xSW := TStopWatch.StartNew;
for i := 1 to cMaxLoop do
xArray := UsingSplitHelper(cShortStr, cDelimiter);
Writeln('Short str: ' + xSW.ElapsedMilliseconds.ToString);
xSW := TStopWatch.StartNew;
for i := 1 to cMaxLoop do
xArray := UsingSplitHelper(cMediumStr, cDelimiter);
Writeln('Medium str: ' + xSW.ElapsedMilliseconds.ToString);
xSW := TStopWatch.StartNew;
for i := 1 to cMaxLoop do
xArray := UsingSplitHelper(cLongStr, cDelimiter);
Writeln('Long str: ' + xSW.ElapsedMilliseconds.ToString);
writeln;
// --- System.StrUtils.SplitString ---
Writeln('UsingSplitString');
xSW := TStopWatch.StartNew;
for i := 1 to cMaxLoop do
xStrDynArray := UsingSplitString(cShortStr, cDelimiter);
Writeln('Short str: ' + xSW.ElapsedMilliseconds.ToString);
xSW := TStopWatch.StartNew;
for i := 1 to cMaxLoop do
xStrDynArray := UsingSplitString(cMediumStr, cDelimiter);
Writeln('Medium str: ' + xSW.ElapsedMilliseconds.ToString);
xSW := TStopWatch.StartNew;
for i := 1 to cMaxLoop do
xStrDynArray := UsingSplitString(cLongStr, cDelimiter);
Writeln('Long str: ' + xSW.ElapsedMilliseconds.ToString);
writeln;
// --- custom split, delimiters counted first ---
Writeln('CustomSplitWithPrecount');
xSW := TStopWatch.StartNew;
for i := 1 to cMaxLoop do
xArray := CustomSplitWithPrecount(cShortStr, cDelimiter);
Writeln('Short str: ' + xSW.ElapsedMilliseconds.ToString);
xSW := TStopWatch.StartNew;
for i := 1 to cMaxLoop do
xArray := CustomSplitWithPrecount(cMediumStr, cDelimiter);
Writeln('Medium str: ' + xSW.ElapsedMilliseconds.ToString);
xSW := TStopWatch.StartNew;
for i := 1 to cMaxLoop do
xArray := CustomSplitWithPrecount(cLongStr, cDelimiter);
Writeln('Long str: ' + xSW.ElapsedMilliseconds.ToString);
writeln;
// --- custom split, array trimmed at the end ---
Writeln('CustomSplitNoPrecount');
xSW := TStopWatch.StartNew;
for i := 1 to cMaxLoop do
xArray := CustomSplitNoPrecount(cShortStr, cDelimiter);
Writeln('Short str: ' + xSW.ElapsedMilliseconds.ToString);
xSW := TStopWatch.StartNew;
for i := 1 to cMaxLoop do
xArray := CustomSplitNoPrecount(cMediumStr, cDelimiter);
Writeln('Medium str: ' + xSW.ElapsedMilliseconds.ToString);
xSW := TStopWatch.StartNew;
for i := 1 to cMaxLoop do
xArray := CustomSplitNoPrecount(cLongStr, cDelimiter);
Writeln('Long str: ' + xSW.ElapsedMilliseconds.ToString);
// Wait for Enter before the program ends.
readln;
end.
Does anybody have an example of a faster function that splits strings by a delimiter?