Jump to content

## Recommended Posts

6 minutes ago, Clément said:

I know﻿﻿ the test is about the conversion, but since "for in" is very slow, it might help straight things up a little extra bit

Should change all of the for ins to be fair

#### Share this post

##### Link to post

This has turned into a great thread for learning about speed improvements by simple tweaks.

Using Sets instead of Case

```function HexToBinMahdi5(const HexValue: string): string;

type
TChar4 = array [0 .. 3] of Char;
PChar4 = ^TChar4;
const
Table1: array ['0' .. '9'] of TChar4 = ('0000', '0001', '0010', '0011', '0100', '0101', '0110', '0111', '1000', '1001');
Table2: array ['a' .. 'f'] of TChar4 = ('1010', '1011', '1100', '1101', '1110', '1111');
Table3: array ['A' .. 'F'] of TChar4 = ('1010', '1011', '1100', '1101', '1110', '1111');
var
HexDigit: Char;
P: PChar4;
begin
SetLength(Result, Length(HexValue) * 4);
P := PChar4(Result);

for var i: integer := low(HexValue) to high( HexValue ) do
begin
if HexValue[i] in [ '0' .. '9'] then
P^ := Table1[HexValue[i]]
else
if HexValue[i] in [ 'a' .. 'f'] then
P^ := Table2[HexValue[i]]
else
if HexValue[i] in [ 'A' .. 'F'] then
P^ := Table3[HexValue[i]]
else
raise EConvertError.CreateFmt('Invalid hex digit ''%s'' found in ''%s''', [HexDigit, HexValue]);
Inc(P);
end;
end;```

Compiled 32bit Release

Pascal lookup (Heff): 4091
Pascal lookup (Mahdi): 4107
Pascal lookup (Mahdi 3 Table): 4264
Pascal lookup (Mahdi 3 Table Set): 3665
Pascal lookup (Mahdi 3 Table Set Local Var loop): 2951

Edited by Bernard
• 2

#### Share this post

##### Link to post
7 minutes ago, David Heffernan said:

Should change all of the for ins to be fair

The code:

```{\$APPTYPE CONSOLE}

uses
System.SysUtils,
System.Classes,
System.Diagnostics;

//********************************************
// Constants to tune the benchmark performance

const
HexStringLength = 64;
InputDataCount = 512;
IterationCount = 20000;

//*****************************************************************
// David Heffernan's routine, with Move replace by Int64 assignment

{\$DEFINE ReplaceMoveWithInt64Assign}
function HexToBinHeff(const HexValue: string): string;
const
BinaryValues: array [0..15] of string = (
'0000', '0001', '0010', '0011',
'0100', '0101', '0110', '0111',
'1000', '1001', '1010', '1011',
'1100', '1101', '1110', '1111'
);
var
HexDigit: Char;
HexDigitValue: Integer;
Ptr: PChar;
begin
SetLength(Result, Length(HexValue) * 4);
Ptr := Pointer(Result);
for HexDigit in HexValue do
begin
case HexDigit of
'0'..'9':
HexDigitValue := Ord(HexDigit) - Ord('0');
'a'..'f':
HexDigitValue := 10 + Ord(HexDigit) - Ord('a');
'A'..'F':
HexDigitValue := 10 + Ord(HexDigit) - Ord('A');
else
raise EConvertError.CreateFmt('Invalid hex digit ''%s'' found in ''%s''', [HexDigit, HexValue]);
end;
{\$IFDEF ReplaceMoveWithInt64Assign}
PInt64(Ptr)^ := PInt64(BinaryValues[HexDigitValue])^;
{\$ELSE}
Move(Pointer(BinaryValues[HexDigitValue])^, Ptr^, 4 * SizeOf(Char));
{\$ENDIF}
Inc(Ptr, 4);
end;
end;

function HexToBinHeff2(const HexValue: string): string;
const
BinaryValues: array [0..15] of string = (
'0000', '0001', '0010', '0011',
'0100', '0101', '0110', '0111',
'1000', '1001', '1010', '1011',
'1100', '1101', '1110', '1111'
);
var
HexDigit: Char;
HexDigitValue: Integer;
Ptr: PChar;
begin
SetLength(Result, Length(HexValue) * 4);
Ptr := Pointer(Result);
for var i : integer := low(HexValue) to High( HexValue ) do
begin
case HexValue[i] of
'0'..'9':
HexDigitValue := Ord(HexValue[i]) - Ord('0');
'a'..'f':
HexDigitValue := 10 + Ord(HexValue[i]) - Ord('a');
'A'..'F':
HexDigitValue := 10 + Ord(HexValue[i]) - Ord('A');
else
raise EConvertError.CreateFmt('Invalid hex digit ''%s'' found in ''%s''', [HexValue[i], HexValue]);
end;
{\$IFDEF ReplaceMoveWithInt64Assign}
PInt64(Ptr)^ := PInt64(BinaryValues[HexDigitValue])^;
{\$ELSE}
Move(Pointer(BinaryValues[HexDigitValue])^, Ptr^, 4 * SizeOf(Char));
{\$ENDIF}
Inc(Ptr, 4);
end;
end;

//************************
// Mahdi Safsafi's routine

{\$DEFINE ReplaceMoveWithInt64Assign}

function HexToBinMahdi(const HexValue: string): string;
type
TChar4 = array [0 .. 3] of Char;
PChar4 = ^TChar4;
const
Table1: array ['0' .. '9'] of TChar4 = ('0000', '0001', '0010', '0011', '0100', '0101', '0110', '0111', '1000', '1001');
Table2: array ['a' .. 'f'] of TChar4 = ('1010', '1011', '1100', '1101', '1110', '1111');
var
HexDigit: Char;
P: PChar4;
begin
SetLength(Result, Length(HexValue) * 4);
P := PChar4(Result);
for HexDigit in HexValue do
begin
case HexDigit of
'0' .. '9':
P^ := Table1[HexDigit];
'a' .. 'f':
P^ := Table2[HexDigit];
'A' .. 'F':
P^ := Table2[Chr(Ord(HexDigit) xor \$20)];
else
raise EConvertError.CreateFmt('Invalid hex digit ''%s'' found in ''%s''', [HexDigit, HexValue]);
end;
Inc(P);
end;
end;

function HexToBinMahdi2(const HexValue: string): string;
type
TChar4 = array [0 .. 3] of Char;
PChar4 = ^TChar4;
const
Table1: array ['0' .. '9'] of TChar4 = ('0000', '0001', '0010', '0011', '0100', '0101', '0110', '0111', '1000', '1001');
Table2: array ['a' .. 'f'] of TChar4 = ('1010', '1011', '1100', '1101', '1110', '1111');
var
HexDigit: Char;
P: PChar4;
begin
SetLength(Result, Length(HexValue) * 4);
P := PChar4(Result);
for var i: integer := low(HexValue) to high( HexValue ) do
begin
case HexValue[i] of
'0' .. '9':
P^ := Table1[HexValue[i]];
'a' .. 'f':
P^ := Table2[HexValue[i]];
'A' .. 'F':
P^ := Table2[Chr(Ord(HexValue[i]) xor \$20)];
else
raise EConvertError.CreateFmt('Invalid hex digit ''%s'' found in ''%s''', [HexValue[i], HexValue]);
end;
Inc(P);
end;
end;

{\$IFDEF CPUX86}
//***********************
// Kas Ob's asm32 version

procedure CharToBin_ASM32(HexChar: Char; HexBuffer: PChar);
asm
push edi
mov edi,edx
//      Get the decimal value of one Hex Char (= half byte)
movzx   eax, HexChar
mov     ecx, 57
sub     ecx, eax
sar     ecx, 31
and     ecx, 39
neg     ecx
add     eax, ecx
add     eax,  - 48
//      Produce 4 Chars presenting 4 bits of HexChar
xor     ecx,ecx
mov     dx,\$1
test    al,4
cmovne  cx,dx
shl     ecx,16
test    al,8
cmovne  cx,dx
add     ecx,\$00300030
mov     [edi],ecx
xor     ecx,ecx
test    al,1
cmovne  cx,dx
shl     ecx,16
test    al,2
cmovne  cx,dx
add     ecx,\$00300030
mov     [edi+4],ecx
pop edi
end;

function HexToBinAsm32(const HexValue: string): string;
var
HexDigit: Char;
Ptr: PChar;
begin
SetLength(Result, Length(HexValue) * 4);
Ptr := Pointer(Result);
for HexDigit in HexValue do
begin
case HexDigit of
'0'..'9','a'..'f','A'..'F':
CharToBin_ASM32(HexDigit, Ptr);
else
raise EConvertError.CreateFmt('Invalid hex digit ''%s'' found in ''%s''', [HexDigit, HexValue]);
end;
Inc(Ptr, 4);
end;
end;

function HexToBinAsm322(const HexValue: string): string;
var
HexDigit: Char;
Ptr: PChar;
begin
SetLength(Result, Length(HexValue) * 4);
Ptr := Pointer(Result);
for var i: integer := low(HexValue) to High( HexValue ) do
begin
case HexValue[i] of
'0'..'9','a'..'f','A'..'F':
CharToBin_ASM32(HexValue[i], Ptr);
else
raise EConvertError.CreateFmt('Invalid hex digit ''%s'' found in ''%s''', [HexDigit, HexValue]);
end;
Inc(Ptr, 4);
end;
end;

//*********************
// Kas Ob's xmm version

procedure CharToBin_XMM(HexChar: Char; HexBuffer: PChar);
const
DEC_TO_BIN_WORD_MASK: array[0..7] of UInt16 = (\$01, \$02,\$00,\$00, \$04, \$08, \$00, \$00);
DEC_TO_BIN_FF_TO_CHARONE_DISTANCE: array[0..7] of UInt16 = (\$FFCF, \$FFCF, \$FFCF, \$FFCF, \$FFCF, \$FFCF, \$FFCF, \$FFCF);
DEC_TO_BIN_REVERSE_MASK: array[0..15] of Byte = (10, \$80, 8, \$80, 2, \$80, 0, \$80, \$80, \$80, \$80, \$80, \$80, \$80, \$80, \$80);
asm
movzx   eax, HexChar
mov     ecx, 57
sub     ecx, eax
sar     ecx, 31
and     ecx, 39
neg     ecx
add     eax, ecx
add     eax,  - 48
//      Produce 4 Chars presenting 4 bits of HexChar
movd    xmm0, eax
pxor    xmm1, xmm1
movdqu  xmm2, DEC_TO_BIN_FF_TO_CHARONE_DISTANCE
movdqu  xmm3, DEC_TO_BIN_WORD_MASK
movdqu  xmm4,DEC_TO_BIN_REVERSE_MASK
punpckldq xmm0, xmm0
packssdw xmm0, xmm0
pand    xmm0, xmm3
pcmpeqw xmm0, xmm1
psubw   xmm0, xmm2
PSHUFB  xmm0, xmm4                   // reverse the result
movq    qword ptr[HexBuffer], xmm0
end;

function HexToBinXmm(const HexValue: string): string;
var
HexDigit: Char;
Ptr: PChar;
begin
SetLength(Result, Length(HexValue) * 4);
Ptr := Pointer(Result);
for HexDigit in HexValue do
begin
case HexDigit of
'0'..'9','a'..'f','A'..'F':
CharToBin_XMM(HexDigit, Ptr);
else
raise EConvertError.CreateFmt('Invalid hex digit ''%s'' found in ''%s''', [HexDigit, HexValue]);
end;
Inc(Ptr, 4);
end;
end;

function HexToBinXmm2(const HexValue: string): string;
var
HexDigit: Char;
Ptr: PChar;
begin
SetLength(Result, Length(HexValue) * 4);
Ptr := Pointer(Result);
for var i : integer := low( HexValue ) to high( HexValue) do
begin
case HexValue[i] of
'0'..'9','a'..'f','A'..'F':
CharToBin_XMM(HexValue[i], Ptr);
else
raise EConvertError.CreateFmt('Invalid hex digit ''%s'' found in ''%s''', [HexValue[i], HexValue]);
end;
Inc(Ptr, 4);
end;
end;

{\$ENDIF}

//***************
// Benchmark code

function RandomHexDigit: Char;
var
Ordinal: Integer;
begin
Ordinal := Random(16);
case Ordinal of
0..9:
Result := Chr(Ord('0') + Ordinal);
10..15:
Result := Chr(Ord('a') + Ordinal - 10);
else
raise Exception.Create('');
end;
end;

function RandomHexString(Length: Integer): string;
var
Index: Integer;
begin
SetLength(Result, Length);
for Index := 1 to Length do
Result[Index] := RandomHexDigit;
end;

procedure TestCorrectness;
var
Index: Integer;
HexStr, BinStr: string;
begin
for Index := 0 to \$fffff do
begin
HexStr := IntToHex(Index, 6);
BinStr := HexToBinHeff(HexStr);

if BinStr<>HexToBinMahdi(HexStr) then
raise Exception.Create('incorrect implementation');
if BinStr<>HexToBinMahdi2(HexStr) then
raise Exception.Create('incorrect implementation');
if BinStr<>HexToBinHeff2(HexStr) then
raise Exception.Create('incorrect implementation');
if BinStr<>HexToBinAsm32(HexStr) then
raise Exception.Create('incorrect implementation');
if BinStr<>HexToBinAsm322(HexStr) then
raise Exception.Create('incorrect implementation');
if BinStr<>HexToBinXmm(HexStr) then
raise Exception.Create('incorrect implementation');
if BinStr<>HexToBinXmm2(HexStr) then
raise Exception.Create('incorrect implementation');
end;
end;

procedure Benchmark;
var
Index, Iteration: Integer;
sw: TStopwatch;
HexStrings: TArray<string>;
binaryString: string;
begin
SetLength(HexStrings, InputDataCount);
for Index := 0 to InputDataCount-1 do
HexStrings[Index] := RandomHexString(HexStringLength);

sw := TStopwatch.StartNew;
for Iteration := 0 to IterationCount-1 do
for Index := 0 to InputDataCount-1 do
begin
binaryString := HexToBinHeff(HexStrings[Index]);
binaryString := ''; // force a reallocation of the string every iteration of this loop
end;
Writeln('Pascal lookup (Heff): ', sw.ElapsedMilliseconds);

sw := TStopwatch.StartNew;
for Iteration := 0 to IterationCount-1 do
for Index := 0 to InputDataCount-1 do
begin
binaryString := HexToBinMahdi(HexStrings[Index]);
binaryString := ''; // force a reallocation of the string every iteration of this loop
end;
Writeln('Pascal lookup (Mahdi): ', sw.ElapsedMilliseconds);

{\$IFDEF CPUX86}
sw := TStopwatch.StartNew;
for Iteration := 0 to IterationCount-1 do
for Index := 0 to InputDataCount-1 do
begin
binaryString := HexToBinAsm32(HexStrings[Index]);
binaryString := ''; // force a reallocation of the string every iteration of this loop
end;
Writeln('asm32: ', sw.ElapsedMilliseconds);

sw := TStopwatch.StartNew;
for Iteration := 0 to IterationCount-1 do
for Index := 0 to InputDataCount-1 do
begin
binaryString := HexToBinXmm(HexStrings[Index]);
binaryString := ''; // force a reallocation of the string every iteration of this loop
end;
Writeln('xmm: ', sw.ElapsedMilliseconds);

{\$ENDIF}

sw := TStopwatch.StartNew;
for Iteration := 0 to IterationCount-1 do
for Index := 0 to InputDataCount-1 do
begin
binaryString := HexToBinHeff2(HexStrings[Index]);
binaryString := ''; // force a reallocation of the string every iteration of this loop
end;
Writeln('Pascal lookup (Heff2): ', sw.ElapsedMilliseconds);

sw := TStopwatch.StartNew;
for Iteration := 0 to IterationCount-1 do
for Index := 0 to InputDataCount-1 do
begin
binaryString := HexToBinMahdi2(HexStrings[Index]);
binaryString := ''; // force a reallocation of the string every iteration of this loop
end;
Writeln('Pascal lookup (Mahdi2): ', sw.ElapsedMilliseconds);

{\$IFDEF CPUX86}
sw := TStopwatch.StartNew;
for Iteration := 0 to IterationCount-1 do
for Index := 0 to InputDataCount-1 do
begin
binaryString := HexToBinAsm322(HexStrings[Index]);
binaryString := ''; // force a reallocation of the string every iteration of this loop
end;
Writeln('asm32 2: ', sw.ElapsedMilliseconds);

sw := TStopwatch.StartNew;
for Iteration := 0 to IterationCount-1 do
for Index := 0 to InputDataCount-1 do
begin
binaryString := HexToBinXmm2(HexStrings[Index]);
binaryString := ''; // force a reallocation of the string every iteration of this loop
end;
Writeln('xmm 2: ', sw.ElapsedMilliseconds);
{\$ENDIF}

end;

begin
try
Randomize;
TestCorrectness;
Benchmark;
except
on E: Exception do
Writeln(E.ClassName, ': ', E.Message);
end;
Readln;
end.```

The output:

Pascal lookup (Heff): 5416
Pascal lookup (Mahdi): 5097
asm32: 8073
xmm: 6325
Pascal lookup (Heff2): 4487
Pascal lookup (Mahdi2): 4179
asm32 2: 7260
xmm 2: 5658

#### Share this post

##### Link to post

"Get your conditional jumps and error handling garbage outta my hot loop, kay?"

```function HexToBinStefan(const HexValue: string): string;

// put the exception stuff into a subroutine to not pollute our routine
procedure Error(c: PChar; s: string);
begin
raise EConvertError.CreateFmt('Invalid hex digit ''%s'' found in ''%s''', [c^, s]);
end;

label
_Error;
type
TChar4 = array[0..3] of Char;
PChar4 = ^TChar4;
{\$POINTERMATH ON}
PInteger = ^Integer;
{\$POINTERMATH OFF}
const
Table: array[0..22] of TChar4 = (
'0000', '0001', '0010', '0011', '0100', '0101', '0110', '0111', '1000', '1001', // 0-9
'xxxx', 'xxxx', 'xxxx', 'xxxx', 'xxxx', 'xxxx', 'xxxx',                         // :-@ - unused
'1010', '1011', '1100', '1101', '1110', '1111');                                // A-F
var
HexDigit: PChar;
P: PChar4;
i, n: Cardinal;
begin
// do not use PChar cast because that causes a call to UStrToPWChar
// we don't need that special PChar to #0 when HexValue is empty
HexDigit := Pointer(HexValue);
if HexDigit = nil then Exit;

// we know that HexDigit is not nil so we can avoid the conditional jump from Length
// this also directly moves it into the correct register for the SetLength call
SetLength(Result, PInteger(HexDigit)[-1] * 4);
P := PChar4(Result);

for i := 1 to PInteger(HexDigit)[-1] do
begin
// subtract 48 to make '0'-'9' 0-9 which enables unconditionally downcasing any upper case char
// when we hit the #0 it will simply produce an invalid value for n that we will break on next
n := Cardinal(Integer(Ord(HexDigit^)) - 48) and not 32;

// avoid one check by simply subtracting 10 and checking the invalid range of 10-16
// thank you godbolt.org and amazingly optimizing c++ compilers for that idea! <3
if (Cardinal(Integer(n)-10) <= 6) or (n > 22) then goto _error;

P^ := Table[n];
Inc(P);
Inc(HexDigit);
end;
Exit;
_error:
Error(HexDigit, HexValue);
end;```

Edited by Stefan Glienke
• 2

#### Share this post

##### Link to post
19 minutes ago, Stefan Glienke said:

"Get your conditional jumps and error handling garbage outta my hot loop, kay?"

Nice one.

32bit release

Pascal lookup (Stefan): 1449

64bit release

Pascal lookup (Stefan): 1434

Quote

#### Share this post

##### Link to post
1 hour ago, Stefan Glienke said:

"Get your conditional jumps and error handling garbage outta my hot loop, kay?"

Very nicely done indeed.

Another update of my benchmark:

```{\$APPTYPE CONSOLE}

uses
System.SysUtils,
System.Classes,
System.Diagnostics;

//********************************************
// Constants to tune the benchmark performance

const
HexStringLength = 64;
InputDataCount = 512;
IterationCount = 20000;

//**************************
// David Heffernan's routine

function HexToBinHeffMove(const HexValue: string): string;
const
BinaryValues: array [0..15] of string = (
'0000', '0001', '0010', '0011',
'0100', '0101', '0110', '0111',
'1000', '1001', '1010', '1011',
'1100', '1101', '1110', '1111'
);
var
HexDigit: Char;
HexDigitValue: Integer;
Ptr: PChar;
begin
SetLength(Result, Length(HexValue) * 4);
Ptr := Pointer(Result);
for HexDigit in HexValue do
begin
case HexDigit of
'0'..'9':
HexDigitValue := Ord(HexDigit) - Ord('0');
'a'..'f':
HexDigitValue := 10 + Ord(HexDigit) - Ord('a');
'A'..'F':
HexDigitValue := 10 + Ord(HexDigit) - Ord('A');
else
raise EConvertError.CreateFmt('Invalid hex digit ''%s'' found in ''%s''', [HexDigit, HexValue]);
end;
Move(Pointer(BinaryValues[HexDigitValue])^, Ptr^, 4 * SizeOf(Char));
Inc(Ptr, 4);
end;
end;

//*****************************************************************
// David Heffernan's routine, with Move replace by Int64 assignment

function HexToBinHeffInt64Assign(const HexValue: string): string;
const
BinaryValues: array [0..15] of string = (
'0000', '0001', '0010', '0011',
'0100', '0101', '0110', '0111',
'1000', '1001', '1010', '1011',
'1100', '1101', '1110', '1111'
);
var
HexDigit: Char;
HexDigitValue: Integer;
Ptr: PChar;
begin
SetLength(Result, Length(HexValue) * 4);
Ptr := Pointer(Result);
for HexDigit in HexValue do
begin
case HexDigit of
'0'..'9':
HexDigitValue := Ord(HexDigit) - Ord('0');
'a'..'f':
HexDigitValue := 10 + Ord(HexDigit) - Ord('a');
'A'..'F':
HexDigitValue := 10 + Ord(HexDigit) - Ord('A');
else
raise EConvertError.CreateFmt('Invalid hex digit ''%s'' found in ''%s''', [HexDigit, HexValue]);
end;
PInt64(Ptr)^ := PInt64(BinaryValues[HexDigitValue])^;
Inc(Ptr, 4);
end;
end;

//************************
// Mahdi Safsafi's routine

function HexToBinMahdi(const HexValue: string): string;
type
TChar4 = array [0 .. 3] of Char;
PChar4 = ^TChar4;
const
Table1: array ['0' .. '9'] of TChar4 = ('0000', '0001', '0010', '0011', '0100', '0101', '0110', '0111', '1000', '1001');
Table2: array ['a' .. 'f'] of TChar4 = ('1010', '1011', '1100', '1101', '1110', '1111');
var
HexDigit: Char;
P: PChar4;
begin
SetLength(Result, Length(HexValue) * 4);
P := PChar4(Result);
for HexDigit in HexValue do
begin
case HexDigit of
'0' .. '9':
P^ := Table1[HexDigit];
'a' .. 'f':
P^ := Table2[HexDigit];
'A' .. 'F':
P^ := Table2[Chr(Ord(HexDigit) xor \$20)];
else
raise EConvertError.CreateFmt('Invalid hex digit ''%s'' found in ''%s''', [HexDigit, HexValue]);
end;
Inc(P);
end;
end;

//************************************************
// Mahdi Safsafi's routine with a classic for loop

function HexToBinMahdiClassicFor(const HexValue: string): string;
type
TChar4 = array [0 .. 3] of Char;
PChar4 = ^TChar4;
const
Table1: array ['0' .. '9'] of TChar4 = ('0000', '0001', '0010', '0011', '0100', '0101', '0110', '0111', '1000', '1001');
Table2: array ['a' .. 'f'] of TChar4 = ('1010', '1011', '1100', '1101', '1110', '1111');
var
i, n: Integer;
HexDigit: PChar;
P: PChar4;
begin
n := Length(HexValue);
SetLength(Result, n * 4);
HexDigit := PChar(Pointer(HexValue));
P := PChar4(Result);
for i := 1 to n do
begin
case HexDigit^ of
'0' .. '9':
P^ := Table1[HexDigit^];
'a' .. 'f':
P^ := Table2[HexDigit^];
'A' .. 'F':
P^ := Table2[Chr(Ord(HexDigit^) xor \$20)];
else
raise EConvertError.CreateFmt('Invalid hex digit ''%s'' found in ''%s''', [HexDigit, HexValue]);
end;
Inc(P);
Inc(HexDigit);
end;
end;

//*****************
// Stefan's routine
function HexToBinStefan(const HexValue: string): string;
label
_error;
type
TChar4 = array [0..3] of Char;
PChar4 = ^TChar4;
const
Table: array [0..22] of TChar4 = (
'0000', '0001', '0010', '0011', '0100', '0101', '0110', '0111', '1000', '1001', // 0-9
'xxxx', 'xxxx', 'xxxx', 'xxxx', 'xxxx', 'xxxx', 'xxxx',                         // :-@ - unused
'1010', '1011', '1100', '1101', '1110', '1111');                                // A-F
var
len, i: Integer;
HexDigit: PChar;
P: PChar4;
n: Cardinal;
begin
len := Length(HexValue);
SetLength(Result, len * 4);
P := PChar4(Result);
HexDigit := @PChar(Pointer(HexValue));
for i := 1 to len do
begin
n := Cardinal((Ord(HexDigit^) - 48) and not 32);
{\$B+}
if ((n > 9) and (n < 15)) or (n > 22) then goto _error;
{\$B-}
P^ := Table[n];

Inc(P);
Inc(HexDigit);
end;
Exit;
_error:
raise EConvertError.CreateFmt('Invalid hex digit ''%s'' found in ''%s''', [HexDigit^, HexValue]);
end;

function HexToBinStefan2(const HexValue: string): string;
label
_error;
type
TChar4 = array [0..3] of Char;
PChar4 = ^TChar4;
const
Table: array [0..22] of TChar4 = (
'0000', '0001', '0010', '0011', '0100', '0101', '0110', '0111', '1000', '1001', // 0-9
'xxxx', 'xxxx', 'xxxx', 'xxxx', 'xxxx', 'xxxx', 'xxxx',                         // :-@ - unused
'1010', '1011', '1100', '1101', '1110', '1111');                                // A-F
var
len, i: Integer;
HexDigit: PChar;
P: PChar4;
n: Cardinal;
begin
len := Length(HexValue);
SetLength(Result, len * 4);
P := PChar4(Result);
HexDigit := @PChar(Pointer(HexValue));
for i := 1 to len do
begin
// subtract 48 to make '0'-'9' 0-9 which enables unconditionally downcasing any upper case char
n := Cardinal((Ord(HexDigit^) - 48) and not 32);

{\$B-} // avoid multiple conditional jumps by not using short eval

// avoid one check by simply subtracting 10 and checking the invalid range of 10-16
// thank you godbolt.org and amazingly optimizing c++ compilers for that idea! <3
if (Cardinal(Integer(n)-10) <= 6) or (n > 22) then goto _error;
{\$B-}

P^ := Table[n];

Inc(P);
Inc(HexDigit);
end;
Exit;
_error:
raise EConvertError.CreateFmt('Invalid hex digit ''%s'' found in ''%s''', [HexDigit^, HexValue]);
end;

{\$IFDEF CPUX86}
//***********************
// Kas Ob's asm32 version

procedure CharToBin_ASM32(HexChar: Char; HexBuffer: PChar);
asm
push edi
mov edi,edx
//      Get the decimal value of one Hex Char (= half byte)
movzx   eax, HexChar
mov     ecx, 57
sub     ecx, eax
sar     ecx, 31
and     ecx, 39
neg     ecx
add     eax, ecx
add     eax,  - 48
//      Produce 4 Chars presenting 4 bits of HexChar
xor     ecx,ecx
mov     dx,\$1
test    al,4
cmovne  cx,dx
shl     ecx,16
test    al,8
cmovne  cx,dx
add     ecx,\$00300030
mov     [edi],ecx
xor     ecx,ecx
test    al,1
cmovne  cx,dx
shl     ecx,16
test    al,2
cmovne  cx,dx
add     ecx,\$00300030
mov     [edi+4],ecx
pop edi
end;

function HexToBinAsm32(const HexValue: string): string;
var
HexDigit: Char;
Ptr: PChar;
begin
SetLength(Result, Length(HexValue) * 4);
Ptr := Pointer(Result);
for HexDigit in HexValue do
begin
case HexDigit of
'0'..'9','a'..'f','A'..'F':
CharToBin_ASM32(HexDigit, Ptr);
else
raise EConvertError.CreateFmt('Invalid hex digit ''%s'' found in ''%s''', [HexDigit, HexValue]);
end;
Inc(Ptr, 4);
end;
end;

//*********************
// Kas Ob's xmm version

procedure CharToBin_XMM(HexChar: Char; HexBuffer: PChar);
const
DEC_TO_BIN_WORD_MASK: array[0..7] of UInt16 = (\$01, \$02,\$00,\$00, \$04, \$08, \$00, \$00);
DEC_TO_BIN_FF_TO_CHARONE_DISTANCE: array[0..7] of UInt16 = (\$FFCF, \$FFCF, \$FFCF, \$FFCF, \$FFCF, \$FFCF, \$FFCF, \$FFCF);
DEC_TO_BIN_REVERSE_MASK: array[0..15] of Byte = (10, \$80, 8, \$80, 2, \$80, 0, \$80, \$80, \$80, \$80, \$80, \$80, \$80, \$80, \$80);
asm
movzx   eax, HexChar
mov     ecx, 57
sub     ecx, eax
sar     ecx, 31
and     ecx, 39
neg     ecx
add     eax, ecx
add     eax,  - 48
//      Produce 4 Chars presenting 4 bits of HexChar
movd    xmm0, eax
pxor    xmm1, xmm1
movdqu  xmm2, DEC_TO_BIN_FF_TO_CHARONE_DISTANCE
movdqu  xmm3, DEC_TO_BIN_WORD_MASK
movdqu  xmm4,DEC_TO_BIN_REVERSE_MASK
punpckldq xmm0, xmm0
packssdw xmm0, xmm0
pand    xmm0, xmm3
pcmpeqw xmm0, xmm1
psubw   xmm0, xmm2
PSHUFB  xmm0, xmm4                   // reverse the result
movq    qword ptr[HexBuffer], xmm0
end;

function HexToBinXmm(const HexValue: string): string;
var
HexDigit: Char;
Ptr: PChar;
begin
SetLength(Result, Length(HexValue) * 4);
Ptr := Pointer(Result);
for HexDigit in HexValue do
begin
case HexDigit of
'0'..'9','a'..'f','A'..'F':
CharToBin_XMM(HexDigit, Ptr);
else
raise EConvertError.CreateFmt('Invalid hex digit ''%s'' found in ''%s''', [HexDigit, HexValue]);
end;
Inc(Ptr, 4);
end;
end;
{\$ENDIF}

//***************
// Benchmark code

function RandomHexDigit: Char;
var
Ordinal: Integer;
begin
Ordinal := Random(16);
case Ordinal of
0..9:
Result := Chr(Ord('0') + Ordinal);
10..15:
Result := Chr(Ord('a') + Ordinal - 10);
else
raise Exception.Create('');
end;
end;

function RandomHexString(Length: Integer): string;
var
Index: Integer;
begin
SetLength(Result, Length);
for Index := 1 to Length do
Result[Index] := RandomHexDigit;
end;

procedure TestCorrectness;
var
Index: Integer;
HexStr, BinStr: string;
begin
for Index := 0 to \$fffff do
begin
HexStr := IntToHex(Index, 6);
BinStr := HexToBinHeffMove(HexStr);

if BinStr<>HexToBinHeffInt64Assign(HexStr) then
raise Exception.Create('incorrect implementation');
if BinStr<>HexToBinMahdi(HexStr) then
raise Exception.Create('incorrect implementation');
if BinStr<>HexToBinMahdiClassicFor(HexStr) then
raise Exception.Create('incorrect implementation');
if BinStr<>HexToBinStefan(HexStr) then
raise Exception.Create('incorrect implementation');
if BinStr<>HexToBinStefan2(HexStr) then
raise Exception.Create('incorrect implementation');
{\$IFDEF CPUX86}
if BinStr<>HexToBinAsm32(HexStr) then
raise Exception.Create('incorrect implementation');
if BinStr<>HexToBinXmm(HexStr) then
raise Exception.Create('incorrect implementation');
{\$ENDIF}
end;
end;

procedure Benchmark;
var
Index, Iteration: Integer;
sw: TStopwatch;
HexStrings: TArray<string>;
binaryString: string;
begin
SetLength(HexStrings, InputDataCount);
for Index := 0 to InputDataCount-1 do
HexStrings[Index] := RandomHexString(HexStringLength);

sw := TStopwatch.StartNew;
for Iteration := 0 to IterationCount-1 do
for Index := 0 to InputDataCount-1 do
begin
binaryString := HexToBinHeffMove(HexStrings[Index]);
binaryString := ''; // force a reallocation of the string every iteration of this loop
end;
Writeln('Pascal lookup (Heff, move): ', sw.ElapsedMilliseconds);

sw := TStopwatch.StartNew;
for Iteration := 0 to IterationCount-1 do
for Index := 0 to InputDataCount-1 do
begin
binaryString := HexToBinHeffInt64Assign(HexStrings[Index]);
binaryString := ''; // force a reallocation of the string every iteration of this loop
end;
Writeln('Pascal lookup (Heff, int64 assign): ', sw.ElapsedMilliseconds);

sw := TStopwatch.StartNew;
for Iteration := 0 to IterationCount-1 do
for Index := 0 to InputDataCount-1 do
begin
binaryString := HexToBinMahdi(HexStrings[Index]);
binaryString := ''; // force a reallocation of the string every iteration of this loop
end;
Writeln('Pascal lookup (Mahdi): ', sw.ElapsedMilliseconds);

sw := TStopwatch.StartNew;
for Iteration := 0 to IterationCount-1 do
for Index := 0 to InputDataCount-1 do
begin
binaryString := HexToBinMahdiClassicFor(HexStrings[Index]);
binaryString := ''; // force a reallocation of the string every iteration of this loop
end;
Writeln('Pascal lookup (Mahdi, classic for): ', sw.ElapsedMilliseconds);

sw := TStopwatch.StartNew;
for Iteration := 0 to IterationCount-1 do
for Index := 0 to InputDataCount-1 do
begin
binaryString := HexToBinStefan(HexStrings[Index]);
binaryString := ''; // force a reallocation of the string every iteration of this loop
end;
Writeln('Pascal lookup (Stefan): ', sw.ElapsedMilliseconds);

sw := TStopwatch.StartNew;
for Iteration := 0 to IterationCount-1 do
for Index := 0 to InputDataCount-1 do
begin
binaryString := HexToBinStefan2(HexStrings[Index]);
binaryString := ''; // force a reallocation of the string every iteration of this loop
end;
Writeln('Pascal lookup (Stefan mk 2): ', sw.ElapsedMilliseconds);

{\$IFDEF CPUX86}
sw := TStopwatch.StartNew;
for Iteration := 0 to IterationCount-1 do
for Index := 0 to InputDataCount-1 do
begin
binaryString := HexToBinAsm32(HexStrings[Index]);
binaryString := ''; // force a reallocation of the string every iteration of this loop
end;
Writeln('asm32: ', sw.ElapsedMilliseconds);

sw := TStopwatch.StartNew;
for Iteration := 0 to IterationCount-1 do
for Index := 0 to InputDataCount-1 do
begin
binaryString := HexToBinXmm(HexStrings[Index]);
binaryString := ''; // force a reallocation of the string every iteration of this loop
end;
Writeln('xmm: ', sw.ElapsedMilliseconds);
{\$ENDIF}
end;

begin
try
Randomize;
TestCorrectness;
Benchmark;
except
on E: Exception do
Writeln(E.ClassName, ': ', E.Message);
end;
Writeln('Completed');
Readln;
end.```

32 bit output:

```Pascal lookup (Heff, move): 4658
Pascal lookup (Heff, int64 assign): 3702
Pascal lookup (Mahdi): 3645
Pascal lookup (Mahdi, classic for): 2766
Pascal lookup (Stefan): 1024
Pascal lookup (Stefan mk 2): 805
asm32: 4165
xmm: 3978```

64 bit output:

```Pascal lookup (Heff, move): 4720
Pascal lookup (Heff, int64 assign): 3138
Pascal lookup (Mahdi): 3221
Pascal lookup (Mahdi, classic for): 2629
Pascal lookup (Stefan): 1180
Pascal lookup (Stefan mk 2): 909```

Edited by David Heffernan

#### Share this post

##### Link to post

While I don't question the timings I don't think these latest benchmarks are fair; The goalposts have been moved.

It seems the first solutions were aiming at optimizing conversion of a single hex digit and the later ones are optimized for strings. Some have had the iterators replaced by classic for and some haven't.

#### Share this post

##### Link to post
9 minutes ago, Anders Melander said:

I don't think these latest benchmarks are fair﻿ [...] some have had the iterators replaced by classic for and some haven't.

Well that is the optimizations that some people were aware of and some weren't - why would that be unfair?

P.S. What did I win? Joking aside - it's always interesting that different people see different things. And at the same time it's very sad that perfectly fine code will be like 3-4 times slower than hardcore optimized code simply because the compiler does not know about some things, does not do zero cost abstractions (*) and does not reorder code to make it better.

(*) I mean seriously - why do an LStrAsg on a string parameter - as if that would go invalid in the middle of the loop or what?! And because you cannot assign to a loop variable it should treat it exactly the same way as a moving PChar over the string.

Edited by Stefan Glienke
• 1

#### Share this post

##### Link to post
2 minutes ago, Stefan Glienke said:

Well that is the optimizations that some people were aware of and some weren't - why would that be unfair?

The iterators were David's code and was just part of the test bench.
As long as every solution uses the same test bench code then that doesn't matter much but when some solutions then replaces the test bench code to optimize the solution that invalidates the comparison.

#### Share this post

##### Link to post
1 hour ago, Anders Melander said:

It seems the first solutions were aiming at optimizing conversion of a single hex digit and the later ones are optimized for strings

It was always strings.

1 hour ago, Anders Melander said:

Some have had the iterators replaced by classic for and some haven't.

I've kept a variety of routines in there, that chart the evolution in this thread.

1 hour ago, Anders Melander said:

The iterators were David's code and was just part of the test bench.

Don't think so, because of the first point, which is that the question was about strings and not individual chars.

Edited by David Heffernan

#### Share this post

##### Link to post

I don't get this:

`{\$B-} // avoid multiple conditional jumps by not using short eval`

Actually \$B- disables the complete Boolean evaluation. It's a default setting, and it gets a best result in this case.

#### Share this post

##### Link to post
2 minutes ago, Kryvich said:

I don't get this:

```
{\$B-} // avoid multiple conditional jumps by not using short eval```

Actually \$B- disables the complete Boolean evaluation. It's a default setting, and it gets a best result in this case.

That was a typo that David copied - my first version had 3 checks in the loop where {\$B+} made it better, now with only 2 checks I don't need that anymore - see my post with the currently best version.

• 1

#### Share this post

##### Link to post

Partially SIMDed without trailing ... and it beat all 🙂

```const
{ Source Data Format : Imm8[1:0] }
DF_UNSIGNED_BYTES = 0;
DF_UNSIGNED_WORDS = 1;
DF_SIGNED_BYTES = 2;
DF_SIGNED_WORDS = 3;
{ Aggregation Operation : Imm8[3:2] }
AGGREGATION_OP_EQUAL_ANY = 0 shl 2;
AGGREGATION_OP_RANGES = 1 shl 2;
AGGREGATION_OP_EQUAL_EACH = 2 shl 2;
AGGREGATION_OP_EQUAL_ORDERED = 3 shl 2;
{ Polarity : Imm8[5:4] }
POLARITY_POSITIVE = 0 shl 4;
POLARITY_NEGATIVE = 1 shl 4;
POLARITY_MASKED_POSITIVE = 2 shl 4;
POLARITY_MASKED_NEGATIVE = 3 shl 4;
{ Output Selection : Imm8 }
OS_LSI = 0 shl 6;
OS_MSI = 1 shl 6;
OS_BIT_MASK = 0 shl 6;
OS_BYTE_WORD_MASK = 1 shl 6;

const
[Align(16)]
Range: array [0 .. 7] of Char = '09afAF' + #00;

function IsValidHex(P: Pointer): Boolean;
asm
movdqa    xmm1, [Range]
sub       eax, 16
@@SimdLoop:
add       eax, 16
movdqu    xmm2, [eax]
pcmpistri xmm1, xmm2, DF_UNSIGNED_WORDS or AGGREGATION_OP_RANGES or POLARITY_NEGATIVE
ja        @@SimdLoop
test      Word [eax + ecx *2], -1
setz      al
end;

function HexToBinMahdiOneShot(const HexValue: string): string;
type
TChar4 = array [0 .. 3] of Char;
PChar4 = ^TChar4;
const
Table: array [0 .. 25] of TChar4 = ('0000', '1010', '1011', '1100', '1101', '1110', '1111', '0000', '0000', '0000', '0000', '0000', '0000', '0000', '0000',
'0000', '0000', '0001', '0010', '0011', '0100', '0101', '0110', '0111', '1000', '1001');
var
P: PChar4;
I, Len: Integer;
begin
SetLength(Result, Length(HexValue) * 4);
P := PChar4(Result);
if IsValidHex(Pointer(HexValue)) then
begin
Len := Length(HexValue);
for I := 1 to Len do
begin
P^ := Table[Ord(HexValue[I]) and 31];
Inc(P);
end;
end
else
raise EConvertError.CreateFmt('Invalid hex : %s', [HexValue]);
end;```

Edited by Mahdi Safsafi
• 2

#### Share this post

##### Link to post
34 minutes ago, Mahdi Safsafi said:

Partially SIMDed without trailing ... and it beat all 🙂

Close one, nice! 😉

But I would be surprised if a simd loop would not beat it.

I am sure doing the entire thing with a simd loop would totally destroy any pure pascal solution.

Edited by Stefan Glienke

#### Share this post

##### Link to post

@Stefan Glienke SIMD are very powerful but they come with their issues too(portability, alignment for some instructions,... ). Some compilers have a great stuff to embarrasse those issues. But neither delphi compiler nor the RTL helps (we don't even have AVX) 😥 I really wish if at some point of time Delphi supports SIMD through intrinsics or some vector types.

• 2

#### Share this post

##### Link to post
5 hours ago, Mahdi Safsafi said:

Partially SIMDed without trailing ... and it beat all 🙂

```
const
{ Source Data Format : Imm8[1:0] }
DF_UNSIGNED_BYTES = 0;
DF_UNSIGNED_WORDS = 1;
DF_SIGNED_BYTES = 2;
DF_SIGNED_WORDS = 3;
{ Aggregation Operation : Imm8[3:2] }
AGGREGATION_OP_EQUAL_ANY = 0 shl 2;
AGGREGATION_OP_RANGES = 1 shl 2;
AGGREGATION_OP_EQUAL_EACH = 2 shl 2;
AGGREGATION_OP_EQUAL_ORDERED = 3 shl 2;
{ Polarity : Imm8[5:4] }
POLARITY_POSITIVE = 0 shl 4;
POLARITY_NEGATIVE = 1 shl 4;
POLARITY_MASKED_POSITIVE = 2 shl 4;
POLARITY_MASKED_NEGATIVE = 3 shl 4;
{ Output Selection : Imm8 }
OS_LSI = 0 shl 6;
OS_MSI = 1 shl 6;
OS_BIT_MASK = 0 shl 6;
OS_BYTE_WORD_MASK = 1 shl 6;

const
[Align(16)]
Range: array [0 .. 7] of Char = '09afAF' + #00;

function IsValidHex(P: Pointer): Boolean;
asm
movdqa    xmm1, [Range]
sub       eax, 16
@@SimdLoop:
add       eax, 16
movdqu    xmm2, [eax]
pcmpistri xmm1, xmm2, DF_UNSIGNED_WORDS or AGGREGATION_OP_RANGES or POLARITY_NEGATIVE
ja        @@SimdLoop
test      Word [eax + ecx *2], ax
setz      al
end;

function HexToBinMahdiOneShot(const HexValue: string): string;
type
TChar4 = array [0 .. 3] of Char;
PChar4 = ^TChar4;
const
Table: array [0 .. 25] of TChar4 = ('0000', '1010', '1011', '1100', '1101', '1110', '1111', '0000', '0000', '0000', '0000', '0000', '0000', '0000', '0000',
'0000', '0000', '0001', '0010', '0011', '0100', '0101', '0110', '0111', '1000', '1001');
var
P: PChar4;
I, Len: Integer;
begin
SetLength(Result, Length(HexValue) * 4);
P := PChar4(Result);
if IsValidHex(Pointer(HexValue)) then
begin
Len := Length(HexValue);
for I := 1 to Len do
begin
P^ := Table[Ord(HexValue[I]) and 31];
Inc(P);
end;
end
else
raise EConvertError.CreateFmt('Invalid hex : %s', [HexValue]);
end;﻿```

Wow, very impressive! And performance is pretty much the same for long hex strings (8096 chars) and short strings (6 chars):

cHexRange = 16777216; cHexLen = 6;

HexToBin2                             = 1243;

HexToBinMahdiOneShot        = 430;

Diff: HexToBinMahdiOneShot = -65%

cHexRange = 100000; cHexLen = 8096;

HexToBin2                             = 5020;

HexToBinMahdiOneShot        = 1753;

Diff: HexToBinMahdiOneShot =  -65%

#### Share this post

##### Link to post

@Mike Torrettinni it can be further optimized by using full simd for all the operation but unfortunately this requires using some instructions that are not supported by Delphi and also by some cpus.

#### Share this post

##### Link to post
8 minutes ago, Mahdi Safsafi said:

@Mike Torrettinni it can be further optimized by using full simd for all the operation but unfortunately this requires using some instructions that are not supported by Delphi and also by some cpus.

No need to go beyond Delphi's capabilities. This whole topic was great to follow, all this knowledge! 🙂

#### Share this post

##### Link to post

I agree! There are many awesome tweaking tips in this post!

I really love @Stefan Glienke's approach that strips down the pure Pascal to a minimalistic set of operations.

#### Share this post

##### Link to post

@Stefan Glienke The last (edited) version still has the "label" and does not yield on HexToBinStefan(#0). Not sure if this should be considered a bug or not.

#### Share this post

##### Link to post
11 hours ago, Mike Torrettinni said:

No need to go beyond Delphi's capabilities. This whole topic was great to follow, all this knowledge! 🙂

Of course ! I'm just going to underlay a serious problem. In the past, no one bothered himself to understand optimization (even great developer/company didn't) because CPUs were evolving so fast and each new generation beats the previous one by a large margin(people were just upgrading their CPUs and seeing performance x2,x3). But today we are reaching a dead point (Moore's Law) and the difference between a new generation and the previous isn't really significant ! So today the effort is jumping from CPUs to compilers/parallel programming. In the past few years, we have seeing the emergence of LLVM as a powerful compiler infrastructure and big whales started to communicate with each other more than ever.

In a nutshell, tomorrow problem is optimization. The way how many choose to deal with it, is improving compiler. And I really think that Delphi should join this race ASAP.

• 6

#### Share this post

##### Link to post
3 hours ago, Mahdi Safsafi said:

I really think that Delphi should join this race ASAP.

Especially since one of its selling points is "it compiles to native code" - if that native code is garbage for modern CPUs because its written like in the 90s that's kinda poor.

#### Share this post

##### Link to post
1 hour ago, Stefan Glienke said:

Especially since one of its selling points is "it compiles to native code" - if that native code is garbage for modern CPUs because its written like in the 90s that's kinda poor.

If only they'd developed a compiler that targeted .net.....

## Create an account or sign in to comment

You need to be a member in order to leave a comment

## Create an account

Sign up for a new account in our community. It's easy!

Register a new account

## Sign in

Already have an account? Sign in here.

Sign In Now

×