Jump to content

Recommended Posts

6 minutes ago, Clément said:

I know the test is about the conversion, but since "for in" is very slow, it might help straight things up a little extra bit

Should change all of the for ins to be fair

Share this post


Link to post

This has turned into a great thread for learning about speed improvements by simple tweaks.

 

Using Sets instead of Case
 

function HexToBinMahdi5(const HexValue: string): string;

type
  TChar4 = array [0 .. 3] of Char;
  PChar4 = ^TChar4;
const
  Table1: array ['0' .. '9'] of TChar4 = ('0000', '0001', '0010', '0011', '0100', '0101', '0110', '0111', '1000', '1001');
  Table2: array ['a' .. 'f'] of TChar4 = ('1010', '1011', '1100', '1101', '1110', '1111');
  Table3: array ['A' .. 'F'] of TChar4 = ('1010', '1011', '1100', '1101', '1110', '1111');
var
  HexDigit: Char;
  P: PChar4;
begin
  SetLength(Result, Length(HexValue) * 4);
  P := PChar4(Result);

  for var i: integer := low(HexValue) to high( HexValue ) do
  begin
    if HexValue[i] in [ '0' .. '9'] then
        P^ := Table1[HexValue[i]]
    else
    if HexValue[i] in [ 'a' .. 'f'] then
        P^ := Table2[HexValue[i]]
    else
    if HexValue[i] in [ 'A' .. 'F'] then
        P^ := Table3[HexValue[i]]
    else
      raise EConvertError.CreateFmt('Invalid hex digit ''%s'' found in ''%s''', [HexDigit, HexValue]);
    Inc(P);
  end;
end;

Compiled 32bit Release

 

Pascal lookup (Heff): 4091
Pascal lookup (Mahdi): 4107
Pascal lookup (Mahdi 3 Table): 4264
Pascal lookup (Mahdi 3 Table Set): 3665
Pascal lookup (Mahdi 3 Table Set Local Var loop): 2951

 

 

 

 

 

 

Edited by Bernard
  • Like 2

Share this post


Link to post
7 minutes ago, David Heffernan said:

Should change all of the for ins to be fair

The code:

{$APPTYPE CONSOLE}

uses
  System.SysUtils,
  System.Classes,
  System.Diagnostics;

//********************************************
// Constants to tune the benchmark performance

const
  HexStringLength = 64;
  InputDataCount = 512;
  IterationCount = 20000;

//*****************************************************************
// David Heffernan's routine, with Move replace by Int64 assignment

{$DEFINE ReplaceMoveWithInt64Assign}
function HexToBinHeff(const HexValue: string): string;
const
  BinaryValues: array [0..15] of string = (
    '0000', '0001', '0010', '0011',
    '0100', '0101', '0110', '0111',
    '1000', '1001', '1010', '1011',
    '1100', '1101', '1110', '1111'
  );
var
  HexDigit: Char;
  HexDigitValue: Integer;
  Ptr: PChar;
begin
  SetLength(Result, Length(HexValue) * 4);
  Ptr := Pointer(Result);
  for HexDigit in HexValue do
  begin
    case HexDigit of
    '0'..'9':
      HexDigitValue := Ord(HexDigit) - Ord('0');
    'a'..'f':
      HexDigitValue := 10 + Ord(HexDigit) - Ord('a');
    'A'..'F':
      HexDigitValue := 10 + Ord(HexDigit) - Ord('A');
    else
      raise EConvertError.CreateFmt('Invalid hex digit ''%s'' found in ''%s''', [HexDigit, HexValue]);
    end;
{$IFDEF ReplaceMoveWithInt64Assign}
    PInt64(Ptr)^ := PInt64(BinaryValues[HexDigitValue])^;
{$ELSE}
    Move(Pointer(BinaryValues[HexDigitValue])^, Ptr^, 4 * SizeOf(Char));
{$ENDIF}
    Inc(Ptr, 4);
  end;
end;


function HexToBinHeff2(const HexValue: string): string;
const
  BinaryValues: array [0..15] of string = (
    '0000', '0001', '0010', '0011',
    '0100', '0101', '0110', '0111',
    '1000', '1001', '1010', '1011',
    '1100', '1101', '1110', '1111'
  );
var
  HexDigit: Char;
  HexDigitValue: Integer;
  Ptr: PChar;
begin
  SetLength(Result, Length(HexValue) * 4);
  Ptr := Pointer(Result);
  for var i : integer := low(HexValue) to High( HexValue ) do
  begin
    case HexValue[i] of
    '0'..'9':
      HexDigitValue := Ord(HexValue[i]) - Ord('0');
    'a'..'f':
      HexDigitValue := 10 + Ord(HexValue[i]) - Ord('a');
    'A'..'F':
      HexDigitValue := 10 + Ord(HexValue[i]) - Ord('A');
    else
      raise EConvertError.CreateFmt('Invalid hex digit ''%s'' found in ''%s''', [HexValue[i], HexValue]);
    end;
{$IFDEF ReplaceMoveWithInt64Assign}
    PInt64(Ptr)^ := PInt64(BinaryValues[HexDigitValue])^;
{$ELSE}
    Move(Pointer(BinaryValues[HexDigitValue])^, Ptr^, 4 * SizeOf(Char));
{$ENDIF}
    Inc(Ptr, 4);
  end;
end;


//************************
// Mahdi Safsafi's routine

{$DEFINE ReplaceMoveWithInt64Assign}

function HexToBinMahdi(const HexValue: string): string;
type
  TChar4 = array [0 .. 3] of Char;
  PChar4 = ^TChar4;
const
  Table1: array ['0' .. '9'] of TChar4 = ('0000', '0001', '0010', '0011', '0100', '0101', '0110', '0111', '1000', '1001');
  Table2: array ['a' .. 'f'] of TChar4 = ('1010', '1011', '1100', '1101', '1110', '1111');
var
  HexDigit: Char;
  P: PChar4;
begin
  SetLength(Result, Length(HexValue) * 4);
  P := PChar4(Result);
  for HexDigit in HexValue do
  begin
    case HexDigit of
      '0' .. '9':
        P^ := Table1[HexDigit];
      'a' .. 'f':
        P^ := Table2[HexDigit];
      'A' .. 'F':
        P^ := Table2[Chr(Ord(HexDigit) xor $20)];
    else
      raise EConvertError.CreateFmt('Invalid hex digit ''%s'' found in ''%s''', [HexDigit, HexValue]);
    end;
    Inc(P);
  end;
end;

function HexToBinMahdi2(const HexValue: string): string;
type
  TChar4 = array [0 .. 3] of Char;
  PChar4 = ^TChar4;
const
  Table1: array ['0' .. '9'] of TChar4 = ('0000', '0001', '0010', '0011', '0100', '0101', '0110', '0111', '1000', '1001');
  Table2: array ['a' .. 'f'] of TChar4 = ('1010', '1011', '1100', '1101', '1110', '1111');
var
  HexDigit: Char;
  P: PChar4;
begin
  SetLength(Result, Length(HexValue) * 4);
  P := PChar4(Result);
  for var i: integer := low(HexValue) to high( HexValue ) do
  begin
    case HexValue[i] of
      '0' .. '9':
        P^ := Table1[HexValue[i]];
      'a' .. 'f':
        P^ := Table2[HexValue[i]];
      'A' .. 'F':
        P^ := Table2[Chr(Ord(HexValue[i]) xor $20)];
    else
      raise EConvertError.CreateFmt('Invalid hex digit ''%s'' found in ''%s''', [HexValue[i], HexValue]);
    end;
    Inc(P);
  end;
end;

{$IFDEF CPUX86}
//***********************
// Kas Ob's asm32 version

procedure CharToBin_ASM32(HexChar: Char; HexBuffer: PChar);
asm
        push edi
        mov edi,edx
        //      Get the decimal value of one Hex Char (= half byte)
        movzx   eax, HexChar
        mov     ecx, 57
        sub     ecx, eax
        sar     ecx, 31
        and     ecx, 39
        neg     ecx
        add     eax, ecx
        add     eax,  - 48
        //      Produce 4 Chars presenting 4 bits of HexChar
        xor     ecx,ecx
        mov     dx,$1
        test    al,4
        cmovne  cx,dx
        shl     ecx,16
        test    al,8
        cmovne  cx,dx
        add     ecx,$00300030
        mov     [edi],ecx
        xor     ecx,ecx
        test    al,1
        cmovne  cx,dx
        shl     ecx,16
        test    al,2
        cmovne  cx,dx
        add     ecx,$00300030
        mov     [edi+4],ecx
        pop edi
end;

function HexToBinAsm32(const HexValue: string): string;
var
  HexDigit: Char;
  Ptr: PChar;
begin
  SetLength(Result, Length(HexValue) * 4);
  Ptr := Pointer(Result);
  for HexDigit in HexValue do
  begin
    case HexDigit of
    '0'..'9','a'..'f','A'..'F':
      CharToBin_ASM32(HexDigit, Ptr);
    else
      raise EConvertError.CreateFmt('Invalid hex digit ''%s'' found in ''%s''', [HexDigit, HexValue]);
    end;
    Inc(Ptr, 4);
  end;
end;

function HexToBinAsm322(const HexValue: string): string;
var
  HexDigit: Char;
  Ptr: PChar;
begin
  SetLength(Result, Length(HexValue) * 4);
  Ptr := Pointer(Result);
  for var i: integer := low(HexValue) to High( HexValue ) do
  begin
    case HexValue[i] of
    '0'..'9','a'..'f','A'..'F':
      CharToBin_ASM32(HexValue[i], Ptr);
    else
      raise EConvertError.CreateFmt('Invalid hex digit ''%s'' found in ''%s''', [HexDigit, HexValue]);
    end;
    Inc(Ptr, 4);
  end;
end;

//*********************
// Kas Ob's xmm version

procedure CharToBin_XMM(HexChar: Char; HexBuffer: PChar);
const
  DEC_TO_BIN_WORD_MASK: array[0..7] of UInt16 = ($01, $02,$00,$00, $04, $08, $00, $00);
  DEC_TO_BIN_FF_TO_CHARONE_DISTANCE: array[0..7] of UInt16 = ($FFCF, $FFCF, $FFCF, $FFCF, $FFCF, $FFCF, $FFCF, $FFCF);
  DEC_TO_BIN_REVERSE_MASK: array[0..15] of Byte = (10, $80, 8, $80, 2, $80, 0, $80, $80, $80, $80, $80, $80, $80, $80, $80);
asm
        movzx   eax, HexChar
        mov     ecx, 57
        sub     ecx, eax
        sar     ecx, 31
        and     ecx, 39
        neg     ecx
        add     eax, ecx
        add     eax,  - 48
        //      Produce 4 Chars presenting 4 bits of HexChar
        movd    xmm0, eax
        pxor    xmm1, xmm1
        movdqu  xmm2, DEC_TO_BIN_FF_TO_CHARONE_DISTANCE
        movdqu  xmm3, DEC_TO_BIN_WORD_MASK
        movdqu  xmm4,DEC_TO_BIN_REVERSE_MASK
        punpckldq xmm0, xmm0
        packssdw xmm0, xmm0
        pand    xmm0, xmm3
        pcmpeqw xmm0, xmm1
        psubw   xmm0, xmm2
        PSHUFB  xmm0, xmm4                   // reverse the result
        movq    qword ptr[HexBuffer], xmm0
end;

function HexToBinXmm(const HexValue: string): string;
var
  HexDigit: Char;
  Ptr: PChar;
begin
  SetLength(Result, Length(HexValue) * 4);
  Ptr := Pointer(Result);
  for HexDigit in HexValue do
  begin
    case HexDigit of
    '0'..'9','a'..'f','A'..'F':
      CharToBin_XMM(HexDigit, Ptr);
    else
      raise EConvertError.CreateFmt('Invalid hex digit ''%s'' found in ''%s''', [HexDigit, HexValue]);
    end;
    Inc(Ptr, 4);
  end;
end;

function HexToBinXmm2(const HexValue: string): string;
var
  HexDigit: Char;
  Ptr: PChar;
begin
  SetLength(Result, Length(HexValue) * 4);
  Ptr := Pointer(Result);
  for var i : integer := low( HexValue ) to high( HexValue) do
  begin
    case HexValue[i] of
    '0'..'9','a'..'f','A'..'F':
      CharToBin_XMM(HexValue[i], Ptr);
    else
      raise EConvertError.CreateFmt('Invalid hex digit ''%s'' found in ''%s''', [HexValue[i], HexValue]);
    end;
    Inc(Ptr, 4);
  end;
end;

{$ENDIF}

//***************
// Benchmark code

function RandomHexDigit: Char;
var
  Ordinal: Integer;
begin
  Ordinal := Random(16);
  case Ordinal of
  0..9:
    Result := Chr(Ord('0') + Ordinal);
  10..15:
    Result := Chr(Ord('a') + Ordinal - 10);
  else
    raise Exception.Create('');
  end;
end;

function RandomHexString(Length: Integer): string;
var
  Index: Integer;
begin
  SetLength(Result, Length);
  for Index := 1 to Length do
    Result[Index] := RandomHexDigit;
end;

procedure TestCorrectness;
var
  Index: Integer;
  HexStr, BinStr: string;
begin
  for Index := 0 to $fffff do
  begin
    HexStr := IntToHex(Index, 6);
    BinStr := HexToBinHeff(HexStr);

    if BinStr<>HexToBinMahdi(HexStr) then
      raise Exception.Create('incorrect implementation');
    if BinStr<>HexToBinMahdi2(HexStr) then
      raise Exception.Create('incorrect implementation');
    if BinStr<>HexToBinHeff2(HexStr) then
      raise Exception.Create('incorrect implementation');
    if BinStr<>HexToBinAsm32(HexStr) then
      raise Exception.Create('incorrect implementation');
    if BinStr<>HexToBinAsm322(HexStr) then
      raise Exception.Create('incorrect implementation');
    if BinStr<>HexToBinXmm(HexStr) then
      raise Exception.Create('incorrect implementation');
    if BinStr<>HexToBinXmm2(HexStr) then
      raise Exception.Create('incorrect implementation');
  end;
end;

procedure Benchmark;
var
  Index, Iteration: Integer;
  sw: TStopwatch;
  HexStrings: TArray<string>;
  binaryString: string;
begin
  SetLength(HexStrings, InputDataCount);
  for Index := 0 to InputDataCount-1 do
    HexStrings[Index] := RandomHexString(HexStringLength);

  sw := TStopwatch.StartNew;
  for Iteration := 0 to IterationCount-1 do
    for Index := 0 to InputDataCount-1 do
    begin
      binaryString := HexToBinHeff(HexStrings[Index]);
      binaryString := ''; // force a reallocation of the string every iteration of this loop
    end;
  Writeln('Pascal lookup (Heff): ', sw.ElapsedMilliseconds);

  sw := TStopwatch.StartNew;
  for Iteration := 0 to IterationCount-1 do
    for Index := 0 to InputDataCount-1 do
    begin
      binaryString := HexToBinMahdi(HexStrings[Index]);
      binaryString := ''; // force a reallocation of the string every iteration of this loop
    end;
  Writeln('Pascal lookup (Mahdi): ', sw.ElapsedMilliseconds);

{$IFDEF CPUX86}
  sw := TStopwatch.StartNew;
  for Iteration := 0 to IterationCount-1 do
    for Index := 0 to InputDataCount-1 do
    begin
      binaryString := HexToBinAsm32(HexStrings[Index]);
      binaryString := ''; // force a reallocation of the string every iteration of this loop
    end;
  Writeln('asm32: ', sw.ElapsedMilliseconds);

  sw := TStopwatch.StartNew;
  for Iteration := 0 to IterationCount-1 do
    for Index := 0 to InputDataCount-1 do
    begin
      binaryString := HexToBinXmm(HexStrings[Index]);
      binaryString := ''; // force a reallocation of the string every iteration of this loop
    end;
  Writeln('xmm: ', sw.ElapsedMilliseconds);


{$ENDIF}


  sw := TStopwatch.StartNew;
  for Iteration := 0 to IterationCount-1 do
    for Index := 0 to InputDataCount-1 do
    begin
      binaryString := HexToBinHeff2(HexStrings[Index]);
      binaryString := ''; // force a reallocation of the string every iteration of this loop
    end;
  Writeln('Pascal lookup (Heff2): ', sw.ElapsedMilliseconds);

  sw := TStopwatch.StartNew;
  for Iteration := 0 to IterationCount-1 do
    for Index := 0 to InputDataCount-1 do
    begin
      binaryString := HexToBinMahdi2(HexStrings[Index]);
      binaryString := ''; // force a reallocation of the string every iteration of this loop
    end;
  Writeln('Pascal lookup (Mahdi2): ', sw.ElapsedMilliseconds);



{$IFDEF CPUX86}
  sw := TStopwatch.StartNew;
  for Iteration := 0 to IterationCount-1 do
    for Index := 0 to InputDataCount-1 do
    begin
      binaryString := HexToBinAsm322(HexStrings[Index]);
      binaryString := ''; // force a reallocation of the string every iteration of this loop
    end;
  Writeln('asm32 2: ', sw.ElapsedMilliseconds);

  sw := TStopwatch.StartNew;
  for Iteration := 0 to IterationCount-1 do
    for Index := 0 to InputDataCount-1 do
    begin
      binaryString := HexToBinXmm2(HexStrings[Index]);
      binaryString := ''; // force a reallocation of the string every iteration of this loop
    end;
  Writeln('xmm 2: ', sw.ElapsedMilliseconds);
{$ENDIF}


end;

begin
  try
    Randomize;
    TestCorrectness;
    Benchmark;
  except
    on E: Exception do
      Writeln(E.ClassName, ': ', E.Message);
  end;
  Readln;
end.

The output:

Pascal lookup (Heff): 5416
Pascal lookup (Mahdi): 5097
asm32: 8073
xmm: 6325
Pascal lookup (Heff2): 4487
Pascal lookup (Mahdi2): 4179
asm32 2: 7260
xmm 2: 5658

 

Share this post


Link to post

"Get your conditional jumps and error handling garbage outta my hot loop, kay?"

 

function HexToBinStefan(const HexValue: string): string;

  // put the exception stuff into a subroutine to not pollute our routine
  procedure Error(c: PChar; s: string);
  begin
    raise EConvertError.CreateFmt('Invalid hex digit ''%s'' found in ''%s''', [c^, s]);
  end;

label
  _Error;
type
  TChar4 = array[0..3] of Char;
  PChar4 = ^TChar4;
  {$POINTERMATH ON}
  PInteger = ^Integer;
  {$POINTERMATH OFF}
const
  Table: array[0..22] of TChar4 = (
   '0000', '0001', '0010', '0011', '0100', '0101', '0110', '0111', '1000', '1001', // 0-9
   'xxxx', 'xxxx', 'xxxx', 'xxxx', 'xxxx', 'xxxx', 'xxxx',                         // :-@ - unused
   '1010', '1011', '1100', '1101', '1110', '1111');                                // A-F
var
  HexDigit: PChar;
  P: PChar4;
  i, n: Cardinal;
begin
  // do not use PChar cast because that causes a call to UStrToPWChar
  // we don't need that special PChar to #0 when HexValue is empty
  HexDigit := Pointer(HexValue);
  if HexDigit = nil then Exit;

  // we know that HexDigit is not nil so we can avoid the conditional jump from Length
  // this also directly moves it into the correct register for the SetLength call
  SetLength(Result, PInteger(HexDigit)[-1] * 4);
  P := PChar4(Result);

  for i := 1 to PInteger(HexDigit)[-1] do
  begin
    // subtract 48 to make '0'-'9' 0-9 which enables unconditionally downcasing any upper case char
    // when we hit the #0 it will simply produce an invalid value for n that we will break on next
    n := Cardinal(Integer(Ord(HexDigit^)) - 48) and not 32;

    // avoid one check by simply subtracting 10 and checking the invalid range of 10-16
    // thank you godbolt.org and amazingly optimizing c++ compilers for that idea! <3
    if (Cardinal(Integer(n)-10) <= 6) or (n > 22) then goto _error;

    P^ := Table[n];
    Inc(P);
    Inc(HexDigit);
  end;
  Exit;
_error:
  Error(HexDigit, HexValue);
end;

 

Edited by Stefan Glienke
  • Like 2

Share this post


Link to post
19 minutes ago, Stefan Glienke said:

"Get your conditional jumps and error handling garbage outta my hot loop, kay?"

Nice one.

 

32bit release

Pascal lookup (Stefan): 1449
 

 

64bit release

Pascal lookup (Stefan): 1434

 

Quote

 

Share this post


Link to post
1 hour ago, Stefan Glienke said:

"Get your conditional jumps and error handling garbage outta my hot loop, kay?"

Very nicely done indeed.

 

Another update of my benchmark:

{$APPTYPE CONSOLE}

uses
  System.SysUtils,
  System.Classes,
  System.Diagnostics;

//********************************************
// Constants to tune the benchmark performance

const
  HexStringLength = 64;
  InputDataCount = 512;
  IterationCount = 20000;

//**************************
// David Heffernan's routine

function HexToBinHeffMove(const HexValue: string): string;
const
  BinaryValues: array [0..15] of string = (
    '0000', '0001', '0010', '0011',
    '0100', '0101', '0110', '0111',
    '1000', '1001', '1010', '1011',
    '1100', '1101', '1110', '1111'
  );
var
  HexDigit: Char;
  HexDigitValue: Integer;
  Ptr: PChar;
begin
  SetLength(Result, Length(HexValue) * 4);
  Ptr := Pointer(Result);
  for HexDigit in HexValue do
  begin
    case HexDigit of
    '0'..'9':
      HexDigitValue := Ord(HexDigit) - Ord('0');
    'a'..'f':
      HexDigitValue := 10 + Ord(HexDigit) - Ord('a');
    'A'..'F':
      HexDigitValue := 10 + Ord(HexDigit) - Ord('A');
    else
      raise EConvertError.CreateFmt('Invalid hex digit ''%s'' found in ''%s''', [HexDigit, HexValue]);
    end;
    Move(Pointer(BinaryValues[HexDigitValue])^, Ptr^, 4 * SizeOf(Char));
    Inc(Ptr, 4);
  end;
end;

//*****************************************************************
// David Heffernan's routine, with Move replace by Int64 assignment

function HexToBinHeffInt64Assign(const HexValue: string): string;
const
  BinaryValues: array [0..15] of string = (
    '0000', '0001', '0010', '0011',
    '0100', '0101', '0110', '0111',
    '1000', '1001', '1010', '1011',
    '1100', '1101', '1110', '1111'
  );
var
  HexDigit: Char;
  HexDigitValue: Integer;
  Ptr: PChar;
begin
  SetLength(Result, Length(HexValue) * 4);
  Ptr := Pointer(Result);
  for HexDigit in HexValue do
  begin
    case HexDigit of
    '0'..'9':
      HexDigitValue := Ord(HexDigit) - Ord('0');
    'a'..'f':
      HexDigitValue := 10 + Ord(HexDigit) - Ord('a');
    'A'..'F':
      HexDigitValue := 10 + Ord(HexDigit) - Ord('A');
    else
      raise EConvertError.CreateFmt('Invalid hex digit ''%s'' found in ''%s''', [HexDigit, HexValue]);
    end;
    PInt64(Ptr)^ := PInt64(BinaryValues[HexDigitValue])^;
    Inc(Ptr, 4);
  end;
end;

//************************
// Mahdi Safsafi's routine

function HexToBinMahdi(const HexValue: string): string;
type
  TChar4 = array [0 .. 3] of Char;
  PChar4 = ^TChar4;
const
  Table1: array ['0' .. '9'] of TChar4 = ('0000', '0001', '0010', '0011', '0100', '0101', '0110', '0111', '1000', '1001');
  Table2: array ['a' .. 'f'] of TChar4 = ('1010', '1011', '1100', '1101', '1110', '1111');
var
  HexDigit: Char;
  P: PChar4;
begin
  SetLength(Result, Length(HexValue) * 4);
  P := PChar4(Result);
  for HexDigit in HexValue do
  begin
    case HexDigit of
      '0' .. '9':
        P^ := Table1[HexDigit];
      'a' .. 'f':
        P^ := Table2[HexDigit];
      'A' .. 'F':
        P^ := Table2[Chr(Ord(HexDigit) xor $20)];
    else
      raise EConvertError.CreateFmt('Invalid hex digit ''%s'' found in ''%s''', [HexDigit, HexValue]);
    end;
    Inc(P);
  end;
end;

//************************************************
// Mahdi Safsafi's routine with a classic for loop

function HexToBinMahdiClassicFor(const HexValue: string): string;
type
  TChar4 = array [0 .. 3] of Char;
  PChar4 = ^TChar4;
const
  Table1: array ['0' .. '9'] of TChar4 = ('0000', '0001', '0010', '0011', '0100', '0101', '0110', '0111', '1000', '1001');
  Table2: array ['a' .. 'f'] of TChar4 = ('1010', '1011', '1100', '1101', '1110', '1111');
var
  i, n: Integer;
  HexDigit: PChar;
  P: PChar4;
begin
  n := Length(HexValue);
  SetLength(Result, n * 4);
  HexDigit := PChar(Pointer(HexValue));
  P := PChar4(Result);
  for i := 1 to n do
  begin
    case HexDigit^ of
      '0' .. '9':
        P^ := Table1[HexDigit^];
      'a' .. 'f':
        P^ := Table2[HexDigit^];
      'A' .. 'F':
        P^ := Table2[Chr(Ord(HexDigit^) xor $20)];
    else
      raise EConvertError.CreateFmt('Invalid hex digit ''%s'' found in ''%s''', [HexDigit, HexValue]);
    end;
    Inc(P);
    Inc(HexDigit);
  end;
end;

//*****************
// Stefan's routine
function HexToBinStefan(const HexValue: string): string;
label
  _error;
type
  TChar4 = array [0..3] of Char;
  PChar4 = ^TChar4;
const
  Table: array [0..22] of TChar4 = (
   '0000', '0001', '0010', '0011', '0100', '0101', '0110', '0111', '1000', '1001', // 0-9
   'xxxx', 'xxxx', 'xxxx', 'xxxx', 'xxxx', 'xxxx', 'xxxx',                         // :-@ - unused
   '1010', '1011', '1100', '1101', '1110', '1111');                                // A-F
var
  len, i: Integer;
  HexDigit: PChar;
  P: PChar4;
  n: Cardinal;
begin
  len := Length(HexValue);
  SetLength(Result, len * 4);
  P := PChar4(Result);
  HexDigit := @PChar(Pointer(HexValue))[0];
  for i := 1 to len do
  begin
    n := Cardinal((Ord(HexDigit^) - 48) and not 32);
    {$B+}
    if ((n > 9) and (n < 15)) or (n > 22) then goto _error;
    {$B-}
    P^ := Table[n];

    Inc(P);
    Inc(HexDigit);
  end;
  Exit;
_error:
  raise EConvertError.CreateFmt('Invalid hex digit ''%s'' found in ''%s''', [HexDigit^, HexValue]);
end;

function HexToBinStefan2(const HexValue: string): string;
label
  _error;
type
  TChar4 = array [0..3] of Char;
  PChar4 = ^TChar4;
const
  Table: array [0..22] of TChar4 = (
   '0000', '0001', '0010', '0011', '0100', '0101', '0110', '0111', '1000', '1001', // 0-9
   'xxxx', 'xxxx', 'xxxx', 'xxxx', 'xxxx', 'xxxx', 'xxxx',                         // :-@ - unused
   '1010', '1011', '1100', '1101', '1110', '1111');                                // A-F
var
  len, i: Integer;
  HexDigit: PChar;
  P: PChar4;
  n: Cardinal;
begin
  len := Length(HexValue);
  SetLength(Result, len * 4);
  P := PChar4(Result);
  HexDigit := @PChar(Pointer(HexValue))[0];
  for i := 1 to len do
  begin
    // subtract 48 to make '0'-'9' 0-9 which enables unconditionally downcasing any upper case char
    n := Cardinal((Ord(HexDigit^) - 48) and not 32);

    {$B-} // avoid multiple conditional jumps by not using short eval

    // avoid one check by simply subtracting 10 and checking the invalid range of 10-16
    // thank you godbolt.org and amazingly optimizing c++ compilers for that idea! <3
    if (Cardinal(Integer(n)-10) <= 6) or (n > 22) then goto _error;
    {$B-}

    P^ := Table[n];

    Inc(P);
    Inc(HexDigit);
  end;
  Exit;
_error:
  raise EConvertError.CreateFmt('Invalid hex digit ''%s'' found in ''%s''', [HexDigit^, HexValue]);
end;

{$IFDEF CPUX86}
//***********************
// Kas Ob's asm32 version

procedure CharToBin_ASM32(HexChar: Char; HexBuffer: PChar);
asm
        push edi
        mov edi,edx
        //      Get the decimal value of one Hex Char (= half byte)
        movzx   eax, HexChar
        mov     ecx, 57
        sub     ecx, eax
        sar     ecx, 31
        and     ecx, 39
        neg     ecx
        add     eax, ecx
        add     eax,  - 48
        //      Produce 4 Chars presenting 4 bits of HexChar
        xor     ecx,ecx
        mov     dx,$1
        test    al,4
        cmovne  cx,dx
        shl     ecx,16
        test    al,8
        cmovne  cx,dx
        add     ecx,$00300030
        mov     [edi],ecx
        xor     ecx,ecx
        test    al,1
        cmovne  cx,dx
        shl     ecx,16
        test    al,2
        cmovne  cx,dx
        add     ecx,$00300030
        mov     [edi+4],ecx
        pop edi
end;

function HexToBinAsm32(const HexValue: string): string;
var
  HexDigit: Char;
  Ptr: PChar;
begin
  SetLength(Result, Length(HexValue) * 4);
  Ptr := Pointer(Result);
  for HexDigit in HexValue do
  begin
    case HexDigit of
    '0'..'9','a'..'f','A'..'F':
      CharToBin_ASM32(HexDigit, Ptr);
    else
      raise EConvertError.CreateFmt('Invalid hex digit ''%s'' found in ''%s''', [HexDigit, HexValue]);
    end;
    Inc(Ptr, 4);
  end;
end;

//*********************
// Kas Ob's xmm version

procedure CharToBin_XMM(HexChar: Char; HexBuffer: PChar);
const
  DEC_TO_BIN_WORD_MASK: array[0..7] of UInt16 = ($01, $02,$00,$00, $04, $08, $00, $00);
  DEC_TO_BIN_FF_TO_CHARONE_DISTANCE: array[0..7] of UInt16 = ($FFCF, $FFCF, $FFCF, $FFCF, $FFCF, $FFCF, $FFCF, $FFCF);
  DEC_TO_BIN_REVERSE_MASK: array[0..15] of Byte = (10, $80, 8, $80, 2, $80, 0, $80, $80, $80, $80, $80, $80, $80, $80, $80);
asm
        movzx   eax, HexChar
        mov     ecx, 57
        sub     ecx, eax
        sar     ecx, 31
        and     ecx, 39
        neg     ecx
        add     eax, ecx
        add     eax,  - 48
        //      Produce 4 Chars presenting 4 bits of HexChar
        movd    xmm0, eax
        pxor    xmm1, xmm1
        movdqu  xmm2, DEC_TO_BIN_FF_TO_CHARONE_DISTANCE
        movdqu  xmm3, DEC_TO_BIN_WORD_MASK
        movdqu  xmm4,DEC_TO_BIN_REVERSE_MASK
        punpckldq xmm0, xmm0
        packssdw xmm0, xmm0
        pand    xmm0, xmm3
        pcmpeqw xmm0, xmm1
        psubw   xmm0, xmm2
        PSHUFB  xmm0, xmm4                   // reverse the result
        movq    qword ptr[HexBuffer], xmm0
end;

function HexToBinXmm(const HexValue: string): string;
var
  HexDigit: Char;
  Ptr: PChar;
begin
  SetLength(Result, Length(HexValue) * 4);
  Ptr := Pointer(Result);
  for HexDigit in HexValue do
  begin
    case HexDigit of
    '0'..'9','a'..'f','A'..'F':
      CharToBin_XMM(HexDigit, Ptr);
    else
      raise EConvertError.CreateFmt('Invalid hex digit ''%s'' found in ''%s''', [HexDigit, HexValue]);
    end;
    Inc(Ptr, 4);
  end;
end;
{$ENDIF}

//***************
// Benchmark code

function RandomHexDigit: Char;
var
  Ordinal: Integer;
begin
  Ordinal := Random(16);
  case Ordinal of
  0..9:
    Result := Chr(Ord('0') + Ordinal);
  10..15:
    Result := Chr(Ord('a') + Ordinal - 10);
  else
    raise Exception.Create('');
  end;
end;

function RandomHexString(Length: Integer): string;
var
  Index: Integer;
begin
  SetLength(Result, Length);
  for Index := 1 to Length do
    Result[Index] := RandomHexDigit;
end;

procedure TestCorrectness;
var
  Index: Integer;
  HexStr, BinStr: string;
begin
  for Index := 0 to $fffff do
  begin
    HexStr := IntToHex(Index, 6);
    BinStr := HexToBinHeffMove(HexStr);

    if BinStr<>HexToBinHeffInt64Assign(HexStr) then
      raise Exception.Create('incorrect implementation');
    if BinStr<>HexToBinMahdi(HexStr) then
      raise Exception.Create('incorrect implementation');
    if BinStr<>HexToBinMahdiClassicFor(HexStr) then
      raise Exception.Create('incorrect implementation');
    if BinStr<>HexToBinStefan(HexStr) then
      raise Exception.Create('incorrect implementation');
    if BinStr<>HexToBinStefan2(HexStr) then
      raise Exception.Create('incorrect implementation');
{$IFDEF CPUX86}
    if BinStr<>HexToBinAsm32(HexStr) then
      raise Exception.Create('incorrect implementation');
    if BinStr<>HexToBinXmm(HexStr) then
      raise Exception.Create('incorrect implementation');
{$ENDIF}
  end;
end;

procedure Benchmark;
var
  Index, Iteration: Integer;
  sw: TStopwatch;
  HexStrings: TArray<string>;
  binaryString: string;
begin
  SetLength(HexStrings, InputDataCount);
  for Index := 0 to InputDataCount-1 do
    HexStrings[Index] := RandomHexString(HexStringLength);

  sw := TStopwatch.StartNew;
  for Iteration := 0 to IterationCount-1 do
    for Index := 0 to InputDataCount-1 do
    begin
      binaryString := HexToBinHeffMove(HexStrings[Index]);
      binaryString := ''; // force a reallocation of the string every iteration of this loop
    end;
  Writeln('Pascal lookup (Heff, move): ', sw.ElapsedMilliseconds);

  sw := TStopwatch.StartNew;
  for Iteration := 0 to IterationCount-1 do
    for Index := 0 to InputDataCount-1 do
    begin
      binaryString := HexToBinHeffInt64Assign(HexStrings[Index]);
      binaryString := ''; // force a reallocation of the string every iteration of this loop
    end;
  Writeln('Pascal lookup (Heff, int64 assign): ', sw.ElapsedMilliseconds);

  sw := TStopwatch.StartNew;
  for Iteration := 0 to IterationCount-1 do
    for Index := 0 to InputDataCount-1 do
    begin
      binaryString := HexToBinMahdi(HexStrings[Index]);
      binaryString := ''; // force a reallocation of the string every iteration of this loop
    end;
  Writeln('Pascal lookup (Mahdi): ', sw.ElapsedMilliseconds);

  sw := TStopwatch.StartNew;
  for Iteration := 0 to IterationCount-1 do
    for Index := 0 to InputDataCount-1 do
    begin
      binaryString := HexToBinMahdiClassicFor(HexStrings[Index]);
      binaryString := ''; // force a reallocation of the string every iteration of this loop
    end;
  Writeln('Pascal lookup (Mahdi, classic for): ', sw.ElapsedMilliseconds);

  sw := TStopwatch.StartNew;
  for Iteration := 0 to IterationCount-1 do
    for Index := 0 to InputDataCount-1 do
    begin
      binaryString := HexToBinStefan(HexStrings[Index]);
      binaryString := ''; // force a reallocation of the string every iteration of this loop
    end;
  Writeln('Pascal lookup (Stefan): ', sw.ElapsedMilliseconds);

  sw := TStopwatch.StartNew;
  for Iteration := 0 to IterationCount-1 do
    for Index := 0 to InputDataCount-1 do
    begin
      binaryString := HexToBinStefan2(HexStrings[Index]);
      binaryString := ''; // force a reallocation of the string every iteration of this loop
    end;
  Writeln('Pascal lookup (Stefan mk 2): ', sw.ElapsedMilliseconds);

{$IFDEF CPUX86}
  sw := TStopwatch.StartNew;
  for Iteration := 0 to IterationCount-1 do
    for Index := 0 to InputDataCount-1 do
    begin
      binaryString := HexToBinAsm32(HexStrings[Index]);
      binaryString := ''; // force a reallocation of the string every iteration of this loop
    end;
  Writeln('asm32: ', sw.ElapsedMilliseconds);

  sw := TStopwatch.StartNew;
  for Iteration := 0 to IterationCount-1 do
    for Index := 0 to InputDataCount-1 do
    begin
      binaryString := HexToBinXmm(HexStrings[Index]);
      binaryString := ''; // force a reallocation of the string every iteration of this loop
    end;
  Writeln('xmm: ', sw.ElapsedMilliseconds);
{$ENDIF}
end;

begin
  try
    Randomize;
    TestCorrectness;
    Benchmark;
  except
    on E: Exception do
      Writeln(E.ClassName, ': ', E.Message);
  end;
  Writeln('Completed');
  Readln;
end.

32 bit output:

Pascal lookup (Heff, move): 4658
Pascal lookup (Heff, int64 assign): 3702
Pascal lookup (Mahdi): 3645
Pascal lookup (Mahdi, classic for): 2766
Pascal lookup (Stefan): 1024
Pascal lookup (Stefan mk 2): 805
asm32: 4165
xmm: 3978

64 bit output:

Pascal lookup (Heff, move): 4720
Pascal lookup (Heff, int64 assign): 3138
Pascal lookup (Mahdi): 3221
Pascal lookup (Mahdi, classic for): 2629
Pascal lookup (Stefan): 1180
Pascal lookup (Stefan mk 2): 909

 

Edited by David Heffernan

Share this post


Link to post

While I don't question the timings I don't think these latest benchmarks are fair; The goalposts have been moved.

It seems the first solutions were aiming at optimizing conversion of a single hex digit and the later ones are optimized for strings. Some have had the iterators replaced by classic for and some haven't.

Share this post


Link to post
9 minutes ago, Anders Melander said:

I don't think these latest benchmarks are fair [...] some have had the iterators replaced by classic for and some haven't.

Well that is the optimizations that some people were aware of and some weren't - why would that be unfair?

 

P.S. What did I win? :classic_tongue: Joking aside - it's always interesting that different people see different things. And at the same time it's very sad that perfectly fine code will be like 3-4 times slower than hardcore optimized code simply because the compiler does not know about some things, does not do zero cost abstractions (*) and does not reorder code to make it better.

 

(*) I mean seriously - why do an LStrAsg on a string parameter - as if that would go invalid in the middle of the loop or what?! And because you cannot assign to a loop variable it should treat it exactly the same way as a moving PChar over the string.

Edited by Stefan Glienke
  • Like 1

Share this post


Link to post
2 minutes ago, Stefan Glienke said:

Well that is the optimizations that some people were aware of and some weren't - why would that be unfair?

The iterators were David's code and was just part of the test bench.
As long as every solution uses the same test bench code then that doesn't matter much but when some solutions then replaces the test bench code to optimize the solution that invalidates the comparison.

Share this post


Link to post
1 hour ago, Anders Melander said:

It seems the first solutions were aiming at optimizing conversion of a single hex digit and the later ones are optimized for strings

It was always strings.

 

1 hour ago, Anders Melander said:

Some have had the iterators replaced by classic for and some haven't.

I've kept a variety of routines in there, that chart the evolution in this thread.

 

1 hour ago, Anders Melander said:

The iterators were David's code and was just part of the test bench.

Don't think so, because of the first point, which is that the question was about strings and not individual chars.

 

Edited by David Heffernan

Share this post


Link to post

I don't get this:

{$B-} // avoid multiple conditional jumps by not using short eval

Actually $B- disables the complete Boolean evaluation. It's a default setting, and it gets a best result in this case.

Share this post


Link to post
2 minutes ago, Kryvich said:

I don't get this:


{$B-} // avoid multiple conditional jumps by not using short eval

Actually $B- disables the complete Boolean evaluation. It's a default setting, and it gets a best result in this case.

That was a typo that David copied - my first version had 3 checks in the loop where {$B+} made it better, now with only 2 checks I don't need that anymore - see my post with the currently best version.

  • Like 1

Share this post


Link to post

Partially SIMDed without trailing ... and it beat all 🙂

const
  { Source Data Format : Imm8[1:0] }
  DF_UNSIGNED_BYTES = 0;
  DF_UNSIGNED_WORDS = 1;
  DF_SIGNED_BYTES = 2;
  DF_SIGNED_WORDS = 3;
  { Aggregation Operation : Imm8[3:2] }
  AGGREGATION_OP_EQUAL_ANY = 0 shl 2;
  AGGREGATION_OP_RANGES = 1 shl 2;
  AGGREGATION_OP_EQUAL_EACH = 2 shl 2;
  AGGREGATION_OP_EQUAL_ORDERED = 3 shl 2;
  { Polarity : Imm8[5:4] }
  POLARITY_POSITIVE = 0 shl 4;
  POLARITY_NEGATIVE = 1 shl 4;
  POLARITY_MASKED_POSITIVE = 2 shl 4;
  POLARITY_MASKED_NEGATIVE = 3 shl 4;
  { Output Selection : Imm8[6] }
  OS_LSI = 0 shl 6;
  OS_MSI = 1 shl 6;
  OS_BIT_MASK = 0 shl 6;
  OS_BYTE_WORD_MASK = 1 shl 6;

const
  [Align(16)]
  Range: array [0 .. 7] of Char = '09afAF' + #00;

function IsValidHex(P: Pointer): Boolean;
asm
  movdqa    xmm1, [Range]
  sub       eax, 16
@@SimdLoop:
  add       eax, 16
  movdqu    xmm2, [eax]
  pcmpistri xmm1, xmm2, DF_UNSIGNED_WORDS or AGGREGATION_OP_RANGES or POLARITY_NEGATIVE
  ja        @@SimdLoop
  test      Word [eax + ecx *2], -1
  setz      al
end;

function HexToBinMahdiOneShot(const HexValue: string): string;
type
  TChar4 = array [0 .. 3] of Char;
  PChar4 = ^TChar4;
const
  Table: array [0 .. 25] of TChar4 = ('0000', '1010', '1011', '1100', '1101', '1110', '1111', '0000', '0000', '0000', '0000', '0000', '0000', '0000', '0000',
    '0000', '0000', '0001', '0010', '0011', '0100', '0101', '0110', '0111', '1000', '1001');
var
  P: PChar4;
  I, Len: Integer;
begin
  SetLength(Result, Length(HexValue) * 4);
  P := PChar4(Result);
  if IsValidHex(Pointer(HexValue)) then
  begin
    Len := Length(HexValue);
    for I := 1 to Len do
    begin
      P^ := Table[Ord(HexValue[I]) and 31];
      Inc(P);
    end;
  end
  else
    raise EConvertError.CreateFmt('Invalid hex : %s', [HexValue]);
end;

 

Edited by Mahdi Safsafi
  • Like 2

Share this post


Link to post
34 minutes ago, Mahdi Safsafi said:

Partially SIMDed without trailing ... and it beat all 🙂

Close one, nice! 😉

 

But I would be surprised if a simd loop would not beat it.

I am sure doing the entire thing with a simd loop would totally destroy any pure pascal solution.

Edited by Stefan Glienke

Share this post


Link to post

@Stefan Glienke SIMD are very powerful but they come with their issues too(portability, alignment for some instructions,... ). Some compilers have a great stuff to embarrasse those issues. But neither delphi compiler nor the RTL helps (we don't even have AVX) 😥 I really wish if at some point of time Delphi supports SIMD through intrinsics or some vector types.

  • Like 2

Share this post


Link to post
5 hours ago, Mahdi Safsafi said:

Partially SIMDed without trailing ... and it beat all 🙂


const
  { Source Data Format : Imm8[1:0] }
  DF_UNSIGNED_BYTES = 0;
  DF_UNSIGNED_WORDS = 1;
  DF_SIGNED_BYTES = 2;
  DF_SIGNED_WORDS = 3;
  { Aggregation Operation : Imm8[3:2] }
  AGGREGATION_OP_EQUAL_ANY = 0 shl 2;
  AGGREGATION_OP_RANGES = 1 shl 2;
  AGGREGATION_OP_EQUAL_EACH = 2 shl 2;
  AGGREGATION_OP_EQUAL_ORDERED = 3 shl 2;
  { Polarity : Imm8[5:4] }
  POLARITY_POSITIVE = 0 shl 4;
  POLARITY_NEGATIVE = 1 shl 4;
  POLARITY_MASKED_POSITIVE = 2 shl 4;
  POLARITY_MASKED_NEGATIVE = 3 shl 4;
  { Output Selection : Imm8[6] }
  OS_LSI = 0 shl 6;
  OS_MSI = 1 shl 6;
  OS_BIT_MASK = 0 shl 6;
  OS_BYTE_WORD_MASK = 1 shl 6;

const
  [Align(16)]
  Range: array [0 .. 7] of Char = '09afAF' + #00;

function IsValidHex(P: Pointer): Boolean;
asm
  movdqa    xmm1, [Range]
  sub       eax, 16
@@SimdLoop:
  add       eax, 16
  movdqu    xmm2, [eax]
  pcmpistri xmm1, xmm2, DF_UNSIGNED_WORDS or AGGREGATION_OP_RANGES or POLARITY_NEGATIVE
  ja        @@SimdLoop
  test      Word [eax + ecx *2], ax
  setz      al
end;

function HexToBinMahdiOneShot(const HexValue: string): string;
type
  TChar4 = array [0 .. 3] of Char;
  PChar4 = ^TChar4;
const
  Table: array [0 .. 25] of TChar4 = ('0000', '1010', '1011', '1100', '1101', '1110', '1111', '0000', '0000', '0000', '0000', '0000', '0000', '0000', '0000',
    '0000', '0000', '0001', '0010', '0011', '0100', '0101', '0110', '0111', '1000', '1001');
var
  P: PChar4;
  I, Len: Integer;
begin
  SetLength(Result, Length(HexValue) * 4);
  P := PChar4(Result);
  if IsValidHex(Pointer(HexValue)) then
  begin
    Len := Length(HexValue);
    for I := 1 to Len do
    begin
      P^ := Table[Ord(HexValue[I]) and 31];
      Inc(P);
    end;
  end
  else
    raise EConvertError.CreateFmt('Invalid hex : %s', [HexValue]);
end;

 

Wow, very impressive! And performance is pretty much the same for long hex strings (8096 chars) and short strings (6 chars):

 

cHexRange = 16777216; cHexLen = 6;

HexToBin2                             = 1243;

HexToBinMahdiOneShot        = 430;

Diff: HexToBinMahdiOneShot = -65%

 

cHexRange = 100000; cHexLen = 8096;

HexToBin2                             = 5020;

HexToBinMahdiOneShot        = 1753;

Diff: HexToBinMahdiOneShot =  -65%

 

 

Share this post


Link to post
8 minutes ago, Mahdi Safsafi said:

@Mike Torrettinni it can be further optimized by using full simd for all the operation but unfortunately this requires using some instructions that are not supported by Delphi and also by some cpus.

No need to go beyond Delphi's capabilities. This whole topic was great to follow, all this knowledge! 🙂

Share this post


Link to post

I agree! There are many awesome tweaking tips in this post!

 I really love @Stefan Glienke's approach that strips down the pure Pascal to a minimalistic set of operations.
 

 

Share this post


Link to post
2 hours ago, Attila Kovacs said:

does not yield on HexToBinStefan(#0)

Fair enough - guess I have to go back to the version with the label.

Share this post


Link to post
11 hours ago, Mike Torrettinni said:

No need to go beyond Delphi's capabilities. This whole topic was great to follow, all this knowledge! 🙂

Of course ! I'm just going to underlay a serious problem. In the past, no one bothered himself to understand optimization (even great developer/company didn't) because CPUs were evolving so fast and each new generation beats the previous one by a large margin(people were just upgrading their CPUs and seeing performance x2,x3). But today we are reaching a dead point (Moore's Law) and the difference between a new generation and the previous isn't really significant ! So today the effort is jumping from CPUs to compilers/parallel programming. In the past few years, we have seeing the emergence of LLVM as a powerful compiler infrastructure and big whales started to communicate with each other more than ever.

In a nutshell, tomorrow problem is optimization. The way how many choose to deal with it, is improving compiler. And I really think that Delphi should join this race ASAP. 

  • Like 6

Share this post


Link to post
3 hours ago, Mahdi Safsafi said:

I really think that Delphi should join this race ASAP. 

Especially since one of its selling points is "it compiles to native code" - if that native code is garbage for modern CPUs because its written like in the 90s that's kinda poor.

Share this post


Link to post
1 hour ago, Stefan Glienke said:

Especially since one of its selling points is "it compiles to native code" - if that native code is garbage for modern CPUs because its written like in the 90s that's kinda poor.

If only they'd developed a compiler that targeted .net..... 

Share this post


Link to post

Create an account or sign in to comment

You need to be a member in order to leave a comment

Create an account

Sign up for a new account in our community. It's easy!

Register a new account

Sign in

Already have an account? Sign in here.

Sign In Now

×