Jump to content

David Heffernan

Members
  • Content Count

    3711
  • Joined

  • Last visited

  • Days Won

    185

Posts posted by David Heffernan


  1. 8 hours ago, Vandrovnik said:

    In OpenGL, he can work with raster graphics really fast and easy - he just loads them as textures.

    But writing text and printing will not be so easy, if they are needed.

    This seems like choosing a library, and then trying to work out how to fit that library to the problem. That's the wrong way round. One should first understand the problem space, its requirements and constraints, and then find the best solution. 

     

    Problem first, then solution. Not the other way round. 


  2. 4 minutes ago, Alberto Fornés said:

    Years ago I was testing various possibilities to display points, lines, polygons, etc. on a map, with various libraries and OpenGL was the one who did it the fastest, perhaps showing text found worse performance.

    OpenGL doesn't seem like an obvious choice for 2D graphics on Windows. 


  3. 1 hour ago, Stefan Glienke said:

    Especially since one of its selling points is "it compiles to native code" - if that native code is garbage for modern CPUs because its written like in the 90s that's kinda poor.

    If only they'd developed a compiler that targeted .net..... 


  4. 1 hour ago, Anders Melander said:

    It seems the first solutions were aiming at optimizing conversion of a single hex digit and the later ones are optimized for strings

    It was always strings.

     

    1 hour ago, Anders Melander said:

    Some have had the iterators replaced by classic for and some haven't.

    I've kept a variety of routines in there, that chart the evolution in this thread.

     

    1 hour ago, Anders Melander said:

    The iterators were David's code and was just part of the test bench.

    Don't think so, because of the first point, which is that the question was about strings and not individual chars.

     


  5. 1 hour ago, Stefan Glienke said:

    "Get your conditional jumps and error handling garbage outta my hot loop, kay?"

    Very nicely done indeed.

     

    Another update of my benchmark:

    {$APPTYPE CONSOLE}
    
    uses
      System.SysUtils,
      System.Classes,
      System.Diagnostics;
    
    //********************************************
    // Constants to tune the benchmark performance
    
    const
      HexStringLength = 64;
      InputDataCount = 512;
      IterationCount = 20000;
    
    //**************************
    // David Heffernan's routine
    
    function HexToBinHeffMove(const HexValue: string): string;
    const
      BinaryValues: array [0..15] of string = (
        '0000', '0001', '0010', '0011',
        '0100', '0101', '0110', '0111',
        '1000', '1001', '1010', '1011',
        '1100', '1101', '1110', '1111'
      );
    var
      HexDigit: Char;
      HexDigitValue: Integer;
      Ptr: PChar;
    begin
      SetLength(Result, Length(HexValue) * 4);
      Ptr := Pointer(Result);
      for HexDigit in HexValue do
      begin
        case HexDigit of
        '0'..'9':
          HexDigitValue := Ord(HexDigit) - Ord('0');
        'a'..'f':
          HexDigitValue := 10 + Ord(HexDigit) - Ord('a');
        'A'..'F':
          HexDigitValue := 10 + Ord(HexDigit) - Ord('A');
        else
          raise EConvertError.CreateFmt('Invalid hex digit ''%s'' found in ''%s''', [HexDigit, HexValue]);
        end;
        Move(Pointer(BinaryValues[HexDigitValue])^, Ptr^, 4 * SizeOf(Char));
        Inc(Ptr, 4);
      end;
    end;
    
    //*****************************************************************
    // David Heffernan's routine, with Move replace by Int64 assignment
    
    function HexToBinHeffInt64Assign(const HexValue: string): string;
    const
      BinaryValues: array [0..15] of string = (
        '0000', '0001', '0010', '0011',
        '0100', '0101', '0110', '0111',
        '1000', '1001', '1010', '1011',
        '1100', '1101', '1110', '1111'
      );
    var
      HexDigit: Char;
      HexDigitValue: Integer;
      Ptr: PChar;
    begin
      SetLength(Result, Length(HexValue) * 4);
      Ptr := Pointer(Result);
      for HexDigit in HexValue do
      begin
        case HexDigit of
        '0'..'9':
          HexDigitValue := Ord(HexDigit) - Ord('0');
        'a'..'f':
          HexDigitValue := 10 + Ord(HexDigit) - Ord('a');
        'A'..'F':
          HexDigitValue := 10 + Ord(HexDigit) - Ord('A');
        else
          raise EConvertError.CreateFmt('Invalid hex digit ''%s'' found in ''%s''', [HexDigit, HexValue]);
        end;
        PInt64(Ptr)^ := PInt64(BinaryValues[HexDigitValue])^;
        Inc(Ptr, 4);
      end;
    end;
    
    //************************
    // Mahdi Safsafi's routine
    
    function HexToBinMahdi(const HexValue: string): string;
    type
      TChar4 = array [0 .. 3] of Char;
      PChar4 = ^TChar4;
    const
      Table1: array ['0' .. '9'] of TChar4 = ('0000', '0001', '0010', '0011', '0100', '0101', '0110', '0111', '1000', '1001');
      Table2: array ['a' .. 'f'] of TChar4 = ('1010', '1011', '1100', '1101', '1110', '1111');
    var
      HexDigit: Char;
      P: PChar4;
    begin
      SetLength(Result, Length(HexValue) * 4);
      P := PChar4(Result);
      for HexDigit in HexValue do
      begin
        case HexDigit of
          '0' .. '9':
            P^ := Table1[HexDigit];
          'a' .. 'f':
            P^ := Table2[HexDigit];
          'A' .. 'F':
            P^ := Table2[Chr(Ord(HexDigit) xor $20)];
        else
          raise EConvertError.CreateFmt('Invalid hex digit ''%s'' found in ''%s''', [HexDigit, HexValue]);
        end;
        Inc(P);
      end;
    end;
    
    //************************************************
    // Mahdi Safsafi's routine with a classic for loop
    
    function HexToBinMahdiClassicFor(const HexValue: string): string;
    type
      TChar4 = array [0 .. 3] of Char;
      PChar4 = ^TChar4;
    const
      Table1: array ['0' .. '9'] of TChar4 = ('0000', '0001', '0010', '0011', '0100', '0101', '0110', '0111', '1000', '1001');
      Table2: array ['a' .. 'f'] of TChar4 = ('1010', '1011', '1100', '1101', '1110', '1111');
    var
      i, n: Integer;
      HexDigit: PChar;
      P: PChar4;
    begin
      n := Length(HexValue);
      SetLength(Result, n * 4);
      HexDigit := PChar(Pointer(HexValue));
      P := PChar4(Result);
      for i := 1 to n do
      begin
        case HexDigit^ of
          '0' .. '9':
            P^ := Table1[HexDigit^];
          'a' .. 'f':
            P^ := Table2[HexDigit^];
          'A' .. 'F':
            P^ := Table2[Chr(Ord(HexDigit^) xor $20)];
        else
          raise EConvertError.CreateFmt('Invalid hex digit ''%s'' found in ''%s''', [HexDigit, HexValue]);
        end;
        Inc(P);
        Inc(HexDigit);
      end;
    end;
    
    //*****************
    // Stefan's routine
    function HexToBinStefan(const HexValue: string): string;
    label
      _error;
    type
      TChar4 = array [0..3] of Char;
      PChar4 = ^TChar4;
    const
      Table: array [0..22] of TChar4 = (
       '0000', '0001', '0010', '0011', '0100', '0101', '0110', '0111', '1000', '1001', // 0-9
       'xxxx', 'xxxx', 'xxxx', 'xxxx', 'xxxx', 'xxxx', 'xxxx',                         // :-@ - unused
       '1010', '1011', '1100', '1101', '1110', '1111');                                // A-F
    var
      len, i: Integer;
      HexDigit: PChar;
      P: PChar4;
      n: Cardinal;
    begin
      len := Length(HexValue);
      SetLength(Result, len * 4);
      P := PChar4(Result);
      HexDigit := @PChar(Pointer(HexValue))[0];
      for i := 1 to len do
      begin
        n := Cardinal((Ord(HexDigit^) - 48) and not 32);
        {$B+}
        if ((n > 9) and (n < 15)) or (n > 22) then goto _error;
        {$B-}
        P^ := Table[n];
    
        Inc(P);
        Inc(HexDigit);
      end;
      Exit;
    _error:
      raise EConvertError.CreateFmt('Invalid hex digit ''%s'' found in ''%s''', [HexDigit^, HexValue]);
    end;
    
    function HexToBinStefan2(const HexValue: string): string;
    label
      _error;
    type
      TChar4 = array [0..3] of Char;
      PChar4 = ^TChar4;
    const
      Table: array [0..22] of TChar4 = (
       '0000', '0001', '0010', '0011', '0100', '0101', '0110', '0111', '1000', '1001', // 0-9
       'xxxx', 'xxxx', 'xxxx', 'xxxx', 'xxxx', 'xxxx', 'xxxx',                         // :-@ - unused
       '1010', '1011', '1100', '1101', '1110', '1111');                                // A-F
    var
      len, i: Integer;
      HexDigit: PChar;
      P: PChar4;
      n: Cardinal;
    begin
      len := Length(HexValue);
      SetLength(Result, len * 4);
      P := PChar4(Result);
      HexDigit := @PChar(Pointer(HexValue))[0];
      for i := 1 to len do
      begin
        // subtract 48 to make '0'-'9' 0-9 which enables unconditionally downcasing any upper case char
        n := Cardinal((Ord(HexDigit^) - 48) and not 32);
    
        {$B-} // avoid multiple conditional jumps by not using short eval
    
        // avoid one check by simply subtracting 10 and checking the invalid range of 10-16
        // thank you godbolt.org and amazingly optimizing c++ compilers for that idea! <3
        if (Cardinal(Integer(n)-10) <= 6) or (n > 22) then goto _error;
        {$B-}
    
        P^ := Table[n];
    
        Inc(P);
        Inc(HexDigit);
      end;
      Exit;
    _error:
      raise EConvertError.CreateFmt('Invalid hex digit ''%s'' found in ''%s''', [HexDigit^, HexValue]);
    end;
    
    {$IFDEF CPUX86}
    //***********************
    // Kas Ob's asm32 version
    
    procedure CharToBin_ASM32(HexChar: Char; HexBuffer: PChar);
    asm
            push edi
            mov edi,edx
            //      Get the decimal value of one Hex Char (= half byte)
            movzx   eax, HexChar
            mov     ecx, 57
            sub     ecx, eax
            sar     ecx, 31
            and     ecx, 39
            neg     ecx
            add     eax, ecx
            add     eax,  - 48
            //      Produce 4 Chars presenting 4 bits of HexChar
            xor     ecx,ecx
            mov     dx,$1
            test    al,4
            cmovne  cx,dx
            shl     ecx,16
            test    al,8
            cmovne  cx,dx
            add     ecx,$00300030
            mov     [edi],ecx
            xor     ecx,ecx
            test    al,1
            cmovne  cx,dx
            shl     ecx,16
            test    al,2
            cmovne  cx,dx
            add     ecx,$00300030
            mov     [edi+4],ecx
            pop edi
    end;
    
    function HexToBinAsm32(const HexValue: string): string;
    var
      HexDigit: Char;
      Ptr: PChar;
    begin
      SetLength(Result, Length(HexValue) * 4);
      Ptr := Pointer(Result);
      for HexDigit in HexValue do
      begin
        case HexDigit of
        '0'..'9','a'..'f','A'..'F':
          CharToBin_ASM32(HexDigit, Ptr);
        else
          raise EConvertError.CreateFmt('Invalid hex digit ''%s'' found in ''%s''', [HexDigit, HexValue]);
        end;
        Inc(Ptr, 4);
      end;
    end;
    
    //*********************
    // Kas Ob's xmm version
    
    procedure CharToBin_XMM(HexChar: Char; HexBuffer: PChar);
    const
      DEC_TO_BIN_WORD_MASK: array[0..7] of UInt16 = ($01, $02,$00,$00, $04, $08, $00, $00);
      DEC_TO_BIN_FF_TO_CHARONE_DISTANCE: array[0..7] of UInt16 = ($FFCF, $FFCF, $FFCF, $FFCF, $FFCF, $FFCF, $FFCF, $FFCF);
      DEC_TO_BIN_REVERSE_MASK: array[0..15] of Byte = (10, $80, 8, $80, 2, $80, 0, $80, $80, $80, $80, $80, $80, $80, $80, $80);
    asm
            movzx   eax, HexChar
            mov     ecx, 57
            sub     ecx, eax
            sar     ecx, 31
            and     ecx, 39
            neg     ecx
            add     eax, ecx
            add     eax,  - 48
            //      Produce 4 Chars presenting 4 bits of HexChar
            movd    xmm0, eax
            pxor    xmm1, xmm1
            movdqu  xmm2, DEC_TO_BIN_FF_TO_CHARONE_DISTANCE
            movdqu  xmm3, DEC_TO_BIN_WORD_MASK
            movdqu  xmm4,DEC_TO_BIN_REVERSE_MASK
            punpckldq xmm0, xmm0
            packssdw xmm0, xmm0
            pand    xmm0, xmm3
            pcmpeqw xmm0, xmm1
            psubw   xmm0, xmm2
            PSHUFB  xmm0, xmm4                   // reverse the result
            movq    qword ptr[HexBuffer], xmm0
    end;
    
    function HexToBinXmm(const HexValue: string): string;
    var
      HexDigit: Char;
      Ptr: PChar;
    begin
      SetLength(Result, Length(HexValue) * 4);
      Ptr := Pointer(Result);
      for HexDigit in HexValue do
      begin
        case HexDigit of
        '0'..'9','a'..'f','A'..'F':
          CharToBin_XMM(HexDigit, Ptr);
        else
          raise EConvertError.CreateFmt('Invalid hex digit ''%s'' found in ''%s''', [HexDigit, HexValue]);
        end;
        Inc(Ptr, 4);
      end;
    end;
    {$ENDIF}
    
    //***************
    // Benchmark code
    
    function RandomHexDigit: Char;
    var
      Ordinal: Integer;
    begin
      Ordinal := Random(16);
      case Ordinal of
      0..9:
        Result := Chr(Ord('0') + Ordinal);
      10..15:
        Result := Chr(Ord('a') + Ordinal - 10);
      else
        raise Exception.Create('');
      end;
    end;
    
    function RandomHexString(Length: Integer): string;
    var
      Index: Integer;
    begin
      SetLength(Result, Length);
      for Index := 1 to Length do
        Result[Index] := RandomHexDigit;
    end;
    
    procedure TestCorrectness;
    var
      Index: Integer;
      HexStr, BinStr: string;
    begin
      for Index := 0 to $fffff do
      begin
        HexStr := IntToHex(Index, 6);
        BinStr := HexToBinHeffMove(HexStr);
    
        if BinStr<>HexToBinHeffInt64Assign(HexStr) then
          raise Exception.Create('incorrect implementation');
        if BinStr<>HexToBinMahdi(HexStr) then
          raise Exception.Create('incorrect implementation');
        if BinStr<>HexToBinMahdiClassicFor(HexStr) then
          raise Exception.Create('incorrect implementation');
        if BinStr<>HexToBinStefan(HexStr) then
          raise Exception.Create('incorrect implementation');
        if BinStr<>HexToBinStefan2(HexStr) then
          raise Exception.Create('incorrect implementation');
    {$IFDEF CPUX86}
        if BinStr<>HexToBinAsm32(HexStr) then
          raise Exception.Create('incorrect implementation');
        if BinStr<>HexToBinXmm(HexStr) then
          raise Exception.Create('incorrect implementation');
    {$ENDIF}
      end;
    end;
    
    procedure Benchmark;
    var
      Index, Iteration: Integer;
      sw: TStopwatch;
      HexStrings: TArray<string>;
      binaryString: string;
    begin
      SetLength(HexStrings, InputDataCount);
      for Index := 0 to InputDataCount-1 do
        HexStrings[Index] := RandomHexString(HexStringLength);
    
      sw := TStopwatch.StartNew;
      for Iteration := 0 to IterationCount-1 do
        for Index := 0 to InputDataCount-1 do
        begin
          binaryString := HexToBinHeffMove(HexStrings[Index]);
          binaryString := ''; // force a reallocation of the string every iteration of this loop
        end;
      Writeln('Pascal lookup (Heff, move): ', sw.ElapsedMilliseconds);
    
      sw := TStopwatch.StartNew;
      for Iteration := 0 to IterationCount-1 do
        for Index := 0 to InputDataCount-1 do
        begin
          binaryString := HexToBinHeffInt64Assign(HexStrings[Index]);
          binaryString := ''; // force a reallocation of the string every iteration of this loop
        end;
      Writeln('Pascal lookup (Heff, int64 assign): ', sw.ElapsedMilliseconds);
    
      sw := TStopwatch.StartNew;
      for Iteration := 0 to IterationCount-1 do
        for Index := 0 to InputDataCount-1 do
        begin
          binaryString := HexToBinMahdi(HexStrings[Index]);
          binaryString := ''; // force a reallocation of the string every iteration of this loop
        end;
      Writeln('Pascal lookup (Mahdi): ', sw.ElapsedMilliseconds);
    
      sw := TStopwatch.StartNew;
      for Iteration := 0 to IterationCount-1 do
        for Index := 0 to InputDataCount-1 do
        begin
          binaryString := HexToBinMahdiClassicFor(HexStrings[Index]);
          binaryString := ''; // force a reallocation of the string every iteration of this loop
        end;
      Writeln('Pascal lookup (Mahdi, classic for): ', sw.ElapsedMilliseconds);
    
      sw := TStopwatch.StartNew;
      for Iteration := 0 to IterationCount-1 do
        for Index := 0 to InputDataCount-1 do
        begin
          binaryString := HexToBinStefan(HexStrings[Index]);
          binaryString := ''; // force a reallocation of the string every iteration of this loop
        end;
      Writeln('Pascal lookup (Stefan): ', sw.ElapsedMilliseconds);
    
      sw := TStopwatch.StartNew;
      for Iteration := 0 to IterationCount-1 do
        for Index := 0 to InputDataCount-1 do
        begin
          binaryString := HexToBinStefan2(HexStrings[Index]);
          binaryString := ''; // force a reallocation of the string every iteration of this loop
        end;
      Writeln('Pascal lookup (Stefan mk 2): ', sw.ElapsedMilliseconds);
    
    {$IFDEF CPUX86}
      sw := TStopwatch.StartNew;
      for Iteration := 0 to IterationCount-1 do
        for Index := 0 to InputDataCount-1 do
        begin
          binaryString := HexToBinAsm32(HexStrings[Index]);
          binaryString := ''; // force a reallocation of the string every iteration of this loop
        end;
      Writeln('asm32: ', sw.ElapsedMilliseconds);
    
      sw := TStopwatch.StartNew;
      for Iteration := 0 to IterationCount-1 do
        for Index := 0 to InputDataCount-1 do
        begin
          binaryString := HexToBinXmm(HexStrings[Index]);
          binaryString := ''; // force a reallocation of the string every iteration of this loop
        end;
      Writeln('xmm: ', sw.ElapsedMilliseconds);
    {$ENDIF}
    end;
    
    begin
      try
        Randomize;
        TestCorrectness;
        Benchmark;
      except
        on E: Exception do
          Writeln(E.ClassName, ': ', E.Message);
      end;
      Writeln('Completed');
      Readln;
    end.

    32 bit output:

    Pascal lookup (Heff, move): 4658
    Pascal lookup (Heff, int64 assign): 3702
    Pascal lookup (Mahdi): 3645
    Pascal lookup (Mahdi, classic for): 2766
    Pascal lookup (Stefan): 1024
    Pascal lookup (Stefan mk 2): 805
    asm32: 4165
    xmm: 3978

    64 bit output:

    Pascal lookup (Heff, move): 4720
    Pascal lookup (Heff, int64 assign): 3138
    Pascal lookup (Mahdi): 3221
    Pascal lookup (Mahdi, classic for): 2629
    Pascal lookup (Stefan): 1180
    Pascal lookup (Stefan mk 2): 909

     


  6. 6 minutes ago, Clément said:

    I know the test is about the conversion, but since "for in" is very slow, it might help straight things up a little extra bit

    Should change all of the for ins to be fair


  7. Updated benchmark code:

     

    {$APPTYPE CONSOLE}
    
    uses
      System.SysUtils,
      System.Classes,
      System.Diagnostics;
    
    //********************************************
    // Constants to tune the benchmark performance
    
    const
      HexStringLength = 64;
      InputDataCount = 512;
      IterationCount = 20000;
    
    //*****************************************************************
    // David Heffernan's routine, with Move replace by Int64 assignment
    
    {$DEFINE ReplaceMoveWithInt64Assign}
    function HexToBinHeff(const HexValue: string): string;
    const
      BinaryValues: array [0..15] of string = (
        '0000', '0001', '0010', '0011',
        '0100', '0101', '0110', '0111',
        '1000', '1001', '1010', '1011',
        '1100', '1101', '1110', '1111'
      );
    var
      HexDigit: Char;
      HexDigitValue: Integer;
      Ptr: PChar;
    begin
      SetLength(Result, Length(HexValue) * 4);
      Ptr := Pointer(Result);
      for HexDigit in HexValue do
      begin
        case HexDigit of
        '0'..'9':
          HexDigitValue := Ord(HexDigit) - Ord('0');
        'a'..'f':
          HexDigitValue := 10 + Ord(HexDigit) - Ord('a');
        'A'..'F':
          HexDigitValue := 10 + Ord(HexDigit) - Ord('A');
        else
          raise EConvertError.CreateFmt('Invalid hex digit ''%s'' found in ''%s''', [HexDigit, HexValue]);
        end;
    {$IFDEF ReplaceMoveWithInt64Assign}
        PInt64(Ptr)^ := PInt64(BinaryValues[HexDigitValue])^;
    {$ELSE}
        Move(Pointer(BinaryValues[HexDigitValue])^, Ptr^, 4 * SizeOf(Char));
    {$ENDIF}
        Inc(Ptr, 4);
      end;
    end;
    
    //************************
    // Mahdi Safsafi's routine
    
    {$DEFINE ReplaceMoveWithInt64Assign}
    function HexToBinMahdi(const HexValue: string): string;
    type
      TChar4 = array [0 .. 3] of Char;
      PChar4 = ^TChar4;
    const
      Table1: array ['0' .. '9'] of TChar4 = ('0000', '0001', '0010', '0011', '0100', '0101', '0110', '0111', '1000', '1001');
      Table2: array ['a' .. 'f'] of TChar4 = ('1010', '1011', '1100', '1101', '1110', '1111');
    var
      HexDigit: Char;
      P: PChar4;
    begin
      SetLength(Result, Length(HexValue) * 4);
      P := PChar4(Result);
      for HexDigit in HexValue do
      begin
        case HexDigit of
          '0' .. '9':
            P^ := Table1[HexDigit];
          'a' .. 'f':
            P^ := Table2[HexDigit];
          'A' .. 'F':
            P^ := Table2[Chr(Ord(HexDigit) xor $20)];
        else
          raise EConvertError.CreateFmt('Invalid hex digit ''%s'' found in ''%s''', [HexDigit, HexValue]);
        end;
        Inc(P);
      end;
    end;
    
    {$IFDEF CPUX86}
    //***********************
    // Kas Ob's asm32 version
    
    procedure CharToBin_ASM32(HexChar: Char; HexBuffer: PChar);
    asm
            push edi
            mov edi,edx
            //      Get the decimal value of one Hex Char (= half byte)
            movzx   eax, HexChar
            mov     ecx, 57
            sub     ecx, eax
            sar     ecx, 31
            and     ecx, 39
            neg     ecx
            add     eax, ecx
            add     eax,  - 48
            //      Produce 4 Chars presenting 4 bits of HexChar
            xor     ecx,ecx
            mov     dx,$1
            test    al,4
            cmovne  cx,dx
            shl     ecx,16
            test    al,8
            cmovne  cx,dx
            add     ecx,$00300030
            mov     [edi],ecx
            xor     ecx,ecx
            test    al,1
            cmovne  cx,dx
            shl     ecx,16
            test    al,2
            cmovne  cx,dx
            add     ecx,$00300030
            mov     [edi+4],ecx
            pop edi
    end;
    
    function HexToBinAsm32(const HexValue: string): string;
    var
      HexDigit: Char;
      Ptr: PChar;
    begin
      SetLength(Result, Length(HexValue) * 4);
      Ptr := Pointer(Result);
      for HexDigit in HexValue do
      begin
        case HexDigit of
        '0'..'9','a'..'f','A'..'F':
          CharToBin_ASM32(HexDigit, Ptr);
        else
          raise EConvertError.CreateFmt('Invalid hex digit ''%s'' found in ''%s''', [HexDigit, HexValue]);
        end;
        Inc(Ptr, 4);
      end;
    end;
    
    //*********************
    // Kas Ob's xmm version
    
    procedure CharToBin_XMM(HexChar: Char; HexBuffer: PChar);
    const
      DEC_TO_BIN_WORD_MASK: array[0..7] of UInt16 = ($01, $02,$00,$00, $04, $08, $00, $00);
      DEC_TO_BIN_FF_TO_CHARONE_DISTANCE: array[0..7] of UInt16 = ($FFCF, $FFCF, $FFCF, $FFCF, $FFCF, $FFCF, $FFCF, $FFCF);
      DEC_TO_BIN_REVERSE_MASK: array[0..15] of Byte = (10, $80, 8, $80, 2, $80, 0, $80, $80, $80, $80, $80, $80, $80, $80, $80);
    asm
            movzx   eax, HexChar
            mov     ecx, 57
            sub     ecx, eax
            sar     ecx, 31
            and     ecx, 39
            neg     ecx
            add     eax, ecx
            add     eax,  - 48
            //      Produce 4 Chars presenting 4 bits of HexChar
            movd    xmm0, eax
            pxor    xmm1, xmm1
            movdqu  xmm2, DEC_TO_BIN_FF_TO_CHARONE_DISTANCE
            movdqu  xmm3, DEC_TO_BIN_WORD_MASK
            movdqu  xmm4,DEC_TO_BIN_REVERSE_MASK
            punpckldq xmm0, xmm0
            packssdw xmm0, xmm0
            pand    xmm0, xmm3
            pcmpeqw xmm0, xmm1
            psubw   xmm0, xmm2
            PSHUFB  xmm0, xmm4                   // reverse the result
            movq    qword ptr[HexBuffer], xmm0
    end;
    
    function HexToBinXmm(const HexValue: string): string;
    var
      HexDigit: Char;
      Ptr: PChar;
    begin
      SetLength(Result, Length(HexValue) * 4);
      Ptr := Pointer(Result);
      for HexDigit in HexValue do
      begin
        case HexDigit of
        '0'..'9','a'..'f','A'..'F':
          CharToBin_XMM(HexDigit, Ptr);
        else
          raise EConvertError.CreateFmt('Invalid hex digit ''%s'' found in ''%s''', [HexDigit, HexValue]);
        end;
        Inc(Ptr, 4);
      end;
    end;
    {$ENDIF}
    
    //***************
    // Benchmark code
    
    function RandomHexDigit: Char;
    var
      Ordinal: Integer;
    begin
      Ordinal := Random(16);
      case Ordinal of
      0..9:
        Result := Chr(Ord('0') + Ordinal);
      10..15:
        Result := Chr(Ord('a') + Ordinal - 10);
      else
        raise Exception.Create('');
      end;
    end;
    
    function RandomHexString(Length: Integer): string;
    var
      Index: Integer;
    begin
      SetLength(Result, Length);
      for Index := 1 to Length do
        Result[Index] := RandomHexDigit;
    end;
    
    procedure TestCorrectness;
    var
      Index: Integer;
      HexStr, BinStr: string;
    begin
      for Index := 0 to $fffff do
      begin
        HexStr := IntToHex(Index, 6);
        BinStr := HexToBinHeff(HexStr);
    
        if BinStr<>HexToBinMahdi(HexStr) then
          raise Exception.Create('incorrect implementation');
        if BinStr<>HexToBinAsm32(HexStr) then
          raise Exception.Create('incorrect implementation');
        if BinStr<>HexToBinXmm(HexStr) then
          raise Exception.Create('incorrect implementation');
      end;
    end;
    
    procedure Benchmark;
    var
      Index, Iteration: Integer;
      sw: TStopwatch;
      HexStrings: TArray<string>;
      binaryString: string;
    begin
      SetLength(HexStrings, InputDataCount);
      for Index := 0 to InputDataCount-1 do
        HexStrings[Index] := RandomHexString(HexStringLength);
    
      sw := TStopwatch.StartNew;
      for Iteration := 0 to IterationCount-1 do
        for Index := 0 to InputDataCount-1 do
        begin
          binaryString := HexToBinHeff(HexStrings[Index]);
          binaryString := ''; // force a reallocation of the string every iteration of this loop
        end;
      Writeln('Pascal lookup (Heff): ', sw.ElapsedMilliseconds);
    
      sw := TStopwatch.StartNew;
      for Iteration := 0 to IterationCount-1 do
        for Index := 0 to InputDataCount-1 do
        begin
          binaryString := HexToBinMahdi(HexStrings[Index]);
          binaryString := ''; // force a reallocation of the string every iteration of this loop
        end;
      Writeln('Pascal lookup (Mahdi): ', sw.ElapsedMilliseconds);
    
    {$IFDEF CPUX86}
      sw := TStopwatch.StartNew;
      for Iteration := 0 to IterationCount-1 do
        for Index := 0 to InputDataCount-1 do
        begin
          binaryString := HexToBinAsm32(HexStrings[Index]);
          binaryString := ''; // force a reallocation of the string every iteration of this loop
        end;
      Writeln('asm32: ', sw.ElapsedMilliseconds);
    
      sw := TStopwatch.StartNew;
      for Iteration := 0 to IterationCount-1 do
        for Index := 0 to InputDataCount-1 do
        begin
          binaryString := HexToBinXmm(HexStrings[Index]);
          binaryString := ''; // force a reallocation of the string every iteration of this loop
        end;
      Writeln('xmm: ', sw.ElapsedMilliseconds);
    {$ENDIF}
    end;
    
    begin
      try
        Randomize;
        TestCorrectness;
        Benchmark;
      except
        on E: Exception do
          Writeln(E.ClassName, ': ', E.Message);
      end;
      Readln;
    end.

     

    OUTPUT:

     

    Pascal lookup (Heff): 3717
    Pascal lookup (Mahdi): 3443
    asm32: 4472
    xmm: 4038

     

    Although the output varies from run to run ofc.

     

    I think Stefan is right though. If you want to benefit from SIMD you need to process bigger chunks than a single hex digit at a time.


  8. 13 minutes ago, Kas Ob. said:

    movdqu xmm3,DEC_TO_BIN_WORD_MASK
    pand    xmm0, xmm3

    I'm still seeing an AV here even with that change. It would be really nice to get this fixed and then I can update my benchmark program again.


  9. UPDATED BENCHMARK:

     

    {$APPTYPE CONSOLE}
    
    uses
      System.SysUtils,
      System.Classes,
      System.Diagnostics;
    
    //********************************************
    // Constants to tune the benchmark performance
    
    const
      HexStringLength = 64;
      InputDataCount = 512;
      IterationCount = 20000;
    
    //*****************************************************************
    // David Heffernan's routine, with Move replace by Int64 assignment
    
    {$DEFINE ReplaceMoveWithInt64Assign}
    function HexToBin(const HexValue: string): string;
    const
      BinaryValues: array [0..15] of string = (
        '0000', '0001', '0010', '0011',
        '0100', '0101', '0110', '0111',
        '1000', '1001', '1010', '1011',
        '1100', '1101', '1110', '1111'
      );
    var
      HexDigit: Char;
      HexDigitValue: Integer;
      Ptr: PChar;
    begin
      SetLength(Result, Length(HexValue) * 4);
      Ptr := Pointer(Result);
      for HexDigit in HexValue do
      begin
        case HexDigit of
        '0'..'9':
          HexDigitValue := Ord(HexDigit) - Ord('0');
        'a'..'f':
          HexDigitValue := 10 + Ord(HexDigit) - Ord('a');
        'A'..'F':
          HexDigitValue := 10 + Ord(HexDigit) - Ord('A');
        else
          raise EConvertError.CreateFmt('Invalid hex digit ''%s'' found in ''%s''', [HexDigit, HexValue]);
        end;
    {$IFDEF ReplaceMoveWithInt64Assign}
        PInt64(Ptr)^ := PInt64(BinaryValues[HexDigitValue])^;
    {$ELSE}
        Move(Pointer(BinaryValues[HexDigitValue])^, Ptr^, 4 * SizeOf(Char));
    {$ENDIF}
        Inc(Ptr, 4);
      end;
    end;
    
    //***********************
    // Kas Ob's asm32 version
    
    procedure CharToBin_ASM32(HexChar: Char; HexBuffer: PChar);
    asm
            push edi
            mov edi,edx
            //      Get the decimal value of one Hex Char (= half byte)
            movzx   eax, HexChar
            mov     ecx, 57
            sub     ecx, eax
            sar     ecx, 31
            and     ecx, 39
            neg     ecx
            add     eax, ecx
            add     eax,  - 48
            //      Produce 4 Chars presenting 4 bits of HexChar
            xor     ecx,ecx
            mov     dx,$1
            test    al,4
            cmovne  cx,dx
            shl     ecx,16
            test    al,8
            cmovne  cx,dx
            add     ecx,$00300030
            mov     [edi],ecx
            xor     ecx,ecx
            test    al,1
            cmovne  cx,dx
            shl     ecx,16
            test    al,2
            cmovne  cx,dx
            add     ecx,$00300030
            mov     [edi+4],ecx
            pop edi
    end;
    
    function HexToBinAsm32(const HexValue: string): string;
    var
      HexDigit: Char;
      Ptr: PChar;
    begin
      SetLength(Result, Length(HexValue) * 4);
      Ptr := Pointer(Result);
      for HexDigit in HexValue do
      begin
        case HexDigit of
        '0'..'9','a'..'f','A'..'F':
          CharToBin_ASM32(HexDigit, Ptr);
        else
          raise EConvertError.CreateFmt('Invalid hex digit ''%s'' found in ''%s''', [HexDigit, HexValue]);
        end;
        Inc(Ptr, 4);
      end;
    end;
    
    //*********************
    // Kas Ob's xmm version
    
    procedure CharToBin_XMM(HexChar: Char; HexBuffer: PChar);
    const
      DEC_TO_BIN_WORD_MASK: array[0..7] of UInt16 = ($01, $02,$00,$00, $04, $08, $00, $00);
      DEC_TO_BIN_FF_TO_CHARONE_DISTANCE: array[0..7] of UInt16 = ($FFCF, $FFCF, $FFCF, $FFCF, $FFCF, $FFCF, $FFCF, $FFCF);
      DEC_TO_BIN_REVERSE_MASK: array[0..15] of Byte = (10, $80, 8, $80, 2, $80, 0, $80, $80, $80, $80, $80, $80, $80, $80, $80);
    asm
            movzx   eax, HexChar
            mov     ecx, 57
            sub     ecx, eax
            sar     ecx, 31
            and     ecx, 39
            neg     ecx
            add     eax, ecx
            add     eax,  - 48
            //      Produce 4 Chars presenting 4 bits of HexChar
            movd    xmm0, eax
            pxor    xmm1, xmm1
            movdqu  xmm2, DEC_TO_BIN_FF_TO_CHARONE_DISTANCE
            punpckldq xmm0, xmm0
            packssdw xmm0, xmm0
            pand    xmm0, dqword ptr[DEC_TO_BIN_WORD_MASK]
            pcmpeqw xmm0, xmm1
            psubw   xmm0, xmm2
            movdqu xmm3,DEC_TO_BIN_REVERSE_MASK
            PSHUFB   xmm0, xmm3                   // reverse the result
            movq    qword ptr[HexBuffer], xmm0
    end;
    
    function HexToBinXmm(const HexValue: string): string;
    var
      HexDigit: Char;
      Ptr: PChar;
    begin
      SetLength(Result, Length(HexValue) * 4);
      Ptr := Pointer(Result);
      for HexDigit in HexValue do
      begin
        case HexDigit of
        '0'..'9','a'..'f','A'..'F':
          CharToBin_XMM(HexDigit, Ptr);
        else
          raise EConvertError.CreateFmt('Invalid hex digit ''%s'' found in ''%s''', [HexDigit, HexValue]);
        end;
        Inc(Ptr, 4);
      end;
    end;
    
    //***************
    // Benchmark code
    
    function RandomHexDigit: Char;
    var
      Ordinal: Integer;
    begin
      Ordinal := Random(16);
      case Ordinal of
      0..9:
        Result := Chr(Ord('0') + Ordinal);
      10..15:
        Result := Chr(Ord('a') + Ordinal - 10);
      else
        raise Exception.Create('');
      end;
    end;
    
    function RandomHexString(Length: Integer): string;
    var
      Index: Integer;
    begin
      SetLength(Result, Length);
      for Index := 1 to Length do
        Result[Index] := RandomHexDigit;
    end;
    
    procedure Benchmark;
    var
      Index, Iteration: Integer;
      sw: TStopwatch;
      HexStrings: TStringList;
      binaryString: string;
    begin
      HexStrings := TStringList.Create;
      try
        for Index := 0 to InputDataCount-1 do
          HexStrings.Add(RandomHexString(HexStringLength));
    
        sw := TStopwatch.StartNew;
        for Iteration := 0 to IterationCount-1 do
          for Index := 0 to HexStrings.Count-1 do
          begin
            binaryString := HexToBin(HexStrings[Index]);
            binaryString := ''; // force a reallocation of the string every iteration of this loop
          end;
        Writeln('Pascal lookup: ', sw.ElapsedMilliseconds);
    
        sw := TStopwatch.StartNew;
        for Iteration := 0 to IterationCount-1 do
          for Index := 0 to HexStrings.Count-1 do
          begin
            binaryString := HexToBinAsm32(HexStrings[Index]);
            binaryString := ''; // force a reallocation of the string every iteration of this loop
          end;
        Writeln('asm32: ', sw.ElapsedMilliseconds);
    
        sw := TStopwatch.StartNew;
        for Iteration := 0 to IterationCount-1 do
          for Index := 0 to HexStrings.Count-1 do
          begin
            binaryString := HexToBinXmm(HexStrings[Index]);
            binaryString := ''; // force a reallocation of the string every iteration of this loop
          end;
        Writeln('xmm: ', sw.ElapsedMilliseconds);
      finally
        HexStrings.Free;
      end;
    end;
    
    begin
      try
        Randomize;
        Benchmark;
      except
        on E: Exception do
          Writeln(E.ClassName, ': ', E.Message);
      end;
      Readln;
    end.

     

    Compiled on XE7, 32 bit, release configuration

     

    OUTPUT:

     

    Pascal lookup: 3743
    asm32: 4396
    EAccessViolation: Access violation at address 004D77D3 in module 'Project14.exe'. Read of address FFFFFFFF

     

    The AV is HexToBinXmm. Perhaps I misunderstood something.


  10. 40 minutes ago, Kas Ob. said:

    I think, i just did.

    I made this benchmark program, that wraps your single digit asm into a function that handles string input, and included error checking.  If you skip the error checking then your asm code is faster. But if you include error checking, not so. How would you go about integrating your single digit code into a string level function with error checking?

     

    Also, I'd love to have a high level explanation as to why my reasoning that a lookup table should not be beaten is wrong. Or is it perhaps that the implementation of the lookup table could be done better?

     

    {$APPTYPE CONSOLE}
    
    uses
      System.SysUtils,
      System.Classes,
      System.Diagnostics;
    
    //********************************************
    // Constants to tune the benchmark performance
    
    const
      HexStringLength = 64;
      InputDataCount = 512;
      IterationCount = 20000;
    
    //*****************************************************************
    // David Heffernan's routine, with Move replace by Int64 assignment
    
    {$DEFINE ReplaceMoveWithInt64Assign}
    function HexToBin(const HexValue: string): string;
    const
      BinaryValues: array [0..15] of string = (
        '0000', '0001', '0010', '0011',
        '0100', '0101', '0110', '0111',
        '1000', '1001', '1010', '1011',
        '1100', '1101', '1110', '1111'
      );
    var
      HexDigit: Char;
      HexDigitValue: Integer;
      Ptr: PChar;
    begin
      SetLength(Result, Length(HexValue) * 4);
      Ptr := Pointer(Result);
      for HexDigit in HexValue do
      begin
        case HexDigit of
        '0'..'9':
          HexDigitValue := Ord(HexDigit) - Ord('0');
        'a'..'f':
          HexDigitValue := 10 + Ord(HexDigit) - Ord('a');
        'A'..'F':
          HexDigitValue := 10 + Ord(HexDigit) - Ord('A');
        else
          raise EConvertError.CreateFmt('Invalid hex digit ''%s'' found in ''%s''', [HexDigit, HexValue]);
        end;
    {$IFDEF ReplaceMoveWithInt64Assign}
        PInt64(Ptr)^ := PInt64(BinaryValues[HexDigitValue])^;
    {$ELSE}
        Move(Pointer(BinaryValues[HexDigitValue])^, Ptr^, 4 * SizeOf(Char));
    {$ENDIF}
        Inc(Ptr, 4);
      end;
    end;
    
    //*********************
    // Kas Ob's asm version
    
    procedure CharToBin_ASM32(HexChar: Char; HexBuffer: PChar);
    asm
            push edi
            mov edi,edx
            //      Get the decimal value of one Hex Char (= half byte)
            movzx   eax, HexChar
            mov     ecx, 57
            sub     ecx, eax
            sar     ecx, 31
            and     ecx, 39
            neg     ecx
            add     eax, ecx
            add     eax,  - 48
            //      Produce 4 Chars presenting 4 bits of HexChar
            xor     ecx,ecx
            mov     dx,$1
            test    al,4
            cmovne  cx,dx
            shl     ecx,16
            test    al,8
            cmovne  cx,dx
            add     ecx,$00300030
            mov     [edi],ecx
            xor     ecx,ecx
            test    al,1
            cmovne  cx,dx
            shl     ecx,16
            test    al,2
            cmovne  cx,dx
            add     ecx,$00300030
            mov     [edi+4],ecx
            pop edi
    end;
    
    function HexToBinAsm(const HexValue: string): string;
    var
      HexDigit: Char;
      HexDigitValue: Integer;
      Ptr: PChar;
    begin
      SetLength(Result, Length(HexValue) * 4);
      Ptr := Pointer(Result);
      for HexDigit in HexValue do
      begin
        case HexDigit of
        '0'..'9','a'..'f','A'..'F':
          CharToBin_ASM32(HexDigit, Ptr);
        else
          raise EConvertError.CreateFmt('Invalid hex digit ''%s'' found in ''%s''', [HexDigit, HexValue]);
        end;
        Inc(Ptr, 4);
      end;
    end;
    
    //***************
    // Benchmark code
    
    function RandomHexDigit: Char;
    var
      Ordinal: Integer;
    begin
      Ordinal := Random(16);
      case Ordinal of
      0..9:
        Result := Chr(Ord('0') + Ordinal);
      10..15:
        Result := Chr(Ord('a') + Ordinal - 10);
      else
        raise Exception.Create('');
      end;
    end;
    
    function RandomHexString(Length: Integer): string;
    var
      Index: Integer;
    begin
      SetLength(Result, Length);
      for Index := 1 to Length do
        Result[Index] := RandomHexDigit;
    end;
    
    procedure Benchmark;
    var
      Index, Iteration: Integer;
      sw: TStopwatch;
      HexStrings: TStringList;
      binaryString: string;
    begin
      HexStrings := TStringList.Create;
      try
        for Index := 0 to InputDataCount-1 do
          HexStrings.Add(RandomHexString(HexStringLength));
        sw := TStopwatch.StartNew;
        for Iteration := 0 to IterationCount-1 do
          for Index := 0 to HexStrings.Count-1 do
          begin
            binaryString := HexToBin(HexStrings[Index]);
            binaryString := ''; // force a reallocation of the string every iteration of this loop
          end;
        Writeln(sw.ElapsedMilliseconds);
      finally
        HexStrings.Free;
      end;
    end;
    
    begin
      try
        Randomize;
        Benchmark;
      except
        on E: Exception do
          Writeln(E.ClassName, ': ', E.Message);
      end;
      Readln;
    end.

    EDIT: It turns out that this benchmark is possibly useless at the moment, because HexToBinAsm doesn't work correctly (almost certainly my fault). I need to work out what I've done wrong.
    EDIT2: No, it's fine, I was mistaken, HexToBinAsm above works correctly.


  11.  

    On 11/19/2020 at 4:12 PM, Mahdi Safsafi said:

    @David Heffernan Few remarks about your code if you don't mind :

    1- Its pointless to use string when characters are fixed in size ... Simply use static array of X char.

    2- Its also pointless to calculate index when you already used a case ... Simply declare your array using char-range. In your case, compiler generated additional instructions to compute the index. 

    
    
    function HexToBin2(const HexValue: string): string;
    type
      TChar4 = array [0 .. 3] of Char;
      PChar4 = ^TChar4;
    const
      Table1: array ['0' .. '9'] of TChar4 = ('0000', '0001', '0010', '0011', '0100', '0101', '0110', '0111', '1000', '1001');
      Table2: array ['a' .. 'f'] of TChar4 = ('1010', '1011', '1100', '1101', '1110', '1111');
    var
      HexDigit: Char;
      P: PChar4;
    begin
      SetLength(Result, Length(HexValue) * 4);
      P := PChar4(Result);
      for HexDigit in HexValue do
      begin
        case HexDigit of
          '0' .. '9':
            P^ := Table1[HexDigit];
          'a' .. 'f':
            P^ := Table2[HexDigit];
          'A' .. 'F':
            P^ := Table2[Chr(Ord(HexDigit) xor $20)];
        else
          raise EConvertError.CreateFmt('Invalid hex digit ''%s'' found in ''%s''', [HexDigit, HexValue]);
        end;
        Inc(P);
      end;
    end;

     

    My benchmarking suggests that point 1 has no impact on performance, but point 2 does.

     

    Actually the suggestion made by @Kas Ob. to replace the Move in my code with 

     

    PInt64(Ptr)^ := PInt64(BinaryValues[HexDigitValue])^

    is all you need to match the performance of your version.  Which makes a lot of sense to me.


  12. 13 minutes ago, Lars Fosdal said:

    I don't know, David, which is why I ask.

    I think the way I phrased the question should help your intuition! 😉 

     

    It's always worth double checking, but a lookup table involves working the answer out before you compile. I would imagine it is hard for runtime code to ever beat that.


  13. 16 minutes ago, Lars Fosdal said:

    How does it measure up speedwise to the others?

    How do you think it will compare against a lookup table? How would you expect computing an answer at runtime compare to computing the answer before compile time? 


  14. No guarantee that an out of bounds array access leads to an exception. You have just been unlucky that you've seen one every time you ran your code.

     

    Once again, nobody should ever write code like that.

    • Like 1

  15. I'd probably write it something like this:

     

    function HexToBin(const HexValue: string): string;
    const
      BinaryValues: array [0..15] of string = (
        '0000', '0001', '0010', '0011',
        '0100', '0101', '0110', '0111',
        '1000', '1001', '1010', '1011',
        '1100', '1101', '1110', '1111'
      );
    var
      HexDigit: Char;
      HexDigitValue: Integer;
      Ptr: PChar;
    begin
      SetLength(Result, Length(HexValue) * 4);
      Ptr := Pointer(Result);
      for HexDigit in HexValue do
      begin
        case HexDigit of
        '0'..'9':
          HexDigitValue := Ord(HexDigit) - Ord('0');
        'a'..'f':
          HexDigitValue := 10 + Ord(HexDigit) - Ord('a');
        'A'..'F':
          HexDigitValue := 10 + Ord(HexDigit) - Ord('A');
        else
          raise EConvertError.CreateFmt('Invalid hex digit ''%s'' found in ''%s''', [HexDigit, HexValue]);
        end;
        Move(Pointer(BinaryValues[HexDigitValue])^, Ptr^, 4 * SizeOf(Char));
        Inc(Ptr, 4);
      end;
    end;

     

    Some notes:

     

    1. A case statement makes this quite readable in my view.
    2. You really don't want to be wasting time using Pos to search within a string. You can get the value directly with arithmetic.
    3. I prefer to perform just a single allocation, rather than use repeated allocations with concatenation.
    4. You might want to consider how to treat leading zeros. For instance how should you treat 0F, should that be 00001111 or 1111? I'd expect that both would be desirable in different situations, so an option in an extra argument to the function would be needed.
    • Like 2
    • Thanks 3

  16. 3 hours ago, emailx45 said:

    for lEachHexChar in lHexValue do try Result := Result + lBinValues[Pos(UpperCase(lEachHexChar), lHexChars) - 1]; except // case the "char" is not found, we have a "AV"! then.... doesnt matter for us! end;

    Ugh. You can't rely on getting an AV.

     

    Don't ever write code like this.

    • Like 2
×