Stefan Glienke

September 13, 2024

1 hour ago, DelphiUdIT said:

the times with parallel processing are definitely interesting.

To me they are not - if you have some backend server code that is already doing multiple things in parallel it won't get any better if you then do parallel sort.

September 13, 2024

Just now, DelphiUdIT said:

There are sure an issue with recursive and stack allocation (or may be the algorithm in the ICompare method).

The issue is that in the Parallel implementation there is no mitigation against QuickSorts well known worst case.

September 13, 2024

This is the benchmark code - previous results I posted were with TTestType = Integer, but using Double shows similar results (I ran the benchmark as 64bit release)

program SortBench;

{$APPTYPE CONSOLE}

uses
  System.Generics.Collections,
  Spring,
  Spring.Comparers,
  Spring.Benchmark,
  System.SysUtils;

type
  TTestType = Double;

  TDistFunc = function(size: Integer): TArray<TTestType>;
  TSortFunc = procedure(var values: array of TTestType);

function IntToTestType(i: Integer): TTestType;
begin
  Result := i;
end;

function Equals(const left, right: TTestType): Boolean; inline;
begin
  Result := left = right;
end;

function Shuffled(size: Integer): TArray<TTestType>;
var
  i: Integer;
begin
  SetLength(Result, size);
  for i := 0 to size - 1 do
    Result[i] := IntToTestType(i);
  Spring.TArray.Shuffle<TTestType>(Result);
end;

function Shuffled_16Values(size: Integer): TArray<TTestType>;
var
  i: Integer;
begin
  SetLength(Result, size);
  for i := 0 to size - 1 do
    Result[i] := IntToTestType(Random(16));
end;

function TwoValues(size: Integer): TArray<TTestType>;
var
  i: Integer;
begin
  SetLength(Result, size);
  for i := 0 to size - 1 do
    Result[i] := IntToTestType(Random(2));
end;

function AllEqual(size: Integer): TArray<TTestType>;
var
  i: Integer;
begin
  SetLength(Result, size);
  for i := 0 to size - 1 do
    Result[i] := IntToTestType(0);
end;

function Ascending(size: Integer): TArray<TTestType>;
var
  i: Integer;
begin
  SetLength(Result, size);
  for i := 0 to size - 1 do
    Result[i] := IntToTestType(i);
end;

function SlightlyScrambled(size: Integer): TArray<TTestType>;
const
  scramblePercentage = 5;
var
  i, idx1, idx2: Integer;
  temp: TTestType;
  scrambleCount: Integer;
begin
  SetLength(Result, size);
  for i := 0 to size - 1 do
    Result[i] := IntToTestType(i);
  scrambleCount := size * scramblePercentage div 100;
  for i := 1 to scrambleCount do
  begin
    idx1 := Random(size);
    idx2 := Random(size);
    temp := Result[idx1];
    Result[idx1] := Result[idx2];
    Result[idx2] := temp;
  end;
end;

function SortedBlocks(size: Integer): TArray<TTestType>;
type
  TBlock100 = array[1..100] of TTestType;
  TBlock10  = array[1..10] of TTestType;
var
  i: Integer;
begin
  SetLength(Result, size);
  for i := 0 to size - 1 do
    Result[i] := IntToTestType(i);
  if size > 100 then
    Spring.TArray.Shuffle<TBlock100>(Pointer(Result), (size div (SizeOf(TBlock100) div SizeOf(TTestType))) - 1)
  else
    Spring.TArray.Shuffle<TBlock10>(Pointer(Result), (size div (SizeOf(TBlock10) div SizeOf(TTestType))) - 1);
end;

function SingleBigSwap(size: Integer): TArray<TTestType>;
var
  i: Integer;
  halfSize: Integer;
begin
  SetLength(Result, size);
  halfSize := size div 2;
  for i := 0 to halfSize - 1 do
    Result[i] := IntToTestType(i + halfSize);
  for i := halfSize to size -1 do
    Result[i] := IntToTestType(i - halfSize);
end;

function Descending(size: Integer): TArray<TTestType>;
var
  i: Integer;
begin
  SetLength(Result, size);
  for i := size-1 downto 0 do
    Result[i] := IntToTestType(i);
end;

function PipeOrgan(size: Integer): TArray<TTestType>;
var
  i: Integer;
begin
  SetLength(Result, size);
  for i := 0 to (size div 2) - 1 do
    Result[i] := IntToTestType(i);
  for i := size div 2 to size -1 do
    Result[i] := IntToTestType(size - 1);
end;

function PushFront(size: Integer): TArray<TTestType>;
var
  i: Integer;
begin
  SetLength(Result, size);
  for i := 1 to size - 1 do
    Result[i - 1] := IntToTestType(i);
  Result[size - 1] := IntToTestType(0);
end;

function PushMiddle(size: Integer): TArray<TTestType>;
var
  i: Integer;
begin
  SetLength(Result, size);
  for i := 0 to (size div 2) - 1 do
    Result[i] := IntToTestType(i);
  for i := (size div 2) + 1 to size -1 do
    Result[i - 1] := IntToTestType(i);
  Result[size - 1] := IntToTestType(size div 2);
end;

procedure QuickSort(var values: array of TTestType);
begin
  System.Generics.Collections.TArray.Sort<TTestType>(values);
end;

procedure QuickSort_SpringComparer(var values: array of TTestType);
begin
  System.Generics.Collections.TArray.Sort<TTestType>(values
    , Spring.Comparers.TComparer<TTestType>.Default
    );
end;

procedure IntroSort(var values: array of TTestType);
begin
  Spring.TArray.Sort<TTestType>(values);
end;

const
  Distributions: array[0..11] of record name: string; func: TDistFunc end = (
    (name: 'random'; func: Shuffled),
    (name: 'few_unique'; func: Shuffled_16Values),
    (name: 'two_values'; func: TwoValues),
    (name: 'all_equal'; func: AllEqual),
    (name: 'ascending'; func: Ascending),
    (name: 'slightly_scrambled'; func: SlightlyScrambled),
    (name: 'sorted_blocks'; func: SortedBlocks),
    (name: 'single_big_swap'; func: SingleBigSwap),
    (name: 'descending'; func: Descending),
    (name: 'pipe_organ'; func: PipeOrgan),
    (name: 'push_front'; func: PushFront),
    (name: 'push_middle'; func: PushMiddle)
  );

  Sorts: array[0..2] of record name: string; func: TSortFunc end = (
    (name: 'QuickSort'; func: QuickSort)
    ,(name: 'QuickSort2'; func: QuickSort_SpringComparer)
    ,(name: 'IntroSort'; func: IntroSort)
  );

  Sizes: array of Integer = [100, 1000, 10000, 100000, 1000000];

procedure BenchSort(const state: TState);
begin
  var distIdx := state[0];
  var sortIdx := state[1];
  var size := state[2];

  var distribution: TDistFunc := Distributions[distIdx].func;
  var sort: TSortFunc := Sorts[sortIdx].func;
  var data := distribution(size);
  var copy: TArray<TTestType>;
  SetLength(copy, size);

  for var _ in state do
  begin
    state.PauseTiming;
    if IsManagedType(TTestType) then
      MoveManaged(data, copy, TypeInfo(TTestType), size)
    else
      Move(data[0], copy[0], size * SizeOf(TTestType));
    state.ResumeTiming;
    sort(copy);
  end;
  QuickSort(data);
  for var i := 0 to high(copy) do
    Assert(Equals(copy[i], data[i]), Distributions[distIdx].Name + '/' + Sorts[sortIdx].Name + '/' + size.ToString);
end;

procedure Main;
begin
  benchmark_format_args := False;

  for var distIdx := Low(distributions) to High(Distributions) do
  for var size in Sizes do
  for var sortIdx := Low(Sorts) to High(Sorts) do
  begin
    var bm := Benchmark(BenchSort, Distributions[distIdx].name + '/' + Sorts[sortIdx].name + '/count:' + size.ToString)
      .Args([distIdx, sortIdx, size]);
    bm.Iterations(100);
  end;
  Benchmark_Main;
end;

begin
  try
    Main;
  except
    on E: Exception do
      Writeln(E.ClassName, ': ', E.Message);
  end;
  Readln;
end.

September 13, 2024

TParallelArray.Sort is broken by the way - try this:

uses
  System.Threading,
  System.SysUtils;

procedure Main;
begin
  var data: TArray<Integer>;
  SetLength(data, 1000000);
  for var i := 0 to High(data) do
    data[i] := Random(16);
  TParallelArray.Sort<Integer>(data);
end;

begin
  try
    Main;
  except
    on E: Exception do
      Writeln(E.ClassName, ': ', E.Message);
  end;
  Writeln('done');
  Readln;
end.

September 13, 2024

Depends on your IntroSort implementation - the one in Spring is usually faster than RTL quicksort. (*)

But this also is a result of it using branchless comparers as far as possible.

Also, there is a reason why usually benchmarks for sorting algorithms don't only test on complete random data but also on several patterns. Because in reality, you often deal with things such as mostly sorted with a few unsorted elements at the end or reversed sorted.

(*) here are the benchmark results for RTL QuickSort vs RTL QuickSort with Spring4d Comparer vs Spring4d IntroSort for several data patterns - you can easily see how the branchless compare affects the performance quite significantly

--------------------------------------------------------------------------------------
Benchmark                                            Time             CPU   Iterations
--------------------------------------------------------------------------------------
random/QuickSort/count:100                        2463 ns         2435 ns          100
random/QuickSort2/count:100                       1802 ns         1782 ns          100
random/IntroSort/count:100                        1420 ns         1395 ns          100
random/QuickSort/count:1000                      50209 ns        50189 ns          100
random/QuickSort2/count:1000                     27645 ns        27612 ns          100
random/IntroSort/count:1000                      18489 ns        18468 ns          100
random/QuickSort/count:10000                    836172 ns       835798 ns          100
random/QuickSort2/count:10000                   728400 ns       727977 ns          100
random/IntroSort/count:10000                    523833 ns       523565 ns          100
random/QuickSort/count:100000                  9855657 ns      9850263 ns          100
random/QuickSort2/count:100000                 8918812 ns      8912401 ns          100
random/IntroSort/count:100000                  6839316 ns      6829126 ns          100
random/QuickSort/count:1000000               119455635 ns    119330369 ns          100
random/QuickSort2/count:1000000              104800849 ns    104696480 ns          100
random/IntroSort/count:1000000                81749401 ns     81660293 ns          100
few_unique/QuickSort/count:100                    2249 ns         2251 ns          100
few_unique/QuickSort2/count:100                   1632 ns         1593 ns          100
few_unique/IntroSort/count:100                    1231 ns         1202 ns          100
few_unique/QuickSort/count:1000                  35486 ns        35454 ns          100
few_unique/QuickSort2/count:1000                 25086 ns        25038 ns          100
few_unique/IntroSort/count:1000                  13094 ns        13071 ns          100
few_unique/QuickSort/count:10000                515193 ns       514924 ns          100
few_unique/QuickSort2/count:10000               371173 ns       370283 ns          100
few_unique/IntroSort/count:10000                285188 ns       285036 ns          100
few_unique/QuickSort/count:100000              5664379 ns      5657759 ns          100
few_unique/QuickSort2/count:100000             4348078 ns      4341859 ns          100
few_unique/IntroSort/count:100000              3164370 ns      3162498 ns          100
few_unique/QuickSort/count:1000000            63121983 ns     63076941 ns          100
few_unique/QuickSort2/count:1000000           43872122 ns     43809752 ns          100
few_unique/IntroSort/count:1000000            30719211 ns     30651572 ns          100
two_values/QuickSort/count:100                    1538 ns         1514 ns          100
two_values/QuickSort2/count:100                   1205 ns         1181 ns          100
two_values/IntroSort/count:100                     781 ns          768 ns          100
two_values/QuickSort/count:1000                  19267 ns        19242 ns          100
two_values/QuickSort2/count:1000                 15309 ns        15261 ns          100
two_values/IntroSort/count:1000                   9174 ns         9146 ns          100
two_values/QuickSort/count:10000                281462 ns       276065 ns          100
two_values/QuickSort2/count:10000               206943 ns       206818 ns          100
two_values/IntroSort/count:10000                150871 ns       150572 ns          100
two_values/QuickSort/count:100000              3483592 ns      3467751 ns          100
two_values/QuickSort2/count:100000             2574497 ns      2555053 ns          100
two_values/IntroSort/count:100000              1947766 ns      1943228 ns          100
two_values/QuickSort/count:1000000            39543461 ns     39439575 ns          100
two_values/QuickSort2/count:1000000           29565091 ns     29524866 ns          100
two_values/IntroSort/count:1000000            22888036 ns     22847673 ns          100
all_equal/QuickSort/count:100                     1453 ns         1424 ns          100
all_equal/QuickSort2/count:100                    1152 ns         1135 ns          100
all_equal/IntroSort/count:100                      735 ns          718 ns          100
all_equal/QuickSort/count:1000                   16442 ns        16424 ns          100
all_equal/QuickSort2/count:1000                  12609 ns        12588 ns          100
all_equal/IntroSort/count:1000                    8274 ns         8253 ns          100
all_equal/QuickSort/count:10000                 214083 ns       213979 ns          100
all_equal/QuickSort2/count:10000                164971 ns       164921 ns          100
all_equal/IntroSort/count:10000                 125590 ns       125514 ns          100
all_equal/QuickSort/count:100000               2819341 ns      2815702 ns          100
all_equal/QuickSort2/count:100000              2120029 ns      2117072 ns          100
all_equal/IntroSort/count:100000               1563758 ns      1560929 ns          100
all_equal/QuickSort/count:1000000             32668228 ns     32563853 ns          100
all_equal/QuickSort2/count:1000000            24450182 ns     24411351 ns          100
all_equal/IntroSort/count:1000000             18799427 ns     18758084 ns          100
ascending/QuickSort/count:100                     1238 ns         1208 ns          100
ascending/QuickSort2/count:100                     989 ns          962 ns          100
ascending/IntroSort/count:100                      723 ns          700 ns          100
ascending/QuickSort/count:1000                   15109 ns        15088 ns          100
ascending/QuickSort2/count:1000                  11357 ns        11335 ns          100
ascending/IntroSort/count:1000                    7988 ns         7969 ns          100
ascending/QuickSort/count:10000                 187903 ns       187573 ns          100
ascending/QuickSort2/count:10000                140454 ns       140400 ns          100
ascending/IntroSort/count:10000                 119619 ns       119090 ns          100
ascending/QuickSort/count:100000               2250330 ns      2249079 ns          100
ascending/QuickSort2/count:100000              1700410 ns      1699408 ns          100
ascending/IntroSort/count:100000               1470538 ns      1467097 ns          100
ascending/QuickSort/count:1000000             28118193 ns     28071460 ns          100
ascending/QuickSort2/count:1000000            20655432 ns     20619601 ns          100
ascending/IntroSort/count:1000000             17387674 ns     17366389 ns          100
slightly_scrambled/QuickSort/count:100            1260 ns         1221 ns          100
slightly_scrambled/QuickSort2/count:100            992 ns          976 ns          100
slightly_scrambled/IntroSort/count:100             760 ns          729 ns          100
slightly_scrambled/QuickSort/count:1000          16685 ns        16664 ns          100
slightly_scrambled/QuickSort2/count:1000         12477 ns        12462 ns          100
slightly_scrambled/IntroSort/count:1000           9125 ns         9105 ns          100
slightly_scrambled/QuickSort/count:10000        247135 ns       246653 ns          100
slightly_scrambled/QuickSort2/count:10000       183518 ns       183063 ns          100
slightly_scrambled/IntroSort/count:10000        158241 ns       157415 ns          100
slightly_scrambled/QuickSort/count:100000      2975025 ns      2964542 ns          100
slightly_scrambled/QuickSort2/count:100000     2393465 ns      2389448 ns          100
slightly_scrambled/IntroSort/count:100000      2140478 ns      2137574 ns          100
slightly_scrambled/QuickSort/count:1000000    36245900 ns     36023047 ns          100
slightly_scrambled/QuickSort2/count:1000000   29002166 ns     28920883 ns          100
slightly_scrambled/IntroSort/count:1000000    25116044 ns     25034741 ns          100
sorted_blocks/QuickSort/count:100                 1588 ns         1557 ns          100
sorted_blocks/QuickSort2/count:100                1377 ns         1369 ns          100
sorted_blocks/IntroSort/count:100                 1195 ns         1171 ns          100
sorted_blocks/QuickSort/count:1000               25010 ns        24969 ns          100
sorted_blocks/QuickSort2/count:1000              19958 ns        19927 ns          100
sorted_blocks/IntroSort/count:1000               13229 ns        13200 ns          100
sorted_blocks/QuickSort/count:10000             296620 ns       295986 ns          100
sorted_blocks/QuickSort2/count:10000            246763 ns       246503 ns          100
sorted_blocks/IntroSort/count:10000             173911 ns       173822 ns          100
sorted_blocks/QuickSort/count:100000           4510684 ns      4474993 ns          100
sorted_blocks/QuickSort2/count:100000          3492714 ns      3479751 ns          100
sorted_blocks/IntroSort/count:100000           2468243 ns      2466781 ns          100
sorted_blocks/QuickSort/count:1000000         51631011 ns     51470454 ns          100
sorted_blocks/QuickSort2/count:1000000        41472336 ns     41323492 ns          100
sorted_blocks/IntroSort/count:1000000         29948990 ns     29886920 ns          100
single_big_swap/QuickSort/count:100               1408 ns         1378 ns          100
single_big_swap/QuickSort2/count:100              1124 ns         1097 ns          100
single_big_swap/IntroSort/count:100               1096 ns         1067 ns          100
single_big_swap/QuickSort/count:1000             16770 ns        16747 ns          100
single_big_swap/QuickSort2/count:1000            12769 ns        12753 ns          100
single_big_swap/IntroSort/count:1000             14975 ns        14945 ns          100
single_big_swap/QuickSort/count:10000           205373 ns       204738 ns          100
single_big_swap/QuickSort2/count:10000          153375 ns       153290 ns          100
single_big_swap/IntroSort/count:10000           211204 ns       210621 ns          100
single_big_swap/QuickSort/count:100000         2427136 ns      2416559 ns          100
single_big_swap/QuickSort2/count:100000        1853420 ns      1851812 ns          100
single_big_swap/IntroSort/count:100000         2900139 ns      2884030 ns          100
single_big_swap/QuickSort/count:1000000       29680453 ns     29600956 ns          100
single_big_swap/QuickSort2/count:1000000      22054106 ns     21979630 ns          100
single_big_swap/IntroSort/count:1000000       35043829 ns     34961055 ns          100
descending/QuickSort/count:100                    1225 ns         1205 ns          100
descending/QuickSort2/count:100                    979 ns          955 ns          100
descending/IntroSort/count:100                     726 ns          703 ns          100
descending/QuickSort/count:1000                  15322 ns        15138 ns          100
descending/QuickSort2/count:1000                 11398 ns        11374 ns          100
descending/IntroSort/count:1000                   8001 ns         7982 ns          100
descending/QuickSort/count:10000                187287 ns       187230 ns          100
descending/QuickSort2/count:10000               140713 ns       139754 ns          100
descending/IntroSort/count:10000                119207 ns       119157 ns          100
descending/QuickSort/count:100000              2251193 ns      2250138 ns          100
descending/QuickSort2/count:100000             1697955 ns      1697229 ns          100
descending/IntroSort/count:100000              1468175 ns      1467358 ns          100
descending/QuickSort/count:1000000            28025499 ns     27991254 ns          100
descending/QuickSort2/count:1000000           20643835 ns     20610794 ns          100
descending/IntroSort/count:1000000            17434312 ns     17380960 ns          100
pipe_organ/QuickSort/count:100                    1347 ns         1325 ns          100
pipe_organ/QuickSort2/count:100                   1068 ns         1052 ns          100
pipe_organ/IntroSort/count:100                     712 ns          690 ns          100
pipe_organ/QuickSort/count:1000                  15772 ns        15749 ns          100
pipe_organ/QuickSort2/count:1000                 11923 ns        11899 ns          100
pipe_organ/IntroSort/count:1000                   8073 ns         8059 ns          100
pipe_organ/QuickSort/count:10000                200075 ns       200011 ns          100
pipe_organ/QuickSort2/count:10000               153828 ns       153583 ns          100
pipe_organ/IntroSort/count:10000                121821 ns       121781 ns          100
pipe_organ/QuickSort/count:100000              2564621 ns      2558463 ns          100
pipe_organ/QuickSort2/count:100000             1958687 ns      1951100 ns          100
pipe_organ/IntroSort/count:100000              1516152 ns      1511432 ns          100
pipe_organ/QuickSort/count:1000000            30322479 ns     30281379 ns          100
pipe_organ/QuickSort2/count:1000000           23019458 ns     22963220 ns          100
pipe_organ/IntroSort/count:1000000            18033541 ns     17998660 ns          100
push_front/QuickSort/count:100                    1657 ns         1627 ns          100
push_front/QuickSort2/count:100                   1208 ns         1193 ns          100
push_front/IntroSort/count:100                    1173 ns         1150 ns          100
push_front/QuickSort/count:1000                  18850 ns        18774 ns          100
push_front/QuickSort2/count:1000                 13694 ns        13669 ns          100
push_front/IntroSort/count:1000                  15505 ns        15491 ns          100
push_front/QuickSort/count:10000                232070 ns       231695 ns          100
push_front/QuickSort2/count:10000               164370 ns       164085 ns          100
push_front/IntroSort/count:10000                223185 ns       223119 ns          100
push_front/QuickSort/count:100000              2786631 ns      2780806 ns          100
push_front/QuickSort2/count:100000             1981209 ns      1977728 ns          100
push_front/IntroSort/count:100000              2986261 ns      2979247 ns          100
push_front/QuickSort/count:1000000            32337658 ns     32237857 ns          100
push_front/QuickSort2/count:1000000           22995000 ns     22948793 ns          100
push_front/IntroSort/count:1000000            35586468 ns     35516490 ns          100
push_middle/QuickSort/count:100                   1445 ns         1416 ns          100
push_middle/QuickSort2/count:100                  1088 ns         1069 ns          100
push_middle/IntroSort/count:100                    893 ns          868 ns          100
push_middle/QuickSort/count:1000                 17087 ns        17063 ns          100
push_middle/QuickSort2/count:1000                27199 ns        12531 ns          100
push_middle/IntroSort/count:1000                 11300 ns        11284 ns          100
push_middle/QuickSort/count:10000               210365 ns       210271 ns          100
push_middle/QuickSort2/count:10000              152367 ns       152240 ns          100
push_middle/IntroSort/count:10000               166248 ns       166154 ns          100
push_middle/QuickSort/count:100000             2529640 ns      2526924 ns          100
push_middle/QuickSort2/count:100000            1844729 ns      1842635 ns          100
push_middle/IntroSort/count:100000             2160489 ns      2158998 ns          100
push_middle/QuickSort/count:1000000           30219881 ns     30136359 ns          100
push_middle/QuickSort2/count:1000000          21940150 ns     21867966 ns          100
push_middle/IntroSort/count:1000000           25940856 ns     25884546 ns          100

September 13, 2024

I am not commenting on benchmark results without seeing the code - I have seen too many flawed benchmarks in my life.

If you look into the implementation you see that it is using the most simple parallel quicksort imaginable - that means the first run over the entire array which arranges the elements according to the pivot (as in the TArray.Sort the most simple pivot selection - take the middle) runs single threaded.

Only then the processing of left and right is being passed to the PPL.

Anyone who has researched a bit into sorting algorithms in the past 20 years knows that a pure quicksort is hardly anything good enough to be used as general purpose sorting algo in a runtime - other languages using hybrids for years, .NET and some C++ runtimes use IntroSort, Python uses Timsort, so does Java for objects and some special version of QuickSort for primitive types, lately some languages such as Go adapted PDQSort as their default sorting algorithm.

I'd argue that some of these better algorithms running single-threaded outperform parallel quicksort on certain and not so uncommon data patterns.

September 13, 2024

Somehow I cannot believe that ZEOS did not properly support 64bit until now - you must have been using some old code

September 13, 2024

5 hours ago, Anders Melander said:

I would have expected the 500K test to have better throughput than the 5M and 1B tests since the 500K array can fit in the 24Mb cache while the 5M and 1B arrays cannot.

Quicksort with comparer calls compiled by a mediocre compiler operates far below L3 cache speed.

September 11, 2024

Of course - TDictionary<K,V> is hashtable based, so it needs GetHashCode and Equals

August 29, 2024

There should not be any temp, nor assign call tbh. Check out this code:

{$APPTYPE CONSOLE}

uses
  SysUtils;

type
  TMyRec = record
    x: Integer;
    class operator Initialize(out dest: TMyRec);
    class operator Finalize(var dest: TMyRec);
    class operator Assign(var left: TMyRec; const[ref] right: TMyRec);
  end;

function MyDefault: TMyRec;
begin
  Writeln('default - ', IntToHex(IntPtr(@Result)));
  Result.x := 0;
end;

class operator TMyRec.Initialize(out dest: TMyRec);
begin
  Writeln('init - ', IntToHex(IntPtr(@dest)));
end;

class operator TMyRec.Finalize(var dest: TMyRec);
begin
  Writeln('finalize - ', IntToHex(IntPtr(@dest)));
end;

class operator TMyRec.Assign(var left: TMyRec; const[ref] right: TMyRec);
begin
  Writeln('assign');
end;

procedure Main;
var
  r: TMyRec;
begin
  r := MyDefault;
end;

begin
  Main;
end.

Now we can argue why there is no assign call because we are assigning the result of the MyDefault function to r. But even though we have declared an assign operator it does what it always does with managed return type - passing it as hidden var parameter.

I said this before and I say it again - CMR are broken as designed and personally I stay the heck away from them.

I remember some years ago there was a question on SO about struct default ctors serving as default initializers and why C# does not have them and I don't remember if it was Jon or Eric explained that it would cause all kinds of trouble.

Edit: Ah, here it is: https://stackoverflow.com/a/333840/587106 - eventually they added them in C# 10 but there is quite some extensive language spec - when I asked for the CMR spec it was all crickets.

FWIW the Delphi implementation suffers from exactly the situation that Jon describes - allocate a dynamic array of some CMR and watch all the initializers run. Which is completely bonkers - C# 10 does not do any of that for array allocation - see https://learn.microsoft.com/en-us/dotnet/csharp/language-reference/proposals/csharp-10.0/parameterless-struct-constructors#array-allocation

August 29, 2024

Already reported in 2021

August 23, 2024

you can generate a dpk with the help of System.SysUtils.GetPackageInfo

August 23, 2024

Recompile the RTL yourself using source\rtl\buildrtl.bat and generate map file - that's how I used to do it until Anders added jdbg support.

August 21, 2024

3 hours ago, Remy Lebeau said:

The local fib variable inside of the calling function, and the captured fib variable inside the anonymous method, are actually the same variable in memory.

There is no local variable - on the source code level, yes there is, but implementation wise they live inside the heap memory of the compiler generated instance.

That's why captured variables never appeared in the local variables view until recently when the kinda implemented support for it.

August 21, 2024

5 hours ago, silvercoder79 said:

the IfThen function, Delphi's equivalent to the C ternary operator.

omg, no!

5 hours ago, Anders Melander said:

Next up: How to use "with" to make your code more readable.

Followed by "How do I fix these random Access Violations that appear in my code"

August 21, 2024

To understand this, you need to understand how anonymous methods are implemented.

The compiler implements them by creating a class that inherits from TInterfacedObject. The anonymous method type is a special interface with one method called Invoke, which has the signature of the anonymous method type.

When you add ReportMemoryLeaksOnShutdown := True to the code above you will see that it reports one instance of a class called MakeFib$ActRec - that is the class name of the compiler-generated class for the anonymous method.

The class also contains fields for every captured variable - in this case the local variable fib is being captured with holds the reference to itself so this is the circular interface reference that causes the memory leak.

To demonstrate here is the code as the compiler implements it:

type
  MakeFibAnonClass = class(TInterfacedObject, TFunc<integer,int64>)
    fib: TFunc<integer,int64>;
    function Invoke(value: integer): int64;
  end;

function MakeFibAnonClass.Invoke(value: integer): int64;
begin
  if value < 3 then
    Result := 1
  else
    Result := fib(value - 1) + fib(value -2);
end;

function MakeFib_Impl: TFunc<integer,int64>;
var
  obj: MakeFibAnonClass;
begin
  obj := MakeFibAnonClass.Create;
  obj.fib := obj;
  Result := obj.fib;
end;

August 20, 2024

Convert<T> from TValueHelper in Spring. See https://bitbucket.org/sglienke/spring4d/src/2.0.1/Source/Base/Spring.pas#lines-651

August 14, 2024

get the source from SourceForge
open Projects\DelphiXx120\GExpertsRS120.dproj
compile
run Binaries\Register-GExperts-XX120.cmd
restart IDE

August 12, 2024

On 8/10/2024 at 10:54 AM, RDP1974 said:

maybe in rdpsimd64 can we rem this line?

which cpu do you test?

procedure Move2(const Source; var Dest; Count: NativeInt); inline;
begin
//if Count > 0 then //>>> useless checking?
SeaMove(@Source, @Dest, Count);
end;

I removed the check, works ok, thanks for the hint

You *must not* remove that line - Move in Delphi allowed to be called with negative Count (a possible result of some size calculation), resulting in a no-op in the System implementation. Passing a negative number to most C++ implementations will result in passing a value >2mio because their size parameter is unsigned.

Also, the performance difference is hardly about that little check but the ippsMove_8u implementation.

August 10, 2024

ippMove is not much faster than System.Move in Delphi 12. In fact, it performs noticeably worse on small sizes (below 1k) and for some reason completely falls off the cliff on sizes that are >20k running 2-4 times slower.

August 10, 2024

6 hours ago, David Heffernan said:

It is if you use a proper memory manager

Only on 64bit though - on 32bit the size of TDynArrayRec is 8 byte, if the memory manager getmem returns 16byte aligned memory that makes the elem[0] 8byte aligned.

This is why i said that the RTL has to take care of it - the same is the case for strings.

Relying only on the memory manager which is replaceable prevents the RTL from doing things that it could if certain data would have certain alignments. Using SSE for some string handling functions and such.

August 9, 2024

4 hours ago, Kas Ob. said:

Another point, it will help any new optimized memory copy functions to use the SIMD version without going through the first and unaligned 16 bytes, but for this it should be aligned to 16bytes to get the most of juice, meaning that record should be 32bytes.

That would be true if dynamic array allocation would be done in a way that ensures that element [0] would be at a 16 byte aligned address, which is not the case

August 9, 2024

16 hours ago, A.M. Hoornweg said:

My worry is, whenever I delete an element at the beginning, would the RTL just allocate a new block and copy the old array minus the first element to the new location?

Not exactly, it moves everything in the array to the front and then calls SetLength with the new length. What that does internally depends on the refcount of the array.

If it's 1, then it does a Realloc (shrinking in place, though depends on the memory manager what exactly happens, typically for a small downsize it does it in place, thus basically resulting in a no op)

If it's greater than 1 it allocates a new block and copies the content over - I would say calling SetLength inside of Delete is a bug because everything was already moved in place thus the behavior of SetLength is not to be desired in this case. In fact this leads to some corrupted copy lying around

Reported https://embt.atlassian.net/servicedesk/customer/portal/1/RSS-1532

Edit: apparently this is already known since 2020 - https://quality.embarcadero.com/browse/RSP-27870

July 31, 2024

18 hours ago, Brandon Staggs said:

But you can't just wave your hands and say "this is different" when it comes to language models being trained with publicly visible source code and people using it to figure out how to solve problems. What is your LEGAL BASIS for saying it is a license violation?

Because things like Clean room design exist to minimize the chances of any copyright infringement. There have been lawsuits over "similarly enough looking" code in the past.

Also, the simple fact that gen AI does not care about any license attribution which you would have to follow if you take any source code with one of the major permissive licenses that still require attribution.

July 29, 2024

It's really embarrassing to see how many people who work in software development don't know how generative AI works and are being fooled by it like an average TikTok teenager.

Sign In

Stefan Glienke

Content Count

Joined

Last visited

Days Won

Content Type

Profiles

Forums

Calendar

Posts posted by Stefan Glienke

TParallelArray Sort Performance...

TParallelArray Sort Performance...

TParallelArray Sort Performance...

TParallelArray Sort Performance...

TParallelArray Sort Performance...

TParallelArray Sort Performance...

Pointer casting with NativeUint

TParallelArray Sort Performance...

Records as TDictionary keys

Custom Managed Records and Default(T)

Custom Managed Records and Default(T)

MAP2PDB - Profiling with VTune

MAP2PDB - Profiling with VTune

Recursive anonymous functions

Watch me coding in Delphi on YouTube

Recursive anonymous functions

Methods to convert a string TValue to the desired simple type (Like Integer, Float, DateTime);

GExperts for Delphi 10.4.1

updated Delphi64RTL intel ipp onetbb

updated Delphi64RTL intel ipp onetbb

Dynamic array used as a queue. Memory fragmentation?

Dynamic array used as a queue. Memory fragmentation?

Dynamic array used as a queue. Memory fragmentation?

Devin AI - Is it already happening?

Devin AI - Is it already happening?

Browse

Activity