A bit of a challenge ... to produce Delphi code equaling the performance of C++'s next_permutation.
    Hopefully more readable in Delphi ...    unit D_Next_perm_string; interface procedure swapCh(var a:Ansichar; var b:Ansichar); procedure reverse(var s:AnsiString; const a,x:word); function D_next_permutation(var s:AnsiString; const first, end_:word) : boolean; implementation //uses System.Diagnostics; procedure swapCh( var a : Ansichar; var b : Ansichar ); inline; var c : Ansichar; begin if a<>b then begin c := a; a := b; b := c; end; end; procedure reverse(var s:AnsiString; const a,x:word); inline; var i,j : word; //t : string; begin // x is one past the end of string if a = x-1 then exit; j := ( x-a ) shr 1; //trunc((x-a)/2); for i := 1 to j do swapCh( s[a-1+i] , s[x-i] ); end; function D_next_permutation(var s : AnsiString; const first, end_ : word) : boolean; inline; var i, ii, j : word; begin if first = end_ then begin result := false; exit; end; i := first + 1; if i = end_ then begin result := false; exit; end; i := end_ - 1; while true do begin ii := i; dec(i); if s[i] < s[ii] then begin j := end_; repeat dec(j); until s[i] < s[j]; swapCh( s[i] , s[j] ); reverse(s, ii, end_); result := true; exit; end; if i = first then begin reverse(s, first, end_); result := false; exit; end; end; end; end. Ballpark, we're looking at about 6s vs 2s for s = "abcdefghijkl" (12 bytes).   Increases by about a factor of 10 per character beyond that.   Note the c++ end() iterator is **one past the end of string**, so ... D_next_permutation(s,1,13)