pp_sort.c

   1 /*    pp_sort.c
   2  *
   3  *    Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
   4  *    2000, 2001, 2002, 2003, 2004, 2005, 2006, by Larry Wall and others
   5  *
   6  *    You may distribute under the terms of either the GNU General Public
   7  *    License or the Artistic License, as specified in the README file.
   8  *
   9  */
  10
  11 /*
  12  *   ...they shuffled back towards the rear of the line. 'No, not at the
  13  *   rear!'  the slave-driver shouted. 'Three files up. And stay there...
  14  */
  15
  16 /* This file contains pp ("push/pop") functions that
  17  * execute the opcodes that make up a perl program. A typical pp function
  18  * expects to find its arguments on the stack, and usually pushes its
  19  * results onto the stack, hence the 'pp' terminology. Each OP structure
  20  * contains a pointer to the relevant pp_foo() function.
  21  *
  22  * This particular file just contains pp_sort(), which is complex
  23  * enough to merit its own file! See the other pp*.c files for the rest of
  24  * the pp_ functions.
  25  */
  26
  27 #include "EXTERN.h"
  28 #define PERL_IN_PP_SORT_C
  29 #include "perl.h"
  30
  31 #if defined(UNDER_CE)
  32 /* looks like 'small' is reserved word for WINCE (or somesuch)*/
  33 #define small xsmall
  34 #endif
  35
  36 #define sv_cmp_static Perl_sv_cmp
  37 #define sv_cmp_locale_static Perl_sv_cmp_locale
  38
  39 #define dSORTHINTS   SV *hintsv = GvSV(gv_fetchpv("sort::hints", GV_ADDMULTI, SVt_IV))
  40 #define SORTHINTS    (SvIOK(hintsv) ? ((I32)SvIV(hintsv)) : 0)
  41
  42 #ifndef SMALLSORT
  43 #define SMALLSORT (200)
  44 #endif
  45
  46 /*
  47  * The mergesort implementation is by Peter M. Mcilroy <pmcilroy@lucent.com>.
  48  *
  49  * The original code was written in conjunction with BSD Computer Software
  50  * Research Group at University of California, Berkeley.
  51  *
  52  * See also: "Optimistic Merge Sort" (SODA '92)
  53  *
  54  * The integration to Perl is by John P. Linderman <jpl@research.att.com>.
  55  *
  56  * The code can be distributed under the same terms as Perl itself.
  57  *
  58  */
  59
  60
  61 typedef char * aptr;            /* pointer for arithmetic on sizes */
  62 typedef SV * gptr;              /* pointers in our lists */
  63
  64 /* Binary merge internal sort, with a few special mods
  65 ** for the special perl environment it now finds itself in.
  66 **
  67 ** Things that were once options have been hotwired
  68 ** to values suitable for this use.  In particular, we'll always
  69 ** initialize looking for natural runs, we'll always produce stable
  70 ** output, and we'll always do Peter McIlroy's binary merge.
  71 */
  72
  73 /* Pointer types for arithmetic and storage and convenience casts */
  74
  75 #define APTR(P) ((aptr)(P))
  76 #define GPTP(P) ((gptr *)(P))
  77 #define GPPP(P) ((gptr **)(P))
  78
  79
  80 /* byte offset from pointer P to (larger) pointer Q */
  81 #define BYTEOFF(P, Q) (APTR(Q) - APTR(P))
  82
  83 #define PSIZE sizeof(gptr)
  84
  85 /* If PSIZE is power of 2, make PSHIFT that power, if that helps */
  86
  87 #ifdef  PSHIFT
  88 #define PNELEM(P, Q)    (BYTEOFF(P,Q) >> (PSHIFT))
  89 #define PNBYTE(N)       ((N) << (PSHIFT))
  90 #define PINDEX(P, N)    (GPTP(APTR(P) + PNBYTE(N)))
  91 #else
  92 /* Leave optimization to compiler */
  93 #define PNELEM(P, Q)    (GPTP(Q) - GPTP(P))
  94 #define PNBYTE(N)       ((N) * (PSIZE))
  95 #define PINDEX(P, N)    (GPTP(P) + (N))
  96 #endif
  97
  98 /* Pointer into other corresponding to pointer into this */
  99 #define POTHER(P, THIS, OTHER) GPTP(APTR(OTHER) + BYTEOFF(THIS,P))
 100
 101 #define FROMTOUPTO(src, dst, lim) do *dst++ = *src++; while(src<lim)
 102
 103
 104 /* Runs are identified by a pointer in the auxilliary list.
 105 ** The pointer is at the start of the list,
 106 ** and it points to the start of the next list.
 107 ** NEXT is used as an lvalue, too.
 108 */
 109
 110 #define NEXT(P)         (*GPPP(P))
 111
 112
 113 /* PTHRESH is the minimum number of pairs with the same sense to justify
 114 ** checking for a run and extending it.  Note that PTHRESH counts PAIRS,
 115 ** not just elements, so PTHRESH == 8 means a run of 16.
 116 */
 117
 118 #define PTHRESH (8)
 119
 120 /* RTHRESH is the number of elements in a run that must compare low
 121 ** to the low element from the opposing run before we justify
 122 ** doing a binary rampup instead of single stepping.
 123 ** In random input, N in a row low should only happen with
 124 ** probability 2^(1-N), so we can risk that we are dealing
 125 ** with orderly input without paying much when we aren't.
 126 */
 127
 128 #define RTHRESH (6)
 129
 130
 131 /*
 132 ** Overview of algorithm and variables.
 133 ** The array of elements at list1 will be organized into runs of length 2,
 134 ** or runs of length >= 2 * PTHRESH.  We only try to form long runs when
 135 ** PTHRESH adjacent pairs compare in the same way, suggesting overall order.
 136 **
 137 ** Unless otherwise specified, pair pointers address the first of two elements.
 138 **
 139 ** b and b+1 are a pair that compare with sense "sense".
 140 ** b is the "bottom" of adjacent pairs that might form a longer run.
 141 **
 142 ** p2 parallels b in the list2 array, where runs are defined by
 143 ** a pointer chain.
 144 **
 145 ** t represents the "top" of the adjacent pairs that might extend
 146 ** the run beginning at b.  Usually, t addresses a pair
 147 ** that compares with opposite sense from (b,b+1).
 148 ** However, it may also address a singleton element at the end of list1,
 149 ** or it may be equal to "last", the first element beyond list1.
 150 **
 151 ** r addresses the Nth pair following b.  If this would be beyond t,
 152 ** we back it off to t.  Only when r is less than t do we consider the
 153 ** run long enough to consider checking.
 154 **
 155 ** q addresses a pair such that the pairs at b through q already form a run.
 156 ** Often, q will equal b, indicating we only are sure of the pair itself.
 157 ** However, a search on the previous cycle may have revealed a longer run,
 158 ** so q may be greater than b.
 159 **
 160 ** p is used to work back from a candidate r, trying to reach q,
 161 ** which would mean b through r would be a run.  If we discover such a run,
 162 ** we start q at r and try to push it further towards t.
 163 ** If b through r is NOT a run, we detect the wrong order at (p-1,p).
 164 ** In any event, after the check (if any), we have two main cases.
 165 **
 166 ** 1) Short run.  b <= q < p <= r <= t.
 167 **      b through q is a run (perhaps trivial)
 168 **      q through p are uninteresting pairs
 169 **      p through r is a run
 170 **
 171 ** 2) Long run.  b < r <= q < t.
 172 **      b through q is a run (of length >= 2 * PTHRESH)
 173 **
 174 ** Note that degenerate cases are not only possible, but likely.
 175 ** For example, if the pair following b compares with opposite sense,
 176 ** then b == q < p == r == t.
 177 */
 178
 179
 180 static IV
 181 dynprep(pTHX_ gptr *list1, gptr *list2, size_t nmemb, const SVCOMPARE_t cmp)
 182 {
 183     I32 sense;
 184     register gptr *b, *p, *q, *t, *p2;
 185     register gptr *last, *r;
 186     IV runs = 0;
 187
 188     b = list1;
 189     last = PINDEX(b, nmemb);
 190     sense = (cmp(aTHX_ *b, *(b+1)) > 0);
 191     for (p2 = list2; b < last; ) {
 192         /* We just started, or just reversed sense.
 193         ** Set t at end of pairs with the prevailing sense.
 194         */
 195         for (p = b+2, t = p; ++p < last; t = ++p) {
 196             if ((cmp(aTHX_ *t, *p) > 0) != sense) break;
 197         }
 198         q = b;
 199         /* Having laid out the playing field, look for long runs */
 200         do {
 201             p = r = b + (2 * PTHRESH);
 202             if (r >= t) p = r = t;      /* too short to care about */
 203             else {
 204                 while (((cmp(aTHX_ *(p-1), *p) > 0) == sense) &&
 205                        ((p -= 2) > q));
 206                 if (p <= q) {
 207                     /* b through r is a (long) run.
 208                     ** Extend it as far as possible.
 209                     */
 210                     p = q = r;
 211                     while (((p += 2) < t) &&
 212                            ((cmp(aTHX_ *(p-1), *p) > 0) == sense)) q = p;
 213                     r = p = q + 2;      /* no simple pairs, no after-run */
 214                 }
 215             }
 216             if (q > b) {                /* run of greater than 2 at b */
 217                 gptr *savep = p;
 218
 219                 p = q += 2;
 220                 /* pick up singleton, if possible */
 221                 if ((p == t) &&
 222                     ((t + 1) == last) &&
 223                     ((cmp(aTHX_ *(p-1), *p) > 0) == sense))
 224                     savep = r = p = q = last;
 225                 p2 = NEXT(p2) = p2 + (p - b); ++runs;
 226                 if (sense)
 227                     while (b < --p) {
 228                         const gptr c = *b;
 229                         *b++ = *p;
 230                         *p = c;
 231                     }
 232                 p = savep;
 233             }
 234             while (q < p) {             /* simple pairs */
 235                 p2 = NEXT(p2) = p2 + 2; ++runs;
 236                 if (sense) {
 237                     const gptr c = *q++;
 238                     *(q-1) = *q;
 239                     *q++ = c;
 240                 } else q += 2;
 241             }
 242             if (((b = p) == t) && ((t+1) == last)) {
 243                 NEXT(p2) = p2 + 1; ++runs;
 244                 b++;
 245             }
 246             q = r;
 247         } while (b < t);
 248         sense = !sense;
 249     }
 250     return runs;
 251 }
 252
 253
 254 /* The original merge sort, in use since 5.7, was as fast as, or faster than,
 255  * qsort on many platforms, but slower than qsort, conspicuously so,
 256  * on others.  The most likely explanation was platform-specific
 257  * differences in cache sizes and relative speeds.
 258  *
 259  * The quicksort divide-and-conquer algorithm guarantees that, as the
 260  * problem is subdivided into smaller and smaller parts, the parts
 261  * fit into smaller (and faster) caches.  So it doesn't matter how
 262  * many levels of cache exist, quicksort will "find" them, and,
 263  * as long as smaller is faster, take advantage of them.
 264  *
 265  * By contrast, consider how the original mergesort algorithm worked.
 266  * Suppose we have five runs (each typically of length 2 after dynprep).
 267  *
 268  * pass               base                        aux
 269  *  0              1 2 3 4 5
 270  *  1                                           12 34 5
 271  *  2                1234 5
 272  *  3                                            12345
 273  *  4                 12345
 274  *
 275  * Adjacent pairs are merged in "grand sweeps" through the input.
 276  * This means, on pass 1, the records in runs 1 and 2 aren't revisited until
 277  * runs 3 and 4 are merged and the runs from run 5 have been copied.
 278  * The only cache that matters is one large enough to hold *all* the input.
 279  * On some platforms, this may be many times slower than smaller caches.
 280  *
 281  * The following pseudo-code uses the same basic merge algorithm,
 282  * but in a divide-and-conquer way.
 283  *
 284  * # merge $runs runs at offset $offset of list $list1 into $list2.
 285  * # all unmerged runs ($runs == 1) originate in list $base.
 286  * sub mgsort2 {
 287  *     my ($offset, $runs, $base, $list1, $list2) = @_;
 288  *
 289  *     if ($runs == 1) {
 290  *         if ($list1 is $base) copy run to $list2
 291  *         return offset of end of list (or copy)
 292  *     } else {
 293  *         $off2 = mgsort2($offset, $runs-($runs/2), $base, $list2, $list1)
 294  *         mgsort2($off2, $runs/2, $base, $list2, $list1)
 295  *         merge the adjacent runs at $offset of $list1 into $list2
 296  *         return the offset of the end of the merged runs
 297  *     }
 298  * }
 299  * mgsort2(0, $runs, $base, $aux, $base);
 300  *
 301  * For our 5 runs, the tree of calls looks like
 302  *
 303  *           5
 304  *      3        2
 305  *   2     1   1   1
 306  * 1   1
 307  *
 308  * 1   2   3   4   5
 309  *
 310  * and the corresponding activity looks like
 311  *
 312  * copy runs 1 and 2 from base to aux
 313  * merge runs 1 and 2 from aux to base
 314  * (run 3 is where it belongs, no copy needed)
 315  * merge runs 12 and 3 from base to aux
 316  * (runs 4 and 5 are where they belong, no copy needed)
 317  * merge runs 4 and 5 from base to aux
 318  * merge runs 123 and 45 from aux to base
 319  *
 320  * Note that we merge runs 1 and 2 immediately after copying them,
 321  * while they are still likely to be in fast cache.  Similarly,
 322  * run 3 is merged with run 12 while it still may be lingering in cache.
 323  * This implementation should therefore enjoy much of the cache-friendly
 324  * behavior that quicksort does.  In addition, it does less copying
 325  * than the original mergesort implementation (only runs 1 and 2 are copied)
 326  * and the "balancing" of merges is better (merged runs comprise more nearly
 327  * equal numbers of original runs).
 328  *
 329  * The actual cache-friendly implementation will use a pseudo-stack
 330  * to avoid recursion, and will unroll processing of runs of length 2,
 331  * but it is otherwise similar to the recursive implementation.
 332  */
 333
 334 typedef struct {
 335     IV  offset;         /* offset of 1st of 2 runs at this level */
 336     IV  runs;           /* how many runs must be combined into 1 */
 337 } off_runs;             /* pseudo-stack element */
 338
 339
 340 static I32
 341 cmp_desc(pTHX_ gptr a, gptr b)
 342 {
 343     return -PL_sort_RealCmp(aTHX_ a, b);
 344 }
 345
 346 STATIC void
 347 S_mergesortsv(pTHX_ gptr *base, size_t nmemb, SVCOMPARE_t cmp, U32 flags)
 348 {
 349     IV i, run, offset;
 350     I32 sense, level;
 351     register gptr *f1, *f2, *t, *b, *p;
 352     int iwhich;
 353     gptr *aux;
 354     gptr *p1;
 355     gptr small[SMALLSORT];
 356     gptr *which[3];
 357     off_runs stack[60], *stackp;
 358     SVCOMPARE_t savecmp = NULL;
 359
 360     if (nmemb <= 1) return;                     /* sorted trivially */
 361
 362     if (flags) {
 363         savecmp = PL_sort_RealCmp;      /* Save current comparison routine, if any */
 364         PL_sort_RealCmp = cmp;  /* Put comparison routine where cmp_desc can find it */
 365         cmp = cmp_desc;
 366     }
 367
 368     if (nmemb <= SMALLSORT) aux = small;        /* use stack for aux array */
 369     else { Newx(aux,nmemb,gptr); }              /* allocate auxilliary array */
 370     level = 0;
 371     stackp = stack;
 372     stackp->runs = dynprep(aTHX_ base, aux, nmemb, cmp);
 373     stackp->offset = offset = 0;
 374     which[0] = which[2] = base;
 375     which[1] = aux;
 376     for (;;) {
 377         /* On levels where both runs have be constructed (stackp->runs == 0),
 378          * merge them, and note the offset of their end, in case the offset
 379          * is needed at the next level up.  Hop up a level, and,
 380          * as long as stackp->runs is 0, keep merging.
 381          */
 382         IV runs = stackp->runs;
 383         if (runs == 0) {
 384             gptr *list1, *list2;
 385             iwhich = level & 1;
 386             list1 = which[iwhich];              /* area where runs are now */
 387             list2 = which[++iwhich];            /* area for merged runs */
 388             do {
 389                 register gptr *l1, *l2, *tp2;
 390                 offset = stackp->offset;
 391                 f1 = p1 = list1 + offset;               /* start of first run */
 392                 p = tp2 = list2 + offset;       /* where merged run will go */
 393                 t = NEXT(p);                    /* where first run ends */
 394                 f2 = l1 = POTHER(t, list2, list1); /* ... on the other side */
 395                 t = NEXT(t);                    /* where second runs ends */
 396                 l2 = POTHER(t, list2, list1);   /* ... on the other side */
 397                 offset = PNELEM(list2, t);
 398                 while (f1 < l1 && f2 < l2) {
 399                     /* If head 1 is larger than head 2, find ALL the elements
 400                     ** in list 2 strictly less than head1, write them all,
 401                     ** then head 1.  Then compare the new heads, and repeat,
 402                     ** until one or both lists are exhausted.
 403                     **
 404                     ** In all comparisons (after establishing
 405                     ** which head to merge) the item to merge
 406                     ** (at pointer q) is the first operand of
 407                     ** the comparison.  When we want to know
 408                     ** if "q is strictly less than the other",
 409                     ** we can't just do
 410                     **    cmp(q, other) < 0
 411                     ** because stability demands that we treat equality
 412                     ** as high when q comes from l2, and as low when
 413                     ** q was from l1.  So we ask the question by doing
 414                     **    cmp(q, other) <= sense
 415                     ** and make sense == 0 when equality should look low,
 416                     ** and -1 when equality should look high.
 417                     */
 418
 419                     register gptr *q;
 420                     if (cmp(aTHX_ *f1, *f2) <= 0) {
 421                         q = f2; b = f1; t = l1;
 422                         sense = -1;
 423                     } else {
 424                         q = f1; b = f2; t = l2;
 425                         sense = 0;
 426                     }
 427
 428
 429                     /* ramp up
 430                     **
 431                     ** Leave t at something strictly
 432                     ** greater than q (or at the end of the list),
 433                     ** and b at something strictly less than q.
 434                     */
 435                     for (i = 1, run = 0 ;;) {
 436                         if ((p = PINDEX(b, i)) >= t) {
 437                             /* off the end */
 438                             if (((p = PINDEX(t, -1)) > b) &&
 439                                 (cmp(aTHX_ *q, *p) <= sense))
 440                                  t = p;
 441                             else b = p;
 442                             break;
 443                         } else if (cmp(aTHX_ *q, *p) <= sense) {
 444                             t = p;
 445                             break;
 446                         } else b = p;
 447                         if (++run >= RTHRESH) i += i;
 448                     }
 449
 450
 451                     /* q is known to follow b and must be inserted before t.
 452                     ** Increment b, so the range of possibilities is [b,t).
 453                     ** Round binary split down, to favor early appearance.
 454                     ** Adjust b and t until q belongs just before t.
 455                     */
 456
 457                     b++;
 458                     while (b < t) {
 459                         p = PINDEX(b, (PNELEM(b, t) - 1) / 2);
 460                         if (cmp(aTHX_ *q, *p) <= sense) {
 461                             t = p;
 462                         } else b = p + 1;
 463                     }
 464
 465
 466                     /* Copy all the strictly low elements */
 467
 468                     if (q == f1) {
 469                         FROMTOUPTO(f2, tp2, t);
 470                         *tp2++ = *f1++;
 471                     } else {
 472                         FROMTOUPTO(f1, tp2, t);
 473                         *tp2++ = *f2++;
 474                     }
 475                 }
 476
 477
 478                 /* Run out remaining list */
 479                 if (f1 == l1) {
 480                        if (f2 < l2) FROMTOUPTO(f2, tp2, l2);
 481                 } else              FROMTOUPTO(f1, tp2, l1);
 482                 p1 = NEXT(p1) = POTHER(tp2, list2, list1);
 483
 484                 if (--level == 0) goto done;
 485                 --stackp;
 486                 t = list1; list1 = list2; list2 = t;    /* swap lists */
 487             } while ((runs = stackp->runs) == 0);
 488         }
 489
 490
 491         stackp->runs = 0;               /* current run will finish level */
 492         /* While there are more than 2 runs remaining,
 493          * turn them into exactly 2 runs (at the "other" level),
 494          * each made up of approximately half the runs.
 495          * Stack the second half for later processing,
 496          * and set about producing the first half now.
 497          */
 498         while (runs > 2) {
 499             ++level;
 500             ++stackp;
 501             stackp->offset = offset;
 502             runs -= stackp->runs = runs / 2;
 503         }
 504         /* We must construct a single run from 1 or 2 runs.
 505          * All the original runs are in which[0] == base.
 506          * The run we construct must end up in which[level&1].
 507          */
 508         iwhich = level & 1;
 509         if (runs == 1) {
 510             /* Constructing a single run from a single run.
 511              * If it's where it belongs already, there's nothing to do.
 512              * Otherwise, copy it to where it belongs.
 513              * A run of 1 is either a singleton at level 0,
 514              * or the second half of a split 3.  In neither event
 515              * is it necessary to set offset.  It will be set by the merge
 516              * that immediately follows.
 517              */
 518             if (iwhich) {       /* Belongs in aux, currently in base */
 519                 f1 = b = PINDEX(base, offset);  /* where list starts */
 520                 f2 = PINDEX(aux, offset);       /* where list goes */
 521                 t = NEXT(f2);                   /* where list will end */
 522                 offset = PNELEM(aux, t);        /* offset thereof */
 523                 t = PINDEX(base, offset);       /* where it currently ends */
 524                 FROMTOUPTO(f1, f2, t);          /* copy */
 525                 NEXT(b) = t;                    /* set up parallel pointer */
 526             } else if (level == 0) goto done;   /* single run at level 0 */
 527         } else {
 528             /* Constructing a single run from two runs.
 529              * The merge code at the top will do that.
 530              * We need only make sure the two runs are in the "other" array,
 531              * so they'll end up in the correct array after the merge.
 532              */
 533             ++level;
 534             ++stackp;
 535             stackp->offset = offset;
 536             stackp->runs = 0;   /* take care of both runs, trigger merge */
 537             if (!iwhich) {      /* Merged runs belong in aux, copy 1st */
 538                 f1 = b = PINDEX(base, offset);  /* where first run starts */
 539                 f2 = PINDEX(aux, offset);       /* where it will be copied */
 540                 t = NEXT(f2);                   /* where first run will end */
 541                 offset = PNELEM(aux, t);        /* offset thereof */
 542                 p = PINDEX(base, offset);       /* end of first run */
 543                 t = NEXT(t);                    /* where second run will end */
 544                 t = PINDEX(base, PNELEM(aux, t)); /* where it now ends */
 545                 FROMTOUPTO(f1, f2, t);          /* copy both runs */
 546                 NEXT(b) = p;                    /* paralled pointer for 1st */
 547                 NEXT(p) = t;                    /* ... and for second */
 548             }
 549         }
 550     }
 551 done:
 552     if (aux != small) Safefree(aux);    /* free iff allocated */
 553     if (flags) {
 554          PL_sort_RealCmp = savecmp;     /* Restore current comparison routine, if any */
 555     }
 556     return;
 557 }
 558
 559 /*
 560  * The quicksort implementation was derived from source code contributed
 561  * by Tom Horsley.
 562  *
 563  * NOTE: this code was derived from Tom Horsley's qsort replacement
 564  * and should not be confused with the original code.
 565  */
 566
 567 /* Copyright (C) Tom Horsley, 1997. All rights reserved.
 568
 569    Permission granted to distribute under the same terms as perl which are
 570    (briefly):
 571
 572     This program is free software; you can redistribute it and/or modify
 573     it under the terms of either:
 574
 575         a) the GNU General Public License as published by the Free
 576         Software Foundation; either version 1, or (at your option) any
 577         later version, or
 578
 579         b) the "Artistic License" which comes with this Kit.
 580
 581    Details on the perl license can be found in the perl source code which
 582    may be located via the www.perl.com web page.
 583
 584    This is the most wonderfulest possible qsort I can come up with (and
 585    still be mostly portable) My (limited) tests indicate it consistently
 586    does about 20% fewer calls to compare than does the qsort in the Visual
 587    C++ library, other vendors may vary.
 588
 589    Some of the ideas in here can be found in "Algorithms" by Sedgewick,
 590    others I invented myself (or more likely re-invented since they seemed
 591    pretty obvious once I watched the algorithm operate for a while).
 592
 593    Most of this code was written while watching the Marlins sweep the Giants
 594    in the 1997 National League Playoffs - no Braves fans allowed to use this
 595    code (just kidding :-).
 596
 597    I realize that if I wanted to be true to the perl tradition, the only
 598    comment in this file would be something like:
 599
 600    ...they shuffled back towards the rear of the line. 'No, not at the
 601    rear!'  the slave-driver shouted. 'Three files up. And stay there...
 602
 603    However, I really needed to violate that tradition just so I could keep
 604    track of what happens myself, not to mention some poor fool trying to
 605    understand this years from now :-).
 606 */
 607
 608 /* ********************************************************** Configuration */
 609
 610 #ifndef QSORT_ORDER_GUESS
 611 #define QSORT_ORDER_GUESS 2     /* Select doubling version of the netBSD trick */
 612 #endif
 613
 614 /* QSORT_MAX_STACK is the largest number of partitions that can be stacked up for
 615    future processing - a good max upper bound is log base 2 of memory size
 616    (32 on 32 bit machines, 64 on 64 bit machines, etc). In reality can
 617    safely be smaller than that since the program is taking up some space and
 618    most operating systems only let you grab some subset of contiguous
 619    memory (not to mention that you are normally sorting data larger than
 620    1 byte element size :-).
 621 */
 622 #ifndef QSORT_MAX_STACK
 623 #define QSORT_MAX_STACK 32
 624 #endif
 625
 626 /* QSORT_BREAK_EVEN is the size of the largest partition we should insertion sort.
 627    Anything bigger and we use qsort. If you make this too small, the qsort
 628    will probably break (or become less efficient), because it doesn't expect
 629    the middle element of a partition to be the same as the right or left -
 630    you have been warned).
 631 */
 632 #ifndef QSORT_BREAK_EVEN
 633 #define QSORT_BREAK_EVEN 6
 634 #endif
 635
 636 /* QSORT_PLAY_SAFE is the size of the largest partition we're willing
 637    to go quadratic on.  We innoculate larger partitions against
 638    quadratic behavior by shuffling them before sorting.  This is not
 639    an absolute guarantee of non-quadratic behavior, but it would take
 640    staggeringly bad luck to pick extreme elements as the pivot
 641    from randomized data.
 642 */
 643 #ifndef QSORT_PLAY_SAFE
 644 #define QSORT_PLAY_SAFE 255
 645 #endif
 646
 647 /* ************************************************************* Data Types */
 648
 649 /* hold left and right index values of a partition waiting to be sorted (the
 650    partition includes both left and right - right is NOT one past the end or
 651    anything like that).
 652 */
 653 struct partition_stack_entry {
 654    int left;
 655    int right;
 656 #ifdef QSORT_ORDER_GUESS
 657    int qsort_break_even;
 658 #endif
 659 };
 660
 661 /* ******************************************************* Shorthand Macros */
 662
 663 /* Note that these macros will be used from inside the qsort function where
 664    we happen to know that the variable 'elt_size' contains the size of an
 665    array element and the variable 'temp' points to enough space to hold a
 666    temp element and the variable 'array' points to the array being sorted
 667    and 'compare' is the pointer to the compare routine.
 668
 669    Also note that there are very many highly architecture specific ways
 670    these might be sped up, but this is simply the most generally portable
 671    code I could think of.
 672 */
 673
 674 /* Return < 0 == 0 or > 0 as the value of elt1 is < elt2, == elt2, > elt2
 675 */
 676 #define qsort_cmp(elt1, elt2) \
 677    ((*compare)(aTHX_ array[elt1], array[elt2]))
 678
 679 #ifdef QSORT_ORDER_GUESS
 680 #define QSORT_NOTICE_SWAP swapped++;
 681 #else
 682 #define QSORT_NOTICE_SWAP
 683 #endif
 684
 685 /* swaps contents of array elements elt1, elt2.
 686 */
 687 #define qsort_swap(elt1, elt2) \
 688    STMT_START { \
 689       QSORT_NOTICE_SWAP \
 690       temp = array[elt1]; \
 691       array[elt1] = array[elt2]; \
 692       array[elt2] = temp; \
 693    } STMT_END
 694
 695 /* rotate contents of elt1, elt2, elt3 such that elt1 gets elt2, elt2 gets
 696    elt3 and elt3 gets elt1.
 697 */
 698 #define qsort_rotate(elt1, elt2, elt3) \
 699    STMT_START { \
 700       QSORT_NOTICE_SWAP \
 701       temp = array[elt1]; \
 702       array[elt1] = array[elt2]; \
 703       array[elt2] = array[elt3]; \
 704       array[elt3] = temp; \
 705    } STMT_END
 706
 707 /* ************************************************************ Debug stuff */
 708
 709 #ifdef QSORT_DEBUG
 710
 711 static void
 712 break_here()
 713 {
 714    return; /* good place to set a breakpoint */
 715 }
 716
 717 #define qsort_assert(t) (void)( (t) || (break_here(), 0) )
 718
 719 static void
 720 doqsort_all_asserts(
 721    void * array,
 722    size_t num_elts,
 723    size_t elt_size,
 724    int (*compare)(const void * elt1, const void * elt2),
 725    int pc_left, int pc_right, int u_left, int u_right)
 726 {
 727    int i;
 728
 729    qsort_assert(pc_left <= pc_right);
 730    qsort_assert(u_right < pc_left);
 731    qsort_assert(pc_right < u_left);
 732    for (i = u_right + 1; i < pc_left; ++i) {
 733       qsort_assert(qsort_cmp(i, pc_left) < 0);
 734    }
 735    for (i = pc_left; i < pc_right; ++i) {
 736       qsort_assert(qsort_cmp(i, pc_right) == 0);
 737    }
 738    for (i = pc_right + 1; i < u_left; ++i) {
 739       qsort_assert(qsort_cmp(pc_right, i) < 0);
 740    }
 741 }
 742
 743 #define qsort_all_asserts(PC_LEFT, PC_RIGHT, U_LEFT, U_RIGHT) \
 744    doqsort_all_asserts(array, num_elts, elt_size, compare, \
 745                  PC_LEFT, PC_RIGHT, U_LEFT, U_RIGHT)
 746
 747 #else
 748
 749 #define qsort_assert(t) ((void)0)
 750
 751 #define qsort_all_asserts(PC_LEFT, PC_RIGHT, U_LEFT, U_RIGHT) ((void)0)
 752
 753 #endif
 754
 755 /* ****************************************************************** qsort */
 756
 757 STATIC void /* the standard unstable (u) quicksort (qsort) */
 758 S_qsortsvu(pTHX_ SV ** array, size_t num_elts, SVCOMPARE_t compare)
 759 {
 760    register SV * temp;
 761
 762    struct partition_stack_entry partition_stack[QSORT_MAX_STACK];
 763    int next_stack_entry = 0;
 764
 765    int part_left;
 766    int part_right;
 767 #ifdef QSORT_ORDER_GUESS
 768    int qsort_break_even;
 769    int swapped;
 770 #endif
 771
 772    /* Make sure we actually have work to do.
 773    */
 774    if (num_elts <= 1) {
 775       return;
 776    }
 777
 778    /* Innoculate large partitions against quadratic behavior */
 779    if (num_elts > QSORT_PLAY_SAFE) {
 780       register size_t n;
 781       register SV ** const q = array;
 782       for (n = num_elts; n > 1; ) {
 783          register const size_t j = (size_t)(n-- * Drand01());
 784          temp = q[j];
 785          q[j] = q[n];
 786          q[n] = temp;
 787       }
 788    }
 789
 790    /* Setup the initial partition definition and fall into the sorting loop
 791    */
 792    part_left = 0;
 793    part_right = (int)(num_elts - 1);
 794 #ifdef QSORT_ORDER_GUESS
 795    qsort_break_even = QSORT_BREAK_EVEN;
 796 #else
 797 #define qsort_break_even QSORT_BREAK_EVEN
 798 #endif
 799    for ( ; ; ) {
 800       if ((part_right - part_left) >= qsort_break_even) {
 801          /* OK, this is gonna get hairy, so lets try to document all the
 802             concepts and abbreviations and variables and what they keep
 803             track of:
 804
 805             pc: pivot chunk - the set of array elements we accumulate in the
 806                 middle of the partition, all equal in value to the original
 807                 pivot element selected. The pc is defined by:
 808
 809                 pc_left - the leftmost array index of the pc
 810                 pc_right - the rightmost array index of the pc
 811
 812                 we start with pc_left == pc_right and only one element
 813                 in the pivot chunk (but it can grow during the scan).
 814
 815             u:  uncompared elements - the set of elements in the partition
 816                 we have not yet compared to the pivot value. There are two
 817                 uncompared sets during the scan - one to the left of the pc
 818                 and one to the right.
 819
 820                 u_right - the rightmost index of the left side's uncompared set
 821                 u_left - the leftmost index of the right side's uncompared set
 822
 823                 The leftmost index of the left sides's uncompared set
 824                 doesn't need its own variable because it is always defined
 825                 by the leftmost edge of the whole partition (part_left). The
 826                 same goes for the rightmost edge of the right partition
 827                 (part_right).
 828
 829                 We know there are no uncompared elements on the left once we
 830                 get u_right < part_left and no uncompared elements on the
 831                 right once u_left > part_right. When both these conditions
 832                 are met, we have completed the scan of the partition.
 833
 834                 Any elements which are between the pivot chunk and the
 835                 uncompared elements should be less than the pivot value on
 836                 the left side and greater than the pivot value on the right
 837                 side (in fact, the goal of the whole algorithm is to arrange
 838                 for that to be true and make the groups of less-than and
 839                 greater-then elements into new partitions to sort again).
 840
 841             As you marvel at the complexity of the code and wonder why it
 842             has to be so confusing. Consider some of the things this level
 843             of confusion brings:
 844
 845             Once I do a compare, I squeeze every ounce of juice out of it. I
 846             never do compare calls I don't have to do, and I certainly never
 847             do redundant calls.
 848
 849             I also never swap any elements unless I can prove there is a
 850             good reason. Many sort algorithms will swap a known value with
 851             an uncompared value just to get things in the right place (or
 852             avoid complexity :-), but that uncompared value, once it gets
 853             compared, may then have to be swapped again. A lot of the
 854             complexity of this code is due to the fact that it never swaps
 855             anything except compared values, and it only swaps them when the
 856             compare shows they are out of position.
 857          */
 858          int pc_left, pc_right;
 859          int u_right, u_left;
 860
 861          int s;
 862
 863          pc_left = ((part_left + part_right) / 2);
 864          pc_right = pc_left;
 865          u_right = pc_left - 1;
 866          u_left = pc_right + 1;
 867
 868          /* Qsort works best when the pivot value is also the median value
 869             in the partition (unfortunately you can't find the median value
 870             without first sorting :-), so to give the algorithm a helping
 871             hand, we pick 3 elements and sort them and use the median value
 872             of that tiny set as the pivot value.
 873
 874             Some versions of qsort like to use the left middle and right as
 875             the 3 elements to sort so they can insure the ends of the
 876             partition will contain values which will stop the scan in the
 877             compare loop, but when you have to call an arbitrarily complex
 878             routine to do a compare, its really better to just keep track of
 879             array index values to know when you hit the edge of the
 880             partition and avoid the extra compare. An even better reason to
 881             avoid using a compare call is the fact that you can drop off the
 882             edge of the array if someone foolishly provides you with an
 883             unstable compare function that doesn't always provide consistent
 884             results.
 885
 886             So, since it is simpler for us to compare the three adjacent
 887             elements in the middle of the partition, those are the ones we
 888             pick here (conveniently pointed at by u_right, pc_left, and
 889             u_left). The values of the left, center, and right elements
 890             are refered to as l c and r in the following comments.
 891          */
 892
 893 #ifdef QSORT_ORDER_GUESS
 894          swapped = 0;
 895 #endif
 896          s = qsort_cmp(u_right, pc_left);
 897          if (s < 0) {
 898             /* l < c */
 899             s = qsort_cmp(pc_left, u_left);
 900             /* if l < c, c < r - already in order - nothing to do */
 901             if (s == 0) {
 902                /* l < c, c == r - already in order, pc grows */
 903                ++pc_right;
 904                qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
 905             } else if (s > 0) {
 906                /* l < c, c > r - need to know more */
 907                s = qsort_cmp(u_right, u_left);
 908                if (s < 0) {
 909                   /* l < c, c > r, l < r - swap c & r to get ordered */
 910                   qsort_swap(pc_left, u_left);
 911                   qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
 912                } else if (s == 0) {
 913                   /* l < c, c > r, l == r - swap c&r, grow pc */
 914                   qsort_swap(pc_left, u_left);
 915                   --pc_left;
 916                   qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
 917                } else {
 918                   /* l < c, c > r, l > r - make lcr into rlc to get ordered */
 919                   qsort_rotate(pc_left, u_right, u_left);
 920                   qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
 921                }
 922             }
 923          } else if (s == 0) {
 924             /* l == c */
 925             s = qsort_cmp(pc_left, u_left);
 926             if (s < 0) {
 927                /* l == c, c < r - already in order, grow pc */
 928                --pc_left;
 929                qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
 930             } else if (s == 0) {
 931                /* l == c, c == r - already in order, grow pc both ways */
 932                --pc_left;
 933                ++pc_right;
 934                qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
 935             } else {
 936                /* l == c, c > r - swap l & r, grow pc */
 937                qsort_swap(u_right, u_left);
 938                ++pc_right;
 939                qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
 940             }
 941          } else {
 942             /* l > c */
 943             s = qsort_cmp(pc_left, u_left);
 944             if (s < 0) {
 945                /* l > c, c < r - need to know more */
 946                s = qsort_cmp(u_right, u_left);
 947                if (s < 0) {
 948                   /* l > c, c < r, l < r - swap l & c to get ordered */
 949                   qsort_swap(u_right, pc_left);
 950                   qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
 951                } else if (s == 0) {
 952                   /* l > c, c < r, l == r - swap l & c, grow pc */
 953                   qsort_swap(u_right, pc_left);
 954                   ++pc_right;
 955                   qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
 956                } else {
 957                   /* l > c, c < r, l > r - rotate lcr into crl to order */
 958                   qsort_rotate(u_right, pc_left, u_left);
 959                   qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
 960                }
 961             } else if (s == 0) {
 962                /* l > c, c == r - swap ends, grow pc */
 963                qsort_swap(u_right, u_left);
 964                --pc_left;
 965                qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
 966             } else {
 967                /* l > c, c > r - swap ends to get in order */
 968                qsort_swap(u_right, u_left);
 969                qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
 970             }
 971          }
 972          /* We now know the 3 middle elements have been compared and
 973             arranged in the desired order, so we can shrink the uncompared
 974             sets on both sides
 975          */
 976          --u_right;
 977          ++u_left;
 978          qsort_all_asserts(pc_left, pc_right, u_left, u_right);
 979
 980          /* The above massive nested if was the simple part :-). We now have
 981             the middle 3 elements ordered and we need to scan through the
 982             uncompared sets on either side, swapping elements that are on
 983             the wrong side or simply shuffling equal elements around to get
 984             all equal elements into the pivot chunk.
 985          */
 986
 987          for ( ; ; ) {
 988             int still_work_on_left;
 989             int still_work_on_right;
 990
 991             /* Scan the uncompared values on the left. If I find a value
 992                equal to the pivot value, move it over so it is adjacent to
 993                the pivot chunk and expand the pivot chunk. If I find a value
 994                less than the pivot value, then just leave it - its already
 995                on the correct side of the partition. If I find a greater
 996                value, then stop the scan.
 997             */
 998             while ((still_work_on_left = (u_right >= part_left))) {
 999                s = qsort_cmp(u_right, pc_left);
1000                if (s < 0) {
1001                   --u_right;
1002                } else if (s == 0) {
1003                   --pc_left;
1004                   if (pc_left != u_right) {
1005                      qsort_swap(u_right, pc_left);
1006                   }
1007                   --u_right;
1008                } else {
1009                   break;
1010                }
1011                qsort_assert(u_right < pc_left);
1012                qsort_assert(pc_left <= pc_right);
1013                qsort_assert(qsort_cmp(u_right + 1, pc_left) <= 0);
1014                qsort_assert(qsort_cmp(pc_left, pc_right) == 0);
1015             }
1016
1017             /* Do a mirror image scan of uncompared values on the right
1018             */
1019             while ((still_work_on_right = (u_left <= part_right))) {
1020                s = qsort_cmp(pc_right, u_left);
1021                if (s < 0) {
1022                   ++u_left;
1023                } else if (s == 0) {
1024                   ++pc_right;
1025                   if (pc_right != u_left) {
1026                      qsort_swap(pc_right, u_left);
1027                   }
1028                   ++u_left;
1029                } else {
1030                   break;
1031                }
1032                qsort_assert(u_left > pc_right);
1033                qsort_assert(pc_left <= pc_right);
1034                qsort_assert(qsort_cmp(pc_right, u_left - 1) <= 0);
1035                qsort_assert(qsort_cmp(pc_left, pc_right) == 0);
1036             }
1037
1038             if (still_work_on_left) {
1039                /* I know I have a value on the left side which needs to be
1040                   on the right side, but I need to know more to decide
1041                   exactly the best thing to do with it.
1042                */
1043                if (still_work_on_right) {
1044                   /* I know I have values on both side which are out of
1045                      position. This is a big win because I kill two birds
1046                      with one swap (so to speak). I can advance the
1047                      uncompared pointers on both sides after swapping both
1048                      of them into the right place.
1049                   */
1050                   qsort_swap(u_right, u_left);
1051                   --u_right;
1052                   ++u_left;
1053                   qsort_all_asserts(pc_left, pc_right, u_left, u_right);
1054                } else {
1055                   /* I have an out of position value on the left, but the
1056                      right is fully scanned, so I "slide" the pivot chunk
1057                      and any less-than values left one to make room for the
1058                      greater value over on the right. If the out of position
1059                      value is immediately adjacent to the pivot chunk (there
1060                      are no less-than values), I can do that with a swap,
1061                      otherwise, I have to rotate one of the less than values
1062                      into the former position of the out of position value
1063                      and the right end of the pivot chunk into the left end
1064                      (got all that?).
1065                   */
1066                   --pc_left;
1067                   if (pc_left == u_right) {
1068                      qsort_swap(u_right, pc_right);
1069                      qsort_all_asserts(pc_left, pc_right-1, u_left, u_right-1);
1070                   } else {
1071                      qsort_rotate(u_right, pc_left, pc_right);
1072                      qsort_all_asserts(pc_left, pc_right-1, u_left, u_right-1);
1073                   }
1074                   --pc_right;
1075                   --u_right;
1076                }
1077             } else if (still_work_on_right) {
1078                /* Mirror image of complex case above: I have an out of
1079                   position value on the right, but the left is fully
1080                   scanned, so I need to shuffle things around to make room
1081                   for the right value on the left.
1082                */
1083                ++pc_right;
1084                if (pc_right == u_left) {
1085                   qsort_swap(u_left, pc_left);
1086                   qsort_all_asserts(pc_left+1, pc_right, u_left+1, u_right);
1087                } else {
1088                   qsort_rotate(pc_right, pc_left, u_left);
1089                   qsort_all_asserts(pc_left+1, pc_right, u_left+1, u_right);
1090                }
1091                ++pc_left;
1092                ++u_left;
1093             } else {
1094                /* No more scanning required on either side of partition,
1095                   break out of loop and figure out next set of partitions
1096                */
1097                break;
1098             }
1099          }
1100
1101          /* The elements in the pivot chunk are now in the right place. They
1102             will never move or be compared again. All I have to do is decide
1103             what to do with the stuff to the left and right of the pivot
1104             chunk.
1105
1106             Notes on the QSORT_ORDER_GUESS ifdef code:
1107
1108             1. If I just built these partitions without swapping any (or
1109                very many) elements, there is a chance that the elements are
1110                already ordered properly (being properly ordered will
1111                certainly result in no swapping, but the converse can't be
1112                proved :-).
1113
1114             2. A (properly written) insertion sort will run faster on
1115                already ordered data than qsort will.
1116
1117             3. Perhaps there is some way to make a good guess about
1118                switching to an insertion sort earlier than partition size 6
1119                (for instance - we could save the partition size on the stack
1120                and increase the size each time we find we didn't swap, thus
1121                switching to insertion sort earlier for partitions with a
1122                history of not swapping).
1123
1124             4. Naturally, if I just switch right away, it will make
1125                artificial benchmarks with pure ascending (or descending)
1126                data look really good, but is that a good reason in general?
1127                Hard to say...
1128          */
1129
1130 #ifdef QSORT_ORDER_GUESS
1131          if (swapped < 3) {
1132 #if QSORT_ORDER_GUESS == 1
1133             qsort_break_even = (part_right - part_left) + 1;
1134 #endif
1135 #if QSORT_ORDER_GUESS == 2
1136             qsort_break_even *= 2;
1137 #endif
1138 #if QSORT_ORDER_GUESS == 3
1139             const int prev_break = qsort_break_even;
1140             qsort_break_even *= qsort_break_even;
1141             if (qsort_break_even < prev_break) {
1142                qsort_break_even = (part_right - part_left) + 1;
1143             }
1144 #endif
1145          } else {
1146             qsort_break_even = QSORT_BREAK_EVEN;
1147          }
1148 #endif
1149
1150          if (part_left < pc_left) {
1151             /* There are elements on the left which need more processing.
1152                Check the right as well before deciding what to do.
1153             */
1154             if (pc_right < part_right) {
1155                /* We have two partitions to be sorted. Stack the biggest one
1156                   and process the smallest one on the next iteration. This
1157                   minimizes the stack height by insuring that any additional
1158                   stack entries must come from the smallest partition which
1159                   (because it is smallest) will have the fewest
1160                   opportunities to generate additional stack entries.
1161                */
1162                if ((part_right - pc_right) > (pc_left - part_left)) {
1163                   /* stack the right partition, process the left */
1164                   partition_stack[next_stack_entry].left = pc_right + 1;
1165                   partition_stack[next_stack_entry].right = part_right;
1166 #ifdef QSORT_ORDER_GUESS
1167                   partition_stack[next_stack_entry].qsort_break_even = qsort_break_even;
1168 #endif
1169                   part_right = pc_left - 1;
1170                } else {
1171                   /* stack the left partition, process the right */
1172                   partition_stack[next_stack_entry].left = part_left;
1173                   partition_stack[next_stack_entry].right = pc_left - 1;
1174 #ifdef QSORT_ORDER_GUESS
1175                   partition_stack[next_stack_entry].qsort_break_even = qsort_break_even;
1176 #endif
1177                   part_left = pc_right + 1;
1178                }
1179                qsort_assert(next_stack_entry < QSORT_MAX_STACK);
1180                ++next_stack_entry;
1181             } else {
1182                /* The elements on the left are the only remaining elements
1183                   that need sorting, arrange for them to be processed as the
1184                   next partition.
1185                */
1186                part_right = pc_left - 1;
1187             }
1188          } else if (pc_right < part_right) {
1189             /* There is only one chunk on the right to be sorted, make it
1190                the new partition and loop back around.
1191             */
1192             part_left = pc_right + 1;
1193          } else {
1194             /* This whole partition wound up in the pivot chunk, so
1195                we need to get a new partition off the stack.
1196             */
1197             if (next_stack_entry == 0) {
1198                /* the stack is empty - we are done */
1199                break;
1200             }
1201             --next_stack_entry;
1202             part_left = partition_stack[next_stack_entry].left;
1203             part_right = partition_stack[next_stack_entry].right;
1204 #ifdef QSORT_ORDER_GUESS
1205             qsort_break_even = partition_stack[next_stack_entry].qsort_break_even;
1206 #endif
1207          }
1208       } else {
1209          /* This partition is too small to fool with qsort complexity, just
1210             do an ordinary insertion sort to minimize overhead.
1211          */
1212          int i;
1213          /* Assume 1st element is in right place already, and start checking
1214             at 2nd element to see where it should be inserted.
1215          */
1216          for (i = part_left + 1; i <= part_right; ++i) {
1217             int j;
1218             /* Scan (backwards - just in case 'i' is already in right place)
1219                through the elements already sorted to see if the ith element
1220                belongs ahead of one of them.
1221             */
1222             for (j = i - 1; j >= part_left; --j) {
1223                if (qsort_cmp(i, j) >= 0) {
1224                   /* i belongs right after j
1225                   */
1226                   break;
1227                }
1228             }
1229             ++j;
1230             if (j != i) {
1231                /* Looks like we really need to move some things
1232                */
1233                int k;
1234                temp = array[i];
1235                for (k = i - 1; k >= j; --k)
1236                   array[k + 1] = array[k];
1237                array[j] = temp;
1238             }
1239          }
1240
1241          /* That partition is now sorted, grab the next one, or get out
1242             of the loop if there aren't any more.
1243          */
1244
1245          if (next_stack_entry == 0) {
1246             /* the stack is empty - we are done */
1247             break;
1248          }
1249          --next_stack_entry;
1250          part_left = partition_stack[next_stack_entry].left;
1251          part_right = partition_stack[next_stack_entry].right;
1252 #ifdef QSORT_ORDER_GUESS
1253          qsort_break_even = partition_stack[next_stack_entry].qsort_break_even;
1254 #endif
1255       }
1256    }
1257
1258    /* Believe it or not, the array is sorted at this point! */
1259 }
1260
1261 /* Stabilize what is, presumably, an otherwise unstable sort method.
1262  * We do that by allocating (or having on hand) an array of pointers
1263  * that is the same size as the original array of elements to be sorted.
1264  * We initialize this parallel array with the addresses of the original
1265  * array elements.  This indirection can make you crazy.
1266  * Some pictures can help.  After initializing, we have
1267  *
1268  *  indir                  list1
1269  * +----+                 +----+
1270  * |    | --------------> |    | ------> first element to be sorted
1271  * +----+                 +----+
1272  * |    | --------------> |    | ------> second element to be sorted
1273  * +----+                 +----+
1274  * |    | --------------> |    | ------> third element to be sorted
1275  * +----+                 +----+
1276  *  ...
1277  * +----+                 +----+
1278  * |    | --------------> |    | ------> n-1st element to be sorted
1279  * +----+                 +----+
1280  * |    | --------------> |    | ------> n-th element to be sorted
1281  * +----+                 +----+
1282  *
1283  * During the sort phase, we leave the elements of list1 where they are,
1284  * and sort the pointers in the indirect array in the same order determined
1285  * by the original comparison routine on the elements pointed to.
1286  * Because we don't move the elements of list1 around through
1287  * this phase, we can break ties on elements that compare equal
1288  * using their address in the list1 array, ensuring stabilty.
1289  * This leaves us with something looking like
1290  *
1291  *  indir                  list1
1292  * +----+                 +----+
1293  * |    | --+       +---> |    | ------> first element to be sorted
1294  * +----+   |       |     +----+
1295  * |    | --|-------|---> |    | ------> second element to be sorted
1296  * +----+   |       |     +----+
1297  * |    | --|-------+ +-> |    | ------> third element to be sorted
1298  * +----+   |         |   +----+
1299  *  ...
1300  * +----+    | |   | |    +----+
1301  * |    | ---|-+   | +--> |    | ------> n-1st element to be sorted
1302  * +----+    |     |      +----+
1303  * |    | ---+     +----> |    | ------> n-th element to be sorted
1304  * +----+                 +----+
1305  *
1306  * where the i-th element of the indirect array points to the element
1307  * that should be i-th in the sorted array.  After the sort phase,
1308  * we have to put the elements of list1 into the places
1309  * dictated by the indirect array.
1310  */
1311
1312
1313 static I32
1314 cmpindir(pTHX_ gptr a, gptr b)
1315 {
1316     gptr * const ap = (gptr *)a;
1317     gptr * const bp = (gptr *)b;
1318     const I32 sense = PL_sort_RealCmp(aTHX_ *ap, *bp);
1319
1320     if (sense)
1321         return sense;
1322     return (ap > bp) ? 1 : ((ap < bp) ? -1 : 0);
1323 }
1324
1325 static I32
1326 cmpindir_desc(pTHX_ gptr a, gptr b)
1327 {
1328     gptr * const ap = (gptr *)a;
1329     gptr * const bp = (gptr *)b;
1330     const I32 sense = PL_sort_RealCmp(aTHX_ *ap, *bp);
1331
1332     /* Reverse the default */
1333     if (sense)
1334         return -sense;
1335     /* But don't reverse the stability test.  */
1336     return (ap > bp) ? 1 : ((ap < bp) ? -1 : 0);
1337
1338 }
1339
1340 STATIC void
1341 S_qsortsv(pTHX_ gptr *list1, size_t nmemb, SVCOMPARE_t cmp, U32 flags)
1342 {
1343
1344     dSORTHINTS;
1345
1346     if (SORTHINTS & HINT_SORT_STABLE) {
1347          register gptr **pp, *q;
1348          register size_t n, j, i;
1349          gptr *small[SMALLSORT], **indir, tmp;
1350          SVCOMPARE_t savecmp;
1351          if (nmemb <= 1) return;     /* sorted trivially */
1352
1353          /* Small arrays can use the stack, big ones must be allocated */
1354          if (nmemb <= SMALLSORT) indir = small;
1355          else { Newx(indir, nmemb, gptr *); }
1356
1357          /* Copy pointers to original array elements into indirect array */
1358          for (n = nmemb, pp = indir, q = list1; n--; ) *pp++ = q++;
1359
1360          savecmp = PL_sort_RealCmp;     /* Save current comparison routine, if any */
1361          PL_sort_RealCmp = cmp; /* Put comparison routine where cmpindir can find it */
1362
1363          /* sort, with indirection */
1364          if (flags)
1365             qsortsvu((gptr *)indir, nmemb, cmpindir_desc);
1366         else
1367             qsortsvu((gptr *)indir, nmemb, cmpindir);
1368
1369          pp = indir;
1370          q = list1;
1371          for (n = nmemb; n--; ) {
1372               /* Assert A: all elements of q with index > n are already
1373                * in place.  This is vacuosly true at the start, and we
1374                * put element n where it belongs below (if it wasn't
1375                * already where it belonged). Assert B: we only move
1376                * elements that aren't where they belong,
1377                * so, by A, we never tamper with elements above n.
1378                */
1379               j = pp[n] - q;            /* This sets j so that q[j] is
1380                                          * at pp[n].  *pp[j] belongs in
1381                                          * q[j], by construction.
1382                                          */
1383               if (n != j) {             /* all's well if n == j */
1384                    tmp = q[j];          /* save what's in q[j] */
1385                    do {
1386                         q[j] = *pp[j];  /* put *pp[j] where it belongs */
1387                         i = pp[j] - q;  /* the index in q of the element
1388                                          * just moved */
1389                         pp[j] = q + j;  /* this is ok now */
1390                    } while ((j = i) != n);
1391                    /* There are only finitely many (nmemb) addresses
1392                     * in the pp array.
1393                     * So we must eventually revisit an index we saw before.
1394                     * Suppose the first revisited index is k != n.
1395                     * An index is visited because something else belongs there.
1396                     * If we visit k twice, then two different elements must
1397                     * belong in the same place, which cannot be.
1398                     * So j must get back to n, the loop terminates,
1399                     * and we put the saved element where it belongs.
1400                     */
1401                    q[n] = tmp;          /* put what belongs into
1402                                          * the n-th element */
1403               }
1404          }
1405
1406         /* free iff allocated */
1407          if (indir != small) { Safefree(indir); }
1408          /* restore prevailing comparison routine */
1409          PL_sort_RealCmp = savecmp;
1410     } else if (flags) {
1411          const SVCOMPARE_t savecmp = PL_sort_RealCmp;   /* Save current comparison routine, if any */
1412          PL_sort_RealCmp = cmp; /* Put comparison routine where cmp_desc can find it */
1413          cmp = cmp_desc;
1414          qsortsvu(list1, nmemb, cmp);
1415          /* restore prevailing comparison routine */
1416          PL_sort_RealCmp = savecmp;
1417     } else {
1418          qsortsvu(list1, nmemb, cmp);
1419     }
1420 }
1421
1422 /*
1423 =head1 Array Manipulation Functions
1424
1425 =for apidoc sortsv
1426
1427 Sort an array. Here is an example:
1428
1429     sortsv(AvARRAY(av), av_len(av)+1, Perl_sv_cmp_locale);
1430
1431 See lib/sort.pm for details about controlling the sorting algorithm.
1432
1433 =cut
1434 */
1435
1436 void
1437 Perl_sortsv(pTHX_ SV **array, size_t nmemb, SVCOMPARE_t cmp)
1438 {
1439     dSORTHINTS;
1440     const I32 hints = SORTHINTS;
1441     if (hints & HINT_SORT_QUICKSORT) {
1442         S_qsortsv(aTHX_ array, nmemb, cmp, 0);
1443     }
1444     else {
1445         /* The default as of 5.8.0 is mergesort */
1446         S_mergesortsv(aTHX_ array, nmemb, cmp, 0);
1447     }
1448 }
1449
1450
1451 static void
1452 S_sortsv_desc(pTHX_ SV **array, size_t nmemb, SVCOMPARE_t cmp)
1453 {
1454     dSORTHINTS;
1455     const I32 hints = SORTHINTS;
1456     if (hints & HINT_SORT_QUICKSORT) {
1457         S_qsortsv(aTHX_ array, nmemb, cmp, 1);
1458     }
1459     else {
1460         /* The default as of 5.8.0 is mergesort */
1461         S_mergesortsv(aTHX_ array, nmemb, cmp, 1);
1462     }
1463
1464 }
1465
1466 #define SvNSIOK(sv) ((SvFLAGS(sv) & SVf_NOK) || ((SvFLAGS(sv) & (SVf_IOK|SVf_IVisUV)) == SVf_IOK))
1467 #define SvSIOK(sv) ((SvFLAGS(sv) & (SVf_IOK|SVf_IVisUV)) == SVf_IOK)
1468 #define SvNSIV(sv) ( SvNOK(sv) ? SvNVX(sv) : ( SvSIOK(sv) ? SvIVX(sv) : sv_2nv(sv) ) )
1469
1470 PP(pp_sort)
1471 {
1472     dSP; dMARK; dORIGMARK;
1473     register SV **p1 = ORIGMARK+1, **p2;
1474     register I32 max, i;
1475     AV* av = NULL;
1476     HV *stash;
1477     GV *gv;
1478     CV *cv = NULL;
1479     I32 gimme = GIMME;
1480     OP* const nextop = PL_op->op_next;
1481     I32 overloading = 0;
1482     bool hasargs = FALSE;
1483     I32 is_xsub = 0;
1484     I32 sorting_av = 0;
1485     const U8 priv = PL_op->op_private;
1486     const U8 flags = PL_op->op_flags;
1487     void (*sortsvp)(pTHX_ SV **array, size_t nmemb, SVCOMPARE_t cmp)
1488       = Perl_sortsv;
1489     I32 all_SIVs = 1;
1490
1491     if (gimme != G_ARRAY) {
1492         SP = MARK;
1493         EXTEND(SP,1);
1494         RETPUSHUNDEF;
1495     }
1496
1497     ENTER;
1498     SAVEVPTR(PL_sortcop);
1499     if (flags & OPf_STACKED) {
1500         if (flags & OPf_SPECIAL) {
1501             OP *kid = cLISTOP->op_first->op_sibling;    /* pass pushmark */
1502             kid = kUNOP->op_first;                      /* pass rv2gv */
1503             kid = kUNOP->op_first;                      /* pass leave */
1504             PL_sortcop = kid->op_next;
1505             stash = CopSTASH(PL_curcop);
1506         }
1507         else {
1508             cv = sv_2cv(*++MARK, &stash, &gv, 0);
1509             if (cv && SvPOK(cv)) {
1510                 const char * const proto = SvPV_nolen_const((SV*)cv);
1511                 if (proto && strEQ(proto, "$$")) {
1512                     hasargs = TRUE;
1513                 }
1514             }
1515             if (!(cv && CvROOT(cv))) {
1516                 if (cv && CvISXSUB(cv)) {
1517                     is_xsub = 1;
1518                 }
1519                 else if (gv) {
1520                     SV *tmpstr = sv_newmortal();
1521                     gv_efullname3(tmpstr, gv, NULL);
1522                     DIE(aTHX_ "Undefined sort subroutine \"%"SVf"\" called",
1523                         (void*)tmpstr);
1524                 }
1525                 else {
1526                     DIE(aTHX_ "Undefined subroutine in sort");
1527                 }
1528             }
1529
1530             if (is_xsub)
1531                 PL_sortcop = (OP*)cv;
1532             else
1533                 PL_sortcop = CvSTART(cv);
1534         }
1535     }
1536     else {
1537         PL_sortcop = NULL;
1538         stash = CopSTASH(PL_curcop);
1539     }
1540
1541     /* optimiser converts "@a = sort @a" to "sort \@a";
1542      * in case of tied @a, pessimise: push (@a) onto stack, then assign
1543      * result back to @a at the end of this function */
1544     if (priv & OPpSORT_INPLACE) {
1545         assert( MARK+1 == SP && *SP && SvTYPE(*SP) == SVt_PVAV);
1546         (void)POPMARK; /* remove mark associated with ex-OP_AASSIGN */
1547         av = (AV*)(*SP);
1548         max = AvFILL(av) + 1;
1549         if (SvMAGICAL(av)) {
1550             MEXTEND(SP, max);
1551             p2 = SP;
1552             for (i=0; i < max; i++) {
1553                 SV **svp = av_fetch(av, i, FALSE);
1554                 *SP++ = (svp) ? *svp : NULL;
1555             }
1556         }
1557         else {
1558             if (SvREADONLY(av))
1559                 Perl_croak(aTHX_ PL_no_modify);
1560             else
1561                 SvREADONLY_on(av);
1562             p1 = p2 = AvARRAY(av);
1563             sorting_av = 1;
1564         }
1565     }
1566     else {
1567         p2 = MARK+1;
1568         max = SP - MARK;
1569    }
1570
1571     if (priv & OPpSORT_DESCEND) {
1572         sortsvp = S_sortsv_desc;
1573     }
1574
1575     /* shuffle stack down, removing optional initial cv (p1!=p2), plus
1576      * any nulls; also stringify or converting to integer or number as
1577      * required any args */
1578     for (i=max; i > 0 ; i--) {
1579         if ((*p1 = *p2++)) {                    /* Weed out nulls. */
1580             SvTEMP_off(*p1);
1581             if (!PL_sortcop) {
1582                 if (priv & OPpSORT_NUMERIC) {
1583                     if (priv & OPpSORT_INTEGER) {
1584                         if (!SvIOK(*p1)) {
1585                             if (SvAMAGIC(*p1))
1586                                 overloading = 1;
1587                             else
1588                                 (void)sv_2iv(*p1);
1589                         }
1590                     }
1591                     else {
1592                         if (!SvNSIOK(*p1)) {
1593                             if (SvAMAGIC(*p1))
1594                                 overloading = 1;
1595                             else
1596                                 (void)sv_2nv(*p1);
1597                         }
1598                         if (all_SIVs && !SvSIOK(*p1))
1599                             all_SIVs = 0;
1600                     }
1601                 }
1602                 else {
1603                     if (!SvPOK(*p1)) {
1604                         if (SvAMAGIC(*p1))
1605                             overloading = 1;
1606                         else
1607                             (void)sv_2pv_flags(*p1, 0,
1608                                                SV_GMAGIC|SV_CONST_RETURN);
1609                     }
1610                 }
1611             }
1612             p1++;
1613         }
1614         else
1615             max--;
1616     }
1617     if (sorting_av)
1618         AvFILLp(av) = max-1;
1619
1620     if (max > 1) {
1621         SV **start;
1622         if (PL_sortcop) {
1623             PERL_CONTEXT *cx;
1624             SV** newsp;
1625             const bool oldcatch = CATCH_GET;
1626
1627             SAVETMPS;
1628             SAVEOP();
1629
1630             CATCH_SET(TRUE);
1631             PUSHSTACKi(PERLSI_SORT);
1632             if (!hasargs && !is_xsub) {
1633                 SAVESPTR(PL_firstgv);
1634                 SAVESPTR(PL_secondgv);
1635                 SAVESPTR(PL_sortstash);
1636                 PL_firstgv = gv_fetchpvs("a", GV_ADD|GV_NOTQUAL, SVt_PV);
1637                 PL_secondgv = gv_fetchpvs("b", GV_ADD|GV_NOTQUAL, SVt_PV);
1638                 PL_sortstash = stash;
1639 #ifdef USE_5005THREADS
1640                 sv_lock((SV *)PL_firstgv);
1641                 sv_lock((SV *)PL_secondgv);
1642 #endif
1643                 SAVESPTR(GvSV(PL_firstgv));
1644                 SAVESPTR(GvSV(PL_secondgv));
1645             }
1646
1647             PUSHBLOCK(cx, CXt_NULL, PL_stack_base);
1648             if (!(flags & OPf_SPECIAL)) {
1649                 cx->cx_type = CXt_SUB;
1650                 cx->blk_gimme = G_SCALAR;
1651                 PUSHSUB(cx);
1652                 if (!is_xsub) {
1653                     AV* const padlist = CvPADLIST(cv);
1654
1655                     if (++CvDEPTH(cv) >= 2) {
1656                         PERL_STACK_OVERFLOW_CHECK();
1657                         pad_push(padlist, CvDEPTH(cv), 1);
1658                     }
1659                     SAVECOMPPAD();
1660                     PAD_SET_CUR_NOSAVE(padlist, CvDEPTH(cv));
1661
1662                     if (hasargs) {
1663                         /* This is mostly copied from pp_entersub */
1664                         AV *av = (AV*)PAD_SVl(0);
1665
1666 #ifndef USE_5005THREADS
1667                         cx->blk_sub.savearray = GvAV(PL_defgv);
1668                         GvAV(PL_defgv) = (AV*)SvREFCNT_inc(av);
1669 #endif /* USE_5005THREADS */
1670                         CX_CURPAD_SAVE(cx->blk_sub);
1671                         cx->blk_sub.argarray = av;
1672                     }
1673
1674                 }
1675             }
1676             cx->cx_type |= CXp_MULTICALL;
1677
1678             start = p1 - max;
1679             sortsvp(aTHX_ start, max,
1680                     is_xsub ? S_sortcv_xsub : hasargs ? S_sortcv_stacked : S_sortcv);
1681
1682             if (!(flags & OPf_SPECIAL)) {
1683                 LEAVESUB(cv);
1684                 if (!is_xsub)
1685                     CvDEPTH(cv)--;
1686             }
1687             POPBLOCK(cx,PL_curpm);
1688             PL_stack_sp = newsp;
1689             POPSTACK;
1690             CATCH_SET(oldcatch);
1691         }
1692         else {
1693             MEXTEND(SP, 20);    /* Can't afford stack realloc on signal. */
1694             start = sorting_av ? AvARRAY(av) : ORIGMARK+1;
1695             sortsvp(aTHX_ start, max,
1696                     (priv & OPpSORT_NUMERIC)
1697                         ? ( ( ( priv & OPpSORT_INTEGER) || all_SIVs)
1698                             ? ( overloading ? S_amagic_i_ncmp : S_sv_i_ncmp)
1699                             : ( overloading ? S_amagic_ncmp : S_sv_ncmp ) )
1700                         : ( IN_LOCALE_RUNTIME
1701                             ? ( overloading
1702                                 ? (SVCOMPARE_t)S_amagic_cmp_locale
1703                                 : (SVCOMPARE_t)sv_cmp_locale_static)
1704                             : ( overloading ? (SVCOMPARE_t)S_amagic_cmp : (SVCOMPARE_t)sv_cmp_static)));
1705         }
1706         if (priv & OPpSORT_REVERSE) {
1707             SV **q = start+max-1;
1708             while (start < q) {
1709                 SV * const tmp = *start;
1710                 *start++ = *q;
1711                 *q-- = tmp;
1712             }
1713         }
1714     }
1715     if (sorting_av)
1716         SvREADONLY_off(av);
1717     else if (av && !sorting_av) {
1718         /* simulate pp_aassign of tied AV */
1719         SV** const base = ORIGMARK+1;
1720         for (i=0; i < max; i++) {
1721             base[i] = newSVsv(base[i]);
1722         }
1723         av_clear(av);
1724         av_extend(av, max);
1725         for (i=0; i < max; i++) {
1726             SV * const sv = base[i];
1727             SV ** const didstore = av_store(av, i, sv);
1728             if (SvSMAGICAL(sv))
1729                 mg_set(sv);
1730             if (!didstore)
1731                 sv_2mortal(sv);
1732         }
1733     }
1734     LEAVE;
1735     PL_stack_sp = ORIGMARK + (sorting_av ? 0 : max);
1736     return nextop;
1737 }
1738
1739 static I32
1740 S_sortcv(pTHX_ SV *a, SV *b)
1741 {
1742     const I32 oldsaveix = PL_savestack_ix;
1743     const I32 oldscopeix = PL_scopestack_ix;
1744     I32 result;
1745     GvSV(PL_firstgv) = a;
1746     GvSV(PL_secondgv) = b;
1747     PL_stack_sp = PL_stack_base;
1748     PL_op = PL_sortcop;
1749     CALLRUNOPS(aTHX);
1750     if (PL_stack_sp != PL_stack_base + 1)
1751         Perl_croak(aTHX_ "Sort subroutine didn't return single value");
1752     if (!SvNIOKp(*PL_stack_sp))
1753         Perl_croak(aTHX_ "Sort subroutine didn't return a numeric value");
1754     result = SvIV(*PL_stack_sp);
1755     while (PL_scopestack_ix > oldscopeix) {
1756         LEAVE;
1757     }
1758     leave_scope(oldsaveix);
1759     return result;
1760 }
1761
1762 static I32
1763 S_sortcv_stacked(pTHX_ SV *a, SV *b)
1764 {
1765     const I32 oldsaveix = PL_savestack_ix;
1766     const I32 oldscopeix = PL_scopestack_ix;
1767     I32 result;
1768 #ifdef USE_5005THREADS
1769     AV * const av = (AV*)PAD_SVl(0);
1770 #else
1771     AV * const av = GvAV(PL_defgv);
1772 #endif
1773
1774     if (AvMAX(av) < 1) {
1775         SV** ary = AvALLOC(av);
1776         if (AvARRAY(av) != ary) {
1777             AvMAX(av) += AvARRAY(av) - AvALLOC(av);
1778             SvPV_set(av, (char*)ary);
1779         }
1780         if (AvMAX(av) < 1) {
1781             AvMAX(av) = 1;
1782             Renew(ary,2,SV*);
1783             SvPV_set(av, (char*)ary);
1784         }
1785     }
1786     AvFILLp(av) = 1;
1787
1788     AvARRAY(av)[0] = a;
1789     AvARRAY(av)[1] = b;
1790     PL_stack_sp = PL_stack_base;
1791     PL_op = PL_sortcop;
1792     CALLRUNOPS(aTHX);
1793     if (PL_stack_sp != PL_stack_base + 1)
1794         Perl_croak(aTHX_ "Sort subroutine didn't return single value");
1795     if (!SvNIOKp(*PL_stack_sp))
1796         Perl_croak(aTHX_ "Sort subroutine didn't return a numeric value");
1797     result = SvIV(*PL_stack_sp);
1798     while (PL_scopestack_ix > oldscopeix) {
1799         LEAVE;
1800     }
1801     leave_scope(oldsaveix);
1802     return result;
1803 }
1804
1805 static I32
1806 S_sortcv_xsub(pTHX_ SV *a, SV *b)
1807 {
1808     dSP;
1809     const I32 oldsaveix = PL_savestack_ix;
1810     const I32 oldscopeix = PL_scopestack_ix;
1811     CV * const cv=(CV*)PL_sortcop;
1812     I32 result;
1813
1814     SP = PL_stack_base;
1815     PUSHMARK(SP);
1816     EXTEND(SP, 2);
1817     *++SP = a;
1818     *++SP = b;
1819     PUTBACK;
1820     (void)(*CvXSUB(cv))(aTHX_ cv);
1821     if (PL_stack_sp != PL_stack_base + 1)
1822         Perl_croak(aTHX_ "Sort subroutine didn't return single value");
1823     if (!SvNIOKp(*PL_stack_sp))
1824         Perl_croak(aTHX_ "Sort subroutine didn't return a numeric value");
1825     result = SvIV(*PL_stack_sp);
1826     while (PL_scopestack_ix > oldscopeix) {
1827         LEAVE;
1828     }
1829     leave_scope(oldsaveix);
1830     return result;
1831 }
1832
1833
1834 static I32
1835 S_sv_ncmp(pTHX_ SV *a, SV *b)
1836 {
1837     const NV nv1 = SvNSIV(a);
1838     const NV nv2 = SvNSIV(b);
1839     return nv1 < nv2 ? -1 : nv1 > nv2 ? 1 : 0;
1840 }
1841
1842 static I32
1843 S_sv_i_ncmp(pTHX_ SV *a, SV *b)
1844 {
1845     const IV iv1 = SvIV(a);
1846     const IV iv2 = SvIV(b);
1847     return iv1 < iv2 ? -1 : iv1 > iv2 ? 1 : 0;
1848 }
1849
1850 #define tryCALL_AMAGICbin(left,right,meth) \
1851     (PL_amagic_generation && (SvAMAGIC(left)||SvAMAGIC(right))) \
1852         ? amagic_call(left, right, CAT2(meth,_amg), 0) \
1853         : NULL;
1854
1855 #define SORT_NORMAL_RETURN_VALUE(val)  (((val) > 0) ? 1 : ((val) ? -1 : 0))
1856
1857 static I32
1858 S_amagic_ncmp(pTHX_ register SV *a, register SV *b)
1859 {
1860     SV * const tmpsv = tryCALL_AMAGICbin(a,b,ncmp);
1861     if (tmpsv) {
1862         if (SvIOK(tmpsv)) {
1863             const I32 i = SvIVX(tmpsv);
1864             return SORT_NORMAL_RETURN_VALUE(i);
1865         }
1866         else {
1867             const NV d = SvNV(tmpsv);
1868             return SORT_NORMAL_RETURN_VALUE(d);
1869         }
1870      }
1871      return S_sv_ncmp(aTHX_ a, b);
1872 }
1873
1874 static I32
1875 S_amagic_i_ncmp(pTHX_ register SV *a, register SV *b)
1876 {
1877     SV * const tmpsv = tryCALL_AMAGICbin(a,b,ncmp);
1878     if (tmpsv) {
1879         if (SvIOK(tmpsv)) {
1880             const I32 i = SvIVX(tmpsv);
1881             return SORT_NORMAL_RETURN_VALUE(i);
1882         }
1883         else {
1884             const NV d = SvNV(tmpsv);
1885             return SORT_NORMAL_RETURN_VALUE(d);
1886         }
1887     }
1888     return S_sv_i_ncmp(aTHX_ a, b);
1889 }
1890
1891 static I32
1892 S_amagic_cmp(pTHX_ register SV *str1, register SV *str2)
1893 {
1894     SV * const tmpsv = tryCALL_AMAGICbin(str1,str2,scmp);
1895     if (tmpsv) {
1896         if (SvIOK(tmpsv)) {
1897             const I32 i = SvIVX(tmpsv);
1898             return SORT_NORMAL_RETURN_VALUE(i);
1899         }
1900         else {
1901             const NV d = SvNV(tmpsv);
1902             return SORT_NORMAL_RETURN_VALUE(d);
1903         }
1904     }
1905     return sv_cmp(str1, str2);
1906 }
1907
1908 static I32
1909 S_amagic_cmp_locale(pTHX_ register SV *str1, register SV *str2)
1910 {
1911     SV * const tmpsv = tryCALL_AMAGICbin(str1,str2,scmp);
1912     if (tmpsv) {
1913         if (SvIOK(tmpsv)) {
1914             const I32 i = SvIVX(tmpsv);
1915             return SORT_NORMAL_RETURN_VALUE(i);
1916         }
1917         else {
1918             const NV d = SvNV(tmpsv);
1919             return SORT_NORMAL_RETURN_VALUE(d);
1920         }
1921     }
1922     return sv_cmp_locale(str1, str2);
1923 }
1924
1925 /*
1926  * Local variables:
1927  * c-indentation-style: bsd
1928  * c-basic-offset: 4
1929  * indent-tabs-mode: t
1930  * End:
1931  *
1932  * ex: set ts=8 sts=4 sw=4 noet:
1933  */