pp_sort.c

   1 /*    pp_sort.c
   2  *
   3  *    Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
   4  *    2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007 by Larry Wall and others
   5  *
   6  *    You may distribute under the terms of either the GNU General Public
   7  *    License or the Artistic License, as specified in the README file.
   8  *
   9  */
  10
  11 /*
  12  *   ...they shuffled back towards the rear of the line. 'No, not at the
  13  *   rear!'  the slave-driver shouted. 'Three files up. And stay there...
  14  */
  15
  16 /* This file contains pp ("push/pop") functions that
  17  * execute the opcodes that make up a perl program. A typical pp function
  18  * expects to find its arguments on the stack, and usually pushes its
  19  * results onto the stack, hence the 'pp' terminology. Each OP structure
  20  * contains a pointer to the relevant pp_foo() function.
  21  *
  22  * This particular file just contains pp_sort(), which is complex
  23  * enough to merit its own file! See the other pp*.c files for the rest of
  24  * the pp_ functions.
  25  */
  26
  27 #include "EXTERN.h"
  28 #define PERL_IN_PP_SORT_C
  29 #include "perl.h"
  30
  31 #if defined(UNDER_CE)
  32 /* looks like 'small' is reserved word for WINCE (or somesuch)*/
  33 #define small xsmall
  34 #endif
  35
  36 #define sv_cmp_static Perl_sv_cmp
  37 #define sv_cmp_locale_static Perl_sv_cmp_locale
  38
  39 #ifndef SMALLSORT
  40 #define SMALLSORT (200)
  41 #endif
  42
  43 /* Flags for qsortsv and mergesortsv */
  44 #define SORTf_DESC   1
  45 #define SORTf_STABLE 2
  46 #define SORTf_QSORT  4
  47
  48 /*
  49  * The mergesort implementation is by Peter M. Mcilroy <pmcilroy@lucent.com>.
  50  *
  51  * The original code was written in conjunction with BSD Computer Software
  52  * Research Group at University of California, Berkeley.
  53  *
  54  * See also: "Optimistic Merge Sort" (SODA '92)
  55  *
  56  * The integration to Perl is by John P. Linderman <jpl@research.att.com>.
  57  *
  58  * The code can be distributed under the same terms as Perl itself.
  59  *
  60  */
  61
  62
  63 typedef char * aptr;            /* pointer for arithmetic on sizes */
  64 typedef SV * gptr;              /* pointers in our lists */
  65
  66 /* Binary merge internal sort, with a few special mods
  67 ** for the special perl environment it now finds itself in.
  68 **
  69 ** Things that were once options have been hotwired
  70 ** to values suitable for this use.  In particular, we'll always
  71 ** initialize looking for natural runs, we'll always produce stable
  72 ** output, and we'll always do Peter McIlroy's binary merge.
  73 */
  74
  75 /* Pointer types for arithmetic and storage and convenience casts */
  76
  77 #define APTR(P) ((aptr)(P))
  78 #define GPTP(P) ((gptr *)(P))
  79 #define GPPP(P) ((gptr **)(P))
  80
  81
  82 /* byte offset from pointer P to (larger) pointer Q */
  83 #define BYTEOFF(P, Q) (APTR(Q) - APTR(P))
  84
  85 #define PSIZE sizeof(gptr)
  86
  87 /* If PSIZE is power of 2, make PSHIFT that power, if that helps */
  88
  89 #ifdef  PSHIFT
  90 #define PNELEM(P, Q)    (BYTEOFF(P,Q) >> (PSHIFT))
  91 #define PNBYTE(N)       ((N) << (PSHIFT))
  92 #define PINDEX(P, N)    (GPTP(APTR(P) + PNBYTE(N)))
  93 #else
  94 /* Leave optimization to compiler */
  95 #define PNELEM(P, Q)    (GPTP(Q) - GPTP(P))
  96 #define PNBYTE(N)       ((N) * (PSIZE))
  97 #define PINDEX(P, N)    (GPTP(P) + (N))
  98 #endif
  99
 100 /* Pointer into other corresponding to pointer into this */
 101 #define POTHER(P, THIS, OTHER) GPTP(APTR(OTHER) + BYTEOFF(THIS,P))
 102
 103 #define FROMTOUPTO(src, dst, lim) do *dst++ = *src++; while(src<lim)
 104
 105
 106 /* Runs are identified by a pointer in the auxilliary list.
 107 ** The pointer is at the start of the list,
 108 ** and it points to the start of the next list.
 109 ** NEXT is used as an lvalue, too.
 110 */
 111
 112 #define NEXT(P)         (*GPPP(P))
 113
 114
 115 /* PTHRESH is the minimum number of pairs with the same sense to justify
 116 ** checking for a run and extending it.  Note that PTHRESH counts PAIRS,
 117 ** not just elements, so PTHRESH == 8 means a run of 16.
 118 */
 119
 120 #define PTHRESH (8)
 121
 122 /* RTHRESH is the number of elements in a run that must compare low
 123 ** to the low element from the opposing run before we justify
 124 ** doing a binary rampup instead of single stepping.
 125 ** In random input, N in a row low should only happen with
 126 ** probability 2^(1-N), so we can risk that we are dealing
 127 ** with orderly input without paying much when we aren't.
 128 */
 129
 130 #define RTHRESH (6)
 131
 132
 133 /*
 134 ** Overview of algorithm and variables.
 135 ** The array of elements at list1 will be organized into runs of length 2,
 136 ** or runs of length >= 2 * PTHRESH.  We only try to form long runs when
 137 ** PTHRESH adjacent pairs compare in the same way, suggesting overall order.
 138 **
 139 ** Unless otherwise specified, pair pointers address the first of two elements.
 140 **
 141 ** b and b+1 are a pair that compare with sense "sense".
 142 ** b is the "bottom" of adjacent pairs that might form a longer run.
 143 **
 144 ** p2 parallels b in the list2 array, where runs are defined by
 145 ** a pointer chain.
 146 **
 147 ** t represents the "top" of the adjacent pairs that might extend
 148 ** the run beginning at b.  Usually, t addresses a pair
 149 ** that compares with opposite sense from (b,b+1).
 150 ** However, it may also address a singleton element at the end of list1,
 151 ** or it may be equal to "last", the first element beyond list1.
 152 **
 153 ** r addresses the Nth pair following b.  If this would be beyond t,
 154 ** we back it off to t.  Only when r is less than t do we consider the
 155 ** run long enough to consider checking.
 156 **
 157 ** q addresses a pair such that the pairs at b through q already form a run.
 158 ** Often, q will equal b, indicating we only are sure of the pair itself.
 159 ** However, a search on the previous cycle may have revealed a longer run,
 160 ** so q may be greater than b.
 161 **
 162 ** p is used to work back from a candidate r, trying to reach q,
 163 ** which would mean b through r would be a run.  If we discover such a run,
 164 ** we start q at r and try to push it further towards t.
 165 ** If b through r is NOT a run, we detect the wrong order at (p-1,p).
 166 ** In any event, after the check (if any), we have two main cases.
 167 **
 168 ** 1) Short run.  b <= q < p <= r <= t.
 169 **      b through q is a run (perhaps trivial)
 170 **      q through p are uninteresting pairs
 171 **      p through r is a run
 172 **
 173 ** 2) Long run.  b < r <= q < t.
 174 **      b through q is a run (of length >= 2 * PTHRESH)
 175 **
 176 ** Note that degenerate cases are not only possible, but likely.
 177 ** For example, if the pair following b compares with opposite sense,
 178 ** then b == q < p == r == t.
 179 */
 180
 181
 182 static IV
 183 dynprep(pTHX_ gptr *list1, gptr *list2, size_t nmemb, const SVCOMPARE_t cmp)
 184 {
 185     I32 sense;
 186     register gptr *b, *p, *q, *t, *p2;
 187     register gptr *last, *r;
 188     IV runs = 0;
 189
 190     b = list1;
 191     last = PINDEX(b, nmemb);
 192     sense = (cmp(aTHX_ *b, *(b+1)) > 0);
 193     for (p2 = list2; b < last; ) {
 194         /* We just started, or just reversed sense.
 195         ** Set t at end of pairs with the prevailing sense.
 196         */
 197         for (p = b+2, t = p; ++p < last; t = ++p) {
 198             if ((cmp(aTHX_ *t, *p) > 0) != sense) break;
 199         }
 200         q = b;
 201         /* Having laid out the playing field, look for long runs */
 202         do {
 203             p = r = b + (2 * PTHRESH);
 204             if (r >= t) p = r = t;      /* too short to care about */
 205             else {
 206                 while (((cmp(aTHX_ *(p-1), *p) > 0) == sense) &&
 207                        ((p -= 2) > q));
 208                 if (p <= q) {
 209                     /* b through r is a (long) run.
 210                     ** Extend it as far as possible.
 211                     */
 212                     p = q = r;
 213                     while (((p += 2) < t) &&
 214                            ((cmp(aTHX_ *(p-1), *p) > 0) == sense)) q = p;
 215                     r = p = q + 2;      /* no simple pairs, no after-run */
 216                 }
 217             }
 218             if (q > b) {                /* run of greater than 2 at b */
 219                 gptr *savep = p;
 220
 221                 p = q += 2;
 222                 /* pick up singleton, if possible */
 223                 if ((p == t) &&
 224                     ((t + 1) == last) &&
 225                     ((cmp(aTHX_ *(p-1), *p) > 0) == sense))
 226                     savep = r = p = q = last;
 227                 p2 = NEXT(p2) = p2 + (p - b); ++runs;
 228                 if (sense)
 229                     while (b < --p) {
 230                         const gptr c = *b;
 231                         *b++ = *p;
 232                         *p = c;
 233                     }
 234                 p = savep;
 235             }
 236             while (q < p) {             /* simple pairs */
 237                 p2 = NEXT(p2) = p2 + 2; ++runs;
 238                 if (sense) {
 239                     const gptr c = *q++;
 240                     *(q-1) = *q;
 241                     *q++ = c;
 242                 } else q += 2;
 243             }
 244             if (((b = p) == t) && ((t+1) == last)) {
 245                 NEXT(p2) = p2 + 1; ++runs;
 246                 b++;
 247             }
 248             q = r;
 249         } while (b < t);
 250         sense = !sense;
 251     }
 252     return runs;
 253 }
 254
 255
 256 /* The original merge sort, in use since 5.7, was as fast as, or faster than,
 257  * qsort on many platforms, but slower than qsort, conspicuously so,
 258  * on others.  The most likely explanation was platform-specific
 259  * differences in cache sizes and relative speeds.
 260  *
 261  * The quicksort divide-and-conquer algorithm guarantees that, as the
 262  * problem is subdivided into smaller and smaller parts, the parts
 263  * fit into smaller (and faster) caches.  So it doesn't matter how
 264  * many levels of cache exist, quicksort will "find" them, and,
 265  * as long as smaller is faster, take advantage of them.
 266  *
 267  * By contrast, consider how the original mergesort algorithm worked.
 268  * Suppose we have five runs (each typically of length 2 after dynprep).
 269  *
 270  * pass               base                        aux
 271  *  0              1 2 3 4 5
 272  *  1                                           12 34 5
 273  *  2                1234 5
 274  *  3                                            12345
 275  *  4                 12345
 276  *
 277  * Adjacent pairs are merged in "grand sweeps" through the input.
 278  * This means, on pass 1, the records in runs 1 and 2 aren't revisited until
 279  * runs 3 and 4 are merged and the runs from run 5 have been copied.
 280  * The only cache that matters is one large enough to hold *all* the input.
 281  * On some platforms, this may be many times slower than smaller caches.
 282  *
 283  * The following pseudo-code uses the same basic merge algorithm,
 284  * but in a divide-and-conquer way.
 285  *
 286  * # merge $runs runs at offset $offset of list $list1 into $list2.
 287  * # all unmerged runs ($runs == 1) originate in list $base.
 288  * sub mgsort2 {
 289  *     my ($offset, $runs, $base, $list1, $list2) = @_;
 290  *
 291  *     if ($runs == 1) {
 292  *         if ($list1 is $base) copy run to $list2
 293  *         return offset of end of list (or copy)
 294  *     } else {
 295  *         $off2 = mgsort2($offset, $runs-($runs/2), $base, $list2, $list1)
 296  *         mgsort2($off2, $runs/2, $base, $list2, $list1)
 297  *         merge the adjacent runs at $offset of $list1 into $list2
 298  *         return the offset of the end of the merged runs
 299  *     }
 300  * }
 301  * mgsort2(0, $runs, $base, $aux, $base);
 302  *
 303  * For our 5 runs, the tree of calls looks like
 304  *
 305  *           5
 306  *      3        2
 307  *   2     1   1   1
 308  * 1   1
 309  *
 310  * 1   2   3   4   5
 311  *
 312  * and the corresponding activity looks like
 313  *
 314  * copy runs 1 and 2 from base to aux
 315  * merge runs 1 and 2 from aux to base
 316  * (run 3 is where it belongs, no copy needed)
 317  * merge runs 12 and 3 from base to aux
 318  * (runs 4 and 5 are where they belong, no copy needed)
 319  * merge runs 4 and 5 from base to aux
 320  * merge runs 123 and 45 from aux to base
 321  *
 322  * Note that we merge runs 1 and 2 immediately after copying them,
 323  * while they are still likely to be in fast cache.  Similarly,
 324  * run 3 is merged with run 12 while it still may be lingering in cache.
 325  * This implementation should therefore enjoy much of the cache-friendly
 326  * behavior that quicksort does.  In addition, it does less copying
 327  * than the original mergesort implementation (only runs 1 and 2 are copied)
 328  * and the "balancing" of merges is better (merged runs comprise more nearly
 329  * equal numbers of original runs).
 330  *
 331  * The actual cache-friendly implementation will use a pseudo-stack
 332  * to avoid recursion, and will unroll processing of runs of length 2,
 333  * but it is otherwise similar to the recursive implementation.
 334  */
 335
 336 typedef struct {
 337     IV  offset;         /* offset of 1st of 2 runs at this level */
 338     IV  runs;           /* how many runs must be combined into 1 */
 339 } off_runs;             /* pseudo-stack element */
 340
 341
 342 static I32
 343 cmp_desc(pTHX_ gptr a, gptr b)
 344 {
 345     dVAR;
 346     return -PL_sort_RealCmp(aTHX_ a, b);
 347 }
 348
 349 STATIC void
 350 S_mergesortsv(pTHX_ gptr *base, size_t nmemb, SVCOMPARE_t cmp, U32 flags)
 351 {
 352     dVAR;
 353     IV i, run, offset;
 354     I32 sense, level;
 355     register gptr *f1, *f2, *t, *b, *p;
 356     int iwhich;
 357     gptr *aux;
 358     gptr *p1;
 359     gptr small[SMALLSORT];
 360     gptr *which[3];
 361     off_runs stack[60], *stackp;
 362     SVCOMPARE_t savecmp = NULL;
 363
 364     if (nmemb <= 1) return;                     /* sorted trivially */
 365
 366     if ((flags & SORTf_DESC) != 0) {
 367         savecmp = PL_sort_RealCmp;      /* Save current comparison routine, if any */
 368         PL_sort_RealCmp = cmp;  /* Put comparison routine where cmp_desc can find it */
 369         cmp = cmp_desc;
 370     }
 371
 372     if (nmemb <= SMALLSORT) aux = small;        /* use stack for aux array */
 373     else { Newx(aux,nmemb,gptr); }              /* allocate auxilliary array */
 374     level = 0;
 375     stackp = stack;
 376     stackp->runs = dynprep(aTHX_ base, aux, nmemb, cmp);
 377     stackp->offset = offset = 0;
 378     which[0] = which[2] = base;
 379     which[1] = aux;
 380     for (;;) {
 381         /* On levels where both runs have be constructed (stackp->runs == 0),
 382          * merge them, and note the offset of their end, in case the offset
 383          * is needed at the next level up.  Hop up a level, and,
 384          * as long as stackp->runs is 0, keep merging.
 385          */
 386         IV runs = stackp->runs;
 387         if (runs == 0) {
 388             gptr *list1, *list2;
 389             iwhich = level & 1;
 390             list1 = which[iwhich];              /* area where runs are now */
 391             list2 = which[++iwhich];            /* area for merged runs */
 392             do {
 393                 register gptr *l1, *l2, *tp2;
 394                 offset = stackp->offset;
 395                 f1 = p1 = list1 + offset;               /* start of first run */
 396                 p = tp2 = list2 + offset;       /* where merged run will go */
 397                 t = NEXT(p);                    /* where first run ends */
 398                 f2 = l1 = POTHER(t, list2, list1); /* ... on the other side */
 399                 t = NEXT(t);                    /* where second runs ends */
 400                 l2 = POTHER(t, list2, list1);   /* ... on the other side */
 401                 offset = PNELEM(list2, t);
 402                 while (f1 < l1 && f2 < l2) {
 403                     /* If head 1 is larger than head 2, find ALL the elements
 404                     ** in list 2 strictly less than head1, write them all,
 405                     ** then head 1.  Then compare the new heads, and repeat,
 406                     ** until one or both lists are exhausted.
 407                     **
 408                     ** In all comparisons (after establishing
 409                     ** which head to merge) the item to merge
 410                     ** (at pointer q) is the first operand of
 411                     ** the comparison.  When we want to know
 412                     ** if "q is strictly less than the other",
 413                     ** we can't just do
 414                     **    cmp(q, other) < 0
 415                     ** because stability demands that we treat equality
 416                     ** as high when q comes from l2, and as low when
 417                     ** q was from l1.  So we ask the question by doing
 418                     **    cmp(q, other) <= sense
 419                     ** and make sense == 0 when equality should look low,
 420                     ** and -1 when equality should look high.
 421                     */
 422
 423                     register gptr *q;
 424                     if (cmp(aTHX_ *f1, *f2) <= 0) {
 425                         q = f2; b = f1; t = l1;
 426                         sense = -1;
 427                     } else {
 428                         q = f1; b = f2; t = l2;
 429                         sense = 0;
 430                     }
 431
 432
 433                     /* ramp up
 434                     **
 435                     ** Leave t at something strictly
 436                     ** greater than q (or at the end of the list),
 437                     ** and b at something strictly less than q.
 438                     */
 439                     for (i = 1, run = 0 ;;) {
 440                         if ((p = PINDEX(b, i)) >= t) {
 441                             /* off the end */
 442                             if (((p = PINDEX(t, -1)) > b) &&
 443                                 (cmp(aTHX_ *q, *p) <= sense))
 444                                  t = p;
 445                             else b = p;
 446                             break;
 447                         } else if (cmp(aTHX_ *q, *p) <= sense) {
 448                             t = p;
 449                             break;
 450                         } else b = p;
 451                         if (++run >= RTHRESH) i += i;
 452                     }
 453
 454
 455                     /* q is known to follow b and must be inserted before t.
 456                     ** Increment b, so the range of possibilities is [b,t).
 457                     ** Round binary split down, to favor early appearance.
 458                     ** Adjust b and t until q belongs just before t.
 459                     */
 460
 461                     b++;
 462                     while (b < t) {
 463                         p = PINDEX(b, (PNELEM(b, t) - 1) / 2);
 464                         if (cmp(aTHX_ *q, *p) <= sense) {
 465                             t = p;
 466                         } else b = p + 1;
 467                     }
 468
 469
 470                     /* Copy all the strictly low elements */
 471
 472                     if (q == f1) {
 473                         FROMTOUPTO(f2, tp2, t);
 474                         *tp2++ = *f1++;
 475                     } else {
 476                         FROMTOUPTO(f1, tp2, t);
 477                         *tp2++ = *f2++;
 478                     }
 479                 }
 480
 481
 482                 /* Run out remaining list */
 483                 if (f1 == l1) {
 484                        if (f2 < l2) FROMTOUPTO(f2, tp2, l2);
 485                 } else              FROMTOUPTO(f1, tp2, l1);
 486                 p1 = NEXT(p1) = POTHER(tp2, list2, list1);
 487
 488                 if (--level == 0) goto done;
 489                 --stackp;
 490                 t = list1; list1 = list2; list2 = t;    /* swap lists */
 491             } while ((runs = stackp->runs) == 0);
 492         }
 493
 494
 495         stackp->runs = 0;               /* current run will finish level */
 496         /* While there are more than 2 runs remaining,
 497          * turn them into exactly 2 runs (at the "other" level),
 498          * each made up of approximately half the runs.
 499          * Stack the second half for later processing,
 500          * and set about producing the first half now.
 501          */
 502         while (runs > 2) {
 503             ++level;
 504             ++stackp;
 505             stackp->offset = offset;
 506             runs -= stackp->runs = runs / 2;
 507         }
 508         /* We must construct a single run from 1 or 2 runs.
 509          * All the original runs are in which[0] == base.
 510          * The run we construct must end up in which[level&1].
 511          */
 512         iwhich = level & 1;
 513         if (runs == 1) {
 514             /* Constructing a single run from a single run.
 515              * If it's where it belongs already, there's nothing to do.
 516              * Otherwise, copy it to where it belongs.
 517              * A run of 1 is either a singleton at level 0,
 518              * or the second half of a split 3.  In neither event
 519              * is it necessary to set offset.  It will be set by the merge
 520              * that immediately follows.
 521              */
 522             if (iwhich) {       /* Belongs in aux, currently in base */
 523                 f1 = b = PINDEX(base, offset);  /* where list starts */
 524                 f2 = PINDEX(aux, offset);       /* where list goes */
 525                 t = NEXT(f2);                   /* where list will end */
 526                 offset = PNELEM(aux, t);        /* offset thereof */
 527                 t = PINDEX(base, offset);       /* where it currently ends */
 528                 FROMTOUPTO(f1, f2, t);          /* copy */
 529                 NEXT(b) = t;                    /* set up parallel pointer */
 530             } else if (level == 0) goto done;   /* single run at level 0 */
 531         } else {
 532             /* Constructing a single run from two runs.
 533              * The merge code at the top will do that.
 534              * We need only make sure the two runs are in the "other" array,
 535              * so they'll end up in the correct array after the merge.
 536              */
 537             ++level;
 538             ++stackp;
 539             stackp->offset = offset;
 540             stackp->runs = 0;   /* take care of both runs, trigger merge */
 541             if (!iwhich) {      /* Merged runs belong in aux, copy 1st */
 542                 f1 = b = PINDEX(base, offset);  /* where first run starts */
 543                 f2 = PINDEX(aux, offset);       /* where it will be copied */
 544                 t = NEXT(f2);                   /* where first run will end */
 545                 offset = PNELEM(aux, t);        /* offset thereof */
 546                 p = PINDEX(base, offset);       /* end of first run */
 547                 t = NEXT(t);                    /* where second run will end */
 548                 t = PINDEX(base, PNELEM(aux, t)); /* where it now ends */
 549                 FROMTOUPTO(f1, f2, t);          /* copy both runs */
 550                 NEXT(b) = p;                    /* paralled pointer for 1st */
 551                 NEXT(p) = t;                    /* ... and for second */
 552             }
 553         }
 554     }
 555 done:
 556     if (aux != small) Safefree(aux);    /* free iff allocated */
 557     if (flags) {
 558          PL_sort_RealCmp = savecmp;     /* Restore current comparison routine, if any */
 559     }
 560     return;
 561 }
 562
 563 /*
 564  * The quicksort implementation was derived from source code contributed
 565  * by Tom Horsley.
 566  *
 567  * NOTE: this code was derived from Tom Horsley's qsort replacement
 568  * and should not be confused with the original code.
 569  */
 570
 571 /* Copyright (C) Tom Horsley, 1997. All rights reserved.
 572
 573    Permission granted to distribute under the same terms as perl which are
 574    (briefly):
 575
 576     This program is free software; you can redistribute it and/or modify
 577     it under the terms of either:
 578
 579         a) the GNU General Public License as published by the Free
 580         Software Foundation; either version 1, or (at your option) any
 581         later version, or
 582
 583         b) the "Artistic License" which comes with this Kit.
 584
 585    Details on the perl license can be found in the perl source code which
 586    may be located via the www.perl.com web page.
 587
 588    This is the most wonderfulest possible qsort I can come up with (and
 589    still be mostly portable) My (limited) tests indicate it consistently
 590    does about 20% fewer calls to compare than does the qsort in the Visual
 591    C++ library, other vendors may vary.
 592
 593    Some of the ideas in here can be found in "Algorithms" by Sedgewick,
 594    others I invented myself (or more likely re-invented since they seemed
 595    pretty obvious once I watched the algorithm operate for a while).
 596
 597    Most of this code was written while watching the Marlins sweep the Giants
 598    in the 1997 National League Playoffs - no Braves fans allowed to use this
 599    code (just kidding :-).
 600
 601    I realize that if I wanted to be true to the perl tradition, the only
 602    comment in this file would be something like:
 603
 604    ...they shuffled back towards the rear of the line. 'No, not at the
 605    rear!'  the slave-driver shouted. 'Three files up. And stay there...
 606
 607    However, I really needed to violate that tradition just so I could keep
 608    track of what happens myself, not to mention some poor fool trying to
 609    understand this years from now :-).
 610 */
 611
 612 /* ********************************************************** Configuration */
 613
 614 #ifndef QSORT_ORDER_GUESS
 615 #define QSORT_ORDER_GUESS 2     /* Select doubling version of the netBSD trick */
 616 #endif
 617
 618 /* QSORT_MAX_STACK is the largest number of partitions that can be stacked up for
 619    future processing - a good max upper bound is log base 2 of memory size
 620    (32 on 32 bit machines, 64 on 64 bit machines, etc). In reality can
 621    safely be smaller than that since the program is taking up some space and
 622    most operating systems only let you grab some subset of contiguous
 623    memory (not to mention that you are normally sorting data larger than
 624    1 byte element size :-).
 625 */
 626 #ifndef QSORT_MAX_STACK
 627 #define QSORT_MAX_STACK 32
 628 #endif
 629
 630 /* QSORT_BREAK_EVEN is the size of the largest partition we should insertion sort.
 631    Anything bigger and we use qsort. If you make this too small, the qsort
 632    will probably break (or become less efficient), because it doesn't expect
 633    the middle element of a partition to be the same as the right or left -
 634    you have been warned).
 635 */
 636 #ifndef QSORT_BREAK_EVEN
 637 #define QSORT_BREAK_EVEN 6
 638 #endif
 639
 640 /* QSORT_PLAY_SAFE is the size of the largest partition we're willing
 641    to go quadratic on.  We innoculate larger partitions against
 642    quadratic behavior by shuffling them before sorting.  This is not
 643    an absolute guarantee of non-quadratic behavior, but it would take
 644    staggeringly bad luck to pick extreme elements as the pivot
 645    from randomized data.
 646 */
 647 #ifndef QSORT_PLAY_SAFE
 648 #define QSORT_PLAY_SAFE 255
 649 #endif
 650
 651 /* ************************************************************* Data Types */
 652
 653 /* hold left and right index values of a partition waiting to be sorted (the
 654    partition includes both left and right - right is NOT one past the end or
 655    anything like that).
 656 */
 657 struct partition_stack_entry {
 658    int left;
 659    int right;
 660 #ifdef QSORT_ORDER_GUESS
 661    int qsort_break_even;
 662 #endif
 663 };
 664
 665 /* ******************************************************* Shorthand Macros */
 666
 667 /* Note that these macros will be used from inside the qsort function where
 668    we happen to know that the variable 'elt_size' contains the size of an
 669    array element and the variable 'temp' points to enough space to hold a
 670    temp element and the variable 'array' points to the array being sorted
 671    and 'compare' is the pointer to the compare routine.
 672
 673    Also note that there are very many highly architecture specific ways
 674    these might be sped up, but this is simply the most generally portable
 675    code I could think of.
 676 */
 677
 678 /* Return < 0 == 0 or > 0 as the value of elt1 is < elt2, == elt2, > elt2
 679 */
 680 #define qsort_cmp(elt1, elt2) \
 681    ((*compare)(aTHX_ array[elt1], array[elt2]))
 682
 683 #ifdef QSORT_ORDER_GUESS
 684 #define QSORT_NOTICE_SWAP swapped++;
 685 #else
 686 #define QSORT_NOTICE_SWAP
 687 #endif
 688
 689 /* swaps contents of array elements elt1, elt2.
 690 */
 691 #define qsort_swap(elt1, elt2) \
 692    STMT_START { \
 693       QSORT_NOTICE_SWAP \
 694       temp = array[elt1]; \
 695       array[elt1] = array[elt2]; \
 696       array[elt2] = temp; \
 697    } STMT_END
 698
 699 /* rotate contents of elt1, elt2, elt3 such that elt1 gets elt2, elt2 gets
 700    elt3 and elt3 gets elt1.
 701 */
 702 #define qsort_rotate(elt1, elt2, elt3) \
 703    STMT_START { \
 704       QSORT_NOTICE_SWAP \
 705       temp = array[elt1]; \
 706       array[elt1] = array[elt2]; \
 707       array[elt2] = array[elt3]; \
 708       array[elt3] = temp; \
 709    } STMT_END
 710
 711 /* ************************************************************ Debug stuff */
 712
 713 #ifdef QSORT_DEBUG
 714
 715 static void
 716 break_here()
 717 {
 718    return; /* good place to set a breakpoint */
 719 }
 720
 721 #define qsort_assert(t) (void)( (t) || (break_here(), 0) )
 722
 723 static void
 724 doqsort_all_asserts(
 725    void * array,
 726    size_t num_elts,
 727    size_t elt_size,
 728    int (*compare)(const void * elt1, const void * elt2),
 729    int pc_left, int pc_right, int u_left, int u_right)
 730 {
 731    int i;
 732
 733    qsort_assert(pc_left <= pc_right);
 734    qsort_assert(u_right < pc_left);
 735    qsort_assert(pc_right < u_left);
 736    for (i = u_right + 1; i < pc_left; ++i) {
 737       qsort_assert(qsort_cmp(i, pc_left) < 0);
 738    }
 739    for (i = pc_left; i < pc_right; ++i) {
 740       qsort_assert(qsort_cmp(i, pc_right) == 0);
 741    }
 742    for (i = pc_right + 1; i < u_left; ++i) {
 743       qsort_assert(qsort_cmp(pc_right, i) < 0);
 744    }
 745 }
 746
 747 #define qsort_all_asserts(PC_LEFT, PC_RIGHT, U_LEFT, U_RIGHT) \
 748    doqsort_all_asserts(array, num_elts, elt_size, compare, \
 749                  PC_LEFT, PC_RIGHT, U_LEFT, U_RIGHT)
 750
 751 #else
 752
 753 #define qsort_assert(t) ((void)0)
 754
 755 #define qsort_all_asserts(PC_LEFT, PC_RIGHT, U_LEFT, U_RIGHT) ((void)0)
 756
 757 #endif
 758
 759 /* ****************************************************************** qsort */
 760
 761 STATIC void /* the standard unstable (u) quicksort (qsort) */
 762 S_qsortsvu(pTHX_ SV ** array, size_t num_elts, SVCOMPARE_t compare)
 763 {
 764    register SV * temp;
 765
 766    struct partition_stack_entry partition_stack[QSORT_MAX_STACK];
 767    int next_stack_entry = 0;
 768
 769    int part_left;
 770    int part_right;
 771 #ifdef QSORT_ORDER_GUESS
 772    int qsort_break_even;
 773    int swapped;
 774 #endif
 775
 776    /* Make sure we actually have work to do.
 777    */
 778    if (num_elts <= 1) {
 779       return;
 780    }
 781
 782    /* Innoculate large partitions against quadratic behavior */
 783    if (num_elts > QSORT_PLAY_SAFE) {
 784       register size_t n;
 785       register SV ** const q = array;
 786       for (n = num_elts; n > 1; ) {
 787          register const size_t j = (size_t)(n-- * Drand01());
 788          temp = q[j];
 789          q[j] = q[n];
 790          q[n] = temp;
 791       }
 792    }
 793
 794    /* Setup the initial partition definition and fall into the sorting loop
 795    */
 796    part_left = 0;
 797    part_right = (int)(num_elts - 1);
 798 #ifdef QSORT_ORDER_GUESS
 799    qsort_break_even = QSORT_BREAK_EVEN;
 800 #else
 801 #define qsort_break_even QSORT_BREAK_EVEN
 802 #endif
 803    for ( ; ; ) {
 804       if ((part_right - part_left) >= qsort_break_even) {
 805          /* OK, this is gonna get hairy, so lets try to document all the
 806             concepts and abbreviations and variables and what they keep
 807             track of:
 808
 809             pc: pivot chunk - the set of array elements we accumulate in the
 810                 middle of the partition, all equal in value to the original
 811                 pivot element selected. The pc is defined by:
 812
 813                 pc_left - the leftmost array index of the pc
 814                 pc_right - the rightmost array index of the pc
 815
 816                 we start with pc_left == pc_right and only one element
 817                 in the pivot chunk (but it can grow during the scan).
 818
 819             u:  uncompared elements - the set of elements in the partition
 820                 we have not yet compared to the pivot value. There are two
 821                 uncompared sets during the scan - one to the left of the pc
 822                 and one to the right.
 823
 824                 u_right - the rightmost index of the left side's uncompared set
 825                 u_left - the leftmost index of the right side's uncompared set
 826
 827                 The leftmost index of the left sides's uncompared set
 828                 doesn't need its own variable because it is always defined
 829                 by the leftmost edge of the whole partition (part_left). The
 830                 same goes for the rightmost edge of the right partition
 831                 (part_right).
 832
 833                 We know there are no uncompared elements on the left once we
 834                 get u_right < part_left and no uncompared elements on the
 835                 right once u_left > part_right. When both these conditions
 836                 are met, we have completed the scan of the partition.
 837
 838                 Any elements which are between the pivot chunk and the
 839                 uncompared elements should be less than the pivot value on
 840                 the left side and greater than the pivot value on the right
 841                 side (in fact, the goal of the whole algorithm is to arrange
 842                 for that to be true and make the groups of less-than and
 843                 greater-then elements into new partitions to sort again).
 844
 845             As you marvel at the complexity of the code and wonder why it
 846             has to be so confusing. Consider some of the things this level
 847             of confusion brings:
 848
 849             Once I do a compare, I squeeze every ounce of juice out of it. I
 850             never do compare calls I don't have to do, and I certainly never
 851             do redundant calls.
 852
 853             I also never swap any elements unless I can prove there is a
 854             good reason. Many sort algorithms will swap a known value with
 855             an uncompared value just to get things in the right place (or
 856             avoid complexity :-), but that uncompared value, once it gets
 857             compared, may then have to be swapped again. A lot of the
 858             complexity of this code is due to the fact that it never swaps
 859             anything except compared values, and it only swaps them when the
 860             compare shows they are out of position.
 861          */
 862          int pc_left, pc_right;
 863          int u_right, u_left;
 864
 865          int s;
 866
 867          pc_left = ((part_left + part_right) / 2);
 868          pc_right = pc_left;
 869          u_right = pc_left - 1;
 870          u_left = pc_right + 1;
 871
 872          /* Qsort works best when the pivot value is also the median value
 873             in the partition (unfortunately you can't find the median value
 874             without first sorting :-), so to give the algorithm a helping
 875             hand, we pick 3 elements and sort them and use the median value
 876             of that tiny set as the pivot value.
 877
 878             Some versions of qsort like to use the left middle and right as
 879             the 3 elements to sort so they can insure the ends of the
 880             partition will contain values which will stop the scan in the
 881             compare loop, but when you have to call an arbitrarily complex
 882             routine to do a compare, its really better to just keep track of
 883             array index values to know when you hit the edge of the
 884             partition and avoid the extra compare. An even better reason to
 885             avoid using a compare call is the fact that you can drop off the
 886             edge of the array if someone foolishly provides you with an
 887             unstable compare function that doesn't always provide consistent
 888             results.
 889
 890             So, since it is simpler for us to compare the three adjacent
 891             elements in the middle of the partition, those are the ones we
 892             pick here (conveniently pointed at by u_right, pc_left, and
 893             u_left). The values of the left, center, and right elements
 894             are refered to as l c and r in the following comments.
 895          */
 896
 897 #ifdef QSORT_ORDER_GUESS
 898          swapped = 0;
 899 #endif
 900          s = qsort_cmp(u_right, pc_left);
 901          if (s < 0) {
 902             /* l < c */
 903             s = qsort_cmp(pc_left, u_left);
 904             /* if l < c, c < r - already in order - nothing to do */
 905             if (s == 0) {
 906                /* l < c, c == r - already in order, pc grows */
 907                ++pc_right;
 908                qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
 909             } else if (s > 0) {
 910                /* l < c, c > r - need to know more */
 911                s = qsort_cmp(u_right, u_left);
 912                if (s < 0) {
 913                   /* l < c, c > r, l < r - swap c & r to get ordered */
 914                   qsort_swap(pc_left, u_left);
 915                   qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
 916                } else if (s == 0) {
 917                   /* l < c, c > r, l == r - swap c&r, grow pc */
 918                   qsort_swap(pc_left, u_left);
 919                   --pc_left;
 920                   qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
 921                } else {
 922                   /* l < c, c > r, l > r - make lcr into rlc to get ordered */
 923                   qsort_rotate(pc_left, u_right, u_left);
 924                   qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
 925                }
 926             }
 927          } else if (s == 0) {
 928             /* l == c */
 929             s = qsort_cmp(pc_left, u_left);
 930             if (s < 0) {
 931                /* l == c, c < r - already in order, grow pc */
 932                --pc_left;
 933                qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
 934             } else if (s == 0) {
 935                /* l == c, c == r - already in order, grow pc both ways */
 936                --pc_left;
 937                ++pc_right;
 938                qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
 939             } else {
 940                /* l == c, c > r - swap l & r, grow pc */
 941                qsort_swap(u_right, u_left);
 942                ++pc_right;
 943                qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
 944             }
 945          } else {
 946             /* l > c */
 947             s = qsort_cmp(pc_left, u_left);
 948             if (s < 0) {
 949                /* l > c, c < r - need to know more */
 950                s = qsort_cmp(u_right, u_left);
 951                if (s < 0) {
 952                   /* l > c, c < r, l < r - swap l & c to get ordered */
 953                   qsort_swap(u_right, pc_left);
 954                   qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
 955                } else if (s == 0) {
 956                   /* l > c, c < r, l == r - swap l & c, grow pc */
 957                   qsort_swap(u_right, pc_left);
 958                   ++pc_right;
 959                   qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
 960                } else {
 961                   /* l > c, c < r, l > r - rotate lcr into crl to order */
 962                   qsort_rotate(u_right, pc_left, u_left);
 963                   qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
 964                }
 965             } else if (s == 0) {
 966                /* l > c, c == r - swap ends, grow pc */
 967                qsort_swap(u_right, u_left);
 968                --pc_left;
 969                qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
 970             } else {
 971                /* l > c, c > r - swap ends to get in order */
 972                qsort_swap(u_right, u_left);
 973                qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
 974             }
 975          }
 976          /* We now know the 3 middle elements have been compared and
 977             arranged in the desired order, so we can shrink the uncompared
 978             sets on both sides
 979          */
 980          --u_right;
 981          ++u_left;
 982          qsort_all_asserts(pc_left, pc_right, u_left, u_right);
 983
 984          /* The above massive nested if was the simple part :-). We now have
 985             the middle 3 elements ordered and we need to scan through the
 986             uncompared sets on either side, swapping elements that are on
 987             the wrong side or simply shuffling equal elements around to get
 988             all equal elements into the pivot chunk.
 989          */
 990
 991          for ( ; ; ) {
 992             int still_work_on_left;
 993             int still_work_on_right;
 994
 995             /* Scan the uncompared values on the left. If I find a value
 996                equal to the pivot value, move it over so it is adjacent to
 997                the pivot chunk and expand the pivot chunk. If I find a value
 998                less than the pivot value, then just leave it - its already
 999                on the correct side of the partition. If I find a greater
1000                value, then stop the scan.
1001             */
1002             while ((still_work_on_left = (u_right >= part_left))) {
1003                s = qsort_cmp(u_right, pc_left);
1004                if (s < 0) {
1005                   --u_right;
1006                } else if (s == 0) {
1007                   --pc_left;
1008                   if (pc_left != u_right) {
1009                      qsort_swap(u_right, pc_left);
1010                   }
1011                   --u_right;
1012                } else {
1013                   break;
1014                }
1015                qsort_assert(u_right < pc_left);
1016                qsort_assert(pc_left <= pc_right);
1017                qsort_assert(qsort_cmp(u_right + 1, pc_left) <= 0);
1018                qsort_assert(qsort_cmp(pc_left, pc_right) == 0);
1019             }
1020
1021             /* Do a mirror image scan of uncompared values on the right
1022             */
1023             while ((still_work_on_right = (u_left <= part_right))) {
1024                s = qsort_cmp(pc_right, u_left);
1025                if (s < 0) {
1026                   ++u_left;
1027                } else if (s == 0) {
1028                   ++pc_right;
1029                   if (pc_right != u_left) {
1030                      qsort_swap(pc_right, u_left);
1031                   }
1032                   ++u_left;
1033                } else {
1034                   break;
1035                }
1036                qsort_assert(u_left > pc_right);
1037                qsort_assert(pc_left <= pc_right);
1038                qsort_assert(qsort_cmp(pc_right, u_left - 1) <= 0);
1039                qsort_assert(qsort_cmp(pc_left, pc_right) == 0);
1040             }
1041
1042             if (still_work_on_left) {
1043                /* I know I have a value on the left side which needs to be
1044                   on the right side, but I need to know more to decide
1045                   exactly the best thing to do with it.
1046                */
1047                if (still_work_on_right) {
1048                   /* I know I have values on both side which are out of
1049                      position. This is a big win because I kill two birds
1050                      with one swap (so to speak). I can advance the
1051                      uncompared pointers on both sides after swapping both
1052                      of them into the right place.
1053                   */
1054                   qsort_swap(u_right, u_left);
1055                   --u_right;
1056                   ++u_left;
1057                   qsort_all_asserts(pc_left, pc_right, u_left, u_right);
1058                } else {
1059                   /* I have an out of position value on the left, but the
1060                      right is fully scanned, so I "slide" the pivot chunk
1061                      and any less-than values left one to make room for the
1062                      greater value over on the right. If the out of position
1063                      value is immediately adjacent to the pivot chunk (there
1064                      are no less-than values), I can do that with a swap,
1065                      otherwise, I have to rotate one of the less than values
1066                      into the former position of the out of position value
1067                      and the right end of the pivot chunk into the left end
1068                      (got all that?).
1069                   */
1070                   --pc_left;
1071                   if (pc_left == u_right) {
1072                      qsort_swap(u_right, pc_right);
1073                      qsort_all_asserts(pc_left, pc_right-1, u_left, u_right-1);
1074                   } else {
1075                      qsort_rotate(u_right, pc_left, pc_right);
1076                      qsort_all_asserts(pc_left, pc_right-1, u_left, u_right-1);
1077                   }
1078                   --pc_right;
1079                   --u_right;
1080                }
1081             } else if (still_work_on_right) {
1082                /* Mirror image of complex case above: I have an out of
1083                   position value on the right, but the left is fully
1084                   scanned, so I need to shuffle things around to make room
1085                   for the right value on the left.
1086                */
1087                ++pc_right;
1088                if (pc_right == u_left) {
1089                   qsort_swap(u_left, pc_left);
1090                   qsort_all_asserts(pc_left+1, pc_right, u_left+1, u_right);
1091                } else {
1092                   qsort_rotate(pc_right, pc_left, u_left);
1093                   qsort_all_asserts(pc_left+1, pc_right, u_left+1, u_right);
1094                }
1095                ++pc_left;
1096                ++u_left;
1097             } else {
1098                /* No more scanning required on either side of partition,
1099                   break out of loop and figure out next set of partitions
1100                */
1101                break;
1102             }
1103          }
1104
1105          /* The elements in the pivot chunk are now in the right place. They
1106             will never move or be compared again. All I have to do is decide
1107             what to do with the stuff to the left and right of the pivot
1108             chunk.
1109
1110             Notes on the QSORT_ORDER_GUESS ifdef code:
1111
1112             1. If I just built these partitions without swapping any (or
1113                very many) elements, there is a chance that the elements are
1114                already ordered properly (being properly ordered will
1115                certainly result in no swapping, but the converse can't be
1116                proved :-).
1117
1118             2. A (properly written) insertion sort will run faster on
1119                already ordered data than qsort will.
1120
1121             3. Perhaps there is some way to make a good guess about
1122                switching to an insertion sort earlier than partition size 6
1123                (for instance - we could save the partition size on the stack
1124                and increase the size each time we find we didn't swap, thus
1125                switching to insertion sort earlier for partitions with a
1126                history of not swapping).
1127
1128             4. Naturally, if I just switch right away, it will make
1129                artificial benchmarks with pure ascending (or descending)
1130                data look really good, but is that a good reason in general?
1131                Hard to say...
1132          */
1133
1134 #ifdef QSORT_ORDER_GUESS
1135          if (swapped < 3) {
1136 #if QSORT_ORDER_GUESS == 1
1137             qsort_break_even = (part_right - part_left) + 1;
1138 #endif
1139 #if QSORT_ORDER_GUESS == 2
1140             qsort_break_even *= 2;
1141 #endif
1142 #if QSORT_ORDER_GUESS == 3
1143             const int prev_break = qsort_break_even;
1144             qsort_break_even *= qsort_break_even;
1145             if (qsort_break_even < prev_break) {
1146                qsort_break_even = (part_right - part_left) + 1;
1147             }
1148 #endif
1149          } else {
1150             qsort_break_even = QSORT_BREAK_EVEN;
1151          }
1152 #endif
1153
1154          if (part_left < pc_left) {
1155             /* There are elements on the left which need more processing.
1156                Check the right as well before deciding what to do.
1157             */
1158             if (pc_right < part_right) {
1159                /* We have two partitions to be sorted. Stack the biggest one
1160                   and process the smallest one on the next iteration. This
1161                   minimizes the stack height by insuring that any additional
1162                   stack entries must come from the smallest partition which
1163                   (because it is smallest) will have the fewest
1164                   opportunities to generate additional stack entries.
1165                */
1166                if ((part_right - pc_right) > (pc_left - part_left)) {
1167                   /* stack the right partition, process the left */
1168                   partition_stack[next_stack_entry].left = pc_right + 1;
1169                   partition_stack[next_stack_entry].right = part_right;
1170 #ifdef QSORT_ORDER_GUESS
1171                   partition_stack[next_stack_entry].qsort_break_even = qsort_break_even;
1172 #endif
1173                   part_right = pc_left - 1;
1174                } else {
1175                   /* stack the left partition, process the right */
1176                   partition_stack[next_stack_entry].left = part_left;
1177                   partition_stack[next_stack_entry].right = pc_left - 1;
1178 #ifdef QSORT_ORDER_GUESS
1179                   partition_stack[next_stack_entry].qsort_break_even = qsort_break_even;
1180 #endif
1181                   part_left = pc_right + 1;
1182                }
1183                qsort_assert(next_stack_entry < QSORT_MAX_STACK);
1184                ++next_stack_entry;
1185             } else {
1186                /* The elements on the left are the only remaining elements
1187                   that need sorting, arrange for them to be processed as the
1188                   next partition.
1189                */
1190                part_right = pc_left - 1;
1191             }
1192          } else if (pc_right < part_right) {
1193             /* There is only one chunk on the right to be sorted, make it
1194                the new partition and loop back around.
1195             */
1196             part_left = pc_right + 1;
1197          } else {
1198             /* This whole partition wound up in the pivot chunk, so
1199                we need to get a new partition off the stack.
1200             */
1201             if (next_stack_entry == 0) {
1202                /* the stack is empty - we are done */
1203                break;
1204             }
1205             --next_stack_entry;
1206             part_left = partition_stack[next_stack_entry].left;
1207             part_right = partition_stack[next_stack_entry].right;
1208 #ifdef QSORT_ORDER_GUESS
1209             qsort_break_even = partition_stack[next_stack_entry].qsort_break_even;
1210 #endif
1211          }
1212       } else {
1213          /* This partition is too small to fool with qsort complexity, just
1214             do an ordinary insertion sort to minimize overhead.
1215          */
1216          int i;
1217          /* Assume 1st element is in right place already, and start checking
1218             at 2nd element to see where it should be inserted.
1219          */
1220          for (i = part_left + 1; i <= part_right; ++i) {
1221             int j;
1222             /* Scan (backwards - just in case 'i' is already in right place)
1223                through the elements already sorted to see if the ith element
1224                belongs ahead of one of them.
1225             */
1226             for (j = i - 1; j >= part_left; --j) {
1227                if (qsort_cmp(i, j) >= 0) {
1228                   /* i belongs right after j
1229                   */
1230                   break;
1231                }
1232             }
1233             ++j;
1234             if (j != i) {
1235                /* Looks like we really need to move some things
1236                */
1237                int k;
1238                temp = array[i];
1239                for (k = i - 1; k >= j; --k)
1240                   array[k + 1] = array[k];
1241                array[j] = temp;
1242             }
1243          }
1244
1245          /* That partition is now sorted, grab the next one, or get out
1246             of the loop if there aren't any more.
1247          */
1248
1249          if (next_stack_entry == 0) {
1250             /* the stack is empty - we are done */
1251             break;
1252          }
1253          --next_stack_entry;
1254          part_left = partition_stack[next_stack_entry].left;
1255          part_right = partition_stack[next_stack_entry].right;
1256 #ifdef QSORT_ORDER_GUESS
1257          qsort_break_even = partition_stack[next_stack_entry].qsort_break_even;
1258 #endif
1259       }
1260    }
1261
1262    /* Believe it or not, the array is sorted at this point! */
1263 }
1264
1265 /* Stabilize what is, presumably, an otherwise unstable sort method.
1266  * We do that by allocating (or having on hand) an array of pointers
1267  * that is the same size as the original array of elements to be sorted.
1268  * We initialize this parallel array with the addresses of the original
1269  * array elements.  This indirection can make you crazy.
1270  * Some pictures can help.  After initializing, we have
1271  *
1272  *  indir                  list1
1273  * +----+                 +----+
1274  * |    | --------------> |    | ------> first element to be sorted
1275  * +----+                 +----+
1276  * |    | --------------> |    | ------> second element to be sorted
1277  * +----+                 +----+
1278  * |    | --------------> |    | ------> third element to be sorted
1279  * +----+                 +----+
1280  *  ...
1281  * +----+                 +----+
1282  * |    | --------------> |    | ------> n-1st element to be sorted
1283  * +----+                 +----+
1284  * |    | --------------> |    | ------> n-th element to be sorted
1285  * +----+                 +----+
1286  *
1287  * During the sort phase, we leave the elements of list1 where they are,
1288  * and sort the pointers in the indirect array in the same order determined
1289  * by the original comparison routine on the elements pointed to.
1290  * Because we don't move the elements of list1 around through
1291  * this phase, we can break ties on elements that compare equal
1292  * using their address in the list1 array, ensuring stabilty.
1293  * This leaves us with something looking like
1294  *
1295  *  indir                  list1
1296  * +----+                 +----+
1297  * |    | --+       +---> |    | ------> first element to be sorted
1298  * +----+   |       |     +----+
1299  * |    | --|-------|---> |    | ------> second element to be sorted
1300  * +----+   |       |     +----+
1301  * |    | --|-------+ +-> |    | ------> third element to be sorted
1302  * +----+   |         |   +----+
1303  *  ...
1304  * +----+    | |   | |    +----+
1305  * |    | ---|-+   | +--> |    | ------> n-1st element to be sorted
1306  * +----+    |     |      +----+
1307  * |    | ---+     +----> |    | ------> n-th element to be sorted
1308  * +----+                 +----+
1309  *
1310  * where the i-th element of the indirect array points to the element
1311  * that should be i-th in the sorted array.  After the sort phase,
1312  * we have to put the elements of list1 into the places
1313  * dictated by the indirect array.
1314  */
1315
1316
1317 static I32
1318 cmpindir(pTHX_ gptr a, gptr b)
1319 {
1320     dVAR;
1321     gptr * const ap = (gptr *)a;
1322     gptr * const bp = (gptr *)b;
1323     const I32 sense = PL_sort_RealCmp(aTHX_ *ap, *bp);
1324
1325     if (sense)
1326         return sense;
1327     return (ap > bp) ? 1 : ((ap < bp) ? -1 : 0);
1328 }
1329
1330 static I32
1331 cmpindir_desc(pTHX_ gptr a, gptr b)
1332 {
1333     dVAR;
1334     gptr * const ap = (gptr *)a;
1335     gptr * const bp = (gptr *)b;
1336     const I32 sense = PL_sort_RealCmp(aTHX_ *ap, *bp);
1337
1338     /* Reverse the default */
1339     if (sense)
1340         return -sense;
1341     /* But don't reverse the stability test.  */
1342     return (ap > bp) ? 1 : ((ap < bp) ? -1 : 0);
1343
1344 }
1345
1346 STATIC void
1347 S_qsortsv(pTHX_ gptr *list1, size_t nmemb, SVCOMPARE_t cmp, U32 flags)
1348 {
1349     dVAR;
1350     if ((flags & SORTf_STABLE) != 0) {
1351          register gptr **pp, *q;
1352          register size_t n, j, i;
1353          gptr *small[SMALLSORT], **indir, tmp;
1354          SVCOMPARE_t savecmp;
1355          if (nmemb <= 1) return;     /* sorted trivially */
1356
1357          /* Small arrays can use the stack, big ones must be allocated */
1358          if (nmemb <= SMALLSORT) indir = small;
1359          else { Newx(indir, nmemb, gptr *); }
1360
1361          /* Copy pointers to original array elements into indirect array */
1362          for (n = nmemb, pp = indir, q = list1; n--; ) *pp++ = q++;
1363
1364          savecmp = PL_sort_RealCmp;     /* Save current comparison routine, if any */
1365          PL_sort_RealCmp = cmp; /* Put comparison routine where cmpindir can find it */
1366
1367          /* sort, with indirection */
1368          if (flags & SORTf_DESC)
1369             qsortsvu((gptr *)indir, nmemb, cmpindir_desc);
1370         else
1371             qsortsvu((gptr *)indir, nmemb, cmpindir);
1372
1373          pp = indir;
1374          q = list1;
1375          for (n = nmemb; n--; ) {
1376               /* Assert A: all elements of q with index > n are already
1377                * in place.  This is vacuosly true at the start, and we
1378                * put element n where it belongs below (if it wasn't
1379                * already where it belonged). Assert B: we only move
1380                * elements that aren't where they belong,
1381                * so, by A, we never tamper with elements above n.
1382                */
1383               j = pp[n] - q;            /* This sets j so that q[j] is
1384                                          * at pp[n].  *pp[j] belongs in
1385                                          * q[j], by construction.
1386                                          */
1387               if (n != j) {             /* all's well if n == j */
1388                    tmp = q[j];          /* save what's in q[j] */
1389                    do {
1390                         q[j] = *pp[j];  /* put *pp[j] where it belongs */
1391                         i = pp[j] - q;  /* the index in q of the element
1392                                          * just moved */
1393                         pp[j] = q + j;  /* this is ok now */
1394                    } while ((j = i) != n);
1395                    /* There are only finitely many (nmemb) addresses
1396                     * in the pp array.
1397                     * So we must eventually revisit an index we saw before.
1398                     * Suppose the first revisited index is k != n.
1399                     * An index is visited because something else belongs there.
1400                     * If we visit k twice, then two different elements must
1401                     * belong in the same place, which cannot be.
1402                     * So j must get back to n, the loop terminates,
1403                     * and we put the saved element where it belongs.
1404                     */
1405                    q[n] = tmp;          /* put what belongs into
1406                                          * the n-th element */
1407               }
1408          }
1409
1410         /* free iff allocated */
1411          if (indir != small) { Safefree(indir); }
1412          /* restore prevailing comparison routine */
1413          PL_sort_RealCmp = savecmp;
1414     } else if ((flags & SORTf_DESC) != 0) {
1415          const SVCOMPARE_t savecmp = PL_sort_RealCmp;   /* Save current comparison routine, if any */
1416          PL_sort_RealCmp = cmp; /* Put comparison routine where cmp_desc can find it */
1417          cmp = cmp_desc;
1418          qsortsvu(list1, nmemb, cmp);
1419          /* restore prevailing comparison routine */
1420          PL_sort_RealCmp = savecmp;
1421     } else {
1422          qsortsvu(list1, nmemb, cmp);
1423     }
1424 }
1425
1426 /*
1427 =head1 Array Manipulation Functions
1428
1429 =for apidoc sortsv
1430
1431 Sort an array. Here is an example:
1432
1433     sortsv(AvARRAY(av), av_len(av)+1, Perl_sv_cmp_locale);
1434
1435 Currently this always uses mergesort. See sortsv_flags for a more
1436 flexible routine.
1437
1438 =cut
1439 */
1440
1441 void
1442 Perl_sortsv(pTHX_ SV **array, size_t nmemb, SVCOMPARE_t cmp)
1443 {
1444     sortsv_flags(array, nmemb, cmp, 0);
1445 }
1446
1447 /*
1448 =for apidoc sortsv_flags
1449
1450 Sort an array, with various options.
1451
1452 =cut
1453 */
1454 void
1455 Perl_sortsv_flags(pTHX_ SV **array, size_t nmemb, SVCOMPARE_t cmp, U32 flags)
1456 {
1457     if (flags & SORTf_QSORT)
1458         S_qsortsv(aTHX_ array, nmemb, cmp, flags);
1459     else
1460         S_mergesortsv(aTHX_ array, nmemb, cmp, flags);
1461 }
1462
1463 #define SvNSIOK(sv) ((SvFLAGS(sv) & SVf_NOK) || ((SvFLAGS(sv) & (SVf_IOK|SVf_IVisUV)) == SVf_IOK))
1464 #define SvSIOK(sv) ((SvFLAGS(sv) & (SVf_IOK|SVf_IVisUV)) == SVf_IOK)
1465 #define SvNSIV(sv) ( SvNOK(sv) ? SvNVX(sv) : ( SvSIOK(sv) ? SvIVX(sv) : sv_2nv(sv) ) )
1466
1467 PP(pp_sort)
1468 {
1469     dVAR; dSP; dMARK; dORIGMARK;
1470     register SV **p1 = ORIGMARK+1, **p2;
1471     register I32 max, i;
1472     AV* av = NULL;
1473     HV *stash;
1474     GV *gv;
1475     CV *cv = NULL;
1476     I32 gimme = GIMME;
1477     OP* const nextop = PL_op->op_next;
1478     I32 overloading = 0;
1479     bool hasargs = FALSE;
1480     I32 is_xsub = 0;
1481     I32 sorting_av = 0;
1482     const U8 priv = PL_op->op_private;
1483     const U8 flags = PL_op->op_flags;
1484     U32 sort_flags = 0;
1485     void (*sortsvp)(pTHX_ SV **array, size_t nmemb, SVCOMPARE_t cmp, U32 flags)
1486       = Perl_sortsv_flags;
1487     I32 all_SIVs = 1;
1488
1489     if ((priv & OPpSORT_DESCEND) != 0)
1490         sort_flags |= SORTf_DESC;
1491     if ((priv & OPpSORT_QSORT) != 0)
1492         sort_flags |= SORTf_QSORT;
1493     if ((priv & OPpSORT_STABLE) != 0)
1494         sort_flags |= SORTf_STABLE;
1495
1496     if (gimme != G_ARRAY) {
1497         SP = MARK;
1498         EXTEND(SP,1);
1499         RETPUSHUNDEF;
1500     }
1501
1502     ENTER;
1503     SAVEVPTR(PL_sortcop);
1504     if (flags & OPf_STACKED) {
1505         if (flags & OPf_SPECIAL) {
1506             OP *kid = cLISTOP->op_first->op_sibling;    /* pass pushmark */
1507             kid = kUNOP->op_first;                      /* pass rv2gv */
1508             kid = kUNOP->op_first;                      /* pass leave */
1509             PL_sortcop = kid->op_next;
1510             stash = CopSTASH(PL_curcop);
1511         }
1512         else {
1513             cv = sv_2cv(*++MARK, &stash, &gv, 0);
1514             if (cv && SvPOK(cv)) {
1515                 const char * const proto = SvPV_nolen_const((SV*)cv);
1516                 if (proto && strEQ(proto, "$$")) {
1517                     hasargs = TRUE;
1518                 }
1519             }
1520             if (!(cv && CvROOT(cv))) {
1521                 if (cv && CvISXSUB(cv)) {
1522                     is_xsub = 1;
1523                 }
1524                 else if (gv) {
1525                     SV *tmpstr = sv_newmortal();
1526                     gv_efullname3(tmpstr, gv, NULL);
1527                     DIE(aTHX_ "Undefined sort subroutine \"%"SVf"\" called",
1528                         SVfARG(tmpstr));
1529                 }
1530                 else {
1531                     DIE(aTHX_ "Undefined subroutine in sort");
1532                 }
1533             }
1534
1535             if (is_xsub)
1536                 PL_sortcop = (OP*)cv;
1537             else
1538                 PL_sortcop = CvSTART(cv);
1539         }
1540     }
1541     else {
1542         PL_sortcop = NULL;
1543         stash = CopSTASH(PL_curcop);
1544     }
1545
1546     /* optimiser converts "@a = sort @a" to "sort \@a";
1547      * in case of tied @a, pessimise: push (@a) onto stack, then assign
1548      * result back to @a at the end of this function */
1549     if (priv & OPpSORT_INPLACE) {
1550         assert( MARK+1 == SP && *SP && SvTYPE(*SP) == SVt_PVAV);
1551         (void)POPMARK; /* remove mark associated with ex-OP_AASSIGN */
1552         av = (AV*)(*SP);
1553         max = AvFILL(av) + 1;
1554         if (SvMAGICAL(av)) {
1555             MEXTEND(SP, max);
1556             p2 = SP;
1557             for (i=0; i < max; i++) {
1558                 SV **svp = av_fetch(av, i, FALSE);
1559                 *SP++ = (svp) ? *svp : NULL;
1560             }
1561         }
1562         else {
1563             if (SvREADONLY(av))
1564                 Perl_croak(aTHX_ PL_no_modify);
1565             else
1566                 SvREADONLY_on(av);
1567             p1 = p2 = AvARRAY(av);
1568             sorting_av = 1;
1569         }
1570     }
1571     else {
1572         p2 = MARK+1;
1573         max = SP - MARK;
1574    }
1575
1576     /* shuffle stack down, removing optional initial cv (p1!=p2), plus
1577      * any nulls; also stringify or converting to integer or number as
1578      * required any args */
1579     for (i=max; i > 0 ; i--) {
1580         if ((*p1 = *p2++)) {                    /* Weed out nulls. */
1581             SvTEMP_off(*p1);
1582             if (!PL_sortcop) {
1583                 if (priv & OPpSORT_NUMERIC) {
1584                     if (priv & OPpSORT_INTEGER) {
1585                         if (!SvIOK(*p1)) {
1586                             if (SvAMAGIC(*p1))
1587                                 overloading = 1;
1588                             else
1589                                 (void)sv_2iv(*p1);
1590                         }
1591                     }
1592                     else {
1593                         if (!SvNSIOK(*p1)) {
1594                             if (SvAMAGIC(*p1))
1595                                 overloading = 1;
1596                             else
1597                                 (void)sv_2nv(*p1);
1598                         }
1599                         if (all_SIVs && !SvSIOK(*p1))
1600                             all_SIVs = 0;
1601                     }
1602                 }
1603                 else {
1604                     if (!SvPOK(*p1)) {
1605                         if (SvAMAGIC(*p1))
1606                             overloading = 1;
1607                         else
1608                             (void)sv_2pv_flags(*p1, 0,
1609                                                SV_GMAGIC|SV_CONST_RETURN);
1610                     }
1611                 }
1612             }
1613             p1++;
1614         }
1615         else
1616             max--;
1617     }
1618     if (sorting_av)
1619         AvFILLp(av) = max-1;
1620
1621     if (max > 1) {
1622         SV **start;
1623         if (PL_sortcop) {
1624             PERL_CONTEXT *cx;
1625             SV** newsp;
1626             const bool oldcatch = CATCH_GET;
1627
1628             SAVETMPS;
1629             SAVEOP();
1630
1631             CATCH_SET(TRUE);
1632             PUSHSTACKi(PERLSI_SORT);
1633             if (!hasargs && !is_xsub) {
1634                 SAVESPTR(PL_firstgv);
1635                 SAVESPTR(PL_secondgv);
1636                 SAVESPTR(PL_sortstash);
1637                 PL_firstgv = gv_fetchpvs("a", GV_ADD|GV_NOTQUAL, SVt_PV);
1638                 PL_secondgv = gv_fetchpvs("b", GV_ADD|GV_NOTQUAL, SVt_PV);
1639                 PL_sortstash = stash;
1640                 SAVESPTR(GvSV(PL_firstgv));
1641                 SAVESPTR(GvSV(PL_secondgv));
1642             }
1643
1644             PUSHBLOCK(cx, CXt_NULL, PL_stack_base);
1645             if (!(flags & OPf_SPECIAL)) {
1646                 cx->cx_type = CXt_SUB;
1647                 cx->blk_gimme = G_SCALAR;
1648                 PUSHSUB(cx);
1649                 if (!is_xsub) {
1650                     AV* const padlist = CvPADLIST(cv);
1651
1652                     if (++CvDEPTH(cv) >= 2) {
1653                         PERL_STACK_OVERFLOW_CHECK();
1654                         pad_push(padlist, CvDEPTH(cv));
1655                     }
1656                     SAVECOMPPAD();
1657                     PAD_SET_CUR_NOSAVE(padlist, CvDEPTH(cv));
1658
1659                     if (hasargs) {
1660                         /* This is mostly copied from pp_entersub */
1661                         AV * const av = (AV*)PAD_SVl(0);
1662
1663                         cx->blk_sub.savearray = GvAV(PL_defgv);
1664                         GvAV(PL_defgv) = (AV*)SvREFCNT_inc_simple(av);
1665                         CX_CURPAD_SAVE(cx->blk_sub);
1666                         cx->blk_sub.argarray = av;
1667                     }
1668
1669                 }
1670             }
1671             cx->cx_type |= CXp_MULTICALL;
1672
1673             start = p1 - max;
1674             sortsvp(aTHX_ start, max,
1675                     (is_xsub ? S_sortcv_xsub : hasargs ? S_sortcv_stacked : S_sortcv),
1676                     sort_flags);
1677
1678             if (!(flags & OPf_SPECIAL)) {
1679                 LEAVESUB(cv);
1680                 if (!is_xsub)
1681                     CvDEPTH(cv)--;
1682             }
1683             POPBLOCK(cx,PL_curpm);
1684             PL_stack_sp = newsp;
1685             POPSTACK;
1686             CATCH_SET(oldcatch);
1687         }
1688         else {
1689             MEXTEND(SP, 20);    /* Can't afford stack realloc on signal. */
1690             start = sorting_av ? AvARRAY(av) : ORIGMARK+1;
1691             sortsvp(aTHX_ start, max,
1692                     (priv & OPpSORT_NUMERIC)
1693                         ? ( ( ( priv & OPpSORT_INTEGER) || all_SIVs)
1694                             ? ( overloading ? S_amagic_i_ncmp : S_sv_i_ncmp)
1695                             : ( overloading ? S_amagic_ncmp : S_sv_ncmp ) )
1696                         : ( IN_LOCALE_RUNTIME
1697                             ? ( overloading
1698                                 ? (SVCOMPARE_t)S_amagic_cmp_locale
1699                                 : (SVCOMPARE_t)sv_cmp_locale_static)
1700                             : ( overloading ? (SVCOMPARE_t)S_amagic_cmp : (SVCOMPARE_t)sv_cmp_static)),
1701                     sort_flags);
1702         }
1703         if ((priv & OPpSORT_REVERSE) != 0) {
1704             SV **q = start+max-1;
1705             while (start < q) {
1706                 SV * const tmp = *start;
1707                 *start++ = *q;
1708                 *q-- = tmp;
1709             }
1710         }
1711     }
1712     if (sorting_av)
1713         SvREADONLY_off(av);
1714     else if (av && !sorting_av) {
1715         /* simulate pp_aassign of tied AV */
1716         SV** const base = ORIGMARK+1;
1717         for (i=0; i < max; i++) {
1718             base[i] = newSVsv(base[i]);
1719         }
1720         av_clear(av);
1721         av_extend(av, max);
1722         for (i=0; i < max; i++) {
1723             SV * const sv = base[i];
1724             SV ** const didstore = av_store(av, i, sv);
1725             if (SvSMAGICAL(sv))
1726                 mg_set(sv);
1727             if (!didstore)
1728                 sv_2mortal(sv);
1729         }
1730     }
1731     LEAVE;
1732     PL_stack_sp = ORIGMARK + (sorting_av ? 0 : max);
1733     return nextop;
1734 }
1735
1736 static I32
1737 S_sortcv(pTHX_ SV *a, SV *b)
1738 {
1739     dVAR;
1740     const I32 oldsaveix = PL_savestack_ix;
1741     const I32 oldscopeix = PL_scopestack_ix;
1742     I32 result;
1743     GvSV(PL_firstgv) = a;
1744     GvSV(PL_secondgv) = b;
1745     PL_stack_sp = PL_stack_base;
1746     PL_op = PL_sortcop;
1747     CALLRUNOPS(aTHX);
1748     if (PL_stack_sp != PL_stack_base + 1)
1749         Perl_croak(aTHX_ "Sort subroutine didn't return single value");
1750     if (!SvNIOKp(*PL_stack_sp))
1751         Perl_croak(aTHX_ "Sort subroutine didn't return a numeric value");
1752     result = SvIV(*PL_stack_sp);
1753     while (PL_scopestack_ix > oldscopeix) {
1754         LEAVE;
1755     }
1756     leave_scope(oldsaveix);
1757     return result;
1758 }
1759
1760 static I32
1761 S_sortcv_stacked(pTHX_ SV *a, SV *b)
1762 {
1763     dVAR;
1764     const I32 oldsaveix = PL_savestack_ix;
1765     const I32 oldscopeix = PL_scopestack_ix;
1766     I32 result;
1767     AV * const av = GvAV(PL_defgv);
1768
1769     if (AvMAX(av) < 1) {
1770         SV** ary = AvALLOC(av);
1771         if (AvARRAY(av) != ary) {
1772             AvMAX(av) += AvARRAY(av) - AvALLOC(av);
1773             AvARRAY(av) = ary;
1774         }
1775         if (AvMAX(av) < 1) {
1776             AvMAX(av) = 1;
1777             Renew(ary,2,SV*);
1778             AvARRAY(av) = ary;
1779         }
1780     }
1781     AvFILLp(av) = 1;
1782
1783     AvARRAY(av)[0] = a;
1784     AvARRAY(av)[1] = b;
1785     PL_stack_sp = PL_stack_base;
1786     PL_op = PL_sortcop;
1787     CALLRUNOPS(aTHX);
1788     if (PL_stack_sp != PL_stack_base + 1)
1789         Perl_croak(aTHX_ "Sort subroutine didn't return single value");
1790     if (!SvNIOKp(*PL_stack_sp))
1791         Perl_croak(aTHX_ "Sort subroutine didn't return a numeric value");
1792     result = SvIV(*PL_stack_sp);
1793     while (PL_scopestack_ix > oldscopeix) {
1794         LEAVE;
1795     }
1796     leave_scope(oldsaveix);
1797     return result;
1798 }
1799
1800 static I32
1801 S_sortcv_xsub(pTHX_ SV *a, SV *b)
1802 {
1803     dVAR; dSP;
1804     const I32 oldsaveix = PL_savestack_ix;
1805     const I32 oldscopeix = PL_scopestack_ix;
1806     CV * const cv=(CV*)PL_sortcop;
1807     I32 result;
1808
1809     SP = PL_stack_base;
1810     PUSHMARK(SP);
1811     EXTEND(SP, 2);
1812     *++SP = a;
1813     *++SP = b;
1814     PUTBACK;
1815     (void)(*CvXSUB(cv))(aTHX_ cv);
1816     if (PL_stack_sp != PL_stack_base + 1)
1817         Perl_croak(aTHX_ "Sort subroutine didn't return single value");
1818     if (!SvNIOKp(*PL_stack_sp))
1819         Perl_croak(aTHX_ "Sort subroutine didn't return a numeric value");
1820     result = SvIV(*PL_stack_sp);
1821     while (PL_scopestack_ix > oldscopeix) {
1822         LEAVE;
1823     }
1824     leave_scope(oldsaveix);
1825     return result;
1826 }
1827
1828
1829 static I32
1830 S_sv_ncmp(pTHX_ SV *a, SV *b)
1831 {
1832     const NV nv1 = SvNSIV(a);
1833     const NV nv2 = SvNSIV(b);
1834     return nv1 < nv2 ? -1 : nv1 > nv2 ? 1 : 0;
1835 }
1836
1837 static I32
1838 S_sv_i_ncmp(pTHX_ SV *a, SV *b)
1839 {
1840     const IV iv1 = SvIV(a);
1841     const IV iv2 = SvIV(b);
1842     return iv1 < iv2 ? -1 : iv1 > iv2 ? 1 : 0;
1843 }
1844
1845 #define tryCALL_AMAGICbin(left,right,meth) \
1846     (PL_amagic_generation && (SvAMAGIC(left)||SvAMAGIC(right))) \
1847         ? amagic_call(left, right, CAT2(meth,_amg), 0) \
1848         : NULL;
1849
1850 #define SORT_NORMAL_RETURN_VALUE(val)  (((val) > 0) ? 1 : ((val) ? -1 : 0))
1851
1852 static I32
1853 S_amagic_ncmp(pTHX_ register SV *a, register SV *b)
1854 {
1855     dVAR;
1856     SV * const tmpsv = tryCALL_AMAGICbin(a,b,ncmp);
1857     if (tmpsv) {
1858         if (SvIOK(tmpsv)) {
1859             const I32 i = SvIVX(tmpsv);
1860             return SORT_NORMAL_RETURN_VALUE(i);
1861         }
1862         else {
1863             const NV d = SvNV(tmpsv);
1864             return SORT_NORMAL_RETURN_VALUE(d);
1865         }
1866      }
1867      return S_sv_ncmp(aTHX_ a, b);
1868 }
1869
1870 static I32
1871 S_amagic_i_ncmp(pTHX_ register SV *a, register SV *b)
1872 {
1873     dVAR;
1874     SV * const tmpsv = tryCALL_AMAGICbin(a,b,ncmp);
1875     if (tmpsv) {
1876         if (SvIOK(tmpsv)) {
1877             const I32 i = SvIVX(tmpsv);
1878             return SORT_NORMAL_RETURN_VALUE(i);
1879         }
1880         else {
1881             const NV d = SvNV(tmpsv);
1882             return SORT_NORMAL_RETURN_VALUE(d);
1883         }
1884     }
1885     return S_sv_i_ncmp(aTHX_ a, b);
1886 }
1887
1888 static I32
1889 S_amagic_cmp(pTHX_ register SV *str1, register SV *str2)
1890 {
1891     dVAR;
1892     SV * const tmpsv = tryCALL_AMAGICbin(str1,str2,scmp);
1893     if (tmpsv) {
1894         if (SvIOK(tmpsv)) {
1895             const I32 i = SvIVX(tmpsv);
1896             return SORT_NORMAL_RETURN_VALUE(i);
1897         }
1898         else {
1899             const NV d = SvNV(tmpsv);
1900             return SORT_NORMAL_RETURN_VALUE(d);
1901         }
1902     }
1903     return sv_cmp(str1, str2);
1904 }
1905
1906 static I32
1907 S_amagic_cmp_locale(pTHX_ register SV *str1, register SV *str2)
1908 {
1909     dVAR;
1910     SV * const tmpsv = tryCALL_AMAGICbin(str1,str2,scmp);
1911     if (tmpsv) {
1912         if (SvIOK(tmpsv)) {
1913             const I32 i = SvIVX(tmpsv);
1914             return SORT_NORMAL_RETURN_VALUE(i);
1915         }
1916         else {
1917             const NV d = SvNV(tmpsv);
1918             return SORT_NORMAL_RETURN_VALUE(d);
1919         }
1920     }
1921     return sv_cmp_locale(str1, str2);
1922 }
1923
1924 /*
1925  * Local variables:
1926  * c-indentation-style: bsd
1927  * c-basic-offset: 4
1928  * indent-tabs-mode: t
1929  * End:
1930  *
1931  * ex: set ts=8 sts=4 sw=4 noet:
1932  */