pp_sort.c

   1 /*    pp_sort.c
   2  *
   3  *    Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
   4  *    2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007 by Larry Wall and others
   5  *
   6  *    You may distribute under the terms of either the GNU General Public
   7  *    License or the Artistic License, as specified in the README file.
   8  *
   9  */
  10
  11 /*
  12  *   ...they shuffled back towards the rear of the line. 'No, not at the
  13  *   rear!'  the slave-driver shouted. 'Three files up. And stay there...
  14  */
  15
  16 /* This file contains pp ("push/pop") functions that
  17  * execute the opcodes that make up a perl program. A typical pp function
  18  * expects to find its arguments on the stack, and usually pushes its
  19  * results onto the stack, hence the 'pp' terminology. Each OP structure
  20  * contains a pointer to the relevant pp_foo() function.
  21  *
  22  * This particular file just contains pp_sort(), which is complex
  23  * enough to merit its own file! See the other pp*.c files for the rest of
  24  * the pp_ functions.
  25  */
  26
  27 #include "EXTERN.h"
  28 #define PERL_IN_PP_SORT_C
  29 #include "perl.h"
  30
  31 #if defined(UNDER_CE)
  32 /* looks like 'small' is reserved word for WINCE (or somesuch)*/
  33 #define small xsmall
  34 #endif
  35
  36 #define sv_cmp_static Perl_sv_cmp
  37 #define sv_cmp_locale_static Perl_sv_cmp_locale
  38
  39 #ifndef SMALLSORT
  40 #define SMALLSORT (200)
  41 #endif
  42
  43 /* Flags for qsortsv and mergesortsv */
  44 #define SORTf_DESC   1
  45 #define SORTf_STABLE 2
  46 #define SORTf_QSORT  4
  47
  48 /*
  49  * The mergesort implementation is by Peter M. Mcilroy <pmcilroy@lucent.com>.
  50  *
  51  * The original code was written in conjunction with BSD Computer Software
  52  * Research Group at University of California, Berkeley.
  53  *
  54  * See also: "Optimistic Merge Sort" (SODA '92)
  55  *
  56  * The integration to Perl is by John P. Linderman <jpl@research.att.com>.
  57  *
  58  * The code can be distributed under the same terms as Perl itself.
  59  *
  60  */
  61
  62
  63 typedef char * aptr;            /* pointer for arithmetic on sizes */
  64 typedef SV * gptr;              /* pointers in our lists */
  65
  66 /* Binary merge internal sort, with a few special mods
  67 ** for the special perl environment it now finds itself in.
  68 **
  69 ** Things that were once options have been hotwired
  70 ** to values suitable for this use.  In particular, we'll always
  71 ** initialize looking for natural runs, we'll always produce stable
  72 ** output, and we'll always do Peter McIlroy's binary merge.
  73 */
  74
  75 /* Pointer types for arithmetic and storage and convenience casts */
  76
  77 #define APTR(P) ((aptr)(P))
  78 #define GPTP(P) ((gptr *)(P))
  79 #define GPPP(P) ((gptr **)(P))
  80
  81
  82 /* byte offset from pointer P to (larger) pointer Q */
  83 #define BYTEOFF(P, Q) (APTR(Q) - APTR(P))
  84
  85 #define PSIZE sizeof(gptr)
  86
  87 /* If PSIZE is power of 2, make PSHIFT that power, if that helps */
  88
  89 #ifdef  PSHIFT
  90 #define PNELEM(P, Q)    (BYTEOFF(P,Q) >> (PSHIFT))
  91 #define PNBYTE(N)       ((N) << (PSHIFT))
  92 #define PINDEX(P, N)    (GPTP(APTR(P) + PNBYTE(N)))
  93 #else
  94 /* Leave optimization to compiler */
  95 #define PNELEM(P, Q)    (GPTP(Q) - GPTP(P))
  96 #define PNBYTE(N)       ((N) * (PSIZE))
  97 #define PINDEX(P, N)    (GPTP(P) + (N))
  98 #endif
  99
 100 /* Pointer into other corresponding to pointer into this */
 101 #define POTHER(P, THIS, OTHER) GPTP(APTR(OTHER) + BYTEOFF(THIS,P))
 102
 103 #define FROMTOUPTO(src, dst, lim) do *dst++ = *src++; while(src<lim)
 104
 105
 106 /* Runs are identified by a pointer in the auxilliary list.
 107 ** The pointer is at the start of the list,
 108 ** and it points to the start of the next list.
 109 ** NEXT is used as an lvalue, too.
 110 */
 111
 112 #define NEXT(P)         (*GPPP(P))
 113
 114
 115 /* PTHRESH is the minimum number of pairs with the same sense to justify
 116 ** checking for a run and extending it.  Note that PTHRESH counts PAIRS,
 117 ** not just elements, so PTHRESH == 8 means a run of 16.
 118 */
 119
 120 #define PTHRESH (8)
 121
 122 /* RTHRESH is the number of elements in a run that must compare low
 123 ** to the low element from the opposing run before we justify
 124 ** doing a binary rampup instead of single stepping.
 125 ** In random input, N in a row low should only happen with
 126 ** probability 2^(1-N), so we can risk that we are dealing
 127 ** with orderly input without paying much when we aren't.
 128 */
 129
 130 #define RTHRESH (6)
 131
 132
 133 /*
 134 ** Overview of algorithm and variables.
 135 ** The array of elements at list1 will be organized into runs of length 2,
 136 ** or runs of length >= 2 * PTHRESH.  We only try to form long runs when
 137 ** PTHRESH adjacent pairs compare in the same way, suggesting overall order.
 138 **
 139 ** Unless otherwise specified, pair pointers address the first of two elements.
 140 **
 141 ** b and b+1 are a pair that compare with sense "sense".
 142 ** b is the "bottom" of adjacent pairs that might form a longer run.
 143 **
 144 ** p2 parallels b in the list2 array, where runs are defined by
 145 ** a pointer chain.
 146 **
 147 ** t represents the "top" of the adjacent pairs that might extend
 148 ** the run beginning at b.  Usually, t addresses a pair
 149 ** that compares with opposite sense from (b,b+1).
 150 ** However, it may also address a singleton element at the end of list1,
 151 ** or it may be equal to "last", the first element beyond list1.
 152 **
 153 ** r addresses the Nth pair following b.  If this would be beyond t,
 154 ** we back it off to t.  Only when r is less than t do we consider the
 155 ** run long enough to consider checking.
 156 **
 157 ** q addresses a pair such that the pairs at b through q already form a run.
 158 ** Often, q will equal b, indicating we only are sure of the pair itself.
 159 ** However, a search on the previous cycle may have revealed a longer run,
 160 ** so q may be greater than b.
 161 **
 162 ** p is used to work back from a candidate r, trying to reach q,
 163 ** which would mean b through r would be a run.  If we discover such a run,
 164 ** we start q at r and try to push it further towards t.
 165 ** If b through r is NOT a run, we detect the wrong order at (p-1,p).
 166 ** In any event, after the check (if any), we have two main cases.
 167 **
 168 ** 1) Short run.  b <= q < p <= r <= t.
 169 **      b through q is a run (perhaps trivial)
 170 **      q through p are uninteresting pairs
 171 **      p through r is a run
 172 **
 173 ** 2) Long run.  b < r <= q < t.
 174 **      b through q is a run (of length >= 2 * PTHRESH)
 175 **
 176 ** Note that degenerate cases are not only possible, but likely.
 177 ** For example, if the pair following b compares with opposite sense,
 178 ** then b == q < p == r == t.
 179 */
 180
 181
 182 static IV
 183 dynprep(pTHX_ gptr *list1, gptr *list2, size_t nmemb, const SVCOMPARE_t cmp)
 184 {
 185     I32 sense;
 186     register gptr *b, *p, *q, *t, *p2;
 187     register gptr *last, *r;
 188     IV runs = 0;
 189
 190     b = list1;
 191     last = PINDEX(b, nmemb);
 192     sense = (cmp(aTHX_ *b, *(b+1)) > 0);
 193     for (p2 = list2; b < last; ) {
 194         /* We just started, or just reversed sense.
 195         ** Set t at end of pairs with the prevailing sense.
 196         */
 197         for (p = b+2, t = p; ++p < last; t = ++p) {
 198             if ((cmp(aTHX_ *t, *p) > 0) != sense) break;
 199         }
 200         q = b;
 201         /* Having laid out the playing field, look for long runs */
 202         do {
 203             p = r = b + (2 * PTHRESH);
 204             if (r >= t) p = r = t;      /* too short to care about */
 205             else {
 206                 while (((cmp(aTHX_ *(p-1), *p) > 0) == sense) &&
 207                        ((p -= 2) > q));
 208                 if (p <= q) {
 209                     /* b through r is a (long) run.
 210                     ** Extend it as far as possible.
 211                     */
 212                     p = q = r;
 213                     while (((p += 2) < t) &&
 214                            ((cmp(aTHX_ *(p-1), *p) > 0) == sense)) q = p;
 215                     r = p = q + 2;      /* no simple pairs, no after-run */
 216                 }
 217             }
 218             if (q > b) {                /* run of greater than 2 at b */
 219                 gptr *savep = p;
 220
 221                 p = q += 2;
 222                 /* pick up singleton, if possible */
 223                 if ((p == t) &&
 224                     ((t + 1) == last) &&
 225                     ((cmp(aTHX_ *(p-1), *p) > 0) == sense))
 226                     savep = r = p = q = last;
 227                 p2 = NEXT(p2) = p2 + (p - b); ++runs;
 228                 if (sense)
 229                     while (b < --p) {
 230                         const gptr c = *b;
 231                         *b++ = *p;
 232                         *p = c;
 233                     }
 234                 p = savep;
 235             }
 236             while (q < p) {             /* simple pairs */
 237                 p2 = NEXT(p2) = p2 + 2; ++runs;
 238                 if (sense) {
 239                     const gptr c = *q++;
 240                     *(q-1) = *q;
 241                     *q++ = c;
 242                 } else q += 2;
 243             }
 244             if (((b = p) == t) && ((t+1) == last)) {
 245                 NEXT(p2) = p2 + 1; ++runs;
 246                 b++;
 247             }
 248             q = r;
 249         } while (b < t);
 250         sense = !sense;
 251     }
 252     return runs;
 253 }
 254
 255
 256 /* The original merge sort, in use since 5.7, was as fast as, or faster than,
 257  * qsort on many platforms, but slower than qsort, conspicuously so,
 258  * on others.  The most likely explanation was platform-specific
 259  * differences in cache sizes and relative speeds.
 260  *
 261  * The quicksort divide-and-conquer algorithm guarantees that, as the
 262  * problem is subdivided into smaller and smaller parts, the parts
 263  * fit into smaller (and faster) caches.  So it doesn't matter how
 264  * many levels of cache exist, quicksort will "find" them, and,
 265  * as long as smaller is faster, take advantage of them.
 266  *
 267  * By contrast, consider how the original mergesort algorithm worked.
 268  * Suppose we have five runs (each typically of length 2 after dynprep).
 269  *
 270  * pass               base                        aux
 271  *  0              1 2 3 4 5
 272  *  1                                           12 34 5
 273  *  2                1234 5
 274  *  3                                            12345
 275  *  4                 12345
 276  *
 277  * Adjacent pairs are merged in "grand sweeps" through the input.
 278  * This means, on pass 1, the records in runs 1 and 2 aren't revisited until
 279  * runs 3 and 4 are merged and the runs from run 5 have been copied.
 280  * The only cache that matters is one large enough to hold *all* the input.
 281  * On some platforms, this may be many times slower than smaller caches.
 282  *
 283  * The following pseudo-code uses the same basic merge algorithm,
 284  * but in a divide-and-conquer way.
 285  *
 286  * # merge $runs runs at offset $offset of list $list1 into $list2.
 287  * # all unmerged runs ($runs == 1) originate in list $base.
 288  * sub mgsort2 {
 289  *     my ($offset, $runs, $base, $list1, $list2) = @_;
 290  *
 291  *     if ($runs == 1) {
 292  *         if ($list1 is $base) copy run to $list2
 293  *         return offset of end of list (or copy)
 294  *     } else {
 295  *         $off2 = mgsort2($offset, $runs-($runs/2), $base, $list2, $list1)
 296  *         mgsort2($off2, $runs/2, $base, $list2, $list1)
 297  *         merge the adjacent runs at $offset of $list1 into $list2
 298  *         return the offset of the end of the merged runs
 299  *     }
 300  * }
 301  * mgsort2(0, $runs, $base, $aux, $base);
 302  *
 303  * For our 5 runs, the tree of calls looks like
 304  *
 305  *           5
 306  *      3        2
 307  *   2     1   1   1
 308  * 1   1
 309  *
 310  * 1   2   3   4   5
 311  *
 312  * and the corresponding activity looks like
 313  *
 314  * copy runs 1 and 2 from base to aux
 315  * merge runs 1 and 2 from aux to base
 316  * (run 3 is where it belongs, no copy needed)
 317  * merge runs 12 and 3 from base to aux
 318  * (runs 4 and 5 are where they belong, no copy needed)
 319  * merge runs 4 and 5 from base to aux
 320  * merge runs 123 and 45 from aux to base
 321  *
 322  * Note that we merge runs 1 and 2 immediately after copying them,
 323  * while they are still likely to be in fast cache.  Similarly,
 324  * run 3 is merged with run 12 while it still may be lingering in cache.
 325  * This implementation should therefore enjoy much of the cache-friendly
 326  * behavior that quicksort does.  In addition, it does less copying
 327  * than the original mergesort implementation (only runs 1 and 2 are copied)
 328  * and the "balancing" of merges is better (merged runs comprise more nearly
 329  * equal numbers of original runs).
 330  *
 331  * The actual cache-friendly implementation will use a pseudo-stack
 332  * to avoid recursion, and will unroll processing of runs of length 2,
 333  * but it is otherwise similar to the recursive implementation.
 334  */
 335
 336 typedef struct {
 337     IV  offset;         /* offset of 1st of 2 runs at this level */
 338     IV  runs;           /* how many runs must be combined into 1 */
 339 } off_runs;             /* pseudo-stack element */
 340
 341
 342 static I32
 343 cmp_desc(pTHX_ gptr const a, gptr const b)
 344 {
 345     dVAR;
 346     return -PL_sort_RealCmp(aTHX_ a, b);
 347 }
 348
 349 STATIC void
 350 S_mergesortsv(pTHX_ gptr *base, size_t nmemb, SVCOMPARE_t cmp, U32 flags)
 351 {
 352     dVAR;
 353     IV i, run, offset;
 354     I32 sense, level;
 355     register gptr *f1, *f2, *t, *b, *p;
 356     int iwhich;
 357     gptr *aux;
 358     gptr *p1;
 359     gptr small[SMALLSORT];
 360     gptr *which[3];
 361     off_runs stack[60], *stackp;
 362     SVCOMPARE_t savecmp = NULL;
 363
 364     if (nmemb <= 1) return;                     /* sorted trivially */
 365
 366     if ((flags & SORTf_DESC) != 0) {
 367         savecmp = PL_sort_RealCmp;      /* Save current comparison routine, if any */
 368         PL_sort_RealCmp = cmp;  /* Put comparison routine where cmp_desc can find it */
 369         cmp = cmp_desc;
 370     }
 371
 372     if (nmemb <= SMALLSORT) aux = small;        /* use stack for aux array */
 373     else { Newx(aux,nmemb,gptr); }              /* allocate auxilliary array */
 374     level = 0;
 375     stackp = stack;
 376     stackp->runs = dynprep(aTHX_ base, aux, nmemb, cmp);
 377     stackp->offset = offset = 0;
 378     which[0] = which[2] = base;
 379     which[1] = aux;
 380     for (;;) {
 381         /* On levels where both runs have be constructed (stackp->runs == 0),
 382          * merge them, and note the offset of their end, in case the offset
 383          * is needed at the next level up.  Hop up a level, and,
 384          * as long as stackp->runs is 0, keep merging.
 385          */
 386         IV runs = stackp->runs;
 387         if (runs == 0) {
 388             gptr *list1, *list2;
 389             iwhich = level & 1;
 390             list1 = which[iwhich];              /* area where runs are now */
 391             list2 = which[++iwhich];            /* area for merged runs */
 392             do {
 393                 register gptr *l1, *l2, *tp2;
 394                 offset = stackp->offset;
 395                 f1 = p1 = list1 + offset;               /* start of first run */
 396                 p = tp2 = list2 + offset;       /* where merged run will go */
 397                 t = NEXT(p);                    /* where first run ends */
 398                 f2 = l1 = POTHER(t, list2, list1); /* ... on the other side */
 399                 t = NEXT(t);                    /* where second runs ends */
 400                 l2 = POTHER(t, list2, list1);   /* ... on the other side */
 401                 offset = PNELEM(list2, t);
 402                 while (f1 < l1 && f2 < l2) {
 403                     /* If head 1 is larger than head 2, find ALL the elements
 404                     ** in list 2 strictly less than head1, write them all,
 405                     ** then head 1.  Then compare the new heads, and repeat,
 406                     ** until one or both lists are exhausted.
 407                     **
 408                     ** In all comparisons (after establishing
 409                     ** which head to merge) the item to merge
 410                     ** (at pointer q) is the first operand of
 411                     ** the comparison.  When we want to know
 412                     ** if "q is strictly less than the other",
 413                     ** we can't just do
 414                     **    cmp(q, other) < 0
 415                     ** because stability demands that we treat equality
 416                     ** as high when q comes from l2, and as low when
 417                     ** q was from l1.  So we ask the question by doing
 418                     **    cmp(q, other) <= sense
 419                     ** and make sense == 0 when equality should look low,
 420                     ** and -1 when equality should look high.
 421                     */
 422
 423                     register gptr *q;
 424                     if (cmp(aTHX_ *f1, *f2) <= 0) {
 425                         q = f2; b = f1; t = l1;
 426                         sense = -1;
 427                     } else {
 428                         q = f1; b = f2; t = l2;
 429                         sense = 0;
 430                     }
 431
 432
 433                     /* ramp up
 434                     **
 435                     ** Leave t at something strictly
 436                     ** greater than q (or at the end of the list),
 437                     ** and b at something strictly less than q.
 438                     */
 439                     for (i = 1, run = 0 ;;) {
 440                         if ((p = PINDEX(b, i)) >= t) {
 441                             /* off the end */
 442                             if (((p = PINDEX(t, -1)) > b) &&
 443                                 (cmp(aTHX_ *q, *p) <= sense))
 444                                  t = p;
 445                             else b = p;
 446                             break;
 447                         } else if (cmp(aTHX_ *q, *p) <= sense) {
 448                             t = p;
 449                             break;
 450                         } else b = p;
 451                         if (++run >= RTHRESH) i += i;
 452                     }
 453
 454
 455                     /* q is known to follow b and must be inserted before t.
 456                     ** Increment b, so the range of possibilities is [b,t).
 457                     ** Round binary split down, to favor early appearance.
 458                     ** Adjust b and t until q belongs just before t.
 459                     */
 460
 461                     b++;
 462                     while (b < t) {
 463                         p = PINDEX(b, (PNELEM(b, t) - 1) / 2);
 464                         if (cmp(aTHX_ *q, *p) <= sense) {
 465                             t = p;
 466                         } else b = p + 1;
 467                     }
 468
 469
 470                     /* Copy all the strictly low elements */
 471
 472                     if (q == f1) {
 473                         FROMTOUPTO(f2, tp2, t);
 474                         *tp2++ = *f1++;
 475                     } else {
 476                         FROMTOUPTO(f1, tp2, t);
 477                         *tp2++ = *f2++;
 478                     }
 479                 }
 480
 481
 482                 /* Run out remaining list */
 483                 if (f1 == l1) {
 484                        if (f2 < l2) FROMTOUPTO(f2, tp2, l2);
 485                 } else              FROMTOUPTO(f1, tp2, l1);
 486                 p1 = NEXT(p1) = POTHER(tp2, list2, list1);
 487
 488                 if (--level == 0) goto done;
 489                 --stackp;
 490                 t = list1; list1 = list2; list2 = t;    /* swap lists */
 491             } while ((runs = stackp->runs) == 0);
 492         }
 493
 494
 495         stackp->runs = 0;               /* current run will finish level */
 496         /* While there are more than 2 runs remaining,
 497          * turn them into exactly 2 runs (at the "other" level),
 498          * each made up of approximately half the runs.
 499          * Stack the second half for later processing,
 500          * and set about producing the first half now.
 501          */
 502         while (runs > 2) {
 503             ++level;
 504             ++stackp;
 505             stackp->offset = offset;
 506             runs -= stackp->runs = runs / 2;
 507         }
 508         /* We must construct a single run from 1 or 2 runs.
 509          * All the original runs are in which[0] == base.
 510          * The run we construct must end up in which[level&1].
 511          */
 512         iwhich = level & 1;
 513         if (runs == 1) {
 514             /* Constructing a single run from a single run.
 515              * If it's where it belongs already, there's nothing to do.
 516              * Otherwise, copy it to where it belongs.
 517              * A run of 1 is either a singleton at level 0,
 518              * or the second half of a split 3.  In neither event
 519              * is it necessary to set offset.  It will be set by the merge
 520              * that immediately follows.
 521              */
 522             if (iwhich) {       /* Belongs in aux, currently in base */
 523                 f1 = b = PINDEX(base, offset);  /* where list starts */
 524                 f2 = PINDEX(aux, offset);       /* where list goes */
 525                 t = NEXT(f2);                   /* where list will end */
 526                 offset = PNELEM(aux, t);        /* offset thereof */
 527                 t = PINDEX(base, offset);       /* where it currently ends */
 528                 FROMTOUPTO(f1, f2, t);          /* copy */
 529                 NEXT(b) = t;                    /* set up parallel pointer */
 530             } else if (level == 0) goto done;   /* single run at level 0 */
 531         } else {
 532             /* Constructing a single run from two runs.
 533              * The merge code at the top will do that.
 534              * We need only make sure the two runs are in the "other" array,
 535              * so they'll end up in the correct array after the merge.
 536              */
 537             ++level;
 538             ++stackp;
 539             stackp->offset = offset;
 540             stackp->runs = 0;   /* take care of both runs, trigger merge */
 541             if (!iwhich) {      /* Merged runs belong in aux, copy 1st */
 542                 f1 = b = PINDEX(base, offset);  /* where first run starts */
 543                 f2 = PINDEX(aux, offset);       /* where it will be copied */
 544                 t = NEXT(f2);                   /* where first run will end */
 545                 offset = PNELEM(aux, t);        /* offset thereof */
 546                 p = PINDEX(base, offset);       /* end of first run */
 547                 t = NEXT(t);                    /* where second run will end */
 548                 t = PINDEX(base, PNELEM(aux, t)); /* where it now ends */
 549                 FROMTOUPTO(f1, f2, t);          /* copy both runs */
 550                 NEXT(b) = p;                    /* paralled pointer for 1st */
 551                 NEXT(p) = t;                    /* ... and for second */
 552             }
 553         }
 554     }
 555 done:
 556     if (aux != small) Safefree(aux);    /* free iff allocated */
 557     if (flags) {
 558          PL_sort_RealCmp = savecmp;     /* Restore current comparison routine, if any */
 559     }
 560     return;
 561 }
 562
 563 /*
 564  * The quicksort implementation was derived from source code contributed
 565  * by Tom Horsley.
 566  *
 567  * NOTE: this code was derived from Tom Horsley's qsort replacement
 568  * and should not be confused with the original code.
 569  */
 570
 571 /* Copyright (C) Tom Horsley, 1997. All rights reserved.
 572
 573    Permission granted to distribute under the same terms as perl which are
 574    (briefly):
 575
 576     This program is free software; you can redistribute it and/or modify
 577     it under the terms of either:
 578
 579         a) the GNU General Public License as published by the Free
 580         Software Foundation; either version 1, or (at your option) any
 581         later version, or
 582
 583         b) the "Artistic License" which comes with this Kit.
 584
 585    Details on the perl license can be found in the perl source code which
 586    may be located via the www.perl.com web page.
 587
 588    This is the most wonderfulest possible qsort I can come up with (and
 589    still be mostly portable) My (limited) tests indicate it consistently
 590    does about 20% fewer calls to compare than does the qsort in the Visual
 591    C++ library, other vendors may vary.
 592
 593    Some of the ideas in here can be found in "Algorithms" by Sedgewick,
 594    others I invented myself (or more likely re-invented since they seemed
 595    pretty obvious once I watched the algorithm operate for a while).
 596
 597    Most of this code was written while watching the Marlins sweep the Giants
 598    in the 1997 National League Playoffs - no Braves fans allowed to use this
 599    code (just kidding :-).
 600
 601    I realize that if I wanted to be true to the perl tradition, the only
 602    comment in this file would be something like:
 603
 604    ...they shuffled back towards the rear of the line. 'No, not at the
 605    rear!'  the slave-driver shouted. 'Three files up. And stay there...
 606
 607    However, I really needed to violate that tradition just so I could keep
 608    track of what happens myself, not to mention some poor fool trying to
 609    understand this years from now :-).
 610 */
 611
 612 /* ********************************************************** Configuration */
 613
 614 #ifndef QSORT_ORDER_GUESS
 615 #define QSORT_ORDER_GUESS 2     /* Select doubling version of the netBSD trick */
 616 #endif
 617
 618 /* QSORT_MAX_STACK is the largest number of partitions that can be stacked up for
 619    future processing - a good max upper bound is log base 2 of memory size
 620    (32 on 32 bit machines, 64 on 64 bit machines, etc). In reality can
 621    safely be smaller than that since the program is taking up some space and
 622    most operating systems only let you grab some subset of contiguous
 623    memory (not to mention that you are normally sorting data larger than
 624    1 byte element size :-).
 625 */
 626 #ifndef QSORT_MAX_STACK
 627 #define QSORT_MAX_STACK 32
 628 #endif
 629
 630 /* QSORT_BREAK_EVEN is the size of the largest partition we should insertion sort.
 631    Anything bigger and we use qsort. If you make this too small, the qsort
 632    will probably break (or become less efficient), because it doesn't expect
 633    the middle element of a partition to be the same as the right or left -
 634    you have been warned).
 635 */
 636 #ifndef QSORT_BREAK_EVEN
 637 #define QSORT_BREAK_EVEN 6
 638 #endif
 639
 640 /* QSORT_PLAY_SAFE is the size of the largest partition we're willing
 641    to go quadratic on.  We innoculate larger partitions against
 642    quadratic behavior by shuffling them before sorting.  This is not
 643    an absolute guarantee of non-quadratic behavior, but it would take
 644    staggeringly bad luck to pick extreme elements as the pivot
 645    from randomized data.
 646 */
 647 #ifndef QSORT_PLAY_SAFE
 648 #define QSORT_PLAY_SAFE 255
 649 #endif
 650
 651 /* ************************************************************* Data Types */
 652
 653 /* hold left and right index values of a partition waiting to be sorted (the
 654    partition includes both left and right - right is NOT one past the end or
 655    anything like that).
 656 */
 657 struct partition_stack_entry {
 658    int left;
 659    int right;
 660 #ifdef QSORT_ORDER_GUESS
 661    int qsort_break_even;
 662 #endif
 663 };
 664
 665 /* ******************************************************* Shorthand Macros */
 666
 667 /* Note that these macros will be used from inside the qsort function where
 668    we happen to know that the variable 'elt_size' contains the size of an
 669    array element and the variable 'temp' points to enough space to hold a
 670    temp element and the variable 'array' points to the array being sorted
 671    and 'compare' is the pointer to the compare routine.
 672
 673    Also note that there are very many highly architecture specific ways
 674    these might be sped up, but this is simply the most generally portable
 675    code I could think of.
 676 */
 677
 678 /* Return < 0 == 0 or > 0 as the value of elt1 is < elt2, == elt2, > elt2
 679 */
 680 #define qsort_cmp(elt1, elt2) \
 681    ((*compare)(aTHX_ array[elt1], array[elt2]))
 682
 683 #ifdef QSORT_ORDER_GUESS
 684 #define QSORT_NOTICE_SWAP swapped++;
 685 #else
 686 #define QSORT_NOTICE_SWAP
 687 #endif
 688
 689 /* swaps contents of array elements elt1, elt2.
 690 */
 691 #define qsort_swap(elt1, elt2) \
 692    STMT_START { \
 693       QSORT_NOTICE_SWAP \
 694       temp = array[elt1]; \
 695       array[elt1] = array[elt2]; \
 696       array[elt2] = temp; \
 697    } STMT_END
 698
 699 /* rotate contents of elt1, elt2, elt3 such that elt1 gets elt2, elt2 gets
 700    elt3 and elt3 gets elt1.
 701 */
 702 #define qsort_rotate(elt1, elt2, elt3) \
 703    STMT_START { \
 704       QSORT_NOTICE_SWAP \
 705       temp = array[elt1]; \
 706       array[elt1] = array[elt2]; \
 707       array[elt2] = array[elt3]; \
 708       array[elt3] = temp; \
 709    } STMT_END
 710
 711 /* ************************************************************ Debug stuff */
 712
 713 #ifdef QSORT_DEBUG
 714
 715 static void
 716 break_here()
 717 {
 718    return; /* good place to set a breakpoint */
 719 }
 720
 721 #define qsort_assert(t) (void)( (t) || (break_here(), 0) )
 722
 723 static void
 724 doqsort_all_asserts(
 725    void * array,
 726    size_t num_elts,
 727    size_t elt_size,
 728    int (*compare)(const void * elt1, const void * elt2),
 729    int pc_left, int pc_right, int u_left, int u_right)
 730 {
 731    int i;
 732
 733    qsort_assert(pc_left <= pc_right);
 734    qsort_assert(u_right < pc_left);
 735    qsort_assert(pc_right < u_left);
 736    for (i = u_right + 1; i < pc_left; ++i) {
 737       qsort_assert(qsort_cmp(i, pc_left) < 0);
 738    }
 739    for (i = pc_left; i < pc_right; ++i) {
 740       qsort_assert(qsort_cmp(i, pc_right) == 0);
 741    }
 742    for (i = pc_right + 1; i < u_left; ++i) {
 743       qsort_assert(qsort_cmp(pc_right, i) < 0);
 744    }
 745 }
 746
 747 #define qsort_all_asserts(PC_LEFT, PC_RIGHT, U_LEFT, U_RIGHT) \
 748    doqsort_all_asserts(array, num_elts, elt_size, compare, \
 749                  PC_LEFT, PC_RIGHT, U_LEFT, U_RIGHT)
 750
 751 #else
 752
 753 #define qsort_assert(t) ((void)0)
 754
 755 #define qsort_all_asserts(PC_LEFT, PC_RIGHT, U_LEFT, U_RIGHT) ((void)0)
 756
 757 #endif
 758
 759 /* ****************************************************************** qsort */
 760
 761 STATIC void /* the standard unstable (u) quicksort (qsort) */
 762 S_qsortsvu(pTHX_ SV ** array, size_t num_elts, SVCOMPARE_t compare)
 763 {
 764    register SV * temp;
 765    struct partition_stack_entry partition_stack[QSORT_MAX_STACK];
 766    int next_stack_entry = 0;
 767    int part_left;
 768    int part_right;
 769 #ifdef QSORT_ORDER_GUESS
 770    int qsort_break_even;
 771    int swapped;
 772 #endif
 773
 774     PERL_ARGS_ASSERT_QSORTSVU;
 775
 776    /* Make sure we actually have work to do.
 777    */
 778    if (num_elts <= 1) {
 779       return;
 780    }
 781
 782    /* Innoculate large partitions against quadratic behavior */
 783    if (num_elts > QSORT_PLAY_SAFE) {
 784       register size_t n;
 785       register SV ** const q = array;
 786       for (n = num_elts; n > 1; ) {
 787          register const size_t j = (size_t)(n-- * Drand01());
 788          temp = q[j];
 789          q[j] = q[n];
 790          q[n] = temp;
 791       }
 792    }
 793
 794    /* Setup the initial partition definition and fall into the sorting loop
 795    */
 796    part_left = 0;
 797    part_right = (int)(num_elts - 1);
 798 #ifdef QSORT_ORDER_GUESS
 799    qsort_break_even = QSORT_BREAK_EVEN;
 800 #else
 801 #define qsort_break_even QSORT_BREAK_EVEN
 802 #endif
 803    for ( ; ; ) {
 804       if ((part_right - part_left) >= qsort_break_even) {
 805          /* OK, this is gonna get hairy, so lets try to document all the
 806             concepts and abbreviations and variables and what they keep
 807             track of:
 808
 809             pc: pivot chunk - the set of array elements we accumulate in the
 810                 middle of the partition, all equal in value to the original
 811                 pivot element selected. The pc is defined by:
 812
 813                 pc_left - the leftmost array index of the pc
 814                 pc_right - the rightmost array index of the pc
 815
 816                 we start with pc_left == pc_right and only one element
 817                 in the pivot chunk (but it can grow during the scan).
 818
 819             u:  uncompared elements - the set of elements in the partition
 820                 we have not yet compared to the pivot value. There are two
 821                 uncompared sets during the scan - one to the left of the pc
 822                 and one to the right.
 823
 824                 u_right - the rightmost index of the left side's uncompared set
 825                 u_left - the leftmost index of the right side's uncompared set
 826
 827                 The leftmost index of the left sides's uncompared set
 828                 doesn't need its own variable because it is always defined
 829                 by the leftmost edge of the whole partition (part_left). The
 830                 same goes for the rightmost edge of the right partition
 831                 (part_right).
 832
 833                 We know there are no uncompared elements on the left once we
 834                 get u_right < part_left and no uncompared elements on the
 835                 right once u_left > part_right. When both these conditions
 836                 are met, we have completed the scan of the partition.
 837
 838                 Any elements which are between the pivot chunk and the
 839                 uncompared elements should be less than the pivot value on
 840                 the left side and greater than the pivot value on the right
 841                 side (in fact, the goal of the whole algorithm is to arrange
 842                 for that to be true and make the groups of less-than and
 843                 greater-then elements into new partitions to sort again).
 844
 845             As you marvel at the complexity of the code and wonder why it
 846             has to be so confusing. Consider some of the things this level
 847             of confusion brings:
 848
 849             Once I do a compare, I squeeze every ounce of juice out of it. I
 850             never do compare calls I don't have to do, and I certainly never
 851             do redundant calls.
 852
 853             I also never swap any elements unless I can prove there is a
 854             good reason. Many sort algorithms will swap a known value with
 855             an uncompared value just to get things in the right place (or
 856             avoid complexity :-), but that uncompared value, once it gets
 857             compared, may then have to be swapped again. A lot of the
 858             complexity of this code is due to the fact that it never swaps
 859             anything except compared values, and it only swaps them when the
 860             compare shows they are out of position.
 861          */
 862          int pc_left, pc_right;
 863          int u_right, u_left;
 864
 865          int s;
 866
 867          pc_left = ((part_left + part_right) / 2);
 868          pc_right = pc_left;
 869          u_right = pc_left - 1;
 870          u_left = pc_right + 1;
 871
 872          /* Qsort works best when the pivot value is also the median value
 873             in the partition (unfortunately you can't find the median value
 874             without first sorting :-), so to give the algorithm a helping
 875             hand, we pick 3 elements and sort them and use the median value
 876             of that tiny set as the pivot value.
 877
 878             Some versions of qsort like to use the left middle and right as
 879             the 3 elements to sort so they can insure the ends of the
 880             partition will contain values which will stop the scan in the
 881             compare loop, but when you have to call an arbitrarily complex
 882             routine to do a compare, its really better to just keep track of
 883             array index values to know when you hit the edge of the
 884             partition and avoid the extra compare. An even better reason to
 885             avoid using a compare call is the fact that you can drop off the
 886             edge of the array if someone foolishly provides you with an
 887             unstable compare function that doesn't always provide consistent
 888             results.
 889
 890             So, since it is simpler for us to compare the three adjacent
 891             elements in the middle of the partition, those are the ones we
 892             pick here (conveniently pointed at by u_right, pc_left, and
 893             u_left). The values of the left, center, and right elements
 894             are refered to as l c and r in the following comments.
 895          */
 896
 897 #ifdef QSORT_ORDER_GUESS
 898          swapped = 0;
 899 #endif
 900          s = qsort_cmp(u_right, pc_left);
 901          if (s < 0) {
 902             /* l < c */
 903             s = qsort_cmp(pc_left, u_left);
 904             /* if l < c, c < r - already in order - nothing to do */
 905             if (s == 0) {
 906                /* l < c, c == r - already in order, pc grows */
 907                ++pc_right;
 908                qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
 909             } else if (s > 0) {
 910                /* l < c, c > r - need to know more */
 911                s = qsort_cmp(u_right, u_left);
 912                if (s < 0) {
 913                   /* l < c, c > r, l < r - swap c & r to get ordered */
 914                   qsort_swap(pc_left, u_left);
 915                   qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
 916                } else if (s == 0) {
 917                   /* l < c, c > r, l == r - swap c&r, grow pc */
 918                   qsort_swap(pc_left, u_left);
 919                   --pc_left;
 920                   qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
 921                } else {
 922                   /* l < c, c > r, l > r - make lcr into rlc to get ordered */
 923                   qsort_rotate(pc_left, u_right, u_left);
 924                   qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
 925                }
 926             }
 927          } else if (s == 0) {
 928             /* l == c */
 929             s = qsort_cmp(pc_left, u_left);
 930             if (s < 0) {
 931                /* l == c, c < r - already in order, grow pc */
 932                --pc_left;
 933                qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
 934             } else if (s == 0) {
 935                /* l == c, c == r - already in order, grow pc both ways */
 936                --pc_left;
 937                ++pc_right;
 938                qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
 939             } else {
 940                /* l == c, c > r - swap l & r, grow pc */
 941                qsort_swap(u_right, u_left);
 942                ++pc_right;
 943                qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
 944             }
 945          } else {
 946             /* l > c */
 947             s = qsort_cmp(pc_left, u_left);
 948             if (s < 0) {
 949                /* l > c, c < r - need to know more */
 950                s = qsort_cmp(u_right, u_left);
 951                if (s < 0) {
 952                   /* l > c, c < r, l < r - swap l & c to get ordered */
 953                   qsort_swap(u_right, pc_left);
 954                   qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
 955                } else if (s == 0) {
 956                   /* l > c, c < r, l == r - swap l & c, grow pc */
 957                   qsort_swap(u_right, pc_left);
 958                   ++pc_right;
 959                   qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
 960                } else {
 961                   /* l > c, c < r, l > r - rotate lcr into crl to order */
 962                   qsort_rotate(u_right, pc_left, u_left);
 963                   qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
 964                }
 965             } else if (s == 0) {
 966                /* l > c, c == r - swap ends, grow pc */
 967                qsort_swap(u_right, u_left);
 968                --pc_left;
 969                qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
 970             } else {
 971                /* l > c, c > r - swap ends to get in order */
 972                qsort_swap(u_right, u_left);
 973                qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
 974             }
 975          }
 976          /* We now know the 3 middle elements have been compared and
 977             arranged in the desired order, so we can shrink the uncompared
 978             sets on both sides
 979          */
 980          --u_right;
 981          ++u_left;
 982          qsort_all_asserts(pc_left, pc_right, u_left, u_right);
 983
 984          /* The above massive nested if was the simple part :-). We now have
 985             the middle 3 elements ordered and we need to scan through the
 986             uncompared sets on either side, swapping elements that are on
 987             the wrong side or simply shuffling equal elements around to get
 988             all equal elements into the pivot chunk.
 989          */
 990
 991          for ( ; ; ) {
 992             int still_work_on_left;
 993             int still_work_on_right;
 994
 995             /* Scan the uncompared values on the left. If I find a value
 996                equal to the pivot value, move it over so it is adjacent to
 997                the pivot chunk and expand the pivot chunk. If I find a value
 998                less than the pivot value, then just leave it - its already
 999                on the correct side of the partition. If I find a greater
1000                value, then stop the scan.
1001             */
1002             while ((still_work_on_left = (u_right >= part_left))) {
1003                s = qsort_cmp(u_right, pc_left);
1004                if (s < 0) {
1005                   --u_right;
1006                } else if (s == 0) {
1007                   --pc_left;
1008                   if (pc_left != u_right) {
1009                      qsort_swap(u_right, pc_left);
1010                   }
1011                   --u_right;
1012                } else {
1013                   break;
1014                }
1015                qsort_assert(u_right < pc_left);
1016                qsort_assert(pc_left <= pc_right);
1017                qsort_assert(qsort_cmp(u_right + 1, pc_left) <= 0);
1018                qsort_assert(qsort_cmp(pc_left, pc_right) == 0);
1019             }
1020
1021             /* Do a mirror image scan of uncompared values on the right
1022             */
1023             while ((still_work_on_right = (u_left <= part_right))) {
1024                s = qsort_cmp(pc_right, u_left);
1025                if (s < 0) {
1026                   ++u_left;
1027                } else if (s == 0) {
1028                   ++pc_right;
1029                   if (pc_right != u_left) {
1030                      qsort_swap(pc_right, u_left);
1031                   }
1032                   ++u_left;
1033                } else {
1034                   break;
1035                }
1036                qsort_assert(u_left > pc_right);
1037                qsort_assert(pc_left <= pc_right);
1038                qsort_assert(qsort_cmp(pc_right, u_left - 1) <= 0);
1039                qsort_assert(qsort_cmp(pc_left, pc_right) == 0);
1040             }
1041
1042             if (still_work_on_left) {
1043                /* I know I have a value on the left side which needs to be
1044                   on the right side, but I need to know more to decide
1045                   exactly the best thing to do with it.
1046                */
1047                if (still_work_on_right) {
1048                   /* I know I have values on both side which are out of
1049                      position. This is a big win because I kill two birds
1050                      with one swap (so to speak). I can advance the
1051                      uncompared pointers on both sides after swapping both
1052                      of them into the right place.
1053                   */
1054                   qsort_swap(u_right, u_left);
1055                   --u_right;
1056                   ++u_left;
1057                   qsort_all_asserts(pc_left, pc_right, u_left, u_right);
1058                } else {
1059                   /* I have an out of position value on the left, but the
1060                      right is fully scanned, so I "slide" the pivot chunk
1061                      and any less-than values left one to make room for the
1062                      greater value over on the right. If the out of position
1063                      value is immediately adjacent to the pivot chunk (there
1064                      are no less-than values), I can do that with a swap,
1065                      otherwise, I have to rotate one of the less than values
1066                      into the former position of the out of position value
1067                      and the right end of the pivot chunk into the left end
1068                      (got all that?).
1069                   */
1070                   --pc_left;
1071                   if (pc_left == u_right) {
1072                      qsort_swap(u_right, pc_right);
1073                      qsort_all_asserts(pc_left, pc_right-1, u_left, u_right-1);
1074                   } else {
1075                      qsort_rotate(u_right, pc_left, pc_right);
1076                      qsort_all_asserts(pc_left, pc_right-1, u_left, u_right-1);
1077                   }
1078                   --pc_right;
1079                   --u_right;
1080                }
1081             } else if (still_work_on_right) {
1082                /* Mirror image of complex case above: I have an out of
1083                   position value on the right, but the left is fully
1084                   scanned, so I need to shuffle things around to make room
1085                   for the right value on the left.
1086                */
1087                ++pc_right;
1088                if (pc_right == u_left) {
1089                   qsort_swap(u_left, pc_left);
1090                   qsort_all_asserts(pc_left+1, pc_right, u_left+1, u_right);
1091                } else {
1092                   qsort_rotate(pc_right, pc_left, u_left);
1093                   qsort_all_asserts(pc_left+1, pc_right, u_left+1, u_right);
1094                }
1095                ++pc_left;
1096                ++u_left;
1097             } else {
1098                /* No more scanning required on either side of partition,
1099                   break out of loop and figure out next set of partitions
1100                */
1101                break;
1102             }
1103          }
1104
1105          /* The elements in the pivot chunk are now in the right place. They
1106             will never move or be compared again. All I have to do is decide
1107             what to do with the stuff to the left and right of the pivot
1108             chunk.
1109
1110             Notes on the QSORT_ORDER_GUESS ifdef code:
1111
1112             1. If I just built these partitions without swapping any (or
1113                very many) elements, there is a chance that the elements are
1114                already ordered properly (being properly ordered will
1115                certainly result in no swapping, but the converse can't be
1116                proved :-).
1117
1118             2. A (properly written) insertion sort will run faster on
1119                already ordered data than qsort will.
1120
1121             3. Perhaps there is some way to make a good guess about
1122                switching to an insertion sort earlier than partition size 6
1123                (for instance - we could save the partition size on the stack
1124                and increase the size each time we find we didn't swap, thus
1125                switching to insertion sort earlier for partitions with a
1126                history of not swapping).
1127
1128             4. Naturally, if I just switch right away, it will make
1129                artificial benchmarks with pure ascending (or descending)
1130                data look really good, but is that a good reason in general?
1131                Hard to say...
1132          */
1133
1134 #ifdef QSORT_ORDER_GUESS
1135          if (swapped < 3) {
1136 #if QSORT_ORDER_GUESS == 1
1137             qsort_break_even = (part_right - part_left) + 1;
1138 #endif
1139 #if QSORT_ORDER_GUESS == 2
1140             qsort_break_even *= 2;
1141 #endif
1142 #if QSORT_ORDER_GUESS == 3
1143             const int prev_break = qsort_break_even;
1144             qsort_break_even *= qsort_break_even;
1145             if (qsort_break_even < prev_break) {
1146                qsort_break_even = (part_right - part_left) + 1;
1147             }
1148 #endif
1149          } else {
1150             qsort_break_even = QSORT_BREAK_EVEN;
1151          }
1152 #endif
1153
1154          if (part_left < pc_left) {
1155             /* There are elements on the left which need more processing.
1156                Check the right as well before deciding what to do.
1157             */
1158             if (pc_right < part_right) {
1159                /* We have two partitions to be sorted. Stack the biggest one
1160                   and process the smallest one on the next iteration. This
1161                   minimizes the stack height by insuring that any additional
1162                   stack entries must come from the smallest partition which
1163                   (because it is smallest) will have the fewest
1164                   opportunities to generate additional stack entries.
1165                */
1166                if ((part_right - pc_right) > (pc_left - part_left)) {
1167                   /* stack the right partition, process the left */
1168                   partition_stack[next_stack_entry].left = pc_right + 1;
1169                   partition_stack[next_stack_entry].right = part_right;
1170 #ifdef QSORT_ORDER_GUESS
1171                   partition_stack[next_stack_entry].qsort_break_even = qsort_break_even;
1172 #endif
1173                   part_right = pc_left - 1;
1174                } else {
1175                   /* stack the left partition, process the right */
1176                   partition_stack[next_stack_entry].left = part_left;
1177                   partition_stack[next_stack_entry].right = pc_left - 1;
1178 #ifdef QSORT_ORDER_GUESS
1179                   partition_stack[next_stack_entry].qsort_break_even = qsort_break_even;
1180 #endif
1181                   part_left = pc_right + 1;
1182                }
1183                qsort_assert(next_stack_entry < QSORT_MAX_STACK);
1184                ++next_stack_entry;
1185             } else {
1186                /* The elements on the left are the only remaining elements
1187                   that need sorting, arrange for them to be processed as the
1188                   next partition.
1189                */
1190                part_right = pc_left - 1;
1191             }
1192          } else if (pc_right < part_right) {
1193             /* There is only one chunk on the right to be sorted, make it
1194                the new partition and loop back around.
1195             */
1196             part_left = pc_right + 1;
1197          } else {
1198             /* This whole partition wound up in the pivot chunk, so
1199                we need to get a new partition off the stack.
1200             */
1201             if (next_stack_entry == 0) {
1202                /* the stack is empty - we are done */
1203                break;
1204             }
1205             --next_stack_entry;
1206             part_left = partition_stack[next_stack_entry].left;
1207             part_right = partition_stack[next_stack_entry].right;
1208 #ifdef QSORT_ORDER_GUESS
1209             qsort_break_even = partition_stack[next_stack_entry].qsort_break_even;
1210 #endif
1211          }
1212       } else {
1213          /* This partition is too small to fool with qsort complexity, just
1214             do an ordinary insertion sort to minimize overhead.
1215          */
1216          int i;
1217          /* Assume 1st element is in right place already, and start checking
1218             at 2nd element to see where it should be inserted.
1219          */
1220          for (i = part_left + 1; i <= part_right; ++i) {
1221             int j;
1222             /* Scan (backwards - just in case 'i' is already in right place)
1223                through the elements already sorted to see if the ith element
1224                belongs ahead of one of them.
1225             */
1226             for (j = i - 1; j >= part_left; --j) {
1227                if (qsort_cmp(i, j) >= 0) {
1228                   /* i belongs right after j
1229                   */
1230                   break;
1231                }
1232             }
1233             ++j;
1234             if (j != i) {
1235                /* Looks like we really need to move some things
1236                */
1237                int k;
1238                temp = array[i];
1239                for (k = i - 1; k >= j; --k)
1240                   array[k + 1] = array[k];
1241                array[j] = temp;
1242             }
1243          }
1244
1245          /* That partition is now sorted, grab the next one, or get out
1246             of the loop if there aren't any more.
1247          */
1248
1249          if (next_stack_entry == 0) {
1250             /* the stack is empty - we are done */
1251             break;
1252          }
1253          --next_stack_entry;
1254          part_left = partition_stack[next_stack_entry].left;
1255          part_right = partition_stack[next_stack_entry].right;
1256 #ifdef QSORT_ORDER_GUESS
1257          qsort_break_even = partition_stack[next_stack_entry].qsort_break_even;
1258 #endif
1259       }
1260    }
1261
1262    /* Believe it or not, the array is sorted at this point! */
1263 }
1264
1265 /* Stabilize what is, presumably, an otherwise unstable sort method.
1266  * We do that by allocating (or having on hand) an array of pointers
1267  * that is the same size as the original array of elements to be sorted.
1268  * We initialize this parallel array with the addresses of the original
1269  * array elements.  This indirection can make you crazy.
1270  * Some pictures can help.  After initializing, we have
1271  *
1272  *  indir                  list1
1273  * +----+                 +----+
1274  * |    | --------------> |    | ------> first element to be sorted
1275  * +----+                 +----+
1276  * |    | --------------> |    | ------> second element to be sorted
1277  * +----+                 +----+
1278  * |    | --------------> |    | ------> third element to be sorted
1279  * +----+                 +----+
1280  *  ...
1281  * +----+                 +----+
1282  * |    | --------------> |    | ------> n-1st element to be sorted
1283  * +----+                 +----+
1284  * |    | --------------> |    | ------> n-th element to be sorted
1285  * +----+                 +----+
1286  *
1287  * During the sort phase, we leave the elements of list1 where they are,
1288  * and sort the pointers in the indirect array in the same order determined
1289  * by the original comparison routine on the elements pointed to.
1290  * Because we don't move the elements of list1 around through
1291  * this phase, we can break ties on elements that compare equal
1292  * using their address in the list1 array, ensuring stabilty.
1293  * This leaves us with something looking like
1294  *
1295  *  indir                  list1
1296  * +----+                 +----+
1297  * |    | --+       +---> |    | ------> first element to be sorted
1298  * +----+   |       |     +----+
1299  * |    | --|-------|---> |    | ------> second element to be sorted
1300  * +----+   |       |     +----+
1301  * |    | --|-------+ +-> |    | ------> third element to be sorted
1302  * +----+   |         |   +----+
1303  *  ...
1304  * +----+    | |   | |    +----+
1305  * |    | ---|-+   | +--> |    | ------> n-1st element to be sorted
1306  * +----+    |     |      +----+
1307  * |    | ---+     +----> |    | ------> n-th element to be sorted
1308  * +----+                 +----+
1309  *
1310  * where the i-th element of the indirect array points to the element
1311  * that should be i-th in the sorted array.  After the sort phase,
1312  * we have to put the elements of list1 into the places
1313  * dictated by the indirect array.
1314  */
1315
1316
1317 static I32
1318 cmpindir(pTHX_ gptr const a, gptr const b)
1319 {
1320     dVAR;
1321     gptr * const ap = (gptr *)a;
1322     gptr * const bp = (gptr *)b;
1323     const I32 sense = PL_sort_RealCmp(aTHX_ *ap, *bp);
1324
1325     if (sense)
1326         return sense;
1327     return (ap > bp) ? 1 : ((ap < bp) ? -1 : 0);
1328 }
1329
1330 static I32
1331 cmpindir_desc(pTHX_ gptr const a, gptr const b)
1332 {
1333     dVAR;
1334     gptr * const ap = (gptr *)a;
1335     gptr * const bp = (gptr *)b;
1336     const I32 sense = PL_sort_RealCmp(aTHX_ *ap, *bp);
1337
1338     /* Reverse the default */
1339     if (sense)
1340         return -sense;
1341     /* But don't reverse the stability test.  */
1342     return (ap > bp) ? 1 : ((ap < bp) ? -1 : 0);
1343
1344 }
1345
1346 STATIC void
1347 S_qsortsv(pTHX_ gptr *list1, size_t nmemb, SVCOMPARE_t cmp, U32 flags)
1348 {
1349     dVAR;
1350     if ((flags & SORTf_STABLE) != 0) {
1351          register gptr **pp, *q;
1352          register size_t n, j, i;
1353          gptr *small[SMALLSORT], **indir, tmp;
1354          SVCOMPARE_t savecmp;
1355          if (nmemb <= 1) return;     /* sorted trivially */
1356
1357          /* Small arrays can use the stack, big ones must be allocated */
1358          if (nmemb <= SMALLSORT) indir = small;
1359          else { Newx(indir, nmemb, gptr *); }
1360
1361          /* Copy pointers to original array elements into indirect array */
1362          for (n = nmemb, pp = indir, q = list1; n--; ) *pp++ = q++;
1363
1364          savecmp = PL_sort_RealCmp;     /* Save current comparison routine, if any */
1365          PL_sort_RealCmp = cmp; /* Put comparison routine where cmpindir can find it */
1366
1367          /* sort, with indirection */
1368          if (flags & SORTf_DESC)
1369             qsortsvu((gptr *)indir, nmemb, cmpindir_desc);
1370         else
1371             qsortsvu((gptr *)indir, nmemb, cmpindir);
1372
1373          pp = indir;
1374          q = list1;
1375          for (n = nmemb; n--; ) {
1376               /* Assert A: all elements of q with index > n are already
1377                * in place.  This is vacuosly true at the start, and we
1378                * put element n where it belongs below (if it wasn't
1379                * already where it belonged). Assert B: we only move
1380                * elements that aren't where they belong,
1381                * so, by A, we never tamper with elements above n.
1382                */
1383               j = pp[n] - q;            /* This sets j so that q[j] is
1384                                          * at pp[n].  *pp[j] belongs in
1385                                          * q[j], by construction.
1386                                          */
1387               if (n != j) {             /* all's well if n == j */
1388                    tmp = q[j];          /* save what's in q[j] */
1389                    do {
1390                         q[j] = *pp[j];  /* put *pp[j] where it belongs */
1391                         i = pp[j] - q;  /* the index in q of the element
1392                                          * just moved */
1393                         pp[j] = q + j;  /* this is ok now */
1394                    } while ((j = i) != n);
1395                    /* There are only finitely many (nmemb) addresses
1396                     * in the pp array.
1397                     * So we must eventually revisit an index we saw before.
1398                     * Suppose the first revisited index is k != n.
1399                     * An index is visited because something else belongs there.
1400                     * If we visit k twice, then two different elements must
1401                     * belong in the same place, which cannot be.
1402                     * So j must get back to n, the loop terminates,
1403                     * and we put the saved element where it belongs.
1404                     */
1405                    q[n] = tmp;          /* put what belongs into
1406                                          * the n-th element */
1407               }
1408          }
1409
1410         /* free iff allocated */
1411          if (indir != small) { Safefree(indir); }
1412          /* restore prevailing comparison routine */
1413          PL_sort_RealCmp = savecmp;
1414     } else if ((flags & SORTf_DESC) != 0) {
1415          const SVCOMPARE_t savecmp = PL_sort_RealCmp;   /* Save current comparison routine, if any */
1416          PL_sort_RealCmp = cmp; /* Put comparison routine where cmp_desc can find it */
1417          cmp = cmp_desc;
1418          qsortsvu(list1, nmemb, cmp);
1419          /* restore prevailing comparison routine */
1420          PL_sort_RealCmp = savecmp;
1421     } else {
1422          qsortsvu(list1, nmemb, cmp);
1423     }
1424 }
1425
1426 /*
1427 =head1 Array Manipulation Functions
1428
1429 =for apidoc sortsv
1430
1431 Sort an array. Here is an example:
1432
1433     sortsv(AvARRAY(av), av_len(av)+1, Perl_sv_cmp_locale);
1434
1435 Currently this always uses mergesort. See sortsv_flags for a more
1436 flexible routine.
1437
1438 =cut
1439 */
1440
1441 void
1442 Perl_sortsv(pTHX_ SV **array, size_t nmemb, SVCOMPARE_t cmp)
1443 {
1444     PERL_ARGS_ASSERT_SORTSV;
1445
1446     sortsv_flags(array, nmemb, cmp, 0);
1447 }
1448
1449 /*
1450 =for apidoc sortsv_flags
1451
1452 Sort an array, with various options.
1453
1454 =cut
1455 */
1456 void
1457 Perl_sortsv_flags(pTHX_ SV **array, size_t nmemb, SVCOMPARE_t cmp, U32 flags)
1458 {
1459     PERL_ARGS_ASSERT_SORTSV_FLAGS;
1460
1461     if (flags & SORTf_QSORT)
1462         S_qsortsv(aTHX_ array, nmemb, cmp, flags);
1463     else
1464         S_mergesortsv(aTHX_ array, nmemb, cmp, flags);
1465 }
1466
1467 #define SvNSIOK(sv) ((SvFLAGS(sv) & SVf_NOK) || ((SvFLAGS(sv) & (SVf_IOK|SVf_IVisUV)) == SVf_IOK))
1468 #define SvSIOK(sv) ((SvFLAGS(sv) & (SVf_IOK|SVf_IVisUV)) == SVf_IOK)
1469 #define SvNSIV(sv) ( SvNOK(sv) ? SvNVX(sv) : ( SvSIOK(sv) ? SvIVX(sv) : sv_2nv(sv) ) )
1470
1471 PP(pp_sort)
1472 {
1473     dVAR; dSP; dMARK; dORIGMARK;
1474     register SV **p1 = ORIGMARK+1, **p2;
1475     register I32 max, i;
1476     AV* av = NULL;
1477     HV *stash;
1478     GV *gv;
1479     CV *cv = NULL;
1480     I32 gimme = GIMME;
1481     OP* const nextop = PL_op->op_next;
1482     I32 overloading = 0;
1483     bool hasargs = FALSE;
1484     I32 is_xsub = 0;
1485     I32 sorting_av = 0;
1486     const U8 priv = PL_op->op_private;
1487     const U8 flags = PL_op->op_flags;
1488     U32 sort_flags = 0;
1489     void (*sortsvp)(pTHX_ SV **array, size_t nmemb, SVCOMPARE_t cmp, U32 flags)
1490       = Perl_sortsv_flags;
1491     I32 all_SIVs = 1;
1492
1493     if ((priv & OPpSORT_DESCEND) != 0)
1494         sort_flags |= SORTf_DESC;
1495     if ((priv & OPpSORT_QSORT) != 0)
1496         sort_flags |= SORTf_QSORT;
1497     if ((priv & OPpSORT_STABLE) != 0)
1498         sort_flags |= SORTf_STABLE;
1499
1500     if (gimme != G_ARRAY) {
1501         SP = MARK;
1502         EXTEND(SP,1);
1503         RETPUSHUNDEF;
1504     }
1505
1506     ENTER;
1507     SAVEVPTR(PL_sortcop);
1508     if (flags & OPf_STACKED) {
1509         if (flags & OPf_SPECIAL) {
1510             OP *kid = cLISTOP->op_first->op_sibling;    /* pass pushmark */
1511             kid = kUNOP->op_first;                      /* pass rv2gv */
1512             kid = kUNOP->op_first;                      /* pass leave */
1513             PL_sortcop = kid->op_next;
1514             stash = CopSTASH(PL_curcop);
1515         }
1516         else {
1517             cv = sv_2cv(*++MARK, &stash, &gv, 0);
1518             if (cv && SvPOK(cv)) {
1519                 const char * const proto = SvPV_nolen_const((SV*)cv);
1520                 if (proto && strEQ(proto, "$$")) {
1521                     hasargs = TRUE;
1522                 }
1523             }
1524             if (!(cv && CvROOT(cv))) {
1525                 if (cv && CvISXSUB(cv)) {
1526                     is_xsub = 1;
1527                 }
1528                 else if (gv) {
1529                     SV *tmpstr = sv_newmortal();
1530                     gv_efullname3(tmpstr, gv, NULL);
1531                     DIE(aTHX_ "Undefined sort subroutine \"%"SVf"\" called",
1532                         SVfARG(tmpstr));
1533                 }
1534                 else {
1535                     DIE(aTHX_ "Undefined subroutine in sort");
1536                 }
1537             }
1538
1539             if (is_xsub)
1540                 PL_sortcop = (OP*)cv;
1541             else
1542                 PL_sortcop = CvSTART(cv);
1543         }
1544     }
1545     else {
1546         PL_sortcop = NULL;
1547         stash = CopSTASH(PL_curcop);
1548     }
1549
1550     /* optimiser converts "@a = sort @a" to "sort \@a";
1551      * in case of tied @a, pessimise: push (@a) onto stack, then assign
1552      * result back to @a at the end of this function */
1553     if (priv & OPpSORT_INPLACE) {
1554         assert( MARK+1 == SP && *SP && SvTYPE(*SP) == SVt_PVAV);
1555         (void)POPMARK; /* remove mark associated with ex-OP_AASSIGN */
1556         av = (AV*)(*SP);
1557         max = AvFILL(av) + 1;
1558         if (SvMAGICAL(av)) {
1559             MEXTEND(SP, max);
1560             for (i=0; i < max; i++) {
1561                 SV **svp = av_fetch(av, i, FALSE);
1562                 *SP++ = (svp) ? *svp : NULL;
1563             }
1564             SP--;
1565             p1 = p2 = SP - (max-1);
1566         }
1567         else {
1568             if (SvREADONLY(av))
1569                 Perl_croak(aTHX_ PL_no_modify);
1570             else
1571                 SvREADONLY_on(av);
1572             p1 = p2 = AvARRAY(av);
1573             sorting_av = 1;
1574         }
1575     }
1576     else {
1577         p2 = MARK+1;
1578         max = SP - MARK;
1579    }
1580
1581     /* shuffle stack down, removing optional initial cv (p1!=p2), plus
1582      * any nulls; also stringify or converting to integer or number as
1583      * required any args */
1584     for (i=max; i > 0 ; i--) {
1585         if ((*p1 = *p2++)) {                    /* Weed out nulls. */
1586             SvTEMP_off(*p1);
1587             if (!PL_sortcop) {
1588                 if (priv & OPpSORT_NUMERIC) {
1589                     if (priv & OPpSORT_INTEGER) {
1590                         if (!SvIOK(*p1)) {
1591                             if (SvAMAGIC(*p1))
1592                                 overloading = 1;
1593                             else
1594                                 (void)sv_2iv(*p1);
1595                         }
1596                     }
1597                     else {
1598                         if (!SvNSIOK(*p1)) {
1599                             if (SvAMAGIC(*p1))
1600                                 overloading = 1;
1601                             else
1602                                 (void)sv_2nv(*p1);
1603                         }
1604                         if (all_SIVs && !SvSIOK(*p1))
1605                             all_SIVs = 0;
1606                     }
1607                 }
1608                 else {
1609                     if (!SvPOK(*p1)) {
1610                         if (SvAMAGIC(*p1))
1611                             overloading = 1;
1612                         else
1613                             (void)sv_2pv_flags(*p1, 0,
1614                                                SV_GMAGIC|SV_CONST_RETURN);
1615                     }
1616                 }
1617             }
1618             p1++;
1619         }
1620         else
1621             max--;
1622     }
1623     if (sorting_av)
1624         AvFILLp(av) = max-1;
1625
1626     if (max > 1) {
1627         SV **start;
1628         if (PL_sortcop) {
1629             PERL_CONTEXT *cx;
1630             SV** newsp;
1631             const bool oldcatch = CATCH_GET;
1632
1633             SAVETMPS;
1634             SAVEOP();
1635
1636             CATCH_SET(TRUE);
1637             PUSHSTACKi(PERLSI_SORT);
1638             if (!hasargs && !is_xsub) {
1639                 SAVESPTR(PL_firstgv);
1640                 SAVESPTR(PL_secondgv);
1641                 SAVESPTR(PL_sortstash);
1642                 PL_firstgv = gv_fetchpvs("a", GV_ADD|GV_NOTQUAL, SVt_PV);
1643                 PL_secondgv = gv_fetchpvs("b", GV_ADD|GV_NOTQUAL, SVt_PV);
1644                 PL_sortstash = stash;
1645                 SAVESPTR(GvSV(PL_firstgv));
1646                 SAVESPTR(GvSV(PL_secondgv));
1647             }
1648
1649             PUSHBLOCK(cx, CXt_NULL, PL_stack_base);
1650             if (!(flags & OPf_SPECIAL)) {
1651                 cx->cx_type = CXt_SUB;
1652                 cx->blk_gimme = G_SCALAR;
1653                 PUSHSUB(cx);
1654                 if (!is_xsub) {
1655                     AV* const padlist = CvPADLIST(cv);
1656
1657                     if (++CvDEPTH(cv) >= 2) {
1658                         PERL_STACK_OVERFLOW_CHECK();
1659                         pad_push(padlist, CvDEPTH(cv));
1660                     }
1661                     SAVECOMPPAD();
1662                     PAD_SET_CUR_NOSAVE(padlist, CvDEPTH(cv));
1663
1664                     if (hasargs) {
1665                         /* This is mostly copied from pp_entersub */
1666                         AV * const av = (AV*)PAD_SVl(0);
1667
1668                         cx->blk_sub.savearray = GvAV(PL_defgv);
1669                         GvAV(PL_defgv) = (AV*)SvREFCNT_inc_simple(av);
1670                         CX_CURPAD_SAVE(cx->blk_sub);
1671                         cx->blk_sub.argarray = av;
1672                     }
1673
1674                 }
1675             }
1676             cx->cx_type |= CXp_MULTICALL;
1677
1678             start = p1 - max;
1679             sortsvp(aTHX_ start, max,
1680                     (is_xsub ? S_sortcv_xsub : hasargs ? S_sortcv_stacked : S_sortcv),
1681                     sort_flags);
1682
1683             if (!(flags & OPf_SPECIAL)) {
1684                 LEAVESUB(cv);
1685                 if (!is_xsub)
1686                     CvDEPTH(cv)--;
1687             }
1688             POPBLOCK(cx,PL_curpm);
1689             PL_stack_sp = newsp;
1690             POPSTACK;
1691             CATCH_SET(oldcatch);
1692         }
1693         else {
1694             MEXTEND(SP, 20);    /* Can't afford stack realloc on signal. */
1695             start = sorting_av ? AvARRAY(av) : ORIGMARK+1;
1696             sortsvp(aTHX_ start, max,
1697                     (priv & OPpSORT_NUMERIC)
1698                         ? ( ( ( priv & OPpSORT_INTEGER) || all_SIVs)
1699                             ? ( overloading ? S_amagic_i_ncmp : S_sv_i_ncmp)
1700                             : ( overloading ? S_amagic_ncmp : S_sv_ncmp ) )
1701                         : ( IN_LOCALE_RUNTIME
1702                             ? ( overloading
1703                                 ? (SVCOMPARE_t)S_amagic_cmp_locale
1704                                 : (SVCOMPARE_t)sv_cmp_locale_static)
1705                             : ( overloading ? (SVCOMPARE_t)S_amagic_cmp : (SVCOMPARE_t)sv_cmp_static)),
1706                     sort_flags);
1707         }
1708         if ((priv & OPpSORT_REVERSE) != 0) {
1709             SV **q = start+max-1;
1710             while (start < q) {
1711                 SV * const tmp = *start;
1712                 *start++ = *q;
1713                 *q-- = tmp;
1714             }
1715         }
1716     }
1717     if (sorting_av)
1718         SvREADONLY_off(av);
1719     else if (av && !sorting_av) {
1720         /* simulate pp_aassign of tied AV */
1721         SV** const base = MARK+1;
1722         for (i=0; i < max; i++) {
1723             base[i] = newSVsv(base[i]);
1724         }
1725         av_clear(av);
1726         av_extend(av, max);
1727         for (i=0; i < max; i++) {
1728             SV * const sv = base[i];
1729             SV ** const didstore = av_store(av, i, sv);
1730             if (SvSMAGICAL(sv))
1731                 mg_set(sv);
1732             if (!didstore)
1733                 sv_2mortal(sv);
1734         }
1735     }
1736     LEAVE;
1737     PL_stack_sp = ORIGMARK + (sorting_av ? 0 : max);
1738     return nextop;
1739 }
1740
1741 static I32
1742 S_sortcv(pTHX_ SV *const a, SV *const b)
1743 {
1744     dVAR;
1745     const I32 oldsaveix = PL_savestack_ix;
1746     const I32 oldscopeix = PL_scopestack_ix;
1747     I32 result;
1748
1749     PERL_ARGS_ASSERT_SORTCV;
1750
1751     GvSV(PL_firstgv) = a;
1752     GvSV(PL_secondgv) = b;
1753     PL_stack_sp = PL_stack_base;
1754     PL_op = PL_sortcop;
1755     CALLRUNOPS(aTHX);
1756     if (PL_stack_sp != PL_stack_base + 1)
1757         Perl_croak(aTHX_ "Sort subroutine didn't return single value");
1758     if (!SvNIOKp(*PL_stack_sp))
1759         Perl_croak(aTHX_ "Sort subroutine didn't return a numeric value");
1760     result = SvIV(*PL_stack_sp);
1761     while (PL_scopestack_ix > oldscopeix) {
1762         LEAVE;
1763     }
1764     leave_scope(oldsaveix);
1765     return result;
1766 }
1767
1768 static I32
1769 S_sortcv_stacked(pTHX_ SV *const a, SV *const b)
1770 {
1771     dVAR;
1772     const I32 oldsaveix = PL_savestack_ix;
1773     const I32 oldscopeix = PL_scopestack_ix;
1774     I32 result;
1775     AV * const av = GvAV(PL_defgv);
1776
1777     PERL_ARGS_ASSERT_SORTCV_STACKED;
1778
1779     if (AvMAX(av) < 1) {
1780         SV** ary = AvALLOC(av);
1781         if (AvARRAY(av) != ary) {
1782             AvMAX(av) += AvARRAY(av) - AvALLOC(av);
1783             AvARRAY(av) = ary;
1784         }
1785         if (AvMAX(av) < 1) {
1786             AvMAX(av) = 1;
1787             Renew(ary,2,SV*);
1788             AvARRAY(av) = ary;
1789         }
1790     }
1791     AvFILLp(av) = 1;
1792
1793     AvARRAY(av)[0] = a;
1794     AvARRAY(av)[1] = b;
1795     PL_stack_sp = PL_stack_base;
1796     PL_op = PL_sortcop;
1797     CALLRUNOPS(aTHX);
1798     if (PL_stack_sp != PL_stack_base + 1)
1799         Perl_croak(aTHX_ "Sort subroutine didn't return single value");
1800     if (!SvNIOKp(*PL_stack_sp))
1801         Perl_croak(aTHX_ "Sort subroutine didn't return a numeric value");
1802     result = SvIV(*PL_stack_sp);
1803     while (PL_scopestack_ix > oldscopeix) {
1804         LEAVE;
1805     }
1806     leave_scope(oldsaveix);
1807     return result;
1808 }
1809
1810 static I32
1811 S_sortcv_xsub(pTHX_ SV *const a, SV *const b)
1812 {
1813     dVAR; dSP;
1814     const I32 oldsaveix = PL_savestack_ix;
1815     const I32 oldscopeix = PL_scopestack_ix;
1816     CV * const cv=(CV*)PL_sortcop;
1817     I32 result;
1818
1819     PERL_ARGS_ASSERT_SORTCV_XSUB;
1820
1821     SP = PL_stack_base;
1822     PUSHMARK(SP);
1823     EXTEND(SP, 2);
1824     *++SP = a;
1825     *++SP = b;
1826     PUTBACK;
1827     (void)(*CvXSUB(cv))(aTHX_ cv);
1828     if (PL_stack_sp != PL_stack_base + 1)
1829         Perl_croak(aTHX_ "Sort subroutine didn't return single value");
1830     if (!SvNIOKp(*PL_stack_sp))
1831         Perl_croak(aTHX_ "Sort subroutine didn't return a numeric value");
1832     result = SvIV(*PL_stack_sp);
1833     while (PL_scopestack_ix > oldscopeix) {
1834         LEAVE;
1835     }
1836     leave_scope(oldsaveix);
1837     return result;
1838 }
1839
1840
1841 static I32
1842 S_sv_ncmp(pTHX_ SV *const a, SV *const b)
1843 {
1844     const NV nv1 = SvNSIV(a);
1845     const NV nv2 = SvNSIV(b);
1846
1847     PERL_ARGS_ASSERT_SV_NCMP;
1848
1849     return nv1 < nv2 ? -1 : nv1 > nv2 ? 1 : 0;
1850 }
1851
1852 static I32
1853 S_sv_i_ncmp(pTHX_ SV *const a, SV *const b)
1854 {
1855     const IV iv1 = SvIV(a);
1856     const IV iv2 = SvIV(b);
1857
1858     PERL_ARGS_ASSERT_SV_I_NCMP;
1859
1860     return iv1 < iv2 ? -1 : iv1 > iv2 ? 1 : 0;
1861 }
1862
1863 #define tryCALL_AMAGICbin(left,right,meth) \
1864     (PL_amagic_generation && (SvAMAGIC(left)||SvAMAGIC(right))) \
1865         ? amagic_call(left, right, CAT2(meth,_amg), 0) \
1866         : NULL;
1867
1868 #define SORT_NORMAL_RETURN_VALUE(val)  (((val) > 0) ? 1 : ((val) ? -1 : 0))
1869
1870 static I32
1871 S_amagic_ncmp(pTHX_ register SV *const a, register SV *const b)
1872 {
1873     dVAR;
1874     SV * const tmpsv = tryCALL_AMAGICbin(a,b,ncmp);
1875
1876     PERL_ARGS_ASSERT_AMAGIC_NCMP;
1877
1878     if (tmpsv) {
1879         if (SvIOK(tmpsv)) {
1880             const I32 i = SvIVX(tmpsv);
1881             return SORT_NORMAL_RETURN_VALUE(i);
1882         }
1883         else {
1884             const NV d = SvNV(tmpsv);
1885             return SORT_NORMAL_RETURN_VALUE(d);
1886         }
1887      }
1888      return S_sv_ncmp(aTHX_ a, b);
1889 }
1890
1891 static I32
1892 S_amagic_i_ncmp(pTHX_ register SV *const a, register SV *const b)
1893 {
1894     dVAR;
1895     SV * const tmpsv = tryCALL_AMAGICbin(a,b,ncmp);
1896
1897     PERL_ARGS_ASSERT_AMAGIC_I_NCMP;
1898
1899     if (tmpsv) {
1900         if (SvIOK(tmpsv)) {
1901             const I32 i = SvIVX(tmpsv);
1902             return SORT_NORMAL_RETURN_VALUE(i);
1903         }
1904         else {
1905             const NV d = SvNV(tmpsv);
1906             return SORT_NORMAL_RETURN_VALUE(d);
1907         }
1908     }
1909     return S_sv_i_ncmp(aTHX_ a, b);
1910 }
1911
1912 static I32
1913 S_amagic_cmp(pTHX_ register SV *const str1, register SV *const str2)
1914 {
1915     dVAR;
1916     SV * const tmpsv = tryCALL_AMAGICbin(str1,str2,scmp);
1917
1918     PERL_ARGS_ASSERT_AMAGIC_CMP;
1919
1920     if (tmpsv) {
1921         if (SvIOK(tmpsv)) {
1922             const I32 i = SvIVX(tmpsv);
1923             return SORT_NORMAL_RETURN_VALUE(i);
1924         }
1925         else {
1926             const NV d = SvNV(tmpsv);
1927             return SORT_NORMAL_RETURN_VALUE(d);
1928         }
1929     }
1930     return sv_cmp(str1, str2);
1931 }
1932
1933 static I32
1934 S_amagic_cmp_locale(pTHX_ register SV *const str1, register SV *const str2)
1935 {
1936     dVAR;
1937     SV * const tmpsv = tryCALL_AMAGICbin(str1,str2,scmp);
1938
1939     PERL_ARGS_ASSERT_AMAGIC_CMP_LOCALE;
1940
1941     if (tmpsv) {
1942         if (SvIOK(tmpsv)) {
1943             const I32 i = SvIVX(tmpsv);
1944             return SORT_NORMAL_RETURN_VALUE(i);
1945         }
1946         else {
1947             const NV d = SvNV(tmpsv);
1948             return SORT_NORMAL_RETURN_VALUE(d);
1949         }
1950     }
1951     return sv_cmp_locale(str1, str2);
1952 }
1953
1954 /*
1955  * Local variables:
1956  * c-indentation-style: bsd
1957  * c-basic-offset: 4
1958  * indent-tabs-mode: t
1959  * End:
1960  *
1961  * ex: set ts=8 sts=4 sw=4 noet:
1962  */