pp_sort.c

   1 /*    pp_sort.c
   2  *
   3  *    Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
   4  *    2000, 2001, 2002, 2003, 2004, 2005, 2006, by Larry Wall and others
   5  *
   6  *    You may distribute under the terms of either the GNU General Public
   7  *    License or the Artistic License, as specified in the README file.
   8  *
   9  */
  10
  11 /*
  12  *   ...they shuffled back towards the rear of the line. 'No, not at the
  13  *   rear!'  the slave-driver shouted. 'Three files up. And stay there...
  14  */
  15
  16 /* This file contains pp ("push/pop") functions that
  17  * execute the opcodes that make up a perl program. A typical pp function
  18  * expects to find its arguments on the stack, and usually pushes its
  19  * results onto the stack, hence the 'pp' terminology. Each OP structure
  20  * contains a pointer to the relevant pp_foo() function.
  21  *
  22  * This particular file just contains pp_sort(), which is complex
  23  * enough to merit its own file! See the other pp*.c files for the rest of
  24  * the pp_ functions.
  25  */
  26
  27 #include "EXTERN.h"
  28 #define PERL_IN_PP_SORT_C
  29 #include "perl.h"
  30
  31 #if defined(UNDER_CE)
  32 /* looks like 'small' is reserved word for WINCE (or somesuch)*/
  33 #define small xsmall
  34 #endif
  35
  36 #define sv_cmp_static Perl_sv_cmp
  37 #define sv_cmp_locale_static Perl_sv_cmp_locale
  38
  39 #ifndef SMALLSORT
  40 #define SMALLSORT (200)
  41 #endif
  42
  43 /* Flags for qsortsv and mergesortsv */
  44 #define SORTf_DESC   1
  45 #define SORTf_STABLE 2
  46 #define SORTf_QSORT  4
  47
  48 /*
  49  * The mergesort implementation is by Peter M. Mcilroy <pmcilroy@lucent.com>.
  50  *
  51  * The original code was written in conjunction with BSD Computer Software
  52  * Research Group at University of California, Berkeley.
  53  *
  54  * See also: "Optimistic Merge Sort" (SODA '92)
  55  *
  56  * The integration to Perl is by John P. Linderman <jpl@research.att.com>.
  57  *
  58  * The code can be distributed under the same terms as Perl itself.
  59  *
  60  */
  61
  62
  63 typedef char * aptr;            /* pointer for arithmetic on sizes */
  64 typedef SV * gptr;              /* pointers in our lists */
  65
  66 /* Binary merge internal sort, with a few special mods
  67 ** for the special perl environment it now finds itself in.
  68 **
  69 ** Things that were once options have been hotwired
  70 ** to values suitable for this use.  In particular, we'll always
  71 ** initialize looking for natural runs, we'll always produce stable
  72 ** output, and we'll always do Peter McIlroy's binary merge.
  73 */
  74
  75 /* Pointer types for arithmetic and storage and convenience casts */
  76
  77 #define APTR(P) ((aptr)(P))
  78 #define GPTP(P) ((gptr *)(P))
  79 #define GPPP(P) ((gptr **)(P))
  80
  81
  82 /* byte offset from pointer P to (larger) pointer Q */
  83 #define BYTEOFF(P, Q) (APTR(Q) - APTR(P))
  84
  85 #define PSIZE sizeof(gptr)
  86
  87 /* If PSIZE is power of 2, make PSHIFT that power, if that helps */
  88
  89 #ifdef  PSHIFT
  90 #define PNELEM(P, Q)    (BYTEOFF(P,Q) >> (PSHIFT))
  91 #define PNBYTE(N)       ((N) << (PSHIFT))
  92 #define PINDEX(P, N)    (GPTP(APTR(P) + PNBYTE(N)))
  93 #else
  94 /* Leave optimization to compiler */
  95 #define PNELEM(P, Q)    (GPTP(Q) - GPTP(P))
  96 #define PNBYTE(N)       ((N) * (PSIZE))
  97 #define PINDEX(P, N)    (GPTP(P) + (N))
  98 #endif
  99
 100 /* Pointer into other corresponding to pointer into this */
 101 #define POTHER(P, THIS, OTHER) GPTP(APTR(OTHER) + BYTEOFF(THIS,P))
 102
 103 #define FROMTOUPTO(src, dst, lim) do *dst++ = *src++; while(src<lim)
 104
 105
 106 /* Runs are identified by a pointer in the auxilliary list.
 107 ** The pointer is at the start of the list,
 108 ** and it points to the start of the next list.
 109 ** NEXT is used as an lvalue, too.
 110 */
 111
 112 #define NEXT(P)         (*GPPP(P))
 113
 114
 115 /* PTHRESH is the minimum number of pairs with the same sense to justify
 116 ** checking for a run and extending it.  Note that PTHRESH counts PAIRS,
 117 ** not just elements, so PTHRESH == 8 means a run of 16.
 118 */
 119
 120 #define PTHRESH (8)
 121
 122 /* RTHRESH is the number of elements in a run that must compare low
 123 ** to the low element from the opposing run before we justify
 124 ** doing a binary rampup instead of single stepping.
 125 ** In random input, N in a row low should only happen with
 126 ** probability 2^(1-N), so we can risk that we are dealing
 127 ** with orderly input without paying much when we aren't.
 128 */
 129
 130 #define RTHRESH (6)
 131
 132
 133 /*
 134 ** Overview of algorithm and variables.
 135 ** The array of elements at list1 will be organized into runs of length 2,
 136 ** or runs of length >= 2 * PTHRESH.  We only try to form long runs when
 137 ** PTHRESH adjacent pairs compare in the same way, suggesting overall order.
 138 **
 139 ** Unless otherwise specified, pair pointers address the first of two elements.
 140 **
 141 ** b and b+1 are a pair that compare with sense "sense".
 142 ** b is the "bottom" of adjacent pairs that might form a longer run.
 143 **
 144 ** p2 parallels b in the list2 array, where runs are defined by
 145 ** a pointer chain.
 146 **
 147 ** t represents the "top" of the adjacent pairs that might extend
 148 ** the run beginning at b.  Usually, t addresses a pair
 149 ** that compares with opposite sense from (b,b+1).
 150 ** However, it may also address a singleton element at the end of list1,
 151 ** or it may be equal to "last", the first element beyond list1.
 152 **
 153 ** r addresses the Nth pair following b.  If this would be beyond t,
 154 ** we back it off to t.  Only when r is less than t do we consider the
 155 ** run long enough to consider checking.
 156 **
 157 ** q addresses a pair such that the pairs at b through q already form a run.
 158 ** Often, q will equal b, indicating we only are sure of the pair itself.
 159 ** However, a search on the previous cycle may have revealed a longer run,
 160 ** so q may be greater than b.
 161 **
 162 ** p is used to work back from a candidate r, trying to reach q,
 163 ** which would mean b through r would be a run.  If we discover such a run,
 164 ** we start q at r and try to push it further towards t.
 165 ** If b through r is NOT a run, we detect the wrong order at (p-1,p).
 166 ** In any event, after the check (if any), we have two main cases.
 167 **
 168 ** 1) Short run.  b <= q < p <= r <= t.
 169 **      b through q is a run (perhaps trivial)
 170 **      q through p are uninteresting pairs
 171 **      p through r is a run
 172 **
 173 ** 2) Long run.  b < r <= q < t.
 174 **      b through q is a run (of length >= 2 * PTHRESH)
 175 **
 176 ** Note that degenerate cases are not only possible, but likely.
 177 ** For example, if the pair following b compares with opposite sense,
 178 ** then b == q < p == r == t.
 179 */
 180
 181
 182 static IV
 183 dynprep(pTHX_ gptr *list1, gptr *list2, size_t nmemb, SVCOMPARE_t cmp)
 184 {
 185     I32 sense;
 186     register gptr *b, *p, *q, *t, *p2;
 187     register gptr c, *last, *r;
 188     gptr *savep;
 189     IV runs = 0;
 190
 191     b = list1;
 192     last = PINDEX(b, nmemb);
 193     sense = (cmp(aTHX_ *b, *(b+1)) > 0);
 194     for (p2 = list2; b < last; ) {
 195         /* We just started, or just reversed sense.
 196         ** Set t at end of pairs with the prevailing sense.
 197         */
 198         for (p = b+2, t = p; ++p < last; t = ++p) {
 199             if ((cmp(aTHX_ *t, *p) > 0) != sense) break;
 200         }
 201         q = b;
 202         /* Having laid out the playing field, look for long runs */
 203         do {
 204             p = r = b + (2 * PTHRESH);
 205             if (r >= t) p = r = t;      /* too short to care about */
 206             else {
 207                 while (((cmp(aTHX_ *(p-1), *p) > 0) == sense) &&
 208                        ((p -= 2) > q));
 209                 if (p <= q) {
 210                     /* b through r is a (long) run.
 211                     ** Extend it as far as possible.
 212                     */
 213                     p = q = r;
 214                     while (((p += 2) < t) &&
 215                            ((cmp(aTHX_ *(p-1), *p) > 0) == sense)) q = p;
 216                     r = p = q + 2;      /* no simple pairs, no after-run */
 217                 }
 218             }
 219             if (q > b) {                /* run of greater than 2 at b */
 220                 savep = p;
 221                 p = q += 2;
 222                 /* pick up singleton, if possible */
 223                 if ((p == t) &&
 224                     ((t + 1) == last) &&
 225                     ((cmp(aTHX_ *(p-1), *p) > 0) == sense))
 226                     savep = r = p = q = last;
 227                 p2 = NEXT(p2) = p2 + (p - b); ++runs;
 228                 if (sense) while (b < --p) {
 229                     c = *b;
 230                     *b++ = *p;
 231                     *p = c;
 232                 }
 233                 p = savep;
 234             }
 235             while (q < p) {             /* simple pairs */
 236                 p2 = NEXT(p2) = p2 + 2; ++runs;
 237                 if (sense) {
 238                     c = *q++;
 239                     *(q-1) = *q;
 240                     *q++ = c;
 241                 } else q += 2;
 242             }
 243             if (((b = p) == t) && ((t+1) == last)) {
 244                 NEXT(p2) = p2 + 1; ++runs;
 245                 b++;
 246             }
 247             q = r;
 248         } while (b < t);
 249         sense = !sense;
 250     }
 251     return runs;
 252 }
 253
 254
 255 /* The original merge sort, in use since 5.7, was as fast as, or faster than,
 256  * qsort on many platforms, but slower than qsort, conspicuously so,
 257  * on others.  The most likely explanation was platform-specific
 258  * differences in cache sizes and relative speeds.
 259  *
 260  * The quicksort divide-and-conquer algorithm guarantees that, as the
 261  * problem is subdivided into smaller and smaller parts, the parts
 262  * fit into smaller (and faster) caches.  So it doesn't matter how
 263  * many levels of cache exist, quicksort will "find" them, and,
 264  * as long as smaller is faster, take advantage of them.
 265  *
 266  * By contrast, consider how the original mergesort algorithm worked.
 267  * Suppose we have five runs (each typically of length 2 after dynprep).
 268  *
 269  * pass               base                        aux
 270  *  0              1 2 3 4 5
 271  *  1                                           12 34 5
 272  *  2                1234 5
 273  *  3                                            12345
 274  *  4                 12345
 275  *
 276  * Adjacent pairs are merged in "grand sweeps" through the input.
 277  * This means, on pass 1, the records in runs 1 and 2 aren't revisited until
 278  * runs 3 and 4 are merged and the runs from run 5 have been copied.
 279  * The only cache that matters is one large enough to hold *all* the input.
 280  * On some platforms, this may be many times slower than smaller caches.
 281  *
 282  * The following pseudo-code uses the same basic merge algorithm,
 283  * but in a divide-and-conquer way.
 284  *
 285  * # merge $runs runs at offset $offset of list $list1 into $list2.
 286  * # all unmerged runs ($runs == 1) originate in list $base.
 287  * sub mgsort2 {
 288  *     my ($offset, $runs, $base, $list1, $list2) = @_;
 289  *
 290  *     if ($runs == 1) {
 291  *         if ($list1 is $base) copy run to $list2
 292  *         return offset of end of list (or copy)
 293  *     } else {
 294  *         $off2 = mgsort2($offset, $runs-($runs/2), $base, $list2, $list1)
 295  *         mgsort2($off2, $runs/2, $base, $list2, $list1)
 296  *         merge the adjacent runs at $offset of $list1 into $list2
 297  *         return the offset of the end of the merged runs
 298  *     }
 299  * }
 300  * mgsort2(0, $runs, $base, $aux, $base);
 301  *
 302  * For our 5 runs, the tree of calls looks like
 303  *
 304  *           5
 305  *      3        2
 306  *   2     1   1   1
 307  * 1   1
 308  *
 309  * 1   2   3   4   5
 310  *
 311  * and the corresponding activity looks like
 312  *
 313  * copy runs 1 and 2 from base to aux
 314  * merge runs 1 and 2 from aux to base
 315  * (run 3 is where it belongs, no copy needed)
 316  * merge runs 12 and 3 from base to aux
 317  * (runs 4 and 5 are where they belong, no copy needed)
 318  * merge runs 4 and 5 from base to aux
 319  * merge runs 123 and 45 from aux to base
 320  *
 321  * Note that we merge runs 1 and 2 immediately after copying them,
 322  * while they are still likely to be in fast cache.  Similarly,
 323  * run 3 is merged with run 12 while it still may be lingering in cache.
 324  * This implementation should therefore enjoy much of the cache-friendly
 325  * behavior that quicksort does.  In addition, it does less copying
 326  * than the original mergesort implementation (only runs 1 and 2 are copied)
 327  * and the "balancing" of merges is better (merged runs comprise more nearly
 328  * equal numbers of original runs).
 329  *
 330  * The actual cache-friendly implementation will use a pseudo-stack
 331  * to avoid recursion, and will unroll processing of runs of length 2,
 332  * but it is otherwise similar to the recursive implementation.
 333  */
 334
 335 typedef struct {
 336     IV  offset;         /* offset of 1st of 2 runs at this level */
 337     IV  runs;           /* how many runs must be combined into 1 */
 338 } off_runs;             /* pseudo-stack element */
 339
 340
 341 static I32
 342 cmp_desc(pTHX_ gptr a, gptr b)
 343 {
 344     dVAR;
 345     return -PL_sort_RealCmp(aTHX_ a, b);
 346 }
 347
 348 STATIC void
 349 S_mergesortsv(pTHX_ gptr *base, size_t nmemb, SVCOMPARE_t cmp, U32 flags)
 350 {
 351     dVAR;
 352     IV i, run, offset;
 353     I32 sense, level;
 354     register gptr *f1, *f2, *t, *b, *p;
 355     int iwhich;
 356     gptr *aux;
 357     gptr *p1;
 358     gptr small[SMALLSORT];
 359     gptr *which[3];
 360     off_runs stack[60], *stackp;
 361     SVCOMPARE_t savecmp = 0;
 362
 363     if (nmemb <= 1) return;                     /* sorted trivially */
 364
 365     if (flags) {
 366         savecmp = PL_sort_RealCmp;      /* Save current comparison routine, if any */
 367         PL_sort_RealCmp = cmp;  /* Put comparison routine where cmp_desc can find it */
 368         cmp = cmp_desc;
 369     }
 370
 371     if (nmemb <= SMALLSORT) aux = small;        /* use stack for aux array */
 372     else { Newx(aux,nmemb,gptr); }              /* allocate auxilliary array */
 373     level = 0;
 374     stackp = stack;
 375     stackp->runs = dynprep(aTHX_ base, aux, nmemb, cmp);
 376     stackp->offset = offset = 0;
 377     which[0] = which[2] = base;
 378     which[1] = aux;
 379     for (;;) {
 380         /* On levels where both runs have be constructed (stackp->runs == 0),
 381          * merge them, and note the offset of their end, in case the offset
 382          * is needed at the next level up.  Hop up a level, and,
 383          * as long as stackp->runs is 0, keep merging.
 384          */
 385         IV runs = stackp->runs;
 386         if (runs == 0) {
 387             gptr *list1, *list2;
 388             iwhich = level & 1;
 389             list1 = which[iwhich];              /* area where runs are now */
 390             list2 = which[++iwhich];            /* area for merged runs */
 391             do {
 392                 register gptr *l1, *l2, *tp2;
 393                 offset = stackp->offset;
 394                 f1 = p1 = list1 + offset;               /* start of first run */
 395                 p = tp2 = list2 + offset;       /* where merged run will go */
 396                 t = NEXT(p);                    /* where first run ends */
 397                 f2 = l1 = POTHER(t, list2, list1); /* ... on the other side */
 398                 t = NEXT(t);                    /* where second runs ends */
 399                 l2 = POTHER(t, list2, list1);   /* ... on the other side */
 400                 offset = PNELEM(list2, t);
 401                 while (f1 < l1 && f2 < l2) {
 402                     /* If head 1 is larger than head 2, find ALL the elements
 403                     ** in list 2 strictly less than head1, write them all,
 404                     ** then head 1.  Then compare the new heads, and repeat,
 405                     ** until one or both lists are exhausted.
 406                     **
 407                     ** In all comparisons (after establishing
 408                     ** which head to merge) the item to merge
 409                     ** (at pointer q) is the first operand of
 410                     ** the comparison.  When we want to know
 411                     ** if "q is strictly less than the other",
 412                     ** we can't just do
 413                     **    cmp(q, other) < 0
 414                     ** because stability demands that we treat equality
 415                     ** as high when q comes from l2, and as low when
 416                     ** q was from l1.  So we ask the question by doing
 417                     **    cmp(q, other) <= sense
 418                     ** and make sense == 0 when equality should look low,
 419                     ** and -1 when equality should look high.
 420                     */
 421
 422                     register gptr *q;
 423                     if (cmp(aTHX_ *f1, *f2) <= 0) {
 424                         q = f2; b = f1; t = l1;
 425                         sense = -1;
 426                     } else {
 427                         q = f1; b = f2; t = l2;
 428                         sense = 0;
 429                     }
 430
 431
 432                     /* ramp up
 433                     **
 434                     ** Leave t at something strictly
 435                     ** greater than q (or at the end of the list),
 436                     ** and b at something strictly less than q.
 437                     */
 438                     for (i = 1, run = 0 ;;) {
 439                         if ((p = PINDEX(b, i)) >= t) {
 440                             /* off the end */
 441                             if (((p = PINDEX(t, -1)) > b) &&
 442                                 (cmp(aTHX_ *q, *p) <= sense))
 443                                  t = p;
 444                             else b = p;
 445                             break;
 446                         } else if (cmp(aTHX_ *q, *p) <= sense) {
 447                             t = p;
 448                             break;
 449                         } else b = p;
 450                         if (++run >= RTHRESH) i += i;
 451                     }
 452
 453
 454                     /* q is known to follow b and must be inserted before t.
 455                     ** Increment b, so the range of possibilities is [b,t).
 456                     ** Round binary split down, to favor early appearance.
 457                     ** Adjust b and t until q belongs just before t.
 458                     */
 459
 460                     b++;
 461                     while (b < t) {
 462                         p = PINDEX(b, (PNELEM(b, t) - 1) / 2);
 463                         if (cmp(aTHX_ *q, *p) <= sense) {
 464                             t = p;
 465                         } else b = p + 1;
 466                     }
 467
 468
 469                     /* Copy all the strictly low elements */
 470
 471                     if (q == f1) {
 472                         FROMTOUPTO(f2, tp2, t);
 473                         *tp2++ = *f1++;
 474                     } else {
 475                         FROMTOUPTO(f1, tp2, t);
 476                         *tp2++ = *f2++;
 477                     }
 478                 }
 479
 480
 481                 /* Run out remaining list */
 482                 if (f1 == l1) {
 483                        if (f2 < l2) FROMTOUPTO(f2, tp2, l2);
 484                 } else              FROMTOUPTO(f1, tp2, l1);
 485                 p1 = NEXT(p1) = POTHER(tp2, list2, list1);
 486
 487                 if (--level == 0) goto done;
 488                 --stackp;
 489                 t = list1; list1 = list2; list2 = t;    /* swap lists */
 490             } while ((runs = stackp->runs) == 0);
 491         }
 492
 493
 494         stackp->runs = 0;               /* current run will finish level */
 495         /* While there are more than 2 runs remaining,
 496          * turn them into exactly 2 runs (at the "other" level),
 497          * each made up of approximately half the runs.
 498          * Stack the second half for later processing,
 499          * and set about producing the first half now.
 500          */
 501         while (runs > 2) {
 502             ++level;
 503             ++stackp;
 504             stackp->offset = offset;
 505             runs -= stackp->runs = runs / 2;
 506         }
 507         /* We must construct a single run from 1 or 2 runs.
 508          * All the original runs are in which[0] == base.
 509          * The run we construct must end up in which[level&1].
 510          */
 511         iwhich = level & 1;
 512         if (runs == 1) {
 513             /* Constructing a single run from a single run.
 514              * If it's where it belongs already, there's nothing to do.
 515              * Otherwise, copy it to where it belongs.
 516              * A run of 1 is either a singleton at level 0,
 517              * or the second half of a split 3.  In neither event
 518              * is it necessary to set offset.  It will be set by the merge
 519              * that immediately follows.
 520              */
 521             if (iwhich) {       /* Belongs in aux, currently in base */
 522                 f1 = b = PINDEX(base, offset);  /* where list starts */
 523                 f2 = PINDEX(aux, offset);       /* where list goes */
 524                 t = NEXT(f2);                   /* where list will end */
 525                 offset = PNELEM(aux, t);        /* offset thereof */
 526                 t = PINDEX(base, offset);       /* where it currently ends */
 527                 FROMTOUPTO(f1, f2, t);          /* copy */
 528                 NEXT(b) = t;                    /* set up parallel pointer */
 529             } else if (level == 0) goto done;   /* single run at level 0 */
 530         } else {
 531             /* Constructing a single run from two runs.
 532              * The merge code at the top will do that.
 533              * We need only make sure the two runs are in the "other" array,
 534              * so they'll end up in the correct array after the merge.
 535              */
 536             ++level;
 537             ++stackp;
 538             stackp->offset = offset;
 539             stackp->runs = 0;   /* take care of both runs, trigger merge */
 540             if (!iwhich) {      /* Merged runs belong in aux, copy 1st */
 541                 f1 = b = PINDEX(base, offset);  /* where first run starts */
 542                 f2 = PINDEX(aux, offset);       /* where it will be copied */
 543                 t = NEXT(f2);                   /* where first run will end */
 544                 offset = PNELEM(aux, t);        /* offset thereof */
 545                 p = PINDEX(base, offset);       /* end of first run */
 546                 t = NEXT(t);                    /* where second run will end */
 547                 t = PINDEX(base, PNELEM(aux, t)); /* where it now ends */
 548                 FROMTOUPTO(f1, f2, t);          /* copy both runs */
 549                 NEXT(b) = p;                    /* paralled pointer for 1st */
 550                 NEXT(p) = t;                    /* ... and for second */
 551             }
 552         }
 553     }
 554 done:
 555     if (aux != small) Safefree(aux);    /* free iff allocated */
 556     if (flags) {
 557          PL_sort_RealCmp = savecmp;     /* Restore current comparison routine, if any */
 558     }
 559     return;
 560 }
 561
 562 /*
 563  * The quicksort implementation was derived from source code contributed
 564  * by Tom Horsley.
 565  *
 566  * NOTE: this code was derived from Tom Horsley's qsort replacement
 567  * and should not be confused with the original code.
 568  */
 569
 570 /* Copyright (C) Tom Horsley, 1997. All rights reserved.
 571
 572    Permission granted to distribute under the same terms as perl which are
 573    (briefly):
 574
 575     This program is free software; you can redistribute it and/or modify
 576     it under the terms of either:
 577
 578         a) the GNU General Public License as published by the Free
 579         Software Foundation; either version 1, or (at your option) any
 580         later version, or
 581
 582         b) the "Artistic License" which comes with this Kit.
 583
 584    Details on the perl license can be found in the perl source code which
 585    may be located via the www.perl.com web page.
 586
 587    This is the most wonderfulest possible qsort I can come up with (and
 588    still be mostly portable) My (limited) tests indicate it consistently
 589    does about 20% fewer calls to compare than does the qsort in the Visual
 590    C++ library, other vendors may vary.
 591
 592    Some of the ideas in here can be found in "Algorithms" by Sedgewick,
 593    others I invented myself (or more likely re-invented since they seemed
 594    pretty obvious once I watched the algorithm operate for a while).
 595
 596    Most of this code was written while watching the Marlins sweep the Giants
 597    in the 1997 National League Playoffs - no Braves fans allowed to use this
 598    code (just kidding :-).
 599
 600    I realize that if I wanted to be true to the perl tradition, the only
 601    comment in this file would be something like:
 602
 603    ...they shuffled back towards the rear of the line. 'No, not at the
 604    rear!'  the slave-driver shouted. 'Three files up. And stay there...
 605
 606    However, I really needed to violate that tradition just so I could keep
 607    track of what happens myself, not to mention some poor fool trying to
 608    understand this years from now :-).
 609 */
 610
 611 /* ********************************************************** Configuration */
 612
 613 #ifndef QSORT_ORDER_GUESS
 614 #define QSORT_ORDER_GUESS 2     /* Select doubling version of the netBSD trick */
 615 #endif
 616
 617 /* QSORT_MAX_STACK is the largest number of partitions that can be stacked up for
 618    future processing - a good max upper bound is log base 2 of memory size
 619    (32 on 32 bit machines, 64 on 64 bit machines, etc). In reality can
 620    safely be smaller than that since the program is taking up some space and
 621    most operating systems only let you grab some subset of contiguous
 622    memory (not to mention that you are normally sorting data larger than
 623    1 byte element size :-).
 624 */
 625 #ifndef QSORT_MAX_STACK
 626 #define QSORT_MAX_STACK 32
 627 #endif
 628
 629 /* QSORT_BREAK_EVEN is the size of the largest partition we should insertion sort.
 630    Anything bigger and we use qsort. If you make this too small, the qsort
 631    will probably break (or become less efficient), because it doesn't expect
 632    the middle element of a partition to be the same as the right or left -
 633    you have been warned).
 634 */
 635 #ifndef QSORT_BREAK_EVEN
 636 #define QSORT_BREAK_EVEN 6
 637 #endif
 638
 639 /* QSORT_PLAY_SAFE is the size of the largest partition we're willing
 640    to go quadratic on.  We innoculate larger partitions against
 641    quadratic behavior by shuffling them before sorting.  This is not
 642    an absolute guarantee of non-quadratic behavior, but it would take
 643    staggeringly bad luck to pick extreme elements as the pivot
 644    from randomized data.
 645 */
 646 #ifndef QSORT_PLAY_SAFE
 647 #define QSORT_PLAY_SAFE 255
 648 #endif
 649
 650 /* ************************************************************* Data Types */
 651
 652 /* hold left and right index values of a partition waiting to be sorted (the
 653    partition includes both left and right - right is NOT one past the end or
 654    anything like that).
 655 */
 656 struct partition_stack_entry {
 657    int left;
 658    int right;
 659 #ifdef QSORT_ORDER_GUESS
 660    int qsort_break_even;
 661 #endif
 662 };
 663
 664 /* ******************************************************* Shorthand Macros */
 665
 666 /* Note that these macros will be used from inside the qsort function where
 667    we happen to know that the variable 'elt_size' contains the size of an
 668    array element and the variable 'temp' points to enough space to hold a
 669    temp element and the variable 'array' points to the array being sorted
 670    and 'compare' is the pointer to the compare routine.
 671
 672    Also note that there are very many highly architecture specific ways
 673    these might be sped up, but this is simply the most generally portable
 674    code I could think of.
 675 */
 676
 677 /* Return < 0 == 0 or > 0 as the value of elt1 is < elt2, == elt2, > elt2
 678 */
 679 #define qsort_cmp(elt1, elt2) \
 680    ((*compare)(aTHX_ array[elt1], array[elt2]))
 681
 682 #ifdef QSORT_ORDER_GUESS
 683 #define QSORT_NOTICE_SWAP swapped++;
 684 #else
 685 #define QSORT_NOTICE_SWAP
 686 #endif
 687
 688 /* swaps contents of array elements elt1, elt2.
 689 */
 690 #define qsort_swap(elt1, elt2) \
 691    STMT_START { \
 692       QSORT_NOTICE_SWAP \
 693       temp = array[elt1]; \
 694       array[elt1] = array[elt2]; \
 695       array[elt2] = temp; \
 696    } STMT_END
 697
 698 /* rotate contents of elt1, elt2, elt3 such that elt1 gets elt2, elt2 gets
 699    elt3 and elt3 gets elt1.
 700 */
 701 #define qsort_rotate(elt1, elt2, elt3) \
 702    STMT_START { \
 703       QSORT_NOTICE_SWAP \
 704       temp = array[elt1]; \
 705       array[elt1] = array[elt2]; \
 706       array[elt2] = array[elt3]; \
 707       array[elt3] = temp; \
 708    } STMT_END
 709
 710 /* ************************************************************ Debug stuff */
 711
 712 #ifdef QSORT_DEBUG
 713
 714 static void
 715 break_here()
 716 {
 717    return; /* good place to set a breakpoint */
 718 }
 719
 720 #define qsort_assert(t) (void)( (t) || (break_here(), 0) )
 721
 722 static void
 723 doqsort_all_asserts(
 724    void * array,
 725    size_t num_elts,
 726    size_t elt_size,
 727    int (*compare)(const void * elt1, const void * elt2),
 728    int pc_left, int pc_right, int u_left, int u_right)
 729 {
 730    int i;
 731
 732    qsort_assert(pc_left <= pc_right);
 733    qsort_assert(u_right < pc_left);
 734    qsort_assert(pc_right < u_left);
 735    for (i = u_right + 1; i < pc_left; ++i) {
 736       qsort_assert(qsort_cmp(i, pc_left) < 0);
 737    }
 738    for (i = pc_left; i < pc_right; ++i) {
 739       qsort_assert(qsort_cmp(i, pc_right) == 0);
 740    }
 741    for (i = pc_right + 1; i < u_left; ++i) {
 742       qsort_assert(qsort_cmp(pc_right, i) < 0);
 743    }
 744 }
 745
 746 #define qsort_all_asserts(PC_LEFT, PC_RIGHT, U_LEFT, U_RIGHT) \
 747    doqsort_all_asserts(array, num_elts, elt_size, compare, \
 748                  PC_LEFT, PC_RIGHT, U_LEFT, U_RIGHT)
 749
 750 #else
 751
 752 #define qsort_assert(t) ((void)0)
 753
 754 #define qsort_all_asserts(PC_LEFT, PC_RIGHT, U_LEFT, U_RIGHT) ((void)0)
 755
 756 #endif
 757
 758 /* ****************************************************************** qsort */
 759
 760 STATIC void /* the standard unstable (u) quicksort (qsort) */
 761 S_qsortsvu(pTHX_ SV ** array, size_t num_elts, SVCOMPARE_t compare)
 762 {
 763    register SV * temp;
 764
 765    struct partition_stack_entry partition_stack[QSORT_MAX_STACK];
 766    int next_stack_entry = 0;
 767
 768    int part_left;
 769    int part_right;
 770 #ifdef QSORT_ORDER_GUESS
 771    int qsort_break_even;
 772    int swapped;
 773 #endif
 774
 775    /* Make sure we actually have work to do.
 776    */
 777    if (num_elts <= 1) {
 778       return;
 779    }
 780
 781    /* Innoculate large partitions against quadratic behavior */
 782    if (num_elts > QSORT_PLAY_SAFE) {
 783       register size_t n;
 784       register SV ** const q = array;
 785       for (n = num_elts; n > 1; ) {
 786          register const size_t j = (size_t)(n-- * Drand01());
 787          temp = q[j];
 788          q[j] = q[n];
 789          q[n] = temp;
 790       }
 791    }
 792
 793    /* Setup the initial partition definition and fall into the sorting loop
 794    */
 795    part_left = 0;
 796    part_right = (int)(num_elts - 1);
 797 #ifdef QSORT_ORDER_GUESS
 798    qsort_break_even = QSORT_BREAK_EVEN;
 799 #else
 800 #define qsort_break_even QSORT_BREAK_EVEN
 801 #endif
 802    for ( ; ; ) {
 803       if ((part_right - part_left) >= qsort_break_even) {
 804          /* OK, this is gonna get hairy, so lets try to document all the
 805             concepts and abbreviations and variables and what they keep
 806             track of:
 807
 808             pc: pivot chunk - the set of array elements we accumulate in the
 809                 middle of the partition, all equal in value to the original
 810                 pivot element selected. The pc is defined by:
 811
 812                 pc_left - the leftmost array index of the pc
 813                 pc_right - the rightmost array index of the pc
 814
 815                 we start with pc_left == pc_right and only one element
 816                 in the pivot chunk (but it can grow during the scan).
 817
 818             u:  uncompared elements - the set of elements in the partition
 819                 we have not yet compared to the pivot value. There are two
 820                 uncompared sets during the scan - one to the left of the pc
 821                 and one to the right.
 822
 823                 u_right - the rightmost index of the left side's uncompared set
 824                 u_left - the leftmost index of the right side's uncompared set
 825
 826                 The leftmost index of the left sides's uncompared set
 827                 doesn't need its own variable because it is always defined
 828                 by the leftmost edge of the whole partition (part_left). The
 829                 same goes for the rightmost edge of the right partition
 830                 (part_right).
 831
 832                 We know there are no uncompared elements on the left once we
 833                 get u_right < part_left and no uncompared elements on the
 834                 right once u_left > part_right. When both these conditions
 835                 are met, we have completed the scan of the partition.
 836
 837                 Any elements which are between the pivot chunk and the
 838                 uncompared elements should be less than the pivot value on
 839                 the left side and greater than the pivot value on the right
 840                 side (in fact, the goal of the whole algorithm is to arrange
 841                 for that to be true and make the groups of less-than and
 842                 greater-then elements into new partitions to sort again).
 843
 844             As you marvel at the complexity of the code and wonder why it
 845             has to be so confusing. Consider some of the things this level
 846             of confusion brings:
 847
 848             Once I do a compare, I squeeze every ounce of juice out of it. I
 849             never do compare calls I don't have to do, and I certainly never
 850             do redundant calls.
 851
 852             I also never swap any elements unless I can prove there is a
 853             good reason. Many sort algorithms will swap a known value with
 854             an uncompared value just to get things in the right place (or
 855             avoid complexity :-), but that uncompared value, once it gets
 856             compared, may then have to be swapped again. A lot of the
 857             complexity of this code is due to the fact that it never swaps
 858             anything except compared values, and it only swaps them when the
 859             compare shows they are out of position.
 860          */
 861          int pc_left, pc_right;
 862          int u_right, u_left;
 863
 864          int s;
 865
 866          pc_left = ((part_left + part_right) / 2);
 867          pc_right = pc_left;
 868          u_right = pc_left - 1;
 869          u_left = pc_right + 1;
 870
 871          /* Qsort works best when the pivot value is also the median value
 872             in the partition (unfortunately you can't find the median value
 873             without first sorting :-), so to give the algorithm a helping
 874             hand, we pick 3 elements and sort them and use the median value
 875             of that tiny set as the pivot value.
 876
 877             Some versions of qsort like to use the left middle and right as
 878             the 3 elements to sort so they can insure the ends of the
 879             partition will contain values which will stop the scan in the
 880             compare loop, but when you have to call an arbitrarily complex
 881             routine to do a compare, its really better to just keep track of
 882             array index values to know when you hit the edge of the
 883             partition and avoid the extra compare. An even better reason to
 884             avoid using a compare call is the fact that you can drop off the
 885             edge of the array if someone foolishly provides you with an
 886             unstable compare function that doesn't always provide consistent
 887             results.
 888
 889             So, since it is simpler for us to compare the three adjacent
 890             elements in the middle of the partition, those are the ones we
 891             pick here (conveniently pointed at by u_right, pc_left, and
 892             u_left). The values of the left, center, and right elements
 893             are refered to as l c and r in the following comments.
 894          */
 895
 896 #ifdef QSORT_ORDER_GUESS
 897          swapped = 0;
 898 #endif
 899          s = qsort_cmp(u_right, pc_left);
 900          if (s < 0) {
 901             /* l < c */
 902             s = qsort_cmp(pc_left, u_left);
 903             /* if l < c, c < r - already in order - nothing to do */
 904             if (s == 0) {
 905                /* l < c, c == r - already in order, pc grows */
 906                ++pc_right;
 907                qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
 908             } else if (s > 0) {
 909                /* l < c, c > r - need to know more */
 910                s = qsort_cmp(u_right, u_left);
 911                if (s < 0) {
 912                   /* l < c, c > r, l < r - swap c & r to get ordered */
 913                   qsort_swap(pc_left, u_left);
 914                   qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
 915                } else if (s == 0) {
 916                   /* l < c, c > r, l == r - swap c&r, grow pc */
 917                   qsort_swap(pc_left, u_left);
 918                   --pc_left;
 919                   qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
 920                } else {
 921                   /* l < c, c > r, l > r - make lcr into rlc to get ordered */
 922                   qsort_rotate(pc_left, u_right, u_left);
 923                   qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
 924                }
 925             }
 926          } else if (s == 0) {
 927             /* l == c */
 928             s = qsort_cmp(pc_left, u_left);
 929             if (s < 0) {
 930                /* l == c, c < r - already in order, grow pc */
 931                --pc_left;
 932                qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
 933             } else if (s == 0) {
 934                /* l == c, c == r - already in order, grow pc both ways */
 935                --pc_left;
 936                ++pc_right;
 937                qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
 938             } else {
 939                /* l == c, c > r - swap l & r, grow pc */
 940                qsort_swap(u_right, u_left);
 941                ++pc_right;
 942                qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
 943             }
 944          } else {
 945             /* l > c */
 946             s = qsort_cmp(pc_left, u_left);
 947             if (s < 0) {
 948                /* l > c, c < r - need to know more */
 949                s = qsort_cmp(u_right, u_left);
 950                if (s < 0) {
 951                   /* l > c, c < r, l < r - swap l & c to get ordered */
 952                   qsort_swap(u_right, pc_left);
 953                   qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
 954                } else if (s == 0) {
 955                   /* l > c, c < r, l == r - swap l & c, grow pc */
 956                   qsort_swap(u_right, pc_left);
 957                   ++pc_right;
 958                   qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
 959                } else {
 960                   /* l > c, c < r, l > r - rotate lcr into crl to order */
 961                   qsort_rotate(u_right, pc_left, u_left);
 962                   qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
 963                }
 964             } else if (s == 0) {
 965                /* l > c, c == r - swap ends, grow pc */
 966                qsort_swap(u_right, u_left);
 967                --pc_left;
 968                qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
 969             } else {
 970                /* l > c, c > r - swap ends to get in order */
 971                qsort_swap(u_right, u_left);
 972                qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
 973             }
 974          }
 975          /* We now know the 3 middle elements have been compared and
 976             arranged in the desired order, so we can shrink the uncompared
 977             sets on both sides
 978          */
 979          --u_right;
 980          ++u_left;
 981          qsort_all_asserts(pc_left, pc_right, u_left, u_right);
 982
 983          /* The above massive nested if was the simple part :-). We now have
 984             the middle 3 elements ordered and we need to scan through the
 985             uncompared sets on either side, swapping elements that are on
 986             the wrong side or simply shuffling equal elements around to get
 987             all equal elements into the pivot chunk.
 988          */
 989
 990          for ( ; ; ) {
 991             int still_work_on_left;
 992             int still_work_on_right;
 993
 994             /* Scan the uncompared values on the left. If I find a value
 995                equal to the pivot value, move it over so it is adjacent to
 996                the pivot chunk and expand the pivot chunk. If I find a value
 997                less than the pivot value, then just leave it - its already
 998                on the correct side of the partition. If I find a greater
 999                value, then stop the scan.
1000             */
1001             while ((still_work_on_left = (u_right >= part_left))) {
1002                s = qsort_cmp(u_right, pc_left);
1003                if (s < 0) {
1004                   --u_right;
1005                } else if (s == 0) {
1006                   --pc_left;
1007                   if (pc_left != u_right) {
1008                      qsort_swap(u_right, pc_left);
1009                   }
1010                   --u_right;
1011                } else {
1012                   break;
1013                }
1014                qsort_assert(u_right < pc_left);
1015                qsort_assert(pc_left <= pc_right);
1016                qsort_assert(qsort_cmp(u_right + 1, pc_left) <= 0);
1017                qsort_assert(qsort_cmp(pc_left, pc_right) == 0);
1018             }
1019
1020             /* Do a mirror image scan of uncompared values on the right
1021             */
1022             while ((still_work_on_right = (u_left <= part_right))) {
1023                s = qsort_cmp(pc_right, u_left);
1024                if (s < 0) {
1025                   ++u_left;
1026                } else if (s == 0) {
1027                   ++pc_right;
1028                   if (pc_right != u_left) {
1029                      qsort_swap(pc_right, u_left);
1030                   }
1031                   ++u_left;
1032                } else {
1033                   break;
1034                }
1035                qsort_assert(u_left > pc_right);
1036                qsort_assert(pc_left <= pc_right);
1037                qsort_assert(qsort_cmp(pc_right, u_left - 1) <= 0);
1038                qsort_assert(qsort_cmp(pc_left, pc_right) == 0);
1039             }
1040
1041             if (still_work_on_left) {
1042                /* I know I have a value on the left side which needs to be
1043                   on the right side, but I need to know more to decide
1044                   exactly the best thing to do with it.
1045                */
1046                if (still_work_on_right) {
1047                   /* I know I have values on both side which are out of
1048                      position. This is a big win because I kill two birds
1049                      with one swap (so to speak). I can advance the
1050                      uncompared pointers on both sides after swapping both
1051                      of them into the right place.
1052                   */
1053                   qsort_swap(u_right, u_left);
1054                   --u_right;
1055                   ++u_left;
1056                   qsort_all_asserts(pc_left, pc_right, u_left, u_right);
1057                } else {
1058                   /* I have an out of position value on the left, but the
1059                      right is fully scanned, so I "slide" the pivot chunk
1060                      and any less-than values left one to make room for the
1061                      greater value over on the right. If the out of position
1062                      value is immediately adjacent to the pivot chunk (there
1063                      are no less-than values), I can do that with a swap,
1064                      otherwise, I have to rotate one of the less than values
1065                      into the former position of the out of position value
1066                      and the right end of the pivot chunk into the left end
1067                      (got all that?).
1068                   */
1069                   --pc_left;
1070                   if (pc_left == u_right) {
1071                      qsort_swap(u_right, pc_right);
1072                      qsort_all_asserts(pc_left, pc_right-1, u_left, u_right-1);
1073                   } else {
1074                      qsort_rotate(u_right, pc_left, pc_right);
1075                      qsort_all_asserts(pc_left, pc_right-1, u_left, u_right-1);
1076                   }
1077                   --pc_right;
1078                   --u_right;
1079                }
1080             } else if (still_work_on_right) {
1081                /* Mirror image of complex case above: I have an out of
1082                   position value on the right, but the left is fully
1083                   scanned, so I need to shuffle things around to make room
1084                   for the right value on the left.
1085                */
1086                ++pc_right;
1087                if (pc_right == u_left) {
1088                   qsort_swap(u_left, pc_left);
1089                   qsort_all_asserts(pc_left+1, pc_right, u_left+1, u_right);
1090                } else {
1091                   qsort_rotate(pc_right, pc_left, u_left);
1092                   qsort_all_asserts(pc_left+1, pc_right, u_left+1, u_right);
1093                }
1094                ++pc_left;
1095                ++u_left;
1096             } else {
1097                /* No more scanning required on either side of partition,
1098                   break out of loop and figure out next set of partitions
1099                */
1100                break;
1101             }
1102          }
1103
1104          /* The elements in the pivot chunk are now in the right place. They
1105             will never move or be compared again. All I have to do is decide
1106             what to do with the stuff to the left and right of the pivot
1107             chunk.
1108
1109             Notes on the QSORT_ORDER_GUESS ifdef code:
1110
1111             1. If I just built these partitions without swapping any (or
1112                very many) elements, there is a chance that the elements are
1113                already ordered properly (being properly ordered will
1114                certainly result in no swapping, but the converse can't be
1115                proved :-).
1116
1117             2. A (properly written) insertion sort will run faster on
1118                already ordered data than qsort will.
1119
1120             3. Perhaps there is some way to make a good guess about
1121                switching to an insertion sort earlier than partition size 6
1122                (for instance - we could save the partition size on the stack
1123                and increase the size each time we find we didn't swap, thus
1124                switching to insertion sort earlier for partitions with a
1125                history of not swapping).
1126
1127             4. Naturally, if I just switch right away, it will make
1128                artificial benchmarks with pure ascending (or descending)
1129                data look really good, but is that a good reason in general?
1130                Hard to say...
1131          */
1132
1133 #ifdef QSORT_ORDER_GUESS
1134          if (swapped < 3) {
1135 #if QSORT_ORDER_GUESS == 1
1136             qsort_break_even = (part_right - part_left) + 1;
1137 #endif
1138 #if QSORT_ORDER_GUESS == 2
1139             qsort_break_even *= 2;
1140 #endif
1141 #if QSORT_ORDER_GUESS == 3
1142             const int prev_break = qsort_break_even;
1143             qsort_break_even *= qsort_break_even;
1144             if (qsort_break_even < prev_break) {
1145                qsort_break_even = (part_right - part_left) + 1;
1146             }
1147 #endif
1148          } else {
1149             qsort_break_even = QSORT_BREAK_EVEN;
1150          }
1151 #endif
1152
1153          if (part_left < pc_left) {
1154             /* There are elements on the left which need more processing.
1155                Check the right as well before deciding what to do.
1156             */
1157             if (pc_right < part_right) {
1158                /* We have two partitions to be sorted. Stack the biggest one
1159                   and process the smallest one on the next iteration. This
1160                   minimizes the stack height by insuring that any additional
1161                   stack entries must come from the smallest partition which
1162                   (because it is smallest) will have the fewest
1163                   opportunities to generate additional stack entries.
1164                */
1165                if ((part_right - pc_right) > (pc_left - part_left)) {
1166                   /* stack the right partition, process the left */
1167                   partition_stack[next_stack_entry].left = pc_right + 1;
1168                   partition_stack[next_stack_entry].right = part_right;
1169 #ifdef QSORT_ORDER_GUESS
1170                   partition_stack[next_stack_entry].qsort_break_even = qsort_break_even;
1171 #endif
1172                   part_right = pc_left - 1;
1173                } else {
1174                   /* stack the left partition, process the right */
1175                   partition_stack[next_stack_entry].left = part_left;
1176                   partition_stack[next_stack_entry].right = pc_left - 1;
1177 #ifdef QSORT_ORDER_GUESS
1178                   partition_stack[next_stack_entry].qsort_break_even = qsort_break_even;
1179 #endif
1180                   part_left = pc_right + 1;
1181                }
1182                qsort_assert(next_stack_entry < QSORT_MAX_STACK);
1183                ++next_stack_entry;
1184             } else {
1185                /* The elements on the left are the only remaining elements
1186                   that need sorting, arrange for them to be processed as the
1187                   next partition.
1188                */
1189                part_right = pc_left - 1;
1190             }
1191          } else if (pc_right < part_right) {
1192             /* There is only one chunk on the right to be sorted, make it
1193                the new partition and loop back around.
1194             */
1195             part_left = pc_right + 1;
1196          } else {
1197             /* This whole partition wound up in the pivot chunk, so
1198                we need to get a new partition off the stack.
1199             */
1200             if (next_stack_entry == 0) {
1201                /* the stack is empty - we are done */
1202                break;
1203             }
1204             --next_stack_entry;
1205             part_left = partition_stack[next_stack_entry].left;
1206             part_right = partition_stack[next_stack_entry].right;
1207 #ifdef QSORT_ORDER_GUESS
1208             qsort_break_even = partition_stack[next_stack_entry].qsort_break_even;
1209 #endif
1210          }
1211       } else {
1212          /* This partition is too small to fool with qsort complexity, just
1213             do an ordinary insertion sort to minimize overhead.
1214          */
1215          int i;
1216          /* Assume 1st element is in right place already, and start checking
1217             at 2nd element to see where it should be inserted.
1218          */
1219          for (i = part_left + 1; i <= part_right; ++i) {
1220             int j;
1221             /* Scan (backwards - just in case 'i' is already in right place)
1222                through the elements already sorted to see if the ith element
1223                belongs ahead of one of them.
1224             */
1225             for (j = i - 1; j >= part_left; --j) {
1226                if (qsort_cmp(i, j) >= 0) {
1227                   /* i belongs right after j
1228                   */
1229                   break;
1230                }
1231             }
1232             ++j;
1233             if (j != i) {
1234                /* Looks like we really need to move some things
1235                */
1236                int k;
1237                temp = array[i];
1238                for (k = i - 1; k >= j; --k)
1239                   array[k + 1] = array[k];
1240                array[j] = temp;
1241             }
1242          }
1243
1244          /* That partition is now sorted, grab the next one, or get out
1245             of the loop if there aren't any more.
1246          */
1247
1248          if (next_stack_entry == 0) {
1249             /* the stack is empty - we are done */
1250             break;
1251          }
1252          --next_stack_entry;
1253          part_left = partition_stack[next_stack_entry].left;
1254          part_right = partition_stack[next_stack_entry].right;
1255 #ifdef QSORT_ORDER_GUESS
1256          qsort_break_even = partition_stack[next_stack_entry].qsort_break_even;
1257 #endif
1258       }
1259    }
1260
1261    /* Believe it or not, the array is sorted at this point! */
1262 }
1263
1264 /* Stabilize what is, presumably, an otherwise unstable sort method.
1265  * We do that by allocating (or having on hand) an array of pointers
1266  * that is the same size as the original array of elements to be sorted.
1267  * We initialize this parallel array with the addresses of the original
1268  * array elements.  This indirection can make you crazy.
1269  * Some pictures can help.  After initializing, we have
1270  *
1271  *  indir                  list1
1272  * +----+                 +----+
1273  * |    | --------------> |    | ------> first element to be sorted
1274  * +----+                 +----+
1275  * |    | --------------> |    | ------> second element to be sorted
1276  * +----+                 +----+
1277  * |    | --------------> |    | ------> third element to be sorted
1278  * +----+                 +----+
1279  *  ...
1280  * +----+                 +----+
1281  * |    | --------------> |    | ------> n-1st element to be sorted
1282  * +----+                 +----+
1283  * |    | --------------> |    | ------> n-th element to be sorted
1284  * +----+                 +----+
1285  *
1286  * During the sort phase, we leave the elements of list1 where they are,
1287  * and sort the pointers in the indirect array in the same order determined
1288  * by the original comparison routine on the elements pointed to.
1289  * Because we don't move the elements of list1 around through
1290  * this phase, we can break ties on elements that compare equal
1291  * using their address in the list1 array, ensuring stabilty.
1292  * This leaves us with something looking like
1293  *
1294  *  indir                  list1
1295  * +----+                 +----+
1296  * |    | --+       +---> |    | ------> first element to be sorted
1297  * +----+   |       |     +----+
1298  * |    | --|-------|---> |    | ------> second element to be sorted
1299  * +----+   |       |     +----+
1300  * |    | --|-------+ +-> |    | ------> third element to be sorted
1301  * +----+   |         |   +----+
1302  *  ...
1303  * +----+    | |   | |    +----+
1304  * |    | ---|-+   | +--> |    | ------> n-1st element to be sorted
1305  * +----+    |     |      +----+
1306  * |    | ---+     +----> |    | ------> n-th element to be sorted
1307  * +----+                 +----+
1308  *
1309  * where the i-th element of the indirect array points to the element
1310  * that should be i-th in the sorted array.  After the sort phase,
1311  * we have to put the elements of list1 into the places
1312  * dictated by the indirect array.
1313  */
1314
1315
1316 static I32
1317 cmpindir(pTHX_ gptr a, gptr b)
1318 {
1319     dVAR;
1320     gptr * const ap = (gptr *)a;
1321     gptr * const bp = (gptr *)b;
1322     const I32 sense = PL_sort_RealCmp(aTHX_ *ap, *bp);
1323
1324     if (sense)
1325         return sense;
1326     return (ap > bp) ? 1 : ((ap < bp) ? -1 : 0);
1327 }
1328
1329 static I32
1330 cmpindir_desc(pTHX_ gptr a, gptr b)
1331 {
1332     dVAR;
1333     gptr * const ap = (gptr *)a;
1334     gptr * const bp = (gptr *)b;
1335     const I32 sense = PL_sort_RealCmp(aTHX_ *ap, *bp);
1336
1337     /* Reverse the default */
1338     if (sense)
1339         return -sense;
1340     /* But don't reverse the stability test.  */
1341     return (ap > bp) ? 1 : ((ap < bp) ? -1 : 0);
1342
1343 }
1344
1345 STATIC void
1346 S_qsortsv(pTHX_ gptr *list1, size_t nmemb, SVCOMPARE_t cmp, U32 flags)
1347 {
1348     dVAR;
1349     if ((flags & SORTf_STABLE) != 0) {
1350          register gptr **pp, *q;
1351          register size_t n, j, i;
1352          gptr *small[SMALLSORT], **indir, tmp;
1353          SVCOMPARE_t savecmp;
1354          if (nmemb <= 1) return;     /* sorted trivially */
1355
1356          /* Small arrays can use the stack, big ones must be allocated */
1357          if (nmemb <= SMALLSORT) indir = small;
1358          else { Newx(indir, nmemb, gptr *); }
1359
1360          /* Copy pointers to original array elements into indirect array */
1361          for (n = nmemb, pp = indir, q = list1; n--; ) *pp++ = q++;
1362
1363          savecmp = PL_sort_RealCmp;     /* Save current comparison routine, if any */
1364          PL_sort_RealCmp = cmp; /* Put comparison routine where cmpindir can find it */
1365
1366          /* sort, with indirection */
1367          S_qsortsvu(aTHX_ (gptr *)indir, nmemb,
1368                     ((flags & SORTf_DESC) != 0 ? cmpindir_desc : cmpindir));
1369
1370          pp = indir;
1371          q = list1;
1372          for (n = nmemb; n--; ) {
1373               /* Assert A: all elements of q with index > n are already
1374                * in place.  This is vacuosly true at the start, and we
1375                * put element n where it belongs below (if it wasn't
1376                * already where it belonged). Assert B: we only move
1377                * elements that aren't where they belong,
1378                * so, by A, we never tamper with elements above n.
1379                */
1380               j = pp[n] - q;            /* This sets j so that q[j] is
1381                                          * at pp[n].  *pp[j] belongs in
1382                                          * q[j], by construction.
1383                                          */
1384               if (n != j) {             /* all's well if n == j */
1385                    tmp = q[j];          /* save what's in q[j] */
1386                    do {
1387                         q[j] = *pp[j];  /* put *pp[j] where it belongs */
1388                         i = pp[j] - q;  /* the index in q of the element
1389                                          * just moved */
1390                         pp[j] = q + j;  /* this is ok now */
1391                    } while ((j = i) != n);
1392                    /* There are only finitely many (nmemb) addresses
1393                     * in the pp array.
1394                     * So we must eventually revisit an index we saw before.
1395                     * Suppose the first revisited index is k != n.
1396                     * An index is visited because something else belongs there.
1397                     * If we visit k twice, then two different elements must
1398                     * belong in the same place, which cannot be.
1399                     * So j must get back to n, the loop terminates,
1400                     * and we put the saved element where it belongs.
1401                     */
1402                    q[n] = tmp;          /* put what belongs into
1403                                          * the n-th element */
1404               }
1405          }
1406
1407         /* free iff allocated */
1408          if (indir != small) { Safefree(indir); }
1409          /* restore prevailing comparison routine */
1410          PL_sort_RealCmp = savecmp;
1411     } else if ((flags & SORTf_DESC) != 0) {
1412          SVCOMPARE_t savecmp = PL_sort_RealCmp; /* Save current comparison routine, if any */
1413          PL_sort_RealCmp = cmp; /* Put comparison routine where cmp_desc can find it */
1414          cmp = cmp_desc;
1415          S_qsortsvu(aTHX_ list1, nmemb, cmp);
1416          /* restore prevailing comparison routine */
1417          PL_sort_RealCmp = savecmp;
1418     } else {
1419          S_qsortsvu(aTHX_ list1, nmemb, cmp);
1420     }
1421 }
1422
1423 /*
1424 =head1 Array Manipulation Functions
1425
1426 =for apidoc sortsv
1427
1428 Sort an array. Here is an example:
1429
1430     sortsv(AvARRAY(av), av_len(av)+1, Perl_sv_cmp_locale);
1431
1432 Currently this always uses mergesort. See sortsv_flags for a more
1433 flexible routine.
1434
1435 =cut
1436 */
1437
1438 void
1439 Perl_sortsv(pTHX_ SV **array, size_t nmemb, SVCOMPARE_t cmp)
1440 {
1441     sortsv_flags(array, nmemb, cmp, 0);
1442 }
1443
1444 /*
1445 =for apidoc sortsv_flags
1446
1447 Sort an array, with various options.
1448
1449 =cut
1450 */
1451 void
1452 Perl_sortsv_flags(pTHX_ SV **array, size_t nmemb, SVCOMPARE_t cmp, U32 flags)
1453 {
1454     void (*sortsvp)(pTHX_ SV **array, size_t nmemb, SVCOMPARE_t cmp, U32 flags)
1455       = ((flags & SORTf_QSORT) != 0 ? S_qsortsv : S_mergesortsv);
1456
1457     sortsvp(aTHX_ array, nmemb, cmp, flags);
1458 }
1459
1460 #define SvNSIOK(sv) ((SvFLAGS(sv) & SVf_NOK) || ((SvFLAGS(sv) & (SVf_IOK|SVf_IVisUV)) == SVf_IOK))
1461 #define SvSIOK(sv) ((SvFLAGS(sv) & (SVf_IOK|SVf_IVisUV)) == SVf_IOK)
1462 #define SvNSIV(sv) ( SvNOK(sv) ? SvNVX(sv) : ( SvSIOK(sv) ? SvIVX(sv) : sv_2nv(sv) ) )
1463
1464 PP(pp_sort)
1465 {
1466     dVAR; dSP; dMARK; dORIGMARK;
1467     register SV **p1 = ORIGMARK+1, **p2;
1468     register I32 max, i;
1469     AV* av = NULL;
1470     HV *stash;
1471     GV *gv;
1472     CV *cv = NULL;
1473     I32 gimme = GIMME;
1474     OP* const nextop = PL_op->op_next;
1475     I32 overloading = 0;
1476     bool hasargs = FALSE;
1477     I32 is_xsub = 0;
1478     I32 sorting_av = 0;
1479     const U8 priv = PL_op->op_private;
1480     const U8 flags = PL_op->op_flags;
1481     U32 sort_flags = 0;
1482     void (*sortsvp)(pTHX_ SV **array, size_t nmemb, SVCOMPARE_t cmp, U32 flags)
1483       = Perl_sortsv_flags;
1484     I32 all_SIVs = 1;
1485
1486     if ((priv & OPpSORT_DESCEND) != 0)
1487         sort_flags |= SORTf_DESC;
1488     if ((priv & OPpSORT_QSORT) != 0)
1489         sort_flags |= SORTf_QSORT;
1490     if ((priv & OPpSORT_STABLE) != 0)
1491         sort_flags |= SORTf_STABLE;
1492
1493     if (gimme != G_ARRAY) {
1494         SP = MARK;
1495         EXTEND(SP,1);
1496         RETPUSHUNDEF;
1497     }
1498
1499     ENTER;
1500     SAVEVPTR(PL_sortcop);
1501     if (flags & OPf_STACKED) {
1502         if (flags & OPf_SPECIAL) {
1503             OP *kid = cLISTOP->op_first->op_sibling;    /* pass pushmark */
1504             kid = kUNOP->op_first;                      /* pass rv2gv */
1505             kid = kUNOP->op_first;                      /* pass leave */
1506             PL_sortcop = kid->op_next;
1507             stash = CopSTASH(PL_curcop);
1508         }
1509         else {
1510             cv = sv_2cv(*++MARK, &stash, &gv, 0);
1511             if (cv && SvPOK(cv)) {
1512                 const char * const proto = SvPV_nolen_const((SV*)cv);
1513                 if (proto && strEQ(proto, "$$")) {
1514                     hasargs = TRUE;
1515                 }
1516             }
1517             if (!(cv && CvROOT(cv))) {
1518                 if (cv && CvISXSUB(cv)) {
1519                     is_xsub = 1;
1520                 }
1521                 else if (gv) {
1522                     SV *tmpstr = sv_newmortal();
1523                     gv_efullname3(tmpstr, gv, NULL);
1524                     DIE(aTHX_ "Undefined sort subroutine \"%"SVf"\" called",
1525                         tmpstr);
1526                 }
1527                 else {
1528                     DIE(aTHX_ "Undefined subroutine in sort");
1529                 }
1530             }
1531
1532             if (is_xsub)
1533                 PL_sortcop = (OP*)cv;
1534             else
1535                 PL_sortcop = CvSTART(cv);
1536         }
1537     }
1538     else {
1539         PL_sortcop = NULL;
1540         stash = CopSTASH(PL_curcop);
1541     }
1542
1543     /* optimiser converts "@a = sort @a" to "sort \@a";
1544      * in case of tied @a, pessimise: push (@a) onto stack, then assign
1545      * result back to @a at the end of this function */
1546     if (priv & OPpSORT_INPLACE) {
1547         assert( MARK+1 == SP && *SP && SvTYPE(*SP) == SVt_PVAV);
1548         (void)POPMARK; /* remove mark associated with ex-OP_AASSIGN */
1549         av = (AV*)(*SP);
1550         max = AvFILL(av) + 1;
1551         if (SvMAGICAL(av)) {
1552             MEXTEND(SP, max);
1553             p2 = SP;
1554             for (i=0; i < max; i++) {
1555                 SV **svp = av_fetch(av, i, FALSE);
1556                 *SP++ = (svp) ? *svp : NULL;
1557             }
1558         }
1559         else {
1560             if (SvREADONLY(av))
1561                 Perl_croak(aTHX_ PL_no_modify);
1562             else
1563                 SvREADONLY_on(av);
1564             p1 = p2 = AvARRAY(av);
1565             sorting_av = 1;
1566         }
1567     }
1568     else {
1569         p2 = MARK+1;
1570         max = SP - MARK;
1571    }
1572
1573     /* shuffle stack down, removing optional initial cv (p1!=p2), plus
1574      * any nulls; also stringify or converting to integer or number as
1575      * required any args */
1576     for (i=max; i > 0 ; i--) {
1577         if ((*p1 = *p2++)) {                    /* Weed out nulls. */
1578             SvTEMP_off(*p1);
1579             if (!PL_sortcop) {
1580                 if (priv & OPpSORT_NUMERIC) {
1581                     if (priv & OPpSORT_INTEGER) {
1582                         if (!SvIOK(*p1)) {
1583                             if (SvAMAGIC(*p1))
1584                                 overloading = 1;
1585                             else
1586                                 (void)sv_2iv(*p1);
1587                         }
1588                     }
1589                     else {
1590                         if (!SvNSIOK(*p1)) {
1591                             if (SvAMAGIC(*p1))
1592                                 overloading = 1;
1593                             else
1594                                 (void)sv_2nv(*p1);
1595                         }
1596                         if (all_SIVs && !SvSIOK(*p1))
1597                             all_SIVs = 0;
1598                     }
1599                 }
1600                 else {
1601                     if (!SvPOK(*p1)) {
1602                         if (SvAMAGIC(*p1))
1603                             overloading = 1;
1604                         else
1605                             (void)sv_2pv_flags(*p1, 0,
1606                                                SV_GMAGIC|SV_CONST_RETURN);
1607                     }
1608                 }
1609             }
1610             p1++;
1611         }
1612         else
1613             max--;
1614     }
1615     if (sorting_av)
1616         AvFILLp(av) = max-1;
1617
1618     if (max > 1) {
1619         SV **start;
1620         if (PL_sortcop) {
1621             PERL_CONTEXT *cx;
1622             SV** newsp;
1623             const bool oldcatch = CATCH_GET;
1624
1625             SAVETMPS;
1626             SAVEOP();
1627
1628             CATCH_SET(TRUE);
1629             PUSHSTACKi(PERLSI_SORT);
1630             if (!hasargs && !is_xsub) {
1631                 SAVESPTR(PL_firstgv);
1632                 SAVESPTR(PL_secondgv);
1633                 SAVESPTR(PL_sortstash);
1634                 PL_firstgv = gv_fetchpvs("a", GV_ADD|GV_NOTQUAL, SVt_PV);
1635                 PL_secondgv = gv_fetchpvs("b", GV_ADD|GV_NOTQUAL, SVt_PV);
1636                 PL_sortstash = stash;
1637                 SAVESPTR(GvSV(PL_firstgv));
1638                 SAVESPTR(GvSV(PL_secondgv));
1639             }
1640
1641             PUSHBLOCK(cx, CXt_NULL, PL_stack_base);
1642             if (!(flags & OPf_SPECIAL)) {
1643                 cx->cx_type = CXt_SUB;
1644                 cx->blk_gimme = G_SCALAR;
1645                 PUSHSUB(cx);
1646                 if (!is_xsub) {
1647                     AV* const padlist = CvPADLIST(cv);
1648
1649                     if (++CvDEPTH(cv) >= 2) {
1650                         PERL_STACK_OVERFLOW_CHECK();
1651                         pad_push(padlist, CvDEPTH(cv));
1652                     }
1653                     SAVECOMPPAD();
1654                     PAD_SET_CUR_NOSAVE(padlist, CvDEPTH(cv));
1655
1656                     if (hasargs) {
1657                         /* This is mostly copied from pp_entersub */
1658                         AV * const av = (AV*)PAD_SVl(0);
1659
1660                         cx->blk_sub.savearray = GvAV(PL_defgv);
1661                         GvAV(PL_defgv) = (AV*)SvREFCNT_inc_simple(av);
1662                         CX_CURPAD_SAVE(cx->blk_sub);
1663                         cx->blk_sub.argarray = av;
1664                     }
1665
1666                 }
1667             }
1668             cx->cx_type |= CXp_MULTICALL;
1669
1670             start = p1 - max;
1671             sortsvp(aTHX_ start, max,
1672                     (is_xsub ? S_sortcv_xsub : hasargs ? S_sortcv_stacked : S_sortcv),
1673                     sort_flags);
1674
1675             if (!(flags & OPf_SPECIAL)) {
1676                 LEAVESUB(cv);
1677                 if (!is_xsub)
1678                     CvDEPTH(cv)--;
1679             }
1680             POPBLOCK(cx,PL_curpm);
1681             PL_stack_sp = newsp;
1682             POPSTACK;
1683             CATCH_SET(oldcatch);
1684         }
1685         else {
1686             MEXTEND(SP, 20);    /* Can't afford stack realloc on signal. */
1687             start = sorting_av ? AvARRAY(av) : ORIGMARK+1;
1688             sortsvp(aTHX_ start, max,
1689                     (priv & OPpSORT_NUMERIC)
1690                         ? ( ( ( priv & OPpSORT_INTEGER) || all_SIVs)
1691                             ? ( overloading ? S_amagic_i_ncmp : S_sv_i_ncmp)
1692                             : ( overloading ? S_amagic_ncmp : S_sv_ncmp ) )
1693                         : ( IN_LOCALE_RUNTIME
1694                             ? ( overloading
1695                                 ? S_amagic_cmp_locale
1696                                 : sv_cmp_locale_static)
1697                             : ( overloading ? S_amagic_cmp : sv_cmp_static)),
1698                     sort_flags);
1699         }
1700         if ((priv & OPpSORT_REVERSE) != 0) {
1701             SV **q = start+max-1;
1702             while (start < q) {
1703                 SV * const tmp = *start;
1704                 *start++ = *q;
1705                 *q-- = tmp;
1706             }
1707         }
1708     }
1709     if (sorting_av)
1710         SvREADONLY_off(av);
1711     else if (av && !sorting_av) {
1712         /* simulate pp_aassign of tied AV */
1713         SV** const base = ORIGMARK+1;
1714         for (i=0; i < max; i++) {
1715             base[i] = newSVsv(base[i]);
1716         }
1717         av_clear(av);
1718         av_extend(av, max);
1719         for (i=0; i < max; i++) {
1720             SV * const sv = base[i];
1721             SV ** const didstore = av_store(av, i, sv);
1722             if (SvSMAGICAL(sv))
1723                 mg_set(sv);
1724             if (!didstore)
1725                 sv_2mortal(sv);
1726         }
1727     }
1728     LEAVE;
1729     PL_stack_sp = ORIGMARK + (sorting_av ? 0 : max);
1730     return nextop;
1731 }
1732
1733 static I32
1734 S_sortcv(pTHX_ SV *a, SV *b)
1735 {
1736     dVAR;
1737     const I32 oldsaveix = PL_savestack_ix;
1738     const I32 oldscopeix = PL_scopestack_ix;
1739     I32 result;
1740     GvSV(PL_firstgv) = a;
1741     GvSV(PL_secondgv) = b;
1742     PL_stack_sp = PL_stack_base;
1743     PL_op = PL_sortcop;
1744     CALLRUNOPS(aTHX);
1745     if (PL_stack_sp != PL_stack_base + 1)
1746         Perl_croak(aTHX_ "Sort subroutine didn't return single value");
1747     if (!SvNIOKp(*PL_stack_sp))
1748         Perl_croak(aTHX_ "Sort subroutine didn't return a numeric value");
1749     result = SvIV(*PL_stack_sp);
1750     while (PL_scopestack_ix > oldscopeix) {
1751         LEAVE;
1752     }
1753     leave_scope(oldsaveix);
1754     return result;
1755 }
1756
1757 static I32
1758 S_sortcv_stacked(pTHX_ SV *a, SV *b)
1759 {
1760     dVAR;
1761     const I32 oldsaveix = PL_savestack_ix;
1762     const I32 oldscopeix = PL_scopestack_ix;
1763     I32 result;
1764     AV * const av = GvAV(PL_defgv);
1765
1766     if (AvMAX(av) < 1) {
1767         SV** ary = AvALLOC(av);
1768         if (AvARRAY(av) != ary) {
1769             AvMAX(av) += AvARRAY(av) - AvALLOC(av);
1770             SvPV_set(av, (char*)ary);
1771         }
1772         if (AvMAX(av) < 1) {
1773             AvMAX(av) = 1;
1774             Renew(ary,2,SV*);
1775             SvPV_set(av, (char*)ary);
1776         }
1777     }
1778     AvFILLp(av) = 1;
1779
1780     AvARRAY(av)[0] = a;
1781     AvARRAY(av)[1] = b;
1782     PL_stack_sp = PL_stack_base;
1783     PL_op = PL_sortcop;
1784     CALLRUNOPS(aTHX);
1785     if (PL_stack_sp != PL_stack_base + 1)
1786         Perl_croak(aTHX_ "Sort subroutine didn't return single value");
1787     if (!SvNIOKp(*PL_stack_sp))
1788         Perl_croak(aTHX_ "Sort subroutine didn't return a numeric value");
1789     result = SvIV(*PL_stack_sp);
1790     while (PL_scopestack_ix > oldscopeix) {
1791         LEAVE;
1792     }
1793     leave_scope(oldsaveix);
1794     return result;
1795 }
1796
1797 static I32
1798 S_sortcv_xsub(pTHX_ SV *a, SV *b)
1799 {
1800     dVAR; dSP;
1801     const I32 oldsaveix = PL_savestack_ix;
1802     const I32 oldscopeix = PL_scopestack_ix;
1803     CV * const cv=(CV*)PL_sortcop;
1804     I32 result;
1805
1806     SP = PL_stack_base;
1807     PUSHMARK(SP);
1808     EXTEND(SP, 2);
1809     *++SP = a;
1810     *++SP = b;
1811     PUTBACK;
1812     (void)(*CvXSUB(cv))(aTHX_ cv);
1813     if (PL_stack_sp != PL_stack_base + 1)
1814         Perl_croak(aTHX_ "Sort subroutine didn't return single value");
1815     if (!SvNIOKp(*PL_stack_sp))
1816         Perl_croak(aTHX_ "Sort subroutine didn't return a numeric value");
1817     result = SvIV(*PL_stack_sp);
1818     while (PL_scopestack_ix > oldscopeix) {
1819         LEAVE;
1820     }
1821     leave_scope(oldsaveix);
1822     return result;
1823 }
1824
1825
1826 static I32
1827 S_sv_ncmp(pTHX_ SV *a, SV *b)
1828 {
1829     const NV nv1 = SvNSIV(a);
1830     const NV nv2 = SvNSIV(b);
1831     return nv1 < nv2 ? -1 : nv1 > nv2 ? 1 : 0;
1832 }
1833
1834 static I32
1835 S_sv_i_ncmp(pTHX_ SV *a, SV *b)
1836 {
1837     const IV iv1 = SvIV(a);
1838     const IV iv2 = SvIV(b);
1839     return iv1 < iv2 ? -1 : iv1 > iv2 ? 1 : 0;
1840 }
1841
1842 #define tryCALL_AMAGICbin(left,right,meth) \
1843     (PL_amagic_generation && (SvAMAGIC(left)||SvAMAGIC(right))) \
1844         ? amagic_call(left, right, CAT2(meth,_amg), 0) \
1845         : NULL;
1846
1847 static I32
1848 S_amagic_ncmp(pTHX_ register SV *a, register SV *b)
1849 {
1850     dVAR;
1851     SV * const tmpsv = tryCALL_AMAGICbin(a,b,ncmp);
1852     if (tmpsv) {
1853         if (SvIOK(tmpsv)) {
1854             const I32 i = SvIVX(tmpsv);
1855             if (i > 0)
1856                return 1;
1857             return i? -1 : 0;
1858         }
1859         else {
1860             const NV d = SvNV(tmpsv);
1861             if (d > 0)
1862                return 1;
1863             return d ? -1 : 0;
1864         }
1865      }
1866      return S_sv_ncmp(aTHX_ a, b);
1867 }
1868
1869 static I32
1870 S_amagic_i_ncmp(pTHX_ register SV *a, register SV *b)
1871 {
1872     dVAR;
1873     SV * const tmpsv = tryCALL_AMAGICbin(a,b,ncmp);
1874     if (tmpsv) {
1875         if (SvIOK(tmpsv)) {
1876             const I32 i = SvIVX(tmpsv);
1877             if (i > 0)
1878                return 1;
1879             return i? -1 : 0;
1880         }
1881         else {
1882             const NV d = SvNV(tmpsv);
1883             if (d > 0)
1884                return 1;
1885             return d ? -1 : 0;
1886         }
1887     }
1888     return S_sv_i_ncmp(aTHX_ a, b);
1889 }
1890
1891 static I32
1892 S_amagic_cmp(pTHX_ register SV *str1, register SV *str2)
1893 {
1894     dVAR;
1895     SV * const tmpsv = tryCALL_AMAGICbin(str1,str2,scmp);
1896     if (tmpsv) {
1897         if (SvIOK(tmpsv)) {
1898             const I32 i = SvIVX(tmpsv);
1899             if (i > 0)
1900                return 1;
1901             return i? -1 : 0;
1902         }
1903         else {
1904             const NV d = SvNV(tmpsv);
1905             if (d > 0)
1906                return 1;
1907             return d? -1 : 0;
1908         }
1909     }
1910     return sv_cmp(str1, str2);
1911 }
1912
1913 static I32
1914 S_amagic_cmp_locale(pTHX_ register SV *str1, register SV *str2)
1915 {
1916     dVAR;
1917     SV * const tmpsv = tryCALL_AMAGICbin(str1,str2,scmp);
1918     if (tmpsv) {
1919         if (SvIOK(tmpsv)) {
1920             const I32 i = SvIVX(tmpsv);
1921             if (i > 0)
1922                return 1;
1923             return i? -1 : 0;
1924         }
1925         else {
1926             const NV d = SvNV(tmpsv);
1927             if (d > 0)
1928                return 1;
1929             return d? -1 : 0;
1930         }
1931     }
1932     return sv_cmp_locale(str1, str2);
1933 }
1934
1935 /*
1936  * Local variables:
1937  * c-indentation-style: bsd
1938  * c-basic-offset: 4
1939  * indent-tabs-mode: t
1940  * End:
1941  *
1942  * ex: set ts=8 sts=4 sw=4 noet:
1943  */