pp_sort.c

   1 /*    pp_sort.c
   2  *
   3  *    Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
   4  *    2000, 2001, 2002, 2003, 2004, 2005, 2006, by Larry Wall and others
   5  *
   6  *    You may distribute under the terms of either the GNU General Public
   7  *    License or the Artistic License, as specified in the README file.
   8  *
   9  */
  10
  11 /*
  12  *   ...they shuffled back towards the rear of the line. 'No, not at the
  13  *   rear!'  the slave-driver shouted. 'Three files up. And stay there...
  14  */
  15
  16 /* This file contains pp ("push/pop") functions that
  17  * execute the opcodes that make up a perl program. A typical pp function
  18  * expects to find its arguments on the stack, and usually pushes its
  19  * results onto the stack, hence the 'pp' terminology. Each OP structure
  20  * contains a pointer to the relevant pp_foo() function.
  21  *
  22  * This particular file just contains pp_sort(), which is complex
  23  * enough to merit its own file! See the other pp*.c files for the rest of
  24  * the pp_ functions.
  25  */
  26
  27 #include "EXTERN.h"
  28 #define PERL_IN_PP_SORT_C
  29 #include "perl.h"
  30
  31 #if defined(UNDER_CE)
  32 /* looks like 'small' is reserved word for WINCE (or somesuch)*/
  33 #define small xsmall
  34 #endif
  35
  36 #define sv_cmp_static Perl_sv_cmp
  37 #define sv_cmp_locale_static Perl_sv_cmp_locale
  38
  39 #ifndef SMALLSORT
  40 #define SMALLSORT (200)
  41 #endif
  42
  43 /* Flags for qsortsv and mergesortsv */
  44 #define SORTf_DESC   1
  45 #define SORTf_STABLE 2
  46 #define SORTf_QSORT  4
  47
  48 /*
  49  * The mergesort implementation is by Peter M. Mcilroy <pmcilroy@lucent.com>.
  50  *
  51  * The original code was written in conjunction with BSD Computer Software
  52  * Research Group at University of California, Berkeley.
  53  *
  54  * See also: "Optimistic Merge Sort" (SODA '92)
  55  *
  56  * The integration to Perl is by John P. Linderman <jpl@research.att.com>.
  57  *
  58  * The code can be distributed under the same terms as Perl itself.
  59  *
  60  */
  61
  62
  63 typedef char * aptr;            /* pointer for arithmetic on sizes */
  64 typedef SV * gptr;              /* pointers in our lists */
  65
  66 /* Binary merge internal sort, with a few special mods
  67 ** for the special perl environment it now finds itself in.
  68 **
  69 ** Things that were once options have been hotwired
  70 ** to values suitable for this use.  In particular, we'll always
  71 ** initialize looking for natural runs, we'll always produce stable
  72 ** output, and we'll always do Peter McIlroy's binary merge.
  73 */
  74
  75 /* Pointer types for arithmetic and storage and convenience casts */
  76
  77 #define APTR(P) ((aptr)(P))
  78 #define GPTP(P) ((gptr *)(P))
  79 #define GPPP(P) ((gptr **)(P))
  80
  81
  82 /* byte offset from pointer P to (larger) pointer Q */
  83 #define BYTEOFF(P, Q) (APTR(Q) - APTR(P))
  84
  85 #define PSIZE sizeof(gptr)
  86
  87 /* If PSIZE is power of 2, make PSHIFT that power, if that helps */
  88
  89 #ifdef  PSHIFT
  90 #define PNELEM(P, Q)    (BYTEOFF(P,Q) >> (PSHIFT))
  91 #define PNBYTE(N)       ((N) << (PSHIFT))
  92 #define PINDEX(P, N)    (GPTP(APTR(P) + PNBYTE(N)))
  93 #else
  94 /* Leave optimization to compiler */
  95 #define PNELEM(P, Q)    (GPTP(Q) - GPTP(P))
  96 #define PNBYTE(N)       ((N) * (PSIZE))
  97 #define PINDEX(P, N)    (GPTP(P) + (N))
  98 #endif
  99
 100 /* Pointer into other corresponding to pointer into this */
 101 #define POTHER(P, THIS, OTHER) GPTP(APTR(OTHER) + BYTEOFF(THIS,P))
 102
 103 #define FROMTOUPTO(src, dst, lim) do *dst++ = *src++; while(src<lim)
 104
 105
 106 /* Runs are identified by a pointer in the auxilliary list.
 107 ** The pointer is at the start of the list,
 108 ** and it points to the start of the next list.
 109 ** NEXT is used as an lvalue, too.
 110 */
 111
 112 #define NEXT(P)         (*GPPP(P))
 113
 114
 115 /* PTHRESH is the minimum number of pairs with the same sense to justify
 116 ** checking for a run and extending it.  Note that PTHRESH counts PAIRS,
 117 ** not just elements, so PTHRESH == 8 means a run of 16.
 118 */
 119
 120 #define PTHRESH (8)
 121
 122 /* RTHRESH is the number of elements in a run that must compare low
 123 ** to the low element from the opposing run before we justify
 124 ** doing a binary rampup instead of single stepping.
 125 ** In random input, N in a row low should only happen with
 126 ** probability 2^(1-N), so we can risk that we are dealing
 127 ** with orderly input without paying much when we aren't.
 128 */
 129
 130 #define RTHRESH (6)
 131
 132
 133 /*
 134 ** Overview of algorithm and variables.
 135 ** The array of elements at list1 will be organized into runs of length 2,
 136 ** or runs of length >= 2 * PTHRESH.  We only try to form long runs when
 137 ** PTHRESH adjacent pairs compare in the same way, suggesting overall order.
 138 **
 139 ** Unless otherwise specified, pair pointers address the first of two elements.
 140 **
 141 ** b and b+1 are a pair that compare with sense "sense".
 142 ** b is the "bottom" of adjacent pairs that might form a longer run.
 143 **
 144 ** p2 parallels b in the list2 array, where runs are defined by
 145 ** a pointer chain.
 146 **
 147 ** t represents the "top" of the adjacent pairs that might extend
 148 ** the run beginning at b.  Usually, t addresses a pair
 149 ** that compares with opposite sense from (b,b+1).
 150 ** However, it may also address a singleton element at the end of list1,
 151 ** or it may be equal to "last", the first element beyond list1.
 152 **
 153 ** r addresses the Nth pair following b.  If this would be beyond t,
 154 ** we back it off to t.  Only when r is less than t do we consider the
 155 ** run long enough to consider checking.
 156 **
 157 ** q addresses a pair such that the pairs at b through q already form a run.
 158 ** Often, q will equal b, indicating we only are sure of the pair itself.
 159 ** However, a search on the previous cycle may have revealed a longer run,
 160 ** so q may be greater than b.
 161 **
 162 ** p is used to work back from a candidate r, trying to reach q,
 163 ** which would mean b through r would be a run.  If we discover such a run,
 164 ** we start q at r and try to push it further towards t.
 165 ** If b through r is NOT a run, we detect the wrong order at (p-1,p).
 166 ** In any event, after the check (if any), we have two main cases.
 167 **
 168 ** 1) Short run.  b <= q < p <= r <= t.
 169 **      b through q is a run (perhaps trivial)
 170 **      q through p are uninteresting pairs
 171 **      p through r is a run
 172 **
 173 ** 2) Long run.  b < r <= q < t.
 174 **      b through q is a run (of length >= 2 * PTHRESH)
 175 **
 176 ** Note that degenerate cases are not only possible, but likely.
 177 ** For example, if the pair following b compares with opposite sense,
 178 ** then b == q < p == r == t.
 179 */
 180
 181
 182 static IV
 183 dynprep(pTHX_ gptr *list1, gptr *list2, size_t nmemb, const SVCOMPARE_t cmp)
 184 {
 185     I32 sense;
 186     register gptr *b, *p, *q, *t, *p2;
 187     register gptr *last, *r;
 188     IV runs = 0;
 189
 190     b = list1;
 191     last = PINDEX(b, nmemb);
 192     sense = (cmp(aTHX_ *b, *(b+1)) > 0);
 193     for (p2 = list2; b < last; ) {
 194         /* We just started, or just reversed sense.
 195         ** Set t at end of pairs with the prevailing sense.
 196         */
 197         for (p = b+2, t = p; ++p < last; t = ++p) {
 198             if ((cmp(aTHX_ *t, *p) > 0) != sense) break;
 199         }
 200         q = b;
 201         /* Having laid out the playing field, look for long runs */
 202         do {
 203             p = r = b + (2 * PTHRESH);
 204             if (r >= t) p = r = t;      /* too short to care about */
 205             else {
 206                 while (((cmp(aTHX_ *(p-1), *p) > 0) == sense) &&
 207                        ((p -= 2) > q));
 208                 if (p <= q) {
 209                     /* b through r is a (long) run.
 210                     ** Extend it as far as possible.
 211                     */
 212                     p = q = r;
 213                     while (((p += 2) < t) &&
 214                            ((cmp(aTHX_ *(p-1), *p) > 0) == sense)) q = p;
 215                     r = p = q + 2;      /* no simple pairs, no after-run */
 216                 }
 217             }
 218             if (q > b) {                /* run of greater than 2 at b */
 219                 gptr *savep = p;
 220
 221                 p = q += 2;
 222                 /* pick up singleton, if possible */
 223                 if ((p == t) &&
 224                     ((t + 1) == last) &&
 225                     ((cmp(aTHX_ *(p-1), *p) > 0) == sense))
 226                     savep = r = p = q = last;
 227                 p2 = NEXT(p2) = p2 + (p - b); ++runs;
 228                 if (sense)
 229                     while (b < --p) {
 230                         const gptr c = *b;
 231                         *b++ = *p;
 232                         *p = c;
 233                     }
 234                 p = savep;
 235             }
 236             while (q < p) {             /* simple pairs */
 237                 p2 = NEXT(p2) = p2 + 2; ++runs;
 238                 if (sense) {
 239                     const gptr c = *q++;
 240                     *(q-1) = *q;
 241                     *q++ = c;
 242                 } else q += 2;
 243             }
 244             if (((b = p) == t) && ((t+1) == last)) {
 245                 NEXT(p2) = p2 + 1; ++runs;
 246                 b++;
 247             }
 248             q = r;
 249         } while (b < t);
 250         sense = !sense;
 251     }
 252     return runs;
 253 }
 254
 255
 256 /* The original merge sort, in use since 5.7, was as fast as, or faster than,
 257  * qsort on many platforms, but slower than qsort, conspicuously so,
 258  * on others.  The most likely explanation was platform-specific
 259  * differences in cache sizes and relative speeds.
 260  *
 261  * The quicksort divide-and-conquer algorithm guarantees that, as the
 262  * problem is subdivided into smaller and smaller parts, the parts
 263  * fit into smaller (and faster) caches.  So it doesn't matter how
 264  * many levels of cache exist, quicksort will "find" them, and,
 265  * as long as smaller is faster, take advantage of them.
 266  *
 267  * By contrast, consider how the original mergesort algorithm worked.
 268  * Suppose we have five runs (each typically of length 2 after dynprep).
 269  *
 270  * pass               base                        aux
 271  *  0              1 2 3 4 5
 272  *  1                                           12 34 5
 273  *  2                1234 5
 274  *  3                                            12345
 275  *  4                 12345
 276  *
 277  * Adjacent pairs are merged in "grand sweeps" through the input.
 278  * This means, on pass 1, the records in runs 1 and 2 aren't revisited until
 279  * runs 3 and 4 are merged and the runs from run 5 have been copied.
 280  * The only cache that matters is one large enough to hold *all* the input.
 281  * On some platforms, this may be many times slower than smaller caches.
 282  *
 283  * The following pseudo-code uses the same basic merge algorithm,
 284  * but in a divide-and-conquer way.
 285  *
 286  * # merge $runs runs at offset $offset of list $list1 into $list2.
 287  * # all unmerged runs ($runs == 1) originate in list $base.
 288  * sub mgsort2 {
 289  *     my ($offset, $runs, $base, $list1, $list2) = @_;
 290  *
 291  *     if ($runs == 1) {
 292  *         if ($list1 is $base) copy run to $list2
 293  *         return offset of end of list (or copy)
 294  *     } else {
 295  *         $off2 = mgsort2($offset, $runs-($runs/2), $base, $list2, $list1)
 296  *         mgsort2($off2, $runs/2, $base, $list2, $list1)
 297  *         merge the adjacent runs at $offset of $list1 into $list2
 298  *         return the offset of the end of the merged runs
 299  *     }
 300  * }
 301  * mgsort2(0, $runs, $base, $aux, $base);
 302  *
 303  * For our 5 runs, the tree of calls looks like
 304  *
 305  *           5
 306  *      3        2
 307  *   2     1   1   1
 308  * 1   1
 309  *
 310  * 1   2   3   4   5
 311  *
 312  * and the corresponding activity looks like
 313  *
 314  * copy runs 1 and 2 from base to aux
 315  * merge runs 1 and 2 from aux to base
 316  * (run 3 is where it belongs, no copy needed)
 317  * merge runs 12 and 3 from base to aux
 318  * (runs 4 and 5 are where they belong, no copy needed)
 319  * merge runs 4 and 5 from base to aux
 320  * merge runs 123 and 45 from aux to base
 321  *
 322  * Note that we merge runs 1 and 2 immediately after copying them,
 323  * while they are still likely to be in fast cache.  Similarly,
 324  * run 3 is merged with run 12 while it still may be lingering in cache.
 325  * This implementation should therefore enjoy much of the cache-friendly
 326  * behavior that quicksort does.  In addition, it does less copying
 327  * than the original mergesort implementation (only runs 1 and 2 are copied)
 328  * and the "balancing" of merges is better (merged runs comprise more nearly
 329  * equal numbers of original runs).
 330  *
 331  * The actual cache-friendly implementation will use a pseudo-stack
 332  * to avoid recursion, and will unroll processing of runs of length 2,
 333  * but it is otherwise similar to the recursive implementation.
 334  */
 335
 336 typedef struct {
 337     IV  offset;         /* offset of 1st of 2 runs at this level */
 338     IV  runs;           /* how many runs must be combined into 1 */
 339 } off_runs;             /* pseudo-stack element */
 340
 341
 342 static I32
 343 cmp_desc(pTHX_ gptr a, gptr b)
 344 {
 345     dVAR;
 346     return -PL_sort_RealCmp(aTHX_ a, b);
 347 }
 348
 349 STATIC void
 350 S_mergesortsv(pTHX_ gptr *base, size_t nmemb, SVCOMPARE_t cmp, U32 flags)
 351 {
 352     dVAR;
 353     IV i, run, offset;
 354     I32 sense, level;
 355     register gptr *f1, *f2, *t, *b, *p;
 356     int iwhich;
 357     gptr *aux;
 358     gptr *p1;
 359     gptr small[SMALLSORT];
 360     gptr *which[3];
 361     off_runs stack[60], *stackp;
 362     SVCOMPARE_t savecmp = NULL;
 363
 364     if (nmemb <= 1) return;                     /* sorted trivially */
 365
 366     if ((flags & SORTf_DESC) != 0) {
 367         savecmp = PL_sort_RealCmp;      /* Save current comparison routine, if any */
 368         PL_sort_RealCmp = cmp;  /* Put comparison routine where cmp_desc can find it */
 369         cmp = cmp_desc;
 370     }
 371
 372     if (nmemb <= SMALLSORT) aux = small;        /* use stack for aux array */
 373     else { Newx(aux,nmemb,gptr); }              /* allocate auxilliary array */
 374     level = 0;
 375     stackp = stack;
 376     stackp->runs = dynprep(aTHX_ base, aux, nmemb, cmp);
 377     stackp->offset = offset = 0;
 378     which[0] = which[2] = base;
 379     which[1] = aux;
 380     for (;;) {
 381         /* On levels where both runs have be constructed (stackp->runs == 0),
 382          * merge them, and note the offset of their end, in case the offset
 383          * is needed at the next level up.  Hop up a level, and,
 384          * as long as stackp->runs is 0, keep merging.
 385          */
 386         IV runs = stackp->runs;
 387         if (runs == 0) {
 388             gptr *list1, *list2;
 389             iwhich = level & 1;
 390             list1 = which[iwhich];              /* area where runs are now */
 391             list2 = which[++iwhich];            /* area for merged runs */
 392             do {
 393                 register gptr *l1, *l2, *tp2;
 394                 offset = stackp->offset;
 395                 f1 = p1 = list1 + offset;               /* start of first run */
 396                 p = tp2 = list2 + offset;       /* where merged run will go */
 397                 t = NEXT(p);                    /* where first run ends */
 398                 f2 = l1 = POTHER(t, list2, list1); /* ... on the other side */
 399                 t = NEXT(t);                    /* where second runs ends */
 400                 l2 = POTHER(t, list2, list1);   /* ... on the other side */
 401                 offset = PNELEM(list2, t);
 402                 while (f1 < l1 && f2 < l2) {
 403                     /* If head 1 is larger than head 2, find ALL the elements
 404                     ** in list 2 strictly less than head1, write them all,
 405                     ** then head 1.  Then compare the new heads, and repeat,
 406                     ** until one or both lists are exhausted.
 407                     **
 408                     ** In all comparisons (after establishing
 409                     ** which head to merge) the item to merge
 410                     ** (at pointer q) is the first operand of
 411                     ** the comparison.  When we want to know
 412                     ** if "q is strictly less than the other",
 413                     ** we can't just do
 414                     **    cmp(q, other) < 0
 415                     ** because stability demands that we treat equality
 416                     ** as high when q comes from l2, and as low when
 417                     ** q was from l1.  So we ask the question by doing
 418                     **    cmp(q, other) <= sense
 419                     ** and make sense == 0 when equality should look low,
 420                     ** and -1 when equality should look high.
 421                     */
 422
 423                     register gptr *q;
 424                     if (cmp(aTHX_ *f1, *f2) <= 0) {
 425                         q = f2; b = f1; t = l1;
 426                         sense = -1;
 427                     } else {
 428                         q = f1; b = f2; t = l2;
 429                         sense = 0;
 430                     }
 431
 432
 433                     /* ramp up
 434                     **
 435                     ** Leave t at something strictly
 436                     ** greater than q (or at the end of the list),
 437                     ** and b at something strictly less than q.
 438                     */
 439                     for (i = 1, run = 0 ;;) {
 440                         if ((p = PINDEX(b, i)) >= t) {
 441                             /* off the end */
 442                             if (((p = PINDEX(t, -1)) > b) &&
 443                                 (cmp(aTHX_ *q, *p) <= sense))
 444                                  t = p;
 445                             else b = p;
 446                             break;
 447                         } else if (cmp(aTHX_ *q, *p) <= sense) {
 448                             t = p;
 449                             break;
 450                         } else b = p;
 451                         if (++run >= RTHRESH) i += i;
 452                     }
 453
 454
 455                     /* q is known to follow b and must be inserted before t.
 456                     ** Increment b, so the range of possibilities is [b,t).
 457                     ** Round binary split down, to favor early appearance.
 458                     ** Adjust b and t until q belongs just before t.
 459                     */
 460
 461                     b++;
 462                     while (b < t) {
 463                         p = PINDEX(b, (PNELEM(b, t) - 1) / 2);
 464                         if (cmp(aTHX_ *q, *p) <= sense) {
 465                             t = p;
 466                         } else b = p + 1;
 467                     }
 468
 469
 470                     /* Copy all the strictly low elements */
 471
 472                     if (q == f1) {
 473                         FROMTOUPTO(f2, tp2, t);
 474                         *tp2++ = *f1++;
 475                     } else {
 476                         FROMTOUPTO(f1, tp2, t);
 477                         *tp2++ = *f2++;
 478                     }
 479                 }
 480
 481
 482                 /* Run out remaining list */
 483                 if (f1 == l1) {
 484                        if (f2 < l2) FROMTOUPTO(f2, tp2, l2);
 485                 } else              FROMTOUPTO(f1, tp2, l1);
 486                 p1 = NEXT(p1) = POTHER(tp2, list2, list1);
 487
 488                 if (--level == 0) goto done;
 489                 --stackp;
 490                 t = list1; list1 = list2; list2 = t;    /* swap lists */
 491             } while ((runs = stackp->runs) == 0);
 492         }
 493
 494
 495         stackp->runs = 0;               /* current run will finish level */
 496         /* While there are more than 2 runs remaining,
 497          * turn them into exactly 2 runs (at the "other" level),
 498          * each made up of approximately half the runs.
 499          * Stack the second half for later processing,
 500          * and set about producing the first half now.
 501          */
 502         while (runs > 2) {
 503             ++level;
 504             ++stackp;
 505             stackp->offset = offset;
 506             runs -= stackp->runs = runs / 2;
 507         }
 508         /* We must construct a single run from 1 or 2 runs.
 509          * All the original runs are in which[0] == base.
 510          * The run we construct must end up in which[level&1].
 511          */
 512         iwhich = level & 1;
 513         if (runs == 1) {
 514             /* Constructing a single run from a single run.
 515              * If it's where it belongs already, there's nothing to do.
 516              * Otherwise, copy it to where it belongs.
 517              * A run of 1 is either a singleton at level 0,
 518              * or the second half of a split 3.  In neither event
 519              * is it necessary to set offset.  It will be set by the merge
 520              * that immediately follows.
 521              */
 522             if (iwhich) {       /* Belongs in aux, currently in base */
 523                 f1 = b = PINDEX(base, offset);  /* where list starts */
 524                 f2 = PINDEX(aux, offset);       /* where list goes */
 525                 t = NEXT(f2);                   /* where list will end */
 526                 offset = PNELEM(aux, t);        /* offset thereof */
 527                 t = PINDEX(base, offset);       /* where it currently ends */
 528                 FROMTOUPTO(f1, f2, t);          /* copy */
 529                 NEXT(b) = t;                    /* set up parallel pointer */
 530             } else if (level == 0) goto done;   /* single run at level 0 */
 531         } else {
 532             /* Constructing a single run from two runs.
 533              * The merge code at the top will do that.
 534              * We need only make sure the two runs are in the "other" array,
 535              * so they'll end up in the correct array after the merge.
 536              */
 537             ++level;
 538             ++stackp;
 539             stackp->offset = offset;
 540             stackp->runs = 0;   /* take care of both runs, trigger merge */
 541             if (!iwhich) {      /* Merged runs belong in aux, copy 1st */
 542                 f1 = b = PINDEX(base, offset);  /* where first run starts */
 543                 f2 = PINDEX(aux, offset);       /* where it will be copied */
 544                 t = NEXT(f2);                   /* where first run will end */
 545                 offset = PNELEM(aux, t);        /* offset thereof */
 546                 p = PINDEX(base, offset);       /* end of first run */
 547                 t = NEXT(t);                    /* where second run will end */
 548                 t = PINDEX(base, PNELEM(aux, t)); /* where it now ends */
 549                 FROMTOUPTO(f1, f2, t);          /* copy both runs */
 550                 NEXT(b) = p;                    /* paralled pointer for 1st */
 551                 NEXT(p) = t;                    /* ... and for second */
 552             }
 553         }
 554     }
 555 done:
 556     if (aux != small) Safefree(aux);    /* free iff allocated */
 557     if (flags) {
 558          PL_sort_RealCmp = savecmp;     /* Restore current comparison routine, if any */
 559     }
 560     return;
 561 }
 562
 563 /*
 564  * The quicksort implementation was derived from source code contributed
 565  * by Tom Horsley.
 566  *
 567  * NOTE: this code was derived from Tom Horsley's qsort replacement
 568  * and should not be confused with the original code.
 569  */
 570
 571 /* Copyright (C) Tom Horsley, 1997. All rights reserved.
 572
 573    Permission granted to distribute under the same terms as perl which are
 574    (briefly):
 575
 576     This program is free software; you can redistribute it and/or modify
 577     it under the terms of either:
 578
 579         a) the GNU General Public License as published by the Free
 580         Software Foundation; either version 1, or (at your option) any
 581         later version, or
 582
 583         b) the "Artistic License" which comes with this Kit.
 584
 585    Details on the perl license can be found in the perl source code which
 586    may be located via the www.perl.com web page.
 587
 588    This is the most wonderfulest possible qsort I can come up with (and
 589    still be mostly portable) My (limited) tests indicate it consistently
 590    does about 20% fewer calls to compare than does the qsort in the Visual
 591    C++ library, other vendors may vary.
 592
 593    Some of the ideas in here can be found in "Algorithms" by Sedgewick,
 594    others I invented myself (or more likely re-invented since they seemed
 595    pretty obvious once I watched the algorithm operate for a while).
 596
 597    Most of this code was written while watching the Marlins sweep the Giants
 598    in the 1997 National League Playoffs - no Braves fans allowed to use this
 599    code (just kidding :-).
 600
 601    I realize that if I wanted to be true to the perl tradition, the only
 602    comment in this file would be something like:
 603
 604    ...they shuffled back towards the rear of the line. 'No, not at the
 605    rear!'  the slave-driver shouted. 'Three files up. And stay there...
 606
 607    However, I really needed to violate that tradition just so I could keep
 608    track of what happens myself, not to mention some poor fool trying to
 609    understand this years from now :-).
 610 */
 611
 612 /* ********************************************************** Configuration */
 613
 614 #ifndef QSORT_ORDER_GUESS
 615 #define QSORT_ORDER_GUESS 2     /* Select doubling version of the netBSD trick */
 616 #endif
 617
 618 /* QSORT_MAX_STACK is the largest number of partitions that can be stacked up for
 619    future processing - a good max upper bound is log base 2 of memory size
 620    (32 on 32 bit machines, 64 on 64 bit machines, etc). In reality can
 621    safely be smaller than that since the program is taking up some space and
 622    most operating systems only let you grab some subset of contiguous
 623    memory (not to mention that you are normally sorting data larger than
 624    1 byte element size :-).
 625 */
 626 #ifndef QSORT_MAX_STACK
 627 #define QSORT_MAX_STACK 32
 628 #endif
 629
 630 /* QSORT_BREAK_EVEN is the size of the largest partition we should insertion sort.
 631    Anything bigger and we use qsort. If you make this too small, the qsort
 632    will probably break (or become less efficient), because it doesn't expect
 633    the middle element of a partition to be the same as the right or left -
 634    you have been warned).
 635 */
 636 #ifndef QSORT_BREAK_EVEN
 637 #define QSORT_BREAK_EVEN 6
 638 #endif
 639
 640 /* QSORT_PLAY_SAFE is the size of the largest partition we're willing
 641    to go quadratic on.  We innoculate larger partitions against
 642    quadratic behavior by shuffling them before sorting.  This is not
 643    an absolute guarantee of non-quadratic behavior, but it would take
 644    staggeringly bad luck to pick extreme elements as the pivot
 645    from randomized data.
 646 */
 647 #ifndef QSORT_PLAY_SAFE
 648 #define QSORT_PLAY_SAFE 255
 649 #endif
 650
 651 /* ************************************************************* Data Types */
 652
 653 /* hold left and right index values of a partition waiting to be sorted (the
 654    partition includes both left and right - right is NOT one past the end or
 655    anything like that).
 656 */
 657 struct partition_stack_entry {
 658    int left;
 659    int right;
 660 #ifdef QSORT_ORDER_GUESS
 661    int qsort_break_even;
 662 #endif
 663 };
 664
 665 /* ******************************************************* Shorthand Macros */
 666
 667 /* Note that these macros will be used from inside the qsort function where
 668    we happen to know that the variable 'elt_size' contains the size of an
 669    array element and the variable 'temp' points to enough space to hold a
 670    temp element and the variable 'array' points to the array being sorted
 671    and 'compare' is the pointer to the compare routine.
 672
 673    Also note that there are very many highly architecture specific ways
 674    these might be sped up, but this is simply the most generally portable
 675    code I could think of.
 676 */
 677
 678 /* Return < 0 == 0 or > 0 as the value of elt1 is < elt2, == elt2, > elt2
 679 */
 680 #define qsort_cmp(elt1, elt2) \
 681    ((*compare)(aTHX_ array[elt1], array[elt2]))
 682
 683 #ifdef QSORT_ORDER_GUESS
 684 #define QSORT_NOTICE_SWAP swapped++;
 685 #else
 686 #define QSORT_NOTICE_SWAP
 687 #endif
 688
 689 /* swaps contents of array elements elt1, elt2.
 690 */
 691 #define qsort_swap(elt1, elt2) \
 692    STMT_START { \
 693       QSORT_NOTICE_SWAP \
 694       temp = array[elt1]; \
 695       array[elt1] = array[elt2]; \
 696       array[elt2] = temp; \
 697    } STMT_END
 698
 699 /* rotate contents of elt1, elt2, elt3 such that elt1 gets elt2, elt2 gets
 700    elt3 and elt3 gets elt1.
 701 */
 702 #define qsort_rotate(elt1, elt2, elt3) \
 703    STMT_START { \
 704       QSORT_NOTICE_SWAP \
 705       temp = array[elt1]; \
 706       array[elt1] = array[elt2]; \
 707       array[elt2] = array[elt3]; \
 708       array[elt3] = temp; \
 709    } STMT_END
 710
 711 /* ************************************************************ Debug stuff */
 712
 713 #ifdef QSORT_DEBUG
 714
 715 static void
 716 break_here()
 717 {
 718    return; /* good place to set a breakpoint */
 719 }
 720
 721 #define qsort_assert(t) (void)( (t) || (break_here(), 0) )
 722
 723 static void
 724 doqsort_all_asserts(
 725    void * array,
 726    size_t num_elts,
 727    size_t elt_size,
 728    int (*compare)(const void * elt1, const void * elt2),
 729    int pc_left, int pc_right, int u_left, int u_right)
 730 {
 731    int i;
 732
 733    qsort_assert(pc_left <= pc_right);
 734    qsort_assert(u_right < pc_left);
 735    qsort_assert(pc_right < u_left);
 736    for (i = u_right + 1; i < pc_left; ++i) {
 737       qsort_assert(qsort_cmp(i, pc_left) < 0);
 738    }
 739    for (i = pc_left; i < pc_right; ++i) {
 740       qsort_assert(qsort_cmp(i, pc_right) == 0);
 741    }
 742    for (i = pc_right + 1; i < u_left; ++i) {
 743       qsort_assert(qsort_cmp(pc_right, i) < 0);
 744    }
 745 }
 746
 747 #define qsort_all_asserts(PC_LEFT, PC_RIGHT, U_LEFT, U_RIGHT) \
 748    doqsort_all_asserts(array, num_elts, elt_size, compare, \
 749                  PC_LEFT, PC_RIGHT, U_LEFT, U_RIGHT)
 750
 751 #else
 752
 753 #define qsort_assert(t) ((void)0)
 754
 755 #define qsort_all_asserts(PC_LEFT, PC_RIGHT, U_LEFT, U_RIGHT) ((void)0)
 756
 757 #endif
 758
 759 /* ****************************************************************** qsort */
 760
 761 STATIC void /* the standard unstable (u) quicksort (qsort) */
 762 S_qsortsvu(pTHX_ SV ** array, size_t num_elts, SVCOMPARE_t compare)
 763 {
 764    register SV * temp;
 765
 766    struct partition_stack_entry partition_stack[QSORT_MAX_STACK];
 767    int next_stack_entry = 0;
 768
 769    int part_left;
 770    int part_right;
 771 #ifdef QSORT_ORDER_GUESS
 772    int qsort_break_even;
 773    int swapped;
 774 #endif
 775
 776    /* Make sure we actually have work to do.
 777    */
 778    if (num_elts <= 1) {
 779       return;
 780    }
 781
 782    /* Innoculate large partitions against quadratic behavior */
 783    if (num_elts > QSORT_PLAY_SAFE) {
 784       register size_t n;
 785       register SV ** const q = array;
 786       for (n = num_elts; n > 1; ) {
 787          register const size_t j = (size_t)(n-- * Drand01());
 788          temp = q[j];
 789          q[j] = q[n];
 790          q[n] = temp;
 791       }
 792    }
 793
 794    /* Setup the initial partition definition and fall into the sorting loop
 795    */
 796    part_left = 0;
 797    part_right = (int)(num_elts - 1);
 798 #ifdef QSORT_ORDER_GUESS
 799    qsort_break_even = QSORT_BREAK_EVEN;
 800 #else
 801 #define qsort_break_even QSORT_BREAK_EVEN
 802 #endif
 803    for ( ; ; ) {
 804       if ((part_right - part_left) >= qsort_break_even) {
 805          /* OK, this is gonna get hairy, so lets try to document all the
 806             concepts and abbreviations and variables and what they keep
 807             track of:
 808
 809             pc: pivot chunk - the set of array elements we accumulate in the
 810                 middle of the partition, all equal in value to the original
 811                 pivot element selected. The pc is defined by:
 812
 813                 pc_left - the leftmost array index of the pc
 814                 pc_right - the rightmost array index of the pc
 815
 816                 we start with pc_left == pc_right and only one element
 817                 in the pivot chunk (but it can grow during the scan).
 818
 819             u:  uncompared elements - the set of elements in the partition
 820                 we have not yet compared to the pivot value. There are two
 821                 uncompared sets during the scan - one to the left of the pc
 822                 and one to the right.
 823
 824                 u_right - the rightmost index of the left side's uncompared set
 825                 u_left - the leftmost index of the right side's uncompared set
 826
 827                 The leftmost index of the left sides's uncompared set
 828                 doesn't need its own variable because it is always defined
 829                 by the leftmost edge of the whole partition (part_left). The
 830                 same goes for the rightmost edge of the right partition
 831                 (part_right).
 832
 833                 We know there are no uncompared elements on the left once we
 834                 get u_right < part_left and no uncompared elements on the
 835                 right once u_left > part_right. When both these conditions
 836                 are met, we have completed the scan of the partition.
 837
 838                 Any elements which are between the pivot chunk and the
 839                 uncompared elements should be less than the pivot value on
 840                 the left side and greater than the pivot value on the right
 841                 side (in fact, the goal of the whole algorithm is to arrange
 842                 for that to be true and make the groups of less-than and
 843                 greater-then elements into new partitions to sort again).
 844
 845             As you marvel at the complexity of the code and wonder why it
 846             has to be so confusing. Consider some of the things this level
 847             of confusion brings:
 848
 849             Once I do a compare, I squeeze every ounce of juice out of it. I
 850             never do compare calls I don't have to do, and I certainly never
 851             do redundant calls.
 852
 853             I also never swap any elements unless I can prove there is a
 854             good reason. Many sort algorithms will swap a known value with
 855             an uncompared value just to get things in the right place (or
 856             avoid complexity :-), but that uncompared value, once it gets
 857             compared, may then have to be swapped again. A lot of the
 858             complexity of this code is due to the fact that it never swaps
 859             anything except compared values, and it only swaps them when the
 860             compare shows they are out of position.
 861          */
 862          int pc_left, pc_right;
 863          int u_right, u_left;
 864
 865          int s;
 866
 867          pc_left = ((part_left + part_right) / 2);
 868          pc_right = pc_left;
 869          u_right = pc_left - 1;
 870          u_left = pc_right + 1;
 871
 872          /* Qsort works best when the pivot value is also the median value
 873             in the partition (unfortunately you can't find the median value
 874             without first sorting :-), so to give the algorithm a helping
 875             hand, we pick 3 elements and sort them and use the median value
 876             of that tiny set as the pivot value.
 877
 878             Some versions of qsort like to use the left middle and right as
 879             the 3 elements to sort so they can insure the ends of the
 880             partition will contain values which will stop the scan in the
 881             compare loop, but when you have to call an arbitrarily complex
 882             routine to do a compare, its really better to just keep track of
 883             array index values to know when you hit the edge of the
 884             partition and avoid the extra compare. An even better reason to
 885             avoid using a compare call is the fact that you can drop off the
 886             edge of the array if someone foolishly provides you with an
 887             unstable compare function that doesn't always provide consistent
 888             results.
 889
 890             So, since it is simpler for us to compare the three adjacent
 891             elements in the middle of the partition, those are the ones we
 892             pick here (conveniently pointed at by u_right, pc_left, and
 893             u_left). The values of the left, center, and right elements
 894             are refered to as l c and r in the following comments.
 895          */
 896
 897 #ifdef QSORT_ORDER_GUESS
 898          swapped = 0;
 899 #endif
 900          s = qsort_cmp(u_right, pc_left);
 901          if (s < 0) {
 902             /* l < c */
 903             s = qsort_cmp(pc_left, u_left);
 904             /* if l < c, c < r - already in order - nothing to do */
 905             if (s == 0) {
 906                /* l < c, c == r - already in order, pc grows */
 907                ++pc_right;
 908                qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
 909             } else if (s > 0) {
 910                /* l < c, c > r - need to know more */
 911                s = qsort_cmp(u_right, u_left);
 912                if (s < 0) {
 913                   /* l < c, c > r, l < r - swap c & r to get ordered */
 914                   qsort_swap(pc_left, u_left);
 915                   qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
 916                } else if (s == 0) {
 917                   /* l < c, c > r, l == r - swap c&r, grow pc */
 918                   qsort_swap(pc_left, u_left);
 919                   --pc_left;
 920                   qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
 921                } else {
 922                   /* l < c, c > r, l > r - make lcr into rlc to get ordered */
 923                   qsort_rotate(pc_left, u_right, u_left);
 924                   qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
 925                }
 926             }
 927          } else if (s == 0) {
 928             /* l == c */
 929             s = qsort_cmp(pc_left, u_left);
 930             if (s < 0) {
 931                /* l == c, c < r - already in order, grow pc */
 932                --pc_left;
 933                qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
 934             } else if (s == 0) {
 935                /* l == c, c == r - already in order, grow pc both ways */
 936                --pc_left;
 937                ++pc_right;
 938                qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
 939             } else {
 940                /* l == c, c > r - swap l & r, grow pc */
 941                qsort_swap(u_right, u_left);
 942                ++pc_right;
 943                qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
 944             }
 945          } else {
 946             /* l > c */
 947             s = qsort_cmp(pc_left, u_left);
 948             if (s < 0) {
 949                /* l > c, c < r - need to know more */
 950                s = qsort_cmp(u_right, u_left);
 951                if (s < 0) {
 952                   /* l > c, c < r, l < r - swap l & c to get ordered */
 953                   qsort_swap(u_right, pc_left);
 954                   qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
 955                } else if (s == 0) {
 956                   /* l > c, c < r, l == r - swap l & c, grow pc */
 957                   qsort_swap(u_right, pc_left);
 958                   ++pc_right;
 959                   qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
 960                } else {
 961                   /* l > c, c < r, l > r - rotate lcr into crl to order */
 962                   qsort_rotate(u_right, pc_left, u_left);
 963                   qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
 964                }
 965             } else if (s == 0) {
 966                /* l > c, c == r - swap ends, grow pc */
 967                qsort_swap(u_right, u_left);
 968                --pc_left;
 969                qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
 970             } else {
 971                /* l > c, c > r - swap ends to get in order */
 972                qsort_swap(u_right, u_left);
 973                qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
 974             }
 975          }
 976          /* We now know the 3 middle elements have been compared and
 977             arranged in the desired order, so we can shrink the uncompared
 978             sets on both sides
 979          */
 980          --u_right;
 981          ++u_left;
 982          qsort_all_asserts(pc_left, pc_right, u_left, u_right);
 983
 984          /* The above massive nested if was the simple part :-). We now have
 985             the middle 3 elements ordered and we need to scan through the
 986             uncompared sets on either side, swapping elements that are on
 987             the wrong side or simply shuffling equal elements around to get
 988             all equal elements into the pivot chunk.
 989          */
 990
 991          for ( ; ; ) {
 992             int still_work_on_left;
 993             int still_work_on_right;
 994
 995             /* Scan the uncompared values on the left. If I find a value
 996                equal to the pivot value, move it over so it is adjacent to
 997                the pivot chunk and expand the pivot chunk. If I find a value
 998                less than the pivot value, then just leave it - its already
 999                on the correct side of the partition. If I find a greater
1000                value, then stop the scan.
1001             */
1002             while ((still_work_on_left = (u_right >= part_left))) {
1003                s = qsort_cmp(u_right, pc_left);
1004                if (s < 0) {
1005                   --u_right;
1006                } else if (s == 0) {
1007                   --pc_left;
1008                   if (pc_left != u_right) {
1009                      qsort_swap(u_right, pc_left);
1010                   }
1011                   --u_right;
1012                } else {
1013                   break;
1014                }
1015                qsort_assert(u_right < pc_left);
1016                qsort_assert(pc_left <= pc_right);
1017                qsort_assert(qsort_cmp(u_right + 1, pc_left) <= 0);
1018                qsort_assert(qsort_cmp(pc_left, pc_right) == 0);
1019             }
1020
1021             /* Do a mirror image scan of uncompared values on the right
1022             */
1023             while ((still_work_on_right = (u_left <= part_right))) {
1024                s = qsort_cmp(pc_right, u_left);
1025                if (s < 0) {
1026                   ++u_left;
1027                } else if (s == 0) {
1028                   ++pc_right;
1029                   if (pc_right != u_left) {
1030                      qsort_swap(pc_right, u_left);
1031                   }
1032                   ++u_left;
1033                } else {
1034                   break;
1035                }
1036                qsort_assert(u_left > pc_right);
1037                qsort_assert(pc_left <= pc_right);
1038                qsort_assert(qsort_cmp(pc_right, u_left - 1) <= 0);
1039                qsort_assert(qsort_cmp(pc_left, pc_right) == 0);
1040             }
1041
1042             if (still_work_on_left) {
1043                /* I know I have a value on the left side which needs to be
1044                   on the right side, but I need to know more to decide
1045                   exactly the best thing to do with it.
1046                */
1047                if (still_work_on_right) {
1048                   /* I know I have values on both side which are out of
1049                      position. This is a big win because I kill two birds
1050                      with one swap (so to speak). I can advance the
1051                      uncompared pointers on both sides after swapping both
1052                      of them into the right place.
1053                   */
1054                   qsort_swap(u_right, u_left);
1055                   --u_right;
1056                   ++u_left;
1057                   qsort_all_asserts(pc_left, pc_right, u_left, u_right);
1058                } else {
1059                   /* I have an out of position value on the left, but the
1060                      right is fully scanned, so I "slide" the pivot chunk
1061                      and any less-than values left one to make room for the
1062                      greater value over on the right. If the out of position
1063                      value is immediately adjacent to the pivot chunk (there
1064                      are no less-than values), I can do that with a swap,
1065                      otherwise, I have to rotate one of the less than values
1066                      into the former position of the out of position value
1067                      and the right end of the pivot chunk into the left end
1068                      (got all that?).
1069                   */
1070                   --pc_left;
1071                   if (pc_left == u_right) {
1072                      qsort_swap(u_right, pc_right);
1073                      qsort_all_asserts(pc_left, pc_right-1, u_left, u_right-1);
1074                   } else {
1075                      qsort_rotate(u_right, pc_left, pc_right);
1076                      qsort_all_asserts(pc_left, pc_right-1, u_left, u_right-1);
1077                   }
1078                   --pc_right;
1079                   --u_right;
1080                }
1081             } else if (still_work_on_right) {
1082                /* Mirror image of complex case above: I have an out of
1083                   position value on the right, but the left is fully
1084                   scanned, so I need to shuffle things around to make room
1085                   for the right value on the left.
1086                */
1087                ++pc_right;
1088                if (pc_right == u_left) {
1089                   qsort_swap(u_left, pc_left);
1090                   qsort_all_asserts(pc_left+1, pc_right, u_left+1, u_right);
1091                } else {
1092                   qsort_rotate(pc_right, pc_left, u_left);
1093                   qsort_all_asserts(pc_left+1, pc_right, u_left+1, u_right);
1094                }
1095                ++pc_left;
1096                ++u_left;
1097             } else {
1098                /* No more scanning required on either side of partition,
1099                   break out of loop and figure out next set of partitions
1100                */
1101                break;
1102             }
1103          }
1104
1105          /* The elements in the pivot chunk are now in the right place. They
1106             will never move or be compared again. All I have to do is decide
1107             what to do with the stuff to the left and right of the pivot
1108             chunk.
1109
1110             Notes on the QSORT_ORDER_GUESS ifdef code:
1111
1112             1. If I just built these partitions without swapping any (or
1113                very many) elements, there is a chance that the elements are
1114                already ordered properly (being properly ordered will
1115                certainly result in no swapping, but the converse can't be
1116                proved :-).
1117
1118             2. A (properly written) insertion sort will run faster on
1119                already ordered data than qsort will.
1120
1121             3. Perhaps there is some way to make a good guess about
1122                switching to an insertion sort earlier than partition size 6
1123                (for instance - we could save the partition size on the stack
1124                and increase the size each time we find we didn't swap, thus
1125                switching to insertion sort earlier for partitions with a
1126                history of not swapping).
1127
1128             4. Naturally, if I just switch right away, it will make
1129                artificial benchmarks with pure ascending (or descending)
1130                data look really good, but is that a good reason in general?
1131                Hard to say...
1132          */
1133
1134 #ifdef QSORT_ORDER_GUESS
1135          if (swapped < 3) {
1136 #if QSORT_ORDER_GUESS == 1
1137             qsort_break_even = (part_right - part_left) + 1;
1138 #endif
1139 #if QSORT_ORDER_GUESS == 2
1140             qsort_break_even *= 2;
1141 #endif
1142 #if QSORT_ORDER_GUESS == 3
1143             const int prev_break = qsort_break_even;
1144             qsort_break_even *= qsort_break_even;
1145             if (qsort_break_even < prev_break) {
1146                qsort_break_even = (part_right - part_left) + 1;
1147             }
1148 #endif
1149          } else {
1150             qsort_break_even = QSORT_BREAK_EVEN;
1151          }
1152 #endif
1153
1154          if (part_left < pc_left) {
1155             /* There are elements on the left which need more processing.
1156                Check the right as well before deciding what to do.
1157             */
1158             if (pc_right < part_right) {
1159                /* We have two partitions to be sorted. Stack the biggest one
1160                   and process the smallest one on the next iteration. This
1161                   minimizes the stack height by insuring that any additional
1162                   stack entries must come from the smallest partition which
1163                   (because it is smallest) will have the fewest
1164                   opportunities to generate additional stack entries.
1165                */
1166                if ((part_right - pc_right) > (pc_left - part_left)) {
1167                   /* stack the right partition, process the left */
1168                   partition_stack[next_stack_entry].left = pc_right + 1;
1169                   partition_stack[next_stack_entry].right = part_right;
1170 #ifdef QSORT_ORDER_GUESS
1171                   partition_stack[next_stack_entry].qsort_break_even = qsort_break_even;
1172 #endif
1173                   part_right = pc_left - 1;
1174                } else {
1175                   /* stack the left partition, process the right */
1176                   partition_stack[next_stack_entry].left = part_left;
1177                   partition_stack[next_stack_entry].right = pc_left - 1;
1178 #ifdef QSORT_ORDER_GUESS
1179                   partition_stack[next_stack_entry].qsort_break_even = qsort_break_even;
1180 #endif
1181                   part_left = pc_right + 1;
1182                }
1183                qsort_assert(next_stack_entry < QSORT_MAX_STACK);
1184                ++next_stack_entry;
1185             } else {
1186                /* The elements on the left are the only remaining elements
1187                   that need sorting, arrange for them to be processed as the
1188                   next partition.
1189                */
1190                part_right = pc_left - 1;
1191             }
1192          } else if (pc_right < part_right) {
1193             /* There is only one chunk on the right to be sorted, make it
1194                the new partition and loop back around.
1195             */
1196             part_left = pc_right + 1;
1197          } else {
1198             /* This whole partition wound up in the pivot chunk, so
1199                we need to get a new partition off the stack.
1200             */
1201             if (next_stack_entry == 0) {
1202                /* the stack is empty - we are done */
1203                break;
1204             }
1205             --next_stack_entry;
1206             part_left = partition_stack[next_stack_entry].left;
1207             part_right = partition_stack[next_stack_entry].right;
1208 #ifdef QSORT_ORDER_GUESS
1209             qsort_break_even = partition_stack[next_stack_entry].qsort_break_even;
1210 #endif
1211          }
1212       } else {
1213          /* This partition is too small to fool with qsort complexity, just
1214             do an ordinary insertion sort to minimize overhead.
1215          */
1216          int i;
1217          /* Assume 1st element is in right place already, and start checking
1218             at 2nd element to see where it should be inserted.
1219          */
1220          for (i = part_left + 1; i <= part_right; ++i) {
1221             int j;
1222             /* Scan (backwards - just in case 'i' is already in right place)
1223                through the elements already sorted to see if the ith element
1224                belongs ahead of one of them.
1225             */
1226             for (j = i - 1; j >= part_left; --j) {
1227                if (qsort_cmp(i, j) >= 0) {
1228                   /* i belongs right after j
1229                   */
1230                   break;
1231                }
1232             }
1233             ++j;
1234             if (j != i) {
1235                /* Looks like we really need to move some things
1236                */
1237                int k;
1238                temp = array[i];
1239                for (k = i - 1; k >= j; --k)
1240                   array[k + 1] = array[k];
1241                array[j] = temp;
1242             }
1243          }
1244
1245          /* That partition is now sorted, grab the next one, or get out
1246             of the loop if there aren't any more.
1247          */
1248
1249          if (next_stack_entry == 0) {
1250             /* the stack is empty - we are done */
1251             break;
1252          }
1253          --next_stack_entry;
1254          part_left = partition_stack[next_stack_entry].left;
1255          part_right = partition_stack[next_stack_entry].right;
1256 #ifdef QSORT_ORDER_GUESS
1257          qsort_break_even = partition_stack[next_stack_entry].qsort_break_even;
1258 #endif
1259       }
1260    }
1261
1262    /* Believe it or not, the array is sorted at this point! */
1263 }
1264
1265 /* Stabilize what is, presumably, an otherwise unstable sort method.
1266  * We do that by allocating (or having on hand) an array of pointers
1267  * that is the same size as the original array of elements to be sorted.
1268  * We initialize this parallel array with the addresses of the original
1269  * array elements.  This indirection can make you crazy.
1270  * Some pictures can help.  After initializing, we have
1271  *
1272  *  indir                  list1
1273  * +----+                 +----+
1274  * |    | --------------> |    | ------> first element to be sorted
1275  * +----+                 +----+
1276  * |    | --------------> |    | ------> second element to be sorted
1277  * +----+                 +----+
1278  * |    | --------------> |    | ------> third element to be sorted
1279  * +----+                 +----+
1280  *  ...
1281  * +----+                 +----+
1282  * |    | --------------> |    | ------> n-1st element to be sorted
1283  * +----+                 +----+
1284  * |    | --------------> |    | ------> n-th element to be sorted
1285  * +----+                 +----+
1286  *
1287  * During the sort phase, we leave the elements of list1 where they are,
1288  * and sort the pointers in the indirect array in the same order determined
1289  * by the original comparison routine on the elements pointed to.
1290  * Because we don't move the elements of list1 around through
1291  * this phase, we can break ties on elements that compare equal
1292  * using their address in the list1 array, ensuring stabilty.
1293  * This leaves us with something looking like
1294  *
1295  *  indir                  list1
1296  * +----+                 +----+
1297  * |    | --+       +---> |    | ------> first element to be sorted
1298  * +----+   |       |     +----+
1299  * |    | --|-------|---> |    | ------> second element to be sorted
1300  * +----+   |       |     +----+
1301  * |    | --|-------+ +-> |    | ------> third element to be sorted
1302  * +----+   |         |   +----+
1303  *  ...
1304  * +----+    | |   | |    +----+
1305  * |    | ---|-+   | +--> |    | ------> n-1st element to be sorted
1306  * +----+    |     |      +----+
1307  * |    | ---+     +----> |    | ------> n-th element to be sorted
1308  * +----+                 +----+
1309  *
1310  * where the i-th element of the indirect array points to the element
1311  * that should be i-th in the sorted array.  After the sort phase,
1312  * we have to put the elements of list1 into the places
1313  * dictated by the indirect array.
1314  */
1315
1316
1317 static I32
1318 cmpindir(pTHX_ gptr a, gptr b)
1319 {
1320     dVAR;
1321     gptr * const ap = (gptr *)a;
1322     gptr * const bp = (gptr *)b;
1323     const I32 sense = PL_sort_RealCmp(aTHX_ *ap, *bp);
1324
1325     if (sense)
1326         return sense;
1327     return (ap > bp) ? 1 : ((ap < bp) ? -1 : 0);
1328 }
1329
1330 static I32
1331 cmpindir_desc(pTHX_ gptr a, gptr b)
1332 {
1333     dVAR;
1334     gptr * const ap = (gptr *)a;
1335     gptr * const bp = (gptr *)b;
1336     const I32 sense = PL_sort_RealCmp(aTHX_ *ap, *bp);
1337
1338     /* Reverse the default */
1339     if (sense)
1340         return -sense;
1341     /* But don't reverse the stability test.  */
1342     return (ap > bp) ? 1 : ((ap < bp) ? -1 : 0);
1343
1344 }
1345
1346 STATIC void
1347 S_qsortsv(pTHX_ gptr *list1, size_t nmemb, SVCOMPARE_t cmp, U32 flags)
1348 {
1349     dVAR;
1350     if ((flags & SORTf_STABLE) != 0) {
1351          register gptr **pp, *q;
1352          register size_t n, j, i;
1353          gptr *small[SMALLSORT], **indir, tmp;
1354          SVCOMPARE_t savecmp;
1355          if (nmemb <= 1) return;     /* sorted trivially */
1356
1357          /* Small arrays can use the stack, big ones must be allocated */
1358          if (nmemb <= SMALLSORT) indir = small;
1359          else { Newx(indir, nmemb, gptr *); }
1360
1361          /* Copy pointers to original array elements into indirect array */
1362          for (n = nmemb, pp = indir, q = list1; n--; ) *pp++ = q++;
1363
1364          savecmp = PL_sort_RealCmp;     /* Save current comparison routine, if any */
1365          PL_sort_RealCmp = cmp; /* Put comparison routine where cmpindir can find it */
1366
1367          /* sort, with indirection */
1368          S_qsortsvu(aTHX_ (gptr *)indir, nmemb,
1369                     ((flags & SORTf_DESC) != 0 ? cmpindir_desc : cmpindir));
1370
1371          pp = indir;
1372          q = list1;
1373          for (n = nmemb; n--; ) {
1374               /* Assert A: all elements of q with index > n are already
1375                * in place.  This is vacuosly true at the start, and we
1376                * put element n where it belongs below (if it wasn't
1377                * already where it belonged). Assert B: we only move
1378                * elements that aren't where they belong,
1379                * so, by A, we never tamper with elements above n.
1380                */
1381               j = pp[n] - q;            /* This sets j so that q[j] is
1382                                          * at pp[n].  *pp[j] belongs in
1383                                          * q[j], by construction.
1384                                          */
1385               if (n != j) {             /* all's well if n == j */
1386                    tmp = q[j];          /* save what's in q[j] */
1387                    do {
1388                         q[j] = *pp[j];  /* put *pp[j] where it belongs */
1389                         i = pp[j] - q;  /* the index in q of the element
1390                                          * just moved */
1391                         pp[j] = q + j;  /* this is ok now */
1392                    } while ((j = i) != n);
1393                    /* There are only finitely many (nmemb) addresses
1394                     * in the pp array.
1395                     * So we must eventually revisit an index we saw before.
1396                     * Suppose the first revisited index is k != n.
1397                     * An index is visited because something else belongs there.
1398                     * If we visit k twice, then two different elements must
1399                     * belong in the same place, which cannot be.
1400                     * So j must get back to n, the loop terminates,
1401                     * and we put the saved element where it belongs.
1402                     */
1403                    q[n] = tmp;          /* put what belongs into
1404                                          * the n-th element */
1405               }
1406          }
1407
1408         /* free iff allocated */
1409          if (indir != small) { Safefree(indir); }
1410          /* restore prevailing comparison routine */
1411          PL_sort_RealCmp = savecmp;
1412     } else if ((flags & SORTf_DESC) != 0) {
1413          const SVCOMPARE_t savecmp = PL_sort_RealCmp;   /* Save current comparison routine, if any */
1414          PL_sort_RealCmp = cmp; /* Put comparison routine where cmp_desc can find it */
1415          cmp = cmp_desc;
1416          S_qsortsvu(aTHX_ list1, nmemb, cmp);
1417          /* restore prevailing comparison routine */
1418          PL_sort_RealCmp = savecmp;
1419     } else {
1420          S_qsortsvu(aTHX_ list1, nmemb, cmp);
1421     }
1422 }
1423
1424 /*
1425 =head1 Array Manipulation Functions
1426
1427 =for apidoc sortsv
1428
1429 Sort an array. Here is an example:
1430
1431     sortsv(AvARRAY(av), av_len(av)+1, Perl_sv_cmp_locale);
1432
1433 Currently this always uses mergesort. See sortsv_flags for a more
1434 flexible routine.
1435
1436 =cut
1437 */
1438
1439 void
1440 Perl_sortsv(pTHX_ SV **array, size_t nmemb, SVCOMPARE_t cmp)
1441 {
1442     sortsv_flags(array, nmemb, cmp, 0);
1443 }
1444
1445 /*
1446 =for apidoc sortsv_flags
1447
1448 Sort an array, with various options.
1449
1450 =cut
1451 */
1452 void
1453 Perl_sortsv_flags(pTHX_ SV **array, size_t nmemb, SVCOMPARE_t cmp, U32 flags)
1454 {
1455     if (flags & SORTf_QSORT)
1456         S_qsortsv(aTHX_ array, nmemb, cmp, flags);
1457     else
1458         S_mergesortsv(aTHX_ array, nmemb, cmp, flags);
1459 }
1460
1461 #define SvNSIOK(sv) ((SvFLAGS(sv) & SVf_NOK) || ((SvFLAGS(sv) & (SVf_IOK|SVf_IVisUV)) == SVf_IOK))
1462 #define SvSIOK(sv) ((SvFLAGS(sv) & (SVf_IOK|SVf_IVisUV)) == SVf_IOK)
1463 #define SvNSIV(sv) ( SvNOK(sv) ? SvNVX(sv) : ( SvSIOK(sv) ? SvIVX(sv) : sv_2nv(sv) ) )
1464
1465 PP(pp_sort)
1466 {
1467     dVAR; dSP; dMARK; dORIGMARK;
1468     register SV **p1 = ORIGMARK+1, **p2;
1469     register I32 max, i;
1470     AV* av = NULL;
1471     HV *stash;
1472     GV *gv;
1473     CV *cv = NULL;
1474     I32 gimme = GIMME;
1475     OP* const nextop = PL_op->op_next;
1476     I32 overloading = 0;
1477     bool hasargs = FALSE;
1478     I32 is_xsub = 0;
1479     I32 sorting_av = 0;
1480     const U8 priv = PL_op->op_private;
1481     const U8 flags = PL_op->op_flags;
1482     U32 sort_flags = 0;
1483     void (*sortsvp)(pTHX_ SV **array, size_t nmemb, SVCOMPARE_t cmp, U32 flags)
1484       = Perl_sortsv_flags;
1485     I32 all_SIVs = 1;
1486
1487     if ((priv & OPpSORT_DESCEND) != 0)
1488         sort_flags |= SORTf_DESC;
1489     if ((priv & OPpSORT_QSORT) != 0)
1490         sort_flags |= SORTf_QSORT;
1491     if ((priv & OPpSORT_STABLE) != 0)
1492         sort_flags |= SORTf_STABLE;
1493
1494     if (gimme != G_ARRAY) {
1495         SP = MARK;
1496         EXTEND(SP,1);
1497         RETPUSHUNDEF;
1498     }
1499
1500     ENTER;
1501     SAVEVPTR(PL_sortcop);
1502     if (flags & OPf_STACKED) {
1503         if (flags & OPf_SPECIAL) {
1504             OP *kid = cLISTOP->op_first->op_sibling;    /* pass pushmark */
1505             kid = kUNOP->op_first;                      /* pass rv2gv */
1506             kid = kUNOP->op_first;                      /* pass leave */
1507             PL_sortcop = kid->op_next;
1508             stash = CopSTASH(PL_curcop);
1509         }
1510         else {
1511             cv = sv_2cv(*++MARK, &stash, &gv, 0);
1512             if (cv && SvPOK(cv)) {
1513                 const char * const proto = SvPV_nolen_const((SV*)cv);
1514                 if (proto && strEQ(proto, "$$")) {
1515                     hasargs = TRUE;
1516                 }
1517             }
1518             if (!(cv && CvROOT(cv))) {
1519                 if (cv && CvISXSUB(cv)) {
1520                     is_xsub = 1;
1521                 }
1522                 else if (gv) {
1523                     SV *tmpstr = sv_newmortal();
1524                     gv_efullname3(tmpstr, gv, NULL);
1525                     DIE(aTHX_ "Undefined sort subroutine \"%"SVf"\" called",
1526                         tmpstr);
1527                 }
1528                 else {
1529                     DIE(aTHX_ "Undefined subroutine in sort");
1530                 }
1531             }
1532
1533             if (is_xsub)
1534                 PL_sortcop = (OP*)cv;
1535             else
1536                 PL_sortcop = CvSTART(cv);
1537         }
1538     }
1539     else {
1540         PL_sortcop = NULL;
1541         stash = CopSTASH(PL_curcop);
1542     }
1543
1544     /* optimiser converts "@a = sort @a" to "sort \@a";
1545      * in case of tied @a, pessimise: push (@a) onto stack, then assign
1546      * result back to @a at the end of this function */
1547     if (priv & OPpSORT_INPLACE) {
1548         assert( MARK+1 == SP && *SP && SvTYPE(*SP) == SVt_PVAV);
1549         (void)POPMARK; /* remove mark associated with ex-OP_AASSIGN */
1550         av = (AV*)(*SP);
1551         max = AvFILL(av) + 1;
1552         if (SvMAGICAL(av)) {
1553             MEXTEND(SP, max);
1554             p2 = SP;
1555             for (i=0; i < max; i++) {
1556                 SV **svp = av_fetch(av, i, FALSE);
1557                 *SP++ = (svp) ? *svp : NULL;
1558             }
1559         }
1560         else {
1561             if (SvREADONLY(av))
1562                 Perl_croak(aTHX_ PL_no_modify);
1563             else
1564                 SvREADONLY_on(av);
1565             p1 = p2 = AvARRAY(av);
1566             sorting_av = 1;
1567         }
1568     }
1569     else {
1570         p2 = MARK+1;
1571         max = SP - MARK;
1572    }
1573
1574     /* shuffle stack down, removing optional initial cv (p1!=p2), plus
1575      * any nulls; also stringify or converting to integer or number as
1576      * required any args */
1577     for (i=max; i > 0 ; i--) {
1578         if ((*p1 = *p2++)) {                    /* Weed out nulls. */
1579             SvTEMP_off(*p1);
1580             if (!PL_sortcop) {
1581                 if (priv & OPpSORT_NUMERIC) {
1582                     if (priv & OPpSORT_INTEGER) {
1583                         if (!SvIOK(*p1)) {
1584                             if (SvAMAGIC(*p1))
1585                                 overloading = 1;
1586                             else
1587                                 (void)sv_2iv(*p1);
1588                         }
1589                     }
1590                     else {
1591                         if (!SvNSIOK(*p1)) {
1592                             if (SvAMAGIC(*p1))
1593                                 overloading = 1;
1594                             else
1595                                 (void)sv_2nv(*p1);
1596                         }
1597                         if (all_SIVs && !SvSIOK(*p1))
1598                             all_SIVs = 0;
1599                     }
1600                 }
1601                 else {
1602                     if (!SvPOK(*p1)) {
1603                         if (SvAMAGIC(*p1))
1604                             overloading = 1;
1605                         else
1606                             (void)sv_2pv_flags(*p1, 0,
1607                                                SV_GMAGIC|SV_CONST_RETURN);
1608                     }
1609                 }
1610             }
1611             p1++;
1612         }
1613         else
1614             max--;
1615     }
1616     if (sorting_av)
1617         AvFILLp(av) = max-1;
1618
1619     if (max > 1) {
1620         SV **start;
1621         if (PL_sortcop) {
1622             PERL_CONTEXT *cx;
1623             SV** newsp;
1624             const bool oldcatch = CATCH_GET;
1625
1626             SAVETMPS;
1627             SAVEOP();
1628
1629             CATCH_SET(TRUE);
1630             PUSHSTACKi(PERLSI_SORT);
1631             if (!hasargs && !is_xsub) {
1632                 SAVESPTR(PL_firstgv);
1633                 SAVESPTR(PL_secondgv);
1634                 SAVESPTR(PL_sortstash);
1635                 PL_firstgv = gv_fetchpvs("a", GV_ADD|GV_NOTQUAL, SVt_PV);
1636                 PL_secondgv = gv_fetchpvs("b", GV_ADD|GV_NOTQUAL, SVt_PV);
1637                 PL_sortstash = stash;
1638                 SAVESPTR(GvSV(PL_firstgv));
1639                 SAVESPTR(GvSV(PL_secondgv));
1640             }
1641
1642             PUSHBLOCK(cx, CXt_NULL, PL_stack_base);
1643             if (!(flags & OPf_SPECIAL)) {
1644                 cx->cx_type = CXt_SUB;
1645                 cx->blk_gimme = G_SCALAR;
1646                 PUSHSUB(cx);
1647                 if (!is_xsub) {
1648                     AV* const padlist = CvPADLIST(cv);
1649
1650                     if (++CvDEPTH(cv) >= 2) {
1651                         PERL_STACK_OVERFLOW_CHECK();
1652                         pad_push(padlist, CvDEPTH(cv));
1653                     }
1654                     SAVECOMPPAD();
1655                     PAD_SET_CUR_NOSAVE(padlist, CvDEPTH(cv));
1656
1657                     if (hasargs) {
1658                         /* This is mostly copied from pp_entersub */
1659                         AV * const av = (AV*)PAD_SVl(0);
1660
1661                         cx->blk_sub.savearray = GvAV(PL_defgv);
1662                         GvAV(PL_defgv) = (AV*)SvREFCNT_inc_simple(av);
1663                         CX_CURPAD_SAVE(cx->blk_sub);
1664                         cx->blk_sub.argarray = av;
1665                     }
1666
1667                 }
1668             }
1669             cx->cx_type |= CXp_MULTICALL;
1670
1671             start = p1 - max;
1672             sortsvp(aTHX_ start, max,
1673                     (is_xsub ? S_sortcv_xsub : hasargs ? S_sortcv_stacked : S_sortcv),
1674                     sort_flags);
1675
1676             if (!(flags & OPf_SPECIAL)) {
1677                 LEAVESUB(cv);
1678                 if (!is_xsub)
1679                     CvDEPTH(cv)--;
1680             }
1681             POPBLOCK(cx,PL_curpm);
1682             PL_stack_sp = newsp;
1683             POPSTACK;
1684             CATCH_SET(oldcatch);
1685         }
1686         else {
1687             MEXTEND(SP, 20);    /* Can't afford stack realloc on signal. */
1688             start = sorting_av ? AvARRAY(av) : ORIGMARK+1;
1689             sortsvp(aTHX_ start, max,
1690                     (priv & OPpSORT_NUMERIC)
1691                         ? ( ( ( priv & OPpSORT_INTEGER) || all_SIVs)
1692                             ? ( overloading ? S_amagic_i_ncmp : S_sv_i_ncmp)
1693                             : ( overloading ? S_amagic_ncmp : S_sv_ncmp ) )
1694                         : ( IN_LOCALE_RUNTIME
1695                             ? ( overloading
1696                                 ? S_amagic_cmp_locale
1697                                 : sv_cmp_locale_static)
1698                             : ( overloading ? S_amagic_cmp : sv_cmp_static)),
1699                     sort_flags);
1700         }
1701         if ((priv & OPpSORT_REVERSE) != 0) {
1702             SV **q = start+max-1;
1703             while (start < q) {
1704                 SV * const tmp = *start;
1705                 *start++ = *q;
1706                 *q-- = tmp;
1707             }
1708         }
1709     }
1710     if (sorting_av)
1711         SvREADONLY_off(av);
1712     else if (av && !sorting_av) {
1713         /* simulate pp_aassign of tied AV */
1714         SV** const base = ORIGMARK+1;
1715         for (i=0; i < max; i++) {
1716             base[i] = newSVsv(base[i]);
1717         }
1718         av_clear(av);
1719         av_extend(av, max);
1720         for (i=0; i < max; i++) {
1721             SV * const sv = base[i];
1722             SV ** const didstore = av_store(av, i, sv);
1723             if (SvSMAGICAL(sv))
1724                 mg_set(sv);
1725             if (!didstore)
1726                 sv_2mortal(sv);
1727         }
1728     }
1729     LEAVE;
1730     PL_stack_sp = ORIGMARK + (sorting_av ? 0 : max);
1731     return nextop;
1732 }
1733
1734 static I32
1735 S_sortcv(pTHX_ SV *a, SV *b)
1736 {
1737     dVAR;
1738     const I32 oldsaveix = PL_savestack_ix;
1739     const I32 oldscopeix = PL_scopestack_ix;
1740     I32 result;
1741     GvSV(PL_firstgv) = a;
1742     GvSV(PL_secondgv) = b;
1743     PL_stack_sp = PL_stack_base;
1744     PL_op = PL_sortcop;
1745     CALLRUNOPS(aTHX);
1746     if (PL_stack_sp != PL_stack_base + 1)
1747         Perl_croak(aTHX_ "Sort subroutine didn't return single value");
1748     if (!SvNIOKp(*PL_stack_sp))
1749         Perl_croak(aTHX_ "Sort subroutine didn't return a numeric value");
1750     result = SvIV(*PL_stack_sp);
1751     while (PL_scopestack_ix > oldscopeix) {
1752         LEAVE;
1753     }
1754     leave_scope(oldsaveix);
1755     return result;
1756 }
1757
1758 static I32
1759 S_sortcv_stacked(pTHX_ SV *a, SV *b)
1760 {
1761     dVAR;
1762     const I32 oldsaveix = PL_savestack_ix;
1763     const I32 oldscopeix = PL_scopestack_ix;
1764     I32 result;
1765     AV * const av = GvAV(PL_defgv);
1766
1767     if (AvMAX(av) < 1) {
1768         SV** ary = AvALLOC(av);
1769         if (AvARRAY(av) != ary) {
1770             AvMAX(av) += AvARRAY(av) - AvALLOC(av);
1771             SvPV_set(av, (char*)ary);
1772         }
1773         if (AvMAX(av) < 1) {
1774             AvMAX(av) = 1;
1775             Renew(ary,2,SV*);
1776             SvPV_set(av, (char*)ary);
1777         }
1778     }
1779     AvFILLp(av) = 1;
1780
1781     AvARRAY(av)[0] = a;
1782     AvARRAY(av)[1] = b;
1783     PL_stack_sp = PL_stack_base;
1784     PL_op = PL_sortcop;
1785     CALLRUNOPS(aTHX);
1786     if (PL_stack_sp != PL_stack_base + 1)
1787         Perl_croak(aTHX_ "Sort subroutine didn't return single value");
1788     if (!SvNIOKp(*PL_stack_sp))
1789         Perl_croak(aTHX_ "Sort subroutine didn't return a numeric value");
1790     result = SvIV(*PL_stack_sp);
1791     while (PL_scopestack_ix > oldscopeix) {
1792         LEAVE;
1793     }
1794     leave_scope(oldsaveix);
1795     return result;
1796 }
1797
1798 static I32
1799 S_sortcv_xsub(pTHX_ SV *a, SV *b)
1800 {
1801     dVAR; dSP;
1802     const I32 oldsaveix = PL_savestack_ix;
1803     const I32 oldscopeix = PL_scopestack_ix;
1804     CV * const cv=(CV*)PL_sortcop;
1805     I32 result;
1806
1807     SP = PL_stack_base;
1808     PUSHMARK(SP);
1809     EXTEND(SP, 2);
1810     *++SP = a;
1811     *++SP = b;
1812     PUTBACK;
1813     (void)(*CvXSUB(cv))(aTHX_ cv);
1814     if (PL_stack_sp != PL_stack_base + 1)
1815         Perl_croak(aTHX_ "Sort subroutine didn't return single value");
1816     if (!SvNIOKp(*PL_stack_sp))
1817         Perl_croak(aTHX_ "Sort subroutine didn't return a numeric value");
1818     result = SvIV(*PL_stack_sp);
1819     while (PL_scopestack_ix > oldscopeix) {
1820         LEAVE;
1821     }
1822     leave_scope(oldsaveix);
1823     return result;
1824 }
1825
1826
1827 static I32
1828 S_sv_ncmp(pTHX_ SV *a, SV *b)
1829 {
1830     const NV nv1 = SvNSIV(a);
1831     const NV nv2 = SvNSIV(b);
1832     return nv1 < nv2 ? -1 : nv1 > nv2 ? 1 : 0;
1833 }
1834
1835 static I32
1836 S_sv_i_ncmp(pTHX_ SV *a, SV *b)
1837 {
1838     const IV iv1 = SvIV(a);
1839     const IV iv2 = SvIV(b);
1840     return iv1 < iv2 ? -1 : iv1 > iv2 ? 1 : 0;
1841 }
1842
1843 #define tryCALL_AMAGICbin(left,right,meth) \
1844     (PL_amagic_generation && (SvAMAGIC(left)||SvAMAGIC(right))) \
1845         ? amagic_call(left, right, CAT2(meth,_amg), 0) \
1846         : NULL;
1847
1848 static I32
1849 S_amagic_ncmp(pTHX_ register SV *a, register SV *b)
1850 {
1851     dVAR;
1852     SV * const tmpsv = tryCALL_AMAGICbin(a,b,ncmp);
1853     if (tmpsv) {
1854         if (SvIOK(tmpsv)) {
1855             const I32 i = SvIVX(tmpsv);
1856             if (i > 0)
1857                return 1;
1858             return i? -1 : 0;
1859         }
1860         else {
1861             const NV d = SvNV(tmpsv);
1862             if (d > 0)
1863                return 1;
1864             return d ? -1 : 0;
1865         }
1866      }
1867      return S_sv_ncmp(aTHX_ a, b);
1868 }
1869
1870 static I32
1871 S_amagic_i_ncmp(pTHX_ register SV *a, register SV *b)
1872 {
1873     dVAR;
1874     SV * const tmpsv = tryCALL_AMAGICbin(a,b,ncmp);
1875     if (tmpsv) {
1876         if (SvIOK(tmpsv)) {
1877             const I32 i = SvIVX(tmpsv);
1878             if (i > 0)
1879                return 1;
1880             return i? -1 : 0;
1881         }
1882         else {
1883             const NV d = SvNV(tmpsv);
1884             if (d > 0)
1885                return 1;
1886             return d ? -1 : 0;
1887         }
1888     }
1889     return S_sv_i_ncmp(aTHX_ a, b);
1890 }
1891
1892 static I32
1893 S_amagic_cmp(pTHX_ register SV *str1, register SV *str2)
1894 {
1895     dVAR;
1896     SV * const tmpsv = tryCALL_AMAGICbin(str1,str2,scmp);
1897     if (tmpsv) {
1898         if (SvIOK(tmpsv)) {
1899             const I32 i = SvIVX(tmpsv);
1900             if (i > 0)
1901                return 1;
1902             return i? -1 : 0;
1903         }
1904         else {
1905             const NV d = SvNV(tmpsv);
1906             if (d > 0)
1907                return 1;
1908             return d? -1 : 0;
1909         }
1910     }
1911     return sv_cmp(str1, str2);
1912 }
1913
1914 static I32
1915 S_amagic_cmp_locale(pTHX_ register SV *str1, register SV *str2)
1916 {
1917     dVAR;
1918     SV * const tmpsv = tryCALL_AMAGICbin(str1,str2,scmp);
1919     if (tmpsv) {
1920         if (SvIOK(tmpsv)) {
1921             const I32 i = SvIVX(tmpsv);
1922             if (i > 0)
1923                return 1;
1924             return i? -1 : 0;
1925         }
1926         else {
1927             const NV d = SvNV(tmpsv);
1928             if (d > 0)
1929                return 1;
1930             return d? -1 : 0;
1931         }
1932     }
1933     return sv_cmp_locale(str1, str2);
1934 }
1935
1936 /*
1937  * Local variables:
1938  * c-indentation-style: bsd
1939  * c-basic-offset: 4
1940  * indent-tabs-mode: t
1941  * End:
1942  *
1943  * ex: set ts=8 sts=4 sw=4 noet:
1944  */