pp_sort.c

   1 /*    pp_sort.c
   2  *
   3  *    Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
   4  *    2000, 2001, 2002, 2003, 2004, 2005, by Larry Wall and others
   5  *
   6  *    You may distribute under the terms of either the GNU General Public
   7  *    License or the Artistic License, as specified in the README file.
   8  *
   9  */
  10
  11 /*
  12  *   ...they shuffled back towards the rear of the line. 'No, not at the
  13  *   rear!'  the slave-driver shouted. 'Three files up. And stay there...
  14  */
  15
  16 /* This file contains pp ("push/pop") functions that
  17  * execute the opcodes that make up a perl program. A typical pp function
  18  * expects to find its arguments on the stack, and usually pushes its
  19  * results onto the stack, hence the 'pp' terminology. Each OP structure
  20  * contains a pointer to the relevant pp_foo() function.
  21  *
  22  * This particular file just contains pp_sort(), which is complex
  23  * enough to merit its own file! See the other pp*.c files for the rest of
  24  * the pp_ functions.
  25  */
  26
  27 #include "EXTERN.h"
  28 #define PERL_IN_PP_SORT_C
  29 #include "perl.h"
  30
  31 #if defined(UNDER_CE)
  32 /* 'small' looks like a reserved word under WinCE (or some such), so it is
      * renamed there.  In this file 'small' names the on-stack auxiliary
      * array declared in S_mergesortsv(). */
  33 #define small xsmall
  34 #endif
  35
  /* Local aliases for Perl_sv_cmp and Perl_sv_cmp_locale.
   * NOTE(review): neither alias is referenced in the part of the file
   * reviewed here -- confirm they are still used further down. */
  36 #define sv_cmp_static Perl_sv_cmp
  37 #define sv_cmp_locale_static Perl_sv_cmp_locale
  38
  /* Largest element count for which S_mergesortsv() keeps its auxiliary
   * array on the C stack ("gptr small[SMALLSORT]"); larger inputs get the
   * array from Newx().  A value supplied by the build takes precedence. */
  39 #ifndef SMALLSORT
  40 #define SMALLSORT (200)
  41 #endif
  42
  43 /* Flags for qsortsv and mergesortsv.  Each value is a distinct bit, so
      * they can be combined in the U32 "flags" argument. */
  44 #define SORTf_DESC   1     /* descending order; see cmp_desc() */
  45 #define SORTf_STABLE 2     /* NOTE(review): not referenced in this part of the file; presumably requests a stable sort */
  46 #define SORTf_QSORT  4     /* NOTE(review): not referenced in this part of the file; presumably selects quicksort */
  47
  48 /*
  49  * The mergesort implementation is by Peter M. Mcilroy <pmcilroy@lucent.com>.
  50  *
  51  * The original code was written in conjunction with BSD Computer Software
  52  * Research Group at University of California, Berkeley.
  53  *
  54  * See also: "Optimistic Merge Sort" (SODA '92)
  55  *
  56  * The integration to Perl is by John P. Linderman <jpl@research.att.com>.
  57  *
  58  * The code can be distributed under the same terms as Perl itself.
  59  *
  60  */
  61
  62
  63 typedef char * aptr;            /* byte pointer, for arithmetic on sizes (see APTR, BYTEOFF) */
  64 typedef SV * gptr;              /* one element of the lists being sorted: a pointer to an SV */
  65
  66 /* Binary merge internal sort, with a few special mods
  67 ** for the special perl environment it now finds itself in.
  68 **
  69 ** Things that were once options have been hotwired
  70 ** to values suitable for this use.  In particular, we'll always
  71 ** initialize looking for natural runs, we'll always produce stable
  72 ** output, and we'll always do Peter McIlroy's binary merge.
  73 */
  74
  75 /* Pointer types for arithmetic and storage and convenience casts */
  76
  77 #define APTR(P) ((aptr)(P))         /* view P as a byte pointer */
  78 #define GPTP(P) ((gptr *)(P))       /* view P as a pointer to a list element */
  79 #define GPPP(P) ((gptr **)(P))      /* view P as a pointer to an element pointer (see NEXT) */
  80
  81
  82 /* byte offset from pointer P to (larger) pointer Q */
  83 #define BYTEOFF(P, Q) (APTR(Q) - APTR(P))
  84
  85 #define PSIZE sizeof(gptr)          /* size in bytes of one list element */
  86
  87 /* If PSIZE is a power of 2, PSHIFT may be defined as that power, if that
      * helps; counts are then converted with shifts instead of division and
      * multiplication.  Either way:
      *   PNELEM(P, Q)  number of elements from P up to (larger) Q
      *   PNBYTE(N)     number of bytes occupied by N elements
      *   PINDEX(P, N)  pointer to the element N places after P
      */
  88
  89 #ifdef  PSHIFT
  90 #define PNELEM(P, Q)    (BYTEOFF(P,Q) >> (PSHIFT))
  91 #define PNBYTE(N)       ((N) << (PSHIFT))
  92 #define PINDEX(P, N)    (GPTP(APTR(P) + PNBYTE(N)))
  93 #else
  94 /* Leave optimization to compiler */
  95 #define PNELEM(P, Q)    (GPTP(Q) - GPTP(P))
  96 #define PNBYTE(N)       ((N) * (PSIZE))
  97 #define PINDEX(P, N)    (GPTP(P) + (N))
  98 #endif
  99
 100 /* Pointer into OTHER corresponding to pointer P into THIS: the result lies
      * at the same byte offset from OTHER as P lies from THIS. */
 101 #define POTHER(P, THIS, OTHER) GPTP(APTR(OTHER) + BYTEOFF(THIS,P))
 102
 /* Copy elements from src to dst, advancing both, until src reaches lim.
  * The loop is a do-while, so one element is always copied before the
  * limit is tested: callers must guarantee src < lim on entry.  src and
  * dst must be modifiable pointer lvalues; each is left one past the last
  * element it touched.  The expansion carries no trailing semicolon, so
  * the macro is written like an ordinary statement, including as the
  * unbraced body of an if/else.  Arguments are parenthesized and the loop
  * body is braced so that expression arguments expand safely. */
 #define FROMTOUPTO(src, dst, lim) \
     do { *(dst)++ = *(src)++; } while ((src) < (lim))
 104
 105
 106 /* Runs are identified by a pointer in the auxiliary list.
 107 ** The pointer is at the start of the list,
 108 ** and it points to the start of the next list.
 109 ** NEXT is used as an lvalue, too.
     ** (NEXT(P) reads or writes the slot at P as if it held a gptr *.)
 110 */
 111
 112 #define NEXT(P)         (*GPPP(P))
 113
 114
 115 /* PTHRESH is the minimum number of pairs with the same sense to justify
 116 ** checking for a run and extending it.  Note that PTHRESH counts PAIRS,
 117 ** not just elements, so PTHRESH == 8 means a run of 16.
     ** dynprep() applies it by probing the pair that starts 2 * PTHRESH
     ** elements past the bottom of a candidate run.
 118 */
 119
 120 #define PTHRESH (8)
 121
 122 /* RTHRESH is the number of elements in a run that must compare low
 123 ** to the low element from the opposing run before we justify
 124 ** doing a binary rampup instead of single stepping.
 125 ** In random input, N in a row low should only happen with
 126 ** probability 2^(1-N), so we can risk that we are dealing
 127 ** with orderly input without paying much when we aren't.
     ** In S_mergesortsv() the probe stride starts at 1 and is doubled on
     ** every step once RTHRESH consecutive probes have compared low.
 128 */
 129
 130 #define RTHRESH (6)
 131
 132
 133 /*
 134 ** Overview of algorithm and variables.
 135 ** The array of elements at list1 will be organized into runs of length 2,
 136 ** or runs of length >= 2 * PTHRESH.  We only try to form long runs when
 137 ** PTHRESH adjacent pairs compare in the same way, suggesting overall order.
 138 **
 139 ** Unless otherwise specified, pair pointers address the first of two elements.
 140 **
 141 ** b and b+1 are a pair that compare with sense "sense".
 142 ** b is the "bottom" of adjacent pairs that might form a longer run.
 143 **
 144 ** p2 parallels b in the list2 array, where runs are defined by
 145 ** a pointer chain.
 146 **
 147 ** t represents the "top" of the adjacent pairs that might extend
 148 ** the run beginning at b.  Usually, t addresses a pair
 149 ** that compares with opposite sense from (b,b+1).
 150 ** However, it may also address a singleton element at the end of list1,
 151 ** or it may be equal to "last", the first element beyond list1.
 152 **
 153 ** r addresses the Nth pair following b.  If this would be beyond t,
 154 ** we back it off to t.  Only when r is less than t do we consider the
 155 ** run long enough to consider checking.
 156 **
 157 ** q addresses a pair such that the pairs at b through q already form a run.
 158 ** Often, q will equal b, indicating we only are sure of the pair itself.
 159 ** However, a search on the previous cycle may have revealed a longer run,
 160 ** so q may be greater than b.
 161 **
 162 ** p is used to work back from a candidate r, trying to reach q,
 163 ** which would mean b through r would be a run.  If we discover such a run,
 164 ** we start q at r and try to push it further towards t.
 165 ** If b through r is NOT a run, we detect the wrong order at (p-1,p).
 166 ** In any event, after the check (if any), we have two main cases.
 167 **
 168 ** 1) Short run.  b <= q < p <= r <= t.
 169 **      b through q is a run (perhaps trivial)
 170 **      q through p are uninteresting pairs
 171 **      p through r is a run
 172 **
 173 ** 2) Long run.  b < r <= q < t.
 174 **      b through q is a run (of length >= 2 * PTHRESH)
 175 **
 176 ** Note that degenerate cases are not only possible, but likely.
 177 ** For example, if the pair following b compares with opposite sense,
 178 ** then b == q < p == r == t.
 179 */
 180
 181
 /*
  * dynprep - make one pass over list1, organizing it in place into runs
  * that are in order according to cmp, and record the run boundaries in
  * list2.
  *
  * list1   the nmemb elements to be sorted.  Pairs and long runs found in
  *         descending order ("sense" true) are reversed in place.
  * list2   auxiliary array paralleling list1.  The slot paralleling the
  *         start of each run receives, via NEXT, a pointer to the slot
  *         paralleling the start of the following run.
  * nmemb   number of elements in list1.  The first comparison reads
  *         list1[0] and list1[1] unconditionally, so at least 2 elements
  *         are expected; the caller visible in this file returns early
  *         when nmemb <= 1.
  * cmp     comparison routine.  Only whether its result is > 0 matters.
  *
  * Returns the number of runs recorded.
  */
 182 static IV
 183 dynprep(pTHX_ gptr *list1, gptr *list2, size_t nmemb, SVCOMPARE_t cmp)
 184 {
 185     I32 sense;
 186     register gptr *b, *p, *q, *t, *p2;
 187     register gptr c, *last, *r;
 188     gptr *savep;
 189     IV runs = 0;
 190
 191     b = list1;
 192     last = PINDEX(b, nmemb);              /* first element beyond list1 */
 193     sense = (cmp(aTHX_ *b, *(b+1)) > 0);  /* true if the first pair is descending */
 194     for (p2 = list2; b < last; ) {
 195         /* We just started, or just reversed sense.
 196         ** Set t at end of pairs with the prevailing sense.
 197         */
 198         for (p = b+2, t = p; ++p < last; t = ++p) {
 199             if ((cmp(aTHX_ *t, *p) > 0) != sense) break;
 200         }
 201         q = b;
 202         /* Having laid out the playing field, look for long runs */
 203         do {
 204             p = r = b + (2 * PTHRESH);
 205             if (r >= t) p = r = t;      /* too short to care about */
 206             else {
                     /* Work back from r towards q, checking that each pair
                     ** joins onto the pair before it with the same sense.
                     */
 207                 while (((cmp(aTHX_ *(p-1), *p) > 0) == sense) &&
 208                        ((p -= 2) > q));
 209                 if (p <= q) {
 210                     /* b through r is a (long) run.
 211                     ** Extend it as far as possible.
 212                     */
 213                     p = q = r;
 214                     while (((p += 2) < t) &&
 215                            ((cmp(aTHX_ *(p-1), *p) > 0) == sense)) q = p;
 216                     r = p = q + 2;      /* no simple pairs, no after-run */
 217                 }
 218             }
 219             if (q > b) {                /* run of greater than 2 at b */
 220                 savep = p;
 221                 p = q += 2;
 222                 /* pick up singleton, if possible */
 223                 if ((p == t) &&
 224                     ((t + 1) == last) &&
 225                     ((cmp(aTHX_ *(p-1), *p) > 0) == sense))
 226                     savep = r = p = q = last;
                     /* chain this run (p - b elements) to the start of the next */
 227                 p2 = NEXT(p2) = p2 + (p - b); ++runs;
                     /* a descending run is reversed in place, ends inward */
 228                 if (sense) while (b < --p) {
 229                     c = *b;
 230                     *b++ = *p;
 231                     *p = c;
 232                 }
 233                 p = savep;
 234             }
 235             while (q < p) {             /* simple pairs */
                     /* each remaining pair becomes a run of length 2 */
 236                 p2 = NEXT(p2) = p2 + 2; ++runs;
 237                 if (sense) {
                         /* descending pair: swap its two elements */
 238                     c = *q++;
 239                     *(q-1) = *q;
 240                     *q++ = c;
 241                 } else q += 2;
 242             }
                 /* a lone trailing element becomes a run of length 1 */
 243             if (((b = p) == t) && ((t+1) == last)) {
 244                 NEXT(p2) = p2 + 1; ++runs;
 245                 b++;
 246             }
 247             q = r;
 248         } while (b < t);
             /* the pair at t, if any, broke the pattern: flip the sense */
 249         sense = !sense;
 250     }
 251     return runs;
 252 }
 253
 254
 255 /* The original merge sort, in use since 5.7, was as fast as, or faster than,
 256  * qsort on many platforms, but slower than qsort, conspicuously so,
 257  * on others.  The most likely explanation was platform-specific
 258  * differences in cache sizes and relative speeds.
 259  *
 260  * The quicksort divide-and-conquer algorithm guarantees that, as the
 261  * problem is subdivided into smaller and smaller parts, the parts
 262  * fit into smaller (and faster) caches.  So it doesn't matter how
 263  * many levels of cache exist, quicksort will "find" them, and,
 264  * as long as smaller is faster, take advantage of them.
 265  *
 266  * By contrast, consider how the original mergesort algorithm worked.
 267  * Suppose we have five runs (each typically of length 2 after dynprep).
 268  *
 269  * pass               base                        aux
 270  *  0              1 2 3 4 5
 271  *  1                                           12 34 5
 272  *  2                1234 5
 273  *  3                                            12345
 274  *  4                 12345
 275  *
 276  * Adjacent pairs are merged in "grand sweeps" through the input.
 277  * This means, on pass 1, the records in runs 1 and 2 aren't revisited until
 278  * runs 3 and 4 are merged and the records from run 5 have been copied.
 279  * The only cache that matters is one large enough to hold *all* the input.
 280  * On some platforms, this may be many times slower than smaller caches.
 281  *
 282  * The following pseudo-code uses the same basic merge algorithm,
 283  * but in a divide-and-conquer way.
 284  *
 285  * # merge $runs runs at offset $offset of list $list1 into $list2.
 286  * # all unmerged runs ($runs == 1) originate in list $base.
 287  * sub mgsort2 {
 288  *     my ($offset, $runs, $base, $list1, $list2) = @_;
 289  *
 290  *     if ($runs == 1) {
 291  *         if ($list1 is $base) copy run to $list2
 292  *         return offset of end of list (or copy)
 293  *     } else {
 294  *         $off2 = mgsort2($offset, $runs-($runs/2), $base, $list2, $list1)
 295  *         mgsort2($off2, $runs/2, $base, $list2, $list1)
 296  *         merge the adjacent runs at $offset of $list1 into $list2
 297  *         return the offset of the end of the merged runs
 298  *     }
 299  * }
 300  * mgsort2(0, $runs, $base, $aux, $base);
 301  *
 302  * For our 5 runs, the tree of calls looks like
 303  *
 304  *           5
 305  *      3        2
 306  *   2     1   1   1
 307  * 1   1
 308  *
 309  * 1   2   3   4   5
 310  *
 311  * and the corresponding activity looks like
 312  *
 313  * copy runs 1 and 2 from base to aux
 314  * merge runs 1 and 2 from aux to base
 315  * (run 3 is where it belongs, no copy needed)
 316  * merge runs 12 and 3 from base to aux
 317  * (runs 4 and 5 are where they belong, no copy needed)
 318  * merge runs 4 and 5 from base to aux
 319  * merge runs 123 and 45 from aux to base
 320  *
 321  * Note that we merge runs 1 and 2 immediately after copying them,
 322  * while they are still likely to be in fast cache.  Similarly,
 323  * run 3 is merged with run 12 while it still may be lingering in cache.
 324  * This implementation should therefore enjoy much of the cache-friendly
 325  * behavior that quicksort does.  In addition, it does less copying
 326  * than the original mergesort implementation (only runs 1 and 2 are copied)
 327  * and the "balancing" of merges is better (merged runs comprise more nearly
 328  * equal numbers of original runs).
 329  *
 330  * The actual cache-friendly implementation will use a pseudo-stack
 331  * to avoid recursion, and will unroll processing of runs of length 2,
 332  * but it is otherwise similar to the recursive implementation.
 333  */
 334
 /* One level of the pseudo-stack that S_mergesortsv() uses in place of
  * recursion. */
 335 typedef struct {
 336     IV  offset;         /* element offset, from the start of a list, of the 1st of 2 runs at this level */
 337     IV  runs;           /* how many runs must still be combined into 1; 0 once both runs are built and only the merge remains */
 338 } off_runs;             /* pseudo-stack element */
 339
 340
 341 static I32
 342 cmp_desc(pTHX_ gptr a, gptr b)
 343 {
 344     return -PL_sort_RealCmp(aTHX_ a, b);
 345 }
 346
 347 STATIC void
 348 S_mergesortsv(pTHX_ gptr *base, size_t nmemb, SVCOMPARE_t cmp, U32 flags)
 349 {
 350     IV i, run, offset;
 351     I32 sense, level;
 352     register gptr *f1, *f2, *t, *b, *p;
 353     int iwhich;
 354     gptr *aux;
 355     gptr *p1;
 356     gptr small[SMALLSORT];
 357     gptr *which[3];
 358     off_runs stack[60], *stackp;
 359     SVCOMPARE_t savecmp = 0;
 360
 361     if (nmemb <= 1) return;                     /* sorted trivially */
 362
 363     if (flags) {
 364         savecmp = PL_sort_RealCmp;      /* Save current comparison routine, if any */
 365         PL_sort_RealCmp = cmp;  /* Put comparison routine where cmp_desc can find it */
 366         cmp = cmp_desc;
 367     }
 368
 369     if (nmemb <= SMALLSORT) aux = small;        /* use stack for aux array */
 370     else { Newx(aux,nmemb,gptr); }              /* allocate auxilliary array */
 371     level = 0;
 372     stackp = stack;
 373     stackp->runs = dynprep(aTHX_ base, aux, nmemb, cmp);
 374     stackp->offset = offset = 0;
 375     which[0] = which[2] = base;
 376     which[1] = aux;
 377     for (;;) {
 378         /* On levels where both runs have be constructed (stackp->runs == 0),
 379          * merge them, and note the offset of their end, in case the offset
 380          * is needed at the next level up.  Hop up a level, and,
 381          * as long as stackp->runs is 0, keep merging.
 382          */
 383         IV runs = stackp->runs;
 384         if (runs == 0) {
 385             gptr *list1, *list2;
 386             iwhich = level & 1;
 387             list1 = which[iwhich];              /* area where runs are now */
 388             list2 = which[++iwhich];            /* area for merged runs */
 389             do {
 390                 register gptr *l1, *l2, *tp2;
 391                 offset = stackp->offset;
 392                 f1 = p1 = list1 + offset;               /* start of first run */
 393                 p = tp2 = list2 + offset;       /* where merged run will go */
 394                 t = NEXT(p);                    /* where first run ends */
 395                 f2 = l1 = POTHER(t, list2, list1); /* ... on the other side */
 396                 t = NEXT(t);                    /* where second runs ends */
 397                 l2 = POTHER(t, list2, list1);   /* ... on the other side */
 398                 offset = PNELEM(list2, t);
 399                 while (f1 < l1 && f2 < l2) {
 400                     /* If head 1 is larger than head 2, find ALL the elements
 401                     ** in list 2 strictly less than head1, write them all,
 402                     ** then head 1.  Then compare the new heads, and repeat,
 403                     ** until one or both lists are exhausted.
 404                     **
 405                     ** In all comparisons (after establishing
 406                     ** which head to merge) the item to merge
 407                     ** (at pointer q) is the first operand of
 408                     ** the comparison.  When we want to know
 409                     ** if "q is strictly less than the other",
 410                     ** we can't just do
 411                     **    cmp(q, other) < 0
 412                     ** because stability demands that we treat equality
 413                     ** as high when q comes from l2, and as low when
 414                     ** q was from l1.  So we ask the question by doing
 415                     **    cmp(q, other) <= sense
 416                     ** and make sense == 0 when equality should look low,
 417                     ** and -1 when equality should look high.
 418                     */
 419
 420                     register gptr *q;
 421                     if (cmp(aTHX_ *f1, *f2) <= 0) {
 422                         q = f2; b = f1; t = l1;
 423                         sense = -1;
 424                     } else {
 425                         q = f1; b = f2; t = l2;
 426                         sense = 0;
 427                     }
 428
 429
 430                     /* ramp up
 431                     **
 432                     ** Leave t at something strictly
 433                     ** greater than q (or at the end of the list),
 434                     ** and b at something strictly less than q.
 435                     */
 436                     for (i = 1, run = 0 ;;) {
 437                         if ((p = PINDEX(b, i)) >= t) {
 438                             /* off the end */
 439                             if (((p = PINDEX(t, -1)) > b) &&
 440                                 (cmp(aTHX_ *q, *p) <= sense))
 441                                  t = p;
 442                             else b = p;
 443                             break;
 444                         } else if (cmp(aTHX_ *q, *p) <= sense) {
 445                             t = p;
 446                             break;
 447                         } else b = p;
 448                         if (++run >= RTHRESH) i += i;
 449                     }
 450
 451
 452                     /* q is known to follow b and must be inserted before t.
 453                     ** Increment b, so the range of possibilities is [b,t).
 454                     ** Round binary split down, to favor early appearance.
 455                     ** Adjust b and t until q belongs just before t.
 456                     */
 457
 458                     b++;
 459                     while (b < t) {
 460                         p = PINDEX(b, (PNELEM(b, t) - 1) / 2);
 461                         if (cmp(aTHX_ *q, *p) <= sense) {
 462                             t = p;
 463                         } else b = p + 1;
 464                     }
 465
 466
 467                     /* Copy all the strictly low elements */
 468
 469                     if (q == f1) {
 470                         FROMTOUPTO(f2, tp2, t);
 471                         *tp2++ = *f1++;
 472                     } else {
 473                         FROMTOUPTO(f1, tp2, t);
 474                         *tp2++ = *f2++;
 475                     }
 476                 }
 477
 478
 479                 /* Run out remaining list */
 480                 if (f1 == l1) {
 481                        if (f2 < l2) FROMTOUPTO(f2, tp2, l2);
 482                 } else              FROMTOUPTO(f1, tp2, l1);
 483                 p1 = NEXT(p1) = POTHER(tp2, list2, list1);
 484
 485                 if (--level == 0) goto done;
 486                 --stackp;
 487                 t = list1; list1 = list2; list2 = t;    /* swap lists */
 488             } while ((runs = stackp->runs) == 0);
 489         }
 490
 491
 492         stackp->runs = 0;               /* current run will finish level */
 493         /* While there are more than 2 runs remaining,
 494          * turn them into exactly 2 runs (at the "other" level),
 495          * each made up of approximately half the runs.
 496          * Stack the second half for later processing,
 497          * and set about producing the first half now.
 498          */
 499         while (runs > 2) {
 500             ++level;
 501             ++stackp;
 502             stackp->offset = offset;
 503             runs -= stackp->runs = runs / 2;
 504         }
 505         /* We must construct a single run from 1 or 2 runs.
 506          * All the original runs are in which[0] == base.
 507          * The run we construct must end up in which[level&1].
 508          */
 509         iwhich = level & 1;
 510         if (runs == 1) {
 511             /* Constructing a single run from a single run.
 512              * If it's where it belongs already, there's nothing to do.
 513              * Otherwise, copy it to where it belongs.
 514              * A run of 1 is either a singleton at level 0,
 515              * or the second half of a split 3.  In neither event
 516              * is it necessary to set offset.  It will be set by the merge
 517              * that immediately follows.
 518              */
 519             if (iwhich) {       /* Belongs in aux, currently in base */
 520                 f1 = b = PINDEX(base, offset);  /* where list starts */
 521                 f2 = PINDEX(aux, offset);       /* where list goes */
 522                 t = NEXT(f2);                   /* where list will end */
 523                 offset = PNELEM(aux, t);        /* offset thereof */
 524                 t = PINDEX(base, offset);       /* where it currently ends */
 525                 FROMTOUPTO(f1, f2, t);          /* copy */
 526                 NEXT(b) = t;                    /* set up parallel pointer */
 527             } else if (level == 0) goto done;   /* single run at level 0 */
 528         } else {
 529             /* Constructing a single run from two runs.
 530              * The merge code at the top will do that.
 531              * We need only make sure the two runs are in the "other" array,
 532              * so they'll end up in the correct array after the merge.
 533              */
 534             ++level;
 535             ++stackp;
 536             stackp->offset = offset;
 537             stackp->runs = 0;   /* take care of both runs, trigger merge */
 538             if (!iwhich) {      /* Merged runs belong in aux, copy 1st */
 539                 f1 = b = PINDEX(base, offset);  /* where first run starts */
 540                 f2 = PINDEX(aux, offset);       /* where it will be copied */
 541                 t = NEXT(f2);                   /* where first run will end */
 542                 offset = PNELEM(aux, t);        /* offset thereof */
 543                 p = PINDEX(base, offset);       /* end of first run */
 544                 t = NEXT(t);                    /* where second run will end */
 545                 t = PINDEX(base, PNELEM(aux, t)); /* where it now ends */
 546                 FROMTOUPTO(f1, f2, t);          /* copy both runs */
 547                 NEXT(b) = p;                    /* paralled pointer for 1st */
 548                 NEXT(p) = t;                    /* ... and for second */
 549             }
 550         }
 551     }
 552 done:
 553     if (aux != small) Safefree(aux);    /* free iff allocated */
 554     if (flags) {
 555          PL_sort_RealCmp = savecmp;     /* Restore current comparison routine, if any */
 556     }
 557     return;
 558 }
 559
 560 /*
 561  * The quicksort implementation was derived from source code contributed
 562  * by Tom Horsley.
 563  *
 564  * NOTE: this code was derived from Tom Horsley's qsort replacement
 565  * and should not be confused with the original code.
 566  */
 567
 568 /* Copyright (C) Tom Horsley, 1997. All rights reserved.
 569
 570    Permission granted to distribute under the same terms as perl which are
 571    (briefly):
 572
 573     This program is free software; you can redistribute it and/or modify
 574     it under the terms of either:
 575
 576         a) the GNU General Public License as published by the Free
 577         Software Foundation; either version 1, or (at your option) any
 578         later version, or
 579
 580         b) the "Artistic License" which comes with this Kit.
 581
 582    Details on the perl license can be found in the perl source code which
 583    may be located via the www.perl.com web page.
 584
 585    This is the most wonderfulest possible qsort I can come up with (and
 586    still be mostly portable) My (limited) tests indicate it consistently
 587    does about 20% fewer calls to compare than does the qsort in the Visual
 588    C++ library, other vendors may vary.
 589
 590    Some of the ideas in here can be found in "Algorithms" by Sedgewick,
 591    others I invented myself (or more likely re-invented since they seemed
 592    pretty obvious once I watched the algorithm operate for a while).
 593
 594    Most of this code was written while watching the Marlins sweep the Giants
 595    in the 1997 National League Playoffs - no Braves fans allowed to use this
 596    code (just kidding :-).
 597
 598    I realize that if I wanted to be true to the perl tradition, the only
 599    comment in this file would be something like:
 600
 601    ...they shuffled back towards the rear of the line. 'No, not at the
 602    rear!'  the slave-driver shouted. 'Three files up. And stay there...
 603
 604    However, I really needed to violate that tradition just so I could keep
 605    track of what happens myself, not to mention some poor fool trying to
 606    understand this years from now :-).
 607 */
 608
 609 /* ********************************************************** Configuration */
 610
 /* When QSORT_ORDER_GUESS is defined, every stacked partition carries its
  * own qsort_break_even value and each qsort_swap/qsort_rotate bumps a
  * 'swapped' counter (see QSORT_NOTICE_SWAP).  It is defined here unless
  * the build has already supplied a value. */
 611 #ifndef QSORT_ORDER_GUESS
 612 #define QSORT_ORDER_GUESS 2     /* Select doubling version of the netBSD trick */
 613 #endif
 614
 615 /* QSORT_MAX_STACK is the largest number of partitions that can be stacked up for
 616    future processing - a good max upper bound is log base 2 of memory size
 617    (32 on 32 bit machines, 64 on 64 bit machines, etc). In reality it can
 618    safely be smaller than that since the program is taking up some space and
 619    most operating systems only let you grab some subset of contiguous
 620    memory (not to mention that you are normally sorting data larger than
 621    1 byte element size :-).
        It sizes the partition_stack array in S_qsortsvu().
 622 */
 623 #ifndef QSORT_MAX_STACK
 624 #define QSORT_MAX_STACK 32
 625 #endif
 626
 627 /* QSORT_BREAK_EVEN is the size of the largest partition we should insertion sort.
 628    Anything bigger and we use qsort. If you make this too small, the qsort
 629    will probably break (or become less efficient), because it doesn't expect
 630    the middle element of a partition to be the same as the right or left
 631    (you have been warned).
        S_qsortsvu() partitions only while (part_right - part_left) is at least
        the break-even value in force.
 632 */
 633 #ifndef QSORT_BREAK_EVEN
 634 #define QSORT_BREAK_EVEN 6
 635 #endif
 636
 637 /* QSORT_PLAY_SAFE is the size of the largest partition we're willing
 638    to go quadratic on.  We inoculate larger partitions against
 639    quadratic behavior by shuffling them before sorting.  This is not
 640    an absolute guarantee of non-quadratic behavior, but it would take
 641    staggeringly bad luck to pick extreme elements as the pivot
 642    from randomized data.
        S_qsortsvu() performs the shuffle when num_elts > QSORT_PLAY_SAFE.
 643 */
 644 #ifndef QSORT_PLAY_SAFE
 645 #define QSORT_PLAY_SAFE 255
 646 #endif
 647
 648 /* ************************************************************* Data Types */
 649
 650 /* Holds the left and right index values of a partition waiting to be sorted
 651    (the partition includes both left and right - right is NOT one past the
 652    end or anything like that).  S_qsortsvu() keeps an array of
        QSORT_MAX_STACK of these.
 653 */
 654 struct partition_stack_entry {
 655    int left;                 /* index of the partition's first element */
 656    int right;                /* index of the partition's last element (inclusive) */
 657 #ifdef QSORT_ORDER_GUESS
 658    int qsort_break_even;     /* NOTE(review): presumably the break-even value to resume with for this partition; the code using it is outside the part reviewed */
 659 #endif
 660 };
 661
 662 /* ******************************************************* Shorthand Macros */
 663
 664 /* Note that these macros will be used from inside the qsort function where
 665    we happen to know that the variable 'elt_size' contains the size of an
 666    array element and the variable 'temp' points to enough space to hold a
 667    temp element and the variable 'array' points to the array being sorted
 668    and 'compare' is the pointer to the compare routine.
 669
 670    Also note that there are very many highly architecture specific ways
 671    these might be sped up, but this is simply the most generally portable
 672    code I could think of.
 673 */
 674
 675 /* Return < 0, == 0 or > 0 as the value of elt1 is < elt2, == elt2, > elt2.
        elt1 and elt2 are indices into 'array', not element values; the
        comparison itself is delegated to the 'compare' routine that is in
        scope where the macro is used.
 676 */
 677 #define qsort_cmp(elt1, elt2) \
 678    ((*compare)(aTHX_ array[elt1], array[elt2]))
 679
 /* QSORT_NOTICE_SWAP is placed at the top of qsort_swap and qsort_rotate.
  * With QSORT_ORDER_GUESS defined it increments the 'swapped' variable in
  * scope at the point of use (the trailing semicolon is part of the
  * expansion); otherwise it expands to nothing. */
 680 #ifdef QSORT_ORDER_GUESS
 681 #define QSORT_NOTICE_SWAP swapped++;
 682 #else
 683 #define QSORT_NOTICE_SWAP
 684 #endif
 685
 686 /* swaps contents of array elements elt1, elt2 (both are indices into
        'array'), using the 'temp' variable in scope as scratch space.
 687 */
 688 #define qsort_swap(elt1, elt2) \
 689    STMT_START { \
 690       QSORT_NOTICE_SWAP \
 691       temp = array[elt1]; \
 692       array[elt1] = array[elt2]; \
 693       array[elt2] = temp; \
 694    } STMT_END
 695
 696 /* rotate contents of elt1, elt2, elt3 such that elt1 gets elt2, elt2 gets
 697    elt3 and elt3 gets elt1.  All three are indices into 'array'; 'temp'
        holds the old elt1 value meanwhile.  Counts as a single swap for
        QSORT_NOTICE_SWAP.
 698 */
 699 #define qsort_rotate(elt1, elt2, elt3) \
 700    STMT_START { \
 701       QSORT_NOTICE_SWAP \
 702       temp = array[elt1]; \
 703       array[elt1] = array[elt2]; \
 704       array[elt2] = array[elt3]; \
 705       array[elt3] = temp; \
 706    } STMT_END
 707
 708 /* ************************************************************ Debug stuff */
 709
 /* Debug-only consistency checks for the quicksort.
  *
  * With QSORT_DEBUG defined, qsort_assert(t) evaluates t and calls
  * break_here() when it is false (nothing is aborted; set a breakpoint
  * there), and qsort_all_asserts() checks the state of a partition scan.
  * Without QSORT_DEBUG both macros expand to ((void)0) and their arguments
  * are not evaluated.
  *
  * The checker takes the same array and comparison routine types as
  * S_qsortsvu(), because qsort_cmp() indexes 'array' and calls 'compare'
  * with aTHX_; the wrapper macro therefore passes aTHX_, 'array' and
  * 'compare' from the scope in which it is used.
  */
 #ifdef QSORT_DEBUG

 static void
 break_here(void)
 {
    return; /* good place to set a breakpoint */
 }

 #define qsort_assert(t) ((void)( (t) || (break_here(), 0) ))

 /* Check the invariants of a partition scan:
  *   - the pivot chunk [pc_left, pc_right] is not empty and lies strictly
  *     between u_right and u_left;
  *   - every element in (u_right, pc_left) compares less than the pivot;
  *   - every element of the pivot chunk compares equal to array[pc_right];
  *   - every element in (pc_right, u_left) compares greater than the pivot.
  */
 static void
 doqsort_all_asserts(
    pTHX_
    SV ** array,
    SVCOMPARE_t compare,
    int pc_left, int pc_right, int u_left, int u_right)
 {
    int i;

    qsort_assert(pc_left <= pc_right);
    qsort_assert(u_right < pc_left);
    qsort_assert(pc_right < u_left);
    for (i = u_right + 1; i < pc_left; ++i) {
       qsort_assert(qsort_cmp(i, pc_left) < 0);
    }
    for (i = pc_left; i < pc_right; ++i) {
       qsort_assert(qsort_cmp(i, pc_right) == 0);
    }
    for (i = pc_right + 1; i < u_left; ++i) {
       qsort_assert(qsort_cmp(pc_right, i) < 0);
    }
 }

 #define qsort_all_asserts(PC_LEFT, PC_RIGHT, U_LEFT, U_RIGHT) \
    doqsort_all_asserts(aTHX_ array, compare, \
                  PC_LEFT, PC_RIGHT, U_LEFT, U_RIGHT)

 #else

 #define qsort_assert(t) ((void)0)

 #define qsort_all_asserts(PC_LEFT, PC_RIGHT, U_LEFT, U_RIGHT) ((void)0)

 #endif
 755
 756 /* ****************************************************************** qsort */
 757
 758 STATIC void /* the standard unstable (u) quicksort (qsort); sorts array[0 .. num_elts-1] in place */
 759 S_qsortsvu(pTHX_ SV ** array, size_t num_elts, SVCOMPARE_t compare)
 760 {
 761    register SV * temp; /* scratch slot for the shuffle, qsort_rotate and the insertion sort */
 762
 763    struct partition_stack_entry partition_stack[QSORT_MAX_STACK]; /* pending partitions (explicit stack, no recursion) */
 764    int next_stack_entry = 0;
 765
 766    int part_left;
 767    int part_right;
 768 #ifdef QSORT_ORDER_GUESS
 769    int qsort_break_even;
 770    int swapped;
 771 #endif
 772
 773    /* Make sure we actually have work to do.
 774    */
 775    if (num_elts <= 1) {
 776       return;
 777    }
 778
 779    /* Inoculate large partitions against quadratic behavior */
 780    if (num_elts > QSORT_PLAY_SAFE) {
 781       register size_t n;
 782       register SV ** const q = array;
 783       for (n = num_elts; n > 1; ) {
 784          register const size_t j = (size_t)(n-- * Drand01()); /* pick j among the n unfixed slots, then shrink n (assumes Drand01() < 1 - TODO confirm) */
 785          temp = q[j];
 786          q[j] = q[n];
 787          q[n] = temp;
 788       }
 789    }
 790
 791    /* Setup the initial partition definition and fall into the sorting loop
 792    */
 793    part_left = 0;
 794    part_right = (int)(num_elts - 1); /* NOTE(review): indices are int; a num_elts above INT_MAX would not fit here */
 795 #ifdef QSORT_ORDER_GUESS
 796    qsort_break_even = QSORT_BREAK_EVEN;
 797 #else
 798 #define qsort_break_even QSORT_BREAK_EVEN
 799 #endif
 800    for ( ; ; ) {
 801       if ((part_right - part_left) >= qsort_break_even) {
 802          /* OK, this is gonna get hairy, so let's try to document all the
 803             concepts and abbreviations and variables and what they keep
 804             track of:
 805
 806             pc: pivot chunk - the set of array elements we accumulate in the
 807                 middle of the partition, all equal in value to the original
 808                 pivot element selected. The pc is defined by:
 809
 810                 pc_left - the leftmost array index of the pc
 811                 pc_right - the rightmost array index of the pc
 812
 813                 we start with pc_left == pc_right and only one element
 814                 in the pivot chunk (but it can grow during the scan).
 815
 816             u:  uncompared elements - the set of elements in the partition
 817                 we have not yet compared to the pivot value. There are two
 818                 uncompared sets during the scan - one to the left of the pc
 819                 and one to the right.
 820
 821                 u_right - the rightmost index of the left side's uncompared set
 822                 u_left - the leftmost index of the right side's uncompared set
 823
 824                 The leftmost index of the left side's uncompared set
 825                 doesn't need its own variable because it is always defined
 826                 by the leftmost edge of the whole partition (part_left). The
 827                 same goes for the rightmost edge of the right partition
 828                 (part_right).
 829
 830                 We know there are no uncompared elements on the left once we
 831                 get u_right < part_left and no uncompared elements on the
 832                 right once u_left > part_right. When both these conditions
 833                 are met, we have completed the scan of the partition.
 834
 835                 Any elements which are between the pivot chunk and the
 836                 uncompared elements should be less than the pivot value on
 837                 the left side and greater than the pivot value on the right
 838                 side (in fact, the goal of the whole algorithm is to arrange
 839                 for that to be true and make the groups of less-than and
 840                 greater-than elements into new partitions to sort again).
 841
 842             As you marvel at the complexity of the code and wonder why it
 843             has to be so confusing, consider some of the things this level
 844             of confusion brings:
 845
 846             Once I do a compare, I squeeze every ounce of juice out of it. I
 847             never do compare calls I don't have to do, and I certainly never
 848             do redundant calls.
 849
 850             I also never swap any elements unless I can prove there is a
 851             good reason. Many sort algorithms will swap a known value with
 852             an uncompared value just to get things in the right place (or
 853             avoid complexity :-), but that uncompared value, once it gets
 854             compared, may then have to be swapped again. A lot of the
 855             complexity of this code is due to the fact that it never swaps
 856             anything except compared values, and it only swaps them when the
 857             compare shows they are out of position.
 858          */
 859          int pc_left, pc_right;
 860          int u_right, u_left;
 861
 862          int s;
 863
 864          pc_left = ((part_left + part_right) / 2); /* pivot starts as the middle element of the partition */
 865          pc_right = pc_left;
 866          u_right = pc_left - 1;
 867          u_left = pc_right + 1;
 868
 869          /* Qsort works best when the pivot value is also the median value
 870             in the partition (unfortunately you can't find the median value
 871             without first sorting :-), so to give the algorithm a helping
 872             hand, we pick 3 elements and sort them and use the median value
 873             of that tiny set as the pivot value.
 874
 875             Some versions of qsort like to use the left middle and right as
 876             the 3 elements to sort so they can ensure the ends of the
 877             partition will contain values which will stop the scan in the
 878             compare loop, but when you have to call an arbitrarily complex
 879             routine to do a compare, it's really better to just keep track of
 880             array index values to know when you hit the edge of the
 881             partition and avoid the extra compare. An even better reason to
 882             avoid using a compare call is the fact that you can drop off the
 883             edge of the array if someone foolishly provides you with an
 884             unstable compare function that doesn't always provide consistent
 885             results.
 886
 887             So, since it is simpler for us to compare the three adjacent
 888             elements in the middle of the partition, those are the ones we
 889             pick here (conveniently pointed at by u_right, pc_left, and
 890             u_left). The values of the left, center, and right elements
 891             are referred to as l, c and r in the following comments.
 892          */
 893
 894 #ifdef QSORT_ORDER_GUESS
 895          swapped = 0;
 896 #endif
 897          s = qsort_cmp(u_right, pc_left);
 898          if (s < 0) {
 899             /* l < c */
 900             s = qsort_cmp(pc_left, u_left);
 901             /* if l < c, c < r - already in order - nothing to do */
 902             if (s == 0) {
 903                /* l < c, c == r - already in order, pc grows */
 904                ++pc_right;
 905                qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
 906             } else if (s > 0) {
 907                /* l < c, c > r - need to know more */
 908                s = qsort_cmp(u_right, u_left);
 909                if (s < 0) {
 910                   /* l < c, c > r, l < r - swap c & r to get ordered */
 911                   qsort_swap(pc_left, u_left);
 912                   qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
 913                } else if (s == 0) {
 914                   /* l < c, c > r, l == r - swap c&r, grow pc */
 915                   qsort_swap(pc_left, u_left);
 916                   --pc_left;
 917                   qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
 918                } else {
 919                   /* l < c, c > r, l > r - make lcr into rlc to get ordered */
 920                   qsort_rotate(pc_left, u_right, u_left);
 921                   qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
 922                }
 923             }
 924          } else if (s == 0) {
 925             /* l == c */
 926             s = qsort_cmp(pc_left, u_left);
 927             if (s < 0) {
 928                /* l == c, c < r - already in order, grow pc */
 929                --pc_left;
 930                qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
 931             } else if (s == 0) {
 932                /* l == c, c == r - already in order, grow pc both ways */
 933                --pc_left;
 934                ++pc_right;
 935                qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
 936             } else {
 937                /* l == c, c > r - swap l & r, grow pc */
 938                qsort_swap(u_right, u_left);
 939                ++pc_right;
 940                qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
 941             }
 942          } else {
 943             /* l > c */
 944             s = qsort_cmp(pc_left, u_left);
 945             if (s < 0) {
 946                /* l > c, c < r - need to know more */
 947                s = qsort_cmp(u_right, u_left);
 948                if (s < 0) {
 949                   /* l > c, c < r, l < r - swap l & c to get ordered */
 950                   qsort_swap(u_right, pc_left);
 951                   qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
 952                } else if (s == 0) {
 953                   /* l > c, c < r, l == r - swap l & c, grow pc */
 954                   qsort_swap(u_right, pc_left);
 955                   ++pc_right;
 956                   qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
 957                } else {
 958                   /* l > c, c < r, l > r - rotate lcr into crl to order */
 959                   qsort_rotate(u_right, pc_left, u_left);
 960                   qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
 961                }
 962             } else if (s == 0) {
 963                /* l > c, c == r - swap ends, grow pc */
 964                qsort_swap(u_right, u_left);
 965                --pc_left;
 966                qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
 967             } else {
 968                /* l > c, c > r - swap ends to get in order */
 969                qsort_swap(u_right, u_left);
 970                qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
 971             }
 972          }
 973          /* We now know the 3 middle elements have been compared and
 974             arranged in the desired order, so we can shrink the uncompared
 975             sets on both sides
 976          */
 977          --u_right;
 978          ++u_left;
 979          qsort_all_asserts(pc_left, pc_right, u_left, u_right);
 980
 981          /* The above massive nested if was the simple part :-). We now have
 982             the middle 3 elements ordered and we need to scan through the
 983             uncompared sets on either side, swapping elements that are on
 984             the wrong side or simply shuffling equal elements around to get
 985             all equal elements into the pivot chunk.
 986          */
 987
 988          for ( ; ; ) {
 989             int still_work_on_left;
 990             int still_work_on_right;
 991
 992             /* Scan the uncompared values on the left. If I find a value
 993                equal to the pivot value, move it over so it is adjacent to
 994                the pivot chunk and expand the pivot chunk. If I find a value
 995                less than the pivot value, then just leave it - it's already
 996                on the correct side of the partition. If I find a greater
 997                value, then stop the scan.
 998             */
 999             while ((still_work_on_left = (u_right >= part_left))) {
1000                s = qsort_cmp(u_right, pc_left);
1001                if (s < 0) {
1002                   --u_right;
1003                } else if (s == 0) {
1004                   --pc_left;
1005                   if (pc_left != u_right) {
1006                      qsort_swap(u_right, pc_left);
1007                   }
1008                   --u_right;
1009                } else {
1010                   break;
1011                }
1012                qsort_assert(u_right < pc_left);
1013                qsort_assert(pc_left <= pc_right);
1014                qsort_assert(qsort_cmp(u_right + 1, pc_left) <= 0);
1015                qsort_assert(qsort_cmp(pc_left, pc_right) == 0);
1016             }
1017
1018             /* Do a mirror image scan of uncompared values on the right
1019             */
1020             while ((still_work_on_right = (u_left <= part_right))) {
1021                s = qsort_cmp(pc_right, u_left);
1022                if (s < 0) {
1023                   ++u_left;
1024                } else if (s == 0) {
1025                   ++pc_right;
1026                   if (pc_right != u_left) {
1027                      qsort_swap(pc_right, u_left);
1028                   }
1029                   ++u_left;
1030                } else {
1031                   break;
1032                }
1033                qsort_assert(u_left > pc_right);
1034                qsort_assert(pc_left <= pc_right);
1035                qsort_assert(qsort_cmp(pc_right, u_left - 1) <= 0);
1036                qsort_assert(qsort_cmp(pc_left, pc_right) == 0);
1037             }
1038
1039             if (still_work_on_left) {
1040                /* I know I have a value on the left side which needs to be
1041                   on the right side, but I need to know more to decide
1042                   exactly the best thing to do with it.
1043                */
1044                if (still_work_on_right) {
1045                   /* I know I have values on both sides which are out of
1046                      position. This is a big win because I kill two birds
1047                      with one swap (so to speak). I can advance the
1048                      uncompared pointers on both sides after swapping both
1049                      of them into the right place.
1050                   */
1051                   qsort_swap(u_right, u_left);
1052                   --u_right;
1053                   ++u_left;
1054                   qsort_all_asserts(pc_left, pc_right, u_left, u_right);
1055                } else {
1056                   /* I have an out of position value on the left, but the
1057                      right is fully scanned, so I "slide" the pivot chunk
1058                      and any less-than values left one to make room for the
1059                      greater value over on the right. If the out of position
1060                      value is immediately adjacent to the pivot chunk (there
1061                      are no less-than values), I can do that with a swap,
1062                      otherwise, I have to rotate one of the less than values
1063                      into the former position of the out of position value
1064                      and the right end of the pivot chunk into the left end
1065                      (got all that?).
1066                   */
1067                   --pc_left;
1068                   if (pc_left == u_right) {
1069                      qsort_swap(u_right, pc_right);
1070                      qsort_all_asserts(pc_left, pc_right-1, u_left, u_right-1);
1071                   } else {
1072                      qsort_rotate(u_right, pc_left, pc_right);
1073                      qsort_all_asserts(pc_left, pc_right-1, u_left, u_right-1);
1074                   }
1075                   --pc_right;
1076                   --u_right;
1077                }
1078             } else if (still_work_on_right) {
1079                /* Mirror image of complex case above: I have an out of
1080                   position value on the right, but the left is fully
1081                   scanned, so I need to shuffle things around to make room
1082                   for the right value on the left.
1083                */
1084                ++pc_right;
1085                if (pc_right == u_left) {
1086                   qsort_swap(u_left, pc_left);
1087                   qsort_all_asserts(pc_left+1, pc_right, u_left+1, u_right);
1088                } else {
1089                   qsort_rotate(pc_right, pc_left, u_left);
1090                   qsort_all_asserts(pc_left+1, pc_right, u_left+1, u_right);
1091                }
1092                ++pc_left;
1093                ++u_left;
1094             } else {
1095                /* No more scanning required on either side of partition,
1096                   break out of loop and figure out next set of partitions
1097                */
1098                break;
1099             }
1100          }
1101
1102          /* The elements in the pivot chunk are now in the right place. They
1103             will never move or be compared again. All I have to do is decide
1104             what to do with the stuff to the left and right of the pivot
1105             chunk.
1106
1107             Notes on the QSORT_ORDER_GUESS ifdef code:
1108
1109             1. If I just built these partitions without swapping any (or
1110                very many) elements, there is a chance that the elements are
1111                already ordered properly (being properly ordered will
1112                certainly result in no swapping, but the converse can't be
1113                proved :-).
1114
1115             2. A (properly written) insertion sort will run faster on
1116                already ordered data than qsort will.
1117
1118             3. Perhaps there is some way to make a good guess about
1119                switching to an insertion sort earlier than partition size 6
1120                (for instance - we could save the partition size on the stack
1121                and increase the size each time we find we didn't swap, thus
1122                switching to insertion sort earlier for partitions with a
1123                history of not swapping).
1124
1125             4. Naturally, if I just switch right away, it will make
1126                artificial benchmarks with pure ascending (or descending)
1127                data look really good, but is that a good reason in general?
1128                Hard to say...
1129          */
1130
1131 #ifdef QSORT_ORDER_GUESS
1132          if (swapped < 3) {
1133 #if QSORT_ORDER_GUESS == 1
1134             qsort_break_even = (part_right - part_left) + 1;
1135 #endif
1136 #if QSORT_ORDER_GUESS == 2
1137             qsort_break_even *= 2;
1138 #endif
1139 #if QSORT_ORDER_GUESS == 3
1140             const int prev_break = qsort_break_even;
1141             qsort_break_even *= qsort_break_even;
1142             if (qsort_break_even < prev_break) {
1143                qsort_break_even = (part_right - part_left) + 1;
1144             }
1145 #endif
1146          } else {
1147             qsort_break_even = QSORT_BREAK_EVEN;
1148          }
1149 #endif
1150
1151          if (part_left < pc_left) {
1152             /* There are elements on the left which need more processing.
1153                Check the right as well before deciding what to do.
1154             */
1155             if (pc_right < part_right) {
1156                /* We have two partitions to be sorted. Stack the biggest one
1157                   and process the smallest one on the next iteration. This
1158                   minimizes the stack height by ensuring that any additional
1159                   stack entries must come from the smallest partition which
1160                   (because it is smallest) will have the fewest
1161                   opportunities to generate additional stack entries.
1162                */
1163                if ((part_right - pc_right) > (pc_left - part_left)) {
1164                   /* stack the right partition, process the left */
1165                   partition_stack[next_stack_entry].left = pc_right + 1;
1166                   partition_stack[next_stack_entry].right = part_right;
1167 #ifdef QSORT_ORDER_GUESS
1168                   partition_stack[next_stack_entry].qsort_break_even = qsort_break_even;
1169 #endif
1170                   part_right = pc_left - 1;
1171                } else {
1172                   /* stack the left partition, process the right */
1173                   partition_stack[next_stack_entry].left = part_left;
1174                   partition_stack[next_stack_entry].right = pc_left - 1;
1175 #ifdef QSORT_ORDER_GUESS
1176                   partition_stack[next_stack_entry].qsort_break_even = qsort_break_even;
1177 #endif
1178                   part_left = pc_right + 1;
1179                }
1180                qsort_assert(next_stack_entry < QSORT_MAX_STACK); /* NOTE(review): checked only after the slot above was already written */
1181                ++next_stack_entry;
1182             } else {
1183                /* The elements on the left are the only remaining elements
1184                   that need sorting, arrange for them to be processed as the
1185                   next partition.
1186                */
1187                part_right = pc_left - 1;
1188             }
1189          } else if (pc_right < part_right) {
1190             /* There is only one chunk on the right to be sorted, make it
1191                the new partition and loop back around.
1192             */
1193             part_left = pc_right + 1;
1194          } else {
1195             /* This whole partition wound up in the pivot chunk, so
1196                we need to get a new partition off the stack.
1197             */
1198             if (next_stack_entry == 0) {
1199                /* the stack is empty - we are done */
1200                break;
1201             }
1202             --next_stack_entry;
1203             part_left = partition_stack[next_stack_entry].left;
1204             part_right = partition_stack[next_stack_entry].right;
1205 #ifdef QSORT_ORDER_GUESS
1206             qsort_break_even = partition_stack[next_stack_entry].qsort_break_even;
1207 #endif
1208          }
1209       } else {
1210          /* This partition is too small to fool with qsort complexity, just
1211             do an ordinary insertion sort to minimize overhead.
1212          */
1213          int i;
1214          /* Assume 1st element is in right place already, and start checking
1215             at 2nd element to see where it should be inserted.
1216          */
1217          for (i = part_left + 1; i <= part_right; ++i) {
1218             int j;
1219             /* Scan (backwards - just in case 'i' is already in right place)
1220                through the elements already sorted to see if the ith element
1221                belongs ahead of one of them.
1222             */
1223             for (j = i - 1; j >= part_left; --j) {
1224                if (qsort_cmp(i, j) >= 0) {
1225                   /* i belongs right after j
1226                   */
1227                   break;
1228                }
1229             }
1230             ++j; /* j is now the slot element i should occupy */
1231             if (j != i) {
1232                /* Looks like we really need to move some things
1233                */
1234                int k;
1235                temp = array[i];
1236                for (k = i - 1; k >= j; --k) /* shift [j, i-1] up by one to open slot j */
1237                   array[k + 1] = array[k];
1238                array[j] = temp;
1239             }
1240          }
1241
1242          /* That partition is now sorted, grab the next one, or get out
1243             of the loop if there aren't any more.
1244          */
1245
1246          if (next_stack_entry == 0) {
1247             /* the stack is empty - we are done */
1248             break;
1249          }
1250          --next_stack_entry;
1251          part_left = partition_stack[next_stack_entry].left;
1252          part_right = partition_stack[next_stack_entry].right;
1253 #ifdef QSORT_ORDER_GUESS
1254          qsort_break_even = partition_stack[next_stack_entry].qsort_break_even;
1255 #endif
1256       }
1257    }
1258
1259    /* Believe it or not, the array is sorted at this point! */
1260 }
1261
1262 /* Stabilize what is, presumably, an otherwise unstable sort method.
1263  * We do that by allocating (or having on hand) an array of pointers
1264  * that is the same size as the original array of elements to be sorted.
1265  * We initialize this parallel array with the addresses of the original
1266  * array elements.  This indirection can make you crazy.
1267  * Some pictures can help.  After initializing, we have
1268  *
1269  *  indir                  list1
1270  * +----+                 +----+
1271  * |    | --------------> |    | ------> first element to be sorted
1272  * +----+                 +----+
1273  * |    | --------------> |    | ------> second element to be sorted
1274  * +----+                 +----+
1275  * |    | --------------> |    | ------> third element to be sorted
1276  * +----+                 +----+
1277  *  ...
1278  * +----+                 +----+
1279  * |    | --------------> |    | ------> n-1st element to be sorted
1280  * +----+                 +----+
1281  * |    | --------------> |    | ------> n-th element to be sorted
1282  * +----+                 +----+
1283  *
1284  * During the sort phase, we leave the elements of list1 where they are,
1285  * and sort the pointers in the indirect array in the same order determined
1286  * by the original comparison routine on the elements pointed to.
1287  * Because we don't move the elements of list1 around through
1288  * this phase, we can break ties on elements that compare equal
1289  * using their address in the list1 array, ensuring stability.
1290  * This leaves us with something looking like
1291  *
1292  *  indir                  list1
1293  * +----+                 +----+
1294  * |    | --+       +---> |    | ------> first element to be sorted
1295  * +----+   |       |     +----+
1296  * |    | --|-------|---> |    | ------> second element to be sorted
1297  * +----+   |       |     +----+
1298  * |    | --|-------+ +-> |    | ------> third element to be sorted
1299  * +----+   |         |   +----+
1300  *  ...
1301  * +----+    | |   | |    +----+
1302  * |    | ---|-+   | +--> |    | ------> n-1st element to be sorted
1303  * +----+    |     |      +----+
1304  * |    | ---+     +----> |    | ------> n-th element to be sorted
1305  * +----+                 +----+
1306  *
1307  * where the i-th element of the indirect array points to the element
1308  * that should be i-th in the sorted array.  After the sort phase,
1309  * we have to put the elements of list1 into the places
1310  * dictated by the indirect array.
1311  */
1312
1313
1314 static I32 /* stable ascending comparator for the indirect (pointer) array */
1315 cmpindir(pTHX_ gptr a, gptr b)
1316 {
1317     gptr * const ap = (gptr *)a; /* a and b are themselves pointers to the elements being ordered */
1318     gptr * const bp = (gptr *)b;
1319     const I32 sense = PL_sort_RealCmp(aTHX_ *ap, *bp); /* compare the pointed-to elements with the saved routine */
1320
1321     if (sense)
1322         return sense;
1323     return (ap > bp) ? 1 : ((ap < bp) ? -1 : 0); /* tie: fall back to address order of the two slots */
1324 }
1325
1326 static I32 /* stable descending comparator for the indirect (pointer) array */
1327 cmpindir_desc(pTHX_ gptr a, gptr b)
1328 {
1329     gptr * const ap = (gptr *)a; /* a and b are themselves pointers to the elements being ordered */
1330     gptr * const bp = (gptr *)b;
1331     const I32 sense = PL_sort_RealCmp(aTHX_ *ap, *bp); /* compare the pointed-to elements with the saved routine */
1332
1333     /* Reverse the default */
1334     if (sense)
1335         return -sense;
1336     /* But don't reverse the stability test: ties still go by ascending address. */
1337     return (ap > bp) ? 1 : ((ap < bp) ? -1 : 0);
1338
1339 }
1340
1341 STATIC void /* quicksort front end: honours SORTf_STABLE and SORTf_DESC in flags */
1342 S_qsortsv(pTHX_ gptr *list1, size_t nmemb, SVCOMPARE_t cmp, U32 flags)
1343 {
1344     if ((flags & SORTf_STABLE) != 0) {
1345          register gptr **pp, *q;
1346          register size_t n, j, i;
1347          gptr *small[SMALLSORT], **indir, tmp;
1348          SVCOMPARE_t savecmp;
1349          if (nmemb <= 1) return;     /* sorted trivially */
1350
1351          /* Small arrays can use the stack, big ones must be allocated */
1352          if (nmemb <= SMALLSORT) indir = small;
1353          else { Newx(indir, nmemb, gptr *); }
1354
1355          /* Copy pointers to original array elements into indirect array */
1356          for (n = nmemb, pp = indir, q = list1; n--; ) *pp++ = q++;
1357
1358          savecmp = PL_sort_RealCmp;     /* Save current comparison routine, if any */
1359          PL_sort_RealCmp = cmp; /* Put comparison routine where cmpindir can find it */
1360
1361          /* sort, with indirection */
1362          S_qsortsvu(aTHX_ (gptr *)indir, nmemb,
1363                     ((flags & SORTf_DESC) != 0 ? cmpindir_desc : cmpindir));
1364
1365          pp = indir;
1366          q = list1;
1367          for (n = nmemb; n--; ) {
1368               /* Assert A: all elements of q with index > n are already
1369                * in place.  This is vacuously true at the start, and we
1370                * put element n where it belongs below (if it wasn't
1371                * already where it belonged). Assert B: we only move
1372                * elements that aren't where they belong,
1373                * so, by A, we never tamper with elements above n.
1374                */
1375               j = pp[n] - q;            /* This sets j so that q[j] is
1376                                          * at pp[n].  *pp[j] belongs in
1377                                          * q[j], by construction.
1378                                          */
1379               if (n != j) {             /* all's well if n == j */
1380                    tmp = q[j];          /* save what's in q[j] */
1381                    do {
1382                         q[j] = *pp[j];  /* put *pp[j] where it belongs */
1383                         i = pp[j] - q;  /* the index in q of the element
1384                                          * just moved */
1385                         pp[j] = q + j;  /* this is ok now */
1386                    } while ((j = i) != n);
1387                    /* There are only finitely many (nmemb) addresses
1388                     * in the pp array.
1389                     * So we must eventually revisit an index we saw before.
1390                     * Suppose the first revisited index is k != n.
1391                     * An index is visited because something else belongs there.
1392                     * If we visit k twice, then two different elements must
1393                     * belong in the same place, which cannot be.
1394                     * So j must get back to n, the loop terminates,
1395                     * and we put the saved element where it belongs.
1396                     */
1397                    q[n] = tmp;          /* put what belongs into
1398                                          * the n-th element */
1399               }
1400          }
1401
1402         /* free iff allocated */
1403          if (indir != small) { Safefree(indir); }
1404          /* restore prevailing comparison routine (NOTE(review): skipped if cmp exits non-locally - confirm) */
1405          PL_sort_RealCmp = savecmp;
1406     } else if ((flags & SORTf_DESC) != 0) {
1407          SVCOMPARE_t savecmp = PL_sort_RealCmp; /* Save current comparison routine, if any */
1408          PL_sort_RealCmp = cmp; /* Put comparison routine where cmp_desc can find it */
1409          cmp = cmp_desc; /* cmp_desc is defined outside this view; presumably it reverses PL_sort_RealCmp - TODO confirm */
1410          S_qsortsvu(aTHX_ list1, nmemb, cmp);
1411          /* restore prevailing comparison routine */
1412          PL_sort_RealCmp = savecmp;
1413     } else {
1414          S_qsortsvu(aTHX_ list1, nmemb, cmp); /* neither stable nor descending: sort list1 directly with cmp */
1415     }
1416 }
1417
1418 /*
1419 =head1 Array Manipulation Functions
1420
1421 =for apidoc sortsv
1422
1423 Sort an array. Here is an example:
1424
1425     sortsv(AvARRAY(av), av_len(av)+1, Perl_sv_cmp_locale);
1426
1427 Currently this always uses mergesort. See sortsv_flags for a more
1428 flexible routine.
1429
1430 =cut
1431 */
1432
1433 void
1434 Perl_sortsv(pTHX_ SV **array, size_t nmemb, SVCOMPARE_t cmp)
1435 {
1436     sortsv_flags(array, nmemb, cmp, 0); /* flags 0: no SORTf_* option set (assumes sortsv_flags maps to Perl_sortsv_flags - TODO confirm) */
1437 }
1438
1439 /*
1440 =for apidoc sortsv_flags
1441
1442 Sort an array, with various options.
1443
1444 =cut
1445 */
1446 void
1447 Perl_sortsv_flags(pTHX_ SV **array, size_t nmemb, SVCOMPARE_t cmp, U32 flags)
1448 {
1449     void (*sortsvp)(pTHX_ SV **array, size_t nmemb, SVCOMPARE_t cmp, U32 flags)
1450       = ((flags & SORTf_QSORT) != 0 ? S_qsortsv : S_mergesortsv); /* SORTf_QSORT selects quicksort, otherwise mergesort */
1451
1452     sortsvp(aTHX_ array, nmemb, cmp, flags); /* flags are passed through unchanged to the chosen sorter */
1453 }
1454
/* Flag tests used by the numeric sort paths below.
 *   SvSIOK(sv)  - SVf_IOK is set and SVf_IVisUV is clear.
 *   SvNSIOK(sv) - SVf_NOK is set, or SvSIOK(sv) holds.
 *   SvNSIV(sv)  - SvNVX(sv) if SvNOK(sv), else SvIVX(sv) if SvSIOK(sv),
 *                 else the result of sv_2nv(sv).
 * Each macro may evaluate its argument more than once. */
#define SvSIOK(sv) \
    ((SvFLAGS(sv) & (SVf_IOK|SVf_IVisUV)) == SVf_IOK)
#define SvNSIOK(sv) \
    ((SvFLAGS(sv) & SVf_NOK) || SvSIOK(sv))
#define SvNSIV(sv) \
    ( SvNOK(sv) ? SvNVX(sv) : ( SvSIOK(sv) ? SvIVX(sv) : sv_2nv(sv) ) )
1458
1459 PP(pp_sort)
1460 {
1461     dVAR; dSP; dMARK; dORIGMARK;
1462     register SV **p1 = ORIGMARK+1, **p2;
1463     register I32 max, i;
1464     AV* av = NULL;
1465     HV *stash;
1466     GV *gv;
1467     CV *cv = 0;
1468     I32 gimme = GIMME;
1469     OP* const nextop = PL_op->op_next;
1470     I32 overloading = 0;
1471     bool hasargs = FALSE;
1472     I32 is_xsub = 0;
1473     I32 sorting_av = 0;
1474     const U8 priv = PL_op->op_private;
1475     const U8 flags = PL_op->op_flags;
1476     U32 sort_flags = 0;
1477     void (*sortsvp)(pTHX_ SV **array, size_t nmemb, SVCOMPARE_t cmp, U32 flags)
1478       = Perl_sortsv_flags;
1479     I32 all_SIVs = 1;
1480
1481     if ((priv & OPpSORT_DESCEND) != 0)
1482         sort_flags |= SORTf_DESC;
1483     if ((priv & OPpSORT_QSORT) != 0)
1484         sort_flags |= SORTf_QSORT;
1485     if ((priv & OPpSORT_STABLE) != 0)
1486         sort_flags |= SORTf_STABLE;
1487
1488     if (gimme != G_ARRAY) {
1489         SP = MARK;
1490         EXTEND(SP,1);
1491         RETPUSHUNDEF;
1492     }
1493
1494     ENTER;
1495     SAVEVPTR(PL_sortcop);
1496     if (flags & OPf_STACKED) {
1497         if (flags & OPf_SPECIAL) {
1498             OP *kid = cLISTOP->op_first->op_sibling;    /* pass pushmark */
1499             kid = kUNOP->op_first;                      /* pass rv2gv */
1500             kid = kUNOP->op_first;                      /* pass leave */
1501             PL_sortcop = kid->op_next;
1502             stash = CopSTASH(PL_curcop);
1503         }
1504         else {
1505             cv = sv_2cv(*++MARK, &stash, &gv, 0);
1506             if (cv && SvPOK(cv)) {
1507                 const char * const proto = SvPV_nolen_const((SV*)cv);
1508                 if (proto && strEQ(proto, "$$")) {
1509                     hasargs = TRUE;
1510                 }
1511             }
1512             if (!(cv && CvROOT(cv))) {
1513                 if (cv && CvXSUB(cv)) {
1514                     is_xsub = 1;
1515                 }
1516                 else if (gv) {
1517                     SV *tmpstr = sv_newmortal();
1518                     gv_efullname3(tmpstr, gv, Nullch);
1519                     DIE(aTHX_ "Undefined sort subroutine \"%"SVf"\" called",
1520                         tmpstr);
1521                 }
1522                 else {
1523                     DIE(aTHX_ "Undefined subroutine in sort");
1524                 }
1525             }
1526
1527             if (is_xsub)
1528                 PL_sortcop = (OP*)cv;
1529             else
1530                 PL_sortcop = CvSTART(cv);
1531         }
1532     }
1533     else {
1534         PL_sortcop = Nullop;
1535         stash = CopSTASH(PL_curcop);
1536     }
1537
1538     /* optimiser converts "@a = sort @a" to "sort \@a";
1539      * in case of tied @a, pessimise: push (@a) onto stack, then assign
1540      * result back to @a at the end of this function */
1541     if (priv & OPpSORT_INPLACE) {
1542         assert( MARK+1 == SP && *SP && SvTYPE(*SP) == SVt_PVAV);
1543         (void)POPMARK; /* remove mark associated with ex-OP_AASSIGN */
1544         av = (AV*)(*SP);
1545         max = AvFILL(av) + 1;
1546         if (SvMAGICAL(av)) {
1547             MEXTEND(SP, max);
1548             p2 = SP;
1549             for (i=0; i < max; i++) {
1550                 SV **svp = av_fetch(av, i, FALSE);
1551                 *SP++ = (svp) ? *svp : Nullsv;
1552             }
1553         }
1554         else {
1555             if (SvREADONLY(av))
1556                 Perl_croak(aTHX_ PL_no_modify);
1557             else
1558                 SvREADONLY_on(av);
1559             p1 = p2 = AvARRAY(av);
1560             sorting_av = 1;
1561         }
1562     }
1563     else {
1564         p2 = MARK+1;
1565         max = SP - MARK;
1566    }
1567
1568     /* shuffle stack down, removing optional initial cv (p1!=p2), plus
1569      * any nulls; also stringify or converting to integer or number as
1570      * required any args */
1571     for (i=max; i > 0 ; i--) {
1572         if ((*p1 = *p2++)) {                    /* Weed out nulls. */
1573             SvTEMP_off(*p1);
1574             if (!PL_sortcop) {
1575                 if (priv & OPpSORT_NUMERIC) {
1576                     if (priv & OPpSORT_INTEGER) {
1577                         if (!SvIOK(*p1)) {
1578                             if (SvAMAGIC(*p1))
1579                                 overloading = 1;
1580                             else
1581                                 (void)sv_2iv(*p1);
1582                         }
1583                     }
1584                     else {
1585                         if (!SvNSIOK(*p1)) {
1586                             if (SvAMAGIC(*p1))
1587                                 overloading = 1;
1588                             else
1589                                 (void)sv_2nv(*p1);
1590                         }
1591                         if (all_SIVs && !SvSIOK(*p1))
1592                             all_SIVs = 0;
1593                     }
1594                 }
1595                 else {
1596                     if (!SvPOK(*p1)) {
1597                         if (SvAMAGIC(*p1))
1598                             overloading = 1;
1599                         else
1600                             (void)sv_2pv_flags(*p1, 0,
1601                                                SV_GMAGIC|SV_CONST_RETURN);
1602                     }
1603                 }
1604             }
1605             p1++;
1606         }
1607         else
1608             max--;
1609     }
1610     if (sorting_av)
1611         AvFILLp(av) = max-1;
1612
1613     if (max > 1) {
1614         SV **start;
1615         if (PL_sortcop) {
1616             PERL_CONTEXT *cx;
1617             SV** newsp;
1618             const bool oldcatch = CATCH_GET;
1619
1620             SAVETMPS;
1621             SAVEOP();
1622
1623             CATCH_SET(TRUE);
1624             PUSHSTACKi(PERLSI_SORT);
1625             if (!hasargs && !is_xsub) {
1626                 SAVESPTR(PL_firstgv);
1627                 SAVESPTR(PL_secondgv);
1628                 SAVESPTR(PL_sortstash);
1629                 PL_firstgv = gv_fetchpv("a", TRUE, SVt_PV);
1630                 PL_secondgv = gv_fetchpv("b", TRUE, SVt_PV);
1631                 PL_sortstash = stash;
1632                 SAVESPTR(GvSV(PL_firstgv));
1633                 SAVESPTR(GvSV(PL_secondgv));
1634             }
1635
1636             PUSHBLOCK(cx, CXt_NULL, PL_stack_base);
1637             if (!(flags & OPf_SPECIAL)) {
1638                 cx->cx_type = CXt_SUB;
1639                 cx->blk_gimme = G_SCALAR;
1640                 PUSHSUB(cx);
1641                 if (!is_xsub) {
1642                     AV* const padlist = CvPADLIST(cv);
1643
1644                     if (++CvDEPTH(cv) >= 2) {
1645                         PERL_STACK_OVERFLOW_CHECK();
1646                         pad_push(padlist, CvDEPTH(cv));
1647                     }
1648                     SAVECOMPPAD();
1649                     PAD_SET_CUR_NOSAVE(padlist, CvDEPTH(cv));
1650
1651                     if (hasargs) {
1652                         /* This is mostly copied from pp_entersub */
1653                         AV *av = (AV*)PAD_SVl(0);
1654
1655                         cx->blk_sub.savearray = GvAV(PL_defgv);
1656                         GvAV(PL_defgv) = (AV*)SvREFCNT_inc(av);
1657                         CX_CURPAD_SAVE(cx->blk_sub);
1658                         cx->blk_sub.argarray = av;
1659                     }
1660
1661                 }
1662             }
1663             cx->cx_type |= CXp_MULTICALL;
1664
1665             start = p1 - max;
1666             sortsvp(aTHX_ start, max,
1667                     (is_xsub ? S_sortcv_xsub : hasargs ? S_sortcv_stacked : S_sortcv),
1668                     sort_flags);
1669
1670             if (!(flags & OPf_SPECIAL)) {
1671                 LEAVESUB(cv);
1672                 if (!is_xsub)
1673                     CvDEPTH(cv)--;
1674             }
1675             POPBLOCK(cx,PL_curpm);
1676             PL_stack_sp = newsp;
1677             POPSTACK;
1678             CATCH_SET(oldcatch);
1679         }
1680         else {
1681             MEXTEND(SP, 20);    /* Can't afford stack realloc on signal. */
1682             start = sorting_av ? AvARRAY(av) : ORIGMARK+1;
1683             sortsvp(aTHX_ start, max,
1684                     (priv & OPpSORT_NUMERIC)
1685                         ? ( ( ( priv & OPpSORT_INTEGER) || all_SIVs)
1686                             ? ( overloading ? S_amagic_i_ncmp : S_sv_i_ncmp)
1687                             : ( overloading ? S_amagic_ncmp : S_sv_ncmp ) )
1688                         : ( IN_LOCALE_RUNTIME
1689                             ? ( overloading
1690                                 ? S_amagic_cmp_locale
1691                                 : sv_cmp_locale_static)
1692                             : ( overloading ? S_amagic_cmp : sv_cmp_static)),
1693                     sort_flags);
1694         }
1695         if ((priv & OPpSORT_REVERSE) != 0) {
1696             SV **q = start+max-1;
1697             while (start < q) {
1698                 SV * const tmp = *start;
1699                 *start++ = *q;
1700                 *q-- = tmp;
1701             }
1702         }
1703     }
1704     if (sorting_av)
1705         SvREADONLY_off(av);
1706     else if (av && !sorting_av) {
1707         /* simulate pp_aassign of tied AV */
1708         SV** const base = ORIGMARK+1;
1709         for (i=0; i < max; i++) {
1710             base[i] = newSVsv(base[i]);
1711         }
1712         av_clear(av);
1713         av_extend(av, max);
1714         for (i=0; i < max; i++) {
1715             SV * const sv = base[i];
1716             SV ** const didstore = av_store(av, i, sv);
1717             if (SvSMAGICAL(sv))
1718                 mg_set(sv);
1719             if (!didstore)
1720                 sv_2mortal(sv);
1721         }
1722     }
1723     LEAVE;
1724     PL_stack_sp = ORIGMARK + (sorting_av ? 0 : max);
1725     return nextop;
1726 }
1727
1728 static I32
1729 S_sortcv(pTHX_ SV *a, SV *b)
1730 {
1731     dVAR;
1732     const I32 oldsaveix = PL_savestack_ix;
1733     const I32 oldscopeix = PL_scopestack_ix;
1734     I32 result;
1735     GvSV(PL_firstgv) = a;
1736     GvSV(PL_secondgv) = b;
1737     PL_stack_sp = PL_stack_base;
1738     PL_op = PL_sortcop;
1739     CALLRUNOPS(aTHX);
1740     if (PL_stack_sp != PL_stack_base + 1)
1741         Perl_croak(aTHX_ "Sort subroutine didn't return single value");
1742     if (!SvNIOKp(*PL_stack_sp))
1743         Perl_croak(aTHX_ "Sort subroutine didn't return a numeric value");
1744     result = SvIV(*PL_stack_sp);
1745     while (PL_scopestack_ix > oldscopeix) {
1746         LEAVE;
1747     }
1748     leave_scope(oldsaveix);
1749     return result;
1750 }
1751
1752 static I32
1753 S_sortcv_stacked(pTHX_ SV *a, SV *b)
1754 {
1755     dVAR;
1756     const I32 oldsaveix = PL_savestack_ix;
1757     const I32 oldscopeix = PL_scopestack_ix;
1758     I32 result;
1759     AV * const av = GvAV(PL_defgv);
1760
1761     if (AvMAX(av) < 1) {
1762         SV** ary = AvALLOC(av);
1763         if (AvARRAY(av) != ary) {
1764             AvMAX(av) += AvARRAY(av) - AvALLOC(av);
1765             SvPV_set(av, (char*)ary);
1766         }
1767         if (AvMAX(av) < 1) {
1768             AvMAX(av) = 1;
1769             Renew(ary,2,SV*);
1770             SvPV_set(av, (char*)ary);
1771         }
1772     }
1773     AvFILLp(av) = 1;
1774
1775     AvARRAY(av)[0] = a;
1776     AvARRAY(av)[1] = b;
1777     PL_stack_sp = PL_stack_base;
1778     PL_op = PL_sortcop;
1779     CALLRUNOPS(aTHX);
1780     if (PL_stack_sp != PL_stack_base + 1)
1781         Perl_croak(aTHX_ "Sort subroutine didn't return single value");
1782     if (!SvNIOKp(*PL_stack_sp))
1783         Perl_croak(aTHX_ "Sort subroutine didn't return a numeric value");
1784     result = SvIV(*PL_stack_sp);
1785     while (PL_scopestack_ix > oldscopeix) {
1786         LEAVE;
1787     }
1788     leave_scope(oldsaveix);
1789     return result;
1790 }
1791
1792 static I32
1793 S_sortcv_xsub(pTHX_ SV *a, SV *b)
1794 {
1795     dVAR; dSP;
1796     const I32 oldsaveix = PL_savestack_ix;
1797     const I32 oldscopeix = PL_scopestack_ix;
1798     CV * const cv=(CV*)PL_sortcop;
1799     I32 result;
1800
1801     SP = PL_stack_base;
1802     PUSHMARK(SP);
1803     EXTEND(SP, 2);
1804     *++SP = a;
1805     *++SP = b;
1806     PUTBACK;
1807     (void)(*CvXSUB(cv))(aTHX_ cv);
1808     if (PL_stack_sp != PL_stack_base + 1)
1809         Perl_croak(aTHX_ "Sort subroutine didn't return single value");
1810     if (!SvNIOKp(*PL_stack_sp))
1811         Perl_croak(aTHX_ "Sort subroutine didn't return a numeric value");
1812     result = SvIV(*PL_stack_sp);
1813     while (PL_scopestack_ix > oldscopeix) {
1814         LEAVE;
1815     }
1816     leave_scope(oldsaveix);
1817     return result;
1818 }
1819
1820
1821 static I32
1822 S_sv_ncmp(pTHX_ SV *a, SV *b)
1823 {
1824     const NV nv1 = SvNSIV(a);
1825     const NV nv2 = SvNSIV(b);
1826     return nv1 < nv2 ? -1 : nv1 > nv2 ? 1 : 0;
1827 }
1828
1829 static I32
1830 S_sv_i_ncmp(pTHX_ SV *a, SV *b)
1831 {
1832     const IV iv1 = SvIV(a);
1833     const IV iv2 = SvIV(b);
1834     return iv1 < iv2 ? -1 : iv1 > iv2 ? 1 : 0;
1835 }
1836
/* Evaluates to the result of amagic_call() for the binary method
 * <meth>_amg when PL_amagic_generation is non-zero and at least one
 * operand has SvAMAGIC set; otherwise evaluates to Nullsv.
 *
 * The expansion is a single parenthesised expression with no trailing
 * semicolon, so it can be used anywhere an expression is allowed and the
 * caller supplies the statement terminator.  The operands may be
 * evaluated more than once. */
#define tryCALL_AMAGICbin(left,right,meth) \
    ((PL_amagic_generation && (SvAMAGIC(left)||SvAMAGIC(right))) \
        ? amagic_call(left, right, CAT2(meth,_amg), 0) \
        : Nullsv)
1841
1842 static I32
1843 S_amagic_ncmp(pTHX_ register SV *a, register SV *b)
1844 {
1845     SV * const tmpsv = tryCALL_AMAGICbin(a,b,ncmp);
1846     if (tmpsv) {
1847         if (SvIOK(tmpsv)) {
1848             const I32 i = SvIVX(tmpsv);
1849             if (i > 0)
1850                return 1;
1851             return i? -1 : 0;
1852         }
1853         else {
1854             const NV d = SvNV(tmpsv);
1855             if (d > 0)
1856                return 1;
1857             return d ? -1 : 0;
1858         }
1859      }
1860      return S_sv_ncmp(aTHX_ a, b);
1861 }
1862
1863 static I32
1864 S_amagic_i_ncmp(pTHX_ register SV *a, register SV *b)
1865 {
1866     SV * const tmpsv = tryCALL_AMAGICbin(a,b,ncmp);
1867     if (tmpsv) {
1868         if (SvIOK(tmpsv)) {
1869             const I32 i = SvIVX(tmpsv);
1870             if (i > 0)
1871                return 1;
1872             return i? -1 : 0;
1873         }
1874         else {
1875             const NV d = SvNV(tmpsv);
1876             if (d > 0)
1877                return 1;
1878             return d ? -1 : 0;
1879         }
1880     }
1881     return S_sv_i_ncmp(aTHX_ a, b);
1882 }
1883
1884 static I32
1885 S_amagic_cmp(pTHX_ register SV *str1, register SV *str2)
1886 {
1887     SV * const tmpsv = tryCALL_AMAGICbin(str1,str2,scmp);
1888     if (tmpsv) {
1889         if (SvIOK(tmpsv)) {
1890             const I32 i = SvIVX(tmpsv);
1891             if (i > 0)
1892                return 1;
1893             return i? -1 : 0;
1894         }
1895         else {
1896             const NV d = SvNV(tmpsv);
1897             if (d > 0)
1898                return 1;
1899             return d? -1 : 0;
1900         }
1901     }
1902     return sv_cmp(str1, str2);
1903 }
1904
1905 static I32
1906 S_amagic_cmp_locale(pTHX_ register SV *str1, register SV *str2)
1907 {
1908     SV * const tmpsv = tryCALL_AMAGICbin(str1,str2,scmp);
1909     if (tmpsv) {
1910         if (SvIOK(tmpsv)) {
1911             const I32 i = SvIVX(tmpsv);
1912             if (i > 0)
1913                return 1;
1914             return i? -1 : 0;
1915         }
1916         else {
1917             const NV d = SvNV(tmpsv);
1918             if (d > 0)
1919                return 1;
1920             return d? -1 : 0;
1921         }
1922     }
1923     return sv_cmp_locale(str1, str2);
1924 }
1925
1926 /*
1927  * Local variables:
1928  * c-indentation-style: bsd
1929  * c-basic-offset: 4
1930  * indent-tabs-mode: t
1931  * End:
1932  *
1933  * ex: set ts=8 sts=4 sw=4 noet:
1934  */