pp_sort.c

   1 /*    pp_sort.c
   2  *
   3  *    Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
   4  *    2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
   5  *
   6  *    You may distribute under the terms of either the GNU General Public
   7  *    License or the Artistic License, as specified in the README file.
   8  *
   9  */
  10
  11 /*
  12  *   ...they shuffled back towards the rear of the line.  'No, not at the
  13  *   rear!' the slave-driver shouted.  'Three files up. And stay there...
  14  *
  15  *     [p.931 of _The Lord of the Rings_, VI/ii: "The Land of Shadow"]
  16  */
  17
  18 /* This file contains pp ("push/pop") functions that
  19  * execute the opcodes that make up a perl program. A typical pp function
  20  * expects to find its arguments on the stack, and usually pushes its
  21  * results onto the stack, hence the 'pp' terminology. Each OP structure
  22  * contains a pointer to the relevant pp_foo() function.
  23  *
  24  * This particular file just contains pp_sort(), which is complex
  25  * enough to merit its own file! See the other pp*.c files for the rest of
  26  * the pp_ functions.
  27  */
  28
  29 #include "EXTERN.h"
  30 #define PERL_IN_PP_SORT_C
  31 #include "perl.h"
  32
  33 #define sv_cmp_static Perl_sv_cmp
  34 #define sv_cmp_locale_static Perl_sv_cmp_locale
  35
  36 #ifndef SMALLSORT
  37 #define SMALLSORT (200)
  38 #endif
  39
  40 /* Flags for qsortsv and mergesortsv */
  41 #define SORTf_DESC   1
  42 #define SORTf_STABLE 2
  43 #define SORTf_UNSTABLE 8
  44
  45 /*
  46  * The mergesort implementation is by Peter M. Mcilroy <pmcilroy@lucent.com>.
  47  *
  48  * The original code was written in conjunction with BSD Computer Software
  49  * Research Group at University of California, Berkeley.
  50  *
  51  * See also: "Optimistic Sorting and Information Theoretic Complexity"
  52  *           Peter McIlroy
  53  *           SODA (Fourth Annual ACM-SIAM Symposium on Discrete Algorithms),
  54  *           pp 467-474, Austin, Texas, 25-27 January 1993.
  55  *
  56  * The integration to Perl is by John P. Linderman <jpl.jpl@gmail.com>.
  57  *
  58  * The code can be distributed under the same terms as Perl itself.
  59  *
  60  */
  61
  62
  63 typedef char * aptr;            /* pointer for arithmetic on sizes */
  64 typedef SV * gptr;              /* pointers in our lists */
  65
  66 /* Binary merge internal sort, with a few special mods
  67 ** for the special perl environment it now finds itself in.
  68 **
  69 ** Things that were once options have been hotwired
  70 ** to values suitable for this use.  In particular, we'll always
  71 ** initialize looking for natural runs, we'll always produce stable
  72 ** output, and we'll always do Peter McIlroy's binary merge.
  73 */
  74
  75 /* Pointer types for arithmetic and storage and convenience casts */
  76
  77 #define APTR(P) ((aptr)(P))
  78 #define GPTP(P) ((gptr *)(P))
  79 #define GPPP(P) ((gptr **)(P))
  80
  81
  82 /* byte offset from pointer P to (larger) pointer Q */
  83 #define BYTEOFF(P, Q) (APTR(Q) - APTR(P))
  84
  85 #define PSIZE sizeof(gptr)
  86
  87 /* If PSIZE is power of 2, make PSHIFT that power, if that helps */
  88
  89 #ifdef  PSHIFT
  90 #define PNELEM(P, Q)    (BYTEOFF(P,Q) >> (PSHIFT))
  91 #define PNBYTE(N)       ((N) << (PSHIFT))
  92 #define PINDEX(P, N)    (GPTP(APTR(P) + PNBYTE(N)))
  93 #else
  94 /* Leave optimization to compiler */
  95 #define PNELEM(P, Q)    (GPTP(Q) - GPTP(P))
  96 #define PNBYTE(N)       ((N) * (PSIZE))
  97 #define PINDEX(P, N)    (GPTP(P) + (N))
  98 #endif
  99
 100 /* Pointer into other corresponding to pointer into this */
 101 #define POTHER(P, THIS, OTHER) GPTP(APTR(OTHER) + BYTEOFF(THIS,P))
 102
 103 #define FROMTOUPTO(src, dst, lim) do *dst++ = *src++; while(src<lim)
 104
 105
 106 /* Runs are identified by a pointer in the auxiliary list.
 107 ** The pointer is at the start of the list,
 108 ** and it points to the start of the next list.
 109 ** NEXT is used as an lvalue, too.
 110 */
 111
 112 #define NEXT(P)         (*GPPP(P))
 113
 114
 115 /* PTHRESH is the minimum number of pairs with the same sense to justify
 116 ** checking for a run and extending it.  Note that PTHRESH counts PAIRS,
 117 ** not just elements, so PTHRESH == 8 means a run of 16.
 118 */
 119
 120 #define PTHRESH (8)
 121
 122 /* RTHRESH is the number of elements in a run that must compare low
 123 ** to the low element from the opposing run before we justify
 124 ** doing a binary rampup instead of single stepping.
 125 ** In random input, N in a row low should only happen with
 126 ** probability 2^(1-N), so we can risk that we are dealing
 127 ** with orderly input without paying much when we aren't.
 128 */
 129
 130 #define RTHRESH (6)
 131
 132
 133 /*
 134 ** Overview of algorithm and variables.
 135 ** The array of elements at list1 will be organized into runs of length 2,
 136 ** or runs of length >= 2 * PTHRESH.  We only try to form long runs when
 137 ** PTHRESH adjacent pairs compare in the same way, suggesting overall order.
 138 **
 139 ** Unless otherwise specified, pair pointers address the first of two elements.
 140 **
 141 ** b and b+1 are a pair that compare with sense "sense".
 142 ** b is the "bottom" of adjacent pairs that might form a longer run.
 143 **
 144 ** p2 parallels b in the list2 array, where runs are defined by
 145 ** a pointer chain.
 146 **
 147 ** t represents the "top" of the adjacent pairs that might extend
 148 ** the run beginning at b.  Usually, t addresses a pair
 149 ** that compares with opposite sense from (b,b+1).
 150 ** However, it may also address a singleton element at the end of list1,
 151 ** or it may be equal to "last", the first element beyond list1.
 152 **
 153 ** r addresses the Nth pair following b.  If this would be beyond t,
 154 ** we back it off to t.  Only when r is less than t do we consider the
 155 ** run long enough to consider checking.
 156 **
 157 ** q addresses a pair such that the pairs at b through q already form a run.
 158 ** Often, q will equal b, indicating we only are sure of the pair itself.
 159 ** However, a search on the previous cycle may have revealed a longer run,
 160 ** so q may be greater than b.
 161 **
 162 ** p is used to work back from a candidate r, trying to reach q,
 163 ** which would mean b through r would be a run.  If we discover such a run,
 164 ** we start q at r and try to push it further towards t.
 165 ** If b through r is NOT a run, we detect the wrong order at (p-1,p).
 166 ** In any event, after the check (if any), we have two main cases.
 167 **
 168 ** 1) Short run.  b <= q < p <= r <= t.
 169 **      b through q is a run (perhaps trivial)
 170 **      q through p are uninteresting pairs
 171 **      p through r is a run
 172 **
 173 ** 2) Long run.  b < r <= q < t.
 174 **      b through q is a run (of length >= 2 * PTHRESH)
 175 **
 176 ** Note that degenerate cases are not only possible, but likely.
 177 ** For example, if the pair following b compares with opposite sense,
 178 ** then b == q < p == r == t.
 179 */
 180
 181
 182 static IV
 183 dynprep(pTHX_ gptr *list1, gptr *list2, size_t nmemb, const SVCOMPARE_t cmp)
 184 {
 185     I32 sense;
 186     gptr *b, *p, *q, *t, *p2;
 187     gptr *last, *r;
 188     IV runs = 0;
 189
 190     b = list1;
 191     last = PINDEX(b, nmemb);
 192     sense = (cmp(aTHX_ *b, *(b+1)) > 0);
 193     for (p2 = list2; b < last; ) {
 194         /* We just started, or just reversed sense.
 195         ** Set t at end of pairs with the prevailing sense.
 196         */
 197         for (p = b+2, t = p; ++p < last; t = ++p) {
 198             if ((cmp(aTHX_ *t, *p) > 0) != sense) break;
 199         }
 200         q = b;
 201         /* Having laid out the playing field, look for long runs */
 202         do {
 203             p = r = b + (2 * PTHRESH);
 204             if (r >= t) p = r = t;      /* too short to care about */
 205             else {
 206                 while (((cmp(aTHX_ *(p-1), *p) > 0) == sense) &&
 207                        ((p -= 2) > q)) {}
 208                 if (p <= q) {
 209                     /* b through r is a (long) run.
 210                     ** Extend it as far as possible.
 211                     */
 212                     p = q = r;
 213                     while (((p += 2) < t) &&
 214                            ((cmp(aTHX_ *(p-1), *p) > 0) == sense)) q = p;
 215                     r = p = q + 2;      /* no simple pairs, no after-run */
 216                 }
 217             }
 218             if (q > b) {                /* run of greater than 2 at b */
 219                 gptr *savep = p;
 220
 221                 p = q += 2;
 222                 /* pick up singleton, if possible */
 223                 if ((p == t) &&
 224                     ((t + 1) == last) &&
 225                     ((cmp(aTHX_ *(p-1), *p) > 0) == sense))
 226                     savep = r = p = q = last;
 227                 p2 = NEXT(p2) = p2 + (p - b); ++runs;
 228                 if (sense)
 229                     while (b < --p) {
 230                         const gptr c = *b;
 231                         *b++ = *p;
 232                         *p = c;
 233                     }
 234                 p = savep;
 235             }
 236             while (q < p) {             /* simple pairs */
 237                 p2 = NEXT(p2) = p2 + 2; ++runs;
 238                 if (sense) {
 239                     const gptr c = *q++;
 240                     *(q-1) = *q;
 241                     *q++ = c;
 242                 } else q += 2;
 243             }
 244             if (((b = p) == t) && ((t+1) == last)) {
 245                 NEXT(p2) = p2 + 1; ++runs;
 246                 b++;
 247             }
 248             q = r;
 249         } while (b < t);
 250         sense = !sense;
 251     }
 252     return runs;
 253 }
 254
 255
 256 /* The original merge sort, in use since 5.7, was as fast as, or faster than,
 257  * qsort on many platforms, but slower than qsort, conspicuously so,
 258  * on others.  The most likely explanation was platform-specific
 259  * differences in cache sizes and relative speeds.
 260  *
 261  * The quicksort divide-and-conquer algorithm guarantees that, as the
 262  * problem is subdivided into smaller and smaller parts, the parts
 263  * fit into smaller (and faster) caches.  So it doesn't matter how
 264  * many levels of cache exist, quicksort will "find" them, and,
 265  * as long as smaller is faster, take advantage of them.
 266  *
 267  * By contrast, consider how the original mergesort algorithm worked.
 268  * Suppose we have five runs (each typically of length 2 after dynprep).
 269  *
 270  * pass               base                        aux
 271  *  0              1 2 3 4 5
 272  *  1                                           12 34 5
 273  *  2                1234 5
 274  *  3                                            12345
 275  *  4                 12345
 276  *
 277  * Adjacent pairs are merged in "grand sweeps" through the input.
 278  * This means, on pass 1, the records in runs 1 and 2 aren't revisited until
 279  * runs 3 and 4 are merged and the runs from run 5 have been copied.
 280  * The only cache that matters is one large enough to hold *all* the input.
 281  * On some platforms, this may be many times slower than smaller caches.
 282  *
 283  * The following pseudo-code uses the same basic merge algorithm,
 284  * but in a divide-and-conquer way.
 285  *
 286  * # merge $runs runs at offset $offset of list $list1 into $list2.
 287  * # all unmerged runs ($runs == 1) originate in list $base.
 288  * sub mgsort2 {
 289  *     my ($offset, $runs, $base, $list1, $list2) = @_;
 290  *
 291  *     if ($runs == 1) {
 292  *         if ($list1 is $base) copy run to $list2
 293  *         return offset of end of list (or copy)
 294  *     } else {
 295  *         $off2 = mgsort2($offset, $runs-($runs/2), $base, $list2, $list1)
 296  *         mgsort2($off2, $runs/2, $base, $list2, $list1)
 297  *         merge the adjacent runs at $offset of $list1 into $list2
 298  *         return the offset of the end of the merged runs
 299  *     }
 300  * }
 301  * mgsort2(0, $runs, $base, $aux, $base);
 302  *
 303  * For our 5 runs, the tree of calls looks like
 304  *
 305  *           5
 306  *      3        2
 307  *   2     1   1   1
 308  * 1   1
 309  *
 310  * 1   2   3   4   5
 311  *
 312  * and the corresponding activity looks like
 313  *
 314  * copy runs 1 and 2 from base to aux
 315  * merge runs 1 and 2 from aux to base
 316  * (run 3 is where it belongs, no copy needed)
 317  * merge runs 12 and 3 from base to aux
 318  * (runs 4 and 5 are where they belong, no copy needed)
 319  * merge runs 4 and 5 from base to aux
 320  * merge runs 123 and 45 from aux to base
 321  *
 322  * Note that we merge runs 1 and 2 immediately after copying them,
 323  * while they are still likely to be in fast cache.  Similarly,
 324  * run 3 is merged with run 12 while it still may be lingering in cache.
 325  * This implementation should therefore enjoy much of the cache-friendly
 326  * behavior that quicksort does.  In addition, it does less copying
 327  * than the original mergesort implementation (only runs 1 and 2 are copied)
 328  * and the "balancing" of merges is better (merged runs comprise more nearly
 329  * equal numbers of original runs).
 330  *
 331  * The actual cache-friendly implementation will use a pseudo-stack
 332  * to avoid recursion, and will unroll processing of runs of length 2,
 333  * but it is otherwise similar to the recursive implementation.
 334  */
 335
 336 typedef struct {
 337     IV  offset;         /* offset of 1st of 2 runs at this level */
 338     IV  runs;           /* how many runs must be combined into 1 */
 339 } off_runs;             /* pseudo-stack element */
 340
 341
 342 static I32
 343 cmp_desc(pTHX_ gptr const a, gptr const b)
 344 {
 345     return -PL_sort_RealCmp(aTHX_ a, b);
 346 }
 347
 348 /*
 349 =for apidoc sortsv_flags
 350
 351 In-place sort an array of SV pointers with the given comparison routine,
 352 with various SORTf_* flag options.
 353
 354 =cut
 355 */
 356 void
 357 Perl_sortsv_flags(pTHX_ gptr *base, size_t nmemb, SVCOMPARE_t cmp, U32 flags)
 358 {
 359     IV i, run, offset;
 360     I32 sense, level;
 361     gptr *f1, *f2, *t, *b, *p;
 362     int iwhich;
 363     gptr *aux;
 364     gptr *p1;
 365     gptr small[SMALLSORT];
 366     gptr *which[3];
 367     off_runs stack[60], *stackp;
 368     SVCOMPARE_t savecmp = NULL;
 369
 370     PERL_ARGS_ASSERT_SORTSV_FLAGS;
 371     if (nmemb <= 1) return;                     /* sorted trivially */
 372
 373     if ((flags & SORTf_DESC) != 0) {
 374         savecmp = PL_sort_RealCmp;      /* Save current comparison routine, if any */
 375         PL_sort_RealCmp = cmp;  /* Put comparison routine where cmp_desc can find it */
 376         cmp = cmp_desc;
 377     }
 378
 379     if (nmemb <= SMALLSORT) aux = small;        /* use stack for aux array */
 380     else { Newx(aux,nmemb,gptr); }              /* allocate auxiliary array */
 381     level = 0;
 382     stackp = stack;
 383     stackp->runs = dynprep(aTHX_ base, aux, nmemb, cmp);
 384     stackp->offset = offset = 0;
 385     which[0] = which[2] = base;
 386     which[1] = aux;
 387     for (;;) {
 388         /* On levels where both runs have be constructed (stackp->runs == 0),
 389          * merge them, and note the offset of their end, in case the offset
 390          * is needed at the next level up.  Hop up a level, and,
 391          * as long as stackp->runs is 0, keep merging.
 392          */
 393         IV runs = stackp->runs;
 394         if (runs == 0) {
 395             gptr *list1, *list2;
 396             iwhich = level & 1;
 397             list1 = which[iwhich];              /* area where runs are now */
 398             list2 = which[++iwhich];            /* area for merged runs */
 399             do {
 400                 gptr *l1, *l2, *tp2;
 401                 offset = stackp->offset;
 402                 f1 = p1 = list1 + offset;               /* start of first run */
 403                 p = tp2 = list2 + offset;       /* where merged run will go */
 404                 t = NEXT(p);                    /* where first run ends */
 405                 f2 = l1 = POTHER(t, list2, list1); /* ... on the other side */
 406                 t = NEXT(t);                    /* where second runs ends */
 407                 l2 = POTHER(t, list2, list1);   /* ... on the other side */
 408                 offset = PNELEM(list2, t);
 409                 while (f1 < l1 && f2 < l2) {
 410                     /* If head 1 is larger than head 2, find ALL the elements
 411                     ** in list 2 strictly less than head1, write them all,
 412                     ** then head 1.  Then compare the new heads, and repeat,
 413                     ** until one or both lists are exhausted.
 414                     **
 415                     ** In all comparisons (after establishing
 416                     ** which head to merge) the item to merge
 417                     ** (at pointer q) is the first operand of
 418                     ** the comparison.  When we want to know
 419                     ** if "q is strictly less than the other",
 420                     ** we can't just do
 421                     **    cmp(q, other) < 0
 422                     ** because stability demands that we treat equality
 423                     ** as high when q comes from l2, and as low when
 424                     ** q was from l1.  So we ask the question by doing
 425                     **    cmp(q, other) <= sense
 426                     ** and make sense == 0 when equality should look low,
 427                     ** and -1 when equality should look high.
 428                     */
 429
 430                     gptr *q;
 431                     if (cmp(aTHX_ *f1, *f2) <= 0) {
 432                         q = f2; b = f1; t = l1;
 433                         sense = -1;
 434                     } else {
 435                         q = f1; b = f2; t = l2;
 436                         sense = 0;
 437                     }
 438
 439
 440                     /* ramp up
 441                     **
 442                     ** Leave t at something strictly
 443                     ** greater than q (or at the end of the list),
 444                     ** and b at something strictly less than q.
 445                     */
 446                     for (i = 1, run = 0 ;;) {
 447                         if ((p = PINDEX(b, i)) >= t) {
 448                             /* off the end */
 449                             if (((p = PINDEX(t, -1)) > b) &&
 450                                 (cmp(aTHX_ *q, *p) <= sense))
 451                                  t = p;
 452                             else b = p;
 453                             break;
 454                         } else if (cmp(aTHX_ *q, *p) <= sense) {
 455                             t = p;
 456                             break;
 457                         } else b = p;
 458                         if (++run >= RTHRESH) i += i;
 459                     }
 460
 461
 462                     /* q is known to follow b and must be inserted before t.
 463                     ** Increment b, so the range of possibilities is [b,t).
 464                     ** Round binary split down, to favor early appearance.
 465                     ** Adjust b and t until q belongs just before t.
 466                     */
 467
 468                     b++;
 469                     while (b < t) {
 470                         p = PINDEX(b, (PNELEM(b, t) - 1) / 2);
 471                         if (cmp(aTHX_ *q, *p) <= sense) {
 472                             t = p;
 473                         } else b = p + 1;
 474                     }
 475
 476
 477                     /* Copy all the strictly low elements */
 478
 479                     if (q == f1) {
 480                         FROMTOUPTO(f2, tp2, t);
 481                         *tp2++ = *f1++;
 482                     } else {
 483                         FROMTOUPTO(f1, tp2, t);
 484                         *tp2++ = *f2++;
 485                     }
 486                 }
 487
 488
 489                 /* Run out remaining list */
 490                 if (f1 == l1) {
 491                        if (f2 < l2) FROMTOUPTO(f2, tp2, l2);
 492                 } else              FROMTOUPTO(f1, tp2, l1);
 493                 p1 = NEXT(p1) = POTHER(tp2, list2, list1);
 494
 495                 if (--level == 0) goto done;
 496                 --stackp;
 497                 t = list1; list1 = list2; list2 = t;    /* swap lists */
 498             } while ((runs = stackp->runs) == 0);
 499         }
 500
 501
 502         stackp->runs = 0;               /* current run will finish level */
 503         /* While there are more than 2 runs remaining,
 504          * turn them into exactly 2 runs (at the "other" level),
 505          * each made up of approximately half the runs.
 506          * Stack the second half for later processing,
 507          * and set about producing the first half now.
 508          */
 509         while (runs > 2) {
 510             ++level;
 511             ++stackp;
 512             stackp->offset = offset;
 513             runs -= stackp->runs = runs / 2;
 514         }
 515         /* We must construct a single run from 1 or 2 runs.
 516          * All the original runs are in which[0] == base.
 517          * The run we construct must end up in which[level&1].
 518          */
 519         iwhich = level & 1;
 520         if (runs == 1) {
 521             /* Constructing a single run from a single run.
 522              * If it's where it belongs already, there's nothing to do.
 523              * Otherwise, copy it to where it belongs.
 524              * A run of 1 is either a singleton at level 0,
 525              * or the second half of a split 3.  In neither event
 526              * is it necessary to set offset.  It will be set by the merge
 527              * that immediately follows.
 528              */
 529             if (iwhich) {       /* Belongs in aux, currently in base */
 530                 f1 = b = PINDEX(base, offset);  /* where list starts */
 531                 f2 = PINDEX(aux, offset);       /* where list goes */
 532                 t = NEXT(f2);                   /* where list will end */
 533                 offset = PNELEM(aux, t);        /* offset thereof */
 534                 t = PINDEX(base, offset);       /* where it currently ends */
 535                 FROMTOUPTO(f1, f2, t);          /* copy */
 536                 NEXT(b) = t;                    /* set up parallel pointer */
 537             } else if (level == 0) goto done;   /* single run at level 0 */
 538         } else {
 539             /* Constructing a single run from two runs.
 540              * The merge code at the top will do that.
 541              * We need only make sure the two runs are in the "other" array,
 542              * so they'll end up in the correct array after the merge.
 543              */
 544             ++level;
 545             ++stackp;
 546             stackp->offset = offset;
 547             stackp->runs = 0;   /* take care of both runs, trigger merge */
 548             if (!iwhich) {      /* Merged runs belong in aux, copy 1st */
 549                 f1 = b = PINDEX(base, offset);  /* where first run starts */
 550                 f2 = PINDEX(aux, offset);       /* where it will be copied */
 551                 t = NEXT(f2);                   /* where first run will end */
 552                 offset = PNELEM(aux, t);        /* offset thereof */
 553                 p = PINDEX(base, offset);       /* end of first run */
 554                 t = NEXT(t);                    /* where second run will end */
 555                 t = PINDEX(base, PNELEM(aux, t)); /* where it now ends */
 556                 FROMTOUPTO(f1, f2, t);          /* copy both runs */
 557                 NEXT(b) = p;                    /* paralleled pointer for 1st */
 558                 NEXT(p) = t;                    /* ... and for second */
 559             }
 560         }
 561     }
 562   done:
 563     if (aux != small) Safefree(aux);    /* free iff allocated */
 564     if (savecmp != NULL) {
 565          PL_sort_RealCmp = savecmp;     /* Restore current comparison routine, if any */
 566     }
 567     return;
 568 }
 569
 570 /*
 571  * The quicksort implementation was derived from source code contributed
 572  * by Tom Horsley.
 573  *
 574  * NOTE: this code was derived from Tom Horsley's qsort replacement
 575  * and should not be confused with the original code.
 576  */
 577
 578 /* Copyright (C) Tom Horsley, 1997. All rights reserved.
 579
 580    Permission granted to distribute under the same terms as perl which are
 581    (briefly):
 582
 583     This program is free software; you can redistribute it and/or modify
 584     it under the terms of either:
 585
 586         a) the GNU General Public License as published by the Free
 587         Software Foundation; either version 1, or (at your option) any
 588         later version, or
 589
 590         b) the "Artistic License" which comes with this Kit.
 591
 592    Details on the perl license can be found in the perl source code which
 593    may be located via the www.perl.com web page.
 594
 595    This is the most wonderfulest possible qsort I can come up with (and
 596    still be mostly portable) My (limited) tests indicate it consistently
 597    does about 20% fewer calls to compare than does the qsort in the Visual
 598    C++ library, other vendors may vary.
 599
 600    Some of the ideas in here can be found in "Algorithms" by Sedgewick,
 601    others I invented myself (or more likely re-invented since they seemed
 602    pretty obvious once I watched the algorithm operate for a while).
 603
 604    Most of this code was written while watching the Marlins sweep the Giants
 605    in the 1997 National League Playoffs - no Braves fans allowed to use this
 606    code (just kidding :-).
 607
 608    I realize that if I wanted to be true to the perl tradition, the only
 609    comment in this file would be something like:
 610
 611    ...they shuffled back towards the rear of the line. 'No, not at the
 612    rear!'  the slave-driver shouted. 'Three files up. And stay there...
 613
 614    However, I really needed to violate that tradition just so I could keep
 615    track of what happens myself, not to mention some poor fool trying to
 616    understand this years from now :-).
 617 */
 618
 619 /* ********************************************************** Configuration */
 620
 621 #ifndef QSORT_ORDER_GUESS
 622 #define QSORT_ORDER_GUESS 2     /* Select doubling version of the netBSD trick */
 623 #endif
 624
 625 /* QSORT_MAX_STACK is the largest number of partitions that can be stacked up for
 626    future processing - a good max upper bound is log base 2 of memory size
 627    (32 on 32 bit machines, 64 on 64 bit machines, etc). In reality can
 628    safely be smaller than that since the program is taking up some space and
 629    most operating systems only let you grab some subset of contiguous
 630    memory (not to mention that you are normally sorting data larger than
 631    1 byte element size :-).
 632 */
 633 #ifndef QSORT_MAX_STACK
 634 #define QSORT_MAX_STACK 32
 635 #endif
 636
 637 /* QSORT_BREAK_EVEN is the size of the largest partition we should insertion sort.
 638    Anything bigger and we use qsort. If you make this too small, the qsort
 639    will probably break (or become less efficient), because it doesn't expect
 640    the middle element of a partition to be the same as the right or left -
 641    you have been warned).
 642 */
 643 #ifndef QSORT_BREAK_EVEN
 644 #define QSORT_BREAK_EVEN 6
 645 #endif
 646
 647 /* QSORT_PLAY_SAFE is the size of the largest partition we're willing
 648    to go quadratic on.  We innoculate larger partitions against
 649    quadratic behavior by shuffling them before sorting.  This is not
 650    an absolute guarantee of non-quadratic behavior, but it would take
 651    staggeringly bad luck to pick extreme elements as the pivot
 652    from randomized data.
 653 */
 654 #ifndef QSORT_PLAY_SAFE
 655 #define QSORT_PLAY_SAFE 255
 656 #endif
 657
 658 /* ************************************************************* Data Types */
 659
 660 /* hold left and right index values of a partition waiting to be sorted (the
 661    partition includes both left and right - right is NOT one past the end or
 662    anything like that).
 663 */
 664 struct partition_stack_entry {
 665    int left;
 666    int right;
 667 #ifdef QSORT_ORDER_GUESS
 668    int qsort_break_even;
 669 #endif
 670 };
 671
 672 /* ******************************************************* Shorthand Macros */
 673
 674 /* Note that these macros will be used from inside the qsort function where
 675    we happen to know that the variable 'elt_size' contains the size of an
 676    array element and the variable 'temp' points to enough space to hold a
 677    temp element and the variable 'array' points to the array being sorted
 678    and 'compare' is the pointer to the compare routine.
 679
 680    Also note that there are very many highly architecture specific ways
 681    these might be sped up, but this is simply the most generally portable
 682    code I could think of.
 683 */
 684
 685 /* Return < 0 == 0 or > 0 as the value of elt1 is < elt2, == elt2, > elt2
 686 */
 687 #define qsort_cmp(elt1, elt2) \
 688    ((*compare)(aTHX_ array[elt1], array[elt2]))
 689
 690 #ifdef QSORT_ORDER_GUESS
 691 #define QSORT_NOTICE_SWAP swapped++;
 692 #else
 693 #define QSORT_NOTICE_SWAP
 694 #endif
 695
 696 /* swaps contents of array elements elt1, elt2.
 697 */
 698 #define qsort_swap(elt1, elt2) \
 699    STMT_START { \
 700       QSORT_NOTICE_SWAP \
 701       temp = array[elt1]; \
 702       array[elt1] = array[elt2]; \
 703       array[elt2] = temp; \
 704    } STMT_END
 705
 706 /* rotate contents of elt1, elt2, elt3 such that elt1 gets elt2, elt2 gets
 707    elt3 and elt3 gets elt1.
 708 */
 709 #define qsort_rotate(elt1, elt2, elt3) \
 710    STMT_START { \
 711       QSORT_NOTICE_SWAP \
 712       temp = array[elt1]; \
 713       array[elt1] = array[elt2]; \
 714       array[elt2] = array[elt3]; \
 715       array[elt3] = temp; \
 716    } STMT_END
 717
 718 /* ************************************************************ Debug stuff */
 719
 720 #ifdef QSORT_DEBUG
 721
 722 static void
 723 break_here()
 724 {
 725    return; /* good place to set a breakpoint */
 726 }
 727
 728 #define qsort_assert(t) (void)( (t) || (break_here(), 0) )
 729
 730 static void
 731 doqsort_all_asserts(
 732    void * array,
 733    size_t num_elts,
 734    size_t elt_size,
 735    int (*compare)(const void * elt1, const void * elt2),
 736    int pc_left, int pc_right, int u_left, int u_right)
 737 {
 738    int i;
 739
 740    qsort_assert(pc_left <= pc_right);
 741    qsort_assert(u_right < pc_left);
 742    qsort_assert(pc_right < u_left);
 743    for (i = u_right + 1; i < pc_left; ++i) {
 744       qsort_assert(qsort_cmp(i, pc_left) < 0);
 745    }
 746    for (i = pc_left; i < pc_right; ++i) {
 747       qsort_assert(qsort_cmp(i, pc_right) == 0);
 748    }
 749    for (i = pc_right + 1; i < u_left; ++i) {
 750       qsort_assert(qsort_cmp(pc_right, i) < 0);
 751    }
 752 }
 753
 754 #define qsort_all_asserts(PC_LEFT, PC_RIGHT, U_LEFT, U_RIGHT) \
 755    doqsort_all_asserts(array, num_elts, elt_size, compare, \
 756                  PC_LEFT, PC_RIGHT, U_LEFT, U_RIGHT)
 757
 758 #else
 759
 760 #define qsort_assert(t) ((void)0)
 761
 762 #define qsort_all_asserts(PC_LEFT, PC_RIGHT, U_LEFT, U_RIGHT) ((void)0)
 763
 764 #endif
 765
 766 /*
 767 =head1 Array Manipulation Functions
 768
 769 =for apidoc sortsv
 770
 771 In-place sort an array of SV pointers with the given comparison routine.
 772
 773 Currently this always uses mergesort.  See C<L</sortsv_flags>> for a more
 774 flexible routine.
 775
 776 =cut
 777 */
 778
 779 void
 780 Perl_sortsv(pTHX_ SV **array, size_t nmemb, SVCOMPARE_t cmp)
 781 {
 782     PERL_ARGS_ASSERT_SORTSV;
 783
 784     sortsv_flags(array, nmemb, cmp, 0);
 785 }
 786
 787 #define SvNSIOK(sv) ((SvFLAGS(sv) & SVf_NOK) || ((SvFLAGS(sv) & (SVf_IOK|SVf_IVisUV)) == SVf_IOK))
 788 #define SvSIOK(sv) ((SvFLAGS(sv) & (SVf_IOK|SVf_IVisUV)) == SVf_IOK)
 789 #define SvNSIV(sv) ( SvNOK(sv) ? SvNVX(sv) : ( SvSIOK(sv) ? SvIVX(sv) : sv_2nv(sv) ) )
 790
 791 PP(pp_sort)
 792 {
 793     dSP; dMARK; dORIGMARK;
 794     SV **p1 = ORIGMARK+1, **p2;
 795     SSize_t max, i;
 796     AV* av = NULL;
 797     GV *gv;
 798     CV *cv = NULL;
 799     U8 gimme = GIMME_V;
 800     OP* const nextop = PL_op->op_next;
 801     I32 overloading = 0;
 802     bool hasargs = FALSE;
 803     bool copytmps;
 804     I32 is_xsub = 0;
 805     const U8 priv = PL_op->op_private;
 806     const U8 flags = PL_op->op_flags;
 807     U32 sort_flags = 0;
 808     void (*sortsvp)(pTHX_ SV **array, size_t nmemb, SVCOMPARE_t cmp, U32 flags)
 809       = Perl_sortsv_flags;
 810     I32 all_SIVs = 1;
 811
 812     if ((priv & OPpSORT_DESCEND) != 0)
 813         sort_flags |= SORTf_DESC;
 814     if ((priv & OPpSORT_STABLE) != 0)
 815         sort_flags |= SORTf_STABLE;
 816     if ((priv & OPpSORT_UNSTABLE) != 0)
 817         sort_flags |= SORTf_UNSTABLE;
 818
 819     if (gimme != G_ARRAY) {
 820         SP = MARK;
 821         EXTEND(SP,1);
 822         RETPUSHUNDEF;
 823     }
 824
 825     ENTER;
 826     SAVEVPTR(PL_sortcop);
 827     if (flags & OPf_STACKED) {
 828         if (flags & OPf_SPECIAL) {
 829             OP *nullop = OpSIBLING(cLISTOP->op_first);  /* pass pushmark */
 830             assert(nullop->op_type == OP_NULL);
 831             PL_sortcop = nullop->op_next;
 832         }
 833         else {
 834             GV *autogv = NULL;
 835             HV *stash;
 836             cv = sv_2cv(*++MARK, &stash, &gv, GV_ADD);
 837           check_cv:
 838             if (cv && SvPOK(cv)) {
 839                 const char * const proto = SvPV_nolen_const(MUTABLE_SV(cv));
 840                 if (proto && strEQ(proto, "$$")) {
 841                     hasargs = TRUE;
 842                 }
 843             }
 844             if (cv && CvISXSUB(cv) && CvXSUB(cv)) {
 845                 is_xsub = 1;
 846             }
 847             else if (!(cv && CvROOT(cv))) {
 848                 if (gv) {
 849                     goto autoload;
 850                 }
 851                 else if (!CvANON(cv) && (gv = CvGV(cv))) {
 852                   if (cv != GvCV(gv)) cv = GvCV(gv);
 853                  autoload:
 854                   if (!autogv && (
 855                         autogv = gv_autoload_pvn(
 856                             GvSTASH(gv), GvNAME(gv), GvNAMELEN(gv),
 857                             GvNAMEUTF8(gv) ? SVf_UTF8 : 0
 858                         )
 859                      )) {
 860                     cv = GvCVu(autogv);
 861                     goto check_cv;
 862                   }
 863                   else {
 864                     SV *tmpstr = sv_newmortal();
 865                     gv_efullname3(tmpstr, gv, NULL);
 866                     DIE(aTHX_ "Undefined sort subroutine \"%" SVf "\" called",
 867                         SVfARG(tmpstr));
 868                   }
 869                 }
 870                 else {
 871                     DIE(aTHX_ "Undefined subroutine in sort");
 872                 }
 873             }
 874
 875             if (is_xsub)
 876                 PL_sortcop = (OP*)cv;
 877             else
 878                 PL_sortcop = CvSTART(cv);
 879         }
 880     }
 881     else {
 882         PL_sortcop = NULL;
 883     }
 884
 885     /* optimiser converts "@a = sort @a" to "sort \@a".  In this case,
 886      * push (@a) onto stack, then assign result back to @a at the end of
 887      * this function */
 888     if (priv & OPpSORT_INPLACE) {
 889         assert( MARK+1 == SP && *SP && SvTYPE(*SP) == SVt_PVAV);
 890         (void)POPMARK; /* remove mark associated with ex-OP_AASSIGN */
 891         av = MUTABLE_AV((*SP));
 892         if (SvREADONLY(av))
 893             Perl_croak_no_modify();
 894         max = AvFILL(av) + 1;
 895         MEXTEND(SP, max);
 896         if (SvMAGICAL(av)) {
 897             for (i=0; i < max; i++) {
 898                 SV **svp = av_fetch(av, i, FALSE);
 899                 *SP++ = (svp) ? *svp : NULL;
 900             }
 901         }
 902         else {
 903             SV **svp = AvARRAY(av);
 904             assert(svp || max == 0);
 905             for (i = 0; i < max; i++)
 906                 *SP++ = *svp++;
 907         }
 908         SP--;
 909         p1 = p2 = SP - (max-1);
 910     }
 911     else {
 912         p2 = MARK+1;
 913         max = SP - MARK;
 914    }
 915
 916     /* shuffle stack down, removing optional initial cv (p1!=p2), plus
 917      * any nulls; also stringify or converting to integer or number as
 918      * required any args */
 919     copytmps = cBOOL(PL_sortcop);
 920     for (i=max; i > 0 ; i--) {
 921         if ((*p1 = *p2++)) {                    /* Weed out nulls. */
 922             if (copytmps && SvPADTMP(*p1)) {
 923                 *p1 = sv_mortalcopy(*p1);
 924             }
 925             SvTEMP_off(*p1);
 926             if (!PL_sortcop) {
 927                 if (priv & OPpSORT_NUMERIC) {
 928                     if (priv & OPpSORT_INTEGER) {
 929                         if (!SvIOK(*p1))
 930                             (void)sv_2iv_flags(*p1, SV_GMAGIC|SV_SKIP_OVERLOAD);
 931                     }
 932                     else {
 933                         if (!SvNSIOK(*p1))
 934                             (void)sv_2nv_flags(*p1, SV_GMAGIC|SV_SKIP_OVERLOAD);
 935                         if (all_SIVs && !SvSIOK(*p1))
 936                             all_SIVs = 0;
 937                     }
 938                 }
 939                 else {
 940                     if (!SvPOK(*p1))
 941                         (void)sv_2pv_flags(*p1, 0,
 942                             SV_GMAGIC|SV_CONST_RETURN|SV_SKIP_OVERLOAD);
 943                 }
 944                 if (SvAMAGIC(*p1))
 945                     overloading = 1;
 946             }
 947             p1++;
 948         }
 949         else
 950             max--;
 951     }
 952     if (max > 1) {
 953         SV **start;
 954         if (PL_sortcop) {
 955             PERL_CONTEXT *cx;
 956             const bool oldcatch = CATCH_GET;
 957             I32 old_savestack_ix = PL_savestack_ix;
 958
 959             SAVEOP();
 960
 961             CATCH_SET(TRUE);
 962             PUSHSTACKi(PERLSI_SORT);
 963             if (!hasargs && !is_xsub) {
 964                 SAVEGENERICSV(PL_firstgv);
 965                 SAVEGENERICSV(PL_secondgv);
 966                 PL_firstgv = MUTABLE_GV(SvREFCNT_inc(
 967                     gv_fetchpvs("a", GV_ADD|GV_NOTQUAL, SVt_PV)
 968                 ));
 969                 PL_secondgv = MUTABLE_GV(SvREFCNT_inc(
 970                     gv_fetchpvs("b", GV_ADD|GV_NOTQUAL, SVt_PV)
 971                 ));
 972                 /* make sure the GP isn't removed out from under us for
 973                  * the SAVESPTR() */
 974                 save_gp(PL_firstgv, 0);
 975                 save_gp(PL_secondgv, 0);
 976                 /* we don't want modifications localized */
 977                 GvINTRO_off(PL_firstgv);
 978                 GvINTRO_off(PL_secondgv);
 979                 SAVEGENERICSV(GvSV(PL_firstgv));
 980                 SvREFCNT_inc(GvSV(PL_firstgv));
 981                 SAVEGENERICSV(GvSV(PL_secondgv));
 982                 SvREFCNT_inc(GvSV(PL_secondgv));
 983             }
 984
 985             gimme = G_SCALAR;
 986             cx = cx_pushblock(CXt_NULL, gimme, PL_stack_base, old_savestack_ix);
 987             if (!(flags & OPf_SPECIAL)) {
 988                 cx->cx_type = CXt_SUB|CXp_MULTICALL;
 989                 cx_pushsub(cx, cv, NULL, hasargs);
 990                 if (!is_xsub) {
 991                     PADLIST * const padlist = CvPADLIST(cv);
 992
 993                     if (++CvDEPTH(cv) >= 2)
 994                         pad_push(padlist, CvDEPTH(cv));
 995                     PAD_SET_CUR_NOSAVE(padlist, CvDEPTH(cv));
 996
 997                     if (hasargs) {
 998                         /* This is mostly copied from pp_entersub */
 999                         AV * const av = MUTABLE_AV(PAD_SVl(0));
1000
1001                         cx->blk_sub.savearray = GvAV(PL_defgv);
1002                         GvAV(PL_defgv) = MUTABLE_AV(SvREFCNT_inc_simple(av));
1003                     }
1004
1005                 }
1006             }
1007
1008             start = p1 - max;
1009             sortsvp(aTHX_ start, max,
1010                     (is_xsub ? S_sortcv_xsub : hasargs ? S_sortcv_stacked : S_sortcv),
1011                     sort_flags);
1012
1013             /* Reset cx, in case the context stack has been reallocated. */
1014             cx = CX_CUR();
1015
1016             PL_stack_sp = PL_stack_base + cx->blk_oldsp;
1017
1018             CX_LEAVE_SCOPE(cx);
1019             if (!(flags & OPf_SPECIAL)) {
1020                 assert(CxTYPE(cx) == CXt_SUB);
1021                 cx_popsub(cx);
1022             }
1023             else
1024                 assert(CxTYPE(cx) == CXt_NULL);
1025                 /* there isn't a POPNULL ! */
1026
1027             cx_popblock(cx);
1028             CX_POP(cx);
1029             POPSTACK;
1030             CATCH_SET(oldcatch);
1031         }
1032         else {
1033             MEXTEND(SP, 20);    /* Can't afford stack realloc on signal. */
1034             start = ORIGMARK+1;
1035             sortsvp(aTHX_ start, max,
1036                     (priv & OPpSORT_NUMERIC)
1037                         ? ( ( ( priv & OPpSORT_INTEGER) || all_SIVs)
1038                             ? ( overloading ? S_amagic_i_ncmp : S_sv_i_ncmp)
1039                             : ( overloading ? S_amagic_ncmp : S_sv_ncmp ) )
1040                         : (
1041 #ifdef USE_LOCALE_COLLATE
1042                            IN_LC_RUNTIME(LC_COLLATE)
1043                             ? ( overloading
1044                                 ? (SVCOMPARE_t)S_amagic_cmp_locale
1045                                 : (SVCOMPARE_t)sv_cmp_locale_static)
1046                             :
1047 #endif
1048                               ( overloading ? (SVCOMPARE_t)S_amagic_cmp : (SVCOMPARE_t)sv_cmp_static)),
1049                     sort_flags);
1050         }
1051         if ((priv & OPpSORT_REVERSE) != 0) {
1052             SV **q = start+max-1;
1053             while (start < q) {
1054                 SV * const tmp = *start;
1055                 *start++ = *q;
1056                 *q-- = tmp;
1057             }
1058         }
1059     }
1060
1061     if (av) {
1062         /* copy back result to the array */
1063         SV** const base = MARK+1;
1064         if (SvMAGICAL(av)) {
1065             for (i = 0; i < max; i++)
1066                 base[i] = newSVsv(base[i]);
1067             av_clear(av);
1068             av_extend(av, max);
1069             for (i=0; i < max; i++) {
1070                 SV * const sv = base[i];
1071                 SV ** const didstore = av_store(av, i, sv);
1072                 if (SvSMAGICAL(sv))
1073                     mg_set(sv);
1074                 if (!didstore)
1075                     sv_2mortal(sv);
1076             }
1077         }
1078         else {
1079             /* the elements of av are likely to be the same as the
1080              * (non-refcounted) elements on the stack, just in a different
1081              * order. However, its possible that someone's messed with av
1082              * in the meantime. So bump and unbump the relevant refcounts
1083              * first.
1084              */
1085             for (i = 0; i < max; i++) {
1086                 SV *sv = base[i];
1087                 assert(sv);
1088                 if (SvREFCNT(sv) > 1)
1089                     base[i] = newSVsv(sv);
1090                 else
1091                     SvREFCNT_inc_simple_void_NN(sv);
1092             }
1093             av_clear(av);
1094             if (max > 0) {
1095                 av_extend(av, max);
1096                 Copy(base, AvARRAY(av), max, SV*);
1097             }
1098             AvFILLp(av) = max - 1;
1099             AvREIFY_off(av);
1100             AvREAL_on(av);
1101         }
1102     }
1103     LEAVE;
1104     PL_stack_sp = ORIGMARK +  max;
1105     return nextop;
1106 }
1107
1108 static I32
1109 S_sortcv(pTHX_ SV *const a, SV *const b)
1110 {
1111     const I32 oldsaveix = PL_savestack_ix;
1112     I32 result;
1113     PMOP * const pm = PL_curpm;
1114     COP * const cop = PL_curcop;
1115     SV *olda, *oldb;
1116
1117     PERL_ARGS_ASSERT_SORTCV;
1118
1119     olda = GvSV(PL_firstgv);
1120     GvSV(PL_firstgv) = SvREFCNT_inc_simple_NN(a);
1121     SvREFCNT_dec(olda);
1122     oldb = GvSV(PL_secondgv);
1123     GvSV(PL_secondgv) = SvREFCNT_inc_simple_NN(b);
1124     SvREFCNT_dec(oldb);
1125     PL_stack_sp = PL_stack_base;
1126     PL_op = PL_sortcop;
1127     CALLRUNOPS(aTHX);
1128     PL_curcop = cop;
1129     /* entry zero of a stack is always PL_sv_undef, which
1130      * simplifies converting a '()' return into undef in scalar context */
1131     assert(PL_stack_sp > PL_stack_base || *PL_stack_base == &PL_sv_undef);
1132     result = SvIV(*PL_stack_sp);
1133
1134     LEAVE_SCOPE(oldsaveix);
1135     PL_curpm = pm;
1136     return result;
1137 }
1138
1139 static I32
1140 S_sortcv_stacked(pTHX_ SV *const a, SV *const b)
1141 {
1142     const I32 oldsaveix = PL_savestack_ix;
1143     I32 result;
1144     AV * const av = GvAV(PL_defgv);
1145     PMOP * const pm = PL_curpm;
1146     COP * const cop = PL_curcop;
1147
1148     PERL_ARGS_ASSERT_SORTCV_STACKED;
1149
1150     if (AvREAL(av)) {
1151         av_clear(av);
1152         AvREAL_off(av);
1153         AvREIFY_on(av);
1154     }
1155     if (AvMAX(av) < 1) {
1156         SV **ary = AvALLOC(av);
1157         if (AvARRAY(av) != ary) {
1158             AvMAX(av) += AvARRAY(av) - AvALLOC(av);
1159             AvARRAY(av) = ary;
1160         }
1161         if (AvMAX(av) < 1) {
1162             Renew(ary,2,SV*);
1163             AvMAX(av) = 1;
1164             AvARRAY(av) = ary;
1165             AvALLOC(av) = ary;
1166         }
1167     }
1168     AvFILLp(av) = 1;
1169
1170     AvARRAY(av)[0] = a;
1171     AvARRAY(av)[1] = b;
1172     PL_stack_sp = PL_stack_base;
1173     PL_op = PL_sortcop;
1174     CALLRUNOPS(aTHX);
1175     PL_curcop = cop;
1176     /* entry zero of a stack is always PL_sv_undef, which
1177      * simplifies converting a '()' return into undef in scalar context */
1178     assert(PL_stack_sp > PL_stack_base || *PL_stack_base == &PL_sv_undef);
1179     result = SvIV(*PL_stack_sp);
1180
1181     LEAVE_SCOPE(oldsaveix);
1182     PL_curpm = pm;
1183     return result;
1184 }
1185
1186 static I32
1187 S_sortcv_xsub(pTHX_ SV *const a, SV *const b)
1188 {
1189     dSP;
1190     const I32 oldsaveix = PL_savestack_ix;
1191     CV * const cv=MUTABLE_CV(PL_sortcop);
1192     I32 result;
1193     PMOP * const pm = PL_curpm;
1194
1195     PERL_ARGS_ASSERT_SORTCV_XSUB;
1196
1197     SP = PL_stack_base;
1198     PUSHMARK(SP);
1199     EXTEND(SP, 2);
1200     *++SP = a;
1201     *++SP = b;
1202     PUTBACK;
1203     (void)(*CvXSUB(cv))(aTHX_ cv);
1204     /* entry zero of a stack is always PL_sv_undef, which
1205      * simplifies converting a '()' return into undef in scalar context */
1206     assert(PL_stack_sp > PL_stack_base || *PL_stack_base == &PL_sv_undef);
1207     result = SvIV(*PL_stack_sp);
1208
1209     LEAVE_SCOPE(oldsaveix);
1210     PL_curpm = pm;
1211     return result;
1212 }
1213
1214
1215 static I32
1216 S_sv_ncmp(pTHX_ SV *const a, SV *const b)
1217 {
1218     I32 cmp = do_ncmp(a, b);
1219
1220     PERL_ARGS_ASSERT_SV_NCMP;
1221
1222     if (cmp == 2) {
1223         if (ckWARN(WARN_UNINITIALIZED)) report_uninit(NULL);
1224         return 0;
1225     }
1226
1227     return cmp;
1228 }
1229
1230 static I32
1231 S_sv_i_ncmp(pTHX_ SV *const a, SV *const b)
1232 {
1233     const IV iv1 = SvIV(a);
1234     const IV iv2 = SvIV(b);
1235
1236     PERL_ARGS_ASSERT_SV_I_NCMP;
1237
1238     return iv1 < iv2 ? -1 : iv1 > iv2 ? 1 : 0;
1239 }
1240
1241 #define tryCALL_AMAGICbin(left,right,meth) \
1242     (SvAMAGIC(left)||SvAMAGIC(right)) \
1243         ? amagic_call(left, right, meth, 0) \
1244         : NULL;
1245
1246 #define SORT_NORMAL_RETURN_VALUE(val)  (((val) > 0) ? 1 : ((val) ? -1 : 0))
1247
1248 static I32
1249 S_amagic_ncmp(pTHX_ SV *const a, SV *const b)
1250 {
1251     SV * const tmpsv = tryCALL_AMAGICbin(a,b,ncmp_amg);
1252
1253     PERL_ARGS_ASSERT_AMAGIC_NCMP;
1254
1255     if (tmpsv) {
1256         if (SvIOK(tmpsv)) {
1257             const I32 i = SvIVX(tmpsv);
1258             return SORT_NORMAL_RETURN_VALUE(i);
1259         }
1260         else {
1261             const NV d = SvNV(tmpsv);
1262             return SORT_NORMAL_RETURN_VALUE(d);
1263         }
1264      }
1265      return S_sv_ncmp(aTHX_ a, b);
1266 }
1267
1268 static I32
1269 S_amagic_i_ncmp(pTHX_ SV *const a, SV *const b)
1270 {
1271     SV * const tmpsv = tryCALL_AMAGICbin(a,b,ncmp_amg);
1272
1273     PERL_ARGS_ASSERT_AMAGIC_I_NCMP;
1274
1275     if (tmpsv) {
1276         if (SvIOK(tmpsv)) {
1277             const I32 i = SvIVX(tmpsv);
1278             return SORT_NORMAL_RETURN_VALUE(i);
1279         }
1280         else {
1281             const NV d = SvNV(tmpsv);
1282             return SORT_NORMAL_RETURN_VALUE(d);
1283         }
1284     }
1285     return S_sv_i_ncmp(aTHX_ a, b);
1286 }
1287
1288 static I32
1289 S_amagic_cmp(pTHX_ SV *const str1, SV *const str2)
1290 {
1291     SV * const tmpsv = tryCALL_AMAGICbin(str1,str2,scmp_amg);
1292
1293     PERL_ARGS_ASSERT_AMAGIC_CMP;
1294
1295     if (tmpsv) {
1296         if (SvIOK(tmpsv)) {
1297             const I32 i = SvIVX(tmpsv);
1298             return SORT_NORMAL_RETURN_VALUE(i);
1299         }
1300         else {
1301             const NV d = SvNV(tmpsv);
1302             return SORT_NORMAL_RETURN_VALUE(d);
1303         }
1304     }
1305     return sv_cmp(str1, str2);
1306 }
1307
1308 #ifdef USE_LOCALE_COLLATE
1309
1310 static I32
1311 S_amagic_cmp_locale(pTHX_ SV *const str1, SV *const str2)
1312 {
1313     SV * const tmpsv = tryCALL_AMAGICbin(str1,str2,scmp_amg);
1314
1315     PERL_ARGS_ASSERT_AMAGIC_CMP_LOCALE;
1316
1317     if (tmpsv) {
1318         if (SvIOK(tmpsv)) {
1319             const I32 i = SvIVX(tmpsv);
1320             return SORT_NORMAL_RETURN_VALUE(i);
1321         }
1322         else {
1323             const NV d = SvNV(tmpsv);
1324             return SORT_NORMAL_RETURN_VALUE(d);
1325         }
1326     }
1327     return sv_cmp_locale(str1, str2);
1328 }
1329
1330 #endif
1331
1332 /*
1333  * ex: set ts=8 sts=4 sw=4 et:
1334  */