pp_sort.c

   1 /*    pp_sort.c
   2  *
   3  *    Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
   4  *    2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
   5  *
   6  *    You may distribute under the terms of either the GNU General Public
   7  *    License or the Artistic License, as specified in the README file.
   8  *
   9  */
  10
  11 /*
  12  *   ...they shuffled back towards the rear of the line.  'No, not at the
  13  *   rear!' the slave-driver shouted.  'Three files up. And stay there...
  14  *
  15  *     [p.931 of _The Lord of the Rings_, VI/ii: "The Land of Shadow"]
  16  */
  17
  18 /* This file contains pp ("push/pop") functions that
  19  * execute the opcodes that make up a perl program. A typical pp function
  20  * expects to find its arguments on the stack, and usually pushes its
  21  * results onto the stack, hence the 'pp' terminology. Each OP structure
  22  * contains a pointer to the relevant pp_foo() function.
  23  *
  24  * This particular file just contains pp_sort(), which is complex
  25  * enough to merit its own file! See the other pp*.c files for the rest of
  26  * the pp_ functions.
  27  */
  28
  29 #include "EXTERN.h"
  30 #define PERL_IN_PP_SORT_C
  31 #include "perl.h"
  32
  33 #define sv_cmp_static Perl_sv_cmp
     /* ^ sv_cmp_static and sv_cmp_locale_static are file-local spellings of
      *   Perl_sv_cmp and Perl_sv_cmp_locale.  NOTE(review): their uses are not
      *   in the part of the file reviewed here - presumably they are the
      *   default string comparators chosen by pp_sort; confirm. */
  34 #define sv_cmp_locale_static Perl_sv_cmp_locale
  35
     /* SMALLSORT: largest element count for which Perl_sortsv_flags() uses its
      * on-stack "small" array as the auxiliary merge buffer; larger inputs get
      * a buffer allocated with Newx().  The #ifndef lets a build override it. */
  36 #ifndef SMALLSORT
  37 #define SMALLSORT (200)
  38 #endif
  39
  40 /* Flags for sortsv_flags: bit values OR-ed together into its U32 flags
      * argument.
      *   SORTf_DESC     - descending order; Perl_sortsv_flags() substitutes
      *                    cmp_desc() for the comparator when this bit is set.
      *   SORTf_STABLE   - set by pp_sort when OPpSORT_STABLE is in op_private.
      *   SORTf_UNSTABLE - set by pp_sort when OPpSORT_UNSTABLE is in op_private.
      * Perl_sortsv_flags() as written in this file tests only SORTf_DESC; the
      * other two bits are accepted but not examined there. */
  41 #define SORTf_DESC   1
  42 #define SORTf_STABLE 2
  43 #define SORTf_UNSTABLE 8
  44
  45 /*
  46  * The mergesort implementation is by Peter M. McIlroy <pmcilroy@lucent.com>.
  47  *
  48  * The original code was written in conjunction with BSD Computer Software
  49  * Research Group at University of California, Berkeley.
  50  *
  51  * See also: "Optimistic Sorting and Information Theoretic Complexity"
  52  *           Peter McIlroy
  53  *           SODA (Fourth Annual ACM-SIAM Symposium on Discrete Algorithms),
  54  *           pp 467-474, Austin, Texas, 25-27 January 1993.
  55  *
  56  * The integration to Perl is by John P. Linderman <jpl.jpl@gmail.com>.
  57  *
  58  * The code can be distributed under the same terms as Perl itself.
  59  *
  60  */
  61
  62
  63 typedef char * aptr;            /* byte pointer; APTR() casts to it for arithmetic in bytes */
  64 typedef SV * gptr;              /* element type of the lists being sorted: pointer to an SV */
  65
  66 /* Binary merge internal sort, with a few special mods
  67 ** for the special perl environment it now finds itself in.
  68 **
  69 ** Things that were once options have been hotwired
  70 ** to values suitable for this use.  In particular, we'll always
  71 ** initialize looking for natural runs, we'll always produce stable
  72 ** output, and we'll always do Peter McIlroy's binary merge.
  73 */
  74
  75 /* Convenience casts: APTR(P) views P as a byte pointer (aptr) for
      * arithmetic, GPTP(P) as a pointer to a list slot (gptr *), and GPPP(P)
      * as a pointer to a slot that itself holds a slot pointer (gptr **). */
  76
  77 #define APTR(P) ((aptr)(P))
  78 #define GPTP(P) ((gptr *)(P))
  79 #define GPPP(P) ((gptr **)(P))
  80
  81
  82 /* byte offset from pointer P to (larger) pointer Q */
  83 #define BYTEOFF(P, Q) (APTR(Q) - APTR(P))
  84
     /* size in bytes of one list slot */
  85 #define PSIZE sizeof(gptr)
  86
  87 /* If PSIZE is a power of 2, PSHIFT may be defined as that power, if that
      * helps.  Nothing in this part of the file defines PSHIFT, so unless the
      * build supplies it the #else definitions below are the ones in effect.
      *
      *   PNELEM(P, Q) - number of slots from P up to Q
      *   PNBYTE(N)    - number of bytes occupied by N slots
      *   PINDEX(P, N) - pointer to the slot N positions after P
      */
  88
  89 #ifdef  PSHIFT
  90 #define PNELEM(P, Q)    (BYTEOFF(P,Q) >> (PSHIFT))
  91 #define PNBYTE(N)       ((N) << (PSHIFT))
  92 #define PINDEX(P, N)    (GPTP(APTR(P) + PNBYTE(N)))
  93 #else
  94 /* Plain pointer arithmetic; leave optimization to the compiler */
  95 #define PNELEM(P, Q)    (GPTP(Q) - GPTP(P))
  96 #define PNBYTE(N)       ((N) * (PSIZE))
  97 #define PINDEX(P, N)    (GPTP(P) + (N))
  98 #endif
  99
 100 /* Given P pointing into list THIS, yield the slot at the same byte offset
      * in list OTHER. */
 101 #define POTHER(P, THIS, OTHER) GPTP(APTR(OTHER) + BYTEOFF(THIS,P))
 102
     /* Copy slots from src to dst, post-incrementing both, until src reaches
      * lim.  It is a do/while, so one slot is always copied even if src
      * already equals lim; callers must only use it on a non-empty range.
      * The arguments are unparenthesized and evaluated repeatedly, so they
      * must be plain modifiable pointer variables.  The expansion carries no
      * trailing semicolon; the call site supplies it. */
 103 #define FROMTOUPTO(src, dst, lim) do *dst++ = *src++; while(src<lim)
 104
 105
 106 /* Runs are identified by a pointer stored in a list parallel to the one
 107 ** holding the elements (initially the auxiliary list).
 108 ** The pointer sits in the slot at the start of a run,
 109 ** and it points to the slot where the next run starts.
     ** NEXT(P) reads that pointer by treating the gptr slot at P as a gptr *;
     ** NEXT is used as an lvalue, too, to store it.
 110 */
 111
 112 #define NEXT(P)         (*GPPP(P))
 113
 114
 115 /* PTHRESH is the minimum number of pairs with the same sense to justify
 116 ** checking for a run and extending it.  Note that PTHRESH counts PAIRS,
 117 ** not just elements, so PTHRESH == 8 means a run of 16.
     ** dynprep() applies it by probing the pair at b + (2 * PTHRESH).
 118 */
 119
 120 #define PTHRESH (8)
 121
 122 /* RTHRESH is the number of elements in a run that must compare low
 123 ** to the low element from the opposing run before we justify
 124 ** doing a binary rampup instead of single stepping.
 125 ** In random input, N in a row low should only happen with
 126 ** probability 2^(1-N), so we can risk that we are dealing
 127 ** with orderly input without paying much when we aren't.
     ** In the merge loop of Perl_sortsv_flags() the probe stride stays at 1
     ** until RTHRESH probes have been made, then doubles after each
     ** further probe.
 128 */
 129
 130 #define RTHRESH (6)
 131
 132
 133 /*
 134 ** Overview of algorithm and variables.
 135 ** The array of elements at list1 will be organized into runs of length 2,
 136 ** or runs of length >= 2 * PTHRESH.  We only try to form long runs when
 137 ** PTHRESH adjacent pairs compare in the same way, suggesting overall order.
 138 **
 139 ** Unless otherwise specified, pair pointers address the first of two elements.
 140 **
 141 ** b and b+1 are a pair that compare with sense "sense".
 142 ** b is the "bottom" of adjacent pairs that might form a longer run.
 143 **
 144 ** p2 parallels b in the list2 array, where runs are defined by
 145 ** a pointer chain.
 146 **
 147 ** t represents the "top" of the adjacent pairs that might extend
 148 ** the run beginning at b.  Usually, t addresses a pair
 149 ** that compares with opposite sense from (b,b+1).
 150 ** However, it may also address a singleton element at the end of list1,
 151 ** or it may be equal to "last", the first element beyond list1.
 152 **
 153 ** r addresses the Nth pair following b.  If this would be beyond t,
 154 ** we back it off to t.  Only when r is less than t do we consider the
 155 ** run long enough to consider checking.
 156 **
 157 ** q addresses a pair such that the pairs at b through q already form a run.
 158 ** Often, q will equal b, indicating we only are sure of the pair itself.
 159 ** However, a search on the previous cycle may have revealed a longer run,
 160 ** so q may be greater than b.
 161 **
 162 ** p is used to work back from a candidate r, trying to reach q,
 163 ** which would mean b through r would be a run.  If we discover such a run,
 164 ** we start q at r and try to push it further towards t.
 165 ** If b through r is NOT a run, we detect the wrong order at (p-1,p).
 166 ** In any event, after the check (if any), we have two main cases.
 167 **
 168 ** 1) Short run.  b <= q < p <= r <= t.
 169 **      b through q is a run (perhaps trivial)
 170 **      q through p are uninteresting pairs
 171 **      p through r is a run
 172 **
 173 ** 2) Long run.  b < r <= q < t.
 174 **      b through q is a run (of length >= 2 * PTHRESH)
 175 **
 176 ** Note that degenerate cases are not only possible, but likely.
 177 ** For example, if the pair following b compares with opposite sense,
 178 ** then b == q < p == r == t.
 179 */
 180
 181
 182 static IV
 183 dynprep(pTHX_ gptr *list1, gptr *list2, size_t nmemb, const SVCOMPARE_t cmp)
 184 {
         /* dynprep - split list1 into runs and record their boundaries.
          *
          * list1  array of nmemb elements.  It is rearranged in place: pairs,
          *        and longer stretches, that were found with sense set (first
          *        element comparing greater) are reversed.
          * list2  parallel array of nmemb slots.  For each run, the slot that
          *        corresponds to the run's first element receives, via NEXT(),
          *        a pointer to the list2 slot where the following run starts.
          * nmemb  element count.  The first comparison below reads *b and
          *        *(b+1) unconditionally, so callers must pass nmemb >= 2
          *        (Perl_sortsv_flags() returns early for nmemb <= 1).
          * cmp    comparison routine; only the test "result > 0" is used here.
          *
          * Returns the number of runs recorded in list2.
          */
 185     I32 sense;
 186     gptr *b, *p, *q, *t, *p2;
 187     gptr *last, *r;
 188     IV runs = 0;
 189
 190     b = list1;
 191     last = PINDEX(b, nmemb);
         /* sense is 1 when the first pair is out of order, 0 otherwise */
 192     sense = (cmp(aTHX_ *b, *(b+1)) > 0);
 193     for (p2 = list2; b < last; ) {
 194         /* We just started, or just reversed sense.
 195         ** Set t at end of pairs with the prevailing sense.
             ** t advances one pair at a time; the loop stops at the first
             ** pair (t, t+1) whose sense differs, or when fewer than two
             ** elements remain.
 196         */
 197         for (p = b+2, t = p; ++p < last; t = ++p) {
 198             if ((cmp(aTHX_ *t, *p) > 0) != sense) break;
 199         }
 200         q = b;
 201         /* Having laid out the playing field, look for long runs */
 202         do {
 203             p = r = b + (2 * PTHRESH);
 204             if (r >= t) p = r = t;      /* too short to care about */
 205             else {
                     /* Walk back from r towards q, checking the junction
                     ** between each pair and the pair before it.
                     */
 206                 while (((cmp(aTHX_ *(p-1), *p) > 0) == sense) &&
 207                        ((p -= 2) > q)) {}
 208                 if (p <= q) {
 209                     /* b through r is a (long) run.
 210                     ** Extend it as far as possible.
 211                     */
 212                     p = q = r;
 213                     while (((p += 2) < t) &&
 214                            ((cmp(aTHX_ *(p-1), *p) > 0) == sense)) q = p;
 215                     r = p = q + 2;      /* no simple pairs, no after-run */
 216                 }
 217             }
 218             if (q > b) {                /* run of greater than 2 at b */
 219                 gptr *savep = p;
 220
 221                 p = q += 2;
 222                 /* pick up singleton, if possible */
 223                 if ((p == t) &&
 224                     ((t + 1) == last) &&
 225                     ((cmp(aTHX_ *(p-1), *p) > 0) == sense))
 226                     savep = r = p = q = last;
                     /* store the run's end in the list2 chain and step p2 to it */
 227                 p2 = NEXT(p2) = p2 + (p - b); ++runs;
                     /* a run found with sense set is reversed in place by
                     ** swapping inwards from both ends
                     */
 228                 if (sense)
 229                     while (b < --p) {
 230                         const gptr c = *b;
 231                         *b++ = *p;
 232                         *p = c;
 233                     }
 234                 p = savep;
 235             }
                 /* each remaining pair up to p becomes its own run of 2,
                 ** swapped if sense is set
                 */
 236             while (q < p) {             /* simple pairs */
 237                 p2 = NEXT(p2) = p2 + 2; ++runs;
 238                 if (sense) {
 239                     const gptr c = *q++;
 240                     *(q-1) = *q;
 241                     *q++ = c;
 242                 } else q += 2;
 243             }
                 /* a lone element left at the very end becomes a run of 1 */
 244             if (((b = p) == t) && ((t+1) == last)) {
 245                 NEXT(p2) = p2 + 1; ++runs;
 246                 b++;
 247             }
 248             q = r;
 249         } while (b < t);
             /* the pair at t (if any) broke the pattern, so it has the
             ** opposite sense
             */
 250         sense = !sense;
 251     }
 252     return runs;
 253 }
 254
 255
 256 /* The original merge sort, in use since 5.7, was as fast as, or faster than,
 257  * qsort on many platforms, but slower than qsort, conspicuously so,
 258  * on others.  The most likely explanation was platform-specific
 259  * differences in cache sizes and relative speeds.
 260  *
 261  * The quicksort divide-and-conquer algorithm guarantees that, as the
 262  * problem is subdivided into smaller and smaller parts, the parts
 263  * fit into smaller (and faster) caches.  So it doesn't matter how
 264  * many levels of cache exist, quicksort will "find" them, and,
 265  * as long as smaller is faster, take advantage of them.
 266  *
 267  * By contrast, consider how the original mergesort algorithm worked.
 268  * Suppose we have five runs (each typically of length 2 after dynprep).
 269  *
 270  * pass               base                        aux
 271  *  0              1 2 3 4 5
 272  *  1                                           12 34 5
 273  *  2                1234 5
 274  *  3                                            12345
 275  *  4                 12345
 276  *
 277  * Adjacent pairs are merged in "grand sweeps" through the input.
 278  * This means, on pass 1, the records in runs 1 and 2 aren't revisited until
 279  * runs 3 and 4 are merged and the records from run 5 have been copied.
 280  * The only cache that matters is one large enough to hold *all* the input.
 281  * On some platforms, this may be many times slower than smaller caches.
 282  *
 283  * The following pseudo-code uses the same basic merge algorithm,
 284  * but in a divide-and-conquer way.
 285  *
 286  * # merge $runs runs at offset $offset of list $list1 into $list2.
 287  * # all unmerged runs ($runs == 1) originate in list $base.
 288  * sub mgsort2 {
 289  *     my ($offset, $runs, $base, $list1, $list2) = @_;
 290  *
 291  *     if ($runs == 1) {
 292  *         if ($list1 is $base) copy run to $list2
 293  *         return offset of end of list (or copy)
 294  *     } else {
 295  *         $off2 = mgsort2($offset, $runs-($runs/2), $base, $list2, $list1)
 296  *         mgsort2($off2, $runs/2, $base, $list2, $list1)
 297  *         merge the adjacent runs at $offset of $list1 into $list2
 298  *         return the offset of the end of the merged runs
 299  *     }
 300  * }
 301  * mgsort2(0, $runs, $base, $aux, $base);
 302  *
 303  * For our 5 runs, the tree of calls looks like
 304  *
 305  *           5
 306  *      3        2
 307  *   2     1   1   1
 308  * 1   1
 309  *
 310  * 1   2   3   4   5
 311  *
 312  * and the corresponding activity looks like
 313  *
 314  * copy runs 1 and 2 from base to aux
 315  * merge runs 1 and 2 from aux to base
 316  * (run 3 is where it belongs, no copy needed)
 317  * merge runs 12 and 3 from base to aux
 318  * (runs 4 and 5 are where they belong, no copy needed)
 319  * merge runs 4 and 5 from base to aux
 320  * merge runs 123 and 45 from aux to base
 321  *
 322  * Note that we merge runs 1 and 2 immediately after copying them,
 323  * while they are still likely to be in fast cache.  Similarly,
 324  * run 3 is merged with run 12 while it still may be lingering in cache.
 325  * This implementation should therefore enjoy much of the cache-friendly
 326  * behavior that quicksort does.  In addition, it does less copying
 327  * than the original mergesort implementation (only runs 1 and 2 are copied)
 328  * and the "balancing" of merges is better (merged runs comprise more nearly
 329  * equal numbers of original runs).
 330  *
 331  * The actual cache-friendly implementation will use a pseudo-stack
 332  * to avoid recursion, and will unroll processing of runs of length 2,
 333  * but it is otherwise similar to the recursive implementation.
 334  */
 335
 336 typedef struct {
         /* One frame of the explicit stack that Perl_sortsv_flags() keeps in
          * place of recursion.  offset is an element index from the start of
          * the list, not a byte count.  runs == 0 marks a frame whose two
          * runs have both been constructed and are ready to be merged. */
 337     IV  offset;         /* offset of 1st of 2 runs at this level */
 338     IV  runs;           /* how many runs must be combined into 1 */
 339 } off_runs;             /* pseudo-stack element */
 340
 341
 342 static I32
 343 cmp_desc(pTHX_ gptr const a, gptr const b)
 344 {
         /* Comparator used for descending sorts: calls the routine stored in
          * PL_sort_RealCmp with (a, b) and returns its result negated.
          * Perl_sortsv_flags() stores the caller's comparator in
          * PL_sort_RealCmp before installing this function when SORTf_DESC
          * is set. */
 345     return -PL_sort_RealCmp(aTHX_ a, b);
 346 }
 347
 348 /*
 349 =head1 SV Manipulation Functions
 350
 351 =for apidoc sortsv_flags
 352
 353 In-place sort an array of SV pointers with the given comparison routine,
 354 with various SORTf_* flag options.
 355
 356 =cut
 357 */
 358 void
 359 Perl_sortsv_flags(pTHX_ gptr *base, size_t nmemb, SVCOMPARE_t cmp, U32 flags)
 360 {
         /* Merge sort of base[0 .. nmemb-1], done in place with the help of an
          * auxiliary array of the same length.
          *
          * base   array of SV pointers to sort.
          * nmemb  number of elements; 0 or 1 returns immediately.
          * cmp    comparison routine, called as cmp(aTHX_ a, b).  The merge
          *        tests its result with "<= 0" and "<= sense" (sense being 0
          *        or -1); dynprep() tests "> 0".
          * flags  SORTf_* bits.  Only SORTf_DESC is tested here: when set,
          *        cmp is parked in PL_sort_RealCmp and cmp_desc(), which
          *        negates its result, is used instead.
          *
          * Outline: dynprep() splits base into runs and records them in aux;
          * the loop below then merges runs pairwise, using "stack" as an
          * explicit stack instead of recursion and alternating between base
          * and aux according to the parity of "level".
          *
          * NOTE(review): the cleanup at "done" (freeing aux, restoring
          * PL_sort_RealCmp) is reached only by normal control flow.  If cmp
          * can leave by a non-local exit - presumably possible when a
          * Perl-level comparator dies; confirm - neither would happen.
          * NOTE(review): PL_sort_RealCmp is restored only when the saved
          * value was non-NULL; if it was NULL on entry, the global is left
          * pointing at the caller's cmp after a SORTf_DESC sort.
          */
 361     IV i, run, offset;
 362     I32 sense, level;
 363     gptr *f1, *f2, *t, *b, *p;
 364     int iwhich;
 365     gptr *aux;
 366     gptr *p1;
 367     gptr small[SMALLSORT];
 368     gptr *which[3];
         /* NOTE(review): nothing checks stackp against the end of stack[].
          * Each push below roughly halves "runs", so depth grows with the
          * logarithm of the run count; 60 frames is presumably chosen to
          * exceed any reachable depth - confirm. */
 369     off_runs stack[60], *stackp;
 370     SVCOMPARE_t savecmp = NULL;
 371
 372     PERL_ARGS_ASSERT_SORTSV_FLAGS;
 373     if (nmemb <= 1) return;                     /* sorted trivially */
 374
 375     if ((flags & SORTf_DESC) != 0) {
 376         savecmp = PL_sort_RealCmp;      /* Save current comparison routine, if any */
 377         PL_sort_RealCmp = cmp;  /* Put comparison routine where cmp_desc can find it */
 378         cmp = cmp_desc;
 379     }
 380
 381     if (nmemb <= SMALLSORT) aux = small;        /* use stack for aux array */
 382     else { Newx(aux,nmemb,gptr); }              /* allocate auxiliary array */
 383     level = 0;
 384     stackp = stack;
 385     stackp->runs = dynprep(aTHX_ base, aux, nmemb, cmp);
 386     stackp->offset = offset = 0;
         /* which[level & 1] is the list a level's result belongs in;
          * which[2] repeats base so that which[++iwhich] always names the
          * opposite list without a conditional. */
 387     which[0] = which[2] = base;
 388     which[1] = aux;
 389     for (;;) {
 390         /* On levels where both runs have been constructed (stackp->runs == 0),
 391          * merge them, and note the offset of their end, in case the offset
 392          * is needed at the next level up.  Hop up a level, and,
 393          * as long as stackp->runs is 0, keep merging.
 394          */
 395         IV runs = stackp->runs;
 396         if (runs == 0) {
 397             gptr *list1, *list2;
 398             iwhich = level & 1;
 399             list1 = which[iwhich];              /* area where runs are now */
 400             list2 = which[++iwhich];            /* area for merged runs */
 401             do {
 402                 gptr *l1, *l2, *tp2;
 403                 offset = stackp->offset;
 404                 f1 = p1 = list1 + offset;               /* start of first run */
 405                 p = tp2 = list2 + offset;       /* where merged run will go */
 406                 t = NEXT(p);                    /* where first run ends */
 407                 f2 = l1 = POTHER(t, list2, list1); /* ... on the other side */
 408                 t = NEXT(t);                    /* where second run ends */
 409                 l2 = POTHER(t, list2, list1);   /* ... on the other side */
 410                 offset = PNELEM(list2, t);
 411                 while (f1 < l1 && f2 < l2) {
 412                     /* If head 1 is larger than head 2, find ALL the elements
 413                     ** in list 2 strictly less than head1, write them all,
 414                     ** then head 1.  Then compare the new heads, and repeat,
 415                     ** until one or both lists are exhausted.
 416                     **
 417                     ** In all comparisons (after establishing
 418                     ** which head to merge) the item to merge
 419                     ** (at pointer q) is the first operand of
 420                     ** the comparison.  When we want to know
 421                     ** if "q is strictly less than the other",
 422                     ** we can't just do
 423                     **    cmp(q, other) < 0
 424                     ** because stability demands that we treat equality
 425                     ** as high when q comes from l2, and as low when
 426                     ** q was from l1.  So we ask the question by doing
 427                     **    cmp(q, other) <= sense
 428                     ** and make sense == 0 when equality should look low,
 429                     ** and -1 when equality should look high.
 430                     */
 431
 432                     gptr *q;
 433                     if (cmp(aTHX_ *f1, *f2) <= 0) {
 434                         q = f2; b = f1; t = l1;
 435                         sense = -1;
 436                     } else {
 437                         q = f1; b = f2; t = l2;
 438                         sense = 0;
 439                     }
 440
 441
 442                     /* ramp up
 443                     **
 444                     ** Leave t at something strictly
 445                     ** greater than q (or at the end of the list),
 446                     ** and b at something strictly less than q.
                         **
                         ** i is the probe stride; it starts at 1 and is doubled
                         ** after each probe once RTHRESH probes have been made.
 447                     */
 448                     for (i = 1, run = 0 ;;) {
 449                         if ((p = PINDEX(b, i)) >= t) {
 450                             /* off the end */
 451                             if (((p = PINDEX(t, -1)) > b) &&
 452                                 (cmp(aTHX_ *q, *p) <= sense))
 453                                  t = p;
 454                             else b = p;
 455                             break;
 456                         } else if (cmp(aTHX_ *q, *p) <= sense) {
 457                             t = p;
 458                             break;
 459                         } else b = p;
 460                         if (++run >= RTHRESH) i += i;
 461                     }
 462
 463
 464                     /* q is known to follow b and must be inserted before t.
 465                     ** Increment b, so the range of possibilities is [b,t).
 466                     ** Round binary split down, to favor early appearance.
 467                     ** Adjust b and t until q belongs just before t.
 468                     */
 469
 470                     b++;
 471                     while (b < t) {
 472                         p = PINDEX(b, (PNELEM(b, t) - 1) / 2);
 473                         if (cmp(aTHX_ *q, *p) <= sense) {
 474                             t = p;
 475                         } else b = p + 1;
 476                     }
 477
 478
 479                     /* Copy all the strictly low elements */
                         /* (from the run q did not come from, up to t), then
                         ** the element at q itself
                         */
 480
 481                     if (q == f1) {
 482                         FROMTOUPTO(f2, tp2, t);
 483                         *tp2++ = *f1++;
 484                     } else {
 485                         FROMTOUPTO(f1, tp2, t);
 486                         *tp2++ = *f2++;
 487                     }
 488                 }
 489
 490
 491                 /* Run out remaining list */
                     /* FROMTOUPTO always copies at least one slot, hence the
                     ** explicit f2 < l2 guard when run 1 is the one exhausted
                     */
 492                 if (f1 == l1) {
 493                        if (f2 < l2) FROMTOUPTO(f2, tp2, l2);
 494                 } else              FROMTOUPTO(f1, tp2, l1);
                     /* record in list1 where the merged run ends, so the next
                     ** level up can find it
                     */
 495                 p1 = NEXT(p1) = POTHER(tp2, list2, list1);
 496
 497                 if (--level == 0) goto done;
 498                 --stackp;
 499                 t = list1; list1 = list2; list2 = t;    /* swap lists */
 500             } while ((runs = stackp->runs) == 0);
 501         }
 502
 503
 504         stackp->runs = 0;               /* current run will finish level */
 505         /* While there are more than 2 runs remaining,
 506          * turn them into exactly 2 runs (at the "other" level),
 507          * each made up of approximately half the runs.
 508          * Stack the second half for later processing,
 509          * and set about producing the first half now.
 510          */
 511         while (runs > 2) {
 512             ++level;
 513             ++stackp;
 514             stackp->offset = offset;
 515             runs -= stackp->runs = runs / 2;
 516         }
 517         /* We must construct a single run from 1 or 2 runs.
 518          * All the original runs are in which[0] == base.
 519          * The run we construct must end up in which[level&1].
 520          */
 521         iwhich = level & 1;
 522         if (runs == 1) {
 523             /* Constructing a single run from a single run.
 524              * If it's where it belongs already, there's nothing to do.
 525              * Otherwise, copy it to where it belongs.
 526              * A run of 1 is either a singleton at level 0,
 527              * or the second half of a split 3.  In neither event
 528              * is it necessary to set offset.  It will be set by the merge
 529              * that immediately follows.
 530              */
 531             if (iwhich) {       /* Belongs in aux, currently in base */
 532                 f1 = b = PINDEX(base, offset);  /* where list starts */
 533                 f2 = PINDEX(aux, offset);       /* where list goes */
 534                 t = NEXT(f2);                   /* where list will end */
 535                 offset = PNELEM(aux, t);        /* offset thereof */
 536                 t = PINDEX(base, offset);       /* where it currently ends */
 537                 FROMTOUPTO(f1, f2, t);          /* copy */
 538                 NEXT(b) = t;                    /* set up parallel pointer */
 539             } else if (level == 0) goto done;   /* single run at level 0 */
 540         } else {
 541             /* Constructing a single run from two runs.
 542              * The merge code at the top will do that.
 543              * We need only make sure the two runs are in the "other" array,
 544              * so they'll end up in the correct array after the merge.
 545              */
 546             ++level;
 547             ++stackp;
 548             stackp->offset = offset;
 549             stackp->runs = 0;   /* take care of both runs, trigger merge */
 550             if (!iwhich) {      /* Merged runs belong in aux, copy 1st */
 551                 f1 = b = PINDEX(base, offset);  /* where first run starts */
 552                 f2 = PINDEX(aux, offset);       /* where it will be copied */
 553                 t = NEXT(f2);                   /* where first run will end */
 554                 offset = PNELEM(aux, t);        /* offset thereof */
 555                 p = PINDEX(base, offset);       /* end of first run */
 556                 t = NEXT(t);                    /* where second run will end */
 557                 t = PINDEX(base, PNELEM(aux, t)); /* where it now ends */
 558                 FROMTOUPTO(f1, f2, t);          /* copy both runs */
 559                 NEXT(b) = p;                    /* paralleled pointer for 1st */
 560                 NEXT(p) = t;                    /* ... and for second */
 561             }
 562         }
 563     }
 564   done:
 565     if (aux != small) Safefree(aux);    /* free iff allocated */
 566     if (savecmp != NULL) {
 567          PL_sort_RealCmp = savecmp;     /* Restore current comparison routine, if any */
 568     }
 569     return;
 570 }
 571
 572 /*
 573 =head1 Array Manipulation Functions
 574
 575 =for apidoc sortsv
 576
 577 In-place sort an array of SV pointers with the given comparison routine.
 578
 579 Currently this always uses mergesort.  See C<L</sortsv_flags>> for a more
 580 flexible routine.
 581
 582 =cut
 583 */
 584
 585 void
 586 Perl_sortsv(pTHX_ SV **array, size_t nmemb, SVCOMPARE_t cmp)
 587 {
         /* Convenience entry point: sorts array[0 .. nmemb-1] in place with
          * cmp, passing flags == 0, so SORTf_DESC is not set and no
          * descending wrapper is installed.
          * NOTE(review): sortsv_flags is not defined in this file -
          * presumably the macro form of Perl_sortsv_flags() that supplies
          * the interpreter context argument; confirm. */
 588     PERL_ARGS_ASSERT_SORTSV;
 589
 590     sortsv_flags(array, nmemb, cmp, 0);
 591 }
 592
 593 #define SvNSIOK(sv) ((SvFLAGS(sv) & SVf_NOK) || ((SvFLAGS(sv) & (SVf_IOK|SVf_IVisUV)) == SVf_IOK))
     /* Numeric-flag helpers.  Each evaluates its argument more than once, so
      * sv must be free of side effects.
      *   SvNSIOK(sv) (above) - true if SVf_NOK is set, or if SVf_IOK is set
      *                 with SVf_IVisUV clear.
      *   SvSIOK(sv)  - true if SVf_IOK is set with SVf_IVisUV clear
      *                 (presumably: the integer slot holds a signed IV
      *                 rather than a UV - confirm against sv.h).
      *   SvNSIV(sv)  - numeric value of sv: SvNVX(sv) when SvNOK(sv), else
      *                 SvIVX(sv) when SvSIOK(sv), else the result of
      *                 sv_2nv(sv).
      */
 594 #define SvSIOK(sv) ((SvFLAGS(sv) & (SVf_IOK|SVf_IVisUV)) == SVf_IOK)
 595 #define SvNSIV(sv) ( SvNOK(sv) ? SvNVX(sv) : ( SvSIOK(sv) ? SvIVX(sv) : sv_2nv(sv) ) )
 596
 597 PP(pp_sort)
 598 {
 599     dSP; dMARK; dORIGMARK;
 600     SV **p1 = ORIGMARK+1, **p2;
 601     SSize_t max, i;
 602     AV* av = NULL;
 603     GV *gv;
 604     CV *cv = NULL;
 605     U8 gimme = GIMME_V;
 606     OP* const nextop = PL_op->op_next;
 607     I32 overloading = 0;
 608     bool hasargs = FALSE;
 609     bool copytmps;
 610     I32 is_xsub = 0;
 611     const U8 priv = PL_op->op_private;
 612     const U8 flags = PL_op->op_flags;
 613     U32 sort_flags = 0;
 614     void (*sortsvp)(pTHX_ SV **array, size_t nmemb, SVCOMPARE_t cmp, U32 flags)
 615       = Perl_sortsv_flags;
 616     I32 all_SIVs = 1;
 617
 618     if ((priv & OPpSORT_DESCEND) != 0)
 619         sort_flags |= SORTf_DESC;
 620     if ((priv & OPpSORT_STABLE) != 0)
 621         sort_flags |= SORTf_STABLE;
 622     if ((priv & OPpSORT_UNSTABLE) != 0)
 623         sort_flags |= SORTf_UNSTABLE;
 624
 625     if (gimme != G_ARRAY) {
 626         SP = MARK;
 627         EXTEND(SP,1);
 628         RETPUSHUNDEF;
 629     }
 630
 631     ENTER;
 632     SAVEVPTR(PL_sortcop);
 633     if (flags & OPf_STACKED) {
 634         if (flags & OPf_SPECIAL) {
 635             OP *nullop = OpSIBLING(cLISTOP->op_first);  /* pass pushmark */
 636             assert(nullop->op_type == OP_NULL);
 637             PL_sortcop = nullop->op_next;
 638         }
 639         else {
 640             GV *autogv = NULL;
 641             HV *stash;
 642             cv = sv_2cv(*++MARK, &stash, &gv, GV_ADD);
 643           check_cv:
 644             if (cv && SvPOK(cv)) {
 645                 const char * const proto = SvPV_nolen_const(MUTABLE_SV(cv));
 646                 if (proto && strEQ(proto, "$$")) {
 647                     hasargs = TRUE;
 648                 }
 649             }
 650             if (cv && CvISXSUB(cv) && CvXSUB(cv)) {
 651                 is_xsub = 1;
 652             }
 653             else if (!(cv && CvROOT(cv))) {
 654                 if (gv) {
 655                     goto autoload;
 656                 }
 657                 else if (!CvANON(cv) && (gv = CvGV(cv))) {
 658                   if (cv != GvCV(gv)) cv = GvCV(gv);
 659                  autoload:
 660                   if (!autogv && (
 661                         autogv = gv_autoload_pvn(
 662                             GvSTASH(gv), GvNAME(gv), GvNAMELEN(gv),
 663                             GvNAMEUTF8(gv) ? SVf_UTF8 : 0
 664                         )
 665                      )) {
 666                     cv = GvCVu(autogv);
 667                     goto check_cv;
 668                   }
 669                   else {
 670                     SV *tmpstr = sv_newmortal();
 671                     gv_efullname3(tmpstr, gv, NULL);
 672                     DIE(aTHX_ "Undefined sort subroutine \"%" SVf "\" called",
 673                         SVfARG(tmpstr));
 674                   }
 675                 }
 676                 else {
 677                     DIE(aTHX_ "Undefined subroutine in sort");
 678                 }
 679             }
 680
 681             if (is_xsub)
 682                 PL_sortcop = (OP*)cv;
 683             else
 684                 PL_sortcop = CvSTART(cv);
 685         }
 686     }
 687     else {
 688         PL_sortcop = NULL;
 689     }
 690
 691     /* optimiser converts "@a = sort @a" to "sort \@a".  In this case,
 692      * push (@a) onto stack, then assign result back to @a at the end of
 693      * this function */
 694     if (priv & OPpSORT_INPLACE) {
 695         assert( MARK+1 == SP && *SP && SvTYPE(*SP) == SVt_PVAV);
 696         (void)POPMARK; /* remove mark associated with ex-OP_AASSIGN */
 697         av = MUTABLE_AV((*SP));
 698         if (SvREADONLY(av))
 699             Perl_croak_no_modify();
 700         max = AvFILL(av) + 1;
 701         MEXTEND(SP, max);
 702         if (SvMAGICAL(av)) {
 703             for (i=0; i < max; i++) {
 704                 SV **svp = av_fetch(av, i, FALSE);
 705                 *SP++ = (svp) ? *svp : NULL;
 706             }
 707         }
 708         else {
 709             SV **svp = AvARRAY(av);
 710             assert(svp || max == 0);
 711             for (i = 0; i < max; i++)
 712                 *SP++ = *svp++;
 713         }
 714         SP--;
 715         p1 = p2 = SP - (max-1);
 716     }
 717     else {
 718         p2 = MARK+1;
 719         max = SP - MARK;
 720    }
 721
 722     /* shuffle stack down, removing optional initial cv (p1!=p2), plus
 723      * any nulls; also stringify or converting to integer or number as
 724      * required any args */
 725     copytmps = cBOOL(PL_sortcop);
 726     for (i=max; i > 0 ; i--) {
 727         if ((*p1 = *p2++)) {                    /* Weed out nulls. */
 728             if (copytmps && SvPADTMP(*p1)) {
 729                 *p1 = sv_mortalcopy(*p1);
 730             }
 731             SvTEMP_off(*p1);
 732             if (!PL_sortcop) {
 733                 if (priv & OPpSORT_NUMERIC) {
 734                     if (priv & OPpSORT_INTEGER) {
 735                         if (!SvIOK(*p1))
 736                             (void)sv_2iv_flags(*p1, SV_GMAGIC|SV_SKIP_OVERLOAD);
 737                     }
 738                     else {
 739                         if (!SvNSIOK(*p1))
 740                             (void)sv_2nv_flags(*p1, SV_GMAGIC|SV_SKIP_OVERLOAD);
 741                         if (all_SIVs && !SvSIOK(*p1))
 742                             all_SIVs = 0;
 743                     }
 744                 }
 745                 else {
 746                     if (!SvPOK(*p1))
 747                         (void)sv_2pv_flags(*p1, 0,
 748                             SV_GMAGIC|SV_CONST_RETURN|SV_SKIP_OVERLOAD);
 749                 }
 750                 if (SvAMAGIC(*p1))
 751                     overloading = 1;
 752             }
 753             p1++;
 754         }
 755         else
 756             max--;
 757     }
 758     if (max > 1) {
 759         SV **start;
 760         if (PL_sortcop) {
 761             PERL_CONTEXT *cx;
 762             const bool oldcatch = CATCH_GET;
 763             I32 old_savestack_ix = PL_savestack_ix;
 764
 765             SAVEOP();
 766
 767             CATCH_SET(TRUE);
 768             PUSHSTACKi(PERLSI_SORT);
 769             if (!hasargs && !is_xsub) {
 770                 SAVEGENERICSV(PL_firstgv);
 771                 SAVEGENERICSV(PL_secondgv);
 772                 PL_firstgv = MUTABLE_GV(SvREFCNT_inc(
 773                     gv_fetchpvs("a", GV_ADD|GV_NOTQUAL, SVt_PV)
 774                 ));
 775                 PL_secondgv = MUTABLE_GV(SvREFCNT_inc(
 776                     gv_fetchpvs("b", GV_ADD|GV_NOTQUAL, SVt_PV)
 777                 ));
 778                 /* make sure the GP isn't removed out from under us for
 779                  * the SAVESPTR() */
 780                 save_gp(PL_firstgv, 0);
 781                 save_gp(PL_secondgv, 0);
 782                 /* we don't want modifications localized */
 783                 GvINTRO_off(PL_firstgv);
 784                 GvINTRO_off(PL_secondgv);
 785                 SAVEGENERICSV(GvSV(PL_firstgv));
 786                 SvREFCNT_inc(GvSV(PL_firstgv));
 787                 SAVEGENERICSV(GvSV(PL_secondgv));
 788                 SvREFCNT_inc(GvSV(PL_secondgv));
 789             }
 790
 791             gimme = G_SCALAR;
 792             cx = cx_pushblock(CXt_NULL, gimme, PL_stack_base, old_savestack_ix);
 793             if (!(flags & OPf_SPECIAL)) {
 794                 cx->cx_type = CXt_SUB|CXp_MULTICALL;
 795                 cx_pushsub(cx, cv, NULL, hasargs);
 796                 if (!is_xsub) {
 797                     PADLIST * const padlist = CvPADLIST(cv);
 798
 799                     if (++CvDEPTH(cv) >= 2)
 800                         pad_push(padlist, CvDEPTH(cv));
 801                     PAD_SET_CUR_NOSAVE(padlist, CvDEPTH(cv));
 802
 803                     if (hasargs) {
 804                         /* This is mostly copied from pp_entersub */
 805                         AV * const av = MUTABLE_AV(PAD_SVl(0));
 806
 807                         cx->blk_sub.savearray = GvAV(PL_defgv);
 808                         GvAV(PL_defgv) = MUTABLE_AV(SvREFCNT_inc_simple(av));
 809                     }
 810
 811                 }
 812             }
 813
 814             start = p1 - max;
 815             sortsvp(aTHX_ start, max,
 816                     (is_xsub ? S_sortcv_xsub : hasargs ? S_sortcv_stacked : S_sortcv),
 817                     sort_flags);
 818
 819             /* Reset cx, in case the context stack has been reallocated. */
 820             cx = CX_CUR();
 821
 822             PL_stack_sp = PL_stack_base + cx->blk_oldsp;
 823
 824             CX_LEAVE_SCOPE(cx);
 825             if (!(flags & OPf_SPECIAL)) {
 826                 assert(CxTYPE(cx) == CXt_SUB);
 827                 cx_popsub(cx);
 828             }
 829             else
 830                 assert(CxTYPE(cx) == CXt_NULL);
 831                 /* there isn't a POPNULL ! */
 832
 833             cx_popblock(cx);
 834             CX_POP(cx);
 835             POPSTACK;
 836             CATCH_SET(oldcatch);
 837         }
 838         else {
 839             MEXTEND(SP, 20);    /* Can't afford stack realloc on signal. */
 840             start = ORIGMARK+1;
 841             sortsvp(aTHX_ start, max,
 842                     (priv & OPpSORT_NUMERIC)
 843                         ? ( ( ( priv & OPpSORT_INTEGER) || all_SIVs)
 844                             ? ( overloading ? S_amagic_i_ncmp : S_sv_i_ncmp)
 845                             : ( overloading ? S_amagic_ncmp : S_sv_ncmp ) )
 846                         : (
 847 #ifdef USE_LOCALE_COLLATE
 848                            IN_LC_RUNTIME(LC_COLLATE)
 849                             ? ( overloading
 850                                 ? (SVCOMPARE_t)S_amagic_cmp_locale
 851                                 : (SVCOMPARE_t)sv_cmp_locale_static)
 852                             :
 853 #endif
 854                               ( overloading ? (SVCOMPARE_t)S_amagic_cmp : (SVCOMPARE_t)sv_cmp_static)),
 855                     sort_flags);
 856         }
 857         if ((priv & OPpSORT_REVERSE) != 0) {
 858             SV **q = start+max-1;
 859             while (start < q) {
 860                 SV * const tmp = *start;
 861                 *start++ = *q;
 862                 *q-- = tmp;
 863             }
 864         }
 865     }
 866
 867     if (av) {
 868         /* copy back result to the array */
 869         SV** const base = MARK+1;
 870         SSize_t max_minus_one = max - 1; /* attempt to work around mingw bug */
 871         if (SvMAGICAL(av)) {
 872             for (i = 0; i <= max_minus_one; i++)
 873                 base[i] = newSVsv(base[i]);
 874             av_clear(av);
 875             if (max_minus_one >= 0)
 876                 av_extend(av, max_minus_one);
 877             for (i=0; i <= max_minus_one; i++) {
 878                 SV * const sv = base[i];
 879                 SV ** const didstore = av_store(av, i, sv);
 880                 if (SvSMAGICAL(sv))
 881                     mg_set(sv);
 882                 if (!didstore)
 883                     sv_2mortal(sv);
 884             }
 885         }
 886         else {
 887             /* the elements of av are likely to be the same as the
 888              * (non-refcounted) elements on the stack, just in a different
 889              * order. However, its possible that someone's messed with av
 890              * in the meantime. So bump and unbump the relevant refcounts
 891              * first.
 892              */
 893             for (i = 0; i <= max_minus_one; i++) {
 894                 SV *sv = base[i];
 895                 assert(sv);
 896                 if (SvREFCNT(sv) > 1)
 897                     base[i] = newSVsv(sv);
 898                 else
 899                     SvREFCNT_inc_simple_void_NN(sv);
 900             }
 901             av_clear(av);
 902             if (max_minus_one >= 0) {
 903                 av_extend(av, max_minus_one);
 904                 Copy(base, AvARRAY(av), max, SV*);
 905             }
 906             AvFILLp(av) = max_minus_one;
 907             AvREIFY_off(av);
 908             AvREAL_on(av);
 909         }
 910     }
 911     LEAVE;
 912     PL_stack_sp = ORIGMARK +  max;
 913     return nextop;
 914 }
 915
 916 static I32
 917 S_sortcv(pTHX_ SV *const a, SV *const b)
 918 {
 919     const I32 oldsaveix = PL_savestack_ix;
 920     I32 result;
 921     PMOP * const pm = PL_curpm;
 922     COP * const cop = PL_curcop;
 923     SV *olda, *oldb;
 924
 925     PERL_ARGS_ASSERT_SORTCV;
 926
 927     olda = GvSV(PL_firstgv);
 928     GvSV(PL_firstgv) = SvREFCNT_inc_simple_NN(a);
 929     SvREFCNT_dec(olda);
 930     oldb = GvSV(PL_secondgv);
 931     GvSV(PL_secondgv) = SvREFCNT_inc_simple_NN(b);
 932     SvREFCNT_dec(oldb);
 933     PL_stack_sp = PL_stack_base;
 934     PL_op = PL_sortcop;
 935     CALLRUNOPS(aTHX);
 936     PL_curcop = cop;
 937     /* entry zero of a stack is always PL_sv_undef, which
 938      * simplifies converting a '()' return into undef in scalar context */
 939     assert(PL_stack_sp > PL_stack_base || *PL_stack_base == &PL_sv_undef);
 940     result = SvIV(*PL_stack_sp);
 941
 942     LEAVE_SCOPE(oldsaveix);
 943     PL_curpm = pm;
 944     return result;
 945 }
 946
 947 static I32
 948 S_sortcv_stacked(pTHX_ SV *const a, SV *const b)
 949 {
 950     const I32 oldsaveix = PL_savestack_ix;
 951     I32 result;
 952     AV * const av = GvAV(PL_defgv);
 953     PMOP * const pm = PL_curpm;
 954     COP * const cop = PL_curcop;
 955
 956     PERL_ARGS_ASSERT_SORTCV_STACKED;
 957
 958     if (AvREAL(av)) {
 959         av_clear(av);
 960         AvREAL_off(av);
 961         AvREIFY_on(av);
 962     }
 963     if (AvMAX(av) < 1) {
 964         SV **ary = AvALLOC(av);
 965         if (AvARRAY(av) != ary) {
 966             AvMAX(av) += AvARRAY(av) - AvALLOC(av);
 967             AvARRAY(av) = ary;
 968         }
 969         if (AvMAX(av) < 1) {
 970             Renew(ary,2,SV*);
 971             AvMAX(av) = 1;
 972             AvARRAY(av) = ary;
 973             AvALLOC(av) = ary;
 974         }
 975     }
 976     AvFILLp(av) = 1;
 977
 978     AvARRAY(av)[0] = a;
 979     AvARRAY(av)[1] = b;
 980     PL_stack_sp = PL_stack_base;
 981     PL_op = PL_sortcop;
 982     CALLRUNOPS(aTHX);
 983     PL_curcop = cop;
 984     /* entry zero of a stack is always PL_sv_undef, which
 985      * simplifies converting a '()' return into undef in scalar context */
 986     assert(PL_stack_sp > PL_stack_base || *PL_stack_base == &PL_sv_undef);
 987     result = SvIV(*PL_stack_sp);
 988
 989     LEAVE_SCOPE(oldsaveix);
 990     PL_curpm = pm;
 991     return result;
 992 }
 993
 994 static I32
 995 S_sortcv_xsub(pTHX_ SV *const a, SV *const b)
 996 {
 997     dSP;
 998     const I32 oldsaveix = PL_savestack_ix;
 999     CV * const cv=MUTABLE_CV(PL_sortcop);
1000     I32 result;
1001     PMOP * const pm = PL_curpm;
1002
1003     PERL_ARGS_ASSERT_SORTCV_XSUB;
1004
1005     SP = PL_stack_base;
1006     PUSHMARK(SP);
1007     EXTEND(SP, 2);
1008     *++SP = a;
1009     *++SP = b;
1010     PUTBACK;
1011     (void)(*CvXSUB(cv))(aTHX_ cv);
1012     /* entry zero of a stack is always PL_sv_undef, which
1013      * simplifies converting a '()' return into undef in scalar context */
1014     assert(PL_stack_sp > PL_stack_base || *PL_stack_base == &PL_sv_undef);
1015     result = SvIV(*PL_stack_sp);
1016
1017     LEAVE_SCOPE(oldsaveix);
1018     PL_curpm = pm;
1019     return result;
1020 }
1021
1022
1023 static I32
1024 S_sv_ncmp(pTHX_ SV *const a, SV *const b)
1025 {
1026     I32 cmp = do_ncmp(a, b);
1027
1028     PERL_ARGS_ASSERT_SV_NCMP;
1029
1030     if (cmp == 2) {
1031         if (ckWARN(WARN_UNINITIALIZED)) report_uninit(NULL);
1032         return 0;
1033     }
1034
1035     return cmp;
1036 }
1037
1038 static I32
1039 S_sv_i_ncmp(pTHX_ SV *const a, SV *const b)
1040 {
1041     const IV iv1 = SvIV(a);
1042     const IV iv2 = SvIV(b);
1043
1044     PERL_ARGS_ASSERT_SV_I_NCMP;
1045
1046     return iv1 < iv2 ? -1 : iv1 > iv2 ? 1 : 0;
1047 }
1048
/* Call the overloaded binary operator 'meth' through amagic_call() if
 * either operand has overloading (SvAMAGIC); otherwise evaluate to NULL.
 *
 * The expansion is a single parenthesised expression with no trailing
 * semicolon, so it can be used anywhere an expression is allowed and the
 * caller supplies the terminating ';'.  'left' and 'right' may each be
 * evaluated more than once. */
#define tryCALL_AMAGICbin(left,right,meth) \
    ((SvAMAGIC(left) || SvAMAGIC(right)) \
        ? amagic_call((left), (right), (meth), 0) \
        : NULL)

/* Reduce a comparison value to 1 (positive), 0 (zero) or -1 (anything
 * else that is non-zero). */
#define SORT_NORMAL_RETURN_VALUE(val)  (((val) > 0) ? 1 : ((val) ? -1 : 0))
1055
1056 static I32
1057 S_amagic_ncmp(pTHX_ SV *const a, SV *const b)
1058 {
1059     SV * const tmpsv = tryCALL_AMAGICbin(a,b,ncmp_amg);
1060
1061     PERL_ARGS_ASSERT_AMAGIC_NCMP;
1062
1063     if (tmpsv) {
1064         if (SvIOK(tmpsv)) {
1065             const I32 i = SvIVX(tmpsv);
1066             return SORT_NORMAL_RETURN_VALUE(i);
1067         }
1068         else {
1069             const NV d = SvNV(tmpsv);
1070             return SORT_NORMAL_RETURN_VALUE(d);
1071         }
1072      }
1073      return S_sv_ncmp(aTHX_ a, b);
1074 }
1075
1076 static I32
1077 S_amagic_i_ncmp(pTHX_ SV *const a, SV *const b)
1078 {
1079     SV * const tmpsv = tryCALL_AMAGICbin(a,b,ncmp_amg);
1080
1081     PERL_ARGS_ASSERT_AMAGIC_I_NCMP;
1082
1083     if (tmpsv) {
1084         if (SvIOK(tmpsv)) {
1085             const I32 i = SvIVX(tmpsv);
1086             return SORT_NORMAL_RETURN_VALUE(i);
1087         }
1088         else {
1089             const NV d = SvNV(tmpsv);
1090             return SORT_NORMAL_RETURN_VALUE(d);
1091         }
1092     }
1093     return S_sv_i_ncmp(aTHX_ a, b);
1094 }
1095
1096 static I32
1097 S_amagic_cmp(pTHX_ SV *const str1, SV *const str2)
1098 {
1099     SV * const tmpsv = tryCALL_AMAGICbin(str1,str2,scmp_amg);
1100
1101     PERL_ARGS_ASSERT_AMAGIC_CMP;
1102
1103     if (tmpsv) {
1104         if (SvIOK(tmpsv)) {
1105             const I32 i = SvIVX(tmpsv);
1106             return SORT_NORMAL_RETURN_VALUE(i);
1107         }
1108         else {
1109             const NV d = SvNV(tmpsv);
1110             return SORT_NORMAL_RETURN_VALUE(d);
1111         }
1112     }
1113     return sv_cmp(str1, str2);
1114 }
1115
1116 #ifdef USE_LOCALE_COLLATE
1117
1118 static I32
1119 S_amagic_cmp_locale(pTHX_ SV *const str1, SV *const str2)
1120 {
1121     SV * const tmpsv = tryCALL_AMAGICbin(str1,str2,scmp_amg);
1122
1123     PERL_ARGS_ASSERT_AMAGIC_CMP_LOCALE;
1124
1125     if (tmpsv) {
1126         if (SvIOK(tmpsv)) {
1127             const I32 i = SvIVX(tmpsv);
1128             return SORT_NORMAL_RETURN_VALUE(i);
1129         }
1130         else {
1131             const NV d = SvNV(tmpsv);
1132             return SORT_NORMAL_RETURN_VALUE(d);
1133         }
1134     }
1135     return sv_cmp_locale(str1, str2);
1136 }
1137
1138 #endif
1139
1140 /*
1141  * ex: set ts=8 sts=4 sw=4 et:
1142  */