This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Carp.pm caller_info returns wrong info for eval
[perl5.git] / pp_sort.c
... / ...
CommitLineData
1/* pp_sort.c
2 *
3 * Copyright (c) 1991-2001, Larry Wall
4 *
5 * You may distribute under the terms of either the GNU General Public
6 * License or the Artistic License, as specified in the README file.
7 *
8 */
9
10/*
11 * ...they shuffled back towards the rear of the line. 'No, not at the
12 * rear!' the slave-driver shouted. 'Three files up. And stay there...
13 */
14
15#include "EXTERN.h"
16#define PERL_IN_PP_SORT_C
17#include "perl.h"
18
19static I32 sortcv(pTHX_ SV *a, SV *b);
20static I32 sortcv_stacked(pTHX_ SV *a, SV *b);
21static I32 sortcv_xsub(pTHX_ SV *a, SV *b);
22static I32 sv_ncmp(pTHX_ SV *a, SV *b);
23static I32 sv_i_ncmp(pTHX_ SV *a, SV *b);
24static I32 amagic_ncmp(pTHX_ SV *a, SV *b);
25static I32 amagic_i_ncmp(pTHX_ SV *a, SV *b);
26static I32 amagic_cmp(pTHX_ SV *a, SV *b);
27static I32 amagic_cmp_locale(pTHX_ SV *a, SV *b);
28
29#define sv_cmp_static Perl_sv_cmp
30#define sv_cmp_locale_static Perl_sv_cmp_locale
31
32#define SORTHINTS(hintsvp) \
33 ((PL_hintgv && \
34 (hintsvp = hv_fetch(GvHV(PL_hintgv), "SORT", 4, FALSE))) ? \
35 (I32)SvIV(*hintsvp) : 0)
36
37#ifndef SMALLSORT
38#define SMALLSORT (200)
39#endif
40
41/*
42 * The mergesort implementation is by Peter M. Mcilroy <pmcilroy@lucent.com>.
43 *
44 * The original code was written in conjunction with BSD Computer Software
45 * Research Group at University of California, Berkeley.
46 *
47 * See also: "Optimistic Merge Sort" (SODA '92)
48 *
49 * The integration to Perl is by John P. Linderman <jpl@research.att.com>.
50 *
51 * The code can be distributed under the same terms as Perl itself.
52 *
53 */
54
55
56typedef char * aptr; /* pointer for arithmetic on sizes */
57typedef SV * gptr; /* pointers in our lists */
58
59/* Binary merge internal sort, with a few special mods
60** for the special perl environment it now finds itself in.
61**
62** Things that were once options have been hotwired
63** to values suitable for this use. In particular, we'll always
64** initialize looking for natural runs, we'll always produce stable
65** output, and we'll always do Peter McIlroy's binary merge.
66*/
67
68/* Pointer types for arithmetic and storage and convenience casts */
69
70#define APTR(P) ((aptr)(P))
71#define GPTP(P) ((gptr *)(P))
72#define GPPP(P) ((gptr **)(P))
73
74
75/* byte offset from pointer P to (larger) pointer Q */
76#define BYTEOFF(P, Q) (APTR(Q) - APTR(P))
77
78#define PSIZE sizeof(gptr)
79
80/* If PSIZE is power of 2, make PSHIFT that power, if that helps */
81
82#ifdef PSHIFT
83#define PNELEM(P, Q) (BYTEOFF(P,Q) >> (PSHIFT))
84#define PNBYTE(N) ((N) << (PSHIFT))
85#define PINDEX(P, N) (GPTP(APTR(P) + PNBYTE(N)))
86#else
87/* Leave optimization to compiler */
88#define PNELEM(P, Q) (GPTP(Q) - GPTP(P))
89#define PNBYTE(N) ((N) * (PSIZE))
90#define PINDEX(P, N) (GPTP(P) + (N))
91#endif
92
93/* Pointer into other corresponding to pointer into this */
94#define POTHER(P, THIS, OTHER) GPTP(APTR(OTHER) + BYTEOFF(THIS,P))
95
96#define FROMTOUPTO(src, dst, lim) do *dst++ = *src++; while(src<lim)
97
98
99/* Runs are identified by a pointer in the auxilliary list.
100** The pointer is at the start of the list,
101** and it points to the start of the next list.
102** NEXT is used as an lvalue, too.
103*/
104
105#define NEXT(P) (*GPPP(P))
106
107
108/* PTHRESH is the minimum number of pairs with the same sense to justify
109** checking for a run and extending it. Note that PTHRESH counts PAIRS,
110** not just elements, so PTHRESH == 8 means a run of 16.
111*/
112
113#define PTHRESH (8)
114
115/* RTHRESH is the number of elements in a run that must compare low
116** to the low element from the opposing run before we justify
117** doing a binary rampup instead of single stepping.
118** In random input, N in a row low should only happen with
119** probability 2^(1-N), so we can risk that we are dealing
120** with orderly input without paying much when we aren't.
121*/
122
123#define RTHRESH (6)
124
125
126/*
127** Overview of algorithm and variables.
128** The array of elements at list1 will be organized into runs of length 2,
129** or runs of length >= 2 * PTHRESH. We only try to form long runs when
130** PTHRESH adjacent pairs compare in the same way, suggesting overall order.
131**
132** Unless otherwise specified, pair pointers address the first of two elements.
133**
134** b and b+1 are a pair that compare with sense ``sense''.
135** b is the ``bottom'' of adjacent pairs that might form a longer run.
136**
137** p2 parallels b in the list2 array, where runs are defined by
138** a pointer chain.
139**
140** t represents the ``top'' of the adjacent pairs that might extend
141** the run beginning at b. Usually, t addresses a pair
142** that compares with opposite sense from (b,b+1).
143** However, it may also address a singleton element at the end of list1,
144** or it may be equal to ``last'', the first element beyond list1.
145**
146** r addresses the Nth pair following b. If this would be beyond t,
147** we back it off to t. Only when r is less than t do we consider the
148** run long enough to consider checking.
149**
150** q addresses a pair such that the pairs at b through q already form a run.
151** Often, q will equal b, indicating we only are sure of the pair itself.
152** However, a search on the previous cycle may have revealed a longer run,
153** so q may be greater than b.
154**
155** p is used to work back from a candidate r, trying to reach q,
156** which would mean b through r would be a run. If we discover such a run,
157** we start q at r and try to push it further towards t.
158** If b through r is NOT a run, we detect the wrong order at (p-1,p).
159** In any event, after the check (if any), we have two main cases.
160**
161** 1) Short run. b <= q < p <= r <= t.
162** b through q is a run (perhaps trivial)
163** q through p are uninteresting pairs
164** p through r is a run
165**
166** 2) Long run. b < r <= q < t.
167** b through q is a run (of length >= 2 * PTHRESH)
168**
169** Note that degenerate cases are not only possible, but likely.
170** For example, if the pair following b compares with opposite sense,
171** then b == q < p == r == t.
172*/
173
174
175static IV
176dynprep(pTHX_ gptr *list1, gptr *list2, size_t nmemb, SVCOMPARE_t cmp)
177{
178 I32 sense;
179 register gptr *b, *p, *q, *t, *p2;
180 register gptr c, *last, *r;
181 gptr *savep;
182 IV runs = 0;
183
184 b = list1;
185 last = PINDEX(b, nmemb);
186 sense = (cmp(aTHX_ *b, *(b+1)) > 0);
187 for (p2 = list2; b < last; ) {
188 /* We just started, or just reversed sense.
189 ** Set t at end of pairs with the prevailing sense.
190 */
191 for (p = b+2, t = p; ++p < last; t = ++p) {
192 if ((cmp(aTHX_ *t, *p) > 0) != sense) break;
193 }
194 q = b;
195 /* Having laid out the playing field, look for long runs */
196 do {
197 p = r = b + (2 * PTHRESH);
198 if (r >= t) p = r = t; /* too short to care about */
199 else {
200 while (((cmp(aTHX_ *(p-1), *p) > 0) == sense) &&
201 ((p -= 2) > q));
202 if (p <= q) {
203 /* b through r is a (long) run.
204 ** Extend it as far as possible.
205 */
206 p = q = r;
207 while (((p += 2) < t) &&
208 ((cmp(aTHX_ *(p-1), *p) > 0) == sense)) q = p;
209 r = p = q + 2; /* no simple pairs, no after-run */
210 }
211 }
212 if (q > b) { /* run of greater than 2 at b */
213 savep = p;
214 p = q += 2;
215 /* pick up singleton, if possible */
216 if ((p == t) &&
217 ((t + 1) == last) &&
218 ((cmp(aTHX_ *(p-1), *p) > 0) == sense))
219 savep = r = p = q = last;
220 p2 = NEXT(p2) = p2 + (p - b); ++runs;
221 if (sense) while (b < --p) {
222 c = *b;
223 *b++ = *p;
224 *p = c;
225 }
226 p = savep;
227 }
228 while (q < p) { /* simple pairs */
229 p2 = NEXT(p2) = p2 + 2; ++runs;
230 if (sense) {
231 c = *q++;
232 *(q-1) = *q;
233 *q++ = c;
234 } else q += 2;
235 }
236 if (((b = p) == t) && ((t+1) == last)) {
237 NEXT(p2) = p2 + 1; ++runs;
238 b++;
239 }
240 q = r;
241 } while (b < t);
242 sense = !sense;
243 }
244 return runs;
245}
246
247
248/* The original merge sort, in use since 5.7, was as fast as, or faster than,
249 * qsort on many platforms, but slower than qsort, conspicuously so,
250 * on others. The most likely explanation was platform-specific
251 * differences in cache sizes and relative speeds.
252 *
253 * The quicksort divide-and-conquer algorithm guarantees that, as the
254 * problem is subdivided into smaller and smaller parts, the parts
255 * fit into smaller (and faster) caches. So it doesn't matter how
256 * many levels of cache exist, quicksort will "find" them, and,
257 * as long as smaller is faster, take advanatge of them.
258 *
259 * By contrast, consider how the original mergesort algorithm worked.
260 * Suppose we have five runs (each typically of length 2 after dynprep).
261 *
262 * pass base aux
263 * 0 1 2 3 4 5
264 * 1 12 34 5
265 * 2 1234 5
266 * 3 12345
267 * 4 12345
268 *
269 * Adjacent pairs are merged in "grand sweeps" through the input.
270 * This means, on pass 1, the records in runs 1 and 2 aren't revisited until
271 * runs 3 and 4 are merged and the runs from run 5 have been copied.
272 * The only cache that matters is one large enough to hold *all* the input.
273 * On some platforms, this may be many times slower than smaller caches.
274 *
275 * The following pseudo-code uses the same basic merge algorithm,
276 * but in a divide-and-conquer way.
277 *
278 * # merge $runs runs at offset $offset of list $list1 into $list2.
279 * # all unmerged runs ($runs == 1) originate in list $base.
280 * sub mgsort2 {
281 * my ($offset, $runs, $base, $list1, $list2) = @_;
282 *
283 * if ($runs == 1) {
284 * if ($list1 is $base) copy run to $list2
285 * return offset of end of list (or copy)
286 * } else {
287 * $off2 = mgsort2($offset, $runs-($runs/2), $base, $list2, $list1)
288 * mgsort2($off2, $runs/2, $base, $list2, $list1)
289 * merge the adjacent runs at $offset of $list1 into $list2
290 * return the offset of the end of the merged runs
291 * }
292 * }
293 * mgsort2(0, $runs, $base, $aux, $base);
294 *
295 * For our 5 runs, the tree of calls looks like
296 *
297 * 5
298 * 3 2
299 * 2 1 1 1
300 * 1 1
301 *
302 * 1 2 3 4 5
303 *
304 * and the corresponding activity looks like
305 *
306 * copy runs 1 and 2 from base to aux
307 * merge runs 1 and 2 from aux to base
308 * (run 3 is where it belongs, no copy needed)
309 * merge runs 12 and 3 from base to aux
310 * (runs 4 and 5 are where they belong, no copy needed)
311 * merge runs 4 and 5 from base to aux
312 * merge runs 123 and 45 from aux to base
313 *
314 * Note that we merge runs 1 and 2 immediately after copying them,
315 * while they are still likely to be in fast cache. Similarly,
316 * run 3 is merged with run 12 while it still may be lingering in cache.
317 * This implementation should therefore enjoy much of the cache-friendly
318 * behavior that quicksort does. In addition, it does less copying
319 * than the original mergesort implementation (only runs 1 and 2 are copied)
320 * and the "balancing" of merges is better (merged runs comprise more nearly
321 * equal numbers of original runs).
322 *
323 * The actual cache-friendly implementation will use a pseudo-stack
324 * to avoid recursion, and will unroll processing of runs of length 2,
325 * but it is otherwise similar to the recursive implementation.
326 */
327
328typedef struct {
329 IV offset; /* offset of 1st of 2 runs at this level */
330 IV runs; /* how many runs must be combined into 1 */
331} off_runs; /* pseudo-stack element */
332
333STATIC void
334S_mergesortsv(pTHX_ gptr *base, size_t nmemb, SVCOMPARE_t cmp)
335{
336 IV i, run, runs, offset;
337 I32 sense, level;
338 int iwhich;
339 register gptr *f1, *f2, *t, *b, *p, *tp2, *l1, *l2, *q;
340 gptr *aux, *list1, *list2;
341 gptr *p1;
342 gptr small[SMALLSORT];
343 gptr *which[3];
344 off_runs stack[60], *stackp;
345
346 if (nmemb <= 1) return; /* sorted trivially */
347 if (nmemb <= SMALLSORT) aux = small; /* use stack for aux array */
348 else { New(799,aux,nmemb,gptr); } /* allocate auxilliary array */
349 level = 0;
350 stackp = stack;
351 stackp->runs = dynprep(aTHX_ base, aux, nmemb, cmp);
352 stackp->offset = offset = 0;
353 which[0] = which[2] = base;
354 which[1] = aux;
355 for (;;) {
356 /* On levels where both runs have be constructed (stackp->runs == 0),
357 * merge them, and note the offset of their end, in case the offset
358 * is needed at the next level up. Hop up a level, and,
359 * as long as stackp->runs is 0, keep merging.
360 */
361 if ((runs = stackp->runs) == 0) {
362 iwhich = level & 1;
363 list1 = which[iwhich]; /* area where runs are now */
364 list2 = which[++iwhich]; /* area for merged runs */
365 do {
366 offset = stackp->offset;
367 f1 = p1 = list1 + offset; /* start of first run */
368 p = tp2 = list2 + offset; /* where merged run will go */
369 t = NEXT(p); /* where first run ends */
370 f2 = l1 = POTHER(t, list2, list1); /* ... on the other side */
371 t = NEXT(t); /* where second runs ends */
372 l2 = POTHER(t, list2, list1); /* ... on the other side */
373 offset = PNELEM(list2, t);
374 while (f1 < l1 && f2 < l2) {
375 /* If head 1 is larger than head 2, find ALL the elements
376 ** in list 2 strictly less than head1, write them all,
377 ** then head 1. Then compare the new heads, and repeat,
378 ** until one or both lists are exhausted.
379 **
380 ** In all comparisons (after establishing
381 ** which head to merge) the item to merge
382 ** (at pointer q) is the first operand of
383 ** the comparison. When we want to know
384 ** if ``q is strictly less than the other'',
385 ** we can't just do
386 ** cmp(q, other) < 0
387 ** because stability demands that we treat equality
388 ** as high when q comes from l2, and as low when
389 ** q was from l1. So we ask the question by doing
390 ** cmp(q, other) <= sense
391 ** and make sense == 0 when equality should look low,
392 ** and -1 when equality should look high.
393 */
394
395
396 if (cmp(aTHX_ *f1, *f2) <= 0) {
397 q = f2; b = f1; t = l1;
398 sense = -1;
399 } else {
400 q = f1; b = f2; t = l2;
401 sense = 0;
402 }
403
404
405 /* ramp up
406 **
407 ** Leave t at something strictly
408 ** greater than q (or at the end of the list),
409 ** and b at something strictly less than q.
410 */
411 for (i = 1, run = 0 ;;) {
412 if ((p = PINDEX(b, i)) >= t) {
413 /* off the end */
414 if (((p = PINDEX(t, -1)) > b) &&
415 (cmp(aTHX_ *q, *p) <= sense))
416 t = p;
417 else b = p;
418 break;
419 } else if (cmp(aTHX_ *q, *p) <= sense) {
420 t = p;
421 break;
422 } else b = p;
423 if (++run >= RTHRESH) i += i;
424 }
425
426
427 /* q is known to follow b and must be inserted before t.
428 ** Increment b, so the range of possibilities is [b,t).
429 ** Round binary split down, to favor early appearance.
430 ** Adjust b and t until q belongs just before t.
431 */
432
433 b++;
434 while (b < t) {
435 p = PINDEX(b, (PNELEM(b, t) - 1) / 2);
436 if (cmp(aTHX_ *q, *p) <= sense) {
437 t = p;
438 } else b = p + 1;
439 }
440
441
442 /* Copy all the strictly low elements */
443
444 if (q == f1) {
445 FROMTOUPTO(f2, tp2, t);
446 *tp2++ = *f1++;
447 } else {
448 FROMTOUPTO(f1, tp2, t);
449 *tp2++ = *f2++;
450 }
451 }
452
453
454 /* Run out remaining list */
455 if (f1 == l1) {
456 if (f2 < l2) FROMTOUPTO(f2, tp2, l2);
457 } else FROMTOUPTO(f1, tp2, l1);
458 p1 = NEXT(p1) = POTHER(tp2, list2, list1);
459
460 if (--level == 0) goto done;
461 --stackp;
462 t = list1; list1 = list2; list2 = t; /* swap lists */
463 } while ((runs = stackp->runs) == 0);
464 }
465
466
467 stackp->runs = 0; /* current run will finish level */
468 /* While there are more than 2 runs remaining,
469 * turn them into exactly 2 runs (at the "other" level),
470 * each made up of approximately half the runs.
471 * Stack the second half for later processing,
472 * and set about producing the first half now.
473 */
474 while (runs > 2) {
475 ++level;
476 ++stackp;
477 stackp->offset = offset;
478 runs -= stackp->runs = runs / 2;
479 }
480 /* We must construct a single run from 1 or 2 runs.
481 * All the original runs are in which[0] == base.
482 * The run we construct must end up in which[level&1].
483 */
484 iwhich = level & 1;
485 if (runs == 1) {
486 /* Constructing a single run from a single run.
487 * If it's where it belongs already, there's nothing to do.
488 * Otherwise, copy it to where it belongs.
489 * A run of 1 is either a singleton at level 0,
490 * or the second half of a split 3. In neither event
491 * is it necessary to set offset. It will be set by the merge
492 * that immediately follows.
493 */
494 if (iwhich) { /* Belongs in aux, currently in base */
495 f1 = b = PINDEX(base, offset); /* where list starts */
496 f2 = PINDEX(aux, offset); /* where list goes */
497 t = NEXT(f2); /* where list will end */
498 offset = PNELEM(aux, t); /* offset thereof */
499 t = PINDEX(base, offset); /* where it currently ends */
500 FROMTOUPTO(f1, f2, t); /* copy */
501 NEXT(b) = t; /* set up parallel pointer */
502 } else if (level == 0) goto done; /* single run at level 0 */
503 } else {
504 /* Constructing a single run from two runs.
505 * The merge code at the top will do that.
506 * We need only make sure the two runs are in the "other" array,
507 * so they'll end up in the correct array after the merge.
508 */
509 ++level;
510 ++stackp;
511 stackp->offset = offset;
512 stackp->runs = 0; /* take care of both runs, trigger merge */
513 if (!iwhich) { /* Merged runs belong in aux, copy 1st */
514 f1 = b = PINDEX(base, offset); /* where first run starts */
515 f2 = PINDEX(aux, offset); /* where it will be copied */
516 t = NEXT(f2); /* where first run will end */
517 offset = PNELEM(aux, t); /* offset thereof */
518 p = PINDEX(base, offset); /* end of first run */
519 t = NEXT(t); /* where second run will end */
520 t = PINDEX(base, PNELEM(aux, t)); /* where it now ends */
521 FROMTOUPTO(f1, f2, t); /* copy both runs */
522 NEXT(b) = p; /* paralled pointer for 1st */
523 NEXT(p) = t; /* ... and for second */
524 }
525 }
526 }
527done:
528 if (aux != small) Safefree(aux); /* free iff allocated */
529 return;
530}
531
532/*
533 * The quicksort implementation was derived from source code contributed
534 * by Tom Horsley.
535 *
536 * NOTE: this code was derived from Tom Horsley's qsort replacement
537 * and should not be confused with the original code.
538 */
539
540/* Copyright (C) Tom Horsley, 1997. All rights reserved.
541
542 Permission granted to distribute under the same terms as perl which are
543 (briefly):
544
545 This program is free software; you can redistribute it and/or modify
546 it under the terms of either:
547
548 a) the GNU General Public License as published by the Free
549 Software Foundation; either version 1, or (at your option) any
550 later version, or
551
552 b) the "Artistic License" which comes with this Kit.
553
554 Details on the perl license can be found in the perl source code which
555 may be located via the www.perl.com web page.
556
557 This is the most wonderfulest possible qsort I can come up with (and
558 still be mostly portable) My (limited) tests indicate it consistently
559 does about 20% fewer calls to compare than does the qsort in the Visual
560 C++ library, other vendors may vary.
561
562 Some of the ideas in here can be found in "Algorithms" by Sedgewick,
563 others I invented myself (or more likely re-invented since they seemed
564 pretty obvious once I watched the algorithm operate for a while).
565
566 Most of this code was written while watching the Marlins sweep the Giants
567 in the 1997 National League Playoffs - no Braves fans allowed to use this
568 code (just kidding :-).
569
570 I realize that if I wanted to be true to the perl tradition, the only
571 comment in this file would be something like:
572
573 ...they shuffled back towards the rear of the line. 'No, not at the
574 rear!' the slave-driver shouted. 'Three files up. And stay there...
575
576 However, I really needed to violate that tradition just so I could keep
577 track of what happens myself, not to mention some poor fool trying to
578 understand this years from now :-).
579*/
580
581/* ********************************************************** Configuration */
582
583#ifndef QSORT_ORDER_GUESS
584#define QSORT_ORDER_GUESS 2 /* Select doubling version of the netBSD trick */
585#endif
586
587/* QSORT_MAX_STACK is the largest number of partitions that can be stacked up for
588 future processing - a good max upper bound is log base 2 of memory size
589 (32 on 32 bit machines, 64 on 64 bit machines, etc). In reality can
590 safely be smaller than that since the program is taking up some space and
591 most operating systems only let you grab some subset of contiguous
592 memory (not to mention that you are normally sorting data larger than
593 1 byte element size :-).
594*/
595#ifndef QSORT_MAX_STACK
596#define QSORT_MAX_STACK 32
597#endif
598
599/* QSORT_BREAK_EVEN is the size of the largest partition we should insertion sort.
600 Anything bigger and we use qsort. If you make this too small, the qsort
601 will probably break (or become less efficient), because it doesn't expect
602 the middle element of a partition to be the same as the right or left -
603 you have been warned).
604*/
605#ifndef QSORT_BREAK_EVEN
606#define QSORT_BREAK_EVEN 6
607#endif
608
609/* QSORT_PLAY_SAFE is the size of the largest partition we're willing
610 to go quadratic on. We innoculate larger partitions against
611 quadratic behavior by shuffling them before sorting. This is not
612 an absolute guarantee of non-quadratic behavior, but it would take
613 staggeringly bad luck to pick extreme elements as the pivot
614 from randomized data.
615*/
616#ifndef QSORT_PLAY_SAFE
617#define QSORT_PLAY_SAFE 255
618#endif
619
620/* ************************************************************* Data Types */
621
622/* hold left and right index values of a partition waiting to be sorted (the
623 partition includes both left and right - right is NOT one past the end or
624 anything like that).
625*/
626struct partition_stack_entry {
627 int left;
628 int right;
629#ifdef QSORT_ORDER_GUESS
630 int qsort_break_even;
631#endif
632};
633
634/* ******************************************************* Shorthand Macros */
635
636/* Note that these macros will be used from inside the qsort function where
637 we happen to know that the variable 'elt_size' contains the size of an
638 array element and the variable 'temp' points to enough space to hold a
639 temp element and the variable 'array' points to the array being sorted
640 and 'compare' is the pointer to the compare routine.
641
642 Also note that there are very many highly architecture specific ways
643 these might be sped up, but this is simply the most generally portable
644 code I could think of.
645*/
646
647/* Return < 0 == 0 or > 0 as the value of elt1 is < elt2, == elt2, > elt2
648*/
649#define qsort_cmp(elt1, elt2) \
650 ((*compare)(aTHX_ array[elt1], array[elt2]))
651
652#ifdef QSORT_ORDER_GUESS
653#define QSORT_NOTICE_SWAP swapped++;
654#else
655#define QSORT_NOTICE_SWAP
656#endif
657
658/* swaps contents of array elements elt1, elt2.
659*/
660#define qsort_swap(elt1, elt2) \
661 STMT_START { \
662 QSORT_NOTICE_SWAP \
663 temp = array[elt1]; \
664 array[elt1] = array[elt2]; \
665 array[elt2] = temp; \
666 } STMT_END
667
668/* rotate contents of elt1, elt2, elt3 such that elt1 gets elt2, elt2 gets
669 elt3 and elt3 gets elt1.
670*/
671#define qsort_rotate(elt1, elt2, elt3) \
672 STMT_START { \
673 QSORT_NOTICE_SWAP \
674 temp = array[elt1]; \
675 array[elt1] = array[elt2]; \
676 array[elt2] = array[elt3]; \
677 array[elt3] = temp; \
678 } STMT_END
679
680/* ************************************************************ Debug stuff */
681
682#ifdef QSORT_DEBUG
683
684static void
685break_here()
686{
687 return; /* good place to set a breakpoint */
688}
689
690#define qsort_assert(t) (void)( (t) || (break_here(), 0) )
691
692static void
693doqsort_all_asserts(
694 void * array,
695 size_t num_elts,
696 size_t elt_size,
697 int (*compare)(const void * elt1, const void * elt2),
698 int pc_left, int pc_right, int u_left, int u_right)
699{
700 int i;
701
702 qsort_assert(pc_left <= pc_right);
703 qsort_assert(u_right < pc_left);
704 qsort_assert(pc_right < u_left);
705 for (i = u_right + 1; i < pc_left; ++i) {
706 qsort_assert(qsort_cmp(i, pc_left) < 0);
707 }
708 for (i = pc_left; i < pc_right; ++i) {
709 qsort_assert(qsort_cmp(i, pc_right) == 0);
710 }
711 for (i = pc_right + 1; i < u_left; ++i) {
712 qsort_assert(qsort_cmp(pc_right, i) < 0);
713 }
714}
715
716#define qsort_all_asserts(PC_LEFT, PC_RIGHT, U_LEFT, U_RIGHT) \
717 doqsort_all_asserts(array, num_elts, elt_size, compare, \
718 PC_LEFT, PC_RIGHT, U_LEFT, U_RIGHT)
719
720#else
721
722#define qsort_assert(t) ((void)0)
723
724#define qsort_all_asserts(PC_LEFT, PC_RIGHT, U_LEFT, U_RIGHT) ((void)0)
725
726#endif
727
728/* ****************************************************************** qsort */
729
730STATIC void /* the standard unstable (u) quicksort (qsort) */
731S_qsortsvu(pTHX_ SV ** array, size_t num_elts, SVCOMPARE_t compare)
732{
733 register SV * temp;
734
735 struct partition_stack_entry partition_stack[QSORT_MAX_STACK];
736 int next_stack_entry = 0;
737
738 int part_left;
739 int part_right;
740#ifdef QSORT_ORDER_GUESS
741 int qsort_break_even;
742 int swapped;
743#endif
744
745 /* Make sure we actually have work to do.
746 */
747 if (num_elts <= 1) {
748 return;
749 }
750
751 /* Innoculate large partitions against quadratic behavior */
752 if (num_elts > QSORT_PLAY_SAFE) {
753 register size_t n, j;
754 register SV **q;
755 for (n = num_elts, q = array; n > 1; ) {
756 j = n-- * Drand01();
757 temp = q[j];
758 q[j] = q[n];
759 q[n] = temp;
760 }
761 }
762
763 /* Setup the initial partition definition and fall into the sorting loop
764 */
765 part_left = 0;
766 part_right = (int)(num_elts - 1);
767#ifdef QSORT_ORDER_GUESS
768 qsort_break_even = QSORT_BREAK_EVEN;
769#else
770#define qsort_break_even QSORT_BREAK_EVEN
771#endif
772 for ( ; ; ) {
773 if ((part_right - part_left) >= qsort_break_even) {
774 /* OK, this is gonna get hairy, so lets try to document all the
775 concepts and abbreviations and variables and what they keep
776 track of:
777
778 pc: pivot chunk - the set of array elements we accumulate in the
779 middle of the partition, all equal in value to the original
780 pivot element selected. The pc is defined by:
781
782 pc_left - the leftmost array index of the pc
783 pc_right - the rightmost array index of the pc
784
785 we start with pc_left == pc_right and only one element
786 in the pivot chunk (but it can grow during the scan).
787
788 u: uncompared elements - the set of elements in the partition
789 we have not yet compared to the pivot value. There are two
790 uncompared sets during the scan - one to the left of the pc
791 and one to the right.
792
793 u_right - the rightmost index of the left side's uncompared set
794 u_left - the leftmost index of the right side's uncompared set
795
796 The leftmost index of the left sides's uncompared set
797 doesn't need its own variable because it is always defined
798 by the leftmost edge of the whole partition (part_left). The
799 same goes for the rightmost edge of the right partition
800 (part_right).
801
802 We know there are no uncompared elements on the left once we
803 get u_right < part_left and no uncompared elements on the
804 right once u_left > part_right. When both these conditions
805 are met, we have completed the scan of the partition.
806
807 Any elements which are between the pivot chunk and the
808 uncompared elements should be less than the pivot value on
809 the left side and greater than the pivot value on the right
810 side (in fact, the goal of the whole algorithm is to arrange
811 for that to be true and make the groups of less-than and
812 greater-then elements into new partitions to sort again).
813
814 As you marvel at the complexity of the code and wonder why it
815 has to be so confusing. Consider some of the things this level
816 of confusion brings:
817
818 Once I do a compare, I squeeze every ounce of juice out of it. I
819 never do compare calls I don't have to do, and I certainly never
820 do redundant calls.
821
822 I also never swap any elements unless I can prove there is a
823 good reason. Many sort algorithms will swap a known value with
824 an uncompared value just to get things in the right place (or
825 avoid complexity :-), but that uncompared value, once it gets
826 compared, may then have to be swapped again. A lot of the
827 complexity of this code is due to the fact that it never swaps
828 anything except compared values, and it only swaps them when the
829 compare shows they are out of position.
830 */
831 int pc_left, pc_right;
832 int u_right, u_left;
833
834 int s;
835
836 pc_left = ((part_left + part_right) / 2);
837 pc_right = pc_left;
838 u_right = pc_left - 1;
839 u_left = pc_right + 1;
840
841 /* Qsort works best when the pivot value is also the median value
842 in the partition (unfortunately you can't find the median value
843 without first sorting :-), so to give the algorithm a helping
844 hand, we pick 3 elements and sort them and use the median value
845 of that tiny set as the pivot value.
846
847 Some versions of qsort like to use the left middle and right as
848 the 3 elements to sort so they can insure the ends of the
849 partition will contain values which will stop the scan in the
850 compare loop, but when you have to call an arbitrarily complex
851 routine to do a compare, its really better to just keep track of
852 array index values to know when you hit the edge of the
853 partition and avoid the extra compare. An even better reason to
854 avoid using a compare call is the fact that you can drop off the
855 edge of the array if someone foolishly provides you with an
856 unstable compare function that doesn't always provide consistent
857 results.
858
859 So, since it is simpler for us to compare the three adjacent
860 elements in the middle of the partition, those are the ones we
861 pick here (conveniently pointed at by u_right, pc_left, and
862 u_left). The values of the left, center, and right elements
863 are refered to as l c and r in the following comments.
864 */
865
866#ifdef QSORT_ORDER_GUESS
867 swapped = 0;
868#endif
869 s = qsort_cmp(u_right, pc_left);
870 if (s < 0) {
871 /* l < c */
872 s = qsort_cmp(pc_left, u_left);
873 /* if l < c, c < r - already in order - nothing to do */
874 if (s == 0) {
875 /* l < c, c == r - already in order, pc grows */
876 ++pc_right;
877 qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
878 } else if (s > 0) {
879 /* l < c, c > r - need to know more */
880 s = qsort_cmp(u_right, u_left);
881 if (s < 0) {
882 /* l < c, c > r, l < r - swap c & r to get ordered */
883 qsort_swap(pc_left, u_left);
884 qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
885 } else if (s == 0) {
886 /* l < c, c > r, l == r - swap c&r, grow pc */
887 qsort_swap(pc_left, u_left);
888 --pc_left;
889 qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
890 } else {
891 /* l < c, c > r, l > r - make lcr into rlc to get ordered */
892 qsort_rotate(pc_left, u_right, u_left);
893 qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
894 }
895 }
896 } else if (s == 0) {
897 /* l == c */
898 s = qsort_cmp(pc_left, u_left);
899 if (s < 0) {
900 /* l == c, c < r - already in order, grow pc */
901 --pc_left;
902 qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
903 } else if (s == 0) {
904 /* l == c, c == r - already in order, grow pc both ways */
905 --pc_left;
906 ++pc_right;
907 qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
908 } else {
909 /* l == c, c > r - swap l & r, grow pc */
910 qsort_swap(u_right, u_left);
911 ++pc_right;
912 qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
913 }
914 } else {
915 /* l > c */
916 s = qsort_cmp(pc_left, u_left);
917 if (s < 0) {
918 /* l > c, c < r - need to know more */
919 s = qsort_cmp(u_right, u_left);
920 if (s < 0) {
921 /* l > c, c < r, l < r - swap l & c to get ordered */
922 qsort_swap(u_right, pc_left);
923 qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
924 } else if (s == 0) {
925 /* l > c, c < r, l == r - swap l & c, grow pc */
926 qsort_swap(u_right, pc_left);
927 ++pc_right;
928 qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
929 } else {
930 /* l > c, c < r, l > r - rotate lcr into crl to order */
931 qsort_rotate(u_right, pc_left, u_left);
932 qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
933 }
934 } else if (s == 0) {
935 /* l > c, c == r - swap ends, grow pc */
936 qsort_swap(u_right, u_left);
937 --pc_left;
938 qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
939 } else {
940 /* l > c, c > r - swap ends to get in order */
941 qsort_swap(u_right, u_left);
942 qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
943 }
944 }
945 /* We now know the 3 middle elements have been compared and
946 arranged in the desired order, so we can shrink the uncompared
947 sets on both sides
948 */
949 --u_right;
950 ++u_left;
951 qsort_all_asserts(pc_left, pc_right, u_left, u_right);
952
953 /* The above massive nested if was the simple part :-). We now have
954 the middle 3 elements ordered and we need to scan through the
955 uncompared sets on either side, swapping elements that are on
956 the wrong side or simply shuffling equal elements around to get
957 all equal elements into the pivot chunk.
958 */
959
960 for ( ; ; ) {
961 int still_work_on_left;
962 int still_work_on_right;
963
964 /* Scan the uncompared values on the left. If I find a value
965 equal to the pivot value, move it over so it is adjacent to
966 the pivot chunk and expand the pivot chunk. If I find a value
967 less than the pivot value, then just leave it - its already
968 on the correct side of the partition. If I find a greater
969 value, then stop the scan.
970 */
971 while ((still_work_on_left = (u_right >= part_left))) {
972 s = qsort_cmp(u_right, pc_left);
973 if (s < 0) {
974 --u_right;
975 } else if (s == 0) {
976 --pc_left;
977 if (pc_left != u_right) {
978 qsort_swap(u_right, pc_left);
979 }
980 --u_right;
981 } else {
982 break;
983 }
984 qsort_assert(u_right < pc_left);
985 qsort_assert(pc_left <= pc_right);
986 qsort_assert(qsort_cmp(u_right + 1, pc_left) <= 0);
987 qsort_assert(qsort_cmp(pc_left, pc_right) == 0);
988 }
989
990 /* Do a mirror image scan of uncompared values on the right
991 */
992 while ((still_work_on_right = (u_left <= part_right))) {
993 s = qsort_cmp(pc_right, u_left);
994 if (s < 0) {
995 ++u_left;
996 } else if (s == 0) {
997 ++pc_right;
998 if (pc_right != u_left) {
999 qsort_swap(pc_right, u_left);
1000 }
1001 ++u_left;
1002 } else {
1003 break;
1004 }
1005 qsort_assert(u_left > pc_right);
1006 qsort_assert(pc_left <= pc_right);
1007 qsort_assert(qsort_cmp(pc_right, u_left - 1) <= 0);
1008 qsort_assert(qsort_cmp(pc_left, pc_right) == 0);
1009 }
1010
1011 if (still_work_on_left) {
1012 /* I know I have a value on the left side which needs to be
1013 on the right side, but I need to know more to decide
1014 exactly the best thing to do with it.
1015 */
1016 if (still_work_on_right) {
1017 /* I know I have values on both side which are out of
1018 position. This is a big win because I kill two birds
1019 with one swap (so to speak). I can advance the
1020 uncompared pointers on both sides after swapping both
1021 of them into the right place.
1022 */
1023 qsort_swap(u_right, u_left);
1024 --u_right;
1025 ++u_left;
1026 qsort_all_asserts(pc_left, pc_right, u_left, u_right);
1027 } else {
1028 /* I have an out of position value on the left, but the
1029 right is fully scanned, so I "slide" the pivot chunk
1030 and any less-than values left one to make room for the
1031 greater value over on the right. If the out of position
1032 value is immediately adjacent to the pivot chunk (there
1033 are no less-than values), I can do that with a swap,
1034 otherwise, I have to rotate one of the less than values
1035 into the former position of the out of position value
1036 and the right end of the pivot chunk into the left end
1037 (got all that?).
1038 */
1039 --pc_left;
1040 if (pc_left == u_right) {
1041 qsort_swap(u_right, pc_right);
1042 qsort_all_asserts(pc_left, pc_right-1, u_left, u_right-1);
1043 } else {
1044 qsort_rotate(u_right, pc_left, pc_right);
1045 qsort_all_asserts(pc_left, pc_right-1, u_left, u_right-1);
1046 }
1047 --pc_right;
1048 --u_right;
1049 }
1050 } else if (still_work_on_right) {
1051 /* Mirror image of complex case above: I have an out of
1052 position value on the right, but the left is fully
1053 scanned, so I need to shuffle things around to make room
1054 for the right value on the left.
1055 */
1056 ++pc_right;
1057 if (pc_right == u_left) {
1058 qsort_swap(u_left, pc_left);
1059 qsort_all_asserts(pc_left+1, pc_right, u_left+1, u_right);
1060 } else {
1061 qsort_rotate(pc_right, pc_left, u_left);
1062 qsort_all_asserts(pc_left+1, pc_right, u_left+1, u_right);
1063 }
1064 ++pc_left;
1065 ++u_left;
1066 } else {
1067 /* No more scanning required on either side of partition,
1068 break out of loop and figure out next set of partitions
1069 */
1070 break;
1071 }
1072 }
1073
1074 /* The elements in the pivot chunk are now in the right place. They
1075 will never move or be compared again. All I have to do is decide
1076 what to do with the stuff to the left and right of the pivot
1077 chunk.
1078
1079 Notes on the QSORT_ORDER_GUESS ifdef code:
1080
1081 1. If I just built these partitions without swapping any (or
1082 very many) elements, there is a chance that the elements are
1083 already ordered properly (being properly ordered will
1084 certainly result in no swapping, but the converse can't be
1085 proved :-).
1086
1087 2. A (properly written) insertion sort will run faster on
1088 already ordered data than qsort will.
1089
1090 3. Perhaps there is some way to make a good guess about
1091 switching to an insertion sort earlier than partition size 6
1092 (for instance - we could save the partition size on the stack
1093 and increase the size each time we find we didn't swap, thus
1094 switching to insertion sort earlier for partitions with a
1095 history of not swapping).
1096
1097 4. Naturally, if I just switch right away, it will make
1098 artificial benchmarks with pure ascending (or descending)
1099 data look really good, but is that a good reason in general?
1100 Hard to say...
1101 */
1102
1103#ifdef QSORT_ORDER_GUESS
1104 if (swapped < 3) {
1105#if QSORT_ORDER_GUESS == 1
1106 qsort_break_even = (part_right - part_left) + 1;
1107#endif
1108#if QSORT_ORDER_GUESS == 2
1109 qsort_break_even *= 2;
1110#endif
1111#if QSORT_ORDER_GUESS == 3
1112 int prev_break = qsort_break_even;
1113 qsort_break_even *= qsort_break_even;
1114 if (qsort_break_even < prev_break) {
1115 qsort_break_even = (part_right - part_left) + 1;
1116 }
1117#endif
1118 } else {
1119 qsort_break_even = QSORT_BREAK_EVEN;
1120 }
1121#endif
1122
1123 if (part_left < pc_left) {
1124 /* There are elements on the left which need more processing.
1125 Check the right as well before deciding what to do.
1126 */
1127 if (pc_right < part_right) {
1128 /* We have two partitions to be sorted. Stack the biggest one
1129 and process the smallest one on the next iteration. This
1130 minimizes the stack height by insuring that any additional
1131 stack entries must come from the smallest partition which
1132 (because it is smallest) will have the fewest
1133 opportunities to generate additional stack entries.
1134 */
1135 if ((part_right - pc_right) > (pc_left - part_left)) {
1136 /* stack the right partition, process the left */
1137 partition_stack[next_stack_entry].left = pc_right + 1;
1138 partition_stack[next_stack_entry].right = part_right;
1139#ifdef QSORT_ORDER_GUESS
1140 partition_stack[next_stack_entry].qsort_break_even = qsort_break_even;
1141#endif
1142 part_right = pc_left - 1;
1143 } else {
1144 /* stack the left partition, process the right */
1145 partition_stack[next_stack_entry].left = part_left;
1146 partition_stack[next_stack_entry].right = pc_left - 1;
1147#ifdef QSORT_ORDER_GUESS
1148 partition_stack[next_stack_entry].qsort_break_even = qsort_break_even;
1149#endif
1150 part_left = pc_right + 1;
1151 }
1152 qsort_assert(next_stack_entry < QSORT_MAX_STACK);
1153 ++next_stack_entry;
1154 } else {
1155 /* The elements on the left are the only remaining elements
1156 that need sorting, arrange for them to be processed as the
1157 next partition.
1158 */
1159 part_right = pc_left - 1;
1160 }
1161 } else if (pc_right < part_right) {
1162 /* There is only one chunk on the right to be sorted, make it
1163 the new partition and loop back around.
1164 */
1165 part_left = pc_right + 1;
1166 } else {
1167 /* This whole partition wound up in the pivot chunk, so
1168 we need to get a new partition off the stack.
1169 */
1170 if (next_stack_entry == 0) {
1171 /* the stack is empty - we are done */
1172 break;
1173 }
1174 --next_stack_entry;
1175 part_left = partition_stack[next_stack_entry].left;
1176 part_right = partition_stack[next_stack_entry].right;
1177#ifdef QSORT_ORDER_GUESS
1178 qsort_break_even = partition_stack[next_stack_entry].qsort_break_even;
1179#endif
1180 }
1181 } else {
1182 /* This partition is too small to fool with qsort complexity, just
1183 do an ordinary insertion sort to minimize overhead.
1184 */
1185 int i;
1186 /* Assume 1st element is in right place already, and start checking
1187 at 2nd element to see where it should be inserted.
1188 */
1189 for (i = part_left + 1; i <= part_right; ++i) {
1190 int j;
1191 /* Scan (backwards - just in case 'i' is already in right place)
1192 through the elements already sorted to see if the ith element
1193 belongs ahead of one of them.
1194 */
1195 for (j = i - 1; j >= part_left; --j) {
1196 if (qsort_cmp(i, j) >= 0) {
1197 /* i belongs right after j
1198 */
1199 break;
1200 }
1201 }
1202 ++j;
1203 if (j != i) {
1204 /* Looks like we really need to move some things
1205 */
1206 int k;
1207 temp = array[i];
1208 for (k = i - 1; k >= j; --k)
1209 array[k + 1] = array[k];
1210 array[j] = temp;
1211 }
1212 }
1213
1214 /* That partition is now sorted, grab the next one, or get out
1215 of the loop if there aren't any more.
1216 */
1217
1218 if (next_stack_entry == 0) {
1219 /* the stack is empty - we are done */
1220 break;
1221 }
1222 --next_stack_entry;
1223 part_left = partition_stack[next_stack_entry].left;
1224 part_right = partition_stack[next_stack_entry].right;
1225#ifdef QSORT_ORDER_GUESS
1226 qsort_break_even = partition_stack[next_stack_entry].qsort_break_even;
1227#endif
1228 }
1229 }
1230
1231 /* Believe it or not, the array is sorted at this point! */
1232}
1233
1234/* Stabilize what is, presumably, an otherwise unstable sort method.
1235 * We do that by allocating (or having on hand) an array of pointers
1236 * that is the same size as the original array of elements to be sorted.
1237 * We initialize this parallel array with the addresses of the original
1238 * array elements. This indirection can make you crazy.
1239 * Some pictures can help. After initializing, we have
1240 *
1241 * indir list1
1242 * +----+ +----+
1243 * | | --------------> | | ------> first element to be sorted
1244 * +----+ +----+
1245 * | | --------------> | | ------> second element to be sorted
1246 * +----+ +----+
1247 * | | --------------> | | ------> third element to be sorted
1248 * +----+ +----+
1249 * ...
1250 * +----+ +----+
1251 * | | --------------> | | ------> n-1st element to be sorted
1252 * +----+ +----+
1253 * | | --------------> | | ------> n-th element to be sorted
1254 * +----+ +----+
1255 *
1256 * During the sort phase, we leave the elements of list1 where they are,
1257 * and sort the pointers in the indirect array in the same order determined
1258 * by the original comparison routine on the elements pointed to.
1259 * Because we don't move the elements of list1 around through
1260 * this phase, we can break ties on elements that compare equal
1261 * using their address in the list1 array, ensuring stabilty.
1262 * This leaves us with something looking like
1263 *
1264 * indir list1
1265 * +----+ +----+
1266 * | | --+ +---> | | ------> first element to be sorted
1267 * +----+ | | +----+
1268 * | | --|-------|---> | | ------> second element to be sorted
1269 * +----+ | | +----+
1270 * | | --|-------+ +-> | | ------> third element to be sorted
1271 * +----+ | | +----+
1272 * ...
1273 * +----+ | | | | +----+
1274 * | | ---|-+ | +--> | | ------> n-1st element to be sorted
1275 * +----+ | | +----+
1276 * | | ---+ +----> | | ------> n-th element to be sorted
1277 * +----+ +----+
1278 *
1279 * where the i-th element of the indirect array points to the element
1280 * that should be i-th in the sorted array. After the sort phase,
1281 * we have to put the elements of list1 into the places
1282 * dictated by the indirect array.
1283 */
1284
1285static SVCOMPARE_t RealCmp;
1286
1287static I32
1288cmpindir(pTHX_ gptr a, gptr b)
1289{
1290 I32 sense;
1291 gptr *ap = (gptr *)a;
1292 gptr *bp = (gptr *)b;
1293
1294 if ((sense = RealCmp(aTHX_ *ap, *bp)) == 0)
1295 sense = (ap > bp) ? 1 : ((ap < bp) ? -1 : 0);
1296 return sense;
1297}
1298
1299STATIC void
1300S_qsortsv(pTHX_ gptr *list1, size_t nmemb, SVCOMPARE_t cmp)
1301{
1302 SV **hintsvp;
1303
1304 if (SORTHINTS(hintsvp) & HINT_SORT_STABLE) {
1305 register gptr **pp, *q;
1306 register size_t n, j, i;
1307 gptr *small[SMALLSORT], **indir, tmp;
1308 SVCOMPARE_t savecmp;
1309 if (nmemb <= 1) return; /* sorted trivially */
1310
1311 /* Small arrays can use the stack, big ones must be allocated */
1312 if (nmemb <= SMALLSORT) indir = small;
1313 else { New(1799, indir, nmemb, gptr *); }
1314
1315 /* Copy pointers to original array elements into indirect array */
1316 for (n = nmemb, pp = indir, q = list1; n--; ) *pp++ = q++;
1317
1318 savecmp = RealCmp; /* Save current comparison routine, if any */
1319 RealCmp = cmp; /* Put comparison routine where cmpindir can find it */
1320
1321 /* sort, with indirection */
1322 S_qsortsvu(aTHX_ (gptr *)indir, nmemb, cmpindir);
1323
1324 pp = indir;
1325 q = list1;
1326 for (n = nmemb; n--; ) {
1327 /* Assert A: all elements of q with index > n are already
1328 * in place. This is vacuosly true at the start, and we
1329 * put element n where it belongs below (if it wasn't
1330 * already where it belonged). Assert B: we only move
1331 * elements that aren't where they belong,
1332 * so, by A, we never tamper with elements above n.
1333 */
1334 j = pp[n] - q; /* This sets j so that q[j] is
1335 * at pp[n]. *pp[j] belongs in
1336 * q[j], by construction.
1337 */
1338 if (n != j) { /* all's well if n == j */
1339 tmp = q[j]; /* save what's in q[j] */
1340 do {
1341 q[j] = *pp[j]; /* put *pp[j] where it belongs */
1342 i = pp[j] - q; /* the index in q of the element
1343 * just moved */
1344 pp[j] = q + j; /* this is ok now */
1345 } while ((j = i) != n);
1346 /* There are only finitely many (nmemb) addresses
1347 * in the pp array.
1348 * So we must eventually revisit an index we saw before.
1349 * Suppose the first revisited index is k != n.
1350 * An index is visited because something else belongs there.
1351 * If we visit k twice, then two different elements must
1352 * belong in the same place, which cannot be.
1353 * So j must get back to n, the loop terminates,
1354 * and we put the saved element where it belongs.
1355 */
1356 q[n] = tmp; /* put what belongs into
1357 * the n-th element */
1358 }
1359 }
1360
1361 /* free iff allocated */
1362 if (indir != small) { Safefree(indir); }
1363 /* restore prevailing comparison routine */
1364 RealCmp = savecmp;
1365 } else {
1366 S_qsortsvu(aTHX_ list1, nmemb, cmp);
1367 }
1368}
1369
1370/*
1371=for apidoc sortsv
1372
1373Sort an array. Here is an example:
1374
1375 sortsv(AvARRAY(av), av_len(av)+1, Perl_sv_cmp_locale);
1376
1377=cut
1378*/
1379
1380void
1381Perl_sortsv(pTHX_ SV **array, size_t nmemb, SVCOMPARE_t cmp)
1382{
1383 void (*sortsvp)(pTHX_ SV **array, size_t nmemb, SVCOMPARE_t cmp) =
1384 S_mergesortsv;
1385 SV **hintsvp;
1386 I32 hints;
1387
1388 if ((hints = SORTHINTS(hintsvp))) {
1389 if (hints & HINT_SORT_QUICKSORT)
1390 sortsvp = S_qsortsv;
1391 else {
1392 if (hints & HINT_SORT_MERGESORT)
1393 sortsvp = S_mergesortsv;
1394 else
1395 sortsvp = S_mergesortsv;
1396 }
1397 }
1398
1399 sortsvp(aTHX_ array, nmemb, cmp);
1400}
1401
1402PP(pp_sort)
1403{
1404 dSP; dMARK; dORIGMARK;
1405 register SV **up;
1406 SV **myorigmark = ORIGMARK;
1407 register I32 max;
1408 HV *stash;
1409 GV *gv;
1410 CV *cv = 0;
1411 I32 gimme = GIMME;
1412 OP* nextop = PL_op->op_next;
1413 I32 overloading = 0;
1414 bool hasargs = FALSE;
1415 I32 is_xsub = 0;
1416
1417 if (gimme != G_ARRAY) {
1418 SP = MARK;
1419 RETPUSHUNDEF;
1420 }
1421
1422 ENTER;
1423 SAVEVPTR(PL_sortcop);
1424 if (PL_op->op_flags & OPf_STACKED) {
1425 if (PL_op->op_flags & OPf_SPECIAL) {
1426 OP *kid = cLISTOP->op_first->op_sibling; /* pass pushmark */
1427 kid = kUNOP->op_first; /* pass rv2gv */
1428 kid = kUNOP->op_first; /* pass leave */
1429 PL_sortcop = kid->op_next;
1430 stash = CopSTASH(PL_curcop);
1431 }
1432 else {
1433 cv = sv_2cv(*++MARK, &stash, &gv, 0);
1434 if (cv && SvPOK(cv)) {
1435 STRLEN n_a;
1436 char *proto = SvPV((SV*)cv, n_a);
1437 if (proto && strEQ(proto, "$$")) {
1438 hasargs = TRUE;
1439 }
1440 }
1441 if (!(cv && CvROOT(cv))) {
1442 if (cv && CvXSUB(cv)) {
1443 is_xsub = 1;
1444 }
1445 else if (gv) {
1446 SV *tmpstr = sv_newmortal();
1447 gv_efullname3(tmpstr, gv, Nullch);
1448 DIE(aTHX_ "Undefined sort subroutine \"%s\" called",
1449 SvPVX(tmpstr));
1450 }
1451 else {
1452 DIE(aTHX_ "Undefined subroutine in sort");
1453 }
1454 }
1455
1456 if (is_xsub)
1457 PL_sortcop = (OP*)cv;
1458 else {
1459 PL_sortcop = CvSTART(cv);
1460 SAVEVPTR(CvROOT(cv)->op_ppaddr);
1461 CvROOT(cv)->op_ppaddr = PL_ppaddr[OP_NULL];
1462
1463 SAVEVPTR(PL_curpad);
1464 PL_curpad = AvARRAY((AV*)AvARRAY(CvPADLIST(cv))[1]);
1465 }
1466 }
1467 }
1468 else {
1469 PL_sortcop = Nullop;
1470 stash = CopSTASH(PL_curcop);
1471 }
1472
1473 up = myorigmark + 1;
1474 while (MARK < SP) { /* This may or may not shift down one here. */
1475 /*SUPPRESS 560*/
1476 if ((*up = *++MARK)) { /* Weed out nulls. */
1477 SvTEMP_off(*up);
1478 if (!PL_sortcop && !SvPOK(*up)) {
1479 STRLEN n_a;
1480 if (SvAMAGIC(*up))
1481 overloading = 1;
1482 else
1483 (void)sv_2pv(*up, &n_a);
1484 }
1485 up++;
1486 }
1487 }
1488 max = --up - myorigmark;
1489 if (PL_sortcop) {
1490 if (max > 1) {
1491 PERL_CONTEXT *cx;
1492 SV** newsp;
1493 bool oldcatch = CATCH_GET;
1494
1495 SAVETMPS;
1496 SAVEOP();
1497
1498 CATCH_SET(TRUE);
1499 PUSHSTACKi(PERLSI_SORT);
1500 if (!hasargs && !is_xsub) {
1501 if (PL_sortstash != stash || !PL_firstgv || !PL_secondgv) {
1502 SAVESPTR(PL_firstgv);
1503 SAVESPTR(PL_secondgv);
1504 PL_firstgv = gv_fetchpv("a", TRUE, SVt_PV);
1505 PL_secondgv = gv_fetchpv("b", TRUE, SVt_PV);
1506 PL_sortstash = stash;
1507 }
1508#ifdef USE_5005THREADS
1509 sv_lock((SV *)PL_firstgv);
1510 sv_lock((SV *)PL_secondgv);
1511#endif
1512 SAVESPTR(GvSV(PL_firstgv));
1513 SAVESPTR(GvSV(PL_secondgv));
1514 }
1515
1516 PUSHBLOCK(cx, CXt_NULL, PL_stack_base);
1517 if (!(PL_op->op_flags & OPf_SPECIAL)) {
1518 cx->cx_type = CXt_SUB;
1519 cx->blk_gimme = G_SCALAR;
1520 PUSHSUB(cx);
1521 if (!CvDEPTH(cv))
1522 (void)SvREFCNT_inc(cv); /* in preparation for POPSUB */
1523 }
1524 PL_sortcxix = cxstack_ix;
1525
1526 if (hasargs && !is_xsub) {
1527 /* This is mostly copied from pp_entersub */
1528 AV *av = (AV*)PL_curpad[0];
1529
1530#ifndef USE_5005THREADS
1531 cx->blk_sub.savearray = GvAV(PL_defgv);
1532 GvAV(PL_defgv) = (AV*)SvREFCNT_inc(av);
1533#endif /* USE_5005THREADS */
1534 cx->blk_sub.oldcurpad = PL_curpad;
1535 cx->blk_sub.argarray = av;
1536 }
1537 sortsv((myorigmark+1), max,
1538 is_xsub ? sortcv_xsub : hasargs ? sortcv_stacked : sortcv);
1539
1540 POPBLOCK(cx,PL_curpm);
1541 PL_stack_sp = newsp;
1542 POPSTACK;
1543 CATCH_SET(oldcatch);
1544 }
1545 }
1546 else {
1547 if (max > 1) {
1548 MEXTEND(SP, 20); /* Can't afford stack realloc on signal. */
1549 sortsv(ORIGMARK+1, max,
1550 (PL_op->op_private & OPpSORT_NUMERIC)
1551 ? ( (PL_op->op_private & OPpSORT_INTEGER)
1552 ? ( overloading ? amagic_i_ncmp : sv_i_ncmp)
1553 : ( overloading ? amagic_ncmp : sv_ncmp))
1554 : ( IN_LOCALE_RUNTIME
1555 ? ( overloading
1556 ? amagic_cmp_locale
1557 : sv_cmp_locale_static)
1558 : ( overloading ? amagic_cmp : sv_cmp_static)));
1559 if (PL_op->op_private & OPpSORT_REVERSE) {
1560 SV **p = ORIGMARK+1;
1561 SV **q = ORIGMARK+max;
1562 while (p < q) {
1563 SV *tmp = *p;
1564 *p++ = *q;
1565 *q-- = tmp;
1566 }
1567 }
1568 }
1569 }
1570 LEAVE;
1571 PL_stack_sp = ORIGMARK + max;
1572 return nextop;
1573}
1574
1575static I32
1576sortcv(pTHX_ SV *a, SV *b)
1577{
1578 I32 oldsaveix = PL_savestack_ix;
1579 I32 oldscopeix = PL_scopestack_ix;
1580 I32 result;
1581 GvSV(PL_firstgv) = a;
1582 GvSV(PL_secondgv) = b;
1583 PL_stack_sp = PL_stack_base;
1584 PL_op = PL_sortcop;
1585 CALLRUNOPS(aTHX);
1586 if (PL_stack_sp != PL_stack_base + 1)
1587 Perl_croak(aTHX_ "Sort subroutine didn't return single value");
1588 if (!SvNIOKp(*PL_stack_sp))
1589 Perl_croak(aTHX_ "Sort subroutine didn't return a numeric value");
1590 result = SvIV(*PL_stack_sp);
1591 while (PL_scopestack_ix > oldscopeix) {
1592 LEAVE;
1593 }
1594 leave_scope(oldsaveix);
1595 return result;
1596}
1597
1598static I32
1599sortcv_stacked(pTHX_ SV *a, SV *b)
1600{
1601 I32 oldsaveix = PL_savestack_ix;
1602 I32 oldscopeix = PL_scopestack_ix;
1603 I32 result;
1604 AV *av;
1605
1606#ifdef USE_5005THREADS
1607 av = (AV*)PL_curpad[0];
1608#else
1609 av = GvAV(PL_defgv);
1610#endif
1611
1612 if (AvMAX(av) < 1) {
1613 SV** ary = AvALLOC(av);
1614 if (AvARRAY(av) != ary) {
1615 AvMAX(av) += AvARRAY(av) - AvALLOC(av);
1616 SvPVX(av) = (char*)ary;
1617 }
1618 if (AvMAX(av) < 1) {
1619 AvMAX(av) = 1;
1620 Renew(ary,2,SV*);
1621 SvPVX(av) = (char*)ary;
1622 }
1623 }
1624 AvFILLp(av) = 1;
1625
1626 AvARRAY(av)[0] = a;
1627 AvARRAY(av)[1] = b;
1628 PL_stack_sp = PL_stack_base;
1629 PL_op = PL_sortcop;
1630 CALLRUNOPS(aTHX);
1631 if (PL_stack_sp != PL_stack_base + 1)
1632 Perl_croak(aTHX_ "Sort subroutine didn't return single value");
1633 if (!SvNIOKp(*PL_stack_sp))
1634 Perl_croak(aTHX_ "Sort subroutine didn't return a numeric value");
1635 result = SvIV(*PL_stack_sp);
1636 while (PL_scopestack_ix > oldscopeix) {
1637 LEAVE;
1638 }
1639 leave_scope(oldsaveix);
1640 return result;
1641}
1642
1643static I32
1644sortcv_xsub(pTHX_ SV *a, SV *b)
1645{
1646 dSP;
1647 I32 oldsaveix = PL_savestack_ix;
1648 I32 oldscopeix = PL_scopestack_ix;
1649 I32 result;
1650 CV *cv=(CV*)PL_sortcop;
1651
1652 SP = PL_stack_base;
1653 PUSHMARK(SP);
1654 EXTEND(SP, 2);
1655 *++SP = a;
1656 *++SP = b;
1657 PUTBACK;
1658 (void)(*CvXSUB(cv))(aTHX_ cv);
1659 if (PL_stack_sp != PL_stack_base + 1)
1660 Perl_croak(aTHX_ "Sort subroutine didn't return single value");
1661 if (!SvNIOKp(*PL_stack_sp))
1662 Perl_croak(aTHX_ "Sort subroutine didn't return a numeric value");
1663 result = SvIV(*PL_stack_sp);
1664 while (PL_scopestack_ix > oldscopeix) {
1665 LEAVE;
1666 }
1667 leave_scope(oldsaveix);
1668 return result;
1669}
1670
1671
1672static I32
1673sv_ncmp(pTHX_ SV *a, SV *b)
1674{
1675 NV nv1 = SvNV(a);
1676 NV nv2 = SvNV(b);
1677 return nv1 < nv2 ? -1 : nv1 > nv2 ? 1 : 0;
1678}
1679
1680static I32
1681sv_i_ncmp(pTHX_ SV *a, SV *b)
1682{
1683 IV iv1 = SvIV(a);
1684 IV iv2 = SvIV(b);
1685 return iv1 < iv2 ? -1 : iv1 > iv2 ? 1 : 0;
1686}
1687#define tryCALL_AMAGICbin(left,right,meth,svp) STMT_START { \
1688 *svp = Nullsv; \
1689 if (PL_amagic_generation) { \
1690 if (SvAMAGIC(left)||SvAMAGIC(right))\
1691 *svp = amagic_call(left, \
1692 right, \
1693 CAT2(meth,_amg), \
1694 0); \
1695 } \
1696 } STMT_END
1697
1698static I32
1699amagic_ncmp(pTHX_ register SV *a, register SV *b)
1700{
1701 SV *tmpsv;
1702 tryCALL_AMAGICbin(a,b,ncmp,&tmpsv);
1703 if (tmpsv) {
1704 NV d;
1705
1706 if (SvIOK(tmpsv)) {
1707 I32 i = SvIVX(tmpsv);
1708 if (i > 0)
1709 return 1;
1710 return i? -1 : 0;
1711 }
1712 d = SvNV(tmpsv);
1713 if (d > 0)
1714 return 1;
1715 return d? -1 : 0;
1716 }
1717 return sv_ncmp(aTHX_ a, b);
1718}
1719
1720static I32
1721amagic_i_ncmp(pTHX_ register SV *a, register SV *b)
1722{
1723 SV *tmpsv;
1724 tryCALL_AMAGICbin(a,b,ncmp,&tmpsv);
1725 if (tmpsv) {
1726 NV d;
1727
1728 if (SvIOK(tmpsv)) {
1729 I32 i = SvIVX(tmpsv);
1730 if (i > 0)
1731 return 1;
1732 return i? -1 : 0;
1733 }
1734 d = SvNV(tmpsv);
1735 if (d > 0)
1736 return 1;
1737 return d? -1 : 0;
1738 }
1739 return sv_i_ncmp(aTHX_ a, b);
1740}
1741
1742static I32
1743amagic_cmp(pTHX_ register SV *str1, register SV *str2)
1744{
1745 SV *tmpsv;
1746 tryCALL_AMAGICbin(str1,str2,scmp,&tmpsv);
1747 if (tmpsv) {
1748 NV d;
1749
1750 if (SvIOK(tmpsv)) {
1751 I32 i = SvIVX(tmpsv);
1752 if (i > 0)
1753 return 1;
1754 return i? -1 : 0;
1755 }
1756 d = SvNV(tmpsv);
1757 if (d > 0)
1758 return 1;
1759 return d? -1 : 0;
1760 }
1761 return sv_cmp(str1, str2);
1762}
1763
1764static I32
1765amagic_cmp_locale(pTHX_ register SV *str1, register SV *str2)
1766{
1767 SV *tmpsv;
1768 tryCALL_AMAGICbin(str1,str2,scmp,&tmpsv);
1769 if (tmpsv) {
1770 NV d;
1771
1772 if (SvIOK(tmpsv)) {
1773 I32 i = SvIVX(tmpsv);
1774 if (i > 0)
1775 return 1;
1776 return i? -1 : 0;
1777 }
1778 d = SvNV(tmpsv);
1779 if (d > 0)
1780 return 1;
1781 return d? -1 : 0;
1782 }
1783 return sv_cmp_locale(str1, str2);
1784}