/* pp_sort.c
 *
 * Copyright (c) 1991-2001, Larry Wall
 *
 * You may distribute under the terms of either the GNU General Public
 * License or the Artistic License, as specified in the README file.
 *
 */

/*
 * ...they shuffled back towards the rear of the line. 'No, not at the
 * rear!' the slave-driver shouted. 'Three files up. And stay there...
 */

#include "EXTERN.h"
#define PERL_IN_PP_SORT_C
#include "perl.h"

static I32 sortcv(pTHX_ SV *a, SV *b);
static I32 sortcv_stacked(pTHX_ SV *a, SV *b);
static I32 sortcv_xsub(pTHX_ SV *a, SV *b);
static I32 sv_ncmp(pTHX_ SV *a, SV *b);
static I32 sv_i_ncmp(pTHX_ SV *a, SV *b);
static I32 amagic_ncmp(pTHX_ SV *a, SV *b);
static I32 amagic_i_ncmp(pTHX_ SV *a, SV *b);
static I32 amagic_cmp(pTHX_ SV *a, SV *b);
static I32 amagic_cmp_locale(pTHX_ SV *a, SV *b);

#define sv_cmp_static Perl_sv_cmp
#define sv_cmp_locale_static Perl_sv_cmp_locale

#define SORTHINTS(hintsvp) \
     ((PL_hintgv && \
      (hintsvp = hv_fetch(GvHV(PL_hintgv), "SORT", 4, FALSE))) ? \
	  (I32)SvIV(*hintsvp) : 0)

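/* An illustrative sketch (not in the original source): the sort pragma
 * stores its selection bits in the hint hash under the key "SORT", so
 * Perl-level code such as
 *
 *	use sort '_quicksort';	# or '_mergesort', or 'stable'
 *
 * is visible here as
 *
 *	SV **hintsvp;
 *	if (SORTHINTS(hintsvp) & HINT_SORT_QUICKSORT) ...
 *
 * which is how Perl_sortsv and S_qsortsv below pick an implementation.
 */
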
#ifndef SMALLSORT
#define SMALLSORT (200)
#endif

/*
 * The mergesort implementation is by Peter M. McIlroy <pmcilroy@lucent.com>.
 *
 * The original code was written in conjunction with BSD Computer Software
 * Research Group at University of California, Berkeley.
 *
 * See also: "Optimistic Sorting and Information Theoretic Complexity",
 * Peter McIlroy, SODA '93 (Fourth Annual ACM-SIAM Symposium on Discrete
 * Algorithms).
 *
 * The integration to Perl is by John P. Linderman <jpl@research.att.com>.
 *
 * The code can be distributed under the same terms as Perl itself.
 *
 */

typedef char * aptr;		/* pointer for arithmetic on sizes */
typedef SV * gptr;		/* pointers in our lists */

/* Binary merge internal sort, with a few special mods
** for the special perl environment it now finds itself in.
**
** Things that were once options have been hotwired
** to values suitable for this use.  In particular, we'll always
** initialize looking for natural runs, we'll always produce stable
** output, and we'll always do Peter McIlroy's binary merge.
*/

/* Pointer types for arithmetic and storage and convenience casts */

#define	APTR(P)	((aptr)(P))
#define	GPTP(P)	((gptr *)(P))
#define GPPP(P) ((gptr **)(P))

/* byte offset from pointer P to (larger) pointer Q */
#define	BYTEOFF(P, Q) (APTR(Q) - APTR(P))

#define PSIZE sizeof(gptr)

/* If PSIZE is a power of 2, make PSHIFT that power, if that helps */

#ifdef	PSHIFT
#define	PNELEM(P, Q)	(BYTEOFF(P,Q) >> (PSHIFT))
#define	PNBYTE(N)	((N) << (PSHIFT))
#define	PINDEX(P, N)	(GPTP(APTR(P) + PNBYTE(N)))
#else
/* Leave optimization to compiler */
#define	PNELEM(P, Q)	(GPTP(Q) - GPTP(P))
#define	PNBYTE(N)	((N) * (PSIZE))
#define	PINDEX(P, N)	(GPTP(P) + (N))
#endif

/* Pointer into other corresponding to pointer into this */
#define	POTHER(P, THIS, OTHER) GPTP(APTR(OTHER) + BYTEOFF(THIS,P))

/* Copy elements from src up to (but not including) lim into dst,
** advancing both pointers as we go.
*/
#define FROMTOUPTO(src, dst, lim) do *dst++ = *src++; while(src<lim)

/* Runs are identified by a pointer in the auxiliary list.
** The pointer is at the start of the list,
** and it points to the start of the next list.
** NEXT is used as an lvalue, too.
*/

#define NEXT(P)		(*GPPP(P))

/* PTHRESH is the minimum number of pairs with the same sense to justify
** checking for a run and extending it.  Note that PTHRESH counts PAIRS,
** not just elements, so PTHRESH == 8 means a run of 16.
*/

#define PTHRESH (8)

/* RTHRESH is the number of elements in a run that must compare low
** to the low element from the opposing run before we justify
** doing a binary rampup instead of single stepping.
** In random input, N in a row low should only happen with
** probability 2^(1-N), so we can risk that we are dealing
** with orderly input without paying much when we aren't.
*/

#define RTHRESH (6)

/*
** Overview of algorithm and variables.
** The array of elements at list1 will be organized into runs of length 2,
** or runs of length >= 2 * PTHRESH.  We only try to form long runs when
** PTHRESH adjacent pairs compare in the same way, suggesting overall order.
**
** Unless otherwise specified, pair pointers address the first of two elements.
**
** b and b+1 are a pair that compare with sense ``sense''.
** b is the ``bottom'' of adjacent pairs that might form a longer run.
**
** p2 parallels b in the list2 array, where runs are defined by
** a pointer chain.
**
** t represents the ``top'' of the adjacent pairs that might extend
** the run beginning at b.  Usually, t addresses a pair
** that compares with opposite sense from (b,b+1).
** However, it may also address a singleton element at the end of list1,
** or it may be equal to ``last'', the first element beyond list1.
**
** r addresses the Nth pair following b.  If this would be beyond t,
** we back it off to t.  Only when r is less than t do we consider the
** run long enough to consider checking.
**
** q addresses a pair such that the pairs at b through q already form a run.
** Often, q will equal b, indicating we are only sure of the pair itself.
** However, a search on the previous cycle may have revealed a longer run,
** so q may be greater than b.
**
** p is used to work back from a candidate r, trying to reach q,
** which would mean b through r would be a run.  If we discover such a run,
** we start q at r and try to push it further towards t.
** If b through r is NOT a run, we detect the wrong order at (p-1,p).
** In any event, after the check (if any), we have two main cases.
**
** 1) Short run.  b <= q < p <= r <= t.
**	b through q is a run (perhaps trivial)
**	q through p are uninteresting pairs
**	p through r is a run
**
** 2) Long run.  b < r <= q < t.
**	b through q is a run (of length >= 2 * PTHRESH)
**
** Note that degenerate cases are not only possible, but likely.
** For example, if the pair following b compares with opposite sense,
** then b == q < p == r == t.
*/

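/* An illustrative worked example (not in the original source):
** given the five elements (4 2 7 5 9), no candidate run comes close to
** 2 * PTHRESH elements, so dynprep falls through to the "simple pairs"
** case.  The descending pairs (4,2) and (7,5) are reversed in place to
** (2,4) and (5,7), the trailing 9 becomes a singleton run, and dynprep
** records the three runs (2 4) (5 7) (9) in the list2 pointer chain,
** returning 3.
*/
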
static IV
dynprep(pTHX_ gptr *list1, gptr *list2, size_t nmemb, SVCOMPARE_t cmp)
{
    I32 sense;
    register gptr *b, *p, *q, *t, *p2;
    register gptr c, *last, *r;
    gptr *savep;
    IV runs = 0;

    b = list1;
    last = PINDEX(b, nmemb);
    sense = (cmp(aTHX_ *b, *(b+1)) > 0);
    for (p2 = list2; b < last; ) {
	/* We just started, or just reversed sense.
	** Set t at end of pairs with the prevailing sense.
	*/
	for (p = b+2, t = p; ++p < last; t = ++p) {
	    if ((cmp(aTHX_ *t, *p) > 0) != sense) break;
	}
	q = b;
	/* Having laid out the playing field, look for long runs */
	do {
	    p = r = b + (2 * PTHRESH);
	    if (r >= t) p = r = t;	/* too short to care about */
	    else {
		while (((cmp(aTHX_ *(p-1), *p) > 0) == sense) &&
		       ((p -= 2) > q));
		if (p <= q) {
		    /* b through r is a (long) run.
		    ** Extend it as far as possible.
		    */
		    p = q = r;
		    while (((p += 2) < t) &&
			   ((cmp(aTHX_ *(p-1), *p) > 0) == sense)) q = p;
		    r = p = q + 2;	/* no simple pairs, no after-run */
		}
	    }
	    if (q > b) {		/* run of greater than 2 at b */
		savep = p;
		p = q += 2;
		/* pick up singleton, if possible */
		if ((p == t) &&
		    ((t + 1) == last) &&
		    ((cmp(aTHX_ *(p-1), *p) > 0) == sense))
		    savep = r = p = q = last;
		p2 = NEXT(p2) = p2 + (p - b); ++runs;
		if (sense) while (b < --p) {
		    c = *b;
		    *b++ = *p;
		    *p = c;
		}
		p = savep;
	    }
	    while (q < p) {		/* simple pairs */
		p2 = NEXT(p2) = p2 + 2; ++runs;
		if (sense) {
		    c = *q++;
		    *(q-1) = *q;
		    *q++ = c;
		} else q += 2;
	    }
	    if (((b = p) == t) && ((t+1) == last)) {
		NEXT(p2) = p2 + 1; ++runs;
		b++;
	    }
	    q = r;
	} while (b < t);
	sense = !sense;
    }
    return runs;
}

/* The original merge sort, in use since 5.7, was as fast as, or faster than,
 * qsort on many platforms, but slower than qsort, conspicuously so,
 * on others.  The most likely explanation was platform-specific
 * differences in cache sizes and relative speeds.
 *
 * The quicksort divide-and-conquer algorithm guarantees that, as the
 * problem is subdivided into smaller and smaller parts, the parts
 * fit into smaller (and faster) caches.  So it doesn't matter how
 * many levels of cache exist, quicksort will "find" them, and,
 * as long as smaller is faster, take advantage of them.
 *
 * By contrast, consider how the original mergesort algorithm worked.
 * Suppose we have five runs (each typically of length 2 after dynprep).
 *
 * pass		base			aux
 *  0		1 2 3 4 5
 *  1					12 34 5
 *  2		1234 5
 *  3					12345
 *  4		12345
 *
 * Adjacent pairs are merged in "grand sweeps" through the input.
 * This means, on pass 1, the records in runs 1 and 2 aren't revisited until
 * runs 3 and 4 are merged and run 5 has been copied.
 * The only cache that matters is one large enough to hold *all* the input.
 * On some platforms, this may be many times slower than smaller caches.
 *
 * The following pseudo-code uses the same basic merge algorithm,
 * but in a divide-and-conquer way.
 *
 * # merge $runs runs at offset $offset of list $list1 into $list2.
 * # all unmerged runs ($runs == 1) originate in list $base.
 * sub mgsort2 {
 *     my ($offset, $runs, $base, $list1, $list2) = @_;
 *
 *     if ($runs == 1) {
 *         if ($list1 is $base) copy run to $list2
 *         return offset of end of list (or copy)
 *     } else {
 *         $off2 = mgsort2($offset, $runs-($runs/2), $base, $list2, $list1)
 *         mgsort2($off2, $runs/2, $base, $list2, $list1)
 *         merge the adjacent runs at $offset of $list1 into $list2
 *         return the offset of the end of the merged runs
 *     }
 * }
 * mgsort2(0, $runs, $base, $aux, $base);
 *
 * For our 5 runs, the tree of calls looks like
 *
 *            5
 *         3      2
 *       2   1   1 1
 *      1 1
 *
 *     1   2   3   4   5
 *
 * and the corresponding activity looks like
 *
 *     copy runs 1 and 2 from base to aux
 *     merge runs 1 and 2 from aux to base
 *     (run 3 is where it belongs, no copy needed)
 *     merge runs 12 and 3 from base to aux
 *     (runs 4 and 5 are where they belong, no copy needed)
 *     merge runs 4 and 5 from base to aux
 *     merge runs 123 and 45 from aux to base
 *
 * Note that we merge runs 1 and 2 immediately after copying them,
 * while they are still likely to be in fast cache.  Similarly,
 * run 3 is merged with run 12 while it still may be lingering in cache.
 * This implementation should therefore enjoy much of the cache-friendly
 * behavior that quicksort does.  In addition, it does less copying
 * than the original mergesort implementation (only runs 1 and 2 are copied)
 * and the "balancing" of merges is better (merged runs comprise more nearly
 * equal numbers of original runs).
 *
 * The actual cache-friendly implementation will use a pseudo-stack
 * to avoid recursion, and will unroll processing of runs of length 2,
 * but it is otherwise similar to the recursive implementation.
 */

typedef struct {
    IV	offset;		/* offset of 1st of 2 runs at this level */
    IV	runs;		/* how many runs must be combined into 1 */
} off_runs;		/* pseudo-stack element */

STATIC void
S_mergesortsv(pTHX_ gptr *base, size_t nmemb, SVCOMPARE_t cmp)
{
    IV i, run, runs, offset;
    I32 sense, level;
    int iwhich;
    register gptr *f1, *f2, *t, *b, *p, *tp2, *l1, *l2, *q;
    gptr *aux, *list1, *list2;
    gptr *p1;
    gptr small[SMALLSORT];
    gptr *which[3];
    off_runs stack[60], *stackp;

    if (nmemb <= 1) return;			/* sorted trivially */
    if (nmemb <= SMALLSORT) aux = small;	/* use stack for aux array */
    else { New(799,aux,nmemb,gptr); }		/* allocate auxiliary array */
    level = 0;
    stackp = stack;
    stackp->runs = dynprep(aTHX_ base, aux, nmemb, cmp);
    stackp->offset = offset = 0;
    which[0] = which[2] = base;
    which[1] = aux;
    for (;;) {
	/* On levels where both runs have been constructed (stackp->runs == 0),
	 * merge them, and note the offset of their end, in case the offset
	 * is needed at the next level up.  Hop up a level, and,
	 * as long as stackp->runs is 0, keep merging.
	 */
	if ((runs = stackp->runs) == 0) {
	    iwhich = level & 1;
	    list1 = which[iwhich];		/* area where runs are now */
	    list2 = which[++iwhich];		/* area for merged runs */
	    do {
		offset = stackp->offset;
		f1 = p1 = list1 + offset;	/* start of first run */
		p = tp2 = list2 + offset;	/* where merged run will go */
		t = NEXT(p);			/* where first run ends */
		f2 = l1 = POTHER(t, list2, list1); /* ... on the other side */
		t = NEXT(t);			/* where the second run ends */
		l2 = POTHER(t, list2, list1);	/* ... on the other side */
		offset = PNELEM(list2, t);
		while (f1 < l1 && f2 < l2) {
		    /* If head 1 is larger than head 2, find ALL the elements
		    ** in list 2 strictly less than head1, write them all,
		    ** then head 1.  Then compare the new heads, and repeat,
		    ** until one or both lists are exhausted.
		    **
		    ** In all comparisons (after establishing
		    ** which head to merge) the item to merge
		    ** (at pointer q) is the first operand of
		    ** the comparison.  When we want to know
		    ** if ``q is strictly less than the other'',
		    ** we can't just do
		    **    cmp(q, other) < 0
		    ** because stability demands that we treat equality
		    ** as high when q comes from l2, and as low when
		    ** q was from l1.  So we ask the question by doing
		    **    cmp(q, other) <= sense
		    ** and make sense == 0 when equality should look low,
		    ** and -1 when equality should look high.
		    */

		    if (cmp(aTHX_ *f1, *f2) <= 0) {
			q = f2; b = f1; t = l1;
			sense = -1;
		    } else {
			q = f1; b = f2; t = l2;
			sense = 0;
		    }

		    /* ramp up
		    **
		    ** Leave t at something strictly
		    ** greater than q (or at the end of the list),
		    ** and b at something strictly less than q.
		    */
		    for (i = 1, run = 0 ;;) {
			if ((p = PINDEX(b, i)) >= t) {
			    /* off the end */
			    if (((p = PINDEX(t, -1)) > b) &&
				(cmp(aTHX_ *q, *p) <= sense))
				 t = p;
			    else b = p;
			    break;
			} else if (cmp(aTHX_ *q, *p) <= sense) {
			    t = p;
			    break;
			} else b = p;
			if (++run >= RTHRESH) i += i;
		    }
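
		    /* Illustrative note (not in the original source):
		    ** with RTHRESH == 6, the loop above single-steps six
		    ** times and then strides 2, 4, 8, ... elements per
		    ** compare, so a long winning streak is crossed in
		    ** roughly logarithmically many comparisons instead
		    ** of one compare per element.
		    */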

		    /* q is known to follow b and must be inserted before t.
		    ** Increment b, so the range of possibilities is [b,t).
		    ** Round binary split down, to favor early appearance.
		    ** Adjust b and t until q belongs just before t.
		    */

		    b++;
		    while (b < t) {
			p = PINDEX(b, (PNELEM(b, t) - 1) / 2);
			if (cmp(aTHX_ *q, *p) <= sense) {
			    t = p;
			} else b = p + 1;
		    }

		    /* Copy all the strictly low elements */

		    if (q == f1) {
			FROMTOUPTO(f2, tp2, t);
			*tp2++ = *f1++;
		    } else {
			FROMTOUPTO(f1, tp2, t);
			*tp2++ = *f2++;
		    }
		}

		/* Run out remaining list */
		if (f1 == l1) {
		    if (f2 < l2) FROMTOUPTO(f2, tp2, l2);
		} else	      FROMTOUPTO(f1, tp2, l1);
		p1 = NEXT(p1) = POTHER(tp2, list2, list1);

		if (--level == 0) goto done;
		--stackp;
		t = list1; list1 = list2; list2 = t;	/* swap lists */
	    } while ((runs = stackp->runs) == 0);
	}

	stackp->runs = 0;		/* current run will finish level */
	/* While there are more than 2 runs remaining,
	 * turn them into exactly 2 runs (at the "other" level),
	 * each made up of approximately half the runs.
	 * Stack the second half for later processing,
	 * and set about producing the first half now.
	 */
	while (runs > 2) {
	    ++level;
	    ++stackp;
	    stackp->offset = offset;
	    runs -= stackp->runs = runs / 2;
	}
	/* We must construct a single run from 1 or 2 runs.
	 * All the original runs are in which[0] == base.
	 * The run we construct must end up in which[level&1].
	 */
	iwhich = level & 1;
	if (runs == 1) {
	    /* Constructing a single run from a single run.
	     * If it's where it belongs already, there's nothing to do.
	     * Otherwise, copy it to where it belongs.
	     * A run of 1 is either a singleton at level 0,
	     * or the second half of a split 3.  In neither event
	     * is it necessary to set offset.  It will be set by the merge
	     * that immediately follows.
	     */
	    if (iwhich) {	/* Belongs in aux, currently in base */
		f1 = b = PINDEX(base, offset);	/* where list starts */
		f2 = PINDEX(aux, offset);	/* where list goes */
		t = NEXT(f2);			/* where list will end */
		offset = PNELEM(aux, t);	/* offset thereof */
		t = PINDEX(base, offset);	/* where it currently ends */
		FROMTOUPTO(f1, f2, t);		/* copy */
		NEXT(b) = t;			/* set up parallel pointer */
	    } else if (level == 0) goto done;	/* single run at level 0 */
	} else {
	    /* Constructing a single run from two runs.
	     * The merge code at the top will do that.
	     * We need only make sure the two runs are in the "other" array,
	     * so they'll end up in the correct array after the merge.
	     */
	    ++level;
	    ++stackp;
	    stackp->offset = offset;
	    stackp->runs = 0;	/* take care of both runs, trigger merge */
	    if (!iwhich) {	/* Merged runs belong in aux, copy 1st */
		f1 = b = PINDEX(base, offset);	/* where first run starts */
		f2 = PINDEX(aux, offset);	/* where it will be copied */
		t = NEXT(f2);			/* where first run will end */
		offset = PNELEM(aux, t);	/* offset thereof */
		p = PINDEX(base, offset);	/* end of first run */
		t = NEXT(t);			/* where second run will end */
		t = PINDEX(base, PNELEM(aux, t)); /* where it now ends */
		FROMTOUPTO(f1, f2, t);		/* copy both runs */
		NEXT(b) = p;			/* parallel pointer for 1st */
		NEXT(p) = t;			/* ... and for second */
	    }
	}
    }
done:
    if (aux != small) Safefree(aux);	/* free iff allocated */
    return;
}

/*
 * The quicksort implementation was derived from source code contributed
 * by Tom Horsley.
 *
 * NOTE: this code was derived from Tom Horsley's qsort replacement
 * and should not be confused with the original code.
 */

/* Copyright (C) Tom Horsley, 1997. All rights reserved.

   Permission granted to distribute under the same terms as perl which are
   (briefly):

	This program is free software; you can redistribute it and/or modify
	it under the terms of either:

	a) the GNU General Public License as published by the Free
	Software Foundation; either version 1, or (at your option) any
	later version, or

	b) the "Artistic License" which comes with this Kit.

   Details on the perl license can be found in the perl source code which
   may be located via the www.perl.com web page.

   This is the most wonderfulest possible qsort I can come up with (and
   still be mostly portable).  My (limited) tests indicate it consistently
   does about 20% fewer calls to compare than does the qsort in the Visual
   C++ library; other vendors may vary.

   Some of the ideas in here can be found in "Algorithms" by Sedgewick,
   others I invented myself (or more likely re-invented since they seemed
   pretty obvious once I watched the algorithm operate for a while).

   Most of this code was written while watching the Marlins sweep the Giants
   in the 1997 National League Playoffs - no Braves fans allowed to use this
   code (just kidding :-).

   I realize that if I wanted to be true to the perl tradition, the only
   comment in this file would be something like:

   ...they shuffled back towards the rear of the line. 'No, not at the
   rear!' the slave-driver shouted. 'Three files up. And stay there...

   However, I really needed to violate that tradition just so I could keep
   track of what happens myself, not to mention some poor fool trying to
   understand this years from now :-).
*/

580
581/* ********************************************************** Configuration */
582
583#ifndef QSORT_ORDER_GUESS
584#define QSORT_ORDER_GUESS 2 /* Select doubling version of the netBSD trick */
585#endif
586
587/* QSORT_MAX_STACK is the largest number of partitions that can be stacked up for
588 future processing - a good max upper bound is log base 2 of memory size
589 (32 on 32 bit machines, 64 on 64 bit machines, etc). In reality can
590 safely be smaller than that since the program is taking up some space and
591 most operating systems only let you grab some subset of contiguous
592 memory (not to mention that you are normally sorting data larger than
593 1 byte element size :-).
594*/
595#ifndef QSORT_MAX_STACK
596#define QSORT_MAX_STACK 32
597#endif
598
599/* QSORT_BREAK_EVEN is the size of the largest partition we should insertion sort.
600 Anything bigger and we use qsort. If you make this too small, the qsort
601 will probably break (or become less efficient), because it doesn't expect
602 the middle element of a partition to be the same as the right or left -
603 you have been warned).
604*/
605#ifndef QSORT_BREAK_EVEN
606#define QSORT_BREAK_EVEN 6
607#endif
608
/* QSORT_PLAY_SAFE is the size of the largest partition we're willing
   to go quadratic on.  We inoculate larger partitions against
   quadratic behavior by shuffling them before sorting.  This is not
   an absolute guarantee of non-quadratic behavior, but it would take
   staggeringly bad luck to pick extreme elements as the pivot
   from randomized data.
*/
#ifndef QSORT_PLAY_SAFE
#define QSORT_PLAY_SAFE 255
#endif

/* ************************************************************* Data Types */

/* hold left and right index values of a partition waiting to be sorted (the
   partition includes both left and right - right is NOT one past the end or
   anything like that).
*/
struct partition_stack_entry {
   int left;
   int right;
#ifdef QSORT_ORDER_GUESS
   int qsort_break_even;
#endif
};

/* ******************************************************* Shorthand Macros */

/* Note that these macros will be used from inside the qsort function where
   we happen to know that the variable 'elt_size' contains the size of an
   array element and the variable 'temp' points to enough space to hold a
   temp element and the variable 'array' points to the array being sorted
   and 'compare' is the pointer to the compare routine.

   Also note that there are very many highly architecture specific ways
   these might be sped up, but this is simply the most generally portable
   code I could think of.
*/

/* Return < 0, == 0, or > 0 as the value of elt1 is <, ==, or > the value
   of elt2.
*/
#define qsort_cmp(elt1, elt2) \
   ((*compare)(aTHX_ array[elt1], array[elt2]))

#ifdef QSORT_ORDER_GUESS
#define QSORT_NOTICE_SWAP swapped++;
#else
#define QSORT_NOTICE_SWAP
#endif

/* swaps contents of array elements elt1, elt2.
*/
#define qsort_swap(elt1, elt2) \
   STMT_START { \
      QSORT_NOTICE_SWAP \
      temp = array[elt1]; \
      array[elt1] = array[elt2]; \
      array[elt2] = temp; \
   } STMT_END

/* rotate contents of elt1, elt2, elt3 such that elt1 gets elt2, elt2 gets
   elt3 and elt3 gets elt1.
*/
#define qsort_rotate(elt1, elt2, elt3) \
   STMT_START { \
      QSORT_NOTICE_SWAP \
      temp = array[elt1]; \
      array[elt1] = array[elt2]; \
      array[elt2] = array[elt3]; \
      array[elt3] = temp; \
   } STMT_END

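/* Illustrative example (not in the original source): if elements 1, 2, 3
   of 'array' hold (X, Y, Z), then qsort_rotate(1, 2, 3) leaves them as
   (Y, Z, X) - one temp and four assignments, instead of the six a pair
   of qsort_swap calls would cost.
*/
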
/* ************************************************************ Debug stuff */

#ifdef QSORT_DEBUG

static void
break_here()
{
   return; /* good place to set a breakpoint */
}

#define qsort_assert(t) (void)( (t) || (break_here(), 0) )

static void
doqsort_all_asserts(
   void * array,
   size_t num_elts,
   size_t elt_size,
   int (*compare)(const void * elt1, const void * elt2),
   int pc_left, int pc_right, int u_left, int u_right)
{
   int i;

   qsort_assert(pc_left <= pc_right);
   qsort_assert(u_right < pc_left);
   qsort_assert(pc_right < u_left);
   for (i = u_right + 1; i < pc_left; ++i) {
      qsort_assert(qsort_cmp(i, pc_left) < 0);
   }
   for (i = pc_left; i < pc_right; ++i) {
      qsort_assert(qsort_cmp(i, pc_right) == 0);
   }
   for (i = pc_right + 1; i < u_left; ++i) {
      qsort_assert(qsort_cmp(pc_right, i) < 0);
   }
}

#define qsort_all_asserts(PC_LEFT, PC_RIGHT, U_LEFT, U_RIGHT) \
   doqsort_all_asserts(array, num_elts, elt_size, compare, \
                 PC_LEFT, PC_RIGHT, U_LEFT, U_RIGHT)

#else

#define qsort_assert(t) ((void)0)

#define qsort_all_asserts(PC_LEFT, PC_RIGHT, U_LEFT, U_RIGHT) ((void)0)

#endif

/* ****************************************************************** qsort */

STATIC void /* the standard unstable (u) quicksort (qsort) */
S_qsortsvu(pTHX_ SV ** array, size_t num_elts, SVCOMPARE_t compare)
{
   register SV * temp;

   struct partition_stack_entry partition_stack[QSORT_MAX_STACK];
   int next_stack_entry = 0;

   int part_left;
   int part_right;
#ifdef QSORT_ORDER_GUESS
   int qsort_break_even;
   int swapped;
#endif

   /* Make sure we actually have work to do.
   */
   if (num_elts <= 1) {
      return;
   }

   /* Inoculate large partitions against quadratic behavior */
   if (num_elts > QSORT_PLAY_SAFE) {
      register size_t n, j;
      register SV **q;
      for (n = num_elts, q = array; n > 1; ) {
         j = n-- * Drand01();
         temp = q[j];
         q[j] = q[n];
         q[n] = temp;
      }
   }

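   /* Illustrative note (not in the original source): the loop above is a
      Fisher-Yates shuffle.  Each pass draws a uniformly random index j in
      [0, n) and swaps q[j] into the last unshuffled slot, so (given a good
      Drand01) every permutation of the partition is equally likely.
   */
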
   /* Setup the initial partition definition and fall into the sorting loop
   */
   part_left = 0;
   part_right = (int)(num_elts - 1);
#ifdef QSORT_ORDER_GUESS
   qsort_break_even = QSORT_BREAK_EVEN;
#else
#define qsort_break_even QSORT_BREAK_EVEN
#endif
   for ( ; ; ) {
      if ((part_right - part_left) >= qsort_break_even) {
         /* OK, this is gonna get hairy, so let's try to document all the
            concepts and abbreviations and variables and what they keep
            track of:

            pc: pivot chunk - the set of array elements we accumulate in the
            middle of the partition, all equal in value to the original
            pivot element selected. The pc is defined by:

            pc_left - the leftmost array index of the pc
            pc_right - the rightmost array index of the pc

            we start with pc_left == pc_right and only one element
            in the pivot chunk (but it can grow during the scan).

            u: uncompared elements - the set of elements in the partition
            we have not yet compared to the pivot value. There are two
            uncompared sets during the scan - one to the left of the pc
            and one to the right.

            u_right - the rightmost index of the left side's uncompared set
            u_left - the leftmost index of the right side's uncompared set

            The leftmost index of the left side's uncompared set
            doesn't need its own variable because it is always defined
            by the leftmost edge of the whole partition (part_left). The
            same goes for the rightmost edge of the right partition
            (part_right).

            We know there are no uncompared elements on the left once we
            get u_right < part_left and no uncompared elements on the
            right once u_left > part_right. When both these conditions
            are met, we have completed the scan of the partition.

            Any elements which are between the pivot chunk and the
            uncompared elements should be less than the pivot value on
            the left side and greater than the pivot value on the right
            side (in fact, the goal of the whole algorithm is to arrange
            for that to be true and make the groups of less-than and
            greater-than elements into new partitions to sort again).

            As you marvel at the complexity of the code and wonder why it
            has to be so confusing, consider some of the things this level
            of confusion brings:

            Once I do a compare, I squeeze every ounce of juice out of it. I
            never do compare calls I don't have to do, and I certainly never
            do redundant calls.

            I also never swap any elements unless I can prove there is a
            good reason. Many sort algorithms will swap a known value with
            an uncompared value just to get things in the right place (or
            avoid complexity :-), but that uncompared value, once it gets
            compared, may then have to be swapped again. A lot of the
            complexity of this code is due to the fact that it never swaps
            anything except compared values, and it only swaps them when the
            compare shows they are out of position.
         */
         int pc_left, pc_right;
         int u_right, u_left;

         int s;

         pc_left = ((part_left + part_right) / 2);
         pc_right = pc_left;
         u_right = pc_left - 1;
         u_left = pc_right + 1;

         /* Qsort works best when the pivot value is also the median value
            in the partition (unfortunately you can't find the median value
            without first sorting :-), so to give the algorithm a helping
            hand, we pick 3 elements and sort them and use the median value
            of that tiny set as the pivot value.

            Some versions of qsort like to use the left middle and right as
            the 3 elements to sort so they can ensure the ends of the
            partition will contain values which will stop the scan in the
            compare loop, but when you have to call an arbitrarily complex
            routine to do a compare, it's really better to just keep track of
            array index values to know when you hit the edge of the
            partition and avoid the extra compare. An even better reason to
            avoid using a compare call is the fact that you can drop off the
            edge of the array if someone foolishly provides you with an
            unstable compare function that doesn't always provide consistent
            results.

            So, since it is simpler for us to compare the three adjacent
            elements in the middle of the partition, those are the ones we
            pick here (conveniently pointed at by u_right, pc_left, and
            u_left). The values of the left, center, and right elements
            are referred to as l, c, and r in the following comments.
         */

#ifdef QSORT_ORDER_GUESS
         swapped = 0;
#endif
         s = qsort_cmp(u_right, pc_left);
         if (s < 0) {
            /* l < c */
            s = qsort_cmp(pc_left, u_left);
            /* if l < c, c < r - already in order - nothing to do */
            if (s == 0) {
               /* l < c, c == r - already in order, pc grows */
               ++pc_right;
               qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
            } else if (s > 0) {
               /* l < c, c > r - need to know more */
               s = qsort_cmp(u_right, u_left);
               if (s < 0) {
                  /* l < c, c > r, l < r - swap c & r to get ordered */
                  qsort_swap(pc_left, u_left);
                  qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
               } else if (s == 0) {
                  /* l < c, c > r, l == r - swap c&r, grow pc */
                  qsort_swap(pc_left, u_left);
                  --pc_left;
                  qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
               } else {
                  /* l < c, c > r, l > r - make lcr into rlc to get ordered */
                  qsort_rotate(pc_left, u_right, u_left);
                  qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
               }
            }
         } else if (s == 0) {
            /* l == c */
            s = qsort_cmp(pc_left, u_left);
            if (s < 0) {
               /* l == c, c < r - already in order, grow pc */
               --pc_left;
               qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
            } else if (s == 0) {
               /* l == c, c == r - already in order, grow pc both ways */
               --pc_left;
               ++pc_right;
               qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
            } else {
               /* l == c, c > r - swap l & r, grow pc */
               qsort_swap(u_right, u_left);
               ++pc_right;
               qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
            }
         } else {
            /* l > c */
            s = qsort_cmp(pc_left, u_left);
            if (s < 0) {
               /* l > c, c < r - need to know more */
               s = qsort_cmp(u_right, u_left);
               if (s < 0) {
                  /* l > c, c < r, l < r - swap l & c to get ordered */
                  qsort_swap(u_right, pc_left);
                  qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
               } else if (s == 0) {
                  /* l > c, c < r, l == r - swap l & c, grow pc */
                  qsort_swap(u_right, pc_left);
                  ++pc_right;
                  qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
               } else {
                  /* l > c, c < r, l > r - rotate lcr into crl to order */
                  qsort_rotate(u_right, pc_left, u_left);
                  qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
               }
            } else if (s == 0) {
               /* l > c, c == r - swap ends, grow pc */
               qsort_swap(u_right, u_left);
               --pc_left;
               qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
            } else {
               /* l > c, c > r - swap ends to get in order */
               qsort_swap(u_right, u_left);
               qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
            }
         }
         /* We now know the 3 middle elements have been compared and
            arranged in the desired order, so we can shrink the uncompared
            sets on both sides
         */
         --u_right;
         ++u_left;
         qsort_all_asserts(pc_left, pc_right, u_left, u_right);

         /* The above massive nested if was the simple part :-). We now have
            the middle 3 elements ordered and we need to scan through the
            uncompared sets on either side, swapping elements that are on
            the wrong side or simply shuffling equal elements around to get
            all equal elements into the pivot chunk.
         */

         for ( ; ; ) {
            int still_work_on_left;
            int still_work_on_right;

            /* Scan the uncompared values on the left. If I find a value
               equal to the pivot value, move it over so it is adjacent to
               the pivot chunk and expand the pivot chunk. If I find a value
               less than the pivot value, then just leave it - it's already
               on the correct side of the partition. If I find a greater
               value, then stop the scan.
            */
            while ((still_work_on_left = (u_right >= part_left))) {
               s = qsort_cmp(u_right, pc_left);
               if (s < 0) {
                  --u_right;
               } else if (s == 0) {
                  --pc_left;
                  if (pc_left != u_right) {
                     qsort_swap(u_right, pc_left);
                  }
                  --u_right;
               } else {
                  break;
               }
               qsort_assert(u_right < pc_left);
               qsort_assert(pc_left <= pc_right);
               qsort_assert(qsort_cmp(u_right + 1, pc_left) <= 0);
               qsort_assert(qsort_cmp(pc_left, pc_right) == 0);
            }

            /* Do a mirror image scan of uncompared values on the right
            */
            while ((still_work_on_right = (u_left <= part_right))) {
               s = qsort_cmp(pc_right, u_left);
               if (s < 0) {
                  ++u_left;
               } else if (s == 0) {
                  ++pc_right;
                  if (pc_right != u_left) {
                     qsort_swap(pc_right, u_left);
                  }
                  ++u_left;
               } else {
                  break;
               }
               qsort_assert(u_left > pc_right);
               qsort_assert(pc_left <= pc_right);
               qsort_assert(qsort_cmp(pc_right, u_left - 1) <= 0);
               qsort_assert(qsort_cmp(pc_left, pc_right) == 0);
            }

            if (still_work_on_left) {
               /* I know I have a value on the left side which needs to be
                  on the right side, but I need to know more to decide
                  exactly the best thing to do with it.
               */
               if (still_work_on_right) {
                  /* I know I have values on both sides which are out of
                     position. This is a big win because I kill two birds
                     with one swap (so to speak). I can advance the
                     uncompared pointers on both sides after swapping both
                     of them into the right place.
                  */
                  qsort_swap(u_right, u_left);
                  --u_right;
                  ++u_left;
                  qsort_all_asserts(pc_left, pc_right, u_left, u_right);
               } else {
                  /* I have an out of position value on the left, but the
                     right is fully scanned, so I "slide" the pivot chunk
                     and any less-than values left one to make room for the
                     greater value over on the right. If the out of position
                     value is immediately adjacent to the pivot chunk (there
                     are no less-than values), I can do that with a swap,
                     otherwise, I have to rotate one of the less than values
                     into the former position of the out of position value
                     and the right end of the pivot chunk into the left end
                     (got all that?).
                  */
                  --pc_left;
                  if (pc_left == u_right) {
                     qsort_swap(u_right, pc_right);
                     qsort_all_asserts(pc_left, pc_right-1, u_left, u_right-1);
                  } else {
                     qsort_rotate(u_right, pc_left, pc_right);
                     qsort_all_asserts(pc_left, pc_right-1, u_left, u_right-1);
                  }
                  --pc_right;
                  --u_right;
               }
            } else if (still_work_on_right) {
               /* Mirror image of complex case above: I have an out of
                  position value on the right, but the left is fully
                  scanned, so I need to shuffle things around to make room
                  for the right value on the left.
               */
               ++pc_right;
               if (pc_right == u_left) {
                  qsort_swap(u_left, pc_left);
                  qsort_all_asserts(pc_left+1, pc_right, u_left+1, u_right);
               } else {
                  qsort_rotate(pc_right, pc_left, u_left);
                  qsort_all_asserts(pc_left+1, pc_right, u_left+1, u_right);
               }
               ++pc_left;
               ++u_left;
            } else {
               /* No more scanning required on either side of partition,
                  break out of loop and figure out next set of partitions
               */
               break;
            }
         }

         /* The elements in the pivot chunk are now in the right place. They
            will never move or be compared again. All I have to do is decide
            what to do with the stuff to the left and right of the pivot
            chunk.

            Notes on the QSORT_ORDER_GUESS ifdef code:

            1. If I just built these partitions without swapping any (or
               very many) elements, there is a chance that the elements are
               already ordered properly (being properly ordered will
               certainly result in no swapping, but the converse can't be
               proved :-).

            2. A (properly written) insertion sort will run faster on
               already ordered data than qsort will.

            3. Perhaps there is some way to make a good guess about
               switching to an insertion sort earlier than partition size 6
               (for instance - we could save the partition size on the stack
               and increase the size each time we find we didn't swap, thus
               switching to insertion sort earlier for partitions with a
               history of not swapping).

            4. Naturally, if I just switch right away, it will make
               artificial benchmarks with pure ascending (or descending)
               data look really good, but is that a good reason in general?
               Hard to say...
         */

#ifdef QSORT_ORDER_GUESS
         if (swapped < 3) {
#if QSORT_ORDER_GUESS == 1
            qsort_break_even = (part_right - part_left) + 1;
#endif
#if QSORT_ORDER_GUESS == 2
            qsort_break_even *= 2;
#endif
#if QSORT_ORDER_GUESS == 3
            int prev_break = qsort_break_even;
            qsort_break_even *= qsort_break_even;
            if (qsort_break_even < prev_break) {
               qsort_break_even = (part_right - part_left) + 1;
            }
#endif
         } else {
            qsort_break_even = QSORT_BREAK_EVEN;
         }
#endif

         if (part_left < pc_left) {
            /* There are elements on the left which need more processing.
               Check the right as well before deciding what to do.
            */
            if (pc_right < part_right) {
               /* We have two partitions to be sorted. Stack the biggest one
                  and process the smallest one on the next iteration. This
                  minimizes the stack height by ensuring that any additional
                  stack entries must come from the smallest partition which
                  (because it is smallest) will have the fewest
                  opportunities to generate additional stack entries.
               */
               if ((part_right - pc_right) > (pc_left - part_left)) {
                  /* stack the right partition, process the left */
                  partition_stack[next_stack_entry].left = pc_right + 1;
                  partition_stack[next_stack_entry].right = part_right;
#ifdef QSORT_ORDER_GUESS
                  partition_stack[next_stack_entry].qsort_break_even = qsort_break_even;
#endif
                  part_right = pc_left - 1;
               } else {
                  /* stack the left partition, process the right */
                  partition_stack[next_stack_entry].left = part_left;
                  partition_stack[next_stack_entry].right = pc_left - 1;
#ifdef QSORT_ORDER_GUESS
                  partition_stack[next_stack_entry].qsort_break_even = qsort_break_even;
#endif
                  part_left = pc_right + 1;
               }
               qsort_assert(next_stack_entry < QSORT_MAX_STACK);
               ++next_stack_entry;
            } else {
               /* The elements on the left are the only remaining elements
                  that need sorting, arrange for them to be processed as the
                  next partition.
               */
               part_right = pc_left - 1;
            }
         } else if (pc_right < part_right) {
            /* There is only one chunk on the right to be sorted, make it
               the new partition and loop back around.
            */
            part_left = pc_right + 1;
         } else {
            /* This whole partition wound up in the pivot chunk, so
               we need to get a new partition off the stack.
            */
            if (next_stack_entry == 0) {
               /* the stack is empty - we are done */
               break;
            }
            --next_stack_entry;
            part_left = partition_stack[next_stack_entry].left;
            part_right = partition_stack[next_stack_entry].right;
#ifdef QSORT_ORDER_GUESS
            qsort_break_even = partition_stack[next_stack_entry].qsort_break_even;
#endif
         }
      } else {
         /* This partition is too small to fool with qsort complexity, just
            do an ordinary insertion sort to minimize overhead.
         */
         int i;
         /* Assume 1st element is in right place already, and start checking
            at 2nd element to see where it should be inserted.
         */
         for (i = part_left + 1; i <= part_right; ++i) {
            int j;
            /* Scan (backwards - just in case 'i' is already in right place)
               through the elements already sorted to see if the ith element
               belongs ahead of one of them.
            */
            for (j = i - 1; j >= part_left; --j) {
               if (qsort_cmp(i, j) >= 0) {
                  /* i belongs right after j
                  */
                  break;
               }
            }
            ++j;
            if (j != i) {
               /* Looks like we really need to move some things
               */
               int k;
               temp = array[i];
               for (k = i - 1; k >= j; --k)
                  array[k + 1] = array[k];
               array[j] = temp;
            }
         }

         /* That partition is now sorted, grab the next one, or get out
            of the loop if there aren't any more.
         */

         if (next_stack_entry == 0) {
            /* the stack is empty - we are done */
            break;
         }
         --next_stack_entry;
         part_left = partition_stack[next_stack_entry].left;
         part_right = partition_stack[next_stack_entry].right;
#ifdef QSORT_ORDER_GUESS
         qsort_break_even = partition_stack[next_stack_entry].qsort_break_even;
#endif
      }
   }

   /* Believe it or not, the array is sorted at this point! */
}

/* Stabilize what is, presumably, an otherwise unstable sort method.
 * We do that by allocating (or having on hand) an array of pointers
 * that is the same size as the original array of elements to be sorted.
 * We initialize this parallel array with the addresses of the original
 * array elements.  This indirection can make you crazy.
 * Some pictures can help.  After initializing, we have
 *
 *  indir                  list1
 *  +----+                 +----+
 *  |    | --------------> |    | ------> first element to be sorted
 *  +----+                 +----+
 *  |    | --------------> |    | ------> second element to be sorted
 *  +----+                 +----+
 *  |    | --------------> |    | ------> third element to be sorted
 *  +----+                 +----+
 *   ...
 *  +----+                 +----+
 *  |    | --------------> |    | ------> n-1st element to be sorted
 *  +----+                 +----+
 *  |    | --------------> |    | ------> n-th element to be sorted
 *  +----+                 +----+
 *
 * During the sort phase, we leave the elements of list1 where they are,
 * and sort the pointers in the indirect array in the same order determined
 * by the original comparison routine on the elements pointed to.
 * Because we don't move the elements of list1 around through
 * this phase, we can break ties on elements that compare equal
 * using their address in the list1 array, ensuring stability.
 * This leaves us with something looking like
 *
 *  indir                  list1
 *  +----+                 +----+
 *  |    | --+       +---> |    | ------> first element to be sorted
 *  +----+   |       |     +----+
 *  |    | --|-------|---> |    | ------> second element to be sorted
 *  +----+   |       |     +----+
 *  |    | --|-------+ +-> |    | ------> third element to be sorted
 *  +----+   |         |   +----+
 *   ...
 *  +----+    | |   | |    +----+
 *  |    | ---|-+   | +--> |    | ------> n-1st element to be sorted
 *  +----+    |     |      +----+
 *  |    | ---+     +----> |    | ------> n-th element to be sorted
 *  +----+                 +----+
 *
 * where the i-th element of the indirect array points to the element
 * that should be i-th in the sorted array.  After the sort phase,
 * we have to put the elements of list1 into the places
 * dictated by the indirect array.
 */

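/* A worked example of the placement pass in S_qsortsv below (illustrative,
 * not in the original source): suppose nmemb == 3 and q = list1 holds
 * (C A B), whose sorted order is (A B C).  After the indirect sort,
 * indir is (&q[1], &q[2], &q[0]).  The pass starts at n == 2:
 * j = indir[2] - q == 0, so C is saved, the cycle 0 -> 1 -> 2 moves A
 * into q[0] and B into q[1], and the saved C is dropped into q[2],
 * leaving (A B C) with each element moved exactly once.
 */
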
static SVCOMPARE_t RealCmp;

static I32
cmpindir(pTHX_ gptr a, gptr b)
{
    I32 sense;
    gptr *ap = (gptr *)a;
    gptr *bp = (gptr *)b;

    if ((sense = RealCmp(aTHX_ *ap, *bp)) == 0)
	sense = (ap > bp) ? 1 : ((ap < bp) ? -1 : 0);
    return sense;
}

STATIC void
S_qsortsv(pTHX_ gptr *list1, size_t nmemb, SVCOMPARE_t cmp)
{
    SV **hintsvp;

    if (SORTHINTS(hintsvp) & HINT_SORT_STABLE) {
	register gptr **pp, *q;
	register size_t n, j, i;
	gptr *small[SMALLSORT], **indir, tmp;
	SVCOMPARE_t savecmp;
	if (nmemb <= 1) return;	/* sorted trivially */

	/* Small arrays can use the stack, big ones must be allocated */
	if (nmemb <= SMALLSORT) indir = small;
	else { New(1799, indir, nmemb, gptr *); }

	/* Copy pointers to original array elements into indirect array */
	for (n = nmemb, pp = indir, q = list1; n--; ) *pp++ = q++;

	savecmp = RealCmp;	/* Save current comparison routine, if any */
	RealCmp = cmp;	/* Put comparison routine where cmpindir can find it */

	/* sort, with indirection */
	S_qsortsvu(aTHX_ (gptr *)indir, nmemb, cmpindir);

	pp = indir;
	q = list1;
	for (n = nmemb; n--; ) {
	    /* Assert A: all elements of q with index > n are already
	     * in place.  This is vacuously true at the start, and we
	     * put element n where it belongs below (if it wasn't
	     * already where it belonged).  Assert B: we only move
	     * elements that aren't where they belong,
	     * so, by A, we never tamper with elements above n.
	     */
	    j = pp[n] - q;	/* This sets j so that q[j] is
				 * at pp[n].  *pp[j] belongs in
				 * q[j], by construction.
				 */
	    if (n != j) {	/* all's well if n == j */
		tmp = q[j];	/* save what's in q[j] */
		do {
		    q[j] = *pp[j];	/* put *pp[j] where it belongs */
		    i = pp[j] - q;	/* the index in q of the element
					 * just moved */
		    pp[j] = q + j;	/* this is ok now */
		} while ((j = i) != n);
		/* There are only finitely many (nmemb) addresses
		 * in the pp array.
		 * So we must eventually revisit an index we saw before.
		 * Suppose the first revisited index is k != n.
		 * An index is visited because something else belongs there.
		 * If we visit k twice, then two different elements must
		 * belong in the same place, which cannot be.
		 * So j must get back to n, the loop terminates,
		 * and we put the saved element where it belongs.
		 */
		q[n] = tmp;	/* put what belongs into
				 * the n-th element */
	    }
	}

	/* free iff allocated */
	if (indir != small) { Safefree(indir); }
	/* restore prevailing comparison routine */
	RealCmp = savecmp;
    } else {
	S_qsortsvu(aTHX_ list1, nmemb, cmp);
    }
}

/*
=head1 Array Manipulation Functions

=for apidoc sortsv

Sort an array. Here is an example:

    sortsv(AvARRAY(av), av_len(av)+1, Perl_sv_cmp_locale);

=cut
*/

void
Perl_sortsv(pTHX_ SV **array, size_t nmemb, SVCOMPARE_t cmp)
{
    void (*sortsvp)(pTHX_ SV **array, size_t nmemb, SVCOMPARE_t cmp) =
	S_mergesortsv;
    SV **hintsvp;
    I32 hints;

    if ((hints = SORTHINTS(hintsvp))) {
	if (hints & HINT_SORT_QUICKSORT)
	    sortsvp = S_qsortsv;
	else {
	    if (hints & HINT_SORT_MERGESORT)
		sortsvp = S_mergesortsv;
	    else
		sortsvp = S_mergesortsv;
	}
    }

    sortsvp(aTHX_ array, nmemb, cmp);
}

PP(pp_sort)
{
    dSP; dMARK; dORIGMARK;
    register SV **up;
    SV **myorigmark = ORIGMARK;
    register I32 max;
    HV *stash;
    GV *gv;
    CV *cv = 0;
    I32 gimme = GIMME;
    OP* nextop = PL_op->op_next;
    I32 overloading = 0;
    bool hasargs = FALSE;
    I32 is_xsub = 0;

    if (gimme != G_ARRAY) {
	SP = MARK;
	RETPUSHUNDEF;
    }

    ENTER;
    SAVEVPTR(PL_sortcop);
    if (PL_op->op_flags & OPf_STACKED) {
	if (PL_op->op_flags & OPf_SPECIAL) {
	    OP *kid = cLISTOP->op_first->op_sibling;	/* pass pushmark */
	    kid = kUNOP->op_first;			/* pass rv2gv */
	    kid = kUNOP->op_first;			/* pass leave */
	    PL_sortcop = kid->op_next;
	    stash = CopSTASH(PL_curcop);
	}
	else {
	    cv = sv_2cv(*++MARK, &stash, &gv, 0);
	    if (cv && SvPOK(cv)) {
		STRLEN n_a;
		char *proto = SvPV((SV*)cv, n_a);
		if (proto && strEQ(proto, "$$")) {
		    hasargs = TRUE;
		}
	    }
	    if (!(cv && CvROOT(cv))) {
		if (cv && CvXSUB(cv)) {
		    is_xsub = 1;
		}
		else if (gv) {
		    SV *tmpstr = sv_newmortal();
		    gv_efullname3(tmpstr, gv, Nullch);
		    DIE(aTHX_ "Undefined sort subroutine \"%s\" called",
			SvPVX(tmpstr));
		}
		else {
		    DIE(aTHX_ "Undefined subroutine in sort");
		}
	    }

	    if (is_xsub)
		PL_sortcop = (OP*)cv;
	    else {
		PL_sortcop = CvSTART(cv);
		SAVEVPTR(CvROOT(cv)->op_ppaddr);
		CvROOT(cv)->op_ppaddr = PL_ppaddr[OP_NULL];

		SAVEVPTR(PL_curpad);
		PL_curpad = AvARRAY((AV*)AvARRAY(CvPADLIST(cv))[1]);
	    }
	}
    }
    else {
	PL_sortcop = Nullop;
	stash = CopSTASH(PL_curcop);
    }

    up = myorigmark + 1;
    while (MARK < SP) {	/* This may or may not shift down one here. */
	/*SUPPRESS 560*/
	if ((*up = *++MARK)) {			/* Weed out nulls. */
	    SvTEMP_off(*up);
	    if (!PL_sortcop && !SvPOK(*up)) {
		STRLEN n_a;
		if (SvAMAGIC(*up))
		    overloading = 1;
		else
		    (void)sv_2pv(*up, &n_a);
	    }
	    up++;
	}
    }
    max = --up - myorigmark;
    if (PL_sortcop) {
	if (max > 1) {
	    PERL_CONTEXT *cx;
	    SV** newsp;
	    bool oldcatch = CATCH_GET;

	    SAVETMPS;
	    SAVEOP();

	    CATCH_SET(TRUE);
	    PUSHSTACKi(PERLSI_SORT);
	    if (!hasargs && !is_xsub) {
		if (PL_sortstash != stash || !PL_firstgv || !PL_secondgv) {
		    SAVESPTR(PL_firstgv);
		    SAVESPTR(PL_secondgv);
		    PL_firstgv = gv_fetchpv("a", TRUE, SVt_PV);
		    PL_secondgv = gv_fetchpv("b", TRUE, SVt_PV);
		    PL_sortstash = stash;
		}
#ifdef USE_5005THREADS
		sv_lock((SV *)PL_firstgv);
		sv_lock((SV *)PL_secondgv);
#endif
		SAVESPTR(GvSV(PL_firstgv));
		SAVESPTR(GvSV(PL_secondgv));
	    }

	    PUSHBLOCK(cx, CXt_NULL, PL_stack_base);
	    if (!(PL_op->op_flags & OPf_SPECIAL)) {
		cx->cx_type = CXt_SUB;
		cx->blk_gimme = G_SCALAR;
		PUSHSUB(cx);
		if (!CvDEPTH(cv))
		    (void)SvREFCNT_inc(cv); /* in preparation for POPSUB */
	    }
	    PL_sortcxix = cxstack_ix;

	    if (hasargs && !is_xsub) {
		/* This is mostly copied from pp_entersub */
		AV *av = (AV*)PL_curpad[0];

#ifndef USE_5005THREADS
		cx->blk_sub.savearray = GvAV(PL_defgv);
		GvAV(PL_defgv) = (AV*)SvREFCNT_inc(av);
#endif /* USE_5005THREADS */
		cx->blk_sub.oldcurpad = PL_curpad;
		cx->blk_sub.argarray = av;
	    }
	    sortsv((myorigmark+1), max,
		   is_xsub ? sortcv_xsub : hasargs ? sortcv_stacked : sortcv);

	    POPBLOCK(cx,PL_curpm);
	    PL_stack_sp = newsp;
	    POPSTACK;
	    CATCH_SET(oldcatch);
	}
    }
    else {
	if (max > 1) {
	    MEXTEND(SP, 20);	/* Can't afford stack realloc on signal. */
	    sortsv(ORIGMARK+1, max,
		   (PL_op->op_private & OPpSORT_NUMERIC)
		       ? ( (PL_op->op_private & OPpSORT_INTEGER)
			   ? ( overloading ? amagic_i_ncmp : sv_i_ncmp)
			   : ( overloading ? amagic_ncmp : sv_ncmp))
		       : ( IN_LOCALE_RUNTIME
			   ? ( overloading
			       ? amagic_cmp_locale
			       : sv_cmp_locale_static)
			   : ( overloading ? amagic_cmp : sv_cmp_static)));
	    if (PL_op->op_private & OPpSORT_REVERSE) {
		SV **p = ORIGMARK+1;
		SV **q = ORIGMARK+max;
		while (p < q) {
		    SV *tmp = *p;
		    *p++ = *q;
		    *q-- = tmp;
		}
	    }
	}
    }
    LEAVE;
    PL_stack_sp = ORIGMARK + max;
    return nextop;
}

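/* Illustrative examples (not in the original source) of how Perl-level
 * sorts map onto the dispatch in pp_sort above:
 *
 *	sort @x;		# no sort block: sv_cmp_static (or the
 *				# locale/amagic variants)
 *	sort { $a <=> $b } @x;	# compiled to OPpSORT_NUMERIC:
 *				# sv_ncmp (or amagic_ncmp)
 *	sort mysub @x;		# OPf_STACKED: PL_sortcop is set and
 *				# sortcv runs the sub for each comparison
 *	sort mysub2 @x;		# where mysub2 has a ($$) prototype:
 *				# sortcv_stacked passes a and b in @_
 */
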
static I32
sortcv(pTHX_ SV *a, SV *b)
{
    I32 oldsaveix = PL_savestack_ix;
    I32 oldscopeix = PL_scopestack_ix;
    I32 result;
    GvSV(PL_firstgv) = a;
    GvSV(PL_secondgv) = b;
    PL_stack_sp = PL_stack_base;
    PL_op = PL_sortcop;
    CALLRUNOPS(aTHX);
    if (PL_stack_sp != PL_stack_base + 1)
	Perl_croak(aTHX_ "Sort subroutine didn't return single value");
    if (!SvNIOKp(*PL_stack_sp))
	Perl_croak(aTHX_ "Sort subroutine didn't return a numeric value");
    result = SvIV(*PL_stack_sp);
    while (PL_scopestack_ix > oldscopeix) {
	LEAVE;
    }
    leave_scope(oldsaveix);
    return result;
}

static I32
sortcv_stacked(pTHX_ SV *a, SV *b)
{
    I32 oldsaveix = PL_savestack_ix;
    I32 oldscopeix = PL_scopestack_ix;
    I32 result;
    AV *av;

#ifdef USE_5005THREADS
    av = (AV*)PL_curpad[0];
#else
    av = GvAV(PL_defgv);
#endif

    if (AvMAX(av) < 1) {
	SV** ary = AvALLOC(av);
	if (AvARRAY(av) != ary) {
	    AvMAX(av) += AvARRAY(av) - AvALLOC(av);
	    SvPVX(av) = (char*)ary;
	}
	if (AvMAX(av) < 1) {
	    AvMAX(av) = 1;
	    Renew(ary,2,SV*);
	    SvPVX(av) = (char*)ary;
	}
    }
    AvFILLp(av) = 1;

    AvARRAY(av)[0] = a;
    AvARRAY(av)[1] = b;
    PL_stack_sp = PL_stack_base;
    PL_op = PL_sortcop;
    CALLRUNOPS(aTHX);
    if (PL_stack_sp != PL_stack_base + 1)
	Perl_croak(aTHX_ "Sort subroutine didn't return single value");
    if (!SvNIOKp(*PL_stack_sp))
	Perl_croak(aTHX_ "Sort subroutine didn't return a numeric value");
    result = SvIV(*PL_stack_sp);
    while (PL_scopestack_ix > oldscopeix) {
	LEAVE;
    }
    leave_scope(oldsaveix);
    return result;
}

static I32
sortcv_xsub(pTHX_ SV *a, SV *b)
{
    dSP;
    I32 oldsaveix = PL_savestack_ix;
    I32 oldscopeix = PL_scopestack_ix;
    I32 result;
    CV *cv=(CV*)PL_sortcop;

    SP = PL_stack_base;
    PUSHMARK(SP);
    EXTEND(SP, 2);
    *++SP = a;
    *++SP = b;
    PUTBACK;
    (void)(*CvXSUB(cv))(aTHX_ cv);
    if (PL_stack_sp != PL_stack_base + 1)
	Perl_croak(aTHX_ "Sort subroutine didn't return single value");
    if (!SvNIOKp(*PL_stack_sp))
	Perl_croak(aTHX_ "Sort subroutine didn't return a numeric value");
    result = SvIV(*PL_stack_sp);
    while (PL_scopestack_ix > oldscopeix) {
	LEAVE;
    }
    leave_scope(oldsaveix);
    return result;
}

static I32
sv_ncmp(pTHX_ SV *a, SV *b)
{
    NV nv1 = SvNV(a);
    NV nv2 = SvNV(b);
    return nv1 < nv2 ? -1 : nv1 > nv2 ? 1 : 0;
}

static I32
sv_i_ncmp(pTHX_ SV *a, SV *b)
{
    IV iv1 = SvIV(a);
    IV iv2 = SvIV(b);
    return iv1 < iv2 ? -1 : iv1 > iv2 ? 1 : 0;
}

/* Try the overloaded (amagic) comparison method "meth" on left and right
 * if either operand has overloading; leave the result (or Nullsv) in *svp.
 */
#define tryCALL_AMAGICbin(left,right,meth,svp) STMT_START { \
	  *svp = Nullsv;			\
	  if (PL_amagic_generation) { \
	      if (SvAMAGIC(left)||SvAMAGIC(right))\
		  *svp = amagic_call(left, \
				     right, \
				     CAT2(meth,_amg), \
				     0); \
	  } \
     } STMT_END

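/* Illustrative Perl-level example (not in the original source): given
 *
 *	package Num;
 *	use overload '<=>' => sub { $_[0]{v} <=> $_[1]{v} };
 *
 * the operands of a numeric sort are SvAMAGIC, so pp_sort sets its
 * "overloading" flag and selects amagic_ncmp below, which routes each
 * comparison through the overloaded method via tryCALL_AMAGICbin and
 * falls back to sv_ncmp when no overloaded method answers.
 */
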
static I32
amagic_ncmp(pTHX_ register SV *a, register SV *b)
{
    SV *tmpsv;
    tryCALL_AMAGICbin(a,b,ncmp,&tmpsv);
    if (tmpsv) {
	NV d;

	if (SvIOK(tmpsv)) {
	    I32 i = SvIVX(tmpsv);
	    if (i > 0)
		return 1;
	    return i? -1 : 0;
	}
	d = SvNV(tmpsv);
	if (d > 0)
	    return 1;
	return d? -1 : 0;
    }
    return sv_ncmp(aTHX_ a, b);
}

static I32
amagic_i_ncmp(pTHX_ register SV *a, register SV *b)
{
    SV *tmpsv;
    tryCALL_AMAGICbin(a,b,ncmp,&tmpsv);
    if (tmpsv) {
	NV d;

	if (SvIOK(tmpsv)) {
	    I32 i = SvIVX(tmpsv);
	    if (i > 0)
		return 1;
	    return i? -1 : 0;
	}
	d = SvNV(tmpsv);
	if (d > 0)
	    return 1;
	return d? -1 : 0;
    }
    return sv_i_ncmp(aTHX_ a, b);
}

static I32
amagic_cmp(pTHX_ register SV *str1, register SV *str2)
{
    SV *tmpsv;
    tryCALL_AMAGICbin(str1,str2,scmp,&tmpsv);
    if (tmpsv) {
	NV d;

	if (SvIOK(tmpsv)) {
	    I32 i = SvIVX(tmpsv);
	    if (i > 0)
		return 1;
	    return i? -1 : 0;
	}
	d = SvNV(tmpsv);
	if (d > 0)
	    return 1;
	return d? -1 : 0;
    }
    return sv_cmp(str1, str2);
}

static I32
amagic_cmp_locale(pTHX_ register SV *str1, register SV *str2)
{
    SV *tmpsv;
    tryCALL_AMAGICbin(str1,str2,scmp,&tmpsv);
    if (tmpsv) {
	NV d;

	if (SvIOK(tmpsv)) {
	    I32 i = SvIVX(tmpsv);
	    if (i > 0)
		return 1;
	    return i? -1 : 0;
	}
	d = SvNV(tmpsv);
	if (d > 0)
	    return 1;
	return d? -1 : 0;
    }
    return sv_cmp_locale(str1, str2);
}