perl5.git.perl.org Git - perl5.git/blame_incremental

... / ...

Commit	Line	Data
	1	/* pp_sort.c
	2	*
	3	* Copyright (c) 1991-2001, Larry Wall
	4	*
	5	* You may distribute under the terms of either the GNU General Public
	6	* License or the Artistic License, as specified in the README file.
	7	*
	8	*/
	9
	10	/*
	11	* ...they shuffled back towards the rear of the line. 'No, not at the
	12	* rear!' the slave-driver shouted. 'Three files up. And stay there...
	13	*/
	14
	15	#include "EXTERN.h"
	16	#define PERL_IN_PP_SORT_C
	17	#include "perl.h"
	18
	19	static I32 sortcv(pTHX_ SV a, SV b);
	20	static I32 sortcv_stacked(pTHX_ SV a, SV b);
	21	static I32 sortcv_xsub(pTHX_ SV a, SV b);
	22	static I32 sv_ncmp(pTHX_ SV a, SV b);
	23	static I32 sv_i_ncmp(pTHX_ SV a, SV b);
	24	static I32 amagic_ncmp(pTHX_ SV a, SV b);
	25	static I32 amagic_i_ncmp(pTHX_ SV a, SV b);
	26	static I32 amagic_cmp(pTHX_ SV a, SV b);
	27	static I32 amagic_cmp_locale(pTHX_ SV a, SV b);
	28
	29	#define sv_cmp_static Perl_sv_cmp
	30	#define sv_cmp_locale_static Perl_sv_cmp_locale
	31
	32	#define SORTHINTS(hintsvp) \
	33	((PL_hintgv && \
	34	(hintsvp = hv_fetch(GvHV(PL_hintgv), "SORT", 4, FALSE))) ? \
	35	(I32)SvIV(*hintsvp) : 0)
	36
	37	#ifndef SMALLSORT
	38	#define SMALLSORT (200)
	39	#endif
	40
	41	/*
	42	* The mergesort implementation is by Peter M. Mcilroy <pmcilroy@lucent.com>.
	43	*
	44	* The original code was written in conjunction with BSD Computer Software
	45	* Research Group at University of California, Berkeley.
	46	*
	47	* See also: "Optimistic Merge Sort" (SODA '92)
	48	*
	49	* The integration to Perl is by John P. Linderman <jpl@research.att.com>.
	50	*
	51	* The code can be distributed under the same terms as Perl itself.
	52	*
	53	*/
	54
	55
	56	typedef char * aptr; /* pointer for arithmetic on sizes */
	57	typedef SV * gptr; /* pointers in our lists */
	58
	59	/* Binary merge internal sort, with a few special mods
	60	** for the special perl environment it now finds itself in.
	61	**
	62	** Things that were once options have been hotwired
	63	** to values suitable for this use. In particular, we'll always
	64	** initialize looking for natural runs, we'll always produce stable
	65	** output, and we'll always do Peter McIlroy's binary merge.
	66	*/
	67
	68	/* Pointer types for arithmetic and storage and convenience casts */
	69
	70	#define APTR(P) ((aptr)(P))
	71	#define GPTP(P) ((gptr *)(P))
	72	#define GPPP(P) ((gptr **)(P))
	73
	74
	75	/* byte offset from pointer P to (larger) pointer Q */
	76	#define BYTEOFF(P, Q) (APTR(Q) - APTR(P))
	77
	78	#define PSIZE sizeof(gptr)
	79
	80	/* If PSIZE is power of 2, make PSHIFT that power, if that helps */
	81
	82	#ifdef PSHIFT
	83	#define PNELEM(P, Q) (BYTEOFF(P,Q) >> (PSHIFT))
	84	#define PNBYTE(N) ((N) << (PSHIFT))
	85	#define PINDEX(P, N) (GPTP(APTR(P) + PNBYTE(N)))
	86	#else
	87	/* Leave optimization to compiler */
	88	#define PNELEM(P, Q) (GPTP(Q) - GPTP(P))
	89	#define PNBYTE(N) ((N) * (PSIZE))
	90	#define PINDEX(P, N) (GPTP(P) + (N))
	91	#endif
	92
	93	/* Pointer into other corresponding to pointer into this */
	94	#define POTHER(P, THIS, OTHER) GPTP(APTR(OTHER) + BYTEOFF(THIS,P))
	95
	96	#define FROMTOUPTO(src, dst, lim) do dst++ = src++; while(src<lim)
	97
	98
	99	/* Runs are identified by a pointer in the auxilliary list.
	100	** The pointer is at the start of the list,
	101	** and it points to the start of the next list.
	102	** NEXT is used as an lvalue, too.
	103	*/
	104
	105	#define NEXT(P) (*GPPP(P))
	106
	107
	108	/* PTHRESH is the minimum number of pairs with the same sense to justify
	109	** checking for a run and extending it. Note that PTHRESH counts PAIRS,
	110	** not just elements, so PTHRESH == 8 means a run of 16.
	111	*/
	112
	113	#define PTHRESH (8)
	114
	115	/* RTHRESH is the number of elements in a run that must compare low
	116	** to the low element from the opposing run before we justify
	117	** doing a binary rampup instead of single stepping.
	118	** In random input, N in a row low should only happen with
	119	** probability 2^(1-N), so we can risk that we are dealing
	120	** with orderly input without paying much when we aren't.
	121	*/
	122
	123	#define RTHRESH (6)
	124
	125
	126	/*
	127	** Overview of algorithm and variables.
	128	** The array of elements at list1 will be organized into runs of length 2,
	129	** or runs of length >= 2 * PTHRESH. We only try to form long runs when
	130	** PTHRESH adjacent pairs compare in the same way, suggesting overall order.
	131	**
	132	** Unless otherwise specified, pair pointers address the first of two elements.
	133	**
	134	** b and b+1 are a pair that compare with sense ``sense''.
	135	** b is the ``bottom'' of adjacent pairs that might form a longer run.
	136	**
	137	** p2 parallels b in the list2 array, where runs are defined by
	138	** a pointer chain.
	139	**
	140	** t represents the ``top'' of the adjacent pairs that might extend
	141	** the run beginning at b. Usually, t addresses a pair
	142	** that compares with opposite sense from (b,b+1).
	143	** However, it may also address a singleton element at the end of list1,
	144	** or it may be equal to ``last'', the first element beyond list1.
	145	**
	146	** r addresses the Nth pair following b. If this would be beyond t,
	147	** we back it off to t. Only when r is less than t do we consider the
	148	** run long enough to consider checking.
	149	**
	150	** q addresses a pair such that the pairs at b through q already form a run.
	151	** Often, q will equal b, indicating we only are sure of the pair itself.
	152	** However, a search on the previous cycle may have revealed a longer run,
	153	** so q may be greater than b.
	154	**
	155	** p is used to work back from a candidate r, trying to reach q,
	156	** which would mean b through r would be a run. If we discover such a run,
	157	** we start q at r and try to push it further towards t.
	158	** If b through r is NOT a run, we detect the wrong order at (p-1,p).
	159	** In any event, after the check (if any), we have two main cases.
	160	**
	161	** 1) Short run. b <= q < p <= r <= t.
	162	** b through q is a run (perhaps trivial)
	163	** q through p are uninteresting pairs
	164	** p through r is a run
	165	**
	166	** 2) Long run. b < r <= q < t.
	167	** b through q is a run (of length >= 2 * PTHRESH)
	168	**
	169	** Note that degenerate cases are not only possible, but likely.
	170	** For example, if the pair following b compares with opposite sense,
	171	** then b == q < p == r == t.
	172	*/
	173
	174
	175	static IV
	176	dynprep(pTHX_ gptr list1, gptr list2, size_t nmemb, SVCOMPARE_t cmp)
	177	{
	178	I32 sense;
	179	register gptr b, p, q, t, *p2;
	180	register gptr c, last, r;
	181	gptr *savep;
	182	IV runs = 0;
	183
	184	b = list1;
	185	last = PINDEX(b, nmemb);
	186	sense = (cmp(aTHX_ b, (b+1)) > 0);
	187	for (p2 = list2; b < last; ) {
	188	/* We just started, or just reversed sense.
	189	** Set t at end of pairs with the prevailing sense.
	190	*/
	191	for (p = b+2, t = p; ++p < last; t = ++p) {
	192	if ((cmp(aTHX_ t, p) > 0) != sense) break;
	193	}
	194	q = b;
	195	/* Having laid out the playing field, look for long runs */
	196	do {
	197	p = r = b + (2 * PTHRESH);
	198	if (r >= t) p = r = t; /* too short to care about */
	199	else {
	200	while (((cmp(aTHX_ (p-1), p) > 0) == sense) &&
	201	((p -= 2) > q));
	202	if (p <= q) {
	203	/* b through r is a (long) run.
	204	** Extend it as far as possible.
	205	*/
	206	p = q = r;
	207	while (((p += 2) < t) &&
	208	((cmp(aTHX_ (p-1), p) > 0) == sense)) q = p;
	209	r = p = q + 2; /* no simple pairs, no after-run */
	210	}
	211	}
	212	if (q > b) { /* run of greater than 2 at b */
	213	savep = p;
	214	p = q += 2;
	215	/* pick up singleton, if possible */
	216	if ((p == t) &&
	217	((t + 1) == last) &&
	218	((cmp(aTHX_ (p-1), p) > 0) == sense))
	219	savep = r = p = q = last;
	220	p2 = NEXT(p2) = p2 + (p - b); ++runs;
	221	if (sense) while (b < --p) {
	222	c = *b;
	223	b++ = p;
	224	*p = c;
	225	}
	226	p = savep;
	227	}
	228	while (q < p) { /* simple pairs */
	229	p2 = NEXT(p2) = p2 + 2; ++runs;
	230	if (sense) {
	231	c = *q++;
	232	(q-1) = q;
	233	*q++ = c;
	234	} else q += 2;
	235	}
	236	if (((b = p) == t) && ((t+1) == last)) {
	237	NEXT(p2) = p2 + 1; ++runs;
	238	b++;
	239	}
	240	q = r;
	241	} while (b < t);
	242	sense = !sense;
	243	}
	244	return runs;
	245	}
	246
	247
	248	/* The original merge sort, in use since 5.7, was as fast as, or faster than,
	249	* qsort on many platforms, but slower than qsort, conspicuously so,
	250	* on others. The most likely explanation was platform-specific
	251	* differences in cache sizes and relative speeds.
	252	*
	253	* The quicksort divide-and-conquer algorithm guarantees that, as the
	254	* problem is subdivided into smaller and smaller parts, the parts
	255	* fit into smaller (and faster) caches. So it doesn't matter how
	256	* many levels of cache exist, quicksort will "find" them, and,
	257	* as long as smaller is faster, take advanatge of them.
	258	*
	259	* By contrast, consider how the original mergesort algorithm worked.
	260	* Suppose we have five runs (each typically of length 2 after dynprep).
	261	*
	262	* pass base aux
	263	* 0 1 2 3 4 5
	264	* 1 12 34 5
	265	* 2 1234 5
	266	* 3 12345
	267	* 4 12345
	268	*
	269	* Adjacent pairs are merged in "grand sweeps" through the input.
	270	* This means, on pass 1, the records in runs 1 and 2 aren't revisited until
	271	* runs 3 and 4 are merged and the runs from run 5 have been copied.
	272	* The only cache that matters is one large enough to hold all the input.
	273	* On some platforms, this may be many times slower than smaller caches.
	274	*
	275	* The following pseudo-code uses the same basic merge algorithm,
	276	* but in a divide-and-conquer way.
	277	*
	278	* # merge $runs runs at offset $offset of list $list1 into $list2.
	279	* # all unmerged runs ($runs == 1) originate in list $base.
	280	* sub mgsort2 {
	281	* my ($offset, $runs, $base, $list1, $list2) = @_;
	282	*
	283	* if ($runs == 1) {
	284	* if ($list1 is $base) copy run to $list2
	285	* return offset of end of list (or copy)
	286	* } else {
	287	* $off2 = mgsort2($offset, $runs-($runs/2), $base, $list2, $list1)
	288	* mgsort2($off2, $runs/2, $base, $list2, $list1)
	289	* merge the adjacent runs at $offset of $list1 into $list2
	290	* return the offset of the end of the merged runs
	291	* }
	292	* }
	293	* mgsort2(0, $runs, $base, $aux, $base);
	294	*
	295	* For our 5 runs, the tree of calls looks like
	296	*
	297	* 5
	298	* 3 2
	299	* 2 1 1 1
	300	* 1 1
	301	*
	302	* 1 2 3 4 5
	303	*
	304	* and the corresponding activity looks like
	305	*
	306	* copy runs 1 and 2 from base to aux
	307	* merge runs 1 and 2 from aux to base
	308	* (run 3 is where it belongs, no copy needed)
	309	* merge runs 12 and 3 from base to aux
	310	* (runs 4 and 5 are where they belong, no copy needed)
	311	* merge runs 4 and 5 from base to aux
	312	* merge runs 123 and 45 from aux to base
	313	*
	314	* Note that we merge runs 1 and 2 immediately after copying them,
	315	* while they are still likely to be in fast cache. Similarly,
	316	* run 3 is merged with run 12 while it still may be lingering in cache.
	317	* This implementation should therefore enjoy much of the cache-friendly
	318	* behavior that quicksort does. In addition, it does less copying
	319	* than the original mergesort implementation (only runs 1 and 2 are copied)
	320	* and the "balancing" of merges is better (merged runs comprise more nearly
	321	* equal numbers of original runs).
	322	*
	323	* The actual cache-friendly implementation will use a pseudo-stack
	324	* to avoid recursion, and will unroll processing of runs of length 2,
	325	* but it is otherwise similar to the recursive implementation.
	326	*/
	327
	328	typedef struct {
	329	IV offset; /* offset of 1st of 2 runs at this level */
	330	IV runs; /* how many runs must be combined into 1 */
	331	} off_runs; /* pseudo-stack element */
	332
	333	STATIC void
	334	S_mergesortsv(pTHX_ gptr *base, size_t nmemb, SVCOMPARE_t cmp)
	335	{
	336	IV i, run, runs, offset;
	337	I32 sense, level;
	338	int iwhich;
	339	register gptr f1, f2, t, b, p, tp2, l1, l2, *q;
	340	gptr aux, list1, *list2;
	341	gptr *p1;
	342	gptr small[SMALLSORT];
	343	gptr *which[3];
	344	off_runs stack[60], *stackp;
	345
	346	if (nmemb <= 1) return; /* sorted trivially */
	347	if (nmemb <= SMALLSORT) aux = small; /* use stack for aux array */
	348	else { New(799,aux,nmemb,gptr); } /* allocate auxilliary array */
	349	level = 0;
	350	stackp = stack;
	351	stackp->runs = dynprep(aTHX_ base, aux, nmemb, cmp);
	352	stackp->offset = offset = 0;
	353	which[0] = which[2] = base;
	354	which[1] = aux;
	355	for (;;) {
	356	/* On levels where both runs have be constructed (stackp->runs == 0),
	357	* merge them, and note the offset of their end, in case the offset
	358	* is needed at the next level up. Hop up a level, and,
	359	* as long as stackp->runs is 0, keep merging.
	360	*/
	361	if ((runs = stackp->runs) == 0) {
	362	iwhich = level & 1;
	363	list1 = which[iwhich]; /* area where runs are now */
	364	list2 = which[++iwhich]; /* area for merged runs */
	365	do {
	366	offset = stackp->offset;
	367	f1 = p1 = list1 + offset; /* start of first run */
	368	p = tp2 = list2 + offset; /* where merged run will go */
	369	t = NEXT(p); /* where first run ends */
	370	f2 = l1 = POTHER(t, list2, list1); /* ... on the other side */
	371	t = NEXT(t); /* where second runs ends */
	372	l2 = POTHER(t, list2, list1); /* ... on the other side */
	373	offset = PNELEM(list2, t);
	374	while (f1 < l1 && f2 < l2) {
	375	/* If head 1 is larger than head 2, find ALL the elements
	376	** in list 2 strictly less than head1, write them all,
	377	** then head 1. Then compare the new heads, and repeat,
	378	** until one or both lists are exhausted.
	379	**
	380	** In all comparisons (after establishing
	381	** which head to merge) the item to merge
	382	** (at pointer q) is the first operand of
	383	** the comparison. When we want to know
	384	** if ``q is strictly less than the other'',
	385	** we can't just do
	386	** cmp(q, other) < 0
	387	** because stability demands that we treat equality
	388	** as high when q comes from l2, and as low when
	389	** q was from l1. So we ask the question by doing
	390	** cmp(q, other) <= sense
	391	** and make sense == 0 when equality should look low,
	392	** and -1 when equality should look high.
	393	*/
	394
	395
	396	if (cmp(aTHX_ f1, f2) <= 0) {
	397	q = f2; b = f1; t = l1;
	398	sense = -1;
	399	} else {
	400	q = f1; b = f2; t = l2;
	401	sense = 0;
	402	}
	403
	404
	405	/* ramp up
	406	**
	407	** Leave t at something strictly
	408	** greater than q (or at the end of the list),
	409	** and b at something strictly less than q.
	410	*/
	411	for (i = 1, run = 0 ;;) {
	412	if ((p = PINDEX(b, i)) >= t) {
	413	/* off the end */
	414	if (((p = PINDEX(t, -1)) > b) &&
	415	(cmp(aTHX_ q, p) <= sense))
	416	t = p;
	417	else b = p;
	418	break;
	419	} else if (cmp(aTHX_ q, p) <= sense) {
	420	t = p;
	421	break;
	422	} else b = p;
	423	if (++run >= RTHRESH) i += i;
	424	}
	425
	426
	427	/* q is known to follow b and must be inserted before t.
	428	** Increment b, so the range of possibilities is [b,t).
	429	** Round binary split down, to favor early appearance.
	430	** Adjust b and t until q belongs just before t.
	431	*/
	432
	433	b++;
	434	while (b < t) {
	435	p = PINDEX(b, (PNELEM(b, t) - 1) / 2);
	436	if (cmp(aTHX_ q, p) <= sense) {
	437	t = p;
	438	} else b = p + 1;
	439	}
	440
	441
	442	/* Copy all the strictly low elements */
	443
	444	if (q == f1) {
	445	FROMTOUPTO(f2, tp2, t);
	446	tp2++ = f1++;
	447	} else {
	448	FROMTOUPTO(f1, tp2, t);
	449	tp2++ = f2++;
	450	}
	451	}
	452
	453
	454	/* Run out remaining list */
	455	if (f1 == l1) {
	456	if (f2 < l2) FROMTOUPTO(f2, tp2, l2);
	457	} else FROMTOUPTO(f1, tp2, l1);
	458	p1 = NEXT(p1) = POTHER(tp2, list2, list1);
	459
	460	if (--level == 0) goto done;
	461	--stackp;
	462	t = list1; list1 = list2; list2 = t; /* swap lists */
	463	} while ((runs = stackp->runs) == 0);
	464	}
	465
	466
	467	stackp->runs = 0; /* current run will finish level */
	468	/* While there are more than 2 runs remaining,
	469	* turn them into exactly 2 runs (at the "other" level),
	470	* each made up of approximately half the runs.
	471	* Stack the second half for later processing,
	472	* and set about producing the first half now.
	473	*/
	474	while (runs > 2) {
	475	++level;
	476	++stackp;
	477	stackp->offset = offset;
	478	runs -= stackp->runs = runs / 2;
	479	}
	480	/* We must construct a single run from 1 or 2 runs.
	481	* All the original runs are in which[0] == base.
	482	* The run we construct must end up in which[level&1].
	483	*/
	484	iwhich = level & 1;
	485	if (runs == 1) {
	486	/* Constructing a single run from a single run.
	487	* If it's where it belongs already, there's nothing to do.
	488	* Otherwise, copy it to where it belongs.
	489	* A run of 1 is either a singleton at level 0,
	490	* or the second half of a split 3. In neither event
	491	* is it necessary to set offset. It will be set by the merge
	492	* that immediately follows.
	493	*/
	494	if (iwhich) { /* Belongs in aux, currently in base */
	495	f1 = b = PINDEX(base, offset); /* where list starts */
	496	f2 = PINDEX(aux, offset); /* where list goes */
	497	t = NEXT(f2); /* where list will end */
	498	offset = PNELEM(aux, t); /* offset thereof */
	499	t = PINDEX(base, offset); /* where it currently ends */
	500	FROMTOUPTO(f1, f2, t); /* copy */
	501	NEXT(b) = t; /* set up parallel pointer */
	502	} else if (level == 0) goto done; /* single run at level 0 */
	503	} else {
	504	/* Constructing a single run from two runs.
	505	* The merge code at the top will do that.
	506	* We need only make sure the two runs are in the "other" array,
	507	* so they'll end up in the correct array after the merge.
	508	*/
	509	++level;
	510	++stackp;
	511	stackp->offset = offset;
	512	stackp->runs = 0; /* take care of both runs, trigger merge */
	513	if (!iwhich) { /* Merged runs belong in aux, copy 1st */
	514	f1 = b = PINDEX(base, offset); /* where first run starts */
	515	f2 = PINDEX(aux, offset); /* where it will be copied */
	516	t = NEXT(f2); /* where first run will end */
	517	offset = PNELEM(aux, t); /* offset thereof */
	518	p = PINDEX(base, offset); /* end of first run */
	519	t = NEXT(t); /* where second run will end */
	520	t = PINDEX(base, PNELEM(aux, t)); /* where it now ends */
	521	FROMTOUPTO(f1, f2, t); /* copy both runs */
	522	NEXT(b) = p; /* paralled pointer for 1st */
	523	NEXT(p) = t; /* ... and for second */
	524	}
	525	}
	526	}
	527	done:
	528	if (aux != small) Safefree(aux); /* free iff allocated */
	529	return;
	530	}
	531
	532	/*
	533	* The quicksort implementation was derived from source code contributed
	534	* by Tom Horsley.
	535	*
	536	* NOTE: this code was derived from Tom Horsley's qsort replacement
	537	* and should not be confused with the original code.
	538	*/
	539
	540	/* Copyright (C) Tom Horsley, 1997. All rights reserved.
	541
	542	Permission granted to distribute under the same terms as perl which are
	543	(briefly):
	544
	545	This program is free software; you can redistribute it and/or modify
	546	it under the terms of either:
	547
	548	a) the GNU General Public License as published by the Free
	549	Software Foundation; either version 1, or (at your option) any
	550	later version, or
	551
	552	b) the "Artistic License" which comes with this Kit.
	553
	554	Details on the perl license can be found in the perl source code which
	555	may be located via the www.perl.com web page.
	556
	557	This is the most wonderfulest possible qsort I can come up with (and
	558	still be mostly portable) My (limited) tests indicate it consistently
	559	does about 20% fewer calls to compare than does the qsort in the Visual
	560	C++ library, other vendors may vary.
	561
	562	Some of the ideas in here can be found in "Algorithms" by Sedgewick,
	563	others I invented myself (or more likely re-invented since they seemed
	564	pretty obvious once I watched the algorithm operate for a while).
	565
	566	Most of this code was written while watching the Marlins sweep the Giants
	567	in the 1997 National League Playoffs - no Braves fans allowed to use this
	568	code (just kidding :-).
	569
	570	I realize that if I wanted to be true to the perl tradition, the only
	571	comment in this file would be something like:
	572
	573	...they shuffled back towards the rear of the line. 'No, not at the
	574	rear!' the slave-driver shouted. 'Three files up. And stay there...
	575
	576	However, I really needed to violate that tradition just so I could keep
	577	track of what happens myself, not to mention some poor fool trying to
	578	understand this years from now :-).
	579	*/
	580
	581	/* ********************************************************** Configuration */
	582
	583	#ifndef QSORT_ORDER_GUESS
	584	#define QSORT_ORDER_GUESS 2 /* Select doubling version of the netBSD trick */
	585	#endif
	586
	587	/* QSORT_MAX_STACK is the largest number of partitions that can be stacked up for
	588	future processing - a good max upper bound is log base 2 of memory size
	589	(32 on 32 bit machines, 64 on 64 bit machines, etc). In reality can
	590	safely be smaller than that since the program is taking up some space and
	591	most operating systems only let you grab some subset of contiguous
	592	memory (not to mention that you are normally sorting data larger than
	593	1 byte element size :-).
	594	*/
	595	#ifndef QSORT_MAX_STACK
	596	#define QSORT_MAX_STACK 32
	597	#endif
	598
	599	/* QSORT_BREAK_EVEN is the size of the largest partition we should insertion sort.
	600	Anything bigger and we use qsort. If you make this too small, the qsort
	601	will probably break (or become less efficient), because it doesn't expect
	602	the middle element of a partition to be the same as the right or left -
	603	you have been warned).
	604	*/
	605	#ifndef QSORT_BREAK_EVEN
	606	#define QSORT_BREAK_EVEN 6
	607	#endif
	608
	609	/* QSORT_PLAY_SAFE is the size of the largest partition we're willing
	610	to go quadratic on. We innoculate larger partitions against
	611	quadratic behavior by shuffling them before sorting. This is not
	612	an absolute guarantee of non-quadratic behavior, but it would take
	613	staggeringly bad luck to pick extreme elements as the pivot
	614	from randomized data.
	615	*/
	616	#ifndef QSORT_PLAY_SAFE
	617	#define QSORT_PLAY_SAFE 255
	618	#endif
	619
	620	/* ************************************************************* Data Types */
	621
	622	/* hold left and right index values of a partition waiting to be sorted (the
	623	partition includes both left and right - right is NOT one past the end or
	624	anything like that).
	625	*/
	626	struct partition_stack_entry {
	627	int left;
	628	int right;
	629	#ifdef QSORT_ORDER_GUESS
	630	int qsort_break_even;
	631	#endif
	632	};
	633
	634	/* ******************************************************* Shorthand Macros */
	635
	636	/* Note that these macros will be used from inside the qsort function where
	637	we happen to know that the variable 'elt_size' contains the size of an
	638	array element and the variable 'temp' points to enough space to hold a
	639	temp element and the variable 'array' points to the array being sorted
	640	and 'compare' is the pointer to the compare routine.
	641
	642	Also note that there are very many highly architecture specific ways
	643	these might be sped up, but this is simply the most generally portable
	644	code I could think of.
	645	*/
	646
	647	/* Return < 0 == 0 or > 0 as the value of elt1 is < elt2, == elt2, > elt2
	648	*/
	649	#define qsort_cmp(elt1, elt2) \
	650	((*compare)(aTHX_ array[elt1], array[elt2]))
	651
	652	#ifdef QSORT_ORDER_GUESS
	653	#define QSORT_NOTICE_SWAP swapped++;
	654	#else
	655	#define QSORT_NOTICE_SWAP
	656	#endif
	657
	658	/* swaps contents of array elements elt1, elt2.
	659	*/
	660	#define qsort_swap(elt1, elt2) \
	661	STMT_START { \
	662	QSORT_NOTICE_SWAP \
	663	temp = array[elt1]; \
	664	array[elt1] = array[elt2]; \
	665	array[elt2] = temp; \
	666	} STMT_END
	667
	668	/* rotate contents of elt1, elt2, elt3 such that elt1 gets elt2, elt2 gets
	669	elt3 and elt3 gets elt1.
	670	*/
	671	#define qsort_rotate(elt1, elt2, elt3) \
	672	STMT_START { \
	673	QSORT_NOTICE_SWAP \
	674	temp = array[elt1]; \
	675	array[elt1] = array[elt2]; \
	676	array[elt2] = array[elt3]; \
	677	array[elt3] = temp; \
	678	} STMT_END
	679
	680	/* ************************************************************ Debug stuff */
	681
	682	#ifdef QSORT_DEBUG
	683
	684	static void
	685	break_here()
	686	{
	687	return; /* good place to set a breakpoint */
	688	}
	689
	690	#define qsort_assert(t) (void)( (t) \|\| (break_here(), 0) )
	691
	692	static void
	693	doqsort_all_asserts(
	694	void * array,
	695	size_t num_elts,
	696	size_t elt_size,
	697	int (compare)(const void elt1, const void * elt2),
	698	int pc_left, int pc_right, int u_left, int u_right)
	699	{
	700	int i;
	701
	702	qsort_assert(pc_left <= pc_right);
	703	qsort_assert(u_right < pc_left);
	704	qsort_assert(pc_right < u_left);
	705	for (i = u_right + 1; i < pc_left; ++i) {
	706	qsort_assert(qsort_cmp(i, pc_left) < 0);
	707	}
	708	for (i = pc_left; i < pc_right; ++i) {
	709	qsort_assert(qsort_cmp(i, pc_right) == 0);
	710	}
	711	for (i = pc_right + 1; i < u_left; ++i) {
	712	qsort_assert(qsort_cmp(pc_right, i) < 0);
	713	}
	714	}
	715
	716	#define qsort_all_asserts(PC_LEFT, PC_RIGHT, U_LEFT, U_RIGHT) \
	717	doqsort_all_asserts(array, num_elts, elt_size, compare, \
	718	PC_LEFT, PC_RIGHT, U_LEFT, U_RIGHT)
	719
	720	#else
	721
	722	#define qsort_assert(t) ((void)0)
	723
	724	#define qsort_all_asserts(PC_LEFT, PC_RIGHT, U_LEFT, U_RIGHT) ((void)0)
	725
	726	#endif
	727
	728	/* ****************************************************************** qsort */
	729
	730	STATIC void /* the standard unstable (u) quicksort (qsort) */
	731	S_qsortsvu(pTHX_ SV ** array, size_t num_elts, SVCOMPARE_t compare)
	732	{
	733	register SV * temp;
	734
	735	struct partition_stack_entry partition_stack[QSORT_MAX_STACK];
	736	int next_stack_entry = 0;
	737
	738	int part_left;
	739	int part_right;
	740	#ifdef QSORT_ORDER_GUESS
	741	int qsort_break_even;
	742	int swapped;
	743	#endif
	744
	745	/* Make sure we actually have work to do.
	746	*/
	747	if (num_elts <= 1) {
	748	return;
	749	}
	750
	751	/* Innoculate large partitions against quadratic behavior */
	752	if (num_elts > QSORT_PLAY_SAFE) {
	753	register size_t n, j;
	754	register SV **q;
	755	for (n = num_elts, q = array; n > 1; ) {
	756	j = n-- * Drand01();
	757	temp = q[j];
	758	q[j] = q[n];
	759	q[n] = temp;
	760	}
	761	}
	762
	763	/* Setup the initial partition definition and fall into the sorting loop
	764	*/
	765	part_left = 0;
	766	part_right = (int)(num_elts - 1);
	767	#ifdef QSORT_ORDER_GUESS
	768	qsort_break_even = QSORT_BREAK_EVEN;
	769	#else
	770	#define qsort_break_even QSORT_BREAK_EVEN
	771	#endif
	772	for ( ; ; ) {
	773	if ((part_right - part_left) >= qsort_break_even) {
	774	/* OK, this is gonna get hairy, so lets try to document all the
	775	concepts and abbreviations and variables and what they keep
	776	track of:
	777
	778	pc: pivot chunk - the set of array elements we accumulate in the
	779	middle of the partition, all equal in value to the original
	780	pivot element selected. The pc is defined by:
	781
	782	pc_left - the leftmost array index of the pc
	783	pc_right - the rightmost array index of the pc
	784
	785	we start with pc_left == pc_right and only one element
	786	in the pivot chunk (but it can grow during the scan).
	787
	788	u: uncompared elements - the set of elements in the partition
	789	we have not yet compared to the pivot value. There are two
	790	uncompared sets during the scan - one to the left of the pc
	791	and one to the right.
	792
	793	u_right - the rightmost index of the left side's uncompared set
	794	u_left - the leftmost index of the right side's uncompared set
	795
	796	The leftmost index of the left sides's uncompared set
	797	doesn't need its own variable because it is always defined
	798	by the leftmost edge of the whole partition (part_left). The
	799	same goes for the rightmost edge of the right partition
	800	(part_right).
	801
	802	We know there are no uncompared elements on the left once we
	803	get u_right < part_left and no uncompared elements on the
	804	right once u_left > part_right. When both these conditions
	805	are met, we have completed the scan of the partition.
	806
	807	Any elements which are between the pivot chunk and the
	808	uncompared elements should be less than the pivot value on
	809	the left side and greater than the pivot value on the right
	810	side (in fact, the goal of the whole algorithm is to arrange
	811	for that to be true and make the groups of less-than and
	812	greater-then elements into new partitions to sort again).
	813
	814	As you marvel at the complexity of the code and wonder why it
	815	has to be so confusing. Consider some of the things this level
	816	of confusion brings:
	817
	818	Once I do a compare, I squeeze every ounce of juice out of it. I
	819	never do compare calls I don't have to do, and I certainly never
	820	do redundant calls.
	821
	822	I also never swap any elements unless I can prove there is a
	823	good reason. Many sort algorithms will swap a known value with
	824	an uncompared value just to get things in the right place (or
	825	avoid complexity :-), but that uncompared value, once it gets
	826	compared, may then have to be swapped again. A lot of the
	827	complexity of this code is due to the fact that it never swaps
	828	anything except compared values, and it only swaps them when the
	829	compare shows they are out of position.
	830	*/
	831	int pc_left, pc_right;
	832	int u_right, u_left;
	833
	834	int s;
	835
	836	pc_left = ((part_left + part_right) / 2);
	837	pc_right = pc_left;
	838	u_right = pc_left - 1;
	839	u_left = pc_right + 1;
	840
	841	/* Qsort works best when the pivot value is also the median value
	842	in the partition (unfortunately you can't find the median value
	843	without first sorting :-), so to give the algorithm a helping
	844	hand, we pick 3 elements and sort them and use the median value
	845	of that tiny set as the pivot value.
	846
	847	Some versions of qsort like to use the left middle and right as
	848	the 3 elements to sort so they can insure the ends of the
	849	partition will contain values which will stop the scan in the
	850	compare loop, but when you have to call an arbitrarily complex
	851	routine to do a compare, its really better to just keep track of
	852	array index values to know when you hit the edge of the
	853	partition and avoid the extra compare. An even better reason to
	854	avoid using a compare call is the fact that you can drop off the
	855	edge of the array if someone foolishly provides you with an
	856	unstable compare function that doesn't always provide consistent
	857	results.
	858
	859	So, since it is simpler for us to compare the three adjacent
	860	elements in the middle of the partition, those are the ones we
	861	pick here (conveniently pointed at by u_right, pc_left, and
	862	u_left). The values of the left, center, and right elements
	863	are refered to as l c and r in the following comments.
	864	*/
	865
	866	#ifdef QSORT_ORDER_GUESS
	867	swapped = 0;
	868	#endif
	869	s = qsort_cmp(u_right, pc_left);
	870	if (s < 0) {
	871	/* l < c */
	872	s = qsort_cmp(pc_left, u_left);
	873	/* if l < c, c < r - already in order - nothing to do */
	874	if (s == 0) {
	875	/* l < c, c == r - already in order, pc grows */
	876	++pc_right;
	877	qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
	878	} else if (s > 0) {
	879	/* l < c, c > r - need to know more */
	880	s = qsort_cmp(u_right, u_left);
	881	if (s < 0) {
	882	/* l < c, c > r, l < r - swap c & r to get ordered */
	883	qsort_swap(pc_left, u_left);
	884	qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
	885	} else if (s == 0) {
	886	/* l < c, c > r, l == r - swap c&r, grow pc */
	887	qsort_swap(pc_left, u_left);
	888	--pc_left;
	889	qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
	890	} else {
	891	/* l < c, c > r, l > r - make lcr into rlc to get ordered */
	892	qsort_rotate(pc_left, u_right, u_left);
	893	qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
	894	}
	895	}
	896	} else if (s == 0) {
	897	/* l == c */
	898	s = qsort_cmp(pc_left, u_left);
	899	if (s < 0) {
	900	/* l == c, c < r - already in order, grow pc */
	901	--pc_left;
	902	qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
	903	} else if (s == 0) {
	904	/* l == c, c == r - already in order, grow pc both ways */
	905	--pc_left;
	906	++pc_right;
	907	qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
	908	} else {
	909	/* l == c, c > r - swap l & r, grow pc */
	910	qsort_swap(u_right, u_left);
	911	++pc_right;
	912	qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
	913	}
	914	} else {
	915	/* l > c */
	916	s = qsort_cmp(pc_left, u_left);
	917	if (s < 0) {
	918	/* l > c, c < r - need to know more */
	919	s = qsort_cmp(u_right, u_left);
	920	if (s < 0) {
	921	/* l > c, c < r, l < r - swap l & c to get ordered */
	922	qsort_swap(u_right, pc_left);
	923	qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
	924	} else if (s == 0) {
	925	/* l > c, c < r, l == r - swap l & c, grow pc */
	926	qsort_swap(u_right, pc_left);
	927	++pc_right;
	928	qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
	929	} else {
	930	/* l > c, c < r, l > r - rotate lcr into crl to order */
	931	qsort_rotate(u_right, pc_left, u_left);
	932	qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
	933	}
	934	} else if (s == 0) {
	935	/* l > c, c == r - swap ends, grow pc */
	936	qsort_swap(u_right, u_left);
	937	--pc_left;
	938	qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
	939	} else {
	940	/* l > c, c > r - swap ends to get in order */
	941	qsort_swap(u_right, u_left);
	942	qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
	943	}
	944	}
	945	/* We now know the 3 middle elements have been compared and
	946	arranged in the desired order, so we can shrink the uncompared
	947	sets on both sides
	948	*/
	949	--u_right;
	950	++u_left;
	951	qsort_all_asserts(pc_left, pc_right, u_left, u_right);
	952
	953	/* The above massive nested if was the simple part :-). We now have
	954	the middle 3 elements ordered and we need to scan through the
	955	uncompared sets on either side, swapping elements that are on
	956	the wrong side or simply shuffling equal elements around to get
	957	all equal elements into the pivot chunk.
	958	*/
	959
	960	for ( ; ; ) {
	961	int still_work_on_left;
	962	int still_work_on_right;
	963
	964	/* Scan the uncompared values on the left. If I find a value
	965	equal to the pivot value, move it over so it is adjacent to
	966	the pivot chunk and expand the pivot chunk. If I find a value
	967	less than the pivot value, then just leave it - its already
	968	on the correct side of the partition. If I find a greater
	969	value, then stop the scan.
	970	*/
	971	while ((still_work_on_left = (u_right >= part_left))) {
	972	s = qsort_cmp(u_right, pc_left);
	973	if (s < 0) {
	974	--u_right;
	975	} else if (s == 0) {
	976	--pc_left;
	977	if (pc_left != u_right) {
	978	qsort_swap(u_right, pc_left);
	979	}
	980	--u_right;
	981	} else {
	982	break;
	983	}
	984	qsort_assert(u_right < pc_left);
	985	qsort_assert(pc_left <= pc_right);
	986	qsort_assert(qsort_cmp(u_right + 1, pc_left) <= 0);
	987	qsort_assert(qsort_cmp(pc_left, pc_right) == 0);
	988	}
	989
	990	/* Do a mirror image scan of uncompared values on the right
	991	*/
	992	while ((still_work_on_right = (u_left <= part_right))) {
	993	s = qsort_cmp(pc_right, u_left);
	994	if (s < 0) {
	995	++u_left;
	996	} else if (s == 0) {
	997	++pc_right;
	998	if (pc_right != u_left) {
	999	qsort_swap(pc_right, u_left);
	1000	}
	1001	++u_left;
	1002	} else {
	1003	break;
	1004	}
	1005	qsort_assert(u_left > pc_right);
	1006	qsort_assert(pc_left <= pc_right);
	1007	qsort_assert(qsort_cmp(pc_right, u_left - 1) <= 0);
	1008	qsort_assert(qsort_cmp(pc_left, pc_right) == 0);
	1009	}
	1010
	1011	if (still_work_on_left) {
	1012	/* I know I have a value on the left side which needs to be
	1013	on the right side, but I need to know more to decide
	1014	exactly the best thing to do with it.
	1015	*/
	1016	if (still_work_on_right) {
	1017	/* I know I have values on both side which are out of
	1018	position. This is a big win because I kill two birds
	1019	with one swap (so to speak). I can advance the
	1020	uncompared pointers on both sides after swapping both
	1021	of them into the right place.
	1022	*/
	1023	qsort_swap(u_right, u_left);
	1024	--u_right;
	1025	++u_left;
	1026	qsort_all_asserts(pc_left, pc_right, u_left, u_right);
	1027	} else {
	1028	/* I have an out of position value on the left, but the
	1029	right is fully scanned, so I "slide" the pivot chunk
	1030	and any less-than values left one to make room for the
	1031	greater value over on the right. If the out of position
	1032	value is immediately adjacent to the pivot chunk (there
	1033	are no less-than values), I can do that with a swap,
	1034	otherwise, I have to rotate one of the less than values
	1035	into the former position of the out of position value
	1036	and the right end of the pivot chunk into the left end
	1037	(got all that?).
	1038	*/
	1039	--pc_left;
	1040	if (pc_left == u_right) {
	1041	qsort_swap(u_right, pc_right);
	1042	qsort_all_asserts(pc_left, pc_right-1, u_left, u_right-1);
	1043	} else {
	1044	qsort_rotate(u_right, pc_left, pc_right);
	1045	qsort_all_asserts(pc_left, pc_right-1, u_left, u_right-1);
	1046	}
	1047	--pc_right;
	1048	--u_right;
	1049	}
	1050	} else if (still_work_on_right) {
	1051	/* Mirror image of complex case above: I have an out of
	1052	position value on the right, but the left is fully
	1053	scanned, so I need to shuffle things around to make room
	1054	for the right value on the left.
	1055	*/
	1056	++pc_right;
	1057	if (pc_right == u_left) {
	1058	qsort_swap(u_left, pc_left);
	1059	qsort_all_asserts(pc_left+1, pc_right, u_left+1, u_right);
	1060	} else {
	1061	qsort_rotate(pc_right, pc_left, u_left);
	1062	qsort_all_asserts(pc_left+1, pc_right, u_left+1, u_right);
	1063	}
	1064	++pc_left;
	1065	++u_left;
	1066	} else {
	1067	/* No more scanning required on either side of partition,
	1068	break out of loop and figure out next set of partitions
	1069	*/
	1070	break;
	1071	}
	1072	}
	1073
	1074	/* The elements in the pivot chunk are now in the right place. They
	1075	will never move or be compared again. All I have to do is decide
	1076	what to do with the stuff to the left and right of the pivot
	1077	chunk.
	1078
	1079	Notes on the QSORT_ORDER_GUESS ifdef code:
	1080
	1081	1. If I just built these partitions without swapping any (or
	1082	very many) elements, there is a chance that the elements are
	1083	already ordered properly (being properly ordered will
	1084	certainly result in no swapping, but the converse can't be
	1085	proved :-).
	1086
	1087	2. A (properly written) insertion sort will run faster on
	1088	already ordered data than qsort will.
	1089
	1090	3. Perhaps there is some way to make a good guess about
	1091	switching to an insertion sort earlier than partition size 6
	1092	(for instance - we could save the partition size on the stack
	1093	and increase the size each time we find we didn't swap, thus
	1094	switching to insertion sort earlier for partitions with a
	1095	history of not swapping).
	1096
	1097	4. Naturally, if I just switch right away, it will make
	1098	artificial benchmarks with pure ascending (or descending)
	1099	data look really good, but is that a good reason in general?
	1100	Hard to say...
	1101	*/
	1102
	1103	#ifdef QSORT_ORDER_GUESS
	1104	if (swapped < 3) {
	1105	#if QSORT_ORDER_GUESS == 1
	1106	qsort_break_even = (part_right - part_left) + 1;
	1107	#endif
	1108	#if QSORT_ORDER_GUESS == 2
	1109	qsort_break_even *= 2;
	1110	#endif
	1111	#if QSORT_ORDER_GUESS == 3
	1112	int prev_break = qsort_break_even;
	1113	qsort_break_even *= qsort_break_even;
	1114	if (qsort_break_even < prev_break) {
	1115	qsort_break_even = (part_right - part_left) + 1;
	1116	}
	1117	#endif
	1118	} else {
	1119	qsort_break_even = QSORT_BREAK_EVEN;
	1120	}
	1121	#endif
	1122
	1123	if (part_left < pc_left) {
	1124	/* There are elements on the left which need more processing.
	1125	Check the right as well before deciding what to do.
	1126	*/
	1127	if (pc_right < part_right) {
	1128	/* We have two partitions to be sorted. Stack the biggest one
	1129	and process the smallest one on the next iteration. This
	1130	minimizes the stack height by insuring that any additional
	1131	stack entries must come from the smallest partition which
	1132	(because it is smallest) will have the fewest
	1133	opportunities to generate additional stack entries.
	1134	*/
	1135	if ((part_right - pc_right) > (pc_left - part_left)) {
	1136	/* stack the right partition, process the left */
	1137	partition_stack[next_stack_entry].left = pc_right + 1;
	1138	partition_stack[next_stack_entry].right = part_right;
	1139	#ifdef QSORT_ORDER_GUESS
	1140	partition_stack[next_stack_entry].qsort_break_even = qsort_break_even;
	1141	#endif
	1142	part_right = pc_left - 1;
	1143	} else {
	1144	/* stack the left partition, process the right */
	1145	partition_stack[next_stack_entry].left = part_left;
	1146	partition_stack[next_stack_entry].right = pc_left - 1;
	1147	#ifdef QSORT_ORDER_GUESS
	1148	partition_stack[next_stack_entry].qsort_break_even = qsort_break_even;
	1149	#endif
	1150	part_left = pc_right + 1;
	1151	}
	1152	qsort_assert(next_stack_entry < QSORT_MAX_STACK);
	1153	++next_stack_entry;
	1154	} else {
	1155	/* The elements on the left are the only remaining elements
	1156	that need sorting, arrange for them to be processed as the
	1157	next partition.
	1158	*/
	1159	part_right = pc_left - 1;
	1160	}
	1161	} else if (pc_right < part_right) {
	1162	/* There is only one chunk on the right to be sorted, make it
	1163	the new partition and loop back around.
	1164	*/
	1165	part_left = pc_right + 1;
	1166	} else {
	1167	/* This whole partition wound up in the pivot chunk, so
	1168	we need to get a new partition off the stack.
	1169	*/
	1170	if (next_stack_entry == 0) {
	1171	/* the stack is empty - we are done */
	1172	break;
	1173	}
	1174	--next_stack_entry;
	1175	part_left = partition_stack[next_stack_entry].left;
	1176	part_right = partition_stack[next_stack_entry].right;
	1177	#ifdef QSORT_ORDER_GUESS
	1178	qsort_break_even = partition_stack[next_stack_entry].qsort_break_even;
	1179	#endif
	1180	}
	1181	} else {
	1182	/* This partition is too small to fool with qsort complexity, just
	1183	do an ordinary insertion sort to minimize overhead.
	1184	*/
	1185	int i;
	1186	/* Assume 1st element is in right place already, and start checking
	1187	at 2nd element to see where it should be inserted.
	1188	*/
	1189	for (i = part_left + 1; i <= part_right; ++i) {
	1190	int j;
	1191	/* Scan (backwards - just in case 'i' is already in right place)
	1192	through the elements already sorted to see if the ith element
	1193	belongs ahead of one of them.
	1194	*/
	1195	for (j = i - 1; j >= part_left; --j) {
	1196	if (qsort_cmp(i, j) >= 0) {
	1197	/* i belongs right after j
	1198	*/
	1199	break;
	1200	}
	1201	}
	1202	++j;
	1203	if (j != i) {
	1204	/* Looks like we really need to move some things
	1205	*/
	1206	int k;
	1207	temp = array[i];
	1208	for (k = i - 1; k >= j; --k)
	1209	array[k + 1] = array[k];
	1210	array[j] = temp;
	1211	}
	1212	}
	1213
	1214	/* That partition is now sorted, grab the next one, or get out
	1215	of the loop if there aren't any more.
	1216	*/
	1217
	1218	if (next_stack_entry == 0) {
	1219	/* the stack is empty - we are done */
	1220	break;
	1221	}
	1222	--next_stack_entry;
	1223	part_left = partition_stack[next_stack_entry].left;
	1224	part_right = partition_stack[next_stack_entry].right;
	1225	#ifdef QSORT_ORDER_GUESS
	1226	qsort_break_even = partition_stack[next_stack_entry].qsort_break_even;
	1227	#endif
	1228	}
	1229	}
	1230
	1231	/* Believe it or not, the array is sorted at this point! */
	1232	}
	1233
	1234	/* Stabilize what is, presumably, an otherwise unstable sort method.
	1235	* We do that by allocating (or having on hand) an array of pointers
	1236	* that is the same size as the original array of elements to be sorted.
	1237	* We initialize this parallel array with the addresses of the original
	1238	* array elements. This indirection can make you crazy.
	1239	* Some pictures can help. After initializing, we have
	1240	*
	1241	* indir list1
	1242	* +----+ +----+
	1243	* \| \| --------------> \| \| ------> first element to be sorted
	1244	* +----+ +----+
	1245	* \| \| --------------> \| \| ------> second element to be sorted
	1246	* +----+ +----+
	1247	* \| \| --------------> \| \| ------> third element to be sorted
	1248	* +----+ +----+
	1249	* ...
	1250	* +----+ +----+
	1251	* \| \| --------------> \| \| ------> n-1st element to be sorted
	1252	* +----+ +----+
	1253	* \| \| --------------> \| \| ------> n-th element to be sorted
	1254	* +----+ +----+
	1255	*
	1256	* During the sort phase, we leave the elements of list1 where they are,
	1257	* and sort the pointers in the indirect array in the same order determined
	1258	* by the original comparison routine on the elements pointed to.
	1259	* Because we don't move the elements of list1 around through
	1260	* this phase, we can break ties on elements that compare equal
	1261	* using their address in the list1 array, ensuring stabilty.
	1262	* This leaves us with something looking like
	1263	*
	1264	* indir list1
	1265	* +----+ +----+
	1266	* \| \| --+ +---> \| \| ------> first element to be sorted
	1267	* +----+ \| \| +----+
	1268	* \| \| --\|-------\|---> \| \| ------> second element to be sorted
	1269	* +----+ \| \| +----+
	1270	* \| \| --\|-------+ +-> \| \| ------> third element to be sorted
	1271	* +----+ \| \| +----+
	1272	* ...
	1273	* +----+ \| \| \| \| +----+
	1274	* \| \| ---\|-+ \| +--> \| \| ------> n-1st element to be sorted
	1275	* +----+ \| \| +----+
	1276	* \| \| ---+ +----> \| \| ------> n-th element to be sorted
	1277	* +----+ +----+
	1278	*
	1279	* where the i-th element of the indirect array points to the element
	1280	* that should be i-th in the sorted array. After the sort phase,
	1281	* we have to put the elements of list1 into the places
	1282	* dictated by the indirect array.
	1283	*/
	1284
	1285	static SVCOMPARE_t RealCmp;
	1286
	1287	static I32
	1288	cmpindir(pTHX_ gptr a, gptr b)
	1289	{
	1290	I32 sense;
	1291	gptr ap = (gptr )a;
	1292	gptr bp = (gptr )b;
	1293
	1294	if ((sense = RealCmp(aTHX_ ap, bp)) == 0)
	1295	sense = (ap > bp) ? 1 : ((ap < bp) ? -1 : 0);
	1296	return sense;
	1297	}
	1298
	1299	STATIC void
	1300	S_qsortsv(pTHX_ gptr *list1, size_t nmemb, SVCOMPARE_t cmp)
	1301	{
	1302	SV **hintsvp;
	1303
	1304	if (SORTHINTS(hintsvp) & HINT_SORT_STABLE) {
	1305	register gptr *pp, q;
	1306	register size_t n, j, i;
	1307	gptr small[SMALLSORT], *indir, tmp;
	1308	SVCOMPARE_t savecmp;
	1309	if (nmemb <= 1) return; /* sorted trivially */
	1310
	1311	/* Small arrays can use the stack, big ones must be allocated */
	1312	if (nmemb <= SMALLSORT) indir = small;
	1313	else { New(1799, indir, nmemb, gptr *); }
	1314
	1315	/* Copy pointers to original array elements into indirect array */
	1316	for (n = nmemb, pp = indir, q = list1; n--; ) *pp++ = q++;
	1317
	1318	savecmp = RealCmp; /* Save current comparison routine, if any */
	1319	RealCmp = cmp; /* Put comparison routine where cmpindir can find it */
	1320
	1321	/* sort, with indirection */
	1322	S_qsortsvu(aTHX_ (gptr *)indir, nmemb, cmpindir);
	1323
	1324	pp = indir;
	1325	q = list1;
	1326	for (n = nmemb; n--; ) {
	1327	/* Assert A: all elements of q with index > n are already
	1328	* in place. This is vacuosly true at the start, and we
	1329	* put element n where it belongs below (if it wasn't
	1330	* already where it belonged). Assert B: we only move
	1331	* elements that aren't where they belong,
	1332	* so, by A, we never tamper with elements above n.
	1333	*/
	1334	j = pp[n] - q; /* This sets j so that q[j] is
	1335	* at pp[n]. *pp[j] belongs in
	1336	* q[j], by construction.
	1337	*/
	1338	if (n != j) { /* all's well if n == j */
	1339	tmp = q[j]; /* save what's in q[j] */
	1340	do {
	1341	q[j] = pp[j]; / put pp[j] where it belongs /
	1342	i = pp[j] - q; /* the index in q of the element
	1343	* just moved */
	1344	pp[j] = q + j; /* this is ok now */
	1345	} while ((j = i) != n);
	1346	/* There are only finitely many (nmemb) addresses
	1347	* in the pp array.
	1348	* So we must eventually revisit an index we saw before.
	1349	* Suppose the first revisited index is k != n.
	1350	* An index is visited because something else belongs there.
	1351	* If we visit k twice, then two different elements must
	1352	* belong in the same place, which cannot be.
	1353	* So j must get back to n, the loop terminates,
	1354	* and we put the saved element where it belongs.
	1355	*/
	1356	q[n] = tmp; /* put what belongs into
	1357	* the n-th element */
	1358	}
	1359	}
	1360
	1361	/* free iff allocated */
	1362	if (indir != small) { Safefree(indir); }
	1363	/* restore prevailing comparison routine */
	1364	RealCmp = savecmp;
	1365	} else {
	1366	S_qsortsvu(aTHX_ list1, nmemb, cmp);
	1367	}
	1368	}
	1369
	1370	/*
	1371	=for apidoc sortsv
	1372
	1373	Sort an array. Here is an example:
	1374
	1375	sortsv(AvARRAY(av), av_len(av)+1, Perl_sv_cmp_locale);
	1376
	1377	=cut
	1378	*/
	1379
	1380	void
	1381	Perl_sortsv(pTHX_ SV **array, size_t nmemb, SVCOMPARE_t cmp)
	1382	{
	1383	void (sortsvp)(pTHX_ SV *array, size_t nmemb, SVCOMPARE_t cmp) =
	1384	S_mergesortsv;
	1385	SV **hintsvp;
	1386	I32 hints;
	1387
	1388	if ((hints = SORTHINTS(hintsvp))) {
	1389	if (hints & HINT_SORT_QUICKSORT)
	1390	sortsvp = S_qsortsv;
	1391	else {
	1392	if (hints & HINT_SORT_MERGESORT)
	1393	sortsvp = S_mergesortsv;
	1394	else
	1395	sortsvp = S_mergesortsv;
	1396	}
	1397	}
	1398
	1399	sortsvp(aTHX_ array, nmemb, cmp);
	1400	}
	1401
	1402	PP(pp_sort)
	1403	{
	1404	dSP; dMARK; dORIGMARK;
	1405	register SV **up;
	1406	SV **myorigmark = ORIGMARK;
	1407	register I32 max;
	1408	HV *stash;
	1409	GV *gv;
	1410	CV *cv = 0;
	1411	I32 gimme = GIMME;
	1412	OP* nextop = PL_op->op_next;
	1413	I32 overloading = 0;
	1414	bool hasargs = FALSE;
	1415	I32 is_xsub = 0;
	1416
	1417	if (gimme != G_ARRAY) {
	1418	SP = MARK;
	1419	RETPUSHUNDEF;
	1420	}
	1421
	1422	ENTER;
	1423	SAVEVPTR(PL_sortcop);
	1424	if (PL_op->op_flags & OPf_STACKED) {
	1425	if (PL_op->op_flags & OPf_SPECIAL) {
	1426	OP kid = cLISTOP->op_first->op_sibling; / pass pushmark */
	1427	kid = kUNOP->op_first; /* pass rv2gv */
	1428	kid = kUNOP->op_first; /* pass leave */
	1429	PL_sortcop = kid->op_next;
	1430	stash = CopSTASH(PL_curcop);
	1431	}
	1432	else {
	1433	cv = sv_2cv(*++MARK, &stash, &gv, 0);
	1434	if (cv && SvPOK(cv)) {
	1435	STRLEN n_a;
	1436	char proto = SvPV((SV)cv, n_a);
	1437	if (proto && strEQ(proto, "$$")) {
	1438	hasargs = TRUE;
	1439	}
	1440	}
	1441	if (!(cv && CvROOT(cv))) {
	1442	if (cv && CvXSUB(cv)) {
	1443	is_xsub = 1;
	1444	}
	1445	else if (gv) {
	1446	SV *tmpstr = sv_newmortal();
	1447	gv_efullname3(tmpstr, gv, Nullch);
	1448	DIE(aTHX_ "Undefined sort subroutine \"%s\" called",
	1449	SvPVX(tmpstr));
	1450	}
	1451	else {
	1452	DIE(aTHX_ "Undefined subroutine in sort");
	1453	}
	1454	}
	1455
	1456	if (is_xsub)
	1457	PL_sortcop = (OP*)cv;
	1458	else {
	1459	PL_sortcop = CvSTART(cv);
	1460	SAVEVPTR(CvROOT(cv)->op_ppaddr);
	1461	CvROOT(cv)->op_ppaddr = PL_ppaddr[OP_NULL];
	1462
	1463	SAVEVPTR(PL_curpad);
	1464	PL_curpad = AvARRAY((AV*)AvARRAY(CvPADLIST(cv))[1]);
	1465	}
	1466	}
	1467	}
	1468	else {
	1469	PL_sortcop = Nullop;
	1470	stash = CopSTASH(PL_curcop);
	1471	}
	1472
	1473	up = myorigmark + 1;
	1474	while (MARK < SP) { /* This may or may not shift down one here. */
	1475	/SUPPRESS 560/
	1476	if ((up = ++MARK)) { /* Weed out nulls. */
	1477	SvTEMP_off(*up);
	1478	if (!PL_sortcop && !SvPOK(*up)) {
	1479	STRLEN n_a;
	1480	if (SvAMAGIC(*up))
	1481	overloading = 1;
	1482	else
	1483	(void)sv_2pv(*up, &n_a);
	1484	}
	1485	up++;
	1486	}
	1487	}
	1488	max = --up - myorigmark;
	1489	if (PL_sortcop) {
	1490	if (max > 1) {
	1491	PERL_CONTEXT *cx;
	1492	SV** newsp;
	1493	bool oldcatch = CATCH_GET;
	1494
	1495	SAVETMPS;
	1496	SAVEOP();
	1497
	1498	CATCH_SET(TRUE);
	1499	PUSHSTACKi(PERLSI_SORT);
	1500	if (!hasargs && !is_xsub) {
	1501	if (PL_sortstash != stash \|\| !PL_firstgv \|\| !PL_secondgv) {
	1502	SAVESPTR(PL_firstgv);
	1503	SAVESPTR(PL_secondgv);
	1504	PL_firstgv = gv_fetchpv("a", TRUE, SVt_PV);
	1505	PL_secondgv = gv_fetchpv("b", TRUE, SVt_PV);
	1506	PL_sortstash = stash;
	1507	}
	1508	#ifdef USE_5005THREADS
	1509	sv_lock((SV *)PL_firstgv);
	1510	sv_lock((SV *)PL_secondgv);
	1511	#endif
	1512	SAVESPTR(GvSV(PL_firstgv));
	1513	SAVESPTR(GvSV(PL_secondgv));
	1514	}
	1515
	1516	PUSHBLOCK(cx, CXt_NULL, PL_stack_base);
	1517	if (!(PL_op->op_flags & OPf_SPECIAL)) {
	1518	cx->cx_type = CXt_SUB;
	1519	cx->blk_gimme = G_SCALAR;
	1520	PUSHSUB(cx);
	1521	if (!CvDEPTH(cv))
	1522	(void)SvREFCNT_inc(cv); /* in preparation for POPSUB */
	1523	}
	1524	PL_sortcxix = cxstack_ix;
	1525
	1526	if (hasargs && !is_xsub) {
	1527	/* This is mostly copied from pp_entersub */
	1528	AV av = (AV)PL_curpad[0];
	1529
	1530	#ifndef USE_5005THREADS
	1531	cx->blk_sub.savearray = GvAV(PL_defgv);
	1532	GvAV(PL_defgv) = (AV*)SvREFCNT_inc(av);
	1533	#endif /* USE_5005THREADS */
	1534	cx->blk_sub.oldcurpad = PL_curpad;
	1535	cx->blk_sub.argarray = av;
	1536	}
	1537	sortsv((myorigmark+1), max,
	1538	is_xsub ? sortcv_xsub : hasargs ? sortcv_stacked : sortcv);
	1539
	1540	POPBLOCK(cx,PL_curpm);
	1541	PL_stack_sp = newsp;
	1542	POPSTACK;
	1543	CATCH_SET(oldcatch);
	1544	}
	1545	}
	1546	else {
	1547	if (max > 1) {
	1548	MEXTEND(SP, 20); /* Can't afford stack realloc on signal. */
	1549	sortsv(ORIGMARK+1, max,
	1550	(PL_op->op_private & OPpSORT_NUMERIC)
	1551	? ( (PL_op->op_private & OPpSORT_INTEGER)
	1552	? ( overloading ? amagic_i_ncmp : sv_i_ncmp)
	1553	: ( overloading ? amagic_ncmp : sv_ncmp))
	1554	: ( IN_LOCALE_RUNTIME
	1555	? ( overloading
	1556	? amagic_cmp_locale
	1557	: sv_cmp_locale_static)
	1558	: ( overloading ? amagic_cmp : sv_cmp_static)));
	1559	if (PL_op->op_private & OPpSORT_REVERSE) {
	1560	SV **p = ORIGMARK+1;
	1561	SV **q = ORIGMARK+max;
	1562	while (p < q) {
	1563	SV tmp = p;
	1564	p++ = q;
	1565	*q-- = tmp;
	1566	}
	1567	}
	1568	}
	1569	}
	1570	LEAVE;
	1571	PL_stack_sp = ORIGMARK + max;
	1572	return nextop;
	1573	}
	1574
	1575	static I32
	1576	sortcv(pTHX_ SV a, SV b)
	1577	{
	1578	I32 oldsaveix = PL_savestack_ix;
	1579	I32 oldscopeix = PL_scopestack_ix;
	1580	I32 result;
	1581	GvSV(PL_firstgv) = a;
	1582	GvSV(PL_secondgv) = b;
	1583	PL_stack_sp = PL_stack_base;
	1584	PL_op = PL_sortcop;
	1585	CALLRUNOPS(aTHX);
	1586	if (PL_stack_sp != PL_stack_base + 1)
	1587	Perl_croak(aTHX_ "Sort subroutine didn't return single value");
	1588	if (!SvNIOKp(*PL_stack_sp))
	1589	Perl_croak(aTHX_ "Sort subroutine didn't return a numeric value");
	1590	result = SvIV(*PL_stack_sp);
	1591	while (PL_scopestack_ix > oldscopeix) {
	1592	LEAVE;
	1593	}
	1594	leave_scope(oldsaveix);
	1595	return result;
	1596	}
	1597
	1598	static I32
	1599	sortcv_stacked(pTHX_ SV a, SV b)
	1600	{
	1601	I32 oldsaveix = PL_savestack_ix;
	1602	I32 oldscopeix = PL_scopestack_ix;
	1603	I32 result;
	1604	AV *av;
	1605
	1606	#ifdef USE_5005THREADS
	1607	av = (AV*)PL_curpad[0];
	1608	#else
	1609	av = GvAV(PL_defgv);
	1610	#endif
	1611
	1612	if (AvMAX(av) < 1) {
	1613	SV** ary = AvALLOC(av);
	1614	if (AvARRAY(av) != ary) {
	1615	AvMAX(av) += AvARRAY(av) - AvALLOC(av);
	1616	SvPVX(av) = (char*)ary;
	1617	}
	1618	if (AvMAX(av) < 1) {
	1619	AvMAX(av) = 1;
	1620	Renew(ary,2,SV*);
	1621	SvPVX(av) = (char*)ary;
	1622	}
	1623	}
	1624	AvFILLp(av) = 1;
	1625
	1626	AvARRAY(av)[0] = a;
	1627	AvARRAY(av)[1] = b;
	1628	PL_stack_sp = PL_stack_base;
	1629	PL_op = PL_sortcop;
	1630	CALLRUNOPS(aTHX);
	1631	if (PL_stack_sp != PL_stack_base + 1)
	1632	Perl_croak(aTHX_ "Sort subroutine didn't return single value");
	1633	if (!SvNIOKp(*PL_stack_sp))
	1634	Perl_croak(aTHX_ "Sort subroutine didn't return a numeric value");
	1635	result = SvIV(*PL_stack_sp);
	1636	while (PL_scopestack_ix > oldscopeix) {
	1637	LEAVE;
	1638	}
	1639	leave_scope(oldsaveix);
	1640	return result;
	1641	}
	1642
	1643	static I32
	1644	sortcv_xsub(pTHX_ SV a, SV b)
	1645	{
	1646	dSP;
	1647	I32 oldsaveix = PL_savestack_ix;
	1648	I32 oldscopeix = PL_scopestack_ix;
	1649	I32 result;
	1650	CV cv=(CV)PL_sortcop;
	1651
	1652	SP = PL_stack_base;
	1653	PUSHMARK(SP);
	1654	EXTEND(SP, 2);
	1655	*++SP = a;
	1656	*++SP = b;
	1657	PUTBACK;
	1658	(void)(*CvXSUB(cv))(aTHX_ cv);
	1659	if (PL_stack_sp != PL_stack_base + 1)
	1660	Perl_croak(aTHX_ "Sort subroutine didn't return single value");
	1661	if (!SvNIOKp(*PL_stack_sp))
	1662	Perl_croak(aTHX_ "Sort subroutine didn't return a numeric value");
	1663	result = SvIV(*PL_stack_sp);
	1664	while (PL_scopestack_ix > oldscopeix) {
	1665	LEAVE;
	1666	}
	1667	leave_scope(oldsaveix);
	1668	return result;
	1669	}
	1670
	1671
	1672	static I32
	1673	sv_ncmp(pTHX_ SV a, SV b)
	1674	{
	1675	NV nv1 = SvNV(a);
	1676	NV nv2 = SvNV(b);
	1677	return nv1 < nv2 ? -1 : nv1 > nv2 ? 1 : 0;
	1678	}
	1679
	1680	static I32
	1681	sv_i_ncmp(pTHX_ SV a, SV b)
	1682	{
	1683	IV iv1 = SvIV(a);
	1684	IV iv2 = SvIV(b);
	1685	return iv1 < iv2 ? -1 : iv1 > iv2 ? 1 : 0;
	1686	}
	1687	#define tryCALL_AMAGICbin(left,right,meth,svp) STMT_START { \
	1688	*svp = Nullsv; \
	1689	if (PL_amagic_generation) { \
	1690	if (SvAMAGIC(left)\|\|SvAMAGIC(right))\
	1691	*svp = amagic_call(left, \
	1692	right, \
	1693	CAT2(meth,_amg), \
	1694	0); \
	1695	} \
	1696	} STMT_END
	1697
	1698	static I32
	1699	amagic_ncmp(pTHX_ register SV a, register SV b)
	1700	{
	1701	SV *tmpsv;
	1702	tryCALL_AMAGICbin(a,b,ncmp,&tmpsv);
	1703	if (tmpsv) {
	1704	NV d;
	1705
	1706	if (SvIOK(tmpsv)) {
	1707	I32 i = SvIVX(tmpsv);
	1708	if (i > 0)
	1709	return 1;
	1710	return i? -1 : 0;
	1711	}
	1712	d = SvNV(tmpsv);
	1713	if (d > 0)
	1714	return 1;
	1715	return d? -1 : 0;
	1716	}
	1717	return sv_ncmp(aTHX_ a, b);
	1718	}
	1719
	1720	static I32
	1721	amagic_i_ncmp(pTHX_ register SV a, register SV b)
	1722	{
	1723	SV *tmpsv;
	1724	tryCALL_AMAGICbin(a,b,ncmp,&tmpsv);
	1725	if (tmpsv) {
	1726	NV d;
	1727
	1728	if (SvIOK(tmpsv)) {
	1729	I32 i = SvIVX(tmpsv);
	1730	if (i > 0)
	1731	return 1;
	1732	return i? -1 : 0;
	1733	}
	1734	d = SvNV(tmpsv);
	1735	if (d > 0)
	1736	return 1;
	1737	return d? -1 : 0;
	1738	}
	1739	return sv_i_ncmp(aTHX_ a, b);
	1740	}
	1741
	1742	static I32
	1743	amagic_cmp(pTHX_ register SV str1, register SV str2)
	1744	{
	1745	SV *tmpsv;
	1746	tryCALL_AMAGICbin(str1,str2,scmp,&tmpsv);
	1747	if (tmpsv) {
	1748	NV d;
	1749
	1750	if (SvIOK(tmpsv)) {
	1751	I32 i = SvIVX(tmpsv);
	1752	if (i > 0)
	1753	return 1;
	1754	return i? -1 : 0;
	1755	}
	1756	d = SvNV(tmpsv);
	1757	if (d > 0)
	1758	return 1;
	1759	return d? -1 : 0;
	1760	}
	1761	return sv_cmp(str1, str2);
	1762	}
	1763
	1764	static I32
	1765	amagic_cmp_locale(pTHX_ register SV str1, register SV str2)
	1766	{
	1767	SV *tmpsv;
	1768	tryCALL_AMAGICbin(str1,str2,scmp,&tmpsv);
	1769	if (tmpsv) {
	1770	NV d;
	1771
	1772	if (SvIOK(tmpsv)) {
	1773	I32 i = SvIVX(tmpsv);
	1774	if (i > 0)
	1775	return 1;
	1776	return i? -1 : 0;
	1777	}
	1778	d = SvNV(tmpsv);
	1779	if (d > 0)
	1780	return 1;
	1781	return d? -1 : 0;
	1782	}
	1783	return sv_cmp_locale(str1, str2);
	1784	}