perl5.git.perl.org Git - perl5.git/blame_incremental

... / ...

Commit	Line	Data
	1	/* pp_sort.c
	2	*
	3	* Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
	4	* 2000, 2001, 2002, 2003, by Larry Wall and others
	5	*
	6	* You may distribute under the terms of either the GNU General Public
	7	* License or the Artistic License, as specified in the README file.
	8	*
	9	*/
	10
	11	/*
	12	* ...they shuffled back towards the rear of the line. 'No, not at the
	13	* rear!' the slave-driver shouted. 'Three files up. And stay there...
	14	*/
	15
	16	#include "EXTERN.h"
	17	#define PERL_IN_PP_SORT_C
	18	#include "perl.h"
	19
	/* On Windows CE builds the identifier 'small' appears to be reserved, so it
	 * is renamed before the merge sort below declares a local array of that
	 * name. */
	#if defined(UNDER_CE)
	#define small xsmall
	#endif
	24
	25	static I32 sortcv(pTHX_ SV a, SV b);
	26	static I32 sortcv_stacked(pTHX_ SV a, SV b);
	27	static I32 sortcv_xsub(pTHX_ SV a, SV b);
	28	static I32 sv_ncmp(pTHX_ SV a, SV b);
	29	static I32 sv_i_ncmp(pTHX_ SV a, SV b);
	30	static I32 amagic_ncmp(pTHX_ SV a, SV b);
	31	static I32 amagic_i_ncmp(pTHX_ SV a, SV b);
	32	static I32 amagic_cmp(pTHX_ SV a, SV b);
	33	static I32 amagic_cmp_locale(pTHX_ SV a, SV b);
	34
	/* File-local aliases for Perl_sv_cmp and Perl_sv_cmp_locale. */
	#define sv_cmp_static Perl_sv_cmp
	#define sv_cmp_locale_static Perl_sv_cmp_locale

	/* Look up the scalar slot of the "sort::hints" glob (gv_fetchpv with
	 * GV_ADDMULTI, SVt_IV), store it in hintsv, and yield its integer value
	 * cast to I32, or 0 when SvIOK is false for it.
	 * hintsv must be an lvalue; it is assigned to and then evaluated again. */
	#define SORTHINTS(hintsv) \
	    (((hintsv) = GvSV(gv_fetchpv("sort::hints", GV_ADDMULTI, SVt_IV))), \
	    (SvIOK(hintsv) ? ((I32)SvIV(hintsv)) : 0))
	41
	/* Largest element count for which the merge sort keeps its auxiliary array
	 * in a local (stack) buffer instead of allocating one.  May be overridden
	 * at compile time. */
	#ifndef SMALLSORT
	#define SMALLSORT (200)
	#endif
	45
	46	/*
	47	* The mergesort implementation is by Peter M. McIlroy <pmcilroy@lucent.com>.
	48	*
	49	* The original code was written in conjunction with BSD Computer Software
	50	* Research Group at University of California, Berkeley.
	51	*
	52	* See also: "Optimistic Merge Sort" (SODA '92)
	53	*
	54	* The integration to Perl is by John P. Linderman <jpl@research.att.com>.
	55	*
	56	* The code can be distributed under the same terms as Perl itself.
	57	*
	58	*/
	59
	60
	61	typedef char * aptr; /* pointer for arithmetic on sizes */
	62	typedef SV * gptr; /* pointers in our lists */
	63
	64	/* Binary merge internal sort, with a few special mods
	65	** for the special perl environment it now finds itself in.
	66	**
	67	** Things that were once options have been hotwired
	68	** to values suitable for this use. In particular, we'll always
	69	** initialize looking for natural runs, we'll always produce stable
	70	** output, and we'll always do Peter McIlroy's binary merge.
	71	*/
	72
	73	/* Pointer types for arithmetic and storage and convenience casts */
	74
	/* Convenience casts: to a byte pointer (for size arithmetic), to a pointer
	 * to a list element, and to a pointer to such a pointer. */
	#define APTR(P) ((aptr)(P))
	#define GPTP(P) ((gptr *)(P))
	#define GPPP(P) ((gptr **)(P))


	/* byte offset from pointer P to (larger) pointer Q */
	#define BYTEOFF(P, Q) (APTR(Q) - APTR(P))

	/* size in bytes of one list element */
	#define PSIZE sizeof(gptr)

	/* If PSIZE is power of 2, make PSHIFT that power, if that helps */

	#ifdef PSHIFT
	/* PNELEM: element count from P to Q; PNBYTE: bytes occupied by N elements;
	 * PINDEX: address of the Nth element after P -- shift-based variants. */
	#define PNELEM(P, Q) (BYTEOFF(P,Q) >> (PSHIFT))
	#define PNBYTE(N) ((N) << (PSHIFT))
	#define PINDEX(P, N) (GPTP(APTR(P) + PNBYTE(N)))
	#else
	/* Leave optimization to compiler */
	#define PNELEM(P, Q) (GPTP(Q) - GPTP(P))
	#define PNBYTE(N) ((N) * (PSIZE))
	#define PINDEX(P, N) (GPTP(P) + (N))
	#endif

	/* Pointer into other corresponding to pointer into this */
	#define POTHER(P, THIS, OTHER) GPTP(APTR(OTHER) + BYTEOFF(THIS,P))
	100
	/* Copy elements from src to dst, advancing both pointers, until src reaches
	 * lim.  This is a do/while, so one element is always copied even if src is
	 * already at lim on entry.  The arguments are modified and evaluated
	 * repeatedly, so pass plain pointer variables only. */
	#define FROMTOUPTO(src, dst, lim) do *dst++ = *src++; while(src<lim)
	102
	103
	/* Runs are identified by a pointer in the auxiliary list.
	** The pointer is at the start of the list,
	** and it points to the start of the next list.
	** NEXT is used as an lvalue, too.
	*/

	#define NEXT(P) (*GPPP(P))
	111
	112
	/* PTHRESH is the minimum number of pairs with the same sense to justify
	** checking for a run and extending it. Note that PTHRESH counts PAIRS,
	** not just elements, so PTHRESH == 8 means a run of 16.
	*/

	#define PTHRESH (8)

	/* RTHRESH is the number of elements in a run that must compare low
	** to the low element from the opposing run before we justify
	** doing a binary rampup instead of single stepping.
	** In random input, N in a row low should only happen with
	** probability 2^(1-N), so we can risk that we are dealing
	** with orderly input without paying much when we aren't.
	*/

	#define RTHRESH (6)
	129
	130
	131	/*
	132	** Overview of algorithm and variables.
	133	** The array of elements at list1 will be organized into runs of length 2,
	134	** or runs of length >= 2 * PTHRESH. We only try to form long runs when
	135	** PTHRESH adjacent pairs compare in the same way, suggesting overall order.
	136	**
	137	** Unless otherwise specified, pair pointers address the first of two elements.
	138	**
	139	** b and b+1 are a pair that compare with sense ``sense''.
	140	** b is the ``bottom'' of adjacent pairs that might form a longer run.
	141	**
	142	** p2 parallels b in the list2 array, where runs are defined by
	143	** a pointer chain.
	144	**
	145	** t represents the ``top'' of the adjacent pairs that might extend
	146	** the run beginning at b. Usually, t addresses a pair
	147	** that compares with opposite sense from (b,b+1).
	148	** However, it may also address a singleton element at the end of list1,
	149	** or it may be equal to ``last'', the first element beyond list1.
	150	**
	151	** r addresses the Nth pair following b. If this would be beyond t,
	152	** we back it off to t. Only when r is less than t do we consider the
	153	** run long enough to consider checking.
	154	**
	155	** q addresses a pair such that the pairs at b through q already form a run.
	156	** Often, q will equal b, indicating we only are sure of the pair itself.
	157	** However, a search on the previous cycle may have revealed a longer run,
	158	** so q may be greater than b.
	159	**
	160	** p is used to work back from a candidate r, trying to reach q,
	161	** which would mean b through r would be a run. If we discover such a run,
	162	** we start q at r and try to push it further towards t.
	163	** If b through r is NOT a run, we detect the wrong order at (p-1,p).
	164	** In any event, after the check (if any), we have two main cases.
	165	**
	166	** 1) Short run. b <= q < p <= r <= t.
	167	** b through q is a run (perhaps trivial)
	168	** q through p are uninteresting pairs
	169	** p through r is a run
	170	**
	171	** 2) Long run. b < r <= q < t.
	172	** b through q is a run (of length >= 2 * PTHRESH)
	173	**
	174	** Note that degenerate cases are not only possible, but likely.
	175	** For example, if the pair following b compares with opposite sense,
	176	** then b == q < p == r == t.
	177	*/
	178
	179
	180	static IV
	181	dynprep(pTHX_ gptr list1, gptr list2, size_t nmemb, SVCOMPARE_t cmp)
	182	{
	183	I32 sense;
	184	register gptr b, p, q, t, *p2;
	185	register gptr c, last, r;
	186	gptr *savep;
	187	IV runs = 0;
	188
	189	b = list1;
	190	last = PINDEX(b, nmemb);
	191	sense = (cmp(aTHX_ b, (b+1)) > 0);
	192	for (p2 = list2; b < last; ) {
	193	/* We just started, or just reversed sense.
	194	** Set t at end of pairs with the prevailing sense.
	195	*/
	196	for (p = b+2, t = p; ++p < last; t = ++p) {
	197	if ((cmp(aTHX_ t, p) > 0) != sense) break;
	198	}
	199	q = b;
	200	/* Having laid out the playing field, look for long runs */
	201	do {
	202	p = r = b + (2 * PTHRESH);
	203	if (r >= t) p = r = t; /* too short to care about */
	204	else {
	205	while (((cmp(aTHX_ (p-1), p) > 0) == sense) &&
	206	((p -= 2) > q));
	207	if (p <= q) {
	208	/* b through r is a (long) run.
	209	** Extend it as far as possible.
	210	*/
	211	p = q = r;
	212	while (((p += 2) < t) &&
	213	((cmp(aTHX_ (p-1), p) > 0) == sense)) q = p;
	214	r = p = q + 2; /* no simple pairs, no after-run */
	215	}
	216	}
	217	if (q > b) { /* run of greater than 2 at b */
	218	savep = p;
	219	p = q += 2;
	220	/* pick up singleton, if possible */
	221	if ((p == t) &&
	222	((t + 1) == last) &&
	223	((cmp(aTHX_ (p-1), p) > 0) == sense))
	224	savep = r = p = q = last;
	225	p2 = NEXT(p2) = p2 + (p - b); ++runs;
	226	if (sense) while (b < --p) {
	227	c = *b;
	228	b++ = p;
	229	*p = c;
	230	}
	231	p = savep;
	232	}
	233	while (q < p) { /* simple pairs */
	234	p2 = NEXT(p2) = p2 + 2; ++runs;
	235	if (sense) {
	236	c = *q++;
	237	(q-1) = q;
	238	*q++ = c;
	239	} else q += 2;
	240	}
	241	if (((b = p) == t) && ((t+1) == last)) {
	242	NEXT(p2) = p2 + 1; ++runs;
	243	b++;
	244	}
	245	q = r;
	246	} while (b < t);
	247	sense = !sense;
	248	}
	249	return runs;
	250	}
	251
	252
	253	/* The original merge sort, in use since 5.7, was as fast as, or faster than,
	254	* qsort on many platforms, but slower than qsort, conspicuously so,
	255	* on others. The most likely explanation was platform-specific
	256	* differences in cache sizes and relative speeds.
	257	*
	258	* The quicksort divide-and-conquer algorithm guarantees that, as the
	259	* problem is subdivided into smaller and smaller parts, the parts
	260	* fit into smaller (and faster) caches. So it doesn't matter how
	261	* many levels of cache exist, quicksort will "find" them, and,
	262	* as long as smaller is faster, take advantage of them.
	263	*
	264	* By contrast, consider how the original mergesort algorithm worked.
	265	* Suppose we have five runs (each typically of length 2 after dynprep).
	266	*
	267	*      pass               base                  aux
	268	*       0              1 2 3 4 5
	269	*       1                                   12 34 5
	270	*       2              1234 5
	271	*       3                                   12345
	272	*       4              12345
	273	*
	274	* Adjacent pairs are merged in "grand sweeps" through the input.
	275	* This means, on pass 1, the records in runs 1 and 2 aren't revisited until
	276	* runs 3 and 4 are merged and the runs from run 5 have been copied.
	277	* The only cache that matters is one large enough to hold all the input.
	278	* On some platforms, this may be many times slower than smaller caches.
	279	*
	280	* The following pseudo-code uses the same basic merge algorithm,
	281	* but in a divide-and-conquer way.
	282	*
	283	* # merge $runs runs at offset $offset of list $list1 into $list2.
	284	* # all unmerged runs ($runs == 1) originate in list $base.
	285	* sub mgsort2 {
	286	* my ($offset, $runs, $base, $list1, $list2) = @_;
	287	*
	288	* if ($runs == 1) {
	289	* if ($list1 is $base) copy run to $list2
	290	* return offset of end of list (or copy)
	291	* } else {
	292	* $off2 = mgsort2($offset, $runs-($runs/2), $base, $list2, $list1)
	293	* mgsort2($off2, $runs/2, $base, $list2, $list1)
	294	* merge the adjacent runs at $offset of $list1 into $list2
	295	* return the offset of the end of the merged runs
	296	* }
	297	* }
	298	* mgsort2(0, $runs, $base, $aux, $base);
	299	*
	300	* For our 5 runs, the tree of calls looks like
	301	*
	302	*                          5
	303	*                3                   2
	304	*            2       1           1       1
	305	*          1   1
	306	*
	307	*          1   2     3           4       5
	308	*
	309	* and the corresponding activity looks like
	310	*
	311	* copy runs 1 and 2 from base to aux
	312	* merge runs 1 and 2 from aux to base
	313	* (run 3 is where it belongs, no copy needed)
	314	* merge runs 12 and 3 from base to aux
	315	* (runs 4 and 5 are where they belong, no copy needed)
	316	* merge runs 4 and 5 from base to aux
	317	* merge runs 123 and 45 from aux to base
	318	*
	319	* Note that we merge runs 1 and 2 immediately after copying them,
	320	* while they are still likely to be in fast cache. Similarly,
	321	* run 3 is merged with run 12 while it still may be lingering in cache.
	322	* This implementation should therefore enjoy much of the cache-friendly
	323	* behavior that quicksort does. In addition, it does less copying
	324	* than the original mergesort implementation (only runs 1 and 2 are copied)
	325	* and the "balancing" of merges is better (merged runs comprise more nearly
	326	* equal numbers of original runs).
	327	*
	328	* The actual cache-friendly implementation will use a pseudo-stack
	329	* to avoid recursion, and will unroll processing of runs of length 2,
	330	* but it is otherwise similar to the recursive implementation.
	331	*/
	332
	333	typedef struct {
	334	IV offset; /* offset of 1st of 2 runs at this level */
	335	IV runs; /* how many runs must be combined into 1 */
	336	} off_runs; /* pseudo-stack element */
	337
	338	STATIC void
	339	S_mergesortsv(pTHX_ gptr *base, size_t nmemb, SVCOMPARE_t cmp)
	340	{
	341	IV i, run, runs, offset;
	342	I32 sense, level;
	343	int iwhich;
	344	register gptr f1, f2, t, b, p, tp2, l1, l2, *q;
	345	gptr aux, list1, *list2;
	346	gptr *p1;
	347	gptr small[SMALLSORT];
	348	gptr *which[3];
	349	off_runs stack[60], *stackp;
	350
	351	if (nmemb <= 1) return; /* sorted trivially */
	352	if (nmemb <= SMALLSORT) aux = small; /* use stack for aux array */
	353	else { New(799,aux,nmemb,gptr); } /* allocate auxilliary array */
	354	level = 0;
	355	stackp = stack;
	356	stackp->runs = dynprep(aTHX_ base, aux, nmemb, cmp);
	357	stackp->offset = offset = 0;
	358	which[0] = which[2] = base;
	359	which[1] = aux;
	360	for (;;) {
	361	/* On levels where both runs have be constructed (stackp->runs == 0),
	362	* merge them, and note the offset of their end, in case the offset
	363	* is needed at the next level up. Hop up a level, and,
	364	* as long as stackp->runs is 0, keep merging.
	365	*/
	366	if ((runs = stackp->runs) == 0) {
	367	iwhich = level & 1;
	368	list1 = which[iwhich]; /* area where runs are now */
	369	list2 = which[++iwhich]; /* area for merged runs */
	370	do {
	371	offset = stackp->offset;
	372	f1 = p1 = list1 + offset; /* start of first run */
	373	p = tp2 = list2 + offset; /* where merged run will go */
	374	t = NEXT(p); /* where first run ends */
	375	f2 = l1 = POTHER(t, list2, list1); /* ... on the other side */
	376	t = NEXT(t); /* where second runs ends */
	377	l2 = POTHER(t, list2, list1); /* ... on the other side */
	378	offset = PNELEM(list2, t);
	379	while (f1 < l1 && f2 < l2) {
	380	/* If head 1 is larger than head 2, find ALL the elements
	381	** in list 2 strictly less than head1, write them all,
	382	** then head 1. Then compare the new heads, and repeat,
	383	** until one or both lists are exhausted.
	384	**
	385	** In all comparisons (after establishing
	386	** which head to merge) the item to merge
	387	** (at pointer q) is the first operand of
	388	** the comparison. When we want to know
	389	** if ``q is strictly less than the other'',
	390	** we can't just do
	391	** cmp(q, other) < 0
	392	** because stability demands that we treat equality
	393	** as high when q comes from l2, and as low when
	394	** q was from l1. So we ask the question by doing
	395	** cmp(q, other) <= sense
	396	** and make sense == 0 when equality should look low,
	397	** and -1 when equality should look high.
	398	*/
	399
	400
	401	if (cmp(aTHX_ f1, f2) <= 0) {
	402	q = f2; b = f1; t = l1;
	403	sense = -1;
	404	} else {
	405	q = f1; b = f2; t = l2;
	406	sense = 0;
	407	}
	408
	409
	410	/* ramp up
	411	**
	412	** Leave t at something strictly
	413	** greater than q (or at the end of the list),
	414	** and b at something strictly less than q.
	415	*/
	416	for (i = 1, run = 0 ;;) {
	417	if ((p = PINDEX(b, i)) >= t) {
	418	/* off the end */
	419	if (((p = PINDEX(t, -1)) > b) &&
	420	(cmp(aTHX_ q, p) <= sense))
	421	t = p;
	422	else b = p;
	423	break;
	424	} else if (cmp(aTHX_ q, p) <= sense) {
	425	t = p;
	426	break;
	427	} else b = p;
	428	if (++run >= RTHRESH) i += i;
	429	}
	430
	431
	432	/* q is known to follow b and must be inserted before t.
	433	** Increment b, so the range of possibilities is [b,t).
	434	** Round binary split down, to favor early appearance.
	435	** Adjust b and t until q belongs just before t.
	436	*/
	437
	438	b++;
	439	while (b < t) {
	440	p = PINDEX(b, (PNELEM(b, t) - 1) / 2);
	441	if (cmp(aTHX_ q, p) <= sense) {
	442	t = p;
	443	} else b = p + 1;
	444	}
	445
	446
	447	/* Copy all the strictly low elements */
	448
	449	if (q == f1) {
	450	FROMTOUPTO(f2, tp2, t);
	451	tp2++ = f1++;
	452	} else {
	453	FROMTOUPTO(f1, tp2, t);
	454	tp2++ = f2++;
	455	}
	456	}
	457
	458
	459	/* Run out remaining list */
	460	if (f1 == l1) {
	461	if (f2 < l2) FROMTOUPTO(f2, tp2, l2);
	462	} else FROMTOUPTO(f1, tp2, l1);
	463	p1 = NEXT(p1) = POTHER(tp2, list2, list1);
	464
	465	if (--level == 0) goto done;
	466	--stackp;
	467	t = list1; list1 = list2; list2 = t; /* swap lists */
	468	} while ((runs = stackp->runs) == 0);
	469	}
	470
	471
	472	stackp->runs = 0; /* current run will finish level */
	473	/* While there are more than 2 runs remaining,
	474	* turn them into exactly 2 runs (at the "other" level),
	475	* each made up of approximately half the runs.
	476	* Stack the second half for later processing,
	477	* and set about producing the first half now.
	478	*/
	479	while (runs > 2) {
	480	++level;
	481	++stackp;
	482	stackp->offset = offset;
	483	runs -= stackp->runs = runs / 2;
	484	}
	485	/* We must construct a single run from 1 or 2 runs.
	486	* All the original runs are in which[0] == base.
	487	* The run we construct must end up in which[level&1].
	488	*/
	489	iwhich = level & 1;
	490	if (runs == 1) {
	491	/* Constructing a single run from a single run.
	492	* If it's where it belongs already, there's nothing to do.
	493	* Otherwise, copy it to where it belongs.
	494	* A run of 1 is either a singleton at level 0,
	495	* or the second half of a split 3. In neither event
	496	* is it necessary to set offset. It will be set by the merge
	497	* that immediately follows.
	498	*/
	499	if (iwhich) { /* Belongs in aux, currently in base */
	500	f1 = b = PINDEX(base, offset); /* where list starts */
	501	f2 = PINDEX(aux, offset); /* where list goes */
	502	t = NEXT(f2); /* where list will end */
	503	offset = PNELEM(aux, t); /* offset thereof */
	504	t = PINDEX(base, offset); /* where it currently ends */
	505	FROMTOUPTO(f1, f2, t); /* copy */
	506	NEXT(b) = t; /* set up parallel pointer */
	507	} else if (level == 0) goto done; /* single run at level 0 */
	508	} else {
	509	/* Constructing a single run from two runs.
	510	* The merge code at the top will do that.
	511	* We need only make sure the two runs are in the "other" array,
	512	* so they'll end up in the correct array after the merge.
	513	*/
	514	++level;
	515	++stackp;
	516	stackp->offset = offset;
	517	stackp->runs = 0; /* take care of both runs, trigger merge */
	518	if (!iwhich) { /* Merged runs belong in aux, copy 1st */
	519	f1 = b = PINDEX(base, offset); /* where first run starts */
	520	f2 = PINDEX(aux, offset); /* where it will be copied */
	521	t = NEXT(f2); /* where first run will end */
	522	offset = PNELEM(aux, t); /* offset thereof */
	523	p = PINDEX(base, offset); /* end of first run */
	524	t = NEXT(t); /* where second run will end */
	525	t = PINDEX(base, PNELEM(aux, t)); /* where it now ends */
	526	FROMTOUPTO(f1, f2, t); /* copy both runs */
	527	NEXT(b) = p; /* paralled pointer for 1st */
	528	NEXT(p) = t; /* ... and for second */
	529	}
	530	}
	531	}
	532	done:
	533	if (aux != small) Safefree(aux); /* free iff allocated */
	534	return;
	535	}
	536
	537	/*
	538	* The quicksort implementation was derived from source code contributed
	539	* by Tom Horsley.
	540	*
	541	* NOTE: this code was derived from Tom Horsley's qsort replacement
	542	* and should not be confused with the original code.
	543	*/
	544
	545	/* Copyright (C) Tom Horsley, 1997. All rights reserved.
	546
	547	Permission granted to distribute under the same terms as perl which are
	548	(briefly):
	549
	550	This program is free software; you can redistribute it and/or modify
	551	it under the terms of either:
	552
	553	a) the GNU General Public License as published by the Free
	554	Software Foundation; either version 1, or (at your option) any
	555	later version, or
	556
	557	b) the "Artistic License" which comes with this Kit.
	558
	559	Details on the perl license can be found in the perl source code which
	560	may be located via the www.perl.com web page.
	561
	562	This is the most wonderfulest possible qsort I can come up with (and
	563	still be mostly portable) My (limited) tests indicate it consistently
	564	does about 20% fewer calls to compare than does the qsort in the Visual
	565	C++ library, other vendors may vary.
	566
	567	Some of the ideas in here can be found in "Algorithms" by Sedgewick,
	568	others I invented myself (or more likely re-invented since they seemed
	569	pretty obvious once I watched the algorithm operate for a while).
	570
	571	Most of this code was written while watching the Marlins sweep the Giants
	572	in the 1997 National League Playoffs - no Braves fans allowed to use this
	573	code (just kidding :-).
	574
	575	I realize that if I wanted to be true to the perl tradition, the only
	576	comment in this file would be something like:
	577
	578	...they shuffled back towards the rear of the line. 'No, not at the
	579	rear!' the slave-driver shouted. 'Three files up. And stay there...
	580
	581	However, I really needed to violate that tradition just so I could keep
	582	track of what happens myself, not to mention some poor fool trying to
	583	understand this years from now :-).
	584	*/
	585
	586	/* ********************************************************** Configuration */
	587
	#ifndef QSORT_ORDER_GUESS
	#define QSORT_ORDER_GUESS 2 /* Select doubling version of the netBSD trick */
	#endif

	/* QSORT_MAX_STACK is the largest number of partitions that can be stacked
	   up for future processing - a good max upper bound is log base 2 of memory
	   size (32 on 32 bit machines, 64 on 64 bit machines, etc). In reality can
	   safely be smaller than that since the program is taking up some space and
	   most operating systems only let you grab some subset of contiguous
	   memory (not to mention that you are normally sorting data larger than
	   1 byte element size :-).
	*/
	#ifndef QSORT_MAX_STACK
	#define QSORT_MAX_STACK 32
	#endif

	/* QSORT_BREAK_EVEN is the size of the largest partition we should insertion
	   sort. Anything bigger and we use qsort. If you make this too small, the
	   qsort will probably break (or become less efficient), because it doesn't
	   expect the middle element of a partition to be the same as the right or
	   left - you have been warned).
	*/
	#ifndef QSORT_BREAK_EVEN
	#define QSORT_BREAK_EVEN 6
	#endif

	/* QSORT_PLAY_SAFE is the size of the largest partition we're willing
	   to go quadratic on. We inoculate larger partitions against
	   quadratic behavior by shuffling them before sorting. This is not
	   an absolute guarantee of non-quadratic behavior, but it would take
	   staggeringly bad luck to pick extreme elements as the pivot
	   from randomized data.
	*/
	#ifndef QSORT_PLAY_SAFE
	#define QSORT_PLAY_SAFE 255
	#endif
	624
	625	/* ************************************************************* Data Types */
	626
	627	/* hold left and right index values of a partition waiting to be sorted (the
	628	partition includes both left and right - right is NOT one past the end or
	629	anything like that).
	630	*/
	/* A partition waiting to be sorted.  Both bounds are inclusive array
	 * indexes.  When QSORT_ORDER_GUESS is defined, the break-even threshold
	 * in force for the partition is stored alongside the bounds. */
	struct partition_stack_entry {
	    int left;
	    int right;
	#ifdef QSORT_ORDER_GUESS
	    int qsort_break_even;
	#endif
	};
	638
	639	/* ******************************************************* Shorthand Macros */
	640
	641	/* Note that these macros will be used from inside the qsort function where
	642	we happen to know that the variable 'elt_size' contains the size of an
	643	array element and the variable 'temp' points to enough space to hold a
	644	temp element and the variable 'array' points to the array being sorted
	645	and 'compare' is the pointer to the compare routine.
	646
	647	Also note that there are very many highly architecture specific ways
	648	these might be sped up, but this is simply the most generally portable
	649	code I could think of.
	650	*/
	651
	/* Return < 0, == 0 or > 0 as the value of elt1 is < elt2, == elt2, > elt2.
	   elt1 and elt2 are indexes into 'array'; 'compare' must be in scope.
	*/
	#define qsort_cmp(elt1, elt2) \
	    ((*compare)(aTHX_ array[elt1], array[elt2]))

	/* With QSORT_ORDER_GUESS defined, every swap/rotate bumps the local counter
	   'swapped'; otherwise this expands to nothing.
	*/
	#ifdef QSORT_ORDER_GUESS
	#define QSORT_NOTICE_SWAP swapped++;
	#else
	#define QSORT_NOTICE_SWAP
	#endif

	/* swaps contents of array elements elt1, elt2 (uses the local 'temp').
	*/
	#define qsort_swap(elt1, elt2) \
	    STMT_START { \
	        QSORT_NOTICE_SWAP \
	        temp = array[elt1]; \
	        array[elt1] = array[elt2]; \
	        array[elt2] = temp; \
	    } STMT_END

	/* rotate contents of elt1, elt2, elt3 such that elt1 gets elt2, elt2 gets
	   elt3 and elt3 gets elt1.
	*/
	#define qsort_rotate(elt1, elt2, elt3) \
	    STMT_START { \
	        QSORT_NOTICE_SWAP \
	        temp = array[elt1]; \
	        array[elt1] = array[elt2]; \
	        array[elt2] = array[elt3]; \
	        array[elt3] = temp; \
	    } STMT_END
	684
	685	/* ************************************************************ Debug stuff */
	686
	#ifdef QSORT_DEBUG

	/* Does nothing; qsort_assert() calls it when a check fails, so a debugger
	 * breakpoint placed here catches the failure. */
	static void
	break_here(void)
	{
	    return; /* good place to set a breakpoint */
	}

	#define qsort_assert(t) (void)( (t) || (break_here(), 0) )

	/*
	 * Check the partitioning invariants around the pivot chunk:
	 *   - the chunk is non-empty and lies strictly between u_right and u_left;
	 *   - every element in (u_right, pc_left) compares below array[pc_left];
	 *   - every element in [pc_left, pc_right) compares equal to array[pc_right];
	 *   - every element in (pc_right, u_left) compares above array[pc_right].
	 *
	 * The parameters mirror what the quicksort routine has in scope (an array
	 * of SV pointers, its length and the comparison routine), because
	 * qsort_cmp() indexes 'array' and passes the interpreter context.
	 * num_elts is currently unused.
	 */
	static void
	doqsort_all_asserts(
	    pTHX_
	    SV ** array,
	    size_t num_elts,
	    SVCOMPARE_t compare,
	    int pc_left, int pc_right, int u_left, int u_right)
	{
	    int i;

	    qsort_assert(pc_left <= pc_right);
	    qsort_assert(u_right < pc_left);
	    qsort_assert(pc_right < u_left);
	    for (i = u_right + 1; i < pc_left; ++i) {
	        qsort_assert(qsort_cmp(i, pc_left) < 0);
	    }
	    for (i = pc_left; i < pc_right; ++i) {
	        qsort_assert(qsort_cmp(i, pc_right) == 0);
	    }
	    for (i = pc_right + 1; i < u_left; ++i) {
	        qsort_assert(qsort_cmp(pc_right, i) < 0);
	    }
	}

	#define qsort_all_asserts(PC_LEFT, PC_RIGHT, U_LEFT, U_RIGHT) \
	    doqsort_all_asserts(aTHX_ array, num_elts, compare, \
	        PC_LEFT, PC_RIGHT, U_LEFT, U_RIGHT)

	#else

	/* Without QSORT_DEBUG both checks expand to nothing and do not evaluate
	 * their arguments. */
	#define qsort_assert(t) ((void)0)

	#define qsort_all_asserts(PC_LEFT, PC_RIGHT, U_LEFT, U_RIGHT) ((void)0)

	#endif
	732
	733	/* ****************************************************************** qsort */
	734
	735	STATIC void /* the standard unstable (u) quicksort (qsort) */
	736	S_qsortsvu(pTHX_ SV ** array, size_t num_elts, SVCOMPARE_t compare)
	737	{
	738	register SV * temp;
	739
	740	struct partition_stack_entry partition_stack[QSORT_MAX_STACK];
	741	int next_stack_entry = 0;
	742
	743	int part_left;
	744	int part_right;
	745	#ifdef QSORT_ORDER_GUESS
	746	int qsort_break_even;
	747	int swapped;
	748	#endif
	749
	750	/* Make sure we actually have work to do.
	751	*/
	752	if (num_elts <= 1) {
	753	return;
	754	}
	755
	756	/* Innoculate large partitions against quadratic behavior */
	757	if (num_elts > QSORT_PLAY_SAFE) {
	758	register size_t n, j;
	759	register SV **q;
	760	for (n = num_elts, q = array; n > 1; ) {
	761	j = (size_t)(n-- * Drand01());
	762	temp = q[j];
	763	q[j] = q[n];
	764	q[n] = temp;
	765	}
	766	}
	767
	768	/* Setup the initial partition definition and fall into the sorting loop
	769	*/
	770	part_left = 0;
	771	part_right = (int)(num_elts - 1);
	772	#ifdef QSORT_ORDER_GUESS
	773	qsort_break_even = QSORT_BREAK_EVEN;
	774	#else
	775	#define qsort_break_even QSORT_BREAK_EVEN
	776	#endif
	777	for ( ; ; ) {
	778	if ((part_right - part_left) >= qsort_break_even) {
	779	/* OK, this is gonna get hairy, so lets try to document all the
	780	concepts and abbreviations and variables and what they keep
	781	track of:
	782
	783	pc: pivot chunk - the set of array elements we accumulate in the
	784	middle of the partition, all equal in value to the original
	785	pivot element selected. The pc is defined by:
	786
	787	pc_left - the leftmost array index of the pc
	788	pc_right - the rightmost array index of the pc
	789
	790	we start with pc_left == pc_right and only one element
	791	in the pivot chunk (but it can grow during the scan).
	792
	793	u: uncompared elements - the set of elements in the partition
	794	we have not yet compared to the pivot value. There are two
	795	uncompared sets during the scan - one to the left of the pc
	796	and one to the right.
	797
	798	u_right - the rightmost index of the left side's uncompared set
	799	u_left - the leftmost index of the right side's uncompared set
	800
	801	The leftmost index of the left sides's uncompared set
	802	doesn't need its own variable because it is always defined
	803	by the leftmost edge of the whole partition (part_left). The
	804	same goes for the rightmost edge of the right partition
	805	(part_right).
	806
	807	We know there are no uncompared elements on the left once we
	808	get u_right < part_left and no uncompared elements on the
	809	right once u_left > part_right. When both these conditions
	810	are met, we have completed the scan of the partition.
	811
	812	Any elements which are between the pivot chunk and the
	813	uncompared elements should be less than the pivot value on
	814	the left side and greater than the pivot value on the right
	815	side (in fact, the goal of the whole algorithm is to arrange
	816	for that to be true and make the groups of less-than and
	817	greater-then elements into new partitions to sort again).
	818
	819	As you marvel at the complexity of the code and wonder why it
	820	has to be so confusing. Consider some of the things this level
	821	of confusion brings:
	822
	823	Once I do a compare, I squeeze every ounce of juice out of it. I
	824	never do compare calls I don't have to do, and I certainly never
	825	do redundant calls.
	826
	827	I also never swap any elements unless I can prove there is a
	828	good reason. Many sort algorithms will swap a known value with
	829	an uncompared value just to get things in the right place (or
	830	avoid complexity :-), but that uncompared value, once it gets
	831	compared, may then have to be swapped again. A lot of the
	832	complexity of this code is due to the fact that it never swaps
	833	anything except compared values, and it only swaps them when the
	834	compare shows they are out of position.
	835	*/
	836	int pc_left, pc_right;
	837	int u_right, u_left;
	838
	839	int s;
	840
	841	pc_left = ((part_left + part_right) / 2);
	842	pc_right = pc_left;
	843	u_right = pc_left - 1;
	844	u_left = pc_right + 1;
	845
	846	/* Qsort works best when the pivot value is also the median value
	847	in the partition (unfortunately you can't find the median value
	848	without first sorting :-), so to give the algorithm a helping
	849	hand, we pick 3 elements and sort them and use the median value
	850	of that tiny set as the pivot value.
	851
	852	Some versions of qsort like to use the left middle and right as
	853	the 3 elements to sort so they can insure the ends of the
	854	partition will contain values which will stop the scan in the
	855	compare loop, but when you have to call an arbitrarily complex
	856	routine to do a compare, its really better to just keep track of
	857	array index values to know when you hit the edge of the
	858	partition and avoid the extra compare. An even better reason to
	859	avoid using a compare call is the fact that you can drop off the
	860	edge of the array if someone foolishly provides you with an
	861	unstable compare function that doesn't always provide consistent
	862	results.
	863
	864	So, since it is simpler for us to compare the three adjacent
	865	elements in the middle of the partition, those are the ones we
	866	pick here (conveniently pointed at by u_right, pc_left, and
	867	u_left). The values of the left, center, and right elements
	868	are refered to as l c and r in the following comments.
	869	*/
	870
	871	#ifdef QSORT_ORDER_GUESS
	872	swapped = 0;
	873	#endif
	874	s = qsort_cmp(u_right, pc_left);
	875	if (s < 0) {
	876	/* l < c */
	877	s = qsort_cmp(pc_left, u_left);
	878	/* if l < c, c < r - already in order - nothing to do */
	879	if (s == 0) {
	880	/* l < c, c == r - already in order, pc grows */
	881	++pc_right;
	882	qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
	883	} else if (s > 0) {
	884	/* l < c, c > r - need to know more */
	885	s = qsort_cmp(u_right, u_left);
	886	if (s < 0) {
	887	/* l < c, c > r, l < r - swap c & r to get ordered */
	888	qsort_swap(pc_left, u_left);
	889	qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
	890	} else if (s == 0) {
	891	/* l < c, c > r, l == r - swap c&r, grow pc */
	892	qsort_swap(pc_left, u_left);
	893	--pc_left;
	894	qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
	895	} else {
	896	/* l < c, c > r, l > r - make lcr into rlc to get ordered */
	897	qsort_rotate(pc_left, u_right, u_left);
	898	qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
	899	}
	900	}
	901	} else if (s == 0) {
	902	/* l == c */
	903	s = qsort_cmp(pc_left, u_left);
	904	if (s < 0) {
	905	/* l == c, c < r - already in order, grow pc */
	906	--pc_left;
	907	qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
	908	} else if (s == 0) {
	909	/* l == c, c == r - already in order, grow pc both ways */
	910	--pc_left;
	911	++pc_right;
	912	qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
	913	} else {
	914	/* l == c, c > r - swap l & r, grow pc */
	915	qsort_swap(u_right, u_left);
	916	++pc_right;
	917	qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
	918	}
	919	} else {
	920	/* l > c */
	921	s = qsort_cmp(pc_left, u_left);
	922	if (s < 0) {
	923	/* l > c, c < r - need to know more */
	924	s = qsort_cmp(u_right, u_left);
	925	if (s < 0) {
	926	/* l > c, c < r, l < r - swap l & c to get ordered */
	927	qsort_swap(u_right, pc_left);
	928	qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
	929	} else if (s == 0) {
	930	/* l > c, c < r, l == r - swap l & c, grow pc */
	931	qsort_swap(u_right, pc_left);
	932	++pc_right;
	933	qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
	934	} else {
	935	/* l > c, c < r, l > r - rotate lcr into crl to order */
	936	qsort_rotate(u_right, pc_left, u_left);
	937	qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
	938	}
	939	} else if (s == 0) {
	940	/* l > c, c == r - swap ends, grow pc */
	941	qsort_swap(u_right, u_left);
	942	--pc_left;
	943	qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
	944	} else {
	945	/* l > c, c > r - swap ends to get in order */
	946	qsort_swap(u_right, u_left);
	947	qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
	948	}
	949	}
	950	/* We now know the 3 middle elements have been compared and
	951	arranged in the desired order, so we can shrink the uncompared
	952	sets on both sides
	953	*/
	954	--u_right;
	955	++u_left;
	956	qsort_all_asserts(pc_left, pc_right, u_left, u_right);
	957
	958	/* The above massive nested if was the simple part :-). We now have
	959	the middle 3 elements ordered and we need to scan through the
	960	uncompared sets on either side, swapping elements that are on
	961	the wrong side or simply shuffling equal elements around to get
	962	all equal elements into the pivot chunk.
	963	*/
	964
	965	for ( ; ; ) {
	966	int still_work_on_left;
	967	int still_work_on_right;
	968
	969	/* Scan the uncompared values on the left. If I find a value
	970	equal to the pivot value, move it over so it is adjacent to
	971	the pivot chunk and expand the pivot chunk. If I find a value
	972	less than the pivot value, then just leave it - its already
	973	on the correct side of the partition. If I find a greater
	974	value, then stop the scan.
	975	*/
	976	while ((still_work_on_left = (u_right >= part_left))) {
	977	s = qsort_cmp(u_right, pc_left);
	978	if (s < 0) {
	979	--u_right;
	980	} else if (s == 0) {
	981	--pc_left;
	982	if (pc_left != u_right) {
	983	qsort_swap(u_right, pc_left);
	984	}
	985	--u_right;
	986	} else {
	987	break;
	988	}
	989	qsort_assert(u_right < pc_left);
	990	qsort_assert(pc_left <= pc_right);
	991	qsort_assert(qsort_cmp(u_right + 1, pc_left) <= 0);
	992	qsort_assert(qsort_cmp(pc_left, pc_right) == 0);
	993	}
	994
	995	/* Do a mirror image scan of uncompared values on the right
	996	*/
	997	while ((still_work_on_right = (u_left <= part_right))) {
	998	s = qsort_cmp(pc_right, u_left);
	999	if (s < 0) {
	1000	++u_left;
	1001	} else if (s == 0) {
	1002	++pc_right;
	1003	if (pc_right != u_left) {
	1004	qsort_swap(pc_right, u_left);
	1005	}
	1006	++u_left;
	1007	} else {
	1008	break;
	1009	}
	1010	qsort_assert(u_left > pc_right);
	1011	qsort_assert(pc_left <= pc_right);
	1012	qsort_assert(qsort_cmp(pc_right, u_left - 1) <= 0);
	1013	qsort_assert(qsort_cmp(pc_left, pc_right) == 0);
	1014	}
	1015
	1016	if (still_work_on_left) {
	1017	/* I know I have a value on the left side which needs to be
	1018	on the right side, but I need to know more to decide
	1019	exactly the best thing to do with it.
	1020	*/
	1021	if (still_work_on_right) {
	1022	/* I know I have values on both side which are out of
	1023	position. This is a big win because I kill two birds
	1024	with one swap (so to speak). I can advance the
	1025	uncompared pointers on both sides after swapping both
	1026	of them into the right place.
	1027	*/
	1028	qsort_swap(u_right, u_left);
	1029	--u_right;
	1030	++u_left;
	1031	qsort_all_asserts(pc_left, pc_right, u_left, u_right);
	1032	} else {
	1033	/* I have an out of position value on the left, but the
	1034	right is fully scanned, so I "slide" the pivot chunk
	1035	and any less-than values left one to make room for the
	1036	greater value over on the right. If the out of position
	1037	value is immediately adjacent to the pivot chunk (there
	1038	are no less-than values), I can do that with a swap,
	1039	otherwise, I have to rotate one of the less than values
	1040	into the former position of the out of position value
	1041	and the right end of the pivot chunk into the left end
	1042	(got all that?).
	1043	*/
	1044	--pc_left;
	1045	if (pc_left == u_right) {
	1046	qsort_swap(u_right, pc_right);
	1047	qsort_all_asserts(pc_left, pc_right-1, u_left, u_right-1);
	1048	} else {
	1049	qsort_rotate(u_right, pc_left, pc_right);
	1050	qsort_all_asserts(pc_left, pc_right-1, u_left, u_right-1);
	1051	}
	1052	--pc_right;
	1053	--u_right;
	1054	}
	1055	} else if (still_work_on_right) {
	1056	/* Mirror image of complex case above: I have an out of
	1057	position value on the right, but the left is fully
	1058	scanned, so I need to shuffle things around to make room
	1059	for the right value on the left.
	1060	*/
	1061	++pc_right;
	1062	if (pc_right == u_left) {
	1063	qsort_swap(u_left, pc_left);
	1064	qsort_all_asserts(pc_left+1, pc_right, u_left+1, u_right);
	1065	} else {
	1066	qsort_rotate(pc_right, pc_left, u_left);
	1067	qsort_all_asserts(pc_left+1, pc_right, u_left+1, u_right);
	1068	}
	1069	++pc_left;
	1070	++u_left;
	1071	} else {
	1072	/* No more scanning required on either side of partition,
	1073	break out of loop and figure out next set of partitions
	1074	*/
	1075	break;
	1076	}
	1077	}
	1078
	1079	/* The elements in the pivot chunk are now in the right place. They
	1080	will never move or be compared again. All I have to do is decide
	1081	what to do with the stuff to the left and right of the pivot
	1082	chunk.
	1083
	1084	Notes on the QSORT_ORDER_GUESS ifdef code:
	1085
	1086	1. If I just built these partitions without swapping any (or
	1087	very many) elements, there is a chance that the elements are
	1088	already ordered properly (being properly ordered will
	1089	certainly result in no swapping, but the converse can't be
	1090	proved :-).
	1091
	1092	2. A (properly written) insertion sort will run faster on
	1093	already ordered data than qsort will.
	1094
	1095	3. Perhaps there is some way to make a good guess about
	1096	switching to an insertion sort earlier than partition size 6
	1097	(for instance - we could save the partition size on the stack
	1098	and increase the size each time we find we didn't swap, thus
	1099	switching to insertion sort earlier for partitions with a
	1100	history of not swapping).
	1101
	1102	4. Naturally, if I just switch right away, it will make
	1103	artificial benchmarks with pure ascending (or descending)
	1104	data look really good, but is that a good reason in general?
	1105	Hard to say...
	1106	*/
	1107
	1108	#ifdef QSORT_ORDER_GUESS
	1109	if (swapped < 3) {
	1110	#if QSORT_ORDER_GUESS == 1
	1111	qsort_break_even = (part_right - part_left) + 1;
	1112	#endif
	1113	#if QSORT_ORDER_GUESS == 2
	1114	qsort_break_even *= 2;
	1115	#endif
	1116	#if QSORT_ORDER_GUESS == 3
	1117	int prev_break = qsort_break_even;
	1118	qsort_break_even *= qsort_break_even;
	1119	if (qsort_break_even < prev_break) {
	1120	qsort_break_even = (part_right - part_left) + 1;
	1121	}
	1122	#endif
	1123	} else {
	1124	qsort_break_even = QSORT_BREAK_EVEN;
	1125	}
	1126	#endif
	1127
	1128	if (part_left < pc_left) {
	1129	/* There are elements on the left which need more processing.
	1130	Check the right as well before deciding what to do.
	1131	*/
	1132	if (pc_right < part_right) {
	1133	/* We have two partitions to be sorted. Stack the biggest one
	1134	and process the smallest one on the next iteration. This
	1135	minimizes the stack height by insuring that any additional
	1136	stack entries must come from the smallest partition which
	1137	(because it is smallest) will have the fewest
	1138	opportunities to generate additional stack entries.
	1139	*/
	1140	if ((part_right - pc_right) > (pc_left - part_left)) {
	1141	/* stack the right partition, process the left */
	1142	partition_stack[next_stack_entry].left = pc_right + 1;
	1143	partition_stack[next_stack_entry].right = part_right;
	1144	#ifdef QSORT_ORDER_GUESS
	1145	partition_stack[next_stack_entry].qsort_break_even = qsort_break_even;
	1146	#endif
	1147	part_right = pc_left - 1;
	1148	} else {
	1149	/* stack the left partition, process the right */
	1150	partition_stack[next_stack_entry].left = part_left;
	1151	partition_stack[next_stack_entry].right = pc_left - 1;
	1152	#ifdef QSORT_ORDER_GUESS
	1153	partition_stack[next_stack_entry].qsort_break_even = qsort_break_even;
	1154	#endif
	1155	part_left = pc_right + 1;
	1156	}
	1157	qsort_assert(next_stack_entry < QSORT_MAX_STACK);
	1158	++next_stack_entry;
	1159	} else {
	1160	/* The elements on the left are the only remaining elements
	1161	that need sorting, arrange for them to be processed as the
	1162	next partition.
	1163	*/
	1164	part_right = pc_left - 1;
	1165	}
	1166	} else if (pc_right < part_right) {
	1167	/* There is only one chunk on the right to be sorted, make it
	1168	the new partition and loop back around.
	1169	*/
	1170	part_left = pc_right + 1;
	1171	} else {
	1172	/* This whole partition wound up in the pivot chunk, so
	1173	we need to get a new partition off the stack.
	1174	*/
	1175	if (next_stack_entry == 0) {
	1176	/* the stack is empty - we are done */
	1177	break;
	1178	}
	1179	--next_stack_entry;
	1180	part_left = partition_stack[next_stack_entry].left;
	1181	part_right = partition_stack[next_stack_entry].right;
	1182	#ifdef QSORT_ORDER_GUESS
	1183	qsort_break_even = partition_stack[next_stack_entry].qsort_break_even;
	1184	#endif
	1185	}
	1186	} else {
	1187	/* This partition is too small to fool with qsort complexity, just
	1188	do an ordinary insertion sort to minimize overhead.
	1189	*/
	1190	int i;
	1191	/* Assume 1st element is in right place already, and start checking
	1192	at 2nd element to see where it should be inserted.
	1193	*/
	1194	for (i = part_left + 1; i <= part_right; ++i) {
	1195	int j;
	1196	/* Scan (backwards - just in case 'i' is already in right place)
	1197	through the elements already sorted to see if the ith element
	1198	belongs ahead of one of them.
	1199	*/
	1200	for (j = i - 1; j >= part_left; --j) {
	1201	if (qsort_cmp(i, j) >= 0) {
	1202	/* i belongs right after j
	1203	*/
	1204	break;
	1205	}
	1206	}
	1207	++j;
	1208	if (j != i) {
	1209	/* Looks like we really need to move some things
	1210	*/
	1211	int k;
	1212	temp = array[i];
	1213	for (k = i - 1; k >= j; --k)
	1214	array[k + 1] = array[k];
	1215	array[j] = temp;
	1216	}
	1217	}
	1218
	1219	/* That partition is now sorted, grab the next one, or get out
	1220	of the loop if there aren't any more.
	1221	*/
	1222
	1223	if (next_stack_entry == 0) {
	1224	/* the stack is empty - we are done */
	1225	break;
	1226	}
	1227	--next_stack_entry;
	1228	part_left = partition_stack[next_stack_entry].left;
	1229	part_right = partition_stack[next_stack_entry].right;
	1230	#ifdef QSORT_ORDER_GUESS
	1231	qsort_break_even = partition_stack[next_stack_entry].qsort_break_even;
	1232	#endif
	1233	}
	1234	}
	1235
	1236	/* Believe it or not, the array is sorted at this point! */
	1237	}
	1238
	1239	/* Stabilize what is, presumably, an otherwise unstable sort method.
	1240	* We do that by allocating (or having on hand) an array of pointers
	1241	* that is the same size as the original array of elements to be sorted.
	1242	* We initialize this parallel array with the addresses of the original
	1243	* array elements. This indirection can make you crazy.
	1244	* Some pictures can help. After initializing, we have
	1245	*
	1246	*   indir                  list1
	1247	* +----+                 +----+
	1248	* |    | --------------> |    | ------> first element to be sorted
	1249	* +----+                 +----+
	1250	* |    | --------------> |    | ------> second element to be sorted
	1251	* +----+                 +----+
	1252	* |    | --------------> |    | ------> third element to be sorted
	1253	* +----+                 +----+
	1254	*  ...
	1255	* +----+                 +----+
	1256	* |    | --------------> |    | ------> n-1st element to be sorted
	1257	* +----+                 +----+
	1258	* |    | --------------> |    | ------> n-th element to be sorted
	1259	* +----+                 +----+
	1260	*
	1261	* During the sort phase, we leave the elements of list1 where they are,
	1262	* and sort the pointers in the indirect array in the same order determined
	1263	* by the original comparison routine on the elements pointed to.
	1264	* Because we don't move the elements of list1 around through
	1265	* this phase, we can break ties on elements that compare equal
	1266	* using their address in the list1 array, ensuring stability.
	1267	* This leaves us with something looking like
	1268	*
	1269	*   indir                  list1
	1270	* +----+                 +----+
	1271	* |    | --+       +---> |    | ------> first element to be sorted
	1272	* +----+   |       |     +----+
	1273	* |    | --|-------|---> |    | ------> second element to be sorted
	1274	* +----+   |       |     +----+
	1275	* |    | --|-------+ +-> |    | ------> third element to be sorted
	1276	* +----+   |         |   +----+
	1277	*  ...
	1278	* +----+    | |   | |    +----+
	1279	* |    | ---|-+   | +--> |    | ------> n-1st element to be sorted
	1280	* +----+    |     |      +----+
	1281	* |    | ---+     +----> |    | ------> n-th element to be sorted
	1282	* +----+                 +----+
	1283	*
	1284	* where the i-th element of the indirect array points to the element
	1285	* that should be i-th in the sorted array. After the sort phase,
	1286	* we have to put the elements of list1 into the places
	1287	* dictated by the indirect array.
	1288	*/
	1289
	1290
	1291	static I32
	1292	cmpindir(pTHX_ gptr a, gptr b)
	1293	{
	1294	I32 sense;
	1295	gptr ap = (gptr )a;
	1296	gptr bp = (gptr )b;
	1297
	1298	if ((sense = PL_sort_RealCmp(aTHX_ ap, bp)) == 0)
	1299	sense = (ap > bp) ? 1 : ((ap < bp) ? -1 : 0);
	1300	return sense;
	1301	}
	1302
	1303	STATIC void
	1304	S_qsortsv(pTHX_ gptr *list1, size_t nmemb, SVCOMPARE_t cmp)
	1305	{
	1306	SV *hintsv;
	1307
	1308	if (SORTHINTS(hintsv) & HINT_SORT_STABLE) {
	1309	register gptr *pp, q;
	1310	register size_t n, j, i;
	1311	gptr small[SMALLSORT], *indir, tmp;
	1312	SVCOMPARE_t savecmp;
	1313	if (nmemb <= 1) return; /* sorted trivially */
	1314
	1315	/* Small arrays can use the stack, big ones must be allocated */
	1316	if (nmemb <= SMALLSORT) indir = small;
	1317	else { New(1799, indir, nmemb, gptr *); }
	1318
	1319	/* Copy pointers to original array elements into indirect array */
	1320	for (n = nmemb, pp = indir, q = list1; n--; ) *pp++ = q++;
	1321
	1322	savecmp = PL_sort_RealCmp; /* Save current comparison routine, if any */
	1323	PL_sort_RealCmp = cmp; /* Put comparison routine where cmpindir can find it */
	1324
	1325	/* sort, with indirection */
	1326	S_qsortsvu(aTHX_ (gptr *)indir, nmemb, cmpindir);
	1327
	1328	pp = indir;
	1329	q = list1;
	1330	for (n = nmemb; n--; ) {
	1331	/* Assert A: all elements of q with index > n are already
	1332	* in place. This is vacuosly true at the start, and we
	1333	* put element n where it belongs below (if it wasn't
	1334	* already where it belonged). Assert B: we only move
	1335	* elements that aren't where they belong,
	1336	* so, by A, we never tamper with elements above n.
	1337	*/
	1338	j = pp[n] - q; /* This sets j so that q[j] is
	1339	* at pp[n]. *pp[j] belongs in
	1340	* q[j], by construction.
	1341	*/
	1342	if (n != j) { /* all's well if n == j */
	1343	tmp = q[j]; /* save what's in q[j] */
	1344	do {
	1345	q[j] = pp[j]; / put pp[j] where it belongs /
	1346	i = pp[j] - q; /* the index in q of the element
	1347	* just moved */
	1348	pp[j] = q + j; /* this is ok now */
	1349	} while ((j = i) != n);
	1350	/* There are only finitely many (nmemb) addresses
	1351	* in the pp array.
	1352	* So we must eventually revisit an index we saw before.
	1353	* Suppose the first revisited index is k != n.
	1354	* An index is visited because something else belongs there.
	1355	* If we visit k twice, then two different elements must
	1356	* belong in the same place, which cannot be.
	1357	* So j must get back to n, the loop terminates,
	1358	* and we put the saved element where it belongs.
	1359	*/
	1360	q[n] = tmp; /* put what belongs into
	1361	* the n-th element */
	1362	}
	1363	}
	1364
	1365	/* free iff allocated */
	1366	if (indir != small) { Safefree(indir); }
	1367	/* restore prevailing comparison routine */
	1368	PL_sort_RealCmp = savecmp;
	1369	} else {
	1370	S_qsortsvu(aTHX_ list1, nmemb, cmp);
	1371	}
	1372	}
	1373
	1374	/*
	1375	=head1 Array Manipulation Functions
	1376
	1377	=for apidoc sortsv
	1378
	1379	Sort an array. Here is an example:
	1380
	1381	sortsv(AvARRAY(av), av_len(av)+1, Perl_sv_cmp_locale);
	1382
	1383	See lib/sort.pm for details about controlling the sorting algorithm.
	1384
	1385	=cut
	1386	*/
	1387
	1388	void
	1389	Perl_sortsv(pTHX_ SV **array, size_t nmemb, SVCOMPARE_t cmp)
	1390	{
	1391	void (sortsvp)(pTHX_ SV *array, size_t nmemb, SVCOMPARE_t cmp) =
	1392	S_mergesortsv;
	1393	SV *hintsv;
	1394	I32 hints;
	1395
	1396	/* Sun's Compiler (cc: WorkShop Compilers 4.2 30 Oct 1996 C 4.2) used
	1397	to miscompile this function under optimization -O. If you get test
	1398	errors related to picking the correct sort() function, try recompiling
	1399	this file without optimiziation. -- A.D. 4/2002.
	1400	*/
	1401	hints = SORTHINTS(hintsv);
	1402	if (hints & HINT_SORT_QUICKSORT) {
	1403	sortsvp = S_qsortsv;
	1404	}
	1405	else {
	1406	/* The default as of 5.8.0 is mergesort */
	1407	sortsvp = S_mergesortsv;
	1408	}
	1409
	1410	sortsvp(aTHX_ array, nmemb, cmp);
	1411	}
	1412
	1413	PP(pp_sort)
	1414	{
	1415	dSP; dMARK; dORIGMARK;
	1416	register SV p1 = ORIGMARK+1, p2;
	1417	register I32 max, i;
	1418	AV* av = Nullav;
	1419	HV *stash;
	1420	GV *gv;
	1421	CV *cv = 0;
	1422	I32 gimme = GIMME;
	1423	OP* nextop = PL_op->op_next;
	1424	I32 overloading = 0;
	1425	bool hasargs = FALSE;
	1426	I32 is_xsub = 0;
	1427	I32 sorting_av = 0;
	1428
	1429	if (gimme != G_ARRAY) {
	1430	SP = MARK;
	1431	RETPUSHUNDEF;
	1432	}
	1433
	1434	ENTER;
	1435	SAVEVPTR(PL_sortcop);
	1436	if (PL_op->op_flags & OPf_STACKED) {
	1437	if (PL_op->op_flags & OPf_SPECIAL) {
	1438	OP kid = cLISTOP->op_first->op_sibling; / pass pushmark */
	1439	kid = kUNOP->op_first; /* pass rv2gv */
	1440	kid = kUNOP->op_first; /* pass leave */
	1441	PL_sortcop = kid->op_next;
	1442	stash = CopSTASH(PL_curcop);
	1443	}
	1444	else {
	1445	cv = sv_2cv(*++MARK, &stash, &gv, 0);
	1446	if (cv && SvPOK(cv)) {
	1447	STRLEN n_a;
	1448	char proto = SvPV((SV)cv, n_a);
	1449	if (proto && strEQ(proto, "$$")) {
	1450	hasargs = TRUE;
	1451	}
	1452	}
	1453	if (!(cv && CvROOT(cv))) {
	1454	if (cv && CvXSUB(cv)) {
	1455	is_xsub = 1;
	1456	}
	1457	else if (gv) {
	1458	SV *tmpstr = sv_newmortal();
	1459	gv_efullname3(tmpstr, gv, Nullch);
	1460	DIE(aTHX_ "Undefined sort subroutine \"%"SVf"\" called",
	1461	tmpstr);
	1462	}
	1463	else {
	1464	DIE(aTHX_ "Undefined subroutine in sort");
	1465	}
	1466	}
	1467
	1468	if (is_xsub)
	1469	PL_sortcop = (OP*)cv;
	1470	else {
	1471	PL_sortcop = CvSTART(cv);
	1472	SAVEVPTR(CvROOT(cv)->op_ppaddr);
	1473	CvROOT(cv)->op_ppaddr = PL_ppaddr[OP_NULL];
	1474
	1475	PAD_SET_CUR(CvPADLIST(cv), 1);
	1476	}
	1477	}
	1478	}
	1479	else {
	1480	PL_sortcop = Nullop;
	1481	stash = CopSTASH(PL_curcop);
	1482	}
	1483
	1484	/* optimiser converts "@a = sort @a" to "sort \@a";
	1485	* in case of tied @a, pessimise: push (@a) onto stack, then assign
	1486	* result back to @a at the end of this function */
	1487	if (PL_op->op_private & OPpSORT_INPLACE) {
	1488	assert( MARK+1 == SP && SP && SvTYPE(SP) == SVt_PVAV);
	1489	(void)POPMARK; /* remove mark associated with ex-OP_AASSIGN */
	1490	av = (AV)(SP);
	1491	max = AvFILL(av) + 1;
	1492	if (SvMAGICAL(av)) {
	1493	MEXTEND(SP, max);
	1494	p2 = SP;
	1495	for (i=0; i < (U32)max; i++) {
	1496	SV **svp = av_fetch(av, i, FALSE);
	1497	SP++ = (svp) ? svp : Nullsv;
	1498	}
	1499	}
	1500	else {
	1501	p1 = p2 = AvARRAY(av);
	1502	sorting_av = 1;
	1503	}
	1504	}
	1505	else {
	1506	p2 = MARK+1;
	1507	max = SP - MARK;
	1508	}
	1509
	1510	/* shuffle stack down, removing optional initial cv (p1!=p2), plus any
	1511	* nulls; also stringify any args */
	1512	for (i=max; i > 0 ; i--) {
	1513	if ((p1 = p2++)) { /* Weed out nulls. */
	1514	SvTEMP_off(*p1);
	1515	if (!PL_sortcop && !SvPOK(*p1)) {
	1516	STRLEN n_a;
	1517	if (SvAMAGIC(*p1))
	1518	overloading = 1;
	1519	else
	1520	(void)sv_2pv(*p1, &n_a);
	1521	}
	1522	p1++;
	1523	}
	1524	else
	1525	max--;
	1526	}
	1527	if (sorting_av)
	1528	AvFILLp(av) = max-1;
	1529
	1530	if (max > 1) {
	1531	if (PL_sortcop) {
	1532	PERL_CONTEXT *cx;
	1533	SV** newsp;
	1534	bool oldcatch = CATCH_GET;
	1535
	1536	SAVETMPS;
	1537	SAVEOP();
	1538
	1539	CATCH_SET(TRUE);
	1540	PUSHSTACKi(PERLSI_SORT);
	1541	if (!hasargs && !is_xsub) {
	1542	if (PL_sortstash != stash \|\| !PL_firstgv \|\| !PL_secondgv) {
	1543	SAVESPTR(PL_firstgv);
	1544	SAVESPTR(PL_secondgv);
	1545	PL_firstgv = gv_fetchpv("a", TRUE, SVt_PV);
	1546	PL_secondgv = gv_fetchpv("b", TRUE, SVt_PV);
	1547	PL_sortstash = stash;
	1548	}
	1549	SAVESPTR(GvSV(PL_firstgv));
	1550	SAVESPTR(GvSV(PL_secondgv));
	1551	}
	1552
	1553	PUSHBLOCK(cx, CXt_NULL, PL_stack_base);
	1554	if (!(PL_op->op_flags & OPf_SPECIAL)) {
	1555	cx->cx_type = CXt_SUB;
	1556	cx->blk_gimme = G_SCALAR;
	1557	PUSHSUB(cx);
	1558	}
	1559	PL_sortcxix = cxstack_ix;
	1560
	1561	if (hasargs && !is_xsub) {
	1562	/* This is mostly copied from pp_entersub */
	1563	AV av = (AV)PAD_SVl(0);
	1564
	1565	cx->blk_sub.savearray = GvAV(PL_defgv);
	1566	GvAV(PL_defgv) = (AV*)SvREFCNT_inc(av);
	1567	CX_CURPAD_SAVE(cx->blk_sub);
	1568	cx->blk_sub.argarray = av;
	1569	}
	1570	sortsv(p1-max, max,
	1571	is_xsub ? sortcv_xsub : hasargs ? sortcv_stacked : sortcv);
	1572
	1573	POPBLOCK(cx,PL_curpm);
	1574	PL_stack_sp = newsp;
	1575	POPSTACK;
	1576	CATCH_SET(oldcatch);
	1577	}
	1578	else {
	1579	MEXTEND(SP, 20); /* Can't afford stack realloc on signal. */
	1580	sortsv(sorting_av ? AvARRAY(av) : ORIGMARK+1, max,
	1581	(PL_op->op_private & OPpSORT_NUMERIC)
	1582	? ( (PL_op->op_private & OPpSORT_INTEGER)
	1583	? ( overloading ? amagic_i_ncmp : sv_i_ncmp)
	1584	: ( overloading ? amagic_ncmp : sv_ncmp))
	1585	: ( IN_LOCALE_RUNTIME
	1586	? ( overloading
	1587	? amagic_cmp_locale
	1588	: sv_cmp_locale_static)
	1589	: ( overloading ? amagic_cmp : sv_cmp_static)));
	1590	if (PL_op->op_private & OPpSORT_REVERSE) {
	1591	SV **p = sorting_av ? AvARRAY(av) : ORIGMARK+1;
	1592	SV **q = p+max-1;
	1593	while (p < q) {
	1594	SV tmp = p;
	1595	p++ = q;
	1596	*q-- = tmp;
	1597	}
	1598	}
	1599	}
	1600	}
	1601	if (av && !sorting_av) {
	1602	/* simulate pp_aassign of tied AV */
	1603	SV *sv;
	1604	SV base, didstore;
	1605	for (base = ORIGMARK+1, i=0; i < max; i++) {
	1606	sv = NEWSV(28,0);
	1607	sv_setsv(sv, base[i]);
	1608	base[i] = sv;
	1609	}
	1610	av_clear(av);
	1611	av_extend(av, max);
	1612	for (i=0; i < max; i++) {
	1613	sv = base[i];
	1614	didstore = av_store(av, i, sv);
	1615	if (SvSMAGICAL(sv))
	1616	mg_set(sv);
	1617	if (!didstore)
	1618	sv_2mortal(sv);
	1619	}
	1620	}
	1621	LEAVE;
	1622	PL_stack_sp = ORIGMARK + (sorting_av ? 0 : max);
	1623	return nextop;
	1624	}
	1625
	1626	static I32
	1627	sortcv(pTHX_ SV a, SV b)
	1628	{
	1629	I32 oldsaveix = PL_savestack_ix;
	1630	I32 oldscopeix = PL_scopestack_ix;
	1631	I32 result;
	1632	GvSV(PL_firstgv) = a;
	1633	GvSV(PL_secondgv) = b;
	1634	PL_stack_sp = PL_stack_base;
	1635	PL_op = PL_sortcop;
	1636	CALLRUNOPS(aTHX);
	1637	if (PL_stack_sp != PL_stack_base + 1)
	1638	Perl_croak(aTHX_ "Sort subroutine didn't return single value");
	1639	if (!SvNIOKp(*PL_stack_sp))
	1640	Perl_croak(aTHX_ "Sort subroutine didn't return a numeric value");
	1641	result = SvIV(*PL_stack_sp);
	1642	while (PL_scopestack_ix > oldscopeix) {
	1643	LEAVE;
	1644	}
	1645	leave_scope(oldsaveix);
	1646	return result;
	1647	}
	1648
	1649	static I32
	1650	sortcv_stacked(pTHX_ SV a, SV b)
	1651	{
	1652	I32 oldsaveix = PL_savestack_ix;
	1653	I32 oldscopeix = PL_scopestack_ix;
	1654	I32 result;
	1655	AV *av;
	1656
	1657	av = GvAV(PL_defgv);
	1658
	1659	if (AvMAX(av) < 1) {
	1660	SV** ary = AvALLOC(av);
	1661	if (AvARRAY(av) != ary) {
	1662	AvMAX(av) += AvARRAY(av) - AvALLOC(av);
	1663	SvPVX(av) = (char*)ary;
	1664	}
	1665	if (AvMAX(av) < 1) {
	1666	AvMAX(av) = 1;
	1667	Renew(ary,2,SV*);
	1668	SvPVX(av) = (char*)ary;
	1669	}
	1670	}
	1671	AvFILLp(av) = 1;
	1672
	1673	AvARRAY(av)[0] = a;
	1674	AvARRAY(av)[1] = b;
	1675	PL_stack_sp = PL_stack_base;
	1676	PL_op = PL_sortcop;
	1677	CALLRUNOPS(aTHX);
	1678	if (PL_stack_sp != PL_stack_base + 1)
	1679	Perl_croak(aTHX_ "Sort subroutine didn't return single value");
	1680	if (!SvNIOKp(*PL_stack_sp))
	1681	Perl_croak(aTHX_ "Sort subroutine didn't return a numeric value");
	1682	result = SvIV(*PL_stack_sp);
	1683	while (PL_scopestack_ix > oldscopeix) {
	1684	LEAVE;
	1685	}
	1686	leave_scope(oldsaveix);
	1687	return result;
	1688	}
	1689
	1690	static I32
	1691	sortcv_xsub(pTHX_ SV a, SV b)
	1692	{
	1693	dSP;
	1694	I32 oldsaveix = PL_savestack_ix;
	1695	I32 oldscopeix = PL_scopestack_ix;
	1696	I32 result;
	1697	CV cv=(CV)PL_sortcop;
	1698
	1699	SP = PL_stack_base;
	1700	PUSHMARK(SP);
	1701	EXTEND(SP, 2);
	1702	*++SP = a;
	1703	*++SP = b;
	1704	PUTBACK;
	1705	(void)(*CvXSUB(cv))(aTHX_ cv);
	1706	if (PL_stack_sp != PL_stack_base + 1)
	1707	Perl_croak(aTHX_ "Sort subroutine didn't return single value");
	1708	if (!SvNIOKp(*PL_stack_sp))
	1709	Perl_croak(aTHX_ "Sort subroutine didn't return a numeric value");
	1710	result = SvIV(*PL_stack_sp);
	1711	while (PL_scopestack_ix > oldscopeix) {
	1712	LEAVE;
	1713	}
	1714	leave_scope(oldsaveix);
	1715	return result;
	1716	}
	1717
	1718
	1719	static I32
	1720	sv_ncmp(pTHX_ SV a, SV b)
	1721	{
	1722	NV nv1 = SvNV(a);
	1723	NV nv2 = SvNV(b);
	1724	return nv1 < nv2 ? -1 : nv1 > nv2 ? 1 : 0;
	1725	}
	1726
	1727	static I32
	1728	sv_i_ncmp(pTHX_ SV a, SV b)
	1729	{
	1730	IV iv1 = SvIV(a);
	1731	IV iv2 = SvIV(b);
	1732	return iv1 < iv2 ? -1 : iv1 > iv2 ? 1 : 0;
	1733	}
	/*
	 * tryCALL_AMAGICbin(left, right, meth, svp)
	 *
	 * Sets *svp to Nullsv; then, if PL_amagic_generation is non-zero and
	 * either operand has SvAMAGIC set, stores in *svp the result of
	 * amagic_call() for the overload method meth.
	 */
	#define tryCALL_AMAGICbin(left,right,meth,svp) STMT_START { \
	    *(svp) = Nullsv; \
	    if (PL_amagic_generation) { \
	        if (SvAMAGIC(left)||SvAMAGIC(right)) \
	            *(svp) = amagic_call(left, \
	                                 right, \
	                                 CAT2(meth,_amg), \
	                                 0); \
	    } \
	} STMT_END
	1744
	1745	static I32
	1746	amagic_ncmp(pTHX_ register SV a, register SV b)
	1747	{
	1748	SV *tmpsv;
	1749	tryCALL_AMAGICbin(a,b,ncmp,&tmpsv);
	1750	if (tmpsv) {
	1751	NV d;
	1752
	1753	if (SvIOK(tmpsv)) {
	1754	I32 i = SvIVX(tmpsv);
	1755	if (i > 0)
	1756	return 1;
	1757	return i? -1 : 0;
	1758	}
	1759	d = SvNV(tmpsv);
	1760	if (d > 0)
	1761	return 1;
	1762	return d? -1 : 0;
	1763	}
	1764	return sv_ncmp(aTHX_ a, b);
	1765	}
	1766
	1767	static I32
	1768	amagic_i_ncmp(pTHX_ register SV a, register SV b)
	1769	{
	1770	SV *tmpsv;
	1771	tryCALL_AMAGICbin(a,b,ncmp,&tmpsv);
	1772	if (tmpsv) {
	1773	NV d;
	1774
	1775	if (SvIOK(tmpsv)) {
	1776	I32 i = SvIVX(tmpsv);
	1777	if (i > 0)
	1778	return 1;
	1779	return i? -1 : 0;
	1780	}
	1781	d = SvNV(tmpsv);
	1782	if (d > 0)
	1783	return 1;
	1784	return d? -1 : 0;
	1785	}
	1786	return sv_i_ncmp(aTHX_ a, b);
	1787	}
	1788
	1789	static I32
	1790	amagic_cmp(pTHX_ register SV str1, register SV str2)
	1791	{
	1792	SV *tmpsv;
	1793	tryCALL_AMAGICbin(str1,str2,scmp,&tmpsv);
	1794	if (tmpsv) {
	1795	NV d;
	1796
	1797	if (SvIOK(tmpsv)) {
	1798	I32 i = SvIVX(tmpsv);
	1799	if (i > 0)
	1800	return 1;
	1801	return i? -1 : 0;
	1802	}
	1803	d = SvNV(tmpsv);
	1804	if (d > 0)
	1805	return 1;
	1806	return d? -1 : 0;
	1807	}
	1808	return sv_cmp(str1, str2);
	1809	}
	1810
	1811	static I32
	1812	amagic_cmp_locale(pTHX_ register SV str1, register SV str2)
	1813	{
	1814	SV *tmpsv;
	1815	tryCALL_AMAGICbin(str1,str2,scmp,&tmpsv);
	1816	if (tmpsv) {
	1817	NV d;
	1818
	1819	if (SvIOK(tmpsv)) {
	1820	I32 i = SvIVX(tmpsv);
	1821	if (i > 0)
	1822	return 1;
	1823	return i? -1 : 0;
	1824	}
	1825	d = SvNV(tmpsv);
	1826	if (d > 0)
	1827	return 1;
	1828	return d? -1 : 0;
	1829	}
	1830	return sv_cmp_locale(str1, str2);
	1831	}