Commit | Line | Data |
---|---|---|
84d4ea48 JH |
1 | package sort; |
2 | ||
e3e60425 | 3 | our $VERSION = '2.01'; |
84d4ea48 | 4 | |
7b9ef140 RH |
5 | # The hints for pp_sort are now stored in $^H{sort}; older versions |
6 | # of perl used the global variable $sort::hints. -- rjh 2005-12-19 | |
045ac317 | 7 | |
84d4ea48 JH |
# Bit layout of the hint value kept in $^H{sort}.  The low eight bits are
# reserved for the algorithm choice (room for 256 different ones); the
# bit just above them requests a stable sort.
$sort::quicksort_bit = 1 << 0;
$sort::mergesort_bit = 1 << 1;
$sort::sort_bits     = (1 << 8) - 1;    # mask covering every algorithm bit
$sort::stable_bit    = 1 << 8;
84d4ea48 JH |
12 | |
13 | use strict; | |
14 | ||
# Enable sort subpragmas; invoked as sort->import(LIST) by "use sort LIST".
#
# Each recognised name in LIST updates the hint value in $^H{sort}:
#   _quicksort, _qsort  clear the algorithm bits, then select quicksort
#   _mergesort          clear the algorithm bits, then select mergesort
#   stable              set the stable bit (algorithm bits untouched)
#   defaults            reset the whole hint value to 0
#
# Croaks if LIST is empty or contains a name that is not listed above.
sub import {
    shift;    # discard the invocant (the package name)
    if (@_ == 0) {
        require Carp;
        Carp::croak("sort pragma requires arguments");
    }
    $^H{sort} //= 0;

    # Iterate with a lexical foreach instead of "while ($_ = shift(@_))":
    # that form stops at the first false argument ('0', '' or undef), so
    # such an argument -- and everything after it -- was silently
    # accepted instead of being reported as an unknown subpragma.
    for my $arg (@_) {
        my $subpragma = $arg // '';
        if ($subpragma =~ /^_q(?:uick)?sort$/) {
            $^H{sort} &= ~$sort::sort_bits;
            $^H{sort} |=  $sort::quicksort_bit;
        } elsif ($subpragma eq '_mergesort') {
            $^H{sort} &= ~$sort::sort_bits;
            $^H{sort} |=  $sort::mergesort_bit;
        } elsif ($subpragma eq 'stable') {
            $^H{sort} |=  $sort::stable_bit;
        } elsif ($subpragma eq 'defaults') {
            $^H{sort} = 0;
        } else {
            require Carp;
            Carp::croak("sort: unknown subpragma '$subpragma'");
        }
    }
}
40 | ||
# Disable sort subpragmas; invoked as sort->unimport(LIST) by "no sort LIST".
#
# Each recognised name in LIST clears bits in the hint value $^H{sort}:
#   _quicksort, _qsort, _mergesort  clear all algorithm bits, so naming
#                                   either algorithm has the same effect
#   stable                          clear the stable bit
#
# Croaks if LIST is empty or contains a name that is not listed above.
sub unimport {
    shift;    # discard the invocant (the package name)
    if (@_ == 0) {
        require Carp;
        Carp::croak("sort pragma requires arguments");
    }
    no warnings 'uninitialized';    # bitops would warn

    # Iterate with a lexical foreach instead of "while ($_ = shift(@_))":
    # that form stops at the first false argument ('0', '' or undef), so
    # such an argument -- and everything after it -- was silently
    # accepted instead of being reported as an unknown subpragma.
    for my $arg (@_) {
        my $subpragma = $arg // '';
        if ($subpragma =~ /^_q(?:uick)?sort$/ || $subpragma eq '_mergesort') {
            $^H{sort} &= ~$sort::sort_bits;
        } elsif ($subpragma eq 'stable') {
            $^H{sort} &= ~$sort::stable_bit;
        } else {
            require Carp;
            Carp::croak("sort: unknown subpragma '$subpragma'");
        }
    }
}
62 | ||
# Describe the sort hints currently held in $^H{sort}.
#
# Returns a space-separated string naming every hint bit that is set, in
# the order quicksort, mergesort, stable.  When no recognised bit is set
# (including when $^H{sort} is unset or zero) the result is 'mergesort'.
sub current {
    my $hints = $^H{sort} || 0;

    my @labels = map  { $_->[0] }
                 grep { $hints & $_->[1] }
                 (
                     [ 'quicksort', $sort::quicksort_bit ],
                     [ 'mergesort', $sort::mergesort_bit ],
                     [ 'stable',    $sort::stable_bit    ],
                 );

    @labels = ('mergesort') unless @labels;
    return join(' ', @labels);
}
73 | ||
74 | 1; | |
75 | __END__ | |
76 | ||
77 | =head1 NAME | |
78 | ||
79 | sort - perl pragma to control sort() behaviour | |
80 | ||
81 | =head1 SYNOPSIS | |
82 | ||
c53fc8a6 JH |
83 | use sort 'stable'; # guarantee stability |
84 | use sort '_quicksort'; # use a quicksort algorithm | |
85 | use sort '_mergesort'; # use a mergesort algorithm | |
7a8ff2dd JL |
86 | use sort 'defaults'; # revert to default behaviour |
87 | no sort 'stable'; # stability not important | |
84d4ea48 | 88 | |
c53fc8a6 | 89 | use sort '_qsort'; # alias for _quicksort |
84d4ea48 | 90 | |
7b9ef140 RH |
91 | my $current; |
92 | BEGIN { | |
93 | $current = sort::current(); # identify prevailing algorithm | |
94 | } | |
84d4ea48 JH |
95 | |
96 | =head1 DESCRIPTION | |
97 | ||
7a8ff2dd JL |
98 | With the C<sort> pragma you can control the behaviour of the builtin |
99 | C<sort()> function. | |
84d4ea48 JH |
100 | |
101 | In Perl versions 5.6 and earlier the quicksort algorithm was used to | |
7a8ff2dd | 102 | implement C<sort()>, but in Perl 5.8 a mergesort algorithm was also made |
c53fc8a6 JH |
103 | available, mainly to guarantee worst case O(N log N) behaviour: |
104 | the worst case of quicksort is O(N**2). In Perl 5.8 and later, | |
105 | quicksort defends against quadratic behaviour by shuffling large | |
106 | arrays before sorting. | |
107 | ||
108 | A stable sort means that for records that compare equal, the original | |
b0ae2885 | 109 | input ordering is preserved. Mergesort is stable, quicksort is not. |
c53fc8a6 JH |
110 | Stability will matter only if elements that compare equal can be |
111 | distinguished in some other way. That means that simple numerical | |
112 | and lexical sorts do not profit from stability, since equal elements | |
113 | are indistinguishable. However, with a comparison such as | |
114 | ||
115 | { substr($a, 0, 3) cmp substr($b, 0, 3) } | |
116 | ||
117 | stability might matter because elements that compare equal on the | |
118 | first 3 characters may be distinguished based on subsequent characters. | |
119 | In Perl 5.8 and later, quicksort can be stabilized, but doing so will | |
120 | add overhead, so it should only be done if it matters. | |
121 | ||
122 | The best algorithm depends on many things. On average, mergesort | |
123 | does fewer comparisons than quicksort, so it may be better when | |
124 | complicated comparison routines are used. Mergesort also takes | |
125 | advantage of pre-existing order, so it would be favored for using | |
7a8ff2dd JL |
126 | C<sort()> to merge several sorted arrays. On the other hand, quicksort |
127 | is often faster for small arrays, and on arrays of a few distinct | |
128 | values, repeated many times. You can force the | |
c53fc8a6 JH |
129 | choice of algorithm with this pragma, but this feels heavy-handed, |
130 | so the subpragmas beginning with a C<_> may not persist beyond Perl 5.8. | |
7a8ff2dd JL |
131 | The default algorithm is mergesort, which will be stable even if |
132 | you do not explicitly demand it. | |
133 | But the stability of the default sort is a side-effect that could | |
134 | change in later versions. If stability is important, be sure to | |
135 | say so with a | |
136 | ||
137 | use sort 'stable'; | |
138 | ||
139 | The C<no sort> pragma doesn't | |
140 | I<forbid> what follows, it just leaves the choice open. Thus, after | |
141 | ||
142 | no sort qw(_mergesort stable); | |
143 | ||
144 | a mergesort, which happens to be stable, will be employed anyway. | |
145 | Note that | |
146 | ||
147 | no sort "_quicksort"; | |
148 | no sort "_mergesort"; | |
149 | ||
150 | have exactly the same effect, leaving the choice of sort algorithm open. | |
84d4ea48 | 151 | |
0e59b7c6 RGS |
152 | =head1 CAVEATS |
153 | ||
7b9ef140 RH |
154 | As of Perl 5.10, this pragma is lexically scoped and takes effect |
155 | at compile time. In earlier versions its effect was global and took | |
156 | effect at run-time; the documentation suggested using C<eval()> to | |
157 | change the behaviour: | |
7a8ff2dd | 158 | |
7b9ef140 RH |
159 | { eval 'use sort qw(defaults _quicksort)'; # force quicksort |
160 | eval 'no sort "stable"'; # stability not wanted | |
7a8ff2dd JL |
161 | print sort::current . "\n"; |
162 | @a = sort @b; | |
7b9ef140 | 163 | eval 'use sort "defaults"'; # clean up, for others |
7a8ff2dd | 164 | } |
7b9ef140 | 165 | { eval 'use sort qw(defaults stable)'; # force stability |
7a8ff2dd JL |
166 | print sort::current . "\n"; |
167 | @c = sort @d; | |
7b9ef140 | 168 | eval 'use sort "defaults"'; # clean up, for others |
7a8ff2dd | 169 | } |
7a8ff2dd | 170 | |
7b9ef140 RH |
171 | Such code no longer has the desired effect, for two reasons. |
172 | Firstly, the use of C<eval()> means that the sorting algorithm | |
173 | is not changed until runtime, by which time it's too late to | |
174 | have any effect. Secondly, C<sort::current> is also called at | |
175 | run-time, when in fact the compile-time value of C<sort::current> | |
176 | is the one that matters. | |
7a8ff2dd | 177 | |
7b9ef140 | 178 | So now this code would be written: |
7a8ff2dd | 179 | |
7b9ef140 RH |
180 | { use sort qw(defaults _quicksort); # force quicksort |
181 | no sort "stable"; # stability not wanted | |
182 | my $current; | |
183 | BEGIN { $current = sort::current(); } | |
184 | print "$current\n"; | |
7a8ff2dd | 185 | @a = sort @b; |
7b9ef140 | 186 | # Pragmas go out of scope at the end of the block |
7a8ff2dd | 187 | } |
7b9ef140 RH |
188 | { use sort qw(defaults stable); # force stability |
189 | my $current; | |
190 | BEGIN { $current = sort::current(); } | |
191 | print "$current\n"; | |
7a8ff2dd | 192 | @c = sort @d; |
7a8ff2dd | 193 | } |
0e59b7c6 | 194 | |
84d4ea48 JH |
195 | =cut |
196 |