Commit | Line | Data |
---|---|---|

84d4ea48 JH |
1 | package sort; |

2 | ||

1f17861c | 3 | our $VERSION = '2.00'; |

84d4ea48 | 4 | |

7b9ef140 RH |
5 | # The hints for pp_sort are now stored in $^H{sort}; older versions |

6 | # of perl used the global variable $sort::hints. -- rjh 2005-12-19 | |

045ac317 | 7 | |

7168684c | 8 | $sort::hint_bits = 0x00020000; # HINT_LOCALIZE_HH |

84d4ea48 JH |
9 | |

10 | $sort::quicksort_bit = 0x00000001; | |

11 | $sort::mergesort_bit = 0x00000002; | |

12 | $sort::sort_bits = 0x000000FF; # allow 256 different ones | |

13 | $sort::stable_bit = 0x00000100; | |

84d4ea48 JH |
14 | |

15 | use strict; | |

16 | ||

17 | sub import { | |

18 | shift; | |

19 | if (@_ == 0) { | |

20 | require Carp; | |

21 | Carp::croak("sort pragma requires arguments"); | |

22 | } | |

84d4ea48 | 23 | local $_; |

045ac317 | 24 | no warnings 'uninitialized'; # bitops would warn |

7b9ef140 | 25 | $^H{sort} //= 0; |

84d4ea48 | 26 | while ($_ = shift(@_)) { |

c53fc8a6 | 27 | if (/^_q(?:uick)?sort$/) { |

7b9ef140 RH |
28 | $^H{sort} &= ~$sort::sort_bits; |

29 | $^H{sort} |= $sort::quicksort_bit; | |

c53fc8a6 | 30 | } elsif ($_ eq '_mergesort') { |

7b9ef140 RH |
31 | $^H{sort} &= ~$sort::sort_bits; |

32 | $^H{sort} |= $sort::mergesort_bit; | |

c53fc8a6 | 33 | } elsif ($_ eq 'stable') { |

7b9ef140 | 34 | $^H{sort} |= $sort::stable_bit; |

7a8ff2dd | 35 | } elsif ($_ eq 'defaults') { |

7b9ef140 | 36 | $^H{sort} = 0; |

7a8ff2dd JL |
37 | } else { |

38 | require Carp; | |

39 | Carp::croak("sort: unknown subpragma '$_'"); | |

40 | } | |

41 | } | |

7b9ef140 | 42 | $^H |= $sort::hint_bits; |

7a8ff2dd JL |
43 | } |

44 | ||

45 | sub unimport { | |

46 | shift; | |

47 | if (@_ == 0) { | |

48 | require Carp; | |

49 | Carp::croak("sort pragma requires arguments"); | |

50 | } | |

51 | local $_; | |

52 | no warnings 'uninitialized'; # bitops would warn | |

53 | while ($_ = shift(@_)) { | |

54 | if (/^_q(?:uick)?sort$/) { | |

7b9ef140 | 55 | $^H{sort} &= ~$sort::sort_bits; |

7a8ff2dd | 56 | } elsif ($_ eq '_mergesort') { |

7b9ef140 | 57 | $^H{sort} &= ~$sort::sort_bits; |

7a8ff2dd | 58 | } elsif ($_ eq 'stable') { |

7b9ef140 | 59 | $^H{sort} &= ~$sort::stable_bit; |

84d4ea48 JH |
60 | } else { |

61 | require Carp; | |

71c4de84 | 62 | Carp::croak("sort: unknown subpragma '$_'"); |

84d4ea48 JH |
63 | } |

64 | } | |

65 | } | |

66 | ||

67 | sub current { | |

68 | my @sort; | |

7b9ef140 RH |
69 | if ($^H{sort}) { |

70 | push @sort, 'quicksort' if $^H{sort} & $sort::quicksort_bit; | |

71 | push @sort, 'mergesort' if $^H{sort} & $sort::mergesort_bit; | |

72 | push @sort, 'stable' if $^H{sort} & $sort::stable_bit; | |

84d4ea48 JH |
73 | } |

74 | push @sort, 'mergesort' unless @sort; | |

75 | join(' ', @sort); | |

76 | } | |

77 | ||

78 | 1; | |

79 | __END__ | |

80 | ||

81 | =head1 NAME | |

82 | ||

83 | sort - perl pragma to control sort() behaviour | |

84 | ||

85 | =head1 SYNOPSIS | |

86 | ||

c53fc8a6 JH |
87 | use sort 'stable'; # guarantee stability |

88 | use sort '_quicksort'; # use a quicksort algorithm | |

89 | use sort '_mergesort'; # use a mergesort algorithm | |

7a8ff2dd JL |
90 | use sort 'defaults'; # revert to default behavior |

91 | no sort 'stable'; # stability not important | |

84d4ea48 | 92 | |

c53fc8a6 | 93 | use sort '_qsort'; # alias for quicksort |

84d4ea48 | 94 | |

7b9ef140 RH |
95 | my $current; |

96 | BEGIN { | |

97 | $current = sort::current(); # identify prevailing algorithm | |

98 | } | |

84d4ea48 JH |
99 | |

100 | =head1 DESCRIPTION | |

101 | ||

7a8ff2dd JL |
102 | With the C<sort> pragma you can control the behaviour of the builtin |

103 | C<sort()> function. | |

84d4ea48 JH |
104 | |

105 | In Perl versions 5.6 and earlier the quicksort algorithm was used to | |

7a8ff2dd | 106 | implement C<sort()>, but in Perl 5.8 a mergesort algorithm was also made |

c53fc8a6 JH |
107 | available, mainly to guarantee worst case O(N log N) behaviour: |

108 | the worst case of quicksort is O(N**2). In Perl 5.8 and later, | |

109 | quicksort defends against quadratic behaviour by shuffling large | |

110 | arrays before sorting. | |

111 | ||

112 | A stable sort means that for records that compare equal, the original | |

b0ae2885 | 113 | input ordering is preserved. Mergesort is stable, quicksort is not. |

c53fc8a6 JH |
114 | Stability will matter only if elements that compare equal can be |

115 | distinguished in some other way. That means that simple numerical | |

116 | and lexical sorts do not profit from stability, since equal elements | |

117 | are indistinguishable. However, with a comparison such as | |

118 | ||

119 | { substr($a, 0, 3) cmp substr($b, 0, 3) } | |

120 | ||

121 | stability might matter because elements that compare equal on the | |

122 | first 3 characters may be distinguished based on subsequent characters. | |

123 | In Perl 5.8 and later, quicksort can be stabilized, but doing so will | |

124 | add overhead, so it should only be done if it matters. | |

125 | ||

126 | The best algorithm depends on many things. On average, mergesort | |

127 | does fewer comparisons than quicksort, so it may be better when | |

128 | complicated comparison routines are used. Mergesort also takes | |

129 | advantage of pre-existing order, so it would be favored for using | |

7a8ff2dd JL |
130 | C<sort()> to merge several sorted arrays. On the other hand, quicksort |

131 | is often faster for small arrays, and on arrays of a few distinct | |

132 | values, repeated many times. You can force the | |

c53fc8a6 JH |
133 | choice of algorithm with this pragma, but this feels heavy-handed, |

134 | so the subpragmas beginning with a C<_> may not persist beyond Perl 5.8. | |

7a8ff2dd JL |
135 | The default algorithm is mergesort, which will be stable even if |

136 | you do not explicitly demand it. | |

137 | But the stability of the default sort is a side-effect that could | |

138 | change in later versions. If stability is important, be sure to | |

139 | say so with a | |

140 | ||

141 | use sort 'stable'; | |

142 | ||

143 | The C<no sort> pragma doesn't | |

144 | I<forbid> what follows, it just leaves the choice open. Thus, after | |

145 | ||

146 | no sort qw(_mergesort stable); | |

147 | ||

148 | a mergesort, which happens to be stable, will be employed anyway. | |

149 | Note that | |

150 | ||

151 | no sort "_quicksort"; | |

152 | no sort "_mergesort"; | |

153 | ||

154 | have exactly the same effect, leaving the choice of sort algorithm open. | |

84d4ea48 | 155 | |

0e59b7c6 RGS |
156 | =head1 CAVEATS |

157 | ||

7b9ef140 RH |
158 | As of Perl 5.10, this pragma is lexically scoped and takes effect |

159 | at compile time. In earlier versions its effect was global and took | |

160 | effect at run-time; the documentation suggested using C<eval()> to | |

161 | change the behaviour: | |

7a8ff2dd | 162 | |

7b9ef140 RH |
163 | { eval 'use sort qw(defaults _quicksort)'; # force quicksort |

164 | eval 'no sort "stable"'; # stability not wanted | |

7a8ff2dd JL |
165 | print sort::current . "\n"; |

166 | @a = sort @b; | |

7b9ef140 | 167 | eval 'use sort "defaults"'; # clean up, for others |

7a8ff2dd | 168 | } |

7b9ef140 | 169 | { eval 'use sort qw(defaults stable)'; # force stability |

7a8ff2dd JL |
170 | print sort::current . "\n"; |

171 | @c = sort @d; | |

7b9ef140 | 172 | eval 'use sort "defaults"'; # clean up, for others |

7a8ff2dd | 173 | } |

7a8ff2dd | 174 | |

7b9ef140 RH |
175 | Such code no longer has the desired effect, for two reasons. |

176 | Firstly, the use of C<eval()> means that the sorting algorithm | |

177 | is not changed until runtime, by which time it's too late to | |

178 | have any effect. Secondly, C<sort::current> is also called at | |

179 | run-time, when in fact the compile-time value of C<sort::current> | |

180 | is the one that matters. | |

7a8ff2dd | 181 | |

7b9ef140 | 182 | So now this code would be written: |

7a8ff2dd | 183 | |

7b9ef140 RH |
184 | { use sort qw(defaults _quicksort); # force quicksort |

185 | no sort "stable"; # stability not wanted | |

186 | my $current; | |

187 | BEGIN { $current = print sort::current; } | |

188 | print "$current\n"; | |

7a8ff2dd | 189 | @a = sort @b; |

7b9ef140 | 190 | # Pragmas go out of scope at the end of the block |

7a8ff2dd | 191 | } |

7b9ef140 RH |
192 | { use sort qw(defaults stable); # force stability |

193 | my $current; | |

194 | BEGIN { $current = print sort::current; } | |

195 | print "$current\n"; | |

7a8ff2dd | 196 | @c = sort @d; |

7a8ff2dd | 197 | } |

0e59b7c6 | 198 | |

84d4ea48 JH |
199 | =cut |

200 |