Commit | Line | Data |
---|---|---|
ff0cee69 | 1 | package ExtUtils::XSSymSet; |
2 | ||
ff0cee69 | 3 | use strict; |
84efe3df | 4 | use Config; |
ff0cee69 | 5 | use vars qw( $VERSION ); |
84efe3df | 6 | $VERSION = '1.2'; |
ff0cee69 | 7 | |
8 | ||
9 | sub new { | |
10 | my($pkg,$maxlen,$silent) = @_; | |
11 | $maxlen ||= 31; | |
84efe3df CB |
12 | # Allow absurdly long symbols here if we've told the compiler to |
13 | # do the shortening for us. | |
14 | $maxlen = 2048 if $Config{'useshortenedsymbols'}; | |
ff0cee69 | 15 | $silent ||= 0; |
16 | my($obj) = { '__M@xLen' => $maxlen, '__S!lent' => $silent }; | |
17 | bless $obj, $pkg; | |
18 | } | |
19 | ||
20 | ||
21 | sub trimsym { | |
22 | my($self,$name,$maxlen,$silent) = @_; | |
23 | ||
24 | unless (defined $maxlen) { | |
25 | if (ref $self) { $maxlen ||= $self->{'__M@xLen'}; } | |
26 | $maxlen ||= 31; | |
27 | } | |
84efe3df CB |
28 | $maxlen = 2048 if $Config{'useshortenedsymbols'}; |
29 | ||
ff0cee69 | 30 | unless (defined $silent) { |
31 | if (ref $self) { $silent ||= $self->{'__S!lent'}; } | |
32 | $silent ||= 0; | |
33 | } | |
34 | return $name if (length $name <= $maxlen); | |
35 | ||
36 | my $trimmed = $name; | |
37 | # First, just try to remove duplicated delimiters | |
38 | $trimmed =~ s/__/_/g; | |
39 | if (length $trimmed > $maxlen) { | |
40 | # Next, all duplicated chars | |
41 | $trimmed =~ s/(.)\1+/$1/g; | |
42 | if (length $trimmed > $maxlen) { | |
43 | my $squeezed = $trimmed; | |
44 | my($xs,$prefix,$func) = $trimmed =~ /^(XS_)?(.*)_([^_]*)$/; | |
b4ff380f CB |
45 | $xs ||= ''; |
46 | my $frac = 3; # replaces broken length-based calculations but w/same result | |
47 | my $pat = '([^_])'; | |
ff0cee69 | 48 | if (length $func <= 12) { # Try to preserve short function names |
ff0cee69 | 49 | if ($frac > 1) { $pat .= '[^A-Z_]{' . ($frac - 1) . '}'; } |
50 | $prefix =~ s/$pat/$1/g; | |
51 | $squeezed = "$xs$prefix" . "_$func"; | |
52 | if (length $squeezed > $maxlen) { | |
53 | $pat =~ s/A-Z//; | |
54 | $prefix =~ s/$pat/$1/g; | |
55 | $squeezed = "$xs$prefix" . "_$func"; | |
56 | } | |
57 | } | |
58 | else { | |
ff0cee69 | 59 | if ($frac > 1) { $pat .= '[^A-Z_]{' . ($frac - 1) . '}'; } |
60 | $squeezed = "$prefix$func"; | |
61 | $squeezed =~ s/$pat/$1/g; | |
62 | if (length "$xs$squeezed" > $maxlen) { | |
63 | $pat =~ s/A-Z//; | |
64 | $squeezed =~ s/$pat/$1/g; | |
65 | } | |
66 | $squeezed = "$xs$squeezed"; | |
67 | } | |
68 | if (length $squeezed <= $maxlen) { $trimmed = $squeezed; } | |
69 | else { | |
70 | my $frac = int((length $trimmed - $maxlen) / length $trimmed + 0.5); | |
71 | my $pat = '(.).{$frac}'; | |
72 | $trimmed =~ s/$pat/$1/g; | |
73 | } | |
74 | } | |
75 | } | |
622db3b8 | 76 | warn "Warning: long symbol $name\n\ttrimmed to $trimmed\n\t" unless $silent; |
ff0cee69 | 77 | return $trimmed; |
78 | } | |
79 | ||
80 | ||
81 | sub addsym { | |
82 | my($self,$sym,$maxlen,$silent) = @_; | |
83 | my $trimmed = $self->get_trimmed($sym); | |
84 | ||
85 | return $trimmed if defined $trimmed; | |
86 | ||
87 | $maxlen ||= $self->{'__M@xLen'} || 31; | |
88 | $silent ||= $self->{'__S!lent'} || 0; | |
89 | $trimmed = $self->trimsym($sym,$maxlen,1); | |
90 | if (exists $self->{$trimmed}) { | |
91 | my($i) = "00"; | |
92 | $trimmed = $self->trimsym($sym,$maxlen-3,$silent); | |
93 | while (exists $self->{"${trimmed}_$i"}) { $i++; } | |
622db3b8 | 94 | warn "Warning: duplicate symbol $trimmed\n\tchanged to ${trimmed}_$i\n\t(original was $sym)\n\t" |
ff0cee69 | 95 | unless $silent; |
96 | $trimmed .= "_$i"; | |
97 | } | |
98 | elsif (not $silent and $trimmed ne $sym) { | |
622db3b8 | 99 | warn "Warning: long symbol $sym\n\ttrimmed to $trimmed\n\t"; |
ff0cee69 | 100 | } |
101 | $self->{$trimmed} = $sym; | |
102 | $self->{'__N+Map'}->{$sym} = $trimmed; | |
103 | $trimmed; | |
104 | } | |
105 | ||
106 | ||
107 | sub delsym { | |
108 | my($self,$sym) = @_; | |
109 | my $trimmed = $self->{'__N+Map'}->{$sym}; | |
110 | if (defined $trimmed) { | |
111 | delete $self->{'__N+Map'}->{$sym}; | |
112 | delete $self->{$trimmed}; | |
113 | } | |
114 | $trimmed; | |
115 | } | |
116 | ||
117 | ||
118 | sub get_trimmed { | |
119 | my($self,$sym) = @_; | |
120 | $self->{'__N+Map'}->{$sym}; | |
121 | } | |
122 | ||
123 | ||
124 | sub get_orig { | |
125 | my($self,$trimmed) = @_; | |
126 | $self->{$trimmed}; | |
127 | } | |
128 | ||
129 | ||
130 | sub all_orig { (keys %{$_[0]->{'__N+Map'}}); } | |
131 | sub all_trimmed { (grep { /^\w+$/ } keys %{$_[0]}); } | |
132 | ||
133 | __END__ | |
134 | ||
135 | =head1 NAME | |
136 | ||
57f7515d | 137 | ExtUtils::XSSymSet - keep sets of symbol names palatable to the VMS linker |
ff0cee69 | 138 | |
139 | =head1 SYNOPSIS | |
140 | ||
57f7515d | 141 | use ExtUtils::XSSymSet; |
ff0cee69 | 142 | |
57f7515d | 143 | $set = new ExtUtils::XSSymSet; |
ff0cee69 | 144 | while ($sym = make_symbol()) { $set->addsym($sym); } |
145 | foreach $safesym ($set->all_trimmed) { | |
146 | print "Processing $safesym (derived from ",$self->get_orig($safesym),")\n"; | |
147 | do_stuff($safesym); | |
148 | } | |
149 | ||
57f7515d | 150 | $safesym = ExtUtils::XSSymSet->trimsym($onesym); |
ff0cee69 | 151 | |
152 | =head1 DESCRIPTION | |
153 | ||
154 | Since the VMS linker distinguishes symbols based only on the first 31 | |
155 | characters of their names, it is occasionally necessary to shorten | |
156 | symbol names in order to avoid collisions. (This is especially true of | |
157 | names generated by xsubpp, since prefixes generated by nested package | |
57f7515d | 158 | names can become quite long.) C<ExtUtils::XSSymSet> provides functions to |
ff0cee69 | 159 | shorten names in a consistent fashion, and to track a set of names to |
160 | insure that each is unique. While designed with F<xsubpp> in mind, it | |
161 | may be used with any set of strings. | |
162 | ||
163 | This package supplies the following functions, all of which should be | |
164 | called as methods. | |
165 | ||
166 | =over 4 | |
167 | ||
168 | =item new([$maxlen[,$silent]]) | |
169 | ||
57f7515d | 170 | Creates an empty C<ExtUtils::XSSymset> set of symbols. This function may be |
ff0cee69 | 171 | called as a static method or via an existing object. If C<$maxlen> or |
172 | C<$silent> are specified, they are used as the defaults for maximum | |
173 | name length and warning behavior in future calls to addsym() or | |
84efe3df CB |
174 | trimsym() via this object. If the compiler has been instructed to do its |
175 | own symbol shortening via C<$Config{'useshortenedsymbols'}>, a value of | |
176 | 2048 is assumed for C<$maxlen> as a way of bypassing the shortening done by | |
177 | this module. | |
ff0cee69 | 178 | |
179 | =item addsym($name[,$maxlen[,$silent]]) | |
180 | ||
181 | Creates a symbol name from C<$name>, using the methods described | |
182 | under trimsym(), which is unique in this set of symbols, and returns | |
183 | the new name. C<$name> and its resultant are added to the set, and | |
184 | any future calls to addsym() specifying the same C<$name> will return | |
185 | the same result, regardless of the value of C<$maxlen> specified. | |
186 | Unless C<$silent> is true, warnings are output if C<$name> had to be | |
187 | trimmed or changed in order to avoid collision with an existing symbol | |
188 | name. C<$maxlen> and C<$silent> default to the values specified when | |
189 | this set of symbols was created. This method must be called via an | |
190 | existing object. | |
191 | ||
192 | =item trimsym($name[,$maxlen[,$silent]]) | |
193 | ||
194 | Creates a symbol name C<$maxlen> or fewer characters long from | |
195 | C<$name> and returns it. If C<$name> is too long, it first tries to | |
196 | shorten it by removing duplicate characters, then by periodically | |
197 | removing non-underscore characters, and finally, if necessary, by | |
198 | periodically removing characters of any type. C<$maxlen> defaults | |
199 | to 31. Unless C<$silent> is true, a warning is output if C<$name> | |
200 | is altered in any way. This function may be called either as a | |
201 | static method or via an existing object, but in the latter case no | |
202 | check is made to insure that the resulting name is unique in the | |
84efe3df CB |
203 | set of symbols. If the compiler has been instructed to do its |
204 | own symbol shortening via C<$Config{'useshortenedsymbols'}>, a value | |
205 | of 2048 is assumed for C<$maxlen> as a way of bypassing the shortening | |
206 | done by this module. | |
ff0cee69 | 207 | |
208 | =item delsym($name) | |
209 | ||
210 | Removes C<$name> from the set of symbols, where C<$name> is the | |
211 | original symbol name passed previously to addsym(). If C<$name> | |
212 | existed in the set of symbols, returns its "trimmed" equivalent, | |
213 | otherwise returns C<undef>. This method must be called via an | |
214 | existing object. | |
215 | ||
216 | =item get_orig($trimmed) | |
217 | ||
218 | Returns the original name which was trimmed to C<$trimmed> by a | |
219 | previous call to addsym(), or C<undef> if C<$trimmed> does not | |
220 | correspond to a member of this set of symbols. This method must be | |
221 | called via an existing object. | |
222 | ||
223 | =item get_trimmed($name) | |
224 | ||
225 | Returns the trimmed name which was generated from C<$name> by a | |
226 | previous call to addsym(), or C<undef> if C<$name> is not a member | |
227 | of this set of symbols. This method must be called via an | |
228 | existing object. | |
229 | ||
230 | =item all_orig() | |
231 | ||
232 | Returns a list containing all of the original symbol names | |
233 | from this set. | |
234 | ||
235 | =item all_trimmed() | |
236 | ||
237 | Returns a list containing all of the trimmed symbol names | |
238 | from this set. | |
239 | ||
240 | =back | |
241 | ||
242 | =head1 AUTHOR | |
243 | ||
bd3fa61c | 244 | Charles Bailey E<lt>I<bailey@newman.upenn.edu>E<gt> |
ff0cee69 | 245 | |
246 | =head1 REVISION | |
247 | ||
84efe3df | 248 | Last revised 8-Oct-2010, for Perl 5.13.6. |
ff0cee69 | 249 |