Commit | Line | Data |
---|---|---|
d92f47ae JH |
1 | #!/usr/bin/perl -w |
2 | ||
831ed035 JH |
3 | # Try opening libperl.a with nm, and verifying it has the kind of |
4 | # symbols we expect, and no symbols we should avoid. | |
5 | # | |
6 | # Fail softly, expect things only on known platforms. | |
d92f47ae JH |
7 | # |
8 | # Also, if the rarely-used builds options -DPERL_GLOBAL_STRUCT or | |
9 | # -DPERL_GLOBAL_STRUCT_PRIVATE are used, verify that they did what | |
10 | # they were meant to do, hide the global variables (see perlguts for | |
11 | # the details). | |
caea674c JH |
12 | # |
13 | # Debugging tip: nm output (this script's input) can be faked by | |
14 | # giving one command line argument for this script: it should be | |
15 | # either the filename to read, or "-" for STDIN. | |
831ed035 JH |
16 | # |
17 | # Some terminology: | |
18 | # - "text" symbols are code | |
19 | # - "data" symbols are data (duh), with subdivisions: | |
20 | # - "bss": (Block-Started-by-Symbol: originally from IBM assembler...), | |
21 | # uninitialized data, which often even doesn't exist in the object | |
22 | # file as such, only its size does, which is then created on demand | |
23 | # by the loader | |
24 | # - "const": initialized read-only data, like string literals | |
25 | # - "common": uninitialized data unless initialized... | |
26 | # (the full story is too long for here, see "man nm") | |
27 | # - "data": initialized read-write data | |
28 | # (somewhat confusingly below: "data data", but it makes code simpler) | |
29 | # - "undefined": external symbol referred to by an object, | |
30 | # most likely a text symbol. Can be either a symbol defined by | |
31 | # a Perl object file but referred to by other Perl object files, | |
32 | # or a completely external symbol from libc, or other system libraries. | |
d92f47ae JH |
33 | |
34 | BEGIN { | |
35 | chdir 't' if -d 't'; | |
36 | @INC = '../lib'; | |
37 | require "./test.pl"; | |
38 | } | |
39 | ||
40 | use strict; | |
41 | ||
42 | use Config; | |
43 | ||
44 | if ($Config{cc} =~ /g\+\+/) { | |
45 | # XXX Could use c++filt, maybe. | |
46 | skip_all "on g++"; | |
47 | } | |
48 | ||
49 | my $libperl_a; | |
50 | ||
51 | for my $f (qw(../libperl.a libperl.a)) { | |
52 | if (-f $f) { | |
53 | $libperl_a = $f; | |
54 | last; | |
55 | } | |
56 | } | |
57 | ||
58 | unless (defined $libperl_a) { | |
59 | skip_all "no libperl.a"; | |
60 | } | |
61 | ||
cb498e56 JH |
62 | print "# \$^O = $^O\n"; |
63 | print "# \$Config{cc} = $Config{cc}\n"; | |
d92f47ae JH |
64 | print "# libperl = $libperl_a\n"; |
65 | ||
66 | my $nm; | |
67 | my $nm_opt = ''; | |
68 | my $nm_style; | |
69 | ||
70 | if ($^O eq 'linux') { | |
71 | $nm = '/usr/bin/nm'; | |
72 | $nm_style = 'gnu'; | |
73 | } elsif ($^O eq 'darwin') { | |
74 | $nm = '/usr/bin/nm'; | |
75 | $nm_style = 'darwin'; | |
76 | # With the -m option we get better information than the BSD-like | |
77 | # default: with the default, a lot of symbols get dumped into 'S' | |
78 | # or 's', for example one cannot tell the difference between const | |
79 | # and non-const symbols. | |
80 | $nm_opt = '-m'; | |
81 | } | |
82 | ||
83 | unless (defined $nm) { | |
84 | skip_all "no nm"; | |
85 | } | |
86 | ||
87 | unless (defined $nm_style) { | |
88 | skip_all "no nm style"; | |
89 | } | |
90 | ||
91 | print "# nm = $nm\n"; | |
92 | print "# nm_style = $nm_style\n"; | |
93 | print "# nm_opt = $nm_opt\n"; | |
94 | ||
95 | unless (-x $nm) { | |
96 | skip_all "no executable nm $nm"; | |
97 | } | |
98 | ||
99 | if ($nm_style eq 'gnu') { | |
100 | open(my $nm_fh, "$nm --version|") or | |
101 | skip_all "nm failed: $!"; | |
102 | my $gnu_verified; | |
103 | while (<$nm_fh>) { | |
104 | if (/^GNU nm/) { | |
105 | $gnu_verified = 1; | |
106 | last; | |
107 | } | |
108 | } | |
109 | unless ($gnu_verified) { | |
f7bdd0be | 110 | skip_all "no GNU nm"; |
d92f47ae JH |
111 | } |
112 | } | |
113 | ||
b52baaa9 JH |
114 | my $nm_err_tmp = "libperl$$"; |
115 | ||
116 | END { | |
f7bdd0be TC |
117 | # this is still executed when we skip_all above, avoid a warning |
118 | unlink $nm_err_tmp if $nm_err_tmp; | |
b52baaa9 JH |
119 | } |
120 | ||
caea674c | 121 | my $nm_fh; |
d92f47ae | 122 | |
caea674c JH |
123 | if (@ARGV == 1) { |
124 | my $fake = shift @ARGV; | |
125 | print "# Faking nm output from $fake\n"; | |
126 | if ($fake eq '-') { | |
127 | open($nm_fh, "<&STDIN") or | |
128 | skip_all "Duping STDIN failed: $!"; | |
129 | } else { | |
130 | open($nm_fh, "<", $fake) or | |
131 | skip_all "Opening '$fake' failed: $!"; | |
132 | } | |
133 | undef $nm_err_tmp; # In this case there will be no nm errors. | |
134 | } else { | |
135 | open($nm_fh, "$nm $nm_opt $libperl_a 2>$nm_err_tmp |") or | |
136 | skip_all "$nm $nm_opt $libperl_a failed: $!"; | |
137 | } | |
831ed035 JH |
138 | |
139 | sub is_perlish_symbol { | |
140 | $_[0] =~ /^(?:PL_|Perl|PerlIO)/; | |
141 | } | |
142 | ||
d92f47ae JH |
143 | sub nm_parse_gnu { |
144 | my $symbols = shift; | |
7c616f70 | 145 | my $line = $_; |
d92f47ae | 146 | if (m{^(\w+\.o):$}) { |
68472ab1 | 147 | # object file name |
d92f47ae JH |
148 | $symbols->{obj}{$1}++; |
149 | $symbols->{o} = $1; | |
7c616f70 | 150 | return; |
d92f47ae | 151 | } else { |
7c616f70 JH |
152 | die "$0: undefined current object: $line" |
153 | unless defined $symbols->{o}; | |
68472ab1 | 154 | # 64-bit systems have 16 hexdigits, 32-bit systems have 8. |
7c616f70 JH |
155 | if (s/^[0-9a-f]{8}(?:[0-9a-f]{8})? //) { |
156 | if (/^[Rr] (\w+)$/) { | |
68472ab1 | 157 | # R: read only (const) |
7c616f70 JH |
158 | $symbols->{data}{const}{$1}{$symbols->{o}}++; |
159 | } elsif (/^r .+$/) { | |
68472ab1 | 160 | # Skip local const (read only). |
7c616f70 JH |
161 | } elsif (/^[Tti] (\w+)(\..+)?$/) { |
162 | $symbols->{text}{$1}{$symbols->{o}}++; | |
163 | } elsif (/^C (\w+)$/) { | |
164 | $symbols->{data}{common}{$1}{$symbols->{o}}++; | |
165 | } elsif (/^[BbSs] (\w+)(\.\d+)?$/) { | |
68472ab1 JH |
166 | # Bb: uninitialized data (bss) |
167 | # Ss: uninitialized data "for small objects" | |
7c616f70 JH |
168 | $symbols->{data}{bss}{$1}{$symbols->{o}}++; |
169 | } elsif (/^0{16} D _LIB_VERSION$/) { | |
68472ab1 | 170 | # Skip the _LIB_VERSION (not ours, probably libm) |
7c616f70 | 171 | } elsif (/^[DdGg] (\w+)$/) { |
68472ab1 JH |
172 | # Dd: initialized data |
173 | # Gg: initialized "for small objects" | |
7c616f70 JH |
174 | $symbols->{data}{data}{$1}{$symbols->{o}}++; |
175 | } elsif (/^. \.?(\w+)$/) { | |
176 | # Skip the unknown types. | |
177 | print "# Unknown type: $line ($symbols->{o})\n"; | |
178 | } | |
179 | return; | |
831ed035 JH |
180 | } elsif (/^ {8}(?: {8})? U _?(\w+)$/) { |
181 | my ($symbol) = $1; | |
182 | return if is_perlish_symbol($symbol); | |
183 | $symbols->{undef}{$symbol}{$symbols->{o}}++; | |
7c616f70 | 184 | return; |
d92f47ae JH |
185 | } |
186 | } | |
7c616f70 | 187 | print "# Unexpected nm output '$line' ($symbols->{o})\n"; |
d92f47ae JH |
188 | } |
189 | ||
190 | sub nm_parse_darwin { | |
191 | my $symbols = shift; | |
7c616f70 | 192 | my $line = $_; |
68472ab1 JH |
193 | if (m{^(?:.+)?libperl\.a\((\w+\.o)\):$}) { |
194 | # object file name | |
d92f47ae JH |
195 | $symbols->{obj}{$1}++; |
196 | $symbols->{o} = $1; | |
7c616f70 | 197 | return; |
d92f47ae | 198 | } else { |
7c616f70 | 199 | die "$0: undefined current object: $line" unless defined $symbols->{o}; |
68472ab1 | 200 | # 64-bit systems have 16 hexdigits, 32-bit systems have 8. |
7c616f70 | 201 | if (s/^[0-9a-f]{8}(?:[0-9a-f]{8})? //) { |
68472ab1 JH |
202 | # String literals can live in different sections |
203 | # depending on the compiler and os release, assumedly | |
204 | # also linker flags. | |
6d1ca00b | 205 | if (/^\(__TEXT,__(?:const|cstring|literal\d+)\) (?:non-)?external _?(\w+)(\.\w+)?$/) { |
7c616f70 JH |
206 | my ($symbol, $suffix) = ($1, $2); |
207 | # Ignore function-local constants like | |
208 | # _Perl_av_extend_guts.oom_array_extend | |
209 | return if defined $suffix && /__TEXT,__const/; | |
6d1ca00b JH |
210 | # Ignore the cstring unnamed strings. |
211 | return if $symbol =~ /^L\.str\d+$/; | |
7c616f70 JH |
212 | $symbols->{data}{const}{$symbol}{$symbols->{o}}++; |
213 | } elsif (/^\(__TEXT,__text\) (?:non-)?external _(\w+)$/) { | |
214 | $symbols->{text}{$1}{$symbols->{o}}++; | |
215 | } elsif (/^\(__DATA,__(const|data|bss|common)\) (?:non-)?external _(\w+)(\.\w+)?$/) { | |
216 | my ($dtype, $symbol, $suffix) = ($1, $2, $3); | |
217 | # Ignore function-local constants like | |
218 | # _Perl_pp_gmtime.dayname | |
219 | return if defined $suffix; | |
220 | $symbols->{data}{$dtype}{$symbol}{$symbols->{o}}++; | |
221 | } elsif (/^\(__DATA,__const\) non-external _\.memset_pattern\d*$/) { | |
222 | # Skip this, whatever it is (some inlined leakage from | |
223 | # darwin libc?) | |
6d1ca00b | 224 | } elsif (/^\(__TEXT,__eh_frame/) { |
68472ab1 | 225 | # Skip the eh_frame (exception handling) symbols. |
6d1ca00b | 226 | return; |
7c616f70 JH |
227 | } elsif (/^\(__\w+,__\w+\) /) { |
228 | # Skip the unknown types. | |
229 | print "# Unknown type: $line ($symbols->{o})\n"; | |
230 | } | |
231 | return; | |
831ed035 | 232 | } elsif (/^ {8}(?: {8})? \(undefined(?: \[lazy bound\])?\) external _?(.+)/) { |
68472ab1 JH |
233 | # darwin/ppc marks most undefined text symbols |
234 | # as "[lazy bound]". | |
831ed035 JH |
235 | my ($symbol) = $1; |
236 | return if is_perlish_symbol($symbol); | |
237 | $symbols->{undef}{$symbol}{$symbols->{o}}++; | |
7c616f70 | 238 | return; |
d92f47ae JH |
239 | } |
240 | } | |
7c616f70 | 241 | print "# Unexpected nm output '$line' ($symbols->{o})\n"; |
d92f47ae JH |
242 | } |
243 | ||
244 | my $nm_parse; | |
245 | ||
246 | if ($nm_style eq 'gnu') { | |
247 | $nm_parse = \&nm_parse_gnu; | |
248 | } elsif ($nm_style eq 'darwin') { | |
249 | $nm_parse = \&nm_parse_darwin; | |
250 | } | |
251 | ||
252 | unless (defined $nm_parse) { | |
cb498e56 | 253 | skip_all "no nm parser ($nm_style $nm_style, \$^O $^O)"; |
d92f47ae JH |
254 | } |
255 | ||
256 | my %symbols; | |
257 | ||
258 | while (<$nm_fh>) { | |
259 | next if /^$/; | |
260 | chomp; | |
261 | $nm_parse->(\%symbols); | |
262 | } | |
263 | ||
264 | # use Data::Dumper; print Dumper(\%symbols); | |
265 | ||
266 | if (keys %symbols == 0) { | |
267 | skip_all "no symbols\n"; | |
268 | } | |
269 | ||
270 | # These should always be true for everyone. | |
271 | ||
272 | ok($symbols{obj}{'pp.o'}, "has object pp.o"); | |
273 | ok($symbols{text}{'Perl_peep'}, "has text Perl_peep"); | |
274 | ok($symbols{text}{'Perl_pp_uc'}{'pp.o'}, "has text Perl_pp_uc in pp.o"); | |
275 | ok(exists $symbols{data}{const}, "has data const symbols"); | |
276 | ok($symbols{data}{const}{PL_no_mem}{'globals.o'}, "has PL_no_mem"); | |
277 | ||
278 | my $DEBUGGING = $Config{ccflags} =~ /-DDEBUGGING/ ? 1 : 0; | |
279 | ||
280 | my $GS = $Config{ccflags} =~ /-DPERL_GLOBAL_STRUCT\b/ ? 1 : 0; | |
281 | my $GSP = $Config{ccflags} =~ /-DPERL_GLOBAL_STRUCT_PRIVATE/ ? 1 : 0; | |
282 | ||
283 | print "# GS = $GS\n"; | |
284 | print "# GSP = $GSP\n"; | |
285 | ||
286 | my %data_symbols; | |
287 | ||
288 | for my $dtype (sort keys %{$symbols{data}}) { | |
289 | for my $symbol (sort keys %{$symbols{data}{$dtype}}) { | |
290 | $data_symbols{$symbol}++; | |
291 | } | |
292 | } | |
293 | ||
d92f47ae | 294 | # The following tests differ between vanilla vs $GSP or $GS. |
d92f47ae JH |
295 | |
296 | if ($GSP) { | |
297 | print "# -DPERL_GLOBAL_STRUCT_PRIVATE\n"; | |
298 | ok(!exists $data_symbols{PL_hash_seed}, "has no PL_hash_seed"); | |
299 | ok(!exists $data_symbols{PL_ppaddr}, "has no PL_ppaddr"); | |
300 | ||
301 | ok(! exists $symbols{data}{bss}, "has no data bss symbols"); | |
26443f8e DM |
302 | ok(! exists $symbols{data}{data} || |
303 | # clang with ASAN seems to add this symbol to every object file: | |
304 | !grep($_ ne '__unnamed_1', keys %{$symbols{data}{data}}), | |
305 | "has no data data symbols"); | |
d92f47ae JH |
306 | ok(! exists $symbols{data}{common}, "has no data common symbols"); |
307 | ||
308 | # -DPERL_GLOBAL_STRUCT_PRIVATE should NOT have | |
309 | # the extra text symbol for accessing the vars | |
310 | # (as opposed to "just" -DPERL_GLOBAL_STRUCT) | |
311 | ok(! exists $symbols{text}{Perl_GetVars}, "has no Perl_GetVars"); | |
312 | } elsif ($GS) { | |
313 | print "# -DPERL_GLOBAL_STRUCT\n"; | |
314 | ok(!exists $data_symbols{PL_hash_seed}, "has no PL_hash_seed"); | |
315 | ok(!exists $data_symbols{PL_ppaddr}, "has no PL_ppaddr"); | |
316 | ||
317 | ok(! exists $symbols{data}{bss}, "has no data bss symbols"); | |
318 | ||
319 | # These PerlIO data symbols are left visible with | |
320 | # -DPERL_GLOBAL_STRUCT (as opposed to -DPERL_GLOBAL_STRUCT_PRIVATE) | |
321 | my @PerlIO = | |
322 | qw( | |
323 | PerlIO_byte | |
324 | PerlIO_crlf | |
325 | PerlIO_pending | |
326 | PerlIO_perlio | |
327 | PerlIO_raw | |
328 | PerlIO_remove | |
329 | PerlIO_stdio | |
330 | PerlIO_unix | |
331 | PerlIO_utf8 | |
332 | ); | |
333 | ||
334 | # PL_magic_vtables is const with -DPERL_GLOBAL_STRUCT_PRIVATE but | |
335 | # otherwise not const -- because of SWIG which wants to modify | |
336 | # the table. Evil SWIG, eeevil. | |
337 | ||
338 | # my_cxt_index is used with PERL_IMPLICIT_CONTEXT, which | |
339 | # -DPERL_GLOBAL_STRUCT has turned on. | |
99a0b65c JH |
340 | eq_array([sort keys %{$symbols{data}{data}}], |
341 | [sort('PL_VarsPtr', | |
342 | @PerlIO, | |
343 | 'PL_magic_vtables', | |
344 | 'my_cxt_index')], | |
345 | "data data symbols"); | |
d92f47ae JH |
346 | |
347 | # Only one data common symbol, our "supervariable". | |
99a0b65c JH |
348 | eq_array([sort keys %{$symbols{data}{common}}], |
349 | ['PL_Vars'], | |
350 | "data common symbols"); | |
d92f47ae JH |
351 | |
352 | ok($symbols{data}{data}{PL_VarsPtr}{'globals.o'}, "has PL_VarsPtr"); | |
353 | ok($symbols{data}{common}{PL_Vars}{'globals.o'}, "has PL_Vars"); | |
354 | ||
355 | # -DPERL_GLOBAL_STRUCT has extra text symbol for accessing the vars. | |
356 | ok($symbols{text}{Perl_GetVars}{'util.o'}, "has Perl_GetVars"); | |
357 | } else { | |
358 | print "# neither -DPERL_GLOBAL_STRUCT nor -DPERL_GLOBAL_STRUCT_PRIVATE\n"; | |
359 | ||
a9abea17 | 360 | if ( !$symbols{data}{common} ) { |
831ed035 | 361 | # This is likely because Perl was compiled with |
a9abea17 BF |
362 | # -Accflags="-fno-common" |
363 | $symbols{data}{common} = $symbols{data}{bss}; | |
364 | } | |
831ed035 | 365 | |
d92f47ae JH |
366 | ok($symbols{data}{common}{PL_hash_seed}{'globals.o'}, "has PL_hash_seed"); |
367 | ok($symbols{data}{data}{PL_ppaddr}{'globals.o'}, "has PL_ppaddr"); | |
368 | ||
369 | # None of the GLOBAL_STRUCT* business here. | |
370 | ok(! exists $symbols{data}{data}{PL_VarsPtr}, "has no PL_VarsPtr"); | |
371 | ok(! exists $symbols{data}{common}{PL_Vars}, "has no PL_Vars"); | |
372 | ok(! exists $symbols{text}{Perl_GetVars}, "has no Perl_GetVars"); | |
373 | } | |
374 | ||
831ed035 JH |
375 | ok(keys %{$symbols{undef}}, "has undefined symbols"); |
376 | ||
377 | my @good = qw(memchr memcmp memcpy chmod socket getenv sigaction sqrt time); | |
378 | if ($Config{usedl}) { | |
379 | push @good, 'dlopen'; | |
380 | } | |
381 | for my $good (@good) { | |
382 | my @o = exists $symbols{undef}{$good} ? | |
383 | sort keys %{ $symbols{undef}{$good} } : (); | |
384 | ok(@o, "uses $good (@o)"); | |
385 | } | |
386 | ||
387 | my @bad = qw(gets strcpy strcat strncpy strncat sprintf vsprintf); | |
388 | # XXX: add atoi() to @bad | |
389 | for my $bad (@bad) { | |
390 | my @o = exists $symbols{undef}{$bad} ? | |
391 | sort keys %{ $symbols{undef}{$bad} } : (); | |
392 | is(@o, 0, "uses no $bad (@o)"); | |
393 | } | |
394 | ||
caea674c JH |
395 | if (defined $nm_err_tmp) { |
396 | if (open(my $nm_err_fh, $nm_err_tmp)) { | |
397 | my $error; | |
398 | while (<$nm_err_fh>) { | |
399 | # OS X has weird error where nm warns about | |
400 | # "no name list" but then outputs fine. | |
401 | if (/nm: no name list/ && $^O eq 'darwin') { | |
402 | print "# $^O ignoring $nm output: $_"; | |
403 | next; | |
404 | } | |
405 | warn "$0: Unexpected $nm error: $_"; | |
406 | $error++; | |
b52baaa9 | 407 | } |
caea674c JH |
408 | die "$0: Unexpected $nm errors\n" if $error; |
409 | } else { | |
410 | warn "Failed to open '$nm_err_tmp': $!\n"; | |
b52baaa9 | 411 | } |
b52baaa9 JH |
412 | } |
413 | ||
d92f47ae | 414 | done_testing(); |