3 # Try opening libperl.a with nm, and verifying it has the kind of
4 # symbols we expect, and no symbols we should avoid.
6 # Fail softly, expect things only on known platforms:
7 # - linux, x86 only (ppc linux has odd symbol tables)
8 # - darwin (OS X), both x86 and ppc
10 # and on other platforms, and if things seem odd, just give up (skip_all).
12 # Also, if the rarely-used builds options -DPERL_GLOBAL_STRUCT or
13 # -DPERL_GLOBAL_STRUCT_PRIVATE are used, verify that they did what
14 # they were meant to do, hide the global variables (see perlguts for
17 # Debugging tip: nm output (this script's input) can be faked by
18 # giving one command line argument for this script: it should be
19 # either the filename to read, or "-" for STDIN. You can also append
20 # "@style" (where style is a supported nm style, like "gnu" or "darwin")
21 # to this filename for "cross-parsing".
24 # - "text" symbols are code
25 # - "data" symbols are data (duh), with subdivisions:
26 # - "bss": (Block-Started-by-Symbol: originally from IBM assembler...),
27 # uninitialized data, which often even doesn't exist in the object
28 # file as such, only its size does, which is then created on demand
30 # - "const": initialized read-only data, like string literals
31 # - "common": uninitialized data unless initialized...
32 # (the full story is too long for here, see "man nm")
33 # - "data": initialized read-write data
34 # (somewhat confusingly below: "data data", but it makes code simpler)
35 # - "undefined": external symbol referred to by an object,
36 # most likely a text symbol. Can be either a symbol defined by
37 # a Perl object file but referred to by other Perl object files,
38 # or a completely external symbol from libc, or other system libraries.
50 if ($Config{cc} =~ /g\+\+/) {
51 # XXX Could use c++filt, maybe.
57 for my $f (qw(../libperl.a libperl.a)) {
64 unless (defined $libperl_a) {
65 skip_all "no libperl.a";
68 print "# \$^O = $^O\n";
69 print "# \$Config{archname} = $Config{archname}\n";
70 print "# \$Config{cc} = $Config{cc}\n";
71 print "# libperl = $libperl_a\n";
77 my $nm_err_tmp = "libperl$$";
80 # this is still executed when we skip_all above, avoid a warning
81 unlink $nm_err_tmp if $nm_err_tmp;
88 $fake_input = shift @ARGV;
89 print "# Faking nm output from $fake_input\n";
90 if ($fake_input =~ s/\@(.+)$//) {
92 print "# Faking nm style from $fake_style\n";
93 if ($fake_style eq 'gnu' ||
94 $fake_style eq 'linux' ||
95 $fake_style eq 'freebsd') {
97 } elsif ($fake_style eq 'darwin' || $fake_style eq 'osx') {
100 die "$0: Unknown explicit nm style '$fake_style'\n";
105 unless (defined $nm_style) {
106 if ($^O eq 'linux') {
107 # The 'gnu' style could be equally well be called 'bsd' style,
108 # since the output format of the GNU binutils nm is really BSD.
110 } elsif ($^O eq 'freebsd') {
112 } elsif ($^O eq 'darwin') {
113 $nm_style = 'darwin';
117 if (defined $nm_style) {
118 if ($nm_style eq 'gnu') {
120 } elsif ($nm_style eq 'darwin') {
122 # With the -m option we get better information than the BSD-like
123 # default: with the default, a lot of symbols get dumped into 'S'
124 # or 's', for example one cannot tell the difference between const
125 # and non-const data symbols.
128 die "$0: Unexpected nm style '$nm_style'\n";
132 if ($^O eq 'linux' && $Config{archname} !~ /^(?:x|i6)86/) {
133 # For example in ppc most (but not all!) code symbols are placed
134 # in 'D' (data), not in ' T '. We cannot work under such conditions.
135 skip_all "linux but archname $Config{archname} not x86*";
138 unless (defined $nm) {
142 unless (defined $nm_style) {
143 skip_all "no nm style";
146 print "# nm = $nm\n";
147 print "# nm_style = $nm_style\n";
148 print "# nm_opt = $nm_opt\n";
151 skip_all "no executable nm $nm";
154 if ($nm_style eq 'gnu' && !defined $fake_style) {
155 open(my $gnu_verify, "$nm --version|") or
156 skip_all "nm failed: $!";
158 while (<$gnu_verify>) {
164 unless ($gnu_verified) {
165 skip_all "no GNU nm";
169 if (defined $fake_input) {
170 if ($fake_input eq '-') {
171 open($nm_fh, "<&STDIN") or
172 skip_all "Duping STDIN failed: $!";
174 open($nm_fh, "<", $fake_input) or
175 skip_all "Opening '$fake_input' failed: $!";
177 undef $nm_err_tmp; # In this case there will be no nm errors.
179 print qq{# command: "$nm $nm_opt $libperl_a 2>$nm_err_tmp |"\n};
180 open($nm_fh, "$nm $nm_opt $libperl_a 2>$nm_err_tmp |") or
181 skip_all "$nm $nm_opt $libperl_a failed: $!";
184 sub is_perlish_symbol {
185 $_[0] =~ /^(?:PL_|Perl|PerlIO)/;
188 # XXX Implement "internal test" for this script (option -t?)
189 # to verify that the parsing does what it's intended to.
194 if (m{^(\w+\.o):$}) {
196 $symbols->{obj}{$1}++;
200 die "$0: undefined current object: $line"
201 unless defined $symbols->{o};
202 # 64-bit systems have 16 hexdigits, 32-bit systems have 8.
203 if (s/^[0-9a-f]{8}(?:[0-9a-f]{8})? //) {
204 if (/^[Rr] (\w+)$/) {
205 # R: read only (const)
206 $symbols->{data}{const}{$1}{$symbols->{o}}++;
208 # Skip local const (read only).
209 } elsif (/^([Tti]) (\w+)(\..+)?$/) {
210 $symbols->{text}{$2}{$symbols->{o}}{$1}++;
211 } elsif (/^C (\w+)$/) {
212 $symbols->{data}{common}{$1}{$symbols->{o}}++;
213 } elsif (/^[BbSs] (\w+)(\.\d+)?$/) {
214 # Bb: uninitialized data (bss)
215 # Ss: uninitialized data "for small objects"
216 $symbols->{data}{bss}{$1}{$symbols->{o}}++;
217 } elsif (/^D _LIB_VERSION$/) {
218 # Skip the _LIB_VERSION (not ours, probably libm)
219 } elsif (/^[DdGg] (\w+)$/) {
220 # Dd: initialized data
221 # Gg: initialized "for small objects"
222 $symbols->{data}{data}{$1}{$symbols->{o}}++;
223 } elsif (/^. \.?(\w+)$/) {
224 # Skip the unknown types.
225 print "# Unknown type: $line ($symbols->{o})\n";
228 } elsif (/^ {8}(?: {8})? U _?(\w+)$/) {
230 return if is_perlish_symbol($symbol);
231 $symbols->{undef}{$symbol}{$symbols->{o}}++;
235 print "# Unexpected nm output '$line' ($symbols->{o})\n";
238 sub nm_parse_darwin {
241 if (m{^(?:.+)?libperl\.a\((\w+\.o)\):$}) {
243 $symbols->{obj}{$1}++;
247 die "$0: undefined current object: $line" unless defined $symbols->{o};
248 # 64-bit systems have 16 hexdigits, 32-bit systems have 8.
249 if (s/^[0-9a-f]{8}(?:[0-9a-f]{8})? //) {
250 # String literals can live in different sections
251 # depending on the compiler and os release, assumedly
253 if (/^\(__TEXT,__(?:const|(?:asan_)?cstring|literal\d+)\) (?:non-)?external _?(\w+)(\.\w+)?$/) {
254 my ($symbol, $suffix) = ($1, $2);
255 # Ignore function-local constants like
256 # _Perl_av_extend_guts.oom_array_extend
257 return if defined $suffix && /__TEXT,__const/;
258 # Ignore the cstring unnamed strings.
259 return if $symbol =~ /^L\.str\d+$/;
260 $symbols->{data}{const}{$symbol}{$symbols->{o}}++;
261 } elsif (/^\(__TEXT,__text\) ((?:non-)?external) _(\w+)$/) {
262 my ($exp, $sym) = ($1, $2);
263 $symbols->{text}{$sym}{$symbols->{o}}{$exp =~ /^non/ ? 't' : 'T'}++;
264 } elsif (/^\(__DATA,__\w*?(const|data|bss|common)\w*\) (?:non-)?external _?(\w+)(\.\w+)?$/) {
265 my ($dtype, $symbol, $suffix) = ($1, $2, $3);
266 # Ignore function-local constants like
267 # _Perl_pp_gmtime.dayname
268 return if defined $suffix;
269 $symbols->{data}{$dtype}{$symbol}{$symbols->{o}}++;
270 } elsif (/^\(__DATA,__const\) non-external _\.memset_pattern\d*$/) {
271 # Skip this, whatever it is (some inlined leakage from
273 } elsif (/^\(__TEXT,__eh_frame/) {
274 # Skip the eh_frame (exception handling) symbols.
276 } elsif (/^\(__\w+,__\w+\) /) {
277 # Skip the unknown types.
278 print "# Unknown type: $line ($symbols->{o})\n";
281 } elsif (/^ {8}(?: {8})? \(undefined(?: \[lazy bound\])?\) external _?(.+)/) {
282 # darwin/ppc marks most undefined text symbols
284 my ($symbol) = $1 =~ s/\$UNIX2003\z//r;
285 return if is_perlish_symbol($symbol);
286 $symbols->{undef}{$symbol}{$symbols->{o}}++;
290 print "# Unexpected nm output '$line' ($symbols->{o})\n";
295 if ($nm_style eq 'gnu') {
296 $nm_parse = \&nm_parse_gnu;
297 } elsif ($nm_style eq 'darwin') {
298 $nm_parse = \&nm_parse_darwin;
301 unless (defined $nm_parse) {
302 skip_all "no nm parser ($nm_style $nm_style, \$^O $^O)";
310 $nm_parse->(\%symbols);
313 # use Data::Dumper; print Dumper(\%symbols);
315 # Something went awfully wrong. Wrong nm? Wrong options?
316 unless (keys %symbols) {
317 skip_all "no symbols\n";
319 unless (exists $symbols{text}) {
320 skip_all "no text symbols\n";
323 # These should always be true for everyone.
325 ok($symbols{obj}{'pp.o'}, "has object pp.o");
326 ok($symbols{text}{'Perl_peep'}, "has text Perl_peep");
327 ok($symbols{text}{'Perl_pp_uc'}{'pp.o'}, "has text Perl_pp_uc in pp.o");
328 ok(exists $symbols{data}{const}, "has data const symbols");
329 ok($symbols{data}{const}{PL_no_mem}{'globals.o'}, "has PL_no_mem");
331 my $GS = $Config{ccflags} =~ /-DPERL_GLOBAL_STRUCT\b/ ? 1 : 0;
332 my $GSP = $Config{ccflags} =~ /-DPERL_GLOBAL_STRUCT_PRIVATE/ ? 1 : 0;
333 my $nocommon = $Config{ccflags} =~ /-fno-common/ ? 1 : 0;
335 print "# GS = $GS\n";
336 print "# GSP = $GSP\n";
337 print "# nocommon = $nocommon\n";
341 for my $dtype (sort keys %{$symbols{data}}) {
342 for my $symbol (sort keys %{$symbols{data}{$dtype}}) {
343 $data_symbols{$symbol}++;
347 # The following tests differ between vanilla vs $GSP or $GS.
350 print "# -DPERL_GLOBAL_STRUCT_PRIVATE\n";
351 ok(!exists $data_symbols{PL_hash_seed}, "has no PL_hash_seed");
352 ok(!exists $data_symbols{PL_ppaddr}, "has no PL_ppaddr");
354 ok(! exists $symbols{data}{bss}, "has no data bss symbols")
356 my $bad = "BSS entries (there are supposed to be none):\n";
357 $bad .= " bss sym: $_\n" for sort keys %{$symbols{data}{bss}};
361 ok(! exists $symbols{data}{data} ||
362 # clang with ASAN seems to add this symbol to every object file:
363 !grep($_ ne '__unnamed_1', keys %{$symbols{data}{data}}),
364 "has no data data symbols")
366 my $bad = "DATA entries (there are supposed to be none):\n";
367 $bad .= " data sym: $_\n" for sort keys %{$symbols{data}{data}};
371 ok(! exists $symbols{data}{common}, "has no data common symbols")
373 my $bad = "COMMON entries (there are supposed to be none):\n";
374 $bad .= " common sym: $_\n" for sort keys %{$symbols{data}{common}};
378 # -DPERL_GLOBAL_STRUCT_PRIVATE should NOT have
379 # the extra text symbol for accessing the vars
380 # (as opposed to "just" -DPERL_GLOBAL_STRUCT)
381 ok(! exists $symbols{text}{Perl_GetVars}, "has no Perl_GetVars");
383 print "# -DPERL_GLOBAL_STRUCT\n";
384 ok(!exists $data_symbols{PL_hash_seed}, "has no PL_hash_seed");
385 ok(!exists $data_symbols{PL_ppaddr}, "has no PL_ppaddr");
388 $symbols{data}{common} = $symbols{data}{bss};
389 delete $symbols{data}{bss};
392 ok(! exists $symbols{data}{bss}, "has no data bss symbols")
394 my $bad = "BSS entries (there are supposed to be none):\n";
395 $bad .= " bss sym: $_\n" for sort keys %{$symbols{data}{bss}};
400 # These PerlIO data symbols are left visible with
401 # -DPERL_GLOBAL_STRUCT (as opposed to -DPERL_GLOBAL_STRUCT_PRIVATE)
415 # PL_magic_vtables is const with -DPERL_GLOBAL_STRUCT_PRIVATE but
416 # otherwise not const -- because of SWIG which wants to modify
417 # the table. Evil SWIG, eeevil.
419 # my_cxt_index is used with PERL_IMPLICIT_CONTEXT, which
420 # -DPERL_GLOBAL_STRUCT has turned on.
421 eq_array([sort keys %{$symbols{data}{data}}],
426 "data data symbols");
428 # Only one data common symbol, our "supervariable".
429 eq_array([sort keys %{$symbols{data}{common}}],
431 "data common symbols");
433 ok($symbols{data}{data}{PL_VarsPtr}{'globals.o'}, "has PL_VarsPtr");
434 ok($symbols{data}{common}{PL_Vars}{'globals.o'}, "has PL_Vars");
436 # -DPERL_GLOBAL_STRUCT has extra text symbol for accessing the vars.
437 ok($symbols{text}{Perl_GetVars}{'util.o'}, "has Perl_GetVars");
439 print "# neither -DPERL_GLOBAL_STRUCT nor -DPERL_GLOBAL_STRUCT_PRIVATE\n";
441 if ( !$symbols{data}{common} ) {
442 # This is likely because Perl was compiled with
443 # -Accflags="-fno-common"
444 $symbols{data}{common} = $symbols{data}{bss};
447 ok($symbols{data}{common}{PL_hash_seed}{'globals.o'}, "has PL_hash_seed");
448 ok($symbols{data}{data}{PL_ppaddr}{'globals.o'}, "has PL_ppaddr");
450 # None of the GLOBAL_STRUCT* business here.
451 ok(! exists $symbols{data}{data}{PL_VarsPtr}, "has no PL_VarsPtr");
452 ok(! exists $symbols{data}{common}{PL_Vars}, "has no PL_Vars");
453 ok(! exists $symbols{text}{Perl_GetVars}, "has no Perl_GetVars");
456 # See the comments in the beginning for what "undefined symbols"
457 # really means. We *should* have many of those, that is a good thing.
458 ok(keys %{$symbols{undef}}, "has undefined symbols");
460 # There are certain symbols we expect to see.
462 # chmod, socket, getenv, sigaction, exp, time are system/library
463 # calls that should each see at least one use. exp can be expl
466 chmod => undef, # There is no Configure symbol for chmod.
467 socket => 'd_socket',
468 getenv => undef, # There is no Configure symbol for getenv,
469 sigaction => 'd_sigaction',
473 if ($Config{uselongdouble} && $Config{longdblsize} > $Config{doublesize}) {
474 $expected{expl} = undef; # There is no Configure symbol for expl.
475 } elsif ($Config{usequadmath}) {
476 $expected{expq} = undef; # There is no Configure symbol for expq.
478 $expected{exp} = undef; # There is no Configure symbol for exp.
481 # DynaLoader will use dlopen, unless we are building static,
482 # and it is used in the platforms we are supporting in this test.
483 if ($Config{usedl} ) {
484 $expected{dlopen} = 'd_dlopen';
487 for my $symbol (sort keys %expected) {
488 if (defined $expected{$symbol} && !$Config{$expected{$symbol}}) {
494 my @o = exists $symbols{undef}{$symbol} ?
495 sort keys %{ $symbols{undef}{$symbol} } : ();
496 ok(@o, "uses $symbol (@o)");
499 # There are certain symbols we expect NOT to see.
501 # gets is horribly unsafe.
503 # fgets should not be used (Perl has its own API, sv_gets),
504 # even without perlio.
508 # strcat, strcpy, strncat, strncpy are unsafe.
510 # sprintf and vsprintf should not be used because
511 # Perl has its own safer and more portable implementations.
512 # (One exception: for certain floating point outputs
513 # the native sprintf is still used in some platforms, see below.)
515 # atoi has unsafe and undefined failure modes, and is affected by locale.
516 # Its cousins include atol and atoll.
518 # strtol and strtoul are affected by locale.
519 # Cousins include strtoq.
521 # system should not be used, use pp_system or my_popen.
526 for my $str (qw(system)) {
527 $unexpected{$str} = "d_$str";
530 for my $stdio (qw(gets fgets tmpfile sprintf vsprintf)) {
531 $unexpected{$stdio} = undef; # No Configure symbol for these.
533 for my $str (qw(strcat strcpy strncat strncpy)) {
534 $unexpected{$str} = undef; # No Configure symbol for these.
537 $unexpected{atoi} = undef; # No Configure symbol for atoi.
538 $unexpected{atol} = undef; # No Configure symbol for atol.
540 for my $str (qw(atoll strtol strtoul strtoq)) {
541 $unexpected{$str} = "d_$str";
544 for my $symbol (sort keys %unexpected) {
545 if (defined $unexpected{$symbol} && !$Config{$unexpected{$symbol}}) {
551 my @o = exists $symbols{undef}{$symbol} ?
552 sort keys %{ $symbols{undef}{$symbol} } : ();
553 # While sprintf() is bad in the general case,
554 # some platforms implement Gconvert via sprintf, in sv.o.
555 if ($symbol eq 'sprintf' &&
556 $Config{d_Gconvert} =~ /^sprintf/ &&
557 @o == 1 && $o[0] eq 'sv.o') {
559 skip("uses sprintf for Gconvert in sv.o");
562 is(@o, 0, "uses no $symbol (@o)");
566 # Check that any text symbols named S_ are not exported.
567 my $export_S_prefix = 0;
568 for my $t (sort grep { /^S_/ } keys %{$symbols{text}}) {
569 for my $o (sort keys %{$symbols{text}{$t}}) {
570 if (exists $symbols{text}{$t}{$o}{T}) {
571 fail($t, "$t exported from $o");
576 is($export_S_prefix, 0, "no S_ exports");
578 if (defined $nm_err_tmp) {
579 if (open(my $nm_err_fh, $nm_err_tmp)) {
581 while (<$nm_err_fh>) {
582 # OS X has weird error where nm warns about
583 # "no name list" but then outputs fine.
584 if (/nm: no name list/ && $^O eq 'darwin') {
585 print "# $^O ignoring $nm output: $_";
588 warn "$0: Unexpected $nm error: $_";
591 die "$0: Unexpected $nm errors\n" if $error;
593 warn "Failed to open '$nm_err_tmp': $!\n";