X-Git-Url: https://perl5.git.perl.org/perl5.git/blobdiff_plain/7c616f707fe627c4ae26d40f1c5003ff89a63944..446b89af54d4dd848d5432e339a1456502eaee01:/t/porting/libperl.t diff --git a/t/porting/libperl.t b/t/porting/libperl.t index 60003f1..f62b80d 100644 --- a/t/porting/libperl.t +++ b/t/porting/libperl.t @@ -1,7 +1,13 @@ #!/usr/bin/perl -w -# Try opening libperl.a with nm, and verifying it has the kind of symbols -# we expected. Fail softly, expect things only on known platforms. +# Try opening libperl.a with nm, and verifying it has the kind of +# symbols we expect, and no symbols we should avoid. +# +# Fail softly, expect things only on known platforms: +# - linux +# - darwin (OS X), both x86 and ppc +# - freebsd +# and on other platforms, and if things seem odd, just give up (skip_all). # # Also, if the rarely-used builds options -DPERL_GLOBAL_STRUCT or # -DPERL_GLOBAL_STRUCT_PRIVATE are used, verify that they did what @@ -10,7 +16,26 @@ # # Debugging tip: nm output (this script's input) can be faked by # giving one command line argument for this script: it should be -# either the filename to read, or "-" for STDIN. +# either the filename to read, or "-" for STDIN. You can also append +# "@style" (where style is a supported nm style, like "gnu" or "darwin") +# to this filename for "cross-parsing". +# +# Some terminology: +# - "text" symbols are code +# - "data" symbols are data (duh), with subdivisions: +# - "bss": (Block-Started-by-Symbol: originally from IBM assembler...), +# uninitialized data, which often even doesn't exist in the object +# file as such, only its size does, which is then created on demand +# by the loader +# - "const": initialized read-only data, like string literals +# - "common": uninitialized data unless initialized... +# (the full story is too long for here, see "man nm") +# - "data": initialized read-write data +# (somewhat confusingly below: "data data", but it makes code simpler) +# - "undefined": external symbol referred to by an object, +# most likely a text symbol. Can be either a symbol defined by +# a Perl object file but referred to by other Perl object files, +# or a completely external symbol from libc, or other system libraries. BEGIN { chdir 't' if -d 't'; @@ -40,23 +65,67 @@ unless (defined $libperl_a) { skip_all "no libperl.a"; } +print "# \$^O = $^O\n"; +print "# \$Config{cc} = $Config{cc}\n"; print "# libperl = $libperl_a\n"; my $nm; my $nm_opt = ''; my $nm_style; +my $nm_fh; +my $nm_err_tmp = "libperl$$"; -if ($^O eq 'linux') { - $nm = '/usr/bin/nm'; - $nm_style = 'gnu'; -} elsif ($^O eq 'darwin') { - $nm = '/usr/bin/nm'; - $nm_style = 'darwin'; - # With the -m option we get better information than the BSD-like - # default: with the default, a lot of symbols get dumped into 'S' - # or 's', for example one cannot tell the difference between const - # and non-const symbols. - $nm_opt = '-m'; +END { + # this is still executed when we skip_all above, avoid a warning + unlink $nm_err_tmp if $nm_err_tmp; +} + +my $fake_input; +my $fake_style; + +if (@ARGV == 1) { + $fake_input = shift @ARGV; + print "# Faking nm output from $fake_input\n"; + if ($fake_input =~ s/\@(.+)$//) { + $fake_style = $1; + print "# Faking nm style from $fake_style\n"; + if ($fake_style eq 'gnu' || + $fake_style eq 'linux' || + $fake_style eq 'freebsd') { + $nm_style = 'gnu' + } elsif ($fake_style eq 'darwin' || $fake_style eq 'osx') { + $nm_style = 'darwin' + } else { + die "$0: Unknown explicit nm style '$fake_style'\n"; + } + } +} + +unless (defined $nm_style) { + if ($^O eq 'linux') { + # The 'gnu' style could be equally well be called 'bsd' style, + # since the output format of the GNU binutils nm is really BSD. + $nm_style = 'gnu'; + } elsif ($^O eq 'freebsd') { + $nm_style = 'gnu'; + } elsif ($^O eq 'darwin') { + $nm_style = 'darwin'; + } +} + +if (defined $nm_style) { + if ($nm_style eq 'gnu') { + $nm = '/usr/bin/nm'; + } elsif ($nm_style eq 'darwin') { + $nm = '/usr/bin/nm'; + # With the -m option we get better information than the BSD-like + # default: with the default, a lot of symbols get dumped into 'S' + # or 's', for example one cannot tell the difference between const + # and non-const data symbols. + $nm_opt = '-m'; + } else { + die "$0: Unexpected nm style '$nm_style'\n"; + } } unless (defined $nm) { @@ -75,11 +144,11 @@ unless (-x $nm) { skip_all "no executable nm $nm"; } -if ($nm_style eq 'gnu') { - open(my $nm_fh, "$nm --version|") or +if ($nm_style eq 'gnu' && !defined $fake_style) { + open(my $gnu_verify, "$nm --version|") or skip_all "nm failed: $!"; my $gnu_verified; - while (<$nm_fh>) { + while (<$gnu_verify>) { if (/^GNU nm/) { $gnu_verified = 1; last; @@ -90,63 +159,68 @@ if ($nm_style eq 'gnu') { } } -my $nm_err_tmp = "libperl$$"; - -END { - # this is still executed when we skip_all above, avoid a warning - unlink $nm_err_tmp if $nm_err_tmp; -} - -my $nm_fh; - -if (@ARGV == 1) { - my $fake = shift @ARGV; - print "# Faking nm output from $fake\n"; - if ($fake eq '-') { +if (defined $fake_input) { + if ($fake_input eq '-') { open($nm_fh, "<&STDIN") or skip_all "Duping STDIN failed: $!"; } else { - open($nm_fh, "<", $fake) or - skip_all "Opening '$fake' failed: $!"; + open($nm_fh, "<", $fake_input) or + skip_all "Opening '$fake_input' failed: $!"; } undef $nm_err_tmp; # In this case there will be no nm errors. } else { open($nm_fh, "$nm $nm_opt $libperl_a 2>$nm_err_tmp |") or skip_all "$nm $nm_opt $libperl_a failed: $!"; } - + +sub is_perlish_symbol { + $_[0] =~ /^(?:PL_|Perl|PerlIO)/; +} + +# XXX Implement "internal test" for this script (option -t?) +# to verify that the parsing does what it's intended to. + sub nm_parse_gnu { my $symbols = shift; my $line = $_; if (m{^(\w+\.o):$}) { + # object file name $symbols->{obj}{$1}++; $symbols->{o} = $1; return; } else { die "$0: undefined current object: $line" unless defined $symbols->{o}; + # 64-bit systems have 16 hexdigits, 32-bit systems have 8. if (s/^[0-9a-f]{8}(?:[0-9a-f]{8})? //) { if (/^[Rr] (\w+)$/) { + # R: read only (const) $symbols->{data}{const}{$1}{$symbols->{o}}++; } elsif (/^r .+$/) { - # Skip local const. + # Skip local const (read only). } elsif (/^[Tti] (\w+)(\..+)?$/) { $symbols->{text}{$1}{$symbols->{o}}++; } elsif (/^C (\w+)$/) { $symbols->{data}{common}{$1}{$symbols->{o}}++; } elsif (/^[BbSs] (\w+)(\.\d+)?$/) { + # Bb: uninitialized data (bss) + # Ss: uninitialized data "for small objects" $symbols->{data}{bss}{$1}{$symbols->{o}}++; - } elsif (/^0{16} D _LIB_VERSION$/) { - # Skip the _LIB_VERSION (not ours). + } elsif (/^D _LIB_VERSION$/) { + # Skip the _LIB_VERSION (not ours, probably libm) } elsif (/^[DdGg] (\w+)$/) { + # Dd: initialized data + # Gg: initialized "for small objects" $symbols->{data}{data}{$1}{$symbols->{o}}++; } elsif (/^. \.?(\w+)$/) { # Skip the unknown types. print "# Unknown type: $line ($symbols->{o})\n"; } return; - } elsif (/^ {8}(?: {8})? U (\w+)$/) { - # Skip the undefined. + } elsif (/^ {8}(?: {8})? U _?(\w+)$/) { + my ($symbol) = $1; + return if is_perlish_symbol($symbol); + $symbols->{undef}{$symbol}{$symbols->{o}}++; return; } } @@ -156,20 +230,25 @@ sub nm_parse_gnu { sub nm_parse_darwin { my $symbols = shift; my $line = $_; - if (m{^(?:\.\./)?libperl\.a\((\w+\.o)\):$}) { + if (m{^(?:.+)?libperl\.a\((\w+\.o)\):$}) { + # object file name $symbols->{obj}{$1}++; $symbols->{o} = $1; return; } else { die "$0: undefined current object: $line" unless defined $symbols->{o}; + # 64-bit systems have 16 hexdigits, 32-bit systems have 8. if (s/^[0-9a-f]{8}(?:[0-9a-f]{8})? //) { - if (/^\(__TEXT,__(?:eh_frame|cstring)\) /) { - # Skip the eh_frame and cstring. - } elsif (/^\(__TEXT,__(?:const|literal\d+)\) (?:non-)?external _?(\w+)(\.\w+)?$/) { + # String literals can live in different sections + # depending on the compiler and os release, assumedly + # also linker flags. + if (/^\(__TEXT,__(?:const|cstring|literal\d+)\) (?:non-)?external _?(\w+)(\.\w+)?$/) { my ($symbol, $suffix) = ($1, $2); # Ignore function-local constants like # _Perl_av_extend_guts.oom_array_extend return if defined $suffix && /__TEXT,__const/; + # Ignore the cstring unnamed strings. + return if $symbol =~ /^L\.str\d+$/; $symbols->{data}{const}{$symbol}{$symbols->{o}}++; } elsif (/^\(__TEXT,__text\) (?:non-)?external _(\w+)$/) { $symbols->{text}{$1}{$symbols->{o}}++; @@ -182,13 +261,20 @@ sub nm_parse_darwin { } elsif (/^\(__DATA,__const\) non-external _\.memset_pattern\d*$/) { # Skip this, whatever it is (some inlined leakage from # darwin libc?) + } elsif (/^\(__TEXT,__eh_frame/) { + # Skip the eh_frame (exception handling) symbols. + return; } elsif (/^\(__\w+,__\w+\) /) { # Skip the unknown types. print "# Unknown type: $line ($symbols->{o})\n"; } return; - } elsif (/^ {8}(?: {8})? \(undefined\) /) { - # Skip the undefined. + } elsif (/^ {8}(?: {8})? \(undefined(?: \[lazy bound\])?\) external _?(.+)/) { + # darwin/ppc marks most undefined text symbols + # as "[lazy bound]". + my ($symbol) = $1; + return if is_perlish_symbol($symbol); + $symbols->{undef}{$symbol}{$symbols->{o}}++; return; } } @@ -204,7 +290,7 @@ if ($nm_style eq 'gnu') { } unless (defined $nm_parse) { - skip_all "no nm parser"; + skip_all "no nm parser ($nm_style $nm_style, \$^O $^O)"; } my %symbols; @@ -245,43 +331,7 @@ for my $dtype (sort keys %{$symbols{data}}) { } } -# Since we are deprived of Test::More. -sub is_deeply { - my ($a, $b) = @_; - if (ref $a eq 'ARRAY' && ref $b eq 'ARRAY') { - if (@$a == @$b) { - for my $i (0..$#$a) { - unless ($a->[$i] eq $b->[$i]) { - printf("# LHS elem #%d '%s' ne RHS elem #%d '%s'\n", - $a->[$i], $b->[$i]); - return 0; - } - } - return 1; - } else { - printf("# LHS length %d, RHS length %d\n", - @$a, @$b); - return 0; - } - } else { - die "$0: Unexpcted: is_deeply $a $b\n"; - } -} - # The following tests differ between vanilla vs $GSP or $GS. -# -# Some terminology: -# - "text" symbols are code -# - "data" symbols are data (duh), with subdivisions: -# - "bss": (Block-Started-by-Symbol: originally from IBM assembler...), -# uninitialized data, which often even doesn't exist in the object -# file as such, only its size does, which is then created on demand -# by the loader -# - "const": initialized read-only data, like string literals -# - "common": uninitialized data unless initialized... -# (the full story is too long for here, see "man nm") -# - "data": initialized read-write data -# (somewhat confusingly below: "data data", but it makes code simpler) if ($GSP) { print "# -DPERL_GLOBAL_STRUCT_PRIVATE\n"; @@ -327,17 +377,17 @@ if ($GSP) { # my_cxt_index is used with PERL_IMPLICIT_CONTEXT, which # -DPERL_GLOBAL_STRUCT has turned on. - is_deeply([sort keys %{$symbols{data}{data}}], - [sort('PL_VarsPtr', - @PerlIO, - 'PL_magic_vtables', - 'my_cxt_index')], - "data data symbols"); + eq_array([sort keys %{$symbols{data}{data}}], + [sort('PL_VarsPtr', + @PerlIO, + 'PL_magic_vtables', + 'my_cxt_index')], + "data data symbols"); # Only one data common symbol, our "supervariable". - is_deeply([sort keys %{$symbols{data}{common}}], - ['PL_Vars'], - "data common symbols"); + eq_array([sort keys %{$symbols{data}{common}}], + ['PL_Vars'], + "data common symbols"); ok($symbols{data}{data}{PL_VarsPtr}{'globals.o'}, "has PL_VarsPtr"); ok($symbols{data}{common}{PL_Vars}{'globals.o'}, "has PL_Vars"); @@ -348,11 +398,11 @@ if ($GSP) { print "# neither -DPERL_GLOBAL_STRUCT nor -DPERL_GLOBAL_STRUCT_PRIVATE\n"; if ( !$symbols{data}{common} ) { - # This is likely because Perl was compiled with + # This is likely because Perl was compiled with # -Accflags="-fno-common" $symbols{data}{common} = $symbols{data}{bss}; } - + ok($symbols{data}{common}{PL_hash_seed}{'globals.o'}, "has PL_hash_seed"); ok($symbols{data}{data}{PL_ppaddr}{'globals.o'}, "has PL_ppaddr"); @@ -362,6 +412,128 @@ if ($GSP) { ok(! exists $symbols{text}{Perl_GetVars}, "has no Perl_GetVars"); } +# See the comments in the beginning for what "undefined symbols" +# really means. We *should* have many of those, that is a good thing. +ok(keys %{$symbols{undef}}, "has undefined symbols"); + +# There are certain symbols we expect to see. + +# memchr, memcmp, memcpy should be used all over the place. +# +# chmod, socket, getenv, sigaction, sqrt, time are system/library +# calls that should each see at least one use. sqrt can be sqrtl +# if so configured. +my %expected = ( + memchr => 'd_memchr', + memcmp => 'd_memcmp', + memcpy => 'd_memcpy', + chmod => undef, # There is no Configure symbol for chmod. + socket => 'd_socket', + getenv => undef, # There is no Configure symbol for getenv, + sigaction => 'd_sigaction', + time => 'd_time', + ); + +if ($Config{uselongdouble} && $Config{d_longdbl}) { + $expected{sqrtl} = 'd_sqrtl'; +} else { + $expected{sqrt} = undef; # There is no Configure symbol for sqrt. +} + +# DynaLoader will use dlopen, unless we are building static, +# and in the platforms we are supporting in this test. +if ($Config{usedl} ) { + $expected{dlopen} = 'd_dlopen'; +} + +for my $symbol (sort keys %expected) { + if (defined $expected{$symbol} && !$Config{$expected{$symbol}}) { + SKIP: { + skip("no $symbol"); + } + next; + } + my @o = exists $symbols{undef}{$symbol} ? + sort keys %{ $symbols{undef}{$symbol} } : (); + # In some FreeBSD versions memcmp disappears (compiler inlining?). + if (($^O eq 'freebsd' || + (defined $fake_style && $fake_style eq 'freebsd')) && + $symbol eq 'memcmp' && @o == 0) { + SKIP: { + skip("freebsd memcmp"); + } + } else { + ok(@o, "uses $symbol (@o)"); + } +} + +# There are certain symbols we expect NOT to see. +# +# gets is horribly unsafe. +# +# fgets should not be used (Perl has its own API, sv_gets), +# even without perlio. +# +# tmpfile is unsafe. +# +# strcat, strcpy, strncat, strncpy are unsafe. +# +# sprintf and vsprintf should not be used because +# Perl has its own safer and more portable implementations. +# (One exception: for certain floating point outputs +# the native sprintf is still used in some platforms, see below.) +# +# atoi has unsafe and undefined failure modes, and is affected by locale. +# Its cousins include atol and atoll. +# +# strtol and strtoul are affected by locale. +# Cousins include strtoq. +# +# system should not be used, use pp_system or my_popen. +# + +my %unexpected; + +for my $str (qw(system)) { + $unexpected{$str} = "d_$str"; +} + +for my $stdio (qw(gets fgets tmpfile sprintf vsprintf)) { + $unexpected{$stdio} = undef; # No Configure symbol for these. +} +for my $str (qw(strcat strcpy strncat strncpy)) { + $unexpected{$str} = undef; # No Configure symbol for these. +} + +$unexpected{atoi} = undef; # No Configure symbol for atoi. +$unexpected{atol} = undef; # No Configure symbol for atol. + +for my $str (qw(atoll strtol strtoul strtoq)) { + $unexpected{$str} = "d_$str"; +} + +for my $symbol (sort keys %unexpected) { + if (defined $unexpected{$symbol} && !$Config{$unexpected{$symbol}}) { + SKIP: { + skip("no $symbol"); + } + next; + } + my @o = exists $symbols{undef}{$symbol} ? + sort keys %{ $symbols{undef}{$symbol} } : (); + # While sprintf() is bad in the general case, + # some platforms implement Gconvert via sprintf, in sv.o. + if ($symbol eq 'sprintf' && + $Config{d_Gconvert} =~ /^sprintf/ && + @o == 1 && $o[0] eq 'sv.o') { + SKIP: { + skip("uses sprintf for Gconvert in sv.o"); + } + } else { + is(@o, 0, "uses no $symbol (@o)"); + } +} + if (defined $nm_err_tmp) { if (open(my $nm_err_fh, $nm_err_tmp)) { my $error;