This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
t/porting/libperl.t: don't check for non-public symbols
[perl5.git] / t / porting / libperl.t
CommitLineData
d92f47ae
JH
1#!/usr/bin/perl -w
2
831ed035
JH
3# Try opening libperl.a with nm, and verifying it has the kind of
4# symbols we expect, and no symbols we should avoid.
5#
aab7f3d5 6# Fail softly, expect things only on known platforms:
63b7eadb 7# - linux, x86 only (ppc linux has odd symbol tables)
aab7f3d5
JH
8# - darwin (OS X), both x86 and ppc
9# - freebsd
10# and on other platforms, and if things seem odd, just give up (skip_all).
d92f47ae 11#
caea674c
JH
12# Debugging tip: nm output (this script's input) can be faked by
13# giving one command line argument for this script: it should be
88abe8fb
JH
14# either the filename to read, or "-" for STDIN. You can also append
15# "@style" (where style is a supported nm style, like "gnu" or "darwin")
16# to this filename for "cross-parsing".
831ed035
JH
17#
18# Some terminology:
19# - "text" symbols are code
20# - "data" symbols are data (duh), with subdivisions:
21# - "bss": (Block-Started-by-Symbol: originally from IBM assembler...),
22# uninitialized data, which often even doesn't exist in the object
23# file as such, only its size does, which is then created on demand
24# by the loader
25# - "const": initialized read-only data, like string literals
26# - "common": uninitialized data unless initialized...
27# (the full story is too long for here, see "man nm")
28# - "data": initialized read-write data
29# (somewhat confusingly below: "data data", but it makes code simpler)
30# - "undefined": external symbol referred to by an object,
31# most likely a text symbol. Can be either a symbol defined by
32# a Perl object file but referred to by other Perl object files,
33# or a completely external symbol from libc, or other system libraries.
d92f47ae
JH
34
35BEGIN {
36 chdir 't' if -d 't';
37 @INC = '../lib';
38 require "./test.pl";
39}
40
41use strict;
42
43use Config;
44
45if ($Config{cc} =~ /g\+\+/) {
46 # XXX Could use c++filt, maybe.
47 skip_all "on g++";
48}
49
50my $libperl_a;
51
52for my $f (qw(../libperl.a libperl.a)) {
53 if (-f $f) {
54 $libperl_a = $f;
55 last;
56 }
57}
58
59unless (defined $libperl_a) {
60 skip_all "no libperl.a";
61}
62
cb498e56 63print "# \$^O = $^O\n";
63b7eadb 64print "# \$Config{archname} = $Config{archname}\n";
cb498e56 65print "# \$Config{cc} = $Config{cc}\n";
d92f47ae
JH
66print "# libperl = $libperl_a\n";
67
68my $nm;
69my $nm_opt = '';
70my $nm_style;
88abe8fb
JH
71my $nm_fh;
72my $nm_err_tmp = "libperl$$";
73
74END {
75 # this is still executed when we skip_all above, avoid a warning
76 unlink $nm_err_tmp if $nm_err_tmp;
77}
78
79my $fake_input;
80my $fake_style;
81
82if (@ARGV == 1) {
83 $fake_input = shift @ARGV;
84 print "# Faking nm output from $fake_input\n";
85 if ($fake_input =~ s/\@(.+)$//) {
86 $fake_style = $1;
87 print "# Faking nm style from $fake_style\n";
aab7f3d5
JH
88 if ($fake_style eq 'gnu' ||
89 $fake_style eq 'linux' ||
90 $fake_style eq 'freebsd') {
88abe8fb
JH
91 $nm_style = 'gnu'
92 } elsif ($fake_style eq 'darwin' || $fake_style eq 'osx') {
93 $nm_style = 'darwin'
94 } else {
95 die "$0: Unknown explicit nm style '$fake_style'\n";
96 }
97 }
98}
99
100unless (defined $nm_style) {
101 if ($^O eq 'linux') {
aab7f3d5
JH
102 # The 'gnu' style could be equally well be called 'bsd' style,
103 # since the output format of the GNU binutils nm is really BSD.
104 $nm_style = 'gnu';
105 } elsif ($^O eq 'freebsd') {
88abe8fb
JH
106 $nm_style = 'gnu';
107 } elsif ($^O eq 'darwin') {
108 $nm_style = 'darwin';
109 }
110}
d92f47ae 111
88abe8fb
JH
112if (defined $nm_style) {
113 if ($nm_style eq 'gnu') {
114 $nm = '/usr/bin/nm';
115 } elsif ($nm_style eq 'darwin') {
116 $nm = '/usr/bin/nm';
117 # With the -m option we get better information than the BSD-like
118 # default: with the default, a lot of symbols get dumped into 'S'
119 # or 's', for example one cannot tell the difference between const
120 # and non-const data symbols.
121 $nm_opt = '-m';
122 } else {
123 die "$0: Unexpected nm style '$nm_style'\n";
124 }
d92f47ae
JH
125}
126
a1b6fca3 127if ($^O eq 'linux' && $Config{archname} !~ /^(?:x|i6)86/) {
63b7eadb
JH
128 # For example in ppc most (but not all!) code symbols are placed
129 # in 'D' (data), not in ' T '. We cannot work under such conditions.
130 skip_all "linux but archname $Config{archname} not x86*";
131}
132
d92f47ae
JH
133unless (defined $nm) {
134 skip_all "no nm";
135}
136
137unless (defined $nm_style) {
138 skip_all "no nm style";
139}
140
141print "# nm = $nm\n";
142print "# nm_style = $nm_style\n";
143print "# nm_opt = $nm_opt\n";
144
145unless (-x $nm) {
146 skip_all "no executable nm $nm";
147}
148
88abe8fb
JH
149if ($nm_style eq 'gnu' && !defined $fake_style) {
150 open(my $gnu_verify, "$nm --version|") or
d92f47ae
JH
151 skip_all "nm failed: $!";
152 my $gnu_verified;
88abe8fb 153 while (<$gnu_verify>) {
d92f47ae
JH
154 if (/^GNU nm/) {
155 $gnu_verified = 1;
156 last;
157 }
158 }
159 unless ($gnu_verified) {
f7bdd0be 160 skip_all "no GNU nm";
d92f47ae
JH
161 }
162}
163
88abe8fb
JH
164if (defined $fake_input) {
165 if ($fake_input eq '-') {
caea674c
JH
166 open($nm_fh, "<&STDIN") or
167 skip_all "Duping STDIN failed: $!";
168 } else {
88abe8fb
JH
169 open($nm_fh, "<", $fake_input) or
170 skip_all "Opening '$fake_input' failed: $!";
caea674c
JH
171 }
172 undef $nm_err_tmp; # In this case there will be no nm errors.
173} else {
1ee983f5 174 print qq{# command: "$nm $nm_opt $libperl_a 2>$nm_err_tmp |"\n};
caea674c
JH
175 open($nm_fh, "$nm $nm_opt $libperl_a 2>$nm_err_tmp |") or
176 skip_all "$nm $nm_opt $libperl_a failed: $!";
177}
831ed035
JH
178
179sub is_perlish_symbol {
180 $_[0] =~ /^(?:PL_|Perl|PerlIO)/;
181}
182
88abe8fb
JH
183# XXX Implement "internal test" for this script (option -t?)
184# to verify that the parsing does what it's intended to.
185
d92f47ae
JH
186sub nm_parse_gnu {
187 my $symbols = shift;
7c616f70 188 my $line = $_;
d92f47ae 189 if (m{^(\w+\.o):$}) {
68472ab1 190 # object file name
d92f47ae
JH
191 $symbols->{obj}{$1}++;
192 $symbols->{o} = $1;
7c616f70 193 return;
d92f47ae 194 } else {
7c616f70
JH
195 die "$0: undefined current object: $line"
196 unless defined $symbols->{o};
68472ab1 197 # 64-bit systems have 16 hexdigits, 32-bit systems have 8.
7c616f70
JH
198 if (s/^[0-9a-f]{8}(?:[0-9a-f]{8})? //) {
199 if (/^[Rr] (\w+)$/) {
68472ab1 200 # R: read only (const)
7c616f70
JH
201 $symbols->{data}{const}{$1}{$symbols->{o}}++;
202 } elsif (/^r .+$/) {
68472ab1 203 # Skip local const (read only).
3faa8758
JH
204 } elsif (/^([Tti]) (\w+)(\..+)?$/) {
205 $symbols->{text}{$2}{$symbols->{o}}{$1}++;
7c616f70
JH
206 } elsif (/^C (\w+)$/) {
207 $symbols->{data}{common}{$1}{$symbols->{o}}++;
208 } elsif (/^[BbSs] (\w+)(\.\d+)?$/) {
68472ab1
JH
209 # Bb: uninitialized data (bss)
210 # Ss: uninitialized data "for small objects"
7c616f70 211 $symbols->{data}{bss}{$1}{$symbols->{o}}++;
4613f3c1 212 } elsif (/^D _LIB_VERSION$/) {
68472ab1 213 # Skip the _LIB_VERSION (not ours, probably libm)
7c616f70 214 } elsif (/^[DdGg] (\w+)$/) {
68472ab1
JH
215 # Dd: initialized data
216 # Gg: initialized "for small objects"
7c616f70
JH
217 $symbols->{data}{data}{$1}{$symbols->{o}}++;
218 } elsif (/^. \.?(\w+)$/) {
219 # Skip the unknown types.
220 print "# Unknown type: $line ($symbols->{o})\n";
221 }
222 return;
831ed035
JH
223 } elsif (/^ {8}(?: {8})? U _?(\w+)$/) {
224 my ($symbol) = $1;
225 return if is_perlish_symbol($symbol);
226 $symbols->{undef}{$symbol}{$symbols->{o}}++;
7c616f70 227 return;
d92f47ae
JH
228 }
229 }
7c616f70 230 print "# Unexpected nm output '$line' ($symbols->{o})\n";
d92f47ae
JH
231}
232
233sub nm_parse_darwin {
234 my $symbols = shift;
7c616f70 235 my $line = $_;
68472ab1
JH
236 if (m{^(?:.+)?libperl\.a\((\w+\.o)\):$}) {
237 # object file name
d92f47ae
JH
238 $symbols->{obj}{$1}++;
239 $symbols->{o} = $1;
7c616f70 240 return;
d92f47ae 241 } else {
7c616f70 242 die "$0: undefined current object: $line" unless defined $symbols->{o};
68472ab1 243 # 64-bit systems have 16 hexdigits, 32-bit systems have 8.
7c616f70 244 if (s/^[0-9a-f]{8}(?:[0-9a-f]{8})? //) {
68472ab1
JH
245 # String literals can live in different sections
246 # depending on the compiler and os release, assumedly
247 # also linker flags.
dc882775 248 if (/^\(__TEXT,__(?:const|(?:asan_)?cstring|literal\d+)\) (?:non-)?external _?(\w+)(\.\w+)?$/) {
7c616f70
JH
249 my ($symbol, $suffix) = ($1, $2);
250 # Ignore function-local constants like
251 # _Perl_av_extend_guts.oom_array_extend
252 return if defined $suffix && /__TEXT,__const/;
6d1ca00b
JH
253 # Ignore the cstring unnamed strings.
254 return if $symbol =~ /^L\.str\d+$/;
7c616f70 255 $symbols->{data}{const}{$symbol}{$symbols->{o}}++;
3faa8758
JH
256 } elsif (/^\(__TEXT,__text\) ((?:non-)?external) _(\w+)$/) {
257 my ($exp, $sym) = ($1, $2);
258 $symbols->{text}{$sym}{$symbols->{o}}{$exp =~ /^non/ ? 't' : 'T'}++;
de36638a 259 } elsif (/^\(__DATA,__\w*?(const|data|bss|common)\w*\) (?:non-)?external _?(\w+)(\.\w+)?$/) {
7c616f70
JH
260 my ($dtype, $symbol, $suffix) = ($1, $2, $3);
261 # Ignore function-local constants like
262 # _Perl_pp_gmtime.dayname
263 return if defined $suffix;
264 $symbols->{data}{$dtype}{$symbol}{$symbols->{o}}++;
265 } elsif (/^\(__DATA,__const\) non-external _\.memset_pattern\d*$/) {
266 # Skip this, whatever it is (some inlined leakage from
267 # darwin libc?)
6d1ca00b 268 } elsif (/^\(__TEXT,__eh_frame/) {
68472ab1 269 # Skip the eh_frame (exception handling) symbols.
6d1ca00b 270 return;
7c616f70
JH
271 } elsif (/^\(__\w+,__\w+\) /) {
272 # Skip the unknown types.
273 print "# Unknown type: $line ($symbols->{o})\n";
274 }
275 return;
831ed035 276 } elsif (/^ {8}(?: {8})? \(undefined(?: \[lazy bound\])?\) external _?(.+)/) {
68472ab1
JH
277 # darwin/ppc marks most undefined text symbols
278 # as "[lazy bound]".
18d04e70 279 my ($symbol) = $1 =~ s/\$UNIX2003\z//r;
831ed035
JH
280 return if is_perlish_symbol($symbol);
281 $symbols->{undef}{$symbol}{$symbols->{o}}++;
7c616f70 282 return;
d92f47ae
JH
283 }
284 }
7c616f70 285 print "# Unexpected nm output '$line' ($symbols->{o})\n";
d92f47ae
JH
286}
287
288my $nm_parse;
289
290if ($nm_style eq 'gnu') {
291 $nm_parse = \&nm_parse_gnu;
292} elsif ($nm_style eq 'darwin') {
293 $nm_parse = \&nm_parse_darwin;
294}
295
296unless (defined $nm_parse) {
cb498e56 297 skip_all "no nm parser ($nm_style $nm_style, \$^O $^O)";
d92f47ae
JH
298}
299
300my %symbols;
301
302while (<$nm_fh>) {
303 next if /^$/;
304 chomp;
305 $nm_parse->(\%symbols);
306}
307
308# use Data::Dumper; print Dumper(\%symbols);
309
2c93157d
JH
310# Something went awfully wrong. Wrong nm? Wrong options?
311unless (keys %symbols) {
d92f47ae
JH
312 skip_all "no symbols\n";
313}
2c93157d
JH
314unless (exists $symbols{text}) {
315 skip_all "no text symbols\n";
316}
d92f47ae
JH
317
318# These should always be true for everyone.
319
2287d330
TK
320ok($symbols{obj}{'util.o'}, "has object util.o");
321ok($symbols{text}{'Perl_croak'}{'util.o'}, "has text Perl_croak in util.o");
d92f47ae
JH
322ok(exists $symbols{data}{const}, "has data const symbols");
323ok($symbols{data}{const}{PL_no_mem}{'globals.o'}, "has PL_no_mem");
324
d88a9086 325my $nocommon = $Config{ccflags} =~ /-fno-common/ ? 1 : 0;
d92f47ae 326
d88a9086 327print "# nocommon = $nocommon\n";
d92f47ae
JH
328
329my %data_symbols;
330
331for my $dtype (sort keys %{$symbols{data}}) {
332 for my $symbol (sort keys %{$symbols{data}{$dtype}}) {
333 $data_symbols{$symbol}++;
334 }
335}
336
8c3a0f6c
DIM
337if ( !$symbols{data}{common} ) {
338 # This is likely because Perl was compiled with
339 # -Accflags="-fno-common"
340 $symbols{data}{common} = $symbols{data}{bss};
d92f47ae
JH
341}
342
f43079cb 343ok($symbols{data}{common}{PL_hash_seed_w}{'globals.o'}, "has PL_hash_seed_w");
8c3a0f6c
DIM
344ok($symbols{data}{data}{PL_ppaddr}{'globals.o'}, "has PL_ppaddr");
345
182bc989
JH
346# See the comments in the beginning for what "undefined symbols"
347# really means. We *should* have many of those, that is a good thing.
831ed035
JH
348ok(keys %{$symbols{undef}}, "has undefined symbols");
349
182bc989
JH
350# There are certain symbols we expect to see.
351
0fa5dd23
JH
352# chmod, socket, getenv, sigaction, exp, time are system/library
353# calls that should each see at least one use. exp can be expl
9be194d5 354# if so configured.
182bc989 355my %expected = (
182bc989
JH
356 chmod => undef, # There is no Configure symbol for chmod.
357 socket => 'd_socket',
358 getenv => undef, # There is no Configure symbol for getenv,
359 sigaction => 'd_sigaction',
360 time => 'd_time',
361 );
362
57ae61ef 363if ($Config{uselongdouble} && $Config{longdblsize} > $Config{doublesize}) {
84e6cb05
JH
364 $expected{expl} = undef; # There is no Configure symbol for expl.
365} elsif ($Config{usequadmath}) {
366 $expected{expq} = undef; # There is no Configure symbol for expq.
182bc989 367} else {
0fa5dd23 368 $expected{exp} = undef; # There is no Configure symbol for exp.
182bc989 369}
1671bb9e
JH
370
371# DynaLoader will use dlopen, unless we are building static,
93e77b8c 372# and it is used in the platforms we are supporting in this test.
182bc989
JH
373if ($Config{usedl} ) {
374 $expected{dlopen} = 'd_dlopen';
831ed035 375}
1671bb9e 376
182bc989
JH
377for my $symbol (sort keys %expected) {
378 if (defined $expected{$symbol} && !$Config{$expected{$symbol}}) {
379 SKIP: {
380 skip("no $symbol");
381 }
382 next;
383 }
384 my @o = exists $symbols{undef}{$symbol} ?
385 sort keys %{ $symbols{undef}{$symbol} } : ();
93e77b8c 386 ok(@o, "uses $symbol (@o)");
831ed035
JH
387}
388
182bc989 389# There are certain symbols we expect NOT to see.
446b89af 390#
31e62fa1
JH
391# gets is horribly unsafe.
392#
446b89af
JH
393# fgets should not be used (Perl has its own API, sv_gets),
394# even without perlio.
31e62fa1
JH
395#
396# tmpfile is unsafe.
397#
182bc989 398# strcat, strcpy, strncat, strncpy are unsafe.
31e62fa1
JH
399#
400# sprintf and vsprintf should not be used because
401# Perl has its own safer and more portable implementations.
402# (One exception: for certain floating point outputs
182bc989 403# the native sprintf is still used in some platforms, see below.)
31e62fa1 404#
a7941017 405# atoi has unsafe and undefined failure modes, and is affected by locale.
26065e6c 406# Its cousins include atol and atoll.
31e62fa1 407#
68419f9c 408# strtol and strtoul are affected by locale.
26065e6c 409# Cousins include strtoq.
68419f9c 410#
6c1246d3
JH
411# system should not be used, use pp_system or my_popen.
412#
182bc989
JH
413
414my %unexpected;
415
6c1246d3
JH
416for my $str (qw(system)) {
417 $unexpected{$str} = "d_$str";
418}
419
182bc989
JH
420for my $stdio (qw(gets fgets tmpfile sprintf vsprintf)) {
421 $unexpected{$stdio} = undef; # No Configure symbol for these.
422}
423for my $str (qw(strcat strcpy strncat strncpy)) {
424 $unexpected{$str} = undef; # No Configure symbol for these.
425}
68419f9c 426
a7941017 427$unexpected{atoi} = undef; # No Configure symbol for atoi.
26065e6c 428$unexpected{atol} = undef; # No Configure symbol for atol.
182bc989 429
26065e6c 430for my $str (qw(atoll strtol strtoul strtoq)) {
68419f9c
JH
431 $unexpected{$str} = "d_$str";
432}
433
182bc989
JH
434for my $symbol (sort keys %unexpected) {
435 if (defined $unexpected{$symbol} && !$Config{$unexpected{$symbol}}) {
436 SKIP: {
437 skip("no $symbol");
438 }
439 next;
440 }
441 my @o = exists $symbols{undef}{$symbol} ?
442 sort keys %{ $symbols{undef}{$symbol} } : ();
aab7f3d5
JH
443 # While sprintf() is bad in the general case,
444 # some platforms implement Gconvert via sprintf, in sv.o.
182bc989 445 if ($symbol eq 'sprintf' &&
aab7f3d5
JH
446 $Config{d_Gconvert} =~ /^sprintf/ &&
447 @o == 1 && $o[0] eq 'sv.o') {
448 SKIP: {
449 skip("uses sprintf for Gconvert in sv.o");
450 }
451 } else {
182bc989 452 is(@o, 0, "uses no $symbol (@o)");
aab7f3d5 453 }
831ed035
JH
454}
455
3faa8758
JH
456# Check that any text symbols named S_ are not exported.
457my $export_S_prefix = 0;
458for my $t (sort grep { /^S_/ } keys %{$symbols{text}}) {
459 for my $o (sort keys %{$symbols{text}{$t}}) {
460 if (exists $symbols{text}{$t}{$o}{T}) {
461 fail($t, "$t exported from $o");
462 $export_S_prefix++;
463 }
464 }
465}
466is($export_S_prefix, 0, "no S_ exports");
467
caea674c
JH
468if (defined $nm_err_tmp) {
469 if (open(my $nm_err_fh, $nm_err_tmp)) {
470 my $error;
471 while (<$nm_err_fh>) {
472 # OS X has weird error where nm warns about
473 # "no name list" but then outputs fine.
4fc148a8
DM
474 # llvm-nm may also complain about 'no symbols'. In some
475 # versions this is exactly the string "no symbols\n" but in later
476 # versions becomes a string followed by ": no symbols\n". For this
477 # test it is typically "../libperl.a:perlapi.o: no symbols\n"
d74b131b 478 if ( $^O eq 'darwin' ) {
4fc148a8 479 if (/nm: no name list/ || /^(.*: )?no symbols$/ ) {
d74b131b
N
480 print "# $^O ignoring $nm output: $_";
481 next;
482 }
caea674c
JH
483 }
484 warn "$0: Unexpected $nm error: $_";
485 $error++;
b52baaa9 486 }
caea674c
JH
487 die "$0: Unexpected $nm errors\n" if $error;
488 } else {
489 warn "Failed to open '$nm_err_tmp': $!\n";
b52baaa9 490 }
b52baaa9
JH
491}
492
d92f47ae 493done_testing();