This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Mention sv_gets as the Perl-ish fgets-ish API.
[perl5.git] / t / porting / libperl.t
CommitLineData
d92f47ae
JH
1#!/usr/bin/perl -w
2
831ed035
JH
3# Try opening libperl.a with nm, and verifying it has the kind of
4# symbols we expect, and no symbols we should avoid.
5#
aab7f3d5
JH
6# Fail softly, expect things only on known platforms:
7# - linux
8# - darwin (OS X), both x86 and ppc
9# - freebsd
10# and on other platforms, and if things seem odd, just give up (skip_all).
d92f47ae
JH
11#
12# Also, if the rarely-used builds options -DPERL_GLOBAL_STRUCT or
13# -DPERL_GLOBAL_STRUCT_PRIVATE are used, verify that they did what
14# they were meant to do, hide the global variables (see perlguts for
15# the details).
caea674c
JH
16#
17# Debugging tip: nm output (this script's input) can be faked by
18# giving one command line argument for this script: it should be
88abe8fb
JH
19# either the filename to read, or "-" for STDIN. You can also append
20# "@style" (where style is a supported nm style, like "gnu" or "darwin")
21# to this filename for "cross-parsing".
831ed035
JH
22#
23# Some terminology:
24# - "text" symbols are code
25# - "data" symbols are data (duh), with subdivisions:
26# - "bss": (Block-Started-by-Symbol: originally from IBM assembler...),
27# uninitialized data, which often even doesn't exist in the object
28# file as such, only its size does, which is then created on demand
29# by the loader
30# - "const": initialized read-only data, like string literals
31# - "common": uninitialized data unless initialized...
32# (the full story is too long for here, see "man nm")
33# - "data": initialized read-write data
34# (somewhat confusingly below: "data data", but it makes code simpler)
35# - "undefined": external symbol referred to by an object,
36# most likely a text symbol. Can be either a symbol defined by
37# a Perl object file but referred to by other Perl object files,
38# or a completely external symbol from libc, or other system libraries.
d92f47ae
JH
39
40BEGIN {
41 chdir 't' if -d 't';
42 @INC = '../lib';
43 require "./test.pl";
44}
45
46use strict;
47
48use Config;
49
50if ($Config{cc} =~ /g\+\+/) {
51 # XXX Could use c++filt, maybe.
52 skip_all "on g++";
53}
54
55my $libperl_a;
56
57for my $f (qw(../libperl.a libperl.a)) {
58 if (-f $f) {
59 $libperl_a = $f;
60 last;
61 }
62}
63
64unless (defined $libperl_a) {
65 skip_all "no libperl.a";
66}
67
cb498e56
JH
68print "# \$^O = $^O\n";
69print "# \$Config{cc} = $Config{cc}\n";
d92f47ae
JH
70print "# libperl = $libperl_a\n";
71
72my $nm;
73my $nm_opt = '';
74my $nm_style;
88abe8fb
JH
75my $nm_fh;
76my $nm_err_tmp = "libperl$$";
77
78END {
79 # this is still executed when we skip_all above, avoid a warning
80 unlink $nm_err_tmp if $nm_err_tmp;
81}
82
83my $fake_input;
84my $fake_style;
85
86if (@ARGV == 1) {
87 $fake_input = shift @ARGV;
88 print "# Faking nm output from $fake_input\n";
89 if ($fake_input =~ s/\@(.+)$//) {
90 $fake_style = $1;
91 print "# Faking nm style from $fake_style\n";
aab7f3d5
JH
92 if ($fake_style eq 'gnu' ||
93 $fake_style eq 'linux' ||
94 $fake_style eq 'freebsd') {
88abe8fb
JH
95 $nm_style = 'gnu'
96 } elsif ($fake_style eq 'darwin' || $fake_style eq 'osx') {
97 $nm_style = 'darwin'
98 } else {
99 die "$0: Unknown explicit nm style '$fake_style'\n";
100 }
101 }
102}
103
104unless (defined $nm_style) {
105 if ($^O eq 'linux') {
aab7f3d5
JH
106 # The 'gnu' style could be equally well be called 'bsd' style,
107 # since the output format of the GNU binutils nm is really BSD.
108 $nm_style = 'gnu';
109 } elsif ($^O eq 'freebsd') {
88abe8fb
JH
110 $nm_style = 'gnu';
111 } elsif ($^O eq 'darwin') {
112 $nm_style = 'darwin';
113 }
114}
d92f47ae 115
88abe8fb
JH
116if (defined $nm_style) {
117 if ($nm_style eq 'gnu') {
118 $nm = '/usr/bin/nm';
119 } elsif ($nm_style eq 'darwin') {
120 $nm = '/usr/bin/nm';
121 # With the -m option we get better information than the BSD-like
122 # default: with the default, a lot of symbols get dumped into 'S'
123 # or 's', for example one cannot tell the difference between const
124 # and non-const data symbols.
125 $nm_opt = '-m';
126 } else {
127 die "$0: Unexpected nm style '$nm_style'\n";
128 }
d92f47ae
JH
129}
130
131unless (defined $nm) {
132 skip_all "no nm";
133}
134
135unless (defined $nm_style) {
136 skip_all "no nm style";
137}
138
139print "# nm = $nm\n";
140print "# nm_style = $nm_style\n";
141print "# nm_opt = $nm_opt\n";
142
143unless (-x $nm) {
144 skip_all "no executable nm $nm";
145}
146
88abe8fb
JH
147if ($nm_style eq 'gnu' && !defined $fake_style) {
148 open(my $gnu_verify, "$nm --version|") or
d92f47ae
JH
149 skip_all "nm failed: $!";
150 my $gnu_verified;
88abe8fb 151 while (<$gnu_verify>) {
d92f47ae
JH
152 if (/^GNU nm/) {
153 $gnu_verified = 1;
154 last;
155 }
156 }
157 unless ($gnu_verified) {
f7bdd0be 158 skip_all "no GNU nm";
d92f47ae
JH
159 }
160}
161
88abe8fb
JH
162if (defined $fake_input) {
163 if ($fake_input eq '-') {
caea674c
JH
164 open($nm_fh, "<&STDIN") or
165 skip_all "Duping STDIN failed: $!";
166 } else {
88abe8fb
JH
167 open($nm_fh, "<", $fake_input) or
168 skip_all "Opening '$fake_input' failed: $!";
caea674c
JH
169 }
170 undef $nm_err_tmp; # In this case there will be no nm errors.
171} else {
172 open($nm_fh, "$nm $nm_opt $libperl_a 2>$nm_err_tmp |") or
173 skip_all "$nm $nm_opt $libperl_a failed: $!";
174}
831ed035
JH
175
176sub is_perlish_symbol {
177 $_[0] =~ /^(?:PL_|Perl|PerlIO)/;
178}
179
88abe8fb
JH
180# XXX Implement "internal test" for this script (option -t?)
181# to verify that the parsing does what it's intended to.
182
d92f47ae
JH
183sub nm_parse_gnu {
184 my $symbols = shift;
7c616f70 185 my $line = $_;
d92f47ae 186 if (m{^(\w+\.o):$}) {
68472ab1 187 # object file name
d92f47ae
JH
188 $symbols->{obj}{$1}++;
189 $symbols->{o} = $1;
7c616f70 190 return;
d92f47ae 191 } else {
7c616f70
JH
192 die "$0: undefined current object: $line"
193 unless defined $symbols->{o};
68472ab1 194 # 64-bit systems have 16 hexdigits, 32-bit systems have 8.
7c616f70
JH
195 if (s/^[0-9a-f]{8}(?:[0-9a-f]{8})? //) {
196 if (/^[Rr] (\w+)$/) {
68472ab1 197 # R: read only (const)
7c616f70
JH
198 $symbols->{data}{const}{$1}{$symbols->{o}}++;
199 } elsif (/^r .+$/) {
68472ab1 200 # Skip local const (read only).
7c616f70
JH
201 } elsif (/^[Tti] (\w+)(\..+)?$/) {
202 $symbols->{text}{$1}{$symbols->{o}}++;
203 } elsif (/^C (\w+)$/) {
204 $symbols->{data}{common}{$1}{$symbols->{o}}++;
205 } elsif (/^[BbSs] (\w+)(\.\d+)?$/) {
68472ab1
JH
206 # Bb: uninitialized data (bss)
207 # Ss: uninitialized data "for small objects"
7c616f70 208 $symbols->{data}{bss}{$1}{$symbols->{o}}++;
4613f3c1 209 } elsif (/^D _LIB_VERSION$/) {
68472ab1 210 # Skip the _LIB_VERSION (not ours, probably libm)
7c616f70 211 } elsif (/^[DdGg] (\w+)$/) {
68472ab1
JH
212 # Dd: initialized data
213 # Gg: initialized "for small objects"
7c616f70
JH
214 $symbols->{data}{data}{$1}{$symbols->{o}}++;
215 } elsif (/^. \.?(\w+)$/) {
216 # Skip the unknown types.
217 print "# Unknown type: $line ($symbols->{o})\n";
218 }
219 return;
831ed035
JH
220 } elsif (/^ {8}(?: {8})? U _?(\w+)$/) {
221 my ($symbol) = $1;
222 return if is_perlish_symbol($symbol);
223 $symbols->{undef}{$symbol}{$symbols->{o}}++;
7c616f70 224 return;
d92f47ae
JH
225 }
226 }
7c616f70 227 print "# Unexpected nm output '$line' ($symbols->{o})\n";
d92f47ae
JH
228}
229
230sub nm_parse_darwin {
231 my $symbols = shift;
7c616f70 232 my $line = $_;
68472ab1
JH
233 if (m{^(?:.+)?libperl\.a\((\w+\.o)\):$}) {
234 # object file name
d92f47ae
JH
235 $symbols->{obj}{$1}++;
236 $symbols->{o} = $1;
7c616f70 237 return;
d92f47ae 238 } else {
7c616f70 239 die "$0: undefined current object: $line" unless defined $symbols->{o};
68472ab1 240 # 64-bit systems have 16 hexdigits, 32-bit systems have 8.
7c616f70 241 if (s/^[0-9a-f]{8}(?:[0-9a-f]{8})? //) {
68472ab1
JH
242 # String literals can live in different sections
243 # depending on the compiler and os release, assumedly
244 # also linker flags.
6d1ca00b 245 if (/^\(__TEXT,__(?:const|cstring|literal\d+)\) (?:non-)?external _?(\w+)(\.\w+)?$/) {
7c616f70
JH
246 my ($symbol, $suffix) = ($1, $2);
247 # Ignore function-local constants like
248 # _Perl_av_extend_guts.oom_array_extend
249 return if defined $suffix && /__TEXT,__const/;
6d1ca00b
JH
250 # Ignore the cstring unnamed strings.
251 return if $symbol =~ /^L\.str\d+$/;
7c616f70
JH
252 $symbols->{data}{const}{$symbol}{$symbols->{o}}++;
253 } elsif (/^\(__TEXT,__text\) (?:non-)?external _(\w+)$/) {
254 $symbols->{text}{$1}{$symbols->{o}}++;
255 } elsif (/^\(__DATA,__(const|data|bss|common)\) (?:non-)?external _(\w+)(\.\w+)?$/) {
256 my ($dtype, $symbol, $suffix) = ($1, $2, $3);
257 # Ignore function-local constants like
258 # _Perl_pp_gmtime.dayname
259 return if defined $suffix;
260 $symbols->{data}{$dtype}{$symbol}{$symbols->{o}}++;
261 } elsif (/^\(__DATA,__const\) non-external _\.memset_pattern\d*$/) {
262 # Skip this, whatever it is (some inlined leakage from
263 # darwin libc?)
6d1ca00b 264 } elsif (/^\(__TEXT,__eh_frame/) {
68472ab1 265 # Skip the eh_frame (exception handling) symbols.
6d1ca00b 266 return;
7c616f70
JH
267 } elsif (/^\(__\w+,__\w+\) /) {
268 # Skip the unknown types.
269 print "# Unknown type: $line ($symbols->{o})\n";
270 }
271 return;
831ed035 272 } elsif (/^ {8}(?: {8})? \(undefined(?: \[lazy bound\])?\) external _?(.+)/) {
68472ab1
JH
273 # darwin/ppc marks most undefined text symbols
274 # as "[lazy bound]".
831ed035
JH
275 my ($symbol) = $1;
276 return if is_perlish_symbol($symbol);
277 $symbols->{undef}{$symbol}{$symbols->{o}}++;
7c616f70 278 return;
d92f47ae
JH
279 }
280 }
7c616f70 281 print "# Unexpected nm output '$line' ($symbols->{o})\n";
d92f47ae
JH
282}
283
284my $nm_parse;
285
286if ($nm_style eq 'gnu') {
287 $nm_parse = \&nm_parse_gnu;
288} elsif ($nm_style eq 'darwin') {
289 $nm_parse = \&nm_parse_darwin;
290}
291
292unless (defined $nm_parse) {
cb498e56 293 skip_all "no nm parser ($nm_style $nm_style, \$^O $^O)";
d92f47ae
JH
294}
295
296my %symbols;
297
298while (<$nm_fh>) {
299 next if /^$/;
300 chomp;
301 $nm_parse->(\%symbols);
302}
303
304# use Data::Dumper; print Dumper(\%symbols);
305
306if (keys %symbols == 0) {
307 skip_all "no symbols\n";
308}
309
310# These should always be true for everyone.
311
312ok($symbols{obj}{'pp.o'}, "has object pp.o");
313ok($symbols{text}{'Perl_peep'}, "has text Perl_peep");
314ok($symbols{text}{'Perl_pp_uc'}{'pp.o'}, "has text Perl_pp_uc in pp.o");
315ok(exists $symbols{data}{const}, "has data const symbols");
316ok($symbols{data}{const}{PL_no_mem}{'globals.o'}, "has PL_no_mem");
317
318my $DEBUGGING = $Config{ccflags} =~ /-DDEBUGGING/ ? 1 : 0;
319
320my $GS = $Config{ccflags} =~ /-DPERL_GLOBAL_STRUCT\b/ ? 1 : 0;
321my $GSP = $Config{ccflags} =~ /-DPERL_GLOBAL_STRUCT_PRIVATE/ ? 1 : 0;
322
323print "# GS = $GS\n";
324print "# GSP = $GSP\n";
325
326my %data_symbols;
327
328for my $dtype (sort keys %{$symbols{data}}) {
329 for my $symbol (sort keys %{$symbols{data}{$dtype}}) {
330 $data_symbols{$symbol}++;
331 }
332}
333
d92f47ae 334# The following tests differ between vanilla vs $GSP or $GS.
d92f47ae
JH
335
336if ($GSP) {
337 print "# -DPERL_GLOBAL_STRUCT_PRIVATE\n";
338 ok(!exists $data_symbols{PL_hash_seed}, "has no PL_hash_seed");
339 ok(!exists $data_symbols{PL_ppaddr}, "has no PL_ppaddr");
340
341 ok(! exists $symbols{data}{bss}, "has no data bss symbols");
26443f8e
DM
342 ok(! exists $symbols{data}{data} ||
343 # clang with ASAN seems to add this symbol to every object file:
344 !grep($_ ne '__unnamed_1', keys %{$symbols{data}{data}}),
345 "has no data data symbols");
d92f47ae
JH
346 ok(! exists $symbols{data}{common}, "has no data common symbols");
347
348 # -DPERL_GLOBAL_STRUCT_PRIVATE should NOT have
349 # the extra text symbol for accessing the vars
350 # (as opposed to "just" -DPERL_GLOBAL_STRUCT)
351 ok(! exists $symbols{text}{Perl_GetVars}, "has no Perl_GetVars");
352} elsif ($GS) {
353 print "# -DPERL_GLOBAL_STRUCT\n";
354 ok(!exists $data_symbols{PL_hash_seed}, "has no PL_hash_seed");
355 ok(!exists $data_symbols{PL_ppaddr}, "has no PL_ppaddr");
356
357 ok(! exists $symbols{data}{bss}, "has no data bss symbols");
358
359 # These PerlIO data symbols are left visible with
360 # -DPERL_GLOBAL_STRUCT (as opposed to -DPERL_GLOBAL_STRUCT_PRIVATE)
361 my @PerlIO =
362 qw(
363 PerlIO_byte
364 PerlIO_crlf
365 PerlIO_pending
366 PerlIO_perlio
367 PerlIO_raw
368 PerlIO_remove
369 PerlIO_stdio
370 PerlIO_unix
371 PerlIO_utf8
372 );
373
374 # PL_magic_vtables is const with -DPERL_GLOBAL_STRUCT_PRIVATE but
375 # otherwise not const -- because of SWIG which wants to modify
376 # the table. Evil SWIG, eeevil.
377
378 # my_cxt_index is used with PERL_IMPLICIT_CONTEXT, which
379 # -DPERL_GLOBAL_STRUCT has turned on.
99a0b65c
JH
380 eq_array([sort keys %{$symbols{data}{data}}],
381 [sort('PL_VarsPtr',
382 @PerlIO,
383 'PL_magic_vtables',
384 'my_cxt_index')],
385 "data data symbols");
d92f47ae
JH
386
387 # Only one data common symbol, our "supervariable".
99a0b65c
JH
388 eq_array([sort keys %{$symbols{data}{common}}],
389 ['PL_Vars'],
390 "data common symbols");
d92f47ae
JH
391
392 ok($symbols{data}{data}{PL_VarsPtr}{'globals.o'}, "has PL_VarsPtr");
393 ok($symbols{data}{common}{PL_Vars}{'globals.o'}, "has PL_Vars");
394
395 # -DPERL_GLOBAL_STRUCT has extra text symbol for accessing the vars.
396 ok($symbols{text}{Perl_GetVars}{'util.o'}, "has Perl_GetVars");
397} else {
398 print "# neither -DPERL_GLOBAL_STRUCT nor -DPERL_GLOBAL_STRUCT_PRIVATE\n";
399
a9abea17 400 if ( !$symbols{data}{common} ) {
831ed035 401 # This is likely because Perl was compiled with
a9abea17
BF
402 # -Accflags="-fno-common"
403 $symbols{data}{common} = $symbols{data}{bss};
404 }
831ed035 405
d92f47ae
JH
406 ok($symbols{data}{common}{PL_hash_seed}{'globals.o'}, "has PL_hash_seed");
407 ok($symbols{data}{data}{PL_ppaddr}{'globals.o'}, "has PL_ppaddr");
408
409 # None of the GLOBAL_STRUCT* business here.
410 ok(! exists $symbols{data}{data}{PL_VarsPtr}, "has no PL_VarsPtr");
411 ok(! exists $symbols{data}{common}{PL_Vars}, "has no PL_Vars");
412 ok(! exists $symbols{text}{Perl_GetVars}, "has no Perl_GetVars");
413}
414
182bc989
JH
415# See the comments in the beginning for what "undefined symbols"
416# really means. We *should* have many of those, that is a good thing.
831ed035
JH
417ok(keys %{$symbols{undef}}, "has undefined symbols");
418
182bc989
JH
419# There are certain symbols we expect to see.
420
31e62fa1
JH
421# memchr, memcmp, memcpy should be used all over the place.
422#
423# chmod, socket, getenv, sigaction, sqrt, time are system/library
9be194d5
JH
424# calls that should each see at least one use. sqrt can be sqrtl
425# if so configured.
182bc989
JH
426my %expected = (
427 memchr => 'd_memchr',
428 memcmp => 'd_memcmp',
429 memcpy => 'd_memcpy',
430 chmod => undef, # There is no Configure symbol for chmod.
431 socket => 'd_socket',
432 getenv => undef, # There is no Configure symbol for getenv,
433 sigaction => 'd_sigaction',
434 time => 'd_time',
435 );
436
437if ($Config{uselongdouble} && $Config{d_longdbl}) {
438 $expected{sqrtl} = 'd_sqrtl';
439} else {
440 $expected{sqrt} = undef; # There is no Configure symbol for sqrt.
441}
1671bb9e
JH
442
443# DynaLoader will use dlopen, unless we are building static,
444# and in the platforms we are supporting in this test.
182bc989
JH
445if ($Config{usedl} ) {
446 $expected{dlopen} = 'd_dlopen';
831ed035 447}
1671bb9e 448
182bc989
JH
449for my $symbol (sort keys %expected) {
450 if (defined $expected{$symbol} && !$Config{$expected{$symbol}}) {
451 SKIP: {
452 skip("no $symbol");
453 }
454 next;
455 }
456 my @o = exists $symbols{undef}{$symbol} ?
457 sort keys %{ $symbols{undef}{$symbol} } : ();
d2f5d4f9 458 # In some FreeBSD versions memcmp disappears (compiler inlining?).
b5525e68
JH
459 if (($^O eq 'freebsd' ||
460 (defined $fake_style && $fake_style eq 'freebsd')) &&
d2f5d4f9
JH
461 $symbol eq 'memcmp' && @o == 0) {
462 SKIP: {
463 skip("freebsd memcmp");
464 }
465 } else {
466 ok(@o, "uses $symbol (@o)");
467 }
831ed035
JH
468}
469
182bc989 470# There are certain symbols we expect NOT to see.
446b89af 471#
31e62fa1
JH
472# gets is horribly unsafe.
473#
446b89af
JH
474# fgets should not be used (Perl has its own API, sv_gets),
475# even without perlio.
31e62fa1
JH
476#
477# tmpfile is unsafe.
478#
182bc989 479# strcat, strcpy, strncat, strncpy are unsafe.
31e62fa1
JH
480#
481# sprintf and vsprintf should not be used because
482# Perl has its own safer and more portable implementations.
483# (One exception: for certain floating point outputs
182bc989 484# the native sprintf is still used in some platforms, see below.)
31e62fa1 485#
a7941017 486# atoi has unsafe and undefined failure modes, and is affected by locale.
26065e6c 487# Its cousins include atol and atoll.
31e62fa1 488#
68419f9c 489# strtol and strtoul are affected by locale.
26065e6c 490# Cousins include strtoq.
68419f9c 491#
6c1246d3
JH
492# system should not be used, use pp_system or my_popen.
493#
182bc989
JH
494
495my %unexpected;
496
6c1246d3
JH
497for my $str (qw(system)) {
498 $unexpected{$str} = "d_$str";
499}
500
182bc989
JH
501for my $stdio (qw(gets fgets tmpfile sprintf vsprintf)) {
502 $unexpected{$stdio} = undef; # No Configure symbol for these.
503}
504for my $str (qw(strcat strcpy strncat strncpy)) {
505 $unexpected{$str} = undef; # No Configure symbol for these.
506}
68419f9c 507
a7941017 508$unexpected{atoi} = undef; # No Configure symbol for atoi.
26065e6c 509$unexpected{atol} = undef; # No Configure symbol for atol.
182bc989 510
26065e6c 511for my $str (qw(atoll strtol strtoul strtoq)) {
68419f9c
JH
512 $unexpected{$str} = "d_$str";
513}
514
182bc989
JH
515for my $symbol (sort keys %unexpected) {
516 if (defined $unexpected{$symbol} && !$Config{$unexpected{$symbol}}) {
517 SKIP: {
518 skip("no $symbol");
519 }
520 next;
521 }
522 my @o = exists $symbols{undef}{$symbol} ?
523 sort keys %{ $symbols{undef}{$symbol} } : ();
aab7f3d5
JH
524 # While sprintf() is bad in the general case,
525 # some platforms implement Gconvert via sprintf, in sv.o.
182bc989 526 if ($symbol eq 'sprintf' &&
aab7f3d5
JH
527 $Config{d_Gconvert} =~ /^sprintf/ &&
528 @o == 1 && $o[0] eq 'sv.o') {
529 SKIP: {
530 skip("uses sprintf for Gconvert in sv.o");
531 }
532 } else {
182bc989 533 is(@o, 0, "uses no $symbol (@o)");
aab7f3d5 534 }
831ed035
JH
535}
536
caea674c
JH
537if (defined $nm_err_tmp) {
538 if (open(my $nm_err_fh, $nm_err_tmp)) {
539 my $error;
540 while (<$nm_err_fh>) {
541 # OS X has weird error where nm warns about
542 # "no name list" but then outputs fine.
543 if (/nm: no name list/ && $^O eq 'darwin') {
544 print "# $^O ignoring $nm output: $_";
545 next;
546 }
547 warn "$0: Unexpected $nm error: $_";
548 $error++;
b52baaa9 549 }
caea674c
JH
550 die "$0: Unexpected $nm errors\n" if $error;
551 } else {
552 warn "Failed to open '$nm_err_tmp': $!\n";
b52baaa9 553 }
b52baaa9
JH
554}
555
d92f47ae 556done_testing();