This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Add comments.
[perl5.git] / t / porting / libperl.t
CommitLineData
d92f47ae
JH
1#!/usr/bin/perl -w
2
831ed035
JH
3# Try opening libperl.a with nm, and verifying it has the kind of
4# symbols we expect, and no symbols we should avoid.
5#
6# Fail softly, expect things only on known platforms.
d92f47ae
JH
7#
8# Also, if the rarely-used builds options -DPERL_GLOBAL_STRUCT or
9# -DPERL_GLOBAL_STRUCT_PRIVATE are used, verify that they did what
10# they were meant to do, hide the global variables (see perlguts for
11# the details).
caea674c
JH
12#
13# Debugging tip: nm output (this script's input) can be faked by
14# giving one command line argument for this script: it should be
15# either the filename to read, or "-" for STDIN.
831ed035
JH
16#
17# Some terminology:
18# - "text" symbols are code
19# - "data" symbols are data (duh), with subdivisions:
20# - "bss": (Block-Started-by-Symbol: originally from IBM assembler...),
21# uninitialized data, which often even doesn't exist in the object
22# file as such, only its size does, which is then created on demand
23# by the loader
24# - "const": initialized read-only data, like string literals
25# - "common": uninitialized data unless initialized...
26# (the full story is too long for here, see "man nm")
27# - "data": initialized read-write data
28# (somewhat confusingly below: "data data", but it makes code simpler)
29# - "undefined": external symbol referred to by an object,
30# most likely a text symbol. Can be either a symbol defined by
31# a Perl object file but referred to by other Perl object files,
32# or a completely external symbol from libc, or other system libraries.
d92f47ae
JH
33
34BEGIN {
35 chdir 't' if -d 't';
36 @INC = '../lib';
37 require "./test.pl";
38}
39
40use strict;
41
42use Config;
43
44if ($Config{cc} =~ /g\+\+/) {
45 # XXX Could use c++filt, maybe.
46 skip_all "on g++";
47}
48
49my $libperl_a;
50
51for my $f (qw(../libperl.a libperl.a)) {
52 if (-f $f) {
53 $libperl_a = $f;
54 last;
55 }
56}
57
58unless (defined $libperl_a) {
59 skip_all "no libperl.a";
60}
61
cb498e56
JH
62print "# \$^O = $^O\n";
63print "# \$Config{cc} = $Config{cc}\n";
d92f47ae
JH
64print "# libperl = $libperl_a\n";
65
66my $nm;
67my $nm_opt = '';
68my $nm_style;
69
70if ($^O eq 'linux') {
71 $nm = '/usr/bin/nm';
72 $nm_style = 'gnu';
73} elsif ($^O eq 'darwin') {
74 $nm = '/usr/bin/nm';
75 $nm_style = 'darwin';
76 # With the -m option we get better information than the BSD-like
77 # default: with the default, a lot of symbols get dumped into 'S'
78 # or 's', for example one cannot tell the difference between const
79 # and non-const symbols.
80 $nm_opt = '-m';
81}
82
83unless (defined $nm) {
84 skip_all "no nm";
85}
86
87unless (defined $nm_style) {
88 skip_all "no nm style";
89}
90
91print "# nm = $nm\n";
92print "# nm_style = $nm_style\n";
93print "# nm_opt = $nm_opt\n";
94
95unless (-x $nm) {
96 skip_all "no executable nm $nm";
97}
98
99if ($nm_style eq 'gnu') {
100 open(my $nm_fh, "$nm --version|") or
101 skip_all "nm failed: $!";
102 my $gnu_verified;
103 while (<$nm_fh>) {
104 if (/^GNU nm/) {
105 $gnu_verified = 1;
106 last;
107 }
108 }
109 unless ($gnu_verified) {
f7bdd0be 110 skip_all "no GNU nm";
d92f47ae
JH
111 }
112}
113
b52baaa9
JH
114my $nm_err_tmp = "libperl$$";
115
116END {
f7bdd0be
TC
117 # this is still executed when we skip_all above, avoid a warning
118 unlink $nm_err_tmp if $nm_err_tmp;
b52baaa9
JH
119}
120
caea674c 121my $nm_fh;
d92f47ae 122
caea674c
JH
123if (@ARGV == 1) {
124 my $fake = shift @ARGV;
125 print "# Faking nm output from $fake\n";
126 if ($fake eq '-') {
127 open($nm_fh, "<&STDIN") or
128 skip_all "Duping STDIN failed: $!";
129 } else {
130 open($nm_fh, "<", $fake) or
131 skip_all "Opening '$fake' failed: $!";
132 }
133 undef $nm_err_tmp; # In this case there will be no nm errors.
134} else {
135 open($nm_fh, "$nm $nm_opt $libperl_a 2>$nm_err_tmp |") or
136 skip_all "$nm $nm_opt $libperl_a failed: $!";
137}
831ed035
JH
138
139sub is_perlish_symbol {
140 $_[0] =~ /^(?:PL_|Perl|PerlIO)/;
141}
142
d92f47ae
JH
143sub nm_parse_gnu {
144 my $symbols = shift;
7c616f70 145 my $line = $_;
d92f47ae 146 if (m{^(\w+\.o):$}) {
68472ab1 147 # object file name
d92f47ae
JH
148 $symbols->{obj}{$1}++;
149 $symbols->{o} = $1;
7c616f70 150 return;
d92f47ae 151 } else {
7c616f70
JH
152 die "$0: undefined current object: $line"
153 unless defined $symbols->{o};
68472ab1 154 # 64-bit systems have 16 hexdigits, 32-bit systems have 8.
7c616f70
JH
155 if (s/^[0-9a-f]{8}(?:[0-9a-f]{8})? //) {
156 if (/^[Rr] (\w+)$/) {
68472ab1 157 # R: read only (const)
7c616f70
JH
158 $symbols->{data}{const}{$1}{$symbols->{o}}++;
159 } elsif (/^r .+$/) {
68472ab1 160 # Skip local const (read only).
7c616f70
JH
161 } elsif (/^[Tti] (\w+)(\..+)?$/) {
162 $symbols->{text}{$1}{$symbols->{o}}++;
163 } elsif (/^C (\w+)$/) {
164 $symbols->{data}{common}{$1}{$symbols->{o}}++;
165 } elsif (/^[BbSs] (\w+)(\.\d+)?$/) {
68472ab1
JH
166 # Bb: uninitialized data (bss)
167 # Ss: uninitialized data "for small objects"
7c616f70
JH
168 $symbols->{data}{bss}{$1}{$symbols->{o}}++;
169 } elsif (/^0{16} D _LIB_VERSION$/) {
68472ab1 170 # Skip the _LIB_VERSION (not ours, probably libm)
7c616f70 171 } elsif (/^[DdGg] (\w+)$/) {
68472ab1
JH
172 # Dd: initialized data
173 # Gg: initialized "for small objects"
7c616f70
JH
174 $symbols->{data}{data}{$1}{$symbols->{o}}++;
175 } elsif (/^. \.?(\w+)$/) {
176 # Skip the unknown types.
177 print "# Unknown type: $line ($symbols->{o})\n";
178 }
179 return;
831ed035
JH
180 } elsif (/^ {8}(?: {8})? U _?(\w+)$/) {
181 my ($symbol) = $1;
182 return if is_perlish_symbol($symbol);
183 $symbols->{undef}{$symbol}{$symbols->{o}}++;
7c616f70 184 return;
d92f47ae
JH
185 }
186 }
7c616f70 187 print "# Unexpected nm output '$line' ($symbols->{o})\n";
d92f47ae
JH
188}
189
190sub nm_parse_darwin {
191 my $symbols = shift;
7c616f70 192 my $line = $_;
68472ab1
JH
193 if (m{^(?:.+)?libperl\.a\((\w+\.o)\):$}) {
194 # object file name
d92f47ae
JH
195 $symbols->{obj}{$1}++;
196 $symbols->{o} = $1;
7c616f70 197 return;
d92f47ae 198 } else {
7c616f70 199 die "$0: undefined current object: $line" unless defined $symbols->{o};
68472ab1 200 # 64-bit systems have 16 hexdigits, 32-bit systems have 8.
7c616f70 201 if (s/^[0-9a-f]{8}(?:[0-9a-f]{8})? //) {
68472ab1
JH
202 # String literals can live in different sections
203 # depending on the compiler and os release, assumedly
204 # also linker flags.
6d1ca00b 205 if (/^\(__TEXT,__(?:const|cstring|literal\d+)\) (?:non-)?external _?(\w+)(\.\w+)?$/) {
7c616f70
JH
206 my ($symbol, $suffix) = ($1, $2);
207 # Ignore function-local constants like
208 # _Perl_av_extend_guts.oom_array_extend
209 return if defined $suffix && /__TEXT,__const/;
6d1ca00b
JH
210 # Ignore the cstring unnamed strings.
211 return if $symbol =~ /^L\.str\d+$/;
7c616f70
JH
212 $symbols->{data}{const}{$symbol}{$symbols->{o}}++;
213 } elsif (/^\(__TEXT,__text\) (?:non-)?external _(\w+)$/) {
214 $symbols->{text}{$1}{$symbols->{o}}++;
215 } elsif (/^\(__DATA,__(const|data|bss|common)\) (?:non-)?external _(\w+)(\.\w+)?$/) {
216 my ($dtype, $symbol, $suffix) = ($1, $2, $3);
217 # Ignore function-local constants like
218 # _Perl_pp_gmtime.dayname
219 return if defined $suffix;
220 $symbols->{data}{$dtype}{$symbol}{$symbols->{o}}++;
221 } elsif (/^\(__DATA,__const\) non-external _\.memset_pattern\d*$/) {
222 # Skip this, whatever it is (some inlined leakage from
223 # darwin libc?)
6d1ca00b 224 } elsif (/^\(__TEXT,__eh_frame/) {
68472ab1 225 # Skip the eh_frame (exception handling) symbols.
6d1ca00b 226 return;
7c616f70
JH
227 } elsif (/^\(__\w+,__\w+\) /) {
228 # Skip the unknown types.
229 print "# Unknown type: $line ($symbols->{o})\n";
230 }
231 return;
831ed035 232 } elsif (/^ {8}(?: {8})? \(undefined(?: \[lazy bound\])?\) external _?(.+)/) {
68472ab1
JH
233 # darwin/ppc marks most undefined text symbols
234 # as "[lazy bound]".
831ed035
JH
235 my ($symbol) = $1;
236 return if is_perlish_symbol($symbol);
237 $symbols->{undef}{$symbol}{$symbols->{o}}++;
7c616f70 238 return;
d92f47ae
JH
239 }
240 }
7c616f70 241 print "# Unexpected nm output '$line' ($symbols->{o})\n";
d92f47ae
JH
242}
243
244my $nm_parse;
245
246if ($nm_style eq 'gnu') {
247 $nm_parse = \&nm_parse_gnu;
248} elsif ($nm_style eq 'darwin') {
249 $nm_parse = \&nm_parse_darwin;
250}
251
252unless (defined $nm_parse) {
cb498e56 253 skip_all "no nm parser ($nm_style $nm_style, \$^O $^O)";
d92f47ae
JH
254}
255
256my %symbols;
257
258while (<$nm_fh>) {
259 next if /^$/;
260 chomp;
261 $nm_parse->(\%symbols);
262}
263
264# use Data::Dumper; print Dumper(\%symbols);
265
266if (keys %symbols == 0) {
267 skip_all "no symbols\n";
268}
269
270# These should always be true for everyone.
271
272ok($symbols{obj}{'pp.o'}, "has object pp.o");
273ok($symbols{text}{'Perl_peep'}, "has text Perl_peep");
274ok($symbols{text}{'Perl_pp_uc'}{'pp.o'}, "has text Perl_pp_uc in pp.o");
275ok(exists $symbols{data}{const}, "has data const symbols");
276ok($symbols{data}{const}{PL_no_mem}{'globals.o'}, "has PL_no_mem");
277
278my $DEBUGGING = $Config{ccflags} =~ /-DDEBUGGING/ ? 1 : 0;
279
280my $GS = $Config{ccflags} =~ /-DPERL_GLOBAL_STRUCT\b/ ? 1 : 0;
281my $GSP = $Config{ccflags} =~ /-DPERL_GLOBAL_STRUCT_PRIVATE/ ? 1 : 0;
282
283print "# GS = $GS\n";
284print "# GSP = $GSP\n";
285
286my %data_symbols;
287
288for my $dtype (sort keys %{$symbols{data}}) {
289 for my $symbol (sort keys %{$symbols{data}{$dtype}}) {
290 $data_symbols{$symbol}++;
291 }
292}
293
d92f47ae 294# The following tests differ between vanilla vs $GSP or $GS.
d92f47ae
JH
295
296if ($GSP) {
297 print "# -DPERL_GLOBAL_STRUCT_PRIVATE\n";
298 ok(!exists $data_symbols{PL_hash_seed}, "has no PL_hash_seed");
299 ok(!exists $data_symbols{PL_ppaddr}, "has no PL_ppaddr");
300
301 ok(! exists $symbols{data}{bss}, "has no data bss symbols");
26443f8e
DM
302 ok(! exists $symbols{data}{data} ||
303 # clang with ASAN seems to add this symbol to every object file:
304 !grep($_ ne '__unnamed_1', keys %{$symbols{data}{data}}),
305 "has no data data symbols");
d92f47ae
JH
306 ok(! exists $symbols{data}{common}, "has no data common symbols");
307
308 # -DPERL_GLOBAL_STRUCT_PRIVATE should NOT have
309 # the extra text symbol for accessing the vars
310 # (as opposed to "just" -DPERL_GLOBAL_STRUCT)
311 ok(! exists $symbols{text}{Perl_GetVars}, "has no Perl_GetVars");
312} elsif ($GS) {
313 print "# -DPERL_GLOBAL_STRUCT\n";
314 ok(!exists $data_symbols{PL_hash_seed}, "has no PL_hash_seed");
315 ok(!exists $data_symbols{PL_ppaddr}, "has no PL_ppaddr");
316
317 ok(! exists $symbols{data}{bss}, "has no data bss symbols");
318
319 # These PerlIO data symbols are left visible with
320 # -DPERL_GLOBAL_STRUCT (as opposed to -DPERL_GLOBAL_STRUCT_PRIVATE)
321 my @PerlIO =
322 qw(
323 PerlIO_byte
324 PerlIO_crlf
325 PerlIO_pending
326 PerlIO_perlio
327 PerlIO_raw
328 PerlIO_remove
329 PerlIO_stdio
330 PerlIO_unix
331 PerlIO_utf8
332 );
333
334 # PL_magic_vtables is const with -DPERL_GLOBAL_STRUCT_PRIVATE but
335 # otherwise not const -- because of SWIG which wants to modify
336 # the table. Evil SWIG, eeevil.
337
338 # my_cxt_index is used with PERL_IMPLICIT_CONTEXT, which
339 # -DPERL_GLOBAL_STRUCT has turned on.
99a0b65c
JH
340 eq_array([sort keys %{$symbols{data}{data}}],
341 [sort('PL_VarsPtr',
342 @PerlIO,
343 'PL_magic_vtables',
344 'my_cxt_index')],
345 "data data symbols");
d92f47ae
JH
346
347 # Only one data common symbol, our "supervariable".
99a0b65c
JH
348 eq_array([sort keys %{$symbols{data}{common}}],
349 ['PL_Vars'],
350 "data common symbols");
d92f47ae
JH
351
352 ok($symbols{data}{data}{PL_VarsPtr}{'globals.o'}, "has PL_VarsPtr");
353 ok($symbols{data}{common}{PL_Vars}{'globals.o'}, "has PL_Vars");
354
355 # -DPERL_GLOBAL_STRUCT has extra text symbol for accessing the vars.
356 ok($symbols{text}{Perl_GetVars}{'util.o'}, "has Perl_GetVars");
357} else {
358 print "# neither -DPERL_GLOBAL_STRUCT nor -DPERL_GLOBAL_STRUCT_PRIVATE\n";
359
a9abea17 360 if ( !$symbols{data}{common} ) {
831ed035 361 # This is likely because Perl was compiled with
a9abea17
BF
362 # -Accflags="-fno-common"
363 $symbols{data}{common} = $symbols{data}{bss};
364 }
831ed035 365
d92f47ae
JH
366 ok($symbols{data}{common}{PL_hash_seed}{'globals.o'}, "has PL_hash_seed");
367 ok($symbols{data}{data}{PL_ppaddr}{'globals.o'}, "has PL_ppaddr");
368
369 # None of the GLOBAL_STRUCT* business here.
370 ok(! exists $symbols{data}{data}{PL_VarsPtr}, "has no PL_VarsPtr");
371 ok(! exists $symbols{data}{common}{PL_Vars}, "has no PL_Vars");
372 ok(! exists $symbols{text}{Perl_GetVars}, "has no Perl_GetVars");
373}
374
831ed035
JH
375ok(keys %{$symbols{undef}}, "has undefined symbols");
376
377my @good = qw(memchr memcmp memcpy chmod socket getenv sigaction sqrt time);
378if ($Config{usedl}) {
379 push @good, 'dlopen';
380}
381for my $good (@good) {
382 my @o = exists $symbols{undef}{$good} ?
383 sort keys %{ $symbols{undef}{$good} } : ();
384 ok(@o, "uses $good (@o)");
385}
386
387my @bad = qw(gets strcpy strcat strncpy strncat sprintf vsprintf);
388# XXX: add atoi() to @bad
389for my $bad (@bad) {
390 my @o = exists $symbols{undef}{$bad} ?
391 sort keys %{ $symbols{undef}{$bad} } : ();
392 is(@o, 0, "uses no $bad (@o)");
393}
394
caea674c
JH
395if (defined $nm_err_tmp) {
396 if (open(my $nm_err_fh, $nm_err_tmp)) {
397 my $error;
398 while (<$nm_err_fh>) {
399 # OS X has weird error where nm warns about
400 # "no name list" but then outputs fine.
401 if (/nm: no name list/ && $^O eq 'darwin') {
402 print "# $^O ignoring $nm output: $_";
403 next;
404 }
405 warn "$0: Unexpected $nm error: $_";
406 $error++;
b52baaa9 407 }
caea674c
JH
408 die "$0: Unexpected $nm errors\n" if $error;
409 } else {
410 warn "Failed to open '$nm_err_tmp': $!\n";
b52baaa9 411 }
b52baaa9
JH
412}
413
d92f47ae 414done_testing();