This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
devel/mktodo.pl: Explicitly close file
[perl5.git] / dist / Devel-PPPort / devel / mktodo.pl
CommitLineData
adfe19db
MHM
1#!/usr/bin/perl -w
2################################################################################
3#
4# mktodo.pl -- generate baseline and todo files
5#
bc24b511
KW
6# It makes the todo file for the single passed in perl binary. If --base is
7# not specified it compiles with ppport.h.
adfe19db
MHM
8################################################################################
9#
b2049988 10# Version 3.x, Copyright (C) 2004-2013, Marcus Holland-Moritz.
adfe19db
MHM
11# Version 2.x, Copyright (C) 2001, Paul Marquess.
12# Version 1.x, Copyright (C) 1999, Kenneth Albanowski.
13#
14# This program is free software; you can redistribute it and/or
15# modify it under the same terms as Perl itself.
16#
17################################################################################
18
19use strict;
20use Getopt::Long;
21use Data::Dumper;
22use IO::File;
23use IO::Select;
ba120f6f 24use Config;
0c96388f 25use Time::HiRes qw( gettimeofday tv_interval );
adfe19db 26
3d7c117d 27require './devel/devtools.pl';
adfe19db 28
0c96388f 29our %opt = (
ac18778d 30 blead => 0, # ? Is this perl blead
bc24b511
KW
31 debug => 0, # Adding --verbose increases the detail
32 base => 0, # Don't use ppport.h when generating
0c96388f 33 verbose => 0,
ba120f6f 34 check => 1,
f59197b8 35 todo => "", # If no --todo, this is a blead perl
ba120f6f 36 shlib => 'blib/arch/auto/Devel/PPPort/PPPort.so',
0c96388f 37);
adfe19db
MHM
38
39GetOptions(\%opt, qw(
ac18778d 40perl=s todo=s blead version=s shlib=s debug base verbose check!
adfe19db
MHM
41 )) or die;
42
0c96388f
MHM
43identify();
44
45print "\n", ident_str(), "\n\n";
46
adfe19db
MHM
47my $fullperl = `which $opt{perl}`;
48chomp $fullperl;
49
0c96388f
MHM
50$ENV{SKIP_SLOW_TESTS} = 1;
51
bc24b511 52# Generate the Makefile using the passed in perl
c0a15b19 53regen_Makefile();
adfe19db 54
bc24b511 55# List of functions that are never considered undefined. Add to as necessary
b2049988
MHM
56my %stdsym = map { ($_ => 1) } qw (
57 strlen
58 snprintf
59 strcmp
60 memcpy
61 strncmp
62 memmove
63 memcmp
64 tolower
65 exit
66 memset
67 vsnprintf
68 siglongjmp
69 sprintf
70);
71
bc24b511
KW
72# Initialize %sym so that the keys are all the Text symbols for this perl,
73# output from the system's 'nm'
adfe19db 74my %sym;
ba120f6f 75for (`$Config{nm} $fullperl`) {
adfe19db
MHM
76 chomp;
77 /\s+T\s+(\w+)\s*$/ and $sym{$1}++;
78}
79keys %sym >= 50 or die "less than 50 symbols found in $fullperl\n";
80
bc24b511
KW
81# %todo is initialized to be the symbols in the current todo file, like so:
82# {
83# 'UTF8_SAFE_SKIP' => 'U',
84# 'newSVsv_flags' => 'U',
85# 'newSVsv_nomg' => 'U',
86# }
87#
88# The values are the outputs from nm, plus 'E' from us, for Error
c0a15b19 89my %todo = %{load_todo($opt{todo}, $opt{version})} if $opt{todo};
bc24b511 90
adfe19db
MHM
91my @recheck;
92
bc24b511
KW
93# Get an exhaustive list from apicheck.i of symbols, what functions contain
94# them, and how many in each function.
95# symbol fcn count
96# ------ --- -----
97# 'UV' => {
98# 'SvUVX' => 1,
99# 'toFOLD_uvchr' => 2,
100# 'sv_uni_display' => 1,
101# ...
102# }
ba120f6f
MHM
103my $symmap = get_apicheck_symbol_map();
104
bc24b511
KW
105# In each iteration of the loop we create an apicheck.c. This will contain a
106# generated wrapper function for each API function and macro. The wrapper
107# contains one or more calls to its API element. Then we attempt to compile
108# apicheck.c into apicheck.o. If it compiles, then every API element exists
109# in this version of perl. If not, we figure out which ones were undefined,
110# and set things up so that in the next iteration of the loop, the wrappers
111# for those elements are #ifdef'd out.
adfe19db
MHM
112for (;;) {
113 my $retry = 1;
ba120f6f 114 my $trynm = 1;
bc24b511 115
adfe19db 116 regen_apicheck();
ba120f6f 117
adfe19db 118retry:
c0a15b19 119 my(@new, @already_in_sym, %seen);
ba120f6f
MHM
120
121 my $r = run(qw(make));
f59197b8
KW
122 $r->{didnotrun} and die "couldn't run make: $!\n" .
123 join('', @{$r->{stdout}})."\n---\n".join('', @{$r->{stderr}});
ba120f6f 124
ac18778d
KW
125 # If there were warnings, we ask the user before continuing when creating
126 # the base files of blead. This leads to a potential early exit when things
127 # aren't working right.
128 if ($opt{blead} && $opt{base}) {
129 undef $opt{blead}; # Only warn once.
130 if (@{$r->{stderr}}) {
131 print STDERR "Warnings and errors from compiling blead:\n";
132 print STDERR @{$r->{stderr}};
133 ask_or_quit("\nUnexpected warnings when compiling blead can lead to"
134 . " wrong results. Please examine the above list.\n"
135 . "Shall I proceed?");
136 }
137 else {
138 print STDERR "blead compiled without warnings nor errors.\n"
139 . "Proceeding with everything else\n";
140 }
141 }
142
bc24b511
KW
143 # Examine stderr. For each wrapper function listed in it, we create an
144 # 'E' (for error) entry. If the function (possibly prefixed by '[Pp]erl')
145 # is in %sym, it is added to @already_in_sym. Otherwise, @new.
adfe19db
MHM
146 for my $l (@{$r->{stderr}}) {
147 if ($l =~ /_DPPP_test_(\w+)/) {
148 if (!$seen{$1}++) {
149 my @s = grep { exists $sym{$_} } $1, "Perl_$1", "perl_$1";
150 if (@s) {
c0a15b19 151 push @already_in_sym, [$1, "E (@s)"];
adfe19db
MHM
152 }
153 else {
154 push @new, [$1, "E"];
155 }
156 }
157 }
ba120f6f
MHM
158 }
159
160 if ($r->{status} == 0) {
161 my @u;
162 my @usym;
163
bc24b511
KW
164 # Here, apicheck.o was successfully created. It likely will need
165 # functions from outside it in order to form a complete executable a.out.
166 # In the first iteration, look to see if all needed externs are available.
167 # (We don't actually try to create an a.out)
ba120f6f
MHM
168 if ($trynm) {
169 @u = eval { find_undefined_symbols($fullperl, $opt{shlib}) };
170 warn "warning: $@" if $@;
171 $trynm = 0;
172 }
173
bc24b511
KW
174 # If it didn't find any undefined symbols, everything should be working.
175 # Run the test suite.
ba120f6f
MHM
176 unless (@u) {
177 $r = run(qw(make test));
f59197b8
KW
178 $r->{didnotrun} and die "couldn't run make test: $!\n" .
179 join('', @{$r->{stdout}})."\n---\n".join('', @{$r->{stderr}});
ba120f6f 180
bc24b511
KW
181 $r->{status} == 0 and last; # It worked!!
182
183 # Alas, something was wrong. Add any undefined symbols listed in the
184 # output to our list
ba120f6f
MHM
185 for my $l (@{$r->{stderr}}) {
186 if ($l =~ /undefined symbol: (\w+)/) {
187 push @u, $1;
188 }
189 }
190 }
191
bc24b511 192 # For each undefined symbol
ba120f6f 193 for my $u (@u) {
bc24b511
KW
194
195 # If this is an API symbol, $symmap->{$u} will exist and be a hash of
196 # keys, being all the symbols referred to within it (with their values
197 # btw being the count of occurrences in the element).
ba120f6f
MHM
198 for my $m (keys %{$symmap->{$u}}) {
199 if (!$seen{$m}++) {
200 my $pl = $m;
201 $pl =~ s/^[Pp]erl_//;
202 my @s = grep { exists $sym{$_} } $pl, "Perl_$pl", "perl_$pl";
bc24b511
KW
203
204 # The comment for this entry that goes into the file that gets
205 # written includes any [Pp]erl prefix.
ba120f6f
MHM
206 push @new, [$m, @s ? "U (@s)" : "U"];
207 }
adfe19db
MHM
208 }
209 }
210 }
ba120f6f 211
bc24b511 212 # Remove from @new all the current todo symbols
c0a15b19 213 @new = grep !$todo{$_->[0]}, @new;
ba120f6f 214
bc24b511
KW
215 # If none remain, start over with those we know about, minus the todo
216 # symbols. khw doesn't understand why this is necessary
adfe19db 217 unless (@new) {
c0a15b19 218 @new = grep !$todo{$_->[0]}, @already_in_sym;
adfe19db 219 }
ba120f6f 220
bc24b511
KW
221 # This retries once if nothing new was found (khw guesses that is just to
222 # be sure, or maybe it's because we ran nm the first time through)
adfe19db
MHM
223 unless (@new) {
224 if ($retry > 0) {
225 $retry--;
c0a15b19 226 regen_Makefile();
adfe19db
MHM
227 goto retry;
228 }
229 print Dumper($r);
230 die "no new TODO symbols found...";
231 }
ba120f6f 232
bc24b511 233 # recheck symbols except undefined ones reported by the dynamic linker
0c96388f 234 push @recheck, map { $_->[0] } grep { $_->[1] !~ /^U/ } @new;
ba120f6f 235
bc24b511
KW
236 # Display each newly found undefined symbol, and add it to the list of todo
237 # symbols
adfe19db 238 for (@new) {
c0a15b19
KW
239 display_sym('new', @$_);
240 $todo{$_->[0]} = $_->[1];
adfe19db 241 }
0c96388f 242
bc24b511
KW
243 # Write the revised todo, so that apicheck.c when generated in the next
244 # iteration will have these #ifdef'd out
c0a15b19 245 write_todo($opt{todo}, $opt{version}, \%todo);
bc24b511 246} # End of loop
0c96388f 247
bc24b511
KW
248# If we are to check our work, do so. This verifies that each symbol
249# identified above is really a problem in this version. (khw doesn't know
250# under what circumstances this becomes an issue)
251#
252# We go through each symbol on the @recheck list, and create an apicheck.c
253# with it enabled.
ba120f6f 254if ($opt{check}) {
bc24b511
KW
255
256 # Create something like '%3d'
ba120f6f 257 my $ifmt = '%' . length(scalar @recheck) . 'd';
bc24b511 258
ba120f6f 259 my $t0 = [gettimeofday];
b2049988 260
ba120f6f
MHM
261 RECHECK: for my $i (0 .. $#recheck) {
262 my $sym = $recheck[$i];
bc24b511
KW
263
264 # Assume it will work
c0a15b19 265 my $cur = delete $todo{$sym};
b2049988 266
bc24b511 267 # Give a progress report
c0a15b19 268 display_sym('chk', $sym, $cur, sprintf(" [$ifmt/$ifmt, ETA %s]",
ba120f6f 269 $i + 1, scalar @recheck, eta($t0, $i, scalar @recheck)));
b2049988 270
bc24b511
KW
271 # Write out the todo file without this symbol, meaning it will be enabled
272 # in the generated apicheck.c file
c0a15b19 273 write_todo($opt{todo}, $opt{version}, \%todo);
b2049988 274
bc24b511 275 # E is not an nm symbol, but was added by us to indicate 'Error'
ba120f6f 276 if ($cur eq "E (Perl_$sym)") {
bc24b511
KW
277
278 # We can try a shortcut here. Create an apicheck.c file for just this
279 # symbol.
ba120f6f 280 regen_apicheck($sym);
b2049988 281
ba120f6f 282 my $r = run(qw(make test));
b2049988 283
ba120f6f 284 if (!$r->{didnotrun} && $r->{status} == 0) {
bc24b511
KW
285
286 # Shortcut indicated that this function compiles..
c0a15b19 287 display_sym('del', $sym, $cur);
ba120f6f
MHM
288 next RECHECK;
289 }
bc24b511
KW
290
291 # Here, the api file with just this entry failed to compile. (khw
292 # doesn't know why we just don't give up on it now, but we don't.) We
293 # drop down below to generate and compile a full apicheck.c with this
294 # symbol enabled. (XXX Perhaps we could look at stderr and if it
295 # contained things about parameter mismatch, (which is a common
296 # occurrence), we could skip the steps below.)
ba120f6f 297 }
b2049988 298
bc24b511
KW
299 # Either can't shortcut, or the shortcut indicated that the function
300 # doesn't compile in isolation. Create, compile and test with this
301 # function/symbol enabled. (Remember that this should have succeeded
302 # above to get to here when this symbol was disabled, so enabling just
303 # this one will tell us for sure that it works or doesn't work. (khw
304 # wonders if this is actually a DAG, or perhaps with cycles, so this is
305 # under it all, insufficient.)
c0a15b19 306 regen_Makefile();
b2049988 307
0c96388f 308 my $r = run(qw(make test));
b2049988 309
9251f208
KW
310 # This regenerated apicheck.c
311 dump_apicheck() if $opt{debug};
312
f59197b8
KW
313 $r->{didnotrun} and die "couldn't run make test: $!\n" .
314 join('', @{$r->{stdout}})."\n---\n".join('', @{$r->{stderr}});
b2049988 315
bc24b511
KW
316 if ($r->{status} == 0) { # This symbol compiles and tests ok, so retain
317 # in this version
c0a15b19 318 display_sym('del', $sym, $cur);
ba120f6f 319 }
bc24b511 320 else { # Revert to this symbol is bad in this version
c0a15b19 321 $todo{$sym} = $cur;
0c96388f 322 }
adfe19db 323 }
bc24b511 324} # End of checking our work
adfe19db 325
c0a15b19 326write_todo($opt{todo}, $opt{version}, \%todo);
adfe19db 327
bc24b511 328# Clean up after ourselves
adfe19db
MHM
329run(qw(make realclean));
330
331exit 0;
332
c0a15b19 333sub display_sym
ba120f6f
MHM
334{
335 my($what, $sym, $reason, $extra) = @_;
336 $extra ||= '';
337 my %col = (
338 'new' => 'bold red',
339 'chk' => 'bold magenta',
340 'del' => 'bold green',
341 );
342 $what = colored("$what symbol", $col{$what});
343
344 printf "[%s] %s %-30s # %s%s\n",
345 $opt{version}, $what, $sym, $reason, $extra;
346}
347
c0a15b19 348sub regen_Makefile
adfe19db 349{
bc24b511 350 # We make sure to add rules for creating apicheck.c
ba120f6f 351 my @mf_arg = ('--with-apicheck', 'OPTIMIZE=-O0 -w');
bc24b511
KW
352
353 # It doesn't include ppport.h if generating the base files.
adfe19db
MHM
354 push @mf_arg, qw( DEFINE=-DDPPP_APICHECK_NO_PPPORT_H ) if $opt{base};
355
356 # just to be sure
357 run(qw(make realclean));
bc24b511 358
f59197b8
KW
359 my $r = run($fullperl, "Makefile.PL", @mf_arg);
360 unless ($r->{status} == 0) {
361 die "cannot run Makefile.PL: $!\n" .
362 join('', @{$r->{stdout}})."\n---\n".join('', @{$r->{stderr}});
363 }
adfe19db
MHM
364}
365
bc24b511 366sub regen_apicheck # Regeneration can also occur by calling 'make'
adfe19db
MHM
367{
368 unlink qw(apicheck.c apicheck.o);
0c96388f
MHM
369 runtool({ out => '/dev/null' }, $fullperl, 'apicheck_c.PL', map { "--api=$_" } @_)
370 or die "cannot regenerate apicheck.c\n";
9251f208
KW
371 dump_apicheck() if $opt{debug};
372}
373
374sub dump_apicheck
375{
376 my $apicheck = "apicheck.c";
377 my $f = new IO::File $apicheck or die "cannot open $apicheck: $!\n";
378 my @lines = <$f>;
379 print STDERR __FILE__, ": ", __LINE__, ": $apicheck (",
380 scalar @lines,
381 " lines) for $fullperl";
382 print STDERR " and '" if @_;
383 print STDERR join "', '", @_;
384 print STDERR "'" if @_;
385 print STDERR ":\n";
386 my $n = 1;
387 print STDERR $n++, " ", $_ for @lines;
adfe19db
MHM
388}
389
bc24b511
KW
390sub load_todo # Return entries from $file; skip if the first line
391 # isn't $expver (expected version)
adfe19db
MHM
392{
393 my($file, $expver) = @_;
394
395 if (-e $file) {
396 my $f = new IO::File $file or die "cannot open $file: $!\n";
397 my $ver = <$f>;
398 chomp $ver;
399 if ($ver eq $expver) {
400 my %sym;
401 while (<$f>) {
402 chomp;
403 /^(\w+)\s+#\s+(.*)/ or goto nuke_file;
404 exists $sym{$1} and goto nuke_file;
405 $sym{$1} = $2;
406 }
407 return \%sym;
408 }
409
410nuke_file:
411 undef $f;
412 unlink $file or die "cannot remove $file: $!\n";
413 }
414
415 return {};
416}
417
bc24b511
KW
418sub write_todo # Write out the todo file. The keys of %sym are known to not
419 # be in this version, hence are 'todo'
adfe19db
MHM
420{
421 my($file, $ver, $sym) = @_;
422 my $f;
423
424 $f = new IO::File ">$file" or die "cannot open $file: $!\n";
425 $f->print("$ver\n");
426
55179e46
KW
427 # Dictionary ordering, with only alphanumerics
428 for (sort dictionary_order keys %$sym) {
adfe19db
MHM
429 $f->print(sprintf "%-30s # %s\n", $_, $sym->{$_});
430 }
6fc0eb4b
KW
431
432 $f->close;
adfe19db
MHM
433}
434
ba120f6f
MHM
435sub find_undefined_symbols
436{
bc24b511
KW
437 # returns a list of undefined symbols in $shlib. To be considered
438 # undefined, it must also not be defined in $perl. Symbols that begin with
439 # underscore, or contain '@', or are some libc ones are not returned.
440 # Presumably, the list of libc could be expanded if necessary.
441
ba120f6f
MHM
442 my($perl, $shlib) = @_;
443
444 my $ps = read_sym(file => $perl, options => [qw( --defined-only )]);
445 my $ls = read_sym(file => $shlib, options => [qw( --undefined-only )]);
446
447 my @undefined;
448
449 for my $sym (keys %$ls) {
0d2a8114 450 next if $sym =~ /\@/ or $sym =~ /^_/ or exists $stdsym{$sym};
ba120f6f 451 unless (exists $ps->{$sym}) {
0d2a8114 452 push @undefined, $sym;
ba120f6f
MHM
453 }
454 }
455
456 return @undefined;
457}
458
459sub read_sym
460{
461 my %opt = ( options => [], @_ );
462
463 my $r = run($Config{nm}, @{$opt{options}}, $opt{file});
464
465 if ($r->{didnotrun} or $r->{status}) {
f59197b8
KW
466 die "cannot run $Config{nm}" .
467 join('', @{$r->{stdout}})."\n---\n".join('', @{$r->{stderr}});
ba120f6f
MHM
468 }
469
470 my %sym;
471
472 for (@{$r->{stdout}}) {
473 chomp;
474 my($adr, $fmt, $sym) = /^\s*([[:xdigit:]]+)?\s+([ABCDGINRSTUVW?-])\s+(\S+)\s*$/i
475 or die "cannot parse $Config{nm} output:\n[$_]\n";
476 $sym{$sym} = { format => $fmt };
477 $sym{$sym}{address} = $adr if defined $adr;
478 }
479
480 return \%sym;
481}
482
483sub get_apicheck_symbol_map
484{
49ef49fe
CBW
485 my $r;
486
487 while (1) {
f59197b8
KW
488
489 # Create apicheck.i
49ef49fe
CBW
490 $r = run(qw(make apicheck.i));
491
f59197b8 492 # Quit the loop if it succeeded
49ef49fe
CBW
493 last unless $r->{didnotrun} or $r->{status};
494
bc24b511
KW
495 # Get the list of macros that had parameter issues. These are marked as
496 # A, for absolute in nm terms
49ef49fe
CBW
497 my %sym = map { /error: macro "(\w+)" (?:requires|passed) \d+ argument/ ? ($1 => 'A') : () }
498 @{$r->{stderr}};
499
bc24b511 500 # Display these, and add them to the global %todo.
49ef49fe 501 if (keys %sym) {
55179e46 502 for my $s (sort dictionary_order keys %sym) {
c0a15b19
KW
503 display_sym('new', $s, $sym{$s});
504 $todo{$s} = $sym{$s};
49ef49fe 505 }
bc24b511
KW
506
507 # And rewrite the todo file, including these new symbols.
c0a15b19 508 write_todo($opt{todo}, $opt{version}, \%todo);
bc24b511
KW
509
510 # Regenerate apicheck.c for the next iteration
49ef49fe
CBW
511 regen_apicheck();
512 }
bc24b511 513 else { # It failed for some other reason than parameter issues: give up
49ef49fe
CBW
514 die "cannot run make apicheck.i ($r->{didnotrun} / $r->{status}):\n".
515 join('', @{$r->{stdout}})."\n---\n".join('', @{$r->{stderr}});
516 }
ba120f6f
MHM
517 }
518
bc24b511 519 # Here, have an apicheck.i. Read it in
ba120f6f
MHM
520 my $fh = IO::File->new('apicheck.i')
521 or die "cannot open apicheck.i: $!";
522
523 local $_;
524 my %symmap;
525 my $cur;
526
527 while (<$fh>) {
528 next if /^#/;
e49c33df 529
bc24b511
KW
530 # We only care about lines within one of our _DPPP_test_ functions. If
531 # we're in one, $cur is set to the name of the current one.
532 if (! defined $cur) { # Not within such a function; see if this starts
533 # one
e49c33df
KW
534 /_DPPP_test_(\w+)/ and $cur = $1;
535 }
536 else {
bc24b511
KW
537
538 # For anything that looks like a symbol, note it as a key, and as its
539 # value, the name of the function. Actually the value is another key,
540 # whose value is the count of this symbol's occurrences, so it looks
541 # like:
542 # 'UV' => {
543 # 'SvUVX' => 1,
544 # 'toFOLD_uvchr' => 2,
545 # 'sv_uni_display' => 1,
546 # ...
547 # }
ba120f6f
MHM
548 for my $sym (/\b([A-Za-z_]\w+)\b/g) {
549 $symmap{$sym}{$cur}++;
550 }
bc24b511
KW
551
552 # This line marks the end of this function, as constructed by us.
ba120f6f
MHM
553 undef $cur if /^}$/;
554 }
ba120f6f
MHM
555 }
556
557 return \%symmap;
558}