This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
devel/mktodo.pl: Stop using a known bad API element
[perl5.git] / dist / Devel-PPPort / devel / mktodo.pl
CommitLineData
adfe19db
MHM
1#!/usr/bin/perl -w
2################################################################################
3#
4# mktodo.pl -- generate baseline and todo files
5#
bc24b511
KW
6# It makes the todo file for the single passed in perl binary. If --base is
7# not specified it compiles with ppport.h.
adfe19db
MHM
8################################################################################
9#
b2049988 10# Version 3.x, Copyright (C) 2004-2013, Marcus Holland-Moritz.
adfe19db
MHM
11# Version 2.x, Copyright (C) 2001, Paul Marquess.
12# Version 1.x, Copyright (C) 1999, Kenneth Albanowski.
13#
14# This program is free software; you can redistribute it and/or
15# modify it under the same terms as Perl itself.
16#
17################################################################################
18
19use strict;
20use Getopt::Long;
21use Data::Dumper;
22use IO::File;
23use IO::Select;
ba120f6f 24use Config;
0c96388f 25use Time::HiRes qw( gettimeofday tv_interval );
adfe19db 26
3d7c117d 27require './devel/devtools.pl';
adfe19db 28
0c96388f 29our %opt = (
ac18778d 30 blead => 0, # ? Is this perl blead
bc24b511
KW
31 debug => 0, # Adding --verbose increases the detail
32 base => 0, # Don't use ppport.h when generating
0c96388f 33 verbose => 0,
ba120f6f 34 check => 1,
f59197b8 35 todo => "", # If no --todo, this is a blead perl
ba120f6f 36 shlib => 'blib/arch/auto/Devel/PPPort/PPPort.so',
0c96388f 37);
adfe19db
MHM
38
39GetOptions(\%opt, qw(
ac18778d 40perl=s todo=s blead version=s shlib=s debug base verbose check!
adfe19db
MHM
41 )) or die;
42
0c96388f
MHM
43identify();
44
5b5bb2e0
KW
45my $todo_file;
46my $todo_version;
47if ($opt{todo}) {
48 $todo_file = $opt{todo};
49 $todo_version = $opt{version};
50}
51
0c96388f
MHM
52print "\n", ident_str(), "\n\n";
53
adfe19db
MHM
54my $fullperl = `which $opt{perl}`;
55chomp $fullperl;
56
0c96388f
MHM
57$ENV{SKIP_SLOW_TESTS} = 1;
58
bc24b511 59# Generate the Makefile using the passed in perl
c0a15b19 60regen_Makefile();
adfe19db 61
bc24b511 62# List of functions that are never considered undefined. Add to as necessary
b2049988
MHM
63my %stdsym = map { ($_ => 1) } qw (
64 strlen
65 snprintf
66 strcmp
67 memcpy
68 strncmp
69 memmove
70 memcmp
71 tolower
72 exit
73 memset
74 vsnprintf
75 siglongjmp
76 sprintf
77);
78
bc24b511
KW
79# Initialize %sym so that the keys are all the Text symbols for this perl,
80# output from the system's 'nm'
adfe19db 81my %sym;
ba120f6f 82for (`$Config{nm} $fullperl`) {
adfe19db
MHM
83 chomp;
84 /\s+T\s+(\w+)\s*$/ and $sym{$1}++;
85}
86keys %sym >= 50 or die "less than 50 symbols found in $fullperl\n";
87
bc24b511
KW
88# %todo is initialized to be the symbols in the current todo file, like so:
89# {
90# 'UTF8_SAFE_SKIP' => 'U',
91# 'newSVsv_flags' => 'U',
92# 'newSVsv_nomg' => 'U',
93# }
94#
95# The values are the outputs from nm, plus 'E' from us, for Error
5b5bb2e0 96my %todo = %{load_todo($todo_file, $todo_version)} if $opt{todo};
bc24b511 97
adfe19db
MHM
98my @recheck;
99
bc24b511
KW
100# Get an exhaustive list from apicheck.i of symbols, what functions contain
101# them, and how many in each function.
102# symbol fcn count
103# ------ --- -----
104# 'UV' => {
105# 'SvUVX' => 1,
106# 'toFOLD_uvchr' => 2,
107# 'sv_uni_display' => 1,
108# ...
109# }
ba120f6f
MHM
110my $symmap = get_apicheck_symbol_map();
111
bc24b511
KW
112# In each iteration of the loop we create an apicheck.c. This will contain a
113# generated wrapper function for each API function and macro. The wrapper
114# contains one or more calls to its API element. Then we attempt to compile
115# apicheck.c into apicheck.o. If it compiles, then every API element exists
116# in this version of perl. If not, we figure out which ones were undefined,
117# and set things up so that in the next iteration of the loop, the wrappers
118# for those elements are #ifdef'd out.
adfe19db
MHM
119for (;;) {
120 my $retry = 1;
ba120f6f 121 my $trynm = 1;
bc24b511 122
adfe19db 123 regen_apicheck();
ba120f6f 124
adfe19db 125retry:
c0a15b19 126 my(@new, @already_in_sym, %seen);
ba120f6f
MHM
127
128 my $r = run(qw(make));
f59197b8
KW
129 $r->{didnotrun} and die "couldn't run make: $!\n" .
130 join('', @{$r->{stdout}})."\n---\n".join('', @{$r->{stderr}});
ba120f6f 131
ac18778d
KW
132 # If there were warnings, we ask the user before continuing when creating
133 # the base files of blead. This leads to a potential early exit when things
134 # aren't working right.
135 if ($opt{blead} && $opt{base}) {
136 undef $opt{blead}; # Only warn once.
137 if (@{$r->{stderr}}) {
138 print STDERR "Warnings and errors from compiling blead:\n";
139 print STDERR @{$r->{stderr}};
140 ask_or_quit("\nUnexpected warnings when compiling blead can lead to"
141 . " wrong results. Please examine the above list.\n"
142 . "Shall I proceed?");
143 }
144 else {
145 print STDERR "blead compiled without warnings nor errors.\n"
146 . "Proceeding with everything else\n";
147 }
148 }
149
bc24b511
KW
150 # Examine stderr. For each wrapper function listed in it, we create an
151 # 'E' (for error) entry. If the function (possibly prefixed by '[Pp]erl')
152 # is in %sym, it is added to @already_in_sym. Otherwise, @new.
adfe19db
MHM
153 for my $l (@{$r->{stderr}}) {
154 if ($l =~ /_DPPP_test_(\w+)/) {
155 if (!$seen{$1}++) {
156 my @s = grep { exists $sym{$_} } $1, "Perl_$1", "perl_$1";
157 if (@s) {
c0a15b19 158 push @already_in_sym, [$1, "E (@s)"];
adfe19db
MHM
159 }
160 else {
161 push @new, [$1, "E"];
162 }
163 }
164 }
ba120f6f
MHM
165 }
166
167 if ($r->{status} == 0) {
168 my @u;
169 my @usym;
170
bc24b511
KW
171 # Here, apicheck.o was successfully created. It likely will need
172 # functions from outside it in order to form a complete executable a.out.
173 # In the first iteration, look to see if all needed externs are available.
174 # (We don't actually try to create an a.out)
ba120f6f
MHM
175 if ($trynm) {
176 @u = eval { find_undefined_symbols($fullperl, $opt{shlib}) };
177 warn "warning: $@" if $@;
178 $trynm = 0;
179 }
180
bc24b511
KW
181 # If it didn't find any undefined symbols, everything should be working.
182 # Run the test suite.
ba120f6f
MHM
183 unless (@u) {
184 $r = run(qw(make test));
f59197b8
KW
185 $r->{didnotrun} and die "couldn't run make test: $!\n" .
186 join('', @{$r->{stdout}})."\n---\n".join('', @{$r->{stderr}});
ba120f6f 187
bc24b511
KW
188 $r->{status} == 0 and last; # It worked!!
189
190 # Alas, something was wrong. Add any undefined symbols listed in the
191 # output to our list
ba120f6f
MHM
192 for my $l (@{$r->{stderr}}) {
193 if ($l =~ /undefined symbol: (\w+)/) {
194 push @u, $1;
195 }
196 }
197 }
198
bc24b511 199 # For each undefined symbol
ba120f6f 200 for my $u (@u) {
bc24b511
KW
201
202 # If this is an API symbol, $symmap->{$u} will exist and be a hash of
203 # keys, being all the symbols referred to within it (with their values
204 # btw being the count of occurrences in the element).
ba120f6f
MHM
205 for my $m (keys %{$symmap->{$u}}) {
206 if (!$seen{$m}++) {
207 my $pl = $m;
208 $pl =~ s/^[Pp]erl_//;
209 my @s = grep { exists $sym{$_} } $pl, "Perl_$pl", "perl_$pl";
bc24b511
KW
210
211 # The comment for this entry that goes into the file that gets
212 # written includes any [Pp]erl prefix.
ba120f6f
MHM
213 push @new, [$m, @s ? "U (@s)" : "U"];
214 }
adfe19db
MHM
215 }
216 }
217 }
ba120f6f 218
bc24b511 219 # Remove from @new all the current todo symbols
c0a15b19 220 @new = grep !$todo{$_->[0]}, @new;
ba120f6f 221
bc24b511
KW
222 # If none remain, start over with those we know about, minus the todo
223 # symbols. khw doesn't understand why this is necessary
adfe19db 224 unless (@new) {
c0a15b19 225 @new = grep !$todo{$_->[0]}, @already_in_sym;
adfe19db 226 }
ba120f6f 227
bc24b511
KW
228 # This retries once if nothing new was found (khw guesses that is just to
229 # be sure, or maybe it's because we ran nm the first time through)
adfe19db
MHM
230 unless (@new) {
231 if ($retry > 0) {
232 $retry--;
c0a15b19 233 regen_Makefile();
adfe19db
MHM
234 goto retry;
235 }
236 print Dumper($r);
237 die "no new TODO symbols found...";
238 }
ba120f6f 239
bc24b511 240 # recheck symbols except undefined ones reported by the dynamic linker
0c96388f 241 push @recheck, map { $_->[0] } grep { $_->[1] !~ /^U/ } @new;
ba120f6f 242
bc24b511
KW
243 # Display each newly found undefined symbol, and add it to the list of todo
244 # symbols
adfe19db 245 for (@new) {
c0a15b19
KW
246 display_sym('new', @$_);
247 $todo{$_->[0]} = $_->[1];
adfe19db 248 }
0c96388f 249
bc24b511
KW
250 # Write the revised todo, so that apicheck.c when generated in the next
251 # iteration will have these #ifdef'd out
5b5bb2e0 252 write_todo($todo_file, $todo_version, \%todo);
bc24b511 253} # End of loop
0c96388f 254
bc24b511
KW
255# If we are to check our work, do so. This verifies that each symbol
256# identified above is really a problem in this version. (khw doesn't know
257# under what circumstances this becomes an issue)
258#
259# We go through each symbol on the @recheck list, and create an apicheck.c
260# with it enabled.
ba120f6f 261if ($opt{check}) {
bc24b511
KW
262
263 # Create something like '%3d'
ba120f6f 264 my $ifmt = '%' . length(scalar @recheck) . 'd';
bc24b511 265
ba120f6f 266 my $t0 = [gettimeofday];
b2049988 267
ba120f6f
MHM
268 RECHECK: for my $i (0 .. $#recheck) {
269 my $sym = $recheck[$i];
bc24b511
KW
270
271 # Assume it will work
c0a15b19 272 my $cur = delete $todo{$sym};
b2049988 273
bc24b511 274 # Give a progress report
c0a15b19 275 display_sym('chk', $sym, $cur, sprintf(" [$ifmt/$ifmt, ETA %s]",
ba120f6f 276 $i + 1, scalar @recheck, eta($t0, $i, scalar @recheck)));
b2049988 277
bc24b511
KW
278 # Write out the todo file without this symbol, meaning it will be enabled
279 # in the generated apicheck.c file
5b5bb2e0 280 write_todo($todo_file, $todo_version, \%todo);
b2049988 281
bc24b511 282 # E is not an nm symbol, but was added by us to indicate 'Error'
ba120f6f 283 if ($cur eq "E (Perl_$sym)") {
bc24b511
KW
284
285 # We can try a shortcut here. Create an apicheck.c file for just this
286 # symbol.
ba120f6f 287 regen_apicheck($sym);
b2049988 288
ba120f6f 289 my $r = run(qw(make test));
b2049988 290
ba120f6f 291 if (!$r->{didnotrun} && $r->{status} == 0) {
bc24b511
KW
292
293 # Shortcut indicated that this function compiles..
c0a15b19 294 display_sym('del', $sym, $cur);
ba120f6f
MHM
295 next RECHECK;
296 }
bc24b511
KW
297
298 # Here, the api file with just this entry failed to compile. (khw
299 # doesn't know why we just don't give up on it now, but we don't.) We
300 # drop down below to generate and compile a full apicheck.c with this
301 # symbol enabled. (XXX Perhaps we could look at stderr and if it
302 # contained things about parameter mismatch, (which is a common
303 # occurrence), we could skip the steps below.)
ba120f6f 304 }
b2049988 305
bc24b511
KW
306 # Either can't shortcut, or the shortcut indicated that the function
307 # doesn't compile in isolation. Create, compile and test with this
308 # function/symbol enabled. (Remember that this should have succeeded
309 # above to get to here when this symbol was disabled, so enabling just
310 # this one will tell us for sure that it works or doesn't work. (khw
311 # wonders if this is actually a DAG, or perhaps with cycles, so this is
312 # under it all, insufficient.)
c0a15b19 313 regen_Makefile();
b2049988 314
0c96388f 315 my $r = run(qw(make test));
b2049988 316
9251f208
KW
317 # This regenerated apicheck.c
318 dump_apicheck() if $opt{debug};
319
f59197b8
KW
320 $r->{didnotrun} and die "couldn't run make test: $!\n" .
321 join('', @{$r->{stdout}})."\n---\n".join('', @{$r->{stderr}});
b2049988 322
bc24b511
KW
323 if ($r->{status} == 0) { # This symbol compiles and tests ok, so retain
324 # in this version
c0a15b19 325 display_sym('del', $sym, $cur);
ba120f6f 326 }
bc24b511 327 else { # Revert to this symbol is bad in this version
c0a15b19 328 $todo{$sym} = $cur;
d33cd6dc 329 write_todo($todo_file, $todo_version, \%todo);
0c96388f 330 }
adfe19db 331 }
bc24b511 332} # End of checking our work
adfe19db 333
5b5bb2e0 334write_todo($todo_file, $todo_version, \%todo);
adfe19db 335
bc24b511 336# Clean up after ourselves
adfe19db
MHM
337run(qw(make realclean));
338
339exit 0;
340
c0a15b19 341sub display_sym
ba120f6f
MHM
342{
343 my($what, $sym, $reason, $extra) = @_;
344 $extra ||= '';
345 my %col = (
346 'new' => 'bold red',
347 'chk' => 'bold magenta',
348 'del' => 'bold green',
349 );
350 $what = colored("$what symbol", $col{$what});
351
352 printf "[%s] %s %-30s # %s%s\n",
5b5bb2e0 353 $todo_version, $what, $sym, $reason, $extra;
ba120f6f
MHM
354}
355
c0a15b19 356sub regen_Makefile
adfe19db 357{
bc24b511 358 # We make sure to add rules for creating apicheck.c
ba120f6f 359 my @mf_arg = ('--with-apicheck', 'OPTIMIZE=-O0 -w');
bc24b511
KW
360
361 # It doesn't include ppport.h if generating the base files.
adfe19db
MHM
362 push @mf_arg, qw( DEFINE=-DDPPP_APICHECK_NO_PPPORT_H ) if $opt{base};
363
364 # just to be sure
365 run(qw(make realclean));
bc24b511 366
f59197b8
KW
367 my $r = run($fullperl, "Makefile.PL", @mf_arg);
368 unless ($r->{status} == 0) {
369 die "cannot run Makefile.PL: $!\n" .
370 join('', @{$r->{stdout}})."\n---\n".join('', @{$r->{stderr}});
371 }
adfe19db
MHM
372}
373
bc24b511 374sub regen_apicheck # Regeneration can also occur by calling 'make'
adfe19db
MHM
375{
376 unlink qw(apicheck.c apicheck.o);
0c96388f
MHM
377 runtool({ out => '/dev/null' }, $fullperl, 'apicheck_c.PL', map { "--api=$_" } @_)
378 or die "cannot regenerate apicheck.c\n";
9251f208
KW
379 dump_apicheck() if $opt{debug};
380}
381
382sub dump_apicheck
383{
384 my $apicheck = "apicheck.c";
385 my $f = new IO::File $apicheck or die "cannot open $apicheck: $!\n";
386 my @lines = <$f>;
387 print STDERR __FILE__, ": ", __LINE__, ": $apicheck (",
388 scalar @lines,
389 " lines) for $fullperl";
390 print STDERR " and '" if @_;
391 print STDERR join "', '", @_;
392 print STDERR "'" if @_;
393 print STDERR ":\n";
394 my $n = 1;
395 print STDERR $n++, " ", $_ for @lines;
adfe19db
MHM
396}
397
bc24b511
KW
398sub load_todo # Return entries from $file; skip if the first line
399 # isn't $expver (expected version)
adfe19db
MHM
400{
401 my($file, $expver) = @_;
402
403 if (-e $file) {
404 my $f = new IO::File $file or die "cannot open $file: $!\n";
405 my $ver = <$f>;
406 chomp $ver;
407 if ($ver eq $expver) {
408 my %sym;
409 while (<$f>) {
410 chomp;
411 /^(\w+)\s+#\s+(.*)/ or goto nuke_file;
412 exists $sym{$1} and goto nuke_file;
413 $sym{$1} = $2;
414 }
415 return \%sym;
416 }
417
418nuke_file:
419 undef $f;
420 unlink $file or die "cannot remove $file: $!\n";
421 }
422
423 return {};
424}
425
bc24b511
KW
426sub write_todo # Write out the todo file. The keys of %sym are known to not
427 # be in this version, hence are 'todo'
adfe19db
MHM
428{
429 my($file, $ver, $sym) = @_;
430 my $f;
431
432 $f = new IO::File ">$file" or die "cannot open $file: $!\n";
433 $f->print("$ver\n");
434
55179e46
KW
435 # Dictionary ordering, with only alphanumerics
436 for (sort dictionary_order keys %$sym) {
adfe19db
MHM
437 $f->print(sprintf "%-30s # %s\n", $_, $sym->{$_});
438 }
6fc0eb4b
KW
439
440 $f->close;
adfe19db
MHM
441}
442
ba120f6f
MHM
443sub find_undefined_symbols
444{
bc24b511
KW
445 # returns a list of undefined symbols in $shlib. To be considered
446 # undefined, it must also not be defined in $perl. Symbols that begin with
447 # underscore, or contain '@', or are some libc ones are not returned.
448 # Presumably, the list of libc could be expanded if necessary.
449
ba120f6f
MHM
450 my($perl, $shlib) = @_;
451
452 my $ps = read_sym(file => $perl, options => [qw( --defined-only )]);
453 my $ls = read_sym(file => $shlib, options => [qw( --undefined-only )]);
454
455 my @undefined;
456
457 for my $sym (keys %$ls) {
0d2a8114 458 next if $sym =~ /\@/ or $sym =~ /^_/ or exists $stdsym{$sym};
ba120f6f 459 unless (exists $ps->{$sym}) {
0d2a8114 460 push @undefined, $sym;
ba120f6f
MHM
461 }
462 }
463
464 return @undefined;
465}
466
467sub read_sym
468{
469 my %opt = ( options => [], @_ );
470
471 my $r = run($Config{nm}, @{$opt{options}}, $opt{file});
472
473 if ($r->{didnotrun} or $r->{status}) {
f59197b8
KW
474 die "cannot run $Config{nm}" .
475 join('', @{$r->{stdout}})."\n---\n".join('', @{$r->{stderr}});
ba120f6f
MHM
476 }
477
478 my %sym;
479
480 for (@{$r->{stdout}}) {
481 chomp;
482 my($adr, $fmt, $sym) = /^\s*([[:xdigit:]]+)?\s+([ABCDGINRSTUVW?-])\s+(\S+)\s*$/i
483 or die "cannot parse $Config{nm} output:\n[$_]\n";
484 $sym{$sym} = { format => $fmt };
485 $sym{$sym}{address} = $adr if defined $adr;
486 }
487
488 return \%sym;
489}
490
491sub get_apicheck_symbol_map
492{
49ef49fe
CBW
493 my $r;
494
495 while (1) {
f59197b8
KW
496
497 # Create apicheck.i
49ef49fe
CBW
498 $r = run(qw(make apicheck.i));
499
f59197b8 500 # Quit the loop if it succeeded
49ef49fe
CBW
501 last unless $r->{didnotrun} or $r->{status};
502
bc24b511
KW
503 # Get the list of macros that had parameter issues. These are marked as
504 # A, for absolute in nm terms
49ef49fe
CBW
505 my %sym = map { /error: macro "(\w+)" (?:requires|passed) \d+ argument/ ? ($1 => 'A') : () }
506 @{$r->{stderr}};
507
bc24b511 508 # Display these, and add them to the global %todo.
49ef49fe 509 if (keys %sym) {
55179e46 510 for my $s (sort dictionary_order keys %sym) {
c0a15b19
KW
511 display_sym('new', $s, $sym{$s});
512 $todo{$s} = $sym{$s};
49ef49fe 513 }
bc24b511
KW
514
515 # And rewrite the todo file, including these new symbols.
5b5bb2e0 516 write_todo($todo_file, $todo_version, \%todo);
bc24b511
KW
517
518 # Regenerate apicheck.c for the next iteration
49ef49fe
CBW
519 regen_apicheck();
520 }
bc24b511 521 else { # It failed for some other reason than parameter issues: give up
49ef49fe
CBW
522 die "cannot run make apicheck.i ($r->{didnotrun} / $r->{status}):\n".
523 join('', @{$r->{stdout}})."\n---\n".join('', @{$r->{stderr}});
524 }
ba120f6f
MHM
525 }
526
bc24b511 527 # Here, have an apicheck.i. Read it in
ba120f6f
MHM
528 my $fh = IO::File->new('apicheck.i')
529 or die "cannot open apicheck.i: $!";
530
531 local $_;
532 my %symmap;
533 my $cur;
534
535 while (<$fh>) {
536 next if /^#/;
e49c33df 537
bc24b511
KW
538 # We only care about lines within one of our _DPPP_test_ functions. If
539 # we're in one, $cur is set to the name of the current one.
540 if (! defined $cur) { # Not within such a function; see if this starts
541 # one
e49c33df
KW
542 /_DPPP_test_(\w+)/ and $cur = $1;
543 }
544 else {
bc24b511
KW
545
546 # For anything that looks like a symbol, note it as a key, and as its
547 # value, the name of the function. Actually the value is another key,
548 # whose value is the count of this symbol's occurrences, so it looks
549 # like:
550 # 'UV' => {
551 # 'SvUVX' => 1,
552 # 'toFOLD_uvchr' => 2,
553 # 'sv_uni_display' => 1,
554 # ...
555 # }
ba120f6f
MHM
556 for my $sym (/\b([A-Za-z_]\w+)\b/g) {
557 $symmap{$sym}{$cur}++;
558 }
bc24b511
KW
559
560 # This line marks the end of this function, as constructed by us.
ba120f6f
MHM
561 undef $cur if /^}$/;
562 }
ba120f6f
MHM
563 }
564
565 return \%symmap;
566}