This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
perldelta: Revise text on Unicode bug
[perl5.git] / regen / opcode.pl
CommitLineData
d6480c9d 1#!/usr/bin/perl -w
6294c161
DM
2#
3# Regenerate (overwriting only if changed):
4#
5# opcode.h
6# opnames.h
897d3989 7# pp_proto.h
6294c161 8#
f8a58b02 9# from information stored in regen/opcodes, plus the
6294c161
DM
10# values hardcoded into this script in @raw_alias.
11#
12# Accepts the standard regen_lib -q and -v args.
13#
14# This script is normally invoked from regen.pl.
15
d6480c9d
NC
16use strict;
17
36bb303b
NC
18BEGIN {
19 # Get function prototypes
af001346 20 require 'regen/regen_lib.pl';
36bb303b 21}
79072805 22
f038801a
NC
# Output handles for the two headers written while the opcode table is
# processed.  safer_open() is not defined in this file; presumably it comes
# from regen/regen_lib.pl, which is required above -- confirm.
my $oc = safer_open('opcode.h-new', 'opcode.h');
my $on = safer_open('opnames.h-new', 'opnames.h');

# Read data.
#
# Every line of regen/opcodes that is neither empty nor a '#' comment is
# split on runs of tabs into at most five fields:
#   op name, description, check routine, flags, argument spec (optional).

my %seen;       # descriptions and op names already met, for duplicate checks
my (@ops,       # op names in file order
    %desc,      # op name => description
    %check,     # op name => check routine name
    %ckname,    # check routine name => number of ops using it
    %flags,     # op name => raw flags field
    %args,      # op name => raw argument spec ('' when absent)
    %opnum);    # op name => zero-based position in @ops

my $opcodes_file = 'regen/opcodes';
open my $ops_fh, '<', $opcodes_file
    or die "Can't open $opcodes_file: $!";

while (<$ops_fh>) {
    # chomp, not chop: only strip a line terminator, so a final line that
    # lacks a trailing newline keeps its last character.
    chomp;
    next unless $_;
    next if /^#/;
    my ($key, $desc, $check, $flags, $args) = split(/\t+/, $_, 5);
    $args = '' unless defined $args;

    # A repeated description only warns (and not at all for "transr");
    # a repeated op name is fatal.
    warn qq[Description "$desc" duplicates $seen{$desc}\n]
        if $seen{$desc} and $key ne "transr";
    die qq[Opcode "$key" duplicates $seen{$key}\n] if $seen{$key};
    $seen{$desc} = qq[description of opcode "$key"];
    $seen{$key} = qq[opcode "$key"];

    push(@ops, $key);
    $opnum{$key} = $#ops;
    $desc{$key} = $desc;
    $check{$key} = $check;
    $ckname{$check}++;
    $flags{$key} = $flags;
    $args{$key} = $args;
}

close $ops_fh or die "Can't close $opcodes_file: $!";
54
1d5774de
NC
# Set up aliases
#
# %alias maps an op name to [ implementing function, preprocessor guard ].
# The guard is '' when the op is unconditional, otherwise it is the literal
# "#ifdef ..." line under which the op is implemented.

my %alias;

# Format is "this function" => "does these op names"
# An array ref lists unconditional op names; a hash ref maps each op name
# to its guard line ('' for none).
my @raw_alias = (
    Perl_do_kv => [qw( keys values )],
    Perl_unimplemented_op => [qw(padany mapstart custom)],
    # All the ops with a body of { return NORMAL; }
    Perl_pp_null => [qw(scalar regcmaybe lineseq scope)],

    Perl_pp_goto => ['dump'],
    Perl_pp_require => ['dofile'],
    Perl_pp_untie => ['dbmclose'],
    Perl_pp_sysread => {read => '', recv => '#ifdef HAS_SOCKET'},
    Perl_pp_sysseek => ['seek'],
    Perl_pp_ioctl => ['fcntl'],
    Perl_pp_ssockopt => {gsockopt => '#ifdef HAS_SOCKET'},
    Perl_pp_getpeername => {getsockname => '#ifdef HAS_SOCKET'},
    Perl_pp_stat => ['lstat'],
    Perl_pp_ftrowned => [qw(fteowned ftzero ftsock ftchr ftblk
                            ftfile ftdir ftpipe ftsuid ftsgid
                            ftsvtx)],
    Perl_pp_fttext => ['ftbinary'],
    Perl_pp_gmtime => ['localtime'],
    Perl_pp_semget => [qw(shmget msgget)],
    Perl_pp_semctl => [qw(shmctl msgctl)],
    Perl_pp_ghostent => [qw(ghbyname ghbyaddr)],
    Perl_pp_gnetent => [qw(gnbyname gnbyaddr)],
    Perl_pp_gprotoent => [qw(gpbyname gpbynumber)],
    Perl_pp_gservent => [qw(gsbyname gsbyport)],
    Perl_pp_gpwent => [qw(gpwnam gpwuid)],
    Perl_pp_ggrent => [qw(ggrnam ggrgid)],
    Perl_pp_ftis => [qw(ftsize ftmtime ftatime ftctime)],
    Perl_pp_chown => [qw(unlink chmod utime kill)],
    Perl_pp_link => ['symlink'],
    Perl_pp_ftrread => [qw(ftrwrite ftrexec fteread ftewrite
                           fteexec)],
    Perl_pp_shmwrite => [qw(shmread msgsnd msgrcv semop)],
    Perl_pp_syswrite => {send => '#ifdef HAS_SOCKET'},
    Perl_pp_defined => [qw(dor dorassign)],
    Perl_pp_and => ['andassign'],
    Perl_pp_or => ['orassign'],
    Perl_pp_ucfirst => ['lcfirst'],
    Perl_pp_sle => [qw(slt sgt sge)],
    Perl_pp_print => ['say'],
    Perl_pp_index => ['rindex'],
    Perl_pp_oct => ['hex'],
    Perl_pp_shift => ['pop'],
    Perl_pp_sin => [qw(cos exp log sqrt)],
    Perl_pp_bit_or => ['bit_xor'],
    Perl_pp_rv2av => ['rv2hv'],
    Perl_pp_akeys => ['avalues'],
    Perl_pp_rkeys => [qw(rvalues reach)],
    Perl_pp_trans => [qw(trans transr)],
    Perl_pp_chop => [qw(chop chomp)],
    Perl_pp_schop => [qw(schop schomp)],
    Perl_pp_bind => {connect => '#ifdef HAS_SOCKET'},
    Perl_pp_preinc => ['i_preinc'],
    Perl_pp_predec => ['i_predec'],
    Perl_pp_postinc => ['i_postinc'],
    Perl_pp_postdec => ['i_postdec'],
    Perl_pp_ehostent => [qw(enetent eprotoent eservent
                            spwent epwent sgrent egrent)],
    Perl_pp_shostent => [qw(snetent sprotoent sservent)],
);

# Consume the table two elements at a time (function, names); @raw_alias is
# empty once this loop finishes.
while (my ($func, $names) = splice @raw_alias, 0, 2) {
    if (ref $names eq 'ARRAY') {
        foreach (@$names) {
            $alias{$_} = [$func, ''];
        }
    } else {
        while (my ($opname, $cond) = each %$names) {
            $alias{$opname} = [$func, $cond];
        }
    }
}

# These socket ops are implemented by their own Perl_pp_* function but only
# under HAS_SOCKET, so they need an entry purely for the guard.
foreach my $sock_func (qw(socket bind listen accept shutdown
                          ssockopt getpeername)) {
    $alias{$sock_func} = ["Perl_pp_$sock_func", '#ifdef HAS_SOCKET'];
}
138
79072805
LW
# Emit defines.
#
# After the file banner, write "#define Perl_pp_X <impl>" for every op whose
# implementing function differs from its own Perl_pp_X name.  Ops that carry
# a preprocessor guard are wrapped in that guard, with an #else branch that
# maps each of them to Perl_unimplemented_op.

print $oc read_only_top(lang => 'C', by => 'regen/opcode.pl', from => 'its data',
                        file => 'opcode.h', style => '*',
                        copyright => [1993 .. 2007]),
    "#ifndef PERL_GLOBAL_STRUCT_INIT\n\n";

{
    my $active_cond = '';   # guard line currently open in the output ('' = none)
    my @pending;            # Perl_pp_* names seen since that guard was opened

    # Close the currently open guard, if any ops were collected under it:
    # emit the #else fallbacks and the #endif, then forget the collected ops.
    sub unimplemented {
        return unless @pending;

        print $oc "#else\n";
        print $oc "#define $_ Perl_unimplemented_op\n" foreach @pending;
        print $oc "#endif\n";
        @pending = ();
    }

    foreach my $op (@ops) {
        my $op_func = "Perl_pp_$op";
        my ($impl, $cond) = $alias{$op} ? @{$alias{$op}} : ($op_func, '');

        unless ($cond eq $active_cond) {
            # The guard changed (possibly to or from "no guard").
            unimplemented();
            $active_cond = $cond;
            print $oc "$active_cond\n" if $active_cond;
        }
        push @pending, $op_func if $active_cond;
        print $oc "#define $op_func $impl\n" unless $impl eq $op_func;
    }
    # The final op may have left a guard open.
    unimplemented();
}
180
78102347
NC
# opnames.h: the file banner, then "enum opcode" with one OP_<NAME> member
# per op numbered in table order, a trailing OP_max member, and MAXO defined
# as the number of ops.
print $on read_only_top(lang => 'C', by => 'regen/opcode.pl',
                        from => 'its data', style => '*',
                        file => 'opnames.h', copyright => [1999 .. 2008]),
    "typedef enum opcode {\n";

my $i = 0;
foreach my $op (@ops) {
    print $on "\t", tab(3, "OP_" . uc($op)), " = ", $i++, ",\n";
}
print $on "\t", tab(3, "OP_max"), "\n";
print $on "} opcode;\n";
print $on "\n#define MAXO ", scalar @ops, "\n";
79072805 193
# Emit op names and descriptions.
#
# Two parallel C string arrays in opcode.h, one entry per op in table order:
# PL_op_name holds the op names, PL_op_desc the descriptions.

print $oc <<'END';
START_EXTERN_C

#ifndef DOINIT
EXTCONST char* const PL_op_name[];
#else
EXTCONST char* const PL_op_name[] = {
END

print $oc qq(\t"$_",\n) foreach @ops;

print $oc <<'END';
};
#endif

#ifndef DOINIT
EXTCONST char* const PL_op_desc[];
#else
EXTCONST char* const PL_op_desc[] = {
END

foreach my $op (@ops) {
    # Backslashes and double quotes must be escaped inside a C string.
    (my $safe_desc = $desc{$op}) =~ s/([\\"])/\\$1/g;

    print $oc qq(\t"$safe_desc",\n);
}

print $oc <<'END';
};
#endif

END_EXTERN_C

#endif /* !PERL_GLOBAL_STRUCT_INIT */
END
79072805 236
79072805
LW
# Emit ppcode switch array.
#
# One Perl_pp_<op> entry per op; when %alias names a different implementing
# function, that function is mentioned in a trailing C comment.

print $oc <<'END';

START_EXTERN_C

#ifdef PERL_GLOBAL_STRUCT_INIT
# define PERL_PPADDR_INITED
static const Perl_ppaddr_t Gppaddr[]
#else
# ifndef PERL_GLOBAL_STRUCT
# define PERL_PPADDR_INITED
EXT Perl_ppaddr_t PL_ppaddr[] /* or perlvars.h */
# endif
#endif /* PERL_GLOBAL_STRUCT */
#if (defined(DOINIT) && !defined(PERL_GLOBAL_STRUCT)) || defined(PERL_GLOBAL_STRUCT_INIT)
# define PERL_PPADDR_INITED
= {
END

foreach my $op (@ops) {
    my $op_func = "Perl_pp_$op";
    my $impl    = $alias{$op} ? $alias{$op}[0] : $op_func;
    my $note    = $impl eq $op_func ? '' : "\t/* implemented by $impl */";
    print $oc "\t$op_func,$note\n";
}

# Close the ppaddr array, then open the array of check routines.
print $oc <<'END';
}
#endif
#ifdef PERL_PPADDR_INITED
;
#endif

#ifdef PERL_GLOBAL_STRUCT_INIT
# define PERL_CHECK_INITED
static const Perl_check_t Gcheck[]
#else
# ifndef PERL_GLOBAL_STRUCT
# define PERL_CHECK_INITED
EXT Perl_check_t PL_check[] /* or perlvars.h */
# endif
#endif
#if (defined(DOINIT) && !defined(PERL_GLOBAL_STRUCT)) || defined(PERL_GLOBAL_STRUCT_INIT)
# define PERL_CHECK_INITED
= {
END

# One "Perl_<check routine>," entry per op, annotated with the op name.
foreach my $op (@ops) {
    print $oc "\t", tab(3, "Perl_$check{$op},"), "\t/* $op */\n";
}
292
# Close the check array and open PL_opargs.
print $oc <<'END';
}
#endif
#ifdef PERL_CHECK_INITED
;
#endif /* #ifdef PERL_CHECK_INITED */

#ifndef PERL_GLOBAL_STRUCT_INIT

#ifndef DOINIT
EXTCONST U32 PL_opargs[];
#else
EXTCONST U32 PL_opargs[] = {
END

# Emit allowed argument types.
#
# Each op gets one 32-bit word: the flag bits from %opflags at the bottom,
# the op class from %opclass shifted left by $OCSHIFT, and then one 4-bit
# code per argument starting at bit $OASHIFT.

my $ARGBITS = 32;

# Argument type letter => code.
my %argnum = (
    'S' => 1,           # scalar
    'L' => 2,           # list
    'A' => 3,           # array value
    'H' => 4,           # hash value
    'C' => 5,           # code value
    'F' => 6,           # file value
    'R' => 7,           # scalar reference
);

# Op class character => class number.
my %opclass = (
    '0' => 0,           # baseop
    '1' => 1,           # unop
    '2' => 2,           # binop
    '|' => 3,           # logop
    '@' => 4,           # listop
    '/' => 5,           # pmop
    '$' => 6,           # svop_or_padop
    '#' => 7,           # padop
    '"' => 8,           # pvop_or_svop
    '{' => 9,           # loop
    ';' => 10,          # cop
    '%' => 11,          # baseop_or_unop
    '-' => 12,          # filestatop
    '}' => 13,          # loopexop
);

# Flag letter => bit(s) set in the low part of the word.
my %opflags = (
    'm' => 1,           # needs stack mark
    'f' => 2,           # fold constants
    's' => 4,           # always produces scalar
    't' => 8,           # needs target scalar
    'T' => 8 | 16,      # ... which may be lexical
    'i' => 0,           # always produces integer (unused since e7311069)
    'I' => 32,          # has corresponding int op
    'd' => 64,          # danger, unknown side effects
    'u' => 128,         # defaults to $_
);

# Op name => op number, for ops whose file argument carries the
# corresponding marker ('s', '-' or '+').
my %OP_IS_SOCKET;
my %OP_IS_FILETEST;
my %OP_IS_FT_ACCESS;
my $OCSHIFT = 8;
my $OASHIFT = 12;

foreach my $op (@ops) {
    my $word  = 0;
    my $class = $flags{$op};

    # Strip every recognised flag letter from the flags field; whatever
    # remains has to be exactly one op-class character.
    foreach my $letter (keys %opflags) {
        next unless $class =~ s/$letter//;
        die "Flag collision for '$op' ($flags{$op}, $letter)\n"
            if $word & $opflags{$letter};
        $word |= $opflags{$letter};
    }
    die qq[Opcode '$op' has no class indicator ($flags{$op} => $class)\n]
        unless exists $opclass{$class};
    $word |= $opclass{$class} << $OCSHIFT;

    my $shift = $OASHIFT;
    foreach my $spec (split(' ', $args{$op})) {
        if ($spec =~ /^F/) {
            # record opnums of these opnames
            $OP_IS_SOCKET{$op}    = $opnum{$op} if $spec =~ s/s//;
            $OP_IS_FILETEST{$op}  = $opnum{$op} if $spec =~ s/-//;
            $OP_IS_FT_ACCESS{$op} = $opnum{$op} if $spec =~ s/\+//;
        }
        # A '?' in the spec adds 8 to the argument's type code.
        my $code = ($spec =~ s/\?//) ? 8 : 0;
        die "op = $op, arg = $spec\n"
            unless exists $argnum{$spec};
        $code += $argnum{$spec};
        die "Argument overflow for '$op'\n"
            if $shift >= $ARGBITS ||
               $code > ((1 << ($ARGBITS - $shift)) - 1);
        $word += $code << $shift;
        $shift += 4;
    }
    print $oc "\t", tab(3, sprintf("0x%08x,", $word)), "/* $op */\n";
}

print $oc <<'END';
};
#endif

#endif /* !PERL_GLOBAL_STRUCT_INIT */

END_EXTERN_C
END
400
e71197e2
JC
# Emit OP_IS_* macros
#
# An explanatory C comment goes into opnames.h first, followed by one macro
# for each set of ops recorded while the argument words were built.

print $on <<'EO_OP_IS_COMMENT';

/* the OP_IS_(SOCKET|FILETEST) macros are optimized to a simple range
 check because all the member OPs are contiguous in opcode.pl
 <OPS> table. opcode.pl verifies the range contiguity. */
EO_OP_IS_COMMENT

foreach my $set ([ \%OP_IS_SOCKET,    'OP_IS_SOCKET' ],
                 [ \%OP_IS_FILETEST,  'OP_IS_FILETEST' ],
                 [ \%OP_IS_FT_ACCESS, 'OP_IS_FILETEST_ACCESS' ]) {
    gen_op_is_macro(@$set);
}
e71197e2
JC
413
# gen_op_is_macro(\%op_is, $macname)
#
# Print to $on a C macro "$macname(op)" that is true when op is one of the
# ops in %op_is, whose keys are op names and whose values are op numbers.
#
#   - Empty set: nothing is printed.
#   - Two or more ops whose numbers form one unbroken run: a range check
#     against the lowest- and highest-numbered op.
#   - Anything else, including a set of exactly one op: a chain of equality
#     tests, in sorted op-name order.
#
# A one-op set used to die with "Invalid range of ops"; it is now handled
# by the equality form.
sub gen_op_is_macro {
    my ($op_is, $macname) = @_;
    return unless keys %$op_is;

    # Op names ordered by op number; lowest and highest bound the range.
    my @by_num = sort { $op_is->{$a} <=> $op_is->{$b} } keys %$op_is;
    my ($first, $last) = @by_num[0, -1];

    print $on "\n#define $macname(op) \\\n\t(";

    # N distinct numbers are contiguous exactly when highest - lowest is
    # N - 1 (the op numbers are assumed to contain no duplicates).
    if (@by_num > 1 && $op_is->{$last} - $op_is->{$first} == $#by_num) {

        # contiguous ops -> optimized version
        print $on "(op) >= OP_" . uc($first) . " && (op) <= OP_" . uc($last);
    }
    else {
        print $on join(" || \\\n\t ",
                       map { "(op) == OP_" . uc() } sort keys %$op_is);
    }
    print $on ")\n";
}
443
# pp_proto.h: one prototype per distinct implementing function, sorted by
# name.  Ops that share an implementation through %alias yield a single
# prototype.
my $pp = safer_open('pp_proto.h-new', 'pp_proto.h');

# 'by' names this script by the same path that the opcode.h and opnames.h
# banners use.
print $pp read_only_top(lang => 'C', by => 'regen/opcode.pl', from => 'its data');

{
    my %funcs;      # implementing function name => number of ops using it
    for (@ops) {
        my $name = $alias{$_} ? $alias{$_}[0] : "Perl_pp_$_";
        ++$funcs{$name};
    }
    print $pp "PERL_CALLCONV OP *$_(pTHX);\n" foreach sort keys %funcs;
}

# Finish all three generated files.
foreach ($oc, $on, $pp) {
    read_only_bottom_close_and_rename($_);
}
b162f9ea
IZ
459
460# Some comments about 'T' opcode classifier:
461
462# Safe to set if the ppcode uses:
463# tryAMAGICbin, tryAMAGICun, SETn, SETi, SETu, PUSHn, PUSHTARG, SETTARG,
464# SETs(TARG), XPUSHn, XPUSHu,
465
466# Unsafe to set if the ppcode uses dTARG or [X]RETPUSH[YES|NO|UNDEF]
467
468# lt and friends do SETs (including ncmp, but not scmp)
469
21f5b33c
GS
# Additional mode of failure: the opcode can modify TARG before it has "used"
# all the arguments (or may call an external function which does the same).
# If the target coincides with one of the arguments ==> kaboom.
473
b162f9ea
IZ
474# pp.c pos substr each not OK (RETPUSHUNDEF)
475# substr vec also not OK due to LV to target (are they???)
476# ref not OK (RETPUSHNO)
477# trans not OK (dTARG; TARG = sv_newmortal();)
478# ucfirst etc not OK: TMP arg processed inplace
69b47968 479# quotemeta not OK (unsafe when TARG == arg)
91e74348 480# each repeat not OK too due to list context
b162f9ea 481# pack split - unknown whether they are safe
dae78bb1
IZ
482# sprintf: is calling do_sprintf(TARG,...) which can act on TARG
483# before other args are processed.
b162f9ea 484
21f5b33c
GS
485# Suspicious wrt "additional mode of failure" (and only it):
486# schop, chop, postinc/dec, bit_and etc, negate, complement.
487
488# Also suspicious: 4-arg substr, sprintf, uc/lc (POK_only), reverse, pack.
489
490# substr/vec: doing TAINT_off()???
491
b162f9ea
IZ
492# pp_hot.c
493# readline - unknown whether it is safe
494# match subst not OK (dTARG)
495# grepwhile not OK (not always setting)
69b47968 496# join not OK (unsafe when TARG == arg)
b162f9ea 497
21f5b33c
GS
498# Suspicious wrt "additional mode of failure": concat (dealt with
499# in ck_sassign()), join (same).
500
b162f9ea
IZ
501# pp_ctl.c
502# mapwhile flip caller not OK (not always setting)
503
504# pp_sys.c
505# backtick glob warn die not OK (not always setting)
506# warn not OK (RETPUSHYES)
507# open fileno getc sysread syswrite ioctl accept shutdown
508# ftsize(etc) readlink telldir fork alarm getlogin not OK (RETPUSHUNDEF)
509# umask select not OK (XPUSHs(&PL_sv_undef);)
510# fileno getc sysread syswrite tell not OK (meth("FILENO" "GETC"))
511# sselect shm* sem* msg* syscall - unknown whether they are safe
512# gmtime not OK (list context)
513
21f5b33c 514# Suspicious wrt "additional mode of failure": warn, die, select.