This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Create `defer` syntax and `OP_PUSHDEFER` opcode
[perl5.git] / regen / regcomp.pl
CommitLineData
916e4025 1#!/usr/bin/perl -w
f83e001e
YO
2#
3#
6294c161
DM
4# Regenerate (overwriting only if changed):
5#
65aa4ca7 6# pod/perldebguts.pod
6294c161
DM
7# regnodes.h
8#
9# from information stored in
10#
11# regcomp.sym
12# regexp.h
13#
65aa4ca7
FC
14# pod/perldebguts.pod is not completely regenerated. Only the table of
15# regexp nodes is replaced; other parts remain unchanged.
16#
6294c161
DM
17# Accepts the standard regen_lib -q and -v args.
18#
19# This script is normally invoked from regen.pl.
20
36bb303b
NC
21BEGIN {
22 # Get function prototypes
3d7c117d 23 require './regen/regen_lib.pl';
36bb303b 24}
ce553cf5 25
03363afd 26use strict;
03363afd 27
f83e001e
YO
28# NOTE I don't think anyone actually knows what all of these properties mean,
29# and I suspect some of them are outright unused. This is a first attempt to
30# clean up the generation so maybe one day we can move to something more self
31# documenting. (One might argue that an array of hashes of properties would
32# be easier to use.)
33#
34# Why we use the term regnode and nodes, and not say, opcodes, I am not sure.
35
36# General thoughts:
37# 1. We use a single continuum to represent both opcodes and states,
38# and in regexec.c we switch on the combined set.
39# 2. Opcodes have more information associated to them, states are simpler,
40# basically just an identifier/number that can be used to switch within
41# the state machine.
42# 3. Some opcode are order dependent.
43# 4. Output files often use "tricks" to reduce diff effects. Some of what
44# we do below is more clumsy looking than it could be because of this.
45
46# Op/state properties:
47#
48# Property In Descr
49# ----------------------------------------------------------------------------
50# name Both Name of op/state
51# id Both integer value for this opcode/state
52# optype Both Either 'op' or 'state'
e21ef692 53# line_num Both line_num number of the input file for this item.
f83e001e 54# type Op Type of node (aka regkind)
e21ef692
KW
55# code Op Apparently not used
56# suffix Op which regnode struct this uses, so if this is '1', it
57# uses 'struct regnode_1'
58# flags Op S for simple; V for varies
46167d76 59# longj Op Boolean as to if this node is a longjump
e21ef692
KW
60# comment Both Comment about node, if any. Placed in perlredebguts
61# as its description
f83e001e 62# pod_comment Both Special comments for pod output (preceding lines in def)
e21ef692 63# Such lines begin with '#*'
f83e001e
YO
64
65# Global State
66my @all; # all opcodes/state
67my %all; # hash of all opcode/state names
68
69my @ops; # array of just opcodes
70my @states; # array of just states
71
72my $longest_name_length= 0; # track lengths of names for nicer reports
73my (%type_alias); # map the type (??)
74
75# register a newly constructed node into our state tables.
76# ensures that we have no name collisions (on name anyway),
77# and issues the "id" for the node.
78sub register_node {
79 my ($node)= @_;
80
81 if ( $all{ $node->{name} } ) {
82 die "Duplicate item '$node->{name}' in regcomp.sym line $node->{line_num} "
83 . "previously defined on line $all{ $node->{name} }{line_num}\n";
84 } elsif (!$node->{optype}) {
85 die "must have an optype in node ", Dumper($node);
86 } elsif ($node->{optype} eq "op") {
87 push @ops, $node;
88 } elsif ($node->{optype} eq "state") {
89 push @states, $node;
90 } else {
91 die "Uknown optype '$node->{optype}' in ", Dumper($node);
03363afd 92 }
f83e001e
YO
93 $node->{id}= 0 + @all;
94 push @all, $node;
95 $all{ $node->{name} }= $node;
46167d76
KW
96
97 if ($node->{longj} && $node->{longj} != 1) {
98 die "longj field must be in [01] if present in ", Dumper($node);
99 }
100
f83e001e 101}
d3d47aac 102
f83e001e 103# Parse and add an opcode definition to the global state.
e21ef692 104# What an opcode definition looks like is given in regcomp.sym.
f83e001e 105#
e21ef692
KW
106# Not every opcode definition has all of the components. We should maybe make
107# this nicer/easier to read in the future. Also note that the above is tab
f83e001e
YO
108# sensitive.
109
e21ef692
KW
110# Special comments for an entry precede it, and begin with '#*' and are placed
111# in the generated pod file just before the entry.
112
f83e001e
YO
113sub parse_opcode_def {
114 my ( $text, $line_num, $pod_comment )= @_;
115 my $node= {
116 line_num => $line_num,
117 pod_comment => $pod_comment,
118 optype => "op",
119 };
d3d47aac 120
f83e001e
YO
121 # first split the line into three, the initial NAME, a middle part
122 # that we call "desc" which contains various (not well documented) things,
123 # and a comment section.
124 @{$node}{qw(name desc comment)}= /^(\S+)\s+([^\t]+?)\s*;\s*(.*)/
125 or die "Failed to match $_";
d3d47aac 126
f83e001e 127 # the content of the "desc" field from the first step is extracted here:
484678fc 128 @{$node}{qw(type code suffix flags longj)}= split /[,\s]\s*/, $node->{desc};
d3d47aac 129
d1bd48a0 130 defined $node->{$_} or $node->{$_} = ""
484678fc 131 for qw(type code suffix flags longj);
f83e001e
YO
132
133 register_node($node); # has to be before the type_alias code below
134
135 if ( !$all{ $node->{type} } and !$type_alias{ $node->{type} } ) {
136
137 #warn "Regop type '$node->{type}' from regcomp.sym line $line_num"
138 # ." is not an existing regop, and will be aliased to $node->{name}\n"
139 # if -t STDERR;
140 $type_alias{ $node->{type} }= $node->{name};
141 }
142
143 $longest_name_length= length $node->{name}
144 if length $node->{name} > $longest_name_length;
145}
146
147# parse out a state definition and add the resulting data
148# into the global state. may create multiple new states from
149# a single definition (this is part of the point).
150# Format for states:
151# REGOP \t typelist [ \t typelist]
152# typelist= namelist
153# = namelist:FAIL
154# = name:count
155# Eg:
156# WHILEM A_pre,A_min,A_max,B_min,B_max:FAIL
157# BRANCH next:FAIL
158# CURLYM A,B:FAIL
159#
160# The CURLYM definition would create the states:
161# CURLYM_A, CURLYM_A_fail, CURLYM_B, CURLYM_B_fail
162sub parse_state_def {
163 my ( $text, $line_num, $pod_comment )= @_;
164 my ( $type, @lists )= split /\s+/, $text;
165 die "No list? $type" if !@lists;
166 foreach my $list (@lists) {
167 my ( $names, $special )= split /:/, $list, 2;
168 $special ||= "";
169 foreach my $name ( split /,/, $names ) {
170 my $real=
171 $name eq 'resume'
172 ? "resume_$type"
173 : "${type}_$name";
174 my @suffix;
175 if ( !$special ) {
176 @suffix= ("");
177 }
178 elsif ( $special =~ /\d/ ) {
179 @suffix= ( 1 .. $special );
180 }
181 elsif ( $special eq 'FAIL' ) {
182 @suffix= ( "", "_fail" );
183 }
184 else {
185 die "unknown :type ':$special'";
186 }
187 foreach my $suffix (@suffix) {
188 my $node= {
189 name => "$real$suffix",
190 optype => "state",
191 type => $type || "",
192 comment => "state for $type",
193 line_num => $line_num,
194 };
195 register_node($node);
03363afd
YO
196 }
197 }
03363afd
YO
198 }
199}
d09b2d29 200
f9ef50a7 201sub process_flags {
f83e001e
YO
202 my ( $flag, $varname, $comment )= @_;
203 $comment= '' unless defined $comment;
204
205 my @selected;
206 my $bitmap= '';
207 for my $node (@ops) {
208 my $set= $node->{flags} && $node->{flags} eq $flag ? 1 : 0;
209
210 # Whilst I could do this with vec, I'd prefer to do longhand the arithmetic
211 # ops in the C code.
212 my $current= do {
213 no warnings;
214 ord substr $bitmap, ( $node->{id} >> 3 );
215 };
216 substr( $bitmap, ( $node->{id} >> 3 ), 1 )=
217 chr( $current | ( $set << ( $node->{id} & 7 ) ) );
218
219 push @selected, $node->{name} if $set;
220 }
221 my $out_string= join ', ', @selected, 0;
222 $out_string =~ s/(.{1,70},) /$1\n /g;
ded4dd2a 223
f83e001e 224 my $out_mask= join ', ', map { sprintf "0x%02X", ord $_ } split '', $bitmap;
ded4dd2a 225
f83e001e 226 return $comment . <<"EOP";
ded4dd2a 227#define REGNODE_\U$varname\E(node) (PL_${varname}_bitmask[(node) >> 3] & (1 << ((node) & 7)))
e52fc539 228
f9ef50a7 229#ifndef DOINIT
916e4025 230EXTCONST U8 PL_${varname}\[] __attribute__deprecated__;
f9ef50a7 231#else
916e4025 232EXTCONST U8 PL_${varname}\[] __attribute__deprecated__ = {
f9ef50a7
NC
233 $out_string
234};
235#endif /* DOINIT */
236
ded4dd2a
NC
237#ifndef DOINIT
238EXTCONST U8 PL_${varname}_bitmask[];
239#else
240EXTCONST U8 PL_${varname}_bitmask[] = {
241 $out_mask
242};
243#endif /* DOINIT */
f9ef50a7
NC
244EOP
245}
246
938090ac
KW
247sub print_process_EXACTish {
248 my ($out)= @_;
249
250 # Creates some bitmaps for EXACTish nodes.
251
252 my @folded;
253 my @req8;
254
255 my $base;
256 for my $node (@ops) {
257 next unless $node->{type} eq 'EXACT';
258 my $name = $node->{name};
259 $base = $node->{id} if $name eq 'EXACT';
260
261 my $index = $node->{id} - $base;
262
263 # This depends entirely on naming conventions in regcomp.sym
264 $folded[$index] = $name =~ /^EXACTF/ || 0;
265 $req8[$index] = $name =~ /8/ || 0;
266 }
267
268 die "Can't cope with > 32 EXACTish nodes" if @folded > 32;
269
270 my $exactf = sprintf "%X", oct("0b" . join "", reverse @folded);
271 my $req8 = sprintf "%X", oct("0b" . join "", reverse @req8);
272 print $out <<EOP,
273
274/* Is 'op', known to be of type EXACT, folding? */
275#define isEXACTFish(op) (__ASSERT_(PL_regkind[op] == EXACT) (PL_EXACTFish_bitmask & (1U << (op - EXACT))))
276
277/* Do only UTF-8 target strings match 'op', known to be of type EXACT? */
278#define isEXACT_REQ8(op) (__ASSERT_(PL_regkind[op] == EXACT) (PL_EXACT_REQ8_bitmask & (1U << (op - EXACT))))
279
280#ifndef DOINIT
281EXTCONST U32 PL_EXACTFish_bitmask;
282EXTCONST U32 PL_EXACT_REQ8_bitmask;
283#else
284EXTCONST U32 PL_EXACTFish_bitmask = 0x$exactf;
285EXTCONST U32 PL_EXACT_REQ8_bitmask = 0x$req8;
286#endif /* DOINIT */
287EOP
288}
289
f83e001e
YO
290sub read_definition {
291 my ( $file )= @_;
292 my ( $seen_sep, $pod_comment )= "";
293 open my $in_fh, "<", $file
294 or die "Failed to open '$file' for reading: $!";
295 while (<$in_fh>) {
296
297 # Special pod comments
298 if (/^#\* ?/) { $pod_comment .= "# $'"; }
299
300 # Truly blank lines possibly surrounding pod comments
301 elsif (/^\s*$/) { $pod_comment .= "\n" }
302
303 next if /\A\s*#/ || /\A\s*\z/;
304
305 s/\s*\z//;
306 if (/^-+\s*$/) {
307 $seen_sep= 1;
308 next;
309 }
310
311 if ($seen_sep) {
312 parse_state_def( $_, $., $pod_comment );
313 }
314 else {
315 parse_opcode_def( $_, $., $pod_comment );
316 }
317 $pod_comment= "";
318 }
319 close $in_fh;
320 die "Too many regexp/state opcodes! Maximum is 256, but there are ", 0 + @all,
321 " in file!"
322 if @all > 256;
323}
324
325# use fixed width to keep the diffs between regcomp.pl recompiles
326# as small as possible.
519d76f5 327my ( $base_name_width, $rwidth, $twidth )= ( 22, 12, 9 );
f83e001e
YO
328
329sub print_state_defs {
330 my ($out)= @_;
331 printf $out <<EOP,
6bda09f9
YO
332/* Regops and State definitions */
333
03363afd
YO
334#define %*s\t%d
335#define %*s\t%d
336
d09b2d29 337EOP
519d76f5 338 -$base_name_width,
f83e001e 339 REGNODE_MAX => $#ops,
519d76f5 340 -$base_name_width, REGMATCH_STATE_MAX => $#all;
f83e001e
YO
341
342 my %rev_type_alias= reverse %type_alias;
ce553cf5
KW
343 my $base_format = "#define %*s\t%d\t/* %#04x %s */\n";
344 my @withs;
345 my $in_states = 0;
346
347 my $max_name_width = 0;
348 for my $ref (\@ops, \@states) {
349 for my $node ($ref->@*) {
350 my $len = length $node->{name};
351 $max_name_width = $len if $max_name_width < $len;
352 }
353 }
354
355 die "Do a white-space only commit to increase \$base_name_width to"
356 . " $max_name_width; then re-run" if $base_name_width < $max_name_width;
357
358 print $out <<EOT;
bb24c5fa
KW
359/* -- For regexec.c to switch on target being utf8 (t8) or not (tb, b='byte'); */
360#define with_t_UTF8ness(op, t_utf8) (((op) << 1) + (cBOOL(t_utf8)))
361/* -- same, but also with pattern (p8, pb) -- */
ce553cf5
KW
362#define with_tp_UTF8ness(op, t_utf8, p_utf8) \\
363\t\t(((op) << 2) + (cBOOL(t_utf8) << 1) + cBOOL(p_utf8))
364
365/* The #defines below give both the basic regnode and the expanded version for
366 switching on utf8ness */
367EOT
368
f83e001e 369 for my $node (@ops) {
ce553cf5 370 print_state_def_line($out, $node->{name}, $node->{id}, $node->{comment});
f83e001e 371 if ( defined( my $alias= $rev_type_alias{ $node->{name} } ) ) {
ce553cf5 372 print_state_def_line($out, $alias, $node->{id}, $node->{comment});
f83e001e
YO
373 }
374 }
d3d47aac 375
f83e001e
YO
376 print $out "\t/* ------------ States ------------- */\n";
377 for my $node (@states) {
ce553cf5
KW
378 print_state_def_line($out, $node->{name}, $node->{id}, $node->{comment});
379 }
380}
381
382sub print_state_def_line
383{
384 my ($fh, $name, $id, $comment) = @_;
385
bb24c5fa 386 # The sub-names are like '_tb' or '_tb_p8' = max 6 chars wide
ce553cf5
KW
387 my $name_col_width = $base_name_width + 6;
388 my $base_id_width = 3; # Max is '255' or 3 cols
bb24c5fa 389 my $mid_id_width = 3; # Max is '511' or 3 cols
ce553cf5
KW
390 my $full_id_width = 3; # Max is '1023' but not close to using the 4th
391
392 my $line = "#define " . $name;
393 $line .= " " x ($name_col_width - length($name));
394
395 $line .= sprintf "%*s", $base_id_width, $id;
bb24c5fa 396 $line .= " " x $mid_id_width;
ce553cf5
KW
397 $line .= " " x ($full_id_width + 2);
398
399 $line .= "/* ";
400 my $hanging = length $line; # Indent any subsequent line to this pos
401 $line .= sprintf "0x%02x", $id;
402
c1ec4bdd 403 my $columns = 78;
ce553cf5 404
c1ec4bdd
KW
405 # From the documentation: 'In fact, every resulting line will have length
406 # of no more than "$columns - 1"'
ce553cf5
KW
407 $line = wrap($columns + 1, "", " " x $hanging, "$line $comment");
408 chomp $line; # wrap always adds a trailing \n
409 $line =~ s/ \s+ $ //x; # trim, just in case.
410
411 # The comment may have wrapped. Find the final \n and measure the length
412 # to the end. If it is short enough, just append the ' */' to the line.
413 # If it is too close to the end of the space available, add an extra line
414 # that consists solely of blanks and the ' */'
415 my $len = length($line); my $rindex = rindex($line, "\n");
416 if (length($line) - rindex($line, "\n") - 1 <= $columns - 3) {
417 $line .= " */\n";
f83e001e 418 }
ce553cf5
KW
419 else {
420 $line .= "\n" . " " x ($hanging - 3) . "*/\n";
421 }
422
423 print $fh $line;
424
bb24c5fa
KW
425 # And add the 2 subsidiary #defines used when switching on
426 # with_t_UTF8nes()
427 my $with_id_t = $id * 2;
428 for my $with (qw(tb t8)) {
429 my $with_name = "${name}_$with";
430 print $fh "#define ", $with_name;
431 print $fh " " x ($name_col_width - length($with_name) + $base_id_width);
432 printf $fh "%*s", $mid_id_width, $with_id_t;
433 print $fh " " x $full_id_width;
434 printf $fh " /*";
435 print $fh " " x (4 + 2); # 4 is width of 0xHH that the base entry uses
436 printf $fh "0x%03x */\n", $with_id_t;
437
438 $with_id_t++;
439 }
440
441 # Finally add the 4 subsidiary #defines used when switching on
ce553cf5 442 # with_tp_UTF8nes()
bb24c5fa 443 my $with_id_tp = $id * 4;
ce553cf5
KW
444 for my $with (qw(tb_pb tb_p8 t8_pb t8_p8)) {
445 my $with_name = "${name}_$with";
446 print $fh "#define ", $with_name;
bb24c5fa
KW
447 print $fh " " x ($name_col_width - length($with_name) + $base_id_width + $mid_id_width);
448 printf $fh "%*s", $full_id_width, $with_id_tp;
ce553cf5
KW
449 printf $fh " /*";
450 print $fh " " x (4 + 2); # 4 is width of 0xHH that the base entry uses
bb24c5fa 451 printf $fh "0x%03x */\n", $with_id_tp;
ce553cf5 452
bb24c5fa 453 $with_id_tp++;
ce553cf5
KW
454 }
455
456 print $fh "\n"; # Blank line separates groups for clarity
d09b2d29
IZ
457}
458
f83e001e
YO
459sub print_regkind {
460 my ($out)= @_;
461 print $out <<EOP;
03363afd 462
6bda09f9 463/* PL_regkind[] What type of regop or state is this. */
d09b2d29
IZ
464
465#ifndef DOINIT
22c35a8c 466EXTCONST U8 PL_regkind[];
d09b2d29 467#else
22c35a8c 468EXTCONST U8 PL_regkind[] = {
d09b2d29 469EOP
f83e001e
YO
470 use Data::Dumper;
471 foreach my $node (@all) {
472 print Dumper($node) if !defined $node->{type} or !defined( $node->{name} );
473 printf $out "\t%*s\t/* %*s */\n",
519d76f5 474 -1 - $twidth, "$node->{type},", -$base_name_width, $node->{name};
f83e001e
YO
475 print $out "\t/* ------------ States ------------- */\n"
476 if $node->{id} == $#ops and $node->{id} != $#all;
477 }
d09b2d29 478
f83e001e 479 print $out <<EOP;
d09b2d29
IZ
480};
481#endif
f83e001e
YO
482EOP
483}
484
485sub wrap_ifdef_print {
486 my $out= shift;
487 my $token= shift;
488 print $out <<EOP;
489
490#ifdef $token
491EOP
492 $_->($out) for @_;
493 print $out <<EOP;
494#endif /* $token */
495
496EOP
497}
498
499sub print_regarglen {
500 my ($out)= @_;
501 print $out <<EOP;
d09b2d29 502
6bda09f9 503/* regarglen[] - How large is the argument part of the node (in regnodes) */
d09b2d29 504
29de9391 505static const U8 regarglen[] = {
d09b2d29
IZ
506EOP
507
f83e001e
YO
508 foreach my $node (@ops) {
509 my $size= 0;
484678fc 510 $size= "EXTRA_SIZE(struct regnode_$node->{suffix})" if $node->{suffix};
d09b2d29 511
f83e001e
YO
512 printf $out "\t%*s\t/* %*s */\n", -37, "$size,", -$rwidth, $node->{name};
513 }
514
515 print $out <<EOP;
d09b2d29 516};
f83e001e
YO
517EOP
518}
519
520sub print_reg_off_by_arg {
521 my ($out)= @_;
522 print $out <<EOP;
d09b2d29 523
6bda09f9
YO
524/* reg_off_by_arg[] - Which argument holds the offset to the next node */
525
29de9391 526static const char reg_off_by_arg[] = {
d09b2d29
IZ
527EOP
528
f83e001e
YO
529 foreach my $node (@ops) {
530 my $size= $node->{longj} || 0;
9b155405 531
f83e001e
YO
532 printf $out "\t%d,\t/* %*s */\n", $size, -$rwidth, $node->{name};
533 }
d09b2d29 534
f83e001e 535 print $out <<EOP;
d09b2d29 536};
9b155405 537
f83e001e
YO
538EOP
539}
540
541sub print_reg_name {
542 my ($out)= @_;
543 print $out <<EOP;
13d6edb4 544
6bda09f9
YO
545/* reg_name[] - Opcode/state names in string form, for debugging */
546
22429478 547#ifndef DOINIT
13d6edb4 548EXTCONST char * PL_reg_name[];
22429478 549#else
4764e399 550EXTCONST char * const PL_reg_name[] = {
9b155405
IZ
551EOP
552
f83e001e
YO
553 my $ofs= 0;
554 my $sym= "";
555 foreach my $node (@all) {
556 my $size= $node->{longj} || 0;
557
558 printf $out "\t%*s\t/* $sym%#04x */\n",
519d76f5 559 -3 - $base_name_width, qq("$node->{name}",), $node->{id} - $ofs;
f83e001e
YO
560 if ( $node->{id} == $#ops and @ops != @all ) {
561 print $out "\t/* ------------ States ------------- */\n";
562 $ofs= $#ops;
563 $sym= 'REGNODE_MAX +';
564 }
565 }
9b155405 566
f83e001e 567 print $out <<EOP;
9b155405 568};
22429478 569#endif /* DOINIT */
d09b2d29 570
337ff307 571EOP
f83e001e 572}
337ff307 573
f83e001e
YO
574sub print_reg_extflags_name {
575 my ($out)= @_;
576 print $out <<EOP;
f7819f85
A
577/* PL_reg_extflags_name[] - Opcode/state names in string form, for debugging */
578
579#ifndef DOINIT
580EXTCONST char * PL_reg_extflags_name[];
581#else
582EXTCONST char * const PL_reg_extflags_name[] = {
d09b2d29
IZ
583EOP
584
f83e001e
YO
585 my %rxfv;
586 my %definitions; # Remember what the symbol definitions are
587 my $val= 0;
588 my %reverse;
589 my $REG_EXTFLAGS_NAME_SIZE= 0;
590 foreach my $file ( "op_reg_common.h", "regexp.h" ) {
591 open my $in_fh, "<", $file or die "Can't read '$file': $!";
592 while (<$in_fh>) {
593
594 # optional leading '_'. Return symbol in $1, and strip it from
f2323142
KW
595 # comment of line. Currently doesn't handle comments running onto
596 # next line
f83e001e
YO
597 if (s/^ \# \s* define \s+ ( _? RXf_ \w+ ) \s+ //xi) {
598 chomp;
599 my $define= $1;
600 my $orig= $_;
601 s{ /\* .*? \*/ }{ }x; # Replace comments by a blank
602
603 # Replace any prior defined symbols by their values
604 foreach my $key ( keys %definitions ) {
605 s/\b$key\b/$definitions{$key}/g;
606 }
5c72e80d 607
f83e001e
YO
608 # Remove the U suffix from unsigned int literals
609 s/\b([0-9]+)U\b/$1/g;
5c72e80d 610
f83e001e 611 my $newval= eval $_; # Get numeric definition
6a080ccd 612
f83e001e 613 $definitions{$define}= $newval;
6a080ccd 614
f83e001e
YO
615 next unless $_ =~ /<</; # Bit defines use left shift
616 if ( $val & $newval ) {
617 my @names= ( $define, $reverse{$newval} );
618 s/PMf_// for @names;
619 if ( $names[0] ne $names[1] ) {
620 die sprintf
621 "ERROR: both $define and $reverse{$newval} use 0x%08X (%s:%s)",
622 $newval, $orig, $_;
623 }
624 next;
6976c986 625 }
f83e001e
YO
626 $val |= $newval;
627 $rxfv{$define}= $newval;
628 $reverse{$newval}= $define;
1850c8f9 629 }
6a080ccd 630 }
f7819f85 631 }
f83e001e
YO
632 my %vrxf= reverse %rxfv;
633 printf $out "\t/* Bits in extflags defined: %s */\n", unpack 'B*', pack 'N',
634 $val;
635 my %multibits;
636 for ( 0 .. 31 ) {
637 my $power_of_2= 2**$_;
638 my $n= $vrxf{$power_of_2};
639 my $extra= "";
640 if ( !$n ) {
641
642 # Here, there was no name that matched exactly the bit. It could be
643 # either that it is unused, or the name matches multiple bits.
644 if ( !( $val & $power_of_2 ) ) {
645 $n= "UNUSED_BIT_$_";
646 }
647 else {
648
649 # Here, must be because it matches multiple bits. Look through
650 # all possibilities until find one that matches this one. Use
651 # that name, and all the bits it matches
652 foreach my $name ( keys %rxfv ) {
653 if ( $rxfv{$name} & $power_of_2 ) {
654 $n= $name . ( $multibits{$name}++ );
655 $extra= sprintf qq{ : "%s" - 0x%08x}, $name,
656 $rxfv{$name}
657 if $power_of_2 != $rxfv{$name};
658 last;
659 }
5458d9a0
KW
660 }
661 }
662 }
f83e001e
YO
663 s/\bRXf_(PMf_)?// for $n, $extra;
664 printf $out qq(\t%-20s/* 0x%08x%s */\n), qq("$n",), $power_of_2, $extra;
665 $REG_EXTFLAGS_NAME_SIZE++;
5458d9a0 666 }
f83e001e
YO
667
668 print $out <<EOP;
f7819f85
A
669};
670#endif /* DOINIT */
671
adc2d0c9
JH
672#ifdef DEBUGGING
673# define REG_EXTFLAGS_NAME_SIZE $REG_EXTFLAGS_NAME_SIZE
674#endif
f83e001e 675EOP
adc2d0c9 676
337ff307 677}
f83e001e
YO
678
679sub print_reg_intflags_name {
680 my ($out)= @_;
681 print $out <<EOP;
682
337ff307
YO
683/* PL_reg_intflags_name[] - Opcode/state names in string form, for debugging */
684
685#ifndef DOINIT
686EXTCONST char * PL_reg_intflags_name[];
687#else
688EXTCONST char * const PL_reg_intflags_name[] = {
689EOP
690
f83e001e
YO
691 my %rxfv;
692 my %definitions; # Remember what the symbol definitions are
693 my $val= 0;
694 my %reverse;
695 my $REG_INTFLAGS_NAME_SIZE= 0;
696 foreach my $file ("regcomp.h") {
697 open my $fh, "<", $file or die "Can't read $file: $!";
698 while (<$fh>) {
699
700 # optional leading '_'. Return symbol in $1, and strip it from
701 # comment of line
702 if (
703 m/^ \# \s* define \s+ ( PREGf_ ( \w+ ) ) \s+ 0x([0-9a-f]+)(?:\s*\/\*(.*)\*\/)?/xi
704 )
705 {
706 chomp;
707 my $define= $1;
708 my $abbr= $2;
709 my $hex= $3;
710 my $comment= $4;
711 my $val= hex($hex);
712 $comment= $comment ? " - $comment" : "";
713
714 printf $out qq(\t%-30s/* 0x%08x - %s%s */\n), qq("$abbr",),
715 $val, $define, $comment;
716 $REG_INTFLAGS_NAME_SIZE++;
717 }
337ff307
YO
718 }
719 }
337ff307 720
f83e001e 721 print $out <<EOP;
337ff307
YO
722};
723#endif /* DOINIT */
724
725EOP
f83e001e 726 print $out <<EOQ;
adc2d0c9
JH
727#ifdef DEBUGGING
728# define REG_INTFLAGS_NAME_SIZE $REG_INTFLAGS_NAME_SIZE
729#endif
337ff307 730
adc2d0c9
JH
731EOQ
732}
f9ef50a7 733
f83e001e
YO
734sub print_process_flags {
735 my ($out)= @_;
736
737 print $out process_flags( 'V', 'varies', <<'EOC');
f9ef50a7
NC
738/* The following have no fixed length. U8 so we can do strchr() on it. */
739EOC
740
f83e001e 741 print $out process_flags( 'S', 'simple', <<'EOC');
ce716c52 742
f9ef50a7
NC
743/* The following always have a length of 1. U8 we can do strchr() on it. */
744/* (Note that length 1 means "one character" under UTF8, not "one octet".) */
745EOC
746
f83e001e 747}
65aa4ca7 748
f83e001e
YO
749sub do_perldebguts {
750 my $guts= open_new( 'pod/perldebguts.pod', '>' );
65aa4ca7 751
f83e001e
YO
752 my $node;
753 my $code;
754 my $name_fmt= '<' x ( $longest_name_length - 1 );
755 my $descr_fmt= '<' x ( 58 - $longest_name_length );
756 eval <<EOD or die $@;
65aa4ca7
FC
757format GuTS =
758 ^*~~
f83e001e 759 \$node->{pod_comment}
95fe686d 760 ^$name_fmt ^<<<<<<<<< ^$descr_fmt~~
d1bd48a0 761 \$node->{name}, \$code, defined \$node->{comment} ? \$node->{comment} : ''
65aa4ca7 762.
f83e001e 7631;
65aa4ca7 764EOD
ce553cf5 765
f83e001e
YO
766 my $old_fh= select($guts);
767 $~= "GuTS";
65aa4ca7 768
1ae6ead9 769 open my $oldguts, '<', 'pod/perldebguts.pod'
65aa4ca7 770 or die "$0 cannot open pod/perldebguts.pod for reading: $!";
f83e001e 771 while (<$oldguts>) {
65aa4ca7
FC
772 print;
773 last if /=for regcomp.pl begin/;
774 }
775
f83e001e 776 print <<'END_OF_DESCR';
65aa4ca7 777
e21ef692 778 # TYPE arg-description [regnode-struct-suffix] [longjump-len] DESCRIPTION
f83e001e
YO
779END_OF_DESCR
780 for my $n (@ops) {
781 $node= $n;
484678fc 782 $code= "$node->{code} " . ( $node->{suffix} || "" );
f83e001e
YO
783 $code .= " $node->{longj}" if $node->{longj};
784 if ( $node->{pod_comment} ||= "" ) {
785
65aa4ca7 786 # Trim multiple blanks
f83e001e
YO
787 $node->{pod_comment} =~ s/^\n\n+/\n/;
788 $node->{pod_comment} =~ s/\n\n+$/\n\n/;
65aa4ca7
FC
789 }
790 write;
791 }
792 print "\n";
793
f83e001e 794 while (<$oldguts>) {
65aa4ca7
FC
795 last if /=for regcomp.pl end/;
796 }
4ac5f10b
DD
797 do { print } while <$oldguts>; #win32 can't unlink an open FH
798 close $oldguts or die "Error closing pod/perldebguts.pod: $!";
f83e001e
YO
799 select $old_fh;
800 close_and_rename($guts);
801}
65aa4ca7 802
33b57041 803my $confine_to_core = 'defined(PERL_CORE) || defined(PERL_EXT_RE_BUILD)';
f83e001e
YO
804read_definition("regcomp.sym");
805my $out= open_new( 'regnodes.h', '>',
806 { by => 'regen/regcomp.pl', from => 'regcomp.sym' } );
33b57041 807print $out "#if $confine_to_core\n\n";
f83e001e
YO
808print_state_defs($out);
809print_regkind($out);
810wrap_ifdef_print(
811 $out,
812 "REG_COMP_C",
813 \&print_regarglen,
814 \&print_reg_off_by_arg
815);
816print_reg_name($out);
817print_reg_extflags_name($out);
818print_reg_intflags_name($out);
819print_process_flags($out);
938090ac 820print_process_EXACTish($out);
33b57041 821print $out "\n#endif /* $confine_to_core */\n";
f83e001e 822read_only_bottom_close_and_rename($out);
65aa4ca7 823
f83e001e 824do_perldebguts();