Commit | Line | Data |
---|---|---|
916e4025 | 1 | #!/usr/bin/perl -w |
f83e001e YO |
2 | # |
3 | # | |
6294c161 DM |
4 | # Regenerate (overwriting only if changed): |
5 | # | |
65aa4ca7 | 6 | # pod/perldebguts.pod |
6294c161 DM |
7 | # regnodes.h |
8 | # | |
9 | # from information stored in | |
10 | # | |
11 | # regcomp.sym | |
12 | # regexp.h | |
13 | # | |
65aa4ca7 FC |
14 | # pod/perldebguts.pod is not completely regenerated. Only the table of |
15 | # regexp nodes is replaced; other parts remain unchanged. | |
16 | # | |
6294c161 DM |
17 | # Accepts the standard regen_lib -q and -v args. |
18 | # | |
19 | # This script is normally invoked from regen.pl. | |
20 | ||
36bb303b NC |
21 | BEGIN { |
22 | # Get function prototypes | |
3d7c117d | 23 | require './regen/regen_lib.pl'; |
36bb303b | 24 | } |
03363afd | 25 | use strict; |
03363afd | 26 | |
f83e001e YO |
27 | # NOTE I don't think anyone actually knows what all of these properties mean, |
28 | # and I suspect some of them are outright unused. This is a first attempt to | |
29 | # clean up the generation so maybe one day we can move to something more self | |
30 | # documenting. (One might argue that an array of hashes of properties would | |
31 | # be easier to use.) | |
32 | # | |
33 | # Why we use the term regnode and nodes, and not say, opcodes, I am not sure. | |
34 | ||
35 | # General thoughts: | |
36 | # 1. We use a single continuum to represent both opcodes and states, | |
37 | # and in regexec.c we switch on the combined set. | |
38 | # 2. Opcodes have more information associated to them, states are simpler, | |
39 | # basically just an identifier/number that can be used to switch within | |
40 | # the state machine. | |
41 | # 3. Some opcode are order dependent. | |
42 | # 4. Output files often use "tricks" to reduce diff effects. Some of what | |
43 | # we do below is more clumsy looking than it could be because of this. | |
44 | ||
45 | # Op/state properties: | |
46 | # | |
47 | # Property In Descr | |
48 | # ---------------------------------------------------------------------------- | |
49 | # name Both Name of op/state | |
50 | # id Both integer value for this opcode/state | |
51 | # optype Both Either 'op' or 'state' | |
52 | # line_num Both line_num number of the input file for this item. | |
53 | # type Op Type of node (aka regkind) | |
54 | # code Op what code is associated with this node (???) | |
55 | # args Op what type of args the node has (which regnode struct) | |
56 | # flags Op (???) | |
46167d76 | 57 | # longj Op Boolean as to if this node is a longjump |
f83e001e YO |
58 | # comment Both Comment about node, if any |
59 | # pod_comment Both Special comments for pod output (preceding lines in def) | |
60 | ||
61 | # Global State | |
62 | my @all; # all opcodes/state | |
63 | my %all; # hash of all opcode/state names | |
64 | ||
65 | my @ops; # array of just opcodes | |
66 | my @states; # array of just states | |
67 | ||
68 | my $longest_name_length= 0; # track lengths of names for nicer reports | |
69 | my (%type_alias); # map the type (??) | |
70 | ||
71 | # register a newly constructed node into our state tables. | |
72 | # ensures that we have no name collisions (on name anyway), | |
73 | # and issues the "id" for the node. | |
74 | sub register_node { | |
75 | my ($node)= @_; | |
76 | ||
77 | if ( $all{ $node->{name} } ) { | |
78 | die "Duplicate item '$node->{name}' in regcomp.sym line $node->{line_num} " | |
79 | . "previously defined on line $all{ $node->{name} }{line_num}\n"; | |
80 | } elsif (!$node->{optype}) { | |
81 | die "must have an optype in node ", Dumper($node); | |
82 | } elsif ($node->{optype} eq "op") { | |
83 | push @ops, $node; | |
84 | } elsif ($node->{optype} eq "state") { | |
85 | push @states, $node; | |
86 | } else { | |
87 | die "Uknown optype '$node->{optype}' in ", Dumper($node); | |
03363afd | 88 | } |
f83e001e YO |
89 | $node->{id}= 0 + @all; |
90 | push @all, $node; | |
91 | $all{ $node->{name} }= $node; | |
46167d76 KW |
92 | |
93 | if ($node->{longj} && $node->{longj} != 1) { | |
94 | die "longj field must be in [01] if present in ", Dumper($node); | |
95 | } | |
96 | ||
f83e001e | 97 | } |
d3d47aac | 98 | |
f83e001e YO |
99 | # Parse and add an opcode definition to the global state. |
100 | # An opcode definition looks like this: | |
101 | # | |
102 | # +- args | |
103 | # | +- flags | |
104 | # | | +- longjmp | |
105 | # Name Type code | | | ; comment | |
106 | # -------------------------------------------------------------------------- | |
107 | # IFMATCH BRANCHJ, off 1 . 2 ; Succeeds if the following matches. | |
108 | # UNLESSM BRANCHJ, off 1 . 2 ; Fails if the following matches. | |
109 | # SUSPEND BRANCHJ, off 1 V 1 ; "Independent" sub-RE. | |
110 | # IFTHEN BRANCHJ, off 1 V 1 ; Switch, should be preceded by switcher. | |
111 | # GROUPP GROUPP, num 1 ; Whether the group matched. | |
112 | # | |
113 | # Not every opcode definition has all of these. We should maybe make this | |
114 | # nicer/easier to read in the future. Also note that the above is tab | |
115 | # sensitive. | |
116 | ||
117 | sub parse_opcode_def { | |
118 | my ( $text, $line_num, $pod_comment )= @_; | |
119 | my $node= { | |
120 | line_num => $line_num, | |
121 | pod_comment => $pod_comment, | |
122 | optype => "op", | |
123 | }; | |
d3d47aac | 124 | |
f83e001e YO |
125 | # first split the line into three, the initial NAME, a middle part |
126 | # that we call "desc" which contains various (not well documented) things, | |
127 | # and a comment section. | |
128 | @{$node}{qw(name desc comment)}= /^(\S+)\s+([^\t]+?)\s*;\s*(.*)/ | |
129 | or die "Failed to match $_"; | |
d3d47aac | 130 | |
f83e001e YO |
131 | # the content of the "desc" field from the first step is extracted here: |
132 | @{$node}{qw(type code args flags longj)}= split /[,\s]\s*/, $node->{desc}; | |
d3d47aac | 133 | |
d1bd48a0 FC |
134 | defined $node->{$_} or $node->{$_} = "" |
135 | for qw(type code args flags longj); | |
f83e001e YO |
136 | |
137 | register_node($node); # has to be before the type_alias code below | |
138 | ||
139 | if ( !$all{ $node->{type} } and !$type_alias{ $node->{type} } ) { | |
140 | ||
141 | #warn "Regop type '$node->{type}' from regcomp.sym line $line_num" | |
142 | # ." is not an existing regop, and will be aliased to $node->{name}\n" | |
143 | # if -t STDERR; | |
144 | $type_alias{ $node->{type} }= $node->{name}; | |
145 | } | |
146 | ||
147 | $longest_name_length= length $node->{name} | |
148 | if length $node->{name} > $longest_name_length; | |
149 | } | |
150 | ||
151 | # parse out a state definition and add the resulting data | |
152 | # into the global state. may create multiple new states from | |
153 | # a single definition (this is part of the point). | |
154 | # Format for states: | |
155 | # REGOP \t typelist [ \t typelist] | |
156 | # typelist= namelist | |
157 | # = namelist:FAIL | |
158 | # = name:count | |
159 | # Eg: | |
160 | # WHILEM A_pre,A_min,A_max,B_min,B_max:FAIL | |
161 | # BRANCH next:FAIL | |
162 | # CURLYM A,B:FAIL | |
163 | # | |
164 | # The CURLYM definition would create the states: | |
165 | # CURLYM_A, CURLYM_A_fail, CURLYM_B, CURLYM_B_fail | |
166 | sub parse_state_def { | |
167 | my ( $text, $line_num, $pod_comment )= @_; | |
168 | my ( $type, @lists )= split /\s+/, $text; | |
169 | die "No list? $type" if !@lists; | |
170 | foreach my $list (@lists) { | |
171 | my ( $names, $special )= split /:/, $list, 2; | |
172 | $special ||= ""; | |
173 | foreach my $name ( split /,/, $names ) { | |
174 | my $real= | |
175 | $name eq 'resume' | |
176 | ? "resume_$type" | |
177 | : "${type}_$name"; | |
178 | my @suffix; | |
179 | if ( !$special ) { | |
180 | @suffix= (""); | |
181 | } | |
182 | elsif ( $special =~ /\d/ ) { | |
183 | @suffix= ( 1 .. $special ); | |
184 | } | |
185 | elsif ( $special eq 'FAIL' ) { | |
186 | @suffix= ( "", "_fail" ); | |
187 | } | |
188 | else { | |
189 | die "unknown :type ':$special'"; | |
190 | } | |
191 | foreach my $suffix (@suffix) { | |
192 | my $node= { | |
193 | name => "$real$suffix", | |
194 | optype => "state", | |
195 | type => $type || "", | |
196 | comment => "state for $type", | |
197 | line_num => $line_num, | |
198 | }; | |
199 | register_node($node); | |
03363afd YO |
200 | } |
201 | } | |
03363afd YO |
202 | } |
203 | } | |
d09b2d29 | 204 | |
f9ef50a7 | 205 | sub process_flags { |
f83e001e YO |
206 | my ( $flag, $varname, $comment )= @_; |
207 | $comment= '' unless defined $comment; | |
208 | ||
209 | my @selected; | |
210 | my $bitmap= ''; | |
211 | for my $node (@ops) { | |
212 | my $set= $node->{flags} && $node->{flags} eq $flag ? 1 : 0; | |
213 | ||
214 | # Whilst I could do this with vec, I'd prefer to do longhand the arithmetic | |
215 | # ops in the C code. | |
216 | my $current= do { | |
217 | no warnings; | |
218 | ord substr $bitmap, ( $node->{id} >> 3 ); | |
219 | }; | |
220 | substr( $bitmap, ( $node->{id} >> 3 ), 1 )= | |
221 | chr( $current | ( $set << ( $node->{id} & 7 ) ) ); | |
222 | ||
223 | push @selected, $node->{name} if $set; | |
224 | } | |
225 | my $out_string= join ', ', @selected, 0; | |
226 | $out_string =~ s/(.{1,70},) /$1\n /g; | |
ded4dd2a | 227 | |
f83e001e | 228 | my $out_mask= join ', ', map { sprintf "0x%02X", ord $_ } split '', $bitmap; |
ded4dd2a | 229 | |
f83e001e | 230 | return $comment . <<"EOP"; |
ded4dd2a | 231 | #define REGNODE_\U$varname\E(node) (PL_${varname}_bitmask[(node) >> 3] & (1 << ((node) & 7))) |
e52fc539 | 232 | |
f9ef50a7 | 233 | #ifndef DOINIT |
916e4025 | 234 | EXTCONST U8 PL_${varname}\[] __attribute__deprecated__; |
f9ef50a7 | 235 | #else |
916e4025 | 236 | EXTCONST U8 PL_${varname}\[] __attribute__deprecated__ = { |
f9ef50a7 NC |
237 | $out_string |
238 | }; | |
239 | #endif /* DOINIT */ | |
240 | ||
ded4dd2a NC |
241 | #ifndef DOINIT |
242 | EXTCONST U8 PL_${varname}_bitmask[]; | |
243 | #else | |
244 | EXTCONST U8 PL_${varname}_bitmask[] = { | |
245 | $out_mask | |
246 | }; | |
247 | #endif /* DOINIT */ | |
f9ef50a7 NC |
248 | EOP |
249 | } | |
250 | ||
f83e001e YO |
251 | sub read_definition { |
252 | my ( $file )= @_; | |
253 | my ( $seen_sep, $pod_comment )= ""; | |
254 | open my $in_fh, "<", $file | |
255 | or die "Failed to open '$file' for reading: $!"; | |
256 | while (<$in_fh>) { | |
257 | ||
258 | # Special pod comments | |
259 | if (/^#\* ?/) { $pod_comment .= "# $'"; } | |
260 | ||
261 | # Truly blank lines possibly surrounding pod comments | |
262 | elsif (/^\s*$/) { $pod_comment .= "\n" } | |
263 | ||
264 | next if /\A\s*#/ || /\A\s*\z/; | |
265 | ||
266 | s/\s*\z//; | |
267 | if (/^-+\s*$/) { | |
268 | $seen_sep= 1; | |
269 | next; | |
270 | } | |
271 | ||
272 | if ($seen_sep) { | |
273 | parse_state_def( $_, $., $pod_comment ); | |
274 | } | |
275 | else { | |
276 | parse_opcode_def( $_, $., $pod_comment ); | |
277 | } | |
278 | $pod_comment= ""; | |
279 | } | |
280 | close $in_fh; | |
281 | die "Too many regexp/state opcodes! Maximum is 256, but there are ", 0 + @all, | |
282 | " in file!" | |
283 | if @all > 256; | |
284 | } | |
285 | ||
286 | # use fixed width to keep the diffs between regcomp.pl recompiles | |
287 | # as small as possible. | |
288 | my ( $width, $rwidth, $twidth )= ( 22, 12, 9 ); | |
289 | ||
290 | sub print_state_defs { | |
291 | my ($out)= @_; | |
292 | printf $out <<EOP, | |
6bda09f9 YO |
293 | /* Regops and State definitions */ |
294 | ||
03363afd YO |
295 | #define %*s\t%d |
296 | #define %*s\t%d | |
297 | ||
d09b2d29 | 298 | EOP |
f83e001e YO |
299 | -$width, |
300 | REGNODE_MAX => $#ops, | |
301 | -$width, REGMATCH_STATE_MAX => $#all; | |
302 | ||
303 | my %rev_type_alias= reverse %type_alias; | |
304 | for my $node (@ops) { | |
305 | printf $out "#define\t%*s\t%d\t/* %#04x %s */\n", | |
306 | -$width, $node->{name}, $node->{id}, $node->{id}, $node->{comment}; | |
307 | if ( defined( my $alias= $rev_type_alias{ $node->{name} } ) ) { | |
308 | printf $out "#define\t%*s\t%d\t/* %#04x %s */\n", | |
309 | -$width, $alias, $node->{id}, $node->{id}, "type alias"; | |
310 | } | |
311 | } | |
d3d47aac | 312 | |
f83e001e YO |
313 | print $out "\t/* ------------ States ------------- */\n"; |
314 | for my $node (@states) { | |
315 | printf $out "#define\t%*s\t(REGNODE_MAX + %d)\t/* %s */\n", | |
316 | -$width, $node->{name}, $node->{id} - $#ops, $node->{comment}; | |
317 | } | |
d09b2d29 IZ |
318 | } |
319 | ||
f83e001e YO |
320 | sub print_regkind { |
321 | my ($out)= @_; | |
322 | print $out <<EOP; | |
03363afd | 323 | |
6bda09f9 | 324 | /* PL_regkind[] What type of regop or state is this. */ |
d09b2d29 IZ |
325 | |
326 | #ifndef DOINIT | |
22c35a8c | 327 | EXTCONST U8 PL_regkind[]; |
d09b2d29 | 328 | #else |
22c35a8c | 329 | EXTCONST U8 PL_regkind[] = { |
d09b2d29 | 330 | EOP |
f83e001e YO |
331 | use Data::Dumper; |
332 | foreach my $node (@all) { | |
333 | print Dumper($node) if !defined $node->{type} or !defined( $node->{name} ); | |
334 | printf $out "\t%*s\t/* %*s */\n", | |
335 | -1 - $twidth, "$node->{type},", -$width, $node->{name}; | |
336 | print $out "\t/* ------------ States ------------- */\n" | |
337 | if $node->{id} == $#ops and $node->{id} != $#all; | |
338 | } | |
d09b2d29 | 339 | |
f83e001e | 340 | print $out <<EOP; |
d09b2d29 IZ |
341 | }; |
342 | #endif | |
f83e001e YO |
343 | EOP |
344 | } | |
345 | ||
346 | sub wrap_ifdef_print { | |
347 | my $out= shift; | |
348 | my $token= shift; | |
349 | print $out <<EOP; | |
350 | ||
351 | #ifdef $token | |
352 | EOP | |
353 | $_->($out) for @_; | |
354 | print $out <<EOP; | |
355 | #endif /* $token */ | |
356 | ||
357 | EOP | |
358 | } | |
359 | ||
360 | sub print_regarglen { | |
361 | my ($out)= @_; | |
362 | print $out <<EOP; | |
d09b2d29 | 363 | |
6bda09f9 | 364 | /* regarglen[] - How large is the argument part of the node (in regnodes) */ |
d09b2d29 | 365 | |
29de9391 | 366 | static const U8 regarglen[] = { |
d09b2d29 IZ |
367 | EOP |
368 | ||
f83e001e YO |
369 | foreach my $node (@ops) { |
370 | my $size= 0; | |
371 | $size= "EXTRA_SIZE(struct regnode_$node->{args})" if $node->{args}; | |
d09b2d29 | 372 | |
f83e001e YO |
373 | printf $out "\t%*s\t/* %*s */\n", -37, "$size,", -$rwidth, $node->{name}; |
374 | } | |
375 | ||
376 | print $out <<EOP; | |
d09b2d29 | 377 | }; |
f83e001e YO |
378 | EOP |
379 | } | |
380 | ||
381 | sub print_reg_off_by_arg { | |
382 | my ($out)= @_; | |
383 | print $out <<EOP; | |
d09b2d29 | 384 | |
6bda09f9 YO |
385 | /* reg_off_by_arg[] - Which argument holds the offset to the next node */ |
386 | ||
29de9391 | 387 | static const char reg_off_by_arg[] = { |
d09b2d29 IZ |
388 | EOP |
389 | ||
f83e001e YO |
390 | foreach my $node (@ops) { |
391 | my $size= $node->{longj} || 0; | |
9b155405 | 392 | |
f83e001e YO |
393 | printf $out "\t%d,\t/* %*s */\n", $size, -$rwidth, $node->{name}; |
394 | } | |
d09b2d29 | 395 | |
f83e001e | 396 | print $out <<EOP; |
d09b2d29 | 397 | }; |
9b155405 | 398 | |
f83e001e YO |
399 | EOP |
400 | } | |
401 | ||
402 | sub print_reg_name { | |
403 | my ($out)= @_; | |
404 | print $out <<EOP; | |
13d6edb4 | 405 | |
6bda09f9 YO |
406 | /* reg_name[] - Opcode/state names in string form, for debugging */ |
407 | ||
22429478 | 408 | #ifndef DOINIT |
13d6edb4 | 409 | EXTCONST char * PL_reg_name[]; |
22429478 | 410 | #else |
4764e399 | 411 | EXTCONST char * const PL_reg_name[] = { |
9b155405 IZ |
412 | EOP |
413 | ||
f83e001e YO |
414 | my $ofs= 0; |
415 | my $sym= ""; | |
416 | foreach my $node (@all) { | |
417 | my $size= $node->{longj} || 0; | |
418 | ||
419 | printf $out "\t%*s\t/* $sym%#04x */\n", | |
420 | -3 - $width, qq("$node->{name}",), $node->{id} - $ofs; | |
421 | if ( $node->{id} == $#ops and @ops != @all ) { | |
422 | print $out "\t/* ------------ States ------------- */\n"; | |
423 | $ofs= $#ops; | |
424 | $sym= 'REGNODE_MAX +'; | |
425 | } | |
426 | } | |
9b155405 | 427 | |
f83e001e | 428 | print $out <<EOP; |
9b155405 | 429 | }; |
22429478 | 430 | #endif /* DOINIT */ |
d09b2d29 | 431 | |
337ff307 | 432 | EOP |
f83e001e | 433 | } |
337ff307 | 434 | |
f83e001e YO |
435 | sub print_reg_extflags_name { |
436 | my ($out)= @_; | |
437 | print $out <<EOP; | |
f7819f85 A |
438 | /* PL_reg_extflags_name[] - Opcode/state names in string form, for debugging */ |
439 | ||
440 | #ifndef DOINIT | |
441 | EXTCONST char * PL_reg_extflags_name[]; | |
442 | #else | |
443 | EXTCONST char * const PL_reg_extflags_name[] = { | |
d09b2d29 IZ |
444 | EOP |
445 | ||
f83e001e YO |
446 | my %rxfv; |
447 | my %definitions; # Remember what the symbol definitions are | |
448 | my $val= 0; | |
449 | my %reverse; | |
450 | my $REG_EXTFLAGS_NAME_SIZE= 0; | |
451 | foreach my $file ( "op_reg_common.h", "regexp.h" ) { | |
452 | open my $in_fh, "<", $file or die "Can't read '$file': $!"; | |
453 | while (<$in_fh>) { | |
454 | ||
455 | # optional leading '_'. Return symbol in $1, and strip it from | |
456 | # comment of line | |
457 | if (s/^ \# \s* define \s+ ( _? RXf_ \w+ ) \s+ //xi) { | |
458 | chomp; | |
459 | my $define= $1; | |
460 | my $orig= $_; | |
461 | s{ /\* .*? \*/ }{ }x; # Replace comments by a blank | |
462 | ||
463 | # Replace any prior defined symbols by their values | |
464 | foreach my $key ( keys %definitions ) { | |
465 | s/\b$key\b/$definitions{$key}/g; | |
466 | } | |
5c72e80d | 467 | |
f83e001e YO |
468 | # Remove the U suffix from unsigned int literals |
469 | s/\b([0-9]+)U\b/$1/g; | |
5c72e80d | 470 | |
f83e001e | 471 | my $newval= eval $_; # Get numeric definition |
6a080ccd | 472 | |
f83e001e | 473 | $definitions{$define}= $newval; |
6a080ccd | 474 | |
f83e001e YO |
475 | next unless $_ =~ /<</; # Bit defines use left shift |
476 | if ( $val & $newval ) { | |
477 | my @names= ( $define, $reverse{$newval} ); | |
478 | s/PMf_// for @names; | |
479 | if ( $names[0] ne $names[1] ) { | |
480 | die sprintf | |
481 | "ERROR: both $define and $reverse{$newval} use 0x%08X (%s:%s)", | |
482 | $newval, $orig, $_; | |
483 | } | |
484 | next; | |
6976c986 | 485 | } |
f83e001e YO |
486 | $val |= $newval; |
487 | $rxfv{$define}= $newval; | |
488 | $reverse{$newval}= $define; | |
1850c8f9 | 489 | } |
6a080ccd | 490 | } |
f7819f85 | 491 | } |
f83e001e YO |
492 | my %vrxf= reverse %rxfv; |
493 | printf $out "\t/* Bits in extflags defined: %s */\n", unpack 'B*', pack 'N', | |
494 | $val; | |
495 | my %multibits; | |
496 | for ( 0 .. 31 ) { | |
497 | my $power_of_2= 2**$_; | |
498 | my $n= $vrxf{$power_of_2}; | |
499 | my $extra= ""; | |
500 | if ( !$n ) { | |
501 | ||
502 | # Here, there was no name that matched exactly the bit. It could be | |
503 | # either that it is unused, or the name matches multiple bits. | |
504 | if ( !( $val & $power_of_2 ) ) { | |
505 | $n= "UNUSED_BIT_$_"; | |
506 | } | |
507 | else { | |
508 | ||
509 | # Here, must be because it matches multiple bits. Look through | |
510 | # all possibilities until find one that matches this one. Use | |
511 | # that name, and all the bits it matches | |
512 | foreach my $name ( keys %rxfv ) { | |
513 | if ( $rxfv{$name} & $power_of_2 ) { | |
514 | $n= $name . ( $multibits{$name}++ ); | |
515 | $extra= sprintf qq{ : "%s" - 0x%08x}, $name, | |
516 | $rxfv{$name} | |
517 | if $power_of_2 != $rxfv{$name}; | |
518 | last; | |
519 | } | |
5458d9a0 KW |
520 | } |
521 | } | |
522 | } | |
f83e001e YO |
523 | s/\bRXf_(PMf_)?// for $n, $extra; |
524 | printf $out qq(\t%-20s/* 0x%08x%s */\n), qq("$n",), $power_of_2, $extra; | |
525 | $REG_EXTFLAGS_NAME_SIZE++; | |
5458d9a0 | 526 | } |
f83e001e YO |
527 | |
528 | print $out <<EOP; | |
f7819f85 A |
529 | }; |
530 | #endif /* DOINIT */ | |
531 | ||
adc2d0c9 JH |
532 | #ifdef DEBUGGING |
533 | # define REG_EXTFLAGS_NAME_SIZE $REG_EXTFLAGS_NAME_SIZE | |
534 | #endif | |
f83e001e | 535 | EOP |
adc2d0c9 | 536 | |
337ff307 | 537 | } |
f83e001e YO |
538 | |
539 | sub print_reg_intflags_name { | |
540 | my ($out)= @_; | |
541 | print $out <<EOP; | |
542 | ||
337ff307 YO |
543 | /* PL_reg_intflags_name[] - Opcode/state names in string form, for debugging */ |
544 | ||
545 | #ifndef DOINIT | |
546 | EXTCONST char * PL_reg_intflags_name[]; | |
547 | #else | |
548 | EXTCONST char * const PL_reg_intflags_name[] = { | |
549 | EOP | |
550 | ||
f83e001e YO |
551 | my %rxfv; |
552 | my %definitions; # Remember what the symbol definitions are | |
553 | my $val= 0; | |
554 | my %reverse; | |
555 | my $REG_INTFLAGS_NAME_SIZE= 0; | |
556 | foreach my $file ("regcomp.h") { | |
557 | open my $fh, "<", $file or die "Can't read $file: $!"; | |
558 | while (<$fh>) { | |
559 | ||
560 | # optional leading '_'. Return symbol in $1, and strip it from | |
561 | # comment of line | |
562 | if ( | |
563 | m/^ \# \s* define \s+ ( PREGf_ ( \w+ ) ) \s+ 0x([0-9a-f]+)(?:\s*\/\*(.*)\*\/)?/xi | |
564 | ) | |
565 | { | |
566 | chomp; | |
567 | my $define= $1; | |
568 | my $abbr= $2; | |
569 | my $hex= $3; | |
570 | my $comment= $4; | |
571 | my $val= hex($hex); | |
572 | $comment= $comment ? " - $comment" : ""; | |
573 | ||
574 | printf $out qq(\t%-30s/* 0x%08x - %s%s */\n), qq("$abbr",), | |
575 | $val, $define, $comment; | |
576 | $REG_INTFLAGS_NAME_SIZE++; | |
577 | } | |
337ff307 YO |
578 | } |
579 | } | |
337ff307 | 580 | |
f83e001e | 581 | print $out <<EOP; |
337ff307 YO |
582 | }; |
583 | #endif /* DOINIT */ | |
584 | ||
585 | EOP | |
f83e001e | 586 | print $out <<EOQ; |
adc2d0c9 JH |
587 | #ifdef DEBUGGING |
588 | # define REG_INTFLAGS_NAME_SIZE $REG_INTFLAGS_NAME_SIZE | |
589 | #endif | |
337ff307 | 590 | |
adc2d0c9 JH |
591 | EOQ |
592 | } | |
f9ef50a7 | 593 | |
f83e001e YO |
594 | sub print_process_flags { |
595 | my ($out)= @_; | |
596 | ||
597 | print $out process_flags( 'V', 'varies', <<'EOC'); | |
f9ef50a7 NC |
598 | /* The following have no fixed length. U8 so we can do strchr() on it. */ |
599 | EOC | |
600 | ||
f83e001e | 601 | print $out process_flags( 'S', 'simple', <<'EOC'); |
ce716c52 | 602 | |
f9ef50a7 NC |
603 | /* The following always have a length of 1. U8 we can do strchr() on it. */ |
604 | /* (Note that length 1 means "one character" under UTF8, not "one octet".) */ | |
605 | EOC | |
606 | ||
f83e001e | 607 | } |
65aa4ca7 | 608 | |
f83e001e YO |
609 | sub do_perldebguts { |
610 | my $guts= open_new( 'pod/perldebguts.pod', '>' ); | |
65aa4ca7 | 611 | |
f83e001e YO |
612 | my $node; |
613 | my $code; | |
614 | my $name_fmt= '<' x ( $longest_name_length - 1 ); | |
615 | my $descr_fmt= '<' x ( 58 - $longest_name_length ); | |
616 | eval <<EOD or die $@; | |
65aa4ca7 FC |
617 | format GuTS = |
618 | ^*~~ | |
f83e001e | 619 | \$node->{pod_comment} |
95fe686d | 620 | ^$name_fmt ^<<<<<<<<< ^$descr_fmt~~ |
d1bd48a0 | 621 | \$node->{name}, \$code, defined \$node->{comment} ? \$node->{comment} : '' |
65aa4ca7 | 622 | . |
f83e001e | 623 | 1; |
65aa4ca7 | 624 | EOD |
f83e001e YO |
625 | |
626 | my $old_fh= select($guts); | |
627 | $~= "GuTS"; | |
65aa4ca7 | 628 | |
1ae6ead9 | 629 | open my $oldguts, '<', 'pod/perldebguts.pod' |
65aa4ca7 | 630 | or die "$0 cannot open pod/perldebguts.pod for reading: $!"; |
f83e001e | 631 | while (<$oldguts>) { |
65aa4ca7 FC |
632 | print; |
633 | last if /=for regcomp.pl begin/; | |
634 | } | |
635 | ||
f83e001e | 636 | print <<'END_OF_DESCR'; |
65aa4ca7 FC |
637 | |
638 | # TYPE arg-description [num-args] [longjump-len] DESCRIPTION | |
f83e001e YO |
639 | END_OF_DESCR |
640 | for my $n (@ops) { | |
641 | $node= $n; | |
642 | $code= "$node->{code} " . ( $node->{args} || "" ); | |
643 | $code .= " $node->{longj}" if $node->{longj}; | |
644 | if ( $node->{pod_comment} ||= "" ) { | |
645 | ||
65aa4ca7 | 646 | # Trim multiple blanks |
f83e001e YO |
647 | $node->{pod_comment} =~ s/^\n\n+/\n/; |
648 | $node->{pod_comment} =~ s/\n\n+$/\n\n/; | |
65aa4ca7 FC |
649 | } |
650 | write; | |
651 | } | |
652 | print "\n"; | |
653 | ||
f83e001e | 654 | while (<$oldguts>) { |
65aa4ca7 FC |
655 | last if /=for regcomp.pl end/; |
656 | } | |
4ac5f10b DD |
657 | do { print } while <$oldguts>; #win32 can't unlink an open FH |
658 | close $oldguts or die "Error closing pod/perldebguts.pod: $!"; | |
f83e001e YO |
659 | select $old_fh; |
660 | close_and_rename($guts); | |
661 | } | |
65aa4ca7 | 662 | |
f83e001e YO |
663 | read_definition("regcomp.sym"); |
664 | my $out= open_new( 'regnodes.h', '>', | |
665 | { by => 'regen/regcomp.pl', from => 'regcomp.sym' } ); | |
666 | print_state_defs($out); | |
667 | print_regkind($out); | |
668 | wrap_ifdef_print( | |
669 | $out, | |
670 | "REG_COMP_C", | |
671 | \&print_regarglen, | |
672 | \&print_reg_off_by_arg | |
673 | ); | |
674 | print_reg_name($out); | |
675 | print_reg_extflags_name($out); | |
676 | print_reg_intflags_name($out); | |
677 | print_process_flags($out); | |
678 | read_only_bottom_close_and_rename($out); | |
65aa4ca7 | 679 | |
f83e001e | 680 | do_perldebguts(); |