Commit | Line | Data |
---|---|---|
916e4025 | 1 | #!/usr/bin/perl -w |
f83e001e YO |
2 | # |
3 | # | |
6294c161 DM |
4 | # Regenerate (overwriting only if changed): |
5 | # | |
65aa4ca7 | 6 | # pod/perldebguts.pod |
6294c161 DM |
7 | # regnodes.h |
8 | # | |
9 | # from information stored in | |
10 | # | |
11 | # regcomp.sym | |
12 | # regexp.h | |
13 | # | |
65aa4ca7 FC |
14 | # pod/perldebguts.pod is not completely regenerated. Only the table of |
15 | # regexp nodes is replaced; other parts remain unchanged. | |
16 | # | |
6294c161 DM |
17 | # Accepts the standard regen_lib -q and -v args. |
18 | # | |
19 | # This script is normally invoked from regen.pl. | |
20 | ||
36bb303b NC |
21 | BEGIN { |
22 | # Get function prototypes | |
af001346 | 23 | require 'regen/regen_lib.pl'; |
36bb303b | 24 | } |
03363afd | 25 | use strict; |
03363afd | 26 | |
f83e001e YO |
27 | # NOTE I don't think anyone actually knows what all of these properties mean, |
28 | # and I suspect some of them are outright unused. This is a first attempt to | |
29 | # clean up the generation so maybe one day we can move to something more self | |
30 | # documenting. (One might argue that an array of hashes of properties would | |
31 | # be easier to use.) | |
32 | # | |
33 | # Why we use the term regnode and nodes, and not say, opcodes, I am not sure. | |
34 | ||
35 | # General thoughts: | |
36 | # 1. We use a single continuum to represent both opcodes and states, | |
37 | # and in regexec.c we switch on the combined set. | |
38 | # 2. Opcodes have more information associated to them, states are simpler, | |
39 | # basically just an identifier/number that can be used to switch within | |
40 | # the state machine. | |
41 | # 3. Some opcode are order dependent. | |
42 | # 4. Output files often use "tricks" to reduce diff effects. Some of what | |
43 | # we do below is more clumsy looking than it could be because of this. | |
44 | ||
45 | # Op/state properties: | |
46 | # | |
47 | # Property In Descr | |
48 | # ---------------------------------------------------------------------------- | |
49 | # name Both Name of op/state | |
50 | # id Both integer value for this opcode/state | |
51 | # optype Both Either 'op' or 'state' | |
52 | # line_num Both line_num number of the input file for this item. | |
53 | # type Op Type of node (aka regkind) | |
54 | # code Op what code is associated with this node (???) | |
55 | # args Op what type of args the node has (which regnode struct) | |
56 | # flags Op (???) | |
57 | # longj Op Whether this node is a longjump | |
58 | # comment Both Comment about node, if any | |
59 | # pod_comment Both Special comments for pod output (preceding lines in def) | |
60 | ||
61 | # Global State | |
62 | my @all; # all opcodes/state | |
63 | my %all; # hash of all opcode/state names | |
64 | ||
65 | my @ops; # array of just opcodes | |
66 | my @states; # array of just states | |
67 | ||
68 | my $longest_name_length= 0; # track lengths of names for nicer reports | |
69 | my (%type_alias); # map the type (??) | |
70 | ||
71 | # register a newly constructed node into our state tables. | |
72 | # ensures that we have no name collisions (on name anyway), | |
73 | # and issues the "id" for the node. | |
74 | sub register_node { | |
75 | my ($node)= @_; | |
76 | ||
77 | if ( $all{ $node->{name} } ) { | |
78 | die "Duplicate item '$node->{name}' in regcomp.sym line $node->{line_num} " | |
79 | . "previously defined on line $all{ $node->{name} }{line_num}\n"; | |
80 | } elsif (!$node->{optype}) { | |
81 | die "must have an optype in node ", Dumper($node); | |
82 | } elsif ($node->{optype} eq "op") { | |
83 | push @ops, $node; | |
84 | } elsif ($node->{optype} eq "state") { | |
85 | push @states, $node; | |
86 | } else { | |
87 | die "Uknown optype '$node->{optype}' in ", Dumper($node); | |
03363afd | 88 | } |
f83e001e YO |
89 | $node->{id}= 0 + @all; |
90 | push @all, $node; | |
91 | $all{ $node->{name} }= $node; | |
92 | } | |
d3d47aac | 93 | |
f83e001e YO |
94 | # Parse and add an opcode definition to the global state. |
95 | # An opcode definition looks like this: | |
96 | # | |
97 | # +- args | |
98 | # | +- flags | |
99 | # | | +- longjmp | |
100 | # Name Type code | | | ; comment | |
101 | # -------------------------------------------------------------------------- | |
102 | # IFMATCH BRANCHJ, off 1 . 2 ; Succeeds if the following matches. | |
103 | # UNLESSM BRANCHJ, off 1 . 2 ; Fails if the following matches. | |
104 | # SUSPEND BRANCHJ, off 1 V 1 ; "Independent" sub-RE. | |
105 | # IFTHEN BRANCHJ, off 1 V 1 ; Switch, should be preceded by switcher. | |
106 | # GROUPP GROUPP, num 1 ; Whether the group matched. | |
107 | # | |
108 | # Not every opcode definition has all of these. We should maybe make this | |
109 | # nicer/easier to read in the future. Also note that the above is tab | |
110 | # sensitive. | |
111 | ||
112 | sub parse_opcode_def { | |
113 | my ( $text, $line_num, $pod_comment )= @_; | |
114 | my $node= { | |
115 | line_num => $line_num, | |
116 | pod_comment => $pod_comment, | |
117 | optype => "op", | |
118 | }; | |
d3d47aac | 119 | |
f83e001e YO |
120 | # first split the line into three, the initial NAME, a middle part |
121 | # that we call "desc" which contains various (not well documented) things, | |
122 | # and a comment section. | |
123 | @{$node}{qw(name desc comment)}= /^(\S+)\s+([^\t]+?)\s*;\s*(.*)/ | |
124 | or die "Failed to match $_"; | |
d3d47aac | 125 | |
f83e001e YO |
126 | # the content of the "desc" field from the first step is extracted here: |
127 | @{$node}{qw(type code args flags longj)}= split /[,\s]\s*/, $node->{desc}; | |
d3d47aac | 128 | |
f83e001e YO |
129 | $node->{$_} //= "" for qw(type code args flags longj); |
130 | ||
131 | register_node($node); # has to be before the type_alias code below | |
132 | ||
133 | if ( !$all{ $node->{type} } and !$type_alias{ $node->{type} } ) { | |
134 | ||
135 | #warn "Regop type '$node->{type}' from regcomp.sym line $line_num" | |
136 | # ." is not an existing regop, and will be aliased to $node->{name}\n" | |
137 | # if -t STDERR; | |
138 | $type_alias{ $node->{type} }= $node->{name}; | |
139 | } | |
140 | ||
141 | $longest_name_length= length $node->{name} | |
142 | if length $node->{name} > $longest_name_length; | |
143 | } | |
144 | ||
145 | # parse out a state definition and add the resulting data | |
146 | # into the global state. may create multiple new states from | |
147 | # a single definition (this is part of the point). | |
148 | # Format for states: | |
149 | # REGOP \t typelist [ \t typelist] | |
150 | # typelist= namelist | |
151 | # = namelist:FAIL | |
152 | # = name:count | |
153 | # Eg: | |
154 | # WHILEM A_pre,A_min,A_max,B_min,B_max:FAIL | |
155 | # BRANCH next:FAIL | |
156 | # CURLYM A,B:FAIL | |
157 | # | |
158 | # The CURLYM definition would create the states: | |
159 | # CURLYM_A, CURLYM_A_fail, CURLYM_B, CURLYM_B_fail | |
160 | sub parse_state_def { | |
161 | my ( $text, $line_num, $pod_comment )= @_; | |
162 | my ( $type, @lists )= split /\s+/, $text; | |
163 | die "No list? $type" if !@lists; | |
164 | foreach my $list (@lists) { | |
165 | my ( $names, $special )= split /:/, $list, 2; | |
166 | $special ||= ""; | |
167 | foreach my $name ( split /,/, $names ) { | |
168 | my $real= | |
169 | $name eq 'resume' | |
170 | ? "resume_$type" | |
171 | : "${type}_$name"; | |
172 | my @suffix; | |
173 | if ( !$special ) { | |
174 | @suffix= (""); | |
175 | } | |
176 | elsif ( $special =~ /\d/ ) { | |
177 | @suffix= ( 1 .. $special ); | |
178 | } | |
179 | elsif ( $special eq 'FAIL' ) { | |
180 | @suffix= ( "", "_fail" ); | |
181 | } | |
182 | else { | |
183 | die "unknown :type ':$special'"; | |
184 | } | |
185 | foreach my $suffix (@suffix) { | |
186 | my $node= { | |
187 | name => "$real$suffix", | |
188 | optype => "state", | |
189 | type => $type || "", | |
190 | comment => "state for $type", | |
191 | line_num => $line_num, | |
192 | }; | |
193 | register_node($node); | |
03363afd YO |
194 | } |
195 | } | |
03363afd YO |
196 | } |
197 | } | |
d09b2d29 | 198 | |
f9ef50a7 | 199 | sub process_flags { |
f83e001e YO |
200 | my ( $flag, $varname, $comment )= @_; |
201 | $comment= '' unless defined $comment; | |
202 | ||
203 | my @selected; | |
204 | my $bitmap= ''; | |
205 | for my $node (@ops) { | |
206 | my $set= $node->{flags} && $node->{flags} eq $flag ? 1 : 0; | |
207 | ||
208 | # Whilst I could do this with vec, I'd prefer to do longhand the arithmetic | |
209 | # ops in the C code. | |
210 | my $current= do { | |
211 | no warnings; | |
212 | ord substr $bitmap, ( $node->{id} >> 3 ); | |
213 | }; | |
214 | substr( $bitmap, ( $node->{id} >> 3 ), 1 )= | |
215 | chr( $current | ( $set << ( $node->{id} & 7 ) ) ); | |
216 | ||
217 | push @selected, $node->{name} if $set; | |
218 | } | |
219 | my $out_string= join ', ', @selected, 0; | |
220 | $out_string =~ s/(.{1,70},) /$1\n /g; | |
ded4dd2a | 221 | |
f83e001e | 222 | my $out_mask= join ', ', map { sprintf "0x%02X", ord $_ } split '', $bitmap; |
ded4dd2a | 223 | |
f83e001e | 224 | return $comment . <<"EOP"; |
ded4dd2a | 225 | #define REGNODE_\U$varname\E(node) (PL_${varname}_bitmask[(node) >> 3] & (1 << ((node) & 7))) |
e52fc539 | 226 | |
f9ef50a7 | 227 | #ifndef DOINIT |
916e4025 | 228 | EXTCONST U8 PL_${varname}\[] __attribute__deprecated__; |
f9ef50a7 | 229 | #else |
916e4025 | 230 | EXTCONST U8 PL_${varname}\[] __attribute__deprecated__ = { |
f9ef50a7 NC |
231 | $out_string |
232 | }; | |
233 | #endif /* DOINIT */ | |
234 | ||
ded4dd2a NC |
235 | #ifndef DOINIT |
236 | EXTCONST U8 PL_${varname}_bitmask[]; | |
237 | #else | |
238 | EXTCONST U8 PL_${varname}_bitmask[] = { | |
239 | $out_mask | |
240 | }; | |
241 | #endif /* DOINIT */ | |
f9ef50a7 NC |
242 | EOP |
243 | } | |
244 | ||
f83e001e YO |
245 | sub read_definition { |
246 | my ( $file )= @_; | |
247 | my ( $seen_sep, $pod_comment )= ""; | |
248 | open my $in_fh, "<", $file | |
249 | or die "Failed to open '$file' for reading: $!"; | |
250 | while (<$in_fh>) { | |
251 | ||
252 | # Special pod comments | |
253 | if (/^#\* ?/) { $pod_comment .= "# $'"; } | |
254 | ||
255 | # Truly blank lines possibly surrounding pod comments | |
256 | elsif (/^\s*$/) { $pod_comment .= "\n" } | |
257 | ||
258 | next if /\A\s*#/ || /\A\s*\z/; | |
259 | ||
260 | s/\s*\z//; | |
261 | if (/^-+\s*$/) { | |
262 | $seen_sep= 1; | |
263 | next; | |
264 | } | |
265 | ||
266 | if ($seen_sep) { | |
267 | parse_state_def( $_, $., $pod_comment ); | |
268 | } | |
269 | else { | |
270 | parse_opcode_def( $_, $., $pod_comment ); | |
271 | } | |
272 | $pod_comment= ""; | |
273 | } | |
274 | close $in_fh; | |
275 | die "Too many regexp/state opcodes! Maximum is 256, but there are ", 0 + @all, | |
276 | " in file!" | |
277 | if @all > 256; | |
278 | } | |
279 | ||
280 | # use fixed width to keep the diffs between regcomp.pl recompiles | |
281 | # as small as possible. | |
282 | my ( $width, $rwidth, $twidth )= ( 22, 12, 9 ); | |
283 | ||
284 | sub print_state_defs { | |
285 | my ($out)= @_; | |
286 | printf $out <<EOP, | |
6bda09f9 YO |
287 | /* Regops and State definitions */ |
288 | ||
03363afd YO |
289 | #define %*s\t%d |
290 | #define %*s\t%d | |
291 | ||
d09b2d29 | 292 | EOP |
f83e001e YO |
293 | -$width, |
294 | REGNODE_MAX => $#ops, | |
295 | -$width, REGMATCH_STATE_MAX => $#all; | |
296 | ||
297 | my %rev_type_alias= reverse %type_alias; | |
298 | for my $node (@ops) { | |
299 | printf $out "#define\t%*s\t%d\t/* %#04x %s */\n", | |
300 | -$width, $node->{name}, $node->{id}, $node->{id}, $node->{comment}; | |
301 | if ( defined( my $alias= $rev_type_alias{ $node->{name} } ) ) { | |
302 | printf $out "#define\t%*s\t%d\t/* %#04x %s */\n", | |
303 | -$width, $alias, $node->{id}, $node->{id}, "type alias"; | |
304 | } | |
305 | } | |
d3d47aac | 306 | |
f83e001e YO |
307 | print $out "\t/* ------------ States ------------- */\n"; |
308 | for my $node (@states) { | |
309 | printf $out "#define\t%*s\t(REGNODE_MAX + %d)\t/* %s */\n", | |
310 | -$width, $node->{name}, $node->{id} - $#ops, $node->{comment}; | |
311 | } | |
d09b2d29 IZ |
312 | } |
313 | ||
f83e001e YO |
314 | sub print_regkind { |
315 | my ($out)= @_; | |
316 | print $out <<EOP; | |
03363afd | 317 | |
6bda09f9 | 318 | /* PL_regkind[] What type of regop or state is this. */ |
d09b2d29 IZ |
319 | |
320 | #ifndef DOINIT | |
22c35a8c | 321 | EXTCONST U8 PL_regkind[]; |
d09b2d29 | 322 | #else |
22c35a8c | 323 | EXTCONST U8 PL_regkind[] = { |
d09b2d29 | 324 | EOP |
f83e001e YO |
325 | use Data::Dumper; |
326 | foreach my $node (@all) { | |
327 | print Dumper($node) if !defined $node->{type} or !defined( $node->{name} ); | |
328 | printf $out "\t%*s\t/* %*s */\n", | |
329 | -1 - $twidth, "$node->{type},", -$width, $node->{name}; | |
330 | print $out "\t/* ------------ States ------------- */\n" | |
331 | if $node->{id} == $#ops and $node->{id} != $#all; | |
332 | } | |
d09b2d29 | 333 | |
f83e001e | 334 | print $out <<EOP; |
d09b2d29 IZ |
335 | }; |
336 | #endif | |
f83e001e YO |
337 | EOP |
338 | } | |
339 | ||
340 | sub wrap_ifdef_print { | |
341 | my $out= shift; | |
342 | my $token= shift; | |
343 | print $out <<EOP; | |
344 | ||
345 | #ifdef $token | |
346 | EOP | |
347 | $_->($out) for @_; | |
348 | print $out <<EOP; | |
349 | #endif /* $token */ | |
350 | ||
351 | EOP | |
352 | } | |
353 | ||
354 | sub print_regarglen { | |
355 | my ($out)= @_; | |
356 | print $out <<EOP; | |
d09b2d29 | 357 | |
6bda09f9 | 358 | /* regarglen[] - How large is the argument part of the node (in regnodes) */ |
d09b2d29 | 359 | |
29de9391 | 360 | static const U8 regarglen[] = { |
d09b2d29 IZ |
361 | EOP |
362 | ||
f83e001e YO |
363 | foreach my $node (@ops) { |
364 | my $size= 0; | |
365 | $size= "EXTRA_SIZE(struct regnode_$node->{args})" if $node->{args}; | |
d09b2d29 | 366 | |
f83e001e YO |
367 | printf $out "\t%*s\t/* %*s */\n", -37, "$size,", -$rwidth, $node->{name}; |
368 | } | |
369 | ||
370 | print $out <<EOP; | |
d09b2d29 | 371 | }; |
f83e001e YO |
372 | EOP |
373 | } | |
374 | ||
375 | sub print_reg_off_by_arg { | |
376 | my ($out)= @_; | |
377 | print $out <<EOP; | |
d09b2d29 | 378 | |
6bda09f9 YO |
379 | /* reg_off_by_arg[] - Which argument holds the offset to the next node */ |
380 | ||
29de9391 | 381 | static const char reg_off_by_arg[] = { |
d09b2d29 IZ |
382 | EOP |
383 | ||
f83e001e YO |
384 | foreach my $node (@ops) { |
385 | my $size= $node->{longj} || 0; | |
9b155405 | 386 | |
f83e001e YO |
387 | printf $out "\t%d,\t/* %*s */\n", $size, -$rwidth, $node->{name}; |
388 | } | |
d09b2d29 | 389 | |
f83e001e | 390 | print $out <<EOP; |
d09b2d29 | 391 | }; |
9b155405 | 392 | |
f83e001e YO |
393 | EOP |
394 | } | |
395 | ||
396 | sub print_reg_name { | |
397 | my ($out)= @_; | |
398 | print $out <<EOP; | |
13d6edb4 | 399 | |
6bda09f9 YO |
400 | /* reg_name[] - Opcode/state names in string form, for debugging */ |
401 | ||
22429478 | 402 | #ifndef DOINIT |
13d6edb4 | 403 | EXTCONST char * PL_reg_name[]; |
22429478 | 404 | #else |
4764e399 | 405 | EXTCONST char * const PL_reg_name[] = { |
9b155405 IZ |
406 | EOP |
407 | ||
f83e001e YO |
408 | my $ofs= 0; |
409 | my $sym= ""; | |
410 | foreach my $node (@all) { | |
411 | my $size= $node->{longj} || 0; | |
412 | ||
413 | printf $out "\t%*s\t/* $sym%#04x */\n", | |
414 | -3 - $width, qq("$node->{name}",), $node->{id} - $ofs; | |
415 | if ( $node->{id} == $#ops and @ops != @all ) { | |
416 | print $out "\t/* ------------ States ------------- */\n"; | |
417 | $ofs= $#ops; | |
418 | $sym= 'REGNODE_MAX +'; | |
419 | } | |
420 | } | |
9b155405 | 421 | |
f83e001e | 422 | print $out <<EOP; |
9b155405 | 423 | }; |
22429478 | 424 | #endif /* DOINIT */ |
d09b2d29 | 425 | |
337ff307 | 426 | EOP |
f83e001e | 427 | } |
337ff307 | 428 | |
f83e001e YO |
429 | sub print_reg_extflags_name { |
430 | my ($out)= @_; | |
431 | print $out <<EOP; | |
f7819f85 A |
432 | /* PL_reg_extflags_name[] - Opcode/state names in string form, for debugging */ |
433 | ||
434 | #ifndef DOINIT | |
435 | EXTCONST char * PL_reg_extflags_name[]; | |
436 | #else | |
437 | EXTCONST char * const PL_reg_extflags_name[] = { | |
d09b2d29 IZ |
438 | EOP |
439 | ||
f83e001e YO |
440 | my %rxfv; |
441 | my %definitions; # Remember what the symbol definitions are | |
442 | my $val= 0; | |
443 | my %reverse; | |
444 | my $REG_EXTFLAGS_NAME_SIZE= 0; | |
445 | foreach my $file ( "op_reg_common.h", "regexp.h" ) { | |
446 | open my $in_fh, "<", $file or die "Can't read '$file': $!"; | |
447 | while (<$in_fh>) { | |
448 | ||
449 | # optional leading '_'. Return symbol in $1, and strip it from | |
450 | # comment of line | |
451 | if (s/^ \# \s* define \s+ ( _? RXf_ \w+ ) \s+ //xi) { | |
452 | chomp; | |
453 | my $define= $1; | |
454 | my $orig= $_; | |
455 | s{ /\* .*? \*/ }{ }x; # Replace comments by a blank | |
456 | ||
457 | # Replace any prior defined symbols by their values | |
458 | foreach my $key ( keys %definitions ) { | |
459 | s/\b$key\b/$definitions{$key}/g; | |
460 | } | |
5c72e80d | 461 | |
f83e001e YO |
462 | # Remove the U suffix from unsigned int literals |
463 | s/\b([0-9]+)U\b/$1/g; | |
5c72e80d | 464 | |
f83e001e | 465 | my $newval= eval $_; # Get numeric definition |
6a080ccd | 466 | |
f83e001e | 467 | $definitions{$define}= $newval; |
6a080ccd | 468 | |
f83e001e YO |
469 | next unless $_ =~ /<</; # Bit defines use left shift |
470 | if ( $val & $newval ) { | |
471 | my @names= ( $define, $reverse{$newval} ); | |
472 | s/PMf_// for @names; | |
473 | if ( $names[0] ne $names[1] ) { | |
474 | die sprintf | |
475 | "ERROR: both $define and $reverse{$newval} use 0x%08X (%s:%s)", | |
476 | $newval, $orig, $_; | |
477 | } | |
478 | next; | |
6976c986 | 479 | } |
f83e001e YO |
480 | $val |= $newval; |
481 | $rxfv{$define}= $newval; | |
482 | $reverse{$newval}= $define; | |
1850c8f9 | 483 | } |
6a080ccd | 484 | } |
f7819f85 | 485 | } |
f83e001e YO |
486 | my %vrxf= reverse %rxfv; |
487 | printf $out "\t/* Bits in extflags defined: %s */\n", unpack 'B*', pack 'N', | |
488 | $val; | |
489 | my %multibits; | |
490 | for ( 0 .. 31 ) { | |
491 | my $power_of_2= 2**$_; | |
492 | my $n= $vrxf{$power_of_2}; | |
493 | my $extra= ""; | |
494 | if ( !$n ) { | |
495 | ||
496 | # Here, there was no name that matched exactly the bit. It could be | |
497 | # either that it is unused, or the name matches multiple bits. | |
498 | if ( !( $val & $power_of_2 ) ) { | |
499 | $n= "UNUSED_BIT_$_"; | |
500 | } | |
501 | else { | |
502 | ||
503 | # Here, must be because it matches multiple bits. Look through | |
504 | # all possibilities until find one that matches this one. Use | |
505 | # that name, and all the bits it matches | |
506 | foreach my $name ( keys %rxfv ) { | |
507 | if ( $rxfv{$name} & $power_of_2 ) { | |
508 | $n= $name . ( $multibits{$name}++ ); | |
509 | $extra= sprintf qq{ : "%s" - 0x%08x}, $name, | |
510 | $rxfv{$name} | |
511 | if $power_of_2 != $rxfv{$name}; | |
512 | last; | |
513 | } | |
5458d9a0 KW |
514 | } |
515 | } | |
516 | } | |
f83e001e YO |
517 | s/\bRXf_(PMf_)?// for $n, $extra; |
518 | printf $out qq(\t%-20s/* 0x%08x%s */\n), qq("$n",), $power_of_2, $extra; | |
519 | $REG_EXTFLAGS_NAME_SIZE++; | |
5458d9a0 | 520 | } |
f83e001e YO |
521 | |
522 | print $out <<EOP; | |
f7819f85 A |
523 | }; |
524 | #endif /* DOINIT */ | |
525 | ||
adc2d0c9 JH |
526 | #ifdef DEBUGGING |
527 | # define REG_EXTFLAGS_NAME_SIZE $REG_EXTFLAGS_NAME_SIZE | |
528 | #endif | |
f83e001e | 529 | EOP |
adc2d0c9 | 530 | |
337ff307 | 531 | } |
f83e001e YO |
532 | |
533 | sub print_reg_intflags_name { | |
534 | my ($out)= @_; | |
535 | print $out <<EOP; | |
536 | ||
337ff307 YO |
537 | /* PL_reg_intflags_name[] - Opcode/state names in string form, for debugging */ |
538 | ||
539 | #ifndef DOINIT | |
540 | EXTCONST char * PL_reg_intflags_name[]; | |
541 | #else | |
542 | EXTCONST char * const PL_reg_intflags_name[] = { | |
543 | EOP | |
544 | ||
f83e001e YO |
545 | my %rxfv; |
546 | my %definitions; # Remember what the symbol definitions are | |
547 | my $val= 0; | |
548 | my %reverse; | |
549 | my $REG_INTFLAGS_NAME_SIZE= 0; | |
550 | foreach my $file ("regcomp.h") { | |
551 | open my $fh, "<", $file or die "Can't read $file: $!"; | |
552 | while (<$fh>) { | |
553 | ||
554 | # optional leading '_'. Return symbol in $1, and strip it from | |
555 | # comment of line | |
556 | if ( | |
557 | m/^ \# \s* define \s+ ( PREGf_ ( \w+ ) ) \s+ 0x([0-9a-f]+)(?:\s*\/\*(.*)\*\/)?/xi | |
558 | ) | |
559 | { | |
560 | chomp; | |
561 | my $define= $1; | |
562 | my $abbr= $2; | |
563 | my $hex= $3; | |
564 | my $comment= $4; | |
565 | my $val= hex($hex); | |
566 | $comment= $comment ? " - $comment" : ""; | |
567 | ||
568 | printf $out qq(\t%-30s/* 0x%08x - %s%s */\n), qq("$abbr",), | |
569 | $val, $define, $comment; | |
570 | $REG_INTFLAGS_NAME_SIZE++; | |
571 | } | |
337ff307 YO |
572 | } |
573 | } | |
337ff307 | 574 | |
f83e001e | 575 | print $out <<EOP; |
337ff307 YO |
576 | }; |
577 | #endif /* DOINIT */ | |
578 | ||
579 | EOP | |
f83e001e | 580 | print $out <<EOQ; |
adc2d0c9 JH |
581 | #ifdef DEBUGGING |
582 | # define REG_INTFLAGS_NAME_SIZE $REG_INTFLAGS_NAME_SIZE | |
583 | #endif | |
337ff307 | 584 | |
adc2d0c9 JH |
585 | EOQ |
586 | } | |
f9ef50a7 | 587 | |
f83e001e YO |
588 | sub print_process_flags { |
589 | my ($out)= @_; | |
590 | ||
591 | print $out process_flags( 'V', 'varies', <<'EOC'); | |
f9ef50a7 NC |
592 | /* The following have no fixed length. U8 so we can do strchr() on it. */ |
593 | EOC | |
594 | ||
f83e001e | 595 | print $out process_flags( 'S', 'simple', <<'EOC'); |
ce716c52 | 596 | |
f9ef50a7 NC |
597 | /* The following always have a length of 1. U8 we can do strchr() on it. */ |
598 | /* (Note that length 1 means "one character" under UTF8, not "one octet".) */ | |
599 | EOC | |
600 | ||
f83e001e | 601 | } |
65aa4ca7 | 602 | |
f83e001e YO |
603 | sub do_perldebguts { |
604 | my $guts= open_new( 'pod/perldebguts.pod', '>' ); | |
65aa4ca7 | 605 | |
f83e001e YO |
606 | my $node; |
607 | my $code; | |
608 | my $name_fmt= '<' x ( $longest_name_length - 1 ); | |
609 | my $descr_fmt= '<' x ( 58 - $longest_name_length ); | |
610 | eval <<EOD or die $@; | |
65aa4ca7 FC |
611 | format GuTS = |
612 | ^*~~ | |
f83e001e | 613 | \$node->{pod_comment} |
95fe686d | 614 | ^$name_fmt ^<<<<<<<<< ^$descr_fmt~~ |
f83e001e | 615 | \$node->{name}, \$code, \$node->{comment}//'' |
65aa4ca7 | 616 | . |
f83e001e | 617 | 1; |
65aa4ca7 | 618 | EOD |
f83e001e YO |
619 | |
620 | my $old_fh= select($guts); | |
621 | $~= "GuTS"; | |
65aa4ca7 FC |
622 | |
623 | open my $oldguts, "pod/perldebguts.pod" | |
624 | or die "$0 cannot open pod/perldebguts.pod for reading: $!"; | |
f83e001e | 625 | while (<$oldguts>) { |
65aa4ca7 FC |
626 | print; |
627 | last if /=for regcomp.pl begin/; | |
628 | } | |
629 | ||
f83e001e | 630 | print <<'END_OF_DESCR'; |
65aa4ca7 FC |
631 | |
632 | # TYPE arg-description [num-args] [longjump-len] DESCRIPTION | |
f83e001e YO |
633 | END_OF_DESCR |
634 | for my $n (@ops) { | |
635 | $node= $n; | |
636 | $code= "$node->{code} " . ( $node->{args} || "" ); | |
637 | $code .= " $node->{longj}" if $node->{longj}; | |
638 | if ( $node->{pod_comment} ||= "" ) { | |
639 | ||
65aa4ca7 | 640 | # Trim multiple blanks |
f83e001e YO |
641 | $node->{pod_comment} =~ s/^\n\n+/\n/; |
642 | $node->{pod_comment} =~ s/\n\n+$/\n\n/; | |
65aa4ca7 FC |
643 | } |
644 | write; | |
645 | } | |
646 | print "\n"; | |
647 | ||
f83e001e | 648 | while (<$oldguts>) { |
65aa4ca7 FC |
649 | last if /=for regcomp.pl end/; |
650 | } | |
651 | do { print } while <$oldguts>; | |
f83e001e YO |
652 | select $old_fh; |
653 | close_and_rename($guts); | |
654 | } | |
65aa4ca7 | 655 | |
f83e001e YO |
656 | read_definition("regcomp.sym"); |
657 | my $out= open_new( 'regnodes.h', '>', | |
658 | { by => 'regen/regcomp.pl', from => 'regcomp.sym' } ); | |
659 | print_state_defs($out); | |
660 | print_regkind($out); | |
661 | wrap_ifdef_print( | |
662 | $out, | |
663 | "REG_COMP_C", | |
664 | \&print_regarglen, | |
665 | \&print_reg_off_by_arg | |
666 | ); | |
667 | print_reg_name($out); | |
668 | print_reg_extflags_name($out); | |
669 | print_reg_intflags_name($out); | |
670 | print_process_flags($out); | |
671 | read_only_bottom_close_and_rename($out); | |
65aa4ca7 | 672 | |
f83e001e | 673 | do_perldebguts(); |