Commit | Line | Data |
---|---|---|
916e4025 | 1 | #!/usr/bin/perl -w |
f83e001e YO |
2 | # |
3 | # | |
6294c161 DM |
4 | # Regenerate (overwriting only if changed): |
5 | # | |
65aa4ca7 | 6 | # pod/perldebguts.pod |
6294c161 DM |
7 | # regnodes.h |
8 | # | |
9 | # from information stored in | |
10 | # | |
11 | # regcomp.sym | |
12 | # regexp.h | |
13 | # | |
65aa4ca7 FC |
14 | # pod/perldebguts.pod is not completely regenerated. Only the table of |
15 | # regexp nodes is replaced; other parts remain unchanged. | |
16 | # | |
6294c161 DM |
17 | # Accepts the standard regen_lib -q and -v args. |
18 | # | |
19 | # This script is normally invoked from regen.pl. | |
20 | ||
36bb303b NC |
21 | BEGIN { |
22 | # Get function prototypes | |
3d7c117d | 23 | require './regen/regen_lib.pl'; |
36bb303b | 24 | } |
ce553cf5 | 25 | |
03363afd | 26 | use strict; |
03363afd | 27 | |
f83e001e YO |
28 | # NOTE I don't think anyone actually knows what all of these properties mean, |
29 | # and I suspect some of them are outright unused. This is a first attempt to | |
30 | # clean up the generation so maybe one day we can move to something more self | |
31 | # documenting. (One might argue that an array of hashes of properties would | |
32 | # be easier to use.) | |
33 | # | |
34 | # Why we use the term regnode and nodes, and not say, opcodes, I am not sure. | |
35 | ||
36 | # General thoughts: | |
37 | # 1. We use a single continuum to represent both opcodes and states, | |
38 | # and in regexec.c we switch on the combined set. | |
39 | # 2. Opcodes have more information associated to them, states are simpler, | |
40 | # basically just an identifier/number that can be used to switch within | |
41 | # the state machine. | |
42 | # 3. Some opcode are order dependent. | |
43 | # 4. Output files often use "tricks" to reduce diff effects. Some of what | |
44 | # we do below is more clumsy looking than it could be because of this. | |
45 | ||
46 | # Op/state properties: | |
47 | # | |
48 | # Property In Descr | |
49 | # ---------------------------------------------------------------------------- | |
50 | # name Both Name of op/state | |
51 | # id Both integer value for this opcode/state | |
52 | # optype Both Either 'op' or 'state' | |
e21ef692 | 53 | # line_num Both line_num number of the input file for this item. |
f83e001e | 54 | # type Op Type of node (aka regkind) |
e21ef692 KW |
55 | # code Op Apparently not used |
56 | # suffix Op which regnode struct this uses, so if this is '1', it | |
57 | # uses 'struct regnode_1' | |
58 | # flags Op S for simple; V for varies | |
46167d76 | 59 | # longj Op Boolean as to if this node is a longjump |
e21ef692 KW |
60 | # comment Both Comment about node, if any. Placed in perlredebguts |
61 | # as its description | |
f83e001e | 62 | # pod_comment Both Special comments for pod output (preceding lines in def) |
e21ef692 | 63 | # Such lines begin with '#*' |
f83e001e YO |
64 | |
65 | # Global State | |
66 | my @all; # all opcodes/state | |
67 | my %all; # hash of all opcode/state names | |
68 | ||
69 | my @ops; # array of just opcodes | |
70 | my @states; # array of just states | |
71 | ||
72 | my $longest_name_length= 0; # track lengths of names for nicer reports | |
73 | my (%type_alias); # map the type (??) | |
74 | ||
75 | # register a newly constructed node into our state tables. | |
76 | # ensures that we have no name collisions (on name anyway), | |
77 | # and issues the "id" for the node. | |
78 | sub register_node { | |
79 | my ($node)= @_; | |
80 | ||
81 | if ( $all{ $node->{name} } ) { | |
82 | die "Duplicate item '$node->{name}' in regcomp.sym line $node->{line_num} " | |
83 | . "previously defined on line $all{ $node->{name} }{line_num}\n"; | |
84 | } elsif (!$node->{optype}) { | |
85 | die "must have an optype in node ", Dumper($node); | |
86 | } elsif ($node->{optype} eq "op") { | |
87 | push @ops, $node; | |
88 | } elsif ($node->{optype} eq "state") { | |
89 | push @states, $node; | |
90 | } else { | |
91 | die "Uknown optype '$node->{optype}' in ", Dumper($node); | |
03363afd | 92 | } |
f83e001e YO |
93 | $node->{id}= 0 + @all; |
94 | push @all, $node; | |
95 | $all{ $node->{name} }= $node; | |
46167d76 KW |
96 | |
97 | if ($node->{longj} && $node->{longj} != 1) { | |
98 | die "longj field must be in [01] if present in ", Dumper($node); | |
99 | } | |
100 | ||
f83e001e | 101 | } |
d3d47aac | 102 | |
f83e001e | 103 | # Parse and add an opcode definition to the global state. |
e21ef692 | 104 | # What an opcode definition looks like is given in regcomp.sym. |
f83e001e | 105 | # |
e21ef692 KW |
106 | # Not every opcode definition has all of the components. We should maybe make |
107 | # this nicer/easier to read in the future. Also note that the above is tab | |
f83e001e YO |
108 | # sensitive. |
109 | ||
e21ef692 KW |
110 | # Special comments for an entry precede it, and begin with '#*' and are placed |
111 | # in the generated pod file just before the entry. | |
112 | ||
f83e001e YO |
113 | sub parse_opcode_def { |
114 | my ( $text, $line_num, $pod_comment )= @_; | |
115 | my $node= { | |
116 | line_num => $line_num, | |
117 | pod_comment => $pod_comment, | |
118 | optype => "op", | |
119 | }; | |
d3d47aac | 120 | |
f83e001e YO |
121 | # first split the line into three, the initial NAME, a middle part |
122 | # that we call "desc" which contains various (not well documented) things, | |
123 | # and a comment section. | |
124 | @{$node}{qw(name desc comment)}= /^(\S+)\s+([^\t]+?)\s*;\s*(.*)/ | |
125 | or die "Failed to match $_"; | |
d3d47aac | 126 | |
f83e001e | 127 | # the content of the "desc" field from the first step is extracted here: |
484678fc | 128 | @{$node}{qw(type code suffix flags longj)}= split /[,\s]\s*/, $node->{desc}; |
d3d47aac | 129 | |
d1bd48a0 | 130 | defined $node->{$_} or $node->{$_} = "" |
484678fc | 131 | for qw(type code suffix flags longj); |
f83e001e YO |
132 | |
133 | register_node($node); # has to be before the type_alias code below | |
134 | ||
135 | if ( !$all{ $node->{type} } and !$type_alias{ $node->{type} } ) { | |
136 | ||
137 | #warn "Regop type '$node->{type}' from regcomp.sym line $line_num" | |
138 | # ." is not an existing regop, and will be aliased to $node->{name}\n" | |
139 | # if -t STDERR; | |
140 | $type_alias{ $node->{type} }= $node->{name}; | |
141 | } | |
142 | ||
143 | $longest_name_length= length $node->{name} | |
144 | if length $node->{name} > $longest_name_length; | |
145 | } | |
146 | ||
147 | # parse out a state definition and add the resulting data | |
148 | # into the global state. may create multiple new states from | |
149 | # a single definition (this is part of the point). | |
150 | # Format for states: | |
151 | # REGOP \t typelist [ \t typelist] | |
152 | # typelist= namelist | |
153 | # = namelist:FAIL | |
154 | # = name:count | |
155 | # Eg: | |
156 | # WHILEM A_pre,A_min,A_max,B_min,B_max:FAIL | |
157 | # BRANCH next:FAIL | |
158 | # CURLYM A,B:FAIL | |
159 | # | |
160 | # The CURLYM definition would create the states: | |
161 | # CURLYM_A, CURLYM_A_fail, CURLYM_B, CURLYM_B_fail | |
162 | sub parse_state_def { | |
163 | my ( $text, $line_num, $pod_comment )= @_; | |
164 | my ( $type, @lists )= split /\s+/, $text; | |
165 | die "No list? $type" if !@lists; | |
166 | foreach my $list (@lists) { | |
167 | my ( $names, $special )= split /:/, $list, 2; | |
168 | $special ||= ""; | |
169 | foreach my $name ( split /,/, $names ) { | |
170 | my $real= | |
171 | $name eq 'resume' | |
172 | ? "resume_$type" | |
173 | : "${type}_$name"; | |
174 | my @suffix; | |
175 | if ( !$special ) { | |
176 | @suffix= (""); | |
177 | } | |
178 | elsif ( $special =~ /\d/ ) { | |
179 | @suffix= ( 1 .. $special ); | |
180 | } | |
181 | elsif ( $special eq 'FAIL' ) { | |
182 | @suffix= ( "", "_fail" ); | |
183 | } | |
184 | else { | |
185 | die "unknown :type ':$special'"; | |
186 | } | |
187 | foreach my $suffix (@suffix) { | |
188 | my $node= { | |
189 | name => "$real$suffix", | |
190 | optype => "state", | |
191 | type => $type || "", | |
192 | comment => "state for $type", | |
193 | line_num => $line_num, | |
194 | }; | |
195 | register_node($node); | |
03363afd YO |
196 | } |
197 | } | |
03363afd YO |
198 | } |
199 | } | |
d09b2d29 | 200 | |
f9ef50a7 | 201 | sub process_flags { |
f83e001e YO |
202 | my ( $flag, $varname, $comment )= @_; |
203 | $comment= '' unless defined $comment; | |
204 | ||
205 | my @selected; | |
206 | my $bitmap= ''; | |
207 | for my $node (@ops) { | |
208 | my $set= $node->{flags} && $node->{flags} eq $flag ? 1 : 0; | |
209 | ||
210 | # Whilst I could do this with vec, I'd prefer to do longhand the arithmetic | |
211 | # ops in the C code. | |
212 | my $current= do { | |
213 | no warnings; | |
214 | ord substr $bitmap, ( $node->{id} >> 3 ); | |
215 | }; | |
216 | substr( $bitmap, ( $node->{id} >> 3 ), 1 )= | |
217 | chr( $current | ( $set << ( $node->{id} & 7 ) ) ); | |
218 | ||
219 | push @selected, $node->{name} if $set; | |
220 | } | |
221 | my $out_string= join ', ', @selected, 0; | |
222 | $out_string =~ s/(.{1,70},) /$1\n /g; | |
ded4dd2a | 223 | |
f83e001e | 224 | my $out_mask= join ', ', map { sprintf "0x%02X", ord $_ } split '', $bitmap; |
ded4dd2a | 225 | |
f83e001e | 226 | return $comment . <<"EOP"; |
ded4dd2a | 227 | #define REGNODE_\U$varname\E(node) (PL_${varname}_bitmask[(node) >> 3] & (1 << ((node) & 7))) |
e52fc539 | 228 | |
f9ef50a7 | 229 | #ifndef DOINIT |
916e4025 | 230 | EXTCONST U8 PL_${varname}\[] __attribute__deprecated__; |
f9ef50a7 | 231 | #else |
916e4025 | 232 | EXTCONST U8 PL_${varname}\[] __attribute__deprecated__ = { |
f9ef50a7 NC |
233 | $out_string |
234 | }; | |
235 | #endif /* DOINIT */ | |
236 | ||
ded4dd2a NC |
237 | #ifndef DOINIT |
238 | EXTCONST U8 PL_${varname}_bitmask[]; | |
239 | #else | |
240 | EXTCONST U8 PL_${varname}_bitmask[] = { | |
241 | $out_mask | |
242 | }; | |
243 | #endif /* DOINIT */ | |
f9ef50a7 NC |
244 | EOP |
245 | } | |
246 | ||
938090ac KW |
247 | sub print_process_EXACTish { |
248 | my ($out)= @_; | |
249 | ||
250 | # Creates some bitmaps for EXACTish nodes. | |
251 | ||
252 | my @folded; | |
253 | my @req8; | |
254 | ||
255 | my $base; | |
256 | for my $node (@ops) { | |
257 | next unless $node->{type} eq 'EXACT'; | |
258 | my $name = $node->{name}; | |
259 | $base = $node->{id} if $name eq 'EXACT'; | |
260 | ||
261 | my $index = $node->{id} - $base; | |
262 | ||
263 | # This depends entirely on naming conventions in regcomp.sym | |
264 | $folded[$index] = $name =~ /^EXACTF/ || 0; | |
265 | $req8[$index] = $name =~ /8/ || 0; | |
266 | } | |
267 | ||
268 | die "Can't cope with > 32 EXACTish nodes" if @folded > 32; | |
269 | ||
270 | my $exactf = sprintf "%X", oct("0b" . join "", reverse @folded); | |
271 | my $req8 = sprintf "%X", oct("0b" . join "", reverse @req8); | |
272 | print $out <<EOP, | |
273 | ||
274 | /* Is 'op', known to be of type EXACT, folding? */ | |
275 | #define isEXACTFish(op) (__ASSERT_(PL_regkind[op] == EXACT) (PL_EXACTFish_bitmask & (1U << (op - EXACT)))) | |
276 | ||
277 | /* Do only UTF-8 target strings match 'op', known to be of type EXACT? */ | |
278 | #define isEXACT_REQ8(op) (__ASSERT_(PL_regkind[op] == EXACT) (PL_EXACT_REQ8_bitmask & (1U << (op - EXACT)))) | |
279 | ||
280 | #ifndef DOINIT | |
281 | EXTCONST U32 PL_EXACTFish_bitmask; | |
282 | EXTCONST U32 PL_EXACT_REQ8_bitmask; | |
283 | #else | |
284 | EXTCONST U32 PL_EXACTFish_bitmask = 0x$exactf; | |
285 | EXTCONST U32 PL_EXACT_REQ8_bitmask = 0x$req8; | |
286 | #endif /* DOINIT */ | |
287 | EOP | |
288 | } | |
289 | ||
f83e001e YO |
290 | sub read_definition { |
291 | my ( $file )= @_; | |
292 | my ( $seen_sep, $pod_comment )= ""; | |
293 | open my $in_fh, "<", $file | |
294 | or die "Failed to open '$file' for reading: $!"; | |
295 | while (<$in_fh>) { | |
296 | ||
297 | # Special pod comments | |
298 | if (/^#\* ?/) { $pod_comment .= "# $'"; } | |
299 | ||
300 | # Truly blank lines possibly surrounding pod comments | |
301 | elsif (/^\s*$/) { $pod_comment .= "\n" } | |
302 | ||
303 | next if /\A\s*#/ || /\A\s*\z/; | |
304 | ||
305 | s/\s*\z//; | |
306 | if (/^-+\s*$/) { | |
307 | $seen_sep= 1; | |
308 | next; | |
309 | } | |
310 | ||
311 | if ($seen_sep) { | |
312 | parse_state_def( $_, $., $pod_comment ); | |
313 | } | |
314 | else { | |
315 | parse_opcode_def( $_, $., $pod_comment ); | |
316 | } | |
317 | $pod_comment= ""; | |
318 | } | |
319 | close $in_fh; | |
320 | die "Too many regexp/state opcodes! Maximum is 256, but there are ", 0 + @all, | |
321 | " in file!" | |
322 | if @all > 256; | |
323 | } | |
324 | ||
325 | # use fixed width to keep the diffs between regcomp.pl recompiles | |
326 | # as small as possible. | |
519d76f5 | 327 | my ( $base_name_width, $rwidth, $twidth )= ( 22, 12, 9 ); |
f83e001e YO |
328 | |
329 | sub print_state_defs { | |
330 | my ($out)= @_; | |
331 | printf $out <<EOP, | |
6bda09f9 YO |
332 | /* Regops and State definitions */ |
333 | ||
03363afd YO |
334 | #define %*s\t%d |
335 | #define %*s\t%d | |
336 | ||
d09b2d29 | 337 | EOP |
519d76f5 | 338 | -$base_name_width, |
f83e001e | 339 | REGNODE_MAX => $#ops, |
519d76f5 | 340 | -$base_name_width, REGMATCH_STATE_MAX => $#all; |
f83e001e YO |
341 | |
342 | my %rev_type_alias= reverse %type_alias; | |
ce553cf5 KW |
343 | my $base_format = "#define %*s\t%d\t/* %#04x %s */\n"; |
344 | my @withs; | |
345 | my $in_states = 0; | |
346 | ||
347 | my $max_name_width = 0; | |
348 | for my $ref (\@ops, \@states) { | |
349 | for my $node ($ref->@*) { | |
350 | my $len = length $node->{name}; | |
351 | $max_name_width = $len if $max_name_width < $len; | |
352 | } | |
353 | } | |
354 | ||
355 | die "Do a white-space only commit to increase \$base_name_width to" | |
356 | . " $max_name_width; then re-run" if $base_name_width < $max_name_width; | |
357 | ||
358 | print $out <<EOT; | |
bb24c5fa KW |
359 | /* -- For regexec.c to switch on target being utf8 (t8) or not (tb, b='byte'); */ |
360 | #define with_t_UTF8ness(op, t_utf8) (((op) << 1) + (cBOOL(t_utf8))) | |
361 | /* -- same, but also with pattern (p8, pb) -- */ | |
ce553cf5 KW |
362 | #define with_tp_UTF8ness(op, t_utf8, p_utf8) \\ |
363 | \t\t(((op) << 2) + (cBOOL(t_utf8) << 1) + cBOOL(p_utf8)) | |
364 | ||
365 | /* The #defines below give both the basic regnode and the expanded version for | |
366 | switching on utf8ness */ | |
367 | EOT | |
368 | ||
f83e001e | 369 | for my $node (@ops) { |
ce553cf5 | 370 | print_state_def_line($out, $node->{name}, $node->{id}, $node->{comment}); |
f83e001e | 371 | if ( defined( my $alias= $rev_type_alias{ $node->{name} } ) ) { |
ce553cf5 | 372 | print_state_def_line($out, $alias, $node->{id}, $node->{comment}); |
f83e001e YO |
373 | } |
374 | } | |
d3d47aac | 375 | |
f83e001e YO |
376 | print $out "\t/* ------------ States ------------- */\n"; |
377 | for my $node (@states) { | |
ce553cf5 KW |
378 | print_state_def_line($out, $node->{name}, $node->{id}, $node->{comment}); |
379 | } | |
380 | } | |
381 | ||
382 | sub print_state_def_line | |
383 | { | |
384 | my ($fh, $name, $id, $comment) = @_; | |
385 | ||
bb24c5fa | 386 | # The sub-names are like '_tb' or '_tb_p8' = max 6 chars wide |
ce553cf5 KW |
387 | my $name_col_width = $base_name_width + 6; |
388 | my $base_id_width = 3; # Max is '255' or 3 cols | |
bb24c5fa | 389 | my $mid_id_width = 3; # Max is '511' or 3 cols |
ce553cf5 KW |
390 | my $full_id_width = 3; # Max is '1023' but not close to using the 4th |
391 | ||
392 | my $line = "#define " . $name; | |
393 | $line .= " " x ($name_col_width - length($name)); | |
394 | ||
395 | $line .= sprintf "%*s", $base_id_width, $id; | |
bb24c5fa | 396 | $line .= " " x $mid_id_width; |
ce553cf5 KW |
397 | $line .= " " x ($full_id_width + 2); |
398 | ||
399 | $line .= "/* "; | |
400 | my $hanging = length $line; # Indent any subsequent line to this pos | |
401 | $line .= sprintf "0x%02x", $id; | |
402 | ||
c1ec4bdd | 403 | my $columns = 78; |
ce553cf5 | 404 | |
c1ec4bdd KW |
405 | # From the documentation: 'In fact, every resulting line will have length |
406 | # of no more than "$columns - 1"' | |
ce553cf5 KW |
407 | $line = wrap($columns + 1, "", " " x $hanging, "$line $comment"); |
408 | chomp $line; # wrap always adds a trailing \n | |
409 | $line =~ s/ \s+ $ //x; # trim, just in case. | |
410 | ||
411 | # The comment may have wrapped. Find the final \n and measure the length | |
412 | # to the end. If it is short enough, just append the ' */' to the line. | |
413 | # If it is too close to the end of the space available, add an extra line | |
414 | # that consists solely of blanks and the ' */' | |
415 | my $len = length($line); my $rindex = rindex($line, "\n"); | |
416 | if (length($line) - rindex($line, "\n") - 1 <= $columns - 3) { | |
417 | $line .= " */\n"; | |
f83e001e | 418 | } |
ce553cf5 KW |
419 | else { |
420 | $line .= "\n" . " " x ($hanging - 3) . "*/\n"; | |
421 | } | |
422 | ||
423 | print $fh $line; | |
424 | ||
bb24c5fa KW |
425 | # And add the 2 subsidiary #defines used when switching on |
426 | # with_t_UTF8nes() | |
427 | my $with_id_t = $id * 2; | |
428 | for my $with (qw(tb t8)) { | |
429 | my $with_name = "${name}_$with"; | |
430 | print $fh "#define ", $with_name; | |
431 | print $fh " " x ($name_col_width - length($with_name) + $base_id_width); | |
432 | printf $fh "%*s", $mid_id_width, $with_id_t; | |
433 | print $fh " " x $full_id_width; | |
434 | printf $fh " /*"; | |
435 | print $fh " " x (4 + 2); # 4 is width of 0xHH that the base entry uses | |
436 | printf $fh "0x%03x */\n", $with_id_t; | |
437 | ||
438 | $with_id_t++; | |
439 | } | |
440 | ||
441 | # Finally add the 4 subsidiary #defines used when switching on | |
ce553cf5 | 442 | # with_tp_UTF8nes() |
bb24c5fa | 443 | my $with_id_tp = $id * 4; |
ce553cf5 KW |
444 | for my $with (qw(tb_pb tb_p8 t8_pb t8_p8)) { |
445 | my $with_name = "${name}_$with"; | |
446 | print $fh "#define ", $with_name; | |
bb24c5fa KW |
447 | print $fh " " x ($name_col_width - length($with_name) + $base_id_width + $mid_id_width); |
448 | printf $fh "%*s", $full_id_width, $with_id_tp; | |
ce553cf5 KW |
449 | printf $fh " /*"; |
450 | print $fh " " x (4 + 2); # 4 is width of 0xHH that the base entry uses | |
bb24c5fa | 451 | printf $fh "0x%03x */\n", $with_id_tp; |
ce553cf5 | 452 | |
bb24c5fa | 453 | $with_id_tp++; |
ce553cf5 KW |
454 | } |
455 | ||
456 | print $fh "\n"; # Blank line separates groups for clarity | |
d09b2d29 IZ |
457 | } |
458 | ||
f83e001e YO |
459 | sub print_regkind { |
460 | my ($out)= @_; | |
461 | print $out <<EOP; | |
03363afd | 462 | |
6bda09f9 | 463 | /* PL_regkind[] What type of regop or state is this. */ |
d09b2d29 IZ |
464 | |
465 | #ifndef DOINIT | |
22c35a8c | 466 | EXTCONST U8 PL_regkind[]; |
d09b2d29 | 467 | #else |
22c35a8c | 468 | EXTCONST U8 PL_regkind[] = { |
d09b2d29 | 469 | EOP |
f83e001e YO |
470 | use Data::Dumper; |
471 | foreach my $node (@all) { | |
472 | print Dumper($node) if !defined $node->{type} or !defined( $node->{name} ); | |
473 | printf $out "\t%*s\t/* %*s */\n", | |
519d76f5 | 474 | -1 - $twidth, "$node->{type},", -$base_name_width, $node->{name}; |
f83e001e YO |
475 | print $out "\t/* ------------ States ------------- */\n" |
476 | if $node->{id} == $#ops and $node->{id} != $#all; | |
477 | } | |
d09b2d29 | 478 | |
f83e001e | 479 | print $out <<EOP; |
d09b2d29 IZ |
480 | }; |
481 | #endif | |
f83e001e YO |
482 | EOP |
483 | } | |
484 | ||
485 | sub wrap_ifdef_print { | |
486 | my $out= shift; | |
487 | my $token= shift; | |
488 | print $out <<EOP; | |
489 | ||
490 | #ifdef $token | |
491 | EOP | |
492 | $_->($out) for @_; | |
493 | print $out <<EOP; | |
494 | #endif /* $token */ | |
495 | ||
496 | EOP | |
497 | } | |
498 | ||
499 | sub print_regarglen { | |
500 | my ($out)= @_; | |
501 | print $out <<EOP; | |
d09b2d29 | 502 | |
6bda09f9 | 503 | /* regarglen[] - How large is the argument part of the node (in regnodes) */ |
d09b2d29 | 504 | |
29de9391 | 505 | static const U8 regarglen[] = { |
d09b2d29 IZ |
506 | EOP |
507 | ||
f83e001e YO |
508 | foreach my $node (@ops) { |
509 | my $size= 0; | |
484678fc | 510 | $size= "EXTRA_SIZE(struct regnode_$node->{suffix})" if $node->{suffix}; |
d09b2d29 | 511 | |
f83e001e YO |
512 | printf $out "\t%*s\t/* %*s */\n", -37, "$size,", -$rwidth, $node->{name}; |
513 | } | |
514 | ||
515 | print $out <<EOP; | |
d09b2d29 | 516 | }; |
f83e001e YO |
517 | EOP |
518 | } | |
519 | ||
520 | sub print_reg_off_by_arg { | |
521 | my ($out)= @_; | |
522 | print $out <<EOP; | |
d09b2d29 | 523 | |
6bda09f9 YO |
524 | /* reg_off_by_arg[] - Which argument holds the offset to the next node */ |
525 | ||
29de9391 | 526 | static const char reg_off_by_arg[] = { |
d09b2d29 IZ |
527 | EOP |
528 | ||
f83e001e YO |
529 | foreach my $node (@ops) { |
530 | my $size= $node->{longj} || 0; | |
9b155405 | 531 | |
f83e001e YO |
532 | printf $out "\t%d,\t/* %*s */\n", $size, -$rwidth, $node->{name}; |
533 | } | |
d09b2d29 | 534 | |
f83e001e | 535 | print $out <<EOP; |
d09b2d29 | 536 | }; |
9b155405 | 537 | |
f83e001e YO |
538 | EOP |
539 | } | |
540 | ||
541 | sub print_reg_name { | |
542 | my ($out)= @_; | |
543 | print $out <<EOP; | |
13d6edb4 | 544 | |
6bda09f9 YO |
545 | /* reg_name[] - Opcode/state names in string form, for debugging */ |
546 | ||
22429478 | 547 | #ifndef DOINIT |
13d6edb4 | 548 | EXTCONST char * PL_reg_name[]; |
22429478 | 549 | #else |
4764e399 | 550 | EXTCONST char * const PL_reg_name[] = { |
9b155405 IZ |
551 | EOP |
552 | ||
f83e001e YO |
553 | my $ofs= 0; |
554 | my $sym= ""; | |
555 | foreach my $node (@all) { | |
556 | my $size= $node->{longj} || 0; | |
557 | ||
558 | printf $out "\t%*s\t/* $sym%#04x */\n", | |
519d76f5 | 559 | -3 - $base_name_width, qq("$node->{name}",), $node->{id} - $ofs; |
f83e001e YO |
560 | if ( $node->{id} == $#ops and @ops != @all ) { |
561 | print $out "\t/* ------------ States ------------- */\n"; | |
562 | $ofs= $#ops; | |
563 | $sym= 'REGNODE_MAX +'; | |
564 | } | |
565 | } | |
9b155405 | 566 | |
f83e001e | 567 | print $out <<EOP; |
9b155405 | 568 | }; |
22429478 | 569 | #endif /* DOINIT */ |
d09b2d29 | 570 | |
337ff307 | 571 | EOP |
f83e001e | 572 | } |
337ff307 | 573 | |
f83e001e YO |
574 | sub print_reg_extflags_name { |
575 | my ($out)= @_; | |
576 | print $out <<EOP; | |
f7819f85 A |
577 | /* PL_reg_extflags_name[] - Opcode/state names in string form, for debugging */ |
578 | ||
579 | #ifndef DOINIT | |
580 | EXTCONST char * PL_reg_extflags_name[]; | |
581 | #else | |
582 | EXTCONST char * const PL_reg_extflags_name[] = { | |
d09b2d29 IZ |
583 | EOP |
584 | ||
f83e001e YO |
585 | my %rxfv; |
586 | my %definitions; # Remember what the symbol definitions are | |
587 | my $val= 0; | |
588 | my %reverse; | |
589 | my $REG_EXTFLAGS_NAME_SIZE= 0; | |
590 | foreach my $file ( "op_reg_common.h", "regexp.h" ) { | |
591 | open my $in_fh, "<", $file or die "Can't read '$file': $!"; | |
592 | while (<$in_fh>) { | |
593 | ||
594 | # optional leading '_'. Return symbol in $1, and strip it from | |
f2323142 KW |
595 | # comment of line. Currently doesn't handle comments running onto |
596 | # next line | |
f83e001e YO |
597 | if (s/^ \# \s* define \s+ ( _? RXf_ \w+ ) \s+ //xi) { |
598 | chomp; | |
599 | my $define= $1; | |
600 | my $orig= $_; | |
601 | s{ /\* .*? \*/ }{ }x; # Replace comments by a blank | |
602 | ||
603 | # Replace any prior defined symbols by their values | |
604 | foreach my $key ( keys %definitions ) { | |
605 | s/\b$key\b/$definitions{$key}/g; | |
606 | } | |
5c72e80d | 607 | |
f83e001e YO |
608 | # Remove the U suffix from unsigned int literals |
609 | s/\b([0-9]+)U\b/$1/g; | |
5c72e80d | 610 | |
f83e001e | 611 | my $newval= eval $_; # Get numeric definition |
6a080ccd | 612 | |
f83e001e | 613 | $definitions{$define}= $newval; |
6a080ccd | 614 | |
f83e001e YO |
615 | next unless $_ =~ /<</; # Bit defines use left shift |
616 | if ( $val & $newval ) { | |
617 | my @names= ( $define, $reverse{$newval} ); | |
618 | s/PMf_// for @names; | |
619 | if ( $names[0] ne $names[1] ) { | |
620 | die sprintf | |
621 | "ERROR: both $define and $reverse{$newval} use 0x%08X (%s:%s)", | |
622 | $newval, $orig, $_; | |
623 | } | |
624 | next; | |
6976c986 | 625 | } |
f83e001e YO |
626 | $val |= $newval; |
627 | $rxfv{$define}= $newval; | |
628 | $reverse{$newval}= $define; | |
1850c8f9 | 629 | } |
6a080ccd | 630 | } |
f7819f85 | 631 | } |
f83e001e YO |
632 | my %vrxf= reverse %rxfv; |
633 | printf $out "\t/* Bits in extflags defined: %s */\n", unpack 'B*', pack 'N', | |
634 | $val; | |
635 | my %multibits; | |
636 | for ( 0 .. 31 ) { | |
637 | my $power_of_2= 2**$_; | |
638 | my $n= $vrxf{$power_of_2}; | |
639 | my $extra= ""; | |
640 | if ( !$n ) { | |
641 | ||
642 | # Here, there was no name that matched exactly the bit. It could be | |
643 | # either that it is unused, or the name matches multiple bits. | |
644 | if ( !( $val & $power_of_2 ) ) { | |
645 | $n= "UNUSED_BIT_$_"; | |
646 | } | |
647 | else { | |
648 | ||
649 | # Here, must be because it matches multiple bits. Look through | |
650 | # all possibilities until find one that matches this one. Use | |
651 | # that name, and all the bits it matches | |
652 | foreach my $name ( keys %rxfv ) { | |
653 | if ( $rxfv{$name} & $power_of_2 ) { | |
654 | $n= $name . ( $multibits{$name}++ ); | |
655 | $extra= sprintf qq{ : "%s" - 0x%08x}, $name, | |
656 | $rxfv{$name} | |
657 | if $power_of_2 != $rxfv{$name}; | |
658 | last; | |
659 | } | |
5458d9a0 KW |
660 | } |
661 | } | |
662 | } | |
f83e001e YO |
663 | s/\bRXf_(PMf_)?// for $n, $extra; |
664 | printf $out qq(\t%-20s/* 0x%08x%s */\n), qq("$n",), $power_of_2, $extra; | |
665 | $REG_EXTFLAGS_NAME_SIZE++; | |
5458d9a0 | 666 | } |
f83e001e YO |
667 | |
668 | print $out <<EOP; | |
f7819f85 A |
669 | }; |
670 | #endif /* DOINIT */ | |
671 | ||
adc2d0c9 JH |
672 | #ifdef DEBUGGING |
673 | # define REG_EXTFLAGS_NAME_SIZE $REG_EXTFLAGS_NAME_SIZE | |
674 | #endif | |
f83e001e | 675 | EOP |
adc2d0c9 | 676 | |
337ff307 | 677 | } |
f83e001e YO |
678 | |
679 | sub print_reg_intflags_name { | |
680 | my ($out)= @_; | |
681 | print $out <<EOP; | |
682 | ||
337ff307 YO |
683 | /* PL_reg_intflags_name[] - Opcode/state names in string form, for debugging */ |
684 | ||
685 | #ifndef DOINIT | |
686 | EXTCONST char * PL_reg_intflags_name[]; | |
687 | #else | |
688 | EXTCONST char * const PL_reg_intflags_name[] = { | |
689 | EOP | |
690 | ||
f83e001e YO |
691 | my %rxfv; |
692 | my %definitions; # Remember what the symbol definitions are | |
693 | my $val= 0; | |
694 | my %reverse; | |
695 | my $REG_INTFLAGS_NAME_SIZE= 0; | |
696 | foreach my $file ("regcomp.h") { | |
697 | open my $fh, "<", $file or die "Can't read $file: $!"; | |
698 | while (<$fh>) { | |
699 | ||
700 | # optional leading '_'. Return symbol in $1, and strip it from | |
701 | # comment of line | |
702 | if ( | |
703 | m/^ \# \s* define \s+ ( PREGf_ ( \w+ ) ) \s+ 0x([0-9a-f]+)(?:\s*\/\*(.*)\*\/)?/xi | |
704 | ) | |
705 | { | |
706 | chomp; | |
707 | my $define= $1; | |
708 | my $abbr= $2; | |
709 | my $hex= $3; | |
710 | my $comment= $4; | |
711 | my $val= hex($hex); | |
712 | $comment= $comment ? " - $comment" : ""; | |
713 | ||
714 | printf $out qq(\t%-30s/* 0x%08x - %s%s */\n), qq("$abbr",), | |
715 | $val, $define, $comment; | |
716 | $REG_INTFLAGS_NAME_SIZE++; | |
717 | } | |
337ff307 YO |
718 | } |
719 | } | |
337ff307 | 720 | |
f83e001e | 721 | print $out <<EOP; |
337ff307 YO |
722 | }; |
723 | #endif /* DOINIT */ | |
724 | ||
725 | EOP | |
f83e001e | 726 | print $out <<EOQ; |
adc2d0c9 JH |
727 | #ifdef DEBUGGING |
728 | # define REG_INTFLAGS_NAME_SIZE $REG_INTFLAGS_NAME_SIZE | |
729 | #endif | |
337ff307 | 730 | |
adc2d0c9 JH |
731 | EOQ |
732 | } | |
f9ef50a7 | 733 | |
f83e001e YO |
734 | sub print_process_flags { |
735 | my ($out)= @_; | |
736 | ||
737 | print $out process_flags( 'V', 'varies', <<'EOC'); | |
f9ef50a7 NC |
738 | /* The following have no fixed length. U8 so we can do strchr() on it. */ |
739 | EOC | |
740 | ||
f83e001e | 741 | print $out process_flags( 'S', 'simple', <<'EOC'); |
ce716c52 | 742 | |
f9ef50a7 NC |
743 | /* The following always have a length of 1. U8 we can do strchr() on it. */ |
744 | /* (Note that length 1 means "one character" under UTF8, not "one octet".) */ | |
745 | EOC | |
746 | ||
f83e001e | 747 | } |
65aa4ca7 | 748 | |
f83e001e YO |
749 | sub do_perldebguts { |
750 | my $guts= open_new( 'pod/perldebguts.pod', '>' ); | |
65aa4ca7 | 751 | |
f83e001e YO |
752 | my $node; |
753 | my $code; | |
754 | my $name_fmt= '<' x ( $longest_name_length - 1 ); | |
755 | my $descr_fmt= '<' x ( 58 - $longest_name_length ); | |
756 | eval <<EOD or die $@; | |
65aa4ca7 FC |
757 | format GuTS = |
758 | ^*~~ | |
f83e001e | 759 | \$node->{pod_comment} |
95fe686d | 760 | ^$name_fmt ^<<<<<<<<< ^$descr_fmt~~ |
d1bd48a0 | 761 | \$node->{name}, \$code, defined \$node->{comment} ? \$node->{comment} : '' |
65aa4ca7 | 762 | . |
f83e001e | 763 | 1; |
65aa4ca7 | 764 | EOD |
ce553cf5 | 765 | |
f83e001e YO |
766 | my $old_fh= select($guts); |
767 | $~= "GuTS"; | |
65aa4ca7 | 768 | |
1ae6ead9 | 769 | open my $oldguts, '<', 'pod/perldebguts.pod' |
65aa4ca7 | 770 | or die "$0 cannot open pod/perldebguts.pod for reading: $!"; |
f83e001e | 771 | while (<$oldguts>) { |
65aa4ca7 FC |
772 | print; |
773 | last if /=for regcomp.pl begin/; | |
774 | } | |
775 | ||
f83e001e | 776 | print <<'END_OF_DESCR'; |
65aa4ca7 | 777 | |
e21ef692 | 778 | # TYPE arg-description [regnode-struct-suffix] [longjump-len] DESCRIPTION |
f83e001e YO |
779 | END_OF_DESCR |
780 | for my $n (@ops) { | |
781 | $node= $n; | |
484678fc | 782 | $code= "$node->{code} " . ( $node->{suffix} || "" ); |
f83e001e YO |
783 | $code .= " $node->{longj}" if $node->{longj}; |
784 | if ( $node->{pod_comment} ||= "" ) { | |
785 | ||
65aa4ca7 | 786 | # Trim multiple blanks |
f83e001e YO |
787 | $node->{pod_comment} =~ s/^\n\n+/\n/; |
788 | $node->{pod_comment} =~ s/\n\n+$/\n\n/; | |
65aa4ca7 FC |
789 | } |
790 | write; | |
791 | } | |
792 | print "\n"; | |
793 | ||
f83e001e | 794 | while (<$oldguts>) { |
65aa4ca7 FC |
795 | last if /=for regcomp.pl end/; |
796 | } | |
4ac5f10b DD |
797 | do { print } while <$oldguts>; #win32 can't unlink an open FH |
798 | close $oldguts or die "Error closing pod/perldebguts.pod: $!"; | |
f83e001e YO |
799 | select $old_fh; |
800 | close_and_rename($guts); | |
801 | } | |
65aa4ca7 | 802 | |
33b57041 | 803 | my $confine_to_core = 'defined(PERL_CORE) || defined(PERL_EXT_RE_BUILD)'; |
f83e001e YO |
804 | read_definition("regcomp.sym"); |
805 | my $out= open_new( 'regnodes.h', '>', | |
806 | { by => 'regen/regcomp.pl', from => 'regcomp.sym' } ); | |
33b57041 | 807 | print $out "#if $confine_to_core\n\n"; |
f83e001e YO |
808 | print_state_defs($out); |
809 | print_regkind($out); | |
810 | wrap_ifdef_print( | |
811 | $out, | |
812 | "REG_COMP_C", | |
813 | \&print_regarglen, | |
814 | \&print_reg_off_by_arg | |
815 | ); | |
816 | print_reg_name($out); | |
817 | print_reg_extflags_name($out); | |
818 | print_reg_intflags_name($out); | |
819 | print_process_flags($out); | |
938090ac | 820 | print_process_EXACTish($out); |
33b57041 | 821 | print $out "\n#endif /* $confine_to_core */\n"; |
f83e001e | 822 | read_only_bottom_close_and_rename($out); |
65aa4ca7 | 823 | |
f83e001e | 824 | do_perldebguts(); |