Commit | Line | Data |
---|---|---|
916e4025 | 1 | #!/usr/bin/perl -w |
6294c161 DM |
2 | # |
3 | # Regenerate (overwriting only if changed): | |
4 | # | |
65aa4ca7 | 5 | # pod/perldebguts.pod |
6294c161 DM |
6 | # regnodes.h |
7 | # | |
8 | # from information stored in | |
9 | # | |
10 | # regcomp.sym | |
11 | # regexp.h | |
12 | # | |
65aa4ca7 FC |
13 | # pod/perldebguts.pod is not completely regenerated. Only the table of |
14 | # regexp nodes is replaced; other parts remain unchanged. | |
15 | # | |
6294c161 DM |
16 | # Accepts the standard regen_lib -q and -v args. |
17 | # | |
18 | # This script is normally invoked from regen.pl. | |
19 | ||
36bb303b NC |
20 | BEGIN { |
21 | # Get function prototypes | |
af001346 | 22 | require 'regen/regen_lib.pl'; |
36bb303b | 23 | } |
03363afd | 24 | use strict; |
03363afd | 25 | |
# Read regcomp.sym into the parallel arrays declared below.
#
# Lines above the dashed separator describe regops; each is split into a
# name, a comma/space separated descriptor (type, code, args, flags, longj)
# and, after the ';', a free-text description.  Lines below the separator
# describe regmatch states: a type followed by name lists, each of which is
# expanded into one or more state names.
#
# Lines starting with "#*" and truly blank lines are accumulated in @cmnt
# for the slot of the next regop; other comment lines are ignored.
open my $sym_fh, '<', 'regcomp.sym'
    or die "$0: cannot open regcomp.sym for reading: $!";

my $ind = 0;    # next free slot in the parallel arrays
my (@name,@rest,@type,@code,@args,@flags,@longj,@cmnt);
my $longest_name_length = 0;
my $desc;
my $lastregop = 0;    # slot count at the dashed separator; 0 until seen
my (%seen_op, %type_alias);
while (<$sym_fh>) {
    # Special pod comments
    if (/^#\* ?/) { $cmnt[$ind] .= "# $'"; }
    # Truly blank lines possibly surrounding pod comments
    elsif (/^\s*$/) { $cmnt[$ind] .= "\n" }

    next if /^(?:#|\s*$)/;
    chomp; # No \z in 5.004
    s/\s*$//;
    if (/^-+\s*$/) {
        # Separator: everything read so far is a regop, the rest are states.
        $lastregop= $ind;
        next;
    }
    unless ($lastregop) {
        ($name[$ind], $desc, $rest[$ind]) = /^(\S+)\s+([^\t]+?)\s*;\s*(.*)/;

        if (defined $seen_op{$name[$ind]}) {
            die "Duplicate regop $name[$ind] in regcomp.sym line $. previously defined on line $seen_op{$name[$ind]}\n";
        } else {
            $seen_op{$name[$ind]}= $.;
        }

        ($type[$ind], $code[$ind], $args[$ind], $flags[$ind], $longj[$ind])
          = split /[,\s]\s*/, $desc;

        # A type that is not itself a regop name is remembered as an alias
        # of the first regop that uses it.
        if (!defined $seen_op{$type[$ind]} and !defined $type_alias{$type[$ind]}) {
            #warn "Regop type '$type[$ind]' from regcomp.sym line $. is not an existing regop, and will be aliased to $name[$ind]\n"
            #    if -t STDERR;
            $type_alias{$type[$ind]}= $name[$ind];
        }

        $longest_name_length = length $name[$ind]
          if length $name[$ind] > $longest_name_length;
        ++$ind;
    } else {
        my ($type,@lists)=split /\s+/, $_;
        die "No list? $type" if !@lists;
        foreach my $list (@lists) {
            my ($names,$special)=split /:/, $list , 2;
            $special ||= "";
            foreach my $name (split /,/,$names) {
                my $real= $name eq 'resume'
                        ? "resume_$type"
                        : "${type}_$name";
                # The optional ":suffix" selects how many states to generate:
                # none => one state, a number N => states 1..N,
                # FAIL => the state plus a "_fail" twin.
                my @suffix;
                if (!$special) {
                   @suffix=("");
                } elsif ($special=~/\d/) {
                    @suffix=(1..$special);
                } elsif ($special eq 'FAIL') {
                    @suffix=("","_fail");
                } else {
                    die "unknown :type ':$special'";
                }
                foreach my $suffix (@suffix) {
                    $name[$ind]="$real$suffix";
                    $type[$ind]=$type;
                    $rest[$ind]="state for $type";
                    ++$ind;
                }
            }
        }

    }
}
# use fixed width to keep the diffs between regcomp.pl recompiles
# as small as possible.
my ($width,$rwidth,$twidth)=(22,12,9);
$lastregop ||= $ind;    # no separator seen: every entry is a regop
my $tot = $ind;
close $sym_fh;
die "Too many regexp/state opcodes! Maximum is 256, but there are $lastregop in file!"
    if $lastregop>256;
d09b2d29 | 105 | |
f9ef50a7 NC |
# process_flags($flag, $varname, $comment)
#
# Build the C source text for one class of regnode flag.
#
#   $flag    - flag letter; a regop is selected when its @flags entry
#              equals this letter
#   $varname - stem for the generated identifiers: PL_<varname>,
#              PL_<varname>_bitmask and the REGNODE_<VARNAME>() macro
#   $comment - optional text placed verbatim before the definitions
#
# Returns the generated text as a single string.  Reads the file-level
# @flags, @name and $lastregop; modifies nothing outside itself.
sub process_flags {
    my ($flag, $varname, $comment) = @_;
    $comment = '' unless defined $comment;

    my @selected;      # names of the regops carrying $flag
    my @mask_bytes;    # one integer per 8 regops, bit (n & 7) set if selected

    # Slot 0 is always examined, even if there are no regops, so the
    # generated bitmask initialiser is never empty.
    my $last_node = $lastregop > 1 ? $lastregop - 1 : 0;
    for my $node (0 .. $last_node) {
        my $set = $flags[$node] && $flags[$node] eq $flag ? 1 : 0;

        my $byte = $node >> 3;
        $mask_bytes[$byte] = 0 unless defined $mask_bytes[$byte];
        $mask_bytes[$byte] |= $set << ($node & 7);

        push @selected, $name[$node] if $set;
    }

    # The name list is 0-terminated and wrapped after at most 70 columns.
    my $out_string = join ', ', @selected, 0;
    $out_string =~ s/(.{1,70},) /$1\n /g;

    my $out_mask = join ', ', map { sprintf "0x%02X", $_ } @mask_bytes;

    return $comment . <<"EOP";
#define REGNODE_\U$varname\E(node) (PL_${varname}_bitmask[(node) >> 3] & (1 << ((node) & 7)))

#ifndef DOINIT
EXTCONST U8 PL_${varname}\[] __attribute__deprecated__;
#else
EXTCONST U8 PL_${varname}\[] __attribute__deprecated__ = {
$out_string
};
#endif /* DOINIT */

#ifndef DOINIT
EXTCONST U8 PL_${varname}_bitmask[];
#else
EXTCONST U8 PL_${varname}_bitmask[] = {
$out_mask
};
#endif /* DOINIT */
EOP
}
152 | ||
cc49830d NC |
# Start regnodes.h: the two MAX constants, then one #define per regop
# (plus a #define for any type alias that maps to it), then one #define per
# regmatch state expressed relative to REGNODE_MAX.
my $out = open_new(
    'regnodes.h', '>',
    { by => 'regen/regcomp.pl', from => 'regcomp.sym' },
);

my @max_defines = (
    -$width, 'REGNODE_MAX',        $lastregop - 1,
    -$width, 'REGMATCH_STATE_MAX', $tot - 1,
);
printf {$out} <<EOP, @max_defines;
/* Regops and State definitions */

#define %*s\t%d
#define %*s\t%d

EOP

# Map each aliased-to regop name back to the alias that points at it.
my %rev_type_alias = reverse %type_alias;
foreach my $op (0 .. $lastregop - 1) {
    my $op_name = $name[$op];
    printf {$out} "#define\t%*s\t%d\t/* %#04x %s */\n",
        -$width, $op_name, $op, $op, $rest[$op];

    my $alias = $rev_type_alias{$op_name};
    next unless defined $alias;
    printf {$out} "#define\t%*s\t%d\t/* %#04x %s */\n",
        -$width, $alias, $op, $op, "type alias";
}

print {$out} "\t/* ------------ States ------------- */\n";
foreach my $state ($lastregop .. $tot - 1) {
    printf {$out} "#define\t%*s\t(REGNODE_MAX + %d)\t/* %s */\n",
        -$width, $name[$state], $state - $lastregop + 1, $rest[$state];
}
181 | ||
# PL_regkind[]: one entry per regop and per state, giving its type.
print {$out} <<'EOP';

/* PL_regkind[] What type of regop or state is this. */

#ifndef DOINIT
EXTCONST U8 PL_regkind[];
#else
EXTCONST U8 PL_regkind[] = {
EOP

$ind = 0;
while (1) {
    my $kind_entry = "$type[$ind],";
    printf {$out} "\t%*s\t/* %*s */\n",
        -1 - $twidth, $kind_entry, -$width, $name[$ind];

    # Mark the boundary after the last regop, but only if states follow.
    if ($ind + 1 == $lastregop and $lastregop != $tot) {
        print {$out} "\t/* ------------ States ------------- */\n";
    }

    last unless ++$ind < $tot;
}
d09b2d29 | 199 | |
# Close PL_regkind[] and emit regarglen[]: for each regop, the size of its
# argument structure, or 0 when it has none.
print {$out} <<'EOP';
};
#endif

/* regarglen[] - How large is the argument part of the node (in regnodes) */

#ifdef REG_COMP_C
static const U8 regarglen[] = {
EOP

$ind = 0;
while (1) {
    my $arg_size = $args[$ind]
                 ? "EXTRA_SIZE(struct regnode_$args[$ind])"
                 : 0;

    printf {$out} "\t%*s\t/* %*s */\n",
        -37, "$arg_size,", -$rwidth, $name[$ind];

    last unless ++$ind < $lastregop;
}
d09b2d29 | 218 | |
# Close regarglen[] and emit reg_off_by_arg[]: for each regop, its longj
# field from regcomp.sym, or 0 when the field is absent.
print {$out} <<'EOP';
};

/* reg_off_by_arg[] - Which argument holds the offset to the next node */

static const char reg_off_by_arg[] = {
EOP

$ind = 0;
while (1) {
    my $offset_arg = $longj[$ind] ? $longj[$ind] : 0;

    printf {$out} "\t%d,\t/* %*s */\n",
        $offset_arg, -$rwidth, $name[$ind];

    last unless ++$ind < $lastregop;
}
d09b2d29 | 234 | |
# Close reg_off_by_arg[] and the REG_COMP_C section, then emit
# PL_reg_name[]: the name of every regop and state as a C string.
print $out <<EOP;
};

#endif /* REG_COMP_C */

/* reg_name[] - Opcode/state names in string form, for debugging */

#ifndef DOINIT
EXTCONST char * PL_reg_name[];
#else
EXTCONST char * const PL_reg_name[] = {
EOP

$ind = 0;
my $ofs = 0;     # subtracted from $ind to get the number shown in the comment
my $sym = "";    # prefix for that number; becomes 'REGNODE_MAX +' for states
do {
    printf $out "\t%*s\t/* $sym%#04x */\n",
        -3-$width,qq("$name[$ind]",), $ind - $ofs;

    # After the last regop, switch to numbering states relative to
    # REGNODE_MAX -- but only if there are any states at all.
    if ($ind + 1 == $lastregop and $lastregop != $tot) {
        print $out "\t/* ------------ States ------------- */\n";
        $ofs = $lastregop - 1;
        $sym = 'REGNODE_MAX +';
    }

} while (++$ind < $tot);

print $out <<EOP;
};
#endif /* DOINIT */

EOP
269 | ||
# Emit PL_reg_extflags_name[]: one string per bit (0..31) of the regexp
# extflags word, derived from the RXf_* #defines in op_reg_common.h and
# regexp.h, followed by REG_EXTFLAGS_NAME_SIZE (the number of entries).
{
    print $out <<EOP;
/* PL_reg_extflags_name[] - Opcode/state names in string form, for debugging */

#ifndef DOINIT
EXTCONST char * PL_reg_extflags_name[];
#else
EXTCONST char * const PL_reg_extflags_name[] = {
EOP

    my %rxfv;           # symbol => value, for bit-style (<<) defines only
    my %definitions;    # Remember what the symbol definitions are
    my $val = 0;        # union of all bits claimed so far
    my %reverse;        # value => symbol that first claimed it
    my $REG_EXTFLAGS_NAME_SIZE = 0;
    foreach my $file ("op_reg_common.h", "regexp.h") {
        open my $fh, '<', $file or die "Can't read $file: $!";
        while (<$fh>) {

            # optional leading '_'.  Return symbol in $1, and strip it from
            # rest of line
            if (s/^ \# \s* define \s+ ( _? RXf_ \w+ ) \s+ //xi) {
                chomp;
                my $define = $1;
                my $orig= $_;
                s{ /\* .*? \*/ }{ }x;    # Replace comments by a blank

                # Replace any prior defined symbols by their values
                foreach my $key (keys %definitions) {
                    s/\b$key\b/$definitions{$key}/g;
                }

                # Remove the U suffix from unsigned int literals
                s/\b([0-9]+)U\b/$1/g;

                # NOTE(review): a definition that fails to evaluate yields
                # undef here without any diagnostic.
                my $newval = eval $_;   # Get numeric definition

                $definitions{$define} = $newval;

                next unless $_ =~ /<</; # Bit defines use left shift
                if($val & $newval) {
                    # Overlap is tolerated only when both symbols are the
                    # same name once "PMf_" is removed.
                    my @names=($define, $reverse{$newval});
                    s/PMf_// for @names;
                    if ($names[0] ne $names[1]) {
                        die sprintf "ERROR: both $define and $reverse{$newval} use 0x%08X (%s:%s)", $newval, $orig, $_;
                    }
                    next;
                }
                $val|=$newval;
                $rxfv{$define}= $newval;
                $reverse{$newval} = $define;
            }
        }
        close $fh;
    }
    my %vrxf=reverse %rxfv;
    printf $out "\t/* Bits in extflags defined: %s */\n", unpack 'B*', pack 'N', $val;
    my %multibits;      # per multi-bit symbol, how many bits were named so far
    for my $bit (0..31) {
        my $power_of_2 = 2**$bit;
        my $n=$vrxf{$power_of_2};
        my $extra = "";
        if (! $n) {

            # Here, there was no name that matched exactly the bit.  It could be
            # either that it is unused, or the name matches multiple bits.
            if (! ($val & $power_of_2)) {
                $n = "UNUSED_BIT_$bit";
            }
            else {

                # Here, must be because it matches multiple bits.  Look through
                # all possibilities until find one that matches this one.  Use
                # that name, and all the bits it matches.  Sorted so that the
                # choice does not depend on hash ordering.
                foreach my $name (sort keys %rxfv) {
                    if ($rxfv{$name} & $power_of_2) {
                        $n = $name . ( $multibits{$name}++ );
                        $extra= sprintf qq{ : "%s" - 0x%08x}, $name, $rxfv{$name}
                            if $power_of_2 != $rxfv{$name};
                        last;
                    }
                }
            }
        }
        s/\bRXf_(PMf_)?// for $n, $extra;
        printf $out qq(\t%-20s/* 0x%08x%s */\n),
            qq("$n",),$power_of_2, $extra;
        $REG_EXTFLAGS_NAME_SIZE++;
    }

    print $out <<EOP;
};
#endif /* DOINIT */

EOP
    print $out <<EOQ;
#ifdef DEBUGGING
# define REG_EXTFLAGS_NAME_SIZE $REG_EXTFLAGS_NAME_SIZE
#endif

EOQ
}
# Emit PL_reg_intflags_name[]: one string per PREGf_* #define with a hex
# value found in regcomp.h, in file order, followed by
# REG_INTFLAGS_NAME_SIZE (the number of entries).
{
    print $out <<EOP;
/* PL_reg_intflags_name[] - Opcode/state names in string form, for debugging */

#ifndef DOINIT
EXTCONST char * PL_reg_intflags_name[];
#else
EXTCONST char * const PL_reg_intflags_name[] = {
EOP

    my $REG_INTFLAGS_NAME_SIZE = 0;
    foreach my $file ("regcomp.h") {
        open my $fh, "<", $file or die "Can't read $file: $!";
        while (<$fh>) {
            # Captures: $1 full symbol, $2 symbol without the PREGf_ prefix,
            # $3 hex digits of the value, $4 text of an optional trailing
            # C comment.
            if (m/^ \# \s* define \s+ ( PREGf_ ( \w+ ) ) \s+ 0x([0-9a-f]+)(?:\s*\/\*(.*)\*\/)?/xi) {
                my ($define, $abbr, $hex, $comment) = ($1, $2, $3, $4);
                my $flag_value = hex($hex);
                $comment= $comment ? " - $comment" : "";

                printf $out qq(\t%-30s/* 0x%08x - %s%s */\n), qq("$abbr",), $flag_value, $define, $comment;
                $REG_INTFLAGS_NAME_SIZE++;
            }
        }
        close $fh;
    }

    print $out <<EOP;
};
#endif /* DOINIT */

EOP
    print $out <<EOQ;
#ifdef DEBUGGING
# define REG_INTFLAGS_NAME_SIZE $REG_INTFLAGS_NAME_SIZE
#endif

EOQ
}
f9ef50a7 NC |
418 | |
# Append the 'V' (varies) and 'S' (simple) flag tables, each introduced by
# its own C comment, then finish regnodes.h.
my @flag_tables = (
    [ 'V', 'varies', <<'EOC' ],
/* The following have no fixed length. U8 so we can do strchr() on it. */
EOC
    [ 'S', 'simple', <<'EOC' ],

/* The following always have a length of 1. U8 we can do strchr() on it. */
/* (Note that length 1 means "one character" under UTF8, not "one octet".) */
EOC
);

foreach my $table (@flag_tables) {
    my ($flag_letter, $stem, $lead_in) = @$table;
    print {$out} process_flags($flag_letter, $stem, $lead_in);
}

read_only_bottom_close_and_rename($out);
65aa4ca7 FC |
430 | |
# Regenerate the regop table inside pod/perldebguts.pod.  Everything up to
# and including the "=for regcomp.pl begin" line is copied from the existing
# file, the table is written fresh, and copying resumes at the
# "=for regcomp.pl end" line.
my $guts = open_new('pod/perldebguts.pod', '>');

my $code;    # per-regop "code args [longj]" column; read by format GuTS
my $name_fmt = '<' x ($longest_name_length-1);
my $descr_fmt = '<' x (58-$longest_name_length);

# The format is built at run time because its column widths depend on the
# longest regop name.  It refers to $_, so the loop that calls write() below
# must iterate with $_.
# NOTE(review): the picture lines are reproduced as they appear in this
# listing; confirm the column spacing against the generated pod.
eval <<EOD;
format GuTS =
^*~~
\$cmnt[\$_]
^$name_fmt ^<<<<<<<<< ^$descr_fmt~~
\$name[\$_], \$code, \$rest[\$_]
.
EOD
die "$0: cannot compile format GuTS: $@" if $@;

# Make $guts the default output handle for print/write while the pod is
# being produced, and restore the previous default afterwards.
my $previous_fh = select $guts;
$~ = "GuTS";

open my $oldguts, '<', "pod/perldebguts.pod"
    or die "$0 cannot open pod/perldebguts.pod for reading: $!";
while (<$oldguts>) {
    print;
    last if /=for regcomp.pl begin/;
}

print <<'end';

# TYPE arg-description [num-args] [longjump-len] DESCRIPTION
end
for (0..$lastregop-1) {
    $code = "$code[$_] ".($args[$_]||"");
    $code .= " $longj[$_]" if $longj[$_];
    if ($cmnt[$_] ||= "") {
        # Trim multiple blanks
        $cmnt[$_] =~ s/^\n\n+/\n/; $cmnt[$_] =~ s/\n\n+$/\n\n/
    }
    write;
}
print "\n";

# Skip the old table, then copy the end marker line and everything after it.
while (<$oldguts>) {
    last if /=for regcomp.pl end/;
}
do { print } while <$oldguts>;
close $oldguts;

select $previous_fh;

close_and_rename($guts);