| 1 | #!/usr/bin/perl -w |
| 2 | # |
| 3 | # Regenerate (overwriting only if changed): |
| 4 | # |
| 5 | # pod/perldebguts.pod |
| 6 | # regnodes.h |
| 7 | # |
# from information stored in
#
# regcomp.sym
# op_reg_common.h
# regexp.h
# regcomp.h
| 12 | # |
| 13 | # pod/perldebguts.pod is not completely regenerated. Only the table of |
| 14 | # regexp nodes is replaced; other parts remain unchanged. |
| 15 | # |
| 16 | # Accepts the standard regen_lib -q and -v args. |
| 17 | # |
| 18 | # This script is normally invoked from regen.pl. |
| 19 | |
| 20 | BEGIN { |
| 21 | # Get function prototypes |
| 22 | require 'regen/regen_lib.pl'; |
| 23 | } |
| 24 | use strict; |
| 25 | |
# Parse regcomp.sym into parallel arrays indexed by opcode number.
#
# The file has two sections separated by a line consisting only of dashes:
#
#   * Before the separator, each line describes one regop:
#         NAME  <fields> ; DESCRIPTION
#     where <fields> holds up to five comma/whitespace separated values
#     stored in @type, @code, @args, @flags and @longj.
#
#   * After the separator, each line describes the match states belonging
#     to one regop type:
#         TYPE  name[,name...][:SPECIAL] ...
#     SPECIAL may be a number N (states suffixed 1..N) or FAIL (a plain
#     state plus a "_fail" state).
#
# Lines starting with "#*" are pod comments; they, and truly blank lines,
# are accumulated in @cmnt for the regop that follows.  All other lines
# starting with '#' are ignored.
open my $desc_fh, '<', 'regcomp.sym'
    or die "Can't read regcomp.sym: $!";

my $ind = 0;
my (@name,@rest,@type,@code,@args,@flags,@longj,@cmnt);
my $longest_name_length = 0;
# $lastregop stays undefined (false) until the separator line is seen.
my ($desc, $lastregop);
while (<$desc_fh>) {
    # Special pod comments
    if (/^#\* ?/) { $cmnt[$ind] .= "# $'"; }
    # Truly blank lines possibly surrounding pod comments
    elsif (/^\s*$/) { $cmnt[$ind] .= "\n" }

    next if /^(?:#|\s*$)/;
    chomp; # No \z in 5.004
    s/\s*$//;
    if (/^-+\s*$/) {
        # Separator: everything parsed so far was a regop.
        $lastregop= $ind;
        next;
    }
    unless ($lastregop) {
        # Regop section.
        ($name[$ind], $desc, $rest[$ind]) = /^(\S+)\s+([^\t]+?)\s*;\s*(.*)/
            or die "Can't parse regcomp.sym line $.: $_";
        ($type[$ind], $code[$ind], $args[$ind], $flags[$ind], $longj[$ind])
            = split /[,\s]\s*/, $desc;
        $longest_name_length = length $name[$ind]
            if length $name[$ind] > $longest_name_length;
        ++$ind;
    } else {
        # State section.
        my ($type,@lists)=split /\s+/, $_;
        die "No list? $type" if !@lists;
        foreach my $list (@lists) {
            my ($names,$special)=split /:/, $list , 2;
            $special ||= "";
            foreach my $name (split /,/,$names) {
                # 'resume' is the one state whose name is prefixed rather
                # than suffixed onto the type.
                my $real= $name eq 'resume'
                        ? "resume_$type"
                        : "${type}_$name";
                my @suffix;
                if (!$special) {
                    @suffix=("");
                } elsif ($special=~/\d/) {
                    @suffix=(1..$special);
                } elsif ($special eq 'FAIL') {
                    @suffix=("","_fail");
                } else {
                    die "unknown :type ':$special'";
                }
                foreach my $suffix (@suffix) {
                    $name[$ind]="$real$suffix";
                    $type[$ind]=$type;
                    $rest[$ind]="state for $type";
                    ++$ind;
                }
            }
        }

    }
}
# use fixed width to keep the diffs between regcomp.pl recompiles
# as small as possible.
my ($width,$rwidth,$twidth)=(22,12,9);
# With no separator line every entry is a regop.
$lastregop ||= $ind;
my $tot = $ind;
close $desc_fh;
die "Too many regexp/state opcodes! Maximum is 256, but there are $lastregop in file!"
    if $lastregop>256;
| 90 | |
# Build the C source for one flag-selected set of regnodes.
#
# Arguments:
#   $flag    - flag value to select on; a regnode is selected when its
#              entry in @flags is string-equal to this
#   $varname - stem for the generated identifiers: REGNODE_<VARNAME>(),
#              PL_<varname>[] and PL_<varname>_bitmask[]
#   $comment - optional text placed verbatim in front of the generated code
#
# Returns the generated text as a single string; nothing is printed here.
#
# Reads the file-level @flags, @name and $lastregop.  It uses its own loop
# index, so the file-level $ind is left untouched.
sub process_flags {
    my ($flag, $varname, $comment) = @_;
    $comment = '' unless defined $comment;

    my @selected;

    # One byte per eight regnodes, laid out the way the REGNODE_xxx() macro
    # emitted below reads it: node N is bit (N & 7) of byte (N >> 3).  The
    # arithmetic is spelled out longhand (rather than via vec) to mirror the
    # C code.  At least one byte is always produced so that the C
    # initializer is never empty.
    my @bitmap = (0) x ((($lastregop + 7) >> 3) || 1);

    for my $node (0 .. $lastregop - 1) {
        next unless $flags[$node] && $flags[$node] eq $flag;
        $bitmap[$node >> 3] |= 1 << ($node & 7);
        push @selected, $name[$node];
    }

    # Zero-terminated list of the selected names, wrapped after a comma once
    # a line holds up to 70 characters.
    my $out_string = join ', ', @selected, 0;
    $out_string =~ s/(.{1,70},) /$1\n /g;

    my $out_mask = join ', ', map { sprintf "0x%02X", $_ } @bitmap;

    return $comment . <<"EOP";
#define REGNODE_\U$varname\E(node) (PL_${varname}_bitmask[(node) >> 3] & (1 << ((node) & 7)))

#ifndef DOINIT
EXTCONST U8 PL_${varname}\[] __attribute__deprecated__;
#else
EXTCONST U8 PL_${varname}\[] __attribute__deprecated__ = {
$out_string
};
#endif /* DOINIT */

#ifndef DOINIT
EXTCONST U8 PL_${varname}_bitmask[];
#else
EXTCONST U8 PL_${varname}_bitmask[] = {
$out_mask
};
#endif /* DOINIT */
EOP
}
| 137 | |
# Open the regnodes.h output handle.  open_new() is not defined in this
# file; presumably it comes from regen/regen_lib.pl.
my $out = open_new(
    'regnodes.h', '>',
    { by => 'regen/regcomp.pl', from => 'regcomp.sym' },
);

# Header: the highest regop number and the highest state number.
my $limits_fmt = <<EOP;
/* Regops and State definitions */

#define %*s\t%d
#define %*s\t%d

EOP
printf {$out} $limits_fmt,
    -$width, 'REGNODE_MAX',        $lastregop - 1,
    -$width, 'REGMATCH_STATE_MAX', $tot - 1;


# Regops get absolute numbers, shown in decimal and (in the comment) hex.
for my $regop (0 .. $lastregop - 1) {
    printf {$out} "#define\t%*s\t%d\t/* %#04x %s */\n",
        -$width, $name[$regop], $regop, $regop, $rest[$regop];
}

# States follow the regops and are numbered relative to REGNODE_MAX.
print {$out} "\t/* ------------ States ------------- */\n";
for my $state ($lastregop .. $tot - 1) {
    printf {$out} "#define\t%*s\t(REGNODE_MAX + %d)\t/* %s */\n",
        -$width, $name[$state], $state - $lastregop + 1, $rest[$state];
}
| 161 | |
# PL_regkind[]: one row per regop and per state, giving its type.
print {$out} <<EOP;

/* PL_regkind[] What type of regop or state is this. */

#ifndef DOINIT
EXTCONST U8 PL_regkind[];
#else
EXTCONST U8 PL_regkind[] = {
EOP

my $kind_ix = 0;
do {
    my $entry = $type[$kind_ix] . ',';
    printf {$out} "\t%*s\t/* %*s */\n",
        -1-$twidth, $entry, -$width, $name[$kind_ix];

    # Mark the boundary between regops and states, if there are any states.
    if ($kind_ix + 1 == $lastregop && $lastregop != $tot) {
        print {$out} "\t/* ------------ States ------------- */\n";
    }
} while (++$kind_ix < $tot);
| 179 | |
# Close PL_regkind[] and emit regarglen[]: one row per regop, either 0 or
# the EXTRA_SIZE() of the regnode struct named by that regop's args field.
print {$out} <<EOP;
};
#endif

/* regarglen[] - How large is the argument part of the node (in regnodes) */

#ifdef REG_COMP_C
static const U8 regarglen[] = {
EOP

my $arglen_ix = 0;
do {
    my $size = $args[$arglen_ix]
             ? "EXTRA_SIZE(struct regnode_$args[$arglen_ix])"
             : 0;

    printf {$out} "\t%*s\t/* %*s */\n",
        -37, "$size,", -$rwidth, $name[$arglen_ix];
} while (++$arglen_ix < $lastregop);
| 198 | |
# Close regarglen[] and emit reg_off_by_arg[]: one row per regop holding
# that regop's longj field, or 0 when it has none.
print {$out} <<EOP;
};

/* reg_off_by_arg[] - Which argument holds the offset to the next node */

static const char reg_off_by_arg[] = {
EOP

my $offset_ix = 0;
do {
    my $which_arg = $longj[$offset_ix] || 0;

    printf {$out} "\t%d,\t/* %*s */\n",
        $which_arg, -$rwidth, $name[$offset_ix];
} while (++$offset_ix < $lastregop);
| 214 | |
# Close reg_off_by_arg[] (and the REG_COMP_C section) and emit
# PL_reg_name[]: the name of every regop and state as a C string.
print $out <<EOP;
};

#endif /* REG_COMP_C */

/* reg_name[] - Opcode/state names in string form, for debugging */

#ifndef DOINIT
EXTCONST char * PL_reg_name[];
#else
EXTCONST char * const PL_reg_name[] = {
EOP

$ind = 0;
# Each row's comment shows the entry's number.  Regops are shown absolutely;
# once the states start, $ofs and $sym switch the comment to the
# "REGNODE_MAX + n" form.
my $ofs = 0;
my $sym = "";
do {
    printf $out "\t%*s\t/* $sym%#04x */\n",
        -3-$width,qq("$name[$ind]",), $ind - $ofs;
    if ($ind + 1 == $lastregop and $lastregop != $tot) {
        print $out "\t/* ------------ States ------------- */\n";
        $ofs = $lastregop - 1;
        $sym = 'REGNODE_MAX +';
    }

} while (++$ind < $tot);

print $out <<EOP;
};
#endif /* DOINIT */

EOP
| 249 | |
# Emit PL_reg_extflags_name[]: a 32-entry table naming each bit of the
# regexp extflags, derived from the RXf_* #defines in op_reg_common.h and
# regexp.h.
{
    print $out <<EOP;
/* PL_reg_extflags_name[] - Opcode/state names in string form, for debugging */

#ifndef DOINIT
EXTCONST char * PL_reg_extflags_name[];
#else
EXTCONST char * const PL_reg_extflags_name[] = {
EOP

    my %rxfv;           # flag name => value, for shift-defined flags only
    my %definitions;    # Remember what the symbol definitions are
    my $val = 0;        # union of all bits claimed so far
    my %reverse;        # value => flag name that first claimed it
    foreach my $file ("op_reg_common.h", "regexp.h") {
        open my $fh, "<", $file or die "Can't read $file: $!";
        while (<$fh>) {

            # optional leading '_'.  Return symbol in $1, and strip it from
            # rest of line
            if (s/^ \# \s* define \s+ ( _? RXf_ \w+ ) \s+ //xi) {
                chomp;
                my $define = $1;
                my $orig= $_;
                s{ /\* .*? \*/ }{ }x;    # Replace comments by a blank

                # Replace any prior defined symbols by their values
                foreach my $key (keys %definitions) {
                    s/\b$key\b/$definitions{$key}/g;
                }

                # Remove the U suffix from unsigned int literals
                s/\b([0-9]+)U\b/$1/g;

                # NOTE: this evaluates text taken from the header as Perl.
                # That is only acceptable because the headers are part of
                # the same source tree.  A definition that does not
                # evaluate leaves $newval undefined.
                my $newval = eval $_;   # Get numeric definition

                $definitions{$define} = $newval;

                next unless $_ =~ /<</; # Bit defines use left shift
                if($val & $newval) {
                    # Bits already claimed.  That is tolerated only when the
                    # two names differ by nothing but a "PMf_" infix.
                    my @names=($define, $reverse{$newval});
                    s/PMf_// for @names;
                    if ($names[0] ne $names[1]) {
                        die sprintf "ERROR: both $define and $reverse{$newval} use 0x%08X (%s:%s)", $newval, $orig, $_;
                    }
                    next;
                }
                $val|=$newval;
                $rxfv{$define}= $newval;
                $reverse{$newval} = $define;
            }
        }
        close $fh;
    }
    my %vrxf=reverse %rxfv;
    printf $out "\t/* Bits in extflags defined: %s */\n", unpack 'B*', pack 'N', $val;
    my %multibits;      # per-name counter used to number multi-bit entries
    for my $bit (0..31) {
        my $power_of_2 = 2**$bit;
        my $n=$vrxf{$power_of_2};
        my $extra = "";
        if (! $n) {

            # Here, there was no name that matched exactly the bit.  It could be
            # either that it is unused, or the name matches multiple bits.
            if (! ($val & $power_of_2)) {
                $n = "UNUSED_BIT_$bit";
            }
            else {

                # Here, must be because it matches multiple bits.  Look through
                # all possibilities until find one that matches this one.  Use
                # that name, and all the bits it matches.  (Entries in %rxfv
                # never share bits, because overlapping defines were skipped
                # or rejected above, so hash order does not affect the result.)
                foreach my $name (keys %rxfv) {
                    if ($rxfv{$name} & $power_of_2) {
                        $n = $name . ( $multibits{$name}++ );
                        $extra= sprintf qq{ : "%s" - 0x%08x}, $name, $rxfv{$name}
                            if $power_of_2 != $rxfv{$name};
                        last;
                    }
                }
            }
        }
        s/\bRXf_(PMf_)?// for $n, $extra;
        printf $out qq(\t%-20s/* 0x%08x%s */\n),
            qq("$n",),$power_of_2, $extra;
    }

    print $out <<EOP;
};
#endif /* DOINIT */

EOP
}
# Emit PL_reg_intflags_name[]: one entry per PREGf_* #define in regcomp.h
# whose value is written as a hex literal, in the order the defines appear.
{
    print $out <<EOP;
/* PL_reg_intflags_name[] - Opcode/state names in string form, for debugging */

#ifndef DOINIT
EXTCONST char * PL_reg_intflags_name[];
#else
EXTCONST char * const PL_reg_intflags_name[] = {
EOP

    foreach my $file ("regcomp.h") {
        open my $fh, "<", $file or die "Can't read $file: $!";
        while (<$fh>) {
            # Captures: $1 full symbol, $2 symbol without the PREGf_ prefix,
            # $3 hex digits of the value, $4 text of an optional trailing
            # C comment.
            if (m/^ \# \s* define \s+ ( PREGf_ ( \w+ ) ) \s+ 0x([0-9a-f]+)(?:\s*\/\*(.*)\*\/)?/xi) {
                my $define = $1;
                my $abbr= $2;
                my $hex= $3;
                my $comment= $4;
                my $val= hex($hex);
                $comment= $comment ? " - $comment" : "";

                printf $out qq(\t%-30s/* 0x%08x - %s%s */\n), qq("$abbr",), $val, $define, $comment;
            }
        }
        close $fh;
    }

    print $out <<EOP;
};
#endif /* DOINIT */

EOP
}
| 382 | |
| 383 | |
# Append the 'V' (varies) and 'S' (simple) flag tables produced by
# process_flags(), each introduced by its own C comment, then finish
# regnodes.h.
{
    my $varies_comment = <<'EOC';
/* The following have no fixed length. U8 so we can do strchr() on it. */
EOC

    my $simple_comment = <<'EOC';

/* The following always have a length of 1. U8 we can do strchr() on it. */
/* (Note that length 1 means "one character" under UTF8, not "one octet".) */
EOC

    print {$out} process_flags('V', 'varies', $varies_comment);

    print {$out} process_flags('S', 'simple', $simple_comment);
}

# Not defined in this file; presumably provided by regen/regen_lib.pl.
read_only_bottom_close_and_rename($out);
| 395 | |
# Regenerate pod/perldebguts.pod: copy the existing file through, replacing
# only the regnode table that sits between the "=for regcomp.pl begin" and
# "=for regcomp.pl end" marker lines.
#
# NOTE(review): the old pod is read below while $guts is open for the same
# name; presumably open_new() writes to a temporary file that
# close_and_rename() later moves into place -- confirm in regen_lib.pl.
my $guts = open_new('pod/perldebguts.pod', '>');

# The format is built at run time because its column widths depend on the
# longest regnode name.  It refers to $_ (the regnode index), $code and the
# @cmnt/@name/@rest arrays, so $code must be declared before the eval.
my $code;
my $name_fmt = '<' x ($longest_name_length-1);
my $descr_fmt = '<' x (58-$longest_name_length);
eval <<EOD;
format GuTS =
^*~~
\$cmnt[\$_]
^$name_fmt ^<<<<<<<<< ^$descr_fmt~~
\$name[\$_], \$code, \$rest[\$_]
.
EOD
die "$0 cannot compile the GuTS format: $@" if $@;

# Make $guts the default handle for print/write, remembering the previous
# one so it can be restored afterwards.
my $previous_fh = select $guts;
$~ = "GuTS";

open my $oldguts, '<', "pod/perldebguts.pod"
    or die "$0 cannot open pod/perldebguts.pod for reading: $!";

# Copy everything up to and including the begin marker.
my $seen_begin;
while(<$oldguts>) {
    print;
    if (/=for regcomp.pl begin/) {
        $seen_begin = 1;
        last;
    }
}
die "$0 found no '=for regcomp.pl begin' line in pod/perldebguts.pod"
    unless $seen_begin;

print <<'end';

# TYPE arg-description [num-args] [longjump-len] DESCRIPTION
end
# The loop variable must stay $_: the GuTS format indexes the arrays by it.
for (0..$lastregop-1) {
    $code = "$code[$_] ".($args[$_]||"");
    $code .= " $longj[$_]" if $longj[$_];
    if ($cmnt[$_] ||= "") {
        # Trim multiple blanks
        $cmnt[$_] =~ s/^\n\n+/\n/; $cmnt[$_] =~ s/\n\n+$/\n\n/
    }
    write;
}
print "\n";

# Skip the old table; stop with the end marker line still in $_.
my $seen_end;
while(<$oldguts>) {
    if (/=for regcomp.pl end/) {
        $seen_end = 1;
        last;
    }
}
die "$0 found no '=for regcomp.pl end' line in pod/perldebguts.pod"
    unless $seen_end;

# Print the end marker itself (already in $_), then the rest of the file.
do { print } while <$oldguts>;
close $oldguts;

select $previous_fh;

close_and_rename($guts);