#
# Regenerate (overwriting only if changed):
#
+# pod/perldebguts.pod
# regnodes.h
#
# from information stored in
# regcomp.sym
# regexp.h
#
+# pod/perldebguts.pod is not completely regenerated. Only the table of
+# regexp nodes is replaced; other parts remain unchanged.
+#
# Accepts the standard regen_lib -q and -v args.
#
# This script is normally invoked from regen.pl.
# Get function prototypes
require 'regen/regen_lib.pl';
}
-#use Fatal qw(open close rename chmod unlink);
use strict;
open DESC, 'regcomp.sym';
my $ind = 0;
-my (@name,@rest,@type,@code,@args,@flags,@longj);
-my ($desc,$lastregop);
+my (@name,@rest,@type,@code,@args,@flags,@longj,@cmnt);
+my ($longest_name_length,$desc,$lastregop) = 0;
+my (%seen_op, %type_alias);
while (<DESC>) {
- s/#.*$//;
- next if /^\s*$/;
+ # Special pod comments
+ if (/^#\* ?/) { $cmnt[$ind] .= "# $'"; }
+ # Truly blank lines possibly surrounding pod comments
+ elsif (/^\s*$/) { $cmnt[$ind] .= "\n" }
+
+ next if /^(?:#|\s*$)/;
chomp; # No \z in 5.004
s/\s*$//;
if (/^-+\s*$/) {
next;
}
unless ($lastregop) {
- ($name[$ind], $desc, $rest[$ind]) = /^(\S+)\s+([^\t]+)\s*;\s*(.*)/;
+ ($name[$ind], $desc, $rest[$ind]) = /^(\S+)\s+([^\t]+?)\s*;\s*(.*)/;
+
+ if (defined $seen_op{$name[$ind]}) {
+ die "Duplicate regop $name[$ind] in regcomp.sym line $. previously defined on line $seen_op{$name[$ind]}\n";
+ } else {
+ $seen_op{$name[$ind]}= $.;
+ }
+
($type[$ind], $code[$ind], $args[$ind], $flags[$ind], $longj[$ind])
= split /[,\s]\s*/, $desc;
+
+ if (!defined $seen_op{$type[$ind]} and !defined $type_alias{$type[$ind]}) {
+ #warn "Regop type '$type[$ind]' from regcomp.sym line $. is not an existing regop, and will be aliased to $name[$ind]\n"
+ # if -t STDERR;
+ $type_alias{$type[$ind]}= $name[$ind];
+ }
+
+ $longest_name_length = length $name[$ind]
+ if length $name[$ind] > $longest_name_length;
++$ind;
} else {
my ($type,@lists)=split /\s+/, $_;
$out_mask
};
#endif /* DOINIT */
-
EOP
}
-my $tmp_h = 'regnodes.h-new';
-
-unlink $tmp_h if -f $tmp_h;
-
-my $out = safer_open($tmp_h);
-
-print $out read_only_top(lang => 'C', by => 'regen/regcomp.pl',
- from => 'regcomp.sym');
+my $out = open_new('regnodes.h', '>',
+ { by => 'regen/regcomp.pl', from => 'regcomp.sym' });
printf $out <<EOP,
/* Regops and State definitions */
-$width, REGMATCH_STATE_MAX => $tot - 1
;
-
+my %rev_type_alias= reverse %type_alias;
for ($ind=0; $ind < $lastregop ; ++$ind) {
printf $out "#define\t%*s\t%d\t/* %#04x %s */\n",
-$width, $name[$ind], $ind, $ind, $rest[$ind];
+ if (defined(my $alias= $rev_type_alias{$name[$ind]})) {
+ printf $out "#define\t%*s\t%d\t/* %#04x %s */\n",
+ -$width, $alias, $ind, $ind, "type alias";
+ }
+
}
print $out "\t/* ------------ States ------------- */\n";
for ( ; $ind < $tot ; $ind++) {
};
#endif /* DOINIT */
+EOP
+
+{
+print $out <<EOP;
/* PL_reg_extflags_name[] - Opcode/state names in string form, for debugging */
#ifndef DOINIT
my %definitions; # Remember what the symbol definitions are
my $val = 0;
my %reverse;
+my $REG_EXTFLAGS_NAME_SIZE = 0;
foreach my $file ("op_reg_common.h", "regexp.h") {
open FH,"<$file" or die "Can't read $file: $!";
while (<FH>) {
# optional leading '_'. Return symbol in $1, and strip it from
# rest of line
- if (s/ \#define \s+ ( _? RXf_ \w+ ) \s+ //xi) {
+ if (s/^ \# \s* define \s+ ( _? RXf_ \w+ ) \s+ //xi) {
chomp;
my $define = $1;
- s: / \s* \* .*? \* \s* / : :x; # Replace comments by a blank
+ my $orig= $_;
+ s{ /\* .*? \*/ }{ }x; # Replace comments by a blank
# Replace any prior defined symbols by their values
foreach my $key (keys %definitions) {
s/\b$key\b/$definitions{$key}/g;
}
+
+ # Remove the U suffix from unsigned int literals
+ s/\b([0-9]+)U\b/$1/g;
+
my $newval = eval $_; # Get numeric definition
$definitions{$define} = $newval;
next unless $_ =~ /<</; # Bit defines use left shift
if($val & $newval) {
- die sprintf "Both $define and $reverse{$newval} use %08X", $newval;
+ my @names=($define, $reverse{$newval});
+ s/PMf_// for @names;
+ if ($names[0] ne $names[1]) {
+ die sprintf "ERROR: both $define and $reverse{$newval} use 0x%08X (%s:%s)", $newval, $orig, $_;
+ }
+ next;
}
$val|=$newval;
$rxfv{$define}= $newval;
}
my %vrxf=reverse %rxfv;
printf $out "\t/* Bits in extflags defined: %s */\n", unpack 'B*', pack 'N', $val;
+my %multibits;
for (0..31) {
my $power_of_2 = 2**$_;
my $n=$vrxf{$power_of_2};
+ my $extra = "";
if (! $n) {
# Here, there was no name that matched exactly the bit. It could be
# that name, and all the bits it matches
foreach my $name (keys %rxfv) {
if ($rxfv{$name} & $power_of_2) {
- $n = $name;
- $power_of_2 = $rxfv{$name};
+ $n = $name . ( $multibits{$name}++ );
+ $extra= sprintf qq{ : "%s" - 0x%08x}, $name, $rxfv{$name}
+ if $power_of_2 != $rxfv{$name};
last;
}
}
}
}
- $n=~s/^RXf_(PMf_)?//;
- printf $out qq(\t%-20s/* 0x%08x */\n),
- qq("$n",),$power_of_2;
+ s/\bRXf_(PMf_)?// for $n, $extra;
+ printf $out qq(\t%-20s/* 0x%08x%s */\n),
+ qq("$n",),$power_of_2, $extra;
+ $REG_EXTFLAGS_NAME_SIZE++;
}
print $out <<EOP;
#endif /* DOINIT */
EOP
+print $out <<EOQ
+#ifdef DEBUGGING
+# define REG_EXTFLAGS_NAME_SIZE $REG_EXTFLAGS_NAME_SIZE
+#endif
+
+EOQ
+}
+{
+# Emit the PL_reg_intflags_name[] table into $out: one string entry for every
+# PREGf_* flag that regcomp.h defines as a hex literal, followed by a
+# DEBUGGING-only #define giving the number of entries written.
+print $out <<EOP;
+/* PL_reg_intflags_name[] - Opcode/state names in string form, for debugging */
+
+#ifndef DOINIT
+EXTCONST char * PL_reg_intflags_name[];
+#else
+EXTCONST char * const PL_reg_intflags_name[] = {
+EOP
+
+my $REG_INTFLAGS_NAME_SIZE = 0;    # count of table entries emitted below
+foreach my $file ("regcomp.h") {
+    open my $fh, "<", $file or die "Can't read $file: $!";
+    while (<$fh>) {
+        # Match "#define PREGf_NAME 0xHEX", optionally followed by a
+        # /* comment */ on the same line.  Captures: $1 the full symbol,
+        # $2 the part after "PREGf_", $3 the hex digits, $4 the comment text.
+        if (m/^ \# \s* define \s+ ( PREGf_ ( \w+ ) ) \s+ 0x([0-9a-f]+)(?:\s*\/\*(.*)\*\/)?/xi) {
+            my $define = $1;
+            my $abbr= $2;
+            my $hex= $3;
+            my $comment= $4;
+            my $flag_value= hex($hex);
+            # An absent (or empty) comment contributes nothing to the output.
+            $comment= $comment ? " - $comment" : "";
+
+            printf $out qq(\t%-30s/* 0x%08x - %s%s */\n), qq("$abbr",), $flag_value, $define, $comment;
+            $REG_INTFLAGS_NAME_SIZE++;
+        }
+    }
+}
+
+print $out <<EOP;
+};
+#endif /* DOINIT */
+
+EOP
+print $out <<EOQ;
+#ifdef DEBUGGING
+# define REG_INTFLAGS_NAME_SIZE $REG_INTFLAGS_NAME_SIZE
+#endif
+
+EOQ
+}
print $out process_flags('V', 'varies', <<'EOC');
/* The following have no fixed length. U8 so we can do strchr() on it. */
EOC
print $out process_flags('S', 'simple', <<'EOC');
+
/* The following always have a length of 1. U8 we can do strchr() on it. */
/* (Note that length 1 means "one character" under UTF8, not "one octet".) */
EOC
-print $out <<EOP;
-/* ex: set ro: */
-EOP
-safer_close($out);
+read_only_bottom_close_and_rename($out);
+
+my $guts = open_new('pod/perldebguts.pod', '>');
+
+# Regenerate the regnode table inside pod/perldebguts.pod.  Text outside the
+# "=for regcomp.pl begin" / "=for regcomp.pl end" markers is copied through
+# from the existing file; the table between them is rewritten from the data
+# collected in @name, @code, @args, @longj, @rest and @cmnt.
+#
+# The GuTS format is built at run time because its column widths depend on
+# $longest_name_length.
+my $code;
+my $name_fmt = '<' x ($longest_name_length-1);
+my $descr_fmt = '<' x (58-$longest_name_length);
+eval <<EOD;
+format GuTS =
+ ^*~~
+ \$cmnt[\$_]
+ ^$name_fmt ^<<<<<<<<< ^$descr_fmt~~
+ \$name[\$_], \$code, \$rest[\$_]
+.
+EOD
+# Report a format that failed to compile here, with its real cause, instead
+# of letting the later write() fail on a missing format.
+die "$0 cannot compile the GuTS format: $@" if $@;
+
+# Make $guts the default output handle while the do block runs.  Element [0]
+# of the list is the previously selected handle, which the outer select
+# restores afterwards.
+select +(select($guts), do {
+    $~ = "GuTS";
+
+    open my $oldguts, '<', "pod/perldebguts.pod"
+        or die "$0 cannot open pod/perldebguts.pod for reading: $!";
+    # Copy the old file through, up to and including the begin marker.
+    while(<$oldguts>) {
+        print;
+        last if /=for regcomp.pl begin/;
+    }
+
+    print <<'end';
+
+ # TYPE arg-description [num-args] [longjump-len] DESCRIPTION
+end
+    for (0..$lastregop-1) {
+        # $code is the middle column read by the GuTS format.
+        $code = "$code[$_] ".($args[$_]||"");
+        $code .= " $longj[$_]" if $longj[$_];
+        if ($cmnt[$_] ||= "") {
+            # Trim multiple blanks
+            $cmnt[$_] =~ s/^\n\n+/\n/; $cmnt[$_] =~ s/\n\n+$/\n\n/
+        }
+        write;
+    }
+    print "\n";
+
+    # Discard the old table: read forward to the end marker without printing.
+    while(<$oldguts>) {
+        last if /=for regcomp.pl end/;
+    }
+    # The do-while form prints the line already in $_ (the end marker) before
+    # reading on, then copies the remainder of the old file.
+    do { print } while <$oldguts>;
+
+    # Release the read handle before the new file replaces the old one; some
+    # platforms cannot rename over or unlink a file that is still open.
+    close $oldguts
+        or die "$0 cannot close pod/perldebguts.pod: $!";
+
+})[0];
-rename_if_different $tmp_h, 'regnodes.h';
+close_and_rename($guts);