Correct code-like snippet in documentation

[perl5.git] / regen / regcomp.pl
diff --git a/regen/regcomp.pl b/regen/regcomp.pl

index 17f2cc0..94a53db 100644 (file)
--- a/regen/regcomp.pl
+++ b/regen/regcomp.pl
@@ -9,6 +9,7 @@
  # from information stored in
  #
  #    regcomp.sym
+#    op_reg_common.h
  #    regexp.h
  #
  # pod/perldebguts.pod is not completely regenerated.  Only the table of
@@ -17,10 +18,39 @@
  # Accepts the standard regen_lib -q and -v args.
  #
  # This script is normally invoked from regen.pl.
+#
+# F<regcomp.sym> defines the opcodes and states used in the regex
+# engine; it also includes documentation on the opcodes. This script
+# parses those definitions out and turns them into typedefs, defines,
+# and data structures, and maybe even code which the regex engine can
+# use to operate.
+#
+# F<regexp.h> and F<op_reg_common.h> contain the C<RXf_xxx> defines and
+# F<regcomp.h> the C<PREGf_xxx> defines used as flags. These defines are
+# parsed out and data structures are created to allow the debug mode of
+# the regex engine to show things such as which flags were set during
+# compilation. In some cases we transform the C code in the header files
+# into perl code which we execute to C<eval()> the contents. For instance
+# in a situation like this:
+#
+#   #define RXf_X 0x1             /* the X mode */
+#   #define RXf_Y 0x2             /* the Y mode */
+#   #define RXf_Z (RXf_X|RXf_Y)   /* the Z mode */
+#
+# this script might end up eval()ing something like C<0x1> and then
+# C<0x2> and then C<(0x1|0x2)> the results of which it then might use in
+# constructing a data structure, or pod in perldebguts, or a comment in
+# C<regnodes.h>. It also would separate out the "X", "Y", and "Z" and
+# use them, and would also use the data in the line comment if present.
+#
+# If you compile a regex under perl -Mre=Debug,ALL you can see much
+# of the content that this file generates and parses out of its input
+# files.
  
  BEGIN {
      # Get function prototypes
      require './regen/regen_lib.pl';
+    require './regen/HeaderParser.pm';
  }
  
  use strict;
@@ -501,7 +531,7 @@ sub print_regnode_info {
  #ifndef DOINIT
  EXTCONST struct regnode_meta PL_regnode_info[];
  #else
-EXTCONST struct regnode_meta const PL_regnode_info[] = {
+EXTCONST struct regnode_meta PL_regnode_info[] = {
  EOP
      my @fields= qw(type arg_len arg_len_varies off_by_arg);
      foreach my $node_idx (0..$#all) {
@@ -584,39 +614,44 @@ EOP
      my $val= 0;
      my %reverse;
      my $REG_EXTFLAGS_NAME_SIZE= 0;
+    my $hp= HeaderParser->new();
      foreach my $file ( "op_reg_common.h", "regexp.h" ) {
-        open my $in_fh, "<", $file or die "Can't read '$file': $!";
-        while (<$in_fh>) {
+        $hp->read_file($file);
+        foreach my $line_info (@{$hp->lines}) {
+            next unless $line_info->{type}     eq "content"
+                    and $line_info->{sub_type} eq "#define";
+            my $line= $line_info->{line};
+            $line=~s/\s*\\\n\s*/ /g;
  
              # optional leading '_'.  Return symbol in $1, and strip it from
              # comment of line.  Currently doesn't handle comments running onto
              # next line
-            if (s/^ \# \s* define \s+ ( _? RXf_ \w+ ) \s+ //xi) {
-                chomp;
+            if ($line=~s/^ \# \s* define \s+ ( _? RXf_ \w+ ) \s+ //xi) {
+                chomp($line);
                  my $define= $1;
                  my $orig= $_;
-                s{ /\* .*? \*/ }{ }x;    # Replace comments by a blank
+                $line=~s{ /\* .*? \*/ }{ }x;    # Replace comments by a blank
  
                  # Replace any prior defined symbols by their values
                  foreach my $key ( keys %definitions ) {
-                    s/\b$key\b/$definitions{$key}/g;
+                    $line=~s/\b$key\b/$definitions{$key}/g;
                  }
  
                  # Remove the U suffix from unsigned int literals
-                s/\b([0-9]+)U\b/$1/g;
+                $line=~s/\b([0-9]+)U\b/$1/g;
  
-                my $newval= eval $_;     # Get numeric definition
+                my $newval= eval $line;     # Get numeric definition
  
                  $definitions{$define}= $newval;
  
-                next unless $_ =~ /<</;    # Bit defines use left shift
+                next unless $line =~ /<</;    # Bit defines use left shift
                  if ( $val & $newval ) {
                      my @names= ( $define, $reverse{$newval} );
                      s/PMf_// for @names;
                      if ( $names[0] ne $names[1] ) {
                          die sprintf
                              "ERROR: both $define and $reverse{$newval} use 0x%08X (%s:%s)",
-                            $newval, $orig, $_;
+                            $newval, $orig, $line;
                      }
                      next;
                  }
@@ -690,29 +725,48 @@ EOP
      my $val= 0;
      my %reverse;
      my $REG_INTFLAGS_NAME_SIZE= 0;
+    my $hp= HeaderParser->new();
+    my $last_val = 0;
      foreach my $file ("regcomp.h") {
-        open my $fh, "<", $file or die "Can't read $file: $!";
-        while (<$fh>) {
+        $hp->read_file($file);
+        my @bit_tuples;
+        foreach my $line_info (@{$hp->lines}) {
+            next unless $line_info->{type}     eq "content"
+                    and $line_info->{sub_type} eq "#define";
+            my $line= $line_info->{line};
+            $line=~s/\s*\\\n\s*/ /g;
  
              # optional leading '_'.  Return symbol in $1, and strip it from
              # comment of line
              if (
-                m/^ \# \s* define \s+ ( PREGf_ ( \w+ ) ) \s+ 0x([0-9a-f]+)(?:\s*\/\*(.*)\*\/)?/xi
-                )
-            {
-                chomp;
+                $line =~ m/^ \# \s* define \s+ ( PREGf_ ( \w+ ) ) \s+ 0x([0-9a-f]+)(?:\s*\/\*(.*)\*\/)?/xi
+            ){
+                chomp $line;
                  my $define= $1;
                  my $abbr= $2;
                  my $hex= $3;
                  my $comment= $4;
                  my $val= hex($hex);
+                my $bin= sprintf "%b", $val;
+                if ($bin=~/1.*?1/) { die "Not expecting multiple bits in PREGf" }
+                my $bit= length($bin) - 1 ;
                  $comment= $comment ? " - $comment" : "";
-
-                printf $out qq(\t%-30s/* 0x%08x - %s%s */\n), qq("$abbr",),
-                    $val, $define, $comment;
-                $REG_INTFLAGS_NAME_SIZE++;
+                if ($bit_tuples[$bit]) {
+                    die "Duplicate PREGf bit '$bit': $define $val ($hex)";
+                }
+                $bit_tuples[$bit]= [ $bit, $val, $abbr, $define, $comment ];
+            }
+        }
+        foreach my $i (0..$#bit_tuples) {
+            my $bit_tuple= $bit_tuples[$i];
+            if (!$bit_tuple) {
+                $bit_tuple= [ $i, 1<<$i, "", "", "*UNUSED*" ];
              }
+            my ($bit, $val, $abbr, $define, $comment)= @$bit_tuple;
+            printf $out qq(\t%-30s/* (1<<%2d) - 0x%08x - %s%s */\n),
+                qq("$abbr",), $bit, $val, $define, $comment;
          }
+        $REG_INTFLAGS_NAME_SIZE=0+@bit_tuples;
      }
  
      print $out <<EOP;
@@ -805,7 +859,11 @@ if ($ENV{DUMP}) {
      exit(1);
  }
  my $out= open_new( 'regnodes.h', '>',
-    { by => 'regen/regcomp.pl', from => 'regcomp.sym' } );
+    {
+        by      => 'regen/regcomp.pl',
+        from    => [ 'regcomp.sym', 'op_reg_common.h', 'regexp.h' ],
+    },
+);
  print $out "#if $confine_to_core\n\n";
  print_typedefs($out);
  print_state_defs($out);