From 1850c8f94216e3e6bf08ca1f3121b4a91d01d1bf Mon Sep 17 00:00:00 2001 From: Karl Williamson Date: Mon, 19 Jul 2010 22:26:43 -0600 Subject: [PATCH] Refactor common parts of op.h, regexp.h into new .h op.h and regexp.h share common elements in their data structures. They have had to manually be kept in sync. This patch makes it easier by putting those common parts into a common header #included by the two. To do this, it seemed easiest to change the symbol definitions to use left shifts to generate the flag bits. But this meant that regcomp.pl and ext/B/defsubs_h.PL had to be taught to recognize those forms of expressions, done in separate commits --- MANIFEST | 1 + ext/B/defsubs_h.PL | 2 +- op.h | 48 ++++++++++++++++++++++--------------------- op_reg_common.h | 27 ++++++++++++++++++++++++ regcomp.pl | 47 ++++++++++++++++++++++-------------------- regexp.h | 60 ++++++++++++++++++++++++++---------------------------- 6 files changed, 108 insertions(+), 77 deletions(-) create mode 100644 op_reg_common.h diff --git a/MANIFEST b/MANIFEST index 111d4f2..c28e5ad8 100644 --- a/MANIFEST +++ b/MANIFEST @@ -3790,6 +3790,7 @@ opcode.h Automatically generated opcode header opcode.pl Opcode header generator op.h Opcode syntax tree header opnames.h Automatically generated opcode header +op_reg_common.h Common parts of op.h, regexp.h header os2/Changes Changelog for OS/2 port os2/diff.configure Patches to Configure os2/dlfcn.h Addon for dl_open diff --git a/ext/B/defsubs_h.PL b/ext/B/defsubs_h.PL index d8e1439..b6d8aaa 100644 --- a/ext/B/defsubs_h.PL +++ b/ext/B/defsubs_h.PL @@ -76,7 +76,7 @@ if ($] < 5.011) { # giving the prefix to limit the names of symbols to define that come # from that file. If none, all symbols will be defined whose values # match the pattern below. 
-foreach my $tuple (['op.h'],['cop.h'],['regexp.h','RXf_']) +foreach my $tuple (['op_reg_common.h','(?:(?:RXf_)?PMf_)'],['op.h'],['cop.h'],['regexp.h','RXf_']) { my $file = $tuple->[0]; my $pfx = $tuple->[1] || ''; diff --git a/op.h b/op.h index 257a951..7873a74 100644 --- a/op.h +++ b/op.h @@ -36,6 +36,7 @@ * the operation is privatized by a check routine, * which may or may not check number of children). */ +#include "op_reg_common.h" #define OPCODE U16 @@ -359,38 +360,39 @@ struct pmop { #define PM_SETRE(o,r) ((o)->op_pmregexp = (r)) #endif - -#define PMf_RETAINT 0x00000040 /* taint $1 etc. if target tainted */ +/* taint $1 etc. if target tainted */ +#define PMf_RETAINT (1<<(_RXf_PMf_SHIFT+1)) /* match successfully only once per reset, with related flag RXf_USED in * re->extflags holding state. This is used only for ?? matches, and only on * OP_MATCH and OP_QR */ -#define PMf_ONCE 0x00000080 -#define PMf_UNUSED 0x00000100 /* free for use */ -#define PMf_MAYBE_CONST 0x00000200 /* replacement contains variables */ +#define PMf_ONCE (1<<(_RXf_PMf_SHIFT+2)) + +/* replacement contains variables */ +#define PMf_MAYBE_CONST (1<<(_RXf_PMf_SHIFT+3)) + +/* PMf_ONCE has matched successfully. Not used under threading. */ +#define PMf_USED (1<<(_RXf_PMf_SHIFT+4)) + +/* subst replacement is constant */ +#define PMf_CONST (1<<(_RXf_PMf_SHIFT+5)) -/* PMf_ONCE has matched successfully. Not used under threading. 
*/ -#define PMf_USED 0x00000400 +/* keep 1st runtime pattern forever */ +#define PMf_KEEP (1<<(_RXf_PMf_SHIFT+6)) +#define PMf_GLOBAL (1<<(_RXf_PMf_SHIFT+7)) /* pattern had a g modifier */ -#define PMf_CONST 0x00000800 /* subst replacement is constant */ -#define PMf_KEEP 0x00001000 /* keep 1st runtime pattern forever */ -#define PMf_GLOBAL 0x00002000 /* pattern had a g modifier */ -#define PMf_CONTINUE 0x00004000 /* don't reset pos() if //g fails */ -#define PMf_EVAL 0x00008000 /* evaluating replacement as expr */ +/* don't reset pos() if //g fails */ +#define PMf_CONTINUE (1<<(_RXf_PMf_SHIFT+8)) + +/* evaluating replacement as expr */ +#define PMf_EVAL (1<<(_RXf_PMf_SHIFT+9)) /* Return substituted string instead of modifying it. */ -#define PMf_NONDESTRUCT 0x00010000 +#define PMf_NONDESTRUCT (1<<(_RXf_PMf_SHIFT+10)) -/* The following flags have exact equivalents in regcomp.h with the prefix RXf_ - * which are stored in the regexp->extflags member. If you change them here, - * you have to change them there, and vice versa. - */ -#define PMf_MULTILINE 0x00000001 /* assume multiple lines */ -#define PMf_SINGLELINE 0x00000002 /* assume single line */ -#define PMf_FOLD 0x00000004 /* case insensitivity */ -#define PMf_EXTENDED 0x00000008 /* chuck embedded whitespace */ -#define PMf_KEEPCOPY 0x00000010 /* copy the string when matching */ -#define PMf_LOCALE 0x00000020 /* use locale for character types */ +#if _RXf_PMf_SHIFT+10 > 31 +# error Too many RXf_PMf bits used. 
See above and regnodes.h for any spare in middle +#endif /* mask of bits that need to be transfered to re->extflags */ #define PMf_COMPILETIME (PMf_MULTILINE|PMf_SINGLELINE|PMf_LOCALE|PMf_FOLD|PMf_EXTENDED|PMf_KEEPCOPY) diff --git a/op_reg_common.h b/op_reg_common.h new file mode 100644 index 0000000..b0fd273 --- /dev/null +++ b/op_reg_common.h @@ -0,0 +1,27 @@ +/* op_reg_common.h + * + * Definitions common to both op.h and regexp.h + * + * Copyright (C) 2010 by Larry Wall and others + * + * You may distribute under the terms of either the GNU General Public + * License or the Artistic License, as specified in the README file. + * + */ + +/* These defines are used in both op.h and regexp.h. The definitions use the + * shift form so that ext/B/defsubs_h.PL will pick them up */ +#define RXf_PMf_MULTILINE (1 << 0) /* /m */ +#define PMf_MULTILINE (1 << 0) /* /m */ +#define RXf_PMf_SINGLELINE (1 << 1) /* /s */ +#define PMf_SINGLELINE (1 << 1) /* /s */ +#define RXf_PMf_FOLD (1 << 2) /* /i */ +#define PMf_FOLD (1 << 2) /* /i */ +#define RXf_PMf_EXTENDED (1 << 3) /* /x */ +#define PMf_EXTENDED (1 << 3) /* /x */ +#define RXf_PMf_KEEPCOPY (1 << 4) /* /p */ +#define PMf_KEEPCOPY (1 << 4) /* /p */ +#define RXf_PMf_LOCALE (1 << 5) +#define PMf_LOCALE (1 << 5) + +#define _RXf_PMf_SHIFT 5 /* Begins with '_' so won't be exported by B */ diff --git a/regcomp.pl b/regcomp.pl index aa0f0fe..d85482c 100644 --- a/regcomp.pl +++ b/regcomp.pl @@ -256,36 +256,39 @@ EXTCONST char * PL_reg_extflags_name[]; EXTCONST char * const PL_reg_extflags_name[] = { EOP -open my $fh,"<","regexp.h" or die "Can't read regexp.h: $!"; my %rxfv; my %definitions; # Remember what the symbol definitions are my $val = 0; my %reverse; -while (<$fh>) { - - # optional leading '_'. Return symbol in $1, and strip it from - # rest of line - if (s/ \#define \s+ ( _? RXf_ \w+ ) \s+ //xi) { - chomp; - my $define = $1; - s: / \s* \* .*? 
\* \s* / : :x; # Replace comments by a blank - - # Replace any prior defined symbols by their values - foreach my $key (keys %definitions) { - s/\b$key\b/$definitions{$key}/g; - } - my $newval = eval $_; # Get numeric definition +foreach my $file ("op_reg_common.h", "regexp.h") { + open my $fh,"<", $file or die "Can't read $file: $!"; + while (<$fh>) { + + # optional leading '_'. Return symbol in $1, and strip it from + # rest of line + if (s/ \#define \s+ ( _? RXf_ \w+ ) \s+ //xi) { + chomp; + my $define = $1; + s: / \s* \* .*? \* \s* / : :x; # Replace comments by a blank + + # Replace any prior defined symbols by their values + foreach my $key (keys %definitions) { + s/\b$key\b/$definitions{$key}/g; + } + my $newval = eval $_; # Get numeric definition - $definitions{$define} = $newval; + $definitions{$define} = $newval; - if($val & $newval) { - die sprintf "Both $define and $reverse{$newval} use %08X", $newval; + next unless $_ =~ /<op_pmflags member during * compilation */ -#define RXf_PMf_MULTILINE 0x00000001 /* /m */ -#define RXf_PMf_SINGLELINE 0x00000002 /* /s */ -#define RXf_PMf_FOLD 0x00000004 /* /i */ -#define RXf_PMf_EXTENDED 0x00000008 /* /x */ -#define RXf_PMf_KEEPCOPY 0x00000010 /* /p */ -#define RXf_PMf_LOCALE 0x00000020 /* use locale */ -/* these flags are transfered from the PMOP->op_pmflags member during compilation */ #define RXf_PMf_STD_PMMOD_SHIFT 0 #define RXf_PMf_STD_PMMOD (RXf_PMf_MULTILINE|RXf_PMf_SINGLELINE|RXf_PMf_FOLD|RXf_PMf_EXTENDED) #define RXf_PMf_COMPILETIME (RXf_PMf_MULTILINE|RXf_PMf_SINGLELINE|RXf_PMf_LOCALE|RXf_PMf_FOLD|RXf_PMf_EXTENDED|RXf_PMf_KEEPCOPY) @@ -297,53 +292,56 @@ and check for NULL. 
*/ /* Anchor and GPOS related stuff */ -#define RXf_ANCH_BOL 0x00000100 -#define RXf_ANCH_MBOL 0x00000200 -#define RXf_ANCH_SBOL 0x00000400 -#define RXf_ANCH_GPOS 0x00000800 -#define RXf_GPOS_SEEN 0x00001000 -#define RXf_GPOS_FLOAT 0x00002000 +#define RXf_ANCH_BOL (1<<(_RXf_PMf_SHIFT+3)) +#define RXf_ANCH_MBOL (1<<(_RXf_PMf_SHIFT+4)) +#define RXf_ANCH_SBOL (1<<(_RXf_PMf_SHIFT+5)) +#define RXf_ANCH_GPOS (1<<(_RXf_PMf_SHIFT+6)) +#define RXf_GPOS_SEEN (1<<(_RXf_PMf_SHIFT+7)) +#define RXf_GPOS_FLOAT (1<<(_RXf_PMf_SHIFT+8)) /* two bits here */ #define RXf_ANCH (RXf_ANCH_BOL|RXf_ANCH_MBOL|RXf_ANCH_GPOS|RXf_ANCH_SBOL) #define RXf_GPOS_CHECK (RXf_GPOS_SEEN|RXf_ANCH_GPOS) #define RXf_ANCH_SINGLE (RXf_ANCH_SBOL|RXf_ANCH_GPOS) /* What we have seen */ -#define RXf_LOOKBEHIND_SEEN 0x00004000 -#define RXf_EVAL_SEEN 0x00008000 -#define RXf_CANY_SEEN 0x00010000 +#define RXf_LOOKBEHIND_SEEN (1<<(_RXf_PMf_SHIFT+9)) +#define RXf_EVAL_SEEN (1<<(_RXf_PMf_SHIFT+10)) +#define RXf_CANY_SEEN (1<<(_RXf_PMf_SHIFT+11)) /* Special */ -#define RXf_NOSCAN 0x00020000 -#define RXf_CHECK_ALL 0x00040000 +#define RXf_NOSCAN (1<<(_RXf_PMf_SHIFT+12)) +#define RXf_CHECK_ALL (1<<(_RXf_PMf_SHIFT+13)) /* UTF8 related */ -#define RXf_MATCH_UTF8 0x00100000 +#define RXf_MATCH_UTF8 (1<<(_RXf_PMf_SHIFT+15)) /* Intuit related */ -#define RXf_USE_INTUIT_NOML 0x00200000 -#define RXf_USE_INTUIT_ML 0x00400000 -#define RXf_INTUIT_TAIL 0x00800000 +#define RXf_USE_INTUIT_NOML (1<<(_RXf_PMf_SHIFT+16)) +#define RXf_USE_INTUIT_ML (1<<(_RXf_PMf_SHIFT+17)) +#define RXf_INTUIT_TAIL (1<<(_RXf_PMf_SHIFT+18)) /* Set in Perl_pmruntime if op_flags & OPf_SPECIAL, i.e. split. 
Will be used by regex engines to check whether they should set RXf_SKIPWHITE */ -#define RXf_SPLIT 0x01000000 +#define RXf_SPLIT (1<<(_RXf_PMf_SHIFT+19)) #define RXf_USE_INTUIT (RXf_USE_INTUIT_NOML|RXf_USE_INTUIT_ML) /* Copy and tainted info */ -#define RXf_COPY_DONE 0x02000000 -#define RXf_TAINTED_SEEN 0x04000000 -#define RXf_TAINTED 0x08000000 /* this pattern is tainted */ +#define RXf_COPY_DONE (1<<(_RXf_PMf_SHIFT+20)) +#define RXf_TAINTED_SEEN (1<<(_RXf_PMf_SHIFT+21)) +#define RXf_TAINTED (1<<(_RXf_PMf_SHIFT+22)) /* this pattern is tainted */ /* Flags indicating special patterns */ -#define RXf_START_ONLY 0x10000000 /* Pattern is /^/ */ -#define RXf_SKIPWHITE 0x20000000 /* Pattern is for a split / / */ -#define RXf_WHITE 0x40000000 /* Pattern is /\s+/ */ -#define RXf_NULL 0x80000000 /* Pattern is // */ +#define RXf_START_ONLY (1<<(_RXf_PMf_SHIFT+23)) /* Pattern is /^/ */ +#define RXf_SKIPWHITE (1<<(_RXf_PMf_SHIFT+24)) /* Pattern is for a split / / */ +#define RXf_WHITE (1<<(_RXf_PMf_SHIFT+25)) /* Pattern is /\s+/ */ +#define RXf_NULL (1<<(_RXf_PMf_SHIFT+26)) /* Pattern is // */ +#if _RXf_PMf_SHIFT+26 > 31 /* +26 (RXf_NULL) is the highest bit used above */ +# error Too many RXf_PMf bits used. See regnodes.h for any spare in middle +#endif /* * NOTE: if you modify any RXf flags you should run regen.pl or regcomp.pl -- 1.8.3.1