From 1e2159890b8bf881fbc717f671f87ba2dec1da46 Mon Sep 17 00:00:00 2001 From: Father Chrysostomos Date: Mon, 18 Oct 2010 17:59:50 -0700 Subject: [PATCH] [perl #78072] use re '/xism'; --- MANIFEST | 1 + ext/re/re.pm | 82 +++++++++++++++++++++++++++++++++++++ ext/re/t/reflags.t | 116 +++++++++++++++++++++++++++++++++++++++++++++++++++++ op.c | 13 ++++++ op_reg_common.h | 2 + perl.h | 2 + 6 files changed, 216 insertions(+) create mode 100644 ext/re/t/reflags.t diff --git a/MANIFEST b/MANIFEST index cfe75a6..237cb5a 100644 --- a/MANIFEST +++ b/MANIFEST @@ -3312,6 +3312,7 @@ ext/re/re.xs re extension external subroutines ext/re/t/lexical_debug.pl generate debug output for lexical re 'debug' ext/re/t/lexical_debug.t test that lexical re 'debug' works ext/re/t/qr.t test that qr// is a Regexp +ext/re/t/reflags.t see if re '/xism' pragma works ext/re/t/re_funcs.t See if exportable 're' funcs in re.xs work ext/re/t/regop.pl generate debug output for various patterns ext/re/t/regop.t test RE optimizations by scraping debug output diff --git a/ext/re/re.pm b/ext/re/re.pm index 90e31f3..8813232 100644 --- a/ext/re/re.pm +++ b/ext/re/re.pm @@ -16,6 +16,20 @@ my %bitmask = ( eval => 0x00200000, # HINT_RE_EVAL ); +my $flags_hint = 0x02000000; # HINT_RE_FLAGS +my $PMMOD_SHIFT = 0; +my %reflags = ( + m => 1 << ($PMMOD_SHIFT + 0), + s => 1 << ($PMMOD_SHIFT + 1), + i => 1 << ($PMMOD_SHIFT + 2), + x => 1 << ($PMMOD_SHIFT + 3), + p => 1 << ($PMMOD_SHIFT + 4), +# special cases: + l => 1 << ($PMMOD_SHIFT + 5), + u => 1 << ($PMMOD_SHIFT + 6), + d => 0, +); + sub setcolor { eval { # Ignore errors require Term::Cap; @@ -96,6 +110,7 @@ sub bits { require Carp; Carp::carp("Useless use of \"re\" pragma"); } + ARG: foreach my $idx (0..$#_){ my $s=$_[$idx]; if ($s eq 'Debug' or $s eq 'Debugcolor') { @@ -125,6 +140,33 @@ sub bits { } elsif ($EXPORT_OK{$s}) { require Exporter; re->export_to_level(2, 're', $s); + } elsif ($s =~ s/^\///) { + my $reflags = $^H{reflags} || 0; + for(split//, $s) { + if (/[dul]/) { + if ($on) { + $^H{reflags_dul} = $reflags{$_}; + } + else { + delete $^H{reflags_dul} + if defined $^H{reflags_dul} + && $^H{reflags_dul} == $reflags{$_}; + } + } elsif (exists $reflags{$_}) { + $on + ? $reflags |= $reflags{$_} + : ($reflags &= ~$reflags{$_}); + } else { + require Carp; + Carp::carp( + qq'Unknown regular expression flag "$_"' + ); + next ARG; + } + } + ($^H{reflags} = $reflags or defined $^H{reflags_dul}) + ? $^H |= $flags_hint + : ($^H &= ~$flags_hint); } else { require Carp; Carp::carp("Unknown \"re\" subpragma '$s' (known ones are: ", @@ -170,6 +212,11 @@ re - Perl pragma to alter regular expression behaviour /foo${pat}bar/; # disallowed (with or without -T switch) } + use re '/ix'; + "FOO" =~ / foo /; # /ix implied + no re '/x'; + "FOO" =~ /foo/; # just /i implied + use re 'debug'; # output debugging info during /^(.*)$/s; # compile and run time @@ -220,6 +267,41 @@ interpolation. Thus: I allowed if $pat is a precompiled regular expression, even if $pat contains C<(?{ ... })> assertions or C<(??{ ... })> subexpressions. +=head2 '/flags' mode + +When C is specified, the given flags are automatically +added to every regular expression till the end of the lexical scope. + +C will turn off the effect of C for the +given flags. + +For example, if you want all your regular expressions to have /msx on by +default, simply put + + use re '/msx'; + +at the top of your code. + +The /dul flags cancel each other out. So, in this example, + + use re "/u"; + "ss" =~ /\xdf/; + use re "/d"; + "ss" =~ /\xdf/; + +The second C does an implicit C. + +Turning on the /l and /u flags with C takes precedence over the +C pragma and the 'unicode_strings' C, for regular +expressions. Turning off one of these flags when it is active reverts to +the behaviour specified by whatever other pragmata are in scope. For +example: + + use feature "unicode_strings"; + no re "/u"; # does nothing + use re "/l"; + no re "/l"; # reverts to unicode_strings behaviour + =head2 'debug' mode When C is in effect, perl emits debugging messages when diff --git a/ext/re/t/reflags.t b/ext/re/t/reflags.t new file mode 100644 index 0000000..26e8f05 --- /dev/null +++ b/ext/re/t/reflags.t @@ -0,0 +1,116 @@ +#!./perl + +BEGIN { + require Config; + if (($Config::Config{'extensions'} !~ /\bre\b/) ){ + print "1..0 # Skip -- Perl configured without re module\n"; + exit 0; + } +} + +use strict; + +use Test::More tests => 32; + +use re '/i'; +ok "Foo" =~ /foo/, 'use re "/i"'; +no re '/i'; +ok "Foo" !~ /foo/, 'no re "/i"'; +use re '/x'; +ok "foo" =~ / foo /, 'use re "/x"'; +no re '/x'; +ok "foo" !~ / foo /, 'no re "/x"'; +use re '/s'; +ok "\n" =~ /./, 'use re "/s"'; +no re '/s'; +ok "\n" !~ /./, 'no re "/s"'; +use re '/m'; +ok "\nfoo" =~ /^foo/, 'use re "/m"'; +no re '/m'; +ok "\nfoo" !~ /^foo/, 'no re "/m"'; + +use re '/xism'; +ok qr// =~ /(?=.*x)(?=.*i)(?=.*s)(?=.*m)/, 'use re "/multiple"'; +no re '/ix'; +ok qr// =~ /(?!.*x)(?!.*i)(?=.*s)(?=.*m)/, 'no re "/i" only turns off /ix'; +no re '/sm'; + +{ + use re '/x'; + ok 'frelp' =~ /f r e l p/, "use re '/x' in a lexical scope" +} +ok 'f r e l p' =~ /f r e l p/, + "use re '/x' turns off when it drops out of scope"; + +SKIP: { + if ( + !$Config::Config{d_setlocale} + || $Config::Config{ccflags} =~ /\bD?NO_LOCALE\b/ + ) { + skip "no locale support", 7 + } + use locale; + use re '/u'; + is qr//, '(?^u:)', 'use re "/u" with active locale'; + no re '/u'; + is qr//, '(?^l:)', 'no re "/u" reverts to /l with locale in scope'; + no re '/l'; + is qr//, '(?^l:)', 'no re "/l" is a no-op with locale in scope'; + use re '/d'; + is qr//, '(?^:)', 'use re "/d" with locale in scope'; + no re '/l'; + no re '/u'; + is qr//, '(?^:)', + 'no re "/l" and "/u" are no-ops when not on (locale scope)'; + no re "/d"; + is qr//, '(?^l:)', 'no re "/d" reverts to /l with locale in scope'; + use re "/u"; + no re "/d"; + is qr//, '(?^u:)', 'no re "/d" is a no-op when not on (locale scope)'; +} + +{ + use feature "unicode_strings"; + use re '/d'; + is qr//, '(?^:)', 'use re "/d" in Unicode scope'; + no re '/d'; + is qr//, '(?^u:)', 'no re "/d" reverts to /u in Unicode scope'; + no re '/u'; + is qr//, '(?^u:)', 'no re "/u" is a no-op in Unicode scope'; + no re '/d'; + is qr//, '(?^u:)', 'no re "/d" is a no-op when not on'; + use re '/u'; + no feature 'unicode_strings'; + is qr//, '(?^u:)', 'use re "/u" is not tied to unicode_strings feature'; +} + +use re '/u'; +is qr//, '(?^u:)', 'use re "/u"'; +no re '/u'; +is qr//, '(?^:)', 'no re "/u" reverts to /d'; +no re '/u'; +is qr//, '(?^:)', 'no re "/u" is a no-op when not on'; +no re '/d'; +is qr//, '(?^:)', 'no re "/d" is a no-op when not on'; + +{ + local $SIG{__WARN__} = sub { + ok $_[0] =~ /Unknown regular expression flag "\x{100}"/, + "warning with unknown regexp flags in use re '/flags'" + }; + import re "/\x{100}" +} + +# use re '/flags' in combination with explicit flags +use re '/xi'; +ok "A\n\n" =~ / a.$/sm, 'use re "/xi" in combination with explicit /sm'; +{ + local $::TODO = "test requires perl 5.16 syntax"; + # (remove the evals, the quotes, and the ‘no warnings’ when removing the + # to-do notice) + no warnings; + use re '/u'; + is eval 'qr//d', '(?^:)', 'explicit /d in re "/u" scope'; + use re '/d'; + is eval 'qr//u', '(?^u:)', 'explicit /u in re "/d" scope'; +} diff --git a/op.c b/op.c index 528ecac..acffe22 100644 --- a/op.c +++ b/op.c @@ -3734,6 +3734,19 @@ Perl_newPMOP(pTHX_ I32 type, I32 flags) else if ((! (PL_hints & HINT_BYTES)) && (PL_hints & HINT_UNI_8_BIT)) { pmop->op_pmflags |= RXf_PMf_UNICODE; } + if (PL_hints & HINT_RE_FLAGS) { + SV *reflags = Perl_refcounted_he_fetch(aTHX_ + PL_compiling.cop_hints_hash, 0, STR_WITH_LEN("reflags"), 0, 0 + ); + if (reflags && SvOK(reflags)) pmop->op_pmflags |= SvIV(reflags); + reflags = Perl_refcounted_he_fetch(aTHX_ + PL_compiling.cop_hints_hash, 0, STR_WITH_LEN("reflags_dul"), 0, 0 + ); + if (reflags && SvOK(reflags)) { + pmop->op_pmflags &= ~(RXf_PMf_LOCALE|RXf_PMf_UNICODE); + pmop->op_pmflags |= SvIV(reflags); + } + } #ifdef USE_ITHREADS diff --git a/op_reg_common.h b/op_reg_common.h index ce12da5..5b49ec7 100644 --- a/op_reg_common.h +++ b/op_reg_common.h @@ -18,11 +18,13 @@ /* This tells where the first of these bits is. Setting it to 0 saved cycles * and memory. I (khw) think the code will work if changed back, but haven't * tested it */ +/* Make sure to update lib/re.pm when changing this! */ #define RXf_PMf_STD_PMMOD_SHIFT 0 /* The bits need to be ordered so that the msix are contiguous starting at bit * RXf_PMf_STD_PMMOD_SHIFT, followed by the p. See STD_PAT_MODS and * INT_PAT_MODS in regexp.h for the reason contiguity is needed */ +/* Make sure to update lib/re.pm when changing these! */ #define RXf_PMf_MULTILINE (1 << (RXf_PMf_STD_PMMOD_SHIFT+0)) /* /m */ #define RXf_PMf_SINGLELINE (1 << (RXf_PMf_STD_PMMOD_SHIFT+1)) /* /s */ #define RXf_PMf_FOLD (1 << (RXf_PMf_STD_PMMOD_SHIFT+2)) /* /i */ diff --git a/perl.h b/perl.h index a680e76..ccdc078 100644 --- a/perl.h +++ b/perl.h @@ -4823,6 +4823,8 @@ enum { /* pass one of these to get_vtbl */ #define HINT_NO_AMAGIC 0x01000000 /* overloading pragma */ +#define HINT_RE_FLAGS 0x02000000 /* re '/xism' pragma */ + /* The following are stored in $^H{sort}, not in PL_hints */ #define HINT_SORT_SORT_BITS 0x000000FF /* allow 256 different ones */ #define HINT_SORT_QUICKSORT 0x00000001 -- 1.8.3.1