Add /d, /l, /u (infixed) regex modifiers

author Karl Williamson <public@khwilliamson.com>

Tue, 21 Sep 2010 00:57:24 +0000 (18:57 -0600)

committer Father Chrysostomos <sprout@cpan.org>

Thu, 23 Sep 2010 05:54:23 +0000 (22:54 -0700)
author Karl Williamson <public@khwilliamson.com>
Tue, 21 Sep 2010 00:57:24 +0000 (18:57 -0600)
committer Father Chrysostomos <sprout@cpan.org>
Thu, 23 Sep 2010 05:54:23 +0000 (22:54 -0700)
diff --git a/op.c b/op.c

index db91cdb..983bf3d 100644 (file)
--- a/op.c
+++ b/op.c
@@ -3649,8 +3649,12 @@ Perl_newPMOP(pTHX_ I32 type, I32 flags)
  
      if (PL_hints & HINT_RE_TAINT)
         pmop->op_pmflags |= PMf_RETAINT;
-    if (PL_hints & HINT_LOCALE)
+    if (PL_hints & HINT_LOCALE) {
         pmop->op_pmflags |= PMf_LOCALE;
+    }
+    else if ((! (PL_hints & HINT_BYTES)) && (PL_hints & HINT_UNI_8_BIT)) {
+        pmop->op_pmflags |= RXf_PMf_UNICODE;
+    }
  
  
  #ifdef USE_ITHREADS
diff --git a/op.h b/op.h

index da280b8..a29d516 100644 (file)
--- a/op.h
+++ b/op.h
@@ -366,7 +366,7 @@ struct pmop {
  
  /* Leave some space, so future bit allocations can go either in the shared or
   * unshared area without affecting binary compatibility */
-#define PMf_BASE_SHIFT (_RXf_PMf_SHIFT_NEXT+8)
+#define PMf_BASE_SHIFT (_RXf_PMf_SHIFT_NEXT+7)
  
  /* taint $1 etc. if target tainted */
  #define PMf_RETAINT    (1<<(PMf_BASE_SHIFT+0))
diff --git a/op_reg_common.h b/op_reg_common.h

index d4e3987..ce12da5 100644 (file)
--- a/op_reg_common.h
+++ b/op_reg_common.h
@@ -29,14 +29,15 @@
  #define RXf_PMf_EXTENDED       (1 << (RXf_PMf_STD_PMMOD_SHIFT+3))    /* /x */
  #define RXf_PMf_KEEPCOPY       (1 << (RXf_PMf_STD_PMMOD_SHIFT+4))    /* /p */
  #define RXf_PMf_LOCALE         (1 << (RXf_PMf_STD_PMMOD_SHIFT+5))
+#define RXf_PMf_UNICODE        (1 << (RXf_PMf_STD_PMMOD_SHIFT+6))
  
  /* Next available bit after the above.  Name begins with '_' so won't be
   * exported by B */
-#define _RXf_PMf_SHIFT_NEXT (RXf_PMf_STD_PMMOD_SHIFT+6)
+#define _RXf_PMf_SHIFT_NEXT (RXf_PMf_STD_PMMOD_SHIFT+7)
  
  /* Mask of the above bits.  These need to be transferred from op_pmflags to
   * re->extflags during compilation */
-#define RXf_PMf_COMPILETIME    (RXf_PMf_MULTILINE|RXf_PMf_SINGLELINE|RXf_PMf_LOCALE|RXf_PMf_FOLD|RXf_PMf_EXTENDED|RXf_PMf_KEEPCOPY)
+#define RXf_PMf_COMPILETIME    (RXf_PMf_MULTILINE|RXf_PMf_SINGLELINE|RXf_PMf_LOCALE|RXf_PMf_FOLD|RXf_PMf_EXTENDED|RXf_PMf_KEEPCOPY|RXf_PMf_UNICODE)
  
  /* These copies need to be numerical or defsubs_h.PL won't know about them. */
  #define PMf_MULTILINE    1<<0
diff --git a/pod/perldelta.pod b/pod/perldelta.pod

index 4289130..8cdbdb3 100644 (file)
--- a/pod/perldelta.pod
+++ b/pod/perldelta.pod
@@ -40,6 +40,19 @@ main purpose of this is to allow tests that rely on the stringification
  to not have to change when new modifiers are added.  See
  L<perlre/Extended Patterns>.
  
+=head2 C<"d">, C<"l">, and C<"u"> regex modifiers added
+
+These modifiers are currently only available within a C<(?...)> construct.
+
+The C<"l"> modifier says to compile the regular expression as if it were
+in the scope of C<use locale>, even if it is not.
+
+The C<"u"> modifier currently does nothing.
+
+The C<"d"> modifier is used in the scope of C<use locale> to compile the
+regular expression as if it were not in that scope.
+See L<perlre/(?dlupimsx-imsx)>.
+
  =head1 Security
  
  XXX Any security-related notices go here.  In particular, any security
@@ -66,6 +79,17 @@ can use something like the following:
  
  And then use C<$modifiers> instead of C<-xism>.
  
+=head2 Regular expressions retain their localeness when interpolated
+
+Regular expressions compiled under C<"use locale"> now retain this when
+interpolated into a new regular expression compiled outside a
+C<"use locale">, and vice-versa.
+
+Previously, a regular expression interpolated into another one inherited
+the localeness of the surrounding one, losing whatever state it
+originally had.  This is considered a bug fix, but may trip up code that
+has come to rely on the incorrect behavior.
+
  [ List each incompatible change as a =head2 entry ]
  
  =head1 Deprecations
@@ -320,7 +344,7 @@ be noted as well.
  
  =item *
  
-XXX
+See L</Regular expressions retain their localeness when interpolated>
  
  =back
  
diff --git a/pod/perldiag.pod b/pod/perldiag.pod

index e725749..c6806c1 100644 (file)
--- a/pod/perldiag.pod
+++ b/pod/perldiag.pod
@@ -4035,8 +4035,10 @@ where the problem was discovered. See L<perlre>.
  <-- HERE shows in the regular expression about where the problem was
  discovered.  This happens when using the C<(?^...)> construct to tell
  Perl to use the default regular expression modifiers, and you
-redundantly specify a default modifier.  For other causes, see
-L<perlre>.
+redundantly specify a default modifier; or having a modifier that can't
+be turned off (such as C<"p"> or C<"l">) after a minus; or specifying
+more than one of the C<"d">, C<"l">, or C<"u"> modifiers.  For other
+causes, see L<perlre>.
  
  =item Sequence \%s... not terminated in regex; marked by <-- HERE in m/%s/
  
diff --git a/pod/perlre.pod b/pod/perlre.pod

index 6e68bcd..b9216c1 100644 (file)
--- a/pod/perlre.pod
+++ b/pod/perlre.pod
@@ -594,20 +594,15 @@ whitespace formatting, a simple C<#> will suffice.  Note that Perl closes
  the comment as soon as it sees a C<)>, so there is no way to put a literal
  C<)> in the comment.
  
-=item C<(?pimsx-imsx)>
+=item C<(?dlupimsx-imsx)>
  
-=item C<(?^pimsx)>
+=item C<(?^lupimsx)>
  X<(?)> X<(?^)>
  
  One or more embedded pattern-match modifiers, to be turned on (or
  turned off, if preceded by C<->) for the remainder of the pattern or
  the remainder of the enclosing pattern group (if any).
  
-Starting in Perl 5.14, a C<"^"> (caret or circumflex accent) immediately
-after the C<"?"> is a shorthand equivalent to C<-imsx> and compiling the
-regex under C<no locale>.  Flags may follow the caret to override it.
-But a minus sign is not legal with it.
-
  This is particularly useful for dynamic patterns, such as those read in from a
  configuration file, taken from an argument, or specified in a table
  somewhere.  Consider the case where some patterns want to be case
@@ -634,17 +629,53 @@ These modifiers do not carry over into named subpatterns called in the
  enclosing group. In other words, a pattern such as C<((?i)(&NAME))> does not
  change the case-sensitivity of the "NAME" pattern.
  
-Note that the C<p> modifier is special in that it can only be enabled,
-not disabled, and that its presence anywhere in a pattern has a global
-effect. Thus C<(?-p)> and C<(?-p:...)> are meaningless and will warn
-when executed under C<use warnings>.
+Starting in Perl 5.14, a C<"^"> (caret or circumflex accent) immediately
+after the C<"?"> is a shorthand equivalent to C<d-imsx>.  Flags (except
+C<"d">) may follow the caret to override it.
+But a minus sign is not legal with it.
+
+Also, starting in Perl 5.14, are modifiers C<"d">, C<"l">, and C<"u">,
+which for 5.14 may not be used as suffix modifiers.
+
+C<"l"> means to use a locale (see L<perllocale>) when pattern matching.
+The locale used will be the one in effect at the time of execution of
+the pattern match.  This may not be the same as the compilation-time
+locale, and can differ from one match to another if there is an
+intervening call of the
+L<setlocale() function|perllocale/The setlocale function>.
+This modifier is automatically set if the regular expression is compiled
+within the scope of a C<"use locale"> pragma.
+
+C<"u"> has no effect currently.  It is automatically set if the regular
+expression is compiled within the scope of a
+L<C<"use feature 'unicode_strings">|feature> pragma.
+
+C<"d"> means to use the traditional Perl pattern matching behavior.
+This is dualistic (hence the name C<"d">, which also could stand for
+"default").  When this is in effect, Perl matches utf8-encoded strings
+using Unicode rules, and matches non-utf8-encoded strings using the
+platform's native character set rules.
+See L<perlunicode/The "Unicode Bug">.  It is automatically selected by
+default if the regular expression is compiled neither within the scope
+of a C<"use locale"> pragma nor a <C<"use feature 'unicode_strings">
+pragma.
+
+Note that the C<d>, C<l>, C<p>, and C<u> modifiers are special in that
+they can only be enabled, not disabled, and the C<d>, C<l>, and C<u>
+modifiers are mutually exclusive; a maximum of one may appear in the
+construct.  Specifying one de-specifies the others.  Thus, for example,
+C<(?-p)> and C<(?-d:...)> are meaningless and will warn when compiled
+under C<use warnings>.
+
+Note also that the C<p> modifier is special in that its presence
+anywhere in a pattern has a global effect.
  
  =item C<(?:pattern)>
  X<(?:)>
  
-=item C<(?imsx-imsx:pattern)>
+=item C<(?dluimsx-imsx:pattern)>
  
-=item C<(?^imsx:pattern)>
+=item C<(?^luimsx:pattern)>
  X<(?^:)>
  
  This is for clustering, not capturing; it groups subexpressions like
@@ -660,7 +691,7 @@ but doesn't spit out extra fields.  It's also cheaper not to capture
  characters if you don't need to.
  
  Any letters between C<?> and C<:> act as flags modifiers as with
-C<(?imsx-imsx)>.  For example,
+C<(?dluimsx-imsx)>.  For example,
  
      /(?s-i:more.*than).*million/i
  
@@ -669,8 +700,8 @@ is equivalent to the more verbose
      /(?:(?s-i)more.*than).*million/i
  
  Starting in Perl 5.14, a C<"^"> (caret or circumflex accent) immediately
-after the C<"?"> is a shorthand equivalent to C<-imsx> and compiling the
-regex under C<no locale>.  Any positive flags may follow the caret, so
+after the C<"?"> is a shorthand equivalent to C<d-imsx>.  Any positive
+flags (except C<"d">) may follow the caret, so
  
      (?^x:foo)
  
@@ -679,7 +710,7 @@ is equivalent to
      (?x-ims:foo)
  
  The caret tells Perl that this cluster doesn't inherit the flags of any
-surrounding pattern, but to go back to the system defaults (C<-imsx>),
+surrounding pattern, but to go back to the system defaults (C<d-imsx>),
  modified by any flags specified.
  
  The caret allows for simpler stringification of compiled regular
diff --git a/regcomp.c b/regcomp.c

index c080fcd..4c332b0 100644 (file)
--- a/regcomp.c
+++ b/regcomp.c
@@ -370,6 +370,7 @@ static const scan_data_t zero_scan_data =
  
  #define UTF (RExC_utf8 != 0)
  #define LOC ((RExC_flags & RXf_PMf_LOCALE) != 0)
+#define UNI_SEMANTICS ((RExC_flags & RXf_PMf_UNICODE) != 0)
  #define FOLD ((RExC_flags & RXf_PMf_FOLD) != 0)
  
  #define OOB_UNICODE            12345678
@@ -4441,9 +4442,17 @@ Perl_re_compile(pTHX_ SV * const pattern, U32 pm_flags)
         SvPOK_on(rx);
         SvFLAGS(rx) |= SvUTF8(pattern);
          *p++='('; *p++='?';
-        if (has_minus) {    /* If a default, cover it using the caret */
+
+        /* If a default, cover it using the caret */
+        if (has_minus || (r->extflags & ~(RXf_PMf_LOCALE|RXf_PMf_UNICODE))) {
              *p++= DEFAULT_PAT_MOD;
          }
+        if (r->extflags & RXf_PMf_LOCALE) {
+            *p++ = LOCALE_PAT_MOD;
+        }
+        else if (r->extflags & RXf_PMf_UNICODE) {
+            *p++ = UNICODE_PAT_MOD;
+        }
          if (has_p)
              *p++ = KEEPCOPY_PAT_MOD; /*'p'*/
          {
@@ -6124,6 +6133,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
                                        that follow */
                  has_use_defaults = TRUE;
                  STD_PMMOD_FLAGS_CLEAR(&RExC_flags);
+                RExC_flags &= ~(RXf_PMf_LOCALE|RXf_PMf_UNICODE);
                  goto parse_flags;
             default:
                 --RExC_parse;
@@ -6131,6 +6141,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
             {
                  U32 posflags = 0, negflags = 0;
                 U32 *flagsp = &posflags;
+                bool has_charset_modifier = 0;
  
                 while (*RExC_parse) {
                     /* && strchr("iogcmsx", *RExC_parse) */
@@ -6138,6 +6149,32 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
                        and must be globally applied -- japhy */
                      switch (*RExC_parse) {
                     CASE_STD_PMMOD_FLAGS_PARSE_SET(flagsp);
+                    case LOCALE_PAT_MOD:
+                        if (has_charset_modifier || flagsp == &negflags) {
+                            goto fail_modifiers;
+                        }
+                        *flagsp &= ~RXf_PMf_UNICODE;
+                        *flagsp |= RXf_PMf_LOCALE;
+                        has_charset_modifier = 1;
+                        break;
+                    case UNICODE_PAT_MOD:
+                        if (has_charset_modifier || flagsp == &negflags) {
+                            goto fail_modifiers;
+                        }
+                        *flagsp &= ~RXf_PMf_LOCALE;
+                        *flagsp |= RXf_PMf_UNICODE;
+                        has_charset_modifier = 1;
+                        break;
+                    case DUAL_PAT_MOD:
+                        if (has_use_defaults
+                            || has_charset_modifier
+                            || flagsp == &negflags)
+                        {
+                            goto fail_modifiers;
+                        }
+                        *flagsp &= ~(RXf_PMf_LOCALE|RXf_PMf_UNICODE);
+                        has_charset_modifier = 1;
+                        break;
                      case ONCE_PAT_MOD: /* 'o' */
                      case GLOBAL_PAT_MOD: /* 'g' */
                         if (SIZE_ONLY && ckWARN(WARN_REGEXP)) {
@@ -6182,6 +6219,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
                           * if there is a minus, it means will be trying to
                           * re-specify a default which is an error */
                          if (has_use_defaults || flagsp == &negflags) {
+            fail_modifiers:
                              RExC_parse++;
                             vFAIL3("Sequence (%.*s...) not recognized", RExC_parse-seqstart, seqstart);
                             /*NOTREACHED*/
diff --git a/regexp.h b/regexp.h

index 17f9983..004d614 100644 (file)
--- a/regexp.h
+++ b/regexp.h
@@ -236,9 +236,9 @@ and check for NULL.
      case SINGLE_PAT_MOD:    *(pmfl) |= RXf_PMf_SINGLELINE; break;   \
      case XTENDED_PAT_MOD:   *(pmfl) |= RXf_PMf_EXTENDED;   break
  
-/* Note, includes locale */
+/* Note, includes locale, unicode */
  #define STD_PMMOD_FLAGS_CLEAR(pmfl)                        \
-    *(pmfl) &= ~(RXf_PMf_FOLD|RXf_PMf_MULTILINE|RXf_PMf_SINGLELINE|RXf_PMf_EXTENDED|RXf_PMf_LOCALE)
+    *(pmfl) &= ~(RXf_PMf_FOLD|RXf_PMf_MULTILINE|RXf_PMf_SINGLELINE|RXf_PMf_EXTENDED|RXf_PMf_LOCALE|RXf_PMf_UNICODE)
  
  /* chars and strings used as regex pattern modifiers
   * Singlular is a 'c'har, plural is a "string"
@@ -258,12 +258,18 @@ and check for NULL.
  #define IGNORE_PAT_MOD       'i'
  #define XTENDED_PAT_MOD      'x'
  #define NONDESTRUCT_PAT_MOD  'r'
+#define LOCALE_PAT_MOD       'l'
+#define UNICODE_PAT_MOD      'u'
+#define DUAL_PAT_MOD         'd'
  
  #define ONCE_PAT_MODS        "o"
  #define KEEPCOPY_PAT_MODS    "p"
  #define EXEC_PAT_MODS        "e"
  #define LOOP_PAT_MODS        "gc"
  #define NONDESTRUCT_PAT_MODS "r"
+#define LOCALE_PAT_MODS      "l"
+#define UNICODE_PAT_MODS     "u"
+#define DUAL_PAT_MODS        "d"
  
  /* This string is expected by regcomp.c to be ordered so that the first
   * character is the flag in bit RXf_PMf_STD_PMMOD_SHIFT of extflags; the next
@@ -288,7 +294,7 @@ and check for NULL.
  
  /* Leave some space, so future bit allocations can go either in the shared or
   * unshared area without affecting binary compatibility */
-#define RXf_BASE_SHIFT (_RXf_PMf_SHIFT_NEXT+3)
+#define RXf_BASE_SHIFT (_RXf_PMf_SHIFT_NEXT+2)
  
  /* Anchor and GPOS related stuff */
  #define RXf_ANCH_BOL           (1<<(RXf_BASE_SHIFT+0))
diff --git a/regnodes.h b/regnodes.h

index d132013..f5aacc2 100644 (file)
--- a/regnodes.h
+++ b/regnodes.h
@@ -625,14 +625,14 @@ EXTCONST char * const PL_reg_name[] = {
  EXTCONST char * PL_reg_extflags_name[];
  #else
  EXTCONST char * const PL_reg_extflags_name[] = {
-       /* Bits in extflags defined: 11111111111111111111111000111111 */
+       /* Bits in extflags defined: 11111111111111111111111001111111 */
         "MULTILINE",        /* 0x00000001 */
         "SINGLELINE",       /* 0x00000002 */
         "FOLD",             /* 0x00000004 */
         "EXTENDED",         /* 0x00000008 */
         "KEEPCOPY",         /* 0x00000010 */
         "LOCALE",           /* 0x00000020 */
-       "UNUSED_BIT_6",     /* 0x00000040 */
+       "UNICODE",          /* 0x00000040 */
         "UNUSED_BIT_7",     /* 0x00000080 */
         "UNUSED_BIT_8",     /* 0x00000100 */
         "ANCH_BOL",         /* 0x00000200 */
diff --git a/t/re/pat.t b/t/re/pat.t

index 3bc7f5d..c007880 100644 (file)
--- a/t/re/pat.t
+++ b/t/re/pat.t
@@ -23,7 +23,7 @@ BEGIN {
  }
  
  
-plan tests => 385;  # Update this when adding/deleting tests.
+plan tests => 398;  # Update this when adding/deleting tests.
  
  run_tests() unless caller;
  
@@ -503,10 +503,38 @@ sub run_tests {
          iseq qr/\b\v$/s,    '(?^s:\b\v$)', 'qr/\b\v$/s';
          iseq qr/\b\v$/m,    '(?^m:\b\v$)', 'qr/\b\v$/m';
          iseq qr/\b\v$/x,    '(?^x:\b\v$)', 'qr/\b\v$/x';
-        iseq qr/\b\v$/xism, '(?msix:\b\v$)',  'qr/\b\v$/xism';
+        iseq qr/\b\v$/xism, '(?^msix:\b\v$)',  'qr/\b\v$/xism';
          iseq qr/\b\v$/,     '(?^:\b\v$)', 'qr/\b\v$/';
      }
  
+    {   # Test that charset modifier work, and are interpolated
+        iseq qr/\b\v$/, '(?^:\b\v$)', 'Verify no locale, no unicode_strings gives default modifier';
+        iseq qr/(?l:\b\v$)/, '(?^:(?l:\b\v$))', 'Verify infix l modifier compiles';
+        iseq qr/(?u:\b\v$)/, '(?^:(?u:\b\v$))', 'Verify infix u modifier compiles';
+        iseq qr/(?l)\b\v$/, '(?^:(?l)\b\v$)', 'Verify (?l) compiles';
+        iseq qr/(?u)\b\v$/, '(?^:(?u)\b\v$)', 'Verify (?u) compiles';
+
+        my $dual = qr/\b\v$/;
+        use locale;
+        my $locale = qr/\b\v$/;
+        iseq $locale,    '(?^l:\b\v$)', 'Verify has l modifier when compiled under use locale';
+        no locale;
+
+        use feature 'unicode_strings';
+        my $unicode = qr/\b\v$/;
+        iseq $unicode,    '(?^u:\b\v$)', 'Verify has u modifier when compiled under unicode_strings';
+        iseq qr/abc$dual/,    '(?^u:abc(?^:\b\v$))', 'Verify retains d meaning when interpolated under locale';
+        iseq qr/abc$locale/,    '(?^u:abc(?^l:\b\v$))', 'Verify retains l when interpolated under unicode_strings';
+
+        no feature 'unicode_strings';
+        iseq qr/abc$locale/,    '(?^:abc(?^l:\b\v$))', 'Verify retains l when interpolated outside locale and unicode strings';
+        iseq qr/def$unicode/,    '(?^:def(?^u:\b\v$))', 'Verify retains u when interpolated outside locale and unicode strings';
+
+        use locale;
+        iseq qr/abc$dual/,    '(?^l:abc(?^:\b\v$))', 'Verify retains d meaning when interpolated under locale';
+        iseq qr/abc$unicode/,    '(?^l:abc(?^u:\b\v$))', 'Verify retains u when interpolated under locale';
+    }
+
  
      {
          local $Message = "Look around";
diff --git a/t/re/re.t b/t/re/re.t

index 10e2ee2..76835f0 100644 (file)
--- a/t/re/re.t
+++ b/t/re/re.t
@@ -12,7 +12,9 @@ use warnings;
  use re qw(is_regexp regexp_pattern
            regname regnames regnames_count);
  {
+    use feature 'unicode_strings';  # Force 'u' pat mod
      my $qr=qr/foo/pi;
+    no feature 'unicode_strings';
      my $rx = $$qr;
  
      ok(is_regexp($qr),'is_regexp(REGEXP ref)');
@@ -20,12 +22,12 @@ use re qw(is_regexp regexp_pattern
      ok(!is_regexp(''),'is_regexp("")');
  
      is((regexp_pattern($qr))[0],'foo','regexp_pattern[0] (ref)');
-    is((regexp_pattern($qr))[1],'ip','regexp_pattern[1] (ref)');
-    is(regexp_pattern($qr),'(?^pi:foo)','scalar regexp_pattern (ref)');
+    is((regexp_pattern($qr))[1],'uip','regexp_pattern[1] (ref)');
+    is(regexp_pattern($qr),'(?^upi:foo)','scalar regexp_pattern (ref)');
  
      is((regexp_pattern($rx))[0],'foo','regexp_pattern[0] (bare REGEXP)');
-    is((regexp_pattern($rx))[1],'ip','regexp_pattern[1] (bare REGEXP)');
-    is(regexp_pattern($rx),'(?^pi:foo)', 'scalar regexp_pattern (bare REGEXP)');
+    is((regexp_pattern($rx))[1],'uip','regexp_pattern[1] (bare REGEXP)');
+    is(regexp_pattern($rx),'(?^upi:foo)', 'scalar regexp_pattern (bare REGEXP)');
  
      ok(!regexp_pattern(''),'!regexp_pattern("")');
  }
diff --git a/t/re/reg_mesg.t b/t/re/reg_mesg.t

index 4e8f3c4..80af8df 100644 (file)
--- a/t/re/reg_mesg.t
+++ b/t/re/reg_mesg.t
@@ -55,6 +55,12 @@ my @death =
   '/(?\<=x/' => 'Sequence (?\...) not recognized in regex; marked by {#} in m/(?\{#}<=x/',
   '/(?\<!x/' => 'Sequence (?\...) not recognized in regex; marked by {#} in m/(?\{#}<!x/',
   '/(?\>x/' => 'Sequence (?\...) not recognized in regex; marked by {#} in m/(?\{#}>x/',
+ '/(?^-i:foo)/' => 'Sequence (?^-...) not recognized in regex; marked by {#} in m/(?^-{#}i:foo)/',
+ '/(?^-i)foo/' => 'Sequence (?^-...) not recognized in regex; marked by {#} in m/(?^-{#}i)foo/',
+ '/(?^d:foo)/' => 'Sequence (?^d...) not recognized in regex; marked by {#} in m/(?^d{#}:foo)/',
+ '/(?^d)foo/' => 'Sequence (?^d...) not recognized in regex; marked by {#} in m/(?^d{#})foo/',
+ '/(?^lu:foo)/' => 'Sequence (?^lu...) not recognized in regex; marked by {#} in m/(?^lu{#}:foo)/',
+ '/(?^lu)foo/' => 'Sequence (?^lu...) not recognized in regex; marked by {#} in m/(?^lu{#})foo/',
  
   '/((x)/' => 'Unmatched ( in regex; marked by {#} in m/({#}(x)/',
  
diff --git a/universal.c b/universal.c

index fe53969..102ff91 100644 (file)
--- a/universal.c
+++ b/universal.c
@@ -1189,16 +1189,23 @@ XS(XS_re_regexp_pattern)
      {
          /* Houston, we have a regex! */
          SV *pattern;
-        STRLEN left = 0;
-        char reflags[sizeof(INT_PAT_MODS)];
  
          if ( GIMME_V == G_ARRAY ) {
+           STRLEN left = 0;
+           char reflags[sizeof(INT_PAT_MODS) + 1]; /* The +1 is for the charset
+                                                       modifier */
              /*
                 we are in list context so stringify
                 the modifiers that apply. We ignore "negative
                 modifiers" in this scenario.
              */
  
+            if (RX_EXTFLAGS(re) & RXf_PMf_LOCALE) {
+               reflags[left++] = LOCALE_PAT_MOD;
+           }
+           else if (RX_EXTFLAGS(re) & RXf_PMf_UNICODE) {
+               reflags[left++] = UNICODE_PAT_MOD;
+           }
              const char *fptr = INT_PAT_MODS;
              char ch;
              U16 match_flags = (U16)((RX_EXTFLAGS(re) & PMf_COMPILETIME)
author	Karl Williamson <public@khwilliamson.com>
	Tue, 21 Sep 2010 00:57:24 +0000 (18:57 -0600)
committer	Father Chrysostomos <sprout@cpan.org>
	Thu, 23 Sep 2010 05:54:23 +0000 (22:54 -0700)
op.c		patch \| blob \| blame \| history
op.h		patch \| blob \| blame \| history
op_reg_common.h		patch \| blob \| blame \| history
pod/perldelta.pod		patch \| blob \| blame \| history
pod/perldiag.pod		patch \| blob \| blame \| history
pod/perlre.pod		patch \| blob \| blame \| history
regcomp.c		patch \| blob \| blame \| history
regexp.h		patch \| blob \| blame \| history
regnodes.h		patch \| blob \| blame \| history
t/re/pat.t		patch \| blob \| blame \| history
t/re/re.t		patch \| blob \| blame \| history
t/re/reg_mesg.t		patch \| blob \| blame \| history
universal.c		patch \| blob \| blame \| history