This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
regcomp.c: /[[:lower:]]/i should match the same as /\p{Lower}/i
author Karl Williamson <public@khwilliamson.com>
Sat, 11 Feb 2012 17:49:06 +0000 (10:49 -0700)
committer Karl Williamson <public@khwilliamson.com>
Sat, 11 Feb 2012 21:01:29 +0000 (14:01 -0700)
Same for [[:upper:]] and \p{Upper}.  Under /i, these were instead matching
all of [[:alpha:]] or \p{Alpha}.  What /\p{Lower}/i and /\p{Upper}/i actually
match is \p{Cased}, and so that is what these should match.

charclass_invlists.h
embedvar.h
intrpvar.h
regcomp.c
regen/mk_invlists.pl
sv.c
t/re/re_tests

index 5d78219..152793a 100644 (file)
@@ -36,6 +36,30 @@ UV ASCII_invlist[] = {
        0
 };
 
+UV L1Cased_invlist[] = {
+       16,     /* Number of elements */
+       0,      /* Current iteration position */
+       1064334010, /* Version and data structure type */
+       1,      /* 0 if this is the first element of the list proper;
+                  1 if the next element is the first */
+       65,
+       91,
+       97,
+       123,
+       170,
+       171,
+       181,
+       182,
+       186,
+       187,
+       192,
+       215,
+       216,
+       247,
+       248,
+       443
+};
+
 UV VertSpace_invlist[] = {
        6,      /* Number of elements */
        0,      /* Current iteration position */
index 0321963..1d187fe 100644 (file)
@@ -54,6 +54,7 @@
 #define PL_Dir                 (vTHX->IDir)
 #define PL_Env                 (vTHX->IEnv)
 #define PL_HorizSpace          (vTHX->IHorizSpace)
+#define PL_L1Cased             (vTHX->IL1Cased)
 #define PL_L1PosixAlnum                (vTHX->IL1PosixAlnum)
 #define PL_L1PosixAlpha                (vTHX->IL1PosixAlpha)
 #define PL_L1PosixGraph                (vTHX->IL1PosixGraph)
index 43e4e05..4025a7c 100644 (file)
@@ -582,6 +582,8 @@ PERLVAR(I, PosixAlpha,      SV *)
 PERLVAR(I, PosixBlank, SV *)
 PERLVAR(I, XPosixBlank,        SV *)
 
+PERLVAR(I, L1Cased,    SV *)
+
 PERLVAR(I, PosixCntrl, SV *)
 PERLVAR(I, XPosixCntrl,        SV *)
 
index 3123a15..b33edda 100644 (file)
--- a/regcomp.c
+++ b/regcomp.c
@@ -4830,6 +4830,8 @@ Perl_re_compile(pTHX_ SV * const pattern, U32 orig_pm_flags)
        PL_PosixBlank = _new_invlist_C_array(PosixBlank_invlist);
        PL_XPosixBlank = _new_invlist_C_array(XPosixBlank_invlist);
 
+       PL_L1Cased = _new_invlist_C_array(L1Cased_invlist);
+
        PL_PosixCntrl = _new_invlist_C_array(PosixCntrl_invlist);
        PL_XPosixCntrl = _new_invlist_C_array(XPosixCntrl_invlist);
 
@@ -10872,7 +10874,8 @@ parseit:
                case ANYOF_LOWER:
                case ANYOF_NLOWER:
                 {   /* These require special handling, as they differ under
-                       folding, matching the corresponding Alpha property */
+                      folding, matching Cased there (which in the ASCII range
+                      is the same as Alpha) */
 
                    SV* ascii_source;
                    SV* l1_source;
@@ -10880,8 +10883,8 @@ parseit:
 
                    if (FOLD && ! LOC) {
                        ascii_source = PL_PosixAlpha;
-                       l1_source = PL_L1PosixAlpha;
-                       Xname = "__XposixAlpha_i";
+                       l1_source = PL_L1Cased;
+                       Xname = "Cased";
                    }
                    else {
                        ascii_source = PL_PosixLower;
@@ -10939,8 +10942,8 @@ parseit:
 
                    if (FOLD && ! LOC) {
                        ascii_source = PL_PosixAlpha;
-                       l1_source = PL_L1PosixAlpha;
-                       Xname = "__XposixAlpha_i";
+                       l1_source = PL_L1Cased;
+                       Xname = "Cased";
                    }
                    else {
                        ascii_source = PL_PosixUpper;
index 31c77de..8102c29 100644 (file)
@@ -93,6 +93,7 @@ output_invlist("AboveLatin1", [ 256 ]);
 
 for my $prop (qw(
                 ASCII
+                L1Cased
                VertSpace
                 PerlSpace
                     XPerlSpace
@@ -132,7 +133,8 @@ for my $prop (qw(
     # artifically cutting that off at 256 because 256 is the first code point
     # above Latin1, we let the range go to its natural ending.  That gives us
     # extra information with no added space taken.
-    my $lookup_prop = $prop =~ s/^L1/X/r;
+    my $lookup_prop = $prop;
+    $lookup_prop =~ s/^L1Posix/XPosix/ or $lookup_prop =~ s/^L1//;
     my @invlist = prop_invlist($lookup_prop);
 
     if ($lookup_prop ne $prop) {
diff --git a/sv.c b/sv.c
index f02627b..214a17d 100644 (file)
--- a/sv.c
+++ b/sv.c
@@ -13398,6 +13398,8 @@ perl_clone_using(PerlInterpreter *proto_perl, UV flags,
     PL_PosixBlank      = sv_dup_inc(proto_perl->IPosixBlank, param);
     PL_XPosixBlank     = sv_dup_inc(proto_perl->IXPosixBlank, param);
 
+    PL_L1Cased         = sv_dup_inc(proto_perl->IL1Cased, param);
+
     PL_PosixCntrl      = sv_dup_inc(proto_perl->IPosixCntrl, param);
     PL_XPosixCntrl     = sv_dup_inc(proto_perl->IXPosixCntrl, param);
 
index 2bdb24c..4eafaf7 100644 (file)
@@ -1567,4 +1567,9 @@ abc\N{def -       c       -       \\N{NAME} must be resolved by the lexer
 # /i properties shouldn't match more than the property itself
 /[[:ascii:]]/i \N{KELVIN SIGN} n       -       -
 
+# [[:lower:]]/i and [[:upper:]]/i should match what \p{Lower}/i and \p{Upper}/i do,
+# which is \p{Cased}, not \p{Alpha}.
+/[[:lower:]]/i \N{U+3400}      n       -       -
+/[[:upper:]]/i \N{U+01BB}      n       -       -
+
 # vim: softtabstop=0 noexpandtab