This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
regcomp.c: Generate different property for /i matching
[perl5.git] / regcomp.c
index 4ac544f..aa05006 100644 (file)
--- a/regcomp.c
+++ b/regcomp.c
@@ -795,9 +795,13 @@ S_cl_and(struct regnode_charclass_class *cl,
     if (!(and_with->flags & ANYOF_NON_UTF8_LATIN1_ALL))
        cl->flags &= ~ANYOF_NON_UTF8_LATIN1_ALL;
 
-    if (cl->flags & ANYOF_UNICODE_ALL && and_with->flags & ANYOF_NONBITMAP &&
-       !(and_with->flags & ANYOF_INVERT)) {
-       cl->flags &= ~ANYOF_UNICODE_ALL;
+    if (cl->flags & ANYOF_UNICODE_ALL
+       && and_with->flags & ANYOF_NONBITMAP
+       && !(and_with->flags & ANYOF_INVERT))
+    {
+       if (! (and_with->flags & ANYOF_UNICODE_ALL)) {
+           cl->flags &= ~ANYOF_UNICODE_ALL;
+       }
        cl->flags |= and_with->flags & ANYOF_NONBITMAP; /* field is 2 bits; use
                                                           only the one(s)
                                                           actually set */
@@ -8595,7 +8599,14 @@ parseit:
                    e = RExC_parse;
                    n = 1;
                }
-               if (!SIZE_ONLY) {
+               if (SIZE_ONLY) {
+                   if (LOC) {
+                       ckWARN2reg(RExC_parse,
+                               "\\%c uses Unicode rules, not locale rules",
+                               (int) value);
+                   }
+               }
+               else {
                    if (UCHARAT(RExC_parse) == '^') {
                         RExC_parse++;
                         n--;
@@ -8605,8 +8616,18 @@ parseit:
                              n--;
                         }
                    }
-                   Perl_sv_catpvf(aTHX_ listsv, "%cutf8::%.*s\n",
-                       (value=='p' ? '+' : '!'), (int)n, RExC_parse);
+
+                   /* Add the property name to the list.  If /i matching, give
+                    * a different name which consists of the normal name
+                    * sandwiched between two underscores and '_i'.  The design
+                    * is discussed in the commit message for this. */
+                   Perl_sv_catpvf(aTHX_ listsv, "%cutf8::%s%.*s%s\n",
+                                       (value=='p' ? '+' : '!'),
+                                       (FOLD) ? "__" : "",
+                                       (int)n,
+                                       RExC_parse,
+                                       (FOLD) ? "_i" : ""
+                                   );
                }
                RExC_parse = e + 1;