This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Update the link for Module-Build/core integration
[perl5.git] / regcomp.c
index 157e06e..1e1dcfd 100644 (file)
--- a/regcomp.c
+++ b/regcomp.c
@@ -86,7 +86,9 @@
 #endif
 
 #include "dquote_static.c"
-#include "charclass_invlists.h"
+#ifndef PERL_IN_XSUB_RE
+#  include "charclass_invlists.h"
+#endif
 
 #ifdef op
 #undef op
@@ -2713,7 +2715,7 @@ S_join_exact(pTHX_ RExC_state_t *pRExC_state, regnode *scan, UV *min_subtract, b
         * This uses an exclusive 'or' to find that bit and then inverts it to
         * form a mask, with just a single 0, in the bit position where 'S' and
         * 's' differ. */
-       const U8 S_or_s_mask = ~ ('S' ^ 's');
+       const U8 S_or_s_mask = (U8) ~ ('S' ^ 's');
        const U8 s_masked = 's' & S_or_s_mask;
 
        /* One pass is made over the node's string looking for all the
@@ -4814,6 +4816,7 @@ Perl_re_compile(pTHX_ SV * const pattern, U32 orig_pm_flags)
 
     DEBUG_r(if (!PL_colorset) reginitcolors());
 
+#ifndef PERL_IN_XSUB_RE
     /* Initialize these here instead of as-needed, as is quick and avoids
      * having to test them each time otherwise */
     if (! PL_AboveLatin1) {
@@ -4830,6 +4833,8 @@ Perl_re_compile(pTHX_ SV * const pattern, U32 orig_pm_flags)
        PL_PosixBlank = _new_invlist_C_array(PosixBlank_invlist);
        PL_XPosixBlank = _new_invlist_C_array(XPosixBlank_invlist);
 
+       PL_L1Cased = _new_invlist_C_array(L1Cased_invlist);
+
        PL_PosixCntrl = _new_invlist_C_array(PosixCntrl_invlist);
        PL_XPosixCntrl = _new_invlist_C_array(XPosixCntrl_invlist);
 
@@ -4841,8 +4846,6 @@ Perl_re_compile(pTHX_ SV * const pattern, U32 orig_pm_flags)
        PL_L1PosixAlnum = _new_invlist_C_array(L1PosixAlnum_invlist);
        PL_PosixAlnum = _new_invlist_C_array(PosixAlnum_invlist);
 
-       PL_HorizSpace = _new_invlist_C_array(HorizSpace_invlist);
-
        PL_L1PosixLower = _new_invlist_C_array(L1PosixLower_invlist);
        PL_PosixLower = _new_invlist_C_array(PosixLower_invlist);
 
@@ -4869,6 +4872,7 @@ Perl_re_compile(pTHX_ SV * const pattern, U32 orig_pm_flags)
        PL_PosixXDigit = _new_invlist_C_array(PosixXDigit_invlist);
        PL_XPosixXDigit = _new_invlist_C_array(XPosixXDigit_invlist);
     }
+#endif
 
     exp = SvPV(pattern, plen);
 
@@ -10784,8 +10788,6 @@ parseit:
            }
 
            if (!SIZE_ONLY) {
-               const char *what = NULL;
-               char yesno = 0;
 
                /* Possible truncation here but in some 64-bit environments
                 * the compiler gets heartburn about switch on 64-bit values.
@@ -10865,17 +10867,19 @@ parseit:
                    /* For these, we use the nonbitmap, as /d doesn't make a
                     * difference in what these match.  There would be problems
                     * if these characters had folds other than themselves, as
-                    * nonbitmap is subject to folding */
-                   _invlist_union(nonbitmap, PL_HorizSpace, &nonbitmap);
+                    * nonbitmap is subject to folding.  It turns out that \h
+                    * is just a synonym for XPosixBlank */
+                   _invlist_union(nonbitmap, PL_XPosixBlank, &nonbitmap);
                    break;
                case ANYOF_NHORIZWS:
                     _invlist_union_complement_2nd(nonbitmap,
-                                                 PL_HorizSpace, &nonbitmap);
+                                                 PL_XPosixBlank, &nonbitmap);
                    break;
                case ANYOF_LOWER:
                case ANYOF_NLOWER:
                 {   /* These require special handling, as they differ under
-                       folding, matching the corresponding Alpha property */
+                      folding, matching Cased there (which in the ASCII range
+                      is the same as Alpha) */
 
                    SV* ascii_source;
                    SV* l1_source;
@@ -10883,8 +10887,8 @@ parseit:
 
                    if (FOLD && ! LOC) {
                        ascii_source = PL_PosixAlpha;
-                       l1_source = PL_L1PosixAlpha;
-                       Xname = "__XposixAlpha_i";
+                       l1_source = PL_L1Cased;
+                       Xname = "Cased";
                    }
                    else {
                        ascii_source = PL_PosixLower;
@@ -10942,8 +10946,8 @@ parseit:
 
                    if (FOLD && ! LOC) {
                        ascii_source = PL_PosixAlpha;
-                       l1_source = PL_L1PosixAlpha;
-                       Xname = "__XposixAlpha_i";
+                       l1_source = PL_L1Cased;
+                       Xname = "Cased";
                    }
                    else {
                        ascii_source = PL_PosixUpper;
@@ -10994,10 +10998,6 @@ parseit:
                    vFAIL("Invalid [::] class");
                    break;
                }
-               if (what && ! (AT_LEAST_ASCII_RESTRICTED)) {
-                   /* Strings such as "+utf8::isWord\n" */
-                   Perl_sv_catpvf(aTHX_ listsv, "%cutf8::%s\n", yesno, what);
-               }
 
                continue;
            }
@@ -11429,6 +11429,12 @@ parseit:
             * there should not be overlap unless is /d rules. */
            _invlist_invert(nonbitmap);
 
+           /* Any swash can't be used as-is, because we've inverted things */
+           if (swash) {
+               SvREFCNT_dec(swash);
+               swash = NULL;
+           }
+
            for (i = 0; i < 256; ++i) {
                if (ANYOF_BITMAP_TEST(ret, i)) {
                    ANYOF_BITMAP_CLEAR(ret, i);
@@ -11638,7 +11644,6 @@ parseit:
     }
     return ret;
 }
-#undef _C_C_T_
 
 
 /* reg_skipcomment()