perldelta: remove placeholders for module changes

[perl5.git] / regexec.c
diff --git a/regexec.c b/regexec.c

index abafff6..bc38839 100644 (file)
--- a/regexec.c
+++ b/regexec.c
@@ -93,13 +93,6 @@ static const char* const non_utf8_target_but_utf8_required
  #include "inline_invlist.c"
  #include "unicode_constants.h"
  
-#define RF_tainted     1       /* tainted information used? e.g. locale */
-#define RF_warned      2               /* warned about big count? */
-
-#define RF_utf8                8               /* Pattern contains multibyte chars? */
-
-#define UTF_PATTERN ((PL_reg_flags & RF_utf8) != 0)
-
  #define HAS_NONLATIN1_FOLD_CLOSURE(i) _HAS_NONLATIN1_FOLD_CLOSURE_ONLY_FOR_USE_BY_REGCOMP_DOT_C_AND_REGEXEC_DOT_C(i)
  
  #ifndef STATIC
@@ -148,7 +141,6 @@ static const char* const non_utf8_target_but_utf8_required
  #define LOAD_UTF8_CHARCLASS(swash_ptr, property_name) STMT_START {            \
          if (!swash_ptr) {                                                     \
              U8 flags = _CORE_SWASH_INIT_ACCEPT_INVLIST;                       \
-            ENTER; save_re_context();                                         \
              swash_ptr = _core_swash_init("utf8", property_name, &PL_sv_undef, \
                                           1, 0, NULL, &flags);                 \
              assert(swash_ptr);                                                \
@@ -443,19 +435,21 @@ S_isFOO_lc(pTHX_ const U8 classnum, const U8 character)
      switch ((_char_class_number) classnum) {
          case _CC_ENUM_ALPHANUMERIC: return isALPHANUMERIC_LC(character);
          case _CC_ENUM_ALPHA:     return isALPHA_LC(character);
+        case _CC_ENUM_ASCII:     return isASCII_LC(character);
+        case _CC_ENUM_BLANK:     return isBLANK_LC(character);
+        case _CC_ENUM_CASED:     return isLOWER_LC(character)
+                                        || isUPPER_LC(character);
+        case _CC_ENUM_CNTRL:     return isCNTRL_LC(character);
          case _CC_ENUM_DIGIT:     return isDIGIT_LC(character);
          case _CC_ENUM_GRAPH:     return isGRAPH_LC(character);
          case _CC_ENUM_LOWER:     return isLOWER_LC(character);
          case _CC_ENUM_PRINT:     return isPRINT_LC(character);
+        case _CC_ENUM_PSXSPC:    return isPSXSPC_LC(character);
          case _CC_ENUM_PUNCT:     return isPUNCT_LC(character);
+        case _CC_ENUM_SPACE:     return isSPACE_LC(character);
          case _CC_ENUM_UPPER:     return isUPPER_LC(character);
          case _CC_ENUM_WORDCHAR:  return isWORDCHAR_LC(character);
-        case _CC_ENUM_SPACE:     return isSPACE_LC(character);
-        case _CC_ENUM_BLANK:     return isBLANK_LC(character);
          case _CC_ENUM_XDIGIT:    return isXDIGIT_LC(character);
-        case _CC_ENUM_CNTRL:     return isCNTRL_LC(character);
-        case _CC_ENUM_PSXSPC:    return isPSXSPC_LC(character);
-        case _CC_ENUM_ASCII:     return isASCII_LC(character);
          default:    /* VERTSPACE should never occur in locales */
              Perl_croak(aTHX_ "panic: isFOO_lc() has an unexpected character class '%d'", classnum);
      }
@@ -496,7 +490,9 @@ S_isFOO_utf8_lc(pTHX_ const U8 classnum, const U8* character)
                  swash_property_names[classnum], &PL_sv_undef, 1, 0, NULL, &flags);
          }
  
-        return swash_fetch(PL_utf8_swash_ptrs[classnum], (U8 *) character, TRUE);
+        return cBOOL(swash_fetch(PL_utf8_swash_ptrs[classnum], (U8 *)
+                                 character,
+                                 TRUE /* is UTF */ ));
      }
  
      switch ((_char_class_number) classnum) {
@@ -575,7 +571,7 @@ Perl_pregexec(pTHX_ REGEXP * const prog, char* stringarg, char *strend,
  
  /* A failure to find a constant substring means that there is no need to make
     an expensive call to REx engine, thus we celebrate a failure.  Similarly,
-   finding a substring too deep into the string means that less calls to
+   finding a substring too deep into the string means that fewer calls to
     regtry() should be needed.
  
     REx compiler's optimizer found 4 possible hints:
@@ -611,6 +607,7 @@ Perl_re_intuit_start(pTHX_ REGEXP * const rx, SV *sv, char *strpos,
      char *checked_upto = NULL;          /* how far into the string we have already checked using find_byclass*/
      const I32 multiline = prog->extflags & RXf_PMf_MULTILINE;
      RXi_GET_DECL(prog,progi);
+    bool is_utf8_pat;
  #ifdef DEBUGGING
      const char * const i_strpos = strpos;
  #endif
@@ -622,9 +619,8 @@ Perl_re_intuit_start(pTHX_ REGEXP * const rx, SV *sv, char *strpos,
  
      RX_MATCH_UTF8_set(rx,utf8_target);
  
-    if (RX_UTF8(rx)) {
-       PL_reg_flags |= RF_utf8;
-    }
+    is_utf8_pat = cBOOL(RX_UTF8(rx));
+
      DEBUG_EXECUTE_r( 
          debug_start_match(rx, utf8_target, strpos, strend,
              sv ? "Guessing start of match in sv for"
@@ -637,7 +633,7 @@ Perl_re_intuit_start(pTHX_ REGEXP * const rx, SV *sv, char *strpos,
                               "String too short... [re_intuit_start]\n"));
         goto fail;
      }
-                
+
      /* XXX we need to pass strbeg as a separate arg: the following is
       * guesswork and can be wrong... */
      if (sv && SvPOK(sv)) {
@@ -1127,7 +1123,8 @@ Perl_re_intuit_start(pTHX_ REGEXP * const rx, SV *sv, char *strpos,
                                        (IV)start_shift, (IV)(check_at - strbeg), (IV)(s - strbeg), (IV)(endpos - strbeg), (IV)(checked_upto- strbeg)));
  
         t = s;
-        s = find_byclass(prog, progi->regstclass, checked_upto, endpos, NULL);
+        s = find_byclass(prog, progi->regstclass, checked_upto, endpos,
+                            NULL, is_utf8_pat);
         if (s) {
             checked_upto = s;
         } else {
@@ -1229,8 +1226,8 @@ Perl_re_intuit_start(pTHX_ REGEXP * const rx, SV *sv, char *strpos,
                                ? (utf8_target ? trie_utf8 : trie_plain) \
                                : (utf8_target ? trie_utf8_fold : trie_latin_utf8_fold))
  
-#define REXEC_TRIE_READ_CHAR(trie_type, trie, widecharmap, uc, uscan, len,          \
-uvc, charid, foldlen, foldbuf, uniflags) STMT_START {                               \
+#define REXEC_TRIE_READ_CHAR(trie_type, trie, widecharmap, uc, uscan, len, uvc, charid, foldlen, foldbuf, uniflags) \
+STMT_START {                               \
      STRLEN skiplen;                                                                 \
      switch (trie_type) {                                                            \
      case trie_utf8_fold:                                                            \
@@ -1435,7 +1432,7 @@ if ((!reginfo || regtry(reginfo, &s))) \
  
  STATIC char *
  S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s, 
-    const char *strend, regmatch_info *reginfo)
+    const char *strend, regmatch_info *reginfo, bool is_utf8_pat)
  {
      dVAR;
      const I32 doevery = (prog->intflags & PREGf_SKIP) == 0;
@@ -1463,6 +1460,8 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
      /* We know what class it must start with. */
      switch (OP(c)) {
      case ANYOF:
+    case ANYOF_SYNTHETIC:
+    case ANYOF_WARN_SUPER:
          if (utf8_target) {
              REXEC_FBC_UTF8_CLASS_SCAN(
                        reginclass(prog, c, (U8*)s, utf8_target));
@@ -1481,7 +1480,7 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
          break;
  
      case EXACTFA:
-        if (UTF_PATTERN || utf8_target) {
+        if (is_utf8_pat || utf8_target) {
              utf8_fold_flags = FOLDEQ_UTF8_NOMIX_ASCII;
              goto do_exactf_utf8;
          }
@@ -1501,7 +1500,7 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
          goto do_exactf_non_utf8;
  
      case EXACTFL:
-        if (UTF_PATTERN || utf8_target) {
+        if (is_utf8_pat || utf8_target) {
              utf8_fold_flags = FOLDEQ_UTF8_LOCALE;
              goto do_exactf_utf8;
          }
@@ -1510,15 +1509,15 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
          goto do_exactf_non_utf8;
  
      case EXACTFU_SS:
-        if (UTF_PATTERN) {
+        if (is_utf8_pat) {
              utf8_fold_flags = FOLDEQ_S2_ALREADY_FOLDED;
          }
          goto do_exactf_utf8;
  
      case EXACTFU_TRICKYFOLD:
      case EXACTFU:
-        if (UTF_PATTERN || utf8_target) {
-            utf8_fold_flags = (UTF_PATTERN) ? FOLDEQ_S2_ALREADY_FOLDED : 0;
+        if (is_utf8_pat || utf8_target) {
+            utf8_fold_flags = is_utf8_pat ? FOLDEQ_S2_ALREADY_FOLDED : 0;
              goto do_exactf_utf8;
          }
  
@@ -1574,7 +1573,7 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
          pat_string = STRING(c);
          ln  = STR_LEN(c);      /* length to match in octets/bytes */
          pat_end = pat_string + ln;
-        lnc = (UTF_PATTERN)     /* length to match in characters */
+        lnc = is_utf8_pat       /* length to match in characters */
                  ? utf8_length((U8 *) pat_string, (U8 *) pat_end)
                  : ln;
  
@@ -1610,7 +1609,7 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
          while (s <= e) {
              char *my_strend= (char *)strend;
              if (foldEQ_utf8_flags(s, &my_strend, 0,  utf8_target,
-                  pat_string, NULL, ln, cBOOL(UTF_PATTERN), utf8_fold_flags)
+                  pat_string, NULL, ln, is_utf8_pat, utf8_fold_flags)
                  && (!reginfo || regtry(reginfo, &s)) )
              {
                  goto got_it;
@@ -1620,20 +1619,20 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
          break;
      }
      case BOUNDL:
-        PL_reg_flags |= RF_tainted;
-        FBC_BOUND(isALNUM_LC,
-                  isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp)),
-                  isALNUM_LC_utf8((U8*)s));
+        RXp_MATCH_TAINTED_on(prog);
+        FBC_BOUND(isWORDCHAR_LC,
+                  isWORDCHAR_LC_uvchr(UNI_TO_NATIVE(tmp)),
+                  isWORDCHAR_LC_utf8((U8*)s));
          break;
      case NBOUNDL:
-        PL_reg_flags |= RF_tainted;
-        FBC_NBOUND(isALNUM_LC,
-                   isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp)),
-                   isALNUM_LC_utf8((U8*)s));
+        RXp_MATCH_TAINTED_on(prog);
+        FBC_NBOUND(isWORDCHAR_LC,
+                   isWORDCHAR_LC_uvchr(UNI_TO_NATIVE(tmp)),
+                   isWORDCHAR_LC_utf8((U8*)s));
          break;
      case BOUND:
          FBC_BOUND(isWORDCHAR,
-                  isALNUM_uni(tmp),
+                  isWORDCHAR_uni(tmp),
                    cBOOL(swash_fetch(PL_utf8_swash_ptrs[_CC_WORDCHAR], (U8*)s, utf8_target)));
          break;
      case BOUNDA:
@@ -1643,7 +1642,7 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
          break;
      case NBOUND:
          FBC_NBOUND(isWORDCHAR,
-                   isALNUM_uni(tmp),
+                   isWORDCHAR_uni(tmp),
                     cBOOL(swash_fetch(PL_utf8_swash_ptrs[_CC_WORDCHAR], (U8*)s, utf8_target)));
          break;
      case NBOUNDA:
@@ -1653,12 +1652,12 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
          break;
      case BOUNDU:
          FBC_BOUND(isWORDCHAR_L1,
-                  isALNUM_uni(tmp),
+                  isWORDCHAR_uni(tmp),
                    cBOOL(swash_fetch(PL_utf8_swash_ptrs[_CC_WORDCHAR], (U8*)s, utf8_target)));
          break;
      case NBOUNDU:
          FBC_NBOUND(isWORDCHAR_L1,
-                   isALNUM_uni(tmp),
+                   isWORDCHAR_uni(tmp),
                     cBOOL(swash_fetch(PL_utf8_swash_ptrs[_CC_WORDCHAR], (U8*)s, utf8_target)));
          break;
      case LNBREAK:
@@ -1675,7 +1674,7 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
          /* FALLTHROUGH */
  
      case POSIXL:
-        PL_reg_flags |= RF_tainted;
+        RXp_MATCH_TAINTED_on(prog);
          REXEC_FBC_CSCAN(to_complement ^ cBOOL(isFOO_utf8_lc(FLAGS(c), (U8 *) s)),
                          to_complement ^ cBOOL(isFOO_lc(FLAGS(c), *s)));
          break;
@@ -2108,13 +2107,12 @@ Perl_regexec_flags(pTHX_ REGEXP * const rx, char *stringarg, char *strend,
         Perl_croak(aTHX_ "corrupted regexp program");
      }
  
-    PL_reg_flags = 0;
+    RX_MATCH_TAINTED_off(rx);
      PL_reg_state.re_state_eval_setup_done = FALSE;
      PL_reg_maxiter = 0;
  
-    if (RX_UTF8(rx))
-       PL_reg_flags |= RF_utf8;
-
+    reginfo.is_utf8_pat = cBOOL(RX_UTF8(rx));
+    reginfo.warned = FALSE;
      /* Mark beginning of line for ^ and lookbehind. */
      reginfo.bol = startpos; /* XXX not used ??? */
      PL_bostr  = strbeg;
@@ -2169,8 +2167,8 @@ Perl_regexec_flags(pTHX_ REGEXP * const rx, char *stringarg, char *strend,
             was from this regex we don't want a subsequent partially
             successful match to clobber the old results.
             So when we detect this possibility we add a swap buffer
-           to the re, and switch the buffer each match. If we fail
-           we switch it back, otherwise we leave it swapped.
+           to the re, and switch the buffer each match. If we fail,
+           we switch it back; otherwise we leave it swapped.
          */
          swap = prog->offs;
          /* do we need a save destructor here for eval dies? */
@@ -2289,7 +2287,7 @@ Perl_regexec_flags(pTHX_ REGEXP * const rx, char *stringarg, char *strend,
      /* Messy cases:  unanchored match. */
      if ((prog->anchored_substr || prog->anchored_utf8) && prog->intflags & PREGf_SKIP) {
         /* we have /x+whatever/ */
-       /* it must be a one character string (XXXX Except UTF_PATTERN?) */
+       /* it must be a one character string (XXXX Except is_utf8_pat?) */
         char ch;
  #ifdef DEBUGGING
         int did_match = 0;
@@ -2459,7 +2457,7 @@ Perl_regexec_flags(pTHX_ REGEXP * const rx, char *stringarg, char *strend,
                      quoted, (int)(strend - s));
             }
         });
-        if (find_byclass(prog, c, s, strend, &reginfo))
+        if (find_byclass(prog, c, s, strend, &reginfo, reginfo.is_utf8_pat))
             goto got_it;
         DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Contradicts stclass... [regexec_flags]\n"));
      }
@@ -2595,7 +2593,6 @@ got_it:
             );
      );
      Safefree(swap);
-    RX_MATCH_TAINTED_set(rx, PL_reg_flags & RF_tainted);
  
      if (PL_reg_state.re_state_eval_setup_done)
         restore_pos(aTHX_ prog);
@@ -3236,7 +3233,8 @@ S_clear_backtrack_stack(pTHX_ void *p)
      }
  }
  static bool
-S_setup_EXACTISH_ST_c1_c2(pTHX_ const regnode * const text_node, int *c1p, U8* c1_utf8, int *c2p, U8* c2_utf8)
+S_setup_EXACTISH_ST_c1_c2(pTHX_ const regnode * const text_node, int *c1p,
+        U8* c1_utf8, int *c2p, U8* c2_utf8, bool is_utf8_pat)
  {
      /* This function determines if there are one or two characters that match
       * the first character of the passed-in EXACTish node <text_node>, and if
@@ -3308,7 +3306,7 @@ S_setup_EXACTISH_ST_c1_c2(pTHX_ const regnode * const text_node, int *c1p, U8* c
           * character.  If both the pat and the target are UTF-8, we can just
           * copy the input to the output, avoiding finding the code point of
           * that character */
-        if (! UTF_PATTERN) {
+        if (!is_utf8_pat) {
              c2 = c1 = *pat;
          }
          else if (utf8_target) {
@@ -3321,10 +3319,10 @@ S_setup_EXACTISH_ST_c1_c2(pTHX_ const regnode * const text_node, int *c1p, U8* c
          }
      }
      else /* an EXACTFish node */
-         if ((UTF_PATTERN
+         if ((is_utf8_pat
                      && is_MULTI_CHAR_FOLD_utf8_safe(pat,
                                                      pat + STR_LEN(text_node)))
-             || (! UTF_PATTERN
+             || (!is_utf8_pat
                      && is_MULTI_CHAR_FOLD_latin1_safe(pat,
                                                      pat + STR_LEN(text_node))))
      {
@@ -3334,7 +3332,7 @@ S_setup_EXACTISH_ST_c1_c2(pTHX_ const regnode * const text_node, int *c1p, U8* c
          use_chrtest_void = TRUE;
      }
      else { /* an EXACTFish node which doesn't begin with a multi-char fold */
-        c1 = (UTF_PATTERN) ? valid_utf8_to_uvchr(pat, NULL) : *pat;
+        c1 = is_utf8_pat ? valid_utf8_to_uvchr(pat, NULL) : *pat;
          if (c1 > 256) {
              /* Load the folds hash, if not already done */
              SV** listp;
@@ -3562,6 +3560,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
      U32 maxopenparen = 0;       /* max '(' index seen so far */
      int to_complement;  /* Invert the result? */
      _char_class_number classnum;
+    bool is_utf8_pat = reginfo->is_utf8_pat;
  
  #ifdef DEBUGGING
      GET_RE_DEBUG_FLAGS_DECL;
@@ -4050,7 +4049,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
         case EXACT: {            /*  /abc/        */
             char *s = STRING(scan);
             ln = STR_LEN(scan);
-           if (utf8_target != UTF_PATTERN) {
+           if (utf8_target != is_utf8_pat) {
                 /* The target and the pattern have differing utf8ness. */
                 char *l = locinput;
                 const char * const e = s + ln;
@@ -4066,9 +4065,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
                       * is an invariant, but there are tests in the test suite
                       * dealing with (??{...}) which violate this) */
                     while (s < e) {
-                       if (l >= PL_regeol)
-                            sayNO;
-                        if (UTF8_IS_ABOVE_LATIN1(* (U8*) l)) {
+                       if (l >= PL_regeol || UTF8_IS_ABOVE_LATIN1(* (U8*) l)) {
                              sayNO;
                          }
                          if (UTF8_IS_INVARIANT(*(U8*)l)) {
@@ -4109,17 +4106,18 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
                     }
                 }
                 locinput = l;
-               break;
             }
-           /* The target and the pattern have the same utf8ness. */
-           /* Inline the first character, for speed. */
-           if (UCHARAT(s) != nextchr)
-               sayNO;
-           if (PL_regeol - locinput < ln)
-               sayNO;
-           if (ln > 1 && memNE(s, locinput, ln))
-               sayNO;
-           locinput += ln;
+            else {
+                /* The target and the pattern have the same utf8ness. */
+                /* Inline the first character, for speed. */
+                if (PL_regeol - locinput < ln
+                    || UCHARAT(s) != nextchr
+                    || (ln > 1 && memNE(s, locinput, ln)))
+                {
+                    sayNO;
+                }
+                locinput += ln;
+            }
             break;
             }
  
@@ -4129,7 +4127,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
             const char * s;
             U32 fold_utf8_flags;
  
-           PL_reg_flags |= RF_tainted;
+            RX_MATCH_TAINTED_on(reginfo->prog);
              folder = foldEQ_locale;
              fold_array = PL_fold_locale;
             fold_utf8_flags = FOLDEQ_UTF8_LOCALE;
@@ -4140,7 +4138,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
         case EXACTFU:            /*  /abc/iu      */
             folder = foldEQ_latin1;
             fold_array = PL_fold_latin1;
-           fold_utf8_flags = (UTF_PATTERN) ? FOLDEQ_S1_ALREADY_FOLDED : 0;
+           fold_utf8_flags = is_utf8_pat ? FOLDEQ_S1_ALREADY_FOLDED : 0;
             goto do_exactf;
  
         case EXACTFA:            /*  /abc/iaa     */
@@ -4158,13 +4156,13 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
             s = STRING(scan);
             ln = STR_LEN(scan);
  
-           if (utf8_target || UTF_PATTERN || state_num == EXACTFU_SS) {
+           if (utf8_target || is_utf8_pat || state_num == EXACTFU_SS) {
               /* Either target or the pattern are utf8, or has the issue where
                * the fold lengths may differ. */
                 const char * const l = locinput;
                 char *e = PL_regeol;
  
-               if (! foldEQ_utf8_flags(s, 0,  ln, cBOOL(UTF_PATTERN),
+               if (! foldEQ_utf8_flags(s, 0,  ln, is_utf8_pat,
                                         l, &e, 0,  utf8_target, fold_utf8_flags))
                 {
                     sayNO;
@@ -4193,7 +4191,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
          * have to set the FLAGS fields of these */
         case BOUNDL:  /*  /\b/l  */
         case NBOUNDL: /*  /\B/l  */
-           PL_reg_flags |= RF_tainted;
+            RX_MATCH_TAINTED_on(reginfo->prog);
             /* FALL THROUGH */
         case BOUND:   /*  /\b/   */
         case BOUNDU:  /*  /\b/u  */
@@ -4214,7 +4212,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
                     ln = utf8n_to_uvchr(r, UTF8SKIP(r), 0, uniflags);
                 }
                 if (FLAGS(scan) != REGEX_LOCALE_CHARSET) {
-                   ln = isALNUM_uni(ln);
+                   ln = isWORDCHAR_uni(ln);
                      if (NEXTCHR_IS_EOS)
                          n = 0;
                      else {
@@ -4224,8 +4222,8 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
                      }
                 }
                 else {
-                   ln = isALNUM_LC_uvchr(UNI_TO_NATIVE(ln));
-                   n = NEXTCHR_IS_EOS ? 0 : isALNUM_LC_utf8((U8*)locinput);
+                   ln = isWORDCHAR_LC_uvchr(UNI_TO_NATIVE(ln));
+                   n = NEXTCHR_IS_EOS ? 0 : isWORDCHAR_LC_utf8((U8*)locinput);
                 }
             }
             else {
@@ -4249,12 +4247,12 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
                         n = NEXTCHR_IS_EOS ? 0 : isWORDCHAR_L1(nextchr);
                         break;
                     case REGEX_LOCALE_CHARSET:
-                       ln = isALNUM_LC(ln);
-                       n = NEXTCHR_IS_EOS ? 0 : isALNUM_LC(nextchr);
+                       ln = isWORDCHAR_LC(ln);
+                       n = NEXTCHR_IS_EOS ? 0 : isWORDCHAR_LC(nextchr);
                         break;
                     case REGEX_DEPENDS_CHARSET:
-                       ln = isALNUM(ln);
-                       n = NEXTCHR_IS_EOS ? 0 : isALNUM(nextchr);
+                       ln = isWORDCHAR(ln);
+                       n = NEXTCHR_IS_EOS ? 0 : isWORDCHAR(nextchr);
                         break;
                     case REGEX_ASCII_RESTRICTED_CHARSET:
                     case REGEX_ASCII_MORE_RESTRICTED_CHARSET:
@@ -4273,19 +4271,18 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
             break;
  
         case ANYOF:  /*  /[abc]/       */
+       case ANYOF_WARN_SUPER:
              if (NEXTCHR_IS_EOS)
                  sayNO;
             if (utf8_target) {
                 if (!reginclass(rex, scan, (U8*)locinput, utf8_target))
                     sayNO;
                 locinput += UTF8SKIP(locinput);
-               break;
             }
             else {
                 if (!REGINCLASS(rex, scan, (U8*)locinput))
                     sayNO;
                 locinput++;
-               break;
             }
             break;
  
@@ -4302,19 +4299,19 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
  
              /* The locale hasn't influenced the outcome before this, so defer
               * tainting until now */
-            PL_reg_flags |= RF_tainted;
+            RX_MATCH_TAINTED_on(reginfo->prog);
  
              /* Use isFOO_lc() for characters within Latin1.  (Note that
               * UTF8_IS_INVARIANT works even on non-UTF-8 strings, or else
               * wouldn't be invariant) */
              if (UTF8_IS_INVARIANT(nextchr) || ! utf8_target) {
-                if (! (to_complement ^ cBOOL(isFOO_lc(FLAGS(scan), nextchr)))) {
+                if (! (to_complement ^ cBOOL(isFOO_lc(FLAGS(scan), (U8) nextchr)))) {
                      sayNO;
                  }
              }
              else if (UTF8_IS_DOWNGRADEABLE_START(nextchr)) {
                  if (! (to_complement ^ cBOOL(isFOO_lc(FLAGS(scan),
-                                        TWO_BYTE_UTF8_TO_UNI(nextchr,
+                                        (U8) TWO_BYTE_UTF8_TO_UNI(nextchr,
                                                              *(locinput + 1))))))
                  {
                      sayNO;
@@ -4676,7 +4673,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
             const U8 *fold_array;
             UV utf8_fold_flags;
  
-           PL_reg_flags |= RF_tainted;
+            RX_MATCH_TAINTED_on(reginfo->prog);
             folder = foldEQ_locale;
             fold_array = PL_fold_locale;
             type = REFFL;
@@ -4721,7 +4718,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
             goto do_nref_ref_common;
  
         case REFFL:  /*  /\1/il  */
-           PL_reg_flags |= RF_tainted;
+            RX_MATCH_TAINTED_on(reginfo->prog);
             folder = foldEQ_locale;
             fold_array = PL_fold_locale;
             utf8_fold_flags = FOLDEQ_UTF8_LOCALE;
@@ -4881,8 +4878,6 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
                  */
                 Copy(&PL_reg_state, &saved_state, 1, struct re_save_state);
  
-               PL_reg_state.re_reparsing = FALSE;
-
                 if (!caller_cv)
                     caller_cv = find_runcv(NULL);
  
@@ -4916,12 +4911,13 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
                  * points to newcv's pad. */
                 if (newcv != last_pushed_cv || PL_comppad != last_pad)
                 {
-                   I32 depth = (newcv == caller_cv) ? 0 : 1;
+                    U8 flags = (CXp_SUB_RE |
+                                ((newcv == caller_cv) ? CXp_SUB_RE_FAKE : 0));
                     if (last_pushed_cv) {
-                       CHANGE_MULTICALL_WITHDEPTH(newcv, depth);
+                       CHANGE_MULTICALL_FLAGS(newcv, flags);
                     }
                     else {
-                       PUSH_MULTICALL_WITHDEPTH(newcv, depth);
+                       PUSH_MULTICALL_FLAGS(newcv, flags);
                     }
                     last_pushed_cv = newcv;
                 }
@@ -5118,12 +5114,8 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
                 /* XXXX This is too dramatic a measure... */
                 PL_reg_maxiter = 0;
  
-               ST.toggle_reg_flags = PL_reg_flags;
-               if (RX_UTF8(re_sv))
-                   PL_reg_flags |= RF_utf8;
-               else
-                   PL_reg_flags &= ~RF_utf8;
-               ST.toggle_reg_flags ^= PL_reg_flags; /* diff of old and new */
+               ST.saved_utf8_pat = is_utf8_pat;
+               is_utf8_pat = cBOOL(RX_UTF8(re_sv));
  
                 ST.prev_rex = rex_sv;
                 ST.prev_curlyx = cur_curlyx;
@@ -5142,7 +5134,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
  
         case EVAL_AB: /* cleanup after a successful (??{A})B */
             /* note: this is called twice; first after popping B, then A */
-           PL_reg_flags ^= ST.toggle_reg_flags; 
+            is_utf8_pat = ST.saved_utf8_pat;
             rex_sv = ST.prev_rex;
             SET_reg_curpm(rex_sv);
             rex = ReANY(rex_sv);
@@ -5160,7 +5152,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
  
         case EVAL_AB_fail: /* unsuccessfully ran A or B in (??{A})B */
             /* note: this is called twice; first after popping B, then A */
-           PL_reg_flags ^= ST.toggle_reg_flags; 
+            is_utf8_pat = ST.saved_utf8_pat;
             rex_sv = ST.prev_rex;
             SET_reg_curpm(rex_sv);
             rex = ReANY(rex_sv);
@@ -5562,9 +5554,9 @@ NULL
           do_whilem_B_max:
             if (cur_curlyx->u.curlyx.count >= REG_INFTY
                 && ckWARN(WARN_REGEXP)
-               && !(PL_reg_flags & RF_warned))
+               && !reginfo->warned)
             {
-               PL_reg_flags |= RF_warned;
+                reginfo->warned        = TRUE;
                 Perl_warner(aTHX_ packWARN(WARN_REGEXP),
                      "Complex regular subexpression recursion limit (%d) "
                      "exceeded",
@@ -5587,9 +5579,9 @@ NULL
                 /* Maximum greed exceeded */
                 if (cur_curlyx->u.curlyx.count >= REG_INFTY
                     && ckWARN(WARN_REGEXP)
-                   && !(PL_reg_flags & RF_warned))
+                    && !reginfo->warned)
                 {
-                   PL_reg_flags |= RF_warned;
+                    reginfo->warned    = TRUE;
                     Perl_warner(aTHX_ packWARN(WARN_REGEXP),
                         "Complex regular subexpression recursion "
                         "limit (%d) exceeded",
@@ -5784,7 +5776,8 @@ NULL
                      */
                     if (PL_regkind[OP(text_node)] == EXACT) {
                          if (! S_setup_EXACTISH_ST_c1_c2(aTHX_
-                           text_node, &ST.c1, ST.c1_utf8, &ST.c2, ST.c2_utf8))
+                           text_node, &ST.c1, ST.c1_utf8, &ST.c2, ST.c2_utf8,
+                           is_utf8_pat))
                          {
                              sayNO;
                          }
@@ -5960,7 +5953,8 @@ NULL
                          if this changes back then the macro for IS_TEXT and 
                          friends need to change. */
                          if (! S_setup_EXACTISH_ST_c1_c2(aTHX_
-                           text_node, &ST.c1, ST.c1_utf8, &ST.c2, ST.c2_utf8))
+                           text_node, &ST.c1, ST.c1_utf8, &ST.c2, ST.c2_utf8,
+                           is_utf8_pat))
                          {
                              sayNO;
                          }
@@ -5973,7 +5967,9 @@ NULL
             if (minmod) {
                  char *li = locinput;
                 minmod = 0;
-               if (ST.min && regrepeat(rex, &li, ST.A, ST.min, depth) < ST.min)
+               if (ST.min &&
+                        regrepeat(rex, &li, ST.A, ST.min, depth, is_utf8_pat)
+                            < ST.min)
                     sayNO;
                  SET_locinput(li);
                 ST.count = ST.min;
@@ -6009,7 +6005,8 @@ NULL
                  /* avoid taking address of locinput, so it can remain
                   * a register var */
                  char *li = locinput;
-               ST.count = regrepeat(rex, &li, ST.A, ST.max, depth);
+               ST.count = regrepeat(rex, &li, ST.A, ST.max, depth,
+                                        is_utf8_pat);
                 if (ST.count < ST.min)
                     sayNO;
                  SET_locinput(li);
@@ -6093,7 +6090,7 @@ NULL
                       * locinput matches */
                      char *li = ST.oldloc;
                     ST.count += n;
-                   if (regrepeat(rex, &li, ST.A, n, depth) < n)
+                   if (regrepeat(rex, &li, ST.A, n, depth, is_utf8_pat) < n)
                         sayNO;
                      assert(n == REG_INFTY || locinput == li);
                 }
@@ -6117,7 +6114,7 @@ NULL
             /* failed -- move forward one */
              {
                  char *li = locinput;
-                if (!regrepeat(rex, &li, ST.A, 1, depth)) {
+                if (!regrepeat(rex, &li, ST.A, 1, depth, is_utf8_pat)) {
                      sayNO;
                  }
                  locinput = li;
@@ -6192,9 +6189,8 @@ NULL
             fake_end:
             if (cur_eval) {
                 /* we've just finished A in /(??{A})B/; now continue with B */
-               st->u.eval.toggle_reg_flags
-                           = cur_eval->u.eval.toggle_reg_flags;
-               PL_reg_flags ^= st->u.eval.toggle_reg_flags; 
+                st->u.eval.saved_utf8_pat = is_utf8_pat;
+               is_utf8_pat = cur_eval->u.eval.saved_utf8_pat;
  
                 st->u.eval.prev_rex = rex_sv;           /* inner */
  
@@ -6645,7 +6641,8 @@ no_silent:
   * depth     - (for debugging) backtracking depth.
   */
  STATIC I32
-S_regrepeat(pTHX_ const regexp *prog, char **startposp, const regnode *p, I32 max, int depth)
+S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p,
+                I32 max, int depth, bool is_utf8_pat)
  {
      dVAR;
      char *scan;     /* Pointer to current position in target string */
@@ -6724,7 +6721,7 @@ S_regrepeat(pTHX_ const regexp *prog, char **startposp, const regnode *p, I32 ma
          }
         break;
      case EXACT:
-        assert(STR_LEN(p) == (UTF_PATTERN) ? UTF8SKIP(STRING(p)) : 1);
+        assert(STR_LEN(p) == (is_utf8_pat ? UTF8SKIP(STRING(p)) : 1));
  
         c = (U8)*STRING(p);
  
@@ -6732,7 +6729,7 @@ S_regrepeat(pTHX_ const regexp *prog, char **startposp, const regnode *p, I32 ma
           * under UTF-8, or both target and pattern aren't UTF-8.  Note that we
           * can use UTF8_IS_INVARIANT() even if the pattern isn't UTF-8, as it's
           * true iff it doesn't matter if the argument is in UTF-8 or not */
-        if (UTF8_IS_INVARIANT(c) || (! utf8_target && ! UTF_PATTERN)) {
+        if (UTF8_IS_INVARIANT(c) || (! utf8_target && ! is_utf8_pat)) {
              if (utf8_target && scan + max < loceol) {
                  /* We didn't adjust <loceol> because is UTF-8, but ok to do so,
                   * since here, to match at all, 1 char == 1 byte */
@@ -6742,7 +6739,7 @@ S_regrepeat(pTHX_ const regexp *prog, char **startposp, const regnode *p, I32 ma
                 scan++;
             }
         }
-       else if (UTF_PATTERN) {
+       else if (is_utf8_pat) {
              if (utf8_target) {
                  STRLEN scan_char_len;
  
@@ -6793,7 +6790,7 @@ S_regrepeat(pTHX_ const regexp *prog, char **startposp, const regnode *p, I32 ma
         goto do_exactf;
  
      case EXACTFL:
-       PL_reg_flags |= RF_tainted;
+        RXp_MATCH_TAINTED_on(prog);
         utf8_flags = FOLDEQ_UTF8_LOCALE;
         goto do_exactf;
  
@@ -6804,23 +6801,25 @@ S_regrepeat(pTHX_ const regexp *prog, char **startposp, const regnode *p, I32 ma
      case EXACTFU_SS:
      case EXACTFU_TRICKYFOLD:
      case EXACTFU:
-       utf8_flags = (UTF_PATTERN) ? FOLDEQ_S2_ALREADY_FOLDED : 0;
+       utf8_flags = is_utf8_pat ? FOLDEQ_S2_ALREADY_FOLDED : 0;
  
      do_exactf: {
          int c1, c2;
          U8 c1_utf8[UTF8_MAXBYTES+1], c2_utf8[UTF8_MAXBYTES+1];
  
-        assert(STR_LEN(p) == (UTF_PATTERN) ? UTF8SKIP(STRING(p)) : 1);
+        assert(STR_LEN(p) == (is_utf8_pat ? UTF8SKIP(STRING(p)) : 1));
  
-        if (S_setup_EXACTISH_ST_c1_c2(aTHX_ p, &c1, c1_utf8, &c2, c2_utf8)) {
+        if (S_setup_EXACTISH_ST_c1_c2(aTHX_ p, &c1, c1_utf8, &c2, c2_utf8,
+                                        is_utf8_pat))
+        {
              if (c1 == CHRTEST_VOID) {
                  /* Use full Unicode fold matching */
                  char *tmpeol = PL_regeol;
-                STRLEN pat_len = (UTF_PATTERN) ? UTF8SKIP(STRING(p)) : 1;
+                STRLEN pat_len = is_utf8_pat ? UTF8SKIP(STRING(p)) : 1;
                  while (hardcount < max
                          && foldEQ_utf8_flags(scan, &tmpeol, 0, utf8_target,
                                               STRING(p), NULL, pat_len,
-                                             cBOOL(UTF_PATTERN), utf8_flags))
+                                             is_utf8_pat, utf8_flags))
                  {
                      scan = tmpeol;
                      tmpeol = PL_regeol;
@@ -6864,6 +6863,7 @@ S_regrepeat(pTHX_ const regexp *prog, char **startposp, const regnode *p, I32 ma
         break;
      }
      case ANYOF:
+    case ANYOF_WARN_SUPER:
         if (utf8_target) {
             while (hardcount < max
                     && scan < loceol
@@ -6885,7 +6885,7 @@ S_regrepeat(pTHX_ const regexp *prog, char **startposp, const regnode *p, I32 ma
          /* FALLTHROUGH */
  
      case POSIXL:
-       PL_reg_flags |= RF_tainted;
+        RXp_MATCH_TAINTED_on(prog);
         if (! utf8_target) {
             while (scan < loceol && to_complement ^ cBOOL(isFOO_lc(FLAGS(p),
                                                                     *scan)))
@@ -7272,7 +7272,7 @@ S_core_regclass_swash(pTHX_ const regexp *prog, const regnode* node, bool doinit
   */
  
  STATIC bool
-S_reginclass(pTHX_ const regexp * const prog, const regnode * const n, const U8* const p, const bool utf8_target)
+S_reginclass(pTHX_ regexp * const prog, const regnode * const n, const U8* const p, const bool utf8_target)
  {
      dVAR;
      const char flags = ANYOF_FLAGS(n);
@@ -7305,7 +7305,7 @@ S_reginclass(pTHX_ const regexp * const prog, const regnode * const n, const U8*
             match = TRUE;
         }
         else if (flags & ANYOF_LOCALE) {
-           PL_reg_flags |= RF_tainted;
+           RXp_MATCH_TAINTED_on(prog);
  
             if ((flags & ANYOF_LOC_FOLD)
                  && ANYOF_BITMAP_TEST(n, PL_fold_locale[c]))
@@ -7332,7 +7332,17 @@ S_reginclass(pTHX_ const regexp * const prog, const regnode * const n, const U8*
                   * will be 1, so the exclusive or will reverse things, so we
                   * are testing for \W.  On the third iteration, 'to_complement'
                   * will be 0, and we would be testing for \s; the fourth
-                 * iteration would test for \S, etc. */
+                 * iteration would test for \S, etc.
+                 *
+                 * Note that this code assumes that all the classes are closed
+                 * under folding.  For example, if a character matches \w, then
+                 * its fold does too; and vice versa.  This should be true for
+                 * any well-behaved locale for all the currently defined Posix
+                 * classes, except for :lower: and :upper:, which are handled
+                 * by the pseudo-class :cased: which matches if either of the
+                 * other two does.  To get rid of this assumption, an outer
+                 * loop could be used below to iterate over both the source
+                 * character, and its fold (if different) */
  
                  int count = 0;
                  int to_complement = 0;
@@ -7368,7 +7378,7 @@ S_reginclass(pTHX_ const regexp * const prog, const regnode * const n, const U8*
                      || (utf8_target
                          && (c >=256
                              || (! (flags & ANYOF_LOCALE))
-                            || (flags & ANYOF_IS_SYNTHETIC)))))
+                            || OP(n) == ANYOF_SYNTHETIC))))
         {
             SV * const sw = core_regclass_swash(prog, n, TRUE, 0);
             if (sw) {
@@ -7390,7 +7400,7 @@ S_reginclass(pTHX_ const regexp * const prog, const regnode * const n, const U8*
         }
  
          if (UNICODE_IS_SUPER(c)
-            && (flags & ANYOF_WARN_SUPER)
+            && OP(n) == ANYOF_WARN_SUPER
              && ckWARN_d(WARN_NON_UNICODE))
          {
              Perl_warner(aTHX_ packWARN(WARN_NON_UNICODE),