This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Fix more shared threads leaks: add SAVETMPS to the second branch
[perl5.git] / regexec.c
index 05839f0..464ceaf 100644 (file)
--- a/regexec.c
+++ b/regexec.c
@@ -67,7 +67,8 @@
  *
  ****    Alterations to Henry's code are...
  ****
- ****    Copyright (c) 1991-2002, Larry Wall
+ ****    Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
+ ****    2000, 2001, 2002, 2003, by Larry Wall and others
  ****
  ****    You may distribute under the terms of either the GNU General Public
  ****    License or the Artistic License, as specified in the README file.
@@ -86,6 +87,7 @@
 #define RF_warned      2               /* warned about big count? */
 #define RF_evaled      4               /* Did an EVAL with setting? */
 #define RF_utf8                8               /* String contains multibyte chars? */
+#define RF_false       16              /* odd number of nested negatives */
 
 #define UTF ((PL_reg_flags & RF_utf8) != 0)
 
@@ -171,7 +173,7 @@ S_regcppush(pTHX_ I32 parenfloor)
        Perl_croak(aTHX_ "panic: paren_elems_to_push < 0");
 
 #define REGCP_OTHER_ELEMS 6
-    SSCHECK(paren_elems_to_push + REGCP_OTHER_ELEMS);
+    SSGROW(paren_elems_to_push + REGCP_OTHER_ELEMS);
     for (p = PL_regsize; p > parenfloor; p--) {
 /* REGCP_PARENS_ELEMS are pushed per pairs of parentheses. */
        SSPUSHINT(PL_regendp[p]);
@@ -400,6 +402,7 @@ Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
     char *i_strpos = strpos;
     SV *dsv = PERL_DEBUG_PAD_ZERO(0);
 #endif
+    RX_MATCH_UTF8_set(prog,do_utf8);
 
     if (prog->reganch & ROPT_UTF8) {
        DEBUG_r(PerlIO_printf(Perl_debug_log,
@@ -431,7 +434,8 @@ Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
              );
     });
 
-    if (prog->minlen > CHR_DIST((U8*)strend, (U8*)strpos)) {
+    /* CHR_DIST() would be more correct here but it makes things slow. */
+    if (prog->minlen > strend - strpos) {
        DEBUG_r(PerlIO_printf(Perl_debug_log,
                              "String too short... [re_intuit_start]\n"));
        goto fail;
@@ -540,6 +544,9 @@ Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
                            start_shift + (s - strbeg), end_shift, pp, 0);
        else
            goto fail_finish;
+       /* we may be pointing at the wrong string */
+       if (s && RX_MATCH_COPIED(prog))
+           s = strbeg + (s - SvPVX(sv));
        if (data)
            *data->scream_olds = s;
     }
@@ -594,7 +601,7 @@ Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
 
                t = s - prog->check_offset_max;
                if (s - strpos > prog->check_offset_max  /* signed-corrected t > strpos */
-                   && (!(prog->reganch & ROPT_UTF8)
+                   && (!do_utf8
                        || ((t = reghopmaybe3_c(s, -(prog->check_offset_max), strpos))
                            && t > strpos)))
                    /* EMPTY */;
@@ -714,7 +721,7 @@ Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
 
     t = s - prog->check_offset_max;
     if (s - strpos > prog->check_offset_max  /* signed-corrected t > strpos */
-        && (!(prog->reganch & ROPT_UTF8)
+        && (!do_utf8
            || ((t = reghopmaybe3_c(s, -prog->check_offset_max, strpos))
                 && t > strpos))) {
        /* Fixed substring is found far enough so that the match
@@ -847,10 +854,7 @@ Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
        char *startpos = strbeg;
 
        t = s;
-       if (prog->reganch & ROPT_UTF8) {        
-           PL_regdata = prog->data;
-           PL_bostr = startpos;
-       }
+       cache_re(prog);
        s = find_byclass(prog, prog->regstclass, s, endpos, startpos, 1);
        if (!s) {
 #ifdef DEBUGGING
@@ -949,6 +953,7 @@ S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *sta
        I32 doevery = (prog->reganch & ROPT_SKIP) == 0;
        char *m;
        STRLEN ln;
+       STRLEN lnc;
        unsigned int c1;
        unsigned int c2;
        char *e;
@@ -958,25 +963,40 @@ S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *sta
        /* We know what class it must start with. */
        switch (OP(c)) {
        case ANYOF:
-           while (s < strend) {
-               STRLEN skip = do_utf8 ? UTF8SKIP(s) : 1;
-                 
-               if (do_utf8 ?
-                   reginclass(c, (U8*)s, 0, do_utf8) :
-                   REGINCLASS(c, (U8*)s) ||
-                   (ANYOF_FOLD_SHARP_S(c, s, strend) &&
-                    /* The assignment of 2 is intentional:
-                     * for the sharp s, the skip is 2. */
-                    (skip = SHARP_S_SKIP)
-                    )) {
-                   if (tmp && (norun || regtry(prog, s)))
-                       goto got_it;
-                   else
-                       tmp = doevery;
-               }
-               else 
-                   tmp = 1;
-               s += skip;
+           if (do_utf8) {
+                while (s < strend) {
+                     if ((ANYOF_FLAGS(c) & ANYOF_UNICODE) ||
+                         !UTF8_IS_INVARIANT((U8)s[0]) ?
+                         reginclass(c, (U8*)s, 0, do_utf8) :
+                         REGINCLASS(c, (U8*)s)) {
+                          if (tmp && (norun || regtry(prog, s)))
+                               goto got_it;
+                          else
+                               tmp = doevery;
+                     }
+                     else 
+                          tmp = 1;
+                     s += UTF8SKIP(s);
+                }
+           }
+           else {
+                while (s < strend) {
+                     STRLEN skip = 1;
+
+                     if (REGINCLASS(c, (U8*)s) ||
+                         (ANYOF_FOLD_SHARP_S(c, s, strend) &&
+                          /* The assignment of 2 is intentional:
+                           * for the folded sharp s, the skip is 2. */
+                          (skip = SHARP_S_SKIP))) {
+                          if (tmp && (norun || regtry(prog, s)))
+                               goto got_it;
+                          else
+                               tmp = doevery;
+                     }
+                     else 
+                          tmp = 1;
+                     s += skip;
+                }
            }
            break;
        case CANY:
@@ -989,10 +1009,12 @@ S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *sta
            }
            break;
        case EXACTF:
-           m = STRING(c);
-           ln = STR_LEN(c);
+           m   = STRING(c);
+           ln  = STR_LEN(c);   /* length to match in octets/bytes */
+           lnc = (I32) ln;     /* length to match in characters */
            if (UTF) {
                STRLEN ulen1, ulen2;
+               U8 *sm = (U8 *) m;
                U8 tmpbuf1[UTF8_MAXLEN_UCLC+1];
                U8 tmpbuf2[UTF8_MAXLEN_UCLC+1];
 
@@ -1003,6 +1025,11 @@ S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *sta
                                    0, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
                c2 = utf8n_to_uvchr(tmpbuf2, UTF8_MAXLEN_UCLC,
                                    0, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
+               lnc = 0;
+               while (sm < ((U8 *) m + ln)) {
+                   lnc++;
+                   sm += UTF8SKIP(sm);
+               }
            }
            else {
                c1 = *(U8*)m;
@@ -1010,12 +1037,13 @@ S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *sta
            }
            goto do_exactf;
        case EXACTFL:
-           m = STRING(c);
-           ln = STR_LEN(c);
+           m   = STRING(c);
+           ln  = STR_LEN(c);
+           lnc = (I32) ln;
            c1 = *(U8*)m;
            c2 = PL_fold_locale[c1];
          do_exactf:
-           e = HOP3c(strend, -(I32)ln, s);
+           e = HOP3c(strend, -((I32)lnc), s);
 
            if (norun && e < s)
                e = s;                  /* Due to minlen logic of intuit() */
@@ -1038,6 +1066,8 @@ S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *sta
                STRLEN len, foldlen;
                
                if (c1 == c2) {
+                   /* Upper and lower of 1st char are equal -
+                    * probably not a "letter". */
                    while (s <= e) {
                        c = utf8n_to_uvchr((U8*)s, UTF8_MAXLEN, &len,
                                           ckWARN(WARN_UTF8) ?
@@ -1596,6 +1626,7 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *
     SV *dsv0 = PERL_DEBUG_PAD_ZERO(0);
     SV *dsv1 = PERL_DEBUG_PAD_ZERO(1);
 #endif
+    RX_MATCH_UTF8_set(prog,do_utf8);
 
     PL_regcc = 0;
 
@@ -1842,6 +1873,9 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *
                 : (s = fbm_instr((unsigned char*)HOP3(s, back_min, strend),
                                  (unsigned char*)strend, must,
                                  PL_multiline ? FBMrf_MULTILINE : 0))) ) {
+           /* we may be pointing at the wrong string */
+           if ((flags & REXEC_SCREAM) && RX_MATCH_COPIED(prog))
+               s = strbeg + (s - SvPVX(sv));
            DEBUG_r( did_match = 1 );
            if (HOPc(s, -back_max) > last1) {
                last1 = HOPc(s, -back_min);
@@ -1881,9 +1915,12 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *
        goto phooey;
     }
     else if ((c = prog->regstclass)) {
-       if (minlen && PL_regkind[(U8)OP(prog->regstclass)] != EXACT)
+       if (minlen) {
+           I32 op = (U8)OP(prog->regstclass);
            /* don't bother with what can't match */
-           strend = HOPc(strend, -(minlen - 1));
+           if (PL_regkind[op] != EXACT && op != CANY)
+               strend = HOPc(strend, -(minlen - 1));
+       }
        DEBUG_r({
            SV *prop = sv_newmortal();
            char *s0;
@@ -1925,6 +1962,9 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *
                                   end_shift, &scream_pos, 1); /* last one */
                if (!last)
                    last = scream_olds; /* Only one occurrence. */
+               /* we may be pointing at the wrong string */
+               else if (RX_MATCH_COPIED(prog))
+                   s = strbeg + (s - SvPVX(sv));
            }
            else {
                STRLEN len;
@@ -1992,17 +2032,28 @@ got_it:
 
     /* make sure $`, $&, $', and $digit will work later */
     if ( !(flags & REXEC_NOT_FIRST) ) {
-       if (RX_MATCH_COPIED(prog)) {
-           Safefree(prog->subbeg);
-           RX_MATCH_COPIED_off(prog);
-       }
+       RX_MATCH_COPY_FREE(prog);
        if (flags & REXEC_COPY_STR) {
            I32 i = PL_regeol - startpos + (stringarg - strbeg);
-
-           s = savepvn(strbeg, i);
-           prog->subbeg = s;
+#ifdef PERL_COPY_ON_WRITE
+           if ((SvIsCOW(sv)
+                || (SvFLAGS(sv) & CAN_COW_MASK) == CAN_COW_FLAGS)) {
+               if (DEBUG_C_TEST) {
+                   PerlIO_printf(Perl_debug_log,
+                                 "Copy on write: regexp capture, type %d\n",
+                                 (int) SvTYPE(sv));
+               }
+               prog->saved_copy = sv_setsv_cow(prog->saved_copy, sv);
+               prog->subbeg = SvPVX(prog->saved_copy);
+               assert (SvPOKp(prog->saved_copy));
+           } else
+#endif
+           {
+               RX_MATCH_COPIED_on(prog);
+               s = savepvn(strbeg, i);
+               prog->subbeg = s;
+           }
            prog->sublen = i;
-           RX_MATCH_COPIED_on(prog);
        }
        else {
            prog->subbeg = strbeg;
@@ -2092,6 +2143,9 @@ S_regtry(pTHX_ regexp *prog, char *startpos)
                $` inside (?{}) could fail... */
            PL_reg_oldsaved = prog->subbeg;
            PL_reg_oldsavedlen = prog->sublen;
+#ifdef PERL_COPY_ON_WRITE
+           PL_nrs = prog->saved_copy;
+#endif
            RX_MATCH_COPIED_off(prog);
        }
        else
@@ -2106,6 +2160,7 @@ S_regtry(pTHX_ regexp *prog, char *startpos)
     PL_reglastparen = &prog->lastparen;
     PL_reglastcloseparen = &prog->lastcloseparen;
     prog->lastparen = 0;
+    prog->lastcloseparen = 0;
     PL_regsize = 0;
     DEBUG_r(PL_reg_starttry = startpos);
     if (PL_reg_start_tmpl <= prog->nparens) {
@@ -2268,17 +2323,17 @@ S_regmatch(pTHX_ regnode *prog)
            regprop(prop, scan);
            {
              char *s0 =
-               do_utf8 ?
+               do_utf8 && OP(scan) != CANY ?
                pv_uni_display(dsv0, (U8*)(locinput - pref_len),
                               pref0_len, 60, UNI_DISPLAY_REGEX) :
                locinput - pref_len;
              int len0 = do_utf8 ? strlen(s0) : pref0_len;
-             char *s1 = do_utf8 ?
+             char *s1 = do_utf8 && OP(scan) != CANY ?
                pv_uni_display(dsv1, (U8*)(locinput - pref_len + pref0_len),
                               pref_len - pref0_len, 60, UNI_DISPLAY_REGEX) :
                locinput - pref_len + pref0_len;
              int len1 = do_utf8 ? strlen(s1) : pref_len - pref0_len;
-             char *s2 = do_utf8 ?
+             char *s2 = do_utf8 && OP(scan) != CANY ?
                pv_uni_display(dsv2, (U8*)locinput,
                               PL_regeol - locinput, 60, UNI_DISPLAY_REGEX) :
                locinput;
@@ -2790,13 +2845,14 @@ S_regmatch(pTHX_ regnode *prog)
            dSP;
            OP_4tree *oop = PL_op;
            COP *ocurcop = PL_curcop;
-           SV **ocurpad = PL_curpad;
+           PAD *old_comppad;
            SV *ret;
+           struct regexp *oreg = PL_reg_re;
        
            n = ARG(scan);
            PL_op = (OP_4tree*)PL_regdata->data[n];
            DEBUG_r( PerlIO_printf(Perl_debug_log, "  re_eval 0x%"UVxf"\n", PTR2UV(PL_op)) );
-           PL_curpad = AvARRAY((AV*)PL_regdata->data[n + 2]);
+           PAD_SAVE_LOCAL(old_comppad, (PAD*)PL_regdata->data[n + 2]);
            PL_regendp[0] = PL_reg_magic->mg_len = locinput - PL_bostr;
 
            {
@@ -2804,7 +2860,7 @@ S_regmatch(pTHX_ regnode *prog)
                CALLRUNOPS(aTHX);                       /* Scalar context. */
                SPAGAIN;
                if (SP == before)
-                   ret = Nullsv;   /* protect against empty (?{}) blocks. */
+                   ret = &PL_sv_undef;   /* protect against empty (?{}) blocks. */
                else {
                    ret = POPs;
                    PUTBACK;
@@ -2812,7 +2868,7 @@ S_regmatch(pTHX_ regnode *prog)
            }
 
            PL_op = oop;
-           PL_curpad = ocurpad;
+           PAD_RESTORE_LOCAL(old_comppad);
            PL_curcop = ocurcop;
            if (logical) {
                if (logical == 2) {     /* Postponed subexpression. */
@@ -2820,13 +2876,18 @@ S_regmatch(pTHX_ regnode *prog)
                    MAGIC *mg = Null(MAGIC*);
                    re_cc_state state;
                    CHECKPOINT cp, lastcp;
-
-                   if(SvROK(ret) || SvRMAGICAL(ret)) {
-                       SV *sv = SvROK(ret) ? SvRV(ret) : ret;
-
-                       if(SvMAGICAL(sv))
-                           mg = mg_find(sv, PERL_MAGIC_qr);
+                    int toggleutf;
+                   register SV *sv;
+
+                   if(SvROK(ret) && SvSMAGICAL(sv = SvRV(ret)))
+                       mg = mg_find(sv, PERL_MAGIC_qr);
+                   else if (SvSMAGICAL(ret)) {
+                       if (SvGMAGICAL(ret))
+                           sv_unmagic(ret, PERL_MAGIC_qr);
+                       else
+                           mg = mg_find(ret, PERL_MAGIC_qr);
                    }
+
                    if (mg) {
                        re = (regexp *)mg->mg_obj;
                        (void)ReREFCNT_inc(re);
@@ -2840,9 +2901,11 @@ S_regmatch(pTHX_ regnode *prog)
                        I32 onpar = PL_regnpar;
 
                        Zero(&pm, 1, PMOP);
+                        if (DO_UTF8(ret)) pm.op_pmdynflags |= PMdf_DYN_UTF8;
                        re = CALLREGCOMP(aTHX_ t, t + len, &pm);
                        if (!(SvFLAGS(ret)
-                             & (SVs_TEMP | SVs_PADTMP | SVf_READONLY)))
+                             & (SVs_TEMP | SVs_PADTMP | SVf_READONLY
+                               | SVs_GMG)))
                            sv_magic(ret,(SV*)ReREFCNT_inc(re),
                                        PERL_MAGIC_qr,0,0);
                        PL_regprecomp = oprecomp;
@@ -2872,6 +2935,9 @@ S_regmatch(pTHX_ regnode *prog)
                    *PL_reglastcloseparen = 0;
                    PL_reg_call_cc = &state;
                    PL_reginput = locinput;
+                   toggleutf = ((PL_reg_flags & RF_utf8) != 0) ^
+                               ((re->reganch & ROPT_UTF8) != 0);
+                   if (toggleutf) PL_reg_flags ^= RF_utf8;
 
                    /* XXXX This is too dramatic a measure... */
                    PL_reg_maxiter = 0;
@@ -2886,6 +2952,7 @@ S_regmatch(pTHX_ regnode *prog)
                        PL_regcc = state.cc;
                        PL_reg_re = state.re;
                        cache_re(PL_reg_re);
+                       if (toggleutf) PL_reg_flags ^= RF_utf8;
 
                        /* XXXX This is too dramatic a measure... */
                        PL_reg_maxiter = 0;
@@ -2902,6 +2969,7 @@ S_regmatch(pTHX_ regnode *prog)
                    PL_regcc = state.cc;
                    PL_reg_re = state.re;
                    cache_re(PL_reg_re);
+                   if (toggleutf) PL_reg_flags ^= RF_utf8;
 
                    /* XXXX This is too dramatic a measure... */
                    PL_reg_maxiter = 0;
@@ -2912,8 +2980,10 @@ S_regmatch(pTHX_ regnode *prog)
                sw = SvTRUE(ret);
                logical = 0;
            }
-           else
+           else {
                sv_setsv(save_scalar(PL_replgv), ret);
+               cache_re(oreg);
+           }
            break;
        }
        case OPEN:
@@ -3073,7 +3143,7 @@ S_regmatch(pTHX_ regnode *prog)
                                  "%*s  %ld out of %ld..%ld  cc=%"UVxf"\n",
                                  REPORT_CODE_OFF+PL_regindent*2, "",
                                  (long)n, (long)cc->min,
-                                 (long)cc->max, (UV)cc)
+                                 (long)cc->max, PTR2UV(cc))
                    );
 
                /* If degenerate scan matches "", assume scan done. */
@@ -3146,7 +3216,10 @@ S_regmatch(pTHX_ regnode *prog)
                                      "%*s  already tried at this position...\n",
                                      REPORT_CODE_OFF+PL_regindent*2, "")
                        );
-                       sayNO_SILENT;
+                       if (PL_reg_flags & RF_false)
+                           sayYES;
+                       else
+                           sayNO_SILENT;
                    }
                    PL_reg_poscache[o] |= (1<<b);
                }
@@ -3332,19 +3405,8 @@ S_regmatch(pTHX_ regnode *prog)
                    if (! HAS_TEXT(text_node)) c1 = c2 = -1000;
                    else {
                        if (PL_regkind[(U8)OP(text_node)] == REF) {
-                           I32 n, ln;
-                           n = ARG(text_node);  /* which paren pair */
-                           ln = PL_regstartp[n];
-                           /* assume yes if we haven't seen CLOSEn */
-                           if (
-                               (I32)*PL_reglastparen < n ||
-                               ln == -1 ||
-                               ln == PL_regendp[n]
-                           ) {
-                               c1 = c2 = -1000;
-                               goto assume_ok_MM;
-                           }
-                           c1 = *(PL_bostr + ln);
+                           c1 = c2 = -1000;
+                           goto assume_ok_MM;
                        }
                        else { c1 = (U8)*STRING(text_node); }
                        if (OP(text_node) == EXACTF || OP(text_node) == REFF)
@@ -3414,19 +3476,8 @@ S_regmatch(pTHX_ regnode *prog)
                        if (! HAS_TEXT(text_node)) c1 = c2 = -1000;
                        else {
                            if (PL_regkind[(U8)OP(text_node)] == REF) {
-                               I32 n, ln;
-                               n = ARG(text_node);  /* which paren pair */
-                               ln = PL_regstartp[n];
-                               /* assume yes if we haven't seen CLOSEn */
-                               if (
-                                   (I32)*PL_reglastparen < n ||
-                                   ln == -1 ||
-                                   ln == PL_regendp[n]
-                               ) {
-                                   c1 = c2 = -1000;
-                                   goto assume_ok_REG;
-                               }
-                               c1 = *(PL_bostr + ln);
+                               c1 = c2 = -1000;
+                               goto assume_ok_REG;
                            }
                            else { c1 = (U8)*STRING(text_node); }
 
@@ -3523,19 +3574,8 @@ S_regmatch(pTHX_ regnode *prog)
                if (! HAS_TEXT(text_node)) c1 = c2 = -1000;
                else {
                    if (PL_regkind[(U8)OP(text_node)] == REF) {
-                       I32 n, ln;
-                       n = ARG(text_node);  /* which paren pair */
-                       ln = PL_regstartp[n];
-                       /* assume yes if we haven't seen CLOSEn */
-                       if (
-                           (I32)*PL_reglastparen < n ||
-                           ln == -1 ||
-                           ln == PL_regendp[n]
-                       ) {
-                           c1 = c2 = -1000;
-                           goto assume_ok_easy;
-                       }
-                       s = (U8*)PL_bostr + ln;
+                       c1 = c2 = -1000;
+                       goto assume_ok_easy;
                    }
                    else { s = (U8*)STRING(text_node); }
 
@@ -3681,14 +3721,14 @@ S_regmatch(pTHX_ regnode *prog)
                        /* If it could work, try it. */
                        if (c == (UV)c1 || c == (UV)c2)
                        {
-                           TRYPAREN(paren, n, PL_reginput);
+                           TRYPAREN(paren, ln, PL_reginput);
                            REGCP_UNWIND(lastcp);
                        }
                    }
                    /* If it could work, try it. */
                    else if (c1 == -1000)
                    {
-                       TRYPAREN(paren, n, PL_reginput);
+                       TRYPAREN(paren, ln, PL_reginput);
                        REGCP_UNWIND(lastcp);
                    }
                    /* Couldn't or didn't -- move forward. */
@@ -3829,6 +3869,7 @@ S_regmatch(pTHX_ regnode *prog)
            }
            else
                PL_reginput = locinput;
+           PL_reg_flags ^= RF_false;
            goto do_ifmatch;
        case IFMATCH:
            n = 1;
@@ -3844,6 +3885,8 @@ S_regmatch(pTHX_ regnode *prog)
          do_ifmatch:
            inner = NEXTOPER(NEXTOPER(scan));
            if (regmatch(inner) != n) {
+               if (n == 0)
+                   PL_reg_flags ^= RF_false;
              say_no:
                if (logical) {
                    logical = 0;
@@ -3853,6 +3896,8 @@ S_regmatch(pTHX_ regnode *prog)
                else
                    sayNO;
            }
+           if (n == 0)
+               PL_reg_flags ^= RF_false;
          say_yes:
            if (logical) {
                logical = 0;
@@ -3989,7 +4034,9 @@ S_regrepeat(pTHX_ regnode *p, I32 max)
     register bool do_utf8 = PL_reg_match_utf8;
 
     scan = PL_reginput;
-    if (max != REG_INFTY && max < loceol - scan)
+    if (max == REG_INFTY)
+       max = I32_MAX;
+    else if (max < loceol - scan)
       loceol = scan + max;
     switch (OP(p)) {
     case REG_ANY:
@@ -4283,15 +4330,16 @@ Perl_regclass_swash(pTHX_ register regnode* node, bool doinit, SV** listsvp, SV
        if (PL_regdata->what[n] == 's') {
            SV *rv = (SV*)PL_regdata->data[n];
            AV *av = (AV*)SvRV((SV*)rv);
+           SV **ary = AvARRAY(av);
            SV **a, **b;
        
            /* See the end of regcomp.c:S_reglass() for
             * documentation of these array elements. */
 
-           si  = *av_fetch(av, 0, FALSE);
-           a   =  av_fetch(av, 1, FALSE);
-           b   =  av_fetch(av, 2, FALSE);
-       
+           si = *ary;
+           a  = SvTYPE(ary[1]) == SVt_RV   ? &ary[1] : 0;
+           b  = SvTYPE(ary[2]) == SVt_PVAV ? &ary[2] : 0;
+
            if (a)
                sw = *a;
            else if (si && doinit) {
@@ -4326,12 +4374,13 @@ S_reginclass(pTHX_ register regnode *n, register U8* p, STRLEN* lenp, register b
 {
     char flags = ANYOF_FLAGS(n);
     bool match = FALSE;
-    UV c;
+    UV c = *p;
     STRLEN len = 0;
     STRLEN plen;
 
-    c = do_utf8 ? utf8n_to_uvchr(p, UTF8_MAXLEN, &len,
-                                ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY) : *p;
+    if (do_utf8 && !UTF8_IS_INVARIANT(c))
+        c = utf8n_to_uvchr(p, UTF8_MAXLEN, &len,
+                           ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
 
     plen = lenp ? *lenp : UNISKIP(NATIVE_TO_UNI(c));
     if (do_utf8 || (flags & ANYOF_UNICODE)) {
@@ -4512,6 +4561,9 @@ restore_pos(pTHX_ void *arg)
        if (PL_reg_oldsaved) {
            PL_reg_re->subbeg = PL_reg_oldsaved;
            PL_reg_re->sublen = PL_reg_oldsavedlen;
+#ifdef PERL_COPY_ON_WRITE
+           PL_reg_re->saved_copy = PL_nrs;
+#endif
            RX_MATCH_COPIED_on(PL_reg_re);
        }
        PL_reg_magic->mg_len = PL_reg_oldpos;
@@ -4526,7 +4578,7 @@ S_to_utf8_substr(pTHX_ register regexp *prog)
     SV* sv;
     if (prog->float_substr && !prog->float_utf8) {
        prog->float_utf8 = sv = NEWSV(117, 0);
-       SvSetMagicSV(sv, prog->float_substr);
+       SvSetSV(sv, prog->float_substr);
        sv_utf8_upgrade(sv);
        if (SvTAIL(prog->float_substr))
            SvTAIL_on(sv);
@@ -4535,7 +4587,7 @@ S_to_utf8_substr(pTHX_ register regexp *prog)
     }
     if (prog->anchored_substr && !prog->anchored_utf8) {
        prog->anchored_utf8 = sv = NEWSV(118, 0);
-       SvSetMagicSV(sv, prog->anchored_substr);
+       SvSetSV(sv, prog->anchored_substr);
        sv_utf8_upgrade(sv);
        if (SvTAIL(prog->anchored_substr))
            SvTAIL_on(sv);
@@ -4550,7 +4602,7 @@ S_to_byte_substr(pTHX_ register regexp *prog)
     SV* sv;
     if (prog->float_utf8 && !prog->float_substr) {
        prog->float_substr = sv = NEWSV(117, 0);
-       SvSetMagicSV(sv, prog->float_utf8);
+       SvSetSV(sv, prog->float_utf8);
        if (sv_utf8_downgrade(sv, TRUE)) {
            if (SvTAIL(prog->float_utf8))
                SvTAIL_on(sv);
@@ -4563,7 +4615,7 @@ S_to_byte_substr(pTHX_ register regexp *prog)
     }
     if (prog->anchored_utf8 && !prog->anchored_substr) {
        prog->anchored_substr = sv = NEWSV(118, 0);
-       SvSetMagicSV(sv, prog->anchored_utf8);
+       SvSetSV(sv, prog->anchored_utf8);
        if (sv_utf8_downgrade(sv, TRUE)) {
            if (SvTAIL(prog->anchored_utf8))
                SvTAIL_on(sv);