This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Update Log::Messge::Simple to 0.04 (was: Re: Code freeze)
[perl5.git] / regcomp.c
index 6ea593e..3ad5d8c 100644 (file)
--- a/regcomp.c
+++ b/regcomp.c
@@ -109,6 +109,7 @@ typedef struct RExC_state_t {
     char       *parse;                 /* Input-scan pointer. */
     I32                whilem_seen;            /* number of WHILEM in this expr */
     regnode    *emit_start;            /* Start of emitted-code area */
+    regnode    *emit_bound;            /* First regnode outside of the allocated space */
     regnode    *emit;                  /* Code-emit pointer; &regdummy = don't = compiling */
     I32                naughty;                /* How bad is this pattern? */
     I32                sawback;                /* Did we see \1, ...? */
@@ -123,7 +124,10 @@ typedef struct RExC_state_t {
     regnode    **open_parens;          /* pointers to open parens */
     regnode    **close_parens;         /* pointers to close parens */
     regnode    *opend;                 /* END node in program */
-    I32                utf8;
+    I32                utf8;           /* whether the pattern is utf8 or not */
+    I32                orig_utf8;      /* whether the pattern was originally in utf8 */
+                               /* XXX use this for future optimisation of case
+                                * where pattern must be upgraded to utf8. */
     HV         *charnames;             /* cache of named sequences */
     HV         *paren_names;           /* Paren names */
     
@@ -156,6 +160,7 @@ typedef struct RExC_state_t {
 #endif
 #define RExC_emit      (pRExC_state->emit)
 #define RExC_emit_start        (pRExC_state->emit_start)
+#define RExC_emit_bound        (pRExC_state->emit_bound)
 #define RExC_naughty   (pRExC_state->naughty)
 #define RExC_sawback   (pRExC_state->sawback)
 #define RExC_seen      (pRExC_state->seen)
@@ -166,6 +171,7 @@ typedef struct RExC_state_t {
 #define RExC_seen_zerolen      (pRExC_state->seen_zerolen)
 #define RExC_seen_evals        (pRExC_state->seen_evals)
 #define RExC_utf8      (pRExC_state->utf8)
+#define RExC_orig_utf8 (pRExC_state->orig_utf8)
 #define RExC_charnames  (pRExC_state->charnames)
 #define RExC_open_parens       (pRExC_state->open_parens)
 #define RExC_close_parens      (pRExC_state->close_parens)
@@ -1157,11 +1163,19 @@ is the recommended Unicode-aware way of saying
 
 #define TRIE_STORE_REVCHAR                                                 \
     STMT_START {                                                           \
-       SV *tmp = newSVpvs("");                                            \
-       if (UTF) SvUTF8_on(tmp);                                           \
-       Perl_sv_catpvf( aTHX_ tmp, "%c", (int)uvc );                       \
-       av_push( revcharmap, tmp );                                        \
-    } STMT_END
+       if (UTF) {                                                         \
+           SV *zlopp = newSV(2);                                          \
+           unsigned char *flrbbbbb = (unsigned char *) SvPVX(zlopp);      \
+           unsigned const char *const kapow = uvuni_to_utf8(flrbbbbb, uvc & 0xFF); \
+           SvCUR_set(zlopp, kapow - flrbbbbb);                            \
+           SvPOK_on(zlopp);                                               \
+           SvUTF8_on(zlopp);                                              \
+           av_push(revcharmap, zlopp);                                    \
+       } else {                                                           \
+           char ooooff = (char)uvc;                                               \
+           av_push(revcharmap, newSVpvn(&ooooff, 1));                     \
+       }                                                                  \
+        } STMT_END
 
 #define TRIE_READ_CHAR STMT_START {                                           \
     wordlen++;                                                                \
@@ -1350,7 +1364,7 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, regnode *firs
        have unique chars.
 
        We use an array of integers to represent the character codes 0..255
-       (trie->charmap) and we use a an HV* to store unicode characters. We use the
+       (trie->charmap) and we use a an HV* to store Unicode characters. We use the
        native representation of the character value as the key and IV's for the
        coded index.
 
@@ -1373,16 +1387,17 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, regnode *firs
         U8 foldbuf[ UTF8_MAXBYTES_CASE + 1 ];
         const U8 *scan = (U8*)NULL;
         U32 wordlen      = 0;         /* required init */
-        STRLEN chars=0;
+        STRLEN chars = 0;
+        bool set_bit = trie->bitmap ? 1 : 0; /*store the first char in the bitmap?*/
 
         if (OP(noper) == NOTHING) {
             trie->minlen= 0;
             continue;
         }
-        if (trie->bitmap) {
-            TRIE_BITMAP_SET(trie,*uc);
-            if ( folder ) TRIE_BITMAP_SET(trie,folder[ *uc ]);            
-        }
+        if ( set_bit ) /* bitmap only alloced when !(UTF&&Folding) */
+            TRIE_BITMAP_SET(trie,*uc); /* store the raw first byte
+                                          regardless of encoding */
+
         for ( ; uc < e ; uc += len ) {
             TRIE_CHARCOUNT(trie)++;
             TRIE_READ_CHAR;
@@ -1394,6 +1409,26 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, regnode *firs
                         trie->charmap[ folder[ uvc ] ] = trie->charmap[ uvc ];
                     TRIE_STORE_REVCHAR;
                 }
+                if ( set_bit ) {
+                    /* store the codepoint in the bitmap, and if its ascii
+                       also store its folded equivelent. */
+                    TRIE_BITMAP_SET(trie,uvc);
+
+                   /* store the folded codepoint */
+                   if ( folder ) TRIE_BITMAP_SET(trie,folder[ uvc ]);
+
+                   if ( !UTF ) {
+                       /* store first byte of utf8 representation of
+                          codepoints in the 127 < uvc < 256 range */
+                       if (127 < uvc && uvc < 192) {
+                           TRIE_BITMAP_SET(trie,194);
+                       } else if (191 < uvc ) {
+                           TRIE_BITMAP_SET(trie,195);
+                       /* && uvc < 256 -- we know uvc is < 256 already */
+                       }
+                   }
+                    set_bit = 0; /* We've done our bit :-) */
+                }
             } else {
                 SV** svpp;
                 if ( !widecharmap )
@@ -1952,7 +1987,8 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, regnode *firs
                 }
                 if ( count == 1 ) {
                     SV **tmp = av_fetch( revcharmap, idx, 0);
-                    char *ch = SvPV_nolen( *tmp );
+                    STRLEN len;
+                    char *ch = SvPV( *tmp, len );
                     DEBUG_OPTIMISE_r({
                         SV *sv=sv_newmortal();
                         PerlIO_printf( Perl_debug_log,
@@ -1971,11 +2007,9 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, regnode *firs
                         str=STRING(convert);
                         STR_LEN(convert)=0;
                     }
-                    while (*ch) {
+                    STR_LEN(convert) += len;
+                    while (len--)
                         *str++ = *ch++;
-                        STR_LEN(convert)++;
-                    }
-                    
                } else {
 #ifdef DEBUGGING           
                    if (state>1)
@@ -2386,6 +2420,34 @@ typedef struct scan_frame {
 
 #define SCAN_COMMIT(s, data, m) scan_commit(s, data, m, is_inf)
 
+#define CASE_SYNST_FNC(nAmE)                                       \
+case nAmE:                                                         \
+    if (flags & SCF_DO_STCLASS_AND) {                              \
+           for (value = 0; value < 256; value++)                  \
+               if (!is_ ## nAmE ## _cp(value))                       \
+                   ANYOF_BITMAP_CLEAR(data->start_class, value);  \
+    }                                                              \
+    else {                                                         \
+           for (value = 0; value < 256; value++)                  \
+               if (is_ ## nAmE ## _cp(value))                        \
+                   ANYOF_BITMAP_SET(data->start_class, value);    \
+    }                                                              \
+    break;                                                         \
+case N ## nAmE:                                                    \
+    if (flags & SCF_DO_STCLASS_AND) {                              \
+           for (value = 0; value < 256; value++)                   \
+               if (is_ ## nAmE ## _cp(value))                         \
+                   ANYOF_BITMAP_CLEAR(data->start_class, value);   \
+    }                                                               \
+    else {                                                          \
+           for (value = 0; value < 256; value++)                   \
+               if (!is_ ## nAmE ## _cp(value))                        \
+                   ANYOF_BITMAP_SET(data->start_class, value);     \
+    }                                                               \
+    break
+
+
+
 STATIC I32
 S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                         I32 *minlenp, I32 *deltap,
@@ -3256,7 +3318,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                                        SvUTF8(sv) && SvMAGICAL(sv) ?
                                        mg_find(sv, PERL_MAGIC_utf8) : NULL;
                                    if (mg && mg->mg_len >= 0)
-                                       mg->mg_len += CHR_SVLEN(last_str);
+                                       mg->mg_len += CHR_SVLEN(last_str) - l;
                                }
                                data->last_end += l * (mincount - 1);
                            }
@@ -3316,6 +3378,46 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                break;
            }
        }
+       else if (OP(scan) == LNBREAK) {
+           if (flags & SCF_DO_STCLASS) {
+               int value = 0;
+               data->start_class->flags &= ~ANYOF_EOS; /* No match on empty */
+               if (flags & SCF_DO_STCLASS_AND) {
+                    for (value = 0; value < 256; value++)
+                        if (!is_VERTWS_cp(value))
+                            ANYOF_BITMAP_CLEAR(data->start_class, value);  
+                }                                                              
+                else {                                                         
+                    for (value = 0; value < 256; value++)
+                        if (is_VERTWS_cp(value))
+                            ANYOF_BITMAP_SET(data->start_class, value);           
+                }                                                              
+                if (flags & SCF_DO_STCLASS_OR)
+                   cl_and(data->start_class, and_withp);
+               flags &= ~SCF_DO_STCLASS;
+            }
+           min += 1;
+           delta += 1;
+            if (flags & SCF_DO_SUBSTR) {
+               SCAN_COMMIT(pRExC_state,data,minlenp);  /* Cannot expect anything... */
+               data->pos_min += 1;
+               data->pos_delta += 1;
+               data->longest = &(data->longest_float);
+           }
+           
+       }
+       else if (OP(scan) == FOLDCHAR) {
+           int d = ARG(scan)==0xDF ? 1 : 2;
+           flags &= ~SCF_DO_STCLASS;
+            min += 1;
+            delta += d;
+            if (flags & SCF_DO_SUBSTR) {
+               SCAN_COMMIT(pRExC_state,data,minlenp);  /* Cannot expect anything... */
+               data->pos_min += 1;
+               data->pos_delta += d;
+               data->longest = &(data->longest_float);
+           }
+       }
        else if (strchr((const char*)PL_simple,OP(scan))) {
            int value = 0;
 
@@ -3510,6 +3612,9 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                        }
                    }
                    break;
+               CASE_SYNST_FNC(VERTWS);
+               CASE_SYNST_FNC(HORIZWS);
+               
                }
                if (flags & SCF_DO_STCLASS_OR)
                    cl_and(data->start_class, and_withp);
@@ -3880,6 +3985,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
        }
 #endif /* old or new */
 #endif /* TRIE_STUDY_OPT */    
+
        /* Else: zero-length, ignore. */
        scan = regnext(scan);
     }
@@ -4002,8 +4108,8 @@ extern const struct regexp_engine my_reg_engine;
 #endif
 
 #ifndef PERL_IN_XSUB_RE 
-regexp *
-Perl_pregcomp(pTHX_ char *exp, char *xend, PMOP *pm)
+REGEXP *
+Perl_pregcomp(pTHX_ const SV * const pattern, const U32 flags)
 {
     dVAR;
     HV * const table = GvHV(PL_hintgv);
@@ -4018,21 +4124,23 @@ Perl_pregcomp(pTHX_ char *exp, char *xend, PMOP *pm)
                 PerlIO_printf(Perl_debug_log, "Using engine %"UVxf"\n",
                     SvIV(*ptr));
             });            
-            return CALLREGCOMP_ENG(eng, exp, xend, pm);
+            return CALLREGCOMP_ENG(eng, pattern, flags);
         } 
     }
-    return Perl_re_compile(aTHX_ exp, xend, pm);
+    return Perl_re_compile(aTHX_ pattern, flags);
 }
 #endif
 
-regexp *
-Perl_re_compile(pTHX_ char *exp, char *xend, PMOP *pm)
+REGEXP *
+Perl_re_compile(pTHX_ const SV * const pattern, const U32 pm_flags)
 {
     dVAR;
-    register regexp *r;
+    register REGEXP *r;
     register regexp_internal *ri;
+    STRLEN plen;
+    char*  exp = SvPV((SV*)pattern, plen);
+    char* xend = exp + plen;
     regnode *scan;
-    regnode *first;
     I32 flags;
     I32 minlen = 0;
     I32 sawplus = 0;
@@ -4046,21 +4154,20 @@ Perl_re_compile(pTHX_ char *exp, char *xend, PMOP *pm)
 #endif    
     GET_RE_DEBUG_FLAGS_DECL;
     DEBUG_r(if (!PL_colorset) reginitcolors());
-        
-    if (exp == NULL)
-       FAIL("NULL regexp argument");
 
-    RExC_utf8 = pm->op_pmdynflags & PMdf_CMP_UTF8;
+    RExC_utf8 = RExC_orig_utf8 = pm_flags & RXf_UTF8;
 
-    RExC_precomp = exp;
     DEBUG_COMPILE_r({
         SV *dsv= sv_newmortal();
         RE_PV_QUOTED_DECL(s, RExC_utf8,
-            dsv, RExC_precomp, (xend - exp), 60);
+            dsv, exp, plen, 60);
         PerlIO_printf(Perl_debug_log, "%sCompiling REx%s %s\n",
                       PL_colors[4],PL_colors[5],s);
     });
-    RExC_flags = pm->op_pmflags;
+
+redo_first_pass:
+    RExC_precomp = exp;
+    RExC_flags = pm_flags;
     RExC_sawback = 0;
 
     RExC_seen = 0;
@@ -4098,6 +4205,25 @@ Perl_re_compile(pTHX_ char *exp, char *xend, PMOP *pm)
        RExC_precomp = NULL;
        return(NULL);
     }
+    if (RExC_utf8 && !RExC_orig_utf8) {
+        /* It's possible to write a regexp in ascii that represents Unicode
+        codepoints outside of the byte range, such as via \x{100}. If we
+        detect such a sequence we have to convert the entire pattern to utf8
+        and then recompile, as our sizing calculation will have been based
+        on 1 byte == 1 character, but we will need to use utf8 to encode
+        at least some part of the pattern, and therefore must convert the whole
+        thing.
+        XXX: somehow figure out how to make this less expensive...
+        -- dmq */
+        STRLEN len = plen;
+        DEBUG_PARSE_r(PerlIO_printf(Perl_debug_log,
+           "UTF8 mismatch! Converting to utf8 for resizing and compile\n"));
+        exp = (char*)Perl_bytes_to_utf8(aTHX_ (U8*)exp, &len);
+        xend = exp + len;
+        RExC_orig_utf8 = RExC_utf8;
+        SAVEFREEPV(exp);
+        goto redo_first_pass;
+    }
     DEBUG_PARSE_r({
         PerlIO_printf(Perl_debug_log, 
             "Required size %"IVdf" nodes\n"
@@ -4115,11 +4241,6 @@ Perl_re_compile(pTHX_ char *exp, char *xend, PMOP *pm)
     if (RExC_whilem_seen > 15)
        RExC_whilem_seen = 15;
 
-#ifdef DEBUGGING
-    /* Make room for a sentinel value at the end of the program */
-    RExC_size++;
-#endif
-
     /* Allocate space and zero-initialize. Note, the two step process 
        of zeroing when in debug mode, thus anything assigned has to 
        happen after that */
@@ -4140,24 +4261,24 @@ Perl_re_compile(pTHX_ char *exp, char *xend, PMOP *pm)
     RXi_SET( r, ri );
     r->engine= RE_ENGINE_PTR;
     r->refcnt = 1;
-    r->prelen = xend - exp;
-    r->extflags = pm->op_pmflags & RXf_PMf_COMPILETIME;
+    r->prelen = plen;
+    r->extflags = pm_flags;
     {
-        bool has_k     = ((r->extflags & RXf_PMf_KEEPCOPY) == RXf_PMf_KEEPCOPY);
+        bool has_p     = ((r->extflags & RXf_PMf_KEEPCOPY) == RXf_PMf_KEEPCOPY);
        bool has_minus = ((r->extflags & RXf_PMf_STD_PMMOD) != RXf_PMf_STD_PMMOD);
        bool has_runon = ((RExC_seen & REG_SEEN_RUN_ON_COMMENT)==REG_SEEN_RUN_ON_COMMENT);
        U16 reganch = (U16)((r->extflags & RXf_PMf_STD_PMMOD) >> 12);
        const char *fptr = STD_PAT_MODS;        /*"msix"*/
        char *p;
-        r->wraplen = r->prelen + has_minus + has_k + has_runon
+        r->wraplen = r->prelen + has_minus + has_p + has_runon
             + (sizeof(STD_PAT_MODS) - 1)
             + (sizeof("(?:)") - 1);
 
-        Newx(r->wrapped, r->wraplen, char );
+        Newx(r->wrapped, r->wraplen + 1, char );
         p = r->wrapped;
         *p++='('; *p++='?';
-        if (has_k)
-            *p++ = KEEPCOPY_PAT_MOD; /*'k'*/
+        if (has_p)
+            *p++ = KEEPCOPY_PAT_MOD; /*'p'*/
         {
             char *r = p + (sizeof(STD_PAT_MODS) - 1) + has_minus - 1;
             char *colon = r + 1;
@@ -4176,13 +4297,14 @@ Perl_re_compile(pTHX_ char *exp, char *xend, PMOP *pm)
             }
         }
 
-        *p++=':';
+        *p++ = ':';
         Copy(RExC_precomp, p, r->prelen, char);
         r->precomp = p;
         p += r->prelen;
         if (has_runon)
-            *p++='\n';
-        *p=')';
+            *p++ = '\n';
+        *p++ = ')';
+        *p = 0;
     }
 
     r->intflags = 0;
@@ -4208,24 +4330,22 @@ Perl_re_compile(pTHX_ char *exp, char *xend, PMOP *pm)
     RExC_rxi = ri;
 
     /* Second pass: emit code. */
-    RExC_flags = pm->op_pmflags;       /* don't let top level (?i) bleed */
+    RExC_flags = pm_flags;     /* don't let top level (?i) bleed */
     RExC_parse = exp;
     RExC_end = xend;
     RExC_naughty = 0;
     RExC_npar = 1;
     RExC_emit_start = ri->program;
     RExC_emit = ri->program;
-#ifdef DEBUGGING
-    /* put a sentinal on the end of the program so we can check for
-       overwrites */
-    ri->program[RExC_size].type = 255;
-#endif
+    RExC_emit_bound = ri->program + RExC_size + 1;
+
     /* Store the count of eval-groups for security checks: */
     RExC_rx->seen_evals = RExC_seen_evals;
     REGC((U8)REG_MAGIC, (char*) RExC_emit++);
-    if (reg(pRExC_state, 0, &flags,1) == NULL)
+    if (reg(pRExC_state, 0, &flags,1) == NULL) {
+       ReREFCNT_dec(r);   
        return(NULL);
-
+    }
     /* XXXX To minimize changes to RE engine we always allocate
        3-units-long substrs field. */
     Newx(r->substrs, 1, struct reg_substr_data);
@@ -4263,8 +4383,9 @@ reStudy:
 #endif    
 
     /* Dig out information for optimizations. */
-    r->extflags = pm->op_pmflags & RXf_PMf_COMPILETIME; /* Again? */
-    pm->op_pmflags = RExC_flags;
+    r->extflags = RExC_flags; /* was pm_op */
+    /*dmq: removed as part of de-PMOP: pm->op_pmflags = RExC_flags; */
     if (UTF)
         r->extflags |= RXf_UTF8;       /* Unicode in it? */
     ri->regstclass = NULL;
@@ -4280,18 +4401,20 @@ reStudy:
        struct regnode_charclass_class ch_class; /* pointed to by data */
        int stclass_flag;
        I32 last_close = 0; /* pointed to by data */
-
-       first = scan;
+        regnode *first= scan;
+        regnode *first_next= regnext(first);
+       
        /* Skip introductions and multiplicators >= 1. */
        while ((OP(first) == OPEN && (sawopen = 1)) ||
               /* An OR of *one* alternative - should not happen now. */
-           (OP(first) == BRANCH && OP(regnext(first)) != BRANCH) ||
+           (OP(first) == BRANCH && OP(first_next) != BRANCH) ||
            /* for now we can't handle lookbehind IFMATCH*/
            (OP(first) == IFMATCH && !first->flags) || 
            (OP(first) == PLUS) ||
            (OP(first) == MINMOD) ||
               /* An {n,m} with n>0 */
-           (PL_regkind[OP(first)] == CURLY && ARG1(first) > 0) ) 
+           (PL_regkind[OP(first)] == CURLY && ARG1(first) > 0) ||
+           (OP(first) == NOTHING && PL_regkind[OP(first_next)] != END ))
        {
                
                if (OP(first) == PLUS)
@@ -4303,6 +4426,7 @@ reStudy:
                    first += EXTRA_STEP_2ARGS;
                } else  /* XXX possible optimisation for /(?=)/  */
                    first = NEXTOPER(first);
+               first_next= regnext(first);
        }
 
        /* Starting-point info. */
@@ -4648,11 +4772,34 @@ reStudy:
         r->paren_names = (HV*)SvREFCNT_inc(RExC_paren_names);
     else
         r->paren_names = NULL;
-    if (r->prelen == 3 && strEQ("\\s+", r->precomp))
-       r->extflags |= RXf_WHITE;
+
+#ifdef STUPID_PATTERN_CHECKS            
+    if (r->prelen == 0)
+        r->extflags |= RXf_NULL;
+    if (r->extflags & RXf_SPLIT && r->prelen == 1 && r->precomp[0] == ' ')
+        /* XXX: this should happen BEFORE we compile */
+        r->extflags |= (RXf_SKIPWHITE|RXf_WHITE); 
+    else if (r->prelen == 3 && memEQ("\\s+", r->precomp, 3))
+        r->extflags |= RXf_WHITE;
     else if (r->prelen == 1 && r->precomp[0] == '^')
         r->extflags |= RXf_START_ONLY;
-
+#else
+    if (r->extflags & RXf_SPLIT && r->prelen == 1 && r->precomp[0] == ' ')
+            /* XXX: this should happen BEFORE we compile */
+            r->extflags |= (RXf_SKIPWHITE|RXf_WHITE); 
+    else {
+        regnode *first = ri->program + 1;
+        U8 fop = OP(first);
+        U8 nop = OP(NEXTOPER(first));
+        
+        if (PL_regkind[fop] == NOTHING && nop == END)
+            r->extflags |= RXf_NULL;
+        else if (PL_regkind[fop] == BOL && nop == END)
+            r->extflags |= RXf_START_ONLY;
+        else if (fop == PLUS && nop ==SPACE && OP(regnext(first))==END)
+            r->extflags |= RXf_WHITE;    
+    }
+#endif
 #ifdef DEBUGGING
     if (RExC_paren_names) {
         ri->name_list_idx = add_data( pRExC_state, 1, "p" );
@@ -4667,8 +4814,7 @@ reStudy:
             ARG2L_SET( scan, RExC_open_parens[ARG(scan)-1] - scan );
         }
     }
-    Newxz(r->startp, RExC_npar, I32);
-    Newxz(r->endp, RExC_npar, I32);
+    Newxz(r->offs, RExC_npar, regexp_paren_pair);
     /* assume we don't need to swap parens around before we match */
 
     DEBUG_DUMP_r({
@@ -4696,11 +4842,52 @@ reStudy:
 
 
 SV*
-Perl_reg_named_buff_get(pTHX_ const REGEXP * const rx, SV* namesv, U32 flags)
+Perl_reg_named_buff(pTHX_ REGEXP * const rx, SV * const key, SV * const value,
+                    const U32 flags)
+{
+    PERL_UNUSED_ARG(value);
+
+    if (flags & RXapif_FETCH) {
+        return reg_named_buff_fetch(rx, key, flags);
+    } else if (flags & (RXapif_STORE | RXapif_DELETE | RXapif_CLEAR)) {
+        Perl_croak(aTHX_ PL_no_modify);
+        return NULL;
+    } else if (flags & RXapif_EXISTS) {
+        return reg_named_buff_exists(rx, key, flags)
+            ? &PL_sv_yes
+            : &PL_sv_no;
+    } else if (flags & RXapif_REGNAMES) {
+        return reg_named_buff_all(rx, flags);
+    } else if (flags & (RXapif_SCALAR | RXapif_REGNAMES_COUNT)) {
+        return reg_named_buff_scalar(rx, flags);
+    } else {
+        Perl_croak(aTHX_ "panic: Unknown flags %d in named_buff", (int)flags);
+        return NULL;
+    }
+}
+
+SV*
+Perl_reg_named_buff_iter(pTHX_ REGEXP * const rx, const SV * const lastkey,
+                         const U32 flags)
+{
+    PERL_UNUSED_ARG(lastkey);
+
+    if (flags & RXapif_FIRSTKEY)
+        return reg_named_buff_firstkey(rx, flags);
+    else if (flags & RXapif_NEXTKEY)
+        return reg_named_buff_nextkey(rx, flags);
+    else {
+        Perl_croak(aTHX_ "panic: Unknown flags %d in named_buff_iter", (int)flags);
+        return NULL;
+    }
+}
+
+SV*
+Perl_reg_named_buff_fetch(pTHX_ REGEXP * const rx, SV * const namesv, const U32 flags)
 {
     AV *retarray = NULL;
     SV *ret;
-    if (flags & 1) 
+    if (flags & RXapif_ALL)
         retarray=newAV();
 
     if (rx && rx->paren_names) {
@@ -4710,62 +4897,183 @@ Perl_reg_named_buff_get(pTHX_ const REGEXP * const rx, SV* namesv, U32 flags)
             SV* sv_dat=HeVAL(he_str);
             I32 *nums=(I32*)SvPVX(sv_dat);
             for ( i=0; i<SvIVX(sv_dat); i++ ) {
-               if ((I32)(rx->lastparen) >= nums[i] &&
-                   rx->endp[nums[i]] != -1)
+                if ((I32)(rx->nparens) >= nums[i]
+                    && rx->offs[nums[i]].start != -1
+                    && rx->offs[nums[i]].end != -1)
                 {
-                    ret = CALLREG_NUMBUF(rx,nums[i],NULL);
+                    ret = newSVpvs("");
+                    CALLREG_NUMBUF_FETCH(rx,nums[i],ret);
                     if (!retarray)
                         return ret;
                 } else {
                     ret = newSVsv(&PL_sv_undef);
                 }
                 if (retarray) {
-                    SvREFCNT_inc(ret);
+                    SvREFCNT_inc_simple_void(ret);
                     av_push(retarray, ret);
                 }
             }
             if (retarray)
-                return (SV*)retarray;
+                return newRV((SV*)retarray);
         }
     }
     return NULL;
 }
 
+bool
+Perl_reg_named_buff_exists(pTHX_ REGEXP * const rx, SV * const key,
+                           const U32 flags)
+{
+    if (rx && rx->paren_names) {
+        if (flags & RXapif_ALL) {
+            return hv_exists_ent(rx->paren_names, key, 0);
+        } else {
+           SV *sv = CALLREG_NAMED_BUFF_FETCH(rx, key, flags);
+            if (sv) {
+               SvREFCNT_dec(sv);
+                return TRUE;
+            } else {
+                return FALSE;
+            }
+        }
+    } else {
+        return FALSE;
+    }
+}
+
 SV*
-Perl_reg_numbered_buff_get(pTHX_ const REGEXP * const rx, I32 paren, SV* usesv)
+Perl_reg_named_buff_firstkey(pTHX_ REGEXP * const rx, const U32 flags)
+{
+    if ( rx && rx->paren_names ) {
+       (void)hv_iterinit(rx->paren_names);
+
+       return CALLREG_NAMED_BUFF_NEXTKEY(rx, NULL, flags & ~RXapif_FIRSTKEY);
+    } else {
+       return FALSE;
+    }
+}
+
+SV*
+Perl_reg_named_buff_nextkey(pTHX_ REGEXP * const rx, const U32 flags)
+{
+    if (rx && rx->paren_names) {
+        HV *hv = rx->paren_names;
+        HE *temphe;
+        while ( (temphe = hv_iternext_flags(hv,0)) ) {
+            IV i;
+            IV parno = 0;
+            SV* sv_dat = HeVAL(temphe);
+            I32 *nums = (I32*)SvPVX(sv_dat);
+            for ( i = 0; i < SvIVX(sv_dat); i++ ) {
+                if ((I32)(rx->lastcloseparen) >= nums[i] &&
+                    rx->offs[nums[i]].start != -1 &&
+                    rx->offs[nums[i]].end != -1)
+                {
+                    parno = nums[i];
+                    break;
+                }
+            }
+            if (parno || flags & RXapif_ALL) {
+                STRLEN len;
+                char *pv = HePV(temphe, len);
+                return newSVpvn(pv,len);
+            }
+        }
+    }
+    return NULL;
+}
+
+SV*
+Perl_reg_named_buff_scalar(pTHX_ REGEXP * const rx, const U32 flags)
+{
+    SV *ret;
+    AV *av;
+    I32 length;
+
+    if (rx && rx->paren_names) {
+        if (flags & (RXapif_ALL | RXapif_REGNAMES_COUNT)) {
+            return newSViv(HvTOTALKEYS(rx->paren_names));
+        } else if (flags & RXapif_ONE) {
+            ret = CALLREG_NAMED_BUFF_ALL(rx, (flags | RXapif_REGNAMES));
+            av = (AV*)SvRV(ret);
+            length = av_len(av);
+            return newSViv(length + 1);
+        } else {
+            Perl_croak(aTHX_ "panic: Unknown flags %d in named_buff_scalar", (int)flags);
+            return NULL;
+        }
+    }
+    return &PL_sv_undef;
+}
+
+SV*
+Perl_reg_named_buff_all(pTHX_ REGEXP * const rx, const U32 flags)
+{
+    AV *av = newAV();
+
+    if (rx && rx->paren_names) {
+        HV *hv= rx->paren_names;
+        HE *temphe;
+        (void)hv_iterinit(hv);
+        while ( (temphe = hv_iternext_flags(hv,0)) ) {
+            IV i;
+            IV parno = 0;
+            SV* sv_dat = HeVAL(temphe);
+            I32 *nums = (I32*)SvPVX(sv_dat);
+            for ( i = 0; i < SvIVX(sv_dat); i++ ) {
+                if ((I32)(rx->lastcloseparen) >= nums[i] &&
+                    rx->offs[nums[i]].start != -1 &&
+                    rx->offs[nums[i]].end != -1)
+                {
+                    parno = nums[i];
+                    break;
+                }
+            }
+            if (parno || flags & RXapif_ALL) {
+                STRLEN len;
+                char *pv = HePV(temphe, len);
+                av_push(av, newSVpvn(pv,len));
+            }
+        }
+    }
+
+    return newRV((SV*)av);
+}
+
+void
+Perl_reg_numbered_buff_fetch(pTHX_ REGEXP * const rx, const I32 paren, SV * const sv)
 {
     char *s = NULL;
     I32 i = 0;
     I32 s1, t1;
-    SV *sv = usesv ? usesv : newSVpvs("");
         
     if (!rx->subbeg) {
         sv_setsv(sv,&PL_sv_undef);
-        return sv;
+        return;
     } 
     else               
-    if (paren == -2 && rx->startp[0] != -1) {
+    if (paren == RX_BUFF_IDX_PREMATCH && rx->offs[0].start != -1) {
         /* $` */
-       i = rx->startp[0];
+       i = rx->offs[0].start;
        s = rx->subbeg;
     }
     else 
-    if (paren == -1 && rx->endp[0] != -1) {
+    if (paren == RX_BUFF_IDX_POSTMATCH && rx->offs[0].end != -1) {
         /* $' */
-       s = rx->subbeg + rx->endp[0];
-       i = rx->sublen - rx->endp[0];
+       s = rx->subbeg + rx->offs[0].end;
+       i = rx->sublen - rx->offs[0].end;
     } 
     else
     if ( 0 <= paren && paren <= (I32)rx->nparens &&
-        (s1 = rx->startp[paren]) != -1 &&
-        (t1 = rx->endp[paren]) != -1)
+        (s1 = rx->offs[paren].start) != -1 &&
+        (t1 = rx->offs[paren].end) != -1)
     {
         /* $& $1 ... */
         i = t1 - s1;
         s = rx->subbeg + s1;
     } else {
         sv_setsv(sv,&PL_sv_undef);
-        return sv;
+        return;
     }          
     assert(rx->sublen >= (s - rx->subbeg) + i );
     if (i >= 0) {
@@ -4803,10 +5111,86 @@ Perl_reg_numbered_buff_get(pTHX_ const REGEXP * const rx, I32 paren, SV* usesv)
         }
     } else {
         sv_setsv(sv,&PL_sv_undef);
+        return;
     }
-    return sv;
 }
 
+void
+Perl_reg_numbered_buff_store(pTHX_ REGEXP * const rx, const I32 paren,
+                                                        SV const * const value)
+{
+    PERL_UNUSED_ARG(rx);
+    PERL_UNUSED_ARG(paren);
+    PERL_UNUSED_ARG(value);
+
+    if (!PL_localizing)
+        Perl_croak(aTHX_ PL_no_modify);
+}
+
+I32
+Perl_reg_numbered_buff_length(pTHX_ REGEXP * const rx, const SV * const sv,
+                              const I32 paren)
+{
+    I32 i;
+    I32 s1, t1;
+
+    /* Some of this code was originally in C<Perl_magic_len> in F<mg.c> */
+       switch (paren) {
+      /* $` / ${^PREMATCH} */
+      case RX_BUFF_IDX_PREMATCH:
+        if (rx->offs[0].start != -1) {
+                       i = rx->offs[0].start;
+                       if (i > 0) {
+                               s1 = 0;
+                               t1 = i;
+                               goto getlen;
+                       }
+           }
+        return 0;
+      /* $' / ${^POSTMATCH} */
+      case RX_BUFF_IDX_POSTMATCH:
+           if (rx->offs[0].end != -1) {
+                       i = rx->sublen - rx->offs[0].end;
+                       if (i > 0) {
+                               s1 = rx->offs[0].end;
+                               t1 = rx->sublen;
+                               goto getlen;
+                       }
+           }
+        return 0;
+      /* $& / ${^MATCH}, $1, $2, ... */
+      default:
+           if (paren <= (I32)rx->nparens &&
+            (s1 = rx->offs[paren].start) != -1 &&
+            (t1 = rx->offs[paren].end) != -1)
+           {
+            i = t1 - s1;
+            goto getlen;
+        } else {
+            if (ckWARN(WARN_UNINITIALIZED))
+                report_uninit((SV*)sv);
+            return 0;
+        }
+    }
+  getlen:
+    if (i > 0 && RX_MATCH_UTF8(rx)) {
+        const char * const s = rx->subbeg + s1;
+        const U8 *ep;
+        STRLEN el;
+
+        i = t1 - s1;
+        if (is_utf8_string_loclen((U8*)s, i, &ep, &el))
+                       i = el;
+    }
+    return i;
+}
+
+SV*
+Perl_reg_qr_package(pTHX_ REGEXP * const rx)
+{
+       PERL_UNUSED_ARG(rx);
+       return newSVpvs("Regexp");
+}
 
 /* Scans the name of a named buffer from the pattern.
  * If flags is REG_RSN_RETURN_NULL returns null.
@@ -4884,7 +5268,7 @@ S_reg_scan_name(pTHX_ RExC_state_t *pRExC_state, U32 flags) {
         PerlIO_printf(Perl_debug_log,"%16s","");                \
                                                                 \
     if (SIZE_ONLY)                                              \
-       num=RExC_size;                                           \
+       num = RExC_size + 1;                                     \
     else                                                        \
        num=REG_NODE_NUM(RExC_emit);                             \
     if (RExC_lastnum!=num)                                      \
@@ -4925,10 +5309,6 @@ S_reg_scan_name(pTHX_ RExC_state_t *pRExC_state, U32 flags) {
 #define REGTAIL_STUDY(x,y,z) regtail((x),(y),(z),depth+1)
 #endif
 
-/* this idea is borrowed from STR_WITH_LEN in handy.h */
-#define CHECK_WORD(s,v,l)  \
-    (((sizeof(s)-1)==(l)) && (strnEQ(start_verb, (s ""), (sizeof(s)-1))))
-
 STATIC regnode *
 S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
     /* paren: Parenthesized? 0=top, 1=(, inside: changed to letter. */
@@ -4940,7 +5320,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
     register regnode *ender = NULL;
     register I32 parno = 0;
     I32 flags;
-    const I32 oregflags = RExC_flags;
+    U32 oregflags = RExC_flags;
     bool have_branch = 0;
     bool is_open = 0;
     I32 freeze_paren = 0;
@@ -4961,7 +5341,6 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
     GET_RE_DEBUG_FLAGS_DECL;
     DEBUG_PARSE("reg ");
 
-
     *flagp = 0;                                /* Tentatively. */
 
 
@@ -4998,39 +5377,39 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
            
            switch ( *start_verb ) {
             case 'A':  /* (*ACCEPT) */
-                if ( CHECK_WORD("ACCEPT",start_verb,verb_len) ) {
+                if ( memEQs(start_verb,verb_len,"ACCEPT") ) {
                    op = ACCEPT;
                    internal_argval = RExC_nestroot;
                }
                break;
             case 'C':  /* (*COMMIT) */
-                if ( CHECK_WORD("COMMIT",start_verb,verb_len) )
+                if ( memEQs(start_verb,verb_len,"COMMIT") )
                     op = COMMIT;
                 break;
             case 'F':  /* (*FAIL) */
-                if ( verb_len==1 || CHECK_WORD("FAIL",start_verb,verb_len) ) {
+                if ( verb_len==1 || memEQs(start_verb,verb_len,"FAIL") ) {
                    op = OPFAIL;
                    argok = 0;
                }
                break;
             case ':':  /* (*:NAME) */
            case 'M':  /* (*MARK:NAME) */
-               if ( verb_len==0 || CHECK_WORD("MARK",start_verb,verb_len) ) {
+               if ( verb_len==0 || memEQs(start_verb,verb_len,"MARK") ) {
                     op = MARKPOINT;
                     argok = -1;
                 }
                 break;
             case 'P':  /* (*PRUNE) */
-                if ( CHECK_WORD("PRUNE",start_verb,verb_len) )
+                if ( memEQs(start_verb,verb_len,"PRUNE") )
                     op = PRUNE;
                 break;
             case 'S':   /* (*SKIP) */  
-                if ( CHECK_WORD("SKIP",start_verb,verb_len) ) 
+                if ( memEQs(start_verb,verb_len,"SKIP") ) 
                     op = SKIP;
                 break;
             case 'T':  /* (*THEN) */
                 /* [19:06] <TimToady> :: is then */
-                if ( CHECK_WORD("THEN",start_verb,verb_len) ) {
+                if ( memEQs(start_verb,verb_len,"THEN") ) {
                     op = CUTGROUP;
                     RExC_seen |= REG_SEEN_CUTGROUP;
                 }
@@ -5101,7 +5480,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
                     if (!SIZE_ONLY) {
                         num = add_data( pRExC_state, 1, "S" );
                         RExC_rxi->data->data[num]=(void*)sv_dat;
-                        SvREFCNT_inc(sv_dat);
+                        SvREFCNT_inc_simple_void(sv_dat);
                     }
                     RExC_sawback = 1;
                     ret = reganode(pRExC_state,
@@ -5324,10 +5703,6 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
             } /* named and numeric backreferences */
             /* NOT REACHED */
 
-           case 'p':           /* (?p...) */
-               if (SIZE_ONLY && ckWARN2(WARN_DEPRECATED, WARN_REGEXP))
-                   vWARNdep(RExC_parse, "(?p{}) is deprecated - use (??{})");
-               /* FALL THROUGH*/
            case '?':           /* (??...) */
                is_logical = 1;
                if (*RExC_parse != '{') {
@@ -5440,7 +5815,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
                    if (!SIZE_ONLY) {
                         num = add_data( pRExC_state, 1, "S" );
                         RExC_rxi->data->data[num]=(void*)sv_dat;
-                        SvREFCNT_inc(sv_dat);
+                        SvREFCNT_inc_simple_void(sv_dat);
                     }
                     ret = reganode(pRExC_state,NGROUPP,num);
                     goto insert_if_check_paren;
@@ -5518,6 +5893,9 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
                    }
                    else
                         REGTAIL(pRExC_state, ret, ender);
+                    RExC_size++; /* XXX WHY do we need this?!!
+                                    For large programs it seems to be required
+                                    but I can't figure out why. -- dmq*/
                    return ret;
                }
                else {
@@ -5541,8 +5919,8 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
                       and must be globally applied -- japhy */
                     switch (*RExC_parse) {
                    CASE_STD_PMMOD_FLAGS_PARSE_SET(flagsp);
-                    case 'o':
-                    case 'g':
+                    case ONCE_PAT_MOD: /* 'o' */
+                    case GLOBAL_PAT_MOD: /* 'g' */
                        if (SIZE_ONLY && ckWARN(WARN_REGEXP)) {
                            const I32 wflagbit = *RExC_parse == 'o' ? WASTED_O : WASTED_G;
                            if (! (wastedflags & wflagbit) ) {
@@ -5559,7 +5937,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
                        }
                        break;
                        
-                   case 'c':
+                   case CONTINUE_PAT_MOD: /* 'c' */
                        if (SIZE_ONLY && ckWARN(WARN_REGEXP)) {
                            if (! (wastedflags & WASTED_C) ) {
                                wastedflags |= WASTED_GC;
@@ -5572,10 +5950,10 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
                            }
                        }
                        break;
-                   case 'k':
+                   case KEEPCOPY_PAT_MOD: /* 'p' */
                         if (flagsp == &negflags) {
                             if (SIZE_ONLY && ckWARN(WARN_REGEXP))
-                                vWARN(RExC_parse + 1,"Useless use of (?-k)");
+                                vWARN(RExC_parse + 1,"Useless use of (?-p)");
                         } else {
                             *flagsp |= RXf_PMf_KEEPCOPY;
                         }
@@ -5595,6 +5973,10 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
                     case ')':
                         RExC_flags |= posflags;
                         RExC_flags &= ~negflags;
+                        if (paren != ':') {
+                            oregflags |= posflags;
+                            oregflags &= ~negflags;
+                        }
                         nextchar(pRExC_state);
                        if (paren != ':') {
                            *flagp = TRYAGAIN;
@@ -5802,6 +6184,7 @@ S_regbranch(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, I32 first, U32 depth)
     I32 flags = 0, c = 0;
     GET_RE_DEBUG_FLAGS_DECL;
     DEBUG_PARSE("brnc");
+
     if (first)
        ret = NULL;
     else {
@@ -6108,11 +6491,12 @@ S_reg_namedseq(pTHX_ RExC_state_t *pRExC_state, UV *valuep)
     /* RExC_parse points at the beginning brace, 
        endbrace points at the last */
     if ( name[0]=='U' && name[1]=='+' ) {
-        /* its a "unicode hex" notation {U+89AB} */
+        /* its a "Unicode hex" notation {U+89AB} */
         I32 fl = PERL_SCAN_ALLOW_UNDERSCORES
             | PERL_SCAN_DISALLOW_PREFIX
             | (SIZE_ONLY ? PERL_SCAN_SILENT_ILLDIGIT : 0);
         UV cp;
+       char string;
         len = (STRLEN)(endbrace - name - 2);
         cp = grok_hex(name + 2, &len, &fl, NULL);
         if ( len != (STRLEN)(endbrace - name - 2) ) {
@@ -6124,7 +6508,8 @@ S_reg_namedseq(pTHX_ RExC_state_t *pRExC_state, UV *valuep)
             *valuep = cp;
             return NULL;
         }
-        sv_str= Perl_newSVpvf_nocontext("%c",(int)cp);
+       string = (char)cp;
+        sv_str= newSVpvn(&string, 1);
     } else {
         /* fetch the charnames handler for this scope */
         HV * const table = GvHV(PL_hintgv);
@@ -6332,8 +6717,7 @@ S_reg_recode(pTHX_ const char value, SV **encp)
 {
     STRLEN numlen = 1;
     SV * const sv = sv_2mortal(newSVpvn(&value, numlen));
-    const char * const s = encp && *encp ? sv_recode_to_utf8(sv, *encp)
-                                        : SvPVX(sv);
+    const char * const s = *encp ? sv_recode_to_utf8(sv, *encp) : SvPVX(sv);
     const STRLEN newlen = SvCUR(sv);
     UV uv = UNICODE_REPLACEMENT;
 
@@ -6344,8 +6728,7 @@ S_reg_recode(pTHX_ const char value, SV **encp)
 
     if (!newlen || numlen != newlen) {
        uv = UNICODE_REPLACEMENT;
-       if (encp)
-           *encp = NULL;
+       *encp = NULL;
     }
     return uv;
 }
@@ -6386,7 +6769,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
 
 
 tryagain:
-    switch (*RExC_parse) {
+    switch ((U8)*RExC_parse) {
     case '^':
        RExC_seen_zerolen++;
        nextchar(pRExC_state);
@@ -6470,6 +6853,22 @@ tryagain:
        RExC_parse++;
        vFAIL("Quantifier follows nothing");
        break;
+    case 0xDF:
+    case 0xC3:
+    case 0xCE:
+        if (!LOC && FOLD) {
+            U32 len,cp;
+           len=0; /* silence a spurious compiler warning */
+            if ((cp = what_len_TRICKYFOLD_safe(RExC_parse,RExC_end,UTF,len))) {
+                *flagp |= HASWIDTH; /* could be SIMPLE too, but needs a handler in regexec.regrepeat */
+                RExC_parse+=len-1; /* we get one from nextchar() as well. :-( */
+                ret = reganode(pRExC_state, FOLDCHAR, cp);
+                Set_Node_Length(ret, 1); /* MJD */
+                nextchar(pRExC_state); /* kill whitespace under /x */
+                return ret;
+            }
+        }
+        goto outer_default;
     case '\\':
        /* Special Escapes
 
@@ -6555,15 +6954,25 @@ tryagain:
            ret = reg_node(pRExC_state, NDIGIT);
            *flagp |= HASWIDTH|SIMPLE;
            goto finish_meta_pat;
+       case 'R':
+           ret = reg_node(pRExC_state, LNBREAK);
+           *flagp |= HASWIDTH|SIMPLE;
+           goto finish_meta_pat;
+       case 'h':
+           ret = reg_node(pRExC_state, HORIZWS);
+           *flagp |= HASWIDTH|SIMPLE;
+           goto finish_meta_pat;
+       case 'H':
+           ret = reg_node(pRExC_state, NHORIZWS);
+           *flagp |= HASWIDTH|SIMPLE;
+           goto finish_meta_pat;
        case 'v':
-           ret = reganode(pRExC_state, PRUNE, 0);
-           ret->flags = 1;
-           *flagp |= SIMPLE;
+           ret = reg_node(pRExC_state, VERTWS);
+           *flagp |= HASWIDTH|SIMPLE;
            goto finish_meta_pat;
        case 'V':
-           ret = reganode(pRExC_state, SKIP, 0);
-           ret->flags = 1;
-           *flagp |= SIMPLE;
+           ret = reg_node(pRExC_state, NVERTWS);
+           *flagp |= HASWIDTH|SIMPLE;
          finish_meta_pat:          
            nextchar(pRExC_state);
             Set_Node_Length(ret, 2); /* MJD */
@@ -6634,7 +7043,7 @@ tryagain:
                 if (!SIZE_ONLY) {
                     num = add_data( pRExC_state, 1, "S" );
                     RExC_rxi->data->data[num]=(void*)sv_dat;
-                    SvREFCNT_inc(sv_dat);
+                    SvREFCNT_inc_simple_void(sv_dat);
                 }
 
                 RExC_sawback = 1;
@@ -6675,6 +7084,8 @@ tryagain:
                        goto parse_named_seq;
                }   }
                num = atoi(RExC_parse);
+               if (isg && num == 0)
+                   vFAIL("Reference to invalid group 0");
                 if (isrel) {
                     num = RExC_npar - num;
                     if (num < 1)
@@ -6730,7 +7141,8 @@ tryagain:
        }
        /* FALL THROUGH */
 
-    default: {
+    default:
+        outer_default:{
            register STRLEN len;
            register UV ender;
            register char *p;
@@ -6755,7 +7167,12 @@ tryagain:
 
                if (RExC_flags & RXf_PMf_EXTENDED)
                    p = regwhite( pRExC_state, p );
-               switch (*p) {
+               switch ((U8)*p) {
+               case 0xDF:
+               case 0xC3:
+               case 0xCE:
+                          if (LOC || !FOLD || !is_TRICKYFOLD_safe(p,RExC_end,UTF))
+                               goto normal_default;
                case '^':
                case '$':
                case '.':
@@ -6785,11 +7202,13 @@ tryagain:
                    case 'C':             /* Single char !DANGEROUS! */
                    case 'd': case 'D':   /* digit class */
                    case 'g': case 'G':   /* generic-backref, pos assertion */
+                   case 'h': case 'H':   /* HORIZWS */
                    case 'k': case 'K':   /* named backref, keep marker */
                    case 'N':             /* named char sequence */
-                   case 'p': case 'P':   /* unicode property */
+                   case 'p': case 'P':   /* Unicode property */
+                             case 'R':   /* LNBREAK */
                    case 's': case 'S':   /* space class */
-                   case 'v': case 'V':   /* (*PRUNE) and (*SKIP) */
+                   case 'v': case 'V':   /* VERTWS */
                    case 'w': case 'W':   /* word class */
                    case 'X':             /* eXtended Unicode "combining character sequence" */
                    case 'z': case 'Z':   /* End of line/string assertion */
@@ -7212,6 +7631,21 @@ case ANYOF_N##NAME:                                     \
     what = WORD;                                        \
     break
 
+#define _C_C_T_NOLOC_(NAME,TEST,WORD)                   \
+ANYOF_##NAME:                                           \
+       for (value = 0; value < 256; value++)           \
+           if (TEST)                                   \
+               ANYOF_BITMAP_SET(ret, value);           \
+    yesno = '+';                                        \
+    what = WORD;                                        \
+    break;                                              \
+case ANYOF_N##NAME:                                     \
+       for (value = 0; value < 256; value++)           \
+           if (!TEST)                                  \
+               ANYOF_BITMAP_SET(ret, value);           \
+    yesno = '!';                                        \
+    what = WORD;                                        \
+    break
 
 /*
    parse a class specification and produce either an ANYOF node that
@@ -7224,10 +7658,10 @@ STATIC regnode *
 S_regclass(pTHX_ RExC_state_t *pRExC_state, U32 depth)
 {
     dVAR;
-    register UV value = 0;
     register UV nextvalue;
     register IV prevvalue = OOB_UNICODE;
     register IV range = 0;
+    UV value = 0; /* XXX:dmq: needs to be referenceable (unfortunately) */
     register regnode *ret;
     STRLEN numlen;
     IV namedclass;
@@ -7330,6 +7764,10 @@ parseit:
            case 'S':   namedclass = ANYOF_NSPACE;      break;
            case 'd':   namedclass = ANYOF_DIGIT;       break;
            case 'D':   namedclass = ANYOF_NDIGIT;      break;
+           case 'v':   namedclass = ANYOF_VERTWS;      break;
+           case 'V':   namedclass = ANYOF_NVERTWS;     break;
+           case 'h':   namedclass = ANYOF_HORIZWS;     break;
+           case 'H':   namedclass = ANYOF_NHORIZWS;    break;
             case 'N':  /* Handle \N{NAME} in class */
                 {
                     /* We only pay attention to the first char of 
@@ -7508,6 +7946,8 @@ parseit:
                case _C_C_T_(SPACE, isSPACE(value), "SpacePerl");
                case _C_C_T_(UPPER, isUPPER(value), "Upper");
                case _C_C_T_(XDIGIT, isXDIGIT(value), "XDigit");
+               case _C_C_T_NOLOC_(VERTWS, is_VERTWS_latin1(&value), "VertSpace");
+               case _C_C_T_NOLOC_(HORIZWS, is_HORIZWS_latin1(&value), "HorizSpace");
                case ANYOF_ASCII:
                    if (LOC)
                        ANYOF_CLASS_SET(ret, ANYOF_ASCII);
@@ -7629,12 +8069,16 @@ parseit:
                {
                    if (isLOWER(prevvalue)) {
                        for (i = prevvalue; i <= ceilvalue; i++)
-                           if (isLOWER(i))
+                           if (isLOWER(i) && !ANYOF_BITMAP_TEST(ret,i)) {
+                               stored++;
                                ANYOF_BITMAP_SET(ret, i);
+                           }
                    } else {
                        for (i = prevvalue; i <= ceilvalue; i++)
-                           if (isUPPER(i))
+                           if (isUPPER(i) && !ANYOF_BITMAP_TEST(ret,i)) {
+                               stored++;
                                ANYOF_BITMAP_SET(ret, i);
+                           }
                    }
                }
                else
@@ -7746,7 +8190,7 @@ parseit:
         return ret;
     /****** !SIZE_ONLY AFTER HERE *********/
 
-    if( stored == 1 && value < 256
+    if( stored == 1 && (value < 128 || (value < 256 && !UTF))
         && !( ANYOF_FLAGS(ret) & ( ANYOF_FLAGS_ALL ^ ANYOF_FOLD ) )
     ) {
         /* optimize single char class to an EXACT node
@@ -7896,11 +8340,9 @@ S_reg_node(pTHX_ RExC_state_t *pRExC_state, U8 op)
        RExC_size += 1;
        return(ret);
     }
-#ifdef DEBUGGING
-    if (OP(RExC_emit) == 255)
-        Perl_croak(aTHX_ "panic: reg_node overrun trying to emit %s: %d ",
-            reg_name[op], OP(RExC_emit));
-#endif  
+    if (RExC_emit >= RExC_emit_bound)
+        Perl_croak(aTHX_ "panic: reg_node overrun trying to emit %d", op);
+
     NODE_ALIGN_FILL(ret);
     ptr = ret;
     FILL_ADVANCE_NODE(ptr, op);
@@ -7908,7 +8350,7 @@ S_reg_node(pTHX_ RExC_state_t *pRExC_state, U8 op)
     if (RExC_offsets) {         /* MJD */
        MJD_OFFSET_DEBUG(("%s:%d: (op %s) %s %"UVuf" (len %"UVuf") (max %"UVuf").\n", 
               "reg_node", __LINE__, 
-              reg_name[op],
+              PL_reg_name[op],
               (UV)(RExC_emit - RExC_emit_start) > RExC_offsets[0] 
                ? "Overwriting end of array!\n" : "OK",
               (UV)(RExC_emit - RExC_emit_start),
@@ -7951,10 +8393,9 @@ S_reganode(pTHX_ RExC_state_t *pRExC_state, U8 op, U32 arg)
        */
        return(ret);
     }
-#ifdef DEBUGGING
-    if (OP(RExC_emit) == 255)
-        Perl_croak(aTHX_ "panic: reganode overwriting end of allocated program space");
-#endif 
+    if (RExC_emit >= RExC_emit_bound)
+        Perl_croak(aTHX_ "panic: reg_node overrun trying to emit %d", op);
+
     NODE_ALIGN_FILL(ret);
     ptr = ret;
     FILL_ADVANCE_NODE_ARG(ptr, op, arg);
@@ -7963,7 +8404,7 @@ S_reganode(pTHX_ RExC_state_t *pRExC_state, U8 op, U32 arg)
        MJD_OFFSET_DEBUG(("%s(%d): (op %s) %s %"UVuf" <- %"UVuf" (max %"UVuf").\n", 
               "reganode",
              __LINE__,
-             reg_name[op],
+             PL_reg_name[op],
               (UV)(RExC_emit - RExC_emit_start) > RExC_offsets[0] ? 
               "Overwriting end of array!\n" : "OK",
               (UV)(RExC_emit - RExC_emit_start),
@@ -8001,8 +8442,9 @@ S_reginsert(pTHX_ RExC_state_t *pRExC_state, U8 op, regnode *opnd, U32 depth)
     const int offset = regarglen[(U8)op];
     const int size = NODE_STEP_REGNODE + offset;
     GET_RE_DEBUG_FLAGS_DECL;
+    PERL_UNUSED_ARG(depth);
 /* (PL_regkind[(U8)op] == CURLY ? EXTRA_STEP_2ARGS : 0); */
-    DEBUG_PARSE_FMT("inst"," - %s",reg_name[op]);
+    DEBUG_PARSE_FMT("inst"," - %s",PL_reg_name[op]);
     if (SIZE_ONLY) {
        RExC_size += size;
        return;
@@ -8013,19 +8455,19 @@ S_reginsert(pTHX_ RExC_state_t *pRExC_state, U8 op, regnode *opnd, U32 depth)
     dst = RExC_emit;
     if (RExC_open_parens) {
         int paren;
-        DEBUG_PARSE_FMT("inst"," - %"IVdf, (IV)RExC_npar);
+        /*DEBUG_PARSE_FMT("inst"," - %"IVdf, (IV)RExC_npar);*/
         for ( paren=0 ; paren < RExC_npar ; paren++ ) {
             if ( RExC_open_parens[paren] >= opnd ) {
-                DEBUG_PARSE_FMT("open"," - %d",size);
+                /*DEBUG_PARSE_FMT("open"," - %d",size);*/
                 RExC_open_parens[paren] += size;
             } else {
-                DEBUG_PARSE_FMT("open"," - %s","ok");
+                /*DEBUG_PARSE_FMT("open"," - %s","ok");*/
             }
             if ( RExC_close_parens[paren] >= opnd ) {
-                DEBUG_PARSE_FMT("close"," - %d",size);
+                /*DEBUG_PARSE_FMT("close"," - %d",size);*/
                 RExC_close_parens[paren] += size;
             } else {
-                DEBUG_PARSE_FMT("close"," - %s","ok");
+                /*DEBUG_PARSE_FMT("close"," - %s","ok");*/
             }
         }
     }
@@ -8037,7 +8479,7 @@ S_reginsert(pTHX_ RExC_state_t *pRExC_state, U8 op, regnode *opnd, U32 depth)
            MJD_OFFSET_DEBUG(("%s(%d): (op %s) %s copy %"UVuf" -> %"UVuf" (max %"UVuf").\n",
                   "reg_insert",
                  __LINE__,
-                 reg_name[op],
+                 PL_reg_name[op],
                   (UV)(dst - RExC_emit_start) > RExC_offsets[0] 
                    ? "Overwriting end of array!\n" : "OK",
                   (UV)(src - RExC_emit_start),
@@ -8056,7 +8498,7 @@ S_reginsert(pTHX_ RExC_state_t *pRExC_state, U8 op, regnode *opnd, U32 depth)
        MJD_OFFSET_DEBUG(("%s(%d): (op %s) %s %"UVuf" <- %"UVuf" (max %"UVuf").\n", 
               "reginsert",
              __LINE__,
-             reg_name[op],
+             PL_reg_name[op],
               (UV)(place - RExC_emit_start) > RExC_offsets[0] 
               ? "Overwriting end of array!\n" : "OK",
               (UV)(place - RExC_emit_start),
@@ -8100,7 +8542,7 @@ S_regtail(pTHX_ RExC_state_t *pRExC_state, regnode *p, const regnode *val,U32 de
             PerlIO_printf(Perl_debug_log, "~ %s (%d) %s %s\n",
                 SvPV_nolen_const(mysv), REG_NODE_NUM(scan),
                     (temp == NULL ? "->" : ""),
-                    (temp == NULL ? reg_name[OP(val)] : "")
+                    (temp == NULL ? PL_reg_name[OP(val)] : "")
             );
         });
         if (temp == NULL)
@@ -8181,7 +8623,7 @@ S_regtail_study(pTHX_ RExC_state_t *pRExC_state, regnode *p, const regnode *val,
             PerlIO_printf(Perl_debug_log, "~ %s (%d) -> %s\n",
                 SvPV_nolen_const(mysv),
                 REG_NODE_NUM(scan),
-                reg_name[exact]);
+                PL_reg_name[exact]);
         });
        if (temp == NULL)
            break;
@@ -8233,6 +8675,27 @@ S_regcurly(register const char *s)
 /*
  - regdump - dump a regexp onto Perl_debug_log in vaguely comprehensible form
  */
+#ifdef DEBUGGING
+void 
+S_regdump_extflags(pTHX_ const char *lead, const U32 flags) {
+    int bit;
+    int set=0;
+    for (bit=0; bit<32; bit++) {
+        if (flags & (1<<bit)) {
+            if (!set++ && lead) 
+                PerlIO_printf(Perl_debug_log, "%s",lead);
+            PerlIO_printf(Perl_debug_log, "%s ",PL_reg_extflags_name[bit]);
+        }              
+    }     
+    if (lead)  {
+        if (set) 
+            PerlIO_printf(Perl_debug_log, "\n");
+        else 
+            PerlIO_printf(Perl_debug_log, "%s[none-set]\n",lead);
+    }            
+}   
+#endif
+
 void
 Perl_regdump(pTHX_ const regexp *r)
 {
@@ -8241,6 +8704,7 @@ Perl_regdump(pTHX_ const regexp *r)
     SV * const sv = sv_newmortal();
     SV *dsv= sv_newmortal();
     RXi_GET_DECL(r,ri);
+    GET_RE_DEBUG_FLAGS_DECL;
 
     (void)dumpuntil(r, ri->program, ri->program + 1, NULL, NULL, sv, 0, 0);
 
@@ -8314,6 +8778,7 @@ Perl_regdump(pTHX_ const regexp *r)
     if (r->extflags & RXf_EVAL_SEEN)
        PerlIO_printf(Perl_debug_log, "with eval ");
     PerlIO_printf(Perl_debug_log, "\n");
+    DEBUG_FLAGS_r(regdump_extflags("r->extflags: ",r->extflags));            
 #else
     PERL_UNUSED_CONTEXT;
     PERL_UNUSED_ARG(r);
@@ -8339,24 +8804,22 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o)
        /* It would be nice to FAIL() here, but this may be called from
           regexec.c, and it would be hard to supply pRExC_state. */
        Perl_croak(aTHX_ "Corrupted regexp opcode %d > %d", (int)OP(o), (int)REGNODE_MAX);
-    sv_catpv(sv, reg_name[OP(o)]); /* Take off const! */
+    sv_catpv(sv, PL_reg_name[OP(o)]); /* Take off const! */
 
     k = PL_regkind[OP(o)];
 
     if (k == EXACT) {
-       SV * const dsv = sv_2mortal(newSVpvs(""));
+       sv_catpvs(sv, " ");
        /* Using is_utf8_string() (via PERL_PV_UNI_DETECT) 
         * is a crude hack but it may be the best for now since 
         * we have no flag "this EXACTish node was UTF-8" 
         * --jhi */
-       const char * const s = 
-           pv_pretty(dsv, STRING(o), STR_LEN(o), 60, 
-               PL_colors[0], PL_colors[1],
-               PERL_PV_ESCAPE_UNI_DETECT |
-               PERL_PV_PRETTY_ELIPSES    |
-               PERL_PV_PRETTY_LTGT    
-            ); 
-       Perl_sv_catpvf(aTHX_ sv, " %s", s );
+       pv_pretty(sv, STRING(o), STR_LEN(o), 60, PL_colors[0], PL_colors[1],
+                 PERL_PV_ESCAPE_UNI_DETECT |
+                 PERL_PV_PRETTY_ELLIPSES   |
+                 PERL_PV_PRETTY_LTGT       |
+                 PERL_PV_PRETTY_NOCLEAR
+                 );
     } else if (k == TRIE) {
        /* print the details of the trie in dumpuntil instead, as
         * progi->data isn't available here */
@@ -8368,7 +8831,7 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o)
         const reg_trie_data * const trie
            = (reg_trie_data*)progi->data->data[!IS_TRIE_AC(op) ? n : ac->trie];
         
-        Perl_sv_catpvf(aTHX_ sv, "-%s",reg_name[o->flags]);
+        Perl_sv_catpvf(aTHX_ sv, "-%s",PL_reg_name[o->flags]);
         DEBUG_TRIE_COMPILE_r(
             Perl_sv_catpvf(aTHX_ sv,
                 "<S:%"UVuf"/%"IVdf" W:%"UVuf" L:%"UVuf"/%"UVuf" C:%"UVuf"/%"UVuf">",
@@ -8385,7 +8848,7 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o)
             int i;
             int rangestart = -1;
             U8* bitmap = IS_ANYOF_TRIE(op) ? (U8*)ANYOF_BITMAP(o) : (U8*)TRIE_BITMAP(trie);
-            Perl_sv_catpvf(aTHX_ sv, "[");
+            sv_catpvs(sv, "[");
             for (i = 0; i <= 256; i++) {
                 if (i < 256 && BITMAP_TEST(bitmap,i)) {
                     if (rangestart == -1)
@@ -8402,7 +8865,7 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o)
                     rangestart = -1;
                 }
             }
-            Perl_sv_catpvf(aTHX_ sv, "]");
+            sv_catpvs(sv, "]");
         } 
         
     } else if (k == CURLY) {
@@ -8444,6 +8907,8 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o)
                 SVfARG((SV*)progi->data->data[ ARG( o ) ]));
     } else if (k == LOGICAL)
        Perl_sv_catpvf(aTHX_ sv, "[%d]", o->flags);     /* 2: embedded, otherwise 1 */
+    else if (k == FOLDCHAR)
+       Perl_sv_catpvf(aTHX_ sv, "[0x%"UVXf"]", PTR2UV(ARG(o)) );
     else if (k == ANYOF) {
        int i, rangestart = -1;
        const U8 flags = ANYOF_FLAGS(o);
@@ -8594,7 +9059,7 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o)
 }
 
 SV *
-Perl_re_intuit_string(pTHX_ regexp *prog)
+Perl_re_intuit_string(pTHX_ REGEXP * const prog)
 {                              /* Assume that RE_INTUIT is set */
     dVAR;
     GET_RE_DEBUG_FLAGS_DECL;
@@ -8640,30 +9105,82 @@ Perl_pregfree(pTHX_ struct regexp *r)
 
     if (!r || (--r->refcnt > 0))
        return;
-       
-    CALLREGFREE_PVT(r); /* free the private data */
+    if (r->mother_re) {
+        ReREFCNT_dec(r->mother_re);
+    } else {
+        CALLREGFREE_PVT(r); /* free the private data */
+        if (r->paren_names)
+            SvREFCNT_dec(r->paren_names);
+        Safefree(r->wrapped);
+    }        
+    if (r->substrs) {
+        if (r->anchored_substr)
+            SvREFCNT_dec(r->anchored_substr);
+        if (r->anchored_utf8)
+            SvREFCNT_dec(r->anchored_utf8);
+        if (r->float_substr)
+            SvREFCNT_dec(r->float_substr);
+        if (r->float_utf8)
+            SvREFCNT_dec(r->float_utf8);
+       Safefree(r->substrs);
+    }
     RX_MATCH_COPY_FREE(r);
 #ifdef PERL_OLD_COPY_ON_WRITE
     if (r->saved_copy)
-       SvREFCNT_dec(r->saved_copy);
+        SvREFCNT_dec(r->saved_copy);
 #endif
+    Safefree(r->swap);
+    Safefree(r->offs);
+    Safefree(r);
+}
+
+/*  reg_temp_copy()
+    
+    This is a hacky workaround to the structural issue of match results
+    being stored in the regexp structure which is in turn stored in
+    PL_curpm/PL_reg_curpm. The problem is that due to qr// the pattern
+    could be PL_curpm in multiple contexts, and could require multiple
+    result sets being associated with the pattern simultaneously, such
+    as when doing a recursive match with (??{$qr})
+    
+    The solution is to make a lightweight copy of the regexp structure 
+    when a qr// is returned from the code executed by (??{$qr}) this
+    lightweight copy doesnt actually own any of its data except for
+    the starp/end and the actual regexp structure itself. 
+    
+*/    
+    
+    
+regexp *
+Perl_reg_temp_copy (pTHX_ struct regexp *r) {
+    regexp *ret;
+    register const I32 npar = r->nparens+1;
+    (void)ReREFCNT_inc(r);
+    Newx(ret, 1, regexp);
+    StructCopy(r, ret, regexp);
+    Newx(ret->offs, npar, regexp_paren_pair);
+    Copy(r->offs, ret->offs, npar, regexp_paren_pair);
+    ret->refcnt = 1;
     if (r->substrs) {
-       if (r->anchored_substr)
-           SvREFCNT_dec(r->anchored_substr);
-       if (r->anchored_utf8)
-           SvREFCNT_dec(r->anchored_utf8);
-       if (r->float_substr)
-           SvREFCNT_dec(r->float_substr);
-       if (r->float_utf8)
-           SvREFCNT_dec(r->float_utf8);
-       Safefree(r->substrs);
+        Newx(ret->substrs, 1, struct reg_substr_data);
+       StructCopy(r->substrs, ret->substrs, struct reg_substr_data);
+
+       SvREFCNT_inc_void(ret->anchored_substr);
+       SvREFCNT_inc_void(ret->anchored_utf8);
+       SvREFCNT_inc_void(ret->float_substr);
+       SvREFCNT_inc_void(ret->float_utf8);
+
+       /* check_substr and check_utf8, if non-NULL, point to either their
+          anchored or float namesakes, and don't hold a second reference.  */
     }
-    if (r->paren_names)
-        SvREFCNT_dec(r->paren_names);
-    Safefree(r->wrapped);
-    Safefree(r->startp);
-    Safefree(r->endp);
-    Safefree(r);
+    RX_MATCH_COPIED_off(ret);
+#ifdef PERL_OLD_COPY_ON_WRITE
+    ret->saved_copy = NULL;
+#endif
+    ret->mother_re = r; 
+    ret->swap = NULL;
+    
+    return ret;
 }
 #endif
 
@@ -8680,7 +9197,7 @@ Perl_pregfree(pTHX_ struct regexp *r)
  */
  
 void
-Perl_regfree_internal(pTHX_ struct regexp *r)
+Perl_regfree_internal(pTHX_ REGEXP * const r)
 {
     dVAR;
     RXi_GET_DECL(r,ri);
@@ -8789,11 +9306,7 @@ Perl_regfree_internal(pTHX_ struct regexp *r)
        Safefree(ri->data->what);
        Safefree(ri->data);
     }
-    if (ri->swap) {
-        Safefree(ri->swap->startp);
-        Safefree(ri->swap->endp);
-        Safefree(ri->swap);
-    }
+
     Safefree(ri);
 }
 
@@ -8803,12 +9316,11 @@ Perl_regfree_internal(pTHX_ struct regexp *r)
 #define SAVEPVN(p,n)   ((p) ? savepvn(p,n) : NULL)
 
 /* 
-   regdupe - duplicate a regexp. 
-   
-   This routine is called by sv.c's re_dup and is expected to clone a 
-   given regexp structure. It is a no-op when not under USE_ITHREADS. 
-   (Originally this *was* re_dup() for change history see sv.c)
+   re_dup - duplicate a regexp. 
    
+   This routine is expected to clone a given regexp structure. It is not
+   compiler under USE_ITHREADS.
+
    After all of the core data stored in struct regexp is duplicated
    the regexp_engine.dupe method is used to copy any private data
    stored in the *pprivate pointer. This allows extensions to handle
@@ -8823,8 +9335,7 @@ Perl_re_dup(pTHX_ const regexp *r, CLONE_PARAMS *param)
 {
     dVAR;
     regexp *ret;
-    int i, npar;
-    struct reg_substr_datum *s;
+    I32 npar;
 
     if (!r)
        return (REGEXP *)NULL;
@@ -8834,55 +9345,63 @@ Perl_re_dup(pTHX_ const regexp *r, CLONE_PARAMS *param)
 
     
     npar = r->nparens+1;
-    Newxz(ret, 1, regexp);
-    Newx(ret->startp, npar, I32);
-    Copy(r->startp, ret->startp, npar, I32);
-    Newx(ret->endp, npar, I32);
-    Copy(r->endp, ret->endp, npar, I32);
+    Newx(ret, 1, regexp);
+    StructCopy(r, ret, regexp);
+    Newx(ret->offs, npar, regexp_paren_pair);
+    Copy(r->offs, ret->offs, npar, regexp_paren_pair);
+    if(ret->swap) {
+        /* no need to copy these */
+        Newx(ret->swap, npar, regexp_paren_pair);
+    }
 
-    if (r->substrs) {
+    if (ret->substrs) {
+       /* Do it this way to avoid reading from *r after the StructCopy().
+          That way, if any of the sv_dup_inc()s dislodge *r from the L1
+          cache, it doesn't matter.  */
+       const bool anchored = r->check_substr == r->anchored_substr;
         Newx(ret->substrs, 1, struct reg_substr_data);
-        for (s = ret->substrs->data, i = 0; i < 3; i++, s++) {
-            s->min_offset = r->substrs->data[i].min_offset;
-            s->max_offset = r->substrs->data[i].max_offset;
-            s->end_shift  = r->substrs->data[i].end_shift;
-            s->substr     = sv_dup_inc(r->substrs->data[i].substr, param);
-            s->utf8_substr = sv_dup_inc(r->substrs->data[i].utf8_substr, param);
-        }
-    } else 
-        ret->substrs = NULL;    
-
-    ret->wrapped        = SAVEPVN(r->wrapped, r->wraplen);
-    ret->precomp        = ret->wrapped + (r->precomp - r->wrapped);
-    ret->prelen         = r->prelen;
-    ret->wraplen        = r->wraplen;
-
-    ret->refcnt         = r->refcnt;
-    ret->minlen         = r->minlen;
-    ret->minlenret      = r->minlenret;
-    ret->nparens        = r->nparens;
-    ret->lastparen      = r->lastparen;
-    ret->lastcloseparen = r->lastcloseparen;
-    ret->intflags       = r->intflags;
-    ret->extflags       = r->extflags;
-
-    ret->sublen         = r->sublen;
-
-    ret->engine         = r->engine;
-    
-    ret->paren_names    = hv_dup_inc(r->paren_names, param);
+       StructCopy(r->substrs, ret->substrs, struct reg_substr_data);
+
+       ret->anchored_substr = sv_dup_inc(ret->anchored_substr, param);
+       ret->anchored_utf8 = sv_dup_inc(ret->anchored_utf8, param);
+       ret->float_substr = sv_dup_inc(ret->float_substr, param);
+       ret->float_utf8 = sv_dup_inc(ret->float_utf8, param);
+
+       /* check_substr and check_utf8, if non-NULL, point to either their
+          anchored or float namesakes, and don't hold a second reference.  */
+
+       if (ret->check_substr) {
+           if (anchored) {
+               assert(r->check_utf8 == r->anchored_utf8);
+               ret->check_substr = ret->anchored_substr;
+               ret->check_utf8 = ret->anchored_utf8;
+           } else {
+               assert(r->check_substr == r->float_substr);
+               assert(r->check_utf8 == r->float_utf8);
+               ret->check_substr = ret->float_substr;
+               ret->check_utf8 = ret->float_utf8;
+           }
+       }
+    }
+
+    ret->wrapped        = SAVEPVN(ret->wrapped, ret->wraplen+1);
+    ret->precomp        = ret->wrapped + (ret->precomp - ret->wrapped);
+    ret->paren_names    = hv_dup_inc(ret->paren_names, param);
+
+    if (ret->pprivate)
+       RXi_SET(ret,CALLREGDUPE_PVT(ret,param));
 
     if (RX_MATCH_COPIED(ret))
-       ret->subbeg  = SAVEPVN(r->subbeg, r->sublen);
+       ret->subbeg  = SAVEPVN(ret->subbeg, ret->sublen);
     else
        ret->subbeg = NULL;
 #ifdef PERL_OLD_COPY_ON_WRITE
     ret->saved_copy = NULL;
 #endif
-    
-    ret->pprivate = r->pprivate;
-    if (ret->pprivate) 
-        RXi_SET(ret,CALLREGDUPE_PVT(ret,param));
+
+    ret->mother_re      = NULL;
+    ret->gofs = 0;
+    ret->seen_evals = 0;
     
     ptr_table_store(PL_ptr_table, r, ret);
     return ret;
@@ -8904,7 +9423,7 @@ Perl_re_dup(pTHX_ const regexp *r, CLONE_PARAMS *param)
 */
 
 void *
-Perl_regdupe_internal(pTHX_ const regexp *r, CLONE_PARAMS *param)
+Perl_regdupe_internal(pTHX_ REGEXP * const r, CLONE_PARAMS *param)
 {
     dVAR;
     regexp_internal *reti;
@@ -8917,14 +9436,6 @@ Perl_regdupe_internal(pTHX_ const regexp *r, CLONE_PARAMS *param)
     Newxc(reti, sizeof(regexp_internal) + (len+1)*sizeof(regnode), char, regexp_internal);
     Copy(ri->program, reti->program, len+1, regnode);
     
-    if(ri->swap) {
-        Newx(reti->swap, 1, regexp_paren_ofs);
-        /* no need to copy these */
-        Newx(reti->swap->startp, npar, I32);
-        Newx(reti->swap->endp, npar, I32);
-    } else {
-        reti->swap = NULL;
-    }
 
     reti->regstclass = NULL;
 
@@ -9165,12 +9676,24 @@ clear_re(pTHX_ void *r)
 STATIC void
 S_put_byte(pTHX_ SV *sv, int c)
 {
-    if (isCNTRL(c) || c == 255 || !isPRINT(c))
+    /* Our definition of isPRINT() ignores locales, so only bytes that are
+       not part of UTF-8 are considered printable. I assume that the same
+       holds for UTF-EBCDIC.
+       Also, code point 255 is not printable in either (it's E0 in EBCDIC,
+       which Wikipedia says:
+
+       EO, or Eight Ones, is an 8-bit EBCDIC character code represented as all
+       ones (binary 1111 1111, hexadecimal FF). It is similar, but not
+       identical, to the ASCII delete (DEL) or rubout control character.
+       ) So the old condition can be simplified to !isPRINT(c)  */
+    if (!isPRINT(c))
        Perl_sv_catpvf(aTHX_ sv, "\\%o", c);
-    else if (c == '-' || c == ']' || c == '\\' || c == '^')
-       Perl_sv_catpvf(aTHX_ sv, "\\%c", c);
-    else
-       Perl_sv_catpvf(aTHX_ sv, "%c", c);
+    else {
+       const char string = c;
+       if (c == '-' || c == ']' || c == '\\' || c == '^')
+           sv_catpvs(sv, "\\");
+       sv_catpvn(sv, &string, 1);
+    }
 }
 
 
@@ -9273,7 +9796,7 @@ S_dumpuntil(pTHX_ const regexp *r, const regnode *start, const regnode *node,
                     elem_ptr ? pv_pretty(sv, SvPV_nolen_const(*elem_ptr), SvCUR(*elem_ptr), 60,
                            PL_colors[0], PL_colors[1],
                            (SvUTF8(*elem_ptr) ? PERL_PV_ESCAPE_UNI : 0) |
-                           PERL_PV_PRETTY_ELIPSES    |
+                           PERL_PV_PRETTY_ELLIPSES    |
                            PERL_PV_PRETTY_LTGT
                             )
                             : "???"