This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Convert all accesses of the member paren_names of struct regexp to
[perl5.git] / regcomp.c
index 4729780..96dd3f9 100644 (file)
--- a/regcomp.c
+++ b/regcomp.c
 typedef struct RExC_state_t {
     U32                flags;                  /* are we folding, multilining? */
     char       *precomp;               /* uncompiled string. */
+    REGEXP     *rx_sv;                 /* The SV that is the regexp. */
     regexp     *rx;                    /* perl core regexp structure */
     regexp_internal    *rxi;           /* internal data for regexp object pprivate field */        
     char       *start;                 /* Start of input for compile */
@@ -149,6 +150,7 @@ typedef struct RExC_state_t {
 
 #define RExC_flags     (pRExC_state->flags)
 #define RExC_precomp   (pRExC_state->precomp)
+#define RExC_rx_sv     (pRExC_state->rx_sv)
 #define RExC_rx                (pRExC_state->rx)
 #define RExC_rxi       (pRExC_state->rxi)
 #define RExC_start     (pRExC_state->start)
@@ -389,7 +391,7 @@ static const scan_data_t zero_scan_data =
     IV len = RExC_end - RExC_precomp;                                  \
                                                                        \
     if (!SIZE_ONLY)                                                    \
-       SAVEDESTRUCTOR_X(clear_re,(void*)RExC_rx);                      \
+       SAVEDESTRUCTOR_X(clear_re,(void*)RExC_rx_sv);                   \
     if (len > RegexLengthToShowInErrorMessages) {                      \
        /* chop 10 shorter than the max, to ensure meaning of "..." */  \
        len = RegexLengthToShowInErrorMessages - 10;                    \
@@ -420,7 +422,7 @@ static const scan_data_t zero_scan_data =
  */
 #define        vFAIL(m) STMT_START {                           \
     if (!SIZE_ONLY)                                    \
-       SAVEDESTRUCTOR_X(clear_re,(void*)RExC_rx);      \
+       SAVEDESTRUCTOR_X(clear_re,(void*)RExC_rx_sv);   \
     Simple_vFAIL(m);                                   \
 } STMT_END
 
@@ -438,7 +440,7 @@ static const scan_data_t zero_scan_data =
  */
 #define        vFAIL2(m,a1) STMT_START {                       \
     if (!SIZE_ONLY)                                    \
-       SAVEDESTRUCTOR_X(clear_re,(void*)RExC_rx);      \
+       SAVEDESTRUCTOR_X(clear_re,(void*)RExC_rx_sv);   \
     Simple_vFAIL2(m, a1);                              \
 } STMT_END
 
@@ -457,7 +459,7 @@ static const scan_data_t zero_scan_data =
  */
 #define        vFAIL3(m,a1,a2) STMT_START {                    \
     if (!SIZE_ONLY)                                    \
-       SAVEDESTRUCTOR_X(clear_re,(void*)RExC_rx);      \
+       SAVEDESTRUCTOR_X(clear_re,(void*)RExC_rx_sv);   \
     Simple_vFAIL3(m, a1, a2);                          \
 } STMT_END
 
@@ -1163,11 +1165,19 @@ is the recommended Unicode-aware way of saying
 
 #define TRIE_STORE_REVCHAR                                                 \
     STMT_START {                                                           \
-       SV *tmp = newSVpvs("");                                            \
-       if (UTF) SvUTF8_on(tmp);                                           \
-       Perl_sv_catpvf( aTHX_ tmp, "%c", (int)uvc );                       \
-       av_push( revcharmap, tmp );                                        \
-    } STMT_END
+       if (UTF) {                                                         \
+           SV *zlopp = newSV(2);                                          \
+           unsigned char *flrbbbbb = (unsigned char *) SvPVX(zlopp);      \
+           unsigned const char *const kapow = uvuni_to_utf8(flrbbbbb, uvc & 0xFF); \
+           SvCUR_set(zlopp, kapow - flrbbbbb);                            \
+           SvPOK_on(zlopp);                                               \
+           SvUTF8_on(zlopp);                                              \
+           av_push(revcharmap, zlopp);                                    \
+       } else {                                                           \
+           char ooooff = (char)uvc;                                               \
+           av_push(revcharmap, newSVpvn(&ooooff, 1));                     \
+       }                                                                  \
+        } STMT_END
 
 #define TRIE_READ_CHAR STMT_START {                                           \
     wordlen++;                                                                \
@@ -1222,10 +1232,9 @@ is the recommended Unicode-aware way of saying
         /* store the word for dumping */                        \
         SV* tmp;                                                \
         if (OP(noper) != NOTHING)                               \
-            tmp = newSVpvn(STRING(noper), STR_LEN(noper));      \
+            tmp = newSVpvn_utf8(STRING(noper), STR_LEN(noper), UTF);   \
         else                                                    \
-            tmp = newSVpvn( "", 0 );                            \
-        if ( UTF ) SvUTF8_on( tmp );                            \
+            tmp = newSVpvn_utf8( "", 0, UTF );                 \
         av_push( trie_words, tmp );                             \
     });                                                         \
                                                                 \
@@ -1356,7 +1365,7 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, regnode *firs
        have unique chars.
 
        We use an array of integers to represent the character codes 0..255
-       (trie->charmap) and we use a an HV* to store unicode characters. We use the
+       (trie->charmap) and we use a an HV* to store Unicode characters. We use the
        native representation of the character value as the key and IV's for the
        coded index.
 
@@ -1405,7 +1414,20 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, regnode *firs
                     /* store the codepoint in the bitmap, and if its ascii
                        also store its folded equivelent. */
                     TRIE_BITMAP_SET(trie,uvc);
-                    if ( folder ) TRIE_BITMAP_SET(trie,folder[ uvc ]);
+
+                   /* store the folded codepoint */
+                   if ( folder ) TRIE_BITMAP_SET(trie,folder[ uvc ]);
+
+                   if ( !UTF ) {
+                       /* store first byte of utf8 representation of
+                          codepoints in the 127 < uvc < 256 range */
+                       if (127 < uvc && uvc < 192) {
+                           TRIE_BITMAP_SET(trie,194);
+                       } else if (191 < uvc ) {
+                           TRIE_BITMAP_SET(trie,195);
+                       /* && uvc < 256 -- we know uvc is < 256 already */
+                       }
+                   }
                     set_bit = 0; /* We've done our bit :-) */
                 }
             } else {
@@ -1966,7 +1988,8 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, regnode *firs
                 }
                 if ( count == 1 ) {
                     SV **tmp = av_fetch( revcharmap, idx, 0);
-                    char *ch = SvPV_nolen( *tmp );
+                    STRLEN len;
+                    char *ch = SvPV( *tmp, len );
                     DEBUG_OPTIMISE_r({
                         SV *sv=sv_newmortal();
                         PerlIO_printf( Perl_debug_log,
@@ -1985,11 +2008,9 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, regnode *firs
                         str=STRING(convert);
                         STR_LEN(convert)=0;
                     }
-                    while (*ch) {
+                    STR_LEN(convert) += len;
+                    while (len--)
                         *str++ = *ch++;
-                        STR_LEN(convert)++;
-                    }
-                    
                } else {
 #ifdef DEBUGGING           
                    if (state>1)
@@ -2004,24 +2025,35 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, regnode *firs
                 trie->startstate = state;
                 trie->minlen -= (state - 1);
                 trie->maxlen -= (state - 1);
-                DEBUG_r({
-                    regnode *fix = convert;
-                    U32 word = trie->wordcount;
-                    mjd_nodelen++;
-                    Set_Node_Offset_Length(convert, mjd_offset, state - 1);
-                    while( ++fix < n ) {
-                        Set_Node_Offset_Length(fix, 0, 0);
-                    }
-                    while (word--) {
-                        SV ** const tmp = av_fetch( trie_words, word, 0 );
-                        if (tmp) {
-                            if ( STR_LEN(convert) <= SvCUR(*tmp) )
-                                sv_chop(*tmp, SvPV_nolen(*tmp) + STR_LEN(convert));
-                            else
-                                sv_chop(*tmp, SvPV_nolen(*tmp) + SvCUR(*tmp));
-                        }
-                    }    
-                });
+#ifdef DEBUGGING
+               /* At least the UNICOS C compiler choked on this
+                * being argument to DEBUG_r(), so let's just have
+                * it right here. */
+               if (
+#ifdef PERL_EXT_RE_BUILD
+                   1
+#else
+                   DEBUG_r_TEST
+#endif
+                   ) {
+                   regnode *fix = convert;
+                   U32 word = trie->wordcount;
+                   mjd_nodelen++;
+                   Set_Node_Offset_Length(convert, mjd_offset, state - 1);
+                   while( ++fix < n ) {
+                       Set_Node_Offset_Length(fix, 0, 0);
+                   }
+                   while (word--) {
+                       SV ** const tmp = av_fetch( trie_words, word, 0 );
+                       if (tmp) {
+                           if ( STR_LEN(convert) <= SvCUR(*tmp) )
+                               sv_chop(*tmp, SvPV_nolen(*tmp) + STR_LEN(convert));
+                           else
+                               sv_chop(*tmp, SvPV_nolen(*tmp) + SvCUR(*tmp));
+                       }
+                   }
+               }
+#endif
                 if (trie->maxlen) {
                     convert = n;
                } else {
@@ -2760,7 +2792,15 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                                     last = cur;
                                 }
                             } else {
-                                if ( last ) {
+/* 
+    Currently we assume that the trie can handle unicode and ascii
+    matches fold cased matches. If this proves true then the following
+    define will prevent tries in this situation. 
+    
+    #define TRIE_TYPE_IS_SAFE (UTF || optype==EXACT)
+*/
+#define TRIE_TYPE_IS_SAFE 1
+                                if ( last && TRIE_TYPE_IS_SAFE ) {
                                     make_trie( pRExC_state, 
                                             startbranch, first, cur, tail, count, 
                                             optype, depth+1 );
@@ -2788,7 +2828,8 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                               "", SvPV_nolen_const( mysv ),REG_NODE_NUM(cur));
 
                         });
-                        if ( last ) {
+                        
+                        if ( last && TRIE_TYPE_IS_SAFE ) {
                             made= make_trie( pRExC_state, startbranch, first, scan, tail, count, optype, depth+1 );
 #ifdef TRIE_STUDY_OPT  
                             if ( ((made == MADE_EXACT_TRIE && 
@@ -3278,9 +3319,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                        
                        l -= old;
                        /* Get the added string: */
-                       last_str = newSVpvn(s  + old, l);
-                       if (UTF)
-                           SvUTF8_on(last_str);
+                       last_str = newSVpvn_utf8(s  + old, l, UTF);
                        if (deltanext == 0 && pos_before == b) {
                            /* What was added is a constant string */
                            if (mincount > 1) {
@@ -3298,7 +3337,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                                        SvUTF8(sv) && SvMAGICAL(sv) ?
                                        mg_find(sv, PERL_MAGIC_utf8) : NULL;
                                    if (mg && mg->mg_len >= 0)
-                                       mg->mg_len += CHR_SVLEN(last_str);
+                                       mg->mg_len += CHR_SVLEN(last_str) - l;
                                }
                                data->last_end += l * (mincount - 1);
                            }
@@ -4112,16 +4151,16 @@ Perl_pregcomp(pTHX_ const SV * const pattern, const U32 flags)
 #endif
 
 REGEXP *
-Perl_re_compile(pTHX_ const SV * const pattern, const U32 pm_flags)
+Perl_re_compile(pTHX_ const SV * const pattern, U32 pm_flags)
 {
     dVAR;
-    register REGEXP *r;
+    REGEXP *rx;
+    struct regexp *r;
     register regexp_internal *ri;
     STRLEN plen;
     char*  exp = SvPV((SV*)pattern, plen);
     char* xend = exp + plen;
     regnode *scan;
-    regnode *first;
     I32 flags;
     I32 minlen = 0;
     I32 sawplus = 0;
@@ -4136,7 +4175,7 @@ Perl_re_compile(pTHX_ const SV * const pattern, const U32 pm_flags)
     GET_RE_DEBUG_FLAGS_DECL;
     DEBUG_r(if (!PL_colorset) reginitcolors());
 
-    RExC_utf8 = RExC_orig_utf8 = pm_flags & RXf_UTF8;
+    RExC_utf8 = RExC_orig_utf8 = SvUTF8(pattern);
 
     DEBUG_COMPILE_r({
         SV *dsv= sv_newmortal();
@@ -4187,7 +4226,7 @@ redo_first_pass:
        return(NULL);
     }
     if (RExC_utf8 && !RExC_orig_utf8) {
-        /* It's possible to write a regexp in ascii that represents unicode
+        /* It's possible to write a regexp in ascii that represents Unicode
         codepoints outside of the byte range, such as via \x{100}. If we
         detect such a sequence we have to convert the entire pattern to utf8
         and then recompile, as our sizing calculation will have been based
@@ -4225,7 +4264,8 @@ redo_first_pass:
     /* Allocate space and zero-initialize. Note, the two step process 
        of zeroing when in debug mode, thus anything assigned has to 
        happen after that */
-    Newxz(r, 1, regexp);
+    rx = newSV_type(SVt_REGEXP);
+    r = (struct regexp*)SvANY(rx);
     Newxc(ri, sizeof(regexp_internal) + (unsigned)RExC_size * sizeof(regnode),
         char, regexp_internal);
     if ( r == NULL || ri == NULL )
@@ -4241,25 +4281,26 @@ redo_first_pass:
     /* non-zero initialization begins here */
     RXi_SET( r, ri );
     r->engine= RE_ENGINE_PTR;
-    r->refcnt = 1;
-    r->prelen = plen;
     r->extflags = pm_flags;
     {
-        bool has_k     = ((r->extflags & RXf_PMf_KEEPCOPY) == RXf_PMf_KEEPCOPY);
+        bool has_p     = ((r->extflags & RXf_PMf_KEEPCOPY) == RXf_PMf_KEEPCOPY);
        bool has_minus = ((r->extflags & RXf_PMf_STD_PMMOD) != RXf_PMf_STD_PMMOD);
        bool has_runon = ((RExC_seen & REG_SEEN_RUN_ON_COMMENT)==REG_SEEN_RUN_ON_COMMENT);
-       U16 reganch = (U16)((r->extflags & RXf_PMf_STD_PMMOD) >> 12);
+       U16 reganch = (U16)((r->extflags & RXf_PMf_STD_PMMOD)
+                           >> RXf_PMf_STD_PMMOD_SHIFT);
        const char *fptr = STD_PAT_MODS;        /*"msix"*/
        char *p;
-        r->wraplen = r->prelen + has_minus + has_k + has_runon
+       const STRLEN wraplen = plen + has_minus + has_p + has_runon
             + (sizeof(STD_PAT_MODS) - 1)
             + (sizeof("(?:)") - 1);
 
-        Newx(r->wrapped, r->wraplen + 1, char );
-        p = r->wrapped;
+       p = sv_grow(rx, wraplen + 1);
+       SvCUR_set(rx, wraplen);
+       SvPOK_on(rx);
+       SvFLAGS(rx) |= SvUTF8(pattern);
         *p++='('; *p++='?';
-        if (has_k)
-            *p++ = KEEPCOPY_PAT_MOD; /*'k'*/
+        if (has_p)
+            *p++ = KEEPCOPY_PAT_MOD; /*'p'*/
         {
             char *r = p + (sizeof(STD_PAT_MODS) - 1) + has_minus - 1;
             char *colon = r + 1;
@@ -4279,9 +4320,10 @@ redo_first_pass:
         }
 
         *p++ = ':';
-        Copy(RExC_precomp, p, r->prelen, char);
-        r->precomp = p;
-        p += r->prelen;
+        Copy(RExC_precomp, p, plen, char);
+       assert ((RX_WRAPPED(rx) - p) < 16);
+       r->pre_prefix = p - RX_WRAPPED(rx);
+        p += plen;
         if (has_runon)
             *p++ = '\n';
         *p++ = ')';
@@ -4307,6 +4349,7 @@ redo_first_pass:
                           (UV)((2*RExC_size+1) * sizeof(U32))));
 #endif
     SetProgLen(ri,RExC_size);
+    RExC_rx_sv = rx;
     RExC_rx = r;
     RExC_rxi = ri;
 
@@ -4323,9 +4366,10 @@ redo_first_pass:
     /* Store the count of eval-groups for security checks: */
     RExC_rx->seen_evals = RExC_seen_evals;
     REGC((U8)REG_MAGIC, (char*) RExC_emit++);
-    if (reg(pRExC_state, 0, &flags,1) == NULL)
+    if (reg(pRExC_state, 0, &flags,1) == NULL) {
+       ReREFCNT_dec(rx);   
        return(NULL);
-
+    }
     /* XXXX To minimize changes to RE engine we always allocate
        3-units-long substrs field. */
     Newx(r->substrs, 1, struct reg_substr_data);
@@ -4363,11 +4407,11 @@ reStudy:
 #endif    
 
     /* Dig out information for optimizations. */
-    r->extflags = pm_flags; /* Again? */
+    r->extflags = RExC_flags; /* was pm_op */
     /*dmq: removed as part of de-PMOP: pm->op_pmflags = RExC_flags; */
  
     if (UTF)
-        r->extflags |= RXf_UTF8;       /* Unicode in it? */
+       SvUTF8_on(rx);  /* Unicode in it? */
     ri->regstclass = NULL;
     if (RExC_naughty >= 10)    /* Probably an expensive pattern. */
        r->intflags |= PREGf_NAUGHTY;
@@ -4381,18 +4425,20 @@ reStudy:
        struct regnode_charclass_class ch_class; /* pointed to by data */
        int stclass_flag;
        I32 last_close = 0; /* pointed to by data */
-
-       first = scan;
+        regnode *first= scan;
+        regnode *first_next= regnext(first);
+       
        /* Skip introductions and multiplicators >= 1. */
        while ((OP(first) == OPEN && (sawopen = 1)) ||
               /* An OR of *one* alternative - should not happen now. */
-           (OP(first) == BRANCH && OP(regnext(first)) != BRANCH) ||
+           (OP(first) == BRANCH && OP(first_next) != BRANCH) ||
            /* for now we can't handle lookbehind IFMATCH*/
            (OP(first) == IFMATCH && !first->flags) || 
            (OP(first) == PLUS) ||
            (OP(first) == MINMOD) ||
               /* An {n,m} with n>0 */
-           (PL_regkind[OP(first)] == CURLY && ARG1(first) > 0) ) 
+           (PL_regkind[OP(first)] == CURLY && ARG1(first) > 0) ||
+           (OP(first) == NOTHING && PL_regkind[OP(first_next)] != END ))
        {
                
                if (OP(first) == PLUS)
@@ -4404,6 +4450,7 @@ reStudy:
                    first += EXTRA_STEP_2ARGS;
                } else  /* XXX possible optimisation for /(?=)/  */
                    first = NEXTOPER(first);
+               first_next= regnext(first);
        }
 
        /* Starting-point info. */
@@ -4746,14 +4793,37 @@ reStudy:
     if (RExC_seen & REG_SEEN_CUTGROUP)
        r->intflags |= PREGf_CUTGROUP_SEEN;
     if (RExC_paren_names)
-        r->paren_names = (HV*)SvREFCNT_inc(RExC_paren_names);
+        RXp_PAREN_NAMES(r) = (HV*)SvREFCNT_inc(RExC_paren_names);
     else
-        r->paren_names = NULL;
-    if (r->prelen == 3 && strnEQ("\\s+", r->precomp, 3)) /* precomp = "\\s+)" */
-       r->extflags |= RXf_WHITE;
-    else if (r->prelen == 1 && r->precomp[0] == '^')
+        RXp_PAREN_NAMES(r) = NULL;
+
+#ifdef STUPID_PATTERN_CHECKS            
+    if (RX_PRELEN(rx) == 0)
+        r->extflags |= RXf_NULL;
+    if (r->extflags & RXf_SPLIT && RX_PRELEN(rx) == 1 && RX_PRECOMP(rx)[0] == ' ')
+        /* XXX: this should happen BEFORE we compile */
+        r->extflags |= (RXf_SKIPWHITE|RXf_WHITE); 
+    else if (RX_PRELEN(rx) == 3 && memEQ("\\s+", RX_PRECOMP(rx), 3))
+        r->extflags |= RXf_WHITE;
+    else if (RX_PRELEN(rx) == 1 && RXp_PRECOMP(rx)[0] == '^')
         r->extflags |= RXf_START_ONLY;
-
+#else
+    if (r->extflags & RXf_SPLIT && RX_PRELEN(rx) == 1 && RX_PRECOMP(rx)[0] == ' ')
+            /* XXX: this should happen BEFORE we compile */
+            r->extflags |= (RXf_SKIPWHITE|RXf_WHITE); 
+    else {
+        regnode *first = ri->program + 1;
+        U8 fop = OP(first);
+        U8 nop = OP(NEXTOPER(first));
+        
+        if (PL_regkind[fop] == NOTHING && nop == END)
+            r->extflags |= RXf_NULL;
+        else if (PL_regkind[fop] == BOL && nop == END)
+            r->extflags |= RXf_START_ONLY;
+        else if (fop == PLUS && nop ==SPACE && OP(regnext(first))==END)
+            r->extflags |= RXf_WHITE;    
+    }
+#endif
 #ifdef DEBUGGING
     if (RExC_paren_names) {
         ri->name_list_idx = add_data( pRExC_state, 1, "p" );
@@ -4789,33 +4859,76 @@ reStudy:
         PerlIO_printf(Perl_debug_log, "\n");
     });
 #endif
-    return(r);
+    return rx;
 }
 
 #undef RE_ENGINE_PTR
 
 
 SV*
-Perl_reg_named_buff_get(pTHX_ REGEXP * const rx, SV * const namesv, const U32 flags)
+Perl_reg_named_buff(pTHX_ REGEXP * const rx, SV * const key, SV * const value,
+                    const U32 flags)
+{
+    PERL_UNUSED_ARG(value);
+
+    if (flags & RXapif_FETCH) {
+        return reg_named_buff_fetch(rx, key, flags);
+    } else if (flags & (RXapif_STORE | RXapif_DELETE | RXapif_CLEAR)) {
+        Perl_croak(aTHX_ PL_no_modify);
+        return NULL;
+    } else if (flags & RXapif_EXISTS) {
+        return reg_named_buff_exists(rx, key, flags)
+            ? &PL_sv_yes
+            : &PL_sv_no;
+    } else if (flags & RXapif_REGNAMES) {
+        return reg_named_buff_all(rx, flags);
+    } else if (flags & (RXapif_SCALAR | RXapif_REGNAMES_COUNT)) {
+        return reg_named_buff_scalar(rx, flags);
+    } else {
+        Perl_croak(aTHX_ "panic: Unknown flags %d in named_buff", (int)flags);
+        return NULL;
+    }
+}
+
+SV*
+Perl_reg_named_buff_iter(pTHX_ REGEXP * const rx, const SV * const lastkey,
+                         const U32 flags)
+{
+    PERL_UNUSED_ARG(lastkey);
+
+    if (flags & RXapif_FIRSTKEY)
+        return reg_named_buff_firstkey(rx, flags);
+    else if (flags & RXapif_NEXTKEY)
+        return reg_named_buff_nextkey(rx, flags);
+    else {
+        Perl_croak(aTHX_ "panic: Unknown flags %d in named_buff_iter", (int)flags);
+        return NULL;
+    }
+}
+
+SV*
+Perl_reg_named_buff_fetch(pTHX_ REGEXP * const r, SV * const namesv,
+                         const U32 flags)
 {
     AV *retarray = NULL;
     SV *ret;
-    if (flags & 1) 
+    struct regexp *const rx = (struct regexp *)SvANY(r);
+    if (flags & RXapif_ALL)
         retarray=newAV();
 
-    if (rx && rx->paren_names) {
-        HE *he_str = hv_fetch_ent( rx->paren_names, namesv, 0, 0 );
+    if (rx && RXp_PAREN_NAMES(rx)) {
+        HE *he_str = hv_fetch_ent( RXp_PAREN_NAMES(rx), namesv, 0, 0 );
         if (he_str) {
             IV i;
             SV* sv_dat=HeVAL(he_str);
             I32 *nums=(I32*)SvPVX(sv_dat);
             for ( i=0; i<SvIVX(sv_dat); i++ ) {
-               if ((I32)(rx->nparens) >= nums[i]
-                       && rx->offs[nums[i]].start != -1
-                       && rx->offs[nums[i]].end != -1)
+                if ((I32)(rx->nparens) >= nums[i]
+                    && rx->offs[nums[i]].start != -1
+                    && rx->offs[nums[i]].end != -1)
                 {
                     ret = newSVpvs("");
-                    CALLREG_NUMBUF(rx,nums[i],ret);
+                    CALLREG_NUMBUF_FETCH(r,nums[i],ret);
                     if (!retarray)
                         return ret;
                 } else {
@@ -4827,15 +4940,138 @@ Perl_reg_named_buff_get(pTHX_ REGEXP * const rx, SV * const namesv, const U32 fl
                 }
             }
             if (retarray)
-                return (SV*)retarray;
+                return newRV((SV*)retarray);
+        }
+    }
+    return NULL;
+}
+
+bool
+Perl_reg_named_buff_exists(pTHX_ REGEXP * const r, SV * const key,
+                           const U32 flags)
+{
+    struct regexp *const rx = (struct regexp *)SvANY(r);
+    if (rx && RXp_PAREN_NAMES(rx)) {
+        if (flags & RXapif_ALL) {
+            return hv_exists_ent(RXp_PAREN_NAMES(rx), key, 0);
+        } else {
+           SV *sv = CALLREG_NAMED_BUFF_FETCH(r, key, flags);
+            if (sv) {
+               SvREFCNT_dec(sv);
+                return TRUE;
+            } else {
+                return FALSE;
+            }
+        }
+    } else {
+        return FALSE;
+    }
+}
+
+SV*
+Perl_reg_named_buff_firstkey(pTHX_ REGEXP * const r, const U32 flags)
+{
+    struct regexp *const rx = (struct regexp *)SvANY(r);
+    if ( rx && RXp_PAREN_NAMES(rx) ) {
+       (void)hv_iterinit(RXp_PAREN_NAMES(rx));
+
+       return CALLREG_NAMED_BUFF_NEXTKEY(r, NULL, flags & ~RXapif_FIRSTKEY);
+    } else {
+       return FALSE;
+    }
+}
+
+SV*
+Perl_reg_named_buff_nextkey(pTHX_ REGEXP * const r, const U32 flags)
+{
+    struct regexp *const rx = (struct regexp *)SvANY(r);
+    if (rx && RXp_PAREN_NAMES(rx)) {
+        HV *hv = RXp_PAREN_NAMES(rx);
+        HE *temphe;
+        while ( (temphe = hv_iternext_flags(hv,0)) ) {
+            IV i;
+            IV parno = 0;
+            SV* sv_dat = HeVAL(temphe);
+            I32 *nums = (I32*)SvPVX(sv_dat);
+            for ( i = 0; i < SvIVX(sv_dat); i++ ) {
+                if ((I32)(rx->lastcloseparen) >= nums[i] &&
+                    rx->offs[nums[i]].start != -1 &&
+                    rx->offs[nums[i]].end != -1)
+                {
+                    parno = nums[i];
+                    break;
+                }
+            }
+            if (parno || flags & RXapif_ALL) {
+               return newSVhek(HeKEY_hek(temphe));
+            }
         }
     }
     return NULL;
 }
 
+SV*
+Perl_reg_named_buff_scalar(pTHX_ REGEXP * const r, const U32 flags)
+{
+    SV *ret;
+    AV *av;
+    I32 length;
+    struct regexp *const rx = (struct regexp *)SvANY(r);
+
+    if (rx && RXp_PAREN_NAMES(rx)) {
+        if (flags & (RXapif_ALL | RXapif_REGNAMES_COUNT)) {
+            return newSViv(HvTOTALKEYS(RXp_PAREN_NAMES(rx)));
+        } else if (flags & RXapif_ONE) {
+            ret = CALLREG_NAMED_BUFF_ALL(r, (flags | RXapif_REGNAMES));
+            av = (AV*)SvRV(ret);
+            length = av_len(av);
+            return newSViv(length + 1);
+        } else {
+            Perl_croak(aTHX_ "panic: Unknown flags %d in named_buff_scalar", (int)flags);
+            return NULL;
+        }
+    }
+    return &PL_sv_undef;
+}
+
+SV*
+Perl_reg_named_buff_all(pTHX_ REGEXP * const r, const U32 flags)
+{
+    struct regexp *const rx = (struct regexp *)SvANY(r);
+    AV *av = newAV();
+
+    if (rx && RXp_PAREN_NAMES(rx)) {
+        HV *hv= RXp_PAREN_NAMES(rx);
+        HE *temphe;
+        (void)hv_iterinit(hv);
+        while ( (temphe = hv_iternext_flags(hv,0)) ) {
+            IV i;
+            IV parno = 0;
+            SV* sv_dat = HeVAL(temphe);
+            I32 *nums = (I32*)SvPVX(sv_dat);
+            for ( i = 0; i < SvIVX(sv_dat); i++ ) {
+                if ((I32)(rx->lastcloseparen) >= nums[i] &&
+                    rx->offs[nums[i]].start != -1 &&
+                    rx->offs[nums[i]].end != -1)
+                {
+                    parno = nums[i];
+                    break;
+                }
+            }
+            if (parno || flags & RXapif_ALL) {
+                av_push(av, newSVhek(HeKEY_hek(temphe)));
+            }
+        }
+    }
+
+    return newRV((SV*)av);
+}
+
 void
-Perl_reg_numbered_buff_get(pTHX_ REGEXP * const rx, const I32 paren, SV * const sv)
+Perl_reg_numbered_buff_fetch(pTHX_ REGEXP * const r, const I32 paren,
+                            SV * const sv)
 {
+    struct regexp *const rx = (struct regexp *)SvANY(r);
     char *s = NULL;
     I32 i = 0;
     I32 s1, t1;
@@ -4845,13 +5081,13 @@ Perl_reg_numbered_buff_get(pTHX_ REGEXP * const rx, const I32 paren, SV * const
         return;
     } 
     else               
-    if (paren == -2 && rx->offs[0].start != -1) {
+    if (paren == RX_BUFF_IDX_PREMATCH && rx->offs[0].start != -1) {
         /* $` */
        i = rx->offs[0].start;
        s = rx->subbeg;
     }
     else 
-    if (paren == -1 && rx->offs[0].end != -1) {
+    if (paren == RX_BUFF_IDX_POSTMATCH && rx->offs[0].end != -1) {
         /* $' */
        s = rx->subbeg + rx->offs[0].end;
        i = rx->sublen - rx->offs[0].end;
@@ -4875,16 +5111,16 @@ Perl_reg_numbered_buff_get(pTHX_ REGEXP * const rx, const I32 paren, SV * const
         sv_setpvn(sv, s, i);
         PL_tainted = oldtainted;
         if ( (rx->extflags & RXf_CANY_SEEN)
-            ? (RX_MATCH_UTF8(rx)
+            ? (RXp_MATCH_UTF8(rx)
                         && (!i || is_utf8_string((U8*)s, i)))
-            : (RX_MATCH_UTF8(rx)) )
+            : (RXp_MATCH_UTF8(rx)) )
         {
             SvUTF8_on(sv);
         }
         else
             SvUTF8_off(sv);
         if (PL_tainting) {
-            if (RX_MATCH_TAINTED(rx)) {
+            if (RXp_MATCH_TAINTED(rx)) {
                 if (SvTYPE(sv) >= SVt_PVMG) {
                     MAGIC* const mg = SvMAGIC(sv);
                     MAGIC* mgt;
@@ -4908,11 +5144,82 @@ Perl_reg_numbered_buff_get(pTHX_ REGEXP * const rx, const I32 paren, SV * const
     }
 }
 
+void
+Perl_reg_numbered_buff_store(pTHX_ REGEXP * const rx, const I32 paren,
+                                                        SV const * const value)
+{
+    PERL_UNUSED_ARG(rx);
+    PERL_UNUSED_ARG(paren);
+    PERL_UNUSED_ARG(value);
+
+    if (!PL_localizing)
+        Perl_croak(aTHX_ PL_no_modify);
+}
+
+I32
+Perl_reg_numbered_buff_length(pTHX_ REGEXP * const r, const SV * const sv,
+                              const I32 paren)
+{
+    struct regexp *const rx = (struct regexp *)SvANY(r);
+    I32 i;
+    I32 s1, t1;
+
+    /* Some of this code was originally in C<Perl_magic_len> in F<mg.c> */
+       switch (paren) {
+      /* $` / ${^PREMATCH} */
+      case RX_BUFF_IDX_PREMATCH:
+        if (rx->offs[0].start != -1) {
+                       i = rx->offs[0].start;
+                       if (i > 0) {
+                               s1 = 0;
+                               t1 = i;
+                               goto getlen;
+                       }
+           }
+        return 0;
+      /* $' / ${^POSTMATCH} */
+      case RX_BUFF_IDX_POSTMATCH:
+           if (rx->offs[0].end != -1) {
+                       i = rx->sublen - rx->offs[0].end;
+                       if (i > 0) {
+                               s1 = rx->offs[0].end;
+                               t1 = rx->sublen;
+                               goto getlen;
+                       }
+           }
+        return 0;
+      /* $& / ${^MATCH}, $1, $2, ... */
+      default:
+           if (paren <= (I32)rx->nparens &&
+            (s1 = rx->offs[paren].start) != -1 &&
+            (t1 = rx->offs[paren].end) != -1)
+           {
+            i = t1 - s1;
+            goto getlen;
+        } else {
+            if (ckWARN(WARN_UNINITIALIZED))
+                report_uninit((SV*)sv);
+            return 0;
+        }
+    }
+  getlen:
+    if (i > 0 && RXp_MATCH_UTF8(rx)) {
+        const char * const s = rx->subbeg + s1;
+        const U8 *ep;
+        STRLEN el;
+
+        i = t1 - s1;
+        if (is_utf8_string_loclen((U8*)s, i, &ep, &el))
+                       i = el;
+    }
+    return i;
+}
+
 SV*
 Perl_reg_qr_package(pTHX_ REGEXP * const rx)
 {
        PERL_UNUSED_ARG(rx);
-       return newSVpvs("Regexp");
+       return NULL;
 }
 
 /* Scans the name of a named buffer from the pattern.
@@ -4944,10 +5251,9 @@ S_reg_scan_name(pTHX_ RExC_state_t *pRExC_state, U32 flags) {
     }
 
     if ( flags ) {
-        SV* sv_name = sv_2mortal(Perl_newSVpvn(aTHX_ name_start,
-            (int)(RExC_parse - name_start)));
-       if (UTF)
-            SvUTF8_on(sv_name);
+        SV* sv_name
+           = newSVpvn_flags(name_start, (int)(RExC_parse - name_start),
+                            SVs_TEMP | (UTF ? SVf_UTF8 : 0));
         if ( flags == REG_RSN_RETURN_NAME)
             return sv_name;
         else if (flags==REG_RSN_RETURN_DATA) {
@@ -5043,7 +5349,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
     register regnode *ender = NULL;
     register I32 parno = 0;
     I32 flags;
-    const I32 oregflags = RExC_flags;
+    U32 oregflags = RExC_flags;
     bool have_branch = 0;
     bool is_open = 0;
     I32 freeze_paren = 0;
@@ -5642,8 +5948,8 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
                       and must be globally applied -- japhy */
                     switch (*RExC_parse) {
                    CASE_STD_PMMOD_FLAGS_PARSE_SET(flagsp);
-                    case 'o':
-                    case 'g':
+                    case ONCE_PAT_MOD: /* 'o' */
+                    case GLOBAL_PAT_MOD: /* 'g' */
                        if (SIZE_ONLY && ckWARN(WARN_REGEXP)) {
                            const I32 wflagbit = *RExC_parse == 'o' ? WASTED_O : WASTED_G;
                            if (! (wastedflags & wflagbit) ) {
@@ -5660,7 +5966,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
                        }
                        break;
                        
-                   case 'c':
+                   case CONTINUE_PAT_MOD: /* 'c' */
                        if (SIZE_ONLY && ckWARN(WARN_REGEXP)) {
                            if (! (wastedflags & WASTED_C) ) {
                                wastedflags |= WASTED_GC;
@@ -5673,10 +5979,10 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
                            }
                        }
                        break;
-                   case 'k':
+                   case KEEPCOPY_PAT_MOD: /* 'p' */
                         if (flagsp == &negflags) {
                             if (SIZE_ONLY && ckWARN(WARN_REGEXP))
-                                vWARN(RExC_parse + 1,"Useless use of (?-k)");
+                                vWARN(RExC_parse + 1,"Useless use of (?-p)");
                         } else {
                             *flagsp |= RXf_PMf_KEEPCOPY;
                         }
@@ -5696,6 +6002,10 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
                     case ')':
                         RExC_flags |= posflags;
                         RExC_flags &= ~negflags;
+                        if (paren != ':') {
+                            oregflags |= posflags;
+                            oregflags &= ~negflags;
+                        }
                         nextchar(pRExC_state);
                        if (paren != ':') {
                            *flagp = TRYAGAIN;
@@ -6210,11 +6520,12 @@ S_reg_namedseq(pTHX_ RExC_state_t *pRExC_state, UV *valuep)
     /* RExC_parse points at the beginning brace, 
        endbrace points at the last */
     if ( name[0]=='U' && name[1]=='+' ) {
-        /* its a "unicode hex" notation {U+89AB} */
+        /* its a "Unicode hex" notation {U+89AB} */
         I32 fl = PERL_SCAN_ALLOW_UNDERSCORES
             | PERL_SCAN_DISALLOW_PREFIX
             | (SIZE_ONLY ? PERL_SCAN_SILENT_ILLDIGIT : 0);
         UV cp;
+       char string;
         len = (STRLEN)(endbrace - name - 2);
         cp = grok_hex(name + 2, &len, &fl, NULL);
         if ( len != (STRLEN)(endbrace - name - 2) ) {
@@ -6226,7 +6537,8 @@ S_reg_namedseq(pTHX_ RExC_state_t *pRExC_state, UV *valuep)
             *valuep = cp;
             return NULL;
         }
-        sv_str= Perl_newSVpvf_nocontext("%c",(int)cp);
+       string = (char)cp;
+        sv_str= newSVpvn(&string, 1);
     } else {
         /* fetch the charnames handler for this scope */
         HV * const table = GvHV(PL_hintgv);
@@ -6433,7 +6745,7 @@ STATIC UV
 S_reg_recode(pTHX_ const char value, SV **encp)
 {
     STRLEN numlen = 1;
-    SV * const sv = sv_2mortal(newSVpvn(&value, numlen));
+    SV * const sv = newSVpvn_flags(&value, numlen, SVs_TEMP);
     const char * const s = *encp ? sv_recode_to_utf8(sv, *encp) : SvPVX(sv);
     const STRLEN newlen = SvCUR(sv);
     UV uv = UNICODE_REPLACEMENT;
@@ -6573,8 +6885,10 @@ tryagain:
     case 0xDF:
     case 0xC3:
     case 0xCE:
+        do_foldchar:
         if (!LOC && FOLD) {
             U32 len,cp;
+           len=0; /* silence a spurious compiler warning */
             if ((cp = what_len_TRICKYFOLD_safe(RExC_parse,RExC_end,UTF,len))) {
                 *flagp |= HASWIDTH; /* could be SIMPLE too, but needs a handler in regexec.regrepeat */
                 RExC_parse+=len-1; /* we get one from nextchar() as well. :-( */
@@ -6598,7 +6912,11 @@ tryagain:
           required, as the default for this switch is to jump to the
           literal text handling code.
        */
-       switch (*++RExC_parse) {
+       switch ((U8)*++RExC_parse) {
+       case 0xDF:
+       case 0xC3:
+       case 0xCE:
+                  goto do_foldchar;        
        /* Special Escapes */
        case 'A':
            RExC_seen_zerolen++;
@@ -6614,6 +6932,11 @@ tryagain:
            RExC_seen_zerolen++;
            ret = reg_node(pRExC_state, KEEPS);
            *flagp |= SIMPLE;
+           /* XXX:dmq : disabling in-place substitution seems to
+            * be necessary here to avoid cases of memory corruption, as
+            * with: C<$_="x" x 80; s/x\K/y/> -- rgs
+            */
+           RExC_seen |= REG_SEEN_LOOKBEHIND;
            goto finish_meta_pat;
        case 'Z':
            ret = reg_node(pRExC_state, SEOL);
@@ -6800,6 +7123,8 @@ tryagain:
                        goto parse_named_seq;
                }   }
                num = atoi(RExC_parse);
+               if (isg && num == 0)
+                   vFAIL("Reference to invalid group 0");
                 if (isrel) {
                     num = RExC_npar - num;
                     if (num < 1)
@@ -6909,8 +7234,13 @@ tryagain:
                       an unescaped equivalent literal.
                    */
 
-                   switch (*++p) {
+                   switch ((U8)*++p) {
                    /* These are all the special escapes. */
+                   case 0xDF:
+                   case 0xC3:
+                   case 0xCE:
+                          if (LOC || !FOLD || !is_TRICKYFOLD_safe(p,RExC_end,UTF))
+                               goto normal_default;                
                    case 'A':             /* Start assertion */
                    case 'b': case 'B':   /* Word-boundary assertion*/
                    case 'C':             /* Single char !DANGEROUS! */
@@ -6919,7 +7249,7 @@ tryagain:
                    case 'h': case 'H':   /* HORIZWS */
                    case 'k': case 'K':   /* named backref, keep marker */
                    case 'N':             /* named char sequence */
-                   case 'p': case 'P':   /* unicode property */
+                   case 'p': case 'P':   /* Unicode property */
                              case 'R':   /* LNBREAK */
                    case 's': case 'S':   /* space class */
                    case 'v': case 'V':   /* VERTWS */
@@ -7783,12 +8113,16 @@ parseit:
                {
                    if (isLOWER(prevvalue)) {
                        for (i = prevvalue; i <= ceilvalue; i++)
-                           if (isLOWER(i))
+                           if (isLOWER(i) && !ANYOF_BITMAP_TEST(ret,i)) {
+                               stored++;
                                ANYOF_BITMAP_SET(ret, i);
+                           }
                    } else {
                        for (i = prevvalue; i <= ceilvalue; i++)
-                           if (isUPPER(i))
+                           if (isUPPER(i) && !ANYOF_BITMAP_TEST(ret,i)) {
+                               stored++;
                                ANYOF_BITMAP_SET(ret, i);
+                           }
                    }
                }
                else
@@ -7853,8 +8187,8 @@ parseit:
 
                                  if (!unicode_alternate)
                                      unicode_alternate = newAV();
-                                 sv = newSVpvn((char*)foldbuf, foldlen);
-                                 SvUTF8_on(sv);
+                                 sv = newSVpvn_utf8((char*)foldbuf, foldlen,
+                                                    TRUE);
                                  av_push(unicode_alternate, sv);
                              }
                         }
@@ -8385,6 +8719,27 @@ S_regcurly(register const char *s)
 /*
  - regdump - dump a regexp onto Perl_debug_log in vaguely comprehensible form
  */
+#ifdef DEBUGGING
+void 
+S_regdump_extflags(pTHX_ const char *lead, const U32 flags) {
+    int bit;
+    int set=0;
+    for (bit=0; bit<32; bit++) {
+        if (flags & (1<<bit)) {
+            if (!set++ && lead) 
+                PerlIO_printf(Perl_debug_log, "%s",lead);
+            PerlIO_printf(Perl_debug_log, "%s ",PL_reg_extflags_name[bit]);
+        }              
+    }     
+    if (lead)  {
+        if (set) 
+            PerlIO_printf(Perl_debug_log, "\n");
+        else 
+            PerlIO_printf(Perl_debug_log, "%s[none-set]\n",lead);
+    }            
+}   
+#endif
+
 void
 Perl_regdump(pTHX_ const regexp *r)
 {
@@ -8393,6 +8748,7 @@ Perl_regdump(pTHX_ const regexp *r)
     SV * const sv = sv_newmortal();
     SV *dsv= sv_newmortal();
     RXi_GET_DECL(r,ri);
+    GET_RE_DEBUG_FLAGS_DECL;
 
     (void)dumpuntil(r, ri->program, ri->program + 1, NULL, NULL, sv, 0, 0);
 
@@ -8466,6 +8822,7 @@ Perl_regdump(pTHX_ const regexp *r)
     if (r->extflags & RXf_EVAL_SEEN)
        PerlIO_printf(Perl_debug_log, "with eval ");
     PerlIO_printf(Perl_debug_log, "\n");
+    DEBUG_FLAGS_r(regdump_extflags("r->extflags: ",r->extflags));            
 #else
     PERL_UNUSED_CONTEXT;
     PERL_UNUSED_ARG(r);
@@ -8496,19 +8853,17 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o)
     k = PL_regkind[OP(o)];
 
     if (k == EXACT) {
-       SV * const dsv = sv_2mortal(newSVpvs(""));
+       sv_catpvs(sv, " ");
        /* Using is_utf8_string() (via PERL_PV_UNI_DETECT) 
         * is a crude hack but it may be the best for now since 
         * we have no flag "this EXACTish node was UTF-8" 
         * --jhi */
-       const char * const s = 
-           pv_pretty(dsv, STRING(o), STR_LEN(o), 60, 
-               PL_colors[0], PL_colors[1],
-               PERL_PV_ESCAPE_UNI_DETECT |
-               PERL_PV_PRETTY_ELIPSES    |
-               PERL_PV_PRETTY_LTGT    
-            ); 
-       Perl_sv_catpvf(aTHX_ sv, " %s", s );
+       pv_pretty(sv, STRING(o), STR_LEN(o), 60, PL_colors[0], PL_colors[1],
+                 PERL_PV_ESCAPE_UNI_DETECT |
+                 PERL_PV_PRETTY_ELLIPSES   |
+                 PERL_PV_PRETTY_LTGT       |
+                 PERL_PV_PRETTY_NOCLEAR
+                 );
     } else if (k == TRIE) {
        /* print the details of the trie in dumpuntil instead, as
         * progi->data isn't available here */
@@ -8537,7 +8892,7 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o)
             int i;
             int rangestart = -1;
             U8* bitmap = IS_ANYOF_TRIE(op) ? (U8*)ANYOF_BITMAP(o) : (U8*)TRIE_BITMAP(trie);
-            Perl_sv_catpvf(aTHX_ sv, "[");
+            sv_catpvs(sv, "[");
             for (i = 0; i <= 256; i++) {
                 if (i < 256 && BITMAP_TEST(bitmap,i)) {
                     if (rangestart == -1)
@@ -8554,7 +8909,7 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o)
                     rangestart = -1;
                 }
             }
-            Perl_sv_catpvf(aTHX_ sv, "]");
+            sv_catpvs(sv, "]");
         } 
         
     } else if (k == CURLY) {
@@ -8566,7 +8921,7 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o)
        Perl_sv_catpvf(aTHX_ sv, "[%d/%d]", o->flags & 0xf, o->flags>>4);
     else if (k == REF || k == OPEN || k == CLOSE || k == GROUPP || OP(o)==ACCEPT) {
        Perl_sv_catpvf(aTHX_ sv, "%d", (int)ARG(o));    /* Parenth number */
-       if ( prog->paren_names ) {
+       if ( RXp_PAREN_NAMES(prog) ) {
             if ( k != REF || OP(o) < NREF) {       
                AV *list= (AV *)progi->data->data[progi->name_list_idx];
                SV **name= av_fetch(list, ARG(o), 0 );
@@ -8597,7 +8952,7 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o)
     } else if (k == LOGICAL)
        Perl_sv_catpvf(aTHX_ sv, "[%d]", o->flags);     /* 2: embedded, otherwise 1 */
     else if (k == FOLDCHAR)
-       Perl_sv_catpvf(aTHX_ sv, "[0x%"UVXf"]",ARG(o) );        
+       Perl_sv_catpvf(aTHX_ sv, "[0x%"UVXf"]", PTR2UV(ARG(o)) );
     else if (k == ANYOF) {
        int i, rangestart = -1;
        const U8 flags = ANYOF_FLAGS(o);
@@ -8748,9 +9103,10 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o)
 }
 
 SV *
-Perl_re_intuit_string(pTHX_ REGEXP * const prog)
+Perl_re_intuit_string(pTHX_ REGEXP * const r)
 {                              /* Assume that RE_INTUIT is set */
     dVAR;
+    struct regexp *const prog = (struct regexp *)SvANY(r);
     GET_RE_DEBUG_FLAGS_DECL;
     PERL_UNUSED_CONTEXT;
 
@@ -8787,20 +9143,24 @@ Perl_re_intuit_string(pTHX_ REGEXP * const prog)
 */
 #ifndef PERL_IN_XSUB_RE
 void
-Perl_pregfree(pTHX_ struct regexp *r)
+Perl_pregfree(pTHX_ REGEXP *r)
+{
+    SvREFCNT_dec(r);
+}
+
+void
+Perl_pregfree2(pTHX_ REGEXP *rx)
 {
     dVAR;
+    struct regexp *const r = (struct regexp *)SvANY(rx);
     GET_RE_DEBUG_FLAGS_DECL;
 
-    if (!r || (--r->refcnt > 0))
-       return;
     if (r->mother_re) {
         ReREFCNT_dec(r->mother_re);
     } else {
-        CALLREGFREE_PVT(r); /* free the private data */
-        if (r->paren_names)
-            SvREFCNT_dec(r->paren_names);
-        Safefree(r->wrapped);
+        CALLREGFREE_PVT(rx); /* free the private data */
+        if (RXp_PAREN_NAMES(r))
+            SvREFCNT_dec(RXp_PAREN_NAMES(r));
     }        
     if (r->substrs) {
         if (r->anchored_substr)
@@ -8813,14 +9173,13 @@ Perl_pregfree(pTHX_ struct regexp *r)
             SvREFCNT_dec(r->float_utf8);
        Safefree(r->substrs);
     }
-    RX_MATCH_COPY_FREE(r);
+    RX_MATCH_COPY_FREE(rx);
 #ifdef PERL_OLD_COPY_ON_WRITE
     if (r->saved_copy)
         SvREFCNT_dec(r->saved_copy);
 #endif
     Safefree(r->swap);
     Safefree(r->offs);
-    Safefree(r);
 }
 
 /*  reg_temp_copy()
@@ -8840,16 +9199,24 @@ Perl_pregfree(pTHX_ struct regexp *r)
 */    
     
     
-regexp *
-Perl_reg_temp_copy (pTHX_ struct regexp *r) {
-    regexp *ret;
+REGEXP *
+Perl_reg_temp_copy (pTHX_ REGEXP *rx) {
+    REGEXP *ret_x = newSV_type(SVt_REGEXP);
+    struct regexp *ret = (struct regexp *)SvANY(ret_x);
+    struct regexp *const r = (struct regexp *)SvANY(rx);
     register const I32 npar = r->nparens+1;
-    (void)ReREFCNT_inc(r);
-    Newx(ret, 1, regexp);
+    (void)ReREFCNT_inc(rx);
+    /* FIXME ORANGE (once we start actually using the regular SV fields.) */
+    /* We can take advantage of the existing "copied buffer" mechanism in SVs
+       by pointing directly at the buffer, but flagging that the allocated
+       space in the copy is zero. As we've just done a struct copy, it's now
+       a case of zero-ing that, rather than copying the current length.  */
+    SvPV_set(ret_x, RX_WRAPPED(rx));
+    SvFLAGS(ret_x) |= SvFLAGS(rx) & (SVf_POK|SVp_POK|SVf_UTF8);
     StructCopy(r, ret, regexp);
+    SvLEN_set(ret_x, 0);
     Newx(ret->offs, npar, regexp_paren_pair);
     Copy(r->offs, ret->offs, npar, regexp_paren_pair);
-    ret->refcnt = 1;
     if (r->substrs) {
         Newx(ret->substrs, 1, struct reg_substr_data);
        StructCopy(r->substrs, ret->substrs, struct reg_substr_data);
@@ -8862,14 +9229,14 @@ Perl_reg_temp_copy (pTHX_ struct regexp *r) {
        /* check_substr and check_utf8, if non-NULL, point to either their
           anchored or float namesakes, and don't hold a second reference.  */
     }
-    RX_MATCH_COPIED_off(ret);
+    RX_MATCH_COPIED_off(ret_x);
 #ifdef PERL_OLD_COPY_ON_WRITE
     ret->saved_copy = NULL;
 #endif
-    ret->mother_re = r
+    ret->mother_re = rx;
     ret->swap = NULL;
     
-    return ret;
+    return ret_x;
 }
 #endif
 
@@ -8886,9 +9253,10 @@ Perl_reg_temp_copy (pTHX_ struct regexp *r) {
  */
  
 void
-Perl_regfree_internal(pTHX_ struct regexp *r)
+Perl_regfree_internal(pTHX_ REGEXP * const rx)
 {
     dVAR;
+    struct regexp *const r = (struct regexp *)SvANY(rx);
     RXi_GET_DECL(r,ri);
     GET_RE_DEBUG_FLAGS_DECL;
     
@@ -8897,8 +9265,8 @@ Perl_regfree_internal(pTHX_ struct regexp *r)
            reginitcolors();
        {
            SV *dsv= sv_newmortal();
-            RE_PV_QUOTED_DECL(s, (r->extflags & RXf_UTF8),
-                dsv, r->precomp, r->prelen, 60);
+            RE_PV_QUOTED_DECL(s, RX_UTF8(rx),
+                dsv, RX_PRECOMP(rx), RX_PRELEN(rx), 60);
             PerlIO_printf(Perl_debug_log,"%sFreeing REx:%s %s\n", 
                 PL_colors[4],PL_colors[5],s);
         }
@@ -9019,23 +9387,15 @@ Perl_regfree_internal(pTHX_ struct regexp *r)
 */
 #if defined(USE_ITHREADS)
 #ifndef PERL_IN_XSUB_RE
-regexp *
-Perl_re_dup(pTHX_ const regexp *r, CLONE_PARAMS *param)
+void
+Perl_re_dup_guts(pTHX_ const REGEXP *sstr, REGEXP *dstr, CLONE_PARAMS *param)
 {
     dVAR;
-    regexp *ret;
     I32 npar;
-
-    if (!r)
-       return (REGEXP *)NULL;
-
-    if ((ret = (REGEXP *)ptr_table_fetch(PL_ptr_table, r)))
-       return ret;
-
+    const struct regexp *r = (const struct regexp *)SvANY(sstr);
+    struct regexp *ret = (struct regexp *)SvANY(dstr);
     
     npar = r->nparens+1;
-    Newx(ret, 1, regexp);
-    StructCopy(r, ret, regexp);
     Newx(ret->offs, npar, regexp_paren_pair);
     Copy(r->offs, ret->offs, npar, regexp_paren_pair);
     if(ret->swap) {
@@ -9073,14 +9433,12 @@ Perl_re_dup(pTHX_ const regexp *r, CLONE_PARAMS *param)
        }
     }
 
-    ret->wrapped        = SAVEPVN(ret->wrapped, ret->wraplen+1);
-    ret->precomp        = ret->wrapped + (ret->precomp - ret->wrapped);
-    ret->paren_names    = hv_dup_inc(ret->paren_names, param);
+    RXp_PAREN_NAMES(ret) = hv_dup_inc(RXp_PAREN_NAMES(ret), param);
 
     if (ret->pprivate)
-       RXi_SET(ret,CALLREGDUPE_PVT(ret,param));
+       RXi_SET(ret,CALLREGDUPE_PVT(dstr,param));
 
-    if (RX_MATCH_COPIED(ret))
+    if (RX_MATCH_COPIED(dstr))
        ret->subbeg  = SAVEPVN(ret->subbeg, ret->sublen);
     else
        ret->subbeg = NULL;
@@ -9091,9 +9449,6 @@ Perl_re_dup(pTHX_ const regexp *r, CLONE_PARAMS *param)
     ret->mother_re      = NULL;
     ret->gofs = 0;
     ret->seen_evals = 0;
-    
-    ptr_table_store(PL_ptr_table, r, ret);
-    return ret;
 }
 #endif /* PERL_IN_XSUB_RE */
 
@@ -9112,9 +9467,10 @@ Perl_re_dup(pTHX_ const regexp *r, CLONE_PARAMS *param)
 */
 
 void *
-Perl_regdupe_internal(pTHX_ REGEXP * const r, CLONE_PARAMS *param)
+Perl_regdupe_internal(pTHX_ REGEXP * const rx, CLONE_PARAMS *param)
 {
     dVAR;
+    struct regexp *const r = (struct regexp *)SvANY(rx);
     regexp_internal *reti;
     int len, npar;
     RXi_GET_DECL(r,ri);
@@ -9236,14 +9592,14 @@ Perl_regdupe_internal(pTHX_ REGEXP * const r, CLONE_PARAMS *param)
 char *
 Perl_reg_stringify(pTHX_ MAGIC *mg, STRLEN *lp, U32 *flags, I32 *haseval ) {
     dVAR;
-    const regexp * const re = (regexp *)mg->mg_obj;
+    const REGEXP * const re = (REGEXP *)mg->mg_obj;
     if (haseval) 
-        *haseval = re->seen_evals;
+        *haseval = RX_SEEN_EVALS(re);
     if (flags)    
-       *flags = ((re->extflags & RXf_UTF8) ? 1 : 0);
+       *flags = RX_UTF8(re) ? 1 : 0;
     if (lp)
-       *lp = re->wraplen;
-    return re->wrapped;
+       *lp = RX_WRAPLEN(re);
+    return RX_WRAPPED(re);
 }
 
 /*
@@ -9336,7 +9692,7 @@ Perl_save_re_context(pTHX)
        const REGEXP * const rx = PM_GETRE(PL_curpm);
        if (rx) {
            U32 i;
-           for (i = 1; i <= rx->nparens; i++) {
+           for (i = 1; i <= RX_NPARENS(rx); i++) {
                char digits[TYPE_CHARS(long)];
                const STRLEN len = my_snprintf(digits, sizeof(digits), "%lu", (long)i);
                GV *const *const gvp
@@ -9357,7 +9713,7 @@ static void
 clear_re(pTHX_ void *r)
 {
     dVAR;
-    ReREFCNT_dec((regexp *)r);
+    ReREFCNT_dec((REGEXP *)r);
 }
 
 #ifdef DEBUGGING
@@ -9365,12 +9721,24 @@ clear_re(pTHX_ void *r)
 STATIC void
 S_put_byte(pTHX_ SV *sv, int c)
 {
-    if (isCNTRL(c) || c == 255 || !isPRINT(c))
+    /* Our definition of isPRINT() ignores locales, so only bytes that are
+       not part of UTF-8 are considered printable. I assume that the same
+       holds for UTF-EBCDIC.
+       Also, code point 255 is not printable in either (it's E0 in EBCDIC,
+       which Wikipedia says:
+
+       EO, or Eight Ones, is an 8-bit EBCDIC character code represented as all
+       ones (binary 1111 1111, hexadecimal FF). It is similar, but not
+       identical, to the ASCII delete (DEL) or rubout control character.
+       ) So the old condition can be simplified to !isPRINT(c)  */
+    if (!isPRINT(c))
        Perl_sv_catpvf(aTHX_ sv, "\\%o", c);
-    else if (c == '-' || c == ']' || c == '\\' || c == '^')
-       Perl_sv_catpvf(aTHX_ sv, "\\%c", c);
-    else
-       Perl_sv_catpvf(aTHX_ sv, "%c", c);
+    else {
+       const char string = c;
+       if (c == '-' || c == ']' || c == '\\' || c == '^')
+           sv_catpvs(sv, "\\");
+       sv_catpvn(sv, &string, 1);
+    }
 }
 
 
@@ -9473,7 +9841,7 @@ S_dumpuntil(pTHX_ const regexp *r, const regnode *start, const regnode *node,
                     elem_ptr ? pv_pretty(sv, SvPV_nolen_const(*elem_ptr), SvCUR(*elem_ptr), 60,
                            PL_colors[0], PL_colors[1],
                            (SvUTF8(*elem_ptr) ? PERL_PV_ESCAPE_UNI : 0) |
-                           PERL_PV_PRETTY_ELIPSES    |
+                           PERL_PV_PRETTY_ELLIPSES    |
                            PERL_PV_PRETTY_LTGT
                             )
                             : "???"