This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Re: [PATCH] Callbacks for named captures (%+ and %-)
[perl5.git] / regcomp.c
index 0bead35..6c9fd2a 100644 (file)
--- a/regcomp.c
+++ b/regcomp.c
@@ -1966,7 +1966,8 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, regnode *firs
                 }
                 if ( count == 1 ) {
                     SV **tmp = av_fetch( revcharmap, idx, 0);
-                    char *ch = SvPV_nolen( *tmp );
+                    STRLEN len;
+                    char *ch = SvPV( *tmp, len );
                     DEBUG_OPTIMISE_r({
                         SV *sv=sv_newmortal();
                         PerlIO_printf( Perl_debug_log,
@@ -1985,11 +1986,9 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, regnode *firs
                         str=STRING(convert);
                         STR_LEN(convert)=0;
                     }
-                    while (*ch) {
+                    STR_LEN(convert) += len;
+                    while (len--)
                         *str++ = *ch++;
-                        STR_LEN(convert)++;
-                    }
-                    
                } else {
 #ifdef DEBUGGING           
                    if (state>1)
@@ -2400,6 +2399,34 @@ typedef struct scan_frame {
 
 #define SCAN_COMMIT(s, data, m) scan_commit(s, data, m, is_inf)
 
+#define CASE_SYNST_FNC(nAmE)                                       \
+case nAmE:                                                         \
+    if (flags & SCF_DO_STCLASS_AND) {                              \
+           for (value = 0; value < 256; value++)                  \
+               if (!is_ ## nAmE ## _cp(value))                       \
+                   ANYOF_BITMAP_CLEAR(data->start_class, value);  \
+    }                                                              \
+    else {                                                         \
+           for (value = 0; value < 256; value++)                  \
+               if (is_ ## nAmE ## _cp(value))                        \
+                   ANYOF_BITMAP_SET(data->start_class, value);    \
+    }                                                              \
+    break;                                                         \
+case N ## nAmE:                                                    \
+    if (flags & SCF_DO_STCLASS_AND) {                              \
+           for (value = 0; value < 256; value++)                   \
+               if (is_ ## nAmE ## _cp(value))                         \
+                   ANYOF_BITMAP_CLEAR(data->start_class, value);   \
+    }                                                               \
+    else {                                                          \
+           for (value = 0; value < 256; value++)                   \
+               if (!is_ ## nAmE ## _cp(value))                        \
+                   ANYOF_BITMAP_SET(data->start_class, value);     \
+    }                                                               \
+    break
+
+
+
 STATIC I32
 S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                         I32 *minlenp, I32 *deltap,
@@ -3330,6 +3357,46 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                break;
            }
        }
+       else if (OP(scan) == LNBREAK) {
+           if (flags & SCF_DO_STCLASS) {
+               int value = 0;
+               data->start_class->flags &= ~ANYOF_EOS; /* No match on empty */
+               if (flags & SCF_DO_STCLASS_AND) {
+                    for (value = 0; value < 256; value++)
+                        if (!is_VERTWS_cp(value))
+                            ANYOF_BITMAP_CLEAR(data->start_class, value);  
+                }                                                              
+                else {                                                         
+                    for (value = 0; value < 256; value++)
+                        if (is_VERTWS_cp(value))
+                            ANYOF_BITMAP_SET(data->start_class, value);           
+                }                                                              
+                if (flags & SCF_DO_STCLASS_OR)
+                   cl_and(data->start_class, and_withp);
+               flags &= ~SCF_DO_STCLASS;
+            }
+           min += 1;
+           delta += 1;
+            if (flags & SCF_DO_SUBSTR) {
+               SCAN_COMMIT(pRExC_state,data,minlenp);  /* Cannot expect anything... */
+               data->pos_min += 1;
+               data->pos_delta += 1;
+               data->longest = &(data->longest_float);
+           }
+           
+       }
+       else if (OP(scan) == FOLDCHAR) {
+           int d = ARG(scan)==0xDF ? 1 : 2;
+           flags &= ~SCF_DO_STCLASS;
+            min += 1;
+            delta += d;
+            if (flags & SCF_DO_SUBSTR) {
+               SCAN_COMMIT(pRExC_state,data,minlenp);  /* Cannot expect anything... */
+               data->pos_min += 1;
+               data->pos_delta += d;
+               data->longest = &(data->longest_float);
+           }
+       }
        else if (strchr((const char*)PL_simple,OP(scan))) {
            int value = 0;
 
@@ -3524,6 +3591,9 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                        }
                    }
                    break;
+               CASE_SYNST_FNC(VERTWS);
+               CASE_SYNST_FNC(HORIZWS);
+               
                }
                if (flags & SCF_DO_STCLASS_OR)
                    cl_and(data->start_class, and_withp);
@@ -3894,6 +3964,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
        }
 #endif /* old or new */
 #endif /* TRIE_STUDY_OPT */    
+
        /* Else: zero-length, ignore. */
        scan = regnext(scan);
     }
@@ -4016,8 +4087,8 @@ extern const struct regexp_engine my_reg_engine;
 #endif
 
 #ifndef PERL_IN_XSUB_RE 
-regexp *
-Perl_pregcomp(pTHX_ char *exp, char *xend, PMOP *pm)
+REGEXP *
+Perl_pregcomp(pTHX_ const SV * const pattern, const U32 flags)
 {
     dVAR;
     HV * const table = GvHV(PL_hintgv);
@@ -4032,21 +4103,23 @@ Perl_pregcomp(pTHX_ char *exp, char *xend, PMOP *pm)
                 PerlIO_printf(Perl_debug_log, "Using engine %"UVxf"\n",
                     SvIV(*ptr));
             });            
-            return CALLREGCOMP_ENG(eng, exp, xend, pm);
+            return CALLREGCOMP_ENG(eng, pattern, flags);
         } 
     }
-    return Perl_re_compile(aTHX_ exp, xend, pm);
+    return Perl_re_compile(aTHX_ pattern, flags);
 }
 #endif
 
-regexp *
-Perl_re_compile(pTHX_ char *exp, char *xend, PMOP *pm)
+REGEXP *
+Perl_re_compile(pTHX_ const SV * const pattern, const U32 pm_flags)
 {
     dVAR;
-    register regexp *r;
+    register REGEXP *r;
     register regexp_internal *ri;
+    STRLEN plen;
+    char*  exp = SvPV((SV*)pattern, plen);
+    char* xend = exp + plen;
     regnode *scan;
-    regnode *first;
     I32 flags;
     I32 minlen = 0;
     I32 sawplus = 0;
@@ -4060,23 +4133,20 @@ Perl_re_compile(pTHX_ char *exp, char *xend, PMOP *pm)
 #endif    
     GET_RE_DEBUG_FLAGS_DECL;
     DEBUG_r(if (!PL_colorset) reginitcolors());
-        
-    if (exp == NULL)
-       FAIL("NULL regexp argument");
 
-    RExC_utf8 = RExC_orig_utf8 = pm->op_pmdynflags & PMdf_CMP_UTF8;
+    RExC_utf8 = RExC_orig_utf8 = pm_flags & RXf_UTF8;
 
     DEBUG_COMPILE_r({
         SV *dsv= sv_newmortal();
         RE_PV_QUOTED_DECL(s, RExC_utf8,
-            dsv, exp, (xend - exp), 60);
+            dsv, exp, plen, 60);
         PerlIO_printf(Perl_debug_log, "%sCompiling REx%s %s\n",
                       PL_colors[4],PL_colors[5],s);
     });
 
 redo_first_pass:
     RExC_precomp = exp;
-    RExC_flags = pm->op_pmflags;
+    RExC_flags = pm_flags;
     RExC_sawback = 0;
 
     RExC_seen = 0;
@@ -4124,7 +4194,7 @@ redo_first_pass:
         thing.
         XXX: somehow figure out how to make this less expensive...
         -- dmq */
-        STRLEN len = xend-exp;
+        STRLEN len = plen;
         DEBUG_PARSE_r(PerlIO_printf(Perl_debug_log,
            "UTF8 mismatch! Converting to utf8 for resizing and compile\n"));
         exp = (char*)Perl_bytes_to_utf8(aTHX_ (U8*)exp, &len);
@@ -4170,8 +4240,8 @@ redo_first_pass:
     RXi_SET( r, ri );
     r->engine= RE_ENGINE_PTR;
     r->refcnt = 1;
-    r->prelen = xend - exp;
-    r->extflags = pm->op_pmflags & RXf_PMf_COMPILETIME;
+    r->prelen = plen;
+    r->extflags = pm_flags;
     {
         bool has_k     = ((r->extflags & RXf_PMf_KEEPCOPY) == RXf_PMf_KEEPCOPY);
        bool has_minus = ((r->extflags & RXf_PMf_STD_PMMOD) != RXf_PMf_STD_PMMOD);
@@ -4239,7 +4309,7 @@ redo_first_pass:
     RExC_rxi = ri;
 
     /* Second pass: emit code. */
-    RExC_flags = pm->op_pmflags;       /* don't let top level (?i) bleed */
+    RExC_flags = pm_flags;     /* don't let top level (?i) bleed */
     RExC_parse = exp;
     RExC_end = xend;
     RExC_naughty = 0;
@@ -4291,8 +4361,9 @@ reStudy:
 #endif    
 
     /* Dig out information for optimizations. */
-    r->extflags = pm->op_pmflags & RXf_PMf_COMPILETIME; /* Again? */
-    pm->op_pmflags = RExC_flags;
+    r->extflags = pm_flags; /* Again? */
+    /*dmq: removed as part of de-PMOP: pm->op_pmflags = RExC_flags; */
     if (UTF)
         r->extflags |= RXf_UTF8;       /* Unicode in it? */
     ri->regstclass = NULL;
@@ -4308,18 +4379,20 @@ reStudy:
        struct regnode_charclass_class ch_class; /* pointed to by data */
        int stclass_flag;
        I32 last_close = 0; /* pointed to by data */
-
-       first = scan;
+        regnode *first= scan;
+        regnode *first_next= regnext(first);
+       
        /* Skip introductions and multiplicators >= 1. */
        while ((OP(first) == OPEN && (sawopen = 1)) ||
               /* An OR of *one* alternative - should not happen now. */
-           (OP(first) == BRANCH && OP(regnext(first)) != BRANCH) ||
+           (OP(first) == BRANCH && OP(first_next) != BRANCH) ||
            /* for now we can't handle lookbehind IFMATCH*/
            (OP(first) == IFMATCH && !first->flags) || 
            (OP(first) == PLUS) ||
            (OP(first) == MINMOD) ||
               /* An {n,m} with n>0 */
-           (PL_regkind[OP(first)] == CURLY && ARG1(first) > 0) ) 
+           (PL_regkind[OP(first)] == CURLY && ARG1(first) > 0) ||
+           (OP(first) == NOTHING && PL_regkind[OP(first_next)] != END ))
        {
                
                if (OP(first) == PLUS)
@@ -4331,6 +4404,7 @@ reStudy:
                    first += EXTRA_STEP_2ARGS;
                } else  /* XXX possible optimisation for /(?=)/  */
                    first = NEXTOPER(first);
+               first_next= regnext(first);
        }
 
        /* Starting-point info. */
@@ -4676,7 +4750,7 @@ reStudy:
         r->paren_names = (HV*)SvREFCNT_inc(RExC_paren_names);
     else
         r->paren_names = NULL;
-    if (r->prelen == 3 && strEQ("\\s+", r->precomp))
+    if (r->prelen == 3 && strnEQ("\\s+", r->precomp, 3)) /* precomp = "\\s+)" */
        r->extflags |= RXf_WHITE;
     else if (r->prelen == 1 && r->precomp[0] == '^')
         r->extflags |= RXf_START_ONLY;
@@ -4695,8 +4769,7 @@ reStudy:
             ARG2L_SET( scan, RExC_open_parens[ARG(scan)-1] - scan );
         }
     }
-    Newxz(r->startp, RExC_npar * 2, I32);
-    r->endp = r->startp + RExC_npar;
+    Newxz(r->offs, RExC_npar, regexp_paren_pair);
     /* assume we don't need to swap parens around before we match */
 
     DEBUG_DUMP_r({
@@ -4724,11 +4797,52 @@ reStudy:
 
 
 SV*
-Perl_reg_named_buff_get(pTHX_ const REGEXP * const rx, SV* namesv, U32 flags)
+Perl_reg_named_buff(pTHX_ REGEXP * const rx, SV * const key, SV * const value,
+                    const U32 flags)
+{
+    /* Single entry point for hash-style operations on the named capture
+     * buffers.  The RXf_HASH_* bit in `flags' selects the operation, tested
+     * in this order: fetch, store/delete/clear (always croaks with
+     * PL_no_modify), exists, list of names, scalar/count.  Any other flag
+     * combination is a panic.  `value' is never used. */
+    PERL_UNUSED_ARG(value);
+
+    if (flags & RXf_HASH_FETCH)
+        return reg_named_buff_fetch(rx, key, flags);
+
+    if (flags & (RXf_HASH_STORE | RXf_HASH_DELETE | RXf_HASH_CLEAR)) {
+        Perl_croak(aTHX_ PL_no_modify);
+        return NULL;
+    }
+
+    if (flags & RXf_HASH_EXISTS) {
+        if (reg_named_buff_exists(rx, key, flags))
+            return &PL_sv_yes;
+        return &PL_sv_no;
+    }
+
+    if (flags & RXf_HASH_REGNAMES)
+        return reg_named_buff_all(rx, flags);
+
+    if (flags & (RXf_HASH_SCALAR | RXf_HASH_REGNAMES_COUNT))
+        return reg_named_buff_scalar(rx, flags);
+
+    Perl_croak(aTHX_ "panic: Unknown flags %d in named_buff", (int)flags);
+    return NULL;
+}
+
+/* Iteration entry point for the named capture buffers: RXf_HASH_FIRSTKEY
+ * is routed to reg_named_buff_firstkey() and RXf_HASH_NEXTKEY to
+ * reg_named_buff_nextkey(); anything else is a panic.  `lastkey' is not
+ * consulted. */
+SV*
+Perl_reg_named_buff_iter(pTHX_ REGEXP * const rx, const SV * const lastkey,
+                         const U32 flags)
+{
+    PERL_UNUSED_ARG(lastkey);
+
+    if (flags & RXf_HASH_FIRSTKEY) {
+        return reg_named_buff_firstkey(rx, flags);
+    }
+    if (flags & RXf_HASH_NEXTKEY) {
+        return reg_named_buff_nextkey(rx, flags);
+    }
+
+    Perl_croak(aTHX_ "panic: Unknown flags %d in named_buff_iter", (int)flags);
+    return NULL;
+}
+
+SV*
+Perl_reg_named_buff_fetch(pTHX_ REGEXP * const rx, SV * const namesv, const U32 flags)
 {
     AV *retarray = NULL;
     SV *ret;
-    if (flags & 1) 
+    if (flags & RXf_HASH_ALL)
         retarray=newAV();
 
     if (rx && rx->paren_names) {
@@ -4738,63 +4852,177 @@ Perl_reg_named_buff_get(pTHX_ const REGEXP * const rx, SV* namesv, U32 flags)
             SV* sv_dat=HeVAL(he_str);
             I32 *nums=(I32*)SvPVX(sv_dat);
             for ( i=0; i<SvIVX(sv_dat); i++ ) {
-               if ((I32)(rx->nparens) >= nums[i]
-                       && rx->startp[nums[i]] != -1
-                       && rx->endp[nums[i]] != -1)
+                if ((I32)(rx->nparens) >= nums[i]
+                    && rx->offs[nums[i]].start != -1
+                    && rx->offs[nums[i]].end != -1)
                 {
-                    ret = CALLREG_NUMBUF(rx,nums[i],NULL);
+                    ret = newSVpvs("");
+                    CALLREG_NUMBUF_FETCH(rx,nums[i],ret);
                     if (!retarray)
                         return ret;
                 } else {
                     ret = newSVsv(&PL_sv_undef);
                 }
                 if (retarray) {
-                    SvREFCNT_inc(ret);
+                    SvREFCNT_inc_simple_void(ret);
                     av_push(retarray, ret);
                 }
             }
             if (retarray)
-                return (SV*)retarray;
+                return newRV((SV*)retarray);
         }
     }
     return NULL;
 }
 
+/* Report whether the named capture `key' exists in `rx'.
+ *
+ * With RXf_HASH_ALL the answer is simply whether the name is present in
+ * rx->paren_names.  Otherwise the name counts as existing only when the
+ * engine's named-buffer fetch callback returns a value for it.
+ *
+ * Returns FALSE when rx is NULL or has no named captures. */
+bool
+Perl_reg_named_buff_exists(pTHX_ REGEXP * const rx, SV * const key,
+                           const U32 flags)
+{
+    if (rx && rx->paren_names) {
+        if (flags & RXf_HASH_ALL) {
+            return hv_exists_ent(rx->paren_names, key, 0);
+        } else {
+            SV * const sv = CALLREG_NAMED_BUFF_FETCH(rx, key, flags);
+            if (sv) {
+                /* The fetched value is only used as a yes/no answer;
+                 * release it so it is not leaked. */
+                SvREFCNT_dec(sv);
+                return TRUE;
+            }
+            return FALSE;
+        }
+    }
+    return FALSE;
+}
+
+/* Restart iteration over the capture names of `rx' and return the first
+ * key, as produced by the engine's next-key callback (called with
+ * RXf_HASH_FIRSTKEY stripped from `flags').
+ *
+ * Returns NULL when rx is NULL or the pattern has no named captures, the
+ * same guard the sibling named-buffer functions apply; without it
+ * hv_iterinit() would be handed a NULL hash. */
+SV*
+Perl_reg_named_buff_firstkey(pTHX_ REGEXP * const rx, const U32 flags)
+{
+    if (!rx || !rx->paren_names)
+        return NULL;
+
+    (void)hv_iterinit(rx->paren_names);
+
+    return CALLREG_NAMED_BUFF_NEXTKEY(rx, NULL, flags & ~RXf_HASH_FIRSTKEY);
+}
+
+/* Advance the iterator of rx->paren_names and return the next capture
+ * name as a new SV.  A name is skipped unless RXf_HASH_ALL is set or at
+ * least one of the buffers listed for it is not above rx->lastcloseparen
+ * and has both offsets set (!= -1).  Returns NULL when the iterator is
+ * exhausted, or when rx is NULL or has no named captures. */
+SV*
+Perl_reg_named_buff_nextkey(pTHX_ REGEXP * const rx, const U32 flags)
+{
+    HV *names;
+    HE *entry;
+
+    if (!rx || !rx->paren_names)
+        return NULL;
+
+    names = rx->paren_names;
+    while ( (entry = hv_iternext_flags(names, 0)) ) {
+        SV * const sv_dat = HeVAL(entry);
+        I32 * const nums = (I32*)SvPVX(sv_dat);
+        IV matched = 0;
+        IV idx;
+
+        /* Look for the first buffer of this name that took part in
+         * the match. */
+        for ( idx = 0; idx < SvIVX(sv_dat); idx++ ) {
+            if ((I32)(rx->lastcloseparen) >= nums[idx]
+                && rx->offs[nums[idx]].start != -1
+                && rx->offs[nums[idx]].end != -1)
+            {
+                matched = nums[idx];
+                break;
+            }
+        }
+
+        if (matched || (flags & RXf_HASH_ALL)) {
+            STRLEN klen;
+            char *kpv = HePV(entry, klen);
+            return newSVpvn(kpv, klen);
+        }
+    }
+    return NULL;
+}
+
+/* Scalar-context value of the named capture hashes, as a new IV SV.
+ *
+ * With RXf_HASH_ALL or RXf_HASH_REGNAMES_COUNT the result is the number
+ * of keys in rx->paren_names.  With RXf_HASH_ONE it is the number of
+ * names returned by the engine's "all names" callback.  Any other flag
+ * combination is a panic.
+ *
+ * Returns &PL_sv_undef when rx is NULL or has no named captures. */
+SV*
+Perl_reg_named_buff_scalar(pTHX_ REGEXP * const rx, const U32 flags)
+{
+    SV *ret;
+    AV *av;
+    I32 length;
+
+    if (rx && rx->paren_names) {
+        if (flags & (RXf_HASH_ALL | RXf_HASH_REGNAMES_COUNT)) {
+            return newSViv(HvTOTALKEYS(rx->paren_names));
+        } else if (flags & RXf_HASH_ONE) {
+            ret = CALLREG_NAMED_BUFF_ALL(rx, (flags | RXf_HASH_REGNAMES));
+            av = (AV*)SvRV(ret);
+            length = av_len(av);
+            /* The list was built only to be counted; drop the reference
+             * we were handed so it is not leaked. */
+            SvREFCNT_dec(ret);
+            return newSViv(length + 1);
+        } else {
+            Perl_croak(aTHX_ "panic: Unknown flags %d in named_buff_scalar", (int)flags);
+            return NULL;
+        }
+    }
+    return &PL_sv_undef;
+}
+
 SV*
-Perl_reg_numbered_buff_get(pTHX_ const REGEXP * const rx, I32 paren, SV* usesv)
+Perl_reg_named_buff_all(pTHX_ REGEXP * const rx, const U32 flags)
+{
+    /* Build a list of capture names and return a reference to it.  A name
+     * is included when RXf_HASH_ALL is set, or when at least one of the
+     * buffers listed for it is not above rx->lastcloseparen and has both
+     * offsets set (!= -1).  The list is empty when rx is NULL or has no
+     * named captures. */
+    AV *av = newAV();
+
+    if (rx && rx->paren_names) {
+        HV *hv= rx->paren_names;
+        HE *temphe;
+        (void)hv_iterinit(hv);
+        while ( (temphe = hv_iternext_flags(hv,0)) ) {
+            IV i;
+            IV parno = 0;
+            SV* sv_dat = HeVAL(temphe);
+            I32 *nums = (I32*)SvPVX(sv_dat);
+            for ( i = 0; i < SvIVX(sv_dat); i++ ) {
+                if ((I32)(rx->lastcloseparen) >= nums[i] &&
+                    rx->offs[nums[i]].start != -1 &&
+                    rx->offs[nums[i]].end != -1)
+                {
+                    parno = nums[i];
+                    break;
+                }
+            }
+            if (parno || flags & RXf_HASH_ALL) {
+                STRLEN len;
+                char *pv = HePV(temphe, len);
+                av_push(av, newSVpvn(pv,len));
+            }
+        }
+    }
+
+    /* Hand our only reference to the array over to the RV.  newRV() would
+     * add a second one that nothing ever releases, leaking the array. */
+    return newRV_noinc((SV*)av);
+}
+
+void
+Perl_reg_numbered_buff_fetch(pTHX_ REGEXP * const rx, const I32 paren, SV * const sv)
 {
     char *s = NULL;
     I32 i = 0;
     I32 s1, t1;
-    SV *sv = usesv ? usesv : newSVpvs("");
         
     if (!rx->subbeg) {
         sv_setsv(sv,&PL_sv_undef);
-        return sv;
+        return;
     } 
     else               
-    if (paren == -2 && rx->startp[0] != -1) {
+    if (paren == RXf_PREMATCH && rx->offs[0].start != -1) {
         /* $` */
-       i = rx->startp[0];
+       i = rx->offs[0].start;
        s = rx->subbeg;
     }
     else 
-    if (paren == -1 && rx->endp[0] != -1) {
+    if (paren == RXf_POSTMATCH && rx->offs[0].end != -1) {
         /* $' */
-       s = rx->subbeg + rx->endp[0];
-       i = rx->sublen - rx->endp[0];
+       s = rx->subbeg + rx->offs[0].end;
+       i = rx->sublen - rx->offs[0].end;
     } 
     else
     if ( 0 <= paren && paren <= (I32)rx->nparens &&
-        (s1 = rx->startp[paren]) != -1 &&
-        (t1 = rx->endp[paren]) != -1)
+        (s1 = rx->offs[paren].start) != -1 &&
+        (t1 = rx->offs[paren].end) != -1)
     {
         /* $& $1 ... */
         i = t1 - s1;
         s = rx->subbeg + s1;
     } else {
         sv_setsv(sv,&PL_sv_undef);
-        return sv;
+        return;
     }          
     assert(rx->sublen >= (s - rx->subbeg) + i );
     if (i >= 0) {
@@ -4832,10 +5060,86 @@ Perl_reg_numbered_buff_get(pTHX_ const REGEXP * const rx, I32 paren, SV* usesv)
         }
     } else {
         sv_setsv(sv,&PL_sv_undef);
+        return;
     }
-    return sv;
 }
 
+/* Store callback for the numbered capture buffers.  No argument is used:
+ * the call croaks with PL_no_modify unless PL_localizing is set, in which
+ * case it does nothing. */
+void
+Perl_reg_numbered_buff_store(pTHX_ REGEXP * const rx, const I32 paren,
+                                                        SV const * const value)
+{
+    PERL_UNUSED_ARG(rx);
+    PERL_UNUSED_ARG(paren);
+    PERL_UNUSED_ARG(value);
+
+    if (PL_localizing)
+        return;
+
+    Perl_croak(aTHX_ PL_no_modify);
+}
+
+/* Length of a capture buffer of `rx'.
+ *
+ * `paren' selects RXf_PREMATCH, RXf_POSTMATCH, or a buffer number
+ * (0 being the whole match).  An unset pre/post-match, or one of
+ * non-positive length, yields 0.  An unset or out-of-range numbered
+ * buffer yields 0 after an "uninitialized" warning on `sv' (when that
+ * warning category is enabled).
+ *
+ * Otherwise the result is the byte span of the buffer, replaced by the
+ * count reported by is_utf8_string_loclen() when the match is flagged
+ * RX_MATCH_UTF8 and that check succeeds. */
+I32
+Perl_reg_numbered_buff_length(pTHX_ REGEXP * const rx, const SV * const sv,
+                              const I32 paren)
+{
+    I32 len;
+    I32 from, to;
+
+    /* Some of this code was originally in C<Perl_magic_len> in F<mg.c> */
+    switch (paren) {
+    case RXf_PREMATCH:          /* $` / ${^PREMATCH} */
+        if (rx->offs[0].start == -1)
+            return 0;
+        len = rx->offs[0].start;
+        if (len <= 0)
+            return 0;
+        from = 0;
+        to = len;
+        break;
+
+    case RXf_POSTMATCH:         /* $' / ${^POSTMATCH} */
+        if (rx->offs[0].end == -1)
+            return 0;
+        len = rx->sublen - rx->offs[0].end;
+        if (len <= 0)
+            return 0;
+        from = rx->offs[0].end;
+        to = rx->sublen;
+        break;
+
+    default:                    /* $& / ${^MATCH}, $1, $2, ... */
+        if (paren > (I32)rx->nparens
+            || (from = rx->offs[paren].start) == -1
+            || (to = rx->offs[paren].end) == -1)
+        {
+            if (ckWARN(WARN_UNINITIALIZED))
+                report_uninit((SV*)sv);
+            return 0;
+        }
+        len = to - from;
+        break;
+    }
+
+    if (len > 0 && RX_MATCH_UTF8(rx)) {
+        const char * const s = rx->subbeg + from;
+        const U8 *ep;
+        STRLEN el;
+
+        len = to - from;
+        if (is_utf8_string_loclen((U8*)s, len, &ep, &el))
+            len = el;
+    }
+    return len;
+}
+
+/* Return a new SV holding the string "Regexp"; `rx' is not consulted. */
+SV*
+Perl_reg_qr_package(pTHX_ REGEXP * const rx)
+{
+    SV * const pkg = newSVpvs("Regexp");
+
+    PERL_UNUSED_ARG(rx);
+    return pkg;
+}
 
 /* Scans the name of a named buffer from the pattern.
  * If flags is REG_RSN_RETURN_NULL returns null.
@@ -5125,7 +5429,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
                     if (!SIZE_ONLY) {
                         num = add_data( pRExC_state, 1, "S" );
                         RExC_rxi->data->data[num]=(void*)sv_dat;
-                        SvREFCNT_inc(sv_dat);
+                        SvREFCNT_inc_simple_void(sv_dat);
                     }
                     RExC_sawback = 1;
                     ret = reganode(pRExC_state,
@@ -5460,7 +5764,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
                    if (!SIZE_ONLY) {
                         num = add_data( pRExC_state, 1, "S" );
                         RExC_rxi->data->data[num]=(void*)sv_dat;
-                        SvREFCNT_inc(sv_dat);
+                        SvREFCNT_inc_simple_void(sv_dat);
                     }
                     ret = reganode(pRExC_state,NGROUPP,num);
                     goto insert_if_check_paren;
@@ -6356,8 +6660,7 @@ S_reg_recode(pTHX_ const char value, SV **encp)
 {
     STRLEN numlen = 1;
     SV * const sv = sv_2mortal(newSVpvn(&value, numlen));
-    const char * const s = encp && *encp ? sv_recode_to_utf8(sv, *encp)
-                                        : SvPVX(sv);
+    const char * const s = *encp ? sv_recode_to_utf8(sv, *encp) : SvPVX(sv);
     const STRLEN newlen = SvCUR(sv);
     UV uv = UNICODE_REPLACEMENT;
 
@@ -6368,8 +6671,7 @@ S_reg_recode(pTHX_ const char value, SV **encp)
 
     if (!newlen || numlen != newlen) {
        uv = UNICODE_REPLACEMENT;
-       if (encp)
-           *encp = NULL;
+       *encp = NULL;
     }
     return uv;
 }
@@ -6410,7 +6712,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
 
 
 tryagain:
-    switch (*RExC_parse) {
+    switch ((U8)*RExC_parse) {
     case '^':
        RExC_seen_zerolen++;
        nextchar(pRExC_state);
@@ -6494,6 +6796,21 @@ tryagain:
        RExC_parse++;
        vFAIL("Quantifier follows nothing");
        break;
+    case 0xDF:
+    case 0xC3:
+    case 0xCE:
+        if (!LOC && FOLD) {
+            U32 len,cp;
+            if ((cp = what_len_TRICKYFOLD_safe(RExC_parse,RExC_end,UTF,len))) {
+                *flagp |= HASWIDTH; /* could be SIMPLE too, but needs a handler in regexec.regrepeat */
+                RExC_parse+=len-1; /* we get one from nextchar() as well. :-( */
+                ret = reganode(pRExC_state, FOLDCHAR, cp);
+                Set_Node_Length(ret, 1); /* MJD */
+                nextchar(pRExC_state); /* kill whitespace under /x */
+                return ret;
+            }
+        }
+        goto outer_default;
     case '\\':
        /* Special Escapes
 
@@ -6579,15 +6896,25 @@ tryagain:
            ret = reg_node(pRExC_state, NDIGIT);
            *flagp |= HASWIDTH|SIMPLE;
            goto finish_meta_pat;
+       case 'R':
+           ret = reg_node(pRExC_state, LNBREAK);
+           *flagp |= HASWIDTH|SIMPLE;
+           goto finish_meta_pat;
+       case 'h':
+           ret = reg_node(pRExC_state, HORIZWS);
+           *flagp |= HASWIDTH|SIMPLE;
+           goto finish_meta_pat;
+       case 'H':
+           ret = reg_node(pRExC_state, NHORIZWS);
+           *flagp |= HASWIDTH|SIMPLE;
+           goto finish_meta_pat;
        case 'v':
-           ret = reganode(pRExC_state, PRUNE, 0);
-           ret->flags = 1;
-           *flagp |= SIMPLE;
+           ret = reg_node(pRExC_state, VERTWS);
+           *flagp |= HASWIDTH|SIMPLE;
            goto finish_meta_pat;
        case 'V':
-           ret = reganode(pRExC_state, SKIP, 0);
-           ret->flags = 1;
-           *flagp |= SIMPLE;
+           ret = reg_node(pRExC_state, NVERTWS);
+           *flagp |= HASWIDTH|SIMPLE;
          finish_meta_pat:          
            nextchar(pRExC_state);
             Set_Node_Length(ret, 2); /* MJD */
@@ -6658,7 +6985,7 @@ tryagain:
                 if (!SIZE_ONLY) {
                     num = add_data( pRExC_state, 1, "S" );
                     RExC_rxi->data->data[num]=(void*)sv_dat;
-                    SvREFCNT_inc(sv_dat);
+                    SvREFCNT_inc_simple_void(sv_dat);
                 }
 
                 RExC_sawback = 1;
@@ -6754,7 +7081,8 @@ tryagain:
        }
        /* FALL THROUGH */
 
-    default: {
+    default:
+        outer_default:{
            register STRLEN len;
            register UV ender;
            register char *p;
@@ -6779,7 +7107,12 @@ tryagain:
 
                if (RExC_flags & RXf_PMf_EXTENDED)
                    p = regwhite( pRExC_state, p );
-               switch (*p) {
+               switch ((U8)*p) {
+               case 0xDF:
+               case 0xC3:
+               case 0xCE:
+                          if (LOC || !FOLD || !is_TRICKYFOLD_safe(p,RExC_end,UTF))
+                               goto normal_default;
                case '^':
                case '$':
                case '.':
@@ -6809,11 +7142,13 @@ tryagain:
                    case 'C':             /* Single char !DANGEROUS! */
                    case 'd': case 'D':   /* digit class */
                    case 'g': case 'G':   /* generic-backref, pos assertion */
+                   case 'h': case 'H':   /* HORIZWS */
                    case 'k': case 'K':   /* named backref, keep marker */
                    case 'N':             /* named char sequence */
                    case 'p': case 'P':   /* unicode property */
+                             case 'R':   /* LNBREAK */
                    case 's': case 'S':   /* space class */
-                   case 'v': case 'V':   /* (*PRUNE) and (*SKIP) */
+                   case 'v': case 'V':   /* VERTWS */
                    case 'w': case 'W':   /* word class */
                    case 'X':             /* eXtended Unicode "combining character sequence" */
                    case 'z': case 'Z':   /* End of line/string assertion */
@@ -7236,6 +7571,21 @@ case ANYOF_N##NAME:                                     \
     what = WORD;                                        \
     break
 
+#define _C_C_T_NOLOC_(NAME,TEST,WORD)                   \
+ANYOF_##NAME:                                           \
+       for (value = 0; value < 256; value++)           \
+           if (TEST)                                   \
+               ANYOF_BITMAP_SET(ret, value);           \
+    yesno = '+';                                        \
+    what = WORD;                                        \
+    break;                                              \
+case ANYOF_N##NAME:                                     \
+       for (value = 0; value < 256; value++)           \
+           if (!TEST)                                  \
+               ANYOF_BITMAP_SET(ret, value);           \
+    yesno = '!';                                        \
+    what = WORD;                                        \
+    break
 
 /*
    parse a class specification and produce either an ANYOF node that
@@ -7248,10 +7598,10 @@ STATIC regnode *
 S_regclass(pTHX_ RExC_state_t *pRExC_state, U32 depth)
 {
     dVAR;
-    register UV value = 0;
     register UV nextvalue;
     register IV prevvalue = OOB_UNICODE;
     register IV range = 0;
+    UV value = 0; /* XXX:dmq: needs to be referenceable (unfortunately) */
     register regnode *ret;
     STRLEN numlen;
     IV namedclass;
@@ -7354,6 +7704,10 @@ parseit:
            case 'S':   namedclass = ANYOF_NSPACE;      break;
            case 'd':   namedclass = ANYOF_DIGIT;       break;
            case 'D':   namedclass = ANYOF_NDIGIT;      break;
+           case 'v':   namedclass = ANYOF_VERTWS;      break;
+           case 'V':   namedclass = ANYOF_NVERTWS;     break;
+           case 'h':   namedclass = ANYOF_HORIZWS;     break;
+           case 'H':   namedclass = ANYOF_NHORIZWS;    break;
             case 'N':  /* Handle \N{NAME} in class */
                 {
                     /* We only pay attention to the first char of 
@@ -7532,6 +7886,8 @@ parseit:
                case _C_C_T_(SPACE, isSPACE(value), "SpacePerl");
                case _C_C_T_(UPPER, isUPPER(value), "Upper");
                case _C_C_T_(XDIGIT, isXDIGIT(value), "XDigit");
+               case _C_C_T_NOLOC_(VERTWS, is_VERTWS_latin1(&value), "VertSpace");
+               case _C_C_T_NOLOC_(HORIZWS, is_HORIZWS_latin1(&value), "HorizSpace");
                case ANYOF_ASCII:
                    if (LOC)
                        ANYOF_CLASS_SET(ret, ANYOF_ASCII);
@@ -8466,6 +8822,8 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o)
                 SVfARG((SV*)progi->data->data[ ARG( o ) ]));
     } else if (k == LOGICAL)
        Perl_sv_catpvf(aTHX_ sv, "[%d]", o->flags);     /* 2: embedded, otherwise 1 */
+    else if (k == FOLDCHAR)
+       Perl_sv_catpvf(aTHX_ sv, "[0x%"UVXf"]",ARG(o) );        
     else if (k == ANYOF) {
        int i, rangestart = -1;
        const U8 flags = ANYOF_FLAGS(o);
@@ -8616,7 +8974,7 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o)
 }
 
 SV *
-Perl_re_intuit_string(pTHX_ regexp *prog)
+Perl_re_intuit_string(pTHX_ REGEXP * const prog)
 {                              /* Assume that RE_INTUIT is set */
     dVAR;
     GET_RE_DEBUG_FLAGS_DECL;
@@ -8686,11 +9044,8 @@ Perl_pregfree(pTHX_ struct regexp *r)
     if (r->saved_copy)
         SvREFCNT_dec(r->saved_copy);
 #endif
-    if (r->swap) {
-        Safefree(r->swap->startp);
-        Safefree(r->swap);
-    }
-    Safefree(r->startp);
+    Safefree(r->swap);
+    Safefree(r->offs);
     Safefree(r);
 }
 
@@ -8718,13 +9073,10 @@ Perl_reg_temp_copy (pTHX_ struct regexp *r) {
     (void)ReREFCNT_inc(r);
     Newx(ret, 1, regexp);
     StructCopy(r, ret, regexp);
-    Newx(ret->startp, npar * 2, I32);
-    Copy(r->startp, ret->startp, npar * 2, I32);
-    ret->endp = ret->startp + npar;
+    Newx(ret->offs, npar, regexp_paren_pair);
+    Copy(r->offs, ret->offs, npar, regexp_paren_pair);
     ret->refcnt = 1;
     if (r->substrs) {
-        struct reg_substr_datum *s;
-        I32 i;
         Newx(ret->substrs, 1, struct reg_substr_data);
        StructCopy(r->substrs, ret->substrs, struct reg_substr_data);
 
@@ -8738,10 +9090,7 @@ Perl_reg_temp_copy (pTHX_ struct regexp *r) {
     }
     RX_MATCH_COPIED_off(ret);
 #ifdef PERL_OLD_COPY_ON_WRITE
-    /* this is broken. */
-    assert(0); 
-    if (ret->saved_copy)
-        ret->saved_copy=NULL;
+    ret->saved_copy = NULL;
 #endif
     ret->mother_re = r; 
     ret->swap = NULL;
@@ -8763,7 +9112,7 @@ Perl_reg_temp_copy (pTHX_ struct regexp *r) {
  */
  
 void
-Perl_regfree_internal(pTHX_ struct regexp *r)
+Perl_regfree_internal(pTHX_ REGEXP * const r)
 {
     dVAR;
     RXi_GET_DECL(r,ri);
@@ -8911,20 +9260,16 @@ Perl_re_dup(pTHX_ const regexp *r, CLONE_PARAMS *param)
 
     
     npar = r->nparens+1;
-    Newxz(ret, 1, regexp);
-    Newx(ret->startp, npar * 2, I32);
-    Copy(r->startp, ret->startp, npar * 2, I32);
-    ret->endp = ret->startp + npar;
-    if(r->swap) {
-        Newx(ret->swap, 1, regexp_paren_ofs);
+    Newx(ret, 1, regexp);
+    StructCopy(r, ret, regexp);
+    Newx(ret->offs, npar, regexp_paren_pair);
+    Copy(r->offs, ret->offs, npar, regexp_paren_pair);
+    if(ret->swap) {
         /* no need to copy these */
-        Newx(ret->swap->startp, npar * 2, I32);
-       ret->swap->endp = ret->swap->startp + npar;
-    } else {
-        ret->swap = NULL;
+        Newx(ret->swap, npar, regexp_paren_pair);
     }
 
-    if (r->substrs) {
+    if (ret->substrs) {
        /* Do it this way to avoid reading from *r after the StructCopy().
           That way, if any of the sv_dup_inc()s dislodge *r from the L1
           cache, it doesn't matter.  */
@@ -8952,41 +9297,26 @@ Perl_re_dup(pTHX_ const regexp *r, CLONE_PARAMS *param)
                ret->check_utf8 = ret->float_utf8;
            }
        }
-    } else 
-        ret->substrs = NULL;    
+    }
 
-    ret->wrapped        = SAVEPVN(r->wrapped, r->wraplen+1);
-    ret->precomp        = ret->wrapped + (r->precomp - r->wrapped);
-    ret->prelen         = r->prelen;
-    ret->wraplen        = r->wraplen;
+    ret->wrapped        = SAVEPVN(ret->wrapped, ret->wraplen+1);
+    ret->precomp        = ret->wrapped + (ret->precomp - ret->wrapped);
+    ret->paren_names    = hv_dup_inc(ret->paren_names, param);
 
-    ret->mother_re      = NULL;
-    ret->refcnt         = r->refcnt;
-    ret->minlen         = r->minlen;
-    ret->minlenret      = r->minlenret;
-    ret->nparens        = r->nparens;
-    ret->lastparen      = r->lastparen;
-    ret->lastcloseparen = r->lastcloseparen;
-    ret->intflags       = r->intflags;
-    ret->extflags       = r->extflags;
-
-    ret->sublen         = r->sublen;
-
-    ret->engine         = r->engine;
-    
-    ret->paren_names    = hv_dup_inc(r->paren_names, param);
+    if (ret->pprivate)
+       RXi_SET(ret,CALLREGDUPE_PVT(ret,param));
 
     if (RX_MATCH_COPIED(ret))
-       ret->subbeg  = SAVEPVN(r->subbeg, r->sublen);
+       ret->subbeg  = SAVEPVN(ret->subbeg, ret->sublen);
     else
        ret->subbeg = NULL;
 #ifdef PERL_OLD_COPY_ON_WRITE
     ret->saved_copy = NULL;
 #endif
-    
-    ret->pprivate = r->pprivate;
-    if (ret->pprivate) 
-        RXi_SET(ret,CALLREGDUPE_PVT(ret,param));
+
+    ret->mother_re      = NULL;
+    ret->gofs = 0;
+    ret->seen_evals = 0;
     
     ptr_table_store(PL_ptr_table, r, ret);
     return ret;
@@ -9008,7 +9338,7 @@ Perl_re_dup(pTHX_ const regexp *r, CLONE_PARAMS *param)
 */
 
 void *
-Perl_regdupe_internal(pTHX_ const regexp *r, CLONE_PARAMS *param)
+Perl_regdupe_internal(pTHX_ REGEXP * const r, CLONE_PARAMS *param)
 {
     dVAR;
     regexp_internal *reti;