This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Typo fix in perlretut by Simon Taylor
[perl5.git] / regcomp.c
index 625479c..da245b1 100644 (file)
--- a/regcomp.c
+++ b/regcomp.c
 */
 
 #ifdef PERL_EXT_RE_BUILD
-/* need to replace pregcomp et al, so enable that */
-#  ifndef PERL_IN_XSUB_RE
-#    define PERL_IN_XSUB_RE
-#  endif
-/* need access to debugger hooks */
-#  if defined(PERL_EXT_RE_DEBUG) && !defined(DEBUGGING)
-#    define DEBUGGING
-#  endif
-#endif
-
-#ifdef PERL_IN_XSUB_RE
-/* We *really* need to overwrite these symbols: */
-#  define Perl_pregcomp my_regcomp
-#  define Perl_regdump my_regdump
-#  define Perl_regprop my_regprop
-#  define Perl_pregfree my_regfree
-#  define Perl_re_intuit_string my_re_intuit_string
-/* *These* symbols are masked to allow static link. */
-#  define Perl_regnext my_regnext
-#  define Perl_save_re_context my_save_re_context
-#  define Perl_reginitcolors my_reginitcolors
-
-#  define PERL_NO_GET_CONTEXT
+#include "re_top.h"
 #endif
 
 /*
 #endif
 
 #define REG_COMP_C
-#include "regcomp.h"
+#ifdef PERL_IN_XSUB_RE
+#  include "re_comp.h"
+#else
+#  include "regcomp.h"
+#endif
 
 #ifdef op
 #undef op
@@ -444,7 +426,7 @@ static void clear_re(pTHX_ void *r);
    floating substrings if needed. */
 
 STATIC void
-S_scan_commit(pTHX_ RExC_state_t *pRExC_state, scan_data_t *data)
+S_scan_commit(pTHX_ const RExC_state_t *pRExC_state, scan_data_t *data)
 {
     const STRLEN l = CHR_SVLEN(data->last_found);
     const STRLEN old_l = CHR_SVLEN(*data->longest);
@@ -476,10 +458,11 @@ S_scan_commit(pTHX_ RExC_state_t *pRExC_state, scan_data_t *data)
     SvCUR_set(data->last_found, 0);
     {
        SV * const sv = data->last_found;
-       MAGIC * const mg =
-           SvUTF8(sv) && SvMAGICAL(sv) ? mg_find(sv, PERL_MAGIC_utf8) : NULL;
-       if (mg && mg->mg_len > 0)
-           mg->mg_len = 0;
+       if (SvUTF8(sv) && SvMAGICAL(sv)) {
+           MAGIC * const mg = mg_find(sv, PERL_MAGIC_utf8);
+           if (mg)
+               mg->mg_len = 0;
+       }
     }
     data->last_end = -1;
     data->flags &= ~SF_BEFORE_EOL;
@@ -487,7 +470,7 @@ S_scan_commit(pTHX_ RExC_state_t *pRExC_state, scan_data_t *data)
 
 /* Can match anything (initialization) */
 STATIC void
-S_cl_anything(RExC_state_t *pRExC_state, struct regnode_charclass_class *cl)
+S_cl_anything(const RExC_state_t *pRExC_state, struct regnode_charclass_class *cl)
 {
     ANYOF_CLASS_ZERO(cl);
     ANYOF_BITMAP_SETALL(cl);
@@ -514,7 +497,7 @@ S_cl_is_anything(const struct regnode_charclass_class *cl)
 
 /* Can match anything (initialization) */
 STATIC void
-S_cl_init(RExC_state_t *pRExC_state, struct regnode_charclass_class *cl)
+S_cl_init(const RExC_state_t *pRExC_state, struct regnode_charclass_class *cl)
 {
     Zero(cl, 1, struct regnode_charclass_class);
     cl->type = ANYOF;
@@ -522,7 +505,7 @@ S_cl_init(RExC_state_t *pRExC_state, struct regnode_charclass_class *cl)
 }
 
 STATIC void
-S_cl_init_zero(RExC_state_t *pRExC_state, struct regnode_charclass_class *cl)
+S_cl_init_zero(const RExC_state_t *pRExC_state, struct regnode_charclass_class *cl)
 {
     Zero(cl, 1, struct regnode_charclass_class);
     cl->type = ANYOF;
@@ -571,7 +554,7 @@ S_cl_and(struct regnode_charclass_class *cl,
 /* 'OR' a given class with another one.  Can create false positives */
 /* We assume that cl is not inverted */
 STATIC void
-S_cl_or(RExC_state_t *pRExC_state, struct regnode_charclass_class *cl, const struct regnode_charclass_class *or_with)
+S_cl_or(const RExC_state_t *pRExC_state, struct regnode_charclass_class *cl, const struct regnode_charclass_class *or_with)
 {
     if (or_with->flags & ANYOF_INVERT) {
        /* We do not use
@@ -806,7 +789,7 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, regnode *firs
     /* first pass, loop through and scan words */
     reg_trie_data *trie;
     regnode *cur;
-    const U32 uniflags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
+    const U32 uniflags = UTF8_ALLOW_DEFAULT;
     STRLEN len = 0;
     UV uvc = 0;
     U16 curword = 0;
@@ -1015,7 +998,7 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, regnode *firs
                });
 
            } else {
-               /* Its a dupe. So ignore it. */
+               NOOP;   /* It's a dupe. So ignore it. */
            }
 
         } /* end second pass */
@@ -1221,7 +1204,7 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, regnode *firs
                 });
 
             } else {
-                /* Its a dupe. So ignore it. */
+               NOOP;  /* Its a dupe. So ignore it. */
             }
 
         } /* end second pass */
@@ -1536,7 +1519,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, I32 *deltap,
        /* Peephole optimizer: */
        DEBUG_OPTIMISE_r({
          SV * const mysv=sv_newmortal();
-         regprop( mysv, scan);
+         regprop(RExC_rx, mysv, scan);
          PerlIO_printf(Perl_debug_log, "%*speep: %s (0x%08"UVXf")\n",
            (int)depth*2, "", SvPV_nolen_const(mysv), PTR2UV(scan));
        });
@@ -1617,7 +1600,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, I32 *deltap,
                 char * const s0 = STRING(scan), *s, *t;
                 char * const s1 = s0 + STR_LEN(scan) - 1;
                 char * const s2 = s1 - 4;
-                const char * const t0 = "\xcc\x88\xcc\x81";
+                const char t0[] = "\xcc\x88\xcc\x81";
                 const char * const t1 = t0 + 3;
 
                 for (s = s0 + 2;
@@ -1678,7 +1661,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, I32 *deltap,
            if (OP(next) == code || code == IFTHEN || code == SUSPEND) {
                I32 max1 = 0, min1 = I32_MAX, num = 0;
                struct regnode_charclass_class accum;
-               regnode *startbranch=scan;
+               regnode * const startbranch=scan;
                
                if (flags & SCF_DO_SUBSTR) /* XXXX Add !SUSPEND? */
                    scan_commit(pRExC_state, data); /* Cannot merge strings after this. */
@@ -1831,7 +1814,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, I32 *deltap,
                         }
 
                         DEBUG_OPTIMISE_r({
-                            regprop( mysv, tail );
+                            regprop(RExC_rx, mysv, tail );
                             PerlIO_printf( Perl_debug_log, "%*s%s%s%s\n",
                                 (int)depth * 2 + 2, "", "Tail node is:", SvPV_nolen_const( mysv ),
                                 (RExC_seen_evals) ? "[EVAL]" : ""
@@ -1868,16 +1851,16 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, I32 *deltap,
                             regnode * const noper_next = regnext( noper );
 
                             DEBUG_OPTIMISE_r({
-                                regprop( mysv, cur);
+                                regprop(RExC_rx, mysv, cur);
                                 PerlIO_printf( Perl_debug_log, "%*s%s",
                                    (int)depth * 2 + 2,"  ", SvPV_nolen_const( mysv ) );
 
-                                regprop( mysv, noper);
+                                regprop(RExC_rx, mysv, noper);
                                 PerlIO_printf( Perl_debug_log, " -> %s",
                                     SvPV_nolen_const(mysv));
 
                                 if ( noper_next ) {
-                                  regprop( mysv, noper_next );
+                                  regprop(RExC_rx, mysv, noper_next );
                                   PerlIO_printf( Perl_debug_log,"\t=> %s\t",
                                     SvPV_nolen_const(mysv));
                                 }
@@ -1895,20 +1878,20 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, I32 *deltap,
                                 } else {
                                     DEBUG_OPTIMISE_r(
                                         if (!last ) {
-                                            regprop( mysv, first);
+                                            regprop(RExC_rx, mysv, first);
                                             PerlIO_printf( Perl_debug_log, "%*s%s",
                                               (int)depth * 2 + 2, "F:", SvPV_nolen_const( mysv ) );
-                                            regprop( mysv, NEXTOPER(first) );
+                                            regprop(RExC_rx, mysv, NEXTOPER(first) );
                                             PerlIO_printf( Perl_debug_log, " -> %s\n",
                                               SvPV_nolen_const( mysv ) );
                                         }
                                     );
                                     last = cur;
                                     DEBUG_OPTIMISE_r({
-                                        regprop( mysv, cur);
+                                        regprop(RExC_rx, mysv, cur);
                                         PerlIO_printf( Perl_debug_log, "%*s%s",
                                           (int)depth * 2 + 2, "N:", SvPV_nolen_const( mysv ) );
-                                        regprop( mysv, noper );
+                                        regprop(RExC_rx, mysv, noper );
                                         PerlIO_printf( Perl_debug_log, " -> %s\n",
                                           SvPV_nolen_const( mysv ) );
                                     });
@@ -1936,7 +1919,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, I32 *deltap,
                             }
                         }
                         DEBUG_OPTIMISE_r({
-                            regprop( mysv, cur);
+                            regprop(RExC_rx, mysv, cur);
                             PerlIO_printf( Perl_debug_log,
                               "%*s%s\t(0x%p,0x%p,0x%p)\n", (int)depth * 2 + 2,
                               "  ", SvPV_nolen_const( mysv ), first, last, cur);
@@ -1978,6 +1961,8 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, I32 *deltap,
                        ? I32_MAX : data->pos_min + data->pos_delta;
                }
                sv_catpvn(data->last_found, STRING(scan), STR_LEN(scan));
+               if (UTF)
+                   SvUTF8_on(data->last_found);
                {
                    SV * const sv = data->last_found;
                    MAGIC * const mg = SvUTF8(sv) && SvMAGICAL(sv) ?
@@ -1986,8 +1971,6 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, I32 *deltap,
                        mg->mg_len += utf8_length((U8*)STRING(scan),
                                                  (U8*)STRING(scan)+STR_LEN(scan));
                }
-               if (UTF)
-                   SvUTF8_on(data->last_found);
                data->last_end = data->pos_min + l;
                data->pos_min += l; /* As in the first entry. */
                data->flags &= ~SF_BEFORE_EOL;
@@ -2027,15 +2010,17 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, I32 *deltap,
            UV uc = *((U8*)STRING(scan));
 
            /* Search for fixed substrings supports EXACT only. */
-           if (flags & SCF_DO_SUBSTR)
+           if (flags & SCF_DO_SUBSTR) {
+               assert(data);
                scan_commit(pRExC_state, data);
+           }
            if (UTF) {
                const U8 * const s = (U8 *)STRING(scan);
                l = utf8_length(s, s + l);
                uc = utf8_to_uvchr(s, NULL);
            }
            min += l;
-           if (data && (flags & SCF_DO_SUBSTR))
+           if (flags & SCF_DO_SUBSTR)
                data->pos_min += l;
            if (flags & SCF_DO_STCLASS_AND) {
                /* Check whether it is compatible with what we know already! */
@@ -2071,7 +2056,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, I32 *deltap,
        else if (strchr((const char*)PL_varies,OP(scan))) {
            I32 mincount, maxcount, minnext, deltanext, fl = 0;
            I32 f = flags, pos_before = 0;
-           regnode *oscan = scan;
+           regnode * const oscan = scan;
            struct regnode_charclass_class this_class;
            struct regnode_charclass_class *oclass = NULL;
            I32 next_is_eval = 0;
@@ -2205,7 +2190,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, I32 *deltap,
                      && !deltanext && minnext == 1 ) {
                    /* Try to optimize to CURLYN.  */
                    regnode *nxt = NEXTOPER(oscan) + EXTRA_STEP_2ARGS;
-                   regnode *nxt1 = nxt;
+                   regnode * const nxt1 = nxt;
 #ifdef DEBUGGING
                    regnode *nxt2;
 #endif
@@ -2334,7 +2319,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, I32 *deltap,
                        I32 b = pos_before >= data->last_start_min
                            ? pos_before : data->last_start_min;
                        STRLEN l;
-                       const char *s = SvPV_const(data->last_found, l);
+                       const char * const s = SvPV_const(data->last_found, l);
                        I32 old = b - data->last_start_min;
 #endif
 
@@ -2383,7 +2368,13 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, I32 *deltap,
                            the group.  */
                        scan_commit(pRExC_state,data);
                        if (mincount && last_str) {
-                           sv_setsv(data->last_found, last_str);
+                           SV * const sv = data->last_found;
+                           MAGIC * const mg = SvUTF8(sv) && SvMAGICAL(sv) ?
+                               mg_find(sv, PERL_MAGIC_utf8) : NULL;
+
+                           if (mg)
+                               mg->mg_len = -1;
+                           sv_setsv(sv, last_str);
                            data->last_end = data->pos_min;
                            data->last_start_min =
                                data->pos_min - CHR_SVLEN(last_str);
@@ -2744,6 +2735,7 @@ S_add_data(RExC_state_t *pRExC_state, I32 n, const char *s)
     return RExC_rx->data->count - n;
 }
 
+#ifndef PERL_IN_XSUB_RE
 void
 Perl_reginitcolors(pTHX)
 {
@@ -2769,7 +2761,7 @@ Perl_reginitcolors(pTHX)
     }
     PL_colorset = 1;
 }
-
+#endif
 
 /*
  - pregcomp - compile a regular expression into internal code
@@ -2946,7 +2938,7 @@ Perl_pregcomp(pTHX_ char *exp, char *xend, PMOP *pm)
       again:
        if (PL_regkind[(U8)OP(first)] == EXACT) {
            if (OP(first) == EXACT)
-               ;       /* Empty, get anchored substr later. */
+               NOOP;   /* Empty, get anchored substr later. */
            else if ((OP(first) == EXACTF || OP(first) == EXACTFL))
                r->regstclass = first;
        }
@@ -3101,9 +3093,8 @@ Perl_pregcomp(pTHX_ char *exp, char *xend, PMOP *pm)
                       struct regnode_charclass_class);
            r->regstclass = (regnode*)RExC_rx->data->data[n];
            r->reganch &= ~ROPT_SKIP;   /* Used in find_byclass(). */
-           PL_regdata = r->data; /* for regprop() */
            DEBUG_COMPILE_r({ SV *sv = sv_newmortal();
-                     regprop(sv, (regnode*)data.start_class);
+                     regprop(r, sv, (regnode*)data.start_class);
                      PerlIO_printf(Perl_debug_log,
                                    "synthetic stclass \"%s\".\n",
                                    SvPVX_const(sv));});
@@ -3158,7 +3149,7 @@ Perl_pregcomp(pTHX_ char *exp, char *xend, PMOP *pm)
            r->regstclass = (regnode*)RExC_rx->data->data[n];
            r->reganch &= ~ROPT_SKIP;   /* Used in find_byclass(). */
            DEBUG_COMPILE_r({ SV* sv = sv_newmortal();
-                     regprop(sv, (regnode*)data.start_class);
+                     regprop(r, sv, (regnode*)data.start_class);
                      PerlIO_printf(Perl_debug_log,
                                    "synthetic stclass \"%s\".\n",
                                    SvPVX_const(sv));});
@@ -3176,7 +3167,6 @@ Perl_pregcomp(pTHX_ char *exp, char *xend, PMOP *pm)
        r->reganch |= ROPT_CANY_SEEN;
     Newxz(r->startp, RExC_npar, I32);
     Newxz(r->endp, RExC_npar, I32);
-    PL_regdata = r->data; /* for regprop() */
     DEBUG_COMPILE_r(regdump(r));
     return(r);
 }
@@ -3317,8 +3307,10 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp)
                        FAIL("Eval-group not allowed at runtime, use re 'eval'");
                    if (PL_tainting && PL_tainted)
                        FAIL("Eval-group in insecure regular expression");
+#if PERL_VERSION > 8
                    if (IN_PERL_COMPILETIME)
                        PL_cv_has_eval = 1;
+#endif
                }
 
                nextchar(pRExC_state);
@@ -3659,7 +3651,7 @@ S_regbranch(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, I32 first)
        *flagp |= flags&SIMPLE;
     }
 
-    return(ret);
+    return ret;
 }
 
 /*
@@ -4268,7 +4260,7 @@ tryagain:
                    if (UTF8_IS_START(*p) && UTF) {
                        STRLEN numlen;
                        ender = utf8n_to_uvchr((U8*)p, RExC_end - p,
-                                              &numlen, 0);
+                                              &numlen, UTF8_ALLOW_DEFAULT);
                        p += numlen;
                    }
                    else
@@ -4449,7 +4441,7 @@ S_regpposixcc(pTHX_ RExC_state_t *pRExC_state, I32 value)
        /* I smell either [: or [= or [. -- POSIX has been here, right? */
        POSIXCC(UCHARAT(RExC_parse))) {
        const char c = UCHARAT(RExC_parse);
-       char* s = RExC_parse++;
+       char* const s = RExC_parse++;
        
        while (RExC_parse < RExC_end && UCHARAT(RExC_parse) != c)
            RExC_parse++;
@@ -4661,8 +4653,10 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
            ANYOF_FLAGS(ret) |= ANYOF_INVERT;
     }
 
-    if (SIZE_ONLY)
+    if (SIZE_ONLY) {
        RExC_size += ANYOF_SKIP;
+       listsv = &PL_sv_undef; /* For code scanners: listsv always non-NULL. */
+    }
     else {
        RExC_emit += ANYOF_SKIP;
        if (FOLD)
@@ -4693,7 +4687,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
        if (UTF) {
            value = utf8n_to_uvchr((U8*)RExC_parse,
                                   RExC_end - RExC_parse,
-                                  &numlen, 0);
+                                  &numlen, UTF8_ALLOW_DEFAULT);
            RExC_parse += numlen;
        }
        else
@@ -4705,7 +4699,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
            if (UTF) {
                value = utf8n_to_uvchr((U8*)RExC_parse,
                                   RExC_end - RExC_parse,
-                                  &numlen, 0);
+                                  &numlen, UTF8_ALLOW_DEFAULT);
                RExC_parse += numlen;
            }
            else
@@ -4753,12 +4747,8 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
                              n--;
                         }
                    }
-                   if (value == 'p')
-                        Perl_sv_catpvf(aTHX_ listsv,
-                                       "+utf8::%.*s\n", (int)n, RExC_parse);
-                   else
-                        Perl_sv_catpvf(aTHX_ listsv,
-                                       "!utf8::%.*s\n", (int)n, RExC_parse);
+                   Perl_sv_catpvf(aTHX_ listsv, "%cutf8::%.*s\n",
+                       (value=='p' ? '+' : '!'), (int)n, RExC_parse);
                }
                RExC_parse = e + 1;
                ANYOF_FLAGS(ret) |= ANYOF_UNICODE;
@@ -4827,14 +4817,12 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
            if (range) {
                if (!SIZE_ONLY) {
                    if (ckWARN(WARN_REGEXP)) {
-                       int w =
+                       const int w =
                            RExC_parse >= rangebegin ?
                            RExC_parse - rangebegin : 0;
                        vWARN4(RExC_parse,
                               "False [] range \"%*.*s\"",
-                              w,
-                              w,
-                              rangebegin);
+                              w, w, rangebegin);
                    }
                    if (prevvalue < 256) {
                        ANYOF_BITMAP_SET(ret, prevvalue);
@@ -5224,10 +5212,8 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
 
        if (range) {
            if (prevvalue > (IV)value) /* b-a */ {
-               Simple_vFAIL4("Invalid [] range \"%*.*s\"",
-                             RExC_parse - rangebegin,
-                             RExC_parse - rangebegin,
-                             rangebegin);
+               const int w = RExC_parse - rangebegin;
+               Simple_vFAIL4("Invalid [] range \"%*.*s\"", w, w, rangebegin);
                range = 0; /* not a valid range */
            }
        }
@@ -5240,14 +5226,12 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
                /* a bad range like \w-, [:word:]- ? */
                if (namedclass > OOB_NAMEDCLASS) {
                    if (ckWARN(WARN_REGEXP)) {
-                       int w =
+                       const int w =
                            RExC_parse >= rangebegin ?
                            RExC_parse - rangebegin : 0;
                        vWARN4(RExC_parse,
                               "False [] range \"%*.*s\"",
-                              w,
-                              w,
-                              rangebegin);
+                              w, w, rangebegin);
                    }
                    if (!SIZE_ONLY)
                        ANYOF_BITMAP_SET(ret, '-');
@@ -5391,7 +5375,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
     }
 
     if (!SIZE_ONLY) {
-       AV *av = newAV();
+       AV * const av = newAV();
        SV *rv;
 
        /* The 0th element stores the character class description
@@ -5415,7 +5399,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
 STATIC char*
 S_nextchar(pTHX_ RExC_state_t *pRExC_state)
 {
-    char* retval = RExC_parse++;
+    char* const retval = RExC_parse++;
 
     for (;;) {
        if (*RExC_parse == '(' && RExC_parse[1] == '?' &&
@@ -5590,8 +5574,9 @@ S_reginsert(pTHX_ RExC_state_t *pRExC_state, U8 op, regnode *opnd)
 /*
 - regtail - set the next-pointer at the end of a node chain of p to val.
 */
+/* TODO: All three parms should be const */
 STATIC void
-S_regtail(pTHX_ RExC_state_t *pRExC_state, regnode *p, regnode *val)
+S_regtail(pTHX_ const RExC_state_t *pRExC_state, regnode *p, const regnode *val)
 {
     dVAR;
     register regnode *scan;
@@ -5619,8 +5604,9 @@ S_regtail(pTHX_ RExC_state_t *pRExC_state, regnode *p, regnode *val)
 /*
 - regoptail - regtail on operand of first argument; nop if operandless
 */
+/* TODO: All three parms should be const */
 STATIC void
-S_regoptail(pTHX_ RExC_state_t *pRExC_state, regnode *p, regnode *val)
+S_regoptail(pTHX_ const RExC_state_t *pRExC_state, regnode *p, const regnode *val)
 {
     dVAR;
     /* "Operandless" and "op != BRANCH" are synonymous in practice. */
@@ -5662,13 +5648,13 @@ S_regcurly(register const char *s)
  - regdump - dump a regexp onto Perl_debug_log in vaguely comprehensible form
  */
 void
-Perl_regdump(pTHX_ regexp *r)
+Perl_regdump(pTHX_ const regexp *r)
 {
 #ifdef DEBUGGING
     dVAR;
     SV * const sv = sv_newmortal();
 
-    (void)dumpuntil(r->program, r->program + 1, NULL, sv, 0);
+    (void)dumpuntil(r, r->program, r->program + 1, NULL, sv, 0);
 
     /* Header fields of interest. */
     if (r->anchored_substr)
@@ -5720,7 +5706,7 @@ Perl_regdump(pTHX_ regexp *r)
        PerlIO_printf(Perl_debug_log, ") ");
 
     if (r->regstclass) {
-       regprop(sv, r->regstclass);
+       regprop(r, sv, r->regstclass);
        PerlIO_printf(Perl_debug_log, "stclass \"%s\" ", SvPVX_const(sv));
     }
     if (r->reganch & ROPT_ANCH) {
@@ -5758,6 +5744,7 @@ Perl_regdump(pTHX_ regexp *r)
         });
     }
 #else
+    PERL_UNUSED_CONTEXT;
     PERL_UNUSED_ARG(r);
 #endif /* DEBUGGING */
 }
@@ -5766,7 +5753,7 @@ Perl_regdump(pTHX_ regexp *r)
 - regprop - printable representation of opcode
 */
 void
-Perl_regprop(pTHX_ SV *sv, const regnode *o)
+Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o)
 {
 #ifdef DEBUGGING
     dVAR;
@@ -5798,17 +5785,10 @@ Perl_regprop(pTHX_ SV *sv, const regnode *o)
                       PL_colors[0],
                       len, s,
                       PL_colors[1]);
-    } else if (k == TRIE) {/*
-       this isn't always safe, as Pl_regdata may not be for this regex yet
-       (depending on where its called from) so its being moved to dumpuntil
-       I32 n = ARG(o);
-       reg_trie_data *trie=(reg_trie_data*)PL_regdata->data[n];
-       Perl_sv_catpvf(aTHX_ sv, " (W:%d L:%d C:%d S:%d)",
-                      trie->wordcount,
-                      trie->charcount,
-                      trie->uniquecharcount,
-                      trie->laststate);
-       */
+    } else if (k == TRIE) {
+       NOOP;
+       /* print the details od the trie in dumpuntil instead, as
+        * prog->data isn't available here */
     } else if (k == CURLY) {
        if (OP(o) == CURLYM || OP(o) == CURLYN || OP(o) == CURLYX)
            Perl_sv_catpvf(aTHX_ sv, "[%d]", o->flags); /* Parenth number */
@@ -5883,7 +5863,7 @@ Perl_regprop(pTHX_ SV *sv, const regnode *o)
        }
 
        if (o->flags & ANYOF_CLASS)
-           for (i = 0; i < sizeof(anyofs)/sizeof(char*); i++)
+           for (i = 0; i < (int)(sizeof(anyofs)/sizeof(char*)); i++)
                if (ANYOF_CLASS_TEST(o,i))
                    sv_catpv(sv, anyofs[i]);
 
@@ -5894,7 +5874,7 @@ Perl_regprop(pTHX_ SV *sv, const regnode *o)
 
        {
            SV *lv;
-           SV * const sw = regclass_swash(o, FALSE, &lv, 0);
+           SV * const sw = regclass_swash(prog, o, FALSE, &lv, 0);
        
            if (lv) {
                if (sw) {
@@ -5961,6 +5941,7 @@ Perl_regprop(pTHX_ SV *sv, const regnode *o)
     else if (k == BRANCHJ && (OP(o) == UNLESSM || OP(o) == IFMATCH))
        Perl_sv_catpvf(aTHX_ sv, "[-%d]", o->flags);
 #else
+    PERL_UNUSED_CONTEXT;
     PERL_UNUSED_ARG(sv);
     PERL_UNUSED_ARG(o);
 #endif /* DEBUGGING */
@@ -5971,6 +5952,8 @@ Perl_re_intuit_string(pTHX_ regexp *prog)
 {                              /* Assume that RE_INTUIT is set */
     dVAR;
     GET_RE_DEBUG_FLAGS_DECL;
+    PERL_UNUSED_CONTEXT;
+
     DEBUG_COMPILE_r(
        {
            const char * const s = SvPV_nolen_const(prog->check_substr
@@ -5996,9 +5979,8 @@ Perl_pregfree(pTHX_ struct regexp *r)
     dVAR;
 #ifdef DEBUGGING
     SV * const dsv = PERL_DEBUG_PAD_ZERO(0);
-    SV * const re_debug_flags=get_sv(RE_DEBUG_FLAGS,0);
 #endif
-
+    GET_RE_DEBUG_FLAGS_DECL;
 
     if (!r || (--r->refcnt > 0))
        return;
@@ -6109,6 +6091,7 @@ Perl_pregfree(pTHX_ struct regexp *r)
     Safefree(r);
 }
 
+#ifndef PERL_IN_XSUB_RE
 /*
  - regnext - dig the "next" pointer out of a node
  */
@@ -6127,6 +6110,7 @@ Perl_regnext(pTHX_ register regnode *p)
 
     return(p+offset);
 }
+#endif
 
 STATIC void    
 S_re_croak2(pTHX_ const char* pat1,const char* pat2,...)
@@ -6164,59 +6148,34 @@ S_re_croak2(pTHX_ const char* pat1,const char* pat2,...)
 
 /* XXX Here's a total kludge.  But we need to re-enter for swash routines. */
 
+#ifndef PERL_IN_XSUB_RE
 void
 Perl_save_re_context(pTHX)
 {
     dVAR;
-    SAVEI32(PL_reg_flags);             /* from regexec.c */
-    SAVEPPTR(PL_bostr);
-    SAVEPPTR(PL_reginput);             /* String-input pointer. */
-    SAVEPPTR(PL_regbol);               /* Beginning of input, for ^ check. */
-    SAVEPPTR(PL_regeol);               /* End of input, for $ check. */
-    SAVEVPTR(PL_regstartp);            /* Pointer to startp array. */
-    SAVEVPTR(PL_regendp);              /* Ditto for endp. */
-    SAVEVPTR(PL_reglastparen);         /* Similarly for lastparen. */
-    SAVEVPTR(PL_reglastcloseparen);    /* Similarly for lastcloseparen. */
-    SAVEPPTR(PL_regtill);              /* How far we are required to go. */
-    SAVEGENERICPV(PL_reg_start_tmp);           /* from regexec.c */
+
+    struct re_save_state *state;
+
+    SAVEVPTR(PL_curcop);
+    SSGROW(SAVESTACK_ALLOC_FOR_RE_SAVE_STATE + 1);
+
+    state = (struct re_save_state *)(PL_savestack + PL_savestack_ix);
+    PL_savestack_ix += SAVESTACK_ALLOC_FOR_RE_SAVE_STATE;
+    SSPUSHINT(SAVEt_RE_STATE);
+
+    Copy(&PL_reg_state, state, 1, struct re_save_state);
+
     PL_reg_start_tmp = 0;
-    SAVEI32(PL_reg_start_tmpl);                /* from regexec.c */
     PL_reg_start_tmpl = 0;
-    SAVEVPTR(PL_regdata);
-    SAVEI32(PL_reg_eval_set);          /* from regexec.c */
-    SAVEI32(PL_regnarrate);            /* from regexec.c */
-    SAVEVPTR(PL_regprogram);           /* from regexec.c */
-    SAVEINT(PL_regindent);             /* from regexec.c */
-    SAVEVPTR(PL_regcc);                        /* from regexec.c */
-    SAVEVPTR(PL_curcop);
-    SAVEVPTR(PL_reg_call_cc);          /* from regexec.c */
-    SAVEVPTR(PL_reg_re);               /* from regexec.c */
-    SAVEPPTR(PL_reg_ganch);            /* from regexec.c */
-    SAVESPTR(PL_reg_sv);               /* from regexec.c */
-    SAVEBOOL(PL_reg_match_utf8);       /* from regexec.c */
-    SAVEVPTR(PL_reg_magic);            /* from regexec.c */
-    SAVEI32(PL_reg_oldpos);                    /* from regexec.c */
-    SAVEVPTR(PL_reg_oldcurpm);         /* from regexec.c */
-    SAVEVPTR(PL_reg_curpm);            /* from regexec.c */
-    SAVEPPTR(PL_reg_oldsaved);         /* old saved substr during match */
     PL_reg_oldsaved = NULL;
-    SAVEI32(PL_reg_oldsavedlen);       /* old length of saved substr during match */
     PL_reg_oldsavedlen = 0;
-#ifdef PERL_OLD_COPY_ON_WRITE
-    SAVESPTR(PL_nrs);
-    PL_nrs = NULL;
-#endif
-    SAVEI32(PL_reg_maxiter);           /* max wait until caching pos */
     PL_reg_maxiter = 0;
-    SAVEI32(PL_reg_leftiter);          /* wait until caching pos */
     PL_reg_leftiter = 0;
-    SAVEGENERICPV(PL_reg_poscache);    /* cache of pos of WHILEM */
     PL_reg_poscache = NULL;
-    SAVEI32(PL_reg_poscache_size);     /* size of pos cache of WHILEM */
     PL_reg_poscache_size = 0;
-    SAVEPPTR(PL_regprecomp);           /* uncompiled string. */
-    SAVEI32(PL_regnpar);               /* () count. */
-    SAVEI32(PL_regsize);               /* from regexec.c */
+#ifdef PERL_OLD_COPY_ON_WRITE
+    PL_nrs = NULL;
+#endif
 
     /* Save $1..$n (#18107: UTF-8 s/(\w+)/uc($1)/e); AMS 20021106. */
     if (PL_curpm) {
@@ -6224,23 +6183,25 @@ Perl_save_re_context(pTHX)
        if (rx) {
            U32 i;
            for (i = 1; i <= rx->nparens; i++) {
-               GV *gv;
                char digits[TYPE_CHARS(long)];
+#ifdef USE_SNPRINTF
+               const STRLEN len = snprintf(digits, sizeof(digits), "%lu", (long)i);
+#else
                const STRLEN len = my_sprintf(digits, "%lu", (long)i);
+#endif /* #ifdef USE_SNPRINTF */
                GV *const *const gvp
                    = (GV**)hv_fetch(PL_defstash, digits, len, 0);
 
-               if (gvp && SvTYPE(gv = *gvp) == SVt_PVGV && GvSV(gv)) {
-                   save_scalar(gv);
+               if (gvp) {
+                   GV * const gv = *gvp;
+                   if (SvTYPE(gv) == SVt_PVGV && GvSV(gv))
+                       save_scalar(gv);
                }
            }
        }
     }
-
-#ifdef DEBUGGING
-    SAVEPPTR(PL_reg_starttry);         /* from regexec.c */
-#endif
 }
+#endif
 
 static void
 clear_re(pTHX_ void *r)
@@ -6263,12 +6224,13 @@ S_put_byte(pTHX_ SV *sv, int c)
 }
 
 
-STATIC regnode *
-S_dumpuntil(pTHX_ regnode *start, regnode *node, regnode *last, SV* sv, I32 l)
+STATIC const regnode *
+S_dumpuntil(pTHX_ const regexp *r, const regnode *start, const regnode *node,
+           const regnode *last, SV* sv, I32 l)
 {
     dVAR;
     register U8 op = EXACT;    /* Arbitrary non-END op. */
-    register regnode *next;
+    register const regnode *next;
 
     while (op != END && (!last || node < last)) {
        /* While that wasn't END last time... */
@@ -6277,11 +6239,11 @@ S_dumpuntil(pTHX_ regnode *start, regnode *node, regnode *last, SV* sv, I32 l)
        op = OP(node);
        if (op == CLOSE)
            l--;        
-       next = regnext(node);
+       next = regnext((regnode *)node);
        /* Where, what. */
        if (OP(node) == OPTIMIZED)
            goto after_print;
-       regprop(sv, node);
+       regprop(r, sv, node);
        PerlIO_printf(Perl_debug_log, "%4"IVdf":%*s%s", (IV)(node - start),
                      (int)(2*l + 1), "", SvPVX_const(sv));
        if (next == NULL)               /* Next ptr. */
@@ -6291,19 +6253,19 @@ S_dumpuntil(pTHX_ regnode *start, regnode *node, regnode *last, SV* sv, I32 l)
        (void)PerlIO_putc(Perl_debug_log, '\n');
       after_print:
        if (PL_regkind[(U8)op] == BRANCHJ) {
-           register regnode *nnode = (OP(next) == LONGJMP
-                                      ? regnext(next)
-                                      : next);
+           register const regnode *nnode = (OP(next) == LONGJMP
+                                            ? regnext((regnode *)next)
+                                            : next);
            if (last && nnode > last)
                nnode = last;
-           node = dumpuntil(start, NEXTOPER(NEXTOPER(node)), nnode, sv, l + 1);
+           node = dumpuntil(r, start, NEXTOPER(NEXTOPER(node)), nnode, sv, l + 1);
        }
        else if (PL_regkind[(U8)op] == BRANCH) {
-           node = dumpuntil(start, NEXTOPER(node), next, sv, l + 1);
+           node = dumpuntil(r, start, NEXTOPER(node), next, sv, l + 1);
        }
        else if ( PL_regkind[(U8)op]  == TRIE ) {
             const I32 n = ARG(node);
-           const reg_trie_data * const trie = (reg_trie_data*)PL_regdata->data[n];
+           const reg_trie_data * const trie = (reg_trie_data*)r->data->data[n];
             const I32 arry_len = av_len(trie->words)+1;
            I32 word_idx;
            PerlIO_printf(Perl_debug_log,
@@ -6317,7 +6279,7 @@ S_dumpuntil(pTHX_ regnode *start, regnode *node, regnode *last, SV* sv, I32 l)
                       node->flags ? " EVAL mode" : "");
 
            for (word_idx=0; word_idx < arry_len; word_idx++) {
-               SV **elem_ptr=av_fetch(trie->words,word_idx,0);
+               SV ** const elem_ptr = av_fetch(trie->words,word_idx,0);
                if (elem_ptr) {
                    PerlIO_printf(Perl_debug_log, "%*s<%s%s%s>\n",
                       (int)(2*(l+4)), "",
@@ -6340,15 +6302,15 @@ S_dumpuntil(pTHX_ regnode *start, regnode *node, regnode *last, SV* sv, I32 l)
 
        }
        else if ( op == CURLY) {   /* "next" might be very big: optimizer */
-           node = dumpuntil(start, NEXTOPER(node) + EXTRA_STEP_2ARGS,
+           node = dumpuntil(r, start, NEXTOPER(node) + EXTRA_STEP_2ARGS,
                             NEXTOPER(node) + EXTRA_STEP_2ARGS + 1, sv, l + 1);
        }
        else if (PL_regkind[(U8)op] == CURLY && op != CURLYX) {
-           node = dumpuntil(start, NEXTOPER(node) + EXTRA_STEP_2ARGS,
+           node = dumpuntil(r, start, NEXTOPER(node) + EXTRA_STEP_2ARGS,
                             next, sv, l + 1);
        }
        else if ( op == PLUS || op == STAR) {
-           node = dumpuntil(start, NEXTOPER(node), NEXTOPER(node) + 1, sv, l + 1);
+           node = dumpuntil(r, start, NEXTOPER(node), NEXTOPER(node) + 1, sv, l + 1);
        }
        else if (op == ANYOF) {
            /* arglen 1 + class block */