This is a live mirror of the Perl 5 development repository, currently hosted at https://github.com/perl/perl5
Revert 27869 at Jarkko's request.
[perl5.git] / regcomp.c
index 982fc7d..0e53589 100644 (file)
--- a/regcomp.c
+++ b/regcomp.c
@@ -444,7 +444,7 @@ static void clear_re(pTHX_ void *r);
    floating substrings if needed. */
 
 STATIC void
-S_scan_commit(pTHX_ RExC_state_t *pRExC_state, scan_data_t *data)
+S_scan_commit(pTHX_ const RExC_state_t *pRExC_state, scan_data_t *data)
 {
     const STRLEN l = CHR_SVLEN(data->last_found);
     const STRLEN old_l = CHR_SVLEN(*data->longest);
@@ -476,10 +476,11 @@ S_scan_commit(pTHX_ RExC_state_t *pRExC_state, scan_data_t *data)
     SvCUR_set(data->last_found, 0);
     {
        SV * const sv = data->last_found;
-       MAGIC * const mg =
-           SvUTF8(sv) && SvMAGICAL(sv) ? mg_find(sv, PERL_MAGIC_utf8) : NULL;
-       if (mg)
-           mg->mg_len = 0;
+       if (SvUTF8(sv) && SvMAGICAL(sv)) {
+           MAGIC * const mg = mg_find(sv, PERL_MAGIC_utf8);
+           if (mg)
+               mg->mg_len = 0;
+       }
     }
     data->last_end = -1;
     data->flags &= ~SF_BEFORE_EOL;
@@ -487,7 +488,7 @@ S_scan_commit(pTHX_ RExC_state_t *pRExC_state, scan_data_t *data)
 
 /* Can match anything (initialization) */
 STATIC void
-S_cl_anything(RExC_state_t *pRExC_state, struct regnode_charclass_class *cl)
+S_cl_anything(const RExC_state_t *pRExC_state, struct regnode_charclass_class *cl)
 {
     ANYOF_CLASS_ZERO(cl);
     ANYOF_BITMAP_SETALL(cl);
@@ -514,7 +515,7 @@ S_cl_is_anything(const struct regnode_charclass_class *cl)
 
 /* Can match anything (initialization) */
 STATIC void
-S_cl_init(RExC_state_t *pRExC_state, struct regnode_charclass_class *cl)
+S_cl_init(const RExC_state_t *pRExC_state, struct regnode_charclass_class *cl)
 {
     Zero(cl, 1, struct regnode_charclass_class);
     cl->type = ANYOF;
@@ -522,7 +523,7 @@ S_cl_init(RExC_state_t *pRExC_state, struct regnode_charclass_class *cl)
 }
 
 STATIC void
-S_cl_init_zero(RExC_state_t *pRExC_state, struct regnode_charclass_class *cl)
+S_cl_init_zero(const RExC_state_t *pRExC_state, struct regnode_charclass_class *cl)
 {
     Zero(cl, 1, struct regnode_charclass_class);
     cl->type = ANYOF;
@@ -571,7 +572,7 @@ S_cl_and(struct regnode_charclass_class *cl,
 /* 'OR' a given class with another one.  Can create false positives */
 /* We assume that cl is not inverted */
 STATIC void
-S_cl_or(RExC_state_t *pRExC_state, struct regnode_charclass_class *cl, const struct regnode_charclass_class *or_with)
+S_cl_or(const RExC_state_t *pRExC_state, struct regnode_charclass_class *cl, const struct regnode_charclass_class *or_with)
 {
     if (or_with->flags & ANYOF_INVERT) {
        /* We do not use
@@ -1536,7 +1537,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, I32 *deltap,
        /* Peephole optimizer: */
        DEBUG_OPTIMISE_r({
          SV * const mysv=sv_newmortal();
-         regprop( mysv, scan);
+         regprop(RExC_rx, mysv, scan);
          PerlIO_printf(Perl_debug_log, "%*speep: %s (0x%08"UVXf")\n",
            (int)depth*2, "", SvPV_nolen_const(mysv), PTR2UV(scan));
        });
@@ -1831,7 +1832,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, I32 *deltap,
                         }
 
                         DEBUG_OPTIMISE_r({
-                            regprop( mysv, tail );
+                            regprop(RExC_rx, mysv, tail );
                             PerlIO_printf( Perl_debug_log, "%*s%s%s%s\n",
                                 (int)depth * 2 + 2, "", "Tail node is:", SvPV_nolen_const( mysv ),
                                 (RExC_seen_evals) ? "[EVAL]" : ""
@@ -1868,16 +1869,16 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, I32 *deltap,
                             regnode * const noper_next = regnext( noper );
 
                             DEBUG_OPTIMISE_r({
-                                regprop( mysv, cur);
+                                regprop(RExC_rx, mysv, cur);
                                 PerlIO_printf( Perl_debug_log, "%*s%s",
                                    (int)depth * 2 + 2,"  ", SvPV_nolen_const( mysv ) );
 
-                                regprop( mysv, noper);
+                                regprop(RExC_rx, mysv, noper);
                                 PerlIO_printf( Perl_debug_log, " -> %s",
                                     SvPV_nolen_const(mysv));
 
                                 if ( noper_next ) {
-                                  regprop( mysv, noper_next );
+                                  regprop(RExC_rx, mysv, noper_next );
                                   PerlIO_printf( Perl_debug_log,"\t=> %s\t",
                                     SvPV_nolen_const(mysv));
                                 }
@@ -1895,20 +1896,20 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, I32 *deltap,
                                 } else {
                                     DEBUG_OPTIMISE_r(
                                         if (!last ) {
-                                            regprop( mysv, first);
+                                            regprop(RExC_rx, mysv, first);
                                             PerlIO_printf( Perl_debug_log, "%*s%s",
                                               (int)depth * 2 + 2, "F:", SvPV_nolen_const( mysv ) );
-                                            regprop( mysv, NEXTOPER(first) );
+                                            regprop(RExC_rx, mysv, NEXTOPER(first) );
                                             PerlIO_printf( Perl_debug_log, " -> %s\n",
                                               SvPV_nolen_const( mysv ) );
                                         }
                                     );
                                     last = cur;
                                     DEBUG_OPTIMISE_r({
-                                        regprop( mysv, cur);
+                                        regprop(RExC_rx, mysv, cur);
                                         PerlIO_printf( Perl_debug_log, "%*s%s",
                                           (int)depth * 2 + 2, "N:", SvPV_nolen_const( mysv ) );
-                                        regprop( mysv, noper );
+                                        regprop(RExC_rx, mysv, noper );
                                         PerlIO_printf( Perl_debug_log, " -> %s\n",
                                           SvPV_nolen_const( mysv ) );
                                     });
@@ -1936,7 +1937,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, I32 *deltap,
                             }
                         }
                         DEBUG_OPTIMISE_r({
-                            regprop( mysv, cur);
+                            regprop(RExC_rx, mysv, cur);
                             PerlIO_printf( Perl_debug_log,
                               "%*s%s\t(0x%p,0x%p,0x%p)\n", (int)depth * 2 + 2,
                               "  ", SvPV_nolen_const( mysv ), first, last, cur);
@@ -2027,15 +2028,17 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, I32 *deltap,
            UV uc = *((U8*)STRING(scan));
 
            /* Search for fixed substrings supports EXACT only. */
-           if (flags & SCF_DO_SUBSTR)
+           if (flags & SCF_DO_SUBSTR) {
+               assert(data);
                scan_commit(pRExC_state, data);
+           }
            if (UTF) {
                const U8 * const s = (U8 *)STRING(scan);
                l = utf8_length(s, s + l);
                uc = utf8_to_uvchr(s, NULL);
            }
            min += l;
-           if (data && (flags & SCF_DO_SUBSTR))
+           if (flags & SCF_DO_SUBSTR)
                data->pos_min += l;
            if (flags & SCF_DO_STCLASS_AND) {
                /* Check whether it is compatible with what we know already! */
@@ -3108,7 +3111,7 @@ Perl_pregcomp(pTHX_ char *exp, char *xend, PMOP *pm)
            r->regstclass = (regnode*)RExC_rx->data->data[n];
            r->reganch &= ~ROPT_SKIP;   /* Used in find_byclass(). */
            DEBUG_COMPILE_r({ SV *sv = sv_newmortal();
-                     regprop(sv, (regnode*)data.start_class);
+                     regprop(r, sv, (regnode*)data.start_class);
                      PerlIO_printf(Perl_debug_log,
                                    "synthetic stclass \"%s\".\n",
                                    SvPVX_const(sv));});
@@ -3163,7 +3166,7 @@ Perl_pregcomp(pTHX_ char *exp, char *xend, PMOP *pm)
            r->regstclass = (regnode*)RExC_rx->data->data[n];
            r->reganch &= ~ROPT_SKIP;   /* Used in find_byclass(). */
            DEBUG_COMPILE_r({ SV* sv = sv_newmortal();
-                     regprop(sv, (regnode*)data.start_class);
+                     regprop(r, sv, (regnode*)data.start_class);
                      PerlIO_printf(Perl_debug_log,
                                    "synthetic stclass \"%s\".\n",
                                    SvPVX_const(sv));});
@@ -4453,7 +4456,7 @@ S_regpposixcc(pTHX_ RExC_state_t *pRExC_state, I32 value)
        /* I smell either [: or [= or [. -- POSIX has been here, right? */
        POSIXCC(UCHARAT(RExC_parse))) {
        const char c = UCHARAT(RExC_parse);
-       char* s = RExC_parse++;
+       char* const s = RExC_parse++;
        
        while (RExC_parse < RExC_end && UCHARAT(RExC_parse) != c)
            RExC_parse++;
@@ -4665,8 +4668,10 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
            ANYOF_FLAGS(ret) |= ANYOF_INVERT;
     }
 
-    if (SIZE_ONLY)
+    if (SIZE_ONLY) {
        RExC_size += ANYOF_SKIP;
+       listsv = &PL_sv_undef; /* For code scanners: listsv always non-NULL. */
+    }
     else {
        RExC_emit += ANYOF_SKIP;
        if (FOLD)
@@ -4757,12 +4762,8 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
                              n--;
                         }
                    }
-                   if (value == 'p')
-                        Perl_sv_catpvf(aTHX_ listsv,
-                                       "+utf8::%.*s\n", (int)n, RExC_parse);
-                   else
-                        Perl_sv_catpvf(aTHX_ listsv,
-                                       "!utf8::%.*s\n", (int)n, RExC_parse);
+                   Perl_sv_catpvf(aTHX_ listsv, "%cutf8::%.*s\n",
+                       (value=='p' ? '+' : '!'), (int)n, RExC_parse);
                }
                RExC_parse = e + 1;
                ANYOF_FLAGS(ret) |= ANYOF_UNICODE;
@@ -4831,14 +4832,12 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
            if (range) {
                if (!SIZE_ONLY) {
                    if (ckWARN(WARN_REGEXP)) {
-                       int w =
+                       const int w =
                            RExC_parse >= rangebegin ?
                            RExC_parse - rangebegin : 0;
                        vWARN4(RExC_parse,
                               "False [] range \"%*.*s\"",
-                              w,
-                              w,
-                              rangebegin);
+                              w, w, rangebegin);
                    }
                    if (prevvalue < 256) {
                        ANYOF_BITMAP_SET(ret, prevvalue);
@@ -5247,9 +5246,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
                            RExC_parse - rangebegin : 0;
                        vWARN4(RExC_parse,
                               "False [] range \"%*.*s\"",
-                              w,
-                              w,
-                              rangebegin);
+                              w, w, rangebegin);
                    }
                    if (!SIZE_ONLY)
                        ANYOF_BITMAP_SET(ret, '-');
@@ -5393,7 +5390,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
     }
 
     if (!SIZE_ONLY) {
-       AV *av = newAV();
+       AV * const av = newAV();
        SV *rv;
 
        /* The 0th element stores the character class description
@@ -5417,7 +5414,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
 STATIC char*
 S_nextchar(pTHX_ RExC_state_t *pRExC_state)
 {
-    char* retval = RExC_parse++;
+    char* const retval = RExC_parse++;
 
     for (;;) {
        if (*RExC_parse == '(' && RExC_parse[1] == '?' &&
@@ -5592,8 +5589,9 @@ S_reginsert(pTHX_ RExC_state_t *pRExC_state, U8 op, regnode *opnd)
 /*
 - regtail - set the next-pointer at the end of a node chain of p to val.
 */
+/* TODO: All three parms should be const */
 STATIC void
-S_regtail(pTHX_ RExC_state_t *pRExC_state, regnode *p, regnode *val)
+S_regtail(pTHX_ const RExC_state_t *pRExC_state, regnode *p, const regnode *val)
 {
     dVAR;
     register regnode *scan;
@@ -5621,8 +5619,9 @@ S_regtail(pTHX_ RExC_state_t *pRExC_state, regnode *p, regnode *val)
 /*
 - regoptail - regtail on operand of first argument; nop if operandless
 */
+/* TODO: All three parms should be const */
 STATIC void
-S_regoptail(pTHX_ RExC_state_t *pRExC_state, regnode *p, regnode *val)
+S_regoptail(pTHX_ const RExC_state_t *pRExC_state, regnode *p, const regnode *val)
 {
     dVAR;
     /* "Operandless" and "op != BRANCH" are synonymous in practice. */
@@ -5664,7 +5663,7 @@ S_regcurly(register const char *s)
  - regdump - dump a regexp onto Perl_debug_log in vaguely comprehensible form
  */
 void
-Perl_regdump(pTHX_ regexp *r)
+Perl_regdump(pTHX_ const regexp *r)
 {
 #ifdef DEBUGGING
     dVAR;
@@ -5722,7 +5721,7 @@ Perl_regdump(pTHX_ regexp *r)
        PerlIO_printf(Perl_debug_log, ") ");
 
     if (r->regstclass) {
-       regprop(sv, r->regstclass);
+       regprop(r, sv, r->regstclass);
        PerlIO_printf(Perl_debug_log, "stclass \"%s\" ", SvPVX_const(sv));
     }
     if (r->reganch & ROPT_ANCH) {
@@ -5769,7 +5768,7 @@ Perl_regdump(pTHX_ regexp *r)
 - regprop - printable representation of opcode
 */
 void
-Perl_regprop(pTHX_ SV *sv, const regnode *o)
+Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o)
 {
 #ifdef DEBUGGING
     dVAR;
@@ -5890,7 +5889,7 @@ Perl_regprop(pTHX_ SV *sv, const regnode *o)
 
        {
            SV *lv;
-           SV * const sw = regclass_swash(o, FALSE, &lv, 0);
+           SV * const sw = regclass_swash(prog, o, FALSE, &lv, 0);
        
            if (lv) {
                if (sw) {
@@ -6177,43 +6176,7 @@ Perl_save_re_context(pTHX)
     PL_savestack_ix += SAVESTACK_ALLOC_FOR_RE_SAVE_STATE;
     SSPUSHINT(SAVEt_RE_STATE);
 
-    state->re_state_reg_flags = PL_reg_flags;
-    state->re_state_bostr = PL_bostr;
-    state->re_state_reginput = PL_reginput;
-    state->re_state_regbol = PL_regbol;
-    state->re_state_regeol = PL_regeol;
-    state->re_state_regstartp = PL_regstartp;
-    state->re_state_regendp = PL_regendp;
-    state->re_state_reglastparen = PL_reglastparen;
-    state->re_state_reglastcloseparen = PL_reglastcloseparen;
-    state->re_state_regtill = PL_regtill;
-    state->re_state_reg_start_tmp = PL_reg_start_tmp;
-    state->re_state_reg_start_tmpl = PL_reg_start_tmpl;
-    state->re_state_reg_eval_set = PL_reg_eval_set;
-    state->re_state_regnarrate = PL_regnarrate;
-    state->re_state_regindent = PL_regindent;
-    state->re_state_reg_call_cc = PL_reg_call_cc;
-    state->re_state_reg_re = PL_reg_re;
-    state->re_state_reg_ganch = PL_reg_ganch;
-    state->re_state_reg_sv = PL_reg_sv;
-    state->re_state_reg_match_utf8 = PL_reg_match_utf8;
-    state->re_state_reg_magic = PL_reg_magic;
-    state->re_state_reg_oldpos = PL_reg_oldpos;
-    state->re_state_reg_oldcurpm = PL_reg_oldcurpm;
-    state->re_state_reg_curpm = PL_reg_curpm;
-    state->re_state_reg_oldsaved = PL_reg_oldsaved;
-    state->re_state_reg_oldsavedlen = PL_reg_oldsavedlen;
-    state->re_state_reg_maxiter = PL_reg_maxiter;
-    state->re_state_reg_leftiter = PL_reg_leftiter;
-    state->re_state_reg_poscache = PL_reg_poscache;
-    state->re_state_reg_poscache_size = PL_reg_poscache_size;
-    state->re_state_regsize = PL_regsize;
-#ifdef DEBUGGING
-    state->re_state_reg_starttry = PL_reg_starttry;
-#endif
-#ifdef PERL_OLD_COPY_ON_WRITE
-    state->re_state_nrs = PL_nrs;
-#endif
+    Copy(&PL_reg_state, state, 1, struct re_save_state);
 
     PL_reg_start_tmp = 0;
     PL_reg_start_tmpl = 0;
@@ -6246,10 +6209,6 @@ Perl_save_re_context(pTHX)
            }
        }
     }
-
-#ifdef DEBUGGING
-    SAVEPPTR(PL_reg_starttry);         /* from regexec.c */
-#endif
 }
 
 static void
@@ -6273,13 +6232,13 @@ S_put_byte(pTHX_ SV *sv, int c)
 }
 
 
-STATIC regnode *
-S_dumpuntil(pTHX_ regexp *r, regnode *start, regnode *node, regnode *last,
-    SV* sv, I32 l)
+STATIC const regnode *
+S_dumpuntil(pTHX_ const regexp *r, const regnode *start, const regnode *node,
+           const regnode *last, SV* sv, I32 l)
 {
     dVAR;
     register U8 op = EXACT;    /* Arbitrary non-END op. */
-    register regnode *next;
+    register const regnode *next;
 
     while (op != END && (!last || node < last)) {
        /* While that wasn't END last time... */
@@ -6288,11 +6247,11 @@ S_dumpuntil(pTHX_ regexp *r, regnode *start, regnode *node, regnode *last,
        op = OP(node);
        if (op == CLOSE)
            l--;        
-       next = regnext(node);
+       next = regnext((regnode *)node);
        /* Where, what. */
        if (OP(node) == OPTIMIZED)
            goto after_print;
-       regprop(sv, node);
+       regprop(r, sv, node);
        PerlIO_printf(Perl_debug_log, "%4"IVdf":%*s%s", (IV)(node - start),
                      (int)(2*l + 1), "", SvPVX_const(sv));
        if (next == NULL)               /* Next ptr. */
@@ -6302,9 +6261,9 @@ S_dumpuntil(pTHX_ regexp *r, regnode *start, regnode *node, regnode *last,
        (void)PerlIO_putc(Perl_debug_log, '\n');
       after_print:
        if (PL_regkind[(U8)op] == BRANCHJ) {
-           register regnode *nnode = (OP(next) == LONGJMP
-                                      ? regnext(next)
-                                      : next);
+           register const regnode *nnode = (OP(next) == LONGJMP
+                                            ? regnext((regnode *)next)
+                                            : next);
            if (last && nnode > last)
                nnode = last;
            node = dumpuntil(r, start, NEXTOPER(NEXTOPER(node)), nnode, sv, l + 1);
@@ -6328,7 +6287,7 @@ S_dumpuntil(pTHX_ regexp *r, regnode *start, regnode *node, regnode *last,
                       node->flags ? " EVAL mode" : "");
 
            for (word_idx=0; word_idx < arry_len; word_idx++) {
-               SV **elem_ptr=av_fetch(trie->words,word_idx,0);
+               SV ** const elem_ptr = av_fetch(trie->words,word_idx,0);
                if (elem_ptr) {
                    PerlIO_printf(Perl_debug_log, "%*s<%s%s%s>\n",
                       (int)(2*(l+4)), "",