This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Unify GOSTART and GOSUB
authorYves Orton <demerphq@gmail.com>
Sat, 5 Mar 2016 21:04:28 +0000 (22:04 +0100)
committerYves Orton <demerphq@gmail.com>
Sun, 6 Mar 2016 13:06:08 +0000 (14:06 +0100)
GOSTART is a special case of GOSUB, we can remove a lot of offset twiddling,
and other special casing by unifying them, at pretty much no cost.

GOSUB has 2 arguments, ARG() and ARG2L(), which are interpreted as
a U32 and an I32 respectively. ARG() holds the "parno" we will recurse
into. ARG2L() holds a signed offset to the relevant start node for the
recursion.

Prior to this patch the argument to GOSUB would always be >=, and unlike
other parts of our logic we would not use 0 to represent "start/end" of
pattern, as GOSTART would be used for "recurse to beginning of pattern",
after this patch we use 0 to represent "start/end", and a lot of
complexity "goes away" along with GOSTART regops.

pod/perldebguts.pod
regcomp.c
regcomp.h
regcomp.sym
regexec.c
regexp.h
regnodes.h

index 5024d98..c463ff0 100644 (file)
@@ -746,7 +746,6 @@ will be lost.
 
  # Regex Subroutines
  GOSUB           num/ofs 2L recurse to paren arg1 at (signed) ofs arg2
- GOSTART         no         recurse to start of pattern
 
  # Special conditionals
  NGROUPP         no-sv 1    Whether the group matched.
index f8f4b91..b789ca3 100644 (file)
--- a/regcomp.c
+++ b/regcomp.c
@@ -168,7 +168,7 @@ struct RExC_state_t {
     I32                seen_zerolen;
     regnode    **open_parens;          /* pointers to open parens */
     regnode    **close_parens;         /* pointers to close parens */
-    regnode    *end_op;                        /* END node in program */
+    regnode     *end_op;                /* END node in program */
     I32                utf8;           /* whether the pattern is utf8 or not */
     I32                orig_utf8;      /* whether the pattern was originally in utf8 */
                                /* XXX use this for future optimisation of case
@@ -179,7 +179,7 @@ struct RExC_state_t {
     HV         *paren_names;           /* Paren names */
 
     regnode    **recurse;              /* Recurse regops */
-    I32                recurse_count;          /* Number of recurse regops */
+    I32                recurse_count;                /* Number of recurse regops we have generated */
     U8          *study_chunk_recursed;  /* bitmap of which subs we have moved
                                            through */
     U32         study_chunk_recursed_bytes;  /* bytes in bitmap */
@@ -929,9 +929,6 @@ static const scan_data_t zero_scan_data =
             if (RExC_seen & REG_UNFOLDED_MULTI_SEEN)                        \
                 PerlIO_printf(Perl_debug_log,"REG_UNFOLDED_MULTI_SEEN ");   \
                                                                             \
-            if (RExC_seen & REG_GOSTART_SEEN)                               \
-                PerlIO_printf(Perl_debug_log,"REG_GOSTART_SEEN ");          \
-                                                                            \
             if (RExC_seen & REG_UNBOUNDED_QUANTIFIER_SEEN)                               \
                 PerlIO_printf(Perl_debug_log,"REG_UNBOUNDED_QUANTIFIER_SEEN ");          \
                                                                             \
@@ -4656,29 +4653,24 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
            } else                      /* single branch is optimized. */
                scan = NEXTOPER(scan);
            continue;
-       } else if (OP(scan) == SUSPEND || OP(scan) == GOSUB || OP(scan) == GOSTART) {
+        } else if (OP(scan) == SUSPEND || OP(scan) == GOSUB) {
             I32 paren = 0;
             regnode *start = NULL;
             regnode *end = NULL;
             U32 my_recursed_depth= recursed_depth;
 
-
-            if (OP(scan) != SUSPEND) { /* GOSUB/GOSTART */
+            if (OP(scan) != SUSPEND) { /* GOSUB */
                 /* Do setup, note this code has side effects beyond
                  * the rest of this block. Specifically setting
                  * RExC_recurse[] must happen at least once during
                  * study_chunk(). */
-               if (OP(scan) == GOSUB) {
-                   paren = ARG(scan);
-                   RExC_recurse[ARG2L(scan)] = scan;
-                    start = RExC_open_parens[paren-1];
-                    end   = RExC_close_parens[paren-1];
-                } else {
-                    start = RExC_rxi->program + 1;
-                    end   = RExC_end_op;
-                }
+                paren = ARG(scan);
+                RExC_recurse[ARG2L(scan)] = scan;
+                start = RExC_open_parens[paren];
+                end   = RExC_close_parens[paren];
+
                 /* NOTE we MUST always execute the above code, even
-                 * if we do nothing with a GOSUB/GOSTART */
+                 * if we do nothing with a GOSUB */
                 if (
                     ( flags & SCF_IN_DEFINE )
                     ||
@@ -5074,8 +5066,8 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                    if (OP(nxt) != CLOSE)
                        goto nogo;
                    if (RExC_open_parens) {
-                       RExC_open_parens[ARG(nxt1)-1]=oscan; /*open->CURLYM*/
-                       RExC_close_parens[ARG(nxt1)-1]=nxt+2; /*close->while*/
+                        RExC_open_parens[ARG(nxt1)]=oscan; /*open->CURLYM*/
+                        RExC_close_parens[ARG(nxt1)]=nxt+2; /*close->while*/
                    }
                    /* Now we know that nxt2 is the only contents: */
                    oscan->flags = (U8)ARG(nxt);
@@ -5121,8 +5113,8 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
 
                        oscan->flags = (U8)ARG(nxt);
                        if (RExC_open_parens) {
-                           RExC_open_parens[ARG(nxt1)-1]=oscan; /*open->CURLYM*/
-                           RExC_close_parens[ARG(nxt1)-1]=nxt2+1; /*close->NOTHING*/
+                            RExC_open_parens[ARG(nxt1)]=oscan; /*open->CURLYM*/
+                            RExC_close_parens[ARG(nxt1)]=nxt2+1; /*close->NOTHING*/
                        }
                        OP(nxt1) = OPTIMIZED;   /* was OPEN. */
                        OP(nxt) = OPTIMIZED;    /* was CLOSE. */
@@ -7162,24 +7154,6 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
     r->intflags = 0;
     r->nparens = RExC_npar - 1;        /* set early to validate backrefs */
 
-    /* setup various meta data about recursion, this all requires
-     * RExC_npar to be correctly set, and a bit later on we clear it */
-    if (RExC_seen & REG_RECURSE_SEEN) {
-        Newxz(RExC_open_parens, RExC_npar,regnode *);
-        SAVEFREEPV(RExC_open_parens);
-        Newxz(RExC_close_parens,RExC_npar,regnode *);
-        SAVEFREEPV(RExC_close_parens);
-    }
-    if (RExC_seen & (REG_RECURSE_SEEN | REG_GOSTART_SEEN)) {
-        /* Note, RExC_npar is 1 + the number of parens in a pattern.
-         * So its 1 if there are no parens. */
-        RExC_study_chunk_recursed_bytes= (RExC_npar >> 3) +
-                                         ((RExC_npar & 0x07) != 0);
-        Newx(RExC_study_chunk_recursed,
-             RExC_study_chunk_recursed_bytes * RExC_npar, U8);
-        SAVEFREEPV(RExC_study_chunk_recursed);
-    }
-
     /* Useful during FAIL. */
 #ifdef RE_TRACK_PATTERN_OFFSETS
     Newxz(ri->u.offsets, 2*RExC_size+1, U32); /* MJD 20001228 */
@@ -7199,17 +7173,51 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
     RExC_parse = exp;
     RExC_end = exp + plen;
     RExC_naughty = 0;
-    RExC_npar = 1;
     RExC_emit_start = ri->program;
     RExC_emit = ri->program;
     RExC_emit_bound = ri->program + RExC_size + 1;
     pRExC_state->code_index = 0;
 
     *((char*) RExC_emit++) = (char) REG_MAGIC;
+    /* setup various meta data about recursion, this all requires
+     * RExC_npar to be correctly set, and a bit later on we clear it */
+    if (RExC_seen & REG_RECURSE_SEEN) {
+        DEBUG_OPTIMISE_MORE_r(PerlIO_printf(Perl_debug_log,
+            "%*s%*s Setting up open/close parens\n",
+                  22, "|    |", (int)(0 * 2 + 1), ""));
+
+        /* setup RExC_open_parens, which holds the address of each
+         * OPEN tag, and to make things simpler for the 0 index
+         * the start of the program - this is used later for offsets */
+        Newxz(RExC_open_parens, RExC_npar,regnode *);
+        SAVEFREEPV(RExC_open_parens);
+        RExC_open_parens[0] = RExC_emit;
+
+        /* setup RExC_close_parens, which holds the address of each
+         * CLOSE tag, and to make things simpler for the 0 index
+         * the end of the program - this is used later for offsets */
+        Newxz(RExC_close_parens, RExC_npar,regnode *);
+        SAVEFREEPV(RExC_close_parens);
+        /* we dont know where end op starts yet, so we dont
+         * need to set RExC_close_parens[0] like we do RExC_open_parens[0] above */
+
+        /* Note, RExC_npar is 1 + the number of parens in a pattern.
+         * So its 1 if there are no parens. */
+        RExC_study_chunk_recursed_bytes= (RExC_npar >> 3) +
+                                         ((RExC_npar & 0x07) != 0);
+        Newx(RExC_study_chunk_recursed,
+             RExC_study_chunk_recursed_bytes * RExC_npar, U8);
+        SAVEFREEPV(RExC_study_chunk_recursed);
+    }
+    RExC_npar = 1;
     if (reg(pRExC_state, 0, &flags,1) == NULL) {
        ReREFCNT_dec(rx);
         Perl_croak(aTHX_ "panic: reg returned NULL to re_op_compile for generation pass, flags=%#"UVxf"", (UV) flags);
     }
+    DEBUG_OPTIMISE_r(
+        PerlIO_printf(Perl_debug_log, "Starting post parse optimization\n");
+    );
+
     /* XXXX To minimize changes to RE engine we always allocate
        3-units-long substrs field. */
     Newx(r->substrs, 1, struct reg_substr_data);
@@ -7609,6 +7617,8 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
     if (r->minlen < minlen)
         r->minlen = minlen;
 
+    if (RExC_seen & REG_RECURSE_SEEN )
+        r->intflags |= PREGf_RECURSE_SEEN;
     if (RExC_seen & REG_GPOS_SEEN)
         r->intflags |= PREGf_GPOS_SEEN;
     if (RExC_seen & REG_LOOKBEHIND_SEEN)
@@ -7682,14 +7692,13 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
                                    = (void*)SvREFCNT_inc(RExC_paren_name_list);
     } else
 #endif
-        ri->name_list_idx = 0;
+    ri->name_list_idx = 0;
 
-    if (RExC_recurse_count) {
-        for ( ; RExC_recurse_count ; RExC_recurse_count-- ) {
-            const regnode *scan = RExC_recurse[RExC_recurse_count-1];
-            ARG2L_SET( scan, RExC_open_parens[ARG(scan)-1] - scan );
-        }
+    while ( RExC_recurse_count > 0 ) {
+        const regnode *scan = RExC_recurse[ --RExC_recurse_count ];
+        ARG2L_SET( scan, RExC_open_parens[ARG(scan)] - scan );
     }
+
     Newxz(r->offs, RExC_npar, regexp_paren_pair);
     /* assume we don't need to swap parens around before we match */
     DEBUG_TEST_r({
@@ -10605,13 +10614,12 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
                break;
            case '0' :           /* (?0) */
            case 'R' :           /* (?R) */
-               if (*RExC_parse != ')')
+                if (RExC_parse == RExC_end || *RExC_parse != ')')
                    FAIL("Sequence (?R) not terminated");
-               ret = reg_node(pRExC_state, GOSTART);
-                    RExC_seen |= REG_GOSTART_SEEN;
+                num = 0;
+                RExC_seen |= REG_RECURSE_SEEN;
                *flagp |= POSTPONED;
-               nextchar(pRExC_state);
-               return ret;
+                goto gen_recurse_regop;
                /*notreached*/
             /* named and numeric backreferences */
             case '&':            /* (?&NAME) */
@@ -10687,6 +10695,14 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
                 } else if ( paren == '+' ) {
                     num = RExC_npar + num - 1;
                 }
+                /* We keep track how many GOSUB items we have produced.
+                   To start off the ARG2L() of the GOSUB holds its "id",
+                   which is used later in conjunction with RExC_recurse
+                   to calculate the offset we need to jump for the GOSUB,
+                   which it will store in the final representation.
+                   We have to defer the actual calculation until much later
+                   as the regop may move.
+                 */
 
                 ret = reg2Lanode(pRExC_state, GOSUB, num, RExC_recurse_count);
                 if (!SIZE_ONLY) {
@@ -10701,10 +10717,12 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
                               (UV)ARG(ret), (IV)ARG2L(ret)));
                 }
                 RExC_seen |= REG_RECURSE_SEEN;
+
                 Set_Node_Length(ret, 1 + regarglen[OP(ret)]); /* MJD */
                Set_Node_Offset(ret, parse_start); /* MJD */
 
                 *flagp |= POSTPONED;
+                assert(*RExC_parse == ')');
                 nextchar(pRExC_state);
                 return ret;
 
@@ -10848,7 +10866,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
                else if (RExC_parse[0] == 'R') {
                    RExC_parse++;
                     /* parno == 0 => /(?(R)YES|NO)/  "in any form of recursion OR eval"
-                     * parno == 1 => /(?(R0)YES|NO)/ "in GOSTART (?0) / (?R)"
+                     * parno == 1 => /(?(R0)YES|NO)/ "in GOSUB (?0) / (?R)"
                      * parno == 2 => /(?(R1)YES|NO)/ "in GOSUB (?1) (parno-1)"
                      */
                    parno = 0;
@@ -10881,7 +10899,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
                          * will return something, and when SIZE_ONLY is
                          * true, reg_scan_name() just parses the string,
                          * and doesnt return anything. (in theory) */
-                        assert(SIZE_ONLY ? !sv_dat : sv_dat);
+                        assert(SIZE_ONLY ? !sv_dat : !!sv_dat);
 
                         if (sv_dat)
                             parno = 1 + *((I32 *)SvPVX(sv_dat));
@@ -11004,14 +11022,13 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
            if (!SIZE_ONLY ){
                if (!RExC_nestroot)
                    RExC_nestroot = parno;
-                if (RExC_seen & REG_RECURSE_SEEN
-                   && !RExC_open_parens[parno-1])
+                if (RExC_open_parens && !RExC_open_parens[parno])
                {
                    DEBUG_OPTIMISE_MORE_r(PerlIO_printf(Perl_debug_log,
                         "%*s%*s Setting open paren #%"IVdf" to %d\n",
                         22, "|    |", (int)(depth * 2 + 1), "",
                        (IV)parno, REG_NODE_NUM(ret)));
-                   RExC_open_parens[parno-1]= ret;
+                    RExC_open_parens[parno]= ret;
                }
            }
             Set_Node_Length(ret, 1); /* MJD */
@@ -11100,11 +11117,11 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
            break;
        case 1: case 2:
            ender = reganode(pRExC_state, CLOSE, parno);
-            if (!SIZE_ONLY && RExC_seen & REG_RECURSE_SEEN) {
+            if ( RExC_close_parens ) {
                DEBUG_OPTIMISE_MORE_r(PerlIO_printf(Perl_debug_log,
                         "%*s%*s Setting close paren #%"IVdf" to %d\n",
                         22, "|    |", (int)(depth * 2 + 1), "", (IV)parno, REG_NODE_NUM(ender)));
-               RExC_close_parens[parno-1]= ender;
+                RExC_close_parens[parno]= ender;
                if (RExC_nestroot == parno)
                    RExC_nestroot = 0;
            }
@@ -11125,6 +11142,13 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
            if (!SIZE_ONLY) {
                 assert(!RExC_end_op); /* there can only be one! */
                 RExC_end_op = ender;
+                if (RExC_close_parens) {
+                    DEBUG_OPTIMISE_MORE_r(PerlIO_printf(Perl_debug_log,
+                        "%*s%*s Setting close paren #0 (END) to %d\n",
+                        22, "|    |", (int)(depth * 2 + 1), "", REG_NODE_NUM(ender)));
+
+                    RExC_close_parens[0]= ender;
+                }
             }
            break;
        }
@@ -18182,7 +18206,7 @@ S_reginsert(pTHX_ RExC_state_t *pRExC_state, U8 op, regnode *opnd, U32 depth)
     if (RExC_open_parens) {
         int paren;
         /*DEBUG_PARSE_FMT("inst"," - %"IVdf, (IV)RExC_npar);*/
-        for ( paren=0 ; paren < RExC_npar ; paren++ ) {
+        for ( paren=0 ; paren <= RExC_npar ; paren++ ) {
             if ( RExC_open_parens[paren] >= opnd ) {
                 /*DEBUG_PARSE_FMT("open"," - %d",size);*/
                 RExC_open_parens[paren] += size;
@@ -18197,6 +18221,8 @@ S_reginsert(pTHX_ RExC_state_t *pRExC_state, U8 op, regnode *opnd, U32 depth)
             }
         }
     }
+    if (RExC_end_op)
+        RExC_end_op += size;
 
     while (src > opnd) {
        StructCopy(--src, --dst, regnode);
@@ -18748,7 +18774,8 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_
         }
 
         /* Paren and offset */
-       Perl_sv_catpvf(aTHX_ sv, "%d[%+d]", (int)ARG(o),(int)ARG2L(o));
+        Perl_sv_catpvf(aTHX_ sv, "%d[%+d:%d]", (int)ARG(o),(int)ARG2L(o),
+                (int)((o + (int)ARG2L(o)) - progi->program) );
         if (name_list) {
             SV **name= av_fetch(name_list, ARG(o), 0 );
             if (name)
index c08888e..f16197f 100644 (file)
--- a/regcomp.h
+++ b/regcomp.h
 #define PREGf_ANCH_MBOL         0x00000400
 #define PREGf_ANCH_SBOL         0x00000800
 #define PREGf_ANCH_GPOS         0x00001000
+#define PREGf_RECURSE_SEEN      0x00002000
 
 #define PREGf_ANCH              \
     ( PREGf_ANCH_SBOL | PREGf_ANCH_GPOS | PREGf_ANCH_MBOL )
@@ -716,7 +717,7 @@ struct regnode_ssc {
 #define REG_CUTGROUP_SEEN                   0x00000100
 #define REG_RUN_ON_COMMENT_SEEN             0x00000200
 #define REG_UNFOLDED_MULTI_SEEN             0x00000400
-#define REG_GOSTART_SEEN                    0x00000800
+/* spare */
 #define REG_UNBOUNDED_QUANTIFIER_SEEN       0x00001000
 
 
index 8f9861a..ac67955 100644 (file)
@@ -190,7 +190,6 @@ AHOCORASICKC    TRIE,trie charclass   ; Same as AHOCORASICK, but with embedded c
 
 #*Regex Subroutines
 GOSUB       GOSUB,      num/ofs 2L    ; recurse to paren arg1 at (signed) ofs arg2
-GOSTART     GOSTART,    no        ; recurse to start of pattern
 
 #*Special conditionals
 NGROUPP     NGROUPP,    no-sv 1   ; Whether the group matched.            
index 66dc245..5e188fd 100644 (file)
--- a/regexec.c
+++ b/regexec.c
@@ -6495,7 +6495,6 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
             regexp_internal *rei;
             regnode *startpoint;
 
-       case GOSTART: /*  (?R)  */
        case GOSUB: /*    /(...(?1))/   /(...(?&foo))/   */
            if (cur_eval && cur_eval->locinput==locinput) {
                 if ( EVAL_CLOSE_PAREN_IS( cur_eval, (U32)ARG(scan) ) )
@@ -6510,13 +6509,8 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
            re_sv = rex_sv;
             re = rex;
             rei = rexi;
-            if ( OP(scan) == GOSUB ) {
-                startpoint = scan + ARG2L(scan);
-                ST.close_paren = 1 + ARG(scan);
-            } else {
-                startpoint = rei->program + 1;
-                ST.close_paren = 1;
-            }
+            startpoint = scan + ARG2L(scan);
+            EVAL_CLOSE_PAREN_SET( st, ARG(scan) ); /* ST.close_paren = 1 + ARG(scan) */
 
             /* Save all the positions seen so far. */
             ST.cp = regcppush(rex, 0, maxopenparen);
@@ -6775,7 +6769,8 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
                                     reginfo->strend, "Matching embedded");
                );              
                startpoint = rei->program + 1;
-                       ST.close_paren = 0; /* only used for GOSUB */
+                EVAL_CLOSE_PAREN_CLEAR(st); /* ST.close_paren = 0;
+                                             * close_paren only for GOSUB */
                 /* Save all the seen positions so far. */
                 ST.cp = regcppush(rex, 0, maxopenparen);
                 REGCP_SET(ST.lastcp);
index ff44df2..02258fe 100644 (file)
--- a/regexp.h
+++ b/regexp.h
@@ -833,13 +833,19 @@ typedef struct regmatch_state {
     } u;
 } regmatch_state;
 
-#define EVAL_CLOSE_PAREN_IS(cur_eval,expr) \
+#define EVAL_CLOSE_PAREN_IS(st,expr) \
 (\
-    ( ( cur_eval )                                         ) && \
-    ( ( cur_eval )->u.eval.close_paren                     ) && \
-    ( ( ( cur_eval )->u.eval.close_paren - 1 ) == ( expr ) ) \
+    ( ( st )                                         ) && \
+    ( ( st )->u.eval.close_paren                     ) && \
+    ( ( ( st )->u.eval.close_paren - 1 ) == ( expr ) ) \
 )
 
+#define EVAL_CLOSE_PAREN_SET(st,expr) \
+    (st)->u.eval.close_paren = (expr) + 1
+
+#define EVAL_CLOSE_PAREN_CLEAR(st) \
+    (st)->u.eval.close_paren = 0
+
 /* how many regmatch_state structs to allocate as a single slab.
  * We do it in 4K blocks for efficiency. The "3" is 2 for the next/prev
  * pointers, plus 1 for any mythical malloc overhead. */
index f27abe0..f820c56 100644 (file)
@@ -6,8 +6,8 @@
 
 /* Regops and State definitions */
 
-#define REGNODE_MAX            93
-#define REGMATCH_STATE_MAX     133
+#define REGNODE_MAX            92
+#define REGMATCH_STATE_MAX     132
 
 #define        END                     0       /* 0000 End of program. */
 #define        SUCCEED                 1       /* 0x01 Return from a subroutine, basically. */
 #define        AHOCORASICK             74      /* 0x4a Aho Corasick stclass. flags==type */
 #define        AHOCORASICKC            75      /* 0x4b Same as AHOCORASICK, but with embedded charclass data */
 #define        GOSUB                   76      /* 0x4c recurse to paren arg1 at (signed) ofs arg2 */
-#define        GOSTART                 77      /* 0x4d recurse to start of pattern */
-#define        NGROUPP                 78      /* 0x4e Whether the group matched. */
-#define        INSUBP                  79      /* 0x4f Whether we are in a specific recurse. */
-#define        DEFINEP                 80      /* 0x50 Never execute directly. */
-#define        ENDLIKE                 81      /* 0x51 Used only for the type field of verbs */
-#define        OPFAIL                  82      /* 0x52 Same as (?!), but with verb arg */
-#define        ACCEPT                  83      /* 0x53 Accepts the current matched string, with verbar */
-#define        VERB                    84      /* 0x54 Used only for the type field of verbs */
-#define        PRUNE                   85      /* 0x55 Pattern fails at this startpoint if no-backtracking through this */
-#define        MARKPOINT               86      /* 0x56 Push the current location for rollback by cut. */
-#define        SKIP                    87      /* 0x57 On failure skip forward (to the mark) before retrying */
-#define        COMMIT                  88      /* 0x58 Pattern fails outright if backtracking through this */
-#define        CUTGROUP                89      /* 0x59 On failure go to the next alternation in the group */
-#define        KEEPS                   90      /* 0x5a $& begins here. */
-#define        LNBREAK                 91      /* 0x5b generic newline pattern */
-#define        OPTIMIZED               92      /* 0x5c Placeholder for dump. */
-#define        PSEUDO                  93      /* 0x5d Pseudo opcode for internal use. */
+#define        NGROUPP                 77      /* 0x4d Whether the group matched. */
+#define        INSUBP                  78      /* 0x4e Whether we are in a specific recurse. */
+#define        DEFINEP                 79      /* 0x4f Never execute directly. */
+#define        ENDLIKE                 80      /* 0x50 Used only for the type field of verbs */
+#define        OPFAIL                  81      /* 0x51 Same as (?!), but with verb arg */
+#define        ACCEPT                  82      /* 0x52 Accepts the current matched string, with verbar */
+#define        VERB                    83      /* 0x53 Used only for the type field of verbs */
+#define        PRUNE                   84      /* 0x54 Pattern fails at this startpoint if no-backtracking through this */
+#define        MARKPOINT               85      /* 0x55 Push the current location for rollback by cut. */
+#define        SKIP                    86      /* 0x56 On failure skip forward (to the mark) before retrying */
+#define        COMMIT                  87      /* 0x57 Pattern fails outright if backtracking through this */
+#define        CUTGROUP                88      /* 0x58 On failure go to the next alternation in the group */
+#define        KEEPS                   89      /* 0x59 $& begins here. */
+#define        LNBREAK                 90      /* 0x5a generic newline pattern */
+#define        OPTIMIZED               91      /* 0x5b Placeholder for dump. */
+#define        PSEUDO                  92      /* 0x5c Pseudo opcode for internal use. */
        /* ------------ States ------------- */
 #define        TRIE_next               (REGNODE_MAX + 1)       /* state for TRIE */
 #define        TRIE_next_fail          (REGNODE_MAX + 2)       /* state for TRIE */
@@ -230,7 +229,6 @@ EXTCONST U8 PL_regkind[] = {
        TRIE,           /* AHOCORASICK            */
        TRIE,           /* AHOCORASICKC           */
        GOSUB,          /* GOSUB                  */
-       GOSTART,        /* GOSTART                */
        NGROUPP,        /* NGROUPP                */
        INSUBP,         /* INSUBP                 */
        DEFINEP,        /* DEFINEP                */
@@ -373,7 +371,6 @@ static const U8 regarglen[] = {
        EXTRA_SIZE(struct regnode_1),           /* AHOCORASICK  */
        EXTRA_SIZE(struct regnode_charclass),   /* AHOCORASICKC */
        EXTRA_SIZE(struct regnode_2L),          /* GOSUB        */
-       0,                                      /* GOSTART      */
        EXTRA_SIZE(struct regnode_1),           /* NGROUPP      */
        EXTRA_SIZE(struct regnode_1),           /* INSUBP       */
        EXTRA_SIZE(struct regnode_1),           /* DEFINEP      */
@@ -472,7 +469,6 @@ static const char reg_off_by_arg[] = {
        0,      /* AHOCORASICK  */
        0,      /* AHOCORASICKC */
        0,      /* GOSUB        */
-       0,      /* GOSTART      */
        0,      /* NGROUPP      */
        0,      /* INSUBP       */
        0,      /* DEFINEP      */
@@ -577,23 +573,22 @@ EXTCONST char * const PL_reg_name[] = {
        "AHOCORASICK",                  /* 0x4a */
        "AHOCORASICKC",                 /* 0x4b */
        "GOSUB",                        /* 0x4c */
-       "GOSTART",                      /* 0x4d */
-       "NGROUPP",                      /* 0x4e */
-       "INSUBP",                       /* 0x4f */
-       "DEFINEP",                      /* 0x50 */
-       "ENDLIKE",                      /* 0x51 */
-       "OPFAIL",                       /* 0x52 */
-       "ACCEPT",                       /* 0x53 */
-       "VERB",                         /* 0x54 */
-       "PRUNE",                        /* 0x55 */
-       "MARKPOINT",                    /* 0x56 */
-       "SKIP",                         /* 0x57 */
-       "COMMIT",                       /* 0x58 */
-       "CUTGROUP",                     /* 0x59 */
-       "KEEPS",                        /* 0x5a */
-       "LNBREAK",                      /* 0x5b */
-       "OPTIMIZED",                    /* 0x5c */
-       "PSEUDO",                       /* 0x5d */
+       "NGROUPP",                      /* 0x4d */
+       "INSUBP",                       /* 0x4e */
+       "DEFINEP",                      /* 0x4f */
+       "ENDLIKE",                      /* 0x50 */
+       "OPFAIL",                       /* 0x51 */
+       "ACCEPT",                       /* 0x52 */
+       "VERB",                         /* 0x53 */
+       "PRUNE",                        /* 0x54 */
+       "MARKPOINT",                    /* 0x55 */
+       "SKIP",                         /* 0x56 */
+       "COMMIT",                       /* 0x57 */
+       "CUTGROUP",                     /* 0x58 */
+       "KEEPS",                        /* 0x59 */
+       "LNBREAK",                      /* 0x5a */
+       "OPTIMIZED",                    /* 0x5b */
+       "PSEUDO",                       /* 0x5c */
        /* ------------ States ------------- */
        "TRIE_next",                    /* REGNODE_MAX +0x01 */
        "TRIE_next_fail",               /* REGNODE_MAX +0x02 */
@@ -702,11 +697,12 @@ EXTCONST char * const PL_reg_intflags_name[] = {
        "ANCH_MBOL",                  /* 0x00000400 - PREGf_ANCH_MBOL */
        "ANCH_SBOL",                  /* 0x00000800 - PREGf_ANCH_SBOL */
        "ANCH_GPOS",                  /* 0x00001000 - PREGf_ANCH_GPOS */
+       "RECURSE_SEEN",               /* 0x00002000 - PREGf_RECURSE_SEEN */
 };
 #endif /* DOINIT */
 
 #ifdef DEBUGGING
-#  define REG_INTFLAGS_NAME_SIZE 12
+#  define REG_INTFLAGS_NAME_SIZE 13
 #endif
 
 /* The following have no fixed length. U8 so we can do strchr() on it. */