This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
regcomp.c: With ACCEPT set stopmin even if no data struct present
[perl5.git] / regcomp.c
index 2f12de8..d8bf687 100644 (file)
--- a/regcomp.c
+++ b/regcomp.c
@@ -348,7 +348,7 @@ struct RExC_state_t {
  *
  * All of these macros depend on the above RExC_ accessor macros, which
  * in turns depend on a variable pRExC_state being in scope where they
- * are used. This is the based regexp parser context variable which is
+ * are used. This is the standard regexp parser context variable which is
  * passed into every non-trivial parse function in this file.
  *
  * Note that the UTF macro is itself a wrapper around RExC_utf8, so all
@@ -1343,16 +1343,18 @@ S_debug_show_study_flags(pTHX_ U32 flags, const char *open_str,
 
 static void
 S_debug_studydata(pTHX_ const char *where, scan_data_t *data,
-                    U32 depth, int is_inf)
+                    U32 depth, int is_inf,
+                    SSize_t min, SSize_t stopmin, SSize_t delta)
 {
     DECLARE_AND_GET_RE_DEBUG_FLAGS;
 
     DEBUG_OPTIMISE_MORE_r({
         if (!data)
             return;
-        Perl_re_indentf(aTHX_  "%s: Pos:%" IVdf "/%" IVdf " Flags: 0x%" UVXf,
+        Perl_re_indentf(aTHX_  "%s: M/S/D: %" IVdf "/%" IVdf "/%" IVdf " Pos:%" IVdf "/%" IVdf " Flags: 0x%" UVXf,
             depth,
             where,
+            min, stopmin, delta,
             (IV)data->pos_min,
             (IV)data->pos_delta,
             (UV)data->flags
@@ -1419,14 +1421,14 @@ S_debug_peep(pTHX_ const char *str, const RExC_state_t *pRExC_state,
 }
 
 
-#  define DEBUG_STUDYDATA(where, data, depth, is_inf) \
-                    S_debug_studydata(aTHX_ where, data, depth, is_inf)
+#  define DEBUG_STUDYDATA(where, data, depth, is_inf, min, stopmin, delta) \
+                    S_debug_studydata(aTHX_ where, data, depth, is_inf, min, stopmin, delta)
 
 #  define DEBUG_PEEP(str, scan, depth, flags)   \
                     S_debug_peep(aTHX_ str, pRExC_state, scan, depth, flags)
 
 #else
-#  define DEBUG_STUDYDATA(where, data, depth, is_inf) NOOP
+#  define DEBUG_STUDYDATA(where, data, depth, is_inf, min, stopmin, delta) NOOP
 #  define DEBUG_PEEP(str, scan, depth, flags)         NOOP
 #endif
 
@@ -1628,7 +1630,7 @@ S_scan_commit(pTHX_ const RExC_state_t *pRExC_state, scan_data_t *data,
     }
     data->last_end = -1;
     data->flags &= ~SF_BEFORE_EOL;
-    DEBUG_STUDYDATA("commit", data, 0, is_inf);
+    DEBUG_STUDYDATA("commit", data, 0, is_inf, -1, -1, -1);
 }
 
 /* An SSC is just a regnode_charclass_posix with an extra field: the inversion
@@ -4629,7 +4631,7 @@ STATIC SSize_t
 S_study_chunk(pTHX_
     RExC_state_t *pRExC_state,
     regnode **scanp,        /* Start here (read-write). */
-    SSize_t *minlenp,
+    SSize_t *minlenp,       /* used for the minlen of substrings? */
     SSize_t *deltap,        /* Write maxlen-minlen here. */
     regnode *last,          /* Stop before this one. */
     scan_data_t *data,      /* string data about the pattern */
@@ -4644,20 +4646,41 @@ S_study_chunk(pTHX_
                                a higher caller is holding a ptr to them. */
 )
 {
-    SSize_t final_minlen;
-    /* There must be at least this number of characters to match */
-    SSize_t min = 0;
-    I32 pars = 0, code;
-    regnode *scan = *scanp, *next;
-    SSize_t delta = 0;
+    /* vars about the regnodes we are working with */
+    regnode *scan = *scanp; /* the current opcode we are inspecting */
+    regnode *next = NULL;   /* the next opcode beyond scan, tmp var */
+    regnode *first_non_open = scan; /* FIXME: should this init to NULL?
+                                       the first non open regop, if the init
+                                       val IS an OPEN then we will skip past
+                                       it just after the var decls section */
+    I32 code = 0;           /* temp var used to hold the optype of a regop */
+
+    /* vars about the min and max length of the pattern */
+    SSize_t min = 0;    /* min length of this part of the pattern */
+    SSize_t stopmin = OPTIMIZE_INFTY; /* min length accounting for ACCEPT
+                                         this is adjusted down if we find
+                                         an ACCEPT */
+    SSize_t delta = 0;  /* difference between min and max length
+                           (not accounting for stopmin) */
+
+    /* vars about capture buffers in the pattern */
+    I32 pars = 0;       /* count of OPEN opcodes */
+    I32 is_par = OP(scan) == OPEN ? ARG(scan) : 0; /* is this op an OPEN? */
+
+    /* vars about whether this pattern contains something that can match
+     * infinitely long strings, eg, X* or X+ */
     int is_inf = (flags & SCF_DO_SUBSTR) && (data->flags & SF_IS_INF);
     int is_inf_internal = 0;           /* The studied chunk is infinite */
-    I32 is_par = OP(scan) == OPEN ? ARG(scan) : 0;
-    scan_data_t data_fake;
-    SV *re_trie_maxbuff = NULL;
-    regnode *first_non_open = scan;
-    SSize_t stopmin = OPTIMIZE_INFTY;
-    scan_frame *frame = NULL;
+
+    /* scan_data_t (struct) is used to hold information about the substrings
+     * and start class we have extracted from the string */
+    scan_data_t data_fake; /* temp var used for recursing in some cases */
+
+    SV *re_trie_maxbuff = NULL; /* temp var used to hold whether we can do
+                                   trie optimizations */
+
+    scan_frame *frame = NULL;  /* used as part of fake recursion */
+
     DECLARE_AND_GET_RE_DEBUG_FLAGS;
 
     PERL_ARGS_ASSERT_STUDY_CHUNK;
@@ -4670,7 +4693,6 @@ S_study_chunk(pTHX_
             first_non_open=regnext(first_non_open);
     }
 
-
   fake_study_recurse:
     DEBUG_r(
         RExC_study_chunk_recursed_count++;
@@ -4711,7 +4733,7 @@ S_study_chunk(pTHX_
          */
         bool mutate_ok = was_mutate_ok && !(frame && frame->in_gosub);
         /* Peephole optimizer: */
-        DEBUG_STUDYDATA("Peep", data, depth, is_inf);
+        DEBUG_STUDYDATA("Peep", data, depth, is_inf, min, stopmin, delta);
         DEBUG_PEEP("Peep", scan, depth, flags);
 
 
@@ -4860,6 +4882,7 @@ S_study_chunk(pTHX_
                     }
                     if (flags & SCF_DO_STCLASS)
                         ssc_or(pRExC_state, &accum, (regnode_charclass*)&this_class);
+                    DEBUG_STUDYDATA("end BRANCH", data, depth, is_inf, min, stopmin, delta);
                 }
                 if (code == IFTHEN && num < 2) /* Empty ELSE branch */
                     min1 = 0;
@@ -4900,6 +4923,7 @@ S_study_chunk(pTHX_
                         flags |= SCF_DO_STCLASS_OR;
                     }
                 }
+                DEBUG_STUDYDATA("pre TRIE", data, depth, is_inf, min, stopmin, delta);
 
                 if (PERL_ENABLE_TRIE_OPTIMISATION
                     && OP(startbranch) == BRANCH
@@ -5239,7 +5263,7 @@ S_study_chunk(pTHX_
                         } /* end if ( prev) */
                     } /* TRIE_MAXBUF is non zero */
                 } /* do trie */
-
+                DEBUG_STUDYDATA("after TRIE", data, depth, is_inf, min, stopmin, delta);
             }
             else if ( code == BRANCHJ ) {  /* single branch is optimized. */
                 scan = NEXTOPER(NEXTOPER(scan));
@@ -5316,11 +5340,11 @@ S_study_chunk(pTHX_
                              RExC_study_chunk_recursed_bytes, U8);
                     }
                     /* we havent recursed into this paren yet, so recurse into it */
-                    DEBUG_STUDYDATA("gosub-set", data, depth, is_inf);
+                    DEBUG_STUDYDATA("gosub-set", data, depth, is_inf, min, stopmin, delta);
                     PAREN_SET(recursed_depth, paren);
                     my_recursed_depth= recursed_depth + 1;
                 } else {
-                    DEBUG_STUDYDATA("gosub-inf", data, depth, is_inf);
+                    DEBUG_STUDYDATA("gosub-inf", data, depth, is_inf, min, stopmin, delta);
                     /* some form of infinite recursion, assume infinite length
                      * */
                     if (flags & SCF_DO_SUBSTR) {
@@ -5366,7 +5390,7 @@ S_study_chunk(pTHX_
                     (frame && frame->in_gosub) || OP(scan) == GOSUB
                 );
 
-                DEBUG_STUDYDATA("frame-new", data, depth, is_inf);
+                DEBUG_STUDYDATA("frame-new", data, depth, is_inf, min, stopmin, delta);
                 DEBUG_PEEP("fnew", scan, depth, flags);
 
                 frame = newframe;
@@ -5432,6 +5456,7 @@ S_study_chunk(pTHX_
                 ANYOF_FLAGS(data->start_class) &= ~SSC_MATCHES_EMPTY_STRING;
             }
             flags &= ~SCF_DO_STCLASS;
+            DEBUG_STUDYDATA("end EXACT", data, depth, is_inf, min, stopmin, delta);
         }
         else if (PL_regkind[OP(scan)] == EXACT) {
             /* But OP != EXACT!, so is EXACTFish */
@@ -5516,6 +5541,7 @@ S_study_chunk(pTHX_
                 flags &= ~SCF_DO_STCLASS;
                 SvREFCNT_dec(EXACTF_invlist);
             }
+            DEBUG_STUDYDATA("end EXACTish", data, depth, is_inf, min, stopmin, delta);
         }
         else if (REGNODE_VARIES(OP(scan))) {
             SSize_t mincount, maxcount, minnext, deltanext, pos_before = 0;
@@ -5715,6 +5741,16 @@ S_study_chunk(pTHX_
                     delta += (minnext + deltanext) * maxcount
                              - minnext * mincount;
                 }
+
+                if (data && data->flags & SCF_SEEN_ACCEPT) {
+                    if (flags & SCF_DO_SUBSTR) {
+                        scan_commit(pRExC_state, data, minlenp, is_inf);
+                        flags &= ~SCF_DO_SUBSTR;
+                    }
+                    if (stopmin > min)
+                        stopmin = min;
+                    DEBUG_STUDYDATA("after-whilem accept", data, depth, is_inf, min, stopmin, delta);
+                }
                 /* Try powerful optimization CURLYX => CURLYN. */
                 if (  OP(oscan) == CURLYX && data
                       && data->flags & SF_IN_PAR
@@ -6274,6 +6310,7 @@ S_study_chunk(pTHX_
                                       last, &data_fake, stopparen,
                                       recursed_depth, NULL, f, depth+1,
                                       mutate_ok);
+
                 if (scan->flags) {
                     if (   deltanext < 0
                         || deltanext > (I32) U8_MAX
@@ -6332,6 +6369,7 @@ S_study_chunk(pTHX_
                                                    |= SSC_MATCHES_EMPTY_STRING;
                     }
                 }
+                DEBUG_STUDYDATA("end LOOKAROUND", data, depth, is_inf, min, stopmin, delta);
             }
 #if PERL_ENABLE_POSITIVE_ASSERTION_STUDY
             else {
@@ -6476,11 +6514,10 @@ S_study_chunk(pTHX_
             if (OP(scan)==ACCEPT) {
                 /* m{(*ACCEPT)x} does not have to start with 'x' */
                 flags &= ~SCF_DO_STCLASS;
-                if (data) {
+                if (data)
                     data->flags |= SCF_SEEN_ACCEPT;
-                    if (stopmin > min)
-                        stopmin = min;
-                }
+                if (stopmin > min)
+                    stopmin = min;
             }
         }
         else if (OP(scan) == COMMIT) {
@@ -6611,6 +6648,7 @@ S_study_chunk(pTHX_
                     if (flags & SCF_DO_STCLASS)
                         ssc_or(pRExC_state, &accum, (regnode_charclass *) &this_class);
                 }
+                DEBUG_STUDYDATA("after JUMPTRIE", data, depth, is_inf, min, stopmin, delta);
             }
             if (flags & SCF_DO_SUBSTR) {
                 data->pos_min += min1;
@@ -6648,6 +6686,7 @@ S_study_chunk(pTHX_
                 }
             }
             scan= tail;
+            DEBUG_STUDYDATA("after TRIE study", data, depth, is_inf, min, stopmin, delta);
             continue;
         }
 #else
@@ -6687,7 +6726,7 @@ S_study_chunk(pTHX_
         /* we need to unwind recursion. */
         depth = depth - 1;
 
-        DEBUG_STUDYDATA("frame-end", data, depth, is_inf);
+        DEBUG_STUDYDATA("frame-end", data, depth, is_inf, min, stopmin, delta);
         DEBUG_PEEP("fend", scan, depth, flags);
 
         /* restore previous context */
@@ -6702,7 +6741,17 @@ S_study_chunk(pTHX_
     }
 
     assert(!frame);
-    DEBUG_STUDYDATA("pre-fin", data, depth, is_inf);
+    DEBUG_STUDYDATA("pre-fin", data, depth, is_inf, min, stopmin, delta);
+
+    if (min > stopmin) {
+        /* stopmin might be shorter than min if we saw an (*ACCEPT). If
+        this is the case then it means this pattern is variable length
+        and we need to ensure that the delta accounts for it. delta
+        represents the difference between min length and max length for
+        this part of the pattern. */
+        delta += min - stopmin;
+        min = stopmin;
+    }
 
     *scanp = scan;
     *deltap = is_inf_internal ? OPTIMIZE_INFTY : delta;
@@ -6724,18 +6773,15 @@ S_study_chunk(pTHX_
     if (flags & SCF_TRIE_RESTUDY)
         data->flags |=         SCF_TRIE_RESTUDY;
 
-    DEBUG_STUDYDATA("post-fin", data, depth, is_inf);
-
-    final_minlen = min < stopmin
-            ? min : stopmin;
 
     if (!(RExC_seen & REG_UNBOUNDED_QUANTIFIER_SEEN)) {
-        if (final_minlen > OPTIMIZE_INFTY - delta)
+        if (min > OPTIMIZE_INFTY - delta)
             RExC_maxlen = OPTIMIZE_INFTY;
-        else if (RExC_maxlen < final_minlen + delta)
-            RExC_maxlen = final_minlen + delta;
+        else if (RExC_maxlen < min + delta)
+            RExC_maxlen = min + delta;
     }
-    return final_minlen;
+    DEBUG_STUDYDATA("post-fin", data, depth, is_inf, min, stopmin, delta);
+    return min;
 }
 
 /* add a data member to the struct reg_data attached to this regex, it should
@@ -8770,7 +8816,7 @@ Perl_reg_named_buff_fetch(pTHX_ REGEXP * const r, SV * const namesv,
                         return ret;
                 } else {
                     if (retarray)
-                        ret = newSVsv(&PL_sv_undef);
+                        ret = newSV_type(SVt_NULL);
                 }
                 if (retarray)
                     av_push(retarray, ret);
@@ -11197,14 +11243,14 @@ S_parse_lparen_question_flags(pTHX_ RExC_state_t *pRExC_state)
                 return;
             default:
               fail_modifiers:
-                RExC_parse += SKIP_IF_CHAR(RExC_parse, RExC_end);
+                RExC_parse_inc_if_char();
                 /* diag_listed_as: Sequence (?%s...) not recognized in regex; marked by <-- HERE in m/%s/ */
                 vFAIL2utf8f("Sequence (%" UTF8f "...) not recognized",
                       UTF8fARG(UTF, RExC_parse-seqstart, seqstart));
                 NOT_REACHED; /*NOTREACHED*/
         }
 
-        RExC_parse += UTF ? UTF8SKIP(RExC_parse) : 1;
+        RExC_parse_inc();
     }
 
     vFAIL("Sequence (?... not terminated");
@@ -11554,9 +11600,9 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
                     goto unterminated_verb_pattern;
                 }
 
-                RExC_parse += UTF ? UTF8SKIP(RExC_parse) : 1;
+                RExC_parse_inc();
                 while ( RExC_parse < RExC_end && *RExC_parse != ')' ) {
-                    RExC_parse += UTF ? UTF8SKIP(RExC_parse) : 1;
+                    RExC_parse_inc();
                 }
                 if ( RExC_parse >= RExC_end || *RExC_parse != ')' ) {
                   unterminated_verb_pattern:
@@ -11784,7 +11830,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
 
             } /* End of switch */
             if ( ! op ) {
-                RExC_parse += UTF ? UTF8_SAFE_SKIP(RExC_parse, RExC_end) : 1;
+                RExC_parse_inc_safe();
                 if (has_upper || verb_len == 0) {
                     vFAIL2utf8f( "Unknown verb pattern '%" UTF8f "'",
                         UTF8fARG(UTF, verb_len, start_verb));
@@ -11837,7 +11883,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
             RExC_parse_inc_by(1);   /* past the '?' */
             paren = *RExC_parse;    /* might be a trailing NUL, if not
                                        well-formed */
-            RExC_parse += UTF ? UTF8SKIP(RExC_parse) : 1;
+            RExC_parse_inc();
             if (RExC_parse > RExC_end) {
                 paren = '\0';
             }
@@ -11865,7 +11911,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
                     return handle_named_backref(pRExC_state, flagp,
                                                 segment_parse_start, ')');
                 }
-                RExC_parse += SKIP_IF_CHAR(RExC_parse, RExC_end);
+                RExC_parse_inc_if_char();
                 /* diag_listed_as: Sequence (?%s...) not recognized in regex; marked by <-- HERE in m/%s/ */
                 vFAIL3("Sequence (%.*s...) not recognized",
                                 (int) (RExC_parse - seqstart), seqstart);
@@ -12151,7 +12197,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
             case '?':           /* (??...) */
                 is_logical = 1;
                 if (*RExC_parse != '{') {
-                    RExC_parse += SKIP_IF_CHAR(RExC_parse, RExC_end);
+                    RExC_parse_inc_if_char();
                     /* diag_listed_as: Sequence (?%s...) not recognized in regex; marked by <-- HERE in m/%s/ */
                     vFAIL2utf8f(
                         "Sequence (%" UTF8f "...) not recognized",
@@ -12297,7 +12343,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
                                    "DEFINE"))
                 {
                     ret = reganode(pRExC_state, DEFINEP, 0);
-                    RExC_parse += DEFINE_len;
+                    RExC_parse_inc_by(DEFINE_len);
                     is_define = 1;
                     goto insert_if_check_paren;
                 }
@@ -12351,9 +12397,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
 
                  insert_if_check_paren:
                     if (UCHARAT(RExC_parse) != ')') {
-                        RExC_parse += UTF
-                                      ? UTF8_SAFE_SKIP(RExC_parse, RExC_end)
-                                      : 1;
+                        RExC_parse_inc_safe();
                         vFAIL("Switch condition not recognized");
                     }
                     nextchar(pRExC_state);
@@ -12434,9 +12478,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
 #endif
                     return ret;
                 }
-                RExC_parse += UTF
-                              ? UTF8_SAFE_SKIP(RExC_parse, RExC_end)
-                              : 1;
+                RExC_parse_inc_safe();
                 vFAIL("Unknown switch condition (?(...))");
             }
             case '[':           /* (?[ ... ]) */
@@ -13605,7 +13647,7 @@ S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state,
          * converted a name to the \N{U+...} form.  This include changing a
          * name that evaluates to multiple code points to \N{U+c1.c2.c3 ...} */
 
-        RExC_parse += 2;    /* Skip past the 'U+' */
+        RExC_parse_inc_by(2);    /* Skip past the 'U+' */
 
         /* Code points are separated by dots.  The '}' terminates the whole
          * thing. */
@@ -13627,7 +13669,7 @@ S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state,
                 vFAIL("Invalid hexadecimal number in \\N{U+...}");
             }
 
-            RExC_parse += len;
+            RExC_parse_inc_by(len);
 
             if (cp > MAX_LEGAL_CP) {
                 vFAIL(form_cp_too_large_msg(16, start_digit, len, 0));
@@ -13659,9 +13701,8 @@ S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state,
              * \N{U+100.} )
              * */
             if (*RExC_parse != '.' || RExC_parse + 1 >= e) {
-                RExC_parse += (RExC_orig_utf8)  /* point to after 1st invalid */
-                              ? UTF8SKIP(RExC_parse)
-                              : 1;
+                /*point to after 1st invalid */
+                RExC_parse_incf(RExC_orig_utf8);
                 /*Guard against malformed utf8*/
                 RExC_parse_set(MIN(e, RExC_parse));
                 goto bad_NU;
@@ -14081,7 +14122,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                                                    RExC_end - RExC_parse);
                 char * e = endbrace;
 
-                RExC_parse += 2;
+                RExC_parse_inc_by(2);
 
                 if (! endbrace) {
                     vFAIL2("Missing right brace on \\%c{}", name);
@@ -14224,7 +14265,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                 && UCHARAT(RExC_parse + 1) == '{'
                 && UNLIKELY(! regcurly(RExC_parse + 1, RExC_end, NULL)))
             {
-                RExC_parse += 2;
+                RExC_parse_inc_by(2);
                 vFAIL("Unescaped left brace in regex is illegal here");
             }
             nextchar(pRExC_state);
@@ -14274,7 +14315,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                 /* diag_listed_as: Sequence \%s... not terminated in regex; marked by <-- HERE in m/%s/ */
                 vFAIL2("Sequence %.2s... not terminated", atom_parse_start);
             } else {
-                RExC_parse += 2;
+                RExC_parse_inc_by(2);
                 if (ch == '{') {
                     while (isBLANK(*RExC_parse)) {
                         RExC_parse_inc_by(1);
@@ -16794,10 +16835,6 @@ S_handle_regex_sets(pTHX_ RExC_state_t *pRExC_state, SV** return_invlist,
      * compile time values are valid in all runtime cases */
     REQUIRE_UNI_RULES(flagp, 0);
 
-    ckWARNexperimental(RExC_parse,
-                       WARN_EXPERIMENTAL__REGEX_SETS,
-                       "The regex_sets feature is experimental");
-
     /* Everything in this construct is a metacharacter.  Operands begin with
      * either a '\' (for an escape sequence), or a '[' for a bracketed
      * character class.  Any other character should be an operator, or
@@ -16874,8 +16911,8 @@ S_handle_regex_sets(pTHX_ RExC_state_t *pRExC_state, SV** return_invlist,
      * so that everything gets evaluated down to a single operand, which is the
      * result */
 
-    sv_2mortal((SV *)(stack = newAV()));
-    sv_2mortal((SV *)(fence_stack = newAV()));
+    stack = (AV*)newSV_type_mortal(SVt_PVAV);
+    fence_stack = (AV*)newSV_type_mortal(SVt_PVAV);
 
     while (RExC_parse < RExC_end) {
         I32 top_index;              /* Index of top-most element in 'stack' */
@@ -17247,7 +17284,7 @@ redo_curchar:
                 break;
 
             default:
-                RExC_parse += (UTF) ? UTF8SKIP(RExC_parse) : 1;
+                RExC_parse_inc();
                 if (RExC_parse >= RExC_end) {
                     break;
                 }
@@ -17307,7 +17344,7 @@ redo_curchar:
 
         } /* End of switch on next parse token */
 
-        RExC_parse += (UTF) ? UTF8SKIP(RExC_parse) : 1;
+        RExC_parse_inc();
     } /* End of loop parsing through the construct */
 
     vFAIL("Syntax error in (?[...])");
@@ -17954,7 +17991,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
             value = utf8n_to_uvchr((U8*)RExC_parse,
                                    RExC_end - RExC_parse,
                                    &numlen, UTF8_ALLOW_DEFAULT);
-            RExC_parse += numlen;
+            RExC_parse_inc_by(numlen);
         }
         else {
             value = UCHARAT(RExC_parse);
@@ -18018,7 +18055,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                 value = utf8n_to_uvchr((U8*)RExC_parse,
                                    RExC_end - RExC_parse,
                                    &numlen, UTF8_ALLOW_DEFAULT);
-                RExC_parse += numlen;
+                RExC_parse_inc_by(numlen);
             }
             else {
                 value = UCHARAT(RExC_parse);
@@ -18173,9 +18210,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
 
                 }   /* The \p isn't immediately followed by a '{' */
                 else if (! isALPHA(*RExC_parse)) {
-                    RExC_parse += (UTF)
-                                  ? UTF8_SAFE_SKIP(RExC_parse, RExC_end)
-                                  : 1;
+                    RExC_parse_inc_safe();
                     vFAIL2("Character following \\%c must be '{' or a "
                            "single-character Unicode property name",
                            (U8) value);
@@ -18407,9 +18442,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                 {
                     /* going to die anyway; point to exact spot of
                         * failure */
-                    RExC_parse += (UTF)
-                                  ? UTF8_SAFE_SKIP(RExC_parse, RExC_end)
-                                  : 1;
+                    RExC_parse_inc_safe();
                     vFAIL(message);
                 }
 
@@ -18429,12 +18462,10 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                               | PERL_SCAN_NOTIFY_ILLDIGIT;
                     numlen = (strict) ? 4 : 3;
                     value = grok_oct(--RExC_parse, &numlen, &flags, NULL);
-                    RExC_parse += numlen;
+                    RExC_parse_inc_by(numlen);
                     if (numlen != 3) {
                         if (strict) {
-                            RExC_parse += (UTF)
-                                          ? UTF8_SAFE_SKIP(RExC_parse, RExC_end)
-                                          : 1;
+                            RExC_parse_inc_safe();
                             vFAIL("Need exactly 3 octal digits");
                         }
                         else if (  (flags & PERL_SCAN_NOTIFY_ILLDIGIT)
@@ -20881,9 +20912,7 @@ S_nextchar(pTHX_ RExC_state_t *pRExC_state)
                || UTF8_IS_INVARIANT(*RExC_parse)
                || UTF8_IS_START(*RExC_parse));
 
-        RExC_parse += (UTF)
-                      ? UTF8_SAFE_SKIP(RExC_parse, RExC_end)
-                      : 1;
+        RExC_parse_inc_safe();
 
         skip_to_be_ignored_text(pRExC_state, &RExC_parse,
                                 FALSE /* Don't force /x */ );