regcomp.c: regpiece(): Convert to a switch() stmt

[perl5.git] / regcomp.c
diff --git a/regcomp.c b/regcomp.c

index 097094c..f4765d8 100644 (file)
--- a/regcomp.c
+++ b/regcomp.c
@@ -370,22 +370,15 @@ struct RExC_state_t {
          RExC_naughty += RExC_naughty / (exp) + (add)
  
  #define        ISMULT1(c)      ((c) == '*' || (c) == '+' || (c) == '?')
-#define        ISMULT2(s)      ((*s) == '*' || (*s) == '+' || (*s) == '?' || \
-       ((*s) == '{' && regcurly(s)))
+#define        ISMULT2(s)      (ISMULT1(*s) || ((*s) == '{' && regcurly(s)))
  
  /*
   * Flags to be passed up and down.
   */
-#define        WORST           0       /* Worst case. */
  #define        HASWIDTH        0x01    /* Known to not match null strings, could match
                                     non-null ones. */
-
-/* Simple enough to be STAR/PLUS operand; in an EXACTish node must be a single
- * character.  (There needs to be a case: in the switch statement in regexec.c
- * for any node marked SIMPLE.)  Note that this is not the same thing as
- * REGNODE_SIMPLE */
-#define        SIMPLE          0x02
-#define        SPSTART         0x04    /* Starts with * or + */
+#define        SIMPLE          0x02    /* Exactly one character wide */
+                                /* (or LNBREAK as a special case) */
  #define POSTPONED      0x08    /* (?1),(?&name), (??{...}) or similar */
  #define TRYAGAIN       0x10    /* Weeded out a declaration. */
  #define RESTART_PARSE   0x20    /* Need to redo the parse */
@@ -1498,6 +1491,8 @@ S_scan_commit(pTHX_ const RExC_state_t *pRExC_state, scan_data_t *data,
                         ? OPTIMIZE_INFTY
                         : (l
                            ? data->last_start_max
+                          /* temporary underflow guard for 5.32 */
+                          : data->pos_delta < 0 ? OPTIMIZE_INFTY
                            : (data->pos_delta > OPTIMIZE_INFTY - data->pos_min
                                          ? OPTIMIZE_INFTY
                                          : data->pos_min + data->pos_delta));
@@ -1659,7 +1654,6 @@ S_get_ANYOF_cp_list_for_ssc(pTHX_ const RExC_state_t *pRExC_state,
       * returned list must, and will, contain every code point that is a
       * possibility. */
  
-    dVAR;
      SV* invlist = NULL;
      SV* only_utf8_locale_invlist = NULL;
      unsigned int i;
@@ -4003,7 +3997,7 @@ S_construct_ahocorasick_from_trie(pTHX_ RExC_state_t *pRExC_state, regnode *sour
   *      character folded sequences.  Since a single character can fold into
   *      such a sequence, the minimum match length for this node is less than
   *      the number of characters in the node.  This routine returns in
- *      *min_subtract how many characters to subtract from the the actual
+ *      *min_subtract how many characters to subtract from the actual
   *      length of the string to get a real minimum match length; it is 0 if
   *      there are no multi-char foldeds.  This delta is used by the caller to
   *      adjust the min length of the match, and the delta between min and max,
@@ -4473,7 +4467,7 @@ S_join_exact(pTHX_ RExC_state_t *pRExC_state, regnode *scan,
                      /* EXACTF nodes need to know that the minimum length
                       * changed so that a sharp s in the string can match this
                       * ss in the pattern, but they remain EXACTF nodes, as they
-                     * won't match this unless the target string is is UTF-8,
+                     * won't match this unless the target string is in UTF-8,
                       * which we don't know until runtime.  EXACTFL nodes can't
                       * transform into EXACTFU nodes */
                      if (OP(scan) != EXACTF && OP(scan) != EXACTFL) {
@@ -4526,6 +4520,42 @@ S_unwind_scan_frames(pTHX_ const void *p)
      } while (f);
  }
  
+/* Follow the next-chain of the current node and optimize away
+   all the NOTHINGs from it.
+ */
+STATIC void
+S_rck_elide_nothing(pTHX_ regnode *node)
+{
+    PERL_ARGS_ASSERT_RCK_ELIDE_NOTHING;
+
+    if (OP(node) != CURLYX) {
+        const int max = (reg_off_by_arg[OP(node)]
+                        ? I32_MAX
+                          /* I32 may be smaller than U16 on CRAYs! */
+                        : (I32_MAX < U16_MAX ? I32_MAX : U16_MAX));
+        int off = (reg_off_by_arg[OP(node)] ? ARG(node) : NEXT_OFF(node));
+        int noff;
+        regnode *n = node;
+
+        /* Skip NOTHING and LONGJMP. */
+        while (
+            (n = regnext(n))
+            && (
+                (PL_regkind[OP(n)] == NOTHING && (noff = NEXT_OFF(n)))
+                || ((OP(n) == LONGJMP) && (noff = ARG(n)))
+            )
+            && off + noff < max
+        ) {
+            off += noff;
+        }
+        if (reg_off_by_arg[OP(node)])
+            ARG(node) = off;
+        else
+            NEXT_OFF(node) = off;
+    }
+    return;
+}
+
  /* the return from this sub is the minimum length that could possibly match */
  STATIC SSize_t
  S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
@@ -4535,7 +4565,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                         I32 stopparen,
                          U32 recursed_depth,
                         regnode_ssc *and_withp,
-                       U32 flags, U32 depth)
+                       U32 flags, U32 depth, bool was_mutate_ok)
                         /* scanp: Start here (read-write). */
                         /* deltap: Write maxlen-minlen here. */
                         /* last: Stop before this one. */
@@ -4544,7 +4574,6 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                         /* recursed: which subroutines have we recursed into */
                         /* and_withp: Valid if flags & SCF_DO_STCLASS_OR */
  {
-    dVAR;
      SSize_t final_minlen;
      /* There must be at least this number of characters to match */
      SSize_t min = 0;
@@ -4607,7 +4636,10 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                                     node length to get a real minimum (because
                                     the folded version may be shorter) */
         bool unfolded_multi_char = FALSE;
-        bool mutate_ok = (frame && frame->in_gosub) ? 0 : 1;
+        /* avoid mutating ops if we are anywhere within the recursed or
+         * enframed handling for a GOSUB: the outermost level will handle it.
+         */
+        bool mutate_ok = was_mutate_ok && !(frame && frame->in_gosub);
         /* Peephole optimizer: */
          DEBUG_STUDYDATA("Peep", data, depth, is_inf);
          DEBUG_PEEP("Peep", scan, depth, flags);
@@ -4628,27 +4660,9 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
          }
  
          /* Follow the next-chain of the current node and optimize
-           away all the NOTHINGs from it.  */
-        if (OP(scan) != CURLYX) {
-            const int max = (reg_off_by_arg[OP(scan)]
-                            ? I32_MAX
-                              /* I32 may be smaller than U16 on CRAYs! */
-                            : (I32_MAX < U16_MAX ? I32_MAX : U16_MAX));
-            int off = (reg_off_by_arg[OP(scan)] ? ARG(scan) : NEXT_OFF(scan));
-            int noff;
-            regnode *n = scan;
-
-            /* Skip NOTHING and LONGJMP. */
-            while (   (n = regnext(n))
-                   && (   (PL_regkind[OP(n)] == NOTHING && (noff = NEXT_OFF(n)))
-                       || ((OP(n) == LONGJMP) && (noff = ARG(n))))
-                   && off + noff < max)
-                off += noff;
-            if (reg_off_by_arg[OP(scan)])
-                ARG(scan) = off;
-            else
-                NEXT_OFF(scan) = off;
-        }
+           away all the NOTHINGs from it.
+         */
+        rck_elide_nothing(scan);
  
          /* The principal pseudo-switch.  Cannot be a switch, since we look into
           * several different things.  */
@@ -4675,7 +4689,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
              /* DEFINEP study_chunk() recursion */
              (void)study_chunk(pRExC_state, &scan, &minlen,
                                &deltanext, next, &data_fake, stopparen,
-                              recursed_depth, NULL, f, depth+1);
+                              recursed_depth, NULL, f, depth+1, mutate_ok);
  
              scan = next;
          } else
@@ -4743,7 +4757,8 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                      /* recurse study_chunk() for each BRANCH in an alternation */
                     minnext = study_chunk(pRExC_state, &scan, minlenp,
                                        &deltanext, next, &data_fake, stopparen,
-                                      recursed_depth, NULL, f, depth+1);
+                                      recursed_depth, NULL, f, depth+1,
+                                      mutate_ok);
  
                     if (min1 > minnext)
                         min1 = minnext;
@@ -5193,7 +5208,12 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                       * might result in a minlen of 1 and not of 4,
                       * but this doesn't make us mismatch, just try a bit
                       * harder than we should.
-                     * */
+                     *
+                     * However we must assume this GOSUB is infinite, to
+                     * avoid wrongly applying other optimizations in the
+                     * enclosing scope - see GH 18096, for example.
+                     */
+                    is_inf = is_inf_internal = 1;
                      scan= regnext(scan);
                      continue;
                  }
@@ -5443,6 +5463,9 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                 }
                 if (flags & SCF_DO_SUBSTR)
                     data->pos_min++;
+                /* This will bypass the formal 'min += minnext * mincount'
+                 * calculation in the do_curly path, so assumes min width
+                 * of the PLUS payload is exactly one. */
                 min++;
                 /* FALLTHROUGH */
             case STAR:
@@ -5547,7 +5570,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                                    (mincount == 0
                                     ? (f & ~SCF_DO_SUBSTR)
                                     : f)
-                                  ,depth+1);
+                                  , depth+1, mutate_ok);
  
                 if (flags & SCF_DO_STCLASS)
                     data->start_class = oclass;
@@ -5593,6 +5616,12 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                                   RExC_precomp)));
                  }
  
+                if ( ( minnext > 0 && mincount >= SSize_t_MAX / minnext )
+                    || min >= SSize_t_MAX - minnext * mincount )
+                {
+                    FAIL("Regexp out of space");
+                }
+
                 min += minnext * mincount;
                 is_inf_internal |= deltanext == OPTIMIZE_INFTY
                           || (maxcount == REG_INFTY && minnext + deltanext > 0);
@@ -5715,7 +5744,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                          /* recurse study_chunk() on optimised CURLYX => CURLYM */
                         study_chunk(pRExC_state, &nxt1, minlenp, &deltanext, nxt,
                                      NULL, stopparen, recursed_depth, NULL, 0,
-                                    depth+1);
+                                    depth+1, mutate_ok);
                     }
                     else
                         oscan->flags = 0;
@@ -5845,11 +5874,7 @@ Perl_re_printf( aTHX_  "LHS=%" UVuf " RHS=%" UVuf "\n",
                 if (data && (fl & SF_HAS_EVAL))
                     data->flags |= SF_HAS_EVAL;
               optimize_curly_tail:
-               if (OP(oscan) != CURLYX) {
-                   while (PL_regkind[OP(next = regnext(oscan))] == NOTHING
-                          && NEXT_OFF(next))
-                       NEXT_OFF(oscan) += NEXT_OFF(next);
-               }
+               rck_elide_nothing(oscan);
                 continue;
  
             default:
@@ -6148,7 +6173,8 @@ Perl_re_printf( aTHX_  "LHS=%" UVuf " RHS=%" UVuf "\n",
                  /* recurse study_chunk() for lookahead body */
                  minnext = study_chunk(pRExC_state, &nscan, minlenp, &deltanext,
                                        last, &data_fake, stopparen,
-                                      recursed_depth, NULL, f, depth+1);
+                                      recursed_depth, NULL, f, depth+1,
+                                      mutate_ok);
                  if (scan->flags) {
                      if (   deltanext < 0
                          || deltanext > (I32) U8_MAX
@@ -6253,7 +6279,7 @@ Perl_re_printf( aTHX_  "LHS=%" UVuf " RHS=%" UVuf "\n",
                  *minnextp = study_chunk(pRExC_state, &nscan, minnextp,
                                          &deltanext, last, &data_fake,
                                          stopparen, recursed_depth, NULL,
-                                        f, depth+1);
+                                        f, depth+1, mutate_ok);
                  if (scan->flags) {
                      assert(0);  /* This code has never been tested since this
                                     is normally not compiled */
@@ -6420,7 +6446,8 @@ Perl_re_printf( aTHX_  "LHS=%" UVuf " RHS=%" UVuf "\n",
                          /* optimise study_chunk() for TRIE */
                          minnext = study_chunk(pRExC_state, &scan, minlenp,
                              &deltanext, (regnode *)nextbranch, &data_fake,
-                            stopparen, recursed_depth, NULL, f, depth+1);
+                            stopparen, recursed_depth, NULL, f, depth+1,
+                            mutate_ok);
                      }
                      if (nextbranch && PL_regkind[OP(nextbranch)]==BRANCH)
                          nextbranch= regnext((regnode*)nextbranch);
@@ -6893,7 +6920,7 @@ S_concat_pat(pTHX_ RExC_state_t * const pRExC_state,
          /* we make the assumption here that each op in the list of
           * op_siblings maps to one SV pushed onto the stack,
           * except for code blocks, with have both an OP_NULL and
-         * and OP_CONST.
+         * an OP_CONST.
           * This allows us to match up the list of SVs against the
           * list of OPs to find the next code block.
           *
@@ -7386,7 +7413,7 @@ S_set_regex_pv(pTHX_ RExC_state_t *pRExC_state, REGEXP *Rx)
      PERL_ARGS_ASSERT_SET_REGEX_PV;
  
      /* make sure PL_bitcount bounds not exceeded */
-    assert(sizeof(STD_PAT_MODS) <= 8);
+    STATIC_ASSERT_STMT(sizeof(STD_PAT_MODS) <= 8);
  
      p = sv_grow(MUTABLE_SV(Rx), wraplen + 1); /* +1 for the ending NUL */
      SvPOK_on(Rx);
@@ -7480,7 +7507,7 @@ S_set_regex_pv(pTHX_ RExC_state_t *pRExC_state, REGEXP *Rx)
   * length of the pattern.  Patches welcome to improve that guess.  That amount
   * of space is malloc'd and then immediately freed, and then clawed back node
   * by node.  This design is to minimze, to the extent possible, memory churn
- * when doing the the reallocs.
+ * when doing the reallocs.
   *
   * A separate parentheses counting pass may be needed in some cases.
   * (Previously the sizing pass did this.)  Patches welcome to reduce the number
@@ -7498,7 +7525,6 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
                     OP *expr, const regexp_engine* eng, REGEXP *old_re,
                      bool *is_bare_re, const U32 orig_rx_flags, const U32 pm_flags)
  {
-    dVAR;
      REGEXP *Rx;         /* Capital 'R' means points to a REGEXP */
      STRLEN plen;
      char *exp;
@@ -8208,7 +8234,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
              &data, -1, 0, NULL,
              SCF_DO_SUBSTR | SCF_WHILEM_VISITED_POS | stclass_flag
                            | (restudied ? SCF_TRIE_DOING_RESTUDY : 0),
-            0);
+            0, TRUE);
  
  
          CHECK_RESTUDY_GOTO_butfirst(LEAVE_with_name("study_chunk"));
@@ -8337,7 +8363,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
              SCF_DO_STCLASS_AND|SCF_WHILEM_VISITED_POS|(restudied
                                                        ? SCF_TRIE_DOING_RESTUDY
                                                        : 0),
-            0);
+            0, TRUE);
  
          CHECK_RESTUDY_GOTO_butfirst(NOOP);
  
@@ -8692,9 +8718,9 @@ Perl_reg_named_buff_scalar(pTHX_ REGEXP * const r, const U32 flags)
          } else if (flags & RXapif_ONE) {
              ret = CALLREG_NAMED_BUFF_ALL(r, (flags | RXapif_REGNAMES));
              av = MUTABLE_AV(SvRV(ret));
-            length = av_tindex(av);
+            length = av_count(av);
             SvREFCNT_dec_NN(ret);
-            return newSViv(length + 1);
+            return newSViv(length);
          } else {
              Perl_croak(aTHX_ "panic: Unknown flags %d in named_buff_scalar",
                                                  (int)flags);
@@ -9717,7 +9743,7 @@ Perl__invlist_union_maybe_complement_2nd(pTHX_ SV* const a, SV* const b,
       * one of them */
      while (i_a < len_a && i_b < len_b) {
         UV cp;      /* The element to potentially add to the union's array */
-       bool cp_in_set;   /* is it in the the input list's set or not */
+       bool cp_in_set;   /* is it in the input list's set or not */
  
         /* We need to take one or the other of the two inputs for the union.
          * Since we are merging two sorted lists, we take the smaller of the
@@ -10614,7 +10640,6 @@ Perl__invlistEQ(pTHX_ SV* const a, SV* const b, const bool complement_b)
  STATIC SV*
  S_make_exactf_invlist(pTHX_ RExC_state_t *pRExC_state, regnode *node)
  {
-    dVAR;
      const U8 * s = (U8*)STRING(node);
      SSize_t bytelen = STR_LEN(node);
      UV uc;
@@ -10675,7 +10700,7 @@ S_make_exactf_invlist(pTHX_ RExC_state_t *pRExC_state, regnode *node)
          fc = uc = utf8_to_uvchr_buf(s, s + bytelen, NULL);
  
          /* The only code points that aren't folded in a UTF EXACTFish
-         * node are are the problematic ones in EXACTFL nodes */
+         * node are the problematic ones in EXACTFL nodes */
          if (OP(node) == EXACTFL && is_PROBLEMATIC_LOCALE_FOLDEDS_START_cp(uc)) {
              /* We need to check for the possibility that this EXACTFL
               * node begins with a multi-char fold.  Therefore we fold
@@ -11152,7 +11177,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
          vFAIL("Too many nested open parens");
      }
  
-    *flagp = 0;                                /* Tentatively. */
+    *flagp = 0;                                /* Initialize. */
  
      if (RExC_in_lookbehind) {
         RExC_in_lookbehind++;
@@ -12246,7 +12271,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
      }
      else if (paren != '?')             /* Not Conditional */
         ret = br;
-    *flagp |= flags & (SPSTART | HASWIDTH | POSTPONED);
+    *flagp |= flags & (HASWIDTH | POSTPONED);
      lastbr = br;
      while (*RExC_parse == '|') {
         if (RExC_use_BRANCHJ) {
@@ -12276,7 +12301,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
              REQUIRE_BRANCHJ(flagp, 0);
          }
         lastbr = br;
-       *flagp |= flags & (SPSTART | HASWIDTH | POSTPONED);
+       *flagp |= flags & (HASWIDTH | POSTPONED);
      }
  
      if (have_branch || paren != ':') {
@@ -12509,7 +12534,7 @@ S_regbranch(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, I32 first, U32 depth)
          }
      }
  
-    *flagp = WORST;                    /* Tentatively. */
+    *flagp = 0;                        /* Initialize. */
  
      skip_to_be_ignored_text(pRExC_state, &RExC_parse,
                              FALSE /* Don't force to /x */ );
@@ -12525,9 +12550,7 @@ S_regbranch(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, I32 first, U32 depth)
         else if (ret == 0)
              ret = latest;
         *flagp |= flags&(HASWIDTH|POSTPONED);
-       if (chain == 0)         /* First piece. */
-           *flagp |= flags&SPSTART;
-       else {
+       if (chain != 0) {
             /* FIXME adding one for every branch after the first is probably
              * excessive now we have TRIE support. (hv) */
             MARK_NAUGHTY(1);
@@ -12601,212 +12624,229 @@ S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
          FAIL2("panic: regatom returned failure, flags=%#" UVxf, (UV) flags);
      }
  
-    op = *RExC_parse;
-
-    if (op == '{' && regcurly(RExC_parse)) {
-       maxpos = NULL;
  #ifdef RE_TRACK_PATTERN_OFFSETS
-        parse_start = RExC_parse; /* MJD */
+    parse_start = RExC_parse;
  #endif
-       next = RExC_parse + 1;
-       while (isDIGIT(*next) || *next == ',') {
-           if (*next == ',') {
-               if (maxpos)
-                   break;
-               else
-                   maxpos = next;
-           }
-           next++;
-       }
-       if (*next == '}') {             /* got one */
-            const char* endptr;
-           if (!maxpos)
-               maxpos = next;
-           RExC_parse++;
-            if (isDIGIT(*RExC_parse)) {
-                endptr = RExC_end;
-                if (!grok_atoUV(RExC_parse, &uv, &endptr))
-                    vFAIL("Invalid quantifier in {,}");
-                if (uv >= REG_INFTY)
-                    vFAIL2("Quantifier in {,} bigger than %d", REG_INFTY - 1);
-                min = (I32)uv;
-            } else {
-                min = 0;
-            }
-           if (*maxpos == ',')
-               maxpos++;
-           else
-               maxpos = RExC_parse;
-            if (isDIGIT(*maxpos)) {
-                endptr = RExC_end;
-                if (!grok_atoUV(maxpos, &uv, &endptr))
-                    vFAIL("Invalid quantifier in {,}");
-                if (uv >= REG_INFTY)
-                    vFAIL2("Quantifier in {,} bigger than %d", REG_INFTY - 1);
-                max = (I32)uv;
-            } else {
-               max = REG_INFTY;                /* meaning "infinity" */
-            }
-           RExC_parse = next;
-           nextchar(pRExC_state);
-            if (max < min) {    /* If can't match, warn and optimize to fail
-                                   unconditionally */
-                reginsert(pRExC_state, OPFAIL, orig_emit, depth+1);
-                ckWARNreg(RExC_parse, "Quantifier {n,m} with n > m can't match");
-                NEXT_OFF(REGNODE_p(orig_emit)) =
-                                    regarglen[OPFAIL] + NODE_STEP_REGNODE;
-                return ret;
-            }
-            else if (min == max && *RExC_parse == '?')
-            {
-                ckWARN2reg(RExC_parse + 1,
-                           "Useless use of greediness modifier '%c'",
-                           *RExC_parse);
+
+    op = *RExC_parse;
+    switch (op) {
+
+      case '*':
+        nextchar(pRExC_state);
+            min = 0;
+        break;
+
+      case '+':
+        nextchar(pRExC_state);
+            min = 1;
+        break;
+
+      case '?':
+        nextchar(pRExC_state);
+            min = 0; max = 1;
+        break;
+
+      case '{':  /* A '{' may or may not indicate a quantifier; call regcurly()
+                    to determine which */
+        if (regcurly(RExC_parse)) {
+        const char* endptr;
+
+            /* Here is a quantifier, parse for min and max values */
+        maxpos = NULL;
+        next = RExC_parse + 1;
+        while (isDIGIT(*next) || *next == ',') {
+            if (*next == ',') {
+                if (maxpos)
+                    break;
+                else
+                    maxpos = next;
              }
+            next++;
+        }
  
-         do_curly:
-           if ((flags&SIMPLE)) {
-                if (min == 0 && max == REG_INFTY) {
+        assert(*next == '}');
  
-                    /* Going from 0..inf is currently forbidden in wildcard
-                     * subpatterns.  The only reason is to make it harder to
-                     * write patterns that take a long long time to halt, and
-                     * because the use of this construct isn't necessary in
-                     * matching Unicode property values */
-                    if (RExC_pm_flags & PMf_WILDCARD) {
-                        RExC_parse++;
-                        /* diag_listed_as: Use of %s is not allowed in Unicode
-                           property wildcard subpatterns in regex; marked by
-                           <-- HERE in m/%s/ */
-                        vFAIL("Use of quantifier '*' is not allowed in"
-                              " Unicode property wildcard subpatterns");
-                        /* Note, don't need to worry about {0,}, as a '}' isn't
-                         * legal at all in wildcards, so wouldn't get this far
-                         * */
-                    }
-                    reginsert(pRExC_state, STAR, ret, depth+1);
-                    MARK_NAUGHTY(4);
-                    RExC_seen |= REG_UNBOUNDED_QUANTIFIER_SEEN;
-                    goto nest_check;
-                }
-                if (min == 1 && max == REG_INFTY) {
-                    reginsert(pRExC_state, PLUS, ret, depth+1);
-                    MARK_NAUGHTY(3);
-                    RExC_seen |= REG_UNBOUNDED_QUANTIFIER_SEEN;
-                    goto nest_check;
-                }
-                MARK_NAUGHTY_EXP(2, 2);
-               reginsert(pRExC_state, CURLY, ret, depth+1);
-                Set_Node_Offset(REGNODE_p(ret), parse_start+1); /* MJD */
-                Set_Node_Cur_Length(REGNODE_p(ret), parse_start);
-           }
-           else {
-               const regnode_offset w = reg_node(pRExC_state, WHILEM);
+        if (!maxpos)
+            maxpos = next;
+        RExC_parse++;
+        if (isDIGIT(*RExC_parse)) {
+            endptr = RExC_end;
+            if (!grok_atoUV(RExC_parse, &uv, &endptr))
+                vFAIL("Invalid quantifier in {,}");
+            if (uv >= REG_INFTY)
+                vFAIL2("Quantifier in {,} bigger than %d", REG_INFTY - 1);
+            min = (I32)uv;
+        } else {
+            min = 0;
+        }
+        if (*maxpos == ',')
+            maxpos++;
+        else
+            maxpos = RExC_parse;
+        if (isDIGIT(*maxpos)) {
+            endptr = RExC_end;
+            if (!grok_atoUV(maxpos, &uv, &endptr))
+                vFAIL("Invalid quantifier in {,}");
+            if (uv >= REG_INFTY)
+                vFAIL2("Quantifier in {,} bigger than %d", REG_INFTY - 1);
+            max = (I32)uv;
+        } else {
+            max = REG_INFTY;            /* meaning "infinity" */
+        }
+        RExC_parse = next;
+        nextchar(pRExC_state);
+        if (max < min) {    /* If can't match, warn and optimize to fail
+                               unconditionally */
+            reginsert(pRExC_state, OPFAIL, orig_emit, depth+1);
+            ckWARNreg(RExC_parse, "Quantifier {n,m} with n > m can't match");
+            NEXT_OFF(REGNODE_p(orig_emit)) =
+                                regarglen[OPFAIL] + NODE_STEP_REGNODE;
+            return ret;
+        }
+        else if (min == max && *RExC_parse == '?')
+        {
+            ckWARN2reg(RExC_parse + 1,
+                       "Useless use of greediness modifier '%c'",
+                       *RExC_parse);
+        }
  
-               FLAGS(REGNODE_p(w)) = 0;
-                if (!  REGTAIL(pRExC_state, ret, w)) {
-                    REQUIRE_BRANCHJ(flagp, 0);
-                }
-               if (RExC_use_BRANCHJ) {
-                   reginsert(pRExC_state, LONGJMP, ret, depth+1);
-                   reginsert(pRExC_state, NOTHING, ret, depth+1);
-                   NEXT_OFF(REGNODE_p(ret)) = 3;       /* Go over LONGJMP. */
-               }
-               reginsert(pRExC_state, CURLYX, ret, depth+1);
-                                /* MJD hk */
-                Set_Node_Offset(REGNODE_p(ret), parse_start+1);
-                Set_Node_Length(REGNODE_p(ret),
-                                op == '{' ? (RExC_parse - parse_start) : 1);
+            break;
+        } /* End of is regcurly() */
  
-               if (RExC_use_BRANCHJ)
-                    NEXT_OFF(REGNODE_p(ret)) = 3;   /* Go over NOTHING to
-                                                       LONGJMP. */
-                if (! REGTAIL(pRExC_state, ret, reg_node(pRExC_state,
-                                                          NOTHING)))
-                {
-                    REQUIRE_BRANCHJ(flagp, 0);
-                }
-                RExC_whilem_seen++;
-                MARK_NAUGHTY_EXP(1, 4);     /* compound interest */
-           }
-           FLAGS(REGNODE_p(ret)) = 0;
-
-           if (min > 0)
-               *flagp = WORST;
-           if (max > 0)
-               *flagp |= HASWIDTH;
-            ARG1_SET(REGNODE_p(ret), (U16)min);
-            ARG2_SET(REGNODE_p(ret), (U16)max);
-            if (max == REG_INFTY)
-                RExC_seen |= REG_UNBOUNDED_QUANTIFIER_SEEN;
-
-           goto nest_check;
-       }
+        /* Here was a '{', but what followed it didn't form a quantifier. */
+        /* FALLTHROUGH */
+
+      default:
+        *flagp = flags;
+        return(ret);
+        NOT_REACHED; /*NOTREACHED*/
      }
  
-    if (!ISMULT1(op)) {
-       *flagp = flags;
-       return(ret);
+    /* Here we have a quantifier, and have calculated 'min' and 'max'.
+     *
+     * Check and possibly adjust a zero width operand */
+    if (! (flags & (HASWIDTH|POSTPONED))) {
+        if (max > REG_INFTY/3) {
+            if (origparse[0] == '\\' && origparse[1] == 'K') {
+                vFAIL2utf8f(
+                           "%" UTF8f " is forbidden - matches null string"
+                           " many times",
+                           UTF8fARG(UTF, (RExC_parse >= origparse
+                                         ? RExC_parse - origparse
+                                         : 0),
+                           origparse));
+            } else {
+                ckWARN2reg(RExC_parse,
+                           "%" UTF8f " matches null string many times",
+                           UTF8fARG(UTF, (RExC_parse >= origparse
+                                         ? RExC_parse - origparse
+                                         : 0),
+                           origparse));
+            }
+        }
+
+        /* There's no point in trying to match something 0 length more than
+         * once except for extra side effects, which we don't have here since
+         * not POSTPONED */
+        if (max > 1) {
+            max = 1;
+            if (min > max) {
+                min = max;
+            }
+        }
      }
  
-#if 0                          /* Now runtime fix should be reliable. */
+    /* If this is a code block pass it up */
+    *flagp |= (flags & POSTPONED);
  
-    /* if this is reinstated, don't forget to put this back into perldiag:
+    if (max > 0) {
+        *flagp |= (flags & HASWIDTH);
+        if (max == REG_INFTY)
+            RExC_seen |= REG_UNBOUNDED_QUANTIFIER_SEEN;
+    }
  
-           =item Regexp *+ operand could be empty at {#} in regex m/%s/
+    /* 'SIMPLE' operands don't require full generality */
+    if ((flags&SIMPLE)) {
+        if (max == REG_INFTY) {
+            if (min == 1) {
+                reginsert(pRExC_state, PLUS, ret, depth+1);
+                MARK_NAUGHTY(3);
+                goto done_main_op;
+            }
+            else if (min == 0) {
  
-          (F) The part of the regexp subject to either the * or + quantifier
-           could match an empty string. The {#} shows in the regular
-           expression about where the problem was discovered.
+                /* Going from 0..inf is currently forbidden in wildcard
+                 * subpatterns.  The only reason is to make it harder to
+                 * write patterns that take a long long time to halt, and
+                 * because the use of this construct isn't necessary in
+                 * matching Unicode property values */
+                if (RExC_pm_flags & PMf_WILDCARD) {
+                    RExC_parse++;
+                    /* diag_listed_as: Use of %s is not allowed in Unicode
+                       property wildcard subpatterns in regex; marked by
+                       <-- HERE in m/%s/ */
+                    vFAIL("Use of quantifier '*' is not allowed in"
+                          " Unicode property wildcard subpatterns");
+                    /* Note, don't need to worry about {0,}, as a '}' isn't
+                     * legal at all in wildcards, so wouldn't get this far
+                     * */
+                }
  
-    */
+                reginsert(pRExC_state, STAR, ret, depth+1);
+                MARK_NAUGHTY(4);
+                goto done_main_op;
+            }
+        }
  
-    if (!(flags&HASWIDTH) && op != '?')
-      vFAIL("Regexp *+ operand could be empty");
-#endif
+        /* Here, SIMPLE, but not the '*' and '+' special cases */
  
-#ifdef RE_TRACK_PATTERN_OFFSETS
-    parse_start = RExC_parse;
-#endif
-    nextchar(pRExC_state);
+        MARK_NAUGHTY_EXP(2, 2);
+        reginsert(pRExC_state, CURLY, ret, depth+1);
+        Set_Node_Offset(REGNODE_p(ret), parse_start+1); /* MJD */
+        Set_Node_Cur_Length(REGNODE_p(ret), parse_start);
+    }
+    else {  /* not SIMPLE */
+        const regnode_offset w = reg_node(pRExC_state, WHILEM);
  
-    *flagp = (op != '+') ? (WORST|SPSTART|HASWIDTH) : (WORST|HASWIDTH);
-
-    if (op == '*') {
-       min = 0;
-       goto do_curly;
-    }
-    else if (op == '+') {
-       min = 1;
-       goto do_curly;
-    }
-    else if (op == '?') {
-       min = 0; max = 1;
-       goto do_curly;
-    }
-  nest_check:
-    if (!(flags&(HASWIDTH|POSTPONED)) && max > REG_INFTY/3) {
-        if (origparse[0] == '\\' && origparse[1] == 'K') {
-            vFAIL2utf8f(
-                       "%" UTF8f " is forbidden - matches null string many times",
-                       UTF8fARG(UTF, (RExC_parse >= origparse
-                                     ? RExC_parse - origparse
-                                     : 0),
-                       origparse));
-            /* NOT-REACHED */
-        } else {
-            ckWARN2reg(RExC_parse,
-                       "%" UTF8f " matches null string many times",
-                       UTF8fARG(UTF, (RExC_parse >= origparse
-                                     ? RExC_parse - origparse
-                                     : 0),
-                       origparse));
+        FLAGS(REGNODE_p(w)) = 0;
+        if (!  REGTAIL(pRExC_state, ret, w)) {
+            REQUIRE_BRANCHJ(flagp, 0);
          }
+        if (RExC_use_BRANCHJ) {
+            reginsert(pRExC_state, LONGJMP, ret, depth+1);
+            reginsert(pRExC_state, NOTHING, ret, depth+1);
+            NEXT_OFF(REGNODE_p(ret)) = 3;        /* Go over LONGJMP. */
+        }
+        reginsert(pRExC_state, CURLYX, ret, depth+1);
+                        /* MJD hk */
+        Set_Node_Offset(REGNODE_p(ret), parse_start+1);
+        Set_Node_Length(REGNODE_p(ret),
+                        op == '{' ? (RExC_parse - parse_start) : 1);
+
+        if (RExC_use_BRANCHJ)
+            NEXT_OFF(REGNODE_p(ret)) = 3;   /* Go over NOTHING to
+                                               LONGJMP. */
+        if (! REGTAIL(pRExC_state, ret, reg_node(pRExC_state,
+                                                  NOTHING)))
+        {
+            REQUIRE_BRANCHJ(flagp, 0);
+        }
+        RExC_whilem_seen++;
+        MARK_NAUGHTY_EXP(1, 4);     /* compound interest */
      }
  
+    /* Finish up the CURLY/CURLYX case */
+    FLAGS(REGNODE_p(ret)) = 0;
+
+    ARG1_SET(REGNODE_p(ret), (U16)min);
+    ARG2_SET(REGNODE_p(ret), (U16)max);
+
+  done_main_op:
+
+    /* Process any greediness modifiers */
      if (*RExC_parse == '?') {
-       nextchar(pRExC_state);
-       reginsert(pRExC_state, MINMOD, ret, depth+1);
+        nextchar(pRExC_state);
+        reginsert(pRExC_state, MINMOD, ret, depth+1);
          if (! REGTAIL(pRExC_state, ret, ret + NODE_STEP_REGNODE)) {
              REQUIRE_BRANCHJ(flagp, 0);
          }
@@ -12825,9 +12865,10 @@ S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
          }
      }
  
+    /* Forbid extra quantifiers */
      if (ISMULT2(RExC_parse)) {
-       RExC_parse++;
-       vFAIL("Nested quantifiers");
+        RExC_parse++;
+        vFAIL("Nested quantifiers");
      }
  
      return(ret);
@@ -13268,7 +13309,7 @@ S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state,
          FAIL2("panic: reg returned failure to grok_bslash_N, flags=%#" UVxf,
              (UV) flags);
      }
-    *flagp |= flags&(HASWIDTH|SPSTART|SIMPLE|POSTPONED);
+    *flagp |= flags&(HASWIDTH|SIMPLE|POSTPONED);
  
      nextchar(pRExC_state);
  
@@ -13431,7 +13472,6 @@ S_backref_value(char *p, char *e)
  STATIC regnode_offset
  S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
  {
-    dVAR;
      regnode_offset ret = 0;
      I32 flags = 0;
      char *parse_start;
@@ -13440,7 +13480,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
  
      DECLARE_AND_GET_RE_DEBUG_FLAGS;
  
-    *flagp = WORST;            /* Tentatively. */
+    *flagp = 0;                /* Initialize. */
  
      DEBUG_PARSE("atom");
  
@@ -13518,7 +13558,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                  FAIL2("panic: reg returned failure to regatom, flags=%#" UVxf,
                                                                   (UV) flags);
         }
-       *flagp |= flags&(HASWIDTH|SPSTART|SIMPLE|POSTPONED);
+       *flagp |= flags&(HASWIDTH|SIMPLE|POSTPONED);
         break;
      case '|':
      case ')':
@@ -13563,8 +13603,8 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                  /* SBOL is shared with /^/ so we set the flags so we can tell
                   * /\A/ from /^/ in split. */
                  FLAGS(REGNODE_p(ret)) = 1;
+                *flagp |= SIMPLE;   /* Wrong, but too late to fix for 5.32 */
              }
-           *flagp |= SIMPLE;
             goto finish_meta_pat;
         case 'G':
              if (RExC_pm_flags & PMf_WILDCARD) {
@@ -13577,13 +13617,11 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
              }
             ret = reg_node(pRExC_state, GPOS);
              RExC_seen |= REG_GPOS_SEEN;
-           *flagp |= SIMPLE;
             goto finish_meta_pat;
         case 'K':
              if (!RExC_in_lookbehind && !RExC_in_lookahead) {
                  RExC_seen_zerolen++;
                  ret = reg_node(pRExC_state, KEEPS);
-                *flagp |= SIMPLE;
                  /* XXX:dmq : disabling in-place substitution seems to
                   * be necessary here to avoid cases of memory corruption, as
                   * with: C<$_="x" x 80; s/x\K/y/> -- rgs
@@ -13602,8 +13640,8 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
              }
              else {
                  ret = reg_node(pRExC_state, SEOL);
+                *flagp |= SIMPLE;   /* Wrong, but too late to fix for 5.32 */
              }
-           *flagp |= SIMPLE;
             RExC_seen_zerolen++;                /* Do not optimize RE away */
             goto finish_meta_pat;
         case 'z':
@@ -13613,8 +13651,8 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
              }
              else {
                  ret = reg_node(pRExC_state, EOS);
+                *flagp |= SIMPLE;   /* Wrong, but too late to fix for 5.32 */
              }
-           *flagp |= SIMPLE;
             RExC_seen_zerolen++;                /* Do not optimize RE away */
             goto finish_meta_pat;
         case 'C':
@@ -13737,8 +13775,6 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
             ret = reg_node(pRExC_state, op);
              FLAGS(REGNODE_p(ret)) = flags;
  
-           *flagp |= SIMPLE;
-
             goto finish_meta_pat;
            }
  
@@ -13913,10 +13949,8 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                          num > 9
                          /* any numeric escape < RExC_npar is a backref */
                          && num >= RExC_npar
-                        /* cannot be an octal escape if it starts with 8 */
-                        && *RExC_parse != '8'
-                        /* cannot be an octal escape if it starts with 9 */
-                        && *RExC_parse != '9'
+                        /* cannot be an octal escape if it starts with [89] */
+                        && ! inRANGE(*RExC_parse, '8', '9')
                      ) {
                          /* Probably not meant to be a backref, instead likely
                           * to be an octal character escape, e.g. \35 or \777.
@@ -15171,23 +15205,18 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                          * have to map that back to the original */
                          if (need_to_fold_loc) {
                              upper_fill = loc_correspondence[s - s_start];
-                            Safefree(locfold_buf);
-                            Safefree(loc_correspondence);
-
                              if (upper_fill == 0) {
                                  FAIL2("panic: loc_correspondence[%d] is 0",
                                        (int) (s - s_start));
                              }
+                            Safefree(locfold_buf);
+                            Safefree(loc_correspondence);
                          }
                          else {
                              upper_fill = s - s0;
                          }
                          goto reparse;
                      }
-                    else if (need_to_fold_loc) {
-                        Safefree(locfold_buf);
-                        Safefree(loc_correspondence);
-                    }
  
                      /* Here the node consists entirely of non-final multi-char
                       * folds.  (Likely it is all 'f's or all 's's.)  There's no
@@ -15195,6 +15224,11 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                       * whole thing */
                      len = old_s - s0;
                  }
+
+                if (need_to_fold_loc) {
+                    Safefree(locfold_buf);
+                    Safefree(loc_correspondence);
+                }
             }   /* End of verifying node ends with an appropriate char */
  
              /* We need to start the next node at the character that didn't fit
@@ -15355,7 +15389,6 @@ S_populate_ANYOF_from_invlist(pTHX_ regnode *node, SV** invlist_ptr)
       * sets up the bitmap and any flags, removing those code points from the
       * inversion list, setting it to NULL should it become completely empty */
  
-    dVAR;
  
      PERL_ARGS_ASSERT_POPULATE_ANYOF_FROM_INVLIST;
      assert(PL_regkind[OP(node)] == ANYOF);
@@ -15395,9 +15428,7 @@ S_populate_ANYOF_from_invlist(pTHX_ regnode *node, SV** invlist_ptr)
                     ? end
                     : NUM_ANYOF_CODE_POINTS - 1;
             for (i = start; i <= (int) high; i++) {
-               if (! ANYOF_BITMAP_TEST(node, i)) {
-                   ANYOF_BITMAP_SET(node, i);
-               }
+                ANYOF_BITMAP_SET(node, i);
             }
         }
         invlist_iterfinish(*invlist_ptr);
@@ -16212,7 +16243,7 @@ S_handle_possible_posix(pTHX_ RExC_state_t *pRExC_state,
  
              if (   posix_warnings
                  && RExC_warn_text
-                && av_top_index(RExC_warn_text) > -1)
+                && av_count(RExC_warn_text) > 0)
              {
                  *posix_warnings = RExC_warn_text;
              }
@@ -16464,7 +16495,10 @@ redo_curchar:
                             /* If more than a single node returned, the nested
                              * parens evaluated to more than just a (?[...]),
                              * which isn't legal */
-                        || node != 1) {
+                        || RExC_emit != orig_emit
+                                      + NODE_STEP_REGNODE
+                                      + regarglen[REGEX_SET])
+                    {
                          vFAIL("Expecting interpolated extended charclass");
                      }
                      resultant_invlist = (SV *) ARGp(REGNODE_p(node));
@@ -16515,6 +16549,8 @@ redo_curchar:
                      goto regclass_failed;
                  }
  
+                assert(current);
+
                  /* regclass() will return with parsing just the \ sequence,
                   * leaving the parse pointer at the next thing to parse */
                  RExC_parse--;
@@ -16552,9 +16588,7 @@ redo_curchar:
                      goto regclass_failed;
                  }
  
-                if (! current) {
-                    break;
-                }
+                assert(current);
  
                  /* function call leaves parse pointing to the ']', except if we
                   * faked it */
@@ -17212,10 +17246,10 @@ S_add_multi_match(pTHX_ AV* multi_char_matches, SV* multi_string, const STRLEN c
   *
   * There is a line below that uses the same white space criteria but is outside
   * this macro.  Both here and there must use the same definition */
-#define SKIP_BRACKETED_WHITE_SPACE(do_skip, p)                          \
+#define SKIP_BRACKETED_WHITE_SPACE(do_skip, p, stop_p)                  \
      STMT_START {                                                        \
          if (do_skip) {                                                  \
-            while (isBLANK_A(UCHARAT(p)))                               \
+            while (p < stop_p && isBLANK_A(UCHARAT(p)))                 \
              {                                                           \
                  p++;                                                    \
              }                                                           \
@@ -17265,7 +17299,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
       * UTF-8
       */
  
-    dVAR;
      UV prevvalue = OOB_UNICODE, save_prevvalue = OOB_UNICODE;
      IV range = 0;
      UV value = OOB_UNICODE, save_value = OOB_UNICODE;
@@ -17369,6 +17402,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
      PERL_UNUSED_ARG(depth);
  #endif
  
+    assert(! (ret_invlist && allow_mutiple_chars));
  
      /* If wants an inversion list returned, we can't optimize to something
       * else. */
@@ -17390,7 +17424,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
      initial_listsv_len = SvCUR(listsv);
      SvTEMP_off(listsv); /* Grr, TEMPs and mortals are conflated.  */
  
-    SKIP_BRACKETED_WHITE_SPACE(skip_white, RExC_parse);
+    SKIP_BRACKETED_WHITE_SPACE(skip_white, RExC_parse, RExC_end);
  
      assert(RExC_parse <= RExC_end);
  
@@ -17399,7 +17433,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
          invert = TRUE;
          allow_mutiple_chars = FALSE;
          MARK_NAUGHTY(1);
-        SKIP_BRACKETED_WHITE_SPACE(skip_white, RExC_parse);
+        SKIP_BRACKETED_WHITE_SPACE(skip_white, RExC_parse, RExC_end);
      }
  
      /* Check that they didn't say [:posix:] instead of [[:posix:]] */
@@ -17446,12 +17480,12 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
              output_posix_warnings(pRExC_state, posix_warnings);
          }
  
+        SKIP_BRACKETED_WHITE_SPACE(skip_white, RExC_parse, RExC_end);
+
          if  (RExC_parse >= stop_ptr) {
              break;
          }
  
-        SKIP_BRACKETED_WHITE_SPACE(skip_white, RExC_parse);
-
          if  (UCHARAT(RExC_parse) == ']') {
              break;
          }
@@ -17738,7 +17772,18 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                      assert(prop_definition || strings);
  
                      if (strings) {
-                        if (! RExC_in_multi_char_class) {
+                        if (ret_invlist) {
+                            if (! prop_definition) {
+                                RExC_parse = e + 1;
+                                vFAIL("Unicode string properties are not implemented in (?[...])");
+                            }
+                            else {
+                                ckWARNreg(e + 1,
+                                    "Using just the single character results"
+                                    " returned by \\p{} in (?[...])");
+                            }
+                        }
+                        else if (! RExC_in_multi_char_class) {
                              if (invert ^ (value == 'P')) {
                                  RExC_parse = e + 1;
                                  vFAIL("Inverting a character class which contains"
@@ -17746,16 +17791,16 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                              }
  
                              /* For each multi-character string ... */
-                            while (av_tindex(strings) >= 0) {
+                            while (av_count(strings) > 0) {
                                  /* ... Each entry is itself an array of code
                                  * points. */
                                  AV * this_string = (AV *) av_shift( strings);
-                                STRLEN cp_count = av_tindex(this_string) + 1;
+                                STRLEN cp_count = av_count(this_string);
                                  SV * final = newSV(cp_count * 4);
                                  SvPVCLEAR(final);
  
                                  /* Create another string of sequences of \x{...} */
-                                while (av_tindex(this_string) >= 0) {
+                                while (av_count(this_string) > 0) {
                                      SV * character = av_shift(this_string);
                                      UV cp = SvUV(character);
  
@@ -18129,7 +18174,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
             }
         } /* end of namedclass \blah */
  
-        SKIP_BRACKETED_WHITE_SPACE(skip_white, RExC_parse);
+        SKIP_BRACKETED_WHITE_SPACE(skip_white, RExC_parse, RExC_end);
  
          /* If 'range' is set, 'value' is the ending of a range--check its
           * validity.  (If value isn't a single code point in the case of a
@@ -18172,7 +18217,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                  char* next_char_ptr = RExC_parse + 1;
  
                  /* Get the next real char after the '-' */
-                SKIP_BRACKETED_WHITE_SPACE(skip_white, next_char_ptr);
+                SKIP_BRACKETED_WHITE_SPACE(skip_white, next_char_ptr, RExC_end);
  
                  /* If the '-' is at the end of the class (just before the ']',
                   * it is a literal minus; otherwise it is a range */
@@ -18309,7 +18354,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                       * printable should have each end point be a portable value
                       * for it (preferably like 'A', but we don't warn if it is
                       * a (portable) Unicode name or code point), and the range
-                     * must be be all digits or all letters of the same case.
+                     * must be all digits or all letters of the same case.
                       * Otherwise, the range is non-portable and unclear as to
                       * what it contains */
                      if (             (isPRINT_A(prevvalue) || isPRINT_A(value))
@@ -18567,7 +18612,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
  
         ret = reg(pRExC_state, 1, &reg_flags, depth+1);
  
-        *flagp |= reg_flags & (HASWIDTH|SIMPLE|SPSTART|POSTPONED|RESTART_PARSE|NEED_UTF8);
+        *flagp |= reg_flags & (HASWIDTH|SIMPLE|POSTPONED|RESTART_PARSE|NEED_UTF8);
  
          /* And restore so can parse the rest of the pattern */
          RExC_parse = save_parse;
@@ -18967,7 +19012,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
      if (ret_invlist) {
          *ret_invlist = cp_list;
  
-        return RExC_emit;
+        return (cp_list) ? RExC_emit : 0;
      }
  
      if (anyof_flags & ANYOF_LOCALE_FLAGS) {
@@ -19308,7 +19353,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                       * points) in the ASCII range, so we can't use it here to
                       * artificially restrict the fold domain, so we check if
                       * the class does or does not match some EXACTFish node.
-                     * Further, if we aren't under /i, and and the folded-to
+                     * Further, if we aren't under /i, and the folded-to
                       * character is part of a multi-character fold, we can't do
                       * this optimization, as the sequence around it could be
                       * that multi-character fold, and we don't here know the
@@ -19975,6 +20020,9 @@ S_set_ANYOF_arg(pTHX_ RExC_state_t* const pRExC_state,
              av_store(av, INVLIST_INDEX, SvREFCNT_inc_NN(cp_list));
          }
  
+        /* (Note that if any of this changes, the size calculations in
+         * S_optimize_regclass() might need to be updated.) */
+
          if (only_utf8_locale_list) {
              av_store(av, ONLY_LOCALE_MATCHES_INDEX,
                                       SvREFCNT_inc_NN(only_utf8_locale_list));
@@ -20637,7 +20685,8 @@ S_regtail(pTHX_ RExC_state_t * pRExC_state,
      PERL_UNUSED_ARG(depth);
  #endif
  
-    /* Find last node. */
+    /* The final node in the chain is the first one with a zero next pointer
+     * */
      scan = (regnode_offset) p;
      for (;;) {
         regnode * const temp = regnext(REGNODE_p(scan));
@@ -20655,6 +20704,7 @@ S_regtail(pTHX_ RExC_state_t * pRExC_state,
          scan = REGNODE_OFFSET(temp);
      }
  
+    /* Populate this node's next pointer */
      assert(val >= scan);
      if (reg_off_by_arg[OP(REGNODE_p(scan))]) {
          assert((UV) (val - scan) <= U32_MAX);
@@ -21047,7 +21097,6 @@ void
  Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_info *reginfo, const RExC_state_t *pRExC_state)
  {
  #ifdef DEBUGGING
-    dVAR;
      int k;
      RXi_GET_DECL(prog, progi);
      DECLARE_AND_GET_RE_DEBUG_FLAGS;
@@ -21406,11 +21455,16 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_
                           : (OP(o) == ANYOFH || OP(o) == ANYOFR)
                             ? 0xFF
                             : lowest;
-            Perl_sv_catpvf(aTHX_ sv, " (First UTF-8 byte=%02X", lowest);
-            if (lowest != highest) {
-                Perl_sv_catpvf(aTHX_ sv, "-%02X", highest);
+#ifndef EBCDIC
+            if (OP(o) != ANYOFR || ! isASCII(ANYOFRbase(o) + ANYOFRdelta(o)))
+#endif
+            {
+                Perl_sv_catpvf(aTHX_ sv, " (First UTF-8 byte=%02X", lowest);
+                if (lowest != highest) {
+                    Perl_sv_catpvf(aTHX_ sv, "-%02X", highest);
+                }
+                Perl_sv_catpvf(aTHX_ sv, ")");
              }
-            Perl_sv_catpvf(aTHX_ sv, ")");
          }
  
          SvREFCNT_dec(unresolved);
@@ -21489,7 +21543,9 @@ SV *
  Perl_re_intuit_string(pTHX_ REGEXP * const r)
  {                              /* Assume that RE_INTUIT is set */
      /* Returns an SV containing a string that must appear in the target for it
-     * to match */
+     * to match, or NULL if nothing is known that must match.
+     *
+     * CAUTION: the SV can be freed during execution of the regex engine */
  
      struct regexp *const prog = ReANY(r);
      DECLARE_AND_GET_RE_DEBUG_FLAGS;
@@ -21758,7 +21814,6 @@ Perl_regfree_internal(pTHX_ REGEXP * const rx)
                      U32 refcount;
                      reg_ac_data *aho=(reg_ac_data*)ri->data->data[n];
  #ifdef USE_ITHREADS
-                    dVAR;
  #endif
                      OP_REFCNT_LOCK;
                      refcount = --aho->refcount;
@@ -21787,7 +21842,6 @@ Perl_regfree_internal(pTHX_ REGEXP * const rx)
                     U32 refcount;
                     reg_trie_data *trie=(reg_trie_data*)ri->data->data[n];
  #ifdef USE_ITHREADS
-                    dVAR;
  #endif
                      OP_REFCNT_LOCK;
                      refcount = --trie->refcount;
@@ -21823,15 +21877,19 @@ Perl_regfree_internal(pTHX_ REGEXP * const rx)
  #define SAVEPVN(p, n)  ((p) ? savepvn(p, n) : NULL)
  
  /*
-   re_dup_guts - duplicate a regexp.
+=for apidoc_section REGEXP Functions
+=for apidoc re_dup_guts
+Duplicate a regexp.
  
-   This routine is expected to clone a given regexp structure. It is only
-   compiled under USE_ITHREADS.
+This routine is expected to clone a given regexp structure. It is only
+compiled under USE_ITHREADS.
  
-   After all of the core data stored in struct regexp is duplicated
-   the regexp_engine.dupe method is used to copy any private data
-   stored in the *pprivate pointer. This allows extensions to handle
-   any duplication it needs to do.
+After all of the core data stored in struct regexp is duplicated
+the regexp_engine.dupe method is used to copy any private data
+stored in the *pprivate pointer. This allows extensions to handle
+any duplication they need to do.
+
+=cut
  
     See pregfree() and regfree_internal() if you change anything here.
  */
@@ -21840,7 +21898,6 @@ Perl_regfree_internal(pTHX_ REGEXP * const rx)
  void
  Perl_re_dup_guts(pTHX_ const REGEXP *sstr, REGEXP *dstr, CLONE_PARAMS *param)
  {
-    dVAR;
      I32 npar;
      const struct regexp *r = ReANY(sstr);
      struct regexp *ret = ReANY(dstr);
@@ -21943,7 +22000,6 @@ Perl_re_dup_guts(pTHX_ const REGEXP *sstr, REGEXP *dstr, CLONE_PARAMS *param)
  void *
  Perl_regdupe_internal(pTHX_ REGEXP * const rx, CLONE_PARAMS *param)
  {
-    dVAR;
      struct regexp *const r = ReANY(rx);
      regexp_internal *reti;
      int len;
@@ -22208,9 +22264,11 @@ S_put_range(pTHX_ SV *sv, UV start, const UV end, const bool allow_literals)
          UV this_end;
          const char * format;
  
-        if (end - start < min_range_count) {
-
-            /* Output chars individually when they occur in short ranges */
+        if (    end - start < min_range_count
+            && (end - start <= 2 || (isPRINT_A(start) && isPRINT_A(end))))
+        {
+            /* Output a range of up to 3 chars individually, or longer ranges
+             * when printable */
              for (; start <= end; start++) {
                  put_code_point(sv, start);
              }
@@ -22434,7 +22492,6 @@ S_put_charclass_bitmap_innards_common(pTHX_
       * output would have been only the inversion indicator '^', NULL is instead
       * returned. */
  
-    dVAR;
      SV * output;
  
      PERL_ARGS_ASSERT_PUT_CHARCLASS_BITMAP_INNARDS_COMMON;
@@ -22541,7 +22598,6 @@ S_put_charclass_bitmap_innards(pTHX_ SV *sv,
       * cases where it can't try inverting, as what actually matches isn't known
       * until runtime, and hence the inversion isn't either. */
  
-    dVAR;
      bool inverting_allowed = ! force_as_is_display;
  
      int i;
@@ -22682,7 +22738,7 @@ S_put_charclass_bitmap_innards(pTHX_ SV *sv,
  
          int inverted_bias, as_is_bias;
  
-        /* We will apply our bias to whichever of the the results doesn't have
+        /* We will apply our bias to whichever of the results doesn't have
           * the '^' */
          if (invert) {
              invert = FALSE;
@@ -22934,7 +22990,6 @@ S_dumpuntil(pTHX_ const regexp *r, const regnode *start, const regnode *node,
  void
  Perl_init_uniprops(pTHX)
  {
-    dVAR;
  
  #  ifdef DEBUGGING
      char * dump_len_string;
@@ -23512,7 +23567,7 @@ S_handle_user_defined_property(pTHX_
  #    define CUR_CONTEXT      aTHX
  #    define ORIGINAL_CONTEXT save_aTHX
  #  else
-#    define DECLARATION_FOR_GLOBAL_CONTEXT
+#    define DECLARATION_FOR_GLOBAL_CONTEXT    dNOOP
  #    define SWITCH_TO_GLOBAL_CONTEXT          NOOP
  #    define RESTORE_CONTEXT                   NOOP
  #    define CUR_CONTEXT                       NULL
@@ -23526,7 +23581,6 @@ S_delete_recursion_entry(pTHX_ void *key)
       * properties.  This is a function so it can be set up to be called even if
       * the program unexpectedly quits */
  
-    dVAR;
      SV ** current_entry;
      const STRLEN key_len = strlen((const char *) key);
      DECLARATION_FOR_GLOBAL_CONTEXT;
@@ -23619,7 +23673,6 @@ S_parse_uniprop_string(pTHX_
                                     this */
      const STRLEN level)         /* Recursion level of this call */
  {
-    dVAR;
      char* lookup_name;          /* normalized name for lookup in our tables */
      unsigned lookup_len;        /* Its length */
      enum { Not_Strict = 0,      /* Some properties have stricter name */
@@ -24011,7 +24064,7 @@ S_parse_uniprop_string(pTHX_
                  goto append_name_to_msg;
              }
  
-            lookup_loose = get_cv("_charnames::_loose_regcomp_lookup", 0);
+            lookup_loose = get_cvs("_charnames::_loose_regcomp_lookup", 0);
              if (! lookup_loose) {
                  Perl_croak(aTHX_
                         "panic: Can't find '_charnames::_loose_regcomp_lookup");
@@ -24640,8 +24693,10 @@ S_parse_uniprop_string(pTHX_
          /* Try again stripping off any initial 'Is'.  This is because we
           * promise that an initial Is is optional.  The same isn't true of
           * names that start with 'In'.  Those can match only blocks, and the
-         * lookup table already has those accounted for. */
-        if (starts_with_Is) {
+         * lookup table already has those accounted for.  The lookup table also
+         * has already accounted for Perl extensions (without an = sign)
+         * starting with 'is'. */
+        if (starts_with_Is && equals_pos >= 0) {
              lookup_name += 2;
              lookup_len -= 2;
              equals_pos -= 2;
@@ -25009,8 +25064,10 @@ S_handle_names_wildcard(pTHX_ const char * wname, /* wildcard name to match */
                                     where we are now */
      bool found_matches = FALSE; /* Did any name match so far? */
      SV * empty;                 /* For matching zero length names */
-    SV * must;                  /* What substring, if any, must be in a name
-                                   for the subpattern to match */
+    SV * must_sv;               /* Contains the substring, if any, that must be
+                                   in a name for the subpattern to match */
+    const char * must;          /* The PV of 'must_sv' */
+    STRLEN must_len;            /* And its length */
      SV * syllable_name = NULL;  /* For Hangul syllables */
      const char hangul_prefix[] = "HANGUL SYLLABLE ";
      const STRLEN hangul_prefix_len = sizeof(hangul_prefix) - 1;
@@ -25075,7 +25132,23 @@ S_handle_names_wildcard(pTHX_ const char * wname, /* wildcard name to match */
  
      /* Compile the subpattern consisting of the name being looked for */
      subpattern_re = compile_wildcard(wname, wname_len, FALSE /* /-i */ );
-    must = re_intuit_string(subpattern_re);
+
+    must_sv = re_intuit_string(subpattern_re);
+    if (must_sv) {
+        /* regexec.c can free the re_intuit_string() return. GH #17734 */
+        must_sv = sv_2mortal(newSVsv(must_sv));
+        must = SvPV(must_sv, must_len);
+    }
+    else {
+        must = "";
+        must_len = 0;
+    }
+
+    /* (Note: 'must' could contain a NUL.  And yet we use strspn() below on it.
+     * This works because the NUL causes the function to return early, thus
+     * showing that there are characters in it other than the acceptable ones,
+     * which is our desired result.) */
+
      prog = ReANY(subpattern_re);
  
      /* If only nothing is matched, skip to where empty names are looked for */
@@ -25085,10 +25158,7 @@ S_handle_names_wildcard(pTHX_ const char * wname, /* wildcard name to match */
  
      /* And match against the string of all names /gc.  Don't even try if it
       * must match a character not found in any name. */
-    if ( ! must
-        || SvCUR(must) == 0
-        || strspn(SvPVX(must), "\n -0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ()")
-                                                              == SvCUR(must))
+    if (strspn(must, "\n -0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ()") == must_len)
      {
          while (execute_wildcard(subpattern_re,
                                  cur_pos,
@@ -25228,9 +25298,7 @@ S_handle_names_wildcard(pTHX_ const char * wname, /* wildcard name to match */
       * one of the characters in that isn't in any Hangul syllable. */
      if (    prog->minlen <= (SSize_t) syl_max_len
          &&  prog->maxlen > 0
-        && ( ! must
-            || SvCUR(must) == 0
-            || strspn(SvPVX(must), "\n ABCDEGHIJKLMNOPRSTUWY") == SvCUR(must)))
+        && (strspn(must, "\n ABCDEGHIJKLMNOPRSTUWY") == must_len))
      {
          /* These constants, names, values, and algorithm are adapted from the
           * Unicode standard, version 5.1, section 3.12, and should never
@@ -25325,9 +25393,7 @@ S_handle_names_wildcard(pTHX_ const char * wname, /* wildcard name to match */
           * series */
          if (    prog->minlen <= (SSize_t) SvCUR(algo_name)
              &&  prog->maxlen > 0
-            && ( ! must
-                || SvCUR(must) == 0
-                || strspn(SvPVX(must), legal) == SvCUR(must)))
+            && (strspn(must, legal) == must_len))
          {
              for (j = low; j <= high; j++) { /* For each code point in the series */