Only allow punct delimiter for regex subpattern

[perl5.git] / regcomp.c
diff --git a/regcomp.c b/regcomp.c

index 4583236..129d692 100644 (file)
--- a/regcomp.c
+++ b/regcomp.c
@@ -131,6 +131,8 @@ struct RExC_state_t {
      char       *parse;                 /* Input-scan pointer. */
      char        *copy_start;            /* start of copy of input within
                                             constructed parse string */
+    char        *save_copy_start;       /* Provides one level of saving
+                                           and restoring 'copy_start' */
      char        *copy_start_in_input;   /* Position in input string
                                             corresponding to copy_start */
      SSize_t    whilem_seen;            /* number of WHILEM in this expr */
@@ -180,11 +182,10 @@ struct RExC_state_t {
                                             through */
      U32         study_chunk_recursed_bytes;  /* bytes in bitmap */
      I32                in_lookbehind;
+    I32                in_lookahead;
      I32                contains_locale;
      I32                override_recoding;
-#ifdef EBCDIC
-    I32                recode_x_to_native;
-#endif
+    I32         recode_x_to_native;
      I32                in_multi_char_class;
      struct reg_code_blocks *code_blocks;/* positions of literal (?{})
                                             within pattern */
@@ -229,6 +230,7 @@ struct RExC_state_t {
  #define RExC_precomp   (pRExC_state->precomp)
  #define RExC_copy_start_in_input (pRExC_state->copy_start_in_input)
  #define RExC_copy_start_in_constructed  (pRExC_state->copy_start)
+#define RExC_save_copy_start_in_constructed  (pRExC_state->save_copy_start)
  #define RExC_precomp_end (pRExC_state->precomp_end)
  #define RExC_rx_sv     (pRExC_state->rx_sv)
  #define RExC_rx                (pRExC_state->rx)
@@ -241,7 +243,6 @@ struct RExC_state_t {
  #define RExC_seen_d_op (pRExC_state->seen_d_op) /* Seen something that differs
                                                     under /d from /u ? */
  
-
  #ifdef RE_TRACK_PATTERN_OFFSETS
  #  define RExC_offsets (RExC_rxi->u.offsets) /* I am not like the
                                                           others */
@@ -270,10 +271,17 @@ struct RExC_state_t {
  #define RExC_study_chunk_recursed_bytes  \
                                     (pRExC_state->study_chunk_recursed_bytes)
  #define RExC_in_lookbehind     (pRExC_state->in_lookbehind)
+#define RExC_in_lookahead      (pRExC_state->in_lookahead)
  #define RExC_contains_locale   (pRExC_state->contains_locale)
+#define RExC_recode_x_to_native (pRExC_state->recode_x_to_native)
+
  #ifdef EBCDIC
-#   define RExC_recode_x_to_native (pRExC_state->recode_x_to_native)
+#  define SET_recode_x_to_native(x)                                         \
+                    STMT_START { RExC_recode_x_to_native = (x); } STMT_END
+#else
+#  define SET_recode_x_to_native(x) NOOP
  #endif
+
  #define RExC_in_multi_char_class (pRExC_state->in_multi_char_class)
  #define RExC_frame_head (pRExC_state->frame_head)
  #define RExC_frame_last (pRExC_state->frame_last)
@@ -349,7 +357,7 @@ struct RExC_state_t {
  /* Change from /d into /u rules, and restart the parse.  RExC_uni_semantics is
   * a flag that indicates we need to override /d with /u as a result of
   * something in the pattern.  It should only be used in regards to calling
- * set_regex_charset() or get_regex_charse() */
+ * set_regex_charset() or get_regex_charset() */
  #define REQUIRE_UNI_RULES(flagp, restart_retval)                            \
      STMT_START {                                                            \
              if (DEPENDS_SEMANTICS) {                                        \
@@ -369,12 +377,8 @@ struct RExC_state_t {
  #define REQUIRE_BRANCHJ(flagp, restart_retval)                              \
      STMT_START {                                                            \
                  RExC_use_BRANCHJ = 1;                                       \
-                if (LIKELY(! IN_PARENS_PASS)) {                             \
-                    /* No need to restart the parse immediately if we're    \
-                     * going to reparse anyway to count parens */           \
-                    *flagp |= RESTART_PARSE;                                \
-                    return restart_retval;                                  \
-                }                                                           \
+                *flagp |= RESTART_PARSE;                                    \
+                return restart_retval;                                      \
      } STMT_END
  
  /* Until we have completed the parse, we leave RExC_total_parens at 0 or
@@ -422,6 +426,14 @@ struct RExC_state_t {
  #define _invlist_intersection_complement_2nd(a, b, output) \
                   _invlist_intersection_maybe_complement_2nd(a, b, TRUE, output)
  
+/* We add a marker if we are deferring expansion of a potential user-defined
+ * property until it is needed at runtime the first time it is encountered in a
+ * pattern match.  This marker that shouldn't conflict with any that could be
+ * in a legal name is appended to its name to indicate this.  There is a string
+ * and character form */
+#define DEFERRED_PROP_EXPANSION_MARKERs  "~"
+#define DEFERRED_PROP_EXPANSION_MARKERc  '~'
+
  /* About scan_data_t.
  
    During optimisation we recurse through the regexp program performing
@@ -745,6 +757,10 @@ static const scan_data_t zero_scan_data = {
      Perl_croak(aTHX_ msg " in regex m/%" UTF8f "%s/",      \
             arg, UTF8fARG(UTF, len, RExC_precomp), ellipses))
  
+#define        FAIL3(msg,arg1,arg2) _FAIL(                         \
+    Perl_croak(aTHX_ msg " in regex m/%" UTF8f "%s/",      \
+     arg1, arg2, UTF8fARG(UTF, len, RExC_precomp), ellipses))
+
  /*
   * Simple_vFAIL -- like FAIL, but marks the current location in the scan
   */
@@ -821,8 +837,13 @@ static const scan_data_t zero_scan_data = {
  } STMT_END
  
  /* Setting this to NULL is a signal to not output warnings */
-#define TURN_OFF_WARNINGS_IN_SUBSTITUTE_PARSE RExC_copy_start_in_constructed = NULL
-#define RESTORE_WARNINGS RExC_copy_start_in_constructed = RExC_precomp
+#define TURN_OFF_WARNINGS_IN_SUBSTITUTE_PARSE                               \
+    STMT_START {                                                            \
+      RExC_save_copy_start_in_constructed  = RExC_copy_start_in_constructed;\
+      RExC_copy_start_in_constructed = NULL;                                \
+    } STMT_END
+#define RESTORE_WARNINGS                                                    \
+    RExC_copy_start_in_constructed = RExC_save_copy_start_in_constructed
  
  /* Since a warning can be generated multiple times as the input is reparsed, we
   * output it the first time we come to that point in the parse, but suppress it
@@ -837,7 +858,8 @@ static const scan_data_t zero_scan_data = {
  #define UPDATE_WARNINGS_LOC(loc)                                        \
      STMT_START {                                                        \
          if (TO_OUTPUT_WARNINGS(loc)) {                                  \
-            RExC_latest_warn_offset = (xI(loc)) - RExC_precomp;         \
+            RExC_latest_warn_offset = MAX(sI, MIN(eI, xI(loc)))         \
+                                                       - RExC_precomp;  \
          }                                                               \
      } STMT_END
  
@@ -1574,7 +1596,9 @@ S_get_ANYOF_cp_list_for_ssc(pTHX_ const RExC_state_t *pRExC_state,
      unsigned int i;
      const U32 n = ARG(node);
      bool new_node_has_latin1 = FALSE;
-    const U8 flags = OP(node) == ANYOFH ? 0 : ANYOF_FLAGS(node);
+    const U8 flags = (inRANGE(OP(node), ANYOFH, ANYOFRb))
+                      ? 0
+                      : ANYOF_FLAGS(node);
  
      PERL_ARGS_ASSERT_GET_ANYOF_CP_LIST_FOR_SSC;
  
@@ -1627,7 +1651,7 @@ S_get_ANYOF_cp_list_for_ssc(pTHX_ const RExC_state_t *pRExC_state,
      }
  
      /* Add in the points from the bit map */
-    if (OP(node) != ANYOFH) {
+    if (! inRANGE(OP(node), ANYOFH, ANYOFRb)) {
          for (i = 0; i < NUM_ANYOF_CODE_POINTS; i++) {
              if (ANYOF_BITMAP_TEST(node, i)) {
                  unsigned int start = i++;
@@ -1714,7 +1738,9 @@ S_ssc_and(pTHX_ const RExC_state_t *pRExC_state, regnode_ssc *ssc,
       * another SSC or a regular ANYOF class.  Can create false positives. */
  
      SV* anded_cp_list;
-    U8  and_with_flags = (OP(and_with) == ANYOFH) ? 0 : ANYOF_FLAGS(and_with);
+    U8  and_with_flags = inRANGE(OP(and_with), ANYOFH, ANYOFRb)
+                          ? 0
+                          : ANYOF_FLAGS(and_with);
      U8  anded_flags;
  
      PERL_ARGS_ASSERT_SSC_AND;
@@ -1898,7 +1924,9 @@ S_ssc_or(pTHX_ const RExC_state_t *pRExC_state, regnode_ssc *ssc,
  
      SV* ored_cp_list;
      U8 ored_flags;
-    U8  or_with_flags = (OP(or_with) == ANYOFH) ? 0 : ANYOF_FLAGS(or_with);
+    U8  or_with_flags = inRANGE(OP(or_with), ANYOFH, ANYOFRb)
+                         ? 0
+                         : ANYOF_FLAGS(or_with);
  
      PERL_ARGS_ASSERT_SSC_OR;
  
@@ -2122,6 +2150,7 @@ S_ssc_finalize(pTHX_ RExC_state_t *pRExC_state, regnode_ssc *ssc)
      populate_ANYOF_from_invlist( (regnode *) ssc, &invlist);
  
      set_ANYOF_arg(pRExC_state, (regnode *) ssc, invlist, NULL, NULL);
+    SvREFCNT_dec(invlist);
  
      /* Make sure is clone-safe */
      ssc->invlist = NULL;
@@ -2512,7 +2541,8 @@ is the recommended Unicode-aware way of saying
         if (UTF) {                                                         \
              SV *zlopp = newSV(UTF8_MAXBYTES);                             \
             unsigned char *flrbbbbb = (unsigned char *) SvPVX(zlopp);      \
-            unsigned const char *const kapow = uvchr_to_utf8(flrbbbbb, val); \
+            unsigned char *const kapow = uvchr_to_utf8(flrbbbbb, val);     \
+            *kapow = '\0';                                                 \
             SvCUR_set(zlopp, kapow - flrbbbbb);                            \
             SvPOK_on(zlopp);                                               \
             SvUTF8_on(zlopp);                                              \
@@ -2681,7 +2711,7 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch,
  #endif
  
      switch (flags) {
-        case EXACT: case EXACT_ONLY8: case EXACTL: break;
+        case EXACT: case EXACT_REQ8: case EXACTL: break;
         case EXACTFAA:
          case EXACTFUP:
         case EXACTFU:
@@ -2696,7 +2726,7 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch,
      trie->wordcount = word_count;
      RExC_rxi->data->data[ data_slot ] = (void*)trie;
      trie->charmap = (U16 *) PerlMemShared_calloc( 256, sizeof(U16) );
-    if (flags == EXACT || flags == EXACT_ONLY8 || flags == EXACTL)
+    if (flags == EXACT || flags == EXACT_REQ8 || flags == EXACTL)
         trie->bitmap = (char *) PerlMemShared_calloc( ANYOF_BITMAP_SIZE, 1 );
      trie->wordinfo = (reg_trie_wordinfo *) PerlMemShared_calloc(
                         trie->wordcount+1, sizeof(reg_trie_wordinfo));
@@ -2772,8 +2802,8 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch,
  
          if (    noper < tail
              && (    OP(noper) == flags
-                || (flags == EXACT && OP(noper) == EXACT_ONLY8)
-                || (flags == EXACTFU && (   OP(noper) == EXACTFU_ONLY8
+                || (flags == EXACT && OP(noper) == EXACT_REQ8)
+                || (flags == EXACTFU && (   OP(noper) == EXACTFU_REQ8
                                           || OP(noper) == EXACTFUP))))
          {
              uc= (U8*)STRING(noper);
@@ -2990,8 +3020,8 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch,
  
              if (    noper < tail
                  && (    OP(noper) == flags
-                    || (flags == EXACT && OP(noper) == EXACT_ONLY8)
-                    || (flags == EXACTFU && (   OP(noper) == EXACTFU_ONLY8
+                    || (flags == EXACT && OP(noper) == EXACT_REQ8)
+                    || (flags == EXACTFU && (   OP(noper) == EXACTFU_REQ8
                                               || OP(noper) == EXACTFUP))))
              {
                  const U8 *uc= (U8*)STRING(noper);
@@ -3215,8 +3245,8 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch,
  
              if (    noper < tail
                  && (    OP(noper) == flags
-                    || (flags == EXACT && OP(noper) == EXACT_ONLY8)
-                    || (flags == EXACTFU && (   OP(noper) == EXACTFU_ONLY8
+                    || (flags == EXACT && OP(noper) == EXACT_REQ8)
+                    || (flags == EXACTFU && (   OP(noper) == EXACTFU_REQ8
                                               || OP(noper) == EXACTFUP))))
              {
                  const U8 *uc= (U8*)STRING(noper);
@@ -3531,9 +3561,9 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch,
                      if ( state==1 ) {
                          OP( convert ) = nodetype;
                          str=STRING(convert);
-                        STR_LEN(convert)=0;
+                        setSTR_LEN(convert, 0);
                      }
-                    STR_LEN(convert) += len;
+                    setSTR_LEN(convert, STR_LEN(convert) + len);
                      while (len--)
                          *str++ = *ch++;
                 } else {
@@ -3973,8 +4003,9 @@ S_construct_ahocorasick_from_trie(pTHX_ RExC_state_t *pRExC_state, regnode *sour
   *      using /iaa matching will be doing so almost entirely with ASCII
   *      strings, so this should rarely be encountered in practice */
  
-#define JOIN_EXACT(scan,min_subtract,unfolded_multi_char, flags) \
-    if (PL_regkind[OP(scan)] == EXACT) \
+#define JOIN_EXACT(scan,min_subtract,unfolded_multi_char, flags)    \
+    if (PL_regkind[OP(scan)] == EXACT && OP(scan) != LEXACT         \
+                                      && OP(scan) != LEXACT_REQ8)  \
          join_exact(pRExC_state,(scan),(min_subtract),unfolded_multi_char, (flags), NULL, depth+1)
  
  STATIC U32
@@ -4039,16 +4070,16 @@ S_join_exact(pTHX_ RExC_state_t *pRExC_state, regnode *scan,
  
              /* Joining something that requires UTF-8 with something that
               * doesn't, means the result requires UTF-8. */
-            if (OP(scan) == EXACT && (OP(n) == EXACT_ONLY8)) {
-                OP(scan) = EXACT_ONLY8;
+            if (OP(scan) == EXACT && (OP(n) == EXACT_REQ8)) {
+                OP(scan) = EXACT_REQ8;
              }
-            else if (OP(scan) == EXACT_ONLY8 && (OP(n) == EXACT)) {
+            else if (OP(scan) == EXACT_REQ8 && (OP(n) == EXACT)) {
                  ;   /* join is compatible, no need to change OP */
              }
-            else if ((OP(scan) == EXACTFU) && (OP(n) == EXACTFU_ONLY8)) {
-                OP(scan) = EXACTFU_ONLY8;
+            else if ((OP(scan) == EXACTFU) && (OP(n) == EXACTFU_REQ8)) {
+                OP(scan) = EXACTFU_REQ8;
              }
-            else if ((OP(scan) == EXACTFU_ONLY8) && (OP(n) == EXACTFU)) {
+            else if ((OP(scan) == EXACTFU_REQ8) && (OP(n) == EXACTFU)) {
                  ;   /* join is compatible, no need to change OP */
              }
              else if (OP(scan) == EXACTFU && OP(n) == EXACTFU) {
@@ -4071,7 +4102,7 @@ S_join_exact(pTHX_ RExC_state_t *pRExC_state, regnode *scan,
                    * node.  And if the only adjacent node is EXACTF, they get
                    * absorbed into that, under the theory that a longer node is
                    * better than two shorter ones, even if one is EXACTFU.  Note
-                  * that EXACTFU_ONLY8 is generated only for UTF-8 patterns,
+                  * that EXACTFU_REQ8 is generated only for UTF-8 patterns,
                    * and the EXACTFU_S_EDGE ones only for non-UTF-8.  */
  
                  if (STRING(n)[STR_LEN(n)-1] == 's') {
@@ -4140,7 +4171,7 @@ S_join_exact(pTHX_ RExC_state_t *pRExC_state, regnode *scan,
              merged++;
  
              NEXT_OFF(scan) += NEXT_OFF(n);
-            STR_LEN(scan) += STR_LEN(n);
+            setSTR_LEN(scan, STR_LEN(scan) + STR_LEN(n));
              next = n + NODE_SZ_STR(n);
              /* Now we can overwrite *n : */
              Move(STRING(n), STRING(scan) + oldl, STR_LEN(n), char);
@@ -4179,7 +4210,7 @@ S_join_exact(pTHX_ RExC_state_t *pRExC_state, regnode *scan,
       * this final joining, sequences could have been split over boundaries, and
       * hence missed).  The sequences only happen in folding, hence for any
       * non-EXACT EXACTish node */
-    if (OP(scan) != EXACT && OP(scan) != EXACT_ONLY8 && OP(scan) != EXACTL) {
+    if (OP(scan) != EXACT && OP(scan) != EXACT_REQ8 && OP(scan) != EXACTL) {
          U8* s0 = (U8*) STRING(scan);
          U8* s = s0;
          U8* s_end = s0 + STR_LEN(scan);
@@ -4522,31 +4553,31 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
           */
          JOIN_EXACT(scan,&min_subtract, &unfolded_multi_char, 0);
  
-       /* Follow the next-chain of the current node and optimize
-          away all the NOTHINGs from it.  */
-       if (OP(scan) != CURLYX) {
-           const int max = (reg_off_by_arg[OP(scan)]
-                      ? I32_MAX
-                      /* I32 may be smaller than U16 on CRAYs! */
-                      : (I32_MAX < U16_MAX ? I32_MAX : U16_MAX));
-           int off = (reg_off_by_arg[OP(scan)] ? ARG(scan) : NEXT_OFF(scan));
-           int noff;
-           regnode *n = scan;
-
-           /* Skip NOTHING and LONGJMP. */
-           while ((n = regnext(n))
-                  && ((PL_regkind[OP(n)] == NOTHING && (noff = NEXT_OFF(n)))
-                      || ((OP(n) == LONGJMP) && (noff = ARG(n))))
-                  && off + noff < max)
-               off += noff;
-           if (reg_off_by_arg[OP(scan)])
-               ARG(scan) = off;
-           else
-               NEXT_OFF(scan) = off;
-       }
+        /* Follow the next-chain of the current node and optimize
+           away all the NOTHINGs from it.  */
+        if (OP(scan) != CURLYX) {
+            const int max = (reg_off_by_arg[OP(scan)]
+                            ? I32_MAX
+                              /* I32 may be smaller than U16 on CRAYs! */
+                            : (I32_MAX < U16_MAX ? I32_MAX : U16_MAX));
+            int off = (reg_off_by_arg[OP(scan)] ? ARG(scan) : NEXT_OFF(scan));
+            int noff;
+            regnode *n = scan;
+
+            /* Skip NOTHING and LONGJMP. */
+            while (   (n = regnext(n))
+                   && (   (PL_regkind[OP(n)] == NOTHING && (noff = NEXT_OFF(n)))
+                       || ((OP(n) == LONGJMP) && (noff = ARG(n))))
+                   && off + noff < max)
+                off += noff;
+            if (reg_off_by_arg[OP(scan)])
+                ARG(scan) = off;
+            else
+                NEXT_OFF(scan) = off;
+        }
  
-       /* The principal pseudo-switch.  Cannot be a switch, since we
-          look into several different things.  */
+        /* The principal pseudo-switch.  Cannot be a switch, since we look into
+         * several different things.  */
          if ( OP(scan) == DEFINEP ) {
              SSize_t minlen = 0;
              SSize_t deltanext = 0;
@@ -4760,7 +4791,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                      if ( SvIV(re_trie_maxbuff)>=0  ) {
                          regnode *cur;
                          regnode *first = (regnode *)NULL;
-                        regnode *last = (regnode *)NULL;
+                        regnode *prev = (regnode *)NULL;
                          regnode *tail = scan;
                          U8 trietype = 0;
                          U32 count=0;
@@ -4838,9 +4869,9 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                                  ----------------+-----------
                                  NOTHING         | NOTHING
                                  EXACT           | EXACT
-                                EXACT_ONLY8     | EXACT
+                                EXACT_REQ8     | EXACT
                                  EXACTFU         | EXACTFU
-                                EXACTFU_ONLY8   | EXACTFU
+                                EXACTFU_REQ8   | EXACTFU
                                  EXACTFUP        | EXACTFU
                                  EXACTFAA        | EXACTFAA
                                  EXACTL          | EXACTL
@@ -4850,10 +4881,10 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                          */
  #define TRIE_TYPE(X) ( ( NOTHING == (X) )                                   \
                         ? NOTHING                                            \
-                       : ( EXACT == (X) || EXACT_ONLY8 == (X) )             \
+                       : ( EXACT == (X) || EXACT_REQ8 == (X) )             \
                           ? EXACT                                            \
                           : (     EXACTFU == (X)                             \
-                              || EXACTFU_ONLY8 == (X)                       \
+                              || EXACTFU_REQ8 == (X)                       \
                                || EXACTFUP == (X) )                          \
                             ? EXACTFU                                        \
                             : ( EXACTFAA == (X) )                            \
@@ -4891,7 +4922,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                                      REG_NODE_NUM(noper_next), SvPV_nolen_const(RExC_mysv));
                                  }
                                  Perl_re_printf( aTHX_  "(First==%d,Last==%d,Cur==%d,tt==%s,ntt==%s,nntt==%s)\n",
-                                   REG_NODE_NUM(first), REG_NODE_NUM(last), REG_NODE_NUM(cur),
+                                   REG_NODE_NUM(first), REG_NODE_NUM(prev), REG_NODE_NUM(cur),
                                    PL_reg_name[trietype], PL_reg_name[noper_trietype], PL_reg_name[noper_next_trietype]
                                 );
                              });
@@ -4937,7 +4968,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                                  } else {
                                      if ( trietype == NOTHING )
                                          trietype = noper_trietype;
-                                    last = cur;
+                                    prev = cur;
                                  }
                                 if (first)
                                     count++;
@@ -4947,7 +4978,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                                   * noper may either be a triable node which can
                                   * not be tried together with the current trie,
                                   * or a non triable node */
-                                if ( last ) {
+                                if ( prev ) {
                                      /* If last is set and trietype is not
                                       * NOTHING then we have found at least two
                                       * triable branch sequences in a row of a
@@ -4960,7 +4991,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                                          make_trie( pRExC_state,
                                                  startbranch, first, cur, tail,
                                                  count, trietype, depth+1 );
-                                    last = NULL; /* note: we clear/update
+                                    prev = NULL; /* note: we clear/update
                                                      first, trietype etc below,
                                                      so we dont do it here */
                                  }
@@ -4989,12 +5020,12 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                              Perl_re_indentf( aTHX_  "- %s (%d) <SCAN FINISHED> ",
                                depth+1, SvPV_nolen_const( RExC_mysv ), REG_NODE_NUM(cur));
                              Perl_re_printf( aTHX_  "(First==%d, Last==%d, Cur==%d, tt==%s)\n",
-                               REG_NODE_NUM(first), REG_NODE_NUM(last), REG_NODE_NUM(cur),
+                               REG_NODE_NUM(first), REG_NODE_NUM(prev), REG_NODE_NUM(cur),
                                 PL_reg_name[trietype]
                              );
  
                          });
-                        if ( last && trietype ) {
+                        if ( prev && trietype ) {
                              if ( trietype != NOTHING ) {
                                  /* the last branch of the sequence was part of
                                   * a trie, so we have to construct it here
@@ -5039,9 +5070,8 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                                          OP(opt)= OPTIMIZED;
                                  }
                              }
-                        } /* end if ( last) */
+                        } /* end if ( prev) */
                      } /* TRIE_MAXBUF is non zero */
-
                  } /* do trie */
  
             }
@@ -5177,7 +5207,9 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
             }
         }
         else if (   OP(scan) == EXACT
-                 || OP(scan) == EXACT_ONLY8
+                 || OP(scan) == LEXACT
+                 || OP(scan) == EXACT_REQ8
+                 || OP(scan) == LEXACT_REQ8
                   || OP(scan) == EXACTL)
          {
             SSize_t l = STR_LEN(scan);
@@ -5262,7 +5294,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
             }
  
              if (flags & SCF_DO_STCLASS) {
-                SV* EXACTF_invlist = _make_exactf_invlist(pRExC_state, scan);
+                SV* EXACTF_invlist = make_exactf_invlist(pRExC_state, scan);
  
                  assert(EXACTF_invlist);
                  if (flags & SCF_DO_STCLASS_AND) {
@@ -5299,7 +5331,9 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                 if (flags & (SCF_DO_SUBSTR | SCF_DO_STCLASS)) {
                     next = NEXTOPER(scan);
                     if (   OP(next) == EXACT
-                        || OP(next) == EXACT_ONLY8
+                        || OP(next) == LEXACT
+                        || OP(next) == EXACT_REQ8
+                        || OP(next) == LEXACT_REQ8
                          || OP(next) == EXACTL
                          || (flags & SCF_DO_STCLASS))
                      {
@@ -5836,6 +5870,9 @@ Perl_re_printf( aTHX_  "LHS=%" UVuf " RHS=%" UVuf "\n",
                  case ANYOFL:
                  case ANYOFPOSIXL:
                  case ANYOFH:
+                case ANYOFHb:
+                case ANYOFHr:
+                case ANYOFHs:
                  case ANYOF:
                     if (flags & SCF_DO_STCLASS_AND)
                         ssc_and(pRExC_state, data->start_class,
@@ -5861,6 +5898,26 @@ Perl_re_printf( aTHX_  "LHS=%" UVuf " RHS=%" UVuf "\n",
                      break;
                    }
  
+                case ANYOFR:
+                case ANYOFRb:
+                  {
+                    SV* cp_list = NULL;
+
+                    cp_list = _add_range_to_invlist(cp_list,
+                                        ANYOFRbase(scan),
+                                        ANYOFRbase(scan) + ANYOFRdelta(scan));
+
+                    if (flags & SCF_DO_STCLASS_OR) {
+                        ssc_union(data->start_class, cp_list, invert);
+                    }
+                    else if (flags & SCF_DO_STCLASS_AND) {
+                        ssc_intersection(data->start_class, cp_list, invert);
+                    }
+
+                    SvREFCNT_dec_NN(cp_list);
+                    break;
+                  }
+
                 case NPOSIXL:
                      invert = 1;
                      /* FALLTHROUGH */
@@ -6149,7 +6206,6 @@ Perl_re_printf( aTHX_  "LHS=%" UVuf " RHS=%" UVuf "\n",
             }
  #endif
         }
-
         else if (OP(scan) == OPEN) {
             if (stopparen != (I32)ARG(scan))
                 pars++;
@@ -7242,7 +7298,7 @@ S_set_regex_pv(pTHX_ RExC_state_t *pRExC_state, REGEXP *Rx)
          const char* name;
  
          name = get_regex_charset_name(RExC_rx->extflags, &len);
-        if strEQ(name, DEPENDS_PAT_MODS) {  /* /d under UTF-8 => /u */
+        if (strEQ(name, DEPENDS_PAT_MODS)) {  /* /d under UTF-8 => /u */
              assert(RExC_utf8);
              name = UNICODE_PAT_MODS;
              len = sizeof(UNICODE_PAT_MODS) - 1;
@@ -7371,28 +7427,6 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
  
      DEBUG_r(if (!PL_colorset) reginitcolors());
  
-    /* Initialize these here instead of as-needed, as is quick and avoids
-     * having to test them each time otherwise */
-    if (! PL_InBitmap) {
-#ifdef DEBUGGING
-        char * dump_len_string;
-#endif
-
-        /* This is calculated here, because the Perl program that generates the
-         * static global ones doesn't currently have access to
-         * NUM_ANYOF_CODE_POINTS */
-       PL_InBitmap = _new_invlist(2);
-       PL_InBitmap = _add_range_to_invlist(PL_InBitmap, 0,
-                                                    NUM_ANYOF_CODE_POINTS - 1);
-#ifdef DEBUGGING
-        dump_len_string = PerlEnv_getenv("PERL_DUMP_RE_MAX_LEN");
-        if (   ! dump_len_string
-            || ! grok_atoUV(dump_len_string, (UV *)&PL_dump_re_max_len, NULL))
-        {
-            PL_dump_re_max_len = 60;    /* A reasonable default */
-        }
-#endif
-    }
  
      pRExC_state->warn_text = NULL;
      pRExC_state->unlexed_names = NULL;
@@ -7562,6 +7596,12 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
          && memEQ(RX_PRECOMP(old_re), exp, plen)
         && !runtime_code /* with runtime code, always recompile */ )
      {
+        DEBUG_COMPILE_r({
+            SV *dsv= sv_newmortal();
+            RE_PV_QUOTED_DECL(s, RExC_utf8, dsv, exp, plen, PL_dump_re_max_len);
+            Perl_re_printf( aTHX_  "%sSkipping recompilation of unchanged REx%s %s\n",
+                          PL_colors[4], PL_colors[5], s);
+        });
          return old_re;
      }
  
@@ -7605,10 +7645,9 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
      RExC_seen = 0;
      RExC_maxlen = 0;
      RExC_in_lookbehind = 0;
+    RExC_in_lookahead = 0;
      RExC_seen_zerolen = *exp == '^' ? -1 : 0;
-#ifdef EBCDIC
      RExC_recode_x_to_native = 0;
-#endif
      RExC_in_multi_char_class = 0;
  
      RExC_start = RExC_copy_start_in_constructed = RExC_copy_start_in_input = RExC_precomp = exp;
@@ -7819,6 +7858,16 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
      SetProgLen(RExC_rxi,RExC_size);
  #endif
  
+    DEBUG_DUMP_PRE_OPTIMIZE_r({
+        SV * const sv = sv_newmortal();
+        RXi_GET_DECL(RExC_rx, ri);
+        DEBUG_RExC_seen();
+        Perl_re_printf( aTHX_ "Program before optimization:\n");
+
+        (void)dumpuntil(RExC_rx, ri->program, ri->program + 1, NULL, NULL,
+                        sv, 0, 0);
+    });
+
      DEBUG_OPTIMISE_r(
          Perl_re_printf( aTHX_  "Starting post parse optimization\n");
      );
@@ -7941,7 +7990,9 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
          /* Ignore EXACT as we deal with it later. */
         if (PL_regkind[OP(first)] == EXACT) {
             if (   OP(first) == EXACT
-                || OP(first) == EXACT_ONLY8
+               || OP(first) == LEXACT
+                || OP(first) == EXACT_REQ8
+                || OP(first) == LEXACT_REQ8
                  || OP(first) == EXACTL)
              {
                 NOOP;   /* Empty, get anchored substr later. */
@@ -8287,7 +8338,9 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
                   && nop == END)
              RExC_rx->extflags |= RXf_WHITE;
          else if ( RExC_rx->extflags & RXf_SPLIT
-                  && (fop == EXACT || fop == EXACT_ONLY8 || fop == EXACTL)
+                  && (   fop == EXACT || fop == LEXACT
+                      || fop == EXACT_REQ8 || fop == LEXACT_REQ8
+                      || fop == EXACTL)
                    && STR_LEN(first) == 1
                    && *(STRING(first)) == ' '
                    && nop == END )
@@ -8631,8 +8684,8 @@ Perl_reg_numbered_buff_fetch(pTHX_ REGEXP * const r, const I32 paren,
         i = rx->sublen + rx->suboffset - rx->offs[0].end;
      }
      else
-    if ( 0 <= n && n <= (I32)rx->nparens &&
-        (s1 = rx->offs[n].start) != -1 &&
+    if (inRANGE(n, 0, (I32)rx->nparens) &&
+        (s1 = rx->offs[n].start) != -1  &&
          (t1 = rx->offs[n].end) != -1)
      {
          /* $&, ${^MATCH},  $1 ... */
@@ -8985,23 +9038,6 @@ S__invlist_array_init(SV* const invlist, const bool will_have_0)
      return zero_addr + *offset;
  }
  
-PERL_STATIC_INLINE void
-S_invlist_set_len(pTHX_ SV* const invlist, const UV len, const bool offset)
-{
-    /* Sets the current number of elements stored in the inversion list.
-     * Updates SvCUR correspondingly */
-    PERL_UNUSED_CONTEXT;
-    PERL_ARGS_ASSERT_INVLIST_SET_LEN;
-
-    assert(is_invlist(invlist));
-
-    SvCUR_set(invlist,
-              (len == 0)
-               ? 0
-               : TO_INTERNAL_SIZE(len + offset));
-    assert(SvLEN(invlist) == 0 || SvCUR(invlist) <= SvLEN(invlist));
-}
-
  STATIC void
  S_invlist_replace_list_destroys_src(pTHX_ SV * dest, SV * src)
  {
@@ -9152,6 +9188,7 @@ S_initialize_invlist_guts(pTHX_ SV* invlist, const Size_t initial_size)
      invlist_iterfinish(invlist);
  
      *get_invlist_previous_index_addr(invlist) = 0;
+    SvPOK_on(invlist);  /* This allows B to extract the PV */
  }
  
  SV*
@@ -9226,25 +9263,12 @@ Perl__new_invlist_C_array(pTHX_ const UV* const list)
      invlist_iterfinish(invlist);
  
      SvREADONLY_on(invlist);
+    SvPOK_on(invlist);
  
      return invlist;
  }
  
  STATIC void
-S_invlist_extend(pTHX_ SV* const invlist, const UV new_max)
-{
-    /* Grow the maximum size of an inversion list */
-
-    PERL_ARGS_ASSERT_INVLIST_EXTEND;
-
-    assert(is_invlist(invlist));
-
-    /* Add one to account for the zero element at the beginning which may not
-     * be counted by the calling parameters */
-    SvGROW((SV *)invlist, TO_INTERNAL_SIZE(new_max + 1));
-}
-
-STATIC void
  S__append_range_to_invlist(pTHX_ SV* const invlist,
                                   const UV start, const UV end)
  {
@@ -10230,11 +10254,6 @@ Perl__setup_canned_invlist(pTHX_ const STRLEN size, const UV element0,
  
  #endif
  
-PERL_STATIC_INLINE SV*
-S_add_cp_to_invlist(pTHX_ SV* invlist, const UV cp) {
-    return _add_range_to_invlist(invlist, cp, cp);
-}
-
  #ifndef PERL_IN_XSUB_RE
  void
  Perl__invlist_invert(pTHX_ SV* const invlist)
@@ -10285,106 +10304,26 @@ Perl_invlist_clone(pTHX_ SV* const invlist, SV* new_invlist)
  
  #endif
  
-PERL_STATIC_INLINE STRLEN*
-S_get_invlist_iter_addr(SV* invlist)
-{
-    /* Return the address of the UV that contains the current iteration
-     * position */
-
-    PERL_ARGS_ASSERT_GET_INVLIST_ITER_ADDR;
-
-    assert(is_invlist(invlist));
-
-    return &(((XINVLIST*) SvANY(invlist))->iterator);
-}
-
-PERL_STATIC_INLINE void
-S_invlist_iterinit(SV* invlist)        /* Initialize iterator for invlist */
-{
-    PERL_ARGS_ASSERT_INVLIST_ITERINIT;
-
-    *get_invlist_iter_addr(invlist) = 0;
-}
-
-PERL_STATIC_INLINE void
-S_invlist_iterfinish(SV* invlist)
-{
-    /* Terminate iterator for invlist.  This is to catch development errors.
-     * Any iteration that is interrupted before completed should call this
-     * function.  Functions that add code points anywhere else but to the end
-     * of an inversion list assert that they are not in the middle of an
-     * iteration.  If they were, the addition would make the iteration
-     * problematical: if the iteration hadn't reached the place where things
-     * were being added, it would be ok */
-
-    PERL_ARGS_ASSERT_INVLIST_ITERFINISH;
-
-    *get_invlist_iter_addr(invlist) = (STRLEN) UV_MAX;
-}
-
-STATIC bool
-S_invlist_iternext(SV* invlist, UV* start, UV* end)
-{
-    /* An C<invlist_iterinit> call on <invlist> must be used to set this up.
-     * This call sets in <*start> and <*end>, the next range in <invlist>.
-     * Returns <TRUE> if successful and the next call will return the next
-     * range; <FALSE> if was already at the end of the list.  If the latter,
-     * <*start> and <*end> are unchanged, and the next call to this function
-     * will start over at the beginning of the list */
-
-    STRLEN* pos = get_invlist_iter_addr(invlist);
-    UV len = _invlist_len(invlist);
-    UV *array;
-
-    PERL_ARGS_ASSERT_INVLIST_ITERNEXT;
-
-    if (*pos >= len) {
-       *pos = (STRLEN) UV_MAX; /* Force iterinit() to be required next time */
-       return FALSE;
-    }
-
-    array = invlist_array(invlist);
-
-    *start = array[(*pos)++];
-
-    if (*pos >= len) {
-       *end = UV_MAX;
-    }
-    else {
-       *end = array[(*pos)++] - 1;
-    }
-
-    return TRUE;
-}
-
  PERL_STATIC_INLINE UV
-S_invlist_highest(SV* const invlist)
+S_invlist_lowest(SV* const invlist)
  {
-    /* Returns the highest code point that matches an inversion list.  This API
-     * has an ambiguity, as it returns 0 under either the highest is actually
+    /* Returns the lowest code point that matches an inversion list.  This API
+     * has an ambiguity, as it returns 0 under either the lowest is actually
       * 0, or if the list is empty.  If this distinction matters to you, check
       * for emptiness before calling this function */
  
      UV len = _invlist_len(invlist);
      UV *array;
  
-    PERL_ARGS_ASSERT_INVLIST_HIGHEST;
+    PERL_ARGS_ASSERT_INVLIST_LOWEST;
  
      if (len == 0) {
-       return 0;
+        return 0;
      }
  
      array = invlist_array(invlist);
  
-    /* The last element in the array in the inversion list always starts a
-     * range that goes to infinity.  That range may be for code points that are
-     * matched in the inversion list, or it may be for ones that aren't
-     * matched.  In the latter case, the highest code point in the set is one
-     * less than the beginning of this range; otherwise it is the final element
-     * of this range: infinity */
-    return (ELEMENT_RANGE_MATCHES_INVLIST(len - 1))
-           ? UV_MAX
-           : array[len - 1] - 1;
+    return array[0];
  }
  
  STATIC SV *
@@ -10565,7 +10504,7 @@ Perl__invlistEQ(pTHX_ SV* const a, SV* const b, const bool complement_b)
   * call SvREFCNT_dec() when done with it.
   */
  STATIC SV*
-S__make_exactf_invlist(pTHX_ RExC_state_t *pRExC_state, regnode *node)
+S_make_exactf_invlist(pTHX_ RExC_state_t *pRExC_state, regnode *node)
  {
      dVAR;
      const U8 * s = (U8*)STRING(node);
@@ -10574,7 +10513,7 @@ S__make_exactf_invlist(pTHX_ RExC_state_t *pRExC_state, regnode *node)
      /* Start out big enough for 2 separate code points */
      SV* invlist = _new_invlist(4);
  
-    PERL_ARGS_ASSERT__MAKE_EXACTF_INVLIST;
+    PERL_ARGS_ASSERT_MAKE_EXACTF_INVLIST;
  
      if (! UTF) {
          uc = *s;
@@ -10992,14 +10931,14 @@ S_handle_named_backref(pTHX_ RExC_state_t *pRExC_state,
      RExC_sawback = 1;
      ret = reganode(pRExC_state,
                     ((! FOLD)
-                     ? NREF
+                     ? REFN
                       : (ASCII_FOLD_RESTRICTED)
-                       ? NREFFA
+                       ? REFFAN
                         : (AT_LEAST_UNI_SEMANTICS)
-                         ? NREFFU
+                         ? REFFUN
                           : (LOC)
-                           ? NREFFL
-                           : NREFF),
+                           ? REFFLN
+                           : REFFN),
                      num);
      *flagp |= HASWIDTH;
  
@@ -11048,7 +10987,6 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
      PERL_ARGS_ASSERT_REG;
      DEBUG_PARSE("reg ");
  
-
      max_open = get_sv(RE_COMPILE_RECURSION_LIMIT, GV_ADD);
      assert(max_open);
      if (!SvIOK(max_open)) {
@@ -11061,6 +10999,13 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
  
      *flagp = 0;                                /* Tentatively. */
  
+    if (RExC_in_lookbehind) {
+       RExC_in_lookbehind++;
+    }
+    if (RExC_in_lookahead) {
+        RExC_in_lookahead++;
+    }
+
      /* Having this true makes it feasible to have a lot fewer tests for the
       * parse pointer being in scope.  For example, we can write
       *      while(isFOO(*RExC_parse)) RExC_parse++;
@@ -11274,12 +11219,6 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
                          goto parse_rest;
                      }
  
-                    /* By doing this here, we avoid extra warnings for nested
-                     * script runs */
-                    ckWARNexperimental(RExC_parse,
-                        WARN_EXPERIMENTAL__SCRIPT_RUN,
-                        "The script_run feature is experimental");
-
                      if (paren == 's') {
                          /* Here, we're starting a new regular script run */
                          ret = reg_node(pRExC_state, SROPEN);
@@ -11302,10 +11241,15 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
                          return 0;
                      }
  
-                    REGTAIL(pRExC_state, ret, atomic);
+                    if (! REGTAIL(pRExC_state, ret, atomic)) {
+                        REQUIRE_BRANCHJ(flagp, 0);
+                    }
  
-                    REGTAIL(pRExC_state, atomic,
-                           reg_node(pRExC_state, SRCLOSE));
+                    if (! REGTAIL(pRExC_state, atomic, reg_node(pRExC_state,
+                                                                SRCLOSE)))
+                    {
+                        REQUIRE_BRANCHJ(flagp, 0);
+                    }
  
                      RExC_in_script_run = 0;
                      return ret;
@@ -11319,9 +11263,6 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
                  /*FALLTHROUGH*/
  
              alpha_assertions:
-                ckWARNexperimental(RExC_parse,
-                        WARN_EXPERIMENTAL__ALPHA_ASSERTIONS,
-                        "The alpha_assertions feature is experimental");
  
                  RExC_seen_zerolen++;
  
@@ -11525,10 +11466,11 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
                  if (RExC_parse >= RExC_end) {
                      vFAIL("Sequence (?... not terminated");
                  }
-
-                /* FALLTHROUGH */
+                RExC_seen_zerolen++;
+                break;
             case '=':           /* (?=...) */
                 RExC_seen_zerolen++;
+                RExC_in_lookahead++;
                  break;
             case '!':           /* (?!...) */
                 RExC_seen_zerolen++;
@@ -11764,7 +11706,9 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
                                         RExC_flags & RXf_PMf_COMPILETIME
                                        );
                      FLAGS(REGNODE_p(ret)) = 2;
-                    REGTAIL(pRExC_state, ret, eval);
+                    if (! REGTAIL(pRExC_state, ret, eval)) {
+                        REQUIRE_BRANCHJ(flagp, 0);
+                    }
                      /* deal with the length of this later - MJD */
                     return ret;
                 }
@@ -11817,7 +11761,9 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
  
                      tail = reg(pRExC_state, 1, &flag, depth+1);
                      RETURN_FAIL_ON_RESTART(flag, flagp);
-                    REGTAIL(pRExC_state, ret, tail);
+                    if (! REGTAIL(pRExC_state, ret, tail)) {
+                        REQUIRE_BRANCHJ(flagp, 0);
+                    }
                      goto insert_if;
                  }
                 else if (   RExC_parse[0] == '<'     /* (?(<NAME>)...) */
@@ -11840,7 +11786,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
                          RExC_rxi->data->data[num]=(void*)sv_dat;
                          SvREFCNT_inc_simple_void_NN(sv_dat);
                      }
-                    ret = reganode(pRExC_state, NGROUPP, num);
+                    ret = reganode(pRExC_state, GROUPPN, num);
                      goto insert_if_check_paren;
                 }
                 else if (memBEGINs(RExC_parse,
@@ -11909,15 +11855,22 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
                     }
                     nextchar(pRExC_state);
                   insert_if:
-                    REGTAIL(pRExC_state, ret, reganode(pRExC_state, IFTHEN, 0));
+                    if (! REGTAIL(pRExC_state, ret, reganode(pRExC_state,
+                                                             IFTHEN, 0)))
+                    {
+                        REQUIRE_BRANCHJ(flagp, 0);
+                    }
                      br = regbranch(pRExC_state, &flags, 1, depth+1);
                     if (br == 0) {
                          RETURN_FAIL_ON_RESTART(flags,flagp);
                          FAIL2("panic: regbranch returned failure, flags=%#" UVxf,
                                (UV) flags);
                      } else
-                        REGTAIL(pRExC_state, br, reganode(pRExC_state,
-                                                          LONGJMP, 0));
+                    if (! REGTAIL(pRExC_state, br, reganode(pRExC_state,
+                                                             LONGJMP, 0)))
+                    {
+                        REQUIRE_BRANCHJ(flagp, 0);
+                    }
                     c = UCHARAT(RExC_parse);
                      nextchar(pRExC_state);
                     if (flags&HASWIDTH)
@@ -11934,7 +11887,9 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
                              FAIL2("panic: regbranch returned failure, flags=%#" UVxf,
                                    (UV) flags);
                          }
-                        REGTAIL(pRExC_state, ret, lastbr);
+                        if (! REGTAIL(pRExC_state, ret, lastbr)) {
+                            REQUIRE_BRANCHJ(flagp, 0);
+                        }
                         if (flags&HASWIDTH)
                             *flagp |= HASWIDTH;
                          c = UCHARAT(RExC_parse);
@@ -11949,16 +11904,26 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
                              vFAIL("Switch (?(condition)... contains too many branches");
                      }
                     ender = reg_node(pRExC_state, TAIL);
-                    REGTAIL(pRExC_state, br, ender);
+                    if (! REGTAIL(pRExC_state, br, ender)) {
+                        REQUIRE_BRANCHJ(flagp, 0);
+                    }
                     if (lastbr) {
-                        REGTAIL(pRExC_state, lastbr, ender);
-                        REGTAIL(pRExC_state, REGNODE_OFFSET(
-                                                NEXTOPER(
-                                                NEXTOPER(REGNODE_p(lastbr)))),
-                                             ender);
+                        if (! REGTAIL(pRExC_state, lastbr, ender)) {
+                            REQUIRE_BRANCHJ(flagp, 0);
+                        }
+                        if (! REGTAIL(pRExC_state,
+                                      REGNODE_OFFSET(
+                                                 NEXTOPER(
+                                                 NEXTOPER(REGNODE_p(lastbr)))),
+                                      ender))
+                        {
+                            REQUIRE_BRANCHJ(flagp, 0);
+                        }
                     }
                     else
-                        REGTAIL(pRExC_state, ret, ender);
+                        if (! REGTAIL(pRExC_state, ret, ender)) {
+                            REQUIRE_BRANCHJ(flagp, 0);
+                        }
  #if 0  /* Removing this doesn't cause failures in the test suite -- khw */
                      RExC_size++; /* XXX WHY do we need this?!!
                                      For large programs it seems to be required
@@ -12000,16 +11965,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
                  goto parse_rest;
              } /* end switch */
         }
-       else {
-            if (*RExC_parse == '{') {
-                ckWARNregdep(RExC_parse + 1,
-                            "Unescaped left brace in regex is "
-                            "deprecated here (and will be fatal "
-                            "in Perl 5.32), passed through");
-            }
-            /* Not bothering to indent here, as the above 'else' is temporary
-             * */
-        if (!(RExC_flags & RXf_PMf_NOCAPTURE)) {   /* (...) */
+        else if (!(RExC_flags & RXf_PMf_NOCAPTURE)) {   /* (...) */
           capturing_parens:
             parno = RExC_npar;
             RExC_npar++;
@@ -12076,7 +12032,6 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
              /* with RXf_PMf_NOCAPTURE treat (...) as (?:...) */
              paren = ':';
             ret = 0;
-       }
          }
      }
      else                        /* ! paren */
@@ -12108,7 +12063,9 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
         *flagp |= flags&SIMPLE;
      }
      if (is_open) {                             /* Starts with OPEN. */
-        REGTAIL(pRExC_state, ret, br);          /* OPEN -> first. */
+        if (! REGTAIL(pRExC_state, ret, br)) {  /* OPEN -> first. */
+            REQUIRE_BRANCHJ(flagp, 0);
+        }
      }
      else if (paren != '?')             /* Not Conditional */
         ret = br;
@@ -12116,12 +12073,15 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
      lastbr = br;
      while (*RExC_parse == '|') {
         if (RExC_use_BRANCHJ) {
+            bool shut_gcc_up;
+
             ender = reganode(pRExC_state, LONGJMP, 0);
  
              /* Append to the previous. */
-            REGTAIL(pRExC_state,
-                    REGNODE_OFFSET(NEXTOPER(NEXTOPER(REGNODE_p(lastbr)))),
-                    ender);
+            shut_gcc_up = REGTAIL(pRExC_state,
+                         REGNODE_OFFSET(NEXTOPER(NEXTOPER(REGNODE_p(lastbr)))),
+                         ender);
+            PERL_UNUSED_VAR(shut_gcc_up);
         }
         nextchar(pRExC_state);
         if (freeze_paren) {
@@ -12232,9 +12192,10 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
                          is_nothing= 0;
                 }
                 else if (op == BRANCHJ) {
-                    REGTAIL_STUDY(pRExC_state,
-                                  REGNODE_OFFSET(NEXTOPER(NEXTOPER(br))),
-                                  ender);
+                    bool shut_gcc_up = REGTAIL_STUDY(pRExC_state,
+                                        REGNODE_OFFSET(NEXTOPER(NEXTOPER(br))),
+                                        ender);
+                    PERL_UNUSED_VAR(shut_gcc_up);
                      /* for now we always disable this optimisation * /
                      if ( OP(NEXTOPER(NEXTOPER(br))) != NOTHING
                           || regnext(NEXTOPER(NEXTOPER(br))) != REGNODE_p(ender))
@@ -12327,6 +12288,9 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
      if (RExC_in_lookbehind) {
         RExC_in_lookbehind--;
      }
+    if (RExC_in_lookahead) {
+        RExC_in_lookahead--;
+    }
      if (after_freeze > RExC_npar)
          RExC_npar = after_freeze;
      return(ret);
@@ -12546,7 +12510,9 @@ S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                 const regnode_offset w = reg_node(pRExC_state, WHILEM);
  
                 FLAGS(REGNODE_p(w)) = 0;
-                REGTAIL(pRExC_state, ret, w);
+                if (!  REGTAIL(pRExC_state, ret, w)) {
+                    REQUIRE_BRANCHJ(flagp, 0);
+                }
                 if (RExC_use_BRANCHJ) {
                     reginsert(pRExC_state, LONGJMP, ret, depth+1);
                     reginsert(pRExC_state, NOTHING, ret, depth+1);
@@ -12561,7 +12527,11 @@ S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                 if (RExC_use_BRANCHJ)
                      NEXT_OFF(REGNODE_p(ret)) = 3;   /* Go over NOTHING to
                                                         LONGJMP. */
-                REGTAIL(pRExC_state, ret, reg_node(pRExC_state, NOTHING));
+                if (! REGTAIL(pRExC_state, ret, reg_node(pRExC_state,
+                                                          NOTHING)))
+                {
+                    REQUIRE_BRANCHJ(flagp, 0);
+                }
                  RExC_whilem_seen++;
                  MARK_NAUGHTY_EXP(1, 4);     /* compound interest */
             }
@@ -12633,16 +12603,22 @@ S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
      if (*RExC_parse == '?') {
         nextchar(pRExC_state);
         reginsert(pRExC_state, MINMOD, ret, depth+1);
-        REGTAIL(pRExC_state, ret, ret + NODE_STEP_REGNODE);
+        if (! REGTAIL(pRExC_state, ret, ret + NODE_STEP_REGNODE)) {
+            REQUIRE_BRANCHJ(flagp, 0);
+        }
      }
      else if (*RExC_parse == '+') {
          regnode_offset ender;
          nextchar(pRExC_state);
          ender = reg_node(pRExC_state, SUCCEED);
-        REGTAIL(pRExC_state, ret, ender);
+        if (! REGTAIL(pRExC_state, ret, ender)) {
+            REQUIRE_BRANCHJ(flagp, 0);
+        }
          reginsert(pRExC_state, SUSPEND, ret, depth+1);
          ender = reg_node(pRExC_state, TAIL);
-        REGTAIL(pRExC_state, ret, ender);
+        if (! REGTAIL(pRExC_state, ret, ender)) {
+            REQUIRE_BRANCHJ(flagp, 0);
+        }
      }
  
      if (ISMULT2(RExC_parse)) {
@@ -12883,9 +12859,9 @@ S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state,
          value = (U8 *) SvPV(value_sv, value_len);
  
          /* See if the result is one code point vs 0 or multiple */
-        if (value_len > 0 && value_len <= (UV) ((SvUTF8(value_sv))
-                                               ? UTF8SKIP(value)
-                                               : 1))
+        if (inRANGE(value_len, 1, ((UV) SvUTF8(value_sv)
+                                  ? UTF8SKIP(value)
+                                  : 1)))
          {
              /* Here, exactly one code point.  If that isn't what is wanted,
               * fail */
@@ -12936,11 +12912,9 @@ S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state,
          sv_catsv(substitute_parse, value_sv);
          sv_catpv(substitute_parse, ")");
  
-#ifdef EBCDIC
          /* The value should already be native, so no need to convert on EBCDIC
           * platforms.*/
          assert(! RExC_recode_x_to_native);
-#endif
  
      }
      else {   /* \N{U+...} */
@@ -13073,12 +13047,9 @@ S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state,
  
          sv_catpvs(substitute_parse, ")");
  
-#ifdef EBCDIC
          /* The values are Unicode, and therefore have to be converted to native
           * on a non-Unicode (meaning non-ASCII) platform. */
-        RExC_recode_x_to_native = 1;
-#endif
-
+        SET_recode_x_to_native(1);
      }
  
      /* Here, we have the string the name evaluates to, ready to be parsed,
@@ -13103,9 +13074,7 @@ S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state,
      RExC_start = save_start;
      RExC_parse = endbrace;
      RExC_end = orig_end;
-#ifdef EBCDIC
-    RExC_recode_x_to_native = 0;
-#endif
+    SET_recode_x_to_native(0);
  
      SvREFCNT_dec_NN(substitute_parse);
  
@@ -13283,7 +13252,6 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
      char *parse_start;
      U8 op;
      int invert = 0;
-    U8 arg;
  
      GET_RE_DEBUG_FLAGS_DECL;
  
@@ -13412,15 +13380,21 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
             *flagp |= SIMPLE;
             goto finish_meta_pat;
         case 'K':
-           RExC_seen_zerolen++;
-           ret = reg_node(pRExC_state, KEEPS);
-           *flagp |= SIMPLE;
-           /* XXX:dmq : disabling in-place substitution seems to
-            * be necessary here to avoid cases of memory corruption, as
-            * with: C<$_="x" x 80; s/x\K/y/> -- rgs
-            */
-            RExC_seen |= REG_LOOKBEHIND_SEEN;
-           goto finish_meta_pat;
+            if (!RExC_in_lookbehind && !RExC_in_lookahead) {
+                RExC_seen_zerolen++;
+                ret = reg_node(pRExC_state, KEEPS);
+                *flagp |= SIMPLE;
+                /* XXX:dmq : disabling in-place substitution seems to
+                 * be necessary here to avoid cases of memory corruption, as
+                 * with: C<$_="x" x 80; s/x\K/y/> -- rgs
+                 */
+                RExC_seen |= REG_LOOKBEHIND_SEEN;
+                goto finish_meta_pat;
+            }
+            else {
+                ++RExC_parse; /* advance past the 'K' */
+                vFAIL("\\K not permitted in lookahead/lookbehind");
+            }
         case 'Z':
             ret = reg_node(pRExC_state, SEOL);
             *flagp |= SIMPLE;
@@ -13438,13 +13412,6 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
             *flagp |= HASWIDTH;
             goto finish_meta_pat;
  
-       case 'W':
-            invert = 1;
-            /* FALLTHROUGH */
-       case 'w':
-            arg = ANYOF_WORDCHAR;
-            goto join_posix;
-
         case 'B':
              invert = 1;
              /* FALLTHROUGH */
@@ -13563,85 +13530,26 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
             goto finish_meta_pat;
            }
  
-       case 'D':
-            invert = 1;
-            /* FALLTHROUGH */
-       case 'd':
-            arg = ANYOF_DIGIT;
-            if (! DEPENDS_SEMANTICS) {
-                goto join_posix;
-            }
-
-            /* \d doesn't have any matches in the upper Latin1 range, hence /d
-             * is equivalent to /u.  Changing to /u saves some branches at
-             * runtime */
-            op = POSIXU;
-            goto join_posix_op_known;
-
         case 'R':
             ret = reg_node(pRExC_state, LNBREAK);
             *flagp |= HASWIDTH|SIMPLE;
             goto finish_meta_pat;
  
-       case 'H':
-            invert = 1;
-            /* FALLTHROUGH */
+       case 'd':
+       case 'D':
         case 'h':
-           arg = ANYOF_BLANK;
-            op = POSIXU;
-            goto join_posix_op_known;
-
-       case 'V':
-            invert = 1;
-            /* FALLTHROUGH */
-       case 'v':
-           arg = ANYOF_VERTWS;
-            op = POSIXU;
-            goto join_posix_op_known;
-
-       case 'S':
-            invert = 1;
-            /* FALLTHROUGH */
-       case 's':
-            arg = ANYOF_SPACE;
-
-          join_posix:
-
-           op = POSIXD + get_regex_charset(RExC_flags);
-            if (op > POSIXA) {  /* /aa is same as /a */
-                op = POSIXA;
-            }
-            else if (op == POSIXL) {
-                RExC_contains_locale = 1;
-            }
-            else if (op == POSIXD) {
-                RExC_seen_d_op = TRUE;
-            }
-
-          join_posix_op_known:
-
-            if (invert) {
-                op += NPOSIXD - POSIXD;
-            }
-
-           ret = reg_node(pRExC_state, op);
-            FLAGS(REGNODE_p(ret)) = namedclass_to_classnum(arg);
-
-           *flagp |= HASWIDTH|SIMPLE;
-            /* FALLTHROUGH */
-
-          finish_meta_pat:
-            if (   UCHARAT(RExC_parse + 1) == '{'
-                && UNLIKELY(! new_regcurly(RExC_parse + 1, RExC_end)))
-            {
-                RExC_parse += 2;
-                vFAIL("Unescaped left brace in regex is illegal here");
-            }
-           nextchar(pRExC_state);
-            Set_Node_Length(REGNODE_p(ret), 2); /* MJD */
-           break;
+       case 'H':
         case 'p':
         case 'P':
+       case 's':
+       case 'S':
+       case 'v':
+       case 'V':
+       case 'w':
+       case 'W':
+            /* These all have the same meaning inside [brackets], and it knows
+             * how to do the best optimizations for them.  So, pretend we found
+             * these within brackets, and let it do the work */
              RExC_parse--;
  
              ret = regclass(pRExC_state, flagp, depth+1,
@@ -13660,10 +13568,21 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                  FAIL2("panic: regclass returned failure to regatom, flags=%#" UVxf,
                        (UV) *flagp);
  
-            RExC_parse--;
+            RExC_parse--;   /* regclass() leaves this one too far ahead */
  
+          finish_meta_pat:
+                   /* The escapes above that don't take a parameter can't be
+                    * followed by a '{'.  But 'pX', 'p{foo}' and
+                    * correspondingly 'P' can be */
+            if (   RExC_parse - parse_start == 1
+                && UCHARAT(RExC_parse + 1) == '{'
+                && UNLIKELY(! new_regcurly(RExC_parse + 1, RExC_end)))
+            {
+                RExC_parse += 2;
+                vFAIL("Unescaped left brace in regex is illegal here");
+            }
              Set_Node_Offset(REGNODE_p(ret), parse_start);
-            Set_Node_Cur_Length(REGNODE_p(ret), parse_start - 2);
+            Set_Node_Length(REGNODE_p(ret), RExC_parse - parse_start + 1); /* MJD */
              nextchar(pRExC_state);
             break;
          case 'N':
@@ -13784,7 +13703,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                          && num >= RExC_npar
                          /* cannot be an octal escape if it starts with 8 */
                          && *RExC_parse != '8'
-                        /* cannot be an octal escape it it starts with 9 */
+                        /* cannot be an octal escape if it starts with 9 */
                          && *RExC_parse != '9'
                      ) {
                          /* Probably not meant to be a backref, instead likely
@@ -13884,14 +13803,15 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
             STRLEN len = 0;
             UV ender = 0;
             char *p;
-           char *s;
-
-/* This allows us to fill a node with just enough spare so that if the final
- * character folds, its expansion is guaranteed to fit */
-#define MAX_NODE_STRING_SIZE (255-UTF8_MAXBYTES_CASE)
-
+           char *s, *old_s = NULL, *old_old_s = NULL;
             char *s0;
-           U8 upper_parse = MAX_NODE_STRING_SIZE;
+            U32 max_string_len = 255;
+
+            /* We may have to reparse the node, artificially stopping filling
+             * it early, based on info gleaned in the first parse.  This
+             * variable gives where we stop.  Make it above the normal stopping
+             * place first time through; otherwise it would stop too early */
+            U32 upper_fill = max_string_len + 1;
  
              /* We start out as an EXACT node, even if under /i, until we find a
               * character which is in a fold.  The algorithm now segregates into
@@ -13905,9 +13825,17 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
              U8 node_type = EXACT;
  
              /* Assume the node will be fully used; the excess is given back at
-             * the end.  We can't make any other length assumptions, as a byte
-             * input sequence could shrink down. */
-            Ptrdiff_t initial_size = STR_SZ(256);
+             * the end.  Under /i, we may need to temporarily add the fold of
+             * an extra character or two at the end to check for splitting
+             * multi-char folds, so allocate extra space for that.   We can't
+             * make any other length assumptions, as a byte input sequence
+             * could shrink down. */
+            Ptrdiff_t current_string_nodes = STR_SZ(max_string_len
+                                                 + ((! FOLD)
+                                                    ? 0
+                                                    : 2 * ((UTF)
+                                                           ? UTF8_MAXBYTES_CASE
+                        /* Max non-UTF-8 expansion is 2 */ : 2)));
  
              bool next_is_quantifier;
              char * oldp = NULL;
@@ -13938,10 +13866,15 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
              /* So is the MICRO SIGN */
              bool has_micro_sign = FALSE;
  
+            /* Set when we fill up the current node and there is still more
+             * text to process */
+            bool overflowed;
+
              /* Allocate an EXACT node.  The node_type may change below to
               * another EXACTish node, but since the size of the node doesn't
               * change, it works */
-            ret = regnode_guts(pRExC_state, node_type, initial_size, "exact");
+            ret = regnode_guts(pRExC_state, node_type, current_string_nodes,
+                                                                    "exact");
              FILL_NODE(ret, node_type);
              RExC_emit++;
  
@@ -13951,6 +13884,19 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
  
           reparse:
  
+            p = RExC_parse;
+            len = 0;
+            s = s0;
+            node_type = EXACT;
+            oldp = NULL;
+            maybe_exactfu = FOLD && (DEPENDS_SEMANTICS || LOC);
+            maybe_SIMPLE = SIMPLE;
+            requires_utf8_target = FALSE;
+            has_ss = FALSE;
+            has_micro_sign = FALSE;
+
+          continue_parse:
+
              /* This breaks under rare circumstances.  If folding, we do not
               * want to split a node at a character that is a non-final in a
               * multi-char fold, as an input string could just happen to want to
@@ -13965,18 +13911,22 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                     || UTF8_IS_INVARIANT(UCHARAT(RExC_parse))
                     || UTF8_IS_START(UCHARAT(RExC_parse)));
  
+            overflowed = FALSE;
+
              /* Here, we have a literal character.  Find the maximal string of
               * them in the input that we can fit into a single EXACTish node.
               * We quit at the first non-literal or when the node gets full, or
               * under /i the categorization of folding/non-folding character
               * changes */
-           for (p = RExC_parse; len < upper_parse && p < RExC_end; ) {
+            while (p < RExC_end && len < upper_fill) {
  
                  /* In most cases each iteration adds one byte to the output.
                   * The exceptions override this */
                  Size_t added_len = 1;
  
                 oldp = p;
+                old_old_s = old_s;
+                old_s = s;
  
                  /* White space has already been ignored */
                  assert(   (RExC_flags & RXf_PMf_EXTENDED) == 0
@@ -14145,13 +14095,13 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                              UPDATE_WARNINGS_LOC(p - 1);
                              ender = result;
  
-                            if (ender < 0x100) {
  #ifdef EBCDIC
+                            if (ender < 0x100) {
                                  if (RExC_recode_x_to_native) {
                                      ender = LATIN1_TO_NATIVE(ender);
                                  }
-#endif
                             }
+#endif
                             break;
                         }
                     case 'c':
@@ -14308,20 +14258,29 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                  /* Ready to add 'ender' to the node */
  
                  if (! FOLD) {  /* The simple case, just append the literal */
+                  not_fold_common:
  
-                      not_fold_common:
-                        if (UVCHR_IS_INVARIANT(ender) || ! UTF) {
-                            *(s++) = (char) ender;
-                        }
-                        else {
-                            U8 * new_s = uvchr_to_utf8((U8*)s, ender);
-                            added_len = (char *) new_s - s;
-                            s = (char *) new_s;
+                    /* Don't output if it would overflow */
+                    if (UNLIKELY(len > max_string_len - ((UTF)
+                                                      ? UVCHR_SKIP(ender)
+                                                      : 1)))
+                    {
+                        overflowed = TRUE;
+                        break;
+                    }
  
-                            if (ender > 255)  {
-                                requires_utf8_target = TRUE;
-                            }
+                    if (UVCHR_IS_INVARIANT(ender) || ! UTF) {
+                        *(s++) = (char) ender;
+                    }
+                    else {
+                        U8 * new_s = uvchr_to_utf8((U8*)s, ender);
+                        added_len = (char *) new_s - s;
+                        s = (char *) new_s;
+
+                        if (ender > 255)  {
+                            requires_utf8_target = TRUE;
                          }
+                    }
                  }
                  else if (LOC && is_PROBLEMATIC_LOCALE_FOLD_cp(ender)) {
  
@@ -14385,22 +14344,35 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                          goto loopdone;
                      }
  
-                    if (UTF) {  /* Use the folded value */
+                    if (UTF) {  /* Alway use the folded value for UTF-8
+                                   patterns */
                          if (UVCHR_IS_INVARIANT(ender)) {
+                            if (UNLIKELY(len + 1 > max_string_len)) {
+                                overflowed = TRUE;
+                                break;
+                            }
+
                              *(s)++ = (U8) toFOLD(ender);
                          }
                          else {
-                            ender = _to_uni_fold_flags(
+                            UV folded = _to_uni_fold_flags(
                                      ender,
-                                    (U8 *) s,
+                                    (U8 *) s,  /* We have allocated extra space
+                                                  in 's' so can't run off the
+                                                  end */
                                      &added_len,
                                      FOLD_FLAGS_FULL | ((ASCII_FOLD_RESTRICTED)
                                                      ? FOLD_FLAGS_NOMIX_ASCII
                                                      : 0));
+                            if (UNLIKELY(len + added_len > max_string_len)) {
+                                overflowed = TRUE;
+                                break;
+                            }
+
                              s += added_len;
  
-                            if (   ender > 255
-                                && LIKELY(ender != GREEK_SMALL_LETTER_MU))
+                            if (   folded > 255
+                                && LIKELY(folded != GREEK_SMALL_LETTER_MU))
                              {
                                  /* U+B5 folds to the MU, so its possible for a
                                   * non-UTF-8 target to match it */
@@ -14408,63 +14380,77 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                              }
                          }
                      }
-                    else {
-
-                        /* Here is non-UTF8.  First, see if the character's
-                         * fold differs between /d and /u. */
-                        if (PL_fold[ender] != PL_fold_latin1[ender]) {
-                            maybe_exactfu = FALSE;
+                    else { /* Here is non-UTF8. */
+
+                        /* The fold will be one or (rarely) two characters.
+                         * Check that there's room for at least a single one
+                         * before setting any flags, etc.  Because otherwise an
+                         * overflowing character could cause a flag to be set
+                         * even though it doesn't end up in this node.  (For
+                         * the two character fold, we check again, before
+                         * setting any flags) */
+                        if (UNLIKELY(len + 1 > max_string_len)) {
+                            overflowed = TRUE;
+                            break;
                          }
  
  #if    UNICODE_MAJOR_VERSION > 3 /* no multifolds in early Unicode */   \
     || (UNICODE_MAJOR_VERSION == 3 && (   UNICODE_DOT_VERSION > 0)       \
                                        || UNICODE_DOT_DOT_VERSION > 0)
  
-                        /* On non-ancient Unicode versions, this includes the
-                         * multi-char fold SHARP S to 'ss' */
-
-                        if (   UNLIKELY(ender == LATIN_SMALL_LETTER_SHARP_S)
-                                 || (   isALPHA_FOLD_EQ(ender, 's')
-                                     && len > 0
-                                     && isALPHA_FOLD_EQ(*(s-1), 's')))
-                        {
-                            /* Here, we have one of the following:
-                             *  a)  a SHARP S.  This folds to 'ss' only under
-                             *      /u rules.  If we are in that situation,
-                             *      fold the SHARP S to 'ss'.  See the comments
-                             *      for join_exact() as to why we fold this
-                             *      non-UTF at compile time, and no others.
-                             *  b)  'ss'.  When under /u, there's nothing
-                             *      special needed to be done here.  The
-                             *      previous iteration handled the first 's',
-                             *      and this iteration will handle the second.
-                             *      If, on the otherhand it's not /u, we have
-                             *      to exclude the possibility of moving to /u,
-                             *      so that we won't generate an unwanted
-                             *      match, unless, at runtime, the target
-                             *      string is in UTF-8.
-                             * */
+                        /* On non-ancient Unicodes, check for the only possible
+                         * multi-char fold  */
+                        if (UNLIKELY(ender == LATIN_SMALL_LETTER_SHARP_S)) {
  
+                            /* This potential multi-char fold means the node
+                             * can't be simple (because it could match more
+                             * than a single char).  And in some cases it will
+                             * match 'ss', so set that flag */
+                            maybe_SIMPLE = 0;
                              has_ss = TRUE;
-                            maybe_exactfu = FALSE;  /* Can't generate an
-                                                       EXACTFU node (unless we
-                                                       already are in one) */
-                            if (UNLIKELY(ender == LATIN_SMALL_LETTER_SHARP_S)) {
-                                maybe_SIMPLE = 0;
-                                if (node_type == EXACTFU) {
-                                    *(s++) = 's';
-
-                                    /* Let the code below add in the extra 's' */
-                                    ender = 's';
-                                    added_len = 2;
+
+                            /* It can't change to be an EXACTFU (unless already
+                             * is one).  We fold it iff under /u rules. */
+                            if (node_type != EXACTFU) {
+                                maybe_exactfu = FALSE;
+                            }
+                            else {
+                                if (UNLIKELY(len + 2 > max_string_len)) {
+                                    overflowed = TRUE;
+                                    break;
                                  }
+
+                                *(s++) = 's';
+                                *(s++) = 's';
+                                added_len = 2;
+
+                                goto done_with_this_char;
                              }
                          }
+                        else if (   UNLIKELY(isALPHA_FOLD_EQ(ender, 's'))
+                                 && LIKELY(len > 0)
+                                 && UNLIKELY(isALPHA_FOLD_EQ(*(s-1), 's')))
+                        {
+                            /* Also, the sequence 'ss' is special when not
+                             * under /u.  If the target string is UTF-8, it
+                             * should match SHARP S; otherwise it won't.  So,
+                             * here we have to exclude the possibility of this
+                             * node moving to /u.*/
+                            has_ss = TRUE;
+                            maybe_exactfu = FALSE;
+                        }
  #endif
+                        /* Here, the fold will be a single character */
  
-                        else if (UNLIKELY(ender == MICRO_SIGN)) {
+                        if (UNLIKELY(ender == MICRO_SIGN)) {
                              has_micro_sign = TRUE;
                          }
+                        else if (PL_fold[ender] != PL_fold_latin1[ender]) {
+
+                            /* If the character's fold differs between /d and
+                             * /u, this can't change to be an EXACTFU node */
+                            maybe_exactfu = FALSE;
+                        }
  
                          *(s++) = (DEPENDS_SEMANTICS)
                                   ? (char) toFOLD(ender)
@@ -14479,6 +14465,8 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                      }
                 } /* End of adding current character to the node */
  
+              done_with_this_char:
+
                  len += added_len;
  
                 if (next_is_quantifier) {
@@ -14490,168 +14478,505 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
  
             } /* End of loop through literal characters */
  
-            /* Here we have either exhausted the input or ran out of room in
-             * the node.  (If we encountered a character that can't be in the
-             * node, transfer is made directly to <loopdone>, and so we
-             * wouldn't have fallen off the end of the loop.)  In the latter
-             * case, we artificially have to split the node into two, because
-             * we just don't have enough space to hold everything.  This
-             * creates a problem if the final character participates in a
-             * multi-character fold in the non-final position, as a match that
-             * should have occurred won't, due to the way nodes are matched,
-             * and our artificial boundary.  So back off until we find a non-
-             * problematic character -- one that isn't at the beginning or
-             * middle of such a fold.  (Either it doesn't participate in any
-             * folds, or appears only in the final position of all the folds it
-             * does participate in.)  A better solution with far fewer false
-             * positives, and that would fill the nodes more completely, would
-             * be to actually have available all the multi-character folds to
-             * test against, and to back-off only far enough to be sure that
-             * this node isn't ending with a partial one.  <upper_parse> is set
-             * further below (if we need to reparse the node) to include just
-             * up through that final non-problematic character that this code
-             * identifies, so when it is set to less than the full node, we can
-             * skip the rest of this */
-            if (FOLD && p < RExC_end && upper_parse == MAX_NODE_STRING_SIZE) {
-                PERL_UINT_FAST8_T backup_count = 0;
-
-                const STRLEN full_len = len;
-
-               assert(len >= MAX_NODE_STRING_SIZE);
-
-                /* Here, <s> points to just beyond where we have output the
-                 * final character of the node.  Look backwards through the
-                 * string until find a non- problematic character */
-
-               if (! UTF) {
-
-                    /* This has no multi-char folds to non-UTF characters */
-                    if (ASCII_FOLD_RESTRICTED) {
-                        goto loopdone;
+            /* Here we have either exhausted the input or run out of room in
+             * the node.  If the former, we are done.  (If we encountered a
+             * character that can't be in the node, transfer is made directly
+             * to <loopdone>, and so we wouldn't have fallen off the end of the
+             * loop.)  */
+            if (LIKELY(! overflowed)) {
+                goto loopdone;
+            }
+
+            /* Here we have run out of room.  We can grow plain EXACT and
+             * LEXACT nodes.  If the pattern is gigantic enough, though,
+             * eventually we'll have to artificially chunk the pattern into
+             * multiple nodes. */
+            if (! LOC && (node_type == EXACT || node_type == LEXACT)) {
+                Size_t overhead = 1 + regarglen[OP(REGNODE_p(ret))];
+                Size_t overhead_expansion = 0;
+                char temp[256];
+                Size_t max_nodes_for_string;
+                Size_t achievable;
+                SSize_t delta;
+
+                /* Here we couldn't fit the final character in the current
+                 * node, so it will have to be reparsed, no matter what else we
+                 * do */
+                p = oldp;
+
+                /* If would have overflowed a regular EXACT node, switch
+                 * instead to an LEXACT.  The code below is structured so that
+                 * the actual growing code is common to changing from an EXACT
+                 * or just increasing the LEXACT size.  This means that we have
+                 * to save the string in the EXACT case before growing, and
+                 * then copy it afterwards to its new location */
+                if (node_type == EXACT) {
+                    overhead_expansion = regarglen[LEXACT] - regarglen[EXACT];
+                    RExC_emit += overhead_expansion;
+                    Copy(s0, temp, len, char);
+                }
+
+                /* Ready to grow.  If it was a plain EXACT, the string was
+                 * saved, and the first few bytes of it overwritten by adding
+                 * an argument field.  We assume, as we do elsewhere in this
+                 * file, that one byte of remaining input will translate into
+                 * one byte of output, and if that's too small, we grow again,
+                 * if too large the excess memory is freed at the end */
+
+                max_nodes_for_string = U16_MAX - overhead - overhead_expansion;
+                achievable = MIN(max_nodes_for_string,
+                                 current_string_nodes + STR_SZ(RExC_end - p));
+                delta = achievable - current_string_nodes;
+
+                /* If there is just no more room, go finish up this chunk of
+                 * the pattern. */
+                if (delta <= 0) {
+                    goto loopdone;
+                }
+
+                change_engine_size(pRExC_state, delta + overhead_expansion);
+                current_string_nodes += delta;
+                max_string_len
+                           = sizeof(struct regnode) * current_string_nodes;
+                upper_fill = max_string_len + 1;
+
+                /* If the length was small, we know this was originally an
+                 * EXACT node now converted to LEXACT, and the string has to be
+                 * restored.  Otherwise the string was untouched.  260 is just
+                 * a number safely above 255 so don't have to worry about
+                 * getting it precise */
+                if (len < 260) {
+                    node_type = LEXACT;
+                    FILL_NODE(ret, node_type);
+                    s0 = STRING(REGNODE_p(ret));
+                    Copy(temp, s0, len, char);
+                    s = s0 + len;
+                }
+
+                goto continue_parse;
+            }
+            else if (FOLD) {
+                bool splittable = FALSE;
+                bool backed_up = FALSE;
+                char * e;
+                char * s_start;
+
+                /* Here is /i.  Running out of room creates a problem if we are
+                 * folding, and the split happens in the middle of a
+                 * multi-character fold, as a match that should have occurred,
+                 * won't, due to the way nodes are matched, and our artificial
+                 * boundary.  So back off until we aren't splitting such a
+                 * fold.  If there is no such place to back off to, we end up
+                 * taking the entire node as-is.  This can happen if the node
+                 * consists entirely of 'f' or entirely of 's' characters (or
+                 * things that fold to them) as 'ff' and 'ss' are
+                 * multi-character folds.
+                 *
+                 * The Unicode standard says that multi character folds consist
+                 * of either two or three characters.  That means we would be
+                 * splitting one if the final character in the node is at the
+                 * beginning of either type, or is the second of a three
+                 * character fold.
+                 *
+                 * At this point:
+                 *  ender     is the code point of the character that won't fit
+                 *            in the node
+                 *  s         points to just beyond the final byte in the node.
+                 *            It's where we would place ender if there were
+                 *            room, and where in fact we do place ender's fold
+                 *            in the code below, as we've over-allocated space
+                 *            for s0 (hence s) to allow for this
+                 *  e         starts at 's' and advances as we append things.
+                 *  old_s     is the same as 's'.  (If ender had fit, 's' would
+                 *            have been advanced to beyond it).
+                 *  old_old_s points to the beginning byte of the final
+                 *            character in the node
+                 *  p         points to the beginning byte in the input of the
+                 *            character beyond 'ender'.
+                 *  oldp      points to the beginning byte in the input of
+                 *            'ender'.
+                 *
+                 * In the case of /il, we haven't folded anything that could be
+                 * affected by the locale.  That means only above-Latin1
+                 * characters that fold to other above-latin1 characters get
+                 * folded at compile time.  To check where a good place to
+                 * split nodes is, everything in it will have to be folded.
+                 * The boolean 'maybe_exactfu' keeps track in /il if there are
+                 * any unfolded characters in the node. */
+                bool need_to_fold_loc = LOC && ! maybe_exactfu;
+
+                /* If we do need to fold the node, we need a place to store the
+                 * folded copy, and a way to map back to the unfolded original
+                 * */
+                char * locfold_buf = NULL;
+                Size_t * loc_correspondence = NULL;
+
+                if (! need_to_fold_loc) {   /* The normal case.  Just
+                                               initialize to the actual node */
+                    e = s;
+                    s_start = s0;
+                    s = old_old_s;  /* Point to the beginning of the final char
+                                       that fits in the node */
+                }
+                else {
+
+                    /* Here, we have filled a /il node, and there are unfolded
+                     * characters in it.  If the runtime locale turns out to be
+                     * UTF-8, there are possible multi-character folds, just
+                     * like when not under /l.  The node hence can't terminate
+                     * in the middle of such a fold.  To determine this, we
+                     * have to create a folded copy of this node.  That means
+                     * reparsing the node, folding everything assuming a UTF-8
+                     * locale.  (If at runtime it isn't such a locale, the
+                     * actions here wouldn't have been necessary, but we have
+                     * to assume the worst case.)  If we find we need to back
+                     * off the folded string, we do so, and then map that
+                     * position back to the original unfolded node, which then
+                     * gets output, truncated at that spot */
+
+                    char * redo_p = RExC_parse;
+                    char * redo_e;
+                    char * old_redo_e;
+
+                    /* Allow enough space assuming a single byte input folds to
+                     * a single byte output, plus assume that the two unparsed
+                     * characters (that we may need) fold to the largest number
+                     * of bytes possible, plus extra for one more worst case
+                     * scenario.  In the loop below, if we start eating into
+                     * that final spare space, we enlarge this initial space */
+                    Size_t size = max_string_len + (3 * UTF8_MAXBYTES_CASE) + 1;
+
+                    Newxz(locfold_buf, size, char);
+                    Newxz(loc_correspondence, size, Size_t);
+
+                    /* Redo this node's parse, folding into 'locfold_buf' */
+                    redo_p = RExC_parse;
+                    old_redo_e = redo_e = locfold_buf;
+                    while (redo_p <= oldp) {
+
+                        old_redo_e = redo_e;
+                        loc_correspondence[redo_e - locfold_buf]
+                                                        = redo_p - RExC_parse;
+
+                        if (UTF) {
+                            Size_t added_len;
+
+                            (void) _to_utf8_fold_flags((U8 *) redo_p,
+                                                       (U8 *) RExC_end,
+                                                       (U8 *) redo_e,
+                                                       &added_len,
+                                                       FOLD_FLAGS_FULL);
+                            redo_e += added_len;
+                            redo_p += UTF8SKIP(redo_p);
+                        }
+                        else {
+
+                            /* Note that if this code is run on some ancient
+                             * Unicode versions, SHARP S doesn't fold to 'ss',
+                             * but rather than clutter the code with #ifdef's,
+                             * as is done above, we ignore that possibility.
+                             * This is ok because this code doesn't affect what
+                             * gets matched, but merely where the node gets
+                             * split */
+                            if (UCHARAT(redo_p) != LATIN_SMALL_LETTER_SHARP_S) {
+                                *redo_e++ = toLOWER_L1(UCHARAT(redo_p));
+                            }
+                            else {
+                                *redo_e++ = 's';
+                                *redo_e++ = 's';
+                            }
+                            redo_p++;
+                        }
+
+
+                        /* If we're getting so close to the end that a
+                         * worst-case fold in the next character would cause us
+                         * to overflow, increase, assuming one byte output byte
+                         * per one byte input one, plus room for another worst
+                         * case fold */
+                        if (   redo_p <= oldp
+                            && redo_e > locfold_buf + size
+                                                    - (UTF8_MAXBYTES_CASE + 1))
+                        {
+                            Size_t new_size = size
+                                            + (oldp - redo_p)
+                                            + UTF8_MAXBYTES_CASE + 1;
+                            Ptrdiff_t e_offset = redo_e - locfold_buf;
+
+                            Renew(locfold_buf, new_size, char);
+                            Renew(loc_correspondence, new_size, Size_t);
+                            size = new_size;
+
+                            redo_e = locfold_buf + e_offset;
+                        }
                      }
  
-                    while (--s >= s0 && IS_NON_FINAL_FOLD(*s)) {
-                        backup_count++;
+                    /* Set so that things are in terms of the folded, temporary
+                     * string */
+                    s = old_redo_e;
+                    s_start = locfold_buf;
+                    e = redo_e;
+
+                }
+
+                /* Here, we have 's', 's_start' and 'e' set up to point to the
+                 * input that goes into the node, folded.
+                 *
+                 * If the final character of the node and the fold of ender
+                 * form the first two characters of a three character fold, we
+                 * need to peek ahead at the next (unparsed) character in the
+                 * input to determine if the three actually do form such a
+                 * fold.  Just looking at that character is not generally
+                 * sufficient, as it could be, for example, an escape sequence
+                 * that evaluates to something else, and it needs to be folded.
+                 *
+                 * khw originally thought to just go through the parse loop one
+                 * extra time, but that doesn't work easily as that iteration
+                 * could cause things to think that the parse is over and to
+                 * goto loopdone.  The character could be a '$' for example, or
+                 * the character beyond could be a quantifier, and other
+                 * glitches as well.
+                 *
+                 * The solution used here for peeking ahead is to look at that
+                 * next character.  If it isn't ASCII punctuation, then it will
+                 * be something that continues in an EXACTish node if there
+                 * were space.  We append the fold of it to s, having reserved
+                 * enough room in s0 for the purpose.  If we can't reasonably
+                 * peek ahead, we instead assume the worst case: that it is
+                 * something that would form the completion of a multi-char
+                 * fold.
+                 *
+                 * If we can't split between s and ender, we work backwards
+                 * character-by-character down to s0.  At each current point
+                 * see if we are at the beginning of a multi-char fold.  If so,
+                 * that means we would be splitting the fold across nodes, and
+                 * so we back up one and try again.
+                 *
+                 * If we're not at the beginning, we still could be at the
+                 * final two characters of a (rare) three character fold.  We
+                 * check if the sequence starting at the character before the
+                 * current position (and including the current and next
+                 * characters) is a three character fold.  If not, the node can
+                 * be split here.  If it is, we have to backup two characters
+                 * and try again.
+                 *
+                 * Otherwise, the node can be split at the current position.
+                 *
+                 * The same logic is used for UTF-8 patterns and not */
+                if (UTF) {
+                    Size_t added_len;
+
+                    /* Append the fold of ender */
+                    (void) _to_uni_fold_flags(
+                        ender,
+                        (U8 *) e,
+                        &added_len,
+                        FOLD_FLAGS_FULL | ((ASCII_FOLD_RESTRICTED)
+                                        ? FOLD_FLAGS_NOMIX_ASCII
+                                        : 0));
+                    e += added_len;
+
+                    /* 's' and the character folded to by ender may be the
+                     * first two of a three-character fold, in which case the
+                     * node should not be split here.  That may mean examining
+                     * the so-far unparsed character starting at 'p'.  But if
+                     * ender folded to more than one character, we already have
+                     * three characters to look at.  Also, we first check if
+                     * the sequence consisting of s and the next character form
+                     * the first two of some three character fold.  If not,
+                     * there's no need to peek ahead. */
+                    if (   added_len <= UTF8SKIP(e - added_len)
+                        && UNLIKELY(is_THREE_CHAR_FOLD_HEAD_utf8_safe(s, e)))
+                    {
+                        /* Here, the two do form the beginning of a potential
+                         * three character fold.  The unexamined character may
+                         * or may not complete it.  Peek at it.  It might be
+                         * something that ends the node or an escape sequence,
+                         * in which case we don't know without a lot of work
+                         * what it evaluates to, so we have to assume the worst
+                         * case: that it does complete the fold, and so we
+                         * can't split here.  All such instances  will have
+                         * that character be an ASCII punctuation character,
+                         * like a backslash.  So, for that case, backup one and
+                         * drop down to try at that position */
+                        if (isPUNCT(*p)) {
+                            s = (char *) utf8_hop_back((U8 *) s, -1,
+                                       (U8 *) s_start);
+                            backed_up = TRUE;
+                        }
+                        else {
+                            /* Here, since it's not punctuation, it must be a
+                             * real character, and we can append its fold to
+                             * 'e' (having deliberately reserved enough space
+                             * for this eventuality) and drop down to check if
+                             * the three actually do form a folded sequence */
+                            (void) _to_utf8_fold_flags(
+                                (U8 *) p, (U8 *) RExC_end,
+                                (U8 *) e,
+                                &added_len,
+                                FOLD_FLAGS_FULL | ((ASCII_FOLD_RESTRICTED)
+                                                ? FOLD_FLAGS_NOMIX_ASCII
+                                                : 0));
+                            e += added_len;
+                        }
                      }
-                    len = s - s0 + 1;
-               }
-                else {
  
-                    /* Point to the first byte of the final character */
-                    s = (char *) utf8_hop_back((U8 *) s, -1, (U8 *) s0);
+                    /* Here, we either have three characters available in
+                     * sequence starting at 's', or we have two characters and
+                     * know that the following one can't possibly be part of a
+                     * three character fold.  We go through the node backwards
+                     * until we find a place where we can split it without
+                     * breaking apart a multi-character fold.  At any given
+                     * point we have to worry about if such a fold begins at
+                     * the current 's', and also if a three-character fold
+                     * begins at s-1, (containing s and s+1).  Splitting in
+                     * either case would break apart a fold */
+                    do {
+                        char *prev_s = (char *) utf8_hop_back((U8 *) s, -1,
+                                                            (U8 *) s_start);
+
+                        /* If is a multi-char fold, can't split here.  Backup
+                         * one char and try again */
+                        if (UNLIKELY(is_MULTI_CHAR_FOLD_utf8_safe(s, e))) {
+                            s = prev_s;
+                            backed_up = TRUE;
+                            continue;
+                        }
  
-                    while (s >= s0) {   /* Search backwards until find
-                                           a non-problematic char */
-                        if (UTF8_IS_INVARIANT(*s)) {
+                        /* If the two characters beginning at 's' are part of a
+                         * three character fold starting at the character
+                         * before s, we can't split either before or after s.
+                         * Backup two chars and try again */
+                        if (   LIKELY(s > s_start)
+                            && UNLIKELY(is_THREE_CHAR_FOLD_utf8_safe(prev_s, e)))
+                        {
+                            s = prev_s;
+                            s = (char *) utf8_hop_back((U8 *) s, -1, (U8 *) s_start);
+                            backed_up = TRUE;
+                            continue;
+                        }
  
-                            /* There are no ascii characters that participate
-                             * in multi-char folds under /aa.  In EBCDIC, the
-                             * non-ascii invariants are all control characters,
-                             * so don't ever participate in any folds. */
-                            if (ASCII_FOLD_RESTRICTED
-                                || ! IS_NON_FINAL_FOLD(*s))
-                            {
-                                break;
-                            }
+                        /* Here there's no multi-char fold between s and the
+                         * next character following it.  We can split */
+                        splittable = TRUE;
+                        break;
+
+                    } while (s > s_start); /* End of loops backing up through the node */
+
+                    /* Here we either couldn't find a place to split the node,
+                     * or else we broke out of the loop setting 'splittable' to
+                     * true.  In the latter case, the place to split is between
+                     * the first and second characters in the sequence starting
+                     * at 's' */
+                    if (splittable) {
+                        s += UTF8SKIP(s);
+                    }
+                }
+                else {  /* Pattern not UTF-8 */
+                    if (   ender != LATIN_SMALL_LETTER_SHARP_S
+                        || ASCII_FOLD_RESTRICTED)
+                    {
+                        *e++ = toLOWER_L1(ender);
+                    }
+                    else {
+                        *e++ = 's';
+                        *e++ = 's';
+                    }
+
+                    if (   e - s  <= 1
+                        && UNLIKELY(is_THREE_CHAR_FOLD_HEAD_latin1_safe(s, e)))
+                    {
+                        if (isPUNCT(*p)) {
+                            s--;
+                            backed_up = TRUE;
                          }
-                        else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
-                            if (! IS_NON_FINAL_FOLD(EIGHT_BIT_UTF8_TO_NATIVE(
-                                                                  *s, *(s+1))))
+                        else {
+                            if (   UCHARAT(p) != LATIN_SMALL_LETTER_SHARP_S
+                                || ASCII_FOLD_RESTRICTED)
                              {
-                                break;
+                                *e++ = toLOWER_L1(ender);
+                            }
+                            else {
+                                *e++ = 's';
+                                *e++ = 's';
                              }
                          }
-                        else if (! _invlist_contains_cp(
-                                        PL_NonFinalFold,
-                                        valid_utf8_to_uvchr((U8 *) s, NULL)))
-                        {
-                            break;
+                    }
+
+                    do {
+                        if (UNLIKELY(is_MULTI_CHAR_FOLD_latin1_safe(s, e))) {
+                            s--;
+                            backed_up = TRUE;
+                            continue;
                          }
  
-                        /* Here, the current character is problematic in that
-                         * it does occur in the non-final position of some
-                         * fold, so try the character before it, but have to
-                         * special case the very first byte in the string, so
-                         * we don't read outside the string */
-                        s = (s == s0) ? s -1 : (char *) utf8_hop((U8 *) s, -1);
-                        backup_count++;
-                    } /* End of loop backwards through the string */
-
-                    /* If there were only problematic characters in the string,
-                     * <s> will point to before s0, in which case the length
-                     * should be 0, otherwise include the length of the
-                     * non-problematic character just found */
-                    len = (s < s0) ? 0 : s - s0 + UTF8SKIP(s);
-               }
+                        if (   LIKELY(s > s_start)
+                            && UNLIKELY(is_THREE_CHAR_FOLD_latin1_safe(s - 1, e)))
+                        {
+                            s -= 2;
+                            backed_up = TRUE;
+                            continue;
+                        }
  
-                /* Here, have found the final character, if any, that is
-                 * non-problematic as far as ending the node without splitting
-                 * it across a potential multi-char fold.  <len> contains the
-                 * number of bytes in the node up-to and including that
-                 * character, or is 0 if there is no such character, meaning
-                 * the whole node contains only problematic characters.  In
-                 * this case, give up and just take the node as-is.  We can't
-                 * do any better */
-                if (len == 0) {
-                    len = full_len;
+                        splittable = TRUE;
+                        break;
  
-                } else {
+                    } while (s > s_start);
  
-                    /* Here, the node does contain some characters that aren't
-                     * problematic.  If we didn't have to backup any, then the
-                     * final character in the node is non-problematic, and we
-                     * can take the node as-is */
-                    if (backup_count == 0) {
-                        goto loopdone;
+                    if (splittable) {
+                        s++;
                      }
-                    else if (backup_count == 1) {
+                }
  
-                        /* If the final character is problematic, but the
-                         * penultimate is not, back-off that last character to
-                         * later start a new node with it */
-                        p = oldp;
-                        goto loopdone;
+                /* Here, we are done backing up.  If we didn't backup at all
+                 * (the likely case), just proceed */
+                if (backed_up) {
+
+                   /* If we did find a place to split, reparse the entire node
+                    * stopping where we have calculated. */
+                    if (splittable) {
+
+                       /* If we created a temporary folded string under /l, we
+                        * have to map that back to the original */
+                        if (need_to_fold_loc) {
+                            upper_fill = loc_correspondence[s - s_start];
+                            Safefree(locfold_buf);
+                            Safefree(loc_correspondence);
+
+                            if (upper_fill == 0) {
+                                FAIL2("panic: loc_correspondence[%d] is 0",
+                                      (int) (s - s_start));
+                            }
+                        }
+                        else {
+                            upper_fill = s - s0;
+                        }
+                        goto reparse;
+                    }
+                    else if (need_to_fold_loc) {
+                        Safefree(locfold_buf);
+                        Safefree(loc_correspondence);
                      }
  
-                    /* Here, the final non-problematic character is earlier
-                     * in the input than the penultimate character.  What we do
-                     * is reparse from the beginning, going up only as far as
-                     * this final ok one, thus guaranteeing that the node ends
-                     * in an acceptable character.  The reason we reparse is
-                     * that we know how far in the character is, but we don't
-                     * know how to correlate its position with the input parse.
-                     * An alternate implementation would be to build that
-                     * correlation as we go along during the original parse,
-                     * but that would entail extra work for every node, whereas
-                     * this code gets executed only when the string is too
-                     * large for the node, and the final two characters are
-                     * problematic, an infrequent occurrence.  Yet another
-                     * possible strategy would be to save the tail of the
-                     * string, and the next time regatom is called, initialize
-                     * with that.  The problem with this is that unless you
-                     * back off one more character, you won't be guaranteed
-                     * regatom will get called again, unless regbranch,
-                     * regpiece ... are also changed.  If you do back off that
-                     * extra character, so that there is input guaranteed to
-                     * force calling regatom, you can't handle the case where
-                     * just the first character in the node is acceptable.  I
-                     * (khw) decided to try this method which doesn't have that
-                     * pitfall; if performance issues are found, we can do a
-                     * combination of the current approach plus that one */
-                    upper_parse = len;
-                    len = 0;
-                    s = s0;
-                    goto reparse;
+                    /* Here the node consists entirely of non-final multi-char
+                     * folds.  (Likely it is all 'f's or all 's's.)  There's no
+                     * decent place to split it, so give up and just take the
+                     * whole thing */
+                    len = old_s - s0;
                  }
             }   /* End of verifying node ends with an appropriate char */
  
+            /* We need to start the next node at the character that didn't fit
+             * in this one */
+            p = oldp;
+
            loopdone:   /* Jumped to when encounters something that shouldn't be
                           in the node */
  
              /* Free up any over-allocated space; cast is to silence bogus
               * warning in MS VC */
              change_engine_size(pRExC_state,
-                                - (Ptrdiff_t) (initial_size - STR_SZ(len)));
+                        - (Ptrdiff_t) (current_string_nodes - STR_SZ(len)));
  
              /* I (khw) don't know if you can get here with zero length, but the
               * old code handled this situation by creating a zero-length EXACT
@@ -14662,15 +14987,21 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
              else {
  
                  /* If the node type is EXACT here, check to see if it
-                 * should be EXACTL, or EXACT_ONLY8. */
+                 * should be EXACTL, or EXACT_REQ8. */
                  if (node_type == EXACT) {
                      if (LOC) {
                          node_type = EXACTL;
                      }
                      else if (requires_utf8_target) {
-                        node_type = EXACT_ONLY8;
+                        node_type = EXACT_REQ8;
                      }
-                } else if (FOLD) {
+                }
+                else if (node_type == LEXACT) {
+                    if (requires_utf8_target) {
+                        node_type = LEXACT_REQ8;
+                    }
+                }
+                else if (FOLD) {
                      if (    UNLIKELY(has_micro_sign || has_ss)
                          && (node_type == EXACTFU || (   node_type == EXACTF
                                                       && maybe_exactfu)))
@@ -14687,9 +15018,29 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                          if (maybe_exactfu) {
                              node_type = EXACTFLU8;
                          }
+                        else if (UNLIKELY(
+                             _invlist_contains_cp(PL_HasMultiCharFold, ender)))
+                        {
+                            /* A character that folds to more than one will
+                             * match multiple characters, so can't be SIMPLE.
+                             * We don't have to worry about this with EXACTFLU8
+                             * nodes just above, as they have already been
+                             * folded (since the fold doesn't vary at run
+                             * time).  Here, if the final character in the node
+                             * folds to multiple, it can't be simple.  (This
+                             * only has an effect if the node has only a single
+                             * character, hence the final one, as elsewhere we
+                             * turn off simple for nodes whose length > 1 */
+                            maybe_SIMPLE = 0;
+                        }
                      }
                      else if (node_type == EXACTF) {  /* Means is /di */
  
+                        /* This intermediate variable is needed solely because
+                         * the asserts in the macro where used exceed Win32's
+                         * literal string capacity */
+                        char first_char = * STRING(REGNODE_p(ret));
+
                          /* If 'maybe_exactfu' is clear, then we need to stay
                           * /di.  If it is set, it means there are no code
                           * points that match differently depending on UTF8ness
@@ -14698,7 +15049,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                          if (! maybe_exactfu) {
                              RExC_seen_d_op = TRUE;
                          }
-                        else if (   isALPHA_FOLD_EQ(* STRING(REGNODE_p(ret)), 's')
+                        else if (   isALPHA_FOLD_EQ(first_char, 's')
                                   || isALPHA_FOLD_EQ(ender, 's'))
                          {
                              /* But, if the node begins or ends in an 's' we
@@ -14718,16 +15069,16 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                      }
  
                      if (requires_utf8_target && node_type == EXACTFU) {
-                        node_type = EXACTFU_ONLY8;
+                        node_type = EXACTFU_REQ8;
                      }
                  }
  
                  OP(REGNODE_p(ret)) = node_type;
-                STR_LEN(REGNODE_p(ret)) = len;
+                setSTR_LEN(REGNODE_p(ret), len);
                  RExC_emit += STR_SZ(len);
  
                  /* If the node isn't a single character, it can't be SIMPLE */
-                if (len > (Size_t) ((UTF) ? UVCHR_SKIP(ender) : 1)) {
+                if (len > (Size_t) ((UTF) ? UTF8SKIP(STRING(REGNODE_p(ret))) : 1)) {
                      maybe_SIMPLE = 0;
                  }
  
@@ -14779,7 +15130,7 @@ S_populate_ANYOF_from_invlist(pTHX_ regnode *node, SV** invlist_ptr)
      assert(PL_regkind[OP(node)] == ANYOF);
  
      /* There is no bitmap for this node type */
-    if (OP(node) == ANYOFH) {
+    if (inRANGE(OP(node), ANYOFH, ANYOFRb)) {
          return;
      }
  
@@ -15897,8 +16248,11 @@ redo_curchar:
  
                      /* Recurse, with the meat of the embedded expression */
                      RExC_parse++;
-                    (void) handle_regex_sets(pRExC_state, &current, flagp,
-                                                    depth+1, oregcomp_parse);
+                    if (! handle_regex_sets(pRExC_state, &current, flagp,
+                                                    depth+1, oregcomp_parse))
+                    {
+                        RETURN_FAIL_ON_RESTART(*flagp, flagp);
+                    }
  
                      /* Here, 'current' contains the embedded expression's
                       * inversion list, and RExC_parse points to the trailing
@@ -15952,6 +16306,7 @@ redo_curchar:
                                FALSE, /* Require return to be an ANYOF */
                                &current))
                  {
+                    RETURN_FAIL_ON_RESTART(*flagp, flagp);
                      goto regclass_failed;
                  }
  
@@ -15988,6 +16343,7 @@ redo_curchar:
                                  FALSE, /* Require return to be an ANYOF */
                                  &current))
                  {
+                    RETURN_FAIL_ON_RESTART(*flagp, flagp);
                      goto regclass_failed;
                  }
  
@@ -16334,7 +16690,11 @@ redo_curchar:
                               well have generated non-portable code points, but
                               they're valid on this machine */
                      FALSE, /* similarly, no need for strict */
-                    FALSE, /* Require return to be an ANYOF */
+
+                    /* We can optimize into something besides an ANYOF, except
+                     * under /l, which needs to be ANYOF because of runtime
+                     * checks for locale sanity, etc */
+                  ! in_locale,
                      NULL
                  );
  
@@ -16348,8 +16708,10 @@ redo_curchar:
          RExC_flags |= RXf_PMf_FOLD;
      }
  
-    if (!node)
+    if (!node) {
+        RETURN_FAIL_ON_RESTART(*flagp, flagp);
          goto regclass_failed;
+    }
  
      /* Fix up the node type if we are in locale.  (We have pretended we are
       * under /u for the purposes of regclass(), as this construct will only
@@ -16561,6 +16923,22 @@ S_output_posix_warnings(pTHX_ RExC_state_t *pRExC_state, AV* posix_warnings)
      UPDATE_WARNINGS_LOC(RExC_parse);
  }
  
+PERL_STATIC_INLINE Size_t
+S_find_first_differing_byte_pos(const U8 * s1, const U8 * s2, const Size_t max)
+{
+    const U8 * const start = s1;
+    const U8 * const send = start + max;
+
+    PERL_ARGS_ASSERT_FIND_FIRST_DIFFERING_BYTE_POS;
+
+    while (s1 < send && *s1  == *s2) {
+        s1++; s2++;
+    }
+
+    return s1 - start;
+}
+
+
  STATIC AV *
  S_add_multi_match(pTHX_ AV* multi_char_matches, SV* multi_string, const STRLEN cp_count)
  {
@@ -17004,7 +17382,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                                  {
                                      if (strict) {
                                          RExC_parse--;
-                                        vFAIL("\\N{} in inverted character class or as a range end-point is restricted to one character");
+                                        vFAIL("\\N{} here is restricted to one character");
                                      }
                                      ckWARNreg(RExC_parse, "Using just the first character returned by \\N{} in character class");
                                      break; /* <value> contains the first code
@@ -17722,7 +18100,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                             literal
                          );
                  }
-                else if isMNEMONIC_CNTRL(value) {
+                else if (isMNEMONIC_CNTRL(value)) {
                      vWARN4(RExC_parse,
                             "\"%.*s\" is more clearly written simply as \"%s\"",
                             (int) (RExC_parse - rangebegin),
@@ -18086,6 +18464,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
  
                  /* Likewise for 'posixes' */
                  _invlist_union(posixes, cp_list, &cp_list);
+                SvREFCNT_dec(posixes);
  
                  /* Likewise for anything else in the range that matched only
                   * under UTF-8 */
@@ -18233,14 +18612,11 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                   |= ANYOFL_FOLD
                   |  ANYOFL_SHARED_UTF8_LOCALE_fold_HAS_MATCHES_nonfold_REQD;
          }
-        else if (cp_list) { /* Look to see if a 0-255 code point is in list */
-            UV start, end;
-            invlist_iterinit(cp_list);
-            if (invlist_iternext(cp_list, &start, &end) && start < 256) {
-                anyof_flags |= ANYOFL_FOLD;
-                has_runtime_dependency |= HAS_L_RUNTIME_DEPENDENCY;
-            }
-            invlist_iterfinish(cp_list);
+        else if (cp_list && invlist_lowest(cp_list) < 256) {
+            /* If nothing is below 256, has no locale dependency; otherwise it
+             * does */
+            anyof_flags |= ANYOFL_FOLD;
+            has_runtime_dependency |= HAS_L_RUNTIME_DEPENDENCY;
          }
      }
      else if (   DEPENDS_SEMANTICS
@@ -18285,9 +18661,10 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
  
      if (optimizable) {
          PERL_UINT_FAST8_T i;
-        Size_t partial_cp_count = 0;
+        UV partial_cp_count = 0;
          UV start[MAX_FOLD_FROMS+1] = { 0 }; /* +1 for the folded-to char */
          UV   end[MAX_FOLD_FROMS+1] = { 0 };
+        bool single_range = FALSE;
  
          if (cp_list) { /* Count the code points in enough ranges that we would
                            see all the ones possible in any fold in this version
@@ -18301,6 +18678,9 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                  partial_cp_count += end[i] - start[i] + 1;
              }
  
+            if (i == 1) {
+                single_range = TRUE;
+            }
              invlist_iterfinish(cp_list);
          }
  
@@ -18336,6 +18716,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                      goto not_anyof;
                  }
              }
+
              /* For well-behaved locales, some classes are subsets of others,
               * so complementing the subset and including the non-complemented
               * superset should match everything, like [\D[:alnum:]], and
@@ -18440,7 +18821,8 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
  
          /* Next see if can optimize classes that contain just a few code points
           * into an EXACTish node.  The reason to do this is to let the
-         * optimizer join this node with adjacent EXACTish ones.
+         * optimizer join this node with adjacent EXACTish ones, and ANYOF
+         * nodes require conversion to code point from UTF-8.
           *
           * An EXACTFish node can be generated even if not under /i, and vice
           * versa.  But care must be taken.  An EXACTFish node has to be such
@@ -18459,21 +18841,13 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
           * participates in no fold whatsoever, and having it EXACT tells the
           * optimizer the target string cannot match unless it has a colon in
           * it.
-         *
-         * We don't typically generate an EXACTish node if doing so would
-         * require changing the pattern to UTF-8, as that affects /d and
-         * otherwise is slower.  However, under /i, not changing to UTF-8 can
-         * miss some potential multi-character folds.  We calculate the
-         * EXACTish node, and then decide if something would be missed if we
-         * don't upgrade */
+         */
          if (   ! posixl
              && ! invert
  
                  /* Only try if there are no more code points in the class than
                   * in the max possible fold */
-            &&   partial_cp_count > 0 && partial_cp_count <= MAX_FOLD_FROMS + 1
-
-            && (start[0] < 256 || UTF || FOLD))
+            &&   inRANGE(partial_cp_count, 1, MAX_FOLD_FROMS + 1))
          {
              if (partial_cp_count == 1 && ! upper_latin1_only_utf8_matches)
              {
@@ -18482,13 +18856,23 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
  
                  if (LOC) {
  
-                    /* Here is /l:  Use EXACTL, except /li indicates EXACTFL,
-                     * as that means there is a fold not known until runtime so
-                     * shows as only a single code point here. */
-                    op = (FOLD) ? EXACTFL : EXACTL;
+                    /* Here is /l:  Use EXACTL, except if there is a fold not
+                     * known until runtime so shows as only a single code point
+                     * here.  For code points above 255, we know which can
+                     * cause problems by having a potential fold to the Latin1
+                     * range. */
+                    if (  ! FOLD
+                        || (     start[0] > 255
+                            && ! is_PROBLEMATIC_LOCALE_FOLD_cp(start[0])))
+                    {
+                        op = EXACTL;
+                    }
+                    else {
+                        op = EXACTFL;
+                    }
                  }
                  else if (! FOLD) { /* Not /l and not /i */
-                    op = (start[0] < 256) ? EXACT : EXACT_ONLY8;
+                    op = (start[0] < 256) ? EXACT : EXACT_REQ8;
                  }
                  else if (start[0] < 256) { /* /i, not /l, and the code point is
                                                small */
@@ -18518,8 +18902,8 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                             applies to it */
                      op = _invlist_contains_cp(PL_InMultiCharFold,
                                                start[0])
-                         ? EXACTFU_ONLY8
-                         : EXACT_ONLY8;
+                         ? EXACTFU_REQ8
+                         : EXACT_REQ8;
                  }
  
                  value = start[0];
@@ -18683,7 +19067,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                                       ? EXACTFLU8
                                       : (ASCII_FOLD_RESTRICTED)
                                         ? EXACTFAA
-                                       : EXACTFU_ONLY8;
+                                       : EXACTFU_REQ8;
                                  value = folded;
                              }
                          }   /* Below, the lowest code point < 256 */
@@ -18735,45 +19119,43 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
              }
  
              if (op != END) {
+                U8 len;
  
-                /* Here, we have calculated what EXACTish node we would use.
-                 * But we don't use it if it would require converting the
-                 * pattern to UTF-8, unless not using it could cause us to miss
-                 * some folds (hence be buggy) */
-
-                if (! UTF && value > 255) {
-                    SV * in_multis = NULL;
-
-                    assert(FOLD);
-
-                    /* If there is no code point that is part of a multi-char
-                     * fold, then there aren't any matches, so we don't do this
-                     * optimization.  Otherwise, it could match depending on
-                     * the context around us, so we do upgrade */
-                    _invlist_intersection(PL_InMultiCharFold, cp_list, &in_multis);
-                    if (UNLIKELY(_invlist_len(in_multis) != 0)) {
+                /* Here, we have calculated what EXACTish node to use.  Have to
+                 * convert to UTF-8 if not already there */
+                if (value > 255) {
+                    if (! UTF) {
+                        SvREFCNT_dec(cp_list);;
                          REQUIRE_UTF8(flagp);
                      }
-                    else {
-                        op = END;
+
+                    /* This is a kludge to the special casing issues with this
+                     * ligature under /aa.  FB05 should fold to FB06, but the
+                     * call above to _to_uni_fold_flags() didn't find this, as
+                     * it didn't use the /aa restriction in order to not miss
+                     * other folds that would be affected.  This is the only
+                     * instance likely to ever be a problem in all of Unicode.
+                     * So special case it. */
+                    if (   value == LATIN_SMALL_LIGATURE_LONG_S_T
+                        && ASCII_FOLD_RESTRICTED)
+                    {
+                        value = LATIN_SMALL_LIGATURE_ST;
                      }
                  }
  
-                if (op != END) {
-                    U8 len = (UTF) ? UVCHR_SKIP(value) : 1;
+                len = (UTF) ? UVCHR_SKIP(value) : 1;
  
-                    ret = regnode_guts(pRExC_state, op, len, "exact");
-                    FILL_NODE(ret, op);
-                    RExC_emit += 1 + STR_SZ(len);
-                    STR_LEN(REGNODE_p(ret)) = len;
-                    if (len == 1) {
-                        *STRING(REGNODE_p(ret)) = (U8) value;
-                    }
-                    else {
-                        uvchr_to_utf8((U8 *) STRING(REGNODE_p(ret)), value);
-                    }
-                    goto not_anyof;
+                ret = regnode_guts(pRExC_state, op, len, "exact");
+                FILL_NODE(ret, op);
+                RExC_emit += 1 + STR_SZ(len);
+                setSTR_LEN(REGNODE_p(ret), len);
+                if (len == 1) {
+                    *STRINGs(REGNODE_p(ret)) = (U8) value;
                  }
+                else {
+                    uvchr_to_utf8((U8 *) STRINGs(REGNODE_p(ret)), value);
+                }
+                goto not_anyof;
              }
          }
  
@@ -18816,7 +19198,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
  
              if (invlist_highest(cp_list) <= max_permissible) {
                  UV this_start, this_end;
-                UV lowest_cp = UV_MAX;  /* inited to suppress compiler warn */
+                UV lowest_cp = UV_MAX;  /* init'ed to suppress compiler warn */
                  U8 bits_differing = 0;
                  Size_t full_cp_count = 0;
                  bool first_time = TRUE;
@@ -18853,7 +19235,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
  
                      full_cp_count += this_end - this_start + 1;
                  }
-                invlist_iterfinish(cp_list);
  
                  /* At the end of the loop, we count how many bits differ from
                   * the bits in lowest code point, call the count 'd'.  If the
@@ -18882,8 +19263,10 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                      ret = reganode(pRExC_state, op, lowest_cp);
                      FLAGS(REGNODE_p(ret)) = ANYOFM_mask;
                  }
+
+              done_anyofm:
+                invlist_iterfinish(cp_list);
              }
-          done_anyofm:
  
              if (inverted) {
                  _invlist_invert(cp_list);
@@ -18892,6 +19275,13 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
              if (op != END) {
                  goto not_anyof;
              }
+
+            /* XXX We could create an ANYOFR_LOW node here if we saved above if
+             * all were invariants, it wasn't inverted, and there is a single
+             * range.  This would be faster than some of the posix nodes we
+             * create below like /\d/a, but would be twice the size.  Without
+             * having actually measured the gain, khw doesn't think the
+             * tradeoff is really worth it */
          }
  
          if (! (anyof_flags & ANYOF_LOCALE_FLAGS)) {
@@ -19008,52 +19398,163 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
              SvREFCNT_dec(intersection);
          }
  
-        /* If didn't find an optimization and there is no need for a
-        * bitmap, optimize to indicate that */
+        /* If it is a single contiguous range, ANYOFR is an efficient regnode,
+         * both in size and speed.  Currently, a 20 bit range base (smallest
+         * code point in the range), and a 12 bit maximum delta are packed into
+         * a 32 bit word.  This allows for using it on all of the Unicode code
+         * points except for the highest plane, which is only for private use
+         * code points.  khw doubts that a bigger delta is likely in real world
+         * applications */
+        if (     single_range
+            && ! has_runtime_dependency
+            &&   anyof_flags == 0
+            &&   start[0] < (1 << ANYOFR_BASE_BITS)
+            &&   end[0] - start[0]
+                    < ((1U << (sizeof(((struct regnode_1 *)NULL)->arg1)
+                                   * CHARBITS - ANYOFR_BASE_BITS))))
+
+        {
+            U8 low_utf8[UTF8_MAXBYTES+1];
+            U8 high_utf8[UTF8_MAXBYTES+1];
+
+            ret = reganode(pRExC_state, ANYOFR,
+                        (start[0] | (end[0] - start[0]) << ANYOFR_BASE_BITS));
+
+            /* Place the lowest UTF-8 start byte in the flags field, so as to
+             * allow efficient ruling out at run time of many possible inputs.
+             * */
+            (void) uvchr_to_utf8(low_utf8, start[0]);
+            (void) uvchr_to_utf8(high_utf8, end[0]);
+
+            /* If all code points share the same first byte, this can be an
+             * ANYOFRb.  Otherwise store the lowest UTF-8 start byte which can
+             * quickly rule out many inputs at run-time without having to
+             * compute the code point from UTF-8.  For EBCDIC, we use I8, as
+             * not doing that transformation would not rule out nearly so many
+             * things */
+            if (low_utf8[0] == high_utf8[0]) {
+                OP(REGNODE_p(ret)) = ANYOFRb;
+                ANYOF_FLAGS(REGNODE_p(ret)) = low_utf8[0];
+            }
+            else {
+                ANYOF_FLAGS(REGNODE_p(ret))
+                                    = NATIVE_UTF8_TO_I8(low_utf8[0]);
+            }
+
+            goto not_anyof;
+        }
+
+        /* If didn't find an optimization and there is no need for a bitmap,
+         * optimize to indicate that */
          if (     start[0] >= NUM_ANYOF_CODE_POINTS
              && ! LOC
              && ! upper_latin1_only_utf8_matches
              &&   anyof_flags == 0)
          {
+            U8 low_utf8[UTF8_MAXBYTES+1];
              UV highest_cp = invlist_highest(cp_list);
  
-            /* If the lowest and highest code point in the class have the same
-             * UTF-8 first byte, then all do, and we can store that byte for
-             * regexec.c to use so that it can more quickly scan the target
-             * string for potential matches for this class.  We co-opt the the
-             * flags field for this.  Zero means, they don't have the same
-             * first byte.  We do accept here very large code points (for
-             * future use), but don't bother with this optimization for them,
-             * as it would cause other complications */
-            if (highest_cp > IV_MAX) {
-                anyof_flags = 0;
-            }
-            else {
-                U8 low_utf8[UTF8_MAXBYTES+1];
+            /* Currently the maximum allowed code point by the system is
+             * IV_MAX.  Higher ones are reserved for future internal use.  This
+             * particular regnode can be used for higher ones, but we can't
+             * calculate the code point of those.  IV_MAX suffices though, as
+             * it will be a large first byte */
+            Size_t low_len = uvchr_to_utf8(low_utf8, MIN(start[0], IV_MAX))
+                           - low_utf8;
+
+            /* We store the lowest possible first byte of the UTF-8
+             * representation, using the flags field.  This allows for quick
+             * ruling out of some inputs without having to convert from UTF-8
+             * to code point.  For EBCDIC, we use I8, as not doing that
+             * transformation would not rule out nearly so many things */
+            anyof_flags = NATIVE_UTF8_TO_I8(low_utf8[0]);
+
+            op = ANYOFH;
+
+            /* If the first UTF-8 start byte for the highest code point in the
+             * range is suitably small, we may be able to get an upper bound as
+             * well */
+            if (highest_cp <= IV_MAX) {
                  U8 high_utf8[UTF8_MAXBYTES+1];
+                Size_t high_len = uvchr_to_utf8(high_utf8, highest_cp)
+                                - high_utf8;
+
+                /* If the lowest and highest are the same, we can get an exact
+                 * first byte instead of a just minimum or even a sequence of
+                 * exact leading bytes.  We signal these with different
+                 * regnodes */
+                if (low_utf8[0] == high_utf8[0]) {
+                    Size_t len = find_first_differing_byte_pos(low_utf8,
+                                                               high_utf8,
+                                                       MIN(low_len, high_len));
  
-                (void) uvchr_to_utf8(low_utf8, start[0]);
-                (void) uvchr_to_utf8(high_utf8, invlist_highest(cp_list));
+                    if (len == 1) {
  
-                anyof_flags = (low_utf8[0] == high_utf8[0])
-                            ? low_utf8[0]
-                            : 0;
+                        /* No need to convert to I8 for EBCDIC as this is an
+                         * exact match */
+                        anyof_flags = low_utf8[0];
+                        op = ANYOFHb;
+                    }
+                    else {
+                        op = ANYOFHs;
+                        ret = regnode_guts(pRExC_state, op,
+                                           regarglen[op] + STR_SZ(len),
+                                           "anyofhs");
+                        FILL_NODE(ret, op);
+                        ((struct regnode_anyofhs *) REGNODE_p(ret))->str_len
+                                                                        = len;
+                        Copy(low_utf8,  /* Add the common bytes */
+                           ((struct regnode_anyofhs *) REGNODE_p(ret))->string,
+                           len, U8);
+                        RExC_emit += NODE_SZ_STR(REGNODE_p(ret));
+                        set_ANYOF_arg(pRExC_state, REGNODE_p(ret), cp_list,
+                                                  NULL, only_utf8_locale_list);
+                        goto not_anyof;
+                    }
+                }
+                else if (NATIVE_UTF8_TO_I8(high_utf8[0]) <= MAX_ANYOF_HRx_BYTE)
+                {
+
+                    /* Here, the high byte is not the same as the low, but is
+                     * small enough that its reasonable to have a loose upper
+                     * bound, which is packed in with the strict lower bound.
+                     * See comments at the definition of MAX_ANYOF_HRx_BYTE.
+                     * On EBCDIC platforms, I8 is used.  On ASCII platforms I8
+                     * is the same thing as UTF-8 */
+
+                    U8 bits = 0;
+                    U8 max_range_diff = MAX_ANYOF_HRx_BYTE - anyof_flags;
+                    U8 range_diff = NATIVE_UTF8_TO_I8(high_utf8[0])
+                                  - anyof_flags;
+
+                    if (range_diff <= max_range_diff / 8) {
+                        bits = 3;
+                    }
+                    else if (range_diff <= max_range_diff / 4) {
+                        bits = 2;
+                    }
+                    else if (range_diff <= max_range_diff / 2) {
+                        bits = 1;
+                    }
+                    anyof_flags = (anyof_flags - 0xC0) << 2 | bits;
+                    op = ANYOFHr;
+                }
              }
  
-            op = ANYOFH;
+            goto done_finding_op;
          }
      }   /* End of seeing if can optimize it into a different node */
  
    is_anyof: /* It's going to be an ANYOF node. */
-    if (op != ANYOFH) {
-        op = (has_runtime_dependency & HAS_D_RUNTIME_DEPENDENCY)
-             ? ANYOFD
-             : ((posixl)
-                ? ANYOFPOSIXL
-                : ((LOC)
-                   ? ANYOFL
-                   : ANYOF));
-    }
+    op = (has_runtime_dependency & HAS_D_RUNTIME_DEPENDENCY)
+         ? ANYOFD
+         : ((posixl)
+            ? ANYOFPOSIXL
+            : ((LOC)
+               ? ANYOFL
+               : ANYOF));
+
+  done_finding_op:
  
      ret = regnode_guts(pRExC_state, op, regarglen[op], "anyof");
      FILL_NODE(ret, op);        /* We set the argument later */
@@ -19095,8 +19596,11 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
  
      set_ANYOF_arg(pRExC_state, REGNODE_p(ret), cp_list,
                    (HAS_NONLOCALE_RUNTIME_PROPERTY_DEFINITION)
-                   ? listsv : NULL,
+                   ? listsv
+                   : NULL,
                    only_utf8_locale_list);
+    SvREFCNT_dec(cp_list);;
+    SvREFCNT_dec(only_utf8_locale_list);
      return ret;
  
    not_anyof:
@@ -19107,6 +19611,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
      Set_Node_Offset_Length(REGNODE_p(ret), orig_parse - RExC_start,
                                             RExC_parse - orig_parse);;
      SvREFCNT_dec(cp_list);;
+    SvREFCNT_dec(only_utf8_locale_list);
      return ret;
  }
  
@@ -19147,15 +19652,17 @@ S_set_ANYOF_arg(pTHX_ RExC_state_t* const pRExC_state,
         SV *rv;
  
          if (cp_list) {
-            av_store(av, INVLIST_INDEX, cp_list);
+            av_store(av, INVLIST_INDEX, SvREFCNT_inc_NN(cp_list));
          }
  
          if (only_utf8_locale_list) {
-            av_store(av, ONLY_LOCALE_MATCHES_INDEX, only_utf8_locale_list);
+            av_store(av, ONLY_LOCALE_MATCHES_INDEX,
+                                     SvREFCNT_inc_NN(only_utf8_locale_list));
          }
  
          if (runtime_defns) {
-            av_store(av, DEFERRED_USER_DEFINED_INDEX, SvREFCNT_inc(runtime_defns));
+            av_store(av, DEFERRED_USER_DEFINED_INDEX,
+                         SvREFCNT_inc_NN(runtime_defns));
          }
  
         rv = newRV_noinc(MUTABLE_SV(av));
@@ -19265,10 +19772,10 @@ Perl__get_regclass_nonbitmap_data(pTHX_ const regexp *prog,
                      STATIC_ASSERT_STMT(ONLY_LOCALE_MATCHES_INDEX == 1 + INVLIST_INDEX);
                      STATIC_ASSERT_STMT(DEFERRED_USER_DEFINED_INDEX == 1 + ONLY_LOCALE_MATCHES_INDEX);
  
-                    av_store(av, INVLIST_INDEX, invlist);
+                    ary[INVLIST_INDEX] = invlist;
                      av_fill(av, (ary[ONLY_LOCALE_MATCHES_INDEX])
-                                 ? ONLY_LOCALE_MATCHES_INDEX:
-                                 INVLIST_INDEX);
+                                 ? ONLY_LOCALE_MATCHES_INDEX
+                                 : INVLIST_INDEX);
                      si = NULL;
                  }
             }
@@ -19301,15 +19808,11 @@ Perl__get_regclass_nonbitmap_data(pTHX_ const regexp *prog,
                  UV prev_cp = 0;
                  U8 count = 0;
  
-                /* Ignore everything before the first new-line */
-                while (*si_string != '\n' && remaining > 0) {
-                    si_string++;
-                    remaining--;
-                }
-                assert(remaining > 0);
-
+                /* Ignore everything before and including the first new-line */
+                si_string = (const char *) memchr(si_string, '\n', SvCUR(si));
+                assert (si_string != NULL);
                  si_string++;
-                remaining--;
+                remaining = SvPVX(si) + SvCUR(si) - si_string;
  
                  while (remaining > 0) {
  
@@ -19351,26 +19854,34 @@ Perl__get_regclass_nonbitmap_data(pTHX_ const regexp *prog,
                          continue;
                      }
  
-                    /* Here, didn't find a legal hex number.  Just add it from
-                     * here to the next \n */
+                    /* Here, didn't find a legal hex number.  Just add the text
+                     * from here up to the next \n, omitting any trailing
+                     * markers. */
  
                      remaining -= len;
-                    while (*(si_string + len) != '\n' && remaining > 0) {
-                        remaining--;
-                        len++;
-                    }
-                    if (*(si_string + len) == '\n') {
-                        len++;
-                        remaining--;
-                    }
+                    len = strcspn(si_string,
+                                        DEFERRED_PROP_EXPANSION_MARKERs "\n");
+                    remaining -= len;
                      if (matches_string) {
-                        sv_catpvn(matches_string, si_string, len - 1);
+                        sv_catpvn(matches_string, si_string, len);
                      }
                      else {
-                        matches_string = newSVpvn(si_string, len - 1);
+                        matches_string = newSVpvn(si_string, len);
                      }
-                    si_string += len;
                      sv_catpvs(matches_string, " ");
+
+                    si_string += len;
+                    if (   remaining
+                        && UCHARAT(si_string)
+                                            == DEFERRED_PROP_EXPANSION_MARKERc)
+                    {
+                        si_string++;
+                        remaining--;
+                    }
+                    if (remaining && UCHARAT(si_string) == '\n') {
+                        si_string++;
+                        remaining--;
+                    }
                  } /* end of loop through the text */
  
                  assert(matches_string);
@@ -19529,8 +20040,9 @@ S_nextchar(pTHX_ RExC_state_t *pRExC_state)
  STATIC void
  S_change_engine_size(pTHX_ RExC_state_t *pRExC_state, const Ptrdiff_t size)
  {
-    /* 'size' is the delta to add or subtract from the current memory allocated
-     * to the regex engine being constructed */
+    /* 'size' is the delta number of smallest regnode equivalents to add or
+     * subtract from the current memory allocated to the regex engine being
+     * constructed. */
  
      PERL_ARGS_ASSERT_CHANGE_ENGINE_SIZE;
  
@@ -19562,8 +20074,8 @@ S_change_engine_size(pTHX_ RExC_state_t *pRExC_state, const Ptrdiff_t size)
  STATIC regnode_offset
  S_regnode_guts(pTHX_ RExC_state_t *pRExC_state, const U8 op, const STRLEN extra_size, const char* const name)
  {
-    /* Allocate a regnode for 'op', with 'extra_size' extra space.  It aligns
-     * and increments RExC_size and RExC_emit
+    /* Allocate a regnode for 'op', with 'extra_size' extra (smallest) regnode
+     * equivalents space.  It aligns and increments RExC_size
       *
       * It returns the regnode's offset into the regex engine program */
  
@@ -19804,14 +20316,15 @@ S_regtail(pTHX_ RExC_state_t * pRExC_state,
          scan = REGNODE_OFFSET(temp);
      }
  
+    assert(val >= scan);
      if (reg_off_by_arg[OP(REGNODE_p(scan))]) {
          assert((UV) (val - scan) <= U32_MAX);
          ARG_SET(REGNODE_p(scan), val - scan);
      }
      else {
          if (val - scan > U16_MAX) {
-            /* Since not all callers check the return value, populate this with
-             * something that won't loop and will likely lead to a crash if
+            /* Populate this with something that won't loop and will likely
+             * lead to a crash if the caller ignores the failure return, and
               * execution continues */
              NEXT_OFF(REGNODE_p(scan)) = U16_MAX;
              return FALSE;
@@ -19872,15 +20385,17 @@ S_regtail_study(pTHX_ RExC_state_t *pRExC_state, regnode_offset p,
  #endif
          if ( exact ) {
              switch (OP(REGNODE_p(scan))) {
+                case LEXACT:
                  case EXACT:
-                case EXACT_ONLY8:
+                case LEXACT_REQ8:
+                case EXACT_REQ8:
                  case EXACTL:
                  case EXACTF:
                  case EXACTFU_S_EDGE:
                  case EXACTFAA_NO_TRIE:
                  case EXACTFAA:
                  case EXACTFU:
-                case EXACTFU_ONLY8:
+                case EXACTFU_REQ8:
                  case EXACTFLU8:
                  case EXACTFUP:
                  case EXACTFL:
@@ -19922,6 +20437,9 @@ S_regtail_study(pTHX_ RExC_state_t *pRExC_state, regnode_offset p,
      }
      else {
          if (val - scan > U16_MAX) {
+            /* Populate this with something that won't loop and will likely
+             * lead to a crash if the caller ignores the failure return, and
+             * execution continues */
              NEXT_OFF(REGNODE_p(scan)) = U16_MAX;
              return FALSE;
          }
@@ -20199,11 +20717,16 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_
  
      SvPVCLEAR(sv);
  
-    if (OP(o) > REGNODE_MAX)           /* regnode.type is unsigned */
-       /* It would be nice to FAIL() here, but this may be called from
-          regexec.c, and it would be hard to supply pRExC_state. */
-       Perl_croak(aTHX_ "Corrupted regexp opcode %d > %d",
-                                              (int)OP(o), (int)REGNODE_MAX);
+    if (OP(o) > REGNODE_MAX) {          /* regnode.type is unsigned */
+        if (pRExC_state) {  /* This gives more info, if we have it */
+            FAIL3("panic: corrupted regexp opcode %d > %d",
+                  (int)OP(o), (int)REGNODE_MAX);
+        }
+        else {
+            Perl_croak(aTHX_ "panic: corrupted regexp opcode %d > %d",
+                             (int)OP(o), (int)REGNODE_MAX);
+        }
+    }
      sv_catpv(sv, PL_reg_name[OP(o)]); /* Take off const! */
  
      k = PL_regkind[OP(o)];
@@ -20257,6 +20780,7 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_
                                                  NULL,
                                                  NULL,
                                                  NULL,
+                                                0,
                                                  FALSE
                                                 );
              sv_catpvs(sv, "]");
@@ -20286,7 +20810,7 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_
              name_list= RExC_paren_name_list;
          }
          if (name_list) {
-            if ( k != REF || (OP(o) < NREF)) {
+            if ( k != REF || (OP(o) < REFN)) {
                  SV **name= av_fetch(name_list, parno, 0 );
                 if (name)
                     Perl_sv_catpvf(aTHX_ sv, " '%" SVf "'", SVfARG(*name));
@@ -20339,8 +20863,10 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_
      else if (k == LOGICAL)
          /* 2: embedded, otherwise 1 */
         Perl_sv_catpvf(aTHX_ sv, "[%d]", o->flags);
-    else if (k == ANYOF) {
-       const U8 flags = (OP(o) == ANYOFH) ? 0 : ANYOF_FLAGS(o);
+    else if (k == ANYOF || k == ANYOFR) {
+        U8 flags;
+        char * bitmap;
+        U32 arg;
          bool do_sep = FALSE;    /* Do we need to separate various components of
                                     the output? */
          /* Set if there is still an unresolved user-defined property */
@@ -20355,7 +20881,18 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_
          /* And things that aren't in the bitmap, but are small enough to be */
          SV* bitmap_range_not_in_bitmap = NULL;
  
-        const bool inverted = flags & ANYOF_INVERT;
+        bool inverted;
+
+        if (inRANGE(OP(o), ANYOFH, ANYOFRb)) {
+            flags = 0;
+            bitmap = NULL;
+            arg = 0;
+        }
+        else {
+            flags = ANYOF_FLAGS(o);
+            bitmap = ANYOF_BITMAP(o);
+            arg = ARG(o);
+        }
  
         if (OP(o) == ANYOFL || OP(o) == ANYOFPOSIXL) {
              if (ANYOFL_UTF8_LOCALE_REQD(flags)) {
@@ -20366,17 +20903,27 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_
              }
          }
  
+        inverted = flags & ANYOF_INVERT;
+
          /* If there is stuff outside the bitmap, get it */
-        if (ARG(o) != ANYOF_ONLY_HAS_BITMAP) {
-            (void) _get_regclass_nonbitmap_data(prog, o, FALSE,
+        if (arg != ANYOF_ONLY_HAS_BITMAP) {
+            if (inRANGE(OP(o), ANYOFR, ANYOFRb)) {
+                nonbitmap_invlist = _add_range_to_invlist(nonbitmap_invlist,
+                                            ANYOFRbase(o),
+                                            ANYOFRbase(o) + ANYOFRdelta(o));
+            }
+            else {
+                (void) _get_regclass_nonbitmap_data(prog, o, FALSE,
                                                  &unresolved,
                                                  &only_utf8_locale_invlist,
                                                  &nonbitmap_invlist);
+            }
+
              /* The non-bitmap data may contain stuff that could fit in the
               * bitmap.  This could come from a user-defined property being
               * finally resolved when this call was done; or much more likely
               * because there are matches that require UTF-8 to be valid, and so
-             * aren't in the bitmap.  This is teased apart later */
+             * aren't in the bitmap (or ANYOFR).  This is teased apart later */
              _invlist_intersection(nonbitmap_invlist,
                                    PL_InBitmap,
                                    &bitmap_range_not_in_bitmap);
@@ -20396,19 +20943,26 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_
          /* Ready to start outputting.  First, the initial left bracket */
         Perl_sv_catpvf(aTHX_ sv, "[%s", PL_colors[0]);
  
-        if (OP(o) != ANYOFH) {
+        /* ANYOFH by definition doesn't have anything that will fit inside the
+         * bitmap;  ANYOFR may or may not. */
+        if (  ! inRANGE(OP(o), ANYOFH, ANYOFHr)
+            && (   ! inRANGE(OP(o), ANYOFR, ANYOFRb)
+                ||   ANYOFRbase(o) < NUM_ANYOF_CODE_POINTS))
+        {
              /* Then all the things that could fit in the bitmap */
              do_sep = put_charclass_bitmap_innards(sv,
-                                                  ANYOF_BITMAP(o),
+                                                  bitmap,
                                                    bitmap_range_not_in_bitmap,
                                                    only_utf8_locale_invlist,
                                                    o,
+                                                  flags,
  
                                                    /* Can't try inverting for a
                                                     * better display if there
                                                     * are things that haven't
                                                     * been resolved */
-                                                  unresolved != NULL);
+                                                  unresolved != NULL
+                                            || inRANGE(OP(o), ANYOFR, ANYOFRb));
              SvREFCNT_dec(bitmap_range_not_in_bitmap);
  
              /* If there are user-defined properties which haven't been defined
@@ -20494,10 +21048,24 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_
          /* And finally the matching, closing ']' */
         Perl_sv_catpvf(aTHX_ sv, "%s]", PL_colors[1]);
  
-        if (OP(o) == ANYOFH && FLAGS(o) != 0) {
-            Perl_sv_catpvf(aTHX_ sv, " (First UTF-8 byte=\\x%02x)", FLAGS(o));
+        if (OP(o) == ANYOFHs) {
+            Perl_sv_catpvf(aTHX_ sv, " (Leading UTF-8 bytes=%s", _byte_dump_string((U8 *) ((struct regnode_anyofhs *) o)->string, FLAGS(o), 1));
+        }
+        else if (inRANGE(OP(o), ANYOFH, ANYOFRb)) {
+            U8 lowest = (OP(o) != ANYOFHr)
+                         ? FLAGS(o)
+                         : LOWEST_ANYOF_HRx_BYTE(FLAGS(o));
+            U8 highest = (OP(o) == ANYOFHr)
+                         ? HIGHEST_ANYOF_HRx_BYTE(FLAGS(o))
+                         : (OP(o) == ANYOFH || OP(o) == ANYOFR)
+                           ? 0xFF
+                           : lowest;
+            Perl_sv_catpvf(aTHX_ sv, " (First UTF-8 byte=%02X", lowest);
+            if (lowest != highest) {
+                Perl_sv_catpvf(aTHX_ sv, "-%02X", highest);
+            }
+            Perl_sv_catpvf(aTHX_ sv, ")");
          }
-
  
          SvREFCNT_dec(unresolved);
      }
@@ -20509,7 +21077,7 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_
              _invlist_invert(cp_list);
          }
  
-        put_charclass_bitmap_innards(sv, NULL, cp_list, NULL, NULL, TRUE);
+        put_charclass_bitmap_innards(sv, NULL, cp_list, NULL, NULL, 0, TRUE);
         Perl_sv_catpvf(aTHX_ sv, "%s]", PL_colors[1]);
  
          SvREFCNT_dec(cp_list);
@@ -21427,9 +21995,14 @@ S_put_range(pTHX_ SV *sv, UV start, const UV end, const bool allow_literals)
  
          /* As a final resort, output the range or subrange as hex. */
  
-        this_end = (end < NUM_ANYOF_CODE_POINTS)
-                    ? end
-                    : NUM_ANYOF_CODE_POINTS - 1;
+        if (start >= NUM_ANYOF_CODE_POINTS) {
+            this_end = end;
+        }
+        else {  /* Have to split range at the bitmap boundary */
+            this_end = (end < NUM_ANYOF_CODE_POINTS)
+                        ? end
+                        : NUM_ANYOF_CODE_POINTS - 1;
+        }
  #if NUM_ANYOF_CODE_POINTS > 256
          format = (this_end < 256)
                   ? "\\x%02" UVXf "-\\x%02" UVXf
@@ -21585,6 +22158,7 @@ S_put_charclass_bitmap_innards(pTHX_ SV *sv,
                                       SV *nonbitmap_invlist,
                                       SV *only_utf8_locale_invlist,
                                       const regnode * const node,
+                                     const U8 flags,
                                       const bool force_as_is_display)
  {
      /* Appends to 'sv' a displayable version of the innards of the bracketed
@@ -21601,6 +22175,7 @@ S_put_charclass_bitmap_innards(pTHX_ SV *sv,
       *  'node' is the regex pattern ANYOF node.  It is needed only when the
       *      above two parameters are not null, and is passed so that this
       *      routine can tease apart the various reasons for them.
+     *  'flags' is the flags field of 'node'
       *  'force_as_is_display' is TRUE if this routine should definitely NOT try
       *      to invert things to see if that leads to a cleaner display.  If
       *      FALSE, this routine is free to use its judgment about doing this.
@@ -21639,8 +22214,6 @@ S_put_charclass_bitmap_innards(pTHX_ SV *sv,
                                 literally */
      SV* inverted_display;   /* The output string when we invert the inputs */
  
-    U8 flags = (node) ? ANYOF_FLAGS(node) : 0;
-
      bool invert = cBOOL(flags & ANYOF_INVERT);  /* Is the input to be inverted
                                                     to match? */
      /* We are biased in favor of displaying things without them being inverted,
@@ -21986,7 +22559,7 @@ S_dumpuntil(pTHX_ const regexp *r, const regnode *start, const regnode *node,
         else if ( op == PLUS || op == STAR) {
             DUMPUNTIL(NEXTOPER(node), NEXTOPER(node) + 1);
         }
-       else if (PL_regkind[(U8)op] == EXACT) {
+       else if (PL_regkind[(U8)op] == EXACT || op == ANYOFHs) {
              /* Literal string, where present. */
             node += NODE_SZ_STR(node) - 1;
             node = NEXTOPER(node);
@@ -22016,6 +22589,17 @@ Perl_init_uniprops(pTHX)
  {
      dVAR;
  
+#ifdef DEBUGGING
+    char * dump_len_string;
+
+    dump_len_string = PerlEnv_getenv("PERL_DUMP_RE_MAX_LEN");
+    if (   ! dump_len_string
+        || ! grok_atoUV(dump_len_string, (UV *)&PL_dump_re_max_len, NULL))
+    {
+        PL_dump_re_max_len = 60;    /* A reasonable default */
+    }
+#endif
+
      PL_user_def_props = newHV();
  
  #ifdef USE_ITHREADS
@@ -22025,7 +22609,7 @@ Perl_init_uniprops(pTHX)
  
  #endif
  
-    /* Set up the inversion list global variables */
+    /* Set up the inversion list interpreter-level variables */
  
      PL_XPosix_ptrs[_CC_ASCII] = _new_invlist_C_array(uni_prop_ptrs[UNI_ASCII]);
      PL_XPosix_ptrs[_CC_ALPHANUMERIC] = _new_invlist_C_array(uni_prop_ptrs[UNI_XPOSIXALNUM]);
@@ -22067,6 +22651,7 @@ Perl_init_uniprops(pTHX)
      PL_LB_invlist = _new_invlist_C_array(_Perl_LB_invlist);
      PL_SCX_invlist = _new_invlist_C_array(_Perl_SCX_invlist);
  
+    PL_InBitmap = _new_invlist_C_array(_Perl_InBitmap_invlist);
      PL_AboveLatin1 = _new_invlist_C_array(AboveLatin1_invlist);
      PL_Latin1 = _new_invlist_C_array(Latin1_invlist);
      PL_UpperLatin1 = _new_invlist_C_array(UpperLatin1_invlist);
@@ -22084,9 +22669,6 @@ Perl_init_uniprops(pTHX)
                                              UNI__PERL_FOLDS_TO_MULTI_CHAR]);
      PL_InMultiCharFold = _new_invlist_C_array(uni_prop_ptrs[
                                              UNI__PERL_IS_IN_MULTI_CHAR_FOLD]);
-    PL_NonFinalFold = _new_invlist_C_array(uni_prop_ptrs[
-                                            UNI__PERL_NON_FINAL_FOLDS]);
-
      PL_utf8_toupper = _new_invlist_C_array(Uppercase_Mapping_invlist);
      PL_utf8_tolower = _new_invlist_C_array(Lowercase_Mapping_invlist);
      PL_utf8_totitle = _new_invlist_C_array(Titlecase_Mapping_invlist);
@@ -22480,6 +23062,38 @@ S_delete_recursion_entry(pTHX_ void *key)
      RESTORE_CONTEXT;
  }
  
+STATIC SV *
+S_get_fq_name(pTHX_
+              const char * const name,    /* The first non-blank in the \p{}, \P{} */
+              const Size_t name_len,      /* Its length in bytes, not including any trailing space */
+              const bool is_utf8,         /* ? Is 'name' encoded in UTF-8 */
+              const bool has_colon_colon
+             )
+{
+    /* Returns a mortal SV containing the fully qualified version of the input
+     * name */
+
+    SV * fq_name;
+
+    fq_name = newSVpvs_flags("", SVs_TEMP);
+
+    /* Use the current package if it wasn't included in our input */
+    if (! has_colon_colon) {
+        const HV * pkg = (IN_PERL_COMPILETIME)
+                         ? PL_curstash
+                         : CopSTASH(PL_curcop);
+        const char* pkgname = HvNAME(pkg);
+
+        Perl_sv_catpvf(aTHX_ fq_name, "%" UTF8f,
+                      UTF8fARG(is_utf8, strlen(pkgname), pkgname));
+        sv_catpvs(fq_name, "::");
+    }
+
+    Perl_sv_catpvf(aTHX_ fq_name, "%" UTF8f,
+                         UTF8fARG(is_utf8, name_len, name));
+    return fq_name;
+}
+
  SV *
  Perl_parse_uniprop_string(pTHX_
  
@@ -22503,7 +23117,7 @@ Perl_parse_uniprop_string(pTHX_
       * Other parameters will be set on return as described below */
  
      const char * const name,    /* The first non-blank in the \p{}, \P{} */
-    const Size_t name_len,      /* Its length in bytes, not including any
+    Size_t name_len,            /* Its length in bytes, not including any
                                     trailing space */
      const bool is_utf8,         /* ? Is 'name' encoded in UTF-8 */
      const bool to_fold,         /* ? Is this under /i */
@@ -22534,8 +23148,7 @@ Perl_parse_uniprop_string(pTHX_
      int slash_pos  = -1;    /* Where the '/' is found, or negative if none */
      int table_index = 0;    /* The entry number for this property in the table
                                 of all Unicode property names */
-    bool starts_with_In_or_Is = FALSE;  /* ? Does the name start with 'In' or
-                                             'Is' */
+    bool starts_with_Is = FALSE;  /* ? Does the name start with 'Is' */
      Size_t lookup_offset = 0;   /* Used to ignore the first few characters of
                                     the normalized name in certain situations */
      Size_t non_pkg_begin = 0;   /* Offset of first byte in 'name' that isn't
@@ -22548,8 +23161,13 @@ Perl_parse_uniprop_string(pTHX_
                                       it is the definition.  Otherwise it is a
                                       string containing the fully qualified sub
                                       name of 'name' */
+    SV * fq_name = NULL;        /* For user-defined properties, the fully
+                                   qualified name */
      bool invert_return = FALSE; /* ? Do we need to complement the result before
                                       returning it */
+    bool stripped_utf8_pkg = FALSE; /* Set TRUE if the input includes an
+                                       explicit utf8:: package that we strip
+                                       off  */
  
      PERL_ARGS_ASSERT_PARSE_UNIPROP_STRING;
  
@@ -22608,6 +23226,17 @@ Perl_parse_uniprop_string(pTHX_
              break;
          }
  
+        /* If this looks like it is a marker we inserted at compile time,
+         * ignore it; otherwise keep it as it would have been user input. */
+        if (     UNLIKELY(cur == DEFERRED_PROP_EXPANSION_MARKERc)
+            && ! deferrable
+            &&   could_be_user_defined
+            &&   i == name_len - 1)
+        {
+            name_len--;
+            continue;
+        }
+
          /* Otherwise, this character is part of the name. */
          lookup_name[j++] = cur;
  
@@ -22641,6 +23270,7 @@ Perl_parse_uniprop_string(pTHX_
          lookup_name +=  STRLENs("utf8::");
          j -=  STRLENs("utf8::");
          equals_pos -=  STRLENs("utf8::");
+        stripped_utf8_pkg = TRUE;
      }
  
      /* Here, we are either done with the whole property name, if it was simple;
@@ -22660,10 +23290,13 @@ Perl_parse_uniprop_string(pTHX_
          /* Most punctuation after the equals indicates a subpattern, like
           * \p{foo=/bar/} */
          if (   isPUNCT_A(name[i])
-            && name[i] != '-'
-            && name[i] != '+'
-            && name[i] != '_'
-            && name[i] != '{')
+            &&  name[i] != '-'
+            &&  name[i] != '+'
+            &&  name[i] != '_'
+            &&  name[i] != '{'
+                /* A backslash means the real delimitter is the next character,
+                 * but it must be punctuation */
+            && (name[i] != '\\' || (i < name_len && isPUNCT_A(name[i+1]))))
          {
              /* Find the property.  The table includes the equals sign, so we
               * use 'j' as-is */
@@ -22679,8 +23312,8 @@ Perl_parse_uniprop_string(pTHX_
                  const char * pos_in_brackets;
                  bool escaped = 0;
  
-                /* A backslash means the real delimitter is the next character.
-                 * */
+                /* Backslash => delimitter is the character following.  We
+                 * already checked that it is punctuation */
                  if (open == '\\') {
                      open = name[i++];
                      escaped = 1;
@@ -22694,7 +23327,8 @@ Perl_parse_uniprop_string(pTHX_
                  pos_in_brackets = strchr("([<)]>)]>", open);
                  close = (pos_in_brackets) ? pos_in_brackets[3] : open;
  
-                if (   name[name_len-1] != close
+                if (    i >= name_len
+                    ||  name[name_len-1] != close
                      || (escaped && name[name_len-2] != '\\'))
                  {
                      sv_catpvs(msg, "Unicode property wildcard not terminated");
@@ -22785,7 +23419,7 @@ Perl_parse_uniprop_string(pTHX_
                                  Titlecase Mapping  (both full and simple)
                                  Uppercase Mapping  (both full and simple)
               * Move the part that looks at the property values into a perl
-             * script, like utf8_heavy.pl is done.  This makes things somewhat
+             * script, like utf8_heavy.pl was done.  This makes things somewhat
               * easier, but most importantly, it avoids always adding all these
               * strings to the memory usage when the feature is little-used.
               *
@@ -22822,7 +23456,7 @@ Perl_parse_uniprop_string(pTHX_
          /* Certain properties whose values are numeric need special handling.
           * They may optionally be prefixed by 'is'.  Ignore that prefix for the
           * purposes of checking if this is one of those properties */
-        if (memBEGINPs(lookup_name, name_len, "is")) {
+        if (memBEGINPs(lookup_name, j, "is")) {
              lookup_offset = 2;
          }
  
@@ -22988,7 +23622,9 @@ Perl_parse_uniprop_string(pTHX_
              }
  
              /* Store the first real character in the denominator */
-            lookup_name[j++] = name[i];
+            if (i < name_len) {
+                lookup_name[j++] = name[i];
+            }
          }
      }
  
@@ -23006,11 +23642,15 @@ Perl_parse_uniprop_string(pTHX_
  
      /* If the original input began with 'In' or 'Is', it could be a subroutine
       * call to a user-defined property instead of a Unicode property name. */
-    if (    non_pkg_begin + name_len > 2
+    if (    name_len - non_pkg_begin > 2
          &&  name[non_pkg_begin+0] == 'I'
          && (name[non_pkg_begin+1] == 'n' || name[non_pkg_begin+1] == 's'))
      {
-        starts_with_In_or_Is = TRUE;
+        /* Names that start with In have different characterstics than those
+         * that start with Is */
+        if (name[non_pkg_begin+1] == 's') {
+            starts_with_Is = TRUE;
+        }
      }
      else {
          could_be_user_defined = FALSE;
@@ -23019,20 +23659,50 @@ Perl_parse_uniprop_string(pTHX_
      if (could_be_user_defined) {
          CV* user_sub;
  
+        /* If the user defined property returns the empty string, it could
+         * easily be because the pattern is being compiled before the data it
+         * actually needs to compile is available.  This could be argued to be
+         * a bug in the perl code, but this is a change of behavior for Perl,
+         * so we handle it.  This means that intentionally returning nothing
+         * will not be resolved until runtime */
+        bool empty_return = FALSE;
+
          /* Here, the name could be for a user defined property, which are
           * implemented as subs. */
          user_sub = get_cvn_flags(name, name_len, 0);
-        if (user_sub) {
+        if (! user_sub) {
+
+            /* Here, the property name could be a user-defined one, but there
+             * is no subroutine to handle it (as of now).   Defer handling it
+             * until runtime.  Otherwise, a block defined by Unicode in a later
+             * release would get the synonym InFoo added for it, and existing
+             * code that used that name would suddenly break if it referred to
+             * the property before the sub was declared.  See [perl #134146] */
+            if (deferrable) {
+                goto definition_deferred;
+            }
+
+            /* If we haven't already stripped the package name (if one), do so
+             * now so can look for an official property with the stripped name.
+             * */
+            if (! stripped_utf8_pkg) {
+                lookup_name += non_pkg_begin;
+                j -= non_pkg_begin;
+            }
+
+            /* Drop down to look up in the official properties */
+        }
+        else {
+            const char insecure[] = "Insecure user-defined property";
  
              /* Here, there is a sub by the correct name.  Normally we call it
               * to get the property definition */
              dSP;
              SV * user_sub_sv = MUTABLE_SV(user_sub);
              SV * error;     /* Any error returned by calling 'user_sub' */
-            SV * fq_name;   /* Fully qualified property name */
+            SV * key;       /* The key into the hash of user defined sub names
+                             */
              SV * placeholder;
-            char to_fold_string[] = "0:";   /* The 0 gets overwritten with the
-                                               actual value */
              SV ** saved_user_prop_ptr;      /* Hash entry for this property */
  
              /* How many times to retry when another thread is in the middle of
@@ -23044,11 +23714,11 @@ Perl_parse_uniprop_string(pTHX_
              /* If we get here, we know this property is user-defined */
              *user_defined_ptr = TRUE;
  
-            /* We refuse to call a tainted subroutine; returning an error
-             * instead */
+            /* We refuse to call a potentially tainted subroutine; returning an
+             * error instead */
              if (TAINT_get) {
                  if (SvCUR(msg) > 0) sv_catpvs(msg, "; ");
-                sv_catpvs(msg, "Insecure user-defined property");
+                sv_catpvn(msg, insecure, sizeof(insecure) - 1);
                  goto append_name_to_msg;
              }
  
@@ -23062,14 +23732,13 @@ Perl_parse_uniprop_string(pTHX_
               * should the need arise, passing the /i status as a parameter.
               *
               * We start by constructing the hash key name, consisting of the
-             * fully qualified subroutine name */
-            fq_name = sv_2mortal(newSV(10));    /* 10 is just a guess */
-            (void) cv_name(user_sub, fq_name, 0);
-
-            /* But precede the sub name in the key with the /i status, so that
-             * there is a key for /i and a different key for non-/i */
-            to_fold_string[0] = to_fold + '0';
-            sv_insert(fq_name, 0, 0, to_fold_string, 2);
+             * fully qualified subroutine name, preceded by the /i status, so
+             * that there is a key for /i and a different key for non-/i */
+            key = newSVpvn(((to_fold) ? "1" : "0"), 1);
+            fq_name = S_get_fq_name(aTHX_ name, name_len, is_utf8,
+                                          non_pkg_begin != 0);
+            sv_catsv(key, fq_name);
+            sv_2mortal(key);
  
              /* We only call the sub once throughout the life of the program
               * (with the /i, non-/i exception noted above).  That means the
@@ -23119,7 +23788,7 @@ Perl_parse_uniprop_string(pTHX_
              /* If we have an entry for this key, the subroutine has already
               * been called once with this /i status. */
              saved_user_prop_ptr = hv_fetch(PL_user_def_props,
-                                           SvPVX(fq_name), SvCUR(fq_name), 0);
+                                                   SvPVX(key), SvCUR(key), 0);
              if (saved_user_prop_ptr) {
  
                  /* If the saved result is an inversion list, it is the valid
@@ -23194,7 +23863,7 @@ Perl_parse_uniprop_string(pTHX_
               * */
              SWITCH_TO_GLOBAL_CONTEXT;
              placeholder= newSVuv(PTR2IV(ORIGINAL_CONTEXT));
-            (void) hv_store_ent(PL_user_def_props, fq_name, placeholder, 0);
+            (void) hv_store_ent(PL_user_def_props, key, placeholder, 0);
              RESTORE_CONTEXT;
  
              /* Now that we have a placeholder, we can let other threads
@@ -23202,7 +23871,7 @@ Perl_parse_uniprop_string(pTHX_
              USER_PROP_MUTEX_UNLOCK;
  
              /* Make sure the placeholder always gets destroyed */
-            SAVEDESTRUCTOR_X(S_delete_recursion_entry, SvPVX(fq_name));
+            SAVEDESTRUCTOR_X(S_delete_recursion_entry, SvPVX(key));
  
              PUSHMARK(SP);
              SAVETMPS;
@@ -23213,16 +23882,33 @@ Perl_parse_uniprop_string(pTHX_
              XPUSHs(boolSV(to_fold));
              PUTBACK;
  
+            /* The following block was taken from swash_init().  Presumably
+             * they apply to here as well, though we no longer use a swash --
+             * khw */
+            SAVEHINTS();
+            save_re_context();
+            /* We might get here via a subroutine signature which uses a utf8
+             * parameter name, at which point PL_subname will have been set
+             * but not yet used. */
+            save_item(PL_subname);
+
              (void) call_sv(user_sub_sv, G_EVAL|G_SCALAR);
  
              SPAGAIN;
  
              error = ERRSV;
-            if (SvTRUE(error)) {
+            if (TAINT_get || SvTRUE(error)) {
                  if (SvCUR(msg) > 0) sv_catpvs(msg, "; ");
-                sv_catpvs(msg, "Error \"");
-                sv_catsv(msg, error);
-                sv_catpvs(msg, "\"");
+                if (SvTRUE(error)) {
+                    sv_catpvs(msg, "Error \"");
+                    sv_catsv(msg, error);
+                    sv_catpvs(msg, "\"");
+                }
+                if (TAINT_get) {
+                    if (SvTRUE(error)) sv_catpvs(msg, "; ");
+                    sv_catpvn(msg, insecure, sizeof(insecure) - 1);
+                }
+
                  if (name_len > 0) {
                      sv_catpvs(msg, " in expansion of ");
                      Perl_sv_catpvf(aTHX_ msg, "%" UTF8f, UTF8fARG(is_utf8,
@@ -23234,16 +23920,28 @@ Perl_parse_uniprop_string(pTHX_
                  prop_definition = NULL;
              }
              else {  /* G_SCALAR guarantees a single return value */
+                SV * contents = POPs;
  
                  /* The contents is supposed to be the expansion of the property
-                 * definition.  Call a function to check for valid syntax and
-                 * handle it */
-                prop_definition = handle_user_defined_property(name, name_len,
+                 * definition.  If the definition is deferrable, and we got an
+                 * empty string back, set a flag to later defer it (after clean
+                 * up below). */
+                if (      deferrable
+                    && (! SvPOK(contents) || SvCUR(contents) == 0))
+                {
+                        empty_return = TRUE;
+                }
+                else { /* Otherwise, call a function to check for valid syntax,
+                          and handle it */
+
+                    prop_definition = handle_user_defined_property(
+                                                    name, name_len,
                                                      is_utf8, to_fold, runtime,
                                                      deferrable,
-                                                    POPs, user_defined_ptr,
+                                                    contents, user_defined_ptr,
                                                      msg,
                                                      level);
+                }
              }
  
              /* Here, we have the results of the expansion.  Delete the
@@ -23253,15 +23951,16 @@ Perl_parse_uniprop_string(pTHX_
               * and add the permanent entry */
              USER_PROP_MUTEX_LOCK;
  
-            S_delete_recursion_entry(aTHX_ SvPVX(fq_name));
-
-            if (! prop_definition || is_invlist(prop_definition)) {
+            S_delete_recursion_entry(aTHX_ SvPVX(key));
  
+            if (    ! empty_return
+                && (! prop_definition || is_invlist(prop_definition)))
+            {
                  /* If we got success we use the inversion list defining the
                   * property; otherwise use the error message */
                  SWITCH_TO_GLOBAL_CONTEXT;
                  (void) hv_store_ent(PL_user_def_props,
-                                    fq_name,
+                                    key,
                                      ((prop_definition)
                                       ? newSVsv(prop_definition)
                                       : newSVsv(msg)),
@@ -23277,6 +23976,10 @@ Perl_parse_uniprop_string(pTHX_
              LEAVE;
              POPSTACK;
  
+            if (empty_return) {
+                goto definition_deferred;
+            }
+
              if (prop_definition) {
  
                  /* If the definition is for something not known at this time,
@@ -23308,8 +24011,11 @@ Perl_parse_uniprop_string(pTHX_
      /* If it didn't find the property ... */
      if (table_index == 0) {
  
-        /* Try again stripping off any initial 'In' or 'Is' */
-        if (starts_with_In_or_Is) {
+        /* Try again stripping off any initial 'Is'.  This is because we
+         * promise that an initial Is is optional.  The same isn't true of
+         * names that start with 'In'.  Those can match only blocks, and the
+         * lookup table already has those accounted for. */
+        if (starts_with_Is) {
              lookup_name += 2;
              lookup_len -= 2;
              equals_pos -= 2;
@@ -23352,10 +24058,12 @@ Perl_parse_uniprop_string(pTHX_
                   * NV. */
  
                  NV value;
+                SSize_t value_len = lookup_len - equals_pos;
  
                  /* Get the value */
-                if (my_atof3(lookup_name + equals_pos, &value,
-                             lookup_len - equals_pos)
+                if (   value_len <= 0
+                    || my_atof3(lookup_name + equals_pos, &value,
+                                value_len)
                            != lookup_name + lookup_len)
                  {
                      goto failed;
@@ -23620,29 +24328,34 @@ Perl_parse_uniprop_string(pTHX_
  
    definition_deferred:
  
-    /* Here it could yet to be defined, so defer evaluation of this
-     * until its needed at runtime. */
-    prop_definition = newSVpvs_flags("", SVs_TEMP);
+    {
+        bool is_qualified = non_pkg_begin != 0;  /* If has "::" */
  
-    /* To avoid any ambiguity, the package is always specified.
-     * Use the current one if it wasn't included in our input */
-    if (non_pkg_begin == 0) {
-        const HV * pkg = (IN_PERL_COMPILETIME)
-                         ? PL_curstash
-                         : CopSTASH(PL_curcop);
-        const char* pkgname = HvNAME(pkg);
+        /* Here it could yet to be defined, so defer evaluation of this until
+         * its needed at runtime.  We need the fully qualified property name to
+         * avoid ambiguity */
+        if (! fq_name) {
+            fq_name = S_get_fq_name(aTHX_ name, name_len, is_utf8,
+                                                                is_qualified);
+        }
  
-        Perl_sv_catpvf(aTHX_ prop_definition, "%" UTF8f,
-                      UTF8fARG(is_utf8, strlen(pkgname), pkgname));
-        sv_catpvs(prop_definition, "::");
-    }
+        /* If it didn't come with a package, or the package is utf8::, this
+         * actually could be an official Unicode property whose inclusion we
+         * are deferring until runtime to make sure that it isn't overridden by
+         * a user-defined property of the same name (which we haven't
+         * encountered yet).  Add a marker to indicate this possibility, for
+         * use at such time when we first need the definition during pattern
+         * matching execution */
+        if (! is_qualified || memBEGINPs(name, non_pkg_begin, "utf8::")) {
+            sv_catpvs(fq_name, DEFERRED_PROP_EXPANSION_MARKERs);
+        }
  
-    Perl_sv_catpvf(aTHX_ prop_definition, "%" UTF8f,
-                         UTF8fARG(is_utf8, name_len, name));
-    sv_catpvs(prop_definition, "\n");
+        /* We also need a trailing newline */
+        sv_catpvs(fq_name, "\n");
  
-    *user_defined_ptr = TRUE;
-    return prop_definition;
+        *user_defined_ptr = TRUE;
+        return fq_name;
+    }
  }
  
  #endif