This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
regcomp.c: Comments, white-space only
[perl5.git] / regcomp.c
index f0adce9..54cab09 100644 (file)
--- a/regcomp.c
+++ b/regcomp.c
@@ -131,6 +131,8 @@ struct RExC_state_t {
     char       *parse;                 /* Input-scan pointer. */
     char        *copy_start;            /* start of copy of input within
                                            constructed parse string */
+    char        *save_copy_start;       /* Provides one level of saving
+                                           and restoring 'copy_start' */
     char        *copy_start_in_input;   /* Position in input string
                                            corresponding to copy_start */
     SSize_t    whilem_seen;            /* number of WHILEM in this expr */
@@ -163,6 +165,7 @@ struct RExC_state_t {
     I32                seen_zerolen;
     regnode_offset *open_parens;       /* offsets to open parens */
     regnode_offset *close_parens;      /* offsets to close parens */
+    I32      parens_buf_size;           /* #slots malloced open/close_parens */
     regnode     *end_op;                /* END node in program */
     I32                utf8;           /* whether the pattern is utf8 or not */
     I32                orig_utf8;      /* whether the pattern was originally in utf8 */
@@ -179,11 +182,10 @@ struct RExC_state_t {
                                            through */
     U32         study_chunk_recursed_bytes;  /* bytes in bitmap */
     I32                in_lookbehind;
+    I32                in_lookahead;
     I32                contains_locale;
     I32                override_recoding;
-#ifdef EBCDIC
-    I32                recode_x_to_native;
-#endif
+    I32         recode_x_to_native;
     I32                in_multi_char_class;
     struct reg_code_blocks *code_blocks;/* positions of literal (?{})
                                            within pattern */
@@ -193,6 +195,7 @@ struct RExC_state_t {
     scan_frame *frame_last;
     U32         frame_count;
     AV         *warn_text;
+    HV         *unlexed_names;
 #ifdef ADD_TO_REGEXEC
     char       *starttry;              /* -Dr: where regtry was called. */
 #define RExC_starttry  (pRExC_state->starttry)
@@ -227,6 +230,7 @@ struct RExC_state_t {
 #define RExC_precomp   (pRExC_state->precomp)
 #define RExC_copy_start_in_input (pRExC_state->copy_start_in_input)
 #define RExC_copy_start_in_constructed  (pRExC_state->copy_start)
+#define RExC_save_copy_start_in_constructed  (pRExC_state->save_copy_start)
 #define RExC_precomp_end (pRExC_state->precomp_end)
 #define RExC_rx_sv     (pRExC_state->rx_sv)
 #define RExC_rx                (pRExC_state->rx)
@@ -239,7 +243,6 @@ struct RExC_state_t {
 #define RExC_seen_d_op (pRExC_state->seen_d_op) /* Seen something that differs
                                                    under /d from /u ? */
 
-
 #ifdef RE_TRACK_PATTERN_OFFSETS
 #  define RExC_offsets (RExC_rxi->u.offsets) /* I am not like the
                                                          others */
@@ -252,6 +255,7 @@ struct RExC_state_t {
 #define RExC_maxlen        (pRExC_state->maxlen)
 #define RExC_npar      (pRExC_state->npar)
 #define RExC_total_parens      (pRExC_state->total_par)
+#define RExC_parens_buf_size   (pRExC_state->parens_buf_size)
 #define RExC_nestroot   (pRExC_state->nestroot)
 #define RExC_seen_zerolen      (pRExC_state->seen_zerolen)
 #define RExC_utf8      (pRExC_state->utf8)
@@ -267,10 +271,17 @@ struct RExC_state_t {
 #define RExC_study_chunk_recursed_bytes  \
                                    (pRExC_state->study_chunk_recursed_bytes)
 #define RExC_in_lookbehind     (pRExC_state->in_lookbehind)
+#define RExC_in_lookahead      (pRExC_state->in_lookahead)
 #define RExC_contains_locale   (pRExC_state->contains_locale)
+#define RExC_recode_x_to_native (pRExC_state->recode_x_to_native)
+
 #ifdef EBCDIC
-#   define RExC_recode_x_to_native (pRExC_state->recode_x_to_native)
+#  define SET_recode_x_to_native(x)                                         \
+                    STMT_START { RExC_recode_x_to_native = (x); } STMT_END
+#else
+#  define SET_recode_x_to_native(x) NOOP
 #endif
+
 #define RExC_in_multi_char_class (pRExC_state->in_multi_char_class)
 #define RExC_frame_head (pRExC_state->frame_head)
 #define RExC_frame_last (pRExC_state->frame_last)
@@ -280,6 +291,7 @@ struct RExC_state_t {
 #define RExC_warn_text (pRExC_state->warn_text)
 #define RExC_in_script_run      (pRExC_state->in_script_run)
 #define RExC_use_BRANCHJ        (pRExC_state->use_BRANCHJ)
+#define RExC_unlexed_names (pRExC_state->unlexed_names)
 
 /* Heuristic check on the complexity of the pattern: if TOO_NAUGHTY, we set
  * a flag to disable back-off on the fixed/floating substrings - if it's
@@ -351,7 +363,7 @@ struct RExC_state_t {
             if (DEPENDS_SEMANTICS) {                                        \
                 set_regex_charset(&RExC_flags, REGEX_UNICODE_CHARSET);      \
                 RExC_uni_semantics = 1;                                     \
-                if (RExC_seen_d_op && LIKELY(RExC_total_parens >= 0)) {     \
+                if (RExC_seen_d_op && LIKELY(! IN_PARENS_PASS)) {           \
                     /* No need to restart the parse if we haven't seen      \
                      * anything that differs between /u and /d, and no need \
                      * to restart immediately if we're going to reparse     \
@@ -362,22 +374,26 @@ struct RExC_state_t {
             }                                                               \
     } STMT_END
 
-#define BRANCH_MAX_OFFSET   U16_MAX
 #define REQUIRE_BRANCHJ(flagp, restart_retval)                              \
     STMT_START {                                                            \
                 RExC_use_BRANCHJ = 1;                                       \
-                if (LIKELY(RExC_total_parens >= 0)) {                       \
-                    /* No need to restart the parse immediately if we're    \
-                     * going to reparse anyway to count parens */           \
-                    *flagp |= RESTART_PARSE;                                \
-                    return restart_retval;                                  \
-                }                                                           \
+                *flagp |= RESTART_PARSE;                                    \
+                return restart_retval;                                      \
     } STMT_END
 
+/* Until we have completed the parse, we leave RExC_total_parens at 0 or
+ * less.  After that, it must always be positive, because the whole regex is
+ * considered to be surrounded by virtual parens.  Setting it to a negative
+ * value indicates that some construct needs to know the actual number of
+ * parens in order to be handled properly, which means an extra pass will be
+ * required after we've counted them all. */
+#define ALL_PARENS_COUNTED (RExC_total_parens > 0)
 #define REQUIRE_PARENS_PASS                                                 \
-    STMT_START {                                                            \
-                    if (RExC_total_parens == 0) RExC_total_parens = -1;     \
+    STMT_START {  /* No-op if have completed a pass */                      \
+                    if (! ALL_PARENS_COUNTED) RExC_total_parens = -1;       \
     } STMT_END
+#define IN_PARENS_PASS (RExC_total_parens < 0)
+
 
 /* This is used to return failure (zero) early from the calling function if
  * various flags in 'flags' are set.  Two flags always cause a return:
@@ -694,7 +710,7 @@ static const scan_data_t zero_scan_data = {
 
 /* Used to point after bad bytes for an error message, but avoid skipping
  * past a nul byte. */
-#define SKIP_IF_CHAR(s) (!*(s) ? 0 : UTF ? UTF8SKIP(s) : 1)
+#define SKIP_IF_CHAR(s, e) (!*(s) ? 0 : UTF ? UTF8_SAFE_SKIP(s, e) : 1)
 
 /* Set up to clean up after our imminent demise */
 #define PREPARE_TO_DIE                                                      \
@@ -733,6 +749,10 @@ static const scan_data_t zero_scan_data = {
     Perl_croak(aTHX_ msg " in regex m/%" UTF8f "%s/",      \
            arg, UTF8fARG(UTF, len, RExC_precomp), ellipses))
 
+#define        FAIL3(msg,arg1,arg2) _FAIL(                         \
+    Perl_croak(aTHX_ msg " in regex m/%" UTF8f "%s/",      \
+     arg1, arg2, UTF8fARG(UTF, len, RExC_precomp), ellipses))
+
 /*
  * Simple_vFAIL -- like FAIL, but marks the current location in the scan
  */
@@ -809,8 +829,13 @@ static const scan_data_t zero_scan_data = {
 } STMT_END
 
 /* Setting this to NULL is a signal to not output warnings */
-#define TURN_OFF_WARNINGS_IN_SUBSTITUTE_PARSE RExC_copy_start_in_constructed = NULL
-#define RESTORE_WARNINGS RExC_copy_start_in_constructed = RExC_precomp
+#define TURN_OFF_WARNINGS_IN_SUBSTITUTE_PARSE                               \
+    STMT_START {                                                            \
+      RExC_save_copy_start_in_constructed  = RExC_copy_start_in_constructed;\
+      RExC_copy_start_in_constructed = NULL;                                \
+    } STMT_END
+#define RESTORE_WARNINGS                                                    \
+    RExC_copy_start_in_constructed = RExC_save_copy_start_in_constructed
 
 /* Since a warning can be generated multiple times as the input is reparsed, we
  * output it the first time we come to that point in the parse, but suppress it
@@ -825,7 +850,8 @@ static const scan_data_t zero_scan_data = {
 #define UPDATE_WARNINGS_LOC(loc)                                        \
     STMT_START {                                                        \
         if (TO_OUTPUT_WARNINGS(loc)) {                                  \
-            RExC_latest_warn_offset = (xI(loc)) - RExC_precomp;         \
+            RExC_latest_warn_offset = MAX(sI, MIN(eI, xI(loc)))         \
+                                                       - RExC_precomp;  \
         }                                                               \
     } STMT_END
 
@@ -1562,6 +1588,9 @@ S_get_ANYOF_cp_list_for_ssc(pTHX_ const RExC_state_t *pRExC_state,
     unsigned int i;
     const U32 n = ARG(node);
     bool new_node_has_latin1 = FALSE;
+    const U8 flags = (inRANGE(OP(node), ANYOFH, ANYOFHr))
+                      ? 0
+                      : ANYOF_FLAGS(node);
 
     PERL_ARGS_ASSERT_GET_ANYOF_CP_LIST_FOR_SSC;
 
@@ -1586,7 +1615,7 @@ S_get_ANYOF_cp_list_for_ssc(pTHX_ const RExC_state_t *pRExC_state,
         }
 
         /* Get the code points valid only under UTF-8 locales */
-        if (   (ANYOF_FLAGS(node) & ANYOFL_FOLD)
+        if (   (flags & ANYOFL_FOLD)
             &&  av_tindex_skip_len_mg(av) >= ONLY_LOCALE_MATCHES_INDEX)
         {
             only_utf8_locale_invlist = ary[ONLY_LOCALE_MATCHES_INDEX];
@@ -1607,14 +1636,14 @@ S_get_ANYOF_cp_list_for_ssc(pTHX_ const RExC_state_t *pRExC_state,
      * actually does include them.  (Think about "\xe0" =~ /[^\xc0]/di;).  We
      * have to do this here before we add the unconditionally matched code
      * points */
-    if (ANYOF_FLAGS(node) & ANYOF_INVERT) {
+    if (flags & ANYOF_INVERT) {
         _invlist_intersection_complement_2nd(invlist,
                                              PL_UpperLatin1,
                                              &invlist);
     }
 
     /* Add in the points from the bit map */
-    if (OP(node) != ANYOFH) {
+    if (! inRANGE(OP(node), ANYOFH, ANYOFHr)) {
         for (i = 0; i < NUM_ANYOF_CODE_POINTS; i++) {
             if (ANYOF_BITMAP_TEST(node, i)) {
                 unsigned int start = i++;
@@ -1634,21 +1663,21 @@ S_get_ANYOF_cp_list_for_ssc(pTHX_ const RExC_state_t *pRExC_state,
      * as well.  But don't add them if inverting, as when that gets done below,
      * it would exclude all these characters, including the ones it shouldn't
      * that were added just above */
-    if (! (ANYOF_FLAGS(node) & ANYOF_INVERT) && OP(node) == ANYOFD
-        && (ANYOF_FLAGS(node) & ANYOF_SHARED_d_MATCHES_ALL_NON_UTF8_NON_ASCII_non_d_WARN_SUPER))
+    if (! (flags & ANYOF_INVERT) && OP(node) == ANYOFD
+        && (flags & ANYOF_SHARED_d_MATCHES_ALL_NON_UTF8_NON_ASCII_non_d_WARN_SUPER))
     {
         _invlist_union(invlist, PL_UpperLatin1, &invlist);
     }
 
     /* Similarly for these */
-    if (ANYOF_FLAGS(node) & ANYOF_MATCHES_ALL_ABOVE_BITMAP) {
+    if (flags & ANYOF_MATCHES_ALL_ABOVE_BITMAP) {
         _invlist_union_complement_2nd(invlist, PL_InBitmap, &invlist);
     }
 
-    if (ANYOF_FLAGS(node) & ANYOF_INVERT) {
+    if (flags & ANYOF_INVERT) {
         _invlist_invert(invlist);
     }
-    else if (ANYOF_FLAGS(node) & ANYOFL_FOLD) {
+    else if (flags & ANYOFL_FOLD) {
         if (new_node_has_latin1) {
 
             /* Under /li, any 0-255 could fold to any other 0-255, depending on
@@ -1676,7 +1705,7 @@ S_get_ANYOF_cp_list_for_ssc(pTHX_ const RExC_state_t *pRExC_state,
     if (only_utf8_locale_invlist) {
         _invlist_union_maybe_complement_2nd(invlist,
                                             only_utf8_locale_invlist,
-                                            ANYOF_FLAGS(node) & ANYOF_INVERT,
+                                            flags & ANYOF_INVERT,
                                             &invlist);
     }
 
@@ -1701,6 +1730,9 @@ S_ssc_and(pTHX_ const RExC_state_t *pRExC_state, regnode_ssc *ssc,
      * another SSC or a regular ANYOF class.  Can create false positives. */
 
     SV* anded_cp_list;
+    U8  and_with_flags = inRANGE(OP(and_with), ANYOFH, ANYOFHr)
+                          ? 0
+                          : ANYOF_FLAGS(and_with);
     U8  anded_flags;
 
     PERL_ARGS_ASSERT_SSC_AND;
@@ -1711,7 +1743,7 @@ S_ssc_and(pTHX_ const RExC_state_t *pRExC_state, regnode_ssc *ssc,
      * the code point inversion list and just the relevant flags */
     if (is_ANYOF_SYNTHETIC(and_with)) {
         anded_cp_list = ((regnode_ssc *)and_with)->invlist;
-        anded_flags = ANYOF_FLAGS(and_with);
+        anded_flags = and_with_flags;
 
         /* XXX This is a kludge around what appears to be deficiencies in the
          * optimizer.  If we make S_ssc_anything() add in the WARN_SUPER flag,
@@ -1735,14 +1767,14 @@ S_ssc_and(pTHX_ const RExC_state_t *pRExC_state, regnode_ssc *ssc,
     else {
         anded_cp_list = get_ANYOF_cp_list_for_ssc(pRExC_state, and_with);
         if (OP(and_with) == ANYOFD) {
-            anded_flags = ANYOF_FLAGS(and_with) & ANYOF_COMMON_FLAGS;
+            anded_flags = and_with_flags & ANYOF_COMMON_FLAGS;
         }
         else {
-            anded_flags = ANYOF_FLAGS(and_with)
+            anded_flags = and_with_flags
             &( ANYOF_COMMON_FLAGS
               |ANYOF_SHARED_d_MATCHES_ALL_NON_UTF8_NON_ASCII_non_d_WARN_SUPER
               |ANYOF_SHARED_d_UPPER_LATIN1_UTF8_STRING_MATCHES_non_d_RUNTIME_USER_PROP);
-            if (ANYOFL_UTF8_LOCALE_REQD(ANYOF_FLAGS(and_with))) {
+            if (ANYOFL_UTF8_LOCALE_REQD(and_with_flags)) {
                 anded_flags &=
                     ANYOFL_SHARED_UTF8_LOCALE_fold_HAS_MATCHES_nonfold_REQD;
             }
@@ -1782,7 +1814,7 @@ S_ssc_and(pTHX_ const RExC_state_t *pRExC_state, regnode_ssc *ssc,
      *                         <=  (C1 & ~C2) | (P1 & ~P2)
      * */
 
-    if ((ANYOF_FLAGS(and_with) & ANYOF_INVERT)
+    if ((and_with_flags & ANYOF_INVERT)
         && ! is_ANYOF_SYNTHETIC(and_with))
     {
         unsigned int i;
@@ -1794,7 +1826,7 @@ S_ssc_and(pTHX_ const RExC_state_t *pRExC_state, regnode_ssc *ssc,
 
         /* If either P1 or P2 is empty, the intersection will be also; can skip
          * the loop */
-        if (! (ANYOF_FLAGS(and_with) & ANYOF_MATCHES_POSIXL)) {
+        if (! (and_with_flags & ANYOF_MATCHES_POSIXL)) {
             ANYOF_POSIXL_ZERO(ssc);
         }
         else if (ANYOF_POSIXL_SSC_TEST_ANY_SET(ssc)) {
@@ -1854,16 +1886,16 @@ S_ssc_and(pTHX_ const RExC_state_t *pRExC_state, regnode_ssc *ssc,
             else {
                 ssc->invlist = anded_cp_list;
                 ANYOF_POSIXL_ZERO(ssc);
-                if (ANYOF_FLAGS(and_with) & ANYOF_MATCHES_POSIXL) {
+                if (and_with_flags & ANYOF_MATCHES_POSIXL) {
                     ANYOF_POSIXL_OR((regnode_charclass_posixl*) and_with, ssc);
                 }
             }
         }
         else if (ANYOF_POSIXL_SSC_TEST_ANY_SET(ssc)
-                 || (ANYOF_FLAGS(and_with) & ANYOF_MATCHES_POSIXL))
+                 || (and_with_flags & ANYOF_MATCHES_POSIXL))
         {
             /* One or the other of P1, P2 is non-empty. */
-            if (ANYOF_FLAGS(and_with) & ANYOF_MATCHES_POSIXL) {
+            if (and_with_flags & ANYOF_MATCHES_POSIXL) {
                 ANYOF_POSIXL_AND((regnode_charclass_posixl*) and_with, ssc);
             }
             ssc_union(ssc, anded_cp_list, FALSE);
@@ -1884,6 +1916,9 @@ S_ssc_or(pTHX_ const RExC_state_t *pRExC_state, regnode_ssc *ssc,
 
     SV* ored_cp_list;
     U8 ored_flags;
+    U8  or_with_flags = inRANGE(OP(or_with), ANYOFH, ANYOFHr)
+                         ? 0
+                         : ANYOF_FLAGS(or_with);
 
     PERL_ARGS_ASSERT_SSC_OR;
 
@@ -1893,17 +1928,17 @@ S_ssc_or(pTHX_ const RExC_state_t *pRExC_state, regnode_ssc *ssc,
      * the code point inversion list and just the relevant flags */
     if (is_ANYOF_SYNTHETIC(or_with)) {
         ored_cp_list = ((regnode_ssc*) or_with)->invlist;
-        ored_flags = ANYOF_FLAGS(or_with);
+        ored_flags = or_with_flags;
     }
     else {
         ored_cp_list = get_ANYOF_cp_list_for_ssc(pRExC_state, or_with);
-        ored_flags = ANYOF_FLAGS(or_with) & ANYOF_COMMON_FLAGS;
+        ored_flags = or_with_flags & ANYOF_COMMON_FLAGS;
         if (OP(or_with) != ANYOFD) {
             ored_flags
-            |= ANYOF_FLAGS(or_with)
+            |= or_with_flags
              & ( ANYOF_SHARED_d_MATCHES_ALL_NON_UTF8_NON_ASCII_non_d_WARN_SUPER
                 |ANYOF_SHARED_d_UPPER_LATIN1_UTF8_STRING_MATCHES_non_d_RUNTIME_USER_PROP);
-            if (ANYOFL_UTF8_LOCALE_REQD(ANYOF_FLAGS(or_with))) {
+            if (ANYOFL_UTF8_LOCALE_REQD(or_with_flags)) {
                 ored_flags |=
                     ANYOFL_SHARED_UTF8_LOCALE_fold_HAS_MATCHES_nonfold_REQD;
             }
@@ -1930,12 +1965,12 @@ S_ssc_or(pTHX_ const RExC_state_t *pRExC_state, regnode_ssc *ssc,
      * (which results in actually simpler code than the non-inverted case)
      * */
 
-    if ((ANYOF_FLAGS(or_with) & ANYOF_INVERT)
+    if ((or_with_flags & ANYOF_INVERT)
         && ! is_ANYOF_SYNTHETIC(or_with))
     {
         /* We ignore P2, leaving P1 going forward */
     }   /* else  Not inverted */
-    else if (ANYOF_FLAGS(or_with) & ANYOF_MATCHES_POSIXL) {
+    else if (or_with_flags & ANYOF_MATCHES_POSIXL) {
         ANYOF_POSIXL_OR((regnode_charclass_posixl*)or_with, ssc);
         if (ANYOF_POSIXL_SSC_TEST_ANY_SET(ssc)) {
             unsigned int i;
@@ -2497,7 +2532,8 @@ is the recommended Unicode-aware way of saying
        if (UTF) {                                                         \
             SV *zlopp = newSV(UTF8_MAXBYTES);                             \
            unsigned char *flrbbbbb = (unsigned char *) SvPVX(zlopp);      \
-            unsigned const char *const kapow = uvchr_to_utf8(flrbbbbb, val); \
+            unsigned char *const kapow = uvchr_to_utf8(flrbbbbb, val);     \
+            *kapow = '\0';                                                 \
            SvCUR_set(zlopp, kapow - flrbbbbb);                            \
            SvPOK_on(zlopp);                                               \
            SvUTF8_on(zlopp);                                              \
@@ -2690,7 +2726,7 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch,
         trie_words = newAV();
     });
 
-    re_trie_maxbuff = get_sv(RE_TRIE_MAXBUF_NAME, 1);
+    re_trie_maxbuff = get_sv(RE_TRIE_MAXBUF_NAME, GV_ADD);
     assert(re_trie_maxbuff);
     if (!SvIOK(re_trie_maxbuff)) {
         sv_setiv(re_trie_maxbuff, RE_TRIE_MAXBUF_INIT);
@@ -3516,9 +3552,9 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch,
                     if ( state==1 ) {
                         OP( convert ) = nodetype;
                         str=STRING(convert);
-                        STR_LEN(convert)=0;
+                        setSTR_LEN(convert, 0);
                     }
-                    STR_LEN(convert) += len;
+                    setSTR_LEN(convert, STR_LEN(convert) + len);
                     while (len--)
                         *str++ = *ch++;
                } else {
@@ -3958,8 +3994,9 @@ S_construct_ahocorasick_from_trie(pTHX_ RExC_state_t *pRExC_state, regnode *sour
  *      using /iaa matching will be doing so almost entirely with ASCII
  *      strings, so this should rarely be encountered in practice */
 
-#define JOIN_EXACT(scan,min_subtract,unfolded_multi_char, flags) \
-    if (PL_regkind[OP(scan)] == EXACT) \
+#define JOIN_EXACT(scan,min_subtract,unfolded_multi_char, flags)    \
+    if (PL_regkind[OP(scan)] == EXACT && OP(scan) != LEXACT         \
+                                      && OP(scan) != LEXACT_ONLY8)  \
         join_exact(pRExC_state,(scan),(min_subtract),unfolded_multi_char, (flags), NULL, depth+1)
 
 STATIC U32
@@ -4125,7 +4162,7 @@ S_join_exact(pTHX_ RExC_state_t *pRExC_state, regnode *scan,
             merged++;
 
             NEXT_OFF(scan) += NEXT_OFF(n);
-            STR_LEN(scan) += STR_LEN(n);
+            setSTR_LEN(scan, STR_LEN(scan) + STR_LEN(n));
             next = n + NODE_SZ_STR(n);
             /* Now we can overwrite *n : */
             Move(STRING(n), STRING(scan) + oldl, STR_LEN(n), char);
@@ -5162,7 +5199,9 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
            }
        }
        else if (   OP(scan) == EXACT
+                 || OP(scan) == LEXACT
                  || OP(scan) == EXACT_ONLY8
+                 || OP(scan) == LEXACT_ONLY8
                  || OP(scan) == EXACTL)
         {
            SSize_t l = STR_LEN(scan);
@@ -5284,7 +5323,9 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                if (flags & (SCF_DO_SUBSTR | SCF_DO_STCLASS)) {
                    next = NEXTOPER(scan);
                    if (   OP(next) == EXACT
+                        || OP(next) == LEXACT
                         || OP(next) == EXACT_ONLY8
+                        || OP(next) == LEXACT_ONLY8
                         || OP(next) == EXACTL
                         || (flags & SCF_DO_STCLASS))
                     {
@@ -5607,9 +5648,12 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                        STRLEN l;
                        const char * const s = SvPV_const(data->last_found, l);
                        SSize_t old = b - data->last_start_min;
+                        assert(old >= 0);
 
                        if (UTF)
-                           old = utf8_hop((U8*)s, old) - (U8*)s;
+                           old = utf8_hop_forward((U8*)s, old,
+                                               (U8 *) SvEND(data->last_found))
+                                - (U8*)s;
                        l -= old;
                        /* Get the added string: */
                        last_str = newSVpvn_utf8(s  + old, l, UTF);
@@ -5818,6 +5862,8 @@ Perl_re_printf( aTHX_  "LHS=%" UVuf " RHS=%" UVuf "\n",
                 case ANYOFL:
                 case ANYOFPOSIXL:
                 case ANYOFH:
+                case ANYOFHb:
+                case ANYOFHr:
                 case ANYOF:
                    if (flags & SCF_DO_STCLASS_AND)
                        ssc_and(pRExC_state, data->start_class,
@@ -5974,14 +6020,27 @@ Perl_re_printf( aTHX_  "LHS=%" UVuf " RHS=%" UVuf "\n",
                                       last, &data_fake, stopparen,
                                       recursed_depth, NULL, f, depth+1);
                 if (scan->flags) {
-                    if (deltanext) {
-                       FAIL("Variable length lookbehind not implemented");
-                    }
-                    else if (minnext > (I32)U8_MAX) {
+                    if (   deltanext < 0
+                        || deltanext > (I32) U8_MAX
+                        || minnext > (I32)U8_MAX
+                        || minnext + deltanext > (I32)U8_MAX)
+                    {
                        FAIL2("Lookbehind longer than %" UVuf " not implemented",
                               (UV)U8_MAX);
                     }
-                    scan->flags = (U8)minnext;
+
+                    /* The 'next_off' field has been repurposed to count the
+                     * additional starting positions to try beyond the initial
+                     * one.  (This leaves it at 0 for non-variable length
+                     * matches to avoid breakage for those not using this
+                     * extension) */
+                    if (deltanext) {
+                        scan->next_off = deltanext;
+                        ckWARNexperimental(RExC_parse,
+                            WARN_EXPERIMENTAL__VLB,
+                            "Variable length lookbehind is experimental");
+                    }
+                    scan->flags = (U8)minnext + deltanext;
                 }
                 if (data) {
                     if (data_fake.flags & (SF_HAS_PAR|SF_IN_PAR))
@@ -6066,14 +6125,21 @@ Perl_re_printf( aTHX_  "LHS=%" UVuf " RHS=%" UVuf "\n",
                                         stopparen, recursed_depth, NULL,
                                         f, depth+1);
                 if (scan->flags) {
-                    if (deltanext) {
-                       FAIL("Variable length lookbehind not implemented");
-                    }
-                    else if (*minnextp > (I32)U8_MAX) {
+                    assert(0);  /* This code has never been tested since this
+                                   is normally not compiled */
+                    if (   deltanext < 0
+                        || deltanext > (I32) U8_MAX
+                        || *minnextp > (I32)U8_MAX
+                        || *minnextp + deltanext > (I32)U8_MAX)
+                    {
                        FAIL2("Lookbehind longer than %" UVuf " not implemented",
                               (UV)U8_MAX);
                     }
-                    scan->flags = (U8)*minnextp;
+
+                    if (deltanext) {
+                        scan->next_off = deltanext;
+                    }
+                    scan->flags = (U8)*minnextp + deltanext;
                 }
 
                 *minnextp += min;
@@ -7204,7 +7270,7 @@ S_set_regex_pv(pTHX_ RExC_state_t *pRExC_state, REGEXP *Rx)
         const char* name;
 
         name = get_regex_charset_name(RExC_rx->extflags, &len);
-        if strEQ(name, DEPENDS_PAT_MODS) {  /* /d under UTF-8 => /u */
+        if (strEQ(name, DEPENDS_PAT_MODS)) {  /* /d under UTF-8 => /u */
             assert(RExC_utf8);
             name = UNICODE_PAT_MODS;
             len = sizeof(UNICODE_PAT_MODS) - 1;
@@ -7357,6 +7423,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
     }
 
     pRExC_state->warn_text = NULL;
+    pRExC_state->unlexed_names = NULL;
     pRExC_state->code_blocks = NULL;
 
     if (is_bare_re)
@@ -7523,6 +7590,12 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
         && memEQ(RX_PRECOMP(old_re), exp, plen)
        && !runtime_code /* with runtime code, always recompile */ )
     {
+        DEBUG_COMPILE_r({
+            SV *dsv= sv_newmortal();
+            RE_PV_QUOTED_DECL(s, RExC_utf8, dsv, exp, plen, PL_dump_re_max_len);
+            Perl_re_printf( aTHX_  "%sSkipping recompilation of unchanged REx%s %s\n",
+                          PL_colors[4], PL_colors[5], s);
+        });
         return old_re;
     }
 
@@ -7566,10 +7639,9 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
     RExC_seen = 0;
     RExC_maxlen = 0;
     RExC_in_lookbehind = 0;
+    RExC_in_lookahead = 0;
     RExC_seen_zerolen = *exp == '^' ? -1 : 0;
-#ifdef EBCDIC
     RExC_recode_x_to_native = 0;
-#endif
     RExC_in_multi_char_class = 0;
 
     RExC_start = RExC_copy_start_in_constructed = RExC_copy_start_in_input = RExC_precomp = exp;
@@ -7655,6 +7727,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
 
     RExC_naughty = 0;
     RExC_npar = 1;
+    RExC_parens_buf_size = 0;
     RExC_emit_start = RExC_rxi->program;
     pRExC_state->code_index = 0;
 
@@ -7664,9 +7737,9 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
     /* Do the parse */
     if (reg(pRExC_state, 0, &flags, 1)) {
 
-        /* Success!, But if RExC_total_parens < 0, we need to redo the parse
-         * knowing how many parens there actually are */
-        if (RExC_total_parens < 0) {
+        /* Success!  But we may need to redo the parse now that we know how
+         * many parens there actually are */
+        if (IN_PARENS_PASS) {
             flags |= RESTART_PARSE;
         }
 
@@ -7708,7 +7781,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
             DEBUG_PARSE_r(Perl_re_printf( aTHX_ "Need to redo parse\n"));
         }
 
-        if (RExC_total_parens > 0) {
+        if (ALL_PARENS_COUNTED) {
             /* Make enough room for all the known parens, and zero it */
             Renew(RExC_open_parens, RExC_total_parens, regnode_offset);
             Zero(RExC_open_parens, RExC_total_parens, regnode_offset);
@@ -7779,6 +7852,16 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
     SetProgLen(RExC_rxi,RExC_size);
 #endif
 
+    DEBUG_DUMP_PRE_OPTIMIZE_r({
+        SV * const sv = sv_newmortal();
+        RXi_GET_DECL(RExC_rx, ri);
+        DEBUG_RExC_seen();
+        Perl_re_printf( aTHX_ "Program before optimization:\n");
+
+        (void)dumpuntil(RExC_rx, ri->program, ri->program + 1, NULL, NULL,
+                        sv, 0, 0);
+    });
+
     DEBUG_OPTIMISE_r(
         Perl_re_printf( aTHX_  "Starting post parse optimization\n");
     );
@@ -7901,7 +7984,9 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
         /* Ignore EXACT as we deal with it later. */
        if (PL_regkind[OP(first)] == EXACT) {
            if (   OP(first) == EXACT
+               || OP(first) == LEXACT
                 || OP(first) == EXACT_ONLY8
+                || OP(first) == LEXACT_ONLY8
                 || OP(first) == EXACTL)
             {
                NOOP;   /* Empty, get anchored substr later. */
@@ -8247,7 +8332,9 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
                  && nop == END)
             RExC_rx->extflags |= RXf_WHITE;
         else if ( RExC_rx->extflags & RXf_SPLIT
-                  && (fop == EXACT || fop == EXACT_ONLY8 || fop == EXACTL)
+                  && (   fop == EXACT || fop == LEXACT
+                      || fop == EXACT_ONLY8 || fop == LEXACT_ONLY8
+                      || fop == EXACTL)
                   && STR_LEN(first) == 1
                   && *(STRING(first)) == ' '
                   && nop == END )
@@ -8806,7 +8893,7 @@ S_reg_scan_name(pTHX_ RExC_state_t *pRExC_state, U32 flags)
             /* It might be a forward reference; we can't fail until we
                 * know, by completing the parse to get all the groups, and
                 * then reparsing */
-            if (RExC_total_parens > 0)  {
+            if (ALL_PARENS_COUNTED)  {
                 vFAIL("Reference to nonexistent named group");
             }
             else {
@@ -10895,7 +10982,7 @@ S_parse_lparen_question_flags(pTHX_ RExC_state_t *pRExC_state)
                 return;
             default:
               fail_modifiers:
-                RExC_parse += SKIP_IF_CHAR(RExC_parse);
+                RExC_parse += SKIP_IF_CHAR(RExC_parse, RExC_end);
                /* diag_listed_as: Sequence (?%s...) not recognized in regex; marked by <-- HERE in m/%s/ */
                 vFAIL2utf8f("Sequence (%" UTF8f "...) not recognized",
                       UTF8fARG(UTF, RExC_parse-seqstart, seqstart));
@@ -10952,14 +11039,14 @@ S_handle_named_backref(pTHX_ RExC_state_t *pRExC_state,
     RExC_sawback = 1;
     ret = reganode(pRExC_state,
                    ((! FOLD)
-                     ? NREF
+                     ? REFN
                      : (ASCII_FOLD_RESTRICTED)
-                       ? NREFFA
+                       ? REFFAN
                        : (AT_LEAST_UNI_SEMANTICS)
-                         ? NREFFU
+                         ? REFFUN
                          : (LOC)
-                           ? NREFFL
-                           : NREFF),
+                           ? REFFLN
+                           : REFFN),
                     num);
     *flagp |= HASWIDTH;
 
@@ -10998,6 +11085,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
     I32 freeze_paren = 0;
     I32 after_freeze = 0;
     I32 num; /* numeric backreferences */
+    SV * max_open;  /* Max number of unclosed parens */
 
     char * parse_start = RExC_parse; /* MJD */
     char * const oregcomp_parse = RExC_parse;
@@ -11007,8 +11095,26 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
     PERL_ARGS_ASSERT_REG;
     DEBUG_PARSE("reg ");
 
+
+    max_open = get_sv(RE_COMPILE_RECURSION_LIMIT, GV_ADD);
+    assert(max_open);
+    if (!SvIOK(max_open)) {
+        sv_setiv(max_open, RE_COMPILE_RECURSION_INIT);
+    }
+    if (depth > 4 * (UV) SvIV(max_open)) { /* We increase depth by 4 for each
+                                              open paren */
+        vFAIL("Too many nested open parens");
+    }
+
     *flagp = 0;                                /* Tentatively. */
 
+    if (RExC_in_lookbehind) {
+       RExC_in_lookbehind++;
+    }
+    if (RExC_in_lookahead) {
+        RExC_in_lookahead++;
+    }
+
     /* Having this true makes it feasible to have a lot fewer tests for the
      * parse pointer being in scope.  For example, we can write
      *      while(isFOO(*RExC_parse)) RExC_parse++;
@@ -11250,10 +11356,15 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
                         return 0;
                     }
 
-                    REGTAIL(pRExC_state, ret, atomic);
+                    if (! REGTAIL(pRExC_state, ret, atomic)) {
+                        REQUIRE_BRANCHJ(flagp, 0);
+                    }
 
-                    REGTAIL(pRExC_state, atomic,
-                           reg_node(pRExC_state, SRCLOSE));
+                    if (! REGTAIL(pRExC_state, atomic, reg_node(pRExC_state,
+                                                                SRCLOSE)))
+                    {
+                        REQUIRE_BRANCHJ(flagp, 0);
+                    }
 
                     RExC_in_script_run = 0;
                     return ret;
@@ -11295,7 +11406,9 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
 
            } /* End of switch */
            if ( ! op ) {
-               RExC_parse += UTF ? UTF8SKIP(RExC_parse) : 1;
+               RExC_parse += UTF
+                              ? UTF8_SAFE_SKIP(RExC_parse, RExC_end)
+                              : 1;
                 if (has_upper || verb_len == 0) {
                     vFAIL2utf8f(
                     "Unknown verb pattern '%" UTF8f "'",
@@ -11375,7 +11488,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
                     return handle_named_backref(pRExC_state, flagp,
                                                 parse_start, ')');
                 }
-                RExC_parse += SKIP_IF_CHAR(RExC_parse);
+                RExC_parse += SKIP_IF_CHAR(RExC_parse, RExC_end);
                 /* diag_listed_as: Sequence (?%s...) not recognized in regex; marked by <-- HERE in m/%s/ */
                vFAIL3("Sequence (%.*s...) not recognized",
                                 RExC_parse-seqstart, seqstart);
@@ -11471,10 +11584,11 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
                 if (RExC_parse >= RExC_end) {
                     vFAIL("Sequence (?... not terminated");
                 }
-
-                /* FALLTHROUGH */
+                RExC_seen_zerolen++;
+                break;
            case '=':           /* (?=...) */
                RExC_seen_zerolen++;
+                RExC_in_lookahead++;
                 break;
            case '!':           /* (?!...) */
                RExC_seen_zerolen++;
@@ -11533,14 +11647,14 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
                 goto gen_recurse_regop;
                 /* NOTREACHED */
             case '+':
-                if (!(RExC_parse[0] >= '1' && RExC_parse[0] <= '9')) {
+                if (! inRANGE(RExC_parse[0], '1', '9')) {
                     RExC_parse++;
                     vFAIL("Illegal pattern");
                 }
                 goto parse_recursion;
                 /* NOTREACHED*/
             case '-': /* (?-1) */
-                if (!(RExC_parse[0] >= '1' && RExC_parse[0] <= '9')) {
+                if (! inRANGE(RExC_parse[0], '1', '9')) {
                     RExC_parse--; /* rewind to let it be handled later */
                     goto parse_flags;
                 }
@@ -11592,7 +11706,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
                         /* It might be a forward reference; we can't fail until
                          * we know, by completing the parse to get all the
                          * groups, and then reparsing */
-                        if (RExC_total_parens > 0)  {
+                        if (ALL_PARENS_COUNTED)  {
                             RExC_parse++;
                             vFAIL("Reference to nonexistent group");
                         }
@@ -11618,7 +11732,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
                     /* It might be a forward reference; we can't fail until we
                      * know, by completing the parse to get all the groups, and
                      * then reparsing */
-                    if (RExC_total_parens > 0)  {
+                    if (ALL_PARENS_COUNTED)  {
                         if (num >= RExC_total_parens) {
                             RExC_parse++;
                             vFAIL("Reference to nonexistent group");
@@ -11650,7 +11764,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
            case '?':           /* (??...) */
                is_logical = 1;
                if (*RExC_parse != '{') {
-                    RExC_parse += SKIP_IF_CHAR(RExC_parse);
+                    RExC_parse += SKIP_IF_CHAR(RExC_parse, RExC_end);
                     /* diag_listed_as: Sequence (?%s...) not recognized in regex; marked by <-- HERE in m/%s/ */
                     vFAIL2utf8f(
                         "Sequence (%" UTF8f "...) not recognized",
@@ -11710,7 +11824,9 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
                                        RExC_flags & RXf_PMf_COMPILETIME
                                       );
                     FLAGS(REGNODE_p(ret)) = 2;
-                    REGTAIL(pRExC_state, ret, eval);
+                    if (! REGTAIL(pRExC_state, ret, eval)) {
+                        REQUIRE_BRANCHJ(flagp, 0);
+                    }
                     /* deal with the length of this later - MJD */
                    return ret;
                }
@@ -11763,7 +11879,9 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
 
                     tail = reg(pRExC_state, 1, &flag, depth+1);
                     RETURN_FAIL_ON_RESTART(flag, flagp);
-                    REGTAIL(pRExC_state, ret, tail);
+                    if (! REGTAIL(pRExC_state, ret, tail)) {
+                        REQUIRE_BRANCHJ(flagp, 0);
+                    }
                     goto insert_if;
                 }
                else if (   RExC_parse[0] == '<'     /* (?(<NAME>)...) */
@@ -11786,7 +11904,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
                         RExC_rxi->data->data[num]=(void*)sv_dat;
                         SvREFCNT_inc_simple_void_NN(sv_dat);
                     }
-                    ret = reganode(pRExC_state, NGROUPP, num);
+                    ret = reganode(pRExC_state, GROUPPN, num);
                     goto insert_if_check_paren;
                }
                else if (memBEGINs(RExC_parse,
@@ -11809,7 +11927,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
                         parno = 1;
                         RExC_parse++;
                     }
-                    else if (RExC_parse[0] >= '1' && RExC_parse[0] <= '9' ) {
+                    else if (inRANGE(RExC_parse[0], '1', '9')) {
                         UV uv;
                         endptr = RExC_end;
                         if (grok_atoUV(RExC_parse, &uv, &endptr)
@@ -11830,7 +11948,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
                    ret = reganode(pRExC_state, INSUBP, parno);
                    goto insert_if_check_paren;
                }
-               else if (RExC_parse[0] >= '1' && RExC_parse[0] <= '9' ) {
+                else if (inRANGE(RExC_parse[0], '1', '9')) {
                     /* (?(1)...) */
                    char c;
                     UV uv;
@@ -11848,20 +11966,29 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
 
                  insert_if_check_paren:
                    if (UCHARAT(RExC_parse) != ')') {
-                        RExC_parse += UTF ? UTF8SKIP(RExC_parse) : 1;
+                        RExC_parse += UTF
+                                      ? UTF8_SAFE_SKIP(RExC_parse, RExC_end)
+                                      : 1;
                        vFAIL("Switch condition not recognized");
                    }
                    nextchar(pRExC_state);
                  insert_if:
-                    REGTAIL(pRExC_state, ret, reganode(pRExC_state, IFTHEN, 0));
+                    if (! REGTAIL(pRExC_state, ret, reganode(pRExC_state,
+                                                             IFTHEN, 0)))
+                    {
+                        REQUIRE_BRANCHJ(flagp, 0);
+                    }
                     br = regbranch(pRExC_state, &flags, 1, depth+1);
                    if (br == 0) {
                         RETURN_FAIL_ON_RESTART(flags,flagp);
                         FAIL2("panic: regbranch returned failure, flags=%#" UVxf,
                               (UV) flags);
                     } else
-                        REGTAIL(pRExC_state, br, reganode(pRExC_state,
-                                                          LONGJMP, 0));
+                    if (! REGTAIL(pRExC_state, br, reganode(pRExC_state,
+                                                             LONGJMP, 0)))
+                    {
+                        REQUIRE_BRANCHJ(flagp, 0);
+                    }
                    c = UCHARAT(RExC_parse);
                     nextchar(pRExC_state);
                    if (flags&HASWIDTH)
@@ -11878,7 +12005,9 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
                             FAIL2("panic: regbranch returned failure, flags=%#" UVxf,
                                   (UV) flags);
                         }
-                        REGTAIL(pRExC_state, ret, lastbr);
+                        if (! REGTAIL(pRExC_state, ret, lastbr)) {
+                            REQUIRE_BRANCHJ(flagp, 0);
+                        }
                        if (flags&HASWIDTH)
                            *flagp |= HASWIDTH;
                         c = UCHARAT(RExC_parse);
@@ -11893,16 +12022,26 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
                             vFAIL("Switch (?(condition)... contains too many branches");
                     }
                    ender = reg_node(pRExC_state, TAIL);
-                    REGTAIL(pRExC_state, br, ender);
+                    if (! REGTAIL(pRExC_state, br, ender)) {
+                        REQUIRE_BRANCHJ(flagp, 0);
+                    }
                    if (lastbr) {
-                        REGTAIL(pRExC_state, lastbr, ender);
-                        REGTAIL(pRExC_state, REGNODE_OFFSET(
-                                                NEXTOPER(
-                                                NEXTOPER(REGNODE_p(lastbr)))),
-                                             ender);
+                        if (! REGTAIL(pRExC_state, lastbr, ender)) {
+                            REQUIRE_BRANCHJ(flagp, 0);
+                        }
+                        if (! REGTAIL(pRExC_state,
+                                      REGNODE_OFFSET(
+                                                 NEXTOPER(
+                                                 NEXTOPER(REGNODE_p(lastbr)))),
+                                      ender))
+                        {
+                            REQUIRE_BRANCHJ(flagp, 0);
+                        }
                    }
                    else
-                        REGTAIL(pRExC_state, ret, ender);
+                        if (! REGTAIL(pRExC_state, ret, ender)) {
+                            REQUIRE_BRANCHJ(flagp, 0);
+                        }
 #if 0  /* Removing this doesn't cause failures in the test suite -- khw */
                     RExC_size++; /* XXX WHY do we need this?!!
                                     For large programs it seems to be required
@@ -11910,7 +12049,9 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
 #endif
                    return ret;
                }
-                RExC_parse += UTF ? UTF8SKIP(RExC_parse) : 1;
+                RExC_parse += UTF
+                              ? UTF8_SAFE_SKIP(RExC_parse, RExC_end)
+                              : 1;
                 vFAIL("Unknown switch condition (?(...))");
            }
            case '[':           /* (?[ ... ]) */
@@ -11920,6 +12061,12 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
                RExC_parse--; /* for vFAIL to print correctly */
                 vFAIL("Sequence (? incomplete");
                 break;
+
+            case ')':
+                if (RExC_strict) {  /* [perl #132851] */
+                    ckWARNreg(RExC_parse, "Empty (?) without any modifiers");
+                }
+                /* FALLTHROUGH */
            default: /* e.g., (?i) */
                RExC_parse = (char *) seqstart + 1;
               parse_flags:
@@ -11949,34 +12096,47 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
          capturing_parens:
            parno = RExC_npar;
            RExC_npar++;
-            if (RExC_total_parens <= 0) {
+            if (! ALL_PARENS_COUNTED) {
                 /* If we are in our first pass through (and maybe only pass),
                  * we  need to allocate memory for the capturing parentheses
-                 * data structures.  Since we start at npar=1, when it reaches
-                 * 2, for the first time it has something to put in it.  Above
-                 * 2 means we extend what we already have */
-                if (RExC_npar == 2) {
+                 * data structures.
+                 */
+
+                if (!RExC_parens_buf_size) {
+                    /* first guess at number of parens we might encounter */
+                    RExC_parens_buf_size = 10;
+
                     /* setup RExC_open_parens, which holds the address of each
                      * OPEN tag, and to make things simpler for the 0 index the
                      * start of the program - this is used later for offsets */
-                    Newxz(RExC_open_parens, RExC_npar, regnode_offset);
+                    Newxz(RExC_open_parens, RExC_parens_buf_size,
+                            regnode_offset);
                     RExC_open_parens[0] = 1;    /* +1 for REG_MAGIC */
 
                     /* setup RExC_close_parens, which holds the address of each
                      * CLOSE tag, and to make things simpler for the 0 index
                      * the end of the program - this is used later for offsets
                      * */
-                    Newxz(RExC_close_parens, RExC_npar, regnode_offset);
+                    Newxz(RExC_close_parens, RExC_parens_buf_size,
+                            regnode_offset);
                     /* we dont know where end op starts yet, so we dont need to
                      * set RExC_close_parens[0] like we do RExC_open_parens[0]
                      * above */
                 }
-                else {
-                    Renew(RExC_open_parens, RExC_npar, regnode_offset);
-                    Zero(RExC_open_parens + RExC_npar - 1, 1, regnode_offset);
+                else if (RExC_npar > RExC_parens_buf_size) {
+                    I32 old_size = RExC_parens_buf_size;
+
+                    RExC_parens_buf_size *= 2;
 
-                    Renew(RExC_close_parens, RExC_npar, regnode_offset);
-                    Zero(RExC_close_parens + RExC_npar - 1, 1, regnode_offset);
+                    Renew(RExC_open_parens, RExC_parens_buf_size,
+                            regnode_offset);
+                    Zero(RExC_open_parens + old_size,
+                            RExC_parens_buf_size - old_size, regnode_offset);
+
+                    Renew(RExC_close_parens, RExC_parens_buf_size,
+                            regnode_offset);
+                    Zero(RExC_close_parens + old_size,
+                            RExC_parens_buf_size - old_size, regnode_offset);
                 }
             }
 
@@ -12031,7 +12191,9 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
        *flagp |= flags&SIMPLE;
     }
     if (is_open) {                             /* Starts with OPEN. */
-        REGTAIL(pRExC_state, ret, br);          /* OPEN -> first. */
+        if (! REGTAIL(pRExC_state, ret, br)) {  /* OPEN -> first. */
+            REQUIRE_BRANCHJ(flagp, 0);
+        }
     }
     else if (paren != '?')             /* Not Conditional */
        ret = br;
@@ -12039,12 +12201,15 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
     lastbr = br;
     while (*RExC_parse == '|') {
        if (RExC_use_BRANCHJ) {
+            bool shut_gcc_up;
+
            ender = reganode(pRExC_state, LONGJMP, 0);
 
             /* Append to the previous. */
-            REGTAIL(pRExC_state,
-                    REGNODE_OFFSET(NEXTOPER(NEXTOPER(REGNODE_p(lastbr)))),
-                    ender);
+            shut_gcc_up = REGTAIL(pRExC_state,
+                         REGNODE_OFFSET(NEXTOPER(NEXTOPER(REGNODE_p(lastbr)))),
+                         ender);
+            PERL_UNUSED_VAR(shut_gcc_up);
        }
        nextchar(pRExC_state);
        if (freeze_paren) {
@@ -12058,7 +12223,9 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
             RETURN_FAIL_ON_RESTART(flags, flagp);
             FAIL2("panic: regbranch returned failure, flags=%#" UVxf, (UV) flags);
         }
-        REGTAIL(pRExC_state, lastbr, br);               /* BRANCH -> BRANCH. */
+        if (!  REGTAIL(pRExC_state, lastbr, br)) {  /* BRANCH -> BRANCH. */
+            REQUIRE_BRANCHJ(flagp, 0);
+        }
        lastbr = br;
        *flagp |= flags & (SPSTART | HASWIDTH | POSTPONED);
     }
@@ -12129,7 +12296,9 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
                           (IV)(ender - lastbr)
             );
         );
-        REGTAIL(pRExC_state, lastbr, ender);
+        if (! REGTAIL(pRExC_state, lastbr, ender)) {
+            REQUIRE_BRANCHJ(flagp, 0);
+        }
 
        if (have_branch) {
             char is_nothing= 1;
@@ -12140,17 +12309,21 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
            for (br = REGNODE_p(ret); br; br = regnext(br)) {
                const U8 op = PL_regkind[OP(br)];
                if (op == BRANCH) {
-                    REGTAIL_STUDY(pRExC_state,
-                                  REGNODE_OFFSET(NEXTOPER(br)),
-                                  ender);
+                    if (! REGTAIL_STUDY(pRExC_state,
+                                        REGNODE_OFFSET(NEXTOPER(br)),
+                                        ender))
+                    {
+                        REQUIRE_BRANCHJ(flagp, 0);
+                    }
                     if ( OP(NEXTOPER(br)) != NOTHING
                          || regnext(NEXTOPER(br)) != REGNODE_p(ender))
                         is_nothing= 0;
                }
                else if (op == BRANCHJ) {
-                    REGTAIL_STUDY(pRExC_state,
-                                  REGNODE_OFFSET(NEXTOPER(NEXTOPER(br))),
-                                  ender);
+                    bool shut_gcc_up = REGTAIL_STUDY(pRExC_state,
+                                        REGNODE_OFFSET(NEXTOPER(NEXTOPER(br))),
+                                        ender);
+                    PERL_UNUSED_VAR(shut_gcc_up);
                     /* for now we always disable this optimisation * /
                     if ( OP(NEXTOPER(NEXTOPER(br))) != NOTHING
                          || regnext(NEXTOPER(NEXTOPER(br))) != REGNODE_p(ender))
@@ -12209,7 +12382,10 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
             Set_Node_Cur_Length(REGNODE_p(ret), parse_start);
            Set_Node_Offset(REGNODE_p(ret), parse_start + 1);
            FLAGS(REGNODE_p(ret)) = flag;
-            REGTAIL_STUDY(pRExC_state, ret, reg_node(pRExC_state, TAIL));
+            if (! REGTAIL_STUDY(pRExC_state, ret, reg_node(pRExC_state, TAIL)))
+            {
+                REQUIRE_BRANCHJ(flagp, 0);
+            }
        }
     }
 
@@ -12240,6 +12416,9 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
     if (RExC_in_lookbehind) {
        RExC_in_lookbehind--;
     }
+    if (RExC_in_lookahead) {
+        RExC_in_lookahead--;
+    }
     if (after_freeze > RExC_npar)
         RExC_npar = after_freeze;
     return(ret);
@@ -12303,14 +12482,12 @@ S_regbranch(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, I32 first, U32 depth)
            /* FIXME adding one for every branch after the first is probably
             * excessive now we have TRIE support. (hv) */
            MARK_NAUGHTY(1);
-            if (     chain > (SSize_t) BRANCH_MAX_OFFSET
-                && ! RExC_use_BRANCHJ)
-            {
+            if (! REGTAIL(pRExC_state, chain, latest)) {
                 /* XXX We could just redo this branch, but figuring out what
-                 * bookkeeping needs to be reset is a pain */
+                 * bookkeeping needs to be reset is a pain, and it's likely
+                 * that other branches that goto END will also be too large */
                 REQUIRE_BRANCHJ(flagp, 0);
             }
-            REGTAIL(pRExC_state, chain, latest);
        }
        chain = latest;
        c++;
@@ -12461,7 +12638,9 @@ S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                const regnode_offset w = reg_node(pRExC_state, WHILEM);
 
                FLAGS(REGNODE_p(w)) = 0;
-                REGTAIL(pRExC_state, ret, w);
+                if (!  REGTAIL(pRExC_state, ret, w)) {
+                    REQUIRE_BRANCHJ(flagp, 0);
+                }
                if (RExC_use_BRANCHJ) {
                    reginsert(pRExC_state, LONGJMP, ret, depth+1);
                    reginsert(pRExC_state, NOTHING, ret, depth+1);
@@ -12476,7 +12655,11 @@ S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                if (RExC_use_BRANCHJ)
                     NEXT_OFF(REGNODE_p(ret)) = 3;   /* Go over NOTHING to
                                                        LONGJMP. */
-                REGTAIL(pRExC_state, ret, reg_node(pRExC_state, NOTHING));
+                if (! REGTAIL(pRExC_state, ret, reg_node(pRExC_state,
+                                                          NOTHING)))
+                {
+                    REQUIRE_BRANCHJ(flagp, 0);
+                }
                 RExC_whilem_seen++;
                 MARK_NAUGHTY_EXP(1, 4);     /* compound interest */
            }
@@ -12548,16 +12731,22 @@ S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
     if (*RExC_parse == '?') {
        nextchar(pRExC_state);
        reginsert(pRExC_state, MINMOD, ret, depth+1);
-        REGTAIL(pRExC_state, ret, ret + NODE_STEP_REGNODE);
+        if (! REGTAIL(pRExC_state, ret, ret + NODE_STEP_REGNODE)) {
+            REQUIRE_BRANCHJ(flagp, 0);
+        }
     }
     else if (*RExC_parse == '+') {
         regnode_offset ender;
         nextchar(pRExC_state);
         ender = reg_node(pRExC_state, SUCCEED);
-        REGTAIL(pRExC_state, ret, ender);
+        if (! REGTAIL(pRExC_state, ret, ender)) {
+            REQUIRE_BRANCHJ(flagp, 0);
+        }
         reginsert(pRExC_state, SUSPEND, ret, depth+1);
         ender = reg_node(pRExC_state, TAIL);
-        REGTAIL(pRExC_state, ret, ender);
+        if (! REGTAIL(pRExC_state, ret, ender)) {
+            REQUIRE_BRANCHJ(flagp, 0);
+        }
     }
 
     if (ISMULT2(RExC_parse)) {
@@ -12610,20 +12799,23 @@ S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state,
   * points) that this \N sequence matches.  This is set, and the input is
   * parsed for errors, even if the function returns FALSE, as detailed below.
   *
-  * There are 5 possibilities here, as detailed in the next 5 paragraphs.
+  * There are 6 possibilities here, as detailed in the next 6 paragraphs.
   *
   * Probably the most common case is for the \N to specify a single code point.
   * *cp_count will be set to 1, and *code_point_p will be set to that code
   * point.
   *
-  * Another possibility is for the input to be an empty \N{}, which for
-  * backwards compatibility we accept.  *cp_count will be set to 0. *node_p
-  * will be set to a generated NOTHING node.
+  * Another possibility is for the input to be an empty \N{}.  This is no
+  * longer accepted, and will generate a fatal error.
+  *
+  * Another possibility is for a custom charnames handler to be in effect which
+  * translates the input name to an empty string.  *cp_count will be set to 0.
+  * *node_p will be set to a generated NOTHING node.
   *
   * Still another possibility is for the \N to mean [^\n]. *cp_count will be
   * set to 0. *node_p will be set to a generated REG_ANY node.
   *
-  * The fourth possibility is that \N resolves to a sequence of more than one
+  * The fifth possibility is that \N resolves to a sequence of more than one
   * code points.  *cp_count will be set to the number of code points in the
   * sequence. *node_p will be set to a generated node returned by this
   * function calling S_reg().
@@ -12631,7 +12823,7 @@ S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state,
   * The final possibility is that it is premature to be calling this function;
   * the parse needs to be restarted.  This can happen when this changes from
   * /d to /u rules, or when the pattern needs to be upgraded to UTF-8.  The
-  * latter occurs only when the fourth possibility would otherwise be in
+  * latter occurs only when the fifth possibility would otherwise be in
   * effect, and is because one of those code points requires the pattern to be
   * recompiled as UTF-8.  The function returns FALSE, and sets the
   * RESTART_PARSE and NEED_UTF8 flags in *flagp, as appropriate.  When this
@@ -12648,12 +12840,11 @@ S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state,
   * so we need a way to take a snapshot of what they resolve to at the time of
   * the original parse. [perl #56444].
   *
-  * That parsing is skipped for single-quoted regexes, so we may here get
-  * '\N{NAME}'.  This is a fatal error.  These names have to be resolved by the
-  * parser.  But if the single-quoted regex is something like '\N{U+41}', that
-  * is legal and handled here.  The code point is Unicode, and has to be
-  * translated into the native character set for non-ASCII platforms.
-  */
+  * That parsing is skipped for single-quoted regexes, so here we may get
+  * '\N{NAME}', which is parsed now.  If the single-quoted regex is something
+  * like '\N{U+41}', that code point is Unicode, and has to be translated into
+  * the native character set for non-ASCII platforms.  The other possibilities
+  * are already native, so no translation is done. */
 
     char * endbrace;    /* points to '}' following the name */
     char* p = RExC_parse; /* Temporary */
@@ -12662,7 +12853,6 @@ S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state,
     char *orig_end;
     char *save_start;
     I32 flags;
-    Size_t count = 0;   /* code point count kept internally by this function */
 
     GET_RE_DEBUG_FLAGS_DECL;
 
@@ -12685,7 +12875,7 @@ S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state,
 
     /* Disambiguate between \N meaning a named character versus \N meaning
      * [^\n].  The latter is assumed when the {...} following the \N is a legal
-     * quantifier, or there is no '{' at all */
+     * quantifier, or if there is no '{' at all */
     if (*p != '{' || regcurly(p)) {
         RExC_parse = p;
         if (cp_count) {
@@ -12718,15 +12908,19 @@ S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state,
         vFAIL2("Missing right brace on \\%c{}", 'N');
     }
 
-    /* Here, we have decided it should be a named character or sequence */
-    REQUIRE_UNI_RULES(flagp, FALSE); /* Unicode named chars imply Unicode
-                                        semantics */
+    /* Here, we have decided it should be a named character or sequence.  These
+     * imply Unicode semantics */
+    REQUIRE_UNI_RULES(flagp, FALSE);
 
-    if (endbrace == RExC_parse) {   /* empty: \N{} */
+    /* \N{_} is what toke.c returns to us to indicate a name that evaluates to
+     * nothing at all (not allowed under strict) */
+    if (endbrace - RExC_parse == 1 && *RExC_parse == '_') {
+        RExC_parse = endbrace;
         if (strict) {
             RExC_parse++;   /* Position after the "}" */
             vFAIL("Zero length \\N{}");
         }
+
         if (cp_count) {
             *cp_count = 0;
         }
@@ -12739,15 +12933,120 @@ S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state,
         return TRUE;
     }
 
-    /* If we haven't got something that begins with 'U+', then it didn't get lexed. */
-    if (   endbrace - RExC_parse < 2
-        || strnNE(RExC_parse, "U+", 2))
-    {
-        RExC_parse = endbrace;  /* position msg's '<--HERE' */
-        vFAIL("\\N{NAME} must be resolved by the lexer");
-    }
+    if (endbrace - RExC_parse < 2 || ! strBEGINs(RExC_parse, "U+")) {
+
+        /* Here, the name isn't of the form  U+....  This can happen if the
+         * pattern is single-quoted, so didn't get evaluated in toke.c.  Now
+         * is the time to find out what the name means */
+
+        const STRLEN name_len = endbrace - RExC_parse;
+        SV *  value_sv;     /* What does this name evaluate to */
+        SV ** value_svp;
+        const U8 * value;   /* string of name's value */
+        STRLEN value_len;   /* and its length */
+
+        /*  RExC_unlexed_names is a hash of names that weren't evaluated by
+         *  toke.c, and their values. Make sure it is initialized */
+        if (! RExC_unlexed_names) {
+            RExC_unlexed_names = newHV();
+        }
+
+        /* If we have already seen this name in this pattern, use that.  This
+         * allows us to only call the charnames handler once per name per
+         * pattern.  A broken or malicious handler could return something
+         * different each time, which could cause the results to vary depending
+         * on if something gets added or subtracted from the pattern that
+         * causes the number of passes to change, for example */
+        if ((value_svp = hv_fetch(RExC_unlexed_names, RExC_parse,
+                                                      name_len, 0)))
+        {
+            value_sv = *value_svp;
+        }
+        else { /* Otherwise we have to go out and get the name */
+            const char * error_msg = NULL;
+            value_sv = get_and_check_backslash_N_name(RExC_parse, endbrace,
+                                                      UTF,
+                                                      &error_msg);
+            if (error_msg) {
+                RExC_parse = endbrace;
+                vFAIL(error_msg);
+            }
+
+            /* If no error message, should have gotten a valid return */
+            assert (value_sv);
+
+            /* Save the name's meaning for later use */
+            if (! hv_store(RExC_unlexed_names, RExC_parse, name_len,
+                           value_sv, 0))
+            {
+                Perl_croak(aTHX_ "panic: hv_store() unexpectedly failed");
+            }
+        }
+
+        /* Here, we have the value the name evaluates to in 'value_sv' */
+        value = (U8 *) SvPV(value_sv, value_len);
+
+        /* See if the result is one code point vs 0 or multiple */
+        if (value_len > 0 && value_len <= (UV) ((SvUTF8(value_sv))
+                                               ? UTF8SKIP(value)
+                                               : 1))
+        {
+            /* Here, exactly one code point.  If that isn't what is wanted,
+             * fail */
+            if (! code_point_p) {
+                RExC_parse = p;
+                return FALSE;
+            }
+
+            /* Convert from string to numeric code point */
+            *code_point_p = (SvUTF8(value_sv))
+                            ? valid_utf8_to_uvchr(value, NULL)
+                            : *value;
 
-        /* This code purposely indented below because of future changes coming */
+            /* Have parsed this entire single code point \N{...}.  *cp_count
+             * has already been set to 1, so don't do it again. */
+            RExC_parse = endbrace;
+            nextchar(pRExC_state);
+            return TRUE;
+        } /* End of is a single code point */
+
+        /* Count the code points, if caller desires.  The API says to do this
+         * even if we will later return FALSE */
+        if (cp_count) {
+            *cp_count = 0;
+
+            *cp_count = (SvUTF8(value_sv))
+                        ? utf8_length(value, value + value_len)
+                        : value_len;
+        }
+
+        /* Fail if caller doesn't want to handle a multi-code-point sequence.
+         * But don't back the pointer up if the caller wants to know how many
+         * code points there are (they need to handle it themselves in this
+         * case).  */
+        if (! node_p) {
+            if (! cp_count) {
+                RExC_parse = p;
+            }
+            return FALSE;
+        }
+
+        /* Convert this to a sub-pattern of the form "(?: ... )", and then call
+         * reg recursively to parse it.  That way, it retains its atomicness,
+         * while not having to worry about any special handling that some code
+         * points may have. */
+
+        substitute_parse = newSVpvs("?:");
+        sv_catsv(substitute_parse, value_sv);
+        sv_catpv(substitute_parse, ")");
+
+        /* The value should already be native, so no need to convert on EBCDIC
+         * platforms.*/
+        assert(! RExC_recode_x_to_native);
+
+    }
+    else {   /* \N{U+...} */
+        Size_t count = 0;   /* code point count kept internally */
 
         /* We can get to here when the input is \N{U+...} or when toke.c has
          * converted a name to the \N{U+...} form.  This include changing a
@@ -12876,11 +13175,10 @@ S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state,
 
         sv_catpvs(substitute_parse, ")");
 
-#ifdef EBCDIC
         /* The values are Unicode, and therefore have to be converted to native
          * on a non-Unicode (meaning non-ASCII) platform. */
-        RExC_recode_x_to_native = 1;
-#endif
+        SET_recode_x_to_native(1);
+    }
 
     /* Here, we have the string the name evaluates to, ready to be parsed,
      * stored in 'substitute_parse' as a series of valid "\x{...}\x{...}"
@@ -12904,9 +13202,7 @@ S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state,
     RExC_start = save_start;
     RExC_parse = endbrace;
     RExC_end = orig_end;
-#ifdef EBCDIC
-    RExC_recode_x_to_native = 0;
-#endif
+    SET_recode_x_to_native(0);
 
     SvREFCNT_dec_NN(substitute_parse);
 
@@ -13084,7 +13380,6 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
     char *parse_start;
     U8 op;
     int invert = 0;
-    U8 arg;
 
     GET_RE_DEBUG_FLAGS_DECL;
 
@@ -13213,15 +13508,21 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
            *flagp |= SIMPLE;
            goto finish_meta_pat;
        case 'K':
-           RExC_seen_zerolen++;
-           ret = reg_node(pRExC_state, KEEPS);
-           *flagp |= SIMPLE;
-           /* XXX:dmq : disabling in-place substitution seems to
-            * be necessary here to avoid cases of memory corruption, as
-            * with: C<$_="x" x 80; s/x\K/y/> -- rgs
-            */
-            RExC_seen |= REG_LOOKBEHIND_SEEN;
-           goto finish_meta_pat;
+            if (!RExC_in_lookbehind && !RExC_in_lookahead) {
+                RExC_seen_zerolen++;
+                ret = reg_node(pRExC_state, KEEPS);
+                *flagp |= SIMPLE;
+                /* XXX:dmq : disabling in-place substitution seems to
+                 * be necessary here to avoid cases of memory corruption, as
+                 * with: C<$_="x" x 80; s/x\K/y/> -- rgs
+                 */
+                RExC_seen |= REG_LOOKBEHIND_SEEN;
+                goto finish_meta_pat;
+            }
+            else {
+                ++RExC_parse; /* advance past the 'K' */
+                vFAIL("\\K not permitted in lookahead/lookbehind");
+            }
        case 'Z':
            ret = reg_node(pRExC_state, SEOL);
            *flagp |= SIMPLE;
@@ -13239,13 +13540,6 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
            *flagp |= HASWIDTH;
            goto finish_meta_pat;
 
-       case 'W':
-            invert = 1;
-            /* FALLTHROUGH */
-       case 'w':
-            arg = ANYOF_WORDCHAR;
-            goto join_posix;
-
        case 'B':
             invert = 1;
             /* FALLTHROUGH */
@@ -13364,85 +13658,26 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
            goto finish_meta_pat;
           }
 
-       case 'D':
-            invert = 1;
-            /* FALLTHROUGH */
-       case 'd':
-            arg = ANYOF_DIGIT;
-            if (! DEPENDS_SEMANTICS) {
-                goto join_posix;
-            }
-
-            /* \d doesn't have any matches in the upper Latin1 range, hence /d
-             * is equivalent to /u.  Changing to /u saves some branches at
-             * runtime */
-            op = POSIXU;
-            goto join_posix_op_known;
-
        case 'R':
            ret = reg_node(pRExC_state, LNBREAK);
            *flagp |= HASWIDTH|SIMPLE;
            goto finish_meta_pat;
 
-       case 'H':
-            invert = 1;
-            /* FALLTHROUGH */
+       case 'd':
+       case 'D':
        case 'h':
-           arg = ANYOF_BLANK;
-            op = POSIXU;
-            goto join_posix_op_known;
-
-       case 'V':
-            invert = 1;
-            /* FALLTHROUGH */
-       case 'v':
-           arg = ANYOF_VERTWS;
-            op = POSIXU;
-            goto join_posix_op_known;
-
-       case 'S':
-            invert = 1;
-            /* FALLTHROUGH */
-       case 's':
-            arg = ANYOF_SPACE;
-
-          join_posix:
-
-           op = POSIXD + get_regex_charset(RExC_flags);
-            if (op > POSIXA) {  /* /aa is same as /a */
-                op = POSIXA;
-            }
-            else if (op == POSIXL) {
-                RExC_contains_locale = 1;
-            }
-            else if (op == POSIXD) {
-                RExC_seen_d_op = TRUE;
-            }
-
-          join_posix_op_known:
-
-            if (invert) {
-                op += NPOSIXD - POSIXD;
-            }
-
-           ret = reg_node(pRExC_state, op);
-            FLAGS(REGNODE_p(ret)) = namedclass_to_classnum(arg);
-
-           *flagp |= HASWIDTH|SIMPLE;
-            /* FALLTHROUGH */
-
-          finish_meta_pat:
-            if (   UCHARAT(RExC_parse + 1) == '{'
-                && UNLIKELY(! new_regcurly(RExC_parse + 1, RExC_end)))
-            {
-                RExC_parse += 2;
-                vFAIL("Unescaped left brace in regex is illegal here");
-            }
-           nextchar(pRExC_state);
-            Set_Node_Length(REGNODE_p(ret), 2); /* MJD */
-           break;
+       case 'H':
        case 'p':
        case 'P':
+       case 's':
+       case 'S':
+       case 'v':
+       case 'V':
+       case 'w':
+       case 'W':
+            /* These all have the same meaning inside [brackets], and it knows
+             * how to do the best optimizations for them.  So, pretend we found
+             * these within brackets, and let it do the work */
             RExC_parse--;
 
             ret = regclass(pRExC_state, flagp, depth+1,
@@ -13461,10 +13696,21 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                 FAIL2("panic: regclass returned failure to regatom, flags=%#" UVxf,
                       (UV) *flagp);
 
-            RExC_parse--;
+            RExC_parse--;   /* regclass() leaves this one too far ahead */
 
+          finish_meta_pat:
+                   /* The escapes above that don't take a parameter can't be
+                    * followed by a '{'.  But 'pX', 'p{foo}' and
+                    * correspondingly 'P' can be */
+            if (   RExC_parse - parse_start == 1
+                && UCHARAT(RExC_parse + 1) == '{'
+                && UNLIKELY(! new_regcurly(RExC_parse + 1, RExC_end)))
+            {
+                RExC_parse += 2;
+                vFAIL("Unescaped left brace in regex is illegal here");
+            }
             Set_Node_Offset(REGNODE_p(ret), parse_start);
-            Set_Node_Cur_Length(REGNODE_p(ret), parse_start - 2);
+            Set_Node_Length(REGNODE_p(ret), RExC_parse - parse_start + 1); /* MJD */
             nextchar(pRExC_state);
            break;
         case 'N':
@@ -13585,7 +13831,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                         && num >= RExC_npar
                         /* cannot be an octal escape if it starts with 8 */
                         && *RExC_parse != '8'
-                        /* cannot be an octal escape it it starts with 9 */
+                        /* cannot be an octal escape if it starts with 9 */
                         && *RExC_parse != '9'
                     ) {
                         /* Probably not meant to be a backref, instead likely
@@ -13614,7 +13860,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                     /* It might be a forward reference; we can't fail until we
                      * know, by completing the parse to get all the groups, and
                      * then reparsing */
-                    if (RExC_total_parens > 0)  {
+                    if (ALL_PARENS_COUNTED)  {
                         if (num >= RExC_total_parens)  {
                             vFAIL("Reference to nonexistent group");
                         }
@@ -13686,13 +13932,14 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
            UV ender = 0;
            char *p;
            char *s;
-
-/* This allows us to fill a node with just enough spare so that if the final
- * character folds, its expansion is guaranteed to fit */
-#define MAX_NODE_STRING_SIZE (255-UTF8_MAXBYTES_CASE)
-
            char *s0;
-           U8 upper_parse = MAX_NODE_STRING_SIZE;
+            U32 max_string_len = 255;
+
+            /* We may have to reparse the node, artificially stopping filling
+             * it early, based on info gleaned in the first parse.  This
+             * variable gives where we stop.  Make it above the normal stopping
+             * place first time through. */
+           U32 upper_fill = max_string_len + 1;
 
             /* We start out as an EXACT node, even if under /i, until we find a
              * character which is in a fold.  The algorithm now segregates into
@@ -13708,10 +13955,11 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
             /* Assume the node will be fully used; the excess is given back at
              * the end.  We can't make any other length assumptions, as a byte
              * input sequence could shrink down. */
-            Ptrdiff_t initial_size = STR_SZ(256);
+            Ptrdiff_t current_string_nodes = STR_SZ(max_string_len);
 
             bool next_is_quantifier;
             char * oldp = NULL;
+            char * old_oldp = NULL;
 
             /* We can convert EXACTF nodes to EXACTFU if they contain only
              * characters that match identically regardless of the target
@@ -13739,10 +13987,15 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
             /* So is the MICRO SIGN */
             bool has_micro_sign = FALSE;
 
+            /* Set when we fill up the current node and there is still more
+             * text to process */
+            bool overflowed;
+
             /* Allocate an EXACT node.  The node_type may change below to
              * another EXACTish node, but since the size of the node doesn't
              * change, it works */
-            ret = regnode_guts(pRExC_state, node_type, initial_size, "exact");
+            ret = regnode_guts(pRExC_state, node_type, current_string_nodes,
+                                                                    "exact");
             FILL_NODE(ret, node_type);
             RExC_emit++;
 
@@ -13752,6 +14005,12 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
 
          reparse:
 
+            p = RExC_parse;
+            len = 0;
+            s = s0;
+
+          continue_parse:
+
             /* This breaks under rare circumstances.  If folding, we do not
              * want to split a node at a character that is a non-final in a
              * multi-char fold, as an input string could just happen to want to
@@ -13766,18 +14025,20 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                    || UTF8_IS_INVARIANT(UCHARAT(RExC_parse))
                    || UTF8_IS_START(UCHARAT(RExC_parse)));
 
+            overflowed = FALSE;
 
             /* Here, we have a literal character.  Find the maximal string of
              * them in the input that we can fit into a single EXACTish node.
              * We quit at the first non-literal or when the node gets full, or
              * under /i the categorization of folding/non-folding character
              * changes */
-           for (p = RExC_parse; len < upper_parse && p < RExC_end; ) {
+            while (p < RExC_end && len < upper_fill) {
 
                 /* In most cases each iteration adds one byte to the output.
                  * The exceptions override this */
                 Size_t added_len = 1;
 
+                old_oldp = oldp;
                oldp = p;
 
                 /* White space has already been ignored */
@@ -13947,13 +14208,13 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                             UPDATE_WARNINGS_LOC(p - 1);
                             ender = result;
 
-                            if (ender < 0x100) {
 #ifdef EBCDIC
+                            if (ender < 0x100) {
                                 if (RExC_recode_x_to_native) {
                                     ender = LATIN1_TO_NATIVE(ender);
                                 }
-#endif
                            }
+#endif
                            break;
                        }
                    case 'c':
@@ -14110,20 +14371,29 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                 /* Ready to add 'ender' to the node */
 
                 if (! FOLD) {  /* The simple case, just append the literal */
+                  not_fold_common:
 
-                      not_fold_common:
-                        if (UVCHR_IS_INVARIANT(ender) || ! UTF) {
-                            *(s++) = (char) ender;
-                        }
-                        else {
-                            U8 * new_s = uvchr_to_utf8((U8*)s, ender);
-                            added_len = (char *) new_s - s;
-                            s = (char *) new_s;
+                    /* Don't output if it would overflow */
+                    if (UNLIKELY(len > max_string_len - ((UTF)
+                                                         ? UVCHR_SKIP(ender)
+                                                         : 1)))
+                    {
+                        overflowed = TRUE;
+                        break;
+                    }
 
-                            if (ender > 255)  {
-                                requires_utf8_target = TRUE;
-                            }
+                    if (UVCHR_IS_INVARIANT(ender) || ! UTF) {
+                        *(s++) = (char) ender;
+                    }
+                    else {
+                        U8 * new_s = uvchr_to_utf8((U8*)s, ender);
+                        added_len = (char *) new_s - s;
+                        s = (char *) new_s;
+
+                        if (ender > 255)  {
+                            requires_utf8_target = TRUE;
                         }
+                    }
                 }
                 else if (LOC && is_PROBLEMATIC_LOCALE_FOLD_cp(ender)) {
 
@@ -14189,20 +14459,33 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
 
                     if (UTF) {  /* Use the folded value */
                         if (UVCHR_IS_INVARIANT(ender)) {
+                            if (UNLIKELY(len + 1 > max_string_len)) {
+                                overflowed = TRUE;
+                                break;
+                            }
+
                             *(s)++ = (U8) toFOLD(ender);
                         }
                         else {
-                            ender = _to_uni_fold_flags(
+                            U8 temp[UTF8_MAXBYTES_CASE+1];
+
+                            UV folded = _to_uni_fold_flags(
                                     ender,
-                                    (U8 *) s,
+                                    temp,
                                     &added_len,
                                     FOLD_FLAGS_FULL | ((ASCII_FOLD_RESTRICTED)
                                                     ? FOLD_FLAGS_NOMIX_ASCII
                                                     : 0));
+                            if (UNLIKELY(len + added_len > max_string_len)) {
+                                overflowed = TRUE;
+                                break;
+                            }
+
+                            Copy(temp, s, added_len, char);
                             s += added_len;
 
-                            if (   ender > 255
-                                && LIKELY(ender != GREEK_SMALL_LETTER_MU))
+                            if (   folded > 255
+                                && LIKELY(folded != GREEK_SMALL_LETTER_MU))
                             {
                                 /* U+B5 folds to the MU, so its possible for a
                                  * non-UTF-8 target to match it */
@@ -14254,9 +14537,16 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                             if (UNLIKELY(ender == LATIN_SMALL_LETTER_SHARP_S)) {
                                 maybe_SIMPLE = 0;
                                 if (node_type == EXACTFU) {
+
+                                    if (UNLIKELY(len + 2 > max_string_len)) {
+                                        overflowed = TRUE;
+                                        break;
+                                    }
+
                                     *(s++) = 's';
 
-                                    /* Let the code below add in the extra 's' */
+                                    /* Let the code below add in the extra 's'
+                                     * */
                                     ender = 's';
                                     added_len = 2;
                                 }
@@ -14268,18 +14558,21 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                             has_micro_sign = TRUE;
                         }
 
-                        *(s++) = (char) (DEPENDS_SEMANTICS)
-                                        ? toFOLD(ender)
-
-                                          /* Under /u, the fold of any
-                                           * character in the 0-255 range
-                                           * happens to be its lowercase
-                                           * equivalent, except for LATIN SMALL
-                                           * LETTER SHARP S, which was handled
-                                           * above, and the MICRO SIGN, whose
-                                           * fold requires UTF-8 to represent.
-                                           * */
-                                        : toLOWER_L1(ender);
+                        if (UNLIKELY(len + 1 > max_string_len)) {
+                            overflowed = TRUE;
+                            break;
+                        }
+
+                        *(s++) = (DEPENDS_SEMANTICS)
+                                 ? (char) toFOLD(ender)
+
+                                   /* Under /u, the fold of any character in
+                                    * the 0-255 range happens to be its
+                                    * lowercase equivalent, except for LATIN
+                                    * SMALL LETTER SHARP S, which was handled
+                                    * above, and the MICRO SIGN, whose fold
+                                    * requires UTF-8 to represent.  */
+                                 : (char) toLOWER_L1(ender);
                     }
                } /* End of adding current character to the node */
 
@@ -14294,166 +14587,287 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
 
            } /* End of loop through literal characters */
 
-            /* Here we have either exhausted the input or ran out of room in
-             * the node.  (If we encountered a character that can't be in the
-             * node, transfer is made directly to <loopdone>, and so we
-             * wouldn't have fallen off the end of the loop.)  In the latter
-             * case, we artificially have to split the node into two, because
-             * we just don't have enough space to hold everything.  This
-             * creates a problem if the final character participates in a
-             * multi-character fold in the non-final position, as a match that
-             * should have occurred won't, due to the way nodes are matched,
-             * and our artificial boundary.  So back off until we find a non-
-             * problematic character -- one that isn't at the beginning or
-             * middle of such a fold.  (Either it doesn't participate in any
-             * folds, or appears only in the final position of all the folds it
-             * does participate in.)  A better solution with far fewer false
-             * positives, and that would fill the nodes more completely, would
-             * be to actually have available all the multi-character folds to
-             * test against, and to back-off only far enough to be sure that
-             * this node isn't ending with a partial one.  <upper_parse> is set
-             * further below (if we need to reparse the node) to include just
-             * up through that final non-problematic character that this code
-             * identifies, so when it is set to less than the full node, we can
-             * skip the rest of this */
-            if (FOLD && p < RExC_end && upper_parse == MAX_NODE_STRING_SIZE) {
-                PERL_UINT_FAST8_T backup_count = 0;
-
-                const STRLEN full_len = len;
-
-               assert(len >= MAX_NODE_STRING_SIZE);
-
-                /* Here, <s> points to just beyond where we have output the
-                 * final character of the node.  Look backwards through the
-                 * string until find a non- problematic character */
-
-               if (! UTF) {
-
-                    /* This has no multi-char folds to non-UTF characters */
-                    if (ASCII_FOLD_RESTRICTED) {
-                        goto loopdone;
-                    }
+            /* Here we have either exhausted the input or run out of room in
+             * the node.  If the former, we are done.  (If we encountered a
+             * character that can't be in the node, transfer is made directly
+             * to <loopdone>, and so we wouldn't have fallen off the end of the
+             * loop.)  */
+            if (LIKELY(! overflowed)) {
+                goto loopdone;
+            }
+
+            /* Here we have run out of room.  We can grow plain EXACT and
+             * LEXACT nodes.  If the pattern is gigantic enough, though,
+             * eventually we'll have to artificially chunk the pattern into
+             * multiple nodes. */
+            if (! LOC && (node_type == EXACT || node_type == LEXACT)) {
+                Size_t overhead = 1 + regarglen[OP(REGNODE_p(ret))];
+                Size_t overhead_expansion = 0;
+                char temp[256];
+                Size_t max_nodes_for_string;
+                Size_t achievable;
+                SSize_t delta;
+
+                /* Here we couldn't fit the final character in the current
+                 * node, so it will have to be reparsed, no matter what else we
+                 * do */
+                p = oldp;
+
+                /* If it would have overflowed a regular EXACT node, switch
+                 * instead to an LEXACT.  The code below is structured so that
+                 * the actual growing code is common to changing from an EXACT
+                 * or just increasing the LEXACT size.  This means that we have
+                 * to save the string in the EXACT case before growing, and
+                 * then copy it afterwards to its new location */
+                if (node_type == EXACT) {
+                    overhead_expansion = regarglen[LEXACT] - regarglen[EXACT];
+                    RExC_emit += overhead_expansion;
+                    Copy(s0, temp, len, char);
+                }
+
+                /* Ready to grow.  If it was a plain EXACT, the string was
+                 * saved, and the first few bytes of it overwritten by adding
+                 * an argument field.  We assume, as we do elsewhere in this
+                 * file, that one byte of remaining input will translate into
+                 * one byte of output, and if that's too small, we grow again,
+                 * if too large the excess memory is freed at the end */
+
+                max_nodes_for_string = U16_MAX - overhead - overhead_expansion;
+                achievable = MIN(max_nodes_for_string,
+                                 current_string_nodes + STR_SZ(RExC_end - p));
+                delta = achievable - current_string_nodes;
+
+                /* If there is just no more room, go finish up this chunk of
+                 * the pattern. */
+                if (delta <= 0) {
+                    goto loopdone;
+                }
 
-                    while (--s >= s0 && IS_NON_FINAL_FOLD(*s)) {
-                        backup_count++;
-                    }
-                    len = s - s0 + 1;
-               }
+                change_engine_size(pRExC_state, delta + overhead_expansion);
+                current_string_nodes += delta;
+                max_string_len
+                           = sizeof(struct regnode) * current_string_nodes;
+                upper_fill = max_string_len + 1;
+
+                /* If the length was small, we know this was originally an
+                 * EXACT node now converted to LEXACT, and the string has to be
+                 * restored.  Otherwise the string was untouched.  260 is just
+                 * a number safely above 255 so don't have to worry about
+                 * getting it precise */
+                if (len < 260) {
+                    node_type = LEXACT;
+                    FILL_NODE(ret, node_type);
+                    s0 = STRING(REGNODE_p(ret));
+                    Copy(temp, s0, len, char);
+                    s = s0 + len;
+                }
+
+                goto continue_parse;
+            }
+            else if (! LOC) {
+
+                /* Here is /i.  Running out of room creates a problem if we are
+                 * folding, and the split happens in the middle of a
+                 * multi-character fold, as a match that should have occurred,
+                 * won't, due to the way nodes are matched, and our artificial
+                 * boundary.  So back off until we aren't splitting such a
+                 * fold.  If there is no such place to back off to, we end up
+                 * taking the entire node as-is.  This can happen if the node
+                 * consists entirely of 'f' or entirely of 's' characters (or
+                 * things that fold to them) as 'ff' and 'ss' are
+                 * multi-character folds.
+                 *
+                 * At this point:
+                 *  old_oldp  points to the beginning in the input of the
+                 *              penultimate character in the node.
+                 *  oldp      points to the beginning in the input of the
+                 *              final character in the node.
+                 *  p         points to the beginning in the input of the
+                 *              next character in the input, the one that won't
+                 *              fit in the node.
+                 *
+                 * We aren't in the middle of a multi-char fold unless the
+                 * final character in the node can appear in a non-final
+                 * position in such a fold.  Very few characters actually
+                 * participate in multi-character folds, and fewer still can be
+                 * in the non-final position.  But it's complicated to know
+                 * here if that final character is folded or not, so skip this
+                 * check */
+
+                           /* Make sure enough space for final char of node,
+                            * first char of following node, and the fold of the
+                            * following char (so we don't have to worry about
+                            * that fold running off the end) */
+                U8 foldbuf[UTF8_MAXBYTES_CASE * 5 + 1];
+                STRLEN fold_len;
+                UV folded;
+                char * const sav_oldp = oldp;
+
+                assert(FOLD);
+
+                /* The Unicode standard says that multi character folds consist
+                 * of either two or three characters.  So we create a buffer
+                 * containing a window of three.  The first is the final
+                 * character in the node (folded), and then the two that begin
+                 * the following node.   But if the first character of the
+                 * following node can't be in a non-final fold position, there
+                 * is no need to look at its successor character.  The macros
+                 * used below to check for multi character folds require folded
+                 * inputs, so we have to fold these.  (The fold of p was likely
+                 * calculated in the loop above, but it hasn't been saved, and
+                 * khw thinks it would be too entangled to change to do so) */
+
+                if (UTF || LIKELY(UCHARAT(p) != MICRO_SIGN)) {
+                    folded = _to_uni_fold_flags(ender,
+                                                foldbuf,
+                                                &fold_len,
+                                                FOLD_FLAGS_FULL);
+                }
                 else {
+                    foldbuf[0] = folded = MICRO_SIGN;
+                    fold_len = 1;
+                }
+
+                /* Here, foldbuf contains the fold of the first character in
+                 * the next node.  We may also need the next one (if there is
+                 * one) to get our third, but if the first character folded to
+                 * more than one, those extra one(s) will serve as the third.
+                 * Also, we don't need a third unless the previous one can
+                 * appear in a non-final position in a fold */
+                if (  ((RExC_end - p) > ((UTF) ? UVCHR_SKIP(ender) : 1))
+                    && (fold_len == 1 || (   UTF
+                                          && UVCHR_SKIP(folded) == fold_len))
+                    &&  UNLIKELY(_invlist_contains_cp(PL_NonFinalFold, folded)))
+                {
+                    if (UTF) {
+                        STRLEN next_fold_len;
+
+                        toFOLD_utf8_safe((U8*) p + UTF8SKIP(p),
+                                         (U8*) RExC_end, foldbuf + fold_len,
+                                         &next_fold_len);
+                        fold_len += next_fold_len;
+                    }
+                    else {
+                        if (UNLIKELY(p[1] == LATIN_SMALL_LETTER_SHARP_S)) {
+                            foldbuf[fold_len] = 's';
+                        }
+                        else {
+                            foldbuf[fold_len] = toLOWER_L1(p[1]);
+                        }
+                        fold_len++;
+                    }
+                }
 
-                    /* Point to the first byte of the final character */
-                    s = (char *) utf8_hop((U8 *) s, -1);
+                /* Here foldbuf contains the fold of p, and if appropriate
+                 * that of the character following p in the input. */
 
-                    while (s >= s0) {   /* Search backwards until find
-                                           a non-problematic char */
-                        if (UTF8_IS_INVARIANT(*s)) {
+                /* Search backwards until find a place that doesn't split a
+                 * multi-char fold */
+                while (1) {
+                    STRLEN s_len;
+                    char s_fold_buf[UTF8_MAXBYTES_CASE];
+                    char * s_fold = s_fold_buf;
 
-                            /* There are no ascii characters that participate
-                             * in multi-char folds under /aa.  In EBCDIC, the
-                             * non-ascii invariants are all control characters,
-                             * so don't ever participate in any folds. */
-                            if (ASCII_FOLD_RESTRICTED
-                                || ! IS_NON_FINAL_FOLD(*s))
-                            {
-                                break;
-                            }
+                    if (s <= s0) {
+
+                        /* There's no safe place in the node to split.  Quit so
+                         * will take the whole node */
+                        oldp = sav_oldp;
+                        break;
+                    }
+
+                    /* Backup 1 character.  The first time through this moves s
+                     * to point to the final character in the node */
+                    if (UTF) {
+                        s = (char *) utf8_hop_back((U8 *) s, -1, (U8 *) s0);
+                    }
+                    else {
+                        s--;
+                    }
+
+                    /* 's' may or may not be folded; so make sure it is, and
+                     * use just the final character in its fold (should there
+                     * be more than one) */
+                    if (UTF) {
+                        toFOLD_utf8_safe((U8*) s,
+                                         (U8*) s + UTF8SKIP(s),
+                                         (U8 *) s_fold_buf, &s_len);
+                        while (s_fold + UTF8SKIP(s_fold) < s_fold_buf + s_len)
+                        {
+                            s_fold += UTF8SKIP(s_fold);
                         }
-                        else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
-                            if (! IS_NON_FINAL_FOLD(EIGHT_BIT_UTF8_TO_NATIVE(
-                                                                  *s, *(s+1))))
-                            {
+                        s_len = UTF8SKIP(s_fold);
+                    }
+                    else {
+                        if (UNLIKELY(UCHARAT(s) == LATIN_SMALL_LETTER_SHARP_S))
+                        {
+                            s_fold_buf[0] = 's';
+                        }
+                        else {  /* This works for all other non-UTF-8 folds
+                                 */
+                            s_fold_buf[0] = toLOWER_L1(UCHARAT(s));
+                        }
+                        s_len = 1;
+                    }
+
+                    /* Unshift this character to the beginning of the buffer.
+                     * No-longer-needed trailing characters are overwritten.
+                     * */
+                    Move(foldbuf, foldbuf + s_len, sizeof(foldbuf) - s_len, U8);
+                    Copy(s_fold, foldbuf, s_len, U8);
+
+                    /* If this isn't a multi-character fold, we have found a
+                     * splittable place.  If this is the final character in the
+                     * node, that means the node is valid as-is, and can quit.
+                     * Otherwise, we note how much we can fill the node before
+                     * coming to a non-splittable position, and go parse it
+                     * again, stopping there. This is done because we know
+                     * where in the output to stop, but we don't have a map to
+                     * where that is in the input.  One could be created, but
+                     * it seems like overkill for such a rare event as we are
+                     * dealing with here */
+                    if (UTF) {
+                        if (! is_MULTI_CHAR_FOLD_utf8_safe(foldbuf,
+                                                foldbuf + UTF8_MAXBYTES_CASE))
+                        {
+                            upper_fill = s + UTF8SKIP(s) - s0;
+                            if (LIKELY(oldp)) {
                                 break;
                             }
+                            goto reparse;
                         }
-                        else if (! _invlist_contains_cp(
-                                        PL_NonFinalFold,
-                                        valid_utf8_to_uvchr((U8 *) s, NULL)))
-                        {
+                    }
+                    else if (! is_MULTI_CHAR_FOLD_latin1_safe(foldbuf,
+                                                foldbuf + UTF8_MAXBYTES_CASE))
+                    {
+                        upper_fill = s + 1 - s0;
+                        if (LIKELY(oldp)) {
                             break;
                         }
+                        goto reparse;
+                    }
 
-                        /* Here, the current character is problematic in that
-                         * it does occur in the non-final position of some
-                         * fold, so try the character before it, but have to
-                         * special case the very first byte in the string, so
-                         * we don't read outside the string */
-                        s = (s == s0) ? s -1 : (char *) utf8_hop((U8 *) s, -1);
-                        backup_count++;
-                    } /* End of loop backwards through the string */
-
-                    /* If there were only problematic characters in the string,
-                     * <s> will point to before s0, in which case the length
-                     * should be 0, otherwise include the length of the
-                     * non-problematic character just found */
-                    len = (s < s0) ? 0 : s - s0 + UTF8SKIP(s);
-               }
-
-                /* Here, have found the final character, if any, that is
-                 * non-problematic as far as ending the node without splitting
-                 * it across a potential multi-char fold.  <len> contains the
-                 * number of bytes in the node up-to and including that
-                 * character, or is 0 if there is no such character, meaning
-                 * the whole node contains only problematic characters.  In
-                 * this case, give up and just take the node as-is.  We can't
-                 * do any better */
-                if (len == 0) {
-                    len = full_len;
+                    oldp = old_oldp;
+                    old_oldp = NULL;
 
-                } else {
+                } /* End of loop backing up through the node */
 
-                    /* Here, the node does contain some characters that aren't
-                     * problematic.  If we didn't have to backup any, then the
-                     * final character in the node is non-problematic, and we
-                     * can take the node as-is */
-                    if (backup_count == 0) {
-                        goto loopdone;
-                    }
-                    else if (backup_count == 1) {
-
-                        /* If the final character is problematic, but the
-                         * penultimate is not, back-off that last character to
-                         * later start a new node with it */
-                        p = oldp;
-                        goto loopdone;
-                    }
+                /* Here the node consists entirely of non-final multi-char
+                 * folds.  (Likely it is all 'f's or all 's's.)  There's no
+                 * decent place to split it, so give up and just take the whole
+                 * thing */
 
-                    /* Here, the final non-problematic character is earlier
-                     * in the input than the penultimate character.  What we do
-                     * is reparse from the beginning, going up only as far as
-                     * this final ok one, thus guaranteeing that the node ends
-                     * in an acceptable character.  The reason we reparse is
-                     * that we know how far in the character is, but we don't
-                     * know how to correlate its position with the input parse.
-                     * An alternate implementation would be to build that
-                     * correlation as we go along during the original parse,
-                     * but that would entail extra work for every node, whereas
-                     * this code gets executed only when the string is too
-                     * large for the node, and the final two characters are
-                     * problematic, an infrequent occurrence.  Yet another
-                     * possible strategy would be to save the tail of the
-                     * string, and the next time regatom is called, initialize
-                     * with that.  The problem with this is that unless you
-                     * back off one more character, you won't be guaranteed
-                     * regatom will get called again, unless regbranch,
-                     * regpiece ... are also changed.  If you do back off that
-                     * extra character, so that there is input guaranteed to
-                     * force calling regatom, you can't handle the case where
-                     * just the first character in the node is acceptable.  I
-                     * (khw) decided to try this method which doesn't have that
-                     * pitfall; if performance issues are found, we can do a
-                     * combination of the current approach plus that one */
-                    upper_parse = len;
-                    len = 0;
-                    s = s0;
-                    goto reparse;
-                }
            }   /* End of verifying node ends with an appropriate char */
 
+            p = oldp;
+
           loopdone:   /* Jumped to when encounters something that shouldn't be
                          in the node */
 
-            /* Free up any over-allocated space */
-            change_engine_size(pRExC_state, - (initial_size - STR_SZ(len)));
+            /* Free up any over-allocated space; cast is to silence bogus
+             * warning in MS VC */
+            change_engine_size(pRExC_state,
+                        - (Ptrdiff_t) (current_string_nodes - STR_SZ(len)));
 
             /* I (khw) don't know if you can get here with zero length, but the
              * old code handled this situation by creating a zero-length EXACT
@@ -14472,7 +14886,13 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                     else if (requires_utf8_target) {
                         node_type = EXACT_ONLY8;
                     }
-                } else if (FOLD) {
+                }
+                else if (node_type == LEXACT) {
+                    if (requires_utf8_target) {
+                        node_type = LEXACT_ONLY8;
+                    }
+                }
+                else if (FOLD) {
                     if (    UNLIKELY(has_micro_sign || has_ss)
                         && (node_type == EXACTFU || (   node_type == EXACTF
                                                      && maybe_exactfu)))
@@ -14489,9 +14909,29 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                         if (maybe_exactfu) {
                             node_type = EXACTFLU8;
                         }
+                        else if (UNLIKELY(
+                             _invlist_contains_cp(PL_HasMultiCharFold, ender)))
+                        {
+                            /* A character that folds to more than one will
+                             * match multiple characters, so can't be SIMPLE.
+                             * We don't have to worry about this with EXACTFLU8
+                             * nodes just above, as they have already been
+                             * folded (since the fold doesn't vary at run
+                             * time).  Here, if the final character in the node
+                             * folds to multiple, it can't be simple.  (This
+                             * only has an effect if the node has only a single
+                             * character, hence the final one, as elsewhere we
+                             * turn off simple for nodes whose length > 1) */
+                            maybe_SIMPLE = 0;
+                        }
                     }
                     else if (node_type == EXACTF) {  /* Means is /di */
 
+                        /* This intermediate variable is needed solely because
+                         * the asserts in the macro where used exceed Win32's
+                         * literal string capacity */
+                        char first_char = * STRING(REGNODE_p(ret));
+
                         /* If 'maybe_exactfu' is clear, then we need to stay
                          * /di.  If it is set, it means there are no code
                          * points that match differently depending on UTF8ness
@@ -14500,7 +14940,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                         if (! maybe_exactfu) {
                             RExC_seen_d_op = TRUE;
                         }
-                        else if (   isALPHA_FOLD_EQ(* STRING(REGNODE_p(ret)), 's')
+                        else if (   isALPHA_FOLD_EQ(first_char, 's')
                                  || isALPHA_FOLD_EQ(ender, 's'))
                         {
                             /* But, if the node begins or ends in an 's' we
@@ -14525,11 +14965,11 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                 }
 
                 OP(REGNODE_p(ret)) = node_type;
-                STR_LEN(REGNODE_p(ret)) = len;
+                setSTR_LEN(REGNODE_p(ret), len);
                 RExC_emit += STR_SZ(len);
 
                 /* If the node isn't a single character, it can't be SIMPLE */
-                if (len > ((UTF) ? UVCHR_SKIP(ender) : 1)) {
+                if (len > (Size_t) ((UTF) ? UTF8SKIP(STRING(REGNODE_p(ret))) : 1)) {
                     maybe_SIMPLE = 0;
                 }
 
@@ -14581,7 +15021,7 @@ S_populate_ANYOF_from_invlist(pTHX_ regnode *node, SV** invlist_ptr)
     assert(PL_regkind[OP(node)] == ANYOF);
 
     /* There is no bitmap for this node type */
-    if (OP(node) == ANYOFH) {
+    if (inRANGE(OP(node), ANYOFH, ANYOFHr)) {
         return;
     }
 
@@ -15690,15 +16130,20 @@ redo_curchar:
                             RExC_parse = RExC_end;
                         }
                         else if (RExC_parse != save_parse) {
-                            RExC_parse += (UTF) ? UTF8SKIP(RExC_parse) : 1;
+                            RExC_parse += (UTF)
+                                          ? UTF8_SAFE_SKIP(RExC_parse, RExC_end)
+                                          : 1;
                         }
                         vFAIL("Expecting '(?flags:(?[...'");
                     }
 
                     /* Recurse, with the meat of the embedded expression */
                     RExC_parse++;
-                    (void) handle_regex_sets(pRExC_state, &current, flagp,
-                                                    depth+1, oregcomp_parse);
+                    if (! handle_regex_sets(pRExC_state, &current, flagp,
+                                                    depth+1, oregcomp_parse))
+                    {
+                        RETURN_FAIL_ON_RESTART(*flagp, flagp);
+                    }
 
                     /* Here, 'current' contains the embedded expression's
                      * inversion list, and RExC_parse points to the trailing
@@ -15752,8 +16197,8 @@ redo_curchar:
                               FALSE, /* Require return to be an ANYOF */
                               &current))
                 {
-                    FAIL2("panic: regclass returned failure to handle_sets, "
-                          "flags=%#" UVxf, (UV) *flagp);
+                    RETURN_FAIL_ON_RESTART(*flagp, flagp);
+                    goto regclass_failed;
                 }
 
                 /* regclass() will return with parsing just the \ sequence,
@@ -15789,8 +16234,8 @@ redo_curchar:
                                 FALSE, /* Require return to be an ANYOF */
                                 &current))
                 {
-                    FAIL2("panic: regclass returned failure to handle_sets, "
-                          "flags=%#" UVxf, (UV) *flagp);
+                    RETURN_FAIL_ON_RESTART(*flagp, flagp);
+                    goto regclass_failed;
                 }
 
                 if (! current) {
@@ -16150,9 +16595,10 @@ redo_curchar:
         RExC_flags |= RXf_PMf_FOLD;
     }
 
-    if (!node)
-        FAIL2("panic: regclass returned failure to handle_sets, flags=%#" UVxf,
-                    PTR2UV(flagp));
+    if (!node) {
+        RETURN_FAIL_ON_RESTART(*flagp, flagp);
+        goto regclass_failed;
+    }
 
     /* Fix up the node type if we are in locale.  (We have pretended we are
      * under /u for the purposes of regclass(), as this construct will only
@@ -16183,6 +16629,10 @@ redo_curchar:
     nextchar(pRExC_state);
     Set_Node_Length(REGNODE_p(node), RExC_parse - oregcomp_parse + 1); /* MJD */
     return node;
+
+  regclass_failed:
+    FAIL2("panic: regclass returned failure to handle_sets, " "flags=%#" UVxf,
+                                                                (UV) *flagp);
 }
 
 #ifdef ENABLE_REGEX_SETS_DEBUGGING
@@ -16433,7 +16883,7 @@ STATIC regnode_offset
 S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                  const bool stop_at_1,  /* Just parse the next thing, don't
                                            look for a full character class */
-                 bool allow_multi_folds,
+                 bool allow_mutiple_chars,
                  const bool silence_non_portable,   /* Don't output warnings
                                                        about too large
                                                        characters */
@@ -16588,7 +17038,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
 #if UNICODE_MAJOR_VERSION < 3 /* no multifolds in early Unicode */      \
     || (UNICODE_MAJOR_VERSION == 3 && UNICODE_DOT_VERSION == 0          \
                                    && UNICODE_DOT_DOT_VERSION == 0)
-    allow_multi_folds = FALSE;
+    allow_mutiple_chars = FALSE;
 #endif
 
     /* We include the /i status at the beginning of this so that we can
@@ -16604,7 +17054,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
     if (UCHARAT(RExC_parse) == '^') {  /* Complement the class */
        RExC_parse++;
         invert = TRUE;
-        allow_multi_folds = FALSE;
+        allow_mutiple_chars = FALSE;
         MARK_NAUGHTY(1);
         SKIP_BRACKETED_WHITE_SPACE(skip_white, RExC_parse);
     }
@@ -16794,11 +17244,16 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                               "Ignoring zero length \\N{} in character class");
                         }
                         else { /* cp_count > 1 */
+                            assert(cp_count > 1);
                             if (! RExC_in_multi_char_class) {
-                                if (invert || range || *RExC_parse == '-') {
+                                if ( ! allow_mutiple_chars
+                                    || invert
+                                    || range
+                                    || *RExC_parse == '-')
+                                {
                                     if (strict) {
                                         RExC_parse--;
-                                        vFAIL("\\N{} in inverted character class or as a range end-point is restricted to one character");
+                                        vFAIL("\\N{} here is restricted to one character");
                                     }
                                     ckWARNreg(RExC_parse, "Using just the first character returned by \\N{} in character class");
                                     break; /* <value> contains the first code
@@ -16876,7 +17331,9 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
 
                }   /* The \p isn't immediately followed by a '{' */
                else if (! isALPHA(*RExC_parse)) {
-                    RExC_parse += (UTF) ? UTF8SKIP(RExC_parse) : 1;
+                    RExC_parse += (UTF)
+                                  ? UTF8_SAFE_SKIP(RExC_parse, RExC_end)
+                                  : 1;
                     vFAIL2("Character following \\%c must be '{' or a "
                            "single-character Unicode property name",
                            (U8) value);
@@ -16898,6 +17355,12 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                     SV * prop_definition = parse_uniprop_string(
                                             name, n, UTF, FOLD,
                                             FALSE, /* This is compile-time */
+
+                                            /* We can't defer this defn when
+                                             * the full result is required in
+                                             * this call */
+                                            ! cBOOL(ret_invlist),
+
                                             &user_defined,
                                             msg,
                                             0 /* Base level */
@@ -17045,7 +17508,9 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                    RExC_parse += numlen;
                     if (numlen != 3) {
                         if (strict) {
-                            RExC_parse += (UTF) ? UTF8SKIP(RExC_parse) : 1;
+                            RExC_parse += (UTF)
+                                          ? UTF8_SAFE_SKIP(RExC_parse, RExC_end)
+                                          : 1;
                             vFAIL("Need exactly 3 octal digits");
                         }
                         else if (   numlen < 3 /* like \08, \178 */
@@ -17117,40 +17582,60 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
             ) {
                 SV* scratch_list = NULL;
 
-                /* What the Posix classes (like \w, [:space:]) match in locale
-                 * isn't knowable under locale until actual match time.  A
+                /* What the Posix classes (like \w, [:space:]) match isn't
+                 * generally knowable under locale until actual match time.  A
                  * special node is used for these which has extra space for a
                  * bitmap, with a bit reserved for each named class that is to
-                 * be matched against.  This isn't needed for \p{} and
+                 * be matched against.  (This isn't needed for \p{} and
                  * pseudo-classes, as they are not affected by locale, and
-                 * hence are dealt with separately */
-                POSIXL_SET(posixl, namedclass);
-                has_runtime_dependency |= HAS_L_RUNTIME_DEPENDENCY;
-                anyof_flags |= ANYOF_MATCHES_POSIXL;
-
-                /* The above-Latin1 characters are not subject to locale rules.
-                 * Just add them to the unconditionally-matched list */
-
-                /* Get the list of the above-Latin1 code points this matches */
-                _invlist_intersection_maybe_complement_2nd(PL_AboveLatin1,
-                                        PL_XPosix_ptrs[classnum],
-
-                                        /* Odd numbers are complements, like
-                                        * NDIGIT, NASCII, ... */
-                                        namedclass % 2 != 0,
-                                        &scratch_list);
-                /* Checking if 'cp_list' is NULL first saves an extra clone.
-                 * Its reference count will be decremented at the next union,
-                 * etc, or if this is the only instance, at the end of the
-                 * routine */
-                if (! cp_list) {
-                    cp_list = scratch_list;
-                }
-                else {
-                    _invlist_union(cp_list, scratch_list, &cp_list);
-                    SvREFCNT_dec_NN(scratch_list);
+                 * hence are dealt with separately.)  However, if a named class
+                 * and its complement are both present, then it matches
+                 * everything, and there is no runtime dependency.  Odd numbers
+                 * are the complements of the next lower number, so xor works.
+                 * (Note that something like [\w\D] should match everything,
+                 * because \d should be a proper subset of \w.  But rather than
+                 * trust that the locale is well behaved, we leave this to
+                 * runtime to sort out) */
+                if (POSIXL_TEST(posixl, namedclass ^ 1)) {
+                    cp_list = _add_range_to_invlist(cp_list, 0, UV_MAX);
+                    POSIXL_ZERO(posixl);
+                    has_runtime_dependency &= ~HAS_L_RUNTIME_DEPENDENCY;
+                    anyof_flags &= ~ANYOF_MATCHES_POSIXL;
+                    continue;   /* We could ignore the rest of the class, but
+                                   best to parse it for any errors */
+                }
+                else { /* Here, isn't the complement of any already parsed
+                          class */
+                    POSIXL_SET(posixl, namedclass);
+                    has_runtime_dependency |= HAS_L_RUNTIME_DEPENDENCY;
+                    anyof_flags |= ANYOF_MATCHES_POSIXL;
+
+                    /* The above-Latin1 characters are not subject to locale
+                     * rules.  Just add them to the unconditionally-matched
+                     * list */
+
+                    /* Get the list of the above-Latin1 code points this
+                     * matches */
+                    _invlist_intersection_maybe_complement_2nd(PL_AboveLatin1,
+                                            PL_XPosix_ptrs[classnum],
+
+                                            /* Odd numbers are complements,
+                                             * like NDIGIT, NASCII, ... */
+                                            namedclass % 2 != 0,
+                                            &scratch_list);
+                    /* Checking if 'cp_list' is NULL first saves an extra
+                     * clone.  Its reference count will be decremented at the
+                     * next union, etc, or if this is the only instance, at the
+                     * end of the routine */
+                    if (! cp_list) {
+                        cp_list = scratch_list;
+                    }
+                    else {
+                        _invlist_union(cp_list, scratch_list, &cp_list);
+                        SvREFCNT_dec_NN(scratch_list);
+                    }
+                    continue;   /* Go get next character */
                 }
-                continue;   /* Go get next character */
             }
             else {
 
@@ -17312,7 +17797,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
          *  "ss"  =~ /^[^\xDF]+$/i => N
          *
          * See [perl #89750] */
-        if (FOLD && allow_multi_folds && value == prevvalue) {
+        if (FOLD && allow_mutiple_chars && value == prevvalue) {
             if (    value == LATIN_SMALL_LETTER_SHARP_S
                 || (value > 255 && _invlist_contains_cp(PL_HasMultiCharFold,
                                                         value)))
@@ -17486,7 +17971,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                            literal
                         );
                 }
-                else if isMNEMONIC_CNTRL(value) {
+                else if (isMNEMONIC_CNTRL(value)) {
                     vWARN4(RExC_parse,
                            "\"%.*s\" is more clearly written simply as \"%s\"",
                            (int) (RExC_parse - rangebegin),
@@ -18529,9 +19014,9 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                     ret = regnode_guts(pRExC_state, op, len, "exact");
                     FILL_NODE(ret, op);
                     RExC_emit += 1 + STR_SZ(len);
-                    STR_LEN(REGNODE_p(ret)) = len;
+                    setSTR_LEN(REGNODE_p(ret), len);
                     if (len == 1) {
-                        *STRING(REGNODE_p(ret)) = value;
+                        *STRING(REGNODE_p(ret)) = (U8) value;
                     }
                     else {
                         uvchr_to_utf8((U8 *) STRING(REGNODE_p(ret)), value);
@@ -18617,7 +19102,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
 
                     full_cp_count += this_end - this_start + 1;
                 }
-                invlist_iterfinish(cp_list);
 
                 /* At the end of the loop, we count how many bits differ from
                  * the bits in lowest code point, call the count 'd'.  If the
@@ -18646,8 +19130,10 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                     ret = reganode(pRExC_state, op, lowest_cp);
                     FLAGS(REGNODE_p(ret)) = ANYOFM_mask;
                 }
+
+              done_anyofm:
+                invlist_iterfinish(cp_list);
             }
-          done_anyofm:
 
             if (inverted) {
                 _invlist_invert(cp_list);
@@ -18656,6 +19142,13 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
             if (op != END) {
                 goto not_anyof;
             }
+
+            /* XXX We could create an ANYOFR_LOW node here if we saved above if
+             * all were invariants, it wasn't inverted, and there is a single
+             * range.  This would be faster than some of the posix nodes we
+             * create below like /\d/a, but would be twice the size.  Without
+             * having actually measured the gain, khw doesn't think the
+             * tradeoff is really worth it */
         }
 
         if (! (anyof_flags & ANYOF_LOCALE_FLAGS)) {
@@ -18772,26 +19265,92 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
             SvREFCNT_dec(intersection);
         }
 
-        /* If didn't find an optimization and there is no need for a
-        * bitmap, optimize to indicate that */
+        /* If didn't find an optimization and there is no need for a bitmap,
+         * optimize to indicate that */
         if (     start[0] >= NUM_ANYOF_CODE_POINTS
             && ! LOC
-            && ! upper_latin1_only_utf8_matches)
+            && ! upper_latin1_only_utf8_matches
+            &&   anyof_flags == 0)
         {
+            U8 low_utf8[UTF8_MAXBYTES+1];
+            UV highest_cp = invlist_highest(cp_list);
+
             op = ANYOFH;
+
+            /* Currently the maximum allowed code point by the system is
+             * IV_MAX.  Higher ones are reserved for future internal use.  This
+             * particular regnode can be used for higher ones, but we can't
+             * calculate the code point of those.  IV_MAX suffices though, as
+             * it will be a large first byte */
+            (void) uvchr_to_utf8(low_utf8, MIN(start[0], IV_MAX));
+
+            /* We store the lowest possible first byte of the UTF-8
+             * representation, using the flags field.  This allows for quick
+             * ruling out of some inputs without having to convert from UTF-8
+             * to code point.  For EBCDIC, this has to be I8. */
+            anyof_flags = NATIVE_UTF8_TO_I8(low_utf8[0]);
+
+            /* If the first UTF-8 start byte for the highest code point in the
+             * range is suitably small, we may be able to get an upper bound as
+             * well */
+            if (highest_cp <= IV_MAX) {
+                U8 high_utf8[UTF8_MAXBYTES+1];
+
+                (void) uvchr_to_utf8(high_utf8, highest_cp);
+
+                /* If the lowest and highest are the same, we can get an exact
+                 * first byte instead of just a minimum.  We signal this with a
+                 * different regnode */
+                if (low_utf8[0] == high_utf8[0]) {
+
+                    /* No need to convert to I8 for EBCDIC as this is an exact
+                     * match */
+                    anyof_flags = low_utf8[0];
+                    op = ANYOFHb;
+                }
+                else if (NATIVE_UTF8_TO_I8(high_utf8[0]) <= MAX_ANYOF_HRx_BYTE)
+                {
+
+                    /* Here, the high byte is not the same as the low, but is
+                     * small enough that it's reasonable to have a loose upper
+                     * bound, which is packed in with the strict lower bound.
+                     * See comments at the definition of MAX_ANYOF_HRx_BYTE.
+                     * On EBCDIC platforms, I8 is used.  On ASCII platforms I8
+                     * is the same thing as UTF-8 */
+
+                    U8 bits = 0;
+                    U8 max_range_diff = MAX_ANYOF_HRx_BYTE - anyof_flags;
+                    U8 range_diff = NATIVE_UTF8_TO_I8(high_utf8[0])
+                                  - anyof_flags;
+
+                    if (range_diff <= max_range_diff / 8) {
+                        bits = 3;
+                    }
+                    else if (range_diff <= max_range_diff / 4) {
+                        bits = 2;
+                    }
+                    else if (range_diff <= max_range_diff / 2) {
+                        bits = 1;
+                    }
+                    anyof_flags = (anyof_flags - 0xC0) << 2 | bits;
+                    op = ANYOFHr;
+                }
+            }
+
+            goto done_finding_op;
         }
     }   /* End of seeing if can optimize it into a different node */
 
   is_anyof: /* It's going to be an ANYOF node. */
-    if (op != ANYOFH) {
-        op = (has_runtime_dependency & HAS_D_RUNTIME_DEPENDENCY)
-             ? ANYOFD
-             : ((posixl)
-                ? ANYOFPOSIXL
-                : ((LOC)
-                   ? ANYOFL
-                   : ANYOF));
-    }
+    op = (has_runtime_dependency & HAS_D_RUNTIME_DEPENDENCY)
+         ? ANYOFD
+         : ((posixl)
+            ? ANYOFPOSIXL
+            : ((LOC)
+               ? ANYOFL
+               : ANYOF));
+
+  done_finding_op:
 
     ret = regnode_guts(pRExC_state, op, regarglen[op], "anyof");
     FILL_NODE(ret, op);        /* We set the argument later */
@@ -18978,6 +19537,7 @@ Perl__get_regclass_nonbitmap_data(pTHX_ const regexp *prog,
                                                            stored here for just
                                                            this occasion */
                             TRUE,           /* run time */
+                            FALSE,          /* This call must find the defn */
                             si,             /* The property definition  */
                             &user_defined,
                             msg,
@@ -19254,7 +19814,9 @@ S_nextchar(pTHX_ RExC_state_t *pRExC_state)
                || UTF8_IS_INVARIANT(*RExC_parse)
                || UTF8_IS_START(*RExC_parse));
 
-        RExC_parse += (UTF) ? UTF8SKIP(RExC_parse) : 1;
+        RExC_parse += (UTF)
+                      ? UTF8_SAFE_SKIP(RExC_parse, RExC_end)
+                      : 1;
 
         skip_to_be_ignored_text(pRExC_state, &RExC_parse,
                                 FALSE /* Don't force /x */ );
@@ -19264,8 +19826,9 @@ S_nextchar(pTHX_ RExC_state_t *pRExC_state)
 STATIC void
 S_change_engine_size(pTHX_ RExC_state_t *pRExC_state, const Ptrdiff_t size)
 {
-    /* 'size' is the delta to add or subtract from the current memory allocated
-     * to the regex engine being constructed */
+    /* 'size' is the delta number of smallest regnode equivalents to add or
+     * subtract from the current memory allocated to the regex engine being
+     * constructed. */
 
     PERL_ARGS_ASSERT_CHANGE_ENGINE_SIZE;
 
@@ -19297,8 +19860,8 @@ S_change_engine_size(pTHX_ RExC_state_t *pRExC_state, const Ptrdiff_t size)
 STATIC regnode_offset
 S_regnode_guts(pTHX_ RExC_state_t *pRExC_state, const U8 op, const STRLEN extra_size, const char* const name)
 {
-    /* Allocate a regnode for 'op', with 'extra_size' extra space.  It aligns
-     * and increments RExC_size and RExC_emit
+    /* Allocate a regnode for 'op', with 'extra_size' extra (smallest) regnode
+     * equivalents space.  It aligns and increments RExC_size and RExC_emit
      *
      * It returns the regnode's offset into the regex engine program */
 
@@ -19424,7 +19987,11 @@ S_reginsert(pTHX_ RExC_state_t *pRExC_state, const U8 op,
     src = REGNODE_p(RExC_emit);
     RExC_emit += size;
     dst = REGNODE_p(RExC_emit);
-    if (RExC_open_parens) {
+
+    /* If we are in a "count the parentheses" pass, the numbers are unreliable,
+     * and [perl #133871] shows this can lead to problems, so skip this
+     * realignment of parens until a later pass when they are reliable */
+    if (! IN_PARENS_PASS && RExC_open_parens) {
         int paren;
         /*DEBUG_PARSE_FMT("inst"," - %" IVdf, (IV)RExC_npar);*/
         /* remember that RExC_npar is rex->nparens + 1,
@@ -19497,10 +20064,13 @@ S_reginsert(pTHX_ RExC_state_t *pRExC_state, const U8 op,
 }
 
 /*
-- regtail - set the next-pointer at the end of a node chain of p to val.
+- regtail - set the next-pointer at the end of a node chain of p to val.  If
+            that value won't fit in the space available, instead returns FALSE.
+            (Except asserts if we can't fit in the largest space the regex
+            engine is designed for.)
 - SEE ALSO: regtail_study
 */
-STATIC void
+STATIC bool
 S_regtail(pTHX_ RExC_state_t * pRExC_state,
                 const regnode_offset p,
                 const regnode_offset val,
@@ -19533,11 +20103,21 @@ S_regtail(pTHX_ RExC_state_t * pRExC_state,
     }
 
     if (reg_off_by_arg[OP(REGNODE_p(scan))]) {
+        assert((UV) (val - scan) <= U32_MAX);
         ARG_SET(REGNODE_p(scan), val - scan);
     }
     else {
+        if (val - scan > U16_MAX) {
+            /* Populate this with something that won't loop and will likely
+             * lead to a crash if the caller ignores the failure return, and
+             * execution continues */
+            NEXT_OFF(REGNODE_p(scan)) = U16_MAX;
+            return FALSE;
+        }
         NEXT_OFF(REGNODE_p(scan)) = val - scan;
     }
+
+    return TRUE;
 }
 
 #ifdef DEBUGGING
@@ -19554,10 +20134,14 @@ that it is purely analytical.
 Currently only used when in DEBUG mode. The macro REGTAIL_STUDY() is used
 to control which is which.
 
+This used to return a value that was ignored.  It was a problem that it is
+#ifdef'd to be another function that didn't return a value.  khw has changed it
+so both currently return a pass/fail return.
+
 */
 /* TODO: All four parms should be const */
 
-STATIC U8
+STATIC bool
 S_regtail_study(pTHX_ RExC_state_t *pRExC_state, regnode_offset p,
                       const regnode_offset val, U32 depth)
 {
@@ -19581,12 +20165,14 @@ S_regtail_study(pTHX_ RExC_state_t *pRExC_state, regnode_offset p,
            bool unfolded_multi_char;   /* Unexamined in this routine */
             if (join_exact(pRExC_state, scan, &min,
                            &unfolded_multi_char, 1, REGNODE_p(val), depth+1))
-                return EXACT;
+                return TRUE; /* Was return EXACT */
        }
 #endif
         if ( exact ) {
             switch (OP(REGNODE_p(scan))) {
+                case LEXACT:
                 case EXACT:
+                case LEXACT_ONLY8:
                 case EXACT_ONLY8:
                 case EXACTL:
                 case EXACTF:
@@ -19631,13 +20217,21 @@ S_regtail_study(pTHX_ RExC_state_t *pRExC_state, regnode_offset p,
         );
     });
     if (reg_off_by_arg[OP(REGNODE_p(scan))]) {
+        assert((UV) (val - scan) <= U32_MAX);
        ARG_SET(REGNODE_p(scan), val - scan);
     }
     else {
+        if (val - scan > U16_MAX) {
+            /* Populate this with something that won't loop and will likely
+             * lead to a crash if the caller ignores the failure return, and
+             * execution continues */
+            NEXT_OFF(REGNODE_p(scan)) = U16_MAX;
+            return FALSE;
+        }
        NEXT_OFF(REGNODE_p(scan)) = val - scan;
     }
 
-    return exact;
+    return TRUE; /* Was 'return exact' */
 }
 #endif
 
@@ -19908,11 +20502,16 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_
 
     SvPVCLEAR(sv);
 
-    if (OP(o) > REGNODE_MAX)           /* regnode.type is unsigned */
-       /* It would be nice to FAIL() here, but this may be called from
-          regexec.c, and it would be hard to supply pRExC_state. */
-       Perl_croak(aTHX_ "Corrupted regexp opcode %d > %d",
-                                              (int)OP(o), (int)REGNODE_MAX);
+    if (OP(o) > REGNODE_MAX) {          /* regnode.type is unsigned */
+        if (pRExC_state) {  /* This gives more info, if we have it */
+            FAIL3("panic: corrupted regexp opcode %d > %d",
+                  (int)OP(o), (int)REGNODE_MAX);
+        }
+        else {
+            Perl_croak(aTHX_ "panic: corrupted regexp opcode %d > %d",
+                             (int)OP(o), (int)REGNODE_MAX);
+        }
+    }
     sv_catpv(sv, PL_reg_name[OP(o)]); /* Take off const! */
 
     k = PL_regkind[OP(o)];
@@ -19995,7 +20594,7 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_
             name_list= RExC_paren_name_list;
         }
         if (name_list) {
-            if ( k != REF || (OP(o) < NREF)) {
+            if ( k != REF || (OP(o) < REFN)) {
                 SV **name= av_fetch(name_list, parno, 0 );
                if (name)
                    Perl_sv_catpvf(aTHX_ sv, " '%" SVf "'", SVfARG(*name));
@@ -20049,7 +20648,9 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_
         /* 2: embedded, otherwise 1 */
        Perl_sv_catpvf(aTHX_ sv, "[%d]", o->flags);
     else if (k == ANYOF) {
-       const U8 flags = ANYOF_FLAGS(o);
+       const U8 flags = inRANGE(OP(o), ANYOFH, ANYOFHr)
+                          ? 0
+                          : ANYOF_FLAGS(o);
         bool do_sep = FALSE;    /* Do we need to separate various components of
                                    the output? */
         /* Set if there is still an unresolved user-defined property */
@@ -20105,7 +20706,7 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_
         /* Ready to start outputting.  First, the initial left bracket */
        Perl_sv_catpvf(aTHX_ sv, "[%s", PL_colors[0]);
 
-        if (OP(o) != ANYOFH) {
+        if (! inRANGE(OP(o), ANYOFH, ANYOFHr)) {
             /* Then all the things that could fit in the bitmap */
             do_sep = put_charclass_bitmap_innards(sv,
                                                   ANYOF_BITMAP(o),
@@ -20203,6 +20804,22 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_
         /* And finally the matching, closing ']' */
        Perl_sv_catpvf(aTHX_ sv, "%s]", PL_colors[1]);
 
+        if (inRANGE(OP(o), ANYOFH, ANYOFHr)) {
+            U8 lowest = (OP(o) != ANYOFHr)
+                         ? FLAGS(o)
+                         : LOWEST_ANYOF_HRx_BYTE(FLAGS(o));
+            U8 highest = (OP(o) == ANYOFHb)
+                         ? lowest
+                         : OP(o) == ANYOFH
+                           ? 0xFF
+                           : HIGHEST_ANYOF_HRx_BYTE(FLAGS(o));
+            Perl_sv_catpvf(aTHX_ sv, " (First UTF-8 byte=%02X", lowest);
+            if (lowest != highest) {
+                Perl_sv_catpvf(aTHX_ sv, "-%02X", highest);
+            }
+            Perl_sv_catpvf(aTHX_ sv, ")");
+        }
+
         SvREFCNT_dec(unresolved);
     }
     else if (k == ANYOFM) {
@@ -20245,8 +20862,13 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_
         assert(FLAGS(o) < C_ARRAY_LENGTH(bounds));
         sv_catpv(sv, bounds[FLAGS(o)]);
     }
-    else if (k == BRANCHJ && (OP(o) == UNLESSM || OP(o) == IFMATCH))
-       Perl_sv_catpvf(aTHX_ sv, "[%d]", -(o->flags));
+    else if (k == BRANCHJ && (OP(o) == UNLESSM || OP(o) == IFMATCH)) {
+       Perl_sv_catpvf(aTHX_ sv, "[%d", -(o->flags));
+        if (o->next_off) {
+            Perl_sv_catpvf(aTHX_ sv, "..-%d", o->flags - o->next_off);
+        }
+       Perl_sv_catpvf(aTHX_ sv, "]");
+    }
     else if (OP(o) == SBOL)
         Perl_sv_catpvf(aTHX_ sv, " /%s/", o->flags ? "\\A" : "^");
 
@@ -20390,7 +21012,23 @@ Perl_reg_temp_copy(pTHX_ REGEXP *dsv, REGEXP *ssv)
     if (!dsv)
        dsv = (REGEXP*) newSV_type(SVt_REGEXP);
     else {
+        assert(SvTYPE(dsv) == SVt_REGEXP || (SvTYPE(dsv) == SVt_PVLV));
+
+        /* our only valid caller, sv_setsv_flags(), should have done
+         * a SV_CHECK_THINKFIRST_COW_DROP() by now */
+        assert(!SvOOK(dsv));
+        assert(!SvIsCOW(dsv));
+        assert(!SvROK(dsv));
+
+        if (SvPVX_const(dsv)) {
+            if (SvLEN(dsv))
+                Safefree(SvPVX(dsv));
+            SvPVX(dsv) = NULL;
+        }
+        SvLEN_set(dsv, 0);
+        SvCUR_set(dsv, 0);
        SvOK_off((SV *)dsv);
+
        if (islv) {
            /* For PVLVs, the head (sv_any) points to an XPVLV, while
              * the LV's xpvlenu_rx will point to a regexp body, which
@@ -20681,6 +21319,11 @@ Perl_re_dup_guts(pTHX_ const REGEXP *sstr, REGEXP *dstr, CLONE_PARAMS *param)
               2: something we no longer hold a reference on
               so we need to copy it locally.  */
     RX_WRAPPED(dstr) = SAVEPVN(RX_WRAPPED_const(sstr), SvCUR(sstr)+1);
+    /* set malloced length to a non-zero value so it will be freed
+     * (otherwise in combination with SVf_FAKE it looks like an alien
+     * buffer). It doesn't have to be the actual malloced size, since it
+     * should never be grown */
+    SvLEN_set(dstr, SvCUR(sstr)+1);
     ret->mother_re   = NULL;
 }
 #endif /* PERL_IN_XSUB_RE */
@@ -21105,9 +21748,14 @@ S_put_range(pTHX_ SV *sv, UV start, const UV end, const bool allow_literals)
 
         /* As a final resort, output the range or subrange as hex. */
 
-        this_end = (end < NUM_ANYOF_CODE_POINTS)
-                    ? end
-                    : NUM_ANYOF_CODE_POINTS - 1;
+        if (start >= NUM_ANYOF_CODE_POINTS) {
+            this_end = end;
+        }
+        else {
+            this_end = (end < NUM_ANYOF_CODE_POINTS)
+                        ? end
+                        : NUM_ANYOF_CODE_POINTS - 1;
+        }
 #if NUM_ANYOF_CODE_POINTS > 256
         format = (this_end < 256)
                  ? "\\x%02" UVXf "-\\x%02" UVXf
@@ -21841,6 +22489,8 @@ Perl_handle_user_defined_property(pTHX_
     const bool is_utf8,         /* ? Is 'name' encoded in UTF-8 */
     const bool to_fold,         /* ? Is this under /i */
     const bool runtime,         /* ? Are we in compile- or run-time */
+    const bool deferrable,      /* Is it ok for this property's full definition
+                                   to be deferred until later? */
     SV* contents,               /* The property's definition */
     bool *user_defined_ptr,     /* This will be set TRUE as we wouldn't be
                                    getting called unless this is thought to be
@@ -21918,7 +22568,7 @@ Perl_handle_user_defined_property(pTHX_
                 Perl_sv_catpvf(aTHX_ msg, "%" UTF8f,
                                      UTF8fARG(is_contents_utf8, s - s0, s0));
                 sv_catpvs(msg, "\"");
-                goto return_msg;
+                goto return_failure;
             }
 
             /* Accumulate this digit into the value */
@@ -21953,7 +22603,7 @@ Perl_handle_user_defined_property(pTHX_
                     Perl_sv_catpvf(aTHX_ msg, "%" UTF8f,
                                       UTF8fARG(is_contents_utf8, s - s0, s0));
                     sv_catpvs(msg, "\"");
-                    goto return_msg;
+                    goto return_failure;
                 }
 
                 max = (max << 4) + READ_XDIGIT(s);
@@ -21981,7 +22631,7 @@ Perl_handle_user_defined_property(pTHX_
             Perl_sv_catpvf(aTHX_ msg, "%" UTF8f,
                                 UTF8fARG(is_contents_utf8, s - s0, s0));
             sv_catpvs(msg, "\"");
-            goto return_msg;
+            goto return_failure;
         }
 
 #if 0   /* See explanation at definition above of get_extended_utf8_msg() */
@@ -22028,6 +22678,7 @@ Perl_handle_user_defined_property(pTHX_
 
         this_definition = parse_uniprop_string(s0, s - s0,
                                                is_utf8, to_fold, runtime,
+                                               deferrable,
                                                user_defined_ptr, msg,
                                                (name_len == 0)
                                                 ? level /* Don't increase level
@@ -22035,8 +22686,8 @@ Perl_handle_user_defined_property(pTHX_
                                                 : level + 1
                                               );
         if (this_definition == NULL) {
-            goto return_msg;    /* 'msg' should have had the reason appended to
-                                   it by the above call */
+            goto return_failure;    /* 'msg' should have had the reason
+                                       appended to it by the above call */
         }
 
         if (! is_invlist(this_definition)) {    /* Unknown at this time */
@@ -22093,6 +22744,10 @@ Perl_handle_user_defined_property(pTHX_
     }
 
     /* Otherwise, add some explanatory text, but we will return success */
+    goto return_msg;
+
+  return_failure:
+    running_definition = NULL;
 
   return_msg:
 
@@ -22151,6 +22806,38 @@ S_delete_recursion_entry(pTHX_ void *key)
     RESTORE_CONTEXT;
 }
 
+STATIC SV *
+S_get_fq_name(pTHX_
+              const char * const name,    /* The first non-blank in the \p{}, \P{} */
+              const Size_t name_len,      /* Its length in bytes, not including any trailing space */
+              const bool is_utf8,         /* ? Is 'name' encoded in UTF-8 */
+              const bool has_colon_colon
+             )
+{
+    /* Returns a mortal SV containing the fully qualified version of the input
+     * name */
+
+    SV * fq_name;
+
+    fq_name = newSVpvs_flags("", SVs_TEMP);
+
+    /* Use the current package if it wasn't included in our input */
+    if (! has_colon_colon) {
+        const HV * pkg = (IN_PERL_COMPILETIME)
+                         ? PL_curstash
+                         : CopSTASH(PL_curcop);
+        const char* pkgname = HvNAME(pkg);
+
+        Perl_sv_catpvf(aTHX_ fq_name, "%" UTF8f,
+                      UTF8fARG(is_utf8, strlen(pkgname), pkgname));
+        sv_catpvs(fq_name, "::");
+    }
+
+    Perl_sv_catpvf(aTHX_ fq_name, "%" UTF8f,
+                         UTF8fARG(is_utf8, name_len, name));
+    return fq_name;
+}
+
 SV *
 Perl_parse_uniprop_string(pTHX_
 
@@ -22179,6 +22866,8 @@ Perl_parse_uniprop_string(pTHX_
     const bool is_utf8,         /* ? Is 'name' encoded in UTF-8 */
     const bool to_fold,         /* ? Is this under /i */
     const bool runtime,         /* TRUE if this is being called at run time */
+    const bool deferrable,      /* TRUE if it's ok for the definition to not be
+                                   known at this call */
     bool *user_defined_ptr,     /* Upon return from this function it will be
                                    set to TRUE if any component is a
                                    user-defined property */
@@ -22203,8 +22892,7 @@ Perl_parse_uniprop_string(pTHX_
     int slash_pos  = -1;    /* Where the '/' is found, or negative if none */
     int table_index = 0;    /* The entry number for this property in the table
                                of all Unicode property names */
-    bool starts_with_In_or_Is = FALSE;  /* ? Does the name start with 'In' or
-                                             'Is' */
+    bool starts_with_Is = FALSE;  /* ? Does the name start with 'Is' */
     Size_t lookup_offset = 0;   /* Used to ignore the first few characters of
                                    the normalized name in certain situations */
     Size_t non_pkg_begin = 0;   /* Offset of first byte in 'name' that isn't
@@ -22217,6 +22905,8 @@ Perl_parse_uniprop_string(pTHX_
                                      it is the definition.  Otherwise it is a
                                      string containing the fully qualified sub
                                      name of 'name' */
+    SV * fq_name = NULL;        /* For user-defined properties, the fully
+                                   qualified name */
     bool invert_return = FALSE; /* ? Do we need to complement the result before
                                      returning it */
 
@@ -22363,7 +23053,8 @@ Perl_parse_uniprop_string(pTHX_
                 pos_in_brackets = strchr("([<)]>)]>", open);
                 close = (pos_in_brackets) ? pos_in_brackets[3] : open;
 
-                if (   name[name_len-1] != close
+                if (    i >= name_len
+                    ||  name[name_len-1] != close
                     || (escaped && name[name_len-2] != '\\'))
                 {
                     sv_catpvs(msg, "Unicode property wildcard not terminated");
@@ -22419,6 +23110,7 @@ Perl_parse_uniprop_string(pTHX_
                                                            is_utf8,
                                                            to_fold,
                                                            runtime,
+                                                           deferrable,
                                                            user_defined_ptr,
                                                            msg,
                                                            level + 1);
@@ -22490,7 +23182,7 @@ Perl_parse_uniprop_string(pTHX_
         /* Certain properties whose values are numeric need special handling.
          * They may optionally be prefixed by 'is'.  Ignore that prefix for the
          * purposes of checking if this is one of those properties */
-        if (memBEGINPs(lookup_name, name_len, "is")) {
+        if (memBEGINPs(lookup_name, j, "is")) {
             lookup_offset = 2;
         }
 
@@ -22656,7 +23348,9 @@ Perl_parse_uniprop_string(pTHX_
             }
 
             /* Store the first real character in the denominator */
-            lookup_name[j++] = name[i];
+            if (i < name_len) {
+                lookup_name[j++] = name[i];
+            }
         }
     }
 
@@ -22674,11 +23368,15 @@ Perl_parse_uniprop_string(pTHX_
 
     /* If the original input began with 'In' or 'Is', it could be a subroutine
      * call to a user-defined property instead of a Unicode property name. */
-    if (    non_pkg_begin + name_len > 2
+    if (    name_len - non_pkg_begin > 2
         &&  name[non_pkg_begin+0] == 'I'
         && (name[non_pkg_begin+1] == 'n' || name[non_pkg_begin+1] == 's'))
     {
-        starts_with_In_or_Is = TRUE;
+        /* Names that start with In have different characteristics than those
+         * that start with Is */
+        if (name[non_pkg_begin+1] == 's') {
+            starts_with_Is = TRUE;
+        }
     }
     else {
         could_be_user_defined = FALSE;
@@ -22687,20 +23385,28 @@ Perl_parse_uniprop_string(pTHX_
     if (could_be_user_defined) {
         CV* user_sub;
 
+        /* If the user defined property returns the empty string, it could
+         * easily be because the pattern is being compiled before the data it
+         * actually needs to compile is available.  This could be argued to be
+         * a bug in the perl code, but this is a change of behavior for Perl,
+         * so we handle it.  This means that intentionally returning nothing
+         * will not be resolved until runtime */
+        bool empty_return = FALSE;
+
         /* Here, the name could be for a user defined property, which are
          * implemented as subs. */
         user_sub = get_cvn_flags(name, name_len, 0);
         if (user_sub) {
+            const char insecure[] = "Insecure user-defined property";
 
             /* Here, there is a sub by the correct name.  Normally we call it
              * to get the property definition */
             dSP;
             SV * user_sub_sv = MUTABLE_SV(user_sub);
             SV * error;     /* Any error returned by calling 'user_sub' */
-            SV * fq_name;   /* Fully qualified property name */
+            SV * key;       /* The key into the hash of user defined sub names
+                             */
             SV * placeholder;
-            char to_fold_string[] = "0:";   /* The 0 gets overwritten with the
-                                               actual value */
             SV ** saved_user_prop_ptr;      /* Hash entry for this property */
 
             /* How many times to retry when another thread is in the middle of
@@ -22712,11 +23418,11 @@ Perl_parse_uniprop_string(pTHX_
             /* If we get here, we know this property is user-defined */
             *user_defined_ptr = TRUE;
 
-            /* We refuse to call a tainted subroutine; returning an error
-             * instead */
+            /* We refuse to call a potentially tainted subroutine; returning an
+             * error instead */
             if (TAINT_get) {
                 if (SvCUR(msg) > 0) sv_catpvs(msg, "; ");
-                sv_catpvs(msg, "Insecure user-defined property");
+                sv_catpvn(msg, insecure, sizeof(insecure) - 1);
                 goto append_name_to_msg;
             }
 
@@ -22730,14 +23436,13 @@ Perl_parse_uniprop_string(pTHX_
              * should the need arise, passing the /i status as a parameter.
              *
              * We start by constructing the hash key name, consisting of the
-             * fully qualified subroutine name */
-            fq_name = sv_2mortal(newSV(10));    /* 10 is just a guess */
-            (void) cv_name(user_sub, fq_name, 0);
-
-            /* But precede the sub name in the key with the /i status, so that
-             * there is a key for /i and a different key for non-/i */
-            to_fold_string[0] = to_fold + '0';
-            sv_insert(fq_name, 0, 0, to_fold_string, 2);
+             * fully qualified subroutine name, preceded by the /i status, so
+             * that there is a key for /i and a different key for non-/i */
+            key = newSVpvn(((to_fold) ? "1" : "0"), 1);
+            fq_name = S_get_fq_name(aTHX_ name, name_len, is_utf8,
+                                          non_pkg_begin != 0);
+            sv_catsv(key, fq_name);
+            sv_2mortal(key);
 
             /* We only call the sub once throughout the life of the program
              * (with the /i, non-/i exception noted above).  That means the
@@ -22787,7 +23492,7 @@ Perl_parse_uniprop_string(pTHX_
             /* If we have an entry for this key, the subroutine has already
              * been called once with this /i status. */
             saved_user_prop_ptr = hv_fetch(PL_user_def_props,
-                                           SvPVX(fq_name), SvCUR(fq_name), 0);
+                                                   SvPVX(key), SvCUR(key), 0);
             if (saved_user_prop_ptr) {
 
                 /* If the saved result is an inversion list, it is the valid
@@ -22855,13 +23560,14 @@ Perl_parse_uniprop_string(pTHX_
              * for this property in the hash.  So we have the go ahead to
              * expand the definition ourselves. */
 
+            PUSHSTACKi(PERLSI_MAGIC);
             ENTER;
 
             /* Create a temporary placeholder in the hash to detect recursion
              * */
             SWITCH_TO_GLOBAL_CONTEXT;
             placeholder= newSVuv(PTR2IV(ORIGINAL_CONTEXT));
-            (void) hv_store_ent(PL_user_def_props, fq_name, placeholder, 0);
+            (void) hv_store_ent(PL_user_def_props, key, placeholder, 0);
             RESTORE_CONTEXT;
 
             /* Now that we have a placeholder, we can let other threads
@@ -22869,7 +23575,7 @@ Perl_parse_uniprop_string(pTHX_
             USER_PROP_MUTEX_UNLOCK;
 
             /* Make sure the placeholder always gets destroyed */
-            SAVEDESTRUCTOR_X(S_delete_recursion_entry, SvPVX(fq_name));
+            SAVEDESTRUCTOR_X(S_delete_recursion_entry, SvPVX(key));
 
             PUSHMARK(SP);
             SAVETMPS;
@@ -22880,16 +23586,33 @@ Perl_parse_uniprop_string(pTHX_
             XPUSHs(boolSV(to_fold));
             PUTBACK;
 
+            /* The following block was taken from swash_init().  Presumably
+             * it applies here as well, though we no longer use a swash --
+             * khw */
+            SAVEHINTS();
+            save_re_context();
+            /* We might get here via a subroutine signature which uses a utf8
+             * parameter name, at which point PL_subname will have been set
+             * but not yet used. */
+            save_item(PL_subname);
+
             (void) call_sv(user_sub_sv, G_EVAL|G_SCALAR);
 
             SPAGAIN;
 
             error = ERRSV;
-            if (SvTRUE(error)) {
+            if (TAINT_get || SvTRUE(error)) {
                 if (SvCUR(msg) > 0) sv_catpvs(msg, "; ");
-                sv_catpvs(msg, "Error \"");
-                sv_catsv(msg, error);
-                sv_catpvs(msg, "\"");
+                if (SvTRUE(error)) {
+                    sv_catpvs(msg, "Error \"");
+                    sv_catsv(msg, error);
+                    sv_catpvs(msg, "\"");
+                }
+                if (TAINT_get) {
+                    if (SvTRUE(error)) sv_catpvs(msg, "; ");
+                    sv_catpvn(msg, insecure, sizeof(insecure) - 1);
+                }
+
                 if (name_len > 0) {
                     sv_catpvs(msg, " in expansion of ");
                     Perl_sv_catpvf(aTHX_ msg, "%" UTF8f, UTF8fARG(is_utf8,
@@ -22901,32 +23624,47 @@ Perl_parse_uniprop_string(pTHX_
                 prop_definition = NULL;
             }
             else {  /* G_SCALAR guarantees a single return value */
+                SV * contents = POPs;
 
                 /* The contents is supposed to be the expansion of the property
-                 * definition.  Call a function to check for valid syntax and
-                 * handle it */
-                prop_definition = handle_user_defined_property(name, name_len,
+                 * definition.  If the definition is deferrable, and we got an
+                 * empty string back, set a flag to later defer it (after clean
+                 * up below). */
+                if (      deferrable
+                    && (! SvPOK(contents) || SvCUR(contents) == 0))
+                {
+                        empty_return = TRUE;
+                }
+                else { /* Otherwise, call a function to check for valid syntax,
+                          and handle it */
+
+                    prop_definition = handle_user_defined_property(
+                                                    name, name_len,
                                                     is_utf8, to_fold, runtime,
-                                                    POPs, user_defined_ptr,
+                                                    deferrable,
+                                                    contents, user_defined_ptr,
                                                     msg,
                                                     level);
+                }
             }
 
-            /* Here, we have the results of the expansion.  Replace the
-             * placeholder with them.  We need exclusive access to the hash,
-             * and we can't let anyone else in, between when we delete the
-             * placeholder and add the permanent entry */
+            /* Here, we have the results of the expansion.  Delete the
+             * placeholder, and if the definition is now known, replace it with
+             * that definition.  We need exclusive access to the hash, and we
+             * can't let anyone else in, between when we delete the placeholder
+             * and add the permanent entry */
             USER_PROP_MUTEX_LOCK;
 
-            S_delete_recursion_entry(aTHX_ SvPVX(fq_name));
-
-            if (! prop_definition || is_invlist(prop_definition)) {
+            S_delete_recursion_entry(aTHX_ SvPVX(key));
 
+            if (    ! empty_return
+                && (! prop_definition || is_invlist(prop_definition)))
+            {
                 /* If we got success we use the inversion list defining the
                  * property; otherwise use the error message */
                 SWITCH_TO_GLOBAL_CONTEXT;
                 (void) hv_store_ent(PL_user_def_props,
-                                    fq_name,
+                                    key,
                                     ((prop_definition)
                                      ? newSVsv(prop_definition)
                                      : newSVsv(msg)),
@@ -22940,6 +23678,11 @@ Perl_parse_uniprop_string(pTHX_
 
             FREETMPS;
             LEAVE;
+            POPSTACK;
+
+            if (empty_return) {
+                goto definition_deferred;
+            }
 
             if (prop_definition) {
 
@@ -22972,8 +23715,11 @@ Perl_parse_uniprop_string(pTHX_
     /* If it didn't find the property ... */
     if (table_index == 0) {
 
-        /* Try again stripping off any initial 'In' or 'Is' */
-        if (starts_with_In_or_Is) {
+        /* Try again stripping off any initial 'Is'.  This is because we
+         * promise that an initial Is is optional.  The same isn't true of
+         * names that start with 'In'.  Those can match only blocks, and the
+         * lookup table already has those accounted for. */
+        if (starts_with_Is) {
             lookup_name += 2;
             lookup_len -= 2;
             equals_pos -= 2;
@@ -22996,7 +23742,7 @@ Perl_parse_uniprop_string(pTHX_
                  * compile time, it might just be that the subroutine for that
                  * property hasn't been encountered yet, but at runtime, it's
                  * an error to try to use an undefined one */
-                if (runtime) {
+                if (! deferrable) {
                     if (SvCUR(msg) > 0) sv_catpvs(msg, "; ");
                     sv_catpvs(msg, "Unknown user-defined property name");
                     goto append_name_to_msg;
@@ -23016,10 +23762,12 @@ Perl_parse_uniprop_string(pTHX_
                  * NV. */
 
                 NV value;
+                SSize_t value_len = lookup_len - equals_pos;
 
                 /* Get the value */
-                if (my_atof3(lookup_name + equals_pos, &value,
-                             lookup_len - equals_pos)
+                if (   value_len <= 0
+                    || my_atof3(lookup_name + equals_pos, &value,
+                                value_len)
                           != lookup_name + lookup_len)
                 {
                     goto failed;
@@ -23229,6 +23977,7 @@ Perl_parse_uniprop_string(pTHX_
                                                           0, /* Not UTF-8 */
                                                           0, /* Not folded */
                                                           runtime,
+                                                          deferrable,
                                                           pu_definition,
                                                           &dummy,
                                                           msg,
@@ -23284,28 +24033,17 @@ Perl_parse_uniprop_string(pTHX_
   definition_deferred:
 
     /* Here it could yet to be defined, so defer evaluation of this
-     * until its needed at runtime. */
-    prop_definition = newSVpvs_flags("", SVs_TEMP);
-
-    /* To avoid any ambiguity, the package is always specified.
-     * Use the current one if it wasn't included in our input */
-    if (non_pkg_begin == 0) {
-        const HV * pkg = (IN_PERL_COMPILETIME)
-                         ? PL_curstash
-                         : CopSTASH(PL_curcop);
-        const char* pkgname = HvNAME(pkg);
-
-        Perl_sv_catpvf(aTHX_ prop_definition, "%" UTF8f,
-                      UTF8fARG(is_utf8, strlen(pkgname), pkgname));
-        sv_catpvs(prop_definition, "::");
+     * until it's needed at runtime.  We need the fully qualified property name
+     * to avoid ambiguity, and a trailing newline */
+    if (! fq_name) {
+        fq_name = S_get_fq_name(aTHX_ name, name_len, is_utf8,
+                                      non_pkg_begin != 0 /* If has "::" */
+                               );
     }
-
-    Perl_sv_catpvf(aTHX_ prop_definition, "%" UTF8f,
-                         UTF8fARG(is_utf8, name_len, name));
-    sv_catpvs(prop_definition, "\n");
+    sv_catpvs(fq_name, "\n");
 
     *user_defined_ptr = TRUE;
-    return prop_definition;
+    return fq_name;
 }
 
 #endif