regcomp.c: Remove never used struct element

[perl5.git] / regcomp.c
diff --git a/regcomp.c b/regcomp.c

index 6aa9c62..50f823f 100644 (file)
--- a/regcomp.c
+++ b/regcomp.c
@@ -82,7 +82,6 @@ EXTERN_C const struct regexp_engine my_reg_engine;
  #  include "regcomp.h"
  #endif
  
-#include "dquote_inline.h"
  #include "invlist_inline.h"
  #include "unicode_constants.h"
  
@@ -114,7 +113,7 @@ typedef struct scan_frame {
  
  /* Certain characters are output as a sequence with the first being a
   * backslash. */
-#define isBACKSLASHED_PUNCT(c)  strchr("-[]\\^", c)
+#define isBACKSLASHED_PUNCT(c)  memCHRs("-[]\\^", c)
  
  
  struct RExC_state_t {
@@ -143,6 +142,8 @@ struct RExC_state_t {
      U32                seen;
      SSize_t    size;                   /* Number of regnode equivalents in
                                             pattern */
+    Size_t      sets_depth;              /* Counts recursion depth of already-
+                                           compiled regex set patterns */
  
      /* position beyond 'precomp' of the warning message furthest away from
       * 'precomp'.  During the parse, no warnings are raised for any problems
@@ -196,10 +197,6 @@ struct RExC_state_t {
      U32         frame_count;
      AV         *warn_text;
      HV         *unlexed_names;
-#ifdef ADD_TO_REGEXEC
-    char       *starttry;              /* -Dr: where regtry was called. */
-#define RExC_starttry  (pRExC_state->starttry)
-#endif
      SV         *runtime_code_qr;       /* qr with the runtime code blocks */
  #ifdef DEBUGGING
      const char  *lastparse;
@@ -223,6 +220,8 @@ struct RExC_state_t {
      bool        study_started;
      bool        in_script_run;
      bool        use_BRANCHJ;
+    bool        sWARN_EXPERIMENTAL__VLB;
+    bool        sWARN_EXPERIMENTAL__REGEX_SETS;
  };
  
  #define RExC_flags     (pRExC_state->flags)
@@ -267,6 +266,7 @@ struct RExC_state_t {
  #define RExC_paren_names       (pRExC_state->paren_names)
  #define RExC_recurse   (pRExC_state->recurse)
  #define RExC_recurse_count     (pRExC_state->recurse_count)
+#define RExC_sets_depth         (pRExC_state->sets_depth)
  #define RExC_study_chunk_recursed        (pRExC_state->study_chunk_recursed)
  #define RExC_study_chunk_recursed_bytes  \
                                     (pRExC_state->study_chunk_recursed_bytes)
@@ -291,6 +291,8 @@ struct RExC_state_t {
  #define RExC_warn_text (pRExC_state->warn_text)
  #define RExC_in_script_run      (pRExC_state->in_script_run)
  #define RExC_use_BRANCHJ        (pRExC_state->use_BRANCHJ)
+#define RExC_warned_WARN_EXPERIMENTAL__VLB (pRExC_state->sWARN_EXPERIMENTAL__VLB)
+#define RExC_warned_WARN_EXPERIMENTAL__REGEX_SETS (pRExC_state->sWARN_EXPERIMENTAL__REGEX_SETS)
  #define RExC_unlexed_names (pRExC_state->unlexed_names)
  
  /* Heuristic check on the complexity of the pattern: if TOO_NAUGHTY, we set
@@ -343,9 +345,14 @@ struct RExC_state_t {
  
  #define PBYTE(u8str,paren) ((U8*)(u8str))[(paren) >> 3]
  #define PBITVAL(paren) (1 << ((paren) & 7))
-#define PAREN_TEST(u8str,paren) ( PBYTE(u8str,paren) & PBITVAL(paren))
-#define PAREN_SET(u8str,paren) PBYTE(u8str,paren) |= PBITVAL(paren)
-#define PAREN_UNSET(u8str,paren) PBYTE(u8str,paren) &= (~PBITVAL(paren))
+#define PAREN_OFFSET(depth) \
+    (RExC_study_chunk_recursed + (depth) * RExC_study_chunk_recursed_bytes)
+#define PAREN_TEST(depth, paren) \
+    (PBYTE(PAREN_OFFSET(depth), paren) & PBITVAL(paren))
+#define PAREN_SET(depth, paren) \
+    (PBYTE(PAREN_OFFSET(depth), paren) |= PBITVAL(paren))
+#define PAREN_UNSET(depth, paren) \
+    (PBYTE(PAREN_OFFSET(depth), paren) &= ~PBITVAL(paren))
  
  #define REQUIRE_UTF8(flagp) STMT_START {                                   \
                                       if (!UTF) {                           \
@@ -426,13 +433,16 @@ struct RExC_state_t {
  #define _invlist_intersection_complement_2nd(a, b, output) \
                   _invlist_intersection_maybe_complement_2nd(a, b, TRUE, output)
  
-/* We add a marker if we are deferring expansion of a potential user-defined
- * property until it is needed at runtime the first time it is encountered in a
- * pattern match.  This marker that shouldn't conflict with any that could be
- * in a legal name is appended to its name to indicate this.  There is a string
- * and character form */
-#define DEFERRED_PROP_EXPANSION_MARKERs  "~"
-#define DEFERRED_PROP_EXPANSION_MARKERc  '~'
+/* We add a marker if we are deferring expansion of a property that is both
+ * 1) potentiallly user-defined; and
+ * 2) could also be an official Unicode property.
+ *
+ * Without this marker, any deferred expansion can only be for a user-defined
+ * one.  This marker shouldn't conflict with any that could be in a legal name,
+ * and is appended to its name to indicate this.  There is a string and
+ * character form */
+#define DEFERRED_COULD_BE_OFFICIAL_MARKERs  "~"
+#define DEFERRED_COULD_BE_OFFICIAL_MARKERc  '~'
  
  /* About scan_data_t.
  
@@ -880,11 +890,27 @@ static const scan_data_t zero_scan_data = {
      } STMT_END
  
  /* m is not necessarily a "literal string", in this macro */
-#define reg_warn_non_literal_string(loc, m)                             \
-    _WARN_HELPER(loc, packWARN(WARN_REGEXP),                            \
-                      Perl_warner(aTHX_ packWARN(WARN_REGEXP),          \
+#define warn_non_literal_string(loc, packed_warn, m)                    \
+    _WARN_HELPER(loc, packed_warn,                                      \
+                      Perl_warner(aTHX_ packed_warn,                    \
                                         "%s" REPORT_LOCATION,            \
                                    m, REPORT_LOCATION_ARGS(loc)))
+#define reg_warn_non_literal_string(loc, m)                             \
+                warn_non_literal_string(loc, packWARN(WARN_REGEXP), m)
+
+#define ckWARN2_non_literal_string(loc, packwarn, m, a1)                    \
+    STMT_START {                                                            \
+                char * format;                                              \
+                Size_t format_size = strlen(m) + strlen(REPORT_LOCATION)+ 1;\
+                Newx(format, format_size, char);                            \
+                my_strlcpy(format, m, format_size);                         \
+                my_strlcat(format, REPORT_LOCATION, format_size);           \
+                SAVEFREEPV(format);                                         \
+                _WARN_HELPER(loc, packwarn,                                 \
+                      Perl_ck_warner(aTHX_ packwarn,                        \
+                                        format,                             \
+                                        a1, REPORT_LOCATION_ARGS(loc)));    \
+    } STMT_END
  
  #define        ckWARNreg(loc,m)                                                \
      _WARN_HELPER(loc, packWARN(WARN_REGEXP),                            \
@@ -964,10 +990,15 @@ static const scan_data_t zero_scan_data = {
                                         REPORT_LOCATION_ARGS(loc)))
  
  #define        ckWARNexperimental(loc, class, m)                               \
-    _WARN_HELPER(loc, packWARN(class),                                  \
+    STMT_START {                                                        \
+        if (! RExC_warned_ ## class) { /* warn once per compilation */  \
+            RExC_warned_ ## class = 1;                                  \
+            _WARN_HELPER(loc, packWARN(class),                          \
                        Perl_ck_warner_d(aTHX_ packWARN(class),           \
                                              m REPORT_LOCATION,          \
-                                            REPORT_LOCATION_ARGS(loc)))
+                                            REPORT_LOCATION_ARGS(loc)));\
+        }                                                               \
+    } STMT_END
  
  /* Convert between a pointer to a node and its offset from the beginning of the
   * program */
@@ -1376,29 +1407,6 @@ S_edit_distance(const UV* src,
  /* END of edit_distance() stuff
   * ========================================================= */
  
-/* is c a control character for which we have a mnemonic? */
-#define isMNEMONIC_CNTRL(c) _IS_MNEMONIC_CNTRL_ONLY_FOR_USE_BY_REGCOMP_DOT_C(c)
-
-STATIC const char *
-S_cntrl_to_mnemonic(const U8 c)
-{
-    /* Returns the mnemonic string that represents character 'c', if one
-     * exists; NULL otherwise.  The only ones that exist for the purposes of
-     * this routine are a few control characters */
-
-    switch (c) {
-        case '\a':       return "\\a";
-        case '\b':       return "\\b";
-        case ESC_NATIVE: return "\\e";
-        case '\f':       return "\\f";
-        case '\n':       return "\\n";
-        case '\r':       return "\\r";
-        case '\t':       return "\\t";
-    }
-
-    return NULL;
-}
-
  /* Mark that we cannot extend a found fixed substring at this point.
     Update the longest found anchored substring or the longest found
     floating substrings if needed. */
@@ -2794,7 +2802,12 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch,
          if (OP(noper) == NOTHING) {
              /* skip past a NOTHING at the start of an alternation
               * eg, /(?:)a|(?:b)/ should be the same as /a|b/
+             *
+             * If the next node is not something we are supposed to process
+             * we will just ignore it due to the condition guarding the
+             * next block.
               */
+
              regnode *noper_next= regnext(noper);
              if (noper_next < tail)
                  noper= noper_next;
@@ -3016,6 +3029,9 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch,
                  regnode *noper_next= regnext(noper);
                  if (noper_next < tail)
                      noper= noper_next;
+                /* we will undo this assignment if noper does not
+                 * point at a trieable type in the else clause of
+                 * the following statement. */
              }
  
              if (    noper < tail
@@ -3077,7 +3093,13 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch,
                          Perl_croak( aTHX_ "panic! In trie construction, no char mapping for %" IVdf, uvc );
                     }
                 }
-           }
+            } else {
+                /* If we end up here it is because we skipped past a NOTHING, but did not end up
+                 * on a trieable type. So we need to reset noper back to point at the first regop
+                 * in the branch before we call TRIE_HANDLE_WORD()
+                */
+                noper= NEXTOPER(cur);
+            }
              TRIE_HANDLE_WORD(state);
  
          } /* end second pass */
@@ -3241,6 +3263,9 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch,
                  regnode *noper_next= regnext(noper);
                  if (noper_next < tail)
                      noper= noper_next;
+                /* we will undo this assignment if noper does not
+                 * point at a trieable type in the else clause of
+                 * the following statement. */
              }
  
              if (    noper < tail
@@ -3281,6 +3306,12 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch,
                      /* charid is now 0 if we dont know the char read, or
                       * nonzero if we do */
                  }
+            } else {
+                /* If we end up here it is because we skipped past a NOTHING, but did not end up
+                 * on a trieable type. So we need to reset noper back to point at the first regop
+                 * in the branch before we call TRIE_HANDLE_WORD().
+                */
+                noper= NEXTOPER(cur);
              }
              accept_state = TRIE_NODENUM( state );
              TRIE_HANDLE_WORD(accept_state);
@@ -3563,7 +3594,8 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch,
                          str=STRING(convert);
                          setSTR_LEN(convert, 0);
                      }
-                    setSTR_LEN(convert, STR_LEN(convert) + len);
+                    assert( ( STR_LEN(convert) + len ) < 256 );
+                    setSTR_LEN(convert, (U8)(STR_LEN(convert) + len));
                      while (len--)
                          *str++ = *ch++;
                 } else {
@@ -3577,7 +3609,8 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch,
             trie->prefixlen = (state-1);
              if (str) {
                  regnode *n = convert+NODE_SZ_STR(convert);
-                NEXT_OFF(convert) = NODE_SZ_STR(convert);
+                assert( NODE_SZ_STR(convert) <= U16_MAX );
+                NEXT_OFF(convert) = (U16)(NODE_SZ_STR(convert));
                  trie->startstate = state;
                  trie->minlen -= (state - 1);
                  trie->maxlen -= (state - 1);
@@ -4003,11 +4036,6 @@ S_construct_ahocorasick_from_trie(pTHX_ RExC_state_t *pRExC_state, regnode *sour
   *      using /iaa matching will be doing so almost entirely with ASCII
   *      strings, so this should rarely be encountered in practice */
  
-#define JOIN_EXACT(scan,min_subtract,unfolded_multi_char, flags)    \
-    if (PL_regkind[OP(scan)] == EXACT && OP(scan) != LEXACT         \
-                                      && OP(scan) != LEXACT_REQ8)  \
-        join_exact(pRExC_state,(scan),(min_subtract),unfolded_multi_char, (flags), NULL, depth+1)
-
  STATIC U32
  S_join_exact(pTHX_ RExC_state_t *pRExC_state, regnode *scan,
                     UV *min_subtract, bool *unfolded_multi_char,
@@ -4171,7 +4199,8 @@ S_join_exact(pTHX_ RExC_state_t *pRExC_state, regnode *scan,
              merged++;
  
              NEXT_OFF(scan) += NEXT_OFF(n);
-            setSTR_LEN(scan, STR_LEN(scan) + STR_LEN(n));
+            assert( ( STR_LEN(scan) + STR_LEN(n) ) < 256 );
+            setSTR_LEN(scan, (U8)(STR_LEN(scan) + STR_LEN(n)));
              next = n + NODE_SZ_STR(n);
              /* Now we can overwrite *n : */
              Move(STRING(n), STRING(scan) + oldl, STR_LEN(n), char);
@@ -4396,23 +4425,6 @@ S_join_exact(pTHX_ RExC_state_t *pRExC_state, regnode *scan,
             }
  #endif
         }
-
-        if (     STR_LEN(scan) == 1
-            &&   isALPHA_A(* STRING(scan))
-            &&  (         OP(scan) == EXACTFAA
-                 || (     OP(scan) == EXACTFU
-                     && ! HAS_NONLATIN1_SIMPLE_FOLD_CLOSURE(* STRING(scan)))))
-        {
-            U8 mask = ~ ('A' ^ 'a'); /* These differ in just one bit */
-
-            /* Replace a length 1 ASCII fold pair node with an ANYOFM node,
-             * with the mask set to the complement of the bit that differs
-             * between upper and lower case, and the lowest code point of the
-             * pair (which the '&' forces) */
-            OP(scan) = ANYOFM;
-            ARG_SET(scan, *STRING(scan) & mask);
-            FLAGS(scan) = mask;
-        }
      }
  
  #ifdef DEBUGGING
@@ -4472,6 +4484,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                         /* and_withp: Valid if flags & SCF_DO_STCLASS_OR */
  {
      dVAR;
+    SSize_t final_minlen;
      /* There must be at least this number of characters to match */
      SSize_t min = 0;
      I32 pars = 0, code;
@@ -4515,15 +4528,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
              U32 j;
              for ( j = 0 ; j < recursed_depth ; j++ ) {
                  for ( i = 0 ; i < (U32)RExC_total_parens ; i++ ) {
-                    if (
-                        PAREN_TEST(RExC_study_chunk_recursed +
-                                   ( j * RExC_study_chunk_recursed_bytes), i )
-                        && (
-                            !j ||
-                            !PAREN_TEST(RExC_study_chunk_recursed +
-                                   (( j - 1 ) * RExC_study_chunk_recursed_bytes), i)
-                        )
-                    ) {
+                    if (PAREN_TEST(j, i) && (!j || !PAREN_TEST(j - 1, i))) {
                          Perl_re_printf( aTHX_ " %d",(int)i);
                          break;
                      }
@@ -4551,7 +4556,10 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
           * parsing code, as each (?:..) is handled by a different invocation of
           * reg() -- Yves
           */
-        JOIN_EXACT(scan,&min_subtract, &unfolded_multi_char, 0);
+        if (PL_regkind[OP(scan)] == EXACT && OP(scan) != LEXACT
+                                          && OP(scan) != LEXACT_REQ8)
+            join_exact(pRExC_state, scan, &min_subtract, &unfolded_multi_char,
+                    0, NULL, depth + 1);
  
          /* Follow the next-chain of the current node and optimize
             away all the NOTHINGs from it.  */
@@ -5125,8 +5133,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
  
                  if (
                      !recursed_depth
-                    ||
-                    !PAREN_TEST(RExC_study_chunk_recursed + ((recursed_depth-1) * RExC_study_chunk_recursed_bytes), paren)
+                    || !PAREN_TEST(recursed_depth - 1, paren)
                  ) {
                      /* it is quite possible that there are more efficient ways
                       * to do this. We maintain a bitmap per level of recursion
@@ -5141,13 +5148,13 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                      if (!recursed_depth) {
                          Zero(RExC_study_chunk_recursed, RExC_study_chunk_recursed_bytes, U8);
                      } else {
-                        Copy(RExC_study_chunk_recursed + ((recursed_depth-1) * RExC_study_chunk_recursed_bytes),
-                             RExC_study_chunk_recursed + (recursed_depth * RExC_study_chunk_recursed_bytes),
+                        Copy(PAREN_OFFSET(recursed_depth - 1),
+                             PAREN_OFFSET(recursed_depth),
                               RExC_study_chunk_recursed_bytes, U8);
                      }
                      /* we havent recursed into this paren yet, so recurse into it */
                      DEBUG_STUDYDATA("gosub-set", data, depth, is_inf);
-                    PAREN_SET(RExC_study_chunk_recursed + (recursed_depth * RExC_study_chunk_recursed_bytes), paren);
+                    PAREN_SET(recursed_depth, paren);
                      my_recursed_depth= recursed_depth + 1;
                  } else {
                      DEBUG_STUDYDATA("gosub-inf", data, depth, is_inf);
@@ -5212,17 +5219,18 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                   || OP(scan) == LEXACT_REQ8
                   || OP(scan) == EXACTL)
          {
-           SSize_t l = STR_LEN(scan);
+           SSize_t bytelen = STR_LEN(scan), charlen;
             UV uc;
-            assert(l);
+            assert(bytelen);
             if (UTF) {
                 const U8 * const s = (U8*)STRING(scan);
-               uc = utf8_to_uvchr_buf(s, s + l, NULL);
-               l = utf8_length(s, s + l);
+               uc = utf8_to_uvchr_buf(s, s + bytelen, NULL);
+               charlen = utf8_length(s, s + bytelen);
             } else {
                 uc = *((U8*)STRING(scan));
+                charlen = bytelen;
             }
-           min += l;
+           min += charlen;
             if (flags & SCF_DO_SUBSTR) { /* Update longest substr. */
                 /* The code below prefers earlier match for fixed
                    offset, later match for variable offset.  */
@@ -5231,7 +5239,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                     data->last_start_max = is_inf
                         ? SSize_t_MAX : data->pos_min + data->pos_delta;
                 }
-               sv_catpvn(data->last_found, STRING(scan), STR_LEN(scan));
+               sv_catpvn(data->last_found, STRING(scan), bytelen);
                 if (UTF)
                     SvUTF8_on(data->last_found);
                 {
@@ -5239,11 +5247,10 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                     MAGIC * const mg = SvUTF8(sv) && SvMAGICAL(sv) ?
                         mg_find(sv, PERL_MAGIC_utf8) : NULL;
                     if (mg && mg->mg_len >= 0)
-                       mg->mg_len += utf8_length((U8*)STRING(scan),
-                                              (U8*)STRING(scan)+STR_LEN(scan));
+                       mg->mg_len += charlen;
                 }
-               data->last_end = data->pos_min + l;
-               data->pos_min += l; /* As in the first entry. */
+               data->last_end = data->pos_min + charlen;
+               data->pos_min += charlen; /* As in the first entry. */
                 data->flags &= ~SF_BEFORE_EOL;
             }
  
@@ -5265,25 +5272,42 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
         }
          else if (PL_regkind[OP(scan)] == EXACT) {
              /* But OP != EXACT!, so is EXACTFish */
-           SSize_t l = STR_LEN(scan);
+           SSize_t bytelen = STR_LEN(scan), charlen;
              const U8 * s = (U8*)STRING(scan);
  
+            /* Replace a length 1 ASCII fold pair node with an ANYOFM node,
+             * with the mask set to the complement of the bit that differs
+             * between upper and lower case, and the lowest code point of the
+             * pair (which the '&' forces) */
+            if (     bytelen == 1
+                &&   isALPHA_A(*s)
+                &&  (         OP(scan) == EXACTFAA
+                     || (     OP(scan) == EXACTFU
+                         && ! HAS_NONLATIN1_SIMPLE_FOLD_CLOSURE(*s))))
+            {
+                U8 mask = ~ ('A' ^ 'a'); /* These differ in just one bit */
+
+                OP(scan) = ANYOFM;
+                ARG_SET(scan, *s & mask);
+                FLAGS(scan) = mask;
+                /* we're not EXACTFish any more, so restudy */
+                continue;
+            }
+
             /* Search for fixed substrings supports EXACT only. */
             if (flags & SCF_DO_SUBSTR) {
                 assert(data);
                  scan_commit(pRExC_state, data, minlenp, is_inf);
             }
-           if (UTF) {
-               l = utf8_length(s, s + l);
-           }
+            charlen = UTF ? (SSize_t) utf8_length(s, s + bytelen) : bytelen;
             if (unfolded_multi_char) {
                  RExC_seen |= REG_UNFOLDED_MULTI_SEEN;
             }
-           min += l - min_subtract;
+           min += charlen - min_subtract;
              assert (min >= 0);
              delta += min_subtract;
             if (flags & SCF_DO_SUBSTR) {
-               data->pos_min += l - min_subtract;
+               data->pos_min += charlen - min_subtract;
                 if (data->pos_min < 0) {
                      data->pos_min = 0;
                  }
@@ -5753,10 +5777,8 @@ Perl_re_printf( aTHX_  "LHS=%" UVuf " RHS=%" UVuf "\n",
                 continue;
  
             default:
-#ifdef DEBUGGING
                  Perl_croak(aTHX_ "panic: unexpected varying REx opcode %d",
                                                                      OP(scan));
-#endif
              case REF:
              case CLUMP:
                 if (flags & SCF_DO_SUBSTR) {
@@ -6407,6 +6429,11 @@ Perl_re_printf( aTHX_  "LHS=%" UVuf " RHS=%" UVuf "\n",
             if (trie->jump) /* no more substrings -- for now /grr*/
                 flags &= ~SCF_DO_SUBSTR;
         }
+        else if (OP(scan) == REGEX_SET) {
+            Perl_croak(aTHX_ "panic: %s regnode should be resolved"
+                             " before optimization", reg_name[REGEX_SET]);
+        }
+
  #endif /* old or new */
  #endif /* TRIE_STUDY_OPT */
  
@@ -6458,18 +6485,15 @@ Perl_re_printf( aTHX_  "LHS=%" UVuf " RHS=%" UVuf "\n",
  
      DEBUG_STUDYDATA("post-fin", data, depth, is_inf);
  
-    {
-        SSize_t final_minlen= min < stopmin ? min : stopmin;
-
-        if (!(RExC_seen & REG_UNBOUNDED_QUANTIFIER_SEEN)) {
-            if (final_minlen > SSize_t_MAX - delta)
-                RExC_maxlen = SSize_t_MAX;
-            else if (RExC_maxlen < final_minlen + delta)
-                RExC_maxlen = final_minlen + delta;
-        }
-        return final_minlen;
+    final_minlen = min < stopmin
+            ? min : stopmin;
+    if (!(RExC_seen & REG_UNBOUNDED_QUANTIFIER_SEEN)) {
+        if (final_minlen > SSize_t_MAX - delta)
+            RExC_maxlen = SSize_t_MAX;
+        else if (RExC_maxlen < final_minlen + delta)
+            RExC_maxlen = final_minlen + delta;
      }
-    NOT_REACHED; /* NOTREACHED */
+    return final_minlen;
  }
  
  STATIC U32
@@ -6598,15 +6622,24 @@ Perl_pregcomp(pTHX_ SV * const pattern, const U32 flags)
  REGEXP *
  Perl_re_compile(pTHX_ SV * const pattern, U32 rx_flags)
  {
-    SV *pat = pattern; /* defeat constness! */
      PERL_ARGS_ASSERT_RE_COMPILE;
+    return re_op_compile_wrapper(pattern, rx_flags, 0);
+}
+
+REGEXP *
+S_re_op_compile_wrapper(pTHX_ SV * const pattern, U32 rx_flags, const U32 pm_flags)
+{
+    SV *pat = pattern; /* defeat constness! */
+
+    PERL_ARGS_ASSERT_RE_OP_COMPILE_WRAPPER;
+
      return Perl_re_op_compile(aTHX_ &pat, 1, NULL,
  #ifdef PERL_IN_XSUB_RE
                                  &my_reg_engine,
  #else
                                  &PL_core_reg_engine,
  #endif
-                                NULL, NULL, rx_flags, 0);
+                                NULL, NULL, rx_flags, pm_flags);
  }
  
  
@@ -7362,7 +7395,7 @@ S_set_regex_pv(pTHX_ RExC_state_t *pRExC_state, REGEXP *Rx)
   *
   * pm_flags contains the PMf_* flags, typically based on those from the
   * pm_flags field of the related PMOP. Currently we're only interested in
- * PMf_HAS_CV, PMf_IS_QR, PMf_USE_RE_EVAL.
+ * PMf_HAS_CV, PMf_IS_QR, PMf_USE_RE_EVAL, PMf_WILDCARD.
   *
   * For many years this code had an initial sizing pass that calculated
   * (sometimes incorrectly, leading to security holes) the size needed for the
@@ -7547,6 +7580,8 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
      RExC_frame_count= 0;
      RExC_latest_warn_offset = 0;
      RExC_use_BRANCHJ = 0;
+    RExC_warned_WARN_EXPERIMENTAL__VLB = 0;
+    RExC_warned_WARN_EXPERIMENTAL__REGEX_SETS = 0;
      RExC_total_parens = 0;
      RExC_open_parens = NULL;
      RExC_close_parens = NULL;
@@ -7659,6 +7694,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
      RExC_study_chunk_recursed = NULL;
      RExC_study_chunk_recursed_bytes= 0;
      RExC_recurse_count = 0;
+    RExC_sets_depth = 0;
      pRExC_state->code_index = 0;
  
      /* Initialize the string in the compiled pattern.  This is so that there is
@@ -8826,7 +8862,7 @@ Perl_reg_numbered_buff_length(pTHX_ REGEXP * const r, const SV * const sv,
  
          i = t1 - s1;
          if (is_utf8_string_loclen((U8*)s, i, &ep, &el))
-                       i = el;
+            i = el;
      }
      return i;
  }
@@ -10619,8 +10655,8 @@ S_make_exactf_invlist(pTHX_ RExC_state_t *pRExC_state, regnode *node)
          }
          else {  /* Single char fold */
              unsigned int k;
-            unsigned int first_fold;
-            const unsigned int * remaining_folds;
+            U32 first_fold;
+            const U32 * remaining_folds;
              Size_t folds_count;
  
              /* It matches itself */
@@ -10722,9 +10758,27 @@ S_parse_lparen_question_flags(pTHX_ RExC_state_t *pRExC_state)
      }
  
      while (RExC_parse < RExC_end) {
-        /* && strchr("iogcmsx", *RExC_parse) */
+        /* && memCHRs("iogcmsx", *RExC_parse) */
          /* (?g), (?gc) and (?o) are useless here
             and must be globally applied -- japhy */
+        if ((RExC_pm_flags & PMf_WILDCARD)) {
+            if (flagsp == & negflags) {
+                if (*RExC_parse == 'm') {
+                    RExC_parse++;
+                    /* diag_listed_as: Use of %s is not allowed in Unicode
+                       property wildcard subpatterns in regex; marked by <--
+                       HERE in m/%s/ */
+                    vFAIL("Use of modifier '-m' is not allowed in Unicode"
+                          " property wildcard subpatterns");
+                }
+            }
+            else {
+                if (*RExC_parse == 's') {
+                    goto modifier_illegal_in_wildcard;
+                }
+            }
+        }
+
          switch (*RExC_parse) {
  
              /* Code for the imsxn flags */
@@ -10804,8 +10858,12 @@ S_parse_lparen_question_flags(pTHX_ RExC_state_t *pRExC_state)
                  vFAIL2("Regexp modifier \"%c\" may not appear after the \"-\"",
                                      *(RExC_parse - 1));
                  NOT_REACHED; /*NOTREACHED*/
-            case ONCE_PAT_MOD: /* 'o' */
              case GLOBAL_PAT_MOD: /* 'g' */
+                if (RExC_pm_flags & PMf_WILDCARD) {
+                    goto modifier_illegal_in_wildcard;
+                }
+                /*FALLTHROUGH*/
+            case ONCE_PAT_MOD: /* 'o' */
                  if (ckWARN(WARN_REGEXP)) {
                      const I32 wflagbit = *RExC_parse == 'o'
                                           ? WASTED_O
@@ -10826,6 +10884,9 @@ S_parse_lparen_question_flags(pTHX_ RExC_state_t *pRExC_state)
                  break;
  
              case CONTINUE_PAT_MOD: /* 'c' */
+                if (RExC_pm_flags & PMf_WILDCARD) {
+                    goto modifier_illegal_in_wildcard;
+                }
                  if (ckWARN(WARN_REGEXP)) {
                      if (! (wastedflags & WASTED_C) ) {
                          wastedflags |= WASTED_GC;
@@ -10840,6 +10901,9 @@ S_parse_lparen_question_flags(pTHX_ RExC_state_t *pRExC_state)
                  }
                  break;
              case KEEPCOPY_PAT_MOD: /* 'p' */
+                if (RExC_pm_flags & PMf_WILDCARD) {
+                    goto modifier_illegal_in_wildcard;
+                }
                  if (flagsp == &negflags) {
                      ckWARNreg(RExC_parse + 1,"Useless use of (?-p)");
                  } else {
@@ -10860,6 +10924,18 @@ S_parse_lparen_question_flags(pTHX_ RExC_state_t *pRExC_state)
              case ':':
              case ')':
  
+                if (  (RExC_pm_flags & PMf_WILDCARD)
+                    && cs != REGEX_ASCII_MORE_RESTRICTED_CHARSET)
+                {
+                    RExC_parse++;
+                    /* diag_listed_as: Use of %s is not allowed in Unicode
+                       property wildcard subpatterns in regex; marked by <--
+                       HERE in m/%s/ */
+                    vFAIL2("Use of modifier '%c' is not allowed in Unicode"
+                           " property wildcard subpatterns",
+                           has_charset_modifier);
+                }
+
                  if ((posflags & (RXf_PMf_EXTENDED|RXf_PMf_EXTENDED_MORE)) == RXf_PMf_EXTENDED) {
                      negflags |= RXf_PMf_EXTENDED_MORE;
                  }
@@ -10885,6 +10961,13 @@ S_parse_lparen_question_flags(pTHX_ RExC_state_t *pRExC_state)
      }
  
      vFAIL("Sequence (?... not terminated");
+
+  modifier_illegal_in_wildcard:
+    RExC_parse++;
+    /* diag_listed_as: Use of %s is not allowed in Unicode property wildcard
+       subpatterns in regex; marked by <-- HERE in m/%s/ */
+    vFAIL2("Use of modifier '%c' is not allowed in Unicode property wildcard"
+           " subpatterns", *(RExC_parse - 1));
  }
  
  /*
@@ -11376,6 +11459,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
                                  RExC_parse-seqstart, seqstart);
                 NOT_REACHED; /*NOTREACHED*/
              case '<':           /* (?<...) */
+                /* If you want to support (?<*...), first reconcile with GH #17363 */
                 if (*RExC_parse == '!')
                     paren = ',';
                 else if (*RExC_parse != '=')
@@ -11949,6 +12033,8 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
                      ckWARNreg(RExC_parse, "Empty (?) without any modifiers");
                  }
                  /* FALLTHROUGH */
+            case '*': /* If you want to support (?*...), first reconcile with GH #17363 */
+           /* FALLTHROUGH */
             default: /* e.g., (?i) */
                 RExC_parse = (char *) seqstart + 1;
                parse_flags:
@@ -12156,7 +12242,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
              }
             break;
         }
-        DEBUG_PARSE_r(
+        DEBUG_PARSE_r({
              DEBUG_PARSE_MSG("lsbr");
              regprop(RExC_rx, RExC_mysv1, REGNODE_p(lastbr), NULL, pRExC_state);
              regprop(RExC_rx, RExC_mysv2, REGNODE_p(ender), NULL, pRExC_state);
@@ -12167,7 +12253,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
                            (IV)ender,
                            (IV)(ender - lastbr)
              );
-        );
+        });
          if (! REGTAIL(pRExC_state, lastbr, ender)) {
              REQUIRE_BRANCHJ(flagp, 0);
          }
@@ -12208,7 +12294,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
                  br= PL_regkind[OP(ret_as_regnode)] != BRANCH
                                 ? regnext(ret_as_regnode)
                                 : ret_as_regnode;
-                DEBUG_PARSE_r(
+                DEBUG_PARSE_r({
                      DEBUG_PARSE_MSG("NADA");
                      regprop(RExC_rx, RExC_mysv1, ret_as_regnode,
                                       NULL, pRExC_state);
@@ -12221,7 +12307,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
                                    (IV)ender,
                                    (IV)(ender - ret)
                      );
-                );
+                });
                  OP(br)= NOTHING;
                  if (OP(REGNODE_p(ender)) == TAIL) {
                      NEXT_OFF(br)= 0;
@@ -12490,6 +12576,23 @@ S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
           do_curly:
             if ((flags&SIMPLE)) {
                  if (min == 0 && max == REG_INFTY) {
+
+                    /* Going from 0..inf is currently forbidden in wildcard
+                     * subpatterns.  The only reason is to make it harder to
+                     * write patterns that take a long long time to halt, and
+                     * because the use of this construct isn't necessary in
+                     * matching Unicode property values */
+                    if (RExC_pm_flags & PMf_WILDCARD) {
+                        RExC_parse++;
+                        /* diag_listed_as: Use of %s is not allowed in Unicode
+                           property wildcard subpatterns in regex; marked by
+                           <-- HERE in m/%s/ */
+                        vFAIL("Use of quantifier '*' is not allowed in"
+                              " Unicode property wildcard subpatterns");
+                        /* Note, don't need to worry about {0,}, as a '}' isn't
+                         * legal at all in wildcards, so wouldn't get this far
+                         * */
+                    }
                      reginsert(pRExC_state, STAR, ret, depth+1);
                      MARK_NAUGHTY(4);
                      RExC_seen |= REG_UNBOUNDED_QUANTIFIER_SEEN;
@@ -12592,12 +12695,22 @@ S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
      }
    nest_check:
      if (!(flags&(HASWIDTH|POSTPONED)) && max > REG_INFTY/3) {
-       ckWARN2reg(RExC_parse,
-                  "%" UTF8f " matches null string many times",
-                  UTF8fARG(UTF, (RExC_parse >= origparse
-                                 ? RExC_parse - origparse
-                                 : 0),
-                  origparse));
+        if (origparse[0] == '\\' && origparse[1] == 'K') {
+            vFAIL2utf8f(
+                       "%" UTF8f " is forbidden - matches null string many times",
+                       UTF8fARG(UTF, (RExC_parse >= origparse
+                                     ? RExC_parse - origparse
+                                     : 0),
+                       origparse));
+            /* NOT-REACHED */
+        } else {
+            ckWARN2reg(RExC_parse,
+                       "%" UTF8f " matches null string many times",
+                       UTF8fARG(UTF, (RExC_parse >= origparse
+                                     ? RExC_parse - origparse
+                                     : 0),
+                       origparse));
+        }
      }
  
      if (*RExC_parse == '?') {
@@ -12930,48 +13043,30 @@ S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state,
           * thing. */
  
          do {    /* Loop until the ending brace */
-            UV cp = 0;
-            char * start_digit;     /* The first of the current code point */
-            if (! isXDIGIT(*RExC_parse)) {
+            I32 flags = PERL_SCAN_SILENT_OVERFLOW
+                      | PERL_SCAN_SILENT_ILLDIGIT
+                      | PERL_SCAN_NOTIFY_ILLDIGIT
+                      | PERL_SCAN_ALLOW_MEDIAL_UNDERSCORES
+                      | PERL_SCAN_DISALLOW_PREFIX;
+            STRLEN len = endbrace - RExC_parse;
+            NV overflow_value;
+            char * start_digit = RExC_parse;
+            UV cp = grok_hex(RExC_parse, &len, &flags, &overflow_value);
+
+            if (len == 0) {
                  RExC_parse++;
+              bad_NU:
                  vFAIL("Invalid hexadecimal number in \\N{U+...}");
              }
  
-            start_digit = RExC_parse;
-            count++;
-
-            /* Loop through the hex digits of the current code point */
-            do {
-                /* Adding this digit will shift the result 4 bits.  If that
-                 * result would be above the legal max, it's overflow */
-                if (cp > MAX_LEGAL_CP >> 4) {
-
-                    /* Find the end of the code point */
-                    do {
-                        RExC_parse ++;
-                    } while (isXDIGIT(*RExC_parse) || *RExC_parse == '_');
-
-                    /* Be sure to synchronize this message with the similar one
-                     * in utf8.c */
-                    vFAIL4("Use of code point 0x%.*s is not allowed; the"
-                        " permissible max is 0x%" UVxf,
-                        (int) (RExC_parse - start_digit), start_digit,
-                        MAX_LEGAL_CP);
-                }
-
-                /* Accumulate this (valid) digit into the running total */
-                cp  = (cp << 4) + READ_XDIGIT(RExC_parse);
+            RExC_parse += len;
  
-                /* READ_XDIGIT advanced the input pointer.  Ignore a single
-                 * underscore separator */
-                if (*RExC_parse == '_' && isXDIGIT(RExC_parse[1])) {
-                    RExC_parse++;
-                }
-            } while (isXDIGIT(*RExC_parse));
+            if (cp > MAX_LEGAL_CP) {
+                vFAIL(form_cp_too_large_msg(16, start_digit, len, 0));
+            }
  
-            /* Here, have accumulated the next code point */
-            if (RExC_parse >= endbrace) {   /* If done ... */
-                if (count != 1) {
+            if (RExC_parse >= endbrace) { /* Got to the closing '}' */
+                if (count) {
                      goto do_concat;
                  }
  
@@ -12988,18 +13083,19 @@ S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state,
                  return TRUE;
              }
  
-            /* Here, the only legal thing would be a multiple character
-             * sequence (of the form "\N{U+c1.c2. ... }".   So the next
-             * character must be a dot (and the one after that can't be the
-             * endbrace, or we'd have something like \N{U+100.} ) */
+            /* Here, the parse stopped bfore the ending brace.  This is legal
+             * only if that character is a dot separating code points, like a
+             * multiple character sequence (of the form "\N{U+c1.c2. ... }".
+             * So the next character must be a dot (and the one after that
+             * can't be the endbrace, or we'd have something like \N{U+100.} )
+             * */
              if (*RExC_parse != '.' || RExC_parse + 1 >= endbrace) {
                  RExC_parse += (RExC_orig_utf8)  /* point to after 1st invalid */
-                                ? UTF8SKIP(RExC_parse)
-                                : 1;
-                if (RExC_parse >= endbrace) { /* Guard against malformed utf8 */
-                    RExC_parse = endbrace;
-                }
-                vFAIL("Invalid hexadecimal number in \\N{U+...}");
+                              ? UTF8SKIP(RExC_parse)
+                              : 1;
+                RExC_parse = MIN(endbrace, RExC_parse);/* Guard against
+                                                          malformed utf8 */
+                goto bad_NU;
              }
  
              /* Here, looks like its really a multiple character sequence.  Fail
@@ -13017,7 +13113,7 @@ S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state,
               * but go through the motions of code point counting and error
               * checking, if the caller doesn't want a node returned. */
  
-            if (node_p && count == 1) {
+            if (node_p && ! substitute_parse) {
                  substitute_parse = newSVpvs("?:");
              }
  
@@ -13368,13 +13464,28 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
         /* Special Escapes */
         case 'A':
             RExC_seen_zerolen++;
-           ret = reg_node(pRExC_state, SBOL);
-            /* SBOL is shared with /^/ so we set the flags so we can tell
-             * /\A/ from /^/ in split. */
-            FLAGS(REGNODE_p(ret)) = 1;
+            /* Under wildcards, this is changed to match \n; should be
+             * invisible to the user, as they have to compile under /m */
+            if (RExC_pm_flags & PMf_WILDCARD) {
+                ret = reg_node(pRExC_state, MBOL);
+            }
+            else {
+                ret = reg_node(pRExC_state, SBOL);
+                /* SBOL is shared with /^/ so we set the flags so we can tell
+                 * /\A/ from /^/ in split. */
+                FLAGS(REGNODE_p(ret)) = 1;
+            }
             *flagp |= SIMPLE;
             goto finish_meta_pat;
         case 'G':
+            if (RExC_pm_flags & PMf_WILDCARD) {
+                RExC_parse++;
+                /* diag_listed_as: Use of %s is not allowed in Unicode property
+                   wildcard subpatterns in regex; marked by <-- HERE in m/%s/
+                 */
+                vFAIL("Use of '\\G' is not allowed in Unicode property"
+                      " wildcard subpatterns");
+            }
             ret = reg_node(pRExC_state, GPOS);
              RExC_seen |= REG_GPOS_SEEN;
             *flagp |= SIMPLE;
@@ -13396,12 +13507,24 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                  vFAIL("\\K not permitted in lookahead/lookbehind");
              }
         case 'Z':
-           ret = reg_node(pRExC_state, SEOL);
+            if (RExC_pm_flags & PMf_WILDCARD) {
+                /* See comment under \A above */
+                ret = reg_node(pRExC_state, MEOL);
+            }
+            else {
+                ret = reg_node(pRExC_state, SEOL);
+            }
             *flagp |= SIMPLE;
             RExC_seen_zerolen++;                /* Do not optimize RE away */
             goto finish_meta_pat;
         case 'z':
-           ret = reg_node(pRExC_state, EOS);
+            if (RExC_pm_flags & PMf_WILDCARD) {
+                /* See comment under \A above */
+                ret = reg_node(pRExC_state, MEOL);
+            }
+            else {
+                ret = reg_node(pRExC_state, EOS);
+            }
             *flagp |= SIMPLE;
             RExC_seen_zerolen++;                /* Do not optimize RE away */
             goto finish_meta_pat;
@@ -13933,6 +14056,10 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                         || ! is_PATWS_safe((p), RExC_end, UTF));
  
                 switch ((U8)*p) {
+                  const char* message;
+                  U32 packed_warn;
+                  U8 grok_c_char;
+
                 case '^':
                 case '$':
                 case '.':
@@ -14048,67 +14175,70 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                         p++;
                         break;
                     case 'o':
-                       {
-                           UV result;
-                           const char* error_msg;
-
-                           bool valid = grok_bslash_o(&p,
-                                                       RExC_end,
-                                                      &result,
-                                                      &error_msg,
-                                                      TO_OUTPUT_WARNINGS(p),
-                                                       (bool) RExC_strict,
-                                                       TRUE, /* Output warnings
-                                                                for non-
-                                                                portables */
-                                                       UTF);
-                           if (! valid) {
-                               RExC_parse = p; /* going to die anyway; point
-                                                  to exact spot of failure */
-                               vFAIL(error_msg);
-                           }
-                            UPDATE_WARNINGS_LOC(p - 1);
-                            ender = result;
-                           break;
-                       }
+                        if (! grok_bslash_o(&p,
+                                            RExC_end,
+                                            &ender,
+                                            &message,
+                                            &packed_warn,
+                                            (bool) RExC_strict,
+                                            FALSE, /* No illegal cp's */
+                                            UTF))
+                        {
+                            RExC_parse = p; /* going to die anyway; point to
+                                               exact spot of failure */
+                            vFAIL(message);
+                        }
+
+                        if (message && TO_OUTPUT_WARNINGS(p)) {
+                            warn_non_literal_string(p, packed_warn, message);
+                        }
+                        break;
                     case 'x':
-                       {
-                            UV result = UV_MAX; /* initialize to erroneous
-                                                   value */
-                           const char* error_msg;
-
-                           bool valid = grok_bslash_x(&p,
-                                                       RExC_end,
-                                                      &result,
-                                                      &error_msg,
-                                                       TO_OUTPUT_WARNINGS(p),
-                                                       (bool) RExC_strict,
-                                                       TRUE, /* Silence warnings
-                                                                for non-
-                                                                portables */
-                                                       UTF);
-                           if (! valid) {
-                               RExC_parse = p; /* going to die anyway; point
-                                                  to exact spot of failure */
-                               vFAIL(error_msg);
-                           }
-                            UPDATE_WARNINGS_LOC(p - 1);
-                            ender = result;
+                        if (! grok_bslash_x(&p,
+                                            RExC_end,
+                                            &ender,
+                                            &message,
+                                            &packed_warn,
+                                            (bool) RExC_strict,
+                                            FALSE, /* No illegal cp's */
+                                            UTF))
+                        {
+                            RExC_parse = p;    /* going to die anyway; point
+                                                   to exact spot of failure */
+                            vFAIL(message);
+                        }
+
+                        if (message && TO_OUTPUT_WARNINGS(p)) {
+                            warn_non_literal_string(p, packed_warn, message);
+                        }
  
  #ifdef EBCDIC
-                            if (ender < 0x100) {
-                                if (RExC_recode_x_to_native) {
-                                    ender = LATIN1_TO_NATIVE(ender);
-                                }
-                           }
+                        if (ender < 0x100) {
+                            if (RExC_recode_x_to_native) {
+                                ender = LATIN1_TO_NATIVE(ender);
+                            }
+                        }
  #endif
-                           break;
-                       }
+                        break;
                     case 'c':
-                       p++;
-                       ender = grok_bslash_c(*p, TO_OUTPUT_WARNINGS(p));
-                        UPDATE_WARNINGS_LOC(p);
                          p++;
+                        if (! grok_bslash_c(*p, &grok_c_char,
+                                            &message, &packed_warn))
+                        {
+                            /* going to die anyway; point to exact spot of
+                             * failure */
+                            RExC_parse = p + ((UTF)
+                                              ? UTF8_SAFE_SKIP(p, RExC_end)
+                                              : 1);
+                            vFAIL(message);
+                        }
+
+                        ender = grok_c_char;
+                        p++;
+                        if (message && TO_OUTPUT_WARNINGS(p)) {
+                            warn_non_literal_string(p, packed_warn, message);
+                        }
+
                         break;
                      case '8': case '9': /* must be a backreference */
                          --p;
@@ -14143,17 +14273,19 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                          /* FALLTHROUGH */
                      case '0':
                         {
-                           I32 flags = PERL_SCAN_SILENT_ILLDIGIT;
+                           I32 flags = PERL_SCAN_SILENT_ILLDIGIT
+                                      | PERL_SCAN_NOTIFY_ILLDIGIT;
                             STRLEN numlen = 3;
                             ender = grok_oct(p, &numlen, &flags, NULL);
                             p += numlen;
-                            if (   isDIGIT(*p)  /* like \08, \178 */
-                                && ckWARN(WARN_REGEXP)
-                                && numlen < 3)
+                            if (  (flags & PERL_SCAN_NOTIFY_ILLDIGIT)
+                                && isDIGIT(*p)  /* like \08, \178 */
+                                && ckWARN(WARN_REGEXP))
                              {
                                 reg_warn_non_literal_string(
-                                         p + 1,
-                                         form_short_octal_warning(p, numlen));
+                                     p + 1,
+                                     form_alien_digit_msg(8, numlen, p,
+                                                        RExC_end, UTF, FALSE));
                              }
                         }
                         break;
@@ -14230,6 +14362,14 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
  
                  if (ender > 255) {
                      REQUIRE_UTF8(flagp);
+                    if (   UNICODE_IS_PERL_EXTENDED(ender)
+                        && TO_OUTPUT_WARNINGS(p))
+                    {
+                        ckWARN2_non_literal_string(p,
+                                                   packWARN(WARN_PORTABLE),
+                                                   PL_extended_cp_format,
+                                                   ender);
+                    }
                  }
  
                  /* We need to check if the next non-ignored thing is a
@@ -14558,8 +14698,8 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
              else if (FOLD) {
                  bool splittable = FALSE;
                  bool backed_up = FALSE;
-                char * e;
-                char * s_start;
+                char * e;       /* should this be U8? */
+                char * s_start; /* should this be U8? */
  
                  /* Here is /i.  Running out of room creates a problem if we are
                   * folding, and the split happens in the middle of a
@@ -14876,7 +15016,8 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                      if (   ender != LATIN_SMALL_LETTER_SHARP_S
                          || ASCII_FOLD_RESTRICTED)
                      {
-                        *e++ = toLOWER_L1(ender);
+                        assert( toLOWER_L1(ender) < 256 );
+                        *e++ = (char)(toLOWER_L1(ender)); /* should e and the cast be U8? */
                      }
                      else {
                          *e++ = 's';
@@ -14894,7 +15035,8 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                              if (   UCHARAT(p) != LATIN_SMALL_LETTER_SHARP_S
                                  || ASCII_FOLD_RESTRICTED)
                              {
-                                *e++ = toLOWER_L1(ender);
+                                assert( toLOWER_L1(ender) < 256 );
+                                *e++ = (char)(toLOWER_L1(ender)); /* should e and the cast be U8? */
                              }
                              else {
                                  *e++ = 's';
@@ -16061,7 +16203,7 @@ S_handle_regex_sets(pTHX_ RExC_state_t *pRExC_state, SV** return_invlist,
                                         with left paren in stack is; -1 if none.
                                       */
      STRLEN len;                     /* Temporary */
-    regnode_offset node;                  /* Temporary, and final regnode returned by
+    regnode_offset node;            /* Temporary, and final regnode returned by
                                         this function */
      const bool save_fold = FOLD;    /* Temporary */
      char *save_end, *save_parse;    /* Temporaries */
@@ -16070,6 +16212,7 @@ S_handle_regex_sets(pTHX_ RExC_state_t *pRExC_state, SV** return_invlist,
      GET_RE_DEBUG_FLAGS_DECL;
  
      PERL_ARGS_ASSERT_HANDLE_REGEX_SETS;
+    PERL_UNUSED_ARG(oregcomp_parse); /* Only for Set_Node_Length */
  
      DEBUG_PARSE("xcls");
  
@@ -16196,14 +16339,16 @@ redo_curchar:
              SV* rhs;                /* Operand to the right of the operator */
              SV* fence_ptr;          /* Pointer to top element of the fence
                                         stack */
-
              case '(':
  
                  if (   RExC_parse < RExC_end - 2
                      && UCHARAT(RExC_parse + 1) == '?'
                      && UCHARAT(RExC_parse + 2) == '^')
                  {
-                    /* If is a '(?', could be an embedded '(?^flags:(?[...])'.
+                    const regnode_offset orig_emit = RExC_emit;
+                    SV * resultant_invlist;
+
+                    /* If is a '(?^', could be an embedded '(?^flags:(?[...])'.
                       * This happens when we have some thing like
                       *
                       *   my $thai_or_lao = qr/(?[ \p{Thai} + \p{Lao} ])/;
@@ -16212,62 +16357,33 @@ redo_curchar:
                       *
                       * Here we would be handling the interpolated
                       * '$thai_or_lao'.  We handle this by a recursive call to
-                     * ourselves which returns the inversion list the
-                     * interpolated expression evaluates to.  We use the flags
-                     * from the interpolated pattern. */
-                    U32 save_flags = RExC_flags;
-                    const char * save_parse;
-
-                    RExC_parse += 2;        /* Skip past the '(?' */
-                    save_parse = RExC_parse;
-
-                    /* Parse the flags for the '(?'.  We already know the first
-                     * flag to parse is a '^' */
-                    parse_lparen_question_flags(pRExC_state);
-
-                    if (   RExC_parse >= RExC_end - 4
-                        || UCHARAT(RExC_parse) != ':'
-                        || UCHARAT(++RExC_parse) != '('
-                        || UCHARAT(++RExC_parse) != '?'
-                        || UCHARAT(++RExC_parse) != '[')
-                    {
+                     * reg which returns the inversion list the
+                     * interpolated expression evaluates to.  Actually, the
+                     * return is a special regnode containing a pointer to that
+                     * inversion list.  If the return isn't that regnode alone,
+                     * we know that this wasn't such an interpolation, which is
+                     * an error: we need to get a single inversion list back
+                     * from the recursion */
  
-                        /* In combination with the above, this moves the
-                         * pointer to the point just after the first erroneous
-                         * character. */
-                        if (RExC_parse >= RExC_end - 4) {
-                            RExC_parse = RExC_end;
-                        }
-                        else if (RExC_parse != save_parse) {
-                            RExC_parse += (UTF)
-                                          ? UTF8_SAFE_SKIP(RExC_parse, RExC_end)
-                                          : 1;
-                        }
-                        vFAIL("Expecting '(?flags:(?[...'");
-                    }
-
-                    /* Recurse, with the meat of the embedded expression */
                      RExC_parse++;
-                    if (! handle_regex_sets(pRExC_state, &current, flagp,
-                                                    depth+1, oregcomp_parse))
-                    {
-                        RETURN_FAIL_ON_RESTART(*flagp, flagp);
-                    }
+                    RExC_sets_depth++;
  
-                    /* Here, 'current' contains the embedded expression's
-                     * inversion list, and RExC_parse points to the trailing
-                     * ']'; the next character should be the ')' */
-                    RExC_parse++;
-                    if (UCHARAT(RExC_parse) != ')')
-                        vFAIL("Expecting close paren for nested extended charclass");
+                   node = reg(pRExC_state, 2, flagp, depth+1);
+                    RETURN_FAIL_ON_RESTART(*flagp, flagp);
  
-                    /* Then the ')' matching the original '(' handled by this
-                     * case: statement */
-                    RExC_parse++;
-                    if (UCHARAT(RExC_parse) != ')')
-                        vFAIL("Expecting close paren for wrapper for nested extended charclass");
+                    if (   OP(REGNODE_p(node)) != REGEX_SET
+                           /* If more than a single node returned, the nested
+                            * parens evaluated to more than just a (?[...]),
+                            * which isn't legal */
+                        || node != 1) {
+                        vFAIL("Expecting interpolated extended charclass");
+                    }
+                    resultant_invlist = (SV *) ARGp(REGNODE_p(node));
+                    current = invlist_clone(resultant_invlist, NULL);
+                    SvREFCNT_dec(resultant_invlist);
  
-                    RExC_flags = save_flags;
+                    RExC_sets_depth--;
+                    RExC_emit = orig_emit;
                      goto handle_operand;
                  }
  
@@ -16655,88 +16771,96 @@ redo_curchar:
          return END;
      }
  
-    /* Otherwise generate a resultant node, based on 'final'.  regclass() is
-     * expecting a string of ranges and individual code points */
-    invlist_iterinit(final);
-    result_string = newSVpvs("");
-    while (invlist_iternext(final, &start, &end)) {
-        if (start == end) {
-            Perl_sv_catpvf(aTHX_ result_string, "\\x{%" UVXf "}", start);
-        }
-        else {
-            Perl_sv_catpvf(aTHX_ result_string, "\\x{%" UVXf "}-\\x{%" UVXf "}",
-                                                     start,          end);
-        }
+    if (RExC_sets_depth) {  /* If within a recursive call, return in a special
+                               regnode */
+        RExC_parse++;
+        node = regpnode(pRExC_state, REGEX_SET, (void *) final);
      }
+    else {
  
-    /* About to generate an ANYOF (or similar) node from the inversion list we
-     * have calculated */
-    save_parse = RExC_parse;
-    RExC_parse = SvPV(result_string, len);
-    save_end = RExC_end;
-    RExC_end = RExC_parse + len;
-    TURN_OFF_WARNINGS_IN_SUBSTITUTE_PARSE;
+        /* Otherwise generate a resultant node, based on 'final'.  regclass()
+         * is expecting a string of ranges and individual code points */
+        invlist_iterinit(final);
+        result_string = newSVpvs("");
+        while (invlist_iternext(final, &start, &end)) {
+            if (start == end) {
+                Perl_sv_catpvf(aTHX_ result_string, "\\x{%" UVXf "}", start);
+            }
+            else {
+                Perl_sv_catpvf(aTHX_ result_string, "\\x{%" UVXf "}-\\x{%"
+                                                        UVXf "}", start, end);
+            }
+        }
+
+        /* About to generate an ANYOF (or similar) node from the inversion list
+         * we have calculated */
+        save_parse = RExC_parse;
+        RExC_parse = SvPV(result_string, len);
+        save_end = RExC_end;
+        RExC_end = RExC_parse + len;
+        TURN_OFF_WARNINGS_IN_SUBSTITUTE_PARSE;
+
+        /* We turn off folding around the call, as the class we have
+         * constructed already has all folding taken into consideration, and we
+         * don't want regclass() to add to that */
+        RExC_flags &= ~RXf_PMf_FOLD;
+        /* regclass() can only return RESTART_PARSE and NEED_UTF8 if multi-char
+         * folds are allowed.  */
+        node = regclass(pRExC_state, flagp, depth+1,
+                        FALSE, /* means parse the whole char class */
+                        FALSE, /* don't allow multi-char folds */
+                        TRUE, /* silence non-portable warnings.  The above may
+                                 very well have generated non-portable code
+                                 points, but they're valid on this machine */
+                        FALSE, /* similarly, no need for strict */
+
+                        /* We can optimize into something besides an ANYOF,
+                         * except under /l, which needs to be ANYOF because of
+                         * runtime checks for locale sanity, etc */
+                    ! in_locale,
+                        NULL
+                    );
  
-    /* We turn off folding around the call, as the class we have constructed
-     * already has all folding taken into consideration, and we don't want
-     * regclass() to add to that */
-    RExC_flags &= ~RXf_PMf_FOLD;
-    /* regclass() can only return RESTART_PARSE and NEED_UTF8 if multi-char
-     * folds are allowed.  */
-    node = regclass(pRExC_state, flagp, depth+1,
-                    FALSE, /* means parse the whole char class */
-                    FALSE, /* don't allow multi-char folds */
-                    TRUE, /* silence non-portable warnings.  The above may very
-                             well have generated non-portable code points, but
-                             they're valid on this machine */
-                    FALSE, /* similarly, no need for strict */
-
-                    /* We can optimize into something besides an ANYOF, except
-                     * under /l, which needs to be ANYOF because of runtime
-                     * checks for locale sanity, etc */
-                  ! in_locale,
-                    NULL
-                );
+        RESTORE_WARNINGS;
+        RExC_parse = save_parse + 1;
+        RExC_end = save_end;
+        SvREFCNT_dec_NN(final);
+        SvREFCNT_dec_NN(result_string);
  
-    RESTORE_WARNINGS;
-    RExC_parse = save_parse + 1;
-    RExC_end = save_end;
-    SvREFCNT_dec_NN(final);
-    SvREFCNT_dec_NN(result_string);
-
-    if (save_fold) {
-        RExC_flags |= RXf_PMf_FOLD;
-    }
-
-    if (!node) {
-        RETURN_FAIL_ON_RESTART(*flagp, flagp);
-        goto regclass_failed;
-    }
-
-    /* Fix up the node type if we are in locale.  (We have pretended we are
-     * under /u for the purposes of regclass(), as this construct will only
-     * work under UTF-8 locales.  But now we change the opcode to be ANYOFL (so
-     * as to cause any warnings about bad locales to be output in regexec.c),
-     * and add the flag that indicates to check if not in a UTF-8 locale.  The
-     * reason we above forbid optimization into something other than an ANYOF
-     * node is simply to minimize the number of code changes in regexec.c.
-     * Otherwise we would have to create new EXACTish node types and deal with
-     * them.  This decision could be revisited should this construct become
-     * popular.
-     *
-     * (One might think we could look at the resulting ANYOF node and suppress
-     * the flag if everything is above 255, as those would be UTF-8 only,
-     * but this isn't true, as the components that led to that result could
-     * have been locale-affected, and just happen to cancel each other out
-     * under UTF-8 locales.) */
-    if (in_locale) {
-        set_regex_charset(&RExC_flags, REGEX_LOCALE_CHARSET);
+        if (save_fold) {
+            RExC_flags |= RXf_PMf_FOLD;
+        }
+
+        if (!node) {
+            RETURN_FAIL_ON_RESTART(*flagp, flagp);
+            goto regclass_failed;
+        }
+
+        /* Fix up the node type if we are in locale.  (We have pretended we are
+         * under /u for the purposes of regclass(), as this construct will only
+         * work under UTF-8 locales.  But now we change the opcode to be ANYOFL
+         * (so as to cause any warnings about bad locales to be output in
+         * regexec.c), and add the flag that indicates to check if not in a
+         * UTF-8 locale.  The reason we above forbid optimization into
+         * something other than an ANYOF node is simply to minimize the number
+         * of code changes in regexec.c.  Otherwise we would have to create new
+         * EXACTish node types and deal with them.  This decision could be
+         * revisited should this construct become popular.
+         *
+         * (One might think we could look at the resulting ANYOF node and
+         * suppress the flag if everything is above 255, as those would be
+         * UTF-8 only, but this isn't true, as the components that led to that
+         * result could have been locale-affected, and just happen to cancel
+         * each other out under UTF-8 locales.) */
+        if (in_locale) {
+            set_regex_charset(&RExC_flags, REGEX_LOCALE_CHARSET);
  
-        assert(OP(REGNODE_p(node)) == ANYOF);
+            assert(OP(REGNODE_p(node)) == ANYOF);
  
-        OP(REGNODE_p(node)) = ANYOFL;
-        ANYOF_FLAGS(REGNODE_p(node))
-                |= ANYOFL_SHARED_UTF8_LOCALE_fold_HAS_MATCHES_nonfold_REQD;
+            OP(REGNODE_p(node)) = ANYOFL;
+            ANYOF_FLAGS(REGNODE_p(node))
+                    |= ANYOFL_SHARED_UTF8_LOCALE_fold_HAS_MATCHES_nonfold_REQD;
+        }
      }
  
      nextchar(pRExC_state);
@@ -16848,8 +16972,8 @@ S_add_above_Latin1_folds(pTHX_ RExC_state_t *pRExC_state, const U8 cp, SV** invl
                         current Unicode version */
            {
              Size_t folds_count;
-            unsigned int first_fold;
-            const unsigned int * remaining_folds;
+            U32 first_fold;
+            const U32 * remaining_folds;
              UV folded_cp;
  
              if (isASCII(cp)) {
@@ -16905,6 +17029,7 @@ S_output_posix_warnings(pTHX_ RExC_state_t *pRExC_state, AV* posix_warnings)
      PERL_ARGS_ASSERT_OUTPUT_POSIX_WARNINGS;
  
      if (! TO_OUTPUT_WARNINGS(RExC_parse)) {
+        CLEAR_POSIX_WARNINGS();
          return;
      }
  
@@ -17335,6 +17460,9 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
               * is already in 'value'.  Otherwise, need to translate the escape
               * into what it signifies. */
              if (! skip_white || ! isBLANK_A(value)) switch ((I32)value) {
+                const char * message;
+                U32 packed_warn;
+                U8 grok_c_char;
  
             case 'w':   namedclass = ANYOF_WORDCHAR;    break;
             case 'W':   namedclass = ANYOF_NWORDCHAR;   break;
@@ -17417,6 +17545,15 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                 {
                 char *e;
  
+                if (RExC_pm_flags & PMf_WILDCARD) {
+                    RExC_parse++;
+                    /* diag_listed_as: Use of %s is not allowed in Unicode
+                       property wildcard subpatterns in regex; marked by <--
+                       HERE in m/%s/ */
+                    vFAIL3("Use of '\\%c%c' is not allowed in Unicode property"
+                           " wildcard subpatterns", (char) value, *(RExC_parse - 1));
+                }
+
                 /* \p means they want Unicode semantics */
                 REQUIRE_UNI_RULES(flagp, 0);
  
@@ -17585,53 +17722,74 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
             case 'a':   value = '\a';                   break;
             case 'o':
                 RExC_parse--;   /* function expects to be pointed at the 'o' */
-               {
-                   const char* error_msg;
-                   bool valid = grok_bslash_o(&RExC_parse,
-                                               RExC_end,
-                                              &value,
-                                              &error_msg,
-                                               TO_OUTPUT_WARNINGS(RExC_parse),
-                                               strict,
-                                               silence_non_portable,
-                                               UTF);
-                   if (! valid) {
-                       vFAIL(error_msg);
-                   }
-                    UPDATE_WARNINGS_LOC(RExC_parse - 1);
-               }
-                non_portable_endpoint++;
+                if (! grok_bslash_o(&RExC_parse,
+                                            RExC_end,
+                                            &value,
+                                            &message,
+                                            &packed_warn,
+                                            strict,
+                                            cBOOL(range), /* MAX_UV allowed for range
+                                                      upper limit */
+                                            UTF))
+                {
+                    vFAIL(message);
+                }
+                else if (message && TO_OUTPUT_WARNINGS(RExC_parse)) {
+                    warn_non_literal_string(RExC_parse, packed_warn, message);
+                }
+
+                if (value < 256) {
+                    non_portable_endpoint++;
+                }
                 break;
             case 'x':
                 RExC_parse--;   /* function expects to be pointed at the 'x' */
-               {
-                   const char* error_msg;
-                   bool valid = grok_bslash_x(&RExC_parse,
-                                               RExC_end,
-                                              &value,
-                                              &error_msg,
-                                              TO_OUTPUT_WARNINGS(RExC_parse),
-                                               strict,
-                                               silence_non_portable,
-                                               UTF);
-                    if (! valid) {
-                       vFAIL(error_msg);
-                   }
-                    UPDATE_WARNINGS_LOC(RExC_parse - 1);
-               }
-                non_portable_endpoint++;
+                if (!  grok_bslash_x(&RExC_parse,
+                                            RExC_end,
+                                            &value,
+                                            &message,
+                                            &packed_warn,
+                                            strict,
+                                            cBOOL(range), /* MAX_UV allowed for range
+                                                      upper limit */
+                                            UTF))
+                {
+                    vFAIL(message);
+                }
+                else if (message && TO_OUTPUT_WARNINGS(RExC_parse)) {
+                    warn_non_literal_string(RExC_parse, packed_warn, message);
+                }
+
+                if (value < 256) {
+                    non_portable_endpoint++;
+                }
                 break;
             case 'c':
-               value = grok_bslash_c(*RExC_parse, TO_OUTPUT_WARNINGS(RExC_parse));
-                UPDATE_WARNINGS_LOC(RExC_parse);
-               RExC_parse++;
+                if (! grok_bslash_c(*RExC_parse, &grok_c_char, &message,
+                                                                &packed_warn))
+                {
+                    /* going to die anyway; point to exact spot of
+                        * failure */
+                    RExC_parse += (UTF)
+                                  ? UTF8_SAFE_SKIP(RExC_parse, RExC_end)
+                                  : 1;
+                    vFAIL(message);
+                }
+
+                value = grok_c_char;
+                RExC_parse++;
+                if (message && TO_OUTPUT_WARNINGS(RExC_parse)) {
+                    warn_non_literal_string(RExC_parse, packed_warn, message);
+                }
+
                  non_portable_endpoint++;
                 break;
             case '0': case '1': case '2': case '3': case '4':
             case '5': case '6': case '7':
                 {
                     /* Take 1-3 octal digits */
-                   I32 flags = PERL_SCAN_SILENT_ILLDIGIT;
+                   I32 flags = PERL_SCAN_SILENT_ILLDIGIT
+                              | PERL_SCAN_NOTIFY_ILLDIGIT;
                      numlen = (strict) ? 4 : 3;
                      value = grok_oct(--RExC_parse, &numlen, &flags, NULL);
                     RExC_parse += numlen;
@@ -17642,17 +17800,20 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                                            : 1;
                              vFAIL("Need exactly 3 octal digits");
                          }
-                        else if (   numlen < 3 /* like \08, \178 */
+                        else if (  (flags & PERL_SCAN_NOTIFY_ILLDIGIT)
                                   && RExC_parse < RExC_end
                                   && isDIGIT(*RExC_parse)
                                   && ckWARN(WARN_REGEXP))
                          {
                              reg_warn_non_literal_string(
                                   RExC_parse + 1,
-                                 form_short_octal_warning(RExC_parse, numlen));
+                                 form_alien_digit_msg(8, numlen, RExC_parse,
+                                                        RExC_end, UTF, FALSE));
                          }
                      }
-                    non_portable_endpoint++;
+                    if (value < 256) {
+                        non_portable_endpoint++;
+                    }
                     break;
                 }
             default:
@@ -17914,7 +18075,21 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
  
         /* non-Latin1 code point implies unicode semantics. */
         if (value > 255) {
+            if (value > MAX_LEGAL_CP && (   value != UV_MAX
+                                         || prevvalue > MAX_LEGAL_CP))
+            {
+                vFAIL(form_cp_too_large_msg(16, NULL, 0, value));
+            }
              REQUIRE_UNI_RULES(flagp, 0);
+            if (  ! silence_non_portable
+                &&  UNICODE_IS_PERL_EXTENDED(value)
+                &&  TO_OUTPUT_WARNINGS(RExC_parse))
+            {
+                ckWARN2_non_literal_string(RExC_parse,
+                                           packWARN(WARN_PORTABLE),
+                                           PL_extended_cp_format,
+                                           value);
+            }
         }
  
          /* Ready to process either the single value, or the completed range.
@@ -18299,8 +18474,8 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                      STRLEN foldlen;
                      unsigned int k;
                      Size_t folds_count;
-                    unsigned int first_fold;
-                    const unsigned int * remaining_folds;
+                    U32 first_fold;
+                    const U32 * remaining_folds;
  
                      if (j < 256) {
  
@@ -18639,17 +18814,17 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
         invert = FALSE;
      }
  
+    /* All possible optimizations below still have these characteristics.
+     * (Multi-char folds aren't SIMPLE, but they don't get this far in this
+     * routine) */
+    *flagp |= HASWIDTH|SIMPLE;
+
      if (ret_invlist) {
          *ret_invlist = cp_list;
  
          return RExC_emit;
      }
  
-    /* All possible optimizations below still have these characteristics.
-     * (Multi-char folds aren't SIMPLE, but they don't get this far in this
-     * routine) */
-    *flagp |= HASWIDTH|SIMPLE;
-
      if (anyof_flags & ANYOF_LOCALE_FLAGS) {
          RExC_contains_locale = 1;
      }
@@ -19005,8 +19180,8 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                      U8 foldbuf[UTF8_MAXBYTES_CASE];
                      UV folded = _to_uni_fold_flags(start[0],
                                                          foldbuf, &foldlen, 0);
-                    unsigned int first_fold;
-                    const unsigned int * remaining_folds;
+                    U32 first_fold;
+                    const U32 * remaining_folds;
                      Size_t folds_to_this_cp_count = _inverse_folds(
                                                              folded,
                                                              &first_fold,
@@ -19819,10 +19994,11 @@ Perl__get_regclass_nonbitmap_data(pTHX_ const regexp *prog,
                      /* The data consists of just strings defining user-defined
                       * property names, but in prior incarnations, and perhaps
                       * somehow from pluggable regex engines, it could still
-                     * hold hex code point definitions.  Each component of a
-                     * range would be separated by a tab, and each range by a
-                     * new-line.  If these are found, instead add them to the
-                     * inversion list */
+                     * hold hex code point definitions, all of which should be
+                     * legal (or it wouldn't have gotten this far).  Each
+                     * component of a range would be separated by a tab, and
+                     * each range by a new-line.  If these are found, instead
+                     * add them to the inversion list */
                      I32 grok_flags =  PERL_SCAN_SILENT_ILLDIGIT
                                       |PERL_SCAN_SILENT_NON_PORTABLE;
                      STRLEN len = remaining;
@@ -19860,7 +20036,7 @@ Perl__get_regclass_nonbitmap_data(pTHX_ const regexp *prog,
  
                      remaining -= len;
                      len = strcspn(si_string,
-                                        DEFERRED_PROP_EXPANSION_MARKERs "\n");
+                                        DEFERRED_COULD_BE_OFFICIAL_MARKERs "\n");
                      remaining -= len;
                      if (matches_string) {
                          sv_catpvn(matches_string, si_string, len);
@@ -19873,7 +20049,7 @@ Perl__get_regclass_nonbitmap_data(pTHX_ const regexp *prog,
                      si_string += len;
                      if (   remaining
                          && UCHARAT(si_string)
-                                            == DEFERRED_PROP_EXPANSION_MARKERc)
+                                            == DEFERRED_COULD_BE_OFFICIAL_MARKERc)
                      {
                          si_string++;
                          remaining--;
@@ -20147,6 +20323,22 @@ S_reganode(pTHX_ RExC_state_t *pRExC_state, U8 op, U32 arg)
      return(ret);
  }
  
+/*
+- regpnode - emit a temporary node with a void* argument
+*/
+STATIC regnode_offset /* Location. */
+S_regpnode(pTHX_ RExC_state_t *pRExC_state, U8 op, void * arg)
+{
+    const regnode_offset ret = regnode_guts(pRExC_state, op, regarglen[op], "regvnode");
+    regnode_offset ptr = ret;
+
+    PERL_ARGS_ASSERT_REGPNODE;
+
+    FILL_ADVANCE_NODE_ARGp(ptr, op, arg);
+    RExC_emit = ptr;
+    return(ret);
+}
+
  STATIC regnode_offset
  S_reg2Lanode(pTHX_ RExC_state_t *pRExC_state, const U8 op, const U32 arg1, const I32 arg2)
  {
@@ -21142,6 +21334,9 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_
  SV *
  Perl_re_intuit_string(pTHX_ REGEXP * const r)
  {                              /* Assume that RE_INTUIT is set */
+    /* Returns an SV containing a string that must appear in the target for it
+     * to match */
+
      struct regexp *const prog = ReANY(r);
      GET_RE_DEBUG_FLAGS_DECL;
  
@@ -21150,11 +21345,12 @@ Perl_re_intuit_string(pTHX_ REGEXP * const r)
  
      DEBUG_COMPILE_r(
         {
-           const char * const s = SvPV_nolen_const(RX_UTF8(r)
+            if (prog->maxlen > 0) {
+                const char * const s = SvPV_nolen_const(RX_UTF8(r)
                       ? prog->check_utf8 : prog->check_substr);
  
-           if (!PL_colorset) reginitcolors();
-            Perl_re_printf( aTHX_
+                if (!PL_colorset) reginitcolors();
+                Perl_re_printf( aTHX_
                       "%sUsing REx %ssubstr:%s \"%s%.60s%s%s\"\n",
                       PL_colors[4],
                       RX_UTF8(r) ? "utf8 " : "",
@@ -21162,6 +21358,7 @@ Perl_re_intuit_string(pTHX_ REGEXP * const r)
                       s,
                       PL_colors[1],
                       (strlen(s) > PL_dump_re_max_len ? "..." : ""));
+            }
         } );
  
      /* use UTF8 check substring if regexp pattern itself is in UTF8 */
@@ -22582,14 +22779,14 @@ S_dumpuntil(pTHX_ const regexp *r, const regnode *start, const regnode *node,
  
  #ifndef PERL_IN_XSUB_RE
  
-#include "uni_keywords.h"
+#  include "uni_keywords.h"
  
  void
  Perl_init_uniprops(pTHX)
  {
      dVAR;
  
-#ifdef DEBUGGING
+#  ifdef DEBUGGING
      char * dump_len_string;
  
      dump_len_string = PerlEnv_getenv("PERL_DUMP_RE_MAX_LEN");
@@ -22598,16 +22795,16 @@ Perl_init_uniprops(pTHX)
      {
          PL_dump_re_max_len = 60;    /* A reasonable default */
      }
-#endif
+#  endif
  
      PL_user_def_props = newHV();
  
-#ifdef USE_ITHREADS
+#  ifdef USE_ITHREADS
  
      HvSHAREKEYS_off(PL_user_def_props);
      PL_user_def_props_aTHX = aTHX;
  
-#endif
+#  endif
  
      /* Set up the inversion list interpreter-level variables */
  
@@ -22651,7 +22848,7 @@ Perl_init_uniprops(pTHX)
      PL_LB_invlist = _new_invlist_C_array(_Perl_LB_invlist);
      PL_SCX_invlist = _new_invlist_C_array(_Perl_SCX_invlist);
  
-    PL_InBitmap = _new_invlist_C_array(_Perl_InBitmap_invlist);
+    PL_InBitmap = _new_invlist_C_array(InBitmap_invlist);
      PL_AboveLatin1 = _new_invlist_C_array(AboveLatin1_invlist);
      PL_Latin1 = _new_invlist_C_array(Latin1_invlist);
      PL_UpperLatin1 = _new_invlist_C_array(UpperLatin1_invlist);
@@ -22679,15 +22876,15 @@ Perl_init_uniprops(pTHX)
      PL_CCC_non0_non230 = _new_invlist_C_array(_Perl_CCC_non0_non230_invlist);
      PL_Private_Use = _new_invlist_C_array(uni_prop_ptrs[UNI_CO]);
  
-#ifdef UNI_XIDC
+#  ifdef UNI_XIDC
      /* The below are used only by deprecated functions.  They could be removed */
      PL_utf8_xidcont  = _new_invlist_C_array(uni_prop_ptrs[UNI_XIDC]);
      PL_utf8_idcont   = _new_invlist_C_array(uni_prop_ptrs[UNI_IDC]);
      PL_utf8_xidstart = _new_invlist_C_array(uni_prop_ptrs[UNI_XIDS]);
-#endif
+#  endif
  }
  
-#if 0
+#  if 0
  
  This code was mainly added for backcompat to give a warning for non-portable
  code points in user-defined properties.  But experiments showed that the
@@ -22717,7 +22914,43 @@ S_get_extended_utf8_msg(pTHX_ const UV cp)
      return SvPVX(*msg);
  }
  
-#endif
+#  endif
+
+STATIC REGEXP *
+S_compile_wildcard(pTHX_ const char * name, const STRLEN len,
+                         const bool ignore_case)
+{
+    U32 flags = PMf_MULTILINE|PMf_WILDCARD;
+    REGEXP * subpattern_re;
+
+    PERL_ARGS_ASSERT_COMPILE_WILDCARD;
+
+    if (ignore_case) {
+        flags |= PMf_FOLD;
+    }
+    set_regex_charset(&flags, REGEX_ASCII_MORE_RESTRICTED_CHARSET);
+
+    subpattern_re = re_op_compile_wrapper(sv_2mortal(newSVpvn(name, len)),
+                                        /* Like in op.c, we copy the compile
+                                         * time pm flags to the rx ones */
+                                        (flags & RXf_PMf_COMPILETIME), flags);
+
+    assert(subpattern_re);  /* Should have died if didn't compile successfully */
+    return subpattern_re;
+}
+
+STATIC I32
+S_execute_wildcard(pTHX_ REGEXP * const prog, char* stringarg, char *strend,
+        char *strbeg, SSize_t minend, SV *screamer, U32 nosave)
+{
+    I32 result;
+
+    PERL_ARGS_ASSERT_EXECUTE_WILDCARD;
+
+    result = pregexec(prog, stringarg, strend, strbeg, minend, screamer, nosave);
+
+    return result;
+}
  
  SV *
  Perl_handle_user_defined_property(pTHX_
@@ -22890,7 +23123,7 @@ Perl_handle_user_defined_property(pTHX_
              goto return_failure;
          }
  
-#if 0   /* See explanation at definition above of get_extended_utf8_msg() */
+#  if 0   /* See explanation at definition above of get_extended_utf8_msg() */
  
          if (   UNICODE_IS_PERL_EXTENDED(min)
              || UNICODE_IS_PERL_EXTENDED(max))
@@ -22908,7 +23141,7 @@ Perl_handle_user_defined_property(pTHX_
              sv_catpvs(msg, "\"");
          }
  
-#endif
+#  endif
  
          /* Here, this line contains a legal range */
          this_definition = sv_2mortal(_new_invlist(2));
@@ -23017,21 +23250,21 @@ Perl_handle_user_defined_property(pTHX_
  
  /* As explained below, certain operations need to take place in the first
   * thread created.  These macros switch contexts */
-#ifdef USE_ITHREADS
-#  define DECLARATION_FOR_GLOBAL_CONTEXT                                    \
+#  ifdef USE_ITHREADS
+#    define DECLARATION_FOR_GLOBAL_CONTEXT                                  \
                                          PerlInterpreter * save_aTHX = aTHX;
-#  define SWITCH_TO_GLOBAL_CONTEXT                                          \
+#    define SWITCH_TO_GLOBAL_CONTEXT                                        \
                             PERL_SET_CONTEXT((aTHX = PL_user_def_props_aTHX))
-#  define RESTORE_CONTEXT  PERL_SET_CONTEXT((aTHX = save_aTHX));
-#  define CUR_CONTEXT      aTHX
-#  define ORIGINAL_CONTEXT save_aTHX
-#else
-#  define DECLARATION_FOR_GLOBAL_CONTEXT
-#  define SWITCH_TO_GLOBAL_CONTEXT          NOOP
-#  define RESTORE_CONTEXT                   NOOP
-#  define CUR_CONTEXT                       NULL
-#  define ORIGINAL_CONTEXT                  NULL
-#endif
+#    define RESTORE_CONTEXT  PERL_SET_CONTEXT((aTHX = save_aTHX));
+#    define CUR_CONTEXT      aTHX
+#    define ORIGINAL_CONTEXT save_aTHX
+#  else
+#    define DECLARATION_FOR_GLOBAL_CONTEXT
+#    define SWITCH_TO_GLOBAL_CONTEXT          NOOP
+#    define RESTORE_CONTEXT                   NOOP
+#    define CUR_CONTEXT                       NULL
+#    define ORIGINAL_CONTEXT                  NULL
+#  endif
  
  STATIC void
  S_delete_recursion_entry(pTHX_ void *key)
@@ -23129,14 +23362,15 @@ Perl_parse_uniprop_string(pTHX_
                                     user-defined property */
      SV * msg,                   /* Any error or warning msg(s) are appended to
                                     this */
-   const STRLEN level)          /* Recursion level of this call */
+    const STRLEN level)         /* Recursion level of this call */
  {
      dVAR;
      char* lookup_name;          /* normalized name for lookup in our tables */
      unsigned lookup_len;        /* Its length */
-    bool stricter = FALSE;      /* Some properties have stricter name
-                                   normalization rules, which we decide upon
-                                   based on parsing */
+    enum { Not_Strict = 0,      /* Some properties have stricter name */
+           Strict,              /* normalization rules, which we decide */
+           As_Is                /* upon based on parsing */
+         } stricter = Not_Strict;
  
      /* nv= or numeric_value=, or possibly one of the cjk numeric properties
       * (though it requires extra effort to download them from Unicode and
@@ -23153,6 +23387,7 @@ Perl_parse_uniprop_string(pTHX_
                                     the normalized name in certain situations */
      Size_t non_pkg_begin = 0;   /* Offset of first byte in 'name' that isn't
                                     part of a package name */
+    Size_t lun_non_pkg_begin = 0;   /* Similarly for 'lookup_name' */
      bool could_be_user_defined = TRUE;  /* ? Could this be a user-defined
                                               property rather than a Unicode
                                               one. */
@@ -23168,6 +23403,12 @@ Perl_parse_uniprop_string(pTHX_
      bool stripped_utf8_pkg = FALSE; /* Set TRUE if the input includes an
                                         explicit utf8:: package that we strip
                                         off  */
+    /* The expansion of properties that could be either user-defined or
+     * official unicode ones is deferred until runtime, including a marker for
+     * those that might be in the latter category.  This boolean indicates if
+     * we've seen that marker.  If not, what we're parsing can't be such an
+     * official Unicode property whose expansion was deferred */
+    bool could_be_deferred_official = FALSE;
  
      PERL_ARGS_ASSERT_PARSE_UNIPROP_STRING;
  
@@ -23227,13 +23468,15 @@ Perl_parse_uniprop_string(pTHX_
          }
  
          /* If this looks like it is a marker we inserted at compile time,
-         * ignore it; otherwise keep it as it would have been user input. */
-        if (     UNLIKELY(cur == DEFERRED_PROP_EXPANSION_MARKERc)
+         * set a flag and otherwise ignore it.  If it isn't in the final
+         * position, keep it as it would have been user input. */
+        if (     UNLIKELY(cur == DEFERRED_COULD_BE_OFFICIAL_MARKERc)
              && ! deferrable
              &&   could_be_user_defined
              &&   i == name_len - 1)
          {
              name_len--;
+            could_be_deferred_official = TRUE;
              continue;
          }
  
@@ -23253,6 +23496,7 @@ Perl_parse_uniprop_string(pTHX_
              i++;
              non_pkg_begin = i + 1;
              lookup_name[j++] = ':';
+            lun_non_pkg_begin = j;
          }
          else { /* Only word chars (and '::') can be in a user-defined name */
              could_be_user_defined = FALSE;
@@ -23260,7 +23504,7 @@ Perl_parse_uniprop_string(pTHX_
      } /* End of parsing through the lhs of the property name (or all of it if
           no rhs) */
  
-#define STRLENs(s)  (sizeof("" s "") - 1)
+#  define STRLENs(s)  (sizeof("" s "") - 1)
  
      /* If there is a single package name 'utf8::', it is ambiguous.  It could
       * be for a user-defined property, or it could be a Unicode property, as
@@ -23277,7 +23521,7 @@ Perl_parse_uniprop_string(pTHX_
       * or are positioned just after the '=' if it is compound. */
  
      if (equals_pos >= 0) {
-        assert(! stricter); /* We shouldn't have set this yet */
+        assert(stricter == Not_Strict); /* We shouldn't have set this yet */
  
          /* Space immediately after the '=' is ignored */
          i++;
@@ -23290,10 +23534,13 @@ Perl_parse_uniprop_string(pTHX_
          /* Most punctuation after the equals indicates a subpattern, like
           * \p{foo=/bar/} */
          if (   isPUNCT_A(name[i])
-            && name[i] != '-'
-            && name[i] != '+'
-            && name[i] != '_'
-            && name[i] != '{')
+            &&  name[i] != '-'
+            &&  name[i] != '+'
+            &&  name[i] != '_'
+            &&  name[i] != '{'
+                /* A backslash means the real delimitter is the next character,
+                 * but it must be punctuation */
+            && (name[i] != '\\' || (i < name_len && isPUNCT_A(name[i+1]))))
          {
              /* Find the property.  The table includes the equals sign, so we
               * use 'j' as-is */
@@ -23301,16 +23548,14 @@ Perl_parse_uniprop_string(pTHX_
              if (table_index) {
                  const char * const * prop_values
                                              = UNI_prop_value_ptrs[table_index];
-                SV * subpattern;
-                Size_t subpattern_len;
                  REGEXP * subpattern_re;
                  char open = name[i++];
                  char close;
                  const char * pos_in_brackets;
                  bool escaped = 0;
  
-                /* A backslash means the real delimitter is the next character.
-                 * */
+                /* Backslash => delimitter is the character following.  We
+                 * already checked that it is punctuation */
                  if (open == '\\') {
                      open = name[i++];
                      escaped = 1;
@@ -23321,12 +23566,16 @@ Perl_parse_uniprop_string(pTHX_
                   * set of closing is so that if the opening is something like
                   * ']', the closing will be that as well.  Something similar is
                   * done in toke.c */
-                pos_in_brackets = strchr("([<)]>)]>", open);
+                pos_in_brackets = memCHRs("([<)]>)]>", open);
                  close = (pos_in_brackets) ? pos_in_brackets[3] : open;
  
                  if (    i >= name_len
                      ||  name[name_len-1] != close
-                    || (escaped && name[name_len-2] != '\\'))
+                    || (escaped && name[name_len-2] != '\\')
+                        /* Also make sure that there are enough characters.
+                         * e.g., '\\\' would show up incorrectly as legal even
+                         * though it is too short */
+                    || (SSize_t) (name_len - i - 1 - escaped) < 0)
                  {
                      sv_catpvs(msg, "Unicode property wildcard not terminated");
                      goto append_name_to_msg;
@@ -23342,14 +23591,10 @@ Perl_parse_uniprop_string(pTHX_
                   * pattern fails to compile, our added text to the user's
                   * pattern will be displayed to the user, which is not so
                   * desirable. */
-                subpattern_len = name_len - i - 1 - escaped;
-                subpattern = Perl_newSVpvf(aTHX_ "(?iaa:%.*s)",
-                                              (unsigned) subpattern_len,
-                                              name + i);
-                subpattern = sv_2mortal(subpattern);
-                subpattern_re = re_compile(subpattern, 0);
-                assert(subpattern_re);  /* Should have died if didn't compile
-                                         successfully */
+                subpattern_re = compile_wildcard(name + i,
+                                                 name_len - i - 1 - escaped,
+                                                 TRUE /* /i */
+                                                );
  
                  /* For each legal property value, see if the supplied pattern
                   * matches it. */
@@ -23358,7 +23603,7 @@ Perl_parse_uniprop_string(pTHX_
                      const Size_t len = strlen(entry);
                      SV* entry_sv = newSVpvn_flags(entry, len, SVs_TEMP);
  
-                    if (pregexec(subpattern_re,
+                    if (execute_wildcard(subpattern_re,
                                   (char *) entry,
                                   (char *) entry + len,
                                   (char *) entry, 0,
@@ -23449,6 +23694,94 @@ Perl_parse_uniprop_string(pTHX_
               * some constructs in their subpattern, like \A. */
          } /* End of is a wildcard subppattern */
  
+        /* \p{name=...} is handled specially.  Instead of using the normal
+         * mechanism involving charclass_invlists.h, it uses _charnames.pm
+         * which has the necessary (huge) data accessible to it, and which
+         * doesn't get loaded unless necessary.  The legal syntax for names is
+         * somewhat different than other properties due both to the vagaries of
+         * a few outlier official names, and the fact that only a few ASCII
+         * characters are permitted in them */
+        if (   memEQs(lookup_name, j - 1, "name")
+            || memEQs(lookup_name, j - 1, "na"))
+        {
+            dSP;
+            HV * table;
+            SV * character;
+            const char * error_msg;
+            CV* lookup_loose;
+            SV * character_name;
+            STRLEN character_len;
+            UV cp;
+
+            stricter = As_Is;
+
+            /* Since the RHS (after skipping initial space) is passed unchanged
+             * to charnames, and there are different criteria for what are
+             * legal characters in the name, just parse it here.  A character
+             * name must begin with an ASCII alphabetic */
+            if (! isALPHA(name[i])) {
+                goto failed;
+            }
+            lookup_name[j++] = name[i];
+
+            for (++i; i < name_len; i++) {
+                /* Official names can only be in the ASCII range, and only
+                 * certain characters */
+                if (! isASCII(name[i]) || ! isCHARNAME_CONT(name[i])) {
+                    goto failed;
+                }
+                lookup_name[j++] = name[i];
+            }
+
+            /* Finished parsing, save the name into an SV */
+            character_name = newSVpvn(lookup_name + equals_pos, j - equals_pos);
+
+            /* Make sure _charnames is loaded.  (The parameters give context
+             * for any errors generated */
+            table = load_charnames(character_name, name, name_len, &error_msg);
+            if (table == NULL) {
+                sv_catpv(msg, error_msg);
+                goto append_name_to_msg;
+            }
+
+            lookup_loose = get_cv("_charnames::_loose_regcomp_lookup", 0);
+            if (! lookup_loose) {
+                Perl_croak(aTHX_
+                       "panic: Can't find '_charnames::_loose_regcomp_lookup");
+            }
+
+            PUSHSTACKi(PERLSI_OVERLOAD);
+            ENTER ;
+            SAVETMPS;
+            save_re_context();
+
+            PUSHMARK(SP) ;
+            XPUSHs(character_name);
+            PUTBACK;
+            call_sv(MUTABLE_SV(lookup_loose), G_SCALAR);
+
+            SPAGAIN ;
+
+            character = POPs;
+            SvREFCNT_inc_simple_void_NN(character);
+
+            PUTBACK ;
+            FREETMPS ;
+            LEAVE ;
+            POPSTACK;
+
+            if (! SvOK(character)) {
+                goto failed;
+            }
+
+            cp = valid_utf8_to_uvchr((U8 *) SvPVX(character), &character_len);
+            if (character_len < SvCUR(character)) {
+                goto failed;
+            }
+
+            prop_definition = add_cp_to_invlist(NULL, cp);
+            return prop_definition;
+        }
  
          /* Certain properties whose values are numeric need special handling.
           * They may optionally be prefixed by 'is'.  Ignore that prefix for the
@@ -23501,12 +23834,12 @@ Perl_parse_uniprop_string(pTHX_
               * But the numeric type properties can have the alphas [Ee] to
               * signify an exponent, and it is still a number with stricter
               * rules.  So look for an alpha that signifies not-strict */
-            stricter = TRUE;
+            stricter = Strict;
              for (k = i; k < name_len; k++) {
                  if (   isALPHA_A(name[k])
                      && (! is_nv_type || ! isALPHA_FOLD_EQ(name[k], 'E')))
                  {
-                    stricter = FALSE;
+                    stricter = Not_Strict;
                      break;
                  }
              }
@@ -23544,7 +23877,7 @@ Perl_parse_uniprop_string(pTHX_
              && memNEs(lookup_name + 4, j - 4, "space")
              && memNEs(lookup_name + 4, j - 4, "word"))
          {
-            stricter = TRUE;
+            stricter = Strict;
  
              /* We set the inputs back to 0 and the code below will reparse,
               * using strict */
@@ -23679,12 +24012,15 @@ Perl_parse_uniprop_string(pTHX_
                  goto definition_deferred;
              }
  
-            /* If we haven't already stripped the package name (if one), do so
-             * now so can look for an official property with the stripped name.
-             * */
-            if (! stripped_utf8_pkg) {
-                lookup_name += non_pkg_begin;
-                j -= non_pkg_begin;
+            /* Here, we are at runtime, and didn't find the user property.  It
+             * could be an official property, but only if no package was
+             * specified, or just the utf8:: package. */
+            if (could_be_deferred_official) {
+                lookup_name += lun_non_pkg_begin;
+                j -= lun_non_pkg_begin;
+            }
+            else if (! stripped_utf8_pkg) {
+                goto unknown_user_defined;
              }
  
              /* Drop down to look up in the official properties */
@@ -23889,6 +24225,7 @@ Perl_parse_uniprop_string(pTHX_
               * but not yet used. */
              save_item(PL_subname);
  
+            /* G_SCALAR guarantees a single return value */
              (void) call_sv(user_sub_sv, G_EVAL|G_SCALAR);
  
              SPAGAIN;
@@ -23916,7 +24253,7 @@ Perl_parse_uniprop_string(pTHX_
                  (void) POPs;
                  prop_definition = NULL;
              }
-            else {  /* G_SCALAR guarantees a single return value */
+            else {
                  SV * contents = POPs;
  
                  /* The contents is supposed to be the expansion of the property
@@ -24036,9 +24373,7 @@ Perl_parse_uniprop_string(pTHX_
                   * property hasn't been encountered yet, but at runtime, it's
                   * an error to try to use an undefined one */
                  if (! deferrable) {
-                    if (SvCUR(msg) > 0) sv_catpvs(msg, "; ");
-                    sv_catpvs(msg, "Unknown user-defined property name");
-                    goto append_name_to_msg;
+                    goto unknown_user_defined;;
                  }
  
                  goto definition_deferred;
@@ -24299,6 +24634,10 @@ Perl_parse_uniprop_string(pTHX_
      }
      return prop_definition;
  
+  unknown_user_defined:
+    if (SvCUR(msg) > 0) sv_catpvs(msg, "; ");
+    sv_catpvs(msg, "Unknown user-defined property name");
+    goto append_name_to_msg;
  
    failed:
      if (non_pkg_begin != 0) {
@@ -24344,7 +24683,7 @@ Perl_parse_uniprop_string(pTHX_
           * use at such time when we first need the definition during pattern
           * matching execution */
          if (! is_qualified || memBEGINPs(name, non_pkg_begin, "utf8::")) {
-            sv_catpvs(fq_name, DEFERRED_PROP_EXPANSION_MARKERs);
+            sv_catpvs(fq_name, DEFERRED_COULD_BE_OFFICIAL_MARKERs);
          }
  
          /* We also need a trailing newline */
@@ -24355,7 +24694,7 @@ Perl_parse_uniprop_string(pTHX_
      }
  }
  
-#endif
+#endif /* end of ! PERL_IN_XSUB_RE */
  
  /*
   * ex: set ts=8 sts=4 sw=4 et: