This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Rmv obsolete function
[perl5.git] / regcomp.c
index d4332bd..45b1d1f 100644 (file)
--- a/regcomp.c
+++ b/regcomp.c
  * precedence is structured in regular expressions.  Serious changes in
  * regular-expression syntax might require a total rethink.
  */
+
+/* Note on debug output:
+ *
+ * This is set up so that -Dr turns on debugging like all other flags that are
+ * enabled by -DDEBUGGING.  -Drv gives more verbose output.  This applies to
+ * all regular expressions encountered in a program, and gives a huge amount of
+ * output for all but the shortest programs.
+ *
+ * The ability to output pattern debugging information lexically, and with much
+ * finer grained control was added, with 'use re qw(Debug ....);' available even
+ * in non-DEBUGGING builds.  This is accomplished by copying the contents of
+ * regcomp.c to ext/re/re_comp.c, and regexec.c is copied to ext/re/re_exec.c.
+ * Those files are compiled and linked into the perl executable, and they are
+ * compiled essentially as if DEBUGGING were enabled, and controlled by calls
+ * to re.pm.
+ *
+ * That would normally mean linking errors when two functions of the same name
+ * are attempted to be placed into the same executable.  That is solved in one
+ * of four ways:
+ *  1)  Static functions aren't known outside the file they are in, so for the
+ *      many functions of that type in this file, it just isn't a problem.
+ *  2)  Most externally known functions are enclosed in
+ *          #ifndef PERL_IN_XSUB_RE
+ *          ...
+ *          #endif
+ *      blocks, so there is only one definition for them in the whole
+ *      executable, the one in regcomp.c (or regexec.c).  The implication of
+ *      that is any debugging info that comes from them is controlled only by
+ *      -Dr.  Further, any static function they call will also be the version
+ *      in regcomp.c (or regexec.c), so its debugging will also be by -Dr.
+ *  3)  About a dozen external functions are re-#defined in ext/re/re_top.h, to
+ *      have different names, so that what gets loaded in the executable is
+ *      'Perl_foo' from regcomp.c (and regexec.c), and the identical function
+ *      from re_comp.c (and re_exec.c), but with the name 'my_foo'.  Debugging
+ *      in the 'Perl_foo' versions is controlled by -Dr, but the 'my_foo'
+ *      versions and their callees are under control of re.pm.   The catch is
+ *      that references to all these go through the regexp_engine structure,
+ *      which is initialized in regcomp.h to the Perl_foo versions, and
+ *      substituted out in lexical scopes where 'use re' is in effect to the
+ *      'my_foo' ones.   That structure is public API, so it would be a hard
+ *      sell to add any additional members.
+ *  4)  For functions in regcomp.c and re_comp.c that are called only from,
+ *      respectively, regexec.c and re_exec.c, they can have two different
+ *      names, depending on #ifdef'ing PERL_IN_XSUB_RE, in both regexec.c and
+ *      embed.fnc.
+ *
+ * The bottom line is that if you add code to one of the public functions
+ * listed in ext/re/re_top.h, debugging automagically works.  But if you write
+ * a new function that needs to do debugging or there is a chain of calls from
+ * it that need to do debugging, all functions in the chain should use options
+ * 2) or 4) above.
+ *
+ * A function may have to be split so that debugging stuff is static, but it
+ * calls out to some other function that only gets compiled in regcomp.c to
+ * access data that we don't want to duplicate.
+ */
+
 #include "EXTERN.h"
 #define PERL_IN_REGCOMP_C
 #include "perl.h"
 #ifdef PERL_IN_XSUB_RE
 #  include "re_comp.h"
 EXTERN_C const struct regexp_engine my_reg_engine;
+EXTERN_C const struct regexp_engine wild_reg_engine;
 #else
 #  include "regcomp.h"
 #endif
 
-#include "dquote_inline.h"
 #include "invlist_inline.h"
 #include "unicode_constants.h"
 
@@ -140,9 +197,16 @@ struct RExC_state_t {
     regnode_offset emit;               /* Code-emit pointer */
     I32                naughty;                /* How bad is this pattern? */
     I32                sawback;                /* Did we see \1, ...? */
-    U32                seen;
     SSize_t    size;                   /* Number of regnode equivalents in
                                            pattern */
+    Size_t      sets_depth;              /* Counts recursion depth of already-
+                                           compiled regex set patterns */
+    U32                seen;
+
+    I32      parens_buf_size;           /* #slots malloced open/close_parens */
+    regnode_offset *open_parens;       /* offsets to open parens */
+    regnode_offset *close_parens;      /* offsets to close parens */
+    HV         *paren_names;           /* Paren names */
 
     /* position beyond 'precomp' of the warning message furthest away from
      * 'precomp'.  During the parse, no warnings are raised for any problems
@@ -163,9 +227,6 @@ struct RExC_state_t {
     I32                nestroot;               /* root parens we are in - used by
                                            accept */
     I32                seen_zerolen;
-    regnode_offset *open_parens;       /* offsets to open parens */
-    regnode_offset *close_parens;      /* offsets to close parens */
-    I32      parens_buf_size;           /* #slots malloced open/close_parens */
     regnode     *end_op;                /* END node in program */
     I32                utf8;           /* whether the pattern is utf8 or not */
     I32                orig_utf8;      /* whether the pattern was originally in utf8 */
@@ -174,10 +235,9 @@ struct RExC_state_t {
     I32                uni_semantics;  /* If a d charset modifier should use unicode
                                   rules, even if the pattern is not in
                                   utf8 */
-    HV         *paren_names;           /* Paren names */
 
-    regnode    **recurse;              /* Recurse regops */
     I32         recurse_count;          /* Number of recurse regops we have generated */
+    regnode    **recurse;              /* Recurse regops */
     U8          *study_chunk_recursed;  /* bitmap of which subs we have moved
                                            through */
     U32         study_chunk_recursed_bytes;  /* bytes in bitmap */
@@ -187,25 +247,21 @@ struct RExC_state_t {
     I32                override_recoding;
     I32         recode_x_to_native;
     I32                in_multi_char_class;
+    int                code_index;             /* next code_blocks[] slot */
     struct reg_code_blocks *code_blocks;/* positions of literal (?{})
                                            within pattern */
-    int                code_index;             /* next code_blocks[] slot */
     SSize_t     maxlen;                        /* mininum possible number of chars in string to match */
     scan_frame *frame_head;
     scan_frame *frame_last;
     U32         frame_count;
     AV         *warn_text;
     HV         *unlexed_names;
-#ifdef ADD_TO_REGEXEC
-    char       *starttry;              /* -Dr: where regtry was called. */
-#define RExC_starttry  (pRExC_state->starttry)
-#endif
     SV         *runtime_code_qr;       /* qr with the runtime code blocks */
 #ifdef DEBUGGING
     const char  *lastparse;
     I32         lastnum;
-    AV          *paren_name_list;       /* idx -> name */
     U32         study_chunk_recursed_count;
+    AV          *paren_name_list;       /* idx -> name */
     SV          *mysv1;
     SV          *mysv2;
 
@@ -223,6 +279,8 @@ struct RExC_state_t {
     bool        study_started;
     bool        in_script_run;
     bool        use_BRANCHJ;
+    bool        sWARN_EXPERIMENTAL__VLB;
+    bool        sWARN_EXPERIMENTAL__REGEX_SETS;
 };
 
 #define RExC_flags     (pRExC_state->flags)
@@ -267,6 +325,7 @@ struct RExC_state_t {
 #define RExC_paren_names       (pRExC_state->paren_names)
 #define RExC_recurse   (pRExC_state->recurse)
 #define RExC_recurse_count     (pRExC_state->recurse_count)
+#define RExC_sets_depth         (pRExC_state->sets_depth)
 #define RExC_study_chunk_recursed        (pRExC_state->study_chunk_recursed)
 #define RExC_study_chunk_recursed_bytes  \
                                    (pRExC_state->study_chunk_recursed_bytes)
@@ -291,6 +350,8 @@ struct RExC_state_t {
 #define RExC_warn_text (pRExC_state->warn_text)
 #define RExC_in_script_run      (pRExC_state->in_script_run)
 #define RExC_use_BRANCHJ        (pRExC_state->use_BRANCHJ)
+#define RExC_warned_WARN_EXPERIMENTAL__VLB (pRExC_state->sWARN_EXPERIMENTAL__VLB)
+#define RExC_warned_WARN_EXPERIMENTAL__REGEX_SETS (pRExC_state->sWARN_EXPERIMENTAL__REGEX_SETS)
 #define RExC_unlexed_names (pRExC_state->unlexed_names)
 
 /* Heuristic check on the complexity of the pattern: if TOO_NAUGHTY, we set
@@ -343,9 +404,14 @@ struct RExC_state_t {
 
 #define PBYTE(u8str,paren) ((U8*)(u8str))[(paren) >> 3]
 #define PBITVAL(paren) (1 << ((paren) & 7))
-#define PAREN_TEST(u8str,paren) ( PBYTE(u8str,paren) & PBITVAL(paren))
-#define PAREN_SET(u8str,paren) PBYTE(u8str,paren) |= PBITVAL(paren)
-#define PAREN_UNSET(u8str,paren) PBYTE(u8str,paren) &= (~PBITVAL(paren))
+#define PAREN_OFFSET(depth) \
+    (RExC_study_chunk_recursed + (depth) * RExC_study_chunk_recursed_bytes)
+#define PAREN_TEST(depth, paren) \
+    (PBYTE(PAREN_OFFSET(depth), paren) & PBITVAL(paren))
+#define PAREN_SET(depth, paren) \
+    (PBYTE(PAREN_OFFSET(depth), paren) |= PBITVAL(paren))
+#define PAREN_UNSET(depth, paren) \
+    (PBYTE(PAREN_OFFSET(depth), paren) &= ~PBITVAL(paren))
 
 #define REQUIRE_UTF8(flagp) STMT_START {                                   \
                                      if (!UTF) {                           \
@@ -437,6 +503,9 @@ struct RExC_state_t {
 #define DEFERRED_COULD_BE_OFFICIAL_MARKERs  "~"
 #define DEFERRED_COULD_BE_OFFICIAL_MARKERc  '~'
 
+/* What is infinity for optimization purposes */
+#define OPTIMIZE_INFTY  SSize_t_MAX
+
 /* About scan_data_t.
 
   During optimisation we recurse through the regexp program performing
@@ -471,7 +540,7 @@ struct RExC_state_t {
 
   - max_offset
     Only used for floating strings. This is the rightmost point that
-    the string can appear at. If set to SSize_t_MAX it indicates that the
+    the string can appear at. If set to OPTIMIZE_INFTY it indicates that the
     string can occur infinitely far to the right.
     For fixed strings, it is equal to min_offset.
 
@@ -784,7 +853,7 @@ static const scan_data_t zero_scan_data = {
  * Like Simple_vFAIL(), but accepts two arguments.
  */
 #define        Simple_vFAIL2(m,a1) STMT_START {                        \
-    S_re_croak2(aTHX_ UTF, m, REPORT_LOCATION, a1,             \
+    S_re_croak(aTHX_ UTF, m REPORT_LOCATION, a1,               \
                       REPORT_LOCATION_ARGS(RExC_parse));       \
 } STMT_END
 
@@ -801,7 +870,7 @@ static const scan_data_t zero_scan_data = {
  * Like Simple_vFAIL(), but accepts three arguments.
  */
 #define        Simple_vFAIL3(m, a1, a2) STMT_START {                   \
-    S_re_croak2(aTHX_ UTF, m, REPORT_LOCATION, a1, a2,         \
+    S_re_croak(aTHX_ UTF, m REPORT_LOCATION, a1, a2,           \
            REPORT_LOCATION_ARGS(RExC_parse));                  \
 } STMT_END
 
@@ -817,7 +886,7 @@ static const scan_data_t zero_scan_data = {
  * Like Simple_vFAIL(), but accepts four arguments.
  */
 #define        Simple_vFAIL4(m, a1, a2, a3) STMT_START {               \
-    S_re_croak2(aTHX_ UTF, m, REPORT_LOCATION, a1, a2, a3,     \
+    S_re_croak(aTHX_ UTF, m REPORT_LOCATION, a1, a2, a3,       \
            REPORT_LOCATION_ARGS(RExC_parse));                  \
 } STMT_END
 
@@ -829,13 +898,13 @@ static const scan_data_t zero_scan_data = {
 /* A specialized version of vFAIL2 that works with UTF8f */
 #define vFAIL2utf8f(m, a1) STMT_START {             \
     PREPARE_TO_DIE;                                 \
-    S_re_croak2(aTHX_ UTF, m, REPORT_LOCATION, a1,  \
+    S_re_croak(aTHX_ UTF, m REPORT_LOCATION, a1,  \
             REPORT_LOCATION_ARGS(RExC_parse));      \
 } STMT_END
 
 #define vFAIL3utf8f(m, a1, a2) STMT_START {             \
     PREPARE_TO_DIE;                                     \
-    S_re_croak2(aTHX_ UTF, m, REPORT_LOCATION, a1, a2,  \
+    S_re_croak(aTHX_ UTF, m REPORT_LOCATION, a1, a2,  \
             REPORT_LOCATION_ARGS(RExC_parse));          \
 } STMT_END
 
@@ -883,11 +952,27 @@ static const scan_data_t zero_scan_data = {
     } STMT_END
 
 /* m is not necessarily a "literal string", in this macro */
-#define reg_warn_non_literal_string(loc, m)                             \
-    _WARN_HELPER(loc, packWARN(WARN_REGEXP),                            \
-                      Perl_warner(aTHX_ packWARN(WARN_REGEXP),          \
+#define warn_non_literal_string(loc, packed_warn, m)                    \
+    _WARN_HELPER(loc, packed_warn,                                      \
+                      Perl_warner(aTHX_ packed_warn,                    \
                                        "%s" REPORT_LOCATION,            \
                                   m, REPORT_LOCATION_ARGS(loc)))
+#define reg_warn_non_literal_string(loc, m)                             \
+                warn_non_literal_string(loc, packWARN(WARN_REGEXP), m)
+
+#define ckWARN2_non_literal_string(loc, packwarn, m, a1)                    \
+    STMT_START {                                                            \
+                char * format;                                              \
+                Size_t format_size = strlen(m) + strlen(REPORT_LOCATION)+ 1;\
+                Newx(format, format_size, char);                            \
+                my_strlcpy(format, m, format_size);                         \
+                my_strlcat(format, REPORT_LOCATION, format_size);           \
+                SAVEFREEPV(format);                                         \
+                _WARN_HELPER(loc, packwarn,                                 \
+                      Perl_ck_warner(aTHX_ packwarn,                        \
+                                        format,                             \
+                                        a1, REPORT_LOCATION_ARGS(loc)));    \
+    } STMT_END
 
 #define        ckWARNreg(loc,m)                                                \
     _WARN_HELPER(loc, packWARN(WARN_REGEXP),                            \
@@ -967,10 +1052,15 @@ static const scan_data_t zero_scan_data = {
                                        REPORT_LOCATION_ARGS(loc)))
 
 #define        ckWARNexperimental(loc, class, m)                               \
-    _WARN_HELPER(loc, packWARN(class),                                  \
+    STMT_START {                                                        \
+        if (! RExC_warned_ ## class) { /* warn once per compilation */  \
+            RExC_warned_ ## class = 1;                                  \
+            _WARN_HELPER(loc, packWARN(class),                          \
                       Perl_ck_warner_d(aTHX_ packWARN(class),           \
                                             m REPORT_LOCATION,          \
-                                            REPORT_LOCATION_ARGS(loc)))
+                                            REPORT_LOCATION_ARGS(loc)));\
+        }                                                               \
+    } STMT_END
 
 /* Convert between a pointer to a node and its offset from the beginning of the
  * program */
@@ -1148,7 +1238,7 @@ static void
 S_debug_studydata(pTHX_ const char *where, scan_data_t *data,
                     U32 depth, int is_inf)
 {
-    GET_RE_DEBUG_FLAGS_DECL;
+    DECLARE_AND_GET_RE_DEBUG_FLAGS;
 
     DEBUG_OPTIMISE_MORE_r({
         if (!data)
@@ -1202,7 +1292,7 @@ static void
 S_debug_peep(pTHX_ const char *str, const RExC_state_t *pRExC_state,
                 regnode *scan, U32 depth, U32 flags)
 {
-    GET_RE_DEBUG_FLAGS_DECL;
+    DECLARE_AND_GET_RE_DEBUG_FLAGS;
 
     DEBUG_OPTIMISE_r({
         regnode *Next;
@@ -1379,29 +1469,6 @@ S_edit_distance(const UV* src,
 /* END of edit_distance() stuff
  * ========================================================= */
 
-/* is c a control character for which we have a mnemonic? */
-#define isMNEMONIC_CNTRL(c) _IS_MNEMONIC_CNTRL_ONLY_FOR_USE_BY_REGCOMP_DOT_C(c)
-
-STATIC const char *
-S_cntrl_to_mnemonic(const U8 c)
-{
-    /* Returns the mnemonic string that represents character 'c', if one
-     * exists; NULL otherwise.  The only ones that exist for the purposes of
-     * this routine are a few control characters */
-
-    switch (c) {
-        case '\a':       return "\\a";
-        case '\b':       return "\\b";
-        case ESC_NATIVE: return "\\e";
-        case '\f':       return "\\f";
-        case '\n':       return "\\n";
-        case '\r':       return "\\r";
-        case '\t':       return "\\t";
-    }
-
-    return NULL;
-}
-
 /* Mark that we cannot extend a found fixed substring at this point.
    Update the longest found anchored substring or the longest found
    floating substrings if needed. */
@@ -1413,7 +1480,7 @@ S_scan_commit(pTHX_ const RExC_state_t *pRExC_state, scan_data_t *data,
     const STRLEN l = CHR_SVLEN(data->last_found);
     SV * const longest_sv = data->substrs[data->cur_is_floating].str;
     const STRLEN old_l = CHR_SVLEN(longest_sv);
-    GET_RE_DEBUG_FLAGS_DECL;
+    DECLARE_AND_GET_RE_DEBUG_FLAGS;
 
     PERL_ARGS_ASSERT_SCAN_COMMIT;
 
@@ -1425,14 +1492,14 @@ S_scan_commit(pTHX_ const RExC_state_t *pRExC_state, scan_data_t *data,
        if (!i) /* fixed */
            data->substrs[0].max_offset = data->substrs[0].min_offset;
        else { /* float */
-           data->substrs[1].max_offset = (l
+           data->substrs[1].max_offset =
+                      (is_inf)
+                       ? OPTIMIZE_INFTY
+                       : (l
                           ? data->last_start_max
-                          : (data->pos_delta > SSize_t_MAX - data->pos_min
-                                        ? SSize_t_MAX
+                          : (data->pos_delta > OPTIMIZE_INFTY - data->pos_min
+                                        ? OPTIMIZE_INFTY
                                         : data->pos_min + data->pos_delta));
-           if (is_inf
-                || (STRLEN)data->substrs[1].max_offset > (STRLEN)SSize_t_MAX)
-               data->substrs[1].max_offset = SSize_t_MAX;
         }
 
         if (data->flags & SF_BEFORE_EOL)
@@ -2206,7 +2273,7 @@ S_dump_trie(pTHX_ const struct _reg_trie_data *trie, HV *widecharmap,
     SV *sv=sv_newmortal();
     int colwidth= widecharmap ? 6 : 4;
     U16 word;
-    GET_RE_DEBUG_FLAGS_DECL;
+    DECLARE_AND_GET_RE_DEBUG_FLAGS;
 
     PERL_ARGS_ASSERT_DUMP_TRIE;
 
@@ -2300,7 +2367,7 @@ S_dump_trie_interim_list(pTHX_ const struct _reg_trie_data *trie,
     U32 state;
     SV *sv=sv_newmortal();
     int colwidth= widecharmap ? 6 : 4;
-    GET_RE_DEBUG_FLAGS_DECL;
+    DECLARE_AND_GET_RE_DEBUG_FLAGS;
 
     PERL_ARGS_ASSERT_DUMP_TRIE_INTERIM_LIST;
 
@@ -2361,7 +2428,7 @@ S_dump_trie_interim_table(pTHX_ const struct _reg_trie_data *trie,
     U16 charid;
     SV *sv=sv_newmortal();
     int colwidth= widecharmap ? 6 : 4;
-    GET_RE_DEBUG_FLAGS_DECL;
+    DECLARE_AND_GET_RE_DEBUG_FLAGS;
 
     PERL_ARGS_ASSERT_DUMP_TRIE_INTERIM_TABLE;
 
@@ -2706,7 +2773,7 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch,
     STRLEN trie_charcount=0;
 #endif
     SV *re_trie_maxbuff;
-    GET_RE_DEBUG_FLAGS_DECL;
+    DECLARE_AND_GET_RE_DEBUG_FLAGS;
 
     PERL_ARGS_ASSERT_MAKE_TRIE;
 #ifndef DEBUGGING
@@ -2797,7 +2864,12 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch,
         if (OP(noper) == NOTHING) {
             /* skip past a NOTHING at the start of an alternation
              * eg, /(?:)a|(?:b)/ should be the same as /a|b/
+             *
+             * If the next node is not something we are supposed to process
+             * we will just ignore it due to the condition guarding the
+             * next block.
              */
+
             regnode *noper_next= regnext(noper);
             if (noper_next < tail)
                 noper= noper_next;
@@ -3019,6 +3091,9 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch,
                 regnode *noper_next= regnext(noper);
                 if (noper_next < tail)
                     noper= noper_next;
+                /* we will undo this assignment if noper does not
+                 * point at a trieable type in the else clause of
+                 * the following statement. */
             }
 
             if (    noper < tail
@@ -3080,7 +3155,13 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch,
                         Perl_croak( aTHX_ "panic! In trie construction, no char mapping for %" IVdf, uvc );
                    }
                }
-           }
+            } else {
+                /* If we end up here it is because we skipped past a NOTHING, but did not end up
+                 * on a trieable type. So we need to reset noper back to point at the first regop
+                 * in the branch before we call TRIE_HANDLE_WORD()
+                */
+                noper= NEXTOPER(cur);
+            }
             TRIE_HANDLE_WORD(state);
 
         } /* end second pass */
@@ -3244,6 +3325,9 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch,
                 regnode *noper_next= regnext(noper);
                 if (noper_next < tail)
                     noper= noper_next;
+                /* we will undo this assignment if noper does not
+                 * point at a trieable type in the else clause of
+                 * the following statement. */
             }
 
             if (    noper < tail
@@ -3284,6 +3368,12 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch,
                     /* charid is now 0 if we dont know the char read, or
                      * nonzero if we do */
                 }
+            } else {
+                /* If we end up here it is because we skipped past a NOTHING, but did not end up
+                 * on a trieable type. So we need to reset noper back to point at the first regop
+                 * in the branch before we call TRIE_HANDLE_WORD().
+                */
+                noper= NEXTOPER(cur);
             }
             accept_state = TRIE_NODENUM( state );
             TRIE_HANDLE_WORD(accept_state);
@@ -3566,7 +3656,8 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch,
                         str=STRING(convert);
                         setSTR_LEN(convert, 0);
                     }
-                    setSTR_LEN(convert, STR_LEN(convert) + len);
+                    assert( ( STR_LEN(convert) + len ) < 256 );
+                    setSTR_LEN(convert, (U8)(STR_LEN(convert) + len));
                     while (len--)
                         *str++ = *ch++;
                } else {
@@ -3580,7 +3671,8 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch,
            trie->prefixlen = (state-1);
             if (str) {
                 regnode *n = convert+NODE_SZ_STR(convert);
-                NEXT_OFF(convert) = NODE_SZ_STR(convert);
+                assert( NODE_SZ_STR(convert) <= U16_MAX );
+                NEXT_OFF(convert) = (U16)(NODE_SZ_STR(convert));
                 trie->startstate = state;
                 trie->minlen -= (state - 1);
                 trie->maxlen -= (state - 1);
@@ -3771,7 +3863,7 @@ S_construct_ahocorasick_from_trie(pTHX_ RExC_state_t *pRExC_state, regnode *sour
     reg_ac_data *aho;
     const U32 data_slot = add_data( pRExC_state, STR_WITH_LEN("T"));
     regnode *stclass;
-    GET_RE_DEBUG_FLAGS_DECL;
+    DECLARE_AND_GET_RE_DEBUG_FLAGS;
 
     PERL_ARGS_ASSERT_CONSTRUCT_AHOCORASICK_FROM_TRIE;
     PERL_UNUSED_CONTEXT;
@@ -4006,11 +4098,6 @@ S_construct_ahocorasick_from_trie(pTHX_ RExC_state_t *pRExC_state, regnode *sour
  *      using /iaa matching will be doing so almost entirely with ASCII
  *      strings, so this should rarely be encountered in practice */
 
-#define JOIN_EXACT(scan,min_subtract,unfolded_multi_char, flags)    \
-    if (PL_regkind[OP(scan)] == EXACT && OP(scan) != LEXACT         \
-                                      && OP(scan) != LEXACT_REQ8)  \
-        join_exact(pRExC_state,(scan),(min_subtract),unfolded_multi_char, (flags), NULL, depth+1)
-
 STATIC U32
 S_join_exact(pTHX_ RExC_state_t *pRExC_state, regnode *scan,
                    UV *min_subtract, bool *unfolded_multi_char,
@@ -4025,7 +4112,7 @@ S_join_exact(pTHX_ RExC_state_t *pRExC_state, regnode *scan,
     U32 stopnow = 0;
 #ifdef DEBUGGING
     regnode *stop = scan;
-    GET_RE_DEBUG_FLAGS_DECL;
+    DECLARE_AND_GET_RE_DEBUG_FLAGS;
 #else
     PERL_UNUSED_ARG(depth);
 #endif
@@ -4174,7 +4261,8 @@ S_join_exact(pTHX_ RExC_state_t *pRExC_state, regnode *scan,
             merged++;
 
             NEXT_OFF(scan) += NEXT_OFF(n);
-            setSTR_LEN(scan, STR_LEN(scan) + STR_LEN(n));
+            assert( ( STR_LEN(scan) + STR_LEN(n) ) < 256 );
+            setSTR_LEN(scan, (U8)(STR_LEN(scan) + STR_LEN(n)));
             next = n + NODE_SZ_STR(n);
             /* Now we can overwrite *n : */
             Move(STRING(n), STRING(scan) + oldl, STR_LEN(n), char);
@@ -4399,23 +4487,6 @@ S_join_exact(pTHX_ RExC_state_t *pRExC_state, regnode *scan,
            }
 #endif
        }
-
-        if (     STR_LEN(scan) == 1
-            &&   isALPHA_A(* STRING(scan))
-            &&  (         OP(scan) == EXACTFAA
-                 || (     OP(scan) == EXACTFU
-                     && ! HAS_NONLATIN1_SIMPLE_FOLD_CLOSURE(* STRING(scan)))))
-        {
-            U8 mask = ~ ('A' ^ 'a'); /* These differ in just one bit */
-
-            /* Replace a length 1 ASCII fold pair node with an ANYOFM node,
-             * with the mask set to the complement of the bit that differs
-             * between upper and lower case, and the lowest code point of the
-             * pair (which the '&' forces) */
-            OP(scan) = ANYOFM;
-            ARG_SET(scan, *STRING(scan) & mask);
-            FLAGS(scan) = mask;
-        }
     }
 
 #ifdef DEBUGGING
@@ -4475,6 +4546,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                        /* and_withp: Valid if flags & SCF_DO_STCLASS_OR */
 {
     dVAR;
+    SSize_t final_minlen;
     /* There must be at least this number of characters to match */
     SSize_t min = 0;
     I32 pars = 0, code;
@@ -4486,9 +4558,9 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
     scan_data_t data_fake;
     SV *re_trie_maxbuff = NULL;
     regnode *first_non_open = scan;
-    SSize_t stopmin = SSize_t_MAX;
+    SSize_t stopmin = OPTIMIZE_INFTY;
     scan_frame *frame = NULL;
-    GET_RE_DEBUG_FLAGS_DECL;
+    DECLARE_AND_GET_RE_DEBUG_FLAGS;
 
     PERL_ARGS_ASSERT_STUDY_CHUNK;
     RExC_study_started= 1;
@@ -4518,15 +4590,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
             U32 j;
             for ( j = 0 ; j < recursed_depth ; j++ ) {
                 for ( i = 0 ; i < (U32)RExC_total_parens ; i++ ) {
-                    if (
-                        PAREN_TEST(RExC_study_chunk_recursed +
-                                   ( j * RExC_study_chunk_recursed_bytes), i )
-                        && (
-                            !j ||
-                            !PAREN_TEST(RExC_study_chunk_recursed +
-                                   (( j - 1 ) * RExC_study_chunk_recursed_bytes), i)
-                        )
-                    ) {
+                    if (PAREN_TEST(j, i) && (!j || !PAREN_TEST(j - 1, i))) {
                         Perl_re_printf( aTHX_ " %d",(int)i);
                         break;
                     }
@@ -4554,7 +4618,10 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
          * parsing code, as each (?:..) is handled by a different invocation of
          * reg() -- Yves
          */
-        JOIN_EXACT(scan,&min_subtract, &unfolded_multi_char, 0);
+        if (PL_regkind[OP(scan)] == EXACT && OP(scan) != LEXACT
+                                          && OP(scan) != LEXACT_REQ8)
+            join_exact(pRExC_state, scan, &min_subtract, &unfolded_multi_char,
+                    0, NULL, depth + 1);
 
         /* Follow the next-chain of the current node and optimize
            away all the NOTHINGs from it.  */
@@ -4625,7 +4692,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                 /* NOTE - There is similar code to this block below for
                  * handling TRIE nodes on a re-study.  If you change stuff here
                  * check there too. */
-               SSize_t max1 = 0, min1 = SSize_t_MAX, num = 0;
+               SSize_t max1 = 0, min1 = OPTIMIZE_INFTY, num = 0;
                regnode_ssc accum;
                regnode * const startbranch=scan;
 
@@ -4676,9 +4743,9 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
 
                    if (min1 > minnext)
                        min1 = minnext;
-                   if (deltanext == SSize_t_MAX) {
+                   if (deltanext == OPTIMIZE_INFTY) {
                        is_inf = is_inf_internal = 1;
-                       max1 = SSize_t_MAX;
+                       max1 = OPTIMIZE_INFTY;
                    } else if (max1 < minnext + deltanext)
                        max1 = minnext + deltanext;
                    scan = next;
@@ -4703,17 +4770,17 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                    min1 = 0;
                if (flags & SCF_DO_SUBSTR) {
                    data->pos_min += min1;
-                   if (data->pos_delta >= SSize_t_MAX - (max1 - min1))
-                       data->pos_delta = SSize_t_MAX;
+                   if (data->pos_delta >= OPTIMIZE_INFTY - (max1 - min1))
+                       data->pos_delta = OPTIMIZE_INFTY;
                    else
                        data->pos_delta += max1 - min1;
                    if (max1 != min1 || is_inf)
                        data->cur_is_floating = 1;
                }
                min += min1;
-               if (delta == SSize_t_MAX
-                || SSize_t_MAX - delta - (max1 - min1) < 0)
-                   delta = SSize_t_MAX;
+               if (delta == OPTIMIZE_INFTY
+                || OPTIMIZE_INFTY - delta - (max1 - min1) < 0)
+                   delta = OPTIMIZE_INFTY;
                else
                    delta += max1 - min1;
                if (flags & SCF_DO_STCLASS_OR) {
@@ -5128,8 +5195,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
 
                 if (
                     !recursed_depth
-                    ||
-                    !PAREN_TEST(RExC_study_chunk_recursed + ((recursed_depth-1) * RExC_study_chunk_recursed_bytes), paren)
+                    || !PAREN_TEST(recursed_depth - 1, paren)
                 ) {
                     /* it is quite possible that there are more efficient ways
                      * to do this. We maintain a bitmap per level of recursion
@@ -5144,13 +5210,13 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                     if (!recursed_depth) {
                         Zero(RExC_study_chunk_recursed, RExC_study_chunk_recursed_bytes, U8);
                     } else {
-                        Copy(RExC_study_chunk_recursed + ((recursed_depth-1) * RExC_study_chunk_recursed_bytes),
-                             RExC_study_chunk_recursed + (recursed_depth * RExC_study_chunk_recursed_bytes),
+                        Copy(PAREN_OFFSET(recursed_depth - 1),
+                             PAREN_OFFSET(recursed_depth),
                              RExC_study_chunk_recursed_bytes, U8);
                     }
                     /* we havent recursed into this paren yet, so recurse into it */
                     DEBUG_STUDYDATA("gosub-set", data, depth, is_inf);
-                    PAREN_SET(RExC_study_chunk_recursed + (recursed_depth * RExC_study_chunk_recursed_bytes), paren);
+                    PAREN_SET(recursed_depth, paren);
                     my_recursed_depth= recursed_depth + 1;
                 } else {
                     DEBUG_STUDYDATA("gosub-inf", data, depth, is_inf);
@@ -5215,26 +5281,27 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                  || OP(scan) == LEXACT_REQ8
                  || OP(scan) == EXACTL)
         {
-           SSize_t l = STR_LEN(scan);
+           SSize_t bytelen = STR_LEN(scan), charlen;
            UV uc;
-            assert(l);
+            assert(bytelen);
            if (UTF) {
                const U8 * const s = (U8*)STRING(scan);
-               uc = utf8_to_uvchr_buf(s, s + l, NULL);
-               l = utf8_length(s, s + l);
+               uc = utf8_to_uvchr_buf(s, s + bytelen, NULL);
+               charlen = utf8_length(s, s + bytelen);
            } else {
                uc = *((U8*)STRING(scan));
+                charlen = bytelen;
            }
-           min += l;
+           min += charlen;
            if (flags & SCF_DO_SUBSTR) { /* Update longest substr. */
                /* The code below prefers earlier match for fixed
                   offset, later match for variable offset.  */
                if (data->last_end == -1) { /* Update the start info. */
                    data->last_start_min = data->pos_min;
                    data->last_start_max = is_inf
-                       ? SSize_t_MAX : data->pos_min + data->pos_delta;
+                        ? OPTIMIZE_INFTY : data->pos_min + data->pos_delta;
                }
-               sv_catpvn(data->last_found, STRING(scan), STR_LEN(scan));
+               sv_catpvn(data->last_found, STRING(scan), bytelen);
                if (UTF)
                    SvUTF8_on(data->last_found);
                {
@@ -5242,11 +5309,10 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                    MAGIC * const mg = SvUTF8(sv) && SvMAGICAL(sv) ?
                        mg_find(sv, PERL_MAGIC_utf8) : NULL;
                    if (mg && mg->mg_len >= 0)
-                       mg->mg_len += utf8_length((U8*)STRING(scan),
-                                              (U8*)STRING(scan)+STR_LEN(scan));
+                       mg->mg_len += charlen;
                }
-               data->last_end = data->pos_min + l;
-               data->pos_min += l; /* As in the first entry. */
+               data->last_end = data->pos_min + charlen;
+               data->pos_min += charlen; /* As in the first entry. */
                data->flags &= ~SF_BEFORE_EOL;
            }
 
@@ -5268,25 +5334,42 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
        }
         else if (PL_regkind[OP(scan)] == EXACT) {
             /* But OP != EXACT!, so is EXACTFish */
-           SSize_t l = STR_LEN(scan);
+           SSize_t bytelen = STR_LEN(scan), charlen;
             const U8 * s = (U8*)STRING(scan);
 
+            /* Replace a length 1 ASCII fold pair node with an ANYOFM node,
+             * with the mask set to the complement of the bit that differs
+             * between upper and lower case, and the lowest code point of the
+             * pair (which the '&' forces) */
+            if (     bytelen == 1
+                &&   isALPHA_A(*s)
+                &&  (         OP(scan) == EXACTFAA
+                     || (     OP(scan) == EXACTFU
+                         && ! HAS_NONLATIN1_SIMPLE_FOLD_CLOSURE(*s))))
+            {
+                U8 mask = ~ ('A' ^ 'a'); /* These differ in just one bit */
+
+                OP(scan) = ANYOFM;
+                ARG_SET(scan, *s & mask);
+                FLAGS(scan) = mask;
+                /* we're not EXACTFish any more, so restudy */
+                continue;
+            }
+
            /* Search for fixed substrings supports EXACT only. */
            if (flags & SCF_DO_SUBSTR) {
                assert(data);
                 scan_commit(pRExC_state, data, minlenp, is_inf);
            }
-           if (UTF) {
-               l = utf8_length(s, s + l);
-           }
+            charlen = UTF ? (SSize_t) utf8_length(s, s + bytelen) : bytelen;
            if (unfolded_multi_char) {
                 RExC_seen |= REG_UNFOLDED_MULTI_SEEN;
            }
-           min += l - min_subtract;
+           min += charlen - min_subtract;
             assert (min >= 0);
             delta += min_subtract;
            if (flags & SCF_DO_SUBSTR) {
-               data->pos_min += l - min_subtract;
+               data->pos_min += charlen - min_subtract;
                if (data->pos_min < 0) {
                     data->pos_min = 0;
                 }
@@ -5499,11 +5582,11 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                 }
 
                min += minnext * mincount;
-               is_inf_internal |= deltanext == SSize_t_MAX
+               is_inf_internal |= deltanext == OPTIMIZE_INFTY
                          || (maxcount == REG_INFTY && minnext + deltanext > 0);
                is_inf |= is_inf_internal;
                 if (is_inf) {
-                   delta = SSize_t_MAX;
+                   delta = OPTIMIZE_INFTY;
                 } else {
                    delta += (minnext + deltanext) * maxcount
                              - minnext * mincount;
@@ -5699,7 +5782,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                            data->last_start_min += minnext * (mincount - 1);
                            data->last_start_max =
                               is_inf
-                               ? SSize_t_MAX
+                               ? OPTIMIZE_INFTY
                               : data->last_start_max +
                                  (maxcount - 1) * (minnext + data->pos_delta);
                        }
@@ -5708,18 +5791,18 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                    data->pos_min += minnext * (mincount - counted);
 #if 0
 Perl_re_printf( aTHX_  "counted=%" UVuf " deltanext=%" UVuf
-                              " SSize_t_MAX=%" UVuf " minnext=%" UVuf
+                              " OPTIMIZE_INFTY=%" UVuf " minnext=%" UVuf
                               " maxcount=%" UVuf " mincount=%" UVuf "\n",
-    (UV)counted, (UV)deltanext, (UV)SSize_t_MAX, (UV)minnext, (UV)maxcount,
+    (UV)counted, (UV)deltanext, (UV)OPTIMIZE_INFTY, (UV)minnext, (UV)maxcount,
     (UV)mincount);
-if (deltanext != SSize_t_MAX)
+if (deltanext != OPTIMIZE_INFTY)
 Perl_re_printf( aTHX_  "LHS=%" UVuf " RHS=%" UVuf "\n",
     (UV)(-counted * deltanext + (minnext + deltanext) * maxcount
-          - minnext * mincount), (UV)(SSize_t_MAX - data->pos_delta));
+          - minnext * mincount), (UV)(OPTIMIZE_INFTY - data->pos_delta));
 #endif
-                   if (deltanext == SSize_t_MAX
-                        || -counted * deltanext + (minnext + deltanext) * maxcount - minnext * mincount >= SSize_t_MAX - data->pos_delta)
-                       data->pos_delta = SSize_t_MAX;
+                   if (deltanext == OPTIMIZE_INFTY
+                        || -counted * deltanext + (minnext + deltanext) * maxcount - minnext * mincount >= OPTIMIZE_INFTY - data->pos_delta)
+                       data->pos_delta = OPTIMIZE_INFTY;
                    else
                        data->pos_delta += - counted * deltanext +
                        (minnext + deltanext) * maxcount - minnext * mincount;
@@ -5738,7 +5821,7 @@ Perl_re_printf( aTHX_  "LHS=%" UVuf " RHS=%" UVuf "\n",
                            data->last_end = data->pos_min;
                            data->last_start_min = data->pos_min - last_chrs;
                            data->last_start_max = is_inf
-                               ? SSize_t_MAX
+                               ? OPTIMIZE_INFTY
                                : data->pos_min + data->pos_delta - last_chrs;
                        }
                        data->cur_is_floating = 1; /* float */
@@ -5756,10 +5839,8 @@ Perl_re_printf( aTHX_  "LHS=%" UVuf " RHS=%" UVuf "\n",
                continue;
 
            default:
-#ifdef DEBUGGING
                 Perl_croak(aTHX_ "panic: unexpected varying REx opcode %d",
                                                                     OP(scan));
-#endif
             case REF:
             case CLUMP:
                if (flags & SCF_DO_SUBSTR) {
@@ -5805,13 +5886,13 @@ Perl_re_printf( aTHX_  "LHS=%" UVuf " RHS=%" UVuf "\n",
                flags &= ~SCF_DO_STCLASS;
             }
            min++;
-            if (delta != SSize_t_MAX)
+            if (delta != OPTIMIZE_INFTY)
                 delta++;    /* Because of the 2 char string cr-lf */
             if (flags & SCF_DO_SUBSTR) {
                 /* Cannot expect anything... */
                 scan_commit(pRExC_state, data, minlenp, is_inf);
                data->pos_min += 1;
-                if (data->pos_delta != SSize_t_MAX) {
+                if (data->pos_delta != OPTIMIZE_INFTY) {
                     data->pos_delta += 1;
                 }
                data->cur_is_floating = 1; /* float */
@@ -6274,7 +6355,7 @@ Perl_re_printf( aTHX_  "LHS=%" UVuf " RHS=%" UVuf "\n",
             regnode *trie_node= scan;
             regnode *tail= regnext(scan);
             reg_trie_data *trie = (reg_trie_data*)RExC_rxi->data->data[ ARG(scan) ];
-            SSize_t max1 = 0, min1 = SSize_t_MAX;
+            SSize_t max1 = 0, min1 = OPTIMIZE_INFTY;
             regnode_ssc accum;
 
             if (flags & SCF_DO_SUBSTR) { /* XXXX Add !SUSPEND? */
@@ -6329,9 +6410,9 @@ Perl_re_printf( aTHX_  "LHS=%" UVuf " RHS=%" UVuf "\n",
 
                     if (min1 > (SSize_t)(minnext + trie->minlen))
                         min1 = minnext + trie->minlen;
-                    if (deltanext == SSize_t_MAX) {
+                    if (deltanext == OPTIMIZE_INFTY) {
                         is_inf = is_inf_internal = 1;
-                        max1 = SSize_t_MAX;
+                        max1 = OPTIMIZE_INFTY;
                     } else if (max1 < (SSize_t)(minnext + deltanext + trie->maxlen))
                         max1 = minnext + deltanext + trie->maxlen;
 
@@ -6360,11 +6441,11 @@ Perl_re_printf( aTHX_  "LHS=%" UVuf " RHS=%" UVuf "\n",
                     data->cur_is_floating = 1; /* float */
             }
             min += min1;
-            if (delta != SSize_t_MAX) {
-                if (SSize_t_MAX - (max1 - min1) >= delta)
+            if (delta != OPTIMIZE_INFTY) {
+                if (OPTIMIZE_INFTY - (max1 - min1) >= delta)
                     delta += max1 - min1;
                 else
-                    delta = SSize_t_MAX;
+                    delta = OPTIMIZE_INFTY;
             }
             if (flags & SCF_DO_STCLASS_OR) {
                 ssc_or(pRExC_state, data->start_class, (regnode_charclass *) &accum);
@@ -6410,6 +6491,11 @@ Perl_re_printf( aTHX_  "LHS=%" UVuf " RHS=%" UVuf "\n",
            if (trie->jump) /* no more substrings -- for now /grr*/
                flags &= ~SCF_DO_SUBSTR;
        }
+        else if (OP(scan) == REGEX_SET) {
+            Perl_croak(aTHX_ "panic: %s regnode should be resolved"
+                             " before optimization", reg_name[REGEX_SET]);
+        }
+
 #endif /* old or new */
 #endif /* TRIE_STUDY_OPT */
 
@@ -6440,10 +6526,10 @@ Perl_re_printf( aTHX_  "LHS=%" UVuf " RHS=%" UVuf "\n",
     DEBUG_STUDYDATA("pre-fin", data, depth, is_inf);
 
     *scanp = scan;
-    *deltap = is_inf_internal ? SSize_t_MAX : delta;
+    *deltap = is_inf_internal ? OPTIMIZE_INFTY : delta;
 
     if (flags & SCF_DO_SUBSTR && is_inf)
-       data->pos_delta = SSize_t_MAX - data->pos_min;
+       data->pos_delta = OPTIMIZE_INFTY - data->pos_min;
     if (is_par > (I32)U8_MAX)
        is_par = 0;
     if (is_par && pars==1 && data) {
@@ -6461,18 +6547,16 @@ Perl_re_printf( aTHX_  "LHS=%" UVuf " RHS=%" UVuf "\n",
 
     DEBUG_STUDYDATA("post-fin", data, depth, is_inf);
 
-    {
-        SSize_t final_minlen= min < stopmin ? min : stopmin;
+    final_minlen = min < stopmin
+            ? min : stopmin;
 
-        if (!(RExC_seen & REG_UNBOUNDED_QUANTIFIER_SEEN)) {
-            if (final_minlen > SSize_t_MAX - delta)
-                RExC_maxlen = SSize_t_MAX;
-            else if (RExC_maxlen < final_minlen + delta)
-                RExC_maxlen = final_minlen + delta;
-        }
-        return final_minlen;
+    if (!(RExC_seen & REG_UNBOUNDED_QUANTIFIER_SEEN)) {
+        if (final_minlen > OPTIMIZE_INFTY - delta)
+            RExC_maxlen = OPTIMIZE_INFTY;
+        else if (RExC_maxlen < final_minlen + delta)
+            RExC_maxlen = final_minlen + delta;
     }
-    NOT_REACHED; /* NOTREACHED */
+    return final_minlen;
 }
 
 STATIC U32
@@ -6580,7 +6664,7 @@ REGEXP *
 Perl_pregcomp(pTHX_ SV * const pattern, const U32 flags)
 {
     regexp_engine const *eng = current_re_engine();
-    GET_RE_DEBUG_FLAGS_DECL;
+    DECLARE_AND_GET_RE_DEBUG_FLAGS;
 
     PERL_ARGS_ASSERT_PREGCOMP;
 
@@ -6602,7 +6686,9 @@ REGEXP *
 Perl_re_compile(pTHX_ SV * const pattern, U32 rx_flags)
 {
     SV *pat = pattern; /* defeat constness! */
+
     PERL_ARGS_ASSERT_RE_COMPILE;
+
     return Perl_re_op_compile(aTHX_ &pat, 1, NULL,
 #ifdef PERL_IN_XSUB_RE
                                 &my_reg_engine,
@@ -6612,7 +6698,6 @@ Perl_re_compile(pTHX_ SV * const pattern, U32 rx_flags)
                                 NULL, NULL, rx_flags, 0);
 }
 
-
 static void
 S_free_codeblocks(pTHX_ struct reg_code_blocks *cbs)
 {
@@ -6664,7 +6749,7 @@ S_pat_upgrade_to_utf8(pTHX_ RExC_state_t * const pRExC_state,
     int n=0;
     STRLEN s = 0;
     bool do_end = 0;
-    GET_RE_DEBUG_FLAGS_DECL;
+    DECLARE_AND_GET_RE_DEBUG_FLAGS;
 
     DEBUG_PARSE_r(Perl_re_printf( aTHX_
         "UTF8 mismatch! Converting to utf8 for resizing and compile\n"));
@@ -7004,7 +7089,7 @@ S_compile_runtime_code(pTHX_ RExC_state_t * const pRExC_state,
 {
     SV *qr;
 
-    GET_RE_DEBUG_FLAGS_DECL;
+    DECLARE_AND_GET_RE_DEBUG_FLAGS;
 
     if (pRExC_state->runtime_code_qr) {
        /* this is the second time we've been called; this should
@@ -7365,7 +7450,7 @@ S_set_regex_pv(pTHX_ RExC_state_t *pRExC_state, REGEXP *Rx)
  *
  * pm_flags contains the PMf_* flags, typically based on those from the
  * pm_flags field of the related PMOP. Currently we're only interested in
- * PMf_HAS_CV, PMf_IS_QR, PMf_USE_RE_EVAL.
+ * PMf_HAS_CV, PMf_IS_QR, PMf_USE_RE_EVAL, PMf_WILDCARD.
  *
  * For many years this code had an initial sizing pass that calculated
  * (sometimes incorrectly, leading to security holes) the size needed for the
@@ -7424,7 +7509,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
     int restudied = 0;
     RExC_state_t copyRExC_state;
 #endif
-    GET_RE_DEBUG_FLAGS_DECL;
+    DECLARE_AND_GET_RE_DEBUG_FLAGS;
 
     PERL_ARGS_ASSERT_RE_OP_COMPILE;
 
@@ -7550,6 +7635,8 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
     RExC_frame_count= 0;
     RExC_latest_warn_offset = 0;
     RExC_use_BRANCHJ = 0;
+    RExC_warned_WARN_EXPERIMENTAL__VLB = 0;
+    RExC_warned_WARN_EXPERIMENTAL__REGEX_SETS = 0;
     RExC_total_parens = 0;
     RExC_open_parens = NULL;
     RExC_close_parens = NULL;
@@ -7662,6 +7749,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
     RExC_study_chunk_recursed = NULL;
     RExC_study_chunk_recursed_bytes= 0;
     RExC_recurse_count = 0;
+    RExC_sets_depth = 0;
     pRExC_state->code_index = 0;
 
     /* Initialize the string in the compiled pattern.  This is so that there is
@@ -7846,7 +7934,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
     DEBUG_OFFSETS_r(if (RExC_offsets) {
         const STRLEN len = RExC_offsets[0];
         STRLEN i;
-        GET_RE_DEBUG_FLAGS_DECL;
+        DECLARE_AND_GET_RE_DEBUG_FLAGS;
         Perl_re_printf( aTHX_
                       "Offsets: [%" UVuf "]\n\t", (UV)RExC_offsets[0]);
         for (i = 1; i <= len; i++) {
@@ -8143,7 +8231,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
 
                 RExC_rx->substrs->data[i].max_offset = data.substrs[i].max_offset;
                 /* Don't offset infinity */
-                if (data.substrs[i].max_offset < SSize_t_MAX)
+                if (data.substrs[i].max_offset < OPTIMIZE_INFTY)
                     RExC_rx->substrs->data[i].max_offset -= data.substrs[i].lookbehind;
                 SvREFCNT_inc_simple_void_NN(data.substrs[i].str);
             }
@@ -8542,7 +8630,7 @@ SV*
 Perl_reg_named_buff_nextkey(pTHX_ REGEXP * const r, const U32 flags)
 {
     struct regexp *const rx = ReANY(r);
-    GET_RE_DEBUG_FLAGS_DECL;
+    DECLARE_AND_GET_RE_DEBUG_FLAGS;
 
     PERL_ARGS_ASSERT_REG_NAMED_BUFF_NEXTKEY;
 
@@ -8829,7 +8917,7 @@ Perl_reg_numbered_buff_length(pTHX_ REGEXP * const r, const SV * const sv,
 
         i = t1 - s1;
         if (is_utf8_string_loclen((U8*)s, i, &ep, &el))
-                       i = el;
+            i = el;
     }
     return i;
 }
@@ -8933,7 +9021,7 @@ S_reg_scan_name(pTHX_ RExC_state_t *pRExC_state, U32 flags)
         Perl_re_printf( aTHX_ "%16s","");                       \
                                                                 \
     if (RExC_lastnum!=RExC_emit)                                \
-       Perl_re_printf( aTHX_ "|%4d", RExC_emit);                \
+       Perl_re_printf( aTHX_ "|%4zu", RExC_emit);                \
     else                                                        \
        Perl_re_printf( aTHX_ "|%4s","");                        \
     Perl_re_printf( aTHX_ "|%*s%-4s",                           \
@@ -10622,7 +10710,7 @@ S_make_exactf_invlist(pTHX_ RExC_state_t *pRExC_state, regnode *node)
         }
         else {  /* Single char fold */
             unsigned int k;
-            unsigned int first_fold;
+            U32 first_fold;
             const U32 * remaining_folds;
             Size_t folds_count;
 
@@ -10728,6 +10816,24 @@ S_parse_lparen_question_flags(pTHX_ RExC_state_t *pRExC_state)
         /* && memCHRs("iogcmsx", *RExC_parse) */
         /* (?g), (?gc) and (?o) are useless here
            and must be globally applied -- japhy */
+        if ((RExC_pm_flags & PMf_WILDCARD)) {
+            if (flagsp == & negflags) {
+                if (*RExC_parse == 'm') {
+                    RExC_parse++;
+                    /* diag_listed_as: Use of %s is not allowed in Unicode
+                       property wildcard subpatterns in regex; marked by <--
+                       HERE in m/%s/ */
+                    vFAIL("Use of modifier '-m' is not allowed in Unicode"
+                          " property wildcard subpatterns");
+                }
+            }
+            else {
+                if (*RExC_parse == 's') {
+                    goto modifier_illegal_in_wildcard;
+                }
+            }
+        }
+
         switch (*RExC_parse) {
 
             /* Code for the imsxn flags */
@@ -10807,8 +10913,12 @@ S_parse_lparen_question_flags(pTHX_ RExC_state_t *pRExC_state)
                 vFAIL2("Regexp modifier \"%c\" may not appear after the \"-\"",
                                     *(RExC_parse - 1));
                 NOT_REACHED; /*NOTREACHED*/
-            case ONCE_PAT_MOD: /* 'o' */
             case GLOBAL_PAT_MOD: /* 'g' */
+                if (RExC_pm_flags & PMf_WILDCARD) {
+                    goto modifier_illegal_in_wildcard;
+                }
+                /*FALLTHROUGH*/
+            case ONCE_PAT_MOD: /* 'o' */
                 if (ckWARN(WARN_REGEXP)) {
                     const I32 wflagbit = *RExC_parse == 'o'
                                          ? WASTED_O
@@ -10829,6 +10939,9 @@ S_parse_lparen_question_flags(pTHX_ RExC_state_t *pRExC_state)
                 break;
 
             case CONTINUE_PAT_MOD: /* 'c' */
+                if (RExC_pm_flags & PMf_WILDCARD) {
+                    goto modifier_illegal_in_wildcard;
+                }
                 if (ckWARN(WARN_REGEXP)) {
                     if (! (wastedflags & WASTED_C) ) {
                         wastedflags |= WASTED_GC;
@@ -10843,6 +10956,9 @@ S_parse_lparen_question_flags(pTHX_ RExC_state_t *pRExC_state)
                 }
                 break;
             case KEEPCOPY_PAT_MOD: /* 'p' */
+                if (RExC_pm_flags & PMf_WILDCARD) {
+                    goto modifier_illegal_in_wildcard;
+                }
                 if (flagsp == &negflags) {
                     ckWARNreg(RExC_parse + 1,"Useless use of (?-p)");
                 } else {
@@ -10863,6 +10979,18 @@ S_parse_lparen_question_flags(pTHX_ RExC_state_t *pRExC_state)
             case ':':
             case ')':
 
+                if (  (RExC_pm_flags & PMf_WILDCARD)
+                    && cs != REGEX_ASCII_MORE_RESTRICTED_CHARSET)
+                {
+                    RExC_parse++;
+                    /* diag_listed_as: Use of %s is not allowed in Unicode
+                       property wildcard subpatterns in regex; marked by <--
+                       HERE in m/%s/ */
+                    vFAIL2("Use of modifier '%c' is not allowed in Unicode"
+                           " property wildcard subpatterns",
+                           has_charset_modifier);
+                }
+
                 if ((posflags & (RXf_PMf_EXTENDED|RXf_PMf_EXTENDED_MORE)) == RXf_PMf_EXTENDED) {
                     negflags |= RXf_PMf_EXTENDED_MORE;
                 }
@@ -10888,6 +11016,13 @@ S_parse_lparen_question_flags(pTHX_ RExC_state_t *pRExC_state)
     }
 
     vFAIL("Sequence (?... not terminated");
+
+  modifier_illegal_in_wildcard:
+    RExC_parse++;
+    /* diag_listed_as: Use of %s is not allowed in Unicode property wildcard
+       subpatterns in regex; marked by <-- HERE in m/%s/ */
+    vFAIL2("Use of modifier '%c' is not allowed in Unicode property wildcard"
+           " subpatterns", *(RExC_parse - 1));
 }
 
 /*
@@ -10917,7 +11052,7 @@ S_handle_named_backref(pTHX_ RExC_state_t *pRExC_state,
     char* name_start = RExC_parse;
     U32 num = 0;
     SV *sv_dat = reg_scan_name(pRExC_state, REG_RSN_RETURN_DATA);
-    GET_RE_DEBUG_FLAGS_DECL;
+    DECLARE_AND_GET_RE_DEBUG_FLAGS;
 
     PERL_ARGS_ASSERT_HANDLE_NAMED_BACKREF;
 
@@ -10985,7 +11120,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
     char * parse_start = RExC_parse; /* MJD */
     char * const oregcomp_parse = RExC_parse;
 
-    GET_RE_DEBUG_FLAGS_DECL;
+    DECLARE_AND_GET_RE_DEBUG_FLAGS;
 
     PERL_ARGS_ASSERT_REG;
     DEBUG_PARSE("reg ");
@@ -11310,7 +11445,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
             }
             if ( arg_required && !start_arg ) {
                 vFAIL3("Verb pattern '%.*s' has a mandatory argument",
-                    verb_len, start_verb);
+                    (int) verb_len, start_verb);
             }
             if (internal_argval == -1) {
                 ret = reganode(pRExC_state, op, 0);
@@ -11376,7 +11511,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
                 RExC_parse += SKIP_IF_CHAR(RExC_parse, RExC_end);
                 /* diag_listed_as: Sequence (?%s...) not recognized in regex; marked by <-- HERE in m/%s/ */
                vFAIL3("Sequence (%.*s...) not recognized",
-                                RExC_parse-seqstart, seqstart);
+                                (int) (RExC_parse - seqstart), seqstart);
                NOT_REACHED; /*NOTREACHED*/
             case '<':           /* (?<...) */
                 /* If you want to support (?<*...), first reconcile with GH #17363 */
@@ -11397,7 +11532,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
                         || *RExC_parse != paren)
                     {
                        vFAIL2("Sequence (?%c... not terminated",
-                           paren=='>' ? '<' : paren);
+                           paren=='>' ? '<' : (char) paren);
                     }
                    {
                        HE *he_str;
@@ -12025,7 +12160,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
             if (RExC_open_parens && !RExC_open_parens[parno])
             {
                 DEBUG_OPTIMISE_MORE_r(Perl_re_printf( aTHX_
-                    "%*s%*s Setting open paren #%" IVdf " to %d\n",
+                    "%*s%*s Setting open paren #%" IVdf " to %zu\n",
                     22, "|    |", (int)(depth * 2 + 1), "",
                     (IV)parno, ret));
                 RExC_open_parens[parno]= ret;
@@ -12120,7 +12255,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
            ender = reganode(pRExC_state, CLOSE, parno);
             if ( RExC_close_parens ) {
                 DEBUG_OPTIMISE_MORE_r(Perl_re_printf( aTHX_
-                        "%*s%*s Setting close paren #%" IVdf " to %d\n",
+                        "%*s%*s Setting close paren #%" IVdf " to %zu\n",
                         22, "|    |", (int)(depth * 2 + 1), "",
                         (IV)parno, ender));
                 RExC_close_parens[parno]= ender;
@@ -12154,7 +12289,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
             RExC_end_op = REGNODE_p(ender);
             if (RExC_close_parens) {
                 DEBUG_OPTIMISE_MORE_r(Perl_re_printf( aTHX_
-                    "%*s%*s Setting close paren #0 (END) to %d\n",
+                    "%*s%*s Setting close paren #0 (END) to %zu\n",
                     22, "|    |", (int)(depth * 2 + 1), "",
                     ender));
 
@@ -12162,7 +12297,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
             }
            break;
        }
-        DEBUG_PARSE_r(
+        DEBUG_PARSE_r({
             DEBUG_PARSE_MSG("lsbr");
             regprop(RExC_rx, RExC_mysv1, REGNODE_p(lastbr), NULL, pRExC_state);
             regprop(RExC_rx, RExC_mysv2, REGNODE_p(ender), NULL, pRExC_state);
@@ -12173,7 +12308,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
                           (IV)ender,
                           (IV)(ender - lastbr)
             );
-        );
+        });
         if (! REGTAIL(pRExC_state, lastbr, ender)) {
             REQUIRE_BRANCHJ(flagp, 0);
         }
@@ -12214,7 +12349,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
                 br= PL_regkind[OP(ret_as_regnode)] != BRANCH
                                ? regnext(ret_as_regnode)
                                : ret_as_regnode;
-                DEBUG_PARSE_r(
+                DEBUG_PARSE_r({
                     DEBUG_PARSE_MSG("NADA");
                     regprop(RExC_rx, RExC_mysv1, ret_as_regnode,
                                      NULL, pRExC_state);
@@ -12227,7 +12362,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
                                   (IV)ender,
                                   (IV)(ender - ret)
                     );
-                );
+                });
                 OP(br)= NOTHING;
                 if (OP(REGNODE_p(ender)) == TAIL) {
                     NEXT_OFF(br)= 0;
@@ -12321,7 +12456,7 @@ S_regbranch(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, I32 first, U32 depth)
     regnode_offset chain = 0;
     regnode_offset latest;
     I32 flags = 0, c = 0;
-    GET_RE_DEBUG_FLAGS_DECL;
+    DECLARE_AND_GET_RE_DEBUG_FLAGS;
 
     PERL_ARGS_ASSERT_REGBRANCH;
 
@@ -12418,7 +12553,7 @@ S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
     /* Save the original in case we change the emitted regop to a FAIL. */
     const regnode_offset orig_emit = RExC_emit;
 
-    GET_RE_DEBUG_FLAGS_DECL;
+    DECLARE_AND_GET_RE_DEBUG_FLAGS;
 
     PERL_ARGS_ASSERT_REGPIECE;
 
@@ -12496,6 +12631,23 @@ S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
          do_curly:
            if ((flags&SIMPLE)) {
                 if (min == 0 && max == REG_INFTY) {
+
+                    /* Going from 0..inf is currently forbidden in wildcard
+                     * subpatterns.  The only reason is to make it harder to
+                     * write patterns that take a long long time to halt, and
+                     * because the use of this construct isn't necessary in
+                     * matching Unicode property values */
+                    if (RExC_pm_flags & PMf_WILDCARD) {
+                        RExC_parse++;
+                        /* diag_listed_as: Use of %s is not allowed in Unicode
+                           property wildcard subpatterns in regex; marked by
+                           <-- HERE in m/%s/ */
+                        vFAIL("Use of quantifier '*' is not allowed in"
+                              " Unicode property wildcard subpatterns");
+                        /* Note, don't need to worry about {0,}, as a '}' isn't
+                         * legal at all in wildcards, so wouldn't get this far
+                         * */
+                    }
                     reginsert(pRExC_state, STAR, ret, depth+1);
                     MARK_NAUGHTY(4);
                     RExC_seen |= REG_UNBOUNDED_QUANTIFIER_SEEN;
@@ -12598,12 +12750,22 @@ S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
     }
   nest_check:
     if (!(flags&(HASWIDTH|POSTPONED)) && max > REG_INFTY/3) {
-       ckWARN2reg(RExC_parse,
-                  "%" UTF8f " matches null string many times",
-                  UTF8fARG(UTF, (RExC_parse >= origparse
-                                 ? RExC_parse - origparse
-                                 : 0),
-                  origparse));
+        if (origparse[0] == '\\' && origparse[1] == 'K') {
+            vFAIL2utf8f(
+                       "%" UTF8f " is forbidden - matches null string many times",
+                       UTF8fARG(UTF, (RExC_parse >= origparse
+                                     ? RExC_parse - origparse
+                                     : 0),
+                       origparse));
+            /* NOT-REACHED */
+        } else {
+            ckWARN2reg(RExC_parse,
+                       "%" UTF8f " matches null string many times",
+                       UTF8fARG(UTF, (RExC_parse >= origparse
+                                     ? RExC_parse - origparse
+                                     : 0),
+                       origparse));
+        }
     }
 
     if (*RExC_parse == '?') {
@@ -12732,12 +12894,10 @@ S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state,
     char *save_start;
     I32 flags;
 
-    GET_RE_DEBUG_FLAGS_DECL;
+    DECLARE_AND_GET_RE_DEBUG_FLAGS;
 
     PERL_ARGS_ASSERT_GROK_BSLASH_N;
 
-    GET_RE_DEBUG_FLAGS;
-
     assert(cBOOL(node_p) ^ cBOOL(code_point_p));  /* Exactly one should be set */
     assert(! (node_p && cp_count));               /* At most 1 should be set */
 
@@ -12936,48 +13096,30 @@ S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state,
          * thing. */
 
         do {    /* Loop until the ending brace */
-            UV cp = 0;
-            char * start_digit;     /* The first of the current code point */
-            if (! isXDIGIT(*RExC_parse)) {
+            I32 flags = PERL_SCAN_SILENT_OVERFLOW
+                      | PERL_SCAN_SILENT_ILLDIGIT
+                      | PERL_SCAN_NOTIFY_ILLDIGIT
+                      | PERL_SCAN_ALLOW_MEDIAL_UNDERSCORES
+                      | PERL_SCAN_DISALLOW_PREFIX;
+            STRLEN len = endbrace - RExC_parse;
+            NV overflow_value;
+            char * start_digit = RExC_parse;
+            UV cp = grok_hex(RExC_parse, &len, &flags, &overflow_value);
+
+            if (len == 0) {
                 RExC_parse++;
+              bad_NU:
                 vFAIL("Invalid hexadecimal number in \\N{U+...}");
             }
 
-            start_digit = RExC_parse;
-            count++;
-
-            /* Loop through the hex digits of the current code point */
-            do {
-                /* Adding this digit will shift the result 4 bits.  If that
-                 * result would be above the legal max, it's overflow */
-                if (cp > MAX_LEGAL_CP >> 4) {
-
-                    /* Find the end of the code point */
-                    do {
-                        RExC_parse ++;
-                    } while (isXDIGIT(*RExC_parse) || *RExC_parse == '_');
+            RExC_parse += len;
 
-                    /* Be sure to synchronize this message with the similar one
-                     * in utf8.c */
-                    vFAIL4("Use of code point 0x%.*s is not allowed; the"
-                        " permissible max is 0x%" UVxf,
-                        (int) (RExC_parse - start_digit), start_digit,
-                        MAX_LEGAL_CP);
-                }
-
-                /* Accumulate this (valid) digit into the running total */
-                cp  = (cp << 4) + READ_XDIGIT(RExC_parse);
-
-                /* READ_XDIGIT advanced the input pointer.  Ignore a single
-                 * underscore separator */
-                if (*RExC_parse == '_' && isXDIGIT(RExC_parse[1])) {
-                    RExC_parse++;
-                }
-            } while (isXDIGIT(*RExC_parse));
+            if (cp > MAX_LEGAL_CP) {
+                vFAIL(form_cp_too_large_msg(16, start_digit, len, 0));
+            }
 
-            /* Here, have accumulated the next code point */
-            if (RExC_parse >= endbrace) {   /* If done ... */
-                if (count != 1) {
+            if (RExC_parse >= endbrace) { /* Got to the closing '}' */
+                if (count) {
                     goto do_concat;
                 }
 
@@ -12994,18 +13136,19 @@ S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state,
                 return TRUE;
             }
 
-            /* Here, the only legal thing would be a multiple character
-             * sequence (of the form "\N{U+c1.c2. ... }".   So the next
-             * character must be a dot (and the one after that can't be the
-             * endbrace, or we'd have something like \N{U+100.} ) */
+            /* Here, the parse stopped before the ending brace.  This is legal
+             * only if that character is a dot separating code points, like a
+             * multiple character sequence (of the form "\N{U+c1.c2. ... }").
+             * So the next character must be a dot (and the one after that
+             * can't be the endbrace, or we'd have something like \N{U+100.} )
+             * */
             if (*RExC_parse != '.' || RExC_parse + 1 >= endbrace) {
                 RExC_parse += (RExC_orig_utf8)  /* point to after 1st invalid */
-                                ? UTF8SKIP(RExC_parse)
-                                : 1;
-                if (RExC_parse >= endbrace) { /* Guard against malformed utf8 */
-                    RExC_parse = endbrace;
-                }
-                vFAIL("Invalid hexadecimal number in \\N{U+...}");
+                              ? UTF8SKIP(RExC_parse)
+                              : 1;
+                RExC_parse = MIN(endbrace, RExC_parse);/* Guard against
+                                                          malformed utf8 */
+                goto bad_NU;
             }
 
             /* Here, looks like its really a multiple character sequence.  Fail
@@ -13023,7 +13166,7 @@ S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state,
              * but go through the motions of code point counting and error
              * checking, if the caller doesn't want a node returned. */
 
-            if (node_p && count == 1) {
+            if (node_p && ! substitute_parse) {
                 substitute_parse = newSVpvs("?:");
             }
 
@@ -13259,7 +13402,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
     U8 op;
     int invert = 0;
 
-    GET_RE_DEBUG_FLAGS_DECL;
+    DECLARE_AND_GET_RE_DEBUG_FLAGS;
 
     *flagp = WORST;            /* Tentatively. */
 
@@ -13374,13 +13517,28 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
        /* Special Escapes */
        case 'A':
            RExC_seen_zerolen++;
-           ret = reg_node(pRExC_state, SBOL);
-            /* SBOL is shared with /^/ so we set the flags so we can tell
-             * /\A/ from /^/ in split. */
-            FLAGS(REGNODE_p(ret)) = 1;
+            /* Under wildcards, this is changed to match \n; should be
+             * invisible to the user, as they have to compile under /m */
+            if (RExC_pm_flags & PMf_WILDCARD) {
+                ret = reg_node(pRExC_state, MBOL);
+            }
+            else {
+                ret = reg_node(pRExC_state, SBOL);
+                /* SBOL is shared with /^/ so we set the flags so we can tell
+                 * /\A/ from /^/ in split. */
+                FLAGS(REGNODE_p(ret)) = 1;
+            }
            *flagp |= SIMPLE;
            goto finish_meta_pat;
        case 'G':
+            if (RExC_pm_flags & PMf_WILDCARD) {
+                RExC_parse++;
+                /* diag_listed_as: Use of %s is not allowed in Unicode property
+                   wildcard subpatterns in regex; marked by <-- HERE in m/%s/
+                 */
+                vFAIL("Use of '\\G' is not allowed in Unicode property"
+                      " wildcard subpatterns");
+            }
            ret = reg_node(pRExC_state, GPOS);
             RExC_seen |= REG_GPOS_SEEN;
            *flagp |= SIMPLE;
@@ -13402,12 +13560,24 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                 vFAIL("\\K not permitted in lookahead/lookbehind");
             }
        case 'Z':
-           ret = reg_node(pRExC_state, SEOL);
+            if (RExC_pm_flags & PMf_WILDCARD) {
+                /* See comment under \A above */
+                ret = reg_node(pRExC_state, MEOL);
+            }
+            else {
+                ret = reg_node(pRExC_state, SEOL);
+            }
            *flagp |= SIMPLE;
            RExC_seen_zerolen++;                /* Do not optimize RE away */
            goto finish_meta_pat;
        case 'z':
-           ret = reg_node(pRExC_state, EOS);
+            if (RExC_pm_flags & PMf_WILDCARD) {
+                /* See comment under \A above */
+                ret = reg_node(pRExC_state, MEOL);
+            }
+            else {
+                ret = reg_node(pRExC_state, EOS);
+            }
            *flagp |= SIMPLE;
            RExC_seen_zerolen++;                /* Do not optimize RE away */
            goto finish_meta_pat;
@@ -13939,6 +14109,10 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                        || ! is_PATWS_safe((p), RExC_end, UTF));
 
                switch ((U8)*p) {
+                  const char* message;
+                  U32 packed_warn;
+                  U8 grok_c_char;
+
                case '^':
                case '$':
                case '.':
@@ -14054,67 +14228,70 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                        p++;
                        break;
                    case 'o':
-                       {
-                           UV result;
-                           const char* error_msg;
-
-                           bool valid = grok_bslash_o(&p,
-                                                       RExC_end,
-                                                      &result,
-                                                      &error_msg,
-                                                      TO_OUTPUT_WARNINGS(p),
-                                                       (bool) RExC_strict,
-                                                       TRUE, /* Output warnings
-                                                                for non-
-                                                                portables */
-                                                       UTF);
-                           if (! valid) {
-                               RExC_parse = p; /* going to die anyway; point
-                                                  to exact spot of failure */
-                               vFAIL(error_msg);
-                           }
-                            UPDATE_WARNINGS_LOC(p - 1);
-                            ender = result;
-                           break;
-                       }
+                        if (! grok_bslash_o(&p,
+                                            RExC_end,
+                                            &ender,
+                                            &message,
+                                            &packed_warn,
+                                            (bool) RExC_strict,
+                                            FALSE, /* No illegal cp's */
+                                            UTF))
+                        {
+                            RExC_parse = p; /* going to die anyway; point to
+                                               exact spot of failure */
+                            vFAIL(message);
+                        }
+
+                        if (message && TO_OUTPUT_WARNINGS(p)) {
+                            warn_non_literal_string(p, packed_warn, message);
+                        }
+                        break;
                    case 'x':
-                       {
-                            UV result = UV_MAX; /* initialize to erroneous
-                                                   value */
-                           const char* error_msg;
-
-                           bool valid = grok_bslash_x(&p,
-                                                       RExC_end,
-                                                      &result,
-                                                      &error_msg,
-                                                       TO_OUTPUT_WARNINGS(p),
-                                                       (bool) RExC_strict,
-                                                       TRUE, /* Silence warnings
-                                                                for non-
-                                                                portables */
-                                                       UTF);
-                           if (! valid) {
-                               RExC_parse = p; /* going to die anyway; point
-                                                  to exact spot of failure */
-                               vFAIL(error_msg);
-                           }
-                            UPDATE_WARNINGS_LOC(p - 1);
-                            ender = result;
+                        if (! grok_bslash_x(&p,
+                                            RExC_end,
+                                            &ender,
+                                            &message,
+                                            &packed_warn,
+                                            (bool) RExC_strict,
+                                            FALSE, /* No illegal cp's */
+                                            UTF))
+                        {
+                            RExC_parse = p;    /* going to die anyway; point
+                                                   to exact spot of failure */
+                            vFAIL(message);
+                        }
+
+                        if (message && TO_OUTPUT_WARNINGS(p)) {
+                            warn_non_literal_string(p, packed_warn, message);
+                        }
 
 #ifdef EBCDIC
-                            if (ender < 0x100) {
-                                if (RExC_recode_x_to_native) {
-                                    ender = LATIN1_TO_NATIVE(ender);
-                                }
-                           }
+                        if (ender < 0x100) {
+                            if (RExC_recode_x_to_native) {
+                                ender = LATIN1_TO_NATIVE(ender);
+                            }
+                        }
 #endif
-                           break;
-                       }
+                        break;
                    case 'c':
-                       p++;
-                       ender = grok_bslash_c(*p, TO_OUTPUT_WARNINGS(p));
-                        UPDATE_WARNINGS_LOC(p);
                         p++;
+                        if (! grok_bslash_c(*p, &grok_c_char,
+                                            &message, &packed_warn))
+                        {
+                            /* going to die anyway; point to exact spot of
+                             * failure */
+                            RExC_parse = p + ((UTF)
+                                              ? UTF8_SAFE_SKIP(p, RExC_end)
+                                              : 1);
+                            vFAIL(message);
+                        }
+
+                        ender = grok_c_char;
+                        p++;
+                        if (message && TO_OUTPUT_WARNINGS(p)) {
+                            warn_non_literal_string(p, packed_warn, message);
+                        }
+
                        break;
                     case '8': case '9': /* must be a backreference */
                         --p;
@@ -14149,17 +14326,19 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                         /* FALLTHROUGH */
                     case '0':
                        {
-                           I32 flags = PERL_SCAN_SILENT_ILLDIGIT;
+                           I32 flags = PERL_SCAN_SILENT_ILLDIGIT
+                                      | PERL_SCAN_NOTIFY_ILLDIGIT;
                            STRLEN numlen = 3;
                            ender = grok_oct(p, &numlen, &flags, NULL);
                            p += numlen;
-                            if (   isDIGIT(*p)  /* like \08, \178 */
-                                && ckWARN(WARN_REGEXP)
-                                && numlen < 3)
+                            if (  (flags & PERL_SCAN_NOTIFY_ILLDIGIT)
+                                && isDIGIT(*p)  /* like \08, \178 */
+                                && ckWARN(WARN_REGEXP))
                             {
                                reg_warn_non_literal_string(
-                                         p + 1,
-                                         form_short_octal_warning(p, numlen));
+                                     p + 1,
+                                     form_alien_digit_msg(8, numlen, p,
+                                                        RExC_end, UTF, FALSE));
                             }
                        }
                        break;
@@ -14236,6 +14415,14 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
 
                 if (ender > 255) {
                     REQUIRE_UTF8(flagp);
+                    if (   UNICODE_IS_PERL_EXTENDED(ender)
+                        && TO_OUTPUT_WARNINGS(p))
+                    {
+                        ckWARN2_non_literal_string(p,
+                                                   packWARN(WARN_PORTABLE),
+                                                   PL_extended_cp_format,
+                                                   ender);
+                    }
                 }
 
                 /* We need to check if the next non-ignored thing is a
@@ -14564,8 +14751,8 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
             else if (FOLD) {
                 bool splittable = FALSE;
                 bool backed_up = FALSE;
-                char * e;
-                char * s_start;
+                char * e;       /* should this be U8? */
+                char * s_start; /* should this be U8? */
 
                 /* Here is /i.  Running out of room creates a problem if we are
                  * folding, and the split happens in the middle of a
@@ -14882,7 +15069,8 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                     if (   ender != LATIN_SMALL_LETTER_SHARP_S
                         || ASCII_FOLD_RESTRICTED)
                     {
-                        *e++ = toLOWER_L1(ender);
+                        assert( toLOWER_L1(ender) < 256 );
+                        *e++ = (char)(toLOWER_L1(ender)); /* should e and the cast be U8? */
                     }
                     else {
                         *e++ = 's';
@@ -14900,7 +15088,8 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                             if (   UCHARAT(p) != LATIN_SMALL_LETTER_SHARP_S
                                 || ASCII_FOLD_RESTRICTED)
                             {
-                                *e++ = toLOWER_L1(ender);
+                                assert( toLOWER_L1(ender) < 256 );
+                                *e++ = (char)(toLOWER_L1(ender)); /* should e and the cast be U8? */
                             }
                             else {
                                 *e++ = 's';
@@ -16067,15 +16256,16 @@ S_handle_regex_sets(pTHX_ RExC_state_t *pRExC_state, SV** return_invlist,
                                        with left paren in stack is; -1 if none.
                                      */
     STRLEN len;                     /* Temporary */
-    regnode_offset node;                  /* Temporary, and final regnode returned by
+    regnode_offset node;            /* Temporary, and final regnode returned by
                                        this function */
     const bool save_fold = FOLD;    /* Temporary */
     char *save_end, *save_parse;    /* Temporaries */
     const bool in_locale = LOC;     /* we turn off /l during processing */
 
-    GET_RE_DEBUG_FLAGS_DECL;
+    DECLARE_AND_GET_RE_DEBUG_FLAGS;
 
     PERL_ARGS_ASSERT_HANDLE_REGEX_SETS;
+    PERL_UNUSED_ARG(oregcomp_parse); /* Only for Set_Node_Length */
 
     DEBUG_PARSE("xcls");
 
@@ -16202,14 +16392,16 @@ redo_curchar:
             SV* rhs;                /* Operand to the right of the operator */
             SV* fence_ptr;          /* Pointer to top element of the fence
                                        stack */
-
             case '(':
 
                 if (   RExC_parse < RExC_end - 2
                     && UCHARAT(RExC_parse + 1) == '?'
                     && UCHARAT(RExC_parse + 2) == '^')
                 {
-                    /* If is a '(?', could be an embedded '(?^flags:(?[...])'.
+                    const regnode_offset orig_emit = RExC_emit;
+                    SV * resultant_invlist;
+
+                    /* If is a '(?^', could be an embedded '(?^flags:(?[...])'.
                      * This happens when we have some thing like
                      *
                      *   my $thai_or_lao = qr/(?[ \p{Thai} + \p{Lao} ])/;
@@ -16218,62 +16410,33 @@ redo_curchar:
                      *
                      * Here we would be handling the interpolated
                      * '$thai_or_lao'.  We handle this by a recursive call to
-                     * ourselves which returns the inversion list the
-                     * interpolated expression evaluates to.  We use the flags
-                     * from the interpolated pattern. */
-                    U32 save_flags = RExC_flags;
-                    const char * save_parse;
-
-                    RExC_parse += 2;        /* Skip past the '(?' */
-                    save_parse = RExC_parse;
-
-                    /* Parse the flags for the '(?'.  We already know the first
-                     * flag to parse is a '^' */
-                    parse_lparen_question_flags(pRExC_state);
-
-                    if (   RExC_parse >= RExC_end - 4
-                        || UCHARAT(RExC_parse) != ':'
-                        || UCHARAT(++RExC_parse) != '('
-                        || UCHARAT(++RExC_parse) != '?'
-                        || UCHARAT(++RExC_parse) != '[')
-                    {
-
-                        /* In combination with the above, this moves the
-                         * pointer to the point just after the first erroneous
-                         * character. */
-                        if (RExC_parse >= RExC_end - 4) {
-                            RExC_parse = RExC_end;
-                        }
-                        else if (RExC_parse != save_parse) {
-                            RExC_parse += (UTF)
-                                          ? UTF8_SAFE_SKIP(RExC_parse, RExC_end)
-                                          : 1;
-                        }
-                        vFAIL("Expecting '(?flags:(?[...'");
-                    }
+                     * reg which returns the inversion list the
+                     * interpolated expression evaluates to.  Actually, the
+                     * return is a special regnode containing a pointer to that
+                     * inversion list.  If the return isn't that regnode alone,
+                     * we know that this wasn't such an interpolation, which is
+                     * an error: we need to get a single inversion list back
+                     * from the recursion */
 
-                    /* Recurse, with the meat of the embedded expression */
                     RExC_parse++;
-                    if (! handle_regex_sets(pRExC_state, &current, flagp,
-                                                    depth+1, oregcomp_parse))
-                    {
-                        RETURN_FAIL_ON_RESTART(*flagp, flagp);
-                    }
+                    RExC_sets_depth++;
 
-                    /* Here, 'current' contains the embedded expression's
-                     * inversion list, and RExC_parse points to the trailing
-                     * ']'; the next character should be the ')' */
-                    RExC_parse++;
-                    if (UCHARAT(RExC_parse) != ')')
-                        vFAIL("Expecting close paren for nested extended charclass");
+                   node = reg(pRExC_state, 2, flagp, depth+1);
+                    RETURN_FAIL_ON_RESTART(*flagp, flagp);
 
-                    /* Then the ')' matching the original '(' handled by this
-                     * case: statement */
-                    RExC_parse++;
-                    if (UCHARAT(RExC_parse) != ')')
-                        vFAIL("Expecting close paren for wrapper for nested extended charclass");
+                    if (   OP(REGNODE_p(node)) != REGEX_SET
+                           /* If more than a single node returned, the nested
+                            * parens evaluated to more than just a (?[...]),
+                            * which isn't legal */
+                        || node != 1) {
+                        vFAIL("Expecting interpolated extended charclass");
+                    }
+                    resultant_invlist = (SV *) ARGp(REGNODE_p(node));
+                    current = invlist_clone(resultant_invlist, NULL);
+                    SvREFCNT_dec(resultant_invlist);
 
-                    RExC_flags = save_flags;
+                    RExC_sets_depth--;
+                    RExC_emit = orig_emit;
                     goto handle_operand;
                 }
 
@@ -16661,88 +16824,96 @@ redo_curchar:
         return END;
     }
 
-    /* Otherwise generate a resultant node, based on 'final'.  regclass() is
-     * expecting a string of ranges and individual code points */
-    invlist_iterinit(final);
-    result_string = newSVpvs("");
-    while (invlist_iternext(final, &start, &end)) {
-        if (start == end) {
-            Perl_sv_catpvf(aTHX_ result_string, "\\x{%" UVXf "}", start);
-        }
-        else {
-            Perl_sv_catpvf(aTHX_ result_string, "\\x{%" UVXf "}-\\x{%" UVXf "}",
-                                                     start,          end);
-        }
+    if (RExC_sets_depth) {  /* If within a recursive call, return in a special
+                               regnode */
+        RExC_parse++;
+        node = regpnode(pRExC_state, REGEX_SET, (void *) final);
     }
+    else {
 
-    /* About to generate an ANYOF (or similar) node from the inversion list we
-     * have calculated */
-    save_parse = RExC_parse;
-    RExC_parse = SvPV(result_string, len);
-    save_end = RExC_end;
-    RExC_end = RExC_parse + len;
-    TURN_OFF_WARNINGS_IN_SUBSTITUTE_PARSE;
+        /* Otherwise generate a resultant node, based on 'final'.  regclass()
+         * is expecting a string of ranges and individual code points */
+        invlist_iterinit(final);
+        result_string = newSVpvs("");
+        while (invlist_iternext(final, &start, &end)) {
+            if (start == end) {
+                Perl_sv_catpvf(aTHX_ result_string, "\\x{%" UVXf "}", start);
+            }
+            else {
+                Perl_sv_catpvf(aTHX_ result_string, "\\x{%" UVXf "}-\\x{%"
+                                                        UVXf "}", start, end);
+            }
+        }
+
+        /* About to generate an ANYOF (or similar) node from the inversion list
+         * we have calculated */
+        save_parse = RExC_parse;
+        RExC_parse = SvPV(result_string, len);
+        save_end = RExC_end;
+        RExC_end = RExC_parse + len;
+        TURN_OFF_WARNINGS_IN_SUBSTITUTE_PARSE;
+
+        /* We turn off folding around the call, as the class we have
+         * constructed already has all folding taken into consideration, and we
+         * don't want regclass() to add to that */
+        RExC_flags &= ~RXf_PMf_FOLD;
+        /* regclass() can only return RESTART_PARSE and NEED_UTF8 if multi-char
+         * folds are allowed.  */
+        node = regclass(pRExC_state, flagp, depth+1,
+                        FALSE, /* means parse the whole char class */
+                        FALSE, /* don't allow multi-char folds */
+                        TRUE, /* silence non-portable warnings.  The above may
+                                 very well have generated non-portable code
+                                 points, but they're valid on this machine */
+                        FALSE, /* similarly, no need for strict */
+
+                        /* We can optimize into something besides an ANYOF,
+                         * except under /l, which needs to be ANYOF because of
+                         * runtime checks for locale sanity, etc */
+                    ! in_locale,
+                        NULL
+                    );
 
-    /* We turn off folding around the call, as the class we have constructed
-     * already has all folding taken into consideration, and we don't want
-     * regclass() to add to that */
-    RExC_flags &= ~RXf_PMf_FOLD;
-    /* regclass() can only return RESTART_PARSE and NEED_UTF8 if multi-char
-     * folds are allowed.  */
-    node = regclass(pRExC_state, flagp, depth+1,
-                    FALSE, /* means parse the whole char class */
-                    FALSE, /* don't allow multi-char folds */
-                    TRUE, /* silence non-portable warnings.  The above may very
-                             well have generated non-portable code points, but
-                             they're valid on this machine */
-                    FALSE, /* similarly, no need for strict */
-
-                    /* We can optimize into something besides an ANYOF, except
-                     * under /l, which needs to be ANYOF because of runtime
-                     * checks for locale sanity, etc */
-                  ! in_locale,
-                    NULL
-                );
+        RESTORE_WARNINGS;
+        RExC_parse = save_parse + 1;
+        RExC_end = save_end;
+        SvREFCNT_dec_NN(final);
+        SvREFCNT_dec_NN(result_string);
 
-    RESTORE_WARNINGS;
-    RExC_parse = save_parse + 1;
-    RExC_end = save_end;
-    SvREFCNT_dec_NN(final);
-    SvREFCNT_dec_NN(result_string);
-
-    if (save_fold) {
-        RExC_flags |= RXf_PMf_FOLD;
-    }
-
-    if (!node) {
-        RETURN_FAIL_ON_RESTART(*flagp, flagp);
-        goto regclass_failed;
-    }
-
-    /* Fix up the node type if we are in locale.  (We have pretended we are
-     * under /u for the purposes of regclass(), as this construct will only
-     * work under UTF-8 locales.  But now we change the opcode to be ANYOFL (so
-     * as to cause any warnings about bad locales to be output in regexec.c),
-     * and add the flag that indicates to check if not in a UTF-8 locale.  The
-     * reason we above forbid optimization into something other than an ANYOF
-     * node is simply to minimize the number of code changes in regexec.c.
-     * Otherwise we would have to create new EXACTish node types and deal with
-     * them.  This decision could be revisited should this construct become
-     * popular.
-     *
-     * (One might think we could look at the resulting ANYOF node and suppress
-     * the flag if everything is above 255, as those would be UTF-8 only,
-     * but this isn't true, as the components that led to that result could
-     * have been locale-affected, and just happen to cancel each other out
-     * under UTF-8 locales.) */
-    if (in_locale) {
-        set_regex_charset(&RExC_flags, REGEX_LOCALE_CHARSET);
+        if (save_fold) {
+            RExC_flags |= RXf_PMf_FOLD;
+        }
 
-        assert(OP(REGNODE_p(node)) == ANYOF);
+        if (!node) {
+            RETURN_FAIL_ON_RESTART(*flagp, flagp);
+            goto regclass_failed;
+        }
+
+        /* Fix up the node type if we are in locale.  (We have pretended we are
+         * under /u for the purposes of regclass(), as this construct will only
+         * work under UTF-8 locales.  But now we change the opcode to be ANYOFL
+         * (so as to cause any warnings about bad locales to be output in
+         * regexec.c), and add the flag that indicates to check if not in a
+         * UTF-8 locale.  The reason we above forbid optimization into
+         * something other than an ANYOF node is simply to minimize the number
+         * of code changes in regexec.c.  Otherwise we would have to create new
+         * EXACTish node types and deal with them.  This decision could be
+         * revisited should this construct become popular.
+         *
+         * (One might think we could look at the resulting ANYOF node and
+         * suppress the flag if everything is above 255, as those would be
+         * UTF-8 only, but this isn't true, as the components that led to that
+         * result could have been locale-affected, and just happen to cancel
+         * each other out under UTF-8 locales.) */
+        if (in_locale) {
+            set_regex_charset(&RExC_flags, REGEX_LOCALE_CHARSET);
+
+            assert(OP(REGNODE_p(node)) == ANYOF);
 
-        OP(REGNODE_p(node)) = ANYOFL;
-        ANYOF_FLAGS(REGNODE_p(node))
-                |= ANYOFL_SHARED_UTF8_LOCALE_fold_HAS_MATCHES_nonfold_REQD;
+            OP(REGNODE_p(node)) = ANYOFL;
+            ANYOF_FLAGS(REGNODE_p(node))
+                    |= ANYOFL_SHARED_UTF8_LOCALE_fold_HAS_MATCHES_nonfold_REQD;
+        }
     }
 
     nextchar(pRExC_state);
@@ -16854,7 +17025,7 @@ S_add_above_Latin1_folds(pTHX_ RExC_state_t *pRExC_state, const U8 cp, SV** invl
                        current Unicode version */
           {
             Size_t folds_count;
-            unsigned int first_fold;
+            U32 first_fold;
             const U32 * remaining_folds;
             UV folded_cp;
 
@@ -16911,6 +17082,7 @@ S_output_posix_warnings(pTHX_ RExC_state_t *pRExC_state, AV* posix_warnings)
     PERL_ARGS_ASSERT_OUTPUT_POSIX_WARNINGS;
 
     if (! TO_OUTPUT_WARNINGS(RExC_parse)) {
+        CLEAR_POSIX_WARNINGS();
         return;
     }
 
@@ -17154,7 +17326,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                                             what gets folded */
     U32 has_runtime_dependency = 0;     /* OR of the above flags */
 
-    GET_RE_DEBUG_FLAGS_DECL;
+    DECLARE_AND_GET_RE_DEBUG_FLAGS;
 
     PERL_ARGS_ASSERT_REGCLASS;
 #ifndef DEBUGGING
@@ -17341,6 +17513,9 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
              * is already in 'value'.  Otherwise, need to translate the escape
              * into what it signifies. */
             if (! skip_white || ! isBLANK_A(value)) switch ((I32)value) {
+                const char * message;
+                U32 packed_warn;
+                U8 grok_c_char;
 
            case 'w':   namedclass = ANYOF_WORDCHAR;    break;
            case 'W':   namedclass = ANYOF_NWORDCHAR;   break;
@@ -17423,6 +17598,15 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                {
                char *e;
 
+                if (RExC_pm_flags & PMf_WILDCARD) {
+                    RExC_parse++;
+                    /* diag_listed_as: Use of %s is not allowed in Unicode
+                       property wildcard subpatterns in regex; marked by <--
+                       HERE in m/%s/ */
+                    vFAIL3("Use of '\\%c%c' is not allowed in Unicode property"
+                           " wildcard subpatterns", (char) value, *(RExC_parse - 1));
+                }
+
                /* \p means they want Unicode semantics */
                REQUIRE_UNI_RULES(flagp, 0);
 
@@ -17591,53 +17775,74 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
            case 'a':   value = '\a';                   break;
            case 'o':
                RExC_parse--;   /* function expects to be pointed at the 'o' */
-               {
-                   const char* error_msg;
-                   bool valid = grok_bslash_o(&RExC_parse,
-                                               RExC_end,
-                                              &value,
-                                              &error_msg,
-                                               TO_OUTPUT_WARNINGS(RExC_parse),
-                                               strict,
-                                               silence_non_portable,
-                                               UTF);
-                   if (! valid) {
-                       vFAIL(error_msg);
-                   }
-                    UPDATE_WARNINGS_LOC(RExC_parse - 1);
-               }
-                non_portable_endpoint++;
+                if (! grok_bslash_o(&RExC_parse,
+                                            RExC_end,
+                                            &value,
+                                            &message,
+                                            &packed_warn,
+                                            strict,
+                                            cBOOL(range), /* MAX_UV allowed for range
+                                                      upper limit */
+                                            UTF))
+                {
+                    vFAIL(message);
+                }
+                else if (message && TO_OUTPUT_WARNINGS(RExC_parse)) {
+                    warn_non_literal_string(RExC_parse, packed_warn, message);
+                }
+
+                if (value < 256) {
+                    non_portable_endpoint++;
+                }
                break;
            case 'x':
                RExC_parse--;   /* function expects to be pointed at the 'x' */
-               {
-                   const char* error_msg;
-                   bool valid = grok_bslash_x(&RExC_parse,
-                                               RExC_end,
-                                              &value,
-                                              &error_msg,
-                                              TO_OUTPUT_WARNINGS(RExC_parse),
-                                               strict,
-                                               silence_non_portable,
-                                               UTF);
-                    if (! valid) {
-                       vFAIL(error_msg);
-                   }
-                    UPDATE_WARNINGS_LOC(RExC_parse - 1);
-               }
-                non_portable_endpoint++;
+                if (!  grok_bslash_x(&RExC_parse,
+                                            RExC_end,
+                                            &value,
+                                            &message,
+                                            &packed_warn,
+                                            strict,
+                                            cBOOL(range), /* MAX_UV allowed for range
+                                                      upper limit */
+                                            UTF))
+                {
+                    vFAIL(message);
+                }
+                else if (message && TO_OUTPUT_WARNINGS(RExC_parse)) {
+                    warn_non_literal_string(RExC_parse, packed_warn, message);
+                }
+
+                if (value < 256) {
+                    non_portable_endpoint++;
+                }
                break;
            case 'c':
-               value = grok_bslash_c(*RExC_parse, TO_OUTPUT_WARNINGS(RExC_parse));
-                UPDATE_WARNINGS_LOC(RExC_parse);
-               RExC_parse++;
+                if (! grok_bslash_c(*RExC_parse, &grok_c_char, &message,
+                                                                &packed_warn))
+                {
+                    /* going to die anyway; point to exact spot of
+                        * failure */
+                    RExC_parse += (UTF)
+                                  ? UTF8_SAFE_SKIP(RExC_parse, RExC_end)
+                                  : 1;
+                    vFAIL(message);
+                }
+
+                value = grok_c_char;
+                RExC_parse++;
+                if (message && TO_OUTPUT_WARNINGS(RExC_parse)) {
+                    warn_non_literal_string(RExC_parse, packed_warn, message);
+                }
+
                 non_portable_endpoint++;
                break;
            case '0': case '1': case '2': case '3': case '4':
            case '5': case '6': case '7':
                {
                    /* Take 1-3 octal digits */
-                   I32 flags = PERL_SCAN_SILENT_ILLDIGIT;
+                   I32 flags = PERL_SCAN_SILENT_ILLDIGIT
+                              | PERL_SCAN_NOTIFY_ILLDIGIT;
                     numlen = (strict) ? 4 : 3;
                     value = grok_oct(--RExC_parse, &numlen, &flags, NULL);
                    RExC_parse += numlen;
@@ -17648,17 +17853,20 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                                           : 1;
                             vFAIL("Need exactly 3 octal digits");
                         }
-                        else if (   numlen < 3 /* like \08, \178 */
+                        else if (  (flags & PERL_SCAN_NOTIFY_ILLDIGIT)
                                  && RExC_parse < RExC_end
                                  && isDIGIT(*RExC_parse)
                                  && ckWARN(WARN_REGEXP))
                         {
                             reg_warn_non_literal_string(
                                  RExC_parse + 1,
-                                 form_short_octal_warning(RExC_parse, numlen));
+                                 form_alien_digit_msg(8, numlen, RExC_parse,
+                                                        RExC_end, UTF, FALSE));
                         }
                     }
-                    non_portable_endpoint++;
+                    if (value < 256) {
+                        non_portable_endpoint++;
+                    }
                    break;
                }
            default:
@@ -17920,7 +18128,21 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
 
        /* non-Latin1 code point implies unicode semantics. */
        if (value > 255) {
+            if (value > MAX_LEGAL_CP && (   value != UV_MAX
+                                         || prevvalue > MAX_LEGAL_CP))
+            {
+                vFAIL(form_cp_too_large_msg(16, NULL, 0, value));
+            }
             REQUIRE_UNI_RULES(flagp, 0);
+            if (  ! silence_non_portable
+                &&  UNICODE_IS_PERL_EXTENDED(value)
+                &&  TO_OUTPUT_WARNINGS(RExC_parse))
+            {
+                ckWARN2_non_literal_string(RExC_parse,
+                                           packWARN(WARN_PORTABLE),
+                                           PL_extended_cp_format,
+                                           value);
+            }
        }
 
         /* Ready to process either the single value, or the completed range.
@@ -18305,7 +18527,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                     STRLEN foldlen;
                     unsigned int k;
                     Size_t folds_count;
-                    unsigned int first_fold;
+                    U32 first_fold;
                     const U32 * remaining_folds;
 
                     if (j < 256) {
@@ -18645,17 +18867,17 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
        invert = FALSE;
     }
 
+    /* All possible optimizations below still have these characteristics.
+     * (Multi-char folds aren't SIMPLE, but they don't get this far in this
+     * routine) */
+    *flagp |= HASWIDTH|SIMPLE;
+
     if (ret_invlist) {
         *ret_invlist = cp_list;
 
         return RExC_emit;
     }
 
-    /* All possible optimizations below still have these characteristics.
-     * (Multi-char folds aren't SIMPLE, but they don't get this far in this
-     * routine) */
-    *flagp |= HASWIDTH|SIMPLE;
-
     if (anyof_flags & ANYOF_LOCALE_FLAGS) {
         RExC_contains_locale = 1;
     }
@@ -19011,7 +19233,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                     U8 foldbuf[UTF8_MAXBYTES_CASE];
                     UV folded = _to_uni_fold_flags(start[0],
                                                         foldbuf, &foldlen, 0);
-                    unsigned int first_fold;
+                    U32 first_fold;
                     const U32 * remaining_folds;
                     Size_t folds_to_this_cp_count = _inverse_folds(
                                                             folded,
@@ -19678,14 +19900,13 @@ S_set_ANYOF_arg(pTHX_ RExC_state_t* const pRExC_state,
     }
 }
 
-#if !defined(PERL_IN_XSUB_RE) || defined(PLUGGABLE_RE_EXTENSION)
 SV *
-Perl__get_regclass_nonbitmap_data(pTHX_ const regexp *prog,
-                                        const regnode* node,
-                                        bool doinit,
-                                        SV** listsvp,
-                                        SV** only_utf8_locale_ptr,
-                                        SV** output_invlist)
+
+#if !defined(PERL_IN_XSUB_RE) || defined(PLUGGABLE_RE_EXTENSION)
+Perl_get_regclass_nonbitmap_data(pTHX_ const regexp *prog, const regnode* node, bool doinit, SV** listsvp, SV** only_utf8_locale_ptr, SV** output_invlist)
+#else
+Perl_get_re_gclass_nonbitmap_data(pTHX_ const regexp *prog, const regnode* node, bool doinit, SV** listsvp, SV** only_utf8_locale_ptr, SV** output_invlist)
+#endif
 
 {
     /* For internal core use only.
@@ -19721,7 +19942,11 @@ Perl__get_regclass_nonbitmap_data(pTHX_ const regexp *prog,
     RXi_GET_DECL(prog, progi);
     const struct reg_data * const data = prog ? progi->data : NULL;
 
-    PERL_ARGS_ASSERT__GET_REGCLASS_NONBITMAP_DATA;
+#if !defined(PERL_IN_XSUB_RE) || defined(PLUGGABLE_RE_EXTENSION)
+    PERL_ARGS_ASSERT_GET_REGCLASS_NONBITMAP_DATA;
+#else
+    PERL_ARGS_ASSERT_GET_RE_GCLASS_NONBITMAP_DATA;
+#endif
     assert(! output_invlist || listsvp);
 
     if (data && data->count) {
@@ -19825,10 +20050,11 @@ Perl__get_regclass_nonbitmap_data(pTHX_ const regexp *prog,
                     /* The data consists of just strings defining user-defined
                      * property names, but in prior incarnations, and perhaps
                      * somehow from pluggable regex engines, it could still
-                     * hold hex code point definitions.  Each component of a
-                     * range would be separated by a tab, and each range by a
-                     * new-line.  If these are found, instead add them to the
-                     * inversion list */
+                     * hold hex code point definitions, all of which should be
+                     * legal (or it wouldn't have gotten this far).  Each
+                     * component of a range would be separated by a tab, and
+                     * each range by a new-line.  If these are found, instead
+                     * add them to the inversion list */
                     I32 grok_flags =  PERL_SCAN_SILENT_ILLDIGIT
                                      |PERL_SCAN_SILENT_NON_PORTABLE;
                     STRLEN len = remaining;
@@ -19923,7 +20149,6 @@ Perl__get_regclass_nonbitmap_data(pTHX_ const regexp *prog,
 
     return invlist;
 }
-#endif /* !defined(PERL_IN_XSUB_RE) || defined(PLUGGABLE_RE_EXTENSION) */
 
 /* reg_skipcomment()
 
@@ -20087,7 +20312,7 @@ S_regnode_guts(pTHX_ RExC_state_t *pRExC_state, const U8 op, const STRLEN extra_
 
     const regnode_offset ret = RExC_emit;
 
-    GET_RE_DEBUG_FLAGS_DECL;
+    DECLARE_AND_GET_RE_DEBUG_FLAGS;
 
     PERL_ARGS_ASSERT_REGNODE_GUTS;
 
@@ -20153,6 +20378,22 @@ S_reganode(pTHX_ RExC_state_t *pRExC_state, U8 op, U32 arg)
     return(ret);
 }
 
+/*
+- regpnode - emit a temporary node of type 'op' with a void* argument 'arg'
+*/
+STATIC regnode_offset /* Location. */
+S_regpnode(pTHX_ RExC_state_t *pRExC_state, U8 op, void * arg)
+{
+    /* Final argument is this function's name (presumably for debug output) */
+    const regnode_offset ret = regnode_guts(pRExC_state, op, regarglen[op], "regpnode");
+    regnode_offset ptr = ret;   /* working copy, updated by the fill macro */
+
+    PERL_ARGS_ASSERT_REGPNODE;
+    FILL_ADVANCE_NODE_ARGp(ptr, op, arg);
+    RExC_emit = ptr;            /* record the new emit position */
+    return(ret);
+}
+
 STATIC regnode_offset
 S_reg2Lanode(pTHX_ RExC_state_t *pRExC_state, const U8 op, const U32 arg1, const I32 arg2)
 {
@@ -20193,7 +20434,7 @@ S_reginsert(pTHX_ RExC_state_t *pRExC_state, const U8 op,
     regnode *place;
     const int offset = regarglen[(U8)op];
     const int size = NODE_STEP_REGNODE + offset;
-    GET_RE_DEBUG_FLAGS_DECL;
+    DECLARE_AND_GET_RE_DEBUG_FLAGS;
 
     PERL_ARGS_ASSERT_REGINSERT;
     PERL_UNUSED_CONTEXT;
@@ -20297,7 +20538,7 @@ S_regtail(pTHX_ RExC_state_t * pRExC_state,
                 const U32 depth)
 {
     regnode_offset scan;
-    GET_RE_DEBUG_FLAGS_DECL;
+    DECLARE_AND_GET_RE_DEBUG_FLAGS;
 
     PERL_ARGS_ASSERT_REGTAIL;
 #ifndef DEBUGGING
@@ -20311,7 +20552,7 @@ S_regtail(pTHX_ RExC_state_t * pRExC_state,
         DEBUG_PARSE_r({
             DEBUG_PARSE_MSG((scan==p ? "tail" : ""));
             regprop(RExC_rx, RExC_mysv, REGNODE_p(scan), NULL, pRExC_state);
-            Perl_re_printf( aTHX_  "~ %s (%d) %s %s\n",
+            Perl_re_printf( aTHX_  "~ %s (%zu) %s %s\n",
                 SvPV_nolen_const(RExC_mysv), scan,
                     (temp == NULL ? "->" : ""),
                     (temp == NULL ? PL_reg_name[OP(REGNODE_p(val))] : "")
@@ -20371,7 +20612,7 @@ S_regtail_study(pTHX_ RExC_state_t *pRExC_state, regnode_offset p,
 #ifdef EXPERIMENTAL_INPLACESCAN
     I32 min = 0;
 #endif
-    GET_RE_DEBUG_FLAGS_DECL;
+    DECLARE_AND_GET_RE_DEBUG_FLAGS;
 
     PERL_ARGS_ASSERT_REGTAIL_STUDY;
 
@@ -20418,7 +20659,7 @@ S_regtail_study(pTHX_ RExC_state_t *pRExC_state, regnode_offset p,
         DEBUG_PARSE_r({
             DEBUG_PARSE_MSG((scan==p ? "tsdy" : ""));
             regprop(RExC_rx, RExC_mysv, REGNODE_p(scan), NULL, pRExC_state);
-            Perl_re_printf( aTHX_  "~ %s (%d) -> %s\n",
+            Perl_re_printf( aTHX_  "~ %s (%zu) -> %s\n",
                 SvPV_nolen_const(RExC_mysv),
                 scan,
                 PL_reg_name[exact]);
@@ -20575,7 +20816,7 @@ Perl_regdump(pTHX_ const regexp *r)
     SV * const sv = sv_newmortal();
     SV *dsv= sv_newmortal();
     RXi_GET_DECL(r, ri);
-    GET_RE_DEBUG_FLAGS_DECL;
+    DECLARE_AND_GET_RE_DEBUG_FLAGS;
 
     PERL_ARGS_ASSERT_REGDUMP;
 
@@ -20717,7 +20958,7 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_
     dVAR;
     int k;
     RXi_GET_DECL(prog, progi);
-    GET_RE_DEBUG_FLAGS_DECL;
+    DECLARE_AND_GET_RE_DEBUG_FLAGS;
 
     PERL_ARGS_ASSERT_REGPROP;
 
@@ -20919,10 +21160,17 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_
                                             ANYOFRbase(o) + ANYOFRdelta(o));
             }
             else {
-                (void) _get_regclass_nonbitmap_data(prog, o, FALSE,
+#if !defined(PERL_IN_XSUB_RE) || defined(PLUGGABLE_RE_EXTENSION)
+                (void) get_regclass_nonbitmap_data(prog, o, FALSE,
+                                                &unresolved,
+                                                &only_utf8_locale_invlist,
+                                                &nonbitmap_invlist);
+#else
+                (void) get_re_gclass_nonbitmap_data(prog, o, FALSE,
                                                 &unresolved,
                                                 &only_utf8_locale_invlist,
                                                 &nonbitmap_invlist);
+#endif
             }
 
             /* The non-bitmap data may contain stuff that could fit in the
@@ -21148,19 +21396,23 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_
 SV *
 Perl_re_intuit_string(pTHX_ REGEXP * const r)
 {                              /* Assume that RE_INTUIT is set */
+    /* Returns an SV containing a string that must appear in the target for it
+     * to match */
+
     struct regexp *const prog = ReANY(r);
-    GET_RE_DEBUG_FLAGS_DECL;
+    DECLARE_AND_GET_RE_DEBUG_FLAGS;
 
     PERL_ARGS_ASSERT_RE_INTUIT_STRING;
     PERL_UNUSED_CONTEXT;
 
     DEBUG_COMPILE_r(
        {
-           const char * const s = SvPV_nolen_const(RX_UTF8(r)
+            if (prog->maxlen > 0) {
+                const char * const s = SvPV_nolen_const(RX_UTF8(r)
                      ? prog->check_utf8 : prog->check_substr);
 
-           if (!PL_colorset) reginitcolors();
-            Perl_re_printf( aTHX_
+                if (!PL_colorset) reginitcolors();
+                Perl_re_printf( aTHX_
                      "%sUsing REx %ssubstr:%s \"%s%.60s%s%s\"\n",
                      PL_colors[4],
                      RX_UTF8(r) ? "utf8 " : "",
@@ -21168,6 +21420,7 @@ Perl_re_intuit_string(pTHX_ REGEXP * const r)
                      s,
                      PL_colors[1],
                      (strlen(s) > PL_dump_re_max_len ? "..." : ""));
+            }
        } );
 
     /* use UTF8 check substring if regexp pattern itself is in UTF8 */
@@ -21197,7 +21450,7 @@ void
 Perl_pregfree2(pTHX_ REGEXP *rx)
 {
     struct regexp *const r = ReANY(rx);
-    GET_RE_DEBUG_FLAGS_DECL;
+    DECLARE_AND_GET_RE_DEBUG_FLAGS;
 
     PERL_ARGS_ASSERT_PREGFREE2;
 
@@ -21361,7 +21614,7 @@ Perl_regfree_internal(pTHX_ REGEXP * const rx)
 {
     struct regexp *const r = ReANY(rx);
     RXi_GET_DECL(r, ri);
-    GET_RE_DEBUG_FLAGS_DECL;
+    DECLARE_AND_GET_RE_DEBUG_FLAGS;
 
     PERL_ARGS_ASSERT_REGFREE_INTERNAL;
 
@@ -21743,34 +21996,30 @@ Perl_regnext(pTHX_ regnode *p)
 #endif
 
 STATIC void
-S_re_croak2(pTHX_ bool utf8, const char* pat1, const char* pat2,...)
+S_re_croak(pTHX_ bool utf8, const char* pat,...) /* format 'pat', then croak */
 {
     va_list args;
-    STRLEN l1 = strlen(pat1);
-    STRLEN l2 = strlen(pat2);
+    STRLEN len = strlen(pat);
     char buf[512];
     SV *msv;
     const char *message;
 
-    PERL_ARGS_ASSERT_RE_CROAK2;
-
-    if (l1 > 510)
-       l1 = 510;
-    if (l1 + l2 > 510)
-       l2 = 510 - l1;
-    Copy(pat1, buf, l1 , char);
-    Copy(pat2, buf + l1, l2 , char);
-    buf[l1 + l2] = '\n';
-    buf[l1 + l2 + 1] = '\0';
-    va_start(args, pat2);
+    PERL_ARGS_ASSERT_RE_CROAK;
+
+    if (len > 510)      /* keep 2 bytes of buf[512] for the "\n" and NUL */
+       len = 510;
+    Copy(pat, buf, len , char);
+    buf[len] = '\n';
+    buf[len + 1] = '\0';
+    va_start(args, pat);        /* buf now holds the format given to vmess() */
     msv = vmess(buf, &args);
     va_end(args);
-    message = SvPV_const(msv, l1);
-    if (l1 > 512)
-       l1 = 512;
-    Copy(message, buf, l1 , char);
-    /* l1-1 to avoid \n */
-    Perl_croak(aTHX_ "%" UTF8f, UTF8fARG(utf8, l1-1, buf));
+    message = SvPV_const(msv, len);     /* 'len' becomes the formatted length */
+    if (len > 512)      /* clamp to the size of buf before copying back */
+       len = 512;
+    Copy(message, buf, len , char);
+    /* len-1 drops the final byte, which is the \n unless we truncated */
+    Perl_croak(aTHX_ "%" UTF8f, UTF8fARG(utf8, len-1, buf));
 }
 
 /* XXX Here's a total kludge.  But we need to re-enter for swash routines. */
@@ -22440,7 +22689,7 @@ S_dumpuntil(pTHX_ const regexp *r, const regnode *start, const regnode *node,
     const regnode *optstart= NULL;
 
     RXi_GET_DECL(r, ri);
-    GET_RE_DEBUG_FLAGS_DECL;
+    DECLARE_AND_GET_RE_DEBUG_FLAGS;
 
     PERL_ARGS_ASSERT_DUMPUNTIL;
 
@@ -22588,14 +22837,14 @@ S_dumpuntil(pTHX_ const regexp *r, const regnode *start, const regnode *node,
 
 #ifndef PERL_IN_XSUB_RE
 
-#include "uni_keywords.h"
+#  include "uni_keywords.h"
 
 void
 Perl_init_uniprops(pTHX)
 {
     dVAR;
 
-#ifdef DEBUGGING
+#  ifdef DEBUGGING
     char * dump_len_string;
 
     dump_len_string = PerlEnv_getenv("PERL_DUMP_RE_MAX_LEN");
@@ -22604,16 +22853,16 @@ Perl_init_uniprops(pTHX)
     {
         PL_dump_re_max_len = 60;    /* A reasonable default */
     }
-#endif
+#  endif
 
     PL_user_def_props = newHV();
 
-#ifdef USE_ITHREADS
+#  ifdef USE_ITHREADS
 
     HvSHAREKEYS_off(PL_user_def_props);
     PL_user_def_props_aTHX = aTHX;
 
-#endif
+#  endif
 
     /* Set up the inversion list interpreter-level variables */
 
@@ -22685,15 +22934,52 @@ Perl_init_uniprops(pTHX)
     PL_CCC_non0_non230 = _new_invlist_C_array(_Perl_CCC_non0_non230_invlist);
     PL_Private_Use = _new_invlist_C_array(uni_prop_ptrs[UNI_CO]);
 
-#ifdef UNI_XIDC
+#  ifdef UNI_XIDC
     /* The below are used only by deprecated functions.  They could be removed */
     PL_utf8_xidcont  = _new_invlist_C_array(uni_prop_ptrs[UNI_XIDC]);
     PL_utf8_idcont   = _new_invlist_C_array(uni_prop_ptrs[UNI_IDC]);
     PL_utf8_xidstart = _new_invlist_C_array(uni_prop_ptrs[UNI_XIDS]);
-#endif
+#  endif
 }
 
-#if 0
+/* These four functions are compiled only in regcomp.c, where they have access
+ * to the data they return.  They are a way for re_comp.c to get access to that
+ * data without having to compile the whole data structures. */
+
+I16     /* Whatever match_uniprop() returns for 'key' */
+Perl_do_uniprop_match(const char * const key, const U16 key_len)
+{
+    PERL_ARGS_ASSERT_DO_UNIPROP_MATCH;
+
+    return match_uniprop((U8 *) key, key_len);  /* cast only; no other work */
+}
+
+SV *    /* Inversion list built from uni_prop_ptrs[table_index] */
+Perl_get_prop_definition(pTHX_ const int table_index)
+{
+    PERL_ARGS_ASSERT_GET_PROP_DEFINITION;
+
+    /* Create and return the inversion list for the requested table entry */
+    return _new_invlist_C_array(uni_prop_ptrs[table_index]);
+}
+
+const char * const *    /* The UNI_prop_value_ptrs[] entry for the index */
+Perl_get_prop_values(const int table_index)
+{
+    PERL_ARGS_ASSERT_GET_PROP_VALUES;
+
+    return UNI_prop_value_ptrs[table_index];    /* plain table lookup */
+}
+
+const char *    /* The deprecated_property_msgs[] entry for the offset */
+Perl_get_deprecated_property_msg(const Size_t warning_offset)
+{
+    PERL_ARGS_ASSERT_GET_DEPRECATED_PROPERTY_MSG;
+
+    return deprecated_property_msgs[warning_offset];    /* plain table lookup */
+}
+
+#  if 0
 
 This code was mainly added for backcompat to give a warning for non-portable
 code points in user-defined properties.  But experiments showed that the
@@ -22723,10 +23009,111 @@ S_get_extended_utf8_msg(pTHX_ const UV cp)
     return SvPVX(*msg);
 }
 
+#  endif
+#endif /* end of ! PERL_IN_XSUB_RE */
+
+STATIC REGEXP *
+S_compile_wildcard(pTHX_ const char * subpattern, const STRLEN len,
+                         const bool ignore_case)
+{
+    /* Pretends that the input subpattern is qr/subpattern/aam, compiling it
+     * possibly with /i if the 'ignore_case' parameter is true.  Use /aa
+     * because nothing outside of ASCII will match.  Use /m because the input
+     * string may be a bunch of lines strung together.
+     *
+     * Also sets up the debugging info */
+
+    U32 flags = PMf_MULTILINE|PMf_WILDCARD;
+    U32 rx_flags;
+    SV * subpattern_sv = sv_2mortal(newSVpvn(subpattern, len));
+    REGEXP * subpattern_re;
+    DECLARE_AND_GET_RE_DEBUG_FLAGS;
+
+    PERL_ARGS_ASSERT_COMPILE_WILDCARD;
+
+    if (ignore_case) {
+        flags |= PMf_FOLD;
+    }
+    set_regex_charset(&flags, REGEX_ASCII_MORE_RESTRICTED_CHARSET);
+
+    /* Like in op.c, we copy the compile time pm flags to the rx ones */
+    rx_flags = flags & RXf_PMf_COMPILETIME;
+
+#ifndef PERL_IN_XSUB_RE
+    /* Use the core engine if this file is regcomp.c.  That means no
+     * 'use re "Debug ..."' is in effect, so the core engine is sufficient */
+    subpattern_re = Perl_re_op_compile(aTHX_ &subpattern_sv, 1, NULL,
+                                             &PL_core_reg_engine,
+                                             NULL, NULL,
+                                             rx_flags, flags);
+#else
+    if (isDEBUG_WILDCARD) {
+        /* Use the special debugging engine if this file is re_comp.c and wants
+         * to output the wildcard matching.  This uses whatever
+         * 'use re "Debug ..."' is in effect */
+        subpattern_re = Perl_re_op_compile(aTHX_ &subpattern_sv, 1, NULL,
+                                                 &my_reg_engine,
+                                                 NULL, NULL,
+                                                 rx_flags, flags);
+    }
+    else {
+        /* Use the special wildcard engine if this file is re_comp.c and
+         * doesn't want to output the wildcard matching.  This uses whatever
+         * 'use re "Debug ..."' is in effect for compilation, but this engine
+         * structure has been set up so that it uses the core engine for
+         * execution, so no execution debugging as a result of re.pm will be
+         * displayed. */
+        subpattern_re = Perl_re_op_compile(aTHX_ &subpattern_sv, 1, NULL,
+                                                 &wild_reg_engine,
+                                                 NULL, NULL,
+                                                 rx_flags, flags);
+        /* XXX The above has the effect that any user-supplied regex engine
+         * won't be called for matching wildcards.  That might be good, or bad.
+         * It could be changed in several ways.  The reason it is done the
+         * current way is to avoid having to save and restore
+         * ${^RE_DEBUG_FLAGS} around the execution.  save_scalar() perhaps
+         * could be used.  Another suggestion is to keep the authoritative
+         * value of the debug flags in a thread-local variable and add set/get
+         * magic to ${^RE_DEBUG_FLAGS} to keep the C level variable up to date.
+         * Still another is to pass a flag, say in the engine's intflags that
+         * would be checked each time before doing the debug output */
+    }
 #endif
 
+    assert(subpattern_re);  /* Should have died if didn't compile successfully */
+    return subpattern_re;
+}
+
+STATIC I32      /* The value that CALLREGEXEC yields */
+S_execute_wildcard(pTHX_ REGEXP * const prog, char* stringarg, char *strend,
+        char *strbeg, SSize_t minend, SV *screamer, U32 nosave)
+{
+    I32 result;
+    DECLARE_AND_GET_RE_DEBUG_FLAGS;
+
+    PERL_ARGS_ASSERT_EXECUTE_WILDCARD;
+
+    ENTER;      /* paired with the LEAVE below, bracketing the SAVEI32 */
+
+    /* The compilation has set things up so that if the program doesn't want to
+     * see the wildcard matching procedure, it will get the core execution
+     * engine, which is subject only to -Dr.  So we have to turn that off
+     * around this procedure */
+    if (! isDEBUG_WILDCARD) {
+        /* Note! Casts away 'volatile' */
+        SAVEI32(PL_debug);
+        PL_debug &= ~ DEBUG_r_FLAG;
+    }
+
+    result = CALLREGEXEC(prog, stringarg, strend, strbeg, minend, screamer,
+                         NULL, nosave);
+    LEAVE;      /* undoes the SAVEI32 above, if it was done */
+
+    return result;
+}
+
 SV *
-Perl_handle_user_defined_property(pTHX_
+S_handle_user_defined_property(pTHX_
 
     /* Parses the contents of a user-defined property definition; returning the
      * expanded definition if possible.  If so, the return is an inversion
@@ -22896,7 +23283,7 @@ Perl_handle_user_defined_property(pTHX_
             goto return_failure;
         }
 
-#if 0   /* See explanation at definition above of get_extended_utf8_msg() */
+#  if 0   /* See explanation at definition above of get_extended_utf8_msg() */
 
         if (   UNICODE_IS_PERL_EXTENDED(min)
             || UNICODE_IS_PERL_EXTENDED(max))
@@ -22914,7 +23301,7 @@ Perl_handle_user_defined_property(pTHX_
             sv_catpvs(msg, "\"");
         }
 
-#endif
+#  endif
 
         /* Here, this line contains a legal range */
         this_definition = sv_2mortal(_new_invlist(2));
@@ -23023,21 +23410,21 @@ Perl_handle_user_defined_property(pTHX_
 
 /* As explained below, certain operations need to take place in the first
  * thread created.  These macros switch contexts */
-#ifdef USE_ITHREADS
-#  define DECLARATION_FOR_GLOBAL_CONTEXT                                    \
+#  ifdef USE_ITHREADS
+#    define DECLARATION_FOR_GLOBAL_CONTEXT                                  \
                                         PerlInterpreter * save_aTHX = aTHX;
-#  define SWITCH_TO_GLOBAL_CONTEXT                                          \
+#    define SWITCH_TO_GLOBAL_CONTEXT                                        \
                            PERL_SET_CONTEXT((aTHX = PL_user_def_props_aTHX))
-#  define RESTORE_CONTEXT  PERL_SET_CONTEXT((aTHX = save_aTHX));
-#  define CUR_CONTEXT      aTHX
-#  define ORIGINAL_CONTEXT save_aTHX
-#else
-#  define DECLARATION_FOR_GLOBAL_CONTEXT
-#  define SWITCH_TO_GLOBAL_CONTEXT          NOOP
-#  define RESTORE_CONTEXT                   NOOP
-#  define CUR_CONTEXT                       NULL
-#  define ORIGINAL_CONTEXT                  NULL
-#endif
+#    define RESTORE_CONTEXT  PERL_SET_CONTEXT((aTHX = save_aTHX));
+#    define CUR_CONTEXT      aTHX
+#    define ORIGINAL_CONTEXT save_aTHX
+#  else
+#    define DECLARATION_FOR_GLOBAL_CONTEXT
+#    define SWITCH_TO_GLOBAL_CONTEXT          NOOP
+#    define RESTORE_CONTEXT                   NOOP
+#    define CUR_CONTEXT                       NULL
+#    define ORIGINAL_CONTEXT                  NULL
+#  endif
 
 STATIC void
 S_delete_recursion_entry(pTHX_ void *key)
@@ -23100,8 +23487,8 @@ S_get_fq_name(pTHX_
     return fq_name;
 }
 
-SV *
-Perl_parse_uniprop_string(pTHX_
+STATIC SV *
+S_parse_uniprop_string(pTHX_
 
     /* Parse the interior of a \p{}, \P{}.  Returns its definition if knowable
      * now.  If so, the return is an inversion list.
@@ -23135,14 +23522,15 @@ Perl_parse_uniprop_string(pTHX_
                                    user-defined property */
     SV * msg,                   /* Any error or warning msg(s) are appended to
                                    this */
-   const STRLEN level)          /* Recursion level of this call */
+    const STRLEN level)         /* Recursion level of this call */
 {
     dVAR;
     char* lookup_name;          /* normalized name for lookup in our tables */
     unsigned lookup_len;        /* Its length */
-    bool stricter = FALSE;      /* Some properties have stricter name
-                                   normalization rules, which we decide upon
-                                   based on parsing */
+    enum { Not_Strict = 0,      /* Some properties have stricter name */
+           Strict,              /* normalization rules, which we decide */
+           As_Is                /* upon based on parsing */
+         } stricter = Not_Strict;
 
     /* nv= or numeric_value=, or possibly one of the cjk numeric properties
      * (though it requires extra effort to download them from Unicode and
@@ -23276,7 +23664,7 @@ Perl_parse_uniprop_string(pTHX_
     } /* End of parsing through the lhs of the property name (or all of it if
          no rhs) */
 
-#define STRLENs(s)  (sizeof("" s "") - 1)
+#  define STRLENs(s)  (sizeof("" s "") - 1)
 
     /* If there is a single package name 'utf8::', it is ambiguous.  It could
      * be for a user-defined property, or it could be a Unicode property, as
@@ -23293,7 +23681,7 @@ Perl_parse_uniprop_string(pTHX_
      * or are positioned just after the '=' if it is compound. */
 
     if (equals_pos >= 0) {
-        assert(! stricter); /* We shouldn't have set this yet */
+        assert(stricter == Not_Strict); /* We shouldn't have set this yet */
 
         /* Space immediately after the '=' is ignored */
         i++;
@@ -23314,18 +23702,19 @@ Perl_parse_uniprop_string(pTHX_
                  * but it must be punctuation */
             && (name[i] != '\\' || (i < name_len && isPUNCT_A(name[i+1]))))
         {
-            /* Find the property.  The table includes the equals sign, so we
-             * use 'j' as-is */
-            table_index = match_uniprop((U8 *) lookup_name, j);
-            if (table_index) {
-                const char * const * prop_values
-                                            = UNI_prop_value_ptrs[table_index];
-                SV * subpattern;
-                Size_t subpattern_len;
+            bool special_property = memEQs(lookup_name, j - 1, "name")
+                                 || memEQs(lookup_name, j - 1, "na");
+            if (! special_property) {
+                /* Find the property.  The table includes the equals sign, so
+                 * we use 'j' as-is */
+                table_index = do_uniprop_match(lookup_name, j);
+            }
+            if (special_property || table_index) {
                 REGEXP * subpattern_re;
                 char open = name[i++];
                 char close;
                 const char * pos_in_brackets;
+                const char * const * prop_values;
                 bool escaped = 0;
 
                 /* Backslash => delimitter is the character following.  We
@@ -23359,20 +23748,42 @@ Perl_parse_uniprop_string(pTHX_
                     packWARN(WARN_EXPERIMENTAL__UNIPROP_WILDCARDS),
                     "The Unicode property wildcards feature is experimental");
 
-                /* Now create and compile the wildcard subpattern.  Use /iaa
-                 * because nothing outside of ASCII will match, and it the
-                 * property values should all match /i.  Note that when the
-                 * pattern fails to compile, our added text to the user's
-                 * pattern will be displayed to the user, which is not so
-                 * desirable. */
-                subpattern_len = name_len - i - 1 - escaped;
-                subpattern = Perl_newSVpvf(aTHX_ "(?iaa:%.*s)",
-                                              (unsigned) subpattern_len,
-                                              name + i);
-                subpattern = sv_2mortal(subpattern);
-                subpattern_re = re_compile(subpattern, 0);
-                assert(subpattern_re);  /* Should have died if didn't compile
-                                         successfully */
+                if (special_property) {
+                    const char * error_msg;
+                    const char * revised_name = name + i;
+                    Size_t revised_name_len = name_len - (i + 1 + escaped);
+
+                    /* Currently, the only 'special_property' is name, which we
+                     * look up in _charnames.pm */
+
+                    if (! load_charnames(newSVpvs("placeholder"),
+                                         revised_name, revised_name_len,
+                                         &error_msg))
+                    {
+                        sv_catpv(msg, error_msg);
+                        goto append_name_to_msg;
+                    }
+
+                    /* Farm this out to a function just to make the current
+                     * function less unwieldy */
+                    if (handle_names_wildcard(revised_name, revised_name_len,
+                                &prop_definition))
+                    {
+                        return prop_definition;
+                    }
+
+                    goto failed;
+                }
+
+                prop_values = get_prop_values(table_index);
+
+                /* Now create and compile the wildcard subpattern.  Use /i
+                 * because the property values are supposed to match with case
+                 * ignored. */
+                subpattern_re = compile_wildcard(name + i,
+                                                 name_len - i - 1 - escaped,
+                                                 TRUE /* /i */
+                                                );
 
                 /* For each legal property value, see if the supplied pattern
                  * matches it. */
@@ -23381,7 +23792,7 @@ Perl_parse_uniprop_string(pTHX_
                     const Size_t len = strlen(entry);
                     SV* entry_sv = newSVpvn_flags(entry, len, SVs_TEMP);
 
-                    if (pregexec(subpattern_re,
+                    if (execute_wildcard(subpattern_re,
                                  (char *) entry,
                                  (char *) entry + len,
                                  (char *) entry, 0,
@@ -23427,31 +23838,20 @@ Perl_parse_uniprop_string(pTHX_
             }
 
             /* Here's how khw thinks we should proceed to handle the properties
-             * not yet done:    Bidi Mirroring Glyph
-                                Bidi Paired Bracket
+             * not yet done:    Bidi Mirroring Glyph        can map to ""
+                                Bidi Paired Bracket         can map to ""
                                 Case Folding  (both full and simple)
+                                            Shouldn't /i be good enough for Full
                                 Decomposition Mapping
-                                Equivalent Unified Ideograph
-                                Name
-                                Name Alias
+                                Equivalent Unified Ideograph    can map to ""
                                 Lowercase Mapping  (both full and simple)
-                                NFKC Case Fold
+                                NFKC Case Fold                  can map to ""
                                 Titlecase Mapping  (both full and simple)
                                 Uppercase Mapping  (both full and simple)
-             * Move the part that looks at the property values into a perl
-             * script, like utf8_heavy.pl was done.  This makes things somewhat
-             * easier, but most importantly, it avoids always adding all these
-             * strings to the memory usage when the feature is little-used.
-             *
-             * The property values would all be concatenated into a single
-             * string per property with each value on a separate line, and the
-             * code point it's for on alternating lines.  Then we match the
-             * user's input pattern m//mg, without having to worry about their
-             * uses of '^' and '$'.  Only the values that aren't the default
-             * would be in the strings.  Code points would be in UTF-8.  The
-             * search pattern that we would construct would look like
-             * (?: \n (code-point_re) \n (?aam: user-re ) \n )
-             * And so $1 would contain the code point that matched the user-re.
+             * Handle these the same way Name is done, using say, _wild.pm, but
+             * having both loose and full, like in charclass_invlists.h.
+             * Perhaps move block and script to that as they are somewhat large
+             * in charclass_invlists.h.
              * For properties where the default is the code point itself, such
              * as any of the case changing mappings, the string would otherwise
              * consist of all Unicode code points in UTF-8 strung together.
@@ -23460,18 +23860,99 @@ Perl_parse_uniprop_string(pTHX_
              * error.  Otherwise run the pattern against every code point in
              * the ssc.  The ssc is kind of like tr18's 3.9 Possible Match Sets
              * And it might be good to create an API to return the ssc.
-             *
-             * For the name properties, a new function could be created in
-             * charnames which essentially does the same thing as above,
-             * sharing Name.pl with the other charname functions.  Don't know
-             * about loose name matching, or algorithmically determined names.
-             * Decomposition.pl similarly.
-             *
-             * It might be that a new pattern modifier would have to be
-             * created, like /t for resTricTed, which changed the behavior of
-             * some constructs in their subpattern, like \A. */
+             * Or handle them like the algorithmic names are done
+             */
         } /* End of is a wildcard subppattern */
 
+        /* \p{name=...} is handled specially.  Instead of using the normal
+         * mechanism involving charclass_invlists.h, it uses _charnames.pm
+         * which has the necessary (huge) data accessible to it, and which
+         * doesn't get loaded unless necessary.  The legal syntax for names is
+         * somewhat different than other properties due both to the vagaries of
+         * a few outlier official names, and the fact that only a few ASCII
+         * characters are permitted in them */
+        if (   memEQs(lookup_name, j - 1, "name")
+            || memEQs(lookup_name, j - 1, "na"))
+        {
+            dSP;
+            HV * table;
+            SV * character;
+            const char * error_msg;
+            CV* lookup_loose;
+            SV * character_name;
+            STRLEN character_len;
+            UV cp;
+
+            stricter = As_Is;
+
+            /* Since the RHS (after skipping initial space) is passed unchanged
+             * to charnames, and there are different criteria for what are
+             * legal characters in the name, just parse it here.  A character
+             * name must begin with an ASCII alphabetic */
+            if (! isALPHA(name[i])) {
+                goto failed;
+            }
+            lookup_name[j++] = name[i];
+
+            for (++i; i < name_len; i++) {
+                /* Official names can only be in the ASCII range, and only
+                 * certain characters */
+                if (! isASCII(name[i]) || ! isCHARNAME_CONT(name[i])) {
+                    goto failed;
+                }
+                lookup_name[j++] = name[i];
+            }
+
+            /* Finished parsing, save the name into an SV */
+            character_name = newSVpvn(lookup_name + equals_pos, j - equals_pos);
+
+            /* Make sure _charnames is loaded.  (The parameters give context
+             * for any errors generated) */
+            table = load_charnames(character_name, name, name_len, &error_msg);
+            if (table == NULL) {
+                sv_catpv(msg, error_msg);
+                goto append_name_to_msg;
+            }
+
+            lookup_loose = get_cv("_charnames::_loose_regcomp_lookup", 0);
+            if (! lookup_loose) {
+                Perl_croak(aTHX_
+                       "panic: Can't find '_charnames::_loose_regcomp_lookup");
+            }
+
+            PUSHSTACKi(PERLSI_REGCOMP);
+            ENTER ;
+            SAVETMPS;
+            save_re_context();
+
+            PUSHMARK(SP) ;
+            XPUSHs(character_name);
+            PUTBACK;
+            call_sv(MUTABLE_SV(lookup_loose), G_SCALAR);
+
+            SPAGAIN ;
+
+            character = POPs;
+            SvREFCNT_inc_simple_void_NN(character);
+
+            PUTBACK ;
+            FREETMPS ;
+            LEAVE ;
+            POPSTACK;
+
+            if (! SvOK(character)) {
+                goto failed;
+            }
+
+            cp = valid_utf8_to_uvchr((U8 *) SvPVX(character), &character_len);
+            if (character_len < SvCUR(character)) {
+                /* Temporarily, named sequences aren't handled */
+                goto failed;
+            }
+
+            prop_definition = add_cp_to_invlist(NULL, cp);
+            return prop_definition;
+        }
 
         /* Certain properties whose values are numeric need special handling.
          * They may optionally be prefixed by 'is'.  Ignore that prefix for the
@@ -23524,12 +24005,12 @@ Perl_parse_uniprop_string(pTHX_
              * But the numeric type properties can have the alphas [Ee] to
              * signify an exponent, and it is still a number with stricter
              * rules.  So look for an alpha that signifies not-strict */
-            stricter = TRUE;
+            stricter = Strict;
             for (k = i; k < name_len; k++) {
                 if (   isALPHA_A(name[k])
                     && (! is_nv_type || ! isALPHA_FOLD_EQ(name[k], 'E')))
                 {
-                    stricter = FALSE;
+                    stricter = Not_Strict;
                     break;
                 }
             }
@@ -23567,7 +24048,7 @@ Perl_parse_uniprop_string(pTHX_
             && memNEs(lookup_name + 4, j - 4, "space")
             && memNEs(lookup_name + 4, j - 4, "word"))
         {
-            stricter = TRUE;
+            stricter = Strict;
 
             /* We set the inputs back to 0 and the code below will reparse,
              * using strict */
@@ -23879,7 +24360,7 @@ Perl_parse_uniprop_string(pTHX_
              * for this property in the hash.  So we have the go ahead to
              * expand the definition ourselves. */
 
-            PUSHSTACKi(PERLSI_MAGIC);
+            PUSHSTACKi(PERLSI_REGCOMP);
             ENTER;
 
             /* Create a temporary placeholder in the hash to detect recursion
@@ -23915,6 +24396,7 @@ Perl_parse_uniprop_string(pTHX_
              * but not yet used. */
             save_item(PL_subname);
 
+            /* G_SCALAR guarantees a single return value */
             (void) call_sv(user_sub_sv, G_EVAL|G_SCALAR);
 
             SPAGAIN;
@@ -23942,7 +24424,7 @@ Perl_parse_uniprop_string(pTHX_
                 (void) POPs;
                 prop_definition = NULL;
             }
-            else {  /* G_SCALAR guarantees a single return value */
+            else {
                 SV * contents = POPs;
 
                 /* The contents is supposed to be the expansion of the property
@@ -24029,7 +24511,7 @@ Perl_parse_uniprop_string(pTHX_
 
     /* Get the index into our pointer table of the inversion list corresponding
      * to the property */
-    table_index = match_uniprop((U8 *) lookup_name, lookup_len);
+    table_index = do_uniprop_match(lookup_name, lookup_len);
 
     /* If it didn't find the property ... */
     if (table_index == 0) {
@@ -24044,7 +24526,7 @@ Perl_parse_uniprop_string(pTHX_
             equals_pos -= 2;
             slash_pos -= 2;
 
-            table_index = match_uniprop((U8 *) lookup_name, lookup_len);
+            table_index = do_uniprop_match(lookup_name, lookup_len);
         }
 
         if (table_index == 0) {
@@ -24208,7 +24690,7 @@ Perl_parse_uniprop_string(pTHX_
             }
 
             /* Here, we have the number in canonical form.  Try that */
-            table_index = match_uniprop((U8 *) canonical, strlen(canonical));
+            table_index = do_uniprop_match(canonical, strlen(canonical));
             if (table_index == 0) {
                 goto failed;
             }
@@ -24232,7 +24714,8 @@ Perl_parse_uniprop_string(pTHX_
         table_index %= MAX_UNI_KEYWORD_INDEX;
         Perl_ck_warner_d(aTHX_ packWARN(WARN_DEPRECATED),
                 "Use of '%.*s' in \\p{} or \\P{} is deprecated because: %s",
-                (int) name_len, name, deprecated_property_msgs[warning_offset]);
+                (int) name_len, name,
+                get_deprecated_property_msg(warning_offset));
     }
 
     /* In a few properties, a different property is used under /i.  These are
@@ -24260,10 +24743,9 @@ Perl_parse_uniprop_string(pTHX_
     }
 
     /* Create and return the inversion list */
-    prop_definition =_new_invlist_C_array(uni_prop_ptrs[table_index]);
+    prop_definition = get_prop_definition(table_index);
     sv_2mortal(prop_definition);
 
-
     /* See if there is a private use override to add to this definition */
     {
         COPHH * hinthash = (IN_PERL_COMPILETIME)
@@ -24383,8 +24865,386 @@ Perl_parse_uniprop_string(pTHX_
     }
 }
 
+STATIC bool
+S_handle_names_wildcard(pTHX_ const char * wname, /* wildcard name to match */
+                              const STRLEN wname_len, /* Its length */
+                              SV ** prop_definition)
+{
+    /* Match the wildcard subpattern 'wname' against every character name;
+     * returns TRUE if any did, adding their code points to *prop_definition */
+
+    dSP;
+
+    CV * get_names_info;        /* _charnames sub that returns the info we need */
+    SV * names_string;          /* Contains all character names, except algo */
+    SV * algorithmic_names;     /* Contains info about algorithmically
+                                   generated character names */
+    REGEXP * subpattern_re;     /* The user's pattern to match with */
+    struct regexp * prog;       /* The compiled pattern */
+    char * all_names_start;     /* lib/unicore/Name.pl string of every
+                                   (non-algorithmic) character name */
+    char * cur_pos;             /* We match, effectively using /gc; this is
+                                   where we are now */
+    bool found_matches = FALSE; /* Did any name match so far? */
+    SV * empty;                 /* For matching zero length names */
+    SV * must;                  /* What substring, if any, must be in a name
+                                   for the subpattern to match */
+    SV * syllable_name = NULL;  /* For Hangul syllables */
+    const char hangul_prefix[] = "HANGUL SYLLABLE ";
+    const STRLEN hangul_prefix_len = sizeof(hangul_prefix) - 1;
+
+    /* By inspection, there are a maximum of 7 bytes in the suffix of a hangul
+     * syllable name, and these are immutable and guaranteed by the Unicode
+     * standard to never be extended */
+    const STRLEN syl_max_len = hangul_prefix_len + 7;
+
+    IV i;
+
+    PERL_ARGS_ASSERT_HANDLE_NAMES_WILDCARD;
+
+    /* Find the _charnames entry point that supplies the names data; it is
+     * an internal error (panic) if it can't be found */
+    get_names_info = get_cv("_charnames::_get_names_info", 0);
+    if (! get_names_info) {
+        Perl_croak(aTHX_ "panic: Can't find '_charnames::_get_names_info");
+    }
+
+    /* Get the charnames data */
+    PUSHSTACKi(PERLSI_REGCOMP);
+    ENTER ;
+    SAVETMPS;
+    save_re_context();
+
+    PUSHMARK(SP) ;
+    PUTBACK;
+
+    /* Special _charnames entry point that returns the info this routine
+     * requires */
+    call_sv(MUTABLE_SV(get_names_info), G_ARRAY);
+
+    SPAGAIN ;
+
+    /* Data structure for names which end in their very own code points */
+    algorithmic_names = POPs;
+    SvREFCNT_inc_simple_void_NN(algorithmic_names);
+
+    /* The lib/unicore/Name.pl string */
+    names_string = POPs;
+    SvREFCNT_inc_simple_void_NN(names_string);
+
+    PUTBACK ;
+    FREETMPS ;
+    LEAVE ;
+    POPSTACK;
+
+    if (   ! SvROK(names_string)
+        || ! SvROK(algorithmic_names))
+    {   /* Perhaps should panic instead XXX */
+        SvREFCNT_dec(names_string);
+        SvREFCNT_dec(algorithmic_names);
+        return FALSE;
+    }
+
+    names_string = sv_2mortal(SvRV(names_string));
+    all_names_start = SvPVX(names_string);
+    cur_pos = all_names_start;
+
+    algorithmic_names= sv_2mortal(SvRV(algorithmic_names));
+
+    /* Compile the subpattern consisting of the name being looked for */
+    subpattern_re = compile_wildcard(wname, wname_len, FALSE /* /-i */ );
+    must = re_intuit_string(subpattern_re);
+    prog = ReANY(subpattern_re);
+
+    /* If only the empty string can match, skip to the empty names check */
+    if (prog->maxlen == 0) {
+        goto check_empty;
+    }
+
+    /* And match against the string of all names /gc.  Don't even try if it
+     * must match a character not found in any name. */
+    if ( ! must
+        || SvCUR(must) == 0
+        || strspn(SvPVX(must), "\n -0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ()")
+                                                              == SvCUR(must))
+    {
+        while (execute_wildcard(subpattern_re,
+                                cur_pos,
+                                SvEND(names_string),
+                                all_names_start, 0,
+                                names_string,
+                                0))
+        { /* Here, matched. */
+
+            /* Note the string entries look like
+             *      00001\nSTART OF HEADING\n\n
+             * so we could match anywhere in that string.  We have to rule out
+             * matching a code point line */
+            char * this_name_start = all_names_start
+                                                + RX_OFFS(subpattern_re)->start;
+            char * this_name_end   = all_names_start
+                                                + RX_OFFS(subpattern_re)->end;
+            char * cp_start;
+            char * cp_end;
+            UV cp;
+
+            /* If matched nothing, advance to next possible match */
+            if (this_name_start == this_name_end) {
+                cur_pos = (char *) memchr(this_name_end + 1, '\n',
+                                          SvEND(names_string) - this_name_end);
+                if (cur_pos == NULL) {
+                    break;
+                }
+            }
+            else {
+                /* Position the next match to start beyond the current returned
+                 * entry */
+                cur_pos = (char *) memchr(this_name_end, '\n',
+                                          SvEND(names_string) - this_name_end);
+            }
+
+            /* Back up to the \n just before the beginning of the character. */
+            cp_end = (char *) my_memrchr(all_names_start,
+                                         '\n',
+                                         this_name_start - all_names_start);
+
+            /* If we didn't find a \n, it means it matched somewhere in the
+             * initial '00000' in the string, so isn't a real match */
+            if (cp_end == NULL) {
+                continue;
+            }
+
+            this_name_start = cp_end + 1;   /* The name starts just after */
+            cp_end--;                       /* the \n, and the code point */
+                                            /* ends just before it */
+
+            /* All code points are 5 digits long */
+            cp_start = cp_end - 4;
+
+            /* Except for the first line in the string, the sequence before the
+             * code point is \n\n.  If that isn't the case here, we didn't
+             * match the name of a character.  (We could have matched a named
+             * sequence, not currently handled.) */
+            if (      cp_start > all_names_start + 1
+                && (*(cp_start - 1) != '\n' || *(cp_start - 2) != '\n'))
+            {
+                continue;
+            }
+
+            /* Calculate the code point from its 5 digits */
+            cp = (XDIGIT_VALUE(cp_start[0]) << 16)
+               + (XDIGIT_VALUE(cp_start[1]) << 12)
+               + (XDIGIT_VALUE(cp_start[2]) << 8)
+               + (XDIGIT_VALUE(cp_start[3]) << 4)
+               +  XDIGIT_VALUE(cp_start[4]);
+
+            /* We matched!  Add this to the list */
+            *prop_definition = add_cp_to_invlist(*prop_definition, cp);
+            found_matches = TRUE;
+        } /* End of loop through the non-algorithmic names string */
+    }
+
+    /* There are also character names not in 'names_string'.  These are
+     * algorithmically generatable.  Try this pattern on each possible one.
+     * (khw originally planned to leave this out given the large number of
+     * matches attempted; but the speed turned out to be quite acceptable.)
+     *
+     * There are plenty of opportunities to optimize to skip many of the tests
+     * beyond the rudimentary ones already here. */
+
+    /* First see if the subpattern matches any of the algorithmically
+     * generatable Hangul syllable names.
+     *
+     * We know none of these syllable names will match if the input pattern
+     * requires more bytes than any syllable has, or if the input pattern only
+     * matches an empty name, or if the pattern has something it must match and
+     * one of the characters in that isn't in any Hangul syllable. */
+    if (    prog->minlen <= (SSize_t) syl_max_len
+        &&  prog->maxlen > 0
+        && ( ! must
+            || SvCUR(must) == 0
+            || strspn(SvPVX(must), "\n ABCDEGHIJKLMNOPRSTUWY") == SvCUR(must)))
+    {
+        /* These constants, names, values, and algorithm are adapted from the
+         * Unicode standard, version 5.1, section 3.12, and should never
+         * change. */
+        const char * JamoL[] = {
+            "G", "GG", "N", "D", "DD", "R", "M", "B", "BB",
+            "S", "SS", "", "J", "JJ", "C", "K", "T", "P", "H"
+        };
+        const int LCount = C_ARRAY_LENGTH(JamoL);
+
+        const char * JamoV[] = {
+            "A", "AE", "YA", "YAE", "EO", "E", "YEO", "YE", "O", "WA",
+            "WAE", "OE", "YO", "U", "WEO", "WE", "WI", "YU", "EU", "YI",
+            "I"
+        };
+        const int VCount = C_ARRAY_LENGTH(JamoV);
+
+        const char * JamoT[] = {
+            "", "G", "GG", "GS", "N", "NJ", "NH", "D", "L",
+            "LG", "LM", "LB", "LS", "LT", "LP", "LH", "M", "B",
+            "BS", "S", "SS", "NG", "J", "C", "K", "T", "P", "H"
+        };
+        const int TCount = C_ARRAY_LENGTH(JamoT);
+
+        int L, V, T;
+
+        /* This is the initial Hangul syllable code point; each time through the
+         * inner loop, it maps to the next higher code point.  For more info,
+         * see the Hangul syllable section of the Unicode standard. */
+        int cp = 0xAC00;
+
+        syllable_name = sv_2mortal(newSV(syl_max_len));
+        sv_setpvn(syllable_name, hangul_prefix, hangul_prefix_len);
+
+        for (L = 0; L < LCount; L++) {
+            for (V = 0; V < VCount; V++) {
+                for (T = 0; T < TCount; T++) {
+
+                    /* Truncate back to the prefix, which is unvarying */
+                    SvCUR_set(syllable_name, hangul_prefix_len);
+
+                    sv_catpv(syllable_name, JamoL[L]);
+                    sv_catpv(syllable_name, JamoV[V]);
+                    sv_catpv(syllable_name, JamoT[T]);
+
+                    if (execute_wildcard(subpattern_re,
+                                SvPVX(syllable_name),
+                                SvEND(syllable_name),
+                                SvPVX(syllable_name), 0,
+                                syllable_name,
+                                0))
+                    {
+                        *prop_definition = add_cp_to_invlist(*prop_definition,
+                                                             cp);
+                        found_matches = TRUE;
+                    }
+
+                    cp++;
+                }
+            }
+        }
+    }
+
+    /* The rest of the algorithmically generatable names are of the form
+     * "PREFIX-code_point".  The prefixes and the code point limits of each
+     * were returned to us in the array 'algorithmic_names' from data in
+     * lib/unicore/Name.pm.  'code_point' in the name is expressed in hex. */
+    for (i = 0; i <= av_top_index((AV *) algorithmic_names); i++) {
+        IV j;
+
+        /* Each element of the array is a hash, giving the details for the
+         * series of names it covers.  There is the base name of the characters
+         * in the series, and the low and high code points in the series.  And,
+         * for optimization purposes a string containing all the legal
+         * characters that could possibly be in a name in this series. */
+        HV * this_series = (HV *) SvRV(* av_fetch((AV *) algorithmic_names, i, 0));
+        SV * prefix = * hv_fetchs(this_series, "name", 0);
+        IV low = SvIV(* hv_fetchs(this_series, "low", 0));
+        IV high = SvIV(* hv_fetchs(this_series, "high", 0));
+        char * legal = SvPVX(* hv_fetchs(this_series, "legal", 0));
+
+        /* Pre-allocate an SV with enough space */
+        SV * algo_name = sv_2mortal(Perl_newSVpvf(aTHX_ "%s-0000",
+                                                        SvPVX(prefix)));
+        if (high >= 0x10000) {
+            sv_catpvs(algo_name, "0");
+        }
+
+        /* This series can be skipped entirely if the pattern requires
+         * something longer than any name in the series, or can only match an
+         * empty name, or contains a character not found in any name in the
+         * series */
+        if (    prog->minlen <= (SSize_t) SvCUR(algo_name)
+            &&  prog->maxlen > 0
+            && ( ! must
+                || SvCUR(must) == 0
+                || strspn(SvPVX(must), legal) == SvCUR(must)))
+        {
+            for (j = low; j <= high; j++) { /* For each code point in the series */
+
+                /* Get its name, and see if it matches the subpattern */
+                Perl_sv_setpvf(aTHX_ algo_name, "%s-%X", SvPVX(prefix),
+                                     (unsigned) j);
+
+                if (execute_wildcard(subpattern_re,
+                                    SvPVX(algo_name),
+                                    SvEND(algo_name),
+                                    SvPVX(algo_name), 0,
+                                    algo_name,
+                                    0))
+                {
+                    *prop_definition = add_cp_to_invlist(*prop_definition, j);
+                    found_matches = TRUE;
+                }
+            }
+        }
+    }
+
+  check_empty:
+    /* Finally, see if the subpattern matches an empty string */
+    empty = newSVpvs("");
+    if (execute_wildcard(subpattern_re,
+                         SvPVX(empty),
+                         SvEND(empty),
+                         SvPVX(empty), 0,
+                         empty,
+                         0))
+    {
+        /* Many code points have empty names.  Currently these are the \p{GC=C}
+         * ones, minus CC and CF */
+
+        SV * empty_names_ref = get_prop_definition(UNI_C);
+        SV * empty_names = invlist_clone(empty_names_ref, NULL);
+
+        SV * subtract = get_prop_definition(UNI_CC);
+
+        _invlist_subtract(empty_names, subtract, &empty_names);
+        SvREFCNT_dec_NN(empty_names_ref);
+        SvREFCNT_dec_NN(subtract);
+
+        subtract = get_prop_definition(UNI_CF);
+        _invlist_subtract(empty_names, subtract, &empty_names);
+        SvREFCNT_dec_NN(subtract);
+
+        _invlist_union(*prop_definition, empty_names, prop_definition);
+        found_matches = TRUE;
+        SvREFCNT_dec_NN(empty_names);
+    }
+    SvREFCNT_dec_NN(empty);
+
+#if 0
+    /* If we ever were to accept aliases for, say private use names, we would
+     * need to do something fancier to find empty names.  The code below works
+     * (at the time it was written), and is slower than the above */
+    const char empties_pat[] = "^.";
+    if (strNE(name, empties_pat)) {
+        SV * empty = newSVpvs("");
+        if (execute_wildcard(subpattern_re,
+                    SvPVX(empty),
+                    SvEND(empty),
+                    SvPVX(empty), 0,
+                    empty,
+                    0))
+        {
+            SV * empties = NULL;
+
+            (void) handle_names_wildcard(empties_pat, strlen(empties_pat), &empties);
+
+            _invlist_union_complement_2nd(*prop_definition, empties, prop_definition);
+            SvREFCNT_dec_NN(empties);
+
+            found_matches = TRUE;
+        }
+        SvREFCNT_dec_NN(empty);
+    }
 #endif
 
+    SvREFCNT_dec_NN(subpattern_re);
+    return found_matches;
+}
+
 /*
  * ex: set ts=8 sts=4 sw=4 et:
  */