This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
regcomp.c: Move some code in regpiece()
[perl5.git] / regcomp.c
index 59b346b..f2ee72a 100644 (file)
--- a/regcomp.c
+++ b/regcomp.c
  * precedence is structured in regular expressions.  Serious changes in
  * regular-expression syntax might require a total rethink.
  */
+
+/* Note on debug output:
+ *
+ * This is set up so that -Dr turns on debugging like all other flags that are
+ * enabled by -DDEBUGGING.  -Drv gives more verbose output.  This applies to
+ * all regular expressions encountered in a program, and gives a huge amount of
+ * output for all but the shortest programs.
+ *
+ * The ability to output pattern debugging information lexically, and with much
+ * finer grained control was added, with 'use re qw(Debug ....);' available even
+ * in non-DEBUGGING builds.  This is accomplished by copying the contents of
+ * regcomp.c to ext/re/re_comp.c, and regexec.c is copied to ext/re/re_exec.c.
+ * Those files are compiled and linked into the perl executable, and they are
+ * compiled essentially as if DEBUGGING were enabled, and controlled by calls
+ * to re.pm.
+ *
+ * That would normally mean linking errors when two functions of the same name
+ * are attempted to be placed into the same executable.  That is solved in one
+ * of four ways:
+ *  1)  Static functions aren't known outside the file they are in, so for the
+ *      many functions of that type in this file, it just isn't a problem.
+ *  2)  Most externally known functions are enclosed in
+ *          #ifndef PERL_IN_XSUB_RE
+ *          ...
+ *          #endif
+ *      blocks, so there is only one defintion for them in the whole
+ *      executable, the one in regcomp.c (or regexec.c).  The implication of
+ *      that is any debugging info that comes from them is controlled only by
+ *      -Dr.  Further, any static function they call will also be the version
+ *      in regcomp.c (or regexec.c), so its debugging will also be by -Dr.
+ *  3)  About a dozen external functions are re-#defined in ext/re/re_top.h, to
+ *      have different names, so that what gets loaded in the executable is
+ *      'Perl_foo' from regcomp.c (and regexec.c), and the identical function
+ *      from re_comp.c (and re_exec.c), but with the name 'my_foo'  Debugging
+ *      in the 'Perl_foo' versions is controlled by -Dr, but the 'my_foo'
+ *      versions and their callees are under control of re.pm.   The catch is
+ *      that references to all these go through the regexp_engine structure,
+ *      which is initialized in regcomp.h to the Perl_foo versions, and
+ *      substituted out in lexical scopes where 'use re' is in effect to the
+ *      'my_foo' ones.   That structure is public API, so it would be a hard
+ *      sell to add any additional members.
+ *  4)  For functions in regcomp.c and re_comp.c that are called only from,
+ *      respectively, regexec.c and re_exec.c, they can have two different
+ *      names, depending on #ifdef'ing PERL_IN_XSUB_RE, in both regexec.c and
+ *      embed.fnc.
+ *
+ * The bottom line is that if you add code to one of the public functions
+ * listed in ext/re/re_top.h, debugging automagically works.  But if you write
+ * a new function that needs to do debugging or there is a chain of calls from
+ * it that need to do debugging, all functions in the chain should use options
+ * 2) or 4) above.
+ *
+ * A function may have to be split so that debugging stuff is static, but it
+ * calls out to some other function that only gets compiled in regcomp.c to
+ * access data that we don't want to duplicate.
+ */
+
 #include "EXTERN.h"
 #define PERL_IN_REGCOMP_C
 #include "perl.h"
 #ifdef PERL_IN_XSUB_RE
 #  include "re_comp.h"
 EXTERN_C const struct regexp_engine my_reg_engine;
+EXTERN_C const struct regexp_engine wild_reg_engine;
 #else
 #  include "regcomp.h"
 #endif
@@ -105,6 +163,7 @@ typedef struct scan_frame {
     regnode *next_regnode;      /* next node to process when last is reached */
     U32 prev_recursed_depth;
     I32 stopparen;              /* what stopparen do we use */
+    bool in_gosub;              /* this or an outer frame is for GOSUB */
 
     struct scan_frame *this_prev_frame; /* this previous frame */
     struct scan_frame *prev_frame;      /* previous frame */
@@ -139,11 +198,16 @@ struct RExC_state_t {
     regnode_offset emit;               /* Code-emit pointer */
     I32                naughty;                /* How bad is this pattern? */
     I32                sawback;                /* Did we see \1, ...? */
-    U32                seen;
     SSize_t    size;                   /* Number of regnode equivalents in
                                            pattern */
     Size_t      sets_depth;              /* Counts recursion depth of already-
                                            compiled regex set patterns */
+    U32                seen;
+
+    I32      parens_buf_size;           /* #slots malloced open/close_parens */
+    regnode_offset *open_parens;       /* offsets to open parens */
+    regnode_offset *close_parens;      /* offsets to close parens */
+    HV         *paren_names;           /* Paren names */
 
     /* position beyond 'precomp' of the warning message furthest away from
      * 'precomp'.  During the parse, no warnings are raised for any problems
@@ -164,9 +228,6 @@ struct RExC_state_t {
     I32                nestroot;               /* root parens we are in - used by
                                            accept */
     I32                seen_zerolen;
-    regnode_offset *open_parens;       /* offsets to open parens */
-    regnode_offset *close_parens;      /* offsets to close parens */
-    I32      parens_buf_size;           /* #slots malloced open/close_parens */
     regnode     *end_op;                /* END node in program */
     I32                utf8;           /* whether the pattern is utf8 or not */
     I32                orig_utf8;      /* whether the pattern was originally in utf8 */
@@ -175,10 +236,9 @@ struct RExC_state_t {
     I32                uni_semantics;  /* If a d charset modifier should use unicode
                                   rules, even if the pattern is not in
                                   utf8 */
-    HV         *paren_names;           /* Paren names */
 
-    regnode    **recurse;              /* Recurse regops */
     I32         recurse_count;          /* Number of recurse regops we have generated */
+    regnode    **recurse;              /* Recurse regops */
     U8          *study_chunk_recursed;  /* bitmap of which subs we have moved
                                            through */
     U32         study_chunk_recursed_bytes;  /* bytes in bitmap */
@@ -188,25 +248,21 @@ struct RExC_state_t {
     I32                override_recoding;
     I32         recode_x_to_native;
     I32                in_multi_char_class;
+    int                code_index;             /* next code_blocks[] slot */
     struct reg_code_blocks *code_blocks;/* positions of literal (?{})
                                            within pattern */
-    int                code_index;             /* next code_blocks[] slot */
     SSize_t     maxlen;                        /* mininum possible number of chars in string to match */
     scan_frame *frame_head;
     scan_frame *frame_last;
     U32         frame_count;
     AV         *warn_text;
     HV         *unlexed_names;
-#ifdef ADD_TO_REGEXEC
-    char       *starttry;              /* -Dr: where regtry was called. */
-#define RExC_starttry  (pRExC_state->starttry)
-#endif
     SV         *runtime_code_qr;       /* qr with the runtime code blocks */
 #ifdef DEBUGGING
     const char  *lastparse;
     I32         lastnum;
-    AV          *paren_name_list;       /* idx -> name */
     U32         study_chunk_recursed_count;
+    AV          *paren_name_list;       /* idx -> name */
     SV          *mysv1;
     SV          *mysv2;
 
@@ -314,22 +370,15 @@ struct RExC_state_t {
         RExC_naughty += RExC_naughty / (exp) + (add)
 
 #define        ISMULT1(c)      ((c) == '*' || (c) == '+' || (c) == '?')
-#define        ISMULT2(s)      ((*s) == '*' || (*s) == '+' || (*s) == '?' || \
-       ((*s) == '{' && regcurly(s)))
+#define        ISMULT2(s)      (ISMULT1(*s) || ((*s) == '{' && regcurly(s)))
 
 /*
  * Flags to be passed up and down.
  */
-#define        WORST           0       /* Worst case. */
 #define        HASWIDTH        0x01    /* Known to not match null strings, could match
                                    non-null ones. */
-
-/* Simple enough to be STAR/PLUS operand; in an EXACTish node must be a single
- * character.  (There needs to be a case: in the switch statement in regexec.c
- * for any node marked SIMPLE.)  Note that this is not the same thing as
- * REGNODE_SIMPLE */
-#define        SIMPLE          0x02
-#define        SPSTART         0x04    /* Starts with * or + */
+#define        SIMPLE          0x02    /* Exactly one character wide */
+                                /* (or LNBREAK as a special case) */
 #define POSTPONED      0x08    /* (?1),(?&name), (??{...}) or similar */
 #define TRYAGAIN       0x10    /* Weeded out a declaration. */
 #define RESTART_PARSE   0x20    /* Need to redo the parse */
@@ -448,6 +497,9 @@ struct RExC_state_t {
 #define DEFERRED_COULD_BE_OFFICIAL_MARKERs  "~"
 #define DEFERRED_COULD_BE_OFFICIAL_MARKERc  '~'
 
+/* What is infinity for optimization purposes */
+#define OPTIMIZE_INFTY  SSize_t_MAX
+
 /* About scan_data_t.
 
   During optimisation we recurse through the regexp program performing
@@ -482,7 +534,7 @@ struct RExC_state_t {
 
   - max_offset
     Only used for floating strings. This is the rightmost point that
-    the string can appear at. If set to SSize_t_MAX it indicates that the
+    the string can appear at. If set to OPTIMIZE_INFTY it indicates that the
     string can occur infinitely far to the right.
     For fixed strings, it is equal to min_offset.
 
@@ -795,7 +847,7 @@ static const scan_data_t zero_scan_data = {
  * Like Simple_vFAIL(), but accepts two arguments.
  */
 #define        Simple_vFAIL2(m,a1) STMT_START {                        \
-    S_re_croak2(aTHX_ UTF, m, REPORT_LOCATION, a1,             \
+    S_re_croak(aTHX_ UTF, m REPORT_LOCATION, a1,               \
                       REPORT_LOCATION_ARGS(RExC_parse));       \
 } STMT_END
 
@@ -812,7 +864,7 @@ static const scan_data_t zero_scan_data = {
  * Like Simple_vFAIL(), but accepts three arguments.
  */
 #define        Simple_vFAIL3(m, a1, a2) STMT_START {                   \
-    S_re_croak2(aTHX_ UTF, m, REPORT_LOCATION, a1, a2,         \
+    S_re_croak(aTHX_ UTF, m REPORT_LOCATION, a1, a2,           \
            REPORT_LOCATION_ARGS(RExC_parse));                  \
 } STMT_END
 
@@ -828,7 +880,7 @@ static const scan_data_t zero_scan_data = {
  * Like Simple_vFAIL(), but accepts four arguments.
  */
 #define        Simple_vFAIL4(m, a1, a2, a3) STMT_START {               \
-    S_re_croak2(aTHX_ UTF, m, REPORT_LOCATION, a1, a2, a3,     \
+    S_re_croak(aTHX_ UTF, m REPORT_LOCATION, a1, a2, a3,       \
            REPORT_LOCATION_ARGS(RExC_parse));                  \
 } STMT_END
 
@@ -840,13 +892,13 @@ static const scan_data_t zero_scan_data = {
 /* A specialized version of vFAIL2 that works with UTF8f */
 #define vFAIL2utf8f(m, a1) STMT_START {             \
     PREPARE_TO_DIE;                                 \
-    S_re_croak2(aTHX_ UTF, m, REPORT_LOCATION, a1,  \
+    S_re_croak(aTHX_ UTF, m REPORT_LOCATION, a1,  \
             REPORT_LOCATION_ARGS(RExC_parse));      \
 } STMT_END
 
 #define vFAIL3utf8f(m, a1, a2) STMT_START {             \
     PREPARE_TO_DIE;                                     \
-    S_re_croak2(aTHX_ UTF, m, REPORT_LOCATION, a1, a2,  \
+    S_re_croak(aTHX_ UTF, m REPORT_LOCATION, a1, a2,  \
             REPORT_LOCATION_ARGS(RExC_parse));          \
 } STMT_END
 
@@ -1180,7 +1232,7 @@ static void
 S_debug_studydata(pTHX_ const char *where, scan_data_t *data,
                     U32 depth, int is_inf)
 {
-    GET_RE_DEBUG_FLAGS_DECL;
+    DECLARE_AND_GET_RE_DEBUG_FLAGS;
 
     DEBUG_OPTIMISE_MORE_r({
         if (!data)
@@ -1234,7 +1286,7 @@ static void
 S_debug_peep(pTHX_ const char *str, const RExC_state_t *pRExC_state,
                 regnode *scan, U32 depth, U32 flags)
 {
-    GET_RE_DEBUG_FLAGS_DECL;
+    DECLARE_AND_GET_RE_DEBUG_FLAGS;
 
     DEBUG_OPTIMISE_r({
         regnode *Next;
@@ -1422,7 +1474,7 @@ S_scan_commit(pTHX_ const RExC_state_t *pRExC_state, scan_data_t *data,
     const STRLEN l = CHR_SVLEN(data->last_found);
     SV * const longest_sv = data->substrs[data->cur_is_floating].str;
     const STRLEN old_l = CHR_SVLEN(longest_sv);
-    GET_RE_DEBUG_FLAGS_DECL;
+    DECLARE_AND_GET_RE_DEBUG_FLAGS;
 
     PERL_ARGS_ASSERT_SCAN_COMMIT;
 
@@ -1434,20 +1486,20 @@ S_scan_commit(pTHX_ const RExC_state_t *pRExC_state, scan_data_t *data,
        if (!i) /* fixed */
            data->substrs[0].max_offset = data->substrs[0].min_offset;
        else { /* float */
-           data->substrs[1].max_offset = (l
+           data->substrs[1].max_offset =
+                      (is_inf)
+                       ? OPTIMIZE_INFTY
+                       : (l
                           ? data->last_start_max
-                          : (data->pos_delta > SSize_t_MAX - data->pos_min
-                                        ? SSize_t_MAX
+                          /* temporary underflow guard for 5.32 */
+                          : data->pos_delta < 0 ? OPTIMIZE_INFTY
+                          : (data->pos_delta > OPTIMIZE_INFTY - data->pos_min
+                                        ? OPTIMIZE_INFTY
                                         : data->pos_min + data->pos_delta));
-           if (is_inf
-                || (STRLEN)data->substrs[1].max_offset > (STRLEN)SSize_t_MAX)
-               data->substrs[1].max_offset = SSize_t_MAX;
         }
 
-        if (data->flags & SF_BEFORE_EOL)
-            data->substrs[i].flags |= (data->flags & SF_BEFORE_EOL);
-        else
-            data->substrs[i].flags &= ~SF_BEFORE_EOL;
+        data->substrs[i].flags &= ~SF_BEFORE_EOL;
+        data->substrs[i].flags |= data->flags & SF_BEFORE_EOL;
         data->substrs[i].minlenp = minlenp;
         data->substrs[i].lookbehind = 0;
     }
@@ -1602,7 +1654,6 @@ S_get_ANYOF_cp_list_for_ssc(pTHX_ const RExC_state_t *pRExC_state,
      * returned list must, and will, contain every code point that is a
      * possibility. */
 
-    dVAR;
     SV* invlist = NULL;
     SV* only_utf8_locale_invlist = NULL;
     unsigned int i;
@@ -2011,7 +2062,7 @@ S_ssc_or(pTHX_ const RExC_state_t *pRExC_state, regnode_ssc *ssc,
               );
 }
 
-PERL_STATIC_INLINE void
+STATIC void
 S_ssc_union(pTHX_ regnode_ssc *ssc, SV* const invlist, const bool invert2nd)
 {
     PERL_ARGS_ASSERT_SSC_UNION;
@@ -2024,7 +2075,7 @@ S_ssc_union(pTHX_ regnode_ssc *ssc, SV* const invlist, const bool invert2nd)
                                         &ssc->invlist);
 }
 
-PERL_STATIC_INLINE void
+STATIC void
 S_ssc_intersection(pTHX_ regnode_ssc *ssc,
                          SV* const invlist,
                          const bool invert2nd)
@@ -2039,7 +2090,7 @@ S_ssc_intersection(pTHX_ regnode_ssc *ssc,
                                                &ssc->invlist);
 }
 
-PERL_STATIC_INLINE void
+STATIC void
 S_ssc_add_range(pTHX_ regnode_ssc *ssc, const UV start, const UV end)
 {
     PERL_ARGS_ASSERT_SSC_ADD_RANGE;
@@ -2049,7 +2100,7 @@ S_ssc_add_range(pTHX_ regnode_ssc *ssc, const UV start, const UV end)
     ssc->invlist = _add_range_to_invlist(ssc->invlist, start, end);
 }
 
-PERL_STATIC_INLINE void
+STATIC void
 S_ssc_cp_and(pTHX_ regnode_ssc *ssc, const UV cp)
 {
     /* AND just the single code point 'cp' into the SSC 'ssc' */
@@ -2067,7 +2118,7 @@ S_ssc_cp_and(pTHX_ regnode_ssc *ssc, const UV cp)
     SvREFCNT_dec_NN(cp_list);
 }
 
-PERL_STATIC_INLINE void
+STATIC void
 S_ssc_clear_locale(regnode_ssc *ssc)
 {
     /* Set the SSC 'ssc' to not match any locale things */
@@ -2215,7 +2266,7 @@ S_dump_trie(pTHX_ const struct _reg_trie_data *trie, HV *widecharmap,
     SV *sv=sv_newmortal();
     int colwidth= widecharmap ? 6 : 4;
     U16 word;
-    GET_RE_DEBUG_FLAGS_DECL;
+    DECLARE_AND_GET_RE_DEBUG_FLAGS;
 
     PERL_ARGS_ASSERT_DUMP_TRIE;
 
@@ -2309,7 +2360,7 @@ S_dump_trie_interim_list(pTHX_ const struct _reg_trie_data *trie,
     U32 state;
     SV *sv=sv_newmortal();
     int colwidth= widecharmap ? 6 : 4;
-    GET_RE_DEBUG_FLAGS_DECL;
+    DECLARE_AND_GET_RE_DEBUG_FLAGS;
 
     PERL_ARGS_ASSERT_DUMP_TRIE_INTERIM_LIST;
 
@@ -2370,7 +2421,7 @@ S_dump_trie_interim_table(pTHX_ const struct _reg_trie_data *trie,
     U16 charid;
     SV *sv=sv_newmortal();
     int colwidth= widecharmap ? 6 : 4;
-    GET_RE_DEBUG_FLAGS_DECL;
+    DECLARE_AND_GET_RE_DEBUG_FLAGS;
 
     PERL_ARGS_ASSERT_DUMP_TRIE_INTERIM_TABLE;
 
@@ -2715,7 +2766,7 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch,
     STRLEN trie_charcount=0;
 #endif
     SV *re_trie_maxbuff;
-    GET_RE_DEBUG_FLAGS_DECL;
+    DECLARE_AND_GET_RE_DEBUG_FLAGS;
 
     PERL_ARGS_ASSERT_MAKE_TRIE;
 #ifndef DEBUGGING
@@ -3805,7 +3856,7 @@ S_construct_ahocorasick_from_trie(pTHX_ RExC_state_t *pRExC_state, regnode *sour
     reg_ac_data *aho;
     const U32 data_slot = add_data( pRExC_state, STR_WITH_LEN("T"));
     regnode *stclass;
-    GET_RE_DEBUG_FLAGS_DECL;
+    DECLARE_AND_GET_RE_DEBUG_FLAGS;
 
     PERL_ARGS_ASSERT_CONSTRUCT_AHOCORASICK_FROM_TRIE;
     PERL_UNUSED_CONTEXT;
@@ -3946,7 +3997,7 @@ S_construct_ahocorasick_from_trie(pTHX_ RExC_state_t *pRExC_state, regnode *sour
  *      character folded sequences.  Since a single character can fold into
  *      such a sequence, the minimum match length for this node is less than
  *      the number of characters in the node.  This routine returns in
- *      *min_subtract how many characters to subtract from the the actual
+ *      *min_subtract how many characters to subtract from the actual
  *      length of the string to get a real minimum match length; it is 0 if
  *      there are no multi-char foldeds.  This delta is used by the caller to
  *      adjust the min length of the match, and the delta between min and max,
@@ -4054,7 +4105,7 @@ S_join_exact(pTHX_ RExC_state_t *pRExC_state, regnode *scan,
     U32 stopnow = 0;
 #ifdef DEBUGGING
     regnode *stop = scan;
-    GET_RE_DEBUG_FLAGS_DECL;
+    DECLARE_AND_GET_RE_DEBUG_FLAGS;
 #else
     PERL_UNUSED_ARG(depth);
 #endif
@@ -4384,7 +4435,7 @@ S_join_exact(pTHX_ RExC_state_t *pRExC_state, regnode *scan,
                 s++;
             }
         }
-       else {
+       else if (OP(scan) != EXACTFAA_NO_TRIE) {
 
             /* Non-UTF-8 pattern, not EXACTFAA node.  Look for the multi-char
              * folds that are all Latin1.  As explained in the comments
@@ -4416,7 +4467,7 @@ S_join_exact(pTHX_ RExC_state_t *pRExC_state, regnode *scan,
                     /* EXACTF nodes need to know that the minimum length
                      * changed so that a sharp s in the string can match this
                      * ss in the pattern, but they remain EXACTF nodes, as they
-                     * won't match this unless the target string is is UTF-8,
+                     * won't match this unless the target string is in UTF-8,
                      * which we don't know until runtime.  EXACTFL nodes can't
                      * transform into EXACTFU nodes */
                     if (OP(scan) != EXACTF && OP(scan) != EXACTFL) {
@@ -4469,6 +4520,42 @@ S_unwind_scan_frames(pTHX_ const void *p)
     } while (f);
 }
 
+/* Follow the next-chain of the current node and optimize away
+   all the NOTHINGs from it.
+ */
+STATIC void
+S_rck_elide_nothing(pTHX_ regnode *node)
+{
+    PERL_ARGS_ASSERT_RCK_ELIDE_NOTHING;
+
+    if (OP(node) != CURLYX) {
+        const int max = (reg_off_by_arg[OP(node)]
+                        ? I32_MAX
+                          /* I32 may be smaller than U16 on CRAYs! */
+                        : (I32_MAX < U16_MAX ? I32_MAX : U16_MAX));
+        int off = (reg_off_by_arg[OP(node)] ? ARG(node) : NEXT_OFF(node));
+        int noff;
+        regnode *n = node;
+
+        /* Skip NOTHING and LONGJMP. */
+        while (
+            (n = regnext(n))
+            && (
+                (PL_regkind[OP(n)] == NOTHING && (noff = NEXT_OFF(n)))
+                || ((OP(n) == LONGJMP) && (noff = ARG(n)))
+            )
+            && off + noff < max
+        ) {
+            off += noff;
+        }
+        if (reg_off_by_arg[OP(node)])
+            ARG(node) = off;
+        else
+            NEXT_OFF(node) = off;
+    }
+    return;
+}
+
 /* the return from this sub is the minimum length that could possibly match */
 STATIC SSize_t
 S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
@@ -4478,7 +4565,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                        I32 stopparen,
                         U32 recursed_depth,
                        regnode_ssc *and_withp,
-                       U32 flags, U32 depth)
+                       U32 flags, U32 depth, bool was_mutate_ok)
                        /* scanp: Start here (read-write). */
                        /* deltap: Write maxlen-minlen here. */
                        /* last: Stop before this one. */
@@ -4487,7 +4574,6 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                        /* recursed: which subroutines have we recursed into */
                        /* and_withp: Valid if flags & SCF_DO_STCLASS_OR */
 {
-    dVAR;
     SSize_t final_minlen;
     /* There must be at least this number of characters to match */
     SSize_t min = 0;
@@ -4500,9 +4586,9 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
     scan_data_t data_fake;
     SV *re_trie_maxbuff = NULL;
     regnode *first_non_open = scan;
-    SSize_t stopmin = SSize_t_MAX;
+    SSize_t stopmin = OPTIMIZE_INFTY;
     scan_frame *frame = NULL;
-    GET_RE_DEBUG_FLAGS_DECL;
+    DECLARE_AND_GET_RE_DEBUG_FLAGS;
 
     PERL_ARGS_ASSERT_STUDY_CHUNK;
     RExC_study_started= 1;
@@ -4550,6 +4636,10 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                                    node length to get a real minimum (because
                                    the folded version may be shorter) */
        bool unfolded_multi_char = FALSE;
+        /* avoid mutating ops if we are anywhere within the recursed or
+         * enframed handling for a GOSUB: the outermost level will handle it.
+         */
+        bool mutate_ok = was_mutate_ok && !(frame && frame->in_gosub);
        /* Peephole optimizer: */
         DEBUG_STUDYDATA("Peep", data, depth, is_inf);
         DEBUG_PEEP("Peep", scan, depth, flags);
@@ -4560,33 +4650,19 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
          * parsing code, as each (?:..) is handled by a different invocation of
          * reg() -- Yves
          */
-        if (PL_regkind[OP(scan)] == EXACT && OP(scan) != LEXACT
-                                          && OP(scan) != LEXACT_REQ8)
+        if (PL_regkind[OP(scan)] == EXACT
+            && OP(scan) != LEXACT
+            && OP(scan) != LEXACT_REQ8
+            && mutate_ok
+        ) {
             join_exact(pRExC_state, scan, &min_subtract, &unfolded_multi_char,
                     0, NULL, depth + 1);
+        }
 
         /* Follow the next-chain of the current node and optimize
-           away all the NOTHINGs from it.  */
-        if (OP(scan) != CURLYX) {
-            const int max = (reg_off_by_arg[OP(scan)]
-                            ? I32_MAX
-                              /* I32 may be smaller than U16 on CRAYs! */
-                            : (I32_MAX < U16_MAX ? I32_MAX : U16_MAX));
-            int off = (reg_off_by_arg[OP(scan)] ? ARG(scan) : NEXT_OFF(scan));
-            int noff;
-            regnode *n = scan;
-
-            /* Skip NOTHING and LONGJMP. */
-            while (   (n = regnext(n))
-                   && (   (PL_regkind[OP(n)] == NOTHING && (noff = NEXT_OFF(n)))
-                       || ((OP(n) == LONGJMP) && (noff = ARG(n))))
-                   && off + noff < max)
-                off += noff;
-            if (reg_off_by_arg[OP(scan)])
-                ARG(scan) = off;
-            else
-                NEXT_OFF(scan) = off;
-        }
+           away all the NOTHINGs from it.
+         */
+        rck_elide_nothing(scan);
 
         /* The principal pseudo-switch.  Cannot be a switch, since we look into
          * several different things.  */
@@ -4613,7 +4689,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
             /* DEFINEP study_chunk() recursion */
             (void)study_chunk(pRExC_state, &scan, &minlen,
                               &deltanext, next, &data_fake, stopparen,
-                              recursed_depth, NULL, f, depth+1);
+                              recursed_depth, NULL, f, depth+1, mutate_ok);
 
             scan = next;
         } else
@@ -4634,7 +4710,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                 /* NOTE - There is similar code to this block below for
                  * handling TRIE nodes on a re-study.  If you change stuff here
                  * check there too. */
-               SSize_t max1 = 0, min1 = SSize_t_MAX, num = 0;
+               SSize_t max1 = 0, min1 = OPTIMIZE_INFTY, num = 0;
                regnode_ssc accum;
                regnode * const startbranch=scan;
 
@@ -4681,13 +4757,14 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                     /* recurse study_chunk() for each BRANCH in an alternation */
                    minnext = study_chunk(pRExC_state, &scan, minlenp,
                                       &deltanext, next, &data_fake, stopparen,
-                                      recursed_depth, NULL, f, depth+1);
+                                      recursed_depth, NULL, f, depth+1,
+                                      mutate_ok);
 
                    if (min1 > minnext)
                        min1 = minnext;
-                   if (deltanext == SSize_t_MAX) {
+                   if (deltanext == OPTIMIZE_INFTY) {
                        is_inf = is_inf_internal = 1;
-                       max1 = SSize_t_MAX;
+                       max1 = OPTIMIZE_INFTY;
                    } else if (max1 < minnext + deltanext)
                        max1 = minnext + deltanext;
                    scan = next;
@@ -4712,17 +4789,17 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                    min1 = 0;
                if (flags & SCF_DO_SUBSTR) {
                    data->pos_min += min1;
-                   if (data->pos_delta >= SSize_t_MAX - (max1 - min1))
-                       data->pos_delta = SSize_t_MAX;
+                   if (data->pos_delta >= OPTIMIZE_INFTY - (max1 - min1))
+                       data->pos_delta = OPTIMIZE_INFTY;
                    else
                        data->pos_delta += max1 - min1;
                    if (max1 != min1 || is_inf)
                        data->cur_is_floating = 1;
                }
                min += min1;
-               if (delta == SSize_t_MAX
-                || SSize_t_MAX - delta - (max1 - min1) < 0)
-                   delta = SSize_t_MAX;
+               if (delta == OPTIMIZE_INFTY
+                || OPTIMIZE_INFTY - delta - (max1 - min1) < 0)
+                   delta = OPTIMIZE_INFTY;
                else
                    delta += max1 - min1;
                if (flags & SCF_DO_STCLASS_OR) {
@@ -4748,9 +4825,10 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                    }
                }
 
-                if (PERL_ENABLE_TRIE_OPTIMISATION &&
-                        OP( startbranch ) == BRANCH )
-                {
+                if (PERL_ENABLE_TRIE_OPTIMISATION
+                    && OP(startbranch) == BRANCH
+                    && mutate_ok
+                ) {
                /* demq.
 
                    Assuming this was/is a branch we are dealing with: 'scan'
@@ -5130,7 +5208,12 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                      * might result in a minlen of 1 and not of 4,
                      * but this doesn't make us mismatch, just try a bit
                      * harder than we should.
-                     * */
+                     *
+                     * However we must assume this GOSUB is infinite, to
+                     * avoid wrongly applying other optimizations in the
+                     * enclosing scope - see GH 18096, for example.
+                     */
+                    is_inf = is_inf_internal = 1;
                     scan= regnext(scan);
                     continue;
                 }
@@ -5203,6 +5286,9 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                 newframe->stopparen = stopparen;
                 newframe->prev_recursed_depth = recursed_depth;
                 newframe->this_prev_frame= frame;
+                newframe->in_gosub = (
+                    (frame && frame->in_gosub) || OP(scan) == GOSUB
+                );
 
                 DEBUG_STUDYDATA("frame-new", data, depth, is_inf);
                 DEBUG_PEEP("fnew", scan, depth, flags);
@@ -5240,8 +5326,10 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                   offset, later match for variable offset.  */
                if (data->last_end == -1) { /* Update the start info. */
                    data->last_start_min = data->pos_min;
-                   data->last_start_max = is_inf
-                       ? SSize_t_MAX : data->pos_min + data->pos_delta;
+                    data->last_start_max =
+                        is_inf ? OPTIMIZE_INFTY
+                        : (data->pos_delta > OPTIMIZE_INFTY - data->pos_min)
+                            ? OPTIMIZE_INFTY : data->pos_min + data->pos_delta;
                }
                sv_catpvn(data->last_found, STRING(scan), bytelen);
                if (UTF)
@@ -5287,8 +5375,9 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                 &&   isALPHA_A(*s)
                 &&  (         OP(scan) == EXACTFAA
                      || (     OP(scan) == EXACTFU
-                         && ! HAS_NONLATIN1_SIMPLE_FOLD_CLOSURE(*s))))
-            {
+                         && ! HAS_NONLATIN1_SIMPLE_FOLD_CLOSURE(*s)))
+                &&   mutate_ok
+            ) {
                 U8 mask = ~ ('A' ^ 'a'); /* These differ in just one bit */
 
                 OP(scan) = ANYOFM;
@@ -5374,6 +5463,9 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                }
                if (flags & SCF_DO_SUBSTR)
                    data->pos_min++;
+                /* This will bypass the formal 'min += minnext * mincount'
+                 * calculation in the do_curly path, so assumes min width
+                 * of the PLUS payload is exactly one. */
                min++;
                /* FALLTHROUGH */
            case STAR:
@@ -5381,7 +5473,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
 
                 /* This temporary node can now be turned into EXACTFU, and
                  * must, as regexec.c doesn't handle it */
-                if (OP(next) == EXACTFU_S_EDGE) {
+                if (OP(next) == EXACTFU_S_EDGE && mutate_ok) {
                     OP(next) = EXACTFU;
                 }
 
@@ -5389,8 +5481,9 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                     &&   isALPHA_A(* STRING(next))
                     && (         OP(next) == EXACTFAA
                         || (     OP(next) == EXACTFU
-                            && ! HAS_NONLATIN1_SIMPLE_FOLD_CLOSURE(* STRING(next)))))
-                {
+                            && ! HAS_NONLATIN1_SIMPLE_FOLD_CLOSURE(* STRING(next))))
+                    &&   mutate_ok
+                ) {
                     /* These differ in just one bit */
                     U8 mask = ~ ('A' ^ 'a');
 
@@ -5477,7 +5570,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                                   (mincount == 0
                                    ? (f & ~SCF_DO_SUBSTR)
                                    : f)
-                                  ,depth+1);
+                                  , depth+1, mutate_ok);
 
                if (flags & SCF_DO_STCLASS)
                    data->start_class = oclass;
@@ -5523,12 +5616,18 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                                  RExC_precomp)));
                 }
 
+                if ( ( minnext > 0 && mincount >= SSize_t_MAX / minnext )
+                    || min >= SSize_t_MAX - minnext * mincount )
+                {
+                    FAIL("Regexp out of space");
+                }
+
                min += minnext * mincount;
-               is_inf_internal |= deltanext == SSize_t_MAX
+               is_inf_internal |= deltanext == OPTIMIZE_INFTY
                          || (maxcount == REG_INFTY && minnext + deltanext > 0);
                is_inf |= is_inf_internal;
                 if (is_inf) {
-                   delta = SSize_t_MAX;
+                   delta = OPTIMIZE_INFTY;
                 } else {
                    delta += (minnext + deltanext) * maxcount
                              - minnext * mincount;
@@ -5537,7 +5636,9 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                if (  OP(oscan) == CURLYX && data
                      && data->flags & SF_IN_PAR
                      && !(data->flags & SF_HAS_EVAL)
-                     && !deltanext && minnext == 1 ) {
+                     && !deltanext && minnext == 1
+                      && mutate_ok
+                ) {
                    /* Try to optimize to CURLYN.  */
                    regnode *nxt = NEXTOPER(oscan) + EXTRA_STEP_2ARGS;
                    regnode * const nxt1 = nxt;
@@ -5587,10 +5688,10 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                      && !(data->flags & SF_HAS_EVAL)
                      && !deltanext     /* atom is fixed width */
                      && minnext != 0   /* CURLYM can't handle zero width */
-
                          /* Nor characters whose fold at run-time may be
                           * multi-character */
                       && ! (RExC_seen & REG_UNFOLDED_MULTI_SEEN)
+                      && mutate_ok
                ) {
                    /* XXXX How to optimize if data == 0? */
                    /* Optimize to a simpler form.  */
@@ -5643,7 +5744,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                         /* recurse study_chunk() on optimised CURLYX => CURLYM */
                        study_chunk(pRExC_state, &nxt1, minlenp, &deltanext, nxt,
                                     NULL, stopparen, recursed_depth, NULL, 0,
-                                    depth+1);
+                                    depth+1, mutate_ok);
                    }
                    else
                        oscan->flags = 0;
@@ -5724,7 +5825,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                            data->last_start_min += minnext * (mincount - 1);
                            data->last_start_max =
                               is_inf
-                               ? SSize_t_MAX
+                               ? OPTIMIZE_INFTY
                               : data->last_start_max +
                                  (maxcount - 1) * (minnext + data->pos_delta);
                        }
@@ -5733,18 +5834,18 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                    data->pos_min += minnext * (mincount - counted);
 #if 0
 Perl_re_printf( aTHX_  "counted=%" UVuf " deltanext=%" UVuf
-                              " SSize_t_MAX=%" UVuf " minnext=%" UVuf
+                              " OPTIMIZE_INFTY=%" UVuf " minnext=%" UVuf
                               " maxcount=%" UVuf " mincount=%" UVuf "\n",
-    (UV)counted, (UV)deltanext, (UV)SSize_t_MAX, (UV)minnext, (UV)maxcount,
+    (UV)counted, (UV)deltanext, (UV)OPTIMIZE_INFTY, (UV)minnext, (UV)maxcount,
     (UV)mincount);
-if (deltanext != SSize_t_MAX)
+if (deltanext != OPTIMIZE_INFTY)
 Perl_re_printf( aTHX_  "LHS=%" UVuf " RHS=%" UVuf "\n",
     (UV)(-counted * deltanext + (minnext + deltanext) * maxcount
-          - minnext * mincount), (UV)(SSize_t_MAX - data->pos_delta));
+          - minnext * mincount), (UV)(OPTIMIZE_INFTY - data->pos_delta));
 #endif
-                   if (deltanext == SSize_t_MAX
-                        || -counted * deltanext + (minnext + deltanext) * maxcount - minnext * mincount >= SSize_t_MAX - data->pos_delta)
-                       data->pos_delta = SSize_t_MAX;
+                   if (deltanext == OPTIMIZE_INFTY
+                        || -counted * deltanext + (minnext + deltanext) * maxcount - minnext * mincount >= OPTIMIZE_INFTY - data->pos_delta)
+                       data->pos_delta = OPTIMIZE_INFTY;
                    else
                        data->pos_delta += - counted * deltanext +
                        (minnext + deltanext) * maxcount - minnext * mincount;
@@ -5763,7 +5864,7 @@ Perl_re_printf( aTHX_  "LHS=%" UVuf " RHS=%" UVuf "\n",
                            data->last_end = data->pos_min;
                            data->last_start_min = data->pos_min - last_chrs;
                            data->last_start_max = is_inf
-                               ? SSize_t_MAX
+                               ? OPTIMIZE_INFTY
                                : data->pos_min + data->pos_delta - last_chrs;
                        }
                        data->cur_is_floating = 1; /* float */
@@ -5773,11 +5874,7 @@ Perl_re_printf( aTHX_  "LHS=%" UVuf " RHS=%" UVuf "\n",
                if (data && (fl & SF_HAS_EVAL))
                    data->flags |= SF_HAS_EVAL;
              optimize_curly_tail:
-               if (OP(oscan) != CURLYX) {
-                   while (PL_regkind[OP(next = regnext(oscan))] == NOTHING
-                          && NEXT_OFF(next))
-                       NEXT_OFF(oscan) += NEXT_OFF(next);
-               }
+               rck_elide_nothing(oscan);
                continue;
 
            default:
@@ -5828,13 +5925,13 @@ Perl_re_printf( aTHX_  "LHS=%" UVuf " RHS=%" UVuf "\n",
                flags &= ~SCF_DO_STCLASS;
             }
            min++;
-            if (delta != SSize_t_MAX)
+            if (delta != OPTIMIZE_INFTY)
                 delta++;    /* Because of the 2 char string cr-lf */
             if (flags & SCF_DO_SUBSTR) {
                 /* Cannot expect anything... */
                 scan_commit(pRExC_state, data, minlenp, is_inf);
                data->pos_min += 1;
-                if (data->pos_delta != SSize_t_MAX) {
+                if (data->pos_delta != OPTIMIZE_INFTY) {
                     data->pos_delta += 1;
                 }
                data->cur_is_floating = 1; /* float */
@@ -5908,7 +6005,10 @@ Perl_re_printf( aTHX_  "LHS=%" UVuf " RHS=%" UVuf "\n",
                                                           (regnode_charclass *) scan);
                    break;
 
-                case NANYOFM:
+                case NANYOFM: /* NANYOFM already contains the inversion of the
+                                 input ANYOF data, so, unlike things like
+                                 NPOSIXA, don't change 'invert' to TRUE */
+                    /* FALLTHROUGH */
                 case ANYOFM:
                   {
                     SV* cp_list = get_ANYOFM_contents(scan);
@@ -6073,7 +6173,8 @@ Perl_re_printf( aTHX_  "LHS=%" UVuf " RHS=%" UVuf "\n",
                 /* recurse study_chunk() for lookahead body */
                 minnext = study_chunk(pRExC_state, &nscan, minlenp, &deltanext,
                                       last, &data_fake, stopparen,
-                                      recursed_depth, NULL, f, depth+1);
+                                      recursed_depth, NULL, f, depth+1,
+                                      mutate_ok);
                 if (scan->flags) {
                     if (   deltanext < 0
                         || deltanext > (I32) U8_MAX
@@ -6178,7 +6279,7 @@ Perl_re_printf( aTHX_  "LHS=%" UVuf " RHS=%" UVuf "\n",
                 *minnextp = study_chunk(pRExC_state, &nscan, minnextp,
                                         &deltanext, last, &data_fake,
                                         stopparen, recursed_depth, NULL,
-                                        f, depth+1);
+                                        f, depth+1, mutate_ok);
                 if (scan->flags) {
                     assert(0);  /* This code has never been tested since this
                                    is normally not compiled */
@@ -6297,7 +6398,7 @@ Perl_re_printf( aTHX_  "LHS=%" UVuf " RHS=%" UVuf "\n",
             regnode *trie_node= scan;
             regnode *tail= regnext(scan);
             reg_trie_data *trie = (reg_trie_data*)RExC_rxi->data->data[ ARG(scan) ];
-            SSize_t max1 = 0, min1 = SSize_t_MAX;
+            SSize_t max1 = 0, min1 = OPTIMIZE_INFTY;
             regnode_ssc accum;
 
             if (flags & SCF_DO_SUBSTR) { /* XXXX Add !SUSPEND? */
@@ -6345,16 +6446,17 @@ Perl_re_printf( aTHX_  "LHS=%" UVuf " RHS=%" UVuf "\n",
                         /* optimise study_chunk() for TRIE */
                         minnext = study_chunk(pRExC_state, &scan, minlenp,
                             &deltanext, (regnode *)nextbranch, &data_fake,
-                            stopparen, recursed_depth, NULL, f, depth+1);
+                            stopparen, recursed_depth, NULL, f, depth+1,
+                            mutate_ok);
                     }
                     if (nextbranch && PL_regkind[OP(nextbranch)]==BRANCH)
                         nextbranch= regnext((regnode*)nextbranch);
 
                     if (min1 > (SSize_t)(minnext + trie->minlen))
                         min1 = minnext + trie->minlen;
-                    if (deltanext == SSize_t_MAX) {
+                    if (deltanext == OPTIMIZE_INFTY) {
                         is_inf = is_inf_internal = 1;
-                        max1 = SSize_t_MAX;
+                        max1 = OPTIMIZE_INFTY;
                     } else if (max1 < (SSize_t)(minnext + deltanext + trie->maxlen))
                         max1 = minnext + deltanext + trie->maxlen;
 
@@ -6383,11 +6485,11 @@ Perl_re_printf( aTHX_  "LHS=%" UVuf " RHS=%" UVuf "\n",
                     data->cur_is_floating = 1; /* float */
             }
             min += min1;
-            if (delta != SSize_t_MAX) {
-                if (SSize_t_MAX - (max1 - min1) >= delta)
+            if (delta != OPTIMIZE_INFTY) {
+                if (OPTIMIZE_INFTY - (max1 - min1) >= delta)
                     delta += max1 - min1;
                 else
-                    delta = SSize_t_MAX;
+                    delta = OPTIMIZE_INFTY;
             }
             if (flags & SCF_DO_STCLASS_OR) {
                 ssc_or(pRExC_state, data->start_class, (regnode_charclass *) &accum);
@@ -6468,10 +6570,10 @@ Perl_re_printf( aTHX_  "LHS=%" UVuf " RHS=%" UVuf "\n",
     DEBUG_STUDYDATA("pre-fin", data, depth, is_inf);
 
     *scanp = scan;
-    *deltap = is_inf_internal ? SSize_t_MAX : delta;
+    *deltap = is_inf_internal ? OPTIMIZE_INFTY : delta;
 
     if (flags & SCF_DO_SUBSTR && is_inf)
-       data->pos_delta = SSize_t_MAX - data->pos_min;
+       data->pos_delta = OPTIMIZE_INFTY - data->pos_min;
     if (is_par > (I32)U8_MAX)
        is_par = 0;
     if (is_par && pars==1 && data) {
@@ -6491,9 +6593,10 @@ Perl_re_printf( aTHX_  "LHS=%" UVuf " RHS=%" UVuf "\n",
 
     final_minlen = min < stopmin
             ? min : stopmin;
+
     if (!(RExC_seen & REG_UNBOUNDED_QUANTIFIER_SEEN)) {
-        if (final_minlen > SSize_t_MAX - delta)
-            RExC_maxlen = SSize_t_MAX;
+        if (final_minlen > OPTIMIZE_INFTY - delta)
+            RExC_maxlen = OPTIMIZE_INFTY;
         else if (RExC_maxlen < final_minlen + delta)
             RExC_maxlen = final_minlen + delta;
     }
@@ -6605,7 +6708,7 @@ REGEXP *
 Perl_pregcomp(pTHX_ SV * const pattern, const U32 flags)
 {
     regexp_engine const *eng = current_re_engine();
-    GET_RE_DEBUG_FLAGS_DECL;
+    DECLARE_AND_GET_RE_DEBUG_FLAGS;
 
     PERL_ARGS_ASSERT_PREGCOMP;
 
@@ -6626,16 +6729,9 @@ Perl_pregcomp(pTHX_ SV * const pattern, const U32 flags)
 REGEXP *
 Perl_re_compile(pTHX_ SV * const pattern, U32 rx_flags)
 {
-    PERL_ARGS_ASSERT_RE_COMPILE;
-    return re_op_compile_wrapper(pattern, rx_flags, 0);
-}
-
-REGEXP *
-S_re_op_compile_wrapper(pTHX_ SV * const pattern, U32 rx_flags, const U32 pm_flags)
-{
     SV *pat = pattern; /* defeat constness! */
 
-    PERL_ARGS_ASSERT_RE_OP_COMPILE_WRAPPER;
+    PERL_ARGS_ASSERT_RE_COMPILE;
 
     return Perl_re_op_compile(aTHX_ &pat, 1, NULL,
 #ifdef PERL_IN_XSUB_RE
@@ -6643,10 +6739,9 @@ S_re_op_compile_wrapper(pTHX_ SV * const pattern, U32 rx_flags, const U32 pm_fla
 #else
                                 &PL_core_reg_engine,
 #endif
-                                NULL, NULL, rx_flags, pm_flags);
+                                NULL, NULL, rx_flags, 0);
 }
 
-
 static void
 S_free_codeblocks(pTHX_ struct reg_code_blocks *cbs)
 {
@@ -6698,7 +6793,7 @@ S_pat_upgrade_to_utf8(pTHX_ RExC_state_t * const pRExC_state,
     int n=0;
     STRLEN s = 0;
     bool do_end = 0;
-    GET_RE_DEBUG_FLAGS_DECL;
+    DECLARE_AND_GET_RE_DEBUG_FLAGS;
 
     DEBUG_PARSE_r(Perl_re_printf( aTHX_
         "UTF8 mismatch! Converting to utf8 for resizing and compile\n"));
@@ -6825,7 +6920,7 @@ S_concat_pat(pTHX_ RExC_state_t * const pRExC_state,
         /* we make the assumption here that each op in the list of
          * op_siblings maps to one SV pushed onto the stack,
          * except for code blocks, with have both an OP_NULL and
-         * and OP_CONST.
+         * an OP_CONST.
          * This allows us to match up the list of SVs against the
          * list of OPs to find the next code block.
          *
@@ -6875,7 +6970,7 @@ S_concat_pat(pTHX_ RExC_state_t * const pRExC_state,
                 pRExC_state->code_blocks->count -= n;
             n = 0;
         }
-        else  {
+        else {
             /* ... or failing that, try "" overload */
             while (SvAMAGIC(msv)
                     && (sv = AMG_CALLunary(msv, string_amg))
@@ -7038,7 +7133,7 @@ S_compile_runtime_code(pTHX_ RExC_state_t * const pRExC_state,
 {
     SV *qr;
 
-    GET_RE_DEBUG_FLAGS_DECL;
+    DECLARE_AND_GET_RE_DEBUG_FLAGS;
 
     if (pRExC_state->runtime_code_qr) {
        /* this is the second time we've been called; this should
@@ -7318,7 +7413,7 @@ S_set_regex_pv(pTHX_ RExC_state_t *pRExC_state, REGEXP *Rx)
     PERL_ARGS_ASSERT_SET_REGEX_PV;
 
     /* make sure PL_bitcount bounds not exceeded */
-    assert(sizeof(STD_PAT_MODS) <= 8);
+    STATIC_ASSERT_STMT(sizeof(STD_PAT_MODS) <= 8);
 
     p = sv_grow(MUTABLE_SV(Rx), wraplen + 1); /* +1 for the ending NUL */
     SvPOK_on(Rx);
@@ -7399,7 +7494,7 @@ S_set_regex_pv(pTHX_ RExC_state_t *pRExC_state, REGEXP *Rx)
  *
  * pm_flags contains the PMf_* flags, typically based on those from the
  * pm_flags field of the related PMOP. Currently we're only interested in
- * PMf_HAS_CV, PMf_IS_QR, PMf_USE_RE_EVAL.
+ * PMf_HAS_CV, PMf_IS_QR, PMf_USE_RE_EVAL, PMf_WILDCARD.
  *
  * For many years this code had an initial sizing pass that calculated
  * (sometimes incorrectly, leading to security holes) the size needed for the
@@ -7412,7 +7507,7 @@ S_set_regex_pv(pTHX_ RExC_state_t *pRExC_state, REGEXP *Rx)
  * length of the pattern.  Patches welcome to improve that guess.  That amount
  * of space is malloc'd and then immediately freed, and then clawed back node
  * by node.  This design is to minimze, to the extent possible, memory churn
- * when doing the the reallocs.
+ * when doing the reallocs.
  *
  * A separate parentheses counting pass may be needed in some cases.
  * (Previously the sizing pass did this.)  Patches welcome to reduce the number
@@ -7430,7 +7525,6 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
                    OP *expr, const regexp_engine* eng, REGEXP *old_re,
                     bool *is_bare_re, const U32 orig_rx_flags, const U32 pm_flags)
 {
-    dVAR;
     REGEXP *Rx;         /* Capital 'R' means points to a REGEXP */
     STRLEN plen;
     char *exp;
@@ -7458,7 +7552,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
     int restudied = 0;
     RExC_state_t copyRExC_state;
 #endif
-    GET_RE_DEBUG_FLAGS_DECL;
+    DECLARE_AND_GET_RE_DEBUG_FLAGS;
 
     PERL_ARGS_ASSERT_RE_OP_COMPILE;
 
@@ -7883,7 +7977,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
     DEBUG_OFFSETS_r(if (RExC_offsets) {
         const STRLEN len = RExC_offsets[0];
         STRLEN i;
-        GET_RE_DEBUG_FLAGS_DECL;
+        DECLARE_AND_GET_RE_DEBUG_FLAGS;
         Perl_re_printf( aTHX_
                       "Offsets: [%" UVuf "]\n\t", (UV)RExC_offsets[0]);
         for (i = 1; i <= len; i++) {
@@ -8140,7 +8234,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
             &data, -1, 0, NULL,
             SCF_DO_SUBSTR | SCF_WHILEM_VISITED_POS | stclass_flag
                           | (restudied ? SCF_TRIE_DOING_RESTUDY : 0),
-            0);
+            0, TRUE);
 
 
         CHECK_RESTUDY_GOTO_butfirst(LEAVE_with_name("study_chunk"));
@@ -8180,7 +8274,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
 
                 RExC_rx->substrs->data[i].max_offset = data.substrs[i].max_offset;
                 /* Don't offset infinity */
-                if (data.substrs[i].max_offset < SSize_t_MAX)
+                if (data.substrs[i].max_offset < OPTIMIZE_INFTY)
                     RExC_rx->substrs->data[i].max_offset -= data.substrs[i].lookbehind;
                 SvREFCNT_inc_simple_void_NN(data.substrs[i].str);
             }
@@ -8269,7 +8363,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
             SCF_DO_STCLASS_AND|SCF_WHILEM_VISITED_POS|(restudied
                                                       ? SCF_TRIE_DOING_RESTUDY
                                                       : 0),
-            0);
+            0, TRUE);
 
         CHECK_RESTUDY_GOTO_butfirst(NOOP);
 
@@ -8579,7 +8673,7 @@ SV*
 Perl_reg_named_buff_nextkey(pTHX_ REGEXP * const r, const U32 flags)
 {
     struct regexp *const rx = ReANY(r);
-    GET_RE_DEBUG_FLAGS_DECL;
+    DECLARE_AND_GET_RE_DEBUG_FLAGS;
 
     PERL_ARGS_ASSERT_REG_NAMED_BUFF_NEXTKEY;
 
@@ -8624,9 +8718,9 @@ Perl_reg_named_buff_scalar(pTHX_ REGEXP * const r, const U32 flags)
         } else if (flags & RXapif_ONE) {
             ret = CALLREG_NAMED_BUFF_ALL(r, (flags | RXapif_REGNAMES));
             av = MUTABLE_AV(SvRV(ret));
-            length = av_tindex(av);
+            length = av_count(av);
            SvREFCNT_dec_NN(ret);
-            return newSViv(length + 1);
+            return newSViv(length);
         } else {
             Perl_croak(aTHX_ "panic: Unknown flags %d in named_buff_scalar",
                                                 (int)flags);
@@ -8970,7 +9064,7 @@ S_reg_scan_name(pTHX_ RExC_state_t *pRExC_state, U32 flags)
         Perl_re_printf( aTHX_ "%16s","");                       \
                                                                 \
     if (RExC_lastnum!=RExC_emit)                                \
-       Perl_re_printf( aTHX_ "|%4d", RExC_emit);                \
+       Perl_re_printf( aTHX_ "|%4zu", RExC_emit);                \
     else                                                        \
        Perl_re_printf( aTHX_ "|%4s","");                        \
     Perl_re_printf( aTHX_ "|%*s%-4s",                           \
@@ -9649,7 +9743,7 @@ Perl__invlist_union_maybe_complement_2nd(pTHX_ SV* const a, SV* const b,
      * one of them */
     while (i_a < len_a && i_b < len_b) {
        UV cp;      /* The element to potentially add to the union's array */
-       bool cp_in_set;   /* is it in the the input list's set or not */
+       bool cp_in_set;   /* is it in the input list's set or not */
 
        /* We need to take one or the other of the two inputs for the union.
         * Since we are merging two sorted lists, we take the smaller of the
@@ -10546,7 +10640,6 @@ Perl__invlistEQ(pTHX_ SV* const a, SV* const b, const bool complement_b)
 STATIC SV*
 S_make_exactf_invlist(pTHX_ RExC_state_t *pRExC_state, regnode *node)
 {
-    dVAR;
     const U8 * s = (U8*)STRING(node);
     SSize_t bytelen = STR_LEN(node);
     UV uc;
@@ -10607,7 +10700,7 @@ S_make_exactf_invlist(pTHX_ RExC_state_t *pRExC_state, regnode *node)
         fc = uc = utf8_to_uvchr_buf(s, s + bytelen, NULL);
 
         /* The only code points that aren't folded in a UTF EXACTFish
-         * node are are the problematic ones in EXACTFL nodes */
+         * node are the problematic ones in EXACTFL nodes */
         if (OP(node) == EXACTFL && is_PROBLEMATIC_LOCALE_FOLDEDS_START_cp(uc)) {
             /* We need to check for the possibility that this EXACTFL
              * node begins with a multi-char fold.  Therefore we fold
@@ -10765,6 +10858,24 @@ S_parse_lparen_question_flags(pTHX_ RExC_state_t *pRExC_state)
         /* && memCHRs("iogcmsx", *RExC_parse) */
         /* (?g), (?gc) and (?o) are useless here
            and must be globally applied -- japhy */
+        if ((RExC_pm_flags & PMf_WILDCARD)) {
+            if (flagsp == & negflags) {
+                if (*RExC_parse == 'm') {
+                    RExC_parse++;
+                    /* diag_listed_as: Use of %s is not allowed in Unicode
+                       property wildcard subpatterns in regex; marked by <--
+                       HERE in m/%s/ */
+                    vFAIL("Use of modifier '-m' is not allowed in Unicode"
+                          " property wildcard subpatterns");
+                }
+            }
+            else {
+                if (*RExC_parse == 's') {
+                    goto modifier_illegal_in_wildcard;
+                }
+            }
+        }
+
         switch (*RExC_parse) {
 
             /* Code for the imsxn flags */
@@ -10845,6 +10956,10 @@ S_parse_lparen_question_flags(pTHX_ RExC_state_t *pRExC_state)
                                     *(RExC_parse - 1));
                 NOT_REACHED; /*NOTREACHED*/
             case GLOBAL_PAT_MOD: /* 'g' */
+                if (RExC_pm_flags & PMf_WILDCARD) {
+                    goto modifier_illegal_in_wildcard;
+                }
+                /*FALLTHROUGH*/
             case ONCE_PAT_MOD: /* 'o' */
                 if (ckWARN(WARN_REGEXP)) {
                     const I32 wflagbit = *RExC_parse == 'o'
@@ -10866,6 +10981,9 @@ S_parse_lparen_question_flags(pTHX_ RExC_state_t *pRExC_state)
                 break;
 
             case CONTINUE_PAT_MOD: /* 'c' */
+                if (RExC_pm_flags & PMf_WILDCARD) {
+                    goto modifier_illegal_in_wildcard;
+                }
                 if (ckWARN(WARN_REGEXP)) {
                     if (! (wastedflags & WASTED_C) ) {
                         wastedflags |= WASTED_GC;
@@ -10880,6 +10998,9 @@ S_parse_lparen_question_flags(pTHX_ RExC_state_t *pRExC_state)
                 }
                 break;
             case KEEPCOPY_PAT_MOD: /* 'p' */
+                if (RExC_pm_flags & PMf_WILDCARD) {
+                    goto modifier_illegal_in_wildcard;
+                }
                 if (flagsp == &negflags) {
                     ckWARNreg(RExC_parse + 1,"Useless use of (?-p)");
                 } else {
@@ -10900,6 +11021,18 @@ S_parse_lparen_question_flags(pTHX_ RExC_state_t *pRExC_state)
             case ':':
             case ')':
 
+                if (  (RExC_pm_flags & PMf_WILDCARD)
+                    && cs != REGEX_ASCII_MORE_RESTRICTED_CHARSET)
+                {
+                    RExC_parse++;
+                    /* diag_listed_as: Use of %s is not allowed in Unicode
+                       property wildcard subpatterns in regex; marked by <--
+                       HERE in m/%s/ */
+                    vFAIL2("Use of modifier '%c' is not allowed in Unicode"
+                           " property wildcard subpatterns",
+                           has_charset_modifier);
+                }
+
                 if ((posflags & (RXf_PMf_EXTENDED|RXf_PMf_EXTENDED_MORE)) == RXf_PMf_EXTENDED) {
                     negflags |= RXf_PMf_EXTENDED_MORE;
                 }
@@ -10925,6 +11058,13 @@ S_parse_lparen_question_flags(pTHX_ RExC_state_t *pRExC_state)
     }
 
     vFAIL("Sequence (?... not terminated");
+
+  modifier_illegal_in_wildcard:
+    RExC_parse++;
+    /* diag_listed_as: Use of %s is not allowed in Unicode property wildcard
+       subpatterns in regex; marked by <-- HERE in m/%s/ */
+    vFAIL2("Use of modifier '%c' is not allowed in Unicode property wildcard"
+           " subpatterns", *(RExC_parse - 1));
 }
 
 /*
@@ -10943,7 +11083,7 @@ S_parse_lparen_question_flags(pTHX_ RExC_state_t *pRExC_state)
 #define REGTAIL_STUDY(x,y,z) regtail((x),(y),(z),depth+1)
 #endif
 
-PERL_STATIC_INLINE regnode_offset
+STATIC regnode_offset
 S_handle_named_backref(pTHX_ RExC_state_t *pRExC_state,
                              I32 *flagp,
                              char * parse_start,
@@ -10954,7 +11094,7 @@ S_handle_named_backref(pTHX_ RExC_state_t *pRExC_state,
     char* name_start = RExC_parse;
     U32 num = 0;
     SV *sv_dat = reg_scan_name(pRExC_state, REG_RSN_RETURN_DATA);
-    GET_RE_DEBUG_FLAGS_DECL;
+    DECLARE_AND_GET_RE_DEBUG_FLAGS;
 
     PERL_ARGS_ASSERT_HANDLE_NAMED_BACKREF;
 
@@ -11022,7 +11162,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
     char * parse_start = RExC_parse; /* MJD */
     char * const oregcomp_parse = RExC_parse;
 
-    GET_RE_DEBUG_FLAGS_DECL;
+    DECLARE_AND_GET_RE_DEBUG_FLAGS;
 
     PERL_ARGS_ASSERT_REG;
     DEBUG_PARSE("reg ");
@@ -11037,7 +11177,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
         vFAIL("Too many nested open parens");
     }
 
-    *flagp = 0;                                /* Tentatively. */
+    *flagp = 0;                                /* Initialize. */
 
     if (RExC_in_lookbehind) {
        RExC_in_lookbehind++;
@@ -11347,7 +11487,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
             }
             if ( arg_required && !start_arg ) {
                 vFAIL3("Verb pattern '%.*s' has a mandatory argument",
-                    verb_len, start_verb);
+                    (int) verb_len, start_verb);
             }
             if (internal_argval == -1) {
                 ret = reganode(pRExC_state, op, 0);
@@ -11374,6 +11514,10 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
            bool is_logical = 0;
            const char * const seqstart = RExC_parse;
             const char * endptr;
+            const char non_existent_group_msg[]
+                                            = "Reference to nonexistent group";
+            const char impossible_group[] = "Invalid reference to group";
+
             if (has_intervening_patws) {
                 RExC_parse++;
                 vFAIL("In '(?...)', the '(' and '?' must be adjacent");
@@ -11413,7 +11557,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
                 RExC_parse += SKIP_IF_CHAR(RExC_parse, RExC_end);
                 /* diag_listed_as: Sequence (?%s...) not recognized in regex; marked by <-- HERE in m/%s/ */
                vFAIL3("Sequence (%.*s...) not recognized",
-                                RExC_parse-seqstart, seqstart);
+                                (int) (RExC_parse - seqstart), seqstart);
                NOT_REACHED; /*NOTREACHED*/
             case '<':           /* (?<...) */
                 /* If you want to support (?<*...), first reconcile with GH #17363 */
@@ -11434,7 +11578,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
                         || *RExC_parse != paren)
                     {
                        vFAIL2("Sequence (?%c... not terminated",
-                           paren=='>' ? '<' : paren);
+                           paren=='>' ? '<' : (char) paren);
                     }
                    {
                        HE *he_str;
@@ -11600,10 +11744,17 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
                     ) {
                         num = (I32)unum;
                         RExC_parse = (char*)endptr;
-                    } else
-                        num = I32_MAX;
+                    }
+                    else {  /* Overflow, or something like that.  Position
+                               beyond all digits for the message */
+                        while (RExC_parse < RExC_end && isDIGIT(*RExC_parse))  {
+                            RExC_parse++;
+                        }
+                        vFAIL(impossible_group);
+                    }
                     if (is_neg) {
-                        /* Some limit for num? */
+                        /* -num is always representable on 1 and 2's complement
+                         * machines */
                         num = -num;
                     }
                 }
@@ -11611,45 +11762,43 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
                    vFAIL("Expecting close bracket");
 
               gen_recurse_regop:
-                if ( paren == '-' ) {
+                if (paren == '-' || paren == '+') {
+
+                    /* Don't overflow */
+                    if (UNLIKELY(I32_MAX - RExC_npar < num)) {
+                        RExC_parse++;
+                        vFAIL(impossible_group);
+                    }
+
                     /*
                     Diagram of capture buffer numbering.
                     Top line is the normal capture buffer numbers
                     Bottom line is the negative indexing as from
                     the X (the (?-2))
 
-                    +   1 2    3 4 5 X          6 7
+                        1 2    3 4 5 X   Y      6 7
+                       /(a(x)y)(a(b(c(?+2)d)e)f)(g(h))/
                        /(a(x)y)(a(b(c(?-2)d)e)f)(g(h))/
-                    -   5 4    3 2 1 X          x x
+                    -   5 4    3 2 1 X   Y      x x
 
+                    Resolve to absolute group.  Recall that RExC_npar is +1 of
+                    the actual parenthesis group number.  For lookahead, we
+                    have to compensate for that.  Using the above example, when
+                    we get to Y in the parse, num is 2 and RExC_npar is 6.  We
+                    want 7 for +2, and 4 for -2.
                     */
-                    num = RExC_npar + num;
-                    if (num < 1)  {
+                    if ( paren == '+' ) {
+                        num--;
+                    }
 
-                        /* It might be a forward reference; we can't fail until
-                         * we know, by completing the parse to get all the
-                         * groups, and then reparsing */
-                        if (ALL_PARENS_COUNTED)  {
-                            RExC_parse++;
-                            vFAIL("Reference to nonexistent group");
-                        }
-                        else {
-                            REQUIRE_PARENS_PASS;
-                        }
+                    num += RExC_npar;
+
+                    if (paren == '-' && num < 1) {
+                        RExC_parse++;
+                        vFAIL(non_existent_group_msg);
                     }
-                } else if ( paren == '+' ) {
-                    num = RExC_npar + num - 1;
                 }
-                /* We keep track how many GOSUB items we have produced.
-                   To start off the ARG2L() of the GOSUB holds its "id",
-                   which is used later in conjunction with RExC_recurse
-                   to calculate the offset we need to jump for the GOSUB,
-                   which it will store in the final representation.
-                   We have to defer the actual calculation until much later
-                   as the regop may move.
-                 */
 
-                ret = reg2Lanode(pRExC_state, GOSUB, num, RExC_recurse_count);
                 if (num >= RExC_npar) {
 
                     /* It might be a forward reference; we can't fail until we
@@ -11658,13 +11807,23 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
                     if (ALL_PARENS_COUNTED)  {
                         if (num >= RExC_total_parens) {
                             RExC_parse++;
-                            vFAIL("Reference to nonexistent group");
+                            vFAIL(non_existent_group_msg);
                         }
                     }
                     else {
                         REQUIRE_PARENS_PASS;
                     }
                 }
+
+                /* We keep track how many GOSUB items we have produced.
+                   To start off the ARG2L() of the GOSUB holds its "id",
+                   which is used later in conjunction with RExC_recurse
+                   to calculate the offset we need to jump for the GOSUB,
+                   which it will store in the final representation.
+                   We have to defer the actual calculation until much later
+                   as the regop may move.
+                 */
+                ret = reg2Lanode(pRExC_state, GOSUB, num, RExC_recurse_count);
                 RExC_recurse_count++;
                 DEBUG_OPTIMISE_MORE_r(Perl_re_printf( aTHX_
                     "%*s%*s Recurse #%" UVuf " to %" IVdf "\n",
@@ -12062,7 +12221,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
             if (RExC_open_parens && !RExC_open_parens[parno])
             {
                 DEBUG_OPTIMISE_MORE_r(Perl_re_printf( aTHX_
-                    "%*s%*s Setting open paren #%" IVdf " to %d\n",
+                    "%*s%*s Setting open paren #%" IVdf " to %zu\n",
                     22, "|    |", (int)(depth * 2 + 1), "",
                     (IV)parno, ret));
                 RExC_open_parens[parno]= ret;
@@ -12112,7 +12271,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
     }
     else if (paren != '?')             /* Not Conditional */
        ret = br;
-    *flagp |= flags & (SPSTART | HASWIDTH | POSTPONED);
+    *flagp |= flags & (HASWIDTH | POSTPONED);
     lastbr = br;
     while (*RExC_parse == '|') {
        if (RExC_use_BRANCHJ) {
@@ -12142,7 +12301,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
             REQUIRE_BRANCHJ(flagp, 0);
         }
        lastbr = br;
-       *flagp |= flags & (SPSTART | HASWIDTH | POSTPONED);
+       *flagp |= flags & (HASWIDTH | POSTPONED);
     }
 
     if (have_branch || paren != ':') {
@@ -12157,7 +12316,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
            ender = reganode(pRExC_state, CLOSE, parno);
             if ( RExC_close_parens ) {
                 DEBUG_OPTIMISE_MORE_r(Perl_re_printf( aTHX_
-                        "%*s%*s Setting close paren #%" IVdf " to %d\n",
+                        "%*s%*s Setting close paren #%" IVdf " to %zu\n",
                         22, "|    |", (int)(depth * 2 + 1), "",
                         (IV)parno, ender));
                 RExC_close_parens[parno]= ender;
@@ -12191,7 +12350,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
             RExC_end_op = REGNODE_p(ender);
             if (RExC_close_parens) {
                 DEBUG_OPTIMISE_MORE_r(Perl_re_printf( aTHX_
-                    "%*s%*s Setting close paren #0 (END) to %d\n",
+                    "%*s%*s Setting close paren #0 (END) to %zu\n",
                     22, "|    |", (int)(depth * 2 + 1), "",
                     ender));
 
@@ -12358,7 +12517,7 @@ S_regbranch(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, I32 first, U32 depth)
     regnode_offset chain = 0;
     regnode_offset latest;
     I32 flags = 0, c = 0;
-    GET_RE_DEBUG_FLAGS_DECL;
+    DECLARE_AND_GET_RE_DEBUG_FLAGS;
 
     PERL_ARGS_ASSERT_REGBRANCH;
 
@@ -12375,7 +12534,7 @@ S_regbranch(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, I32 first, U32 depth)
         }
     }
 
-    *flagp = WORST;                    /* Tentatively. */
+    *flagp = 0;                        /* Initialize. */
 
     skip_to_be_ignored_text(pRExC_state, &RExC_parse,
                             FALSE /* Don't force to /x */ );
@@ -12391,9 +12550,7 @@ S_regbranch(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, I32 first, U32 depth)
        else if (ret == 0)
             ret = latest;
        *flagp |= flags&(HASWIDTH|POSTPONED);
-       if (chain == 0)         /* First piece. */
-           *flagp |= flags&SPSTART;
-       else {
+       if (chain != 0) {
            /* FIXME adding one for every branch after the first is probably
             * excessive now we have TRIE support. (hv) */
            MARK_NAUGHTY(1);
@@ -12455,7 +12612,7 @@ S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
     /* Save the original in case we change the emitted regop to a FAIL. */
     const regnode_offset orig_emit = RExC_emit;
 
-    GET_RE_DEBUG_FLAGS_DECL;
+    DECLARE_AND_GET_RE_DEBUG_FLAGS;
 
     PERL_ARGS_ASSERT_REGPIECE;
 
@@ -12467,13 +12624,38 @@ S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
         FAIL2("panic: regatom returned failure, flags=%#" UVxf, (UV) flags);
     }
 
+    if (! ISMULT2(RExC_parse)) {
+       *flagp = flags;
+       return(ret);
+    }
+
+    /* Here we know the input is a legal quantifier, including {m,n} */
+
     op = *RExC_parse;
 
-    if (op == '{' && regcurly(RExC_parse)) {
-       maxpos = NULL;
 #ifdef RE_TRACK_PATTERN_OFFSETS
-        parse_start = RExC_parse; /* MJD */
+    parse_start = RExC_parse;
 #endif
+
+    if (op != '{') {
+        nextchar(pRExC_state);
+
+        *flagp = HASWIDTH;
+
+        if (op == '*') {
+            min = 0;
+        }
+        else if (op == '+') {
+            min = 1;
+        }
+        else if (op == '?') {
+            min = 0; max = 1;
+        }
+    }
+    else {  /* is '{' */
+        const char* endptr;
+
+       maxpos = NULL;
        next = RExC_parse + 1;
        while (isDIGIT(*next) || *next == ',') {
            if (*next == ',') {
@@ -12484,119 +12666,130 @@ S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
            }
            next++;
        }
-       if (*next == '}') {             /* got one */
-            const char* endptr;
-           if (!maxpos)
-               maxpos = next;
-           RExC_parse++;
-            if (isDIGIT(*RExC_parse)) {
-                endptr = RExC_end;
-                if (!grok_atoUV(RExC_parse, &uv, &endptr))
-                    vFAIL("Invalid quantifier in {,}");
-                if (uv >= REG_INFTY)
-                    vFAIL2("Quantifier in {,} bigger than %d", REG_INFTY - 1);
-                min = (I32)uv;
-            } else {
-                min = 0;
-            }
-           if (*maxpos == ',')
-               maxpos++;
-           else
-               maxpos = RExC_parse;
-            if (isDIGIT(*maxpos)) {
-                endptr = RExC_end;
-                if (!grok_atoUV(maxpos, &uv, &endptr))
-                    vFAIL("Invalid quantifier in {,}");
-                if (uv >= REG_INFTY)
-                    vFAIL2("Quantifier in {,} bigger than %d", REG_INFTY - 1);
-                max = (I32)uv;
-            } else {
-               max = REG_INFTY;                /* meaning "infinity" */
-            }
-           RExC_parse = next;
-           nextchar(pRExC_state);
-            if (max < min) {    /* If can't match, warn and optimize to fail
-                                   unconditionally */
-                reginsert(pRExC_state, OPFAIL, orig_emit, depth+1);
-                ckWARNreg(RExC_parse, "Quantifier {n,m} with n > m can't match");
-                NEXT_OFF(REGNODE_p(orig_emit)) =
-                                    regarglen[OPFAIL] + NODE_STEP_REGNODE;
-                return ret;
-            }
-            else if (min == max && *RExC_parse == '?')
-            {
-                ckWARN2reg(RExC_parse + 1,
-                           "Useless use of greediness modifier '%c'",
-                           *RExC_parse);
-            }
-
-         do_curly:
-           if ((flags&SIMPLE)) {
-                if (min == 0 && max == REG_INFTY) {
-                    reginsert(pRExC_state, STAR, ret, depth+1);
-                    MARK_NAUGHTY(4);
-                    RExC_seen |= REG_UNBOUNDED_QUANTIFIER_SEEN;
-                    goto nest_check;
-                }
-                if (min == 1 && max == REG_INFTY) {
-                    reginsert(pRExC_state, PLUS, ret, depth+1);
-                    MARK_NAUGHTY(3);
-                    RExC_seen |= REG_UNBOUNDED_QUANTIFIER_SEEN;
-                    goto nest_check;
-                }
-                MARK_NAUGHTY_EXP(2, 2);
-               reginsert(pRExC_state, CURLY, ret, depth+1);
-                Set_Node_Offset(REGNODE_p(ret), parse_start+1); /* MJD */
-                Set_Node_Cur_Length(REGNODE_p(ret), parse_start);
-           }
-           else {
-               const regnode_offset w = reg_node(pRExC_state, WHILEM);
 
-               FLAGS(REGNODE_p(w)) = 0;
-                if (!  REGTAIL(pRExC_state, ret, w)) {
-                    REQUIRE_BRANCHJ(flagp, 0);
-                }
-               if (RExC_use_BRANCHJ) {
-                   reginsert(pRExC_state, LONGJMP, ret, depth+1);
-                   reginsert(pRExC_state, NOTHING, ret, depth+1);
-                   NEXT_OFF(REGNODE_p(ret)) = 3;       /* Go over LONGJMP. */
-               }
-               reginsert(pRExC_state, CURLYX, ret, depth+1);
-                                /* MJD hk */
-                Set_Node_Offset(REGNODE_p(ret), parse_start+1);
-                Set_Node_Length(REGNODE_p(ret),
-                                op == '{' ? (RExC_parse - parse_start) : 1);
+       assert(*next == '}');
 
-               if (RExC_use_BRANCHJ)
-                    NEXT_OFF(REGNODE_p(ret)) = 3;   /* Go over NOTHING to
-                                                       LONGJMP. */
-                if (! REGTAIL(pRExC_state, ret, reg_node(pRExC_state,
-                                                          NOTHING)))
-                {
-                    REQUIRE_BRANCHJ(flagp, 0);
+        if (!maxpos)
+            maxpos = next;
+        RExC_parse++;
+        if (isDIGIT(*RExC_parse)) {
+            endptr = RExC_end;
+            if (!grok_atoUV(RExC_parse, &uv, &endptr))
+                vFAIL("Invalid quantifier in {,}");
+            if (uv >= REG_INFTY)
+                vFAIL2("Quantifier in {,} bigger than %d", REG_INFTY - 1);
+            min = (I32)uv;
+        } else {
+            min = 0;
+        }
+        if (*maxpos == ',')
+            maxpos++;
+        else
+            maxpos = RExC_parse;
+        if (isDIGIT(*maxpos)) {
+            endptr = RExC_end;
+            if (!grok_atoUV(maxpos, &uv, &endptr))
+                vFAIL("Invalid quantifier in {,}");
+            if (uv >= REG_INFTY)
+                vFAIL2("Quantifier in {,} bigger than %d", REG_INFTY - 1);
+            max = (I32)uv;
+        } else {
+            max = REG_INFTY;           /* meaning "infinity" */
+        }
+        RExC_parse = next;
+        nextchar(pRExC_state);
+        if (max < min) {    /* If can't match, warn and optimize to fail
+                               unconditionally */
+            reginsert(pRExC_state, OPFAIL, orig_emit, depth+1);
+            ckWARNreg(RExC_parse, "Quantifier {n,m} with n > m can't match");
+            NEXT_OFF(REGNODE_p(orig_emit)) =
+                                regarglen[OPFAIL] + NODE_STEP_REGNODE;
+            return ret;
+        }
+        else if (min == max && *RExC_parse == '?')
+        {
+            ckWARN2reg(RExC_parse + 1,
+                       "Useless use of greediness modifier '%c'",
+                       *RExC_parse);
+        }
+    }
+
+        if ((flags&SIMPLE)) {
+            if (min == 0 && max == REG_INFTY) {
+
+                /* Going from 0..inf is currently forbidden in wildcard
+                 * subpatterns.  The only reason is to make it harder to
+                 * write patterns that take a long long time to halt, and
+                 * because the use of this construct isn't necessary in
+                 * matching Unicode property values */
+                if (RExC_pm_flags & PMf_WILDCARD) {
+                    RExC_parse++;
+                    /* diag_listed_as: Use of %s is not allowed in Unicode
+                       property wildcard subpatterns in regex; marked by
+                       <-- HERE in m/%s/ */
+                    vFAIL("Use of quantifier '*' is not allowed in"
+                          " Unicode property wildcard subpatterns");
+                    /* Note, don't need to worry about {0,}, as a '}' isn't
+                     * legal at all in wildcards, so wouldn't get this far
+                     * */
                 }
-                RExC_whilem_seen++;
-                MARK_NAUGHTY_EXP(1, 4);     /* compound interest */
-           }
-           FLAGS(REGNODE_p(ret)) = 0;
-
-           if (min > 0)
-               *flagp = WORST;
-           if (max > 0)
-               *flagp |= HASWIDTH;
-            ARG1_SET(REGNODE_p(ret), (U16)min);
-            ARG2_SET(REGNODE_p(ret), (U16)max);
-            if (max == REG_INFTY)
+                reginsert(pRExC_state, STAR, ret, depth+1);
+                MARK_NAUGHTY(4);
                 RExC_seen |= REG_UNBOUNDED_QUANTIFIER_SEEN;
+                goto nest_check;
+            }
+            if (min == 1 && max == REG_INFTY) {
+                reginsert(pRExC_state, PLUS, ret, depth+1);
+                MARK_NAUGHTY(3);
+                RExC_seen |= REG_UNBOUNDED_QUANTIFIER_SEEN;
+                goto nest_check;
+            }
+            MARK_NAUGHTY_EXP(2, 2);
+            reginsert(pRExC_state, CURLY, ret, depth+1);
+            Set_Node_Offset(REGNODE_p(ret), parse_start+1); /* MJD */
+            Set_Node_Cur_Length(REGNODE_p(ret), parse_start);
+        }
+        else {
+            const regnode_offset w = reg_node(pRExC_state, WHILEM);
 
-           goto nest_check;
-       }
-    }
+            FLAGS(REGNODE_p(w)) = 0;
+            if (!  REGTAIL(pRExC_state, ret, w)) {
+                REQUIRE_BRANCHJ(flagp, 0);
+            }
+            if (RExC_use_BRANCHJ) {
+                reginsert(pRExC_state, LONGJMP, ret, depth+1);
+                reginsert(pRExC_state, NOTHING, ret, depth+1);
+                NEXT_OFF(REGNODE_p(ret)) = 3;  /* Go over LONGJMP. */
+            }
+            reginsert(pRExC_state, CURLYX, ret, depth+1);
+                            /* MJD hk */
+            Set_Node_Offset(REGNODE_p(ret), parse_start+1);
+            Set_Node_Length(REGNODE_p(ret),
+                            op == '{' ? (RExC_parse - parse_start) : 1);
+
+            if (RExC_use_BRANCHJ)
+                NEXT_OFF(REGNODE_p(ret)) = 3;   /* Go over NOTHING to
+                                                   LONGJMP. */
+            if (! REGTAIL(pRExC_state, ret, reg_node(pRExC_state,
+                                                      NOTHING)))
+            {
+                REQUIRE_BRANCHJ(flagp, 0);
+            }
+            RExC_whilem_seen++;
+            MARK_NAUGHTY_EXP(1, 4);     /* compound interest */
+        }
+        FLAGS(REGNODE_p(ret)) = 0;
 
-    if (!ISMULT1(op)) {
-       *flagp = flags;
-       return(ret);
-    }
+        if (min > 0)
+            *flagp = 0;
+        if (max > 0)
+            *flagp |= HASWIDTH;
+        ARG1_SET(REGNODE_p(ret), (U16)min);
+        ARG2_SET(REGNODE_p(ret), (U16)max);
+        if (max == REG_INFTY)
+            RExC_seen |= REG_UNBOUNDED_QUANTIFIER_SEEN;
+
+        goto nest_check;
 
 #if 0                          /* Now runtime fix should be reliable. */
 
@@ -12613,26 +12806,6 @@ S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
     if (!(flags&HASWIDTH) && op != '?')
       vFAIL("Regexp *+ operand could be empty");
 #endif
-
-#ifdef RE_TRACK_PATTERN_OFFSETS
-    parse_start = RExC_parse;
-#endif
-    nextchar(pRExC_state);
-
-    *flagp = (op != '+') ? (WORST|SPSTART|HASWIDTH) : (WORST|HASWIDTH);
-
-    if (op == '*') {
-       min = 0;
-       goto do_curly;
-    }
-    else if (op == '+') {
-       min = 1;
-       goto do_curly;
-    }
-    else if (op == '?') {
-       min = 0; max = 1;
-       goto do_curly;
-    }
   nest_check:
     if (!(flags&(HASWIDTH|POSTPONED)) && max > REG_INFTY/3) {
         if (origparse[0] == '\\' && origparse[1] == 'K') {
@@ -12779,12 +12952,10 @@ S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state,
     char *save_start;
     I32 flags;
 
-    GET_RE_DEBUG_FLAGS_DECL;
+    DECLARE_AND_GET_RE_DEBUG_FLAGS;
 
     PERL_ARGS_ASSERT_GROK_BSLASH_N;
 
-    GET_RE_DEBUG_FLAGS;
-
     assert(cBOOL(node_p) ^ cBOOL(code_point_p));  /* Exactly one should be set */
     assert(! (node_p && cp_count));               /* At most 1 should be set */
 
@@ -13119,7 +13290,7 @@ S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state,
         FAIL2("panic: reg returned failure to grok_bslash_N, flags=%#" UVxf,
             (UV) flags);
     }
-    *flagp |= flags&(HASWIDTH|SPSTART|SIMPLE|POSTPONED);
+    *flagp |= flags&(HASWIDTH|SIMPLE|POSTPONED);
 
     nextchar(pRExC_state);
 
@@ -13127,7 +13298,7 @@ S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state,
 }
 
 
-PERL_STATIC_INLINE U8
+STATIC U8
 S_compute_EXACTish(RExC_state_t *pRExC_state)
 {
     U8 op;
@@ -13282,16 +13453,15 @@ S_backref_value(char *p, char *e)
 STATIC regnode_offset
 S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
 {
-    dVAR;
     regnode_offset ret = 0;
     I32 flags = 0;
     char *parse_start;
     U8 op;
     int invert = 0;
 
-    GET_RE_DEBUG_FLAGS_DECL;
+    DECLARE_AND_GET_RE_DEBUG_FLAGS;
 
-    *flagp = WORST;            /* Tentatively. */
+    *flagp = 0;                /* Initialize. */
 
     DEBUG_PARSE("atom");
 
@@ -13369,7 +13539,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                 FAIL2("panic: reg returned failure to regatom, flags=%#" UVxf,
                                                                  (UV) flags);
        }
-       *flagp |= flags&(HASWIDTH|SPSTART|SIMPLE|POSTPONED);
+       *flagp |= flags&(HASWIDTH|SIMPLE|POSTPONED);
        break;
     case '|':
     case ')':
@@ -13404,22 +13574,35 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
        /* Special Escapes */
        case 'A':
            RExC_seen_zerolen++;
-           ret = reg_node(pRExC_state, SBOL);
-            /* SBOL is shared with /^/ so we set the flags so we can tell
-             * /\A/ from /^/ in split. */
-            FLAGS(REGNODE_p(ret)) = 1;
-           *flagp |= SIMPLE;
+            /* Under wildcards, this is changed to match \n; should be
+             * invisible to the user, as they have to compile under /m */
+            if (RExC_pm_flags & PMf_WILDCARD) {
+                ret = reg_node(pRExC_state, MBOL);
+            }
+            else {
+                ret = reg_node(pRExC_state, SBOL);
+                /* SBOL is shared with /^/ so we set the flags so we can tell
+                 * /\A/ from /^/ in split. */
+                FLAGS(REGNODE_p(ret)) = 1;
+                *flagp |= SIMPLE;   /* Wrong, but too late to fix for 5.32 */
+            }
            goto finish_meta_pat;
        case 'G':
+            if (RExC_pm_flags & PMf_WILDCARD) {
+                RExC_parse++;
+                /* diag_listed_as: Use of %s is not allowed in Unicode property
+                   wildcard subpatterns in regex; marked by <-- HERE in m/%s/
+                 */
+                vFAIL("Use of '\\G' is not allowed in Unicode property"
+                      " wildcard subpatterns");
+            }
            ret = reg_node(pRExC_state, GPOS);
             RExC_seen |= REG_GPOS_SEEN;
-           *flagp |= SIMPLE;
            goto finish_meta_pat;
        case 'K':
             if (!RExC_in_lookbehind && !RExC_in_lookahead) {
                 RExC_seen_zerolen++;
                 ret = reg_node(pRExC_state, KEEPS);
-                *flagp |= SIMPLE;
                 /* XXX:dmq : disabling in-place substitution seems to
                  * be necessary here to avoid cases of memory corruption, as
                  * with: C<$_="x" x 80; s/x\K/y/> -- rgs
@@ -13432,13 +13615,25 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                 vFAIL("\\K not permitted in lookahead/lookbehind");
             }
        case 'Z':
-           ret = reg_node(pRExC_state, SEOL);
-           *flagp |= SIMPLE;
+            if (RExC_pm_flags & PMf_WILDCARD) {
+                /* See comment under \A above */
+                ret = reg_node(pRExC_state, MEOL);
+            }
+            else {
+                ret = reg_node(pRExC_state, SEOL);
+                *flagp |= SIMPLE;   /* Wrong, but too late to fix for 5.32 */
+            }
            RExC_seen_zerolen++;                /* Do not optimize RE away */
            goto finish_meta_pat;
        case 'z':
-           ret = reg_node(pRExC_state, EOS);
-           *flagp |= SIMPLE;
+            if (RExC_pm_flags & PMf_WILDCARD) {
+                /* See comment under \A above */
+                ret = reg_node(pRExC_state, MEOL);
+            }
+            else {
+                ret = reg_node(pRExC_state, EOS);
+                *flagp |= SIMPLE;   /* Wrong, but too late to fix for 5.32 */
+            }
            RExC_seen_zerolen++;                /* Do not optimize RE away */
            goto finish_meta_pat;
        case 'C':
@@ -13561,8 +13756,6 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
            ret = reg_node(pRExC_state, op);
             FLAGS(REGNODE_p(ret)) = flags;
 
-           *flagp |= SIMPLE;
-
            goto finish_meta_pat;
           }
 
@@ -13737,10 +13930,8 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                         num > 9
                         /* any numeric escape < RExC_npar is a backref */
                         && num >= RExC_npar
-                        /* cannot be an octal escape if it starts with 8 */
-                        && *RExC_parse != '8'
-                        /* cannot be an octal escape if it starts with 9 */
-                        && *RExC_parse != '9'
+                        /* cannot be an octal escape if it starts with [89] */
+                        && ! inRANGE(*RExC_parse, '8', '9')
                     ) {
                         /* Probably not meant to be a backref, instead likely
                          * to be an octal character escape, e.g. \35 or \777.
@@ -14995,23 +15186,18 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                         * have to map that back to the original */
                         if (need_to_fold_loc) {
                             upper_fill = loc_correspondence[s - s_start];
-                            Safefree(locfold_buf);
-                            Safefree(loc_correspondence);
-
                             if (upper_fill == 0) {
                                 FAIL2("panic: loc_correspondence[%d] is 0",
                                       (int) (s - s_start));
                             }
+                            Safefree(locfold_buf);
+                            Safefree(loc_correspondence);
                         }
                         else {
                             upper_fill = s - s0;
                         }
                         goto reparse;
                     }
-                    else if (need_to_fold_loc) {
-                        Safefree(locfold_buf);
-                        Safefree(loc_correspondence);
-                    }
 
                     /* Here the node consists entirely of non-final multi-char
                      * folds.  (Likely it is all 'f's or all 's's.)  There's no
@@ -15019,6 +15205,11 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                      * whole thing */
                     len = old_s - s0;
                 }
+
+                if (need_to_fold_loc) {
+                    Safefree(locfold_buf);
+                    Safefree(loc_correspondence);
+                }
            }   /* End of verifying node ends with an appropriate char */
 
             /* We need to start the next node at the character that didn't fit
@@ -15179,7 +15370,6 @@ S_populate_ANYOF_from_invlist(pTHX_ regnode *node, SV** invlist_ptr)
      * sets up the bitmap and any flags, removing those code points from the
      * inversion list, setting it to NULL should it become completely empty */
 
-    dVAR;
 
     PERL_ARGS_ASSERT_POPULATE_ANYOF_FROM_INVLIST;
     assert(PL_regkind[OP(node)] == ANYOF);
@@ -15219,9 +15409,7 @@ S_populate_ANYOF_from_invlist(pTHX_ regnode *node, SV** invlist_ptr)
                    ? end
                    : NUM_ANYOF_CODE_POINTS - 1;
            for (i = start; i <= (int) high; i++) {
-               if (! ANYOF_BITMAP_TEST(node, i)) {
-                   ANYOF_BITMAP_SET(node, i);
-               }
+                ANYOF_BITMAP_SET(node, i);
            }
        }
        invlist_iterfinish(*invlist_ptr);
@@ -16036,7 +16224,7 @@ S_handle_possible_posix(pTHX_ RExC_state_t *pRExC_state,
 
             if (   posix_warnings
                 && RExC_warn_text
-                && av_top_index(RExC_warn_text) > -1)
+                && av_count(RExC_warn_text) > 0)
             {
                 *posix_warnings = RExC_warn_text;
             }
@@ -16122,9 +16310,10 @@ S_handle_regex_sets(pTHX_ RExC_state_t *pRExC_state, SV** return_invlist,
     char *save_end, *save_parse;    /* Temporaries */
     const bool in_locale = LOC;     /* we turn off /l during processing */
 
-    GET_RE_DEBUG_FLAGS_DECL;
+    DECLARE_AND_GET_RE_DEBUG_FLAGS;
 
     PERL_ARGS_ASSERT_HANDLE_REGEX_SETS;
+    PERL_UNUSED_ARG(oregcomp_parse); /* Only for Set_Node_Length */
 
     DEBUG_PARSE("xcls");
 
@@ -16287,7 +16476,10 @@ redo_curchar:
                            /* If more than a single node returned, the nested
                             * parens evaluated to more than just a (?[...]),
                             * which isn't legal */
-                        || node != 1) {
+                        || RExC_emit != orig_emit
+                                      + NODE_STEP_REGNODE
+                                      + regarglen[REGEX_SET])
+                    {
                         vFAIL("Expecting interpolated extended charclass");
                     }
                     resultant_invlist = (SV *) ARGp(REGNODE_p(node));
@@ -16338,6 +16530,8 @@ redo_curchar:
                     goto regclass_failed;
                 }
 
+                assert(current);
+
                 /* regclass() will return with parsing just the \ sequence,
                  * leaving the parse pointer at the next thing to parse */
                 RExC_parse--;
@@ -16375,9 +16569,7 @@ redo_curchar:
                     goto regclass_failed;
                 }
 
-                if (! current) {
-                    break;
-                }
+                assert(current);
 
                 /* function call leaves parse pointing to the ']', except if we
                  * faked it */
@@ -16686,7 +16878,7 @@ redo_curchar:
     if (RExC_sets_depth) {  /* If within a recursive call, return in a special
                                regnode */
         RExC_parse++;
-        node = regpnode(pRExC_state, REGEX_SET, (void *) final);
+        node = regpnode(pRExC_state, REGEX_SET, final);
     }
     else {
 
@@ -17035,10 +17227,10 @@ S_add_multi_match(pTHX_ AV* multi_char_matches, SV* multi_string, const STRLEN c
  *
  * There is a line below that uses the same white space criteria but is outside
  * this macro.  Both here and there must use the same definition */
-#define SKIP_BRACKETED_WHITE_SPACE(do_skip, p)                          \
+#define SKIP_BRACKETED_WHITE_SPACE(do_skip, p, stop_p)                  \
     STMT_START {                                                        \
         if (do_skip) {                                                  \
-            while (isBLANK_A(UCHARAT(p)))                               \
+            while (p < stop_p && isBLANK_A(UCHARAT(p)))                 \
             {                                                           \
                 p++;                                                    \
             }                                                           \
@@ -17088,7 +17280,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
      * UTF-8
      */
 
-    dVAR;
     UV prevvalue = OOB_UNICODE, save_prevvalue = OOB_UNICODE;
     IV range = 0;
     UV value = OOB_UNICODE, save_value = OOB_UNICODE;
@@ -17185,13 +17376,14 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                                             what gets folded */
     U32 has_runtime_dependency = 0;     /* OR of the above flags */
 
-    GET_RE_DEBUG_FLAGS_DECL;
+    DECLARE_AND_GET_RE_DEBUG_FLAGS;
 
     PERL_ARGS_ASSERT_REGCLASS;
 #ifndef DEBUGGING
     PERL_UNUSED_ARG(depth);
 #endif
 
+    assert(! (ret_invlist && allow_mutiple_chars));
 
     /* If wants an inversion list returned, we can't optimize to something
      * else. */
@@ -17213,7 +17405,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
     initial_listsv_len = SvCUR(listsv);
     SvTEMP_off(listsv); /* Grr, TEMPs and mortals are conflated.  */
 
-    SKIP_BRACKETED_WHITE_SPACE(skip_white, RExC_parse);
+    SKIP_BRACKETED_WHITE_SPACE(skip_white, RExC_parse, RExC_end);
 
     assert(RExC_parse <= RExC_end);
 
@@ -17222,7 +17414,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
         invert = TRUE;
         allow_mutiple_chars = FALSE;
         MARK_NAUGHTY(1);
-        SKIP_BRACKETED_WHITE_SPACE(skip_white, RExC_parse);
+        SKIP_BRACKETED_WHITE_SPACE(skip_white, RExC_parse, RExC_end);
     }
 
     /* Check that they didn't say [:posix:] instead of [[:posix:]] */
@@ -17269,12 +17461,12 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
             output_posix_warnings(pRExC_state, posix_warnings);
         }
 
+        SKIP_BRACKETED_WHITE_SPACE(skip_white, RExC_parse, RExC_end);
+
         if  (RExC_parse >= stop_ptr) {
             break;
         }
 
-        SKIP_BRACKETED_WHITE_SPACE(skip_white, RExC_parse);
-
         if  (UCHARAT(RExC_parse) == ']') {
             break;
         }
@@ -17457,6 +17649,15 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                {
                char *e;
 
+                if (RExC_pm_flags & PMf_WILDCARD) {
+                    RExC_parse++;
+                    /* diag_listed_as: Use of %s is not allowed in Unicode
+                       property wildcard subpatterns in regex; marked by <--
+                       HERE in m/%s/ */
+                    vFAIL3("Use of '\\%c%c' is not allowed in Unicode property"
+                           " wildcard subpatterns", (char) value, *(RExC_parse - 1));
+                }
+
                /* \p means they want Unicode semantics */
                REQUIRE_UNI_RULES(flagp, 0);
 
@@ -17520,6 +17721,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                     /* If set TRUE, the property is user-defined as opposed to
                      * official Unicode */
                     bool user_defined = FALSE;
+                    AV * strings = NULL;
 
                     SV * prop_definition = parse_uniprop_string(
                                             name, n, UTF, FOLD,
@@ -17530,6 +17732,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                                              * this call */
                                             ! cBOOL(ret_invlist),
 
+                                            &strings,
                                             &user_defined,
                                             msg,
                                             0 /* Base level */
@@ -17547,7 +17750,66 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                                     SvCUR(msg), SvPVX(msg)));
                     }
 
-                    if (! is_invlist(prop_definition)) {
+                    assert(prop_definition || strings);
+
+                    if (strings) {
+                        if (ret_invlist) {
+                            if (! prop_definition) {
+                                RExC_parse = e + 1;
+                                vFAIL("Unicode string properties are not implemented in (?[...])");
+                            }
+                            else {
+                                ckWARNreg(e + 1,
+                                    "Using just the single character results"
+                                    " returned by \\p{} in (?[...])");
+                            }
+                        }
+                        else if (! RExC_in_multi_char_class) {
+                            if (invert ^ (value == 'P')) {
+                                RExC_parse = e + 1;
+                                vFAIL("Inverting a character class which contains"
+                                    " a multi-character sequence is illegal");
+                            }
+
+                            /* For each multi-character string ... */
+                            while (av_count(strings) > 0) {
+                                /* ... Each entry is itself an array of code
+                                * points. */
+                                AV * this_string = (AV *) av_shift( strings);
+                                STRLEN cp_count = av_count(this_string);
+                                SV * final = newSV(cp_count * 4);
+                                SvPVCLEAR(final);
+
+                                /* Create another string of sequences of \x{...} */
+                                while (av_count(this_string) > 0) {
+                                    SV * character = av_shift(this_string);
+                                    UV cp = SvUV(character);
+
+                                    if (cp > 255) {
+                                        REQUIRE_UTF8(flagp);
+                                    }
+                                    Perl_sv_catpvf(aTHX_ final, "\\x{%" UVXf "}",
+                                                                        cp);
+                                    SvREFCNT_dec_NN(character);
+                                }
+                                SvREFCNT_dec_NN(this_string);
+
+                                /* And add that to the list of such things */
+                                multi_char_matches
+                                            = add_multi_match(multi_char_matches,
+                                                            final,
+                                                            cp_count);
+                            }
+                        }
+                        SvREFCNT_dec_NN(strings);
+                    }
+
+                    if (! prop_definition) {    /* If we got only a string,
+                                                   this iteration didn't really
+                                                   find a character */
+                        element_count--;
+                    }
+                    else if (! is_invlist(prop_definition)) {
 
                         /* Here, the definition isn't known, so we have gotten
                          * returned a string that will be evaluated if and when
@@ -17893,7 +18155,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
            }
        } /* end of namedclass \blah */
 
-        SKIP_BRACKETED_WHITE_SPACE(skip_white, RExC_parse);
+        SKIP_BRACKETED_WHITE_SPACE(skip_white, RExC_parse, RExC_end);
 
         /* If 'range' is set, 'value' is the ending of a range--check its
          * validity.  (If value isn't a single code point in the case of a
@@ -17936,7 +18198,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                 char* next_char_ptr = RExC_parse + 1;
 
                 /* Get the next real char after the '-' */
-                SKIP_BRACKETED_WHITE_SPACE(skip_white, next_char_ptr);
+                SKIP_BRACKETED_WHITE_SPACE(skip_white, next_char_ptr, RExC_end);
 
                 /* If the '-' is at the end of the class (just before the ']',
                  * it is a literal minus; otherwise it is a range */
@@ -18073,7 +18335,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                      * printable should have each end point be a portable value
                      * for it (preferably like 'A', but we don't warn if it is
                      * a (portable) Unicode name or code point), and the range
-                     * must be be all digits or all letters of the same case.
+                     * must be all digits or all letters of the same case.
                      * Otherwise, the range is non-portable and unclear as to
                      * what it contains */
                     if (             (isPRINT_A(prevvalue) || isPRINT_A(value))
@@ -18265,7 +18527,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
         }
 #endif
 
-        /* Look at the longest folds first */
+        /* Look at the longest strings first */
         for (cp_count = av_tindex_skip_len_mg(multi_char_matches);
                         cp_count > 0;
                         cp_count--)
@@ -18291,15 +18553,21 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
         }
 
         /* If the character class contains anything else besides these
-         * multi-character folds, have to include it in recursive parsing */
+         * multi-character strings, have to include it in recursive parsing */
         if (element_count) {
-            sv_catpvs(substitute_parse, "|[");
+            bool has_l_bracket = orig_parse > RExC_start && *(orig_parse - 1) == '[';
+
+            sv_catpvs(substitute_parse, "|");
+            if (has_l_bracket) {    /* Add an [ if the original had one */
+                sv_catpvs(substitute_parse, "[");
+            }
             constructed_prefix_len = SvCUR(substitute_parse);
             sv_catpvn(substitute_parse, orig_parse, RExC_parse - orig_parse);
 
-            /* Put in a closing ']' only if not going off the end, as otherwise
-             * we are adding something that really isn't there */
-            if (RExC_parse < RExC_end) {
+            /* Put in a closing ']' to match any opening one, but not if going
+             * off the end, as otherwise we are adding something that really
+             * isn't there */
+            if (has_l_bracket && RExC_parse < RExC_end) {
                 sv_catpvs(substitute_parse, "]");
             }
         }
@@ -18325,7 +18593,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
 
        ret = reg(pRExC_state, 1, &reg_flags, depth+1);
 
-        *flagp |= reg_flags & (HASWIDTH|SIMPLE|SPSTART|POSTPONED|RESTART_PARSE|NEED_UTF8);
+        *flagp |= reg_flags & (HASWIDTH|SIMPLE|POSTPONED|RESTART_PARSE|NEED_UTF8);
 
         /* And restore so can parse the rest of the pattern */
         RExC_parse = save_parse;
@@ -18725,7 +18993,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
     if (ret_invlist) {
         *ret_invlist = cp_list;
 
-        return RExC_emit;
+        return (cp_list) ? RExC_emit : 0;
     }
 
     if (anyof_flags & ANYOF_LOCALE_FLAGS) {
@@ -19066,7 +19334,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                      * points) in the ASCII range, so we can't use it here to
                      * artificially restrict the fold domain, so we check if
                      * the class does or does not match some EXACTFish node.
-                     * Further, if we aren't under /i, and and the folded-to
+                     * Further, if we aren't under /i, and the folded-to
                      * character is part of a multi-character fold, we can't do
                      * this optimization, as the sequence around it could be
                      * that multi-character fold, and we don't here know the
@@ -19733,6 +20001,9 @@ S_set_ANYOF_arg(pTHX_ RExC_state_t* const pRExC_state,
             av_store(av, INVLIST_INDEX, SvREFCNT_inc_NN(cp_list));
         }
 
+        /* (Note that if any of this changes, the size calculations in
+         * S_optimize_regclass() might need to be updated.) */
+
         if (only_utf8_locale_list) {
             av_store(av, ONLY_LOCALE_MATCHES_INDEX,
                                      SvREFCNT_inc_NN(only_utf8_locale_list));
@@ -19750,14 +20021,13 @@ S_set_ANYOF_arg(pTHX_ RExC_state_t* const pRExC_state,
     }
 }
 
-#if !defined(PERL_IN_XSUB_RE) || defined(PLUGGABLE_RE_EXTENSION)
 SV *
-Perl__get_regclass_nonbitmap_data(pTHX_ const regexp *prog,
-                                        const regnode* node,
-                                        bool doinit,
-                                        SV** listsvp,
-                                        SV** only_utf8_locale_ptr,
-                                        SV** output_invlist)
+
+#if !defined(PERL_IN_XSUB_RE) || defined(PLUGGABLE_RE_EXTENSION)
+Perl_get_regclass_nonbitmap_data(pTHX_ const regexp *prog, const regnode* node, bool doinit, SV** listsvp, SV** only_utf8_locale_ptr, SV** output_invlist)
+#else
+Perl_get_re_gclass_nonbitmap_data(pTHX_ const regexp *prog, const regnode* node, bool doinit, SV** listsvp, SV** only_utf8_locale_ptr, SV** output_invlist)
+#endif
 
 {
     /* For internal core use only.
@@ -19793,7 +20063,11 @@ Perl__get_regclass_nonbitmap_data(pTHX_ const regexp *prog,
     RXi_GET_DECL(prog, progi);
     const struct reg_data * const data = prog ? progi->data : NULL;
 
-    PERL_ARGS_ASSERT__GET_REGCLASS_NONBITMAP_DATA;
+#if !defined(PERL_IN_XSUB_RE) || defined(PLUGGABLE_RE_EXTENSION)
+    PERL_ARGS_ASSERT_GET_REGCLASS_NONBITMAP_DATA;
+#else
+    PERL_ARGS_ASSERT_GET_RE_GCLASS_NONBITMAP_DATA;
+#endif
     assert(! output_invlist || listsvp);
 
     if (data && data->count) {
@@ -19996,7 +20270,6 @@ Perl__get_regclass_nonbitmap_data(pTHX_ const regexp *prog,
 
     return invlist;
 }
-#endif /* !defined(PERL_IN_XSUB_RE) || defined(PLUGGABLE_RE_EXTENSION) */
 
 /* reg_skipcomment()
 
@@ -20160,7 +20433,7 @@ S_regnode_guts(pTHX_ RExC_state_t *pRExC_state, const U8 op, const STRLEN extra_
 
     const regnode_offset ret = RExC_emit;
 
-    GET_RE_DEBUG_FLAGS_DECL;
+    DECLARE_AND_GET_RE_DEBUG_FLAGS;
 
     PERL_ARGS_ASSERT_REGNODE_GUTS;
 
@@ -20227,12 +20500,12 @@ S_reganode(pTHX_ RExC_state_t *pRExC_state, U8 op, U32 arg)
 }
 
 /*
-- regpnode - emit a temporary node with a void* argument
+- regpnode - emit a temporary node with a SV* argument
 */
 STATIC regnode_offset /* Location. */
-S_regpnode(pTHX_ RExC_state_t *pRExC_state, U8 op, void * arg)
+S_regpnode(pTHX_ RExC_state_t *pRExC_state, U8 op, SV * arg)
 {
-    const regnode_offset ret = regnode_guts(pRExC_state, op, regarglen[op], "regvnode");
+    const regnode_offset ret = regnode_guts(pRExC_state, op, regarglen[op], "regpnode");
     regnode_offset ptr = ret;
 
     PERL_ARGS_ASSERT_REGPNODE;
@@ -20282,7 +20555,7 @@ S_reginsert(pTHX_ RExC_state_t *pRExC_state, const U8 op,
     regnode *place;
     const int offset = regarglen[(U8)op];
     const int size = NODE_STEP_REGNODE + offset;
-    GET_RE_DEBUG_FLAGS_DECL;
+    DECLARE_AND_GET_RE_DEBUG_FLAGS;
 
     PERL_ARGS_ASSERT_REGINSERT;
     PERL_UNUSED_CONTEXT;
@@ -20386,21 +20659,22 @@ S_regtail(pTHX_ RExC_state_t * pRExC_state,
                 const U32 depth)
 {
     regnode_offset scan;
-    GET_RE_DEBUG_FLAGS_DECL;
+    DECLARE_AND_GET_RE_DEBUG_FLAGS;
 
     PERL_ARGS_ASSERT_REGTAIL;
 #ifndef DEBUGGING
     PERL_UNUSED_ARG(depth);
 #endif
 
-    /* Find last node. */
+    /* The final node in the chain is the first one with a nonzero next pointer
+     * */
     scan = (regnode_offset) p;
     for (;;) {
        regnode * const temp = regnext(REGNODE_p(scan));
         DEBUG_PARSE_r({
             DEBUG_PARSE_MSG((scan==p ? "tail" : ""));
             regprop(RExC_rx, RExC_mysv, REGNODE_p(scan), NULL, pRExC_state);
-            Perl_re_printf( aTHX_  "~ %s (%d) %s %s\n",
+            Perl_re_printf( aTHX_  "~ %s (%zu) %s %s\n",
                 SvPV_nolen_const(RExC_mysv), scan,
                     (temp == NULL ? "->" : ""),
                     (temp == NULL ? PL_reg_name[OP(REGNODE_p(val))] : "")
@@ -20411,6 +20685,7 @@ S_regtail(pTHX_ RExC_state_t * pRExC_state,
         scan = REGNODE_OFFSET(temp);
     }
 
+    /* Populate this node's next pointer */
     assert(val >= scan);
     if (reg_off_by_arg[OP(REGNODE_p(scan))]) {
         assert((UV) (val - scan) <= U32_MAX);
@@ -20460,7 +20735,7 @@ S_regtail_study(pTHX_ RExC_state_t *pRExC_state, regnode_offset p,
 #ifdef EXPERIMENTAL_INPLACESCAN
     I32 min = 0;
 #endif
-    GET_RE_DEBUG_FLAGS_DECL;
+    DECLARE_AND_GET_RE_DEBUG_FLAGS;
 
     PERL_ARGS_ASSERT_REGTAIL_STUDY;
 
@@ -20507,7 +20782,7 @@ S_regtail_study(pTHX_ RExC_state_t *pRExC_state, regnode_offset p,
         DEBUG_PARSE_r({
             DEBUG_PARSE_MSG((scan==p ? "tsdy" : ""));
             regprop(RExC_rx, RExC_mysv, REGNODE_p(scan), NULL, pRExC_state);
-            Perl_re_printf( aTHX_  "~ %s (%d) -> %s\n",
+            Perl_re_printf( aTHX_  "~ %s (%zu) -> %s\n",
                 SvPV_nolen_const(RExC_mysv),
                 scan,
                 PL_reg_name[exact]);
@@ -20664,7 +20939,7 @@ Perl_regdump(pTHX_ const regexp *r)
     SV * const sv = sv_newmortal();
     SV *dsv= sv_newmortal();
     RXi_GET_DECL(r, ri);
-    GET_RE_DEBUG_FLAGS_DECL;
+    DECLARE_AND_GET_RE_DEBUG_FLAGS;
 
     PERL_ARGS_ASSERT_REGDUMP;
 
@@ -20803,10 +21078,9 @@ void
 Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_info *reginfo, const RExC_state_t *pRExC_state)
 {
 #ifdef DEBUGGING
-    dVAR;
     int k;
     RXi_GET_DECL(prog, progi);
-    GET_RE_DEBUG_FLAGS_DECL;
+    DECLARE_AND_GET_RE_DEBUG_FLAGS;
 
     PERL_ARGS_ASSERT_REGPROP;
 
@@ -21008,10 +21282,17 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_
                                             ANYOFRbase(o) + ANYOFRdelta(o));
             }
             else {
-                (void) _get_regclass_nonbitmap_data(prog, o, FALSE,
+#if !defined(PERL_IN_XSUB_RE) || defined(PLUGGABLE_RE_EXTENSION)
+                (void) get_regclass_nonbitmap_data(prog, o, FALSE,
                                                 &unresolved,
                                                 &only_utf8_locale_invlist,
                                                 &nonbitmap_invlist);
+#else
+                (void) get_re_gclass_nonbitmap_data(prog, o, FALSE,
+                                                &unresolved,
+                                                &only_utf8_locale_invlist,
+                                                &nonbitmap_invlist);
+#endif
             }
 
             /* The non-bitmap data may contain stuff that could fit in the
@@ -21155,11 +21436,16 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_
                          : (OP(o) == ANYOFH || OP(o) == ANYOFR)
                            ? 0xFF
                            : lowest;
-            Perl_sv_catpvf(aTHX_ sv, " (First UTF-8 byte=%02X", lowest);
-            if (lowest != highest) {
-                Perl_sv_catpvf(aTHX_ sv, "-%02X", highest);
+#ifndef EBCDIC
+            if (OP(o) != ANYOFR || ! isASCII(ANYOFRbase(o) + ANYOFRdelta(o)))
+#endif
+            {
+                Perl_sv_catpvf(aTHX_ sv, " (First UTF-8 byte=%02X", lowest);
+                if (lowest != highest) {
+                    Perl_sv_catpvf(aTHX_ sv, "-%02X", highest);
+                }
+                Perl_sv_catpvf(aTHX_ sv, ")");
             }
-            Perl_sv_catpvf(aTHX_ sv, ")");
         }
 
         SvREFCNT_dec(unresolved);
@@ -21237,19 +21523,25 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_
 SV *
 Perl_re_intuit_string(pTHX_ REGEXP * const r)
 {                              /* Assume that RE_INTUIT is set */
+    /* Returns an SV containing a string that must appear in the target for it
+     * to match, or NULL if nothing is known that must match.
+     *
+     * CAUTION: the SV can be freed during execution of the regex engine */
+
     struct regexp *const prog = ReANY(r);
-    GET_RE_DEBUG_FLAGS_DECL;
+    DECLARE_AND_GET_RE_DEBUG_FLAGS;
 
     PERL_ARGS_ASSERT_RE_INTUIT_STRING;
     PERL_UNUSED_CONTEXT;
 
     DEBUG_COMPILE_r(
        {
-           const char * const s = SvPV_nolen_const(RX_UTF8(r)
+            if (prog->maxlen > 0) {
+                const char * const s = SvPV_nolen_const(RX_UTF8(r)
                      ? prog->check_utf8 : prog->check_substr);
 
-           if (!PL_colorset) reginitcolors();
-            Perl_re_printf( aTHX_
+                if (!PL_colorset) reginitcolors();
+                Perl_re_printf( aTHX_
                      "%sUsing REx %ssubstr:%s \"%s%.60s%s%s\"\n",
                      PL_colors[4],
                      RX_UTF8(r) ? "utf8 " : "",
@@ -21257,6 +21549,7 @@ Perl_re_intuit_string(pTHX_ REGEXP * const r)
                      s,
                      PL_colors[1],
                      (strlen(s) > PL_dump_re_max_len ? "..." : ""));
+            }
        } );
 
     /* use UTF8 check substring if regexp pattern itself is in UTF8 */
@@ -21286,7 +21579,7 @@ void
 Perl_pregfree2(pTHX_ REGEXP *rx)
 {
     struct regexp *const r = ReANY(rx);
-    GET_RE_DEBUG_FLAGS_DECL;
+    DECLARE_AND_GET_RE_DEBUG_FLAGS;
 
     PERL_ARGS_ASSERT_PREGFREE2;
 
@@ -21450,7 +21743,7 @@ Perl_regfree_internal(pTHX_ REGEXP * const rx)
 {
     struct regexp *const r = ReANY(rx);
     RXi_GET_DECL(r, ri);
-    GET_RE_DEBUG_FLAGS_DECL;
+    DECLARE_AND_GET_RE_DEBUG_FLAGS;
 
     PERL_ARGS_ASSERT_REGFREE_INTERNAL;
 
@@ -21502,7 +21795,6 @@ Perl_regfree_internal(pTHX_ REGEXP * const rx)
                     U32 refcount;
                     reg_ac_data *aho=(reg_ac_data*)ri->data->data[n];
 #ifdef USE_ITHREADS
-                    dVAR;
 #endif
                     OP_REFCNT_LOCK;
                     refcount = --aho->refcount;
@@ -21531,7 +21823,6 @@ Perl_regfree_internal(pTHX_ REGEXP * const rx)
                    U32 refcount;
                    reg_trie_data *trie=(reg_trie_data*)ri->data->data[n];
 #ifdef USE_ITHREADS
-                    dVAR;
 #endif
                     OP_REFCNT_LOCK;
                     refcount = --trie->refcount;
@@ -21567,15 +21858,19 @@ Perl_regfree_internal(pTHX_ REGEXP * const rx)
 #define SAVEPVN(p, n)  ((p) ? savepvn(p, n) : NULL)
 
 /*
-   re_dup_guts - duplicate a regexp.
+=for apidoc_section REGEXP Functions
+=for apidoc re_dup_guts
+Duplicate a regexp.
+
+This routine is expected to clone a given regexp structure. It is only
+compiled under USE_ITHREADS.
 
-   This routine is expected to clone a given regexp structure. It is only
-   compiled under USE_ITHREADS.
+After all of the core data stored in struct regexp is duplicated
+the regexp_engine.dupe method is used to copy any private data
+stored in the *pprivate pointer. This allows extensions to handle
+any duplication they need to do.
 
-   After all of the core data stored in struct regexp is duplicated
-   the regexp_engine.dupe method is used to copy any private data
-   stored in the *pprivate pointer. This allows extensions to handle
-   any duplication it needs to do.
+=cut
 
    See pregfree() and regfree_internal() if you change anything here.
 */
@@ -21584,7 +21879,6 @@ Perl_regfree_internal(pTHX_ REGEXP * const rx)
 void
 Perl_re_dup_guts(pTHX_ const REGEXP *sstr, REGEXP *dstr, CLONE_PARAMS *param)
 {
-    dVAR;
     I32 npar;
     const struct regexp *r = ReANY(sstr);
     struct regexp *ret = ReANY(dstr);
@@ -21687,7 +21981,6 @@ Perl_re_dup_guts(pTHX_ const REGEXP *sstr, REGEXP *dstr, CLONE_PARAMS *param)
 void *
 Perl_regdupe_internal(pTHX_ REGEXP * const rx, CLONE_PARAMS *param)
 {
-    dVAR;
     struct regexp *const r = ReANY(rx);
     regexp_internal *reti;
     int len;
@@ -21832,34 +22125,30 @@ Perl_regnext(pTHX_ regnode *p)
 #endif
 
 STATIC void
-S_re_croak2(pTHX_ bool utf8, const char* pat1, const char* pat2,...)
+S_re_croak(pTHX_ bool utf8, const char* pat,...)
 {
     va_list args;
-    STRLEN l1 = strlen(pat1);
-    STRLEN l2 = strlen(pat2);
+    STRLEN len = strlen(pat);
     char buf[512];
     SV *msv;
     const char *message;
 
-    PERL_ARGS_ASSERT_RE_CROAK2;
-
-    if (l1 > 510)
-       l1 = 510;
-    if (l1 + l2 > 510)
-       l2 = 510 - l1;
-    Copy(pat1, buf, l1 , char);
-    Copy(pat2, buf + l1, l2 , char);
-    buf[l1 + l2] = '\n';
-    buf[l1 + l2 + 1] = '\0';
-    va_start(args, pat2);
+    PERL_ARGS_ASSERT_RE_CROAK;
+
+    if (len > 510)
+       len = 510;
+    Copy(pat, buf, len , char);
+    buf[len] = '\n';
+    buf[len + 1] = '\0';
+    va_start(args, pat);
     msv = vmess(buf, &args);
     va_end(args);
-    message = SvPV_const(msv, l1);
-    if (l1 > 512)
-       l1 = 512;
-    Copy(message, buf, l1 , char);
-    /* l1-1 to avoid \n */
-    Perl_croak(aTHX_ "%" UTF8f, UTF8fARG(utf8, l1-1, buf));
+    message = SvPV_const(msv, len);
+    if (len > 512)
+       len = 512;
+    Copy(message, buf, len , char);
+    /* len-1 to avoid \n */
+    Perl_croak(aTHX_ "%" UTF8f, UTF8fARG(utf8, len-1, buf));
 }
 
 /* XXX Here's a total kludge.  But we need to re-enter for swash routines. */
@@ -21956,9 +22245,11 @@ S_put_range(pTHX_ SV *sv, UV start, const UV end, const bool allow_literals)
         UV this_end;
         const char * format;
 
-        if (end - start < min_range_count) {
-
-            /* Output chars individually when they occur in short ranges */
+        if (    end - start < min_range_count
+            && (end - start <= 2 || (isPRINT_A(start) && isPRINT_A(end))))
+        {
+            /* Output a range of 1 or 2 chars individually, or longer ranges
+             * when printable */
             for (; start <= end; start++) {
                 put_code_point(sv, start);
             }
@@ -22182,7 +22473,6 @@ S_put_charclass_bitmap_innards_common(pTHX_
      * output would have been only the inversion indicator '^', NULL is instead
      * returned. */
 
-    dVAR;
     SV * output;
 
     PERL_ARGS_ASSERT_PUT_CHARCLASS_BITMAP_INNARDS_COMMON;
@@ -22289,7 +22579,6 @@ S_put_charclass_bitmap_innards(pTHX_ SV *sv,
      * cases where it can't try inverting, as what actually matches isn't known
      * until runtime, and hence the inversion isn't either. */
 
-    dVAR;
     bool inverting_allowed = ! force_as_is_display;
 
     int i;
@@ -22430,7 +22719,7 @@ S_put_charclass_bitmap_innards(pTHX_ SV *sv,
 
         int inverted_bias, as_is_bias;
 
-        /* We will apply our bias to whichever of the the results doesn't have
+        /* We will apply our bias to whichever of the results doesn't have
          * the '^' */
         if (invert) {
             invert = FALSE;
@@ -22529,7 +22818,7 @@ S_dumpuntil(pTHX_ const regexp *r, const regnode *start, const regnode *node,
     const regnode *optstart= NULL;
 
     RXi_GET_DECL(r, ri);
-    GET_RE_DEBUG_FLAGS_DECL;
+    DECLARE_AND_GET_RE_DEBUG_FLAGS;
 
     PERL_ARGS_ASSERT_DUMPUNTIL;
 
@@ -22682,7 +22971,6 @@ S_dumpuntil(pTHX_ const regexp *r, const regnode *start, const regnode *node,
 void
 Perl_init_uniprops(pTHX)
 {
-    dVAR;
 
 #  ifdef DEBUGGING
     char * dump_len_string;
@@ -22782,6 +23070,43 @@ Perl_init_uniprops(pTHX)
 #  endif
 }
 
+/* These four functions are compiled only in regcomp.c, where they have access
+ * to the data they return.  They are a way for re_comp.c to get access to that
+ * data without having to compile the whole data structures. */
+
+I16
+Perl_do_uniprop_match(const char * const key, const U16 key_len)
+{
+    PERL_ARGS_ASSERT_DO_UNIPROP_MATCH;
+
+    return match_uniprop((U8 *) key, key_len);
+}
+
+SV *
+Perl_get_prop_definition(pTHX_ const int table_index)
+{
+    PERL_ARGS_ASSERT_GET_PROP_DEFINITION;
+
+    /* Create and return the inversion list */
+    return _new_invlist_C_array(uni_prop_ptrs[table_index]);
+}
+
+const char * const *
+Perl_get_prop_values(const int table_index)
+{
+    PERL_ARGS_ASSERT_GET_PROP_VALUES;
+
+    return UNI_prop_value_ptrs[table_index];
+}
+
+const char *
+Perl_get_deprecated_property_msg(const Size_t warning_offset)
+{
+    PERL_ARGS_ASSERT_GET_DEPRECATED_PROPERTY_MSG;
+
+    return deprecated_property_msgs[warning_offset];
+}
+
 #  if 0
 
 This code was mainly added for backcompat to give a warning for non-portable
@@ -22813,13 +23138,24 @@ S_get_extended_utf8_msg(pTHX_ const UV cp)
 }
 
 #  endif
+#endif /* end of ! PERL_IN_XSUB_RE */
 
 STATIC REGEXP *
-S_compile_wildcard(pTHX_ const char * name, const STRLEN len,
+S_compile_wildcard(pTHX_ const char * subpattern, const STRLEN len,
                          const bool ignore_case)
 {
-    U32 flags = PMf_MULTILINE;
+    /* Pretends that the input subpattern is qr/subpattern/aam, compiling it
+     * possibly with /i if the 'ignore_case' parameter is true.  Use /aa
+     * because nothing outside of ASCII will match.  Use /m because the input
+     * string may be a bunch of lines strung together.
+     *
+     * Also sets up the debugging info */
+
+    U32 flags = PMf_MULTILINE|PMf_WILDCARD;
+    U32 rx_flags;
+    SV * subpattern_sv = sv_2mortal(newSVpvn(subpattern, len));
     REGEXP * subpattern_re;
+    DECLARE_AND_GET_RE_DEBUG_FLAGS;
 
     PERL_ARGS_ASSERT_COMPILE_WILDCARD;
 
@@ -22828,10 +23164,49 @@ S_compile_wildcard(pTHX_ const char * name, const STRLEN len,
     }
     set_regex_charset(&flags, REGEX_ASCII_MORE_RESTRICTED_CHARSET);
 
-    subpattern_re = re_op_compile_wrapper(sv_2mortal(newSVpvn(name, len)),
-                                        /* Like in op.c, we copy the compile
-                                         * time pm flags to the rx ones */
-                                        (flags & RXf_PMf_COMPILETIME), flags);
+    /* Like in op.c, we copy the compile time pm flags to the rx ones */
+    rx_flags = flags & RXf_PMf_COMPILETIME;
+
+#ifndef PERL_IN_XSUB_RE
+    /* Use the core engine if this file is regcomp.c.  That means no
+     * 'use re "Debug ..." is in effect, so the core engine is sufficient */
+    subpattern_re = Perl_re_op_compile(aTHX_ &subpattern_sv, 1, NULL,
+                                             &PL_core_reg_engine,
+                                             NULL, NULL,
+                                             rx_flags, flags);
+#else
+    if (isDEBUG_WILDCARD) {
+        /* Use the special debugging engine if this file is re_comp.c and wants
+         * to output the wildcard matching.  This uses whatever
+         * 'use re "Debug ..." is in effect */
+        subpattern_re = Perl_re_op_compile(aTHX_ &subpattern_sv, 1, NULL,
+                                                 &my_reg_engine,
+                                                 NULL, NULL,
+                                                 rx_flags, flags);
+    }
+    else {
+        /* Use the special wildcard engine if this file is re_comp.c and
+         * doesn't want to output the wildcard matching.  This uses whatever
+         * 'use re "Debug ..." is in effect for compilation, but this engine
+         * structure has been set up so that it uses the core engine for
+         * execution, so no execution debugging as a result of re.pm will be
+         * displayed. */
+        subpattern_re = Perl_re_op_compile(aTHX_ &subpattern_sv, 1, NULL,
+                                                 &wild_reg_engine,
+                                                 NULL, NULL,
+                                                 rx_flags, flags);
+        /* XXX The above has the effect that any user-supplied regex engine
+         * won't be called for matching wildcards.  That might be good, or bad.
+         * It could be changed in several ways.  The reason it is done the
+         * current way is to avoid having to save and restore
+         * ^{^RE_DEBUG_FLAGS} around the execution.  save_scalar() perhaps
+         * could be used.  Another suggestion is to keep the authoritative
+         * value of the debug flags in a thread-local variable and add set/get
+         * magic to ${^RE_DEBUG_FLAGS} to keep the C level variable up to date.
+         * Still another is to pass a flag, say in the engine's intflags that
+         * would be checked each time before doing the debug output */
+    }
+#endif
 
     assert(subpattern_re);  /* Should have died if didn't compile successfully */
     return subpattern_re;
@@ -22842,16 +23217,31 @@ S_execute_wildcard(pTHX_ REGEXP * const prog, char* stringarg, char *strend,
         char *strbeg, SSize_t minend, SV *screamer, U32 nosave)
 {
     I32 result;
+    DECLARE_AND_GET_RE_DEBUG_FLAGS;
 
     PERL_ARGS_ASSERT_EXECUTE_WILDCARD;
 
-    result = pregexec(prog, stringarg, strend, strbeg, minend, screamer, nosave);
+    ENTER;
+
+    /* The compilation has set things up so that if the program doesn't want to
+     * see the wildcard matching procedure, it will get the core execution
+     * engine, which is subject only to -Dr.  So we have to turn that off
+     * around this procedure */
+    if (! isDEBUG_WILDCARD) {
+        /* Note! Casts away 'volatile' */
+        SAVEI32(PL_debug);
+        PL_debug &= ~ DEBUG_r_FLAG;
+    }
+
+    result = CALLREGEXEC(prog, stringarg, strend, strbeg, minend, screamer,
+                         NULL, nosave);
+    LEAVE;
 
     return result;
 }
 
 SV *
-Perl_handle_user_defined_property(pTHX_
+S_handle_user_defined_property(pTHX_
 
     /* Parses the contents of a user-defined property definition; returning the
      * expanded definition if possible.  If so, the return is an inversion
@@ -23066,6 +23456,7 @@ Perl_handle_user_defined_property(pTHX_
         this_definition = parse_uniprop_string(s0, s - s0,
                                                is_utf8, to_fold, runtime,
                                                deferrable,
+                                               NULL,
                                                user_defined_ptr, msg,
                                                (name_len == 0)
                                                 ? level /* Don't increase level
@@ -23157,7 +23548,7 @@ Perl_handle_user_defined_property(pTHX_
 #    define CUR_CONTEXT      aTHX
 #    define ORIGINAL_CONTEXT save_aTHX
 #  else
-#    define DECLARATION_FOR_GLOBAL_CONTEXT
+#    define DECLARATION_FOR_GLOBAL_CONTEXT    dNOOP
 #    define SWITCH_TO_GLOBAL_CONTEXT          NOOP
 #    define RESTORE_CONTEXT                   NOOP
 #    define CUR_CONTEXT                       NULL
@@ -23171,7 +23562,6 @@ S_delete_recursion_entry(pTHX_ void *key)
      * properties.  This is a function so it can be set up to be called even if
      * the program unexpectedly quits */
 
-    dVAR;
     SV ** current_entry;
     const STRLEN key_len = strlen((const char *) key);
     DECLARATION_FOR_GLOBAL_CONTEXT;
@@ -23225,8 +23615,8 @@ S_get_fq_name(pTHX_
     return fq_name;
 }
 
-SV *
-Perl_parse_uniprop_string(pTHX_
+STATIC SV *
+S_parse_uniprop_string(pTHX_
 
     /* Parse the interior of a \p{}, \P{}.  Returns its definition if knowable
      * now.  If so, the return is an inversion list.
@@ -23255,6 +23645,8 @@ Perl_parse_uniprop_string(pTHX_
     const bool runtime,         /* TRUE if this is being called at run time */
     const bool deferrable,      /* TRUE if it's ok for the definition to not be
                                    known at this call */
+    AV ** strings,              /* To return string property values, like named
+                                   sequences */
     bool *user_defined_ptr,     /* Upon return from this function it will be
                                    set to TRUE if any component is a
                                    user-defined property */
@@ -23262,7 +23654,6 @@ Perl_parse_uniprop_string(pTHX_
                                    this */
     const STRLEN level)         /* Recursion level of this call */
 {
-    dVAR;
     char* lookup_name;          /* normalized name for lookup in our tables */
     unsigned lookup_len;        /* Its length */
     enum { Not_Strict = 0,      /* Some properties have stricter name */
@@ -23440,16 +23831,19 @@ Perl_parse_uniprop_string(pTHX_
                  * but it must be punctuation */
             && (name[i] != '\\' || (i < name_len && isPUNCT_A(name[i+1]))))
         {
-            /* Find the property.  The table includes the equals sign, so we
-             * use 'j' as-is */
-            table_index = match_uniprop((U8 *) lookup_name, j);
-            if (table_index) {
-                const char * const * prop_values
-                                            = UNI_prop_value_ptrs[table_index];
+            bool special_property = memEQs(lookup_name, j - 1, "name")
+                                 || memEQs(lookup_name, j - 1, "na");
+            if (! special_property) {
+                /* Find the property.  The table includes the equals sign, so
+                 * we use 'j' as-is */
+                table_index = do_uniprop_match(lookup_name, j);
+            }
+            if (special_property || table_index) {
                 REGEXP * subpattern_re;
                 char open = name[i++];
                 char close;
                 const char * pos_in_brackets;
+                const char * const * prop_values;
                 bool escaped = 0;
 
                 /* Backslash => delimitter is the character following.  We
@@ -23483,12 +23877,39 @@ Perl_parse_uniprop_string(pTHX_
                     packWARN(WARN_EXPERIMENTAL__UNIPROP_WILDCARDS),
                     "The Unicode property wildcards feature is experimental");
 
-                /* Now create and compile the wildcard subpattern.  Use /iaa
-                 * because nothing outside of ASCII will match, and it the
-                 * property values should all match /i.  Note that when the
-                 * pattern fails to compile, our added text to the user's
-                 * pattern will be displayed to the user, which is not so
-                 * desirable. */
+                if (special_property) {
+                    const char * error_msg;
+                    const char * revised_name = name + i;
+                    Size_t revised_name_len = name_len - (i + 1 + escaped);
+
+                    /* Currently, the only 'special_property' is name, which we
+                     * lookup in _charnames.pm */
+
+                    if (! load_charnames(newSVpvs("placeholder"),
+                                         revised_name, revised_name_len,
+                                         &error_msg))
+                    {
+                        sv_catpv(msg, error_msg);
+                        goto append_name_to_msg;
+                    }
+
+                    /* Farm this out to a function just to make the current
+                     * function less unwieldy */
+                    if (handle_names_wildcard(revised_name, revised_name_len,
+                                              &prop_definition,
+                                              strings))
+                    {
+                        return prop_definition;
+                    }
+
+                    goto failed;
+                }
+
+                prop_values = get_prop_values(table_index);
+
+                /* Now create and compile the wildcard subpattern.  Use /i
+                 * because the property values are supposed to match with case
+                 * ignored. */
                 subpattern_re = compile_wildcard(name + i,
                                                  name_len - i - 1 - escaped,
                                                  TRUE /* /i */
@@ -23525,6 +23946,7 @@ Perl_parse_uniprop_string(pTHX_
                                                            to_fold,
                                                            runtime,
                                                            deferrable,
+                                                           NULL,
                                                            user_defined_ptr,
                                                            msg,
                                                            level + 1);
@@ -23547,31 +23969,20 @@ Perl_parse_uniprop_string(pTHX_
             }
 
             /* Here's how khw thinks we should proceed to handle the properties
-             * not yet done:    Bidi Mirroring Glyph
-                                Bidi Paired Bracket
+             * not yet done:    Bidi Mirroring Glyph        can map to ""
+                                Bidi Paired Bracket         can map to ""
                                 Case Folding  (both full and simple)
+                                            Shouldn't /i be good enough for Full
                                 Decomposition Mapping
-                                Equivalent Unified Ideograph
-                                Name
-                                Name Alias
+                                Equivalent Unified Ideograph    can map to ""
                                 Lowercase Mapping  (both full and simple)
-                                NFKC Case Fold
+                                NFKC Case Fold                  can map to ""
                                 Titlecase Mapping  (both full and simple)
                                 Uppercase Mapping  (both full and simple)
-             * Move the part that looks at the property values into a perl
-             * script, like utf8_heavy.pl was done.  This makes things somewhat
-             * easier, but most importantly, it avoids always adding all these
-             * strings to the memory usage when the feature is little-used.
-             *
-             * The property values would all be concatenated into a single
-             * string per property with each value on a separate line, and the
-             * code point it's for on alternating lines.  Then we match the
-             * user's input pattern m//mg, without having to worry about their
-             * uses of '^' and '$'.  Only the values that aren't the default
-             * would be in the strings.  Code points would be in UTF-8.  The
-             * search pattern that we would construct would look like
-             * (?: \n (code-point_re) \n (?aam: user-re ) \n )
-             * And so $1 would contain the code point that matched the user-re.
+             * Handle these the same way Name is done, using say, _wild.pm, but
+             * having both loose and full, like in charclass_invlists.h.
+             * Perhaps move block and script to that as they are somewhat large
+             * in charclass_invlists.h.
              * For properties where the default is the code point itself, such
              * as any of the case changing mappings, the string would otherwise
              * consist of all Unicode code points in UTF-8 strung together.
@@ -23580,16 +23991,8 @@ Perl_parse_uniprop_string(pTHX_
              * error.  Otherwise run the pattern against every code point in
              * the ssc.  The ssc is kind of like tr18's 3.9 Possible Match Sets
              * And it might be good to create an API to return the ssc.
-             *
-             * For the name properties, a new function could be created in
-             * charnames which essentially does the same thing as above,
-             * sharing Name.pl with the other charname functions.  Don't know
-             * about loose name matching, or algorithmically determined names.
-             * Decomposition.pl similarly.
-             *
-             * It might be that a new pattern modifier would have to be
-             * created, like /t for resTricTed, which changed the behavior of
-             * some constructs in their subpattern, like \A. */
+             * Or handle them like the algorithmic names are done
+             */
         } /* End of is a wildcard subppattern */
 
         /* \p{name=...} is handled specially.  Instead of using the normal
@@ -23642,13 +24045,13 @@ Perl_parse_uniprop_string(pTHX_
                 goto append_name_to_msg;
             }
 
-            lookup_loose = get_cv("_charnames::_loose_regcomp_lookup", 0);
+            lookup_loose = get_cvs("_charnames::_loose_regcomp_lookup", 0);
             if (! lookup_loose) {
                 Perl_croak(aTHX_
                        "panic: Can't find '_charnames::_loose_regcomp_lookup");
             }
 
-            PUSHSTACKi(PERLSI_OVERLOAD);
+            PUSHSTACKi(PERLSI_REGCOMP);
             ENTER ;
             SAVETMPS;
             save_re_context();
@@ -23673,11 +24076,36 @@ Perl_parse_uniprop_string(pTHX_
             }
 
             cp = valid_utf8_to_uvchr((U8 *) SvPVX(character), &character_len);
-            if (character_len < SvCUR(character)) {
-                goto failed;
+            if (character_len == SvCUR(character)) {
+                prop_definition = add_cp_to_invlist(NULL, cp);
+            }
+            else {
+                AV * this_string;
+
+                /* First of the remaining characters in the string. */
+                char * remaining = SvPVX(character) + character_len;
+
+                if (strings == NULL) {
+                    goto failed;    /* XXX Perhaps a specific msg instead, like
+                                       'not available here' */
+                }
+
+                if (*strings == NULL) {
+                    *strings = newAV();
+                }
+
+                this_string = newAV();
+                av_push(this_string, newSVuv(cp));
+
+                do {
+                    cp = valid_utf8_to_uvchr((U8 *) remaining, &character_len);
+                    av_push(this_string, newSVuv(cp));
+                    remaining += character_len;
+                } while (remaining < SvEND(character));
+
+                av_push(*strings, (SV *) this_string);
             }
 
-            prop_definition = add_cp_to_invlist(NULL, cp);
             return prop_definition;
         }
 
@@ -24087,7 +24515,7 @@ Perl_parse_uniprop_string(pTHX_
              * for this property in the hash.  So we have the go ahead to
              * expand the definition ourselves. */
 
-            PUSHSTACKi(PERLSI_MAGIC);
+            PUSHSTACKi(PERLSI_REGCOMP);
             ENTER;
 
             /* Create a temporary placeholder in the hash to detect recursion
@@ -24238,7 +24666,7 @@ Perl_parse_uniprop_string(pTHX_
 
     /* Get the index into our pointer table of the inversion list corresponding
      * to the property */
-    table_index = match_uniprop((U8 *) lookup_name, lookup_len);
+    table_index = do_uniprop_match(lookup_name, lookup_len);
 
     /* If it didn't find the property ... */
     if (table_index == 0) {
@@ -24246,14 +24674,16 @@ Perl_parse_uniprop_string(pTHX_
         /* Try again stripping off any initial 'Is'.  This is because we
          * promise that an initial Is is optional.  The same isn't true of
          * names that start with 'In'.  Those can match only blocks, and the
-         * lookup table already has those accounted for. */
-        if (starts_with_Is) {
+         * lookup table already has those accounted for.  The lookup table also
+         * has already accounted for Perl extensions (without and = sign)
+         * starting with 'i's'. */
+        if (starts_with_Is && equals_pos >= 0) {
             lookup_name += 2;
             lookup_len -= 2;
             equals_pos -= 2;
             slash_pos -= 2;
 
-            table_index = match_uniprop((U8 *) lookup_name, lookup_len);
+            table_index = do_uniprop_match(lookup_name, lookup_len);
         }
 
         if (table_index == 0) {
@@ -24417,7 +24847,7 @@ Perl_parse_uniprop_string(pTHX_
             }
 
             /* Here, we have the number in canonical form.  Try that */
-            table_index = match_uniprop((U8 *) canonical, strlen(canonical));
+            table_index = do_uniprop_match(canonical, strlen(canonical));
             if (table_index == 0) {
                 goto failed;
             }
@@ -24441,7 +24871,8 @@ Perl_parse_uniprop_string(pTHX_
         table_index %= MAX_UNI_KEYWORD_INDEX;
         Perl_ck_warner_d(aTHX_ packWARN(WARN_DEPRECATED),
                 "Use of '%.*s' in \\p{} or \\P{} is deprecated because: %s",
-                (int) name_len, name, deprecated_property_msgs[warning_offset]);
+                (int) name_len, name,
+                get_deprecated_property_msg(warning_offset));
     }
 
     /* In a few properties, a different property is used under /i.  These are
@@ -24469,10 +24900,9 @@ Perl_parse_uniprop_string(pTHX_
     }
 
     /* Create and return the inversion list */
-    prop_definition =_new_invlist_C_array(uni_prop_ptrs[table_index]);
+    prop_definition = get_prop_definition(table_index);
     sv_2mortal(prop_definition);
 
-
     /* See if there is a private use override to add to this definition */
     {
         COPHH * hinthash = (IN_PERL_COMPILETIME)
@@ -24592,7 +25022,442 @@ Perl_parse_uniprop_string(pTHX_
     }
 }
 
-#endif /* end of ! PERL_IN_XSUB_RE */
+STATIC bool
+S_handle_names_wildcard(pTHX_ const char * wname, /* wildcard name to match */
+                              const STRLEN wname_len, /* Its length */
+                              SV ** prop_definition,
+                              AV ** strings)
+{
+    /* Deal with Name property wildcard subpatterns; returns TRUE if there were
+     * any matches, adding them to prop_definition */
+
+    dSP;
+
+    CV * get_names_info;        /* entry to charnames.pm to get info we need */
+    SV * names_string;          /* Contains all character names, except algo */
+    SV * algorithmic_names;     /* Contains info about algorithmically
+                                   generated character names */
+    REGEXP * subpattern_re;     /* The user's pattern to match with */
+    struct regexp * prog;       /* The compiled pattern */
+    char * all_names_start;     /* lib/unicore/Name.pl string of every
+                                   (non-algorithmic) character name */
+    char * cur_pos;             /* We match, effectively using /gc; this is
+                                   where we are now */
+    bool found_matches = FALSE; /* Did any name match so far? */
+    SV * empty;                 /* For matching zero length names */
+    SV * must_sv;               /* Contains the substring, if any, that must be
+                                   in a name for the subpattern to match */
+    const char * must;          /* The PV of 'must' */
+    STRLEN must_len;            /* And its length */
+    SV * syllable_name = NULL;  /* For Hangul syllables */
+    const char hangul_prefix[] = "HANGUL SYLLABLE ";
+    const STRLEN hangul_prefix_len = sizeof(hangul_prefix) - 1;
+
+    /* By inspection, there are a maximum of 7 bytes in the suffix of a hangul
+     * syllable name, and these are immutable and guaranteed by the Unicode
+     * standard to never be extended */
+    const STRLEN syl_max_len = hangul_prefix_len + 7;
+
+    IV i;
+
+    PERL_ARGS_ASSERT_HANDLE_NAMES_WILDCARD;
+
+    /* Make sure _charnames is loaded.  (The parameters give context
+     * for any errors generated */
+    get_names_info = get_cv("_charnames::_get_names_info", 0);
+    if (! get_names_info) {
+        Perl_croak(aTHX_ "panic: Can't find '_charnames::_get_names_info");
+    }
+
+    /* Get the charnames data */
+    PUSHSTACKi(PERLSI_REGCOMP);
+    ENTER ;
+    SAVETMPS;
+    save_re_context();
+
+    PUSHMARK(SP) ;
+    PUTBACK;
+
+    /* Special _charnames entry point that returns the info this routine
+     * requires */
+    call_sv(MUTABLE_SV(get_names_info), G_ARRAY);
+
+    SPAGAIN ;
+
+    /* Data structure for names which end in their very own code points */
+    algorithmic_names = POPs;
+    SvREFCNT_inc_simple_void_NN(algorithmic_names);
+
+    /* The lib/unicore/Name.pl string */
+    names_string = POPs;
+    SvREFCNT_inc_simple_void_NN(names_string);
+
+    PUTBACK ;
+    FREETMPS ;
+    LEAVE ;
+    POPSTACK;
+
+    if (   ! SvROK(names_string)
+        || ! SvROK(algorithmic_names))
+    {   /* Perhaps should panic instead XXX */
+        SvREFCNT_dec(names_string);
+        SvREFCNT_dec(algorithmic_names);
+        return FALSE;
+    }
+
+    names_string = sv_2mortal(SvRV(names_string));
+    all_names_start = SvPVX(names_string);
+    cur_pos = all_names_start;
+
+    algorithmic_names= sv_2mortal(SvRV(algorithmic_names));
+
+    /* Compile the subpattern consisting of the name being looked for */
+    subpattern_re = compile_wildcard(wname, wname_len, FALSE /* /-i */ );
+
+    must_sv = re_intuit_string(subpattern_re);
+    if (must_sv) {
+        /* regexec.c can free the re_intuit_string() return. GH #17734 */
+        must_sv = sv_2mortal(newSVsv(must_sv));
+        must = SvPV(must_sv, must_len);
+    }
+    else {
+        must = "";
+        must_len = 0;
+    }
+
+    /* (Note: 'must' could contain a NUL.  And yet we use strspn() below on it.
+     * This works because the NUL causes the function to return early, thus
+     * showing that there are characters in it other than the acceptable ones,
+     * which is our desired result.) */
+
+    prog = ReANY(subpattern_re);
+
+    /* If only nothing is matched, skip to where empty names are looked for */
+    if (prog->maxlen == 0) {
+        goto check_empty;
+    }
+
+    /* And match against the string of all names /gc.  Don't even try if it
+     * must match a character not found in any name. */
+    if (strspn(must, "\n -0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ()") == must_len)
+    {
+        while (execute_wildcard(subpattern_re,
+                                cur_pos,
+                                SvEND(names_string),
+                                all_names_start, 0,
+                                names_string,
+                                0))
+        { /* Here, matched. */
+
+            /* Note the string entries look like
+             *      00001\nSTART OF HEADING\n\n
+             * so we could match anywhere in that string.  We have to rule out
+             * matching a code point line */
+            char * this_name_start = all_names_start
+                                                + RX_OFFS(subpattern_re)->start;
+            char * this_name_end   = all_names_start
+                                                + RX_OFFS(subpattern_re)->end;
+            char * cp_start;
+            char * cp_end;
+            UV cp = 0;      /* Silences some compilers */
+            AV * this_string = NULL;
+            bool is_multi = FALSE;
+
+            /* If matched nothing, advance to next possible match */
+            if (this_name_start == this_name_end) {
+                cur_pos = (char *) memchr(this_name_end + 1, '\n',
+                                          SvEND(names_string) - this_name_end);
+                if (cur_pos == NULL) {
+                    break;
+                }
+            }
+            else {
+                /* Position the next match to start beyond the current returned
+                 * entry */
+                cur_pos = (char *) memchr(this_name_end, '\n',
+                                          SvEND(names_string) - this_name_end);
+            }
+
+            /* Back up to the \n just before the beginning of the character. */
+            cp_end = (char *) my_memrchr(all_names_start,
+                                         '\n',
+                                         this_name_start - all_names_start);
+
+            /* If we didn't find a \n, it means it matched somewhere in the
+             * initial '00000' in the string, so isn't a real match */
+            if (cp_end == NULL) {
+                continue;
+            }
+
+            this_name_start = cp_end + 1;   /* The name starts just after */
+            cp_end--;                       /* the \n, and the code point */
+                                            /* ends just before it */
+
+            /* All code points are 5 digits long */
+            cp_start = cp_end - 4;
+
+            /* This shouldn't happen, as we found a \n, and the first \n is
+             * further along than what we subtracted */
+            assert(cp_start >= all_names_start);
+
+            if (cp_start == all_names_start) {
+                *prop_definition = add_cp_to_invlist(*prop_definition, 0);
+                continue;
+            }
+
+            /* If the character is a blank, we either have a named sequence, or
+             * something is wrong */
+            if (*(cp_start - 1) == ' ') {
+                cp_start = (char *) my_memrchr(all_names_start,
+                                               '\n',
+                                               cp_start - all_names_start);
+                cp_start++;
+            }
+
+            assert(cp_start != NULL && cp_start >= all_names_start + 2);
+
+            /* Except for the first line in the string, the sequence before the
+             * code point is \n\n.  If that isn't the case here, we didn't
+             * match the name of a character.  (We could have matched a named
+             * sequence, not currently handled */
+            if (*(cp_start - 1) != '\n' || *(cp_start - 2) != '\n') {
+                continue;
+            }
+
+            /* We matched!  Add this to the list */
+            found_matches = TRUE;
+
+            /* Loop through all the code points in the sequence */
+            while (cp_start < cp_end) {
+
+                /* Calculate this code point from its 5 digits */
+                cp = (XDIGIT_VALUE(cp_start[0]) << 16)
+                   + (XDIGIT_VALUE(cp_start[1]) << 12)
+                   + (XDIGIT_VALUE(cp_start[2]) << 8)
+                   + (XDIGIT_VALUE(cp_start[3]) << 4)
+                   +  XDIGIT_VALUE(cp_start[4]);
+
+                cp_start += 6;  /* Go past any blank */
+
+                if (cp_start < cp_end || is_multi) {
+                    if (this_string == NULL) {
+                        this_string = newAV();
+                    }
+
+                    is_multi = TRUE;
+                    av_push(this_string, newSVuv(cp));
+                }
+            }
+
+            if (is_multi) { /* Was more than one code point */
+                if (*strings == NULL) {
+                    *strings = newAV();
+                }
+
+                av_push(*strings, (SV *) this_string);
+            }
+            else {  /* Only a single code point */
+                *prop_definition = add_cp_to_invlist(*prop_definition, cp);
+            }
+        } /* End of loop through the non-algorithmic names string */
+    }
+
+    /* There are also character names not in 'names_string'.  These are
+     * algorithmically generatable.  Try this pattern on each possible one.
+     * (khw originally planned to leave this out given the large number of
+     * matches attempted; but the speed turned out to be quite acceptable
+     *
+     * There are plenty of opportunities to optimize to skip many of the tests.
+     * beyond the rudimentary ones already here */
+
+    /* First see if the subpattern matches any of the algorithmic generatable
+     * Hangul syllable names.
+     *
+     * We know none of these syllable names will match if the input pattern
+     * requires more bytes than any syllable has, or if the input pattern only
+     * matches an empty name, or if the pattern has something it must match and
+     * one of the characters in that isn't in any Hangul syllable. */
+    if (    prog->minlen <= (SSize_t) syl_max_len
+        &&  prog->maxlen > 0
+        && (strspn(must, "\n ABCDEGHIJKLMNOPRSTUWY") == must_len))
+    {
+        /* These constants, names, values, and algorithm are adapted from the
+         * Unicode standard, version 5.1, section 3.12, and should never
+         * change. */
+        const char * JamoL[] = {
+            "G", "GG", "N", "D", "DD", "R", "M", "B", "BB",
+            "S", "SS", "", "J", "JJ", "C", "K", "T", "P", "H"
+        };
+        const int LCount = C_ARRAY_LENGTH(JamoL);
+
+        const char * JamoV[] = {
+            "A", "AE", "YA", "YAE", "EO", "E", "YEO", "YE", "O", "WA",
+            "WAE", "OE", "YO", "U", "WEO", "WE", "WI", "YU", "EU", "YI",
+            "I"
+        };
+        const int VCount = C_ARRAY_LENGTH(JamoV);
+
+        const char * JamoT[] = {
+            "", "G", "GG", "GS", "N", "NJ", "NH", "D", "L",
+            "LG", "LM", "LB", "LS", "LT", "LP", "LH", "M", "B",
+            "BS", "S", "SS", "NG", "J", "C", "K", "T", "P", "H"
+        };
+        const int TCount = C_ARRAY_LENGTH(JamoT);
+
+        int L, V, T;
+
+        /* This is the initial Hangul syllable code point; each time through the
+         * inner loop, it maps to the next higher code point.  For more info,
+         * see the Hangul syllable section of the Unicode standard. */
+        int cp = 0xAC00;
+
+        syllable_name = sv_2mortal(newSV(syl_max_len));
+        sv_setpvn(syllable_name, hangul_prefix, hangul_prefix_len);
+
+        for (L = 0; L < LCount; L++) {
+            for (V = 0; V < VCount; V++) {
+                for (T = 0; T < TCount; T++) {
+
+                    /* Truncate back to the prefix, which is unvarying */
+                    SvCUR_set(syllable_name, hangul_prefix_len);
+
+                    sv_catpv(syllable_name, JamoL[L]);
+                    sv_catpv(syllable_name, JamoV[V]);
+                    sv_catpv(syllable_name, JamoT[T]);
+
+                    if (execute_wildcard(subpattern_re,
+                                SvPVX(syllable_name),
+                                SvEND(syllable_name),
+                                SvPVX(syllable_name), 0,
+                                syllable_name,
+                                0))
+                    {
+                        *prop_definition = add_cp_to_invlist(*prop_definition,
+                                                             cp);
+                        found_matches = TRUE;
+                    }
+
+                    cp++;
+                }
+            }
+        }
+    }
+
+    /* The rest of the algorithmically generatable names are of the form
+     * "PREFIX-code_point".  The prefixes and the code point limits of each
+     * were returned to us in the array 'algorithmic_names' from data in
+     * lib/unicore/Name.pm.  'code_point' in the name is expressed in hex. */
+    for (i = 0; i <= av_top_index((AV *) algorithmic_names); i++) {
+        IV j;
+
+        /* Each element of the array is a hash, giving the details for the
+         * series of names it covers.  There is the base name of the characters
+         * in the series, and the low and high code points in the series.  And,
+         * for optimization purposes a string containing all the legal
+         * characters that could possibly be in a name in this series. */
+        HV * this_series = (HV *) SvRV(* av_fetch((AV *) algorithmic_names, i, 0));
+        SV * prefix = * hv_fetchs(this_series, "name", 0);
+        IV low = SvIV(* hv_fetchs(this_series, "low", 0));
+        IV high = SvIV(* hv_fetchs(this_series, "high", 0));
+        char * legal = SvPVX(* hv_fetchs(this_series, "legal", 0));
+
+        /* Pre-allocate an SV with enough space */
+        SV * algo_name = sv_2mortal(Perl_newSVpvf(aTHX_ "%s-0000",
+                                                        SvPVX(prefix)));
+        if (high >= 0x10000) {
+            sv_catpvs(algo_name, "0");
+        }
+
+        /* This series can be skipped entirely if the pattern requires
+         * something longer than any name in the series, or can only match an
+         * empty name, or contains a character not found in any name in the
+         * series */
+        if (    prog->minlen <= (SSize_t) SvCUR(algo_name)
+            &&  prog->maxlen > 0
+            && (strspn(must, legal) == must_len))
+        {
+            for (j = low; j <= high; j++) { /* For each code point in the series */
+
+                /* Get its name, and see if it matches the subpattern */
+                Perl_sv_setpvf(aTHX_ algo_name, "%s-%X", SvPVX(prefix),
+                                     (unsigned) j);
+
+                if (execute_wildcard(subpattern_re,
+                                    SvPVX(algo_name),
+                                    SvEND(algo_name),
+                                    SvPVX(algo_name), 0,
+                                    algo_name,
+                                    0))
+                {
+                    *prop_definition = add_cp_to_invlist(*prop_definition, j);
+                    found_matches = TRUE;
+                }
+            }
+        }
+    }
+
+  check_empty:
+    /* Finally, see if the subpattern matches an empty string */
+    empty = newSVpvs("");
+    if (execute_wildcard(subpattern_re,
+                         SvPVX(empty),
+                         SvEND(empty),
+                         SvPVX(empty), 0,
+                         empty,
+                         0))
+    {
+        /* Many code points have empty names.  Currently these are the \p{GC=C}
+         * ones, minus CC and CF */
+
+        SV * empty_names_ref = get_prop_definition(UNI_C);
+        SV * empty_names = invlist_clone(empty_names_ref, NULL);
+
+        SV * subtract = get_prop_definition(UNI_CC);
+
+        _invlist_subtract(empty_names, subtract, &empty_names);
+        SvREFCNT_dec_NN(empty_names_ref);
+        SvREFCNT_dec_NN(subtract);
+
+        subtract = get_prop_definition(UNI_CF);
+        _invlist_subtract(empty_names, subtract, &empty_names);
+        SvREFCNT_dec_NN(subtract);
+
+        _invlist_union(*prop_definition, empty_names, prop_definition);
+        found_matches = TRUE;
+        SvREFCNT_dec_NN(empty_names);
+    }
+    SvREFCNT_dec_NN(empty);
+
+#if 0
+    /* If we ever were to accept aliases for, say private use names, we would
+     * need to do something fancier to find empty names.  The code below works
+     * (at the time it was written), and is slower than the above */
+    const char empties_pat[] = "^.";
+    if (strNE(name, empties_pat)) {
+        SV * empty = newSVpvs("");
+        if (execute_wildcard(subpattern_re,
+                    SvPVX(empty),
+                    SvEND(empty),
+                    SvPVX(empty), 0,
+                    empty,
+                    0))
+        {
+            SV * empties = NULL;
+
+            (void) handle_names_wildcard(empties_pat, strlen(empties_pat), &empties);
+
+            _invlist_union_complement_2nd(*prop_definition, empties, prop_definition);
+            SvREFCNT_dec_NN(empties);
+
+            found_matches = TRUE;
+        }
+        SvREFCNT_dec_NN(empty);
+    }
+#endif
+
+    SvREFCNT_dec_NN(subpattern_re);
+    return found_matches;
+}
 
 /*
  * ex: set ts=8 sts=4 sw=4 et: