gh16947: avoid mutating regexp program only within GOSUB

[perl5.git] / regcomp.c
diff --git a/regcomp.c b/regcomp.c

index c9d34db..b208c01 100644 (file)
--- a/regcomp.c
+++ b/regcomp.c
@@ -135,6 +135,7 @@
  #ifdef PERL_IN_XSUB_RE
  #  include "re_comp.h"
  EXTERN_C const struct regexp_engine my_reg_engine;
+EXTERN_C const struct regexp_engine wild_reg_engine;
  #else
  #  include "regcomp.h"
  #endif
@@ -162,6 +163,7 @@ typedef struct scan_frame {
      regnode *next_regnode;      /* next node to process when last is reached */
      U32 prev_recursed_depth;
      I32 stopparen;              /* what stopparen do we use */
+    bool in_gosub;              /* this or an outer frame is for GOSUB */
  
      struct scan_frame *this_prev_frame; /* this previous frame */
      struct scan_frame *prev_frame;      /* previous frame */
@@ -1237,7 +1239,7 @@ static void
  S_debug_studydata(pTHX_ const char *where, scan_data_t *data,
                      U32 depth, int is_inf)
  {
-    GET_RE_DEBUG_FLAGS_DECL;
+    DECLARE_AND_GET_RE_DEBUG_FLAGS;
  
      DEBUG_OPTIMISE_MORE_r({
          if (!data)
@@ -1291,7 +1293,7 @@ static void
  S_debug_peep(pTHX_ const char *str, const RExC_state_t *pRExC_state,
                  regnode *scan, U32 depth, U32 flags)
  {
-    GET_RE_DEBUG_FLAGS_DECL;
+    DECLARE_AND_GET_RE_DEBUG_FLAGS;
  
      DEBUG_OPTIMISE_r({
          regnode *Next;
@@ -1479,7 +1481,7 @@ S_scan_commit(pTHX_ const RExC_state_t *pRExC_state, scan_data_t *data,
      const STRLEN l = CHR_SVLEN(data->last_found);
      SV * const longest_sv = data->substrs[data->cur_is_floating].str;
      const STRLEN old_l = CHR_SVLEN(longest_sv);
-    GET_RE_DEBUG_FLAGS_DECL;
+    DECLARE_AND_GET_RE_DEBUG_FLAGS;
  
      PERL_ARGS_ASSERT_SCAN_COMMIT;
  
@@ -1491,20 +1493,18 @@ S_scan_commit(pTHX_ const RExC_state_t *pRExC_state, scan_data_t *data,
         if (!i) /* fixed */
             data->substrs[0].max_offset = data->substrs[0].min_offset;
         else { /* float */
-           data->substrs[1].max_offset = (l
+           data->substrs[1].max_offset =
+                      (is_inf)
+                       ? OPTIMIZE_INFTY
+                       : (l
                            ? data->last_start_max
                            : (data->pos_delta > OPTIMIZE_INFTY - data->pos_min
                                          ? OPTIMIZE_INFTY
                                          : data->pos_min + data->pos_delta));
-           if (is_inf
-                || (STRLEN)data->substrs[1].max_offset > (STRLEN)OPTIMIZE_INFTY)
-               data->substrs[1].max_offset = OPTIMIZE_INFTY;
          }
  
-        if (data->flags & SF_BEFORE_EOL)
-            data->substrs[i].flags |= (data->flags & SF_BEFORE_EOL);
-        else
-            data->substrs[i].flags &= ~SF_BEFORE_EOL;
+        data->substrs[i].flags &= ~SF_BEFORE_EOL;
+        data->substrs[i].flags |= data->flags & SF_BEFORE_EOL;
          data->substrs[i].minlenp = minlenp;
          data->substrs[i].lookbehind = 0;
      }
@@ -2068,7 +2068,7 @@ S_ssc_or(pTHX_ const RExC_state_t *pRExC_state, regnode_ssc *ssc,
                );
  }
  
-PERL_STATIC_INLINE void
+STATIC void
  S_ssc_union(pTHX_ regnode_ssc *ssc, SV* const invlist, const bool invert2nd)
  {
      PERL_ARGS_ASSERT_SSC_UNION;
@@ -2081,7 +2081,7 @@ S_ssc_union(pTHX_ regnode_ssc *ssc, SV* const invlist, const bool invert2nd)
                                          &ssc->invlist);
  }
  
-PERL_STATIC_INLINE void
+STATIC void
  S_ssc_intersection(pTHX_ regnode_ssc *ssc,
                           SV* const invlist,
                           const bool invert2nd)
@@ -2096,7 +2096,7 @@ S_ssc_intersection(pTHX_ regnode_ssc *ssc,
                                                 &ssc->invlist);
  }
  
-PERL_STATIC_INLINE void
+STATIC void
  S_ssc_add_range(pTHX_ regnode_ssc *ssc, const UV start, const UV end)
  {
      PERL_ARGS_ASSERT_SSC_ADD_RANGE;
@@ -2106,7 +2106,7 @@ S_ssc_add_range(pTHX_ regnode_ssc *ssc, const UV start, const UV end)
      ssc->invlist = _add_range_to_invlist(ssc->invlist, start, end);
  }
  
-PERL_STATIC_INLINE void
+STATIC void
  S_ssc_cp_and(pTHX_ regnode_ssc *ssc, const UV cp)
  {
      /* AND just the single code point 'cp' into the SSC 'ssc' */
@@ -2124,7 +2124,7 @@ S_ssc_cp_and(pTHX_ regnode_ssc *ssc, const UV cp)
      SvREFCNT_dec_NN(cp_list);
  }
  
-PERL_STATIC_INLINE void
+STATIC void
  S_ssc_clear_locale(regnode_ssc *ssc)
  {
      /* Set the SSC 'ssc' to not match any locale things */
@@ -2272,7 +2272,7 @@ S_dump_trie(pTHX_ const struct _reg_trie_data *trie, HV *widecharmap,
      SV *sv=sv_newmortal();
      int colwidth= widecharmap ? 6 : 4;
      U16 word;
-    GET_RE_DEBUG_FLAGS_DECL;
+    DECLARE_AND_GET_RE_DEBUG_FLAGS;
  
      PERL_ARGS_ASSERT_DUMP_TRIE;
  
@@ -2366,7 +2366,7 @@ S_dump_trie_interim_list(pTHX_ const struct _reg_trie_data *trie,
      U32 state;
      SV *sv=sv_newmortal();
      int colwidth= widecharmap ? 6 : 4;
-    GET_RE_DEBUG_FLAGS_DECL;
+    DECLARE_AND_GET_RE_DEBUG_FLAGS;
  
      PERL_ARGS_ASSERT_DUMP_TRIE_INTERIM_LIST;
  
@@ -2427,7 +2427,7 @@ S_dump_trie_interim_table(pTHX_ const struct _reg_trie_data *trie,
      U16 charid;
      SV *sv=sv_newmortal();
      int colwidth= widecharmap ? 6 : 4;
-    GET_RE_DEBUG_FLAGS_DECL;
+    DECLARE_AND_GET_RE_DEBUG_FLAGS;
  
      PERL_ARGS_ASSERT_DUMP_TRIE_INTERIM_TABLE;
  
@@ -2772,7 +2772,7 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch,
      STRLEN trie_charcount=0;
  #endif
      SV *re_trie_maxbuff;
-    GET_RE_DEBUG_FLAGS_DECL;
+    DECLARE_AND_GET_RE_DEBUG_FLAGS;
  
      PERL_ARGS_ASSERT_MAKE_TRIE;
  #ifndef DEBUGGING
@@ -3862,7 +3862,7 @@ S_construct_ahocorasick_from_trie(pTHX_ RExC_state_t *pRExC_state, regnode *sour
      reg_ac_data *aho;
      const U32 data_slot = add_data( pRExC_state, STR_WITH_LEN("T"));
      regnode *stclass;
-    GET_RE_DEBUG_FLAGS_DECL;
+    DECLARE_AND_GET_RE_DEBUG_FLAGS;
  
      PERL_ARGS_ASSERT_CONSTRUCT_AHOCORASICK_FROM_TRIE;
      PERL_UNUSED_CONTEXT;
@@ -4111,7 +4111,7 @@ S_join_exact(pTHX_ RExC_state_t *pRExC_state, regnode *scan,
      U32 stopnow = 0;
  #ifdef DEBUGGING
      regnode *stop = scan;
-    GET_RE_DEBUG_FLAGS_DECL;
+    DECLARE_AND_GET_RE_DEBUG_FLAGS;
  #else
      PERL_UNUSED_ARG(depth);
  #endif
@@ -4441,7 +4441,7 @@ S_join_exact(pTHX_ RExC_state_t *pRExC_state, regnode *scan,
                  s++;
              }
          }
-       else {
+       else if (OP(scan) != EXACTFAA_NO_TRIE) {
  
              /* Non-UTF-8 pattern, not EXACTFAA node.  Look for the multi-char
               * folds that are all Latin1.  As explained in the comments
@@ -4559,7 +4559,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
      regnode *first_non_open = scan;
      SSize_t stopmin = OPTIMIZE_INFTY;
      scan_frame *frame = NULL;
-    GET_RE_DEBUG_FLAGS_DECL;
+    DECLARE_AND_GET_RE_DEBUG_FLAGS;
  
      PERL_ARGS_ASSERT_STUDY_CHUNK;
      RExC_study_started= 1;
@@ -4607,6 +4607,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                                     node length to get a real minimum (because
                                     the folded version may be shorter) */
         bool unfolded_multi_char = FALSE;
+        bool mutate_ok = (frame && frame->in_gosub) ? 0 : 1;
         /* Peephole optimizer: */
          DEBUG_STUDYDATA("Peep", data, depth, is_inf);
          DEBUG_PEEP("Peep", scan, depth, flags);
@@ -4617,10 +4618,14 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
           * parsing code, as each (?:..) is handled by a different invocation of
           * reg() -- Yves
           */
-        if (PL_regkind[OP(scan)] == EXACT && OP(scan) != LEXACT
-                                          && OP(scan) != LEXACT_REQ8)
+        if (PL_regkind[OP(scan)] == EXACT
+            && OP(scan) != LEXACT
+            && OP(scan) != LEXACT_REQ8
+            && mutate_ok
+        ) {
              join_exact(pRExC_state, scan, &min_subtract, &unfolded_multi_char,
                      0, NULL, depth + 1);
+        }
  
          /* Follow the next-chain of the current node and optimize
             away all the NOTHINGs from it.  */
@@ -4805,9 +4810,10 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                     }
                 }
  
-                if (PERL_ENABLE_TRIE_OPTIMISATION &&
-                        OP( startbranch ) == BRANCH )
-                {
+                if (PERL_ENABLE_TRIE_OPTIMISATION
+                    && OP(startbranch) == BRANCH
+                    && mutate_ok
+                ) {
                 /* demq.
  
                     Assuming this was/is a branch we are dealing with: 'scan'
@@ -5260,6 +5266,9 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                  newframe->stopparen = stopparen;
                  newframe->prev_recursed_depth = recursed_depth;
                  newframe->this_prev_frame= frame;
+                newframe->in_gosub = (
+                    (frame && frame->in_gosub) || OP(scan) == GOSUB
+                );
  
                  DEBUG_STUDYDATA("frame-new", data, depth, is_inf);
                  DEBUG_PEEP("fnew", scan, depth, flags);
@@ -5344,8 +5353,9 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                  &&   isALPHA_A(*s)
                  &&  (         OP(scan) == EXACTFAA
                       || (     OP(scan) == EXACTFU
-                         && ! HAS_NONLATIN1_SIMPLE_FOLD_CLOSURE(*s))))
-            {
+                         && ! HAS_NONLATIN1_SIMPLE_FOLD_CLOSURE(*s)))
+                &&   mutate_ok
+            ) {
                  U8 mask = ~ ('A' ^ 'a'); /* These differ in just one bit */
  
                  OP(scan) = ANYOFM;
@@ -5438,7 +5448,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
  
                  /* This temporary node can now be turned into EXACTFU, and
                   * must, as regexec.c doesn't handle it */
-                if (OP(next) == EXACTFU_S_EDGE) {
+                if (OP(next) == EXACTFU_S_EDGE && mutate_ok) {
                      OP(next) = EXACTFU;
                  }
  
@@ -5446,8 +5456,9 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                      &&   isALPHA_A(* STRING(next))
                      && (         OP(next) == EXACTFAA
                          || (     OP(next) == EXACTFU
-                            && ! HAS_NONLATIN1_SIMPLE_FOLD_CLOSURE(* STRING(next)))))
-                {
+                            && ! HAS_NONLATIN1_SIMPLE_FOLD_CLOSURE(* STRING(next))))
+                    &&   mutate_ok
+                ) {
                      /* These differ in just one bit */
                      U8 mask = ~ ('A' ^ 'a');
  
@@ -5594,7 +5605,9 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                 if (  OP(oscan) == CURLYX && data
                       && data->flags & SF_IN_PAR
                       && !(data->flags & SF_HAS_EVAL)
-                     && !deltanext && minnext == 1 ) {
+                     && !deltanext && minnext == 1
+                      && mutate_ok
+                ) {
                     /* Try to optimize to CURLYN.  */
                     regnode *nxt = NEXTOPER(oscan) + EXTRA_STEP_2ARGS;
                     regnode * const nxt1 = nxt;
@@ -5644,10 +5657,10 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                       && !(data->flags & SF_HAS_EVAL)
                       && !deltanext     /* atom is fixed width */
                       && minnext != 0   /* CURLYM can't handle zero width */
-
                           /* Nor characters whose fold at run-time may be
                            * multi-character */
                        && ! (RExC_seen & REG_UNFOLDED_MULTI_SEEN)
+                      && mutate_ok
                 ) {
                     /* XXXX How to optimize if data == 0? */
                     /* Optimize to a simpler form.  */
@@ -5965,7 +5978,10 @@ Perl_re_printf( aTHX_  "LHS=%" UVuf " RHS=%" UVuf "\n",
                                                            (regnode_charclass *) scan);
                     break;
  
-                case NANYOFM:
+                case NANYOFM: /* NANYOFM already contains the inversion of the
+                                 input ANYOF data, so, unlike things like
+                                 NPOSIXA, don't change 'invert' to TRUE */
+                    /* FALLTHROUGH */
                  case ANYOFM:
                    {
                      SV* cp_list = get_ANYOFM_contents(scan);
@@ -6663,7 +6679,7 @@ REGEXP *
  Perl_pregcomp(pTHX_ SV * const pattern, const U32 flags)
  {
      regexp_engine const *eng = current_re_engine();
-    GET_RE_DEBUG_FLAGS_DECL;
+    DECLARE_AND_GET_RE_DEBUG_FLAGS;
  
      PERL_ARGS_ASSERT_PREGCOMP;
  
@@ -6684,16 +6700,9 @@ Perl_pregcomp(pTHX_ SV * const pattern, const U32 flags)
  REGEXP *
  Perl_re_compile(pTHX_ SV * const pattern, U32 rx_flags)
  {
-    PERL_ARGS_ASSERT_RE_COMPILE;
-    return re_op_compile_wrapper(pattern, rx_flags, 0);
-}
-
-REGEXP *
-S_re_op_compile_wrapper(pTHX_ SV * const pattern, U32 rx_flags, const U32 pm_flags)
-{
      SV *pat = pattern; /* defeat constness! */
  
-    PERL_ARGS_ASSERT_RE_OP_COMPILE_WRAPPER;
+    PERL_ARGS_ASSERT_RE_COMPILE;
  
      return Perl_re_op_compile(aTHX_ &pat, 1, NULL,
  #ifdef PERL_IN_XSUB_RE
@@ -6701,10 +6710,9 @@ S_re_op_compile_wrapper(pTHX_ SV * const pattern, U32 rx_flags, const U32 pm_fla
  #else
                                  &PL_core_reg_engine,
  #endif
-                                NULL, NULL, rx_flags, pm_flags);
+                                NULL, NULL, rx_flags, 0);
  }
  
-
  static void
  S_free_codeblocks(pTHX_ struct reg_code_blocks *cbs)
  {
@@ -6756,7 +6764,7 @@ S_pat_upgrade_to_utf8(pTHX_ RExC_state_t * const pRExC_state,
      int n=0;
      STRLEN s = 0;
      bool do_end = 0;
-    GET_RE_DEBUG_FLAGS_DECL;
+    DECLARE_AND_GET_RE_DEBUG_FLAGS;
  
      DEBUG_PARSE_r(Perl_re_printf( aTHX_
          "UTF8 mismatch! Converting to utf8 for resizing and compile\n"));
@@ -6933,7 +6941,7 @@ S_concat_pat(pTHX_ RExC_state_t * const pRExC_state,
                  pRExC_state->code_blocks->count -= n;
              n = 0;
          }
-        else  {
+        else {
              /* ... or failing that, try "" overload */
              while (SvAMAGIC(msv)
                      && (sv = AMG_CALLunary(msv, string_amg))
@@ -7096,7 +7104,7 @@ S_compile_runtime_code(pTHX_ RExC_state_t * const pRExC_state,
  {
      SV *qr;
  
-    GET_RE_DEBUG_FLAGS_DECL;
+    DECLARE_AND_GET_RE_DEBUG_FLAGS;
  
      if (pRExC_state->runtime_code_qr) {
         /* this is the second time we've been called; this should
@@ -7516,7 +7524,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
      int restudied = 0;
      RExC_state_t copyRExC_state;
  #endif
-    GET_RE_DEBUG_FLAGS_DECL;
+    DECLARE_AND_GET_RE_DEBUG_FLAGS;
  
      PERL_ARGS_ASSERT_RE_OP_COMPILE;
  
@@ -7941,7 +7949,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
      DEBUG_OFFSETS_r(if (RExC_offsets) {
          const STRLEN len = RExC_offsets[0];
          STRLEN i;
-        GET_RE_DEBUG_FLAGS_DECL;
+        DECLARE_AND_GET_RE_DEBUG_FLAGS;
          Perl_re_printf( aTHX_
                        "Offsets: [%" UVuf "]\n\t", (UV)RExC_offsets[0]);
          for (i = 1; i <= len; i++) {
@@ -8637,7 +8645,7 @@ SV*
  Perl_reg_named_buff_nextkey(pTHX_ REGEXP * const r, const U32 flags)
  {
      struct regexp *const rx = ReANY(r);
-    GET_RE_DEBUG_FLAGS_DECL;
+    DECLARE_AND_GET_RE_DEBUG_FLAGS;
  
      PERL_ARGS_ASSERT_REG_NAMED_BUFF_NEXTKEY;
  
@@ -11048,7 +11056,7 @@ S_parse_lparen_question_flags(pTHX_ RExC_state_t *pRExC_state)
  #define REGTAIL_STUDY(x,y,z) regtail((x),(y),(z),depth+1)
  #endif
  
-PERL_STATIC_INLINE regnode_offset
+STATIC regnode_offset
  S_handle_named_backref(pTHX_ RExC_state_t *pRExC_state,
                               I32 *flagp,
                               char * parse_start,
@@ -11059,7 +11067,7 @@ S_handle_named_backref(pTHX_ RExC_state_t *pRExC_state,
      char* name_start = RExC_parse;
      U32 num = 0;
      SV *sv_dat = reg_scan_name(pRExC_state, REG_RSN_RETURN_DATA);
-    GET_RE_DEBUG_FLAGS_DECL;
+    DECLARE_AND_GET_RE_DEBUG_FLAGS;
  
      PERL_ARGS_ASSERT_HANDLE_NAMED_BACKREF;
  
@@ -11127,7 +11135,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
      char * parse_start = RExC_parse; /* MJD */
      char * const oregcomp_parse = RExC_parse;
  
-    GET_RE_DEBUG_FLAGS_DECL;
+    DECLARE_AND_GET_RE_DEBUG_FLAGS;
  
      PERL_ARGS_ASSERT_REG;
      DEBUG_PARSE("reg ");
@@ -11479,6 +11487,10 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
             bool is_logical = 0;
             const char * const seqstart = RExC_parse;
              const char * endptr;
+            const char non_existent_group_msg[]
+                                            = "Reference to nonexistent group";
+            const char impossible_group[] = "Invalid reference to group";
+
              if (has_intervening_patws) {
                  RExC_parse++;
                  vFAIL("In '(?...)', the '(' and '?' must be adjacent");
@@ -11705,10 +11717,17 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
                      ) {
                          num = (I32)unum;
                          RExC_parse = (char*)endptr;
-                    } else
-                        num = I32_MAX;
+                    }
+                    else {  /* Overflow, or something like that.  Position
+                               beyond all digits for the message */
+                        while (RExC_parse < RExC_end && isDIGIT(*RExC_parse))  {
+                            RExC_parse++;
+                        }
+                        vFAIL(impossible_group);
+                    }
                      if (is_neg) {
-                        /* Some limit for num? */
+                        /* -num is always representable on 1's and 2's complement
+                         * machines */
                          num = -num;
                      }
                  }
@@ -11716,45 +11735,43 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
                     vFAIL("Expecting close bracket");
  
                gen_recurse_regop:
-                if ( paren == '-' ) {
+                if (paren == '-' || paren == '+') {
+
+                    /* Don't overflow */
+                    if (UNLIKELY(I32_MAX - RExC_npar < num)) {
+                        RExC_parse++;
+                        vFAIL(impossible_group);
+                    }
+
                      /*
                      Diagram of capture buffer numbering.
                      Top line is the normal capture buffer numbers
                      Bottom line is the negative indexing as from
                      the X (the (?-2))
  
-                    +   1 2    3 4 5 X          6 7
+                        1 2    3 4 5 X   Y      6 7
+                       /(a(x)y)(a(b(c(?+2)d)e)f)(g(h))/
                         /(a(x)y)(a(b(c(?-2)d)e)f)(g(h))/
-                    -   5 4    3 2 1 X          x x
+                    -   5 4    3 2 1 X   Y      x x
  
+                    Resolve to absolute group.  Recall that RExC_npar is +1 of
+                    the actual parenthesis group number.  For a forward
+                    reference (the '+' form), we have to compensate for that.
+                    Using the above example, when we get to Y in the parse, num
+                    is 2 and RExC_npar is 6.  We want 7 for +2, and 4 for -2.
                      */
-                    num = RExC_npar + num;
-                    if (num < 1)  {
+                    if ( paren == '+' ) {
+                        num--;
+                    }
  
-                        /* It might be a forward reference; we can't fail until
-                         * we know, by completing the parse to get all the
-                         * groups, and then reparsing */
-                        if (ALL_PARENS_COUNTED)  {
-                            RExC_parse++;
-                            vFAIL("Reference to nonexistent group");
-                        }
-                        else {
-                            REQUIRE_PARENS_PASS;
-                        }
+                    num += RExC_npar;
+
+                    if (paren == '-' && num < 1) {
+                        RExC_parse++;
+                        vFAIL(non_existent_group_msg);
                      }
-                } else if ( paren == '+' ) {
-                    num = RExC_npar + num - 1;
                  }
-                /* We keep track how many GOSUB items we have produced.
-                   To start off the ARG2L() of the GOSUB holds its "id",
-                   which is used later in conjunction with RExC_recurse
-                   to calculate the offset we need to jump for the GOSUB,
-                   which it will store in the final representation.
-                   We have to defer the actual calculation until much later
-                   as the regop may move.
-                 */
  
-                ret = reg2Lanode(pRExC_state, GOSUB, num, RExC_recurse_count);
                  if (num >= RExC_npar) {
  
                      /* It might be a forward reference; we can't fail until we
@@ -11763,13 +11780,23 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
                      if (ALL_PARENS_COUNTED)  {
                          if (num >= RExC_total_parens) {
                              RExC_parse++;
-                            vFAIL("Reference to nonexistent group");
+                            vFAIL(non_existent_group_msg);
                          }
                      }
                      else {
                          REQUIRE_PARENS_PASS;
                      }
                  }
+
+                /* We keep track how many GOSUB items we have produced.
+                   To start off the ARG2L() of the GOSUB holds its "id",
+                   which is used later in conjunction with RExC_recurse
+                   to calculate the offset we need to jump for the GOSUB,
+                   which it will store in the final representation.
+                   We have to defer the actual calculation until much later
+                   as the regop may move.
+                 */
+                ret = reg2Lanode(pRExC_state, GOSUB, num, RExC_recurse_count);
                  RExC_recurse_count++;
                  DEBUG_OPTIMISE_MORE_r(Perl_re_printf( aTHX_
                      "%*s%*s Recurse #%" UVuf " to %" IVdf "\n",
@@ -12463,7 +12490,7 @@ S_regbranch(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, I32 first, U32 depth)
      regnode_offset chain = 0;
      regnode_offset latest;
      I32 flags = 0, c = 0;
-    GET_RE_DEBUG_FLAGS_DECL;
+    DECLARE_AND_GET_RE_DEBUG_FLAGS;
  
      PERL_ARGS_ASSERT_REGBRANCH;
  
@@ -12560,7 +12587,7 @@ S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
      /* Save the original in case we change the emitted regop to a FAIL. */
      const regnode_offset orig_emit = RExC_emit;
  
-    GET_RE_DEBUG_FLAGS_DECL;
+    DECLARE_AND_GET_RE_DEBUG_FLAGS;
  
      PERL_ARGS_ASSERT_REGPIECE;
  
@@ -12901,12 +12928,10 @@ S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state,
      char *save_start;
      I32 flags;
  
-    GET_RE_DEBUG_FLAGS_DECL;
+    DECLARE_AND_GET_RE_DEBUG_FLAGS;
  
      PERL_ARGS_ASSERT_GROK_BSLASH_N;
  
-    GET_RE_DEBUG_FLAGS;
-
      assert(cBOOL(node_p) ^ cBOOL(code_point_p));  /* Exactly one should be set */
      assert(! (node_p && cp_count));               /* At most 1 should be set */
  
@@ -13249,7 +13274,7 @@ S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state,
  }
  
  
-PERL_STATIC_INLINE U8
+STATIC U8
  S_compute_EXACTish(RExC_state_t *pRExC_state)
  {
      U8 op;
@@ -13411,7 +13436,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
      U8 op;
      int invert = 0;
  
-    GET_RE_DEBUG_FLAGS_DECL;
+    DECLARE_AND_GET_RE_DEBUG_FLAGS;
  
      *flagp = WORST;            /* Tentatively. */
  
@@ -16271,7 +16296,7 @@ S_handle_regex_sets(pTHX_ RExC_state_t *pRExC_state, SV** return_invlist,
      char *save_end, *save_parse;    /* Temporaries */
      const bool in_locale = LOC;     /* we turn off /l during processing */
  
-    GET_RE_DEBUG_FLAGS_DECL;
+    DECLARE_AND_GET_RE_DEBUG_FLAGS;
  
      PERL_ARGS_ASSERT_HANDLE_REGEX_SETS;
      PERL_UNUSED_ARG(oregcomp_parse); /* Only for Set_Node_Length */
@@ -16836,7 +16861,7 @@ redo_curchar:
      if (RExC_sets_depth) {  /* If within a recursive call, return in a special
                                 regnode */
          RExC_parse++;
-        node = regpnode(pRExC_state, REGEX_SET, (void *) final);
+        node = regpnode(pRExC_state, REGEX_SET, final);
      }
      else {
  
@@ -17335,7 +17360,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                                              what gets folded */
      U32 has_runtime_dependency = 0;     /* OR of the above flags */
  
-    GET_RE_DEBUG_FLAGS_DECL;
+    DECLARE_AND_GET_RE_DEBUG_FLAGS;
  
      PERL_ARGS_ASSERT_REGCLASS;
  #ifndef DEBUGGING
@@ -17679,6 +17704,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                      /* If set TRUE, the property is user-defined as opposed to
                       * official Unicode */
                      bool user_defined = FALSE;
+                    AV * strings = NULL;
  
                      SV * prop_definition = parse_uniprop_string(
                                              name, n, UTF, FOLD,
@@ -17689,6 +17715,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                                               * this call */
                                              ! cBOOL(ret_invlist),
  
+                                            &strings,
                                              &user_defined,
                                              msg,
                                              0 /* Base level */
@@ -17706,7 +17733,55 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                                      SvCUR(msg), SvPVX(msg)));
                      }
  
-                    if (! is_invlist(prop_definition)) {
+                    assert(prop_definition || strings);
+
+                    if (strings) {
+                        if (! RExC_in_multi_char_class) {
+                            if (invert ^ (value == 'P')) {
+                                RExC_parse = e + 1;
+                                vFAIL("Inverting a character class which contains"
+                                    " a multi-character sequence is illegal");
+                            }
+
+                            /* For each multi-character string ... */
+                            while (av_tindex(strings) >= 0) {
+                                /* ... Each entry is itself an array of code
+                                * points. */
+                                AV * this_string = (AV *) av_shift( strings);
+                                STRLEN cp_count = av_tindex(this_string) + 1;
+                                SV * final = newSV(cp_count * 4);
+                                SvPVCLEAR(final);
+
+                                /* Create another string of sequences of \x{...} */
+                                while (av_tindex(this_string) >= 0) {
+                                    SV * character = av_shift(this_string);
+                                    UV cp = SvUV(character);
+
+                                    if (cp > 255) {
+                                        REQUIRE_UTF8(flagp);
+                                    }
+                                    Perl_sv_catpvf(aTHX_ final, "\\x{%" UVXf "}",
+                                                                        cp);
+                                    SvREFCNT_dec_NN(character);
+                                }
+                                SvREFCNT_dec_NN(this_string);
+
+                                /* And add that to the list of such things */
+                                multi_char_matches
+                                            = add_multi_match(multi_char_matches,
+                                                            final,
+                                                            cp_count);
+                            }
+                        }
+                        SvREFCNT_dec_NN(strings);
+                    }
+
+                    if (! prop_definition) {    /* If we got only a string,
+                                                   this iteration didn't really
+                                                   find a character */
+                        element_count--;
+                    }
+                    else if (! is_invlist(prop_definition)) {
  
                          /* Here, the definition isn't known, so we have gotten
                           * returned a string that will be evaluated if and when
@@ -18424,7 +18499,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
          }
  #endif
  
-        /* Look at the longest folds first */
+        /* Look at the longest strings first */
          for (cp_count = av_tindex_skip_len_mg(multi_char_matches);
                          cp_count > 0;
                          cp_count--)
@@ -18450,15 +18525,21 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
          }
  
          /* If the character class contains anything else besides these
-         * multi-character folds, have to include it in recursive parsing */
+         * multi-character strings, have to include it in recursive parsing */
          if (element_count) {
-            sv_catpvs(substitute_parse, "|[");
+            bool has_l_bracket = orig_parse > RExC_start && *(orig_parse - 1) == '[';
+
+            sv_catpvs(substitute_parse, "|");
+            if (has_l_bracket) {    /* Add an [ if the original had one */
+                sv_catpvs(substitute_parse, "[");
+            }
              constructed_prefix_len = SvCUR(substitute_parse);
              sv_catpvn(substitute_parse, orig_parse, RExC_parse - orig_parse);
  
-            /* Put in a closing ']' only if not going off the end, as otherwise
-             * we are adding something that really isn't there */
-            if (RExC_parse < RExC_end) {
+            /* Put in a closing ']' to match any opening one, but not if going
+             * off the end, as otherwise we are adding something that really
+             * isn't there */
+            if (has_l_bracket && RExC_parse < RExC_end) {
                  sv_catpvs(substitute_parse, "]");
              }
          }
@@ -19909,14 +19990,13 @@ S_set_ANYOF_arg(pTHX_ RExC_state_t* const pRExC_state,
      }
  }
  
-#if !defined(PERL_IN_XSUB_RE) || defined(PLUGGABLE_RE_EXTENSION)
  SV *
-Perl__get_regclass_nonbitmap_data(pTHX_ const regexp *prog,
-                                        const regnode* node,
-                                        bool doinit,
-                                        SV** listsvp,
-                                        SV** only_utf8_locale_ptr,
-                                        SV** output_invlist)
+
+#if !defined(PERL_IN_XSUB_RE) || defined(PLUGGABLE_RE_EXTENSION)
+Perl_get_regclass_nonbitmap_data(pTHX_ const regexp *prog, const regnode* node, bool doinit, SV** listsvp, SV** only_utf8_locale_ptr, SV** output_invlist)
+#else
+Perl_get_re_gclass_nonbitmap_data(pTHX_ const regexp *prog, const regnode* node, bool doinit, SV** listsvp, SV** only_utf8_locale_ptr, SV** output_invlist)
+#endif
  
  {
      /* For internal core use only.
@@ -19952,7 +20032,11 @@ Perl__get_regclass_nonbitmap_data(pTHX_ const regexp *prog,
      RXi_GET_DECL(prog, progi);
      const struct reg_data * const data = prog ? progi->data : NULL;
  
-    PERL_ARGS_ASSERT__GET_REGCLASS_NONBITMAP_DATA;
+#if !defined(PERL_IN_XSUB_RE) || defined(PLUGGABLE_RE_EXTENSION)
+    PERL_ARGS_ASSERT_GET_REGCLASS_NONBITMAP_DATA;
+#else
+    PERL_ARGS_ASSERT_GET_RE_GCLASS_NONBITMAP_DATA;
+#endif
      assert(! output_invlist || listsvp);
  
      if (data && data->count) {
@@ -20155,7 +20239,6 @@ Perl__get_regclass_nonbitmap_data(pTHX_ const regexp *prog,
  
      return invlist;
  }
-#endif /* !defined(PERL_IN_XSUB_RE) || defined(PLUGGABLE_RE_EXTENSION) */
  
  /* reg_skipcomment()
  
@@ -20319,7 +20402,7 @@ S_regnode_guts(pTHX_ RExC_state_t *pRExC_state, const U8 op, const STRLEN extra_
  
      const regnode_offset ret = RExC_emit;
  
-    GET_RE_DEBUG_FLAGS_DECL;
+    DECLARE_AND_GET_RE_DEBUG_FLAGS;
  
      PERL_ARGS_ASSERT_REGNODE_GUTS;
  
@@ -20386,12 +20469,12 @@ S_reganode(pTHX_ RExC_state_t *pRExC_state, U8 op, U32 arg)
  }
  
  /*
-- regpnode - emit a temporary node with a void* argument
+- regpnode - emit a temporary node with a SV* argument
  */
  STATIC regnode_offset /* Location. */
-S_regpnode(pTHX_ RExC_state_t *pRExC_state, U8 op, void * arg)
+S_regpnode(pTHX_ RExC_state_t *pRExC_state, U8 op, SV * arg)
  {
-    const regnode_offset ret = regnode_guts(pRExC_state, op, regarglen[op], "regvnode");
+    const regnode_offset ret = regnode_guts(pRExC_state, op, regarglen[op], "regpnode");
      regnode_offset ptr = ret;
  
      PERL_ARGS_ASSERT_REGPNODE;
@@ -20441,7 +20524,7 @@ S_reginsert(pTHX_ RExC_state_t *pRExC_state, const U8 op,
      regnode *place;
      const int offset = regarglen[(U8)op];
      const int size = NODE_STEP_REGNODE + offset;
-    GET_RE_DEBUG_FLAGS_DECL;
+    DECLARE_AND_GET_RE_DEBUG_FLAGS;
  
      PERL_ARGS_ASSERT_REGINSERT;
      PERL_UNUSED_CONTEXT;
@@ -20545,7 +20628,7 @@ S_regtail(pTHX_ RExC_state_t * pRExC_state,
                  const U32 depth)
  {
      regnode_offset scan;
-    GET_RE_DEBUG_FLAGS_DECL;
+    DECLARE_AND_GET_RE_DEBUG_FLAGS;
  
      PERL_ARGS_ASSERT_REGTAIL;
  #ifndef DEBUGGING
@@ -20619,7 +20702,7 @@ S_regtail_study(pTHX_ RExC_state_t *pRExC_state, regnode_offset p,
  #ifdef EXPERIMENTAL_INPLACESCAN
      I32 min = 0;
  #endif
-    GET_RE_DEBUG_FLAGS_DECL;
+    DECLARE_AND_GET_RE_DEBUG_FLAGS;
  
      PERL_ARGS_ASSERT_REGTAIL_STUDY;
  
@@ -20823,7 +20906,7 @@ Perl_regdump(pTHX_ const regexp *r)
      SV * const sv = sv_newmortal();
      SV *dsv= sv_newmortal();
      RXi_GET_DECL(r, ri);
-    GET_RE_DEBUG_FLAGS_DECL;
+    DECLARE_AND_GET_RE_DEBUG_FLAGS;
  
      PERL_ARGS_ASSERT_REGDUMP;
  
@@ -20965,7 +21048,7 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_
      dVAR;
      int k;
      RXi_GET_DECL(prog, progi);
-    GET_RE_DEBUG_FLAGS_DECL;
+    DECLARE_AND_GET_RE_DEBUG_FLAGS;
  
      PERL_ARGS_ASSERT_REGPROP;
  
@@ -21167,10 +21250,17 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_
                                              ANYOFRbase(o) + ANYOFRdelta(o));
              }
              else {
-                (void) _get_regclass_nonbitmap_data(prog, o, FALSE,
+#if !defined(PERL_IN_XSUB_RE) || defined(PLUGGABLE_RE_EXTENSION)
+                (void) get_regclass_nonbitmap_data(prog, o, FALSE,
+                                                &unresolved,
+                                                &only_utf8_locale_invlist,
+                                                &nonbitmap_invlist);
+#else
+                (void) get_re_gclass_nonbitmap_data(prog, o, FALSE,
                                                  &unresolved,
                                                  &only_utf8_locale_invlist,
                                                  &nonbitmap_invlist);
+#endif
              }
  
              /* The non-bitmap data may contain stuff that could fit in the
@@ -21400,7 +21490,7 @@ Perl_re_intuit_string(pTHX_ REGEXP * const r)
       * to match */
  
      struct regexp *const prog = ReANY(r);
-    GET_RE_DEBUG_FLAGS_DECL;
+    DECLARE_AND_GET_RE_DEBUG_FLAGS;
  
      PERL_ARGS_ASSERT_RE_INTUIT_STRING;
      PERL_UNUSED_CONTEXT;
@@ -21450,7 +21540,7 @@ void
  Perl_pregfree2(pTHX_ REGEXP *rx)
  {
      struct regexp *const r = ReANY(rx);
-    GET_RE_DEBUG_FLAGS_DECL;
+    DECLARE_AND_GET_RE_DEBUG_FLAGS;
  
      PERL_ARGS_ASSERT_PREGFREE2;
  
@@ -21614,7 +21704,7 @@ Perl_regfree_internal(pTHX_ REGEXP * const rx)
  {
      struct regexp *const r = ReANY(rx);
      RXi_GET_DECL(r, ri);
-    GET_RE_DEBUG_FLAGS_DECL;
+    DECLARE_AND_GET_RE_DEBUG_FLAGS;
  
      PERL_ARGS_ASSERT_REGFREE_INTERNAL;
  
@@ -22689,7 +22779,7 @@ S_dumpuntil(pTHX_ const regexp *r, const regnode *start, const regnode *node,
      const regnode *optstart= NULL;
  
      RXi_GET_DECL(r, ri);
-    GET_RE_DEBUG_FLAGS_DECL;
+    DECLARE_AND_GET_RE_DEBUG_FLAGS;
  
      PERL_ARGS_ASSERT_DUMPUNTIL;
  
@@ -22942,6 +23032,43 @@ Perl_init_uniprops(pTHX)
  #  endif
  }
  
+/* These four functions are compiled only in regcomp.c, where they have access
+ * to the data they return.  They are a way for re_comp.c to get access to that
+ * data without having to compile the whole data structures. */
+
+I16
+Perl_do_uniprop_match(const char * const key, const U16 key_len)
+{
+    PERL_ARGS_ASSERT_DO_UNIPROP_MATCH;
+
+    return match_uniprop((U8 *) key, key_len);
+}
+
+SV *
+Perl_get_prop_definition(pTHX_ const int table_index)
+{
+    PERL_ARGS_ASSERT_GET_PROP_DEFINITION;
+
+    /* Create and return the inversion list */
+    return _new_invlist_C_array(uni_prop_ptrs[table_index]);
+}
+
+const char * const *
+Perl_get_prop_values(const int table_index)
+{
+    PERL_ARGS_ASSERT_GET_PROP_VALUES;
+
+    return UNI_prop_value_ptrs[table_index];
+}
+
+const char *
+Perl_get_deprecated_property_msg(const Size_t warning_offset)
+{
+    PERL_ARGS_ASSERT_GET_DEPRECATED_PROPERTY_MSG;
+
+    return deprecated_property_msgs[warning_offset];
+}
+
  #  if 0
  
  This code was mainly added for backcompat to give a warning for non-portable
@@ -22976,11 +23103,21 @@ S_get_extended_utf8_msg(pTHX_ const UV cp)
  #endif /* end of ! PERL_IN_XSUB_RE */
  
  STATIC REGEXP *
-S_compile_wildcard(pTHX_ const char * name, const STRLEN len,
+S_compile_wildcard(pTHX_ const char * subpattern, const STRLEN len,
                           const bool ignore_case)
  {
+    /* Pretends that the input subpattern is qr/subpattern/aam, compiling it
+     * possibly with /i if the 'ignore_case' parameter is true.  Use /aa
+     * because nothing outside of ASCII will match.  Use /m because the input
+     * string may be a bunch of lines strung together.
+     *
+     * Also sets up the debugging info */
+
      U32 flags = PMf_MULTILINE|PMf_WILDCARD;
+    U32 rx_flags;
+    SV * subpattern_sv = sv_2mortal(newSVpvn(subpattern, len));
      REGEXP * subpattern_re;
+    DECLARE_AND_GET_RE_DEBUG_FLAGS;
  
      PERL_ARGS_ASSERT_COMPILE_WILDCARD;
  
@@ -22989,10 +23126,49 @@ S_compile_wildcard(pTHX_ const char * name, const STRLEN len,
      }
      set_regex_charset(&flags, REGEX_ASCII_MORE_RESTRICTED_CHARSET);
  
-    subpattern_re = re_op_compile_wrapper(sv_2mortal(newSVpvn(name, len)),
-                                        /* Like in op.c, we copy the compile
-                                         * time pm flags to the rx ones */
-                                        (flags & RXf_PMf_COMPILETIME), flags);
+    /* Like in op.c, we copy the compile time pm flags to the rx ones */
+    rx_flags = flags & RXf_PMf_COMPILETIME;
+
+#ifndef PERL_IN_XSUB_RE
+    /* Use the core engine if this file is regcomp.c.  That means no
+     * 'use re "Debug ..."' is in effect, so the core engine is sufficient */
+    subpattern_re = Perl_re_op_compile(aTHX_ &subpattern_sv, 1, NULL,
+                                             &PL_core_reg_engine,
+                                             NULL, NULL,
+                                             rx_flags, flags);
+#else
+    if (isDEBUG_WILDCARD) {
+        /* Use the special debugging engine if this file is re_comp.c and wants
+         * to output the wildcard matching.  This uses whatever
+         * 'use re "Debug ..."' is in effect */
+        subpattern_re = Perl_re_op_compile(aTHX_ &subpattern_sv, 1, NULL,
+                                                 &my_reg_engine,
+                                                 NULL, NULL,
+                                                 rx_flags, flags);
+    }
+    else {
+        /* Use the special wildcard engine if this file is re_comp.c and
+         * doesn't want to output the wildcard matching.  This uses whatever
+         * 'use re "Debug ..."' is in effect for compilation, but this engine
+         * structure has been set up so that it uses the core engine for
+         * execution, so no execution debugging as a result of re.pm will be
+         * displayed. */
+        subpattern_re = Perl_re_op_compile(aTHX_ &subpattern_sv, 1, NULL,
+                                                 &wild_reg_engine,
+                                                 NULL, NULL,
+                                                 rx_flags, flags);
+        /* XXX The above has the effect that any user-supplied regex engine
+         * won't be called for matching wildcards.  That might be good, or bad.
+         * It could be changed in several ways.  The reason it is done the
+         * current way is to avoid having to save and restore
+         * ${^RE_DEBUG_FLAGS} around the execution.  save_scalar() perhaps
+         * could be used.  Another suggestion is to keep the authoritative
+         * value of the debug flags in a thread-local variable and add set/get
+         * magic to ${^RE_DEBUG_FLAGS} to keep the C level variable up to date.
+         * Still another is to pass a flag, say in the engine's intflags that
+         * would be checked each time before doing the debug output */
+    }
+#endif
  
      assert(subpattern_re);  /* Should have died if didn't compile successfully */
      return subpattern_re;
@@ -23003,18 +23179,31 @@ S_execute_wildcard(pTHX_ REGEXP * const prog, char* stringarg, char *strend,
          char *strbeg, SSize_t minend, SV *screamer, U32 nosave)
  {
      I32 result;
+    DECLARE_AND_GET_RE_DEBUG_FLAGS;
  
      PERL_ARGS_ASSERT_EXECUTE_WILDCARD;
  
-    result = pregexec(prog, stringarg, strend, strbeg, minend, screamer, nosave);
+    ENTER;
+
+    /* The compilation has set things up so that if the program doesn't want to
+     * see the wildcard matching procedure, it will get the core execution
+     * engine, which is subject only to -Dr.  So we have to turn that off
+     * around this procedure */
+    if (! isDEBUG_WILDCARD) {
+        /* Note! Casts away 'volatile' */
+        SAVEI32(PL_debug);
+        PL_debug &= ~ DEBUG_r_FLAG;
+    }
+
+    result = CALLREGEXEC(prog, stringarg, strend, strbeg, minend, screamer,
+                         NULL, nosave);
+    LEAVE;
  
      return result;
  }
  
-#ifndef PERL_IN_XSUB_RE
-
  SV *
-Perl_handle_user_defined_property(pTHX_
+S_handle_user_defined_property(pTHX_
  
      /* Parses the contents of a user-defined property definition; returning the
       * expanded definition if possible.  If so, the return is an inversion
@@ -23229,6 +23418,7 @@ Perl_handle_user_defined_property(pTHX_
          this_definition = parse_uniprop_string(s0, s - s0,
                                                 is_utf8, to_fold, runtime,
                                                 deferrable,
+                                               NULL,
                                                 user_defined_ptr, msg,
                                                 (name_len == 0)
                                                  ? level /* Don't increase level
@@ -23388,8 +23578,8 @@ S_get_fq_name(pTHX_
      return fq_name;
  }
  
-SV *
-Perl_parse_uniprop_string(pTHX_
+STATIC SV *
+S_parse_uniprop_string(pTHX_
  
      /* Parse the interior of a \p{}, \P{}.  Returns its definition if knowable
       * now.  If so, the return is an inversion list.
@@ -23418,6 +23608,8 @@ Perl_parse_uniprop_string(pTHX_
      const bool runtime,         /* TRUE if this is being called at run time */
      const bool deferrable,      /* TRUE if it's ok for the definition to not be
                                     known at this call */
+    AV ** strings,              /* To return string property values, like named
+                                   sequences */
      bool *user_defined_ptr,     /* Upon return from this function it will be
                                     set to TRUE if any component is a
                                     user-defined property */
@@ -23603,16 +23795,19 @@ Perl_parse_uniprop_string(pTHX_
                   * but it must be punctuation */
              && (name[i] != '\\' || (i < name_len && isPUNCT_A(name[i+1]))))
          {
-            /* Find the property.  The table includes the equals sign, so we
-             * use 'j' as-is */
-            table_index = match_uniprop((U8 *) lookup_name, j);
-            if (table_index) {
-                const char * const * prop_values
-                                            = UNI_prop_value_ptrs[table_index];
+            bool special_property = memEQs(lookup_name, j - 1, "name")
+                                 || memEQs(lookup_name, j - 1, "na");
+            if (! special_property) {
+                /* Find the property.  The table includes the equals sign, so
+                 * we use 'j' as-is */
+                table_index = do_uniprop_match(lookup_name, j);
+            }
+            if (special_property || table_index) {
                  REGEXP * subpattern_re;
                  char open = name[i++];
                  char close;
                  const char * pos_in_brackets;
+                const char * const * prop_values;
                  bool escaped = 0;
  
                  /* Backslash => delimitter is the character following.  We
@@ -23646,12 +23841,39 @@ Perl_parse_uniprop_string(pTHX_
                      packWARN(WARN_EXPERIMENTAL__UNIPROP_WILDCARDS),
                      "The Unicode property wildcards feature is experimental");
  
-                /* Now create and compile the wildcard subpattern.  Use /iaa
-                 * because nothing outside of ASCII will match, and it the
-                 * property values should all match /i.  Note that when the
-                 * pattern fails to compile, our added text to the user's
-                 * pattern will be displayed to the user, which is not so
-                 * desirable. */
+                if (special_property) {
+                    const char * error_msg;
+                    const char * revised_name = name + i;
+                    Size_t revised_name_len = name_len - (i + 1 + escaped);
+
+                    /* Currently, the only 'special_property' is name, which we
+                     * look up in _charnames.pm */
+
+                    if (! load_charnames(newSVpvs("placeholder"),
+                                         revised_name, revised_name_len,
+                                         &error_msg))
+                    {
+                        sv_catpv(msg, error_msg);
+                        goto append_name_to_msg;
+                    }
+
+                    /* Farm this out to a function just to make the current
+                     * function less unwieldy */
+                    if (handle_names_wildcard(revised_name, revised_name_len,
+                                              &prop_definition,
+                                              strings))
+                    {
+                        return prop_definition;
+                    }
+
+                    goto failed;
+                }
+
+                prop_values = get_prop_values(table_index);
+
+                /* Now create and compile the wildcard subpattern.  Use /i
+                 * because the property values are supposed to match with case
+                 * ignored. */
                  subpattern_re = compile_wildcard(name + i,
                                                   name_len - i - 1 - escaped,
                                                   TRUE /* /i */
@@ -23688,6 +23910,7 @@ Perl_parse_uniprop_string(pTHX_
                                                             to_fold,
                                                             runtime,
                                                             deferrable,
+                                                           NULL,
                                                             user_defined_ptr,
                                                             msg,
                                                             level + 1);
@@ -23710,31 +23933,20 @@ Perl_parse_uniprop_string(pTHX_
              }
  
              /* Here's how khw thinks we should proceed to handle the properties
-             * not yet done:    Bidi Mirroring Glyph
-                                Bidi Paired Bracket
+             * not yet done:    Bidi Mirroring Glyph        can map to ""
+                                Bidi Paired Bracket         can map to ""
                                  Case Folding  (both full and simple)
+                                            Shouldn't /i be good enough for Full
                                  Decomposition Mapping
-                                Equivalent Unified Ideograph
-                                Name
-                                Name Alias
+                                Equivalent Unified Ideograph    can map to ""
                                  Lowercase Mapping  (both full and simple)
-                                NFKC Case Fold
+                                NFKC Case Fold                  can map to ""
                                  Titlecase Mapping  (both full and simple)
                                  Uppercase Mapping  (both full and simple)
-             * Move the part that looks at the property values into a perl
-             * script, like utf8_heavy.pl was done.  This makes things somewhat
-             * easier, but most importantly, it avoids always adding all these
-             * strings to the memory usage when the feature is little-used.
-             *
-             * The property values would all be concatenated into a single
-             * string per property with each value on a separate line, and the
-             * code point it's for on alternating lines.  Then we match the
-             * user's input pattern m//mg, without having to worry about their
-             * uses of '^' and '$'.  Only the values that aren't the default
-             * would be in the strings.  Code points would be in UTF-8.  The
-             * search pattern that we would construct would look like
-             * (?: \n (code-point_re) \n (?aam: user-re ) \n )
-             * And so $1 would contain the code point that matched the user-re.
+             * Handle these the same way Name is done, using say, _wild.pm, but
+             * having both loose and full, like in charclass_invlists.h.
+             * Perhaps move block and script to that as they are somewhat large
+             * in charclass_invlists.h.
               * For properties where the default is the code point itself, such
               * as any of the case changing mappings, the string would otherwise
               * consist of all Unicode code points in UTF-8 strung together.
@@ -23743,16 +23955,8 @@ Perl_parse_uniprop_string(pTHX_
               * error.  Otherwise run the pattern against every code point in
               * the ssc.  The ssc is kind of like tr18's 3.9 Possible Match Sets
               * And it might be good to create an API to return the ssc.
-             *
-             * For the name properties, a new function could be created in
-             * charnames which essentially does the same thing as above,
-             * sharing Name.pl with the other charname functions.  Don't know
-             * about loose name matching, or algorithmically determined names.
-             * Decomposition.pl similarly.
-             *
-             * It might be that a new pattern modifier would have to be
-             * created, like /t for resTricTed, which changed the behavior of
-             * some constructs in their subpattern, like \A. */
+             * Or handle them like the algorithmic names are done
+             */
          } /* End of is a wildcard subppattern */
  
          /* \p{name=...} is handled specially.  Instead of using the normal
@@ -23811,7 +24015,7 @@ Perl_parse_uniprop_string(pTHX_
                         "panic: Can't find '_charnames::_loose_regcomp_lookup");
              }
  
-            PUSHSTACKi(PERLSI_OVERLOAD);
+            PUSHSTACKi(PERLSI_REGCOMP);
              ENTER ;
              SAVETMPS;
              save_re_context();
@@ -23836,11 +24040,36 @@ Perl_parse_uniprop_string(pTHX_
              }
  
              cp = valid_utf8_to_uvchr((U8 *) SvPVX(character), &character_len);
-            if (character_len < SvCUR(character)) {
-                goto failed;
+            if (character_len == SvCUR(character)) {
+                prop_definition = add_cp_to_invlist(NULL, cp);
+            }
+            else {
+                AV * this_string;
+
+                /* First of the remaining characters in the string. */
+                char * remaining = SvPVX(character) + character_len;
+
+                if (strings == NULL) {
+                    goto failed;    /* XXX Perhaps a specific msg instead, like
+                                       'not available here' */
+                }
+
+                if (*strings == NULL) {
+                    *strings = newAV();
+                }
+
+                this_string = newAV();
+                av_push(this_string, newSVuv(cp));
+
+                do {
+                    cp = valid_utf8_to_uvchr((U8 *) remaining, &character_len);
+                    av_push(this_string, newSVuv(cp));
+                    remaining += character_len;
+                } while (remaining < SvEND(character));
+
+                av_push(*strings, (SV *) this_string);
              }
  
-            prop_definition = add_cp_to_invlist(NULL, cp);
              return prop_definition;
          }
  
@@ -24250,7 +24479,7 @@ Perl_parse_uniprop_string(pTHX_
               * for this property in the hash.  So we have the go ahead to
               * expand the definition ourselves. */
  
-            PUSHSTACKi(PERLSI_MAGIC);
+            PUSHSTACKi(PERLSI_REGCOMP);
              ENTER;
  
              /* Create a temporary placeholder in the hash to detect recursion
@@ -24401,7 +24630,7 @@ Perl_parse_uniprop_string(pTHX_
  
      /* Get the index into our pointer table of the inversion list corresponding
       * to the property */
-    table_index = match_uniprop((U8 *) lookup_name, lookup_len);
+    table_index = do_uniprop_match(lookup_name, lookup_len);
  
      /* If it didn't find the property ... */
      if (table_index == 0) {
@@ -24416,7 +24645,7 @@ Perl_parse_uniprop_string(pTHX_
              equals_pos -= 2;
              slash_pos -= 2;
  
-            table_index = match_uniprop((U8 *) lookup_name, lookup_len);
+            table_index = do_uniprop_match(lookup_name, lookup_len);
          }
  
          if (table_index == 0) {
@@ -24580,7 +24809,7 @@ Perl_parse_uniprop_string(pTHX_
              }
  
              /* Here, we have the number in canonical form.  Try that */
-            table_index = match_uniprop((U8 *) canonical, strlen(canonical));
+            table_index = do_uniprop_match(canonical, strlen(canonical));
              if (table_index == 0) {
                  goto failed;
              }
@@ -24604,7 +24833,8 @@ Perl_parse_uniprop_string(pTHX_
          table_index %= MAX_UNI_KEYWORD_INDEX;
          Perl_ck_warner_d(aTHX_ packWARN(WARN_DEPRECATED),
                  "Use of '%.*s' in \\p{} or \\P{} is deprecated because: %s",
-                (int) name_len, name, deprecated_property_msgs[warning_offset]);
+                (int) name_len, name,
+                get_deprecated_property_msg(warning_offset));
      }
  
      /* In a few properties, a different property is used under /i.  These are
@@ -24632,10 +24862,9 @@ Perl_parse_uniprop_string(pTHX_
      }
  
      /* Create and return the inversion list */
-    prop_definition =_new_invlist_C_array(uni_prop_ptrs[table_index]);
+    prop_definition = get_prop_definition(table_index);
      sv_2mortal(prop_definition);
  
-
      /* See if there is a private use override to add to this definition */
      {
          COPHH * hinthash = (IN_PERL_COMPILETIME)
@@ -24755,7 +24984,431 @@ Perl_parse_uniprop_string(pTHX_
      }
  }
  
-#endif /* end of ! PERL_IN_XSUB_RE */
+STATIC bool
+S_handle_names_wildcard(pTHX_ const char * wname, /* wildcard name to match */
+                              const STRLEN wname_len, /* Its length */
+                              SV ** prop_definition,
+                              AV ** strings)
+{
+    /* Deal with Name property wildcard subpatterns; returns TRUE if there were
+     * any matches, adding them to prop_definition */
+
+    dSP;
+
+    CV * get_names_info;        /* entry to charnames.pm to get info we need */
+    SV * names_string;          /* Contains all character names, except algo */
+    SV * algorithmic_names;     /* Contains info about algorithmically
+                                   generated character names */
+    REGEXP * subpattern_re;     /* The user's pattern to match with */
+    struct regexp * prog;       /* The compiled pattern */
+    char * all_names_start;     /* lib/unicore/Name.pl string of every
+                                   (non-algorithmic) character name */
+    char * cur_pos;             /* We match, effectively using /gc; this is
+                                   where we are now */
+    bool found_matches = FALSE; /* Did any name match so far? */
+    SV * empty;                 /* For matching zero length names */
+    SV * must;                  /* What substring, if any, must be in a name
+                                   for the subpattern to match */
+    SV * syllable_name = NULL;  /* For Hangul syllables */
+    const char hangul_prefix[] = "HANGUL SYLLABLE ";
+    const STRLEN hangul_prefix_len = sizeof(hangul_prefix) - 1;
+
+    /* By inspection, there are a maximum of 7 bytes in the suffix of a hangul
+     * syllable name, and these are immutable and guaranteed by the Unicode
+     * standard to never be extended */
+    const STRLEN syl_max_len = hangul_prefix_len + 7;
+
+    IV i;
+
+    PERL_ARGS_ASSERT_HANDLE_NAMES_WILDCARD;
+
+    /* Find the _charnames entry point that supplies the names data; panic if
+     * it isn't available */
+    get_names_info = get_cv("_charnames::_get_names_info", 0);
+    if (! get_names_info) {
+        Perl_croak(aTHX_ "panic: Can't find '_charnames::_get_names_info");
+    }
+
+    /* Get the charnames data */
+    PUSHSTACKi(PERLSI_REGCOMP);
+    ENTER ;
+    SAVETMPS;
+    save_re_context();
+
+    PUSHMARK(SP) ;
+    PUTBACK;
+
+    /* Special _charnames entry point that returns the info this routine
+     * requires */
+    call_sv(MUTABLE_SV(get_names_info), G_ARRAY);
+
+    SPAGAIN ;
+
+    /* Data structure for names which end in their very own code points */
+    algorithmic_names = POPs;
+    SvREFCNT_inc_simple_void_NN(algorithmic_names);
+
+    /* The lib/unicore/Name.pl string */
+    names_string = POPs;
+    SvREFCNT_inc_simple_void_NN(names_string);
+
+    PUTBACK ;
+    FREETMPS ;
+    LEAVE ;
+    POPSTACK;
+
+    if (   ! SvROK(names_string)
+        || ! SvROK(algorithmic_names))
+    {   /* Perhaps should panic instead XXX */
+        SvREFCNT_dec(names_string);
+        SvREFCNT_dec(algorithmic_names);
+        return FALSE;
+    }
+
+    names_string = sv_2mortal(SvRV(names_string));
+    all_names_start = SvPVX(names_string);
+    cur_pos = all_names_start;
+
+    algorithmic_names= sv_2mortal(SvRV(algorithmic_names));
+
+    /* Compile the subpattern consisting of the name being looked for */
+    subpattern_re = compile_wildcard(wname, wname_len, FALSE /* /-i */ );
+    must = re_intuit_string(subpattern_re);
+    prog = ReANY(subpattern_re);
+
+    /* If only nothing is matched, skip to where empty names are looked for */
+    if (prog->maxlen == 0) {
+        goto check_empty;
+    }
+
+    /* And match against the string of all names /gc.  Don't even try if it
+     * must match a character not found in any name. */
+    if ( ! must
+        || SvCUR(must) == 0
+        || strspn(SvPVX(must), "\n -0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ()")
+                                                              == SvCUR(must))
+    {
+        while (execute_wildcard(subpattern_re,
+                                cur_pos,
+                                SvEND(names_string),
+                                all_names_start, 0,
+                                names_string,
+                                0))
+        { /* Here, matched. */
+
+            /* Note the string entries look like
+             *      00001\nSTART OF HEADING\n\n
+             * so we could match anywhere in that string.  We have to rule out
+             * matching a code point line */
+            char * this_name_start = all_names_start
+                                                + RX_OFFS(subpattern_re)->start;
+            char * this_name_end   = all_names_start
+                                                + RX_OFFS(subpattern_re)->end;
+            char * cp_start;
+            char * cp_end;
+            UV cp = 0;      /* Silences some compilers */
+            AV * this_string = NULL;
+            bool is_multi = FALSE;
+
+            /* If matched nothing, advance to next possible match */
+            if (this_name_start == this_name_end) {
+                cur_pos = (char *) memchr(this_name_end + 1, '\n',
+                                          SvEND(names_string) - this_name_end);
+                if (cur_pos == NULL) {
+                    break;
+                }
+            }
+            else {
+                /* Position the next match to start beyond the current returned
+                 * entry */
+                cur_pos = (char *) memchr(this_name_end, '\n',
+                                          SvEND(names_string) - this_name_end);
+            }
+
+            /* Back up to the \n just before the beginning of the name. */
+            cp_end = (char *) my_memrchr(all_names_start,
+                                         '\n',
+                                         this_name_start - all_names_start);
+
+            /* If we didn't find a \n, it means it matched somewhere in the
+             * initial '00000' in the string, so isn't a real match */
+            if (cp_end == NULL) {
+                continue;
+            }
+
+            this_name_start = cp_end + 1;   /* The name starts just after */
+            cp_end--;                       /* the \n, and the code point */
+                                            /* ends just before it */
+
+            /* All code points are 5 digits long */
+            cp_start = cp_end - 4;
+
+            /* This shouldn't happen, as we found a \n, and the first \n is
+             * further along than what we subtracted */
+            assert(cp_start >= all_names_start);
+
+            if (cp_start == all_names_start) {
+                *prop_definition = add_cp_to_invlist(*prop_definition, 0);
+                continue;
+            }
+
+            /* If the preceding character is a blank, we either have a named
+             * sequence, or something is wrong */
+            if (*(cp_start - 1) == ' ') {
+                cp_start = (char *) my_memrchr(all_names_start,
+                                               '\n',
+                                               cp_start - all_names_start);
+                cp_start++;
+            }
+
+            assert(cp_start != NULL && cp_start >= all_names_start + 2);
+
+            /* Except for the first line in the string, the sequence before the
+             * code point is \n\n.  If that isn't the case here, we didn't
+             * match the name of a character.  (We could have matched a named
+             * sequence, not currently handled.) */
+            if (*(cp_start - 1) != '\n' || *(cp_start - 2) != '\n') {
+                continue;
+            }
+
+            /* We matched!  Add this to the list */
+            found_matches = TRUE;
+
+            /* Loop through all the code points in the sequence */
+            while (cp_start < cp_end) {
+
+                /* Calculate this code point from its 5 digits */
+                cp = (XDIGIT_VALUE(cp_start[0]) << 16)
+                   + (XDIGIT_VALUE(cp_start[1]) << 12)
+                   + (XDIGIT_VALUE(cp_start[2]) << 8)
+                   + (XDIGIT_VALUE(cp_start[3]) << 4)
+                   +  XDIGIT_VALUE(cp_start[4]);
+
+                cp_start += 6;  /* Go past any blank */
+
+                if (cp_start < cp_end || is_multi) {
+                    if (this_string == NULL) {
+                        this_string = newAV();
+                    }
+
+                    is_multi = TRUE;
+                    av_push(this_string, newSVuv(cp));
+                }
+            }
+
+            if (is_multi) { /* Was more than one code point */
+                if (*strings == NULL) {
+                    *strings = newAV();
+                }
+
+                av_push(*strings, (SV *) this_string);
+            }
+            else {  /* Only a single code point */
+                *prop_definition = add_cp_to_invlist(*prop_definition, cp);
+            }
+        } /* End of loop through the non-algorithmic names string */
+    }
+
+    /* There are also character names not in 'names_string'.  These are
+     * algorithmically generatable.  Try this pattern on each possible one.
+     * (khw originally planned to leave this out given the large number of
+     * matches attempted; but the speed turned out to be quite acceptable.)
+     *
+     * There are plenty of opportunities to optimize to skip many of the tests
+     * beyond the rudimentary ones already here */
+
+    /* First see if the subpattern matches any of the algorithmically
+     * generatable Hangul syllable names.
+     *
+     * We know none of these syllable names will match if the input pattern
+     * requires more bytes than any syllable has, or if the input pattern only
+     * matches an empty name, or if the pattern has something it must match and
+     * one of the characters in that isn't in any Hangul syllable. */
+    if (    prog->minlen <= (SSize_t) syl_max_len
+        &&  prog->maxlen > 0
+        && ( ! must
+            || SvCUR(must) == 0
+            || strspn(SvPVX(must), "\n ABCDEGHIJKLMNOPRSTUWY") == SvCUR(must)))
+    {
+        /* These constants, names, values, and algorithm are adapted from the
+         * Unicode standard, version 5.1, section 3.12, and should never
+         * change. */
+        const char * JamoL[] = {
+            "G", "GG", "N", "D", "DD", "R", "M", "B", "BB",
+            "S", "SS", "", "J", "JJ", "C", "K", "T", "P", "H"
+        };
+        const int LCount = C_ARRAY_LENGTH(JamoL);
+
+        const char * JamoV[] = {
+            "A", "AE", "YA", "YAE", "EO", "E", "YEO", "YE", "O", "WA",
+            "WAE", "OE", "YO", "U", "WEO", "WE", "WI", "YU", "EU", "YI",
+            "I"
+        };
+        const int VCount = C_ARRAY_LENGTH(JamoV);
+
+        const char * JamoT[] = {
+            "", "G", "GG", "GS", "N", "NJ", "NH", "D", "L",
+            "LG", "LM", "LB", "LS", "LT", "LP", "LH", "M", "B",
+            "BS", "S", "SS", "NG", "J", "C", "K", "T", "P", "H"
+        };
+        const int TCount = C_ARRAY_LENGTH(JamoT);
+
+        int L, V, T;
+
+        /* This is the initial Hangul syllable code point; each time through the
+         * inner loop, it maps to the next higher code point.  For more info,
+         * see the Hangul syllable section of the Unicode standard. */
+        int cp = 0xAC00;
+
+        syllable_name = sv_2mortal(newSV(syl_max_len));
+        sv_setpvn(syllable_name, hangul_prefix, hangul_prefix_len);
+
+        for (L = 0; L < LCount; L++) {
+            for (V = 0; V < VCount; V++) {
+                for (T = 0; T < TCount; T++) {
+
+                    /* Truncate back to the prefix, which is unvarying */
+                    SvCUR_set(syllable_name, hangul_prefix_len);
+
+                    sv_catpv(syllable_name, JamoL[L]);
+                    sv_catpv(syllable_name, JamoV[V]);
+                    sv_catpv(syllable_name, JamoT[T]);
+
+                    if (execute_wildcard(subpattern_re,
+                                SvPVX(syllable_name),
+                                SvEND(syllable_name),
+                                SvPVX(syllable_name), 0,
+                                syllable_name,
+                                0))
+                    {
+                        *prop_definition = add_cp_to_invlist(*prop_definition,
+                                                             cp);
+                        found_matches = TRUE;
+                    }
+
+                    cp++;
+                }
+            }
+        }
+    }
+
+    /* The rest of the algorithmically generatable names are of the form
+     * "PREFIX-code_point".  The prefixes and the code point limits of each
+     * were returned to us in the array 'algorithmic_names' from data in
+     * lib/unicore/Name.pm.  'code_point' in the name is expressed in hex. */
+    for (i = 0; i <= av_top_index((AV *) algorithmic_names); i++) {
+        IV j;
+
+        /* Each element of the array is a hash, giving the details for the
+         * series of names it covers.  There is the base name of the characters
+         * in the series, and the low and high code points in the series.  And,
+         * for optimization purposes a string containing all the legal
+         * characters that could possibly be in a name in this series. */
+        HV * this_series = (HV *) SvRV(* av_fetch((AV *) algorithmic_names, i, 0));
+        SV * prefix = * hv_fetchs(this_series, "name", 0);
+        IV low = SvIV(* hv_fetchs(this_series, "low", 0));
+        IV high = SvIV(* hv_fetchs(this_series, "high", 0));
+        char * legal = SvPVX(* hv_fetchs(this_series, "legal", 0));
+
+        /* Pre-allocate an SV with enough space */
+        SV * algo_name = sv_2mortal(Perl_newSVpvf(aTHX_ "%s-0000",
+                                                        SvPVX(prefix)));
+        if (high >= 0x10000) {
+            sv_catpvs(algo_name, "0");
+        }
+
+        /* This series can be skipped entirely if the pattern requires
+         * something longer than any name in the series, or can only match an
+         * empty name, or contains a character not found in any name in the
+         * series */
+        if (    prog->minlen <= (SSize_t) SvCUR(algo_name)
+            &&  prog->maxlen > 0
+            && ( ! must
+                || SvCUR(must) == 0
+                || strspn(SvPVX(must), legal) == SvCUR(must)))
+        {
+            for (j = low; j <= high; j++) { /* For each code point in the series */
+
+                /* Get its name, and see if it matches the subpattern */
+                Perl_sv_setpvf(aTHX_ algo_name, "%s-%X", SvPVX(prefix),
+                                     (unsigned) j);
+
+                if (execute_wildcard(subpattern_re,
+                                    SvPVX(algo_name),
+                                    SvEND(algo_name),
+                                    SvPVX(algo_name), 0,
+                                    algo_name,
+                                    0))
+                {
+                    *prop_definition = add_cp_to_invlist(*prop_definition, j);
+                    found_matches = TRUE;
+                }
+            }
+        }
+    }
+
+  check_empty:
+    /* Finally, see if the subpattern matches an empty string */
+    empty = newSVpvs("");
+    if (execute_wildcard(subpattern_re,
+                         SvPVX(empty),
+                         SvEND(empty),
+                         SvPVX(empty), 0,
+                         empty,
+                         0))
+    {
+        /* Many code points have empty names.  Currently these are the \p{GC=C}
+         * ones, minus CC and CF */
+
+        SV * empty_names_ref = get_prop_definition(UNI_C);
+        SV * empty_names = invlist_clone(empty_names_ref, NULL);
+
+        SV * subtract = get_prop_definition(UNI_CC);
+
+        _invlist_subtract(empty_names, subtract, &empty_names);
+        SvREFCNT_dec_NN(empty_names_ref);
+        SvREFCNT_dec_NN(subtract);
+
+        subtract = get_prop_definition(UNI_CF);
+        _invlist_subtract(empty_names, subtract, &empty_names);
+        SvREFCNT_dec_NN(subtract);
+
+        _invlist_union(*prop_definition, empty_names, prop_definition);
+        found_matches = TRUE;
+        SvREFCNT_dec_NN(empty_names);
+    }
+    SvREFCNT_dec_NN(empty);
+
+#if 0
+    /* If we ever were to accept aliases for, say private use names, we would
+     * need to do something fancier to find empty names.  The code below works
+     * (at the time it was written), and is slower than the above */
+    const char empties_pat[] = "^.";
+    if (strNE(name, empties_pat)) {
+        SV * empty = newSVpvs("");
+        if (execute_wildcard(subpattern_re,
+                    SvPVX(empty),
+                    SvEND(empty),
+                    SvPVX(empty), 0,
+                    empty,
+                    0))
+        {
+            SV * empties = NULL;
+
+            (void) handle_names_wildcard(empties_pat, strlen(empties_pat), &empties);
+
+            _invlist_union_complement_2nd(*prop_definition, empties, prop_definition);
+            SvREFCNT_dec_NN(empties);
+
+            found_matches = TRUE;
+        }
+        SvREFCNT_dec_NN(empty);
+    }
+#endif
+
+    SvREFCNT_dec_NN(subpattern_re);
+    return found_matches;
+}
  
  /*
   * ex: set ts=8 sts=4 sw=4 et: