remove unused perldelta sections

[perl5.git] / regcomp.c
diff --git a/regcomp.c b/regcomp.c

index f040a35..5068a0a 100644 (file)
--- a/regcomp.c
+++ b/regcomp.c
@@ -131,6 +131,8 @@ struct RExC_state_t {
      char       *parse;                 /* Input-scan pointer. */
      char        *copy_start;            /* start of copy of input within
                                             constructed parse string */
+    char        *save_copy_start;       /* Provides one level of saving
+                                           and restoring 'copy_start' */
      char        *copy_start_in_input;   /* Position in input string
                                             corresponding to copy_start */
      SSize_t    whilem_seen;            /* number of WHILEM in this expr */
@@ -229,6 +231,7 @@ struct RExC_state_t {
  #define RExC_precomp   (pRExC_state->precomp)
  #define RExC_copy_start_in_input (pRExC_state->copy_start_in_input)
  #define RExC_copy_start_in_constructed  (pRExC_state->copy_start)
+#define RExC_save_copy_start_in_constructed  (pRExC_state->save_copy_start)
  #define RExC_precomp_end (pRExC_state->precomp_end)
  #define RExC_rx_sv     (pRExC_state->rx_sv)
  #define RExC_rx                (pRExC_state->rx)
@@ -821,8 +824,13 @@ static const scan_data_t zero_scan_data = {
  } STMT_END
  
  /* Setting this to NULL is a signal to not output warnings */
-#define TURN_OFF_WARNINGS_IN_SUBSTITUTE_PARSE RExC_copy_start_in_constructed = NULL
-#define RESTORE_WARNINGS RExC_copy_start_in_constructed = RExC_precomp
+#define TURN_OFF_WARNINGS_IN_SUBSTITUTE_PARSE                               \
+    STMT_START {                                                            \
+      RExC_save_copy_start_in_constructed  = RExC_copy_start_in_constructed;\
+      RExC_copy_start_in_constructed = NULL;                                \
+    } STMT_END
+#define RESTORE_WARNINGS                                                    \
+    RExC_copy_start_in_constructed = RExC_save_copy_start_in_constructed
  
  /* Since a warning can be generated multiple times as the input is reparsed, we
   * output it the first time we come to that point in the parse, but suppress it
@@ -1574,7 +1582,7 @@ S_get_ANYOF_cp_list_for_ssc(pTHX_ const RExC_state_t *pRExC_state,
      unsigned int i;
      const U32 n = ARG(node);
      bool new_node_has_latin1 = FALSE;
-    const U8 flags = OP(node) == ANYOFH ? 0 : ANYOF_FLAGS(node);
+    const U8 flags = OP(node) == ANYOFHb ? 0 : ANYOF_FLAGS(node);
  
      PERL_ARGS_ASSERT_GET_ANYOF_CP_LIST_FOR_SSC;
  
@@ -1627,7 +1635,7 @@ S_get_ANYOF_cp_list_for_ssc(pTHX_ const RExC_state_t *pRExC_state,
      }
  
      /* Add in the points from the bit map */
-    if (OP(node) != ANYOFH) {
+    if (OP(node) != ANYOFH && OP(node) != ANYOFHb) {
          for (i = 0; i < NUM_ANYOF_CODE_POINTS; i++) {
              if (ANYOF_BITMAP_TEST(node, i)) {
                  unsigned int start = i++;
@@ -1714,7 +1722,7 @@ S_ssc_and(pTHX_ const RExC_state_t *pRExC_state, regnode_ssc *ssc,
       * another SSC or a regular ANYOF class.  Can create false positives. */
  
      SV* anded_cp_list;
-    U8  and_with_flags = (OP(and_with) == ANYOFH) ? 0 : ANYOF_FLAGS(and_with);
+    U8  and_with_flags = (OP(and_with) == ANYOFHb) ? 0 : ANYOF_FLAGS(and_with);
      U8  anded_flags;
  
      PERL_ARGS_ASSERT_SSC_AND;
@@ -1898,7 +1906,7 @@ S_ssc_or(pTHX_ const RExC_state_t *pRExC_state, regnode_ssc *ssc,
  
      SV* ored_cp_list;
      U8 ored_flags;
-    U8  or_with_flags = (OP(or_with) == ANYOFH) ? 0 : ANYOF_FLAGS(or_with);
+    U8  or_with_flags = (OP(or_with) == ANYOFHb) ? 0 : ANYOF_FLAGS(or_with);
  
      PERL_ARGS_ASSERT_SSC_OR;
  
@@ -5836,6 +5844,7 @@ Perl_re_printf( aTHX_  "LHS=%" UVuf " RHS=%" UVuf "\n",
                  case ANYOFL:
                  case ANYOFPOSIXL:
                  case ANYOFH:
+                case ANYOFHb:
                  case ANYOF:
                     if (flags & SCF_DO_STCLASS_AND)
                         ssc_and(pRExC_state, data->start_class,
@@ -7242,7 +7251,7 @@ S_set_regex_pv(pTHX_ RExC_state_t *pRExC_state, REGEXP *Rx)
          const char* name;
  
          name = get_regex_charset_name(RExC_rx->extflags, &len);
-        if strEQ(name, DEPENDS_PAT_MODS) {  /* /d under UTF-8 => /u */
+        if (strEQ(name, DEPENDS_PAT_MODS)) {  /* /d under UTF-8 => /u */
              assert(RExC_utf8);
              name = UNICODE_PAT_MODS;
              len = sizeof(UNICODE_PAT_MODS) - 1;
@@ -10992,14 +11001,14 @@ S_handle_named_backref(pTHX_ RExC_state_t *pRExC_state,
      RExC_sawback = 1;
      ret = reganode(pRExC_state,
                     ((! FOLD)
-                     ? NREF
+                     ? REFN
                       : (ASCII_FOLD_RESTRICTED)
-                       ? NREFFA
+                       ? REFFAN
                         : (AT_LEAST_UNI_SEMANTICS)
-                         ? NREFFU
+                         ? REFFUN
                           : (LOC)
-                           ? NREFFL
-                           : NREFF),
+                           ? REFFLN
+                           : REFFN),
                      num);
      *flagp |= HASWIDTH;
  
@@ -11054,8 +11063,8 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
      if (!SvIOK(max_open)) {
          sv_setiv(max_open, RE_COMPILE_RECURSION_INIT);
      }
-    if (depth > 4 * SvIV(max_open)) { /* We increase depth by 4 for each open
-                                         paren */
+    if (depth > 4 * (UV) SvIV(max_open)) { /* We increase depth by 4 for each
+                                              open paren */
          vFAIL("Too many nested open parens");
      }
  
@@ -11840,7 +11849,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
                          RExC_rxi->data->data[num]=(void*)sv_dat;
                          SvREFCNT_inc_simple_void_NN(sv_dat);
                      }
-                    ret = reganode(pRExC_state, NGROUPP, num);
+                    ret = reganode(pRExC_state, GROUPPN, num);
                      goto insert_if_check_paren;
                 }
                 else if (memBEGINs(RExC_parse,
@@ -12883,9 +12892,9 @@ S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state,
          value = (U8 *) SvPV(value_sv, value_len);
  
          /* See if the result is one code point vs 0 or multiple */
-        if (value_len > 0 && value_len <= ((SvUTF8(value_sv))
-                                           ? UTF8SKIP(value)
-                                           : 1))
+        if (value_len > 0 && value_len <= (UV) ((SvUTF8(value_sv))
+                                               ? UTF8SKIP(value)
+                                               : 1))
          {
              /* Here, exactly one code point.  If that isn't what is wanted,
               * fail */
@@ -14466,18 +14475,16 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                              has_micro_sign = TRUE;
                          }
  
-                        *(s++) = (char) (DEPENDS_SEMANTICS)
-                                        ? toFOLD(ender)
-
-                                          /* Under /u, the fold of any
-                                           * character in the 0-255 range
-                                           * happens to be its lowercase
-                                           * equivalent, except for LATIN SMALL
-                                           * LETTER SHARP S, which was handled
-                                           * above, and the MICRO SIGN, whose
-                                           * fold requires UTF-8 to represent.
-                                           * */
-                                        : toLOWER_L1(ender);
+                        *(s++) = (DEPENDS_SEMANTICS)
+                                 ? (char) toFOLD(ender)
+
+                                   /* Under /u, the fold of any character in
+                                    * the 0-255 range happens to be its
+                                    * lowercase equivalent, except for LATIN
+                                    * SMALL LETTER SHARP S, which was handled
+                                    * above, and the MICRO SIGN, whose fold
+                                    * requires UTF-8 to represent.  */
+                                 : (char) toLOWER_L1(ender);
                      }
                 } /* End of adding current character to the node */
  
@@ -14650,8 +14657,10 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
            loopdone:   /* Jumped to when encounters something that shouldn't be
                           in the node */
  
-            /* Free up any over-allocated space */
-            change_engine_size(pRExC_state, - (initial_size - STR_SZ(len)));
+            /* Free up any over-allocated space; cast is to silence bogus
+             * warning in MS VC */
+            change_engine_size(pRExC_state,
+                                - (Ptrdiff_t) (initial_size - STR_SZ(len)));
  
              /* I (khw) don't know if you can get here with zero length, but the
               * old code handled this situation by creating a zero-length EXACT
@@ -14727,7 +14736,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                  RExC_emit += STR_SZ(len);
  
                  /* If the node isn't a single character, it can't be SIMPLE */
-                if (len > ((UTF) ? UVCHR_SKIP(ender) : 1)) {
+                if (len > (Size_t) ((UTF) ? UVCHR_SKIP(ender) : 1)) {
                      maybe_SIMPLE = 0;
                  }
  
@@ -14779,7 +14788,7 @@ S_populate_ANYOF_from_invlist(pTHX_ regnode *node, SV** invlist_ptr)
      assert(PL_regkind[OP(node)] == ANYOF);
  
      /* There is no bitmap for this node type */
-    if (OP(node) == ANYOFH) {
+    if (OP(node) == ANYOFH || OP(node) == ANYOFHb) {
          return;
      }
  
@@ -15952,8 +15961,7 @@ redo_curchar:
                                FALSE, /* Require return to be an ANYOF */
                                &current))
                  {
-                    FAIL2("panic: regclass returned failure to handle_sets, "
-                          "flags=%#" UVxf, (UV) *flagp);
+                    goto regclass_failed;
                  }
  
                  /* regclass() will return with parsing just the \ sequence,
@@ -15989,8 +15997,7 @@ redo_curchar:
                                  FALSE, /* Require return to be an ANYOF */
                                  &current))
                  {
-                    FAIL2("panic: regclass returned failure to handle_sets, "
-                          "flags=%#" UVxf, (UV) *flagp);
+                    goto regclass_failed;
                  }
  
                  if (! current) {
@@ -16351,8 +16358,7 @@ redo_curchar:
      }
  
      if (!node)
-        FAIL2("panic: regclass returned failure to handle_sets, flags=%#" UVxf,
-                    PTR2UV(flagp));
+        goto regclass_failed;
  
      /* Fix up the node type if we are in locale.  (We have pretended we are
       * under /u for the purposes of regclass(), as this construct will only
@@ -16383,6 +16389,10 @@ redo_curchar:
      nextchar(pRExC_state);
      Set_Node_Length(REGNODE_p(node), RExC_parse - oregcomp_parse + 1); /* MJD */
      return node;
+
+  regclass_failed:
+    FAIL2("panic: regclass returned failure to handle_sets, " "flags=%#" UVxf,
+                                                                (UV) *flagp);
  }
  
  #ifdef ENABLE_REGEX_SETS_DEBUGGING
@@ -16633,7 +16643,7 @@ STATIC regnode_offset
  S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                   const bool stop_at_1,  /* Just parse the next thing, don't
                                             look for a full character class */
-                 bool allow_multi_folds,
+                 bool allow_mutiple_chars,
                   const bool silence_non_portable,   /* Don't output warnings
                                                         about too large
                                                         characters */
@@ -16788,7 +16798,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
  #if UNICODE_MAJOR_VERSION < 3 /* no multifolds in early Unicode */      \
      || (UNICODE_MAJOR_VERSION == 3 && UNICODE_DOT_VERSION == 0          \
                                     && UNICODE_DOT_DOT_VERSION == 0)
-    allow_multi_folds = FALSE;
+    allow_mutiple_chars = FALSE;
  #endif
  
      /* We include the /i status at the beginning of this so that we can
@@ -16804,7 +16814,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
      if (UCHARAT(RExC_parse) == '^') {  /* Complement the class */
         RExC_parse++;
          invert = TRUE;
-        allow_multi_folds = FALSE;
+        allow_mutiple_chars = FALSE;
          MARK_NAUGHTY(1);
          SKIP_BRACKETED_WHITE_SPACE(skip_white, RExC_parse);
      }
@@ -16996,7 +17006,11 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                          else { /* cp_count > 1 */
                              assert(cp_count > 1);
                              if (! RExC_in_multi_char_class) {
-                                if (invert || range || *RExC_parse == '-') {
+                                if ( ! allow_mutiple_chars
+                                    || invert
+                                    || range
+                                    || *RExC_parse == '-')
+                                {
                                      if (strict) {
                                          RExC_parse--;
                                          vFAIL("\\N{} in inverted character class or as a range end-point is restricted to one character");
@@ -17328,40 +17342,60 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
              ) {
                  SV* scratch_list = NULL;
  
-                /* What the Posix classes (like \w, [:space:]) match in locale
-                 * isn't knowable under locale until actual match time.  A
+                /* What the Posix classes (like \w, [:space:]) match isn't
+                 * generally knowable under locale until actual match time.  A
                   * special node is used for these which has extra space for a
                   * bitmap, with a bit reserved for each named class that is to
-                 * be matched against.  This isn't needed for \p{} and
+                 * be matched against.  (This isn't needed for \p{} and
                   * pseudo-classes, as they are not affected by locale, and
-                 * hence are dealt with separately */
-                POSIXL_SET(posixl, namedclass);
-                has_runtime_dependency |= HAS_L_RUNTIME_DEPENDENCY;
-                anyof_flags |= ANYOF_MATCHES_POSIXL;
-
-                /* The above-Latin1 characters are not subject to locale rules.
-                 * Just add them to the unconditionally-matched list */
-
-                /* Get the list of the above-Latin1 code points this matches */
-                _invlist_intersection_maybe_complement_2nd(PL_AboveLatin1,
-                                        PL_XPosix_ptrs[classnum],
-
-                                        /* Odd numbers are complements, like
-                                        * NDIGIT, NASCII, ... */
-                                        namedclass % 2 != 0,
-                                        &scratch_list);
-                /* Checking if 'cp_list' is NULL first saves an extra clone.
-                 * Its reference count will be decremented at the next union,
-                 * etc, or if this is the only instance, at the end of the
-                 * routine */
-                if (! cp_list) {
-                    cp_list = scratch_list;
-                }
-                else {
-                    _invlist_union(cp_list, scratch_list, &cp_list);
-                    SvREFCNT_dec_NN(scratch_list);
+                 * hence are dealt with separately.)  However, if a named class
+                 * and its complement are both present, then it matches
+                 * everything, and there is no runtime dependency.  Odd numbers
+                 * are the complements of the next lower number, so xor works.
+                 * (Note that something like [\w\D] should match everything,
+                 * because \d should be a proper subset of \w.  But rather than
+                 * trust that the locale is well behaved, we leave this to
+                 * runtime to sort out) */
+                if (POSIXL_TEST(posixl, namedclass ^ 1)) {
+                    cp_list = _add_range_to_invlist(cp_list, 0, UV_MAX);
+                    POSIXL_ZERO(posixl);
+                    has_runtime_dependency &= ~HAS_L_RUNTIME_DEPENDENCY;
+                    anyof_flags &= ~ANYOF_MATCHES_POSIXL;
+                    continue;   /* We could ignore the rest of the class, but
+                                   best to parse it for any errors */
+                }
+                else { /* Here, isn't the complement of any already parsed
+                          class */
+                    POSIXL_SET(posixl, namedclass);
+                    has_runtime_dependency |= HAS_L_RUNTIME_DEPENDENCY;
+                    anyof_flags |= ANYOF_MATCHES_POSIXL;
+
+                    /* The above-Latin1 characters are not subject to locale
+                     * rules.  Just add them to the unconditionally-matched
+                     * list */
+
+                    /* Get the list of the above-Latin1 code points this
+                     * matches */
+                    _invlist_intersection_maybe_complement_2nd(PL_AboveLatin1,
+                                            PL_XPosix_ptrs[classnum],
+
+                                            /* Odd numbers are complements,
+                                             * like NDIGIT, NASCII, ... */
+                                            namedclass % 2 != 0,
+                                            &scratch_list);
+                    /* Checking if 'cp_list' is NULL first saves an extra
+                     * clone.  Its reference count will be decremented at the
+                     * next union, etc, or if this is the only instance, at the
+                     * end of the routine */
+                    if (! cp_list) {
+                        cp_list = scratch_list;
+                    }
+                    else {
+                        _invlist_union(cp_list, scratch_list, &cp_list);
+                        SvREFCNT_dec_NN(scratch_list);
+                    }
+                    continue;   /* Go get next character */
                  }
-                continue;   /* Go get next character */
              }
              else {
  
@@ -17523,7 +17557,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
           *  "ss"  =~ /^[^\xDF]+$/i => N
           *
           * See [perl #89750] */
-        if (FOLD && allow_multi_folds && value == prevvalue) {
+        if (FOLD && allow_mutiple_chars && value == prevvalue) {
              if (    value == LATIN_SMALL_LETTER_SHARP_S
                  || (value > 255 && _invlist_contains_cp(PL_HasMultiCharFold,
                                                          value)))
@@ -17697,7 +17731,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                             literal
                          );
                  }
-                else if isMNEMONIC_CNTRL(value) {
+                else if (isMNEMONIC_CNTRL(value)) {
                      vWARN4(RExC_parse,
                             "\"%.*s\" is more clearly written simply as \"%s\"",
                             (int) (RExC_parse - rangebegin),
@@ -18742,7 +18776,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                      RExC_emit += 1 + STR_SZ(len);
                      STR_LEN(REGNODE_p(ret)) = len;
                      if (len == 1) {
-                        *STRING(REGNODE_p(ret)) = value;
+                        *STRING(REGNODE_p(ret)) = (U8) value;
                      }
                      else {
                          uvchr_to_utf8((U8 *) STRING(REGNODE_p(ret)), value);
@@ -18995,40 +19029,39 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
              /* If the lowest and highest code point in the class have the same
               * UTF-8 first byte, then all do, and we can store that byte for
               * regexec.c to use so that it can more quickly scan the target
-             * string for potential matches for this class.  We co-opt the the
-             * flags field for this.  Zero means, they don't have the same
-             * first byte.  We do accept here very large code points (for
-             * future use), but don't bother with this optimization for them,
-             * as it would cause other complications */
-            if (highest_cp > IV_MAX) {
-                anyof_flags = 0;
-            }
-            else {
+             * string for potential matches for this class.  We co-opt the
+             * flags field for this, and make the node ANYOFb.  We do accept
+             * here very large code points (for future use), but don't do
+             * this optimization for them, as it would cause other
+             * complications */
+            op = ANYOFH;
+            if (highest_cp <= IV_MAX) {
                  U8 low_utf8[UTF8_MAXBYTES+1];
                  U8 high_utf8[UTF8_MAXBYTES+1];
  
                  (void) uvchr_to_utf8(low_utf8, start[0]);
                  (void) uvchr_to_utf8(high_utf8, invlist_highest(cp_list));
  
-                anyof_flags = (low_utf8[0] == high_utf8[0])
-                            ? low_utf8[0]
-                            : 0;
+                if (low_utf8[0] == high_utf8[0]) {
+                    anyof_flags = low_utf8[0];
+                    op = ANYOFHb;
+                }
              }
  
-            op = ANYOFH;
+            goto done_finding_op;
          }
      }   /* End of seeing if can optimize it into a different node */
  
    is_anyof: /* It's going to be an ANYOF node. */
-    if (op != ANYOFH) {
-        op = (has_runtime_dependency & HAS_D_RUNTIME_DEPENDENCY)
-             ? ANYOFD
-             : ((posixl)
-                ? ANYOFPOSIXL
-                : ((LOC)
-                   ? ANYOFL
-                   : ANYOF));
-    }
+    op = (has_runtime_dependency & HAS_D_RUNTIME_DEPENDENCY)
+         ? ANYOFD
+         : ((posixl)
+            ? ANYOFPOSIXL
+            : ((LOC)
+               ? ANYOFL
+               : ANYOF));
+
+  done_finding_op:
  
      ret = regnode_guts(pRExC_state, op, regarglen[op], "anyof");
      FILL_NODE(ret, op);        /* We set the argument later */
@@ -19780,7 +19813,7 @@ S_regtail(pTHX_ RExC_state_t * pRExC_state,
      }
  
      if (reg_off_by_arg[OP(REGNODE_p(scan))]) {
-        assert(val - scan <= U32_MAX);
+        assert((UV) (val - scan) <= U32_MAX);
          ARG_SET(REGNODE_p(scan), val - scan);
      }
      else {
@@ -19892,7 +19925,7 @@ S_regtail_study(pTHX_ RExC_state_t *pRExC_state, regnode_offset p,
          );
      });
      if (reg_off_by_arg[OP(REGNODE_p(scan))]) {
-        assert(val - scan <= U32_MAX);
+        assert((UV) (val - scan) <= U32_MAX);
         ARG_SET(REGNODE_p(scan), val - scan);
      }
      else {
@@ -20261,7 +20294,7 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_
              name_list= RExC_paren_name_list;
          }
          if (name_list) {
-            if ( k != REF || (OP(o) < NREF)) {
+            if ( k != REF || (OP(o) < REFN)) {
                  SV **name= av_fetch(name_list, parno, 0 );
                 if (name)
                     Perl_sv_catpvf(aTHX_ sv, " '%" SVf "'", SVfARG(*name));
@@ -20315,7 +20348,7 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_
          /* 2: embedded, otherwise 1 */
         Perl_sv_catpvf(aTHX_ sv, "[%d]", o->flags);
      else if (k == ANYOF) {
-       const U8 flags = (OP(o) == ANYOFH) ? 0 : ANYOF_FLAGS(o);
+       const U8 flags = (OP(o) == ANYOFHb) ? 0 : ANYOF_FLAGS(o);
          bool do_sep = FALSE;    /* Do we need to separate various components of
                                     the output? */
          /* Set if there is still an unresolved user-defined property */
@@ -20371,7 +20404,7 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_
          /* Ready to start outputting.  First, the initial left bracket */
         Perl_sv_catpvf(aTHX_ sv, "[%s", PL_colors[0]);
  
-        if (OP(o) != ANYOFH) {
+        if (OP(o) != ANYOFH && OP(o) != ANYOFHb) {
              /* Then all the things that could fit in the bitmap */
              do_sep = put_charclass_bitmap_innards(sv,
                                                    ANYOF_BITMAP(o),
@@ -20469,7 +20502,7 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_
          /* And finally the matching, closing ']' */
         Perl_sv_catpvf(aTHX_ sv, "%s]", PL_colors[1]);
  
-        if (OP(o) == ANYOFH && FLAGS(o) != 0) {
+        if (OP(o) == ANYOFHb) {
              Perl_sv_catpvf(aTHX_ sv, " (First UTF-8 byte=\\x%02x)", FLAGS(o));
          }
  
@@ -22455,6 +22488,38 @@ S_delete_recursion_entry(pTHX_ void *key)
      RESTORE_CONTEXT;
  }
  
+STATIC SV *
+S_get_fq_name(pTHX_
+              const char * const name,    /* The first non-blank in the \p{}, \P{} */
+              const Size_t name_len,      /* Its length in bytes, not including any trailing space */
+              const bool is_utf8,         /* ? Is 'name' encoded in UTF-8 */
+              const bool has_colon_colon
+             )
+{
+    /* Returns a mortal SV containing the fully qualified version of the input
+     * name */
+
+    SV * fq_name;
+
+    fq_name = newSVpvs_flags("", SVs_TEMP);
+
+    /* Use the current package if it wasn't included in our input */
+    if (! has_colon_colon) {
+        const HV * pkg = (IN_PERL_COMPILETIME)
+                         ? PL_curstash
+                         : CopSTASH(PL_curcop);
+        const char* pkgname = HvNAME(pkg);
+
+        Perl_sv_catpvf(aTHX_ fq_name, "%" UTF8f,
+                      UTF8fARG(is_utf8, strlen(pkgname), pkgname));
+        sv_catpvs(fq_name, "::");
+    }
+
+    Perl_sv_catpvf(aTHX_ fq_name, "%" UTF8f,
+                         UTF8fARG(is_utf8, name_len, name));
+    return fq_name;
+}
+
  SV *
  Perl_parse_uniprop_string(pTHX_
  
@@ -22509,8 +22574,7 @@ Perl_parse_uniprop_string(pTHX_
      int slash_pos  = -1;    /* Where the '/' is found, or negative if none */
      int table_index = 0;    /* The entry number for this property in the table
                                 of all Unicode property names */
-    bool starts_with_In_or_Is = FALSE;  /* ? Does the name start with 'In' or
-                                             'Is' */
+    bool starts_with_Is = FALSE;  /* ? Does the name start with 'Is' */
      Size_t lookup_offset = 0;   /* Used to ignore the first few characters of
                                     the normalized name in certain situations */
      Size_t non_pkg_begin = 0;   /* Offset of first byte in 'name' that isn't
@@ -22523,6 +22587,8 @@ Perl_parse_uniprop_string(pTHX_
                                       it is the definition.  Otherwise it is a
                                       string containing the fully qualified sub
                                       name of 'name' */
+    SV * fq_name = NULL;        /* For user-defined properties, the fully
+                                   qualified name */
      bool invert_return = FALSE; /* ? Do we need to complement the result before
                                       returning it */
  
@@ -22669,7 +22735,8 @@ Perl_parse_uniprop_string(pTHX_
                  pos_in_brackets = strchr("([<)]>)]>", open);
                  close = (pos_in_brackets) ? pos_in_brackets[3] : open;
  
-                if (   name[name_len-1] != close
+                if (    i >= name_len
+                    ||  name[name_len-1] != close
                      || (escaped && name[name_len-2] != '\\'))
                  {
                      sv_catpvs(msg, "Unicode property wildcard not terminated");
@@ -22985,7 +23052,11 @@ Perl_parse_uniprop_string(pTHX_
          &&  name[non_pkg_begin+0] == 'I'
          && (name[non_pkg_begin+1] == 'n' || name[non_pkg_begin+1] == 's'))
      {
-        starts_with_In_or_Is = TRUE;
+        /* Names that start with In have different characterstics than those
+         * that start with Is */
+        if (name[non_pkg_begin+1] == 's') {
+            starts_with_Is = TRUE;
+        }
      }
      else {
          could_be_user_defined = FALSE;
@@ -22994,20 +23065,28 @@ Perl_parse_uniprop_string(pTHX_
      if (could_be_user_defined) {
          CV* user_sub;
  
+        /* If the user defined property returns the empty string, it could
+         * easily be because the pattern is being compiled before the data it
+         * actually needs to compile is available.  This could be argued to be
+         * a bug in the perl code, but this is a change of behavior for Perl,
+         * so we handle it.  This means that intentionally returning nothing
+         * will not be resolved until runtime */
+        bool empty_return = FALSE;
+
          /* Here, the name could be for a user defined property, which are
           * implemented as subs. */
          user_sub = get_cvn_flags(name, name_len, 0);
          if (user_sub) {
+            const char insecure[] = "Insecure user-defined property";
  
              /* Here, there is a sub by the correct name.  Normally we call it
               * to get the property definition */
              dSP;
              SV * user_sub_sv = MUTABLE_SV(user_sub);
              SV * error;     /* Any error returned by calling 'user_sub' */
-            SV * fq_name;   /* Fully qualified property name */
+            SV * key;       /* The key into the hash of user defined sub names
+                             */
              SV * placeholder;
-            char to_fold_string[] = "0:";   /* The 0 gets overwritten with the
-                                               actual value */
              SV ** saved_user_prop_ptr;      /* Hash entry for this property */
  
              /* How many times to retry when another thread is in the middle of
@@ -23019,11 +23098,11 @@ Perl_parse_uniprop_string(pTHX_
              /* If we get here, we know this property is user-defined */
              *user_defined_ptr = TRUE;
  
-            /* We refuse to call a tainted subroutine; returning an error
-             * instead */
+            /* We refuse to call a potentially tainted subroutine; returning an
+             * error instead */
              if (TAINT_get) {
                  if (SvCUR(msg) > 0) sv_catpvs(msg, "; ");
-                sv_catpvs(msg, "Insecure user-defined property");
+                sv_catpvn(msg, insecure, sizeof(insecure) - 1);
                  goto append_name_to_msg;
              }
  
@@ -23037,14 +23116,13 @@ Perl_parse_uniprop_string(pTHX_
               * should the need arise, passing the /i status as a parameter.
               *
               * We start by constructing the hash key name, consisting of the
-             * fully qualified subroutine name */
-            fq_name = sv_2mortal(newSV(10));    /* 10 is just a guess */
-            (void) cv_name(user_sub, fq_name, 0);
-
-            /* But precede the sub name in the key with the /i status, so that
-             * there is a key for /i and a different key for non-/i */
-            to_fold_string[0] = to_fold + '0';
-            sv_insert(fq_name, 0, 0, to_fold_string, 2);
+             * fully qualified subroutine name, preceded by the /i status, so
+             * that there is a key for /i and a different key for non-/i */
+            key = newSVpvn(((to_fold) ? "1" : "0"), 1);
+            fq_name = S_get_fq_name(aTHX_ name, name_len, is_utf8,
+                                          non_pkg_begin != 0);
+            sv_catsv(key, fq_name);
+            sv_2mortal(key);
  
              /* We only call the sub once throughout the life of the program
               * (with the /i, non-/i exception noted above).  That means the
@@ -23094,7 +23172,7 @@ Perl_parse_uniprop_string(pTHX_
              /* If we have an entry for this key, the subroutine has already
               * been called once with this /i status. */
              saved_user_prop_ptr = hv_fetch(PL_user_def_props,
-                                           SvPVX(fq_name), SvCUR(fq_name), 0);
+                                                   SvPVX(key), SvCUR(key), 0);
              if (saved_user_prop_ptr) {
  
                  /* If the saved result is an inversion list, it is the valid
@@ -23162,13 +23240,14 @@ Perl_parse_uniprop_string(pTHX_
               * for this property in the hash.  So we have the go ahead to
               * expand the definition ourselves. */
  
+            PUSHSTACKi(PERLSI_MAGIC);
              ENTER;
  
              /* Create a temporary placeholder in the hash to detect recursion
               * */
              SWITCH_TO_GLOBAL_CONTEXT;
              placeholder= newSVuv(PTR2IV(ORIGINAL_CONTEXT));
-            (void) hv_store_ent(PL_user_def_props, fq_name, placeholder, 0);
+            (void) hv_store_ent(PL_user_def_props, key, placeholder, 0);
              RESTORE_CONTEXT;
  
              /* Now that we have a placeholder, we can let other threads
@@ -23176,7 +23255,7 @@ Perl_parse_uniprop_string(pTHX_
              USER_PROP_MUTEX_UNLOCK;
  
              /* Make sure the placeholder always gets destroyed */
-            SAVEDESTRUCTOR_X(S_delete_recursion_entry, SvPVX(fq_name));
+            SAVEDESTRUCTOR_X(S_delete_recursion_entry, SvPVX(key));
  
              PUSHMARK(SP);
              SAVETMPS;
@@ -23187,16 +23266,33 @@ Perl_parse_uniprop_string(pTHX_
              XPUSHs(boolSV(to_fold));
              PUTBACK;
  
+            /* The following block was taken from swash_init().  Presumably
+             * they apply to here as well, though we no longer use a swash --
+             * khw */
+            SAVEHINTS();
+            save_re_context();
+            /* We might get here via a subroutine signature which uses a utf8
+             * parameter name, at which point PL_subname will have been set
+             * but not yet used. */
+            save_item(PL_subname);
+
              (void) call_sv(user_sub_sv, G_EVAL|G_SCALAR);
  
              SPAGAIN;
  
              error = ERRSV;
-            if (SvTRUE(error)) {
+            if (TAINT_get || SvTRUE(error)) {
                  if (SvCUR(msg) > 0) sv_catpvs(msg, "; ");
-                sv_catpvs(msg, "Error \"");
-                sv_catsv(msg, error);
-                sv_catpvs(msg, "\"");
+                if (SvTRUE(error)) {
+                    sv_catpvs(msg, "Error \"");
+                    sv_catsv(msg, error);
+                    sv_catpvs(msg, "\"");
+                }
+                if (TAINT_get) {
+                    if (SvTRUE(error)) sv_catpvs(msg, "; ");
+                    sv_catpvn(msg, insecure, sizeof(insecure) - 1);
+                }
+
                  if (name_len > 0) {
                      sv_catpvs(msg, " in expansion of ");
                      Perl_sv_catpvf(aTHX_ msg, "%" UTF8f, UTF8fARG(is_utf8,
@@ -23208,16 +23304,28 @@ Perl_parse_uniprop_string(pTHX_
                  prop_definition = NULL;
              }
              else {  /* G_SCALAR guarantees a single return value */
+                SV * contents = POPs;
  
                  /* The contents is supposed to be the expansion of the property
-                 * definition.  Call a function to check for valid syntax and
-                 * handle it */
-                prop_definition = handle_user_defined_property(name, name_len,
+                 * definition.  If the definition is deferrable, and we got an
+                 * empty string back, set a flag to later defer it (after clean
+                 * up below). */
+                if (      deferrable
+                    && (! SvPOK(contents) || SvCUR(contents) == 0))
+                {
+                        empty_return = TRUE;
+                }
+                else { /* Otherwise, call a function to check for valid syntax,
+                          and handle it */
+
+                    prop_definition = handle_user_defined_property(
+                                                    name, name_len,
                                                      is_utf8, to_fold, runtime,
                                                      deferrable,
-                                                    POPs, user_defined_ptr,
+                                                    contents, user_defined_ptr,
                                                      msg,
                                                      level);
+                }
              }
  
              /* Here, we have the results of the expansion.  Delete the
@@ -23227,15 +23335,16 @@ Perl_parse_uniprop_string(pTHX_
               * and add the permanent entry */
              USER_PROP_MUTEX_LOCK;
  
-            S_delete_recursion_entry(aTHX_ SvPVX(fq_name));
-
-            if (! prop_definition || is_invlist(prop_definition)) {
+            S_delete_recursion_entry(aTHX_ SvPVX(key));
  
+            if (    ! empty_return
+                && (! prop_definition || is_invlist(prop_definition)))
+            {
                  /* If we got success we use the inversion list defining the
                   * property; otherwise use the error message */
                  SWITCH_TO_GLOBAL_CONTEXT;
                  (void) hv_store_ent(PL_user_def_props,
-                                    fq_name,
+                                    key,
                                      ((prop_definition)
                                       ? newSVsv(prop_definition)
                                       : newSVsv(msg)),
@@ -23249,6 +23358,11 @@ Perl_parse_uniprop_string(pTHX_
  
              FREETMPS;
              LEAVE;
+            POPSTACK;
+
+            if (empty_return) {
+                goto definition_deferred;
+            }
  
              if (prop_definition) {
  
@@ -23281,8 +23395,11 @@ Perl_parse_uniprop_string(pTHX_
      /* If it didn't find the property ... */
      if (table_index == 0) {
  
-        /* Try again stripping off any initial 'In' or 'Is' */
-        if (starts_with_In_or_Is) {
+        /* Try again stripping off any initial 'Is'.  This is because we
+         * promise that an initial Is is optional.  The same isn't true of
+         * names that start with 'In'.  Those can match only blocks, and the
+         * lookup table already has those accounted for. */
+        if (starts_with_Is) {
              lookup_name += 2;
              lookup_len -= 2;
              equals_pos -= 2;
@@ -23325,10 +23442,12 @@ Perl_parse_uniprop_string(pTHX_
                   * NV. */
  
                  NV value;
+                SSize_t value_len = lookup_len - equals_pos;
  
                  /* Get the value */
-                if (my_atof3(lookup_name + equals_pos, &value,
-                             lookup_len - equals_pos)
+                if (   value_len <= 0
+                    || my_atof3(lookup_name + equals_pos, &value,
+                                value_len)
                            != lookup_name + lookup_len)
                  {
                      goto failed;
@@ -23594,28 +23713,17 @@ Perl_parse_uniprop_string(pTHX_
    definition_deferred:
  
      /* Here it could yet to be defined, so defer evaluation of this
-     * until its needed at runtime. */
-    prop_definition = newSVpvs_flags("", SVs_TEMP);
-
-    /* To avoid any ambiguity, the package is always specified.
-     * Use the current one if it wasn't included in our input */
-    if (non_pkg_begin == 0) {
-        const HV * pkg = (IN_PERL_COMPILETIME)
-                         ? PL_curstash
-                         : CopSTASH(PL_curcop);
-        const char* pkgname = HvNAME(pkg);
-
-        Perl_sv_catpvf(aTHX_ prop_definition, "%" UTF8f,
-                      UTF8fARG(is_utf8, strlen(pkgname), pkgname));
-        sv_catpvs(prop_definition, "::");
+     * until its needed at runtime.  We need the fully qualified property name
+     * to avoid ambiguity, and a trailing newline */
+    if (! fq_name) {
+        fq_name = S_get_fq_name(aTHX_ name, name_len, is_utf8,
+                                      non_pkg_begin != 0 /* If has "::" */
+                               );
      }
-
-    Perl_sv_catpvf(aTHX_ prop_definition, "%" UTF8f,
-                         UTF8fARG(is_utf8, name_len, name));
-    sv_catpvs(prop_definition, "\n");
+    sv_catpvs(fq_name, "\n");
  
      *user_defined_ptr = TRUE;
-    return prop_definition;
+    return fq_name;
  }
  
  #endif