regcomp.c: Silence uninit compiler warning

[perl5.git] / regcomp.c
diff --git a/regcomp.c b/regcomp.c

index 0c360a4..7a028fd 100644 (file)
--- a/regcomp.c
+++ b/regcomp.c
@@ -1308,7 +1308,8 @@ S_ssc_and(pTHX_ const RExC_state_t *pRExC_state, regnode_ssc *ssc,
          else {
              anded_flags = ANYOF_FLAGS(and_with)
              &( ANYOF_COMMON_FLAGS
-              |ANYOF_SHARED_d_MATCHES_ALL_NON_UTF8_NON_ASCII_non_d_WARN_SUPER);
+              |ANYOF_SHARED_d_MATCHES_ALL_NON_UTF8_NON_ASCII_non_d_WARN_SUPER
+              |ANYOF_SHARED_d_UPPER_LATIN1_UTF8_STRING_MATCHES_non_d_RUNTIME_USER_PROP);
          }
      }
  
@@ -1463,7 +1464,8 @@ S_ssc_or(pTHX_ const RExC_state_t *pRExC_state, regnode_ssc *ssc,
          if (OP(or_with) != ANYOFD) {
              ored_flags
              |= ANYOF_FLAGS(or_with)
-             & ANYOF_SHARED_d_MATCHES_ALL_NON_UTF8_NON_ASCII_non_d_WARN_SUPER;
+             & ( ANYOF_SHARED_d_MATCHES_ALL_NON_UTF8_NON_ASCII_non_d_WARN_SUPER
+                |ANYOF_SHARED_d_UPPER_LATIN1_UTF8_STRING_MATCHES_non_d_RUNTIME_USER_PROP);
          }
      }
  
@@ -1665,7 +1667,8 @@ S_ssc_finalize(pTHX_ RExC_state_t *pRExC_state, regnode_ssc *ssc)
       * by the time we reach here */
      assert(! (ANYOF_FLAGS(ssc)
          & ~( ANYOF_COMMON_FLAGS
-            |ANYOF_SHARED_d_MATCHES_ALL_NON_UTF8_NON_ASCII_non_d_WARN_SUPER)));
+            |ANYOF_SHARED_d_MATCHES_ALL_NON_UTF8_NON_ASCII_non_d_WARN_SUPER
+            |ANYOF_SHARED_d_UPPER_LATIN1_UTF8_STRING_MATCHES_non_d_RUNTIME_USER_PROP)));
  
      populate_ANYOF_from_invlist( (regnode *) ssc, &invlist);
  
@@ -6679,7 +6682,8 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
      RExC_pm_flags = pm_flags;
  
      if (runtime_code) {
-       if (TAINTING_get && TAINT_get)
+        assert(TAINTING_get || !TAINT_get);
+       if (TAINT_get)
             Perl_croak(aTHX_ "Eval-group in insecure regular expression");
  
         if (!S_compile_runtime_code(aTHX_ pRExC_state, exp, plen)) {
@@ -9379,7 +9383,7 @@ Perl__load_PL_utf8_foldclosures (pTHX)
  
  #ifdef PERL_ARGS_ASSERT__INVLISTEQ
  bool
-S__invlistEQ(pTHX_ SV* const a, SV* const b, const bool complement_b)
+Perl__invlistEQ(pTHX_ SV* const a, SV* const b, const bool complement_b)
  {
      /* Return a boolean as to if the two passed in inversion lists are
       * identical.  The final argument, if TRUE, says to take the complement of
@@ -9797,9 +9801,7 @@ S_parse_lparen_question_flags(pTHX_ RExC_state_t *pRExC_state)
          ++RExC_parse;
      }
  
-    if (PASS2) {
-        STD_PMMOD_FLAGS_PARSE_X_WARN(x_mod_count);
-    }
+    vFAIL("Sequence (?... not terminated");
  }
  
  /*
@@ -9980,7 +9982,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
  
             RExC_parse++;
             paren = *RExC_parse++;
-           ret = NULL;                 /* For look-ahead/behind. */
+           ret = NULL;                 /* For lookahead/behind. */
             switch (paren) {
  
             case 'P':   /* (?P...) variants for those used to PCRE/Python */
@@ -10940,7 +10942,7 @@ S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                       * enough space for all the things we are about to throw
                       * away, but we can shrink it by the ammount we are about
                       * to re-use here */
-                    RExC_size = PREVOPER(RExC_size) - regarglen[(U8)OPFAIL];
+                    RExC_size += PREVOPER(RExC_size) - regarglen[(U8)OPFAIL];
                  }
                  else {
                      ckWARNreg(RExC_parse, "Quantifier {n,m} with n > m can't match");
@@ -10956,8 +10958,6 @@ S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                                 "Useless use of greediness modifier '%c'",
                                 *RExC_parse);
                  }
-                /* Absorb the modifier, so later code doesn't see nor use it */
-                nextchar(pRExC_state);
              }
  
           do_curly:
@@ -11427,10 +11427,10 @@ S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state,
   * it returns U+FFFD (Replacement character) and sets *encp to NULL.
   */
  STATIC UV
-S_reg_recode(pTHX_ const char value, SV **encp)
+S_reg_recode(pTHX_ const U8 value, SV **encp)
  {
      STRLEN numlen = 1;
-    SV * const sv = newSVpvn_flags(&value, numlen, SVs_TEMP);
+    SV * const sv = newSVpvn_flags((const char *) &value, numlen, SVs_TEMP);
      const char * const s = *encp ? sv_recode_to_utf8(sv, *encp) : SvPVX(sv);
      const STRLEN newlen = SvCUR(sv);
      UV uv = UNICODE_REPLACEMENT;
@@ -12621,7 +12621,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                       recode_encoding:
                         if (! RExC_override_recoding) {
                             SV* enc = _get_encoding();
-                           ender = reg_recode((const char)(U8)ender, &enc);
+                           ender = reg_recode((U8)ender, &enc);
                             if (!enc && PASS2)
                                 ckWARNreg(p, "Invalid escape in the specified encoding");
                             REQUIRE_UTF8(flagp);
@@ -12749,14 +12749,14 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                      goto not_fold_common;
                  }
                  else /* A regular FOLD code point */
-                    if (! ( UTF
+                    if (! (   UTF
  #if    UNICODE_MAJOR_VERSION > 3 /* no multifolds in early Unicode */   \
     || (UNICODE_MAJOR_VERSION == 3 && (   UNICODE_DOT_VERSION > 0)       \
                                        || UNICODE_DOT_DOT_VERSION > 0)
-                        /* See comments for join_exact() as to why we fold this
-                         * non-UTF at compile time */
-                        || (node_type == EXACTFU
-                            && ender == LATIN_SMALL_LETTER_SHARP_S)
+                            /* See comments for join_exact() as to why we fold
+                             * this non-UTF at compile time */
+                            || (   node_type == EXACTFU
+                                && ender == LATIN_SMALL_LETTER_SHARP_S)
  #endif
                  )) {
                      /* Here, are folding and are not UTF-8 encoded; therefore
@@ -13099,9 +13099,6 @@ S_populate_ANYOF_from_invlist(pTHX_ regnode *node, SV** invlist_ptr)
              if (end == UV_MAX && start <= NUM_ANYOF_CODE_POINTS) {
                  ANYOF_FLAGS(node) |= ANYOF_MATCHES_ALL_ABOVE_BITMAP;
              }
-            else if (end >= NUM_ANYOF_CODE_POINTS) {
-                ANYOF_FLAGS(node) |= ANYOF_HAS_UTF8_NONBITMAP_MATCHES;
-            }
  
             /* Quit if are above what we should change */
             if (start >= NUM_ANYOF_CODE_POINTS) {
@@ -13444,6 +13441,10 @@ S_handle_regex_sets(pTHX_ RExC_state_t *pRExC_state, SV** return_invlist,
                       * default: case next time and keep on incrementing until
                       * we find one of the invariants we do handle. */
                      RExC_parse++;
+                    if (*RExC_parse == 'c') {
+                            /* Skip the \cX notation for control characters */
+                            RExC_parse += UTF ? UTF8SKIP(RExC_parse) : 1;
+                    }
                      break;
                  case '[':
                  {
@@ -13807,8 +13808,12 @@ redo_curchar:
                  /* Having gotten rid of the fence, we pop the operand at the
                   * stack top and process it as a newly encountered operand */
                  current = av_pop(stack);
-                assert(IS_OPERAND(current));
-                goto handle_operand;
+                if (IS_OPERAND(current)) {
+                    goto handle_operand;
+                }
+
+                RExC_parse++;
+                goto bad_syntax;
  
              case '&':
              case '|':
@@ -13884,11 +13889,23 @@ redo_curchar:
                  /* Here, the new operator has equal or lower precedence than
                   * what's already there.  This means the operation already
                   * there should be performed now, before the new one. */
+
                  rhs = av_pop(stack);
+                if (! IS_OPERAND(rhs)) {
+
+                    /* This can happen when a ! is not followed by an operand,
+                     * like in /(?[\t &!])/ */
+                    goto bad_syntax;
+                }
+
                  lhs = av_pop(stack);
  
-                assert(IS_OPERAND(rhs));
-                assert(IS_OPERAND(lhs));
+                if (! IS_OPERAND(lhs)) {
+
+                    /* This can happen when there is an empty (), like in
+                     * /(?[[0]+()+])/ */
+                    goto bad_syntax;
+                }
  
                  switch (stacked_operator) {
                      case '&':
@@ -13934,9 +13951,20 @@ redo_curchar:
                  av_push(stack, rhs);
                  goto redo_curchar;
  
-            case '!':   /* Highest priority, right associative, so just push
-                           onto stack */
-                av_push(stack, newSVuv(curchar));
+            case '!':   /* Highest priority, right associative */
+
+                /* If what's already at the top of the stack is another '!',
+                 * they just cancel each other out */
+                if (   (top_ptr = av_fetch(stack, top_index, FALSE))
+                    && (IS_OPERATOR(*top_ptr) && SvUV(*top_ptr) == '!'))
+                {
+                    only_to_avoid_leaks = av_pop(stack);
+                    SvREFCNT_dec(only_to_avoid_leaks);
+                }
+                else { /* Otherwise, since it's right associative, just push
+                          onto the stack */
+                    av_push(stack, newSVuv(curchar));
+                }
                  break;
  
              default:
@@ -14011,6 +14039,7 @@ redo_curchar:
          || SvTYPE(final) != SVt_INVLIST
          || av_tindex(stack) >= 0)  /* More left on stack */
      {
+      bad_syntax:
          SvREFCNT_dec(final);
          vFAIL("Incomplete expression within '(?[ ])'");
      }
@@ -14340,8 +14369,9 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
      bool has_user_defined_property = FALSE;
  
      /* inversion list of code points this node matches only when the target
-     * string is in UTF-8.  (Because is under /d) */
-    SV* depends_list = NULL;
+     * string is in UTF-8.  These are all non-ASCII, < 256.  (Because the
+     * pattern is under /d) */
+    SV* has_upper_latin1_only_utf8_matches = NULL;
  
      /* Inversion list of code points this node matches regardless of things
       * like locale, folding, utf8ness of the target string */
@@ -14394,9 +14424,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
      ret = reganode(pRExC_state,
                     (LOC)
                      ? ANYOFL
-                    : (DEPENDS_SEMANTICS)
-                      ? ANYOFD
-                      : ANYOF,
+                    : ANYOF,
                     0);
  
      if (SIZE_ONLY) {
@@ -14650,6 +14678,8 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                 if (!SIZE_ONLY) {
                      SV* invlist;
                      char* name;
+                    char* base_name;    /* name after any packages are stripped */
+                    const char * const colon_colon = "::";
  
                      /* Try to get the definition of the property into
                       * <invlist>.  If /i is in effect, the effective property
@@ -14679,6 +14709,9 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                          HV* curpkg = (IN_PERL_COMPILETIME)
                                        ? PL_curstash
                                        : CopSTASH(PL_curcop);
+                        UV final_n = n;
+                        bool has_pkg;
+
                          if (swash) {    /* Got a swash but no inversion list.
                                             Something is likely wrong that will
                                             be sorted-out later */
@@ -14690,25 +14723,43 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                           * typo) in specifying a Unicode property, or it could
                           * be a user-defined property that will be available at
                           * run-time.  The names of these must begin with 'In'
-                         * or 'Is'.  So
+                         * or 'Is' (after any packages are stripped off).  So
                           * if not one of those, or if we accept only
                           * compile-time properties, is an error; otherwise add
                           * it to the list for run-time look up. */
-                        if (   n < 3
-                            || name[0] != 'I'
-                            || (name[1] != 's' && name[1] != 'n')
+                        if ((base_name = rninstr(name, name + n,
+                                                 colon_colon, colon_colon + 2)))
+                        { /* Has ::.  We know this must be a user-defined
+                             property */
+                            base_name += 2;
+                            final_n -= base_name - name;
+                            has_pkg = TRUE;
+                        }
+                        else {
+                            base_name = name;
+                            has_pkg = FALSE;
+                        }
+
+                        if (   final_n < 3
+                            || base_name[0] != 'I'
+                            || (base_name[1] != 's' && base_name[1] != 'n')
                              || ret_invlist)
                          {
+                            const char * const msg
+                                = (has_pkg)
+                                  ? "Illegal user-defined property name"
+                                  : "Can't find Unicode property definition";
                              RExC_parse = e + 1;
-                            vFAIL2utf8f(
-                                "Can't find Unicode property definition \"%"UTF8f"\"",
-                                UTF8fARG(UTF, n, name));
+
+                            /* diag_listed_as: Can't find Unicode property definition "%s" */
+                            vFAIL3utf8f("%s \"%"UTF8f"\"",
+                                msg, UTF8fARG(UTF, n, name));
                          }
  
                          /* If the property name doesn't already have a package
                           * name, add the current one to it so that it can be
                           * referred to outside it. [perl #121777] */
-                        if (curpkg && ! instr(name, "::")) {
+                        if (! has_pkg && curpkg) {
                              char* pkgname = HvNAME(curpkg);
                              if (strNE(pkgname, "main")) {
                                  char* full_name = Perl_form(aTHX_
@@ -14727,15 +14778,9 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                          optimizable = FALSE;    /* Will have to leave this an
                                                     ANYOF node */
  
-                        /* We don't know yet, so have to assume that the
-                         * property could match something in the upper Latin1
-                         * range, hence something that isn't utf8.  Note that
-                         * this would cause things in <depends_list> to match
-                         * inappropriately, except that any \p{}, including
-                         * this one forces Unicode semantics, which means there
-                         * is no <depends_list> */
-                        ANYOF_FLAGS(ret)
-                                      |= ANYOF_HAS_NONBITMAP_NON_UTF8_MATCHES;
+                        /* We don't know yet what this matches, so have to flag
+                         * it */
+                        ANYOF_FLAGS(ret) |= ANYOF_SHARED_d_UPPER_LATIN1_UTF8_STRING_MATCHES_non_d_RUNTIME_USER_PROP;
                      }
                      else {
  
@@ -14871,7 +14916,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
               recode_encoding:
                 if (! RExC_override_recoding) {
                     SV* enc = _get_encoding();
-                   value = reg_recode((const char)(U8)value, &enc);
+                   value = reg_recode((U8)value, &enc);
                     if (!enc) {
                          if (strict) {
                              vFAIL("Invalid escape in the specified encoding");
@@ -15733,9 +15778,10 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                                                              PL_fold_latin1[j]);
                              }
                              else {
-                                depends_list =
-                                 add_cp_to_invlist(depends_list,
-                                                   PL_fold_latin1[j]);
+                                has_upper_latin1_only_utf8_matches
+                                    = add_cp_to_invlist(
+                                            has_upper_latin1_only_utf8_matches,
+                                            PL_fold_latin1[j]);
                              }
                          }
  
@@ -15799,8 +15845,10 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                              else {
                                  /* Similarly folds involving non-ascii Latin1
                                  * characters under /d are added to their list */
-                                depends_list = add_cp_to_invlist(depends_list,
-                                                                 c);
+                                has_upper_latin1_only_utf8_matches
+                                        = add_cp_to_invlist(
+                                           has_upper_latin1_only_utf8_matches,
+                                           c);
                              }
                          }
                      }
@@ -15876,13 +15924,15 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                  cp_list = posixes;
              }
  
-            if (depends_list) {
-                _invlist_union(depends_list, nonascii_but_latin1_properties,
-                               &depends_list);
+            if (has_upper_latin1_only_utf8_matches) {
+                _invlist_union(has_upper_latin1_only_utf8_matches,
+                               nonascii_but_latin1_properties,
+                               &has_upper_latin1_only_utf8_matches);
                  SvREFCNT_dec_NN(nonascii_but_latin1_properties);
              }
              else {
-                depends_list = nonascii_but_latin1_properties;
+                has_upper_latin1_only_utf8_matches
+                                            = nonascii_but_latin1_properties;
              }
          }
      }
@@ -15896,8 +15946,8 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
       * class that isn't a Unicode property, and which matches above Unicode, \W
       * or [\x{110000}] for example.
       * (Note that in this case, unlike the Posix one above, there is no
-     * <depends_list>, because having a Unicode property forces Unicode
-     * semantics */
+     * <has_upper_latin1_only_utf8_matches>, because having a Unicode property
+     * forces Unicode semantics */
      if (properties) {
          if (cp_list) {
  
@@ -15946,7 +15996,8 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
       * locales, or the class matches at least one 0-255 range code point */
      if (LOC && FOLD) {
          if (only_utf8_locale_list) {
-            ANYOF_FLAGS(ret) |= ANYOF_LOC_FOLD;
+            ANYOF_FLAGS(ret) |=  ANYOF_LOC_FOLD
+                                |ANYOF_ONLY_UTF8_LOC_FOLD_MATCHES;
          }
          else if (cp_list) { /* Look to see if a 0-255 code point is in list */
              UV start, end;
@@ -15958,14 +16009,83 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
          }
      }
  
+#define MATCHES_ALL_NON_UTF8_NON_ASCII(ret)                                 \
+    (   DEPENDS_SEMANTICS                                                   \
+     && ANYOF_FLAGS(ret)                                                    \
+        & ANYOF_SHARED_d_MATCHES_ALL_NON_UTF8_NON_ASCII_non_d_WARN_SUPER)
+
+    /* See if we can simplify things under /d */
+    if (   has_upper_latin1_only_utf8_matches
+        || MATCHES_ALL_NON_UTF8_NON_ASCII(ret))
+    {
+        if (has_upper_latin1_only_utf8_matches) {
+            if (MATCHES_ALL_NON_UTF8_NON_ASCII(ret)) {
+
+                /* Here, we have two, almost opposite, constraints in effect
+                 * for upper latin1 characters.  The macro means they all match
+                 * when the target string ISN'T in UTF-8.
+                 * 'has_upper_latin1_only_utf8_matches' contains the chars that
+                 * match only if the target string IS UTF-8.  Therefore the
+                 * ones in 'has_upper_latin1_only_utf8_matches' match
+                 * regardless of UTF-8, so can be added to the regular list,
+                 * and 'has_upper_latin1_only_utf8_matches' cleared */
+                _invlist_union(cp_list,
+                               has_upper_latin1_only_utf8_matches,
+                               &cp_list);
+                SvREFCNT_dec_NN(has_upper_latin1_only_utf8_matches);
+                has_upper_latin1_only_utf8_matches = NULL;
+            }
+            else if (cp_list) {
+
+                /* Here, 'cp_list' gives chars that always match, and
+                 * 'has_upper_latin1_only_utf8_matches' gives chars that were
+                 * specified to match only if the target string is in UTF-8.
+                 * It may be that these overlap, so we can subtract the
+                 * unconditionally matching from the conditional ones, to make
+                 * the conditional list as small as possible, perhaps even
+                 * clearing it, in which case more optimizations are possible
+                 * later */
+                _invlist_subtract(has_upper_latin1_only_utf8_matches,
+                                  cp_list,
+                                  &has_upper_latin1_only_utf8_matches);
+                if (_invlist_len(has_upper_latin1_only_utf8_matches) == 0) {
+                    SvREFCNT_dec_NN(has_upper_latin1_only_utf8_matches);
+                    has_upper_latin1_only_utf8_matches = NULL;
+                }
+            }
+        }
+
+        /* Similarly, if the unconditional matches include every upper latin1
+         * character, we can clear that flag to permit later optimizations */
+        if (cp_list && MATCHES_ALL_NON_UTF8_NON_ASCII(ret)) {
+            SV* only_non_utf8_list = invlist_clone(PL_UpperLatin1);
+            _invlist_subtract(only_non_utf8_list, cp_list, &only_non_utf8_list);
+            if (_invlist_len(only_non_utf8_list) == 0) {
+                ANYOF_FLAGS(ret) &= ~ANYOF_SHARED_d_MATCHES_ALL_NON_UTF8_NON_ASCII_non_d_WARN_SUPER;
+            }
+            SvREFCNT_dec_NN(only_non_utf8_list);
+            only_non_utf8_list = NULL;;
+        }
+
+        /* If we haven't gotten rid of all conditional matching, we change the
+         * regnode type to indicate that */
+        if (   has_upper_latin1_only_utf8_matches
+            || MATCHES_ALL_NON_UTF8_NON_ASCII(ret))
+        {
+            OP(ret) = ANYOFD;
+            optimizable = FALSE;
+        }
+    }
+#undef MATCHES_ALL_NON_UTF8_NON_ASCII
+
      /* Optimize inverted simple patterns (e.g. [^a-z]) when everything is known
       * at compile time.  Besides not inverting folded locale now, we can't
       * invert if there are things such as \w, which aren't known until runtime
       * */
      if (cp_list
          && invert
+        && OP(ret) != ANYOFD
          && ! (ANYOF_FLAGS(ret) & (ANYOF_LOCALE_FLAGS))
-       && ! depends_list
         && ! HAS_NONLOCALE_RUNTIME_PROPERTY_DEFINITION)
      {
          _invlist_invert(cp_list);
@@ -16007,16 +16127,12 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
       * adjacent such nodes.  And if the class is equivalent to things like /./,
       * expensive run-time swashes can be avoided.  Now that we have more
       * complete information, we can find things necessarily missed by the
-     * earlier code.  I (khw) did some benchmarks and found essentially no
-     * speed difference between using a POSIXA node versus an ANYOF node, so
-     * there is no reason to optimize, for example [A-Za-z0-9_] into
-     * [[:word:]]/a (although if we did it in the sizing pass it would save
-     * space).  _invlistEQ() could be used if one ever wanted to do something
-     * like this at this point in the code */
-
-    if (optimizable && cp_list && ! invert && ! depends_list) {
+     * earlier code. */
+
+    if (optimizable && cp_list && ! invert) {
          UV start, end;
          U8 op = END;  /* The optimzation node-type */
+        int posix_class = -1;   /* Illegal value */
          const char * cur_parse= RExC_parse;
  
          invlist_iterinit(cp_list);
@@ -16099,6 +16215,37 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
          }
          invlist_iterfinish(cp_list);
  
+        if (op == END) {
+
+            /* Here, didn't find an optimization.  See if this matches any of
+             * the POSIX classes.  These run slightly faster for above-Unicode
+             * code points, so don't bother with POSIXA ones nor the 2 that
+             * have no above-Unicode matches */
+            for (posix_class = 0;
+                 posix_class <= _HIGHEST_REGCOMP_DOT_H_SYNC;
+                 posix_class++)
+            {
+                int try_inverted;
+                if (posix_class == _CC_ASCII || posix_class == _CC_CNTRL) {
+                    continue;
+                }
+                for (try_inverted = 0; try_inverted < 2; try_inverted++) {
+
+                    /* Check if matches normal or inverted */
+                    if (_invlistEQ(cp_list,
+                                   PL_XPosix_ptrs[posix_class],
+                                   try_inverted))
+                    {
+                        op = (try_inverted)
+                             ? NPOSIXU
+                             : POSIXU;
+                        *flagp |= HASWIDTH|SIMPLE;
+                        goto found_posix;
+                    }
+                }
+            }
+          found_posix: ;
+        }
          if (op != END) {
              RExC_parse = (char *)orig_parse;
              RExC_emit = (regnode *)orig_emit;
@@ -16116,6 +16263,9 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                                             TRUE /* downgradable to EXACT */
                                            );
              }
+            else if (PL_regkind[op] == POSIXD || PL_regkind[op] == NPOSIXD) {
+                FLAGS(ret) = posix_class;
+            }
  
              SvREFCNT_dec_NN(cp_list);
              return ret;
@@ -16136,16 +16286,19 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
  
      /* Here, the bitmap has been populated with all the Latin1 code points that
       * always match.  Can now add to the overall list those that match only
-     * when the target string is UTF-8 (<depends_list>). */
-    if (depends_list) {
+     * when the target string is UTF-8 (<has_upper_latin1_only_utf8_matches>).
+     * */
+    if (has_upper_latin1_only_utf8_matches) {
         if (cp_list) {
-           _invlist_union(cp_list, depends_list, &cp_list);
-           SvREFCNT_dec_NN(depends_list);
+           _invlist_union(cp_list,
+                           has_upper_latin1_only_utf8_matches,
+                           &cp_list);
+           SvREFCNT_dec_NN(has_upper_latin1_only_utf8_matches);
         }
         else {
-           cp_list = depends_list;
+           cp_list = has_upper_latin1_only_utf8_matches;
         }
-        ANYOF_FLAGS(ret) |= ANYOF_HAS_UTF8_NONBITMAP_MATCHES;
+        ANYOF_FLAGS(ret) |= ANYOF_SHARED_d_UPPER_LATIN1_UTF8_STRING_MATCHES_non_d_RUNTIME_USER_PROP;
      }
  
      /* If there is a swash and more than one element, we can't use the swash in
@@ -16213,18 +16366,13 @@ S_set_ANYOF_arg(pTHX_ RExC_state_t* const pRExC_state,
  
      if (! cp_list && ! runtime_defns && ! only_utf8_locale_list) {
          assert(! (ANYOF_FLAGS(node)
-                  & (ANYOF_HAS_UTF8_NONBITMAP_MATCHES
-                     |ANYOF_HAS_NONBITMAP_NON_UTF8_MATCHES)));
+                & ANYOF_SHARED_d_UPPER_LATIN1_UTF8_STRING_MATCHES_non_d_RUNTIME_USER_PROP));
         ARG_SET(node, ANYOF_ONLY_HAS_BITMAP);
      }
      else {
         AV * const av = newAV();
         SV *rv;
  
-        assert(ANYOF_FLAGS(node)
-               & (ANYOF_HAS_UTF8_NONBITMAP_MATCHES
-                  |ANYOF_HAS_NONBITMAP_NON_UTF8_MATCHES|ANYOF_LOC_FOLD));
-
         av_store(av, 0, (runtime_defns)
                         ? SvREFCNT_inc(runtime_defns) : &PL_sv_undef);
         if (swash) {
@@ -16288,10 +16436,6 @@ Perl__get_regclass_nonbitmap_data(pTHX_ const regexp *prog,
  
      PERL_ARGS_ASSERT__GET_REGCLASS_NONBITMAP_DATA;
  
-    assert(ANYOF_FLAGS(node)
-        & (ANYOF_HAS_UTF8_NONBITMAP_MATCHES
-           |ANYOF_HAS_NONBITMAP_NON_UTF8_MATCHES|ANYOF_LOC_FOLD));
-
      if (data && data->count) {
         const U32 n = ARG(node);
  
@@ -16303,9 +16447,6 @@ Perl__get_regclass_nonbitmap_data(pTHX_ const regexp *prog,
  
             si = *ary;  /* ary[0] = the string to initialize the swash with */
  
-           /* Elements 3 and 4 are either both present or both absent. [3] is
-            * any inversion list generated at compile time; [4] indicates if
-            * that inversion list has any user-defined properties in it. */
              if (av_tindex(av) >= 2) {
                  if (only_utf8_locale_ptr
                      && ary[2]
@@ -16318,6 +16459,10 @@ Perl__get_regclass_nonbitmap_data(pTHX_ const regexp *prog,
                      *only_utf8_locale_ptr = NULL;
                  }
  
+                /* Elements 3 and 4 are either both present or both absent. [3]
+                 * is any inversion list generated at compile time; [4]
+                 * indicates if that inversion list has any user-defined
+                 * properties in it. */
                  if (av_tindex(av) >= 3) {
                      invlist = ary[3];
                      if (SvUV(ary[4])) {
@@ -17202,7 +17347,7 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_
      else if (k == ANYOF) {
         const U8 flags = ANYOF_FLAGS(o);
         int do_sep = 0;
-        SV* bitmap_invlist;  /* Will hold what the bit map contains */
+        SV* bitmap_invlist = NULL;  /* Will hold what the bit map contains */
  
  
         if (OP(o) == ANYOFL) {
@@ -17236,10 +17381,11 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_
              }
          }
  
-       if ((flags & (ANYOF_MATCHES_ALL_ABOVE_BITMAP
-                      |ANYOF_HAS_UTF8_NONBITMAP_MATCHES
-                      |ANYOF_HAS_NONBITMAP_NON_UTF8_MATCHES
-                      |ANYOF_LOC_FOLD)))
+        if (    ARG(o) != ANYOF_ONLY_HAS_BITMAP
+           || (flags
+                & ( ANYOF_MATCHES_ALL_ABOVE_BITMAP
+                   |ANYOF_SHARED_d_UPPER_LATIN1_UTF8_STRING_MATCHES_non_d_RUNTIME_USER_PROP
+                   |ANYOF_LOC_FOLD)))
          {
              if (do_sep) {
                  Perl_sv_catpvf(aTHX_ sv,"%s][%s",PL_colors[1],PL_colors[0]);
@@ -17278,11 +17424,13 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_
                      if (*s == '\n') {
                          const char * const t = ++s;
  
-                        if (flags & ANYOF_HAS_NONBITMAP_NON_UTF8_MATCHES) {
-                            sv_catpvs(sv, "{outside bitmap}");
-                        }
-                        else {
-                            sv_catpvs(sv, "{utf8}");
+                        if (flags & ANYOF_SHARED_d_UPPER_LATIN1_UTF8_STRING_MATCHES_non_d_RUNTIME_USER_PROP) {
+                            if (OP(o) == ANYOFD) {
+                                sv_catpvs(sv, "{utf8}");
+                            }
+                            else {
+                                sv_catpvs(sv, "{outside bitmap}");
+                            }
                          }
  
                          if (byte_output) {
@@ -18197,23 +18345,21 @@ S_put_charclass_bitmap_innards(pTHX_ SV *sv, char *bitmap, SV** bitmap_invlist)
      int i;
      UV start, end;
      unsigned int punct_count = 0;
-    SV* invlist = NULL;
-    SV** invlist_ptr;   /* Temporary, in case bitmap_invlist is NULL */
+    SV* invlist;
      bool allow_literals = TRUE;
+    bool inverted_for_output = FALSE;
  
      PERL_ARGS_ASSERT_PUT_CHARCLASS_BITMAP_INNARDS;
  
-    invlist_ptr = (bitmap_invlist) ? bitmap_invlist : &invlist;
-
      /* Worst case is exactly every-other code point is in the list */
-    *invlist_ptr = _new_invlist(NUM_ANYOF_CODE_POINTS / 2);
+    invlist = _new_invlist(NUM_ANYOF_CODE_POINTS / 2);
  
      /* Convert the bit map to an inversion list, keeping track of how many
       * ASCII puncts are set, including an extra amount for the backslashed
       * ones.  */
      for (i = 0; i < NUM_ANYOF_CODE_POINTS; i++) {
          if (BITMAP_TEST(bitmap, i)) {
-            *invlist_ptr = add_cp_to_invlist(*invlist_ptr, i);
+            invlist = add_cp_to_invlist(invlist, i);
              if (isPUNCT_A(i)) {
                  punct_count++;
                  if isBACKSLASHED_PUNCT(i) {
@@ -18224,8 +18370,8 @@ S_put_charclass_bitmap_innards(pTHX_ SV *sv, char *bitmap, SV** bitmap_invlist)
      }
  
      /* Nothing to output */
-    if (_invlist_len(*invlist_ptr) == 0) {
-        SvREFCNT_dec(invlist);
+    if (_invlist_len(invlist) == 0) {
+        SvREFCNT_dec_NN(invlist);
          return FALSE;
      }
  
@@ -18233,8 +18379,8 @@ S_put_charclass_bitmap_innards(pTHX_ SV *sv, char *bitmap, SV** bitmap_invlist)
       * literals, but if a range (nearly) spans all of them, it's best to output
       * it as a single range.  This code will use a single range if all but 2
       * printables are in it */
-    invlist_iterinit(*invlist_ptr);
-    while (invlist_iternext(*invlist_ptr, &start, &end)) {
+    invlist_iterinit(invlist);
+    while (invlist_iternext(invlist, &start, &end)) {
  
          /* If range starts beyond final printable, it doesn't have any in it */
          if (start > MAX_PRINT_A) {
@@ -18257,7 +18403,7 @@ S_put_charclass_bitmap_innards(pTHX_ SV *sv, char *bitmap, SV** bitmap_invlist)
              break;
          }
      }
-    invlist_iterfinish(*invlist_ptr);
+    invlist_iterfinish(invlist);
  
      /* The legibility of the output depends mostly on how many punctuation
       * characters are output.  There are 32 possible ASCII ones, and some have
@@ -18272,19 +18418,35 @@ S_put_charclass_bitmap_innards(pTHX_ SV *sv, char *bitmap, SV** bitmap_invlist)
  
          /* Add everything remaining to the list, so when we invert it just
           * below, it will be excluded */
-        _invlist_union_complement_2nd(*invlist_ptr, PL_InBitmap, invlist_ptr);
-        _invlist_invert(*invlist_ptr);
+        _invlist_union_complement_2nd(invlist, PL_InBitmap, &invlist);
+        _invlist_invert(invlist);
+        inverted_for_output = TRUE;
      }
  
      /* Here we have figured things out.  Output each range */
-    invlist_iterinit(*invlist_ptr);
-    while (invlist_iternext(*invlist_ptr, &start, &end)) {
+    invlist_iterinit(invlist);
+    while (invlist_iternext(invlist, &start, &end)) {
          if (start >= NUM_ANYOF_CODE_POINTS) {
              break;
          }
          put_range(sv, start, end, allow_literals);
      }
-    invlist_iterfinish(*invlist_ptr);
+    invlist_iterfinish(invlist);
+
+    if (bitmap_invlist) {
+
+        /* Here, the caller wants the inversion list returned.  If we inverted
+         * it, we have to restore it to the original */
+        if (inverted_for_output) {
+            _invlist_invert(invlist);
+            _invlist_intersection(invlist, PL_InBitmap, &invlist);
+        }
+
+        *bitmap_invlist = invlist;
+    }
+    else {
+        SvREFCNT_dec_NN(invlist);
+    }
  
      return TRUE;
  }