infnan: more tests.

[perl5.git] / regcomp.c
diff --git a/regcomp.c b/regcomp.c

index 236cb24..be9c184 100644 (file)
--- a/regcomp.c
+++ b/regcomp.c
@@ -93,6 +93,8 @@ EXTERN_C const struct regexp_engine my_reg_engine;
  
  #define HAS_NONLATIN1_FOLD_CLOSURE(i) \
   _HAS_NONLATIN1_FOLD_CLOSURE_ONLY_FOR_USE_BY_REGCOMP_DOT_C_AND_REGEXEC_DOT_C(i)
  
  #define HAS_NONLATIN1_FOLD_CLOSURE(i) \
   _HAS_NONLATIN1_FOLD_CLOSURE_ONLY_FOR_USE_BY_REGCOMP_DOT_C_AND_REGEXEC_DOT_C(i)
+#define HAS_NONLATIN1_SIMPLE_FOLD_CLOSURE(i) \
+ _HAS_NONLATIN1_SIMPLE_FOLD_CLOSURE_ONLY_FOR_USE_BY_REGCOMP_DOT_C_AND_REGEXEC_DOT_C(i)
  #define IS_NON_FINAL_FOLD(c) _IS_NON_FINAL_FOLD_ONLY_FOR_USE_BY_REGCOMP_DOT_C(c)
  #define IS_IN_SOME_FOLD_L1(c) _IS_IN_SOME_FOLD_ONLY_FOR_USE_BY_REGCOMP_DOT_C(c)
  
  #define IS_NON_FINAL_FOLD(c) _IS_NON_FINAL_FOLD_ONLY_FOR_USE_BY_REGCOMP_DOT_C(c)
  #define IS_IN_SOME_FOLD_L1(c) _IS_IN_SOME_FOLD_ONLY_FOR_USE_BY_REGCOMP_DOT_C(c)
  
@@ -223,7 +225,7 @@ struct RExC_state_t {
  
  #define        ISMULT1(c)      ((c) == '*' || (c) == '+' || (c) == '?')
  #define        ISMULT2(s)      ((*s) == '*' || (*s) == '+' || (*s) == '?' || \
  
  #define        ISMULT1(c)      ((c) == '*' || (c) == '+' || (c) == '?')
  #define        ISMULT2(s)      ((*s) == '*' || (*s) == '+' || (*s) == '?' || \
-       ((*s) == '{' && regcurly(s, FALSE)))
+       ((*s) == '{' && regcurly(s)))
  
  /*
   * Flags to be passed up and down.
  
  /*
   * Flags to be passed up and down.
@@ -375,24 +377,6 @@ typedef struct scan_data_t {
      regnode_ssc *start_class;
  } scan_data_t;
  
      regnode_ssc *start_class;
  } scan_data_t;
  
-/* The below is perhaps overboard, but this allows us to save a test at the
- * expense of a mask.  This is because on both EBCDIC and ASCII machines, 'A'
- * and 'a' differ by a single bit; the same with the upper and lower case of
- * all other ASCII-range alphabetics.  On ASCII platforms, they are 32 apart;
- * on EBCDIC, they are 64.  This uses an exclusive 'or' to find that bit and
- * then inverts it to form a mask, with just a single 0, in the bit position
- * where the upper- and lowercase differ.  XXX There are about 40 other
- * instances in the Perl core where this micro-optimization could be used.
- * Should decide if maintenance cost is worse, before changing those
- *
- * Returns a boolean as to whether or not 'v' is either a lowercase or
- * uppercase instance of 'c', where 'c' is in [A-Za-z].  If 'c' is a
- * compile-time constant, the generated code is better than some optimizing
- * compilers figure out, amounting to a mask and test.  The results are
- * meaningless if 'c' is not one of [A-Za-z] */
-#define isARG2_lower_or_UPPER_ARG1(c, v) \
-                              (((v) & ~('A' ^ 'a')) ==  ((c) & ~('A' ^ 'a')))
-
  /*
   * Forward declarations for pregcomp()'s friends.
   */
  /*
   * Forward declarations for pregcomp()'s friends.
   */
@@ -586,80 +570,85 @@ static const scan_data_t zero_scan_data =
              REPORT_LOCATION_ARGS(offset));         \
  } STMT_END
  
              REPORT_LOCATION_ARGS(offset));         \
  } STMT_END
  
+/* These have asserts in them because of [perl #122671] Many warnings in
+ * regcomp.c can occur twice.  If they get output in pass1 and later in that
+ * pass, the pattern has to be converted to UTF-8 and the pass restarted, they
+ * would get output again.  So they should be output in pass2, and these
+ * asserts make sure new warnings follow that paradigm. */
  
  /* m is not necessarily a "literal string", in this macro */
  #define reg_warn_non_literal_string(loc, m) STMT_START {                \
      const IV offset = loc - RExC_precomp;                               \
  
  /* m is not necessarily a "literal string", in this macro */
  #define reg_warn_non_literal_string(loc, m) STMT_START {                \
      const IV offset = loc - RExC_precomp;                               \
-    Perl_warner(aTHX_ packWARN(WARN_REGEXP), "%s" REPORT_LOCATION,      \
+    __ASSERT_(PASS2) Perl_warner(aTHX_ packWARN(WARN_REGEXP), "%s" REPORT_LOCATION,      \
              m, REPORT_LOCATION_ARGS(offset));       \
  } STMT_END
  
  #define        ckWARNreg(loc,m) STMT_START {                                   \
      const IV offset = loc - RExC_precomp;                              \
              m, REPORT_LOCATION_ARGS(offset));       \
  } STMT_END
  
  #define        ckWARNreg(loc,m) STMT_START {                                   \
      const IV offset = loc - RExC_precomp;                              \
-    Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), m REPORT_LOCATION,     \
+    __ASSERT_(PASS2) Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), m REPORT_LOCATION,    \
             REPORT_LOCATION_ARGS(offset));              \
  } STMT_END
  
  #define        vWARN_dep(loc, m) STMT_START {                                  \
      const IV offset = loc - RExC_precomp;                              \
             REPORT_LOCATION_ARGS(offset));              \
  } STMT_END
  
  #define        vWARN_dep(loc, m) STMT_START {                                  \
      const IV offset = loc - RExC_precomp;                              \
-    Perl_warner(aTHX_ packWARN(WARN_DEPRECATED), m REPORT_LOCATION,    \
+    __ASSERT_(PASS2) Perl_warner(aTHX_ packWARN(WARN_DEPRECATED), m REPORT_LOCATION,   \
             REPORT_LOCATION_ARGS(offset));              \
  } STMT_END
  
  #define        ckWARNdep(loc,m) STMT_START {                                   \
      const IV offset = loc - RExC_precomp;                              \
             REPORT_LOCATION_ARGS(offset));              \
  } STMT_END
  
  #define        ckWARNdep(loc,m) STMT_START {                                   \
      const IV offset = loc - RExC_precomp;                              \
-    Perl_ck_warner_d(aTHX_ packWARN(WARN_DEPRECATED),                  \
+    __ASSERT_(PASS2) Perl_ck_warner_d(aTHX_ packWARN(WARN_DEPRECATED),                 \
             m REPORT_LOCATION,                                          \
             REPORT_LOCATION_ARGS(offset));              \
  } STMT_END
  
  #define        ckWARNregdep(loc,m) STMT_START {                                \
      const IV offset = loc - RExC_precomp;                              \
             m REPORT_LOCATION,                                          \
             REPORT_LOCATION_ARGS(offset));              \
  } STMT_END
  
  #define        ckWARNregdep(loc,m) STMT_START {                                \
      const IV offset = loc - RExC_precomp;                              \
-    Perl_ck_warner_d(aTHX_ packWARN2(WARN_DEPRECATED, WARN_REGEXP),    \
+    __ASSERT_(PASS2) Perl_ck_warner_d(aTHX_ packWARN2(WARN_DEPRECATED, WARN_REGEXP),   \
             m REPORT_LOCATION,                                          \
             REPORT_LOCATION_ARGS(offset));              \
  } STMT_END
  
  #define        ckWARN2reg_d(loc,m, a1) STMT_START {                            \
      const IV offset = loc - RExC_precomp;                              \
             m REPORT_LOCATION,                                          \
             REPORT_LOCATION_ARGS(offset));              \
  } STMT_END
  
  #define        ckWARN2reg_d(loc,m, a1) STMT_START {                            \
      const IV offset = loc - RExC_precomp;                              \
-    Perl_ck_warner_d(aTHX_ packWARN(WARN_REGEXP),                      \
+    __ASSERT_(PASS2) Perl_ck_warner_d(aTHX_ packWARN(WARN_REGEXP),                     \
             m REPORT_LOCATION,                                          \
             a1, REPORT_LOCATION_ARGS(offset));  \
  } STMT_END
  
  #define        ckWARN2reg(loc, m, a1) STMT_START {                             \
      const IV offset = loc - RExC_precomp;                              \
             m REPORT_LOCATION,                                          \
             a1, REPORT_LOCATION_ARGS(offset));  \
  } STMT_END
  
  #define        ckWARN2reg(loc, m, a1) STMT_START {                             \
      const IV offset = loc - RExC_precomp;                              \
-    Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), m REPORT_LOCATION,     \
+    __ASSERT_(PASS2) Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), m REPORT_LOCATION,    \
             a1, REPORT_LOCATION_ARGS(offset));  \
  } STMT_END
  
  #define        vWARN3(loc, m, a1, a2) STMT_START {                             \
      const IV offset = loc - RExC_precomp;                              \
             a1, REPORT_LOCATION_ARGS(offset));  \
  } STMT_END
  
  #define        vWARN3(loc, m, a1, a2) STMT_START {                             \
      const IV offset = loc - RExC_precomp;                              \
-    Perl_warner(aTHX_ packWARN(WARN_REGEXP), m REPORT_LOCATION,                \
+    __ASSERT_(PASS2) Perl_warner(aTHX_ packWARN(WARN_REGEXP), m REPORT_LOCATION,               \
             a1, a2, REPORT_LOCATION_ARGS(offset));      \
  } STMT_END
  
  #define        ckWARN3reg(loc, m, a1, a2) STMT_START {                         \
      const IV offset = loc - RExC_precomp;                              \
             a1, a2, REPORT_LOCATION_ARGS(offset));      \
  } STMT_END
  
  #define        ckWARN3reg(loc, m, a1, a2) STMT_START {                         \
      const IV offset = loc - RExC_precomp;                              \
-    Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), m REPORT_LOCATION,     \
+    __ASSERT_(PASS2) Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), m REPORT_LOCATION,    \
             a1, a2, REPORT_LOCATION_ARGS(offset));      \
  } STMT_END
  
  #define        vWARN4(loc, m, a1, a2, a3) STMT_START {                         \
      const IV offset = loc - RExC_precomp;                              \
             a1, a2, REPORT_LOCATION_ARGS(offset));      \
  } STMT_END
  
  #define        vWARN4(loc, m, a1, a2, a3) STMT_START {                         \
      const IV offset = loc - RExC_precomp;                              \
-    Perl_warner(aTHX_ packWARN(WARN_REGEXP), m REPORT_LOCATION,                \
+    __ASSERT_(PASS2) Perl_warner(aTHX_ packWARN(WARN_REGEXP), m REPORT_LOCATION,               \
             a1, a2, a3, REPORT_LOCATION_ARGS(offset)); \
  } STMT_END
  
  #define        ckWARN4reg(loc, m, a1, a2, a3) STMT_START {                     \
      const IV offset = loc - RExC_precomp;                              \
             a1, a2, a3, REPORT_LOCATION_ARGS(offset)); \
  } STMT_END
  
  #define        ckWARN4reg(loc, m, a1, a2, a3) STMT_START {                     \
      const IV offset = loc - RExC_precomp;                              \
-    Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), m REPORT_LOCATION,     \
+    __ASSERT_(PASS2) Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), m REPORT_LOCATION,    \
             a1, a2, a3, REPORT_LOCATION_ARGS(offset)); \
  } STMT_END
  
  #define        vWARN5(loc, m, a1, a2, a3, a4) STMT_START {                     \
      const IV offset = loc - RExC_precomp;                              \
             a1, a2, a3, REPORT_LOCATION_ARGS(offset)); \
  } STMT_END
  
  #define        vWARN5(loc, m, a1, a2, a3, a4) STMT_START {                     \
      const IV offset = loc - RExC_precomp;                              \
-    Perl_warner(aTHX_ packWARN(WARN_REGEXP), m REPORT_LOCATION,                \
+    __ASSERT_(PASS2) Perl_warner(aTHX_ packWARN(WARN_REGEXP), m REPORT_LOCATION,               \
             a1, a2, a3, a4, REPORT_LOCATION_ARGS(offset)); \
  } STMT_END
  
             a1, a2, a3, a4, REPORT_LOCATION_ARGS(offset)); \
  } STMT_END
  
@@ -816,6 +805,33 @@ DEBUG_OPTIMISE_MORE_r(if(data){                                      \
      PerlIO_printf(Perl_debug_log,"\n");                              \
  });
  
      PerlIO_printf(Perl_debug_log,"\n");                              \
  });
  
+#ifdef DEBUGGING
+
+/* is c a control character for which we have a mnemonic? */
+#define isMNEMONIC_CNTRL(c) _IS_MNEMONIC_CNTRL_ONLY_FOR_USE_BY_REGCOMP_DOT_C(c)
+
+STATIC const char *
+S_cntrl_to_mnemonic(const U8 c)
+{
+    /* Returns the mnemonic string that represents character 'c', if one
+     * exists; NULL otherwise.  The only ones that exist for the purposes of
+     * this routine are a few control characters */
+
+    switch (c) {
+        case '\a':       return "\\a";
+        case '\b':       return "\\b";
+        case ESC_NATIVE: return "\\e";
+        case '\f':       return "\\f";
+        case '\n':       return "\\n";
+        case '\r':       return "\\r";
+        case '\t':       return "\\t";
+    }
+
+    return NULL;
+}
+
+#endif
+
  /* Mark that we cannot extend a found fixed substring at this point.
     Update the longest found anchored substring and the longest found
     floating substrings if needed. */
  /* Mark that we cannot extend a found fixed substring at this point.
     Update the longest found anchored substring and the longest found
     floating substrings if needed. */
@@ -889,11 +905,11 @@ S_ssc_anything(pTHX_ regnode_ssc *ssc)
  
      ssc->invlist = sv_2mortal(_new_invlist(2)); /* mortalize so won't leak */
      _append_range_to_invlist(ssc->invlist, 0, UV_MAX);
  
      ssc->invlist = sv_2mortal(_new_invlist(2)); /* mortalize so won't leak */
      _append_range_to_invlist(ssc->invlist, 0, UV_MAX);
-    ANYOF_FLAGS(ssc) |= ANYOF_EMPTY_STRING;    /* Plus match empty string */
+    ANYOF_FLAGS(ssc) |= SSC_MATCHES_EMPTY_STRING;  /* Plus matches empty */
  }
  
  STATIC int
  }
  
  STATIC int
-S_ssc_is_anything(pTHX_ const regnode_ssc *ssc)
+S_ssc_is_anything(const regnode_ssc *ssc)
  {
      /* Returns TRUE if the SSC 'ssc' can match the empty string and any code
       * point; FALSE otherwise.  Thus, this is used to see if using 'ssc' buys
  {
      /* Returns TRUE if the SSC 'ssc' can match the empty string and any code
       * point; FALSE otherwise.  Thus, this is used to see if using 'ssc' buys
@@ -907,7 +923,7 @@ S_ssc_is_anything(pTHX_ const regnode_ssc *ssc)
  
      assert(is_ANYOF_SYNTHETIC(ssc));
  
  
      assert(is_ANYOF_SYNTHETIC(ssc));
  
-    if (! (ANYOF_FLAGS(ssc) & ANYOF_EMPTY_STRING)) {
+    if (! (ANYOF_FLAGS(ssc) & SSC_MATCHES_EMPTY_STRING)) {
          return FALSE;
      }
  
          return FALSE;
      }
  
@@ -946,7 +962,7 @@ S_ssc_init(pTHX_ const RExC_state_t *pRExC_state, regnode_ssc *ssc)
  
      Zero(ssc, 1, regnode_ssc);
      set_ANYOF_SYNTHETIC(ssc);
  
      Zero(ssc, 1, regnode_ssc);
      set_ANYOF_SYNTHETIC(ssc);
-    ARG_SET(ssc, ANYOF_NONBITMAP_EMPTY);
+    ARG_SET(ssc, ANYOF_ONLY_HAS_BITMAP);
      ssc_anything(ssc);
  
      /* If any portion of the regex is to operate under locale rules,
      ssc_anything(ssc);
  
      /* If any portion of the regex is to operate under locale rules,
@@ -965,8 +981,8 @@ S_ssc_init(pTHX_ const RExC_state_t *pRExC_state, regnode_ssc *ssc)
  }
  
  STATIC int
  }
  
  STATIC int
-S_ssc_is_cp_posixl_init(pTHX_ const RExC_state_t *pRExC_state,
-                              const regnode_ssc *ssc)
+S_ssc_is_cp_posixl_init(const RExC_state_t *pRExC_state,
+                        const regnode_ssc *ssc)
  {
      /* Returns TRUE if the SSC 'ssc' is in its initial state with regard only
       * to the list of code points matched, and locale posix classes; hence does
  {
      /* Returns TRUE if the SSC 'ssc' is in its initial state with regard only
       * to the list of code points matched, and locale posix classes; hence does
@@ -1016,7 +1032,7 @@ S_get_ANYOF_cp_list_for_ssc(pTHX_ const RExC_state_t *pRExC_state,
      PERL_ARGS_ASSERT_GET_ANYOF_CP_LIST_FOR_SSC;
  
      /* Look at the data structure created by S_set_ANYOF_arg() */
      PERL_ARGS_ASSERT_GET_ANYOF_CP_LIST_FOR_SSC;
  
      /* Look at the data structure created by S_set_ANYOF_arg() */
-    if (n != ANYOF_NONBITMAP_EMPTY) {
+    if (n != ANYOF_ONLY_HAS_BITMAP) {
          SV * const rv = MUTABLE_SV(RExC_rxi->data->data[n]);
          AV * const av = MUTABLE_AV(SvRV(rv));
          SV **const ary = AvARRAY(av);
          SV * const rv = MUTABLE_SV(RExC_rxi->data->data[n]);
          AV * const av = MUTABLE_AV(SvRV(rv));
          SV **const ary = AvARRAY(av);
@@ -1046,15 +1062,16 @@ S_get_ANYOF_cp_list_for_ssc(pTHX_ const RExC_state_t *pRExC_state,
          }
      }
  
          }
      }
  
-    /* An ANYOF node contains a bitmap for the first 256 code points, and an
-     * inversion list for the others, but if there are code points that should
-     * match only conditionally on the target string being UTF-8, those are
-     * placed in the inversion list, and not the bitmap.  Since there are
-     * circumstances under which they could match, they are included in the
-     * SSC.  But if the ANYOF node is to be inverted, we have to exclude them
-     * here, so that when we invert below, the end result actually does include
-     * them.  (Think about "\xe0" =~ /[^\xc0]/di;).  We have to do this here
-     * before we add the unconditionally matched code points */
+    /* An ANYOF node contains a bitmap for the first NUM_ANYOF_CODE_POINTS
+     * code points, and an inversion list for the others, but if there are code
+     * points that should match only conditionally on the target string being
+     * UTF-8, those are placed in the inversion list, and not the bitmap.
+     * Since there are circumstances under which they could match, they are
+     * included in the SSC.  But if the ANYOF node is to be inverted, we have
+     * to exclude them here, so that when we invert below, the end result
+     * actually does include them.  (Think about "\xe0" =~ /[^\xc0]/di;).  We
+     * have to do this here before we add the unconditionally matched code
+     * points */
      if (ANYOF_FLAGS(node) & ANYOF_INVERT) {
          _invlist_intersection_complement_2nd(invlist,
                                               PL_UpperLatin1,
      if (ANYOF_FLAGS(node) & ANYOF_INVERT) {
          _invlist_intersection_complement_2nd(invlist,
                                               PL_UpperLatin1,
@@ -1062,7 +1079,7 @@ S_get_ANYOF_cp_list_for_ssc(pTHX_ const RExC_state_t *pRExC_state,
      }
  
      /* Add in the points from the bit map */
      }
  
      /* Add in the points from the bit map */
-    for (i = 0; i < 256; i++) {
+    for (i = 0; i < NUM_ANYOF_CODE_POINTS; i++) {
          if (ANYOF_BITMAP_TEST(node, i)) {
              invlist = add_cp_to_invlist(invlist, i);
              new_node_has_latin1 = TRUE;
          if (ANYOF_BITMAP_TEST(node, i)) {
              invlist = add_cp_to_invlist(invlist, i);
              new_node_has_latin1 = TRUE;
@@ -1071,13 +1088,13 @@ S_get_ANYOF_cp_list_for_ssc(pTHX_ const RExC_state_t *pRExC_state,
  
      /* If this can match all upper Latin1 code points, have to add them
       * as well */
  
      /* If this can match all upper Latin1 code points, have to add them
       * as well */
-    if (ANYOF_FLAGS(node) & ANYOF_NON_UTF8_NON_ASCII_ALL) {
+    if (ANYOF_FLAGS(node) & ANYOF_MATCHES_ALL_NON_UTF8_NON_ASCII) {
          _invlist_union(invlist, PL_UpperLatin1, &invlist);
      }
  
      /* Similarly for these */
          _invlist_union(invlist, PL_UpperLatin1, &invlist);
      }
  
      /* Similarly for these */
-    if (ANYOF_FLAGS(node) & ANYOF_ABOVE_LATIN1_ALL) {
-        invlist = _add_range_to_invlist(invlist, 256, UV_MAX);
+    if (ANYOF_FLAGS(node) & ANYOF_MATCHES_ALL_ABOVE_BITMAP) {
+        _invlist_union_complement_2nd(invlist, PL_InBitmap, &invlist);
      }
  
      if (ANYOF_FLAGS(node) & ANYOF_INVERT) {
      }
  
      if (ANYOF_FLAGS(node) & ANYOF_INVERT) {
@@ -1110,8 +1127,8 @@ S_get_ANYOF_cp_list_for_ssc(pTHX_ const RExC_state_t *pRExC_state,
  #define ssc_match_all_cp(ssc) ssc_add_range(ssc, 0, UV_MAX)
  
  /* 'AND' a given class with another one.  Can create false positives.  'ssc'
  #define ssc_match_all_cp(ssc) ssc_add_range(ssc, 0, UV_MAX)
  
  /* 'AND' a given class with another one.  Can create false positives.  'ssc'
- * should not be inverted.  'and_with->flags & ANYOF_POSIXL' should be 0 if
- * 'and_with' is a regnode_charclass instead of a regnode_ssc. */
+ * should not be inverted.  'and_with->flags & ANYOF_MATCHES_POSIXL' should be
+ * 0 if 'and_with' is a regnode_charclass instead of a regnode_ssc. */
  
  STATIC void
  S_ssc_and(pTHX_ const RExC_state_t *pRExC_state, regnode_ssc *ssc,
  
  STATIC void
  S_ssc_and(pTHX_ const RExC_state_t *pRExC_state, regnode_ssc *ssc,
@@ -1202,7 +1219,7 @@ S_ssc_and(pTHX_ const RExC_state_t *pRExC_state, regnode_ssc *ssc,
  
          /* If either P1 or P2 is empty, the intersection will be also; can skip
           * the loop */
  
          /* If either P1 or P2 is empty, the intersection will be also; can skip
           * the loop */
-        if (! (ANYOF_FLAGS(and_with) & ANYOF_POSIXL)) {
+        if (! (ANYOF_FLAGS(and_with) & ANYOF_MATCHES_POSIXL)) {
              ANYOF_POSIXL_ZERO(ssc);
          }
          else if (ANYOF_POSIXL_SSC_TEST_ANY_SET(ssc)) {
              ANYOF_POSIXL_ZERO(ssc);
          }
          else if (ANYOF_POSIXL_SSC_TEST_ANY_SET(ssc)) {
@@ -1261,16 +1278,16 @@ S_ssc_and(pTHX_ const RExC_state_t *pRExC_state, regnode_ssc *ssc,
              else {
                  ssc->invlist = anded_cp_list;
                  ANYOF_POSIXL_ZERO(ssc);
              else {
                  ssc->invlist = anded_cp_list;
                  ANYOF_POSIXL_ZERO(ssc);
-                if (ANYOF_FLAGS(and_with) & ANYOF_POSIXL) {
+                if (ANYOF_FLAGS(and_with) & ANYOF_MATCHES_POSIXL) {
                      ANYOF_POSIXL_OR((regnode_charclass_posixl*) and_with, ssc);
                  }
              }
          }
          else if (ANYOF_POSIXL_SSC_TEST_ANY_SET(ssc)
                      ANYOF_POSIXL_OR((regnode_charclass_posixl*) and_with, ssc);
                  }
              }
          }
          else if (ANYOF_POSIXL_SSC_TEST_ANY_SET(ssc)
-                 || (ANYOF_FLAGS(and_with) & ANYOF_POSIXL))
+                 || (ANYOF_FLAGS(and_with) & ANYOF_MATCHES_POSIXL))
          {
              /* One or the other of P1, P2 is non-empty. */
          {
              /* One or the other of P1, P2 is non-empty. */
-            if (ANYOF_FLAGS(and_with) & ANYOF_POSIXL) {
+            if (ANYOF_FLAGS(and_with) & ANYOF_MATCHES_POSIXL) {
                  ANYOF_POSIXL_AND((regnode_charclass_posixl*) and_with, ssc);
              }
              ssc_union(ssc, anded_cp_list, FALSE);
                  ANYOF_POSIXL_AND((regnode_charclass_posixl*) and_with, ssc);
              }
              ssc_union(ssc, anded_cp_list, FALSE);
@@ -1332,7 +1349,7 @@ S_ssc_or(pTHX_ const RExC_state_t *pRExC_state, regnode_ssc *ssc,
      {
          /* We ignore P2, leaving P1 going forward */
      }   /* else  Not inverted */
      {
          /* We ignore P2, leaving P1 going forward */
      }   /* else  Not inverted */
-    else if (ANYOF_FLAGS(or_with) & ANYOF_POSIXL) {
+    else if (ANYOF_FLAGS(or_with) & ANYOF_MATCHES_POSIXL) {
          ANYOF_POSIXL_OR((regnode_charclass_posixl*)or_with, ssc);
          if (ANYOF_POSIXL_SSC_TEST_ANY_SET(ssc)) {
              unsigned int i;
          ANYOF_POSIXL_OR((regnode_charclass_posixl*)or_with, ssc);
          if (ANYOF_POSIXL_SSC_TEST_ANY_SET(ssc)) {
              unsigned int i;
@@ -1410,10 +1427,9 @@ S_ssc_cp_and(pTHX_ regnode_ssc *ssc, const UV cp)
  }
  
  PERL_STATIC_INLINE void
  }
  
  PERL_STATIC_INLINE void
-S_ssc_clear_locale(pTHX_ regnode_ssc *ssc)
+S_ssc_clear_locale(regnode_ssc *ssc)
  {
      /* Set the SSC 'ssc' to not match any locale things */
  {
      /* Set the SSC 'ssc' to not match any locale things */
-
      PERL_ARGS_ASSERT_SSC_CLEAR_LOCALE;
  
      assert(is_ANYOF_SYNTHETIC(ssc));
      PERL_ARGS_ASSERT_SSC_CLEAR_LOCALE;
  
      assert(is_ANYOF_SYNTHETIC(ssc));
@@ -1427,7 +1443,8 @@ S_ssc_finalize(pTHX_ RExC_state_t *pRExC_state, regnode_ssc *ssc)
  {
      /* The inversion list in the SSC is marked mortal; now we need a more
       * permanent copy, which is stored the same way that is done in a regular
  {
      /* The inversion list in the SSC is marked mortal; now we need a more
       * permanent copy, which is stored the same way that is done in a regular
-     * ANYOF node, with the first 256 code points in a bit map */
+     * ANYOF node, with the first NUM_ANYOF_CODE_POINTS code points in a bit
+     * map */
  
      SV* invlist = invlist_clone(ssc->invlist);
  
  
      SV* invlist = invlist_clone(ssc->invlist);
  
@@ -1436,8 +1453,8 @@ S_ssc_finalize(pTHX_ RExC_state_t *pRExC_state, regnode_ssc *ssc)
      assert(is_ANYOF_SYNTHETIC(ssc));
  
      /* The code in this file assumes that all but these flags aren't relevant
      assert(is_ANYOF_SYNTHETIC(ssc));
  
      /* The code in this file assumes that all but these flags aren't relevant
-     * to the SSC, except ANYOF_EMPTY_STRING, which should be cleared by the
-     * time we reach here */
+     * to the SSC, except SSC_MATCHES_EMPTY_STRING, which should be cleared
+     * by the time we reach here */
      assert(! (ANYOF_FLAGS(ssc) & ~ANYOF_COMMON_FLAGS));
  
      populate_ANYOF_from_invlist( (regnode *) ssc, &invlist);
      assert(! (ANYOF_FLAGS(ssc) & ~ANYOF_COMMON_FLAGS));
  
      populate_ANYOF_from_invlist( (regnode *) ssc, &invlist);
@@ -1449,7 +1466,7 @@ S_ssc_finalize(pTHX_ RExC_state_t *pRExC_state, regnode_ssc *ssc)
      ssc->invlist = NULL;
  
      if (ANYOF_POSIXL_SSC_TEST_ANY_SET(ssc)) {
      ssc->invlist = NULL;
  
      if (ANYOF_POSIXL_SSC_TEST_ANY_SET(ssc)) {
-        ANYOF_FLAGS(ssc) |= ANYOF_POSIXL;
+        ANYOF_FLAGS(ssc) |= ANYOF_MATCHES_POSIXL;
      }
  
      assert(! (ANYOF_FLAGS(ssc) & ANYOF_LOCALE_FLAGS) || RExC_contains_locale);
      }
  
      assert(! (ANYOF_FLAGS(ssc) & ANYOF_LOCALE_FLAGS) || RExC_contains_locale);
@@ -1718,7 +1735,7 @@ S_dump_trie_interim_table(pTHX_ const struct _reg_trie_data *trie,
                May be the same as tail.
    tail       : item following the branch sequence
    count      : words in the sequence
                May be the same as tail.
    tail       : item following the branch sequence
    count      : words in the sequence
-  flags      : currently the OP() type we will be building one of /EXACT(|F|Fl)/
+  flags      : currently the OP() type we will be building one of /EXACT(|F|FA|FU|FU_SS)/
    depth      : indent depth
  
  Inplace optimizes a sequence of 2 or more Branch-Exact nodes into a TRIE node.
    depth      : indent depth
  
  Inplace optimizes a sequence of 2 or more Branch-Exact nodes into a TRIE node.
@@ -1947,7 +1964,6 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch,
                    regnode *first, regnode *last, regnode *tail,
                    U32 word_count, U32 flags, U32 depth)
  {
                    regnode *first, regnode *last, regnode *tail,
                    U32 word_count, U32 flags, U32 depth)
  {
-    dVAR;
      /* first pass, loop through and scan words */
      reg_trie_data *trie;
      HV *widecharmap = NULL;
      /* first pass, loop through and scan words */
      reg_trie_data *trie;
      HV *widecharmap = NULL;
@@ -2982,8 +2998,8 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch,
               : MADE_TRIE;
  }
  
               : MADE_TRIE;
  }
  
-STATIC void
-S_make_trie_failtable(pTHX_ RExC_state_t *pRExC_state, regnode *source,  regnode *stclass, U32 depth)
+STATIC regnode *
+S_construct_ahocorasick_from_trie(pTHX_ RExC_state_t *pRExC_state, regnode *source, U32 depth)
  {
  /* The Trie is constructed and compressed now so we can build a fail array if
   * it's needed
  {
  /* The Trie is constructed and compressed now so we can build a fail array if
   * it's needed
@@ -3021,13 +3037,27 @@ S_make_trie_failtable(pTHX_ RExC_state_t *pRExC_state, regnode *source,  regnode
      U32 *fail;
      reg_ac_data *aho;
      const U32 data_slot = add_data( pRExC_state, STR_WITH_LEN("T"));
      U32 *fail;
      reg_ac_data *aho;
      const U32 data_slot = add_data( pRExC_state, STR_WITH_LEN("T"));
+    regnode *stclass;
      GET_RE_DEBUG_FLAGS_DECL;
  
      GET_RE_DEBUG_FLAGS_DECL;
  
-    PERL_ARGS_ASSERT_MAKE_TRIE_FAILTABLE;
+    PERL_ARGS_ASSERT_CONSTRUCT_AHOCORASICK_FROM_TRIE;
+    PERL_UNUSED_CONTEXT;
  #ifndef DEBUGGING
      PERL_UNUSED_ARG(depth);
  #endif
  
  #ifndef DEBUGGING
      PERL_UNUSED_ARG(depth);
  #endif
  
+    if ( OP(source) == TRIE ) {
+        struct regnode_1 *op = (struct regnode_1 *)
+            PerlMemShared_calloc(1, sizeof(struct regnode_1));
+        StructCopy(source,op,struct regnode_1);
+        stclass = (regnode *)op;
+    } else {
+        struct regnode_charclass *op = (struct regnode_charclass *)
+            PerlMemShared_calloc(1, sizeof(struct regnode_charclass));
+        StructCopy(source,op,struct regnode_charclass);
+        stclass = (regnode *)op;
+    }
+    OP(stclass)+=2; /* covert the TRIE type to its AHO-CORASICK equivalent */
  
      ARG_SET( stclass, data_slot );
      aho = (reg_ac_data *) PerlMemShared_calloc( 1, sizeof(reg_ac_data) );
  
      ARG_SET( stclass, data_slot );
      aho = (reg_ac_data *) PerlMemShared_calloc( 1, sizeof(reg_ac_data) );
@@ -3094,6 +3124,7 @@ S_make_trie_failtable(pTHX_ RExC_state_t *pRExC_state, regnode *source,  regnode
      });
      Safefree(q);
      /*RExC_seen |= REG_TRIEDFA_SEEN;*/
      });
      Safefree(q);
      /*RExC_seen |= REG_TRIEDFA_SEEN;*/
+    return stclass;
  }
  
  
  }
  
  
@@ -3417,9 +3448,9 @@ S_join_exact(pTHX_ RExC_state_t *pRExC_state, regnode *scan,
                  else { /* Here is a generic multi-char fold. */
                      U8* multi_end  = s + len;
  
                  else { /* Here is a generic multi-char fold. */
                      U8* multi_end  = s + len;
  
-                    /* Count how many characters in it.  In the case of /aa, no
-                     * folds which contain ASCII code points are allowed, so
-                     * check for those, and skip if found. */
+                    /* Count how many characters are in it.  In the case of
+                     * /aa, no folds which contain ASCII code points are
+                     * allowed, so check for those, and skip if found. */
                      if (OP(scan) != EXACTFA && OP(scan) != EXACTFA_NO_TRIE) {
                          count = utf8_length(s, multi_end);
                          s = multi_end;
                      if (OP(scan) != EXACTFA && OP(scan) != EXACTFA_NO_TRIE) {
                          count = utf8_length(s, multi_end);
                          s = multi_end;
@@ -3501,8 +3532,8 @@ S_join_exact(pTHX_ RExC_state_t *pRExC_state, regnode *scan,
                  }
  
                  if (len == 2
                  }
  
                  if (len == 2
-                    && isARG2_lower_or_UPPER_ARG1('s', *s)
-                    && isARG2_lower_or_UPPER_ARG1('s', *(s+1)))
+                    && isALPHA_FOLD_EQ(*s, 's')
+                    && isALPHA_FOLD_EQ(*(s+1), 's'))
                  {
  
                      /* EXACTF nodes need to know that the minimum length
                  {
  
                      /* EXACTF nodes need to know that the minimum length
@@ -3578,7 +3609,6 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                         /* recursed: which subroutines have we recursed into */
                         /* and_withp: Valid if flags & SCF_DO_STCLASS_OR */
  {
                         /* recursed: which subroutines have we recursed into */
                         /* and_withp: Valid if flags & SCF_DO_STCLASS_OR */
  {
-    dVAR;
      /* There must be at least this number of characters to match */
      SSize_t min = 0;
      I32 pars = 0, code;
      /* There must be at least this number of characters to match */
      SSize_t min = 0;
      I32 pars = 0, code;
@@ -4237,7 +4267,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
               * can't match null string */
             if (flags & SCF_DO_STCLASS_AND) {
                  ssc_cp_and(data->start_class, uc);
               * can't match null string */
             if (flags & SCF_DO_STCLASS_AND) {
                  ssc_cp_and(data->start_class, uc);
-                ANYOF_FLAGS(data->start_class) &= ~ANYOF_EMPTY_STRING;
+                ANYOF_FLAGS(data->start_class) &= ~SSC_MATCHES_EMPTY_STRING;
                  ssc_clear_locale(data->start_class);
             }
             else if (flags & SCF_DO_STCLASS_OR) {
                  ssc_clear_locale(data->start_class);
             }
             else if (flags & SCF_DO_STCLASS_OR) {
@@ -4245,15 +4275,17 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                 ssc_and(pRExC_state, data->start_class, (regnode_charclass *) and_withp);
  
                  /* See commit msg 749e076fceedeb708a624933726e7989f2302f6a */
                 ssc_and(pRExC_state, data->start_class, (regnode_charclass *) and_withp);
  
                  /* See commit msg 749e076fceedeb708a624933726e7989f2302f6a */
-                ANYOF_FLAGS(data->start_class) &= ~ANYOF_EMPTY_STRING;
+                ANYOF_FLAGS(data->start_class) &= ~SSC_MATCHES_EMPTY_STRING;
             }
             flags &= ~SCF_DO_STCLASS;
         }
             }
             flags &= ~SCF_DO_STCLASS;
         }
-       else if (PL_regkind[OP(scan)] == EXACT) { /* But OP != EXACT! */
+        else if (PL_regkind[OP(scan)] == EXACT) { /* But OP != EXACT!, so is
+                                                     EXACTFish */
             SSize_t l = STR_LEN(scan);
             UV uc = *((U8*)STRING(scan));
              SV* EXACTF_invlist = _new_invlist(4); /* Start out big enough for 2
                                                       separate code points */
             SSize_t l = STR_LEN(scan);
             UV uc = *((U8*)STRING(scan));
              SV* EXACTF_invlist = _new_invlist(4); /* Start out big enough for 2
                                                       separate code points */
+            const U8 * s = (U8*)STRING(scan);
  
             /* Search for fixed substrings supports EXACT only. */
             if (flags & SCF_DO_SUBSTR) {
  
             /* Search for fixed substrings supports EXACT only. */
             if (flags & SCF_DO_SUBSTR) {
@@ -4261,7 +4293,6 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                  scan_commit(pRExC_state, data, minlenp, is_inf);
             }
             if (UTF) {
                  scan_commit(pRExC_state, data, minlenp, is_inf);
             }
             if (UTF) {
-               const U8 * const s = (U8 *)STRING(scan);
                 uc = utf8_to_uvchr_buf(s, s + l, NULL);
                 l = utf8_length(s, s + l);
             }
                 uc = utf8_to_uvchr_buf(s, s + l, NULL);
                 l = utf8_length(s, s + l);
             }
@@ -4281,76 +4312,145 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                     data->longest = &(data->longest_float);
                 }
             }
                     data->longest = &(data->longest_float);
                 }
             }
-            if (OP(scan) == EXACTFL) {
  
  
-                /* We don't know what the folds are; it could be anything. XXX
-                 * Actually, we only support UTF-8 encoding for code points
-                 * above Latin1, so we could know what those folds are. */
-                EXACTF_invlist = _add_range_to_invlist(EXACTF_invlist,
-                                                       0,
-                                                       UV_MAX);
+            if (OP(scan) != EXACTFL && flags & SCF_DO_STCLASS_AND) {
+                ssc_clear_locale(data->start_class);
              }
              }
-            else {  /* Non-locale EXACTFish */
-                EXACTF_invlist = add_cp_to_invlist(EXACTF_invlist, uc);
-                if (flags & SCF_DO_STCLASS_AND) {
-                    ssc_clear_locale(data->start_class);
+
+            if (! UTF) {
+
+                /* We punt and assume can match anything if the node begins
+                 * with a multi-character fold.  Things are complicated.  For
+                 * example, /ffi/i could match any of:
+                 *  "\N{LATIN SMALL LIGATURE FFI}"
+                 *  "\N{LATIN SMALL LIGATURE FF}I"
+                 *  "F\N{LATIN SMALL LIGATURE FI}"
+                 *  plus several other things; and making sure we have all the
+                 *  possibilities is hard. */
+                if (is_MULTI_CHAR_FOLD_latin1_safe(s, s + STR_LEN(scan))) {
+                    EXACTF_invlist =
+                             _add_range_to_invlist(EXACTF_invlist, 0, UV_MAX);
                  }
                  }
-                if (uc < 256) { /* We know what the Latin1 folds are ... */
-                    if (IS_IN_SOME_FOLD_L1(uc)) {   /* For instance, we
-                                                       know if anything folds
-                                                       with this */
-                        EXACTF_invlist = add_cp_to_invlist(EXACTF_invlist,
+                else {
+
+                    /* Any Latin1 range character can potentially match any
+                     * other depending on the locale */
+                    if (OP(scan) == EXACTFL) {
+                        _invlist_union(EXACTF_invlist, PL_Latin1,
+                                                              &EXACTF_invlist);
+                    }
+                    else {
+                        /* But otherwise, it matches at least itself.  We can
+                         * quickly tell if it has a distinct fold, and if so,
+                         * it matches that as well */
+                        EXACTF_invlist = add_cp_to_invlist(EXACTF_invlist, uc);
+                        if (IS_IN_SOME_FOLD_L1(uc)) {
+                            EXACTF_invlist = add_cp_to_invlist(EXACTF_invlist,
                                                             PL_fold_latin1[uc]);
                                                             PL_fold_latin1[uc]);
-                        if (OP(scan) != EXACTFA) { /* The folds below aren't
-                                                      legal under /iaa */
-                            if (isARG2_lower_or_UPPER_ARG1('s', uc)) {
-                                EXACTF_invlist
-                                    = add_cp_to_invlist(EXACTF_invlist,
-                                                LATIN_SMALL_LETTER_SHARP_S);
-                            }
-                            else if (uc == LATIN_SMALL_LETTER_SHARP_S) {
-                                EXACTF_invlist
-                                    = add_cp_to_invlist(EXACTF_invlist, 's');
-                                EXACTF_invlist
-                                    = add_cp_to_invlist(EXACTF_invlist, 'S');
-                            }
                          }
                          }
+                    }
  
  
-                        /* We also know if there are above-Latin1 code points
-                         * that fold to this (none legal for ASCII and /iaa) */
-                        if ((! isASCII(uc) || OP(scan) != EXACTFA)
-                            && HAS_NONLATIN1_FOLD_CLOSURE(uc))
-                        {
-                            /* XXX We could know exactly what does fold to this
-                             * if the reverse folds are loaded, as currently in
-                             * S_regclass() */
-                            _invlist_union(EXACTF_invlist,
-                                           PL_AboveLatin1,
-                                           &EXACTF_invlist);
+                    /* Some characters match above-Latin1 ones under /i.  This
+                     * is true of EXACTFL ones when the locale is UTF-8 */
+                    if (HAS_NONLATIN1_SIMPLE_FOLD_CLOSURE(uc)
+                        && (! isASCII(uc) || (OP(scan) != EXACTFA
+                                            && OP(scan) != EXACTFA_NO_TRIE)))
+                    {
+                        add_above_Latin1_folds(pRExC_state,
+                                               (U8) uc,
+                                               &EXACTF_invlist);
+                    }
+                }
+            }
+            else {  /* Pattern is UTF-8 */
+                U8 folded[UTF8_MAX_FOLD_CHAR_EXPAND * UTF8_MAXBYTES_CASE + 1] = { '\0' };
+                STRLEN foldlen = UTF8SKIP(s);
+                const U8* e = s + STR_LEN(scan);
+                SV** listp;
+
+                /* The only code points that aren't folded in a UTF EXACTFish
+                 * node are are the problematic ones in EXACTFL nodes */
+                if (OP(scan) == EXACTFL
+                    && is_PROBLEMATIC_LOCALE_FOLDEDS_START_cp(uc))
+                {
+                    /* We need to check for the possibility that this EXACTFL
+                     * node begins with a multi-char fold.  Therefore we fold
+                     * the first few characters of it so that we can make that
+                     * check */
+                    U8 *d = folded;
+                    int i;
+
+                    for (i = 0; i < UTF8_MAX_FOLD_CHAR_EXPAND && s < e; i++) {
+                        if (isASCII(*s)) {
+                            *(d++) = (U8) toFOLD(*s);
+                            s++;
+                        }
+                        else {
+                            STRLEN len;
+                            to_utf8_fold(s, d, &len);
+                            d += len;
+                            s += UTF8SKIP(s);
                          }
                      }
                          }
                      }
+
+                    /* And set up so the code below that looks in this folded
+                     * buffer instead of the node's string */
+                    e = d;
+                    foldlen = UTF8SKIP(folded);
+                    s = folded;
                  }
                  }
-                else {  /* Non-locale, above Latin1.  XXX We don't currently
-                           know what participates in folds with this, so have
-                           to assume anything could */
-
-                    /* XXX We could know exactly what does fold to this if the
-                     * reverse folds are loaded, as currently in S_regclass().
-                     * But we do know that under /iaa nothing in the ASCII
-                     * range can participate */
-                    if (OP(scan) == EXACTFA) {
-                        _invlist_union_complement_2nd(EXACTF_invlist,
-                                                      PL_XPosix_ptrs[_CC_ASCII],
-                                                      &EXACTF_invlist);
+
+                /* When we reach here 's' points to the fold of the first
+                 * character(s) of the node; and 'e' points to far enough along
+                 * the folded string to be just past any possible multi-char
+                 * fold. 'foldlen' is the length in bytes of the first
+                 * character in 's'
+                 *
+                 * Unlike the non-UTF-8 case, the macro for determining if a
+                 * string is a multi-char fold requires all the characters to
+                 * already be folded.  This is because of all the complications
+                 * if not.  Note that they are folded anyway, except in EXACTFL
+                 * nodes.  Like the non-UTF case above, we punt if the node
+                 * begins with a multi-char fold  */
+
+                if (is_MULTI_CHAR_FOLD_utf8_safe(s, e)) {
+                    EXACTF_invlist =
+                             _add_range_to_invlist(EXACTF_invlist, 0, UV_MAX);
+                }
+                else {  /* Single char fold */
+
+                    /* It matches all the things that fold to it, which are
+                     * found in PL_utf8_foldclosures (including itself) */
+                    EXACTF_invlist = add_cp_to_invlist(EXACTF_invlist, uc);
+                    if (! PL_utf8_foldclosures) {
+                        _load_PL_utf8_foldclosures();
                      }
                      }
-                    else {
-                        EXACTF_invlist = _add_range_to_invlist(EXACTF_invlist,
-                                                               0, UV_MAX);
+                    if ((listp = hv_fetch(PL_utf8_foldclosures,
+                                        (char *) s, foldlen, FALSE)))
+                    {
+                        AV* list = (AV*) *listp;
+                        IV k;
+                        for (k = 0; k <= av_tindex(list); k++) {
+                            SV** c_p = av_fetch(list, k, FALSE);
+                            UV c;
+                            assert(c_p);
+
+                            c = SvUV(*c_p);
+
+                            /* /aa doesn't allow folds between ASCII and non- */
+                            if ((OP(scan) == EXACTFA || OP(scan) == EXACTFA_NO_TRIE)
+                                && isASCII(c) != isASCII(uc))
+                            {
+                                continue;
+                            }
+
+                            EXACTF_invlist = add_cp_to_invlist(EXACTF_invlist, c);
+                        }
                      }
                  }
              }
             if (flags & SCF_DO_STCLASS_AND) {
                      }
                  }
              }
             if (flags & SCF_DO_STCLASS_AND) {
-                ANYOF_FLAGS(data->start_class) &= ~ANYOF_EMPTY_STRING;
+                ANYOF_FLAGS(data->start_class) &= ~SSC_MATCHES_EMPTY_STRING;
                  ANYOF_POSIXL_ZERO(data->start_class);
                  ssc_intersection(data->start_class, EXACTF_invlist, FALSE);
             }
                  ANYOF_POSIXL_ZERO(data->start_class);
                  ssc_intersection(data->start_class, EXACTF_invlist, FALSE);
             }
@@ -4359,7 +4459,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                 ssc_and(pRExC_state, data->start_class, (regnode_charclass *) and_withp);
  
                  /* See commit msg 749e076fceedeb708a624933726e7989f2302f6a */
                 ssc_and(pRExC_state, data->start_class, (regnode_charclass *) and_withp);
  
                  /* See commit msg 749e076fceedeb708a624933726e7989f2302f6a */
-                ANYOF_FLAGS(data->start_class) &= ~ANYOF_EMPTY_STRING;
+                ANYOF_FLAGS(data->start_class) &= ~SSC_MATCHES_EMPTY_STRING;
             }
             flags &= ~SCF_DO_STCLASS;
              SvREFCNT_dec(EXACTF_invlist);
             }
             flags &= ~SCF_DO_STCLASS;
              SvREFCNT_dec(EXACTF_invlist);
@@ -4478,7 +4578,8 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                         flags &= ~SCF_DO_STCLASS_AND;
                         StructCopy(&this_class, data->start_class, regnode_ssc);
                         flags |= SCF_DO_STCLASS_OR;
                         flags &= ~SCF_DO_STCLASS_AND;
                         StructCopy(&this_class, data->start_class, regnode_ssc);
                         flags |= SCF_DO_STCLASS_OR;
-                        ANYOF_FLAGS(data->start_class) |= ANYOF_EMPTY_STRING;
+                        ANYOF_FLAGS(data->start_class)
+                                                |= SSC_MATCHES_EMPTY_STRING;
                     }
                 } else {                /* Non-zero len */
                     if (flags & SCF_DO_STCLASS_OR) {
                     }
                 } else {                /* Non-zero len */
                     if (flags & SCF_DO_STCLASS_OR) {
@@ -4693,13 +4794,13 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                     /* It is counted once already... */
                     data->pos_min += minnext * (mincount - counted);
  #if 0
                     /* It is counted once already... */
                     data->pos_min += minnext * (mincount - counted);
  #if 0
-PerlIO_printf(Perl_debug_log, "counted=%"UVdf" deltanext=%"UVdf
-                              " SSize_t_MAX=%"UVdf" minnext=%"UVdf
-                              " maxcount=%"UVdf" mincount=%"UVdf"\n",
+PerlIO_printf(Perl_debug_log, "counted=%"UVuf" deltanext=%"UVuf
+                              " SSize_t_MAX=%"UVuf" minnext=%"UVuf
+                              " maxcount=%"UVuf" mincount=%"UVuf"\n",
      (UV)counted, (UV)deltanext, (UV)SSize_t_MAX, (UV)minnext, (UV)maxcount,
      (UV)mincount);
  if (deltanext != SSize_t_MAX)
      (UV)counted, (UV)deltanext, (UV)SSize_t_MAX, (UV)minnext, (UV)maxcount,
      (UV)mincount);
  if (deltanext != SSize_t_MAX)
-PerlIO_printf(Perl_debug_log, "LHS=%"UVdf" RHS=%"UVdf"\n",
+PerlIO_printf(Perl_debug_log, "LHS=%"UVuf" RHS=%"UVuf"\n",
      (UV)(-counted * deltanext + (minnext + deltanext) * maxcount
            - minnext * mincount), (UV)(SSize_t_MAX - data->pos_delta));
  #endif
      (UV)(-counted * deltanext + (minnext + deltanext) * maxcount
            - minnext * mincount), (UV)(SSize_t_MAX - data->pos_delta));
  #endif
@@ -4774,7 +4875,8 @@ PerlIO_printf(Perl_debug_log, "LHS=%"UVdf" RHS=%"UVdf"\n",
                      ssc_intersection(data->start_class,
                                      PL_XPosix_ptrs[_CC_VERTSPACE], FALSE);
                      ssc_clear_locale(data->start_class);
                      ssc_intersection(data->start_class,
                                      PL_XPosix_ptrs[_CC_VERTSPACE], FALSE);
                      ssc_clear_locale(data->start_class);
-                    ANYOF_FLAGS(data->start_class) &= ~ANYOF_EMPTY_STRING;
+                    ANYOF_FLAGS(data->start_class)
+                                                &= ~SSC_MATCHES_EMPTY_STRING;
                  }
                  else if (flags & SCF_DO_STCLASS_OR) {
                      ssc_union(data->start_class,
                  }
                  else if (flags & SCF_DO_STCLASS_OR) {
                      ssc_union(data->start_class,
@@ -4784,7 +4886,8 @@ PerlIO_printf(Perl_debug_log, "LHS=%"UVdf" RHS=%"UVdf"\n",
  
                      /* See commit msg for
                       * 749e076fceedeb708a624933726e7989f2302f6a */
  
                      /* See commit msg for
                       * 749e076fceedeb708a624933726e7989f2302f6a */
-                    ANYOF_FLAGS(data->start_class) &= ~ANYOF_EMPTY_STRING;
+                    ANYOF_FLAGS(data->start_class)
+                                                &= ~SSC_MATCHES_EMPTY_STRING;
                  }
                 flags &= ~SCF_DO_STCLASS;
              }
                  }
                 flags &= ~SCF_DO_STCLASS;
              }
@@ -4811,7 +4914,7 @@ PerlIO_printf(Perl_debug_log, "LHS=%"UVdf" RHS=%"UVdf"\n",
                  U8 namedclass;
  
                  /* See commit msg 749e076fceedeb708a624933726e7989f2302f6a */
                  U8 namedclass;
  
                  /* See commit msg 749e076fceedeb708a624933726e7989f2302f6a */
-                ANYOF_FLAGS(data->start_class) &= ~ANYOF_EMPTY_STRING;
+                ANYOF_FLAGS(data->start_class) &= ~SSC_MATCHES_EMPTY_STRING;
  
                 /* Some of the logic below assumes that switching
                    locale on will only add false positives. */
  
                 /* Some of the logic below assumes that switching
                    locale on will only add false positives. */
@@ -4959,7 +5062,8 @@ PerlIO_printf(Perl_debug_log, "LHS=%"UVdf" RHS=%"UVdf"\n",
         else if (  PL_regkind[OP(scan)] == BRANCHJ
                  /* Lookbehind, or need to calculate parens/evals/stclass: */
                    && (scan->flags || data || (flags & SCF_DO_STCLASS))
         else if (  PL_regkind[OP(scan)] == BRANCHJ
                  /* Lookbehind, or need to calculate parens/evals/stclass: */
                    && (scan->flags || data || (flags & SCF_DO_STCLASS))
-                  && (OP(scan) == IFMATCH || OP(scan) == UNLESSM)) {
+                  && (OP(scan) == IFMATCH || OP(scan) == UNLESSM))
+        {
              if ( OP(scan) == UNLESSM &&
                   scan->flags == 0 &&
                   OP(NEXTOPER(NEXTOPER(scan))) == NOTHING &&
              if ( OP(scan) == UNLESSM &&
                   scan->flags == 0 &&
                   OP(NEXTOPER(NEXTOPER(scan))) == NOTHING &&
@@ -5047,8 +5151,12 @@ PerlIO_printf(Perl_debug_log, "LHS=%"UVdf" RHS=%"UVdf"\n",
                          */
                         ssc_init(pRExC_state, data->start_class);
                     }  else {
                          */
                         ssc_init(pRExC_state, data->start_class);
                     }  else {
-                       /* AND before and after: combine and continue */
+                        /* AND before and after: combine and continue.  These
+                         * assertions are zero-length, so can match an EMPTY
+                         * string */
                         ssc_and(pRExC_state, data->start_class, (regnode_charclass *) &intrnl);
                         ssc_and(pRExC_state, data->start_class, (regnode_charclass *) &intrnl);
+                        ANYOF_FLAGS(data->start_class)
+                                                   |= SSC_MATCHES_EMPTY_STRING;
                     }
                  }
             }
                     }
                  }
             }
@@ -5120,6 +5228,7 @@ PerlIO_printf(Perl_debug_log, "LHS=%"UVdf" RHS=%"UVdf"\n",
  
                  if (f & SCF_DO_STCLASS_AND) {
                      ssc_and(pRExC_state, data->start_class, (regnode_charclass *) &intrnl);
  
                  if (f & SCF_DO_STCLASS_AND) {
                      ssc_and(pRExC_state, data->start_class, (regnode_charclass *) &intrnl);
+                    ANYOF_FLAGS(data->start_class) |= SSC_MATCHES_EMPTY_STRING;
                  }
                  if (data) {
                      if (data_fake.flags & (SF_HAS_PAR|SF_IN_PAR))
                  }
                  if (data) {
                      if (data_fake.flags & (SF_HAS_PAR|SF_IN_PAR))
@@ -5430,12 +5539,12 @@ S_add_data(RExC_state_t* const pRExC_state, const char* const s, const U32 n)
      return count;
  }
  
      return count;
  }
  
-/*XXX: todo make this not included in a non debugging perl */
+/*XXX: todo make this not included in a non debugging perl, but appears to be
+ * used anyway there, in 'use re' */
  #ifndef PERL_IN_XSUB_RE
  void
  Perl_reginitcolors(pTHX)
  {
  #ifndef PERL_IN_XSUB_RE
  void
  Perl_reginitcolors(pTHX)
  {
-    dVAR;
      const char * const s = PerlEnv_getenv("PERL_RE_COLORS");
      if (s) {
         char *t = savepv(s);
      const char * const s = PerlEnv_getenv("PERL_RE_COLORS");
      if (s) {
         char *t = savepv(s);
@@ -5489,8 +5598,6 @@ Perl_reginitcolors(pTHX)
  regexp_engine const *
  Perl_current_re_engine(pTHX)
  {
  regexp_engine const *
  Perl_current_re_engine(pTHX)
  {
-    dVAR;
-
      if (IN_PERL_COMPILETIME) {
         HV * const table = GvHV(PL_hintgv);
         SV **ptr;
      if (IN_PERL_COMPILETIME) {
         HV * const table = GvHV(PL_hintgv);
         SV **ptr;
@@ -5517,7 +5624,6 @@ Perl_current_re_engine(pTHX)
  REGEXP *
  Perl_pregcomp(pTHX_ SV * const pattern, const U32 flags)
  {
  REGEXP *
  Perl_pregcomp(pTHX_ SV * const pattern, const U32 flags)
  {
-    dVAR;
      regexp_engine const *eng = current_re_engine();
      GET_RE_DEBUG_FLAGS_DECL;
  
      regexp_engine const *eng = current_re_engine();
      GET_RE_DEBUG_FLAGS_DECL;
  
@@ -5564,9 +5670,9 @@ S_pat_upgrade_to_utf8(pTHX_ RExC_state_t * const pRExC_state,
                     char **pat_p, STRLEN *plen_p, int num_code_blocks)
  {
      U8 *const src = (U8*)*pat_p;
                     char **pat_p, STRLEN *plen_p, int num_code_blocks)
  {
      U8 *const src = (U8*)*pat_p;
-    U8 *dst;
+    U8 *dst, *d;
      int n=0;
      int n=0;
-    STRLEN s = 0, d = 0;
+    STRLEN s = 0;
      bool do_end = 0;
      GET_RE_DEBUG_FLAGS_DECL;
  
      bool do_end = 0;
      GET_RE_DEBUG_FLAGS_DECL;
  
@@ -5574,32 +5680,27 @@ S_pat_upgrade_to_utf8(pTHX_ RExC_state_t * const pRExC_state,
          "UTF8 mismatch! Converting to utf8 for resizing and compile\n"));
  
      Newx(dst, *plen_p * 2 + 1, U8);
          "UTF8 mismatch! Converting to utf8 for resizing and compile\n"));
  
      Newx(dst, *plen_p * 2 + 1, U8);
+    d = dst;
  
      while (s < *plen_p) {
  
      while (s < *plen_p) {
-        if (NATIVE_BYTE_IS_INVARIANT(src[s]))
-            dst[d]   = src[s];
-        else {
-            dst[d++] = UTF8_EIGHT_BIT_HI(src[s]);
-            dst[d]   = UTF8_EIGHT_BIT_LO(src[s]);
-        }
+        append_utf8_from_native_byte(src[s], &d);
          if (n < num_code_blocks) {
              if (!do_end && pRExC_state->code_blocks[n].start == s) {
          if (n < num_code_blocks) {
              if (!do_end && pRExC_state->code_blocks[n].start == s) {
-                pRExC_state->code_blocks[n].start = d;
-                assert(dst[d] == '(');
+                pRExC_state->code_blocks[n].start = d - dst - 1;
+                assert(*(d - 1) == '(');
                  do_end = 1;
              }
              else if (do_end && pRExC_state->code_blocks[n].end == s) {
                  do_end = 1;
              }
              else if (do_end && pRExC_state->code_blocks[n].end == s) {
-                pRExC_state->code_blocks[n].end = d;
-                assert(dst[d] == ')');
+                pRExC_state->code_blocks[n].end = d - dst - 1;
+                assert(*(d - 1) == ')');
                  do_end = 0;
                  n++;
              }
          }
          s++;
                  do_end = 0;
                  n++;
              }
          }
          s++;
-        d++;
      }
      }
-    dst[d] = '\0';
-    *plen_p = d;
+    *d = '\0';
+    *plen_p = d - dst;
      *pat_p = (char*) dst;
      SAVEFREEPV(*pat_p);
      RExC_orig_utf8 = RExC_utf8 = 1;
      *pat_p = (char*) dst;
      SAVEFREEPV(*pat_p);
      RExC_orig_utf8 = RExC_utf8 = 1;
@@ -5633,7 +5734,7 @@ S_concat_pat(pTHX_ RExC_state_t * const pRExC_state,
      /* if we know we have at least two args, create an empty string,
       * then concatenate args to that. For no args, return an empty string */
      if (!pat && pat_count != 1) {
      /* if we know we have at least two args, create an empty string,
       * then concatenate args to that. For no args, return an empty string */
      if (!pat && pat_count != 1) {
-        pat = newSVpvn("", 0);
+        pat = newSVpvs("");
          SAVEFREESV(pat);
          alloced = TRUE;
      }
          SAVEFREESV(pat);
          alloced = TRUE;
      }
@@ -5669,7 +5770,7 @@ S_concat_pat(pTHX_ RExC_state_t * const pRExC_state,
              if (oplist) {
                  assert(oplist->op_type == OP_PADAV
                      || oplist->op_type == OP_RV2AV);
              if (oplist) {
                  assert(oplist->op_type == OP_PADAV
                      || oplist->op_type == OP_RV2AV);
-                oplist = oplist->op_sibling;;
+                oplist = OP_SIBLING(oplist);
              }
  
              if (SvRMAGICAL(av)) {
              }
  
              if (SvRMAGICAL(av)) {
@@ -5716,10 +5817,10 @@ S_concat_pat(pTHX_ RExC_state_t * const pRExC_state,
                  pRExC_state->code_blocks[n].src_regex = NULL;
                  n++;
                  code = 1;
                  pRExC_state->code_blocks[n].src_regex = NULL;
                  n++;
                  code = 1;
-                oplist = oplist->op_sibling; /* skip CONST */
+                oplist = OP_SIBLING(oplist); /* skip CONST */
                  assert(oplist);
              }
                  assert(oplist);
              }
-            oplist = oplist->op_sibling;;
+            oplist = OP_SIBLING(oplist);;
          }
  
         /* apply magic and QR overloading to arg */
          }
  
         /* apply magic and QR overloading to arg */
@@ -5839,6 +5940,8 @@ S_has_runtime_code(pTHX_ RExC_state_t * const pRExC_state,
  {
      int n = 0;
      STRLEN s;
  {
      int n = 0;
      STRLEN s;
+    
+    PERL_UNUSED_CONTEXT;
  
      for (s = 0; s < plen; s++) {
         if (n < pRExC_state->num_code_blocks
  
      for (s = 0; s < plen; s++) {
         if (n < pRExC_state->num_code_blocks
@@ -5951,7 +6054,6 @@ S_compile_runtime_code(pTHX_ RExC_state_t * const pRExC_state,
  
         ENTER;
         SAVETMPS;
  
         ENTER;
         SAVETMPS;
-       save_re_context();
         PUSHSTACKi(PERLSI_REQUIRE);
          /* G_RE_REPARSING causes the toker to collapse \\ into \ when
           * parsing qr''; normally only q'' does this. It also alters
         PUSHSTACKi(PERLSI_REQUIRE);
          /* G_RE_REPARSING causes the toker to collapse \\ into \ when
           * parsing qr''; normally only q'' does this. It also alters
@@ -6154,7 +6256,6 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
                     OP *expr, const regexp_engine* eng, REGEXP *old_re,
                      bool *is_bare_re, U32 orig_rx_flags, U32 pm_flags)
  {
                     OP *expr, const regexp_engine* eng, REGEXP *old_re,
                      bool *is_bare_re, U32 orig_rx_flags, U32 pm_flags)
  {
-    dVAR;
      REGEXP *rx;
      struct regexp *r;
      regexp_internal *ri;
      REGEXP *rx;
      struct regexp *r;
      regexp_internal *ri;
@@ -6201,6 +6302,13 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
          PL_utf8_foldable = _new_invlist_C_array(_Perl_Any_Folds_invlist);
          PL_HasMultiCharFold =
                         _new_invlist_C_array(_Perl_Folds_To_Multi_Char_invlist);
          PL_utf8_foldable = _new_invlist_C_array(_Perl_Any_Folds_invlist);
          PL_HasMultiCharFold =
                         _new_invlist_C_array(_Perl_Folds_To_Multi_Char_invlist);
+
+        /* This is calculated here, because the Perl program that generates the
+         * static global ones doesn't currently have access to
+         * NUM_ANYOF_CODE_POINTS */
+       PL_InBitmap = _new_invlist(2);
+       PL_InBitmap = _add_range_to_invlist(PL_InBitmap, 0,
+                                                    NUM_ANYOF_CODE_POINTS - 1);
      }
  #endif
  
      }
  #endif
  
@@ -6216,7 +6324,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
         OP *o;
         int ncode = 0;
  
         OP *o;
         int ncode = 0;
  
-       for (o = cLISTOPx(expr)->op_first; o; o = o->op_sibling)
+       for (o = cLISTOPx(expr)->op_first; o; o = OP_SIBLING(o))
             if (o->op_type == OP_NULL && (o->op_flags & OPf_SPECIAL))
                 ncode++; /* count of DO blocks */
         if (ncode) {
             if (o->op_type == OP_NULL && (o->op_flags & OPf_SPECIAL))
                 ncode++; /* count of DO blocks */
         if (ncode) {
@@ -6237,7 +6345,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
          if (expr->op_type == OP_CONST)
              n = 1;
          else
          if (expr->op_type == OP_CONST)
              n = 1;
          else
-            for (o = cLISTOPx(expr)->op_first; o; o = o->op_sibling) {
+            for (o = cLISTOPx(expr)->op_first; o; o = OP_SIBLING(o)) {
                  if (o->op_type == OP_CONST)
                      n++;
              }
                  if (o->op_type == OP_CONST)
                      n++;
              }
@@ -6253,7 +6361,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
          if (expr->op_type == OP_CONST)
              new_patternp[n] = cSVOPx_sv(expr);
          else
          if (expr->op_type == OP_CONST)
              new_patternp[n] = cSVOPx_sv(expr);
          else
-            for (o = cLISTOPx(expr)->op_first; o; o = o->op_sibling) {
+            for (o = cLISTOPx(expr)->op_first; o; o = OP_SIBLING(o)) {
                  if (o->op_type == OP_CONST)
                      new_patternp[n++] = cSVOPo_sv;
              }
                  if (o->op_type == OP_CONST)
                      new_patternp[n++] = cSVOPo_sv;
              }
@@ -6273,7 +6381,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
              assert(   expr->op_type == OP_PUSHMARK
                     || (expr->op_type == OP_NULL && expr->op_targ == OP_PUSHMARK)
                     || expr->op_type == OP_PADRANGE);
              assert(   expr->op_type == OP_PUSHMARK
                     || (expr->op_type == OP_NULL && expr->op_targ == OP_PUSHMARK)
                     || expr->op_type == OP_PADRANGE);
-            expr = expr->op_sibling;
+            expr = OP_SIBLING(expr);
      }
  
      pat = S_concat_pat(aTHX_ pRExC_state, NULL, new_patternp, pat_count,
      }
  
      pat = S_concat_pat(aTHX_ pRExC_state, NULL, new_patternp, pat_count,
@@ -6751,22 +6859,8 @@ reStudy:
         else if (PL_regkind[OP(first)] == TRIE &&
                 ((reg_trie_data *)ri->data->data[ ARG(first) ])->minlen>0)
         {
         else if (PL_regkind[OP(first)] == TRIE &&
                 ((reg_trie_data *)ri->data->data[ ARG(first) ])->minlen>0)
         {
-           regnode *trie_op;
-           /* this can happen only on restudy */
-           if ( OP(first) == TRIE ) {
-                struct regnode_1 *trieop = (struct regnode_1 *)
-                   PerlMemShared_calloc(1, sizeof(struct regnode_1));
-                StructCopy(first,trieop,struct regnode_1);
-                trie_op=(regnode *)trieop;
-            } else {
-                struct regnode_charclass *trieop = (struct regnode_charclass *)
-                   PerlMemShared_calloc(1, sizeof(struct regnode_charclass));
-                StructCopy(first,trieop,struct regnode_charclass);
-                trie_op=(regnode *)trieop;
-            }
-            OP(trie_op)+=2;
-            make_trie_failtable(pRExC_state, (regnode *)first, trie_op, 0);
-           ri->regstclass = trie_op;
+            /* this can happen only on restudy */
+            ri->regstclass = construct_ahocorasick_from_trie(pRExC_state, (regnode *)first, 0);
         }
  #endif
         else if (REGNODE_SIMPLE(OP(first)))
         }
  #endif
         else if (REGNODE_SIMPLE(OP(first)))
@@ -6777,9 +6871,7 @@ reStudy:
         else if (PL_regkind[OP(first)] == BOL) {
              r->intflags |= (OP(first) == MBOL
                             ? PREGf_ANCH_MBOL
         else if (PL_regkind[OP(first)] == BOL) {
              r->intflags |= (OP(first) == MBOL
                             ? PREGf_ANCH_MBOL
-                          : (OP(first) == SBOL
-                              ? PREGf_ANCH_SBOL
-                              : PREGf_ANCH_BOL));
+                           : PREGf_ANCH_SBOL);
             first = NEXTOPER(first);
             goto again;
         }
             first = NEXTOPER(first);
             goto again;
         }
@@ -6789,6 +6881,7 @@ reStudy:
             goto again;
         }
         else if ((!sawopen || !RExC_sawback) &&
             goto again;
         }
         else if ((!sawopen || !RExC_sawback) &&
+            !sawlookahead &&
             (OP(first) == STAR &&
             PL_regkind[OP(NEXTOPER(first))] == REG_ANY) &&
              !(r->intflags & PREGf_ANCH) && !pRExC_state->num_code_blocks)
             (OP(first) == STAR &&
             PL_regkind[OP(NEXTOPER(first))] == REG_ANY) &&
              !(r->intflags & PREGf_ANCH) && !pRExC_state->num_code_blocks)
@@ -6931,7 +7024,7 @@ reStudy:
  
         if ((!(r->anchored_substr || r->anchored_utf8) || r->anchored_offset)
             && stclass_flag
  
         if ((!(r->anchored_substr || r->anchored_utf8) || r->anchored_offset)
             && stclass_flag
-            && ! (ANYOF_FLAGS(data.start_class) & ANYOF_EMPTY_STRING)
+            && ! (ANYOF_FLAGS(data.start_class) & SSC_MATCHES_EMPTY_STRING)
             && !ssc_is_anything(data.start_class))
         {
             const U32 n = add_data(pRExC_state, STR_WITH_LEN("f"));
             && !ssc_is_anything(data.start_class))
         {
             const U32 n = add_data(pRExC_state, STR_WITH_LEN("f"));
@@ -7011,7 +7104,7 @@ reStudy:
         r->check_substr = r->check_utf8 = r->anchored_substr = r->anchored_utf8
                 = r->float_substr = r->float_utf8 = NULL;
  
         r->check_substr = r->check_utf8 = r->anchored_substr = r->anchored_utf8
                 = r->float_substr = r->float_utf8 = NULL;
  
-        if (! (ANYOF_FLAGS(data.start_class) & ANYOF_EMPTY_STRING)
+        if (! (ANYOF_FLAGS(data.start_class) & SSC_MATCHES_EMPTY_STRING)
              && ! ssc_is_anything(data.start_class))
          {
             const U32 n = add_data(pRExC_state, STR_WITH_LEN("f"));
              && ! ssc_is_anything(data.start_class))
          {
             const U32 n = add_data(pRExC_state, STR_WITH_LEN("f"));
@@ -7044,8 +7137,8 @@ reStudy:
      /* Guard against an embedded (?=) or (?<=) with a longer minlen than
         the "real" pattern. */
      DEBUG_OPTIMISE_r({
      /* Guard against an embedded (?=) or (?<=) with a longer minlen than
         the "real" pattern. */
      DEBUG_OPTIMISE_r({
-        PerlIO_printf(Perl_debug_log,"minlen: %"IVdf" r->minlen:%"IVdf" maxlen:%ld\n",
-                      (IV)minlen, (IV)r->minlen, RExC_maxlen);
+        PerlIO_printf(Perl_debug_log,"minlen: %"IVdf" r->minlen:%"IVdf" maxlen:%"IVdf"\n",
+                      (IV)minlen, (IV)r->minlen, (IV)RExC_maxlen);
      });
      r->minlenret = minlen;
      if (r->minlen < minlen)
      });
      r->minlenret = minlen;
      if (r->minlen < minlen)
@@ -7095,7 +7188,12 @@ reStudy:
  
          if (PL_regkind[fop] == NOTHING && nop == END)
              r->extflags |= RXf_NULL;
  
          if (PL_regkind[fop] == NOTHING && nop == END)
              r->extflags |= RXf_NULL;
-        else if (PL_regkind[fop] == BOL && nop == END)
+        else if ((fop == MBOL || (fop == SBOL && !first->flags)) && nop == END)
+            /* when fop is SBOL first->flags will be true only when it was
+             * produced by parsing /\A/, and not when parsing /^/. This is
+             * very important for the split code as there we want to
+             * treat /^/ as /^/m, but we do not want to treat /\A/ as /^/m.
+             * See rt #122761 for more details. -- Yves */
              r->extflags |= RXf_START_ONLY;
          else if (fop == PLUS
                   && PL_regkind[nop] == POSIXD && FLAGS(next) == _CC_SPACE
              r->extflags |= RXf_START_ONLY;
          else if (fop == PLUS
                   && PL_regkind[nop] == POSIXD && FLAGS(next) == _CC_SPACE
@@ -7759,7 +7857,7 @@ S_reg_scan_name(pTHX_ RExC_state_t *pRExC_state, U32 flags)
  /* The header definitions are in F<inline_invlist.c> */
  
  PERL_STATIC_INLINE UV*
  /* The header definitions are in F<inline_invlist.c> */
  
  PERL_STATIC_INLINE UV*
-S__invlist_array_init(pTHX_ SV* const invlist, const bool will_have_0)
+S__invlist_array_init(SV* const invlist, const bool will_have_0)
  {
      /* Returns a pointer to the first element in the inversion list's array.
       * This is called upon initialization of an inversion list.  Where the
  {
      /* Returns a pointer to the first element in the inversion list's array.
       * This is called upon initialization of an inversion list.  Where the
@@ -7785,7 +7883,7 @@ S__invlist_array_init(pTHX_ SV* const invlist, const bool will_have_0)
  }
  
  PERL_STATIC_INLINE UV*
  }
  
  PERL_STATIC_INLINE UV*
-S_invlist_array(pTHX_ SV* const invlist)
+S_invlist_array(SV* const invlist)
  {
      /* Returns the pointer to the inversion list's array.  Every time the
       * length changes, this needs to be called in case malloc or realloc moved
  {
      /* Returns the pointer to the inversion list's array.  Every time the
       * length changes, this needs to be called in case malloc or realloc moved
@@ -7810,7 +7908,7 @@ S_invlist_set_len(pTHX_ SV* const invlist, const UV len, const bool offset)
  {
      /* Sets the current number of elements stored in the inversion list.
       * Updates SvCUR correspondingly */
  {
      /* Sets the current number of elements stored in the inversion list.
       * Updates SvCUR correspondingly */
-
+    PERL_UNUSED_CONTEXT;
      PERL_ARGS_ASSERT_INVLIST_SET_LEN;
  
      assert(SvTYPE(invlist) == SVt_INVLIST);
      PERL_ARGS_ASSERT_INVLIST_SET_LEN;
  
      assert(SvTYPE(invlist) == SVt_INVLIST);
@@ -7823,11 +7921,10 @@ S_invlist_set_len(pTHX_ SV* const invlist, const UV len, const bool offset)
  }
  
  PERL_STATIC_INLINE IV*
  }
  
  PERL_STATIC_INLINE IV*
-S_get_invlist_previous_index_addr(pTHX_ SV* invlist)
+S_get_invlist_previous_index_addr(SV* invlist)
  {
      /* Return the address of the IV that is reserved to hold the cached index
       * */
  {
      /* Return the address of the IV that is reserved to hold the cached index
       * */
-
      PERL_ARGS_ASSERT_GET_INVLIST_PREVIOUS_INDEX_ADDR;
  
      assert(SvTYPE(invlist) == SVt_INVLIST);
      PERL_ARGS_ASSERT_GET_INVLIST_PREVIOUS_INDEX_ADDR;
  
      assert(SvTYPE(invlist) == SVt_INVLIST);
@@ -7836,7 +7933,7 @@ S_get_invlist_previous_index_addr(pTHX_ SV* invlist)
  }
  
  PERL_STATIC_INLINE IV
  }
  
  PERL_STATIC_INLINE IV
-S_invlist_previous_index(pTHX_ SV* const invlist)
+S_invlist_previous_index(SV* const invlist)
  {
      /* Returns cached index of previous search */
  
  {
      /* Returns cached index of previous search */
  
@@ -7846,7 +7943,7 @@ S_invlist_previous_index(pTHX_ SV* const invlist)
  }
  
  PERL_STATIC_INLINE void
  }
  
  PERL_STATIC_INLINE void
-S_invlist_set_previous_index(pTHX_ SV* const invlist, const IV index)
+S_invlist_set_previous_index(SV* const invlist, const IV index)
  {
      /* Caches <index> for later retrieval */
  
  {
      /* Caches <index> for later retrieval */
  
@@ -7858,7 +7955,7 @@ S_invlist_set_previous_index(pTHX_ SV* const invlist, const IV index)
  }
  
  PERL_STATIC_INLINE UV
  }
  
  PERL_STATIC_INLINE UV
-S_invlist_max(pTHX_ SV* const invlist)
+S_invlist_max(SV* const invlist)
  {
      /* Returns the maximum number of elements storable in the inversion list's
       * array, without having to realloc() */
  {
      /* Returns the maximum number of elements storable in the inversion list's
       * array, without having to realloc() */
@@ -7977,7 +8074,7 @@ S_invlist_extend(pTHX_ SV* const invlist, const UV new_max)
  }
  
  PERL_STATIC_INLINE void
  }
  
  PERL_STATIC_INLINE void
-S_invlist_trim(pTHX_ SV* const invlist)
+S_invlist_trim(SV* const invlist)
  {
      PERL_ARGS_ASSERT_INVLIST_TRIM;
  
  {
      PERL_ARGS_ASSERT_INVLIST_TRIM;
  
@@ -8076,7 +8173,7 @@ S__append_range_to_invlist(pTHX_ SV* const invlist,
  #ifndef PERL_IN_XSUB_RE
  
  IV
  #ifndef PERL_IN_XSUB_RE
  
  IV
-Perl__invlist_search(pTHX_ SV* const invlist, const UV cp)
+Perl__invlist_search(SV* const invlist, const UV cp)
  {
      /* Searches the inversion list for the entry that contains the input code
       * point <cp>.  If <cp> is not in the list, -1 is returned.  Otherwise, the
  {
      /* Searches the inversion list for the entry that contains the input code
       * point <cp>.  If <cp> is not in the list, -1 is returned.  Otherwise, the
@@ -8164,8 +8261,8 @@ Perl__invlist_search(pTHX_ SV* const invlist, const UV cp)
  }
  
  void
  }
  
  void
-Perl__invlist_populate_swatch(pTHX_ SV* const invlist,
-                                    const UV start, const UV end, U8* swatch)
+Perl__invlist_populate_swatch(SV* const invlist,
+                              const UV start, const UV end, U8* swatch)
  {
      /* populates a swatch of a swash the same way swatch_get() does in utf8.c,
       * but is used when the swash has an inversion list.  This makes this much
  {
      /* populates a swatch of a swash the same way swatch_get() does in utf8.c,
       * but is used when the swash has an inversion list.  This makes this much
@@ -8877,7 +8974,7 @@ S_invlist_clone(pTHX_ SV* const invlist)
  }
  
  PERL_STATIC_INLINE STRLEN*
  }
  
  PERL_STATIC_INLINE STRLEN*
-S_get_invlist_iter_addr(pTHX_ SV* invlist)
+S_get_invlist_iter_addr(SV* invlist)
  {
      /* Return the address of the UV that contains the current iteration
       * position */
  {
      /* Return the address of the UV that contains the current iteration
       * position */
@@ -8890,7 +8987,7 @@ S_get_invlist_iter_addr(pTHX_ SV* invlist)
  }
  
  PERL_STATIC_INLINE void
  }
  
  PERL_STATIC_INLINE void
-S_invlist_iterinit(pTHX_ SV* invlist)  /* Initialize iterator for invlist */
+S_invlist_iterinit(SV* invlist)        /* Initialize iterator for invlist */
  {
      PERL_ARGS_ASSERT_INVLIST_ITERINIT;
  
  {
      PERL_ARGS_ASSERT_INVLIST_ITERINIT;
  
@@ -8898,7 +8995,7 @@ S_invlist_iterinit(pTHX_ SV* invlist)     /* Initialize iterator for invlist */
  }
  
  PERL_STATIC_INLINE void
  }
  
  PERL_STATIC_INLINE void
-S_invlist_iterfinish(pTHX_ SV* invlist)
+S_invlist_iterfinish(SV* invlist)
  {
      /* Terminate iterator for invlist.  This is to catch development errors.
       * Any iteration that is interrupted before completed should call this
  {
      /* Terminate iterator for invlist.  This is to catch development errors.
       * Any iteration that is interrupted before completed should call this
@@ -8914,7 +9011,7 @@ S_invlist_iterfinish(pTHX_ SV* invlist)
  }
  
  STATIC bool
  }
  
  STATIC bool
-S_invlist_iternext(pTHX_ SV* invlist, UV* start, UV* end)
+S_invlist_iternext(SV* invlist, UV* start, UV* end)
  {
      /* An C<invlist_iterinit> call on <invlist> must be used to set this up.
       * This call sets in <*start> and <*end>, the next range in <invlist>.
  {
      /* An C<invlist_iterinit> call on <invlist> must be used to set this up.
       * This call sets in <*start> and <*end>, the next range in <invlist>.
@@ -8949,7 +9046,7 @@ S_invlist_iternext(pTHX_ SV* invlist, UV* start, UV* end)
  }
  
  PERL_STATIC_INLINE bool
  }
  
  PERL_STATIC_INLINE bool
-S_invlist_is_iterating(pTHX_ SV* const invlist)
+S_invlist_is_iterating(SV* const invlist)
  {
      PERL_ARGS_ASSERT_INVLIST_IS_ITERATING;
  
  {
      PERL_ARGS_ASSERT_INVLIST_IS_ITERATING;
  
@@ -8957,7 +9054,7 @@ S_invlist_is_iterating(pTHX_ SV* const invlist)
  }
  
  PERL_STATIC_INLINE UV
  }
  
  PERL_STATIC_INLINE UV
-S_invlist_highest(pTHX_ SV* const invlist)
+S_invlist_highest(SV* const invlist)
  {
      /* Returns the highest code point that matches an inversion list.  This API
       * has an ambiguity, as it returns 0 under either the highest is actually
  {
      /* Returns the highest code point that matches an inversion list.  This API
       * has an ambiguity, as it returns 0 under either the highest is actually
@@ -9069,6 +9166,23 @@ Perl__invlist_dump(pTHX_ PerlIO *file, I32 level,
          count += 2;
      }
  }
          count += 2;
      }
  }
+
+void
+Perl__load_PL_utf8_foldclosures (pTHX)
+{
+    assert(! PL_utf8_foldclosures);
+
+    /* If the folds haven't been read in, call a fold function
+     * to force that */
+    if (! PL_utf8_tofold) {
+        U8 dummy[UTF8_MAXBYTES_CASE+1];
+
+        /* This string is just a short named one above \xff */
+        to_utf8_fold((U8*) HYPHEN_UTF8, dummy, NULL);
+        assert(PL_utf8_tofold); /* Verify that worked */
+    }
+    PL_utf8_foldclosures = _swash_inversion_hash(PL_utf8_tofold);
+}
  #endif
  
  #ifdef PERL_ARGS_ASSERT__INVLISTEQ
  #endif
  
  #ifdef PERL_ARGS_ASSERT__INVLISTEQ
@@ -9268,7 +9382,7 @@ S_parse_lparen_question_flags(pTHX_ RExC_state_t *pRExC_state)
                  /*NOTREACHED*/
              case ONCE_PAT_MOD: /* 'o' */
              case GLOBAL_PAT_MOD: /* 'g' */
                  /*NOTREACHED*/
              case ONCE_PAT_MOD: /* 'o' */
              case GLOBAL_PAT_MOD: /* 'g' */
-                if (SIZE_ONLY && ckWARN(WARN_REGEXP)) {
+                if (PASS2 && ckWARN(WARN_REGEXP)) {
                      const I32 wflagbit = *RExC_parse == 'o'
                                           ? WASTED_O
                                           : WASTED_G;
                      const I32 wflagbit = *RExC_parse == 'o'
                                           ? WASTED_O
                                           : WASTED_G;
@@ -9288,7 +9402,7 @@ S_parse_lparen_question_flags(pTHX_ RExC_state_t *pRExC_state)
                  break;
  
              case CONTINUE_PAT_MOD: /* 'c' */
                  break;
  
              case CONTINUE_PAT_MOD: /* 'c' */
-                if (SIZE_ONLY && ckWARN(WARN_REGEXP)) {
+                if (PASS2 && ckWARN(WARN_REGEXP)) {
                      if (! (wastedflags & WASTED_C) ) {
                          wastedflags |= WASTED_GC;
                         /* diag_listed_as: Useless (?-%s) - don't use /%s modifier in regex; marked by <-- HERE in m/%s/ */
                      if (! (wastedflags & WASTED_C) ) {
                          wastedflags |= WASTED_GC;
                         /* diag_listed_as: Useless (?-%s) - don't use /%s modifier in regex; marked by <-- HERE in m/%s/ */
@@ -9303,7 +9417,7 @@ S_parse_lparen_question_flags(pTHX_ RExC_state_t *pRExC_state)
                  break;
              case KEEPCOPY_PAT_MOD: /* 'p' */
                  if (flagsp == &negflags) {
                  break;
              case KEEPCOPY_PAT_MOD: /* 'p' */
                  if (flagsp == &negflags) {
-                    if (SIZE_ONLY)
+                    if (PASS2)
                          ckWARNreg(RExC_parse + 1,"Useless use of (?-p)");
                  } else {
                      *flagsp |= RXf_PMf_KEEPCOPY;
                          ckWARNreg(RExC_parse + 1,"Useless use of (?-p)");
                  } else {
                      *flagsp |= RXf_PMf_KEEPCOPY;
@@ -9370,7 +9484,6 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
       * RExC_parse beyond the '('.  Things like '(?' are indivisible tokens, and
       * this flag alerts us to the need to check for that */
  {
       * RExC_parse beyond the '('.  Things like '(?' are indivisible tokens, and
       * this flag alerts us to the need to check for that */
  {
-    dVAR;
      regnode *ret;              /* Will be the head of the group. */
      regnode *br;
      regnode *lastbr;
      regnode *ret;              /* Will be the head of the group. */
      regnode *br;
      regnode *lastbr;
@@ -9382,6 +9495,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
      bool is_open = 0;
      I32 freeze_paren = 0;
      I32 after_freeze = 0;
      bool is_open = 0;
      I32 freeze_paren = 0;
      I32 after_freeze = 0;
+    I32 num; /* numeric backreferences */
  
      char * parse_start = RExC_parse; /* MJD */
      char * const oregcomp_parse = RExC_parse;
  
      char * parse_start = RExC_parse; /* MJD */
      char * const oregcomp_parse = RExC_parse;
@@ -9520,6 +9634,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
          else if (*RExC_parse == '?') { /* (?...) */
             bool is_logical = 0;
             const char * const seqstart = RExC_parse;
          else if (*RExC_parse == '?') { /* (?...) */
             bool is_logical = 0;
             const char * const seqstart = RExC_parse;
+            const char * endptr;
              if (has_intervening_patws) {
                  RExC_parse++;
                  vFAIL("In '(?...)', the '(' and '?' must be adjacent");
              if (has_intervening_patws) {
                  RExC_parse++;
                  vFAIL("In '(?...)', the '(' and '?' must be adjacent");
@@ -9689,18 +9804,6 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
             case '@':           /* (?@...) */
                 vFAIL2("Sequence (?%c...) not implemented", (int)paren);
                 break;
             case '@':           /* (?@...) */
                 vFAIL2("Sequence (?%c...) not implemented", (int)paren);
                 break;
-            case '#':           /* (?#...) */
-                /* XXX As soon as we disallow separating the '?' and '*' (by
-                 * spaces or (?#...) comment), it is believed that this case
-                 * will be unreachable and can be removed.  See
-                 * [perl #117327] */
-                while (*RExC_parse && *RExC_parse != ')')
-                   RExC_parse++;
-               if (*RExC_parse != ')')
-                   FAIL("Sequence (?#... not terminated");
-               nextchar(pRExC_state);
-               *flagp = TRYAGAIN;
-               return NULL;
             case '0' :           /* (?0) */
             case 'R' :           /* (?R) */
                 if (*RExC_parse != ')')
             case '0' :           /* (?0) */
             case 'R' :           /* (?R) */
                 if (*RExC_parse != ')')
@@ -9711,8 +9814,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
                 nextchar(pRExC_state);
                 return ret;
                 /*notreached*/
                 nextchar(pRExC_state);
                 return ret;
                 /*notreached*/
-            { /* named and numeric backreferences */
-                I32 num;
+            /* named and numeric backreferences */
              case '&':            /* (?&NAME) */
                  parse_start = RExC_parse - 1;
                named_recursion:
              case '&':            /* (?&NAME) */
                  parse_start = RExC_parse - 1;
                named_recursion:
@@ -9742,12 +9844,21 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
             case '5': case '6': case '7': case '8': case '9':
                 RExC_parse--;
                parse_recursion:
             case '5': case '6': case '7': case '8': case '9':
                 RExC_parse--;
                parse_recursion:
-               num = atoi(RExC_parse);
-               parse_start = RExC_parse - 1; /* MJD */
-               if (*RExC_parse == '-')
-                   RExC_parse++;
-               while (isDIGIT(*RExC_parse))
-                       RExC_parse++;
+                {
+                    bool is_neg = FALSE;
+                    parse_start = RExC_parse - 1; /* MJD */
+                    if (*RExC_parse == '-') {
+                        RExC_parse++;
+                        is_neg = TRUE;
+                    }
+                    num = grok_atou(RExC_parse, &endptr);
+                    if (endptr)
+                       RExC_parse = (char*)endptr;
+                    if (is_neg) {
+                        /* Some limit for num? */
+                        num = -num;
+                    }
+                }
                 if (*RExC_parse!=')')
                     vFAIL("Expecting close bracket");
  
                 if (*RExC_parse!=')')
                     vFAIL("Expecting close bracket");
  
@@ -9794,7 +9905,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
                  *flagp |= POSTPONED;
                  nextchar(pRExC_state);
                  return ret;
                  *flagp |= POSTPONED;
                  nextchar(pRExC_state);
                  return ret;
-            } /* named and numeric backreferences */
+
              assert(0); /* NOT REACHED */
  
             case '?':           /* (??...) */
              assert(0); /* NOT REACHED */
  
             case '?':           /* (??...) */
@@ -9887,6 +9998,8 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
                          REGTAIL(pRExC_state, ret, tail);
                         goto insert_if;
                     }
                          REGTAIL(pRExC_state, ret, tail);
                         goto insert_if;
                     }
+                   /* Fall through to ‘Unknown switch condition’ at the
+                      end of the if/else chain. */
                 }
                 else if ( RExC_parse[0] == '<'     /* (?(<NAME>)...) */
                          || RExC_parse[0] == '\'' ) /* (?('NAME')...) */
                 }
                 else if ( RExC_parse[0] == '<'     /* (?(<NAME>)...) */
                          || RExC_parse[0] == '\'' ) /* (?('NAME')...) */
@@ -9924,9 +10037,9 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
                     RExC_parse++;
                     parno = 0;
                     if (RExC_parse[0] >= '1' && RExC_parse[0] <= '9' ) {
                     RExC_parse++;
                     parno = 0;
                     if (RExC_parse[0] >= '1' && RExC_parse[0] <= '9' ) {
-                       parno = atoi(RExC_parse++);
-                       while (isDIGIT(*RExC_parse))
-                           RExC_parse++;
+                       parno = grok_atou(RExC_parse, &endptr);
+                       if (endptr)
+                            RExC_parse = (char*)endptr;
                     } else if (RExC_parse[0] == '&') {
                         SV *sv_dat;
                         RExC_parse++;
                     } else if (RExC_parse[0] == '&') {
                         SV *sv_dat;
                         RExC_parse++;
@@ -9943,10 +10056,9 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
                      /* (?(1)...) */
                     char c;
                     char *tmp;
                      /* (?(1)...) */
                     char c;
                     char *tmp;
-                   parno = atoi(RExC_parse++);
-
-                   while (isDIGIT(*RExC_parse))
-                       RExC_parse++;
+                   parno = grok_atou(RExC_parse, &endptr);
+                    if (endptr)
+                       RExC_parse = (char*)endptr;
                      ret = reganode(pRExC_state, GROUPP, parno);
  
                   insert_if_check_paren:
                      ret = reganode(pRExC_state, GROUPP, parno);
  
                   insert_if_check_paren:
@@ -10011,10 +10123,8 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
                                      but I can't figure out why. -- dmq*/
                     return ret;
                 }
                                      but I can't figure out why. -- dmq*/
                     return ret;
                 }
-               else {
-                    RExC_parse += UTF ? UTF8SKIP(RExC_parse) : 1;
-                    vFAIL("Unknown switch condition (?(...))");
-               }
+                RExC_parse += UTF ? UTF8SKIP(RExC_parse) : 1;
+                vFAIL("Unknown switch condition (?(...))");
             }
             case '[':           /* (?[ ... ]) */
                  return handle_regex_sets(pRExC_state, NULL, flagp, depth,
             }
             case '[':           /* (?[ ... ]) */
                  return handle_regex_sets(pRExC_state, NULL, flagp, depth,
@@ -10291,7 +10401,6 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
  STATIC regnode *
  S_regbranch(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, I32 first, U32 depth)
  {
  STATIC regnode *
  S_regbranch(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, I32 first, U32 depth)
  {
-    dVAR;
      regnode *ret;
      regnode *chain = NULL;
      regnode *latest;
      regnode *ret;
      regnode *chain = NULL;
      regnode *latest;
@@ -10373,7 +10482,6 @@ S_regbranch(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, I32 first, U32 depth)
  STATIC regnode *
  S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
  {
  STATIC regnode *
  S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
  {
-    dVAR;
      regnode *ret;
      char op;
      char *next;
      regnode *ret;
      char op;
      char *next;
@@ -10406,7 +10514,7 @@ S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
  
      op = *RExC_parse;
  
  
      op = *RExC_parse;
  
-    if (op == '{' && regcurly(RExC_parse, FALSE)) {
+    if (op == '{' && regcurly(RExC_parse)) {
         maxpos = NULL;
  #ifdef RE_TRACK_PATTERN_OFFSETS
          parse_start = RExC_parse; /* MJD */
         maxpos = NULL;
  #ifdef RE_TRACK_PATTERN_OFFSETS
          parse_start = RExC_parse; /* MJD */
@@ -10422,15 +10530,16 @@ S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
             next++;
         }
         if (*next == '}') {             /* got one */
             next++;
         }
         if (*next == '}') {             /* got one */
+            const char* endptr;
             if (!maxpos)
                 maxpos = next;
             RExC_parse++;
             if (!maxpos)
                 maxpos = next;
             RExC_parse++;
-           min = atoi(RExC_parse);
+           min = grok_atou(RExC_parse, &endptr);
             if (*maxpos == ',')
                 maxpos++;
             else
                 maxpos = RExC_parse;
             if (*maxpos == ',')
                 maxpos++;
             else
                 maxpos = RExC_parse;
-           max = atoi(maxpos);
+           max = grok_atou(maxpos, &endptr);
             if (!max && *maxpos != '0')
                 max = REG_INFTY;                /* meaning "infinity" */
             else if (max >= REG_INFTY)
             if (!max && *maxpos != '0')
                 max = REG_INFTY;                /* meaning "infinity" */
             else if (max >= REG_INFTY)
@@ -10440,7 +10549,6 @@ S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
              if (max < min) {    /* If can't match, warn and optimize to fail
                                     unconditionally */
                  if (SIZE_ONLY) {
              if (max < min) {    /* If can't match, warn and optimize to fail
                                     unconditionally */
                  if (SIZE_ONLY) {
-                    ckWARNreg(RExC_parse, "Quantifier {n,m} with n > m can't match");
  
                      /* We can't back off the size because we have to reserve
                       * enough space for all the things we are about to throw
  
                      /* We can't back off the size because we have to reserve
                       * enough space for all the things we are about to throw
@@ -10449,6 +10557,7 @@ S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                      RExC_size = PREVOPER(RExC_size) - regarglen[(U8)OPFAIL];
                  }
                  else {
                      RExC_size = PREVOPER(RExC_size) - regarglen[(U8)OPFAIL];
                  }
                  else {
+                    ckWARNreg(RExC_parse, "Quantifier {n,m} with n > m can't match");
                      RExC_emit = orig_emit;
                  }
                  ret = reg_node(pRExC_state, OPFAIL);
                      RExC_emit = orig_emit;
                  }
                  ret = reg_node(pRExC_state, OPFAIL);
@@ -10458,7 +10567,7 @@ S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                       && RExC_parse < RExC_end
                       && (*RExC_parse == '?' || *RExC_parse == '+'))
              {
                       && RExC_parse < RExC_end
                       && (*RExC_parse == '?' || *RExC_parse == '+'))
              {
-                if (SIZE_ONLY) {
+                if (PASS2) {
                      ckWARN2reg(RExC_parse + 1,
                                 "Useless use of greediness modifier '%c'",
                                 *RExC_parse);
                      ckWARN2reg(RExC_parse + 1,
                                 "Useless use of greediness modifier '%c'",
                                 *RExC_parse);
@@ -10604,10 +10713,9 @@ S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
      return(ret);
  }
  
      return(ret);
  }
  
-STATIC bool
+STATIC STRLEN
  S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state, regnode** node_p,
  S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state, regnode** node_p,
-                      UV *valuep, I32 *flagp, U32 depth, bool in_char_class,
-                      const bool strict   /* Apply stricter parsing rules? */
+                      UV *valuep, I32 *flagp, U32 depth, SV** substitute_parse
      )
  {
  
      )
  {
  
@@ -10615,46 +10723,75 @@ S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state, regnode** node_p,
     and needs to handle the rest. RExC_parse is expected to point at the first
     char following the N at the time of the call.  On successful return,
     RExC_parse has been updated to point to just after the sequence identified
     and needs to handle the rest. RExC_parse is expected to point at the first
     char following the N at the time of the call.  On successful return,
     RExC_parse has been updated to point to just after the sequence identified
-   by this routine, and <*flagp> has been updated.
-
-   The \N may be inside (indicated by the boolean <in_char_class>) or outside a
-   character class.
-
-   \N may begin either a named sequence, or if outside a character class, mean
-   to match a non-newline.  For non single-quoted regexes, the tokenizer has
-   attempted to decide which, and in the case of a named sequence, converted it
+   by this routine, <*flagp> has been updated, and the non-NULL input pointers
+   have been set appropriately.
+
+   The typical case for this is \N{some character name}.  This is usually
+   called while parsing the input, filling in or ready to fill in an EXACTish
+   node, and the code point for the character should be returned, so that it
+   can be added to the node, and parsing continued with the next input
+   character.  But it may be that instead of a single character the \N{}
+   expands to more than one, a named sequence.  In this case any following
+   quantifier applies to the whole sequence, and it is easier, given the code
+   structure that calls this, to handle it from a different area of the code.
+   For this reason, the input parameters can be set so that it returns valid
+   only on one or the other of these cases.
+
+   Another possibility is for the input to be an empty \N{}, which for
+   backwards compatibility we accept, but generate a NOTHING node which should
+   later get optimized out.  This is handled from the area of code which can
+   handle a named sequence, so if called with the parameters for the other, it
+   fails.
+
+   Still another possibility is for the \N to mean [^\n], and not a single
+   character or explicit sequence at all.  This is determined by context.
+   Again, this is handled from the area of code which can handle a named
+   sequence, so if called with the parameters for the other, it also fails.
+
+   And the final possibility is for the \N to be called from within a bracketed
+   character class.  In this case the [^\n] meaning makes no sense, and so is
+   an error.  Other anomalous situations are left to the calling code to handle.
+
+   For non-single-quoted regexes, the tokenizer has attempted to decide which
+   of the above applies, and in the case of a named sequence, has converted it
     into one of the forms: \N{} (if the sequence is null), or \N{U+c1.c2...},
     where c1... are the characters in the sequence.  For single-quoted regexes,
     the tokenizer passes the \N sequence through unchanged; this code will not
     attempt to determine this nor expand those, instead raising a syntax error.
     The net effect is that if the beginning of the passed-in pattern isn't '{U+'
     or there is no '}', it signals that this \N occurrence means to match a
     into one of the forms: \N{} (if the sequence is null), or \N{U+c1.c2...},
     where c1... are the characters in the sequence.  For single-quoted regexes,
     the tokenizer passes the \N sequence through unchanged; this code will not
     attempt to determine this nor expand those, instead raising a syntax error.
     The net effect is that if the beginning of the passed-in pattern isn't '{U+'
     or there is no '}', it signals that this \N occurrence means to match a
-   non-newline.
+   non-newline. (This mostly was done because of [perl #56444].)
  
  
-   Only the \N{U+...} form should occur in a character class, for the same
-   reason that '.' inside a character class means to just match a period: it
-   just doesn't make sense.
+   The API is somewhat convoluted due to historical and the above reasons.
  
     The function raises an error (via vFAIL), and doesn't return for various
  
     The function raises an error (via vFAIL), and doesn't return for various
-   syntax errors.  Otherwise it returns TRUE and sets <node_p> or <valuep> on
-   success; it returns FALSE otherwise. Returns FALSE, setting *flagp to
-   RESTART_UTF8 if the sizing scan needs to be restarted. Such a restart is
-   only possible if node_p is non-NULL.
-
+   syntax errors.  For other failures, it returns (STRLEN) -1.  For successes,
+   it returns a count of how many characters were accounted for by it.  (This
+   can be 0 for \N{}; 1 for it meaning [^\n]; and otherwise the number of code
+   points in the sequence.  It sets <node_p>, <valuep>, and/or
+   <substitute_parse> on success.
  
     If <valuep> is non-null, it means the caller can accept an input sequence
  
     If <valuep> is non-null, it means the caller can accept an input sequence
-   consisting of a just a single code point; <*valuep> is set to that value
-   if the input is such.
-
-   If <node_p> is non-null it signifies that the caller can accept any other
-   legal sequence (i.e., one that isn't just a single code point).  <*node_p>
-   is set as follows:
-    1) \N means not-a-NL: points to a newly created REG_ANY node;
-    2) \N{}:              points to a new NOTHING node;
+   consisting of a just a single code point; <*valuep> is set to the value
+   of the only or first code point in the input.
+
+   If <substitute_parse> is non-null, it means the caller can accept an input
+   sequence consisting of one or more code points; <*substitute_parse> is a
+   newly created mortal SV* in this case, containing \x{} escapes representing
+   those code points.
+
+   Both <valuep> and <substitute_parse> can be non-NULL.
+
+   If <node_p> is non-null, <substitute_parse> must be NULL.  This signifies
+   that the caller can accept any legal sequence other than a single code
+   point.  To wit, <*node_p> is set as follows:
+    1) \N means not-a-NL: points to a newly created REG_ANY node; return is 1
+    2) \N{}:              points to a new NOTHING node; return is 0
      3) otherwise:         points to a new EXACT node containing the resolved
      3) otherwise:         points to a new EXACT node containing the resolved
-                          string.
-   Note that FALSE is returned for single code point sequences if <valuep> is
-   null.
+                          string; return is the number of code points in the
+                          string.  This will never be 1.
+   Note that failure is returned for single code point sequences if <valuep> is
+   null and <node_p> is not.
   */
  
      char * endbrace;    /* '}' following the name */
   */
  
      char * endbrace;    /* '}' following the name */
@@ -10663,6 +10800,8 @@ S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state, regnode** node_p,
                             stream */
      bool has_multiple_chars; /* true if the input stream contains a sequence of
                                  more than one character */
                             stream */
      bool has_multiple_chars; /* true if the input stream contains a sequence of
                                  more than one character */
+    bool in_char_class = substitute_parse != NULL;
+    STRLEN count = 0;   /* Number of characters in this sequence */
  
      GET_RE_DEBUG_FLAGS_DECL;
  
  
      GET_RE_DEBUG_FLAGS_DECL;
  
@@ -10671,6 +10810,7 @@ S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state, regnode** node_p,
      GET_RE_DEBUG_FLAGS;
  
      assert(cBOOL(node_p) ^ cBOOL(valuep));  /* Exactly one should be set */
      GET_RE_DEBUG_FLAGS;
  
      assert(cBOOL(node_p) ^ cBOOL(valuep));  /* Exactly one should be set */
+    assert(! (node_p && substitute_parse)); /* At most 1 should be set */
  
      /* The [^\n] meaning of \N ignores spaces and comments under the /x
       * modifier.  The other meaning does not, so use a temporary until we find
  
      /* The [^\n] meaning of \N ignores spaces and comments under the /x
       * modifier.  The other meaning does not, so use a temporary until we find
@@ -10682,14 +10822,14 @@ S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state, regnode** node_p,
  
      /* Disambiguate between \N meaning a named character versus \N meaning
       * [^\n].  The former is assumed when it can't be the latter. */
  
      /* Disambiguate between \N meaning a named character versus \N meaning
       * [^\n].  The former is assumed when it can't be the latter. */
-    if (*p != '{' || regcurly(p, FALSE)) {
+    if (*p != '{' || regcurly(p)) {
         RExC_parse = p;
         if (! node_p) {
             /* no bare \N allowed in a charclass */
              if (in_char_class) {
                  vFAIL("\\N in a character class must be a named character: \\N{...}");
              }
         RExC_parse = p;
         if (! node_p) {
             /* no bare \N allowed in a charclass */
              if (in_char_class) {
                  vFAIL("\\N in a character class must be a named character: \\N{...}");
              }
-            return FALSE;
+            return (STRLEN) -1;
          }
          RExC_parse--;   /* Need to back off so nextchar() doesn't skip the
                             current char */
          }
          RExC_parse--;   /* Need to back off so nextchar() doesn't skip the
                             current char */
@@ -10698,7 +10838,7 @@ S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state, regnode** node_p,
         *flagp |= HASWIDTH|SIMPLE;
         RExC_naughty++;
          Set_Node_Length(*node_p, 1); /* MJD */
         *flagp |= HASWIDTH|SIMPLE;
         RExC_naughty++;
          Set_Node_Length(*node_p, 1); /* MJD */
-       return TRUE;
+       return 1;
      }
  
      /* Here, we have decided it should be a named character or sequence */
      }
  
      /* Here, we have decided it should be a named character or sequence */
@@ -10725,28 +10865,14 @@ S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state, regnode** node_p,
      }
  
      if (endbrace == RExC_parse) {   /* empty: \N{} */
      }
  
      if (endbrace == RExC_parse) {   /* empty: \N{} */
-        bool ret = TRUE;
         if (node_p) {
             *node_p = reg_node(pRExC_state,NOTHING);
         }
         if (node_p) {
             *node_p = reg_node(pRExC_state,NOTHING);
         }
-        else if (in_char_class) {
-            if (SIZE_ONLY && in_char_class) {
-                if (strict) {
-                    RExC_parse++;   /* Position after the "}" */
-                    vFAIL("Zero length \\N{}");
-                }
-                else {
-                    ckWARNreg(RExC_parse,
-                              "Ignoring zero length \\N{} in character class");
-                }
-            }
-            ret = FALSE;
-       }
-        else {
-            return FALSE;
+        else if (! in_char_class) {
+            return (STRLEN) -1;
          }
          nextchar(pRExC_state);
          }
          nextchar(pRExC_state);
-        return ret;
+        return 0;
      }
  
      RExC_uni_semantics = 1; /* Unicode named chars imply Unicode semantics */
      }
  
      RExC_uni_semantics = 1; /* Unicode named chars imply Unicode semantics */
@@ -10758,90 +10884,103 @@ S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state, regnode** node_p,
       * point, and is terminated by the brace */
      has_multiple_chars = (endchar < endbrace);
  
       * point, and is terminated by the brace */
      has_multiple_chars = (endchar < endbrace);
  
-    if (valuep && (! has_multiple_chars || in_char_class)) {
-       /* We only pay attention to the first char of
-        multichar strings being returned in char classes. I kinda wonder
-       if this makes sense as it does change the behaviour
-       from earlier versions, OTOH that behaviour was broken
-       as well. XXX Solution is to recharacterize as
-       [rest-of-class]|multi1|multi2... */
-
+    /* We get the first code point if we want it, and either there is only one,
+     * or we can accept both cases of one and more than one */
+    if (valuep && (substitute_parse || ! has_multiple_chars)) {
         STRLEN length_of_hex = (STRLEN)(endchar - RExC_parse);
         I32 grok_hex_flags = PERL_SCAN_ALLOW_UNDERSCORES
         STRLEN length_of_hex = (STRLEN)(endchar - RExC_parse);
         I32 grok_hex_flags = PERL_SCAN_ALLOW_UNDERSCORES
-           | PERL_SCAN_DISALLOW_PREFIX
-           | (SIZE_ONLY ? PERL_SCAN_SILENT_ILLDIGIT : 0);
+                           | PERL_SCAN_DISALLOW_PREFIX
+
+                             /* No errors in the first pass (See [perl
+                              * #122671].)  We let the code below find the
+                              * errors when there are multiple chars. */
+                           | ((SIZE_ONLY || has_multiple_chars)
+                              ? PERL_SCAN_SILENT_ILLDIGIT
+                              : 0);
  
         *valuep = grok_hex(RExC_parse, &length_of_hex, &grok_hex_flags, NULL);
  
         /* The tokenizer should have guaranteed validity, but it's possible to
  
         *valuep = grok_hex(RExC_parse, &length_of_hex, &grok_hex_flags, NULL);
  
         /* The tokenizer should have guaranteed validity, but it's possible to
-        * bypass it by using single quoting, so check */
-       if (length_of_hex == 0
-           || length_of_hex != (STRLEN)(endchar - RExC_parse) )
-       {
-           RExC_parse += length_of_hex;        /* Includes all the valid */
-           RExC_parse += (RExC_orig_utf8)      /* point to after 1st invalid */
-                           ? UTF8SKIP(RExC_parse)
-                           : 1;
-           /* Guard against malformed utf8 */
-           if (RExC_parse >= endchar) {
-                RExC_parse = endchar;
+         * bypass it by using single quoting, so check.  Don't do the check
+         * here when there are multiple chars; we do it below anyway. */
+        if (! has_multiple_chars) {
+            if (length_of_hex == 0
+                || length_of_hex != (STRLEN)(endchar - RExC_parse) )
+            {
+                RExC_parse += length_of_hex;   /* Includes all the valid */
+                RExC_parse += (RExC_orig_utf8) /* point to after 1st invalid */
+                                ? UTF8SKIP(RExC_parse)
+                                : 1;
+                /* Guard against malformed utf8 */
+                if (RExC_parse >= endchar) {
+                    RExC_parse = endchar;
+                }
+                vFAIL("Invalid hexadecimal number in \\N{U+...}");
              }
              }
-           vFAIL("Invalid hexadecimal number in \\N{U+...}");
-       }
  
  
-        if (in_char_class && has_multiple_chars) {
-            if (strict) {
-                RExC_parse = endbrace;
-                vFAIL("\\N{} in character class restricted to one character");
-            }
-            else {
-                ckWARNreg(endchar, "Using just the first character returned by \\N{} in character class");
-            }
+            RExC_parse = endbrace + 1;
+            return 1;
          }
          }
-
-        RExC_parse = endbrace + 1;
      }
      }
-    else if (! node_p || ! has_multiple_chars) {
  
  
-        /* Here, the input is legal, but not according to the caller's
-         * options.  We fail without advancing the parse, so that the
-         * caller can try again */
+    /* Here, we should have already handled the case where a single character
+     * is expected and found.  So it is a failure if we aren't expecting
+     * multiple chars and got them; or didn't get them but wanted them.  We
+     * fail without advancing the parse, so that the caller can try again with
+     * different acceptance criteria */
+    if ((! node_p && ! substitute_parse) || ! has_multiple_chars) {
          RExC_parse = p;
          RExC_parse = p;
-        return FALSE;
+        return (STRLEN) -1;
      }
      }
-    else {
+
+    {
  
         /* What is done here is to convert this to a sub-pattern of the form
  
         /* What is done here is to convert this to a sub-pattern of the form
-        * (?:\x{char1}\x{char2}...)
-        * and then call reg recursively.  That way, it retains its atomicness,
-        * while not having to worry about special handling that some code
-        * points may have.  toke.c has converted the original Unicode values
-        * to native, so that we can just pass on the hex values unchanged.  We
-        * do have to set a flag to keep recoding from happening in the
-        * recursion */
-
-       SV * substitute_parse = newSVpvn_flags("?:", 2, SVf_UTF8|SVs_TEMP);
+        * \x{char1}\x{char2}...
+         * and then either return it in <*substitute_parse> if non-null; or
+         * call reg recursively to parse it (enclosing in "(?: ... )" ).  That
+         * way, it retains its atomicness, while not having to worry about
+         * special handling that some code points may have.  toke.c has
+         * converted the original Unicode values to native, so that we can just
+         * pass on the hex values unchanged.  We do have to set a flag to keep
+         * recoding from happening in the recursion */
+
+       SV * dummy = NULL;
         STRLEN len;
         char *orig_end = RExC_end;
          I32 flags;
  
         STRLEN len;
         char *orig_end = RExC_end;
          I32 flags;
  
+        if (substitute_parse) {
+            *substitute_parse = newSVpvs("");
+        }
+        else {
+            substitute_parse = &dummy;
+            *substitute_parse = newSVpvs("?:");
+        }
+        *substitute_parse = sv_2mortal(*substitute_parse);
+
         while (RExC_parse < endbrace) {
  
             /* Convert to notation the rest of the code understands */
         while (RExC_parse < endbrace) {
  
             /* Convert to notation the rest of the code understands */
-           sv_catpv(substitute_parse, "\\x{");
-           sv_catpvn(substitute_parse, RExC_parse, endchar - RExC_parse);
-           sv_catpv(substitute_parse, "}");
+           sv_catpv(*substitute_parse, "\\x{");
+           sv_catpvn(*substitute_parse, RExC_parse, endchar - RExC_parse);
+           sv_catpv(*substitute_parse, "}");
  
             /* Point to the beginning of the next character in the sequence. */
             RExC_parse = endchar + 1;
             endchar = RExC_parse + strcspn(RExC_parse, ".}");
  
             /* Point to the beginning of the next character in the sequence. */
             RExC_parse = endchar + 1;
             endchar = RExC_parse + strcspn(RExC_parse, ".}");
+
+            count++;
         }
         }
-       sv_catpv(substitute_parse, ")");
+        if (! in_char_class) {
+            sv_catpv(*substitute_parse, ")");
+        }
  
  
-       RExC_parse = SvPV(substitute_parse, len);
+       RExC_parse = SvPV(*substitute_parse, len);
  
         /* Don't allow empty number */
  
         /* Don't allow empty number */
-       if (len < 8) {
+       if (len < (STRLEN) ((substitute_parse) ? 6 : 8)) {
+            RExC_parse = endbrace;
             vFAIL("Invalid hexadecimal number in \\N{U+...}");
         }
         RExC_end = RExC_parse + len;
             vFAIL("Invalid hexadecimal number in \\N{U+...}");
         }
         RExC_end = RExC_parse + len;
@@ -10849,15 +10988,17 @@ S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state, regnode** node_p,
         /* The values are Unicode, and therefore not subject to recoding */
         RExC_override_recoding = 1;
  
         /* The values are Unicode, and therefore not subject to recoding */
         RExC_override_recoding = 1;
  
-       if (!(*node_p = reg(pRExC_state, 1, &flags, depth+1))) {
-            if (flags & RESTART_UTF8) {
-                *flagp = RESTART_UTF8;
-                return FALSE;
+        if (node_p) {
+            if (!(*node_p = reg(pRExC_state, 1, &flags, depth+1))) {
+                if (flags & RESTART_UTF8) {
+                    *flagp = RESTART_UTF8;
+                    return (STRLEN) -1;
+                }
+                FAIL2("panic: reg returned NULL to grok_bslash_N, flags=%#"UVxf"",
+                    (UV) flags);
              }
              }
-            FAIL2("panic: reg returned NULL to grok_bslash_N, flags=%#"UVxf"",
-                  (UV) flags);
+            *flagp |= flags&(HASWIDTH|SPSTART|SIMPLE|POSTPONED);
          }
          }
-       *flagp |= flags&(HASWIDTH|SPSTART|SIMPLE|POSTPONED);
  
         RExC_parse = endbrace;
         RExC_end = orig_end;
  
         RExC_parse = endbrace;
         RExC_end = orig_end;
@@ -10866,7 +11007,7 @@ S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state, regnode** node_p,
          nextchar(pRExC_state);
      }
  
          nextchar(pRExC_state);
      }
  
-    return TRUE;
+    return count;
  }
  
  
  }
  
  
@@ -10904,7 +11045,7 @@ S_reg_recode(pTHX_ const char value, SV **encp)
  }
  
  PERL_STATIC_INLINE U8
  }
  
  PERL_STATIC_INLINE U8
-S_compute_EXACTish(pTHX_ RExC_state_t *pRExC_state)
+S_compute_EXACTish(RExC_state_t *pRExC_state)
  {
      U8 op;
  
  {
      U8 op;
  
@@ -10972,15 +11113,21 @@ S_alloc_maybe_populate_EXACT(pTHX_ RExC_state_t *pRExC_state,
                  if (LOC || ! FOLD) {    /* /l defers folding until runtime */
                      *character = (U8) code_point;
                  }
                  if (LOC || ! FOLD) {    /* /l defers folding until runtime */
                      *character = (U8) code_point;
                  }
-                else { /* Here is /i and not /l (toFOLD() is defined on just
+                else { /* Here is /i and not /l. (toFOLD() is defined on just
                            ASCII, which isn't the same thing as INVARIANT on
                            EBCDIC, but it works there, as the extra invariants
                            fold to themselves) */
                      *character = toFOLD((U8) code_point);
                            ASCII, which isn't the same thing as INVARIANT on
                            EBCDIC, but it works there, as the extra invariants
                            fold to themselves) */
                      *character = toFOLD((U8) code_point);
-                    if (downgradable
-                        && *character == code_point
-                        && ! HAS_NONLATIN1_FOLD_CLOSURE(code_point))
-                    {
+
+                    /* We can downgrade to an EXACT node if this character
+                     * isn't a folding one.  Note that this assumes that
+                     * nothing above Latin1 folds to some other invariant than
+                     * one of these alphabetics; otherwise we would also have
+                     * to check:
+                     *  && (! HAS_NONLATIN1_FOLD_CLOSURE(code_point)
+                     *      || ASCII_FOLD_RESTRICTED))
+                     */
+                    if (downgradable && PL_fold[code_point] == code_point) {
                          OP(node) = EXACT;
                      }
                  }
                          OP(node) = EXACT;
                      }
                  }
@@ -10997,7 +11144,10 @@ S_alloc_maybe_populate_EXACT(pTHX_ RExC_state_t *pRExC_state,
                                                        ? FOLD_FLAGS_NOMIX_ASCII
                                                        : 0));
                  if (downgradable
                                                        ? FOLD_FLAGS_NOMIX_ASCII
                                                        : 0));
                  if (downgradable
-                    && folded == code_point
+                    && folded == code_point /* This quickly rules out many
+                                               cases, avoiding the
+                                               _invlist_contains_cp() overhead
+                                               for those.  */
                      && ! _invlist_contains_cp(PL_utf8_foldable, code_point))
                  {
                      OP(node) = EXACT;
                      && ! _invlist_contains_cp(PL_utf8_foldable, code_point))
                  {
                      OP(node) = EXACT;
@@ -11077,18 +11227,17 @@ S_alloc_maybe_populate_EXACT(pTHX_ RExC_state_t *pRExC_state,
  }
  
  
  }
  
  
-/* return atoi(p), unless it's too big to sensibly be a backref,
+/* Parse backref decimal value, unless it's too big to sensibly be a backref,
   * in which case return I32_MAX (rather than possibly 32-bit wrapping) */
  
  static I32
  S_backref_value(char *p)
  {
   * in which case return I32_MAX (rather than possibly 32-bit wrapping) */
  
  static I32
  S_backref_value(char *p)
  {
-    char *q = p;
-
-    for (;isDIGIT(*q); q++) {} /* calculate length of num */
-    if (q - p == 0 || q - p > 9)
+    const char* endptr;
+    UV val = grok_atou(p, &endptr);
+    if (endptr == p || endptr == NULL || val > I32_MAX)
          return I32_MAX;
          return I32_MAX;
-    return atoi(p);
+    return (I32)val;
  }
  
  
  }
  
  
@@ -11161,12 +11310,12 @@ S_backref_value(char *p)
  STATIC regnode *
  S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
  {
  STATIC regnode *
  S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
  {
-    dVAR;
      regnode *ret = NULL;
      I32 flags = 0;
      char *parse_start = RExC_parse;
      U8 op;
      int invert = 0;
      regnode *ret = NULL;
      I32 flags = 0;
      char *parse_start = RExC_parse;
      U8 op;
      int invert = 0;
+    U8 arg;
  
      GET_RE_DEBUG_FLAGS_DECL;
  
  
      GET_RE_DEBUG_FLAGS_DECL;
  
@@ -11183,10 +11332,8 @@ tryagain:
         nextchar(pRExC_state);
         if (RExC_flags & RXf_PMf_MULTILINE)
             ret = reg_node(pRExC_state, MBOL);
         nextchar(pRExC_state);
         if (RExC_flags & RXf_PMf_MULTILINE)
             ret = reg_node(pRExC_state, MBOL);
-       else if (RExC_flags & RXf_PMf_SINGLELINE)
-           ret = reg_node(pRExC_state, SBOL);
         else
         else
-           ret = reg_node(pRExC_state, BOL);
+           ret = reg_node(pRExC_state, SBOL);
          Set_Node_Length(ret, 1); /* MJD */
         break;
      case '$':
          Set_Node_Length(ret, 1); /* MJD */
         break;
      case '$':
@@ -11195,10 +11342,8 @@ tryagain:
             RExC_seen_zerolen++;
         if (RExC_flags & RXf_PMf_MULTILINE)
             ret = reg_node(pRExC_state, MEOL);
             RExC_seen_zerolen++;
         if (RExC_flags & RXf_PMf_MULTILINE)
             ret = reg_node(pRExC_state, MEOL);
-       else if (RExC_flags & RXf_PMf_SINGLELINE)
-           ret = reg_node(pRExC_state, SEOL);
         else
         else
-           ret = reg_node(pRExC_state, EOL);
+           ret = reg_node(pRExC_state, SEOL);
          Set_Node_Length(ret, 1); /* MJD */
         break;
      case '.':
          Set_Node_Length(ret, 1); /* MJD */
         break;
      case '.':
@@ -11263,12 +11408,6 @@ tryagain:
         vFAIL("Internal urp");
                                 /* Supposed to be caught earlier. */
         break;
         vFAIL("Internal urp");
                                 /* Supposed to be caught earlier. */
         break;
-    case '{':
-       if (!regcurly(RExC_parse, FALSE)) {
-           RExC_parse++;
-           goto defchar;
-       }
-       /* FALLTHROUGH */
      case '?':
      case '+':
      case '*':
      case '?':
      case '+':
      case '*':
@@ -11289,11 +11428,15 @@ tryagain:
            literal text handling code.
         */
         switch ((U8)*++RExC_parse) {
            literal text handling code.
         */
         switch ((U8)*++RExC_parse) {
-            U8 arg;
         /* Special Escapes */
         case 'A':
             RExC_seen_zerolen++;
             ret = reg_node(pRExC_state, SBOL);
         /* Special Escapes */
         case 'A':
             RExC_seen_zerolen++;
             ret = reg_node(pRExC_state, SBOL);
+            /* SBOL is shared with /^/ so we set the flags so we can tell
+             * /\A/ from /^/ in split. We check ret because first pass we
+             * have no regop struct to set the flags on. */
+            if (PASS2)
+                ret->flags = 1;
             *flagp |= SIMPLE;
             goto finish_meta_pat;
         case 'G':
             *flagp |= SIMPLE;
             goto finish_meta_pat;
         case 'G':
@@ -11325,6 +11468,9 @@ tryagain:
             ret = reg_node(pRExC_state, CANY);
              RExC_seen |= REG_CANY_SEEN;
             *flagp |= HASWIDTH|SIMPLE;
             ret = reg_node(pRExC_state, CANY);
              RExC_seen |= REG_CANY_SEEN;
             *flagp |= HASWIDTH|SIMPLE;
+            if (PASS2) {
+                ckWARNdep(RExC_parse+1, "\\C is deprecated");
+            }
             goto finish_meta_pat;
         case 'X':
             ret = reg_node(pRExC_state, CLUMP);
             goto finish_meta_pat;
         case 'X':
             ret = reg_node(pRExC_state, CLUMP);
@@ -11351,7 +11497,7 @@ tryagain:
             ret = reg_node(pRExC_state, op);
             FLAGS(ret) = get_regex_charset(RExC_flags);
             *flagp |= SIMPLE;
             ret = reg_node(pRExC_state, op);
             FLAGS(ret) = get_regex_charset(RExC_flags);
             *flagp |= SIMPLE;
-           if (! SIZE_ONLY && (U8) *(RExC_parse + 1) == '{') {
+           if ((U8) *(RExC_parse + 1) == '{') {
                  /* diag_listed_as: Use "%s" instead of "%s" */
                 vFAIL("Use \"\\b\\{\" instead of \"\\b{\"");
             }
                  /* diag_listed_as: Use "%s" instead of "%s" */
                 vFAIL("Use \"\\b\\{\" instead of \"\\b{\"");
             }
@@ -11369,7 +11515,7 @@ tryagain:
             ret = reg_node(pRExC_state, op);
             FLAGS(ret) = get_regex_charset(RExC_flags);
             *flagp |= SIMPLE;
             ret = reg_node(pRExC_state, op);
             FLAGS(ret) = get_regex_charset(RExC_flags);
             *flagp |= SIMPLE;
-           if (! SIZE_ONLY && (U8) *(RExC_parse + 1) == '{') {
+           if ((U8) *(RExC_parse + 1) == '{') {
                  /* diag_listed_as: Use "%s" instead of "%s" */
                 vFAIL("Use \"\\B\\{\" instead of \"\\B{\"");
             }
                  /* diag_listed_as: Use "%s" instead of "%s" */
                 vFAIL("Use \"\\B\\{\" instead of \"\\B{\"");
             }
@@ -11478,8 +11624,9 @@ tryagain:
               * special treatment for quantifiers is not needed for such single
               * character sequences */
              ++RExC_parse;
               * special treatment for quantifiers is not needed for such single
               * character sequences */
              ++RExC_parse;
-            if (! grok_bslash_N(pRExC_state, &ret, NULL, flagp, depth, FALSE,
-                                FALSE /* not strict */ )) {
+            if ((STRLEN) -1 == grok_bslash_N(pRExC_state, &ret, NULL, flagp,
+                                             depth, FALSE))
+            {
                  if (*flagp & RESTART_UTF8)
                      return NULL;
                  RExC_parse--;
                  if (*flagp & RESTART_UTF8)
                      return NULL;
                  RExC_parse--;
@@ -11780,10 +11927,12 @@ tryagain:
                           * point sequence.  Handle those in the switch() above
                           * */
                          RExC_parse = p + 1;
                           * point sequence.  Handle those in the switch() above
                           * */
                          RExC_parse = p + 1;
-                        if (! grok_bslash_N(pRExC_state, NULL, &ender,
-                                            flagp, depth, FALSE,
-                                            FALSE /* not strict */ ))
-                        {
+                        if ((STRLEN) -1 == grok_bslash_N(pRExC_state, NULL,
+                                                         &ender,
+                                                         flagp,
+                                                         depth,
+                                                         FALSE
+                        )) {
                              if (*flagp & RESTART_UTF8)
                                  FAIL("panic: grok_bslash_N set RESTART_UTF8");
                              RExC_parse = p = oldp;
                              if (*flagp & RESTART_UTF8)
                                  FAIL("panic: grok_bslash_N set RESTART_UTF8");
                              RExC_parse = p = oldp;
@@ -11807,11 +11956,11 @@ tryagain:
                         p++;
                         break;
                     case 'e':
                         p++;
                         break;
                     case 'e':
-                         ender = ASCII_TO_NATIVE('\033');
+                       ender = ESC_NATIVE;
                         p++;
                         break;
                     case 'a':
                         p++;
                         break;
                     case 'a':
-                         ender = '\a';
+                       ender = '\a';
                         p++;
                         break;
                     case 'o':
                         p++;
                         break;
                     case 'o':
@@ -11822,7 +11971,7 @@ tryagain:
                             bool valid = grok_bslash_o(&p,
                                                        &result,
                                                        &error_msg,
                             bool valid = grok_bslash_o(&p,
                                                        &result,
                                                        &error_msg,
-                                                      TRUE, /* out warnings */
+                                                      PASS2, /* out warnings */
                                                         FALSE, /* not strict */
                                                         TRUE, /* Output warnings
                                                                  for non-
                                                         FALSE, /* not strict */
                                                         TRUE, /* Output warnings
                                                                  for non-
@@ -11851,7 +12000,7 @@ tryagain:
                             bool valid = grok_bslash_x(&p,
                                                        &result,
                                                        &error_msg,
                             bool valid = grok_bslash_x(&p,
                                                        &result,
                                                        &error_msg,
-                                                      TRUE, /* out warnings */
+                                                      PASS2, /* out warnings */
                                                         FALSE, /* not strict */
                                                         TRUE, /* Output warnings
                                                                  for non-
                                                         FALSE, /* not strict */
                                                         TRUE, /* Output warnings
                                                                  for non-
@@ -11874,7 +12023,7 @@ tryagain:
                         }
                     case 'c':
                         p++;
                         }
                     case 'c':
                         p++;
-                       ender = grok_bslash_c(*p++, SIZE_ONLY);
+                       ender = grok_bslash_c(*p++, PASS2);
                         break;
                      case '8': case '9': /* must be a backreference */
                          --p;
                         break;
                      case '8': case '9': /* must be a backreference */
                          --p;
@@ -11913,7 +12062,7 @@ tryagain:
                                 REQUIRE_UTF8;
                             }
                             p += numlen;
                                 REQUIRE_UTF8;
                             }
                             p += numlen;
-                            if (SIZE_ONLY   /* like \08, \178 */
+                            if (PASS2   /* like \08, \178 */
                                  && numlen < 3
                                  && p < RExC_end
                                  && isDIGIT(*p) && ckWARN(WARN_REGEXP))
                                  && numlen < 3
                                  && p < RExC_end
                                  && isDIGIT(*p) && ckWARN(WARN_REGEXP))
@@ -11930,7 +12079,7 @@ tryagain:
                         if (! RExC_override_recoding) {
                             SV* enc = PL_encoding;
                             ender = reg_recode((const char)(U8)ender, &enc);
                         if (! RExC_override_recoding) {
                             SV* enc = PL_encoding;
                             ender = reg_recode((const char)(U8)ender, &enc);
-                           if (!enc && SIZE_ONLY)
+                           if (!enc && PASS2)
                                 ckWARNreg(p, "Invalid escape in the specified encoding");
                             REQUIRE_UTF8;
                         }
                                 ckWARNreg(p, "Invalid escape in the specified encoding");
                             REQUIRE_UTF8;
                         }
@@ -11950,8 +12099,18 @@ tryagain:
                         goto normal_default;
                     } /* End of switch on '\' */
                     break;
                         goto normal_default;
                     } /* End of switch on '\' */
                     break;
+               case '{':
+                   /* Currently we don't warn when the lbrace is at the start
+                    * of a construct.  This catches it in the middle of a
+                    * literal string, or when its the first thing after
+                    * something like "\b" */
+                   if (! SIZE_ONLY
+                       && (len || (p > RExC_start && isALPHA_A(*(p -1)))))
+                   {
+                       ckWARNregdep(p + 1, "Unescaped left brace in regex is deprecated, passed through");
+                   }
+                   /*FALLTHROUGH*/
                 default:    /* A literal character */
                 default:    /* A literal character */
-
                   normal_default:
                     if (UTF8_IS_START(*p) && UTF) {
                         STRLEN numlen;
                   normal_default:
                     if (UTF8_IS_START(*p) && UTF) {
                         STRLEN numlen;
@@ -12037,9 +12196,8 @@ tryagain:
                              && (PL_fold[ender] != PL_fold_latin1[ender]
                                  || ender == LATIN_SMALL_LETTER_SHARP_S
                                  || (len > 0
                              && (PL_fold[ender] != PL_fold_latin1[ender]
                                  || ender == LATIN_SMALL_LETTER_SHARP_S
                                  || (len > 0
-                                   && isARG2_lower_or_UPPER_ARG1('s', ender)
-                                   && isARG2_lower_or_UPPER_ARG1('s',
-                                                                 *(s-1)))))
+                                   && isALPHA_FOLD_EQ(ender, 's')
+                                   && isALPHA_FOLD_EQ(*(s-1), 's'))))
                          {
                              maybe_exactfu = FALSE;
                          }
                          {
                              maybe_exactfu = FALSE;
                          }
@@ -12223,7 +12381,7 @@ tryagain:
                       * as if it turns into an EXACTFU, it could later get
                       * joined with another 's' that would then wrongly match
                       * the sharp s */
                       * as if it turns into an EXACTFU, it could later get
                       * joined with another 's' that would then wrongly match
                       * the sharp s */
-                    if (maybe_exactfu && isARG2_lower_or_UPPER_ARG1('s', ender))
+                    if (maybe_exactfu && isALPHA_FOLD_EQ(ender, 's'))
                      {
                          maybe_exactfu = FALSE;
                      }
                      {
                          maybe_exactfu = FALSE;
                      }
@@ -12325,7 +12483,7 @@ tryagain:
  }
  
  STATIC char *
  }
  
  STATIC char *
-S_regpatws(pTHX_ RExC_state_t *pRExC_state, char *p , const bool recognize_comment )
+S_regpatws(RExC_state_t *pRExC_state, char *p , const bool recognize_comment )
  {
      /* Returns the next non-pattern-white space, non-comment character (the
       * latter only if 'recognize_comment is true) in the string p, which is
  {
      /* Returns the next non-pattern-white space, non-comment character (the
       * latter only if 'recognize_comment is true) in the string p, which is
@@ -12372,22 +12530,24 @@ S_populate_ANYOF_from_invlist(pTHX_ regnode *node, SV** invlist_ptr)
             UV high;
             int i;
  
             UV high;
             int i;
  
-            if (end == UV_MAX && start <= 256) {
-                ANYOF_FLAGS(node) |= ANYOF_ABOVE_LATIN1_ALL;
+            if (end == UV_MAX && start <= NUM_ANYOF_CODE_POINTS) {
+                ANYOF_FLAGS(node) |= ANYOF_MATCHES_ALL_ABOVE_BITMAP;
              }
              }
-            else if (end >= 256) {
-                ANYOF_FLAGS(node) |= ANYOF_UTF8;
+            else if (end >= NUM_ANYOF_CODE_POINTS) {
+                ANYOF_FLAGS(node) |= ANYOF_HAS_UTF8_NONBITMAP_MATCHES;
              }
  
             /* Quit if are above what we should change */
              }
  
             /* Quit if are above what we should change */
-           if (start > 255) {
+           if (start >= NUM_ANYOF_CODE_POINTS) {
                 break;
             }
  
             change_invlist = TRUE;
  
             /* Set all the bits in the range, up to the max that we are doing */
                 break;
             }
  
             change_invlist = TRUE;
  
             /* Set all the bits in the range, up to the max that we are doing */
-           high = (end < 255) ? end : 255;
+           high = (end < NUM_ANYOF_CODE_POINTS - 1)
+                   ? end
+                   : NUM_ANYOF_CODE_POINTS - 1;
             for (i = start; i <= (int) high; i++) {
                 if (! ANYOF_BITMAP_TEST(node, i)) {
                     ANYOF_BITMAP_SET(node, i);
             for (i = start; i <= (int) high; i++) {
                 if (! ANYOF_BITMAP_TEST(node, i)) {
                     ANYOF_BITMAP_SET(node, i);
@@ -12397,13 +12557,13 @@ S_populate_ANYOF_from_invlist(pTHX_ regnode *node, SV** invlist_ptr)
         invlist_iterfinish(*invlist_ptr);
  
          /* Done with loop; remove any code points that are in the bitmap from
         invlist_iterfinish(*invlist_ptr);
  
          /* Done with loop; remove any code points that are in the bitmap from
-         * *invlist_ptr; similarly for code points above latin1 if we have a
-         * flag to match all of them anyways */
+         * *invlist_ptr; similarly for code points above the bitmap if we have
+         * a flag to match all of them anyways */
         if (change_invlist) {
         if (change_invlist) {
-           _invlist_subtract(*invlist_ptr, PL_Latin1, invlist_ptr);
+           _invlist_subtract(*invlist_ptr, PL_InBitmap, invlist_ptr);
         }
         }
-        if (ANYOF_FLAGS(node) & ANYOF_ABOVE_LATIN1_ALL) {
-           _invlist_intersection(*invlist_ptr, PL_Latin1, invlist_ptr);
+        if (ANYOF_FLAGS(node) & ANYOF_MATCHES_ALL_ABOVE_BITMAP) {
+           _invlist_intersection(*invlist_ptr, PL_InBitmap, invlist_ptr);
         }
  
         /* If have completely emptied it, remove it completely */
         }
  
         /* If have completely emptied it, remove it completely */
@@ -12427,7 +12587,6 @@ S_populate_ANYOF_from_invlist(pTHX_ regnode *node, SV** invlist_ptr)
  PERL_STATIC_INLINE I32
  S_regpposixcc(pTHX_ RExC_state_t *pRExC_state, I32 value, const bool strict)
  {
  PERL_STATIC_INLINE I32
  S_regpposixcc(pTHX_ RExC_state_t *pRExC_state, I32 value, const bool strict)
  {
-    dVAR;
      I32 namedclass = OOB_NAMEDCLASS;
  
      PERL_ARGS_ASSERT_REGPPOSIXCC;
      I32 namedclass = OOB_NAMEDCLASS;
  
      PERL_ARGS_ASSERT_REGPPOSIXCC;
@@ -12569,7 +12728,7 @@ S_regpposixcc(pTHX_ RExC_state_t *pRExC_state, I32 value, const bool strict)
  }
  
  STATIC bool
  }
  
  STATIC bool
-S_could_it_be_a_POSIX_class(pTHX_ RExC_state_t *pRExC_state)
+S_could_it_be_a_POSIX_class(RExC_state_t *pRExC_state)
  {
      /* This applies some heuristics at the current parse position (which should
       * be at a '[') to see if what follows might be intended to be a [:posix:]
  {
      /* This applies some heuristics at the current parse position (which should
       * be at a '[') to see if what follows might be intended to be a [:posix:]
@@ -12654,9 +12813,7 @@ S_handle_regex_sets(pTHX_ RExC_state_t *pRExC_state, SV** return_invlist,
       * upon an unescaped ']' that isn't one ending a regclass.  To do both
       * these things, we need to realize that something preceded by a backslash
       * is escaped, so we have to keep track of backslashes */
       * upon an unescaped ']' that isn't one ending a regclass.  To do both
       * these things, we need to realize that something preceded by a backslash
       * is escaped, so we have to keep track of backslashes */
-    if (SIZE_ONLY) {
-        UV depth = 0; /* how many nested (?[...]) constructs */
-
+    if (PASS2) {
          Perl_ck_warner_d(aTHX_
              packWARN(WARN_EXPERIMENTAL__REGEX_SETS),
              "The regex_sets feature is experimental" REPORT_LOCATION,
          Perl_ck_warner_d(aTHX_
              packWARN(WARN_EXPERIMENTAL__REGEX_SETS),
              "The regex_sets feature is experimental" REPORT_LOCATION,
@@ -12664,6 +12821,9 @@ S_handle_regex_sets(pTHX_ RExC_state_t *pRExC_state, SV** return_invlist,
                  UTF8fARG(UTF,
                           RExC_end - RExC_start - (RExC_parse - RExC_precomp),
                           RExC_precomp + (RExC_parse - RExC_precomp)));
                  UTF8fARG(UTF,
                           RExC_end - RExC_start - (RExC_parse - RExC_precomp),
                           RExC_precomp + (RExC_parse - RExC_precomp)));
+    }
+    else {
+        UV depth = 0; /* how many nested (?[...]) constructs */
  
          while (RExC_parse < RExC_end) {
              SV* current = NULL;
  
          while (RExC_parse < RExC_end) {
              SV* current = NULL;
@@ -13114,6 +13274,110 @@ S_handle_regex_sets(pTHX_ RExC_state_t *pRExC_state, SV** return_invlist,
  }
  #undef IS_OPERAND
  
  }
  #undef IS_OPERAND
  
+STATIC void
+S_add_above_Latin1_folds(pTHX_ RExC_state_t *pRExC_state, const U8 cp, SV** invlist)
+{
+    /* This hard-codes the Latin1/above-Latin1 folding rules, so that an
+     * innocent-looking character class, like /[ks]/i won't have to go out to
+     * disk to find the possible matches.
+     *
+     * This should be called only for a Latin1-range code points, cp, which is
+     * known to be involved in a simple fold with other code points above
+     * Latin1.  It would give false results if /aa has been specified.
+     * Multi-char folds are outside the scope of this, and must be handled
+     * specially.
+     *
+     * XXX It would be better to generate these via regen, in case a new
+     * version of the Unicode standard adds new mappings, though that is not
+     * really likely, and may be caught by the default: case of the switch
+     * below. */
+
+    PERL_ARGS_ASSERT_ADD_ABOVE_LATIN1_FOLDS;
+
+    assert(HAS_NONLATIN1_SIMPLE_FOLD_CLOSURE(cp));
+
+    switch (cp) {
+        case 'k':
+        case 'K':
+          *invlist =
+             add_cp_to_invlist(*invlist, KELVIN_SIGN);
+            break;
+        case 's':
+        case 'S':
+          *invlist = add_cp_to_invlist(*invlist, LATIN_SMALL_LETTER_LONG_S);
+            break;
+        case MICRO_SIGN:
+          *invlist = add_cp_to_invlist(*invlist, GREEK_CAPITAL_LETTER_MU);
+          *invlist = add_cp_to_invlist(*invlist, GREEK_SMALL_LETTER_MU);
+            break;
+        case LATIN_CAPITAL_LETTER_A_WITH_RING_ABOVE:
+        case LATIN_SMALL_LETTER_A_WITH_RING_ABOVE:
+          *invlist = add_cp_to_invlist(*invlist, ANGSTROM_SIGN);
+            break;
+        case LATIN_SMALL_LETTER_Y_WITH_DIAERESIS:
+          *invlist = add_cp_to_invlist(*invlist,
+                                        LATIN_CAPITAL_LETTER_Y_WITH_DIAERESIS);
+            break;
+        case LATIN_SMALL_LETTER_SHARP_S:
+          *invlist = add_cp_to_invlist(*invlist, LATIN_CAPITAL_LETTER_SHARP_S);
+            break;
+        default:
+            /* Use deprecated warning to increase the chances of this being
+             * output */
+            if (PASS2) {
+                ckWARN2reg_d(RExC_parse, "Perl folding rules are not up-to-date for 0x%02X; please use the perlbug utility to report;", cp);
+            }
+            break;
+    }
+}
+
+STATIC AV *
+S_add_multi_match(pTHX_ AV* multi_char_matches, SV* multi_string, const STRLEN cp_count)
+{
+    /* This adds the string scalar <multi_string> to the array
+     * <multi_char_matches>.  <multi_string> is known to have exactly
+     * <cp_count> code points in it.  This is used when constructing a
+     * bracketed character class and we find something that needs to match more
+     * than a single character.
+     *
+     * <multi_char_matches> is actually an array of arrays.  Each top-level
+     * element is an array that contains all the strings known so far that are
+     * the same length.  And that length (in number of code points) is the same
+     * as the index of the top-level array.  Hence, the [2] element is an
+     * array, each element thereof is a string containing TWO code points;
+     * while element [3] is for strings of THREE characters, and so on.  Since
+     * this is for multi-char strings there can never be a [0] nor [1] element.
+     *
+     * When we rewrite the character class below, we will do so such that the
+     * longest strings are written first, so that it prefers the longest
+     * matching strings first.  This is done even if it turns out that any
+     * quantifier is non-greedy, out of this programmer's (khw) laziness.  Tom
+     * Christiansen has agreed that this is ok.  This makes the test for the
+     * ligature 'ffi' come before the test for 'ff', for example */
+
+    AV* this_array;
+    AV** this_array_ptr;
+
+    PERL_ARGS_ASSERT_ADD_MULTI_MATCH;
+
+    if (! multi_char_matches) {
+        multi_char_matches = newAV();
+    }
+
+    if (av_exists(multi_char_matches, cp_count)) {
+        this_array_ptr = (AV**) av_fetch(multi_char_matches, cp_count, FALSE);
+        this_array = *this_array_ptr;
+    }
+    else {
+        this_array = newAV();
+        av_store(multi_char_matches, cp_count,
+                 (SV*) this_array);
+    }
+    av_push(this_array, multi_string);
+
+    return multi_char_matches;
+}
+
  /* The names of properties whose definitions are not known at compile time are
   * stored in this SV, after a constant heading.  So if the length has been
   * changed since initialization, then there is a run-time definition. */
  /* The names of properties whose definitions are not known at compile time are
   * stored in this SV, after a constant heading.  So if the length has been
   * changed since initialization, then there is a run-time definition. */
@@ -13145,17 +13409,16 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
       * ignored in the recursion by means of a flag:
       * <RExC_in_multi_char_class>.)
       *
       * ignored in the recursion by means of a flag:
       * <RExC_in_multi_char_class>.)
       *
-     * ANYOF nodes contain a bit map for the first 256 characters, with the
-     * corresponding bit set if that character is in the list.  For characters
-     * above 255, a range list or swash is used.  There are extra bits for \w,
-     * etc. in locale ANYOFs, as what these match is not determinable at
-     * compile time
+     * ANYOF nodes contain a bit map for the first NUM_ANYOF_CODE_POINTS
+     * characters, with the corresponding bit set if that character is in the
+     * list.  For characters above this, a range list or swash is used.  There
+     * are extra bits for \w, etc. in locale ANYOFs, as what these match is not
+     * determinable at compile time
       *
       * Returns NULL, setting *flagp to RESTART_UTF8 if the sizing scan needs
       * to be restarted.  This can only happen if ret_invlist is non-NULL.
       */
  
       *
       * Returns NULL, setting *flagp to RESTART_UTF8 if the sizing scan needs
       * to be restarted.  This can only happen if ret_invlist is non-NULL.
       */
  
-    dVAR;
      UV prevvalue = OOB_UNICODE, save_prevvalue = OOB_UNICODE;
      IV range = 0;
      UV value = OOB_UNICODE, save_value = OOB_UNICODE;
      UV prevvalue = OOB_UNICODE, save_prevvalue = OOB_UNICODE;
      IV range = 0;
      UV value = OOB_UNICODE, save_value = OOB_UNICODE;
@@ -13297,7 +13560,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
      if (UCHARAT(RExC_parse) == ']')
         goto charclassloop;
  
      if (UCHARAT(RExC_parse) == ']')
         goto charclassloop;
  
-parseit:
      while (1) {
          if  (RExC_parse >= stop_ptr) {
              break;
      while (1) {
          if  (RExC_parse >= stop_ptr) {
              break;
@@ -13337,8 +13599,14 @@ parseit:
          {
              namedclass = regpposixcc(pRExC_state, value, strict);
          }
          {
              namedclass = regpposixcc(pRExC_state, value, strict);
          }
-        else if (value == '\\') {
-           if (UTF) {
+        else if (value != '\\') {
+#ifdef EBCDIC
+            literal_endpoint++;
+#endif
+        }
+        else {
+            /* Is a backslash; get the code point of the char after it */
+           if (UTF && ! UTF8_IS_INVARIANT(RExC_parse)) {
                 value = utf8n_to_uvchr((U8*)RExC_parse,
                                    RExC_end - RExC_parse,
                                    &numlen, UTF8_ALLOW_DEFAULT);
                 value = utf8n_to_uvchr((U8*)RExC_parse,
                                    RExC_end - RExC_parse,
                                    &numlen, UTF8_ALLOW_DEFAULT);
@@ -13371,19 +13639,54 @@ parseit:
             case 'H':   namedclass = ANYOF_NHORIZWS;    break;
              case 'N':  /* Handle \N{NAME} in class */
                  {
             case 'H':   namedclass = ANYOF_NHORIZWS;    break;
              case 'N':  /* Handle \N{NAME} in class */
                  {
-                    /* We only pay attention to the first char of
-                    multichar strings being returned. I kinda wonder
-                    if this makes sense as it does change the behaviour
-                    from earlier versions, OTOH that behaviour was broken
-                    as well. */
-                    if (! grok_bslash_N(pRExC_state, NULL, &value, flagp, depth,
-                                      TRUE, /* => charclass */
-                                      strict))
-                    {
-                        if (*flagp & RESTART_UTF8)
-                            FAIL("panic: grok_bslash_N set RESTART_UTF8");
-                        goto parseit;
+                    SV *as_text;
+                    STRLEN cp_count = grok_bslash_N(pRExC_state, NULL, &value,
+                                                    flagp, depth, &as_text);
+                    if (*flagp & RESTART_UTF8)
+                        FAIL("panic: grok_bslash_N set RESTART_UTF8");
+                    if (cp_count != 1) {    /* The typical case drops through */
+                        assert(cp_count != (STRLEN) -1);
+                        if (cp_count == 0) {
+                            if (strict) {
+                                RExC_parse++;   /* Position after the "}" */
+                                vFAIL("Zero length \\N{}");
+                            }
+                            else if (PASS2) {
+                                ckWARNreg(RExC_parse,
+                                        "Ignoring zero length \\N{} in character class");
+                            }
+                        }
+                        else { /* cp_count > 1 */
+                            if (! RExC_in_multi_char_class) {
+                                if (invert || range || *RExC_parse == '-') {
+                                    if (strict) {
+                                        RExC_parse--;
+                                        vFAIL("\\N{} in inverted character class or as a range end-point is restricted to one character");
+                                    }
+                                    else if (PASS2) {
+                                        ckWARNreg(RExC_parse, "Using just the first character returned by \\N{} in character class");
+                                    }
+                                }
+                                else {
+                                    multi_char_matches
+                                        = add_multi_match(multi_char_matches,
+                                                          as_text,
+                                                          cp_count);
+                                }
+                                break; /* <value> contains the first code
+                                          point. Drop out of the switch to
+                                          process it */
+                            }
+                        } /* End of cp_count != 1 */
+
+                        /* This element should not be processed further in this
+                         * class */
+                        element_count--;
+                        value = save_value;
+                        prevvalue = save_prevvalue;
+                        continue;   /* Back to top of loop to get next char */
                      }
                      }
+                    /* Here, is a single code point, and <value> contains it */
                  }
                  break;
             case 'p':
                  }
                  break;
             case 'p':
@@ -13420,7 +13723,6 @@ parseit:
                 }
                 if (!SIZE_ONLY) {
                      SV* invlist;
                 }
                 if (!SIZE_ONLY) {
                      SV* invlist;
-                    char* formatted;
                      char* name;
  
                     if (UCHARAT(RExC_parse) == '^') {
                      char* name;
  
                     if (UCHARAT(RExC_parse) == '^') {
@@ -13441,14 +13743,13 @@ parseit:
                       * will have its name be <__NAME_i>.  The design is
                       * discussed in commit
                       * 2f833f5208e26b208886e51e09e2c072b5eabb46 */
                       * will have its name be <__NAME_i>.  The design is
                       * discussed in commit
                       * 2f833f5208e26b208886e51e09e2c072b5eabb46 */
-                    formatted = Perl_form(aTHX_
+                    name = savepv(Perl_form(aTHX_
                                            "%s%.*s%s\n",
                                            (FOLD) ? "__" : "",
                                            (int)n,
                                            RExC_parse,
                                            (FOLD) ? "_i" : ""
                                            "%s%.*s%s\n",
                                            (FOLD) ? "__" : "",
                                            (int)n,
                                            RExC_parse,
                                            (FOLD) ? "_i" : ""
-                                );
-                    name = savepvn(formatted, strlen(formatted));
+                                ));
  
                      /* Look up the property name, and get its swash and
                       * inversion list, if the property is found  */
  
                      /* Look up the property name, and get its swash and
                       * inversion list, if the property is found  */
@@ -13462,6 +13763,9 @@ parseit:
                                               &swash_init_flags
                                              );
                      if (! swash || ! (invlist = _get_swash_invlist(swash))) {
                                               &swash_init_flags
                                              );
                      if (! swash || ! (invlist = _get_swash_invlist(swash))) {
+                        HV* curpkg = (IN_PERL_COMPILETIME)
+                                      ? PL_curstash
+                                      : CopSTASH(PL_curcop);
                          if (swash) {
                              SvREFCNT_dec_NN(swash);
                              swash = NULL;
                          if (swash) {
                              SvREFCNT_dec_NN(swash);
                              swash = NULL;
@@ -13477,6 +13781,22 @@ parseit:
                                  "Property '%"UTF8f"' is unknown",
                                  UTF8fARG(UTF, n, name));
                          }
                                  "Property '%"UTF8f"' is unknown",
                                  UTF8fARG(UTF, n, name));
                          }
+
+                        /* If the property name doesn't already have a package
+                         * name, add the current one to it so that it can be
+                         * referred to outside it. [perl #121777] */
+                        if (curpkg && ! instr(name, "::")) {
+                            char* pkgname = HvNAME(curpkg);
+                            if (strNE(pkgname, "main")) {
+                                char* full_name = Perl_form(aTHX_
+                                                            "%s::%s",
+                                                            pkgname,
+                                                            name);
+                                n = strlen(full_name);
+                                Safefree(name);
+                                name = savepvn(full_name, n);
+                            }
+                        }
                          Perl_sv_catpvf(aTHX_ listsv, "%cutf8::%"UTF8f"\n",
                                          (value == 'p' ? '+' : '!'),
                                          UTF8fARG(UTF, n, name));
                          Perl_sv_catpvf(aTHX_ listsv, "%cutf8::%"UTF8f"\n",
                                          (value == 'p' ? '+' : '!'),
                                          UTF8fARG(UTF, n, name));
@@ -13489,7 +13809,8 @@ parseit:
                           * inappropriately, except that any \p{}, including
                           * this one forces Unicode semantics, which means there
                           * is no <depends_list> */
                           * inappropriately, except that any \p{}, including
                           * this one forces Unicode semantics, which means there
                           * is no <depends_list> */
-                        ANYOF_FLAGS(ret) |= ANYOF_NONBITMAP_NON_UTF8;
+                        ANYOF_FLAGS(ret)
+                                      |= ANYOF_HAS_NONBITMAP_NON_UTF8_MATCHES;
                      }
                      else {
  
                      }
                      else {
  
@@ -13545,7 +13866,7 @@ parseit:
             case 't':   value = '\t';                   break;
             case 'f':   value = '\f';                   break;
             case 'b':   value = '\b';                   break;
             case 't':   value = '\t';                   break;
             case 'f':   value = '\f';                   break;
             case 'b':   value = '\b';                   break;
-           case 'e':   value = ASCII_TO_NATIVE('\033');break;
+           case 'e':   value = ESC_NATIVE;             break;
             case 'a':   value = '\a';                   break;
             case 'o':
                 RExC_parse--;   /* function expects to be pointed at the 'o' */
             case 'a':   value = '\a';                   break;
             case 'o':
                 RExC_parse--;   /* function expects to be pointed at the 'o' */
@@ -13554,8 +13875,8 @@ parseit:
                     bool valid = grok_bslash_o(&RExC_parse,
                                                &value,
                                                &error_msg,
                     bool valid = grok_bslash_o(&RExC_parse,
                                                &value,
                                                &error_msg,
-                                               SIZE_ONLY,   /* warnings in pass
-                                                               1 only */
+                                               PASS2,   /* warnings only in
+                                                           pass 2 */
                                                 strict,
                                                 silence_non_portable,
                                                 UTF);
                                                 strict,
                                                 silence_non_portable,
                                                 UTF);
@@ -13574,7 +13895,7 @@ parseit:
                     bool valid = grok_bslash_x(&RExC_parse,
                                                &value,
                                                &error_msg,
                     bool valid = grok_bslash_x(&RExC_parse,
                                                &value,
                                                &error_msg,
-                                              TRUE, /* Output warnings */
+                                              PASS2, /* Output warnings */
                                                 strict,
                                                 silence_non_portable,
                                                 UTF);
                                                 strict,
                                                 silence_non_portable,
                                                 UTF);
@@ -13586,7 +13907,7 @@ parseit:
                     goto recode_encoding;
                 break;
             case 'c':
                     goto recode_encoding;
                 break;
             case 'c':
-               value = grok_bslash_c(*RExC_parse++, SIZE_ONLY);
+               value = grok_bslash_c(*RExC_parse++, PASS2);
                 break;
             case '0': case '1': case '2': case '3': case '4':
             case '5': case '6': case '7':
                 break;
             case '0': case '1': case '2': case '3': case '4':
             case '5': case '6': case '7':
@@ -13626,7 +13947,7 @@ parseit:
                          if (strict) {
                              vFAIL("Invalid escape in the specified encoding");
                          }
                          if (strict) {
                              vFAIL("Invalid escape in the specified encoding");
                          }
-                        else if (SIZE_ONLY) {
+                        else if (PASS2) {
                              ckWARNreg(RExC_parse,
                                   "Invalid escape in the specified encoding");
                          }
                              ckWARNreg(RExC_parse,
                                   "Invalid escape in the specified encoding");
                          }
@@ -13651,10 +13972,6 @@ parseit:
                 break;
             }   /* End of switch on char following backslash */
         } /* end of handling backslash escape sequences */
                 break;
             }   /* End of switch on char following backslash */
         } /* end of handling backslash escape sequences */
-#ifdef EBCDIC
-        else
-            literal_endpoint++;
-#endif
  
          /* Here, we have the current token in 'value' */
  
  
          /* Here, we have the current token in 'value' */
  
@@ -13712,13 +14029,18 @@ parseit:
                      else {
                          RExC_emit += ANYOF_POSIXL_SKIP - ANYOF_SKIP;
                      }
                      else {
                          RExC_emit += ANYOF_POSIXL_SKIP - ANYOF_SKIP;
                      }
-                    ANYOF_FLAGS(ret) |= ANYOF_POSIXL;
+                    ANYOF_FLAGS(ret) |= ANYOF_MATCHES_POSIXL;
                      ANYOF_POSIXL_ZERO(ret);
                  }
  
                      ANYOF_POSIXL_ZERO(ret);
                  }
  
+                /* Coverity thinks it is possible for this to be negative; both
+                 * jhi and khw think it's not, but be safer */
+                assert(! (ANYOF_FLAGS(ret) & ANYOF_MATCHES_POSIXL)
+                       || (namedclass + ((namedclass % 2) ? -1 : 1)) >= 0);
+
                  /* See if it already matches the complement of this POSIX
                   * class */
                  /* See if it already matches the complement of this POSIX
                   * class */
-                if ((ANYOF_FLAGS(ret) & ANYOF_POSIXL)
+                if ((ANYOF_FLAGS(ret) & ANYOF_MATCHES_POSIXL)
                      && ANYOF_POSIXL_TEST(ret, namedclass + ((namedclass % 2)
                                                              ? -1
                                                              : 1)))
                      && ANYOF_POSIXL_TEST(ret, namedclass + ((namedclass % 2)
                                                              ? -1
                                                              : 1)))
@@ -13803,22 +14125,23 @@ parseit:
                                                       namedclass % 2 != 0,
                                                       posixes_ptr);
                  }
                                                       namedclass % 2 != 0,
                                                       posixes_ptr);
                  }
-                continue;   /* Go get next character */
             }
         } /* end of namedclass \blah */
  
             }
         } /* end of namedclass \blah */
  
-        /* Here, we have a single value.  If 'range' is set, it is the ending
-         * of a range--check its validity.  Later, we will handle each
-         * individual code point in the range.  If 'range' isn't set, this
-         * could be the beginning of a range, so check for that by looking
-         * ahead to see if the next real character to be processed is the range
-         * indicator--the minus sign */
-
          if (skip_white) {
              RExC_parse = regpatws(pRExC_state, RExC_parse,
                                  FALSE /* means don't recognize comments */ );
          }
  
          if (skip_white) {
              RExC_parse = regpatws(pRExC_state, RExC_parse,
                                  FALSE /* means don't recognize comments */ );
          }
  
+        /* If 'range' is set, 'value' is the ending of a range--check its
+         * validity.  (If value isn't a single code point in the case of a
+         * range, we should have figured that out above in the code that
+         * catches false ranges).  Later, we will handle each individual code
+         * point in the range.  If 'range' isn't set, this could be the
+         * beginning of a range, so check for that by looking ahead to see if
+         * the next real character to be processed is the range indicator--the
+         * minus sign */
+
         if (range) {
             if (prevvalue > value) /* b-a */ {
                 const int w = RExC_parse - rangebegin;
         if (range) {
             if (prevvalue > value) /* b-a */ {
                 const int w = RExC_parse - rangebegin;
@@ -13848,15 +14171,15 @@ parseit:
  
                      /* a bad range like \w-, [:word:]- ? */
                      if (namedclass > OOB_NAMEDCLASS) {
  
                      /* a bad range like \w-, [:word:]- ? */
                      if (namedclass > OOB_NAMEDCLASS) {
-                        if (strict || ckWARN(WARN_REGEXP)) {
-                            const int w =
-                                RExC_parse >= rangebegin ?
-                                RExC_parse - rangebegin : 0;
+                        if (strict || (PASS2 && ckWARN(WARN_REGEXP))) {
+                            const int w = RExC_parse >= rangebegin
+                                          ?  RExC_parse - rangebegin
+                                          : 0;
                              if (strict) {
                                  vFAIL4("False [] range \"%*.*s\"",
                                      w, w, rangebegin);
                              }
                              if (strict) {
                                  vFAIL4("False [] range \"%*.*s\"",
                                      w, w, rangebegin);
                              }
-                            else {
+                            else if (PASS2) {
                                  vWARN4(RExC_parse,
                                      "False [] range \"%*.*s\"",
                                      w, w, rangebegin);
                                  vWARN4(RExC_parse,
                                      "False [] range \"%*.*s\"",
                                      w, w, rangebegin);
@@ -13873,8 +14196,12 @@ parseit:
             }
         }
  
             }
         }
  
-        /* Here, <prevvalue> is the beginning of the range, if any; or <value>
-         * if not */
+        if (namedclass > OOB_NAMEDCLASS) {
+            continue;
+        }
+
+        /* Here, we have a single value, and <prevvalue> is the beginning of
+         * the range, if any; or <value> if not */
  
         /* non-Latin1 code point implies unicode semantics.  Must be set in
          * pass1 so is there for the whole of pass 2 */
  
         /* non-Latin1 code point implies unicode semantics.  Must be set in
          * pass1 so is there for the whole of pass 2 */
@@ -13922,44 +14249,17 @@ parseit:
                       * again.  Otherwise add this character to the list of
                       * multi-char folds. */
                      if (! RExC_in_multi_char_class) {
                       * again.  Otherwise add this character to the list of
                       * multi-char folds. */
                      if (! RExC_in_multi_char_class) {
-                        AV** this_array_ptr;
-                        AV* this_array;
                          STRLEN cp_count = utf8_length(foldbuf,
                                                        foldbuf + foldlen);
                          STRLEN cp_count = utf8_length(foldbuf,
                                                        foldbuf + foldlen);
-                        SV* multi_fold = sv_2mortal(newSVpvn("", 0));
+                        SV* multi_fold = sv_2mortal(newSVpvs(""));
  
                          Perl_sv_catpvf(aTHX_ multi_fold, "\\x{%"UVXf"}", value);
  
  
                          Perl_sv_catpvf(aTHX_ multi_fold, "\\x{%"UVXf"}", value);
  
+                        multi_char_matches
+                                        = add_multi_match(multi_char_matches,
+                                                          multi_fold,
+                                                          cp_count);
  
  
-                        if (! multi_char_matches) {
-                            multi_char_matches = newAV();
-                        }
-
-                        /* <multi_char_matches> is actually an array of arrays.
-                         * There will be one or two top-level elements: [2],
-                         * and/or [3].  The [2] element is an array, each
-                         * element thereof is a character which folds to TWO
-                         * characters; [3] is for folds to THREE characters.
-                         * (Unicode guarantees a maximum of 3 characters in any
-                         * fold.)  When we rewrite the character class below,
-                         * we will do so such that the longest folds are
-                         * written first, so that it prefers the longest
-                         * matching strings first.  This is done even if it
-                         * turns out that any quantifier is non-greedy, out of
-                         * programmer laziness.  Tom Christiansen has agreed
-                         * that this is ok.  This makes the test for the
-                         * ligature 'ffi' come before the test for 'ff' */
-                        if (av_exists(multi_char_matches, cp_count)) {
-                            this_array_ptr = (AV**) av_fetch(multi_char_matches,
-                                                             cp_count, FALSE);
-                            this_array = *this_array_ptr;
-                        }
-                        else {
-                            this_array = newAV();
-                            av_store(multi_char_matches, cp_count,
-                                     (SV*) this_array);
-                        }
-                        av_push(this_array, multi_fold);
                      }
  
                      /* This element should not be processed further in this
                      }
  
                      /* This element should not be processed further in this
@@ -13993,7 +14293,7 @@ parseit:
                  && ((prevvalue >= 'a' && value <= 'z')
                      || (prevvalue >= 'A' && value <= 'Z')))
              {
                  && ((prevvalue >= 'a' && value <= 'z')
                      || (prevvalue >= 'A' && value <= 'Z')))
              {
-                _invlist_intersection(this_range, PL_ASCII,
+                _invlist_intersection(this_range, PL_XPosix_ptrs[_CC_ASCII],
                                        &this_range);
  
                  /* Since this above only contains ascii, the intersection of it
                                        &this_range);
  
                  /* Since this above only contains ascii, the intersection of it
@@ -14073,6 +14373,7 @@ parseit:
         RExC_parse = SvPV(substitute_parse, len);
         RExC_end = RExC_parse + len;
          RExC_in_multi_char_class = 1;
         RExC_parse = SvPV(substitute_parse, len);
         RExC_end = RExC_parse + len;
          RExC_in_multi_char_class = 1;
+       RExC_override_recoding = 1;
          RExC_emit = (regnode *)orig_emit;
  
         ret = reg(pRExC_state, 1, &reg_flags, depth+1);
          RExC_emit = (regnode *)orig_emit;
  
         ret = reg(pRExC_state, 1, &reg_flags, depth+1);
@@ -14082,6 +14383,7 @@ parseit:
         RExC_parse = save_parse;
         RExC_end = save_end;
         RExC_in_multi_char_class = 0;
         RExC_parse = save_parse;
         RExC_end = save_end;
         RExC_in_multi_char_class = 0;
+       RExC_override_recoding = 0;
          SvREFCNT_dec_NN(multi_char_matches);
          return ret;
      }
          SvREFCNT_dec_NN(multi_char_matches);
          return ret;
      }
@@ -14316,18 +14618,7 @@ parseit:
                  /* This is a hash that for a particular fold gives all
                   * characters that are involved in it */
                  if (! PL_utf8_foldclosures) {
                  /* This is a hash that for a particular fold gives all
                   * characters that are involved in it */
                  if (! PL_utf8_foldclosures) {
-
-                    /* If the folds haven't been read in, call a fold function
-                     * to force that */
-                    if (! PL_utf8_tofold) {
-                        U8 dummy[UTF8_MAXBYTES_CASE+1];
-
-                        /* This string is just a short named one above \xff */
-                        to_utf8_fold((U8*) HYPHEN_UTF8, dummy, NULL);
-                        assert(PL_utf8_tofold); /* Verify that worked */
-                    }
-                    PL_utf8_foldclosures
-                                      = _swash_inversion_hash(PL_utf8_tofold);
+                    _load_PL_utf8_foldclosures();
                  }
              }
  
                  }
              }
  
@@ -14344,15 +14635,6 @@ parseit:
  
                      if (j < 256) {
  
  
                      if (j < 256) {
  
-                        /* We have the latin1 folding rules hard-coded here so
-                         * that an innocent-looking character class, like
-                         * /[ks]/i won't have to go out to disk to find the
-                         * possible matches.  XXX It would be better to
-                         * generate these via regen, in case a new version of
-                         * the Unicode standard adds new mappings, though that
-                         * is not really likely, and may be caught by the
-                         * default: case of the switch below. */
-
                          if (IS_IN_SOME_FOLD_L1(j)) {
  
                              /* ASCII is always matched; non-ASCII is matched
                          if (IS_IN_SOME_FOLD_L1(j)) {
  
                              /* ASCII is always matched; non-ASCII is matched
@@ -14369,72 +14651,12 @@ parseit:
                              }
                          }
  
                              }
                          }
  
-                        if (HAS_NONLATIN1_FOLD_CLOSURE(j)
+                        if (HAS_NONLATIN1_SIMPLE_FOLD_CLOSURE(j)
                              && (! isASCII(j) || ! ASCII_FOLD_RESTRICTED))
                          {
                              && (! isASCII(j) || ! ASCII_FOLD_RESTRICTED))
                          {
-                            /* Certain Latin1 characters have matches outside
-                            * Latin1.  To get here, <j> is one of those
-                            * characters.   None of these matches is valid for
-                            * ASCII characters under /aa, which is why the 'if'
-                            * just above excludes those.  These matches only
-                            * happen when the target string is utf8.  The code
-                            * below adds the single fold closures for <j> to the
-                            * inversion list. */
-
-                            switch (j) {
-                                case 'k':
-                                case 'K':
-                                  *use_list =
-                                     add_cp_to_invlist(*use_list, KELVIN_SIGN);
-                                    break;
-                                case 's':
-                                case 'S':
-                                  *use_list = add_cp_to_invlist(*use_list,
-                                                    LATIN_SMALL_LETTER_LONG_S);
-                                    break;
-                                case MICRO_SIGN:
-                                  *use_list = add_cp_to_invlist(*use_list,
-                                                      GREEK_CAPITAL_LETTER_MU);
-                                  *use_list = add_cp_to_invlist(*use_list,
-                                                        GREEK_SMALL_LETTER_MU);
-                                    break;
-                                case LATIN_CAPITAL_LETTER_A_WITH_RING_ABOVE:
-                                case LATIN_SMALL_LETTER_A_WITH_RING_ABOVE:
-                                  *use_list =
-                                   add_cp_to_invlist(*use_list, ANGSTROM_SIGN);
-                                    break;
-                                case LATIN_SMALL_LETTER_Y_WITH_DIAERESIS:
-                                  *use_list = add_cp_to_invlist(*use_list,
-                                        LATIN_CAPITAL_LETTER_Y_WITH_DIAERESIS);
-                                    break;
-                                case LATIN_SMALL_LETTER_SHARP_S:
-                                  *use_list = add_cp_to_invlist(*use_list,
-                                                 LATIN_CAPITAL_LETTER_SHARP_S);
-                                    break;
-                                case 'F': case 'f':
-                                case 'I': case 'i':
-                                case 'L': case 'l':
-                                case 'T': case 't':
-                                case 'A': case 'a':
-                                case 'H': case 'h':
-                                case 'J': case 'j':
-                                case 'N': case 'n':
-                                case 'W': case 'w':
-                                case 'Y': case 'y':
-                                    /* These all are targets of multi-character
-                                     * folds from code points that require UTF8
-                                     * to express, so they can't match unless
-                                     * the target string is in UTF-8, so no
-                                     * action here is necessary, as regexec.c
-                                     * properly handles the general case for
-                                     * UTF-8 matching and multi-char folds */
-                                    break;
-                                default:
-                                    /* Use deprecated warning to increase the
-                                    * chances of this being output */
-                                    ckWARN2reg_d(RExC_parse, "Perl folding rules are not up-to-date for 0x%"UVXf"; please use the perlbug utility to report;", j);
-                                    break;
-                            }
+                            add_above_Latin1_folds(pRExC_state,
+                                                   (U8) j,
+                                                   use_list);
                          }
                          continue;
                      }
                          }
                          continue;
                      }
@@ -14463,9 +14685,8 @@ parseit:
                          for (k = 0; k <= av_tindex(list); k++) {
                              SV** c_p = av_fetch(list, k, FALSE);
                              UV c;
                          for (k = 0; k <= av_tindex(list); k++) {
                              SV** c_p = av_fetch(list, k, FALSE);
                              UV c;
-                            if (c_p == NULL) {
-                                Perl_croak(aTHX_ "panic: invalid PL_utf8_foldclosures structure");
-                            }
+                            assert(c_p);
+
                              c = SvUV(*c_p);
  
                              /* /aa doesn't allow folds between ASCII and non- */
                              c = SvUV(*c_p);
  
                              /* /aa doesn't allow folds between ASCII and non- */
@@ -14521,7 +14742,7 @@ parseit:
              if (DEPENDS_SEMANTICS) {
                  /* Under /d, everything in the upper half of the Latin1 range
                   * matches these complements */
              if (DEPENDS_SEMANTICS) {
                  /* Under /d, everything in the upper half of the Latin1 range
                   * matches these complements */
-                ANYOF_FLAGS(ret) |= ANYOF_NON_UTF8_NON_ASCII_ALL;
+                ANYOF_FLAGS(ret) |= ANYOF_MATCHES_ALL_NON_UTF8_NON_ASCII;
              }
              else if (AT_LEAST_ASCII_RESTRICTED) {
                  /* Under /a and /aa, everything above ASCII matches these
              }
              else if (AT_LEAST_ASCII_RESTRICTED) {
                  /* Under /a and /aa, everything above ASCII matches these
@@ -14827,7 +15048,7 @@ parseit:
         else {
             cp_list = depends_list;
         }
         else {
             cp_list = depends_list;
         }
-        ANYOF_FLAGS(ret) |= ANYOF_UTF8;
+        ANYOF_FLAGS(ret) |= ANYOF_HAS_UTF8_NONBITMAP_MATCHES;
      }
  
      /* If there is a swash and more than one element, we can't use the swash in
      }
  
      /* If there is a swash and more than one element, we can't use the swash in
@@ -14837,6 +15058,10 @@ parseit:
         swash = NULL;
      }
  
         swash = NULL;
      }
  
+    /* Note that the optimization of using 'swash' if it is the only thing in
+     * the class doesn't have us change swash at all, so it can include things
+     * that are also in the bitmap; otherwise we have purposely deleted that
+     * duplicate information */
      set_ANYOF_arg(pRExC_state, ret, cp_list,
                    (HAS_NONLOCALE_RUNTIME_PROPERTY_DEFINITION)
                     ? listsv : NULL,
      set_ANYOF_arg(pRExC_state, ret, cp_list,
                    (HAS_NONLOCALE_RUNTIME_PROPERTY_DEFINITION)
                     ? listsv : NULL,
@@ -14865,7 +15090,7 @@ S_set_ANYOF_arg(pTHX_ RExC_state_t* const pRExC_state,
  {
      /* Sets the arg field of an ANYOF-type node 'node', using information about
       * the node passed-in.  If there is nothing outside the node's bitmap, the
  {
      /* Sets the arg field of an ANYOF-type node 'node', using information about
       * the node passed-in.  If there is nothing outside the node's bitmap, the
-     * arg is set to ANYOF_NONBITMAP_EMPTY.  Otherwise, it sets the argument to
+     * arg is set to ANYOF_ONLY_HAS_BITMAP.  Otherwise, it sets the argument to
       * the count returned by add_data(), having allocated and stored an array,
       * av, that that count references, as follows:
       *  av[0] stores the character class description in its textual form.
       * the count returned by add_data(), having allocated and stored an array,
       * av, that that count references, as follows:
       *  av[0] stores the character class description in its textual form.
@@ -14891,15 +15116,17 @@ S_set_ANYOF_arg(pTHX_ RExC_state_t* const pRExC_state,
  
      if (! cp_list && ! runtime_defns && ! only_utf8_locale_list) {
          assert(! (ANYOF_FLAGS(node)
  
      if (! cp_list && ! runtime_defns && ! only_utf8_locale_list) {
          assert(! (ANYOF_FLAGS(node)
-                    & (ANYOF_UTF8|ANYOF_NONBITMAP_NON_UTF8)));
-       ARG_SET(node, ANYOF_NONBITMAP_EMPTY);
+                  & (ANYOF_HAS_UTF8_NONBITMAP_MATCHES
+                     |ANYOF_HAS_NONBITMAP_NON_UTF8_MATCHES)));
+       ARG_SET(node, ANYOF_ONLY_HAS_BITMAP);
      }
      else {
         AV * const av = newAV();
         SV *rv;
  
          assert(ANYOF_FLAGS(node)
      }
      else {
         AV * const av = newAV();
         SV *rv;
  
          assert(ANYOF_FLAGS(node)
-                    & (ANYOF_UTF8|ANYOF_NONBITMAP_NON_UTF8|ANYOF_LOC_FOLD));
+               & (ANYOF_HAS_UTF8_NONBITMAP_MATCHES
+                  |ANYOF_HAS_NONBITMAP_NON_UTF8_MATCHES|ANYOF_LOC_FOLD));
  
         av_store(av, 0, (runtime_defns)
                         ? SvREFCNT_inc(runtime_defns) : &PL_sv_undef);
  
         av_store(av, 0, (runtime_defns)
                         ? SvREFCNT_inc(runtime_defns) : &PL_sv_undef);
@@ -14930,6 +15157,135 @@ S_set_ANYOF_arg(pTHX_ RExC_state_t* const pRExC_state,
      }
  }
  
      }
  }
  
+#if !defined(PERL_IN_XSUB_RE) || defined(PLUGGABLE_RE_EXTENSION)
+SV *
+Perl__get_regclass_nonbitmap_data(pTHX_ const regexp *prog,
+                                        const regnode* node,
+                                        bool doinit,
+                                        SV** listsvp,
+                                        SV** only_utf8_locale_ptr,
+                                        SV*  exclude_list)
+
+{
+    /* For internal core use only.
+     * Returns the swash for the input 'node' in the regex 'prog'.
+     * If <doinit> is 'true', will attempt to create the swash if not already
+     *   done.
+     * If <listsvp> is non-null, will return the printable contents of the
+     *    swash.  This can be used to get debugging information even before the
+     *    swash exists, by calling this function with 'doinit' set to false, in
+     *    which case the components that will be used to eventually create the
+     *    swash are returned  (in a printable form).
+     * If <exclude_list> is not NULL, it is an inversion list of things to
+     *    exclude from what's returned in <listsvp>.
+     * Tied intimately to how S_set_ANYOF_arg sets up the data structure.  Note
+     * that, in spite of this function's name, the swash it returns may include
+     * the bitmap data as well */
+
+    SV *sw  = NULL;
+    SV *si  = NULL;         /* Input swash initialization string */
+    SV*  invlist = NULL;
+
+    RXi_GET_DECL(prog,progi);
+    const struct reg_data * const data = prog ? progi->data : NULL;
+
+    PERL_ARGS_ASSERT__GET_REGCLASS_NONBITMAP_DATA;
+
+    assert(ANYOF_FLAGS(node)
+        & (ANYOF_HAS_UTF8_NONBITMAP_MATCHES
+           |ANYOF_HAS_NONBITMAP_NON_UTF8_MATCHES|ANYOF_LOC_FOLD));
+
+    if (data && data->count) {
+       const U32 n = ARG(node);
+
+       if (data->what[n] == 's') {
+           SV * const rv = MUTABLE_SV(data->data[n]);
+           AV * const av = MUTABLE_AV(SvRV(rv));
+           SV **const ary = AvARRAY(av);
+           U8 swash_init_flags = _CORE_SWASH_INIT_ACCEPT_INVLIST;
+
+           si = *ary;  /* ary[0] = the string to initialize the swash with */
+
+           /* Elements 3 and 4 are either both present or both absent. [3] is
+            * any inversion list generated at compile time; [4] indicates if
+            * that inversion list has any user-defined properties in it. */
+            if (av_tindex(av) >= 2) {
+                if (only_utf8_locale_ptr
+                    && ary[2]
+                    && ary[2] != &PL_sv_undef)
+                {
+                    *only_utf8_locale_ptr = ary[2];
+                }
+                else {
+                    assert(only_utf8_locale_ptr);
+                    *only_utf8_locale_ptr = NULL;
+                }
+
+                if (av_tindex(av) >= 3) {
+                    invlist = ary[3];
+                    if (SvUV(ary[4])) {
+                        swash_init_flags |= _CORE_SWASH_INIT_USER_DEFINED_PROPERTY;
+                    }
+                }
+                else {
+                    invlist = NULL;
+                }
+           }
+
+           /* Element [1] is reserved for the set-up swash.  If already there,
+            * return it; if not, create it and store it there */
+           if (ary[1] && SvROK(ary[1])) {
+               sw = ary[1];
+           }
+           else if (doinit && ((si && si != &PL_sv_undef)
+                                 || (invlist && invlist != &PL_sv_undef))) {
+               assert(si);
+               sw = _core_swash_init("utf8", /* the utf8 package */
+                                     "", /* nameless */
+                                     si,
+                                     1, /* binary */
+                                     0, /* not from tr/// */
+                                     invlist,
+                                     &swash_init_flags);
+               (void)av_store(av, 1, sw);
+           }
+       }
+    }
+
+    /* If requested, return a printable version of what this swash matches */
+    if (listsvp) {
+       SV* matches_string = newSVpvs("");
+
+        /* The swash should be used, if possible, to get the data, as it
+         * contains the resolved data.  But this function can be called at
+         * compile-time, before everything gets resolved, in which case we
+         * return the currently best available information, which is the string
+         * that will eventually be used to do that resolving, 'si' */
+       if ((! sw || (invlist = _get_swash_invlist(sw)) == NULL)
+            && (si && si != &PL_sv_undef))
+        {
+           sv_catsv(matches_string, si);
+       }
+
+       /* Add the inversion list to whatever we have.  This may have come from
+        * the swash, or from an input parameter */
+       if (invlist) {
+            if (exclude_list) {
+                SV* clone = invlist_clone(invlist);
+                _invlist_subtract(clone, exclude_list, &clone);
+                sv_catsv(matches_string, _invlist_contents(clone));
+                SvREFCNT_dec_NN(clone);
+            }
+            else {
+                sv_catsv(matches_string, _invlist_contents(invlist));
+            }
+       }
+       *listsvp = matches_string;
+    }
+
+    return sw;
+}
+#endif /* !defined(PERL_IN_XSUB_RE) || defined(PLUGGABLE_RE_EXTENSION) */
  
  /* reg_skipcomment()
  
  
  /* reg_skipcomment()
  
@@ -14945,11 +15301,11 @@ S_set_ANYOF_arg(pTHX_ RExC_state_t* const pRExC_state,
  */
  
  PERL_STATIC_INLINE char*
  */
  
  PERL_STATIC_INLINE char*
-S_reg_skipcomment(pTHX_ RExC_state_t *pRExC_state, char* p)
+S_reg_skipcomment(RExC_state_t *pRExC_state, char* p)
  {
      PERL_ARGS_ASSERT_REG_SKIPCOMMENT;
  
  {
      PERL_ARGS_ASSERT_REG_SKIPCOMMENT;
  
-    assert(*p = '#');
+    assert(*p == '#');
  
      while (p < RExC_end) {
          if (*(++p) == '\n') {
  
      while (p < RExC_end) {
          if (*(++p) == '\n') {
@@ -15015,7 +15371,6 @@ S_nextchar(pTHX_ RExC_state_t *pRExC_state)
  STATIC regnode *                       /* Location. */
  S_reg_node(pTHX_ RExC_state_t *pRExC_state, U8 op)
  {
  STATIC regnode *                       /* Location. */
  S_reg_node(pTHX_ RExC_state_t *pRExC_state, U8 op)
  {
-    dVAR;
      regnode *ptr;
      regnode * const ret = RExC_emit;
      GET_RE_DEBUG_FLAGS_DECL;
      regnode *ptr;
      regnode * const ret = RExC_emit;
      GET_RE_DEBUG_FLAGS_DECL;
@@ -15029,7 +15384,7 @@ S_reg_node(pTHX_ RExC_state_t *pRExC_state, U8 op)
      }
      if (RExC_emit >= RExC_emit_bound)
          Perl_croak(aTHX_ "panic: reg_node overrun trying to emit %d, %p>=%p",
      }
      if (RExC_emit >= RExC_emit_bound)
          Perl_croak(aTHX_ "panic: reg_node overrun trying to emit %d, %p>=%p",
-                  op, RExC_emit, RExC_emit_bound);
+                  op, (void*)RExC_emit, (void*)RExC_emit_bound);
  
      NODE_ALIGN_FILL(ret);
      ptr = ret;
  
      NODE_ALIGN_FILL(ret);
      ptr = ret;
@@ -15058,7 +15413,6 @@ S_reg_node(pTHX_ RExC_state_t *pRExC_state, U8 op)
  STATIC regnode *                       /* Location. */
  S_reganode(pTHX_ RExC_state_t *pRExC_state, U8 op, U32 arg)
  {
  STATIC regnode *                       /* Location. */
  S_reganode(pTHX_ RExC_state_t *pRExC_state, U8 op, U32 arg)
  {
-    dVAR;
      regnode *ptr;
      regnode * const ret = RExC_emit;
      GET_RE_DEBUG_FLAGS_DECL;
      regnode *ptr;
      regnode * const ret = RExC_emit;
      GET_RE_DEBUG_FLAGS_DECL;
@@ -15086,7 +15440,7 @@ S_reganode(pTHX_ RExC_state_t *pRExC_state, U8 op, U32 arg)
      }
      if (RExC_emit >= RExC_emit_bound)
          Perl_croak(aTHX_ "panic: reg_node overrun trying to emit %d, %p>=%p",
      }
      if (RExC_emit >= RExC_emit_bound)
          Perl_croak(aTHX_ "panic: reg_node overrun trying to emit %d, %p>=%p",
-                  op, RExC_emit, RExC_emit_bound);
+                  op, (void*)RExC_emit, (void*)RExC_emit_bound);
  
      NODE_ALIGN_FILL(ret);
      ptr = ret;
  
      NODE_ALIGN_FILL(ret);
      ptr = ret;
@@ -15116,8 +15470,6 @@ S_reganode(pTHX_ RExC_state_t *pRExC_state, U8 op, U32 arg)
  PERL_STATIC_INLINE STRLEN
  S_reguni(pTHX_ const RExC_state_t *pRExC_state, UV uv, char* s)
  {
  PERL_STATIC_INLINE STRLEN
  S_reguni(pTHX_ const RExC_state_t *pRExC_state, UV uv, char* s)
  {
-    dVAR;
-
      PERL_ARGS_ASSERT_REGUNI;
  
      return SIZE_ONLY ? UNISKIP(uv) : (uvchr_to_utf8((U8*)s, uv) - (U8*)s);
      PERL_ARGS_ASSERT_REGUNI;
  
      return SIZE_ONLY ? UNISKIP(uv) : (uvchr_to_utf8((U8*)s, uv) - (U8*)s);
@@ -15131,7 +15483,6 @@ S_reguni(pTHX_ const RExC_state_t *pRExC_state, UV uv, char* s)
  STATIC void
  S_reginsert(pTHX_ RExC_state_t *pRExC_state, U8 op, regnode *opnd, U32 depth)
  {
  STATIC void
  S_reginsert(pTHX_ RExC_state_t *pRExC_state, U8 op, regnode *opnd, U32 depth)
  {
-    dVAR;
      regnode *src;
      regnode *dst;
      regnode *place;
      regnode *src;
      regnode *dst;
      regnode *place;
@@ -15140,6 +15491,7 @@ S_reginsert(pTHX_ RExC_state_t *pRExC_state, U8 op, regnode *opnd, U32 depth)
      GET_RE_DEBUG_FLAGS_DECL;
  
      PERL_ARGS_ASSERT_REGINSERT;
      GET_RE_DEBUG_FLAGS_DECL;
  
      PERL_ARGS_ASSERT_REGINSERT;
+    PERL_UNUSED_CONTEXT;
      PERL_UNUSED_ARG(depth);
  /* (PL_regkind[(U8)op] == CURLY ? EXTRA_STEP_2ARGS : 0); */
      DEBUG_PARSE_FMT("inst"," - %s",PL_reg_name[op]);
      PERL_UNUSED_ARG(depth);
  /* (PL_regkind[(U8)op] == CURLY ? EXTRA_STEP_2ARGS : 0); */
      DEBUG_PARSE_FMT("inst"," - %s",PL_reg_name[op]);
@@ -15222,7 +15574,6 @@ STATIC void
  S_regtail(pTHX_ RExC_state_t *pRExC_state, regnode *p,
                  const regnode *val,U32 depth)
  {
  S_regtail(pTHX_ RExC_state_t *pRExC_state, regnode *p,
                  const regnode *val,U32 depth)
  {
-    dVAR;
      regnode *scan;
      GET_RE_DEBUG_FLAGS_DECL;
  
      regnode *scan;
      GET_RE_DEBUG_FLAGS_DECL;
  
@@ -15282,7 +15633,6 @@ STATIC U8
  S_regtail_study(pTHX_ RExC_state_t *pRExC_state, regnode *p,
                        const regnode *val,U32 depth)
  {
  S_regtail_study(pTHX_ RExC_state_t *pRExC_state, regnode *p,
                        const regnode *val,U32 depth)
  {
-    dVAR;
      regnode *scan;
      U8 exact = PSEUDO;
  #ifdef EXPERIMENTAL_INPLACESCAN
      regnode *scan;
      U8 exact = PSEUDO;
  #ifdef EXPERIMENTAL_INPLACESCAN
@@ -15445,7 +15795,6 @@ void
  Perl_regdump(pTHX_ const regexp *r)
  {
  #ifdef DEBUGGING
  Perl_regdump(pTHX_ const regexp *r)
  {
  #ifdef DEBUGGING
-    dVAR;
      SV * const sv = sv_newmortal();
      SV *dsv= sv_newmortal();
      RXi_GET_DECL(r,ri);
      SV * const sv = sv_newmortal();
      SV *dsv= sv_newmortal();
      RXi_GET_DECL(r,ri);
@@ -15505,8 +15854,6 @@ Perl_regdump(pTHX_ const regexp *r)
      }
      if (r->intflags & PREGf_ANCH) {
         PerlIO_printf(Perl_debug_log, "anchored");
      }
      if (r->intflags & PREGf_ANCH) {
         PerlIO_printf(Perl_debug_log, "anchored");
-        if (r->intflags & PREGf_ANCH_BOL)
-           PerlIO_printf(Perl_debug_log, "(BOL)");
          if (r->intflags & PREGf_ANCH_MBOL)
             PerlIO_printf(Perl_debug_log, "(MBOL)");
          if (r->intflags & PREGf_ANCH_SBOL)
          if (r->intflags & PREGf_ANCH_MBOL)
             PerlIO_printf(Perl_debug_log, "(MBOL)");
          if (r->intflags & PREGf_ANCH_SBOL)
@@ -15544,7 +15891,6 @@ void
  Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_info *reginfo)
  {
  #ifdef DEBUGGING
  Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_info *reginfo)
  {
  #ifdef DEBUGGING
-    dVAR;
      int k;
  
      /* Should be synchronized with * ANYOF_ #xdefines in regcomp.h */
      int k;
  
      /* Should be synchronized with * ANYOF_ #xdefines in regcomp.h */
@@ -15647,9 +15993,11 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_
          );
          if ( IS_ANYOF_TRIE(op) || trie->bitmap ) {
              sv_catpvs(sv, "[");
          );
          if ( IS_ANYOF_TRIE(op) || trie->bitmap ) {
              sv_catpvs(sv, "[");
-            (void) put_latin1_charclass_innards(sv, IS_ANYOF_TRIE(op)
-                                                   ? ANYOF_BITMAP(o)
-                                                   : TRIE_BITMAP(trie));
+            (void) put_charclass_bitmap_innards(sv,
+                                                (IS_ANYOF_TRIE(op))
+                                                 ? ANYOF_BITMAP(o)
+                                                 : TRIE_BITMAP(trie),
+                                                NULL);
              sv_catpvs(sv, "]");
          }
  
              sv_catpvs(sv, "]");
          }
  
@@ -15713,6 +16061,7 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_
      else if (k == ANYOF) {
         const U8 flags = ANYOF_FLAGS(o);
         int do_sep = 0;
      else if (k == ANYOF) {
         const U8 flags = ANYOF_FLAGS(o);
         int do_sep = 0;
+        SV* bitmap_invlist;  /* Will hold what the bit map contains */
  
  
         if (flags & ANYOF_LOCALE_FLAGS)
  
  
         if (flags & ANYOF_LOCALE_FLAGS)
@@ -15723,8 +16072,10 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_
         if (flags & ANYOF_INVERT)
             sv_catpvs(sv, "^");
  
         if (flags & ANYOF_INVERT)
             sv_catpvs(sv, "^");
  
-       /* output what the standard cp 0-255 bitmap matches */
-        do_sep = put_latin1_charclass_innards(sv, ANYOF_BITMAP(o));
+        /* output what the standard cp 0-NUM_ANYOF_CODE_POINTS-1 bitmap matches
+         * */
+        do_sep = put_charclass_bitmap_innards(sv, ANYOF_BITMAP(o),
+                                                            &bitmap_invlist);
  
          /* output any special charclass tests (used entirely under use
           * locale) * */
  
          /* output any special charclass tests (used entirely under use
           * locale) * */
@@ -15738,9 +16089,9 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_
              }
          }
  
              }
          }
  
-       if ((flags & (ANYOF_ABOVE_LATIN1_ALL
-                      |ANYOF_UTF8
-                      |ANYOF_NONBITMAP_NON_UTF8
+       if ((flags & (ANYOF_MATCHES_ALL_ABOVE_BITMAP
+                      |ANYOF_HAS_UTF8_NONBITMAP_MATCHES
+                      |ANYOF_HAS_NONBITMAP_NON_UTF8_MATCHES
                        |ANYOF_LOC_FOLD)))
          {
              if (do_sep) {
                        |ANYOF_LOC_FOLD)))
          {
              if (do_sep) {
@@ -15750,22 +16101,25 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_
                      sv_catpvs(sv, "^");
              }
  
                      sv_catpvs(sv, "^");
              }
  
-            if (flags & ANYOF_NON_UTF8_NON_ASCII_ALL) {
+            if (flags & ANYOF_MATCHES_ALL_NON_UTF8_NON_ASCII) {
                  sv_catpvs(sv, "{non-utf8-latin1-all}");
              }
  
              /* output information about the unicode matching */
                  sv_catpvs(sv, "{non-utf8-latin1-all}");
              }
  
              /* output information about the unicode matching */
-            if (flags & ANYOF_ABOVE_LATIN1_ALL)
-                sv_catpvs(sv, "{unicode_all}");
-            else if (ARG(o) != ANYOF_NONBITMAP_EMPTY) {
+            if (flags & ANYOF_MATCHES_ALL_ABOVE_BITMAP)
+                sv_catpvs(sv, "{above_bitmap_all}");
+            else if (ARG(o) != ANYOF_ONLY_HAS_BITMAP) {
                  SV *lv; /* Set if there is something outside the bit map. */
                  bool byte_output = FALSE;   /* If something in the bitmap has
                                                 been output */
                  SV *only_utf8_locale;
  
                  SV *lv; /* Set if there is something outside the bit map. */
                  bool byte_output = FALSE;   /* If something in the bitmap has
                                                 been output */
                  SV *only_utf8_locale;
  
-                /* Get the stuff that wasn't in the bitmap */
+                /* Get the stuff that wasn't in the bitmap.  'bitmap_invlist'
+                 * is used to guarantee that nothing in the bitmap gets
+                 * returned */
                  (void) _get_regclass_nonbitmap_data(prog, o, FALSE,
                  (void) _get_regclass_nonbitmap_data(prog, o, FALSE,
-                                                    &lv, &only_utf8_locale);
+                                                    &lv, &only_utf8_locale,
+                                                    bitmap_invlist);
                  if (lv && lv != &PL_sv_undef) {
                      char *s = savesvpv(lv);
                      char * const origs = s;
                  if (lv && lv != &PL_sv_undef) {
                      char *s = savesvpv(lv);
                      char * const origs = s;
@@ -15776,7 +16130,7 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_
                      if (*s == '\n') {
                          const char * const t = ++s;
  
                      if (*s == '\n') {
                          const char * const t = ++s;
  
-                        if (flags & ANYOF_NONBITMAP_NON_UTF8) {
+                        if (flags & ANYOF_HAS_NONBITMAP_NON_UTF8_MATCHES) {
                              sv_catpvs(sv, "{outside bitmap}");
                          }
                          else {
                              sv_catpvs(sv, "{outside bitmap}");
                          }
                          else {
@@ -15828,7 +16182,7 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_
                      invlist_iterinit(only_utf8_locale);
                      while (invlist_iternext(only_utf8_locale,
                                              &start, &end)) {
                      invlist_iterinit(only_utf8_locale);
                      while (invlist_iternext(only_utf8_locale,
                                              &start, &end)) {
-                        put_range(sv, start, end);
+                        put_range(sv, start, end, FALSE);
                          max_entries --;
                          if (max_entries < 0) {
                              sv_catpvs(sv, "...");
                          max_entries --;
                          if (max_entries < 0) {
                              sv_catpvs(sv, "...");
@@ -15839,6 +16193,8 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_
                  }
              }
         }
                  }
              }
         }
+        SvREFCNT_dec(bitmap_invlist);
+
  
         Perl_sv_catpvf(aTHX_ sv, "%s]", PL_colors[1]);
      }
  
         Perl_sv_catpvf(aTHX_ sv, "%s]", PL_colors[1]);
      }
@@ -15859,6 +16215,8 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_
      }
      else if (k == BRANCHJ && (OP(o) == UNLESSM || OP(o) == IFMATCH))
         Perl_sv_catpvf(aTHX_ sv, "[%d]", -(o->flags));
      }
      else if (k == BRANCHJ && (OP(o) == UNLESSM || OP(o) == IFMATCH))
         Perl_sv_catpvf(aTHX_ sv, "[%d]", -(o->flags));
+    else if (OP(o) == SBOL)
+        Perl_sv_catpvf(aTHX_ sv, " /%s/", o->flags ? "\\A" : "^");
  #else
      PERL_UNUSED_CONTEXT;
      PERL_UNUSED_ARG(sv);
  #else
      PERL_UNUSED_CONTEXT;
      PERL_UNUSED_ARG(sv);
@@ -15873,7 +16231,6 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_
  SV *
  Perl_re_intuit_string(pTHX_ REGEXP * const r)
  {                              /* Assume that RE_INTUIT is set */
  SV *
  Perl_re_intuit_string(pTHX_ REGEXP * const r)
  {                              /* Assume that RE_INTUIT is set */
-    dVAR;
      struct regexp *const prog = ReANY(r);
      GET_RE_DEBUG_FLAGS_DECL;
  
      struct regexp *const prog = ReANY(r);
      GET_RE_DEBUG_FLAGS_DECL;
  
@@ -15921,7 +16278,6 @@ Perl_pregfree(pTHX_ REGEXP *r)
  void
  Perl_pregfree2(pTHX_ REGEXP *rx)
  {
  void
  Perl_pregfree2(pTHX_ REGEXP *rx)
  {
-    dVAR;
      struct regexp *const r = ReANY(rx);
      GET_RE_DEBUG_FLAGS_DECL;
  
      struct regexp *const r = ReANY(rx);
      GET_RE_DEBUG_FLAGS_DECL;
  
@@ -16050,7 +16406,6 @@ Perl_reg_temp_copy (pTHX_ REGEXP *ret_x, REGEXP *rx)
  void
  Perl_regfree_internal(pTHX_ REGEXP * const rx)
  {
  void
  Perl_regfree_internal(pTHX_ REGEXP * const rx)
  {
-    dVAR;
      struct regexp *const r = ReANY(rx);
      RXi_GET_DECL(r,ri);
      GET_RE_DEBUG_FLAGS_DECL;
      struct regexp *const r = ReANY(rx);
      RXi_GET_DECL(r,ri);
      GET_RE_DEBUG_FLAGS_DECL;
@@ -16103,6 +16458,9 @@ Perl_regfree_internal(pTHX_ REGEXP * const rx)
                       Used in stclass optimization only */
                      U32 refcount;
                      reg_ac_data *aho=(reg_ac_data*)ri->data->data[n];
                       Used in stclass optimization only */
                      U32 refcount;
                      reg_ac_data *aho=(reg_ac_data*)ri->data->data[n];
+#ifdef USE_ITHREADS
+                    dVAR;
+#endif
                      OP_REFCNT_LOCK;
                      refcount = --aho->refcount;
                      OP_REFCNT_UNLOCK;
                      OP_REFCNT_LOCK;
                      refcount = --aho->refcount;
                      OP_REFCNT_UNLOCK;
@@ -16111,7 +16469,16 @@ Perl_regfree_internal(pTHX_ REGEXP * const rx)
                          PerlMemShared_free(aho->fail);
                          /* do this last!!!! */
                          PerlMemShared_free(ri->data->data[n]);
                          PerlMemShared_free(aho->fail);
                          /* do this last!!!! */
                          PerlMemShared_free(ri->data->data[n]);
-                        PerlMemShared_free(ri->regstclass);
+                        /* we should only ever get called once, so
+                         * assert as much, and also guard the free
+                         * which /might/ happen twice. At the least
+                         * it will make code anlyzers happy and it
+                         * doesn't cost much. - Yves */
+                        assert(ri->regstclass);
+                        if (ri->regstclass) {
+                            PerlMemShared_free(ri->regstclass);
+                            ri->regstclass = 0;
+                        }
                      }
                  }
                  break;
                      }
                  }
                  break;
@@ -16120,6 +16487,9 @@ Perl_regfree_internal(pTHX_ REGEXP * const rx)
                     /* trie structure. */
                     U32 refcount;
                     reg_trie_data *trie=(reg_trie_data*)ri->data->data[n];
                     /* trie structure. */
                     U32 refcount;
                     reg_trie_data *trie=(reg_trie_data*)ri->data->data[n];
+#ifdef USE_ITHREADS
+                    dVAR;
+#endif
                      OP_REFCNT_LOCK;
                      refcount = --trie->refcount;
                      OP_REFCNT_UNLOCK;
                      OP_REFCNT_LOCK;
                      refcount = --trie->refcount;
                      OP_REFCNT_UNLOCK;
@@ -16370,7 +16740,6 @@ Perl_regdupe_internal(pTHX_ REGEXP * const rx, CLONE_PARAMS *param)
  regnode *
  Perl_regnext(pTHX_ regnode *p)
  {
  regnode *
  Perl_regnext(pTHX_ regnode *p)
  {
-    dVAR;
      I32 offset;
  
      if (!p)
      I32 offset;
  
      if (!p)
@@ -16420,140 +16789,299 @@ S_re_croak2(pTHX_ bool utf8, const char* pat1,const char* pat2,...)
      Perl_croak(aTHX_ "%"UTF8f, UTF8fARG(utf8, l1-1, buf));
  }
  
      Perl_croak(aTHX_ "%"UTF8f, UTF8fARG(utf8, l1-1, buf));
  }
  
-/* XXX Here's a total kludge.  But we need to re-enter for swash routines. */
-
-#ifndef PERL_IN_XSUB_RE
-void
-Perl_save_re_context(pTHX)
-{
-    dVAR;
-
-    /* Save $1..$n (#18107: UTF-8 s/(\w+)/uc($1)/e); AMS 20021106. */
-    if (PL_curpm) {
-       const REGEXP * const rx = PM_GETRE(PL_curpm);
-       if (rx) {
-           U32 i;
-           for (i = 1; i <= RX_NPARENS(rx); i++) {
-               char digits[TYPE_CHARS(long)];
-               const STRLEN len = my_snprintf(digits, sizeof(digits),
-                                               "%lu", (long)i);
-               GV *const *const gvp
-                   = (GV**)hv_fetch(PL_defstash, digits, len, 0);
-
-               if (gvp) {
-                   GV * const gv = *gvp;
-                   if (SvTYPE(gv) == SVt_PVGV && GvSV(gv))
-                       save_scalar(gv);
-               }
-           }
-       }
-    }
-}
-#endif
-
  #ifdef DEBUGGING
  #ifdef DEBUGGING
+/* Certain characters are output as a sequence with the first being a
+ * backslash. */
+#define isBACKSLASHED_PUNCT(c)                                              \
+                    ((c) == '-' || (c) == ']' || (c) == '\\' || (c) == '^')
  
  STATIC void
  
  STATIC void
-S_put_byte(pTHX_ SV *sv, int c)
+S_put_code_point(pTHX_ SV *sv, UV c)
  {
  {
-    PERL_ARGS_ASSERT_PUT_BYTE;
-
-    if (!isPRINT(c)) {
-        switch (c) {
-            case '\r': Perl_sv_catpvf(aTHX_ sv, "\\r"); break;
-            case '\n': Perl_sv_catpvf(aTHX_ sv, "\\n"); break;
-            case '\t': Perl_sv_catpvf(aTHX_ sv, "\\t"); break;
-            case '\f': Perl_sv_catpvf(aTHX_ sv, "\\f"); break;
-            case '\a': Perl_sv_catpvf(aTHX_ sv, "\\a"); break;
+    PERL_ARGS_ASSERT_PUT_CODE_POINT;
  
  
-            default:
-                Perl_sv_catpvf(aTHX_ sv, "\\x{%x}", c);
-                break;
-        }
+    if (c > 255) {
+        Perl_sv_catpvf(aTHX_ sv, "\\x{%04"UVXf"}", c);
      }
      }
-    else {
-       const char string = c;
-       if (c == '-' || c == ']' || c == '\\' || c == '^')
+    else if (isPRINT(c)) {
+       const char string = (char) c;
+       if (isBACKSLASHED_PUNCT(c))
             sv_catpvs(sv, "\\");
         sv_catpvn(sv, &string, 1);
      }
             sv_catpvs(sv, "\\");
         sv_catpvn(sv, &string, 1);
      }
+    else {
+        const char * const mnemonic = cntrl_to_mnemonic((char) c);
+        if (mnemonic) {
+            Perl_sv_catpvf(aTHX_ sv, "%s", mnemonic);
+        }
+        else {
+            Perl_sv_catpvf(aTHX_ sv, "\\x{%02X}", (U8) c);
+        }
+    }
  }
  
  }
  
+#define MAX_PRINT_A MAX_PRINT_A_FOR_USE_ONLY_BY_REGCOMP_DOT_C
+
+#ifndef MIN
+#define MIN(a,b) ((a) < (b) ? (a) : (b))
+#endif
+
  STATIC void
  STATIC void
-S_put_range(pTHX_ SV *sv, UV start, UV end)
+S_put_range(pTHX_ SV *sv, UV start, const UV end, const bool allow_literals)
  {
  {
-
      /* Appends to 'sv' a displayable version of the range of code points from
      /* Appends to 'sv' a displayable version of the range of code points from
-     * 'start' to 'end' */
+     * 'start' to 'end'.  It assumes that only ASCII printables are displayable
+     * as-is (though some of these will be escaped by put_code_point()). */
+
+    const unsigned int min_range_count = 3;
  
      assert(start <= end);
  
      PERL_ARGS_ASSERT_PUT_RANGE;
  
  
      assert(start <= end);
  
      PERL_ARGS_ASSERT_PUT_RANGE;
  
-    if (end - start < 3) {  /* Individual chars in short ranges */
-        for (; start <= end; start++)
-            put_byte(sv, start);
-    }
-    else if (   end > 255
-             || ! isALPHANUMERIC(start)
-             || ! isALPHANUMERIC(end)
-             || isDIGIT(start) != isDIGIT(end)
-             || isUPPER(start) != isUPPER(end)
-             || isLOWER(start) != isLOWER(end)
-
-                /* This final test should get optimized out except on EBCDIC
-                 * platforms, where it causes ranges that cross discontinuities
-                 * like i/j to be shown as hex instead of the misleading,
-                 * e.g. H-K (since that range includes more than H, I, J, K).
+    while (start <= end) {
+        UV this_end;
+        const char * format;
+
+        if (end - start < min_range_count) {
+
+            /* Individual chars in short ranges */
+            for (; start <= end; start++) {
+                put_code_point(sv, start);
+            }
+            break;
+        }
+
+        /* If permitted by the input options, and there is a possibility that
+         * this range contains a printable literal, look to see if there is
+         * one.  */
+        if (allow_literals && start <= MAX_PRINT_A) {
+
+            /* If the range begin isn't an ASCII printable, effectively split
+             * the range into two parts:
+             *  1) the portion before the first such printable,
+             *  2) the rest
+             * and output them separately. */
+            if (! isPRINT_A(start)) {
+                UV temp_end = start + 1;
+
+                /* There is no point looking beyond the final possible
+                 * printable, in MAX_PRINT_A */
+                UV max = MIN(end, MAX_PRINT_A);
+
+                while (temp_end <= max && ! isPRINT_A(temp_end)) {
+                    temp_end++;
+                }
+
+                /* Here, temp_end points to one beyond the first printable if
+                 * found, or to one beyond 'max' if not.  If none found, make
+                 * sure that we use the entire range */
+                if (temp_end > MAX_PRINT_A) {
+                    temp_end = end + 1;
+                }
+
+                /* Output the first part of the split range, the part that
+                 * doesn't have printables, with no looking for literals
+                 * (otherwise we would infinitely recurse) */
+                put_range(sv, start, temp_end - 1, FALSE);
+
+                /* The 2nd part of the range (if any) starts here. */
+                start = temp_end;
+
+                /* We continue instead of dropping down because even if the 2nd
+                 * part is non-empty, it could be so short that we want to
+                 * output it specially, as tested for at the top of this loop.
                   * */
                   * */
-             || (end - start) != NATIVE_TO_ASCII(end) - NATIVE_TO_ASCII(start))
-    {
-        Perl_sv_catpvf(aTHX_ sv, "\\x{%02" UVXf "}-\\x{%02" UVXf "}",
-                       start,
-                       (end < 256) ? end : 255);
-    }
-    else { /* Here, the ends of the range are both digits, or both uppercase,
-              or both lowercase; and there's no discontinuity in the range
-              (which could happen on EBCDIC platforms) */
-        put_byte(sv, start);
-        sv_catpvs(sv, "-");
-        put_byte(sv, end);
+                continue;
+            }
+
+            /* Here, 'start' is a printable ASCII.  If it is an alphanumeric,
+             * output a sub-range of just the digits or letters, then process
+             * the remaining portion as usual. */
+            if (isALPHANUMERIC_A(start)) {
+                UV mask = (isDIGIT_A(start))
+                           ? _CC_DIGIT
+                             : isUPPER_A(start)
+                               ? _CC_UPPER
+                               : _CC_LOWER;
+                UV temp_end = start + 1;
+
+                /* Find the end of the sub-range that includes just the
+                 * characters in the same class as the first character in it */
+                while (temp_end <= end && _generic_isCC_A(temp_end, mask)) {
+                    temp_end++;
+                }
+                temp_end--;
+
+                /* For short ranges, don't duplicate the code above to output
+                 * them; just call recursively */
+                if (temp_end - start < min_range_count) {
+                    put_range(sv, start, temp_end, FALSE);
+                }
+                else {  /* Output as a range */
+                    put_code_point(sv, start);
+                    sv_catpvs(sv, "-");
+                    put_code_point(sv, temp_end);
+                }
+                start = temp_end + 1;
+                continue;
+            }
+
+            /* We output any other printables as individual characters */
+            if (isPUNCT_A(start) || isSPACE_A(start)) {
+                while (start <= end && (isPUNCT_A(start)
+                                        || isSPACE_A(start)))
+                {
+                    put_code_point(sv, start);
+                    start++;
+                }
+                continue;
+            }
+        } /* End of looking for literals */
+
+        /* Here is not to output as a literal.  Some control characters have
+         * mnemonic names.  Split off any of those at the beginning and end of
+         * the range to print mnemonically.  It isn't possible for many of
+         * these to be in a row, so this won't overwhelm with output */
+        while (isMNEMONIC_CNTRL(start) && start <= end) {
+            put_code_point(sv, start);
+            start++;
+        }
+        if (start < end && isMNEMONIC_CNTRL(end)) {
+
+            /* Here, the final character in the range has a mnemonic name.
+             * Work backwards from the end to find the final non-mnemonic */
+            UV temp_end = end - 1;
+            while (isMNEMONIC_CNTRL(temp_end)) {
+                temp_end--;
+            }
+
+            /* And separately output the range that doesn't have mnemonics */
+            put_range(sv, start, temp_end, FALSE);
+
+            /* Then output the mnemonic trailing controls */
+            start = temp_end + 1;
+            while (start <= end) {
+                put_code_point(sv, start);
+                start++;
+            }
+            break;
+        }
+
+        /* As a final resort, output the range or subrange as hex. */
+
+        this_end = (end < NUM_ANYOF_CODE_POINTS)
+                    ? end
+                    : NUM_ANYOF_CODE_POINTS - 1;
+        format = (this_end < 256)
+                 ? "\\x{%02"UVXf"}-\\x{%02"UVXf"}"
+                 : "\\x{%04"UVXf"}-\\x{%04"UVXf"}";
+        Perl_sv_catpvf(aTHX_ sv, format, start, this_end);
+        break;
      }
  }
  
  STATIC bool
      }
  }
  
  STATIC bool
-S_put_latin1_charclass_innards(pTHX_ SV *sv, char *bitmap)
+S_put_charclass_bitmap_innards(pTHX_ SV *sv, char *bitmap, SV** bitmap_invlist)
  {
      /* Appends to 'sv' a displayable version of the innards of the bracketed
       * character class whose bitmap is 'bitmap';  Returns 'TRUE' if it actually
  {
      /* Appends to 'sv' a displayable version of the innards of the bracketed
       * character class whose bitmap is 'bitmap';  Returns 'TRUE' if it actually
-     * output anything */
+     * output anything, and bitmap_invlist, if not NULL, will point to an
+     * inversion list of what is in the bit map */
  
      int i;
  
      int i;
-    bool has_output_anything = FALSE;
+    UV start, end;
+    unsigned int punct_count = 0;
+    SV* invlist = NULL;
+    SV** invlist_ptr;   /* Temporary, in case bitmap_invlist is NULL */
+    bool allow_literals = TRUE;
+
+    PERL_ARGS_ASSERT_PUT_CHARCLASS_BITMAP_INNARDS;
+
+    invlist_ptr = (bitmap_invlist) ? bitmap_invlist : &invlist;
+
+    /* Worst case is exactly every-other code point is in the list */
+    *invlist_ptr = _new_invlist(NUM_ANYOF_CODE_POINTS / 2);
+
+    /* Convert the bit map to an inversion list, keeping track of how many
+     * ASCII puncts are set, including an extra amount for the backslashed
+     * ones.  */
+    for (i = 0; i < NUM_ANYOF_CODE_POINTS; i++) {
+        if (BITMAP_TEST(bitmap, i)) {
+            *invlist_ptr = add_cp_to_invlist(*invlist_ptr, i);
+            if (isPUNCT_A(i)) {
+                punct_count++;
+                if isBACKSLASHED_PUNCT(i) {
+                    punct_count++;
+                }
+            }
+        }
+    }
  
  
-    PERL_ARGS_ASSERT_PUT_LATIN1_CHARCLASS_INNARDS;
+    /* Nothing to output */
+    if (_invlist_len(*invlist_ptr) == 0) {
+        SvREFCNT_dec(invlist);
+        return FALSE;
+    }
  
  
-    for (i = 0; i < 256; i++) {
-        if (i < 256 && BITMAP_TEST((U8 *) bitmap,i)) {
+    /* Generally, it is more readable if printable characters are output as
+     * literals, but if a range (nearly) spans all of them, it's best to output
+     * it as a single range.  This code will use a single range if all but 2
+     * printables are in it */
+    invlist_iterinit(*invlist_ptr);
+    while (invlist_iternext(*invlist_ptr, &start, &end)) {
  
  
-            /* The character at index i should be output.  Find the next
-             * character that should NOT be output */
-            int j;
-            for (j = i + 1; j <= 256; j++) {
-                if (! BITMAP_TEST((U8 *) bitmap, j)) {
-                    break;
-                }
-            }
+        /* If range starts beyond final printable, it doesn't have any in it */
+        if (start > MAX_PRINT_A) {
+            break;
+        }
  
  
-            /* Everything between them is a single range that should be output
-             * */
-            put_range(sv, i, j - 1);
-            has_output_anything = TRUE;
-            i = j;
+        /* In both ASCII and EBCDIC, a SPACE is the lowest printable.  To span
+         * all but two, the range must start and end no later than 2 from
+         * either end */
+        if (start < ' ' + 2 && end > MAX_PRINT_A - 2) {
+            if (end > MAX_PRINT_A) {
+                end = MAX_PRINT_A;
+            }
+            if (start < ' ') {
+                start = ' ';
+            }
+            if (end - start >= MAX_PRINT_A - ' ' - 2) {
+                allow_literals = FALSE;
+            }
+            break;
+        }
+    }
+    invlist_iterfinish(*invlist_ptr);
+
+    /* The legibility of the output depends mostly on how many punctuation
+     * characters are output.  There are 32 possible ASCII ones, and some have
+     * an additional backslash, bringing it to currently 36, so if any more
+     * than 18 are to be output, we can instead output it as its complement,
+     * yielding fewer puncts, and making it more legible.  But give some weight
+     * to the fact that outputting it as a complement is less legible than a
+     * straight output, so don't complement unless we are somewhat over the 18
+     * mark */
+    if (allow_literals && punct_count > 22) {
+        sv_catpvs(sv, "^");
+
+        /* Add everything remaining to the list, so when we invert it just
+         * below, it will be excluded */
+        _invlist_union_complement_2nd(*invlist_ptr, PL_InBitmap, invlist_ptr);
+        _invlist_invert(*invlist_ptr);
+    }
+
+    /* Here we have figured things out.  Output each range */
+    invlist_iterinit(*invlist_ptr);
+    while (invlist_iternext(*invlist_ptr, &start, &end)) {
+        if (start >= NUM_ANYOF_CODE_POINTS) {
+            break;
          }
          }
+        put_range(sv, start, end, allow_literals);
      }
      }
+    invlist_iterfinish(*invlist_ptr);
  
  
-    return has_output_anything;
+    return TRUE;
  }
  
  #define CLEAR_OPTSTART \
  }
  
  #define CLEAR_OPTSTART \
@@ -16572,7 +17100,6 @@ S_dumpuntil(pTHX_ const regexp *r, const regnode *start, const regnode *node,
             const regnode *last, const regnode *plast,
             SV* sv, I32 indent, U32 depth)
  {
             const regnode *last, const regnode *plast,
             SV* sv, I32 indent, U32 depth)
  {
-    dVAR;
      U8 op = PSEUDO;    /* Arbitrary non-END op. */
      const regnode *next;
      const regnode *optstart= NULL;
      U8 op = PSEUDO;    /* Arbitrary non-END op. */
      const regnode *next;
      const regnode *optstart= NULL;
@@ -16705,7 +17232,7 @@ S_dumpuntil(pTHX_ const regexp *r, const regnode *start, const regnode *node,
         }
         else if (PL_regkind[(U8)op] == ANYOF) {
             /* arglen 1 + class block */
         }
         else if (PL_regkind[(U8)op] == ANYOF) {
             /* arglen 1 + class block */
-           node += 1 + ((ANYOF_FLAGS(node) & ANYOF_POSIXL)
+           node += 1 + ((ANYOF_FLAGS(node) & ANYOF_MATCHES_POSIXL)
                            ? ANYOF_POSIXL_SKIP
                            : ANYOF_SKIP);
             node = NEXTOPER(node);
                            ? ANYOF_POSIXL_SKIP
                            : ANYOF_SKIP);
             node = NEXTOPER(node);