X-Git-Url: https://perl5.git.perl.org/perl5.git/blobdiff_plain/5a279aa01b85454d24e44898e581820d24e712b3..243effed56c5bea983c9cdbdc24b329f19ff0aad:/regcomp.c

diff --git a/regcomp.c b/regcomp.c
index 7460680..979dcae 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -81,16 +81,25 @@
 #define REG_COMP_C
 #ifdef PERL_IN_XSUB_RE
 #  include "re_comp.h"
+extern const struct regexp_engine my_reg_engine;
 #else
 #  include "regcomp.h"
 #endif
 
 #include "dquote_static.c"
-#ifndef PERL_IN_XSUB_RE
-#  include "charclass_invlists.h"
+#include "charclass_invlists.h"
+#include "inline_invlist.c"
+#include "unicode_constants.h"
+
+#ifdef HAS_ISBLANK
+#   define hasISBLANK 1
+#else
+#   define hasISBLANK 0
 #endif
 
 #define HAS_NONLATIN1_FOLD_CLOSURE(i) _HAS_NONLATIN1_FOLD_CLOSURE_ONLY_FOR_USE_BY_REGCOMP_DOT_C_AND_REGEXEC_DOT_C(i)
+#define IS_NON_FINAL_FOLD(c) _IS_NON_FINAL_FOLD_ONLY_FOR_USE_BY_REGCOMP_DOT_C(c)
+#define IS_IN_SOME_FOLD_L1(c) _IS_IN_SOME_FOLD_ONLY_FOR_USE_BY_REGCOMP_DOT_C(c)
 
 #ifdef op
 #undef op
@@ -150,6 +159,7 @@ typedef struct RExC_state_t {
     I32		in_lookbehind;
     I32		contains_locale;
     I32		override_recoding;
+    I32		in_multi_char_class;
     struct reg_code_block *code_blocks;	/* positions of literal (?{})
 					    within pattern */
     int		num_code_blocks;	/* size of code_blocks[] */
@@ -204,7 +214,8 @@ typedef struct RExC_state_t {
 #define RExC_recurse_count	(pRExC_state->recurse_count)
 #define RExC_in_lookbehind	(pRExC_state->in_lookbehind)
 #define RExC_contains_locale	(pRExC_state->contains_locale)
-#define RExC_override_recoding	(pRExC_state->override_recoding)
+#define RExC_override_recoding (pRExC_state->override_recoding)
+#define RExC_in_multi_char_class (pRExC_state->in_multi_char_class)
 
 
 #define	ISMULT1(c)	((c) == '*' || (c) == '+' || (c) == '?')
@@ -220,11 +231,12 @@ typedef struct RExC_state_t {
 #define	WORST		0	/* Worst case. */
 #define	HASWIDTH	0x01	/* Known to match non-null strings. */
 
-/* Simple enough to be STAR/PLUS operand; in an EXACT node must be a single
- * character, and if utf8, must be invariant.  Note that this is not the same
- * thing as REGNODE_SIMPLE */
+/* Simple enough to be STAR/PLUS operand; in an EXACTish node must be a single
+ * character.  (There needs to be a case: in the switch statement in regexec.c
+ * for any node marked SIMPLE.)  Note that this is not the same thing as
+ * REGNODE_SIMPLE */
 #define	SIMPLE		0x02
-#define	SPSTART		0x04	/* Starts with * or +. */
+#define	SPSTART		0x04	/* Starts with * or + */
 #define TRYAGAIN	0x08	/* Weeded out a declaration. */
 #define POSTPONED	0x10    /* (?1),(?&name), (??{...}) or similar */
 
@@ -289,8 +301,8 @@ typedef struct RExC_state_t {
     string can occur infinitely far to the right.
   
   - minlenp
-    A pointer to the minimum length of the pattern that the string 
-    was found inside. This is important as in the case of positive 
+    A pointer to the minimum number of characters of the pattern that the
+    string was found inside. This is important as in the case of positive
     lookahead or positive lookbehind we can have multiple patterns 
     involved. Consider
     
@@ -398,8 +410,8 @@ static const scan_data_t zero_scan_data =
 #define UNI_SEMANTICS (get_regex_charset(RExC_flags) == REGEX_UNICODE_CHARSET)
 #define AT_LEAST_UNI_SEMANTICS (get_regex_charset(RExC_flags) >= REGEX_UNICODE_CHARSET)
 #define ASCII_RESTRICTED (get_regex_charset(RExC_flags) == REGEX_ASCII_RESTRICTED_CHARSET)
-#define MORE_ASCII_RESTRICTED (get_regex_charset(RExC_flags) == REGEX_ASCII_MORE_RESTRICTED_CHARSET)
 #define AT_LEAST_ASCII_RESTRICTED (get_regex_charset(RExC_flags) >= REGEX_ASCII_RESTRICTED_CHARSET)
+#define ASCII_FOLD_RESTRICTED (get_regex_charset(RExC_flags) == REGEX_ASCII_MORE_RESTRICTED_CHARSET)
 
 #define FOLD cBOOL(RExC_flags & RXf_PMf_FOLD)
 
@@ -437,7 +449,7 @@ static const scan_data_t zero_scan_data =
     IV len = RExC_end - RExC_precomp;					\
 									\
     if (!SIZE_ONLY)							\
-	SAVEDESTRUCTOR_X(clear_re,(void*)RExC_rx_sv);			\
+	SAVEFREESV(RExC_rx_sv);						\
     if (len > RegexLengthToShowInErrorMessages) {			\
 	/* chop 10 shorter than the max, to ensure meaning of "..." */	\
 	len = RegexLengthToShowInErrorMessages - 10;			\
@@ -468,7 +480,7 @@ static const scan_data_t zero_scan_data =
  */
 #define	vFAIL(m) STMT_START {				\
     if (!SIZE_ONLY)					\
-	SAVEDESTRUCTOR_X(clear_re,(void*)RExC_rx_sv);	\
+	SAVEFREESV(RExC_rx_sv);				\
     Simple_vFAIL(m);					\
 } STMT_END
 
@@ -486,7 +498,7 @@ static const scan_data_t zero_scan_data =
  */
 #define	vFAIL2(m,a1) STMT_START {			\
     if (!SIZE_ONLY)					\
-	SAVEDESTRUCTOR_X(clear_re,(void*)RExC_rx_sv);	\
+	SAVEFREESV(RExC_rx_sv);				\
     Simple_vFAIL2(m, a1);				\
 } STMT_END
 
@@ -505,7 +517,7 @@ static const scan_data_t zero_scan_data =
  */
 #define	vFAIL3(m,a1,a2) STMT_START {			\
     if (!SIZE_ONLY)					\
-	SAVEDESTRUCTOR_X(clear_re,(void*)RExC_rx_sv);	\
+	SAVEFREESV(RExC_rx_sv);				\
     Simple_vFAIL3(m, a1, a2);				\
 } STMT_END
 
@@ -683,8 +695,6 @@ DEBUG_OPTIMISE_MORE_r(if(data){                                      \
     PerlIO_printf(Perl_debug_log,"\n");                              \
 });
 
-static void clear_re(pTHX_ void *r);
-
 /* Mark that we cannot extend a found fixed substring at this point.
    Update the longest found anchored substring and the longest found
    floating substrings if needed. */
@@ -748,7 +758,7 @@ S_cl_anything(const RExC_state_t *pRExC_state, struct regnode_charclass_class *c
 
     ANYOF_BITMAP_SETALL(cl);
     cl->flags = ANYOF_CLASS|ANYOF_EOS|ANYOF_UNICODE_ALL
-		|ANYOF_LOC_NONBITMAP_FOLD|ANYOF_NON_UTF8_LATIN1_ALL;
+		|ANYOF_NON_UTF8_LATIN1_ALL;
 
     /* If any portion of the regex is to operate under locale rules,
      * initialization includes it.  The reason this isn't done for all regexes
@@ -759,7 +769,7 @@ S_cl_anything(const RExC_state_t *pRExC_state, struct regnode_charclass_class *c
      * necessary. */
     if (RExC_contains_locale) {
 	ANYOF_CLASS_SETALL(cl);	    /* /l uses class */
-	cl->flags |= ANYOF_LOCALE;
+	cl->flags |= ANYOF_LOCALE|ANYOF_LOC_FOLD;
     }
     else {
 	ANYOF_CLASS_ZERO(cl);	    /* Only /l uses class now */
@@ -814,8 +824,8 @@ S_cl_and(struct regnode_charclass_class *cl,
     if (!(ANYOF_CLASS_TEST_ANY_SET(and_with))
 	&& !(ANYOF_CLASS_TEST_ANY_SET(cl))
 	&& (and_with->flags & ANYOF_LOCALE) == (cl->flags & ANYOF_LOCALE)
-	&& !(and_with->flags & ANYOF_LOC_NONBITMAP_FOLD)
-	&& !(cl->flags & ANYOF_LOC_NONBITMAP_FOLD)) {
+	&& !(and_with->flags & ANYOF_LOC_FOLD)
+	&& !(cl->flags & ANYOF_LOC_FOLD)) {
 	int i;
 
 	if (and_with->flags & ANYOF_INVERT)
@@ -948,8 +958,8 @@ S_cl_or(const RExC_state_t *pRExC_state, struct regnode_charclass_class *cl, con
 	 *   (OK1(i) | OK1(i')) | (!OK1(i) & !OK1(i'))
 	 */
 	else if ( (or_with->flags & ANYOF_LOCALE) == (cl->flags & ANYOF_LOCALE)
-	     && !(or_with->flags & ANYOF_LOC_NONBITMAP_FOLD)
-	     && !(cl->flags & ANYOF_LOC_NONBITMAP_FOLD) ) {
+	     && !(or_with->flags & ANYOF_LOC_FOLD)
+	     && !(cl->flags & ANYOF_LOC_FOLD) ) {
 	    int i;
 
 	    for (i = 0; i < ANYOF_BITMAP_SIZE; i++)
@@ -975,8 +985,8 @@ S_cl_or(const RExC_state_t *pRExC_state, struct regnode_charclass_class *cl, con
     } else {    /* 'or_with' is not inverted */
 	/* (B1 | CL1) | (B2 | CL2) = (B1 | B2) | (CL1 | CL2)) */
 	if ( (or_with->flags & ANYOF_LOCALE) == (cl->flags & ANYOF_LOCALE)
-	     && (!(or_with->flags & ANYOF_LOC_NONBITMAP_FOLD)
-		 || (cl->flags & ANYOF_LOC_NONBITMAP_FOLD)) ) {
+	     && (!(or_with->flags & ANYOF_LOC_FOLD)
+		 || (cl->flags & ANYOF_LOC_FOLD)) ) {
 	    int i;
 
 	    /* OR char bitmap and class bitmap separately */
@@ -2579,89 +2589,72 @@ S_make_trie_failtable(pTHX_ RExC_state_t *pRExC_state, regnode *source,  regnode
 
 
 /* The below joins as many adjacent EXACTish nodes as possible into a single
- * one, and looks for problematic sequences of characters whose folds vs.
- * non-folds have sufficiently different lengths, that the optimizer would be
- * fooled into rejecting legitimate matches of them, and the trie construction
- * code can't cope with them.  The joining is only done if:
+ * one.  The regop may be changed if the node(s) contain certain sequences that
+ * require special handling.  The joining is only done if:
  * 1) there is room in the current conglomerated node to entirely contain the
  *    next one.
  * 2) they are the exact same node type
  *
- * The adjacent nodes actually may be separated by NOTHING kind nodes, and
+ * The adjacent nodes actually may be separated by NOTHING-kind nodes, and
  * these get optimized out
  *
- * If there are problematic code sequences, *min_subtract is set to the delta
- * that the minimum size of the node can be less than its actual size.  And,
- * the node type of the result is changed to reflect that it contains these
- * sequences.
+ * If a node is to match under /i (folded), the number of characters it matches
+ * can be different than its character length if it contains a multi-character
+ * fold.  *min_subtract is set to the total delta of the input nodes.
  *
  * And *has_exactf_sharp_s is set to indicate whether or not the node is EXACTF
  * and contains LATIN SMALL LETTER SHARP S
  *
  * This is as good a place as any to discuss the design of handling these
- * problematic sequences.  It's been wrong in Perl for a very long time.  There
- * are three code points in Unicode whose folded lengths differ so much from
- * the un-folded lengths that it causes problems for the optimizer and trie
- * construction.  Why only these are problematic, and not others where lengths
- * also differ is something I (khw) do not understand.  New versions of Unicode
- * might add more such code points.  Hopefully the logic in fold_grind.t that
- * figures out what to test (in part by verifying that each size-combination
- * gets tested) will catch any that do come along, so they can be added to the
- * special handling below.  The chances of new ones are actually rather small,
- * as most, if not all, of the world's scripts that have casefolding have
- * already been encoded by Unicode.  Also, a number of Unicode's decisions were
- * made to allow compatibility with pre-existing standards, and almost all of
- * those have already been dealt with.  These would otherwise be the most
- * likely candidates for generating further tricky sequences.  In other words,
- * Unicode by itself is unlikely to add new ones unless it is for compatibility
- * with pre-existing standards, and there aren't many of those left.
- *
- * The previous designs for dealing with these involved assigning a special
- * node for them.  This approach doesn't work, as evidenced by this example:
+ * multi-character fold sequences.  It's been wrong in Perl for a very long
+ * time.  There are three code points in Unicode whose multi-character folds
+ * were long ago discovered to mess things up.  The previous designs for
+ * dealing with these involved assigning a special node for them.  This
+ * approach doesn't work, as evidenced by this example:
  *      "\xDFs" =~ /s\xDF/ui    # Used to fail before these patches
- * Both these fold to "sss", but if the pattern is parsed to create a node of
- * that would match just the \xDF, it won't be able to handle the case where a
+ * Both these fold to "sss", but if the pattern is parsed to create a node that
+ * would match just the \xDF, it won't be able to handle the case where a
  * successful match would have to cross the node's boundary.  The new approach
  * that hopefully generally solves the problem generates an EXACTFU_SS node
  * that is "sss".
  *
- * There are a number of components to the approach (a lot of work for just
- * three code points!):
- * 1)   This routine examines each EXACTFish node that could contain the
- *      problematic sequences.  It returns in *min_subtract how much to
+ * It turns out that there are problems with all multi-character folds, and not
+ * just these three.  Now the code is general, for all such cases, but the
+ * three still have some special handling.  The approach taken is:
+ * 1)   This routine examines each EXACTFish node that could contain multi-
+ *      character fold sequences.  It returns in *min_subtract how much to
  *      subtract from the the actual length of the string to get a real minimum
- *      for one that could match it.  This number is usually 0 except for the
- *      problematic sequences.  This delta is used by the caller to adjust the
- *      min length of the match, and the delta between min and max, so that the
- *      optimizer doesn't reject these possibilities based on size constraints.
- * 2)   These sequences require special handling by the trie code, so it
- *      changes the joined node type to ops for the trie's benefit, those new
- *      ops being EXACTFU_SS and EXACTFU_TRICKYFOLD.
- * 3)   This is sufficient for the two Greek sequences (described below), but
- *      the one involving the Sharp s (\xDF) needs more.  The node type
- *      EXACTFU_SS is used for an EXACTFU node that contains at least one "ss"
- *      sequence in it.  For non-UTF-8 patterns and strings, this is the only
- *      case where there is a possible fold length change.  That means that a
- *      regular EXACTFU node without UTF-8 involvement doesn't have to concern
- *      itself with length changes, and so can be processed faster.  regexec.c
- *      takes advantage of this.  Generally, an EXACTFish node that is in UTF-8
- *      is pre-folded by regcomp.c.  This saves effort in regex matching.
- *      However, probably mostly for historical reasons, the pre-folding isn't
- *      done for non-UTF8 patterns (and it can't be for EXACTF and EXACTFL
- *      nodes, as what they fold to isn't known until runtime.)  The fold
+ *      match length; it is 0 if there are no multi-char folds.  This delta is
+ *      used by the caller to adjust the min length of the match, and the delta
+ *      between min and max, so that the optimizer doesn't reject these
+ *      possibilities based on size constraints.
+ * 2)   Certain of these sequences require special handling by the trie code,
+ *      so, if found, this code changes the joined node type to special ops:
+ *      EXACTFU_TRICKYFOLD and EXACTFU_SS.
+ * 3)   For the sequence involving the Sharp s (\xDF), the node type EXACTFU_SS
+ *      is used for an EXACTFU node that contains at least one "ss" sequence in
+ *      it.  For non-UTF-8 patterns and strings, this is the only case where
+ *      there is a possible fold length change.  That means that a regular
+ *      EXACTFU node without UTF-8 involvement doesn't have to concern itself
+ *      with length changes, and so can be processed faster.  regexec.c takes
+ *      advantage of this.  Generally, an EXACTFish node that is in UTF-8 is
+ *      pre-folded by regcomp.c.  This saves effort in regex matching.
+ *      However, the pre-folding isn't done for non-UTF8 patterns because the
+ *      fold of the MICRO SIGN requires UTF-8, and we don't want to slow things
+ *      down by forcing the pattern into UTF8 unless necessary.  Also what
+ *      EXACTF and EXACTFL nodes fold to isn't known until runtime.  The fold
  *      possibilities for the non-UTF8 patterns are quite simple, except for
- *      the sharp s.  All the ones that don't involve a UTF-8 target string
- *      are members of a fold-pair, and arrays are set up for all of them
- *      that quickly find the other member of the pair.  It might actually
- *      be faster to pre-fold these, but it isn't currently done, except for
- *      the sharp s.  Code elsewhere in this file makes sure that it gets
- *      folded to 'ss', even if the pattern isn't UTF-8.  This avoids the
- *      issues described in the next item.
+ *      the sharp s.  All the ones that don't involve a UTF-8 target string are
+ *      members of a fold-pair, and arrays are set up for all of them so that
+ *      the other member of the pair can be found quickly.  Code elsewhere in
+ *      this file makes sure that in EXACTFU nodes, the sharp s gets folded to
+ *      'ss', even if the pattern isn't UTF-8.  This avoids the issues
+ *      described in the next item.
  * 4)   A problem remains for the sharp s in EXACTF nodes.  Whether it matches
  *      'ss' or not is not knowable at compile time.  It will match iff the
  *      target string is in UTF-8, unlike the EXACTFU nodes, where it always
  *      matches; and the EXACTFL and EXACTFA nodes where it never does.  Thus
- *      it can't be folded to "ss" at compile time, unlike EXACTFU does as
+ *      it can't be folded to "ss" at compile time, unlike EXACTFU does (as
  *      described in item 3).  An assumption that the optimizer part of
  *      regexec.c (probably unwittingly) makes is that a character in the
  *      pattern corresponds to at most a single character in the target string.
@@ -2733,6 +2726,8 @@ S_join_exact(pTHX_ RExC_state_t *pRExC_state, regnode *scan, UV *min_subtract, b
             const unsigned int oldl = STR_LEN(scan);
             regnode * const nnext = regnext(n);
 
+            /* XXX I (khw) kind of doubt that this works on platforms where
+             * U8_MAX is above 255 because of lots of other assumptions */
             if (oldl + STR_LEN(n) > U8_MAX)
                 break;
             
@@ -2774,19 +2769,9 @@ S_join_exact(pTHX_ RExC_state_t *pRExC_state, regnode *scan, UV *min_subtract, b
      * hence missed).  The sequences only happen in folding, hence for any
      * non-EXACT EXACTish node */
     if (OP(scan) != EXACT) {
-        U8 *s;
-        U8 * s0 = (U8*) STRING(scan);
-        U8 * const s_end = s0 + STR_LEN(scan);
-
-	/* The below is perhaps overboard, but this allows us to save a test
-	 * each time through the loop at the expense of a mask.  This is
-	 * because on both EBCDIC and ASCII machines, 'S' and 's' differ by a
-	 * single bit.  On ASCII they are 32 apart; on EBCDIC, they are 64.
-	 * This uses an exclusive 'or' to find that bit and then inverts it to
-	 * form a mask, with just a single 0, in the bit position where 'S' and
-	 * 's' differ. */
-	const U8 S_or_s_mask = (U8) ~ ('S' ^ 's');
-	const U8 s_masked = 's' & S_or_s_mask;
+        const U8 * const s0 = (U8*) STRING(scan);
+        const U8 * s = s0;
+        const U8 * const s_end = s0 + STR_LEN(scan);
 
 	/* One pass is made over the node's string looking for all the
 	 * possibilities.  to avoid some tests in the loop, there are two main
@@ -2794,146 +2779,137 @@ S_join_exact(pTHX_ RExC_state_t *pRExC_state, regnode *scan, UV *min_subtract, b
 	 * non-UTF-8 */
 	if (UTF) {
 
-	    /* There are two problematic Greek code points in Unicode
-	     * casefolding
-	     *
-	     * U+0390 - GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS
-	     * U+03B0 - GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS
-	     *
-	     * which casefold to
-	     *
-	     * Unicode                      UTF-8
-	     *
-	     * U+03B9 U+0308 U+0301         0xCE 0xB9 0xCC 0x88 0xCC 0x81
-	     * U+03C5 U+0308 U+0301         0xCF 0x85 0xCC 0x88 0xCC 0x81
-             *
-	     * This means that in case-insensitive matching (or "loose
-	     * matching", as Unicode calls it), an EXACTF of length six (the
-	     * UTF-8 encoded byte length of the above casefolded versions) can
-	     * match a target string of length two (the byte length of UTF-8
-	     * encoded U+0390 or U+03B0).  This would rather mess up the
-	     * minimum length computation.  (there are other code points that
-	     * also fold to these two sequences, but the delta is smaller)
-	     *
-	     * If these sequences are found, the minimum length is decreased by
-	     * four (six minus two).
-	     *
-	     * Similarly, 'ss' may match the single char and byte LATIN SMALL
-	     * LETTER SHARP S.  We decrease the min length by 1 for each
-	     * occurrence of 'ss' found */
-
-#ifdef EBCDIC /* RD tunifold greek 0390 and 03B0 */
-#	    define U390_first_byte 0xb4
-	    const U8 U390_tail[] = "\x68\xaf\x49\xaf\x42";
-#	    define U3B0_first_byte 0xb5
-	    const U8 U3B0_tail[] = "\x46\xaf\x49\xaf\x42";
-#else
-#	    define U390_first_byte 0xce
-	    const U8 U390_tail[] = "\xb9\xcc\x88\xcc\x81";
-#	    define U3B0_first_byte 0xcf
-	    const U8 U3B0_tail[] = "\x85\xcc\x88\xcc\x81";
-#endif
-	    const U8 len = sizeof(U390_tail); /* (-1 for NUL; +1 for 1st byte;
-						 yields a net of 0 */
-	    /* Examine the string for one of the problematic sequences */
-	    for (s = s0;
-		 s < s_end - 1; /* Can stop 1 before the end, as minimum length
-				 * sequence we are looking for is 2 */
-		 s += UTF8SKIP(s))
+            /* Examine the string for a multi-character fold sequence.  UTF-8
+             * patterns have all characters pre-folded by the time this code is
+             * executed */
+            while (s < s_end - 1) /* Can stop 1 before the end, as minimum
+                                     length sequence we are looking for is 2 */
 	    {
+                int count = 0;
+                int len = is_MULTI_CHAR_FOLD_utf8_safe(s, s_end);
+                if (! len) {    /* Not a multi-char fold: get next char */
+                    s += UTF8SKIP(s);
+                    continue;
+                }
 
-		/* Look for the first byte in each problematic sequence */
-		switch (*s) {
-		    /* We don't have to worry about other things that fold to
-		     * 's' (such as the long s, U+017F), as all above-latin1
-		     * code points have been pre-folded */
-		    case 's':
-		    case 'S':
-
-                        /* Current character is an 's' or 'S'.  If next one is
-                         * as well, we have the dreaded sequence */
-			if (((*(s+1) & S_or_s_mask) == s_masked)
-			    /* These two node types don't have special handling
-			     * for 'ss' */
-			    && OP(scan) != EXACTFL && OP(scan) != EXACTFA)
-			{
-			    *min_subtract += 1;
-			    OP(scan) = EXACTFU_SS;
-			    s++;    /* No need to look at this character again */
-			}
-			break;
-
-		    case U390_first_byte:
-			if (s_end - s >= len
-
-			    /* The 1's are because are skipping comparing the
-			     * first byte */
-			    && memEQ(s + 1, U390_tail, len - 1))
-			{
-			    goto greek_sequence;
-			}
-			break;
+                /* Nodes with 'ss' require special handling, except for EXACTFL
+                 * and EXACTFA for which there is no multi-char fold to this */
+                if (len == 2 && *s == 's' && *(s+1) == 's'
+                    && OP(scan) != EXACTFL && OP(scan) != EXACTFA)
+                {
+                    count = 2;
+                    OP(scan) = EXACTFU_SS;
+                    s += 2;
+                }
+                else if (len == 6   /* len is the same in both ASCII and EBCDIC for these */
+                         && (memEQ(s, GREEK_SMALL_LETTER_IOTA_UTF8
+                                      COMBINING_DIAERESIS_UTF8
+                                      COMBINING_ACUTE_ACCENT_UTF8,
+                                   6)
+                             || memEQ(s, GREEK_SMALL_LETTER_UPSILON_UTF8
+                                         COMBINING_DIAERESIS_UTF8
+                                         COMBINING_ACUTE_ACCENT_UTF8,
+                                     6)))
+                {
+                    count = 3;
+
+                    /* These two folds require special handling by trie's, so
+                     * change the node type to indicate this.  If EXACTFA and
+                     * EXACTFL were ever to be handled by trie's, this would
+                     * have to be changed.  If this node has already been
+                     * changed to EXACTFU_SS in this loop, leave it as is.  (I
+                     * (khw) think it doesn't matter in regexec.c for UTF
+                     * patterns, but no need to change it */
+                    if (OP(scan) == EXACTFU) {
+                        OP(scan) = EXACTFU_TRICKYFOLD;
+                    }
+                    s += 6;
+                }
+                else { /* Here is a generic multi-char fold. */
+                    const U8* multi_end  = s + len;
+
+                    /* Count how many characters in it.  In the case of /l and
+                     * /aa, no folds which contain ASCII code points are
+                     * allowed, so check for those, and skip if found.  (In
+                     * EXACTFL, no folds are allowed to any Latin1 code point,
+                     * not just ASCII.  But there aren't any of these
+                     * currently, nor ever likely, so don't take the time to
+                     * test for them.  The code that generates the
+                     * is_MULTI_foo() macros croaks should one actually get put
+                     * into Unicode .) */
+                    if (OP(scan) != EXACTFL && OP(scan) != EXACTFA) {
+                        count = utf8_length(s, multi_end);
+                        s = multi_end;
+                    }
+                    else {
+                        while (s < multi_end) {
+                            if (isASCII(*s)) {
+                                s++;
+                                goto next_iteration;
+                            }
+                            else {
+                                s += UTF8SKIP(s);
+                            }
+                            count++;
+                        }
+                    }
+                }
 
-		    case U3B0_first_byte:
-			if (! (s_end - s >= len
-			       && memEQ(s + 1, U3B0_tail, len - 1)))
-			{
-			    break;
-			}
-		      greek_sequence:
-			*min_subtract += 4;
-
-			/* This can't currently be handled by trie's, so change
-			 * the node type to indicate this.  If EXACTFA and
-			 * EXACTFL were ever to be handled by trie's, this
-			 * would have to be changed.  If this node has already
-			 * been changed to EXACTFU_SS in this loop, leave it as
-			 * is.  (I (khw) think it doesn't matter in regexec.c
-			 * for UTF patterns, but no need to change it */
-			if (OP(scan) == EXACTFU) {
-			    OP(scan) = EXACTFU_TRICKYFOLD;
-			}
-			s += 6;	/* We already know what this sequence is.  Skip
-				   the rest of it */
-			break;
-		}
+                /* The delta is how long the sequence is minus 1 (1 is how long
+                 * the character that folds to the sequence is) */
+                *min_subtract += count - 1;
+            next_iteration: ;
 	    }
 	}
 	else if (OP(scan) != EXACTFL && OP(scan) != EXACTFA) {
 
-	    /* Here, the pattern is not UTF-8.  We need to look only for the
-	     * 'ss' sequence, and in the EXACTF case, the sharp s, which can be
-	     * in the final position.  Otherwise we can stop looking 1 byte
-	     * earlier because have to find both the first and second 's' */
+            /* Here, the pattern is not UTF-8.  Look for the multi-char folds
+             * that are all ASCII.  As in the above case, EXACTFL and EXACTFA
+             * nodes can't have multi-char folds to this range (and there are
+             * no existing ones in the upper latin1 range).  In the EXACTF
+             * case we look also for the sharp s, which can be in the final
+             * position.  Otherwise we can stop looking 1 byte earlier because
+             * have to find at least two characters for a multi-fold */
 	    const U8* upper = (OP(scan) == EXACTF) ? s_end : s_end -1;
 
-	    for (s = s0; s < upper; s++) {
-		switch (*s) {
-		    case 'S':
-		    case 's':
-			if (s_end - s > 1
-			    && ((*(s+1) & S_or_s_mask) == s_masked))
-			{
-			    *min_subtract += 1;
-
-			    /* EXACTF nodes need to know that the minimum
-			     * length changed so that a sharp s in the string
-			     * can match this ss in the pattern, but they
-			     * remain EXACTF nodes, as they are not trie'able,
-			     * so don't have to invent a new node type to
-			     * exclude them from the trie code */
-			    if (OP(scan) != EXACTF) {
-				OP(scan) = EXACTFU_SS;
-			    }
-			    s++;
-			}
-			break;
-		    case LATIN_SMALL_LETTER_SHARP_S:
-			if (OP(scan) == EXACTF) {
-			    *has_exactf_sharp_s = TRUE;
-			}
-			break;
+            /* The below is perhaps overboard, but this allows us to save a
+             * test each time through the loop at the expense of a mask.  This
+             * is because on both EBCDIC and ASCII machines, 'S' and 's' differ
+             * by a single bit.  On ASCII they are 32 apart; on EBCDIC, they
+             * are 64.  This uses an exclusive 'or' to find that bit and then
+             * inverts it to form a mask, with just a single 0, in the bit
+             * position where 'S' and 's' differ. */
+            const U8 S_or_s_mask = (U8) ~ ('S' ^ 's');
+            const U8 s_masked = 's' & S_or_s_mask;
+
+	    while (s < upper) {
+                int len = is_MULTI_CHAR_FOLD_latin1_safe(s, s_end);
+                if (! len) {    /* Not a multi-char fold. */
+                    if (*s == LATIN_SMALL_LETTER_SHARP_S && OP(scan) == EXACTF)
+                    {
+                        *has_exactf_sharp_s = TRUE;
+                    }
+                    s++;
+                    continue;
+                }
+
+                if (len == 2
+                    && ((*s & S_or_s_mask) == s_masked)
+                    && ((*(s+1) & S_or_s_mask) == s_masked))
+                {
+
+                    /* EXACTF nodes need to know that the minimum length
+                     * changed so that a sharp s in the string can match this
+                     * ss in the pattern, but they remain EXACTF nodes, as they
+                     * won't match this unless the target string is is UTF-8,
+                     * which we don't know until runtime */
+                    if (OP(scan) != EXACTF) {
+                        OP(scan) = EXACTFU_SS;
+                    }
 		}
+
+                *min_subtract += len - 1;
+                s += len;
 	    }
 	}
     }
@@ -3024,7 +3000,8 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
 			/* and_withp: Valid if flags & SCF_DO_STCLASS_OR */
 {
     dVAR;
-    I32 min = 0, pars = 0, code;
+    I32 min = 0;    /* There must be at least this number of characters to match */
+    I32 pars = 0, code;
     regnode *scan = *scanp, *next;
     I32 delta = 0;
     int is_inf = (flags & SCF_DO_SUBSTR) && (data->flags & SF_IS_INF);
@@ -3051,9 +3028,9 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
 
   fake_study_recurse:
     while ( scan && OP(scan) != END && scan < last ){
-        UV min_subtract = 0;    /* How much to subtract from the minimum node
-                                   length to get a real minimum (because the
-                                   folded version may be shorter) */
+        UV min_subtract = 0;    /* How mmany chars to subtract from the minimum
+                                   node length to get a real minimum (because
+                                   the folded version may be shorter) */
 	bool has_exactf_sharp_s = FALSE;
 	/* Peephole optimizer: */
 	DEBUG_STUDYDATA("Peep:", data,depth);
@@ -3418,7 +3395,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                                      * trietype so we can turn them into a trie. If/when we
                                      * allow NOTHING to start a trie sequence this condition will be
                                      * required, and it isn't expensive so we leave it in for now. */
-                                    if ( trietype != NOTHING )
+                                    if ( trietype && trietype != NOTHING )
                                         make_trie( pRExC_state,
                                                 startbranch, first, cur, tail, count,
                                                 trietype, depth+1 );
@@ -3449,7 +3426,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                               "", SvPV_nolen_const( mysv ),REG_NODE_NUM(cur));
 
                         });
-                        if ( last ) {
+                        if ( last && trietype ) {
                             if ( trietype != NOTHING ) {
                                 /* the last branch of the sequence was part of a trie,
                                  * so we have to construct it here outside of the loop
@@ -3606,7 +3583,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
 		if (uc >= 0x100 ||
 		    (!(data->start_class->flags & (ANYOF_CLASS | ANYOF_LOCALE))
 		    && !ANYOF_BITMAP_TEST(data->start_class, uc)
-		    && (!(data->start_class->flags & ANYOF_LOC_NONBITMAP_FOLD)
+		    && (!(data->start_class->flags & ANYOF_LOC_FOLD)
 			|| !ANYOF_BITMAP_TEST(data->start_class, PL_fold_latin1[uc])))
                     )
 		{
@@ -3661,13 +3638,11 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
 		uc = utf8_to_uvchr_buf(s, s + l, NULL);
 		l = utf8_length(s, s + l);
 	    }
-	    else if (has_exactf_sharp_s) {
+	    if (has_exactf_sharp_s) {
 		RExC_seen |= REG_SEEN_EXACTF_SHARP_S;
 	    }
 	    min += l - min_subtract;
-            if (min < 0) {
-                min = 0;
-            }
+            assert (min >= 0);
             delta += min_subtract;
 	    if (flags & SCF_DO_SUBSTR) {
 		data->pos_min += l - min_subtract;
@@ -3694,12 +3669,11 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
 		if (compat) {
 		    ANYOF_BITMAP_SET(data->start_class, uc);
 		    data->start_class->flags &= ~ANYOF_EOS;
-		    data->start_class->flags |= ANYOF_LOC_NONBITMAP_FOLD;
 		    if (OP(scan) == EXACTFL) {
 			/* XXX This set is probably no longer necessary, and
 			 * probably wrong as LOCALE now is on in the initial
 			 * state */
-			data->start_class->flags |= ANYOF_LOCALE;
+			data->start_class->flags |= ANYOF_LOCALE|ANYOF_LOC_FOLD;
 		    }
 		    else {
 
@@ -3734,7 +3708,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
 		}
 	    }
 	    else if (flags & SCF_DO_STCLASS_OR) {
-		if (data->start_class->flags & ANYOF_LOC_NONBITMAP_FOLD) {
+		if (data->start_class->flags & ANYOF_LOC_FOLD) {
 		    /* false positive possible if the class is case-folded.
 		       Assume that the locale settings are the same... */
 		    if (uc < 0x100) {
@@ -3897,8 +3871,11 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
 		    && data && !(data->flags & (SF_HAS_PAR|SF_IN_PAR))
 		    && maxcount <= REG_INFTY/3) /* Complement check for big count */
 		{
+		    /* Fatal warnings may leak the regexp without this: */
+		    SAVEFREESV(RExC_rx_sv);
 		    ckWARNreg(RExC_parse,
 			      "Quantifier unexpected on zero-length expression");
+		    ReREFCNT_inc(RExC_rx_sv);
 		}
 
 		min += minnext * mincount;
@@ -3958,6 +3935,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
 		      && !(data->flags & SF_HAS_EVAL)
 		      && !deltanext	/* atom is fixed width */
 		      && minnext != 0	/* CURLYM can't handle zero width */
+                      && ! (RExC_seen & REG_SEEN_EXACTF_SHARP_S) /* Nor \xDF */
 		) {
 		    /* XXXX How to optimize if data == 0? */
 		    /* Optimize to a simpler form.  */
@@ -4155,8 +4133,8 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
 		    cl_and(data->start_class, and_withp);
 		flags &= ~SCF_DO_STCLASS;
             }
-	    min += 1;
-	    delta += 1;
+	    min++;
+	    delta++;    /* Because of the 2 char string cr-lf */
             if (flags & SCF_DO_SUBSTR) {
     	        SCAN_COMMIT(pRExC_state,data,minlenp);	/* Cannot expect anything... */
     	        data->pos_min += 1;
@@ -4207,7 +4185,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
 		case ALNUM:
 		    if (flags & SCF_DO_STCLASS_AND) {
 			if (!(data->start_class->flags & ANYOF_LOCALE)) {
-			    ANYOF_CLASS_CLEAR(data->start_class,ANYOF_NALNUM);
+			    ANYOF_CLASS_CLEAR(data->start_class,ANYOF_NWORDCHAR);
                             if (OP(scan) == ALNUMU) {
                                 for (value = 0; value < 256; value++) {
                                     if (!isWORDCHAR_L1(value)) {
@@ -4225,7 +4203,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
 		    }
 		    else {
 			if (data->start_class->flags & ANYOF_LOCALE)
-			    ANYOF_CLASS_SET(data->start_class,ANYOF_ALNUM);
+			    ANYOF_CLASS_SET(data->start_class,ANYOF_WORDCHAR);
 
 			/* Even if under locale, set the bits for non-locale
 			 * in case it isn't a true locale-node.  This will
@@ -4248,7 +4226,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
 		case NALNUM:
 		    if (flags & SCF_DO_STCLASS_AND) {
 			if (!(data->start_class->flags & ANYOF_LOCALE)) {
-			    ANYOF_CLASS_CLEAR(data->start_class,ANYOF_ALNUM);
+			    ANYOF_CLASS_CLEAR(data->start_class,ANYOF_WORDCHAR);
                             if (OP(scan) == NALNUMU) {
                                 for (value = 0; value < 256; value++) {
                                     if (isWORDCHAR_L1(value)) {
@@ -4266,7 +4244,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
 		    }
 		    else {
 			if (data->start_class->flags & ANYOF_LOCALE)
-			    ANYOF_CLASS_SET(data->start_class,ANYOF_NALNUM);
+			    ANYOF_CLASS_SET(data->start_class,ANYOF_NWORDCHAR);
 
 			/* Even if under locale, set the bits for non-locale in
 			 * case it isn't a true locale-node.  This will create
@@ -4895,13 +4873,18 @@ Perl_reginitcolors(pTHX)
 
 
 #ifdef TRIE_STUDY_OPT
-#define CHECK_RESTUDY_GOTO                                  \
+#define CHECK_RESTUDY_GOTO_butfirst(dOsomething)            \
+    STMT_START {                                            \
         if (                                                \
               (data.flags & SCF_TRIE_RESTUDY)               \
               && ! restudied++                              \
-        )     goto reStudy
+        ) {                                                 \
+            dOsomething;                                    \
+            goto reStudy;                                   \
+        }                                                   \
+    } STMT_END
 #else
-#define CHECK_RESTUDY_GOTO
+#define CHECK_RESTUDY_GOTO_butfirst
 #endif        
 
 /*
@@ -4961,16 +4944,23 @@ Perl_pregcomp(pTHX_ SV * const pattern, const U32 flags)
 }
 #endif
 
-/* public(ish) wrapper for Perl_re_op_compile that only takes an SV
- * pattern rather than a list of OPs */
+/* public(ish) entry point for the perl core's own regex compiling code.
+ * It's actually a wrapper for Perl_re_op_compile that only takes an SV
+ * pattern rather than a list of OPs, and uses the internal engine rather
+ * than the current one */
 
 REGEXP *
 Perl_re_compile(pTHX_ SV * const pattern, U32 rx_flags)
 {
     SV *pat = pattern; /* defeat constness! */
     PERL_ARGS_ASSERT_RE_COMPILE;
-    return Perl_re_op_compile(aTHX_ &pat, 1, NULL, current_re_engine(),
-				NULL, NULL, rx_flags, 0);
+    return Perl_re_op_compile(aTHX_ &pat, 1, NULL,
+#ifdef PERL_IN_XSUB_RE
+                                &my_reg_engine,
+#else
+                                &PL_core_reg_engine,
+#endif
+                                NULL, NULL, rx_flags, 0);
 }
 
 /* see if there are any run-time code blocks in the pattern.
@@ -5021,7 +5011,7 @@ S_has_runtime_code(pTHX_ RExC_state_t * const pRExC_state, OP *expr,
  *
  * becomes
  *
- *    qr'a\\bc                       def\'ghi\\\\jkl(?{"this is runtime"})mno'
+ *    qr'a\\bc_______________________def\'ghi\\\\jkl(?{"this is runtime"})mno'
  *
  * After eval_sv()-ing that, grab any new code blocks from the returned qr
  * and merge them with any code blocks of the original regexp.
@@ -5074,7 +5064,7 @@ S_compile_runtime_code(pTHX_ RExC_state_t * const pRExC_state,
 		/* blank out literal code block */
 		assert(pat[s] == '(');
 		while (s <= pRExC_state->code_blocks[n].end) {
-		    *p++ = ' ';
+		    *p++ = '_';
 		    s++;
 		}
 		s--;
@@ -5111,8 +5101,15 @@ S_compile_runtime_code(pTHX_ RExC_state_t * const pRExC_state,
 	SPAGAIN;
 	qr_ref = POPs;
 	PUTBACK;
-	if (SvTRUE(ERRSV))
-	    Perl_croak(aTHX_ "%s", SvPVx_nolen_const(ERRSV));
+	{
+	    SV * const errsv = ERRSV;
+	    if (SvTRUE_NN(errsv))
+	    {
+		Safefree(pRExC_state->code_blocks);
+                /* use croak_sv ? */
+		Perl_croak_nocontext("%s", SvPV_nolen_const(errsv));
+	    }
+	}
 	assert(SvROK(qr_ref));
 	qr = SvRV(qr_ref);
 	assert(SvTYPE(qr) == SVt_REGEXP && RX_ENGINE((REGEXP*)qr)->op_comp);
@@ -5139,13 +5136,16 @@ S_compile_runtime_code(pTHX_ RExC_state_t * const pRExC_state,
 
     /* merge the main (r1) and run-time (r2) code blocks into one */
     {
-	RXi_GET_DECL(((struct regexp*)SvANY(qr)), r2);
+	RXi_GET_DECL(ReANY((REGEXP *)qr), r2);
 	struct reg_code_block *new_block, *dst;
 	RExC_state_t * const r1 = pRExC_state; /* convenient alias */
 	int i1 = 0, i2 = 0;
 
 	if (!r2->num_code_blocks) /* we guessed wrong */
+	{
+	    SvREFCNT_dec(qr);
 	    return 1;
+	}
 
 	Newx(new_block,
 	    r1->num_code_blocks + r2->num_code_blocks,
@@ -5197,6 +5197,50 @@ S_compile_runtime_code(pTHX_ RExC_state_t * const pRExC_state,
 }
 
 
+STATIC bool
+S_setup_longest(pTHX_ RExC_state_t *pRExC_state, SV* sv_longest, SV** rx_utf8, SV** rx_substr, I32* rx_end_shift, I32 lookbehind, I32 offset, I32 *minlen, STRLEN longest_length, bool eol, bool meol)
+{
+    /* This is the common code for setting up the floating and fixed length
+     * string data extracted from Perlre_op_compile() below.  Returns a boolean
+     * as to whether succeeded or not */
+
+    I32 t,ml;
+
+    if (! (longest_length
+           || (eol /* Can't have SEOL and MULTI */
+               && (! meol || (RExC_flags & RXf_PMf_MULTILINE)))
+          )
+            /* See comments for join_exact for why REG_SEEN_EXACTF_SHARP_S */
+        || (RExC_seen & REG_SEEN_EXACTF_SHARP_S))
+    {
+        return FALSE;
+    }
+
+    /* copy the information about the longest from the reg_scan_data
+        over to the program. */
+    if (SvUTF8(sv_longest)) {
+        *rx_utf8 = sv_longest;
+        *rx_substr = NULL;
+    } else {
+        *rx_substr = sv_longest;
+        *rx_utf8 = NULL;
+    }
+    /* end_shift is how many chars that must be matched that
+        follow this item. We calculate it ahead of time as once the
+        lookbehind offset is added in we lose the ability to correctly
+        calculate it.*/
+    ml = minlen ? *(minlen) : (I32)longest_length;
+    *rx_end_shift = ml - offset
+        - longest_length + (SvTAIL(sv_longest) != 0)
+        + lookbehind;
+
+    t = (eol/* Can't have SEOL and MULTI */
+         && (! meol || (RExC_flags & RXf_PMf_MULTILINE)));
+    fbm_compile(sv_longest, t ? FBMcf_TAIL : 0);
+
+    return TRUE;
+}
+
 /*
  * Perl_re_op_compile - the perl internal RE engine's function to compile a
  * regular expression into internal code.
@@ -5249,7 +5293,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
     dVAR;
     REGEXP *rx;
     struct regexp *r;
-    register regexp_internal *ri;
+    regexp_internal *ri;
     STRLEN plen;
     char  * VOL exp;
     char* xend;
@@ -5258,6 +5302,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
     I32 minlen = 0;
     U32 rx_flags;
     SV * VOL pat;
+    SV * VOL code_blocksv = NULL;
 
     /* these are all flags - maybe they should be turned
      * into a single int with different bit masks */
@@ -5311,9 +5356,6 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
 	PL_L1PosixGraph = _new_invlist_C_array(L1PosixGraph_invlist);
 	PL_PosixGraph = _new_invlist_C_array(PosixGraph_invlist);
 
-	PL_L1PosixAlnum = _new_invlist_C_array(L1PosixAlnum_invlist);
-	PL_PosixAlnum = _new_invlist_C_array(PosixAlnum_invlist);
-
 	PL_L1PosixLower = _new_invlist_C_array(L1PosixLower_invlist);
 	PL_PosixLower = _new_invlist_C_array(PosixLower_invlist);
 
@@ -5339,6 +5381,8 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
 
 	PL_PosixXDigit = _new_invlist_C_array(PosixXDigit_invlist);
 	PL_XPosixXDigit = _new_invlist_C_array(XPosixXDigit_invlist);
+
+        PL_HasMultiCharFold = _new_invlist_C_array(_Perl_Multi_Char_Folds_invlist);
     }
 #endif
 
@@ -5399,7 +5443,9 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
 
 	    if (pRExC_state->num_code_blocks) {
 		o = cLISTOPx(expr)->op_first;
-		assert(o->op_type == OP_PUSHMARK);
+		assert(   o->op_type == OP_PUSHMARK
+                       || (o->op_type == OP_NULL && o->op_targ == OP_PUSHMARK)
+                       || o->op_type == OP_PADRANGE);
 		o = o->op_sibling;
 	    }
 
@@ -5423,6 +5469,17 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
 		SV *sv, *msv = *svp;
 		SV *rx;
 		bool code = 0;
+                /* we make the assumption here that each op in the list of
+                 * op_siblings maps to one SV pushed onto the stack,
+                 * except for code blocks, with have both an OP_NULL and
+                 * and OP_CONST.
+                 * This allows us to match up the list of SVs against the
+                 * list of OPs to find the next code block.
+                 *
+                 * Note that       PUSHMARK PADSV PADSV ..
+                 * is optimised to
+                 *                 PADRANGE NULL  NULL  ..
+                 * so the alignment still works. */
 		if (o) {
 		    if (o->op_type == OP_NULL && (o->op_flags & OPf_SPECIAL)) {
 			assert(n < pRExC_state->num_code_blocks);
@@ -5451,8 +5508,11 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
 		else  {
                     while (SvAMAGIC(msv)
                             && (sv = AMG_CALLunary(msv, string_amg))
-                            && sv != msv)
-                    {
+                            && sv != msv
+                            &&  !(   SvROK(msv)
+                                  && SvROK(sv)
+                                  && SvRV(msv) == SvRV(sv))
+                    ) {
                         msv = sv;
                         SvGETMAGIC(msv);
                     }
@@ -5470,7 +5530,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
 		    && RX_ENGINE((REGEXP*)rx)->op_comp)
 		{
 
-		    RXi_GET_DECL(((struct regexp*)SvANY(rx)), ri);
+		    RXi_GET_DECL(ReANY((REGEXP *)rx), ri);
 		    if (ri->num_code_blocks) {
 			int i;
 			/* the presence of an embedded qr// with code means
@@ -5485,7 +5545,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
 			for (i=0; i < ri->num_code_blocks; i++) {
 			    struct reg_code_block *src, *dst;
 			    STRLEN offset =  orig_patlen
-				+ ((struct regexp *)SvANY(rx))->pre_prefix;
+				+ ReANY((REGEXP *)rx)->pre_prefix;
 			    assert(n < pRExC_state->num_code_blocks);
 			    src = &ri->code_blocks[i];
 			    dst = &pRExC_state->code_blocks[n];
@@ -5688,7 +5748,6 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
 	runtime_code = S_has_runtime_code(aTHX_ pRExC_state, expr, pm_flags,
 					    exp, plen);
 	if (!runtime_code) {
-	    ReREFCNT_inc(old_re);
 	    if (used_setjump) {
 		JMPENV_POP;
 	    }
@@ -5726,7 +5785,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
     RExC_pm_flags = pm_flags;
 
     if (runtime_code) {
-	if (PL_tainting && PL_tainted)
+	if (TAINTING_get && TAINT_get)
 	    Perl_croak(aTHX_ "Eval-group in insecure regular expression");
 
 	if (!S_compile_runtime_code(aTHX_ pRExC_state, exp, plen)) {
@@ -5744,6 +5803,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
     RExC_seen_zerolen = *exp == '^' ? -1 : 0;
     RExC_extralen = 0;
     RExC_override_recoding = 0;
+    RExC_in_multi_char_class = 0;
 
     /* First pass: determine size, legality. */
     RExC_parse = exp;
@@ -5775,11 +5835,23 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
         RExC_lastnum=0;
         RExC_lastparse=NULL;
     );
+    /* reg may croak on us, not giving us a chance to free
+       pRExC_state->code_blocks.  We cannot SAVEFREEPV it now, as we may
+       need it to survive as long as the regexp (qr/(?{})/).
+       We must check that code_blocksv is not already set, because we may
+       have longjmped back. */
+    if (pRExC_state->code_blocks && !code_blocksv) {
+	code_blocksv = newSV_type(SVt_PV);
+	SAVEFREESV(code_blocksv);
+	SvPV_set(code_blocksv, (char *)pRExC_state->code_blocks);
+	SvLEN_set(code_blocksv, 1); /*sufficient to make sv_clear free it*/
+    }
     if (reg(pRExC_state, 0, &flags,1) == NULL) {
 	RExC_precomp = NULL;
-	Safefree(pRExC_state->code_blocks);
 	return(NULL);
     }
+    if (code_blocksv)
+	SvLEN_set(code_blocksv,0); /* no you can't have it, sv_clear */
 
     /* Here, finished first pass.  Get rid of any added setjmp */
     if (used_setjump) {
@@ -5815,7 +5887,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
        of zeroing when in debug mode, thus anything assigned has to 
        happen after that */
     rx = (REGEXP*) newSV_type(SVt_REGEXP);
-    r = (struct regexp*)SvANY(rx);
+    r = ReANY(rx);
     Newxc(ri, sizeof(regexp_internal) + (unsigned)RExC_size * sizeof(regnode),
 	 char, regexp_internal);
     if ( r == NULL || ri == NULL )
@@ -5837,7 +5909,13 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
 	ri->num_code_blocks = pRExC_state->num_code_blocks;
     }
     else
+    {
+	int n;
+	for (n = 0; n < pRExC_state->num_code_blocks; n++)
+	    if (pRExC_state->code_blocks[n].src_regex)
+		SAVEFREESV(pRExC_state->code_blocks[n].src_regex);
 	SAVEFREEPV(pRExC_state->code_blocks);
+    }
 
     {
         bool has_p     = ((r->extflags & RXf_PMf_KEEPCOPY) == RXf_PMf_KEEPCOPY);
@@ -5868,8 +5946,8 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
             + (sizeof(STD_PAT_MODS) - 1)
             + (sizeof("(?:)") - 1);
 
-        p = sv_grow(MUTABLE_SV(rx), wraplen + 1); /* +1 for the ending NUL */
-	SvPOK_on(rx);
+        Newx(p, wraplen + 1, char); /* +1 for the ending NUL */
+	r->xpv_len_u.xpvlenu_pv = p;
 	if (RExC_utf8)
 	    SvFLAGS(rx) |= SVf_UTF8;
         *p++='('; *p++='?';
@@ -5904,7 +5982,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
             *p++ = '\n';
         *p++ = ')';
         *p = 0;
-	SvCUR_set(rx, p - SvPVX_const(rx));
+	SvCUR_set(rx, p - RX_WRAPPED(rx));
     }
 
     r->intflags = 0;
@@ -5972,11 +6050,6 @@ reStudy:
             RExC_seen |= REG_TOP_LEVEL_BRANCHES;
         else
             RExC_seen &= ~REG_TOP_LEVEL_BRANCHES;
-        if (data.last_found) {
-            SvREFCNT_dec(data.longest_fixed);
-	    SvREFCNT_dec(data.longest_float);
-	    SvREFCNT_dec(data.last_found);
-	}
 	StructCopy(&zero_scan_data, &data, scan_data_t);
     }
 #else
@@ -6142,6 +6215,10 @@ reStudy:
 	data.longest_float = newSVpvs("");
 	data.last_found = newSVpvs("");
 	data.longest = &(data.longest_fixed);
+	ENTER_with_name("study_chunk");
+	SAVEFREESV(data.longest_fixed);
+	SAVEFREESV(data.longest_float);
+	SAVEFREESV(data.last_found);
 	first = scan;
 	if (!ri->regstclass) {
 	    cl_init(pRExC_state, &ch_class);
@@ -6156,7 +6233,7 @@ reStudy:
             SCF_DO_SUBSTR | SCF_WHILEM_VISITED_POS | stclass_flag,0);
 
 
-        CHECK_RESTUDY_GOTO;
+        CHECK_RESTUDY_GOTO_butfirst(LEAVE_with_name("study_chunk"));
 
 
 	if ( RExC_npar == 1 && data.longest == &(data.longest_fixed)
@@ -6166,107 +6243,58 @@ reStudy:
 	     && (!(RExC_seen & REG_SEEN_GPOS) || (r->extflags & RXf_ANCH_GPOS)))
 	    r->extflags |= RXf_CHECK_ALL;
 	scan_commit(pRExC_state, &data,&minlen,0);
-	SvREFCNT_dec(data.last_found);
 
-        /* Note that code very similar to this but for anchored string 
-           follows immediately below, changes may need to be made to both. 
-           Be careful. 
-         */
 	longest_float_length = CHR_SVLEN(data.longest_float);
-	if (longest_float_length
-	    || (data.flags & SF_FL_BEFORE_EOL
-		&& (!(data.flags & SF_FL_BEFORE_MEOL)
-		    || (RExC_flags & RXf_PMf_MULTILINE)))) 
-        {
-            I32 t,ml;
 
-            /* See comments for join_exact for why REG_SEEN_EXACTF_SHARP_S */
-	    if ((RExC_seen & REG_SEEN_EXACTF_SHARP_S)
-		|| (SvCUR(data.longest_fixed)  /* ok to leave SvCUR */
-		    && data.offset_fixed == data.offset_float_min
-		    && SvCUR(data.longest_fixed) == SvCUR(data.longest_float)))
-		    goto remove_float;		/* As in (a)+. */
-
-            /* copy the information about the longest float from the reg_scan_data
-               over to the program. */
-	    if (SvUTF8(data.longest_float)) {
-		r->float_utf8 = data.longest_float;
-		r->float_substr = NULL;
-	    } else {
-		r->float_substr = data.longest_float;
-		r->float_utf8 = NULL;
-	    }
-	    /* float_end_shift is how many chars that must be matched that 
-	       follow this item. We calculate it ahead of time as once the
-	       lookbehind offset is added in we lose the ability to correctly
-	       calculate it.*/
-	    ml = data.minlen_float ? *(data.minlen_float) 
-	                           : (I32)longest_float_length;
-	    r->float_end_shift = ml - data.offset_float_min
-	        - longest_float_length + (SvTAIL(data.longest_float) != 0)
-	        + data.lookbehind_float;
+        if (! ((SvCUR(data.longest_fixed)  /* ok to leave SvCUR */
+                   && data.offset_fixed == data.offset_float_min
+                   && SvCUR(data.longest_fixed) == SvCUR(data.longest_float)))
+            && S_setup_longest (aTHX_ pRExC_state,
+                                    data.longest_float,
+                                    &(r->float_utf8),
+                                    &(r->float_substr),
+                                    &(r->float_end_shift),
+                                    data.lookbehind_float,
+                                    data.offset_float_min,
+                                    data.minlen_float,
+                                    longest_float_length,
+                                    data.flags & SF_FL_BEFORE_EOL,
+                                    data.flags & SF_FL_BEFORE_MEOL))
+        {
 	    r->float_min_offset = data.offset_float_min - data.lookbehind_float;
 	    r->float_max_offset = data.offset_float_max;
 	    if (data.offset_float_max < I32_MAX) /* Don't offset infinity */
 	        r->float_max_offset -= data.lookbehind_float;
-	    
-	    t = (data.flags & SF_FL_BEFORE_EOL /* Can't have SEOL and MULTI */
-		       && (!(data.flags & SF_FL_BEFORE_MEOL)
-			   || (RExC_flags & RXf_PMf_MULTILINE)));
-	    fbm_compile(data.longest_float, t ? FBMcf_TAIL : 0);
+	    SvREFCNT_inc_simple_void_NN(data.longest_float);
 	}
 	else {
-	  remove_float:
 	    r->float_substr = r->float_utf8 = NULL;
-	    SvREFCNT_dec(data.longest_float);
 	    longest_float_length = 0;
 	}
 
-        /* Note that code very similar to this but for floating string 
-           is immediately above, changes may need to be made to both. 
-           Be careful. 
-         */
 	longest_fixed_length = CHR_SVLEN(data.longest_fixed);
 
-        /* See comments for join_exact for why REG_SEEN_EXACTF_SHARP_S */
-	if (! (RExC_seen & REG_SEEN_EXACTF_SHARP_S)
-	    && (longest_fixed_length
-	        || (data.flags & SF_FIX_BEFORE_EOL /* Cannot have SEOL and MULTI */
-		    && (!(data.flags & SF_FIX_BEFORE_MEOL)
-		        || (RExC_flags & RXf_PMf_MULTILINE)))) )
+        if (S_setup_longest (aTHX_ pRExC_state,
+                                data.longest_fixed,
+                                &(r->anchored_utf8),
+                                &(r->anchored_substr),
+                                &(r->anchored_end_shift),
+                                data.lookbehind_fixed,
+                                data.offset_fixed,
+                                data.minlen_fixed,
+                                longest_fixed_length,
+                                data.flags & SF_FIX_BEFORE_EOL,
+                                data.flags & SF_FIX_BEFORE_MEOL))
         {
-            I32 t,ml;
-
-            /* copy the information about the longest fixed 
-               from the reg_scan_data over to the program. */
-	    if (SvUTF8(data.longest_fixed)) {
-		r->anchored_utf8 = data.longest_fixed;
-		r->anchored_substr = NULL;
-	    } else {
-		r->anchored_substr = data.longest_fixed;
-		r->anchored_utf8 = NULL;
-	    }
-	    /* fixed_end_shift is how many chars that must be matched that 
-	       follow this item. We calculate it ahead of time as once the
-	       lookbehind offset is added in we lose the ability to correctly
-	       calculate it.*/
-            ml = data.minlen_fixed ? *(data.minlen_fixed) 
-                                   : (I32)longest_fixed_length;
-            r->anchored_end_shift = ml - data.offset_fixed
-	        - longest_fixed_length + (SvTAIL(data.longest_fixed) != 0)
-	        + data.lookbehind_fixed;
 	    r->anchored_offset = data.offset_fixed - data.lookbehind_fixed;
-
-	    t = (data.flags & SF_FIX_BEFORE_EOL /* Can't have SEOL and MULTI */
-		 && (!(data.flags & SF_FIX_BEFORE_MEOL)
-		     || (RExC_flags & RXf_PMf_MULTILINE)));
-	    fbm_compile(data.longest_fixed, t ? FBMcf_TAIL : 0);
+	    SvREFCNT_inc_simple_void_NN(data.longest_fixed);
 	}
 	else {
 	    r->anchored_substr = r->anchored_utf8 = NULL;
-	    SvREFCNT_dec(data.longest_fixed);
 	    longest_fixed_length = 0;
 	}
+	LEAVE_with_name("study_chunk");
+
 	if (ri->regstclass
 	    && (OP(ri->regstclass) == REG_ANY || OP(ri->regstclass) == SANY))
 	    ri->regstclass = NULL;
@@ -6340,7 +6368,7 @@ reStudy:
 	minlen = study_chunk(pRExC_state, &scan, &minlen, &fake, scan + RExC_size,
 	    &data, -1, NULL, NULL, SCF_DO_STCLASS_AND|SCF_WHILEM_VISITED_POS,0);
         
-        CHECK_RESTUDY_GOTO;
+        CHECK_RESTUDY_GOTO_butfirst(NOOP);
 
 	r->check_substr = r->check_utf8 = r->anchored_substr = r->anchored_utf8
 		= r->float_substr = r->float_utf8 = NULL;
@@ -6385,7 +6413,10 @@ reStudy:
     if (RExC_seen & REG_SEEN_CANY)
 	r->extflags |= RXf_CANY_SEEN;
     if (RExC_seen & REG_SEEN_VERBARG)
+    {
 	r->intflags |= PREGf_VERBARG_SEEN;
+	r->extflags |= RXf_MODIFIES_VARS;
+    }
     if (RExC_seen & REG_SEEN_CUTGROUP)
 	r->intflags |= PREGf_CUTGROUP_SEEN;
     if (pm_flags & PMf_USE_RE_EVAL)
@@ -6398,18 +6429,12 @@ reStudy:
 #ifdef STUPID_PATTERN_CHECKS            
     if (RX_PRELEN(rx) == 0)
         r->extflags |= RXf_NULL;
-    if (r->extflags & RXf_SPLIT && RX_PRELEN(rx) == 1 && RX_PRECOMP(rx)[0] == ' ')
-        /* XXX: this should happen BEFORE we compile */
-        r->extflags |= (RXf_SKIPWHITE|RXf_WHITE); 
-    else if (RX_PRELEN(rx) == 3 && memEQ("\\s+", RX_PRECOMP(rx), 3))
+    if (RX_PRELEN(rx) == 3 && memEQ("\\s+", RX_PRECOMP(rx), 3))
         r->extflags |= RXf_WHITE;
     else if (RX_PRELEN(rx) == 1 && RXp_PRECOMP(rx)[0] == '^')
         r->extflags |= RXf_START_ONLY;
 #else
-    if (r->extflags & RXf_SPLIT && RX_PRELEN(rx) == 1 && RX_PRECOMP(rx)[0] == ' ')
-            /* XXX: this should happen BEFORE we compile */
-            r->extflags |= (RXf_SKIPWHITE|RXf_WHITE); 
-    else {
+    {
         regnode *first = ri->program + 1;
         U8 fop = OP(first);
 
@@ -6472,7 +6497,7 @@ Perl_reg_named_buff(pTHX_ REGEXP * const rx, SV * const key, SV * const value,
     if (flags & RXapif_FETCH) {
         return reg_named_buff_fetch(rx, key, flags);
     } else if (flags & (RXapif_STORE | RXapif_DELETE | RXapif_CLEAR)) {
-        Perl_croak_no_modify(aTHX);
+        Perl_croak_no_modify();
         return NULL;
     } else if (flags & RXapif_EXISTS) {
         return reg_named_buff_exists(rx, key, flags)
@@ -6511,7 +6536,7 @@ Perl_reg_named_buff_fetch(pTHX_ REGEXP * const r, SV * const namesv,
 {
     AV *retarray = NULL;
     SV *ret;
-    struct regexp *const rx = (struct regexp *)SvANY(r);
+    struct regexp *const rx = ReANY(r);
 
     PERL_ARGS_ASSERT_REG_NAMED_BUFF_FETCH;
 
@@ -6551,7 +6576,7 @@ bool
 Perl_reg_named_buff_exists(pTHX_ REGEXP * const r, SV * const key,
                            const U32 flags)
 {
-    struct regexp *const rx = (struct regexp *)SvANY(r);
+    struct regexp *const rx = ReANY(r);
 
     PERL_ARGS_ASSERT_REG_NAMED_BUFF_EXISTS;
 
@@ -6575,7 +6600,7 @@ Perl_reg_named_buff_exists(pTHX_ REGEXP * const r, SV * const key,
 SV*
 Perl_reg_named_buff_firstkey(pTHX_ REGEXP * const r, const U32 flags)
 {
-    struct regexp *const rx = (struct regexp *)SvANY(r);
+    struct regexp *const rx = ReANY(r);
 
     PERL_ARGS_ASSERT_REG_NAMED_BUFF_FIRSTKEY;
 
@@ -6591,7 +6616,7 @@ Perl_reg_named_buff_firstkey(pTHX_ REGEXP * const r, const U32 flags)
 SV*
 Perl_reg_named_buff_nextkey(pTHX_ REGEXP * const r, const U32 flags)
 {
-    struct regexp *const rx = (struct regexp *)SvANY(r);
+    struct regexp *const rx = ReANY(r);
     GET_RE_DEBUG_FLAGS_DECL;
 
     PERL_ARGS_ASSERT_REG_NAMED_BUFF_NEXTKEY;
@@ -6627,7 +6652,7 @@ Perl_reg_named_buff_scalar(pTHX_ REGEXP * const r, const U32 flags)
     SV *ret;
     AV *av;
     I32 length;
-    struct regexp *const rx = (struct regexp *)SvANY(r);
+    struct regexp *const rx = ReANY(r);
 
     PERL_ARGS_ASSERT_REG_NAMED_BUFF_SCALAR;
 
@@ -6651,7 +6676,7 @@ Perl_reg_named_buff_scalar(pTHX_ REGEXP * const r, const U32 flags)
 SV*
 Perl_reg_named_buff_all(pTHX_ REGEXP * const r, const U32 flags)
 {
-    struct regexp *const rx = (struct regexp *)SvANY(r);
+    struct regexp *const rx = ReANY(r);
     AV *av = newAV();
 
     PERL_ARGS_ASSERT_REG_NAMED_BUFF_ALL;
@@ -6687,47 +6712,67 @@ void
 Perl_reg_numbered_buff_fetch(pTHX_ REGEXP * const r, const I32 paren,
 			     SV * const sv)
 {
-    struct regexp *const rx = (struct regexp *)SvANY(r);
+    struct regexp *const rx = ReANY(r);
     char *s = NULL;
     I32 i = 0;
     I32 s1, t1;
+    I32 n = paren;
 
     PERL_ARGS_ASSERT_REG_NUMBERED_BUFF_FETCH;
         
-    if (!rx->subbeg) {
-        sv_setsv(sv,&PL_sv_undef);
-        return;
-    } 
-    else               
-    if (paren == RX_BUFF_IDX_PREMATCH && rx->offs[0].start != -1) {
-        /* $` */
+    if ( (    n == RX_BUFF_IDX_CARET_PREMATCH
+           || n == RX_BUFF_IDX_CARET_FULLMATCH
+           || n == RX_BUFF_IDX_CARET_POSTMATCH
+         )
+         && !(rx->extflags & RXf_PMf_KEEPCOPY)
+    )
+        goto ret_undef;
+
+    if (!rx->subbeg)
+        goto ret_undef;
+
+    if (n == RX_BUFF_IDX_CARET_FULLMATCH)
+        /* no need to distinguish between them any more */
+        n = RX_BUFF_IDX_FULLMATCH;
+
+    if ((n == RX_BUFF_IDX_PREMATCH || n == RX_BUFF_IDX_CARET_PREMATCH)
+        && rx->offs[0].start != -1)
+    {
+        /* $`, ${^PREMATCH} */
 	i = rx->offs[0].start;
 	s = rx->subbeg;
     }
     else 
-    if (paren == RX_BUFF_IDX_POSTMATCH && rx->offs[0].end != -1) {
-        /* $' */
-	s = rx->subbeg + rx->offs[0].end;
-	i = rx->sublen - rx->offs[0].end;
+    if ((n == RX_BUFF_IDX_POSTMATCH || n == RX_BUFF_IDX_CARET_POSTMATCH)
+        && rx->offs[0].end != -1)
+    {
+        /* $', ${^POSTMATCH} */
+	s = rx->subbeg - rx->suboffset + rx->offs[0].end;
+	i = rx->sublen + rx->suboffset - rx->offs[0].end;
     } 
     else
-    if ( 0 <= paren && paren <= (I32)rx->nparens &&
-        (s1 = rx->offs[paren].start) != -1 &&
-        (t1 = rx->offs[paren].end) != -1)
+    if ( 0 <= n && n <= (I32)rx->nparens &&
+        (s1 = rx->offs[n].start) != -1 &&
+        (t1 = rx->offs[n].end) != -1)
     {
-        /* $& $1 ... */
+        /* $&, ${^MATCH},  $1 ... */
         i = t1 - s1;
-        s = rx->subbeg + s1;
+        s = rx->subbeg + s1 - rx->suboffset;
     } else {
-        sv_setsv(sv,&PL_sv_undef);
-        return;
+        goto ret_undef;
     }          
+
+    assert(s >= rx->subbeg);
     assert(rx->sublen >= (s - rx->subbeg) + i );
     if (i >= 0) {
-        const int oldtainted = PL_tainted;
+#if NO_TAINT_SUPPORT
+        sv_setpvn(sv, s, i);
+#else
+        const int oldtainted = TAINT_get;
         TAINT_NOT;
         sv_setpvn(sv, s, i);
-        PL_tainted = oldtainted;
+        TAINT_set(oldtainted);
+#endif
         if ( (rx->extflags & RXf_CANY_SEEN)
             ? (RXp_MATCH_UTF8(rx)
                         && (!i || is_utf8_string((U8*)s, i)))
@@ -6737,12 +6782,12 @@ Perl_reg_numbered_buff_fetch(pTHX_ REGEXP * const r, const I32 paren,
         }
         else
             SvUTF8_off(sv);
-        if (PL_tainting) {
+        if (TAINTING_get) {
             if (RXp_MATCH_TAINTED(rx)) {
                 if (SvTYPE(sv) >= SVt_PVMG) {
                     MAGIC* const mg = SvMAGIC(sv);
                     MAGIC* mgt;
-                    PL_tainted = 1;
+                    TAINT;
                     SvMAGIC_set(sv, mg->mg_moremagic);
                     SvTAINT(sv);
                     if ((mgt = SvMAGIC(sv))) {
@@ -6750,13 +6795,14 @@ Perl_reg_numbered_buff_fetch(pTHX_ REGEXP * const r, const I32 paren,
                         SvMAGIC_set(sv, mg);
                     }
                 } else {
-                    PL_tainted = 1;
+                    TAINT;
                     SvTAINT(sv);
                 }
             } else 
                 SvTAINTED_off(sv);
         }
     } else {
+      ret_undef:
         sv_setsv(sv,&PL_sv_undef);
         return;
     }
@@ -6773,23 +6819,27 @@ Perl_reg_numbered_buff_store(pTHX_ REGEXP * const rx, const I32 paren,
     PERL_UNUSED_ARG(value);
 
     if (!PL_localizing)
-        Perl_croak_no_modify(aTHX);
+        Perl_croak_no_modify();
 }
 
 I32
 Perl_reg_numbered_buff_length(pTHX_ REGEXP * const r, const SV * const sv,
                               const I32 paren)
 {
-    struct regexp *const rx = (struct regexp *)SvANY(r);
+    struct regexp *const rx = ReANY(r);
     I32 i;
     I32 s1, t1;
 
     PERL_ARGS_ASSERT_REG_NUMBERED_BUFF_LENGTH;
 
     /* Some of this code was originally in C<Perl_magic_len> in F<mg.c> */
-	switch (paren) {
-      /* $` / ${^PREMATCH} */
-      case RX_BUFF_IDX_PREMATCH:
+    switch (paren) {
+      case RX_BUFF_IDX_CARET_PREMATCH: /* ${^PREMATCH} */
+         if (!(rx->extflags & RXf_PMf_KEEPCOPY))
+            goto warn_undef;
+        /*FALLTHROUGH*/
+
+      case RX_BUFF_IDX_PREMATCH:       /* $` */
         if (rx->offs[0].start != -1) {
 			i = rx->offs[0].start;
 			if (i > 0) {
@@ -6799,8 +6849,11 @@ Perl_reg_numbered_buff_length(pTHX_ REGEXP * const r, const SV * const sv,
 			}
 	    }
         return 0;
-      /* $' / ${^POSTMATCH} */
-      case RX_BUFF_IDX_POSTMATCH:
+
+      case RX_BUFF_IDX_CARET_POSTMATCH: /* ${^POSTMATCH} */
+         if (!(rx->extflags & RXf_PMf_KEEPCOPY))
+            goto warn_undef;
+      case RX_BUFF_IDX_POSTMATCH:       /* $' */
 	    if (rx->offs[0].end != -1) {
 			i = rx->sublen - rx->offs[0].end;
 			if (i > 0) {
@@ -6810,6 +6863,12 @@ Perl_reg_numbered_buff_length(pTHX_ REGEXP * const r, const SV * const sv,
 			}
 	    }
         return 0;
+
+      case RX_BUFF_IDX_CARET_FULLMATCH: /* ${^MATCH} */
+         if (!(rx->extflags & RXf_PMf_KEEPCOPY))
+            goto warn_undef;
+        /*FALLTHROUGH*/
+
       /* $& / ${^MATCH}, $1, $2, ... */
       default:
 	    if (paren <= (I32)rx->nparens &&
@@ -6819,6 +6878,7 @@ Perl_reg_numbered_buff_length(pTHX_ REGEXP * const r, const SV * const sv,
             i = t1 - s1;
             goto getlen;
         } else {
+          warn_undef:
             if (ckWARN(WARN_UNINITIALIZED))
                 report_uninit((const SV *)sv);
             return 0;
@@ -6826,7 +6886,7 @@ Perl_reg_numbered_buff_length(pTHX_ REGEXP * const r, const SV * const sv,
     }
   getlen:
     if (i > 0 && RXp_MATCH_UTF8(rx)) {
-        const char * const s = rx->subbeg + s1;
+        const char * const s = rx->subbeg - rx->suboffset + s1;
         const U8 *ep;
         STRLEN el;
 
@@ -6979,9 +7039,10 @@ S_reg_scan_name(pTHX_ RExC_state_t *pRExC_state, U32 flags)
  * list.)
  * Taking the complement (inverting) an inversion list is quite simple, if the
  * first element is 0, remove it; otherwise add a 0 element at the beginning.
- * This implementation reserves an element at the beginning of each inversion list
- * to contain 0 when the list contains 0, and contains 1 otherwise.  The actual
- * beginning of the list is either that element if 0, or the next one if 1.
+ * This implementation reserves an element at the beginning of each inversion
+ * list to contain 0 when the list contains 0, and contains 1 otherwise.  The
+ * actual beginning of the list is either that element if 0, or the next one if
+ * 1.
  *
  * More about inversion lists can be found in "Unicode Demystified"
  * Chapter 13 by Richard Gillam, published by Addison-Wesley.
@@ -6995,32 +7056,8 @@ S_reg_scan_name(pTHX_ RExC_state_t *pRExC_state, U32 flags)
  * Some of the methods should always be private to the implementation, and some
  * should eventually be made public */
 
-#define INVLIST_LEN_OFFSET 0	/* Number of elements in the inversion list */
-#define INVLIST_ITER_OFFSET 1	/* Current iteration position */
-
-/* This is a combination of a version and data structure type, so that one
- * being passed in can be validated to be an inversion list of the correct
- * vintage.  When the structure of the header is changed, a new random number
- * in the range 2**31-1 should be generated and the new() method changed to
- * insert that at this location.  Then, if an auxiliary program doesn't change
- * correspondingly, it will be discovered immediately */
-#define INVLIST_VERSION_ID_OFFSET 2
-#define INVLIST_VERSION_ID 1064334010
+/* The header definitions are in F<inline_invlist.c> */
 
-/* For safety, when adding new elements, remember to #undef them at the end of
- * the inversion list code section */
-
-#define INVLIST_ZERO_OFFSET 3	/* 0 or 1; must be last element in header */
-/* The UV at position ZERO contains either 0 or 1.  If 0, the inversion list
- * contains the code point U+00000, and begins here.  If 1, the inversion list
- * doesn't contain U+0000, and it begins at the next UV in the array.
- * Inverting an inversion list consists of adding or removing the 0 at the
- * beginning of it.  By reserving a space for that 0, inversion can be made
- * very fast */
-
-#define HEADER_LENGTH (INVLIST_ZERO_OFFSET + 1)
-
-/* Internally things are UVs */
 #define TO_INTERNAL_SIZE(x) ((x + HEADER_LENGTH) * sizeof(UV))
 #define FROM_INTERNAL_SIZE(x) ((x / sizeof(UV)) - HEADER_LENGTH)
 
@@ -7042,7 +7079,7 @@ S__invlist_array_init(pTHX_ SV* const invlist, const bool will_have_0)
     PERL_ARGS_ASSERT__INVLIST_ARRAY_INIT;
 
     /* Must be empty */
-    assert(! *get_invlist_len_addr(invlist));
+    assert(! *_get_invlist_len_addr(invlist));
 
     /* 1^1 = 0; 1^0 = 1 */
     *zero = 1 ^ will_have_0;
@@ -7060,7 +7097,7 @@ S_invlist_array(pTHX_ SV* const invlist)
 
     /* Must not be empty.  If these fail, you probably didn't check for <len>
      * being non-zero before trying to get the array */
-    assert(*get_invlist_len_addr(invlist));
+    assert(*_get_invlist_len_addr(invlist));
     assert(*get_invlist_zero_addr(invlist) == 0
 	   || *get_invlist_zero_addr(invlist) == 1);
 
@@ -7071,28 +7108,6 @@ S_invlist_array(pTHX_ SV* const invlist)
 		   + *get_invlist_zero_addr(invlist));
 }
 
-PERL_STATIC_INLINE UV*
-S_get_invlist_len_addr(pTHX_ SV* invlist)
-{
-    /* Return the address of the UV that contains the current number
-     * of used elements in the inversion list */
-
-    PERL_ARGS_ASSERT_GET_INVLIST_LEN_ADDR;
-
-    return (UV *) (SvPVX(invlist) + (INVLIST_LEN_OFFSET * sizeof (UV)));
-}
-
-PERL_STATIC_INLINE UV
-S_invlist_len(pTHX_ SV* const invlist)
-{
-    /* Returns the current number of elements stored in the inversion list's
-     * array */
-
-    PERL_ARGS_ASSERT_INVLIST_LEN;
-
-    return *get_invlist_len_addr(invlist);
-}
-
 PERL_STATIC_INLINE void
 S_invlist_set_len(pTHX_ SV* const invlist, const UV len)
 {
@@ -7100,7 +7115,7 @@ S_invlist_set_len(pTHX_ SV* const invlist, const UV len)
 
     PERL_ARGS_ASSERT_INVLIST_SET_LEN;
 
-    *get_invlist_len_addr(invlist) = len;
+    *_get_invlist_len_addr(invlist) = len;
 
     assert(len <= SvLEN(invlist));
 
@@ -7120,6 +7135,39 @@ S_invlist_set_len(pTHX_ SV* const invlist, const UV len)
      * Note that when inverting, SvCUR shouldn't change */
 }
 
+PERL_STATIC_INLINE IV*
+S_get_invlist_previous_index_addr(pTHX_ SV* invlist)
+{
+    /* Return the address of the UV that is reserved to hold the cached index
+     * */
+
+    PERL_ARGS_ASSERT_GET_INVLIST_PREVIOUS_INDEX_ADDR;
+
+    return (IV *) (SvPVX(invlist) + (INVLIST_PREVIOUS_INDEX_OFFSET * sizeof (UV)));
+}
+
+PERL_STATIC_INLINE IV
+S_invlist_previous_index(pTHX_ SV* const invlist)
+{
+    /* Returns cached index of previous search */
+
+    PERL_ARGS_ASSERT_INVLIST_PREVIOUS_INDEX;
+
+    return *get_invlist_previous_index_addr(invlist);
+}
+
+PERL_STATIC_INLINE void
+S_invlist_set_previous_index(pTHX_ SV* const invlist, const IV index)
+{
+    /* Caches <index> for later retrieval */
+
+    PERL_ARGS_ASSERT_INVLIST_SET_PREVIOUS_INDEX;
+
+    assert(index == 0 || index < (int) _invlist_len(invlist));
+
+    *get_invlist_previous_index_addr(invlist) = index;
+}
+
 PERL_STATIC_INLINE UV
 S_invlist_max(pTHX_ SV* const invlist)
 {
@@ -7170,8 +7218,9 @@ Perl__new_invlist(pTHX_ IV initial_size)
      * properly */
     *get_invlist_zero_addr(new_list) = UV_MAX;
 
+    *get_invlist_previous_index_addr(new_list) = 0;
     *get_invlist_version_id_addr(new_list) = INVLIST_VERSION_ID;
-#if HEADER_LENGTH != 4
+#if HEADER_LENGTH != 5
 #   error Need to regenerate VERSION_ID by running perl -E 'say int(rand 2**31-1)', and then changing the #if to the new length
 #endif
 
@@ -7194,7 +7243,7 @@ S__new_invlist_C_array(pTHX_ UV* list)
     SvPV_set(invlist, (char *) list);
     SvLEN_set(invlist, 0);  /* Means we own the contents, and the system
 			       shouldn't touch it */
-    SvCUR_set(invlist, TO_INTERNAL_SIZE(invlist_len(invlist)));
+    SvCUR_set(invlist, TO_INTERNAL_SIZE(_invlist_len(invlist)));
 
     if (*get_invlist_version_id_addr(invlist) != INVLIST_VERSION_ID) {
         Perl_croak(aTHX_ "panic: Incorrect version for previously generated inversion list");
@@ -7224,11 +7273,6 @@ S_invlist_trim(pTHX_ SV* const invlist)
     SvPV_shrink_to_cur((SV *) invlist);
 }
 
-/* An element is in an inversion list iff its index is even numbered: 0, 2, 4,
- * etc */
-#define ELEMENT_RANGE_MATCHES_INVLIST(i) (! ((i) & 1))
-#define PREV_RANGE_MATCHES_INVLIST(i) (! ELEMENT_RANGE_MATCHES_INVLIST(i))
-
 #define _invlist_union_complement_2nd(a, b, output) _invlist_union_maybe_complement_2nd(a, b, TRUE, output)
 
 STATIC void
@@ -7240,7 +7284,7 @@ S__append_range_to_invlist(pTHX_ SV* const invlist, const UV start, const UV end
 
     UV* array;
     UV max = invlist_max(invlist);
-    UV len = invlist_len(invlist);
+    UV len = _invlist_len(invlist);
 
     PERL_ARGS_ASSERT__APPEND_RANGE_TO_INVLIST;
 
@@ -7312,8 +7356,8 @@ S__append_range_to_invlist(pTHX_ SV* const invlist, const UV start, const UV end
 
 #ifndef PERL_IN_XSUB_RE
 
-STATIC IV
-S_invlist_search(pTHX_ SV* const invlist, const UV cp)
+IV
+Perl__invlist_search(pTHX_ SV* const invlist, const UV cp)
 {
     /* Searches the inversion list for the entry that contains the input code
      * point <cp>.  If <cp> is not in the list, -1 is returned.  Otherwise, the
@@ -7321,23 +7365,68 @@ S_invlist_search(pTHX_ SV* const invlist, const UV cp)
      * contains <cp> */
 
     IV low = 0;
-    IV high = invlist_len(invlist);
-    const UV * const array = invlist_array(invlist);
+    IV mid;
+    IV high = _invlist_len(invlist);
+    const IV highest_element = high - 1;
+    const UV* array;
 
-    PERL_ARGS_ASSERT_INVLIST_SEARCH;
+    PERL_ARGS_ASSERT__INVLIST_SEARCH;
 
-    /* If list is empty or the code point is before the first element, return
-     * failure. */
-    if (high == 0 || cp < array[0]) {
+    /* If list is empty, return failure. */
+    if (high == 0) {
 	return -1;
     }
 
+    /* If the code point is before the first element, return failure.  (We
+     * can't combine this with the test above, because we can't get the array
+     * unless we know the list is non-empty) */
+    array = invlist_array(invlist);
+
+    mid = invlist_previous_index(invlist);
+    assert(mid >=0 && mid <= highest_element);
+
+    /* <mid> contains the cache of the result of the previous call to this
+     * function (0 the first time).  See if this call is for the same result,
+     * or if it is for mid-1.  This is under the theory that calls to this
+     * function will often be for related code points that are near each other.
+     * And benchmarks show that caching gives better results.  We also test
+     * here if the code point is within the bounds of the list.  These tests
+     * replace others that would have had to be made anyway to make sure that
+     * the array bounds were not exceeded, and these give us extra information
+     * at the same time */
+    if (cp >= array[mid]) {
+        if (cp >= array[highest_element]) {
+            return highest_element;
+        }
+
+        /* Here, array[mid] <= cp < array[highest_element].  This means that
+         * the final element is not the answer, so can exclude it; it also
+         * means that <mid> is not the final element, so can refer to 'mid + 1'
+         * safely */
+        if (cp < array[mid + 1]) {
+            return mid;
+        }
+        high--;
+        low = mid + 1;
+    }
+    else { /* cp < aray[mid] */
+        if (cp < array[0]) { /* Fail if outside the array */
+            return -1;
+        }
+        high = mid;
+        if (cp >= array[mid - 1]) {
+            goto found_entry;
+        }
+    }
+
     /* Binary search.  What we are looking for is <i> such that
      *	array[i] <= cp < array[i+1]
-     * The loop below converges on the i+1. */
+     * The loop below converges on the i+1.  Note that there may not be an
+     * (i+1)th element in the array, and things work nonetheless */
     while (low < high) {
-	IV mid = (low + high) / 2;
-	if (array[mid] <= cp) {
+	mid = (low + high) / 2;
+        assert(mid <= highest_element);
+	if (array[mid] <= cp) { /* cp >= array[mid] */
 	    low = mid + 1;
 
 	    /* We could do this extra test to exit the loop early.
@@ -7351,7 +7440,10 @@ S_invlist_search(pTHX_ SV* const invlist, const UV cp)
 	}
     }
 
-    return high - 1;
+  found_entry:
+    high--;
+    invlist_set_previous_index(invlist, high);
+    return high;
 }
 
 void
@@ -7365,7 +7457,7 @@ Perl__invlist_populate_swatch(pTHX_ SV* const invlist, const UV start, const UV
      * that <swatch> is all 0's on input */
 
     UV current = start;
-    const IV len = invlist_len(invlist);
+    const IV len = _invlist_len(invlist);
     IV i;
     const UV * array;
 
@@ -7378,7 +7470,7 @@ Perl__invlist_populate_swatch(pTHX_ SV* const invlist, const UV start, const UV
     array = invlist_array(invlist);
 
     /* Find which element it is */
-    i = invlist_search(invlist, start);
+    i = _invlist_search(invlist, start);
 
     /* We populate from <start> to <end> */
     while (current < end) {
@@ -7397,7 +7489,15 @@ Perl__invlist_populate_swatch(pTHX_ SV* const invlist, const UV start, const UV
             current = array[i];
 	    if (current >= end) {   /* Finished if beyond the end of what we
 				       are populating */
-                return;
+                if (LIKELY(end < UV_MAX)) {
+                    return;
+                }
+
+                /* We get here when the upper bound is the maximum
+                 * representable on the machine, and we are looking for just
+                 * that code point.  Have to special case it */
+                i = len;
+                goto join_end_of_list;
             }
         }
         assert(current >= start);
@@ -7414,6 +7514,8 @@ Perl__invlist_populate_swatch(pTHX_ SV* const invlist, const UV start, const UV
             swatch[offset >> 3] |= 1 << (offset & 7);
         }
 
+    join_end_of_list:
+
 	/* Quit if at the end of the list */
         if (i >= len) {
 
@@ -7485,7 +7587,7 @@ Perl__invlist_union_maybe_complement_2nd(pTHX_ SV* const a, SV* const b, bool co
     assert(a != b);
 
     /* If either one is empty, the union is the other one */
-    if (a == NULL || ((len_a = invlist_len(a)) == 0)) {
+    if (a == NULL || ((len_a = _invlist_len(a)) == 0)) {
 	if (*output == a) {
             if (a != NULL) {
                 SvREFCNT_dec(a);
@@ -7499,7 +7601,7 @@ Perl__invlist_union_maybe_complement_2nd(pTHX_ SV* const a, SV* const b, bool co
 	} /* else *output already = b; */
 	return;
     }
-    else if ((len_b = invlist_len(b)) == 0) {
+    else if ((len_b = _invlist_len(b)) == 0) {
 	if (*output == b) {
 	    SvREFCNT_dec(b);
 	}
@@ -7640,7 +7742,7 @@ Perl__invlist_union_maybe_complement_2nd(pTHX_ SV* const a, SV* const b, bool co
 
     /* Set result to final length, which can change the pointer to array_u, so
      * re-find it */
-    if (len_u != invlist_len(u)) {
+    if (len_u != _invlist_len(u)) {
 	invlist_set_len(u, len_u);
 	invlist_trim(u);
 	array_u = invlist_array(u);
@@ -7719,8 +7821,8 @@ Perl__invlist_intersection_maybe_complement_2nd(pTHX_ SV* const a, SV* const b,
     assert(a != b);
 
     /* Special case if either one is empty */
-    len_a = invlist_len(a);
-    if ((len_a == 0) || ((len_b = invlist_len(b)) == 0)) {
+    len_a = _invlist_len(a);
+    if ((len_a == 0) || ((len_b = _invlist_len(b)) == 0)) {
 
         if (len_a != 0 && complement_b) {
 
@@ -7866,7 +7968,7 @@ Perl__invlist_intersection_maybe_complement_2nd(pTHX_ SV* const a, SV* const b,
 
     /* Set result to final length, which can change the pointer to array_r, so
      * re-find it */
-    if (len_r != invlist_len(r)) {
+    if (len_r != _invlist_len(r)) {
 	invlist_set_len(r, len_r);
 	invlist_trim(r);
 	array_r = invlist_array(r);
@@ -7914,13 +8016,13 @@ Perl__add_range_to_invlist(pTHX_ SV* invlist, const UV start, const UV end)
 	len = 0;
     }
     else {
-	len = invlist_len(invlist);
+	len = _invlist_len(invlist);
     }
 
     /* If comes after the final entry, can just append it to the end */
     if (len == 0
 	|| start >= invlist_array(invlist)
-				    [invlist_len(invlist) - 1])
+				    [_invlist_len(invlist) - 1])
     {
 	_append_range_to_invlist(invlist, start, end);
 	return invlist;
@@ -7954,7 +8056,7 @@ Perl__invlist_invert(pTHX_ SV* const invlist)
      * have a zero; removes it otherwise.  As described above, the data
      * structure is set up so that this is very efficient */
 
-    UV* len_pos = get_invlist_len_addr(invlist);
+    UV* len_pos = _get_invlist_len_addr(invlist);
 
     PERL_ARGS_ASSERT__INVLIST_INVERT;
 
@@ -7991,7 +8093,7 @@ Perl__invlist_invert_prop(pTHX_ SV* const invlist)
 
     _invlist_invert(invlist);
 
-    len = invlist_len(invlist);
+    len = _invlist_len(invlist);
 
     if (len != 0) { /* If empty do nothing */
 	array = invlist_array(invlist);
@@ -8023,7 +8125,7 @@ S_invlist_clone(pTHX_ SV* const invlist)
 
     /* Need to allocate extra space to accommodate Perl's addition of a
      * trailing NUL to SvPV's, since it thinks they are always strings */
-    SV* new_invlist = _new_invlist(invlist_len(invlist) + 1);
+    SV* new_invlist = _new_invlist(_invlist_len(invlist) + 1);
     STRLEN length = SvCUR(invlist);
 
     PERL_ARGS_ASSERT_INVLIST_CLONE;
@@ -8074,7 +8176,7 @@ S_invlist_iternext(pTHX_ SV* invlist, UV* start, UV* end)
      * will start over at the beginning of the list */
 
     UV* pos = get_invlist_iter_addr(invlist);
-    UV len = invlist_len(invlist);
+    UV len = _invlist_len(invlist);
     UV *array;
 
     PERL_ARGS_ASSERT_INVLIST_ITERNEXT;
@@ -8106,7 +8208,7 @@ S_invlist_highest(pTHX_ SV* const invlist)
      * 0, or if the list is empty.  If this distinction matters to you, check
      * for emptiness before calling this function */
 
-    UV len = invlist_len(invlist);
+    UV len = _invlist_len(invlist);
     UV *array;
 
     PERL_ARGS_ASSERT_INVLIST_HIGHEST;
@@ -8159,15 +8261,17 @@ Perl__invlist_contents(pTHX_ SV* const invlist)
 }
 #endif
 
-#if 0
+#ifdef PERL_ARGS_ASSERT__INVLIST_DUMP
 void
-S_invlist_dump(pTHX_ SV* const invlist, const char * const header)
+Perl__invlist_dump(pTHX_ SV* const invlist, const char * const header)
 {
     /* Dumps out the ranges in an inversion list.  The string 'header'
      * if present is output on a line before the first range */
 
     UV start, end;
 
+    PERL_ARGS_ASSERT__INVLIST_DUMP;
+
     if (header && strlen(header)) {
 	PerlIO_printf(Perl_debug_log, "%s\n", header);
     }
@@ -8176,13 +8280,88 @@ S_invlist_dump(pTHX_ SV* const invlist, const char * const header)
 	if (end == UV_MAX) {
 	    PerlIO_printf(Perl_debug_log, "0x%04"UVXf" .. INFINITY\n", start);
 	}
+	else if (end != start) {
+	    PerlIO_printf(Perl_debug_log, "0x%04"UVXf" .. 0x%04"UVXf"\n",
+		                                 start,         end);
+	}
 	else {
-	    PerlIO_printf(Perl_debug_log, "0x%04"UVXf" .. 0x%04"UVXf"\n", start, end);
+	    PerlIO_printf(Perl_debug_log, "0x%04"UVXf"\n", start);
 	}
     }
 }
 #endif
 
+#if 0
+bool
+S__invlistEQ(pTHX_ SV* const a, SV* const b, bool complement_b)
+{
+    /* Return a boolean as to if the two passed in inversion lists are
+     * identical.  The final argument, if TRUE, says to take the complement of
+     * the second inversion list before doing the comparison */
+
+    UV* array_a = invlist_array(a);
+    UV* array_b = invlist_array(b);
+    UV len_a = _invlist_len(a);
+    UV len_b = _invlist_len(b);
+
+    UV i = 0;		    /* current index into the arrays */
+    bool retval = TRUE;     /* Assume are identical until proven otherwise */
+
+    PERL_ARGS_ASSERT__INVLISTEQ;
+
+    /* If are to compare 'a' with the complement of b, set it
+     * up so are looking at b's complement. */
+    if (complement_b) {
+
+        /* The complement of nothing is everything, so <a> would have to have
+         * just one element, starting at zero (ending at infinity) */
+        if (len_b == 0) {
+            return (len_a == 1 && array_a[0] == 0);
+        }
+        else if (array_b[0] == 0) {
+
+            /* Otherwise, to complement, we invert.  Here, the first element is
+             * 0, just remove it.  To do this, we just pretend the array starts
+             * one later, and clear the flag as we don't have to do anything
+             * else later */
+
+            array_b++;
+            len_b--;
+            complement_b = FALSE;
+        }
+        else {
+
+            /* But if the first element is not zero, we unshift a 0 before the
+             * array.  The data structure reserves a space for that 0 (which
+             * should be a '1' right now), so physical shifting is unneeded,
+             * but temporarily change that element to 0.  Before exiting the
+             * routine, we must restore the element to '1' */
+            array_b--;
+            len_b++;
+            array_b[0] = 0;
+        }
+    }
+
+    /* Make sure that the lengths are the same, as well as the final element
+     * before looping through the remainder.  (Thus we test the length, final,
+     * and first elements right off the bat) */
+    if (len_a != len_b || array_a[len_a-1] != array_b[len_a-1]) {
+        retval = FALSE;
+    }
+    else for (i = 0; i < len_a - 1; i++) {
+        if (array_a[i] != array_b[i]) {
+            retval = FALSE;
+            break;
+        }
+    }
+
+    if (complement_b) {
+        array_b[0] = 1;
+    }
+    return retval;
+}
+#endif
+
 #undef HEADER_LENGTH
 #undef INVLIST_INITIAL_LENGTH
 #undef TO_INTERNAL_SIZE
@@ -8215,11 +8394,11 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
     /* paren: Parenthesized? 0=top, 1=(, inside: changed to letter. */
 {
     dVAR;
-    register regnode *ret;		/* Will be the head of the group. */
-    register regnode *br;
-    register regnode *lastbr;
-    register regnode *ender = NULL;
-    register I32 parno = 0;
+    regnode *ret;		/* Will be the head of the group. */
+    regnode *br;
+    regnode *lastbr;
+    regnode *ender = NULL;
+    I32 parno = 0;
     I32 flags;
     U32 oregflags = RExC_flags;
     bool have_branch = 0;
@@ -8390,7 +8569,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
 		    ret = reganode(pRExC_state,
 				   ((! FOLD)
 				     ? NREF
-				     : (MORE_ASCII_RESTRICTED)
+				     : (ASCII_FOLD_RESTRICTED)
 				       ? NREFFA
                                        : (AT_LEAST_UNI_SEMANTICS)
                                          ? NREFFU
@@ -9225,9 +9404,9 @@ STATIC regnode *
 S_regbranch(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, I32 first, U32 depth)
 {
     dVAR;
-    register regnode *ret;
-    register regnode *chain = NULL;
-    register regnode *latest;
+    regnode *ret;
+    regnode *chain = NULL;
+    regnode *latest;
     I32 flags = 0, c = 0;
     GET_RE_DEBUG_FLAGS_DECL;
 
@@ -9298,9 +9477,9 @@ STATIC regnode *
 S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
 {
     dVAR;
-    register regnode *ret;
-    register char op;
-    register char *next;
+    regnode *ret;
+    char op;
+    char *next;
     I32 flags;
     const char * const origparse = RExC_parse;
     I32 min;
@@ -9309,6 +9488,10 @@ S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
     char *parse_start;
 #endif
     const char *maxpos = NULL;
+
+    /* Save the original in case we change the emitted regop to a FAIL. */
+    regnode * const orig_emit = RExC_emit;
+
     GET_RE_DEBUG_FLAGS_DECL;
 
     PERL_ARGS_ASSERT_REGPIECE;
@@ -9355,6 +9538,23 @@ S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
 		vFAIL2("Quantifier in {,} bigger than %d", REG_INFTY - 1);
 	    RExC_parse = next;
 	    nextchar(pRExC_state);
+            if (max < min) {    /* If can't match, warn and optimize to fail
+                                   unconditionally */
+                if (SIZE_ONLY) {
+                    ckWARNreg(RExC_parse, "Quantifier {n,m} with n > m can't match");
+
+                    /* We can't back off the size because we have to reserve
+                     * enough space for all the things we are about to throw
+                     * away, but we can shrink it by the ammount we are about
+                     * to re-use here */
+                    RExC_size = PREVOPER(RExC_size) - regarglen[(U8)OPFAIL];
+                }
+                else {
+                    RExC_emit = orig_emit;
+                }
+                ret = reg_node(pRExC_state, OPFAIL);
+                return ret;
+            }
 
 	do_curly:
 	    if ((flags&SIMPLE)) {
@@ -9392,8 +9592,6 @@ S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
 		*flagp = WORST;
 	    if (max > 0)
 		*flagp |= HASWIDTH;
-	    if (max < min)
-		vFAIL("Can't do {n,m} with n > m");
 	    if (!SIZE_ONLY) {
 		ARG1_SET(ret, (U16)min);
 		ARG2_SET(ret, (U16)max);
@@ -9455,10 +9653,12 @@ S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
     }
   nest_check:
     if (!SIZE_ONLY && !(flags&(HASWIDTH|POSTPONED)) && max > REG_INFTY/3) {
+	SAVEFREESV(RExC_rx_sv); /* in case of fatal warnings */
 	ckWARN3reg(RExC_parse,
 		   "%.*s matches null string many times",
 		   (int)(RExC_parse >= origparse ? RExC_parse - origparse : 0),
 		   origparse);
+	ReREFCNT_inc(RExC_rx_sv);
     }
 
     if (RExC_parse < RExC_end && *RExC_parse == '?') {
@@ -9489,86 +9689,95 @@ S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
     return(ret);
 }
 
-
-/* reg_namedseq(pRExC_state,UVp, UV depth)
+STATIC bool
+S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state, regnode** node_p, UV *valuep, I32 *flagp, U32 depth, bool in_char_class)
+{
    
-   This is expected to be called by a parser routine that has 
-   recognized '\N' and needs to handle the rest. RExC_parse is
-   expected to point at the first char following the N at the time
-   of the call.
+ /* This is expected to be called by a parser routine that has recognized '\N'
+   and needs to handle the rest. RExC_parse is expected to point at the first
+   char following the N at the time of the call.  On successful return,
+   RExC_parse has been updated to point to just after the sequence identified
+   by this routine, and <*flagp> has been updated.
 
-   The \N may be inside (indicated by valuep not being NULL) or outside a
+   The \N may be inside (indicated by the boolean <in_char_class>) or outside a
    character class.
 
    \N may begin either a named sequence, or if outside a character class, mean
    to match a non-newline.  For non single-quoted regexes, the tokenizer has
-   attempted to decide which, and in the case of a named sequence converted it
+   attempted to decide which, and in the case of a named sequence, converted it
    into one of the forms: \N{} (if the sequence is null), or \N{U+c1.c2...},
    where c1... are the characters in the sequence.  For single-quoted regexes,
    the tokenizer passes the \N sequence through unchanged; this code will not
-   attempt to determine this nor expand those.  The net effect is that if the
-   beginning of the passed-in pattern isn't '{U+' or there is no '}', it
-   signals that this \N occurrence means to match a non-newline.
-   
+   attempt to determine this nor expand those, instead raising a syntax error.
+   The net effect is that if the beginning of the passed-in pattern isn't '{U+'
+   or there is no '}', it signals that this \N occurrence means to match a
+   non-newline.
+
    Only the \N{U+...} form should occur in a character class, for the same
    reason that '.' inside a character class means to just match a period: it
    just doesn't make sense.
-   
-   If valuep is non-null then it is assumed that we are parsing inside 
-   of a charclass definition and the first codepoint in the resolved
-   string is returned via *valuep and the routine will return NULL. 
-   In this mode if a multichar string is returned from the charnames 
-   handler, a warning will be issued, and only the first char in the 
-   sequence will be examined. If the string returned is zero length
-   then the value of *valuep is undefined and NON-NULL will 
-   be returned to indicate failure. (This will NOT be a valid pointer 
-   to a regnode.)
-   
-   If valuep is null then it is assumed that we are parsing normal text and a
-   new EXACT node is inserted into the program containing the resolved string,
-   and a pointer to the new node is returned.  But if the string is zero length
-   a NOTHING node is emitted instead.
 
-   On success RExC_parse is set to the char following the endbrace.
-   Parsing failures will generate a fatal error via vFAIL(...)
+   The function raises an error (via vFAIL), and doesn't return for various
+   syntax errors.  Otherwise it returns TRUE and sets <node_p> or <valuep> on
+   success; it returns FALSE otherwise.
+
+   If <valuep> is non-null, it means the caller can accept an input sequence
+   consisting of a just a single code point; <*valuep> is set to that value
+   if the input is such.
+
+   If <node_p> is non-null it signifies that the caller can accept any other
+   legal sequence (i.e., one that isn't just a single code point).  <*node_p>
+   is set as follows:
+    1) \N means not-a-NL: points to a newly created REG_ANY node;
+    2) \N{}:              points to a new NOTHING node;
+    3) otherwise:         points to a new EXACT node containing the resolved
+                          string.
+   Note that FALSE is returned for single code point sequences if <valuep> is
+   null.
  */
-STATIC regnode *
-S_reg_namedseq(pTHX_ RExC_state_t *pRExC_state, UV *valuep, I32 *flagp, U32 depth)
-{
+
     char * endbrace;    /* '}' following the name */
-    regnode *ret = NULL;
     char* p;
+    char *endchar;	/* Points to '.' or '}' ending cur char in the input
+                           stream */
+    bool has_multiple_chars; /* true if the input stream contains a sequence of
+                                more than one character */
 
     GET_RE_DEBUG_FLAGS_DECL;
  
-    PERL_ARGS_ASSERT_REG_NAMEDSEQ;
+    PERL_ARGS_ASSERT_GROK_BSLASH_N;
 
     GET_RE_DEBUG_FLAGS;
 
+    assert(cBOOL(node_p) ^ cBOOL(valuep));  /* Exactly one should be set */
+
     /* The [^\n] meaning of \N ignores spaces and comments under the /x
      * modifier.  The other meaning does not */
     p = (RExC_flags & RXf_PMf_EXTENDED)
 	? regwhite( pRExC_state, RExC_parse )
 	: RExC_parse;
-   
+
     /* Disambiguate between \N meaning a named character versus \N meaning
      * [^\n].  The former is assumed when it can't be the latter. */
     if (*p != '{' || regcurly(p)) {
 	RExC_parse = p;
-	if (valuep) {
+	if (! node_p) {
 	    /* no bare \N in a charclass */
-	    vFAIL("\\N in a character class must be a named character: \\N{...}");
-	}
+            if (in_char_class) {
+                vFAIL("\\N in a character class must be a named character: \\N{...}");
+            }
+            return FALSE;
+        }
 	nextchar(pRExC_state);
-	ret = reg_node(pRExC_state, REG_ANY);
+	*node_p = reg_node(pRExC_state, REG_ANY);
 	*flagp |= HASWIDTH|SIMPLE;
 	RExC_naughty++;
 	RExC_parse--;
-        Set_Node_Length(ret, 1); /* MJD */
-	return ret;
+        Set_Node_Length(*node_p, 1); /* MJD */
+	return TRUE;
     }
 
-    /* Here, we have decided it should be a named sequence */
+    /* Here, we have decided it should be a named character or sequence */
 
     /* The test above made sure that the next real character is a '{', but
      * under the /x modifier, it could be separated by space (or a comment and
@@ -9590,44 +9799,48 @@ S_reg_namedseq(pTHX_ RExC_state_t *pRExC_state, UV *valuep, I32 *flagp, U32 dept
     }
 
     if (endbrace == RExC_parse) {   /* empty: \N{} */
-	if (! valuep) {
-	    RExC_parse = endbrace + 1;  
-	    return reg_node(pRExC_state,NOTHING);
-	}
-
-	if (SIZE_ONLY) {
-	    ckWARNreg(RExC_parse,
-		    "Ignoring zero length \\N{} in character class"
-	    );
-	    RExC_parse = endbrace + 1;  
+        bool ret = TRUE;
+	if (node_p) {
+	    *node_p = reg_node(pRExC_state,NOTHING);
+	}
+        else if (in_char_class) {
+            if (SIZE_ONLY && in_char_class) {
+                ckWARNreg(RExC_parse,
+                        "Ignoring zero length \\N{} in character class"
+                );
+            }
+            ret = FALSE;
 	}
-	*valuep = 0;
-	return (regnode *) &RExC_parse; /* Invalid regnode pointer */
+        else {
+            return FALSE;
+        }
+        nextchar(pRExC_state);
+        return ret;
     }
 
-    REQUIRE_UTF8;	/* named sequences imply Unicode semantics */
+    RExC_uni_semantics = 1; /* Unicode named chars imply Unicode semantics */
     RExC_parse += 2;	/* Skip past the 'U+' */
 
-    if (valuep) {   /* In a bracketed char class */
-	/* We only pay attention to the first char of 
-	multichar strings being returned. I kinda wonder
+    endchar = RExC_parse + strcspn(RExC_parse, ".}");
+
+    /* Code points are separated by dots.  If none, there is only one code
+     * point, and is terminated by the brace */
+    has_multiple_chars = (endchar < endbrace);
+
+    if (valuep && (! has_multiple_chars || in_char_class)) {
+	/* We only pay attention to the first char of
+        multichar strings being returned in char classes. I kinda wonder
 	if this makes sense as it does change the behaviour
 	from earlier versions, OTOH that behaviour was broken
 	as well. XXX Solution is to recharacterize as
 	[rest-of-class]|multi1|multi2... */
 
-	STRLEN length_of_hex;
-	I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
+	STRLEN length_of_hex = (STRLEN)(endchar - RExC_parse);
+	I32 grok_hex_flags = PERL_SCAN_ALLOW_UNDERSCORES
 	    | PERL_SCAN_DISALLOW_PREFIX
 	    | (SIZE_ONLY ? PERL_SCAN_SILENT_ILLDIGIT : 0);
-    
-	char * endchar = RExC_parse + strcspn(RExC_parse, ".}");
-	if (endchar < endbrace) {
-	    ckWARNreg(endchar, "Using just the first character returned by \\N{} in character class");
-	}
 
-	length_of_hex = (STRLEN)(endchar - RExC_parse);
-	*valuep = grok_hex(RExC_parse, &length_of_hex, &flags, NULL);
+	*valuep = grok_hex(RExC_parse, &length_of_hex, &grok_hex_flags, NULL);
 
 	/* The tokenizer should have guaranteed validity, but it's possible to
 	 * bypass it by using single quoting, so check */
@@ -9639,16 +9852,27 @@ S_reg_namedseq(pTHX_ RExC_state_t *pRExC_state, UV *valuep, I32 *flagp, U32 dept
 			    ? UTF8SKIP(RExC_parse)
 			    : 1;
 	    /* Guard against malformed utf8 */
-	    if (RExC_parse >= endchar) RExC_parse = endchar;
+	    if (RExC_parse >= endchar) {
+                RExC_parse = endchar;
+            }
 	    vFAIL("Invalid hexadecimal number in \\N{U+...}");
-	}    
+	}
+
+        if (in_char_class && has_multiple_chars) {
+	    ckWARNreg(endchar, "Using just the first character returned by \\N{} in character class");
+        }
 
-	RExC_parse = endbrace + 1;
-	if (endchar == endbrace) return NULL;
+        RExC_parse = endbrace + 1;
+    }
+    else if (! node_p || ! has_multiple_chars) {
 
-        ret = (regnode *) &RExC_parse;	/* Invalid regnode pointer */
+        /* Here, the input is legal, but not according to the caller's
+         * options.  We fail without advancing the parse, so that the
+         * caller can try again */
+        RExC_parse = p;
+        return FALSE;
     }
-    else {	/* Not a char class */
+    else {
 
 	/* What is done here is to convert this to a sub-pattern of the form
 	 * (?:\x{char1}\x{char2}...)
@@ -9661,16 +9885,11 @@ S_reg_namedseq(pTHX_ RExC_state_t *pRExC_state, UV *valuep, I32 *flagp, U32 dept
 
 	SV * substitute_parse = newSVpvn_flags("?:", 2, SVf_UTF8|SVs_TEMP);
 	STRLEN len;
-	char *endchar;	    /* Points to '.' or '}' ending cur char in the input
-			       stream */
 	char *orig_end = RExC_end;
+        I32 flags;
 
 	while (RExC_parse < endbrace) {
 
-	    /* Code points are separated by dots.  If none, there is only one
-	     * code point, and is terminated by the brace */
-	    endchar = RExC_parse + strcspn(RExC_parse, ".}");
-
 	    /* Convert to notation the rest of the code understands */
 	    sv_catpv(substitute_parse, "\\x{");
 	    sv_catpvn(substitute_parse, RExC_parse, endchar - RExC_parse);
@@ -9678,6 +9897,7 @@ S_reg_namedseq(pTHX_ RExC_state_t *pRExC_state, UV *valuep, I32 *flagp, U32 dept
 
 	    /* Point to the beginning of the next character in the sequence. */
 	    RExC_parse = endchar + 1;
+	    endchar = RExC_parse + strcspn(RExC_parse, ".}");
 	}
 	sv_catpv(substitute_parse, ")");
 
@@ -9692,16 +9912,17 @@ S_reg_namedseq(pTHX_ RExC_state_t *pRExC_state, UV *valuep, I32 *flagp, U32 dept
 	/* The values are Unicode, and therefore not subject to recoding */
 	RExC_override_recoding = 1;
 
-	ret = reg(pRExC_state, 1, flagp, depth+1);
+	*node_p = reg(pRExC_state, 1, &flags, depth+1);
+	*flagp |= flags&(HASWIDTH|SPSTART|SIMPLE|POSTPONED);
 
 	RExC_parse = endbrace;
 	RExC_end = orig_end;
 	RExC_override_recoding = 0;
 
-	nextchar(pRExC_state);
+        nextchar(pRExC_state);
     }
 
-    return ret;
+    return TRUE;
 }
 
 
@@ -9738,6 +9959,100 @@ S_reg_recode(pTHX_ const char value, SV **encp)
     return uv;
 }
 
+PERL_STATIC_INLINE U8
+S_compute_EXACTish(pTHX_ RExC_state_t *pRExC_state)
+{
+    U8 op;
+
+    PERL_ARGS_ASSERT_COMPUTE_EXACTISH;
+
+    if (! FOLD) {
+        return EXACT;
+    }
+
+    op = get_regex_charset(RExC_flags);
+    if (op >= REGEX_ASCII_RESTRICTED_CHARSET) {
+        op--; /* /a is same as /u, and map /aa's offset to what /a's would have
+                 been, so there is no hole */
+    }
+
+    return op + EXACTF;
+}
+
+PERL_STATIC_INLINE void
+S_alloc_maybe_populate_EXACT(pTHX_ RExC_state_t *pRExC_state, regnode *node, I32* flagp, STRLEN len, UV code_point)
+{
+    /* This knows the details about sizing an EXACTish node, setting flags for
+     * it (by setting <*flagp>, and potentially populating it with a single
+     * character.
+     *
+     * If <len> (the length in bytes) is non-zero, this function assumes that
+     * the node has already been populated, and just does the sizing.  In this
+     * case <code_point> should be the final code point that has already been
+     * placed into the node.  This value will be ignored except that under some
+     * circumstances <*flagp> is set based on it.
+     *
+     * If <len> is zero, the function assumes that the node is to contain only
+     * the single character given by <code_point> and calculates what <len>
+     * should be.  In pass 1, it sizes the node appropriately.  In pass 2, it
+     * additionally will populate the node's STRING with <code_point>, if <len>
+     * is 0.  In both cases <*flagp> is appropriately set
+     *
+     * It knows that under FOLD, UTF characters and the Latin Sharp S must be
+     * folded (the latter only when the rules indicate it can match 'ss') */
+
+    bool len_passed_in = cBOOL(len != 0);
+    U8 character[UTF8_MAXBYTES_CASE+1];
+
+    PERL_ARGS_ASSERT_ALLOC_MAYBE_POPULATE_EXACT;
+
+    if (! len_passed_in) {
+        if (UTF) {
+            if (FOLD) {
+                to_uni_fold(NATIVE_TO_UNI(code_point), character, &len);
+            }
+            else {
+                uvchr_to_utf8( character, code_point);
+                len = UTF8SKIP(character);
+            }
+        }
+        else if (! FOLD
+                 || code_point != LATIN_SMALL_LETTER_SHARP_S
+                 || ASCII_FOLD_RESTRICTED
+                 || ! AT_LEAST_UNI_SEMANTICS)
+        {
+            *character = (U8) code_point;
+            len = 1;
+        }
+        else {
+            *character = 's';
+            *(character + 1) = 's';
+            len = 2;
+        }
+    }
+
+    if (SIZE_ONLY) {
+        RExC_size += STR_SZ(len);
+    }
+    else {
+        RExC_emit += STR_SZ(len);
+        STR_LEN(node) = len;
+        if (! len_passed_in) {
+            Copy((char *) character, STRING(node), len, char);
+        }
+    }
+
+    *flagp |= HASWIDTH;
+
+    /* A single character node is SIMPLE, except for the special-cased SHARP S
+     * under /di. */
+    if ((len == 1 || (UTF && len == UNISKIP(code_point)))
+        && (code_point != LATIN_SMALL_LETTER_SHARP_S
+            || ! FOLD || ! DEPENDS_SEMANTICS))
+    {
+        *flagp |= SIMPLE;
+    }
+}
 
 /*
  - regatom - the lowest level
@@ -9803,7 +10118,7 @@ STATIC regnode *
 S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
 {
     dVAR;
-    register regnode *ret = NULL;
+    regnode *ret = NULL;
     I32 flags;
     char *parse_start = RExC_parse;
     U8 op;
@@ -9851,13 +10166,12 @@ tryagain:
     case '[':
     {
 	char * const oregcomp_parse = ++RExC_parse;
-        ret = regclass(pRExC_state,depth+1);
+        ret = regclass(pRExC_state, flagp,depth+1);
 	if (*RExC_parse != ']') {
 	    RExC_parse = oregcomp_parse;
 	    vFAIL("Unmatched [");
 	}
 	nextchar(pRExC_state);
-	*flagp |= HASWIDTH|SIMPLE;
         Set_Node_Length(ret, RExC_parse - oregcomp_parse + 1); /* MJD */
 	break;
     }
@@ -10068,7 +10382,7 @@ tryagain:
 		}
 		RExC_parse--;
 
-                ret = regclass(pRExC_state,depth+1);
+                ret = regclass(pRExC_state, flagp,depth+1);
 
 		RExC_end = oldregxend;
 		RExC_parse--;
@@ -10076,16 +10390,24 @@ tryagain:
 		Set_Node_Offset(ret, parse_start + 2);
 		Set_Node_Cur_Length(ret);
 		nextchar(pRExC_state);
-		*flagp |= HASWIDTH|SIMPLE;
 	    }
 	    break;
         case 'N': 
-            /* Handle \N and \N{NAME} here and not below because it can be
-            multicharacter. join_exact() will join them up later on. 
-            Also this makes sure that things like /\N{BLAH}+/ and 
-            \N{BLAH} being multi char Just Happen. dmq*/
+            /* Handle \N and \N{NAME} with multiple code points here and not
+             * below because it can be multicharacter. join_exact() will join
+             * them up later on.  Also this makes sure that things like
+             * /\N{BLAH}+/ and \N{BLAH} being multi char Just Happen. dmq.
+             * The options to the grok function call causes it to fail if the
+             * sequence is just a single code point.  We then go treat it as
+             * just another character in the current EXACT node, and hence it
+             * gets uniform treatment with all the other characters.  The
+             * special treatment for quantifiers is not needed for such single
+             * character sequences */
             ++RExC_parse;
-            ret= reg_namedseq(pRExC_state, NULL, flagp, depth);
+            if (! grok_bslash_N(pRExC_state, &ret, NULL, flagp, depth, FALSE)) {
+                RExC_parse--;
+                goto defchar;
+            }
             break;
 	case 'k':    /* Handle \k<NAME> and \k'NAME' */
 	parse_named_seq:
@@ -10115,7 +10437,7 @@ tryagain:
                 ret = reganode(pRExC_state,
                                ((! FOLD)
                                  ? NREF
-				 : (MORE_ASCII_RESTRICTED)
+				 : (ASCII_FOLD_RESTRICTED)
 				   ? NREFFA
                                    : (AT_LEAST_UNI_SEMANTICS)
                                      ? NREFFU
@@ -10186,7 +10508,7 @@ tryagain:
 		    ret = reganode(pRExC_state,
 				   ((! FOLD)
 				     ? REF
-				     : (MORE_ASCII_RESTRICTED)
+				     : (ASCII_FOLD_RESTRICTED)
 				       ? REFFA
                                        : (AT_LEAST_UNI_SEMANTICS)
                                          ? REFFU
@@ -10230,33 +10552,40 @@ tryagain:
 	    RExC_parse++;
 
 	defchar: {
-	    register STRLEN len;
-	    register UV ender;
-	    register char *p;
+	    STRLEN len = 0;
+	    UV ender;
+	    char *p;
 	    char *s;
+#define MAX_NODE_STRING_SIZE 127
+	    char foldbuf[MAX_NODE_STRING_SIZE+UTF8_MAXBYTES_CASE];
+	    char *s0;
+	    U8 upper_parse = MAX_NODE_STRING_SIZE;
 	    STRLEN foldlen;
-	    U8 tmpbuf[UTF8_MAXBYTES_CASE+1], *foldbuf;
             U8 node_type;
+            bool next_is_quantifier;
+            char * oldp = NULL;
 
-	    /* Is this a LATIN LOWER CASE SHARP S in an EXACTFU node?  If so,
-	     * it is folded to 'ss' even if not utf8 */
-	    bool is_exactfu_sharp_s;
+            /* If a folding node contains only code points that don't
+             * participate in folds, it can be changed into an EXACT node,
+             * which allows the optimizer more things to look for */
+            bool maybe_exact;
 
 	    ender = 0;
-            if (! FOLD) {
-                node_type = EXACT;
-            }
-            else {
-                node_type = get_regex_charset(RExC_flags);
-                if (node_type >= REGEX_ASCII_RESTRICTED_CHARSET) {
-                    node_type--; /* /a is same as /u, and map /aa's offset to
-                                    what /a's would have been, so there is no
-                                    hole */
-                }
-                node_type += EXACTF;
-            }
+            node_type = compute_EXACTish(pRExC_state);
 	    ret = reg_node(pRExC_state, node_type);
-	    s = STRING(ret);
+
+            /* In pass1, folded, we use a temporary buffer instead of the
+             * actual node, as the node doesn't exist yet */
+	    s = (SIZE_ONLY && FOLD) ? foldbuf : STRING(ret);
+
+            s0 = s;
+
+	reparse:
+
+            /* We do the EXACTFish to EXACT node only if folding, and not if in
+             * locale, as whether a character folds or not isn't known until
+             * runtime */
+            maybe_exact = FOLD && ! LOC;
 
 	    /* XXX The node can hold up to 255 bytes, yet this only goes to
              * 127.  I (khw) do not know why.  Keeping it somewhat less than
@@ -10276,11 +10605,11 @@ tryagain:
              * could back off to end with only a code point that isn't such a
              * non-final, but it is possible for there not to be any in the
              * entire node. */
-	    for (len = 0, p = RExC_parse - 1;
-	         len < 127 && p < RExC_end;
+	    for (p = RExC_parse - 1;
+	         len < upper_parse && p < RExC_end;
 	         len++)
 	    {
-		char * const oldp = p;
+		oldp = p;
 
 		if (RExC_flags & RXf_PMf_EXTENDED)
 		    p = regwhite( pRExC_state, p );
@@ -10316,7 +10645,6 @@ tryagain:
 		    case 'g': case 'G':   /* generic-backref, pos assertion */
 		    case 'h': case 'H':   /* HORIZWS */
 		    case 'k': case 'K':   /* named backref, keep marker */
-		    case 'N':             /* named char sequence */
 		    case 'p': case 'P':   /* Unicode property */
 		              case 'R':   /* LNBREAK */
 		    case 's': case 'S':   /* space class */
@@ -10334,6 +10662,22 @@ tryagain:
 			ender = '\n';
 			p++;
 			break;
+		    case 'N': /* Handle a single-code point named character. */
+                        /* The options cause it to fail if a multiple code
+                         * point sequence.  Handle those in the switch() above
+                         * */
+                        RExC_parse = p + 1;
+                        if (! grok_bslash_N(pRExC_state, NULL, &ender,
+                                            flagp, depth, FALSE))
+                        {
+                            RExC_parse = p = oldp;
+                            goto loopdone;
+                        }
+                        p = RExC_parse;
+                        if (ender > 0xff) {
+                            REQUIRE_UTF8;
+                        }
+                        break;
 		    case 'r':
 			ender = '\r';
 			p++;
@@ -10479,150 +10823,308 @@ tryagain:
 		    break;
 		} /* End of switch on the literal */
 
-                is_exactfu_sharp_s = (node_type == EXACTFU
-			              && ender == LATIN_SMALL_LETTER_SHARP_S);
+		/* Here, have looked at the literal character and <ender>
+		 * contains its ordinal, <p> points to the character after it
+		 */
+
 		if ( RExC_flags & RXf_PMf_EXTENDED)
 		    p = regwhite( pRExC_state, p );
-		if ((UTF && FOLD) || is_exactfu_sharp_s) {
-		    /* Prime the casefolded buffer.  Locale rules, which apply
-		     * only to code points < 256, aren't known until execution,
-		     * so for them, just output the original character using
-                     * utf8.  If we start to fold non-UTF patterns, be sure to
-                     * update join_exact() */
-		    if (LOC && ender < 256) {
-			if (UNI_IS_INVARIANT(ender)) {
-			    *tmpbuf = (U8) ender;
-			    foldlen = 1;
-			} else {
-			    *tmpbuf = UTF8_TWO_BYTE_HI(ender);
-			    *(tmpbuf + 1) = UTF8_TWO_BYTE_LO(ender);
-			    foldlen = 2;
-			}
-		    }
-		    else if (isASCII(ender)) {	/* Note: Here can't also be LOC
-						 */
-			ender = toLOWER(ender);
-			*tmpbuf = (U8) ender;
-			foldlen = 1;
-		    }
-		    else if (! MORE_ASCII_RESTRICTED && ! LOC) {
 
-			/* Locale and /aa require more selectivity about the
-			 * fold, so are handled below.  Otherwise, here, just
-			 * use the fold */
-			ender = toFOLD_uni(ender, tmpbuf, &foldlen);
-		    }
-		    else {
-			/* Under locale rules or /aa we are not to mix,
-			 * respectively, ords < 256 or ASCII with non-.  So
-			 * reject folds that mix them, using only the
-			 * non-folded code point.  So do the fold to a
-			 * temporary, and inspect each character in it. */
-			U8 trialbuf[UTF8_MAXBYTES_CASE+1];
-			U8* s = trialbuf;
-			UV tmpender = toFOLD_uni(ender, trialbuf, &foldlen);
-			U8* e = s + foldlen;
-			bool fold_ok = TRUE;
-
-			while (s < e) {
-			    if (isASCII(*s)
-				|| (LOC && (UTF8_IS_INVARIANT(*s)
-					   || UTF8_IS_DOWNGRADEABLE_START(*s))))
-			    {
-				fold_ok = FALSE;
-				break;
-			    }
-			    s += UTF8SKIP(s);
-			}
-			if (fold_ok) {
-			    Copy(trialbuf, tmpbuf, foldlen, U8);
-			    ender = tmpender;
-			}
-			else {
-			    uvuni_to_utf8(tmpbuf, ender);
-			    foldlen = UNISKIP(ender);
-			}
-		    }
-		}
-		if (p < RExC_end && ISMULT2(p)) { /* Back off on ?+*. */
-		    if (len)
-			p = oldp;
-		    else if (UTF || is_exactfu_sharp_s) {
-			 if (FOLD) {
-			      /* Emit all the Unicode characters. */
-			      STRLEN numlen;
-			      for (foldbuf = tmpbuf;
-				   foldlen;
-				   foldlen -= numlen) {
-
-				   /* tmpbuf has been constructed by us, so we
-				    * know it is valid utf8 */
-				   ender = valid_utf8_to_uvchr(foldbuf, &numlen);
-				   if (numlen > 0) {
-					const STRLEN unilen = reguni(pRExC_state, ender, s);
-					s       += unilen;
-					len     += unilen;
-					/* In EBCDIC the numlen
-					 * and unilen can differ. */
-					foldbuf += numlen;
-					if (numlen >= foldlen)
-					     break;
-				   }
-				   else
-					break; /* "Can't happen." */
-			      }
-			 }
-			 else {
-			      const STRLEN unilen = reguni(pRExC_state, ender, s);
-			      if (unilen > 0) {
-				   s   += unilen;
-				   len += unilen;
-			      }
-			 }
-		    }
-		    else {
-			len++;
-			REGC((char)ender, s++);
-		    }
-		    break;
+                /* If the next thing is a quantifier, it applies to this
+                 * character only, which means that this character has to be in
+                 * its own node and can't just be appended to the string in an
+                 * existing node, so if there are already other characters in
+                 * the node, close the node with just them, and set up to do
+                 * this character again next time through, when it will be the
+                 * only thing in its new node */
+                if ((next_is_quantifier = (p < RExC_end && ISMULT2(p))) && len)
+		{
+                    p = oldp;
+                    goto loopdone;
+                }
+
+		if (FOLD) {
+                    if (UTF
+                            /* See comments for join_exact() as to why we fold
+                             * this non-UTF at compile time */
+                        || (node_type == EXACTFU
+                            && ender == LATIN_SMALL_LETTER_SHARP_S))
+                    {
+
+
+                        /* Prime the casefolded buffer.  Locale rules, which
+                         * apply only to code points < 256, aren't known until
+                         * execution, so for them, just output the original
+                         * character using utf8.  If we start to fold non-UTF
+                         * patterns, be sure to update join_exact() */
+                        if (LOC && ender < 256) {
+                            if (UNI_IS_INVARIANT(ender)) {
+                                *s = (U8) ender;
+                                foldlen = 1;
+                            } else {
+                                *s = UTF8_TWO_BYTE_HI(ender);
+                                *(s + 1) = UTF8_TWO_BYTE_LO(ender);
+                                foldlen = 2;
+                            }
+                        }
+                        else {
+                            UV folded = _to_uni_fold_flags(
+                                           ender,
+                                           (U8 *) s,
+                                           &foldlen,
+                                           FOLD_FLAGS_FULL
+                                           | ((LOC) ?  FOLD_FLAGS_LOCALE
+                                                    : (ASCII_FOLD_RESTRICTED)
+                                                      ? FOLD_FLAGS_NOMIX_ASCII
+                                                      : 0)
+                                            );
+
+                            /* If this node only contains non-folding code
+                             * points so far, see if this new one is also
+                             * non-folding */
+                            if (maybe_exact) {
+                                if (folded != ender) {
+                                    maybe_exact = FALSE;
+                                }
+                                else {
+                                    /* Here the fold is the original; we have
+                                     * to check further to see if anything
+                                     * folds to it */
+                                    if (! PL_utf8_foldable) {
+                                        SV* swash = swash_init("utf8",
+                                                           "_Perl_Any_Folds",
+                                                           &PL_sv_undef, 1, 0);
+                                        PL_utf8_foldable =
+                                                    _get_swash_invlist(swash);
+                                        SvREFCNT_dec(swash);
+                                    }
+                                    if (_invlist_contains_cp(PL_utf8_foldable,
+                                                             ender))
+                                    {
+                                        maybe_exact = FALSE;
+                                    }
+                                }
+                            }
+                            ender = folded;
+                        }
+			s += foldlen;
+
+			/* The loop increments <len> each time, as all but this
+			 * path (and the one just below for UTF) through it add
+			 * a single byte to the EXACTish node.  But this one
+			 * has changed len to be the correct final value, so
+			 * subtract one to cancel out the increment that
+			 * follows */
+			len += foldlen - 1;
+                    }
+                    else {
+                        *(s++) = ender;
+                        maybe_exact &= ! IS_IN_SOME_FOLD_L1(ender);
+                    }
 		}
-                if (UTF || is_exactfu_sharp_s) {
-		     if (FOLD) {
-		          /* Emit all the Unicode characters. */
-			  STRLEN numlen;
-			  for (foldbuf = tmpbuf;
-			       foldlen;
-			       foldlen -= numlen) {
-			       ender = valid_utf8_to_uvchr(foldbuf, &numlen);
-			       if (numlen > 0) {
-				    const STRLEN unilen = reguni(pRExC_state, ender, s);
-				    len     += unilen;
-				    s       += unilen;
-				    /* In EBCDIC the numlen
-				     * and unilen can differ. */
-				    foldbuf += numlen;
-				    if (numlen >= foldlen)
-					 break;
-			       }
-			       else
-				    break;
-			  }
-		     }
-		     else {
-			  const STRLEN unilen = reguni(pRExC_state, ender, s);
-			  if (unilen > 0) {
-			       s   += unilen;
-			       len += unilen;
-			  }
-		     }
-		     len--;
+		else if (UTF) {
+                    const STRLEN unilen = reguni(pRExC_state, ender, s);
+                    if (unilen > 0) {
+                       s   += unilen;
+                       len += unilen;
+                    }
+
+		    /* See comment just above for - 1 */
+		    len--;
 		}
 		else {
 		    REGC((char)ender, s++);
+                }
+
+		if (next_is_quantifier) {
+
+                    /* Here, the next input is a quantifier, and to get here,
+                     * the current character is the only one in the node.
+                     * Also, here <len> doesn't include the final byte for this
+                     * character */
+                    len++;
+                    goto loopdone;
 		}
-	    }
+
+	    } /* End of loop through literal characters */
+
+            /* Here we have either exhausted the input or ran out of room in
+             * the node.  (If we encountered a character that can't be in the
+             * node, transfer is made directly to <loopdone>, and so we
+             * wouldn't have fallen off the end of the loop.)  In the latter
+             * case, we artificially have to split the node into two, because
+             * we just don't have enough space to hold everything.  This
+             * creates a problem if the final character participates in a
+             * multi-character fold in the non-final position, as a match that
+             * should have occurred won't, due to the way nodes are matched,
+             * and our artificial boundary.  So back off until we find a non-
+             * problematic character -- one that isn't at the beginning or
+             * middle of such a fold.  (Either it doesn't participate in any
+             * folds, or appears only in the final position of all the folds it
+             * does participate in.)  A better solution with far fewer false
+             * positives, and that would fill the nodes more completely, would
+             * be to actually have available all the multi-character folds to
+             * test against, and to back-off only far enough to be sure that
+             * this node isn't ending with a partial one.  <upper_parse> is set
+             * further below (if we need to reparse the node) to include just
+             * up through that final non-problematic character that this code
+             * identifies, so when it is set to less than the full node, we can
+             * skip the rest of this */
+            if (FOLD && p < RExC_end && upper_parse == MAX_NODE_STRING_SIZE) {
+
+                const STRLEN full_len = len;
+
+		assert(len >= MAX_NODE_STRING_SIZE);
+
+                /* Here, <s> points to the final byte of the final character.
+                 * Look backwards through the string until find a non-
+                 * problematic character */
+
+		if (! UTF) {
+
+                    /* These two have no multi-char folds to non-UTF characters
+                     */
+                    if (ASCII_FOLD_RESTRICTED || LOC) {
+                        goto loopdone;
+                    }
+
+                    while (--s >= s0 && IS_NON_FINAL_FOLD(*s)) { }
+                    len = s - s0 + 1;
+		}
+                else {
+                    if (!  PL_NonL1NonFinalFold) {
+                        PL_NonL1NonFinalFold = _new_invlist_C_array(
+                                        NonL1_Perl_Non_Final_Folds_invlist);
+                    }
+
+                    /* Point to the first byte of the final character */
+                    s = (char *) utf8_hop((U8 *) s, -1);
+
+                    while (s >= s0) {   /* Search backwards until find
+                                           non-problematic char */
+                        if (UTF8_IS_INVARIANT(*s)) {
+
+                            /* There are no ascii characters that participate
+                             * in multi-char folds under /aa.  In EBCDIC, the
+                             * non-ascii invariants are all control characters,
+                             * so don't ever participate in any folds. */
+                            if (ASCII_FOLD_RESTRICTED
+                                || ! IS_NON_FINAL_FOLD(*s))
+                            {
+                                break;
+                            }
+                        }
+                        else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
+
+                            /* No Latin1 characters participate in multi-char
+                             * folds under /l */
+                            if (LOC
+                                || ! IS_NON_FINAL_FOLD(TWO_BYTE_UTF8_TO_UNI(
+                                                                *s, *(s+1))))
+                            {
+                                break;
+                            }
+                        }
+                        else if (! _invlist_contains_cp(
+                                        PL_NonL1NonFinalFold,
+                                        valid_utf8_to_uvchr((U8 *) s, NULL)))
+                        {
+                            break;
+                        }
+
+                        /* Here, the current character is problematic in that
+                         * it does occur in the non-final position of some
+                         * fold, so try the character before it, but have to
+                         * special case the very first byte in the string, so
+                         * we don't read outside the string */
+                        s = (s == s0) ? s -1 : (char *) utf8_hop((U8 *) s, -1);
+                    } /* End of loop backwards through the string */
+
+                    /* If there were only problematic characters in the string,
+                     * <s> will point to before s0, in which case the length
+                     * should be 0, otherwise include the length of the
+                     * non-problematic character just found */
+                    len = (s < s0) ? 0 : s - s0 + UTF8SKIP(s);
+		}
+
+                /* Here, have found the final character, if any, that is
+                 * non-problematic as far as ending the node without splitting
+                 * it across a potential multi-char fold.  <len> contains the
+                 * number of bytes in the node up-to and including that
+                 * character, or is 0 if there is no such character, meaning
+                 * the whole node contains only problematic characters.  In
+                 * this case, give up and just take the node as-is.  We can't
+                 * do any better */
+                if (len == 0) {
+                    len = full_len;
+                } else {
+
+                    /* Here, the node does contain some characters that aren't
+                     * problematic.  If one such is the final character in the
+                     * node, we are done */
+                    if (len == full_len) {
+                        goto loopdone;
+                    }
+                    else if (len + ((UTF) ? UTF8SKIP(s) : 1) == full_len) {
+
+                        /* If the final character is problematic, but the
+                         * penultimate is not, back-off that last character to
+                         * later start a new node with it */
+                        p = oldp;
+                        goto loopdone;
+                    }
+
+                    /* Here, the final non-problematic character is earlier
+                     * in the input than the penultimate character.  What we do
+                     * is reparse from the beginning, going up only as far as
+                     * this final ok one, thus guaranteeing that the node ends
+                     * in an acceptable character.  The reason we reparse is
+                     * that we know how far in the character is, but we don't
+                     * know how to correlate its position with the input parse.
+                     * An alternate implementation would be to build that
+                     * correlation as we go along during the original parse,
+                     * but that would entail extra work for every node, whereas
+                     * this code gets executed only when the string is too
+                     * large for the node, and the final two characters are
+                     * problematic, an infrequent occurrence.  Yet another
+                     * possible strategy would be to save the tail of the
+                     * string, and the next time regatom is called, initialize
+                     * with that.  The problem with this is that unless you
+                     * back off one more character, you won't be guaranteed
+                     * regatom will get called again, unless regbranch,
+                     * regpiece ... are also changed.  If you do back off that
+                     * extra character, so that there is input guaranteed to
+                     * force calling regatom, you can't handle the case where
+                     * just the first character in the node is acceptable.  I
+                     * (khw) decided to try this method which doesn't have that
+                     * pitfall; if performance issues are found, we can do a
+                     * combination of the current approach plus that one */
+                    upper_parse = len;
+                    len = 0;
+                    s = s0;
+                    goto reparse;
+                }
+	    }   /* End of verifying node ends with an appropriate char */
+
 	loopdone:   /* Jumped to when encounters something that shouldn't be in
 		       the node */
+
+            /* If 'maybe_exact' is still set here, means there are no
+             * code points in the node that participate in folds */
+            if (FOLD && maybe_exact) {
+                OP(ret) = EXACT;
+            }
+
+            /* I (khw) don't know if you can get here with zero length, but the
+             * old code handled this situation by creating a zero-length EXACT
+             * node.  Might as well be NOTHING instead */
+            if (len == 0) {
+                OP(ret) = NOTHING;
+            }
+            else{
+                alloc_maybe_populate_EXACT(pRExC_state, ret, flagp, len, ender);
+            }
+
 	    RExC_parse = p - 1;
             Set_Node_Cur_Length(ret); /* MJD */
 	    nextchar(pRExC_state);
@@ -10632,20 +11134,10 @@ tryagain:
 		if (iv < 0)
 		    vFAIL("Internal disaster");
 	    }
-	    if (len > 0)
-		*flagp |= HASWIDTH;
-	    if (len == 1 && UNI_IS_INVARIANT(ender))
-		*flagp |= SIMPLE;
 
-	    if (SIZE_ONLY)
-		RExC_size += STR_SZ(len);
-	    else {
-		STR_LEN(ret) = len;
-		RExC_emit += STR_SZ(len);
-            }
-	}
+	} /* End of label 'defchar:' */
 	break;
-    }
+    } /* End of giant switch on input character */
 
     return(ret);
 }
@@ -10687,8 +11179,8 @@ S_regwhite( RExC_state_t *pRExC_state, char *p )
 #define POSIXCC_NOTYET(c) ((c) == '=' || (c) == '.')
 #define POSIXCC(c) (POSIXCC_DONE(c) || POSIXCC_NOTYET(c))
 
-STATIC I32
-S_regpposixcc(pTHX_ RExC_state_t *pRExC_state, I32 value)
+PERL_STATIC_INLINE I32
+S_regpposixcc(pTHX_ RExC_state_t *pRExC_state, I32 value, SV *free_me)
 {
     dVAR;
     I32 namedclass = OOB_NAMEDCLASS;
@@ -10722,7 +11214,7 @@ S_regpposixcc(pTHX_ RExC_state_t *pRExC_state, I32 value)
 		    switch (skip) {
 		    case 4:
 			if (memEQ(posixcc, "word", 4)) /* this is not POSIX, this is the Perl \w */
-			    namedclass = ANYOF_ALNUM;
+			    namedclass = ANYOF_WORDCHAR;
 			break;
 		    case 5:
 			/* Names all of length 5.  */
@@ -10798,7 +11290,8 @@ S_regpposixcc(pTHX_ RExC_state_t *pRExC_state, I32 value)
 		       the class closes */
 		    while (UCHARAT(RExC_parse) && UCHARAT(RExC_parse) != ']')
 			RExC_parse++;
-		    Simple_vFAIL3("POSIX syntax [%c %c] is reserved for future extensions", c, c);
+		    SvREFCNT_dec(free_me);
+		    vFAIL3("POSIX syntax [%c %c] is reserved for future extensions", c, c);
 		}
 	    } else {
 		/* Maternal grandfather:
@@ -10811,36 +11304,6 @@ S_regpposixcc(pTHX_ RExC_state_t *pRExC_state, I32 value)
     return namedclass;
 }
 
-STATIC void
-S_checkposixcc(pTHX_ RExC_state_t *pRExC_state)
-{
-    dVAR;
-
-    PERL_ARGS_ASSERT_CHECKPOSIXCC;
-
-    if (POSIXCC(UCHARAT(RExC_parse))) {
-	const char *s = RExC_parse;
-	const char  c = *s++;
-
-	while (isALNUM(*s))
-	    s++;
-	if (*s && c == *s && s[1] == ']') {
-	    ckWARN3reg(s+2,
-		       "POSIX syntax [%c %c] belongs inside character classes",
-		       c, c);
-
-	    /* [[=foo=]] and [[.foo.]] are still future. */
-	    if (POSIXCC_NOTYET(c)) {
-		/* adjust RExC_parse so the error shows after
-		   the class closes */
-		while (UCHARAT(RExC_parse) && UCHARAT(RExC_parse++) != ']')
-		    NOOP;
-		Simple_vFAIL3("POSIX syntax [%c %c] is reserved for future extensions", c, c);
-	    }
-	}
-    }
-}
-
 /* Generate the code to add a full posix character <class> to the bracketed
  * character class given by <node>.  (<node> is needed only under locale rules)
  * destlist     is the inversion list for non-locale rules that this class is
@@ -10974,50 +11437,49 @@ S_checkposixcc(pTHX_ RExC_state_t *pRExC_state)
 	}                                                                  \
     }
 
-STATIC void
-S_add_alternate(pTHX_ AV** alternate_ptr, U8* string, STRLEN len)
-{
-    /* Adds input 'string' with length 'len' to the ANYOF node's unicode
-     * alternate list, pointed to by 'alternate_ptr'.  This is an array of
-     * the multi-character folds of characters in the node */
-    SV *sv;
-
-    PERL_ARGS_ASSERT_ADD_ALTERNATE;
-
-    if (! *alternate_ptr) {
-	*alternate_ptr = newAV();
-    }
-    sv = newSVpvn_utf8((char*)string, len, TRUE);
-    av_push(*alternate_ptr, sv);
-    return;
-}
-
 /* The names of properties whose definitions are not known at compile time are
  * stored in this SV, after a constant heading.  So if the length has been
  * changed since initialization, then there is a run-time definition. */
 #define HAS_NONLOCALE_RUNTIME_PROPERTY_DEFINITION (SvCUR(listsv) != initial_listsv_len)
 
-/*
-   parse a class specification and produce either an ANYOF node that
-   matches the pattern or perhaps will be optimized into an EXACTish node
-   instead. The node contains a bit map for the first 256 characters, with the
-   corresponding bit set if that character is in the list.  For characters
-   above 255, a range list is used */
+/* This converts the named class defined in regcomp.h to its equivalent class
+ * number defined in handy.h. */
+#define namedclass_to_classnum(class)  ((class) / 2)
 
 STATIC regnode *
-S_regclass(pTHX_ RExC_state_t *pRExC_state, U32 depth)
+S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
 {
+    /* parse a bracketed class specification.  Most of these will produce an ANYOF node;
+     * but something like [a] will produce an EXACT node; [aA], an EXACTFish
+     * node; [[:ascii:]], a POSIXA node; etc.  It is more complex under /i with
+     * multi-character folds: it will be rewritten following the paradigm of
+     * this example, where the <multi-fold>s are characters which fold to
+     * multiple character sequences:
+     *      /[abc\x{multi-fold1}def\x{multi-fold2}ghi]/i
+     * gets effectively rewritten as:
+     *      /(?:\x{multi-fold1}|\x{multi-fold2}|[abcdefghi]/i
+     * reg() gets called (recursively) on the rewritten version, and this
+     * function will return what it constructs.  (Actually the <multi-fold>s
+     * aren't physically removed from the [abcdefghi], it's just that they are
+     * ignored in the recursion by means of a flag:
+     * <RExC_in_multi_char_class>.)
+     *
+     * ANYOF nodes contain a bit map for the first 256 characters, with the
+     * corresponding bit set if that character is in the list.  For characters
+     * above 255, a range list or swash is used.  There are extra bits for \w,
+     * etc. in locale ANYOFs, as what these match is not determinable at
+     * compile time */
+
     dVAR;
-    register UV nextvalue;
-    register IV prevvalue = OOB_UNICODE;
-    register IV range = 0;
-    UV value = 0; /* XXX:dmq: needs to be referenceable (unfortunately) */
-    register regnode *ret;
+    UV nextvalue;
+    UV prevvalue = OOB_UNICODE, save_prevvalue = OOB_UNICODE;
+    IV range = 0;
+    UV value = OOB_UNICODE, save_value = OOB_UNICODE;
+    regnode *ret;
     STRLEN numlen;
     IV namedclass = OOB_NAMEDCLASS;
     char *rangebegin = NULL;
     bool need_class = 0;
-    bool allow_full_fold = TRUE;   /* Assume wants multi-char folding */
     SV *listsv = NULL;
     STRLEN initial_listsv_len = 0; /* Kind of a kludge to see if it is more
 				      than just initialized.  */
@@ -11026,6 +11488,8 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, U32 depth)
                                extended beyond the Latin1 range */
     UV element_count = 0;   /* Number of distinct elements in the class.
 			       Optimizations may be possible if this is tiny */
+    AV * multi_char_matches = NULL; /* Code points that fold to more than one
+                                       character; used under /i */
     UV n;
 
     /* Unicode properties are stored in a swash; this holds the current one
@@ -11049,14 +11513,11 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, U32 depth)
      * of the target string */
     SV* cp_list = NULL;
 
-    /* List of multi-character folds that are matched by this node */
-    AV* unicode_alternate  = NULL;
 #ifdef EBCDIC
     /* In a range, counts how many 0-2 of the ends of it came from literals,
      * not escapes.  Thus we can tell if 'A' was input vs \x{C1} */
     UV literal_endpoint = 0;
 #endif
-    UV stored = 0;  /* how many chars stored in the bitmap */
     bool invert = FALSE;    /* Is this class to be complemented */
 
     /* Is there any thing like \W or [:^digit:] that matches above the legal
@@ -11066,6 +11527,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, U32 depth)
     regnode * const orig_emit = RExC_emit; /* Save the original RExC_emit in
         case we need to change the emitted regop to an EXACT. */
     const char * orig_parse = RExC_parse;
+    const I32 orig_size = RExC_size;
     GET_RE_DEBUG_FLAGS_DECL;
 
     PERL_ARGS_ASSERT_REGCLASS;
@@ -11078,25 +11540,14 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, U32 depth)
     /* Assume we are going to generate an ANYOF node. */
     ret = reganode(pRExC_state, ANYOF, 0);
 
-
     if (!SIZE_ONLY) {
 	ANYOF_FLAGS(ret) = 0;
     }
 
     if (UCHARAT(RExC_parse) == '^') {	/* Complement of range. */
-	RExC_naughty++;
 	RExC_parse++;
         invert = TRUE;
-
-        /* We have decided to not allow multi-char folds in inverted character
-	 * classes, due to the confusion that can happen, especially with
-	 * classes that are designed for a non-Unicode world:  You have the
-	 * peculiar case that:
-            "s s" =~ /^[^\xDF]+$/i => Y
-            "ss"  =~ /^[^\xDF]+$/i => N
-         *
-         * See [perl #89750] */
-        allow_full_fold = FALSE;
+        RExC_naughty++;
     }
 
     if (SIZE_ONLY) {
@@ -11108,7 +11559,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, U32 depth)
 	if (LOC) {
 	    ANYOF_FLAGS(ret) |= ANYOF_LOCALE;
 	}
-	ANYOF_BITMAP_ZERO(ret);
 	listsv = newSVpvs("# comment\n");
 	initial_listsv_len = SvCUR(listsv);
     }
@@ -11116,7 +11566,22 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, U32 depth)
     nextvalue = RExC_parse < RExC_end ? UCHARAT(RExC_parse) : 0;
 
     if (!SIZE_ONLY && POSIXCC(nextvalue))
-	checkposixcc(pRExC_state);
+    {
+	const char *s = RExC_parse;
+	const char  c = *s++;
+
+	while (isALNUM(*s))
+	    s++;
+	if (*s && c == *s && s[1] == ']') {
+	    SAVEFREESV(RExC_rx_sv);
+	    SAVEFREESV(listsv);
+	    ckWARN3reg(s+2,
+		       "POSIX syntax [%c %c] belongs inside character classes",
+		       c, c);
+	    ReREFCNT_inc(RExC_rx_sv);
+	    SvREFCNT_inc_simple_void_NN(listsv);
+	}
+    }
 
     /* allow 1st char to be ] (allowing it to be - is dealt with later) */
     if (UCHARAT(RExC_parse) == ']')
@@ -11128,6 +11593,8 @@ parseit:
     charclassloop:
 
 	namedclass = OOB_NAMEDCLASS; /* initialize as illegal */
+        save_value = value;
+        save_prevvalue = prevvalue;
 
 	if (!range) {
 	    rangebegin = RExC_parse;
@@ -11144,7 +11611,7 @@ parseit:
 
 	nextvalue = RExC_parse < RExC_end ? UCHARAT(RExC_parse) : 0;
 	if (value == '[' && POSIXCC(nextvalue))
-	    namedclass = regpposixcc(pRExC_state, value);
+	    namedclass = regpposixcc(pRExC_state, value, listsv);
 	else if (value == '\\') {
 	    if (UTF) {
 		value = utf8n_to_uvchr((U8*)RExC_parse,
@@ -11160,8 +11627,8 @@ parseit:
 	     * A similar issue a little bit later when switching on
 	     * namedclass. --jhi */
 	    switch ((I32)value) {
-	    case 'w':	namedclass = ANYOF_ALNUM;	break;
-	    case 'W':	namedclass = ANYOF_NALNUM;	break;
+	    case 'w':	namedclass = ANYOF_WORDCHAR;	break;
+	    case 'W':	namedclass = ANYOF_NWORDCHAR;	break;
 	    case 's':	namedclass = ANYOF_SPACE;	break;
 	    case 'S':	namedclass = ANYOF_NSPACE;	break;
 	    case 'd':	namedclass = ANYOF_DIGIT;	break;
@@ -11177,17 +11644,21 @@ parseit:
                     if this makes sense as it does change the behaviour
                     from earlier versions, OTOH that behaviour was broken
                     as well. */
-                    UV v; /* value is register so we cant & it /grrr */
-                    if (reg_namedseq(pRExC_state, &v, NULL, depth)) {
+                    if (! grok_bslash_N(pRExC_state, NULL, &value, flagp, depth,
+                                      TRUE /* => charclass */))
+                    {
                         goto parseit;
                     }
-                    value= v; 
                 }
                 break;
 	    case 'p':
 	    case 'P':
 		{
 		char *e;
+
+                /* This routine will handle any undefined properties */
+                U8 swash_init_flags = _CORE_SWASH_INIT_RETURN_IF_UNDEF;
+
 		if (RExC_parse >= RExC_end)
 		    vFAIL2("Empty \\%c{}", (U8)value);
 		if (*RExC_parse == '{') {
@@ -11208,7 +11679,6 @@ parseit:
 		    n = 1;
 		}
 		if (!SIZE_ONLY) {
-                    SV** invlistsvp;
                     SV* invlist;
                     char* name;
 
@@ -11243,18 +11713,10 @@ parseit:
                     swash = _core_swash_init("utf8", name, &PL_sv_undef,
                                              1, /* binary */
                                              0, /* not tr/// */
-                                             TRUE, /* this routine will handle
-                                                      undefined properties */
-                                             NULL, FALSE /* No inversion list */
+                                             NULL, /* No inversion list */
+                                             &swash_init_flags
                                             );
-                    if (   ! swash
-                        || ! SvROK(swash)
-                        || ! SvTYPE(SvRV(swash)) == SVt_PVHV
-                        || ! (invlistsvp =
-				hv_fetchs(MUTABLE_HV(SvRV(swash)),
-                                "INVLIST", FALSE))
-                        || ! (invlist = *invlistsvp))
-		    {
+                    if (! swash || ! (invlist = _get_swash_invlist(swash))) {
                         if (swash) {
                             SvREFCNT_dec(swash);
                             swash = NULL;
@@ -11283,7 +11745,8 @@ parseit:
                          * the swash is from a user-defined property, then this
                          * whole character class should be regarded as such */
                         has_user_defined_property =
-                                                _is_swash_user_defined(swash);
+                                    (swash_init_flags
+                                     & _CORE_SWASH_INIT_USER_DEFINED_PROPERTY);
 
                         /* Invert if asking for the complement */
                         if (value == 'P') {
@@ -11304,7 +11767,7 @@ parseit:
 		    Safefree(name);
 		}
 		RExC_parse = e + 1;
-		namedclass = ANYOF_MAX;  /* no official name, but it's named */
+		namedclass = ANYOF_UNIPROP;  /* no official name, but it's named */
 
 		/* \p means they want Unicode semantics */
 		RExC_uni_semantics = 1;
@@ -11379,9 +11842,13 @@ parseit:
 	    default:
 		/* Allow \_ to not give an error */
 		if (!SIZE_ONLY && isALNUM(value) && value != '_') {
+		    SAVEFREESV(RExC_rx_sv);
+		    SAVEFREESV(listsv);
 		    ckWARN2reg(RExC_parse,
 			       "Unrecognized escape \\%c in character class passed through",
 			       (int)value);
+		    ReREFCNT_inc(RExC_rx_sv);
+		    SvREFCNT_inc_simple_void_NN(listsv);
 		}
 		break;
 	    }
@@ -11426,9 +11893,13 @@ parseit:
 		    const int w =
 			RExC_parse >= rangebegin ?
 			RExC_parse - rangebegin : 0;
+		    SAVEFREESV(RExC_rx_sv); /* in case of fatal warnings */
+		    SAVEFREESV(listsv);
 		    ckWARN4reg(RExC_parse,
 			       "False [] range \"%*.*s\"",
 			       w, w, rangebegin);
+		    ReREFCNT_inc(RExC_rx_sv);
+		    SvREFCNT_inc_simple_void_NN(listsv);
                     cp_list = add_cp_to_invlist(cp_list, '-');
                     cp_list = add_cp_to_invlist(cp_list, prevvalue);
 		}
@@ -11459,32 +11930,86 @@ parseit:
                         runtime_posix_matches_above_Unicode);
 		    break;
 		case ANYOF_ASCII:
+#ifdef HAS_ISASCII
 		    if (LOC) {
 			ANYOF_CLASS_SET(ret, namedclass);
 		    }
-                    else {
+                    else
+#endif  /* Not isascii(); just use the hard-coded definition for it */
                         _invlist_union(posixes, PL_ASCII, &posixes);
-                    }
 		    break;
 		case ANYOF_NASCII:
+#ifdef HAS_ISASCII
 		    if (LOC) {
 			ANYOF_CLASS_SET(ret, namedclass);
 		    }
                     else {
+#endif
                         _invlist_union_complement_2nd(posixes,
                                                     PL_ASCII, &posixes);
                         if (DEPENDS_SEMANTICS) {
                             ANYOF_FLAGS(ret) |= ANYOF_NON_UTF8_LATIN1_ALL;
                         }
+#ifdef HAS_ISASCII
                     }
+#endif
 		    break;
 		case ANYOF_BLANK:
-                    DO_POSIX(ret, namedclass, posixes,
+                    if (hasISBLANK || ! LOC) {
+                        DO_POSIX(ret, namedclass, posixes,
                                             PL_PosixBlank, PL_XPosixBlank);
+                    }
+                    else { /* There is no isblank() and we are in locale:  We
+                              use the ASCII range and the above-Latin1 range
+                              code points */
+                        SV* scratch_list = NULL;
+
+                        /* Include all above-Latin1 blanks */
+                        _invlist_intersection(PL_AboveLatin1,
+                                              PL_XPosixBlank,
+                                              &scratch_list);
+                        /* Add it to the running total of posix classes */
+                        if (! posixes) {
+                            posixes = scratch_list;
+                        }
+                        else {
+                            _invlist_union(posixes, scratch_list, &posixes);
+                            SvREFCNT_dec(scratch_list);
+                        }
+                        /* Add the ASCII-range blanks to the running total. */
+                        _invlist_union(posixes, PL_PosixBlank, &posixes);
+                    }
 		    break;
 		case ANYOF_NBLANK:
-                    DO_N_POSIX(ret, namedclass, posixes,
-                                            PL_PosixBlank, PL_XPosixBlank);
+                    if (hasISBLANK || ! LOC) {
+                        DO_N_POSIX(ret, namedclass, posixes,
+                                                PL_PosixBlank, PL_XPosixBlank);
+                    }
+                    else { /* There is no isblank() and we are in locale */
+                        SV* scratch_list = NULL;
+
+                        /* Include all above-Latin1 non-blanks */
+                        _invlist_subtract(PL_AboveLatin1, PL_XPosixBlank,
+                                          &scratch_list);
+
+                        /* Add them to the running total of posix classes */
+                        _invlist_subtract(PL_AboveLatin1, PL_XPosixBlank,
+                                          &scratch_list);
+                        if (! posixes) {
+                            posixes = scratch_list;
+                        }
+                        else {
+                            _invlist_union(posixes, scratch_list, &posixes);
+                            SvREFCNT_dec(scratch_list);
+                        }
+
+                        /* Get the list of all non-ASCII-blanks in Latin 1, and
+                         * add them to the running total */
+                        _invlist_subtract(PL_Latin1, PL_PosixBlank,
+                                          &scratch_list);
+                        _invlist_union(posixes, scratch_list, &posixes);
+                        SvREFCNT_dec(scratch_list);
+                    }
 		    break;
 		case ANYOF_CNTRL:
                     DO_POSIX(ret, namedclass, posixes,
@@ -11620,11 +12145,11 @@ parseit:
 		    }
 		    break;
 		}
-		case ANYOF_ALNUM:   /* Really is 'Word' */
+		case ANYOF_WORDCHAR:
 		    DO_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, posixes,
                             PL_PosixWord, PL_L1PosixWord, "XPosixWord", listsv);
 		    break;
-		case ANYOF_NALNUM:
+		case ANYOF_NWORDCHAR:
 		    DO_N_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, posixes,
                             PL_PosixWord, PL_L1PosixWord, "XPosixWord", listsv,
                             runtime_posix_matches_above_Unicode);
@@ -11648,8 +12173,7 @@ parseit:
                     DO_N_POSIX(ret, namedclass, posixes,
                                             PL_PosixXDigit, PL_XPosixXDigit);
 		    break;
-		case ANYOF_MAX:
-		    /* this is to handle \p and \P */
+		case ANYOF_UNIPROP: /* this is to handle \p and \P */
 		    break;
 		default:
 		    vFAIL("Invalid [::] class");
@@ -11661,7 +12185,7 @@ parseit:
 	} /* end of namedclass \blah */
 
 	if (range) {
-	    if (prevvalue > (IV)value) /* b-a */ {
+	    if (prevvalue > value) /* b-a */ {
 		const int w = RExC_parse - rangebegin;
 		Simple_vFAIL4("Invalid [] range \"%*.*s\"", w, w, rangebegin);
 		range = 0; /* not a valid range */
@@ -11685,8 +12209,10 @@ parseit:
 			       "False [] range \"%*.*s\"",
 			       w, w, rangebegin);
 		    }
-                    if (!SIZE_ONLY)
+                    if (!SIZE_ONLY) {
                         cp_list = add_cp_to_invlist(cp_list, '-');
+                    }
+                    element_count++;
 		} else
 		    range = 1;	/* yeah, it's a range! */
 		continue;	/* but do it the next time */
@@ -11702,8 +12228,100 @@ parseit:
 	    RExC_uni_semantics = 1;
 	}
 
-        /* Ready to process either the single value, or the completed range */
-	if (!SIZE_ONLY) {
+        /* Ready to process either the single value, or the completed range.
+         * For single-valued non-inverted ranges, we consider the possibility
+         * of multi-char folds.  (We made a conscious decision to not do this
+         * for the other cases because it can often lead to non-intuitive
+         * results.  For example, you have the peculiar case that:
+         *  "s s" =~ /^[^\xDF]+$/i => Y
+         *  "ss"  =~ /^[^\xDF]+$/i => N
+         *
+         * See [perl #89750] */
+        if (FOLD && ! invert && value == prevvalue) {
+            if (value == LATIN_SMALL_LETTER_SHARP_S
+                || (value > 255 && _invlist_contains_cp(PL_HasMultiCharFold,
+                                                        value)))
+            {
+                /* Here <value> is indeed a multi-char fold.  Get what it is */
+
+                U8 foldbuf[UTF8_MAXBYTES_CASE];
+                STRLEN foldlen;
+
+                UV folded = _to_uni_fold_flags(
+                                value,
+                                foldbuf,
+                                &foldlen,
+                                FOLD_FLAGS_FULL
+                                | ((LOC) ?  FOLD_FLAGS_LOCALE
+                                            : (ASCII_FOLD_RESTRICTED)
+                                              ? FOLD_FLAGS_NOMIX_ASCII
+                                              : 0)
+                                );
+
+                /* Here, <folded> should be the first character of the
+                 * multi-char fold of <value>, with <foldbuf> containing the
+                 * whole thing.  But, if this fold is not allowed (because of
+                 * the flags), <fold> will be the same as <value>, and should
+                 * be processed like any other character, so skip the special
+                 * handling */
+                if (folded != value) {
+
+                    /* Skip if we are recursed, currently parsing the class
+                     * again.  Otherwise add this character to the list of
+                     * multi-char folds. */
+                    if (! RExC_in_multi_char_class) {
+                        AV** this_array_ptr;
+                        AV* this_array;
+                        STRLEN cp_count = utf8_length(foldbuf,
+                                                      foldbuf + foldlen);
+                        SV* multi_fold = sv_2mortal(newSVpvn("", 0));
+
+                        Perl_sv_catpvf(aTHX_ multi_fold, "\\x{%"UVXf"}", value);
+
+
+                        if (! multi_char_matches) {
+                            multi_char_matches = newAV();
+                        }
+
+                        /* <multi_char_matches> is actually an array of arrays.
+                         * There will be one or two top-level elements: [2],
+                         * and/or [3].  The [2] element is an array, each
+                         * element thereof is a character which folds to two
+                         * characters; likewise for [3].  (Unicode guarantees a
+                         * maximum of 3 characters in any fold.)  When we
+                         * rewrite the character class below, we will do so
+                         * such that the longest folds are written first, so
+                         * that it prefers the longest matching strings first.
+                         * This is done even if it turns out that any
+                         * quantifier is non-greedy, out of programmer
+                         * laziness.  Tom Christiansen has agreed that this is
+                         * ok.  This makes the test for the ligature 'ffi' come
+                         * before the test for 'ff' */
+                        if (av_exists(multi_char_matches, cp_count)) {
+                            this_array_ptr = (AV**) av_fetch(multi_char_matches,
+                                                             cp_count, FALSE);
+                            this_array = *this_array_ptr;
+                        }
+                        else {
+                            this_array = newAV();
+                            av_store(multi_char_matches, cp_count,
+                                     (SV*) this_array);
+                        }
+                        av_push(this_array, multi_fold);
+                    }
+
+                    /* This element should not be processed further in this
+                     * class */
+                    element_count--;
+                    value = save_value;
+                    prevvalue = save_prevvalue;
+                    continue;
+                }
+            }
+        }
+
+        /* Deal with this element of the class */
+	if (! SIZE_ONLY) {
 #ifndef EBCDIC
             cp_list = _add_range_to_invlist(cp_list, prevvalue, value);
 #else
@@ -11733,11 +12351,90 @@ parseit:
 	range = 0; /* this range (if it was one) is done now */
     } /* End of loop through all the text within the brackets */
 
+    /* If anything in the class expands to more than one character, we have to
+     * deal with them by building up a substitute parse string, and recursively
+     * calling reg() on it, instead of proceeding */
+    if (multi_char_matches) {
+	SV * substitute_parse = newSVpvn_flags("?:", 2, SVs_TEMP);
+        I32 cp_count;
+	STRLEN len;
+	char *save_end = RExC_end;
+	char *save_parse = RExC_parse;
+        bool first_time = TRUE;     /* First multi-char occurrence doesn't get
+                                       a "|" */
+        I32 reg_flags;
+
+        assert(! invert);
+#if 0   /* Have decided not to deal with multi-char folds in inverted classes,
+           because too confusing */
+        if (invert) {
+            sv_catpv(substitute_parse, "(?:");
+        }
+#endif
+
+        /* Look at the longest folds first */
+        for (cp_count = av_len(multi_char_matches); cp_count > 0; cp_count--) {
+
+            if (av_exists(multi_char_matches, cp_count)) {
+                AV** this_array_ptr;
+                SV* this_sequence;
+
+                this_array_ptr = (AV**) av_fetch(multi_char_matches,
+                                                 cp_count, FALSE);
+                while ((this_sequence = av_pop(*this_array_ptr)) !=
+                                                                &PL_sv_undef)
+                {
+                    if (! first_time) {
+                        sv_catpv(substitute_parse, "|");
+                    }
+                    first_time = FALSE;
+
+                    sv_catpv(substitute_parse, SvPVX(this_sequence));
+                }
+            }
+        }
+
+        /* If the character class contains anything else besides these
+         * multi-character folds, have to include it in recursive parsing */
+        if (element_count) {
+            sv_catpv(substitute_parse, "|[");
+            sv_catpvn(substitute_parse, orig_parse, RExC_parse - orig_parse);
+            sv_catpv(substitute_parse, "]");
+        }
+
+        sv_catpv(substitute_parse, ")");
+#if 0
+        if (invert) {
+            /* This is a way to get the parse to skip forward a whole named
+             * sequence instead of matching the 2nd character when it fails the
+             * first */
+            sv_catpv(substitute_parse, "(*THEN)(*SKIP)(*FAIL)|.)");
+        }
+#endif
+
+	RExC_parse = SvPV(substitute_parse, len);
+	RExC_end = RExC_parse + len;
+        RExC_in_multi_char_class = 1;
+        RExC_emit = (regnode *)orig_emit;
+
+	ret = reg(pRExC_state, 1, &reg_flags, depth+1);
+
+	*flagp |= reg_flags&(HASWIDTH|SIMPLE|SPSTART|POSTPONED);
+
+	RExC_parse = save_parse;
+	RExC_end = save_end;
+	RExC_in_multi_char_class = 0;
+        SvREFCNT_dec(multi_char_matches);
+        SvREFCNT_dec(listsv);
+        return ret;
+    }
+
     /* If the character class contains only a single element, it may be
      * optimizable into another node type which is smaller and runs faster.
      * Check if this is the case for this class */
     if (element_count == 1) {
         U8 op = END;
+        U8 arg = 0;
 
         if (namedclass > OOB_NAMEDCLASS) { /* this is a named class, like \w or
                                               [:digit:] or \p{foo} */
@@ -11752,10 +12449,10 @@ parseit:
                  * modifier to the regex.  We first calculate the base node
                  * type, and if it should be inverted */
 
-                case ANYOF_NALNUM:
+                case ANYOF_NWORDCHAR:
                     invert = ! invert;
                     /* FALLTHROUGH */
-                case ANYOF_ALNUM:
+                case ANYOF_WORDCHAR:
                     op = ALNUM;
                     goto join_charset_classes;
 
@@ -11797,6 +12494,7 @@ parseit:
                     if (invert) {
                         op += NALNUM - ALNUM;
                     }
+                    *flagp |= HASWIDTH|SIMPLE;
                     break;
 
                 /* The second group doesn't depend of the charset modifiers.
@@ -11805,7 +12503,9 @@ parseit:
                     invert = ! invert;
                     /* FALLTHROUGH */
                 case ANYOF_HORIZWS:
+                  is_horizws:
                     op = (invert) ? NHORIZWS : HORIZWS;
+                    *flagp |= HASWIDTH|SIMPLE;
                     break;
 
                 case ANYOF_NVERTWS:
@@ -11813,17 +12513,63 @@ parseit:
                     /* FALLTHROUGH */
                 case ANYOF_VERTWS:
                     op = (invert) ? NVERTWS : VERTWS;
+                    *flagp |= HASWIDTH|SIMPLE;
                     break;
 
+                case ANYOF_UNIPROP:
+                    break;
 
+                case ANYOF_NBLANK:
+                    invert = ! invert;
+                    /* FALLTHROUGH */
+                case ANYOF_BLANK:
+                    if (AT_LEAST_UNI_SEMANTICS && ! AT_LEAST_ASCII_RESTRICTED) {
+                        goto is_horizws;
+                    }
+                    /* FALLTHROUGH */
+                default:
+                    /* A generic posix class.  All the /a ones can be handled
+                     * by the POSIXA opcode.  And all are closed under folding
+                     * in the ASCII range, so FOLD doesn't matter */
+                    if (AT_LEAST_ASCII_RESTRICTED
+                        || (! LOC && namedclass == ANYOF_ASCII))
+                    {
+                        /* The odd numbered ones are the complements of the
+                         * next-lower even number one */
+                        if (namedclass % 2 == 1) {
+                            invert = ! invert;
+                            namedclass--;
+                        }
+                        arg = namedclass_to_classnum(namedclass);
+                        op = (invert) ? NPOSIXA : POSIXA;
+                    }
+                    break;
             }
         }
-        else if (! LOC) {
-            if (invert && prevvalue == '\n' && value == '\n') {
-                op = REG_ANY; /* Optimize [^\n] */
+        else if (value == prevvalue) {
+
+            /* Here, the class consists of just a single code point */
+
+            if (invert) {
+                if (! LOC && value == '\n') {
+                    op = REG_ANY; /* Optimize [^\n] */
+                    *flagp |= HASWIDTH|SIMPLE;
+                    RExC_naughty++;
+                }
+            }
+            else if (value < 256 || UTF) {
+
+                /* Optimize a single value into an EXACTish node, but not if it
+                 * would require converting the pattern to UTF-8. */
+                op = compute_EXACTish(pRExC_state);
             }
-            else if (prevvalue == '0' && value == '9') {
-                op = (invert) ? NDIGITA : DIGITA;
+        } /* Otherwise is a range */
+        else if (! LOC) {   /* locale could vary these */
+            if (prevvalue == '0') {
+                if (value == '9') {
+                    op = (invert) ? NDIGITA : DIGITA;
+                    *flagp |= HASWIDTH|SIMPLE;
+                }
             }
         }
 
@@ -11836,20 +12582,44 @@ parseit:
              * the parse */
             const char * cur_parse = RExC_parse;
             RExC_parse = (char *)orig_parse;
-            RExC_emit = (regnode *)orig_emit;
+            if ( SIZE_ONLY) {
+                if (! LOC) {
+
+                    /* To get locale nodes to not use the full ANYOF size would
+                     * require moving the code above that writes the portions
+                     * of it that aren't in other nodes to after this point.
+                     * e.g.  ANYOF_CLASS_SET */
+                    RExC_size = orig_size;
+                }
+            }
+            else {
+                RExC_emit = (regnode *)orig_emit;
+            }
 
             ret = reg_node(pRExC_state, op);
 
+            if (PL_regkind[op] == POSIXD || PL_regkind[op] == NPOSIXD) {
+                if (! SIZE_ONLY) {
+                    FLAGS(ret) = arg;
+                }
+                *flagp |= HASWIDTH|SIMPLE;
+            }
+            else if (PL_regkind[op] == EXACT) {
+                alloc_maybe_populate_EXACT(pRExC_state, ret, flagp, 0, value);
+            }
+
             RExC_parse = (char *) cur_parse;
 
+            SvREFCNT_dec(posixes);
             SvREFCNT_dec(listsv);
+            SvREFCNT_dec(cp_list);
             return ret;
         }
     }
 
     if (SIZE_ONLY)
         return ret;
-    /****** !SIZE_ONLY AFTER HERE *********/
+    /****** !SIZE_ONLY (Pass 2) AFTER HERE *********/
 
     /* If folding, we calculate all characters that could fold to or from the
      * ones already on the list */
@@ -11858,10 +12628,11 @@ parseit:
 
 	SV* fold_intersection = NULL;
 
-        /* In the Latin1 range, the characters that can be folded-to or -from
-         * are precisely the alphabetic characters.  If the highest code point
-         * is within Latin1, we can use the compiled-in list, and not have to
-         * go out to disk. */
+        /* If the highest code point is within Latin1, we can use the
+         * compiled-in Alphas list, and not have to go out to disk.  This
+         * yields two false positives, the masculine and feminine ordinal
+         * indicators, which are weeded out below using the
+         * IS_IN_SOME_FOLD_L1() macro */
         if (invlist_highest(cp_list) < 256) {
             _invlist_intersection(PL_L1PosixAlpha, cp_list, &fold_intersection);
         }
@@ -11873,7 +12644,7 @@ parseit:
             if (! PL_utf8_foldable) {
                 SV* swash = swash_init("utf8", "_Perl_Any_Folds",
                                        &PL_sv_undef, 1, 0);
-                PL_utf8_foldable = _swash_to_invlist(swash);
+                PL_utf8_foldable = _get_swash_invlist(swash);
                 SvREFCNT_dec(swash);
             }
 
@@ -11887,7 +12658,7 @@ parseit:
                  * rules hard-coded into Perl.  (This case happens legitimately
                  * during compilation of Perl itself before the Unicode tables
                  * are generated) */
-                if (invlist_len(PL_utf8_foldable) == 0) {
+                if (_invlist_len(PL_utf8_foldable) == 0) {
                     PL_utf8_foldclosures = newHV();
                 }
                 else {
@@ -11895,15 +12666,13 @@ parseit:
                      * to force that */
                     if (! PL_utf8_tofold) {
                         U8 dummy[UTF8_MAXBYTES+1];
-                        STRLEN dummy_len;
 
-                        /* This particular string is above \xff in both UTF-8
-                         * and UTFEBCDIC */
-                        to_utf8_fold((U8*) "\xC8\x80", dummy, &dummy_len);
+                        /* This string is just a short named one above \xff */
+                        to_utf8_fold((U8*) HYPHEN_UTF8, dummy, NULL);
                         assert(PL_utf8_tofold); /* Verify that worked */
                     }
                     PL_utf8_foldclosures =
-                                        _swash_inversion_hash(PL_utf8_tofold);
+                                    _swash_inversion_hash(PL_utf8_tofold);
                 }
             }
 
@@ -11930,7 +12699,7 @@ parseit:
 
 		U8 foldbuf[UTF8_MAXBYTES_CASE+1];
 		STRLEN foldlen;
-                UV f;
+                SV** listp;
 
                 if (j < 256) {
 
@@ -11942,7 +12711,7 @@ parseit:
                      * mappings, though that is not really likely, and may be
                      * caught by the default: case of the switch below. */
 
-                    if (PL_fold_latin1[j] != j) {
+                    if (IS_IN_SOME_FOLD_L1(j)) {
 
                         /* ASCII is always matched; non-ASCII is matched only
                          * under Unicode rules */
@@ -11957,33 +12726,16 @@ parseit:
                     }
 
                     if (HAS_NONLATIN1_FOLD_CLOSURE(j)
-                        && (! isASCII(j) || ! MORE_ASCII_RESTRICTED))
+                        && (! isASCII(j) || ! ASCII_FOLD_RESTRICTED))
                     {
                         /* Certain Latin1 characters have matches outside
-                         * Latin1, or are multi-character.  To get here, 'j' is
-                         * one of those characters.   None of these matches is
-                         * valid for ASCII characters under /aa, which is why
-                         * the 'if' just above excludes those.  The matches
-                         * fall into three categories:
-                         * 1) They are singly folded-to or -from an above 255
-                         *    character, e.g., LATIN SMALL LETTER Y WITH
-                         *    DIAERESIS and LATIN CAPITAL LETTER Y WITH
-                         *    DIAERESIS;
-                         * 2) They are part of a multi-char fold with another
-                         *    latin1 character; only LATIN SMALL LETTER
-                         *    SHARP S => "ss" fits this;
-                         * 3) They are part of a multi-char fold with a
-                         *    character outside of Latin1, such as various
-                         *    ligatures.
-                        * We aren't dealing fully with multi-char folds, except
-                        * we do deal with the pattern containing a character
-                        * that has a multi-char fold (not so much the inverse).
-                        * For types 1) and 3), the matches only happen when the
-                        * target string is utf8; that's not true for 2), and we
-                        * set a flag for it.
-                        *
-                        * The code below adds the single fold closures for 'j'
-                        * to the inversion list. */
+                         * Latin1.  To get here, <j> is one of those
+                         * characters.   None of these matches is valid for
+                         * ASCII characters under /aa, which is why the 'if'
+                         * just above excludes those.  These matches only
+                         * happen when the target string is utf8.  The code
+                         * below adds the single fold closures for <j> to the
+                         * inversion list. */
                         switch (j) {
                             case 'k':
                             case 'K':
@@ -11997,9 +12749,9 @@ parseit:
                                 break;
                             case MICRO_SIGN:
                                 cp_list = add_cp_to_invlist(cp_list,
-                                                    GREEK_SMALL_LETTER_MU);
-                                cp_list = add_cp_to_invlist(cp_list,
                                                     GREEK_CAPITAL_LETTER_MU);
+                                cp_list = add_cp_to_invlist(cp_list,
+                                                    GREEK_SMALL_LETTER_MU);
                                 break;
                             case LATIN_CAPITAL_LETTER_A_WITH_RING_ABOVE:
                             case LATIN_SMALL_LETTER_A_WITH_RING_ABOVE:
@@ -12013,20 +12765,6 @@ parseit:
                             case LATIN_SMALL_LETTER_SHARP_S:
                                 cp_list = add_cp_to_invlist(cp_list,
                                                 LATIN_CAPITAL_LETTER_SHARP_S);
-
-                                /* Under /a, /d, and /u, this can match the two
-                                 * chars "ss" */
-                                if (! MORE_ASCII_RESTRICTED) {
-                                    add_alternate(&unicode_alternate,
-                                                  (U8 *) "ss", 2);
-
-                                    /* And under /u or /a, it can match even if
-                                     * the target is not utf8 */
-                                    if (AT_LEAST_UNI_SEMANTICS) {
-                                        ANYOF_FLAGS(ret) |=
-                                                    ANYOF_NONBITMAP_NON_UTF8;
-                                    }
-                                }
                                 break;
                             case 'F': case 'f':
                             case 'I': case 'i':
@@ -12043,7 +12781,8 @@ parseit:
                                  * express, so they can't match unless the
                                  * target string is in UTF-8, so no action here
                                  * is necessary, as regexec.c properly handles
-                                 * the general case for UTF-8 matching */
+                                 * the general case for UTF-8 matching and
+                                 * multi-char folds */
                                 break;
                             default:
                                 /* Use deprecated warning to increase the
@@ -12056,89 +12795,56 @@ parseit:
                 }
 
                 /* Here is an above Latin1 character.  We don't have the rules
-                 * hard-coded for it.  First, get its fold */
-		f = _to_uni_fold_flags(j, foldbuf, &foldlen,
-                                    ((allow_full_fold) ? FOLD_FLAGS_FULL : 0)
-                                    | ((LOC)
-                                        ? FOLD_FLAGS_LOCALE
-                                        : (MORE_ASCII_RESTRICTED)
-                                            ? FOLD_FLAGS_NOMIX_ASCII
-                                            : 0));
-
-		if (foldlen > (STRLEN)UNISKIP(f)) {
-
-		    /* Any multicharacter foldings (disallowed in lookbehind
-		     * patterns) require the following transform: [ABCDEF] ->
-		     * (?:[ABCabcDEFd]|pq|rst) where E folds into "pq" and F
-		     * folds into "rst", all other characters fold to single
-		     * characters.  We save away these multicharacter foldings,
-		     * to be later saved as part of the additional "s" data. */
-		    if (! RExC_in_lookbehind) {
-			U8* loc = foldbuf;
-			U8* e = foldbuf + foldlen;
-
-			/* If any of the folded characters of this are in the
-			 * Latin1 range, tell the regex engine that this can
-			 * match a non-utf8 target string.  */
-                        while (loc < e) {
-                            if (UTF8_IS_INVARIANT(*loc)
-                                || UTF8_IS_DOWNGRADEABLE_START(*loc))
-                            {
-                                ANYOF_FLAGS(ret)
-                                        |= ANYOF_NONBITMAP_NON_UTF8;
-                                break;
-                            }
-                            loc += UTF8SKIP(loc);
+                 * hard-coded for it.  First, get its fold.  This is the simple
+                 * fold, as the multi-character folds have been handled earlier
+                 * and separated out */
+		_to_uni_fold_flags(j, foldbuf, &foldlen,
+                                               ((LOC)
+                                               ? FOLD_FLAGS_LOCALE
+                                               : (ASCII_FOLD_RESTRICTED)
+                                                  ? FOLD_FLAGS_NOMIX_ASCII
+                                                  : 0));
+
+                /* Single character fold of above Latin1.  Add everything in
+                 * its fold closure to the list that this node should match.
+                 * The fold closures data structure is a hash with the keys
+                 * being the UTF-8 of every character that is folded to, like
+                 * 'k', and the values each an array of all code points that
+                 * fold to its key.  e.g. [ 'k', 'K', KELVIN_SIGN ].
+                 * Multi-character folds are not included */
+                if ((listp = hv_fetch(PL_utf8_foldclosures,
+                                      (char *) foldbuf, foldlen, FALSE)))
+                {
+                    AV* list = (AV*) *listp;
+                    IV k;
+                    for (k = 0; k <= av_len(list); k++) {
+                        SV** c_p = av_fetch(list, k, FALSE);
+                        UV c;
+                        if (c_p == NULL) {
+                            Perl_croak(aTHX_ "panic: invalid PL_utf8_foldclosures structure");
                         }
+                        c = SvUV(*c_p);
 
-			add_alternate(&unicode_alternate, foldbuf, foldlen);
-		    }
-		}
-                else {
-                    /* Single character fold of above Latin1.  Add everything
-                     * in its fold closure to the list that this node should
-                     * match */
-		    SV** listp;
-
-		    /* The fold closures data structure is a hash with the keys
-		     * being every character that is folded to, like 'k', and
-		     * the values each an array of everything that folds to its
-		     * key.  e.g. [ 'k', 'K', KELVIN_SIGN ] */
-		    if ((listp = hv_fetch(PL_utf8_foldclosures,
-				    (char *) foldbuf, foldlen, FALSE)))
-		    {
-			AV* list = (AV*) *listp;
-			IV k;
-			for (k = 0; k <= av_len(list); k++) {
-			    SV** c_p = av_fetch(list, k, FALSE);
-			    UV c;
-			    if (c_p == NULL) {
-				Perl_croak(aTHX_ "panic: invalid PL_utf8_foldclosures structure");
-			    }
-			    c = SvUV(*c_p);
-
-			    /* /aa doesn't allow folds between ASCII and non-;
-			     * /l doesn't allow them between above and below
-			     * 256 */
-			    if ((MORE_ASCII_RESTRICTED
-                                      && (isASCII(c) != isASCII(j)))
-				|| (LOC && ((c < 256) != (j < 256))))
-			    {
-				continue;
-			    }
+                        /* /aa doesn't allow folds between ASCII and non-; /l
+                         * doesn't allow them between above and below 256 */
+                        if ((ASCII_FOLD_RESTRICTED
+                                  && (isASCII(c) != isASCII(j)))
+                            || (LOC && ((c < 256) != (j < 256))))
+                        {
+                            continue;
+                        }
 
-                            /* Folds involving non-ascii Latin1 characters
-                             * under /d are added to a separate list */
-			    if (isASCII(c) || c > 255 || AT_LEAST_UNI_SEMANTICS)
-                            {
-				cp_list = add_cp_to_invlist(cp_list, c);
-                            }
-                            else {
-                              depends_list = add_cp_to_invlist(depends_list, c);
-			    }
-			}
-		    }
-		}
+                        /* Folds involving non-ascii Latin1 characters
+                         * under /d are added to a separate list */
+                        if (isASCII(c) || c > 255 || AT_LEAST_UNI_SEMANTICS)
+                        {
+                            cp_list = add_cp_to_invlist(cp_list, c);
+                        }
+                        else {
+                          depends_list = add_cp_to_invlist(depends_list, c);
+                        }
+                    }
+                }
             }
 	}
 	SvREFCNT_dec(fold_intersection);
@@ -12149,7 +12855,7 @@ parseit:
      * fold the classes (folding of those is automatically handled by the swash
      * fetching code) */
     if (posixes) {
-        if (AT_LEAST_UNI_SEMANTICS) {
+        if (! DEPENDS_SEMANTICS) {
             if (cp_list) {
                 _invlist_union(cp_list, posixes, &cp_list);
                 SvREFCNT_dec(posixes);
@@ -12159,7 +12865,6 @@ parseit:
             }
         }
         else {
-
             /* Under /d, we put into a separate list the Latin1 things that
              * match only when the target string is utf8 */
             SV* nonascii_but_latin1_properties = NULL;
@@ -12239,17 +12944,16 @@ parseit:
      * Now we can see about various optimizations.  Fold calculation (which we
      * did above) needs to take place before inversion.  Otherwise /[^k]/i
      * would invert to include K, which under /i would match k, which it
-     * shouldn't. */
+     * shouldn't.  Therefore we can't invert folded locale now, as it won't be
+     * folded until runtime */
 
-    /* Optimize inverted simple patterns (e.g. [^a-z]).  Note that we haven't
-     * set the FOLD flag yet, so this does optimize those.  It doesn't
-     * optimize locale.  Doing so perhaps could be done as long as there is
-     * nothing like \w in it; some thought also would have to be given to the
-     * interaction with above 0x100 chars */
+    /* Optimize inverted simple patterns (e.g. [^a-z]) when everything is known
+     * at compile time.  Besides not inverting folded locale now, we can't
+     * invert if there are things such as \w, which aren't known until runtime
+     * */
     if (invert
-        && ! LOC
+        && ! (LOC && (FOLD || (ANYOF_FLAGS(ret) & ANYOF_CLASS)))
 	&& ! depends_list
-	&& ! unicode_alternate
 	&& ! HAS_NONLOCALE_RUNTIME_PROPERTY_DEFINITION)
     {
         _invlist_invert(cp_list);
@@ -12264,10 +12968,147 @@ parseit:
 	invert = FALSE;
     }
 
+    /* If we didn't do folding, it's because some information isn't available
+     * until runtime; set the run-time fold flag for these.  (We don't have to
+     * worry about properties folding, as that is taken care of by the swash
+     * fetching) */
+    if (FOLD && LOC)
+    {
+       ANYOF_FLAGS(ret) |= ANYOF_LOC_FOLD;
+    }
+
+    /* Some character classes are equivalent to other nodes.  Such nodes take
+     * up less room and generally fewer operations to execute than ANYOF nodes.
+     * Above, we checked for and optimized into some such equivalents for
+     * certain common classes that are easy to test.  Getting to this point in
+     * the code means that the class didn't get optimized there.  Since this
+     * code is only executed in Pass 2, it is too late to save space--it has
+     * been allocated in Pass 1, and currently isn't given back.  But turning
+     * things into an EXACTish node can allow the optimizer to join it to any
+     * adjacent such nodes.  And if the class is equivalent to things like /./,
+     * expensive run-time swashes can be avoided.  Now that we have more
+     * complete information, we can find things necessarily missed by the
+     * earlier code.  I (khw) am not sure how much to look for here.  It would
+     * be easy, but perhaps too slow, to check any candidates against all the
+     * node types they could possibly match using _invlistEQ(). */
+
+    if (cp_list
+        && ! invert
+        && ! depends_list
+        && ! (ANYOF_FLAGS(ret) & ANYOF_CLASS)
+        && ! HAS_NONLOCALE_RUNTIME_PROPERTY_DEFINITION)
+    {
+       UV start, end;
+       U8 op = END;  /* The optimzation node-type */
+        const char * cur_parse= RExC_parse;
+
+       invlist_iterinit(cp_list);
+       if (! invlist_iternext(cp_list, &start, &end)) {
+
+            /* Here, the list is empty.  This happens, for example, when a
+             * Unicode property is the only thing in the character class, and
+             * it doesn't match anything.  (perluniprops.pod notes such
+             * properties) */
+            op = OPFAIL;
+            *flagp |= HASWIDTH|SIMPLE;
+        }
+        else if (start == end) {    /* The range is a single code point */
+            if (! invlist_iternext(cp_list, &start, &end)
+
+                    /* Don't do this optimization if it would require changing
+                     * the pattern to UTF-8 */
+                && (start < 256 || UTF))
+            {
+                /* Here, the list contains a single code point.  Can optimize
+                 * into an EXACT node */
+
+                value = start;
+
+                if (! FOLD) {
+                    op = EXACT;
+                }
+                else if (LOC) {
+
+                    /* A locale node under folding with one code point can be
+                     * an EXACTFL, as its fold won't be calculated until
+                     * runtime */
+                    op = EXACTFL;
+                }
+                else {
+
+                    /* Here, we are generally folding, but there is only one
+                     * code point to match.  If we have to, we use an EXACT
+                     * node, but it would be better for joining with adjacent
+                     * nodes in the optimization pass if we used the same
+                     * EXACTFish node that any such are likely to be.  We can
+                     * do this iff the code point doesn't participate in any
+                     * folds.  For example, an EXACTF of a colon is the same as
+                     * an EXACT one, since nothing folds to or from a colon. */
+                    if (value < 256) {
+                        if (IS_IN_SOME_FOLD_L1(value)) {
+                            op = EXACT;
+                        }
+                    }
+                    else {
+                        if (! PL_utf8_foldable) {
+                            SV* swash = swash_init("utf8", "_Perl_Any_Folds",
+                                                &PL_sv_undef, 1, 0);
+                            PL_utf8_foldable = _get_swash_invlist(swash);
+                            SvREFCNT_dec(swash);
+                        }
+                        if (_invlist_contains_cp(PL_utf8_foldable, value)) {
+                            op = EXACT;
+                        }
+                    }
+
+                    /* If we haven't found the node type, above, it means we
+                     * can use the prevailing one */
+                    if (op == END) {
+                        op = compute_EXACTish(pRExC_state);
+                    }
+                }
+            }
+        }
+        else if (start == 0) {
+            if (end == UV_MAX) {
+                op = SANY;
+                *flagp |= HASWIDTH|SIMPLE;
+                RExC_naughty++;
+            }
+            else if (end == '\n' - 1
+                    && invlist_iternext(cp_list, &start, &end)
+                    && start == '\n' + 1 && end == UV_MAX)
+            {
+                op = REG_ANY;
+                *flagp |= HASWIDTH|SIMPLE;
+                RExC_naughty++;
+            }
+        }
+
+        if (op != END) {
+            RExC_parse = (char *)orig_parse;
+            RExC_emit = (regnode *)orig_emit;
+
+            ret = reg_node(pRExC_state, op);
+
+            RExC_parse = (char *)cur_parse;
+
+            if (PL_regkind[op] == EXACT) {
+                alloc_maybe_populate_EXACT(pRExC_state, ret, flagp, 0, value);
+            }
+
+            SvREFCNT_dec(cp_list);
+            SvREFCNT_dec(listsv);
+            return ret;
+        }
+    }
+
     /* Here, <cp_list> contains all the code points we can determine at
      * compile time that match under all conditions.  Go through it, and
      * for things that belong in the bitmap, put them there, and delete from
-     * <cp_list> */
+     * <cp_list>.  While we are at it, see if everything above 255 is in the
+     * list, and if so, set a flag to speed up execution */
+    ANYOF_BITMAP_ZERO(ret);
     if (cp_list) {
 
 	/* This gets set if we actually need to modify things */
@@ -12281,6 +13122,10 @@ parseit:
 	    UV high;
 	    int i;
 
+            if (end == UV_MAX && start <= 256) {
+                ANYOF_FLAGS(ret) |= ANYOF_UNICODE_ALL;
+            }
+
 	    /* Quit if are above what we should change */
 	    if (start > 255) {
 		break;
@@ -12293,7 +13138,6 @@ parseit:
 	    for (i = start; i <= (int) high; i++) {
 		if (! ANYOF_BITMAP_TEST(ret, i)) {
 		    ANYOF_BITMAP_SET(ret, i);
-		    stored++;
 		    prevvalue = value;
 		    value = i;
 		}
@@ -12307,7 +13151,7 @@ parseit:
 	}
 
 	/* If have completely emptied it, remove it completely */
-	if (invlist_len(cp_list) == 0) {
+	if (_invlist_len(cp_list) == 0) {
 	    SvREFCNT_dec(cp_list);
 	    cp_list = NULL;
 	}
@@ -12317,7 +13161,9 @@ parseit:
         ANYOF_FLAGS(ret) |= ANYOF_INVERT;
     }
 
-    /* Combine the two lists into one. */
+    /* Here, the bitmap has been populated with all the Latin1 code points that
+     * always match.  Can now add to the overall list those that match only
+     * when the target string is UTF-8 (<depends_list>). */
     if (depends_list) {
 	if (cp_list) {
 	    _invlist_union(cp_list, depends_list, &cp_list);
@@ -12328,128 +13174,18 @@ parseit:
 	}
     }
 
-    /* Folding in the bitmap is taken care of above, but not for locale (for
-     * which we have to wait to see what folding is in effect at runtime), and
-     * for some things not in the bitmap (only the upper latin folds in this
-     * case, as all other single-char folding has been set above).  Set
-     * run-time fold flag for these */
-    if (FOLD && (LOC
-		|| (DEPENDS_SEMANTICS
-		    && cp_list
-		    && ! (ANYOF_FLAGS(ret) & ANYOF_NONBITMAP_NON_UTF8))
-		|| unicode_alternate))
-    {
-	ANYOF_FLAGS(ret) |= ANYOF_LOC_NONBITMAP_FOLD;
-    }
-
-    /* A single character class can be "optimized" into an EXACTish node.
-     * Note that since we don't currently count how many characters there are
-     * outside the bitmap, we are XXX missing optimization possibilities for
-     * them.  This optimization can't happen unless this is a truly single
-     * character class, which means that it can't be an inversion into a
-     * many-character class, and there must be no possibility of there being
-     * things outside the bitmap.  'stored' (only) for locales doesn't include
-     * \w, etc, so have to make a special test that they aren't present
-     *
-     * Similarly A 2-character class of the very special form like [bB] can be
-     * optimized into an EXACTFish node, but only for non-locales, and for
-     * characters which only have the two folds; so things like 'fF' and 'Ii'
-     * wouldn't work because they are part of the fold of 'LATIN SMALL LIGATURE
-     * FI'. */
-    if (! cp_list
-	&& ! unicode_alternate
-	&& ! HAS_NONLOCALE_RUNTIME_PROPERTY_DEFINITION
-	&& ! (ANYOF_FLAGS(ret) & (ANYOF_INVERT|ANYOF_UNICODE_ALL))
-        && (((stored == 1 && ((! (ANYOF_FLAGS(ret) & ANYOF_LOCALE))
-                              || (! ANYOF_CLASS_TEST_ANY_SET(ret)))))
-	    || (stored == 2 && ((! (ANYOF_FLAGS(ret) & ANYOF_LOCALE))
-				 && (! _HAS_NONLATIN1_FOLD_CLOSURE_ONLY_FOR_USE_BY_REGCOMP_DOT_C_AND_REGEXEC_DOT_C(value))
-				 /* If the latest code point has a fold whose
-				  * bit is set, it must be the only other one */
-				&& ((prevvalue = PL_fold_latin1[value]) != (IV)value)
-				 && ANYOF_BITMAP_TEST(ret, prevvalue)))))
-    {
-        /* Note that the information needed to decide to do this optimization
-         * is not currently available until the 2nd pass, and that the actually
-	 * used EXACTish node takes less space than the calculated ANYOF node,
-	 * and hence the amount of space calculated in the first pass is larger
-         * than actually used, so this optimization doesn't gain us any space.
-	 * But an EXACT node is faster than an ANYOF node, and can be combined
-	 * with any adjacent EXACT nodes later by the optimizer for further
-	 * gains.  The speed of executing an EXACTF is similar to an ANYOF
-	 * node, so the optimization advantage comes from the ability to join
-	 * it to adjacent EXACT nodes */
-
-        const char * cur_parse= RExC_parse;
-	U8 op;
-        RExC_emit = (regnode *)orig_emit;
-        RExC_parse = (char *)orig_parse;
-
-	if (stored == 1) {
-
-	    /* A locale node with one point can be folded; all the other cases
-	     * with folding will have two points, since we calculate them above
-	     */
-	    if (ANYOF_FLAGS(ret) & ANYOF_LOC_NONBITMAP_FOLD) {
-		 op = EXACTFL;
-	    }
-	    else {
-		op = EXACT;
-	    }
-	}
-	else {   /* else 2 chars in the bit map: the folds of each other */
-
-	    /* Use the folded value, which for the cases where we get here,
-	     * is just the lower case of the current one (which may resolve to
-	     * itself, or to the other one */
-	    value = toLOWER_LATIN1(value);
-
-	    /* To join adjacent nodes, they must be the exact EXACTish type.
-	     * Try to use the most likely type, by using EXACTFA if possible,
-	     * then EXACTFU if the regex calls for it, or is required because
-	     * the character is non-ASCII.  (If <value> is ASCII, its fold is
-	     * also ASCII for the cases where we get here.) */
-	    if (MORE_ASCII_RESTRICTED && isASCII(value)) {
-		op = EXACTFA;
-	    }
-	    else if (AT_LEAST_UNI_SEMANTICS || !isASCII(value)) {
-		op = EXACTFU;
-	    }
-	    else {    /* Otherwise, more likely to be EXACTF type */
-		op = EXACTF;
-	    }
-	}
-
-	ret = reg_node(pRExC_state, op);
-        RExC_parse = (char *)cur_parse;
-	if (UTF && ! NATIVE_IS_INVARIANT(value)) {
-	    *STRING(ret)= UTF8_EIGHT_BIT_HI((U8) value);
-	    *(STRING(ret) + 1)= UTF8_EIGHT_BIT_LO((U8) value);
-	    STR_LEN(ret)= 2;
-	    RExC_emit += STR_SZ(2);
-	}
-	else {
-	    *STRING(ret)= (char)value;
-	    STR_LEN(ret)= 1;
-	    RExC_emit += STR_SZ(1);
-	}
-	SvREFCNT_dec(listsv);
-        return ret;
-    }
-
     /* If there is a swash and more than one element, we can't use the swash in
      * the optimization below. */
     if (swash && element_count > 1) {
 	SvREFCNT_dec(swash);
 	swash = NULL;
     }
+
     if (! cp_list
-	&& ! HAS_NONLOCALE_RUNTIME_PROPERTY_DEFINITION
-	&& ! unicode_alternate)
+	&& ! HAS_NONLOCALE_RUNTIME_PROPERTY_DEFINITION)
     {
 	ARG_SET(ret, ANYOF_NONBITMAP_EMPTY);
 	SvREFCNT_dec(listsv);
-	SvREFCNT_dec(unicode_alternate);
     }
     else {
 	/* av[0] stores the character class description in its textual form:
@@ -12458,18 +13194,16 @@ parseit:
 	 * av[1] if NULL, is a placeholder to later contain the swash computed
 	 *       from av[0].  But if no further computation need be done, the
 	 *       swash is stored there now.
-	 * av[2] stores the multicharacter foldings, used later in
-	 *       regexec.c:S_reginclass().
-	 * av[3] stores the cp_list inversion list for use in addition or
+	 * av[2] stores the cp_list inversion list for use in addition or
 	 *       instead of av[0]; used only if av[1] is NULL
-	 * av[4] is set if any component of the class is from a user-defined
+	 * av[3] is set if any component of the class is from a user-defined
 	 *       property; used only if av[1] is NULL */
 	AV * const av = newAV();
 	SV *rv;
 
 	av_store(av, 0, (HAS_NONLOCALE_RUNTIME_PROPERTY_DEFINITION)
 			? listsv
-			: &PL_sv_undef);
+			: (SvREFCNT_dec(listsv), &PL_sv_undef));
 	if (swash) {
 	    av_store(av, 1, swash);
 	    SvREFCNT_dec(cp_list);
@@ -12477,27 +13211,18 @@ parseit:
 	else {
 	    av_store(av, 1, NULL);
 	    if (cp_list) {
-		av_store(av, 3, cp_list);
-		av_store(av, 4, newSVuv(has_user_defined_property));
+		av_store(av, 2, cp_list);
+		av_store(av, 3, newSVuv(has_user_defined_property));
 	    }
 	}
 
-        /* Store any computed multi-char folds only if we are allowing
-         * them */
-        if (allow_full_fold) {
-            av_store(av, 2, MUTABLE_SV(unicode_alternate));
-            if (unicode_alternate) { /* This node is variable length */
-                OP(ret) = ANYOFV;
-            }
-        }
-        else {
-            av_store(av, 2, NULL);
-        }
 	rv = newRV_noinc(MUTABLE_SV(av));
 	n = add_data(pRExC_state, 1, "s");
 	RExC_rxi->data->data[n] = (void*)rv;
 	ARG_SET(ret, n);
     }
+
+    *flagp |= HASWIDTH|SIMPLE;
     return ret;
 }
 #undef HAS_NONLOCALE_RUNTIME_PROPERTY_DEFINITION
@@ -12591,7 +13316,7 @@ STATIC regnode *			/* Location. */
 S_reg_node(pTHX_ RExC_state_t *pRExC_state, U8 op)
 {
     dVAR;
-    register regnode *ptr;
+    regnode *ptr;
     regnode * const ret = RExC_emit;
     GET_RE_DEBUG_FLAGS_DECL;
 
@@ -12633,7 +13358,7 @@ STATIC regnode *			/* Location. */
 S_reganode(pTHX_ RExC_state_t *pRExC_state, U8 op, U32 arg)
 {
     dVAR;
-    register regnode *ptr;
+    regnode *ptr;
     regnode * const ret = RExC_emit;
     GET_RE_DEBUG_FLAGS_DECL;
 
@@ -12705,9 +13430,9 @@ STATIC void
 S_reginsert(pTHX_ RExC_state_t *pRExC_state, U8 op, regnode *opnd, U32 depth)
 {
     dVAR;
-    register regnode *src;
-    register regnode *dst;
-    register regnode *place;
+    regnode *src;
+    regnode *dst;
+    regnode *place;
     const int offset = regarglen[(U8)op];
     const int size = NODE_STEP_REGNODE + offset;
     GET_RE_DEBUG_FLAGS_DECL;
@@ -12793,7 +13518,7 @@ STATIC void
 S_regtail(pTHX_ RExC_state_t *pRExC_state, regnode *p, const regnode *val,U32 depth)
 {
     dVAR;
-    register regnode *scan;
+    regnode *scan;
     GET_RE_DEBUG_FLAGS_DECL;
 
     PERL_ARGS_ASSERT_REGTAIL;
@@ -12852,7 +13577,7 @@ STATIC U8
 S_regtail_study(pTHX_ RExC_state_t *pRExC_state, regnode *p, const regnode *val,U32 depth)
 {
     dVAR;
-    register regnode *scan;
+    regnode *scan;
     U8 exact = PSEUDO;
 #ifdef EXPERIMENTAL_INPLACESCAN
     I32 min = 0;
@@ -13093,7 +13818,43 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o)
 {
 #ifdef DEBUGGING
     dVAR;
-    register int k;
+    int k;
+
+    /* Should be synchronized with * ANYOF_ #xdefines in regcomp.h */
+    static const char * const anyofs[] = {
+        "[\\w]",
+        "[\\W]",
+        "[\\s]",
+        "[\\S]",
+        "[\\d]",
+        "[\\D]",
+        "[:alnum:]",
+        "[:^alnum:]",
+        "[:alpha:]",
+        "[:^alpha:]",
+        "[:ascii:]",
+        "[:^ascii:]",
+        "[:cntrl:]",
+        "[:^cntrl:]",
+        "[:graph:]",
+        "[:^graph:]",
+        "[:lower:]",
+        "[:^lower:]",
+        "[:print:]",
+        "[:^print:]",
+        "[:punct:]",
+        "[:^punct:]",
+        "[:upper:]",
+        "[:^upper:]",
+        "[:xdigit:]",
+        "[:^xdigit:]",
+        "[:space:]",
+        "[:^space:]",
+        "[:blank:]",
+        "[:^blank:]",
+        "[\\v]",
+        "[\\V]"
+    };
     RXi_GET_DECL(prog,progi);
     GET_RE_DEBUG_FLAGS_DECL;
     
@@ -13214,43 +13975,10 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o)
 	const U8 flags = ANYOF_FLAGS(o);
 	int do_sep = 0;
 
-	/* Should be synchronized with * ANYOF_ #xdefines in regcomp.h */
-	static const char * const anyofs[] = {
-	    "\\w",
-	    "\\W",
-	    "\\s",
-	    "\\S",
-	    "\\d",
-	    "\\D",
-	    "[:alnum:]",
-	    "[:^alnum:]",
-	    "[:alpha:]",
-	    "[:^alpha:]",
-	    "[:ascii:]",
-	    "[:^ascii:]",
-	    "[:cntrl:]",
-	    "[:^cntrl:]",
-	    "[:graph:]",
-	    "[:^graph:]",
-	    "[:lower:]",
-	    "[:^lower:]",
-	    "[:print:]",
-	    "[:^print:]",
-	    "[:punct:]",
-	    "[:^punct:]",
-	    "[:upper:]",
-	    "[:^upper:]",
-	    "[:xdigit:]",
-	    "[:^xdigit:]",
-	    "[:space:]",
-	    "[:^space:]",
-	    "[:blank:]",
-	    "[:^blank:]"
-	};
 
 	if (flags & ANYOF_LOCALE)
 	    sv_catpvs(sv, "{loc}");
-	if (flags & ANYOF_LOC_NONBITMAP_FOLD)
+	if (flags & ANYOF_LOC_FOLD)
 	    sv_catpvs(sv, "{i}");
 	Perl_sv_catpvf(aTHX_ sv, "[%s", PL_colors[0]);
 	if (flags & ANYOF_INVERT)
@@ -13300,7 +14028,7 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o)
 
 	if (ANYOF_NONBITMAP(o)) {
 	    SV *lv; /* Set if there is something outside the bit map */
-	    SV * const sw = regclass_swash(prog, o, FALSE, &lv, 0);
+	    SV * const sw = regclass_swash(prog, o, FALSE, &lv, NULL);
             bool byte_output = FALSE;   /* If something in the bitmap has been
                                            output */
 
@@ -13384,6 +14112,15 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o)
 
 	Perl_sv_catpvf(aTHX_ sv, "%s]", PL_colors[1]);
     }
+    else if (k == POSIXD || k == NPOSIXD) {
+        U8 index = FLAGS(o) * 2;
+        if (index > (sizeof(anyofs) / sizeof(anyofs[0]))) {
+            Perl_sv_catpvf(aTHX_ sv, "[illegal type=%d])", index);
+        }
+        else {
+            sv_catpv(sv, anyofs[index]);
+        }
+    }
     else if (k == BRANCHJ && (OP(o) == UNLESSM || OP(o) == IFMATCH))
 	Perl_sv_catpvf(aTHX_ sv, "[%d]", -(o->flags));
 #else
@@ -13398,7 +14135,7 @@ SV *
 Perl_re_intuit_string(pTHX_ REGEXP * const r)
 {				/* Assume that RE_INTUIT is set */
     dVAR;
-    struct regexp *const prog = (struct regexp *)SvANY(r);
+    struct regexp *const prog = ReANY(r);
     GET_RE_DEBUG_FLAGS_DECL;
 
     PERL_ARGS_ASSERT_RE_INTUIT_STRING;
@@ -13446,7 +14183,7 @@ void
 Perl_pregfree2(pTHX_ REGEXP *rx)
 {
     dVAR;
-    struct regexp *const r = (struct regexp *)SvANY(rx);
+    struct regexp *const r = ReANY(rx);
     GET_RE_DEBUG_FLAGS_DECL;
 
     PERL_ARGS_ASSERT_PREGFREE2;
@@ -13456,6 +14193,7 @@ Perl_pregfree2(pTHX_ REGEXP *rx)
     } else {
         CALLREGFREE_PVT(rx); /* free the private data */
         SvREFCNT_dec(RXp_PAREN_NAMES(r));
+	Safefree(r->xpv_len_u.xpvlenu_pv);
     }        
     if (r->substrs) {
         SvREFCNT_dec(r->anchored_substr);
@@ -13465,11 +14203,12 @@ Perl_pregfree2(pTHX_ REGEXP *rx)
 	Safefree(r->substrs);
     }
     RX_MATCH_COPY_FREE(rx);
-#ifdef PERL_OLD_COPY_ON_WRITE
+#ifdef PERL_ANY_COW
     SvREFCNT_dec(r->saved_copy);
 #endif
     Safefree(r->offs);
     SvREFCNT_dec(r->qr_anoncv);
+    rx->sv_u.svu_rx = 0;
 }
 
 /*  reg_temp_copy()
@@ -13493,26 +14232,42 @@ REGEXP *
 Perl_reg_temp_copy (pTHX_ REGEXP *ret_x, REGEXP *rx)
 {
     struct regexp *ret;
-    struct regexp *const r = (struct regexp *)SvANY(rx);
+    struct regexp *const r = ReANY(rx);
+    const bool islv = ret_x && SvTYPE(ret_x) == SVt_PVLV;
 
     PERL_ARGS_ASSERT_REG_TEMP_COPY;
 
     if (!ret_x)
 	ret_x = (REGEXP*) newSV_type(SVt_REGEXP);
-    ret = (struct regexp *)SvANY(ret_x);
+    else {
+	SvOK_off((SV *)ret_x);
+	if (islv) {
+	    /* For PVLVs, SvANY points to the xpvlv body while sv_u points
+	       to the regexp.  (For SVt_REGEXPs, sv_upgrade has already
+	       made both spots point to the same regexp body.) */
+	    REGEXP *temp = (REGEXP *)newSV_type(SVt_REGEXP);
+	    assert(!SvPVX(ret_x));
+	    ret_x->sv_u.svu_rx = temp->sv_any;
+	    temp->sv_any = NULL;
+	    SvFLAGS(temp) = (SvFLAGS(temp) & ~SVTYPEMASK) | SVt_NULL;
+	    SvREFCNT_dec(temp);
+	    /* SvCUR still resides in the xpvlv struct, so the regexp copy-
+	       ing below will not set it. */
+	    SvCUR_set(ret_x, SvCUR(rx));
+	}
+    }
+    /* This ensures that SvTHINKFIRST(sv) is true, and hence that
+       sv_force_normal(sv) is called.  */
+    SvFAKE_on(ret_x);
+    ret = ReANY(ret_x);
     
-    (void)ReREFCNT_inc(rx);
-    /* We can take advantage of the existing "copied buffer" mechanism in SVs
-       by pointing directly at the buffer, but flagging that the allocated
-       space in the copy is zero. As we've just done a struct copy, it's now
-       a case of zero-ing that, rather than copying the current length.  */
-    SvPV_set(ret_x, RX_WRAPPED(rx));
-    SvFLAGS(ret_x) |= SvFLAGS(rx) & (SVf_POK|SVp_POK|SVf_UTF8);
+    SvFLAGS(ret_x) |= SvUTF8(rx);
+    /* We share the same string buffer as the original regexp, on which we
+       hold a reference count, incremented when mother_re is set below.
+       The string pointer is copied here, being part of the regexp struct.
+     */
     memcpy(&(ret->xpv_cur), &(r->xpv_cur),
 	   sizeof(regexp) - STRUCT_OFFSET(regexp, xpv_cur));
-    SvLEN_set(ret_x, 0);
-    SvSTASH_set(ret_x, NULL);
-    SvMAGIC_set(ret_x, NULL);
     if (r->offs) {
         const I32 npar = r->nparens+1;
         Newx(ret->offs, npar, regexp_paren_pair);
@@ -13531,10 +14286,10 @@ Perl_reg_temp_copy (pTHX_ REGEXP *ret_x, REGEXP *rx)
 	   anchored or float namesakes, and don't hold a second reference.  */
     }
     RX_MATCH_COPIED_off(ret_x);
-#ifdef PERL_OLD_COPY_ON_WRITE
+#ifdef PERL_ANY_COW
     ret->saved_copy = NULL;
 #endif
-    ret->mother_re = rx;
+    ret->mother_re = ReREFCNT_inc(r->mother_re ? r->mother_re : rx);
     SvREFCNT_inc_void(ret->qr_anoncv);
     
     return ret_x;
@@ -13557,7 +14312,7 @@ void
 Perl_regfree_internal(pTHX_ REGEXP * const rx)
 {
     dVAR;
-    struct regexp *const r = (struct regexp *)SvANY(rx);
+    struct regexp *const r = ReANY(rx);
     RXi_GET_DECL(r,ri);
     GET_RE_DEBUG_FLAGS_DECL;
 
@@ -13678,8 +14433,8 @@ Perl_re_dup_guts(pTHX_ const REGEXP *sstr, REGEXP *dstr, CLONE_PARAMS *param)
 {
     dVAR;
     I32 npar;
-    const struct regexp *r = (const struct regexp *)SvANY(sstr);
-    struct regexp *ret = (struct regexp *)SvANY(dstr);
+    const struct regexp *r = ReANY(sstr);
+    struct regexp *ret = ReANY(dstr);
     
     PERL_ARGS_ASSERT_RE_DUP_GUTS;
 
@@ -13739,25 +14494,18 @@ Perl_re_dup_guts(pTHX_ const REGEXP *sstr, REGEXP *dstr, CLONE_PARAMS *param)
 	ret->subbeg  = SAVEPVN(ret->subbeg, ret->sublen);
     else
 	ret->subbeg = NULL;
-#ifdef PERL_OLD_COPY_ON_WRITE
+#ifdef PERL_ANY_COW
     ret->saved_copy = NULL;
 #endif
 
-    if (ret->mother_re) {
-	if (SvPVX_const(dstr) == SvPVX_const(ret->mother_re)) {
-	    /* Our storage points directly to our mother regexp, but that's
+    /* Whether mother_re be set or no, we need to copy the string.  We
+       cannot refrain from copying it when the storage points directly to
+       our mother regexp, because that's
 	       1: a buffer in a different thread
 	       2: something we no longer hold a reference on
 	       so we need to copy it locally.  */
-	    /* Note we need to use SvCUR(), rather than
-	       SvLEN(), on our mother_re, because it, in
-	       turn, may well be pointing to its own mother_re.  */
-	    SvPV_set(dstr, SAVEPVN(SvPVX_const(ret->mother_re),
-				   SvCUR(ret->mother_re)+1));
-	    SvLEN_set(dstr, SvCUR(ret->mother_re)+1);
-	}
-	ret->mother_re      = NULL;
-    }
+    RX_WRAPPED(dstr) = SAVEPVN(RX_WRAPPED(sstr), SvCUR(sstr)+1);
+    ret->mother_re   = NULL;
     ret->gofs = 0;
 }
 #endif /* PERL_IN_XSUB_RE */
@@ -13780,7 +14528,7 @@ void *
 Perl_regdupe_internal(pTHX_ REGEXP * const rx, CLONE_PARAMS *param)
 {
     dVAR;
-    struct regexp *const r = (struct regexp *)SvANY(rx);
+    struct regexp *const r = ReANY(rx);
     regexp_internal *reti;
     int len;
     RXi_GET_DECL(r,ri);
@@ -13884,10 +14632,10 @@ Perl_regdupe_internal(pTHX_ REGEXP * const rx, CLONE_PARAMS *param)
  - regnext - dig the "next" pointer out of a node
  */
 regnode *
-Perl_regnext(pTHX_ register regnode *p)
+Perl_regnext(pTHX_ regnode *p)
 {
     dVAR;
-    register I32 offset;
+    I32 offset;
 
     if (!p)
 	return(NULL);
@@ -13961,11 +14709,13 @@ Perl_save_re_context(pTHX)
 
     PL_reg_oldsaved = NULL;
     PL_reg_oldsavedlen = 0;
+    PL_reg_oldsavedoffset = 0;
+    PL_reg_oldsavedcoffset = 0;
     PL_reg_maxiter = 0;
     PL_reg_leftiter = 0;
     PL_reg_poscache = NULL;
     PL_reg_poscache_size = 0;
-#ifdef PERL_OLD_COPY_ON_WRITE
+#ifdef PERL_ANY_COW
     PL_nrs = NULL;
 #endif
 
@@ -13991,13 +14741,6 @@ Perl_save_re_context(pTHX)
 }
 #endif
 
-static void
-clear_re(pTHX_ void *r)
-{
-    dVAR;
-    ReREFCNT_dec((REGEXP *)r);
-}
-
 #ifdef DEBUGGING
 
 STATIC void
@@ -14046,8 +14789,8 @@ S_dumpuntil(pTHX_ const regexp *r, const regnode *start, const regnode *node,
 	    SV* sv, I32 indent, U32 depth)
 {
     dVAR;
-    register U8 op = PSEUDO;	/* Arbitrary non-END op. */
-    register const regnode *next;
+    U8 op = PSEUDO;	/* Arbitrary non-END op. */
+    const regnode *next;
     const regnode *optstart= NULL;
     
     RXi_GET_DECL(r,ri);
@@ -14098,9 +14841,9 @@ S_dumpuntil(pTHX_ const regexp *r, const regnode *start, const regnode *node,
 	if (PL_regkind[(U8)op] == BRANCHJ) {
 	    assert(next);
 	    {
-                register const regnode *nnode = (OP(next) == LONGJMP
-					     ? regnext((regnode *)next)
-					     : next);
+                const regnode *nnode = (OP(next) == LONGJMP
+                                       ? regnext((regnode *)next)
+                                       : next);
                 if (last && nnode > last)
                     nnode = last;
                 DUMPUNTIL(NEXTOPER(NEXTOPER(node)), nnode);