Add functions for adding and deleting stash names

[perl5.git] / regcomp.c
diff --git a/regcomp.c b/regcomp.c

index b71e546..831d579 100644 (file)
--- a/regcomp.c
+++ b/regcomp.c
@@ -2,7 +2,9 @@
   */
  
  /*
- * "A fair jaw-cracker dwarf-language must be."  --Samwise Gamgee
+ * 'A fair jaw-cracker dwarf-language must be.'            --Samwise Gamgee
+ *
+ *     [p.285 of _The Lord of the Rings_, II/iii: "The Ring Goes South"]
   */
  
  /* This file contains functions for compiling a regular expression.  See
@@ -57,7 +59,8 @@
   ****    Alterations to Henry's code are...
   ****
   ****    Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
- ****    2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007 by Larry Wall and others
+ ****    2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008
+ ****    by Larry Wall and others
   ****
   ****    You may distribute under the terms of either the GNU General Public
   ****    License or the Artistic License, as specified in the README file.
@@ -82,6 +85,8 @@
  #  include "regcomp.h"
  #endif
  
+#include "dquote_static.c"
+
  #ifdef op
  #undef op
  #endif /* op */
@@ -129,7 +134,6 @@ typedef struct RExC_state_t {
      I32                orig_utf8;      /* whether the pattern was originally in utf8 */
                                 /* XXX use this for future optimisation of case
                                  * where pattern must be upgraded to utf8. */
-    HV         *charnames;             /* cache of named sequences */
      HV         *paren_names;           /* Paren names */
      
      regnode    **recurse;              /* Recurse regops */
@@ -174,7 +178,6 @@ typedef struct RExC_state_t {
  #define RExC_seen_evals        (pRExC_state->seen_evals)
  #define RExC_utf8      (pRExC_state->utf8)
  #define RExC_orig_utf8 (pRExC_state->orig_utf8)
-#define RExC_charnames  (pRExC_state->charnames)
  #define RExC_open_parens       (pRExC_state->open_parens)
  #define RExC_close_parens      (pRExC_state->close_parens)
  #define RExC_opend     (pRExC_state->opend)
@@ -195,7 +198,10 @@ typedef struct RExC_state_t {
   */
  #define        WORST           0       /* Worst case. */
  #define        HASWIDTH        0x01    /* Known to match non-null strings. */
-#define        SIMPLE          0x02    /* Simple enough to be STAR/PLUS operand. */
+
+/* Simple enough to be a STAR/PLUS operand; in an EXACT node it must be a
+ * single character, and if utf8, must be invariant. */
+#define        SIMPLE          0x02
  #define        SPSTART         0x04    /* Starts with * or +. */
  #define TRYAGAIN       0x08    /* Weeded out a declaration. */
  #define POSTPONED      0x10    /* (?1),(?&name), (??{...}) or similar */
@@ -217,6 +223,11 @@ typedef struct RExC_state_t {
  #define PAREN_SET(u8str,paren) PBYTE(u8str,paren) |= PBITVAL(paren)
  #define PAREN_UNSET(u8str,paren) PBYTE(u8str,paren) &= (~PBITVAL(paren))
  
+/* If not already in utf8, do a longjmp back to the beginning */
+#define UTF8_LONGJMP 42 /* Choose a value not likely to ever conflict */
+#define REQUIRE_UTF8   STMT_START {                                       \
+                                     if (! UTF) JMPENV_JUMP(UTF8_LONGJMP); \
+                        } STMT_END
  
  /* About scan_data_t.
  
@@ -357,9 +368,10 @@ static const scan_data_t zero_scan_data =
  #define SCF_TRIE_RESTUDY        0x4000 /* Do restudy? */
  #define SCF_SEEN_ACCEPT         0x8000 
  
-#define UTF (RExC_utf8 != 0)
-#define LOC ((RExC_flags & RXf_PMf_LOCALE) != 0)
-#define FOLD ((RExC_flags & RXf_PMf_FOLD) != 0)
+#define UTF cBOOL(RExC_utf8)
+#define LOC cBOOL(RExC_flags & RXf_PMf_LOCALE)
+#define UNI_SEMANTICS cBOOL(RExC_flags & RXf_PMf_UNICODE)
+#define FOLD cBOOL(RExC_flags & RXf_PMf_FOLD)
  
  #define OOB_UNICODE            12345678
  #define OOB_NAMEDCLASS         -1
@@ -472,23 +484,22 @@ static const scan_data_t zero_scan_data =
             (int)offset, RExC_precomp, RExC_precomp + offset);  \
  } STMT_END
  
-#define        vWARN(loc,m) STMT_START {                                       \
+#define        ckWARNreg(loc,m) STMT_START {                                   \
      const IV offset = loc - RExC_precomp;                              \
-    Perl_warner(aTHX_ packWARN(WARN_REGEXP), "%s" REPORT_LOCATION,     \
-           m, (int)offset, RExC_precomp, RExC_precomp + offset);       \
+    Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), m REPORT_LOCATION,     \
+           (int)offset, RExC_precomp, RExC_precomp + offset);          \
  } STMT_END
  
-#define        vWARNdep(loc,m) STMT_START {                                    \
+#define        ckWARNregdep(loc,m) STMT_START {                                \
      const IV offset = loc - RExC_precomp;                              \
-    Perl_warner(aTHX_ packWARN2(WARN_DEPRECATED, WARN_REGEXP),         \
-           "%s" REPORT_LOCATION,                                       \
-           m, (int)offset, RExC_precomp, RExC_precomp + offset);       \
+    Perl_ck_warner_d(aTHX_ packWARN2(WARN_DEPRECATED, WARN_REGEXP),    \
+           m REPORT_LOCATION,                                          \
+           (int)offset, RExC_precomp, RExC_precomp + offset);          \
  } STMT_END
  
-
-#define        vWARN2(loc, m, a1) STMT_START {                                 \
+#define        ckWARN2reg(loc, m, a1) STMT_START {                             \
      const IV offset = loc - RExC_precomp;                              \
-    Perl_warner(aTHX_ packWARN(WARN_REGEXP), m REPORT_LOCATION,                \
+    Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), m REPORT_LOCATION,     \
             a1, (int)offset, RExC_precomp, RExC_precomp + offset);      \
  } STMT_END
  
@@ -498,12 +509,24 @@ static const scan_data_t zero_scan_data =
             a1, a2, (int)offset, RExC_precomp, RExC_precomp + offset);  \
  } STMT_END
  
+#define        ckWARN3reg(loc, m, a1, a2) STMT_START {                         \
+    const IV offset = loc - RExC_precomp;                              \
+    Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), m REPORT_LOCATION,     \
+           a1, a2, (int)offset, RExC_precomp, RExC_precomp + offset);  \
+} STMT_END
+
  #define        vWARN4(loc, m, a1, a2, a3) STMT_START {                         \
      const IV offset = loc - RExC_precomp;                              \
      Perl_warner(aTHX_ packWARN(WARN_REGEXP), m REPORT_LOCATION,                \
             a1, a2, a3, (int)offset, RExC_precomp, RExC_precomp + offset); \
  } STMT_END
  
+#define        ckWARN4reg(loc, m, a1, a2, a3) STMT_START {                     \
+    const IV offset = loc - RExC_precomp;                              \
+    Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), m REPORT_LOCATION,     \
+           a1, a2, a3, (int)offset, RExC_precomp, RExC_precomp + offset); \
+} STMT_END
+
  #define        vWARN5(loc, m, a1, a2, a3, a4) STMT_START {                     \
      const IV offset = loc - RExC_precomp;                              \
      Perl_warner(aTHX_ packWARN(WARN_REGEXP), m REPORT_LOCATION,                \
@@ -632,6 +655,8 @@ S_scan_commit(pTHX_ const RExC_state_t *pRExC_state, scan_data_t *data, I32 *min
      const STRLEN old_l = CHR_SVLEN(*data->longest);
      GET_RE_DEBUG_FLAGS_DECL;
  
+    PERL_ARGS_ASSERT_SCAN_COMMIT;
+
      if ((l >= old_l) && ((l > old_l) || (data->flags & SF_BEFORE_EOL))) {
         SvSetMagicSV(*data->longest, data->last_found);
         if (*data->longest == data->longest_fixed) {
@@ -678,6 +703,8 @@ S_scan_commit(pTHX_ const RExC_state_t *pRExC_state, scan_data_t *data, I32 *min
  STATIC void
  S_cl_anything(const RExC_state_t *pRExC_state, struct regnode_charclass_class *cl)
  {
+    PERL_ARGS_ASSERT_CL_ANYTHING;
+
      ANYOF_CLASS_ZERO(cl);
      ANYOF_BITMAP_SETALL(cl);
      cl->flags = ANYOF_EOS|ANYOF_UNICODE_ALL;
@@ -691,6 +718,8 @@ S_cl_is_anything(const struct regnode_charclass_class *cl)
  {
      int value;
  
+    PERL_ARGS_ASSERT_CL_IS_ANYTHING;
+
      for (value = 0; value <= ANYOF_MAX; value += 2)
         if (ANYOF_CLASS_TEST(cl, value) && ANYOF_CLASS_TEST(cl, value + 1))
             return 1;
@@ -705,6 +734,8 @@ S_cl_is_anything(const struct regnode_charclass_class *cl)
  STATIC void
  S_cl_init(const RExC_state_t *pRExC_state, struct regnode_charclass_class *cl)
  {
+    PERL_ARGS_ASSERT_CL_INIT;
+
      Zero(cl, 1, struct regnode_charclass_class);
      cl->type = ANYOF;
      cl_anything(pRExC_state, cl);
@@ -713,6 +744,8 @@ S_cl_init(const RExC_state_t *pRExC_state, struct regnode_charclass_class *cl)
  STATIC void
  S_cl_init_zero(const RExC_state_t *pRExC_state, struct regnode_charclass_class *cl)
  {
+    PERL_ARGS_ASSERT_CL_INIT_ZERO;
+
      Zero(cl, 1, struct regnode_charclass_class);
      cl->type = ANYOF;
      cl_anything(pRExC_state, cl);
@@ -726,6 +759,7 @@ STATIC void
  S_cl_and(struct regnode_charclass_class *cl,
         const struct regnode_charclass_class *and_with)
  {
+    PERL_ARGS_ASSERT_CL_AND;
  
      assert(and_with->type == ANYOF);
      if (!(and_with->flags & ANYOF_CLASS)
@@ -764,6 +798,8 @@ S_cl_and(struct regnode_charclass_class *cl,
  STATIC void
  S_cl_or(const RExC_state_t *pRExC_state, struct regnode_charclass_class *cl, const struct regnode_charclass_class *or_with)
  {
+    PERL_ARGS_ASSERT_CL_OR;
+
      if (or_with->flags & ANYOF_INVERT) {
         /* We do not use
          * (B1 | CL1) | (!B2 & !CL2) = (B1 | !B2 & !CL2) | (CL1 | (!B2 & !CL2))
@@ -845,7 +881,7 @@ S_cl_or(const RExC_state_t *pRExC_state, struct regnode_charclass_class *cl, con
    Dumps the final compressed table form of the trie to Perl_debug_log.
    Used for debugging make_trie().
  */
- 
+
  STATIC void
  S_dump_trie(pTHX_ const struct _reg_trie_data *trie, HV *widecharmap,
             AV *revcharmap, U32 depth)
@@ -853,8 +889,10 @@ S_dump_trie(pTHX_ const struct _reg_trie_data *trie, HV *widecharmap,
      U32 state;
      SV *sv=sv_newmortal();
      int colwidth= widecharmap ? 6 : 4;
+    U16 word;
      GET_RE_DEBUG_FLAGS_DECL;
  
+    PERL_ARGS_ASSERT_DUMP_TRIE;
  
      PerlIO_printf( Perl_debug_log, "%*sChar : %-6s%-6s%-4s ",
          (int)depth * 2 + 2,"",
@@ -921,6 +959,13 @@ S_dump_trie(pTHX_ const struct _reg_trie_data *trie, HV *widecharmap,
          }
          PerlIO_printf( Perl_debug_log, "\n" );
      }
+    PerlIO_printf(Perl_debug_log, "%*sword_info N:(prev,len)=", (int)depth*2, "");
+    for (word=1; word <= trie->wordcount; word++) {
+       PerlIO_printf(Perl_debug_log, " %d:(%d,%d)",
+           (int)word, (int)(trie->wordinfo[word].prev),
+           (int)(trie->wordinfo[word].len));
+    }
+    PerlIO_printf(Perl_debug_log, "\n" );
  }    
  /*
    Dumps a fully constructed but uncompressed trie in list form.
@@ -937,6 +982,9 @@ S_dump_trie_interim_list(pTHX_ const struct _reg_trie_data *trie,
      SV *sv=sv_newmortal();
      int colwidth= widecharmap ? 6 : 4;
      GET_RE_DEBUG_FLAGS_DECL;
+
+    PERL_ARGS_ASSERT_DUMP_TRIE_INTERIM_LIST;
+
      /* print out the table precompression.  */
      PerlIO_printf( Perl_debug_log, "%*sState :Word | Transition Data\n%*s%s",
          (int)depth * 2 + 2,"", (int)depth * 2 + 2,"",
@@ -992,6 +1040,8 @@ S_dump_trie_interim_table(pTHX_ const struct _reg_trie_data *trie,
      SV *sv=sv_newmortal();
      int colwidth= widecharmap ? 6 : 4;
      GET_RE_DEBUG_FLAGS_DECL;
+
+    PERL_ARGS_ASSERT_DUMP_TRIE_INTERIM_TABLE;
      
      /*
         print out the table precompression so that we can do a visual check
@@ -1046,6 +1096,7 @@ S_dump_trie_interim_table(pTHX_ const struct _reg_trie_data *trie,
  
  #endif
  
+
  /* make_trie(startbranch,first,last,tail,word_count,flags,depth)
    startbranch: the first branch in the whole branch sequence
    first      : start branch of sequence of branch-exact nodes.
@@ -1226,8 +1277,6 @@ is the recommended Unicode-aware way of saying
      U16 dupe= trie->states[ state ].wordnum;                    \
      regnode * const noper_next = regnext( noper );              \
                                                                  \
-    if (trie->wordlen)                                          \
-        trie->wordlen[ curword ] = wordlen;                     \
      DEBUG_r({                                                   \
          /* store the word for dumping */                        \
          SV* tmp;                                                \
@@ -1239,6 +1288,9 @@ is the recommended Unicode-aware way of saying
      });                                                         \
                                                                  \
      curword++;                                                  \
+    trie->wordinfo[curword].prev   = 0;                         \
+    trie->wordinfo[curword].len    = wordlen;                   \
+    trie->wordinfo[curword].accept = state;                     \
                                                                  \
      if ( noper_next < tail ) {                                  \
          if (!trie->jump)                                        \
@@ -1251,16 +1303,11 @@ is the recommended Unicode-aware way of saying
      }                                                           \
                                                                  \
      if ( dupe ) {                                               \
-        /* So it's a dupe. This means we need to maintain a   */\
-        /* linked-list from the first to the next.            */\
-        /* we only allocate the nextword buffer when there    */\
-        /* a dupe, so first time we have to do the allocation */\
-        if (!trie->nextword)                                    \
-            trie->nextword = (U16 *)                                   \
-               PerlMemShared_calloc( word_count + 1, sizeof(U16));     \
-        while ( trie->nextword[dupe] )                          \
-            dupe= trie->nextword[dupe];                         \
-        trie->nextword[dupe]= curword;                          \
+        /* It's a dupe. Pre-insert into the wordinfo[].prev   */\
+        /* chain, so that when the bits of chain are later    */\
+        /* linked together, the dups appear in the chain      */\
+       trie->wordinfo[curword].prev = trie->wordinfo[dupe].prev; \
+       trie->wordinfo[dupe].prev = curword;                    \
      } else {                                                    \
          /* we haven't inserted this word yet.                */ \
          trie->states[ state ].wordnum = curword;                \
@@ -1298,6 +1345,7 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, regnode *firs
      regnode *jumper = NULL;
      regnode *nextbranch = NULL;
      regnode *convert = NULL;
+    U32 *prev_states; /* temp array mapping each state to previous one */
      /* we just use folder as a flag in utf8 */
      const U8 * const folder = ( flags == EXACTF
                         ? PL_fold
@@ -1319,6 +1367,8 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, regnode *firs
  #endif
      SV *re_trie_maxbuff;
      GET_RE_DEBUG_FLAGS_DECL;
+
+    PERL_ARGS_ASSERT_MAKE_TRIE;
  #ifndef DEBUGGING
      PERL_UNUSED_ARG(depth);
  #endif
@@ -1331,6 +1381,9 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, regnode *firs
      trie->charmap = (U16 *) PerlMemShared_calloc( 256, sizeof(U16) );
      if (!(UTF && folder))
         trie->bitmap = (char *) PerlMemShared_calloc( ANYOF_BITMAP_SIZE, 1 );
+    trie->wordinfo = (reg_trie_wordinfo *) PerlMemShared_calloc(
+                       trie->wordcount+1, sizeof(reg_trie_wordinfo));
+
      DEBUG_r({
          trie_words = newAV();
      });
@@ -1463,7 +1516,6 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, regnode *firs
                 (int)TRIE_CHARCOUNT(trie), trie->uniquecharcount,
                 (int)trie->minlen, (int)trie->maxlen )
      );
-    trie->wordlen = (U32 *) PerlMemShared_calloc( word_count, sizeof(U32) );
  
      /*
          We now know what we are dealing with in terms of unique chars and
@@ -1487,6 +1539,9 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, regnode *firs
      */
  
  
+    Newx(prev_states, TRIE_CHARCOUNT(trie) + 2, U32);
+    prev_states[1] = 0;
+
      if ( (IV)( ( TRIE_CHARCOUNT(trie) + 1 ) * trie->uniquecharcount + 1) > SvIV(re_trie_maxbuff) ) {
          /*
              Second Pass -- Array Of Lists Representation
@@ -1557,6 +1612,7 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, regnode *firs
                          }
                          if ( ! newstate ) {
                              newstate = next_alloc++;
+                           prev_states[newstate] = state;
                              TRIE_LIST_PUSH( state, charid, newstate );
                              transcount++;
                          }
@@ -1740,6 +1796,8 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, regnode *firs
                          if ( !trie->trans[ state + charid ].next ) {
                              trie->trans[ state + charid ].next = next_alloc;
                              trie->trans[ state ].check++;
+                           prev_states[TRIE_NODENUM(next_alloc)]
+                                   = TRIE_NODENUM(state);
                              next_alloc += trie->uniquecharcount;
                          }
                          state = trie->trans[ state + charid ].next;
@@ -1887,9 +1945,6 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, regnode *firs
         PerlMemShared_realloc( trie->trans, trie->lasttrans
                                * sizeof(reg_trie_trans) );
  
-    /* and now dump out the compressed format */
-    DEBUG_TRIE_COMPILE_r(dump_trie(trie, widecharmap, revcharmap, depth+1));
-
      {   /* Modify the program and insert the new TRIE node*/ 
          U8 nodetype =(U8)(flags & 0xFF);
          char *str=NULL;
@@ -1974,7 +2029,7 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, regnode *firs
                                      if ( folder )
                                          TRIE_BITMAP_SET(trie, folder[ *ch ]);
                                      DEBUG_OPTIMISE_r(
-                                        PerlIO_printf(Perl_debug_log, (char*)ch)
+                                        PerlIO_printf(Perl_debug_log, "%s", (char*)ch)
                                      );
                                 }
                             }
@@ -2019,6 +2074,7 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, regnode *firs
                     break;
                 }
             }
+           trie->prefixlen = (state-1);
              if (str) {
                  regnode *n = convert+NODE_SZ_STR(convert);
                  NEXT_OFF(convert) = NODE_SZ_STR(convert);
@@ -2114,6 +2170,42 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, regnode *firs
              Set_Node_Offset_Length(convert,mjd_offset,mjd_nodelen);
          });
      } /* end node insert */
+
+    /*  Finish populating the prev field of the wordinfo array.  Walk back
+     *  from each accept state until we find another accept state, and if
+     *  we do, point the first word's .prev field at the second word. If a
+     *  word already has a .prev field set, skip it. This will be the
+     *  case either if we've already processed that word's accept state,
+     *  or if that state had multiple words, and the overspill words
+     *  were already linked up earlier.
+     */
+    {
+       U16 word;
+       U32 state;
+       U16 prev;
+
+       for (word=1; word <= trie->wordcount; word++) {
+           prev = 0;
+           if (trie->wordinfo[word].prev)
+               continue;
+           state = trie->wordinfo[word].accept;
+           while (state) {
+               state = prev_states[state];
+               if (!state)
+                   break;
+               prev = trie->states[state].wordnum;
+               if (prev)
+                   break;
+           }
+           trie->wordinfo[word].prev = prev;
+       }
+       Safefree(prev_states);
+    }
+
+
+    /* and now dump out the compressed format */
+    DEBUG_TRIE_COMPILE_r(dump_trie(trie, widecharmap, revcharmap, depth+1));
+
      RExC_rxi->data->data[ data_slot + 1 ] = (void*)widecharmap;
  #ifdef DEBUGGING
      RExC_rxi->data->data[ data_slot + TRIE_WORDS_OFFSET ] = (void*)trie_words;
@@ -2163,6 +2255,8 @@ S_make_trie_failtable(pTHX_ RExC_state_t *pRExC_state, regnode *source,  regnode
      reg_ac_data *aho;
      const U32 data_slot = add_data( pRExC_state, 1, "T" );
      GET_RE_DEBUG_FLAGS_DECL;
+
+    PERL_ARGS_ASSERT_MAKE_TRIE_FAILTABLE;
  #ifndef DEBUGGING
      PERL_UNUSED_ARG(depth);
  #endif
@@ -2279,6 +2373,8 @@ S_join_exact(pTHX_ RExC_state_t *pRExC_state, regnode *scan, I32 *min, U32 flags
  #else
      PERL_UNUSED_ARG(depth);
  #endif
+
+    PERL_ARGS_ASSERT_JOIN_EXACT;
  #ifndef EXPERIMENTAL_INPLACESCAN
      PERL_UNUSED_ARG(flags);
      PERL_UNUSED_ARG(val);
@@ -2489,9 +2585,10 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
      regnode *first_non_open = scan;
      I32 stopmin = I32_MAX;
      scan_frame *frame = NULL;
-
      GET_RE_DEBUG_FLAGS_DECL;
  
+    PERL_ARGS_ASSERT_STUDY_CHUNK;
+
  #ifdef DEBUGGING
      StructCopy(&zero_scan_data, &data_fake, scan_data_t);
  #endif
@@ -2793,13 +2890,18 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                                  }
                              } else {
  /* 
-    Currently we assume that the trie can handle unicode and ascii
-    matches fold cased matches. If this proves true then the following
-    define will prevent tries in this situation. 
-    
-    #define TRIE_TYPE_IS_SAFE (UTF || optype==EXACT)
-*/
+    Currently we do not believe that the trie logic can
+    handle case insensitive matching properly when the
+    pattern is not unicode (thus forcing unicode semantics).
+
+    If/when this is fixed the following define can be swapped
+    in below to fully enable trie logic.
+
  #define TRIE_TYPE_IS_SAFE 1
+
+*/
+#define TRIE_TYPE_IS_SAFE (UTF || optype==EXACT)
+
                                  if ( last && TRIE_TYPE_IS_SAFE ) {
                                      make_trie( pRExC_state, 
                                              startbranch, first, cur, tail, count, 
@@ -3025,7 +3127,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
             }
             flags &= ~SCF_DO_STCLASS;
         }
-       else if (strchr((const char*)PL_varies,OP(scan))) {
+       else if (REGNODE_VARIES(OP(scan))) {
             I32 mincount, maxcount, minnext, deltanext, fl = 0;
             I32 f = flags, pos_before = 0;
             regnode * const oscan = scan;
@@ -3105,7 +3207,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                 /* These are the cases when once a subexpression
                    fails at a particular position, it cannot succeed
                    even after backtracking at the enclosing scope.
-               
+
                    XXXX what if minimal match and we are at the
                         initial run of {n,m}? */
                 if ((mincount != maxcount - 1) && (maxcount != REG_INFTY))
@@ -3150,11 +3252,10 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                     (next_is_eval || !(mincount == 0 && maxcount == 1))
                     && (minnext == 0) && (deltanext == 0)
                     && data && !(data->flags & (SF_HAS_PAR|SF_IN_PAR))
-                   && maxcount <= REG_INFTY/3 /* Complement check for big count */
-                   && ckWARN(WARN_REGEXP))
+                   && maxcount <= REG_INFTY/3) /* Complement check for big count */
                 {
-                   vWARN(RExC_parse,
-                         "Quantifier unexpected on zero-length expression");
+                   ckWARNreg(RExC_parse,
+                             "Quantifier unexpected on zero-length expression");
                 }
  
                 min += minnext * mincount;
@@ -3178,7 +3279,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
  
                     /* Skip open. */
                     nxt = regnext(nxt);
-                   if (!strchr((const char*)PL_simple,OP(nxt))
+                   if (!REGNODE_SIMPLE(OP(nxt))
                         && !(PL_regkind[OP(nxt)] == EXACT
                              && STR_LEN(nxt) == 1))
                         goto nogo;
@@ -3199,11 +3300,11 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
  
  #ifdef DEBUGGING
                     OP(nxt1 + 1) = OPTIMIZED; /* was count. */
-                   NEXT_OFF(nxt1+ 1) = 0; /* just for consistancy. */
-                   NEXT_OFF(nxt2) = 0; /* just for consistancy with CURLY. */
+                   NEXT_OFF(nxt1+ 1) = 0; /* just for consistency. */
+                   NEXT_OFF(nxt2) = 0; /* just for consistency with CURLY. */
                     OP(nxt) = OPTIMIZED;        /* was CLOSE. */
                     OP(nxt + 1) = OPTIMIZED; /* was count. */
-                   NEXT_OFF(nxt+ 1) = 0; /* just for consistancy. */
+                   NEXT_OFF(nxt+ 1) = 0; /* just for consistency. */
  #endif
                 }
               nogo:
@@ -3226,12 +3327,10 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                         nxt = nxt2;
                     OP(nxt2)  = SUCCEED; /* Whas WHILEM */
                     /* Need to optimize away parenths. */
-                   if (data->flags & SF_IN_PAR) {
+                   if ((data->flags & SF_IN_PAR) && OP(nxt) == CLOSE) {
                         /* Set the parenth number.  */
                         regnode *nxt1 = NEXTOPER(oscan) + EXTRA_STEP_2ARGS; /* OPEN*/
  
-                       if (OP(nxt) != CLOSE)
-                           FAIL("Panic opt close");
                         oscan->flags = (U8)ARG(nxt);
                         if (RExC_open_parens) {
                             RExC_open_parens[ARG(nxt1)-1]=oscan; /*open->CURLYM*/
@@ -3249,7 +3348,6 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
  #if 0
                         while ( nxt1 && (OP(nxt1) != WHILEM)) {
                             regnode *nnxt = regnext(nxt1);
-                       
                             if (nnxt == nxt) {
                                 if (reg_off_by_arg[OP(nxt1)])
                                     ARG_SET(nxt1, nxt2 - nxt1);
@@ -3316,7 +3414,6 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
  
                         if (UTF)
                             old = utf8_hop((U8*)s, old) - (U8*)s;
-                       
                         l -= old;
                         /* Get the added string: */
                         last_str = newSVpvn_utf8(s  + old, l, UTF);
@@ -3404,13 +3501,13 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                 if (flags & SCF_DO_STCLASS_AND) {
                      for (value = 0; value < 256; value++)
                          if (!is_VERTWS_cp(value))
-                            ANYOF_BITMAP_CLEAR(data->start_class, value);  
-                }                                                              
-                else {                                                         
+                            ANYOF_BITMAP_CLEAR(data->start_class, value);
+                }
+                else {
                      for (value = 0; value < 256; value++)
                          if (is_VERTWS_cp(value))
-                            ANYOF_BITMAP_SET(data->start_class, value);           
-                }                                                              
+                            ANYOF_BITMAP_SET(data->start_class, value);
+                }
                  if (flags & SCF_DO_STCLASS_OR)
                     cl_and(data->start_class, and_withp);
                 flags &= ~SCF_DO_STCLASS;
@@ -3423,7 +3520,6 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                 data->pos_delta += 1;
                 data->longest = &(data->longest_float);
             }
-           
         }
         else if (OP(scan) == FOLDCHAR) {
             int d = ARG(scan)==0xDF ? 1 : 2;
@@ -3437,7 +3533,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                 data->longest = &(data->longest_float);
             }
         }
-       else if (strchr((const char*)PL_simple,OP(scan))) {
+       else if (REGNODE_SIMPLE(OP(scan))) {
             int value = 0;
  
             if (flags & SCF_DO_SUBSTR) {
@@ -3481,19 +3577,37 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                     if (flags & SCF_DO_STCLASS_AND) {
                         if (!(data->start_class->flags & ANYOF_LOCALE)) {
                             ANYOF_CLASS_CLEAR(data->start_class,ANYOF_NALNUM);
-                           for (value = 0; value < 256; value++)
-                               if (!isALNUM(value))
-                                   ANYOF_BITMAP_CLEAR(data->start_class, value);
+                            if (FLAGS(scan) & USE_UNI) {
+                                for (value = 0; value < 256; value++) {
+                                    if (!isWORDCHAR_L1(value)) {
+                                        ANYOF_BITMAP_CLEAR(data->start_class, value);
+                                    }
+                                }
+                            } else {
+                                for (value = 0; value < 256; value++) {
+                                    if (!isALNUM(value)) {
+                                        ANYOF_BITMAP_CLEAR(data->start_class, value);
+                                    }
+                                }
+                            }
                         }
                     }
                     else {
                         if (data->start_class->flags & ANYOF_LOCALE)
                             ANYOF_CLASS_SET(data->start_class,ANYOF_ALNUM);
-                       else {
-                           for (value = 0; value < 256; value++)
-                               if (isALNUM(value))
-                                   ANYOF_BITMAP_SET(data->start_class, value);                 
-                       }
+                        else if (FLAGS(scan) & USE_UNI) {
+                            for (value = 0; value < 256; value++) {
+                                if (isWORDCHAR_L1(value)) {
+                                    ANYOF_BITMAP_SET(data->start_class, value);
+                                }
+                            }
+                        } else {
+                            for (value = 0; value < 256; value++) {
+                                if (isALNUM(value)) {
+                                    ANYOF_BITMAP_SET(data->start_class, value);
+                                }
+                            }
+                        }
                     }
                     break;
                 case ALNUML:
@@ -3510,9 +3624,19 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                     if (flags & SCF_DO_STCLASS_AND) {
                         if (!(data->start_class->flags & ANYOF_LOCALE)) {
                             ANYOF_CLASS_CLEAR(data->start_class,ANYOF_ALNUM);
-                           for (value = 0; value < 256; value++)
-                               if (isALNUM(value))
-                                   ANYOF_BITMAP_CLEAR(data->start_class, value);
+                            if (FLAGS(scan) & USE_UNI) {
+                                for (value = 0; value < 256; value++) {
+                                    if (isWORDCHAR_L1(value)) {
+                                        ANYOF_BITMAP_CLEAR(data->start_class, value);
+                                    }
+                                }
+                            } else {
+                                for (value = 0; value < 256; value++) {
+                                    if (isALNUM(value)) {
+                                        ANYOF_BITMAP_CLEAR(data->start_class, value);
+                                    }
+                                }
+                           }
                         }
                     }
                     else {
@@ -3521,7 +3645,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                         else {
                             for (value = 0; value < 256; value++)
                                 if (!isALNUM(value))
-                                   ANYOF_BITMAP_SET(data->start_class, value);                 
+                                   ANYOF_BITMAP_SET(data->start_class, value);
                         }
                     }
                     break;
@@ -3539,18 +3663,37 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                     if (flags & SCF_DO_STCLASS_AND) {
                         if (!(data->start_class->flags & ANYOF_LOCALE)) {
                             ANYOF_CLASS_CLEAR(data->start_class,ANYOF_NSPACE);
-                           for (value = 0; value < 256; value++)
-                               if (!isSPACE(value))
-                                   ANYOF_BITMAP_CLEAR(data->start_class, value);
+                           if (FLAGS(scan) & USE_UNI) {
+                                for (value = 0; value < 256; value++) {
+                                    if (!isSPACE_L1(value)) {
+                                        ANYOF_BITMAP_CLEAR(data->start_class, value);
+                                    }
+                                }
+                            } else {
+                                for (value = 0; value < 256; value++) {
+                                    if (!isSPACE(value)) {
+                                        ANYOF_BITMAP_CLEAR(data->start_class, value);
+                                    }
+                                }
+                            }
                         }
                     }
                     else {
-                       if (data->start_class->flags & ANYOF_LOCALE)
+                        if (data->start_class->flags & ANYOF_LOCALE) {
                             ANYOF_CLASS_SET(data->start_class,ANYOF_SPACE);
-                       else {
-                           for (value = 0; value < 256; value++)
-                               if (isSPACE(value))
-                                   ANYOF_BITMAP_SET(data->start_class, value);                 
+                        }
+                        else if (FLAGS(scan) & USE_UNI) {
+                            for (value = 0; value < 256; value++) {
+                                if (isSPACE_L1(value)) {
+                                    ANYOF_BITMAP_SET(data->start_class, value);
+                                }
+                            }
+                        } else {
+                            for (value = 0; value < 256; value++) {
+                                if (isSPACE(value)) {
+                                    ANYOF_BITMAP_SET(data->start_class, value);
+                                }
+                            }
                         }
                     }
                     break;
@@ -3568,19 +3711,38 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                     if (flags & SCF_DO_STCLASS_AND) {
                         if (!(data->start_class->flags & ANYOF_LOCALE)) {
                             ANYOF_CLASS_CLEAR(data->start_class,ANYOF_SPACE);
-                           for (value = 0; value < 256; value++)
-                               if (isSPACE(value))
-                                   ANYOF_BITMAP_CLEAR(data->start_class, value);
+                            if (FLAGS(scan) & USE_UNI) {
+                                for (value = 0; value < 256; value++) {
+                                    if (isSPACE_L1(value)) {
+                                        ANYOF_BITMAP_CLEAR(data->start_class, value);
+                                    }
+                                }
+                            } else {
+                                for (value = 0; value < 256; value++) {
+                                    if (isSPACE(value)) {
+                                        ANYOF_BITMAP_CLEAR(data->start_class, value);
+                                    }
+                                }
+                            }
                         }
                     }
                     else {
                         if (data->start_class->flags & ANYOF_LOCALE)
                             ANYOF_CLASS_SET(data->start_class,ANYOF_NSPACE);
-                       else {
-                           for (value = 0; value < 256; value++)
-                               if (!isSPACE(value))
-                                   ANYOF_BITMAP_SET(data->start_class, value);                 
-                       }
+                        else if (FLAGS(scan) & USE_UNI) {
+                            for (value = 0; value < 256; value++) {
+                                if (!isSPACE_L1(value)) {
+                                    ANYOF_BITMAP_SET(data->start_class, value);
+                                }
+                            }
+                        }
+                        else {
+                            for (value = 0; value < 256; value++) {
+                                if (!isSPACE(value)) {
+                                    ANYOF_BITMAP_SET(data->start_class, value);
+                                }
+                            }
+                        }
                     }
                     break;
                 case NSPACEL:
@@ -3610,7 +3772,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                         else {
                             for (value = 0; value < 256; value++)
                                 if (isDIGIT(value))
-                                   ANYOF_BITMAP_SET(data->start_class, value);                 
+                                   ANYOF_BITMAP_SET(data->start_class, value);
                         }
                     }
                     break;
@@ -3627,7 +3789,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                         else {
                             for (value = 0; value < 256; value++)
                                 if (!isDIGIT(value))
-                                   ANYOF_BITMAP_SET(data->start_class, value);                 
+                                   ANYOF_BITMAP_SET(data->start_class, value);
                         }
                     }
                     break;
@@ -3698,11 +3860,22 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                      data->whilem_c = data_fake.whilem_c;
                  }
                  if (f & SCF_DO_STCLASS_AND) {
-                    const int was = (data->start_class->flags & ANYOF_EOS);
-
-                    cl_and(data->start_class, &intrnl);
-                    if (was)
-                        data->start_class->flags |= ANYOF_EOS;
+                   if (flags & SCF_DO_STCLASS_OR) {
+                       /* OR before, AND after: ideally we would recurse with
+                        * data_fake to get the AND applied by study of the
+                        * remainder of the pattern, and then derecurse;
+                        * *** HACK *** for now just treat as "no information".
+                        * See [perl #56690].
+                        */
+                       cl_init(pRExC_state, data->start_class);
+                   }  else {
+                       /* AND before and after: combine and continue */
+                       const int was = (data->start_class->flags & ANYOF_EOS);
+
+                       cl_and(data->start_class, &intrnl);
+                       if (was)
+                           data->start_class->flags |= ANYOF_EOS;
+                   }
                  }
             }
  #if PERL_ENABLE_POSITIVE_ASSERTION_STUDY
@@ -4049,6 +4222,8 @@ S_add_data(RExC_state_t *pRExC_state, U32 n, const char *s)
  {
      U32 count = RExC_rxi->data ? RExC_rxi->data->count : 0;
  
+    PERL_ARGS_ASSERT_ADD_DATA;
+
      Renewc(RExC_rxi->data,
            sizeof(*RExC_rxi->data) + sizeof(void*) * (count + n - 1),
            char, struct reg_data);
@@ -4128,10 +4303,13 @@ extern const struct regexp_engine my_reg_engine;
  
  #ifndef PERL_IN_XSUB_RE 
  REGEXP *
-Perl_pregcomp(pTHX_ const SV * const pattern, const U32 flags)
+Perl_pregcomp(pTHX_ SV * const pattern, const U32 flags)
  {
      dVAR;
      HV * const table = GvHV(PL_hintgv);
+
+    PERL_ARGS_ASSERT_PREGCOMP;
+
      /* Dispatch a request to compile a regexp to correct 
         regexp engine. */
      if (table) {
@@ -4151,41 +4329,87 @@ Perl_pregcomp(pTHX_ const SV * const pattern, const U32 flags)
  #endif
  
  REGEXP *
-Perl_re_compile(pTHX_ const SV * const pattern, const U32 pm_flags)
+Perl_re_compile(pTHX_ SV * const pattern, U32 pm_flags)
  {
      dVAR;
      REGEXP *rx;
      struct regexp *r;
      register regexp_internal *ri;
      STRLEN plen;
-    char*  exp = SvPV((SV*)pattern, plen);
-    char* xend = exp + plen;
+    char  *exp;
+    char* xend;
      regnode *scan;
      I32 flags;
      I32 minlen = 0;
      I32 sawplus = 0;
      I32 sawopen = 0;
+    U8 jump_ret = 0;
+    dJMPENV;
      scan_data_t data;
      RExC_state_t RExC_state;
      RExC_state_t * const pRExC_state = &RExC_state;
  #ifdef TRIE_STUDY_OPT    
-    int restudied= 0;
+    int restudied;
      RExC_state_t copyRExC_state;
  #endif    
      GET_RE_DEBUG_FLAGS_DECL;
+
+    PERL_ARGS_ASSERT_RE_COMPILE;
+
      DEBUG_r(if (!PL_colorset) reginitcolors());
  
-    RExC_utf8 = RExC_orig_utf8 = pm_flags & RXf_UTF8;
+    RExC_utf8 = RExC_orig_utf8 = SvUTF8(pattern);
  
-    DEBUG_COMPILE_r({
-        SV *dsv= sv_newmortal();
-        RE_PV_QUOTED_DECL(s, RExC_utf8,
-            dsv, exp, plen, 60);
-        PerlIO_printf(Perl_debug_log, "%sCompiling REx%s %s\n",
-                      PL_colors[4],PL_colors[5],s);
-    });
  
-redo_first_pass:
+    /* Longjmp back to here if have to switch in midstream to utf8 */
+    if (! RExC_orig_utf8) {
+       JMPENV_PUSH(jump_ret);
+    }
+
+    if (jump_ret == 0) {    /* First time through */
+        exp = SvPV(pattern, plen);
+        xend = exp + plen;
+
+        DEBUG_COMPILE_r({
+            SV *dsv= sv_newmortal();
+            RE_PV_QUOTED_DECL(s, RExC_utf8,
+                dsv, exp, plen, 60);
+            PerlIO_printf(Perl_debug_log, "%sCompiling REx%s %s\n",
+                           PL_colors[4],PL_colors[5],s);
+        });
+    }
+    else {  /* longjumped back */
+        STRLEN len = plen;
+
+        /* If the cause for the longjmp was other than changing to utf8, pop
+         * our own setjmp, and longjmp to the correct handler */
+       if (jump_ret != UTF8_LONGJMP) {
+           JMPENV_POP;
+           JMPENV_JUMP(jump_ret);
+       }
+
+       GET_RE_DEBUG_FLAGS;
+
+        /* It's possible to write a regexp in ascii that represents Unicode
+        codepoints outside of the byte range, such as via \x{100}. If we
+        detect such a sequence we have to convert the entire pattern to utf8
+        and then recompile, as our sizing calculation will have been based
+        on 1 byte == 1 character, but we will need to use utf8 to encode
+        at least some part of the pattern, and therefore must convert the whole
+        thing.
+        -- dmq */
+        DEBUG_PARSE_r(PerlIO_printf(Perl_debug_log,
+           "UTF8 mismatch! Converting to utf8 for resizing and compile\n"));
+        exp = (char*)Perl_bytes_to_utf8(aTHX_ (U8*)SvPV(pattern, plen), &len);
+        xend = exp + len;
+        RExC_orig_utf8 = RExC_utf8 = 1;
+        SAVEFREEPV(exp);
+    }
+
+#ifdef TRIE_STUDY_OPT
+    restudied = 0;
+#endif
+
      RExC_precomp = exp;
      RExC_flags = pm_flags;
      RExC_sawback = 0;
@@ -4205,7 +4429,6 @@ redo_first_pass:
      RExC_size = 0L;
      RExC_emit = &PL_regdummy;
      RExC_whilem_seen = 0;
-    RExC_charnames = NULL;
      RExC_open_parens = NULL;
      RExC_close_parens = NULL;
      RExC_opend = NULL;
@@ -4225,24 +4448,14 @@ redo_first_pass:
         RExC_precomp = NULL;
         return(NULL);
      }
-    if (RExC_utf8 && !RExC_orig_utf8) {
-        /* It's possible to write a regexp in ascii that represents Unicode
-        codepoints outside of the byte range, such as via \x{100}. If we
-        detect such a sequence we have to convert the entire pattern to utf8
-        and then recompile, as our sizing calculation will have been based
-        on 1 byte == 1 character, but we will need to use utf8 to encode
-        at least some part of the pattern, and therefore must convert the whole
-        thing.
-        XXX: somehow figure out how to make this less expensive...
-        -- dmq */
-        STRLEN len = plen;
-        DEBUG_PARSE_r(PerlIO_printf(Perl_debug_log,
-           "UTF8 mismatch! Converting to utf8 for resizing and compile\n"));
-        exp = (char*)Perl_bytes_to_utf8(aTHX_ (U8*)exp, &len);
-        xend = exp + len;
-        RExC_orig_utf8 = RExC_utf8;
-        SAVEFREEPV(exp);
-        goto redo_first_pass;
+
+    /* Here, the first pass has finished.  Pop our setjmp, which (for
+     * efficiency) was pushed only if the passed-in string wasn't in utf8, as
+     * shown by RExC_orig_utf8.  If the first pass was redone, that variable
+     * is 1 here even though the original string wasn't utf8; that case is
+     * recognized instead by jump_ret recording that a long jump occurred */
+    if (jump_ret == UTF8_LONGJMP || ! RExC_orig_utf8) {
+       JMPENV_POP;
      }
      DEBUG_PARSE_r({
          PerlIO_printf(Perl_debug_log, 
@@ -4264,7 +4477,7 @@ redo_first_pass:
      /* Allocate space and zero-initialize. Note, the two step process 
         of zeroing when in debug mode, thus anything assigned has to 
         happen after that */
-    rx = newSV_type(SVt_REGEXP);
+    rx = (REGEXP*) newSV_type(SVt_REGEXP);
      r = (struct regexp*)SvANY(rx);
      Newxc(ri, sizeof(regexp_internal) + (unsigned)RExC_size * sizeof(regnode),
          char, regexp_internal);
@@ -4284,38 +4497,56 @@ redo_first_pass:
      r->extflags = pm_flags;
      {
          bool has_p     = ((r->extflags & RXf_PMf_KEEPCOPY) == RXf_PMf_KEEPCOPY);
-       bool has_minus = ((r->extflags & RXf_PMf_STD_PMMOD) != RXf_PMf_STD_PMMOD);
+        bool has_charset = cBOOL(r->extflags & (RXf_PMf_LOCALE|RXf_PMf_UNICODE));
+
+        /* The caret is output if there are any defaults: if not all the STD
+         * flags are set, or if no character set specifier is needed */
+        bool has_default =
+                    (((r->extflags & RXf_PMf_STD_PMMOD) != RXf_PMf_STD_PMMOD)
+                    || ! has_charset);
         bool has_runon = ((RExC_seen & REG_SEEN_RUN_ON_COMMENT)==REG_SEEN_RUN_ON_COMMENT);
         U16 reganch = (U16)((r->extflags & RXf_PMf_STD_PMMOD)
                             >> RXf_PMf_STD_PMMOD_SHIFT);
         const char *fptr = STD_PAT_MODS;        /*"msix"*/
         char *p;
-       const STRLEN wraplen = plen + has_minus + has_p + has_runon
+        /* Allocate for the worst case, which is all the std flags are turned
+         * on.  If more precision is desired, we could do a population count of
+         * the flags set.  This could be done with a small lookup table, or by
+         * shifting, masking and adding, or even, when available, assembly
+         * language for a machine-language population count.
+         * We never output a minus, as all those are defaults, so are
+         * covered by the caret */
+       const STRLEN wraplen = plen + has_p + has_runon
+            + has_default       /* If needs a caret */
+            + has_charset       /* If needs a character set specifier */
              + (sizeof(STD_PAT_MODS) - 1)
              + (sizeof("(?:)") - 1);
  
-       p = sv_grow(rx, wraplen + 1);
-       SvCUR_set(rx, wraplen);
+        p = sv_grow(MUTABLE_SV(rx), wraplen + 1); /* +1 for the ending NUL */
         SvPOK_on(rx);
+       SvFLAGS(rx) |= SvUTF8(pattern);
          *p++='('; *p++='?';
+
+        /* If a default, cover it using the caret */
+        if (has_default) {
+            *p++= DEFAULT_PAT_MOD;
+        }
+        if (has_charset) {
+            if (r->extflags & RXf_PMf_LOCALE) {
+                *p++ = LOCALE_PAT_MOD;
+            } else {
+                *p++ = UNICODE_PAT_MOD;
+            }
+        }
          if (has_p)
              *p++ = KEEPCOPY_PAT_MOD; /*'p'*/
          {
-            char *r = p + (sizeof(STD_PAT_MODS) - 1) + has_minus - 1;
-            char *colon = r + 1;
              char ch;
-
              while((ch = *fptr++)) {
                  if(reganch & 1)
                      *p++ = ch;
-                else
-                    *r-- = ch;
                  reganch >>= 1;
              }
-            if(has_minus) {
-                *r = '-';
-                p = colon;
-            }
          }
  
          *p++ = ':';
@@ -4327,6 +4558,7 @@ redo_first_pass:
              *p++ = '\n';
          *p++ = ')';
          *p = 0;
+       SvCUR_set(rx, p - SvPVX_const(rx));
      }
  
      r->intflags = 0;
@@ -4382,7 +4614,10 @@ reStudy:
      Zero(r->substrs, 1, struct reg_substr_data);
  
  #ifdef TRIE_STUDY_OPT
-    if ( restudied ) {
+    if (!restudied) {
+        StructCopy(&zero_scan_data, &data, scan_data_t);
+        copyRExC_state = RExC_state;
+    } else {
          U32 seen=RExC_seen;
          DEBUG_OPTIMISE_r(PerlIO_printf(Perl_debug_log,"Restudying\n"));
          
@@ -4397,9 +4632,6 @@ reStudy:
             SvREFCNT_dec(data.last_found);
         }
         StructCopy(&zero_scan_data, &data, scan_data_t);
-    } else {
-        StructCopy(&zero_scan_data, &data, scan_data_t);
-        copyRExC_state = RExC_state;
      }
  #else
      StructCopy(&zero_scan_data, &data, scan_data_t);
@@ -4410,7 +4642,7 @@ reStudy:
      /*dmq: removed as part of de-PMOP: pm->op_pmflags = RExC_flags; */
   
      if (UTF)
-        r->extflags |= RXf_UTF8;       /* Unicode in it? */
+       SvUTF8_on(rx);  /* Unicode in it? */
      ri->regstclass = NULL;
      if (RExC_naughty >= 10)    /* Probably an expensive pattern. */
         r->intflags |= PREGf_NAUGHTY;
@@ -4427,7 +4659,17 @@ reStudy:
          regnode *first= scan;
          regnode *first_next= regnext(first);
         
-       /* Skip introductions and multiplicators >= 1. */
+       /*
+        * Skip introductions and multiplicators >= 1
+        * so that we can extract the 'meat' of the pattern that must 
+        * match in the large if() sequence following.
+        * NOTE that EXACT is NOT covered here, as it is normally
+        * picked up by the optimiser separately. 
+        *
+        * This is unfortunate as the optimiser isn't handling lookahead
+        * properly currently.
+        *
+        */
         while ((OP(first) == OPEN && (sawopen = 1)) ||
                /* An OR of *one* alternative - should not happen now. */
             (OP(first) == BRANCH && OP(first_next) != BRANCH) ||
@@ -4439,16 +4681,17 @@ reStudy:
             (PL_regkind[OP(first)] == CURLY && ARG1(first) > 0) ||
             (OP(first) == NOTHING && PL_regkind[OP(first_next)] != END ))
         {
-               
+               /* 
+                * The only op that could be a regnode is PLUS; all the rest
+                * will be regnode_1 or regnode_2.
+                *
+                */
                 if (OP(first) == PLUS)
                     sawplus = 1;
                 else
                     first += regarglen[OP(first)];
-               if (OP(first) == IFMATCH) {
-                   first = NEXTOPER(first);
-                   first += EXTRA_STEP_2ARGS;
-               } else  /* XXX possible optimisation for /(?=)/  */
-                   first = NEXTOPER(first);
+               
+               first = NEXTOPER(first);
                 first_next= regnext(first);
         }
  
@@ -4484,7 +4727,7 @@ reStudy:
             ri->regstclass = trie_op;
         }
  #endif 
-       else if (strchr((const char*)PL_simple,OP(first)))
+       else if (REGNODE_SIMPLE(OP(first)))
             ri->regstclass = first;
         else if (PL_regkind[OP(first)] == BOUND ||
                  PL_regkind[OP(first)] == NBOUND)
@@ -4792,9 +5035,9 @@ reStudy:
      if (RExC_seen & REG_SEEN_CUTGROUP)
         r->intflags |= PREGf_CUTGROUP_SEEN;
      if (RExC_paren_names)
-        r->paren_names = (HV*)SvREFCNT_inc(RExC_paren_names);
+        RXp_PAREN_NAMES(r) = MUTABLE_HV(SvREFCNT_inc(RExC_paren_names));
      else
-        r->paren_names = NULL;
+        RXp_PAREN_NAMES(r) = NULL;
  
  #ifdef STUPID_PATTERN_CHECKS            
      if (RX_PRELEN(rx) == 0)
@@ -4825,7 +5068,7 @@ reStudy:
  #endif
  #ifdef DEBUGGING
      if (RExC_paren_names) {
-        ri->name_list_idx = add_data( pRExC_state, 1, "p" );
+        ri->name_list_idx = add_data( pRExC_state, 1, "a" );
          ri->data->data[ri->name_list_idx] = (void*)SvREFCNT_inc(RExC_paren_name_list);
      } else
  #endif
@@ -4868,12 +5111,14 @@ SV*
  Perl_reg_named_buff(pTHX_ REGEXP * const rx, SV * const key, SV * const value,
                      const U32 flags)
  {
+    PERL_ARGS_ASSERT_REG_NAMED_BUFF;
+
      PERL_UNUSED_ARG(value);
  
      if (flags & RXapif_FETCH) {
          return reg_named_buff_fetch(rx, key, flags);
      } else if (flags & (RXapif_STORE | RXapif_DELETE | RXapif_CLEAR)) {
-        Perl_croak(aTHX_ PL_no_modify);
+        Perl_croak_no_modify(aTHX);
          return NULL;
      } else if (flags & RXapif_EXISTS) {
          return reg_named_buff_exists(rx, key, flags)
@@ -4893,6 +5138,7 @@ SV*
  Perl_reg_named_buff_iter(pTHX_ REGEXP * const rx, const SV * const lastkey,
                           const U32 flags)
  {
+    PERL_ARGS_ASSERT_REG_NAMED_BUFF_ITER;
      PERL_UNUSED_ARG(lastkey);
  
      if (flags & RXapif_FIRSTKEY)
@@ -4912,11 +5158,14 @@ Perl_reg_named_buff_fetch(pTHX_ REGEXP * const r, SV * const namesv,
      AV *retarray = NULL;
      SV *ret;
      struct regexp *const rx = (struct regexp *)SvANY(r);
+
+    PERL_ARGS_ASSERT_REG_NAMED_BUFF_FETCH;
+
      if (flags & RXapif_ALL)
          retarray=newAV();
  
-    if (rx && rx->paren_names) {
-        HE *he_str = hv_fetch_ent( rx->paren_names, namesv, 0, 0 );
+    if (rx && RXp_PAREN_NAMES(rx)) {
+        HE *he_str = hv_fetch_ent( RXp_PAREN_NAMES(rx), namesv, 0, 0 );
          if (he_str) {
              IV i;
              SV* sv_dat=HeVAL(he_str);
@@ -4933,13 +5182,11 @@ Perl_reg_named_buff_fetch(pTHX_ REGEXP * const r, SV * const namesv,
                  } else {
                      ret = newSVsv(&PL_sv_undef);
                  }
-                if (retarray) {
-                    SvREFCNT_inc_simple_void(ret);
+                if (retarray)
                      av_push(retarray, ret);
-                }
              }
              if (retarray)
-                return newRV((SV*)retarray);
+                return newRV_noinc(MUTABLE_SV(retarray));
          }
      }
      return NULL;
@@ -4950,9 +5197,12 @@ Perl_reg_named_buff_exists(pTHX_ REGEXP * const r, SV * const key,
                             const U32 flags)
  {
      struct regexp *const rx = (struct regexp *)SvANY(r);
-    if (rx && rx->paren_names) {
+
+    PERL_ARGS_ASSERT_REG_NAMED_BUFF_EXISTS;
+
+    if (rx && RXp_PAREN_NAMES(rx)) {
          if (flags & RXapif_ALL) {
-            return hv_exists_ent(rx->paren_names, key, 0);
+            return hv_exists_ent(RXp_PAREN_NAMES(rx), key, 0);
          } else {
             SV *sv = CALLREG_NAMED_BUFF_FETCH(r, key, flags);
              if (sv) {
@@ -4971,8 +5221,11 @@ SV*
  Perl_reg_named_buff_firstkey(pTHX_ REGEXP * const r, const U32 flags)
  {
      struct regexp *const rx = (struct regexp *)SvANY(r);
-    if ( rx && rx->paren_names ) {
-       (void)hv_iterinit(rx->paren_names);
+
+    PERL_ARGS_ASSERT_REG_NAMED_BUFF_FIRSTKEY;
+
+    if ( rx && RXp_PAREN_NAMES(rx) ) {
+       (void)hv_iterinit(RXp_PAREN_NAMES(rx));
  
         return CALLREG_NAMED_BUFF_NEXTKEY(r, NULL, flags & ~RXapif_FIRSTKEY);
      } else {
@@ -4984,8 +5237,12 @@ SV*
  Perl_reg_named_buff_nextkey(pTHX_ REGEXP * const r, const U32 flags)
  {
      struct regexp *const rx = (struct regexp *)SvANY(r);
-    if (rx && rx->paren_names) {
-        HV *hv = rx->paren_names;
+    GET_RE_DEBUG_FLAGS_DECL;
+
+    PERL_ARGS_ASSERT_REG_NAMED_BUFF_NEXTKEY;
+
+    if (rx && RXp_PAREN_NAMES(rx)) {
+        HV *hv = RXp_PAREN_NAMES(rx);
          HE *temphe;
          while ( (temphe = hv_iternext_flags(hv,0)) ) {
              IV i;
@@ -4993,7 +5250,7 @@ Perl_reg_named_buff_nextkey(pTHX_ REGEXP * const r, const U32 flags)
              SV* sv_dat = HeVAL(temphe);
              I32 *nums = (I32*)SvPVX(sv_dat);
              for ( i = 0; i < SvIVX(sv_dat); i++ ) {
-                if ((I32)(rx->lastcloseparen) >= nums[i] &&
+                if ((I32)(rx->lastparen) >= nums[i] &&
                      rx->offs[nums[i]].start != -1 &&
                      rx->offs[nums[i]].end != -1)
                  {
@@ -5017,13 +5274,16 @@ Perl_reg_named_buff_scalar(pTHX_ REGEXP * const r, const U32 flags)
      I32 length;
      struct regexp *const rx = (struct regexp *)SvANY(r);
  
-    if (rx && rx->paren_names) {
+    PERL_ARGS_ASSERT_REG_NAMED_BUFF_SCALAR;
+
+    if (rx && RXp_PAREN_NAMES(rx)) {
          if (flags & (RXapif_ALL | RXapif_REGNAMES_COUNT)) {
-            return newSViv(HvTOTALKEYS(rx->paren_names));
+            return newSViv(HvTOTALKEYS(RXp_PAREN_NAMES(rx)));
          } else if (flags & RXapif_ONE) {
              ret = CALLREG_NAMED_BUFF_ALL(r, (flags | RXapif_REGNAMES));
-            av = (AV*)SvRV(ret);
+            av = MUTABLE_AV(SvRV(ret));
              length = av_len(av);
+           SvREFCNT_dec(ret);
              return newSViv(length + 1);
          } else {
              Perl_croak(aTHX_ "panic: Unknown flags %d in named_buff_scalar", (int)flags);
@@ -5039,8 +5299,10 @@ Perl_reg_named_buff_all(pTHX_ REGEXP * const r, const U32 flags)
      struct regexp *const rx = (struct regexp *)SvANY(r);
      AV *av = newAV();
  
-    if (rx && rx->paren_names) {
-        HV *hv= rx->paren_names;
+    PERL_ARGS_ASSERT_REG_NAMED_BUFF_ALL;
+
+    if (rx && RXp_PAREN_NAMES(rx)) {
+        HV *hv= RXp_PAREN_NAMES(rx);
          HE *temphe;
          (void)hv_iterinit(hv);
          while ( (temphe = hv_iternext_flags(hv,0)) ) {
@@ -5049,7 +5311,7 @@ Perl_reg_named_buff_all(pTHX_ REGEXP * const r, const U32 flags)
              SV* sv_dat = HeVAL(temphe);
              I32 *nums = (I32*)SvPVX(sv_dat);
              for ( i = 0; i < SvIVX(sv_dat); i++ ) {
-                if ((I32)(rx->lastcloseparen) >= nums[i] &&
+                if ((I32)(rx->lastparen) >= nums[i] &&
                      rx->offs[nums[i]].start != -1 &&
                      rx->offs[nums[i]].end != -1)
                  {
@@ -5063,7 +5325,7 @@ Perl_reg_named_buff_all(pTHX_ REGEXP * const r, const U32 flags)
          }
      }
  
-    return newRV((SV*)av);
+    return newRV_noinc(MUTABLE_SV(av));
  }
  
  void
@@ -5074,6 +5336,8 @@ Perl_reg_numbered_buff_fetch(pTHX_ REGEXP * const r, const I32 paren,
      char *s = NULL;
      I32 i = 0;
      I32 s1, t1;
+
+    PERL_ARGS_ASSERT_REG_NUMBERED_BUFF_FETCH;
          
      if (!rx->subbeg) {
          sv_setsv(sv,&PL_sv_undef);
@@ -5147,12 +5411,14 @@ void
  Perl_reg_numbered_buff_store(pTHX_ REGEXP * const rx, const I32 paren,
                                                          SV const * const value)
  {
+    PERL_ARGS_ASSERT_REG_NUMBERED_BUFF_STORE; /* macro defined outside this chunk; presumably validates the arguments -- confirm */
+
      PERL_UNUSED_ARG(rx);
      PERL_UNUSED_ARG(paren);
      PERL_UNUSED_ARG(value);
  
      if (!PL_localizing)
-        Perl_croak(aTHX_ PL_no_modify);
+        Perl_croak_no_modify(aTHX); /* nothing is ever stored: all three arguments are unused, and the call croaks unless PL_localizing is set */
  }
  
  I32
@@ -5163,6 +5429,8 @@ Perl_reg_numbered_buff_length(pTHX_ REGEXP * const r, const SV * const sv,
      I32 i;
      I32 s1, t1;
  
+    PERL_ARGS_ASSERT_REG_NUMBERED_BUFF_LENGTH;
+
      /* Some of this code was originally in C<Perl_magic_len> in F<mg.c> */
         switch (paren) {
        /* $` / ${^PREMATCH} */
@@ -5197,7 +5465,7 @@ Perl_reg_numbered_buff_length(pTHX_ REGEXP * const r, const SV * const sv,
              goto getlen;
          } else {
              if (ckWARN(WARN_UNINITIALIZED))
-                report_uninit((SV*)sv);
+                report_uninit((const SV *)sv);
              return 0;
          }
      }
@@ -5217,8 +5485,12 @@ Perl_reg_numbered_buff_length(pTHX_ REGEXP * const r, const SV * const sv,
  SV*
  Perl_reg_qr_package(pTHX_ REGEXP * const rx)
  {
+    PERL_ARGS_ASSERT_REG_QR_PACKAGE; /* macro defined outside this chunk; presumably validates rx -- confirm */
         PERL_UNUSED_ARG(rx);
-       return NULL;
+       if (0) /* constant-false test: the NULL return below can never execute */
+           return NULL; /* NOTE(review): dead branch; reason for keeping it is not visible here -- confirm before simplifying */
+       else
+           return newSVpvs("Regexp"); /* the only reachable result: an SV built from the literal "Regexp", independent of rx */
  }
  
  /* Scans the name of a named buffer from the pattern.
@@ -5234,9 +5506,12 @@ Perl_reg_qr_package(pTHX_ REGEXP * const rx)
  #define REG_RSN_RETURN_DATA    2
  
  STATIC SV*
-S_reg_scan_name(pTHX_ RExC_state_t *pRExC_state, U32 flags) {
+S_reg_scan_name(pTHX_ RExC_state_t *pRExC_state, U32 flags)
+{
      char *name_start = RExC_parse;
  
+    PERL_ARGS_ASSERT_REG_SCAN_NAME;
+
      if (isIDFIRST_lazy_if(RExC_parse, UTF)) {
          /* skip IDFIRST by using do...while */
         if (UTF)
@@ -5367,6 +5642,8 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
      char * const oregcomp_parse = RExC_parse;
  
      GET_RE_DEBUG_FLAGS_DECL;
+
+    PERL_ARGS_ASSERT_REG;
      DEBUG_PARSE("reg ");
  
      *flagp = 0;                                /* Tentatively. */
@@ -5482,6 +5759,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
         if (*RExC_parse == '?') { /* (?...) */
             bool is_logical = 0;
             const char * const seqstart = RExC_parse;
+            bool has_use_defaults = FALSE;
  
             RExC_parse++;
             paren = *RExC_parse++;
@@ -5556,10 +5834,10 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
                                  "panic: reg_scan_name returned NULL");
                          if (!RExC_paren_names) {
                              RExC_paren_names= newHV();
-                            sv_2mortal((SV*)RExC_paren_names);
+                            sv_2mortal(MUTABLE_SV(RExC_paren_names));
  #ifdef DEBUGGING
                              RExC_paren_name_list= newAV();
-                            sv_2mortal((SV*)RExC_paren_name_list);
+                            sv_2mortal(MUTABLE_SV(RExC_paren_name_list));
  #endif
                          }
                          he_str = hv_fetch_ent( RExC_paren_names, svname, 1, 0 );
@@ -5588,13 +5866,13 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
                                  pv = (I32*)SvGROW(sv_dat, SvCUR(sv_dat) + sizeof(I32)+1);
                                  SvCUR_set(sv_dat, SvCUR(sv_dat) + sizeof(I32));
                                  pv[count] = RExC_npar;
-                                SvIVX(sv_dat)++;
+                                SvIV_set(sv_dat, SvIVX(sv_dat) + 1);
                              }
                          } else {
                              (void)SvUPGRADE(sv_dat,SVt_PVNV);
                              sv_setpvn(sv_dat, (char *)&(RExC_npar), sizeof(I32));
                              SvIOK_on(sv_dat);
-                            SvIVX(sv_dat)= 1;
+                            SvIV_set(sv_dat, 1);
                          }
  #ifdef DEBUGGING
                          if (!av_store(RExC_paren_name_list, RExC_npar, SvREFCNT_inc(svname)))
@@ -5610,6 +5888,8 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
                  RExC_seen |= REG_SEEN_LOOKBEHIND;
                 RExC_parse++;
             case '=':           /* (?=...) */
+               RExC_seen_zerolen++;
+                break;
             case '!':           /* (?!...) */
                 RExC_seen_zerolen++;
                 if (*RExC_parse == ')') {
@@ -5934,12 +6214,19 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
                 RExC_parse--; /* for vFAIL to print correctly */
                  vFAIL("Sequence (? incomplete");
                  break;
+            case DEFAULT_PAT_MOD:   /* Use default flags with the exceptions
+                                      that follow */
+                has_use_defaults = TRUE;
+                STD_PMMOD_FLAGS_CLEAR(&RExC_flags);
+                RExC_flags &= ~(RXf_PMf_LOCALE|RXf_PMf_UNICODE);
+                goto parse_flags;
             default:
                 --RExC_parse;
                 parse_flags:      /* (?i) */  
             {
                  U32 posflags = 0, negflags = 0;
                 U32 *flagsp = &posflags;
+                bool has_charset_modifier = 0;
  
                 while (*RExC_parse) {
                     /* && strchr("iogcmsx", *RExC_parse) */
@@ -5947,6 +6234,32 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
                        and must be globally applied -- japhy */
                      switch (*RExC_parse) {
                     CASE_STD_PMMOD_FLAGS_PARSE_SET(flagsp);
+                    case LOCALE_PAT_MOD:
+                        if (has_charset_modifier || flagsp == &negflags) {
+                            goto fail_modifiers;
+                        }
+                        posflags |= RXf_PMf_LOCALE;
+                        negflags |= RXf_PMf_UNICODE;
+                        has_charset_modifier = 1;
+                        break;
+                    case UNICODE_PAT_MOD:
+                        if (has_charset_modifier || flagsp == &negflags) {
+                            goto fail_modifiers;
+                        }
+                        posflags |= RXf_PMf_UNICODE;
+                        negflags |= RXf_PMf_LOCALE;
+                        has_charset_modifier = 1;
+                        break;
+                    case DUAL_PAT_MOD:
+                        if (has_use_defaults
+                            || has_charset_modifier
+                            || flagsp == &negflags)
+                        {
+                            goto fail_modifiers;
+                        }
+                        negflags |= (RXf_PMf_LOCALE|RXf_PMf_UNICODE);
+                        has_charset_modifier = 1;
+                        break;
                      case ONCE_PAT_MOD: /* 'o' */
                      case GLOBAL_PAT_MOD: /* 'g' */
                         if (SIZE_ONLY && ckWARN(WARN_REGEXP)) {
@@ -5980,14 +6293,18 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
                         break;
                     case KEEPCOPY_PAT_MOD: /* 'p' */
                          if (flagsp == &negflags) {
-                            if (SIZE_ONLY && ckWARN(WARN_REGEXP))
-                                vWARN(RExC_parse + 1,"Useless use of (?-p)");
+                            if (SIZE_ONLY)
+                                ckWARNreg(RExC_parse + 1,"Useless use of (?-p)");
                          } else {
                              *flagsp |= RXf_PMf_KEEPCOPY;
                          }
                         break;
                      case '-':
-                        if (flagsp == &negflags) {
+                        /* A flag is a default iff it follows a minus, so if
+                         * there is a minus, it means we would be trying to
+                         * re-specify a default, which is an error */
+                        if (has_use_defaults || flagsp == &negflags) {
+            fail_modifiers:
                              RExC_parse++;
                             vFAIL3("Sequence (%.*s...) not recognized", RExC_parse-seqstart, seqstart);
                             /*NOTREACHED*/
@@ -6053,6 +6370,13 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
      /* Pick up the branches, linking them together. */
      parse_start = RExC_parse;   /* MJD */
      br = regbranch(pRExC_state, &flags, 1,depth+1);
+
+    if (freeze_paren) {
+        if (RExC_npar > after_freeze)
+            after_freeze = RExC_npar;
+        RExC_npar = freeze_paren;
+    }
+
      /*     branch_len = (paren != 0); */
  
      if (br == NULL)
@@ -6211,6 +6535,9 @@ S_regbranch(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, I32 first, U32 depth)
      register regnode *latest;
      I32 flags = 0, c = 0;
      GET_RE_DEBUG_FLAGS_DECL;
+
+    PERL_ARGS_ASSERT_REGBRANCH;
+
      DEBUG_PARSE("brnc");
  
      if (first)
@@ -6286,6 +6613,9 @@ S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
      char *parse_start;
      const char *maxpos = NULL;
      GET_RE_DEBUG_FLAGS_DECL;
+
+    PERL_ARGS_ASSERT_REGPIECE;
+
      DEBUG_PARSE("piec");
  
      ret = regatom(pRExC_state, &flags,depth+1);
@@ -6363,7 +6693,7 @@ S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                 *flagp = WORST;
             if (max > 0)
                 *flagp |= HASWIDTH;
-           if (max && max < min)
+           if (max < min)
                 vFAIL("Can't do {n,m} with n > m");
             if (!SIZE_ONLY) {
                 ARG1_SET(ret, (U16)min);
@@ -6423,11 +6753,11 @@ S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
         goto do_curly;
      }
    nest_check:
-    if (!SIZE_ONLY && !(flags&(HASWIDTH|POSTPONED)) && max > REG_INFTY/3 && ckWARN(WARN_REGEXP)) {
-       vWARN3(RExC_parse,
-              "%.*s matches null string many times",
-              (int)(RExC_parse >= origparse ? RExC_parse - origparse : 0),
-              origparse);
+    if (!SIZE_ONLY && !(flags&(HASWIDTH|POSTPONED)) && max > REG_INFTY/3) {
+       ckWARN3reg(RExC_parse,
+                  "%.*s matches null string many times",
+                  (int)(RExC_parse >= origparse ? RExC_parse - origparse : 0),
+                  origparse);
      }
  
      if (RExC_parse < RExC_end && *RExC_parse == '?') {
@@ -6462,271 +6792,276 @@ S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
  /* reg_namedseq(pRExC_state,UVp)
     
     This is expected to be called by a parser routine that has 
-   recognized'\N' and needs to handle the rest. RExC_parse is 
+   recognized '\N' and needs to handle the rest. RExC_parse is
     expected to point at the first char following the N at the time
     of the call.
+
+   The \N may be inside (indicated by valuep not being NULL) or outside a
+   character class.
+
+   \N may begin either a named sequence, or if outside a character class, mean
+   to match a non-newline.  For non single-quoted regexes, the tokenizer has
+   attempted to decide which, and in the case of a named sequence converted it
+   into one of the forms: \N{} (if the sequence is null), or \N{U+c1.c2...},
+   where c1... are the characters in the sequence.  For single-quoted regexes,
+   the tokenizer passes the \N sequence through unchanged; this code does not
+   attempt to detect that case nor to expand such sequences.  The net effect
+   is that if the beginning of the passed-in pattern isn't '{U+' or there is
+   no '}', it signals that this \N occurrence means to match a non-newline.
+   
+   Only the \N{U+...} form should occur in a character class, for the same
+   reason that '.' inside a character class means to just match a period: it
+   just doesn't make sense.
     
     If valuep is non-null then it is assumed that we are parsing inside 
     of a charclass definition and the first codepoint in the resolved
     string is returned via *valuep and the routine will return NULL. 
     In this mode if a multichar string is returned from the charnames 
-   handler a warning will be issued, and only the first char in the 
+   handler, a warning will be issued, and only the first char in the 
     sequence will be examined. If the string returned is zero length
     then the value of *valuep is undefined and NON-NULL will 
     be returned to indicate failure. (This will NOT be a valid pointer 
     to a regnode.)
     
-   If value is null then it is assumed that we are parsing normal text
-   and inserts a new EXACT node into the program containing the resolved
-   string and returns a pointer to the new node. If the string is 
-   zerolength a NOTHING node is emitted.
-   
+   If valuep is null then it is assumed that we are parsing normal text and a
+   new EXACT node is inserted into the program containing the resolved string,
+   and a pointer to the new node is returned.  But if the string is zero length
+   a NOTHING node is emitted instead.
+
     On success RExC_parse is set to the char following the endbrace.
-   Parsing failures will generate a fatal errorvia vFAIL(...)
-   
-   NOTE: We cache all results from the charnames handler locally in 
-   the RExC_charnames hash (created on first use) to prevent a charnames 
-   handler from playing silly-buggers and returning a short string and 
-   then a long string for a given pattern. Since the regexp program 
-   size is calculated during an initial parse this would result
-   in a buffer overrun so we cache to prevent the charname result from
-   changing during the course of the parse.
-   
+   Parsing failures will generate a fatal error via vFAIL(...)
   */
  STATIC regnode *
-S_reg_namedseq(pTHX_ RExC_state_t *pRExC_state, UV *valuep) 
+S_reg_namedseq(pTHX_ RExC_state_t *pRExC_state, UV *valuep, I32 *flagp)
  {
-    char * name;        /* start of the content of the name */
-    char * endbrace;    /* endbrace following the name */
-    SV *sv_str = NULL;  
-    SV *sv_name = NULL;
-    STRLEN len; /* this has various purposes throughout the code */
-    bool cached = 0; /* if this is true then we shouldn't refcount dev sv_str */
+    char * endbrace;    /* '}' following the name */
      regnode *ret = NULL;
-    
+#ifdef DEBUGGING
+    char* parse_start = RExC_parse - 2;            /* points to the '\N' */
+#endif
+    char* p;
+
+    GET_RE_DEBUG_FLAGS_DECL;
+ 
+    PERL_ARGS_ASSERT_REG_NAMEDSEQ;
+
+    GET_RE_DEBUG_FLAGS;
+
+    /* The [^\n] meaning of \N ignores spaces and comments under the /x
+     * modifier.  The other meaning does not */
+    p = (RExC_flags & RXf_PMf_EXTENDED)
+       ? regwhite( pRExC_state, RExC_parse )
+       : RExC_parse;
+   
+    /* Disambiguate between \N meaning a named character versus \N meaning
+     * [^\n].  The former is assumed when it can't be the latter. */
+    if (*p != '{' || regcurly(p)) {
+       RExC_parse = p;
+       if (valuep) {
+           /* no bare \N in a charclass */
+           vFAIL("\\N in a character class must be a named character: \\N{...}");
+       }
+       nextchar(pRExC_state);
+       ret = reg_node(pRExC_state, REG_ANY);
+       *flagp |= HASWIDTH|SIMPLE;
+       RExC_naughty++;
+       RExC_parse--;
+        Set_Node_Length(ret, 1); /* MJD */
+       return ret;
+    }
+
+    /* Here, we have decided it should be a named sequence */
+
+    /* The test above made sure that the next real character is a '{', but
+     * under the /x modifier, it could be separated by space (or a comment and
+     * \n) and this is not allowed (for consistency with \x{...} and the
+     * tokenizer handling of \N{NAME}). */
      if (*RExC_parse != '{') {
-        vFAIL("Missing braces on \\N{}");
+       vFAIL("Missing braces on \\N{}");
      }
-    name = RExC_parse+1;
-    endbrace = strchr(RExC_parse, '}');
-    if ( ! endbrace ) {
-        RExC_parse++;
-        vFAIL("Missing right brace on \\N{}");
-    } 
-    RExC_parse = endbrace + 1;  
-    
-    
-    /* RExC_parse points at the beginning brace, 
-       endbrace points at the last */
-    if ( name[0]=='U' && name[1]=='+' ) {
-        /* its a "Unicode hex" notation {U+89AB} */
-        I32 fl = PERL_SCAN_ALLOW_UNDERSCORES
-            | PERL_SCAN_DISALLOW_PREFIX
-            | (SIZE_ONLY ? PERL_SCAN_SILENT_ILLDIGIT : 0);
-        UV cp;
-       char string;
-        len = (STRLEN)(endbrace - name - 2);
-        cp = grok_hex(name + 2, &len, &fl, NULL);
-        if ( len != (STRLEN)(endbrace - name - 2) ) {
-            cp = 0xFFFD;
-        }    
-        if (cp > 0xff)
-            RExC_utf8 = 1;
-        if ( valuep ) {
-            *valuep = cp;
-            return NULL;
-        }
-       string = (char)cp;
-        sv_str= newSVpvn(&string, 1);
-    } else {
-        /* fetch the charnames handler for this scope */
-        HV * const table = GvHV(PL_hintgv);
-        SV **cvp= table ? 
-            hv_fetchs(table, "charnames", FALSE) :
-            NULL;
-        SV *cv= cvp ? *cvp : NULL;
-        HE *he_str;
-        int count;
-        /* create an SV with the name as argument */
-        sv_name = newSVpvn(name, endbrace - name);
-        
-        if (!table || !(PL_hints & HINT_LOCALIZE_HH)) {
-            vFAIL2("Constant(\\N{%s}) unknown: "
-                  "(possibly a missing \"use charnames ...\")",
-                  SvPVX(sv_name));
-        }
-        if (!cvp || !SvOK(*cvp)) { /* when $^H{charnames} = undef; */
-            vFAIL2("Constant(\\N{%s}): "
-                  "$^H{charnames} is not defined",SvPVX(sv_name));
-        }
-        
-        
-        
-        if (!RExC_charnames) {
-            /* make sure our cache is allocated */
-            RExC_charnames = newHV();
-            sv_2mortal((SV*)RExC_charnames);
-        } 
-            /* see if we have looked this one up before */
-        he_str = hv_fetch_ent( RExC_charnames, sv_name, 0, 0 );
-        if ( he_str ) {
-            sv_str = HeVAL(he_str);
-            cached = 1;
-        } else {
-            dSP ;
  
-            ENTER ;
-            SAVETMPS ;
-            PUSHMARK(SP) ;
-            
-            XPUSHs(sv_name);
-            
-            PUTBACK ;
-            
-            count= call_sv(cv, G_SCALAR);
-            
-            if (count == 1) { /* XXXX is this right? dmq */
-                sv_str = POPs;
-                SvREFCNT_inc_simple_void(sv_str);
-            } 
-            
-            SPAGAIN ;
-            PUTBACK ;
-            FREETMPS ;
-            LEAVE ;
-            
-            if ( !sv_str || !SvOK(sv_str) ) {
-                vFAIL2("Constant(\\N{%s}): Call to &{$^H{charnames}} "
-                      "did not return a defined value",SvPVX(sv_name));
-            }
-            if (hv_store_ent( RExC_charnames, sv_name, sv_str, 0))
-                cached = 1;
-        }
+    RExC_parse++;      /* Skip past the '{' */
+
+    if (! (endbrace = strchr(RExC_parse, '}')) /* no trailing brace */
+       || ! (endbrace == RExC_parse            /* nothing between the {} */
+             || (endbrace - RExC_parse >= 2    /* U+ (bad hex is checked below */
+                 && strnEQ(RExC_parse, "U+", 2)))) /* for a better error msg) */
+    {
+       if (endbrace) RExC_parse = endbrace;    /* position msg's '<--HERE' */
+       vFAIL("\\N{NAME} must be resolved by the lexer");
      }
-    if (valuep) {
-        char *p = SvPV(sv_str, len);
-        if (len) {
-            STRLEN numlen = 1;
-            if ( SvUTF8(sv_str) ) {
-                *valuep = utf8_to_uvchr((U8*)p, &numlen);
-                if (*valuep > 0x7F)
-                    RExC_utf8 = 1; 
-                /* XXXX
-                  We have to turn on utf8 for high bit chars otherwise
-                  we get failures with
-                  
-                   "ss" =~ /[\N{LATIN SMALL LETTER SHARP S}]/i
-                   "SS" =~ /[\N{LATIN SMALL LETTER SHARP S}]/i
-                
-                  This is different from what \x{} would do with the same
-                  codepoint, where the condition is > 0xFF.
-                  - dmq
-                */
-                
-                
-            } else {
-                *valuep = (UV)*p;
-                /* warn if we havent used the whole string? */
-            }
-            if (numlen<len && SIZE_ONLY && ckWARN(WARN_REGEXP)) {
-                vWARN2(RExC_parse,
-                    "Ignoring excess chars from \\N{%s} in character class",
-                    SvPVX(sv_name)
-                );
-            }        
-        } else if (SIZE_ONLY && ckWARN(WARN_REGEXP)) {
-            vWARN2(RExC_parse,
-                    "Ignoring zero length \\N{%s} in character class",
-                    SvPVX(sv_name)
-                );
-        }
-        if (sv_name)    
-            SvREFCNT_dec(sv_name);    
-        if (!cached)
-            SvREFCNT_dec(sv_str);    
-        return len ? NULL : (regnode *)&len;
-    } else if(SvCUR(sv_str)) {     
-        
-        char *s; 
-        char *p, *pend;        
-        STRLEN charlen = 1;
-#ifdef DEBUGGING
-        char * parse_start = name-3; /* needed for the offsets */
-#endif
-        GET_RE_DEBUG_FLAGS_DECL;     /* needed for the offsets */
-        
-        ret = reg_node(pRExC_state,
-            (U8)(FOLD ? (LOC ? EXACTFL : EXACTF) : EXACT));
-        s= STRING(ret);
-        
-        if ( RExC_utf8 && !SvUTF8(sv_str) ) {
-            sv_utf8_upgrade(sv_str);
-        } else if ( !RExC_utf8 && SvUTF8(sv_str) ) {
-            RExC_utf8= 1;
-        }
-        
-        p = SvPV(sv_str, len);
-        pend = p + len;
-        /* len is the length written, charlen is the size the char read */
-        for ( len = 0; p < pend; p += charlen ) {
-            if (UTF) {
-                UV uvc = utf8_to_uvchr((U8*)p, &charlen);
-                if (FOLD) {
-                    STRLEN foldlen,numlen;
-                    U8 tmpbuf[UTF8_MAXBYTES_CASE+1], *foldbuf;
-                    uvc = toFOLD_uni(uvc, tmpbuf, &foldlen);
-                    /* Emit all the Unicode characters. */
-                    
-                    for (foldbuf = tmpbuf;
-                        foldlen;
-                        foldlen -= numlen) 
-                    {
-                        uvc = utf8_to_uvchr(foldbuf, &numlen);
-                        if (numlen > 0) {
-                            const STRLEN unilen = reguni(pRExC_state, uvc, s);
-                            s       += unilen;
-                            len     += unilen;
-                            /* In EBCDIC the numlen
-                            * and unilen can differ. */
-                            foldbuf += numlen;
-                            if (numlen >= foldlen)
-                                break;
-                        }
-                        else
-                            break; /* "Can't happen." */
-                    }                          
-                } else {
-                    const STRLEN unilen = reguni(pRExC_state, uvc, s);
-                   if (unilen > 0) {
-                      s   += unilen;
-                      len += unilen;
-                   }
-               }
-           } else {
-                len++;
-                REGC(*p, s++);
-            }
-        }
-        if (SIZE_ONLY) {
-            RExC_size += STR_SZ(len);
-        } else {
-            STR_LEN(ret) = len;
-            RExC_emit += STR_SZ(len);
-        }
-        Set_Node_Cur_Length(ret); /* MJD */
-        RExC_parse--; 
-        nextchar(pRExC_state);
-    } else {
-        ret = reg_node(pRExC_state,NOTHING);
+
+    if (endbrace == RExC_parse) {   /* empty: \N{} */
+       if (! valuep) {
+           RExC_parse = endbrace + 1;  
+           return reg_node(pRExC_state,NOTHING);
+       }
+
+       if (SIZE_ONLY) {
+           ckWARNreg(RExC_parse,
+                   "Ignoring zero length \\N{} in character class"
+           );
+           RExC_parse = endbrace + 1;  
+       }
+       *valuep = 0;
+       return (regnode *) &RExC_parse; /* Invalid regnode pointer */
      }
-    if (!cached) {
-        SvREFCNT_dec(sv_str);
+
+    REQUIRE_UTF8;      /* named sequences imply Unicode semantics */
+    RExC_parse += 2;   /* Skip past the 'U+' */
+
+    if (valuep) {   /* In a bracketed char class */
+       /* We only pay attention to the first char of 
+       multichar strings being returned. I kinda wonder
+       if this makes sense as it does change the behaviour
+       from earlier versions, OTOH that behaviour was broken
+       as well. XXX Solution is to recharacterize as
+       [rest-of-class]|multi1|multi2... */
+
+       STRLEN length_of_hex;
+       I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
+           | PERL_SCAN_DISALLOW_PREFIX
+           | (SIZE_ONLY ? PERL_SCAN_SILENT_ILLDIGIT : 0);
+    
+       char * endchar = RExC_parse + strcspn(RExC_parse, ".}");
+       if (endchar < endbrace) {
+           ckWARNreg(endchar, "Using just the first character returned by \\N{} in character class");
+       }
+
+       length_of_hex = (STRLEN)(endchar - RExC_parse);
+       *valuep = grok_hex(RExC_parse, &length_of_hex, &flags, NULL);
+
+       /* The tokenizer should have guaranteed validity, but it's possible to
+        * bypass it by using single quoting, so check */
+       if (length_of_hex == 0
+           || length_of_hex != (STRLEN)(endchar - RExC_parse) )
+       {
+           RExC_parse += length_of_hex;        /* Includes all the valid */
+           RExC_parse += (RExC_orig_utf8)      /* point to after 1st invalid */
+                           ? UTF8SKIP(RExC_parse)
+                           : 1;
+           /* Guard against malformed utf8 */
+           if (RExC_parse >= endchar) RExC_parse = endchar;
+           vFAIL("Invalid hexadecimal number in \\N{U+...}");
+       }    
+
+       RExC_parse = endbrace + 1;
+       if (endchar == endbrace) return NULL;
+
+        ret = (regnode *) &RExC_parse; /* Invalid regnode pointer */
      }
-    if (sv_name) {
-        SvREFCNT_dec(sv_name); 
+    else {     /* Not a char class */
+       char *s;            /* String to put in generated EXACT node */
+       STRLEN len = 0;     /* Its current byte length */
+       char *endchar;      /* Points to '.' or '}' ending cur char in the input
+                              stream */
+
+       ret = reg_node(pRExC_state,
+                       (U8)(FOLD ? (LOC ? EXACTFL : EXACTF) : EXACT));
+       s= STRING(ret);
+
+       /* EXACT nodes need a length that fits in a U8, so hold at most 255
+        * bytes of text.  Loop through the input, which is now of the form
+        * 'c1.c2.c3...}', until we find the ending brace or would exceed 255
+        * bytes.  The characters that exceed this limit are dropped.  The
+        * limit could be relaxed, should that become desirable, by reparsing
+        * this as (?:\N{NAME}) to generate multiple EXACT nodes, as is done
+        * for regular input.  But this is primarily a named character, not
+        * meant to be a huge long string, so 255 bytes should be good enough */
+       while (1) {
+           STRLEN length_of_hex;
+           I32 grok_flags = PERL_SCAN_ALLOW_UNDERSCORES
+                           | PERL_SCAN_DISALLOW_PREFIX
+                           | (SIZE_ONLY ? PERL_SCAN_SILENT_ILLDIGIT : 0);
+           UV cp;  /* Ord of current character */
+
+           /* Code points are separated by dots.  If there are no dots, there
+            * is only one code point, and it is terminated by the brace */
+           endchar = RExC_parse + strcspn(RExC_parse, ".}");
+
+           /* The values are Unicode even on EBCDIC machines */
+           length_of_hex = (STRLEN)(endchar - RExC_parse);
+           cp = grok_hex(RExC_parse, &length_of_hex, &grok_flags, NULL);
+           if ( length_of_hex == 0 
+               || length_of_hex != (STRLEN)(endchar - RExC_parse) )
+           {
+               RExC_parse += length_of_hex;        /* Includes all the valid */
+               RExC_parse += (RExC_orig_utf8)  /* point to after 1st invalid */
+                               ? UTF8SKIP(RExC_parse)
+                               : 1;
+               /* Guard against malformed utf8 */
+               if (RExC_parse >= endchar) RExC_parse = endchar;
+               vFAIL("Invalid hexadecimal number in \\N{U+...}");
+           }    
+
+           if (! FOLD) {       /* Not folding, just append to the string */
+               STRLEN unilen;
+
+               /* Quit before adding this character if would exceed limit */
+               if (len + UNISKIP(cp) > U8_MAX) break;
+
+               unilen = reguni(pRExC_state, cp, s);
+               if (unilen > 0) {
+                   s   += unilen;
+                   len += unilen;
+               }
+           } else {    /* Folding, output the folded equivalent */
+               STRLEN foldlen,numlen;
+               U8 tmpbuf[UTF8_MAXBYTES_CASE+1], *foldbuf;
+               cp = toFOLD_uni(cp, tmpbuf, &foldlen);
+
+               /* Quit before exceeding size limit */
+               if (len + foldlen > U8_MAX) break;
+               
+               for (foldbuf = tmpbuf;
+                   foldlen;
+                   foldlen -= numlen) 
+               {
+                   cp = utf8_to_uvchr(foldbuf, &numlen);
+                   if (numlen > 0) {
+                       const STRLEN unilen = reguni(pRExC_state, cp, s);
+                       s       += unilen;
+                       len     += unilen;
+                       /* In EBCDIC the numlen and unilen can differ. */
+                       foldbuf += numlen;
+                       if (numlen >= foldlen)
+                           break;
+                   }
+                   else
+                       break; /* "Can't happen." */
+               }                          
+           }
+
+           /* Point to the beginning of the next character in the sequence. */
+           RExC_parse = endchar + 1;
+
+           /* Quit if no more characters */
+           if (RExC_parse >= endbrace) break;
+       }
+
+
+       if (SIZE_ONLY) {
+           if (RExC_parse < endbrace) {
+               ckWARNreg(RExC_parse - 1,
+                         "Using just the first characters returned by \\N{}");
+           }
+
+           RExC_size += STR_SZ(len);
+       } else {
+           STR_LEN(ret) = len;
+           RExC_emit += STR_SZ(len);
+       }
+
+       RExC_parse = endbrace + 1;
+
+       *flagp |= HASWIDTH; /* Not SIMPLE, as that causes the engine to fail
+                              with malformed in t/re/pat_advanced.t */
+       RExC_parse --;
+       Set_Node_Cur_Length(ret); /* MJD */
+       nextchar(pRExC_state);
      }
-    return ret;
  
+    return ret;
  }
  
  
@@ -6749,6 +7084,8 @@ S_reg_recode(pTHX_ const char value, SV **encp)
      const STRLEN newlen = SvCUR(sv);
      UV uv = UNICODE_REPLACEMENT;
  
+    PERL_ARGS_ASSERT_REG_RECODE;
+
      if (newlen)
         uv = SvUTF8(sv)
              ? utf8n_to_uvchr((U8*)s, newlen, &numlen, UTF8_ALLOW_DEFAULT)
@@ -6795,6 +7132,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
      DEBUG_PARSE("atom");
      *flagp = WORST;            /* Tentatively. */
  
+    PERL_ARGS_ASSERT_REGATOM;
  
  tryagain:
      switch ((U8)*RExC_parse) {
@@ -6957,31 +7295,61 @@ tryagain:
             *flagp |= HASWIDTH;
             goto finish_meta_pat;
         case 'w':
-           ret = reg_node(pRExC_state, (U8)(LOC ? ALNUML     : ALNUM));
+           if (LOC) {
+                ret = reg_node(pRExC_state, (U8)(ALNUML));
+            } else {
+                ret = reg_node(pRExC_state, (U8)(ALNUM));
+                FLAGS(ret) = (UNI_SEMANTICS) ? USE_UNI : 0;
+            }
             *flagp |= HASWIDTH|SIMPLE;
             goto finish_meta_pat;
         case 'W':
-           ret = reg_node(pRExC_state, (U8)(LOC ? NALNUML    : NALNUM));
+            if (LOC) {
+                ret = reg_node(pRExC_state, (U8)(NALNUML));
+            } else {
+                ret = reg_node(pRExC_state, (U8)(NALNUM));
+                FLAGS(ret) = (UNI_SEMANTICS) ? USE_UNI : 0;
+            }
             *flagp |= HASWIDTH|SIMPLE;
             goto finish_meta_pat;
         case 'b':
             RExC_seen_zerolen++;
             RExC_seen |= REG_SEEN_LOOKBEHIND;
-           ret = reg_node(pRExC_state, (U8)(LOC ? BOUNDL     : BOUND));
+            if (LOC) {
+                ret = reg_node(pRExC_state, (U8)(BOUNDL));
+            } else {
+                ret = reg_node(pRExC_state, (U8)(BOUND));
+                FLAGS(ret) = (UNI_SEMANTICS) ? USE_UNI : 0;
+            }
             *flagp |= SIMPLE;
             goto finish_meta_pat;
         case 'B':
             RExC_seen_zerolen++;
             RExC_seen |= REG_SEEN_LOOKBEHIND;
-           ret = reg_node(pRExC_state, (U8)(LOC ? NBOUNDL    : NBOUND));
+            if (LOC) {
+                ret = reg_node(pRExC_state, (U8)(NBOUNDL));
+            } else {
+                ret = reg_node(pRExC_state, (U8)(NBOUND));
+                FLAGS(ret) = (UNI_SEMANTICS) ? USE_UNI : 0;
+            }
             *flagp |= SIMPLE;
             goto finish_meta_pat;
         case 's':
-           ret = reg_node(pRExC_state, (U8)(LOC ? SPACEL     : SPACE));
+            if (LOC) {
+                ret = reg_node(pRExC_state, (U8)(SPACEL));
+            } else {
+                ret = reg_node(pRExC_state, (U8)(SPACE));
+                FLAGS(ret) = (UNI_SEMANTICS) ? USE_UNI : 0;
+            }
             *flagp |= HASWIDTH|SIMPLE;
             goto finish_meta_pat;
         case 'S':
-           ret = reg_node(pRExC_state, (U8)(LOC ? NSPACEL    : NSPACE));
+            if (LOC) {
+                ret = reg_node(pRExC_state, (U8)(NSPACEL));
+            } else {
+                ret = reg_node(pRExC_state, (U8)(NSPACE));
+                FLAGS(ret) = (UNI_SEMANTICS) ? USE_UNI : 0;
+            }
             *flagp |= HASWIDTH|SIMPLE;
             goto finish_meta_pat;
         case 'd':
@@ -7053,12 +7421,12 @@ tryagain:
             }
             break;
          case 'N': 
-            /* Handle \N{NAME} here and not below because it can be 
+            /* Handle \N and \N{NAME} here and not below because it can be
              multicharacter. join_exact() will join them up later on. 
              Also this makes sure that things like /\N{BLAH}+/ and 
              \N{BLAH} being multi char Just Happen. dmq*/
              ++RExC_parse;
-            ret= reg_namedseq(pRExC_state, NULL); 
+            ret= reg_namedseq(pRExC_state, NULL, flagp); 
              break;
         case 'k':    /* Handle \k<NAME> and \k'NAME' */
         parse_named_seq:
@@ -7285,6 +7653,35 @@ tryagain:
                           ender = ASCII_TO_NATIVE('\007');
                         p++;
                         break;
+                   case 'o':
+                       {
+                           STRLEN brace_len = len;
+                           UV result;
+                           const char* error_msg;
+
+                           bool valid = grok_bslash_o(p,
+                                                      &result,
+                                                      &brace_len,
+                                                      &error_msg,
+                                                      1);
+                           p += brace_len;
+                           if (! valid) {
+                               RExC_parse = p; /* going to die anyway; point
+                                                  to exact spot of failure */
+                               vFAIL(error_msg);
+                           }
+                           else
+                           {
+                               ender = result;
+                           }
+                           if (PL_encoding && ender < 0x100) {
+                               goto recode_encoding;
+                           }
+                           if (ender > 0xff) {
+                               REQUIRE_UTF8;
+                           }
+                           break;
+                       }
                     case 'x':
                         if (*++p == '{') {
                             char* const e = strchr(p, '}');
@@ -7299,7 +7696,7 @@ tryagain:
                                  STRLEN numlen = e - p - 1;
                                 ender = grok_hex(p + 1, &numlen, &flags, NULL);
                                 if (ender > 0xff)
-                                   RExC_utf8 = 1;
+                                   REQUIRE_UTF8;
                                 p = e + 1;
                             }
                         }
@@ -7314,16 +7711,19 @@ tryagain:
                         break;
                     case 'c':
                         p++;
-                       ender = UCHARAT(p++);
-                       ender = toCTRL(ender);
+                       ender = grok_bslash_c(*p++, SIZE_ONLY);
                         break;
                     case '0': case '1': case '2': case '3':case '4':
                     case '5': case '6': case '7': case '8':case '9':
                         if (*p == '0' ||
-                         (isDIGIT(p[1]) && atoi(p) >= RExC_npar) ) {
-                            I32 flags = 0;
+                           (isDIGIT(p[1]) && atoi(p) >= RExC_npar))
+                       {
+                           I32 flags = PERL_SCAN_SILENT_ILLDIGIT;
                             STRLEN numlen = 3;
                             ender = grok_oct(p, &numlen, &flags, NULL);
+                           if (ender > 0xff) {
+                               REQUIRE_UTF8;
+                           }
                             p += numlen;
                         }
                         else {
@@ -7337,9 +7737,9 @@ tryagain:
                         {
                             SV* enc = PL_encoding;
                             ender = reg_recode((const char)(U8)ender, &enc);
-                           if (!enc && SIZE_ONLY && ckWARN(WARN_REGEXP))
-                               vWARN(p, "Invalid escape in the specified encoding");
-                           RExC_utf8 = 1;
+                           if (!enc && SIZE_ONLY)
+                               ckWARNreg(p, "Invalid escape in the specified encoding");
+                           REQUIRE_UTF8;
                         }
                         break;
                     case '\0':
@@ -7347,8 +7747,8 @@ tryagain:
                             FAIL("Trailing \\");
                         /* FALL THROUGH */
                     default:
-                       if (!SIZE_ONLY&& isALPHA(*p) && ckWARN(WARN_REGEXP))
-                           vWARN2(p + 1, "Unrecognized escape \\%c passed through", UCHARAT(p));
+                       if (!SIZE_ONLY&& isALPHA(*p))
+                           ckWARN2reg(p + 1, "Unrecognized escape \\%c passed through", UCHARAT(p));
                         goto normal_default;
                     }
                     break;
@@ -7475,6 +7875,9 @@ STATIC char *
  S_regwhite( RExC_state_t *pRExC_state, char *p )
  {
      const char *e = RExC_end;
+
+    PERL_ARGS_ASSERT_REGWHITE;
+
      while (p < e) {
         if (isSPACE(*p))
             ++p;
@@ -7511,6 +7914,8 @@ S_regpposixcc(pTHX_ RExC_state_t *pRExC_state, I32 value)
      dVAR;
      I32 namedclass = OOB_NAMEDCLASS;
  
+    PERL_ARGS_ASSERT_REGPPOSIXCC;
+
      if (value == '[' && RExC_parse + 1 < RExC_end &&
         /* I smell either [: or [= or [. -- POSIX has been here, right? */
         POSIXCC(UCHARAT(RExC_parse))) {
@@ -7625,6 +8030,9 @@ STATIC void
  S_checkposixcc(pTHX_ RExC_state_t *pRExC_state)
  {
      dVAR;
+
+    PERL_ARGS_ASSERT_CHECKPOSIXCC;
+
      if (POSIXCC(UCHARAT(RExC_parse))) {
         const char *s = RExC_parse;
         const char  c = *s++;
@@ -7632,10 +8040,9 @@ S_checkposixcc(pTHX_ RExC_state_t *pRExC_state)
         while (isALNUM(*s))
             s++;
         if (*s && c == *s && s[1] == ']') {
-           if (ckWARN(WARN_REGEXP))
-               vWARN3(s+2,
-                       "POSIX syntax [%c %c] belongs inside character classes",
-                       c, c);
+           ckWARN3reg(s+2,
+                      "POSIX syntax [%c %c] belongs inside character classes",
+                      c, c);
  
             /* [[=foo=]] and [[.foo.]] are still future. */
             if (POSIXCC_NOTYET(c)) {
@@ -7674,6 +8081,7 @@ case ANYOF_N##NAME:                                     \
      what = WORD;                                        \
      break
  
+/* Like above, but no locale test */
  #define _C_C_T_NOLOC_(NAME,TEST,WORD)                   \
  ANYOF_##NAME:                                           \
         for (value = 0; value < 256; value++)           \
@@ -7690,6 +8098,58 @@ case ANYOF_N##NAME:                                     \
      what = WORD;                                        \
      break
  
+/* Like the above, but the test differs depending on whether UNI_SEMANTICS is
+ * in effect, so two tests are passed in: TEST_8 is used under UNI_SEMANTICS
+ * and TEST_7 otherwise (under LOC only ANYOF_CLASS_SET is done).  The label
+ * never differs from the name here, so no separate label parameter is needed */
+#define _C_C_T_UNI_8_BIT(NAME,TEST_8,TEST_7,WORD)       \
+ANYOF_##NAME:                                           \
+    if (LOC) ANYOF_CLASS_SET(ret, ANYOF_##NAME);        \
+    else if (UNI_SEMANTICS) {                           \
+        for (value = 0; value < 256; value++) {         \
+            if (TEST_8) ANYOF_BITMAP_SET(ret, value);   \
+        }                                               \
+    }                                                   \
+    else {                                              \
+        for (value = 0; value < 256; value++) {         \
+            if (TEST_7) ANYOF_BITMAP_SET(ret, value);   \
+        }                                               \
+    }                                                   \
+    yesno = '+';                                        \
+    what = WORD;                                        \
+    break;                                              \
+case ANYOF_N##NAME:                                     \
+    if (LOC) ANYOF_CLASS_SET(ret, ANYOF_N##NAME);       \
+    else if (UNI_SEMANTICS) {                           \
+        for (value = 0; value < 256; value++) {         \
+            if (! TEST_8) ANYOF_BITMAP_SET(ret, value); \
+        }                                               \
+    }                                                   \
+    else {                                              \
+        for (value = 0; value < 256; value++) {         \
+            if (! TEST_7) ANYOF_BITMAP_SET(ret, value); \
+        }                                               \
+    }                                                   \
+    yesno = '!';                                        \
+    what = WORD;                                        \
+    break
+
+/*
+   We don't use PERL_LEGACY_UNICODE_CHARCLASS_MAPPINGS as the direct test,
+   so that it is possible to override the option here without having to
+   rebuild the entire core, as we would be required to do if we changed
+   regcomp.h, which is where PERL_LEGACY_UNICODE_CHARCLASS_MAPPINGS is defined.
+*/
+#if PERL_LEGACY_UNICODE_CHARCLASS_MAPPINGS
+#define BROKEN_UNICODE_CHARCLASS_MAPPINGS
+#endif
+
+#ifdef BROKEN_UNICODE_CHARCLASS_MAPPINGS
+#define POSIX_CC_UNI_NAME(CCNAME) CCNAME
+#else
+#define POSIX_CC_UNI_NAME(CCNAME) "Posix" CCNAME
+#endif
+
  /*
     parse a class specification and produce either an ANYOF node that
     matches the pattern or if the pattern matches a single char only and
@@ -7723,6 +8183,8 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, U32 depth)
          case we need to change the emitted regop to an EXACT. */
      const char * orig_parse = RExC_parse;
      GET_RE_DEBUG_FLAGS_DECL;
+
+    PERL_ARGS_ASSERT_REGCLASS;
  #ifndef DEBUGGING
      PERL_UNUSED_ARG(depth);
  #endif
@@ -7819,7 +8281,7 @@ parseit:
                      from earlier versions, OTOH that behaviour was broken
                      as well. */
                      UV v; /* value is register so we cant & it /grrr */
-                    if (reg_namedseq(pRExC_state, &v)) {
+                    if (reg_namedseq(pRExC_state, &v, NULL)) {
                          goto parseit;
                      }
                      value= v; 
@@ -7873,6 +8335,24 @@ parseit:
             case 'b':   value = '\b';                   break;
             case 'e':   value = ASCII_TO_NATIVE('\033');break;
             case 'a':   value = ASCII_TO_NATIVE('\007');break;
+           case 'o':
+               RExC_parse--;   /* function expects to be pointed at the 'o' */
+               {
+                   const char* error_msg;
+                   bool valid = grok_bslash_o(RExC_parse,
+                                              &value,
+                                              &numlen,
+                                              &error_msg,
+                                              SIZE_ONLY);
+                   RExC_parse += numlen;
+                   if (! valid) {
+                       vFAIL(error_msg);
+                   }
+               }
+               if (PL_encoding && value < 0x100) {
+                   goto recode_encoding;
+               }
+               break;
             case 'x':
                 if (*RExC_parse == '{') {
                      I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
@@ -7895,13 +8375,13 @@ parseit:
                     goto recode_encoding;
                 break;
             case 'c':
-               value = UCHARAT(RExC_parse++);
-               value = toCTRL(value);
+               value = grok_bslash_c(*RExC_parse++, SIZE_ONLY);
                 break;
             case '0': case '1': case '2': case '3': case '4':
-           case '5': case '6': case '7': case '8': case '9':
+           case '5': case '6': case '7':
                 {
-                   I32 flags = 0;
+                   /* Take 1-3 octal digits */
+                   I32 flags = PERL_SCAN_SILENT_ILLDIGIT;
                     numlen = 3;
                     value = grok_oct(--RExC_parse, &numlen, &flags, NULL);
                     RExC_parse += numlen;
@@ -7913,16 +8393,18 @@ parseit:
                 {
                     SV* enc = PL_encoding;
                     value = reg_recode((const char)(U8)value, &enc);
-                   if (!enc && SIZE_ONLY && ckWARN(WARN_REGEXP))
-                       vWARN(RExC_parse,
-                             "Invalid escape in the specified encoding");
+                   if (!enc && SIZE_ONLY)
+                       ckWARNreg(RExC_parse,
+                                 "Invalid escape in the specified encoding");
                     break;
                 }
             default:
-               if (!SIZE_ONLY && isALPHA(value) && ckWARN(WARN_REGEXP))
-                   vWARN2(RExC_parse,
-                          "Unrecognized escape \\%c in character class passed through",
-                          (int)value);
+               /* Allow \_ to not give an error */
+               if (!SIZE_ONLY && isALNUM(value) && value != '_') {
+                   ckWARN2reg(RExC_parse,
+                              "Unrecognized escape \\%c in character class passed through",
+                              (int)value);
+               }
                 break;
             }
         } /* end of \blah */
@@ -7941,14 +8423,13 @@ parseit:
             /* a bad range like a-\d, a-[:digit:] ? */
             if (range) {
                 if (!SIZE_ONLY) {
-                   if (ckWARN(WARN_REGEXP)) {
-                       const int w =
-                           RExC_parse >= rangebegin ?
-                           RExC_parse - rangebegin : 0;
-                       vWARN4(RExC_parse,
+                   const int w =
+                       RExC_parse >= rangebegin ?
+                       RExC_parse - rangebegin : 0;
+                   ckWARN4reg(RExC_parse,
                                "False [] range \"%*.*s\"",
                                w, w, rangebegin);
-                   }
+
                     if (prevvalue < 256) {
                         ANYOF_BITMAP_SET(ret, prevvalue);
                         ANYOF_BITMAP_SET(ret, '-');
@@ -7976,18 +8457,26 @@ parseit:
                  * A similar issue a little earlier when switching on value.
                  * --jhi */
                 switch ((I32)namedclass) {
-               case _C_C_T_(ALNUM, isALNUM(value), "Word");
-               case _C_C_T_(ALNUMC, isALNUMC(value), "Alnum");
-               case _C_C_T_(ALPHA, isALPHA(value), "Alpha");
-               case _C_C_T_(BLANK, isBLANK(value), "Blank");
-               case _C_C_T_(CNTRL, isCNTRL(value), "Cntrl");
-               case _C_C_T_(GRAPH, isGRAPH(value), "Graph");
-               case _C_C_T_(LOWER, isLOWER(value), "Lower");
-               case _C_C_T_(PRINT, isPRINT(value), "Print");
-               case _C_C_T_(PSXSPC, isPSXSPC(value), "Space");
-               case _C_C_T_(PUNCT, isPUNCT(value), "Punct");
-               case _C_C_T_(SPACE, isSPACE(value), "SpacePerl");
-               case _C_C_T_(UPPER, isUPPER(value), "Upper");
+               
+               case _C_C_T_(ALNUMC, isALNUMC(value), POSIX_CC_UNI_NAME("Alnum"));
+               case _C_C_T_(ALPHA, isALPHA(value), POSIX_CC_UNI_NAME("Alpha"));
+               case _C_C_T_(BLANK, isBLANK(value), POSIX_CC_UNI_NAME("Blank"));
+               case _C_C_T_(CNTRL, isCNTRL(value), POSIX_CC_UNI_NAME("Cntrl"));
+               case _C_C_T_(GRAPH, isGRAPH(value), POSIX_CC_UNI_NAME("Graph"));
+               case _C_C_T_(LOWER, isLOWER(value), POSIX_CC_UNI_NAME("Lower"));
+               case _C_C_T_(PRINT, isPRINT(value), POSIX_CC_UNI_NAME("Print"));
+               case _C_C_T_(PSXSPC, isPSXSPC(value), POSIX_CC_UNI_NAME("Space"));
+               case _C_C_T_(PUNCT, isPUNCT(value), POSIX_CC_UNI_NAME("Punct"));
+               case _C_C_T_(UPPER, isUPPER(value), POSIX_CC_UNI_NAME("Upper"));
+#ifdef BROKEN_UNICODE_CHARCLASS_MAPPINGS
+                /* \s, \w match all unicode if utf8. */
+                case _C_C_T_UNI_8_BIT(SPACE, isSPACE_L1(value), isSPACE(value), "SpacePerl");
+                case _C_C_T_UNI_8_BIT(ALNUM, isWORDCHAR_L1(value), isALNUM(value), "Word");
+#else
+                /* \s, \w match ascii and locale only */
+                case _C_C_T_UNI_8_BIT(SPACE, isSPACE_L1(value), isSPACE(value), "PerlSpace");
+                case _C_C_T_UNI_8_BIT(ALNUM, isWORDCHAR_L1(value), isALNUM(value), "PerlWord");
+#endif         
                 case _C_C_T_(XDIGIT, isXDIGIT(value), "XDigit");
                 case _C_C_T_NOLOC_(VERTWS, is_VERTWS_latin1(&value), "VertSpace");
                 case _C_C_T_NOLOC_(HORIZWS, is_HORIZWS_latin1(&value), "HorizSpace");
@@ -8034,7 +8523,7 @@ parseit:
                             ANYOF_BITMAP_SET(ret, value);
                     }
                     yesno = '+';
-                   what = "Digit";
+                   what = POSIX_CC_UNI_NAME("Digit");
                     break;
                 case ANYOF_NDIGIT:
                     if (LOC)
@@ -8047,7 +8536,7 @@ parseit:
                             ANYOF_BITMAP_SET(ret, value);
                     }
                     yesno = '!';
-                   what = "Digit";
+                   what = POSIX_CC_UNI_NAME("Digit");
                     break;              
                 case ANYOF_MAX:
                     /* this is to handle \p and \P */
@@ -8247,6 +8736,7 @@ parseit:
          *STRING(ret)= (char)value;
          STR_LEN(ret)= 1;
          RExC_emit += STR_SZ(1);
+       SvREFCNT_dec(listsv);
          return ret;
      }
      /* optimize case-insensitive simple patterns (e.g. /[a-z]/i) */
@@ -8283,8 +8773,8 @@ parseit:
          * used later (regexec.c:S_reginclass()). */
         av_store(av, 0, listsv);
         av_store(av, 1, NULL);
-       av_store(av, 2, (SV*)unicode_alternate);
-       rv = newRV_noinc((SV*)av);
+       av_store(av, 2, MUTABLE_SV(unicode_alternate));
+       rv = newRV_noinc(MUTABLE_SV(av));
         n = add_data(pRExC_state, 1, "s");
         RExC_rxi->data->data[n] = (void*)rv;
         ARG_SET(ret, n);
@@ -8310,6 +8800,9 @@ STATIC bool
  S_reg_skipcomment(pTHX_ RExC_state_t *pRExC_state)
  {
      bool ended = 0;
+
+    PERL_ARGS_ASSERT_REG_SKIPCOMMENT;
+
      while (RExC_parse < RExC_end)
          if (*RExC_parse++ == '\n') {
              ended = 1;
@@ -8326,7 +8819,7 @@ S_reg_skipcomment(pTHX_ RExC_state_t *pRExC_state)
  
  /* nextchar()
  
-   Advance that parse position, and optionall absorbs
+   Advance that parse position, and optionally absorbs
     "whitespace" from the inputstream.
  
     Without /x "whitespace" means (?#...) style comments only,
@@ -8342,6 +8835,8 @@ S_nextchar(pTHX_ RExC_state_t *pRExC_state)
  {
      char* const retval = RExC_parse++;
  
+    PERL_ARGS_ASSERT_NEXTCHAR;
+
      for (;;) {
         if (*RExC_parse == '(' && RExC_parse[1] == '?' &&
                 RExC_parse[2] == '#') {
@@ -8378,6 +8873,8 @@ S_reg_node(pTHX_ RExC_state_t *pRExC_state, U8 op)
      regnode * const ret = RExC_emit;
      GET_RE_DEBUG_FLAGS_DECL;
  
+    PERL_ARGS_ASSERT_REG_NODE;
+
      if (SIZE_ONLY) {
         SIZE_ALIGN(RExC_size);
         RExC_size += 1;
@@ -8417,6 +8914,8 @@ S_reganode(pTHX_ RExC_state_t *pRExC_state, U8 op, U32 arg)
      regnode * const ret = RExC_emit;
      GET_RE_DEBUG_FLAGS_DECL;
  
+    PERL_ARGS_ASSERT_REGANODE;
+
      if (SIZE_ONLY) {
         SIZE_ALIGN(RExC_size);
         RExC_size += 2;
@@ -8467,6 +8966,9 @@ STATIC STRLEN
  S_reguni(pTHX_ const RExC_state_t *pRExC_state, UV uv, char* s)
  {
      dVAR;
+
+    PERL_ARGS_ASSERT_REGUNI;
+
      return SIZE_ONLY ? UNISKIP(uv) : (uvchr_to_utf8((U8*)s, uv) - (U8*)s);
  }
  
@@ -8485,6 +8987,8 @@ S_reginsert(pTHX_ RExC_state_t *pRExC_state, U8 op, regnode *opnd, U32 depth)
      const int offset = regarglen[(U8)op];
      const int size = NODE_STEP_REGNODE + offset;
      GET_RE_DEBUG_FLAGS_DECL;
+
+    PERL_ARGS_ASSERT_REGINSERT;
      PERL_UNUSED_ARG(depth);
  /* (PL_regkind[(U8)op] == CURLY ? EXTRA_STEP_2ARGS : 0); */
      DEBUG_PARSE_FMT("inst"," - %s",PL_reg_name[op]);
@@ -8567,6 +9071,8 @@ S_regtail(pTHX_ RExC_state_t *pRExC_state, regnode *p, const regnode *val,U32 de
      dVAR;
      register regnode *scan;
      GET_RE_DEBUG_FLAGS_DECL;
+
+    PERL_ARGS_ASSERT_REGTAIL;
  #ifndef DEBUGGING
      PERL_UNUSED_ARG(depth);
  #endif
@@ -8627,9 +9133,10 @@ S_regtail_study(pTHX_ RExC_state_t *pRExC_state, regnode *p, const regnode *val,
  #ifdef EXPERIMENTAL_INPLACESCAN
      I32 min = 0;
  #endif
-
      GET_RE_DEBUG_FLAGS_DECL;
  
+    PERL_ARGS_ASSERT_REGTAIL_STUDY;
+
  
      if (SIZE_ONLY)
          return exact;
@@ -8694,35 +9201,15 @@ S_regtail_study(pTHX_ RExC_state_t *pRExC_state, regnode *p, const regnode *val,
  #endif
  
  /*
- - regcurly - a little FSA that accepts {\d+,?\d*}
- */
-STATIC I32
-S_regcurly(register const char *s)
-{
-    if (*s++ != '{')
-       return FALSE;
-    if (!isDIGIT(*s))
-       return FALSE;
-    while (isDIGIT(*s))
-       s++;
-    if (*s == ',')
-       s++;
-    while (isDIGIT(*s))
-       s++;
-    if (*s != '}')
-       return FALSE;
-    return TRUE;
-}
-
-
-/*
   - regdump - dump a regexp onto Perl_debug_log in vaguely comprehensible form
   */
  #ifdef DEBUGGING
-void 
-S_regdump_extflags(pTHX_ const char *lead, const U32 flags) {
+static void 
+S_regdump_extflags(pTHX_ const char *lead, const U32 flags)
+{
      int bit;
      int set=0;
+
      for (bit=0; bit<32; bit++) {
          if (flags & (1<<bit)) {
              if (!set++ && lead) 
@@ -8749,6 +9236,8 @@ Perl_regdump(pTHX_ const regexp *r)
      RXi_GET_DECL(r,ri);
      GET_RE_DEBUG_FLAGS_DECL;
  
+    PERL_ARGS_ASSERT_REGDUMP;
+
      (void)dumpuntil(r, ri->program, ri->program + 1, NULL, NULL, sv, 0, 0);
  
      /* Header fields of interest. */
@@ -8823,6 +9312,7 @@ Perl_regdump(pTHX_ const regexp *r)
      PerlIO_printf(Perl_debug_log, "\n");
      DEBUG_FLAGS_r(regdump_extflags("r->extflags: ",r->extflags));            
  #else
+    PERL_ARGS_ASSERT_REGDUMP;
      PERL_UNUSED_CONTEXT;
      PERL_UNUSED_ARG(r);
  #endif /* DEBUGGING */
@@ -8831,6 +9321,17 @@ Perl_regdump(pTHX_ const regexp *r)
  /*
  - regprop - printable representation of opcode
  */
+#define EMIT_ANYOF_TEST_SEPARATOR(do_sep,sv,flags) \
+STMT_START { \
+        if (do_sep) {                           \
+            Perl_sv_catpvf(aTHX_ sv,"%s][%s",PL_colors[1],PL_colors[0]); \
+            if (flags & ANYOF_INVERT)           \
+                /* repeat the ^ so that every bracketed group shows the inversion */ \
+                sv_catpvs(sv, "^");             \
+            do_sep = 0;                         \
+        }                                       \
+} STMT_END
+
  void
  Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o)
  {
@@ -8840,8 +9341,9 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o)
      RXi_GET_DECL(prog,progi);
      GET_RE_DEBUG_FLAGS_DECL;
      
+    PERL_ARGS_ASSERT_REGPROP;
  
-    sv_setpvn(sv, "", 0);
+    sv_setpvs(sv, "");
  
      if (OP(o) > REGNODE_MAX)           /* regnode.type is unsigned */
         /* It would be nice to FAIL() here, but this may be called from
@@ -8920,16 +9422,16 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o)
         Perl_sv_catpvf(aTHX_ sv, "[%d/%d]", o->flags & 0xf, o->flags>>4);
      else if (k == REF || k == OPEN || k == CLOSE || k == GROUPP || OP(o)==ACCEPT) {
         Perl_sv_catpvf(aTHX_ sv, "%d", (int)ARG(o));    /* Parenth number */
-       if ( prog->paren_names ) {
+       if ( RXp_PAREN_NAMES(prog) ) {
              if ( k != REF || OP(o) < NREF) {       
-               AV *list= (AV *)progi->data->data[progi->name_list_idx];
+               AV *list= MUTABLE_AV(progi->data->data[progi->name_list_idx]);
                 SV **name= av_fetch(list, ARG(o), 0 );
                 if (name)
                     Perl_sv_catpvf(aTHX_ sv, " '%"SVf"'", SVfARG(*name));
              }      
              else {
-                AV *list= (AV *)progi->data->data[ progi->name_list_idx ];
-                SV *sv_dat=(SV*)progi->data->data[ ARG( o ) ];
+                AV *list= MUTABLE_AV(progi->data->data[ progi->name_list_idx ]);
+                SV *sv_dat= MUTABLE_SV(progi->data->data[ ARG( o ) ]);
                  I32 *nums=(I32*)SvPVX(sv_dat);
                  SV **name= av_fetch(list, nums[0], 0 );
                  I32 n;
@@ -8947,7 +9449,7 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o)
      else if (k == VERB) {
          if (!o->flags) 
              Perl_sv_catpvf(aTHX_ sv, ":%"SVf, 
-                SVfARG((SV*)progi->data->data[ ARG( o ) ]));
+                          SVfARG((MUTABLE_SV(progi->data->data[ ARG( o ) ]))));
      } else if (k == LOGICAL)
         Perl_sv_catpvf(aTHX_ sv, "[%d]", o->flags);     /* 2: embedded, otherwise 1 */
      else if (k == FOLDCHAR)
@@ -8955,6 +9457,7 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o)
      else if (k == ANYOF) {
         int i, rangestart = -1;
         const U8 flags = ANYOF_FLAGS(o);
+       int do_sep = 0;
  
         /* Should be synchronized with * ANYOF_ #xdefines in regcomp.h */
         static const char * const anyofs[] = {
@@ -8970,8 +9473,8 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o)
             "[:^alpha:]",
             "[:ascii:]",
             "[:^ascii:]",
-           "[:ctrl:]",
-           "[:^ctrl:]",
+           "[:cntrl:]",
+           "[:^cntrl:]",
             "[:graph:]",
             "[:^graph:]",
             "[:lower:]",
@@ -8997,6 +9500,8 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o)
         Perl_sv_catpvf(aTHX_ sv, "[%s", PL_colors[0]);
         if (flags & ANYOF_INVERT)
             sv_catpvs(sv, "^");
+       
+       /* output what the standard cp 0-255 bitmap matches */
         for (i = 0; i <= 256; i++) {
             if (i < 256 && ANYOF_BITMAP_TEST(o,i)) {
                 if (rangestart == -1)
@@ -9010,15 +9515,23 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o)
                     sv_catpvs(sv, "-");
                     put_byte(sv, i - 1);
                 }
+               do_sep = 1;
                 rangestart = -1;
             }
         }
-
+        
+        EMIT_ANYOF_TEST_SEPARATOR(do_sep,sv,flags);
+        /* output any special charclass tests (used mostly under use locale) */
         if (o->flags & ANYOF_CLASS)
             for (i = 0; i < (int)(sizeof(anyofs)/sizeof(char*)); i++)
-               if (ANYOF_CLASS_TEST(o,i))
+               if (ANYOF_CLASS_TEST(o,i)) {
                     sv_catpv(sv, anyofs[i]);
-
+                   do_sep = 1;
+               }
+        
+        EMIT_ANYOF_TEST_SEPARATOR(do_sep,sv,flags);
+        
+        /* output information about the unicode matching */
         if (flags & ANYOF_UNICODE)
             sv_catpvs(sv, "{unicode}");
         else if (flags & ANYOF_UNICODE_ALL)
@@ -9031,7 +9544,7 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o)
             if (lv) {
                 if (sw) {
                     U8 s[UTF8_MAXBYTES_CASE+1];
-               
+
                     for (i = 0; i <= 256; i++) { /* just the first 256 */
                         uvchr_to_utf8(s, i);
                         
@@ -9107,6 +9620,8 @@ Perl_re_intuit_string(pTHX_ REGEXP * const r)
      dVAR;
      struct regexp *const prog = (struct regexp *)SvANY(r);
      GET_RE_DEBUG_FLAGS_DECL;
+
+    PERL_ARGS_ASSERT_RE_INTUIT_STRING;
      PERL_UNUSED_CONTEXT;
  
      DEBUG_COMPILE_r(
@@ -9154,30 +9669,25 @@ Perl_pregfree2(pTHX_ REGEXP *rx)
      struct regexp *const r = (struct regexp *)SvANY(rx);
      GET_RE_DEBUG_FLAGS_DECL;
  
+    PERL_ARGS_ASSERT_PREGFREE2;
+
      if (r->mother_re) {
          ReREFCNT_dec(r->mother_re);
      } else {
          CALLREGFREE_PVT(rx); /* free the private data */
-        if (r->paren_names)
-            SvREFCNT_dec(r->paren_names);
+        SvREFCNT_dec(RXp_PAREN_NAMES(r));
      }        
      if (r->substrs) {
-        if (r->anchored_substr)
-            SvREFCNT_dec(r->anchored_substr);
-        if (r->anchored_utf8)
-            SvREFCNT_dec(r->anchored_utf8);
-        if (r->float_substr)
-            SvREFCNT_dec(r->float_substr);
-        if (r->float_utf8)
-            SvREFCNT_dec(r->float_utf8);
+        SvREFCNT_dec(r->anchored_substr);
+        SvREFCNT_dec(r->anchored_utf8);
+        SvREFCNT_dec(r->float_substr);
+        SvREFCNT_dec(r->float_utf8);
         Safefree(r->substrs);
      }
      RX_MATCH_COPY_FREE(rx);
  #ifdef PERL_OLD_COPY_ON_WRITE
-    if (r->saved_copy)
-        SvREFCNT_dec(r->saved_copy);
+    SvREFCNT_dec(r->saved_copy);
  #endif
-    Safefree(r->swap);
      Safefree(r->offs);
  }
  
@@ -9199,20 +9709,30 @@ Perl_pregfree2(pTHX_ REGEXP *rx)
      
      
  REGEXP *
-Perl_reg_temp_copy (pTHX_ REGEXP *rx) {
-    REGEXP *ret_x = newSV_type(SVt_REGEXP);
-    struct regexp *ret = (struct regexp *)SvANY(ret_x);
+Perl_reg_temp_copy (pTHX_ REGEXP *ret_x, REGEXP *rx)
+{
+    struct regexp *ret;
      struct regexp *const r = (struct regexp *)SvANY(rx);
      register const I32 npar = r->nparens+1;
+
+    PERL_ARGS_ASSERT_REG_TEMP_COPY;
+
+    if (!ret_x)
+       ret_x = (REGEXP*) newSV_type(SVt_REGEXP);
+    ret = (struct regexp *)SvANY(ret_x);
+    
      (void)ReREFCNT_inc(rx);
-    /* FIXME ORANGE (once we start actually using the regular SV fields.) */
      /* We can take advantage of the existing "copied buffer" mechanism in SVs
         by pointing directly at the buffer, but flagging that the allocated
         space in the copy is zero. As we've just done a struct copy, it's now
         a case of zero-ing that, rather than copying the current length.  */
      SvPV_set(ret_x, RX_WRAPPED(rx));
-    StructCopy(r, ret, regexp);
+    SvFLAGS(ret_x) |= SvFLAGS(rx) & (SVf_POK|SVp_POK|SVf_UTF8);
+    memcpy(&(ret->xpv_cur), &(r->xpv_cur),
+          sizeof(regexp) - STRUCT_OFFSET(regexp, xpv_cur));
      SvLEN_set(ret_x, 0);
+    SvSTASH_set(ret_x, NULL);
+    SvMAGIC_set(ret_x, NULL);
      Newx(ret->offs, npar, regexp_paren_pair);
      Copy(r->offs, ret->offs, npar, regexp_paren_pair);
      if (r->substrs) {
@@ -9232,7 +9752,6 @@ Perl_reg_temp_copy (pTHX_ REGEXP *rx) {
      ret->saved_copy = NULL;
  #endif
      ret->mother_re = rx;
-    ret->swap = NULL;
      
      return ret_x;
  }
@@ -9257,7 +9776,9 @@ Perl_regfree_internal(pTHX_ REGEXP * const rx)
      struct regexp *const r = (struct regexp *)SvANY(rx);
      RXi_GET_DECL(r,ri);
      GET_RE_DEBUG_FLAGS_DECL;
-    
+
+    PERL_ARGS_ASSERT_REGFREE_INTERNAL;
+
      DEBUG_COMPILE_r({
         if (!PL_colorset)
             reginitcolors();
@@ -9282,16 +9803,17 @@ Perl_regfree_internal(pTHX_ REGEXP * const rx)
         while (--n >= 0) {
            /* If you add a ->what type here, update the comment in regcomp.h */
             switch (ri->data->what[n]) {
+           case 'a':
             case 's':
             case 'S':
             case 'u':
-               SvREFCNT_dec((SV*)ri->data->data[n]);
+               SvREFCNT_dec(MUTABLE_SV(ri->data->data[n]));
                 break;
             case 'f':
                 Safefree(ri->data->data[n]);
                 break;
             case 'p':
-               new_comppad = (AV*)ri->data->data[n];
+               new_comppad = MUTABLE_AV(ri->data->data[n]);
                 break;
             case 'o':
                 if (new_comppad == NULL)
@@ -9307,7 +9829,7 @@ Perl_regfree_internal(pTHX_ REGEXP * const rx)
                      op_free((OP_4tree*)ri->data->data[n]);
  
                 PAD_RESTORE_LOCAL(old_comppad);
-               SvREFCNT_dec((SV*)new_comppad);
+               SvREFCNT_dec(MUTABLE_SV(new_comppad));
                 new_comppad = NULL;
                 break;
             case 'n':
@@ -9343,12 +9865,9 @@ Perl_regfree_internal(pTHX_ REGEXP * const rx)
                          PerlMemShared_free(trie->trans);
                          if (trie->bitmap)
                              PerlMemShared_free(trie->bitmap);
-                        if (trie->wordlen)
-                            PerlMemShared_free(trie->wordlen);
                          if (trie->jump)
                              PerlMemShared_free(trie->jump);
-                        if (trie->nextword)
-                            PerlMemShared_free(trie->nextword);
+                       PerlMemShared_free(trie->wordinfo);
                          /* do this last!!!! */
                          PerlMemShared_free(ri->data->data[n]);
                     }
@@ -9365,16 +9884,15 @@ Perl_regfree_internal(pTHX_ REGEXP * const rx)
      Safefree(ri);
  }
  
-#define sv_dup_inc(s,t)        SvREFCNT_inc(sv_dup(s,t))
-#define av_dup_inc(s,t)        (AV*)SvREFCNT_inc(sv_dup((SV*)s,t))
-#define hv_dup_inc(s,t)        (HV*)SvREFCNT_inc(sv_dup((SV*)s,t))
+#define av_dup_inc(s,t)        MUTABLE_AV(sv_dup_inc((const SV *)s,t))
+#define hv_dup_inc(s,t)        MUTABLE_HV(sv_dup_inc((const SV *)s,t))
  #define SAVEPVN(p,n)   ((p) ? savepvn(p,n) : NULL)
  
  /* 
     re_dup - duplicate a regexp. 
     
-   This routine is expected to clone a given regexp structure. It is not
-   compiler under USE_ITHREADS.
+   This routine is expected to clone a given regexp structure. It is only
+   compiled under USE_ITHREADS.
  
     After all of the core data stored in struct regexp is duplicated
     the regexp_engine.dupe method is used to copy any private data
@@ -9393,6 +9911,8 @@ Perl_re_dup_guts(pTHX_ const REGEXP *sstr, REGEXP *dstr, CLONE_PARAMS *param)
      const struct regexp *r = (const struct regexp *)SvANY(sstr);
      struct regexp *ret = (struct regexp *)SvANY(dstr);
      
+    PERL_ARGS_ASSERT_RE_DUP_GUTS;
+
      npar = r->nparens+1;
      Newx(ret->offs, npar, regexp_paren_pair);
      Copy(r->offs, ret->offs, npar, regexp_paren_pair);
@@ -9405,7 +9925,9 @@ Perl_re_dup_guts(pTHX_ const REGEXP *sstr, REGEXP *dstr, CLONE_PARAMS *param)
         /* Do it this way to avoid reading from *r after the StructCopy().
            That way, if any of the sv_dup_inc()s dislodge *r from the L1
            cache, it doesn't matter.  */
-       const bool anchored = r->check_substr == r->anchored_substr;
+       const bool anchored = r->check_substr
+           ? r->check_substr == r->anchored_substr
+           : r->check_utf8 == r->anchored_utf8;
          Newx(ret->substrs, 1, struct reg_substr_data);
         StructCopy(r->substrs, ret->substrs, struct reg_substr_data);
  
@@ -9428,10 +9950,16 @@ Perl_re_dup_guts(pTHX_ const REGEXP *sstr, REGEXP *dstr, CLONE_PARAMS *param)
                 ret->check_substr = ret->float_substr;
                 ret->check_utf8 = ret->float_utf8;
             }
+       } else if (ret->check_utf8) {
+           if (anchored) {
+               ret->check_utf8 = ret->anchored_utf8;
+           } else {
+               ret->check_utf8 = ret->float_utf8;
+           }
         }
      }
  
-    ret->paren_names    = hv_dup_inc(ret->paren_names, param);
+    RXp_PAREN_NAMES(ret) = hv_dup_inc(RXp_PAREN_NAMES(ret), param);
  
      if (ret->pprivate)
         RXi_SET(ret,CALLREGDUPE_PVT(dstr,param));
@@ -9444,9 +9972,21 @@ Perl_re_dup_guts(pTHX_ const REGEXP *sstr, REGEXP *dstr, CLONE_PARAMS *param)
      ret->saved_copy = NULL;
  #endif
  
-    ret->mother_re      = NULL;
+    if (ret->mother_re) {
+       if (SvPVX_const(dstr) == SvPVX_const(ret->mother_re)) {
+           /* Our storage points directly to our mother regexp, but that's
+              1: a buffer in a different thread
+              2: something we no longer hold a reference on
+              so we need to copy it locally.  */
+           /* Note we need to use SvCUR() on our mother_re, because it, in
+              turn, may well be pointing to its own mother_re.  */
+           SvPV_set(dstr, SAVEPVN(SvPVX_const(ret->mother_re),
+                                  SvCUR(ret->mother_re)+1));
+           SvLEN_set(dstr, SvCUR(ret->mother_re)+1);
+       }
+       ret->mother_re      = NULL;
+    }
      ret->gofs = 0;
-    ret->seen_evals = 0;
  }
  #endif /* PERL_IN_XSUB_RE */
  
@@ -9472,11 +10012,13 @@ Perl_regdupe_internal(pTHX_ REGEXP * const rx, CLONE_PARAMS *param)
      regexp_internal *reti;
      int len, npar;
      RXi_GET_DECL(r,ri);
+
+    PERL_ARGS_ASSERT_REGDUPE_INTERNAL;
      
      npar = r->nparens+1;
      len = ProgLen(ri);
      
-    Newxc(reti, sizeof(regexp_internal) + (len+1)*sizeof(regnode), char, regexp_internal);
+    Newxc(reti, sizeof(regexp_internal) + len*sizeof(regnode), char, regexp_internal);
      Copy(ri->program, reti->program, len+1, regnode);
      
  
@@ -9495,13 +10037,14 @@ Perl_regdupe_internal(pTHX_ REGEXP * const rx, CLONE_PARAMS *param)
         for (i = 0; i < count; i++) {
             d->what[i] = ri->data->what[i];
             switch (d->what[i]) {
-               /* legal options are one of: sSfpontTu
+               /* legal options are one of: sSfpontTua
                    see also regcomp.h and pregfree() */
+           case 'a': /* actually an AV, but the dup function is identical.  */
             case 's':
             case 'S':
             case 'p': /* actually an AV, but the dup function is identical.  */
             case 'u': /* actually an HV, but the dup function is identical.  */
-               d->data[i] = sv_dup_inc((SV *)ri->data->data[i], param);
+               d->data[i] = sv_dup_inc((const SV *)ri->data->data[i], param);
                 break;
             case 'f':
                 /* This is cheating. */
@@ -9558,48 +10101,8 @@ Perl_regdupe_internal(pTHX_ REGEXP * const rx, CLONE_PARAMS *param)
  
  #endif    /* USE_ITHREADS */
  
-/* 
-   reg_stringify() 
-   
-   converts a regexp embedded in a MAGIC struct to its stringified form, 
-   caching the converted form in the struct and returns the cached 
-   string. 
-
-   If lp is nonnull then it is used to return the length of the 
-   resulting string
-   
-   If flags is nonnull and the returned string contains UTF8 then 
-   (*flags & 1) will be true.
-   
-   If haseval is nonnull then it is used to return whether the pattern 
-   contains evals.
-   
-   Normally called via macro: 
-   
-        CALLREG_STRINGIFY(mg,&len,&utf8);
-        
-   And internally with
-   
-        CALLREG_AS_STR(mg,&lp,&flags,&haseval)        
-    
-   See sv_2pv_flags() in sv.c for an example of internal usage.
-    
- */
  #ifndef PERL_IN_XSUB_RE
  
-char *
-Perl_reg_stringify(pTHX_ MAGIC *mg, STRLEN *lp, U32 *flags, I32 *haseval ) {
-    dVAR;
-    const REGEXP * const re = (REGEXP *)mg->mg_obj;
-    if (haseval) 
-        *haseval = RX_SEEN_EVALS(re);
-    if (flags)    
-       *flags = RX_UTF8(re) ? 1 : 0;
-    if (lp)
-       *lp = RX_WRAPLEN(re);
-    return RX_WRAPPED(re);
-}
-
  /*
   - regnext - dig the "next" pointer out of a node
   */
@@ -9612,6 +10115,10 @@ Perl_regnext(pTHX_ register regnode *p)
      if (!p)
         return(NULL);
  
+    if (OP(p) > REGNODE_MAX) {         /* regnode.type is unsigned */
+       Perl_croak(aTHX_ "Corrupted regexp opcode %d > %d", (int)OP(p), (int)REGNODE_MAX);
+    }
+
      offset = (reg_off_by_arg[OP(p)] ? ARG(p) : NEXT_OFF(p));
      if (offset == 0)
         return(NULL);
@@ -9630,6 +10137,8 @@ S_re_croak2(pTHX_ const char* pat1,const char* pat2,...)
      SV *msv;
      const char *message;
  
+    PERL_ARGS_ASSERT_RE_CROAK2;
+
      if (l1 > 510)
         l1 = 510;
      if (l1 + l2 > 510)
@@ -9669,7 +10178,7 @@ Perl_save_re_context(pTHX)
  
      state = (struct re_save_state *)(PL_savestack + PL_savestack_ix);
      PL_savestack_ix += SAVESTACK_ALLOC_FOR_RE_SAVE_STATE;
-    SSPUSHINT(SAVEt_RE_STATE);
+    SSPUSHUV(SAVEt_RE_STATE);
  
      Copy(&PL_reg_state, state, 1, struct re_save_state);
  
@@ -9719,6 +10228,8 @@ clear_re(pTHX_ void *r)
  STATIC void
  S_put_byte(pTHX_ SV *sv, int c)
  {
+    PERL_ARGS_ASSERT_PUT_BYTE;
+
      /* Our definition of isPRINT() ignores locales, so only bytes that are
         not part of UTF-8 are considered printable. I assume that the same
         holds for UTF-EBCDIC.
@@ -9760,7 +10271,9 @@ S_dumpuntil(pTHX_ const regexp *r, const regnode *start, const regnode *node,
      
      RXi_GET_DECL(r,ri);
      GET_RE_DEBUG_FLAGS_DECL;
-    
+
+    PERL_ARGS_ASSERT_DUMPUNTIL;
+
  #ifdef DEBUG_DUMPUNTIL
      PerlIO_printf(Perl_debug_log, "--- %d : %d - %d - %d\n",indent,node-start,
          last ? last-start : 0,plast ? plast-start : 0);
@@ -9826,11 +10339,11 @@ S_dumpuntil(pTHX_ const regexp *r, const regnode *start, const regnode *node,
             const reg_trie_data * const trie =
                 (reg_trie_data*)ri->data->data[op<AHOCORASICK ? n : ac->trie];
  #ifdef DEBUGGING
-           AV *const trie_words = (AV *) ri->data->data[n + TRIE_WORDS_OFFSET];
+           AV *const trie_words = MUTABLE_AV(ri->data->data[n + TRIE_WORDS_OFFSET]);
  #endif
             const regnode *nextbranch= NULL;
             I32 word_idx;
-            sv_setpvn(sv, "", 0);
+            sv_setpvs(sv, "");
             for (word_idx= 0; word_idx < (I32)trie->wordcount; word_idx++) {
                 SV ** const elem_ptr = av_fetch(trie_words,word_idx,0);