This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Thinko: we want the flags for native cc, not gcc.
[perl5.git] / regcomp.c
index 46a0bb6..2e37633 100644 (file)
--- a/regcomp.c
+++ b/regcomp.c
 /* *These* symbols are masked to allow static link. */
 #  define Perl_regnext my_regnext
 #  define Perl_save_re_context my_save_re_context
-#  define Perl_reginitcolors my_reginitcolors 
+#  define Perl_reginitcolors my_reginitcolors
 
 #  define PERL_NO_GET_CONTEXT
-#endif 
+#endif
 
 /*SUPPRESS 112*/
 /*
@@ -69,7 +69,7 @@
  *
  ****    Alterations to Henry's code are...
  ****
- ****    Copyright (c) 1991-2000, Larry Wall
+ ****    Copyright (c) 1991-2001, Larry Wall
  ****
  ****    You may distribute under the terms of either the GNU General Public
  ****    License or the Artistic License, as specified in the README file.
@@ -115,10 +115,12 @@ typedef struct RExC_state_t {
     U16                flags16;                /* are we folding, multilining? */
     char       *precomp;               /* uncompiled string. */
     regexp     *rx;
+    char       *start;                 /* Start of input for compile */
     char       *end;                   /* End of input for compile */
     char       *parse;                 /* Input-scan pointer. */
     I32                whilem_seen;            /* number of WHILEM in this expr */
-    regnode    *emit;                  /* Code-emit pointer; &regdummy = don't */
+    regnode    *emit_start;            /* Start of emitted-code area */
+    regnode    *emit;                  /* Code-emit pointer; &regdummy = don't = compiling */
     I32                naughty;                /* How bad is this pattern? */
     I32                sawback;                /* Did we see \1, ...? */
     U32                seen;
@@ -127,6 +129,7 @@ typedef struct RExC_state_t {
     I32                extralen;
     I32                seen_zerolen;
     I32                seen_evals;
+    I32                utf8;
 #if ADD_TO_REGEXEC
     char       *starttry;              /* -Dr: where regtry was called. */
 #define RExC_starttry  (pRExC_state->starttry)
@@ -136,10 +139,13 @@ typedef struct RExC_state_t {
 #define RExC_flags16   (pRExC_state->flags16)
 #define RExC_precomp   (pRExC_state->precomp)
 #define RExC_rx                (pRExC_state->rx)
+#define RExC_start     (pRExC_state->start)
 #define RExC_end       (pRExC_state->end)
 #define RExC_parse     (pRExC_state->parse)
 #define RExC_whilem_seen       (pRExC_state->whilem_seen)
+#define RExC_offsets   (pRExC_state->rx->offsets) /* I am not like the others */
 #define RExC_emit      (pRExC_state->emit)
+#define RExC_emit_start        (pRExC_state->emit_start)
 #define RExC_naughty   (pRExC_state->naughty)
 #define RExC_sawback   (pRExC_state->sawback)
 #define RExC_seen      (pRExC_state->seen)
@@ -148,15 +154,11 @@ typedef struct RExC_state_t {
 #define RExC_extralen  (pRExC_state->extralen)
 #define RExC_seen_zerolen      (pRExC_state->seen_zerolen)
 #define RExC_seen_evals        (pRExC_state->seen_evals)
+#define RExC_utf8      (pRExC_state->utf8)
 
 #define        ISMULT1(c)      ((c) == '*' || (c) == '+' || (c) == '?')
 #define        ISMULT2(s)      ((*s) == '*' || (*s) == '+' || (*s) == '?' || \
        ((*s) == '{' && regcurly(s)))
-#ifdef atarist
-#define        PERL_META       "^$.[()|?+*\\"
-#else
-#define        META    "^$.[()|?+*\\"
-#endif
 
 #ifdef SPSTART
 #undef SPSTART         /* dratted cpp namespace... */
@@ -197,7 +199,7 @@ typedef struct scan_data_t {
  * Forward declarations for pregcomp()'s friends.
  */
 
-static scan_data_t zero_scan_data = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
+static scan_data_t zero_scan_data = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                      0, 0, 0, 0, 0, 0};
 
 #define SF_BEFORE_EOL          (SF_BEFORE_SEOL|SF_BEFORE_MEOL)
@@ -229,13 +231,11 @@ static scan_data_t zero_scan_data = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 #define SCF_DO_STCLASS         (SCF_DO_STCLASS_AND|SCF_DO_STCLASS_OR)
 #define SCF_WHILEM_VISITED_POS 0x2000
 
-#define RF_utf8                8
-#define UTF (PL_reg_flags & RF_utf8)
+#define UTF RExC_utf8
 #define LOC (RExC_flags16 & PMf_LOCALE)
 #define FOLD (RExC_flags16 & PMf_FOLD)
 
-#define OOB_CHAR8              1234
-#define OOB_UTF8               123456
+#define OOB_UNICODE            12345678
 #define OOB_NAMEDCLASS         -1
 
 #define CHR_SVLEN(sv) (UTF ? sv_len_utf8(sv) : SvCUR(sv))
@@ -250,10 +250,10 @@ static scan_data_t zero_scan_data = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  * of t/op/regmesg.t, the tests in t/op/re_tests, and those in
  * op/pragma/warn/regcomp.
  */
-#define MARKER1 "HERE"      /* marker as it appears in the description */
-#define MARKER2 " << HERE "  /* marker as it appears within the regex */
-   
-#define REPORT_LOCATION " before " MARKER1 " mark in regex m/%.*s" MARKER2 "%s/"
+#define MARKER1 "<-- HERE"    /* marker as it appears in the description */
+#define MARKER2 " <-- HERE "  /* marker as it appears within the regex */
+
+#define REPORT_LOCATION " in regex; marked by " MARKER1 " in m/%.*s" MARKER2 "%s/"
 
 /*
  * Calls SAVEDESTRUCTOR_X if needed, then calls Perl_croak with the given
@@ -393,6 +393,14 @@ static scan_data_t zero_scan_data = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                 m, (int)offset, RExC_precomp, RExC_precomp + offset);          \
     } STMT_END                                                               \
 
+#define        vWARNdep(loc,m)                                                         \
+    STMT_START {                                                             \
+        unsigned offset = strlen(RExC_precomp)-(RExC_end-(loc));          \
+        int warn_cat = ckWARN(WARN_REGEXP) ? WARN_REGEXP : WARN_DEPRECATED;  \
+       Perl_warner(aTHX_ warn_cat, "%s" REPORT_LOCATION,\
+                m, (int)offset, RExC_precomp, RExC_precomp + offset);          \
+    } STMT_END                                                               \
+
 
 #define        vWARN2(loc, m, a1)                                                   \
     STMT_START {                                                             \
@@ -418,9 +426,62 @@ static scan_data_t zero_scan_data = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                 (int)offset, RExC_precomp, RExC_precomp + offset);        \
     } STMT_END
 
+/* used for the parse_flags section for (?c) -- japhy */
+#define        vWARN5(loc, m, a1, a2, a3, a4)                                       \
+  STMT_START {                                                   \
+      unsigned offset = strlen(RExC_precomp)-(RExC_end-(loc));   \
+        Perl_warner(aTHX_ WARN_REGEXP, m REPORT_LOCATION,      \
+                 a1, a2, a3, a4,                                 \
+                 (int)offset, RExC_precomp, RExC_precomp + offset);  \
+    } STMT_END
+
 
 /* Allow for side effects in s */
-#define REGC(c,s) STMT_START { if (!SIZE_ONLY) *(s) = (c); else (s);} STMT_END
+#define REGC(c,s) STMT_START { if (!SIZE_ONLY) *(s) = (c); else (void)(s);} STMT_END
+
+/* Macros for recording node offsets.   20001227 mjd@plover.com 
+ * Nodes are numbered 1, 2, 3, 4.  Node #n's position is recorded in
+ * element 2*n-1 of the array.  Element #2n holds the byte length node #n.
+ * Element 0 holds the number n.
+ */
+
+#define MJD_OFFSET_DEBUG(x)
+/* #define MJD_OFFSET_DEBUG(x) fprintf x */
+
+
+#  define Set_Node_Offset_To_R(node,byte)                           \
+   STMT_START {                                        \
+     if (! SIZE_ONLY) {                                  \
+       if((node) < 0) {                   \
+         Perl_croak(aTHX_ "value of node is %d in Offset macro", node); \
+       } else {                                                        \
+         RExC_offsets[2*(node)-1] = (byte);                               \
+       }                                                               \
+     }                                                                 \
+   } STMT_END
+
+#  define Set_Node_Offset(node,byte) Set_Node_Offset_To_R((node)-RExC_emit_start, (byte)-RExC_start)
+#  define Set_Cur_Node_Offset Set_Node_Offset(RExC_emit, RExC_parse)
+
+#  define Set_Node_Length_To_R(node,len)                            \
+   STMT_START {                                        \
+     if (! SIZE_ONLY) {                                  \
+       MJD_OFFSET_DEBUG((stderr, "** (%d) size of node %d is %d.\n", __LINE__, (node), (len))); \
+       if((node) < 0) {                   \
+         Perl_croak(aTHX_ "value of node is %d in Length macro", node); \
+       } else {                                                        \
+         RExC_offsets[2*(node)] = (len);                               \
+       }                                                               \
+     }                                                                 \
+   } STMT_END
+
+#  define Set_Node_Length(node,len) Set_Node_Length_To_R((node)-RExC_emit_start, len)
+#  define Set_Cur_Node_Length(len)  Set_Node_Length(RExC_emit, len)
+#  define Set_Node_Cur_Length(node)   Set_Node_Length(node, RExC_parse - parse_start)
+
+/* Get offsets and lengths */
+#define Node_Offset(n) (RExC_offsets[2*((n)-RExC_emit_start)-1])
+#define Node_Length(n) (RExC_offsets[2*((n)-RExC_emit_start)])
 
 static void clear_re(pTHXo_ void *r);
 
@@ -431,27 +492,26 @@ static void clear_re(pTHXo_ void *r);
 STATIC void
 S_scan_commit(pTHX_ RExC_state_t *pRExC_state, scan_data_t *data)
 {
-    dTHR;
     STRLEN l = CHR_SVLEN(data->last_found);
     STRLEN old_l = CHR_SVLEN(*data->longest);
-    
+
     if ((l >= old_l) && ((l > old_l) || (data->flags & SF_BEFORE_EOL))) {
        sv_setsv(*data->longest, data->last_found);
        if (*data->longest == data->longest_fixed) {
            data->offset_fixed = l ? data->last_start_min : data->pos_min;
            if (data->flags & SF_BEFORE_EOL)
-               data->flags 
+               data->flags
                    |= ((data->flags & SF_BEFORE_EOL) << SF_FIX_SHIFT_EOL);
            else
                data->flags &= ~SF_FIX_BEFORE_EOL;
        }
        else {
            data->offset_float_min = l ? data->last_start_min : data->pos_min;
-           data->offset_float_max = (l 
-                                     ? data->last_start_max 
+           data->offset_float_max = (l
+                                     ? data->last_start_max
                                      : data->pos_min + data->pos_delta);
            if (data->flags & SF_BEFORE_EOL)
-               data->flags 
+               data->flags
                    |= ((data->flags & SF_BEFORE_EOL) << SF_FL_SHIFT_EOL);
            else
                data->flags &= ~SF_FL_BEFORE_EOL;
@@ -471,7 +531,7 @@ S_cl_anything(pTHX_ RExC_state_t *pRExC_state, struct regnode_charclass_class *c
     ANYOF_CLASS_ZERO(cl);
     for (value = 0; value < 256; ++value)
        ANYOF_BITMAP_SET(cl, value);
-    cl->flags = ANYOF_EOS;
+    cl->flags = ANYOF_EOS|ANYOF_UNICODE_ALL;
     if (LOC)
        cl->flags |= ANYOF_LOCALE;
 }
@@ -485,6 +545,8 @@ S_cl_is_anything(pTHX_ struct regnode_charclass_class *cl)
     for (value = 0; value <= ANYOF_MAX; value += 2)
        if (ANYOF_CLASS_TEST(cl, value) && ANYOF_CLASS_TEST(cl, value + 1))
            return 1;
+    if (!(cl->flags & ANYOF_UNICODE_ALL))
+       return 0;
     for (value = 0; value < 256; ++value)
        if (!ANYOF_BITMAP_TEST(cl, value))
            return 0;
@@ -532,6 +594,16 @@ S_cl_and(pTHX_ struct regnode_charclass_class *cl,
     } /* XXXX: logic is complicated otherwise, leave it along for a moment. */
     if (!(and_with->flags & ANYOF_EOS))
        cl->flags &= ~ANYOF_EOS;
+
+    if (cl->flags & ANYOF_UNICODE_ALL && and_with->flags & ANYOF_UNICODE) {
+       cl->flags &= ~ANYOF_UNICODE_ALL;
+       cl->flags |= ANYOF_UNICODE;
+       ARG_SET(cl, ARG(and_with));
+    }
+    if (!(and_with->flags & ANYOF_UNICODE_ALL))
+       cl->flags &= ~ANYOF_UNICODE_ALL;
+    if (!(and_with->flags & (ANYOF_UNICODE|ANYOF_UNICODE_ALL)))
+       cl->flags &= ~ANYOF_UNICODE;
 }
 
 /* 'OR' a given class with another one.  Can create false positives */
@@ -563,7 +635,7 @@ S_cl_or(pTHX_ RExC_state_t *pRExC_state, struct regnode_charclass_class *cl, str
     } else {
        /* (B1 | CL1) | (B2 | CL2) = (B1 | B2) | (CL1 | CL2)) */
        if ( (or_with->flags & ANYOF_LOCALE) == (cl->flags & ANYOF_LOCALE)
-            && (!(or_with->flags & ANYOF_FOLD) 
+            && (!(or_with->flags & ANYOF_FOLD)
                 || (cl->flags & ANYOF_FOLD)) ) {
            int i;
 
@@ -582,6 +654,16 @@ S_cl_or(pTHX_ RExC_state_t *pRExC_state, struct regnode_charclass_class *cl, str
     }
     if (or_with->flags & ANYOF_EOS)
        cl->flags |= ANYOF_EOS;
+
+    if (cl->flags & ANYOF_UNICODE && or_with->flags & ANYOF_UNICODE &&
+       ARG(cl) != ARG(or_with)) {
+       cl->flags |= ANYOF_UNICODE_ALL;
+       cl->flags &= ~ANYOF_UNICODE;
+    }
+    if (or_with->flags & ANYOF_UNICODE_ALL) {
+       cl->flags |= ANYOF_UNICODE_ALL;
+       cl->flags &= ~ANYOF_UNICODE;
+    }
 }
 
 /* REx optimizer.  Converts nodes into quickier variants "in place".
@@ -596,7 +678,6 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, I32 *deltap, reg
                        /* deltap: Write maxlen-minlen here. */
                        /* last: Stop before this one. */
 {
-    dTHR;
     I32 min = 0, pars = 0, code;
     regnode *scan = *scanp, *next;
     I32 delta = 0;
@@ -605,7 +686,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, I32 *deltap, reg
     I32 is_par = OP(scan) == OPEN ? ARG(scan) : 0;
     scan_data_t data_fake;
     struct regnode_charclass_class and_with; /* Valid if flags & SCF_DO_STCLASS_OR */
-    
+
     while (scan && OP(scan) != END && scan < last) {
        /* Peephole optimizer: */
 
@@ -615,12 +696,12 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, I32 *deltap, reg
            U32 stringok = 1;
 #ifdef DEBUGGING
            regnode *stop = scan;
-#endif 
+#endif
 
            next = scan + NODE_SZ_STR(scan);
            /* Skip NOTHING, merge EXACT*. */
            while (n &&
-                  ( PL_regkind[(U8)OP(n)] == NOTHING || 
+                  ( PL_regkind[(U8)OP(n)] == NOTHING ||
                     (stringok && (OP(n) == OP(scan))))
                   && NEXT_OFF(n)
                   && NEXT_OFF(scan) + NEXT_OFF(n) < I16_MAX) {
@@ -632,25 +713,23 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, I32 *deltap, reg
 #ifdef DEBUGGING
                    if (stringok)
                        stop = n;
-#endif 
+#endif
                    n = regnext(n);
                }
-               else {
+               else if (stringok) {
                    int oldl = STR_LEN(scan);
                    regnode *nnext = regnext(n);
-                   
-                   if (oldl + STR_LEN(n) > U8_MAX) 
+
+                   if (oldl + STR_LEN(n) > U8_MAX)
                        break;
                    NEXT_OFF(scan) += NEXT_OFF(n);
                    STR_LEN(scan) += STR_LEN(n);
                    next = n + NODE_SZ_STR(n);
                    /* Now we can overwrite *n : */
-                   Move(STRING(n), STRING(scan) + oldl,
-                        STR_LEN(n), char);
+                   Move(STRING(n), STRING(scan) + oldl, STR_LEN(n), char);
 #ifdef DEBUGGING
-                   if (stringok)
-                       stop = next - 1;
-#endif 
+                   stop = next - 1;
+#endif
                    n = nnext;
                }
            }
@@ -676,7 +755,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, I32 *deltap, reg
            int off = (reg_off_by_arg[OP(scan)] ? ARG(scan) : NEXT_OFF(scan));
            int noff;
            regnode *n = scan;
-           
+       
            /* Skip NOTHING and LONGJMP. */
            while ((n = regnext(n))
                   && ((PL_regkind[(U8)OP(n)] == NOTHING && (noff = NEXT_OFF(n)))
@@ -685,17 +764,17 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, I32 *deltap, reg
                off += noff;
            if (reg_off_by_arg[OP(scan)])
                ARG(scan) = off;
-           else 
+           else
                NEXT_OFF(scan) = off;
        }
        /* The principal pseudo-switch.  Cannot be a switch, since we
           look into several different things.  */
-       if (OP(scan) == BRANCH || OP(scan) == BRANCHJ 
+       if (OP(scan) == BRANCH || OP(scan) == BRANCHJ
                   || OP(scan) == IFTHEN || OP(scan) == SUSPEND) {
            next = regnext(scan);
            code = OP(scan);
-           
-           if (OP(next) == code || code == IFTHEN || code == SUSPEND) { 
+       
+           if (OP(next) == code || code == IFTHEN || code == SUSPEND) {
                I32 max1 = 0, min1 = I32_MAX, num = 0;
                struct regnode_charclass_class accum;
                
@@ -709,7 +788,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, I32 *deltap, reg
 
                    num++;
                    data_fake.flags = 0;
-                   if (data) {             
+                   if (data) {         
                        data_fake.whilem_c = data->whilem_c;
                        data_fake.last_closep = data->last_closep;
                    }
@@ -723,13 +802,13 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, I32 *deltap, reg
                        cl_init(pRExC_state, &this_class);
                        data_fake.start_class = &this_class;
                        f = SCF_DO_STCLASS_AND;
-                   }               
+                   }           
                    if (flags & SCF_WHILEM_VISITED_POS)
                        f |= SCF_WHILEM_VISITED_POS;
                    /* we suppose the run is continuous, last=next...*/
                    minnext = study_chunk(pRExC_state, &scan, &deltanext,
                                          next, &data_fake, f);
-                   if (min1 > minnext) 
+                   if (min1 > minnext)
                        min1 = minnext;
                    if (max1 < minnext + deltanext)
                        max1 = minnext + deltanext;
@@ -744,7 +823,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, I32 *deltap, reg
                        data->whilem_c = data_fake.whilem_c;
                    if (flags & SCF_DO_STCLASS)
                        cl_or(pRExC_state, &accum, &this_class);
-                   if (code == SUSPEND) 
+                   if (code == SUSPEND)
                        break;
                }
                if (code == IFTHEN && num < 2) /* Empty ELSE branch */
@@ -770,7 +849,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, I32 *deltap, reg
                        flags &= ~SCF_DO_STCLASS;
                    }
                    else {
-                       /* Switch to OR mode: cache the old value of 
+                       /* Switch to OR mode: cache the old value of
                         * data->start_class */
                        StructCopy(data->start_class, &and_with,
                                   struct regnode_charclass_class);
@@ -790,15 +869,11 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, I32 *deltap, reg
        }
        else if (OP(scan) == EXACT) {
            I32 l = STR_LEN(scan);
+           UV uc = *((U8*)STRING(scan));
            if (UTF) {
-               unsigned char *s = (unsigned char *)STRING(scan);
-               unsigned char *e = s + l;
-               I32 newl = 0;
-               while (s < e) {
-                   newl++;
-                   s += UTF8SKIP(s);
-               }
-               l = newl;
+               U8 *s = (U8*)STRING(scan);
+               l = utf8_length(s, s + l);
+               uc = utf8_to_uvchr(s, NULL);
            }
            min += l;
            if (flags & SCF_DO_SUBSTR) { /* Update longest substr. */
@@ -807,7 +882,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, I32 *deltap, reg
                if (data->last_end == -1) { /* Update the start info. */
                    data->last_start_min = data->pos_min;
                    data->last_start_max = is_inf
-                       ? I32_MAX : data->pos_min + data->pos_delta; 
+                       ? I32_MAX : data->pos_min + data->pos_delta;
                }
                sv_catpvn(data->last_found, STRING(scan), STR_LEN(scan));
                data->last_end = data->pos_min + l;
@@ -818,21 +893,27 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, I32 *deltap, reg
                /* Check whether it is compatible with what we know already! */
                int compat = 1;
 
-               if (!(data->start_class->flags & (ANYOF_CLASS | ANYOF_LOCALE)) 
-                   && !ANYOF_BITMAP_TEST(data->start_class, *STRING(scan))
+               if (uc >= 0x100 ||
+                   (!(data->start_class->flags & (ANYOF_CLASS | ANYOF_LOCALE))
+                   && !ANYOF_BITMAP_TEST(data->start_class, uc)
                    && (!(data->start_class->flags & ANYOF_FOLD)
-                       || !ANYOF_BITMAP_TEST(data->start_class,
-                                             PL_fold[*(U8*)STRING(scan)])))
+                       || !ANYOF_BITMAP_TEST(data->start_class, PL_fold[uc])))
+                    )
                    compat = 0;
                ANYOF_CLASS_ZERO(data->start_class);
                ANYOF_BITMAP_ZERO(data->start_class);
                if (compat)
-                   ANYOF_BITMAP_SET(data->start_class, *STRING(scan));
+                   ANYOF_BITMAP_SET(data->start_class, uc);
                data->start_class->flags &= ~ANYOF_EOS;
+               if (uc < 0x100)
+                 data->start_class->flags &= ~ANYOF_UNICODE_ALL;
            }
            else if (flags & SCF_DO_STCLASS_OR) {
                /* false positive possible if the class is case-folded */
-               ANYOF_BITMAP_SET(data->start_class, *STRING(scan));     
+               if (uc < 0x100)
+                   ANYOF_BITMAP_SET(data->start_class, uc);
+               else
+                   data->start_class->flags |= ANYOF_UNICODE_ALL;
                data->start_class->flags &= ~ANYOF_EOS;
                cl_and(data->start_class, &and_with);
            }
@@ -840,19 +921,15 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, I32 *deltap, reg
        }
        else if (PL_regkind[(U8)OP(scan)] == EXACT) { /* But OP != EXACT! */
            I32 l = STR_LEN(scan);
+           UV uc = *((U8*)STRING(scan));
 
            /* Search for fixed substrings supports EXACT only. */
-           if (flags & SCF_DO_SUBSTR) 
+           if (flags & SCF_DO_SUBSTR)
                scan_commit(pRExC_state, data);
            if (UTF) {
-               unsigned char *s = (unsigned char *)STRING(scan);
-               unsigned char *e = s + l;
-               I32 newl = 0;
-               while (s < e) {
-                   newl++;
-                   s += UTF8SKIP(s);
-               }
-               l = newl;
+               U8 *s = (U8 *)STRING(scan);
+               l = utf8_length(s, s + l);
+               uc = utf8_to_uvchr(s, NULL);
            }
            min += l;
            if (data && (flags & SCF_DO_SUBSTR))
@@ -861,15 +938,15 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, I32 *deltap, reg
                /* Check whether it is compatible with what we know already! */
                int compat = 1;
 
-               if (!(data->start_class->flags & (ANYOF_CLASS | ANYOF_LOCALE)) 
-                   && !ANYOF_BITMAP_TEST(data->start_class, *STRING(scan))
-                   && !ANYOF_BITMAP_TEST(data->start_class, 
-                                         PL_fold[*(U8*)STRING(scan)]))
+               if (uc >= 0x100 ||
+                   (!(data->start_class->flags & (ANYOF_CLASS | ANYOF_LOCALE))
+                   && !ANYOF_BITMAP_TEST(data->start_class, uc)
+                    && !ANYOF_BITMAP_TEST(data->start_class, PL_fold[uc])))
                    compat = 0;
                ANYOF_CLASS_ZERO(data->start_class);
                ANYOF_BITMAP_ZERO(data->start_class);
                if (compat) {
-                   ANYOF_BITMAP_SET(data->start_class, *STRING(scan));
+                   ANYOF_BITMAP_SET(data->start_class, uc);
                    data->start_class->flags &= ~ANYOF_EOS;
                    data->start_class->flags |= ANYOF_FOLD;
                    if (OP(scan) == EXACTFL)
@@ -880,7 +957,8 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, I32 *deltap, reg
                if (data->start_class->flags & ANYOF_FOLD) {
                    /* false positive possible if the class is case-folded.
                       Assume that the locale settings are the same... */
-                   ANYOF_BITMAP_SET(data->start_class, *STRING(scan)); 
+                   if (uc < 0x100)
+                       ANYOF_BITMAP_SET(data->start_class, uc);
                    data->start_class->flags &= ~ANYOF_EOS;
                }
                cl_and(data->start_class, &and_with);
@@ -888,7 +966,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, I32 *deltap, reg
            flags &= ~SCF_DO_STCLASS;
        }
        else if (strchr((char*)PL_varies,OP(scan))) {
-           I32 mincount, maxcount, minnext, deltanext, fl;
+           I32 mincount, maxcount, minnext, deltanext, fl = 0;
            I32 f = flags, pos_before = 0;
            regnode *oscan = scan;
            struct regnode_charclass_class this_class;
@@ -902,8 +980,8 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, I32 *deltap, reg
                if (flags & (SCF_DO_SUBSTR | SCF_DO_STCLASS)) {
                    next = NEXTOPER(scan);
                    if (OP(next) == EXACT || (flags & SCF_DO_STCLASS)) {
-                       mincount = 1; 
-                       maxcount = REG_INFTY; 
+                       mincount = 1;
+                       maxcount = REG_INFTY;
                        next = regnext(scan);
                        scan = NEXTOPER(scan);
                        goto do_curly;
@@ -916,12 +994,12 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, I32 *deltap, reg
            case STAR:
                if (flags & SCF_DO_STCLASS) {
                    mincount = 0;
-                   maxcount = REG_INFTY; 
+                   maxcount = REG_INFTY;
                    next = regnext(scan);
                    scan = NEXTOPER(scan);
                    goto do_curly;
                }
-               is_inf = is_inf_internal = 1; 
+               is_inf = is_inf_internal = 1;
                scan = regnext(scan);
                if (flags & SCF_DO_SUBSTR) {
                    scan_commit(pRExC_state, data); /* Cannot extend fixed substrings */
@@ -929,7 +1007,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, I32 *deltap, reg
                }
                goto optimize_curly_tail;
            case CURLY:
-               mincount = ARG1(scan); 
+               mincount = ARG1(scan);
                maxcount = ARG2(scan);
                next = regnext(scan);
                if (OP(scan) == CURLYX) {
@@ -959,15 +1037,15 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, I32 *deltap, reg
                /* These are the cases when once a subexpression
                   fails at a particular position, it cannot succeed
                   even after backtracking at the enclosing scope.
-                  
+               
                   XXXX what if minimal match and we are at the
                        initial run of {n,m}? */
                if ((mincount != maxcount - 1) && (maxcount != REG_INFTY))
                    f &= ~SCF_WHILEM_VISITED_POS;
 
                /* This will finish on WHILEM, setting scan, or on NULL: */
-               minnext = study_chunk(pRExC_state, &scan, &deltanext, last, data, 
-                                     mincount == 0 
+               minnext = study_chunk(pRExC_state, &scan, &deltanext, last, data,
+                                     mincount == 0
                                        ? (f & ~SCF_DO_SUBSTR) : f);
 
                if (flags & SCF_DO_STCLASS)
@@ -977,7 +1055,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, I32 *deltap, reg
                        cl_or(pRExC_state, data->start_class, &this_class);
                    }
                    else if (flags & SCF_DO_STCLASS_AND) {
-                       /* Switch to OR mode: cache the old value of 
+                       /* Switch to OR mode: cache the old value of
                         * data->start_class */
                        StructCopy(data->start_class, &and_with,
                                   struct regnode_charclass_class);
@@ -998,8 +1076,9 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, I32 *deltap, reg
                }
                if (!scan)              /* It was not CURLYX, but CURLY. */
                    scan = next;
-               if (ckWARN(WARN_REGEXP) && (minnext + deltanext == 0) 
-                   && !(data->flags & (SF_HAS_PAR|SF_IN_PAR))
+               if (ckWARN(WARN_REGEXP)
+                   && (minnext == 0) && (deltanext == 0)
+                   && data && !(data->flags & (SF_HAS_PAR|SF_IN_PAR))
                    && maxcount <= REG_INFTY/3) /* Complement check for big count */
                {
                    vWARN(RExC_parse,
@@ -1007,30 +1086,35 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, I32 *deltap, reg
                }
 
                min += minnext * mincount;
-               is_inf_internal |= ((maxcount == REG_INFTY 
+               is_inf_internal |= ((maxcount == REG_INFTY
                                     && (minnext + deltanext) > 0)
                                    || deltanext == I32_MAX);
                is_inf |= is_inf_internal;
                delta += (minnext + deltanext) * maxcount - minnext * mincount;
 
                /* Try powerful optimization CURLYX => CURLYN. */
-               if (  OP(oscan) == CURLYX && data 
+               if (  OP(oscan) == CURLYX && data
                      && data->flags & SF_IN_PAR
                      && !(data->flags & SF_HAS_EVAL)
                      && !deltanext && minnext == 1 ) {
                    /* Try to optimize to CURLYN.  */
                    regnode *nxt = NEXTOPER(oscan) + EXTRA_STEP_2ARGS;
-                   regnode *nxt1 = nxt, *nxt2;
+                   regnode *nxt1 = nxt;
+#ifdef DEBUGGING
+                   regnode *nxt2;
+#endif
 
                    /* Skip open. */
                    nxt = regnext(nxt);
                    if (!strchr((char*)PL_simple,OP(nxt))
                        && !(PL_regkind[(U8)OP(nxt)] == EXACT
-                            && STR_LEN(nxt) == 1)) 
+                            && STR_LEN(nxt) == 1))
                        goto nogo;
+#ifdef DEBUGGING
                    nxt2 = nxt;
+#endif
                    nxt = regnext(nxt);
-                   if (OP(nxt) != CLOSE) 
+                   if (OP(nxt) != CLOSE)
                        goto nogo;
                    /* Now we know that nxt2 is the only contents: */
                    oscan->flags = ARG(nxt);
@@ -1043,12 +1127,12 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, I32 *deltap, reg
                    OP(nxt) = OPTIMIZED;        /* was CLOSE. */
                    OP(nxt + 1) = OPTIMIZED; /* was count. */
                    NEXT_OFF(nxt+ 1) = 0; /* just for consistancy. */
-#endif 
+#endif
                }
              nogo:
 
                /* Try optimization CURLYX => CURLYM. */
-               if (  OP(oscan) == CURLYX && data 
+               if (  OP(oscan) == CURLYX && data
                      && !(data->flags & SF_HAS_PAR)
                      && !(data->flags & SF_HAS_EVAL)
                      && !deltanext  ) {
@@ -1059,7 +1143,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, I32 *deltap, reg
 
                    OP(oscan) = CURLYM;
                    while ( (nxt2 = regnext(nxt)) /* skip over embedded stuff*/
-                           && (OP(nxt2) != WHILEM)) 
+                           && (OP(nxt2) != WHILEM))
                        nxt = nxt2;
                    OP(nxt2)  = SUCCEED; /* Whas WHILEM */
                    /* Need to optimize away parenths. */
@@ -1067,7 +1151,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, I32 *deltap, reg
                        /* Set the parenth number.  */
                        regnode *nxt1 = NEXTOPER(oscan) + EXTRA_STEP_2ARGS; /* OPEN*/
 
-                       if (OP(nxt) != CLOSE) 
+                       if (OP(nxt) != CLOSE)
                            FAIL("Panic opt close");
                        oscan->flags = ARG(nxt);
                        OP(nxt1) = OPTIMIZED;   /* was OPEN. */
@@ -1077,11 +1161,11 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, I32 *deltap, reg
                        OP(nxt + 1) = OPTIMIZED; /* was count. */
                        NEXT_OFF(nxt1 + 1) = 0; /* just for consistancy. */
                        NEXT_OFF(nxt + 1) = 0; /* just for consistancy. */
-#endif 
+#endif
 #if 0
                        while ( nxt1 && (OP(nxt1) != WHILEM)) {
                            regnode *nnxt = regnext(nxt1);
-                           
+                       
                            if (nnxt == nxt) {
                                if (reg_off_by_arg[OP(nxt1)])
                                    ARG_SET(nxt1, nxt2 - nxt1);
@@ -1094,7 +1178,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, I32 *deltap, reg
                        }
 #endif
                        /* Optimize again: */
-                       study_chunk(pRExC_state, &nxt1, &deltanext, nxt, 
+                       study_chunk(pRExC_state, &nxt1, &deltanext, nxt,
                                    NULL, 0);
                    }
                    else
@@ -1116,14 +1200,14 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, I32 *deltap, reg
                    PREVOPER(nxt)->flags = data->whilem_c
                        | (RExC_whilem_seen << 4); /* On WHILEM */
                }
-               if (data && fl & (SF_HAS_PAR|SF_IN_PAR)) 
+               if (data && fl & (SF_HAS_PAR|SF_IN_PAR))
                    pars++;
                if (flags & SCF_DO_SUBSTR) {
                    SV *last_str = Nullsv;
                    int counted = mincount != 0;
 
                    if (data->last_end > 0 && mincount != 0) { /* Ends with a string. */
-                       I32 b = pos_before >= data->last_start_min 
+                       I32 b = pos_before >= data->last_start_min
                            ? pos_before : data->last_start_min;
                        STRLEN l;
                        char *s = SvPV(data->last_found, l);
@@ -1139,11 +1223,11 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, I32 *deltap, reg
                            /* What was added is a constant string */
                            if (mincount > 1) {
                                SvGROW(last_str, (mincount * l) + 1);
-                               repeatcpy(SvPVX(last_str) + l, 
+                               repeatcpy(SvPVX(last_str) + l,
                                          SvPVX(last_str), l, mincount - 1);
                                SvCUR(last_str) *= mincount;
                                /* Add additional parts. */
-                               SvCUR_set(data->last_found, 
+                               SvCUR_set(data->last_found,
                                          SvCUR(data->last_found) - l);
                                sv_catsv(data->last_found, last_str);
                                data->last_end += l * (mincount - 1);
@@ -1166,10 +1250,10 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, I32 *deltap, reg
                        if (mincount && last_str) {
                            sv_setsv(data->last_found, last_str);
                            data->last_end = data->pos_min;
-                           data->last_start_min = 
+                           data->last_start_min =
                                data->pos_min - CHR_SVLEN(last_str);
-                           data->last_start_max = is_inf 
-                               ? I32_MAX 
+                           data->last_start_max = is_inf
+                               ? I32_MAX
                                : data->pos_min + data->pos_delta
                                - CHR_SVLEN(last_str);
                        }
@@ -1198,8 +1282,8 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, I32 *deltap, reg
                break;
            }
        }
-       else if (strchr((char*)PL_simple,OP(scan)) || PL_regkind[(U8)OP(scan)] == ANYUTF8) {
-           int value;
+       else if (strchr((char*)PL_simple,OP(scan))) {
+           int value = 0;
 
            if (flags & SCF_DO_SUBSTR) {
                scan_commit(pRExC_state,data);
@@ -1212,20 +1296,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, I32 *deltap, reg
                /* Some of the logic below assumes that switching
                   locale on will only add false positives. */
                switch (PL_regkind[(U8)OP(scan)]) {
-               case ANYUTF8:
                case SANY:
-               case SANYUTF8:
-               case ALNUMUTF8:
-               case ANYOFUTF8:
-               case ALNUMLUTF8:
-               case NALNUMUTF8:
-               case NALNUMLUTF8:
-               case SPACEUTF8:
-               case NSPACEUTF8:
-               case SPACELUTF8:
-               case NSPACELUTF8:
-               case DIGITUTF8:
-               case NDIGITUTF8:
                default:
                  do_default:
                    /* Perl_croak(aTHX_ "panic: unexpected simple REx opcode %d", OP(scan)); */
@@ -1266,7 +1337,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, I32 *deltap, reg
                        else {
                            for (value = 0; value < 256; value++)
                                if (isALNUM(value))
-                                   ANYOF_BITMAP_SET(data->start_class, value);                     
+                                   ANYOF_BITMAP_SET(data->start_class, value);                 
                        }
                    }
                    break;
@@ -1295,7 +1366,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, I32 *deltap, reg
                        else {
                            for (value = 0; value < 256; value++)
                                if (!isALNUM(value))
-                                   ANYOF_BITMAP_SET(data->start_class, value);                     
+                                   ANYOF_BITMAP_SET(data->start_class, value);                 
                        }
                    }
                    break;
@@ -1324,7 +1395,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, I32 *deltap, reg
                        else {
                            for (value = 0; value < 256; value++)
                                if (isSPACE(value))
-                                   ANYOF_BITMAP_SET(data->start_class, value);                     
+                                   ANYOF_BITMAP_SET(data->start_class, value);                 
                        }
                    }
                    break;
@@ -1353,7 +1424,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, I32 *deltap, reg
                        else {
                            for (value = 0; value < 256; value++)
                                if (!isSPACE(value))
-                                   ANYOF_BITMAP_SET(data->start_class, value);                     
+                                   ANYOF_BITMAP_SET(data->start_class, value);                 
                        }
                    }
                    break;
@@ -1384,7 +1455,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, I32 *deltap, reg
                        else {
                            for (value = 0; value < 256; value++)
                                if (isDIGIT(value))
-                                   ANYOF_BITMAP_SET(data->start_class, value);                     
+                                   ANYOF_BITMAP_SET(data->start_class, value);                 
                        }
                    }
                    break;
@@ -1401,7 +1472,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, I32 *deltap, reg
                        else {
                            for (value = 0; value < 256; value++)
                                if (!isDIGIT(value))
-                                   ANYOF_BITMAP_SET(data->start_class, value);                     
+                                   ANYOF_BITMAP_SET(data->start_class, value);                 
                        }
                    }
                    break;
@@ -1427,7 +1498,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, I32 *deltap, reg
            int f = 0;
 
            data_fake.flags = 0;
-           if (data) {             
+           if (data) {         
                data_fake.whilem_c = data->whilem_c;
                data_fake.last_closep = data->last_closep;
            }
@@ -1501,7 +1572,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, I32 *deltap, reg
   finish:
     *scanp = scan;
     *deltap = is_inf_internal ? I32_MAX : delta;
-    if (flags & SCF_DO_SUBSTR && is_inf) 
+    if (flags & SCF_DO_SUBSTR && is_inf)
        data->pos_delta = I32_MAX - data->pos_min;
     if (is_par > U8_MAX)
        is_par = 0;
@@ -1521,10 +1592,9 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, I32 *deltap, reg
 STATIC I32
 S_add_data(pTHX_ RExC_state_t *pRExC_state, I32 n, char *s)
 {
-    dTHR;
     if (RExC_rx->data) {
-       Renewc(RExC_rx->data, 
-              sizeof(*RExC_rx->data) + sizeof(void*) * (RExC_rx->data->count + n - 1), 
+       Renewc(RExC_rx->data,
+              sizeof(*RExC_rx->data) + sizeof(void*) * (RExC_rx->data->count + n - 1),
               char, struct reg_data);
        Renew(RExC_rx->data->what, RExC_rx->data->count + n, U8);
        RExC_rx->data->count += n;
@@ -1542,10 +1612,9 @@ S_add_data(pTHX_ RExC_state_t *pRExC_state, I32 n, char *s)
 void
 Perl_reginitcolors(pTHX)
 {
-    dTHR;
     int i = 0;
     char *s = PerlEnv_getenv("PERL_RE_COLORS");
-           
+       
     if (s) {
        PL_colors[0] = s = savepv(s);
        while (++i < 6) {
@@ -1558,7 +1627,7 @@ Perl_reginitcolors(pTHX)
                PL_colors[i] = s = "";
        }
     } else {
-       while (i < 6) 
+       while (i < 6)
            PL_colors[i++] = "";
     }
     PL_colorset = 1;
@@ -1583,7 +1652,6 @@ Perl_reginitcolors(pTHX)
 regexp *
 Perl_pregcomp(pTHX_ char *exp, char *xend, PMOP *pm)
 {
-    dTHR;
     register regexp *r;
     regnode *scan;
     regnode *first;
@@ -1599,13 +1667,12 @@ Perl_pregcomp(pTHX_ char *exp, char *xend, PMOP *pm)
        FAIL("NULL regexp argument");
 
     /* XXXX This looks very suspicious... */
-    if (pm->op_pmdynflags & PMdf_UTF8) {
-       PL_reg_flags |= RF_utf8;
-    }
+    if (pm->op_pmdynflags & PMdf_CMP_UTF8)
+        RExC_utf8 = 1;
     else
-       PL_reg_flags = 0;
+        RExC_utf8 = 0;
 
-    RExC_precomp = savepvn(exp, xend - exp);
+    RExC_precomp = exp;
     DEBUG_r(if (!PL_colorset) reginitcolors());
     DEBUG_r(PerlIO_printf(Perl_debug_log, "%sCompiling REx%s `%s%*s%s'\n",
                      PL_colors[4],PL_colors[5],PL_colors[0],
@@ -1620,6 +1687,7 @@ Perl_pregcomp(pTHX_ char *exp, char *xend, PMOP *pm)
 
     /* First pass: determine size, legality. */
     RExC_parse = exp;
+    RExC_start = exp;
     RExC_end = xend;
     RExC_naughty = 0;
     RExC_npar = 1;
@@ -1631,7 +1699,6 @@ Perl_pregcomp(pTHX_ char *exp, char *xend, PMOP *pm)
     REGC((U8)REG_MAGIC, (char*)RExC_emit);
 #endif
     if (reg(pRExC_state, 0, &flags) == NULL) {
-       Safefree(RExC_precomp);
        RExC_precomp = Nullch;
        return(NULL);
     }
@@ -1658,7 +1725,7 @@ Perl_pregcomp(pTHX_ char *exp, char *xend, PMOP *pm)
 #endif
     r->refcnt = 1;
     r->prelen = xend - exp;
-    r->precomp = RExC_precomp;
+    r->precomp = savepvn(RExC_precomp, r->prelen);
     r->subbeg = NULL;
     r->reganch = pm->op_pmflags & PMf_COMPILETIME;
     r->nparens = RExC_npar - 1;        /* set early to validate backrefs */
@@ -1667,6 +1734,15 @@ Perl_pregcomp(pTHX_ char *exp, char *xend, PMOP *pm)
     r->startp = 0;                     /* Useful during FAIL. */
     r->endp = 0;                       /* Useful during FAIL. */
 
+    Newz(1304, r->offsets, 2*RExC_size+1, U32); /* MJD 20001228 */
+    if (r->offsets) {
+      r->offsets[0] = RExC_size; 
+    }
+    DEBUG_r(PerlIO_printf(Perl_debug_log, 
+                          "%s %"UVuf" bytes for offset annotations.\n", 
+                          r->offsets ? "Got" : "Couldn't get", 
+                          (UV)((2*RExC_size+1) * sizeof(U32))));
+
     RExC_rx = r;
 
     /* Second pass: emit code. */
@@ -1674,6 +1750,7 @@ Perl_pregcomp(pTHX_ char *exp, char *xend, PMOP *pm)
     RExC_end = xend;
     RExC_naughty = 0;
     RExC_npar = 1;
+    RExC_emit_start = r->program;
     RExC_emit = r->program;
     /* Store the count of eval-groups for security checks: */
     RExC_emit->next_off = ((RExC_seen_evals > U16_MAX) ? U16_MAX : RExC_seen_evals);
@@ -1724,9 +1801,9 @@ Perl_pregcomp(pTHX_ char *exp, char *xend, PMOP *pm)
        /* Starting-point info. */
       again:
        if (PL_regkind[(U8)OP(first)] == EXACT) {
-           if (OP(first) == EXACT);    /* Empty, get anchored substr later. */
-           else if ((OP(first) == EXACTF || OP(first) == EXACTFL)
-                    && !UTF)
+           if (OP(first) == EXACT)
+               ;       /* Empty, get anchored substr later. */
+           else if ((OP(first) == EXACTF || OP(first) == EXACTFL))
                r->regstclass = first;
        }
        else if (strchr((char*)PL_simple,OP(first)))
@@ -1755,7 +1832,7 @@ Perl_pregcomp(pTHX_ char *exp, char *xend, PMOP *pm)
            /* turn .* into ^.* with an implied $*=1 */
            int type = OP(NEXTOPER(first));
 
-           if (type == REG_ANY || type == ANYUTF8)
+           if (type == REG_ANY)
                type = ROPT_ANCH_MBOL;
            else
                type = ROPT_ANCH_SBOL;
@@ -1764,13 +1841,13 @@ Perl_pregcomp(pTHX_ char *exp, char *xend, PMOP *pm)
            first = NEXTOPER(first);
            goto again;
        }
-       if (sawplus && (!sawopen || !RExC_sawback) 
+       if (sawplus && (!sawopen || !RExC_sawback)
            && !(RExC_seen & REG_SEEN_EVAL)) /* May examine pos and $& */
            /* x+ must match at the 1st pos of run of x's */
            r->reganch |= ROPT_SKIP;
 
        /* Scan is after the zeroth branch, first is atomic matcher. */
-       DEBUG_r(PerlIO_printf(Perl_debug_log, "first at %"IVdf"\n", 
+       DEBUG_r(PerlIO_printf(Perl_debug_log, "first at %"IVdf"\n",
                              (IV)(first - scan + 1)));
        /*
        * If there's something expensive in the r.e., find the
@@ -1801,7 +1878,7 @@ Perl_pregcomp(pTHX_ char *exp, char *xend, PMOP *pm)
        minlen = study_chunk(pRExC_state, &first, &fake, scan + RExC_size, /* Up to end */
                             &data, SCF_DO_SUBSTR | SCF_WHILEM_VISITED_POS | stclass_flag);
        if ( RExC_npar == 1 && data.longest == &(data.longest_fixed)
-            && data.last_start_min == 0 && data.last_end > 0 
+            && data.last_start_min == 0 && data.last_end > 0
             && !RExC_seen_zerolen
             && (!(RExC_seen & REG_SEEN_GPOS) || (r->reganch & ROPT_ANCH_GPOS)))
            r->reganch |= ROPT_CHECK_ALL;
@@ -1854,27 +1931,27 @@ Perl_pregcomp(pTHX_ char *exp, char *xend, PMOP *pm)
            SvREFCNT_dec(data.longest_fixed);
            longest_fixed_length = 0;
        }
-       if (r->regstclass 
-           && (OP(r->regstclass) == REG_ANY || OP(r->regstclass) == ANYUTF8
-               || OP(r->regstclass) == SANYUTF8 || OP(r->regstclass) == SANY))
+       if (r->regstclass
+           && (OP(r->regstclass) == REG_ANY || OP(r->regstclass) == SANY))
            r->regstclass = NULL;
        if ((!r->anchored_substr || r->anchored_offset) && stclass_flag
            && !(data.start_class->flags & ANYOF_EOS)
            && !cl_is_anything(data.start_class)) {
-           SV *sv;
            I32 n = add_data(pRExC_state, 1, "f");
 
-           New(1006, RExC_rx->data->data[n], 1, 
+           New(1006, RExC_rx->data->data[n], 1,
                struct regnode_charclass_class);
            StructCopy(data.start_class,
                       (struct regnode_charclass_class*)RExC_rx->data->data[n],
                       struct regnode_charclass_class);
            r->regstclass = (regnode*)RExC_rx->data->data[n];
            r->reganch &= ~ROPT_SKIP;   /* Used in find_byclass(). */
-           DEBUG_r((sv = sv_newmortal(),
-                    regprop(sv, (regnode*)data.start_class),
-                    PerlIO_printf(Perl_debug_log, "synthetic stclass `%s'.\n",
-                                  SvPVX(sv))));
+           PL_regdata = r->data; /* for regprop() */
+           DEBUG_r({ SV *sv = sv_newmortal();
+                     regprop(sv, (regnode*)data.start_class);
+                     PerlIO_printf(Perl_debug_log,
+                                   "synthetic stclass `%s'.\n",
+                                   SvPVX(sv));});
        }
 
        /* A temporary algorithm prefers floated substr to fixed one to dig more info. */
@@ -1912,32 +1989,35 @@ Perl_pregcomp(pTHX_ char *exp, char *xend, PMOP *pm)
        r->check_substr = r->anchored_substr = r->float_substr = Nullsv;
        if (!(data.start_class->flags & ANYOF_EOS)
            && !cl_is_anything(data.start_class)) {
-           SV *sv;
            I32 n = add_data(pRExC_state, 1, "f");
 
-           New(1006, RExC_rx->data->data[n], 1, 
+           New(1006, RExC_rx->data->data[n], 1,
                struct regnode_charclass_class);
            StructCopy(data.start_class,
                       (struct regnode_charclass_class*)RExC_rx->data->data[n],
                       struct regnode_charclass_class);
            r->regstclass = (regnode*)RExC_rx->data->data[n];
            r->reganch &= ~ROPT_SKIP;   /* Used in find_byclass(). */
-           DEBUG_r((sv = sv_newmortal(),
-                    regprop(sv, (regnode*)data.start_class),
-                    PerlIO_printf(Perl_debug_log, "synthetic stclass `%s'.\n",
-                                  SvPVX(sv))));
+           DEBUG_r({ SV* sv = sv_newmortal();
+                     regprop(sv, (regnode*)data.start_class);
+                     PerlIO_printf(Perl_debug_log,
+                                   "synthetic stclass `%s'.\n",
+                                   SvPVX(sv));});
        }
     }
 
     r->minlen = minlen;
-    if (RExC_seen & REG_SEEN_GPOS) 
+    if (RExC_seen & REG_SEEN_GPOS)
        r->reganch |= ROPT_GPOS_SEEN;
     if (RExC_seen & REG_SEEN_LOOKBEHIND)
        r->reganch |= ROPT_LOOKBEHIND_SEEN;
     if (RExC_seen & REG_SEEN_EVAL)
        r->reganch |= ROPT_EVAL_SEEN;
+    if (RExC_seen & REG_SEEN_CANY)
+       r->reganch |= ROPT_CANY_SEEN;
     Newz(1002, r->startp, RExC_npar, I32);
     Newz(1002, r->endp, RExC_npar, I32);
+    PL_regdata = r->data; /* for regprop() */
     DEBUG_r(regdump(r));
     return(r);
 }
@@ -1955,21 +2035,32 @@ STATIC regnode *
 S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp)
     /* paren: Parenthesized? 0=top, 1=(, inside: changed to letter. */
 {
-    dTHR;
     register regnode *ret;             /* Will be the head of the group. */
     register regnode *br;
     register regnode *lastbr;
     register regnode *ender = 0;
     register I32 parno = 0;
     I32 flags, oregflags = RExC_flags16, have_branch = 0, open = 0;
+
+    /* for (?g), (?gc), and (?o) warnings; warning
+       about (?c) will warn about (?g) -- japhy    */
+
+    I32 wastedflags = 0x00,
+        wasted_o    = 0x01,
+        wasted_g    = 0x02,
+        wasted_gc   = 0x02 | 0x04,
+        wasted_c    = 0x04;
+
+    char * parse_start = RExC_parse; /* MJD */
     char *oregcomp_parse = RExC_parse;
     char c;
 
     *flagp = 0;                                /* Tentatively. */
 
+
     /* Make an OPEN node, if parenthesized. */
     if (paren) {
-       if (*RExC_parse == '?') {
+       if (*RExC_parse == '?') { /* (?...) */
            U16 posflags = 0, negflags = 0;
            U16 *flagsp = &posflags;
            int logical = 0;
@@ -1979,24 +2070,24 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp)
            paren = *RExC_parse++;
            ret = NULL;                 /* For look-ahead/behind. */
            switch (paren) {
-           case '<':
+           case '<':           /* (?<...) */
                RExC_seen |= REG_SEEN_LOOKBEHIND;
-               if (*RExC_parse == '!') 
+               if (*RExC_parse == '!')
                    paren = ',';
-               if (*RExC_parse != '=' && *RExC_parse != '!') 
+               if (*RExC_parse != '=' && *RExC_parse != '!')
                    goto unknown;
                RExC_parse++;
-           case '=':
-           case '!':
+           case '=':           /* (?=...) */
+           case '!':           /* (?!...) */
                RExC_seen_zerolen++;
-           case ':':
-           case '>':
+           case ':':           /* (?:...) */
+           case '>':           /* (?>...) */
                break;
-           case '$':
-           case '@':
+           case '$':           /* (?$...) */
+           case '@':           /* (?@...) */
                vFAIL2("Sequence (?%c...) not implemented", (int)paren);
                break;
-           case '#':
+           case '#':           /* (?#...) */
                while (*RExC_parse && *RExC_parse != ')')
                    RExC_parse++;
                if (*RExC_parse != ')')
@@ -2004,17 +2095,16 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp)
                nextchar(pRExC_state);
                *flagp = TRYAGAIN;
                return NULL;
-           case 'p':
-               if (SIZE_ONLY)
-                   vWARN(RExC_parse, "(?p{}) is deprecated - use (??{})");
+           case 'p':           /* (?p...) */
+               if (SIZE_ONLY && ckWARN2(WARN_DEPRECATED, WARN_REGEXP))
+                   vWARNdep(RExC_parse, "(?p{}) is deprecated - use (??{})");
                /* FALL THROUGH*/
-           case '?':
+           case '?':           /* (??...) */
                logical = 1;
                paren = *RExC_parse++;
                /* FALL THROUGH */
-           case '{':
+           case '{':           /* (?{...}) */
            {
-               dTHR;
                I32 count = 1, n = 0;
                char c;
                char *s = RExC_parse;
@@ -2026,21 +2116,21 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp)
                while (count && (c = *RExC_parse)) {
                    if (c == '\\' && RExC_parse[1])
                        RExC_parse++;
-                   else if (c == '{') 
+                   else if (c == '{')
                        count++;
-                   else if (c == '}') 
+                   else if (c == '}')
                        count--;
                    RExC_parse++;
                }
                if (*RExC_parse != ')')
                {
-                   RExC_parse = s;                 
+                   RExC_parse = s;             
                    vFAIL("Sequence (?{...}) not terminated or not {}-balanced");
                }
                if (!SIZE_ONLY) {
                    AV *av;
-                   
-                   if (RExC_parse - 1 - s) 
+               
+                   if (RExC_parse - 1 - s)
                        sv = newSVpvn(s, RExC_parse - 1 - s);
                    else
                        sv = newSVpvn("", 0);
@@ -2048,6 +2138,9 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp)
                    ENTER;
                    Perl_save_re_context(aTHX);
                    rop = sv_compile_2op(sv, &sop, "re", &av);
+                   sop->op_private |= OPpREFCOUNTED;
+                   /* re_dup will OpREFCNT_inc */
+                   OpREFCNT_set(sop, 1);
                    LEAVE;
 
                    n = add_data(pRExC_state, 3, "nop");
@@ -2062,7 +2155,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp)
                        /* No compiled RE interpolated, has runtime
                           components ===> unsafe.  */
                        FAIL("Eval-group not allowed at runtime, use re 'eval'");
-                   if (PL_tainted)
+                   if (PL_tainting && PL_tainted)
                        FAIL("Eval-group in insecure regular expression");
                }
                
@@ -2072,15 +2165,16 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp)
                    if (!SIZE_ONLY)
                        ret->flags = 2;
                    regtail(pRExC_state, ret, reganode(pRExC_state, EVAL, n));
+                    /* deal with the length of this later - MJD */
                    return ret;
                }
                return reganode(pRExC_state, EVAL, n);
            }
-           case '(':
+           case '(':           /* (?(?{...})...) and (?(?=...)...) */
            {
-               if (RExC_parse[0] == '?') {
-                   if (RExC_parse[1] == '=' || RExC_parse[1] == '!' 
-                       || RExC_parse[1] == '<' 
+               if (RExC_parse[0] == '?') {        /* (?(?...)) */
+                   if (RExC_parse[1] == '=' || RExC_parse[1] == '!'
+                       || RExC_parse[1] == '<'
                        || RExC_parse[1] == '{') { /* Lookahead or eval. */
                        I32 flag;
                        
@@ -2089,14 +2183,16 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp)
                            ret->flags = 1;
                        regtail(pRExC_state, ret, reg(pRExC_state, 1, &flag));
                        goto insert_if;
-                   } 
+                   }
                }
                else if (RExC_parse[0] >= '1' && RExC_parse[0] <= '9' ) {
+                    /* (?(1)...) */
                    parno = atoi(RExC_parse++);
 
                    while (isDIGIT(*RExC_parse))
                        RExC_parse++;
-                   ret = reganode(pRExC_state, GROUPP, parno);
+                    ret = reganode(pRExC_state, GROUPP, parno);
+                    
                    if ((c = *nextchar(pRExC_state)) != ')')
                        vFAIL("Switch condition not recognized");
                  insert_if:
@@ -2141,14 +2237,47 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp)
                 break;
            default:
                --RExC_parse;
-             parse_flags:
+             parse_flags:      /* (?i) */
                while (*RExC_parse && strchr("iogcmsx", *RExC_parse)) {
-                   if (*RExC_parse != 'o')
-                       pmflag(flagsp, *RExC_parse);
+                   /* (?g), (?gc) and (?o) are useless here
+                      and must be globally applied -- japhy */
+
+                   if (*RExC_parse == 'o' || *RExC_parse == 'g') {
+                       if (SIZE_ONLY && ckWARN(WARN_REGEXP)) {
+                           I32 wflagbit = *RExC_parse == 'o' ? wasted_o : wasted_g;
+                           if (! (wastedflags & wflagbit) ) {
+                               wastedflags |= wflagbit;
+                               vWARN5(
+                                   RExC_parse + 1,
+                                   "Useless (%s%c) - %suse /%c modifier",
+                                   flagsp == &negflags ? "?-" : "?",
+                                   *RExC_parse,
+                                   flagsp == &negflags ? "don't " : "",
+                                   *RExC_parse
+                               );
+                           }
+                       }
+                   }
+                   else if (*RExC_parse == 'c') {
+                       if (SIZE_ONLY && ckWARN(WARN_REGEXP)) {
+                           if (! (wastedflags & wasted_c) ) {
+                               wastedflags |= wasted_gc;
+                               vWARN3(
+                                   RExC_parse + 1,
+                                   "Useless (%sc) - %suse /gc modifier",
+                                   flagsp == &negflags ? "?-" : "?",
+                                   flagsp == &negflags ? "don't " : ""
+                               );
+                           }
+                       }
+                   }
+                   else { pmflag(flagsp, *RExC_parse); }
+
                    ++RExC_parse;
                }
                if (*RExC_parse == '-') {
                    flagsp = &negflags;
+                   wastedflags = 0;  /* reset so (?g-c) warns twice */
                    ++RExC_parse;
                    goto parse_flags;
                }
@@ -2169,26 +2298,34 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp)
                return NULL;
            }
        }
-       else {
+       else {                  /* (...) */
            parno = RExC_npar;
            RExC_npar++;
            ret = reganode(pRExC_state, OPEN, parno);
+            Set_Node_Length(ret, 1); /* MJD */
+            Set_Node_Offset(ret, RExC_parse); /* MJD */
            open = 1;
        }
     }
-    else
+    else                        /* ! paren */
        ret = NULL;
 
     /* Pick up the branches, linking them together. */
+    parse_start = RExC_parse;   /* MJD */
     br = regbranch(pRExC_state, &flags, 1);
+    /*     branch_len = (paren != 0); */
+    
     if (br == NULL)
        return(NULL);
     if (*RExC_parse == '|') {
        if (!SIZE_ONLY && RExC_extralen) {
            reginsert(pRExC_state, BRANCHJ, br);
        }
-       else
+       else {                  /* MJD */
            reginsert(pRExC_state, BRANCH, br);
+            Set_Node_Length(br, paren != 0);
+            Set_Node_Offset_To_R(br-RExC_emit_start, parse_start-RExC_start);
+        }
        have_branch = 1;
        if (SIZE_ONLY)
            RExC_extralen += 1;         /* For BRANCHJ-BRANCH. */
@@ -2214,6 +2351,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp)
            RExC_extralen += 2;         /* Account for LONGJMP. */
        nextchar(pRExC_state);
        br = regbranch(pRExC_state, &flags, 0);
+        
        if (br == NULL)
            return(NULL);
        regtail(pRExC_state, lastbr, br);               /* BRANCH -> BRANCH. */
@@ -2231,6 +2369,8 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp)
            break;
        case 1:
            ender = reganode(pRExC_state, CLOSE, parno);
+            Set_Node_Offset(ender,RExC_parse+1); /* MJD */
+            Set_Node_Length(ender,1); /* MJD */
            break;
        case '<':
        case ',':
@@ -2300,24 +2440,25 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp)
 STATIC regnode *
 S_regbranch(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, I32 first)
 {
-    dTHR;
     register regnode *ret;
     register regnode *chain = NULL;
     register regnode *latest;
     I32 flags = 0, c = 0;
 
-    if (first) 
+    if (first)
        ret = NULL;
     else {
-       if (!SIZE_ONLY && RExC_extralen) 
+       if (!SIZE_ONLY && RExC_extralen)
            ret = reganode(pRExC_state, BRANCHJ,0);
-       else
+       else {
            ret = reg_node(pRExC_state, BRANCH);
+            Set_Node_Length(ret, 1);
+        }
     }
        
-    if (!first && SIZE_ONLY) 
+    if (!first && SIZE_ONLY)
        RExC_extralen += 1;                     /* BRANCHJ */
-    
+
     *flagp = WORST;                    /* Tentatively. */
 
     RExC_parse--;
@@ -2366,7 +2507,6 @@ S_regbranch(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, I32 first)
 STATIC regnode *
 S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp)
 {
-    dTHR;
     register regnode *ret;
     register char op;
     register char *next;
@@ -2375,6 +2515,7 @@ S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp)
     char *maxpos;
     I32 min;
     I32 max = REG_INFTY;
+    char *parse_start;
 
     ret = regatom(pRExC_state, &flags);
     if (ret == NULL) {
@@ -2386,6 +2527,7 @@ S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp)
     op = *RExC_parse;
 
     if (op == '{' && regcurly(RExC_parse)) {
+        parse_start = RExC_parse; /* MJD */
        next = RExC_parse + 1;
        maxpos = Nullch;
        while (isDIGIT(*next) || *next == ',') {
@@ -2418,6 +2560,8 @@ S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp)
            if ((flags&SIMPLE)) {
                RExC_naughty += 2 + RExC_naughty / 2;
                reginsert(pRExC_state, CURLY, ret);
+                Set_Node_Offset(ret, parse_start+1); /* MJD */
+                Set_Node_Cur_Length(ret);
            }
            else {
                regnode *w = reg_node(pRExC_state, WHILEM);
@@ -2430,6 +2574,11 @@ S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp)
                    NEXT_OFF(ret) = 3;  /* Go over LONGJMP. */
                }
                reginsert(pRExC_state, CURLYX,ret);
+                                /* MJD hk */
+                Set_Node_Offset(ret, parse_start+1);
+                Set_Node_Length(ret, 
+                                op == '{' ? (RExC_parse - parse_start) : 1);
+                
                if (!SIZE_ONLY && RExC_extralen)
                    NEXT_OFF(ret) = 3;  /* Go over NOTHING to LONGJMP. */
                regtail(pRExC_state, ret, reg_node(pRExC_state, NOTHING));
@@ -2473,8 +2622,9 @@ S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp)
 
     if (!(flags&HASWIDTH) && op != '?')
       vFAIL("Regexp *+ operand could be empty");
-#endif 
+#endif
 
+    parse_start = RExC_parse;
     nextchar(pRExC_state);
 
     *flagp = (op != '+') ? (WORST|SPSTART|HASWIDTH) : (WORST|HASWIDTH);
@@ -2534,9 +2684,9 @@ S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp)
 STATIC regnode *
 S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp)
 {
-    dTHR;
     register regnode *ret = 0;
     I32 flags;
+    char *parse_start = 0;
 
     *flagp = WORST;            /* Tentatively. */
 
@@ -2551,10 +2701,11 @@ tryagain:
            ret = reg_node(pRExC_state, SBOL);
        else
            ret = reg_node(pRExC_state, BOL);
+        Set_Node_Length(ret, 1); /* MJD */
        break;
     case '$':
        nextchar(pRExC_state);
-       if (*RExC_parse) 
+       if (*RExC_parse)
            RExC_seen_zerolen++;
        if (RExC_flags16 & PMf_MULTILINE)
            ret = reg_node(pRExC_state, MEOL);
@@ -2562,35 +2713,29 @@ tryagain:
            ret = reg_node(pRExC_state, SEOL);
        else
            ret = reg_node(pRExC_state, EOL);
+        Set_Node_Length(ret, 1); /* MJD */
        break;
     case '.':
        nextchar(pRExC_state);
-       if (UTF) {
-           if (RExC_flags16 & PMf_SINGLELINE)
-               ret = reg_node(pRExC_state, SANYUTF8);
-           else
-               ret = reg_node(pRExC_state, ANYUTF8);
-           *flagp |= HASWIDTH;
-       }
-       else {
-           if (RExC_flags16 & PMf_SINGLELINE)
-               ret = reg_node(pRExC_state, SANY);
-           else
-               ret = reg_node(pRExC_state, REG_ANY);
-           *flagp |= HASWIDTH|SIMPLE;
-       }
+       if (RExC_flags16 & PMf_SINGLELINE)
+           ret = reg_node(pRExC_state, SANY);
+       else
+           ret = reg_node(pRExC_state, REG_ANY);
+       *flagp |= HASWIDTH|SIMPLE;
        RExC_naughty++;
+        Set_Node_Length(ret, 1); /* MJD */
        break;
     case '[':
     {
        char *oregcomp_parse = ++RExC_parse;
-       ret = (UTF ? regclassutf8(pRExC_state) : regclass(pRExC_state));
+       ret = regclass(pRExC_state);
        if (*RExC_parse != ']') {
            RExC_parse = oregcomp_parse;
            vFAIL("Unmatched [");
        }
        nextchar(pRExC_state);
        *flagp |= HASWIDTH|SIMPLE;
+        Set_Node_Length(ret, RExC_parse - oregcomp_parse + 1); /* MJD */
        break;
     }
     case '(':
@@ -2637,12 +2782,14 @@ tryagain:
            ret = reg_node(pRExC_state, SBOL);
            *flagp |= SIMPLE;
            nextchar(pRExC_state);
+            Set_Node_Length(ret, 2); /* MJD */
            break;
        case 'G':
            ret = reg_node(pRExC_state, GPOS);
            RExC_seen |= REG_SEEN_GPOS;
            *flagp |= SIMPLE;
            nextchar(pRExC_state);
+            Set_Node_Length(ret, 2); /* MJD */
            break;
        case 'Z':
            ret = reg_node(pRExC_state, SEOL);
@@ -2654,103 +2801,81 @@ tryagain:
            *flagp |= SIMPLE;
            RExC_seen_zerolen++;                /* Do not optimize RE away */
            nextchar(pRExC_state);
+            Set_Node_Length(ret, 2); /* MJD */
            break;
        case 'C':
-           ret = reg_node(pRExC_state, SANY);
+           ret = reg_node(pRExC_state, CANY);
+           RExC_seen |= REG_SEEN_CANY;
            *flagp |= HASWIDTH|SIMPLE;
            nextchar(pRExC_state);
+            Set_Node_Length(ret, 2); /* MJD */
            break;
        case 'X':
            ret = reg_node(pRExC_state, CLUMP);
            *flagp |= HASWIDTH;
            nextchar(pRExC_state);
-           if (UTF && !PL_utf8_mark)
-               is_utf8_mark((U8*)"~");         /* preload table */
+            Set_Node_Length(ret, 2); /* MJD */
            break;
        case 'w':
-           ret = reg_node(pRExC_state, 
-               UTF
-                   ? (LOC ? ALNUMLUTF8 : ALNUMUTF8)
-                   : (LOC ? ALNUML     : ALNUM));
+           ret = reg_node(pRExC_state, LOC ? ALNUML     : ALNUM);
            *flagp |= HASWIDTH|SIMPLE;
            nextchar(pRExC_state);
-           if (UTF && !PL_utf8_alnum)
-               is_utf8_alnum((U8*)"a");        /* preload table */
+            Set_Node_Length(ret, 2); /* MJD */
            break;
        case 'W':
-           ret = reg_node(pRExC_state, 
-               UTF
-                   ? (LOC ? NALNUMLUTF8 : NALNUMUTF8)
-                   : (LOC ? NALNUML     : NALNUM));
+           ret = reg_node(pRExC_state, LOC ? NALNUML     : NALNUM);
            *flagp |= HASWIDTH|SIMPLE;
            nextchar(pRExC_state);
-           if (UTF && !PL_utf8_alnum)
-               is_utf8_alnum((U8*)"a");        /* preload table */
+            Set_Node_Length(ret, 2); /* MJD */
            break;
        case 'b':
            RExC_seen_zerolen++;
            RExC_seen |= REG_SEEN_LOOKBEHIND;
-           ret = reg_node(pRExC_state, 
-               UTF
-                   ? (LOC ? BOUNDLUTF8 : BOUNDUTF8)
-                   : (LOC ? BOUNDL     : BOUND));
+           ret = reg_node(pRExC_state, LOC ? BOUNDL     : BOUND);
            *flagp |= SIMPLE;
            nextchar(pRExC_state);
-           if (UTF && !PL_utf8_alnum)
-               is_utf8_alnum((U8*)"a");        /* preload table */
+            Set_Node_Length(ret, 2); /* MJD */
            break;
        case 'B':
            RExC_seen_zerolen++;
            RExC_seen |= REG_SEEN_LOOKBEHIND;
-           ret = reg_node(pRExC_state, 
-               UTF
-                   ? (LOC ? NBOUNDLUTF8 : NBOUNDUTF8)
-                   : (LOC ? NBOUNDL     : NBOUND));
+           ret = reg_node(pRExC_state, LOC ? NBOUNDL     : NBOUND);
            *flagp |= SIMPLE;
            nextchar(pRExC_state);
-           if (UTF && !PL_utf8_alnum)
-               is_utf8_alnum((U8*)"a");        /* preload table */
+            Set_Node_Length(ret, 2); /* MJD */
            break;
        case 's':
-           ret = reg_node(pRExC_state, 
-               UTF
-                   ? (LOC ? SPACELUTF8 : SPACEUTF8)
-                   : (LOC ? SPACEL     : SPACE));
+           ret = reg_node(pRExC_state, LOC ? SPACEL     : SPACE);
            *flagp |= HASWIDTH|SIMPLE;
            nextchar(pRExC_state);
-           if (UTF && !PL_utf8_space)
-               is_utf8_space((U8*)" ");        /* preload table */
+            Set_Node_Length(ret, 2); /* MJD */
            break;
        case 'S':
-           ret = reg_node(pRExC_state, 
-               UTF
-                   ? (LOC ? NSPACELUTF8 : NSPACEUTF8)
-                   : (LOC ? NSPACEL     : NSPACE));
+           ret = reg_node(pRExC_state, LOC ? NSPACEL     : NSPACE);
            *flagp |= HASWIDTH|SIMPLE;
            nextchar(pRExC_state);
-           if (UTF && !PL_utf8_space)
-               is_utf8_space((U8*)" ");        /* preload table */
+            Set_Node_Length(ret, 2); /* MJD */
            break;
        case 'd':
-           ret = reg_node(pRExC_state, UTF ? DIGITUTF8 : DIGIT);
+           ret = reg_node(pRExC_state, DIGIT);
            *flagp |= HASWIDTH|SIMPLE;
            nextchar(pRExC_state);
-           if (UTF && !PL_utf8_digit)
-               is_utf8_digit((U8*)"1");        /* preload table */
+            Set_Node_Length(ret, 2); /* MJD */
            break;
        case 'D':
-           ret = reg_node(pRExC_state, UTF ? NDIGITUTF8 : NDIGIT);
+           ret = reg_node(pRExC_state, NDIGIT);
            *flagp |= HASWIDTH|SIMPLE;
            nextchar(pRExC_state);
-           if (UTF && !PL_utf8_digit)
-               is_utf8_digit((U8*)"1");        /* preload table */
+            Set_Node_Length(ret, 2); /* MJD */
            break;
        case 'p':
        case 'P':
-           {   /* a lovely hack--pretend we saw [\pX] instead */
+           {   
                char* oldregxend = RExC_end;
+                char* parse_start = RExC_parse;
 
                if (RExC_parse[1] == '{') {
+                 /* a lovely hack--pretend we saw [\pX] instead */
                    RExC_end = strchr(RExC_parse, '}');
                    if (!RExC_end) {
                        RExC_parse += 2;
@@ -2763,10 +2888,11 @@ tryagain:
                    RExC_end = RExC_parse + 2;
                RExC_parse--;
 
-               ret = regclassutf8(pRExC_state);
+               ret = regclass(pRExC_state);
 
                RExC_end = oldregxend;
                RExC_parse--;
+                Set_Node_Cur_Length(ret); /* MJD */
                nextchar(pRExC_state);
                *flagp |= HASWIDTH|SIMPLE;
            }
@@ -2789,6 +2915,7 @@ tryagain:
                if (num > 9 && num >= RExC_npar)
                    goto defchar;
                else {
+                    char * parse_start = RExC_parse - 1; /* MJD */
                    while (isDIGIT(*RExC_parse))
                        RExC_parse++;
 
@@ -2799,6 +2926,10 @@ tryagain:
                                   ? (LOC ? REFFL : REFF)
                                   : REF, num);
                    *flagp |= HASWIDTH;
+                    
+                    /* override incorrect value set in reganode MJD */
+                    Set_Node_Offset(ret, parse_start+1); 
+                    Set_Node_Cur_Length(ret); /* MJD */
                    RExC_parse--;
                    nextchar(pRExC_state);
                }
@@ -2830,6 +2961,8 @@ tryagain:
            char *oldp, *s;
            STRLEN numlen;
 
+            parse_start = RExC_parse - 1;
+
            RExC_parse++;
 
        defchar:
@@ -2889,32 +3022,26 @@ tryagain:
                        p++;
                        break;
                    case 'e':
-#ifdef ASCIIish
-                         ender = '\033';
-#else
-                         ender = '\047';
-#endif
+                         ender = ASCII_TO_NATIVE('\033');
                        p++;
                        break;
                    case 'a':
-#ifdef ASCIIish
-                         ender = '\007';
-#else
-                         ender = '\057';
-#endif
+                         ender = ASCII_TO_NATIVE('\007');
                        p++;
                        break;
                    case 'x':
                        if (*++p == '{') {
                            char* e = strchr(p, '}');
-        
+       
                            if (!e) {
                                RExC_parse = p + 1;
                                vFAIL("Missing right brace on \\x{}");
                            }
-                           else if (UTF) {
+                           else {
                                numlen = 1;     /* allow underscores */
                                ender = (UV)scan_hex(p + 1, e - p - 1, &numlen);
+                               if (ender > 0xff)
+                                   RExC_utf8 = 1;
                                /* numlen is generous */
                                if (numlen + len >= 127) {
                                    p--;
@@ -2922,12 +3049,6 @@ tryagain:
                                }
                                p = e + 1;
                            }
-                           else
-                           {
-                               RExC_parse = e + 1;
-                               vFAIL("Can't use \\x{} without 'use utf8' declaration");
-                           }
-
                        }
                        else {
                            numlen = 0;         /* disallow underscores */
@@ -2965,8 +3086,8 @@ tryagain:
                    break;
                default:
                  normal_default:
-                   if ((*p & 0xc0) == 0xc0 && UTF) {
-                       ender = utf8_to_uv((U8*)p, RExC_end - p,
+                   if (UTF8_IS_START(*p) && UTF) {
+                       ender = utf8n_to_uvchr((U8*)p, RExC_end - p,
                                               &numlen, 0);
                        p += numlen;
                    }
@@ -2978,14 +3099,14 @@ tryagain:
                    p = regwhite(p, RExC_end);
                if (UTF && FOLD) {
                    if (LOC)
-                       ender = toLOWER_LC_uni(ender);
+                       ender = toLOWER_LC_uvchr(ender);
                    else
                        ender = toLOWER_uni(ender);
                }
                if (ISMULT2(p)) { /* Back off on ?+*. */
                    if (len)
                        p = oldp;
-                   else if (ender >= 0x80 && UTF) {
+                   else if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(ender)) && UTF) {
                        reguni(pRExC_state, ender, s, &numlen);
                        s += numlen;
                        len += numlen;
@@ -2996,7 +3117,7 @@ tryagain:
                    }
                    break;
                }
-               if (ender >= 0x80 && UTF) {
+               if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(ender)) && UTF) {
                    reguni(pRExC_state, ender, s, &numlen);
                    s += numlen;
                    len += numlen - 1;
@@ -3006,6 +3127,7 @@ tryagain:
            }
        loopdone:
            RExC_parse = p - 1;
+            Set_Node_Cur_Length(ret); /* MJD */
            nextchar(pRExC_state);
            {
                /* len is STRLEN which is unsigned, need to copy to signed */
@@ -3055,7 +3177,6 @@ S_regwhite(pTHX_ char *p, char *e)
 STATIC I32
 S_regpposixcc(pTHX_ RExC_state_t *pRExC_state, I32 value)
 {
-    dTHR;
     char *posixcc = 0;
     I32 namedclass = OOB_NAMEDCLASS;
 
@@ -3066,7 +3187,7 @@ S_regpposixcc(pTHX_ RExC_state_t *pRExC_state, I32 value)
         *RExC_parse == '.')) {
        char  c = *RExC_parse;
        char* s = RExC_parse++;
-           
+       
        while (RExC_parse < RExC_end && *RExC_parse != c)
            RExC_parse++;
        if (RExC_parse == RExC_end)
@@ -3210,75 +3331,134 @@ S_checkposixcc(pTHX_ RExC_state_t *pRExC_state)
 STATIC regnode *
 S_regclass(pTHX_ RExC_state_t *pRExC_state)
 {
-    dTHR;
-    register U32 value;
-    register I32 lastvalue = OOB_CHAR8;
-    register I32 range = 0;
+    register UV value;
+    register IV prevvalue = OOB_UNICODE;
+    register IV range = 0;
     register regnode *ret;
     STRLEN numlen;
-    I32 namedclass;
-    char *rangebegin;
+    IV namedclass;
+    char *rangebegin = 0;
     bool need_class = 0;
+    SV *listsv = Nullsv;
+    register char *e;
+    UV n;
+    bool optimize_invert = TRUE;
+
+    ret = reganode(pRExC_state, ANYOF, 0);
+
+    if (!SIZE_ONLY)
+       ANYOF_FLAGS(ret) = 0;
+
+    if (*RExC_parse == '^') {  /* Complement of range. */
+       RExC_naughty++;
+       RExC_parse++;
+       if (!SIZE_ONLY)
+           ANYOF_FLAGS(ret) |= ANYOF_INVERT;
+    }
 
-    ret = reg_node(pRExC_state, ANYOF);
     if (SIZE_ONLY)
        RExC_size += ANYOF_SKIP;
     else {
-       ret->flags = 0;
-       ANYOF_BITMAP_ZERO(ret);
        RExC_emit += ANYOF_SKIP;
        if (FOLD)
            ANYOF_FLAGS(ret) |= ANYOF_FOLD;
        if (LOC)
            ANYOF_FLAGS(ret) |= ANYOF_LOCALE;
-    }
-    if (*RExC_parse == '^') {  /* Complement of range. */
-       RExC_naughty++;
-       RExC_parse++;
-       if (!SIZE_ONLY)
-           ANYOF_FLAGS(ret) |= ANYOF_INVERT;
+       ANYOF_BITMAP_ZERO(ret);
+       listsv = newSVpvn("# comment\n", 10);
     }
 
     if (!SIZE_ONLY && ckWARN(WARN_REGEXP))
        checkposixcc(pRExC_state);
 
     if (*RExC_parse == ']' || *RExC_parse == '-')
-       goto skipcond;          /* allow 1st char to be ] or - */
+       goto charclassloop;             /* allow 1st char to be ] or - */
+
     while (RExC_parse < RExC_end && *RExC_parse != ']') {
-       skipcond:
-       namedclass = OOB_NAMEDCLASS;
+
+    charclassloop:
+
+       namedclass = OOB_NAMEDCLASS; /* initialize as illegal */
+
        if (!range)
            rangebegin = RExC_parse;
-       value = UCHARAT(RExC_parse++);
+       if (UTF) {
+           value = utf8n_to_uvchr((U8*)RExC_parse,
+                                  RExC_end - RExC_parse,
+                                  &numlen, 0);
+           RExC_parse += numlen;
+       }
+       else
+           value = UCHARAT(RExC_parse++);
        if (value == '[')
            namedclass = regpposixcc(pRExC_state, value);
        else if (value == '\\') {
-           value = UCHARAT(RExC_parse++);
+           if (UTF) {
+               value = utf8n_to_uvchr((U8*)RExC_parse,
+                                  RExC_end - RExC_parse,
+                                  &numlen, 0);
+               RExC_parse += numlen;
+           }
+           else
+               value = UCHARAT(RExC_parse++);
            /* Some compilers cannot handle switching on 64-bit integer
-            * values, therefore the 'value' cannot be an UV. --jhi */
-           switch (value) {
+            * values, therefore value cannot be an UV.  Yes, this will
+            * be a problem later if we want switch on Unicode.
+            * A similar issue a little bit later when switching on
+            * namedclass. --jhi */
+           switch ((I32)value) {
            case 'w':   namedclass = ANYOF_ALNUM;       break;
            case 'W':   namedclass = ANYOF_NALNUM;      break;
            case 's':   namedclass = ANYOF_SPACE;       break;
            case 'S':   namedclass = ANYOF_NSPACE;      break;
            case 'd':   namedclass = ANYOF_DIGIT;       break;
            case 'D':   namedclass = ANYOF_NDIGIT;      break;
+           case 'p':
+           case 'P':
+               if (*RExC_parse == '{') {
+                   e = strchr(RExC_parse++, '}');
+                    if (!e)
+                        vFAIL("Missing right brace on \\p{}");
+                   n = e - RExC_parse;
+               }
+               else {
+                   e = RExC_parse;
+                   n = 1;
+               }
+               if (!SIZE_ONLY) {
+                   if (value == 'p')
+                       Perl_sv_catpvf(aTHX_ listsv,
+                                      "+utf8::%.*s\n", (int)n, RExC_parse);
+                   else
+                       Perl_sv_catpvf(aTHX_ listsv,
+                                      "!utf8::%.*s\n", (int)n, RExC_parse);
+               }
+               RExC_parse = e + 1;
+               ANYOF_FLAGS(ret) |= ANYOF_UNICODE;
+               continue;
            case 'n':   value = '\n';                   break;
            case 'r':   value = '\r';                   break;
            case 't':   value = '\t';                   break;
            case 'f':   value = '\f';                   break;
            case 'b':   value = '\b';                   break;
-#ifdef ASCIIish
-           case 'e':   value = '\033';                 break;
-           case 'a':   value = '\007';                 break;
-#else
-           case 'e':   value = '\047';                 break;
-           case 'a':   value = '\057';                 break;
-#endif
+           case 'e':   value = ASCII_TO_NATIVE('\033');break;
+           case 'a':   value = ASCII_TO_NATIVE('\007');break;
            case 'x':
-               numlen = 0;             /* disallow underscores */
-               value = (UV)scan_hex(RExC_parse, 2, &numlen);
-               RExC_parse += numlen;
+               if (*RExC_parse == '{') {
+                   e = strchr(RExC_parse++, '}');
+                    if (!e)
+                        vFAIL("Missing right brace on \\x{}");
+                   numlen = 1;         /* allow underscores */
+                   value = (UV)scan_hex(RExC_parse,
+                                        e - RExC_parse,
+                                        &numlen);
+                   RExC_parse = e + 1;
+               }
+               else {
+                   numlen = 0;         /* disallow underscores */
+                   value = (UV)scan_hex(RExC_parse, 2, &numlen);
+                   RExC_parse += numlen;
+               }
                break;
            case 'c':
                value = UCHARAT(RExC_parse++);
@@ -3292,16 +3472,22 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
                break;
            default:
                if (!SIZE_ONLY && ckWARN(WARN_REGEXP) && isALPHA(value))
-
-                   vWARN2(RExC_parse, "Unrecognized escape \\%c in character class passed through", (int)value);
+                   vWARN2(RExC_parse,
+                          "Unrecognized escape \\%c in character class passed through",
+                          (int)value);
                break;
            }
-       }
-       if (namedclass > OOB_NAMEDCLASS) {
-           if (!need_class && !SIZE_ONLY)
+       } /* end of \blah */
+
+       if (namedclass > OOB_NAMEDCLASS) { /* this is a named class \blah */
+
+           if (!SIZE_ONLY && !need_class)
                ANYOF_CLASS_ZERO(ret);
+
            need_class = 1;
-           if (range) { /* a-\d, a-[:digit:] */
+
+           /* a bad range like a-\d, a-[:digit:] ? */
+           if (range) {
                if (!SIZE_ONLY) {
                    if (ckWARN(WARN_REGEXP))
                        vWARN4(RExC_parse,
@@ -3309,13 +3495,28 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
                               RExC_parse - rangebegin,
                               RExC_parse - rangebegin,
                               rangebegin);
-                   ANYOF_BITMAP_SET(ret, lastvalue);
-                   ANYOF_BITMAP_SET(ret, '-');
+                   if (prevvalue < 256) {
+                       ANYOF_BITMAP_SET(ret, prevvalue);
+                       ANYOF_BITMAP_SET(ret, '-');
+                   }
+                   else {
+                       ANYOF_FLAGS(ret) |= ANYOF_UNICODE;
+                       Perl_sv_catpvf(aTHX_ listsv,
+                                      "%04"UVxf"\n%04"UVxf"\n", (UV)prevvalue, (UV) '-');
+                   }
                }
-               range = 0; /* this is not a true range */
+
+               range = 0; /* this was not a true range */
            }
+
            if (!SIZE_ONLY) {
-               switch (namedclass) {
+               if (namedclass > OOB_NAMEDCLASS)
+                   optimize_invert = FALSE;
+               /* Possible truncation here but in some 64-bit environments
+                * the compiler gets heartburn about switch on 64-bit values.
+                * A similar issue a little earlier when switching on value.
+                * --jhi */
+               switch ((I32)namedclass) {
                case ANYOF_ALNUM:
                    if (LOC)
                        ANYOF_CLASS_SET(ret, ANYOF_ALNUM);
@@ -3324,6 +3525,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
                            if (isALNUM(value))
                                ANYOF_BITMAP_SET(ret, value);
                    }
+                   Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsWord\n");    
                    break;
                case ANYOF_NALNUM:
                    if (LOC)
@@ -3333,42 +3535,17 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
                            if (!isALNUM(value))
                                ANYOF_BITMAP_SET(ret, value);
                    }
+                   Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsWord\n");
                    break;
-               case ANYOF_SPACE:
-                   if (LOC)
-                       ANYOF_CLASS_SET(ret, ANYOF_SPACE);
-                   else {
-                       for (value = 0; value < 256; value++)
-                           if (isSPACE(value))
-                               ANYOF_BITMAP_SET(ret, value);
-                   }
-                   break;
-               case ANYOF_NSPACE:
+               case ANYOF_ALNUMC:
                    if (LOC)
-                       ANYOF_CLASS_SET(ret, ANYOF_NSPACE);
+                       ANYOF_CLASS_SET(ret, ANYOF_ALNUMC);
                    else {
                        for (value = 0; value < 256; value++)
-                           if (!isSPACE(value))
+                           if (isALNUMC(value))
                                ANYOF_BITMAP_SET(ret, value);
                    }
-                   break;
-               case ANYOF_DIGIT:
-                   if (LOC)
-                       ANYOF_CLASS_SET(ret, ANYOF_DIGIT);
-                   else {
-                       for (value = '0'; value <= '9'; value++)
-                           ANYOF_BITMAP_SET(ret, value);
-                   }
-                   break;
-               case ANYOF_NDIGIT:
-                   if (LOC)
-                       ANYOF_CLASS_SET(ret, ANYOF_NDIGIT);
-                   else {
-                       for (value = 0; value < '0'; value++)
-                           ANYOF_BITMAP_SET(ret, value);
-                       for (value = '9' + 1; value < 256; value++)
-                           ANYOF_BITMAP_SET(ret, value);
-                   }
+                   Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsAlnum\n");
                    break;
                case ANYOF_NALNUMC:
                    if (LOC)
@@ -3378,15 +3555,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
                            if (!isALNUMC(value))
                                ANYOF_BITMAP_SET(ret, value);
                    }
-                   break;
-               case ANYOF_ALNUMC:
-                   if (LOC)
-                       ANYOF_CLASS_SET(ret, ANYOF_ALNUMC);
-                   else {
-                       for (value = 0; value < 256; value++)
-                           if (isALNUMC(value))
-                               ANYOF_BITMAP_SET(ret, value);
-                   }
+                   Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsAlnum\n");
                    break;
                case ANYOF_ALPHA:
                    if (LOC)
@@ -3396,6 +3565,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
                            if (isALPHA(value))
                                ANYOF_BITMAP_SET(ret, value);
                    }
+                   Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsAlpha\n");
                    break;
                case ANYOF_NALPHA:
                    if (LOC)
@@ -3405,34 +3575,39 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
                            if (!isALPHA(value))
                                ANYOF_BITMAP_SET(ret, value);
                    }
+                   Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsAlpha\n");
                    break;
                case ANYOF_ASCII:
                    if (LOC)
                        ANYOF_CLASS_SET(ret, ANYOF_ASCII);
                    else {
-#ifdef ASCIIish
+#ifndef EBCDIC
                        for (value = 0; value < 128; value++)
                            ANYOF_BITMAP_SET(ret, value);
 #else  /* EBCDIC */
-                       for (value = 0; value < 256; value++)
+                       for (value = 0; value < 256; value++) {
                            if (isASCII(value))
-                               ANYOF_BITMAP_SET(ret, value);
+                               ANYOF_BITMAP_SET(ret, value);
+                       }
 #endif /* EBCDIC */
                    }
+                   Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsASCII\n");
                    break;
                case ANYOF_NASCII:
                    if (LOC)
                        ANYOF_CLASS_SET(ret, ANYOF_NASCII);
                    else {
-#ifdef ASCIIish
+#ifndef EBCDIC
                        for (value = 128; value < 256; value++)
                            ANYOF_BITMAP_SET(ret, value);
 #else  /* EBCDIC */
-                       for (value = 0; value < 256; value++)
+                       for (value = 0; value < 256; value++) {
                            if (!isASCII(value))
-                               ANYOF_BITMAP_SET(ret, value);
+                               ANYOF_BITMAP_SET(ret, value);
+                       }
 #endif /* EBCDIC */
                    }
+                   Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsASCII\n");
                    break;
                case ANYOF_BLANK:
                    if (LOC)
@@ -3442,6 +3617,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
                            if (isBLANK(value))
                                ANYOF_BITMAP_SET(ret, value);
                    }
+                   Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsBlank\n");
                    break;
                case ANYOF_NBLANK:
                    if (LOC)
@@ -3451,6 +3627,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
                            if (!isBLANK(value))
                                ANYOF_BITMAP_SET(ret, value);
                    }
+                   Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsBlank\n");
                    break;
                case ANYOF_CNTRL:
                    if (LOC)
@@ -3460,7 +3637,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
                            if (isCNTRL(value))
                                ANYOF_BITMAP_SET(ret, value);
                    }
-                   lastvalue = OOB_CHAR8;
+                   Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsCntrl\n");
                    break;
                case ANYOF_NCNTRL:
                    if (LOC)
@@ -3470,6 +3647,29 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
                            if (!isCNTRL(value))
                                ANYOF_BITMAP_SET(ret, value);
                    }
+                   Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsCntrl\n");
+                   break;
+               case ANYOF_DIGIT:
+                   if (LOC)
+                       ANYOF_CLASS_SET(ret, ANYOF_DIGIT);
+                   else {
+                       /* consecutive digits assumed */
+                       for (value = '0'; value <= '9'; value++)
+                           ANYOF_BITMAP_SET(ret, value);
+                   }
+                   Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsDigit\n");
+                   break;
+               case ANYOF_NDIGIT:
+                   if (LOC)
+                       ANYOF_CLASS_SET(ret, ANYOF_NDIGIT);
+                   else {
+                       /* consecutive digits assumed */
+                       for (value = 0; value < '0'; value++)
+                           ANYOF_BITMAP_SET(ret, value);
+                       for (value = '9' + 1; value < 256; value++)
+                           ANYOF_BITMAP_SET(ret, value);
+                   }
+                   Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsDigit\n");
                    break;
                case ANYOF_GRAPH:
                    if (LOC)
@@ -3479,6 +3679,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
                            if (isGRAPH(value))
                                ANYOF_BITMAP_SET(ret, value);
                    }
+                   Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsGraph\n");
                    break;
                case ANYOF_NGRAPH:
                    if (LOC)
@@ -3488,6 +3689,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
                            if (!isGRAPH(value))
                                ANYOF_BITMAP_SET(ret, value);
                    }
+                   Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsGraph\n");
                    break;
                case ANYOF_LOWER:
                    if (LOC)
@@ -3497,6 +3699,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
                            if (isLOWER(value))
                                ANYOF_BITMAP_SET(ret, value);
                    }
+                   Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsLower\n");
                    break;
                case ANYOF_NLOWER:
                    if (LOC)
@@ -3506,6 +3709,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
                            if (!isLOWER(value))
                                ANYOF_BITMAP_SET(ret, value);
                    }
+                   Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsLower\n");
                    break;
                case ANYOF_PRINT:
                    if (LOC)
@@ -3515,6 +3719,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
                            if (isPRINT(value))
                                ANYOF_BITMAP_SET(ret, value);
                    }
+                   Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsPrint\n");
                    break;
                case ANYOF_NPRINT:
                    if (LOC)
@@ -3524,6 +3729,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
                            if (!isPRINT(value))
                                ANYOF_BITMAP_SET(ret, value);
                    }
+                   Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsPrint\n");
                    break;
                case ANYOF_PSXSPC:
                    if (LOC)
@@ -3533,6 +3739,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
                            if (isPSXSPC(value))
                                ANYOF_BITMAP_SET(ret, value);
                    }
+                   Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsSpace\n");
                    break;
                case ANYOF_NPSXSPC:
                    if (LOC)
@@ -3542,6 +3749,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
                            if (!isPSXSPC(value))
                                ANYOF_BITMAP_SET(ret, value);
                    }
+                   Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsSpace\n");
                    break;
                case ANYOF_PUNCT:
                    if (LOC)
@@ -3551,6 +3759,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
                            if (isPUNCT(value))
                                ANYOF_BITMAP_SET(ret, value);
                    }
+                   Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsPunct\n");
                    break;
                case ANYOF_NPUNCT:
                    if (LOC)
@@ -3560,6 +3769,27 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
                            if (!isPUNCT(value))
                                ANYOF_BITMAP_SET(ret, value);
                    }
+                   Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsPunct\n");
+                   break;
+               case ANYOF_SPACE:
+                   if (LOC)
+                       ANYOF_CLASS_SET(ret, ANYOF_SPACE);
+                   else {
+                       for (value = 0; value < 256; value++)
+                           if (isSPACE(value))
+                               ANYOF_BITMAP_SET(ret, value);
+                   }
+                   Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsSpacePerl\n");
+                   break;
+               case ANYOF_NSPACE:
+                   if (LOC)
+                       ANYOF_CLASS_SET(ret, ANYOF_NSPACE);
+                   else {
+                       for (value = 0; value < 256; value++)
+                           if (!isSPACE(value))
+                               ANYOF_BITMAP_SET(ret, value);
+                   }
+                   Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsSpacePerl\n");
                    break;
                case ANYOF_UPPER:
                    if (LOC)
@@ -3569,6 +3799,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
                            if (isUPPER(value))
                                ANYOF_BITMAP_SET(ret, value);
                    }
+                   Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsUpper\n");
                    break;
                case ANYOF_NUPPER:
                    if (LOC)
@@ -3578,6 +3809,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
                            if (!isUPPER(value))
                                ANYOF_BITMAP_SET(ret, value);
                    }
+                   Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsUpper\n");
                    break;
                case ANYOF_XDIGIT:
                    if (LOC)
@@ -3587,6 +3819,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
                            if (isXDIGIT(value))
                                ANYOF_BITMAP_SET(ret, value);
                    }
+                   Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsXDigit\n");
                    break;
                case ANYOF_NXDIGIT:
                    if (LOC)
@@ -3596,6 +3829,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
                            if (!isXDIGIT(value))
                                ANYOF_BITMAP_SET(ret, value);
                    }
+                   Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsXDigit\n");
                    break;
                default:
                    vFAIL("Invalid [::] class");
@@ -3605,22 +3839,25 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
                    ANYOF_FLAGS(ret) |= ANYOF_CLASS;
                continue;
            }
-       }
+       } /* end of namedclass \blah */
+
        if (range) {
-           if (lastvalue > value) /* b-a */ {
+           if (prevvalue > value) /* b-a */ {
                Simple_vFAIL4("Invalid [] range \"%*.*s\"",
                              RExC_parse - rangebegin,
                              RExC_parse - rangebegin,
                              rangebegin);
+               range = 0; /* not a valid range */
            }
-           range = 0;
        }
        else {
-           lastvalue = value;
+           prevvalue = value; /* save the beginning of the range */
            if (*RExC_parse == '-' && RExC_parse+1 < RExC_end &&
                RExC_parse[1] != ']') {
                RExC_parse++;
-               if (namedclass > OOB_NAMEDCLASS) { /* \w-, [:word:]- */
+
+               /* a bad range like \w-, [:word:]- ? */
+               if (namedclass > OOB_NAMEDCLASS) {
                    if (ckWARN(WARN_REGEXP))
                        vWARN4(RExC_parse,
                               "False [] range \"%*.*s\"",
@@ -3630,318 +3867,94 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
                    if (!SIZE_ONLY)
                        ANYOF_BITMAP_SET(ret, '-');
                } else
-                   range = 1;
-               continue;       /* do it next time */
+                   range = 1;  /* yeah, it's a range! */
+               continue;       /* but do it the next time */
            }
        }
+
        /* now is the next time */
        if (!SIZE_ONLY) {
-#ifndef ASCIIish /* EBCDIC, for example. */
-           if ((isLOWER(lastvalue) && isLOWER(value)) ||
-               (isUPPER(lastvalue) && isUPPER(value)))
-           {
-               I32 i;
-               if (isLOWER(lastvalue)) {
-                   for (i = lastvalue; i <= value; i++)
-                       if (isLOWER(i))
-                           ANYOF_BITMAP_SET(ret, i);
-               } else {
-                   for (i = lastvalue; i <= value; i++)
-                       if (isUPPER(i))
-                           ANYOF_BITMAP_SET(ret, i);
+           IV i;
+
+           if (prevvalue < 256) {
+               IV ceilvalue = value < 256 ? value : 255;
+
+#ifdef EBCDIC
+               if ((isLOWER(prevvalue) && isLOWER(ceilvalue)) ||
+                   (isUPPER(prevvalue) && isUPPER(ceilvalue)))
+               {
+                   if (isLOWER(prevvalue)) {
+                       for (i = prevvalue; i <= ceilvalue; i++)
+                           if (isLOWER(i))
+                               ANYOF_BITMAP_SET(ret, i);
+                   } else {
+                       for (i = prevvalue; i <= ceilvalue; i++)
+                           if (isUPPER(i))
+                               ANYOF_BITMAP_SET(ret, i);
+                   }
                }
-           }
-           else
+               else
 #endif
-               for ( ; lastvalue <= value; lastvalue++)
-                   ANYOF_BITMAP_SET(ret, lastvalue);
+                   for (i = prevvalue; i <= ceilvalue; i++)
+                       ANYOF_BITMAP_SET(ret, i);
+         }
+         if (value > 255) {
+               ANYOF_FLAGS(ret) |= ANYOF_UNICODE;
+               if (prevvalue < value)
+                   Perl_sv_catpvf(aTHX_ listsv, "%04"UVxf"\t%04"UVxf"\n",
+                                  (UV)prevvalue, (UV)value);
+               else if (prevvalue == value)
+                   Perl_sv_catpvf(aTHX_ listsv, "%04"UVxf"\n",
+                                  (UV)value);
+           }
         }
-       range = 0;
+
+       range = 0; /* this range (if it was one) is done now */
     }
+
     if (need_class) {
+       ANYOF_FLAGS(ret) |= ANYOF_LARGE;
        if (SIZE_ONLY)
            RExC_size += ANYOF_CLASS_ADD_SKIP;
        else
            RExC_emit += ANYOF_CLASS_ADD_SKIP;
     }
+
     /* optimize case-insensitive simple patterns (e.g. /[a-z]/i) */
     if (!SIZE_ONLY &&
-       (ANYOF_FLAGS(ret) & (ANYOF_FLAGS_ALL ^ ANYOF_INVERT)) == ANYOF_FOLD) {
+        /* If the only flag is folding (plus possibly inversion). */
+       ((ANYOF_FLAGS(ret) & (ANYOF_FLAGS_ALL ^ ANYOF_INVERT)) == ANYOF_FOLD)
+       ) {
        for (value = 0; value < 256; ++value) {
            if (ANYOF_BITMAP_TEST(ret, value)) {
-               I32 cf = PL_fold[value];
-               ANYOF_BITMAP_SET(ret, cf);
+               IV fold = PL_fold[value];
+
+               if (fold != value)
+                   ANYOF_BITMAP_SET(ret, fold);
            }
        }
        ANYOF_FLAGS(ret) &= ~ANYOF_FOLD;
     }
+
     /* optimize inverted simple patterns (e.g. [^a-z]) */
-    if (!SIZE_ONLY && (ANYOF_FLAGS(ret) & ANYOF_FLAGS_ALL) == ANYOF_INVERT) {
+    if (!SIZE_ONLY && optimize_invert &&
+       /* If the only flag is inversion. */
+       (ANYOF_FLAGS(ret) & ANYOF_FLAGS_ALL) == ANYOF_INVERT) {
        for (value = 0; value < ANYOF_BITMAP_SIZE; ++value)
            ANYOF_BITMAP(ret)[value] ^= ANYOF_FLAGS_ALL;
-       ANYOF_FLAGS(ret) = 0;
+       ANYOF_FLAGS(ret) = ANYOF_UNICODE_ALL;
     }
-    return ret;
-}
-
-STATIC regnode *
-S_regclassutf8(pTHX_ RExC_state_t *pRExC_state)
-{
-    dTHR;
-    register char *e;
-    register U32 value;
-    register U32 lastvalue = OOB_UTF8;
-    register I32 range = 0;
-    register regnode *ret;
-    STRLEN numlen;
-    I32 n;
-    SV *listsv;
-    U8 flags = 0;
-    I32 namedclass;
-    char *rangebegin;
 
-    if (*RExC_parse == '^') {  /* Complement of range. */
-       RExC_naughty++;
-       RExC_parse++;
-       if (!SIZE_ONLY)
-           flags |= ANYOF_INVERT;
-    }
     if (!SIZE_ONLY) {
-       if (FOLD)
-           flags |= ANYOF_FOLD;
-       if (LOC)
-           flags |= ANYOF_LOCALE;
-       listsv = newSVpvn("# comment\n",10);
-    }
-
-    if (!SIZE_ONLY && ckWARN(WARN_REGEXP))
-       checkposixcc(pRExC_state);
-
-    if (*RExC_parse == ']' || *RExC_parse == '-')
-       goto skipcond;          /* allow 1st char to be ] or - */
+       AV *av = newAV();
+       SV *rv;
 
-    while (RExC_parse < RExC_end && *RExC_parse != ']') {
-       skipcond:
-       namedclass = OOB_NAMEDCLASS;
-       if (!range)
-           rangebegin = RExC_parse;
-       value = utf8_to_uv((U8*)RExC_parse,
-                              RExC_end - RExC_parse,
-                              &numlen, 0);
-       RExC_parse += numlen;
-       if (value == '[')
-           namedclass = regpposixcc(pRExC_state, value);
-       else if (value == '\\') {
-           value = (U32)utf8_to_uv((U8*)RExC_parse,
-                                       RExC_end - RExC_parse,
-                                       &numlen, 0);
-           RExC_parse += numlen;
-           /* Some compilers cannot handle switching on 64-bit integer
-            * values, therefore value cannot be an UV.  Yes, this will
-            * be a problem later if we want switch on Unicode.  --jhi */
-           switch (value) {
-           case 'w':           namedclass = ANYOF_ALNUM;               break;
-           case 'W':           namedclass = ANYOF_NALNUM;              break;
-           case 's':           namedclass = ANYOF_SPACE;               break;
-           case 'S':           namedclass = ANYOF_NSPACE;              break;
-           case 'd':           namedclass = ANYOF_DIGIT;               break;
-           case 'D':           namedclass = ANYOF_NDIGIT;              break;
-           case 'p':
-           case 'P':
-               if (*RExC_parse == '{') {
-                   e = strchr(RExC_parse++, '}');
-                    if (!e)
-                        vFAIL("Missing right brace on \\p{}");
-                   n = e - RExC_parse;
-               }
-               else {
-                   e = RExC_parse;
-                   n = 1;
-               }
-               if (!SIZE_ONLY) {
-                   if (value == 'p')
-                       Perl_sv_catpvf(aTHX_ listsv,
-                                      "+utf8::%.*s\n", (int)n, RExC_parse);
-                   else
-                       Perl_sv_catpvf(aTHX_ listsv,
-                                      "!utf8::%.*s\n", (int)n, RExC_parse);
-               }
-               RExC_parse = e + 1;
-               lastvalue = OOB_UTF8;
-               continue;
-           case 'n':           value = '\n';           break;
-           case 'r':           value = '\r';           break;
-           case 't':           value = '\t';           break;
-           case 'f':           value = '\f';           break;
-           case 'b':           value = '\b';           break;
-#ifdef ASCIIish
-           case 'e':           value = '\033';         break;
-           case 'a':           value = '\007';         break;
-#else
-           case 'e':           value = '\047';         break;
-           case 'a':           value = '\057';         break;
-#endif
-           case 'x':
-               if (*RExC_parse == '{') {
-                   e = strchr(RExC_parse++, '}');
-                    if (!e) 
-                        vFAIL("Missing right brace on \\x{}");
-                   numlen = 1;         /* allow underscores */
-                   value = (UV)scan_hex(RExC_parse,
-                                    e - RExC_parse,
-                                    &numlen);
-                   RExC_parse = e + 1;
-               }
-               else {
-                   numlen = 0;         /* disallow underscores */
-                   value = (UV)scan_hex(RExC_parse, 2, &numlen);
-                   RExC_parse += numlen;
-               }
-               break;
-           case 'c':
-               value = UCHARAT(RExC_parse++);
-               value = toCTRL(value);
-               break;
-           case '0': case '1': case '2': case '3': case '4':
-           case '5': case '6': case '7': case '8': case '9':
-               numlen = 0;             /* disallow underscores */
-               value = (UV)scan_oct(--RExC_parse, 3, &numlen);
-               RExC_parse += numlen;
-               break;
-           default:
-               if (!SIZE_ONLY && ckWARN(WARN_REGEXP) && isALPHA(value))
-                   vWARN2(RExC_parse,
-                          "Unrecognized escape \\%c in character class passed through",
-                          (int)value);
-               break;
-           }
-       }
-       if (namedclass > OOB_NAMEDCLASS) {
-           if (range) { /* a-\d, a-[:digit:] */
-               if (!SIZE_ONLY) {
-                   if (ckWARN(WARN_REGEXP))
-                       vWARN4(RExC_parse,
-                              "False [] range \"%*.*s\"",
-                              RExC_parse - rangebegin,
-                              RExC_parse - rangebegin,
-                              rangebegin);
-                   Perl_sv_catpvf(aTHX_ listsv,
-                                  /* 0x002D is Unicode for '-' */
-                                  "%04"UVxf"\n002D\n", (UV)lastvalue);
-               }
-               range = 0;
-           }
-           if (!SIZE_ONLY) {
-               switch (namedclass) {
-               case ANYOF_ALNUM:
-                   Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsWord\n");    break;
-               case ANYOF_NALNUM:
-                   Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsWord\n");    break;
-               case ANYOF_ALNUMC:
-                   Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsAlnum\n");   break;
-               case ANYOF_NALNUMC:
-                   Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsAlnum\n");   break;
-               case ANYOF_ALPHA:
-                   Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsAlpha\n");   break;
-               case ANYOF_NALPHA:
-                   Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsAlpha\n");   break;
-               case ANYOF_ASCII:
-                   Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsASCII\n");   break;
-               case ANYOF_NASCII:
-                   Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsASCII\n");   break;
-               case ANYOF_CNTRL:
-                   Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsCntrl\n");   break;
-               case ANYOF_NCNTRL:
-                   Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsCntrl\n");   break;
-               case ANYOF_GRAPH:
-                   Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsGraph\n");   break;
-               case ANYOF_NGRAPH:
-                   Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsGraph\n");   break;
-               case ANYOF_DIGIT:
-                   Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsDigit\n");   break;
-               case ANYOF_NDIGIT:
-                   Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsDigit\n");   break;
-               case ANYOF_LOWER:
-                   Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsLower\n");   break;
-               case ANYOF_NLOWER:
-                   Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsLower\n");   break;
-               case ANYOF_PRINT:
-                   Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsPrint\n");   break;
-               case ANYOF_NPRINT:
-                   Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsPrint\n");   break;
-               case ANYOF_PUNCT:
-                   Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsPunct\n");   break;
-               case ANYOF_NPUNCT:
-                   Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsPunct\n");   break;
-               case ANYOF_SPACE:
-               case ANYOF_PSXSPC:
-               case ANYOF_BLANK:
-                   /* Not very true for PSXSPC and BLANK
-                    * but not feeling like creating IsPOSIXSpace and
-                    * IsBlank right now. --jhi */
-                   Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsSpace\n");   break;
-               case ANYOF_NSPACE:
-               case ANYOF_NPSXSPC:
-               case ANYOF_NBLANK:
-                   Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsSpace\n");   break;
-               case ANYOF_UPPER:
-                   Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsUpper\n");   break;
-               case ANYOF_NUPPER:
-                   Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsUpper\n");   break;
-               case ANYOF_XDIGIT:
-                   Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsXDigit\n");  break;
-               case ANYOF_NXDIGIT:
-                   Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsXDigit\n");  break;
-               }
-               continue;
-           }
-       }
-        if (range) {
-           if (lastvalue > value) { /* b-a */
-               Simple_vFAIL4("Invalid [] range \"%*.*s\"",
-                             RExC_parse - rangebegin,
-                             RExC_parse - rangebegin,
-                             rangebegin);
-           }
-           range = 0;
-       }
-       else {
-           lastvalue = value;
-           if (*RExC_parse == '-' && RExC_parse+1 < RExC_end &&
-               RExC_parse[1] != ']') {
-               RExC_parse++;
-               if (namedclass > OOB_NAMEDCLASS) { /* \w-, [:word:]- */
-                   if (ckWARN(WARN_REGEXP))
-                       vWARN4(RExC_parse,
-                              "False [] range \"%*.*s\"",
-                              RExC_parse - rangebegin,
-                              RExC_parse - rangebegin,
-                              rangebegin);
-                   if (!SIZE_ONLY)
-                       Perl_sv_catpvf(aTHX_ listsv,
-                                      /* 0x002D is Unicode for '-' */
-                                      "002D\n");
-               } else
-                   range = 1;
-               continue;       /* do it next time */
-           }
-       }
-       /* now is the next time */
-       if (!SIZE_ONLY)
-           Perl_sv_catpvf(aTHX_ listsv, "%04"UVxf"\t%04"UVxf"\n",
-                          (UV)lastvalue, (UV)value);
-       range = 0;
-    }
-
-    ret = reganode(pRExC_state, ANYOFUTF8, 0);
-
-    if (!SIZE_ONLY) {
-       SV *rv = swash_init("utf8", "", listsv, 1, 0);
-       SvREFCNT_dec(listsv);
-       n = add_data(pRExC_state, 1,"s");
+       av_store(av, 0, listsv);
+       av_store(av, 1, NULL);
+       rv = newRV_noinc((SV*)av);
+       n = add_data(pRExC_state, 1, "s");
        RExC_rx->data->data[n] = (void*)rv;
-       ARG1_SET(ret, flags);
-       ARG2_SET(ret, n);
+       ARG_SET(ret, n);
     }
 
     return ret;
@@ -3950,7 +3963,6 @@ S_regclassutf8(pTHX_ RExC_state_t *pRExC_state)
 STATIC char*
 S_nextchar(pTHX_ RExC_state_t *pRExC_state)
 {
-    dTHR;
     char* retval = RExC_parse++;
 
     for (;;) {
@@ -3983,7 +3995,6 @@ S_nextchar(pTHX_ RExC_state_t *pRExC_state)
 STATIC regnode *                       /* Location. */
 S_reg_node(pTHX_ RExC_state_t *pRExC_state, U8 op)
 {
-    dTHR;
     register regnode *ret;
     register regnode *ptr;
 
@@ -3997,6 +4008,18 @@ S_reg_node(pTHX_ RExC_state_t *pRExC_state, U8 op)
     NODE_ALIGN_FILL(ret);
     ptr = ret;
     FILL_ADVANCE_NODE(ptr, op);
+    if (RExC_offsets) {         /* MJD */
+      MJD_OFFSET_DEBUG((stderr, "%s:%u: (op %s) %s %u <- %u (len %u) (max %u).\n", 
+              "reg_node", __LINE__, 
+              reg_name[op],
+              RExC_emit - RExC_emit_start > RExC_offsets[0] 
+              ? "Overwriting end of array!\n" : "OK",
+              RExC_emit - RExC_emit_start,
+              RExC_parse - RExC_start,
+              RExC_offsets[0])); 
+      Set_Node_Offset(RExC_emit, RExC_parse + (op == END));
+    }
+            
     RExC_emit = ptr;
 
     return(ret);
@@ -4008,7 +4031,6 @@ S_reg_node(pTHX_ RExC_state_t *pRExC_state, U8 op)
 STATIC regnode *                       /* Location. */
 S_reganode(pTHX_ RExC_state_t *pRExC_state, U8 op, U32 arg)
 {
-    dTHR;
     register regnode *ret;
     register regnode *ptr;
 
@@ -4022,6 +4044,17 @@ S_reganode(pTHX_ RExC_state_t *pRExC_state, U8 op, U32 arg)
     NODE_ALIGN_FILL(ret);
     ptr = ret;
     FILL_ADVANCE_NODE_ARG(ptr, op, arg);
+    if (RExC_offsets) {         /* MJD */
+      MJD_OFFSET_DEBUG((stderr, "%s: %s %u <- %u (max %u).\n", 
+              "reganode",
+              RExC_emit - RExC_emit_start > RExC_offsets[0] ? 
+              "Overwriting end of array!\n" : "OK",
+              RExC_emit - RExC_emit_start,
+              RExC_parse - RExC_start,
+              RExC_offsets[0])); 
+      Set_Cur_Node_Offset;
+    }
+            
     RExC_emit = ptr;
 
     return(ret);
@@ -4033,14 +4066,7 @@ S_reganode(pTHX_ RExC_state_t *pRExC_state, U8 op, U32 arg)
 STATIC void
 S_reguni(pTHX_ RExC_state_t *pRExC_state, UV uv, char* s, STRLEN* lenp)
 {
-    dTHR;
-    if (SIZE_ONLY) {
-       U8 tmpbuf[UTF8_MAXLEN];
-       *lenp = uv_to_utf8(tmpbuf, uv) - tmpbuf;
-    }
-    else
-       *lenp = uv_to_utf8((U8*)s, uv) - (U8*)s;
-
+    *lenp = SIZE_ONLY ? UNISKIP(uv) : (uvchr_to_utf8((U8*)s, uv) - (U8*)s);
 }
 
 /*
@@ -4051,12 +4077,11 @@ S_reguni(pTHX_ RExC_state_t *pRExC_state, UV uv, char* s, STRLEN* lenp)
 STATIC void
 S_reginsert(pTHX_ RExC_state_t *pRExC_state, U8 op, regnode *opnd)
 {
-    dTHR;
     register regnode *src;
     register regnode *dst;
     register regnode *place;
     register int offset = regarglen[(U8)op];
-    
+
 /* (PL_regkind[(U8)op] == CURLY ? EXTRA_STEP_2ARGS : 0); */
 
     if (SIZE_ONLY) {
@@ -4067,10 +4092,33 @@ S_reginsert(pTHX_ RExC_state_t *pRExC_state, U8 op, regnode *opnd)
     src = RExC_emit;
     RExC_emit += NODE_STEP_REGNODE + offset;
     dst = RExC_emit;
-    while (src > opnd)
+    while (src > opnd) {
        StructCopy(--src, --dst, regnode);
+        if (RExC_offsets) {     /* MJD 20010112 */
+          MJD_OFFSET_DEBUG((stderr, "%s: %s copy %u -> %u (max %u).\n", 
+                  "reg_insert",
+                  dst - RExC_emit_start > RExC_offsets[0] 
+                  ? "Overwriting end of array!\n" : "OK",
+                  src - RExC_emit_start,
+                  dst - RExC_emit_start,
+                  RExC_offsets[0])); 
+          Set_Node_Offset_To_R(dst-RExC_emit_start, Node_Offset(src));
+          Set_Node_Length_To_R(dst-RExC_emit_start, Node_Length(src));
+        }
+    }
+    
 
     place = opnd;              /* Op node, where operand used to be. */
+    if (RExC_offsets) {         /* MJD */
+      MJD_OFFSET_DEBUG((stderr, "%s: %s %u <- %u (max %u).\n", 
+              "reginsert",
+              place - RExC_emit_start > RExC_offsets[0] 
+              ? "Overwriting end of array!\n" : "OK",
+              place - RExC_emit_start,
+              RExC_parse - RExC_start,
+              RExC_offsets[0])); 
+      Set_Node_Offset(place, RExC_parse);
+    }
     src = NEXTOPER(place);
     FILL_ADVANCE_NODE(place, op);
     Zero(src, offset, regnode);
@@ -4082,7 +4130,6 @@ S_reginsert(pTHX_ RExC_state_t *pRExC_state, U8 op, regnode *opnd)
 STATIC void
 S_regtail(pTHX_ RExC_state_t *pRExC_state, regnode *p, regnode *val)
 {
-    dTHR;
     register regnode *scan;
     register regnode *temp;
 
@@ -4112,7 +4159,6 @@ S_regtail(pTHX_ RExC_state_t *pRExC_state, regnode *p, regnode *val)
 STATIC void
 S_regoptail(pTHX_ RExC_state_t *pRExC_state, regnode *p, regnode *val)
 {
-    dTHR;
     /* "Operandless" and "op != BRANCH" are synonymous in practice. */
     if (p == NULL || SIZE_ONLY)
        return;
@@ -4148,10 +4194,11 @@ S_regcurly(pTHX_ register char *s)
 }
 
 
+#ifdef DEBUGGING
+
 STATIC regnode *
 S_dumpuntil(pTHX_ regnode *start, regnode *node, regnode *last, SV* sv, I32 l)
 {
-#ifdef DEBUGGING
     register U8 op = EXACT;    /* Arbitrary non-END op. */
     register regnode *next;
 
@@ -4171,13 +4218,13 @@ S_dumpuntil(pTHX_ regnode *start, regnode *node, regnode *last, SV* sv, I32 l)
                      (int)(2*l + 1), "", SvPVX(sv));
        if (next == NULL)               /* Next ptr. */
            PerlIO_printf(Perl_debug_log, "(0)");
-       else 
+       else
            PerlIO_printf(Perl_debug_log, "(%"IVdf")", (IV)(next - start));
        (void)PerlIO_putc(Perl_debug_log, '\n');
       after_print:
        if (PL_regkind[(U8)op] == BRANCHJ) {
-           register regnode *nnode = (OP(next) == LONGJMP 
-                                      ? regnext(next) 
+           register regnode *nnode = (OP(next) == LONGJMP
+                                      ? regnext(next)
                                       : next);
            if (last && nnode > last)
                nnode = last;
@@ -4198,8 +4245,10 @@ S_dumpuntil(pTHX_ regnode *start, regnode *node, regnode *last, SV* sv, I32 l)
            node = dumpuntil(start, NEXTOPER(node), NEXTOPER(node) + 1, sv, l + 1);
        }
        else if (op == ANYOF) {
+           /* arglen 1 + class block */
+           node += 1 + ((ANYOF_FLAGS(node) & ANYOF_LARGE)
+                   ? ANYOF_CLASS_SKIP : ANYOF_SKIP);
            node = NEXTOPER(node);
-           node += ANYOF_SKIP;
        }
        else if (PL_regkind[(U8)op] == EXACT) {
             /* Literal string, where present. */
@@ -4215,10 +4264,11 @@ S_dumpuntil(pTHX_ regnode *start, regnode *node, regnode *last, SV* sv, I32 l)
        else if (op == WHILEM)
            l--;
     }
-#endif /* DEBUGGING */
     return node;
 }
 
+#endif /* DEBUGGING */
+
 /*
  - regdump - dump a regexp onto Perl_debug_log in vaguely comprehensible form
  */
@@ -4226,7 +4276,6 @@ void
 Perl_regdump(pTHX_ regexp *r)
 {
 #ifdef DEBUGGING
-    dTHR;
     SV *sv = sv_newmortal();
 
     (void)dumpuntil(r->program, r->program + 1, NULL, sv, 0);
@@ -4234,25 +4283,25 @@ Perl_regdump(pTHX_ regexp *r)
     /* Header fields of interest. */
     if (r->anchored_substr)
        PerlIO_printf(Perl_debug_log,
-                     "anchored `%s%.*s%s'%s at %"IVdf" ", 
+                     "anchored `%s%.*s%s'%s at %"IVdf" ",
                      PL_colors[0],
                      (int)(SvCUR(r->anchored_substr) - (SvTAIL(r->anchored_substr)!=0)),
-                     SvPVX(r->anchored_substr), 
+                     SvPVX(r->anchored_substr),
                      PL_colors[1],
                      SvTAIL(r->anchored_substr) ? "$" : "",
                      (IV)r->anchored_offset);
     if (r->float_substr)
        PerlIO_printf(Perl_debug_log,
-                     "floating `%s%.*s%s'%s at %"IVdf"..%"UVuf" ", 
+                     "floating `%s%.*s%s'%s at %"IVdf"..%"UVuf" ",
                      PL_colors[0],
-                     (int)(SvCUR(r->float_substr) - (SvTAIL(r->float_substr)!=0)), 
+                     (int)(SvCUR(r->float_substr) - (SvTAIL(r->float_substr)!=0)),
                      SvPVX(r->float_substr),
                      PL_colors[1],
                      SvTAIL(r->float_substr) ? "$" : "",
                      (IV)r->float_min_offset, (UV)r->float_max_offset);
     if (r->check_substr)
-       PerlIO_printf(Perl_debug_log, 
-                     r->check_substr == r->float_substr 
+       PerlIO_printf(Perl_debug_log,
+                     r->check_substr == r->float_substr
                      ? "(checking floating" : "(checking anchored");
     if (r->reganch & ROPT_NOSCAN)
        PerlIO_printf(Perl_debug_log, " noscan");
@@ -4287,13 +4336,25 @@ Perl_regdump(pTHX_ regexp *r)
     if (r->reganch & ROPT_EVAL_SEEN)
        PerlIO_printf(Perl_debug_log, "with eval ");
     PerlIO_printf(Perl_debug_log, "\n");
+    if (r->offsets) {
+      U32 i;
+      U32 len = r->offsets[0];
+      PerlIO_printf(Perl_debug_log, "Offsets: [%"UVuf"]\n\t", (UV)r->offsets[0]);
+      for (i = 1; i <= len; i++)
+        PerlIO_printf(Perl_debug_log, "%"UVuf"[%"UVuf"] ", 
+                      (UV)r->offsets[i*2-1], 
+                      (UV)r->offsets[i*2]);
+      PerlIO_printf(Perl_debug_log, "\n");
+    }
 #endif /* DEBUGGING */
 }
 
+#ifdef DEBUGGING
+
 STATIC void
 S_put_byte(pTHX_ SV *sv, int c)
 {
-    if (c <= ' ' || c == 127 || c == 255)
+    if (isCNTRL(c) || c == 255 || !isPRINT(c))
        Perl_sv_catpvf(aTHX_ sv, "\\%o", c);
     else if (c == '-' || c == ']' || c == '\\' || c == '^')
        Perl_sv_catpvf(aTHX_ sv, "\\%c", c);
@@ -4301,6 +4362,8 @@ S_put_byte(pTHX_ SV *sv, int c)
        Perl_sv_catpvf(aTHX_ sv, "%c", c);
 }
 
+#endif /* DEBUGGING */
+
 /*
 - regprop - printable representation of opcode
 */
@@ -4308,7 +4371,6 @@ void
 Perl_regprop(pTHX_ SV *sv, regnode *o)
 {
 #ifdef DEBUGGING
-    dTHR;
     register int k;
 
     sv_setpvn(sv, "", 0);
@@ -4336,8 +4398,9 @@ Perl_regprop(pTHX_ SV *sv, regnode *o)
        Perl_sv_catpvf(aTHX_ sv, "[%d]", o->flags);     /* 2: embedded, otherwise 1 */
     else if (k == ANYOF) {
        int i, rangestart = -1;
-       const char * const out[] = {    /* Should be syncronized with
-                                          ANYOF_ #xdefines in regcomp.h */
+       U8 flags = ANYOF_FLAGS(o);
+       const char * const anyofs[] = { /* Should be syncronized with
+                                        * ANYOF_ #xdefines in regcomp.h */
            "\\w",
            "\\W",
            "\\s",
@@ -4370,38 +4433,102 @@ Perl_regprop(pTHX_ SV *sv, regnode *o)
            "[:^blank:]"
        };
 
-       if (o->flags & ANYOF_LOCALE)
+       if (flags & ANYOF_LOCALE)
            sv_catpv(sv, "{loc}");
-       if (o->flags & ANYOF_FOLD)
+       if (flags & ANYOF_FOLD)
            sv_catpv(sv, "{i}");
        Perl_sv_catpvf(aTHX_ sv, "[%s", PL_colors[0]);
-       if (o->flags & ANYOF_INVERT)
+       if (flags & ANYOF_INVERT)
            sv_catpv(sv, "^");
-       if (OP(o) == ANYOF) {
-           for (i = 0; i <= 256; i++) {
-               if (i < 256 && ANYOF_BITMAP_TEST(o,i)) {
-                   if (rangestart == -1)
-                       rangestart = i;
-               } else if (rangestart != -1) {
-                   if (i <= rangestart + 3)
-                       for (; rangestart < i; rangestart++)
-                           put_byte(sv, rangestart);
-                   else {
+       for (i = 0; i <= 256; i++) {
+           if (i < 256 && ANYOF_BITMAP_TEST(o,i)) {
+               if (rangestart == -1)
+                   rangestart = i;
+           } else if (rangestart != -1) {
+               if (i <= rangestart + 3)
+                   for (; rangestart < i; rangestart++)
                        put_byte(sv, rangestart);
-                       sv_catpv(sv, "-");
-                       put_byte(sv, i - 1);
-                   }
-                   rangestart = -1;
+               else {
+                   put_byte(sv, rangestart);
+                   sv_catpv(sv, "-");
+                   put_byte(sv, i - 1);
                }
+               rangestart = -1;
            }
-           if (o->flags & ANYOF_CLASS)
-               for (i = 0; i < sizeof(out)/sizeof(char*); i++)
-                   if (ANYOF_CLASS_TEST(o,i))
-                       sv_catpv(sv, out[i]);
        }
-       else {
-           sv_catpv(sv, "{ANYOFUTF8}"); /* TODO: full decode */
+
+       if (o->flags & ANYOF_CLASS)
+           for (i = 0; i < sizeof(anyofs)/sizeof(char*); i++)
+               if (ANYOF_CLASS_TEST(o,i))
+                   sv_catpv(sv, anyofs[i]);
+
+       if (flags & ANYOF_UNICODE)
+           sv_catpv(sv, "{unicode}");
+       else if (flags & ANYOF_UNICODE_ALL)
+           sv_catpv(sv, "{all-unicode}");
+
+       {
+           SV *lv;
+           SV *sw = regclass_swash(o, FALSE, &lv);
+       
+           if (lv) {
+               if (sw) {
+                   UV i;
+                   U8 s[UTF8_MAXLEN+1];
+               
+                   for (i = 0; i <= 256; i++) { /* just the first 256 */
+                       U8 *e = uvchr_to_utf8(s, i);
+                       
+                       if (i < 256 && swash_fetch(sw, s, TRUE)) {
+                           if (rangestart == -1)
+                               rangestart = i;
+                       } else if (rangestart != -1) {
+                           U8 *p;
+                       
+                           if (i <= rangestart + 3)
+                               for (; rangestart < i; rangestart++) {
+                                   for(e = uvchr_to_utf8(s, rangestart), p = s; p < e; p++)
+                                       put_byte(sv, *p);
+                               }
+                           else {
+                               for (e = uvchr_to_utf8(s, rangestart), p = s; p < e; p++)
+                                   put_byte(sv, *p);
+                               sv_catpv(sv, "-");
+                                   for (e = uvchr_to_utf8(s, i - 1), p = s; p < e; p++)
+                                       put_byte(sv, *p);
+                               }
+                               rangestart = -1;
+                           }
+                       }
+                       
+                   sv_catpv(sv, "..."); /* et cetera */
+               }
+
+               {
+                   char *s = savepv(SvPVX(lv));
+                   char *origs = s;
+               
+                   while(*s && *s != '\n') s++;
+               
+                   if (*s == '\n') {
+                       char *t = ++s;
+                       
+                       while (*s) {
+                           if (*s == '\n')
+                               *s = ' ';
+                           s++;
+                       }
+                       if (s[-1] == ' ')
+                           s[-1] = 0;
+                       
+                       sv_catpv(sv, t);
+                   }
+               
+                   Safefree(origs);
+               }
+           }
        }
+
        Perl_sv_catpvf(aTHX_ sv, "%s]", PL_colors[1]);
     }
     else if (k == BRANCHJ && (OP(o) == UNLESSM || OP(o) == IFMATCH))
@@ -4431,7 +4558,6 @@ Perl_re_intuit_string(pTHX_ regexp *prog)
 void
 Perl_pregfree(pTHX_ struct regexp *r)
 {
-    dTHR;
     DEBUG_r(if (!PL_colorset) reginitcolors());
 
     if (!r || (--r->refcnt > 0))
@@ -4445,6 +4571,8 @@ Perl_pregfree(pTHX_ struct regexp *r)
 
     if (r->precomp)
        Safefree(r->precomp);
+    if (r->offsets)             /* 20010421 MJD */
+       Safefree(r->offsets);
     if (RX_MATCH_COPIED(r))
        Safefree(r->subbeg);
     if (r->substrs) {
@@ -4461,6 +4589,7 @@ Perl_pregfree(pTHX_ struct regexp *r)
        SV** old_curpad;
 
        while (--n >= 0) {
+          /* If you add a ->what type here, update the comment in regcomp.h */
            switch (r->data->what[n]) {
            case 's':
                SvREFCNT_dec((SV*)r->data->data[n]);
@@ -4483,7 +4612,11 @@ Perl_pregfree(pTHX_ struct regexp *r)
                }
                else
                    PL_curpad = NULL;
-               op_free((OP_4tree*)r->data->data[n]);
+
+               if (!OpREFCNT_dec((OP_4tree*)r->data->data[n])) {
+                    op_free((OP_4tree*)r->data->data[n]);
+               }
+
                PL_comppad = old_comppad;
                PL_curpad = old_curpad;
                SvREFCNT_dec((SV*)new_comppad);
@@ -4512,7 +4645,6 @@ Perl_pregfree(pTHX_ struct regexp *r)
 regnode *
 Perl_regnext(pTHX_ register regnode *p)
 {
-    dTHR;
     register I32 offset;
 
     if (p == &PL_regdummy)
@@ -4563,9 +4695,7 @@ S_re_croak2(pTHX_ const char* pat1,const char* pat2,...)
 
 void
 Perl_save_re_context(pTHX)
-{                   
-    dTHR;
-
+{
 #if 0
     SAVEPPTR(RExC_precomp);            /* uncompiled string. */
     SAVEI32(RExC_npar);                /* () count. */
@@ -4589,10 +4719,8 @@ Perl_save_re_context(pTHX)
     SAVEVPTR(PL_regendp);              /* Ditto for endp. */
     SAVEVPTR(PL_reglastparen);         /* Similarly for lastparen. */
     SAVEPPTR(PL_regtill);              /* How far we are required to go. */
-    SAVEI8(PL_regprev);                        /* char before regbol, \n if none */
-    SAVEVPTR(PL_reg_start_tmp);                /* from regexec.c */
+    SAVEGENERICPV(PL_reg_start_tmp);           /* from regexec.c */
     PL_reg_start_tmp = 0;
-    SAVEFREEPV(PL_reg_start_tmp);
     SAVEI32(PL_reg_start_tmpl);                /* from regexec.c */
     PL_reg_start_tmpl = 0;
     SAVEVPTR(PL_regdata);
@@ -4610,8 +4738,10 @@ Perl_save_re_context(pTHX)
     SAVEI32(PL_reg_oldpos);                    /* from regexec.c */
     SAVEVPTR(PL_reg_oldcurpm);         /* from regexec.c */
     SAVEVPTR(PL_reg_curpm);            /* from regexec.c */
+    SAVEI32(PL_regnpar);               /* () count. */
+    SAVEI32(PL_regsize);               /* from regexec.c */
 #ifdef DEBUGGING
-    SAVEPPTR(PL_reg_starttry);         /* from regexec.c */    
+    SAVEPPTR(PL_reg_starttry);         /* from regexec.c */
 #endif
 }