regcomp.c: Move table to wider scope

[perl5.git] / regcomp.c
diff --git a/regcomp.c b/regcomp.c

index 89147d3..bba966b 100644 (file)
--- a/regcomp.c
+++ b/regcomp.c
@@ -220,7 +220,7 @@ typedef struct RExC_state_t {
  #define        WORST           0       /* Worst case. */
  #define        HASWIDTH        0x01    /* Known to match non-null strings. */
  
-/* Simple enough to be STAR/PLUS operand, in an EXACT node must be a single
+/* Simple enough to be STAR/PLUS operand; in an EXACT node must be a single
   * character, and if utf8, must be invariant.  Note that this is not the same
   * thing as REGNODE_SIMPLE */
  #define        SIMPLE          0x02
@@ -398,14 +398,18 @@ static const scan_data_t zero_scan_data =
  #define UNI_SEMANTICS (get_regex_charset(RExC_flags) == REGEX_UNICODE_CHARSET)
  #define AT_LEAST_UNI_SEMANTICS (get_regex_charset(RExC_flags) >= REGEX_UNICODE_CHARSET)
  #define ASCII_RESTRICTED (get_regex_charset(RExC_flags) == REGEX_ASCII_RESTRICTED_CHARSET)
-#define MORE_ASCII_RESTRICTED (get_regex_charset(RExC_flags) == REGEX_ASCII_MORE_RESTRICTED_CHARSET)
  #define AT_LEAST_ASCII_RESTRICTED (get_regex_charset(RExC_flags) >= REGEX_ASCII_RESTRICTED_CHARSET)
+#define ASCII_FOLD_RESTRICTED (get_regex_charset(RExC_flags) == REGEX_ASCII_MORE_RESTRICTED_CHARSET)
  
  #define FOLD cBOOL(RExC_flags & RXf_PMf_FOLD)
  
-#define OOB_UNICODE            12345678
  #define OOB_NAMEDCLASS         -1
  
+/* There is no code point that is out-of-bounds, so this is problematic.  But
+ * its only current use is to initialize a variable that is always set before
+ * being looked at. */
+#define OOB_UNICODE            0xDEADBEEF
+
  #define CHR_SVLEN(sv) (UTF ? sv_len_utf8(sv) : SvCUR(sv))
  #define CHR_DIST(a,b) (UTF ? utf8_distance(a,b) : a - b)
  
@@ -2630,9 +2634,9 @@ S_make_trie_failtable(pTHX_ RExC_state_t *pRExC_state, regnode *source,  regnode
   *      problematic sequences.  This delta is used by the caller to adjust the
   *      min length of the match, and the delta between min and max, so that the
   *      optimizer doesn't reject these possibilities based on size constraints.
- * 2)   These sequences are not currently correctly handled by the trie code
- *      either, so it changes the joined node type to ops that are not handled
- *      by trie's, those new ops being EXACTFU_SS and EXACTFU_TRICKYFOLD.
+ * 2)   These sequences require special handling by the trie code, so it
+ *      changes the joined node type to ops for the trie's benefit, those new
+ *      ops being EXACTFU_SS and EXACTFU_TRICKYFOLD.
   * 3)   This is sufficient for the two Greek sequences (described below), but
   *      the one involving the Sharp s (\xDF) needs more.  The node type
   *      EXACTFU_SS is used for an EXACTFU node that contains at least one "ss"
@@ -5307,9 +5311,6 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
         PL_L1PosixGraph = _new_invlist_C_array(L1PosixGraph_invlist);
         PL_PosixGraph = _new_invlist_C_array(PosixGraph_invlist);
  
-       PL_L1PosixAlnum = _new_invlist_C_array(L1PosixAlnum_invlist);
-       PL_PosixAlnum = _new_invlist_C_array(PosixAlnum_invlist);
-
         PL_L1PosixLower = _new_invlist_C_array(L1PosixLower_invlist);
         PL_PosixLower = _new_invlist_C_array(PosixLower_invlist);
  
@@ -7433,7 +7434,6 @@ Perl__invlist_populate_swatch(pTHX_ SV* const invlist, const UV start, const UV
      return;
  }
  
-
  void
  Perl__invlist_union_maybe_complement_2nd(pTHX_ SV* const a, SV* const b, bool complement_b, SV** output)
  {
@@ -8095,6 +8095,36 @@ S_invlist_iternext(pTHX_ SV* invlist, UV* start, UV* end)
      return TRUE;
  }
  
+PERL_STATIC_INLINE UV
+S_invlist_highest(pTHX_ SV* const invlist)
+{
+    /* Returns the highest code point that matches an inversion list.  This API
+     * has an ambiguity, as it returns 0 either if the highest is actually 0,
+     * or if the list is empty.  If this distinction matters to you, check for
+     * emptiness before calling this function */
+
+    UV len = invlist_len(invlist);
+    UV *array;
+
+    PERL_ARGS_ASSERT_INVLIST_HIGHEST;
+
+    if (len == 0) {
+       return 0;   /* Empty list: the ambiguous case described above */
+    }
+
+    array = invlist_array(invlist);
+
+    /* The last element in the array in the inversion list always starts a
+     * range that goes to infinity.  That range may be for code points that are
+     * matched in the inversion list, or it may be for ones that aren't
+     * matched.  In the latter case, the highest code point in the set is one
+     * less than the beginning of this range; otherwise it is the final element
+     * of this range: infinity */
+    return (ELEMENT_RANGE_MATCHES_INVLIST(len - 1))
+           ? UV_MAX                 /* Final range is matched: "infinity" */
+           : array[len - 1] - 1;
+}
+
  #ifndef PERL_IN_XSUB_RE
  SV *
  Perl__invlist_contents(pTHX_ SV* const invlist)
@@ -8357,7 +8387,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
                     ret = reganode(pRExC_state,
                                    ((! FOLD)
                                      ? NREF
-                                    : (MORE_ASCII_RESTRICTED)
+                                    : (ASCII_FOLD_RESTRICTED)
                                        ? NREFFA
                                         : (AT_LEAST_UNI_SEMANTICS)
                                           ? NREFFU
@@ -9705,6 +9735,81 @@ S_reg_recode(pTHX_ const char value, SV **encp)
      return uv;
  }
  
+PERL_STATIC_INLINE U8
+S_compute_EXACTish(pTHX_ RExC_state_t *pRExC_state)
+{   /* Returns the EXACTish node type to use, based on the regex flags */
+    U8 op;
+
+    PERL_ARGS_ASSERT_COMPUTE_EXACTISH;
+
+    if (! FOLD) {
+        return EXACT;   /* Not folding, so the charset is not consulted */
+    }
+
+    op = get_regex_charset(RExC_flags); /* Start from the charset modifier */
+    if (op >= REGEX_ASCII_RESTRICTED_CHARSET) {
+        op--; /* /a is same as /u, and map /aa's offset to what /a's would have
+                 been, so there is no hole */
+    }
+
+    return op + EXACTF; /* Adjusted charset value is an offset from EXACTF */
+}
+
+PERL_STATIC_INLINE void
+S_alloc_maybe_populate_EXACT(pTHX_ RExC_state_t *pRExC_state, regnode *node, STRLEN len, UV code_point)
+{
+    /* This knows the details about sizing an EXACTish node, and potentially
+     * populating it with a single character.  If <len> is non-zero, it assumes
+     * that the node has already been populated, and just does the sizing,
+     * ignoring <code_point>.  Otherwise it looks at <code_point> and
+     * calculates what <len> should be.  In pass 1, it sizes the node
+     * appropriately.  In pass 2, it additionally will populate the node's
+     * STRING with <code_point>, if <len> is 0.
+     *
+     * It knows that under FOLD, UTF characters and the Latin Sharp S must be
+     * folded (the latter only when the rules indicate it can match 'ss') */
+
+    bool len_passed_in = cBOOL(len != 0);
+    U8 character[UTF8_MAXBYTES_CASE+1]; /* Bytes to store for <code_point> */
+
+    PERL_ARGS_ASSERT_ALLOC_MAYBE_POPULATE_EXACT;
+
+    if (! len_passed_in) {  /* No length given: derive it from <code_point> */
+        if (UTF) {
+            if (FOLD) {
+                to_uni_fold(NATIVE_TO_UNI(code_point), character, &len);
+            }
+            else {
+                uvchr_to_utf8( character, code_point);
+                len = UTF8SKIP(character);
+            }
+        }
+        else if (! FOLD
+                 || code_point != LATIN_SMALL_LETTER_SHARP_S
+                 || ASCII_FOLD_RESTRICTED
+                 || ! AT_LEAST_UNI_SEMANTICS)
+        {   /* Not UTF, and no 'ss' fold applies: store the single byte */
+            *character = (U8) code_point;
+            len = 1;
+        }
+        else {  /* Sharp s that can match 'ss': store the folded form */
+            *character = 's';
+            *(character + 1) = 's';
+            len = 2;
+        }
+    }
+
+    if (SIZE_ONLY) {    /* Sizing only: just account for the space needed */
+        RExC_size += STR_SZ(len);
+    }
+    else {  /* Emitting: advance past the string and record its length */
+        RExC_emit += STR_SZ(len);
+        STR_LEN(node) = len;
+        if (! len_passed_in) {  /* Only populate if the caller had not */
+            Copy((char *) character, STRING(node), len, char);
+        }
+    }
+}
  
  /*
   - regatom - the lowest level
@@ -10082,7 +10187,7 @@ tryagain:
                  ret = reganode(pRExC_state,
                                 ((! FOLD)
                                   ? NREF
-                                : (MORE_ASCII_RESTRICTED)
+                                : (ASCII_FOLD_RESTRICTED)
                                    ? NREFFA
                                     : (AT_LEAST_UNI_SEMANTICS)
                                       ? NREFFU
@@ -10153,7 +10258,7 @@ tryagain:
                     ret = reganode(pRExC_state,
                                    ((! FOLD)
                                      ? REF
-                                    : (MORE_ASCII_RESTRICTED)
+                                    : (ASCII_FOLD_RESTRICTED)
                                        ? REFFA
                                         : (AT_LEAST_UNI_SEMANTICS)
                                           ? REFFU
@@ -10210,18 +10315,7 @@ tryagain:
             bool is_exactfu_sharp_s;
  
             ender = 0;
-            if (! FOLD) {
-                node_type = EXACT;
-            }
-            else {
-                node_type = get_regex_charset(RExC_flags);
-                if (node_type >= REGEX_ASCII_RESTRICTED_CHARSET) {
-                    node_type--; /* /a is same as /u, and map /aa's offset to
-                                    what /a's would have been, so there is no
-                                    hole */
-                }
-                node_type += EXACTF;
-            }
+            node_type = compute_EXACTish(pRExC_state);
             ret = reg_node(pRExC_state, node_type);
             s = STRING(ret);
  
@@ -10472,7 +10566,7 @@ tryagain:
                         *tmpbuf = (U8) ender;
                         foldlen = 1;
                     }
-                   else if (! MORE_ASCII_RESTRICTED && ! LOC) {
+                   else if (! ASCII_FOLD_RESTRICTED && ! LOC) {
  
                         /* Locale and /aa require more selectivity about the
                          * fold, so are handled below.  Otherwise, here, just
@@ -10604,12 +10698,7 @@ tryagain:
             if (len == 1 && UNI_IS_INVARIANT(ender))
                 *flagp |= SIMPLE;
  
-           if (SIZE_ONLY)
-               RExC_size += STR_SZ(len);
-           else {
-               STR_LEN(ret) = len;
-               RExC_emit += STR_SZ(len);
-            }
+            alloc_maybe_populate_EXACT(pRExC_state, ret, len, 0);
         }
         break;
      }
@@ -10689,7 +10778,7 @@ S_regpposixcc(pTHX_ RExC_state_t *pRExC_state, I32 value)
                     switch (skip) {
                     case 4:
                         if (memEQ(posixcc, "word", 4)) /* this is not POSIX, this is the Perl \w */
-                           namedclass = complement ? ANYOF_NALNUM : ANYOF_ALNUM;
+                           namedclass = ANYOF_ALNUM;
                         break;
                     case 5:
                         /* Names all of length 5.  */
@@ -10699,57 +10788,63 @@ S_regpposixcc(pTHX_ RExC_state_t *pRExC_state, I32 value)
                         switch (posixcc[4]) {
                         case 'a':
                             if (memEQ(posixcc, "alph", 4)) /* alpha */
-                               namedclass = complement ? ANYOF_NALPHA : ANYOF_ALPHA;
+                               namedclass = ANYOF_ALPHA;
                             break;
                         case 'e':
                             if (memEQ(posixcc, "spac", 4)) /* space */
-                               namedclass = complement ? ANYOF_NPSXSPC : ANYOF_PSXSPC;
+                               namedclass = ANYOF_PSXSPC;
                             break;
                         case 'h':
                             if (memEQ(posixcc, "grap", 4)) /* graph */
-                               namedclass = complement ? ANYOF_NGRAPH : ANYOF_GRAPH;
+                               namedclass = ANYOF_GRAPH;
                             break;
                         case 'i':
                             if (memEQ(posixcc, "asci", 4)) /* ascii */
-                               namedclass = complement ? ANYOF_NASCII : ANYOF_ASCII;
+                               namedclass = ANYOF_ASCII;
                             break;
                         case 'k':
                             if (memEQ(posixcc, "blan", 4)) /* blank */
-                               namedclass = complement ? ANYOF_NBLANK : ANYOF_BLANK;
+                               namedclass = ANYOF_BLANK;
                             break;
                         case 'l':
                             if (memEQ(posixcc, "cntr", 4)) /* cntrl */
-                               namedclass = complement ? ANYOF_NCNTRL : ANYOF_CNTRL;
+                               namedclass = ANYOF_CNTRL;
                             break;
                         case 'm':
                             if (memEQ(posixcc, "alnu", 4)) /* alnum */
-                               namedclass = complement ? ANYOF_NALNUMC : ANYOF_ALNUMC;
+                               namedclass = ANYOF_ALNUMC;
                             break;
                         case 'r':
                             if (memEQ(posixcc, "lowe", 4)) /* lower */
-                               namedclass = complement ? ANYOF_NLOWER : ANYOF_LOWER;
+                               namedclass = ANYOF_LOWER;
                             else if (memEQ(posixcc, "uppe", 4)) /* upper */
-                               namedclass = complement ? ANYOF_NUPPER : ANYOF_UPPER;
+                               namedclass = ANYOF_UPPER;
                             break;
                         case 't':
                             if (memEQ(posixcc, "digi", 4)) /* digit */
-                               namedclass = complement ? ANYOF_NDIGIT : ANYOF_DIGIT;
+                               namedclass = ANYOF_DIGIT;
                             else if (memEQ(posixcc, "prin", 4)) /* print */
-                               namedclass = complement ? ANYOF_NPRINT : ANYOF_PRINT;
+                               namedclass = ANYOF_PRINT;
                             else if (memEQ(posixcc, "punc", 4)) /* punct */
-                               namedclass = complement ? ANYOF_NPUNCT : ANYOF_PUNCT;
+                               namedclass = ANYOF_PUNCT;
                             break;
                         }
                         break;
                     case 6:
                         if (memEQ(posixcc, "xdigit", 6))
-                           namedclass = complement ? ANYOF_NXDIGIT : ANYOF_XDIGIT;
+                           namedclass = ANYOF_XDIGIT;
                         break;
                     }
  
                     if (namedclass == OOB_NAMEDCLASS)
                         Simple_vFAIL3("POSIX class [:%.*s:] unknown",
                                       t - s - 1, s + 1);
+
+                    /* The #defines are structured so each complement is +1 to
+                     * the normal one */
+                    if (complement) {
+                        namedclass++;
+                    }
                     assert (posixcc[skip] == ':');
                     assert (posixcc[skip+1] == ']');
                 } else if (!SIZE_ONLY) {
@@ -10880,7 +10975,7 @@ S_checkposixcc(pTHX_ RExC_state_t *pRExC_state)
   *                determined at run-time
   * run_time_list  is a SV* that contains text names of properties that are to
   *                be computed at run time.  This concatenates <Xpropertyname>
- *                to it, apppropriately
+ *                to it, appropriately
   * This is essentially DO_POSIX, but we know only the Latin1 values at compile
   * time */
  #define DO_POSIX_LATIN1_ONLY_KNOWN(node, class, destlist, sourcelist,      \
@@ -10906,16 +11001,18 @@ S_checkposixcc(pTHX_ RExC_state_t *pRExC_state)
      }
  
  /* Like DO_POSIX_LATIN1_ONLY_KNOWN, but for the complement.  A combination of
- * this and DO_N_POSIX */
+ * this and DO_N_POSIX.  Sets <matches_above_unicode> only if it can; unchanged
+ * otherwise */
  #define DO_N_POSIX_LATIN1_ONLY_KNOWN(node, class, destlist, sourcelist,    \
-                              l1_sourcelist, Xpropertyname, run_time_list) \
+       l1_sourcelist, Xpropertyname, run_time_list, matches_above_unicode) \
      if (AT_LEAST_ASCII_RESTRICTED) {                                       \
          _invlist_union_complement_2nd(destlist, sourcelist, &destlist);    \
      }                                                                      \
      else {                                                                 \
          Perl_sv_catpvf(aTHX_ run_time_list, "!utf8::%s\n", Xpropertyname); \
+        matches_above_unicode = TRUE;                                      \
         if (LOC) {                                                         \
-           ANYOF_CLASS_SET(node, namedclass);                             \
+            ANYOF_CLASS_SET(node, namedclass);                            \
         }                                                                  \
         else {                                                             \
              SV* scratch_list = NULL;                                       \
@@ -10951,6 +11048,11 @@ S_add_alternate(pTHX_ AV** alternate_ptr, U8* string, STRLEN len)
      return;
  }
  
+/* The names of properties whose definitions are not known at compile time are
+ * stored in this SV, after a constant heading.  So if the length has been
+ * changed since initialization, then there is a run-time definition. */
+#define HAS_NONLOCALE_RUNTIME_PROPERTY_DEFINITION (SvCUR(listsv) != initial_listsv_len)
+
  /*
     parse a class specification and produce either an ANYOF node that
     matches the pattern or perhaps will be optimized into an EXACTish node
@@ -10963,7 +11065,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, U32 depth)
  {
      dVAR;
      register UV nextvalue;
-    register IV prevvalue = OOB_UNICODE;
+    register UV prevvalue = OOB_UNICODE;
      register IV range = 0;
      UV value = 0; /* XXX:dmq: needs to be referenceable (unfortunately) */
      register regnode *ret;
@@ -10982,13 +11084,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, U32 depth)
                                Optimizations may be possible if this is tiny */
      UV n;
  
-    /* Certain named classes have equivalents that can appear outside a
-     * character class, e.g. \w.  These flags are set for these classes.  The
-     * first flag indicates the op depends on the character set modifier, like
-     * /d, /u....  The second is for those that don't have this dependency. */
-    bool has_special_charset_op = FALSE;
-    bool has_special_non_charset_op = FALSE;
-
      /* Unicode properties are stored in a swash; this holds the current one
       * being parsed.  If this swash is the only above-latin1 component of the
       * character class, an optimization is to pass it directly on to the
@@ -11020,9 +11115,14 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, U32 depth)
      UV stored = 0;  /* how many chars stored in the bitmap */
      bool invert = FALSE;    /* Is this class to be complemented */
  
+    /* Is there anything like \W or [:^digit:] that matches above the legal
+     * Unicode range? */
+    bool runtime_posix_matches_above_Unicode = FALSE;
+
      regnode * const orig_emit = RExC_emit; /* Save the original RExC_emit in
          case we need to change the emitted regop to an EXACT. */
      const char * orig_parse = RExC_parse;
+    const I32 orig_size = RExC_size;
      GET_RE_DEBUG_FLAGS_DECL;
  
      PERL_ARGS_ASSERT_REGCLASS;
@@ -11065,7 +11165,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, U32 depth)
         if (LOC) {
             ANYOF_FLAGS(ret) |= ANYOF_LOCALE;
         }
-       ANYOF_BITMAP_ZERO(ret);
         listsv = newSVpvs("# comment\n");
         initial_listsv_len = SvCUR(listsv);
      }
@@ -11168,6 +11267,7 @@ parseit:
                      SV** invlistsvp;
                      SV* invlist;
                      char* name;
+
                     if (UCHARAT(RExC_parse) == '^') {
                          RExC_parse++;
                          n--;
@@ -11226,7 +11326,11 @@ parseit:
  
                          /* We don't know yet, so have to assume that the
                           * property could match something in the Latin1 range,
-                         * hence something that isn't utf8 */
+                         * hence something that isn't utf8.  Note that this
+                         * would cause things in <depends_list> to match
+                         * inappropriately, except that any \p{}, including
+                         * this one, forces Unicode semantics, which means
+                         * there is no <depends_list> */
                          ANYOF_FLAGS(ret) |= ANYOF_NONBITMAP_NON_UTF8;
                      }
                      else {
@@ -11389,41 +11493,7 @@ parseit:
                  element_count += 2; /* So counts for three values */
             }
  
-           if (SIZE_ONLY) {
-
-                /* In the first pass, do a little extra work so below can
-                 * possibly optimize the whole node to one of the nodes that
-                 * correspond to the classes given below */
-
-                /* The optimization will only take place if there is a single
-                 * element in the class, so can skip if there is more than one
-                 */
-                if (element_count == 1) {
-
-               /* Possible truncation here but in some 64-bit environments
-                * the compiler gets heartburn about switch on 64-bit values.
-                * A similar issue a little earlier when switching on value.
-                * --jhi */
-                    switch ((I32)namedclass) {
-                        case ANYOF_ALNUM:
-                        case ANYOF_NALNUM:
-                        case ANYOF_DIGIT:
-                        case ANYOF_NDIGIT:
-                        case ANYOF_SPACE:
-                        case ANYOF_NSPACE:
-                            has_special_charset_op = TRUE;
-                            break;
-
-                        case ANYOF_HORIZWS:
-                        case ANYOF_NHORIZWS:
-                        case ANYOF_VERTWS:
-                        case ANYOF_NVERTWS:
-                            has_special_non_charset_op = TRUE;
-                            break;
-                    }
-                }
-            }
-            else {
+           if (! SIZE_ONLY) {
                 switch ((I32)namedclass) {
  
                 case ANYOF_ALNUMC: /* C's alnum, in contrast to \w */
@@ -11432,7 +11502,8 @@ parseit:
                     break;
                 case ANYOF_NALNUMC:
                     DO_N_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, posixes,
-                        PL_PosixAlnum, PL_L1PosixAlnum, "XPosixAlnum", listsv);
+                        PL_PosixAlnum, PL_L1PosixAlnum, "XPosixAlnum", listsv,
+                        runtime_posix_matches_above_Unicode);
                     break;
                 case ANYOF_ALPHA:
                     DO_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, posixes,
@@ -11440,7 +11511,8 @@ parseit:
                     break;
                 case ANYOF_NALPHA:
                     DO_N_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, posixes,
-                        PL_PosixAlpha, PL_L1PosixAlpha, "XPosixAlpha", listsv);
+                        PL_PosixAlpha, PL_L1PosixAlpha, "XPosixAlpha", listsv,
+                        runtime_posix_matches_above_Unicode);
                     break;
                 case ANYOF_ASCII:
                     if (LOC) {
@@ -11484,12 +11556,11 @@ parseit:
                      * them */
                     DO_POSIX_LATIN1_ONLY_KNOWN_L1_RESOLVED(ret, namedclass, posixes,
                          PL_PosixDigit, "XPosixDigit", listsv);
-                    has_special_charset_op = TRUE;
                     break;
                 case ANYOF_NDIGIT:
                     DO_N_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, posixes,
-                        PL_PosixDigit, PL_PosixDigit, "XPosixDigit", listsv);
-                    has_special_charset_op = TRUE;
+                        PL_PosixDigit, PL_PosixDigit, "XPosixDigit", listsv,
+                        runtime_posix_matches_above_Unicode);
                     break;
                 case ANYOF_GRAPH:
                     DO_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, posixes,
@@ -11497,7 +11568,8 @@ parseit:
                     break;
                 case ANYOF_NGRAPH:
                     DO_N_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, posixes,
-                        PL_PosixGraph, PL_L1PosixGraph, "XPosixGraph", listsv);
+                        PL_PosixGraph, PL_L1PosixGraph, "XPosixGraph", listsv,
+                        runtime_posix_matches_above_Unicode);
                     break;
                 case ANYOF_HORIZWS:
                     /* For these, we use the cp_list, as /d doesn't make a
@@ -11506,12 +11578,10 @@ parseit:
                      * cp_list is subject to folding.  It turns out that \h
                      * is just a synonym for XPosixBlank */
                     _invlist_union(cp_list, PL_XPosixBlank, &cp_list);
-                    has_special_non_charset_op = TRUE;
                     break;
                 case ANYOF_NHORIZWS:
                      _invlist_union_complement_2nd(cp_list,
                                                   PL_XPosixBlank, &cp_list);
-                    has_special_non_charset_op = TRUE;
                     break;
                 case ANYOF_LOWER:
                 case ANYOF_NLOWER:
@@ -11539,7 +11609,8 @@ parseit:
                     }
                     else {
                         DO_N_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass,
-                            posixes, ascii_source, l1_source, Xname, listsv);
+                            posixes, ascii_source, l1_source, Xname, listsv,
+                            runtime_posix_matches_above_Unicode);
                     }
                     break;
                 }
@@ -11549,7 +11620,8 @@ parseit:
                     break;
                 case ANYOF_NPRINT:
                     DO_N_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, posixes,
-                        PL_PosixPrint, PL_L1PosixPrint, "XPosixPrint", listsv);
+                        PL_PosixPrint, PL_L1PosixPrint, "XPosixPrint", listsv,
+                        runtime_posix_matches_above_Unicode);
                     break;
                 case ANYOF_PUNCT:
                     DO_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, posixes,
@@ -11557,7 +11629,8 @@ parseit:
                     break;
                 case ANYOF_NPUNCT:
                     DO_N_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, posixes,
-                        PL_PosixPunct, PL_L1PosixPunct, "XPosixPunct", listsv);
+                        PL_PosixPunct, PL_L1PosixPunct, "XPosixPunct", listsv,
+                        runtime_posix_matches_above_Unicode);
                     break;
                 case ANYOF_PSXSPC:
                      DO_POSIX(ret, namedclass, posixes,
@@ -11570,12 +11643,10 @@ parseit:
                 case ANYOF_SPACE:
                      DO_POSIX(ret, namedclass, posixes,
                                              PL_PerlSpace, PL_XPerlSpace);
-                    has_special_charset_op = TRUE;
                     break;
                 case ANYOF_NSPACE:
                      DO_N_POSIX(ret, namedclass, posixes,
                                              PL_PerlSpace, PL_XPerlSpace);
-                    has_special_charset_op = TRUE;
                     break;
                 case ANYOF_UPPER:   /* Same as LOWER, above */
                 case ANYOF_NUPPER:
@@ -11600,19 +11671,19 @@ parseit:
                     }
                     else {
                         DO_N_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass,
-                        posixes, ascii_source, l1_source, Xname, listsv);
+                        posixes, ascii_source, l1_source, Xname, listsv,
+                        runtime_posix_matches_above_Unicode);
                     }
                     break;
                 }
                 case ANYOF_ALNUM:   /* Really is 'Word' */
                     DO_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, posixes,
                              PL_PosixWord, PL_L1PosixWord, "XPosixWord", listsv);
-                    has_special_charset_op = TRUE;
                     break;
                 case ANYOF_NALNUM:
                     DO_N_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, posixes,
-                            PL_PosixWord, PL_L1PosixWord, "XPosixWord", listsv);
-                    has_special_charset_op = TRUE;
+                            PL_PosixWord, PL_L1PosixWord, "XPosixWord", listsv,
+                            runtime_posix_matches_above_Unicode);
                     break;
                 case ANYOF_VERTWS:
                     /* For these, we use the cp_list, as /d doesn't make a
@@ -11620,12 +11691,10 @@ parseit:
                      * if these characters had folds other than themselves, as
                      * cp_list is subject to folding */
                     _invlist_union(cp_list, PL_VertSpace, &cp_list);
-                    has_special_non_charset_op = TRUE;
                     break;
                 case ANYOF_NVERTWS:
                      _invlist_union_complement_2nd(cp_list,
                                                      PL_VertSpace, &cp_list);
-                    has_special_non_charset_op = TRUE;
                     break;
                 case ANYOF_XDIGIT:
                      DO_POSIX(ret, namedclass, posixes,
@@ -11643,19 +11712,19 @@ parseit:
                     break;
                 }
  
-               continue;
+               continue;   /* Go get next character */
             }
         } /* end of namedclass \blah */
  
         if (range) {
-           if (prevvalue > (IV)value) /* b-a */ {
+           if (prevvalue > value) /* b-a */ {
                 const int w = RExC_parse - rangebegin;
                 Simple_vFAIL4("Invalid [] range \"%*.*s\"", w, w, rangebegin);
                 range = 0; /* not a valid range */
             }
         }
         else {
-           prevvalue = value; /* save the beginning of the range */
+            prevvalue = value; /* save the beginning of the potential range */
             if (RExC_parse+1 < RExC_end
                 && *RExC_parse == '-'
                 && RExC_parse[1] != ']')
@@ -11680,13 +11749,16 @@ parseit:
             }
         }
  
+        /* Here, <prevvalue> is the beginning of the range, if any; or <value>
+         * if not */
+
         /* non-Latin1 code point implies unicode semantics.  Must be set in
          * pass1 so is there for the whole of pass 2 */
         if (value > 255) {
             RExC_uni_semantics = 1;
         }
  
-       /* now is the next time */
+        /* Ready to process either the single value, or the completed range */
         if (!SIZE_ONLY) {
  #ifndef EBCDIC
              cp_list = _add_range_to_invlist(cp_list, prevvalue, value);
@@ -11715,100 +11787,151 @@ parseit:
          }
  
         range = 0; /* this range (if it was one) is done now */
-    }
+    } /* End of loop through all the text within the brackets */
  
-    /* [\w] can be optimized into \w, but not if there is anything else in the
-     * brackets (except for an initial '^' which indictes omplementing).  We
-     * also can optimize the common special case /[0-9]/ into /\d/a */
-    if (element_count == 1 &&
-        (has_special_charset_op
-         || has_special_non_charset_op
-         || (prevvalue == '0' && value == '9')))
-    {
-        U8 op;
-        const char * cur_parse = RExC_parse;
+    /* If the character class contains only a single element, it may be
+     * optimizable into another node type which is smaller and runs faster.
+     * Check if this is the case for this class */
+    if (element_count == 1) {
+        U8 op = END;
  
-        if (has_special_charset_op) {
-            U8 offset = get_regex_charset(RExC_flags);
+        if (namedclass > OOB_NAMEDCLASS) { /* this is a named class, like \w or
+                                              [:digit:] or \p{foo} */
  
-            /* /aa is the same as /a for these */
-            if (offset == REGEX_ASCII_MORE_RESTRICTED_CHARSET) {
-                offset = REGEX_ASCII_RESTRICTED_CHARSET;
-            }
+            /* Certain named classes have equivalents that can appear outside a
+             * character class, e.g. \w, \H.  We use these instead of a
+             * character class. */
              switch ((I32)namedclass) {
+                U8 offset;
+
+                /* The first group is for node types that depend on the charset
+                 * modifier to the regex.  We first calculate the base node
+                 * type, and if it should be inverted */
+
                  case ANYOF_NALNUM:
                      invert = ! invert;
                      /* FALLTHROUGH */
                  case ANYOF_ALNUM:
                      op = ALNUM;
-                    break;
+                    goto join_charset_classes;
+
                  case ANYOF_NSPACE:
                      invert = ! invert;
                      /* FALLTHROUGH */
                  case ANYOF_SPACE:
                      op = SPACE;
-                    break;
+                    goto join_charset_classes;
+
                  case ANYOF_NDIGIT:
                      invert = ! invert;
                      /* FALLTHROUGH */
                  case ANYOF_DIGIT:
                      op = DIGIT;
  
-                    /* There is no DIGITU */
-                    if (offset == REGEX_UNICODE_CHARSET) {
-                        offset = REGEX_DEPENDS_CHARSET;
+                  join_charset_classes:
+
+                    /* Now that we have the base node type, we take advantage
+                     * of the enum ordering of the charset modifiers to get the
+                     * exact node type.  For example the base SPACE also has
+                     * SPACEL, SPACEU, and SPACEA */
+
+                    offset = get_regex_charset(RExC_flags);
+
+                    /* /aa is the same as /a for these */
+                    if (offset == REGEX_ASCII_MORE_RESTRICTED_CHARSET) {
+                        offset = REGEX_ASCII_RESTRICTED_CHARSET;
+                    }
+                    else if (op == DIGIT && offset == REGEX_UNICODE_CHARSET) {
+                        offset = REGEX_DEPENDS_CHARSET; /* There is no DIGITU */
                      }
-                    break;
-                default:
-                    Perl_croak(aTHX_ "panic: Named character class %"IVdf" is not expected to have a non-[...] version", namedclass);
-            }
  
-            /* The number of varieties of each of these is the same, hence, so
-             * is the delta between the normal and complemented nodes */
-            if (invert) {
-                offset += NALNUM - ALNUM;
-            }
+                    op += offset;
  
-            op += offset;
-        }
-        else if (has_special_non_charset_op) {
-            switch ((I32)namedclass) {
+                    /* The number of varieties of each of these is the same,
+                     * hence, so is the delta between the normal and
+                     * complemented nodes */
+                    if (invert) {
+                        op += NALNUM - ALNUM;
+                    }
+                    break;
+
+                /* The second group doesn't depend on the charset modifiers.
+                 * We just have normal and complemented */
                  case ANYOF_NHORIZWS:
                      invert = ! invert;
                      /* FALLTHROUGH */
                  case ANYOF_HORIZWS:
-                    op = HORIZWS;
+                    op = (invert) ? NHORIZWS : HORIZWS;
                      break;
+
                  case ANYOF_NVERTWS:
                      invert = ! invert;
                      /* FALLTHROUGH */
                  case ANYOF_VERTWS:
-                    op = VERTWS;
+                    op = (invert) ? NVERTWS : VERTWS;
                      break;
-                default:
-                    Perl_croak(aTHX_ "panic: Named character class %"IVdf" is not expected to have a non-[...] version", namedclass);
+
+
              }
+        }
+        else if (value == prevvalue) {
+
+            /* Here, the class consists of just a single code point */
  
-            /* The complement version of each of these nodes is adjacently next
-             * */
              if (invert) {
-                op++;
+                if (! LOC && value == '\n') {
+                    op = REG_ANY; /* Optimize [^\n] */
+                }
+            }
+            else if (value < 256 || UTF) {
+
+                /* Optimize a single value into an EXACTish node, but not if it
+                 * would require converting the pattern to UTF-8. */
+                op = compute_EXACTish(pRExC_state);
+            }
+        } /* Otherwise is a range */
+        else if (! LOC) {   /* locale could vary these */
+            if (prevvalue == '0') {
+                if (value == '9') {
+                    op = (invert) ? NDIGITA : DIGITA;
+                }
              }
-        }
-        else {  /* The remaining possibility is [0-9] */
-            op = (invert) ? NDIGITA : DIGITA;
          }
  
-        /* Throw away this ANYOF regnode, and emit the calculated one, which
-         * should correspond to the beginning, not current, state of the parse
-         */
-        RExC_parse = (char *)orig_parse;
-        RExC_emit = (regnode *)orig_emit;
-        ret = reg_node(pRExC_state, op);
-        RExC_parse = (char *) cur_parse;
+        /* Here, we have changed <op> away from its initial value iff we found
+         * an optimization */
+        if (op != END) {
+
+            /* Throw away this ANYOF regnode, and emit the calculated one,
+             * which should correspond to the beginning, not current, state of
+             * the parse */
+            const char * cur_parse = RExC_parse;
+            RExC_parse = (char *)orig_parse;
+            if ( SIZE_ONLY) {
+                if (! LOC) {
+
+                    /* To get locale nodes to not use the full ANYOF size would
+                     * require moving the code above that writes the portions
+                     * of it that aren't in other nodes to after this point.
+                     * e.g.  ANYOF_CLASS_SET */
+                    RExC_size = orig_size;
+                }
+            }
+            else {
+                RExC_emit = (regnode *)orig_emit;
+            }
  
-        SvREFCNT_dec(listsv);
-        return ret;
+            ret = reg_node(pRExC_state, op);
+
+            if (PL_regkind[op] == EXACT) {
+                alloc_maybe_populate_EXACT(pRExC_state, ret, 0, value);
+            }
+
+            RExC_parse = (char *) cur_parse;
+
+            SvREFCNT_dec(listsv);
+            return ret;
+        }
      }
  
      if (SIZE_ONLY)
@@ -11822,27 +11945,21 @@ parseit:
  
         SV* fold_intersection = NULL;
  
-        const UV highest_index = invlist_len(cp_list) - 1;
-
          /* In the Latin1 range, the characters that can be folded-to or -from
           * are precisely the alphabetic characters.  If the highest code point
           * is within Latin1, we can use the compiled-in list, and not have to
-         * go out to disk.  If the last element in the array is in the
-         * inversion list set, it starts a range that goes to infinity, so the
-         * maximum of the inversion list is definitely above Latin1.
-         * Otherwise, it starts a range that isn't in the set, so the max is
-         * one less than it */
-        if (! ELEMENT_RANGE_MATCHES_INVLIST(highest_index)
-            && invlist_array(cp_list)[highest_index] <= 256)
-        {
+         * go out to disk. */
+        if (invlist_highest(cp_list) < 256) {
              _invlist_intersection(PL_L1PosixAlpha, cp_list, &fold_intersection);
          }
          else {
  
-            /* This is a list of all the characters that participate in folds
-             * (except marks, etc in multi-char folds */
+            /* Here, there are non-Latin1 code points, so we will have to go
+             * fetch the list of all the characters that participate in folds
+             */
              if (! PL_utf8_foldable) {
-                SV* swash = swash_init("utf8", "Cased", &PL_sv_undef, 1, 0);
+                SV* swash = swash_init("utf8", "_Perl_Any_Folds",
+                                       &PL_sv_undef, 1, 0);
                  PL_utf8_foldable = _swash_to_invlist(swash);
                  SvREFCNT_dec(swash);
              }
@@ -11922,12 +12039,12 @@ parseit:
                          }
                          else {
                              depends_list =
-                                add_cp_to_invlist(depends_list, PL_fold_latin1[j]);
+                             add_cp_to_invlist(depends_list, PL_fold_latin1[j]);
                          }
                      }
  
                      if (HAS_NONLATIN1_FOLD_CLOSURE(j)
-                        && (! isASCII(j) || ! MORE_ASCII_RESTRICTED))
+                        && (! isASCII(j) || ! ASCII_FOLD_RESTRICTED))
                      {
                          /* Certain Latin1 characters have matches outside
                           * Latin1, or are multi-character.  To get here, 'j' is
@@ -11957,15 +12074,13 @@ parseit:
                          switch (j) {
                              case 'k':
                              case 'K':
-                                /* KELVIN SIGN */
                                  cp_list =
-                                    add_cp_to_invlist(cp_list, 0x212A);
+                                    add_cp_to_invlist(cp_list, KELVIN_SIGN);
                                  break;
                              case 's':
                              case 'S':
-                                /* LATIN SMALL LETTER LONG S */
-                                cp_list =
-                                    add_cp_to_invlist(cp_list, 0x017F);
+                                cp_list = add_cp_to_invlist(cp_list,
+                                                    LATIN_SMALL_LETTER_LONG_S);
                                  break;
                              case MICRO_SIGN:
                                  cp_list = add_cp_to_invlist(cp_list,
@@ -11975,9 +12090,8 @@ parseit:
                                  break;
                              case LATIN_CAPITAL_LETTER_A_WITH_RING_ABOVE:
                              case LATIN_SMALL_LETTER_A_WITH_RING_ABOVE:
-                                /* ANGSTROM SIGN */
                                  cp_list =
-                                        add_cp_to_invlist(cp_list, 0x212B);
+                                    add_cp_to_invlist(cp_list, ANGSTROM_SIGN);
                                  break;
                              case LATIN_SMALL_LETTER_Y_WITH_DIAERESIS:
                                  cp_list = add_cp_to_invlist(cp_list,
@@ -11989,7 +12103,7 @@ parseit:
  
                                  /* Under /a, /d, and /u, this can match the two
                                   * chars "ss" */
-                                if (! MORE_ASCII_RESTRICTED) {
+                                if (! ASCII_FOLD_RESTRICTED) {
                                      add_alternate(&unicode_alternate,
                                                    (U8 *) "ss", 2);
  
@@ -12034,7 +12148,7 @@ parseit:
                                      ((allow_full_fold) ? FOLD_FLAGS_FULL : 0)
                                      | ((LOC)
                                          ? FOLD_FLAGS_LOCALE
-                                        : (MORE_ASCII_RESTRICTED)
+                                        : (ASCII_FOLD_RESTRICTED)
                                              ? FOLD_FLAGS_NOMIX_ASCII
                                              : 0));
  
@@ -12093,7 +12207,8 @@ parseit:
                             /* /aa doesn't allow folds between ASCII and non-;
                              * /l doesn't allow them between above and below
                              * 256 */
-                           if ((MORE_ASCII_RESTRICTED && (isASCII(c) != isASCII(j)))
+                           if ((ASCII_FOLD_RESTRICTED
+                                      && (isASCII(c) != isASCII(j)))
                                 || (LOC && ((c < 256) != (j < 256))))
                             {
                                 continue;
@@ -12106,7 +12221,7 @@ parseit:
                                 cp_list = add_cp_to_invlist(cp_list, c);
                              }
                              else {
-                                depends_list = add_cp_to_invlist(depends_list, c);
+                              depends_list = add_cp_to_invlist(depends_list, c);
                             }
                         }
                     }
@@ -12118,7 +12233,8 @@ parseit:
  
      /* And combine the result (if any) with any inversion list from posix
       * classes.  The lists are kept separate up to now because we don't want to
-     * fold the classes */
+     * fold the classes (folding of those is automatically handled by the swash
+     * fetching code) */
      if (posixes) {
          if (AT_LEAST_UNI_SEMANTICS) {
              if (cp_list) {
@@ -12160,17 +12276,48 @@ parseit:
      }
  
      /* And combine the result (if any) with any inversion list from properties.
+     * The lists are kept separate up to now so that we can distinguish the two
+     * in regards to matching above-Unicode.  A run-time warning is generated
+     * if a Unicode property is matched against a non-Unicode code point. But,
+     * we allow user-defined properties to match anything, without any warning,
+     * and we also suppress the warning if there is a portion of the character
+     * class that isn't a Unicode property, and which matches above Unicode, \W
+     * or [\x{110000}] for example.
       * (Note that in this case, unlike the Posix one above, there is no
       * <depends_list>, because having a Unicode property forces Unicode
       * semantics */
      if (properties) {
+        bool warn_super = ! has_user_defined_property;
          if (cp_list) {
-            _invlist_union(cp_list, properties, &cp_list);
+
+            /* If it matters to the final outcome, see if a non-property
+             * component of the class matches above Unicode.  If so, the
+             * warning gets suppressed.  This is true even if just a single
+             * such code point is specified: though not strictly correct if
+             * a different such code point is matched against, the fact that
+             * they are using above-Unicode code points indicates they should
+             * know the issues involved */
+            if (warn_super) {
+                bool non_prop_matches_above_Unicode =
+                            runtime_posix_matches_above_Unicode
+                            | (invlist_highest(cp_list) > PERL_UNICODE_MAX);
+                if (invert) {
+                    non_prop_matches_above_Unicode =
+                                            !  non_prop_matches_above_Unicode;
+                }
+                warn_super = ! non_prop_matches_above_Unicode;
+            }
+
+            _invlist_union(properties, cp_list, &cp_list);
              SvREFCNT_dec(properties);
          }
          else {
              cp_list = properties;
          }
+
+        if (warn_super) {
+            ANYOF_FLAGS(ret) |= ANYOF_WARN_SUPER;
+        }
      }
  
      /* Here, we have calculated what code points should be in the character
@@ -12190,7 +12337,7 @@ parseit:
          && ! LOC
         && ! depends_list
         && ! unicode_alternate
-       && SvCUR(listsv) == initial_listsv_len)
+       && ! HAS_NONLOCALE_RUNTIME_PROPERTY_DEFINITION)
      {
          _invlist_invert(cp_list);
  
@@ -12208,6 +12355,7 @@ parseit:
       * compile time that match under all conditions.  Go through it, and
       * for things that belong in the bitmap, put them there, and delete from
       * <cp_list> */
+    ANYOF_BITMAP_ZERO(ret);
      if (cp_list) {
  
         /* This gets set if we actually need to modify things */
@@ -12298,7 +12446,7 @@ parseit:
       * FI'. */
      if (! cp_list
         && ! unicode_alternate
-       && SvCUR(listsv) == initial_listsv_len
+       && ! HAS_NONLOCALE_RUNTIME_PROPERTY_DEFINITION
         && ! (ANYOF_FLAGS(ret) & (ANYOF_INVERT|ANYOF_UNICODE_ALL))
          && (((stored == 1 && ((! (ANYOF_FLAGS(ret) & ANYOF_LOCALE))
                                || (! ANYOF_CLASS_TEST_ANY_SET(ret)))))
@@ -12306,7 +12454,7 @@ parseit:
                                  && (! _HAS_NONLATIN1_FOLD_CLOSURE_ONLY_FOR_USE_BY_REGCOMP_DOT_C_AND_REGEXEC_DOT_C(value))
                                  /* If the latest code point has a fold whose
                                   * bit is set, it must be the only other one */
-                               && ((prevvalue = PL_fold_latin1[value]) != (IV)value)
+                               && ((prevvalue = PL_fold_latin1[value]) != value)
                                  && ANYOF_BITMAP_TEST(ret, prevvalue)))))
      {
          /* Note that the information needed to decide to do this optimization
@@ -12349,7 +12497,7 @@ parseit:
              * then EXACTFU if the regex calls for it, or is required because
              * the character is non-ASCII.  (If <value> is ASCII, its fold is
              * also ASCII for the cases where we get here.) */
-           if (MORE_ASCII_RESTRICTED && isASCII(value)) {
+           if (ASCII_FOLD_RESTRICTED && isASCII(value)) {
                 op = EXACTFA;
             }
             else if (AT_LEAST_UNI_SEMANTICS || !isASCII(value)) {
@@ -12384,7 +12532,7 @@ parseit:
         swash = NULL;
      }
      if (! cp_list
-       && SvCUR(listsv) == initial_listsv_len
+       && ! HAS_NONLOCALE_RUNTIME_PROPERTY_DEFINITION
         && ! unicode_alternate)
      {
         ARG_SET(ret, ANYOF_NONBITMAP_EMPTY);
@@ -12407,9 +12555,9 @@ parseit:
         AV * const av = newAV();
         SV *rv;
  
-       av_store(av, 0, (SvCUR(listsv) == initial_listsv_len)
-                       ? &PL_sv_undef
-                       : listsv);
+       av_store(av, 0, (HAS_NONLOCALE_RUNTIME_PROPERTY_DEFINITION)
+                       ? listsv
+                       : &PL_sv_undef);
         if (swash) {
             av_store(av, 1, swash);
             SvREFCNT_dec(cp_list);
@@ -12440,6 +12588,7 @@ parseit:
      }
      return ret;
  }
+#undef HAS_NONLOCALE_RUNTIME_PROPERTY_DEFINITION
  
  
  /* reg_skipcomment()
@@ -13033,6 +13182,40 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o)
  #ifdef DEBUGGING
      dVAR;
      register int k;
+
+    /* Should be synchronized with * ANYOF_ #xdefines in regcomp.h */
+    static const char * const anyofs[] = {
+        "\\w",
+        "\\W",
+        "\\s",
+        "\\S",
+        "\\d",
+        "\\D",
+        "[:alnum:]",
+        "[:^alnum:]",
+        "[:alpha:]",
+        "[:^alpha:]",
+        "[:ascii:]",
+        "[:^ascii:]",
+        "[:cntrl:]",
+        "[:^cntrl:]",
+        "[:graph:]",
+        "[:^graph:]",
+        "[:lower:]",
+        "[:^lower:]",
+        "[:print:]",
+        "[:^print:]",
+        "[:punct:]",
+        "[:^punct:]",
+        "[:upper:]",
+        "[:^upper:]",
+        "[:xdigit:]",
+        "[:^xdigit:]",
+        "[:space:]",
+        "[:^space:]",
+        "[:blank:]",
+        "[:^blank:]"
+    };
      RXi_GET_DECL(prog,progi);
      GET_RE_DEBUG_FLAGS_DECL;
      
@@ -13153,39 +13336,6 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o)
         const U8 flags = ANYOF_FLAGS(o);
         int do_sep = 0;
  
-       /* Should be synchronized with * ANYOF_ #xdefines in regcomp.h */
-       static const char * const anyofs[] = {
-           "\\w",
-           "\\W",
-           "\\s",
-           "\\S",
-           "\\d",
-           "\\D",
-           "[:alnum:]",
-           "[:^alnum:]",
-           "[:alpha:]",
-           "[:^alpha:]",
-           "[:ascii:]",
-           "[:^ascii:]",
-           "[:cntrl:]",
-           "[:^cntrl:]",
-           "[:graph:]",
-           "[:^graph:]",
-           "[:lower:]",
-           "[:^lower:]",
-           "[:print:]",
-           "[:^print:]",
-           "[:punct:]",
-           "[:^punct:]",
-           "[:upper:]",
-           "[:^upper:]",
-           "[:xdigit:]",
-           "[:^xdigit:]",
-           "[:space:]",
-           "[:^space:]",
-           "[:blank:]",
-           "[:^blank:]"
-       };
  
         if (flags & ANYOF_LOCALE)
             sv_catpvs(sv, "{loc}");