Change \p{} matching for above-Unicode code points

[perl5.git] / regexec.c
diff --git a/regexec.c b/regexec.c

index b83272d..fdaa537 100644 (file)
--- a/regexec.c
+++ b/regexec.c
@@ -37,16 +37,6 @@
  #include "re_top.h"
  #endif
  
-/* At least one required character in the target string is expressible only in
- * UTF-8. */
-static const char* const non_utf8_target_but_utf8_required
-                = "Can't match, because target string needs to be in UTF-8\n";
-
-#define NON_UTF8_TARGET_BUT_UTF8_REQUIRED(target) STMT_START { \
-    DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%s", non_utf8_target_but_utf8_required));\
-    goto target; \
-} STMT_END
-
  /*
   * pregcomp and pregexec -- regsub and regerror are not used in perl
   *
@@ -93,6 +83,18 @@ static const char* const non_utf8_target_but_utf8_required
  #include "inline_invlist.c"
  #include "unicode_constants.h"
  
+#ifdef DEBUGGING
+/* At least one required character in the target string is expressible only in
+ * UTF-8. */
+static const char* const non_utf8_target_but_utf8_required
+                = "Can't match, because target string needs to be in UTF-8\n";
+#endif
+
+#define NON_UTF8_TARGET_BUT_UTF8_REQUIRED(target) STMT_START { \
+    DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%s", non_utf8_target_but_utf8_required));\
+    goto target; \
+} STMT_END
+
  #define HAS_NONLATIN1_FOLD_CLOSURE(i) _HAS_NONLATIN1_FOLD_CLOSURE_ONLY_FOR_USE_BY_REGCOMP_DOT_C_AND_REGEXEC_DOT_C(i)
  
  #ifndef STATIC
@@ -165,13 +167,13 @@ static const char* const non_utf8_target_but_utf8_required
  #define LOAD_UTF8_CHARCLASS_ALNUM() LOAD_UTF8_CHARCLASS_DEBUG_TEST(           \
                                          PL_utf8_swash_ptrs[_CC_WORDCHAR],     \
                                          swash_property_names[_CC_WORDCHAR],   \
-                                        GREEK_SMALL_LETTER_IOTA_UTF8)
+                                        LATIN_CAPITAL_LETTER_SHARP_S_UTF8);
  
  #define LOAD_UTF8_CHARCLASS_GCB()  /* Grapheme cluster boundaries */          \
      STMT_START {                                                              \
         LOAD_UTF8_CHARCLASS_DEBUG_TEST(PL_utf8_X_regular_begin,               \
                                         "_X_regular_begin",                    \
-                                       GREEK_SMALL_LETTER_IOTA_UTF8);         \
+                                       LATIN_CAPITAL_LETTER_SHARP_S_UTF8);    \
         LOAD_UTF8_CHARCLASS_DEBUG_TEST(PL_utf8_X_extend,                      \
                                         "_X_extend",                           \
                                         COMBINING_GRAVE_ACCENT_UTF8);          \
@@ -207,13 +209,13 @@ static const char* const non_utf8_target_but_utf8_required
  /* Currently these are only used when PL_regkind[OP(rn)] == EXACT so
     we don't need this definition. */
  #define IS_TEXT(rn)   ( OP(rn)==EXACT   || OP(rn)==REF   || OP(rn)==NREF   )
-#define IS_TEXTF(rn)  ( OP(rn)==EXACTFU || OP(rn)==EXACTFU_SS || OP(rn)==EXACTFU_TRICKYFOLD || OP(rn)==EXACTFA || OP(rn)==EXACTF || OP(rn)==REFF  || OP(rn)==NREFF )
+#define IS_TEXTF(rn)  ( OP(rn)==EXACTFU || OP(rn)==EXACTFU_SS || OP(rn)==EXACTFA || OP(rn)==EXACTFA_NO_TRIE || OP(rn)==EXACTF || OP(rn)==REFF  || OP(rn)==NREFF )
  #define IS_TEXTFL(rn) ( OP(rn)==EXACTFL || OP(rn)==REFFL || OP(rn)==NREFFL )
  
  #else
  /* ... so we use this as its faster. */
  #define IS_TEXT(rn)   ( OP(rn)==EXACT   )
-#define IS_TEXTFU(rn)  ( OP(rn)==EXACTFU || OP(rn)==EXACTFU_SS || OP(rn)==EXACTFU_TRICKYFOLD || OP(rn) == EXACTFA)
+#define IS_TEXTFU(rn)  ( OP(rn)==EXACTFU || OP(rn)==EXACTFU_SS || OP(rn) == EXACTFA || OP(rn) == EXACTFA_NO_TRIE)
  #define IS_TEXTF(rn)  ( OP(rn)==EXACTF  )
  #define IS_TEXTFL(rn) ( OP(rn)==EXACTFL )
  
@@ -667,16 +669,24 @@ Perl_re_intuit_start(pTHX_
          }
         check = prog->check_substr;
      }
-    if ((prog->extflags & RXf_ANCH)    /* Match at beg-of-str or after \n */
-        && !(prog->extflags & RXf_ANCH_GPOS)) /* \G isn't a BOS or \n */
-    {
-        ml_anch = !( (prog->extflags & RXf_ANCH_SINGLE)
+    if (prog->extflags & RXf_ANCH) { /* Match at \G, beg-of-str or after \n */
+       ml_anch = !( (prog->extflags & RXf_ANCH_SINGLE)
                      || ( (prog->extflags & RXf_ANCH_BOL)
                           && !multiline ) );    /* Check after \n? */
  
         if (!ml_anch) {
-         if (    !(prog->intflags & PREGf_IMPLICIT) /* not a real BOL */
-              && (strpos != strbeg)) {
+          /* we are only allowed to match at BOS or \G */
+
+         if (prog->extflags & RXf_ANCH_GPOS) {
+            /* in this case, we hope(!) that the caller has already
+             * set strpos to pos()-gofs, and will already have checked
+             * that this anchor position is legal
+             */
+            ;
+          }
+          else if (!(prog->intflags & PREGf_IMPLICIT) /* not a real BOL */
+               && (strpos != strbeg))
+          {
               DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Not at start...\n"));
               goto fail;
           }
@@ -740,7 +750,7 @@ Perl_re_intuit_start(pTHX_
         /* end shift should be non negative here */
      }
  
-#ifdef QDEBUGGING      /* 7/99: reports of failure (with the older version) */
+#ifdef DEBUGGING       /* 7/99: reports of failure (with the older version) */
      if (end_shift < 0)
         Perl_croak(aTHX_ "panic: end_shift: %"IVdf" pattern:\n%s\n ",
                    (IV)end_shift, RX_PRECOMP(prog));
@@ -1227,15 +1237,22 @@ Perl_re_intuit_start(pTHX_
  }
  
  #define DECL_TRIE_TYPE(scan) \
-    const enum { trie_plain, trie_utf8, trie_utf8_fold, trie_latin_utf8_fold } \
+    const enum { trie_plain, trie_utf8, trie_utf8_fold, trie_latin_utf8_fold, \
+                 trie_utf8_exactfa_fold, trie_latin_utf8_exactfa_fold } \
                      trie_type = ((scan->flags == EXACT) \
                                ? (utf8_target ? trie_utf8 : trie_plain) \
-                              : (utf8_target ? trie_utf8_fold : trie_latin_utf8_fold))
+                              : (scan->flags == EXACTFA) \
+                                ? (utf8_target ? trie_utf8_exactfa_fold : trie_latin_utf8_exactfa_fold) \
+                                : (utf8_target ? trie_utf8_fold : trie_latin_utf8_fold))
  
  #define REXEC_TRIE_READ_CHAR(trie_type, trie, widecharmap, uc, uscan, len, uvc, charid, foldlen, foldbuf, uniflags) \
  STMT_START {                               \
      STRLEN skiplen;                                                                 \
+    U8 flags = FOLD_FLAGS_FULL; \
      switch (trie_type) {                                                            \
+    case trie_utf8_exactfa_fold:                                                            \
+        flags |= FOLD_FLAGS_NOMIX_ASCII; \
+        /* FALL THROUGH */ \
      case trie_utf8_fold:                                                            \
          if ( foldlen>0 ) {                                                          \
              uvc = utf8n_to_uvchr( (const U8*) uscan, UTF8_MAXLEN, &len, uniflags ); \
@@ -1243,13 +1260,16 @@ STMT_START {                               \
              uscan += len;                                                           \
              len=0;                                                                  \
          } else {                                                                    \
-            uvc = to_utf8_fold( (const U8*) uc, foldbuf, &foldlen );                \
+            uvc = _to_utf8_fold_flags( (const U8*) uc, foldbuf, &foldlen, flags, NULL);                \
              len = UTF8SKIP(uc);                                                     \
              skiplen = UNISKIP( uvc );                                               \
              foldlen -= skiplen;                                                     \
              uscan = foldbuf + skiplen;                                              \
          }                                                                           \
          break;                                                                      \
+    case trie_latin_utf8_exactfa_fold: \
+        flags |= FOLD_FLAGS_NOMIX_ASCII; \
+        /* FALL THROUGH */ \
      case trie_latin_utf8_fold:                                                      \
          if ( foldlen>0 ) {                                                          \
              uvc = utf8n_to_uvchr( (const U8*) uscan, UTF8_MAXLEN, &len, uniflags ); \
@@ -1258,7 +1278,7 @@ STMT_START {                               \
              len=0;                                                                  \
          } else {                                                                    \
              len = 1;                                                                \
-            uvc = _to_fold_latin1( (U8) *uc, foldbuf, &foldlen, FOLD_FLAGS_FULL);   \
+            uvc = _to_fold_latin1( (U8) *uc, foldbuf, &foldlen, flags);   \
              skiplen = UNISKIP( uvc );                                               \
              foldlen -= skiplen;                                                     \
              uscan = foldbuf + skiplen;                                              \
@@ -1469,7 +1489,6 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
      switch (OP(c)) {
      case ANYOF:
      case ANYOF_SYNTHETIC:
-    case ANYOF_WARN_SUPER:
          if (utf8_target) {
              REXEC_FBC_UTF8_CLASS_SCAN(
                        reginclass(prog, c, (U8*)s, utf8_target));
@@ -1487,6 +1506,9 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
          );
          break;
  
+    case EXACTFA_NO_TRIE:   /* This node only generated for non-utf8 patterns */
+        assert(! is_utf8_pat);
+       /* FALL THROUGH */
      case EXACTFA:
          if (is_utf8_pat || utf8_target) {
              utf8_fold_flags = FOLDEQ_UTF8_NOMIX_ASCII;
@@ -1496,10 +1518,9 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
          folder = foldEQ_latin1;                /* /a, except the sharp s one which */
          goto do_exactf_non_utf8;       /* isn't dealt with by these */
  
-    case EXACTF:
+    case EXACTF:   /* This node only generated for non-utf8 patterns */
+        assert(! is_utf8_pat);
          if (utf8_target) {
-
-            /* regcomp.c already folded this if pattern is in UTF-8 */
              utf8_fold_flags = 0;
              goto do_exactf_utf8;
          }
@@ -1522,7 +1543,6 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
          }
          goto do_exactf_utf8;
  
-    case EXACTFU_TRICKYFOLD:
      case EXACTFU:
          if (is_utf8_pat || utf8_target) {
              utf8_fold_flags = is_utf8_pat ? FOLDEQ_S2_ALREADY_FOLDED : 0;
@@ -2262,11 +2282,11 @@ Perl_regexec_flags(pTHX_ REGEXP * const rx, char *stringarg, char *strend,
              : strbeg; /* pos() not defined; use start of string */
  
          DEBUG_GPOS_r(PerlIO_printf(Perl_debug_log,
-            "GPOS ganch set to strbeg[%"IVdf"]\n", reginfo->ganch - strbeg));
+            "GPOS ganch set to strbeg[%"IVdf"]\n", (IV)(reginfo->ganch - strbeg)));
  
          /* in the presence of \G, we may need to start looking earlier in
           * the string than the suggested start point of stringarg:
-         * if gofs->prog is set, then that's a known, fixed minimum
+         * if prog->gofs is set, then that's a known, fixed minimum
           * offset, such as
           * /..\G/:   gofs = 2
           * /ab|c\G/: gofs = 1
@@ -2388,7 +2408,7 @@ Perl_regexec_flags(pTHX_ REGEXP * const rx, char *stringarg, char *strend,
             Not newSVsv, either, as it does not COW.
          */
          reginfo->sv = newSV(0);
-        sv_setsv(reginfo->sv, sv);
+        SvSetSV_nosteal(reginfo->sv, sv);
          SAVEFREESV(reginfo->sv);
      }
  
@@ -3434,7 +3454,7 @@ S_setup_EXACTISH_ST_c1_c2(pTHX_ const regnode * const text_node, int *c1p,
              SV** listp;
              if (! PL_utf8_foldclosures) {
                  if (! PL_utf8_tofold) {
-                    U8 dummy[UTF8_MAXBYTES+1];
+                    U8 dummy[UTF8_MAXBYTES_CASE+1];
  
                      /* Force loading this by folding an above-Latin1 char */
                      to_utf8_fold((U8*) HYPHEN_UTF8, dummy, NULL);
@@ -3487,7 +3507,8 @@ S_setup_EXACTISH_ST_c1_c2(pTHX_ const regnode * const text_node, int *c1p,
                       * which is the one above 255 */
                      if ((c1 < 256) != (c2 < 256)) {
                          if (OP(text_node) == EXACTFL
-                            || (OP(text_node) == EXACTFA
+                            || ((OP(text_node) == EXACTFA
+                                 || OP(text_node) == EXACTFA_NO_TRIE)
                                  && (isASCII(c1) || isASCII(c2))))
                          {
                              if (c1 < 256) {
@@ -3505,7 +3526,9 @@ S_setup_EXACTISH_ST_c1_c2(pTHX_ const regnode * const text_node, int *c1p,
               if (utf8_target
                   && HAS_NONLATIN1_FOLD_CLOSURE(c1)
                   && OP(text_node) != EXACTFL
-                 && (OP(text_node) != EXACTFA || ! isASCII(c1)))
+                 && ((OP(text_node) != EXACTFA
+                      && OP(text_node) != EXACTFA_NO_TRIE)
+                     || ! isASCII(c1)))
          {
              /* Here, there could be something above Latin1 in the target which
               * folds to this character in the pattern.  All such cases except
@@ -3526,7 +3549,9 @@ S_setup_EXACTISH_ST_c1_c2(pTHX_ const regnode * const text_node, int *c1p,
                      c2 = PL_fold_locale[c1];
                      break;
  
-                case EXACTF:
+                case EXACTF:   /* This node only generated for non-utf8
+                                  patterns */
+                    assert(! is_utf8_pat);
                      if (! utf8_target) {    /* /d rules */
                          c2 = PL_fold[c1];
                          break;
@@ -3534,8 +3559,11 @@ S_setup_EXACTISH_ST_c1_c2(pTHX_ const regnode * const text_node, int *c1p,
                      /* FALLTHROUGH */
                      /* /u rules for all these.  This happens to work for
                       * EXACTFA as nothing in Latin1 folds to ASCII */
+                case EXACTFA_NO_TRIE:   /* This node only generated for
+                                           non-utf8 patterns */
+                    assert(! is_utf8_pat);
+                    /* FALL THROUGH */
                  case EXACTFA:
-                case EXACTFU_TRICKYFOLD:
                  case EXACTFU_SS:
                  case EXACTFU:
                      c2 = PL_fold_latin1[c1];
@@ -3631,7 +3659,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
                                 during a successful match */
      U32 lastopen = 0;       /* last open we saw */
      bool has_cutgroup = RX_HAS_CUTGROUP(rex) ? 1 : 0;   
-    SV* const oreplsv = GvSV(PL_replgv);
+    SV* const oreplsv = GvSVn(PL_replgv);
      /* these three flags are set by various ops to signal information to
       * the very next op. They have a useful lifetime of exactly one loop
       * iteration, and are not preserved or restored by state pushes/pops
@@ -3743,16 +3771,14 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
             sayNO_SILENT;
             assert(0); /*NOTREACHED*/
  
-       case EOL: /* /..$/  */
-               goto seol;
-
         case MEOL: /* /..$/m  */
             if (!NEXTCHR_IS_EOS && nextchr != '\n')
                 sayNO;
             break;
  
+       case EOL: /* /..$/  */
+            /* FALL THROUGH */
         case SEOL: /* /..$/s  */
-         seol:
             if (!NEXTCHR_IS_EOS && nextchr != '\n')
                 sayNO;
             if (reginfo->strend - locinput > 1)
@@ -4216,20 +4242,25 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
             goto do_exactf;
  
         case EXACTFU_SS:         /*  /\x{df}/iu   */
-       case EXACTFU_TRICKYFOLD: /*  /\x{390}/iu  */
         case EXACTFU:            /*  /abc/iu      */
             folder = foldEQ_latin1;
             fold_array = PL_fold_latin1;
             fold_utf8_flags = is_utf8_pat ? FOLDEQ_S1_ALREADY_FOLDED : 0;
             goto do_exactf;
  
+        case EXACTFA_NO_TRIE:   /* This node only generated for non-utf8
+                                   patterns */
+            assert(! is_utf8_pat);
+            /* FALL THROUGH */
         case EXACTFA:            /*  /abc/iaa     */
             folder = foldEQ_latin1;
             fold_array = PL_fold_latin1;
             fold_utf8_flags = FOLDEQ_UTF8_NOMIX_ASCII;
             goto do_exactf;
  
-       case EXACTF:             /*  /abc/i       */
+        case EXACTF:             /*  /abc/i    This node only generated for
+                                               non-utf8 patterns */
+            assert(! is_utf8_pat);
             folder = foldEQ;
             fold_array = PL_fold;
             fold_utf8_flags = 0;
@@ -4354,7 +4385,6 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
             break;
  
         case ANYOF:  /*  /[abc]/       */
-       case ANYOF_WARN_SUPER:
              if (NEXTCHR_IS_EOS)
                  sayNO;
             if (utf8_target) {
@@ -5071,20 +5101,22 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
                 else {                   /*  /(??{})  */
                     /*  if its overloaded, let the regex compiler handle
                      *  it; otherwise extract regex, or stringify  */
+                   if (SvGMAGICAL(ret))
+                       ret = sv_mortalcopy(ret);
                     if (!SvAMAGIC(ret)) {
                         SV *sv = ret;
                         if (SvROK(sv))
                             sv = SvRV(sv);
                         if (SvTYPE(sv) == SVt_REGEXP)
                             re_sv = (REGEXP*) sv;
-                       else if (SvSMAGICAL(sv)) {
-                           MAGIC *mg = mg_find(sv, PERL_MAGIC_qr);
+                       else if (SvSMAGICAL(ret)) {
+                           MAGIC *mg = mg_find(ret, PERL_MAGIC_qr);
                             if (mg)
                                 re_sv = (REGEXP *) mg->mg_obj;
                         }
  
-                       /* force any magic, undef warnings here */
-                       if (!re_sv) {
+                       /* force any undef warnings here */
+                       if (!re_sv && !SvPOK(ret) && !SvNIOK(ret)) {
                             ret = sv_mortalcopy(ret);
                             (void) SvPV_force_nolen(ret);
                         }
@@ -5138,17 +5170,13 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
                                      pm_flags);
  
                         if (!(SvFLAGS(ret)
-                             & (SVs_TEMP | SVs_PADTMP | SVf_READONLY
-                                | SVs_GMG))) {
+                             & (SVs_TEMP | SVs_GMG | SVf_ROK))
+                        && (!SvPADTMP(ret) || SvREADONLY(ret))) {
                             /* This isn't a first class regexp. Instead, it's
                                caching a regexp onto an existing, Perl visible
                                scalar.  */
                             sv_magic(ret, MUTABLE_SV(re_sv), PERL_MAGIC_qr, 0, 0);
                         }
-                       /* safe to do now that any $1 etc has been
-                        * interpolated into the new pattern string and
-                        * compiled */
-                       S_regcp_restore(aTHX_ rex, runops_cp, &maxopenparen);
                     }
                     SAVEFREESV(re_sv);
                     re = ReANY(re_sv);
@@ -6890,8 +6918,11 @@ S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p,
         }
         break;
  
+    case EXACTFA_NO_TRIE:   /* This node only generated for non-utf8 patterns */
+        assert(! reginfo->is_utf8_pat);
+        /* FALL THROUGH */
      case EXACTFA:
-       utf8_flags = FOLDEQ_UTF8_NOMIX_ASCII;
+        utf8_flags = FOLDEQ_UTF8_NOMIX_ASCII;
         goto do_exactf;
  
      case EXACTFL:
@@ -6899,12 +6930,12 @@ S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p,
         utf8_flags = FOLDEQ_UTF8_LOCALE;
         goto do_exactf;
  
-    case EXACTF:
-           utf8_flags = 0;
-           goto do_exactf;
+    case EXACTF:   /* This node only generated for non-utf8 patterns */
+        assert(! reginfo->is_utf8_pat);
+        utf8_flags = 0;
+        goto do_exactf;
  
      case EXACTFU_SS:
-    case EXACTFU_TRICKYFOLD:
      case EXACTFU:
         utf8_flags = reginfo->is_utf8_pat ? FOLDEQ_S2_ALREADY_FOLDED : 0;
  
@@ -6968,7 +6999,6 @@ S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p,
         break;
      }
      case ANYOF:
-    case ANYOF_WARN_SUPER:
         if (utf8_target) {
             while (hardcount < max
                     && scan < loceol
@@ -7415,14 +7445,13 @@ S_reginclass(pTHX_ regexp * const prog, const regnode * const n, const U8* const
             match = TRUE;
         }
         else if (flags & ANYOF_LOCALE) {
-           RXp_MATCH_TAINTED_on(prog);
-
-           if ((flags & ANYOF_LOC_FOLD)
-                && ANYOF_BITMAP_TEST(n, PL_fold_locale[c]))
-           {
-               match = TRUE;
-           }
-           else if (ANYOF_CLASS_TEST_ANY_SET(n)) {
+           if (flags & ANYOF_LOC_FOLD) {
+                RXp_MATCH_TAINTED_on(prog);
+                if (ANYOF_BITMAP_TEST(n, PL_fold_locale[c])) {
+                    match = TRUE;
+                }
+            }
+           else if (ANYOF_POSIXL_TEST_ANY_SET(n)) {
  
                  /* The data structure is arranged so bits 0, 2, 4, ... are set
                   * if the class includes the Posix character class given by
@@ -7456,8 +7485,10 @@ S_reginclass(pTHX_ regexp * const prog, const regnode * const n, const U8* const
  
                  int count = 0;
                  int to_complement = 0;
+
+                RXp_MATCH_TAINTED_on(prog);
                  while (count < ANYOF_MAX) {
-                    if (ANYOF_CLASS_TEST(n, count)
+                    if (ANYOF_POSIXL_TEST(n, count)
                          && to_complement ^ cBOOL(isFOO_lc(count/2, (U8) c)))
                      {
                          match = TRUE;
@@ -7480,7 +7511,7 @@ S_reginclass(pTHX_ regexp * const prog, const regnode * const n, const U8* const
       * positive that will be resolved when the match is done again as not part
       * of the synthetic start class */
      if (!match) {
-       if (utf8_target && (flags & ANYOF_UNICODE_ALL) && c >= 256) {
+       if (utf8_target && (flags & ANYOF_ABOVE_LATIN1_ALL) && c >= 256) {
             match = TRUE;       /* Everything above 255 matches */
         }
         else if (ANYOF_NONBITMAP(n)
@@ -7510,16 +7541,22 @@ S_reginclass(pTHX_ regexp * const prog, const regnode * const n, const U8* const
         }
  
          if (UNICODE_IS_SUPER(c)
-            && OP(n) == ANYOF_WARN_SUPER
+            && (flags & ANYOF_WARN_SUPER)
              && ckWARN_d(WARN_NON_UNICODE))
          {
              Perl_warner(aTHX_ packWARN(WARN_NON_UNICODE),
-                "Code point 0x%04"UVXf" is not Unicode, all \\p{} matches fail; all \\P{} matches succeed", c);
+                "Matched non-Unicode code point 0x%04"UVXf" against Unicode property; may not be portable", c);
          }
      }
  
+#if ANYOF_INVERT != 1
+    /* Depending on compiler optimization cBOOL takes time, so if don't have to
+     * use it, don't */
+#   error ANYOF_INVERT needs to be set to 1, or guarded with cBOOL below,
+#endif
+
      /* The xor complements the return if to invert: 1^1 = 0, 1^0 = 1 */
-    return cBOOL(flags & ANYOF_INVERT) ^ match;
+    return (flags & ANYOF_INVERT) ^ match;
  }
  
  STATIC U8 *