This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
[perl #124280] don't warn for 'my $foo, *bar'
[perl5.git] / regcomp.c
index b1351d4..da6eb16 100644 (file)
--- a/regcomp.c
+++ b/regcomp.c
@@ -661,7 +661,7 @@ static const scan_data_t zero_scan_data =
 } STMT_END
 
 /* A specialized version of vFAIL2 that works with UTF8f */
-#define vFAIL2utf8f(m, a1) STMT_START { \
+#define vFAIL2utf8f(m, a1) STMT_START {            \
     const IV offset = RExC_parse - RExC_precomp;   \
     if (!SIZE_ONLY)                                \
         SAVEFREESV(RExC_rx_sv);                    \
@@ -669,6 +669,14 @@ static const scan_data_t zero_scan_data =
             REPORT_LOCATION_ARGS(offset));         \
 } STMT_END
 
+#define vFAIL3utf8f(m, a1, a2) STMT_START {             \
+    const IV offset = RExC_parse - RExC_precomp;        \
+    if (!SIZE_ONLY)                                     \
+        SAVEFREESV(RExC_rx_sv);                         \
+    S_re_croak2(aTHX_ UTF, m, REPORT_LOCATION, a1, a2,  \
+            REPORT_LOCATION_ARGS(offset));              \
+} STMT_END
+
 /* These have asserts in them because of [perl #122671] Many warnings in
  * regcomp.c can occur twice.  If they get output in pass1 and later in that
  * pass, the pattern has to be converted to UTF-8 and the pass restarted, they
@@ -6671,7 +6679,8 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
     RExC_pm_flags = pm_flags;
 
     if (runtime_code) {
-       if (TAINTING_get && TAINT_get)
+        assert(TAINTING_get || !TAINT_get);
+       if (TAINT_get)
            Perl_croak(aTHX_ "Eval-group in insecure regular expression");
 
        if (!S_compile_runtime_code(aTHX_ pRExC_state, exp, plen)) {
@@ -9789,9 +9798,7 @@ S_parse_lparen_question_flags(pTHX_ RExC_state_t *pRExC_state)
         ++RExC_parse;
     }
 
-    if (PASS2) {
-        STD_PMMOD_FLAGS_PARSE_X_WARN(x_mod_count);
-    }
+    vFAIL("Sequence (?... not terminated");
 }
 
 /*
@@ -10932,7 +10939,7 @@ S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                      * enough space for all the things we are about to throw
                      * away, but we can shrink it by the ammount we are about
                      * to re-use here */
-                    RExC_size = PREVOPER(RExC_size) - regarglen[(U8)OPFAIL];
+                    RExC_size += PREVOPER(RExC_size) - regarglen[(U8)OPFAIL];
                 }
                 else {
                     ckWARNreg(RExC_parse, "Quantifier {n,m} with n > m can't match");
@@ -10948,8 +10955,6 @@ S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                                "Useless use of greediness modifier '%c'",
                                *RExC_parse);
                 }
-                /* Absorb the modifier, so later code doesn't see nor use it */
-                nextchar(pRExC_state);
             }
 
          do_curly:
@@ -11178,7 +11183,7 @@ S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state,
     char * endbrace;    /* points to '}' following the name */
     char *endchar;     /* Points to '.' or '}' ending cur char in the input
                            stream */
-    char* p;            /* Temporary */
+    char* p = RExC_parse; /* Temporary */
 
     GET_RE_DEBUG_FLAGS_DECL;
 
@@ -11196,10 +11201,8 @@ S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state,
     /* The [^\n] meaning of \N ignores spaces and comments under the /x
      * modifier.  The other meanings do not, so use a temporary until we find
      * out which we are being called with */
-    p = (RExC_flags & RXf_PMf_EXTENDED)
-       ? regpatws(pRExC_state, RExC_parse,
-                                TRUE) /* means recognize comments */
-       : RExC_parse;
+    skip_to_be_ignored_text(pRExC_state, &p,
+                            FALSE /* Don't force to /x */ );
 
     /* Disambiguate between \N meaning a named character versus \N meaning
      * [^\n].  The latter is assumed when the {...} following the \N is a legal
@@ -11213,8 +11216,7 @@ S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state,
        if (! node_p) {
             return FALSE;
         }
-       skip_to_be_ignored_text(pRExC_state, &RExC_parse,
-                               FALSE /* Don't force to /x */ );
+
        *node_p = reg_node(pRExC_state, REG_ANY);
        *flagp |= HASWIDTH|SIMPLE;
        MARK_NAUGHTY(1);
@@ -11422,10 +11424,10 @@ S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state,
  * it returns U+FFFD (Replacement character) and sets *encp to NULL.
  */
 STATIC UV
-S_reg_recode(pTHX_ const char value, SV **encp)
+S_reg_recode(pTHX_ const U8 value, SV **encp)
 {
     STRLEN numlen = 1;
-    SV * const sv = newSVpvn_flags(&value, numlen, SVs_TEMP);
+    SV * const sv = newSVpvn_flags((const char *) &value, numlen, SVs_TEMP);
     const char * const s = *encp ? sv_recode_to_utf8(sv, *encp) : SvPVX(sv);
     const STRLEN newlen = SvCUR(sv);
     UV uv = UNICODE_REPLACEMENT;
@@ -11725,7 +11727,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
 {
     regnode *ret = NULL;
     I32 flags = 0;
-    char *parse_start = RExC_parse;
+    char *parse_start;
     U8 op;
     int invert = 0;
     U8 arg;
@@ -11739,6 +11741,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
     PERL_ARGS_ASSERT_REGATOM;
 
   tryagain:
+    parse_start = RExC_parse;
     switch ((U8)*RExC_parse) {
     case '^':
        RExC_seen_zerolen++;
@@ -12118,9 +12121,9 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
 
             if (*flagp & RESTART_PASS1)
                 return NULL;
-            RExC_parse--;
 
             /* Here, evaluates to a single code point.  Go get that */
+            RExC_parse = parse_start;
             goto defchar;
 
        case 'k':    /* Handle \k<NAME> and \k'NAME' */
@@ -12236,6 +12239,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                          * octal character escape, e.g. \35 or \777.
                          * The above logic should make it obvious why using
                          * octal escapes in patterns is problematic. - Yves */
+                        RExC_parse = parse_start;
                         goto defchar;
                     }
                 }
@@ -12284,7 +12288,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
        default:
            /* Do not generate "unrecognized" warnings here, we fall
               back into the quick-grab loop below */
-           parse_start--;
+            RExC_parse = parse_start;
            goto defchar;
        } /* end of switch on a \foo sequence */
        break;
@@ -12305,16 +12309,12 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
        /* FALLTHROUGH */
 
     default:
-
-            parse_start = RExC_parse - 1;
-
-           RExC_parse++;
-
          defchar: {
 
             /* Here, we have determined that the next thing is probably a
-             * literal character.  (It still may be an escape sequence that
-             * evaluates to a single character) */
+             * literal character.  RExC_parse points to the first byte of its
+             * definition.  (It still may be an escape sequence that evaluates
+             * to a single character) */
 
            STRLEN len = 0;
            UV ender = 0;
@@ -12378,7 +12378,12 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
              * could back off to end with only a code point that isn't such a
              * non-final, but it is possible for there not to be any in the
              * entire node. */
-           for (p = RExC_parse - 1;
+
+            assert(   ! UTF     /* Is at the beginning of a character */
+                   || UTF8_IS_INVARIANT(UCHARAT(RExC_parse))
+                   || UTF8_IS_START(UCHARAT(RExC_parse)));
+
+           for (p = RExC_parse;
                 len < upper_parse && p < RExC_end;
                 len++)
            {
@@ -12613,7 +12618,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                      recode_encoding:
                        if (! RExC_override_recoding) {
                            SV* enc = _get_encoding();
-                           ender = reg_recode((const char)(U8)ender, &enc);
+                           ender = reg_recode((U8)ender, &enc);
                            if (!enc && PASS2)
                                ckWARNreg(p, "Invalid escape in the specified encoding");
                            REQUIRE_UTF8(flagp);
@@ -12664,9 +12669,8 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                  * quantifier.  Move <p> to after anything that should be
                  * ignored, which, as a side effect, positions <p> for the next
                  * loop iteration */
-               if ( RExC_flags & RXf_PMf_EXTENDED)
-                    p = regpatws(pRExC_state, p,
-                                          TRUE); /* means recognize comments */
+                skip_to_be_ignored_text(pRExC_state, &p,
+                                        FALSE /* Don't force to /x */ );
 
                 /* If the next thing is a quantifier, it applies to this
                  * character only, which means that this character has to be in
@@ -13064,29 +13068,6 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
     return(ret);
 }
 
-STATIC char *
-S_regpatws(RExC_state_t *pRExC_state, char *p , const bool recognize_comment )
-{
-    /* Returns the next non-pattern-white space, non-comment character (the
-     * latter only if 'recognize_comment is true) in the string p, which is
-     * ended by RExC_end.  See also reg_skipcomment */
-    const char *e = RExC_end;
-
-    PERL_ARGS_ASSERT_REGPATWS;
-
-    while (p < e) {
-        STRLEN len;
-       if ((len = is_PATWS_safe(p, e, UTF))) {
-           p += len;
-        }
-       else if (recognize_comment && *p == '#') {
-            p = reg_skipcomment(pRExC_state, p);
-       }
-       else
-           break;
-    }
-    return p;
-}
 
 STATIC void
 S_populate_ANYOF_from_invlist(pTHX_ regnode *node, SV** invlist_ptr)
@@ -13442,8 +13423,10 @@ S_handle_regex_sets(pTHX_ RExC_state_t *pRExC_state, SV** return_invlist,
 
         while (RExC_parse < RExC_end) {
             SV* current = NULL;
-            RExC_parse = regpatws(pRExC_state, RExC_parse,
-                                          TRUE); /* means recognize comments */
+
+            skip_to_be_ignored_text(pRExC_state, &RExC_parse,
+                                    TRUE /* Force /x */ );
+
             switch (*RExC_parse) {
                 case '?':
                     if (RExC_parse[1] == '[') depth++, RExC_parse++;
@@ -13458,6 +13441,10 @@ S_handle_regex_sets(pTHX_ RExC_state_t *pRExC_state, SV** return_invlist,
                      * default: case next time and keep on incrementing until
                      * we find one of the invariants we do handle. */
                     RExC_parse++;
+                    if (*RExC_parse == 'c') {
+                            /* Skip the \cX notation for control characters */
+                            RExC_parse += UTF ? UTF8SKIP(RExC_parse) : 1;
+                    }
                     break;
                 case '[':
                 {
@@ -13619,9 +13606,8 @@ S_handle_regex_sets(pTHX_ RExC_state_t *pRExC_state, SV** return_invlist,
                                        operand */
         SV* only_to_avoid_leaks;
 
-        /* Skip white space */
-        RExC_parse = regpatws(pRExC_state, RExC_parse,
-                TRUE /* means recognize comments */ );
+        skip_to_be_ignored_text(pRExC_state, &RExC_parse,
+                                TRUE /* Force /x */ );
         if (RExC_parse >= RExC_end) {
             Perl_croak(aTHX_ "panic: Read past end of '(?[ ])'");
         }
@@ -13822,8 +13808,12 @@ redo_curchar:
                 /* Having gotten rid of the fence, we pop the operand at the
                  * stack top and process it as a newly encountered operand */
                 current = av_pop(stack);
-                assert(IS_OPERAND(current));
-                goto handle_operand;
+                if (IS_OPERAND(current)) {
+                    goto handle_operand;
+                }
+
+                RExC_parse++;
+                goto bad_syntax;
 
             case '&':
             case '|':
@@ -13899,11 +13889,23 @@ redo_curchar:
                 /* Here, the new operator has equal or lower precedence than
                  * what's already there.  This means the operation already
                  * there should be performed now, before the new one. */
+
                 rhs = av_pop(stack);
+                if (! IS_OPERAND(rhs)) {
+
+                    /* This can happen when a ! is not followed by an operand,
+                     * like in /(?[\t &!])/ */
+                    goto bad_syntax;
+                }
+
                 lhs = av_pop(stack);
 
-                assert(IS_OPERAND(rhs));
-                assert(IS_OPERAND(lhs));
+                if (! IS_OPERAND(lhs)) {
+
+                    /* This can happen when there is an empty (), like in
+                     * /(?[[0]+()+])/ */
+                    goto bad_syntax;
+                }
 
                 switch (stacked_operator) {
                     case '&':
@@ -13949,9 +13951,20 @@ redo_curchar:
                 av_push(stack, rhs);
                 goto redo_curchar;
 
-            case '!':   /* Highest priority, right associative, so just push
-                           onto stack */
-                av_push(stack, newSVuv(curchar));
+            case '!':   /* Highest priority, right associative */
+
+                /* If what's already at the top of the stack is another '!",
+                 * they just cancel each other out */
+                if (   (top_ptr = av_fetch(stack, top_index, FALSE))
+                    && (IS_OPERATOR(*top_ptr) && SvUV(*top_ptr) == '!'))
+                {
+                    only_to_avoid_leaks = av_pop(stack);
+                    SvREFCNT_dec(only_to_avoid_leaks);
+                }
+                else { /* Otherwise, since it's right associative, just push
+                          onto the stack */
+                    av_push(stack, newSVuv(curchar));
+                }
                 break;
 
             default:
@@ -14026,6 +14039,7 @@ redo_curchar:
         || SvTYPE(final) != SVt_INVLIST
         || av_tindex(stack) >= 0)  /* More left on stack */
     {
+      bad_syntax:
         SvREFCNT_dec(final);
         vFAIL("Incomplete expression within '(?[ ])'");
     }
@@ -14250,6 +14264,23 @@ S_add_multi_match(pTHX_ AV* multi_char_matches, SV* multi_string, const STRLEN c
 #define HAS_NONLOCALE_RUNTIME_PROPERTY_DEFINITION                            \
                                         (SvCUR(listsv) != initial_listsv_len)
 
+/* There is a restricted set of white space characters that are legal when
+ * ignoring white space in a bracketed character class.  This generates the
+ * code to skip them.
+ *
+ * There is a line below that uses the same white space criteria but is outside
+ * this macro.  Both here and there must use the same definition */
+#define SKIP_BRACKETED_WHITE_SPACE(do_skip, p)                          \
+    STMT_START {                                                        \
+        if (do_skip) {                                                  \
+            while (   p < RExC_end                                      \
+                   && isBLANK_A(UCHARAT(p)))                            \
+            {                                                           \
+                p++;                                                    \
+            }                                                           \
+        }                                                               \
+    } STMT_END
+
 STATIC regnode *
 S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                  const bool stop_at_1,  /* Just parse the next thing, don't
@@ -14410,20 +14441,14 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
         SvTEMP_off(listsv); /* Grr, TEMPs and mortals are conflated.  */
     }
 
-    if (skip_white) {
-        RExC_parse = regpatws(pRExC_state, RExC_parse,
-                              FALSE /* means don't recognize comments */ );
-    }
+    SKIP_BRACKETED_WHITE_SPACE(skip_white, RExC_parse);
 
     if (UCHARAT(RExC_parse) == '^') {  /* Complement of range. */
        RExC_parse++;
         invert = TRUE;
         allow_multi_folds = FALSE;
         MARK_NAUGHTY(1);
-        if (skip_white) {
-            RExC_parse = regpatws(pRExC_state, RExC_parse,
-                                  FALSE /* means don't recognize comments */ );
-        }
+        SKIP_BRACKETED_WHITE_SPACE(skip_white, RExC_parse);
     }
 
     /* Check that they didn't say [:posix:] instead of [[:posix:]] */
@@ -14460,10 +14485,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
             break;
         }
 
-        if (skip_white) {
-            RExC_parse = regpatws(pRExC_state, RExC_parse,
-                                  FALSE /* means don't recognize comments */ );
-        }
+        SKIP_BRACKETED_WHITE_SPACE(skip_white, RExC_parse);
 
         if  (UCHARAT(RExC_parse) == ']') {
             break;
@@ -14516,7 +14538,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
              * skipped, it means that that white space is wanted literally, and
              * is already in 'value'.  Otherwise, need to translate the escape
              * into what it signifies. */
-            if (! skip_white || ! is_PATWS_cp(value)) switch ((I32)value) {
+            if (! skip_white || ! isBLANK_A(value)) switch ((I32)value) {
 
            case 'w':   namedclass = ANYOF_WORDCHAR;    break;
            case 'W':   namedclass = ANYOF_NWORDCHAR;   break;
@@ -14613,38 +14635,53 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                    vFAIL2("Empty \\%c{}", (U8)value);
                if (*RExC_parse == '{') {
                    const U8 c = (U8)value;
-                   e = strchr(RExC_parse++, '}');
-                    if (!e)
+                   e = strchr(RExC_parse, '}');
+                    if (!e) {
+                        RExC_parse++;
                         vFAIL2("Missing right brace on \\%c{}", c);
-                   while (isSPACE(*RExC_parse))
-                       RExC_parse++;
+                    }
+
+                    RExC_parse++;
+                    while (isSPACE(*RExC_parse)) {
+                         RExC_parse++;
+                   }
+
+                   if (UCHARAT(RExC_parse) == '^') {
+
+                        /* toggle.  (The rhs xor gets the single bit that
+                         * differs between P and p; the other xor inverts just
+                         * that bit) */
+                        value ^= 'P' ^ 'p';
+
+                        RExC_parse++;
+                        while (isSPACE(*RExC_parse)) {
+                            RExC_parse++;
+                        }
+                    }
+
                     if (e == RExC_parse)
                         vFAIL2("Empty \\%c{}", c);
+
                    n = e - RExC_parse;
                    while (isSPACE(*(RExC_parse + n - 1)))
                        n--;
-               }
-               else {
+               }   /* The \p isn't immediately followed by a '{' */
+               else if (! isALPHA(*RExC_parse)) {
+                    RExC_parse += (UTF) ? UTF8SKIP(RExC_parse) : 1;
+                    vFAIL2("Character following \\%c must be '{' or a "
+                           "single-character Unicode property name",
+                           (U8) value);
+                }
+                else {
                    e = RExC_parse;
                    n = 1;
                }
                if (!SIZE_ONLY) {
                     SV* invlist;
                     char* name;
+                    char* base_name;    /* name after any packages are stripped */
+                    const char * const colon_colon = "::";
 
-                   if (UCHARAT(RExC_parse) == '^') {
-                        RExC_parse++;
-                        n--;
-                         /* toggle.  (The rhs xor gets the single bit that
-                          * differs between P and p; the other xor inverts just
-                          * that bit) */
-                         value ^= 'P' ^ 'p';
-
-                        while (isSPACE(*RExC_parse)) {
-                             RExC_parse++;
-                             n--;
-                        }
-                   }
                     /* Try to get the definition of the property into
                      * <invlist>.  If /i is in effect, the effective property
                      * will have its name be <__NAME_i>.  The design is
@@ -14660,7 +14697,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
 
                     /* Look up the property name, and get its swash and
                      * inversion list, if the property is found  */
-                    if (swash) {
+                    if (swash) {    /* Return any left-overs */
                         SvREFCNT_dec_NN(swash);
                     }
                     swash = _core_swash_init("utf8", name, &PL_sv_undef,
@@ -14673,26 +14710,57 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                         HV* curpkg = (IN_PERL_COMPILETIME)
                                       ? PL_curstash
                                       : CopSTASH(PL_curcop);
-                        if (swash) {
+                        UV final_n = n;
+                        bool has_pkg;
+
+                        if (swash) {    /* Got a swash but no inversion list.
+                                           Something is likely wrong that will
+                                           be sorted-out later */
                             SvREFCNT_dec_NN(swash);
                             swash = NULL;
                         }
 
-                        /* Here didn't find it.  It could be a user-defined
-                         * property that will be available at run-time.  If we
-                         * accept only compile-time properties, is an error;
-                         * otherwise add it to the list for run-time look up */
-                        if (ret_invlist) {
+                        /* Here didn't find it.  It could be a an error (like a
+                         * typo) in specifying a Unicode property, or it could
+                         * be a user-defined property that will be available at
+                         * run-time.  The names of these must begin with 'In'
+                         * or 'Is' (after any packages are stripped off).  So
+                         * if not one of those, or if we accept only
+                         * compile-time properties, is an error; otherwise add
+                         * it to the list for run-time look up. */
+                        if ((base_name = rninstr(name, name + n,
+                                                 colon_colon, colon_colon + 2)))
+                        { /* Has ::.  We know this must be a user-defined
+                             property */
+                            base_name += 2;
+                            final_n -= base_name - name;
+                            has_pkg = TRUE;
+                        }
+                        else {
+                            base_name = name;
+                            has_pkg = FALSE;
+                        }
+
+                        if (   final_n < 3
+                            || base_name[0] != 'I'
+                            || (base_name[1] != 's' && base_name[1] != 'n')
+                            || ret_invlist)
+                        {
+                            const char * const msg
+                                = (has_pkg)
+                                  ? "Illegal user-defined property name"
+                                  : "Can't find Unicode property definition";
                             RExC_parse = e + 1;
-                            vFAIL2utf8f(
-                                "Property '%"UTF8f"' is unknown",
-                                UTF8fARG(UTF, n, name));
+
+                            /* diag_listed_as: Can't find Unicode property definition "%s" */
+                            vFAIL3utf8f("%s \"%"UTF8f"\"",
+                                msg, UTF8fARG(UTF, n, name));
                         }
 
                         /* If the property name doesn't already have a package
                          * name, add the current one to it so that it can be
                          * referred to outside it. [perl #121777] */
-                        if (curpkg && ! instr(name, "::")) {
+                        if (! has_pkg && curpkg) {
                             char* pkgname = HvNAME(curpkg);
                             if (strNE(pkgname, "main")) {
                                 char* full_name = Perl_form(aTHX_
@@ -14712,9 +14780,9 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                                                    ANYOF node */
 
                         /* We don't know yet, so have to assume that the
-                         * property could match something in the Latin1 range,
-                         * hence something that isn't utf8.  Note that this
-                         * would cause things in <depends_list> to match
+                         * property could match something in the upper Latin1
+                         * range, hence something that isn't utf8.  Note that
+                         * this would cause things in <depends_list> to match
                          * inappropriately, except that any \p{}, including
                          * this one forces Unicode semantics, which means there
                          * is no <depends_list> */
@@ -14855,7 +14923,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
              recode_encoding:
                if (! RExC_override_recoding) {
                    SV* enc = _get_encoding();
-                   value = reg_recode((const char)(U8)value, &enc);
+                   value = reg_recode((U8)value, &enc);
                    if (!enc) {
                         if (strict) {
                             vFAIL("Invalid escape in the specified encoding");
@@ -15065,10 +15133,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
            }
        } /* end of namedclass \blah */
 
-        if (skip_white) {
-            RExC_parse = regpatws(pRExC_state, RExC_parse,
-                                FALSE /* means don't recognize comments */ );
-        }
+        SKIP_BRACKETED_WHITE_SPACE(skip_white, RExC_parse);
 
         /* If 'range' is set, 'value' is the ending of a range--check its
          * validity.  (If value isn't a single code point in the case of a
@@ -15109,12 +15174,9 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                 && *RExC_parse == '-')
             {
                 char* next_char_ptr = RExC_parse + 1;
-                if (skip_white) {   /* Get the next real char after the '-' */
-                    next_char_ptr = regpatws(pRExC_state,
-                                             RExC_parse + 1,
-                                             FALSE); /* means don't recognize
-                                                        comments */
-                }
+
+                /* Get the next real char after the '-' */
+                SKIP_BRACKETED_WHITE_SPACE(skip_white, next_char_ptr);
 
                 /* If the '-' is at the end of the class (just before the ']',
                  * it is a literal minus; otherwise it is a range */
@@ -16439,10 +16501,20 @@ S_skip_to_be_ignored_text(pTHX_ RExC_state_t *pRExC_state,
        }
 
        if (use_xmod) {
-            char * new_p = regpatws(pRExC_state, *p,
-                                    TRUE); /* means recognize comments */
-            if (new_p != *p) {
-                *p = new_p;
+            const char * save_p = *p;
+            while ((*p) < RExC_end) {
+                STRLEN len;
+                if ((len = is_PATWS_safe((*p), RExC_end, UTF))) {
+                    (*p) += len;
+                }
+                else if (*(*p) == '#') {
+                    (*p) = reg_skipcomment(pRExC_state, (*p));
+                }
+                else {
+                    break;
+                }
+            }
+            if (*p != save_p) {
                 continue;
             }
        }