#include "inline_invlist.c"
#include "unicode_constants.h"
-#define RF_tainted 1 /* tainted information used? e.g. locale */
-#define RF_warned 2 /* warned about big count? */
-
-#define RF_utf8 8 /* Pattern contains multibyte chars? */
-
-#define UTF_PATTERN ((PL_reg_flags & RF_utf8) != 0)
-
#define HAS_NONLATIN1_FOLD_CLOSURE(i) _HAS_NONLATIN1_FOLD_CLOSURE_ONLY_FOR_USE_BY_REGCOMP_DOT_C_AND_REGEXEC_DOT_C(i)
#ifndef STATIC
switch ((_char_class_number) classnum) {
case _CC_ENUM_ALPHANUMERIC: return isALPHANUMERIC_LC(character);
case _CC_ENUM_ALPHA: return isALPHA_LC(character);
+ case _CC_ENUM_ASCII: return isASCII_LC(character);
+ case _CC_ENUM_BLANK: return isBLANK_LC(character);
+ case _CC_ENUM_CASED: return isLOWER_LC(character)
+ || isUPPER_LC(character);
+ case _CC_ENUM_CNTRL: return isCNTRL_LC(character);
case _CC_ENUM_DIGIT: return isDIGIT_LC(character);
case _CC_ENUM_GRAPH: return isGRAPH_LC(character);
case _CC_ENUM_LOWER: return isLOWER_LC(character);
case _CC_ENUM_PRINT: return isPRINT_LC(character);
+ case _CC_ENUM_PSXSPC: return isPSXSPC_LC(character);
case _CC_ENUM_PUNCT: return isPUNCT_LC(character);
+ case _CC_ENUM_SPACE: return isSPACE_LC(character);
case _CC_ENUM_UPPER: return isUPPER_LC(character);
case _CC_ENUM_WORDCHAR: return isWORDCHAR_LC(character);
- case _CC_ENUM_SPACE: return isSPACE_LC(character);
- case _CC_ENUM_BLANK: return isBLANK_LC(character);
case _CC_ENUM_XDIGIT: return isXDIGIT_LC(character);
- case _CC_ENUM_CNTRL: return isCNTRL_LC(character);
- case _CC_ENUM_PSXSPC: return isPSXSPC_LC(character);
- case _CC_ENUM_ASCII: return isASCII_LC(character);
default: /* VERTSPACE should never occur in locales */
Perl_croak(aTHX_ "panic: isFOO_lc() has an unexpected character class '%d'", classnum);
}
char *checked_upto = NULL; /* how far into the string we have already checked using find_byclass*/
const I32 multiline = prog->extflags & RXf_PMf_MULTILINE;
RXi_GET_DECL(prog,progi);
+ bool is_utf8_pat;
#ifdef DEBUGGING
const char * const i_strpos = strpos;
#endif
RX_MATCH_UTF8_set(rx,utf8_target);
- if (RX_UTF8(rx))
- PL_reg_flags |= RF_utf8;
- else
- PL_reg_flags &= ~RF_utf8;
+ is_utf8_pat = cBOOL(RX_UTF8(rx));
DEBUG_EXECUTE_r(
debug_start_match(rx, utf8_target, strpos, strend,
(IV)start_shift, (IV)(check_at - strbeg), (IV)(s - strbeg), (IV)(endpos - strbeg), (IV)(checked_upto- strbeg)));
t = s;
- s = find_byclass(prog, progi->regstclass, checked_upto, endpos, NULL);
+ s = find_byclass(prog, progi->regstclass, checked_upto, endpos,
+ NULL, is_utf8_pat);
if (s) {
checked_upto = s;
} else {
STATIC char *
S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
- const char *strend, regmatch_info *reginfo)
+ const char *strend, regmatch_info *reginfo, bool is_utf8_pat)
{
dVAR;
const I32 doevery = (prog->intflags & PREGf_SKIP) == 0;
/* We know what class it must start with. */
switch (OP(c)) {
case ANYOF:
+ case ANYOF_SYNTHETIC:
+ case ANYOF_WARN_SUPER:
if (utf8_target) {
REXEC_FBC_UTF8_CLASS_SCAN(
reginclass(prog, c, (U8*)s, utf8_target));
break;
case EXACTFA:
- if (UTF_PATTERN || utf8_target) {
+ if (is_utf8_pat || utf8_target) {
utf8_fold_flags = FOLDEQ_UTF8_NOMIX_ASCII;
goto do_exactf_utf8;
}
goto do_exactf_non_utf8;
case EXACTFL:
- if (UTF_PATTERN || utf8_target) {
+ if (is_utf8_pat || utf8_target) {
utf8_fold_flags = FOLDEQ_UTF8_LOCALE;
goto do_exactf_utf8;
}
goto do_exactf_non_utf8;
case EXACTFU_SS:
- if (UTF_PATTERN) {
+ if (is_utf8_pat) {
utf8_fold_flags = FOLDEQ_S2_ALREADY_FOLDED;
}
goto do_exactf_utf8;
case EXACTFU_TRICKYFOLD:
case EXACTFU:
- if (UTF_PATTERN || utf8_target) {
- utf8_fold_flags = (UTF_PATTERN) ? FOLDEQ_S2_ALREADY_FOLDED : 0;
+ if (is_utf8_pat || utf8_target) {
+ utf8_fold_flags = is_utf8_pat ? FOLDEQ_S2_ALREADY_FOLDED : 0;
goto do_exactf_utf8;
}
pat_string = STRING(c);
ln = STR_LEN(c); /* length to match in octets/bytes */
pat_end = pat_string + ln;
- lnc = (UTF_PATTERN) /* length to match in characters */
+ lnc = is_utf8_pat /* length to match in characters */
? utf8_length((U8 *) pat_string, (U8 *) pat_end)
: ln;
while (s <= e) {
char *my_strend= (char *)strend;
if (foldEQ_utf8_flags(s, &my_strend, 0, utf8_target,
- pat_string, NULL, ln, cBOOL(UTF_PATTERN), utf8_fold_flags)
+ pat_string, NULL, ln, is_utf8_pat, utf8_fold_flags)
&& (!reginfo || regtry(reginfo, &s)) )
{
goto got_it;
break;
}
case BOUNDL:
- PL_reg_flags |= RF_tainted;
- FBC_BOUND(isALNUM_LC,
- isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp)),
- isALNUM_LC_utf8((U8*)s));
+ RXp_MATCH_TAINTED_on(prog);
+ FBC_BOUND(isWORDCHAR_LC,
+ isWORDCHAR_LC_uvchr(UNI_TO_NATIVE(tmp)),
+ isWORDCHAR_LC_utf8((U8*)s));
break;
case NBOUNDL:
- PL_reg_flags |= RF_tainted;
- FBC_NBOUND(isALNUM_LC,
- isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp)),
- isALNUM_LC_utf8((U8*)s));
+ RXp_MATCH_TAINTED_on(prog);
+ FBC_NBOUND(isWORDCHAR_LC,
+ isWORDCHAR_LC_uvchr(UNI_TO_NATIVE(tmp)),
+ isWORDCHAR_LC_utf8((U8*)s));
break;
case BOUND:
FBC_BOUND(isWORDCHAR,
- isALNUM_uni(tmp),
+ isWORDCHAR_uni(tmp),
cBOOL(swash_fetch(PL_utf8_swash_ptrs[_CC_WORDCHAR], (U8*)s, utf8_target)));
break;
case BOUNDA:
break;
case NBOUND:
FBC_NBOUND(isWORDCHAR,
- isALNUM_uni(tmp),
+ isWORDCHAR_uni(tmp),
cBOOL(swash_fetch(PL_utf8_swash_ptrs[_CC_WORDCHAR], (U8*)s, utf8_target)));
break;
case NBOUNDA:
break;
case BOUNDU:
FBC_BOUND(isWORDCHAR_L1,
- isALNUM_uni(tmp),
+ isWORDCHAR_uni(tmp),
cBOOL(swash_fetch(PL_utf8_swash_ptrs[_CC_WORDCHAR], (U8*)s, utf8_target)));
break;
case NBOUNDU:
FBC_NBOUND(isWORDCHAR_L1,
- isALNUM_uni(tmp),
+ isWORDCHAR_uni(tmp),
cBOOL(swash_fetch(PL_utf8_swash_ptrs[_CC_WORDCHAR], (U8*)s, utf8_target)));
break;
case LNBREAK:
/* FALLTHROUGH */
case POSIXL:
- PL_reg_flags |= RF_tainted;
+ RXp_MATCH_TAINTED_on(prog);
REXEC_FBC_CSCAN(to_complement ^ cBOOL(isFOO_utf8_lc(FLAGS(c), (U8 *) s)),
to_complement ^ cBOOL(isFOO_lc(FLAGS(c), *s)));
break;
Perl_croak(aTHX_ "corrupted regexp program");
}
- PL_reg_flags = 0;
+ RX_MATCH_TAINTED_off(rx);
PL_reg_state.re_state_eval_setup_done = FALSE;
PL_reg_maxiter = 0;
- if (RX_UTF8(rx))
- PL_reg_flags |= RF_utf8;
-
+ reginfo.is_utf8_pat = cBOOL(RX_UTF8(rx));
+ reginfo.warned = FALSE;
/* Mark beginning of line for ^ and lookbehind. */
reginfo.bol = startpos; /* XXX not used ??? */
PL_bostr = strbeg;
/* Messy cases: unanchored match. */
if ((prog->anchored_substr || prog->anchored_utf8) && prog->intflags & PREGf_SKIP) {
/* we have /x+whatever/ */
- /* it must be a one character string (XXXX Except UTF_PATTERN?) */
+ /* it must be a one character string (XXXX Except is_utf8_pat?) */
char ch;
#ifdef DEBUGGING
int did_match = 0;
quoted, (int)(strend - s));
}
});
- if (find_byclass(prog, c, s, strend, ®info))
+ if (find_byclass(prog, c, s, strend, ®info, reginfo.is_utf8_pat))
goto got_it;
DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Contradicts stclass... [regexec_flags]\n"));
}
);
);
Safefree(swap);
- RX_MATCH_TAINTED_set(rx, PL_reg_flags & RF_tainted);
if (PL_reg_state.re_state_eval_setup_done)
restore_pos(aTHX_ prog);
}
}
static bool
-S_setup_EXACTISH_ST_c1_c2(pTHX_ const regnode * const text_node, int *c1p, U8* c1_utf8, int *c2p, U8* c2_utf8)
+S_setup_EXACTISH_ST_c1_c2(pTHX_ const regnode * const text_node, int *c1p,
+ U8* c1_utf8, int *c2p, U8* c2_utf8, bool is_utf8_pat)
{
/* This function determines if there are one or two characters that match
* the first character of the passed-in EXACTish node <text_node>, and if
* character. If both the pat and the target are UTF-8, we can just
* copy the input to the output, avoiding finding the code point of
* that character */
- if (! UTF_PATTERN) {
+ if (!is_utf8_pat) {
c2 = c1 = *pat;
}
else if (utf8_target) {
}
}
else /* an EXACTFish node */
- if ((UTF_PATTERN
+ if ((is_utf8_pat
&& is_MULTI_CHAR_FOLD_utf8_safe(pat,
pat + STR_LEN(text_node)))
- || (! UTF_PATTERN
+ || (!is_utf8_pat
&& is_MULTI_CHAR_FOLD_latin1_safe(pat,
pat + STR_LEN(text_node))))
{
use_chrtest_void = TRUE;
}
else { /* an EXACTFish node which doesn't begin with a multi-char fold */
- c1 = (UTF_PATTERN) ? valid_utf8_to_uvchr(pat, NULL) : *pat;
+ c1 = is_utf8_pat ? valid_utf8_to_uvchr(pat, NULL) : *pat;
if (c1 > 256) {
/* Load the folds hash, if not already done */
SV** listp;
U32 maxopenparen = 0; /* max '(' index seen so far */
int to_complement; /* Invert the result? */
_char_class_number classnum;
+ bool is_utf8_pat = reginfo->is_utf8_pat;
#ifdef DEBUGGING
GET_RE_DEBUG_FLAGS_DECL;
case EXACT: { /* /abc/ */
char *s = STRING(scan);
ln = STR_LEN(scan);
- if (utf8_target != UTF_PATTERN) {
+ if (utf8_target != is_utf8_pat) {
/* The target and the pattern have differing utf8ness. */
char *l = locinput;
const char * const e = s + ln;
const char * s;
U32 fold_utf8_flags;
- PL_reg_flags |= RF_tainted;
+ RX_MATCH_TAINTED_on(reginfo->prog);
folder = foldEQ_locale;
fold_array = PL_fold_locale;
fold_utf8_flags = FOLDEQ_UTF8_LOCALE;
case EXACTFU: /* /abc/iu */
folder = foldEQ_latin1;
fold_array = PL_fold_latin1;
- fold_utf8_flags = (UTF_PATTERN) ? FOLDEQ_S1_ALREADY_FOLDED : 0;
+ fold_utf8_flags = is_utf8_pat ? FOLDEQ_S1_ALREADY_FOLDED : 0;
goto do_exactf;
case EXACTFA: /* /abc/iaa */
s = STRING(scan);
ln = STR_LEN(scan);
- if (utf8_target || UTF_PATTERN || state_num == EXACTFU_SS) {
+ if (utf8_target || is_utf8_pat || state_num == EXACTFU_SS) {
/* Either target or the pattern are utf8, or has the issue where
* the fold lengths may differ. */
const char * const l = locinput;
char *e = PL_regeol;
- if (! foldEQ_utf8_flags(s, 0, ln, cBOOL(UTF_PATTERN),
+ if (! foldEQ_utf8_flags(s, 0, ln, is_utf8_pat,
l, &e, 0, utf8_target, fold_utf8_flags))
{
sayNO;
* have to set the FLAGS fields of these */
case BOUNDL: /* /\b/l */
case NBOUNDL: /* /\B/l */
- PL_reg_flags |= RF_tainted;
+ RX_MATCH_TAINTED_on(reginfo->prog);
/* FALL THROUGH */
case BOUND: /* /\b/ */
case BOUNDU: /* /\b/u */
ln = utf8n_to_uvchr(r, UTF8SKIP(r), 0, uniflags);
}
if (FLAGS(scan) != REGEX_LOCALE_CHARSET) {
- ln = isALNUM_uni(ln);
+ ln = isWORDCHAR_uni(ln);
if (NEXTCHR_IS_EOS)
n = 0;
else {
}
}
else {
- ln = isALNUM_LC_uvchr(UNI_TO_NATIVE(ln));
- n = NEXTCHR_IS_EOS ? 0 : isALNUM_LC_utf8((U8*)locinput);
+ ln = isWORDCHAR_LC_uvchr(UNI_TO_NATIVE(ln));
+ n = NEXTCHR_IS_EOS ? 0 : isWORDCHAR_LC_utf8((U8*)locinput);
}
}
else {
n = NEXTCHR_IS_EOS ? 0 : isWORDCHAR_L1(nextchr);
break;
case REGEX_LOCALE_CHARSET:
- ln = isALNUM_LC(ln);
- n = NEXTCHR_IS_EOS ? 0 : isALNUM_LC(nextchr);
+ ln = isWORDCHAR_LC(ln);
+ n = NEXTCHR_IS_EOS ? 0 : isWORDCHAR_LC(nextchr);
break;
case REGEX_DEPENDS_CHARSET:
- ln = isALNUM(ln);
- n = NEXTCHR_IS_EOS ? 0 : isALNUM(nextchr);
+ ln = isWORDCHAR(ln);
+ n = NEXTCHR_IS_EOS ? 0 : isWORDCHAR(nextchr);
break;
case REGEX_ASCII_RESTRICTED_CHARSET:
case REGEX_ASCII_MORE_RESTRICTED_CHARSET:
break;
case ANYOF: /* /[abc]/ */
+ case ANYOF_WARN_SUPER:
if (NEXTCHR_IS_EOS)
sayNO;
if (utf8_target) {
/* The locale hasn't influenced the outcome before this, so defer
* tainting until now */
- PL_reg_flags |= RF_tainted;
+ RX_MATCH_TAINTED_on(reginfo->prog);
/* Use isFOO_lc() for characters within Latin1. (Note that
* UTF8_IS_INVARIANT works even on non-UTF-8 strings, or else
const U8 *fold_array;
UV utf8_fold_flags;
- PL_reg_flags |= RF_tainted;
+ RX_MATCH_TAINTED_on(reginfo->prog);
folder = foldEQ_locale;
fold_array = PL_fold_locale;
type = REFFL;
goto do_nref_ref_common;
case REFFL: /* /\1/il */
- PL_reg_flags |= RF_tainted;
+ RX_MATCH_TAINTED_on(reginfo->prog);
folder = foldEQ_locale;
fold_array = PL_fold_locale;
utf8_fold_flags = FOLDEQ_UTF8_LOCALE;
/* XXXX This is too dramatic a measure... */
PL_reg_maxiter = 0;
- ST.toggle_reg_flags = PL_reg_flags;
- if (RX_UTF8(re_sv))
- PL_reg_flags |= RF_utf8;
- else
- PL_reg_flags &= ~RF_utf8;
- ST.toggle_reg_flags ^= PL_reg_flags; /* diff of old and new */
+ ST.saved_utf8_pat = is_utf8_pat;
+ is_utf8_pat = cBOOL(RX_UTF8(re_sv));
ST.prev_rex = rex_sv;
ST.prev_curlyx = cur_curlyx;
case EVAL_AB: /* cleanup after a successful (??{A})B */
/* note: this is called twice; first after popping B, then A */
- PL_reg_flags ^= ST.toggle_reg_flags;
+ is_utf8_pat = ST.saved_utf8_pat;
rex_sv = ST.prev_rex;
SET_reg_curpm(rex_sv);
rex = ReANY(rex_sv);
case EVAL_AB_fail: /* unsuccessfully ran A or B in (??{A})B */
/* note: this is called twice; first after popping B, then A */
- PL_reg_flags ^= ST.toggle_reg_flags;
+ is_utf8_pat = ST.saved_utf8_pat;
rex_sv = ST.prev_rex;
SET_reg_curpm(rex_sv);
rex = ReANY(rex_sv);
do_whilem_B_max:
if (cur_curlyx->u.curlyx.count >= REG_INFTY
&& ckWARN(WARN_REGEXP)
- && !(PL_reg_flags & RF_warned))
+ && !reginfo->warned)
{
- PL_reg_flags |= RF_warned;
+ reginfo->warned = TRUE;
Perl_warner(aTHX_ packWARN(WARN_REGEXP),
"Complex regular subexpression recursion limit (%d) "
"exceeded",
/* Maximum greed exceeded */
if (cur_curlyx->u.curlyx.count >= REG_INFTY
&& ckWARN(WARN_REGEXP)
- && !(PL_reg_flags & RF_warned))
+ && !reginfo->warned)
{
- PL_reg_flags |= RF_warned;
+ reginfo->warned = TRUE;
Perl_warner(aTHX_ packWARN(WARN_REGEXP),
"Complex regular subexpression recursion "
"limit (%d) exceeded",
*/
if (PL_regkind[OP(text_node)] == EXACT) {
if (! S_setup_EXACTISH_ST_c1_c2(aTHX_
- text_node, &ST.c1, ST.c1_utf8, &ST.c2, ST.c2_utf8))
+ text_node, &ST.c1, ST.c1_utf8, &ST.c2, ST.c2_utf8,
+ is_utf8_pat))
{
sayNO;
}
if this changes back then the macro for IS_TEXT and
friends need to change. */
if (! S_setup_EXACTISH_ST_c1_c2(aTHX_
- text_node, &ST.c1, ST.c1_utf8, &ST.c2, ST.c2_utf8))
+ text_node, &ST.c1, ST.c1_utf8, &ST.c2, ST.c2_utf8,
+ is_utf8_pat))
{
sayNO;
}
if (minmod) {
char *li = locinput;
minmod = 0;
- if (ST.min && regrepeat(rex, &li, ST.A, ST.min, depth) < ST.min)
+ if (ST.min &&
+ regrepeat(rex, &li, ST.A, ST.min, depth, is_utf8_pat)
+ < ST.min)
sayNO;
SET_locinput(li);
ST.count = ST.min;
/* avoid taking address of locinput, so it can remain
* a register var */
char *li = locinput;
- ST.count = regrepeat(rex, &li, ST.A, ST.max, depth);
+ ST.count = regrepeat(rex, &li, ST.A, ST.max, depth,
+ is_utf8_pat);
if (ST.count < ST.min)
sayNO;
SET_locinput(li);
* locinput matches */
char *li = ST.oldloc;
ST.count += n;
- if (regrepeat(rex, &li, ST.A, n, depth) < n)
+ if (regrepeat(rex, &li, ST.A, n, depth, is_utf8_pat) < n)
sayNO;
assert(n == REG_INFTY || locinput == li);
}
/* failed -- move forward one */
{
char *li = locinput;
- if (!regrepeat(rex, &li, ST.A, 1, depth)) {
+ if (!regrepeat(rex, &li, ST.A, 1, depth, is_utf8_pat)) {
sayNO;
}
locinput = li;
fake_end:
if (cur_eval) {
/* we've just finished A in /(??{A})B/; now continue with B */
- st->u.eval.toggle_reg_flags
- = cur_eval->u.eval.toggle_reg_flags;
- PL_reg_flags ^= st->u.eval.toggle_reg_flags;
+ st->u.eval.saved_utf8_pat = is_utf8_pat;
+ is_utf8_pat = cur_eval->u.eval.saved_utf8_pat;
st->u.eval.prev_rex = rex_sv; /* inner */
* depth - (for debugging) backtracking depth.
*/
STATIC I32
-S_regrepeat(pTHX_ const regexp *prog, char **startposp, const regnode *p, I32 max, int depth)
+S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p,
+ I32 max, int depth, bool is_utf8_pat)
{
dVAR;
char *scan; /* Pointer to current position in target string */
}
break;
case EXACT:
- assert(STR_LEN(p) == (UTF_PATTERN) ? UTF8SKIP(STRING(p)) : 1);
+ assert(STR_LEN(p) == is_utf8_pat ? UTF8SKIP(STRING(p)) : 1);
c = (U8)*STRING(p);
* under UTF-8, or both target and pattern aren't UTF-8. Note that we
* can use UTF8_IS_INVARIANT() even if the pattern isn't UTF-8, as it's
* true iff it doesn't matter if the argument is in UTF-8 or not */
- if (UTF8_IS_INVARIANT(c) || (! utf8_target && ! UTF_PATTERN)) {
+ if (UTF8_IS_INVARIANT(c) || (! utf8_target && ! is_utf8_pat)) {
if (utf8_target && scan + max < loceol) {
/* We didn't adjust <loceol> because is UTF-8, but ok to do so,
* since here, to match at all, 1 char == 1 byte */
scan++;
}
}
- else if (UTF_PATTERN) {
+ else if (is_utf8_pat) {
if (utf8_target) {
STRLEN scan_char_len;
goto do_exactf;
case EXACTFL:
- PL_reg_flags |= RF_tainted;
+ RXp_MATCH_TAINTED_on(prog);
utf8_flags = FOLDEQ_UTF8_LOCALE;
goto do_exactf;
case EXACTFU_SS:
case EXACTFU_TRICKYFOLD:
case EXACTFU:
- utf8_flags = (UTF_PATTERN) ? FOLDEQ_S2_ALREADY_FOLDED : 0;
+ utf8_flags = is_utf8_pat ? FOLDEQ_S2_ALREADY_FOLDED : 0;
do_exactf: {
int c1, c2;
U8 c1_utf8[UTF8_MAXBYTES+1], c2_utf8[UTF8_MAXBYTES+1];
- assert(STR_LEN(p) == (UTF_PATTERN) ? UTF8SKIP(STRING(p)) : 1);
+ assert(STR_LEN(p) == is_utf8_pat ? UTF8SKIP(STRING(p)) : 1);
- if (S_setup_EXACTISH_ST_c1_c2(aTHX_ p, &c1, c1_utf8, &c2, c2_utf8)) {
+ if (S_setup_EXACTISH_ST_c1_c2(aTHX_ p, &c1, c1_utf8, &c2, c2_utf8,
+ is_utf8_pat))
+ {
if (c1 == CHRTEST_VOID) {
/* Use full Unicode fold matching */
char *tmpeol = PL_regeol;
- STRLEN pat_len = (UTF_PATTERN) ? UTF8SKIP(STRING(p)) : 1;
+ STRLEN pat_len = is_utf8_pat ? UTF8SKIP(STRING(p)) : 1;
while (hardcount < max
&& foldEQ_utf8_flags(scan, &tmpeol, 0, utf8_target,
STRING(p), NULL, pat_len,
- cBOOL(UTF_PATTERN), utf8_flags))
+ is_utf8_pat, utf8_flags))
{
scan = tmpeol;
tmpeol = PL_regeol;
break;
}
case ANYOF:
+ case ANYOF_WARN_SUPER:
if (utf8_target) {
while (hardcount < max
&& scan < loceol
/* FALLTHROUGH */
case POSIXL:
- PL_reg_flags |= RF_tainted;
+ RXp_MATCH_TAINTED_on(prog);
if (! utf8_target) {
while (scan < loceol && to_complement ^ cBOOL(isFOO_lc(FLAGS(p),
*scan)))
*/
STATIC bool
-S_reginclass(pTHX_ const regexp * const prog, const regnode * const n, const U8* const p, const bool utf8_target)
+S_reginclass(pTHX_ regexp * const prog, const regnode * const n, const U8* const p, const bool utf8_target)
{
dVAR;
const char flags = ANYOF_FLAGS(n);
match = TRUE;
}
else if (flags & ANYOF_LOCALE) {
- PL_reg_flags |= RF_tainted;
+ RXp_MATCH_TAINTED_on(prog);
if ((flags & ANYOF_LOC_FOLD)
&& ANYOF_BITMAP_TEST(n, PL_fold_locale[c]))
* will be 1, so the exclusive or will reverse things, so we
* are testing for \W. On the third iteration, 'to_complement'
* will be 0, and we would be testing for \s; the fourth
- * iteration would test for \S, etc. */
+ * iteration would test for \S, etc.
+ *
+ * Note that this code assumes that all the classes are closed
+ * under folding. For example, if a character matches \w, then
+ * its fold does too; and vice versa. This should be true for
+ * any well-behaved locale for all the currently defined Posix
+ * classes, except for :lower: and :upper:, which are handled
+ * by the pseudo-class :cased: which matches if either of the
+ * other two does. To get rid of this assumption, an outer
+ * loop could be used below to iterate over both the source
+ * character, and its fold (if different) */
int count = 0;
int to_complement = 0;
|| (utf8_target
&& (c >=256
|| (! (flags & ANYOF_LOCALE))
- || (flags & ANYOF_IS_SYNTHETIC)))))
+ || OP(n) == ANYOF_SYNTHETIC))))
{
SV * const sw = core_regclass_swash(prog, n, TRUE, 0);
if (sw) {
}
if (UNICODE_IS_SUPER(c)
- && (flags & ANYOF_WARN_SUPER)
+ && OP(n) == ANYOF_WARN_SUPER
&& ckWARN_d(WARN_NON_UNICODE))
{
Perl_warner(aTHX_ packWARN(WARN_NON_UNICODE),