Fix a bug in intuit_start() that makes it fail when the utf8-ness of the
string and pattern differ. This was mostly masked, since pp_match() skips
calling intuit_start() in this case (and has done so since 2000, presumably
as a workaround for this issue, and possibly for other issues since fixed).
But pp_subst() didn't skip, so code like this would fail:
    $c = "\x{c0}";
    utf8::upgrade($c);
    print "ok\n" if $c =~ s/\xC0{1,2}$/\xC0/i;
Now that intuit is (hopefully) fixed, also remove the guard in pp_match().
if (update_minmatch++)
minmatch = had_zerolen;
}
- if (RX_EXTFLAGS(rx) & RXf_USE_INTUIT &&
- DO_UTF8(TARG) == (RX_UTF8(rx) != 0)) {
+ if (RX_EXTFLAGS(rx) & RXf_USE_INTUIT) {
s = CALLREG_INTUIT_START(rx, TARG, truebase,
(char *)s, (char *)strend, r_flags, NULL);
/* If regstclass takes bytelength more than 1: If charlength==1, OK.
This leaves EXACTF-ish only, which are dealt with in find_byclass(). */
const U8* const str = (U8*)STRING(progi->regstclass);
+ /* XXX this value could be pre-computed */
const int cl_l = (PL_regkind[OP(progi->regstclass)] == EXACT
- ? CHR_DIST(str+STR_LEN(progi->regstclass), str)
+ ? (reginfo->is_utf8_pat
+ ? utf8_distance(str + STR_LEN(progi->regstclass), str)
+ : STR_LEN(progi->regstclass))
: 1);
char * endpos;
if (prog->anchored_substr || prog->anchored_utf8 || ml_anch)