case ANYOFH:
if (utf8_target) { /* Can't possibly match a non-UTF-8 target */
+ REXEC_FBC_CLASS_SCAN(TRUE,
+ ( (U8) NATIVE_UTF8_TO_I8(*s) >= ANYOF_FLAGS(c)
+ && reginclass(prog, c, (U8*)s, (U8*) strend, utf8_target)));
+ }
+ break;
+
+ case ANYOFHb:
+ if (utf8_target) { /* Can't possibly match a non-UTF-8 target */
+
+ /* We know what the first byte of any matched string should be */
U8 first_byte = FLAGS(c);
- if (first_byte) { /* We know what the first byte of any matched
- string should be */
- REXEC_FBC_FIND_NEXT_UTF8_BYTE_SCAN(first_byte,
- reginclass(prog, c, (U8*)s, (U8*) strend, utf8_target));
- }
- else {
- REXEC_FBC_CLASS_SCAN(TRUE,
+ REXEC_FBC_FIND_NEXT_UTF8_BYTE_SCAN(first_byte,
reginclass(prog, c, (U8*)s, (U8*) strend, utf8_target));
- }
+ }
+ break;
+
+ case ANYOFHr:
+ if (utf8_target) { /* Can't possibly match a non-UTF-8 target */
+ REXEC_FBC_CLASS_SCAN(TRUE,
+ ( inRANGE((U8) NATIVE_UTF8_TO_I8(*s),
+ LOWEST_ANYOF_HRx_BYTE(ANYOF_FLAGS(c)),
+ HIGHEST_ANYOF_HRx_BYTE(ANYOF_FLAGS(c)))
+ && reginclass(prog, c, (U8*)s, (U8*) strend, utf8_target)));
}
break;
case ANYOFH:
if ( ! utf8_target
|| NEXTCHR_IS_EOS
- || ( ANYOF_FLAGS(scan) != 0
- && ANYOF_FLAGS(scan) != (U8) *locinput)
+ || ANYOF_FLAGS(scan) > NATIVE_UTF8_TO_I8((U8) *locinput)
+ || ! reginclass(rex, scan, (U8*)locinput, (U8*) loceol,
+ utf8_target))
+ {
+ sayNO;
+ }
+ goto increment_locinput;
+ break;
+
+ case ANYOFHb:
+ if ( ! utf8_target
+ || NEXTCHR_IS_EOS
+ || ANYOF_FLAGS(scan) != (U8) *locinput
+ || ! reginclass(rex, scan, (U8*)locinput, (U8*) loceol,
+ utf8_target))
+ {
+ sayNO;
+ }
+ goto increment_locinput;
+ break;
+
+ case ANYOFHr:
+ if ( ! utf8_target
+ || NEXTCHR_IS_EOS
+ || ! inRANGE((U8) NATIVE_UTF8_TO_I8(*locinput),
+ LOWEST_ANYOF_HRx_BYTE(ANYOF_FLAGS(scan)),
+ HIGHEST_ANYOF_HRx_BYTE(ANYOF_FLAGS(scan)))
|| ! reginclass(rex, scan, (U8*)locinput, (U8*) loceol,
utf8_target))
{
case ANYOFH:
if (utf8_target) { /* ANYOFH only can match UTF-8 targets */
- if (ANYOF_FLAGS(p)) { /* If we know the first byte of what
- matches, we can avoid calling reginclass
- */
- while ( hardcount < max
- && scan < this_eol
- && (U8) *scan == ANYOF_FLAGS(p)
- && reginclass(prog, p, (U8*)scan, (U8*) this_eol,
- TRUE))
- {
- scan += UTF8SKIP(scan);
- hardcount++;
- }
+ while ( hardcount < max
+ && scan < this_eol
+ && NATIVE_UTF8_TO_I8((U8) *scan) >= ANYOF_FLAGS(p)
+ && reginclass(prog, p, (U8*)scan, (U8*) this_eol, TRUE))
+ {
+ scan += UTF8SKIP(scan);
+ hardcount++;
}
- else while ( hardcount < max
- && scan < this_eol
- && reginclass(prog, p, (U8*)scan, (U8*) this_eol, TRUE))
+ }
+ break;
+
+ case ANYOFHb:
+ if (utf8_target) { /* ANYOFHb only can match UTF-8 targets */
+
+ /* we know the first byte must be the FLAGS field */
+ while ( hardcount < max
+ && scan < this_eol
+ && (U8) *scan == ANYOF_FLAGS(p)
+ && reginclass(prog, p, (U8*)scan, (U8*) this_eol,
+ TRUE))
+ {
+ scan += UTF8SKIP(scan);
+ hardcount++;
+ }
+ }
+ break;
+
+ case ANYOFHr:
+ if (utf8_target) { /* ANYOFH only can match UTF-8 targets */
+ while ( hardcount < max
+ && scan < this_eol
+ && inRANGE((U8) NATIVE_UTF8_TO_I8(*scan),
+ LOWEST_ANYOF_HRx_BYTE(ANYOF_FLAGS(p)),
+ HIGHEST_ANYOF_HRx_BYTE(ANYOF_FLAGS(p)))
+ && NATIVE_UTF8_TO_I8((U8) *scan) >= ANYOF_FLAGS(p)
+ && reginclass(prog, p, (U8*)scan, (U8*) this_eol, TRUE))
{
scan += UTF8SKIP(scan);
hardcount++;
S_reginclass(pTHX_ regexp * const prog, const regnode * const n, const U8* const p, const U8* const p_end, const bool utf8_target)
{
dVAR;
- const char flags = (OP(n) == ANYOFH) ? 0 : ANYOF_FLAGS(n);
+ const char flags = (inRANGE(OP(n), ANYOFH, ANYOFHr))
+ ? 0
+ : ANYOF_FLAGS(n);
bool match = FALSE;
UV c = *p;
}
/* If this character is potentially in the bitmap, check it */
- if (c < NUM_ANYOF_CODE_POINTS && OP(n) != ANYOFH) {
+ if (c < NUM_ANYOF_CODE_POINTS && ! inRANGE(OP(n), ANYOFH, ANYOFHb)) {
if (ANYOF_BITMAP_TEST(n, c))
match = TRUE;
else if ((flags
regmatch_info_aux_eval *eval_state = reginfo->info_aux_eval;
eval_state->rex = rex;
+ eval_state->sv = reginfo->sv;
if (reginfo->sv) {
/* Make $_ available to executed code. */
SAVE_DEFSV;
DEFSV_set(reginfo->sv);
}
+ /* will be dec'd by S_cleanup_regmatch_info_aux */
+ SvREFCNT_inc_NN(reginfo->sv);
if (!(mg = mg_find_mglob(reginfo->sv))) {
/* prepare for quick setting of pos */
}
PL_curpm = eval_state->curpm;
+ SvREFCNT_dec(eval_state->sv);
}
PL_regmatch_state = aux->old_regmatch_state;
&& !prog->substrs->data[i].substr) {
SV* sv = newSVsv(prog->substrs->data[i].utf8_substr);
if (! sv_utf8_downgrade(sv, TRUE)) {
+ SvREFCNT_dec_NN(sv);
return FALSE;
}
if (SvVALID(prog->substrs->data[i].utf8_substr)) {