From b2a3d41024cc23941660c6f6cac48502a4284422 Mon Sep 17 00:00:00 2001 From: Jarkko Hietaniemi Date: Sat, 13 Nov 1999 19:50:24 +0000 Subject: [PATCH] Change #4576 accidentally leaked also parts of Ilya's patch that won't apply cleanly anymore. p4raw-id: //depot/cfgperl@4577 --- regexec.c | 461 ++++++++-------------------------------------------------- t/op/re_tests | 2 - 2 files changed, 64 insertions(+), 399 deletions(-) diff --git a/regexec.c b/regexec.c index e3f0cb4..fa891c8 100644 --- a/regexec.c +++ b/regexec.c @@ -254,9 +254,6 @@ S_cache_re(pTHX_ regexp *prog) PL_reg_re = prog; } -static char *find_byclass(regexp * prog, regnode *c, char *s, char *strend, - char *startpos, I32 norun); - /* * Need to implement the following flags for reg_anch: * @@ -278,13 +275,6 @@ static char *find_byclass(regexp * prog, regnode *c, char *s, char *strend, /* XXXX We assume that strpos is strbeg unless sv. */ -/* XXXX Some places assume that there is a fixed substring. - An update may be needed if optimizer marks as "INTUITable" - RExen without fixed substrings. Similarly, it is assumed that - lengths of all the strings are no more than minlen, thus they - cannot come from lookahead. - (Or minlen should take into account lookahead.) */ - /* A failure to find a constant substring means that there is no need to make an expensive call to REx engine, thus we celebrate a failure. Similarly, finding a substring too deep into the string means that less calls to @@ -295,14 +285,10 @@ static char *find_byclass(regexp * prog, regnode *c, char *s, char *strend, b) Fixed substring; c) Whether we are anchored (beginning-of-line or \G); d) First node (of those at offset 0) which may distingush positions; - We use a)b)d) and multiline-part of c), and try to find a position in the + We use 'a', 'b', multiline-part of 'c', and try to find a position in the string which does not contradict any of them. */ -/* Most of decisions we do here should have been done at compile time. - The nodes of the REx which we used for the search should have been - deleted from the finite automaton. */ - char * Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos, char *strend, U32 flags, re_scream_pos_data *data) @@ -315,8 +301,7 @@ Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos, char *t; I32 ml_anch; char *tmp; - register char *other_last = Nullch; /* other substr checked before this */ - char *check_at; /* check substr found at this pos */ + register char *other_last = Nullch; #ifdef DEBUGGING char *i_strpos = strpos; #endif @@ -447,8 +432,6 @@ Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos, if (!s) goto fail_finish; - check_at = s; - /* Finish the diagnostic message */ DEBUG_r(PerlIO_printf(Perl_debug_log, "%ld...\n", (long)(s - i_strpos)) ); @@ -464,7 +447,7 @@ Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos, /* Take into account the "other" substring. */ /* XXXX May be hopelessly wrong for UTF... */ if (!other_last) - other_last = strpos; + other_last = strpos - 1; if (check == prog->float_substr) { do_other_anchored: { @@ -482,8 +465,8 @@ Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos, else t = strpos; t += prog->anchored_offset; - if (t < other_last) /* These positions already checked */ - t = other_last; + if (t <= other_last) + t = other_last + 1; PL_bostr = tmp; last2 = last1 = strend - prog->minlen; if (last < last1) @@ -512,7 +495,7 @@ Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos, ", trying floating at offset %ld...\n", (long)(s1 + 1 - i_strpos))); PL_regeol = strend; /* Used in HOP() */ - other_last = last1 + prog->anchored_offset + 1; + other_last = last1 + prog->anchored_offset; s = HOPc(last, 1); goto restart; } @@ -520,7 +503,7 @@ Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos, DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n", (long)(s - i_strpos))); t = s - prog->anchored_offset; - other_last = s + 1; + other_last = s - 1; s = s1; if (t == strpos) goto try_at_start; @@ -537,8 +520,8 @@ Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos, if (last - t > prog->float_max_offset) last = t + prog->float_max_offset; s = t + prog->float_min_offset; - if (s < other_last) - s = other_last; + if (s <= other_last) + s = other_last + 1; /* XXXX It is not documented what units *_offsets are in. Assume bytes. */ /* fbm_instr() takes into account exact value of end-of-str if the check is SvTAIL(ed). Since false positives are OK, @@ -563,7 +546,7 @@ Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos, DEBUG_r(PerlIO_printf(Perl_debug_log, ", trying anchored starting at offset %ld...\n", (long)(s1 + 1 - i_strpos))); - other_last = last + 1; + other_last = last; PL_regeol = strend; /* Used in HOP() */ s = HOPc(t, 1); goto restart; @@ -571,7 +554,7 @@ Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos, else { DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n", (long)(s - i_strpos))); - other_last = s + 1; + other_last = s - 1; s = s1; if (t == strpos) goto try_at_start; @@ -669,72 +652,6 @@ Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos, s = strpos; } - /* Last resort... */ - /* XXXX BmUSEFUL already changed, maybe multiple change is meaningful... */ - if (prog->regstclass) { - /* minlen == 0 is possible if regstclass is \b or \B, - and the fixed substr is ''$. - Since minlen is already taken into account, s+1 is before strend; - accidentally, minlen >= 1 guaranties no false positives at s + 1 - even for \b or \B. But (minlen? 1 : 0) below assumes that - regstclass does not come from lookahead... */ - /* If regstclass takes bytelength more than 1: If charlength==1, OK. - This leaves EXACTF only, which is dealt with in find_byclass(). */ - char *endpos = (prog->anchored_substr || ml_anch) - ? s + (prog->minlen? 1 : 0) - : (prog->float_substr ? check_at - start_shift + 1 - : strend) ; - char *startpos = sv ? strend - SvCUR(sv) : s; - - t = s; - s = find_byclass(prog, prog->regstclass, s, endpos, startpos, 1); - if (!s) { -#ifdef DEBUGGING - char *what; -#endif - if (endpos == strend) { - DEBUG_r( PerlIO_printf(Perl_debug_log, - "Could not match STCLASS...\n") ); - goto fail; - } - /* Contradict one of substrings */ - if (prog->anchored_substr) { - DEBUG_r( PerlIO_printf(Perl_debug_log, - "This position contradicts STCLASS...\n") ); - if (prog->anchored_substr == check) { - DEBUG_r( what = "anchored" ); - hop_and_restart: - PL_regeol = strend; /* Used in HOP() */ - s = HOPc(t, 1); - DEBUG_r( PerlIO_printf(Perl_debug_log, - "trying %s substr starting at offset %ld...\n", - what, (long)(s + start_shift - i_strpos)) ); - goto restart; - } - /* Have both, check is floating */ - if (t + start_shift >= check_at) /* Contradicts floating=check */ - goto retry_floating_check; - /* Recheck anchored substring, but not floating... */ - s = check_at; - DEBUG_r( PerlIO_printf(Perl_debug_log, - "trying anchored substr starting at offset %ld...\n", - (long)(other_last - i_strpos)) ); - goto do_other_anchored; - } - /* Check is floating subtring. */ - retry_floating_check: - t = check_at - start_shift; - DEBUG_r( what = "floating" ); - goto hop_and_restart; - } - DEBUG_r( if (t != s) - PerlIO_printf(Perl_debug_log, - "By STCLASS: moving %ld --> %ld\n", - (long)(t - i_strpos), (long)(s - i_strpos)); - else - PerlIO_printf(Perl_debug_log, - "Does not contradict STCLASS...\n") ); - } DEBUG_r(PerlIO_printf(Perl_debug_log, "%sGuessed:%s match at offset %ld\n", PL_colors[4], PL_colors[5], (long)(s - i_strpos)) ); return s; @@ -1066,7 +983,7 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char * && (ln == 1 || (OP(c) == EXACTF ? ibcmp(s, m, ln) : ibcmp_locale(s, m, ln))) - && (norun || regtry(prog, s)) ) + && regtry(prog, s) ) goto got_it; s++; } @@ -1076,7 +993,7 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char * && (ln == 1 || (OP(c) == EXACTF ? ibcmp(s, m, ln) : ibcmp_locale(s, m, ln))) - && (norun || regtry(prog, s)) ) + && regtry(prog, s) ) goto got_it; s++; } @@ -1086,24 +1003,32 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char * PL_reg_flags |= RF_tainted; /* FALL THROUGH */ case BOUND: - tmp = (s != startpos) ? UCHARAT(s - 1) : '\n'; + if (minlen) { + dontbother++; + strend -= 1; + } + tmp = (s != startpos) ? UCHARAT(s - 1) : PL_regprev; tmp = ((OP(c) == BOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0); while (s < strend) { if (tmp == !(OP(c) == BOUND ? isALNUM(*s) : isALNUM_LC(*s))) { tmp = !tmp; - if ((norun || regtry(prog, s))) + if (regtry(prog, s)) goto got_it; } s++; } - if ((!prog->minlen && tmp) && (norun || regtry(prog, s))) + if ((minlen || tmp) && regtry(prog,s)) goto got_it; break; case BOUNDLUTF8: PL_reg_flags |= RF_tainted; /* FALL THROUGH */ case BOUNDUTF8: - tmp = (I32)(s != startpos) ? utf8_to_uv(reghop((U8*)s, -1), 0) : '\n'; + if (minlen) { + dontbother++; + strend = reghop_c(strend, -1); + } + tmp = (I32)(s != startpos) ? utf8_to_uv(reghop((U8*)s, -1), 0) : PL_regprev; tmp = ((OP(c) == BOUND ? isALNUM_uni(tmp) : isALNUM_LC_uni(tmp)) != 0); while (s < strend) { if (tmp == !(OP(c) == BOUND ? @@ -1111,54 +1036,60 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char * isALNUM_LC_utf8((U8*)s))) { tmp = !tmp; - if ((norun || regtry(prog, s))) + if (regtry(prog, s)) goto got_it; } s += UTF8SKIP(s); } - if ((!prog->minlen && tmp) && (norun || regtry(prog, s))) + if ((minlen || tmp) && regtry(prog,s)) goto got_it; break; case NBOUNDL: PL_reg_flags |= RF_tainted; /* FALL THROUGH */ case NBOUND: - tmp = (s != startpos) ? UCHARAT(s - 1) : '\n'; + if (minlen) { + dontbother++; + strend -= 1; + } + tmp = (s != startpos) ? UCHARAT(s - 1) : PL_regprev; tmp = ((OP(c) == NBOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0); while (s < strend) { if (tmp == !(OP(c) == NBOUND ? isALNUM(*s) : isALNUM_LC(*s))) tmp = !tmp; - else if ((norun || regtry(prog, s))) + else if (regtry(prog, s)) goto got_it; s++; } - if ((!prog->minlen && !tmp) && (norun || regtry(prog, s))) + if ((minlen || !tmp) && regtry(prog,s)) goto got_it; break; case NBOUNDLUTF8: PL_reg_flags |= RF_tainted; /* FALL THROUGH */ case NBOUNDUTF8: - if (prog->minlen) + if (minlen) { + dontbother++; strend = reghop_c(strend, -1); - tmp = (I32)(s != startpos) ? utf8_to_uv(reghop((U8*)s, -1), 0) : '\n'; + } + tmp = (I32)(s != startpos) ? utf8_to_uv(reghop((U8*)s, -1), 0) : PL_regprev; tmp = ((OP(c) == NBOUND ? isALNUM_uni(tmp) : isALNUM_LC_uni(tmp)) != 0); while (s < strend) { if (tmp == !(OP(c) == NBOUND ? swash_fetch(PL_utf8_alnum, (U8*)s) : isALNUM_LC_utf8((U8*)s))) tmp = !tmp; - else if ((norun || regtry(prog, s))) + else if (regtry(prog, s)) goto got_it; s += UTF8SKIP(s); } - if ((!prog->minlen && !tmp) && (norun || regtry(prog, s))) + if ((minlen || !tmp) && regtry(prog,s)) goto got_it; break; case ALNUM: while (s < strend) { if (isALNUM(*s)) { - if (tmp && (norun || regtry(prog, s))) + if (tmp && regtry(prog, s)) goto got_it; else tmp = doevery; @@ -1171,7 +1102,7 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char * case ALNUMUTF8: while (s < strend) { if (swash_fetch(PL_utf8_alnum, (U8*)s)) { - if (tmp && (norun || regtry(prog, s))) + if (tmp && regtry(prog, s)) goto got_it; else tmp = doevery; @@ -1185,7 +1116,7 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char * PL_reg_flags |= RF_tainted; while (s < strend) { if (isALNUM_LC(*s)) { - if (tmp && (norun || regtry(prog, s))) + if (tmp && regtry(prog, s)) goto got_it; else tmp = doevery; @@ -1199,7 +1130,7 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char * PL_reg_flags |= RF_tainted; while (s < strend) { if (isALNUM_LC_utf8((U8*)s)) { - if (tmp && (norun || regtry(prog, s))) + if (tmp && regtry(prog, s)) goto got_it; else tmp = doevery; @@ -1212,7 +1143,7 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char * case NALNUM: while (s < strend) { if (!isALNUM(*s)) { - if (tmp && (norun || regtry(prog, s))) + if (tmp && regtry(prog, s)) goto got_it; else tmp = doevery; @@ -1225,7 +1156,7 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char * case NALNUMUTF8: while (s < strend) { if (!swash_fetch(PL_utf8_alnum, (U8*)s)) { - if (tmp && (norun || regtry(prog, s))) + if (tmp && regtry(prog, s)) goto got_it; else tmp = doevery; @@ -1239,7 +1170,7 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char * PL_reg_flags |= RF_tainted; while (s < strend) { if (!isALNUM_LC(*s)) { - if (tmp && (norun || regtry(prog, s))) + if (tmp && regtry(prog, s)) goto got_it; else tmp = doevery; @@ -1253,7 +1184,7 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char * PL_reg_flags |= RF_tainted; while (s < strend) { if (!isALNUM_LC_utf8((U8*)s)) { - if (tmp && (norun || regtry(prog, s))) + if (tmp && regtry(prog, s)) goto got_it; else tmp = doevery; @@ -1266,7 +1197,7 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char * case SPACE: while (s < strend) { if (isSPACE(*s)) { - if (tmp && (norun || regtry(prog, s))) + if (tmp && regtry(prog, s)) goto got_it; else tmp = doevery; @@ -1279,7 +1210,7 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char * case SPACEUTF8: while (s < strend) { if (*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s)) { - if (tmp && (norun || regtry(prog, s))) + if (tmp && regtry(prog, s)) goto got_it; else tmp = doevery; @@ -1293,7 +1224,7 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char * PL_reg_flags |= RF_tainted; while (s < strend) { if (isSPACE_LC(*s)) { - if (tmp && (norun || regtry(prog, s))) + if (tmp && regtry(prog, s)) goto got_it; else tmp = doevery; @@ -1307,7 +1238,7 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char * PL_reg_flags |= RF_tainted; while (s < strend) { if (*s == ' ' || isSPACE_LC_utf8((U8*)s)) { - if (tmp && (norun || regtry(prog, s))) + if (tmp && regtry(prog, s)) goto got_it; else tmp = doevery; @@ -1320,7 +1251,7 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char * case NSPACE: while (s < strend) { if (!isSPACE(*s)) { - if (tmp && (norun || regtry(prog, s))) + if (tmp && regtry(prog, s)) goto got_it; else tmp = doevery; @@ -1333,7 +1264,7 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char * case NSPACEUTF8: while (s < strend) { if (!(*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s))) { - if (tmp && (norun || regtry(prog, s))) + if (tmp && regtry(prog, s)) goto got_it; else tmp = doevery; @@ -1347,7 +1278,7 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char * PL_reg_flags |= RF_tainted; while (s < strend) { if (!isSPACE_LC(*s)) { - if (tmp && (norun || regtry(prog, s))) + if (tmp && regtry(prog, s)) goto got_it; else tmp = doevery; @@ -1361,7 +1292,7 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char * PL_reg_flags |= RF_tainted; while (s < strend) { if (!(*s == ' ' || isSPACE_LC_utf8((U8*)s))) { - if (tmp && (norun || regtry(prog, s))) + if (tmp && regtry(prog, s)) goto got_it; else tmp = doevery; @@ -1374,7 +1305,7 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char * case DIGIT: while (s < strend) { if (isDIGIT(*s)) { - if (tmp && (norun || regtry(prog, s))) + if (tmp && regtry(prog, s)) goto got_it; else tmp = doevery; @@ -1387,7 +1318,7 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char * case DIGITUTF8: while (s < strend) { if (swash_fetch(PL_utf8_digit,(U8*)s)) { - if (tmp && (norun || regtry(prog, s))) + if (tmp && regtry(prog, s)) goto got_it; else tmp = doevery; @@ -1401,7 +1332,7 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char * PL_reg_flags |= RF_tainted; while (s < strend) { if (isDIGIT_LC(*s)) { - if (tmp && (norun || regtry(prog, s))) + if (tmp && regtry(prog, s)) goto got_it; else tmp = doevery; @@ -1415,7 +1346,7 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char * PL_reg_flags |= RF_tainted; while (s < strend) { if (isDIGIT_LC_utf8((U8*)s)) { - if (tmp && (norun || regtry(prog, s))) + if (tmp && regtry(prog, s)) goto got_it; else tmp = doevery; @@ -1428,7 +1359,7 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char * case NDIGIT: while (s < strend) { if (!isDIGIT(*s)) { - if (tmp && (norun || regtry(prog, s))) + if (tmp && regtry(prog, s)) goto got_it; else tmp = doevery; @@ -1441,7 +1372,7 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char * case NDIGITUTF8: while (s < strend) { if (!swash_fetch(PL_utf8_digit,(U8*)s)) { - if (tmp && (norun || regtry(prog, s))) + if (tmp && regtry(prog, s)) goto got_it; else tmp = doevery; @@ -1455,7 +1386,7 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char * PL_reg_flags |= RF_tainted; while (s < strend) { if (!isDIGIT_LC(*s)) { - if (tmp && (norun || regtry(prog, s))) + if (tmp && regtry(prog, s)) goto got_it; else tmp = doevery; @@ -1469,7 +1400,7 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char * PL_reg_flags |= RF_tainted; while (s < strend) { if (!isDIGIT_LC_utf8((U8*)s)) { - if (tmp && (norun || regtry(prog, s))) + if (tmp && regtry(prog, s)) goto got_it; else tmp = doevery; @@ -1483,270 +1414,6 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char * Perl_croak(aTHX_ "panic: unknown regstclass %d", (int)OP(c)); break; } - return 0; - got_it: - return s; -} - -/* - - regexec_flags - match a regexp against a string - */ -I32 -Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *strend, - char *strbeg, I32 minend, SV *sv, void *data, U32 flags) -/* strend: pointer to null at end of string */ -/* strbeg: real beginning of string */ -/* minend: end of match must be >=minend after stringarg. */ -/* data: May be used for some additional optimizations. */ -/* nosave: For optimizations. */ -{ - dTHR; - register char *s; - register regnode *c; - register char *startpos = stringarg; - register I32 tmp; - I32 minlen; /* must match at least this many chars */ - I32 dontbother = 0; /* how many characters not to try at end */ - I32 start_shift = 0; /* Offset of the start to find - constant substr. */ /* CC */ - I32 end_shift = 0; /* Same for the end. */ /* CC */ - I32 scream_pos = -1; /* Internal iterator of scream. */ - char *scream_olds; - SV* oreplsv = GvSV(PL_replgv); - - PL_regcc = 0; - - cache_re(prog); -#ifdef DEBUGGING - PL_regnarrate = PL_debug & 512; -#endif - - /* Be paranoid... */ - if (prog == NULL || startpos == NULL) { - Perl_croak(aTHX_ "NULL regexp parameter"); - return 0; - } - - minlen = prog->minlen; - if (strend - startpos < minlen) goto phooey; - - if (startpos == strbeg) /* is ^ valid at stringarg? */ - PL_regprev = '\n'; - else { - PL_regprev = (U32)stringarg[-1]; - if (!PL_multiline && PL_regprev == '\n') - PL_regprev = '\0'; /* force ^ to NOT match */ - } - - /* Check validity of program. */ - if (UCHARAT(prog->program) != REG_MAGIC) { - Perl_croak(aTHX_ "corrupted regexp program"); - } - - PL_reg_flags = 0; - PL_reg_eval_set = 0; - PL_reg_maxiter = 0; - - if (prog->reganch & ROPT_UTF8) - PL_reg_flags |= RF_utf8; - - /* Mark beginning of line for ^ and lookbehind. */ - PL_regbol = startpos; - PL_bostr = strbeg; - PL_reg_sv = sv; - - /* Mark end of line for $ (and such) */ - PL_regeol = strend; - - /* see how far we have to get to not match where we matched before */ - PL_regtill = startpos+minend; - - /* We start without call_cc context. */ - PL_reg_call_cc = 0; - - /* If there is a "must appear" string, look for it. */ - s = startpos; - - if (prog->reganch & ROPT_GPOS_SEEN) { /* Need to have PL_reg_ganch */ - MAGIC *mg; - - if (flags & REXEC_IGNOREPOS) /* Means: check only at start */ - PL_reg_ganch = startpos; - else if (sv && SvTYPE(sv) >= SVt_PVMG - && SvMAGIC(sv) - && (mg = mg_find(sv, 'g')) && mg->mg_len >= 0) { - PL_reg_ganch = strbeg + mg->mg_len; /* Defined pos() */ - if (prog->reganch & ROPT_ANCH_GPOS) { - if (s > PL_reg_ganch) - goto phooey; - s = PL_reg_ganch; - } - } - else /* pos() not defined */ - PL_reg_ganch = strbeg; - } - - if (!(flags & REXEC_CHECKED) && prog->check_substr != Nullsv) { - re_scream_pos_data d; - - d.scream_olds = &scream_olds; - d.scream_pos = &scream_pos; - s = re_intuit_start(prog, sv, s, strend, flags, &d); - if (!s) - goto phooey; /* not present */ - } - - DEBUG_r( if (!PL_colorset) reginitcolors() ); - DEBUG_r(PerlIO_printf(Perl_debug_log, - "%sMatching REx%s `%s%.60s%s%s' against `%s%.*s%s%s'\n", - PL_colors[4],PL_colors[5],PL_colors[0], - prog->precomp, - PL_colors[1], - (strlen(prog->precomp) > 60 ? "..." : ""), - PL_colors[0], - (strend - startpos > 60 ? 60 : strend - startpos), - startpos, PL_colors[1], - (strend - startpos > 60 ? "..." : "")) - ); - - /* Simplest case: anchored match need be tried only once. */ - /* [unless only anchor is BOL and multiline is set] */ - if (prog->reganch & (ROPT_ANCH & ~ROPT_ANCH_GPOS)) { - if (s == startpos && regtry(prog, startpos)) - goto got_it; - else if (PL_multiline || (prog->reganch & ROPT_IMPLICIT) - || (prog->reganch & ROPT_ANCH_MBOL)) /* XXXX SBOL? */ - { - char *end; - - if (minlen) - dontbother = minlen - 1; - end = HOPc(strend, -dontbother) - 1; - /* for multiline we only have to try after newlines */ - if (prog->check_substr) { - if (s == startpos) - goto after_try; - while (1) { - if (regtry(prog, s)) - goto got_it; - after_try: - if (s >= end) - goto phooey; - s = re_intuit_start(prog, sv, s + 1, strend, flags, NULL); - if (!s) - goto phooey; - } - } else { - if (s > startpos) - s--; - while (s < end) { - if (*s++ == '\n') { /* don't need PL_utf8skip here */ - if (regtry(prog, s)) - goto got_it; - } - } - } - } - goto phooey; - } else if (prog->reganch & ROPT_ANCH_GPOS) { - if (regtry(prog, PL_reg_ganch)) - goto got_it; - goto phooey; - } - - /* Messy cases: unanchored match. */ - if (prog->anchored_substr && prog->reganch & ROPT_SKIP) { - /* we have /x+whatever/ */ - /* it must be a one character string (XXXX Except UTF?) */ - char ch = SvPVX(prog->anchored_substr)[0]; - if (UTF) { - while (s < strend) { - if (*s == ch) { - if (regtry(prog, s)) goto got_it; - s += UTF8SKIP(s); - while (s < strend && *s == ch) - s += UTF8SKIP(s); - } - s += UTF8SKIP(s); - } - } - else { - while (s < strend) { - if (*s == ch) { - if (regtry(prog, s)) goto got_it; - s++; - while (s < strend && *s == ch) - s++; - } - s++; - } - } - } - /*SUPPRESS 560*/ - else if (prog->anchored_substr != Nullsv - || (prog->float_substr != Nullsv - && prog->float_max_offset < strend - s)) { - SV *must = prog->anchored_substr - ? prog->anchored_substr : prog->float_substr; - I32 back_max = - prog->anchored_substr ? prog->anchored_offset : prog->float_max_offset; - I32 back_min = - prog->anchored_substr ? prog->anchored_offset : prog->float_min_offset; - I32 delta = back_max - back_min; - char *last = HOPc(strend, /* Cannot start after this */ - -(I32)(CHR_SVLEN(must) - - (SvTAIL(must) != 0) + back_min)); - char *last1; /* Last position checked before */ - - if (s > PL_bostr) - last1 = HOPc(s, -1); - else - last1 = s - 1; /* bogus */ - - /* XXXX check_substr already used to find `s', can optimize if - check_substr==must. */ - scream_pos = -1; - dontbother = end_shift; - strend = HOPc(strend, -dontbother); - while ( (s <= last) && - ((flags & REXEC_SCREAM) - ? (s = screaminstr(sv, must, HOPc(s, back_min) - strbeg, - end_shift, &scream_pos, 0)) - : (s = fbm_instr((unsigned char*)HOP(s, back_min), - (unsigned char*)strend, must, - PL_multiline ? FBMrf_MULTILINE : 0))) ) { - if (HOPc(s, -back_max) > last1) { - last1 = HOPc(s, -back_min); - s = HOPc(s, -back_max); - } - else { - char *t = (last1 >= PL_bostr) ? HOPc(last1, 1) : last1 + 1; - - last1 = HOPc(s, -back_min); - s = t; - } - if (UTF) { - while (s <= last1) { - if (regtry(prog, s)) - goto got_it; - s += UTF8SKIP(s); - } - } - else { - while (s <= last1) { - if (regtry(prog, s)) - goto got_it; - s++; - } - } - } - goto phooey; - } - else if (c = prog->regstclass) { - if (minlen) /* don't bother with what can't match */ - strend = HOPc(strend, -(minlen - 1)); - if (find_byclass(prog, c, s, strend, startpos, 0)) - goto got_it; } else { dontbother = 0; diff --git a/t/op/re_tests b/t/op/re_tests index f866385..d72a0f7 100644 --- a/t/op/re_tests +++ b/t/op/re_tests @@ -742,5 +742,3 @@ tt+$ xxxtt y - - ([[:digit:]-z]+) =0-z= y $1 0-z ([[:digit:]-[:alpha:]]+) =0-z= y $1 0-z \GX.*X aaaXbX n - - -(\d+\.\d+) 3.1415926 y $1 3.1415926 -(\ba.{0,10}br) have a web browser y $1 a web br -- 1.8.3.1