end = HOP3c(strend, -dontbother, strbeg) - 1;
/* for multiline we only have to try after newlines */
if (prog->check_substr || prog->check_utf8) {
- if (s == startpos)
- goto after_try;
- while (1) {
- if (regtry(&reginfo, &s))
- goto got_it;
- after_try:
- if (s > end)
- goto phooey;
- if (prog->extflags & RXf_USE_INTUIT) {
- s = re_intuit_start(rx, sv, s + 1, strend, flags, NULL);
- if (!s)
- goto phooey;
- }
- else
- s++;
- }
- } else {
- if (s > startpos)
+ /* because of the goto we can not easily reuse the macros for bifurcating the
+ unicode/non-unicode match modes here like we do elsewhere - demerphq */
+ if (utf8_target) {
+ if (s == startpos)
+ goto after_try_utf8;
+ while (1) {
+ if (regtry(&reginfo, &s)) {
+ goto got_it;
+ }
+ after_try_utf8:
+ if (s > end) {
+ goto phooey;
+ }
+ if (prog->extflags & RXf_USE_INTUIT) {
+ s = re_intuit_start(rx, sv, s + UTF8SKIP(s), strend, flags, NULL);
+ if (!s) {
+ goto phooey;
+ }
+ }
+ else {
+ s += UTF8SKIP(s);
+ }
+ }
+ } /* end search for check string in unicode */
+ else {
+ if (s == startpos) {
+ goto after_try_latin;
+ }
+ while (1) {
+ if (regtry(&reginfo, &s)) {
+ goto got_it;
+ }
+ after_try_latin:
+ if (s > end) {
+ goto phooey;
+ }
+ if (prog->extflags & RXf_USE_INTUIT) {
+ s = re_intuit_start(rx, sv, s + 1, strend, flags, NULL);
+ if (!s) {
+ goto phooey;
+ }
+ }
+ else {
+ s++;
+ }
+ }
+ } /* end search for check string in latin*/
+ } /* end search for check string */
+ else { /* search for newline */
+ if (s > startpos) {
+ /*XXX: The s-- is almost definitely wrong here under unicode - demerphq*/
s--;
+ }
+ /* We can use a more efficient search as newlines are the same in unicode as they are in latin */
while (s < end) {
if (*s++ == '\n') { /* don't need PL_utf8skip here */
if (regtry(&reginfo, &s))
goto got_it;
}
- }
- }
- }
+ }
+ } /* end search for newline */
+ } /* end anchored/multiline check string search */
goto phooey;
} else if (RXf_GPOS_CHECK == (prog->extflags & RXf_GPOS_CHECK))
{
}
-plan tests => 350; # Update this when adding/deleting tests.
+plan tests => 360; # Update this when adding/deleting tests.
run_tests() unless caller;
ok $str=~/.*\z/, "implict MBOL check string disable does not break things length=$i";
}
}
-
+ {
+ # we are actually testing that we don't die when executing these patterns
+ use utf8;
+ my $e = "Böck";
+ ok(utf8::is_utf8($e),"got a unicode string - rt75680");
+
+ ok($e !~ m/.*?[x]$/, "unicode string against /.*?[x]\$/ - rt75680");
+ ok($e !~ m/.*?\p{Space}$/i, "unicode string against /.*?\\p{space}\$/i - rt75680");
+ ok($e !~ m/.*?[xyz]$/, "unicode string against /.*?[xyz]\$/ - rt75680");
+ ok($e !~ m/(.*?)[,\p{isSpace}]+((?:\p{isAlpha}[\p{isSpace}\.]{1,2})+)\p{isSpace}*$/, "unicode string against big pattern - rt75680");
+ }
+ {
+ # we are actually testing that we don't die when executing these patterns
+ my $e = "B\x{f6}ck";
+ ok(!utf8::is_utf8($e), "got a latin string - rt75680");
+
+ ok($e !~ m/.*?[x]$/, "latin string against /.*?[x]\$/ - rt75680");
+ ok($e !~ m/.*?\p{Space}$/i, "latin string against /.*?\\p{space}\$/i - rt75680");
+ ok($e !~ m/.*?[xyz]$/,"latin string against /.*?[xyz]\$/ - rt75680");
+ ok($e !~ m/(.*?)[,\p{isSpace}]+((?:\p{isAlpha}[\p{isSpace}\.]{1,2})+)\p{isSpace}*$/,"latin string against big pattern - rt75680");
+ }
} # End of sub run_tests
1;