From a41aa44d9dc4a3ba586d871754bd11137bdc37a2 Mon Sep 17 00:00:00 2001 From: David Mitchell Date: Fri, 24 Aug 2012 16:17:47 +0100 Subject: [PATCH] stop $foo =~ /(bar)/g skipping copy Normally in the presence of captures, a successful regex execution makes a copy of the matched string, so that $1 et al give the right value even if the original string is changed; i.e. $foo =~ /(123)/g; $foo = "bar"; is("$1", "123"); Until now that test would fail, because perl used to skip the copy for the scalar /(...)/g case (but not the C<$&; //g> case). This was to avoid a huge slowdown in code like the following: $x = 'x' x 1_000_000; 1 while $x =~ /(.)/g; which would otherwise end up copying a 1Mb string a million times. Now that (with the last commit but one) we copy only the required substring of the original string (a 1-byte substring in the above example), we can remove this fast-but-incorrect hack. --- pp_hot.c | 7 +------ t/re/pat_advanced.t | 1 - t/re/pat_psycho.t | 45 ++++++++++++++++++++++++++++++++++++++++++++- 3 files changed, 45 insertions(+), 8 deletions(-) diff --git a/pp_hot.c b/pp_hot.c index 91958ac..6530ae5 100644 --- a/pp_hot.c +++ b/pp_hot.c @@ -1319,12 +1319,7 @@ PP(pp_match) } } } - /* XXX: comment out !global get safe $1 vars after a - match, BUT be aware that this leads to dramatic slowdowns on - /g matches against large strings. So far a solution to this problem - appears to be quite tricky. - Test for the unsafe vars are TODO for now. */ - if ( (!global && RX_NPARENS(rx)) + if ( RX_NPARENS(rx) || PL_sawampersand || SvTEMP(TARG) || SvAMAGIC(TARG) diff --git a/t/re/pat_advanced.t b/t/re/pat_advanced.t index 6692e1c..05cc191 100644 --- a/t/re/pat_advanced.t +++ b/t/re/pat_advanced.t @@ -1660,7 +1660,6 @@ $x='123'; print ">$1<\n"; EOP - local $::TODO = 'RT #86042'; fresh_perl_is(<<'EOP', ">abc<\n", {}, 'no mention of $&'); my $x; ($x='abc')=~/(abc)/g; diff --git a/t/re/pat_psycho.t b/t/re/pat_psycho.t index 0880242..0433760 100644 --- a/t/re/pat_psycho.t +++ b/t/re/pat_psycho.t @@ -25,7 +25,7 @@ BEGIN { skip_all('$PERL_SKIP_PSYCHO_TEST set') if $ENV{PERL_SKIP_PSYCHO_TEST}; -plan tests => 11; # Update this when adding/deleting tests. +plan tests => 15; # Update this when adding/deleting tests. run_tests() unless caller; @@ -160,6 +160,49 @@ sub run_tests { } ok($ok, $msg); } + + + { + # these bits of test code used to run quadratically. If we break + # anything, they'll start to take minutes to run, rather than + # seconds. We don't actually measure times or set alarms, since + # that tends to be very fragile and prone to false positives. + # Instead, just hope that if someone is messing with + # performance-related code, they'll re-run the test suite and + # notice it suddenly takes a lot longer. + + my $x; + + $x = 'x' x 1_000_000; + 1 while $x =~ /(.)/g; + pass "ascii =~ /(.)/"; + + { + local ${^UTF8CACHE} = 1; # defeat debugging + $x = "\x{100}" x 1_000_000; + 1 while $x =~ /(.)/g; + pass "utf8 =~ /(.)/"; + } + + # run these in separate processes, since they set $& + + fresh_perl_is(<<'EOF', "ok\n", {}, 'ascii =~ /(.)/, mention $&'); +$&; +$x = 'x' x 1_000_000; +1 while $x =~ /(.)/g; +print "ok\n"; +EOF + + fresh_perl_is(<<'EOF', "ok\n", {}, 'utf8 =~ /(.)/, mention $&'); +$&; +local ${^UTF8CACHE} = 1; # defeat debugging +$x = "\x{100}" x 1_000_000; +1 while $x =~ /(.)/g; +print "ok\n"; +EOF + + + } } # End of sub run_tests 1; -- 1.8.3.1