This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
PATCH: [perl #122460] Regression with (?{})(?=...) and pos
author Karl Williamson <khw@cpan.org>
Thu, 7 Aug 2014 16:45:21 +0000 (10:45 -0600)
committer Karl Williamson <khw@cpan.org>
Fri, 8 Aug 2014 18:49:17 +0000 (12:49 -0600)
The commit that really broke this was
a0dd42312a1f26356d2fdf49656e45b77c2cefb5.  The blamed commit fixed some
typos that kept the earlier bad commit from having an effect.

Zero-length assertions, such as (?=...), obviously can match an empty
string.  The bad commit wrongly removed a setting of a flag to that
effect.  I didn't realize the implications at the time, thinking that
the newly-corrected AND logical operations would correctly set things
up, and that the previous code had been a workaround for the buggy AND.
The problem is that there is an overriding context such that whatever
the AND result, since this is in a 0-length assertion, the result will
match the empty string.  Thus it now differs from the pre-bad-commit
code, which set the flag to match the empty string only sometimes.  I'm pretty
sure it should be 'always'.  If I'm wrong, it doesn't mean a bug, but
only that an optimisation that could take place won't.

regcomp.c
t/re/pat_advanced.t

index 6530068..11dad9c 100644 (file)
--- a/regcomp.c
+++ b/regcomp.c
@@ -5131,8 +5131,11 @@ PerlIO_printf(Perl_debug_log, "LHS=%"UVuf" RHS=%"UVuf"\n",
                         */
                        ssc_init(pRExC_state, data->start_class);
                    }  else {
-                       /* AND before and after: combine and continue */
+                        /* AND before and after: combine and continue.  These
+                         * assertions are zero-length, so can match an EMPTY
+                         * string */
                        ssc_and(pRExC_state, data->start_class, (regnode_charclass *) &intrnl);
+                        ANYOF_FLAGS(data->start_class) |= ANYOF_EMPTY_STRING;
                    }
                 }
            }
@@ -5204,6 +5207,7 @@ PerlIO_printf(Perl_debug_log, "LHS=%"UVuf" RHS=%"UVuf"\n",
 
                 if (f & SCF_DO_STCLASS_AND) {
                     ssc_and(pRExC_state, data->start_class, (regnode_charclass *) &intrnl);
+                    ANYOF_FLAGS(data->start_class) |= ANYOF_EMPTY_STRING;
                 }
                 if (data) {
                     if (data_fake.flags & (SF_HAS_PAR|SF_IN_PAR))
index 82f0917..986eb87 100644 (file)
--- a/t/re/pat_advanced.t
+++ b/t/re/pat_advanced.t
@@ -2346,6 +2346,12 @@ EOP
         is($word, 'раб', "Handles UTF8 trie correctly");
     }
 
+    { # [perl #122460]
+        my $a = "rdvark";
+        $a =~ /(?{})(?=[A-Za-z0-9_])a*?/g;
+        is (pos $a, 0, "optimizer correctly thinks (?=...) is 0-length");
+    }
+
     #
     # Keep the following tests last -- they may crash perl
     #