This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
PATCH: [perl #133998] regcomp assertion failure
authorKarl Williamson <khw@cpan.org>
Sun, 7 Apr 2019 04:42:05 +0000 (22:42 -0600)
committerKarl Williamson <khw@cpan.org>
Tue, 9 Apr 2019 23:29:28 +0000 (17:29 -0600)
This was caused in the blamed commit by me thinking I could simplify too
much.  So this commit puts back in checking specially for things like
/[\s\S]/l, which has to match every possible code point no matter what
the locale.

regcomp.c
t/re/pat.t

index c654ff7..a7e57e2 100644 (file)
--- a/regcomp.c
+++ b/regcomp.c
@@ -17331,13 +17331,30 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
             ) {
                 SV* scratch_list = NULL;
 
-                /* What the Posix classes (like \w, [:space:]) match in locale
-                 * isn't knowable under locale until actual match time.  A
+                /* What the Posix classes (like \w, [:space:]) match isn't
+                 * generally knowable under locale until actual match time.  A
                  * special node is used for these which has extra space for a
                  * bitmap, with a bit reserved for each named class that is to
-                 * be matched against.  This isn't needed for \p{} and
+                 * be matched against.  (This isn't needed for \p{} and
                  * pseudo-classes, as they are not affected by locale, and
-                 * hence are dealt with separately */
+                 * hence are dealt with separately.)  However, if a named class
+                 * and its complement are both present, then it matches
+                 * everything, and there is no runtime dependency.  Odd numbers
+                 * are the complements of the next lower number, so xor works.
+                 * (Note that something like [\w\D] should match everything,
+                 * because \d should be a proper subset of \w.  But rather than
+                 * trust that the locale is well behaved, we leave this to
+                 * runtime to sort out) */
+                if (POSIXL_TEST(posixl, namedclass ^ 1)) {
+                    cp_list = _add_range_to_invlist(cp_list, 0, UV_MAX);
+                    POSIXL_ZERO(posixl);
+                    has_runtime_dependency &= ~HAS_L_RUNTIME_DEPENDENCY;
+                    anyof_flags &= ~ANYOF_MATCHES_POSIXL;
+                    continue;   /* We could ignore the rest of the class, but
+                                   best to parse it for any errors */
+                }
+                else { /* Here, isn't the complement of any already parsed
+                          class */
                 POSIXL_SET(posixl, namedclass);
                 has_runtime_dependency |= HAS_L_RUNTIME_DEPENDENCY;
                 anyof_flags |= ANYOF_MATCHES_POSIXL;
@@ -17365,6 +17382,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                     SvREFCNT_dec_NN(scratch_list);
                 }
                 continue;   /* Go get next character */
+                }
             }
             else {
 
index 57ef00c..f309dd6 100644 (file)
@@ -25,7 +25,7 @@ BEGIN {
 skip_all('no re module') unless defined &DynaLoader::boot_DynaLoader;
 skip_all_without_unicode_tables();
 
-plan tests => 860;  # Update this when adding/deleting tests.
+plan tests => 861;  # Update this when adding/deleting tests.
 
 run_tests() unless caller;
 
@@ -2083,6 +2083,11 @@ x{1c!}\;\;îçÿp \0\7fqr/elsif/!\0eF\ 5\0\0/;îçÿù\Q\0\ 1\0\0x\10ÿÿÿÿ\0\0\0ùHQx\0\0\0\7f`Lx{1c
 x{0c!}\;\;îçÿ \0\7f/0f/!\0F\ 5\0\0/;îçÿù\Q\0\ 1\0\0x\10ÿÿÿÿ\0\0\0ù\0\0\0\7f`x{0c!}\ 1;\0\0\0ù\Q`\Qx`\x{0c!}\;ÿÿÿÿ!}\;îçÿù\Q\87\ 1\x\0ÿÿÿÿ\0\0>=\Qx`\Qx`\0\0ù\ò`ÿ\0\0>=\Qx`\Qx`\0\0ù\ò`\Qx`\x{0c!};\;îçÿ \0000t0F\ 5\0000t0\0\0\80\ 1d?n \0\0\0ù \0\7fç\80\0\0!00000000000000000000000m/00000000000000000000\ e00000000000m/\0\10\10\10\\0\0\ 1\0\10\10\10\10)|\10\10\ 4\ 4i', "", {}, "[perl #133933]");
     }
 
+    {   # perl #133998]
+        fresh_perl_is('print "\x{110000}" =~ qr/(?l)|[^\S\pC\s]/', 1, {},
+        '/[\S\s]/l works');
+    }
+
 } # End of sub run_tests
 
 1;