This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
mktables: Add tests for t/re/unipropsFOO
author Karl Williamson <khw@cpan.org>
Sun, 8 Apr 2018 20:40:49 +0000 (14:40 -0600)
committer Karl Williamson <khw@cpan.org>
Tue, 17 Apr 2018 17:31:05 +0000 (11:31 -0600)
Make sure that a non-binary property doesn't get mistakenly matched in
\p{}, which is only for binary ones.  There are some ambiguities that
this test keeps us from falling victim to.

charclass_invlists.h
lib/unicore/mktables
regcharclass.h

index b908dfe..af73daf 100644 (file)
@@ -170675,7 +170675,7 @@ static const U8 WB_table[24][24] = {
  * be0f129691d479aa38646e4ca0ec1ee576ae7f75b0300a5624a7fa862fa8abba lib/unicore/extracted/DLineBreak.txt
  * 92449d354d9f6b6f2f97a292ebb59f6344ffdeb83d120d7d23e569c43ba67cd5 lib/unicore/extracted/DNumType.txt
  * e3a319527153b0c6c0c549b40fc6f3a01a7a0dcd6620784391db25901df3b154 lib/unicore/extracted/DNumValues.txt
- * ea4dc61a00d2db9bd46f3ddec706b5b7b11e8fcf848fb384b54b507fb70d8e90 lib/unicore/mktables
+ * ba5b13123afb59ea004c14fcbac80b5ff428c5b8f1b84bf8602b4ffb7da0407f lib/unicore/mktables
  * 21653d2744fdd071f9ef138c805393901bb9547cf3e777ebf50215a191f986ea lib/unicore/version
  * 913d2f93f3cb6cdf1664db888bf840bc4eb074eef824e082fceda24a9445e60c regen/charset_translations.pl
  * 9108f918cebbc47c3f673698e4c8563dfd5b4e2c2379c334f0104ad7882b2617 regen/mk_invlists.pl
index 64a7bc6..8d53515 100644 (file)
@@ -19109,6 +19109,24 @@ EOF_CODE
                                  lc $a->name cmp lc $b->name
                                } property_ref('*'))
     {
+        # Non-binary properties should not match \p{};  Test all for that.
+        if ($property->type != $BINARY) {
+            my @property_aliases = grep { $_->status ne $INTERNAL_ALIAS }
+                                                            $property->aliases;
+            foreach my $property_alias ($property->aliases) {
+                my $name = standardize($property_alias->name);
+
+                # But some names are ambiguous, meaning a binary property with
+                # the same name when used in \p{}, and a different
+                # (non-binary) property in other contexts.
+                next if grep { $name eq $_ } keys %ambiguous_names;
+
+                push @output, <<"EOF_CODE";
+Error('\\p{$name}');
+Error('\\P{$name}');
+EOF_CODE
+            }
+        }
         foreach my $table (sort { $a->has_dependency <=> $b->has_dependency
                                     or
                                   lc $a->name cmp lc $b->name
index 38fcece..dc2e382 100644 (file)
  * be0f129691d479aa38646e4ca0ec1ee576ae7f75b0300a5624a7fa862fa8abba lib/unicore/extracted/DLineBreak.txt
  * 92449d354d9f6b6f2f97a292ebb59f6344ffdeb83d120d7d23e569c43ba67cd5 lib/unicore/extracted/DNumType.txt
  * e3a319527153b0c6c0c549b40fc6f3a01a7a0dcd6620784391db25901df3b154 lib/unicore/extracted/DNumValues.txt
- * ea4dc61a00d2db9bd46f3ddec706b5b7b11e8fcf848fb384b54b507fb70d8e90 lib/unicore/mktables
+ * ba5b13123afb59ea004c14fcbac80b5ff428c5b8f1b84bf8602b4ffb7da0407f lib/unicore/mktables
  * 21653d2744fdd071f9ef138c805393901bb9547cf3e777ebf50215a191f986ea lib/unicore/version
  * 913d2f93f3cb6cdf1664db888bf840bc4eb074eef824e082fceda24a9445e60c regen/charset_translations.pl
  * 9ea6338945a7d70e5ea4b31ac7856c0b521df96be002e94b4b3b7d31debbf3ab regen/regcharclass.pl