This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
PATCH: [perl #132463] perluniprops for \p{Word}
author Karl Williamson <khw@cpan.org>
Sat, 18 Nov 2017 16:35:25 +0000 (09:35 -0700)
committer Karl Williamson <khw@cpan.org>
Sat, 18 Nov 2017 16:47:16 +0000 (09:47 -0700)
perluniprops was not updated to reflect the changes made to what
\p{Word} contains as of 5.18.  What was added were the code points that
have the Join_Control property, which, so far, are only U+200C and
U+200D.  This commit names \p{Join_Control} in the description instead of
the hard-coded code point numbers, so that if Unicode changes which code
points have that property, the description will automatically still be
valid.

Thanks for spotting this.

charclass_invlists.h
lib/unicore/mktables
regcharclass.h

index 1ae9812..4eb26ef 100644 (file)
@@ -97454,7 +97454,7 @@ static const U8 WB_table[24][24] = {
  * be0f129691d479aa38646e4ca0ec1ee576ae7f75b0300a5624a7fa862fa8abba lib/unicore/extracted/DLineBreak.txt
  * 92449d354d9f6b6f2f97a292ebb59f6344ffdeb83d120d7d23e569c43ba67cd5 lib/unicore/extracted/DNumType.txt
  * e3a319527153b0c6c0c549b40fc6f3a01a7a0dcd6620784391db25901df3b154 lib/unicore/extracted/DNumValues.txt
- * 409910af8e9ad2cba213c70f936a6c716494029a8346e8b55dd44275cc51d35d lib/unicore/mktables
+ * d905c655c0cc448f19894613ce87026dea667bb66c65937667424fc0afdc90c4 lib/unicore/mktables
  * 21653d2744fdd071f9ef138c805393901bb9547cf3e777ebf50215a191f986ea lib/unicore/version
  * 913d2f93f3cb6cdf1664db888bf840bc4eb074eef824e082fceda24a9445e60c regen/charset_translations.pl
  * 48418cbf454eb9ef35c73468ed5ef72ad8603490eabe74181ce4fae42ec72579 regen/mk_invlists.pl
index 1386735..200ae46 100644 (file)
@@ -14499,7 +14499,8 @@ sub compile_perl() {
 
     my $Word = $perl->add_match_table('Word', Full_Name => 'XPosixWord',
                                 Description => '\w, including beyond ASCII;'
-                                            . ' = \p{Alnum} + \pM + \p{Pc}',
+                                            . ' = \p{Alnum} + \pM + \p{Pc}'
+                                            . ' + \p{Join_Control}',
                                 Initialize => $Alnum + $gc->table('Mark'),
                                 );
     my $Pc = $gc->table('Connector_Punctuation'); # 'Pc' Not in release 1
index 6b3e846..38ddd84 100644 (file)
  * be0f129691d479aa38646e4ca0ec1ee576ae7f75b0300a5624a7fa862fa8abba lib/unicore/extracted/DLineBreak.txt
  * 92449d354d9f6b6f2f97a292ebb59f6344ffdeb83d120d7d23e569c43ba67cd5 lib/unicore/extracted/DNumType.txt
  * e3a319527153b0c6c0c549b40fc6f3a01a7a0dcd6620784391db25901df3b154 lib/unicore/extracted/DNumValues.txt
- * 409910af8e9ad2cba213c70f936a6c716494029a8346e8b55dd44275cc51d35d lib/unicore/mktables
+ * d905c655c0cc448f19894613ce87026dea667bb66c65937667424fc0afdc90c4 lib/unicore/mktables
  * 21653d2744fdd071f9ef138c805393901bb9547cf3e777ebf50215a191f986ea lib/unicore/version
  * 913d2f93f3cb6cdf1664db888bf840bc4eb074eef824e082fceda24a9445e60c regen/charset_translations.pl
  * 9ea6338945a7d70e5ea4b31ac7856c0b521df96be002e94b4b3b7d31debbf3ab regen/regcharclass.pl