Unicode has changed its definition of what should match \w; see
http://www.unicode.org/reports/tr18/. This commit follows that change.
else {
$Word += ord('_'); # Make sure this is a $Word
}
+ # Unicode's revised definition of \w (see UTS #18) also includes the
+ # Join_Control code points, so fold them into $Word.
+ my $JC = property_ref('Join_Control'); # Wasn't in release 1
+ if (defined $JC) {
+ $Word += $JC->table('Y');
+ }
+ else {
+ # The property is unavailable, so add its two members, U+200C (ZERO WIDTH
+ # NON-JOINER) and U+200D (ZERO WIDTH JOINER), one at a time. Writing
+ # '0x200C + 0x200D' on the right-hand side would be summed arithmetically
+ # first (binary + binds tighter than +=), adding only U+4019.
+ $Word += 0x200C;
+ $Word += 0x200D;
+ }
# This is a Perl extension, so the name doesn't begin with Posix.
my $PerlWord = $perl->add_match_table('PerlWord',
=item *
-XXX
+C<\w> now matches the code points U+200C (ZERO WIDTH NON-JOINER) and
+U+200D (ZERO WIDTH JOINER). C<\W> no longer matches these. This change
+is because Unicode corrected their definition of what C<\w> should match.
=back
(?a:\p{Any}) \x{100} y $& \x{100}
(?aa:\p{Any}) \x{100} y $& \x{100}
+\w \x{200C} y $& \x{200C}
+\W \x{200C} n - -
+\w \x{200D} y $& \x{200D}
+\W \x{200D} n - -
+
# vim: softtabstop=0 noexpandtab