use strict;
use warnings;
use File::Spec;
-our $VERSION = '1.35';
+our $VERSION = '1.39';
use unicore::Name; # mktables-generated algorithmically-defined names
use bytes (); # for $bytes::hint_bits
my %system_aliases = (
- 'SINGLE-SHIFT 2' => pack("U", 0x8E),
- 'SINGLE-SHIFT 3' => pack("U", 0x8F),
- 'PRIVATE USE 1' => pack("U", 0x91),
- 'PRIVATE USE 2' => pack("U", 0x92),
+ 'SINGLE-SHIFT 2' => pack("U", utf8::unicode_to_native(0x8E)),
+ 'SINGLE-SHIFT 3' => pack("U", utf8::unicode_to_native(0x8F)),
+ 'PRIVATE USE 1' => pack("U", utf8::unicode_to_native(0x91)),
+ 'PRIVATE USE 2' => pack("U", utf8::unicode_to_native(0x92)),
);
# These are the aliases above that differ under :loose and :full matching
#);
#my %deprecated_aliases;
-#$deprecated_aliases{'BELL'} = pack("U", 0x07) if $^V lt v5.17.0;
+#$deprecated_aliases{'BELL'} = pack("U", utf8::unicode_to_native(0x07)) if $^V lt v5.17.0;
#my %loose_deprecated_aliases = (
#);
# hex, but makes the code easier to maintain, and is called
# infrequently, only at compile-time
if ($value !~ $decimal_qr && $value =~ $hex_qr) {
- $value = CORE::hex $1;
+ my $temp = CORE::hex $1;
+ $temp = utf8::unicode_to_native($temp) if $value =~ /^[Uu]\+/;
+ $value = $temp;
}
if ($value =~ $decimal_qr) {
no warnings qw(non_unicode surrogate nonchar); # Allow any of these
\p{_Perl_Charname_Begin}
\p{_Perl_Charname_Continue}*
$ /x) {
+
push @errors, $name;
}
else {
$^H{charnames_name_aliases}{$name} = $value;
+
+ if (warnings::enabled('deprecated')) {
+ if ($name =~ / ( .* \s ) ( \s* ) $ /x) {
+ carp "Trailing white-space in a charnames alias definition is deprecated; marked by <-- HERE in '$1 <-- HERE " . $2 . "'";
+ }
+
+ # Use '+' instead of '*' in this regex, because any trailing
+ # blanks have already been warned about.
+ if ($name =~ / ( .*? \s{2} ) ( .+ ) /x) {
+ carp "A sequence of multiple spaces in a charnames alias definition is deprecated; marked by <-- HERE in '$1 <-- HERE " . $2 . "'";
+ }
+ }
}
}
}
if (@errors) {
foreach my $name (@errors) {
my $ok = "";
- $ok = $1 if $name =~ / ^ ( \p{Alpha} [-\p{XPosixWord} ():\xa0]* ) /x;
+ my $nbsp = chr utf8::unicode_to_native(0xa0);
+ $ok = $1 if $name =~ / ^ ( \p{Alpha} [-\p{XPosixWord} ():$nbsp]* ) /x;
my $first_bad = substr($name, length($ok), 1);
$name = "Invalid character in charnames alias definition; marked by <-- HERE in '$ok$first_bad<-- HERE " . substr($name, length($ok) + 1) . "'";
}
/xs)
{
# Even in non-loose matching, the script traditionally has been
- # case insensitve
+ # case insensitive
$scripts_trie = "\U$1";
$lookup_name = $2;
ref $alias eq "HASH" or
croak "Only HASH reference supported as argument to :alias";
alias ($alias);
+ $promote = 1;
next;
}
if ($alias =~ m{:(\w+)$}) {
alias_file ($1) and $promote = 1;
next;
}
- alias_file ($alias);
+ alias_file ($alias) and $promote = 1;
next;
}
if (substr($arg, 0, 1) eq ':'
# not an issue.
my %viacode;
+my $no_name_code_points_re = join "|", map { sprintf("%05X",
+ utf8::unicode_to_native($_)) }
+ 0x80, 0x81, 0x84, 0x99;
+$no_name_code_points_re = qr/$no_name_code_points_re/;
+
sub viacode {
# Returns the name of the code point argument
if ($arg =~ $decimal_qr) {
$hex = sprintf "%05X", $arg;
} elsif ($arg =~ $hex_qr) {
+ $hex = CORE::hex $1;
+ $hex = utf8::unicode_to_native($hex) if $arg =~ /^[Uu]\+/;
# Below is the line that differs from the _getcode() source
- $hex = sprintf "%05X", hex $1;
+ $hex = sprintf "%05X", $hex;
} else {
carp("unexpected arg \"$arg\" to charnames::viacode()");
return;
$return = substr($txt, $+[0], index($txt, "\n", $+[0]) - $+[0]);
# If not one of these 4 code points, return what we've found.
- if ($hex !~ / ^ 000 (?: 8[014] | 99 ) $ /x) {
+ if ($hex !~ / ^ $no_name_code_points_re $ /x) {
$viacode{$hex} = $return;
return $return;
}
}
return;
-} # _viacode
+} # viacode
1;