sub alias (@) # Set up a single alias
{
+ my @errors;
+
my $alias = ref $_[0] ? $_[0] : { @_ };
- foreach my $name (keys %$alias) {
+ foreach my $name (sort keys %$alias) { # Sort only because it helps having
+ # deterministic output for
+ # t/lib/charnames/alias
my $value = $alias->{$name};
next unless defined $value; # Omit if screwed up.
$^H{charnames_inverse_ords}{sprintf("%05X", $value)} = $name;
}
else {
- # XXX validate syntax when deprecation cycle complete. ie. start
- # with an alpha only, etc.
- $^H{charnames_name_aliases}{$name} = $value;
+ if ($name !~ / ^
+ \p{_Perl_Charname_Begin}
+ \p{_Perl_Charname_Continue}*
+ $ /x) {
+ push @errors, $name;
+ }
+ else {
+ $^H{charnames_name_aliases}{$name} = $value;
+ }
}
}
+
+ # We find and output all errors from this :alias definition, rather than
+ # failing on the first one, so fewer runs are needed to get it to compile
+ if (@errors) {
+ foreach my $name (@errors) {
+ my $ok = "";
+ $ok = $1 if $name =~ / ^ ( \p{Alpha} [-\p{XPosixWord} ():\xa0]* ) /x;
+ my $first_bad = substr($name, length($ok), 1);
+ $name = "Invalid character in charnames alias definition; marked by <-- HERE in '$ok$first_bad<-- HERE " . substr($name, length($ok) + 1) . "'";
+ }
+ croak join "\n", @errors;
+ }
+
+ return;
} # alias
sub not_legal_use_bytes_msg {
(F) Only certain characters are valid for character names. The
indicated one isn't. See L<charnames/CUSTOM ALIASES>.
+=item Invalid character in charnames alias definition; marked by <-- HERE in '%s
+
+(F) You tried to create a custom alias for a character name, with
+the C<:alias> option to C<use charnames> and the specified character in
+the indicated name isn't valid. See L<charnames/CUSTOM ALIASES>.
+
=item Invalid conversion in %s: "%s"
(W printf) Perl does not understand the given format conversion. See
EXPECT
OPTIONS regex
PADDING CHARACTER
+########
+# NAME various wrong characters in :alias are errors
+# Below, one of the EXPECT regexes matches both the UTF-8 and non-UTF-8 form.
+# This is because under some circumstances the message gets output as UTF-8.
+use charnames ":full", ":alias" => {
+ "4e_ACUTE" => "LATIN SMALL LETTER E WITH ACUTE",
+ "e_A,CUTE" => "LATIN SMALL LETTER E WITH ACUTE",
+ "e_ACUT\x{d7}E" => "LATIN SMALL LETTER E WITH ACUTE",
+ };
+EXPECT
+OPTIONS regex
+Invalid character in charnames alias definition; marked by <-- HERE in '4<-- HERE e_ACUTE'
+Invalid character in charnames alias definition; marked by <-- HERE in 'e_A,<-- HERE CUTE'
+Invalid character in charnames alias definition; marked by <-- HERE in 'e_ACUT(?:\x{d7}|\x{C3}\x{97})<-- HERE E'
return NULL;
}
- {
+ { /* This code needs to be sync'ed with a regex in _charnames.pm which
+ does the same thing */
bool problematic = FALSE;
const char* i = s;
if (! UTF) {
if (! isALPHAU(*i)) problematic = TRUE;
else for (i = s + 1; i < e; i++) {
- if (isCHARNAME_CONT(*i)) continue;
+ if (isCHARNAME_CONT(*i) || *i == ':') continue;
problematic = TRUE;
break;
}
i+= UTF8SKIP(i))
{
if (UTF8_IS_INVARIANT(*i)) {
- if (isCHARNAME_CONT(*i)) continue;
+ if (isCHARNAME_CONT(*i) || *i == ':') continue;
} else if (! UTF8_IS_DOWNGRADEABLE_START(*i)) {
continue;
} else if (isCHARNAME_CONT(