From 225fb84f3eb1da83cbc8c79add24882deac79906 Mon Sep 17 00:00:00 2001 From: Karl Williamson Date: Fri, 26 Oct 2012 10:48:48 -0600 Subject: [PATCH] charnames: Don't accept illegal :aliases Now that improper names for characters are an error, we can forbid them at definition time. For the time being allow a colon in the check that continues to be run in toke.c. This will be removed in a future commit. --- lib/_charnames.pm | 32 ++++++++++++++++++++++++++++---- pod/perldiag.pod | 6 ++++++ t/lib/charnames/alias | 14 ++++++++++++++ toke.c | 7 ++++--- 4 files changed, 52 insertions(+), 7 deletions(-) diff --git a/lib/_charnames.pm b/lib/_charnames.pm index 347ad27..ad7684d 100644 --- a/lib/_charnames.pm +++ b/lib/_charnames.pm @@ -143,8 +143,12 @@ sub carp sub alias (@) # Set up a single alias { + my @errors; + my $alias = ref $_[0] ? $_[0] : { @_ }; - foreach my $name (keys %$alias) { + foreach my $name (sort keys %$alias) { # Sort only because it helps having + # deterministic output for + # t/lib/charnames/alias my $value = $alias->{$name}; next unless defined $value; # Omit if screwed up. @@ -163,11 +167,31 @@ sub alias (@) # Set up a single alias $^H{charnames_inverse_ords}{sprintf("%05X", $value)} = $name; } else { - # XXX validate syntax when deprecation cycle complete. ie. start - # with an alpha only, etc. - $^H{charnames_name_aliases}{$name} = $value; + if ($name !~ / ^ + \p{_Perl_Charname_Begin} + \p{_Perl_Charname_Continue}* + $ /x) { + push @errors, $name; + } + else { + $^H{charnames_name_aliases}{$name} = $value; + } } } + + # We find and output all errors from this :alias definition, rather than + # failing on the first one, so fewer runs are needed to get it to compile + if (@errors) { + foreach my $name (@errors) { + my $ok = ""; + $ok = $1 if $name =~ / ^ ( \p{Alpha} [-\p{XPosixWord} ():\xa0]* ) /x; + my $first_bad = substr($name, length($ok), 1); + $name = "Invalid character in charnames alias definition; marked by <-- HERE in '$ok$first_bad<-- HERE " . substr($name, length($ok) + 1) . "'"; + } + croak join "\n", @errors; + } + + return; } # alias sub not_legal_use_bytes_msg { diff --git a/pod/perldiag.pod b/pod/perldiag.pod index 5cac1d1..27fb232 100644 --- a/pod/perldiag.pod +++ b/pod/perldiag.pod @@ -2365,6 +2365,12 @@ recognized by Perl or by a user-supplied handler. See L. (F) Only certain characters are valid for character names. The indicated one isn't. See L. +=item Invalid character in charnames alias definition; marked by <-- HERE in '%s + +(F) You tried to create a custom alias for a character name, with +the C<:alias> option to C and the specified character in +the indicated name isn't valid. See L. + =item Invalid conversion in %s: "%s" (W printf) Perl does not understand the given format conversion. See diff --git a/t/lib/charnames/alias b/t/lib/charnames/alias index 35ecd45..0424f62 100644 --- a/t/lib/charnames/alias +++ b/t/lib/charnames/alias @@ -347,3 +347,17 @@ print charnames::viacode(0x80), "\n"; EXPECT OPTIONS regex PADDING CHARACTER +######## +# NAME various wrong characters in :alias are errors +# Below, one of the EXPECT regexes matches both the UTF-8 and non-UTF-8 form. +# This is because under some circumstances the message gets output as UTF-8. +use charnames ":full", ":alias" => { + "4e_ACUTE" => "LATIN SMALL LETTER E WITH ACUTE", + "e_A,CUTE" => "LATIN SMALL LETTER E WITH ACUTE", + "e_ACUT\x{d7}E" => "LATIN SMALL LETTER E WITH ACUTE", + }; +EXPECT +OPTIONS regex +Invalid character in charnames alias definition; marked by <-- HERE in '4<-- HERE e_ACUTE' +Invalid character in charnames alias definition; marked by <-- HERE in 'e_A,<-- HERE CUTE' +Invalid character in charnames alias definition; marked by <-- HERE in 'e_ACUT(?:\x{d7}|\x{C3}\x{97})<-- HERE E' diff --git a/toke.c b/toke.c index b3c3767..7cb0070 100644 --- a/toke.c +++ b/toke.c @@ -2663,7 +2663,8 @@ S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e) return NULL; } - { + { /* This code needs to be sync'ed with a regex in _charnames.pm which + does the same thing */ bool problematic = FALSE; const char* i = s; @@ -2672,7 +2673,7 @@ S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e) if (! UTF) { if (! isALPHAU(*i)) problematic = TRUE; else for (i = s + 1; i < e; i++) { - if (isCHARNAME_CONT(*i)) continue; + if (isCHARNAME_CONT(*i) || *i == ':') continue; problematic = TRUE; break; } @@ -2697,7 +2698,7 @@ S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e) i+= UTF8SKIP(i)) { if (UTF8_IS_INVARIANT(*i)) { - if (isCHARNAME_CONT(*i)) continue; + if (isCHARNAME_CONT(*i) || *i == ':') continue; } else if (! UTF8_IS_DOWNGRADEABLE_START(*i)) { continue; } else if (isCHARNAME_CONT( -- 1.8.3.1