X-Git-Url: https://perl5.git.perl.org/perl5.git/blobdiff_plain/a23c04e41e8e54701da113d9309ef79fb888afe6..e41d30bc98dce1dea5c4a5747bd838c25bdeda5d:/lib/charnames.pm diff --git a/lib/charnames.pm b/lib/charnames.pm index 80f31e7..3f69662 100644 --- a/lib/charnames.pm +++ b/lib/charnames.pm @@ -18,6 +18,11 @@ my %alias1 = ( 'FF' => 'FORM FEED (FF)', 'CR' => 'CARRIAGE RETURN (LF)', 'NEL' => 'NEXT LINE (NEL)', + # More convenience. For further convenience, + # it is suggested that some way of using the NamesList + # aliases be implemented. + 'ZWNJ' => 'ZERO WIDTH NON-JOINER', + 'ZWJ' => 'ZERO WIDTH JOINER', 'BOM' => 'BYTE ORDER MARK', ); @@ -55,7 +60,7 @@ sub charnames if ($name eq "BYTE ORDER MARK") { $fname = $name; - $ord = 0xFFFE; + $ord = 0xFEFF; } else { ## Suck in the code/name list as a big string. ## Lines look like: @@ -104,7 +109,7 @@ sub charnames ## ## Now know where in the string the name starts. - ## The code, in hex, is befor that. + ## The code, in hex, is before that. ## ## The code can be 4-6 characters long, so we've got to sort of ## go look for it, just after the newline that comes before $off[0]. @@ -181,7 +186,7 @@ my %viacode; sub viacode { if (@_ != 1) { - carp "charnames::viacode() expects one numeric argument"; + carp "charnames::viacode() expects one argument"; return () } @@ -199,7 +204,7 @@ sub viacode if ($code > 0x10FFFF) { carp "Unicode characters only allocated up to 0x10FFFF (you asked for $hex)"; - return "\x{FFFD}"; + return; } return $viacode{$hex} if exists $viacode{$hex}; @@ -208,11 +213,8 @@ sub viacode if ($txt =~ m/^$hex\t\t(.+)/m) { return $viacode{$hex} = $1; - } elsif ($hex eq 'FFFE') { - return $viacode{$hex} = "BYTE ORDER MARK"; } else { - carp "Unknown charcode '$hex'"; - return "\x{FFFD}"; + return; } } @@ -244,7 +246,7 @@ __END__ =head1 NAME -charnames - define character names for C<\N{named}> string literal escapes. 
+charnames - define character names for C<\N{named}> string literal escapes =head1 SYNOPSIS @@ -336,9 +338,12 @@ prints "FOUR TEARDROP-SPOKED ASTERISK". Returns undef if no name is known for the code. -This works only for the standard names, and does not yet aply +This works only for the standard names, and does not yet apply to custom translators. +Notice that the name returned for U+FEFF is "ZERO WIDTH NO-BREAK +SPACE", not "BYTE ORDER MARK". + =head1 charnames::vianame(code) Returns the code point indicated by the name. @@ -379,7 +384,12 @@ One can also use BYTE ORDER MARK BOM -though that is of course not a legal character as such. +and + + ZWNJ + ZWJ + +for ZERO WIDTH NON-JOINER and ZERO WIDTH JOINER. For backward compatibility one can use the old names for certain C0 and C1 controls