X-Git-Url: https://perl5.git.perl.org/perl5.git/blobdiff_plain/a23c04e41e8e54701da113d9309ef79fb888afe6..e41d30bc98dce1dea5c4a5747bd838c25bdeda5d:/lib/charnames.pm

diff --git a/lib/charnames.pm b/lib/charnames.pm
index 80f31e7..3f69662 100644
--- a/lib/charnames.pm
+++ b/lib/charnames.pm
@@ -18,6 +18,11 @@ my %alias1 = (
 		'FF'			=> 'FORM FEED (FF)',
 		'CR'			=> 'CARRIAGE RETURN (LF)',
 		'NEL'			=> 'NEXT LINE (NEL)',
+	        # More convenience.  For further convenience,
+	        # it is suggested that some way of using the NamesList
+		# aliases be implemented.
+	        'ZWNJ'			=> 'ZERO WIDTH NON-JOINER',
+	        'ZWJ'			=> 'ZERO WIDTH JOINER',
 		'BOM'			=> 'BYTE ORDER MARK',
 	    );
 
@@ -55,7 +60,7 @@ sub charnames
 
   if ($name eq "BYTE ORDER MARK") {
       $fname = $name;
-      $ord = 0xFFFE;
+      $ord = 0xFEFF;
   } else {
       ## Suck in the code/name list as a big string.
       ## Lines look like:
@@ -104,7 +109,7 @@ sub charnames
       
       ##
       ## Now know where in the string the name starts.
-      ## The code, in hex, is befor that.
+      ## The code, in hex, is before that.
       ##
       ## The code can be 4-6 characters long, so we've got to sort of
       ## go look for it, just after the newline that comes before $off[0].
@@ -181,7 +186,7 @@ my %viacode;
 sub viacode
 {
     if (@_ != 1) {
-        carp "charnames::viacode() expects one numeric argument";
+        carp "charnames::viacode() expects one argument";
         return ()
     }
 
@@ -199,7 +204,7 @@ sub viacode
 
     if ($code > 0x10FFFF) {
 	carp "Unicode characters only allocated up to 0x10FFFF (you asked for $hex)";
-	return "\x{FFFD}";
+	return;
     }
 
     return $viacode{$hex} if exists $viacode{$hex};
@@ -208,11 +213,8 @@ sub viacode
 
     if ($txt =~ m/^$hex\t\t(.+)/m) {
         return $viacode{$hex} = $1;
-    } elsif ($hex eq 'FFFE') {
-        return $viacode{$hex} = "BYTE ORDER MARK";
     } else {
-	carp "Unknown charcode '$hex'";
-        return "\x{FFFD}";
+        return;
     }
 }
 
@@ -244,7 +246,7 @@ __END__
 
 =head1 NAME
 
-charnames - define character names for C<\N{named}> string literal escapes.
+charnames - define character names for C<\N{named}> string literal escapes
 
 =head1 SYNOPSIS
 
@@ -336,9 +338,12 @@ prints "FOUR TEARDROP-SPOKED ASTERISK".
 
 Returns undef if no name is known for the code.
 
-This works only for the standard names, and does not yet aply 
+This works only for the standard names, and does not yet apply 
 to custom translators.
 
+Notice that the name returned for U+FEFF is "ZERO WIDTH NO-BREAK
+SPACE", not "BYTE ORDER MARK".
+
 =head1 charnames::vianame(code)
 
 Returns the code point indicated by the name.
@@ -379,7 +384,12 @@ One can also use
     BYTE ORDER MARK
     BOM
 
-though that is of course not a legal character as such.
+and
+
+    ZWNJ
+    ZWJ
+
+for ZERO WIDTH NON-JOINER and ZERO WIDTH JOINER.
 
 For backward compatibility one can use the old names for
 certain C0 and C1 controls