Change pod for macros that require "literal strings"

[perl5.git] / lib / charnames.pm
diff --git a/lib/charnames.pm b/lib/charnames.pm

index 2dd3b62..e22c719 100644 (file)
--- a/lib/charnames.pm
+++ b/lib/charnames.pm
@@ -1,7 +1,7 @@
  package charnames;
  use strict;
  use warnings;
-our $VERSION = '1.34';
+our $VERSION = '1.45';
  use unicore::Name;    # mktables-generated algorithmically-defined names
  use _charnames ();    # The submodule for this where most of the work gets done
  
@@ -49,7 +49,7 @@ sub vianame
      # can't change it because of backward compatibility.  New code can use
      # string_vianame() instead.
      my $ord = CORE::hex $1;
-    return chr $ord if $ord <= 255 || ! ((caller 0)[8] & $bytes::hint_bits);
+    return pack("U", $ord) if $ord <= 255 || ! ((caller 0)[8] & $bytes::hint_bits);
      _charnames::carp _charnames::not_legal_use_bytes_msg($arg, chr $ord);
      return;
    }
@@ -74,7 +74,7 @@ sub string_vianame {
    if ($arg =~ /^U\+([0-9a-fA-F]+)$/) {
  
      my $ord = CORE::hex $1;
-    return chr $ord if $ord <= 255 || ! ((caller 0)[8] & $bytes::hint_bits);
+    return pack("U", $ord) if $ord <= 255 || ! ((caller 0)[8] & $bytes::hint_bits);
  
      _charnames::carp _charnames::not_legal_use_bytes_msg($arg, chr $ord);
      return;
@@ -171,7 +171,7 @@ charnames ();">> did not enable C<\N{I<CHARNAME>}>.)
  
  Note that C<\N{U+I<...>}>, where the I<...> is a hexadecimal number,
  also inserts a character into a string.
-The character it inserts is the one whose code point
+The character it inserts is the one whose Unicode code point
  (ordinal value) is equal to the number.  For example, C<"\N{U+263a}"> is
  the Unicode (white background, black foreground) smiley face
  equivalent to C<"\N{WHITE SMILING FACE}">.
@@ -228,7 +228,7 @@ input name is that of a character that won't fit into a byte (i.e., whose
  ordinal is above 255).
  
  Otherwise, any string that includes a C<\N{I<charname>}> or
-C<S<\N{U+I<code point>}>> will automatically have Unicode semantics (see
+C<S<\N{U+I<code point>}>> will automatically have Unicode rules (see
  L<perlunicode/Byte and Character Semantics>).
  
  =head1 LOOSE MATCHES
@@ -277,16 +277,16 @@ you're twisted enough, you can change C<"\N{LATIN CAPITAL LETTER A}"> to
  mean C<"B">, etc.
  
  Aliases must begin with a character that is alphabetic.  After that, each may
-contain any combination of word (C<\w>) characters, SPACE, (U+0020),
-HYPHEN-MINUS (U+002D), LEFT PARENTHESIS (U+0028), RIGHT PARENTHESIS (U+0029),
-and NO-BREAK SPACE (U+00A0).  These last three should never have been allowed
-in names, and are retained for backwards compatibility only; they may be
+contain any combination of word (C<\w>) characters, SPACE (U+0020),
+HYPHEN-MINUS (U+002D), LEFT PARENTHESIS (U+0028), and RIGHT PARENTHESIS
+(U+0029).  These last two should never have been allowed
+in names, and are retained for backwards compatibility only; they may be
  deprecated and removed in future releases of Perl, so don't use them for new
  names.  (More precisely, the first character of a name you specify must be
  something that matches all of C<\p{ID_Start}>, C<\p{Alphabetic}>, and
  C<\p{Gc=Letter}>.  This makes sure it is what any reasonable person would view
-as an alphabetic character.  And, the other characters that match C<\w> must
-also match C<\p{ID_Continue}>.)  Starting with Perl v5.18, any Unicode
+as an alphabetic character.  And, the continuation characters that match C<\w>
+must also match C<\p{ID_Continue}>.)  Starting with Perl v5.18, any Unicode
  characters meeting the above criteria may be used; prior to that only
  Latin1-range characters were acceptable.
  
@@ -295,11 +295,15 @@ matched name) or to a
  numeric code point (ordinal).  The latter is useful for assigning names
  to code points in Unicode private use areas such as U+E800 through
  U+F8FF.
-A numeric code point must be a non-negative integer or a string beginning
+A numeric code point must be a non-negative integer, or a string beginning
  with C<"U+"> or C<"0x"> with the remainder considered to be a
  hexadecimal integer.  A literal numeric constant must be unsigned; it
  will be interpreted as hex if it has a leading zero or contains
  non-decimal hex digits; otherwise it will be interpreted as decimal.
+If it begins with C<"U+">, it is interpreted as a Unicode code point;
+otherwise it is interpreted as a native code point.  (Only code points below
+256 can differ between Unicode and native.)  Thus C<U+41> is always the Latin
+letter "A"; but C<0x41> can be "NO-BREAK SPACE" on EBCDIC platforms.
  
  Aliases are added either by the use of anonymous hashes:
  
@@ -357,7 +361,7 @@ C<string_vianame> returns C<undef> instead of it being a syntax error.
  =head1 charnames::vianame(I<name>)
  
  This is similar to C<string_vianame>.  The main difference is that under most
-circumstances, vianame returns an ordinal code
+circumstances, C<vianame> returns an ordinal code
  point, whereas C<string_vianame> returns a string.  For example,
  
     printf "U+%04X", charnames::vianame("FOUR TEARDROP-SPOKED ASTERISK");
@@ -402,6 +406,10 @@ with C<"U+"> or C<"0x"> with the remainder considered to be a
  hexadecimal integer.  A literal numeric constant must be unsigned; it
  will be interpreted as hex if it has a leading zero or contains
  non-decimal hex digits; otherwise it will be interpreted as decimal.
+If it begins with C<"U+">, it is interpreted as a Unicode code point;
+otherwise it is interpreted as a native code point.  (Only code points below
+256 can differ between Unicode and native.)  Thus C<U+41> is always the Latin
+letter "A"; but C<0x41> can be "NO-BREAK SPACE" on EBCDIC platforms.
  
  As mentioned above under L</ALIASES>, Unicode 6.1 defines extra names
  (synonyms or aliases) for some code points, most of which were already