This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
charnames: add CORE:: to hex()
[perl5.git] / lib / charnames.pm
... / ...
CommitLineData
1package charnames;
2use strict;
3use warnings;
4use File::Spec;
5our $VERSION = '1.09';
6
7use bytes (); # for $bytes::hint_bits
8
# Names and abbreviations accepted in \N{...} in addition to the official
# Unicode names, each mapped directly to its code point.  charnames() consults
# this table after the user-defined aliases and before the deprecated ones.
my %system_aliases = (
    # Icky 3.2 names with parentheses.
    'LINE FEED'               => 0x0A, # LINE FEED (LF)
    'FORM FEED'               => 0x0C, # FORM FEED (FF)
    'CARRIAGE RETURN'         => 0x0D, # CARRIAGE RETURN (CR)
    'NEXT LINE'               => 0x85, # NEXT LINE (NEL)

    # Some variant names from Wikipedia
    'SINGLE-SHIFT 2'          => 0x8E,
    'SINGLE-SHIFT 3'          => 0x8F,
    'PRIVATE USE 1'           => 0x91,
    'PRIVATE USE 2'           => 0x92,
    'START OF PROTECTED AREA' => 0x96,
    'END OF PROTECTED AREA'   => 0x97,

    # Convenience.  Standard abbreviations for the controls
    'NUL' => 0x00, # NULL
    'SOH' => 0x01, # START OF HEADING
    'STX' => 0x02, # START OF TEXT
    'ETX' => 0x03, # END OF TEXT
    'EOT' => 0x04, # END OF TRANSMISSION
    'ENQ' => 0x05, # ENQUIRY
    'ACK' => 0x06, # ACKNOWLEDGE
    'BEL' => 0x07, # BELL
    'BS'  => 0x08, # BACKSPACE
    'HT'  => 0x09, # HORIZONTAL TABULATION
    'LF'  => 0x0A, # LINE FEED (LF)
    'VT'  => 0x0B, # VERTICAL TABULATION
    'FF'  => 0x0C, # FORM FEED (FF)
    'CR'  => 0x0D, # CARRIAGE RETURN (CR)
    'SO'  => 0x0E, # SHIFT OUT
    'SI'  => 0x0F, # SHIFT IN
    'DLE' => 0x10, # DATA LINK ESCAPE
    'DC1' => 0x11, # DEVICE CONTROL ONE
    'DC2' => 0x12, # DEVICE CONTROL TWO
    'DC3' => 0x13, # DEVICE CONTROL THREE
    'DC4' => 0x14, # DEVICE CONTROL FOUR
    'NAK' => 0x15, # NEGATIVE ACKNOWLEDGE
    'SYN' => 0x16, # SYNCHRONOUS IDLE
    'ETB' => 0x17, # END OF TRANSMISSION BLOCK
    'CAN' => 0x18, # CANCEL
    'EOM' => 0x19, # END OF MEDIUM
    'SUB' => 0x1A, # SUBSTITUTE
    'ESC' => 0x1B, # ESCAPE
    'FS'  => 0x1C, # FILE SEPARATOR
    'GS'  => 0x1D, # GROUP SEPARATOR
    'RS'  => 0x1E, # RECORD SEPARATOR
    'US'  => 0x1F, # UNIT SEPARATOR
    'DEL' => 0x7F, # DELETE
    'BPH' => 0x82, # BREAK PERMITTED HERE
    'NBH' => 0x83, # NO BREAK HERE
    'NEL' => 0x85, # NEXT LINE (NEL)
    'SSA' => 0x86, # START OF SELECTED AREA
    'ESA' => 0x87, # END OF SELECTED AREA
    'HTS' => 0x88, # CHARACTER TABULATION SET
    'HTJ' => 0x89, # CHARACTER TABULATION WITH JUSTIFICATION
    'VTS' => 0x8A, # LINE TABULATION SET
    'PLD' => 0x8B, # PARTIAL LINE FORWARD
    'PLU' => 0x8C, # PARTIAL LINE BACKWARD
    'RI'  => 0x8D, # REVERSE LINE FEED
    'SS2' => 0x8E, # SINGLE SHIFT TWO
    'SS3' => 0x8F, # SINGLE SHIFT THREE
    'DCS' => 0x90, # DEVICE CONTROL STRING
    'PU1' => 0x91, # PRIVATE USE ONE
    'PU2' => 0x92, # PRIVATE USE TWO
    'STS' => 0x93, # SET TRANSMIT STATE
    'CCH' => 0x94, # CANCEL CHARACTER
    'MW'  => 0x95, # MESSAGE WAITING
    'SPA' => 0x96, # START OF GUARDED AREA
    'EPA' => 0x97, # END OF GUARDED AREA
    'SOS' => 0x98, # START OF STRING
    'SCI' => 0x9A, # SINGLE CHARACTER INTRODUCER
    'CSI' => 0x9B, # CONTROL SEQUENCE INTRODUCER
    'ST'  => 0x9C, # STRING TERMINATOR
    'OSC' => 0x9D, # OPERATING SYSTEM COMMAND
    'PM'  => 0x9E, # PRIVACY MESSAGE
    'APC' => 0x9F, # APPLICATION PROGRAM COMMAND

    # The two-letter abbreviations above used to be spelled with a trailing
    # space (column padding inside the quotes), so the lookup only matched
    # the padded spelling.  The padded keys are kept so that anything that
    # came to rely on them still resolves.
    'RI ' => 0x8D,
    'MW ' => 0x95,
    'ST ' => 0x9C,
    'PM ' => 0x9E,

    # There are no names for these in the Unicode standard;
    # perhaps should be deprecated, but then again there are
    # no alternative names, so am not deprecating.  And if
    # did, the code would have to change to not recommend an
    # alternative for these.
    'PADDING CHARACTER'                   => 0x80,
    'PAD'                                 => 0x80,
    'HIGH OCTET PRESET'                   => 0x81,
    'HOP'                                 => 0x81,
    'INDEX'                               => 0x84,
    'IND'                                 => 0x84,
    'SINGLE GRAPHIC CHARACTER INTRODUCER' => 0x99,
    'SGC'                                 => 0x99,

    # More convenience.  For further convenience,
    # it is suggested some way of using the NamesList
    # aliases be implemented, but there are ambiguities in
    # NamesList.txt
    'BOM'             => 0xFEFF, # BYTE ORDER MARK
    'BYTE ORDER MARK' => 0xFEFF,
    'CGJ'             => 0x034F, # COMBINING GRAPHEME JOINER
    'FVS1'            => 0x180B, # MONGOLIAN FREE VARIATION SELECTOR ONE
    'FVS2'            => 0x180C, # MONGOLIAN FREE VARIATION SELECTOR TWO
    'FVS3'            => 0x180D, # MONGOLIAN FREE VARIATION SELECTOR THREE
    'LRE'             => 0x202A, # LEFT-TO-RIGHT EMBEDDING
    'LRM'             => 0x200E, # LEFT-TO-RIGHT MARK
    'LRO'             => 0x202D, # LEFT-TO-RIGHT OVERRIDE
    'MMSP'            => 0x205F, # MEDIUM MATHEMATICAL SPACE
    'MVS'             => 0x180E, # MONGOLIAN VOWEL SEPARATOR
    'NBSP'            => 0x00A0, # NO-BREAK SPACE
    'NNBSP'           => 0x202F, # NARROW NO-BREAK SPACE
    'PDF'             => 0x202C, # POP DIRECTIONAL FORMATTING
    'RLE'             => 0x202B, # RIGHT-TO-LEFT EMBEDDING
    'RLM'             => 0x200F, # RIGHT-TO-LEFT MARK
    'RLO'             => 0x202E, # RIGHT-TO-LEFT OVERRIDE
    'SHY'             => 0x00AD, # SOFT HYPHEN
    'WJ'              => 0x2060, # WORD JOINER
    'ZWJ'             => 0x200D, # ZERO WIDTH JOINER
    'ZWNJ'            => 0x200C, # ZERO WIDTH NON-JOINER
    'ZWSP'            => 0x200B, # ZERO WIDTH SPACE

    # VS1 .. VS16 abbreviate VARIATION SELECTOR-1 .. -16, which occupy the
    # contiguous range U+FE00 .. U+FE0F.
    ( map { ( "VS$_" => 0xFE00 + $_ - 1 ) } 1 .. 16 ),

    # VS17 .. VS256 abbreviate VARIATION SELECTOR-17 .. -256, which occupy
    # the contiguous range U+E0100 .. U+E01EF.
    ( map { ( "VS$_" => 0xE0100 + $_ - 17 ) } 17 .. 256 ),
);
384
# Names used before Unicode 3.2 for some of the first 256 characters.  They
# still resolve, but charnames() issues a 'deprecated' warning when one is
# used.  The trailing comment on each entry gives the current name.
my %deprecated_aliases = (
    # C0 controls
    'HORIZONTAL TABULATION' => 0x09, # CHARACTER TABULATION
    'VERTICAL TABULATION'   => 0x0B, # LINE TABULATION
    'FILE SEPARATOR'        => 0x1C, # INFORMATION SEPARATOR FOUR
    'GROUP SEPARATOR'       => 0x1D, # INFORMATION SEPARATOR THREE
    'RECORD SEPARATOR'      => 0x1E, # INFORMATION SEPARATOR TWO
    'UNIT SEPARATOR'        => 0x1F, # INFORMATION SEPARATOR ONE

    # C1 controls
    'HORIZONTAL TABULATION SET'                => 0x88, # CHARACTER TABULATION SET
    'HORIZONTAL TABULATION WITH JUSTIFICATION' => 0x89, # CHARACTER TABULATION WITH JUSTIFICATION
    'VERTICAL TABULATION SET'                  => 0x8A, # LINE TABULATION SET
    'PARTIAL LINE DOWN'                        => 0x8B, # PARTIAL LINE FORWARD
    'PARTIAL LINE UP'                          => 0x8C, # PARTIAL LINE BACKWARD
    'REVERSE INDEX'                            => 0x8D, # REVERSE LINE FEED
);

# User-defined aliases, filled in by alias().  Even more convenient :)

# Alias name => another character name.
my %user_name_aliases;

# Alias name => code point.
my %user_numeric_aliases;

# Canonical hex code point ("%04X") => user alias name.
my %inverse_user_aliases;

# Contents of unicore/Name.pl; loaded on first need and then reused.
my $txt;

# An unsigned decimal integer with no leading zero.
my $decimal_qr = qr/^[1-9]\d*$/;

# A hex number, optionally introduced by "U+", "u+", "0x" or "0X".
# Returns the hex number in $1.
my $hex_qr = qr/^(?:[Uu]\+|0[xX])?([[:xdigit:]]+)$/;
418
# Die with a message reported from the caller's point of view.
#
# Carp is loaded only when an error actually has to be raised.  Control is
# handed over with "goto &" so that this wrapper passes its @_ straight
# through and does not appear as an extra frame to Carp.
sub croak {
    require Carp;
    goto &Carp::croak;
}
423
# Warn with a message reported from the caller's point of view.
#
# Carp is loaded only when a warning actually has to be issued.  Control is
# handed over with "goto &" so that this wrapper passes its @_ straight
# through and does not appear as an extra frame to Carp.
sub carp {
    require Carp;
    goto &Carp::carp;
}
428
# Register user-defined character name aliases.
#
# Takes either a single hash reference or a flat list of name => value
# pairs.  Each value is classified as follows:
#   * matches $decimal_qr -> the name maps to that code point;
#   * matches $hex_qr     -> the hex digits are converted and the name maps
#                            to the resulting code point;
#   * anything else       -> the value is taken to be another character
#                            name, which charnames() resolves later.
# Numeric aliases are also entered in %inverse_user_aliases, keyed by the
# canonical "%04X" form of the code point.
sub alias (@)
{
    my $alias = ref $_[0] ? $_[0] : { @_ };
    foreach my $name (keys %$alias) {
        my $value = $alias->{$name};

        # A name may be redefined.  Forget whatever it meant before: a
        # leftover numeric entry would otherwise shadow a new name alias
        # (charnames() looks at the numeric table first), and the reverse
        # map would keep pointing the old code point at this name.
        delete $user_name_aliases{$name};
        if (exists $user_numeric_aliases{$name}) {
            my $old_hex = sprintf("%04X", delete $user_numeric_aliases{$name});
            delete $inverse_user_aliases{$old_hex}
                if exists $inverse_user_aliases{$old_hex}
                    && $inverse_user_aliases{$old_hex} eq $name;
        }

        if ($value =~ $decimal_qr) {
            $user_numeric_aliases{$name} = $value;

            # Use a canonical form.
            $inverse_user_aliases{sprintf("%04X", $value)} = $name;
        }
        elsif ($value =~ $hex_qr) {
            my $decimal = CORE::hex $1;
            $user_numeric_aliases{$name} = $decimal;

            # Must convert to decimal and back to guarantee canonical form
            $inverse_user_aliases{sprintf("%04X", $decimal)} = $name;
        }
        else {
            $user_name_aliases{$name} = $value;
        }
    }
} # alias
452
# Load a list of aliases from a file and register them with alias().
#
# The argument is either the absolute path of an existing plain file, or a
# bare identifier ARG, in which case "unicore/ARG_alias.pl" is loaded with
# "do".  Anything else is rejected with croak.
#
# Returns 1 when the file yielded a list of pairs that was registered and
# 0 when it yielded an empty list.  Croaks when the file yielded a single
# undefined value or an odd number of elements.
sub alias_file ($)
{
    my ($arg) = @_;

    my $file =
          (-f $arg && File::Spec->file_name_is_absolute ($arg)) ? $arg
        : ($arg =~ m/^\w+$/) ? "unicore/${arg}_alias.pl"
        : croak "Charnames alias files can only have identifier characters";

    my @pairs = do $file;
    return 0 unless @pairs;

    # A lone undef is what "do" hands back when it could not produce a list.
    if (@pairs == 1 && !defined $pairs[0]) {
        croak "$file cannot be used as alias file for charnames";
    }
    if (@pairs % 2) {
        croak "$file did not return a (valid) list of alias pairs";
    }

    alias (@pairs);
    return 1;
} # alias_file
475
# Translate a character name into the character itself.  A reference to
# this sub is stored in $^H{charnames} by import().
#
# Lookup order:
#   1. user numeric aliases, user name aliases (which only substitute the
#      name and then continue with the table search), system aliases,
#      deprecated aliases (with a 'deprecated' warning);
#   2. the name table from unicore/Name.pl: the exact name if :full is in
#      effect, then "script:name" if :short is in effect, then the name as
#      a letter in each script listed in $^H{charnames_scripts}.
#
# Returns the character.  If nothing matches, carps and returns U+FFFD.
# When "use bytes" is in effect, croaks for code points above 0xFF.
#
# This is not optimized in any way yet
sub charnames
{
    my $name = shift;
    my $ord;
    my $fname;

    # User alias should be checked first or else can't override ours, and if we
    # add any, could conflict with theirs.
    if (exists $user_numeric_aliases{$name}) {
        $ord = $user_numeric_aliases{$name};
        $fname = $name;
    }
    elsif (exists $user_name_aliases{$name}) {
        $name = $user_name_aliases{$name};
    }
    elsif (exists $system_aliases{$name}) {
        $ord = $system_aliases{$name};
        $fname = $name;
    }
    elsif (exists $deprecated_aliases{$name}) {
        require warnings;
        warnings::warnif('deprecated', "Unicode character name \"$name\" is deprecated, use \"" . viacode($deprecated_aliases{$name}) . "\" instead");
        $ord = $deprecated_aliases{$name};
        $fname = $name;
    }

    my @off;

    if (! defined $ord) {
        ## Suck in the code/name list as a big string.
        ## Lines look like:
        ##     "0052\t\tLATIN CAPITAL LETTER R\n"
        $txt = do "unicore/Name.pl" unless $txt;

        ## @off will hold the index into the code/name string of the start and
        ## end of the name as we find it.

        ## If :full, look for the name exactly
        if ($^H{charnames_full} and $txt =~ /\t\t\Q$name\E$/m) {
            @off = ($-[0], $+[0]);
        }

        ## If we didn't get above, and :short allowed, look for the short name.
        ## The short name is like "greek:Sigma"
        unless (@off) {
            if ($^H{charnames_short} and $name =~ /^(.+?):(.+)/s) {
                my ($script, $cname) = ($1, $2);
                my $case = $cname =~ /[[:upper:]]/ ? "CAPITAL" : "SMALL";

                # The script part comes straight from the \N{...} text, so
                # quote it: it must match literally, not act as a pattern.
                my $script_re = quotemeta uc $script;
                if ($txt =~ m/\t\t$script_re (?:$case )?LETTER \U\Q$cname\E$/m) {
                    @off = ($-[0], $+[0]);
                }
            }
        }

        ## If we still don't have it, check for the name among the loaded
        ## scripts.
        if (not @off) {
            my $case = $name =~ /[[:upper:]]/ ? "CAPITAL" : "SMALL";
            for my $script (@{$^H{charnames_scripts}}) {
                # Script names are whatever was passed to "use charnames";
                # quote them so they are matched literally.
                if ($txt =~ m/\t\t\Q$script\E (?:$case )?LETTER \U\Q$name\E$/m) {
                    @off = ($-[0], $+[0]);
                    last;
                }
            }
        }

        ## If we don't have it by now, give up.
        unless (@off) {
            carp "Unknown charname '$name'";
            return "\x{FFFD}";
        }

        ##
        ## Now know where in the string the name starts.
        ## The code, in hex, is before that.
        ##
        ## The code can be 4-6 characters long, so we've got to sort of
        ## go look for it, just after the newline that comes before $off[0].
        ##
        ## This would be much easier if unicore/Name.pl had info in
        ## a name/code order, instead of code/name order.
        ##
        ## The +1 after the rindex() is to skip past the newline we're finding,
        ## or, if the rindex() fails, to put us to an offset of zero.
        ##
        my $hexstart = rindex($txt, "\n", $off[0]) + 1;

        ## we know where it starts, so turn into number -
        ## the ordinal for the char.
        $ord = CORE::hex substr($txt, $hexstart, $off[0] - $hexstart);
    }

    if ($^H & $bytes::hint_bits) {      # "use bytes" in effect?
        use bytes;
        return chr $ord if $ord <= 255;
        my $hex = sprintf "%04x", $ord;
        if (not defined $fname) {
            # The match started at the two tabs, hence the +2 / -2.
            $fname = substr $txt, $off[0] + 2, $off[1] - $off[0] - 2;
        }
        croak "Character 0x$hex with name '$fname' is above 0xFF";
    }

    no warnings 'utf8'; # allow even illegal characters
    return pack "U", $ord;
} # charnames
582
# Handle "use charnames LIST".
#
# Installs charnames() as the \N{...} handler in %^H and records the
# requested lookup modes there:
#   :full, :short   - stored in $^H{charnames_full} / $^H{charnames_short}
#   :alias => ARG   - ARG is a hash reference passed to alias(), or the
#                     name of an alias file passed to alias_file(); the
#                     form ":NAME" additionally turns on :full when no
#                     other argument was given and the file loaded
#   anything else   - a script name, upper-cased and stored in
#                     $^H{charnames_scripts}
# Other arguments starting with ':' are reported with warn and skipped.
# When 'utf8' warnings are enabled, each script is checked against
# unicore/Name.pl and unknown ones are warned about.
sub import
{
    shift; ## ignore class name

    if (not @_) {
        carp("`use charnames' needs explicit imports list");
    }
    $^H{charnames} = \&charnames ;

    ##
    ## fill %h keys with our @_ args.
    ##
    my ($promote, %h, @args) = (0);

    # NOTE: a false argument ('0' or '') ends this loop early.
    while (my $arg = shift) {
        if ($arg eq ":alias") {
            @_ or
                croak ":alias needs an argument in charnames";
            my $alias = shift;
            if (ref $alias) {
                ref $alias eq "HASH" or
                    croak "Only HASH reference supported as argument to :alias";
                alias ($alias);
                next;
            }
            if ($alias =~ m{:(\w+)$}) {
                $1 eq "full" || $1 eq "short" and
                    croak ":alias cannot use existing pragma :$1 (reversed order?)";
                alias_file ($1) and $promote = 1;
                next;
            }
            alias_file ($alias);
            next;
        }
        if (substr($arg, 0, 1) eq ':' and ! ($arg eq ":full" || $arg eq ":short")) {
            warn "unsupported special '$arg' in charnames";
            next;
        }
        push @args, $arg;
    }
    @args == 0 && $promote and @args = (":full");
    @h{@args} = (1) x @args;

    $^H{charnames_full} = delete $h{':full'};
    $^H{charnames_short} = delete $h{':short'};

    # Scripts are searched in the order they were given, so build the list
    # from @args (first occurrence wins) rather than from the hash keys,
    # whose order is arbitrary.  By this point @args holds only ':full',
    # ':short' and script names.
    my %seen;
    $^H{charnames_scripts} = [
        map uc,
        grep { $_ ne ':full' && $_ ne ':short' && !$seen{$_}++ } @args
    ];

    ##
    ## If utf8? warnings are enabled, and some scripts were given,
    ## see if at least we can find one letter of each script.
    ##
    if (warnings::enabled('utf8') && @{$^H{charnames_scripts}}) {
        $txt = do "unicore/Name.pl" unless $txt;

        for my $script (@{$^H{charnames_scripts}}) {
            # Quote the script name so it is matched literally.
            if (not $txt =~ m/\t\t\Q$script\E (?:CAPITAL |SMALL )?LETTER /) {
                warnings::warn('utf8',  "No such script: '$script'");
            }
        }
    }
} # import
643
# Cache of official names already found by viacode(), keyed by the
# canonical upper-case hex form of the code point.
my %viacode;

# Return the name of a code point.
#
# The single argument is a code point written either as an unsigned
# decimal integer or as hex digits with an optional "U+" or "0x" prefix.
# The official name from unicore/Name.pl is returned if there is one,
# otherwise a user-defined numeric alias for that code point, otherwise
# nothing.  Carps and returns nothing when called with the wrong number of
# arguments, with an argument in neither form, or with a code point above
# 0x10FFFF.
sub viacode
{
    if (@_ != 1) {
        carp "charnames::viacode() expects one argument";
        return;
    }

    my $arg = shift;

    # this is derived from Unicode::UCD, where it is nearly the same as the
    # function _getcode(), but it makes sure that even a hex argument has the
    # proper number of leading zeros, which is critical in matching against $txt
    # below
    my $hex;
    if ($arg =~ $decimal_qr) {
        $hex = sprintf "%04X", $arg;
    } elsif ($arg =~ $hex_qr) {
        # Below is the line that differs from the _getcode() source.
        # CORE::hex is spelled out, as everywhere else in this file, so the
        # builtin is used even if hex() has been overridden.
        $hex = sprintf "%04X", CORE::hex($1);
    } else {
        carp("unexpected arg \"$arg\" to charnames::viacode()");
        return;
    }

    # checking the length first is slightly faster
    if (length($hex) > 5 && CORE::hex($hex) > 0x10FFFF) {
        carp "Unicode characters only allocated up to U+10FFFF (you asked for U+$hex)";
        return;
    }

    return $viacode{$hex} if exists $viacode{$hex};

    $txt = do "unicore/Name.pl" unless $txt;

    # Return the official name, if exists
    if ($txt =~ m/^$hex\t\t(.+)/m) {
        $viacode{$hex} = $1;
        return $1;
    }

    # See if there is a user name for it, before giving up completely.
    # User aliases are deliberately not cached: they can be changed by a
    # later call to alias(), and the lookup is a single hash access anyway.
    return if ! exists $inverse_user_aliases{$hex};
    return $inverse_user_aliases{$hex};
} # viacode
692
# Cache of code points already found by vianame(), keyed by name.
my %vianame;

# Look up a character by its official name at run time.
#
# For a name found in unicore/Name.pl the code point (a number) is
# returned.  An argument of the form "U+HHHH" is handled separately and
# yields the character itself (chr), not the number.  Returns nothing for
# an unknown name, and carps and returns nothing unless called with
# exactly one argument.
sub vianame
{
    unless (@_ == 1) {
        carp "charnames::vianame() expects one name argument";
        return ();
    }

    my $arg = shift;

    if ($arg =~ /^U\+([0-9a-fA-F]+)$/) {
        return chr CORE::hex $1;
    }

    return $vianame{$arg} if exists $vianame{$arg};

    $txt = do "unicore/Name.pl" unless $txt;

    my $name_at = index $txt, "\t\t$arg\n";
    return if $name_at < 0;

    # Find the start of the line the name is on.  If the name is on the
    # 1st line, rindex() finds no "\n" and gives -1, so adding 1 lands on
    # offset 0 (the beginning of $txt).  Otherwise it gives the position
    # of the preceding "\n", and adding 1 lands just past it.
    my $line_start = rindex($txt, "\n", $name_at) + 1;

    # The 6 characters taken here may be "0000\t\t", "00A1\t\t",
    # "10300\t", "100000", etc.  So we can get the code via removing TAB.
    (my $code = substr $txt, $line_start, 6) =~ tr/\t//d;

    return $vianame{$arg} = CORE::hex $code;
} # vianame
727
728
7291;
730__END__
731
732=head1 NAME
733
734charnames - define character names for C<\N{named}> string literal escapes
735
736=head1 SYNOPSIS
737
738 use charnames ':full';
739 print "\N{GREEK SMALL LETTER SIGMA} is called sigma.\n";
740
741 use charnames ':short';
742 print "\N{greek:Sigma} is an upper-case sigma.\n";
743
744 use charnames qw(cyrillic greek);
745 print "\N{sigma} is Greek sigma, and \N{be} is Cyrillic b.\n";
746
747 use charnames ":full", ":alias" => {
748 e_ACUTE => "LATIN SMALL LETTER E WITH ACUTE",
749 };
750 print "\N{e_ACUTE} is a small letter e with an acute.\n";
751
752 use charnames ();
753 print charnames::viacode(0x1234); # prints "ETHIOPIC SYLLABLE SEE"
754 printf "%04X", charnames::vianame("GOTHIC LETTER AHSA"); # prints
755 # "10330"
756
757=head1 DESCRIPTION
758
759Pragma C<use charnames> supports arguments C<:full>, C<:short>, script
760names and customized aliases. If C<:full> is present, for expansion of
761C<\N{CHARNAME}>, the string C<CHARNAME> is first looked up in the list of
762standard Unicode character names. If C<:short> is present, and
763C<CHARNAME> has the form C<SCRIPT:CNAME>, then C<CNAME> is looked up
764as a letter in script C<SCRIPT>. If pragma C<use charnames> is used
765with script name arguments, then for C<\N{CHARNAME}> the name
766C<CHARNAME> is looked up as a letter in the given scripts (in the
767specified order). Customized aliases can override these, and are explained in
768L</CUSTOM ALIASES>.
769
770For lookup of C<CHARNAME> inside a given script C<SCRIPTNAME>
771this pragma looks for the names
772
773 SCRIPTNAME CAPITAL LETTER CHARNAME
774 SCRIPTNAME SMALL LETTER CHARNAME
775 SCRIPTNAME LETTER CHARNAME
776
777in the table of standard Unicode names. If C<CHARNAME> is lowercase,
778then the C<CAPITAL> variant is ignored, otherwise the C<SMALL> variant
779is ignored.
780
781Note that C<\N{...}> is compile-time, it's a special form of string
782constant used inside double-quoted strings: in other words, you cannot
783use variables inside the C<\N{...}>. If you want similar run-time
784functionality, use charnames::vianame().
785
786For the C0 and C1 control characters (U+0000..U+001F, U+0080..U+009F)
787as of Unicode 3.1, there are no official Unicode names but you can use
788instead the ISO 6429 names (LINE FEED, ESCAPE, and so forth, and their
789abbreviations, LF, ESC, ...). In
Unicode 3.2 (as of Perl 5.8) some naming changes take place (ISO 6429
has been updated); see L</ALIASES>.
792
793Since the Unicode standard uses "U+HHHH", so can you: "\N{U+263a}"
794is the Unicode smiley face, or "\N{WHITE SMILING FACE}".
795
796=head1 ALIASES
797
798A few aliases have been defined for convenience: instead of having
799to use the official names
800
801 LINE FEED (LF)
802 FORM FEED (FF)
803 CARRIAGE RETURN (CR)
804 NEXT LINE (NEL)
805
806(yes, with parentheses) one can use
807
808 LINE FEED
809 FORM FEED
810 CARRIAGE RETURN
811 NEXT LINE
812 LF
813 FF
814 CR
815 NEL
816
817All the other standard abbreviations for the controls, such as C<ACK> for
818C<ACKNOWLEDGE> also can be used.
819
820One can also use
821
822 BYTE ORDER MARK
823 BOM
824
825and these abbreviations
826
827 Abbreviation Full Name
828
829 CGJ COMBINING GRAPHEME JOINER
830 FVS1 MONGOLIAN FREE VARIATION SELECTOR ONE
831 FVS2 MONGOLIAN FREE VARIATION SELECTOR TWO
832 FVS3 MONGOLIAN FREE VARIATION SELECTOR THREE
833 LRE LEFT-TO-RIGHT EMBEDDING
834 LRM LEFT-TO-RIGHT MARK
835 LRO LEFT-TO-RIGHT OVERRIDE
836 MMSP MEDIUM MATHEMATICAL SPACE
837 MVS MONGOLIAN VOWEL SEPARATOR
838 NBSP NO-BREAK SPACE
839 NNBSP NARROW NO-BREAK SPACE
840 PDF POP DIRECTIONAL FORMATTING
841 RLE RIGHT-TO-LEFT EMBEDDING
842 RLM RIGHT-TO-LEFT MARK
843 RLO RIGHT-TO-LEFT OVERRIDE
844 SHY SOFT HYPHEN
845 VS1 VARIATION SELECTOR-1
846 .
847 .
848 .
849 VS256 VARIATION SELECTOR-256
850 WJ WORD JOINER
851 ZWJ ZERO WIDTH JOINER
852 ZWNJ ZERO WIDTH NON-JOINER
853 ZWSP ZERO WIDTH SPACE
854
855For backward compatibility one can use the old names for
856certain C0 and C1 controls
857
858 old new
859
860 FILE SEPARATOR INFORMATION SEPARATOR FOUR
861 GROUP SEPARATOR INFORMATION SEPARATOR THREE
862 HORIZONTAL TABULATION CHARACTER TABULATION
863 HORIZONTAL TABULATION SET CHARACTER TABULATION SET
864 HORIZONTAL TABULATION WITH JUSTIFICATION CHARACTER TABULATION
865 WITH JUSTIFICATION
866 PARTIAL LINE DOWN PARTIAL LINE FORWARD
867 PARTIAL LINE UP PARTIAL LINE BACKWARD
868 RECORD SEPARATOR INFORMATION SEPARATOR TWO
869 REVERSE INDEX REVERSE LINE FEED
870 UNIT SEPARATOR INFORMATION SEPARATOR ONE
871 VERTICAL TABULATION LINE TABULATION
872 VERTICAL TABULATION SET LINE TABULATION SET
873
874but the old names in addition to giving the character
875will also give a warning about being deprecated.
876
877And finally, certain published variants are usable, including some for
878controls that have no Unicode names:
879
880 END OF PROTECTED AREA
881 HIGH OCTET PRESET
882 HOP
883 IND
884 INDEX
885 PAD
886 PADDING CHARACTER
887 PRIVATE USE 1
888 PRIVATE USE 2
889 SGC
890 SINGLE GRAPHIC CHARACTER INTRODUCER
891 SINGLE-SHIFT 2
892 SINGLE-SHIFT 3
893 START OF PROTECTED AREA
894
895=head1 CUSTOM ALIASES
896
897This version of charnames supports three mechanisms of adding local
898or customized aliases to standard Unicode naming conventions (:full).
899The aliases override any standard definitions, so, if you're twisted enough,
900you can change C<"\N{LATIN CAPITAL LETTER A}"> to mean C<"B">, etc.
901
902Note that an alias should not be something that is a legal curly
903brace-enclosed quantifier (see L<perlreref/QUANTIFIERS>). For example
904C<\N{123}> means to match 123 non-newline characters, and is not treated as an
905alias. Aliases are discouraged from beginning with anything other than an
906alphabetic character and from containing anything other than alphanumerics,
907spaces, dashes, colons, parentheses, and underscores. Currently they must be
908ASCII.
909
910An alias can map to either an official Unicode character name or numeric
911code point (ordinal). The latter is useful for assigning names to code
912points in Unicode private use areas such as U+E000 through U+F8FF. The
913number must look like an unsigned decimal integer, or a hexadecimal
914constant beginning with C<0x>, or C<U+>.
915
916=head2 Anonymous hashes
917
918 use charnames ":full", ":alias" => {
919 e_ACUTE => "LATIN SMALL LETTER E WITH ACUTE",
920 mychar1 => 0xE8000,
921 };
922 my $str = "\N{e_ACUTE}";
923
924=head2 Alias file
925
926 use charnames ":full", ":alias" => "pro";
927
928 will try to read "unicore/pro_alias.pl" from the @INC path. This
929 file should return a list in plain perl:
930
931 (
932 A_GRAVE => "LATIN CAPITAL LETTER A WITH GRAVE",
933 A_CIRCUM => "LATIN CAPITAL LETTER A WITH CIRCUMFLEX",
934 A_DIAERES => "LATIN CAPITAL LETTER A WITH DIAERESIS",
935 A_TILDE => "LATIN CAPITAL LETTER A WITH TILDE",
936 A_BREVE => "LATIN CAPITAL LETTER A WITH BREVE",
937 A_RING => "LATIN CAPITAL LETTER A WITH RING ABOVE",
938 A_MACRON => "LATIN CAPITAL LETTER A WITH MACRON",
939 mychar2 => "U+E8001",
940 );
941
942=head2 Alias shortcut
943
944 use charnames ":alias" => ":pro";
945
946works exactly the same as the alias pairs, only this time,
947":full" is inserted automatically as the first argument (if no
948other argument is given).
949
950=head1 charnames::viacode(code)
951
952Returns the full name of the character indicated by the numeric code.
953The example
954
955 print charnames::viacode(0x2722);
956
957prints "FOUR TEARDROP-SPOKED ASTERISK".
958
959Returns undef if no name is known for the code.
960
961The name returned is the official name for the code point, if
962available, otherwise your custom alias for it. This means that your
963alias will only be returned for code points that don't have an official
964Unicode name (nor Unicode version 1 name), such as private use code
965points, and the 4 control characters U+0080, U+0081, U+0084, and U+0099.
966
967Notice that the name returned for U+FEFF is "ZERO WIDTH NO-BREAK
968SPACE", not "BYTE ORDER MARK".
969
970=head1 charnames::vianame(name)
971
972Returns the code point indicated by the name.
973The example
974
975 printf "%04X", charnames::vianame("FOUR TEARDROP-SPOKED ASTERISK");
976
977prints "2722".
978
979Returns undef if the name is unknown.
980
981This works only for the standard names, and does not yet apply
982to custom translators.
983
984=head1 CUSTOM TRANSLATORS
985
986The mechanism of translation of C<\N{...}> escapes is general and not
987hardwired into F<charnames.pm>. A module can install custom
988translations (inside the scope which C<use>s the module) with the
989following magic incantation:
990
991 sub import {
992 shift;
993 $^H{charnames} = \&translator;
994 }
995
996Here translator() is a subroutine which takes C<CHARNAME> as an
997argument, and returns text to insert into the string instead of the
998C<\N{CHARNAME}> escape. Since the text to insert should be different
999in C<bytes> mode and out of it, the function should check the current
1000state of C<bytes>-flag as in:
1001
1002 use bytes (); # for $bytes::hint_bits
1003 sub translator {
1004 if ($^H & $bytes::hint_bits) {
1005 return bytes_translator(@_);
1006 }
1007 else {
1008 return utf8_translator(@_);
1009 }
1010 }
1011
1012See L</CUSTOM ALIASES> above for restrictions on C<CHARNAME>.
1013
1014=head1 ILLEGAL CHARACTERS
1015
1016If you ask by name for a character that does not exist, a warning is given and
1017the Unicode I<replacement character> "\x{FFFD}" is returned.
1018
1019If you ask by code (C<charnames::viacode()>) for a character that is
1020unassigned, no warning is given and C<undef> is returned. In Unicode
1021the proper name of these is the empty string, which C<undef> stringifies
1022to. (If you ask for a code point past the legal Unicode maximum of
1023U+10FFFF you do get C<undef> and a warning.)
1024
1025=head1 BUGS
1026
1027vianame returns a chr if the input name is of the form C<U+...>, and an ord
1028otherwise. It is proposed to change this to always return an ord. Send email
1029to C<perl5-porters@perl.org> to comment on this proposal.
1030
1031The functions fail for nearly all of the Hangul syllable and CJK Unicode
1032characters that have their code points as part of their names.
1033
1034Names must be ASCII characters only, which means that you are out of luck if
1035you want to create aliases in a language where some or all the characters of
1036the desired aliases are non-ASCII.
1037
1038Unicode standard named sequences are not recognized, such as
1039C<LATIN CAPITAL LETTER A WITH MACRON AND GRAVE>
1040(which should mean C<LATIN CAPITAL LETTER A WITH MACRON> with an additional
1041C<COMBINING GRAVE ACCENT>).
1042
1043Since evaluation of the translation function happens in the middle of
1044compilation (of a string literal), the translation function should not
1045do any C<eval>s or C<require>s. This restriction should be lifted in
1046a future version of Perl.
1047
1048=cut