This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Add cautionary comment to .t
[perl5.git] / lib / charnames.t
CommitLineData
423cee85 1#!./perl
a0d8d8c5 2use strict;
423cee85 3
52ea3e69
JH
4my @WARN;
5
423cee85
JH
6BEGIN {
7 unless(grep /blib/, @INC) {
8 chdir 't' if -d 't';
20822f61 9 @INC = '../lib';
143a3e5e 10 require './test.pl';
423cee85 11 }
52ea3e69 12 $SIG{__WARN__} = sub { push @WARN, @_ };
423cee85
JH
13}
14
63c6dcc1
JH
15require File::Spec;
16
423cee85 17$| = 1;
822ebcc8 18
c8002005 19plan(85);
423cee85
JH
20
21use charnames ':full';
22
a0d8d8c5 23is("Here\N{EXCLAMATION MARK}?", "Here!?");
423cee85 24
c82a54e6 25{
a0d8d8c5 26 use bytes; # TEST -utf8 can switch utf8 on
c82a54e6 27
a0d8d8c5 28 my $res = eval <<'EOE';
423cee85 29use charnames ":full";
4a2d328f 30"Here: \N{CYRILLIC SMALL LETTER BE}!";
423cee85
JH
311
32EOE
423cee85 33
a0d8d8c5
KW
34 like($@, "above 0xFF");
35 is($res, undef);
36
37 $res = eval <<'EOE';
423cee85 38use charnames 'cyrillic';
4a2d328f 39"Here: \N{Be}!";
423cee85
JH
401
41EOE
a0d8d8c5 42 like($@, "CYRILLIC CAPITAL LETTER BE.*above 0xFF");
c82a54e6 43}
423cee85 44
a0d8d8c5
KW
45my $encoded_be;
46my $encoded_alpha;
47my $encoded_bet;
48my $encoded_deseng;
49
423cee85 50# If octal representation of unicode char is \0xyzt, then the utf8 is \3xy\2zt
210db7fc
PP
51if (ord('A') == 65) { # as on ASCII or UTF-8 machines
52 $encoded_be = "\320\261";
53 $encoded_alpha = "\316\261";
54 $encoded_bet = "\327\221";
55 $encoded_deseng = "\360\220\221\215";
56}
57else { # EBCDIC where UTF-EBCDIC may be used (this may be 1047 specific since
58 # UTF-EBCDIC is codepage specific)
59 $encoded_be = "\270\102\130";
60 $encoded_alpha = "\264\130";
61 $encoded_bet = "\270\125\130";
62 $encoded_deseng = "\336\102\103\124";
63}
c5cc3500
GS
64
65sub to_bytes {
f337b084 66 unpack"U0a*", shift;
c5cc3500
GS
67}
68
423cee85
JH
69{
70 use charnames ':full';
423cee85 71
a0d8d8c5 72 is(to_bytes("\N{CYRILLIC SMALL LETTER BE}"), $encoded_be);
423cee85
JH
73
74 use charnames qw(cyrillic greek :short);
75
a0d8d8c5
KW
76 is(to_bytes("\N{be},\N{alpha},\N{hebrew:bet}"),
77 "$encoded_be,$encoded_alpha,$encoded_bet");
423cee85 78}
e1992b6d
GS
79
80{
81 use charnames ':full';
a0d8d8c5
KW
82 is("\x{263a}", "\N{WHITE SMILING FACE}");
83 cmp_ok(length("\x{263a}"), '==', 1);
84 cmp_ok(length("\N{WHITE SMILING FACE}"), '==', 1);
85 is(sprintf("%vx", "\x{263a}"), "263a");
86 is(sprintf("%vx", "\N{WHITE SMILING FACE}"), "263a");
87 is(sprintf("%vx", "\xFF\N{WHITE SMILING FACE}"), "ff.263a");
88 is(sprintf("%vx", "\x{ff}\N{WHITE SMILING FACE}"), "ff.263a");
e1992b6d 89}
c00525d4
SP
90
91{
a0d8d8c5
KW
92 use charnames qw(:full);
93 use utf8;
51cf30b6 94
c00525d4
SP
95 my $x = "\x{221b}";
96 my $named = "\N{CUBE ROOT}";
97
a0d8d8c5 98 cmp_ok(ord($x), '==', ord($named));
c00525d4
SP
99}
100
f9a63242 101{
a0d8d8c5
KW
102 use charnames qw(:full);
103 use utf8;
104 is("\x{100}\N{CENT SIGN}", "\x{100}"."\N{CENT SIGN}");
f9a63242
JH
105}
106
b896c7a5 107{
a0d8d8c5 108 use charnames ':full';
b896c7a5 109
a0d8d8c5 110 is(to_bytes("\N{DESERET SMALL LETTER ENG}"), $encoded_deseng);
4765795a 111}
b896c7a5 112
4765795a 113{
a0d8d8c5
KW
114 # 20001114.001
115
116 no utf8; # naked Latin-1
117
118 use charnames ':full';
119 my $text = "\N{LATIN CAPITAL LETTER A WITH DIAERESIS}";
120 is($text, latin1_to_native("\xc4"));
121
122 # I'm not sure that this tests anything different from the above.
123 cmp_ok(ord($text), '==', ord(latin1_to_native("\xc4")));
b896c7a5
A
124}
125
daf0d493 126{
a0d8d8c5 127 is(charnames::viacode(0x1234), "ETHIOPIC SYLLABLE SEE");
daf0d493 128
a23c04e4 129 # Unused Hebrew.
a0d8d8c5 130 ok(! defined charnames::viacode(0x0590));
daf0d493
JH
131}
132
133{
a0d8d8c5
KW
134 is(sprintf("%04X", charnames::vianame("GOTHIC LETTER AHSA")), "10330");
135 ok (! defined charnames::vianame("NONE SUCH"));
daf0d493 136}
4e2cda5d
JH
137
138{
139 # check that caching at least hasn't broken anything
140
a0d8d8c5 141 is(charnames::viacode(0x1234), "ETHIOPIC SYLLABLE SEE");
4e2cda5d 142
a0d8d8c5 143 is(sprintf("%04X", charnames::vianame("GOTHIC LETTER AHSA")), "10330");
4e2cda5d
JH
144
145}
301a3cda 146
a0d8d8c5 147is("\N{CHARACTER TABULATION}", "\t");
a2e77dd4 148
a0d8d8c5
KW
149is("\N{ESCAPE}", "\e");
150is("\N{NULL}", "\c@");
151is("\N{LINE FEED (LF)}", "\n");
152is("\N{LINE FEED}", "\n");
153is("\N{LF}", "\n");
52ea3e69 154
a0d8d8c5
KW
155my $nel = latin1_to_native("\x85");
156$nel = qr/^$nel$/;
52ea3e69 157
a0d8d8c5
KW
158like("\N{NEXT LINE (NEL)}", $nel);
159like("\N{NEXT LINE}", $nel);
160like("\N{NEL}", $nel);
161is("\N{BYTE ORDER MARK}", chr(0xFEFF));
162is("\N{BOM}", chr(0xFEFF));
51e9e896 163
52ea3e69
JH
164{
165 use warnings 'deprecated';
166
a0d8d8c5 167 is("\N{HORIZONTAL TABULATION}", "\t");
52ea3e69 168
a0d8d8c5 169 ok(grep { /"HORIZONTAL TABULATION" is deprecated/ } @WARN);
822ebcc8 170
52ea3e69
JH
171 no warnings 'deprecated';
172
a0d8d8c5 173 is("\N{VERTICAL TABULATION}", "\013");
52ea3e69 174
a0d8d8c5 175 ok(! grep { /"VERTICAL TABULATION" is deprecated/ } @WARN);
52ea3e69 176}
822ebcc8 177
a0d8d8c5 178is(charnames::viacode(0xFEFF), "ZERO WIDTH NO-BREAK SPACE");
a23c04e4 179
872c91ae
JH
180{
181 use warnings;
a0d8d8c5 182 cmp_ok(ord("\N{BOM}"), '==', 0xFEFF);
872c91ae
JH
183}
184
a0d8d8c5 185cmp_ok(ord("\N{ZWNJ}"), '==', 0x200C);
24b5d5cc 186
a0d8d8c5 187cmp_ok(ord("\N{ZWJ}"), '==', 0x200D);
dbc0d4f2 188
a0d8d8c5 189is("\N{U+263A}", "\N{WHITE SMILING FACE}");
dbc0d4f2 190
51b0dbc4 191{
a0d8d8c5
KW
192 cmp_ok( 0x3093, '==', charnames::vianame("HIRAGANA LETTER N"));
193 cmp_ok(0x0397, '==', charnames::vianame("GREEK CAPITAL LETTER ETA"));
51b0dbc4
TS
194}
195
a0d8d8c5
KW
196ok(! defined charnames::viacode(0x110000));
197ok(! grep { /you asked for U+110000/ } @WARN);
e10d7780 198
c8002005
KW
199is(charnames::viacode(0), "NULL");
200is(charnames::viacode("BE"), "VULGAR FRACTION THREE QUARTERS");
201is(charnames::viacode("U+00000000000FEED"), "ARABIC LETTER WAW ISOLATED FORM");
202
35c0985d
MB
203
204# ---- Alias extensions
205
63c6dcc1 206my $alifile = File::Spec->catfile(File::Spec->updir, qw(lib unicore xyzzy_alias.pl));
35c0985d
MB
207
208my @prgs;
a0d8d8c5
KW
209{
210 local $/ = undef;
35c0985d 211 @prgs = split "\n########\n", <DATA>;
a0d8d8c5 212}
35c0985d 213
35c0985d
MB
214for (@prgs) {
215 my ($code, $exp) = ((split m/\nEXPECT\n/), '$');
216 my ($prog, $fil) = ((split m/\nFILE\n/, $code), "");
f89542f7 217 my $tmpfile = tempfile();
35c0985d
MB
218 open my $tmp, "> $tmpfile" or die "Could not open $tmpfile: $!";
219 print $tmp $prog, "\n";
220 close $tmp or die "Could not close $tmpfile: $!";
221 if ($fil) {
222 $fil .= "\n";
223 open my $ali, "> $alifile" or die "Could not open $alifile: $!";
224 print $ali $fil;
225 close $ali or die "Could not close $alifile: $!";
a0d8d8c5
KW
226 }
227 my $switch = "";
228 my $res = runperl( switches => $switch,
143a3e5e
CB
229 progfile => $tmpfile,
230 stderr => 1 );
35c0985d
MB
231 my $status = $?;
232 $res =~ s/[\r\n]+$//;
233 $res =~ s/tmp\d+/-/g; # fake $prog from STDIN
234 $res =~ s/\n%[A-Z]+-[SIWEF]-.*$// # clip off DCL status msg
235 if $^O eq "VMS";
236 $exp =~ s/[\r\n]+$//;
35c0985d
MB
237 my $pfx = ($res =~ s/^PREFIX\n//);
238 my $rexp = qr{^$exp};
a0d8d8c5
KW
239 my $expected = ""; # Unsure why this is here, as was never initialized
240
241 SKIP: {
242 skip $res, 1, if $res =~ s/^SKIPPED\n//;
243 if (($pfx and $res !~ /^\Q$expected/) or
244 (!$pfx and $res !~ $rexp))
245 {
246 fail("PROG:\n$prog\nFILE:\n${fil}EXPECTED:\n$exp\nGOT:\n$res");
247 } else {
248 pass("");
249 }
250 }
35c0985d
MB
251 $fil or next;
252 1 while unlink $alifile;
a0d8d8c5 253}
35c0985d 254
e5c3f898
MG
255# [perl #30409] charnames.pm clobbers default variable
256$_ = 'foobar';
257eval "use charnames ':full';";
a0d8d8c5 258is($_, 'foobar');
e5c3f898 259
c776535e
NC
260# Unicode slowdown noted by Phil Pennock, traced to a bug fix in index
261# SADAHIRO Tomoyuki's suggestion is to ensure that the UTF-8ness of both
262# arguments are indentical before calling index.
263# To do this can take advantage of the fact that unicore/Name.pl is 7 bit
e5d6fe1e
KW
264# (or at least should be). So assert that that it's true here. EBCDIC
265# may be a problem (khw).
c776535e
NC
266
267my $names = do "unicore/Name.pl";
a0d8d8c5
KW
268ok(defined $names);
269my $non_ascii = native_to_latin1($names) =~ tr/\0-\177//c;
270ok(! $non_ascii, "Make sure all names are ASCII-only");
c776535e 271
eb915052
RGS
272# Verify that charnames propagate to eval("")
273my $evaltry = eval q[ "Eval: \N{LEFT-POINTING DOUBLE ANGLE QUOTATION MARK}" ];
274if ($@) {
a0d8d8c5
KW
275 fail('charnames failed to propagate to eval("")');
276 fail('next test also fails to make the same number of tests');
eb915052 277} else {
a0d8d8c5
KW
278 pass('charnames propagated to eval("")');
279 is($evaltry, "Eval: \N{LEFT-POINTING DOUBLE ANGLE QUOTATION MARK}");
eb915052 280}
c776535e 281
ae6979a8 282# Verify that db includes the normative NameAliases.txt names
a0d8d8c5 283is("\N{U+1D0C5}", "\N{BYZANTINE MUSICAL SYMBOL FTHORA SKLIRON CHROMA VASIS}");
ae6979a8 284
ec34a119
DM
285# [perl #73174] use of \N{FOO} used to reset %^H
286
287{
288 use charnames ":full";
289 my $res;
290 BEGIN { $^H{73174} = "foo" }
291 BEGIN { $res = ($^H{73174} // "") }
292 # forces loading of utf8.pm, which used to reset %^H
293 $res .= '-1' if ":" =~ /\N{COLON}/i;
294 BEGIN { $res .= '-' . ($^H{73174} // "") }
295 $res .= '-' . ($^H{73174} // "");
296 $res .= '-2' if ":" =~ /\N{COLON}/;
297 $res .= '-3' if ":" =~ /\N{COLON}/i;
a0d8d8c5 298 is($res, "foo-foo-1--2-3");
ec34a119
DM
299}
300
35c0985d 301__END__
51cf30b6
MB
302# unsupported pragma
303use charnames ":scoobydoo";
304"Here: \N{e_ACUTE}!\n";
305EXPECT
306unsupported special ':scoobydoo' in charnames at
307########
35c0985d
MB
308# wrong type of alias (missing colon)
309use charnames "alias";
310"Here: \N{e_ACUTE}!\n";
311EXPECT
51cf30b6 312Unknown charname 'e_ACUTE' at
35c0985d
MB
313########
314# alias without an argument
315use charnames ":alias";
316"Here: \N{e_ACUTE}!\n";
317EXPECT
51cf30b6
MB
318:alias needs an argument in charnames at
319########
320# reversed sequence
321use charnames ":alias" => ":full";
322"Here: \N{e_ACUTE}!\n";
323EXPECT
324:alias cannot use existing pragma :full \(reversed order\?\) at
35c0985d
MB
325########
326# alias with hashref but no :full
327use charnames ":alias" => { e_ACUTE => "LATIN SMALL LETTER E WITH ACUTE" };
328"Here: \N{e_ACUTE}!\n";
329EXPECT
330Unknown charname 'LATIN SMALL LETTER E WITH ACUTE' at
331########
332# alias with hashref but with :short
333use charnames ":short", ":alias" => { e_ACUTE => "LATIN SMALL LETTER E WITH ACUTE" };
334"Here: \N{e_ACUTE}!\n";
335EXPECT
336Unknown charname 'LATIN SMALL LETTER E WITH ACUTE' at
337########
338# alias with hashref to :full OK
339use charnames ":full", ":alias" => { e_ACUTE => "LATIN SMALL LETTER E WITH ACUTE" };
340"Here: \N{e_ACUTE}!\n";
341EXPECT
342$
343########
344# alias with hashref to :short but using :full
345use charnames ":full", ":alias" => { e_ACUTE => "LATIN:e WITH ACUTE" };
346"Here: \N{e_ACUTE}!\n";
347EXPECT
348Unknown charname 'LATIN:e WITH ACUTE' at
349########
350# alias with hashref to :short OK
351use charnames ":short", ":alias" => { e_ACUTE => "LATIN:e WITH ACUTE" };
352"Here: \N{e_ACUTE}!\n";
353EXPECT
354$
355########
356# alias with bad hashref
357use charnames ":short", ":alias" => "e_ACUTE";
358"Here: \N{e_ACUTE}\N{a_ACUTE}!\n";
359EXPECT
51cf30b6 360unicore/e_ACUTE_alias.pl cannot be used as alias file for charnames at
35c0985d
MB
361########
362# alias with arrayref
363use charnames ":short", ":alias" => [ e_ACUTE => "LATIN:e WITH ACUTE" ];
364"Here: \N{e_ACUTE}!\n";
365EXPECT
366Only HASH reference supported as argument to :alias at
367########
368# alias with bad hashref
369use charnames ":short", ":alias" => { e_ACUTE => "LATIN:e WITH ACUTE", "a_ACUTE" };
370"Here: \N{e_ACUTE}\N{a_ACUTE}!\n";
371EXPECT
29489e7c 372Use of uninitialized value
35c0985d
MB
373########
374# alias with hashref two aliases
375use charnames ":short", ":alias" => {
376 e_ACUTE => "LATIN:e WITH ACUTE",
377 a_ACUTE => "",
378 };
379"Here: \N{e_ACUTE}\N{a_ACUTE}!\n";
380EXPECT
381Unknown charname '' at
382########
383# alias with hashref two aliases
384use charnames ":short", ":alias" => {
385 e_ACUTE => "LATIN:e WITH ACUTE",
386 a_ACUTE => "LATIN:a WITH ACUTE",
387 };
388"Here: \N{e_ACUTE}\N{a_ACUTE}!\n";
389EXPECT
390$
391########
392# alias with hashref using mixed aliasses
393use charnames ":short", ":alias" => {
394 e_ACUTE => "LATIN:e WITH ACUTE",
395 a_ACUTE => "LATIN SMALL LETTER A WITH ACUT",
396 };
397"Here: \N{e_ACUTE}\N{a_ACUTE}!\n";
398EXPECT
399Unknown charname 'LATIN SMALL LETTER A WITH ACUT' at
400########
401# alias with hashref using mixed aliasses
402use charnames ":short", ":alias" => {
403 e_ACUTE => "LATIN:e WITH ACUTE",
404 a_ACUTE => "LATIN SMALL LETTER A WITH ACUTE",
405 };
406"Here: \N{e_ACUTE}\N{a_ACUTE}!\n";
407EXPECT
408Unknown charname 'LATIN SMALL LETTER A WITH ACUTE' at
409########
410# alias with hashref using mixed aliasses
411use charnames ":full", ":alias" => {
412 e_ACUTE => "LATIN:e WITH ACUTE",
413 a_ACUTE => "LATIN SMALL LETTER A WITH ACUTE",
414 };
415"Here: \N{e_ACUTE}\N{a_ACUTE}!\n";
416EXPECT
417Unknown charname 'LATIN:e WITH ACUTE' at
418########
419# alias with nonexisting file
420use charnames ":full", ":alias" => "xyzzy";
421"Here: \N{e_ACUTE}\N{a_ACUTE}!\n";
422EXPECT
51cf30b6
MB
423unicore/xyzzy_alias.pl cannot be used as alias file for charnames at
424########
425# alias with bad file name
426use charnames ":full", ":alias" => "xy 7-";
427"Here: \N{e_ACUTE}\N{a_ACUTE}!\n";
428EXPECT
429Charnames alias files can only have identifier characters at
430########
431# alias with non_absolute (existing) file name (which it should /not/ use)
432use charnames ":full", ":alias" => "perl";
433"Here: \N{e_ACUTE}\N{a_ACUTE}!\n";
434EXPECT
435unicore/perl_alias.pl cannot be used as alias file for charnames at
35c0985d
MB
436########
437# alias with bad file
438use charnames ":full", ":alias" => "xyzzy";
439"Here: \N{e_ACUTE}\N{a_ACUTE}!\n";
440FILE
441#!perl
4420;
443EXPECT
51cf30b6 444unicore/xyzzy_alias.pl did not return a \(valid\) list of alias pairs at
35c0985d
MB
445########
446# alias with file with empty list
447use charnames ":full", ":alias" => "xyzzy";
448"Here: \N{e_ACUTE}\N{a_ACUTE}!\n";
449FILE
450#!perl
451();
452EXPECT
453Unknown charname 'e_ACUTE' at
454########
455# alias with file OK but file has :short aliasses
456use charnames ":full", ":alias" => "xyzzy";
457"Here: \N{e_ACUTE}\N{a_ACUTE}!\n";
458FILE
459#!perl
460( e_ACUTE => "LATIN:e WITH ACUTE",
461 a_ACUTE => "LATIN:a WITH ACUTE",
462 );
463EXPECT
464Unknown charname 'LATIN:e WITH ACUTE' at
465########
466# alias with :short and file OK
467use charnames ":short", ":alias" => "xyzzy";
468"Here: \N{e_ACUTE}\N{a_ACUTE}!\n";
469FILE
470#!perl
471( e_ACUTE => "LATIN:e WITH ACUTE",
472 a_ACUTE => "LATIN:a WITH ACUTE",
473 );
474EXPECT
475$
476########
477# alias with :short and file OK has :long aliasses
478use charnames ":short", ":alias" => "xyzzy";
479"Here: \N{e_ACUTE}\N{a_ACUTE}!\n";
480FILE
481#!perl
482( e_ACUTE => "LATIN SMALL LETTER E WITH ACUTE",
483 a_ACUTE => "LATIN SMALL LETTER A WITH ACUTE",
484 );
485EXPECT
486Unknown charname 'LATIN SMALL LETTER E WITH ACUTE' at
487########
488# alias with file implicit :full but file has :short aliasses
489use charnames ":alias" => ":xyzzy";
490"Here: \N{e_ACUTE}\N{a_ACUTE}!\n";
491FILE
492#!perl
493( e_ACUTE => "LATIN:e WITH ACUTE",
494 a_ACUTE => "LATIN:a WITH ACUTE",
495 );
496EXPECT
497Unknown charname 'LATIN:e WITH ACUTE' at
498########
499# alias with file implicit :full and file has :long aliasses
500use charnames ":alias" => ":xyzzy";
501"Here: \N{e_ACUTE}\N{a_ACUTE}!\n";
502FILE
503#!perl
504( e_ACUTE => "LATIN SMALL LETTER E WITH ACUTE",
505 a_ACUTE => "LATIN SMALL LETTER A WITH ACUTE",
506 );
507EXPECT
508$