This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Remove a release_manager_guide step since it's now been automated away
[perl5.git] / lib / charnames.t
CommitLineData
423cee85 1#!./perl
a0d8d8c5 2use strict;
423cee85 3
52ea3e69
JH
4my @WARN;
5
423cee85
JH
6BEGIN {
7 unless(grep /blib/, @INC) {
8 chdir 't' if -d 't';
20822f61 9 @INC = '../lib';
423cee85 10 }
52ea3e69 11 $SIG{__WARN__} = sub { push @WARN, @_ };
423cee85
JH
12}
13
ea065648 14our $local_tests = 58;
63c6dcc1 15
ea065648
NC
16# ---- For the alias extensions
17require "../t/lib/common.pl";
423cee85
JH
18
19use charnames ':full';
20
a0d8d8c5 21is("Here\N{EXCLAMATION MARK}?", "Here!?");
423cee85 22
c82a54e6 23{
a0d8d8c5 24 use bytes; # TEST -utf8 can switch utf8 on
c82a54e6 25
a0d8d8c5 26 my $res = eval <<'EOE';
423cee85 27use charnames ":full";
4a2d328f 28"Here: \N{CYRILLIC SMALL LETTER BE}!";
423cee85
JH
291
30EOE
423cee85 31
a0d8d8c5
KW
32 like($@, "above 0xFF");
33 is($res, undef);
34
35 $res = eval <<'EOE';
423cee85 36use charnames 'cyrillic';
4a2d328f 37"Here: \N{Be}!";
423cee85
JH
381
39EOE
a0d8d8c5 40 like($@, "CYRILLIC CAPITAL LETTER BE.*above 0xFF");
c82a54e6 41}
423cee85 42
a0d8d8c5
KW
43my $encoded_be;
44my $encoded_alpha;
45my $encoded_bet;
46my $encoded_deseng;
47
423cee85 48# If octal representation of unicode char is \0xyzt, then the utf8 is \3xy\2zt
210db7fc
PP
49if (ord('A') == 65) { # as on ASCII or UTF-8 machines
50 $encoded_be = "\320\261";
51 $encoded_alpha = "\316\261";
52 $encoded_bet = "\327\221";
53 $encoded_deseng = "\360\220\221\215";
54}
55else { # EBCDIC where UTF-EBCDIC may be used (this may be 1047 specific since
56 # UTF-EBCDIC is codepage specific)
57 $encoded_be = "\270\102\130";
58 $encoded_alpha = "\264\130";
59 $encoded_bet = "\270\125\130";
60 $encoded_deseng = "\336\102\103\124";
61}
c5cc3500
GS
62
63sub to_bytes {
f337b084 64 unpack"U0a*", shift;
c5cc3500
GS
65}
66
423cee85
JH
67{
68 use charnames ':full';
423cee85 69
a0d8d8c5 70 is(to_bytes("\N{CYRILLIC SMALL LETTER BE}"), $encoded_be);
423cee85
JH
71
72 use charnames qw(cyrillic greek :short);
73
a0d8d8c5
KW
74 is(to_bytes("\N{be},\N{alpha},\N{hebrew:bet}"),
75 "$encoded_be,$encoded_alpha,$encoded_bet");
423cee85 76}
e1992b6d
GS
77
78{
79 use charnames ':full';
a0d8d8c5
KW
80 is("\x{263a}", "\N{WHITE SMILING FACE}");
81 cmp_ok(length("\x{263a}"), '==', 1);
82 cmp_ok(length("\N{WHITE SMILING FACE}"), '==', 1);
83 is(sprintf("%vx", "\x{263a}"), "263a");
84 is(sprintf("%vx", "\N{WHITE SMILING FACE}"), "263a");
85 is(sprintf("%vx", "\xFF\N{WHITE SMILING FACE}"), "ff.263a");
86 is(sprintf("%vx", "\x{ff}\N{WHITE SMILING FACE}"), "ff.263a");
e1992b6d 87}
c00525d4 88
89{
a0d8d8c5
KW
90 use charnames qw(:full);
91 use utf8;
51cf30b6 92
c00525d4 93 my $x = "\x{221b}";
94 my $named = "\N{CUBE ROOT}";
95
a0d8d8c5 96 cmp_ok(ord($x), '==', ord($named));
c00525d4 97}
98
f9a63242 99{
a0d8d8c5
KW
100 use charnames qw(:full);
101 use utf8;
102 is("\x{100}\N{CENT SIGN}", "\x{100}"."\N{CENT SIGN}");
f9a63242
JH
103}
104
b896c7a5 105{
a0d8d8c5 106 use charnames ':full';
b896c7a5 107
a0d8d8c5 108 is(to_bytes("\N{DESERET SMALL LETTER ENG}"), $encoded_deseng);
4765795a 109}
b896c7a5 110
4765795a 111{
a0d8d8c5
KW
112 # 20001114.001
113
114 no utf8; # naked Latin-1
115
116 use charnames ':full';
117 my $text = "\N{LATIN CAPITAL LETTER A WITH DIAERESIS}";
118 is($text, latin1_to_native("\xc4"));
119
120 # I'm not sure that this tests anything different from the above.
121 cmp_ok(ord($text), '==', ord(latin1_to_native("\xc4")));
b896c7a5
A
122}
123
daf0d493 124{
a0d8d8c5 125 is(charnames::viacode(0x1234), "ETHIOPIC SYLLABLE SEE");
daf0d493 126
a23c04e4 127 # Unused Hebrew.
a0d8d8c5 128 ok(! defined charnames::viacode(0x0590));
daf0d493
JH
129}
130
131{
a0d8d8c5
KW
132 is(sprintf("%04X", charnames::vianame("GOTHIC LETTER AHSA")), "10330");
133 ok (! defined charnames::vianame("NONE SUCH"));
daf0d493 134}
4e2cda5d
JH
135
136{
137 # check that caching at least hasn't broken anything
138
a0d8d8c5 139 is(charnames::viacode(0x1234), "ETHIOPIC SYLLABLE SEE");
4e2cda5d 140
a0d8d8c5 141 is(sprintf("%04X", charnames::vianame("GOTHIC LETTER AHSA")), "10330");
4e2cda5d
JH
142
143}
301a3cda 144
a0d8d8c5 145is("\N{CHARACTER TABULATION}", "\t");
a2e77dd4 146
a0d8d8c5
KW
147is("\N{ESCAPE}", "\e");
148is("\N{NULL}", "\c@");
149is("\N{LINE FEED (LF)}", "\n");
150is("\N{LINE FEED}", "\n");
151is("\N{LF}", "\n");
52ea3e69 152
a0d8d8c5
KW
153my $nel = latin1_to_native("\x85");
154$nel = qr/^$nel$/;
52ea3e69 155
a0d8d8c5
KW
156like("\N{NEXT LINE (NEL)}", $nel);
157like("\N{NEXT LINE}", $nel);
158like("\N{NEL}", $nel);
159is("\N{BYTE ORDER MARK}", chr(0xFEFF));
160is("\N{BOM}", chr(0xFEFF));
51e9e896 161
52ea3e69
JH
162{
163 use warnings 'deprecated';
164
a0d8d8c5 165 is("\N{HORIZONTAL TABULATION}", "\t");
52ea3e69 166
a0d8d8c5 167 ok(grep { /"HORIZONTAL TABULATION" is deprecated/ } @WARN);
822ebcc8 168
52ea3e69
JH
169 no warnings 'deprecated';
170
a0d8d8c5 171 is("\N{VERTICAL TABULATION}", "\013");
52ea3e69 172
a0d8d8c5 173 ok(! grep { /"VERTICAL TABULATION" is deprecated/ } @WARN);
52ea3e69 174}
822ebcc8 175
a0d8d8c5 176is(charnames::viacode(0xFEFF), "ZERO WIDTH NO-BREAK SPACE");
a23c04e4 177
872c91ae
JH
178{
179 use warnings;
a0d8d8c5 180 cmp_ok(ord("\N{BOM}"), '==', 0xFEFF);
872c91ae
JH
181}
182
a0d8d8c5 183cmp_ok(ord("\N{ZWNJ}"), '==', 0x200C);
24b5d5cc 184
a0d8d8c5 185cmp_ok(ord("\N{ZWJ}"), '==', 0x200D);
dbc0d4f2 186
a0d8d8c5 187is("\N{U+263A}", "\N{WHITE SMILING FACE}");
dbc0d4f2 188
51b0dbc4 189{
a0d8d8c5
KW
190 cmp_ok( 0x3093, '==', charnames::vianame("HIRAGANA LETTER N"));
191 cmp_ok(0x0397, '==', charnames::vianame("GREEK CAPITAL LETTER ETA"));
51b0dbc4
ST
192}
193
a0d8d8c5
KW
194ok(! defined charnames::viacode(0x110000));
195ok(! grep { /you asked for U+110000/ } @WARN);
e10d7780 196
c8002005
KW
197is(charnames::viacode(0), "NULL");
198is(charnames::viacode("BE"), "VULGAR FRACTION THREE QUARTERS");
199is(charnames::viacode("U+00000000000FEED"), "ARABIC LETTER WAW ISOLATED FORM");
200
e5c3f898
MG
201# [perl #30409] charnames.pm clobbers default variable
202$_ = 'foobar';
203eval "use charnames ':full';";
a0d8d8c5 204is($_, 'foobar');
e5c3f898 205
c776535e
NC
206# Unicode slowdown noted by Phil Pennock, traced to a bug fix in index
207# SADAHIRO Tomoyuki's suggestion is to ensure that the UTF-8ness of both
208# arguments are indentical before calling index.
209# To do this can take advantage of the fact that unicore/Name.pl is 7 bit
e5d6fe1e
KW
210# (or at least should be). So assert that that it's true here. EBCDIC
211# may be a problem (khw).
c776535e
NC
212
213my $names = do "unicore/Name.pl";
a0d8d8c5
KW
214ok(defined $names);
215my $non_ascii = native_to_latin1($names) =~ tr/\0-\177//c;
216ok(! $non_ascii, "Make sure all names are ASCII-only");
c776535e 217
eb915052
RGS
218# Verify that charnames propagate to eval("")
219my $evaltry = eval q[ "Eval: \N{LEFT-POINTING DOUBLE ANGLE QUOTATION MARK}" ];
220if ($@) {
a0d8d8c5
KW
221 fail('charnames failed to propagate to eval("")');
222 fail('next test also fails to make the same number of tests');
eb915052 223} else {
a0d8d8c5
KW
224 pass('charnames propagated to eval("")');
225 is($evaltry, "Eval: \N{LEFT-POINTING DOUBLE ANGLE QUOTATION MARK}");
eb915052 226}
c776535e 227
ae6979a8 228# Verify that db includes the normative NameAliases.txt names
a0d8d8c5 229is("\N{U+1D0C5}", "\N{BYZANTINE MUSICAL SYMBOL FTHORA SKLIRON CHROMA VASIS}");
ae6979a8 230
ec34a119
DM
231# [perl #73174] use of \N{FOO} used to reset %^H
232
233{
234 use charnames ":full";
235 my $res;
236 BEGIN { $^H{73174} = "foo" }
237 BEGIN { $res = ($^H{73174} // "") }
238 # forces loading of utf8.pm, which used to reset %^H
239 $res .= '-1' if ":" =~ /\N{COLON}/i;
240 BEGIN { $res .= '-' . ($^H{73174} // "") }
241 $res .= '-' . ($^H{73174} // "");
242 $res .= '-2' if ":" =~ /\N{COLON}/;
243 $res .= '-3' if ":" =~ /\N{COLON}/i;
a0d8d8c5 244 is($res, "foo-foo-1--2-3");
ec34a119 245}