Commit | Line | Data |
---|---|---|
646ca15d JH |
1 | #!./perl |
2 | ||
3 | BEGIN { | |
4 | chdir 't' if -d 't'; | |
5 | @INC = qw(. ../lib); # ../lib needed for test.deparse | |
6 | require "test.pl"; | |
7 | } | |
8 | ||
a75b6b17 | 9 | plan tests => 42; |
646ca15d JH |
10 | |
11 | # Note that t/op/ord.t already tests for chr() <-> ord() rountripping. | |
12 | ||
13 | # Don't assume ASCII. | |
14 | ||
15 | is(chr(ord("A")), "A"); | |
16 | ||
17 | is(chr( 0), "\x00"); | |
18 | is(chr(127), "\x7F"); | |
19 | is(chr(128), "\x80"); | |
20 | is(chr(255), "\xFF"); | |
21 | ||
8a064bd6 JH |
22 | is(chr(-0.1), "\x{FFFD}"); # The U+FFFD Unicode replacement character. |
23 | is(chr(-1 ), "\x{FFFD}"); | |
24 | is(chr(-2 ), "\x{FFFD}"); | |
25 | is(chr(-3.0), "\x{FFFD}"); | |
26 | { | |
27 | use bytes; # Backward compatibility. | |
28 | is(chr(-0.1), "\x00"); | |
29 | is(chr(-1 ), "\xFF"); | |
30 | is(chr(-2 ), "\xFE"); | |
31 | is(chr(-3.0), "\xFD"); | |
32 | } | |
a75b6b17 | 33 | |
b3fe8680 FC |
34 | # Make sure -1 is treated the same way when coming from a tied variable |
35 | sub TIESCALAR {bless[]} | |
36 | sub STORE { $_[0][0] = $_[1] } | |
37 | sub FETCH { $_[0][0] } | |
38 | tie $t, ""; | |
39 | $t = -1; is chr $t, chr -1, 'chr $tied when $tied is -1'; | |
40 | $t = -2; is chr $t, chr -2, 'chr $tied when $tied is -2'; | |
41 | $t = -1.1; is chr $t, chr -1.1, 'chr $tied when $tied is -1.1'; | |
42 | $t = -2.2; is chr $t, chr -2.2, 'chr $tied when $tied is -2.2'; | |
646ca15d | 43 | |
a75b6b17 FC |
44 | # And that stringy scalars are treated likewise |
45 | is chr "-1", chr -1, 'chr "-1" eq chr -1'; | |
46 | is chr "-2", chr -2, 'chr "-2" eq chr -2'; | |
47 | is chr "-1.1", chr -1.1, 'chr "-1.1" eq chr -1.1'; | |
48 | is chr "-2.2", chr -2.2, 'chr "-2.2" eq chr -2.2'; | |
49 | ||
4c5ed6e2 TS |
50 | # Check UTF-8 (not UTF-EBCDIC). |
51 | SKIP: { | |
52 | skip "no UTF-8 on EBCDIC", 21 if chr(193) eq 'A'; | |
646ca15d | 53 | |
8a064bd6 JH |
54 | sub hexes { |
55 | no warnings 'utf8'; # avoid surrogate and beyond Unicode warnings | |
1651fc44 | 56 | join(" ",unpack "U0 (H2)*", chr $_[0]); |
8a064bd6 | 57 | } |
646ca15d JH |
58 | |
59 | # The following code points are some interesting steps in UTF-8. | |
4c5ed6e2 TS |
60 | is(hexes( 0x100), "c4 80"); |
61 | is(hexes( 0x7FF), "df bf"); | |
62 | is(hexes( 0x800), "e0 a0 80"); | |
63 | is(hexes( 0xFFF), "e0 bf bf"); | |
64 | is(hexes( 0x1000), "e1 80 80"); | |
65 | is(hexes( 0xCFFF), "ec bf bf"); | |
66 | is(hexes( 0xD000), "ed 80 80"); | |
67 | is(hexes( 0xD7FF), "ed 9f bf"); | |
68 | is(hexes( 0xD800), "ed a0 80"); # not strict utf-8 (surrogate area begin) | |
69 | is(hexes( 0xDFFF), "ed bf bf"); # not strict utf-8 (surrogate area end) | |
70 | is(hexes( 0xE000), "ee 80 80"); | |
71 | is(hexes( 0xFFFF), "ef bf bf"); | |
72 | is(hexes( 0x10000), "f0 90 80 80"); | |
73 | is(hexes( 0x3FFFF), "f0 bf bf bf"); | |
74 | is(hexes( 0x40000), "f1 80 80 80"); | |
75 | is(hexes( 0xFFFFF), "f3 bf bf bf"); | |
76 | is(hexes(0x100000), "f4 80 80 80"); | |
77 | is(hexes(0x10FFFF), "f4 8f bf bf"); # Unicode (4.1) last code point | |
78 | is(hexes(0x110000), "f4 90 80 80"); | |
79 | is(hexes(0x1FFFFF), "f7 bf bf bf"); # last four byte encoding | |
80 | is(hexes(0x200000), "f8 88 80 80 80"); | |
81 | } | |
b3fe8680 | 82 |