Commit | Line | Data |
---|---|---|
646ca15d JH |
1 | #!./perl |
2 | ||
3 | BEGIN { | |
4 | chdir 't' if -d 't'; | |
5 | @INC = qw(. ../lib); # ../lib needed for test.deparse | |
6 | require "test.pl"; | |
7 | } | |
8 | ||
8a064bd6 | 9 | plan tests => 34; |
646ca15d JH |
10 | |
11 | # Note that t/op/ord.t already tests for chr() <-> ord() round-tripping. | |
12 | ||
13 | # Don't assume ASCII. | |
14 | ||
15 | is(chr(ord("A")), "A"); | |
16 | ||
17 | is(chr( 0), "\x00"); | |
18 | is(chr(127), "\x7F"); | |
19 | is(chr(128), "\x80"); | |
20 | is(chr(255), "\xFF"); | |
21 | ||
8a064bd6 JH |
22 | is(chr(-0.1), "\x{FFFD}"); # The U+FFFD Unicode replacement character. |
23 | is(chr(-1 ), "\x{FFFD}"); | |
24 | is(chr(-2 ), "\x{FFFD}"); | |
25 | is(chr(-3.0), "\x{FFFD}"); | |
26 | { | |
27 | use bytes; # Backward compatibility. | |
28 | is(chr(-0.1), "\x00"); | |
29 | is(chr(-1 ), "\xFF"); | |
30 | is(chr(-2 ), "\xFE"); | |
31 | is(chr(-3.0), "\xFD"); | |
32 | } | |
646ca15d | 33 | |
4c5ed6e2 TS |
34 | # Check UTF-8 (not UTF-EBCDIC). |
35 | SKIP: { | |
36 | skip "no UTF-8 on EBCDIC", 21 if chr(193) eq 'A'; | |
646ca15d | 37 | |
8a064bd6 JH |
38 | sub hexes { |
39 | no warnings 'utf8'; # silence warnings for surrogates and for code points beyond Unicode (above U+10FFFF) | |
1651fc44 | 40 | join(" ",unpack "U0 (H2)*", chr $_[0]); |
8a064bd6 | 41 | } |
646ca15d JH |
42 | |
43 | # The following code points are some interesting steps in UTF-8. | |
4c5ed6e2 TS |
44 | is(hexes( 0x100), "c4 80"); |
45 | is(hexes( 0x7FF), "df bf"); | |
46 | is(hexes( 0x800), "e0 a0 80"); | |
47 | is(hexes( 0xFFF), "e0 bf bf"); | |
48 | is(hexes( 0x1000), "e1 80 80"); | |
49 | is(hexes( 0xCFFF), "ec bf bf"); | |
50 | is(hexes( 0xD000), "ed 80 80"); | |
51 | is(hexes( 0xD7FF), "ed 9f bf"); | |
52 | is(hexes( 0xD800), "ed a0 80"); # not strict utf-8 (surrogate area begin) | |
53 | is(hexes( 0xDFFF), "ed bf bf"); # not strict utf-8 (surrogate area end) | |
54 | is(hexes( 0xE000), "ee 80 80"); | |
55 | is(hexes( 0xFFFF), "ef bf bf"); | |
56 | is(hexes( 0x10000), "f0 90 80 80"); | |
57 | is(hexes( 0x3FFFF), "f0 bf bf bf"); | |
58 | is(hexes( 0x40000), "f1 80 80 80"); | |
59 | is(hexes( 0xFFFFF), "f3 bf bf bf"); | |
60 | is(hexes(0x100000), "f4 80 80 80"); | |
61 | is(hexes(0x10FFFF), "f4 8f bf bf"); # Unicode (4.1) last code point | |
62 | is(hexes(0x110000), "f4 90 80 80"); | |
63 | is(hexes(0x1FFFFF), "f7 bf bf bf"); # last four byte encoding | |
64 | is(hexes(0x200000), "f8 88 80 80 80"); | |
65 | } |