Commit | Line | Data |
---|---|---|
b0f2b690 JH |
1 | #!./perl |
2 | ||
2e3dedfe | 3 | print "1..51\n"; |
983ffd37 JH |
4 | |
5 | my $test = 1; | |
6 | ||
7 | sub ok { | |
8 | if ($_[0]) { | |
9 | if ($_[1]) { | |
10 | print "ok $test - $_[1]\n"; | |
11 | } else { | |
12 | print "ok $test\n"; | |
13 | } | |
14 | } else { | |
15 | if ($_[1]) { | |
16 | print "not ok $test - $_[1]\n"; | |
17 | } else { | |
18 | print "not ok $test\n"; | |
19 | } | |
20 | } | |
21 | $test++; | |
22 | } | |
b0f2b690 JH |
23 | |
24 | $a = "HELLO.* world"; | |
25 | $b = "hello.* WORLD"; | |
26 | ||
983ffd37 JH |
27 | ok("\Q$a\E." eq "HELLO\\.\\*\\ world.", '\Q\E HELLO.* world'); |
28 | ok("\u$a" eq "HELLO\.\* world", '\u'); | |
29 | ok("\l$a" eq "hELLO\.\* world", '\l'); | |
30 | ok("\U$a" eq "HELLO\.\* WORLD", '\U'); | |
31 | ok("\L$a" eq "hello\.\* world", '\L'); | |
32 | ||
33 | ok(quotemeta($a) eq "HELLO\\.\\*\\ world", 'quotemeta'); | |
34 | ok(ucfirst($a) eq "HELLO\.\* world", 'ucfirst'); | |
35 | ok(lcfirst($a) eq "hELLO\.\* world", 'lcfirst'); | |
36 | ok(uc($a) eq "HELLO\.\* WORLD", 'uc'); | |
37 | ok(lc($a) eq "hello\.\* world", 'lc'); | |
38 | ||
39 | ok("\Q$b\E." eq "hello\\.\\*\\ WORLD.", '\Q\E hello.* WORLD'); | |
40 | ok("\u$b" eq "Hello\.\* WORLD", '\u'); | |
41 | ok("\l$b" eq "hello\.\* WORLD", '\l'); | |
42 | ok("\U$b" eq "HELLO\.\* WORLD", '\U'); | |
43 | ok("\L$b" eq "hello\.\* world", '\L'); | |
44 | ||
45 | ok(quotemeta($b) eq "hello\\.\\*\\ WORLD", 'quotemeta'); | |
46 | ok(ucfirst($b) eq "Hello\.\* WORLD", 'ucfirst'); | |
47 | ok(lcfirst($b) eq "hello\.\* WORLD", 'lcfirst'); | |
48 | ok(uc($b) eq "HELLO\.\* WORLD", 'uc'); | |
49 | ok(lc($b) eq "hello\.\* world", 'lc'); | |
50 | ||
51 | # \x{100} is LATIN CAPITAL LETTER A WITH MACRON; its bijective lowercase is | |
7e965bc5 | 52 | # \x{101}, LATIN SMALL LETTER A WITH MACRON. |
b0f2b690 | 53 | |
2533d950 JH |
54 | $a = "\x{100}\x{101}Aa"; |
55 | $b = "\x{101}\x{100}aA"; | |
b0f2b690 | 56 | |
2533d950 JH |
57 | ok("\Q$a\E." eq "\x{100}\x{101}Aa.", '\Q\E \x{100}\x{101}Aa'); |
58 | ok("\u$a" eq "\x{100}\x{101}Aa", '\u'); | |
59 | ok("\l$a" eq "\x{101}\x{101}Aa", '\l'); | |
60 | ok("\U$a" eq "\x{100}\x{100}AA", '\U'); | |
61 | ok("\L$a" eq "\x{101}\x{101}aa", '\L'); | |
983ffd37 | 62 | |
2533d950 JH |
63 | ok(quotemeta($a) eq "\x{100}\x{101}Aa", 'quotemeta'); |
64 | ok(ucfirst($a) eq "\x{100}\x{101}Aa", 'ucfirst'); | |
65 | ok(lcfirst($a) eq "\x{101}\x{101}Aa", 'lcfirst'); | |
66 | ok(uc($a) eq "\x{100}\x{100}AA", 'uc'); | |
67 | ok(lc($a) eq "\x{101}\x{101}aa", 'lc'); | |
983ffd37 | 68 | |
2533d950 JH |
69 | ok("\Q$b\E." eq "\x{101}\x{100}aA.", '\Q\E \x{101}\x{100}aA'); |
70 | ok("\u$b" eq "\x{100}\x{100}aA", '\u'); | |
71 | ok("\l$b" eq "\x{101}\x{100}aA", '\l'); | |
72 | ok("\U$b" eq "\x{100}\x{100}AA", '\U'); | |
73 | ok("\L$b" eq "\x{101}\x{101}aa", '\L'); | |
983ffd37 | 74 | |
2533d950 JH |
75 | ok(quotemeta($b) eq "\x{101}\x{100}aA", 'quotemeta'); |
76 | ok(ucfirst($b) eq "\x{100}\x{100}aA", 'ucfirst'); | |
77 | ok(lcfirst($b) eq "\x{101}\x{100}aA", 'lcfirst'); | |
78 | ok(uc($b) eq "\x{100}\x{100}AA", 'uc'); | |
79 | ok(lc($b) eq "\x{101}\x{101}aa", 'lc'); | |
983ffd37 JH |
80 | |
81 | # \x{DF} is LATIN SMALL LETTER SHARP S, its uppercase is SS or \x{53}\x{53}; | |
82 | # \x{149} is LATIN SMALL LETTER N PRECEDED BY APOSTROPHE, its uppercase is | |
83 | # \x{2BC}\x{E4} or MODIFIER LETTER APOSTROPHE and N. | |
84 | ||
c811e616 JH |
85 | # In EBCDIC \x{DF} is LATIN SMALL LETTER Y WITH DIAERESIS, |
86 | # and it's uppercase is \x{178}, LATIN CAPITAL LETTER Y WITH DIAERESIS. | |
87 | ||
88 | if (ord("A") == 193) { # EBCDIC | |
1b680483 | 89 | ok("\U\x{DF}aB\x{149}cD" eq "\x{178}AB\x{2BC}NCD", |
c811e616 JH |
90 | "multicharacter uppercase"); |
91 | } elsif (ord("A") == 65) { | |
1b680483 | 92 | ok("\U\x{DF}aB\x{149}cD" eq "SSAB\x{2BC}NCD", |
c811e616 JH |
93 | "multicharacter uppercase"); |
94 | } else { | |
95 | ok(0, "what is your encoding?"); | |
96 | } | |
983ffd37 JH |
97 | |
98 | # The \x{DF} is its own lowercase, ditto for \x{149}. | |
99 | # There are no single character -> multiple characters lowercase mappings. | |
b0f2b690 | 100 | |
c811e616 | 101 | if (ord("A") == 193) { # EBCDIC |
1b680483 | 102 | ok("\LaB\x{149}cD" eq "ab\x{149}cd", |
c811e616 JH |
103 | "multicharacter lowercase"); |
104 | } elsif (ord("A") == 65) { | |
1b680483 | 105 | ok("\L\x{DF}aB\x{149}cD" eq "\x{DF}ab\x{149}cd", |
c811e616 JH |
106 | "multicharacter lowercase"); |
107 | } else { | |
108 | ok(0, "what is your encoding?"); | |
109 | } | |
b0f2b690 | 110 | |
44bc797b JH |
111 | # titlecase is used for \u / ucfirst. |
112 | ||
113 | # \x{587} is ARMENIAN SMALL LIGATURE ECH YIWN and its titlecase is | |
114 | # \x{535}\x{582} ARMENIAN CAPITAL LETTER ECH + ARMENIAN SMALL LETTER YIWN | |
115 | # while its lowercase is | |
116 | # \x{587} itself | |
117 | # and its uppercase is | |
118 | # \x{535}\x{552} ARMENIAN CAPITAL LETTER ECH + ARMENIAN CAPITAL LETTER YIWN | |
119 | ||
120 | $a = "\x{587}"; | |
121 | ||
122 | ok("\L\x{587}" eq "\x{587}", "ligature lowercase"); | |
123 | ok("\u\x{587}" eq "\x{535}\x{582}", "ligature titlecase"); | |
124 | ok("\U\x{587}" eq "\x{535}\x{552}", "ligature uppercase"); | |
125 | ||
2e3dedfe JH |
126 | # mktables had problems where many-to-one case mappings didn't work right. |
127 | # The lib/unifold.t should give the fourth folding, "casefolding", a good | |
128 | # workout. | |
129 | ||
130 | ok(lc("\x{1C4}") eq "\x{1C6}", "U+01C4 lc is U+01C6"); | |
131 | ok(lc("\x{1C5}") eq "\x{1C6}", "U+01C5 lc is U+01C6, too"); | |
132 | ||
133 | ok(ucfirst("\x{3C2}") eq "\x{3A3}", "U+03C2 ucfirst is U+03A3"); | |
134 | ok(ucfirst("\x{3C3}") eq "\x{3A3}", "U+03C3 ucfirst is U+03A3, too"); | |
135 | ||
136 | ok(uc("\x{1C5}") eq "\x{1C4}", "U+01C5 uc is U+01C4"); | |
137 | ok(uc("\x{1C6}") eq "\x{1C4}", "U+01C6 uc is U+01C4, too"); | |
138 |