Commit | Line | Data |
---|---|---|
c8e3bb4c | 1 | # tr.t |
f4240379 | 2 | $|=1; |
c8e3bb4c | 3 | |
f05dd7cc JH |
4 | BEGIN { |
5 | chdir 't' if -d 't'; | |
953ab6e5 | 6 | require './test.pl'; |
43ece5b1 | 7 | set_up_inc('../lib'); |
47918419 JH |
8 | if (is_miniperl()) { |
9 | eval 'require utf8'; | |
caab73c0 | 10 | if ($@) { skip_all("miniperl, no 'utf8'") } |
47918419 | 11 | } |
f05dd7cc | 12 | } |
a5095b95 | 13 | |
47918419 JH |
14 | use utf8; |
15 | ||
fe2ba0a2 | 16 | plan tests => 216; |
953ab6e5 | 17 | |
f605e527 FC |
18 | # Test this first before we extend the stack with other operations. |
19 | # This caused an asan failure due to a bad write past the end of the stack. | |
20 | eval { my $x; die 1..127, $x =~ y/// }; | |
21 | ||
c8e3bb4c GS |
22 | $_ = "abcdefghijklmnopqrstuvwxyz"; |
23 | ||
24 | tr/a-z/A-Z/; | |
25 | ||
953ab6e5 | 26 | is($_, "ABCDEFGHIJKLMNOPQRSTUVWXYZ", 'uc'); |
c8e3bb4c GS |
27 | |
28 | tr/A-Z/a-z/; | |
29 | ||
953ab6e5 | 30 | is($_, "abcdefghijklmnopqrstuvwxyz", 'lc'); |
c8e3bb4c GS |
31 | |
32 | tr/b-y/B-Y/; | |
953ab6e5 | 33 | is($_, "aBCDEFGHIJKLMNOPQRSTUVWXYz", 'partial uc'); |
c8e3bb4c | 34 | |
8efef67c KW |
35 | tr/a-a/AB/; |
36 | is($_, "ABCDEFGHIJKLMNOPQRSTUVWXYz", 'single char range a-a'); | |
37 | ||
f4240379 KW |
38 | eval 'tr/a/\N{KATAKANA LETTER AINU P}/;'; |
39 | like $@, | |
92e8e650 | 40 | qr/\\N\{KATAKANA LETTER AINU P\} must not be a named sequence in transliteration operator/, |
f4240379 KW |
41 | "Illegal to tr/// named sequence"; |
42 | ||
43 | eval 'tr/\x{101}-\x{100}//;'; | |
44 | like $@, | |
92e8e650 | 45 | qr/Invalid range "\\x\{0101\}-\\x\{0100\}" in transliteration operator/, |
f4240379 KW |
46 | "UTF-8 range with min > max"; |
47 | ||
48 | SKIP: { # Test literal range end point special handling | |
49 | unless ($::IS_EBCDIC) { | |
50 | skip "Valid only for EBCDIC", 24; | |
51 | } | |
52 | ||
53 | $_ = "\x89"; # is 'i' | |
54 | tr/i-j//d; | |
55 | is($_, "", '"\x89" should match [i-j]'); | |
56 | $_ = "\x8A"; | |
57 | tr/i-j//d; | |
58 | is($_, "\x8A", '"\x8A" shouldnt match [i-j]'); | |
59 | $_ = "\x90"; | |
60 | tr/i-j//d; | |
61 | is($_, "\x90", '"\x90" shouldnt match [i-j]'); | |
62 | $_ = "\x91"; # is 'j' | |
63 | tr/i-j//d; | |
64 | is($_, "", '"\x91" should match [i-j]'); | |
65 | ||
66 | $_ = "\x89"; | |
67 | tr/i-\N{LATIN SMALL LETTER J}//d; | |
68 | is($_, "", '"\x89" should match [i-\N{LATIN SMALL LETTER J}]'); | |
69 | $_ = "\x8A"; | |
70 | tr/i-\N{LATIN SMALL LETTER J}//d; | |
71 | is($_, "\x8A", '"\x8A" shouldnt match [i-\N{LATIN SMALL LETTER J}]'); | |
72 | $_ = "\x90"; | |
73 | tr/i-\N{LATIN SMALL LETTER J}//d; | |
74 | is($_, "\x90", '"\x90" shouldnt match [i-\N{LATIN SMALL LETTER J}]'); | |
75 | $_ = "\x91"; | |
76 | tr/i-\N{LATIN SMALL LETTER J}//d; | |
77 | is($_, "", '"\x91" should match [i-\N{LATIN SMALL LETTER J}]'); | |
78 | ||
79 | $_ = "\x89"; | |
80 | tr/i-\N{U+6A}//d; | |
81 | is($_, "", '"\x89" should match [i-\N{U+6A}]'); | |
82 | $_ = "\x8A"; | |
83 | tr/i-\N{U+6A}//d; | |
84 | is($_, "\x8A", '"\x8A" shouldnt match [i-\N{U+6A}]'); | |
85 | $_ = "\x90"; | |
86 | tr/i-\N{U+6A}//d; | |
87 | is($_, "\x90", '"\x90" shouldnt match [i-\N{U+6A}]'); | |
88 | $_ = "\x91"; | |
89 | tr/i-\N{U+6A}//d; | |
90 | is($_, "", '"\x91" should match [i-\N{U+6A}]'); | |
91 | ||
92 | $_ = "\x89"; | |
93 | tr/\N{U+69}-\N{U+6A}//d; | |
94 | is($_, "", '"\x89" should match [\N{U+69}-\N{U+6A}]'); | |
95 | $_ = "\x8A"; | |
96 | tr/\N{U+69}-\N{U+6A}//d; | |
97 | is($_, "\x8A", '"\x8A" shouldnt match [\N{U+69}-\N{U+6A}]'); | |
98 | $_ = "\x90"; | |
99 | tr/\N{U+69}-\N{U+6A}//d; | |
100 | is($_, "\x90", '"\x90" shouldnt match [\N{U+69}-\N{U+6A}]'); | |
101 | $_ = "\x91"; | |
102 | tr/\N{U+69}-\N{U+6A}//d; | |
103 | is($_, "", '"\x91" should match [\N{U+69}-\N{U+6A}]'); | |
104 | ||
105 | $_ = "\x89"; | |
106 | tr/i-\x{91}//d; | |
107 | is($_, "", '"\x89" should match [i-\x{91}]'); | |
108 | $_ = "\x8A"; | |
109 | tr/i-\x{91}//d; | |
110 | is($_, "", '"\x8A" should match [i-\x{91}]'); | |
111 | $_ = "\x90"; | |
112 | tr/i-\x{91}//d; | |
113 | is($_, "", '"\x90" should match [i-\x{91}]'); | |
114 | $_ = "\x91"; | |
115 | tr/i-\x{91}//d; | |
116 | is($_, "", '"\x91" should match [i-\x{91}]'); | |
117 | ||
118 | # Need to use eval, because tries to compile on ASCII platforms even | |
119 | # though the tests are skipped, and fails because 0x89-j is an illegal | |
120 | # range there. | |
121 | $_ = "\x89"; | |
122 | eval 'tr/\x{89}-j//d'; | |
123 | is($_, "", '"\x89" should match [\x{89}-j]'); | |
124 | $_ = "\x8A"; | |
125 | eval 'tr/\x{89}-j//d'; | |
126 | is($_, "", '"\x8A" should match [\x{89}-j]'); | |
127 | $_ = "\x90"; | |
128 | eval 'tr/\x{89}-j//d'; | |
129 | is($_, "", '"\x90" should match [\x{89}-j]'); | |
130 | $_ = "\x91"; | |
131 | eval 'tr/\x{89}-j//d'; | |
132 | is($_, "", '"\x91" should match [\x{89}-j]'); | |
133 | } | |
134 | ||
c8e3bb4c GS |
135 | |
136 | # In EBCDIC 'I' is \xc9 and 'J' is \0xd1, 'i' is \x89 and 'j' is \x91. | |
137 | # Yes, discontinuities. Regardless, the \xca in the below should stay | |
138 | # untouched (and not became \x8a). | |
5e037136 | 139 | { |
5e037136 | 140 | $_ = "I\xcaJ"; |
c8e3bb4c | 141 | |
5e037136 | 142 | tr/I-J/i-j/; |
c8e3bb4c | 143 | |
ff36f15d | 144 | is($_, "i\xcaj", 'EBCDIC discontinuity'); |
5e037136 | 145 | } |
c8e3bb4c | 146 | # |
2de7b02f | 147 | |
2de7b02f GS |
148 | ($x = 12) =~ tr/1/3/; |
149 | (my $y = 12) =~ tr/1/3/; | |
150 | ($f = 1.5) =~ tr/1/3/; | |
151 | (my $g = 1.5) =~ tr/1/3/; | |
953ab6e5 MS |
152 | is($x + $y + $f + $g, 71, 'tr cancels IOK and NOK'); |
153 | ||
bb16bae8 FC |
154 | # /r |
155 | $_ = 'adam'; | |
156 | is y/dam/ve/rd, 'eve', '/r'; | |
157 | is $_, 'adam', '/r leaves param alone'; | |
158 | $g = 'ruby'; | |
159 | is $g =~ y/bury/repl/r, 'perl', '/r with explicit param'; | |
160 | is $g, 'ruby', '/r leaves explicit param alone'; | |
161 | is "aaa" =~ y\a\b\r, 'bbb', '/r with constant param'; | |
162 | ok !eval '$_ !~ y///r', "!~ y///r is forbidden"; | |
163 | like $@, qr\^Using !~ with tr///r doesn't make sense\, | |
164 | "!~ y///r error message"; | |
165 | { | |
166 | my $w; | |
167 | my $wc; | |
168 | local $SIG{__WARN__} = sub { $w = shift; ++$wc }; | |
169 | local $^W = 1; | |
170 | eval 'y///r; 1'; | |
171 | like $w, qr '^Useless use of non-destructive transliteration \(tr///r\)', | |
172 | '/r warns in void context'; | |
173 | is $wc, 1, '/r warns just once'; | |
174 | } | |
2de7b02f | 175 | |
ee95e30c | 176 | # perlbug [ID 20000511.005 (#3237)] |
2de7b02f GS |
177 | $_ = 'fred'; |
178 | /([a-z]{2})/; | |
179 | $1 =~ tr/A-Z//; | |
180 | s/^(\s*)f/$1F/; | |
953ab6e5 MS |
181 | is($_, 'Fred', 'harmless if explicitly not updating'); |
182 | ||
183 | ||
184 | # A variant of the above, added in 5.7.2 | |
185 | $_ = 'fred'; | |
186 | /([a-z]{2})/; | |
187 | eval '$1 =~ tr/A-Z/A-Z/;'; | |
188 | s/^(\s*)f/$1F/; | |
189 | is($_, 'Fred', 'harmless if implicitly not updating'); | |
190 | is($@, '', ' no error'); | |
191 | ||
2de7b02f GS |
192 | |
193 | # check tr handles UTF8 correctly | |
194 | ($x = 256.65.258) =~ tr/a/b/; | |
953ab6e5 MS |
195 | is($x, 256.65.258, 'handles UTF8'); |
196 | is(length $x, 3); | |
197 | ||
2de7b02f | 198 | $x =~ tr/A/B/; |
953ab6e5 | 199 | is(length $x, 3); |
83bcbc61 | 200 | if ($::IS_ASCII) { # ASCII |
953ab6e5 | 201 | is($x, 256.66.258); |
67a17885 PP |
202 | } |
203 | else { | |
953ab6e5 | 204 | is($x, 256.65.258); |
67a17885 | 205 | } |
953ab6e5 | 206 | |
cbe7f703 PP |
207 | # EBCDIC variants of the above tests |
208 | ($x = 256.193.258) =~ tr/a/b/; | |
953ab6e5 MS |
209 | is(length $x, 3); |
210 | is($x, 256.193.258); | |
211 | ||
cbe7f703 | 212 | $x =~ tr/A/B/; |
953ab6e5 | 213 | is(length $x, 3); |
83bcbc61 | 214 | if ($::IS_ASCII) { # ASCII |
953ab6e5 | 215 | is($x, 256.193.258); |
cbe7f703 PP |
216 | } |
217 | else { | |
953ab6e5 | 218 | is($x, 256.194.258); |
cbe7f703 | 219 | } |
953ab6e5 | 220 | |
036b4402 GS |
221 | |
222 | { | |
953ab6e5 MS |
223 | my $l = chr(300); my $r = chr(400); |
224 | $x = 200.300.400; | |
225 | $x =~ tr/\x{12c}/\x{190}/; | |
226 | is($x, 200.400.400, | |
227 | 'changing UTF8 chars in a UTF8 string, same length'); | |
228 | is(length $x, 3); | |
229 | ||
230 | $x = 200.300.400; | |
231 | $x =~ tr/\x{12c}/\x{be8}/; | |
232 | is($x, 200.3048.400, ' more bytes'); | |
233 | is(length $x, 3); | |
234 | ||
235 | $x = 100.125.60; | |
236 | $x =~ tr/\x{64}/\x{190}/; | |
237 | is($x, 400.125.60, 'Putting UT8 chars into a non-UTF8 string'); | |
238 | is(length $x, 3); | |
239 | ||
240 | $x = 400.125.60; | |
241 | $x =~ tr/\x{190}/\x{64}/; | |
242 | is($x, 100.125.60, 'Removing UTF8 chars from UTF8 string'); | |
243 | is(length $x, 3); | |
244 | ||
245 | $x = 400.125.60.400; | |
246 | $y = $x =~ tr/\x{190}/\x{190}/; | |
247 | is($y, 2, 'Counting UTF8 chars in UTF8 string'); | |
248 | ||
249 | $x = 60.400.125.60.400; | |
250 | $y = $x =~ tr/\x{3c}/\x{3c}/; | |
251 | is($y, 2, ' non-UTF8 chars in UTF8 string'); | |
252 | ||
253 | # 17 - counting UTF8 chars in non-UTF8 string | |
254 | $x = 200.125.60; | |
255 | $y = $x =~ tr/\x{190}/\x{190}/; | |
256 | is($y, 0, ' UTF8 chars in non-UTFs string'); | |
036b4402 | 257 | } |
c2e66d9e | 258 | |
c2e66d9e | 259 | $_ = "abcdefghijklmnopqrstuvwxyz"; |
953ab6e5 MS |
260 | eval 'tr/a-z-9/ /'; |
261 | like($@, qr/^Ambiguous range in transliteration operator/, 'tr/a-z-9//'); | |
c2e66d9e | 262 | |
cbe7f703 | 263 | # 19-21: Make sure leading and trailing hyphens still work |
c2e66d9e GS |
264 | $_ = "car-rot9"; |
265 | tr/-a-m/./; | |
953ab6e5 | 266 | is($_, '..r.rot9', 'hyphens, leading'); |
c2e66d9e GS |
267 | |
268 | $_ = "car-rot9"; | |
269 | tr/a-m-/./; | |
953ab6e5 | 270 | is($_, '..r.rot9', ' trailing'); |
c2e66d9e GS |
271 | |
272 | $_ = "car-rot9"; | |
273 | tr/-a-m-/./; | |
953ab6e5 | 274 | is($_, '..r.rot9', ' both'); |
c2e66d9e GS |
275 | |
276 | $_ = "abcdefghijklmnop"; | |
277 | tr/ae-hn/./; | |
953ab6e5 | 278 | is($_, '.bcd....ijklm.op'); |
c2e66d9e GS |
279 | |
280 | $_ = "abcdefghijklmnop"; | |
281 | tr/a-cf-kn-p/./; | |
953ab6e5 | 282 | is($_, '...de......lm...'); |
c2e66d9e GS |
283 | |
284 | $_ = "abcdefghijklmnop"; | |
285 | tr/a-ceg-ikm-o/./; | |
953ab6e5 MS |
286 | is($_, '...d.f...j.l...p'); |
287 | ||
c2e66d9e | 288 | |
c2e66d9e GS |
289 | # 20000705 MJD |
290 | eval "tr/m-d/ /"; | |
321ecc04 | 291 | like($@, qr/^Invalid range "m-d" in transliteration operator/, |
953ab6e5 | 292 | 'reversed range check'); |
c2e66d9e | 293 | |
d897a58d | 294 | 'abcdef' =~ /(bcd)/; |
953ab6e5 MS |
295 | is(eval '$1 =~ tr/abcd//', 3, 'explicit read-only count'); |
296 | is($@, '', ' no error'); | |
d897a58d | 297 | |
953ab6e5 MS |
298 | 'abcdef' =~ /(bcd)/; |
299 | is(eval '$1 =~ tr/abcd/abcd/', 3, 'implicit read-only count'); | |
300 | is($@, '', ' no error'); | |
301 | ||
302 | is(eval '"123" =~ tr/12//', 2, 'LHS of non-updating tr'); | |
d897a58d | 303 | |
94bfe852 | 304 | eval '"123" =~ tr/1/2/'; |
953ab6e5 MS |
305 | like($@, qr|^Can't modify constant item in transliteration \(tr///\)|, |
306 | 'LHS bad on updating tr'); | |
307 | ||
d897a58d | 308 | |
381d18bc JH |
309 | # v300 (0x12c) is UTF-8-encoded as 196 172 (0xc4 0xac) |
310 | # v400 (0x190) is UTF-8-encoded as 198 144 (0xc6 0x90) | |
311 | ||
312 | # Transliterate a byte to a byte, all four ways. | |
313 | ||
314 | ($a = v300.196.172.300.196.172) =~ tr/\xc4/\xc5/; | |
953ab6e5 | 315 | is($a, v300.197.172.300.197.172, 'byte2byte transliteration'); |
381d18bc JH |
316 | |
317 | ($a = v300.196.172.300.196.172) =~ tr/\xc4/\x{c5}/; | |
953ab6e5 | 318 | is($a, v300.197.172.300.197.172); |
381d18bc JH |
319 | |
320 | ($a = v300.196.172.300.196.172) =~ tr/\x{c4}/\xc5/; | |
953ab6e5 | 321 | is($a, v300.197.172.300.197.172); |
381d18bc JH |
322 | |
323 | ($a = v300.196.172.300.196.172) =~ tr/\x{c4}/\x{c5}/; | |
953ab6e5 | 324 | is($a, v300.197.172.300.197.172); |
381d18bc | 325 | |
381d18bc JH |
326 | |
327 | ($a = v300.196.172.300.196.172) =~ tr/\xc4/\x{12d}/; | |
953ab6e5 | 328 | is($a, v300.301.172.300.301.172, 'byte2wide transliteration'); |
381d18bc JH |
329 | |
330 | ($a = v300.196.172.300.196.172) =~ tr/\x{12c}/\xc3/; | |
953ab6e5 | 331 | is($a, v195.196.172.195.196.172, ' wide2byte'); |
381d18bc JH |
332 | |
333 | ($a = v300.196.172.300.196.172) =~ tr/\x{12c}/\x{12d}/; | |
953ab6e5 | 334 | is($a, v301.196.172.301.196.172, ' wide2wide'); |
381d18bc | 335 | |
381d18bc JH |
336 | |
337 | ($a = v300.196.172.300.196.172) =~ tr/\xc4\x{12c}/\x{12d}\xc3/; | |
953ab6e5 | 338 | is($a, v195.301.172.195.301.172, 'byte2wide & wide2byte'); |
381d18bc | 339 | |
381d18bc JH |
340 | |
341 | ($a = v300.196.172.300.196.172.400.198.144) =~ | |
342 | tr/\xac\xc4\x{12c}\x{190}/\xad\x{12d}\xc5\x{191}/; | |
953ab6e5 | 343 | is($a, v197.301.173.197.301.173.401.198.144, 'all together now!'); |
381d18bc | 344 | |
381d18bc | 345 | |
953ab6e5 MS |
346 | is((($a = v300.196.172.300.196.172) =~ tr/\xc4/\xc5/), 2, |
347 | 'transliterate and count'); | |
381d18bc | 348 | |
953ab6e5 | 349 | is((($a = v300.196.172.300.196.172) =~ tr/\x{12c}/\x{12d}/), 2); |
381d18bc | 350 | |
381d18bc JH |
351 | |
352 | ($a = v300.196.172.300.196.172) =~ tr/\xc4/\x{12d}/c; | |
953ab6e5 | 353 | is($a, v301.196.301.301.196.301, 'translit w/complement'); |
381d18bc JH |
354 | |
355 | ($a = v300.196.172.300.196.172) =~ tr/\x{12c}/\xc5/c; | |
953ab6e5 | 356 | is($a, v300.197.197.300.197.197); |
381d18bc | 357 | |
381d18bc JH |
358 | |
359 | ($a = v300.196.172.300.196.172) =~ tr/\xc4//d; | |
953ab6e5 | 360 | is($a, v300.172.300.172, 'translit w/deletion'); |
381d18bc JH |
361 | |
362 | ($a = v300.196.172.300.196.172) =~ tr/\x{12c}//d; | |
953ab6e5 | 363 | is($a, v196.172.196.172); |
381d18bc | 364 | |
381d18bc JH |
365 | |
366 | ($a = v196.196.172.300.300.196.172) =~ tr/\xc4/\xc5/s; | |
953ab6e5 | 367 | is($a, v197.172.300.300.197.172, 'translit w/squeeze'); |
381d18bc JH |
368 | |
369 | ($a = v196.172.300.300.196.172.172) =~ tr/\x{12c}/\x{12d}/s; | |
953ab6e5 | 370 | is($a, v196.172.301.196.172.172); |
381d18bc | 371 | |
a1874b66 | 372 | |
953ab6e5 | 373 | # Tricky cases (When Simon Cozens Attacks) |
a1874b66 | 374 | ($a = v196.172.200) =~ tr/\x{12c}/a/; |
953ab6e5 | 375 | is(sprintf("%vd", $a), '196.172.200'); |
a1874b66 JH |
376 | |
377 | ($a = v196.172.200) =~ tr/\x{12c}/\x{12c}/; | |
953ab6e5 | 378 | is(sprintf("%vd", $a), '196.172.200'); |
a1874b66 JH |
379 | |
380 | ($a = v196.172.200) =~ tr/\x{12c}//d; | |
953ab6e5 MS |
381 | is(sprintf("%vd", $a), '196.172.200'); |
382 | ||
a1874b66 | 383 | |
8973db79 | 384 | # UTF8 range tests from Inaba Hiroto |
f9a63242 JH |
385 | |
386 | ($a = v300.196.172.302.197.172) =~ tr/\x{12c}-\x{130}/\xc0-\xc4/; | |
953ab6e5 | 387 | is($a, v192.196.172.194.197.172, 'UTF range'); |
f9a63242 JH |
388 | |
389 | ($a = v300.196.172.302.197.172) =~ tr/\xc4-\xc8/\x{12c}-\x{130}/; | |
953ab6e5 MS |
390 | is($a, v300.300.172.302.301.172); |
391 | ||
8973db79 JH |
392 | |
393 | # UTF8 range tests from Karsten Sperling (patch #9008 required) | |
394 | ||
395 | ($a = "\x{0100}") =~ tr/\x00-\x{100}/X/; | |
953ab6e5 | 396 | is($a, "X"); |
8973db79 JH |
397 | |
398 | ($a = "\x{0100}") =~ tr/\x{0000}-\x{00ff}/X/c; | |
953ab6e5 | 399 | is($a, "X"); |
8973db79 JH |
400 | |
401 | ($a = "\x{0100}") =~ tr/\x{0000}-\x{00ff}\x{0101}/X/c; | |
953ab6e5 | 402 | is($a, "X"); |
8973db79 JH |
403 | |
404 | ($a = v256) =~ tr/\x{0000}-\x{00ff}\x{0101}/X/c; | |
953ab6e5 MS |
405 | is($a, "X"); |
406 | ||
8973db79 | 407 | |
94472101 JH |
408 | # UTF8 range tests from Inaba Hiroto |
409 | ||
410 | ($a = "\x{200}") =~ tr/\x00-\x{100}/X/c; | |
953ab6e5 | 411 | is($a, "X"); |
94472101 JH |
412 | |
413 | ($a = "\x{200}") =~ tr/\x00-\x{100}/X/cs; | |
953ab6e5 MS |
414 | is($a, "X"); |
415 | ||
685b06b5 KW |
416 | # Tricky on EBCDIC: while [a-z] [A-Z] must not match the gap characters (as |
417 | # well as i-j, r-s, I-J, R-S), [\x89-\x91] [\xc9-\xd1] has to match them, | |
6b6bd37b JH |
418 | # from Karsten Sperling. |
419 | ||
420 | $c = ($a = "\x89\x8a\x8b\x8c\x8d\x8f\x90\x91") =~ tr/\x89-\x91/X/; | |
953ab6e5 MS |
421 | is($c, 8); |
422 | is($a, "XXXXXXXX"); | |
4c3a8340 | 423 | |
6b6bd37b | 424 | $c = ($a = "\xc9\xca\xcb\xcc\xcd\xcf\xd0\xd1") =~ tr/\xc9-\xd1/X/; |
953ab6e5 MS |
425 | is($c, 8); |
426 | is($a, "XXXXXXXX"); | |
6b6bd37b | 427 | |
4c3a8340 | 428 | SKIP: { |
f4240379 | 429 | skip "EBCDIC-centric tests", 4 unless $::IS_EBCDIC; |
953ab6e5 MS |
430 | |
431 | $c = ($a = "\x89\x8a\x8b\x8c\x8d\x8f\x90\x91") =~ tr/i-j/X/; | |
432 | is($c, 2); | |
433 | is($a, "X\x8a\x8b\x8c\x8d\x8f\x90X"); | |
434 | ||
435 | $c = ($a = "\xc9\xca\xcb\xcc\xcd\xcf\xd0\xd1") =~ tr/I-J/X/; | |
436 | is($c, 2); | |
437 | is($a, "X\xca\xcb\xcc\xcd\xcf\xd0X"); | |
6b6bd37b | 438 | } |
1ed601ec JH |
439 | |
440 | ($a = "\x{100}") =~ tr/\x00-\xff/X/c; | |
953ab6e5 | 441 | is(ord($a), ord("X")); |
1ed601ec JH |
442 | |
443 | ($a = "\x{100}") =~ tr/\x00-\xff/X/cs; | |
953ab6e5 | 444 | is(ord($a), ord("X")); |
1ed601ec JH |
445 | |
446 | ($a = "\x{100}\x{100}") =~ tr/\x{101}-\x{200}//c; | |
953ab6e5 | 447 | is($a, "\x{100}\x{100}"); |
1ed601ec JH |
448 | |
449 | ($a = "\x{100}\x{100}") =~ tr/\x{101}-\x{200}//cs; | |
953ab6e5 | 450 | is($a, "\x{100}"); |
1ed601ec | 451 | |
629b4584 | 452 | $a = "\xfe\xff"; $a =~ tr/\xfe\xff/\x{1ff}\x{1fe}/; |
953ab6e5 MS |
453 | is($a, "\x{1ff}\x{1fe}"); |
454 | ||
76ef7183 JH |
455 | |
456 | # From David Dyck | |
457 | ($a = "R0_001") =~ tr/R_//d; | |
953ab6e5 | 458 | is(hex($a), 1); |
76ef7183 | 459 | |
800b4dc4 JH |
460 | # From Inaba Hiroto |
461 | @a = (1,2); map { y/1/./ for $_ } @a; | |
953ab6e5 | 462 | is("@a", ". 2"); |
800b4dc4 JH |
463 | |
464 | @a = (1,2); map { y/1/./ for $_.'' } @a; | |
953ab6e5 MS |
465 | is("@a", "1 2"); |
466 | ||
800b4dc4 | 467 | |
bec89253 RH |
468 | # Additional test for Inaba Hiroto patch (robin@kitsite.com) |
469 | ($a = "\x{100}\x{102}\x{101}") =~ tr/\x00-\377/XYZ/c; | |
953ab6e5 MS |
470 | is($a, "XZY"); |
471 | ||
bec89253 | 472 | |
2233f375 NC |
473 | # Used to fail with "Modification of a read-only value attempted" |
474 | %a = (N=>1); | |
475 | foreach (keys %a) { | |
953ab6e5 MS |
476 | eval 'tr/N/n/'; |
477 | is($_, 'n', 'pp_trans needs to unshare shared hash keys'); | |
478 | is($@, '', ' no error'); | |
2233f375 | 479 | } |
94bfe852 | 480 | |
953ab6e5 | 481 | |
94bfe852 | 482 | $x = eval '"1213" =~ tr/1/1/'; |
953ab6e5 MS |
483 | is($x, 2, 'implicit count on constant'); |
484 | is($@, '', ' no error'); | |
485 | ||
486 | ||
487 | my @foo = (); | |
488 | eval '$foo[-1] =~ tr/N/N/'; | |
489 | is( $@, '', 'implicit count outside array bounds, index negative' ); | |
490 | is( scalar @foo, 0, " doesn't extend the array"); | |
491 | ||
492 | eval '$foo[1] =~ tr/N/N/'; | |
493 | is( $@, '', 'implicit count outside array bounds, index positive' ); | |
494 | is( scalar @foo, 0, " doesn't extend the array"); | |
495 | ||
496 | ||
497 | my %foo = (); | |
498 | eval '$foo{bar} =~ tr/N/N/'; | |
499 | is( $@, '', 'implicit count outside hash bounds' ); | |
500 | is( scalar keys %foo, 0, " doesn't extend the hash"); | |
d59e14db RGS |
501 | |
502 | $x = \"foo"; | |
503 | is( $x =~ tr/A/A/, 2, 'non-modifying tr/// on a scalar ref' ); | |
504 | is( ref $x, 'SCALAR', " doesn't stringify its argument" ); | |
0d65d7d5 MS |
505 | |
506 | # rt.perl.org 36622. Perl didn't like a y/// at end of file. No trailing | |
507 | # newline allowed. | |
2a91eb11 | 508 | fresh_perl_is(q[$_ = "foo"; y/A-Z/a-z/], '', {}, 'RT #36622 y/// at end of file'); |
9f7f3913 TS |
509 | |
510 | ||
511 | { # [perl #38293] chr(65535) should be allowed in regexes | |
512 | no warnings 'utf8'; # to allow non-characters | |
513 | ||
514 | $s = "\x{d800}\x{ffff}"; | |
515 | $s =~ tr/\0/A/; | |
516 | is($s, "\x{d800}\x{ffff}", "do_trans_simple"); | |
517 | ||
518 | $s = "\x{d800}\x{ffff}"; | |
519 | $i = $s =~ tr/\0//; | |
520 | is($i, 0, "do_trans_count"); | |
521 | ||
522 | $s = "\x{d800}\x{ffff}"; | |
523 | $s =~ tr/\0/A/s; | |
524 | is($s, "\x{d800}\x{ffff}", "do_trans_complex, SQUASH"); | |
525 | ||
526 | $s = "\x{d800}\x{ffff}"; | |
527 | $s =~ tr/\0/A/c; | |
528 | is($s, "AA", "do_trans_complex, COMPLEMENT"); | |
529 | ||
530 | $s = "A\x{ffff}B"; | |
531 | $s =~ tr/\x{ffff}/\x{1ffff}/; | |
532 | is($s, "A\x{1ffff}B", "utf8, SEARCHLIST"); | |
533 | ||
534 | $s = "\x{fffd}\x{fffe}\x{ffff}"; | |
535 | $s =~ tr/\x{fffd}-\x{ffff}/ABC/; | |
536 | is($s, "ABC", "utf8, SEARCHLIST range"); | |
537 | ||
538 | $s = "ABC"; | |
539 | $s =~ tr/ABC/\x{ffff}/; | |
540 | is($s, "\x{ffff}"x3, "utf8, REPLACEMENTLIST"); | |
541 | ||
542 | $s = "ABC"; | |
543 | $s =~ tr/ABC/\x{fffd}-\x{ffff}/; | |
544 | is($s, "\x{fffd}\x{fffe}\x{ffff}", "utf8, REPLACEMENTLIST range"); | |
545 | ||
546 | $s = "A\x{ffff}B\x{100}\0\x{fffe}\x{ffff}"; | |
547 | $i = $s =~ tr/\x{ffff}//; | |
548 | is($i, 2, "utf8, count"); | |
549 | ||
550 | $s = "A\x{ffff}\x{ffff}C"; | |
551 | $s =~ tr/\x{ffff}/\x{100}/s; | |
552 | is($s, "A\x{100}C", "utf8, SQUASH"); | |
553 | ||
554 | $s = "A\x{ffff}\x{ffff}\x{fffe}\x{fffe}\x{fffe}C"; | |
555 | $s =~ tr/\x{fffe}\x{ffff}//s; | |
556 | is($s, "A\x{ffff}\x{fffe}C", "utf8, SQUASH"); | |
557 | ||
558 | $s = "xAABBBy"; | |
559 | $s =~ tr/AB/\x{ffff}/s; | |
560 | is($s, "x\x{ffff}y", "utf8, SQUASH"); | |
561 | ||
562 | $s = "xAABBBy"; | |
563 | $s =~ tr/AB/\x{fffe}\x{ffff}/s; | |
564 | is($s, "x\x{fffe}\x{ffff}y", "utf8, SQUASH"); | |
565 | ||
566 | $s = "A\x{ffff}B\x{fffe}C"; | |
567 | $s =~ tr/\x{fffe}\x{ffff}/x/c; | |
568 | is($s, "x\x{ffff}x\x{fffe}x", "utf8, COMPLEMENT"); | |
569 | ||
570 | $s = "A\x{10000}B\x{2abcd}C"; | |
571 | $s =~ tr/\0-\x{ffff}/x/c; | |
572 | is($s, "AxBxC", "utf8, COMPLEMENT range"); | |
573 | ||
574 | $s = "A\x{fffe}B\x{ffff}C"; | |
575 | $s =~ tr/\x{fffe}\x{ffff}/x/d; | |
576 | is($s, "AxBC", "utf8, DELETE"); | |
577 | ||
578 | } # non-characters end | |
579 | ||
1749ea0d TS |
580 | { # related to [perl #27940] |
581 | my $c; | |
582 | ||
583 | ($c = "\x20\c@\x30\cA\x40\cZ\x50\c_\x60") =~ tr/\c@-\c_//d; | |
584 | is($c, "\x20\x30\x40\x50\x60", "tr/\\c\@-\\c_//d"); | |
585 | ||
586 | ($c = "\x20\x00\x30\x01\x40\x1A\x50\x1F\x60") =~ tr/\x00-\x1f//d; | |
587 | is($c, "\x20\x30\x40\x50\x60", "tr/\\x00-\\x1f//d"); | |
588 | } | |
589 | ||
3788ef8f | 590 | ($s) = keys %{{pie => 3}}; |
3e89ba19 | 591 | SKIP: { |
e3918bb7 FC |
592 | if (!eval { require XS::APItest }) { skip "no XS::APItest", 2 } |
593 | my $wasro = XS::APItest::SvIsCOW($s); | |
2203fb5a | 594 | ok $wasro, "have a COW"; |
3788ef8f | 595 | $s =~ tr/i//; |
e3918bb7 | 596 | ok( XS::APItest::SvIsCOW($s), |
3e89ba19 | 597 | "count-only tr doesn't deCOW COWs" ); |
3788ef8f | 598 | } |
a5446a64 DM |
599 | |
600 | # [ RT #61520 ] | |
601 | # | |
602 | # under threads, unicode tr within a cloned closure would SEGV or assert | |
603 | # fail, since the pointer in the pad to the swash was getting zeroed out | |
604 | # in the proto-CV | |
605 | ||
606 | { | |
607 | my $x = "\x{142}"; | |
608 | sub { | |
609 | $x =~ tr[\x{142}][\x{143}]; | |
610 | }->(); | |
611 | is($x,"\x{143}", "utf8 + closure"); | |
612 | } | |
613 | ||
9100eeb1 Z |
614 | # Freeing of trans ops prior to pmtrans() [perl #102858]. |
615 | eval q{ $a ~= tr/a/b/; }; | |
616 | ok 1; | |
617 | SKIP: { | |
55673181 | 618 | no warnings "deprecated"; |
9100eeb1 Z |
619 | skip "no encoding", 1 unless eval { require encoding; 1 }; |
620 | eval q{ use encoding "utf8"; $a ~= tr/a/b/; }; | |
621 | ok 1; | |
622 | } | |
a5446a64 | 623 | |
cb6d3474 KW |
624 | { # [perl #113584] |
625 | ||
626 | my $x = "Perlα"; | |
627 | $x =~ tr/αα/βγ/; | |
baacc348 | 628 | { no warnings 'utf8'; print "# $x\n"; } # No note() to avoid wide warning. |
cb6d3474 KW |
629 | is($x, "Perlβ", "Only first of multiple transliterations is used"); |
630 | } | |
631 | ||
d8b516a1 FC |
632 | # tr/a/b/ should fail even on zero-length read-only strings |
633 | use constant nullrocow => (keys%{{""=>undef}})[0]; | |
634 | for ("", nullrocow) { | |
635 | eval { $_ =~ y/a/b/ }; | |
636 | like $@, qr/^Modification of a read-only value attempted at /, | |
637 | 'tr/a/b/ fails on zero-length ro string'; | |
638 | } | |
639 | ||
a53bfdae HS |
640 | # Whether they're permitted or not, non-modifying tr/// should not write |
641 | # to read-only values, even with funky flags. | |
642 | { # [perl #123759] | |
643 | eval q{ ('a' =~ /./) =~ tr///d }; | |
644 | ok(1, "tr///d on PL_Yes does not assert"); | |
645 | eval q{ ('a' =~ /./) =~ tr/a-z/a-z/d }; | |
646 | ok(1, "tr/a-z/a-z/d on PL_Yes does not assert"); | |
647 | eval q{ ('a' =~ /./) =~ tr///s }; | |
648 | ok(1, "tr///s on PL_Yes does not assert"); | |
649 | eval q{ *x =~ tr///d }; | |
650 | ok(1, "tr///d on glob does not assert"); | |
651 | } | |
652 | ||
3a34ca0b | 653 | { # [perl #128734 |
ce4eed6e | 654 | my $string = chr utf8::unicode_to_native(0x00e0); |
3a34ca0b KW |
655 | $string =~ tr/\N{U+00e0}/A/; |
656 | is($string, "A", 'tr// of \N{U+...} works for upper-Latin1'); | |
ce4eed6e | 657 | my $string = chr utf8::unicode_to_native(0x00e1); |
3a34ca0b KW |
658 | $string =~ tr/\N{LATIN SMALL LETTER A WITH ACUTE}/A/; |
659 | is($string, "A", 'tr// of \N{name} works for upper-Latin1'); | |
660 | } | |
661 | ||
2108cbcf DM |
662 | # RT #130198 |
663 | # a tr/// that is cho(m)ped, possibly with an array as arg | |
664 | ||
665 | { | |
666 | use warnings; | |
667 | ||
668 | my ($s, @a); | |
669 | ||
670 | my $warn; | |
671 | local $SIG{__WARN__ } = sub { $warn .= "@_" }; | |
672 | ||
673 | for my $c (qw(chop chomp)) { | |
674 | for my $bind ('', '$s =~ ', '@a =~ ') { | |
675 | for my $arg2 (qw(a b)) { | |
676 | for my $r ('', 'r') { | |
677 | $warn = ''; | |
678 | # tr/a/b/ modifies its LHS, so if the LHS is an | |
679 | # array, this should die. The special cases of tr/a/a/ | |
680 | # and tr/a/b/r don't modify their LHS, so instead | |
681 | # we croak because cho(m)p is trying to modify it. | |
682 | # | |
683 | my $exp = | |
684 | ($r eq '' && $arg2 eq 'b' && $bind =~ /\@a/) | |
685 | ? qr/Can't modify private array in transliteration/ | |
686 | : qr{Can't modify transliteration \(tr///\) in $c}; | |
687 | ||
688 | my $expr = "$c(${bind}tr/a/$arg2/$r);"; | |
689 | eval $expr; | |
690 | like $@, $exp, "RT #130198 eval: $expr"; | |
691 | ||
692 | $exp = | |
693 | $bind =~ /\@a/ | |
694 | ? qr{^Applying transliteration \(tr///\) to \@a will act on scalar\(\@a\)} | |
695 | : qr/^$/; | |
696 | like $warn, $exp, "RT #130198 warn: $expr"; | |
697 | } | |
698 | } | |
699 | } | |
700 | } | |
701 | ||
702 | ||
703 | } | |
704 | ||
fe2ba0a2 KW |
705 | { # [perl #130656] This bug happens when the tr is split across lines, so |
706 | # that the first line causes it to go into UTF-8, and the 2nd is only | |
707 | # things like \x | |
708 | my $x = "\x{E235}"; | |
709 | $x =~ tr | |
710 | [\x{E234}-\x{E342}\x{E5B5}-\x{E5DF}] | |
711 | [\x{E5CD}-\x{E5DF}\x{EA80}-\x{EAFA}\x{EB0E}-\x{EB8E}\x{EAFB}-\x{EB0D}\x{E5B5}-\x{E5CC}]; | |
712 | ||
713 | is $x, "\x{E5CE}", '[perl #130656]'; | |
714 | } | |
2108cbcf | 715 | |
9100eeb1 | 716 | 1; |