This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
fix utf char > IV_MAX on 32-bit platforms
[perl5.git] / t / op / tr.t
CommitLineData
c8e3bb4c 1# tr.t
f4240379 2$|=1;
c8e3bb4c 3
f05dd7cc
JH
4BEGIN {
5 chdir 't' if -d 't';
953ab6e5 6 require './test.pl';
43ece5b1 7 set_up_inc('../lib');
47918419
JH
8 if (is_miniperl()) {
9 eval 'require utf8';
caab73c0 10 if ($@) { skip_all("miniperl, no 'utf8'") }
47918419 11 }
f05dd7cc 12}
a5095b95 13
47918419
JH
14use utf8;
15
fe2ba0a2 16plan tests => 216;
953ab6e5 17
f605e527
FC
18# Test this first before we extend the stack with other operations.
19# This caused an asan failure due to a bad write past the end of the stack.
20eval { my $x; die 1..127, $x =~ y/// };
21
c8e3bb4c
GS
22$_ = "abcdefghijklmnopqrstuvwxyz";
23
24tr/a-z/A-Z/;
25
953ab6e5 26is($_, "ABCDEFGHIJKLMNOPQRSTUVWXYZ", 'uc');
c8e3bb4c
GS
27
28tr/A-Z/a-z/;
29
953ab6e5 30is($_, "abcdefghijklmnopqrstuvwxyz", 'lc');
c8e3bb4c
GS
31
32tr/b-y/B-Y/;
953ab6e5 33is($_, "aBCDEFGHIJKLMNOPQRSTUVWXYz", 'partial uc');
c8e3bb4c 34
8efef67c
KW
35tr/a-a/AB/;
36is($_, "ABCDEFGHIJKLMNOPQRSTUVWXYz", 'single char range a-a');
37
f4240379
KW
38eval 'tr/a/\N{KATAKANA LETTER AINU P}/;';
39like $@,
92e8e650 40 qr/\\N\{KATAKANA LETTER AINU P\} must not be a named sequence in transliteration operator/,
f4240379
KW
41 "Illegal to tr/// named sequence";
42
43eval 'tr/\x{101}-\x{100}//;';
44like $@,
92e8e650 45 qr/Invalid range "\\x\{0101\}-\\x\{0100\}" in transliteration operator/,
f4240379
KW
46 "UTF-8 range with min > max";
47
48SKIP: { # Test literal range end point special handling
49 unless ($::IS_EBCDIC) {
50 skip "Valid only for EBCDIC", 24;
51 }
52
53 $_ = "\x89"; # is 'i'
54 tr/i-j//d;
55 is($_, "", '"\x89" should match [i-j]');
56 $_ = "\x8A";
57 tr/i-j//d;
58 is($_, "\x8A", '"\x8A" shouldnt match [i-j]');
59 $_ = "\x90";
60 tr/i-j//d;
61 is($_, "\x90", '"\x90" shouldnt match [i-j]');
62 $_ = "\x91"; # is 'j'
63 tr/i-j//d;
64 is($_, "", '"\x91" should match [i-j]');
65
66 $_ = "\x89";
67 tr/i-\N{LATIN SMALL LETTER J}//d;
68 is($_, "", '"\x89" should match [i-\N{LATIN SMALL LETTER J}]');
69 $_ = "\x8A";
70 tr/i-\N{LATIN SMALL LETTER J}//d;
71 is($_, "\x8A", '"\x8A" shouldnt match [i-\N{LATIN SMALL LETTER J}]');
72 $_ = "\x90";
73 tr/i-\N{LATIN SMALL LETTER J}//d;
74 is($_, "\x90", '"\x90" shouldnt match [i-\N{LATIN SMALL LETTER J}]');
75 $_ = "\x91";
76 tr/i-\N{LATIN SMALL LETTER J}//d;
77 is($_, "", '"\x91" should match [i-\N{LATIN SMALL LETTER J}]');
78
79 $_ = "\x89";
80 tr/i-\N{U+6A}//d;
81 is($_, "", '"\x89" should match [i-\N{U+6A}]');
82 $_ = "\x8A";
83 tr/i-\N{U+6A}//d;
84 is($_, "\x8A", '"\x8A" shouldnt match [i-\N{U+6A}]');
85 $_ = "\x90";
86 tr/i-\N{U+6A}//d;
87 is($_, "\x90", '"\x90" shouldnt match [i-\N{U+6A}]');
88 $_ = "\x91";
89 tr/i-\N{U+6A}//d;
90 is($_, "", '"\x91" should match [i-\N{U+6A}]');
91
92 $_ = "\x89";
93 tr/\N{U+69}-\N{U+6A}//d;
94 is($_, "", '"\x89" should match [\N{U+69}-\N{U+6A}]');
95 $_ = "\x8A";
96 tr/\N{U+69}-\N{U+6A}//d;
97 is($_, "\x8A", '"\x8A" shouldnt match [\N{U+69}-\N{U+6A}]');
98 $_ = "\x90";
99 tr/\N{U+69}-\N{U+6A}//d;
100 is($_, "\x90", '"\x90" shouldnt match [\N{U+69}-\N{U+6A}]');
101 $_ = "\x91";
102 tr/\N{U+69}-\N{U+6A}//d;
103 is($_, "", '"\x91" should match [\N{U+69}-\N{U+6A}]');
104
105 $_ = "\x89";
106 tr/i-\x{91}//d;
107 is($_, "", '"\x89" should match [i-\x{91}]');
108 $_ = "\x8A";
109 tr/i-\x{91}//d;
110 is($_, "", '"\x8A" should match [i-\x{91}]');
111 $_ = "\x90";
112 tr/i-\x{91}//d;
113 is($_, "", '"\x90" should match [i-\x{91}]');
114 $_ = "\x91";
115 tr/i-\x{91}//d;
116 is($_, "", '"\x91" should match [i-\x{91}]');
117
118 # Need to use eval, because tries to compile on ASCII platforms even
119 # though the tests are skipped, and fails because 0x89-j is an illegal
120 # range there.
121 $_ = "\x89";
122 eval 'tr/\x{89}-j//d';
123 is($_, "", '"\x89" should match [\x{89}-j]');
124 $_ = "\x8A";
125 eval 'tr/\x{89}-j//d';
126 is($_, "", '"\x8A" should match [\x{89}-j]');
127 $_ = "\x90";
128 eval 'tr/\x{89}-j//d';
129 is($_, "", '"\x90" should match [\x{89}-j]');
130 $_ = "\x91";
131 eval 'tr/\x{89}-j//d';
132 is($_, "", '"\x91" should match [\x{89}-j]');
133}
134
c8e3bb4c
GS
135
136# In EBCDIC 'I' is \xc9 and 'J' is \0xd1, 'i' is \x89 and 'j' is \x91.
137# Yes, discontinuities. Regardless, the \xca in the below should stay
138# untouched (and not became \x8a).
5e037136 139{
5e037136 140 $_ = "I\xcaJ";
c8e3bb4c 141
5e037136 142 tr/I-J/i-j/;
c8e3bb4c 143
ff36f15d 144 is($_, "i\xcaj", 'EBCDIC discontinuity');
5e037136 145}
c8e3bb4c 146#
2de7b02f 147
2de7b02f
GS
148($x = 12) =~ tr/1/3/;
149(my $y = 12) =~ tr/1/3/;
150($f = 1.5) =~ tr/1/3/;
151(my $g = 1.5) =~ tr/1/3/;
953ab6e5
MS
152is($x + $y + $f + $g, 71, 'tr cancels IOK and NOK');
153
bb16bae8
FC
154# /r
155$_ = 'adam';
156is y/dam/ve/rd, 'eve', '/r';
157is $_, 'adam', '/r leaves param alone';
158$g = 'ruby';
159is $g =~ y/bury/repl/r, 'perl', '/r with explicit param';
160is $g, 'ruby', '/r leaves explicit param alone';
161is "aaa" =~ y\a\b\r, 'bbb', '/r with constant param';
162ok !eval '$_ !~ y///r', "!~ y///r is forbidden";
163like $@, qr\^Using !~ with tr///r doesn't make sense\,
164 "!~ y///r error message";
165{
166 my $w;
167 my $wc;
168 local $SIG{__WARN__} = sub { $w = shift; ++$wc };
169 local $^W = 1;
170 eval 'y///r; 1';
171 like $w, qr '^Useless use of non-destructive transliteration \(tr///r\)',
172 '/r warns in void context';
173 is $wc, 1, '/r warns just once';
174}
2de7b02f 175
ee95e30c 176# perlbug [ID 20000511.005 (#3237)]
2de7b02f
GS
177$_ = 'fred';
178/([a-z]{2})/;
179$1 =~ tr/A-Z//;
180s/^(\s*)f/$1F/;
953ab6e5
MS
181is($_, 'Fred', 'harmless if explicitly not updating');
182
183
184# A variant of the above, added in 5.7.2
185$_ = 'fred';
186/([a-z]{2})/;
187eval '$1 =~ tr/A-Z/A-Z/;';
188s/^(\s*)f/$1F/;
189is($_, 'Fred', 'harmless if implicitly not updating');
190is($@, '', ' no error');
191
2de7b02f
GS
192
193# check tr handles UTF8 correctly
194($x = 256.65.258) =~ tr/a/b/;
953ab6e5
MS
195is($x, 256.65.258, 'handles UTF8');
196is(length $x, 3);
197
2de7b02f 198$x =~ tr/A/B/;
953ab6e5 199is(length $x, 3);
83bcbc61 200if ($::IS_ASCII) { # ASCII
953ab6e5 201 is($x, 256.66.258);
67a17885
PP
202}
203else {
953ab6e5 204 is($x, 256.65.258);
67a17885 205}
953ab6e5 206
cbe7f703
PP
207# EBCDIC variants of the above tests
208($x = 256.193.258) =~ tr/a/b/;
953ab6e5
MS
209is(length $x, 3);
210is($x, 256.193.258);
211
cbe7f703 212$x =~ tr/A/B/;
953ab6e5 213is(length $x, 3);
83bcbc61 214if ($::IS_ASCII) { # ASCII
953ab6e5 215 is($x, 256.193.258);
cbe7f703
PP
216}
217else {
953ab6e5 218 is($x, 256.194.258);
cbe7f703 219}
953ab6e5 220
036b4402
GS
221
222{
953ab6e5
MS
223 my $l = chr(300); my $r = chr(400);
224 $x = 200.300.400;
225 $x =~ tr/\x{12c}/\x{190}/;
226 is($x, 200.400.400,
227 'changing UTF8 chars in a UTF8 string, same length');
228 is(length $x, 3);
229
230 $x = 200.300.400;
231 $x =~ tr/\x{12c}/\x{be8}/;
232 is($x, 200.3048.400, ' more bytes');
233 is(length $x, 3);
234
235 $x = 100.125.60;
236 $x =~ tr/\x{64}/\x{190}/;
237 is($x, 400.125.60, 'Putting UT8 chars into a non-UTF8 string');
238 is(length $x, 3);
239
240 $x = 400.125.60;
241 $x =~ tr/\x{190}/\x{64}/;
242 is($x, 100.125.60, 'Removing UTF8 chars from UTF8 string');
243 is(length $x, 3);
244
245 $x = 400.125.60.400;
246 $y = $x =~ tr/\x{190}/\x{190}/;
247 is($y, 2, 'Counting UTF8 chars in UTF8 string');
248
249 $x = 60.400.125.60.400;
250 $y = $x =~ tr/\x{3c}/\x{3c}/;
251 is($y, 2, ' non-UTF8 chars in UTF8 string');
252
253 # 17 - counting UTF8 chars in non-UTF8 string
254 $x = 200.125.60;
255 $y = $x =~ tr/\x{190}/\x{190}/;
256 is($y, 0, ' UTF8 chars in non-UTFs string');
036b4402 257}
c2e66d9e 258
c2e66d9e 259$_ = "abcdefghijklmnopqrstuvwxyz";
953ab6e5
MS
260eval 'tr/a-z-9/ /';
261like($@, qr/^Ambiguous range in transliteration operator/, 'tr/a-z-9//');
c2e66d9e 262
cbe7f703 263# 19-21: Make sure leading and trailing hyphens still work
c2e66d9e
GS
264$_ = "car-rot9";
265tr/-a-m/./;
953ab6e5 266is($_, '..r.rot9', 'hyphens, leading');
c2e66d9e
GS
267
268$_ = "car-rot9";
269tr/a-m-/./;
953ab6e5 270is($_, '..r.rot9', ' trailing');
c2e66d9e
GS
271
272$_ = "car-rot9";
273tr/-a-m-/./;
953ab6e5 274is($_, '..r.rot9', ' both');
c2e66d9e
GS
275
276$_ = "abcdefghijklmnop";
277tr/ae-hn/./;
953ab6e5 278is($_, '.bcd....ijklm.op');
c2e66d9e
GS
279
280$_ = "abcdefghijklmnop";
281tr/a-cf-kn-p/./;
953ab6e5 282is($_, '...de......lm...');
c2e66d9e
GS
283
284$_ = "abcdefghijklmnop";
285tr/a-ceg-ikm-o/./;
953ab6e5
MS
286is($_, '...d.f...j.l...p');
287
c2e66d9e 288
c2e66d9e
GS
289# 20000705 MJD
290eval "tr/m-d/ /";
321ecc04 291like($@, qr/^Invalid range "m-d" in transliteration operator/,
953ab6e5 292 'reversed range check');
c2e66d9e 293
d897a58d 294'abcdef' =~ /(bcd)/;
953ab6e5
MS
295is(eval '$1 =~ tr/abcd//', 3, 'explicit read-only count');
296is($@, '', ' no error');
d897a58d 297
953ab6e5
MS
298'abcdef' =~ /(bcd)/;
299is(eval '$1 =~ tr/abcd/abcd/', 3, 'implicit read-only count');
300is($@, '', ' no error');
301
302is(eval '"123" =~ tr/12//', 2, 'LHS of non-updating tr');
d897a58d 303
94bfe852 304eval '"123" =~ tr/1/2/';
953ab6e5
MS
305like($@, qr|^Can't modify constant item in transliteration \(tr///\)|,
306 'LHS bad on updating tr');
307
d897a58d 308
381d18bc
JH
309# v300 (0x12c) is UTF-8-encoded as 196 172 (0xc4 0xac)
310# v400 (0x190) is UTF-8-encoded as 198 144 (0xc6 0x90)
311
312# Transliterate a byte to a byte, all four ways.
313
314($a = v300.196.172.300.196.172) =~ tr/\xc4/\xc5/;
953ab6e5 315is($a, v300.197.172.300.197.172, 'byte2byte transliteration');
381d18bc
JH
316
317($a = v300.196.172.300.196.172) =~ tr/\xc4/\x{c5}/;
953ab6e5 318is($a, v300.197.172.300.197.172);
381d18bc
JH
319
320($a = v300.196.172.300.196.172) =~ tr/\x{c4}/\xc5/;
953ab6e5 321is($a, v300.197.172.300.197.172);
381d18bc
JH
322
323($a = v300.196.172.300.196.172) =~ tr/\x{c4}/\x{c5}/;
953ab6e5 324is($a, v300.197.172.300.197.172);
381d18bc 325
381d18bc
JH
326
327($a = v300.196.172.300.196.172) =~ tr/\xc4/\x{12d}/;
953ab6e5 328is($a, v300.301.172.300.301.172, 'byte2wide transliteration');
381d18bc
JH
329
330($a = v300.196.172.300.196.172) =~ tr/\x{12c}/\xc3/;
953ab6e5 331is($a, v195.196.172.195.196.172, ' wide2byte');
381d18bc
JH
332
333($a = v300.196.172.300.196.172) =~ tr/\x{12c}/\x{12d}/;
953ab6e5 334is($a, v301.196.172.301.196.172, ' wide2wide');
381d18bc 335
381d18bc
JH
336
337($a = v300.196.172.300.196.172) =~ tr/\xc4\x{12c}/\x{12d}\xc3/;
953ab6e5 338is($a, v195.301.172.195.301.172, 'byte2wide & wide2byte');
381d18bc 339
381d18bc
JH
340
341($a = v300.196.172.300.196.172.400.198.144) =~
342 tr/\xac\xc4\x{12c}\x{190}/\xad\x{12d}\xc5\x{191}/;
953ab6e5 343is($a, v197.301.173.197.301.173.401.198.144, 'all together now!');
381d18bc 344
381d18bc 345
953ab6e5
MS
346is((($a = v300.196.172.300.196.172) =~ tr/\xc4/\xc5/), 2,
347 'transliterate and count');
381d18bc 348
953ab6e5 349is((($a = v300.196.172.300.196.172) =~ tr/\x{12c}/\x{12d}/), 2);
381d18bc 350
381d18bc
JH
351
352($a = v300.196.172.300.196.172) =~ tr/\xc4/\x{12d}/c;
953ab6e5 353is($a, v301.196.301.301.196.301, 'translit w/complement');
381d18bc
JH
354
355($a = v300.196.172.300.196.172) =~ tr/\x{12c}/\xc5/c;
953ab6e5 356is($a, v300.197.197.300.197.197);
381d18bc 357
381d18bc
JH
358
359($a = v300.196.172.300.196.172) =~ tr/\xc4//d;
953ab6e5 360is($a, v300.172.300.172, 'translit w/deletion');
381d18bc
JH
361
362($a = v300.196.172.300.196.172) =~ tr/\x{12c}//d;
953ab6e5 363is($a, v196.172.196.172);
381d18bc 364
381d18bc
JH
365
366($a = v196.196.172.300.300.196.172) =~ tr/\xc4/\xc5/s;
953ab6e5 367is($a, v197.172.300.300.197.172, 'translit w/squeeze');
381d18bc
JH
368
369($a = v196.172.300.300.196.172.172) =~ tr/\x{12c}/\x{12d}/s;
953ab6e5 370is($a, v196.172.301.196.172.172);
381d18bc 371
a1874b66 372
953ab6e5 373# Tricky cases (When Simon Cozens Attacks)
a1874b66 374($a = v196.172.200) =~ tr/\x{12c}/a/;
953ab6e5 375is(sprintf("%vd", $a), '196.172.200');
a1874b66
JH
376
377($a = v196.172.200) =~ tr/\x{12c}/\x{12c}/;
953ab6e5 378is(sprintf("%vd", $a), '196.172.200');
a1874b66
JH
379
380($a = v196.172.200) =~ tr/\x{12c}//d;
953ab6e5
MS
381is(sprintf("%vd", $a), '196.172.200');
382
a1874b66 383
8973db79 384# UTF8 range tests from Inaba Hiroto
f9a63242
JH
385
386($a = v300.196.172.302.197.172) =~ tr/\x{12c}-\x{130}/\xc0-\xc4/;
953ab6e5 387is($a, v192.196.172.194.197.172, 'UTF range');
f9a63242
JH
388
389($a = v300.196.172.302.197.172) =~ tr/\xc4-\xc8/\x{12c}-\x{130}/;
953ab6e5
MS
390is($a, v300.300.172.302.301.172);
391
8973db79
JH
392
393# UTF8 range tests from Karsten Sperling (patch #9008 required)
394
395($a = "\x{0100}") =~ tr/\x00-\x{100}/X/;
953ab6e5 396is($a, "X");
8973db79
JH
397
398($a = "\x{0100}") =~ tr/\x{0000}-\x{00ff}/X/c;
953ab6e5 399is($a, "X");
8973db79
JH
400
401($a = "\x{0100}") =~ tr/\x{0000}-\x{00ff}\x{0101}/X/c;
953ab6e5 402is($a, "X");
8973db79
JH
403
404($a = v256) =~ tr/\x{0000}-\x{00ff}\x{0101}/X/c;
953ab6e5
MS
405is($a, "X");
406
8973db79 407
94472101
JH
408# UTF8 range tests from Inaba Hiroto
409
410($a = "\x{200}") =~ tr/\x00-\x{100}/X/c;
953ab6e5 411is($a, "X");
94472101
JH
412
413($a = "\x{200}") =~ tr/\x00-\x{100}/X/cs;
953ab6e5
MS
414is($a, "X");
415
685b06b5
KW
416# Tricky on EBCDIC: while [a-z] [A-Z] must not match the gap characters (as
417# well as i-j, r-s, I-J, R-S), [\x89-\x91] [\xc9-\xd1] has to match them,
6b6bd37b
JH
418# from Karsten Sperling.
419
420$c = ($a = "\x89\x8a\x8b\x8c\x8d\x8f\x90\x91") =~ tr/\x89-\x91/X/;
953ab6e5
MS
421is($c, 8);
422is($a, "XXXXXXXX");
4c3a8340 423
6b6bd37b 424$c = ($a = "\xc9\xca\xcb\xcc\xcd\xcf\xd0\xd1") =~ tr/\xc9-\xd1/X/;
953ab6e5
MS
425is($c, 8);
426is($a, "XXXXXXXX");
6b6bd37b 427
4c3a8340 428SKIP: {
f4240379 429 skip "EBCDIC-centric tests", 4 unless $::IS_EBCDIC;
953ab6e5
MS
430
431 $c = ($a = "\x89\x8a\x8b\x8c\x8d\x8f\x90\x91") =~ tr/i-j/X/;
432 is($c, 2);
433 is($a, "X\x8a\x8b\x8c\x8d\x8f\x90X");
434
435 $c = ($a = "\xc9\xca\xcb\xcc\xcd\xcf\xd0\xd1") =~ tr/I-J/X/;
436 is($c, 2);
437 is($a, "X\xca\xcb\xcc\xcd\xcf\xd0X");
6b6bd37b 438}
1ed601ec
JH
439
440($a = "\x{100}") =~ tr/\x00-\xff/X/c;
953ab6e5 441is(ord($a), ord("X"));
1ed601ec
JH
442
443($a = "\x{100}") =~ tr/\x00-\xff/X/cs;
953ab6e5 444is(ord($a), ord("X"));
1ed601ec
JH
445
446($a = "\x{100}\x{100}") =~ tr/\x{101}-\x{200}//c;
953ab6e5 447is($a, "\x{100}\x{100}");
1ed601ec
JH
448
449($a = "\x{100}\x{100}") =~ tr/\x{101}-\x{200}//cs;
953ab6e5 450is($a, "\x{100}");
1ed601ec 451
629b4584 452$a = "\xfe\xff"; $a =~ tr/\xfe\xff/\x{1ff}\x{1fe}/;
953ab6e5
MS
453is($a, "\x{1ff}\x{1fe}");
454
76ef7183
JH
455
456# From David Dyck
457($a = "R0_001") =~ tr/R_//d;
953ab6e5 458is(hex($a), 1);
76ef7183 459
800b4dc4
JH
460# From Inaba Hiroto
461@a = (1,2); map { y/1/./ for $_ } @a;
953ab6e5 462is("@a", ". 2");
800b4dc4
JH
463
464@a = (1,2); map { y/1/./ for $_.'' } @a;
953ab6e5
MS
465is("@a", "1 2");
466
800b4dc4 467
bec89253
RH
468# Additional test for Inaba Hiroto patch (robin@kitsite.com)
469($a = "\x{100}\x{102}\x{101}") =~ tr/\x00-\377/XYZ/c;
953ab6e5
MS
470is($a, "XZY");
471
bec89253 472
2233f375
NC
473# Used to fail with "Modification of a read-only value attempted"
474%a = (N=>1);
475foreach (keys %a) {
953ab6e5
MS
476 eval 'tr/N/n/';
477 is($_, 'n', 'pp_trans needs to unshare shared hash keys');
478 is($@, '', ' no error');
2233f375 479}
94bfe852 480
953ab6e5 481
94bfe852 482$x = eval '"1213" =~ tr/1/1/';
953ab6e5
MS
483is($x, 2, 'implicit count on constant');
484is($@, '', ' no error');
485
486
487my @foo = ();
488eval '$foo[-1] =~ tr/N/N/';
489is( $@, '', 'implicit count outside array bounds, index negative' );
490is( scalar @foo, 0, " doesn't extend the array");
491
492eval '$foo[1] =~ tr/N/N/';
493is( $@, '', 'implicit count outside array bounds, index positive' );
494is( scalar @foo, 0, " doesn't extend the array");
495
496
497my %foo = ();
498eval '$foo{bar} =~ tr/N/N/';
499is( $@, '', 'implicit count outside hash bounds' );
500is( scalar keys %foo, 0, " doesn't extend the hash");
d59e14db
RGS
501
502$x = \"foo";
503is( $x =~ tr/A/A/, 2, 'non-modifying tr/// on a scalar ref' );
504is( ref $x, 'SCALAR', " doesn't stringify its argument" );
0d65d7d5
MS
505
506# rt.perl.org 36622. Perl didn't like a y/// at end of file. No trailing
507# newline allowed.
2a91eb11 508fresh_perl_is(q[$_ = "foo"; y/A-Z/a-z/], '', {}, 'RT #36622 y/// at end of file');
9f7f3913
TS
509
510
511{ # [perl #38293] chr(65535) should be allowed in regexes
512no warnings 'utf8'; # to allow non-characters
513
514$s = "\x{d800}\x{ffff}";
515$s =~ tr/\0/A/;
516is($s, "\x{d800}\x{ffff}", "do_trans_simple");
517
518$s = "\x{d800}\x{ffff}";
519$i = $s =~ tr/\0//;
520is($i, 0, "do_trans_count");
521
522$s = "\x{d800}\x{ffff}";
523$s =~ tr/\0/A/s;
524is($s, "\x{d800}\x{ffff}", "do_trans_complex, SQUASH");
525
526$s = "\x{d800}\x{ffff}";
527$s =~ tr/\0/A/c;
528is($s, "AA", "do_trans_complex, COMPLEMENT");
529
530$s = "A\x{ffff}B";
531$s =~ tr/\x{ffff}/\x{1ffff}/;
532is($s, "A\x{1ffff}B", "utf8, SEARCHLIST");
533
534$s = "\x{fffd}\x{fffe}\x{ffff}";
535$s =~ tr/\x{fffd}-\x{ffff}/ABC/;
536is($s, "ABC", "utf8, SEARCHLIST range");
537
538$s = "ABC";
539$s =~ tr/ABC/\x{ffff}/;
540is($s, "\x{ffff}"x3, "utf8, REPLACEMENTLIST");
541
542$s = "ABC";
543$s =~ tr/ABC/\x{fffd}-\x{ffff}/;
544is($s, "\x{fffd}\x{fffe}\x{ffff}", "utf8, REPLACEMENTLIST range");
545
546$s = "A\x{ffff}B\x{100}\0\x{fffe}\x{ffff}";
547$i = $s =~ tr/\x{ffff}//;
548is($i, 2, "utf8, count");
549
550$s = "A\x{ffff}\x{ffff}C";
551$s =~ tr/\x{ffff}/\x{100}/s;
552is($s, "A\x{100}C", "utf8, SQUASH");
553
554$s = "A\x{ffff}\x{ffff}\x{fffe}\x{fffe}\x{fffe}C";
555$s =~ tr/\x{fffe}\x{ffff}//s;
556is($s, "A\x{ffff}\x{fffe}C", "utf8, SQUASH");
557
558$s = "xAABBBy";
559$s =~ tr/AB/\x{ffff}/s;
560is($s, "x\x{ffff}y", "utf8, SQUASH");
561
562$s = "xAABBBy";
563$s =~ tr/AB/\x{fffe}\x{ffff}/s;
564is($s, "x\x{fffe}\x{ffff}y", "utf8, SQUASH");
565
566$s = "A\x{ffff}B\x{fffe}C";
567$s =~ tr/\x{fffe}\x{ffff}/x/c;
568is($s, "x\x{ffff}x\x{fffe}x", "utf8, COMPLEMENT");
569
570$s = "A\x{10000}B\x{2abcd}C";
571$s =~ tr/\0-\x{ffff}/x/c;
572is($s, "AxBxC", "utf8, COMPLEMENT range");
573
574$s = "A\x{fffe}B\x{ffff}C";
575$s =~ tr/\x{fffe}\x{ffff}/x/d;
576is($s, "AxBC", "utf8, DELETE");
577
578} # non-characters end
579
1749ea0d
TS
580{ # related to [perl #27940]
581 my $c;
582
583 ($c = "\x20\c@\x30\cA\x40\cZ\x50\c_\x60") =~ tr/\c@-\c_//d;
584 is($c, "\x20\x30\x40\x50\x60", "tr/\\c\@-\\c_//d");
585
586 ($c = "\x20\x00\x30\x01\x40\x1A\x50\x1F\x60") =~ tr/\x00-\x1f//d;
587 is($c, "\x20\x30\x40\x50\x60", "tr/\\x00-\\x1f//d");
588}
589
3788ef8f 590($s) = keys %{{pie => 3}};
3e89ba19 591SKIP: {
e3918bb7
FC
592 if (!eval { require XS::APItest }) { skip "no XS::APItest", 2 }
593 my $wasro = XS::APItest::SvIsCOW($s);
2203fb5a 594 ok $wasro, "have a COW";
3788ef8f 595 $s =~ tr/i//;
e3918bb7 596 ok( XS::APItest::SvIsCOW($s),
3e89ba19 597 "count-only tr doesn't deCOW COWs" );
3788ef8f 598}
a5446a64
DM
599
600# [ RT #61520 ]
601#
602# under threads, unicode tr within a cloned closure would SEGV or assert
603# fail, since the pointer in the pad to the swash was getting zeroed out
604# in the proto-CV
605
606{
607 my $x = "\x{142}";
608 sub {
609 $x =~ tr[\x{142}][\x{143}];
610 }->();
611 is($x,"\x{143}", "utf8 + closure");
612}
613
9100eeb1
Z
614# Freeing of trans ops prior to pmtrans() [perl #102858].
615eval q{ $a ~= tr/a/b/; };
616ok 1;
617SKIP: {
55673181 618 no warnings "deprecated";
9100eeb1
Z
619 skip "no encoding", 1 unless eval { require encoding; 1 };
620 eval q{ use encoding "utf8"; $a ~= tr/a/b/; };
621 ok 1;
622}
a5446a64 623
cb6d3474
KW
624{ # [perl #113584]
625
626 my $x = "Perlα";
627 $x =~ tr/αα/βγ/;
baacc348 628 { no warnings 'utf8'; print "# $x\n"; } # No note() to avoid wide warning.
cb6d3474
KW
629 is($x, "Perlβ", "Only first of multiple transliterations is used");
630}
631
d8b516a1
FC
632# tr/a/b/ should fail even on zero-length read-only strings
633use constant nullrocow => (keys%{{""=>undef}})[0];
634for ("", nullrocow) {
635 eval { $_ =~ y/a/b/ };
636 like $@, qr/^Modification of a read-only value attempted at /,
637 'tr/a/b/ fails on zero-length ro string';
638}
639
a53bfdae
HS
640# Whether they're permitted or not, non-modifying tr/// should not write
641# to read-only values, even with funky flags.
642{ # [perl #123759]
643 eval q{ ('a' =~ /./) =~ tr///d };
644 ok(1, "tr///d on PL_Yes does not assert");
645 eval q{ ('a' =~ /./) =~ tr/a-z/a-z/d };
646 ok(1, "tr/a-z/a-z/d on PL_Yes does not assert");
647 eval q{ ('a' =~ /./) =~ tr///s };
648 ok(1, "tr///s on PL_Yes does not assert");
649 eval q{ *x =~ tr///d };
650 ok(1, "tr///d on glob does not assert");
651}
652
3a34ca0b 653{ # [perl #128734
ce4eed6e 654 my $string = chr utf8::unicode_to_native(0x00e0);
3a34ca0b
KW
655 $string =~ tr/\N{U+00e0}/A/;
656 is($string, "A", 'tr// of \N{U+...} works for upper-Latin1');
ce4eed6e 657 my $string = chr utf8::unicode_to_native(0x00e1);
3a34ca0b
KW
658 $string =~ tr/\N{LATIN SMALL LETTER A WITH ACUTE}/A/;
659 is($string, "A", 'tr// of \N{name} works for upper-Latin1');
660}
661
2108cbcf
DM
662# RT #130198
663# a tr/// that is cho(m)ped, possibly with an array as arg
664
665{
666 use warnings;
667
668 my ($s, @a);
669
670 my $warn;
671 local $SIG{__WARN__ } = sub { $warn .= "@_" };
672
673 for my $c (qw(chop chomp)) {
674 for my $bind ('', '$s =~ ', '@a =~ ') {
675 for my $arg2 (qw(a b)) {
676 for my $r ('', 'r') {
677 $warn = '';
678 # tr/a/b/ modifies its LHS, so if the LHS is an
679 # array, this should die. The special cases of tr/a/a/
680 # and tr/a/b/r don't modify their LHS, so instead
681 # we croak because cho(m)p is trying to modify it.
682 #
683 my $exp =
684 ($r eq '' && $arg2 eq 'b' && $bind =~ /\@a/)
685 ? qr/Can't modify private array in transliteration/
686 : qr{Can't modify transliteration \(tr///\) in $c};
687
688 my $expr = "$c(${bind}tr/a/$arg2/$r);";
689 eval $expr;
690 like $@, $exp, "RT #130198 eval: $expr";
691
692 $exp =
693 $bind =~ /\@a/
694 ? qr{^Applying transliteration \(tr///\) to \@a will act on scalar\(\@a\)}
695 : qr/^$/;
696 like $warn, $exp, "RT #130198 warn: $expr";
697 }
698 }
699 }
700 }
701
702
703}
704
fe2ba0a2
KW
705{ # [perl #130656] This bug happens when the tr is split across lines, so
706 # that the first line causes it to go into UTF-8, and the 2nd is only
707 # things like \x
708 my $x = "\x{E235}";
709 $x =~ tr
710 [\x{E234}-\x{E342}\x{E5B5}-\x{E5DF}]
711 [\x{E5CD}-\x{E5DF}\x{EA80}-\x{EAFA}\x{EB0E}-\x{EB8E}\x{EAFB}-\x{EB0D}\x{E5B5}-\x{E5CC}];
712
713 is $x, "\x{E5CE}", '[perl #130656]';
714}
2108cbcf 715
9100eeb1 7161;