This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Add Rafael's 5.10 roadmap to perltodo.
[perl5.git] / t / op / tr.t
CommitLineData
c8e3bb4c
GS
1# tr.t
2
f05dd7cc
JH
3BEGIN {
4 chdir 't' if -d 't';
20822f61 5 @INC = '../lib';
953ab6e5 6 require './test.pl';
f05dd7cc 7}
a5095b95 8
0d65d7d5 9plan tests => 100;
953ab6e5
MS
10
11my $Is_EBCDIC = (ord('i') == 0x89 & ord('J') == 0xd1);
c8e3bb4c
GS
12
13$_ = "abcdefghijklmnopqrstuvwxyz";
14
15tr/a-z/A-Z/;
16
953ab6e5 17is($_, "ABCDEFGHIJKLMNOPQRSTUVWXYZ", 'uc');
c8e3bb4c
GS
18
19tr/A-Z/a-z/;
20
953ab6e5 21is($_, "abcdefghijklmnopqrstuvwxyz", 'lc');
c8e3bb4c
GS
22
23tr/b-y/B-Y/;
953ab6e5 24is($_, "aBCDEFGHIJKLMNOPQRSTUVWXYz", 'partial uc');
c8e3bb4c 25
c8e3bb4c
GS
26
27# In EBCDIC 'I' is \xc9 and 'J' is \0xd1, 'i' is \x89 and 'j' is \x91.
28# Yes, discontinuities. Regardless, the \xca in the below should stay
29# untouched (and not became \x8a).
5e037136
GS
30{
31 no utf8;
32 $_ = "I\xcaJ";
c8e3bb4c 33
5e037136 34 tr/I-J/i-j/;
c8e3bb4c 35
ff36f15d 36 is($_, "i\xcaj", 'EBCDIC discontinuity');
5e037136 37}
c8e3bb4c 38#
2de7b02f 39
953ab6e5 40
2de7b02f
GS
41($x = 12) =~ tr/1/3/;
42(my $y = 12) =~ tr/1/3/;
43($f = 1.5) =~ tr/1/3/;
44(my $g = 1.5) =~ tr/1/3/;
953ab6e5
MS
45is($x + $y + $f + $g, 71, 'tr cancels IOK and NOK');
46
2de7b02f 47
953ab6e5 48# perlbug [ID 20000511.005]
2de7b02f
GS
49$_ = 'fred';
50/([a-z]{2})/;
51$1 =~ tr/A-Z//;
52s/^(\s*)f/$1F/;
953ab6e5
MS
53is($_, 'Fred', 'harmless if explicitly not updating');
54
55
56# A variant of the above, added in 5.7.2
57$_ = 'fred';
58/([a-z]{2})/;
59eval '$1 =~ tr/A-Z/A-Z/;';
60s/^(\s*)f/$1F/;
61is($_, 'Fred', 'harmless if implicitly not updating');
62is($@, '', ' no error');
63
2de7b02f
GS
64
65# check tr handles UTF8 correctly
66($x = 256.65.258) =~ tr/a/b/;
953ab6e5
MS
67is($x, 256.65.258, 'handles UTF8');
68is(length $x, 3);
69
2de7b02f 70$x =~ tr/A/B/;
953ab6e5 71is(length $x, 3);
67a17885 72if (ord("\t") == 9) { # ASCII
953ab6e5 73 is($x, 256.66.258);
67a17885
PP
74}
75else {
953ab6e5 76 is($x, 256.65.258);
67a17885 77}
953ab6e5 78
cbe7f703
PP
79# EBCDIC variants of the above tests
80($x = 256.193.258) =~ tr/a/b/;
953ab6e5
MS
81is(length $x, 3);
82is($x, 256.193.258);
83
cbe7f703 84$x =~ tr/A/B/;
953ab6e5 85is(length $x, 3);
cbe7f703 86if (ord("\t") == 9) { # ASCII
953ab6e5 87 is($x, 256.193.258);
cbe7f703
PP
88}
89else {
953ab6e5 90 is($x, 256.194.258);
cbe7f703 91}
953ab6e5 92
036b4402
GS
93
94{
953ab6e5
MS
95 my $l = chr(300); my $r = chr(400);
96 $x = 200.300.400;
97 $x =~ tr/\x{12c}/\x{190}/;
98 is($x, 200.400.400,
99 'changing UTF8 chars in a UTF8 string, same length');
100 is(length $x, 3);
101
102 $x = 200.300.400;
103 $x =~ tr/\x{12c}/\x{be8}/;
104 is($x, 200.3048.400, ' more bytes');
105 is(length $x, 3);
106
107 $x = 100.125.60;
108 $x =~ tr/\x{64}/\x{190}/;
109 is($x, 400.125.60, 'Putting UT8 chars into a non-UTF8 string');
110 is(length $x, 3);
111
112 $x = 400.125.60;
113 $x =~ tr/\x{190}/\x{64}/;
114 is($x, 100.125.60, 'Removing UTF8 chars from UTF8 string');
115 is(length $x, 3);
116
117 $x = 400.125.60.400;
118 $y = $x =~ tr/\x{190}/\x{190}/;
119 is($y, 2, 'Counting UTF8 chars in UTF8 string');
120
121 $x = 60.400.125.60.400;
122 $y = $x =~ tr/\x{3c}/\x{3c}/;
123 is($y, 2, ' non-UTF8 chars in UTF8 string');
124
125 # 17 - counting UTF8 chars in non-UTF8 string
126 $x = 200.125.60;
127 $y = $x =~ tr/\x{190}/\x{190}/;
128 is($y, 0, ' UTF8 chars in non-UTFs string');
036b4402 129}
c2e66d9e 130
c2e66d9e 131$_ = "abcdefghijklmnopqrstuvwxyz";
953ab6e5
MS
132eval 'tr/a-z-9/ /';
133like($@, qr/^Ambiguous range in transliteration operator/, 'tr/a-z-9//');
c2e66d9e 134
cbe7f703 135# 19-21: Make sure leading and trailing hyphens still work
c2e66d9e
GS
136$_ = "car-rot9";
137tr/-a-m/./;
953ab6e5 138is($_, '..r.rot9', 'hyphens, leading');
c2e66d9e
GS
139
140$_ = "car-rot9";
141tr/a-m-/./;
953ab6e5 142is($_, '..r.rot9', ' trailing');
c2e66d9e
GS
143
144$_ = "car-rot9";
145tr/-a-m-/./;
953ab6e5 146is($_, '..r.rot9', ' both');
c2e66d9e
GS
147
148$_ = "abcdefghijklmnop";
149tr/ae-hn/./;
953ab6e5 150is($_, '.bcd....ijklm.op');
c2e66d9e
GS
151
152$_ = "abcdefghijklmnop";
153tr/a-cf-kn-p/./;
953ab6e5 154is($_, '...de......lm...');
c2e66d9e
GS
155
156$_ = "abcdefghijklmnop";
157tr/a-ceg-ikm-o/./;
953ab6e5
MS
158is($_, '...d.f...j.l...p');
159
c2e66d9e 160
c2e66d9e
GS
161# 20000705 MJD
162eval "tr/m-d/ /";
321ecc04 163like($@, qr/^Invalid range "m-d" in transliteration operator/,
953ab6e5 164 'reversed range check');
c2e66d9e 165
d897a58d 166eval '$1 =~ tr/x/y/';
953ab6e5
MS
167like($@, qr/^Modification of a read-only value attempted/,
168 'cannot update read-only var');
d897a58d 169
d897a58d 170'abcdef' =~ /(bcd)/;
953ab6e5
MS
171is(eval '$1 =~ tr/abcd//', 3, 'explicit read-only count');
172is($@, '', ' no error');
d897a58d 173
953ab6e5
MS
174'abcdef' =~ /(bcd)/;
175is(eval '$1 =~ tr/abcd/abcd/', 3, 'implicit read-only count');
176is($@, '', ' no error');
177
178is(eval '"123" =~ tr/12//', 2, 'LHS of non-updating tr');
d897a58d 179
94bfe852 180eval '"123" =~ tr/1/2/';
953ab6e5
MS
181like($@, qr|^Can't modify constant item in transliteration \(tr///\)|,
182 'LHS bad on updating tr');
183
d897a58d 184
381d18bc
JH
185# v300 (0x12c) is UTF-8-encoded as 196 172 (0xc4 0xac)
186# v400 (0x190) is UTF-8-encoded as 198 144 (0xc6 0x90)
187
188# Transliterate a byte to a byte, all four ways.
189
190($a = v300.196.172.300.196.172) =~ tr/\xc4/\xc5/;
953ab6e5 191is($a, v300.197.172.300.197.172, 'byte2byte transliteration');
381d18bc
JH
192
193($a = v300.196.172.300.196.172) =~ tr/\xc4/\x{c5}/;
953ab6e5 194is($a, v300.197.172.300.197.172);
381d18bc
JH
195
196($a = v300.196.172.300.196.172) =~ tr/\x{c4}/\xc5/;
953ab6e5 197is($a, v300.197.172.300.197.172);
381d18bc
JH
198
199($a = v300.196.172.300.196.172) =~ tr/\x{c4}/\x{c5}/;
953ab6e5 200is($a, v300.197.172.300.197.172);
381d18bc 201
381d18bc
JH
202
203($a = v300.196.172.300.196.172) =~ tr/\xc4/\x{12d}/;
953ab6e5 204is($a, v300.301.172.300.301.172, 'byte2wide transliteration');
381d18bc
JH
205
206($a = v300.196.172.300.196.172) =~ tr/\x{12c}/\xc3/;
953ab6e5 207is($a, v195.196.172.195.196.172, ' wide2byte');
381d18bc
JH
208
209($a = v300.196.172.300.196.172) =~ tr/\x{12c}/\x{12d}/;
953ab6e5 210is($a, v301.196.172.301.196.172, ' wide2wide');
381d18bc 211
381d18bc
JH
212
213($a = v300.196.172.300.196.172) =~ tr/\xc4\x{12c}/\x{12d}\xc3/;
953ab6e5 214is($a, v195.301.172.195.301.172, 'byte2wide & wide2byte');
381d18bc 215
381d18bc
JH
216
217($a = v300.196.172.300.196.172.400.198.144) =~
218 tr/\xac\xc4\x{12c}\x{190}/\xad\x{12d}\xc5\x{191}/;
953ab6e5 219is($a, v197.301.173.197.301.173.401.198.144, 'all together now!');
381d18bc 220
381d18bc 221
953ab6e5
MS
222is((($a = v300.196.172.300.196.172) =~ tr/\xc4/\xc5/), 2,
223 'transliterate and count');
381d18bc 224
953ab6e5 225is((($a = v300.196.172.300.196.172) =~ tr/\x{12c}/\x{12d}/), 2);
381d18bc 226
381d18bc
JH
227
228($a = v300.196.172.300.196.172) =~ tr/\xc4/\x{12d}/c;
953ab6e5 229is($a, v301.196.301.301.196.301, 'translit w/complement');
381d18bc
JH
230
231($a = v300.196.172.300.196.172) =~ tr/\x{12c}/\xc5/c;
953ab6e5 232is($a, v300.197.197.300.197.197);
381d18bc 233
381d18bc
JH
234
235($a = v300.196.172.300.196.172) =~ tr/\xc4//d;
953ab6e5 236is($a, v300.172.300.172, 'translit w/deletion');
381d18bc
JH
237
238($a = v300.196.172.300.196.172) =~ tr/\x{12c}//d;
953ab6e5 239is($a, v196.172.196.172);
381d18bc 240
381d18bc
JH
241
242($a = v196.196.172.300.300.196.172) =~ tr/\xc4/\xc5/s;
953ab6e5 243is($a, v197.172.300.300.197.172, 'translit w/squeeze');
381d18bc
JH
244
245($a = v196.172.300.300.196.172.172) =~ tr/\x{12c}/\x{12d}/s;
953ab6e5 246is($a, v196.172.301.196.172.172);
381d18bc 247
a1874b66 248
953ab6e5 249# Tricky cases (When Simon Cozens Attacks)
a1874b66 250($a = v196.172.200) =~ tr/\x{12c}/a/;
953ab6e5 251is(sprintf("%vd", $a), '196.172.200');
a1874b66
JH
252
253($a = v196.172.200) =~ tr/\x{12c}/\x{12c}/;
953ab6e5 254is(sprintf("%vd", $a), '196.172.200');
a1874b66
JH
255
256($a = v196.172.200) =~ tr/\x{12c}//d;
953ab6e5
MS
257is(sprintf("%vd", $a), '196.172.200');
258
a1874b66 259
8973db79 260# UTF8 range tests from Inaba Hiroto
f9a63242 261
a26bfc40 262# Not working in EBCDIC as of 12674.
f9a63242 263($a = v300.196.172.302.197.172) =~ tr/\x{12c}-\x{130}/\xc0-\xc4/;
953ab6e5 264is($a, v192.196.172.194.197.172, 'UTF range');
f9a63242
JH
265
266($a = v300.196.172.302.197.172) =~ tr/\xc4-\xc8/\x{12c}-\x{130}/;
953ab6e5
MS
267is($a, v300.300.172.302.301.172);
268
8973db79
JH
269
270# UTF8 range tests from Karsten Sperling (patch #9008 required)
271
272($a = "\x{0100}") =~ tr/\x00-\x{100}/X/;
953ab6e5 273is($a, "X");
8973db79
JH
274
275($a = "\x{0100}") =~ tr/\x{0000}-\x{00ff}/X/c;
953ab6e5 276is($a, "X");
8973db79
JH
277
278($a = "\x{0100}") =~ tr/\x{0000}-\x{00ff}\x{0101}/X/c;
953ab6e5 279is($a, "X");
8973db79
JH
280
281($a = v256) =~ tr/\x{0000}-\x{00ff}\x{0101}/X/c;
953ab6e5
MS
282is($a, "X");
283
8973db79 284
94472101
JH
285# UTF8 range tests from Inaba Hiroto
286
287($a = "\x{200}") =~ tr/\x00-\x{100}/X/c;
953ab6e5 288is($a, "X");
94472101
JH
289
290($a = "\x{200}") =~ tr/\x00-\x{100}/X/cs;
953ab6e5
MS
291is($a, "X");
292
94472101 293
6b6bd37b
JH
294# Tricky on EBCDIC: while [a-z] [A-Z] must not match the gap characters,
295# (i-j, r-s, I-J, R-S), [\x89-\x91] [\xc9-\xd1] has to match them,
296# from Karsten Sperling.
297
298$c = ($a = "\x89\x8a\x8b\x8c\x8d\x8f\x90\x91") =~ tr/\x89-\x91/X/;
953ab6e5
MS
299is($c, 8);
300is($a, "XXXXXXXX");
4c3a8340 301
6b6bd37b 302$c = ($a = "\xc9\xca\xcb\xcc\xcd\xcf\xd0\xd1") =~ tr/\xc9-\xd1/X/;
953ab6e5
MS
303is($c, 8);
304is($a, "XXXXXXXX");
6b6bd37b 305
4c3a8340 306SKIP: {
953ab6e5
MS
307 skip "not EBCDIC", 4 unless $Is_EBCDIC;
308
309 $c = ($a = "\x89\x8a\x8b\x8c\x8d\x8f\x90\x91") =~ tr/i-j/X/;
310 is($c, 2);
311 is($a, "X\x8a\x8b\x8c\x8d\x8f\x90X");
312
313 $c = ($a = "\xc9\xca\xcb\xcc\xcd\xcf\xd0\xd1") =~ tr/I-J/X/;
314 is($c, 2);
315 is($a, "X\xca\xcb\xcc\xcd\xcf\xd0X");
6b6bd37b 316}
1ed601ec
JH
317
318($a = "\x{100}") =~ tr/\x00-\xff/X/c;
953ab6e5 319is(ord($a), ord("X"));
1ed601ec
JH
320
321($a = "\x{100}") =~ tr/\x00-\xff/X/cs;
953ab6e5 322is(ord($a), ord("X"));
1ed601ec
JH
323
324($a = "\x{100}\x{100}") =~ tr/\x{101}-\x{200}//c;
953ab6e5 325is($a, "\x{100}\x{100}");
1ed601ec
JH
326
327($a = "\x{100}\x{100}") =~ tr/\x{101}-\x{200}//cs;
953ab6e5 328is($a, "\x{100}");
1ed601ec 329
629b4584 330$a = "\xfe\xff"; $a =~ tr/\xfe\xff/\x{1ff}\x{1fe}/;
953ab6e5
MS
331is($a, "\x{1ff}\x{1fe}");
332
76ef7183
JH
333
334# From David Dyck
335($a = "R0_001") =~ tr/R_//d;
953ab6e5 336is(hex($a), 1);
76ef7183 337
800b4dc4
JH
338# From Inaba Hiroto
339@a = (1,2); map { y/1/./ for $_ } @a;
953ab6e5 340is("@a", ". 2");
800b4dc4
JH
341
342@a = (1,2); map { y/1/./ for $_.'' } @a;
953ab6e5
MS
343is("@a", "1 2");
344
800b4dc4 345
bec89253
RH
346# Additional test for Inaba Hiroto patch (robin@kitsite.com)
347($a = "\x{100}\x{102}\x{101}") =~ tr/\x00-\377/XYZ/c;
953ab6e5
MS
348is($a, "XZY");
349
bec89253 350
2233f375
NC
351# Used to fail with "Modification of a read-only value attempted"
352%a = (N=>1);
353foreach (keys %a) {
953ab6e5
MS
354 eval 'tr/N/n/';
355 is($_, 'n', 'pp_trans needs to unshare shared hash keys');
356 is($@, '', ' no error');
2233f375 357}
94bfe852 358
953ab6e5 359
94bfe852 360$x = eval '"1213" =~ tr/1/1/';
953ab6e5
MS
361is($x, 2, 'implicit count on constant');
362is($@, '', ' no error');
363
364
365my @foo = ();
366eval '$foo[-1] =~ tr/N/N/';
367is( $@, '', 'implicit count outside array bounds, index negative' );
368is( scalar @foo, 0, " doesn't extend the array");
369
370eval '$foo[1] =~ tr/N/N/';
371is( $@, '', 'implicit count outside array bounds, index positive' );
372is( scalar @foo, 0, " doesn't extend the array");
373
374
375my %foo = ();
376eval '$foo{bar} =~ tr/N/N/';
377is( $@, '', 'implicit count outside hash bounds' );
378is( scalar keys %foo, 0, " doesn't extend the hash");
d59e14db
RGS
379
380$x = \"foo";
381is( $x =~ tr/A/A/, 2, 'non-modifying tr/// on a scalar ref' );
382is( ref $x, 'SCALAR', " doesn't stringify its argument" );
0d65d7d5
MS
383
384# rt.perl.org 36622. Perl didn't like a y/// at end of file. No trailing
385# newline allowed.
386fresh_perl_is(q[$_ = "foo"; y/A-Z/a-z/], '');