# Abort the whole file if the preceding eval (not visible in this excerpt)
# left an error in $@; per the skip message this is the miniperl case, where
# 'utf8' is unavailable.
10 if ($@) { skip_all("miniperl, no 'utf8'") }
18 # Test this first before we extend the stack with other operations.
19 # This caused an asan failure due to a bad write past the end of the stack.
# die is handed a 127-element list followed by the result of an empty y///
# on an undefined lexical; only surviving the eval matters here.
20 eval { my $x; die 1..127, $x =~ y/// };
# Basic case-mapping checks on $_.  The tr/// statements that change $_
# between the assertions below are not visible in this excerpt.
22 $_ = "abcdefghijklmnopqrstuvwxyz";
26 is($_, "ABCDEFGHIJKLMNOPQRSTUVWXYZ", 'uc');
30 is($_, "abcdefghijklmnopqrstuvwxyz", 'lc');
33 is($_, "aBCDEFGHIJKLMNOPQRSTUVWXYz", 'partial uc');
36 is($_, "ABCDEFGHIJKLMNOPQRSTUVWXYz", 'single char range a-a');
# A \N{...} that names a sequence must be rejected inside tr///; the
# expected error text is matched by the qr// below.
38 eval 'tr/a/\N{KATAKANA LETTER AINU P}/;';
40 qr/\\N\{KATAKANA LETTER AINU P\} must not be a named sequence in transliteration operator/,
41 "Illegal to tr/// named sequence";
# A range whose start is above its end must be reported as invalid.
43 eval 'tr/\x{101}-\x{100}//;';
45 qr/Invalid range "\\x\{0101\}-\\x\{0100\}" in transliteration operator/,
46 "UTF-8 range with min > max";
49 # Test /c and variants, with all the search and replace chars being
50 # non-utf8, but with both non-utf8 and utf8 strings.
# Fixtures: each code point 0x00..0xff once; the same with every code point
# doubled; and both of those extended up to 0x11f so that the string also
# contains characters >= 0x100.
53 my $all255 = join '', map chr, 0..0xff;
54 my $all255_twice = join '', map chr, map { ($_, $_) } 0..0xff;
55 my $all255_plus = join '', map chr, 0..0x11f;
56 my $all255_twice_plus = join '', map chr, map { ($_, $_) } 0..0x11f;
60 # length(replacement) == 0
# NOTE(review): the "$s = ..." resets before each tr/// are not visible in
# this excerpt; the expected counts suggest $all255 for /c and /cd and
# $all255_twice for /cs and /csd -- confirm against the full file.
# /c alone: characters outside \x40-\xbf are counted, the string is unchanged.
64 $c = $s =~ tr/\x40-\xbf//c;
65 is $s, $all255, "/c ==0";
66 is $c, 0x80, "/c ==0 count";
# /cd: everything outside \x40-\xbf is deleted.
69 $c = $s =~ tr/\x40-\xbf//cd;
70 is $s, join('', map chr, 0x40..0xbf), "/cd ==0";
71 is $c, 0x80, "/cd ==0 count";
# /cs: same search list with squeezing; the expected count doubles to 0x100.
74 $c = $s =~ tr/\x40-\xbf//cs;
75 is $s, join('', map chr,
77 (map { ($_, $_) } 0x40..0xbf),
81 is $c, 0x100, "/cs ==0 count";
# /csd: only the (doubled) characters \x40-\xbf remain.
84 $c = $s =~ tr/\x40-\xbf//csd;
85 is $s, join('', map chr, (map { ($_, $_) } 0x40..0xbf)), "/csd ==0";
86 is $c, 0x100, "/csd ==0 count";
89 # length(search) > length(replacement)
# Within 0x00-0xff the complemented search list has 0x80 characters
# (0x00-0x3f and 0xc0-0xff); the replacement list has only 0x70.
# NOTE(review): the "$s = ..." resets and the continuation lines of the
# multi-line expected values are not visible in this excerpt.
93 $c = $s =~ tr/\x40-\xbf/\x80-\xbf\x00-\x2f/c;
94 is $s, join('', map chr,
101 is $c, 0x80, "/c > count";
# With /d the unpaired tail of the complemented list (0xf0-0xff) is deleted,
# as the expected string below shows.
104 $c = $s =~ tr/\x40-\xbf/\x80-\xbf\x00-\x2f/cd;
105 is $s, join('', map chr, 0x80..0xbf, 0x40..0xbf, 0x00..0x2f),
107 is $c, 0x80, "/cd > count";
# Squeezing variants; the expected count doubles to 0x100.
110 $c = $s =~ tr/\x40-\xbf/\x80-\xbf\x00-\x2f/cs;
111 is $s, join('', map chr,
113 (map { ($_, $_) } 0x40..0xbf),
117 is $c, 0x100, "/cs > count";
120 $c = $s =~ tr/\x40-\xbf/\x80-\xbf\x00-\x2f/csd;
121 is $s, join('', map chr,
123 (map { ($_, $_) } 0x40..0xbf),
127 is $c, 0x100, "/csd > count";
130 # length(search) == length(replacement)
# The replacement list has 0x80 characters, the same as the complemented
# search list within 0x00-0xff, so /d has nothing left over to delete:
# /c and /cd expect the same string.
# NOTE(review): the "$s = ..." resets and the continuation lines of the
# multi-line expected values are not visible in this excerpt.
134 $c = $s =~ tr/\x40-\xbf/\x80-\xbf\x00-\x3f/c;
135 is $s, join('', map chr, 0x80..0xbf, 0x40..0xbf, 0x00..0x3f), "/c ==";
136 is $c, 0x80, "/c == count";
139 $c = $s =~ tr/\x40-\xbf/\x80-\xbf\x00-\x3f/cd;
140 is $s, join('', map chr, 0x80..0xbf, 0x40..0xbf, 0x00..0x3f), "/cd ==";
141 is $c, 0x80, "/cd == count";
# Squeezing variants; the expected count doubles to 0x100.
144 $c = $s =~ tr/\x40-\xbf/\x80-\xbf\x00-\x3f/cs;
145 is $s, join('', map chr,
147 (map { ($_, $_) } 0x40..0xbf),
151 is $c, 0x100, "/cs == count";
154 $c = $s =~ tr/\x40-\xbf/\x80-\xbf\x00-\x3f/csd;
155 is $s, join('', map chr,
157 (map { ($_, $_) } 0x40..0xbf),
161 is $c, 0x100, "/csd == count";
163 # length(search) == length(replacement) - 1
# Within 0x00-0xff the complemented search list is 0x00-0x3f plus 0xc0-0xef
# (0x70 characters); the replacement list has 0x71.  The explicitly listed
# 0xf0-0xff stay unchanged, as the expected strings show.
# NOTE(review): the "$s = ..." resets and the continuation lines of the
# multi-line expected values are not visible in this excerpt.
168 $c = $s =~ tr/\x40-\xbf\xf0-\xff/\x80-\xbf\x00-\x30/c;
169 is $s, join('', map chr, 0x80..0xbf, 0x40..0xbf, 0x00..0x2f, 0xf0..0xff),
171 is $c, 0x70, "/c =- count";
174 $c = $s =~ tr/\x40-\xbf\xf0-\xff/\x80-\xbf\x00-\x30/cd;
175 is $s, join('', map chr, 0x80..0xbf, 0x40..0xbf, 0x00..0x2f, 0xf0..0xff),
177 is $c, 0x70, "/cd =- count";
# Squeezing variants; the expected count doubles to 0xe0.
180 $c = $s =~ tr/\x40-\xbf\xf0-\xff/\x80-\xbf\x00-\x30/cs;
181 is $s, join('', map chr,
183 (map { ($_, $_) } 0x40..0xbf),
185 (map { ($_, $_) } 0xf0..0xff),
188 is $c, 0xe0, "/cs =- count";
191 $c = $s =~ tr/\x40-\xbf\xf0-\xff/\x80-\xbf\x00-\x30/csd;
192 is $s, join('', map chr,
194 (map { ($_, $_) } 0x40..0xbf),
196 (map { ($_, $_) } 0xf0..0xff),
199 is $c, 0xe0, "/csd =- count";
201 # length(search) < length(replacement)
# Within 0x00-0xff the complemented search list has 0x70 characters
# (0x00-0x3f and 0xc0-0xef) while the replacement list has 0x80; the
# surplus replacement characters never appear in the expected strings.
# NOTE(review): the "$s = ..." resets and the continuation lines of the
# multi-line expected values are not visible in this excerpt.
205 $c = $s =~ tr/\x40-\xbf\xf0-\xff/\x80-\xbf\x00-\x3f/c;
206 is $s, join('', map chr, 0x80..0xbf, 0x40..0xbf, 0x00..0x2f, 0xf0..0xff),
208 is $c, 0x70, "/c < count";
211 $c = $s =~ tr/\x40-\xbf\xf0-\xff/\x80-\xbf\x00-\x3f/cd;
212 is $s, join('', map chr, 0x80..0xbf, 0x40..0xbf, 0x00..0x2f, 0xf0..0xff),
214 is $c, 0x70, "/cd < count";
# Squeezing variants; the expected count doubles to 0xe0.
217 $c = $s =~ tr/\x40-\xbf\xf0-\xff/\x80-\xbf\x00-\x3f/cs;
218 is $s, join('', map chr,
220 (map { ($_, $_) } 0x40..0xbf),
222 (map { ($_, $_) } 0xf0..0xff),
225 is $c, 0xe0, "/cs < count";
228 $c = $s =~ tr/\x40-\xbf\xf0-\xff/\x80-\xbf\x00-\x3f/csd;
229 is $s, join('', map chr,
231 (map { ($_, $_) } 0x40..0xbf),
233 (map { ($_, $_) } 0xf0..0xff),
236 is $c, 0xe0, "/csd < count";
239 # length(replacement) == 0
240 # with some >= 0x100 utf8 chars in the string to be modified
# Within 0x00-0x11f, 0xa0 characters fall outside \x40-\xbf (0x140 when
# every character is doubled), which is what the counts below expect.
# NOTE(review): the "$s = ..." resets for the /c and /cd cases and the
# continuation lines of the multi-line expected values are not visible in
# this excerpt.
243 $c = $s =~ tr/\x40-\xbf//c;
244 is $s, $all255_plus, "/c ==0U";
245 is $c, 0xa0, "/c ==0U count";
# /cd: everything outside \x40-\xbf, including the characters >= 0x100,
# is deleted.
248 $c = $s =~ tr/\x40-\xbf//cd;
249 is $s, join('', map chr, 0x40..0xbf), "/cd ==0U";
250 is $c, 0xa0, "/cd ==0U count";
252 $s = $all255_twice_plus;
253 $c = $s =~ tr/\x40-\xbf//cs;
254 is $s, join('', map chr,
256 (map { ($_, $_) } 0x40..0xbf),
260 is $c, 0x140, "/cs ==0U count";
262 $s = $all255_twice_plus;
263 $c = $s =~ tr/\x40-\xbf//csd;
264 is $s, join('', map chr, (map { ($_, $_) } 0x40..0xbf)), "/csd ==0U";
265 is $c, 0x140, "/csd ==0U count";
267 # length(search) > length(replacement)
268 # with some >= 0x100 utf8 chars in the string to be modified
# NOTE(review): the "$s = ..." resets for the /c and /cd cases and the
# continuation lines of the multi-line expected values are not visible in
# this excerpt.
271 $c = $s =~ tr/\x40-\xbf/\x80-\xbf\x00-\x2f/c;
272 is $s, join('', map chr,
279 is $c, 0xa0, "/c >U count";
# With /d, complemented characters beyond the end of the replacement list
# (here including those >= 0x100) are deleted, per the expected string.
282 $c = $s =~ tr/\x40-\xbf/\x80-\xbf\x00-\x2f/cd;
283 is $s, join('', map chr, 0x80..0xbf, 0x40..0xbf, 0x00..0x2f),
285 is $c, 0xa0, "/cd >U count";
# Four extra characters outside \x40-\xbf are appended here, hence the
# expected count of 0x144 rather than 0x140.
287 $s = $all255_twice_plus . "\x3f\x3f\x{200}\x{300}";
288 $c = $s =~ tr/\x40-\xbf/\x80-\xbf\x00-\x2f/cs;
289 is $s, join('', map chr,
291 (map { ($_, $_) } 0x40..0xbf),
297 is $c, 0x144, "/cs >U count";
299 $s = $all255_twice_plus;
300 $c = $s =~ tr/\x40-\xbf/\x80-\xbf\x00-\x2f/csd;
301 is $s, join('', map chr,
303 (map { ($_, $_) } 0x40..0xbf),
307 is $c, 0x140, "/csd >U count";
309 # length(search) == length(replacement)
310 # with some >= 0x100 utf8 chars in the string to be modified
# The replacement list (0x80 characters) covers exactly the complemented
# characters below 0x100; the /cd expected string contains nothing for the
# characters >= 0x100.
# NOTE(review): the "$s = ..." resets for the /c and /cd cases and the
# continuation lines of the multi-line expected values are not visible in
# this excerpt.
313 $c = $s =~ tr/\x40-\xbf/\x80-\xbf\x00-\x3f/c;
314 is $s, join('', map chr,
321 is $c, 0xa0, "/c ==U count";
324 $c = $s =~ tr/\x40-\xbf/\x80-\xbf\x00-\x3f/cd;
325 is $s, join('', map chr, 0x80..0xbf, 0x40..0xbf, 0x00..0x3f), "/cd ==U";
326 is $c, 0xa0, "/cd ==U count";
# Four extra characters outside \x40-\xbf are appended here, hence the
# expected count of 0x144 rather than 0x140.
328 $s = $all255_twice_plus . "\x3f\x3f\x{200}\x{300}";
329 $c = $s =~ tr/\x40-\xbf/\x80-\xbf\x00-\x3f/cs;
330 is $s, join('', map chr,
332 (map { ($_, $_) } 0x40..0xbf),
338 is $c, 0x144, "/cs ==U count";
340 $s = $all255_twice_plus;
341 $c = $s =~ tr/\x40-\xbf/\x80-\xbf\x00-\x3f/csd;
342 is $s, join('', map chr,
344 (map { ($_, $_) } 0x40..0xbf),
348 is $c, 0x140, "/csd ==U count";
351 # length(search) == length(replacement) - 1
352 # with some >= 0x100 utf8 chars in the string to be modified
# The replacement list has 0x81 characters, one more than the 0x80
# complemented characters below 0x100.  The /cd expected string ends in
# 0x40, presumably the mapping of the first character >= 0x100 -- confirm.
# NOTE(review): the "$s = ..." resets for the /c and /cd cases and the
# continuation lines of the multi-line expected values are not visible in
# this excerpt.
355 $c = $s =~ tr/\x40-\xbf/\x80-\xbf\x00-\x40/c;
356 is $s, join('', map chr,
363 is $c, 0xa0, "/c =-U count";
366 $c = $s =~ tr/\x40-\xbf/\x80-\xbf\x00-\x40/cd;
367 is $s, join('', map chr, 0x80..0xbf, 0x40..0xbf, 0x00..0x40), "/cd =-U";
368 is $c, 0xa0, "/cd =-U count";
# Four extra characters outside \x40-\xbf are appended here, hence the
# expected count of 0x144 rather than 0x140.
370 $s = $all255_twice_plus . "\x3f\x3f\x{200}\x{300}";
371 $c = $s =~ tr/\x40-\xbf/\x80-\xbf\x00-\x40/cs;
372 is $s, join('', map chr,
374 (map { ($_, $_) } 0x40..0xbf),
380 is $c, 0x144, "/cs =-U count";
382 $s = $all255_twice_plus;
383 $c = $s =~ tr/\x40-\xbf/\x80-\xbf\x00-\x40/csd;
384 is $s, join('', map chr,
386 (map { ($_, $_) } 0x40..0xbf),
390 is $c, 0x140, "/csd =-U count";
394 # length(search) < length(replacement),
395 # with some >= 0x100 utf8 chars in the string to be modified
# \xf0-\xff is part of the search list here, so within 0x00-0x11f only 0x90
# characters are complemented (0x120 when doubled), matching the counts.
# NOTE(review): the "$s = ..." resets for the /c and /cd cases and the
# continuation lines of the multi-line expected values are not visible in
# this excerpt.
398 $c = $s =~ tr/\x40-\xbf\xf0-\xff/\x80-\xbf\x00-\x3f/c;
399 is $s, join('', map chr,
408 is $c, 0x90, "/c <U count";
411 $c = $s =~ tr/\x40-\xbf\xf0-\xff/\x80-\xbf\x00-\x3f/cd;
412 is $s, join('', map chr,
420 is $c, 0x90, "/cd <U count";
# Four extra characters outside the search list are appended here, hence
# the expected count of 0x124 rather than 0x120.
422 $s = $all255_twice_plus . "\x3f\x3f\x{200}\x{300}";
423 $c = $s =~ tr/\x40-\xbf\xf0-\xff/\x80-\xbf\x00-\x3f/cs;
424 is $s, join('', map chr,
426 (map { ($_, $_) } 0x40..0xbf),
428 (map { ($_, $_) } 0xf0..0xff),
434 is $c, 0x124, "/cs <U count";
436 $s = $all255_twice_plus;
437 $c = $s =~ tr/\x40-\xbf\xf0-\xff/\x80-\xbf\x00-\x3f/csd;
438 is $s, join('', map chr, 0x80..0xbf,
439 (map { ($_, $_) } 0x40..0xbf),
441 (map { ($_, $_) } 0xf0..0xff),
445 is $c, 0x120, "/csd <U count";
450 # the 'extra length' for tr///c was stored as a short, so if the
451 # replacement string had more than 0x7fff chars not paired with
452 # search chars, bad things could happen
# The tr statement is assembled as source text in $e, with a replacement
# list made of a repeated 15-character chunk.
# NOTE(review): the tail of each concatenation (closing delimiter and flags)
# and the eval that runs $e are not visible in this excerpt.
456 $s = "\x{9000}\x{9001}\x{9002}";
457 $e = "\$c = \$s =~ tr/\\x00-\\xff/"
458 . ("ABCDEFGHIJKLMNO" x (0xa000 / 15))
461 is $s, "IJK", "RT #132608 len=0xa000";
462 is $c, 3, "RT #132608 len=0xa000 count";
# Same check with an even longer replacement list (0x12000 characters).
464 $s = "\x{9003}\x{9004}\x{9005}";
465 $e = "\$c = \$s =~ tr/\\x00-\\xff/"
466 . ("ABCDEFGHIJKLMNO" x (0x12000 / 15))
469 is $s, "LMN", "RT #132608 len=0x12000";
470 is $c, 3, "RT #132608 len=0x12000 count";
474 SKIP: { # Test literal range end point special handling
# All 24 tests of this block are skipped unless the platform is EBCDIC.
475 unless ($::IS_EBCDIC) {
476 skip "Valid only for EBCDIC", 24;
# NOTE(review): most "$_ = ..." assignments and several tr///d statements of
# this block are not visible in this excerpt; the test descriptions record
# which byte is checked against which range.
# Literal range i-j: only the bytes for 'i' (\x89) and 'j' (\x91) are
# deleted; the bytes numerically between them (\x8A, \x90) must survive.
479 $_ = "\x89"; # is 'i'
481 is($_, "", '"\x89" should match [i-j]');
484 is($_, "\x8A", '"\x8A" shouldnt match [i-j]');
487 is($_, "\x90", '"\x90" shouldnt match [i-j]');
488 $_ = "\x91"; # is 'j'
490 is($_, "", '"\x91" should match [i-j]');
# Same four bytes, with the range end given by character name.
493 tr/i-\N{LATIN SMALL LETTER J}//d;
494 is($_, "", '"\x89" should match [i-\N{LATIN SMALL LETTER J}]');
496 tr/i-\N{LATIN SMALL LETTER J}//d;
497 is($_, "\x8A", '"\x8A" shouldnt match [i-\N{LATIN SMALL LETTER J}]');
499 tr/i-\N{LATIN SMALL LETTER J}//d;
500 is($_, "\x90", '"\x90" shouldnt match [i-\N{LATIN SMALL LETTER J}]');
502 tr/i-\N{LATIN SMALL LETTER J}//d;
503 is($_, "", '"\x91" should match [i-\N{LATIN SMALL LETTER J}]');
# Same expectations with the range end written as \N{U+6A}.
507 is($_, "", '"\x89" should match [i-\N{U+6A}]');
510 is($_, "\x8A", '"\x8A" shouldnt match [i-\N{U+6A}]');
513 is($_, "\x90", '"\x90" shouldnt match [i-\N{U+6A}]');
516 is($_, "", '"\x91" should match [i-\N{U+6A}]');
# Both end points written as \N{U+...}.
519 tr/\N{U+69}-\N{U+6A}//d;
520 is($_, "", '"\x89" should match [\N{U+69}-\N{U+6A}]');
522 tr/\N{U+69}-\N{U+6A}//d;
523 is($_, "\x8A", '"\x8A" shouldnt match [\N{U+69}-\N{U+6A}]');
525 tr/\N{U+69}-\N{U+6A}//d;
526 is($_, "\x90", '"\x90" shouldnt match [\N{U+69}-\N{U+6A}]');
528 tr/\N{U+69}-\N{U+6A}//d;
529 is($_, "", '"\x91" should match [\N{U+69}-\N{U+6A}]');
# With a \x{...} end point all four bytes are expected to match and be
# deleted, i.e. the bytes in the gap are no longer excluded.
533 is($_, "", '"\x89" should match [i-\x{91}]');
536 is($_, "", '"\x8A" should match [i-\x{91}]');
539 is($_, "", '"\x90" should match [i-\x{91}]');
542 is($_, "", '"\x91" should match [i-\x{91}]');
544 # Need to use eval, because perl tries to compile these on ASCII platforms
545 # even though the tests are skipped, and that fails because 0x89-j is an
# illegal (reversed) range there.
# Range with a \x{...} start and a literal end: all four bytes \x89, \x8A,
# \x90 and \x91 are expected to be deleted.  Each tr is wrapped in a string
# eval so that it is only compiled when the statement is actually run.
# NOTE(review): the "$_ = ..." assignments before each eval are not visible
# in this excerpt.
548 eval 'tr/\x{89}-j//d';
549 is($_, "", '"\x89" should match [\x{89}-j]');
551 eval 'tr/\x{89}-j//d';
552 is($_, "", '"\x8A" should match [\x{89}-j]');
554 eval 'tr/\x{89}-j//d';
555 is($_, "", '"\x90" should match [\x{89}-j]');
557 eval 'tr/\x{89}-j//d';
558 is($_, "", '"\x91" should match [\x{89}-j]');
562 # In EBCDIC 'I' is \xc9 and 'J' is \xd1, 'i' is \x89 and 'j' is \x91.
563 # Yes, discontinuities. Regardless, the \xca in the below should stay
564 # untouched (and not become \x8a).
# NOTE(review): the assignment to $_ and the tr/// that produce this value
# are not visible in this excerpt.
570 is($_, "i\xcaj", 'EBCDIC discontinuity');
# Each variable is given a numeric value and then edited as a string by
# tr/1/3/.  The sum only comes out as 71 (32 + 32 + 3.5 + 3.5) if the
# numeric values held before the edit are no longer used.
574 ($x = 12) =~ tr/1/3/;
575 (my $y = 12) =~ tr/1/3/;
576 ($f = 1.5) =~ tr/1/3/;
577 (my $g = 1.5) =~ tr/1/3/;
578 is($x + $y + $f + $g, 71, 'tr cancels IOK and NOK');
# /r returns the transliterated copy and leaves its operand untouched.
# NOTE(review): the assignments that set $_ to 'adam' and $g to 'ruby' are
# not visible in this excerpt.
582 is y/dam/ve/rd, 'eve', '/r';
583 is $_, 'adam', '/r leaves param alone';
585 is $g =~ y/bury/repl/r, 'perl', '/r with explicit param';
586 is $g, 'ruby', '/r leaves explicit param alone';
# The next statements use "\" as the quote delimiter: y\a\b\r reads as
# y/a/b/r, and qr\...\ is an ordinary qr// pattern.
587 is "aaa" =~ y\a\b\r, 'bbb', '/r with constant param';
588 ok !eval '$_ !~ y///r', "!~ y///r is forbidden";
589 like $@, qr\^Using !~ with tr///r doesn't make sense\,
590 "!~ y///r error message";
# Keep the most recent warning text in $w and count warnings in $wc.
# NOTE(review): the void-context y///r that should trigger the warning is
# not visible in this excerpt.
594 local $SIG{__WARN__} = sub { $w = shift; ++$wc };
597 like $w, qr '^Useless use of non-destructive transliteration \(tr///r\)',
598 '/r warns in void context';
599 is $wc, 1, '/r warns just once';
602 # perlbug [ID 20000511.005 (#3237)]
# NOTE(review): the lines that set $_ to 'Fred', perform the match and apply
# the first tr/// are not visible in this excerpt.
607 is($_, 'Fred', 'harmless if explicitly not updating');
610 # A variant of the above, added in 5.7.2
# Search and replacement lists are identical, so this tr/// changes nothing;
# applying it to $1 must not raise an error (checked through $@ below).
613 eval '$1 =~ tr/A-Z/A-Z/;';
615 is($_, 'Fred', 'harmless if implicitly not updating');
616 is($@, '', ' no error');
619 # check tr handles UTF8 correctly
# Values such as 256.65.258 are v-string literals, i.e. strings made of the
# characters with those ordinals.  In the tr/// lists below \x{12c} is 300,
# \x{190} is 400, \x{be8} is 3048, \x{64} is 100 and \x{3c} is 60.
# NOTE(review): several "$x = ..." assignments, the bodies of the
# IS_ASCII branches and some is() calls are not visible in this excerpt.
# The expected value equals the input: tr/a/b/ must leave it unchanged.
620 ($x = 256.65.258) =~ tr/a/b/;
621 is($x, 256.65.258, 'handles UTF8');
626 if ($::IS_ASCII) { # ASCII
633 # EBCDIC variants of the above tests
634 ($x = 256.193.258) =~ tr/a/b/;
640 if ($::IS_ASCII) { # ASCII
649 my $l = chr(300); my $r = chr(400);
651 $x =~ tr/\x{12c}/\x{190}/;
653 'changing UTF8 chars in a UTF8 string, same length');
657 $x =~ tr/\x{12c}/\x{be8}/;
658 is($x, 200.3048.400, ' more bytes');
662 $x =~ tr/\x{64}/\x{190}/;
663 is($x, 400.125.60, 'Putting UT8 chars into a non-UTF8 string');
667 $x =~ tr/\x{190}/\x{64}/;
668 is($x, 100.125.60, 'Removing UTF8 chars from UTF8 string');
# Identical search and replacement lists: the string is not changed and the
# return value is the number of matching characters.
672 $y = $x =~ tr/\x{190}/\x{190}/;
673 is($y, 2, 'Counting UTF8 chars in UTF8 string');
675 $x = 60.400.125.60.400;
676 $y = $x =~ tr/\x{3c}/\x{3c}/;
677 is($y, 2, ' non-UTF8 chars in UTF8 string');
679 # 17 - counting UTF8 chars in non-UTF8 string
681 $y = $x =~ tr/\x{190}/\x{190}/;
682 is($y, 0, ' UTF8 chars in non-UTFs string');
# Range-parsing checks: ambiguous ranges, leading/trailing hyphens and
# reversed ranges.
# NOTE(review): the tr/// statements (and the evals that set $@) for the
# checks in this group are not visible in this excerpt, so the inputs behind
# the expected strings cannot be confirmed from here.
685 $_ = "abcdefghijklmnopqrstuvwxyz";
687 like($@, qr/^Ambiguous range in transliteration operator/, 'tr/a-z-9//');
689 # 19-21: Make sure leading and trailing hyphens still work
692 is($_, '..r.rot9', 'hyphens, leading');
696 is($_, '..r.rot9', ' trailing');
700 is($_, '..r.rot9', ' both');
702 $_ = "abcdefghijklmnop";
704 is($_, '.bcd....ijklm.op');
706 $_ = "abcdefghijklmnop";
708 is($_, '...de......lm...');
710 $_ = "abcdefghijklmnop";
712 is($_, '...d.f...j.l...p');
717 like($@, qr/^Invalid range "m-d" in transliteration operator/,
718 'reversed range check');
# A tr/// with an empty replacement list only counts, so applying it to $1
# must succeed.  NOTE(review): the pattern match that sets $1 is not visible
# in this excerpt.
721 is(eval '$1 =~ tr/abcd//', 3, 'explicit read-only count');
722 is($@, '', ' no error');
# Same count when the search and replacement lists are identical.
725 is(eval '$1 =~ tr/abcd/abcd/', 3, 'implicit read-only count');
726 is($@, '', ' no error');
# A string constant is acceptable on the left of a counting tr/// ...
728 is(eval '"123" =~ tr/12//', 2, 'LHS of non-updating tr');
# ... but a tr/// that would modify it must fail with the error matched below.
730 eval '"123" =~ tr/1/2/';
731 like($@, qr|^Can't modify constant item in transliteration \(tr///\)|,
732 'LHS bad on updating tr');
735 # v300 (0x12c) is UTF-8-encoded as 196 172 (0xc4 0xac)
736 # v400 (0x190) is UTF-8-encoded as 198 144 (0xc6 0x90)
# The fixtures below mix those wide characters with characters whose
# ordinals equal the bytes of their UTF-8 encodings, so a tr/// that
# confused characters with encoded bytes would change the wrong elements.
# Every is() carries a description so that a failure can be located from
# the test output alone; continuation descriptions start with a space,
# following the style already used in this group.
738 # Transliterate a byte to a byte, all four ways.
740 ($a = v300.196.172.300.196.172) =~ tr/\xc4/\xc5/;
741 is($a, v300.197.172.300.197.172, 'byte2byte transliteration');
743 ($a = v300.196.172.300.196.172) =~ tr/\xc4/\x{c5}/;
744 is($a, v300.197.172.300.197.172, ' \x{...} replacement');
746 ($a = v300.196.172.300.196.172) =~ tr/\x{c4}/\xc5/;
747 is($a, v300.197.172.300.197.172, ' \x{...} search');
749 ($a = v300.196.172.300.196.172) =~ tr/\x{c4}/\x{c5}/;
750 is($a, v300.197.172.300.197.172, ' \x{...} on both sides');
753 ($a = v300.196.172.300.196.172) =~ tr/\xc4/\x{12d}/;
754 is($a, v300.301.172.300.301.172, 'byte2wide transliteration');
756 ($a = v300.196.172.300.196.172) =~ tr/\x{12c}/\xc3/;
757 is($a, v195.196.172.195.196.172, ' wide2byte');
759 ($a = v300.196.172.300.196.172) =~ tr/\x{12c}/\x{12d}/;
760 is($a, v301.196.172.301.196.172, ' wide2wide');
763 ($a = v300.196.172.300.196.172) =~ tr/\xc4\x{12c}/\x{12d}\xc3/;
764 is($a, v195.301.172.195.301.172, 'byte2wide & wide2byte');
767 ($a = v300.196.172.300.196.172.400.198.144) =~
768 tr/\xac\xc4\x{12c}\x{190}/\xad\x{12d}\xc5\x{191}/;
769 is($a, v197.301.173.197.301.173.401.198.144, 'all together now!');
# The value of the binding expression is the number of characters matched.
772 is((($a = v300.196.172.300.196.172) =~ tr/\xc4/\xc5/), 2,
773 'transliterate and count');
775 is((($a = v300.196.172.300.196.172) =~ tr/\x{12c}/\x{12d}/), 2,
' wide chars');
778 ($a = v300.196.172.300.196.172) =~ tr/\xc4/\x{12d}/c;
779 is($a, v301.196.301.301.196.301, 'translit w/complement');
781 ($a = v300.196.172.300.196.172) =~ tr/\x{12c}/\xc5/c;
782 is($a, v300.197.197.300.197.197, ' wide search char');
785 ($a = v300.196.172.300.196.172) =~ tr/\xc4//d;
786 is($a, v300.172.300.172, 'translit w/deletion');
788 ($a = v300.196.172.300.196.172) =~ tr/\x{12c}//d;
789 is($a, v196.172.196.172, ' wide char');
792 ($a = v196.196.172.300.300.196.172) =~ tr/\xc4/\xc5/s;
793 is($a, v197.172.300.300.197.172, 'translit w/squeeze');
795 ($a = v196.172.300.300.196.172.172) =~ tr/\x{12c}/\x{12d}/s;
796 is($a, v196.172.301.196.172.172, ' wide chars');
799 # Tricky cases (When Simon Cozens Attacks)
# The string holds characters 196 and 172 (the byte values of the UTF-8
# encoding of \x{12c}) as separate characters, not \x{12c} itself, so none
# of these operations on \x{12c} may change it; %vd prints the ordinals.
800 ($a = v196.172.200) =~ tr/\x{12c}/a/;
801 is(sprintf("%vd", $a), '196.172.200');
803 ($a = v196.172.200) =~ tr/\x{12c}/\x{12c}/;
804 is(sprintf("%vd", $a), '196.172.200');
806 ($a = v196.172.200) =~ tr/\x{12c}//d;
807 is(sprintf("%vd", $a), '196.172.200');
810 # UTF8 range tests from Inaba Hiroto
# A wide range mapped onto a byte range, and the reverse.
812 ($a = v300.196.172.302.197.172) =~ tr/\x{12c}-\x{130}/\xc0-\xc4/;
813 is($a, v192.196.172.194.197.172, 'UTF range');
815 ($a = v300.196.172.302.197.172) =~ tr/\xc4-\xc8/\x{12c}-\x{130}/;
816 is($a, v300.300.172.302.301.172);
819 # UTF8 range tests from Karsten Sperling (patch #9008 required)
# NOTE(review): the assertions that check $a after each of the statements
# below are not visible in this excerpt.
821 ($a = "\x{0100}") =~ tr/\x00-\x{100}/X/;
824 ($a = "\x{0100}") =~ tr/\x{0000}-\x{00ff}/X/c;
827 ($a = "\x{0100}") =~ tr/\x{0000}-\x{00ff}\x{0101}/X/c;
830 ($a = v256) =~ tr/\x{0000}-\x{00ff}\x{0101}/X/c;
834 # UTF8 range tests from Inaba Hiroto
836 ($a = "\x{200}") =~ tr/\x00-\x{100}/X/c;
839 ($a = "\x{200}") =~ tr/\x00-\x{100}/X/cs;
842 # Tricky on EBCDIC: while [a-z] [A-Z] must not match the gap characters (as
843 # well as i-j, r-s, I-J, R-S), [\x89-\x91] [\xc9-\xd1] has to match them,
844 # from Karsten Sperling.
# NOTE(review): the is() checks for the two \x-range statements below, and
# the SKIP block opening around the skip line, are not visible in this
# excerpt.
846 $c = ($a = "\x89\x8a\x8b\x8c\x8d\x8f\x90\x91") =~ tr/\x89-\x91/X/;
850 $c = ($a = "\xc9\xca\xcb\xcc\xcd\xcf\xd0\xd1") =~ tr/\xc9-\xd1/X/;
855 skip "EBCDIC-centric tests", 4 unless $::IS_EBCDIC;
# Literal letter ranges: only the first and last bytes (the letters) become
# X; the bytes in between are expected unchanged.
857 $c = ($a = "\x89\x8a\x8b\x8c\x8d\x8f\x90\x91") =~ tr/i-j/X/;
859 is($a, "X\x8a\x8b\x8c\x8d\x8f\x90X");
861 $c = ($a = "\xc9\xca\xcb\xcc\xcd\xcf\xd0\xd1") =~ tr/I-J/X/;
863 is($a, "X\xca\xcb\xcc\xcd\xcf\xd0X");
# \x{100} lies outside \x00-\xff, so the complemented list matches it and it
# becomes X.
866 ($a = "\x{100}") =~ tr/\x00-\xff/X/c;
867 is(ord($a), ord("X"));
869 ($a = "\x{100}") =~ tr/\x00-\xff/X/cs;
870 is(ord($a), ord("X"));
# /c with an empty replacement list: the string is expected unchanged.
872 ($a = "\x{100}\x{100}") =~ tr/\x{101}-\x{200}//c;
873 is($a, "\x{100}\x{100}");
875 ($a = "\x{100}\x{100}") =~ tr/\x{101}-\x{200}//cs;
# Two byte characters mapped to two wide characters.
878 $a = "\xfe\xff"; $a =~ tr/\xfe\xff/\x{1ff}\x{1fe}/;
879 is($a, "\x{1ff}\x{1fe}");
# NOTE(review): the assertions for the three statements below are not
# visible in this excerpt.
883 ($a = "R0_001") =~ tr/R_//d;
# y/1/./ is applied with $_ aliased by "for", first to each map element
# itself and then to the temporary value $_.''.
887 @a = (1,2); map { y/1/./ for $_ } @a;
890 @a = (1,2); map { y/1/./ for $_.'' } @a;
894 # Additional test for Inaba Hiroto patch (robin@kitsite.com)
# NOTE(review): the assertion for the statement below is not visible in this
# excerpt.
895 ($a = "\x{100}\x{102}\x{101}") =~ tr/\x00-\377/XYZ/c;
899 # Used to fail with "Modification of a read-only value attempted"
# NOTE(review): the code that puts 'n' into $_ (a shared hash key, judging
# by the test name) is not visible in this excerpt.
903 is($_, 'n', 'pp_trans needs to unshare shared hash keys');
904 is($@, '', ' no error');
# Identical search and replacement lists on a string constant: only counts.
908 $x = eval '"1213" =~ tr/1/1/';
909 is($x, 2, 'implicit count on constant');
910 is($@, '', ' no error');
# A counting tr/// on a non-existent array or hash element must neither die
# nor create the element.
914 eval '$foo[-1] =~ tr/N/N/';
915 is( $@, '', 'implicit count outside array bounds, index negative' );
916 is( scalar @foo, 0, " doesn't extend the array");
918 eval '$foo[1] =~ tr/N/N/';
919 is( $@, '', 'implicit count outside array bounds, index positive' );
920 is( scalar @foo, 0, " doesn't extend the array");
924 eval '$foo{bar} =~ tr/N/N/';
925 is( $@, '', 'implicit count outside hash bounds' );
926 is( scalar keys %foo, 0, " doesn't extend the hash");
# NOTE(review): the assignment that makes $x a scalar reference is not
# visible in this excerpt.
929 is( $x =~ tr/A/A/, 2, 'non-modifying tr/// on a scalar ref' );
930 is( ref $x, 'SCALAR', " doesn't stringify its argument" );
932 # rt.perl.org 36622. Perl didn't like a y/// at end of file. No trailing
# newline: the program text in q[] below deliberately ends right after the
# y///.
934 fresh_perl_is(q[$_ = "foo"; y/A-Z/a-z/], '', {}, 'RT #36622 y/// at end of file');
937 { # [perl #38293] chr(65535) should be allowed in regexes
938 no warnings 'utf8'; # to allow non-characters
# NOTE(review): the tr/// statements for the first four checks, and a few
# "$s = ..." assignments further down, are not visible in this excerpt.
# The test names refer to the implementation routine or tr feature covered.
940 $s = "\x{d800}\x{ffff}";
942 is($s, "\x{d800}\x{ffff}", "do_trans_simple");
944 $s = "\x{d800}\x{ffff}";
946 is($i, 0, "do_trans_count");
948 $s = "\x{d800}\x{ffff}";
950 is($s, "\x{d800}\x{ffff}", "do_trans_complex, SQUASH");
952 $s = "\x{d800}\x{ffff}";
954 is($s, "AA", "do_trans_complex, COMPLEMENT");
957 $s =~ tr/\x{ffff}/\x{1ffff}/;
958 is($s, "A\x{1ffff}B", "utf8, SEARCHLIST");
960 $s = "\x{fffd}\x{fffe}\x{ffff}";
961 $s =~ tr/\x{fffd}-\x{ffff}/ABC/;
962 is($s, "ABC", "utf8, SEARCHLIST range");
# Replacement list shorter than the search list and no /d: all three
# characters are expected to map to the single replacement character.
965 $s =~ tr/ABC/\x{ffff}/;
966 is($s, "\x{ffff}"x3, "utf8, REPLACEMENTLIST");
969 $s =~ tr/ABC/\x{fffd}-\x{ffff}/;
970 is($s, "\x{fffd}\x{fffe}\x{ffff}", "utf8, REPLACEMENTLIST range");
972 $s = "A\x{ffff}B\x{100}\0\x{fffe}\x{ffff}";
973 $i = $s =~ tr/\x{ffff}//;
974 is($i, 2, "utf8, count");
# /s: runs of the same replaced character collapse to a single character.
976 $s = "A\x{ffff}\x{ffff}C";
977 $s =~ tr/\x{ffff}/\x{100}/s;
978 is($s, "A\x{100}C", "utf8, SQUASH");
980 $s = "A\x{ffff}\x{ffff}\x{fffe}\x{fffe}\x{fffe}C";
981 $s =~ tr/\x{fffe}\x{ffff}//s;
982 is($s, "A\x{ffff}\x{fffe}C", "utf8, SQUASH");
985 $s =~ tr/AB/\x{ffff}/s;
986 is($s, "x\x{ffff}y", "utf8, SQUASH");
989 $s =~ tr/AB/\x{fffe}\x{ffff}/s;
990 is($s, "x\x{fffe}\x{ffff}y", "utf8, SQUASH");
# /c: everything except the listed non-characters becomes x.
992 $s = "A\x{ffff}B\x{fffe}C";
993 $s =~ tr/\x{fffe}\x{ffff}/x/c;
994 is($s, "x\x{ffff}x\x{fffe}x", "utf8, COMPLEMENT");
996 $s = "A\x{10000}B\x{2abcd}C";
997 $s =~ tr/\0-\x{ffff}/x/c;
998 is($s, "AxBxC", "utf8, COMPLEMENT range");
# /d with a shorter replacement list: \x{fffe} maps to x, \x{ffff} is deleted.
1000 $s = "A\x{fffe}B\x{ffff}C";
1001 $s =~ tr/\x{fffe}\x{ffff}/x/d;
1002 is($s, "AxBC", "utf8, DELETE");
1004 } # non-characters end
1006 { # related to [perl #27940]
# The same deletion is checked twice: once with the control characters
# written as \c escapes (\c@, \cA, \cZ, \c_) and once as \x escapes
# (\x00, \x01, \x1A, \x1F); both must leave the same five characters.
1009 ($c = "\x20\c@\x30\cA\x40\cZ\x50\c_\x60") =~ tr/\c@-\c_//d;
1010 is($c, "\x20\x30\x40\x50\x60", "tr/\\c\@-\\c_//d");
1012 ($c = "\x20\x00\x30\x01\x40\x1A\x50\x1F\x60") =~ tr/\x00-\x1f//d;
1013 is($c, "\x20\x30\x40\x50\x60", "tr/\\x00-\\x1f//d");
# $s receives the key of a one-element anonymous hash; the test expects this
# scalar to be copy-on-write, as reported by XS::APItest::SvIsCOW.
1016 ($s) = keys %{{pie => 3}};
# Both COW checks are skipped when XS::APItest cannot be loaded.
1018 if (!eval { require XS::APItest }) { skip "no XS::APItest", 2 }
1019 my $wasro = XS::APItest::SvIsCOW($s);
1020 ok $wasro, "have a COW";
# NOTE(review): the count-only tr/// applied to $s between the two checks is
# not visible in this excerpt.
1022 ok( XS::APItest::SvIsCOW($s),
1023 "count-only tr doesn't deCOW COWs" );
1028 # under threads, unicode tr within a cloned closure would SEGV or assert
1029 # fail, since the pointer in the pad to the swash was getting zeroed out
# NOTE(review): the thread/closure scaffolding around this tr and the
# assignment to $x are not visible in this excerpt.
1035 $x =~ tr[\x{142}][\x{143}];
1037 is($x,"\x{143}", "utf8 + closure");
1040 # Freeing of trans ops prior to pmtrans() [perl #102858].
# Note the "~=" (not "=~") in the eval'd text: presumably a deliberate
# syntax error so that the tr op is freed during a failed parse -- confirm.
# The result of the eval is not checked on the lines visible here.
1041 eval q{ $a ~= tr/a/b/; };
1044 no warnings "deprecated";
# The same malformed statement under "use encoding", skipped when the
# encoding module cannot be loaded.
1045 skip "no encoding", 1 unless eval { require encoding; 1 };
1046 eval q{ use encoding "utf8"; $a ~= tr/a/b/; };
# NOTE(review): the code that computes $x for the check below is not visible
# in this excerpt.
1054 { no warnings 'utf8'; print "# $x\n"; } # No note() to avoid wide warning.
1055 is($x, "Perlβ", "Only first of multiple transliterations is used");
1058 # tr/a/b/ should fail even on zero-length read-only strings
# nullrocow is the empty-string key of an anonymous hash (going by its name,
# a read-only copy-on-write empty string -- confirm).
1059 use constant nullrocow => (keys%{{""=>undef}})[0];
# Checked for both the literal "" and the constant above.
1060 for ("", nullrocow) {
1061 eval { $_ =~ y/a/b/ };
1062 like $@, qr/^Modification of a read-only value attempted at /,
1063 'tr/a/b/ fails on zero-length ro string';
1066 # Whether they're permitted or not, non-modifying tr/// should not write
1067 # to read-only values, even with funky flags.
# Each ok(1, ...) below is unconditional: reaching it after the preceding
# eval, without the interpreter aborting, is the whole test.
1069 eval q{ ('a' =~ /./) =~ tr///d };
1070 ok(1, "tr///d on PL_Yes does not assert");
1071 eval q{ ('a' =~ /./) =~ tr/a-z/a-z/d };
1072 ok(1, "tr/a-z/a-z/d on PL_Yes does not assert");
1073 eval q{ ('a' =~ /./) =~ tr///s };
1074 ok(1, "tr///s on PL_Yes does not assert");
1075 eval q{ *x =~ tr///d };
1076 ok(1, "tr///d on glob does not assert");
# \N{U+...} and \N{name} in a search list must match an upper-Latin1
# character.  The input is built with utf8::unicode_to_native so that it is
# the platform's native code point for the Unicode character.
1080 my $string = chr utf8::unicode_to_native(0x00e0);
1081 $string =~ tr/\N{U+00e0}/A/;
1082 is($string, "A", 'tr// of \N{U+...} works for upper-Latin1');
# Reuse the lexical declared above; a second "my $string" in the same scope
# would mask the first declaration.
1083 $string = chr utf8::unicode_to_native(0x00e1);
1084 $string =~ tr/\N{LATIN SMALL LETTER A WITH ACUTE}/A/;
1085 is($string, "A", 'tr// of \N{name} works for upper-Latin1');
1089 # a tr/// that is cho(m)ped, possibly with an array as arg
# Accumulate the text of every warning in $warn.
1097 local $SIG{__WARN__ } = sub { $warn .= "@_" };
# Cover every combination of: chop/chomp; no binding, scalar binding or
# array binding; replacement 'a' (same as the search list) or 'b'; and
# with or without the /r flag.
1099 for my $c (qw(chop chomp)) {
1100 for my $bind ('', '$s =~ ', '@a =~ ') {
1101 for my $arg2 (qw(a b)) {
1102 for my $r ('', 'r') {
1104 # tr/a/b/ modifies its LHS, so if the LHS is an
1105 # array, this should die. The special cases of tr/a/a/
1106 # and tr/a/b/r don't modify their LHS, so instead
1107 # we croak because cho(m)p is trying to modify it.
1110 ($r eq '' && $arg2 eq 'b' && $bind =~ /\@a/)
1111 ? qr/Can't modify private array in transliteration/
1112 : qr{Can't modify transliteration \(tr///\) in $c};
# The statement under test, e.g. "chop($s =~ tr/a/b/r);".
# NOTE(review): the eval that runs $expr and the declaration of $exp are not
# visible in this excerpt.
1114 my $expr = "$c(${bind}tr/a/$arg2/$r);";
1116 like $@, $exp, "RT #130198 eval: $expr";
1120 ? qr{^Applying transliteration \(tr///\) to \@a will act on scalar\(\@a\)}
1122 like $warn, $exp, "RT #130198 warn: $expr";
# Regression test for a tr/// whose operands are written across several
# source lines with [] delimiters: the two bracketed lines below are the
# search list and the replacement list.
# NOTE(review): the assignment to $x and the line that starts the tr
# statement are not visible in this excerpt, and the explanatory comment
# below is cut off after "the 2nd is only".
1131 { # [perl #130656] This bug happens when the tr is split across lines, so
1132 # that the first line causes it to go into UTF-8, and the 2nd is only
1136 [\x{E234}-\x{E342}\x{E5B5}-\x{E5DF}]
1137 [\x{E5CD}-\x{E5DF}\x{EA80}-\x{EAFA}\x{EB0E}-\x{EB8E}\x{EAFB}-\x{EB0D}\x{E5B5}-\x{E5CC}];
1139 is $x, "\x{E5CE}", '[perl #130656]';