# If $@ is set at this point, skip the whole file; the message attributes
# that to miniperl not having 'utf8'.
10 if ($@) { skip_all("miniperl, no 'utf8'") }
18 # Test this first before we extend the stack with other operations.
19 # This caused an ASan failure due to a bad write past the end of the stack.
# die receives the 127 items of 1..127 plus the result of y/// applied to an
# undefined lexical; the surrounding eval block traps the resulting exception.
20 eval { my $x; die 1..127, $x =~ y/// };
# Expected contents of $_ after upper-casing, lower-casing and a partial
# upper-casing (see the test labels).
22 $_ = "abcdefghijklmnopqrstuvwxyz";
26 is($_, "ABCDEFGHIJKLMNOPQRSTUVWXYZ", 'uc');
30 is($_, "abcdefghijklmnopqrstuvwxyz", 'lc');
33 is($_, "aBCDEFGHIJKLMNOPQRSTUVWXYz", 'partial uc');
# A \N{} named sequence inside tr/// is expected to be rejected with the
# message matched below; the string eval keeps the failure contained.
35 eval 'tr/a/\N{KATAKANA LETTER AINU P}/;';
37 qr/\\N\{KATAKANA LETTER AINU P} must not be a named sequence in transliteration operator/,
38 "Illegal to tr/// named sequence";
# A range whose start (\x{101}) is above its end (\x{100}) is expected to be
# rejected as an invalid range.
40 eval 'tr/\x{101}-\x{100}//;';
42 qr/Invalid range "\\x\{0101}-\\x\{0100}" in transliteration operator/,
43 "UTF-8 range with min > max";
# EBCDIC-only checks of how range end points are interpreted.  The tr/RANGE//d
# statements delete every character of $_ that falls in RANGE, so an empty $_
# means the character matched and an unchanged $_ means it did not.
45 SKIP: { # Test literal range end point special handling
46 unless ($::IS_EBCDIC) {
47 skip "Valid only for EBCDIC", 24;
# Both end points written as literal letters.
52 is($_, "", '"\x89" should match [i-j]');
55 is($_, "\x8A", '"\x8A" shouldnt match [i-j]');
58 is($_, "\x90", '"\x90" shouldnt match [i-j]');
61 is($_, "", '"\x91" should match [i-j]');
# Upper end point given as a \N{name}.
64 tr/i-\N{LATIN SMALL LETTER J}//d;
65 is($_, "", '"\x89" should match [i-\N{LATIN SMALL LETTER J}]');
67 tr/i-\N{LATIN SMALL LETTER J}//d;
68 is($_, "\x8A", '"\x8A" shouldnt match [i-\N{LATIN SMALL LETTER J}]');
70 tr/i-\N{LATIN SMALL LETTER J}//d;
71 is($_, "\x90", '"\x90" shouldnt match [i-\N{LATIN SMALL LETTER J}]');
73 tr/i-\N{LATIN SMALL LETTER J}//d;
74 is($_, "", '"\x91" should match [i-\N{LATIN SMALL LETTER J}]');
# Upper end point given as \N{U+6A}.
78 is($_, "", '"\x89" should match [i-\N{U+6A}]');
81 is($_, "\x8A", '"\x8A" shouldnt match [i-\N{U+6A}]');
84 is($_, "\x90", '"\x90" shouldnt match [i-\N{U+6A}]');
87 is($_, "", '"\x91" should match [i-\N{U+6A}]');
# Both end points given as \N{U+...}.
90 tr/\N{U+69}-\N{U+6A}//d;
91 is($_, "", '"\x89" should match [\N{U+69}-\N{U+6A}]');
93 tr/\N{U+69}-\N{U+6A}//d;
94 is($_, "\x8A", '"\x8A" shouldnt match [\N{U+69}-\N{U+6A}]');
96 tr/\N{U+69}-\N{U+6A}//d;
97 is($_, "\x90", '"\x90" shouldnt match [\N{U+69}-\N{U+6A}]');
99 tr/\N{U+69}-\N{U+6A}//d;
100 is($_, "", '"\x91" should match [\N{U+69}-\N{U+6A}]');
# One end point given as a \x{} code: per the labels, every code from \x89
# through \x91 is then expected to match.
104 is($_, "", '"\x89" should match [i-\x{91}]');
107 is($_, "", '"\x8A" should match [i-\x{91}]');
110 is($_, "", '"\x90" should match [i-\x{91}]');
113 is($_, "", '"\x91" should match [i-\x{91}]');
115 # Need to use eval, because perl tries to compile this on ASCII platforms even
116 # though the tests are skipped, and that fails because 0x89-j is an illegal
# range there (0x89 is above the ASCII code of 'j', 0x6A).
119 eval 'tr/\x{89}-j//d';
120 is($_, "", '"\x89" should match [\x{89}-j]');
122 eval 'tr/\x{89}-j//d';
123 is($_, "", '"\x8A" should match [\x{89}-j]');
125 eval 'tr/\x{89}-j//d';
126 is($_, "", '"\x90" should match [\x{89}-j]');
128 eval 'tr/\x{89}-j//d';
129 is($_, "", '"\x91" should match [\x{89}-j]');
133 # In EBCDIC 'I' is \xc9 and 'J' is \xd1, 'i' is \x89 and 'j' is \x91.
134 # Yes, discontinuities. Regardless, the \xca in the below should stay
135 # untouched (and not become \x8a).
141 is($_, "i\xcaj", 'EBCDIC discontinuity');
# tr/1/3/ is applied to scalars that were just assigned numbers: 12 becomes
# "32" and 1.5 becomes "3.5".  The sum is 71 only if the old numeric values
# are no longer used after the string was changed.
145 ($x = 12) =~ tr/1/3/;
146 (my $y = 12) =~ tr/1/3/;
147 ($f = 1.5) =~ tr/1/3/;
148 (my $g = 1.5) =~ tr/1/3/;
149 is($x + $y + $f + $g, 71, 'tr cancels IOK and NOK');
# The /r modifier: the transliterated copy is returned and the operand itself
# is expected to stay unchanged.
153 is y/dam/ve/rd, 'eve', '/r';
154 is $_, 'adam', '/r leaves param alone';
156 is $g =~ y/bury/repl/r, 'perl', '/r with explicit param';
157 is $g, 'ruby', '/r leaves explicit param alone';
# Backslash serves as the delimiter of y here, and of qr two lines further down.
158 is "aaa" =~ y\a\b\r, 'bbb', '/r with constant param';
159 ok !eval '$_ !~ y///r', "!~ y///r is forbidden";
160 like $@, qr\^Using !~ with tr///r doesn't make sense\,
161 "!~ y///r error message";
# The handler keeps the first argument of the latest warning in $w and counts
# the warnings in $wc.
165 local $SIG{__WARN__} = sub { $w = shift; ++$wc };
168 like $w, qr '^Useless use of non-destructive transliteration \(tr///r\)',
169 '/r warns in void context';
170 is $wc, 1, '/r warns just once';
173 # perlbug [ID 20000511.005 (#3237)]
178 is($_, 'Fred', 'harmless if explicitly not updating');
181 # A variant of the above, added in 5.7.2
# tr/A-Z/A-Z/ maps every character to itself; applying it to the read-only $1
# is expected to raise no error.
184 eval '$1 =~ tr/A-Z/A-Z/;';
186 is($_, 'Fred', 'harmless if implicitly not updating');
187 is($@, '', ' no error');
190 # check tr handles UTF8 correctly
# Bare literals with two or more dots, such as 256.65.258, are v-strings: one
# character per dotted number.
191 ($x = 256.65.258) =~ tr/a/b/;
192 is($x, 256.65.258, 'handles UTF8');
197 if ($::IS_ASCII) { # ASCII
204 # EBCDIC variants of the above tests
205 ($x = 256.193.258) =~ tr/a/b/;
211 if ($::IS_ASCII) { # ASCII
220 my $l = chr(300); my $r = chr(400);
# \x{12c} is code point 300 and \x{190} is code point 400.
222 $x =~ tr/\x{12c}/\x{190}/;
224 'changing UTF8 chars in a UTF8 string, same length');
# \x{be8} is code point 3048.
228 $x =~ tr/\x{12c}/\x{be8}/;
229 is($x, 200.3048.400, ' more bytes');
# \x{64} is code point 100.
233 $x =~ tr/\x{64}/\x{190}/;
234 is($x, 400.125.60, 'Putting UT8 chars into a non-UTF8 string');
238 $x =~ tr/\x{190}/\x{64}/;
239 is($x, 100.125.60, 'Removing UTF8 chars from UTF8 string');
# With identical search and replacement lists tr only counts; $y receives the
# number of matching characters.
243 $y = $x =~ tr/\x{190}/\x{190}/;
244 is($y, 2, 'Counting UTF8 chars in UTF8 string');
246 $x = 60.400.125.60.400;
# \x{3c} is code point 60, which occurs twice in $x.
247 $y = $x =~ tr/\x{3c}/\x{3c}/;
248 is($y, 2, ' non-UTF8 chars in UTF8 string');
250 # 17 - counting UTF8 chars in non-UTF8 string
252 $y = $x =~ tr/\x{190}/\x{190}/;
253 is($y, 0, ' UTF8 chars in non-UTFs string');
256 $_ = "abcdefghijklmnopqrstuvwxyz";
# A search list like a-z-9 is expected to be refused as an ambiguous range.
258 like($@, qr/^Ambiguous range in transliteration operator/, 'tr/a-z-9//');
260 # 19-21: Make sure leading and trailing hyphens still work
263 is($_, '..r.rot9', 'hyphens, leading');
267 is($_, '..r.rot9', ' trailing');
271 is($_, '..r.rot9', ' both');
# Three runs on the same 16-letter input; each check shows which letters are
# expected to have been replaced by '.'.
273 $_ = "abcdefghijklmnop";
275 is($_, '.bcd....ijklm.op');
277 $_ = "abcdefghijklmnop";
279 is($_, '...de......lm...');
281 $_ = "abcdefghijklmnop";
283 is($_, '...d.f...j.l...p');
# A range written backwards (m-d) is expected to be refused.
288 like($@, qr/^Invalid range "m-d" in transliteration operator/,
289 'reversed range check');
# Counting on the read-only $1: both the empty replacement list and the
# identical replacement list are expected to count without raising an error.
292 is(eval '$1 =~ tr/abcd//', 3, 'explicit read-only count');
293 is($@, '', ' no error');
296 is(eval '$1 =~ tr/abcd/abcd/', 3, 'implicit read-only count');
297 is($@, '', ' no error');
# A string constant may be counted, but a tr that would change it is expected
# to fail with "Can't modify constant item".
299 is(eval '"123" =~ tr/12//', 2, 'LHS of non-updating tr');
301 eval '"123" =~ tr/1/2/';
302 like($@, qr|^Can't modify constant item in transliteration \(tr///\)|,
303 'LHS bad on updating tr');
306 # v300 (0x12c) is UTF-8-encoded as 196 172 (0xc4 0xac)
307 # v400 (0x190) is UTF-8-encoded as 198 144 (0xc6 0x90)
# The test strings therefore hold the wide character next to standalone
# characters 196 and 172, which have the same values as the bytes of its
# UTF-8 encoding; a transliteration must not confuse the two.
309 # Transliterate a byte to a byte, all four ways.
# "Four ways": each side written either as \xHH or as \x{HH}.
311 ($a = v300.196.172.300.196.172) =~ tr/\xc4/\xc5/;
312 is($a, v300.197.172.300.197.172, 'byte2byte transliteration');
314 ($a = v300.196.172.300.196.172) =~ tr/\xc4/\x{c5}/;
315 is($a, v300.197.172.300.197.172);
317 ($a = v300.196.172.300.196.172) =~ tr/\x{c4}/\xc5/;
318 is($a, v300.197.172.300.197.172);
320 ($a = v300.196.172.300.196.172) =~ tr/\x{c4}/\x{c5}/;
321 is($a, v300.197.172.300.197.172);
# \x{12d} is code point 301 and \xc3 is code point 195.
324 ($a = v300.196.172.300.196.172) =~ tr/\xc4/\x{12d}/;
325 is($a, v300.301.172.300.301.172, 'byte2wide transliteration');
327 ($a = v300.196.172.300.196.172) =~ tr/\x{12c}/\xc3/;
328 is($a, v195.196.172.195.196.172, ' wide2byte');
330 ($a = v300.196.172.300.196.172) =~ tr/\x{12c}/\x{12d}/;
331 is($a, v301.196.172.301.196.172, ' wide2wide');
334 ($a = v300.196.172.300.196.172) =~ tr/\xc4\x{12c}/\x{12d}\xc3/;
335 is($a, v195.301.172.195.301.172, 'byte2wide & wide2byte');
338 ($a = v300.196.172.300.196.172.400.198.144) =~
339 tr/\xac\xc4\x{12c}\x{190}/\xad\x{12d}\xc5\x{191}/;
340 is($a, v197.301.173.197.301.173.401.198.144, 'all together now!');
# The value of the tr expression is the number of characters transliterated.
343 is((($a = v300.196.172.300.196.172) =~ tr/\xc4/\xc5/), 2,
344 'transliterate and count');
346 is((($a = v300.196.172.300.196.172) =~ tr/\x{12c}/\x{12d}/), 2);
# /c: every character that is NOT in the search list is replaced.
349 ($a = v300.196.172.300.196.172) =~ tr/\xc4/\x{12d}/c;
350 is($a, v301.196.301.301.196.301, 'translit w/complement');
352 ($a = v300.196.172.300.196.172) =~ tr/\x{12c}/\xc5/c;
353 is($a, v300.197.197.300.197.197);
# /d: characters in the search list with no replacement are deleted.
356 ($a = v300.196.172.300.196.172) =~ tr/\xc4//d;
357 is($a, v300.172.300.172, 'translit w/deletion');
359 ($a = v300.196.172.300.196.172) =~ tr/\x{12c}//d;
360 is($a, v196.172.196.172);
# /s: a run of characters transliterated to the same character is squeezed
# to one; untouched runs (300.300 and 172.172 below) stay as they are.
363 ($a = v196.196.172.300.300.196.172) =~ tr/\xc4/\xc5/s;
364 is($a, v197.172.300.300.197.172, 'translit w/squeeze');
366 ($a = v196.172.300.300.196.172.172) =~ tr/\x{12c}/\x{12d}/s;
367 is($a, v196.172.301.196.172.172);
370 # Tricky cases (When Simon Cozens Attacks)
# v196.172.200 starts with the two characters whose values equal the UTF-8
# bytes of \x{12c}; none of these operations may treat them as that character,
# so the string is expected to come out unchanged each time.
371 ($a = v196.172.200) =~ tr/\x{12c}/a/;
372 is(sprintf("%vd", $a), '196.172.200');
374 ($a = v196.172.200) =~ tr/\x{12c}/\x{12c}/;
375 is(sprintf("%vd", $a), '196.172.200');
377 ($a = v196.172.200) =~ tr/\x{12c}//d;
378 is(sprintf("%vd", $a), '196.172.200');
381 # UTF8 range tests from Inaba Hiroto
# A range in the search list is mapped position by position onto the range in
# the replacement list (\x{12c}-\x{130} is 300-304, \xc0-\xc4 is 192-196).
383 ($a = v300.196.172.302.197.172) =~ tr/\x{12c}-\x{130}/\xc0-\xc4/;
384 is($a, v192.196.172.194.197.172, 'UTF range');
386 ($a = v300.196.172.302.197.172) =~ tr/\xc4-\xc8/\x{12c}-\x{130}/;
387 is($a, v300.300.172.302.301.172);
390 # UTF8 range tests from Karsten Sperling (patch #9008 required)
# Ranges running from the byte range up to or around \x{100}, plain and with
# /c, all applied to the single character \x{100} (v256 is the same character).
392 ($a = "\x{0100}") =~ tr/\x00-\x{100}/X/;
395 ($a = "\x{0100}") =~ tr/\x{0000}-\x{00ff}/X/c;
398 ($a = "\x{0100}") =~ tr/\x{0000}-\x{00ff}\x{0101}/X/c;
401 ($a = v256) =~ tr/\x{0000}-\x{00ff}\x{0101}/X/c;
405 # UTF8 range tests from Inaba Hiroto
407 ($a = "\x{200}") =~ tr/\x00-\x{100}/X/c;
410 ($a = "\x{200}") =~ tr/\x00-\x{100}/X/cs;
413 # Tricky on EBCDIC: while [a-z] [A-Z] must not match the gap characters (as
414 # well as i-j, r-s, I-J, R-S), [\x89-\x91] [\xc9-\xd1] has to match them,
415 # from Karsten Sperling.
# $c receives the number of characters transliterated.
417 $c = ($a = "\x89\x8a\x8b\x8c\x8d\x8f\x90\x91") =~ tr/\x89-\x91/X/;
421 $c = ($a = "\xc9\xca\xcb\xcc\xcd\xcf\xd0\xd1") =~ tr/\xc9-\xd1/X/;
426 skip "EBCDIC-centric tests", 4 unless $::IS_EBCDIC;
# With letter end points only the first and last character of each string are
# expected to become X; the characters in between stay untouched.
428 $c = ($a = "\x89\x8a\x8b\x8c\x8d\x8f\x90\x91") =~ tr/i-j/X/;
430 is($a, "X\x8a\x8b\x8c\x8d\x8f\x90X");
432 $c = ($a = "\xc9\xca\xcb\xcc\xcd\xcf\xd0\xd1") =~ tr/I-J/X/;
434 is($a, "X\xca\xcb\xcc\xcd\xcf\xd0X");
# \x{100} lies outside \x00-\xff, so the complemented list replaces it with X.
437 ($a = "\x{100}") =~ tr/\x00-\xff/X/c;
438 is(ord($a), ord("X"));
440 ($a = "\x{100}") =~ tr/\x00-\xff/X/cs;
441 is(ord($a), ord("X"));
# Empty replacement list and no /d: the string is expected to stay as it is.
443 ($a = "\x{100}\x{100}") =~ tr/\x{101}-\x{200}//c;
444 is($a, "\x{100}\x{100}");
446 ($a = "\x{100}\x{100}") =~ tr/\x{101}-\x{200}//cs;
# Characters in the byte range are replaced by wide characters.
449 $a = "\xfe\xff"; $a =~ tr/\xfe\xff/\x{1ff}\x{1fe}/;
450 is($a, "\x{1ff}\x{1fe}");
# Delete every 'R' and '_' from the string.
454 ($a = "R0_001") =~ tr/R_//d;
# y/// inside a statement-modifier for: the first form aliases $_ to the map
# element itself, the second ($_.'') to a temporary copy of it.
458 @a = (1,2); map { y/1/./ for $_ } @a;
461 @a = (1,2); map { y/1/./ for $_.'' } @a;
465 # Additional test for Inaba Hiroto patch (robin@kitsite.com)
466 ($a = "\x{100}\x{102}\x{101}") =~ tr/\x00-\377/XYZ/c;
470 # Used to fail with "Modification of a read-only value attempted"
474 is($_, 'n', 'pp_trans needs to unshare shared hash keys');
475 is($@, '', ' no error');
# tr/1/1/ on a string constant only counts, so it is expected to succeed.
479 $x = eval '"1213" =~ tr/1/1/';
480 is($x, 2, 'implicit count on constant');
481 is($@, '', ' no error');
# Counting on array or hash elements that do not exist is expected neither to
# raise an error nor to create the element.
485 eval '$foo[-1] =~ tr/N/N/';
486 is( $@, '', 'implicit count outside array bounds, index negative' );
487 is( scalar @foo, 0, " doesn't extend the array");
489 eval '$foo[1] =~ tr/N/N/';
490 is( $@, '', 'implicit count outside array bounds, index positive' );
491 is( scalar @foo, 0, " doesn't extend the array");
495 eval '$foo{bar} =~ tr/N/N/';
496 is( $@, '', 'implicit count outside hash bounds' );
497 is( scalar keys %foo, 0, " doesn't extend the hash");
# Counting on a reference: $x is expected to still be a SCALAR reference
# afterwards.
500 is( $x =~ tr/A/A/, 2, 'non-modifying tr/// on a scalar ref' );
501 is( ref $x, 'SCALAR', " doesn't stringify its argument" );
503 # rt.perl.org 36622. Perl didn't like a y/// at end of file. No trailing
# newline or semicolon follows the y/// in the program text below.
505 fresh_perl_is(q[$_ = "foo"; y/A-Z/a-z/], '', {}, 'RT #36622 y/// at end of file');
# tr/// on strings holding \x{d800}, \x{fffd}, \x{fffe} and \x{ffff}.  The
# labels of the first four checks name the code path they aim at
# (do_trans_simple, do_trans_count, do_trans_complex).
508 { # [perl #38293] chr(65535) should be allowed in regexes
509 no warnings 'utf8'; # to allow non-characters
511 $s = "\x{d800}\x{ffff}";
513 is($s, "\x{d800}\x{ffff}", "do_trans_simple");
515 $s = "\x{d800}\x{ffff}";
517 is($i, 0, "do_trans_count");
519 $s = "\x{d800}\x{ffff}";
521 is($s, "\x{d800}\x{ffff}", "do_trans_complex, SQUASH");
523 $s = "\x{d800}\x{ffff}";
525 is($s, "AA", "do_trans_complex, COMPLEMENT");
# The same code points used in the search list, in a search range, in the
# replacement list and in a replacement range.
528 $s =~ tr/\x{ffff}/\x{1ffff}/;
529 is($s, "A\x{1ffff}B", "utf8, SEARCHLIST");
531 $s = "\x{fffd}\x{fffe}\x{ffff}";
532 $s =~ tr/\x{fffd}-\x{ffff}/ABC/;
533 is($s, "ABC", "utf8, SEARCHLIST range");
536 $s =~ tr/ABC/\x{ffff}/;
537 is($s, "\x{ffff}"x3, "utf8, REPLACEMENTLIST");
540 $s =~ tr/ABC/\x{fffd}-\x{ffff}/;
541 is($s, "\x{fffd}\x{fffe}\x{ffff}", "utf8, REPLACEMENTLIST range");
# Empty replacement list without /d: only counts, $i is the number of matches.
543 $s = "A\x{ffff}B\x{100}\0\x{fffe}\x{ffff}";
544 $i = $s =~ tr/\x{ffff}//;
545 is($i, 2, "utf8, count");
# /s squeezes each run of identical transliterated characters to one.
547 $s = "A\x{ffff}\x{ffff}C";
548 $s =~ tr/\x{ffff}/\x{100}/s;
549 is($s, "A\x{100}C", "utf8, SQUASH");
551 $s = "A\x{ffff}\x{ffff}\x{fffe}\x{fffe}\x{fffe}C";
552 $s =~ tr/\x{fffe}\x{ffff}//s;
553 is($s, "A\x{ffff}\x{fffe}C", "utf8, SQUASH");
556 $s =~ tr/AB/\x{ffff}/s;
557 is($s, "x\x{ffff}y", "utf8, SQUASH");
560 $s =~ tr/AB/\x{fffe}\x{ffff}/s;
561 is($s, "x\x{fffe}\x{ffff}y", "utf8, SQUASH");
# /c replaces everything that is not in the search list.
563 $s = "A\x{ffff}B\x{fffe}C";
564 $s =~ tr/\x{fffe}\x{ffff}/x/c;
565 is($s, "x\x{ffff}x\x{fffe}x", "utf8, COMPLEMENT");
567 $s = "A\x{10000}B\x{2abcd}C";
568 $s =~ tr/\0-\x{ffff}/x/c;
569 is($s, "AxBxC", "utf8, COMPLEMENT range");
# /d with a shorter replacement list: \x{fffe} becomes x, while \x{ffff} has
# no counterpart and is deleted.
571 $s = "A\x{fffe}B\x{ffff}C";
572 $s =~ tr/\x{fffe}\x{ffff}/x/d;
573 is($s, "AxBC", "utf8, DELETE");
575 } # non-characters end
577 { # related to [perl #27940]
# Delete the control characters \c@ .. \c_ (written as \x00 .. \x1f in the
# second test); the same five characters are expected to remain both times.
580 ($c = "\x20\c@\x30\cA\x40\cZ\x50\c_\x60") =~ tr/\c@-\c_//d;
581 is($c, "\x20\x30\x40\x50\x60", "tr/\\c\@-\\c_//d");
583 ($c = "\x20\x00\x30\x01\x40\x1A\x50\x1F\x60") =~ tr/\x00-\x1f//d;
584 is($c, "\x20\x30\x40\x50\x60", "tr/\\x00-\\x1f//d");
# $s is a hash key obtained from keys(); the checks below rely on it being a
# copy-on-write scalar both before and after a count-only tr.
587 ($s) = keys %{{pie => 3}};
589 if (!eval { require XS::APItest }) { skip "no XS::APItest", 2 }
590 my $wasro = XS::APItest::SvIsCOW($s);
591 ok $wasro, "have a COW";
593 ok( XS::APItest::SvIsCOW($s),
594 "count-only tr doesn't deCOW COWs" );
599 # under threads, unicode tr within a cloned closure would SEGV or assert
600 # fail, since the pointer in the pad to the swash was getting zeroed out
606 $x =~ tr[\x{142}][\x{143}];
608 is($x,"\x{143}", "utf8 + closure");
611 # Freeing of trans ops prior to pmtrans() [perl #102858].
# NOTE(review): '~=' is not a Perl operator, so the eval'd text presumably is
# deliberately invalid; confirm that only surviving the compile is being tested.
612 eval q{ $a ~= tr/a/b/; };
615 no warnings "deprecated";
616 skip "no encoding", 1 unless eval { require encoding; 1 };
617 eval q{ use encoding "utf8"; $a ~= tr/a/b/; };
625 { no warnings 'utf8'; print "# $x\n"; } # No note() to avoid wide warning.
626 is($x, "Perlβ", "Only first of multiple transliterations is used");
629 # tr/a/b/ should fail even on zero-length read-only strings
# nullrocow is the empty string taken from the key of an anonymous hash
# (presumably read-only and copy-on-write, going by its name).
630 use constant nullrocow => (keys%{{""=>undef}})[0];
631 for ("", nullrocow) {
632 eval { $_ =~ y/a/b/ };
633 like $@, qr/^Modification of a read-only value attempted at /,
634 'tr/a/b/ fails on zero-length ro string';
637 # Whether they're permitted or not, non-modifying tr/// should not write
638 # to read-only values, even with funky flags.
# The result of each eval is ignored on purpose: the unconditional ok(1, ...)
# afterwards only records that perl got this far without asserting.
640 eval q{ ('a' =~ /./) =~ tr///d };
641 ok(1, "tr///d on PL_Yes does not assert");
642 eval q{ ('a' =~ /./) =~ tr/a-z/a-z/d };
643 ok(1, "tr/a-z/a-z/d on PL_Yes does not assert");
644 eval q{ ('a' =~ /./) =~ tr///s };
645 ok(1, "tr///s on PL_Yes does not assert");
646 eval q{ *x =~ tr///d };
647 ok(1, "tr///d on glob does not assert");
# \N{U+...} and \N{name} in a tr/// search list must match upper-Latin1
# characters.  utf8::unicode_to_native() converts the Unicode code point to
# the platform's native one, so the same test works on non-ASCII platforms.
651 my $string = chr utf8::unicode_to_native(0x00e0);
652 $string =~ tr/\N{U+00e0}/A/;
653 is($string, "A", 'tr// of \N{U+...} works for upper-Latin1');
# Reuse the lexical declared above: a second "my $string" in the same scope
# would trigger a '"my" variable masks earlier declaration' warning.
654 $string = chr utf8::unicode_to_native(0x00e1);
655 $string =~ tr/\N{LATIN SMALL LETTER A WITH ACUTE}/A/;
656 is($string, "A", 'tr// of \N{name} works for upper-Latin1');