# tr.t
$|=1;
-use utf8;
-
BEGIN {
chdir 't' if -d 't';
require './test.pl';
set_up_inc('../lib');
+ if (is_miniperl()) {
+ eval 'require utf8';
+ if ($@) { skip_all("miniperl, no 'utf8'") }
+ }
}
-plan tests => 164;
+use utf8;
+
+plan tests => 296;
# Test this first before we extend the stack with other operations.
# This caused an asan failure due to a bad write past the end of the stack.
tr/b-y/B-Y/;
is($_, "aBCDEFGHIJKLMNOPQRSTUVWXYz", 'partial uc');
+tr/a-a/AB/;
+is($_, "ABCDEFGHIJKLMNOPQRSTUVWXYz", 'single char range a-a');
+
eval 'tr/a/\N{KATAKANA LETTER AINU P}/;';
like $@,
- qr/\\N\{KATAKANA LETTER AINU P} must not be a named sequence in transliteration operator/,
+ qr/\\N\{KATAKANA LETTER AINU P\} must not be a named sequence in transliteration operator/,
"Illegal to tr/// named sequence";
eval 'tr/\x{101}-\x{100}//;';
like $@,
- qr/Invalid range "\\x\{0101}-\\x\{0100}" in transliteration operator/,
+ qr/Invalid range "\\x\{0101\}-\\x\{0100\}" in transliteration operator/,
"UTF-8 range with min > max";
+
+# Test /c and variants, with all the search and replace chars being
+# non-utf8, but with both non-utf8 and utf8 strings.
+
+{
+ my $all255 = join '', map chr, 0..0xff;
+ my $all255_twice = join '', map chr, map { ($_, $_) } 0..0xff;
+ my $all255_plus = join '', map chr, 0..0x11f;
+ my $all255_twice_plus = join '', map chr, map { ($_, $_) } 0..0x11f;
+ my ($c, $s);
+
+
+ # length(replacement) == 0
+ # non-utf8 string
+
+ $s = $all255;
+ $c = $s =~ tr/\x40-\xbf//c;
+ is $s, $all255, "/c ==0";
+ is $c, 0x80, "/c ==0 count";
+
+ $s = $all255;
+ $c = $s =~ tr/\x40-\xbf//cd;
+ is $s, join('', map chr, 0x40..0xbf), "/cd ==0";
+ is $c, 0x80, "/cd ==0 count";
+
+ $s = $all255_twice;
+ $c = $s =~ tr/\x40-\xbf//cs;
+ is $s, join('', map chr,
+ 0x00..0x3f,
+ (map { ($_, $_) } 0x40..0xbf),
+ 0xc0..0xff,
+ ),
+ "/cs ==0";
+ is $c, 0x100, "/cs ==0 count";
+
+ $s = $all255_twice;
+ $c = $s =~ tr/\x40-\xbf//csd;
+ is $s, join('', map chr, (map { ($_, $_) } 0x40..0xbf)), "/csd ==0";
+ is $c, 0x100, "/csd ==0 count";
+
+
+ # length(search) > length(replacement)
+ # non-utf8 string
+
+ $s = $all255;
+ $c = $s =~ tr/\x40-\xbf/\x80-\xbf\x00-\x2f/c;
+ is $s, join('', map chr,
+ 0x80..0xbf,
+ 0x40..0xbf,
+ 0x00..0x2f,
+ ((0x2f) x 16),
+ ),
+ "/c >";
+ is $c, 0x80, "/c > count";
+
+ $s = $all255;
+ $c = $s =~ tr/\x40-\xbf/\x80-\xbf\x00-\x2f/cd;
+ is $s, join('', map chr, 0x80..0xbf, 0x40..0xbf, 0x00..0x2f),
+ "/cd >";
+ is $c, 0x80, "/cd > count";
+
+ $s = $all255_twice;
+ $c = $s =~ tr/\x40-\xbf/\x80-\xbf\x00-\x2f/cs;
+ is $s, join('', map chr,
+ 0x80..0xbf,
+ (map { ($_, $_) } 0x40..0xbf),
+ 0x00..0x2f,
+ ),
+ "/cs >";
+ is $c, 0x100, "/cs > count";
+
+ $s = $all255_twice;
+ $c = $s =~ tr/\x40-\xbf/\x80-\xbf\x00-\x2f/csd;
+ is $s, join('', map chr,
+ 0x80..0xbf,
+ (map { ($_, $_) } 0x40..0xbf),
+ 0x00..0x2f,
+ ),
+ "/csd >";
+ is $c, 0x100, "/csd > count";
+
+
+ # length(search) == length(replacement)
+ # non-utf8 string
+
+ $s = $all255;
+ $c = $s =~ tr/\x40-\xbf/\x80-\xbf\x00-\x3f/c;
+ is $s, join('', map chr, 0x80..0xbf, 0x40..0xbf, 0x00..0x3f), "/c ==";
+ is $c, 0x80, "/c == count";
+
+ $s = $all255;
+ $c = $s =~ tr/\x40-\xbf/\x80-\xbf\x00-\x3f/cd;
+ is $s, join('', map chr, 0x80..0xbf, 0x40..0xbf, 0x00..0x3f), "/cd ==";
+ is $c, 0x80, "/cd == count";
+
+ $s = $all255_twice;
+ $c = $s =~ tr/\x40-\xbf/\x80-\xbf\x00-\x3f/cs;
+ is $s, join('', map chr,
+ 0x80..0xbf,
+ (map { ($_, $_) } 0x40..0xbf),
+ 0x00..0x3f,
+ ),
+ "/cs ==";
+ is $c, 0x100, "/cs == count";
+
+ $s = $all255_twice;
+ $c = $s =~ tr/\x40-\xbf/\x80-\xbf\x00-\x3f/csd;
+ is $s, join('', map chr,
+ 0x80..0xbf,
+ (map { ($_, $_) } 0x40..0xbf),
+ 0x00..0x3f,
+ ),
+ "/csd ==";
+ is $c, 0x100, "/csd == count";
+
+ # length(search) == length(replacement) - 1
+ # non-utf8 string
+
+
+ $s = $all255;
+ $c = $s =~ tr/\x40-\xbf\xf0-\xff/\x80-\xbf\x00-\x30/c;
+ is $s, join('', map chr, 0x80..0xbf, 0x40..0xbf, 0x00..0x2f, 0xf0..0xff),
+ "/c =-";
+ is $c, 0x70, "/c =- count";
+
+ $s = $all255;
+ $c = $s =~ tr/\x40-\xbf\xf0-\xff/\x80-\xbf\x00-\x30/cd;
+ is $s, join('', map chr, 0x80..0xbf, 0x40..0xbf, 0x00..0x2f, 0xf0..0xff),
+ "/cd =-";
+ is $c, 0x70, "/cd =- count";
+
+ $s = $all255_twice;
+ $c = $s =~ tr/\x40-\xbf\xf0-\xff/\x80-\xbf\x00-\x30/cs;
+ is $s, join('', map chr,
+ 0x80..0xbf,
+ (map { ($_, $_) } 0x40..0xbf),
+ 0x00..0x2f,
+ (map { ($_, $_) } 0xf0..0xff),
+ ),
+ "/cs =-";
+ is $c, 0xe0, "/cs =- count";
+
+ $s = $all255_twice;
+ $c = $s =~ tr/\x40-\xbf\xf0-\xff/\x80-\xbf\x00-\x30/csd;
+ is $s, join('', map chr,
+ 0x80..0xbf,
+ (map { ($_, $_) } 0x40..0xbf),
+ 0x00..0x2f,
+ (map { ($_, $_) } 0xf0..0xff),
+ ),
+ "/csd =-";
+ is $c, 0xe0, "/csd =- count";
+
+ # length(search) < length(replacement)
+ # non-utf8 string
+
+ $s = $all255;
+ $c = $s =~ tr/\x40-\xbf\xf0-\xff/\x80-\xbf\x00-\x3f/c;
+ is $s, join('', map chr, 0x80..0xbf, 0x40..0xbf, 0x00..0x2f, 0xf0..0xff),
+ "/c <";
+ is $c, 0x70, "/c < count";
+
+ $s = $all255;
+ $c = $s =~ tr/\x40-\xbf\xf0-\xff/\x80-\xbf\x00-\x3f/cd;
+ is $s, join('', map chr, 0x80..0xbf, 0x40..0xbf, 0x00..0x2f, 0xf0..0xff),
+ "/cd <";
+ is $c, 0x70, "/cd < count";
+
+ $s = $all255_twice;
+ $c = $s =~ tr/\x40-\xbf\xf0-\xff/\x80-\xbf\x00-\x3f/cs;
+ is $s, join('', map chr,
+ 0x80..0xbf,
+ (map { ($_, $_) } 0x40..0xbf),
+ 0x00..0x2f,
+ (map { ($_, $_) } 0xf0..0xff),
+ ),
+ "/cs <";
+ is $c, 0xe0, "/cs < count";
+
+ $s = $all255_twice;
+ $c = $s =~ tr/\x40-\xbf\xf0-\xff/\x80-\xbf\x00-\x3f/csd;
+ is $s, join('', map chr,
+ 0x80..0xbf,
+ (map { ($_, $_) } 0x40..0xbf),
+ 0x00..0x2f,
+ (map { ($_, $_) } 0xf0..0xff),
+ ),
+ "/csd <";
+ is $c, 0xe0, "/csd < count";
+
+
+ # length(replacement) == 0
+ # with some >= 0x100 utf8 chars in the string to be modified
+
+ $s = $all255_plus;
+ $c = $s =~ tr/\x40-\xbf//c;
+ is $s, $all255_plus, "/c ==0U";
+ is $c, 0xa0, "/c ==0U count";
+
+ $s = $all255_plus;
+ $c = $s =~ tr/\x40-\xbf//cd;
+ is $s, join('', map chr, 0x40..0xbf), "/cd ==0U";
+ is $c, 0xa0, "/cd ==0U count";
+
+ $s = $all255_twice_plus;
+ $c = $s =~ tr/\x40-\xbf//cs;
+ is $s, join('', map chr,
+ 0x00..0x3f,
+ (map { ($_, $_) } 0x40..0xbf),
+ 0xc0..0x11f,
+ ),
+ "/cs ==0U";
+ is $c, 0x140, "/cs ==0U count";
+
+ $s = $all255_twice_plus;
+ $c = $s =~ tr/\x40-\xbf//csd;
+ is $s, join('', map chr, (map { ($_, $_) } 0x40..0xbf)), "/csd ==0U";
+ is $c, 0x140, "/csd ==0U count";
+
+ # length(search) > length(replacement)
+ # with some >= 0x100 utf8 chars in the string to be modified
+
+ $s = $all255_plus;
+ $c = $s =~ tr/\x40-\xbf/\x80-\xbf\x00-\x2f/c;
+ is $s, join('', map chr,
+ 0x80..0xbf,
+ 0x40..0xbf,
+ 0x00..0x2f,
+ ((0x2f) x 48),
+ ),
+ "/c >U";
+ is $c, 0xa0, "/c >U count";
+
+ $s = $all255_plus;
+ $c = $s =~ tr/\x40-\xbf/\x80-\xbf\x00-\x2f/cd;
+ is $s, join('', map chr, 0x80..0xbf, 0x40..0xbf, 0x00..0x2f),
+ "/cd >U";
+ is $c, 0xa0, "/cd >U count";
+
+ $s = $all255_twice_plus . "\x3f\x3f\x{200}\x{300}";
+ $c = $s =~ tr/\x40-\xbf/\x80-\xbf\x00-\x2f/cs;
+ is $s, join('', map chr,
+ 0x80..0xbf,
+ (map { ($_, $_) } 0x40..0xbf),
+ 0x00..0x2f,
+ 0xbf,
+ 0x2f,
+ ),
+ "/cs >U";
+ is $c, 0x144, "/cs >U count";
+
+ $s = $all255_twice_plus;
+ $c = $s =~ tr/\x40-\xbf/\x80-\xbf\x00-\x2f/csd;
+ is $s, join('', map chr,
+ 0x80..0xbf,
+ (map { ($_, $_) } 0x40..0xbf),
+ 0x00..0x2f,
+ ),
+ "/csd >U";
+ is $c, 0x140, "/csd >U count";
+
+ # length(search) == length(replacement)
+ # with some >= 0x100 utf8 chars in the string to be modified
+
+ $s = $all255_plus;
+ $c = $s =~ tr/\x40-\xbf/\x80-\xbf\x00-\x3f/c;
+ is $s, join('', map chr,
+ 0x80..0xbf,
+ 0x40..0xbf,
+ 0x00..0x3f,
+ ((0x3f) x 32),
+ ),
+ "/c ==U";
+ is $c, 0xa0, "/c ==U count";
+
+ $s = $all255_plus;
+ $c = $s =~ tr/\x40-\xbf/\x80-\xbf\x00-\x3f/cd;
+ is $s, join('', map chr, 0x80..0xbf, 0x40..0xbf, 0x00..0x3f), "/cd ==U";
+ is $c, 0xa0, "/cd ==U count";
+
+ $s = $all255_twice_plus . "\x3f\x3f\x{200}\x{300}";
+ $c = $s =~ tr/\x40-\xbf/\x80-\xbf\x00-\x3f/cs;
+ is $s, join('', map chr,
+ 0x80..0xbf,
+ (map { ($_, $_) } 0x40..0xbf),
+ 0x00..0x3f,
+ 0xbf,
+ 0x3f,
+ ),
+ "/cs ==U";
+ is $c, 0x144, "/cs ==U count";
+
+ $s = $all255_twice_plus;
+ $c = $s =~ tr/\x40-\xbf/\x80-\xbf\x00-\x3f/csd;
+ is $s, join('', map chr,
+ 0x80..0xbf,
+ (map { ($_, $_) } 0x40..0xbf),
+ 0x00..0x3f,
+ ),
+ "/csd ==U";
+ is $c, 0x140, "/csd ==U count";
+
+
+ # length(search) == length(replacement) - 1
+ # with some >= 0x100 utf8 chars in the string to be modified
+
+ $s = $all255_plus;
+ $c = $s =~ tr/\x40-\xbf/\x80-\xbf\x00-\x40/c;
+ is $s, join('', map chr,
+ 0x80..0xbf,
+ 0x40..0xbf,
+ 0x00..0x40,
+ ((0x40) x 31),
+ ),
+ "/c =-U";
+ is $c, 0xa0, "/c =-U count";
+
+ $s = $all255_plus;
+ $c = $s =~ tr/\x40-\xbf/\x80-\xbf\x00-\x40/cd;
+ is $s, join('', map chr, 0x80..0xbf, 0x40..0xbf, 0x00..0x40), "/cd =-U";
+ is $c, 0xa0, "/cd =-U count";
+
+ $s = $all255_twice_plus . "\x3f\x3f\x{200}\x{300}";
+ $c = $s =~ tr/\x40-\xbf/\x80-\xbf\x00-\x40/cs;
+ is $s, join('', map chr,
+ 0x80..0xbf,
+ (map { ($_, $_) } 0x40..0xbf),
+ 0x00..0x40,
+ 0xbf,
+ 0x40,
+ ),
+ "/cs =-U";
+ is $c, 0x144, "/cs =-U count";
+
+ $s = $all255_twice_plus;
+ $c = $s =~ tr/\x40-\xbf/\x80-\xbf\x00-\x40/csd;
+ {
+ local $TODO = "missing last 0x40";
+ is $s, join('', map chr,
+ 0x80..0xbf,
+ (map { ($_, $_) } 0x40..0xbf),
+ 0x00..0x40,
+ ),
+ "/csd =-U";
+ }
+ is $c, 0x140, "/csd =-U count";
+
+
+
+ # length(search) < length(replacement),
+ # with some >= 0x100 utf8 chars in the string to be modified
+
+ $s = $all255_plus;
+ $c = $s =~ tr/\x40-\xbf\xf0-\xff/\x80-\xbf\x00-\x3f/c;
+ is $s, join('', map chr,
+ 0x80..0xbf,
+ 0x40..0xbf,
+ 0x00..0x2f,
+ 0xf0..0xff,
+ 0x30..0x3f,
+ ((0x3f)x 16),
+ ),
+ "/c <U";
+ is $c, 0x90, "/c <U count";
+
+ $s = $all255_plus;
+ $c = $s =~ tr/\x40-\xbf\xf0-\xff/\x80-\xbf\x00-\x3f/cd;
+ {
+ local $TODO = "missing 30-3f";
+ is $s, join('', map chr,
+ 0x80..0xbf,
+ 0x40..0xbf,
+ 0x00..0x2f,
+ 0xf0..0xff,
+ 0x30..0x3f,
+ ),
+ "/cd <U";
+ }
+ is $c, 0x90, "/cd <U count";
+
+ $s = $all255_twice_plus . "\x3f\x3f\x{200}\x{300}";
+ $c = $s =~ tr/\x40-\xbf\xf0-\xff/\x80-\xbf\x00-\x3f/cs;
+ is $s, join('', map chr,
+ 0x80..0xbf,
+ (map { ($_, $_) } 0x40..0xbf),
+ 0x00..0x2f,
+ (map { ($_, $_) } 0xf0..0xff),
+ 0x30..0x3f,
+ 0xbf,
+ 0x3f,
+ ),
+ "/cs <U";
+ is $c, 0x124, "/cs <U count";
+
+ $s = $all255_twice_plus;
+ $c = $s =~ tr/\x40-\xbf\xf0-\xff/\x80-\xbf\x00-\x3f/csd;
+ {
+ local $TODO = "missing 30-3f";
+ is $s, join('', map chr, 0x80..0xbf,
+ (map { ($_, $_) } 0x40..0xbf),
+ 0x00..0x2f,
+ (map { ($_, $_) } 0xf0..0xff),
+ 0x30..0x3f,
+ ),
+ "/csd <U";
+ }
+ is $c, 0x120, "/csd <U count";
+}
+
+
SKIP: { # Test literal range end point special handling
unless ($::IS_EBCDIC) {
skip "Valid only for EBCDIC", 24;
is $wc, 1, '/r warns just once';
}
-# perlbug [ID 20000511.005]
+# perlbug [ID 20000511.005 (#3237)]
$_ = 'fred';
/([a-z]{2})/;
$1 =~ tr/A-Z//;
# rt.perl.org 36622. Perl didn't like a y/// at end of file. No trailing
# newline allowed.
-fresh_perl_is(q[$_ = "foo"; y/A-Z/a-z/], '');
+fresh_perl_is(q[$_ = "foo"; y/A-Z/a-z/], '', {}, 'RT #36622 y/// at end of file');
{ # [perl #38293] chr(65535) should be allowed in regexes
ok(1, "tr///d on glob does not assert");
}
+{ # [perl #128734
+ my $string = chr utf8::unicode_to_native(0x00e0);
+ $string =~ tr/\N{U+00e0}/A/;
+ is($string, "A", 'tr// of \N{U+...} works for upper-Latin1');
+ my $string = chr utf8::unicode_to_native(0x00e1);
+ $string =~ tr/\N{LATIN SMALL LETTER A WITH ACUTE}/A/;
+ is($string, "A", 'tr// of \N{name} works for upper-Latin1');
+}
+
+# RT #130198
+# a tr/// that is cho(m)ped, possibly with an array as arg
+
+{
+ use warnings;
+
+ my ($s, @a);
+
+ my $warn;
+ local $SIG{__WARN__ } = sub { $warn .= "@_" };
+
+ for my $c (qw(chop chomp)) {
+ for my $bind ('', '$s =~ ', '@a =~ ') {
+ for my $arg2 (qw(a b)) {
+ for my $r ('', 'r') {
+ $warn = '';
+ # tr/a/b/ modifies its LHS, so if the LHS is an
+ # array, this should die. The special cases of tr/a/a/
+ # and tr/a/b/r don't modify their LHS, so instead
+ # we croak because cho(m)p is trying to modify it.
+ #
+ my $exp =
+ ($r eq '' && $arg2 eq 'b' && $bind =~ /\@a/)
+ ? qr/Can't modify private array in transliteration/
+ : qr{Can't modify transliteration \(tr///\) in $c};
+
+ my $expr = "$c(${bind}tr/a/$arg2/$r);";
+ eval $expr;
+ like $@, $exp, "RT #130198 eval: $expr";
+
+ $exp =
+ $bind =~ /\@a/
+ ? qr{^Applying transliteration \(tr///\) to \@a will act on scalar\(\@a\)}
+ : qr/^$/;
+ like $warn, $exp, "RT #130198 warn: $expr";
+ }
+ }
+ }
+ }
+
+
+}
+
+{ # [perl #130656] This bug happens when the tr is split across lines, so
+ # that the first line causes it to go into UTF-8, and the 2nd is only
+ # things like \x
+ my $x = "\x{E235}";
+ $x =~ tr
+ [\x{E234}-\x{E342}\x{E5B5}-\x{E5DF}]
+ [\x{E5CD}-\x{E5DF}\x{EA80}-\x{EAFA}\x{EB0E}-\x{EB8E}\x{EAFB}-\x{EB0D}\x{E5B5}-\x{E5CC}];
+
+ is $x, "\x{E5CE}", '[perl #130656]';
+}
+
1;