Commit | Line | Data |
---|---|---|
d0ea2801 RGS |
1 | #!./perl -T |
2 | # tests whether tainting works with UTF-8 | |
3 | ||
4 | BEGIN { | |
d0ea2801 RGS |
5 | chdir 't' if -d 't'; |
6 | @INC = qw(../lib); | |
7 | } | |
8 | ||
9 | use strict; | |
10 | use Config; | |
11 | ||
d2a59f97 NC |
12 | # How to identify taint when you see it |
13 | sub any_tainted (@) { | |
14 | not eval { join("",@_), kill 0; 1 }; | |
15 | } | |
16 | sub tainted ($) { | |
17 | any_tainted @_; | |
d0ea2801 RGS |
18 | } |
19 | ||
d2a59f97 | 20 | require './test.pl'; |
cda67c99 | 21 | plan(tests => 3*10 + 3*8 + 2*16 + 3); |
d0ea2801 RGS |
22 | |
23 | my $arg = $ENV{PATH}; # a tainted value | |
24 | use constant UTF8 => "\x{1234}"; | |
25 | ||
1651fc44 | 26 | *is_utf8 = \&utf8::is_utf8; |
78ea37eb | 27 | |
d0ea2801 RGS |
28 | for my $ary ([ascii => 'perl'], [latin1 => "\xB6"], [utf8 => "\x{100}"]) { |
29 | my $encode = $ary->[0]; | |
30 | my $string = $ary->[1]; | |
31 | ||
32 | my $taint = $arg; substr($taint, 0) = $ary->[1]; | |
33 | ||
d2a59f97 | 34 | is(tainted($taint), tainted($arg), "tainted: $encode, before test"); |
d0ea2801 RGS |
35 | |
36 | my $lconcat = $taint; | |
37 | $lconcat .= UTF8; | |
d2a59f97 | 38 | is($lconcat, $string.UTF8, "compare: $encode, concat left"); |
d0ea2801 | 39 | |
d2a59f97 | 40 | is(tainted($lconcat), tainted($arg), "tainted: $encode, concat left"); |
d0ea2801 RGS |
41 | |
42 | my $rconcat = UTF8; | |
43 | $rconcat .= $taint; | |
d2a59f97 | 44 | is($rconcat, UTF8.$string, "compare: $encode, concat right"); |
d0ea2801 | 45 | |
d2a59f97 | 46 | is(tainted($rconcat), tainted($arg), "tainted: $encode, concat right"); |
d0ea2801 RGS |
47 | |
48 | my $ljoin = join('!', $taint, UTF8); | |
d2a59f97 | 49 | is($ljoin, join('!', $string, UTF8), "compare: $encode, join left"); |
d0ea2801 | 50 | |
d2a59f97 | 51 | is(tainted($ljoin), tainted($arg), "tainted: $encode, join left"); |
d0ea2801 RGS |
52 | |
53 | my $rjoin = join('!', UTF8, $taint); | |
d2a59f97 | 54 | is($rjoin, join('!', UTF8, $string), "compare: $encode, join right"); |
d0ea2801 | 55 | |
d2a59f97 | 56 | is(tainted($rjoin), tainted($arg), "tainted: $encode, join right"); |
d0ea2801 | 57 | |
d2a59f97 | 58 | is(tainted($taint), tainted($arg), "tainted: $encode, after test"); |
d0ea2801 | 59 | } |
78ea37eb TS |
60 | |
61 | ||
62 | for my $ary ([ascii => 'perl'], [latin1 => "\xB6"], [utf8 => "\x{100}"]) { | |
63 | my $encode = $ary->[0]; | |
64 | ||
65 | my $utf8 = pack('U*') . $ary->[1]; | |
f337b084 | 66 | my $byte = unpack('U0a*', $utf8); |
78ea37eb TS |
67 | |
68 | my $taint = $arg; substr($taint, 0) = $utf8; | |
69 | utf8::encode($taint); | |
70 | ||
d2a59f97 | 71 | is($taint, $byte, "compare: $encode, encode utf8"); |
78ea37eb | 72 | |
d2a59f97 | 73 | is(pack('a*',$taint), pack('a*',$byte), "bytecmp: $encode, encode utf8"); |
78ea37eb | 74 | |
d2a59f97 | 75 | ok(!is_utf8($taint), "is_utf8: $encode, encode utf8"); |
78ea37eb | 76 | |
d2a59f97 | 77 | is(tainted($taint), tainted($arg), "tainted: $encode, encode utf8"); |
78ea37eb TS |
78 | |
79 | my $taint = $arg; substr($taint, 0) = $byte; | |
80 | utf8::decode($taint); | |
81 | ||
d2a59f97 | 82 | is($taint, $utf8, "compare: $encode, decode byte"); |
78ea37eb | 83 | |
d2a59f97 | 84 | is(pack('a*',$taint), pack('a*',$utf8), "bytecmp: $encode, decode byte"); |
78ea37eb | 85 | |
d2a59f97 | 86 | is(is_utf8($taint), ($encode ne 'ascii'), "is_utf8: $encode, decode byte"); |
78ea37eb | 87 | |
d2a59f97 | 88 | is(tainted($taint), tainted($arg), "tainted: $encode, decode byte"); |
78ea37eb TS |
89 | } |
90 | ||
91 | ||
92 | for my $ary ([ascii => 'perl'], [latin1 => "\xB6"]) { | |
93 | my $encode = $ary->[0]; | |
94 | ||
95 | my $up = pack('U*') . $ary->[1]; | |
f337b084 | 96 | my $down = pack("a*", $ary->[1]); |
78ea37eb TS |
97 | |
98 | my $taint = $arg; substr($taint, 0) = $up; | |
99 | utf8::upgrade($taint); | |
100 | ||
d2a59f97 | 101 | is($taint, $up, "compare: $encode, upgrade up"); |
78ea37eb | 102 | |
d2a59f97 | 103 | is(pack('a*',$taint), pack('a*',$up), "bytecmp: $encode, upgrade up"); |
78ea37eb | 104 | |
d2a59f97 | 105 | ok(is_utf8($taint), "is_utf8: $encode, upgrade up"); |
78ea37eb | 106 | |
d2a59f97 | 107 | is(tainted($taint), tainted($arg), "tainted: $encode, upgrade up"); |
78ea37eb TS |
108 | |
109 | my $taint = $arg; substr($taint, 0) = $down; | |
110 | utf8::upgrade($taint); | |
111 | ||
d2a59f97 | 112 | is($taint, $up, "compare: $encode, upgrade down"); |
78ea37eb | 113 | |
d2a59f97 | 114 | is(pack('a*',$taint), pack('a*',$up), "bytecmp: $encode, upgrade down"); |
78ea37eb | 115 | |
d2a59f97 | 116 | ok(is_utf8($taint), "is_utf8: $encode, upgrade down"); |
78ea37eb | 117 | |
d2a59f97 | 118 | is(tainted($taint), tainted($arg), "tainted: $encode, upgrade down"); |
78ea37eb TS |
119 | |
120 | my $taint = $arg; substr($taint, 0) = $up; | |
121 | utf8::downgrade($taint); | |
122 | ||
d2a59f97 | 123 | is($taint, $down, "compare: $encode, downgrade up"); |
78ea37eb | 124 | |
d2a59f97 | 125 | is(pack('a*',$taint), pack('a*',$down), "bytecmp: $encode, downgrade up"); |
78ea37eb | 126 | |
d2a59f97 | 127 | ok(!is_utf8($taint), "is_utf8: $encode, downgrade up"); |
78ea37eb | 128 | |
d2a59f97 | 129 | is(tainted($taint), tainted($arg), "tainted: $encode, downgrade up"); |
78ea37eb TS |
130 | |
131 | my $taint = $arg; substr($taint, 0) = $down; | |
132 | utf8::downgrade($taint); | |
133 | ||
d2a59f97 | 134 | is($taint, $down, "compare: $encode, downgrade down"); |
78ea37eb | 135 | |
d2a59f97 | 136 | is(pack('a*',$taint), pack('a*',$down), "bytecmp: $encode, downgrade down"); |
78ea37eb | 137 | |
d2a59f97 | 138 | ok(!is_utf8($taint), "is_utf8: $encode, downgrade down"); |
78ea37eb | 139 | |
d2a59f97 | 140 | is(tainted($taint), tainted($arg), "tainted: $encode, downgrade down"); |
78ea37eb TS |
141 | } |
142 | ||
5316d14d | 143 | { |
5316d14d DD |
144 | fresh_perl_is('$a = substr $^X, 0, 0; /\x{100}/i; /$a\x{100}/i || print q,ok,', |
145 | 'ok', {switches => ["-T", "-l"]}, | |
146 | "matching a regexp is taint agnostic"); | |
e7b79d50 NC |
147 | |
148 | fresh_perl_is('$a = substr $^X, 0, 0; /$a\x{100}/i || print q,ok,', | |
149 | 'ok', {switches => ["-T", "-l"]}, | |
150 | "therefore swash_init should be taint agnostic"); | |
151 | } | |
cda67c99 DM |
152 | |
153 | { | |
154 | # RT #122148: s///e on tainted utf8 strings got pos() messed up in 5.20 | |
155 | ||
156 | my @p; | |
157 | my $s = "\x{100}\x{100}\x{100}\x{100}". $^X; | |
158 | $s =~ s/\x{100}/push @p, pos($s); "xxxx";/eg; | |
159 | is("@p", "0 1 2 3", "RT #122148"); | |
160 | } |