Commit | Line | Data |
---|---|---|
d0ea2801 RGS |
1 | #!./perl -T |
2 | # tests whether tainting works with UTF-8 | |
3 | ||
4 | BEGIN { | |
d0ea2801 RGS |
5 | chdir 't' if -d 't'; |
6 | @INC = qw(../lib); | |
7 | } | |
8 | ||
9 | use strict; | |
10 | use Config; | |
11 | ||
d2a59f97 NC |
12 | # How to identify taint when you see it |
13 | sub any_tainted (@) { | |
14 | not eval { join("",@_), kill 0; 1 }; | |
15 | } | |
16 | sub tainted ($) { | |
17 | any_tainted @_; | |
d0ea2801 RGS |
18 | } |
19 | ||
d2a59f97 | 20 | require './test.pl'; |
d020f5c4 | 21 | plan(tests => 3*10 + 3*8 + 2*16 + 2); |
d0ea2801 RGS |
22 | |
23 | my $arg = $ENV{PATH}; # a tainted value | |
24 | use constant UTF8 => "\x{1234}"; | |
25 | ||
78ea37eb TS |
26 | sub is_utf8 { |
27 | my $s = shift; | |
f337b084 | 28 | return 0xB6 != unpack('C', chr(0xB6).$s); |
78ea37eb TS |
29 | } |
30 | ||
d0ea2801 RGS |
31 | for my $ary ([ascii => 'perl'], [latin1 => "\xB6"], [utf8 => "\x{100}"]) { |
32 | my $encode = $ary->[0]; | |
33 | my $string = $ary->[1]; | |
34 | ||
35 | my $taint = $arg; substr($taint, 0) = $ary->[1]; | |
36 | ||
d2a59f97 | 37 | is(tainted($taint), tainted($arg), "tainted: $encode, before test"); |
d0ea2801 RGS |
38 | |
39 | my $lconcat = $taint; | |
40 | $lconcat .= UTF8; | |
d2a59f97 | 41 | is($lconcat, $string.UTF8, "compare: $encode, concat left"); |
d0ea2801 | 42 | |
d2a59f97 | 43 | is(tainted($lconcat), tainted($arg), "tainted: $encode, concat left"); |
d0ea2801 RGS |
44 | |
45 | my $rconcat = UTF8; | |
46 | $rconcat .= $taint; | |
d2a59f97 | 47 | is($rconcat, UTF8.$string, "compare: $encode, concat right"); |
d0ea2801 | 48 | |
d2a59f97 | 49 | is(tainted($rconcat), tainted($arg), "tainted: $encode, concat right"); |
d0ea2801 RGS |
50 | |
51 | my $ljoin = join('!', $taint, UTF8); | |
d2a59f97 | 52 | is($ljoin, join('!', $string, UTF8), "compare: $encode, join left"); |
d0ea2801 | 53 | |
d2a59f97 | 54 | is(tainted($ljoin), tainted($arg), "tainted: $encode, join left"); |
d0ea2801 RGS |
55 | |
56 | my $rjoin = join('!', UTF8, $taint); | |
d2a59f97 | 57 | is($rjoin, join('!', UTF8, $string), "compare: $encode, join right"); |
d0ea2801 | 58 | |
d2a59f97 | 59 | is(tainted($rjoin), tainted($arg), "tainted: $encode, join right"); |
d0ea2801 | 60 | |
d2a59f97 | 61 | is(tainted($taint), tainted($arg), "tainted: $encode, after test"); |
d0ea2801 | 62 | } |
78ea37eb TS |
63 | |
64 | ||
65 | for my $ary ([ascii => 'perl'], [latin1 => "\xB6"], [utf8 => "\x{100}"]) { | |
66 | my $encode = $ary->[0]; | |
67 | ||
68 | my $utf8 = pack('U*') . $ary->[1]; | |
f337b084 | 69 | my $byte = unpack('U0a*', $utf8); |
78ea37eb TS |
70 | |
71 | my $taint = $arg; substr($taint, 0) = $utf8; | |
72 | utf8::encode($taint); | |
73 | ||
d2a59f97 | 74 | is($taint, $byte, "compare: $encode, encode utf8"); |
78ea37eb | 75 | |
d2a59f97 | 76 | is(pack('a*',$taint), pack('a*',$byte), "bytecmp: $encode, encode utf8"); |
78ea37eb | 77 | |
d2a59f97 | 78 | ok(!is_utf8($taint), "is_utf8: $encode, encode utf8"); |
78ea37eb | 79 | |
d2a59f97 | 80 | is(tainted($taint), tainted($arg), "tainted: $encode, encode utf8"); |
78ea37eb TS |
81 | |
82 | my $taint = $arg; substr($taint, 0) = $byte; | |
83 | utf8::decode($taint); | |
84 | ||
d2a59f97 | 85 | is($taint, $utf8, "compare: $encode, decode byte"); |
78ea37eb | 86 | |
d2a59f97 | 87 | is(pack('a*',$taint), pack('a*',$utf8), "bytecmp: $encode, decode byte"); |
78ea37eb | 88 | |
d2a59f97 | 89 | is(is_utf8($taint), ($encode ne 'ascii'), "is_utf8: $encode, decode byte"); |
78ea37eb | 90 | |
d2a59f97 | 91 | is(tainted($taint), tainted($arg), "tainted: $encode, decode byte"); |
78ea37eb TS |
92 | } |
93 | ||
94 | ||
95 | for my $ary ([ascii => 'perl'], [latin1 => "\xB6"]) { | |
96 | my $encode = $ary->[0]; | |
97 | ||
98 | my $up = pack('U*') . $ary->[1]; | |
f337b084 | 99 | my $down = pack("a*", $ary->[1]); |
78ea37eb TS |
100 | |
101 | my $taint = $arg; substr($taint, 0) = $up; | |
102 | utf8::upgrade($taint); | |
103 | ||
d2a59f97 | 104 | is($taint, $up, "compare: $encode, upgrade up"); |
78ea37eb | 105 | |
d2a59f97 | 106 | is(pack('a*',$taint), pack('a*',$up), "bytecmp: $encode, upgrade up"); |
78ea37eb | 107 | |
d2a59f97 | 108 | ok(is_utf8($taint), "is_utf8: $encode, upgrade up"); |
78ea37eb | 109 | |
d2a59f97 | 110 | is(tainted($taint), tainted($arg), "tainted: $encode, upgrade up"); |
78ea37eb TS |
111 | |
112 | my $taint = $arg; substr($taint, 0) = $down; | |
113 | utf8::upgrade($taint); | |
114 | ||
d2a59f97 | 115 | is($taint, $up, "compare: $encode, upgrade down"); |
78ea37eb | 116 | |
d2a59f97 | 117 | is(pack('a*',$taint), pack('a*',$up), "bytecmp: $encode, upgrade down"); |
78ea37eb | 118 | |
d2a59f97 | 119 | ok(is_utf8($taint), "is_utf8: $encode, upgrade down"); |
78ea37eb | 120 | |
d2a59f97 | 121 | is(tainted($taint), tainted($arg), "tainted: $encode, upgrade down"); |
78ea37eb TS |
122 | |
123 | my $taint = $arg; substr($taint, 0) = $up; | |
124 | utf8::downgrade($taint); | |
125 | ||
d2a59f97 | 126 | is($taint, $down, "compare: $encode, downgrade up"); |
78ea37eb | 127 | |
d2a59f97 | 128 | is(pack('a*',$taint), pack('a*',$down), "bytecmp: $encode, downgrade up"); |
78ea37eb | 129 | |
d2a59f97 | 130 | ok(!is_utf8($taint), "is_utf8: $encode, downgrade up"); |
78ea37eb | 131 | |
d2a59f97 | 132 | is(tainted($taint), tainted($arg), "tainted: $encode, downgrade up"); |
78ea37eb TS |
133 | |
134 | my $taint = $arg; substr($taint, 0) = $down; | |
135 | utf8::downgrade($taint); | |
136 | ||
d2a59f97 | 137 | is($taint, $down, "compare: $encode, downgrade down"); |
78ea37eb | 138 | |
d2a59f97 | 139 | is(pack('a*',$taint), pack('a*',$down), "bytecmp: $encode, downgrade down"); |
78ea37eb | 140 | |
d2a59f97 | 141 | ok(!is_utf8($taint), "is_utf8: $encode, downgrade down"); |
78ea37eb | 142 | |
d2a59f97 | 143 | is(tainted($taint), tainted($arg), "tainted: $encode, downgrade down"); |
78ea37eb TS |
144 | } |
145 | ||
5316d14d | 146 | { |
5316d14d DD |
147 | fresh_perl_is('$a = substr $^X, 0, 0; /\x{100}/i; /$a\x{100}/i || print q,ok,', |
148 | 'ok', {switches => ["-T", "-l"]}, | |
149 | "matching a regexp is taint agnostic"); | |
e7b79d50 NC |
150 | |
151 | fresh_perl_is('$a = substr $^X, 0, 0; /$a\x{100}/i || print q,ok,', | |
152 | 'ok', {switches => ["-T", "-l"]}, | |
153 | "therefore swash_init should be taint agnostic"); | |
154 | } |