This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Gisle noted an unused variable
[perl5.git] / t / op / utftaint.t
CommitLineData
d0ea2801
RGS
1#!./perl -T
2# tests whether tainting works with UTF-8
3
4BEGIN {
5 if ($ENV{PERL_CORE_MINITEST}) {
6 print "1..0 # Skip: no dynamic loading on miniperl, no threads\n";
7 exit 0;
8 }
9 chdir 't' if -d 't';
10 @INC = qw(../lib);
11}
12
13use strict;
14use Config;
15
16BEGIN {
17 if ($Config{extensions} !~ m(\bList/Util\b)) {
18 print "1..0 # Skip: no Scalar::Util module\n";
19 exit 0;
20 }
21}
22
23use Scalar::Util qw(tainted);
24
25use Test;
78ea37eb 26plan tests => 3*10 + 3*8 + 2*16;
d0ea2801
RGS
27my $cnt = 0;
28
29my $arg = $ENV{PATH}; # a tainted value
30use constant UTF8 => "\x{1234}";
31
78ea37eb
TS
32sub is_utf8 {
33 my $s = shift;
f337b084 34 return 0xB6 != unpack('C', chr(0xB6).$s);
78ea37eb
TS
35}
36
d0ea2801
RGS
37for my $ary ([ascii => 'perl'], [latin1 => "\xB6"], [utf8 => "\x{100}"]) {
38 my $encode = $ary->[0];
39 my $string = $ary->[1];
40
41 my $taint = $arg; substr($taint, 0) = $ary->[1];
42
43 print tainted($taint) == tainted($arg)
44 ? "ok " : "not ok ", ++$cnt, " # tainted: $encode, before test\n";
45
46 my $lconcat = $taint;
47 $lconcat .= UTF8;
78ea37eb 48 print $lconcat eq $string.UTF8
d0ea2801
RGS
49 ? "ok " : "not ok ", ++$cnt, " # compare: $encode, concat left\n";
50
51 print tainted($lconcat) == tainted($arg)
52 ? "ok " : "not ok ", ++$cnt, " # tainted: $encode, concat left\n";
53
54 my $rconcat = UTF8;
55 $rconcat .= $taint;
78ea37eb 56 print $rconcat eq UTF8.$string
d0ea2801
RGS
57 ? "ok " : "not ok ", ++$cnt, " # compare: $encode, concat right\n";
58
59 print tainted($rconcat) == tainted($arg)
60 ? "ok " : "not ok ", ++$cnt, " # tainted: $encode, concat right\n";
61
62 my $ljoin = join('!', $taint, UTF8);
63 print $ljoin eq join('!', $string, UTF8)
64 ? "ok " : "not ok ", ++$cnt, " # compare: $encode, join left\n";
65
66 print tainted($ljoin) == tainted($arg)
67 ? "ok " : "not ok ", ++$cnt, " # tainted: $encode, join left\n";
68
69 my $rjoin = join('!', UTF8, $taint);
70 print $rjoin eq join('!', UTF8, $string)
71 ? "ok " : "not ok ", ++$cnt, " # compare: $encode, join right\n";
72
73 print tainted($rjoin) == tainted($arg)
74 ? "ok " : "not ok ", ++$cnt, " # tainted: $encode, join right\n";
75
76 print tainted($taint) == tainted($arg)
77 ? "ok " : "not ok ", ++$cnt, " # tainted: $encode, after test\n";
78}
78ea37eb
TS
79
80
81for my $ary ([ascii => 'perl'], [latin1 => "\xB6"], [utf8 => "\x{100}"]) {
82 my $encode = $ary->[0];
83
84 my $utf8 = pack('U*') . $ary->[1];
f337b084 85 my $byte = unpack('U0a*', $utf8);
78ea37eb
TS
86
87 my $taint = $arg; substr($taint, 0) = $utf8;
88 utf8::encode($taint);
89
90 print $taint eq $byte
91 ? "ok " : "not ok ", ++$cnt, " # compare: $encode, encode utf8\n";
92
93 print pack('a*',$taint) eq pack('a*',$byte)
94 ? "ok " : "not ok ", ++$cnt, " # bytecmp: $encode, encode utf8\n";
95
96 print !is_utf8($taint)
97 ? "ok " : "not ok ", ++$cnt, " # is_utf8: $encode, encode utf8\n";
98
99 print tainted($taint) == tainted($arg)
100 ? "ok " : "not ok ", ++$cnt, " # tainted: $encode, encode utf8\n";
101
102 my $taint = $arg; substr($taint, 0) = $byte;
103 utf8::decode($taint);
104
105 print $taint eq $utf8
106 ? "ok " : "not ok ", ++$cnt, " # compare: $encode, decode byte\n";
107
108 print pack('a*',$taint) eq pack('a*',$utf8)
109 ? "ok " : "not ok ", ++$cnt, " # bytecmp: $encode, decode byte\n";
110
111 print is_utf8($taint) eq ($encode ne 'ascii')
112 ? "ok " : "not ok ", ++$cnt, " # is_utf8: $encode, decode byte\n";
113
114 print tainted($taint) == tainted($arg)
115 ? "ok " : "not ok ", ++$cnt, " # tainted: $encode, decode byte\n";
116}
117
118
119for my $ary ([ascii => 'perl'], [latin1 => "\xB6"]) {
120 my $encode = $ary->[0];
121
122 my $up = pack('U*') . $ary->[1];
f337b084 123 my $down = pack("a*", $ary->[1]);
78ea37eb
TS
124
125 my $taint = $arg; substr($taint, 0) = $up;
126 utf8::upgrade($taint);
127
128 print $taint eq $up
129 ? "ok " : "not ok ", ++$cnt, " # compare: $encode, upgrade up\n";
130
131 print pack('a*',$taint) eq pack('a*',$up)
132 ? "ok " : "not ok ", ++$cnt, " # bytecmp: $encode, upgrade up\n";
133
134 print is_utf8($taint)
135 ? "ok " : "not ok ", ++$cnt, " # is_utf8: $encode, upgrade up\n";
136
137 print tainted($taint) == tainted($arg)
138 ? "ok " : "not ok ", ++$cnt, " # tainted: $encode, upgrade up\n";
139
140 my $taint = $arg; substr($taint, 0) = $down;
141 utf8::upgrade($taint);
142
143 print $taint eq $up
144 ? "ok " : "not ok ", ++$cnt, " # compare: $encode, upgrade down\n";
145
146 print pack('a*',$taint) eq pack('a*',$up)
147 ? "ok " : "not ok ", ++$cnt, " # bytecmp: $encode, upgrade down\n";
148
149 print is_utf8($taint)
150 ? "ok " : "not ok ", ++$cnt, " # is_utf8: $encode, upgrade down\n";
151
152 print tainted($taint) == tainted($arg)
153 ? "ok " : "not ok ", ++$cnt, " # tainted: $encode, upgrade down\n";
154
155 my $taint = $arg; substr($taint, 0) = $up;
156 utf8::downgrade($taint);
157
158 print $taint eq $down
159 ? "ok " : "not ok ", ++$cnt, " # compare: $encode, downgrade up\n";
160
161 print pack('a*',$taint) eq pack('a*',$down)
162 ? "ok " : "not ok ", ++$cnt, " # bytecmp: $encode, downgrade up\n";
163
164 print !is_utf8($taint)
165 ? "ok " : "not ok ", ++$cnt, " # is_utf8: $encode, downgrade up\n";
166
167 print tainted($taint) == tainted($arg)
168 ? "ok " : "not ok ", ++$cnt, " # tainted: $encode, downgrade up\n";
169
170 my $taint = $arg; substr($taint, 0) = $down;
171 utf8::downgrade($taint);
172
173 print $taint eq $down
174 ? "ok " : "not ok ", ++$cnt, " # compare: $encode, downgrade down\n";
175
176 print pack('a*',$taint) eq pack('a*',$down)
177 ? "ok " : "not ok ", ++$cnt, " # bytecmp: $encode, downgrade down\n";
178
179 print !is_utf8($taint)
180 ? "ok " : "not ok ", ++$cnt, " # is_utf8: $encode, downgrade down\n";
181
182 print tainted($taint) == tainted($arg)
183 ? "ok " : "not ok ", ++$cnt, " # tainted: $encode, downgrade down\n";
184}
185
186