This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Re: Compress::Zlib, pack "C" and utf-8 [PATCH]
[perl5.git] / t / op / utftaint.t
CommitLineData
d0ea2801
RGS
1#!./perl -T
2# tests whether tainting works with UTF-8
3
4BEGIN {
d0ea2801
RGS
5 chdir 't' if -d 't';
6 @INC = qw(../lib);
7}
8
9use strict;
10use Config;
11
d2a59f97
NC
12# How to identify taint when you see it
13sub any_tainted (@) {
14 not eval { join("",@_), kill 0; 1 };
15}
16sub tainted ($) {
17 any_tainted @_;
d0ea2801
RGS
18}
19
d2a59f97 20require './test.pl';
d020f5c4 21plan(tests => 3*10 + 3*8 + 2*16 + 2);
d0ea2801
RGS
22
23my $arg = $ENV{PATH}; # a tainted value
24use constant UTF8 => "\x{1234}";
25
1651fc44 26*is_utf8 = \&utf8::is_utf8;
78ea37eb 27
d0ea2801
RGS
28for my $ary ([ascii => 'perl'], [latin1 => "\xB6"], [utf8 => "\x{100}"]) {
29 my $encode = $ary->[0];
30 my $string = $ary->[1];
31
32 my $taint = $arg; substr($taint, 0) = $ary->[1];
33
d2a59f97 34 is(tainted($taint), tainted($arg), "tainted: $encode, before test");
d0ea2801
RGS
35
36 my $lconcat = $taint;
37 $lconcat .= UTF8;
d2a59f97 38 is($lconcat, $string.UTF8, "compare: $encode, concat left");
d0ea2801 39
d2a59f97 40 is(tainted($lconcat), tainted($arg), "tainted: $encode, concat left");
d0ea2801
RGS
41
42 my $rconcat = UTF8;
43 $rconcat .= $taint;
d2a59f97 44 is($rconcat, UTF8.$string, "compare: $encode, concat right");
d0ea2801 45
d2a59f97 46 is(tainted($rconcat), tainted($arg), "tainted: $encode, concat right");
d0ea2801
RGS
47
48 my $ljoin = join('!', $taint, UTF8);
d2a59f97 49 is($ljoin, join('!', $string, UTF8), "compare: $encode, join left");
d0ea2801 50
d2a59f97 51 is(tainted($ljoin), tainted($arg), "tainted: $encode, join left");
d0ea2801
RGS
52
53 my $rjoin = join('!', UTF8, $taint);
d2a59f97 54 is($rjoin, join('!', UTF8, $string), "compare: $encode, join right");
d0ea2801 55
d2a59f97 56 is(tainted($rjoin), tainted($arg), "tainted: $encode, join right");
d0ea2801 57
d2a59f97 58 is(tainted($taint), tainted($arg), "tainted: $encode, after test");
d0ea2801 59}
78ea37eb
ST
60
61
62for my $ary ([ascii => 'perl'], [latin1 => "\xB6"], [utf8 => "\x{100}"]) {
63 my $encode = $ary->[0];
64
65 my $utf8 = pack('U*') . $ary->[1];
f337b084 66 my $byte = unpack('U0a*', $utf8);
78ea37eb
ST
67
68 my $taint = $arg; substr($taint, 0) = $utf8;
69 utf8::encode($taint);
70
d2a59f97 71 is($taint, $byte, "compare: $encode, encode utf8");
78ea37eb 72
d2a59f97 73 is(pack('a*',$taint), pack('a*',$byte), "bytecmp: $encode, encode utf8");
78ea37eb 74
d2a59f97 75 ok(!is_utf8($taint), "is_utf8: $encode, encode utf8");
78ea37eb 76
d2a59f97 77 is(tainted($taint), tainted($arg), "tainted: $encode, encode utf8");
78ea37eb
ST
78
79 my $taint = $arg; substr($taint, 0) = $byte;
80 utf8::decode($taint);
81
d2a59f97 82 is($taint, $utf8, "compare: $encode, decode byte");
78ea37eb 83
d2a59f97 84 is(pack('a*',$taint), pack('a*',$utf8), "bytecmp: $encode, decode byte");
78ea37eb 85
d2a59f97 86 is(is_utf8($taint), ($encode ne 'ascii'), "is_utf8: $encode, decode byte");
78ea37eb 87
d2a59f97 88 is(tainted($taint), tainted($arg), "tainted: $encode, decode byte");
78ea37eb
ST
89}
90
91
92for my $ary ([ascii => 'perl'], [latin1 => "\xB6"]) {
93 my $encode = $ary->[0];
94
95 my $up = pack('U*') . $ary->[1];
f337b084 96 my $down = pack("a*", $ary->[1]);
78ea37eb
ST
97
98 my $taint = $arg; substr($taint, 0) = $up;
99 utf8::upgrade($taint);
100
d2a59f97 101 is($taint, $up, "compare: $encode, upgrade up");
78ea37eb 102
d2a59f97 103 is(pack('a*',$taint), pack('a*',$up), "bytecmp: $encode, upgrade up");
78ea37eb 104
d2a59f97 105 ok(is_utf8($taint), "is_utf8: $encode, upgrade up");
78ea37eb 106
d2a59f97 107 is(tainted($taint), tainted($arg), "tainted: $encode, upgrade up");
78ea37eb
ST
108
109 my $taint = $arg; substr($taint, 0) = $down;
110 utf8::upgrade($taint);
111
d2a59f97 112 is($taint, $up, "compare: $encode, upgrade down");
78ea37eb 113
d2a59f97 114 is(pack('a*',$taint), pack('a*',$up), "bytecmp: $encode, upgrade down");
78ea37eb 115
d2a59f97 116 ok(is_utf8($taint), "is_utf8: $encode, upgrade down");
78ea37eb 117
d2a59f97 118 is(tainted($taint), tainted($arg), "tainted: $encode, upgrade down");
78ea37eb
ST
119
120 my $taint = $arg; substr($taint, 0) = $up;
121 utf8::downgrade($taint);
122
d2a59f97 123 is($taint, $down, "compare: $encode, downgrade up");
78ea37eb 124
d2a59f97 125 is(pack('a*',$taint), pack('a*',$down), "bytecmp: $encode, downgrade up");
78ea37eb 126
d2a59f97 127 ok(!is_utf8($taint), "is_utf8: $encode, downgrade up");
78ea37eb 128
d2a59f97 129 is(tainted($taint), tainted($arg), "tainted: $encode, downgrade up");
78ea37eb
ST
130
131 my $taint = $arg; substr($taint, 0) = $down;
132 utf8::downgrade($taint);
133
d2a59f97 134 is($taint, $down, "compare: $encode, downgrade down");
78ea37eb 135
d2a59f97 136 is(pack('a*',$taint), pack('a*',$down), "bytecmp: $encode, downgrade down");
78ea37eb 137
d2a59f97 138 ok(!is_utf8($taint), "is_utf8: $encode, downgrade down");
78ea37eb 139
d2a59f97 140 is(tainted($taint), tainted($arg), "tainted: $encode, downgrade down");
78ea37eb
ST
141}
142
5316d14d 143{
5316d14d
DD
144 fresh_perl_is('$a = substr $^X, 0, 0; /\x{100}/i; /$a\x{100}/i || print q,ok,',
145 'ok', {switches => ["-T", "-l"]},
146 "matching a regexp is taint agnostic");
e7b79d50
NC
147
148 fresh_perl_is('$a = substr $^X, 0, 0; /$a\x{100}/i || print q,ok,',
149 'ok', {switches => ["-T", "-l"]},
150 "therefore swash_init should be taint agnostic");
151}