This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
RE: [perl #26136] localtime(3) calls tzset(3), but localtime_r(3) may not.
[perl5.git] / t / op / utftaint.t
CommitLineData
d0ea2801
RGS
1#!./perl -T
2# tests whether tainting works with UTF-8
3
4BEGIN {
d0ea2801
RGS
5 chdir 't' if -d 't';
6 @INC = qw(../lib);
7}
8
9use strict;
10use Config;
11
d2a59f97
NC
12# How to identify taint when you see it
13sub any_tainted (@) {
14 not eval { join("",@_), kill 0; 1 };
15}
16sub tainted ($) {
17 any_tainted @_;
d0ea2801
RGS
18}
19
d2a59f97 20require './test.pl';
d020f5c4 21plan(tests => 3*10 + 3*8 + 2*16 + 2);
d0ea2801
RGS
22
23my $arg = $ENV{PATH}; # a tainted value
24use constant UTF8 => "\x{1234}";
25
78ea37eb
TS
26sub is_utf8 {
27 my $s = shift;
f337b084 28 return 0xB6 != unpack('C', chr(0xB6).$s);
78ea37eb
TS
29}
30
d0ea2801
RGS
31for my $ary ([ascii => 'perl'], [latin1 => "\xB6"], [utf8 => "\x{100}"]) {
32 my $encode = $ary->[0];
33 my $string = $ary->[1];
34
35 my $taint = $arg; substr($taint, 0) = $ary->[1];
36
d2a59f97 37 is(tainted($taint), tainted($arg), "tainted: $encode, before test");
d0ea2801
RGS
38
39 my $lconcat = $taint;
40 $lconcat .= UTF8;
d2a59f97 41 is($lconcat, $string.UTF8, "compare: $encode, concat left");
d0ea2801 42
d2a59f97 43 is(tainted($lconcat), tainted($arg), "tainted: $encode, concat left");
d0ea2801
RGS
44
45 my $rconcat = UTF8;
46 $rconcat .= $taint;
d2a59f97 47 is($rconcat, UTF8.$string, "compare: $encode, concat right");
d0ea2801 48
d2a59f97 49 is(tainted($rconcat), tainted($arg), "tainted: $encode, concat right");
d0ea2801
RGS
50
51 my $ljoin = join('!', $taint, UTF8);
d2a59f97 52 is($ljoin, join('!', $string, UTF8), "compare: $encode, join left");
d0ea2801 53
d2a59f97 54 is(tainted($ljoin), tainted($arg), "tainted: $encode, join left");
d0ea2801
RGS
55
56 my $rjoin = join('!', UTF8, $taint);
d2a59f97 57 is($rjoin, join('!', UTF8, $string), "compare: $encode, join right");
d0ea2801 58
d2a59f97 59 is(tainted($rjoin), tainted($arg), "tainted: $encode, join right");
d0ea2801 60
d2a59f97 61 is(tainted($taint), tainted($arg), "tainted: $encode, after test");
d0ea2801 62}
78ea37eb
TS
63
64
65for my $ary ([ascii => 'perl'], [latin1 => "\xB6"], [utf8 => "\x{100}"]) {
66 my $encode = $ary->[0];
67
68 my $utf8 = pack('U*') . $ary->[1];
f337b084 69 my $byte = unpack('U0a*', $utf8);
78ea37eb
TS
70
71 my $taint = $arg; substr($taint, 0) = $utf8;
72 utf8::encode($taint);
73
d2a59f97 74 is($taint, $byte, "compare: $encode, encode utf8");
78ea37eb 75
d2a59f97 76 is(pack('a*',$taint), pack('a*',$byte), "bytecmp: $encode, encode utf8");
78ea37eb 77
d2a59f97 78 ok(!is_utf8($taint), "is_utf8: $encode, encode utf8");
78ea37eb 79
d2a59f97 80 is(tainted($taint), tainted($arg), "tainted: $encode, encode utf8");
78ea37eb
TS
81
82 my $taint = $arg; substr($taint, 0) = $byte;
83 utf8::decode($taint);
84
d2a59f97 85 is($taint, $utf8, "compare: $encode, decode byte");
78ea37eb 86
d2a59f97 87 is(pack('a*',$taint), pack('a*',$utf8), "bytecmp: $encode, decode byte");
78ea37eb 88
d2a59f97 89 is(is_utf8($taint), ($encode ne 'ascii'), "is_utf8: $encode, decode byte");
78ea37eb 90
d2a59f97 91 is(tainted($taint), tainted($arg), "tainted: $encode, decode byte");
78ea37eb
TS
92}
93
94
95for my $ary ([ascii => 'perl'], [latin1 => "\xB6"]) {
96 my $encode = $ary->[0];
97
98 my $up = pack('U*') . $ary->[1];
f337b084 99 my $down = pack("a*", $ary->[1]);
78ea37eb
TS
100
101 my $taint = $arg; substr($taint, 0) = $up;
102 utf8::upgrade($taint);
103
d2a59f97 104 is($taint, $up, "compare: $encode, upgrade up");
78ea37eb 105
d2a59f97 106 is(pack('a*',$taint), pack('a*',$up), "bytecmp: $encode, upgrade up");
78ea37eb 107
d2a59f97 108 ok(is_utf8($taint), "is_utf8: $encode, upgrade up");
78ea37eb 109
d2a59f97 110 is(tainted($taint), tainted($arg), "tainted: $encode, upgrade up");
78ea37eb
TS
111
112 my $taint = $arg; substr($taint, 0) = $down;
113 utf8::upgrade($taint);
114
d2a59f97 115 is($taint, $up, "compare: $encode, upgrade down");
78ea37eb 116
d2a59f97 117 is(pack('a*',$taint), pack('a*',$up), "bytecmp: $encode, upgrade down");
78ea37eb 118
d2a59f97 119 ok(is_utf8($taint), "is_utf8: $encode, upgrade down");
78ea37eb 120
d2a59f97 121 is(tainted($taint), tainted($arg), "tainted: $encode, upgrade down");
78ea37eb
TS
122
123 my $taint = $arg; substr($taint, 0) = $up;
124 utf8::downgrade($taint);
125
d2a59f97 126 is($taint, $down, "compare: $encode, downgrade up");
78ea37eb 127
d2a59f97 128 is(pack('a*',$taint), pack('a*',$down), "bytecmp: $encode, downgrade up");
78ea37eb 129
d2a59f97 130 ok(!is_utf8($taint), "is_utf8: $encode, downgrade up");
78ea37eb 131
d2a59f97 132 is(tainted($taint), tainted($arg), "tainted: $encode, downgrade up");
78ea37eb
TS
133
134 my $taint = $arg; substr($taint, 0) = $down;
135 utf8::downgrade($taint);
136
d2a59f97 137 is($taint, $down, "compare: $encode, downgrade down");
78ea37eb 138
d2a59f97 139 is(pack('a*',$taint), pack('a*',$down), "bytecmp: $encode, downgrade down");
78ea37eb 140
d2a59f97 141 ok(!is_utf8($taint), "is_utf8: $encode, downgrade down");
78ea37eb 142
d2a59f97 143 is(tainted($taint), tainted($arg), "tainted: $encode, downgrade down");
78ea37eb
TS
144}
145
5316d14d 146{
5316d14d
DD
147 fresh_perl_is('$a = substr $^X, 0, 0; /\x{100}/i; /$a\x{100}/i || print q,ok,',
148 'ok', {switches => ["-T", "-l"]},
149 "matching a regexp is taint agnostic");
e7b79d50
NC
150
151 fresh_perl_is('$a = substr $^X, 0, 0; /$a\x{100}/i || print q,ok,',
152 'ok', {switches => ["-T", "-l"]},
153 "therefore swash_init should be taint agnostic");
154}