Commit | Line | Data |
---|---|---|
7d59b7e4 NIS |
1 | #!./perl |
2 | ||
3 | BEGIN { | |
4 | chdir 't' if -d 't'; | |
5 | @INC = '../lib'; | |
0c4f7ff0 | 6 | unless (find PerlIO::Layer 'perlio') { |
7d59b7e4 NIS |
7 | print "1..0 # Skip: not perlio\n"; |
8 | exit 0; | |
9 | } | |
10 | } | |
11 | ||
169da838 | 12 | no utf8; # needed for use utf8 not griping about the raw octets |
3ba0e062 | 13 | |
768fd157 | 14 | BEGIN { require "./test.pl"; } |
bb4e15c8 | 15 | |
0fb301d7 | 16 | plan(tests => 55); |
bb4e15c8 | 17 | |
7d59b7e4 | 18 | $| = 1; |
7d59b7e4 NIS |
19 | |
20 | open(F,"+>:utf8",'a'); | |
21 | print F chr(0x100).'£'; | |
0fb301d7 | 22 | cmp_ok( tell(F), '==', 4, tell(F) ); |
7d59b7e4 | 23 | print F "\n"; |
0fb301d7 | 24 | cmp_ok( tell(F), '>=', 5, tell(F) ); |
7d59b7e4 | 25 | seek(F,0,0); |
0fb301d7 NC |
26 | is( getc(F), chr(0x100) ); |
27 | is( getc(F), "£" ); | |
28 | is( getc(F), "\n" ); | |
7d59b7e4 NIS |
29 | seek(F,0,0); |
30 | binmode(F,":bytes"); | |
d2f5bb60 PP |
31 | my $chr = chr(0xc4); |
32 | if (ord('A') == 193) { $chr = chr(0x8c); } # EBCDIC | |
0fb301d7 | 33 | is( getc(F), $chr ); |
d2f5bb60 PP |
34 | $chr = chr(0x80); |
35 | if (ord('A') == 193) { $chr = chr(0x41); } # EBCDIC | |
0fb301d7 | 36 | is( getc(F), $chr ); |
d2f5bb60 PP |
37 | $chr = chr(0xc2); |
38 | if (ord('A') == 193) { $chr = chr(0x80); } # EBCDIC | |
0fb301d7 | 39 | is( getc(F), $chr ); |
d2f5bb60 PP |
40 | $chr = chr(0xa3); |
41 | if (ord('A') == 193) { $chr = chr(0x44); } # EBCDIC | |
0fb301d7 NC |
42 | is( getc(F), $chr ); |
43 | is( getc(F), "\n" ); | |
7d59b7e4 NIS |
44 | seek(F,0,0); |
45 | binmode(F,":utf8"); | |
0fb301d7 | 46 | is( scalar(<F>), "\x{100}£\n" ); |
eb5c063a NIS |
47 | seek(F,0,0); |
48 | $buf = chr(0x200); | |
49 | $count = read(F,$buf,2,1); | |
0fb301d7 NC |
50 | cmp_ok( $count, '==', 2 ); |
51 | is( $buf, "\x{200}\x{100}£" ); | |
7d59b7e4 NIS |
52 | close(F); |
53 | ||
360eb788 | 54 | { |
7b89fb7c JH |
55 | $a = chr(300); # This *is* UTF-encoded |
56 | $b = chr(130); # This is not. | |
6874a2de | 57 | |
7b89fb7c JH |
58 | open F, ">:utf8", 'a' or die $!; |
59 | print F $a,"\n"; | |
60 | close F; | |
6874a2de | 61 | |
7b89fb7c JH |
62 | open F, "<:utf8", 'a' or die $!; |
63 | $x = <F>; | |
64 | chomp($x); | |
0fb301d7 | 65 | is( $x, chr(300) ); |
6874a2de | 66 | |
7b89fb7c JH |
67 | open F, "a" or die $!; # Not UTF |
68 | binmode(F, ":bytes"); | |
69 | $x = <F>; | |
70 | chomp($x); | |
71 | $chr = chr(196).chr(172); | |
72 | if (ord('A') == 193) { $chr = chr(141).chr(83); } # EBCDIC | |
0fb301d7 | 73 | is( $x, $chr ); |
7b89fb7c | 74 | close F; |
6874a2de | 75 | |
7b89fb7c JH |
76 | open F, ">:utf8", 'a' or die $!; |
77 | binmode(F); # we write a "\n" and then tell() - avoid CRLF issues. | |
6874a2de | 78 | binmode(F,":utf8"); # turn UTF-8-ness back on |
7b89fb7c JH |
79 | print F $a; |
80 | my $y; | |
81 | { my $x = tell(F); | |
82 | { use bytes; $y = length($a);} | |
0fb301d7 | 83 | cmp_ok( $x, '==', $y ); |
7b89fb7c | 84 | } |
6874a2de | 85 | |
7b89fb7c JH |
86 | { # Check byte length of $b |
87 | use bytes; my $y = length($b); | |
0fb301d7 | 88 | cmp_ok( $y, '==', 1 ); |
7b89fb7c | 89 | } |
6874a2de | 90 | |
7b89fb7c | 91 | print F $b,"\n"; # Don't upgrades $b |
6874a2de | 92 | |
7b89fb7c JH |
93 | { # Check byte length of $b |
94 | use bytes; my $y = length($b); | |
0fb301d7 | 95 | cmp_ok( $y, '==', 1 ); |
7b89fb7c | 96 | } |
6874a2de | 97 | |
7b89fb7c JH |
98 | { |
99 | my $x = tell(F); | |
100 | { use bytes; if (ord('A')==193){$y += 2;}else{$y += 3;}} # EBCDIC ASCII | |
0fb301d7 | 101 | cmp_ok( $x, '==', $y ); |
7b89fb7c | 102 | } |
6874a2de | 103 | |
7b89fb7c | 104 | close F; |
6874a2de | 105 | |
7b89fb7c JH |
106 | open F, "a" or die $!; # Not UTF |
107 | binmode(F, ":bytes"); | |
108 | $x = <F>; | |
109 | chomp($x); | |
110 | $chr = v196.172.194.130; | |
111 | if (ord('A') == 193) { $chr = v141.83.130; } # EBCDIC | |
0fb301d7 | 112 | is( $x, $chr, sprintf('(%vd)', $x) ); |
6874a2de | 113 | |
7b89fb7c JH |
114 | open F, "<:utf8", "a" or die $!; |
115 | $x = <F>; | |
116 | chomp($x); | |
117 | close F; | |
0fb301d7 | 118 | is( $x, chr(300).chr(130), sprintf('(%vd)', $x) ); |
6874a2de | 119 | |
3eb9224a | 120 | open F, ">", "a" or die $!; |
11fa0b78 | 121 | binmode(F, ":bytes:"); |
7b89fb7c JH |
122 | |
123 | # Now let's make it suffer. | |
3eb9224a JH |
124 | my $w; |
125 | { | |
126 | use warnings 'utf8'; | |
127 | local $SIG{__WARN__} = sub { $w = $_[0] }; | |
128 | print F $a; | |
0fb301d7 NC |
129 | ok( (!$@)); |
130 | like($w, qr/Wide character in print/i ); | |
3eb9224a | 131 | } |
54d2e5f1 | 132 | } |
360eb788 NIS |
133 | |
134 | # Hm. Time to get more evil. | |
135 | open F, ">:utf8", "a" or die $!; | |
136 | print F $a; | |
137 | binmode(F, ":bytes"); | |
138 | print F chr(130)."\n"; | |
139 | close F; | |
6874a2de | 140 | |
360eb788 | 141 | open F, "<", "a" or die $!; |
3eb9224a | 142 | binmode(F, ":bytes"); |
360eb788 | 143 | $x = <F>; chomp $x; |
d2f5bb60 PP |
144 | $chr = v196.172.130; |
145 | if (ord('A') == 193) { $chr = v141.83.130; } # EBCDIC | |
0fb301d7 | 146 | is( $x, $chr ); |
360eb788 NIS |
147 | |
148 | # Right. | |
149 | open F, ">:utf8", "a" or die $!; | |
150 | print F $a; | |
151 | close F; | |
152 | open F, ">>", "a" or die $!; | |
ceb1aeda | 153 | binmode(F, ":bytes"); |
360eb788 NIS |
154 | print F chr(130)."\n"; |
155 | close F; | |
156 | ||
157 | open F, "<", "a" or die $!; | |
ceb1aeda | 158 | binmode(F, ":bytes"); |
360eb788 | 159 | $x = <F>; chomp $x; |
ceb1aeda NC |
160 | SKIP: { |
161 | skip("Defaulting to UTF-8 output means that we can't generate a mangled file") | |
162 | if $UTF8_OUTPUT; | |
163 | is( $x, $chr ); | |
164 | } | |
360eb788 NIS |
165 | |
166 | # Now we have a deformed file. | |
d0965105 | 167 | |
75ccb5d3 JH |
168 | SKIP: { |
169 | if (ord('A') == 193) { | |
ceb1aeda | 170 | skip("EBCDIC doesn't complain", 2); |
75ccb5d3 | 171 | } else { |
0fb301d7 | 172 | my @warnings; |
75ccb5d3 JH |
173 | open F, "<:utf8", "a" or die $!; |
174 | $x = <F>; chomp $x; | |
0fb301d7 | 175 | local $SIG{__WARN__} = sub { push @warnings, $_[0]; }; |
75ccb5d3 | 176 | eval { sprintf "%vd\n", $x }; |
0fb301d7 NC |
177 | is (scalar @warnings, 1); |
178 | like ($warnings[0], qr/Malformed UTF-8 character \(unexpected continuation byte 0x82, with no preceding start byte/); | |
75ccb5d3 | 179 | } |
360eb788 NIS |
180 | } |
181 | ||
4f0c37ba | 182 | close F; |
360eb788 | 183 | unlink('a'); |
7d59b7e4 | 184 | |
62961d2e | 185 | open F, ">:utf8", "a"; |
d0965105 | 186 | @a = map { chr(1 << ($_ << 2)) } 0..5; # 0x1, 0x10, .., 0x100000 |
c36dfd09 | 187 | unshift @a, chr(0); # ... and a null byte in front just for fun |
d0965105 JH |
188 | print F @a; |
189 | close F; | |
c36dfd09 | 190 | |
c88828dc JH |
191 | my $c; |
192 | ||
193 | # read() should work on characters, not bytes | |
d0965105 JH |
194 | open F, "<:utf8", "a"; |
195 | $a = 0; | |
0fb301d7 | 196 | my $failed; |
d0965105 | 197 | for (@a) { |
c88828dc JH |
198 | unless (($c = read(F, $b, 1) == 1) && |
199 | length($b) == 1 && | |
200 | ord($b) == ord($_) && | |
201 | tell(F) == ($a += bytes::length($b))) { | |
34fce102 JH |
202 | print '# ord($_) == ', ord($_), "\n"; |
203 | print '# ord($b) == ', ord($b), "\n"; | |
204 | print '# length($b) == ', length($b), "\n"; | |
205 | print '# bytes::length($b) == ', bytes::length($b), "\n"; | |
206 | print '# tell(F) == ', tell(F), "\n"; | |
207 | print '# $a == ', $a, "\n"; | |
208 | print '# $c == ', $c, "\n"; | |
0fb301d7 | 209 | $failed++; |
d0965105 JH |
210 | last; |
211 | } | |
212 | } | |
c36dfd09 | 213 | close F; |
0fb301d7 | 214 | is($failed, undef); |
d0965105 | 215 | |
62961d2e JH |
216 | { |
217 | # Check that warnings are on on I/O, and that they can be muffled. | |
218 | ||
219 | local $SIG{__WARN__} = sub { $@ = shift }; | |
220 | ||
221 | undef $@; | |
222 | open F, ">a"; | |
3eb9224a | 223 | binmode(F, ":bytes"); |
62961d2e JH |
224 | print F chr(0x100); |
225 | close(F); | |
226 | ||
bb4e15c8 | 227 | like( $@, 'Wide character in print' ); |
62961d2e JH |
228 | |
229 | undef $@; | |
2b1ff55a | 230 | open F, ">:utf8", "a"; |
62961d2e JH |
231 | print F chr(0x100); |
232 | close(F); | |
233 | ||
768fd157 | 234 | isnt( defined $@, !0 ); |
62961d2e JH |
235 | |
236 | undef $@; | |
237 | open F, ">a"; | |
238 | binmode(F, ":utf8"); | |
239 | print F chr(0x100); | |
240 | close(F); | |
241 | ||
768fd157 | 242 | isnt( defined $@, !0 ); |
62961d2e JH |
243 | |
244 | no warnings 'utf8'; | |
245 | ||
246 | undef $@; | |
247 | open F, ">a"; | |
248 | print F chr(0x100); | |
249 | close(F); | |
250 | ||
768fd157 | 251 | isnt( defined $@, !0 ); |
62961d2e JH |
252 | |
253 | use warnings 'utf8'; | |
254 | ||
255 | undef $@; | |
256 | open F, ">a"; | |
3eb9224a | 257 | binmode(F, ":bytes"); |
62961d2e JH |
258 | print F chr(0x100); |
259 | close(F); | |
260 | ||
bb4e15c8 | 261 | like( $@, 'Wide character in print' ); |
62961d2e JH |
262 | } |
263 | ||
efd8b2ba AE |
264 | { |
265 | open F, ">:bytes","a"; print F "\xde"; close F; | |
266 | ||
267 | open F, "<:bytes", "a"; | |
268 | my $b = chr 0x100; | |
269 | $b .= <F>; | |
0fb301d7 | 270 | is( $b, chr(0x100).chr(0xde), "21395 '.= <>' utf8 vs. bytes" ); |
efd8b2ba AE |
271 | close F; |
272 | } | |
273 | ||
274 | { | |
275 | open F, ">:utf8","a"; print F chr 0x100; close F; | |
276 | ||
277 | open F, "<:utf8", "a"; | |
278 | my $b = "\xde"; | |
279 | $b .= <F>; | |
0fb301d7 | 280 | is( $b, chr(0xde).chr(0x100), "21395 '.= <>' bytes vs. utf8" ); |
efd8b2ba AE |
281 | close F; |
282 | } | |
283 | ||
b5d30a84 JH |
284 | { |
285 | my @a = ( [ 0x007F, "bytes" ], | |
286 | [ 0x0080, "bytes" ], | |
287 | [ 0x0080, "utf8" ], | |
288 | [ 0x0100, "utf8" ] ); | |
289 | my $t = 34; | |
290 | for my $u (@a) { | |
291 | for my $v (@a) { | |
292 | # print "# @$u - @$v\n"; | |
293 | open F, ">a"; | |
294 | binmode(F, ":" . $u->[1]); | |
295 | print F chr($u->[0]); | |
296 | close F; | |
297 | ||
298 | open F, "<a"; | |
299 | binmode(F, ":" . $u->[1]); | |
300 | ||
301 | my $s = chr($v->[0]); | |
302 | utf8::upgrade($s) if $v->[1] eq "utf8"; | |
303 | ||
304 | $s .= <F>; | |
0fb301d7 | 305 | is( $s, chr($v->[0]) . chr($u->[0]), 'rcatline utf8' ); |
b5d30a84 JH |
306 | close F; |
307 | $t++; | |
308 | } | |
309 | } | |
11c2f0cf | 310 | # last test here 49 |
b5d30a84 JH |
311 | } |
312 | ||
4de5f5ca JH |
313 | { |
314 | # [perl #23428] Somethings rotten in unicode semantics | |
315 | open F, ">a"; | |
316 | binmode F, ":utf8"; | |
317 | syswrite(F, $a = chr(0x100)); | |
d3b4e16f | 318 | close F; |
4de5f5ca JH |
319 | is( ord($a), 0x100, '23428 syswrite should not downgrade scalar' ); |
320 | like( $a, qr/^\w+/, '23428 syswrite should not downgrade scalar' ); | |
321 | } | |
322 | ||
554ad1fc | 323 | # sysread() and syswrite() tested in lib/open.t since Fcntl is used |
c88828dc | 324 | |
d3b4e16f | 325 | { |
532ffc5d | 326 | # <FH> on a :utf8 stream should complain immediately with -w |
d3b4e16f | 327 | # if it finds bad UTF-8 (:encoding(utf8) works this way) |
532ffc5d | 328 | use warnings 'utf8'; |
2d79bf7f | 329 | undef $@; |
d3b4e16f JH |
330 | local $SIG{__WARN__} = sub { $@ = shift }; |
331 | open F, ">a"; | |
332 | binmode F; | |
cc8040a1 TD |
333 | my ($chrE4, $chrF6) = (chr(0xE4), chr(0xF6)); |
334 | if (ord('A') == 193) # EBCDIC | |
335 | { ($chrE4, $chrF6) = (chr(0x43), chr(0xEC)); } | |
336 | print F "foo", $chrE4, "\n"; | |
337 | print F "foo", $chrF6, "\n"; | |
d3b4e16f JH |
338 | close F; |
339 | open F, "<:utf8", "a"; | |
2d79bf7f | 340 | undef $@; |
d3b4e16f | 341 | my $line = <F>; |
cc8040a1 TD |
342 | my ($chrE4, $chrF6) = ("E4", "F6"); |
343 | if (ord('A') == 193) { ($chrE4, $chrF6) = ("43", "EC"); } # EBCDIC | |
344 | like( $@, qr/utf8 "\\x$chrE4" does not map to Unicode .+ <F> line 1/, | |
2d79bf7f JH |
345 | "<:utf8 readline must warn about bad utf8"); |
346 | undef $@; | |
347 | $line .= <F>; | |
cc8040a1 | 348 | like( $@, qr/utf8 "\\x$chrF6" does not map to Unicode .+ <F> line 2/, |
2d79bf7f | 349 | "<:utf8 rcatline must warn about bad utf8"); |
d3b4e16f JH |
350 | close F; |
351 | } | |
352 | ||
c88828dc JH |
353 | END { |
354 | 1 while unlink "a"; | |
355 | 1 while unlink "b"; | |
356 | } |