Commit | Line | Data |
---|---|---|
7d59b7e4 NIS |
1 | #!./perl |
2 | ||
3 | BEGIN { | |
4 | chdir 't' if -d 't'; | |
5 | @INC = '../lib'; | |
e05e9c3d NC |
6 | require './test.pl'; |
7 | skip_all_without_perlio(); | |
7d59b7e4 NIS |
8 | } |
9 | ||
169da838 | 10 | no utf8; # needed for use utf8 not griping about the raw octets |
3ba0e062 | 11 | |
bb4e15c8 | 12 | |
0fb301d7 | 13 | plan(tests => 55); |
bb4e15c8 | 14 | |
7d59b7e4 | 15 | $| = 1; |
7d59b7e4 | 16 | |
62a28c97 NC |
17 | my $a_file = tempfile(); |
18 | ||
19 | open(F,"+>:utf8",$a_file); | |
7d59b7e4 | 20 | print F chr(0x100).'£'; |
0fb301d7 | 21 | cmp_ok( tell(F), '==', 4, tell(F) ); |
7d59b7e4 | 22 | print F "\n"; |
0fb301d7 | 23 | cmp_ok( tell(F), '>=', 5, tell(F) ); |
7d59b7e4 | 24 | seek(F,0,0); |
0fb301d7 NC |
25 | is( getc(F), chr(0x100) ); |
26 | is( getc(F), "£" ); | |
27 | is( getc(F), "\n" ); | |
7d59b7e4 NIS |
28 | seek(F,0,0); |
29 | binmode(F,":bytes"); | |
d2f5bb60 | 30 | my $chr = chr(0xc4); |
62a28c97 | 31 | if (ord($a_file) == 193) { $chr = chr(0x8c); } # EBCDIC |
0fb301d7 | 32 | is( getc(F), $chr ); |
d2f5bb60 | 33 | $chr = chr(0x80); |
62a28c97 | 34 | if (ord($a_file) == 193) { $chr = chr(0x41); } # EBCDIC |
0fb301d7 | 35 | is( getc(F), $chr ); |
d2f5bb60 | 36 | $chr = chr(0xc2); |
62a28c97 | 37 | if (ord($a_file) == 193) { $chr = chr(0x80); } # EBCDIC |
0fb301d7 | 38 | is( getc(F), $chr ); |
d2f5bb60 | 39 | $chr = chr(0xa3); |
62a28c97 | 40 | if (ord($a_file) == 193) { $chr = chr(0x44); } # EBCDIC |
0fb301d7 NC |
41 | is( getc(F), $chr ); |
42 | is( getc(F), "\n" ); | |
7d59b7e4 NIS |
43 | seek(F,0,0); |
44 | binmode(F,":utf8"); | |
0fb301d7 | 45 | is( scalar(<F>), "\x{100}£\n" ); |
eb5c063a NIS |
46 | seek(F,0,0); |
47 | $buf = chr(0x200); | |
48 | $count = read(F,$buf,2,1); | |
0fb301d7 NC |
49 | cmp_ok( $count, '==', 2 ); |
50 | is( $buf, "\x{200}\x{100}£" ); | |
7d59b7e4 NIS |
51 | close(F); |
52 | ||
360eb788 | 53 | { |
7b89fb7c JH |
54 | $a = chr(300); # This *is* UTF-encoded |
55 | $b = chr(130); # This is not. | |
6874a2de | 56 | |
62a28c97 | 57 | open F, ">:utf8", $a_file or die $!; |
7b89fb7c JH |
58 | print F $a,"\n"; |
59 | close F; | |
6874a2de | 60 | |
62a28c97 | 61 | open F, "<:utf8", $a_file or die $!; |
7b89fb7c JH |
62 | $x = <F>; |
63 | chomp($x); | |
0fb301d7 | 64 | is( $x, chr(300) ); |
6874a2de | 65 | |
62a28c97 | 66 | open F, $a_file or die $!; # Not UTF |
7b89fb7c JH |
67 | binmode(F, ":bytes"); |
68 | $x = <F>; | |
69 | chomp($x); | |
70 | $chr = chr(196).chr(172); | |
62a28c97 | 71 | if (ord($a_file) == 193) { $chr = chr(141).chr(83); } # EBCDIC |
0fb301d7 | 72 | is( $x, $chr ); |
7b89fb7c | 73 | close F; |
6874a2de | 74 | |
62a28c97 | 75 | open F, ">:utf8", $a_file or die $!; |
7b89fb7c | 76 | binmode(F); # we write a "\n" and then tell() - avoid CRLF issues. |
6874a2de | 77 | binmode(F,":utf8"); # turn UTF-8-ness back on |
7b89fb7c JH |
78 | print F $a; |
79 | my $y; | |
80 | { my $x = tell(F); | |
81 | { use bytes; $y = length($a);} | |
0fb301d7 | 82 | cmp_ok( $x, '==', $y ); |
7b89fb7c | 83 | } |
6874a2de | 84 | |
7b89fb7c JH |
85 | { # Check byte length of $b |
86 | use bytes; my $y = length($b); | |
0fb301d7 | 87 | cmp_ok( $y, '==', 1 ); |
7b89fb7c | 88 | } |
6874a2de | 89 | |
7b89fb7c | 90 | print F $b,"\n"; # Don't upgrades $b |
6874a2de | 91 | |
7b89fb7c JH |
92 | { # Check byte length of $b |
93 | use bytes; my $y = length($b); | |
0fb301d7 | 94 | cmp_ok( $y, '==', 1 ); |
7b89fb7c | 95 | } |
6874a2de | 96 | |
7b89fb7c JH |
97 | { |
98 | my $x = tell(F); | |
99 | { use bytes; if (ord('A')==193){$y += 2;}else{$y += 3;}} # EBCDIC ASCII | |
0fb301d7 | 100 | cmp_ok( $x, '==', $y ); |
7b89fb7c | 101 | } |
6874a2de | 102 | |
7b89fb7c | 103 | close F; |
6874a2de | 104 | |
62a28c97 | 105 | open F, $a_file or die $!; # Not UTF |
7b89fb7c JH |
106 | binmode(F, ":bytes"); |
107 | $x = <F>; | |
108 | chomp($x); | |
109 | $chr = v196.172.194.130; | |
110 | if (ord('A') == 193) { $chr = v141.83.130; } # EBCDIC | |
0fb301d7 | 111 | is( $x, $chr, sprintf('(%vd)', $x) ); |
6874a2de | 112 | |
62a28c97 | 113 | open F, "<:utf8", $a_file or die $!; |
7b89fb7c JH |
114 | $x = <F>; |
115 | chomp($x); | |
116 | close F; | |
0fb301d7 | 117 | is( $x, chr(300).chr(130), sprintf('(%vd)', $x) ); |
6874a2de | 118 | |
62a28c97 | 119 | open F, ">", $a_file or die $!; |
11fa0b78 | 120 | binmode(F, ":bytes:"); |
7b89fb7c JH |
121 | |
122 | # Now let's make it suffer. | |
3eb9224a JH |
123 | my $w; |
124 | { | |
125 | use warnings 'utf8'; | |
126 | local $SIG{__WARN__} = sub { $w = $_[0] }; | |
127 | print F $a; | |
0fb301d7 NC |
128 | ok( (!$@)); |
129 | like($w, qr/Wide character in print/i ); | |
3eb9224a | 130 | } |
54d2e5f1 | 131 | } |
360eb788 NIS |
132 | |
133 | # Hm. Time to get more evil. | |
62a28c97 | 134 | open F, ">:utf8", $a_file or die $!; |
360eb788 NIS |
135 | print F $a; |
136 | binmode(F, ":bytes"); | |
137 | print F chr(130)."\n"; | |
138 | close F; | |
6874a2de | 139 | |
62a28c97 | 140 | open F, "<", $a_file or die $!; |
3eb9224a | 141 | binmode(F, ":bytes"); |
360eb788 | 142 | $x = <F>; chomp $x; |
d2f5bb60 PP |
143 | $chr = v196.172.130; |
144 | if (ord('A') == 193) { $chr = v141.83.130; } # EBCDIC | |
0fb301d7 | 145 | is( $x, $chr ); |
360eb788 NIS |
146 | |
147 | # Right. | |
62a28c97 | 148 | open F, ">:utf8", $a_file or die $!; |
360eb788 NIS |
149 | print F $a; |
150 | close F; | |
62a28c97 | 151 | open F, ">>", $a_file or die $!; |
ceb1aeda | 152 | binmode(F, ":bytes"); |
360eb788 NIS |
153 | print F chr(130)."\n"; |
154 | close F; | |
155 | ||
62a28c97 | 156 | open F, "<", $a_file or die $!; |
ceb1aeda | 157 | binmode(F, ":bytes"); |
360eb788 | 158 | $x = <F>; chomp $x; |
ceb1aeda NC |
159 | SKIP: { |
160 | skip("Defaulting to UTF-8 output means that we can't generate a mangled file") | |
161 | if $UTF8_OUTPUT; | |
162 | is( $x, $chr ); | |
163 | } | |
360eb788 NIS |
164 | |
165 | # Now we have a deformed file. | |
d0965105 | 166 | |
75ccb5d3 JH |
167 | SKIP: { |
168 | if (ord('A') == 193) { | |
ceb1aeda | 169 | skip("EBCDIC doesn't complain", 2); |
75ccb5d3 | 170 | } else { |
0fb301d7 | 171 | my @warnings; |
62a28c97 | 172 | open F, "<:utf8", $a_file or die $!; |
75ccb5d3 | 173 | $x = <F>; chomp $x; |
0fb301d7 | 174 | local $SIG{__WARN__} = sub { push @warnings, $_[0]; }; |
75ccb5d3 | 175 | eval { sprintf "%vd\n", $x }; |
0fb301d7 NC |
176 | is (scalar @warnings, 1); |
177 | like ($warnings[0], qr/Malformed UTF-8 character \(unexpected continuation byte 0x82, with no preceding start byte/); | |
75ccb5d3 | 178 | } |
360eb788 NIS |
179 | } |
180 | ||
4f0c37ba | 181 | close F; |
62a28c97 | 182 | unlink($a_file); |
7d59b7e4 | 183 | |
62a28c97 | 184 | open F, ">:utf8", $a_file; |
d0965105 | 185 | @a = map { chr(1 << ($_ << 2)) } 0..5; # 0x1, 0x10, .., 0x100000 |
c36dfd09 | 186 | unshift @a, chr(0); # ... and a null byte in front just for fun |
d0965105 JH |
187 | print F @a; |
188 | close F; | |
c36dfd09 | 189 | |
c88828dc JH |
190 | my $c; |
191 | ||
192 | # read() should work on characters, not bytes | |
62a28c97 | 193 | open F, "<:utf8", $a_file; |
d0965105 | 194 | $a = 0; |
0fb301d7 | 195 | my $failed; |
d0965105 | 196 | for (@a) { |
c88828dc JH |
197 | unless (($c = read(F, $b, 1) == 1) && |
198 | length($b) == 1 && | |
199 | ord($b) == ord($_) && | |
200 | tell(F) == ($a += bytes::length($b))) { | |
34fce102 JH |
201 | print '# ord($_) == ', ord($_), "\n"; |
202 | print '# ord($b) == ', ord($b), "\n"; | |
203 | print '# length($b) == ', length($b), "\n"; | |
204 | print '# bytes::length($b) == ', bytes::length($b), "\n"; | |
205 | print '# tell(F) == ', tell(F), "\n"; | |
206 | print '# $a == ', $a, "\n"; | |
207 | print '# $c == ', $c, "\n"; | |
0fb301d7 | 208 | $failed++; |
d0965105 JH |
209 | last; |
210 | } | |
211 | } | |
c36dfd09 | 212 | close F; |
0fb301d7 | 213 | is($failed, undef); |
d0965105 | 214 | |
62961d2e JH |
215 | { |
216 | # Check that warnings are on on I/O, and that they can be muffled. | |
217 | ||
218 | local $SIG{__WARN__} = sub { $@ = shift }; | |
219 | ||
220 | undef $@; | |
62a28c97 | 221 | open F, ">$a_file"; |
3eb9224a | 222 | binmode(F, ":bytes"); |
62961d2e JH |
223 | print F chr(0x100); |
224 | close(F); | |
225 | ||
bb4e15c8 | 226 | like( $@, 'Wide character in print' ); |
62961d2e JH |
227 | |
228 | undef $@; | |
62a28c97 | 229 | open F, ">:utf8", $a_file; |
62961d2e JH |
230 | print F chr(0x100); |
231 | close(F); | |
232 | ||
768fd157 | 233 | isnt( defined $@, !0 ); |
62961d2e JH |
234 | |
235 | undef $@; | |
62a28c97 | 236 | open F, ">$a_file"; |
62961d2e JH |
237 | binmode(F, ":utf8"); |
238 | print F chr(0x100); | |
239 | close(F); | |
240 | ||
768fd157 | 241 | isnt( defined $@, !0 ); |
62961d2e JH |
242 | |
243 | no warnings 'utf8'; | |
244 | ||
245 | undef $@; | |
62a28c97 | 246 | open F, ">$a_file"; |
62961d2e JH |
247 | print F chr(0x100); |
248 | close(F); | |
249 | ||
768fd157 | 250 | isnt( defined $@, !0 ); |
62961d2e JH |
251 | |
252 | use warnings 'utf8'; | |
253 | ||
254 | undef $@; | |
62a28c97 | 255 | open F, ">$a_file"; |
3eb9224a | 256 | binmode(F, ":bytes"); |
62961d2e JH |
257 | print F chr(0x100); |
258 | close(F); | |
259 | ||
bb4e15c8 | 260 | like( $@, 'Wide character in print' ); |
62961d2e JH |
261 | } |
262 | ||
efd8b2ba | 263 | { |
62a28c97 | 264 | open F, ">:bytes",$a_file; print F "\xde"; close F; |
efd8b2ba | 265 | |
62a28c97 | 266 | open F, "<:bytes", $a_file; |
efd8b2ba AE |
267 | my $b = chr 0x100; |
268 | $b .= <F>; | |
0fb301d7 | 269 | is( $b, chr(0x100).chr(0xde), "21395 '.= <>' utf8 vs. bytes" ); |
efd8b2ba AE |
270 | close F; |
271 | } | |
272 | ||
273 | { | |
62a28c97 | 274 | open F, ">:utf8",$a_file; print F chr 0x100; close F; |
efd8b2ba | 275 | |
62a28c97 | 276 | open F, "<:utf8", $a_file; |
efd8b2ba AE |
277 | my $b = "\xde"; |
278 | $b .= <F>; | |
0fb301d7 | 279 | is( $b, chr(0xde).chr(0x100), "21395 '.= <>' bytes vs. utf8" ); |
efd8b2ba AE |
280 | close F; |
281 | } | |
282 | ||
b5d30a84 JH |
283 | { |
284 | my @a = ( [ 0x007F, "bytes" ], | |
285 | [ 0x0080, "bytes" ], | |
286 | [ 0x0080, "utf8" ], | |
287 | [ 0x0100, "utf8" ] ); | |
288 | my $t = 34; | |
289 | for my $u (@a) { | |
290 | for my $v (@a) { | |
291 | # print "# @$u - @$v\n"; | |
62a28c97 | 292 | open F, ">$a_file"; |
b5d30a84 JH |
293 | binmode(F, ":" . $u->[1]); |
294 | print F chr($u->[0]); | |
295 | close F; | |
296 | ||
62a28c97 | 297 | open F, "<$a_file"; |
b5d30a84 JH |
298 | binmode(F, ":" . $u->[1]); |
299 | ||
300 | my $s = chr($v->[0]); | |
301 | utf8::upgrade($s) if $v->[1] eq "utf8"; | |
302 | ||
303 | $s .= <F>; | |
0fb301d7 | 304 | is( $s, chr($v->[0]) . chr($u->[0]), 'rcatline utf8' ); |
b5d30a84 JH |
305 | close F; |
306 | $t++; | |
307 | } | |
308 | } | |
11c2f0cf | 309 | # last test here 49 |
b5d30a84 JH |
310 | } |
311 | ||
4de5f5ca JH |
312 | { |
313 | # [perl #23428] Somethings rotten in unicode semantics | |
62a28c97 | 314 | open F, ">$a_file"; |
4de5f5ca JH |
315 | binmode F, ":utf8"; |
316 | syswrite(F, $a = chr(0x100)); | |
d3b4e16f | 317 | close F; |
4de5f5ca JH |
318 | is( ord($a), 0x100, '23428 syswrite should not downgrade scalar' ); |
319 | like( $a, qr/^\w+/, '23428 syswrite should not downgrade scalar' ); | |
320 | } | |
321 | ||
554ad1fc | 322 | # sysread() and syswrite() tested in lib/open.t since Fcntl is used |
c88828dc | 323 | |
d3b4e16f | 324 | { |
532ffc5d | 325 | # <FH> on a :utf8 stream should complain immediately with -w |
d3b4e16f | 326 | # if it finds bad UTF-8 (:encoding(utf8) works this way) |
532ffc5d | 327 | use warnings 'utf8'; |
2d79bf7f | 328 | undef $@; |
d3b4e16f | 329 | local $SIG{__WARN__} = sub { $@ = shift }; |
62a28c97 | 330 | open F, ">$a_file"; |
d3b4e16f | 331 | binmode F; |
cc8040a1 TD |
332 | my ($chrE4, $chrF6) = (chr(0xE4), chr(0xF6)); |
333 | if (ord('A') == 193) # EBCDIC | |
334 | { ($chrE4, $chrF6) = (chr(0x43), chr(0xEC)); } | |
335 | print F "foo", $chrE4, "\n"; | |
336 | print F "foo", $chrF6, "\n"; | |
d3b4e16f | 337 | close F; |
62a28c97 | 338 | open F, "<:utf8", $a_file; |
2d79bf7f | 339 | undef $@; |
d3b4e16f | 340 | my $line = <F>; |
cc8040a1 TD |
341 | my ($chrE4, $chrF6) = ("E4", "F6"); |
342 | if (ord('A') == 193) { ($chrE4, $chrF6) = ("43", "EC"); } # EBCDIC | |
343 | like( $@, qr/utf8 "\\x$chrE4" does not map to Unicode .+ <F> line 1/, | |
2d79bf7f JH |
344 | "<:utf8 readline must warn about bad utf8"); |
345 | undef $@; | |
346 | $line .= <F>; | |
cc8040a1 | 347 | like( $@, qr/utf8 "\\x$chrF6" does not map to Unicode .+ <F> line 2/, |
2d79bf7f | 348 | "<:utf8 rcatline must warn about bad utf8"); |
d3b4e16f JH |
349 | close F; |
350 | } |