This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Die on malformed isFOO_utf8() input
[perl5.git] / t / lib / warnings / utf8
Commit / Line / Data
f0df466a
JH
1
2 utf8.c AOK
3
4b88fb76 4 [utf8_to_uvchr_buf]
f0df466a
JH
5 Malformed UTF-8 character
6 my $a = ord "\x80" ;
7
8 Malformed UTF-8 character
9 my $a = ord "\xf080" ;
10 <<<<<< this warning can't be easily triggered from perl anymore
11
12 [utf16_to_utf8]
13 Malformed UTF-16 surrogate
93f09d7b 14 <<<<<< Add a test when something actually calls utf16_to_utf8
f0df466a
JH
15
16__END__
4b88fb76 17# utf8.c [utf8_to_uvchr_buf] -W
f0df466a
JH
18BEGIN {
19 if (ord('A') == 193) {
72b4e0d1 20 print "SKIPPED\n# ebcdic platforms generates different Malformed UTF-8 warnings.";
f0df466a
JH
21 exit 0;
22 }
23}
24use utf8 ;
25my $a = "snøstorm" ;
26{
27 no warnings 'utf8' ;
28 my $a = "snøstorm";
29 use warnings 'utf8' ;
30 my $a = "snøstorm";
31}
32EXPECT
7cf8d05d
KW
33Malformed UTF-8 character: \xf8\x73\x74\x6f\x72 (unexpected non-continuation byte 0x73, immediately after start byte 0xf8; need 5 bytes, got 1) at - line 9.
34Malformed UTF-8 character: \xf8\x73\x74\x6f\x72 (unexpected non-continuation byte 0x73, immediately after start byte 0xf8; need 5 bytes, got 1) at - line 14.
f0df466a 35########
507b9800 36use warnings 'utf8';
9ae3ac1a
KW
37my $d7ff = uc(chr(0xD7FF));
38my $d800 = uc(chr(0xD800));
39my $dfff = uc(chr(0xDFFF));
40my $e000 = uc(chr(0xE000));
41my $feff = uc(chr(0xFEFF));
42my $fffd = uc(chr(0xFFFD));
43my $fffe = uc(chr(0xFFFE));
44my $ffff = uc(chr(0xFFFF));
45my $hex4 = uc(chr(0x10000));
46my $hex5 = uc(chr(0x100000));
47my $maxm1 = uc(chr(0x10FFFE));
48my $max = uc(chr(0x10FFFF));
49my $nonUnicode = uc(chr(0x110000));
507b9800 50no warnings 'utf8';
9ae3ac1a
KW
51my $d7ff = uc(chr(0xD7FF));
52my $d800 = uc(chr(0xD800));
53my $dfff = uc(chr(0xDFFF));
54my $e000 = uc(chr(0xE000));
55my $feff = uc(chr(0xFEFF));
56my $fffd = uc(chr(0xFFFD));
57my $fffe = uc(chr(0xFFFE));
58my $ffff = uc(chr(0xFFFF));
59my $hex4 = uc(chr(0x10000));
60my $hex5 = uc(chr(0x100000));
61my $maxm1 = uc(chr(0x10FFFE));
62my $max = uc(chr(0x10FFFF));
63my $nonUnicode = uc(chr(0x110000));
507b9800 64EXPECT
9ae3ac1a
KW
65Operation "uc" returns its argument for UTF-16 surrogate U+D800 at - line 3.
66Operation "uc" returns its argument for UTF-16 surrogate U+DFFF at - line 4.
67Operation "uc" returns its argument for non-Unicode code point 0x110000 at - line 14.
507b9800 68########
62961d2e 69use warnings 'utf8';
8457b38f
KW
70my $d800 = uc(chr(0xD800));
71my $nonUnicode = uc(chr(0x110000));
72no warnings 'surrogate';
73my $d800 = uc(chr(0xD800));
74my $nonUnicode = uc(chr(0x110000));
75EXPECT
76Operation "uc" returns its argument for UTF-16 surrogate U+D800 at - line 2.
77Operation "uc" returns its argument for non-Unicode code point 0x110000 at - line 3.
78Operation "uc" returns its argument for non-Unicode code point 0x110000 at - line 6.
79########
80use warnings 'utf8';
81my $d800 = uc(chr(0xD800));
82my $nonUnicode = uc(chr(0x110000));
8457b38f
KW
83no warnings 'non_unicode';
84my $d800 = uc(chr(0xD800));
85my $nonUnicode = uc(chr(0x110000));
8457b38f
KW
86EXPECT
87Operation "uc" returns its argument for UTF-16 surrogate U+D800 at - line 2.
88Operation "uc" returns its argument for non-Unicode code point 0x110000 at - line 3.
9415f659
KW
89Operation "uc" returns its argument for UTF-16 surrogate U+D800 at - line 5.
90########
9415f659 91use warnings 'utf8';
760c7c2f 92no warnings 'deprecated'; # This is above IV_MAX on 32 bit machines
9415f659
KW
93my $big_nonUnicode = uc(chr(0x8000_0000));
94no warnings 'non_unicode';
95my $big_nonUnicode = uc(chr(0x8000_0000));
96EXPECT
760c7c2f 97Operation "uc" returns its argument for non-Unicode code point 0x80000000 at - line 3.
8457b38f
KW
98########
99use warnings 'utf8';
9ae3ac1a
KW
100my $d7ff = lc pack("U", 0xD7FF);
101my $d800 = lc pack("U", 0xD800);
102my $dfff = lc pack("U", 0xDFFF);
103my $e000 = lc pack("U", 0xE000);
104my $feff = lc pack("U", 0xFEFF);
105my $fffd = lc pack("U", 0xFFFD);
106my $fffe = lc pack("U", 0xFFFE);
107my $ffff = lc pack("U", 0xFFFF);
108my $hex4 = lc pack("U", 0x10000);
109my $hex5 = lc pack("U", 0x100000);
110my $maxm1 = lc pack("U", 0x10FFFE);
111my $max = lc pack("U", 0x10FFFF);
112my $nonUnicode = lc(pack("U", 0x110000));
62961d2e 113no warnings 'utf8';
9ae3ac1a
KW
114my $d7ff = lc pack("U", 0xD7FF);
115my $d800 = lc pack("U", 0xD800);
116my $dfff = lc pack("U", 0xDFFF);
117my $e000 = lc pack("U", 0xE000);
118my $feff = lc pack("U", 0xFEFF);
119my $fffd = lc pack("U", 0xFFFD);
120my $fffe = lc pack("U", 0xFFFE);
121my $ffff = lc pack("U", 0xFFFF);
122my $hex4 = lc pack("U", 0x10000);
123my $hex5 = lc pack("U", 0x100000);
124my $maxm1 = lc pack("U", 0x10FFFE);
125my $max = lc pack("U", 0x10FFFF);
126my $nonUnicode = lc(pack("U", 0x110000));
62961d2e 127EXPECT
9ae3ac1a
KW
128Operation "lc" returns its argument for UTF-16 surrogate U+D800 at - line 3.
129Operation "lc" returns its argument for UTF-16 surrogate U+DFFF at - line 4.
130Operation "lc" returns its argument for non-Unicode code point 0x110000 at - line 14.
62961d2e
JH
131########
132use warnings 'utf8';
9ae3ac1a
KW
133my $d7ff = ucfirst "\x{D7FF}";
134my $d800 = ucfirst "\x{D800}";
135my $dfff = ucfirst "\x{DFFF}";
136my $e000 = ucfirst "\x{E000}";
137my $feff = ucfirst "\x{FEFF}";
138my $fffd = ucfirst "\x{FFFD}";
139my $fffe = ucfirst "\x{FFFE}";
140my $ffff = ucfirst "\x{FFFF}";
141my $hex4 = ucfirst "\x{10000}";
142my $hex5 = ucfirst "\x{100000}";
143my $maxm1 = ucfirst "\x{10FFFE}";
144my $max = ucfirst "\x{10FFFF}";
145my $nonUnicode = ucfirst "\x{110000}";
62961d2e 146no warnings 'utf8';
9ae3ac1a
KW
147my $d7ff = ucfirst "\x{D7FF}";
148my $d800 = ucfirst "\x{D800}";
149my $dfff = ucfirst "\x{DFFF}";
150my $e000 = ucfirst "\x{E000}";
151my $feff = ucfirst "\x{FEFF}";
152my $fffd = ucfirst "\x{FFFD}";
153my $fffe = ucfirst "\x{FFFE}";
154my $ffff = ucfirst "\x{FFFF}";
155my $hex4 = ucfirst "\x{10000}";
156my $hex5 = ucfirst "\x{100000}";
157my $maxm1 = ucfirst "\x{10FFFE}";
158my $max = ucfirst "\x{10FFFF}";
159my $nonUnicode = ucfirst "\x{110000}";
160EXPECT
161Operation "ucfirst" returns its argument for UTF-16 surrogate U+D800 at - line 3.
162Operation "ucfirst" returns its argument for UTF-16 surrogate U+DFFF at - line 4.
163Operation "ucfirst" returns its argument for non-Unicode code point 0x110000 at - line 14.
164########
2d88a86a 165# NAME Matching \p{} against above-Unicode
9ae3ac1a
KW
166use warnings 'utf8';
167chr(0xD7FF) =~ /\p{Any}/;
168chr(0xD800) =~ /\p{Any}/;
169chr(0xDFFF) =~ /\p{Any}/;
170chr(0xE000) =~ /\p{Any}/;
171chr(0xFEFF) =~ /\p{Any}/;
172chr(0xFFFD) =~ /\p{Any}/;
173chr(0xFFFE) =~ /\p{Any}/;
174chr(0xFFFF) =~ /\p{Any}/;
175chr(0x10000) =~ /\p{Any}/;
176chr(0x100000) =~ /\p{Any}/;
177chr(0x10FFFE) =~ /\p{Any}/;
178chr(0x10FFFF) =~ /\p{Any}/;
2d88a86a
KW
179chr(0x110000) =~ /[\p{Any}]/;
180chr(0x110001) =~ /[\w\p{Any}]/;
181chr(0x10FFFF) =~ /\p{All}/;
182chr(0x110002) =~ /[\w\p{All}]/;
183chr(0x110003) =~ /[\p{XPosixWord}]/;
184chr(0x110004) =~ /[\P{XPosixWord}]/;
185chr(0x110005) =~ /^[\p{Unassigned}]/;
186chr(0x110006) =~ /^[\P{Unassigned}]/;
187# Only Unicode properties give non-Unicode warnings, and only those properties
188# which do match above Unicode; and not when something else in the class
189# matches above Unicode. Below we test three ways where something outside the
190# property may match non-Unicode: a code point above it, a class \S that we
191# know at compile time doesn't, and a class \W whose values aren't (at the time
192# of this writing) specified at compile time, but which wouldn't match
5073ffbd
KW
193chr(0x110050) =~ /\w/;
194chr(0x110051) =~ /\W/;
195chr(0x110052) =~ /\d/;
196chr(0x110053) =~ /\D/;
197chr(0x110054) =~ /\s/;
198chr(0x110055) =~ /\S/;
199chr(0x110056) =~ /[[:word:]]/;
200chr(0x110057) =~ /[[:^word:]]/;
201chr(0x110058) =~ /[[:alnum:]]/;
202chr(0x110059) =~ /[[:^alnum:]]/;
203chr(0x11005A) =~ /[[:space:]]/;
204chr(0x11005B) =~ /[[:^space:]]/;
205chr(0x11005C) =~ /[[:digit:]]/;
206chr(0x11005D) =~ /[[:^digit:]]/;
207chr(0x11005E) =~ /[[:alpha:]]/;
208chr(0x11005F) =~ /[[:^alpha:]]/;
209chr(0x110060) =~ /[[:ascii:]]/;
210chr(0x110061) =~ /[[:^ascii:]]/;
211chr(0x110062) =~ /[[:cntrl:]]/;
212chr(0x110063) =~ /[[:^cntrl:]]/;
213chr(0x110064) =~ /[[:graph:]]/;
214chr(0x110065) =~ /[[:^graph:]]/;
215chr(0x110066) =~ /[[:lower:]]/;
216chr(0x110067) =~ /[[:^lower:]]/;
217chr(0x110068) =~ /[[:print:]]/;
218chr(0x110069) =~ /[[:^print:]]/;
219chr(0x11006A) =~ /[[:punct:]]/;
220chr(0x11006B) =~ /[[:^punct:]]/;
221chr(0x11006C) =~ /[[:upper:]]/;
222chr(0x11006D) =~ /[[:^upper:]]/;
223chr(0x11006E) =~ /[[:xdigit:]]/;
224chr(0x11006F) =~ /[[:^xdigit:]]/;
225chr(0x110070) =~ /[[:blank:]]/;
226chr(0x110071) =~ /[[:^blank:]]/;
2d88a86a
KW
227chr(0x111010) =~ /[\W\p{Unassigned}]/;
228chr(0x111011) =~ /[\W\P{Unassigned}]/;
229chr(0x112010) =~ /[\S\p{Unassigned}]/;
230chr(0x112011) =~ /[\S\P{Unassigned}]/;
231chr(0x113010) =~ /[\x{110000}\p{Unassigned}]/;
232chr(0x113011) =~ /[\x{110000}\P{Unassigned}]/;
9ae3ac1a
KW
233no warnings 'utf8';
234chr(0xD7FF) =~ /\p{Any}/;
235chr(0xD800) =~ /\p{Any}/;
236chr(0xDFFF) =~ /\p{Any}/;
237chr(0xE000) =~ /\p{Any}/;
238chr(0xFEFF) =~ /\p{Any}/;
239chr(0xFFFD) =~ /\p{Any}/;
240chr(0xFFFE) =~ /\p{Any}/;
241chr(0xFFFF) =~ /\p{Any}/;
242chr(0x10000) =~ /\p{Any}/;
243chr(0x100000) =~ /\p{Any}/;
244chr(0x10FFFE) =~ /\p{Any}/;
245chr(0x10FFFF) =~ /\p{Any}/;
2d88a86a
KW
246chr(0x110000) =~ /[\p{Any}]/;
247chr(0x110001) =~ /[\w\p{Any}]/;
248chr(0x10FFFF) =~ /\p{All}/;
249chr(0x110002) =~ /[\w\p{All}]/;
250chr(0x110003) =~ /[\p{XPosixWord}]/;
251chr(0x110004) =~ /[\P{XPosixWord}]/;
252chr(0x110005) =~ /^[\p{Unassigned}]/;
253chr(0x110006) =~ /^[\P{Unassigned}]/;
5073ffbd
KW
254chr(0x110050) =~ /\w/;
255chr(0x110051) =~ /\W/;
256chr(0x110052) =~ /\d/;
257chr(0x110053) =~ /\D/;
258chr(0x110054) =~ /\s/;
259chr(0x110055) =~ /\S/;
260chr(0x110056) =~ /[[:word:]]/;
261chr(0x110057) =~ /[[:^word:]]/;
262chr(0x110058) =~ /[[:alnum:]]/;
263chr(0x110059) =~ /[[:^alnum:]]/;
264chr(0x11005A) =~ /[[:space:]]/;
265chr(0x11005B) =~ /[[:^space:]]/;
266chr(0x11005C) =~ /[[:digit:]]/;
267chr(0x11005D) =~ /[[:^digit:]]/;
268chr(0x11005E) =~ /[[:alpha:]]/;
269chr(0x11005F) =~ /[[:^alpha:]]/;
270chr(0x110060) =~ /[[:ascii:]]/;
271chr(0x110061) =~ /[[:^ascii:]]/;
272chr(0x110062) =~ /[[:cntrl:]]/;
273chr(0x110063) =~ /[[:^cntrl:]]/;
274chr(0x110064) =~ /[[:graph:]]/;
275chr(0x110065) =~ /[[:^graph:]]/;
276chr(0x110066) =~ /[[:lower:]]/;
277chr(0x110067) =~ /[[:^lower:]]/;
278chr(0x110068) =~ /[[:print:]]/;
279chr(0x110069) =~ /[[:^print:]]/;
280chr(0x11006A) =~ /[[:punct:]]/;
281chr(0x11006B) =~ /[[:^punct:]]/;
282chr(0x11006C) =~ /[[:upper:]]/;
283chr(0x11006D) =~ /[[:^upper:]]/;
284chr(0x11006E) =~ /[[:xdigit:]]/;
285chr(0x11006F) =~ /[[:^xdigit:]]/;
286chr(0x110070) =~ /[[:blank:]]/;
287chr(0x110071) =~ /[[:^blank:]]/;
2d88a86a
KW
288chr(0x111010) =~ /[\W\p{Unassigned}]/;
289chr(0x111011) =~ /[\W\P{Unassigned}]/;
290chr(0x112010) =~ /[\S\p{Unassigned}]/;
291chr(0x112011) =~ /[\S\P{Unassigned}]/;
292chr(0x113010) =~ /[\x{110000}\p{Unassigned}]/;
293chr(0x113011) =~ /[\x{110000}\P{Unassigned}]/;
9ae3ac1a 294EXPECT
2d88a86a
KW
295Matched non-Unicode code point 0x110005 against Unicode property; may not be portable at - line 20.
296Matched non-Unicode code point 0x110006 against Unicode property; may not be portable at - line 21.
9ae3ac1a 297########
e9b08962 298# NAME Matching Unicode property against above-Unicode code point outputs a warning even if optimizer rejects the match (in synthetic start class)
2d88a86a
KW
299# The warning now has to be made FATAL to guarantee that it is output
300use warnings FATAL => 'non_unicode';
ae986089
KW
301"\x{110000}" =~ /b?\p{Space}/;
302EXPECT
2d88a86a 303Matched non-Unicode code point 0x110000 against Unicode property; may not be portable at - line 3.
ae986089
KW
304########
305# NAME Matching POSIX class property against above-Unicode code point doesn't output a warning
306use warnings 'non_unicode';
2d88a86a 307use warnings FATAL => 'non_unicode';
ae986089
KW
308"\x{110000}" =~ /b?[[:space:]]/;
309EXPECT
310########
8457b38f
KW
311use warnings 'utf8';
312chr(0x110000) =~ /\p{Any}/;
2d88a86a
KW
313########
314# NAME utf8, non_unicode warnings categories work on Matched non-Unicode code point warning
315use warnings qw(utf8 non_unicode);
316chr(0x110000) =~ /^\p{Unassigned}/;
8457b38f 317no warnings 'non_unicode';
2d88a86a
KW
318chr(0x110001) =~ /\p{Unassigned}/;
319use warnings 'non_unicode';
320no warnings 'utf8';
321chr(0x110002) =~ /\p{Unassigned}/;
8457b38f 322EXPECT
2d88a86a 323Matched non-Unicode code point 0x110000 against Unicode property; may not be portable at - line 2.
8457b38f 324########
f2c2a6ab 325# NAME optimizable regnode should still give non_unicode warnings when fatalized
5073ffbd 326use warnings 'utf8';
f2c2a6ab 327use warnings FATAL => 'non_unicode';
845e7aa3 328chr(0x110000) =~ /\p{lb=cr}/;
f2c2a6ab 329EXPECT
2d88a86a 330Matched non-Unicode code point 0x110000 against Unicode property; may not be portable at - line 3.
f2c2a6ab
KW
331########
332# NAME optimizable regnode should not give non_unicode warnings when warnings are off
5073ffbd 333no warnings 'non_unicode';
845e7aa3 334chr(0x110000) =~ /\p{lb=cr}/;
5073ffbd 335EXPECT
5073ffbd 336########
2d88a86a
KW
337# NAME 'All' matches above-Unicode without any warning
338use warnings qw(utf8 non_unicode);
339chr(0x110000) =~ /\p{All}/;
340EXPECT
341########
9ae3ac1a
KW
342require "../test.pl";
343use warnings 'utf8';
a410ec23 344sub Is_Super { return '!utf8::Any' } # user-defined property (used as \p{Is_Super} below): the negation of utf8::Any
88d45d28
KW
345# The extra char is to avoid an optimization that avoids the problem when the
346# property is the only non-latin1 char in a class
347print "\x{1100000}" =~ /^[\p{Is_Super}\x{100}]$/, "\n";
a410ec23
KW
348EXPECT
3491
350########
351require "../test.pl";
352use warnings 'utf8';
9ae3ac1a
KW
353my $file = tempfile();
354open(my $fh, "+>:utf8", $file);
355print $fh "\x{D7FF}", "\n";
356print $fh "\x{D800}", "\n";
c87792c3
KW
357print $fh "\x{D900}", "\n";
358print $fh "\x{DA00}", "\n";
359print $fh "\x{DB00}", "\n";
360print $fh "\x{DC00}", "\n";
361print $fh "\x{DD00}", "\n";
362print $fh "\x{DE00}", "\n";
363print $fh "\x{DF00}", "\n";
9ae3ac1a
KW
364print $fh "\x{DFFF}", "\n";
365print $fh "\x{E000}", "\n";
366print $fh "\x{FDCF}", "\n";
367print $fh "\x{FDD0}", "\n";
c87792c3 368print $fh "\x{FDD1}", "\n";
9ae3ac1a
KW
369print $fh "\x{FDEF}", "\n";
370print $fh "\x{FDF0}", "\n";
c87792c3
KW
371print $fh "\x{FDFE}", "\n";
372print $fh "\x{FDFF}", "\n";
373print $fh "\x{FE00}", "\n";
9ae3ac1a
KW
374print $fh "\x{FEFF}", "\n";
375print $fh "\x{FFFD}", "\n";
376print $fh "\x{FFFE}", "\n";
377print $fh "\x{FFFF}", "\n";
378print $fh "\x{10000}", "\n";
c87792c3 379print $fh "\x{1FFFD}", "\n";
9ae3ac1a
KW
380print $fh "\x{1FFFE}", "\n";
381print $fh "\x{1FFFF}", "\n";
c87792c3
KW
382print $fh "\x{20000}", "\n";
383print $fh "\x{2FFFD}", "\n";
9ae3ac1a
KW
384print $fh "\x{2FFFE}", "\n";
385print $fh "\x{2FFFF}", "\n";
c87792c3
KW
386print $fh "\x{30000}", "\n";
387print $fh "\x{3FFFD}", "\n";
9ae3ac1a
KW
388print $fh "\x{3FFFE}", "\n";
389print $fh "\x{3FFFF}", "\n";
c87792c3
KW
390print $fh "\x{40000}", "\n";
391print $fh "\x{4FFFD}", "\n";
9ae3ac1a
KW
392print $fh "\x{4FFFE}", "\n";
393print $fh "\x{4FFFF}", "\n";
c87792c3
KW
394print $fh "\x{50000}", "\n";
395print $fh "\x{5FFFD}", "\n";
9ae3ac1a
KW
396print $fh "\x{5FFFE}", "\n";
397print $fh "\x{5FFFF}", "\n";
c87792c3
KW
398print $fh "\x{60000}", "\n";
399print $fh "\x{6FFFD}", "\n";
9ae3ac1a
KW
400print $fh "\x{6FFFE}", "\n";
401print $fh "\x{6FFFF}", "\n";
c87792c3
KW
402print $fh "\x{70000}", "\n";
403print $fh "\x{7FFFD}", "\n";
9ae3ac1a
KW
404print $fh "\x{7FFFE}", "\n";
405print $fh "\x{7FFFF}", "\n";
c87792c3
KW
406print $fh "\x{80000}", "\n";
407print $fh "\x{8FFFD}", "\n";
9ae3ac1a
KW
408print $fh "\x{8FFFE}", "\n";
409print $fh "\x{8FFFF}", "\n";
c87792c3
KW
410print $fh "\x{90000}", "\n";
411print $fh "\x{9FFFD}", "\n";
9ae3ac1a
KW
412print $fh "\x{9FFFE}", "\n";
413print $fh "\x{9FFFF}", "\n";
c87792c3
KW
414print $fh "\x{A0000}", "\n";
415print $fh "\x{AFFFD}", "\n";
9ae3ac1a
KW
416print $fh "\x{AFFFE}", "\n";
417print $fh "\x{AFFFF}", "\n";
c87792c3
KW
418print $fh "\x{B0000}", "\n";
419print $fh "\x{BFFFD}", "\n";
9ae3ac1a
KW
420print $fh "\x{BFFFE}", "\n";
421print $fh "\x{BFFFF}", "\n";
c87792c3
KW
422print $fh "\x{C0000}", "\n";
423print $fh "\x{CFFFD}", "\n";
9ae3ac1a
KW
424print $fh "\x{CFFFE}", "\n";
425print $fh "\x{CFFFF}", "\n";
c87792c3
KW
426print $fh "\x{D0000}", "\n";
427print $fh "\x{DFFFD}", "\n";
9ae3ac1a
KW
428print $fh "\x{DFFFE}", "\n";
429print $fh "\x{DFFFF}", "\n";
c87792c3
KW
430print $fh "\x{E0000}", "\n";
431print $fh "\x{EFFFD}", "\n";
9ae3ac1a
KW
432print $fh "\x{EFFFE}", "\n";
433print $fh "\x{EFFFF}", "\n";
c87792c3
KW
434print $fh "\x{F0000}", "\n";
435print $fh "\x{FFFFD}", "\n";
9ae3ac1a
KW
436print $fh "\x{FFFFE}", "\n";
437print $fh "\x{FFFFF}", "\n";
438print $fh "\x{100000}", "\n";
c87792c3 439print $fh "\x{10FFFD}", "\n";
9ae3ac1a
KW
440print $fh "\x{10FFFE}", "\n";
441print $fh "\x{10FFFF}", "\n";
442print $fh "\x{110000}", "\n";
c87792c3
KW
443print $fh "\x{11FFFD}", "\n";
444print $fh "\x{11FFFE}", "\n";
445print $fh "\x{11FFFF}", "\n";
446print $fh "\x{120000}", "\n";
9ae3ac1a
KW
447close $fh;
448EXPECT
449Unicode surrogate U+D800 is illegal in UTF-8 at - line 6.
c87792c3
KW
450Unicode surrogate U+D900 is illegal in UTF-8 at - line 7.
451Unicode surrogate U+DA00 is illegal in UTF-8 at - line 8.
452Unicode surrogate U+DB00 is illegal in UTF-8 at - line 9.
453Unicode surrogate U+DC00 is illegal in UTF-8 at - line 10.
454Unicode surrogate U+DD00 is illegal in UTF-8 at - line 11.
455Unicode surrogate U+DE00 is illegal in UTF-8 at - line 12.
456Unicode surrogate U+DF00 is illegal in UTF-8 at - line 13.
457Unicode surrogate U+DFFF is illegal in UTF-8 at - line 14.
458Unicode non-character U+FDD0 is not recommended for open interchange in print at - line 17.
459Unicode non-character U+FDD1 is not recommended for open interchange in print at - line 18.
460Unicode non-character U+FDEF is not recommended for open interchange in print at - line 19.
461Unicode non-character U+FFFE is not recommended for open interchange in print at - line 26.
462Unicode non-character U+FFFF is not recommended for open interchange in print at - line 27.
463Unicode non-character U+1FFFE is not recommended for open interchange in print at - line 30.
464Unicode non-character U+1FFFF is not recommended for open interchange in print at - line 31.
465Unicode non-character U+2FFFE is not recommended for open interchange in print at - line 34.
466Unicode non-character U+2FFFF is not recommended for open interchange in print at - line 35.
467Unicode non-character U+3FFFE is not recommended for open interchange in print at - line 38.
468Unicode non-character U+3FFFF is not recommended for open interchange in print at - line 39.
469Unicode non-character U+4FFFE is not recommended for open interchange in print at - line 42.
470Unicode non-character U+4FFFF is not recommended for open interchange in print at - line 43.
471Unicode non-character U+5FFFE is not recommended for open interchange in print at - line 46.
472Unicode non-character U+5FFFF is not recommended for open interchange in print at - line 47.
473Unicode non-character U+6FFFE is not recommended for open interchange in print at - line 50.
474Unicode non-character U+6FFFF is not recommended for open interchange in print at - line 51.
475Unicode non-character U+7FFFE is not recommended for open interchange in print at - line 54.
476Unicode non-character U+7FFFF is not recommended for open interchange in print at - line 55.
477Unicode non-character U+8FFFE is not recommended for open interchange in print at - line 58.
478Unicode non-character U+8FFFF is not recommended for open interchange in print at - line 59.
479Unicode non-character U+9FFFE is not recommended for open interchange in print at - line 62.
480Unicode non-character U+9FFFF is not recommended for open interchange in print at - line 63.
481Unicode non-character U+AFFFE is not recommended for open interchange in print at - line 66.
482Unicode non-character U+AFFFF is not recommended for open interchange in print at - line 67.
483Unicode non-character U+BFFFE is not recommended for open interchange in print at - line 70.
484Unicode non-character U+BFFFF is not recommended for open interchange in print at - line 71.
485Unicode non-character U+CFFFE is not recommended for open interchange in print at - line 74.
486Unicode non-character U+CFFFF is not recommended for open interchange in print at - line 75.
487Unicode non-character U+DFFFE is not recommended for open interchange in print at - line 78.
488Unicode non-character U+DFFFF is not recommended for open interchange in print at - line 79.
489Unicode non-character U+EFFFE is not recommended for open interchange in print at - line 82.
490Unicode non-character U+EFFFF is not recommended for open interchange in print at - line 83.
491Unicode non-character U+FFFFE is not recommended for open interchange in print at - line 86.
492Unicode non-character U+FFFFF is not recommended for open interchange in print at - line 87.
493Unicode non-character U+10FFFE is not recommended for open interchange in print at - line 90.
494Unicode non-character U+10FFFF is not recommended for open interchange in print at - line 91.
495Code point 0x110000 is not Unicode, may not be portable in print at - line 92.
496Code point 0x11FFFD is not Unicode, may not be portable in print at - line 93.
497Code point 0x11FFFE is not Unicode, may not be portable in print at - line 94.
498Code point 0x11FFFF is not Unicode, may not be portable in print at - line 95.
499Code point 0x120000 is not Unicode, may not be portable in print at - line 96.
9ae3ac1a
KW
500########
501require "../test.pl";
8457b38f
KW
502use warnings 'utf8';
503my $file = tempfile();
504open(my $fh, "+>:utf8", $file);
505print $fh "\x{D800}", "\n";
506print $fh "\x{FFFF}", "\n";
507print $fh "\x{110000}", "\n";
508close $fh;
509EXPECT
510Unicode surrogate U+D800 is illegal in UTF-8 at - line 5.
15ca5930
KW
511Unicode non-character U+FFFF is not recommended for open interchange in print at - line 6.
512Code point 0x110000 is not Unicode, may not be portable in print at - line 7.
8457b38f
KW
513########
514require "../test.pl";
515use warnings 'utf8';
516no warnings 'surrogate';
517my $file = tempfile();
518open(my $fh, "+>:utf8", $file);
519print $fh "\x{D800}", "\n";
520print $fh "\x{FFFF}", "\n";
521print $fh "\x{110000}", "\n";
522close $fh;
523EXPECT
15ca5930
KW
524Unicode non-character U+FFFF is not recommended for open interchange in print at - line 7.
525Code point 0x110000 is not Unicode, may not be portable in print at - line 8.
8457b38f
KW
526########
527require "../test.pl";
528use warnings 'utf8';
529no warnings 'nonchar';
530my $file = tempfile();
531open(my $fh, "+>:utf8", $file);
532print $fh "\x{D800}", "\n";
533print $fh "\x{FFFF}", "\n";
534print $fh "\x{110000}", "\n";
535close $fh;
536EXPECT
537Unicode surrogate U+D800 is illegal in UTF-8 at - line 6.
15ca5930 538Code point 0x110000 is not Unicode, may not be portable in print at - line 8.
8457b38f
KW
539########
540require "../test.pl";
541use warnings 'utf8';
542no warnings 'non_unicode';
543my $file = tempfile();
544open(my $fh, "+>:utf8", $file);
545print $fh "\x{D800}", "\n";
546print $fh "\x{FFFF}", "\n";
547print $fh "\x{110000}", "\n";
548close $fh;
549EXPECT
550Unicode surrogate U+D800 is illegal in UTF-8 at - line 6.
15ca5930 551Unicode non-character U+FFFF is not recommended for open interchange in print at - line 7.
8457b38f 552########
920e47bb
AC
553# NAME C<use warnings "nonchar"> works in isolation
554require "../test.pl";
555use warnings 'nonchar';
556my $file = tempfile();
557open(my $fh, "+>:utf8", $file);
558print $fh "\x{FFFF}", "\n";
559close $fh;
560EXPECT
15ca5930 561Unicode non-character U+FFFF is not recommended for open interchange in print at - line 5.
920e47bb 562########
920e47bb
AC
563# NAME C<use warnings "surrogate"> works in isolation
564require "../test.pl";
565use warnings 'surrogate';
566my $file = tempfile();
567open(my $fh, "+>:utf8", $file);
568print $fh "\x{D800}", "\n";
569close $fh;
570EXPECT
571Unicode surrogate U+D800 is illegal in UTF-8 at - line 5.
572########
920e47bb
AC
573# NAME C<use warnings "non_unicode"> works in isolation
574require "../test.pl";
575use warnings 'non_unicode';
576my $file = tempfile();
577open(my $fh, "+>:utf8", $file);
578print $fh "\x{110000}", "\n";
579close $fh;
580EXPECT
15ca5930 581Code point 0x110000 is not Unicode, may not be portable in print at - line 5.
920e47bb 582########
8457b38f 583require "../test.pl";
9ae3ac1a
KW
584no warnings 'utf8';
585my $file = tempfile();
586open(my $fh, "+>:utf8", $file);
587print $fh "\x{D7FF}", "\n";
588print $fh "\x{D800}", "\n";
589print $fh "\x{DFFF}", "\n";
590print $fh "\x{E000}", "\n";
591print $fh "\x{FDCF}", "\n";
592print $fh "\x{FDD0}", "\n";
593print $fh "\x{FDEF}", "\n";
594print $fh "\x{FDF0}", "\n";
595print $fh "\x{FEFF}", "\n";
596print $fh "\x{FFFD}", "\n";
597print $fh "\x{FFFE}", "\n";
598print $fh "\x{FFFF}", "\n";
599print $fh "\x{10000}", "\n";
600print $fh "\x{1FFFE}", "\n";
601print $fh "\x{1FFFF}", "\n";
602print $fh "\x{2FFFE}", "\n";
603print $fh "\x{2FFFF}", "\n";
604print $fh "\x{3FFFE}", "\n";
605print $fh "\x{3FFFF}", "\n";
606print $fh "\x{4FFFE}", "\n";
607print $fh "\x{4FFFF}", "\n";
608print $fh "\x{5FFFE}", "\n";
609print $fh "\x{5FFFF}", "\n";
610print $fh "\x{6FFFE}", "\n";
611print $fh "\x{6FFFF}", "\n";
612print $fh "\x{7FFFE}", "\n";
613print $fh "\x{7FFFF}", "\n";
614print $fh "\x{8FFFE}", "\n";
615print $fh "\x{8FFFF}", "\n";
616print $fh "\x{9FFFE}", "\n";
617print $fh "\x{9FFFF}", "\n";
618print $fh "\x{AFFFE}", "\n";
619print $fh "\x{AFFFF}", "\n";
620print $fh "\x{BFFFE}", "\n";
621print $fh "\x{BFFFF}", "\n";
622print $fh "\x{CFFFE}", "\n";
623print $fh "\x{CFFFF}", "\n";
624print $fh "\x{DFFFE}", "\n";
625print $fh "\x{DFFFF}", "\n";
626print $fh "\x{EFFFE}", "\n";
627print $fh "\x{EFFFF}", "\n";
628print $fh "\x{FFFFE}", "\n";
629print $fh "\x{FFFFF}", "\n";
630print $fh "\x{100000}", "\n";
631print $fh "\x{10FFFE}", "\n";
632print $fh "\x{10FFFF}", "\n";
633print $fh "\x{110000}", "\n";
634close $fh;
62961d2e 635EXPECT
ab0b796c
KW
636########
637# NAME Case change crosses 255/256 under non-UTF8 locale
ef9d5242
KW
638require '../loc_tools.pl';
639unless (locales_enabled('LC_CTYPE')) {
640 print("SKIPPED\n# locales not available\n"),exit;
641}
ab0b796c
KW
642eval { require POSIX; POSIX->import("locale_h") };
643if ($@) {
644 print("SKIPPED\n# no POSIX\n"),exit;
645}
646use warnings 'locale';
647use feature 'fc';
648use locale;
649setlocale(&POSIX::LC_CTYPE, "C");
650my $a;
651$a = lc("\x{178}");
652$a = fc("\x{1E9E}");
653$a = fc("\x{FB05}");
654$a = uc("\x{FB00}");
655$a = ucfirst("\x{149}");
8bdce394
KW
656$a = lcfirst("\x{178}");
657no warnings 'locale';
658$a = lc("\x{178}");
659$a = fc("\x{1E9E}");
660$a = fc("\x{FB05}");
661$a = uc("\x{FB00}");
662$a = ucfirst("\x{149}");
663$a = lcfirst("\x{178}");
ab0b796c 664EXPECT
ef9d5242
KW
665Can't do lc("\x{178}") on non-UTF-8 locale; resolved to "\x{178}". at - line 14.
666Can't do fc("\x{1E9E}") on non-UTF-8 locale; resolved to "\x{17F}\x{17F}". at - line 15.
667Can't do fc("\x{FB05}") on non-UTF-8 locale; resolved to "\x{FB06}". at - line 16.
668Can't do uc("\x{FB00}") on non-UTF-8 locale; resolved to "\x{FB00}". at - line 17.
669Can't do ucfirst("\x{149}") on non-UTF-8 locale; resolved to "\x{149}". at - line 18.
670Can't do lcfirst("\x{178}") on non-UTF-8 locale; resolved to "\x{178}". at - line 19.
613abc6d
KW
671########
672# NAME Wide character in non-UTF-8 locale
ef9d5242
KW
673require '../loc_tools.pl';
674unless (locales_enabled('LC_CTYPE')) {
675 print("SKIPPED\n# locales not available\n"),exit;
676}
613abc6d
KW
677eval { require POSIX; POSIX->import("locale_h") };
678if ($@) {
679 print("SKIPPED\n# no POSIX\n"),exit;
680}
681use warnings 'locale';
682use feature 'fc';
683use locale;
684setlocale(&POSIX::LC_CTYPE, "C");
685my $a;
686$a = lc("\x{100}");
687$a = lcfirst("\x{101}");
688$a = fc("\x{102}");
689$a = uc("\x{103}");
690$a = ucfirst("\x{104}");
691no warnings 'locale';
692$a = lc("\x{100}");
693$a = lcfirst("\x{101}");
694$a = fc("\x{102}");
695$a = uc("\x{103}");
696$a = ucfirst("\x{104}");
697EXPECT
ef9d5242
KW
698Wide character (U+100) in lc at - line 14.
699Wide character (U+101) in lcfirst at - line 15.
700Wide character (U+102) in fc at - line 16.
701Wide character (U+103) in uc at - line 17.
702Wide character (U+104) in ucfirst at - line 18.
008e8e82
KW
703########
704# NAME Wide character in UTF-8 locale
705require '../loc_tools.pl';
706unless (locales_enabled('LC_CTYPE')) {
707 print("SKIPPED\n# locales not available\n"),exit;
708}
709eval { require POSIX; POSIX->import("locale_h") };
710if ($@) {
711 print("SKIPPED\n# no POSIX\n"),exit;
712}
713my @utf8_locales = find_utf8_ctype_locale();
714unless (@utf8_locales) {
715 print("SKIPPED\n# no UTF-8 locales\n"),exit;
716}
717use warnings 'locale';
718use feature 'fc';
719use locale;
720setlocale(&POSIX::LC_CTYPE, $utf8_locales[0]);
721my $a;
722$a = lc("\x{100}");
723$a = lcfirst("\x{101}");
724$a = fc("\x{102}");
725$a = uc("\x{103}");
726$a = ucfirst("\x{104}");
727EXPECT
760c7c2f
KW
728########
729# NAME Deprecation of too-large code points
730require "../test.pl";
731use warnings 'non_unicode';
732my $max_cp = ~0 >> 1;
733my $max_char = chr $max_cp;
734my $to_warn_cp = $max_cp + 1;
735my $to_warn_char = chr $to_warn_cp;
736$max_char =~ /[\x{110000}\P{Unassigned}]/;
737$to_warn_char =~ /[\x{110000}\P{Unassigned}]/;
738my $temp = qr/$max_char/;
739$temp = qr/$to_warn_char/;
740$temp = uc($max_char);
741$temp = uc($to_warn_char);
742my $file = tempfile();
743open(my $fh, "+>:utf8", $file);
744print $fh $max_char, "\n";
745print $fh $to_warn_char, "\n";
746close $fh;
747EXPECT
748OPTION regex
2d212e86
KW
749Use of code point 0x80+ is deprecated; the permissible max is 0x7F+ at - line \d+.
750Use of code point 0x80+ is deprecated; the permissible max is 0x7F+ in pattern match \(m//\) at - line \d+.
751Use of code point 0x80+ is deprecated; the permissible max is 0x7F+ in regexp compilation at - line \d+.
752Use of code point 0x80+ is deprecated; the permissible max is 0x7F+ in regexp compilation at - line \d+.
753Use of code point 0x80+ is deprecated; the permissible max is 0x7F+ at - line \d+.
754Use of code point 0x80+ is deprecated; the permissible max is 0x7F+ in regexp compilation at - line \d+.
760c7c2f 755Operation "uc" returns its argument for non-Unicode code point 0x7F+ at - line \d+.
2d212e86 756Use of code point 0x80+ is deprecated; the permissible max is 0x7F+ at - line \d+.
760c7c2f
KW
757Operation "uc" returns its argument for non-Unicode code point 0x80+ at - line \d+.
758Code point 0x7F+ is not Unicode, may not be portable in print at - line \d+.
ca93ce3c 759Use of code point 0x80+ is deprecated; the permissible max is 0x7F+ in print at - line \d+.
710740a6
KW
760########
761# NAME [perl #127262]
e88136ce
KW
762BEGIN{ # skip checks, then the statement that provokes the malformed-UTF-8 warning
763 if (ord('A') == 193) { # 'A' is 193 on EBCDIC platforms
764 print "SKIPPED\n# ebcdic platforms generates different Malformed UTF-8 warnings.";
765 exit 0;
766 }
a8b2934d
JH
767 use Config;
768 unless ($Config{d_double_style_ieee}) { # consult %Config; the former %Double was never populated, so the test was always skipped
769 print "SKIPPED\n# non-IEEE fp range.";
770 exit 0;
771 }
772{};$^H=eval'2**400'}Â
710740a6 773EXPECT
a8b2934d 774Malformed UTF-8 character: \xc2\x0a (unexpected non-continuation byte 0x0a, immediately after start byte 0xc2; need 2 bytes, got 1) at - line 11.