This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Restore a portion of reverted commits
[perl5.git] / t / lib / warnings / utf8
Commit Line Data
f0df466a
JH
1
2 utf8.c AOK
3
4b88fb76 4 [utf8_to_uvchr_buf]
f0df466a
JH
5 Malformed UTF-8 character
6 my $a = ord "\x80" ;
7
8 Malformed UTF-8 character
9 my $a = ord "\xf080" ;
10 <<<<<< this warning can't be easily triggered from perl anymore
11
12 [utf16_to_utf8]
13 Malformed UTF-16 surrogate
93f09d7b 14 <<<<<< Add a test when something actually calls utf16_to_utf8
f0df466a
JH
15
16__END__
4b88fb76 17# utf8.c [utf8_to_uvchr_buf] -W
6cdc5cd8 18# NAME Malformed under 'use utf8' in double-quoted string
f0df466a
JH
19BEGIN {
20 if (ord('A') == 193) {
72b4e0d1 21 print "SKIPPED\n# ebcdic platforms generates different Malformed UTF-8 warnings.";
f0df466a
JH
22 exit 0;
23 }
24}
25use utf8 ;
6cdc5cd8 26no warnings; # Malformed is a fatal error, so gets output anyway.
f0df466a 27my $a = "snøstorm" ;
6cdc5cd8
KW
28EXPECT
29Malformed UTF-8 character: \xf8\x73\x74\x6f\x72 (unexpected non-continuation byte 0x73, immediately after start byte 0xf8; need 5 bytes, got 1) at - line 10.
30Malformed UTF-8 character (fatal) at - line 10.
31########
32# NAME Malformed under 'use utf8' in single-quoted string
33BEGIN {
34 if (ord('A') == 193) {
35 print "SKIPPED\n# ebcdic platforms generates different Malformed UTF-8 warnings.";
36 exit 0;
37 }
f0df466a 38}
6cdc5cd8
KW
39use utf8 ;
40no warnings; # Malformed is a fatal error, so gets output anyway.
41my $a = 'snøstorm' ;
f0df466a 42EXPECT
7cf8d05d 43Malformed UTF-8 character: \xf8\x73\x74\x6f\x72 (unexpected non-continuation byte 0x73, immediately after start byte 0xf8; need 5 bytes, got 1) at - line 9.
6cdc5cd8 44Malformed UTF-8 character (fatal) at - line 9.
f0df466a 45########
507b9800 46use warnings 'utf8';
9ae3ac1a
KW
47my $d7ff = uc(chr(0xD7FF));
48my $d800 = uc(chr(0xD800));
49my $dfff = uc(chr(0xDFFF));
50my $e000 = uc(chr(0xE000));
51my $feff = uc(chr(0xFEFF));
52my $fffd = uc(chr(0xFFFD));
53my $fffe = uc(chr(0xFFFE));
54my $ffff = uc(chr(0xFFFF));
55my $hex4 = uc(chr(0x10000));
56my $hex5 = uc(chr(0x100000));
57my $maxm1 = uc(chr(0x10FFFE));
58my $max = uc(chr(0x10FFFF));
59my $nonUnicode = uc(chr(0x110000));
507b9800 60no warnings 'utf8';
9ae3ac1a
KW
61my $d7ff = uc(chr(0xD7FF));
62my $d800 = uc(chr(0xD800));
63my $dfff = uc(chr(0xDFFF));
64my $e000 = uc(chr(0xE000));
65my $feff = uc(chr(0xFEFF));
66my $fffd = uc(chr(0xFFFD));
67my $fffe = uc(chr(0xFFFE));
68my $ffff = uc(chr(0xFFFF));
69my $hex4 = uc(chr(0x10000));
70my $hex5 = uc(chr(0x100000));
71my $maxm1 = uc(chr(0x10FFFE));
72my $max = uc(chr(0x10FFFF));
73my $nonUnicode = uc(chr(0x110000));
507b9800 74EXPECT
9ae3ac1a
KW
75Operation "uc" returns its argument for UTF-16 surrogate U+D800 at - line 3.
76Operation "uc" returns its argument for UTF-16 surrogate U+DFFF at - line 4.
77Operation "uc" returns its argument for non-Unicode code point 0x110000 at - line 14.
507b9800 78########
62961d2e 79use warnings 'utf8';
8457b38f
KW
80my $d800 = uc(chr(0xD800));
81my $nonUnicode = uc(chr(0x110000));
82no warnings 'surrogate';
83my $d800 = uc(chr(0xD800));
84my $nonUnicode = uc(chr(0x110000));
85EXPECT
86Operation "uc" returns its argument for UTF-16 surrogate U+D800 at - line 2.
87Operation "uc" returns its argument for non-Unicode code point 0x110000 at - line 3.
88Operation "uc" returns its argument for non-Unicode code point 0x110000 at - line 6.
89########
90use warnings 'utf8';
91my $d800 = uc(chr(0xD800));
92my $nonUnicode = uc(chr(0x110000));
8457b38f
KW
93no warnings 'non_unicode';
94my $d800 = uc(chr(0xD800));
95my $nonUnicode = uc(chr(0x110000));
8457b38f
KW
96EXPECT
97Operation "uc" returns its argument for UTF-16 surrogate U+D800 at - line 2.
98Operation "uc" returns its argument for non-Unicode code point 0x110000 at - line 3.
9415f659
KW
99Operation "uc" returns its argument for UTF-16 surrogate U+D800 at - line 5.
100########
9415f659 101use warnings 'utf8';
1ddd0324 102my $big_nonUnicode = uc(chr(0x7FFF_FFFF));
9415f659 103no warnings 'non_unicode';
1ddd0324 104my $big_nonUnicode = uc(chr(0x7FFF_FFFF));
9415f659 105EXPECT
1ddd0324 106Operation "uc" returns its argument for non-Unicode code point 0x7FFFFFFF at - line 2.
8457b38f
KW
107########
108use warnings 'utf8';
9ae3ac1a
KW
109my $d7ff = lc pack("U", 0xD7FF);
110my $d800 = lc pack("U", 0xD800);
111my $dfff = lc pack("U", 0xDFFF);
112my $e000 = lc pack("U", 0xE000);
113my $feff = lc pack("U", 0xFEFF);
114my $fffd = lc pack("U", 0xFFFD);
115my $fffe = lc pack("U", 0xFFFE);
116my $ffff = lc pack("U", 0xFFFF);
117my $hex4 = lc pack("U", 0x10000);
118my $hex5 = lc pack("U", 0x100000);
119my $maxm1 = lc pack("U", 0x10FFFE);
120my $max = lc pack("U", 0x10FFFF);
121my $nonUnicode = lc(pack("U", 0x110000));
62961d2e 122no warnings 'utf8';
9ae3ac1a
KW
123my $d7ff = lc pack("U", 0xD7FF);
124my $d800 = lc pack("U", 0xD800);
125my $dfff = lc pack("U", 0xDFFF);
126my $e000 = lc pack("U", 0xE000);
127my $feff = lc pack("U", 0xFEFF);
128my $fffd = lc pack("U", 0xFFFD);
129my $fffe = lc pack("U", 0xFFFE);
130my $ffff = lc pack("U", 0xFFFF);
131my $hex4 = lc pack("U", 0x10000);
132my $hex5 = lc pack("U", 0x100000);
133my $maxm1 = lc pack("U", 0x10FFFE);
134my $max = lc pack("U", 0x10FFFF);
135my $nonUnicode = lc(pack("U", 0x110000));
62961d2e 136EXPECT
9ae3ac1a
KW
137Operation "lc" returns its argument for UTF-16 surrogate U+D800 at - line 3.
138Operation "lc" returns its argument for UTF-16 surrogate U+DFFF at - line 4.
139Operation "lc" returns its argument for non-Unicode code point 0x110000 at - line 14.
62961d2e
JH
140########
141use warnings 'utf8';
9ae3ac1a
KW
142my $d7ff = ucfirst "\x{D7FF}";
143my $d800 = ucfirst "\x{D800}";
144my $dfff = ucfirst "\x{DFFF}";
145my $e000 = ucfirst "\x{E000}";
146my $feff = ucfirst "\x{FEFF}";
147my $fffd = ucfirst "\x{FFFD}";
148my $fffe = ucfirst "\x{FFFE}";
149my $ffff = ucfirst "\x{FFFF}";
150my $hex4 = ucfirst "\x{10000}";
151my $hex5 = ucfirst "\x{100000}";
152my $maxm1 = ucfirst "\x{10FFFE}";
153my $max = ucfirst "\x{10FFFF}";
154my $nonUnicode = ucfirst "\x{110000}";
62961d2e 155no warnings 'utf8';
9ae3ac1a
KW
156my $d7ff = ucfirst "\x{D7FF}";
157my $d800 = ucfirst "\x{D800}";
158my $dfff = ucfirst "\x{DFFF}";
159my $e000 = ucfirst "\x{E000}";
160my $feff = ucfirst "\x{FEFF}";
161my $fffd = ucfirst "\x{FFFD}";
162my $fffe = ucfirst "\x{FFFE}";
163my $ffff = ucfirst "\x{FFFF}";
164my $hex4 = ucfirst "\x{10000}";
165my $hex5 = ucfirst "\x{100000}";
166my $maxm1 = ucfirst "\x{10FFFE}";
167my $max = ucfirst "\x{10FFFF}";
168my $nonUnicode = ucfirst "\x{110000}";
169EXPECT
170Operation "ucfirst" returns its argument for UTF-16 surrogate U+D800 at - line 3.
171Operation "ucfirst" returns its argument for UTF-16 surrogate U+DFFF at - line 4.
172Operation "ucfirst" returns its argument for non-Unicode code point 0x110000 at - line 14.
173########
2d88a86a 174# NAME Matching \p{} against above-Unicode
9ae3ac1a
KW
175use warnings 'utf8';
176chr(0xD7FF) =~ /\p{Any}/;
177chr(0xD800) =~ /\p{Any}/;
178chr(0xDFFF) =~ /\p{Any}/;
179chr(0xE000) =~ /\p{Any}/;
180chr(0xFEFF) =~ /\p{Any}/;
181chr(0xFFFD) =~ /\p{Any}/;
182chr(0xFFFE) =~ /\p{Any}/;
183chr(0xFFFF) =~ /\p{Any}/;
184chr(0x10000) =~ /\p{Any}/;
185chr(0x100000) =~ /\p{Any}/;
186chr(0x10FFFE) =~ /\p{Any}/;
187chr(0x10FFFF) =~ /\p{Any}/;
2d88a86a
KW
188chr(0x110000) =~ /[\p{Any}]/;
189chr(0x110001) =~ /[\w\p{Any}]/;
190chr(0x10FFFF) =~ /\p{All}/;
191chr(0x110002) =~ /[\w\p{All}]/;
192chr(0x110003) =~ /[\p{XPosixWord}]/;
193chr(0x110004) =~ /[\P{XPosixWord}]/;
194chr(0x110005) =~ /^[\p{Unassigned}]/;
195chr(0x110006) =~ /^[\P{Unassigned}]/;
196# Only Unicode properties give non-Unicode warnings, and only those properties
197# which do match above Unicode; and not when something else in the class
198# matches above Unicode. Below we test three ways where something outside the
199# property may match non-Unicode: a code point above it, a class \S that we
200# know at compile time doesn't, and a class \W whose values aren't (at the time
201# of this writing) specified at compile time, but which wouldn't match
5073ffbd
KW
202chr(0x110050) =~ /\w/;
203chr(0x110051) =~ /\W/;
204chr(0x110052) =~ /\d/;
205chr(0x110053) =~ /\D/;
206chr(0x110054) =~ /\s/;
207chr(0x110055) =~ /\S/;
208chr(0x110056) =~ /[[:word:]]/;
209chr(0x110057) =~ /[[:^word:]]/;
210chr(0x110058) =~ /[[:alnum:]]/;
211chr(0x110059) =~ /[[:^alnum:]]/;
212chr(0x11005A) =~ /[[:space:]]/;
213chr(0x11005B) =~ /[[:^space:]]/;
214chr(0x11005C) =~ /[[:digit:]]/;
215chr(0x11005D) =~ /[[:^digit:]]/;
216chr(0x11005E) =~ /[[:alpha:]]/;
217chr(0x11005F) =~ /[[:^alpha:]]/;
218chr(0x110060) =~ /[[:ascii:]]/;
219chr(0x110061) =~ /[[:^ascii:]]/;
220chr(0x110062) =~ /[[:cntrl:]]/;
221chr(0x110063) =~ /[[:^cntrl:]]/;
222chr(0x110064) =~ /[[:graph:]]/;
223chr(0x110065) =~ /[[:^graph:]]/;
224chr(0x110066) =~ /[[:lower:]]/;
225chr(0x110067) =~ /[[:^lower:]]/;
226chr(0x110068) =~ /[[:print:]]/;
227chr(0x110069) =~ /[[:^print:]]/;
228chr(0x11006A) =~ /[[:punct:]]/;
229chr(0x11006B) =~ /[[:^punct:]]/;
230chr(0x11006C) =~ /[[:upper:]]/;
231chr(0x11006D) =~ /[[:^upper:]]/;
232chr(0x11006E) =~ /[[:xdigit:]]/;
233chr(0x11006F) =~ /[[:^xdigit:]]/;
234chr(0x110070) =~ /[[:blank:]]/;
235chr(0x110071) =~ /[[:^blank:]]/;
2d88a86a
KW
236chr(0x111010) =~ /[\W\p{Unassigned}]/;
237chr(0x111011) =~ /[\W\P{Unassigned}]/;
238chr(0x112010) =~ /[\S\p{Unassigned}]/;
239chr(0x112011) =~ /[\S\P{Unassigned}]/;
240chr(0x113010) =~ /[\x{110000}\p{Unassigned}]/;
241chr(0x113011) =~ /[\x{110000}\P{Unassigned}]/;
9ae3ac1a
KW
242no warnings 'utf8';
243chr(0xD7FF) =~ /\p{Any}/;
244chr(0xD800) =~ /\p{Any}/;
245chr(0xDFFF) =~ /\p{Any}/;
246chr(0xE000) =~ /\p{Any}/;
247chr(0xFEFF) =~ /\p{Any}/;
248chr(0xFFFD) =~ /\p{Any}/;
249chr(0xFFFE) =~ /\p{Any}/;
250chr(0xFFFF) =~ /\p{Any}/;
251chr(0x10000) =~ /\p{Any}/;
252chr(0x100000) =~ /\p{Any}/;
253chr(0x10FFFE) =~ /\p{Any}/;
254chr(0x10FFFF) =~ /\p{Any}/;
2d88a86a
KW
255chr(0x110000) =~ /[\p{Any}]/;
256chr(0x110001) =~ /[\w\p{Any}]/;
257chr(0x10FFFF) =~ /\p{All}/;
258chr(0x110002) =~ /[\w\p{All}]/;
259chr(0x110003) =~ /[\p{XPosixWord}]/;
260chr(0x110004) =~ /[\P{XPosixWord}]/;
261chr(0x110005) =~ /^[\p{Unassigned}]/;
262chr(0x110006) =~ /^[\P{Unassigned}]/;
5073ffbd
KW
263chr(0x110050) =~ /\w/;
264chr(0x110051) =~ /\W/;
265chr(0x110052) =~ /\d/;
266chr(0x110053) =~ /\D/;
267chr(0x110054) =~ /\s/;
268chr(0x110055) =~ /\S/;
269chr(0x110056) =~ /[[:word:]]/;
270chr(0x110057) =~ /[[:^word:]]/;
271chr(0x110058) =~ /[[:alnum:]]/;
272chr(0x110059) =~ /[[:^alnum:]]/;
273chr(0x11005A) =~ /[[:space:]]/;
274chr(0x11005B) =~ /[[:^space:]]/;
275chr(0x11005C) =~ /[[:digit:]]/;
276chr(0x11005D) =~ /[[:^digit:]]/;
277chr(0x11005E) =~ /[[:alpha:]]/;
278chr(0x11005F) =~ /[[:^alpha:]]/;
279chr(0x110060) =~ /[[:ascii:]]/;
280chr(0x110061) =~ /[[:^ascii:]]/;
281chr(0x110062) =~ /[[:cntrl:]]/;
282chr(0x110063) =~ /[[:^cntrl:]]/;
283chr(0x110064) =~ /[[:graph:]]/;
284chr(0x110065) =~ /[[:^graph:]]/;
285chr(0x110066) =~ /[[:lower:]]/;
286chr(0x110067) =~ /[[:^lower:]]/;
287chr(0x110068) =~ /[[:print:]]/;
288chr(0x110069) =~ /[[:^print:]]/;
289chr(0x11006A) =~ /[[:punct:]]/;
290chr(0x11006B) =~ /[[:^punct:]]/;
291chr(0x11006C) =~ /[[:upper:]]/;
292chr(0x11006D) =~ /[[:^upper:]]/;
293chr(0x11006E) =~ /[[:xdigit:]]/;
294chr(0x11006F) =~ /[[:^xdigit:]]/;
295chr(0x110070) =~ /[[:blank:]]/;
296chr(0x110071) =~ /[[:^blank:]]/;
2d88a86a
KW
297chr(0x111010) =~ /[\W\p{Unassigned}]/;
298chr(0x111011) =~ /[\W\P{Unassigned}]/;
299chr(0x112010) =~ /[\S\p{Unassigned}]/;
300chr(0x112011) =~ /[\S\P{Unassigned}]/;
301chr(0x113010) =~ /[\x{110000}\p{Unassigned}]/;
302chr(0x113011) =~ /[\x{110000}\P{Unassigned}]/;
9ae3ac1a 303EXPECT
2d88a86a
KW
304Matched non-Unicode code point 0x110005 against Unicode property; may not be portable at - line 20.
305Matched non-Unicode code point 0x110006 against Unicode property; may not be portable at - line 21.
9ae3ac1a 306########
e9b08962 307# NAME Matching Unicode property against above-Unicode code point outputs a warning even if optimizer rejects the match (in synthetic start class)
2d88a86a
KW
308# The warning now has to be made FATAL to guarantee that it is output
309use warnings FATAL => 'non_unicode';
ae986089
KW
310"\x{110000}" =~ /b?\p{Space}/;
311EXPECT
2d88a86a 312Matched non-Unicode code point 0x110000 against Unicode property; may not be portable at - line 3.
ae986089
KW
313########
314# NAME Matching POSIX class property against above-Unicode code point doesn't output a warning
315use warnings 'non_unicode';
2d88a86a 316use warnings FATAL => 'non_unicode';
ae986089
KW
317"\x{110000}" =~ /b?[[:space:]]/;
318EXPECT
319########
8457b38f
KW
320use warnings 'utf8';
321chr(0x110000) =~ /\p{Any}/;
2d88a86a
KW
322########
323# NAME utf8, non_unicode warnings categories work on Matched non-Unicode code point warning
324use warnings qw(utf8 non_unicode);
325chr(0x110000) =~ /^\p{Unassigned}/;
8457b38f 326no warnings 'non_unicode';
2d88a86a
KW
327chr(0x110001) =~ /\p{Unassigned}/;
328use warnings 'non_unicode';
329no warnings 'utf8';
330chr(0x110002) =~ /\p{Unassigned}/;
8457b38f 331EXPECT
2d88a86a 332Matched non-Unicode code point 0x110000 against Unicode property; may not be portable at - line 2.
8457b38f 333########
f2c2a6ab 334# NAME optimizable regnode should still give non_unicode warnings when fatalized
5073ffbd 335use warnings 'utf8';
f2c2a6ab 336use warnings FATAL => 'non_unicode';
845e7aa3 337chr(0x110000) =~ /\p{lb=cr}/;
f2c2a6ab 338EXPECT
2d88a86a 339Matched non-Unicode code point 0x110000 against Unicode property; may not be portable at - line 3.
f2c2a6ab
KW
340########
341# NAME optimizable regnode should not give non_unicode warnings when warnings are off
5073ffbd 342no warnings 'non_unicode';
845e7aa3 343chr(0x110000) =~ /\p{lb=cr}/;
5073ffbd 344EXPECT
5073ffbd 345########
2d88a86a
KW
346# NAME 'All' matches above-Unicode without any warning
347use warnings qw(utf8 non_unicode);
348chr(0x110000) =~ /\p{All}/;
349EXPECT
350########
9ae3ac1a
KW
351require "../test.pl";
352use warnings 'utf8';
a410ec23 353sub Is_Super { return '!utf8::Any' } # user-defined property: '!' inverts utf8::Any, i.e. whatever \p{Any} does not match
88d45d28
KW
354# The extra char is to avoid an optimization that avoids the problem when the
355# property is the only non-latin1 char in a class
356print "\x{1100000}" =~ /^[\p{Is_Super}\x{100}]$/, "\n";
a410ec23
KW
357EXPECT
3581
359########
360require "../test.pl";
361use warnings 'utf8';
9ae3ac1a
KW
362my $file = tempfile();
363open(my $fh, "+>:utf8", $file);
364print $fh "\x{D7FF}", "\n";
365print $fh "\x{D800}", "\n";
c87792c3
KW
366print $fh "\x{D900}", "\n";
367print $fh "\x{DA00}", "\n";
368print $fh "\x{DB00}", "\n";
369print $fh "\x{DC00}", "\n";
370print $fh "\x{DD00}", "\n";
371print $fh "\x{DE00}", "\n";
372print $fh "\x{DF00}", "\n";
9ae3ac1a
KW
373print $fh "\x{DFFF}", "\n";
374print $fh "\x{E000}", "\n";
375print $fh "\x{FDCF}", "\n";
376print $fh "\x{FDD0}", "\n";
c87792c3 377print $fh "\x{FDD1}", "\n";
9ae3ac1a
KW
378print $fh "\x{FDEF}", "\n";
379print $fh "\x{FDF0}", "\n";
c87792c3
KW
380print $fh "\x{FDFE}", "\n";
381print $fh "\x{FDFF}", "\n";
382print $fh "\x{FE00}", "\n";
9ae3ac1a
KW
383print $fh "\x{FEFF}", "\n";
384print $fh "\x{FFFD}", "\n";
385print $fh "\x{FFFE}", "\n";
386print $fh "\x{FFFF}", "\n";
387print $fh "\x{10000}", "\n";
c87792c3 388print $fh "\x{1FFFD}", "\n";
9ae3ac1a
KW
389print $fh "\x{1FFFE}", "\n";
390print $fh "\x{1FFFF}", "\n";
c87792c3
KW
391print $fh "\x{20000}", "\n";
392print $fh "\x{2FFFD}", "\n";
9ae3ac1a
KW
393print $fh "\x{2FFFE}", "\n";
394print $fh "\x{2FFFF}", "\n";
c87792c3
KW
395print $fh "\x{30000}", "\n";
396print $fh "\x{3FFFD}", "\n";
9ae3ac1a
KW
397print $fh "\x{3FFFE}", "\n";
398print $fh "\x{3FFFF}", "\n";
c87792c3
KW
399print $fh "\x{40000}", "\n";
400print $fh "\x{4FFFD}", "\n";
9ae3ac1a
KW
401print $fh "\x{4FFFE}", "\n";
402print $fh "\x{4FFFF}", "\n";
c87792c3
KW
403print $fh "\x{50000}", "\n";
404print $fh "\x{5FFFD}", "\n";
9ae3ac1a
KW
405print $fh "\x{5FFFE}", "\n";
406print $fh "\x{5FFFF}", "\n";
c87792c3
KW
407print $fh "\x{60000}", "\n";
408print $fh "\x{6FFFD}", "\n";
9ae3ac1a
KW
409print $fh "\x{6FFFE}", "\n";
410print $fh "\x{6FFFF}", "\n";
c87792c3
KW
411print $fh "\x{70000}", "\n";
412print $fh "\x{7FFFD}", "\n";
9ae3ac1a
KW
413print $fh "\x{7FFFE}", "\n";
414print $fh "\x{7FFFF}", "\n";
c87792c3
KW
415print $fh "\x{80000}", "\n";
416print $fh "\x{8FFFD}", "\n";
9ae3ac1a
KW
417print $fh "\x{8FFFE}", "\n";
418print $fh "\x{8FFFF}", "\n";
c87792c3
KW
419print $fh "\x{90000}", "\n";
420print $fh "\x{9FFFD}", "\n";
9ae3ac1a
KW
421print $fh "\x{9FFFE}", "\n";
422print $fh "\x{9FFFF}", "\n";
c87792c3
KW
423print $fh "\x{A0000}", "\n";
424print $fh "\x{AFFFD}", "\n";
9ae3ac1a
KW
425print $fh "\x{AFFFE}", "\n";
426print $fh "\x{AFFFF}", "\n";
c87792c3
KW
427print $fh "\x{B0000}", "\n";
428print $fh "\x{BFFFD}", "\n";
9ae3ac1a
KW
429print $fh "\x{BFFFE}", "\n";
430print $fh "\x{BFFFF}", "\n";
c87792c3
KW
431print $fh "\x{C0000}", "\n";
432print $fh "\x{CFFFD}", "\n";
9ae3ac1a
KW
433print $fh "\x{CFFFE}", "\n";
434print $fh "\x{CFFFF}", "\n";
c87792c3
KW
435print $fh "\x{D0000}", "\n";
436print $fh "\x{DFFFD}", "\n";
9ae3ac1a
KW
437print $fh "\x{DFFFE}", "\n";
438print $fh "\x{DFFFF}", "\n";
c87792c3
KW
439print $fh "\x{E0000}", "\n";
440print $fh "\x{EFFFD}", "\n";
9ae3ac1a
KW
441print $fh "\x{EFFFE}", "\n";
442print $fh "\x{EFFFF}", "\n";
c87792c3
KW
443print $fh "\x{F0000}", "\n";
444print $fh "\x{FFFFD}", "\n";
9ae3ac1a
KW
445print $fh "\x{FFFFE}", "\n";
446print $fh "\x{FFFFF}", "\n";
447print $fh "\x{100000}", "\n";
c87792c3 448print $fh "\x{10FFFD}", "\n";
9ae3ac1a
KW
449print $fh "\x{10FFFE}", "\n";
450print $fh "\x{10FFFF}", "\n";
451print $fh "\x{110000}", "\n";
c87792c3
KW
452print $fh "\x{11FFFD}", "\n";
453print $fh "\x{11FFFE}", "\n";
454print $fh "\x{11FFFF}", "\n";
455print $fh "\x{120000}", "\n";
9ae3ac1a
KW
456close $fh;
457EXPECT
458Unicode surrogate U+D800 is illegal in UTF-8 at - line 6.
c87792c3
KW
459Unicode surrogate U+D900 is illegal in UTF-8 at - line 7.
460Unicode surrogate U+DA00 is illegal in UTF-8 at - line 8.
461Unicode surrogate U+DB00 is illegal in UTF-8 at - line 9.
462Unicode surrogate U+DC00 is illegal in UTF-8 at - line 10.
463Unicode surrogate U+DD00 is illegal in UTF-8 at - line 11.
464Unicode surrogate U+DE00 is illegal in UTF-8 at - line 12.
465Unicode surrogate U+DF00 is illegal in UTF-8 at - line 13.
466Unicode surrogate U+DFFF is illegal in UTF-8 at - line 14.
467Unicode non-character U+FDD0 is not recommended for open interchange in print at - line 17.
468Unicode non-character U+FDD1 is not recommended for open interchange in print at - line 18.
469Unicode non-character U+FDEF is not recommended for open interchange in print at - line 19.
470Unicode non-character U+FFFE is not recommended for open interchange in print at - line 26.
471Unicode non-character U+FFFF is not recommended for open interchange in print at - line 27.
472Unicode non-character U+1FFFE is not recommended for open interchange in print at - line 30.
473Unicode non-character U+1FFFF is not recommended for open interchange in print at - line 31.
474Unicode non-character U+2FFFE is not recommended for open interchange in print at - line 34.
475Unicode non-character U+2FFFF is not recommended for open interchange in print at - line 35.
476Unicode non-character U+3FFFE is not recommended for open interchange in print at - line 38.
477Unicode non-character U+3FFFF is not recommended for open interchange in print at - line 39.
478Unicode non-character U+4FFFE is not recommended for open interchange in print at - line 42.
479Unicode non-character U+4FFFF is not recommended for open interchange in print at - line 43.
480Unicode non-character U+5FFFE is not recommended for open interchange in print at - line 46.
481Unicode non-character U+5FFFF is not recommended for open interchange in print at - line 47.
482Unicode non-character U+6FFFE is not recommended for open interchange in print at - line 50.
483Unicode non-character U+6FFFF is not recommended for open interchange in print at - line 51.
484Unicode non-character U+7FFFE is not recommended for open interchange in print at - line 54.
485Unicode non-character U+7FFFF is not recommended for open interchange in print at - line 55.
486Unicode non-character U+8FFFE is not recommended for open interchange in print at - line 58.
487Unicode non-character U+8FFFF is not recommended for open interchange in print at - line 59.
488Unicode non-character U+9FFFE is not recommended for open interchange in print at - line 62.
489Unicode non-character U+9FFFF is not recommended for open interchange in print at - line 63.
490Unicode non-character U+AFFFE is not recommended for open interchange in print at - line 66.
491Unicode non-character U+AFFFF is not recommended for open interchange in print at - line 67.
492Unicode non-character U+BFFFE is not recommended for open interchange in print at - line 70.
493Unicode non-character U+BFFFF is not recommended for open interchange in print at - line 71.
494Unicode non-character U+CFFFE is not recommended for open interchange in print at - line 74.
495Unicode non-character U+CFFFF is not recommended for open interchange in print at - line 75.
496Unicode non-character U+DFFFE is not recommended for open interchange in print at - line 78.
497Unicode non-character U+DFFFF is not recommended for open interchange in print at - line 79.
498Unicode non-character U+EFFFE is not recommended for open interchange in print at - line 82.
499Unicode non-character U+EFFFF is not recommended for open interchange in print at - line 83.
500Unicode non-character U+FFFFE is not recommended for open interchange in print at - line 86.
501Unicode non-character U+FFFFF is not recommended for open interchange in print at - line 87.
502Unicode non-character U+10FFFE is not recommended for open interchange in print at - line 90.
503Unicode non-character U+10FFFF is not recommended for open interchange in print at - line 91.
504Code point 0x110000 is not Unicode, may not be portable in print at - line 92.
505Code point 0x11FFFD is not Unicode, may not be portable in print at - line 93.
506Code point 0x11FFFE is not Unicode, may not be portable in print at - line 94.
507Code point 0x11FFFF is not Unicode, may not be portable in print at - line 95.
508Code point 0x120000 is not Unicode, may not be portable in print at - line 96.
9ae3ac1a
KW
509########
510require "../test.pl";
8457b38f
KW
511use warnings 'utf8';
512my $file = tempfile();
513open(my $fh, "+>:utf8", $file);
514print $fh "\x{D800}", "\n";
515print $fh "\x{FFFF}", "\n";
516print $fh "\x{110000}", "\n";
517close $fh;
518EXPECT
519Unicode surrogate U+D800 is illegal in UTF-8 at - line 5.
15ca5930
KW
520Unicode non-character U+FFFF is not recommended for open interchange in print at - line 6.
521Code point 0x110000 is not Unicode, may not be portable in print at - line 7.
8457b38f
KW
522########
523require "../test.pl";
524use warnings 'utf8';
525no warnings 'surrogate';
526my $file = tempfile();
527open(my $fh, "+>:utf8", $file);
528print $fh "\x{D800}", "\n";
529print $fh "\x{FFFF}", "\n";
530print $fh "\x{110000}", "\n";
531close $fh;
532EXPECT
15ca5930
KW
533Unicode non-character U+FFFF is not recommended for open interchange in print at - line 7.
534Code point 0x110000 is not Unicode, may not be portable in print at - line 8.
8457b38f
KW
535########
536require "../test.pl";
537use warnings 'utf8';
538no warnings 'nonchar';
539my $file = tempfile();
540open(my $fh, "+>:utf8", $file);
541print $fh "\x{D800}", "\n";
542print $fh "\x{FFFF}", "\n";
543print $fh "\x{110000}", "\n";
544close $fh;
545EXPECT
546Unicode surrogate U+D800 is illegal in UTF-8 at - line 6.
15ca5930 547Code point 0x110000 is not Unicode, may not be portable in print at - line 8.
8457b38f
KW
548########
549require "../test.pl";
550use warnings 'utf8';
551no warnings 'non_unicode';
552my $file = tempfile();
553open(my $fh, "+>:utf8", $file);
554print $fh "\x{D800}", "\n";
555print $fh "\x{FFFF}", "\n";
556print $fh "\x{110000}", "\n";
557close $fh;
558EXPECT
559Unicode surrogate U+D800 is illegal in UTF-8 at - line 6.
15ca5930 560Unicode non-character U+FFFF is not recommended for open interchange in print at - line 7.
8457b38f 561########
920e47bb
AC
562# NAME C<use warnings "nonchar"> works in isolation
563require "../test.pl";
564use warnings 'nonchar';
565my $file = tempfile();
566open(my $fh, "+>:utf8", $file);
567print $fh "\x{FFFF}", "\n";
568close $fh;
569EXPECT
15ca5930 570Unicode non-character U+FFFF is not recommended for open interchange in print at - line 5.
920e47bb 571########
920e47bb
AC
572# NAME C<use warnings "surrogate"> works in isolation
573require "../test.pl";
574use warnings 'surrogate';
575my $file = tempfile();
576open(my $fh, "+>:utf8", $file);
577print $fh "\x{D800}", "\n";
578close $fh;
579EXPECT
580Unicode surrogate U+D800 is illegal in UTF-8 at - line 5.
581########
920e47bb
AC
582# NAME C<use warnings "non_unicode"> works in isolation
583require "../test.pl";
584use warnings 'non_unicode';
585my $file = tempfile();
586open(my $fh, "+>:utf8", $file);
587print $fh "\x{110000}", "\n";
588close $fh;
589EXPECT
15ca5930 590Code point 0x110000 is not Unicode, may not be portable in print at - line 5.
920e47bb 591########
8457b38f 592require "../test.pl";
9ae3ac1a
KW
593no warnings 'utf8';
594my $file = tempfile();
595open(my $fh, "+>:utf8", $file);
596print $fh "\x{D7FF}", "\n";
597print $fh "\x{D800}", "\n";
598print $fh "\x{DFFF}", "\n";
599print $fh "\x{E000}", "\n";
600print $fh "\x{FDCF}", "\n";
601print $fh "\x{FDD0}", "\n";
602print $fh "\x{FDEF}", "\n";
603print $fh "\x{FDF0}", "\n";
604print $fh "\x{FEFF}", "\n";
605print $fh "\x{FFFD}", "\n";
606print $fh "\x{FFFE}", "\n";
607print $fh "\x{FFFF}", "\n";
608print $fh "\x{10000}", "\n";
609print $fh "\x{1FFFE}", "\n";
610print $fh "\x{1FFFF}", "\n";
611print $fh "\x{2FFFE}", "\n";
612print $fh "\x{2FFFF}", "\n";
613print $fh "\x{3FFFE}", "\n";
614print $fh "\x{3FFFF}", "\n";
615print $fh "\x{4FFFE}", "\n";
616print $fh "\x{4FFFF}", "\n";
617print $fh "\x{5FFFE}", "\n";
618print $fh "\x{5FFFF}", "\n";
619print $fh "\x{6FFFE}", "\n";
620print $fh "\x{6FFFF}", "\n";
621print $fh "\x{7FFFE}", "\n";
622print $fh "\x{7FFFF}", "\n";
623print $fh "\x{8FFFE}", "\n";
624print $fh "\x{8FFFF}", "\n";
625print $fh "\x{9FFFE}", "\n";
626print $fh "\x{9FFFF}", "\n";
627print $fh "\x{AFFFE}", "\n";
628print $fh "\x{AFFFF}", "\n";
629print $fh "\x{BFFFE}", "\n";
630print $fh "\x{BFFFF}", "\n";
631print $fh "\x{CFFFE}", "\n";
632print $fh "\x{CFFFF}", "\n";
633print $fh "\x{DFFFE}", "\n";
634print $fh "\x{DFFFF}", "\n";
635print $fh "\x{EFFFE}", "\n";
636print $fh "\x{EFFFF}", "\n";
637print $fh "\x{FFFFE}", "\n";
638print $fh "\x{FFFFF}", "\n";
639print $fh "\x{100000}", "\n";
640print $fh "\x{10FFFE}", "\n";
641print $fh "\x{10FFFF}", "\n";
642print $fh "\x{110000}", "\n";
643close $fh;
62961d2e 644EXPECT
ab0b796c
KW
645########
646# NAME Case change crosses 255/256 under non-UTF8 locale
ef9d5242
KW
647require '../loc_tools.pl';
648unless (locales_enabled('LC_CTYPE')) {
649 print("SKIPPED\n# locales not available\n"),exit;
650}
ab0b796c
KW
651eval { require POSIX; POSIX->import("locale_h") };
652if ($@) {
653 print("SKIPPED\n# no POSIX\n"),exit;
654}
655use warnings 'locale';
656use feature 'fc';
657use locale;
658setlocale(&POSIX::LC_CTYPE, "C");
659my $a;
660$a = lc("\x{178}");
661$a = fc("\x{1E9E}");
662$a = fc("\x{FB05}");
663$a = uc("\x{FB00}");
664$a = ucfirst("\x{149}");
8bdce394
KW
665$a = lcfirst("\x{178}");
666no warnings 'locale';
667$a = lc("\x{178}");
668$a = fc("\x{1E9E}");
669$a = fc("\x{FB05}");
670$a = uc("\x{FB00}");
671$a = ucfirst("\x{149}");
672$a = lcfirst("\x{178}");
ab0b796c 673EXPECT
ef9d5242
KW
674Can't do lc("\x{178}") on non-UTF-8 locale; resolved to "\x{178}". at - line 14.
675Can't do fc("\x{1E9E}") on non-UTF-8 locale; resolved to "\x{17F}\x{17F}". at - line 15.
676Can't do fc("\x{FB05}") on non-UTF-8 locale; resolved to "\x{FB06}". at - line 16.
677Can't do uc("\x{FB00}") on non-UTF-8 locale; resolved to "\x{FB00}". at - line 17.
678Can't do ucfirst("\x{149}") on non-UTF-8 locale; resolved to "\x{149}". at - line 18.
679Can't do lcfirst("\x{178}") on non-UTF-8 locale; resolved to "\x{178}". at - line 19.
613abc6d
KW
680########
681# NAME Wide character in non-UTF-8 locale
ef9d5242
KW
682require '../loc_tools.pl';
683unless (locales_enabled('LC_CTYPE')) {
684 print("SKIPPED\n# locales not available\n"),exit;
685}
613abc6d
KW
686eval { require POSIX; POSIX->import("locale_h") };
687if ($@) {
688 print("SKIPPED\n# no POSIX\n"),exit;
689}
690use warnings 'locale';
691use feature 'fc';
692use locale;
693setlocale(&POSIX::LC_CTYPE, "C");
694my $a;
695$a = lc("\x{100}");
696$a = lcfirst("\x{101}");
697$a = fc("\x{102}");
698$a = uc("\x{103}");
699$a = ucfirst("\x{104}");
700no warnings 'locale';
701$a = lc("\x{100}");
702$a = lcfirst("\x{101}");
703$a = fc("\x{102}");
704$a = uc("\x{103}");
705$a = ucfirst("\x{104}");
706EXPECT
ef9d5242
KW
707Wide character (U+100) in lc at - line 14.
708Wide character (U+101) in lcfirst at - line 15.
709Wide character (U+102) in fc at - line 16.
710Wide character (U+103) in uc at - line 17.
711Wide character (U+104) in ucfirst at - line 18.
008e8e82
KW
712########
713# NAME Wide character in UTF-8 locale
714require '../loc_tools.pl';
715unless (locales_enabled('LC_CTYPE')) {
716 print("SKIPPED\n# locales not available\n"),exit;
717}
718eval { require POSIX; POSIX->import("locale_h") };
719if ($@) {
720 print("SKIPPED\n# no POSIX\n"),exit;
721}
722my @utf8_locales = find_utf8_ctype_locale();
723unless (@utf8_locales) {
724 print("SKIPPED\n# no UTF-8 locales\n"),exit;
725}
726use warnings 'locale';
727use feature 'fc';
728use locale;
729setlocale(&POSIX::LC_CTYPE, $utf8_locales[0]);
730my $a;
731$a = lc("\x{100}");
732$a = lcfirst("\x{101}");
733$a = fc("\x{102}");
734$a = uc("\x{103}");
735$a = ucfirst("\x{104}");
736EXPECT
760c7c2f 737########
76513bdc
KW
738# NAME Deprecation of too-large code points
739require "../test.pl";
740use warnings 'non_unicode';
741my $max_cp = ~0 >> 1;
742my $max_char = chr $max_cp;
743my $to_warn_cp = $max_cp + 1;
744my $to_warn_char = chr $to_warn_cp;
745$max_char =~ /[\x{110000}\P{Unassigned}]/;
746$to_warn_char =~ /[\x{110000}\P{Unassigned}]/;
747my $temp = qr/$max_char/;
748$temp = qr/$to_warn_char/;
749$temp = uc($max_char);
750$temp = uc($to_warn_char);
751my $file = tempfile();
752open(my $fh, "+>:utf8", $file);
753print $fh $max_char, "\n";
754print $fh $to_warn_char, "\n";
755close $fh;
756EXPECT
757OPTION regex
758Use of code point 0x80+ is deprecated; the permissible max is 0x7F+\. This will be fatal in Perl 5\.28 at - line \d+.
759Use of code point 0x80+ is deprecated; the permissible max is 0x7F+\. This will be fatal in Perl 5\.28 in pattern match \(m//\) at - line \d+.
760Use of code point 0x80+ is deprecated; the permissible max is 0x7F+\. This will be fatal in Perl 5\.28 in regexp compilation at - line \d+.
761Use of code point 0x80+ is deprecated; the permissible max is 0x7F+\. This will be fatal in Perl 5\.28 in regexp compilation at - line \d+.
762Use of code point 0x80+ is deprecated; the permissible max is 0x7F+\. This will be fatal in Perl 5\.28 at - line \d+.
763Use of code point 0x80+ is deprecated; the permissible max is 0x7F+\. This will be fatal in Perl 5\.28 in regexp compilation at - line \d+.
764Operation "uc" returns its argument for non-Unicode code point 0x7F+ at - line \d+.
765Use of code point 0x80+ is deprecated; the permissible max is 0x7F+\. This will be fatal in Perl 5\.28 in uc at - line \d+.
766Use of code point 0x80+ is deprecated; the permissible max is 0x7F+\. This will be fatal in Perl 5\.28 at - line \d+.
767Operation "uc" returns its argument for non-Unicode code point 0x80+ at - line \d+.
768Code point 0x7F+ is not Unicode, may not be portable in print at - line \d+.
769Use of code point 0x80+ is deprecated; the permissible max is 0x7F+\. This will be fatal in Perl 5\.28 in print at - line \d+.
770########
710740a6 771# NAME [perl #127262]
e88136ce
KW
772BEGIN{
773 if (ord('A') == 193) {
774 print "SKIPPED\n# ebcdic platforms generates different Malformed UTF-8 warnings.";
775 exit 0;
776 }
a8b2934d
JH
777 use Config;
778 unless ($Double{double_style_ieee}) {
779 print "SKIPPED\n# non-IEEE fp range.";
780 exit 0;
781 }
782{};$^H=eval'2**400'}Â
710740a6 783EXPECT
a8b2934d 784Malformed UTF-8 character: \xc2\x0a (unexpected non-continuation byte 0x0a, immediately after start byte 0xc2; need 2 bytes, got 1) at - line 11.
1d5030e1
KW
785########
786# NAME [perl #131646]
787BEGIN{
788 if (ord('A') == 193) {
789 print "SKIPPED\n# ebcdic platforms generates different Malformed UTF-8 warnings.";
790 exit 0;
791 }
792}
793no warnings;
794use warnings 'utf8';
795for(uc 0..t){0~~pack"UXp>",exp}
796EXPECT
797Malformed UTF-8 character: \xc2\x00 (unexpected non-continuation byte 0x00, immediately after start byte 0xc2; need 2 bytes, got 1) in smart match at - line 9.