This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
utf8_heavy.pl: Pass up USER_DEFINED to outside swash
[perl5.git] / t / lib / warnings / utf8
CommitLineData
f0df466a
JH
1
2 utf8.c AOK
3
4 [utf8_to_uv]
5 Malformed UTF-8 character
6 my $a = ord "\x80" ;
7
8 Malformed UTF-8 character
9 my $a = ord "\xf080" ;
10 <<<<<< this warning can't be easily triggered from perl anymore
11
12 [utf16_to_utf8]
13 Malformed UTF-16 surrogate
93f09d7b 14 <<<<<< Add a test when something actually calls utf16_to_utf8
f0df466a
JH
15
16__END__
17# utf8.c [utf8_to_uv] -W
18BEGIN {
19 if (ord('A') == 193) {
20 print "SKIPPED\n# ebcdic platforms do not generate Malformed UTF-8 warnings.";
21 exit 0;
22 }
23}
24use utf8 ;
25my $a = "snøstorm" ;
26{
27 no warnings 'utf8' ;
28 my $a = "snøstorm";
29 use warnings 'utf8' ;
30 my $a = "snøstorm";
31}
32EXPECT
41432148
JH
33Malformed UTF-8 character (unexpected non-continuation byte 0x73, immediately after start byte 0xf8) at - line 9.
34Malformed UTF-8 character (unexpected non-continuation byte 0x73, immediately after start byte 0xf8) at - line 14.
f0df466a 35########
507b9800 36use warnings 'utf8';
9ae3ac1a
KW
37my $d7ff = uc(chr(0xD7FF));
38my $d800 = uc(chr(0xD800));
39my $dfff = uc(chr(0xDFFF));
40my $e000 = uc(chr(0xE000));
41my $feff = uc(chr(0xFEFF));
42my $fffd = uc(chr(0xFFFD));
43my $fffe = uc(chr(0xFFFE));
44my $ffff = uc(chr(0xFFFF));
45my $hex4 = uc(chr(0x10000));
46my $hex5 = uc(chr(0x100000));
47my $maxm1 = uc(chr(0x10FFFE));
48my $max = uc(chr(0x10FFFF));
49my $nonUnicode = uc(chr(0x110000));
507b9800 50no warnings 'utf8';
9ae3ac1a
KW
51my $d7ff = uc(chr(0xD7FF));
52my $d800 = uc(chr(0xD800));
53my $dfff = uc(chr(0xDFFF));
54my $e000 = uc(chr(0xE000));
55my $feff = uc(chr(0xFEFF));
56my $fffd = uc(chr(0xFFFD));
57my $fffe = uc(chr(0xFFFE));
58my $ffff = uc(chr(0xFFFF));
59my $hex4 = uc(chr(0x10000));
60my $hex5 = uc(chr(0x100000));
61my $maxm1 = uc(chr(0x10FFFE));
62my $max = uc(chr(0x10FFFF));
63my $nonUnicode = uc(chr(0x110000));
507b9800 64EXPECT
9ae3ac1a
KW
65Operation "uc" returns its argument for UTF-16 surrogate U+D800 at - line 3.
66Operation "uc" returns its argument for UTF-16 surrogate U+DFFF at - line 4.
67Operation "uc" returns its argument for non-Unicode code point 0x110000 at - line 14.
507b9800 68########
62961d2e 69use warnings 'utf8';
8457b38f
KW
70my $d800 = uc(chr(0xD800));
71my $nonUnicode = uc(chr(0x110000));
72no warnings 'surrogate';
73my $d800 = uc(chr(0xD800));
74my $nonUnicode = uc(chr(0x110000));
75EXPECT
76Operation "uc" returns its argument for UTF-16 surrogate U+D800 at - line 2.
77Operation "uc" returns its argument for non-Unicode code point 0x110000 at - line 3.
78Operation "uc" returns its argument for non-Unicode code point 0x110000 at - line 6.
79########
80use warnings 'utf8';
81my $d800 = uc(chr(0xD800));
82my $nonUnicode = uc(chr(0x110000));
83my $big_nonUnicode = uc(chr(0x8000_0000));
84no warnings 'non_unicode';
85my $d800 = uc(chr(0xD800));
86my $nonUnicode = uc(chr(0x110000));
87my $big_nonUnicode = uc(chr(0x8000_0000));
88EXPECT
89Operation "uc" returns its argument for UTF-16 surrogate U+D800 at - line 2.
90Operation "uc" returns its argument for non-Unicode code point 0x110000 at - line 3.
91Operation "uc" returns its argument for non-Unicode code point 0x80000000 at - line 4.
92Operation "uc" returns its argument for UTF-16 surrogate U+D800 at - line 6.
93########
94use warnings 'utf8';
9ae3ac1a
KW
95my $d7ff = lc pack("U", 0xD7FF);
96my $d800 = lc pack("U", 0xD800);
97my $dfff = lc pack("U", 0xDFFF);
98my $e000 = lc pack("U", 0xE000);
99my $feff = lc pack("U", 0xFEFF);
100my $fffd = lc pack("U", 0xFFFD);
101my $fffe = lc pack("U", 0xFFFE);
102my $ffff = lc pack("U", 0xFFFF);
103my $hex4 = lc pack("U", 0x10000);
104my $hex5 = lc pack("U", 0x100000);
105my $maxm1 = lc pack("U", 0x10FFFE);
106my $max = lc pack("U", 0x10FFFF);
107my $nonUnicode = lc(pack("U", 0x110000));
62961d2e 108no warnings 'utf8';
9ae3ac1a
KW
109my $d7ff = lc pack("U", 0xD7FF);
110my $d800 = lc pack("U", 0xD800);
111my $dfff = lc pack("U", 0xDFFF);
112my $e000 = lc pack("U", 0xE000);
113my $feff = lc pack("U", 0xFEFF);
114my $fffd = lc pack("U", 0xFFFD);
115my $fffe = lc pack("U", 0xFFFE);
116my $ffff = lc pack("U", 0xFFFF);
117my $hex4 = lc pack("U", 0x10000);
118my $hex5 = lc pack("U", 0x100000);
119my $maxm1 = lc pack("U", 0x10FFFE);
120my $max = lc pack("U", 0x10FFFF);
121my $nonUnicode = lc(pack("U", 0x110000));
62961d2e 122EXPECT
9ae3ac1a
KW
123Operation "lc" returns its argument for UTF-16 surrogate U+D800 at - line 3.
124Operation "lc" returns its argument for UTF-16 surrogate U+DFFF at - line 4.
125Operation "lc" returns its argument for non-Unicode code point 0x110000 at - line 14.
62961d2e
JH
126########
127use warnings 'utf8';
9ae3ac1a
KW
128my $d7ff = ucfirst "\x{D7FF}";
129my $d800 = ucfirst "\x{D800}";
130my $dfff = ucfirst "\x{DFFF}";
131my $e000 = ucfirst "\x{E000}";
132my $feff = ucfirst "\x{FEFF}";
133my $fffd = ucfirst "\x{FFFD}";
134my $fffe = ucfirst "\x{FFFE}";
135my $ffff = ucfirst "\x{FFFF}";
136my $hex4 = ucfirst "\x{10000}";
137my $hex5 = ucfirst "\x{100000}";
138my $maxm1 = ucfirst "\x{10FFFE}";
139my $max = ucfirst "\x{10FFFF}";
140my $nonUnicode = ucfirst "\x{110000}";
62961d2e 141no warnings 'utf8';
9ae3ac1a
KW
142my $d7ff = ucfirst "\x{D7FF}";
143my $d800 = ucfirst "\x{D800}";
144my $dfff = ucfirst "\x{DFFF}";
145my $e000 = ucfirst "\x{E000}";
146my $feff = ucfirst "\x{FEFF}";
147my $fffd = ucfirst "\x{FFFD}";
148my $fffe = ucfirst "\x{FFFE}";
149my $ffff = ucfirst "\x{FFFF}";
150my $hex4 = ucfirst "\x{10000}";
151my $hex5 = ucfirst "\x{100000}";
152my $maxm1 = ucfirst "\x{10FFFE}";
153my $max = ucfirst "\x{10FFFF}";
154my $nonUnicode = ucfirst "\x{110000}";
155EXPECT
156Operation "ucfirst" returns its argument for UTF-16 surrogate U+D800 at - line 3.
157Operation "ucfirst" returns its argument for UTF-16 surrogate U+DFFF at - line 4.
158Operation "ucfirst" returns its argument for non-Unicode code point 0x110000 at - line 14.
159########
160use warnings 'utf8';
161chr(0xD7FF) =~ /\p{Any}/;
162chr(0xD800) =~ /\p{Any}/;
163chr(0xDFFF) =~ /\p{Any}/;
164chr(0xE000) =~ /\p{Any}/;
165chr(0xFEFF) =~ /\p{Any}/;
166chr(0xFFFD) =~ /\p{Any}/;
167chr(0xFFFE) =~ /\p{Any}/;
168chr(0xFFFF) =~ /\p{Any}/;
169chr(0x10000) =~ /\p{Any}/;
170chr(0x100000) =~ /\p{Any}/;
171chr(0x10FFFE) =~ /\p{Any}/;
172chr(0x10FFFF) =~ /\p{Any}/;
173chr(0x110000) =~ /\p{Any}/;
174no warnings 'utf8';
175chr(0xD7FF) =~ /\p{Any}/;
176chr(0xD800) =~ /\p{Any}/;
177chr(0xDFFF) =~ /\p{Any}/;
178chr(0xE000) =~ /\p{Any}/;
179chr(0xFEFF) =~ /\p{Any}/;
180chr(0xFFFD) =~ /\p{Any}/;
181chr(0xFFFE) =~ /\p{Any}/;
182chr(0xFFFF) =~ /\p{Any}/;
183chr(0x10000) =~ /\p{Any}/;
184chr(0x100000) =~ /\p{Any}/;
185chr(0x10FFFE) =~ /\p{Any}/;
186chr(0x10FFFF) =~ /\p{Any}/;
187chr(0x110000) =~ /\p{Any}/;
188EXPECT
c634fdd3 189Code point 0x110000 is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 14.
9ae3ac1a 190########
8457b38f
KW
191use warnings 'utf8';
192chr(0x110000) =~ /\p{Any}/;
193no warnings 'non_unicode';
194chr(0x110000) =~ /\p{Any}/;
195EXPECT
c634fdd3 196Code point 0x110000 is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 2.
8457b38f 197########
9ae3ac1a
KW
198require "../test.pl";
199use warnings 'utf8';
200my $file = tempfile();
201open(my $fh, "+>:utf8", $file);
202print $fh "\x{D7FF}", "\n";
203print $fh "\x{D800}", "\n";
204print $fh "\x{DFFF}", "\n";
205print $fh "\x{E000}", "\n";
206print $fh "\x{FDCF}", "\n";
207print $fh "\x{FDD0}", "\n";
208print $fh "\x{FDEF}", "\n";
209print $fh "\x{FDF0}", "\n";
210print $fh "\x{FEFF}", "\n";
211print $fh "\x{FFFD}", "\n";
212print $fh "\x{FFFE}", "\n";
213print $fh "\x{FFFF}", "\n";
214print $fh "\x{10000}", "\n";
215print $fh "\x{1FFFE}", "\n";
216print $fh "\x{1FFFF}", "\n";
217print $fh "\x{2FFFE}", "\n";
218print $fh "\x{2FFFF}", "\n";
219print $fh "\x{3FFFE}", "\n";
220print $fh "\x{3FFFF}", "\n";
221print $fh "\x{4FFFE}", "\n";
222print $fh "\x{4FFFF}", "\n";
223print $fh "\x{5FFFE}", "\n";
224print $fh "\x{5FFFF}", "\n";
225print $fh "\x{6FFFE}", "\n";
226print $fh "\x{6FFFF}", "\n";
227print $fh "\x{7FFFE}", "\n";
228print $fh "\x{7FFFF}", "\n";
229print $fh "\x{8FFFE}", "\n";
230print $fh "\x{8FFFF}", "\n";
231print $fh "\x{9FFFE}", "\n";
232print $fh "\x{9FFFF}", "\n";
233print $fh "\x{AFFFE}", "\n";
234print $fh "\x{AFFFF}", "\n";
235print $fh "\x{BFFFE}", "\n";
236print $fh "\x{BFFFF}", "\n";
237print $fh "\x{CFFFE}", "\n";
238print $fh "\x{CFFFF}", "\n";
239print $fh "\x{DFFFE}", "\n";
240print $fh "\x{DFFFF}", "\n";
241print $fh "\x{EFFFE}", "\n";
242print $fh "\x{EFFFF}", "\n";
243print $fh "\x{FFFFE}", "\n";
244print $fh "\x{FFFFF}", "\n";
245print $fh "\x{100000}", "\n";
246print $fh "\x{10FFFE}", "\n";
247print $fh "\x{10FFFF}", "\n";
248print $fh "\x{110000}", "\n";
249close $fh;
250EXPECT
251Unicode surrogate U+D800 is illegal in UTF-8 at - line 6.
252Unicode surrogate U+DFFF is illegal in UTF-8 at - line 7.
253Unicode non-character U+FDD0 is illegal for open interchange at - line 10.
254Unicode non-character U+FDEF is illegal for open interchange at - line 11.
255Unicode non-character U+FFFE is illegal for open interchange at - line 15.
256Unicode non-character U+FFFF is illegal for open interchange at - line 16.
257Unicode non-character U+1FFFE is illegal for open interchange at - line 18.
258Unicode non-character U+1FFFF is illegal for open interchange at - line 19.
259Unicode non-character U+2FFFE is illegal for open interchange at - line 20.
260Unicode non-character U+2FFFF is illegal for open interchange at - line 21.
261Unicode non-character U+3FFFE is illegal for open interchange at - line 22.
262Unicode non-character U+3FFFF is illegal for open interchange at - line 23.
263Unicode non-character U+4FFFE is illegal for open interchange at - line 24.
264Unicode non-character U+4FFFF is illegal for open interchange at - line 25.
265Unicode non-character U+5FFFE is illegal for open interchange at - line 26.
266Unicode non-character U+5FFFF is illegal for open interchange at - line 27.
267Unicode non-character U+6FFFE is illegal for open interchange at - line 28.
268Unicode non-character U+6FFFF is illegal for open interchange at - line 29.
269Unicode non-character U+7FFFE is illegal for open interchange at - line 30.
270Unicode non-character U+7FFFF is illegal for open interchange at - line 31.
271Unicode non-character U+8FFFE is illegal for open interchange at - line 32.
272Unicode non-character U+8FFFF is illegal for open interchange at - line 33.
273Unicode non-character U+9FFFE is illegal for open interchange at - line 34.
274Unicode non-character U+9FFFF is illegal for open interchange at - line 35.
275Unicode non-character U+AFFFE is illegal for open interchange at - line 36.
276Unicode non-character U+AFFFF is illegal for open interchange at - line 37.
277Unicode non-character U+BFFFE is illegal for open interchange at - line 38.
278Unicode non-character U+BFFFF is illegal for open interchange at - line 39.
279Unicode non-character U+CFFFE is illegal for open interchange at - line 40.
280Unicode non-character U+CFFFF is illegal for open interchange at - line 41.
281Unicode non-character U+DFFFE is illegal for open interchange at - line 42.
282Unicode non-character U+DFFFF is illegal for open interchange at - line 43.
283Unicode non-character U+EFFFE is illegal for open interchange at - line 44.
284Unicode non-character U+EFFFF is illegal for open interchange at - line 45.
285Unicode non-character U+FFFFE is illegal for open interchange at - line 46.
286Unicode non-character U+FFFFF is illegal for open interchange at - line 47.
287Unicode non-character U+10FFFE is illegal for open interchange at - line 49.
288Unicode non-character U+10FFFF is illegal for open interchange at - line 50.
289Code point 0x110000 is not Unicode, may not be portable at - line 51.
290########
291require "../test.pl";
8457b38f
KW
292use warnings 'utf8';
293my $file = tempfile();
294open(my $fh, "+>:utf8", $file);
295print $fh "\x{D800}", "\n";
296print $fh "\x{FFFF}", "\n";
297print $fh "\x{110000}", "\n";
298close $fh;
299EXPECT
300Unicode surrogate U+D800 is illegal in UTF-8 at - line 5.
301Unicode non-character U+FFFF is illegal for open interchange at - line 6.
302Code point 0x110000 is not Unicode, may not be portable at - line 7.
303########
304require "../test.pl";
305use warnings 'utf8';
306no warnings 'surrogate';
307my $file = tempfile();
308open(my $fh, "+>:utf8", $file);
309print $fh "\x{D800}", "\n";
310print $fh "\x{FFFF}", "\n";
311print $fh "\x{110000}", "\n";
312close $fh;
313EXPECT
314Unicode non-character U+FFFF is illegal for open interchange at - line 7.
315Code point 0x110000 is not Unicode, may not be portable at - line 8.
316########
317require "../test.pl";
318use warnings 'utf8';
319no warnings 'nonchar';
320my $file = tempfile();
321open(my $fh, "+>:utf8", $file);
322print $fh "\x{D800}", "\n";
323print $fh "\x{FFFF}", "\n";
324print $fh "\x{110000}", "\n";
325close $fh;
326EXPECT
327Unicode surrogate U+D800 is illegal in UTF-8 at - line 6.
328Code point 0x110000 is not Unicode, may not be portable at - line 8.
329########
330require "../test.pl";
331use warnings 'utf8';
332no warnings 'non_unicode';
333my $file = tempfile();
334open(my $fh, "+>:utf8", $file);
335print $fh "\x{D800}", "\n";
336print $fh "\x{FFFF}", "\n";
337print $fh "\x{110000}", "\n";
338close $fh;
339EXPECT
340Unicode surrogate U+D800 is illegal in UTF-8 at - line 6.
341Unicode non-character U+FFFF is illegal for open interchange at - line 7.
342########
343require "../test.pl";
9ae3ac1a
KW
344no warnings 'utf8';
345my $file = tempfile();
346open(my $fh, "+>:utf8", $file);
347print $fh "\x{D7FF}", "\n";
348print $fh "\x{D800}", "\n";
349print $fh "\x{DFFF}", "\n";
350print $fh "\x{E000}", "\n";
351print $fh "\x{FDCF}", "\n";
352print $fh "\x{FDD0}", "\n";
353print $fh "\x{FDEF}", "\n";
354print $fh "\x{FDF0}", "\n";
355print $fh "\x{FEFF}", "\n";
356print $fh "\x{FFFD}", "\n";
357print $fh "\x{FFFE}", "\n";
358print $fh "\x{FFFF}", "\n";
359print $fh "\x{10000}", "\n";
360print $fh "\x{1FFFE}", "\n";
361print $fh "\x{1FFFF}", "\n";
362print $fh "\x{2FFFE}", "\n";
363print $fh "\x{2FFFF}", "\n";
364print $fh "\x{3FFFE}", "\n";
365print $fh "\x{3FFFF}", "\n";
366print $fh "\x{4FFFE}", "\n";
367print $fh "\x{4FFFF}", "\n";
368print $fh "\x{5FFFE}", "\n";
369print $fh "\x{5FFFF}", "\n";
370print $fh "\x{6FFFE}", "\n";
371print $fh "\x{6FFFF}", "\n";
372print $fh "\x{7FFFE}", "\n";
373print $fh "\x{7FFFF}", "\n";
374print $fh "\x{8FFFE}", "\n";
375print $fh "\x{8FFFF}", "\n";
376print $fh "\x{9FFFE}", "\n";
377print $fh "\x{9FFFF}", "\n";
378print $fh "\x{AFFFE}", "\n";
379print $fh "\x{AFFFF}", "\n";
380print $fh "\x{BFFFE}", "\n";
381print $fh "\x{BFFFF}", "\n";
382print $fh "\x{CFFFE}", "\n";
383print $fh "\x{CFFFF}", "\n";
384print $fh "\x{DFFFE}", "\n";
385print $fh "\x{DFFFF}", "\n";
386print $fh "\x{EFFFE}", "\n";
387print $fh "\x{EFFFF}", "\n";
388print $fh "\x{FFFFE}", "\n";
389print $fh "\x{FFFFF}", "\n";
390print $fh "\x{100000}", "\n";
391print $fh "\x{10FFFE}", "\n";
392print $fh "\x{10FFFF}", "\n";
393print $fh "\x{110000}", "\n";
394close $fh;
62961d2e 395EXPECT