This is a live mirror of the Perl 5 development repository currently hosted at https://github.com/perl/perl5
perlapi: Account for EBCDIC extend UTF-8 range
[perl5.git] / t / lib / warnings / utf8
CommitLineData
f0df466a
JH
1
2 utf8.c AOK
3
4b88fb76 4 [utf8_to_uvchr_buf]
f0df466a
JH
5 Malformed UTF-8 character
6 my $a = ord "\x80" ;
7
8 Malformed UTF-8 character
9 my $a = ord "\xf080" ;
10 <<<<<< this warning can't be easily triggered from perl anymore
11
12 [utf16_to_utf8]
13 Malformed UTF-16 surrogate
93f09d7b 14 <<<<<< Add a test when something actually calls utf16_to_utf8
f0df466a
JH
15
16__END__
4b88fb76 17# utf8.c [utf8_to_uvchr_buf] -W
f0df466a
JH
18BEGIN {
19 if (ord('A') == 193) {
72b4e0d1 20 print "SKIPPED\n# ebcdic platforms generates different Malformed UTF-8 warnings.";
f0df466a
JH
21 exit 0;
22 }
23}
24use utf8 ;
25my $a = "snøstorm" ;
26{
27 no warnings 'utf8' ;
28 my $a = "snøstorm";
29 use warnings 'utf8' ;
30 my $a = "snøstorm";
31}
32EXPECT
41432148
JH
33Malformed UTF-8 character (unexpected non-continuation byte 0x73, immediately after start byte 0xf8) at - line 9.
34Malformed UTF-8 character (unexpected non-continuation byte 0x73, immediately after start byte 0xf8) at - line 14.
f0df466a 35########
507b9800 36use warnings 'utf8';
9ae3ac1a
KW
37my $d7ff = uc(chr(0xD7FF));
38my $d800 = uc(chr(0xD800));
39my $dfff = uc(chr(0xDFFF));
40my $e000 = uc(chr(0xE000));
41my $feff = uc(chr(0xFEFF));
42my $fffd = uc(chr(0xFFFD));
43my $fffe = uc(chr(0xFFFE));
44my $ffff = uc(chr(0xFFFF));
45my $hex4 = uc(chr(0x10000));
46my $hex5 = uc(chr(0x100000));
47my $maxm1 = uc(chr(0x10FFFE));
48my $max = uc(chr(0x10FFFF));
49my $nonUnicode = uc(chr(0x110000));
507b9800 50no warnings 'utf8';
9ae3ac1a
KW
51my $d7ff = uc(chr(0xD7FF));
52my $d800 = uc(chr(0xD800));
53my $dfff = uc(chr(0xDFFF));
54my $e000 = uc(chr(0xE000));
55my $feff = uc(chr(0xFEFF));
56my $fffd = uc(chr(0xFFFD));
57my $fffe = uc(chr(0xFFFE));
58my $ffff = uc(chr(0xFFFF));
59my $hex4 = uc(chr(0x10000));
60my $hex5 = uc(chr(0x100000));
61my $maxm1 = uc(chr(0x10FFFE));
62my $max = uc(chr(0x10FFFF));
63my $nonUnicode = uc(chr(0x110000));
507b9800 64EXPECT
9ae3ac1a
KW
65Operation "uc" returns its argument for UTF-16 surrogate U+D800 at - line 3.
66Operation "uc" returns its argument for UTF-16 surrogate U+DFFF at - line 4.
67Operation "uc" returns its argument for non-Unicode code point 0x110000 at - line 14.
507b9800 68########
62961d2e 69use warnings 'utf8';
8457b38f
KW
70my $d800 = uc(chr(0xD800));
71my $nonUnicode = uc(chr(0x110000));
72no warnings 'surrogate';
73my $d800 = uc(chr(0xD800));
74my $nonUnicode = uc(chr(0x110000));
75EXPECT
76Operation "uc" returns its argument for UTF-16 surrogate U+D800 at - line 2.
77Operation "uc" returns its argument for non-Unicode code point 0x110000 at - line 3.
78Operation "uc" returns its argument for non-Unicode code point 0x110000 at - line 6.
79########
80use warnings 'utf8';
81my $d800 = uc(chr(0xD800));
82my $nonUnicode = uc(chr(0x110000));
8457b38f
KW
83no warnings 'non_unicode';
84my $d800 = uc(chr(0xD800));
85my $nonUnicode = uc(chr(0x110000));
8457b38f
KW
86EXPECT
87Operation "uc" returns its argument for UTF-16 surrogate U+D800 at - line 2.
88Operation "uc" returns its argument for non-Unicode code point 0x110000 at - line 3.
9415f659
KW
89Operation "uc" returns its argument for UTF-16 surrogate U+D800 at - line 5.
90########
9415f659
KW
91use warnings 'utf8';
92my $big_nonUnicode = uc(chr(0x8000_0000));
93no warnings 'non_unicode';
94my $big_nonUnicode = uc(chr(0x8000_0000));
95EXPECT
c0236afe 96Operation "uc" returns its argument for non-Unicode code point 0x80000000 at - line 2.
8457b38f
KW
97########
98use warnings 'utf8';
9ae3ac1a
KW
99my $d7ff = lc pack("U", 0xD7FF);
100my $d800 = lc pack("U", 0xD800);
101my $dfff = lc pack("U", 0xDFFF);
102my $e000 = lc pack("U", 0xE000);
103my $feff = lc pack("U", 0xFEFF);
104my $fffd = lc pack("U", 0xFFFD);
105my $fffe = lc pack("U", 0xFFFE);
106my $ffff = lc pack("U", 0xFFFF);
107my $hex4 = lc pack("U", 0x10000);
108my $hex5 = lc pack("U", 0x100000);
109my $maxm1 = lc pack("U", 0x10FFFE);
110my $max = lc pack("U", 0x10FFFF);
111my $nonUnicode = lc(pack("U", 0x110000));
62961d2e 112no warnings 'utf8';
9ae3ac1a
KW
113my $d7ff = lc pack("U", 0xD7FF);
114my $d800 = lc pack("U", 0xD800);
115my $dfff = lc pack("U", 0xDFFF);
116my $e000 = lc pack("U", 0xE000);
117my $feff = lc pack("U", 0xFEFF);
118my $fffd = lc pack("U", 0xFFFD);
119my $fffe = lc pack("U", 0xFFFE);
120my $ffff = lc pack("U", 0xFFFF);
121my $hex4 = lc pack("U", 0x10000);
122my $hex5 = lc pack("U", 0x100000);
123my $maxm1 = lc pack("U", 0x10FFFE);
124my $max = lc pack("U", 0x10FFFF);
125my $nonUnicode = lc(pack("U", 0x110000));
62961d2e 126EXPECT
9ae3ac1a
KW
127Operation "lc" returns its argument for UTF-16 surrogate U+D800 at - line 3.
128Operation "lc" returns its argument for UTF-16 surrogate U+DFFF at - line 4.
129Operation "lc" returns its argument for non-Unicode code point 0x110000 at - line 14.
62961d2e
JH
130########
131use warnings 'utf8';
9ae3ac1a
KW
132my $d7ff = ucfirst "\x{D7FF}";
133my $d800 = ucfirst "\x{D800}";
134my $dfff = ucfirst "\x{DFFF}";
135my $e000 = ucfirst "\x{E000}";
136my $feff = ucfirst "\x{FEFF}";
137my $fffd = ucfirst "\x{FFFD}";
138my $fffe = ucfirst "\x{FFFE}";
139my $ffff = ucfirst "\x{FFFF}";
140my $hex4 = ucfirst "\x{10000}";
141my $hex5 = ucfirst "\x{100000}";
142my $maxm1 = ucfirst "\x{10FFFE}";
143my $max = ucfirst "\x{10FFFF}";
144my $nonUnicode = ucfirst "\x{110000}";
62961d2e 145no warnings 'utf8';
9ae3ac1a
KW
146my $d7ff = ucfirst "\x{D7FF}";
147my $d800 = ucfirst "\x{D800}";
148my $dfff = ucfirst "\x{DFFF}";
149my $e000 = ucfirst "\x{E000}";
150my $feff = ucfirst "\x{FEFF}";
151my $fffd = ucfirst "\x{FFFD}";
152my $fffe = ucfirst "\x{FFFE}";
153my $ffff = ucfirst "\x{FFFF}";
154my $hex4 = ucfirst "\x{10000}";
155my $hex5 = ucfirst "\x{100000}";
156my $maxm1 = ucfirst "\x{10FFFE}";
157my $max = ucfirst "\x{10FFFF}";
158my $nonUnicode = ucfirst "\x{110000}";
159EXPECT
160Operation "ucfirst" returns its argument for UTF-16 surrogate U+D800 at - line 3.
161Operation "ucfirst" returns its argument for UTF-16 surrogate U+DFFF at - line 4.
162Operation "ucfirst" returns its argument for non-Unicode code point 0x110000 at - line 14.
163########
2d88a86a 164# NAME Matching \p{} against above-Unicode
9ae3ac1a
KW
165use warnings 'utf8';
166chr(0xD7FF) =~ /\p{Any}/;
167chr(0xD800) =~ /\p{Any}/;
168chr(0xDFFF) =~ /\p{Any}/;
169chr(0xE000) =~ /\p{Any}/;
170chr(0xFEFF) =~ /\p{Any}/;
171chr(0xFFFD) =~ /\p{Any}/;
172chr(0xFFFE) =~ /\p{Any}/;
173chr(0xFFFF) =~ /\p{Any}/;
174chr(0x10000) =~ /\p{Any}/;
175chr(0x100000) =~ /\p{Any}/;
176chr(0x10FFFE) =~ /\p{Any}/;
177chr(0x10FFFF) =~ /\p{Any}/;
2d88a86a
KW
178chr(0x110000) =~ /[\p{Any}]/;
179chr(0x110001) =~ /[\w\p{Any}]/;
180chr(0x10FFFF) =~ /\p{All}/;
181chr(0x110002) =~ /[\w\p{All}]/;
182chr(0x110003) =~ /[\p{XPosixWord}]/;
183chr(0x110004) =~ /[\P{XPosixWord}]/;
184chr(0x110005) =~ /^[\p{Unassigned}]/;
185chr(0x110006) =~ /^[\P{Unassigned}]/;
186# Only Unicode properties give non-Unicode warnings, and only those properties
187# which do match above Unicode; and not when something else in the class
188# matches above Unicode. Below we test three ways where something outside the
189# property may match non-Unicode: a code point above it, a class \S that we
190# know at compile time doesn't, and a class \W whose values aren't (at the time
191# of this writing) specified at compile time, but which wouldn't match
5073ffbd
KW
192chr(0x110050) =~ /\w/;
193chr(0x110051) =~ /\W/;
194chr(0x110052) =~ /\d/;
195chr(0x110053) =~ /\D/;
196chr(0x110054) =~ /\s/;
197chr(0x110055) =~ /\S/;
198chr(0x110056) =~ /[[:word:]]/;
199chr(0x110057) =~ /[[:^word:]]/;
200chr(0x110058) =~ /[[:alnum:]]/;
201chr(0x110059) =~ /[[:^alnum:]]/;
202chr(0x11005A) =~ /[[:space:]]/;
203chr(0x11005B) =~ /[[:^space:]]/;
204chr(0x11005C) =~ /[[:digit:]]/;
205chr(0x11005D) =~ /[[:^digit:]]/;
206chr(0x11005E) =~ /[[:alpha:]]/;
207chr(0x11005F) =~ /[[:^alpha:]]/;
208chr(0x110060) =~ /[[:ascii:]]/;
209chr(0x110061) =~ /[[:^ascii:]]/;
210chr(0x110062) =~ /[[:cntrl:]]/;
211chr(0x110063) =~ /[[:^cntrl:]]/;
212chr(0x110064) =~ /[[:graph:]]/;
213chr(0x110065) =~ /[[:^graph:]]/;
214chr(0x110066) =~ /[[:lower:]]/;
215chr(0x110067) =~ /[[:^lower:]]/;
216chr(0x110068) =~ /[[:print:]]/;
217chr(0x110069) =~ /[[:^print:]]/;
218chr(0x11006A) =~ /[[:punct:]]/;
219chr(0x11006B) =~ /[[:^punct:]]/;
220chr(0x11006C) =~ /[[:upper:]]/;
221chr(0x11006D) =~ /[[:^upper:]]/;
222chr(0x11006E) =~ /[[:xdigit:]]/;
223chr(0x11006F) =~ /[[:^xdigit:]]/;
224chr(0x110070) =~ /[[:blank:]]/;
225chr(0x110071) =~ /[[:^blank:]]/;
2d88a86a
KW
226chr(0x111010) =~ /[\W\p{Unassigned}]/;
227chr(0x111011) =~ /[\W\P{Unassigned}]/;
228chr(0x112010) =~ /[\S\p{Unassigned}]/;
229chr(0x112011) =~ /[\S\P{Unassigned}]/;
230chr(0x113010) =~ /[\x{110000}\p{Unassigned}]/;
231chr(0x113011) =~ /[\x{110000}\P{Unassigned}]/;
9ae3ac1a
KW
232no warnings 'utf8';
233chr(0xD7FF) =~ /\p{Any}/;
234chr(0xD800) =~ /\p{Any}/;
235chr(0xDFFF) =~ /\p{Any}/;
236chr(0xE000) =~ /\p{Any}/;
237chr(0xFEFF) =~ /\p{Any}/;
238chr(0xFFFD) =~ /\p{Any}/;
239chr(0xFFFE) =~ /\p{Any}/;
240chr(0xFFFF) =~ /\p{Any}/;
241chr(0x10000) =~ /\p{Any}/;
242chr(0x100000) =~ /\p{Any}/;
243chr(0x10FFFE) =~ /\p{Any}/;
244chr(0x10FFFF) =~ /\p{Any}/;
2d88a86a
KW
245chr(0x110000) =~ /[\p{Any}]/;
246chr(0x110001) =~ /[\w\p{Any}]/;
247chr(0x10FFFF) =~ /\p{All}/;
248chr(0x110002) =~ /[\w\p{All}]/;
249chr(0x110003) =~ /[\p{XPosixWord}]/;
250chr(0x110004) =~ /[\P{XPosixWord}]/;
251chr(0x110005) =~ /^[\p{Unassigned}]/;
252chr(0x110006) =~ /^[\P{Unassigned}]/;
5073ffbd
KW
253chr(0x110050) =~ /\w/;
254chr(0x110051) =~ /\W/;
255chr(0x110052) =~ /\d/;
256chr(0x110053) =~ /\D/;
257chr(0x110054) =~ /\s/;
258chr(0x110055) =~ /\S/;
259chr(0x110056) =~ /[[:word:]]/;
260chr(0x110057) =~ /[[:^word:]]/;
261chr(0x110058) =~ /[[:alnum:]]/;
262chr(0x110059) =~ /[[:^alnum:]]/;
263chr(0x11005A) =~ /[[:space:]]/;
264chr(0x11005B) =~ /[[:^space:]]/;
265chr(0x11005C) =~ /[[:digit:]]/;
266chr(0x11005D) =~ /[[:^digit:]]/;
267chr(0x11005E) =~ /[[:alpha:]]/;
268chr(0x11005F) =~ /[[:^alpha:]]/;
269chr(0x110060) =~ /[[:ascii:]]/;
270chr(0x110061) =~ /[[:^ascii:]]/;
271chr(0x110062) =~ /[[:cntrl:]]/;
272chr(0x110063) =~ /[[:^cntrl:]]/;
273chr(0x110064) =~ /[[:graph:]]/;
274chr(0x110065) =~ /[[:^graph:]]/;
275chr(0x110066) =~ /[[:lower:]]/;
276chr(0x110067) =~ /[[:^lower:]]/;
277chr(0x110068) =~ /[[:print:]]/;
278chr(0x110069) =~ /[[:^print:]]/;
279chr(0x11006A) =~ /[[:punct:]]/;
280chr(0x11006B) =~ /[[:^punct:]]/;
281chr(0x11006C) =~ /[[:upper:]]/;
282chr(0x11006D) =~ /[[:^upper:]]/;
283chr(0x11006E) =~ /[[:xdigit:]]/;
284chr(0x11006F) =~ /[[:^xdigit:]]/;
285chr(0x110070) =~ /[[:blank:]]/;
286chr(0x110071) =~ /[[:^blank:]]/;
2d88a86a
KW
287chr(0x111010) =~ /[\W\p{Unassigned}]/;
288chr(0x111011) =~ /[\W\P{Unassigned}]/;
289chr(0x112010) =~ /[\S\p{Unassigned}]/;
290chr(0x112011) =~ /[\S\P{Unassigned}]/;
291chr(0x113010) =~ /[\x{110000}\p{Unassigned}]/;
292chr(0x113011) =~ /[\x{110000}\P{Unassigned}]/;
9ae3ac1a 293EXPECT
2d88a86a
KW
294Matched non-Unicode code point 0x110005 against Unicode property; may not be portable at - line 20.
295Matched non-Unicode code point 0x110006 against Unicode property; may not be portable at - line 21.
9ae3ac1a 296########
e9b08962 297# NAME Matching Unicode property against above-Unicode code point outputs a warning even if optimizer rejects the match (in synthetic start class)
2d88a86a
KW
298# The warning now has to be made FATAL to guarantee that it is output
299use warnings FATAL => 'non_unicode';
ae986089
KW
300"\x{110000}" =~ /b?\p{Space}/;
301EXPECT
2d88a86a 302Matched non-Unicode code point 0x110000 against Unicode property; may not be portable at - line 3.
ae986089
KW
303########
304# NAME Matching POSIX class property against above-Unicode code point doesn't output a warning
305use warnings 'non_unicode';
2d88a86a 306use warnings FATAL => 'non_unicode';
ae986089
KW
307"\x{110000}" =~ /b?[[:space:]]/;
308EXPECT
309########
8457b38f
KW
310use warnings 'utf8';
311chr(0x110000) =~ /\p{Any}/;
2d88a86a
KW
312########
313# NAME utf8, non_unicode warnings categories work on Matched non-Unicode code point warning
314use warnings qw(utf8 non_unicode);
315chr(0x110000) =~ /^\p{Unassigned}/;
8457b38f 316no warnings 'non_unicode';
2d88a86a
KW
317chr(0x110001) =~ /\p{Unassigned}/;
318use warnings 'non_unicode';
319no warnings 'utf8';
320chr(0x110002) =~ /\p{Unassigned}/;
8457b38f 321EXPECT
2d88a86a 322Matched non-Unicode code point 0x110000 against Unicode property; may not be portable at - line 2.
8457b38f 323########
f2c2a6ab 324# NAME optimizable regnode should still give non_unicode warnings when fatalized
5073ffbd 325use warnings 'utf8';
f2c2a6ab 326use warnings FATAL => 'non_unicode';
845e7aa3 327chr(0x110000) =~ /\p{lb=cr}/;
f2c2a6ab 328EXPECT
2d88a86a 329Matched non-Unicode code point 0x110000 against Unicode property; may not be portable at - line 3.
f2c2a6ab
KW
330########
331# NAME optimizable regnode should not give non_unicode warnings when warnings are off
5073ffbd 332no warnings 'non_unicode';
845e7aa3 333chr(0x110000) =~ /\p{lb=cr}/;
5073ffbd 334EXPECT
5073ffbd 335########
2d88a86a
KW
336# NAME 'All' matches above-Unicode without any warning
337use warnings qw(utf8 non_unicode);
338chr(0x110000) =~ /\p{All}/;
339EXPECT
340########
9ae3ac1a
KW
341require "../test.pl";
342use warnings 'utf8';
a410ec23 343sub Is_Super { return '!utf8::Any' }
88d45d28
KW
344# The extra char defeats an optimization that would otherwise sidestep the
345# problem when the property is the only non-latin1 char in a class
346print "\x{1100000}" =~ /^[\p{Is_Super}\x{100}]$/, "\n";
a410ec23
KW
347EXPECT
3481
349########
350require "../test.pl";
351use warnings 'utf8';
9ae3ac1a
KW
352my $file = tempfile();
353open(my $fh, "+>:utf8", $file);
354print $fh "\x{D7FF}", "\n";
355print $fh "\x{D800}", "\n";
356print $fh "\x{DFFF}", "\n";
357print $fh "\x{E000}", "\n";
358print $fh "\x{FDCF}", "\n";
359print $fh "\x{FDD0}", "\n";
360print $fh "\x{FDEF}", "\n";
361print $fh "\x{FDF0}", "\n";
362print $fh "\x{FEFF}", "\n";
363print $fh "\x{FFFD}", "\n";
364print $fh "\x{FFFE}", "\n";
365print $fh "\x{FFFF}", "\n";
366print $fh "\x{10000}", "\n";
367print $fh "\x{1FFFE}", "\n";
368print $fh "\x{1FFFF}", "\n";
369print $fh "\x{2FFFE}", "\n";
370print $fh "\x{2FFFF}", "\n";
371print $fh "\x{3FFFE}", "\n";
372print $fh "\x{3FFFF}", "\n";
373print $fh "\x{4FFFE}", "\n";
374print $fh "\x{4FFFF}", "\n";
375print $fh "\x{5FFFE}", "\n";
376print $fh "\x{5FFFF}", "\n";
377print $fh "\x{6FFFE}", "\n";
378print $fh "\x{6FFFF}", "\n";
379print $fh "\x{7FFFE}", "\n";
380print $fh "\x{7FFFF}", "\n";
381print $fh "\x{8FFFE}", "\n";
382print $fh "\x{8FFFF}", "\n";
383print $fh "\x{9FFFE}", "\n";
384print $fh "\x{9FFFF}", "\n";
385print $fh "\x{AFFFE}", "\n";
386print $fh "\x{AFFFF}", "\n";
387print $fh "\x{BFFFE}", "\n";
388print $fh "\x{BFFFF}", "\n";
389print $fh "\x{CFFFE}", "\n";
390print $fh "\x{CFFFF}", "\n";
391print $fh "\x{DFFFE}", "\n";
392print $fh "\x{DFFFF}", "\n";
393print $fh "\x{EFFFE}", "\n";
394print $fh "\x{EFFFF}", "\n";
395print $fh "\x{FFFFE}", "\n";
396print $fh "\x{FFFFF}", "\n";
397print $fh "\x{100000}", "\n";
398print $fh "\x{10FFFE}", "\n";
399print $fh "\x{10FFFF}", "\n";
400print $fh "\x{110000}", "\n";
401close $fh;
402EXPECT
403Unicode surrogate U+D800 is illegal in UTF-8 at - line 6.
404Unicode surrogate U+DFFF is illegal in UTF-8 at - line 7.
15ca5930
KW
405Unicode non-character U+FDD0 is not recommended for open interchange in print at - line 10.
406Unicode non-character U+FDEF is not recommended for open interchange in print at - line 11.
407Unicode non-character U+FFFE is not recommended for open interchange in print at - line 15.
408Unicode non-character U+FFFF is not recommended for open interchange in print at - line 16.
409Unicode non-character U+1FFFE is not recommended for open interchange in print at - line 18.
410Unicode non-character U+1FFFF is not recommended for open interchange in print at - line 19.
411Unicode non-character U+2FFFE is not recommended for open interchange in print at - line 20.
412Unicode non-character U+2FFFF is not recommended for open interchange in print at - line 21.
413Unicode non-character U+3FFFE is not recommended for open interchange in print at - line 22.
414Unicode non-character U+3FFFF is not recommended for open interchange in print at - line 23.
415Unicode non-character U+4FFFE is not recommended for open interchange in print at - line 24.
416Unicode non-character U+4FFFF is not recommended for open interchange in print at - line 25.
417Unicode non-character U+5FFFE is not recommended for open interchange in print at - line 26.
418Unicode non-character U+5FFFF is not recommended for open interchange in print at - line 27.
419Unicode non-character U+6FFFE is not recommended for open interchange in print at - line 28.
420Unicode non-character U+6FFFF is not recommended for open interchange in print at - line 29.
421Unicode non-character U+7FFFE is not recommended for open interchange in print at - line 30.
422Unicode non-character U+7FFFF is not recommended for open interchange in print at - line 31.
423Unicode non-character U+8FFFE is not recommended for open interchange in print at - line 32.
424Unicode non-character U+8FFFF is not recommended for open interchange in print at - line 33.
425Unicode non-character U+9FFFE is not recommended for open interchange in print at - line 34.
426Unicode non-character U+9FFFF is not recommended for open interchange in print at - line 35.
427Unicode non-character U+AFFFE is not recommended for open interchange in print at - line 36.
428Unicode non-character U+AFFFF is not recommended for open interchange in print at - line 37.
429Unicode non-character U+BFFFE is not recommended for open interchange in print at - line 38.
430Unicode non-character U+BFFFF is not recommended for open interchange in print at - line 39.
431Unicode non-character U+CFFFE is not recommended for open interchange in print at - line 40.
432Unicode non-character U+CFFFF is not recommended for open interchange in print at - line 41.
433Unicode non-character U+DFFFE is not recommended for open interchange in print at - line 42.
434Unicode non-character U+DFFFF is not recommended for open interchange in print at - line 43.
435Unicode non-character U+EFFFE is not recommended for open interchange in print at - line 44.
436Unicode non-character U+EFFFF is not recommended for open interchange in print at - line 45.
437Unicode non-character U+FFFFE is not recommended for open interchange in print at - line 46.
438Unicode non-character U+FFFFF is not recommended for open interchange in print at - line 47.
439Unicode non-character U+10FFFE is not recommended for open interchange in print at - line 49.
440Unicode non-character U+10FFFF is not recommended for open interchange in print at - line 50.
441Code point 0x110000 is not Unicode, may not be portable in print at - line 51.
9ae3ac1a
KW
442########
443require "../test.pl";
8457b38f
KW
444use warnings 'utf8';
445my $file = tempfile();
446open(my $fh, "+>:utf8", $file);
447print $fh "\x{D800}", "\n";
448print $fh "\x{FFFF}", "\n";
449print $fh "\x{110000}", "\n";
450close $fh;
451EXPECT
452Unicode surrogate U+D800 is illegal in UTF-8 at - line 5.
15ca5930
KW
453Unicode non-character U+FFFF is not recommended for open interchange in print at - line 6.
454Code point 0x110000 is not Unicode, may not be portable in print at - line 7.
8457b38f
KW
455########
456require "../test.pl";
457use warnings 'utf8';
458no warnings 'surrogate';
459my $file = tempfile();
460open(my $fh, "+>:utf8", $file);
461print $fh "\x{D800}", "\n";
462print $fh "\x{FFFF}", "\n";
463print $fh "\x{110000}", "\n";
464close $fh;
465EXPECT
15ca5930
KW
466Unicode non-character U+FFFF is not recommended for open interchange in print at - line 7.
467Code point 0x110000 is not Unicode, may not be portable in print at - line 8.
8457b38f
KW
468########
469require "../test.pl";
470use warnings 'utf8';
471no warnings 'nonchar';
472my $file = tempfile();
473open(my $fh, "+>:utf8", $file);
474print $fh "\x{D800}", "\n";
475print $fh "\x{FFFF}", "\n";
476print $fh "\x{110000}", "\n";
477close $fh;
478EXPECT
479Unicode surrogate U+D800 is illegal in UTF-8 at - line 6.
15ca5930 480Code point 0x110000 is not Unicode, may not be portable in print at - line 8.
8457b38f
KW
481########
482require "../test.pl";
483use warnings 'utf8';
484no warnings 'non_unicode';
485my $file = tempfile();
486open(my $fh, "+>:utf8", $file);
487print $fh "\x{D800}", "\n";
488print $fh "\x{FFFF}", "\n";
489print $fh "\x{110000}", "\n";
490close $fh;
491EXPECT
492Unicode surrogate U+D800 is illegal in UTF-8 at - line 6.
15ca5930 493Unicode non-character U+FFFF is not recommended for open interchange in print at - line 7.
8457b38f 494########
920e47bb
AC
495# NAME C<use warnings "nonchar"> works in isolation
496require "../test.pl";
497use warnings 'nonchar';
498my $file = tempfile();
499open(my $fh, "+>:utf8", $file);
500print $fh "\x{FFFF}", "\n";
501close $fh;
502EXPECT
15ca5930 503Unicode non-character U+FFFF is not recommended for open interchange in print at - line 5.
920e47bb 504########
920e47bb
AC
505# NAME C<use warnings "surrogate"> works in isolation
506require "../test.pl";
507use warnings 'surrogate';
508my $file = tempfile();
509open(my $fh, "+>:utf8", $file);
510print $fh "\x{D800}", "\n";
511close $fh;
512EXPECT
513Unicode surrogate U+D800 is illegal in UTF-8 at - line 5.
514########
920e47bb
AC
515# NAME C<use warnings "non_unicode"> works in isolation
516require "../test.pl";
517use warnings 'non_unicode';
518my $file = tempfile();
519open(my $fh, "+>:utf8", $file);
520print $fh "\x{110000}", "\n";
521close $fh;
522EXPECT
15ca5930 523Code point 0x110000 is not Unicode, may not be portable in print at - line 5.
920e47bb 524########
8457b38f 525require "../test.pl";
9ae3ac1a
KW
526no warnings 'utf8';
527my $file = tempfile();
528open(my $fh, "+>:utf8", $file);
529print $fh "\x{D7FF}", "\n";
530print $fh "\x{D800}", "\n";
531print $fh "\x{DFFF}", "\n";
532print $fh "\x{E000}", "\n";
533print $fh "\x{FDCF}", "\n";
534print $fh "\x{FDD0}", "\n";
535print $fh "\x{FDEF}", "\n";
536print $fh "\x{FDF0}", "\n";
537print $fh "\x{FEFF}", "\n";
538print $fh "\x{FFFD}", "\n";
539print $fh "\x{FFFE}", "\n";
540print $fh "\x{FFFF}", "\n";
541print $fh "\x{10000}", "\n";
542print $fh "\x{1FFFE}", "\n";
543print $fh "\x{1FFFF}", "\n";
544print $fh "\x{2FFFE}", "\n";
545print $fh "\x{2FFFF}", "\n";
546print $fh "\x{3FFFE}", "\n";
547print $fh "\x{3FFFF}", "\n";
548print $fh "\x{4FFFE}", "\n";
549print $fh "\x{4FFFF}", "\n";
550print $fh "\x{5FFFE}", "\n";
551print $fh "\x{5FFFF}", "\n";
552print $fh "\x{6FFFE}", "\n";
553print $fh "\x{6FFFF}", "\n";
554print $fh "\x{7FFFE}", "\n";
555print $fh "\x{7FFFF}", "\n";
556print $fh "\x{8FFFE}", "\n";
557print $fh "\x{8FFFF}", "\n";
558print $fh "\x{9FFFE}", "\n";
559print $fh "\x{9FFFF}", "\n";
560print $fh "\x{AFFFE}", "\n";
561print $fh "\x{AFFFF}", "\n";
562print $fh "\x{BFFFE}", "\n";
563print $fh "\x{BFFFF}", "\n";
564print $fh "\x{CFFFE}", "\n";
565print $fh "\x{CFFFF}", "\n";
566print $fh "\x{DFFFE}", "\n";
567print $fh "\x{DFFFF}", "\n";
568print $fh "\x{EFFFE}", "\n";
569print $fh "\x{EFFFF}", "\n";
570print $fh "\x{FFFFE}", "\n";
571print $fh "\x{FFFFF}", "\n";
572print $fh "\x{100000}", "\n";
573print $fh "\x{10FFFE}", "\n";
574print $fh "\x{10FFFF}", "\n";
575print $fh "\x{110000}", "\n";
576close $fh;
62961d2e 577EXPECT
ab0b796c
KW
578########
579# NAME Case change crosses 255/256 under non-UTF8 locale
ef9d5242
KW
580require '../loc_tools.pl';
581unless (locales_enabled('LC_CTYPE')) {
582 print("SKIPPED\n# locales not available\n"),exit;
583}
ab0b796c
KW
584eval { require POSIX; POSIX->import("locale_h") };
585if ($@) {
586 print("SKIPPED\n# no POSIX\n"),exit;
587}
588use warnings 'locale';
589use feature 'fc';
590use locale;
591setlocale(&POSIX::LC_CTYPE, "C");
592my $a;
593$a = lc("\x{178}");
594$a = fc("\x{1E9E}");
595$a = fc("\x{FB05}");
596$a = uc("\x{FB00}");
597$a = ucfirst("\x{149}");
8bdce394
KW
598$a = lcfirst("\x{178}");
599no warnings 'locale';
600$a = lc("\x{178}");
601$a = fc("\x{1E9E}");
602$a = fc("\x{FB05}");
603$a = uc("\x{FB00}");
604$a = ucfirst("\x{149}");
605$a = lcfirst("\x{178}");
ab0b796c 606EXPECT
ef9d5242
KW
607Can't do lc("\x{178}") on non-UTF-8 locale; resolved to "\x{178}". at - line 14.
608Can't do fc("\x{1E9E}") on non-UTF-8 locale; resolved to "\x{17F}\x{17F}". at - line 15.
609Can't do fc("\x{FB05}") on non-UTF-8 locale; resolved to "\x{FB06}". at - line 16.
610Can't do uc("\x{FB00}") on non-UTF-8 locale; resolved to "\x{FB00}". at - line 17.
611Can't do ucfirst("\x{149}") on non-UTF-8 locale; resolved to "\x{149}". at - line 18.
612Can't do lcfirst("\x{178}") on non-UTF-8 locale; resolved to "\x{178}". at - line 19.
613abc6d
KW
613########
614# NAME Wide character in non-UTF-8 locale
ef9d5242
KW
615require '../loc_tools.pl';
616unless (locales_enabled('LC_CTYPE')) {
617 print("SKIPPED\n# locales not available\n"),exit;
618}
613abc6d
KW
619eval { require POSIX; POSIX->import("locale_h") };
620if ($@) {
621 print("SKIPPED\n# no POSIX\n"),exit;
622}
623use warnings 'locale';
624use feature 'fc';
625use locale;
626setlocale(&POSIX::LC_CTYPE, "C");
627my $a;
628$a = lc("\x{100}");
629$a = lcfirst("\x{101}");
630$a = fc("\x{102}");
631$a = uc("\x{103}");
632$a = ucfirst("\x{104}");
633no warnings 'locale';
634$a = lc("\x{100}");
635$a = lcfirst("\x{101}");
636$a = fc("\x{102}");
637$a = uc("\x{103}");
638$a = ucfirst("\x{104}");
639EXPECT
ef9d5242
KW
640Wide character (U+100) in lc at - line 14.
641Wide character (U+101) in lcfirst at - line 15.
642Wide character (U+102) in fc at - line 16.
643Wide character (U+103) in uc at - line 17.
644Wide character (U+104) in ucfirst at - line 18.
008e8e82
KW
645########
646# NAME Wide character in UTF-8 locale
647require '../loc_tools.pl';
648unless (locales_enabled('LC_CTYPE')) {
649 print("SKIPPED\n# locales not available\n"),exit;
650}
651eval { require POSIX; POSIX->import("locale_h") };
652if ($@) {
653 print("SKIPPED\n# no POSIX\n"),exit;
654}
655my @utf8_locales = find_utf8_ctype_locale();
656unless (@utf8_locales) {
657 print("SKIPPED\n# no UTF-8 locales\n"),exit;
658}
659use warnings 'locale';
660use feature 'fc';
661use locale;
662setlocale(&POSIX::LC_CTYPE, $utf8_locales[0]);
663my $a;
664$a = lc("\x{100}");
665$a = lcfirst("\x{101}");
666$a = fc("\x{102}");
667$a = uc("\x{103}");
668$a = ucfirst("\x{104}");
669EXPECT