5 Malformed UTF-8 character
8 Malformed UTF-8 character
10 <<<<<< this warning can't be easily triggered from perl anymore
13 Malformed UTF-16 surrogate
14 <<<<<< Add a test when something actually calls utf16_to_utf8
17 # utf8.c [utf8_to_uvchr_buf] -W
19 if (ord('A') == 193) {
20 print "SKIPPED\n# ebcdic platforms generates different Malformed UTF-8 warnings.";
33 Malformed UTF-8 character (unexpected non-continuation byte 0x73, immediately after start byte 0xf8) at - line 9.
34 Malformed UTF-8 character (unexpected non-continuation byte 0x73, immediately after start byte 0xf8) at - line 14.
37 my $d7ff = uc(chr(0xD7FF));
38 my $d800 = uc(chr(0xD800));
39 my $dfff = uc(chr(0xDFFF));
40 my $e000 = uc(chr(0xE000));
41 my $feff = uc(chr(0xFEFF));
42 my $fffd = uc(chr(0xFFFD));
43 my $fffe = uc(chr(0xFFFE));
44 my $ffff = uc(chr(0xFFFF));
45 my $hex4 = uc(chr(0x10000));
46 my $hex5 = uc(chr(0x100000));
47 my $maxm1 = uc(chr(0x10FFFE));
48 my $max = uc(chr(0x10FFFF));
49 my $nonUnicode = uc(chr(0x110000));
51 my $d7ff = uc(chr(0xD7FF));
52 my $d800 = uc(chr(0xD800));
53 my $dfff = uc(chr(0xDFFF));
54 my $e000 = uc(chr(0xE000));
55 my $feff = uc(chr(0xFEFF));
56 my $fffd = uc(chr(0xFFFD));
57 my $fffe = uc(chr(0xFFFE));
58 my $ffff = uc(chr(0xFFFF));
59 my $hex4 = uc(chr(0x10000));
60 my $hex5 = uc(chr(0x100000));
61 my $maxm1 = uc(chr(0x10FFFE));
62 my $max = uc(chr(0x10FFFF));
63 my $nonUnicode = uc(chr(0x110000));
65 Operation "uc" returns its argument for UTF-16 surrogate U+D800 at - line 3.
66 Operation "uc" returns its argument for UTF-16 surrogate U+DFFF at - line 4.
67 Operation "uc" returns its argument for non-Unicode code point 0x110000 at - line 14.
70 my $d800 = uc(chr(0xD800));
71 my $nonUnicode = uc(chr(0x110000));
72 no warnings 'surrogate';
73 my $d800 = uc(chr(0xD800));
74 my $nonUnicode = uc(chr(0x110000));
76 Operation "uc" returns its argument for UTF-16 surrogate U+D800 at - line 2.
77 Operation "uc" returns its argument for non-Unicode code point 0x110000 at - line 3.
78 Operation "uc" returns its argument for non-Unicode code point 0x110000 at - line 6.
81 my $d800 = uc(chr(0xD800));
82 my $nonUnicode = uc(chr(0x110000));
83 no warnings 'non_unicode';
84 my $d800 = uc(chr(0xD800));
85 my $nonUnicode = uc(chr(0x110000));
87 Operation "uc" returns its argument for UTF-16 surrogate U+D800 at - line 2.
88 Operation "uc" returns its argument for non-Unicode code point 0x110000 at - line 3.
89 Operation "uc" returns its argument for UTF-16 surrogate U+D800 at - line 5.
92 no warnings 'deprecated'; # This is above IV_MAX on 32 bit machines
93 my $big_nonUnicode = uc(chr(0x8000_0000));
94 no warnings 'non_unicode';
95 my $big_nonUnicode = uc(chr(0x8000_0000));
97 Operation "uc" returns its argument for non-Unicode code point 0x80000000 at - line 3.
100 my $d7ff = lc pack("U", 0xD7FF);
101 my $d800 = lc pack("U", 0xD800);
102 my $dfff = lc pack("U", 0xDFFF);
103 my $e000 = lc pack("U", 0xE000);
104 my $feff = lc pack("U", 0xFEFF);
105 my $fffd = lc pack("U", 0xFFFD);
106 my $fffe = lc pack("U", 0xFFFE);
107 my $ffff = lc pack("U", 0xFFFF);
108 my $hex4 = lc pack("U", 0x10000);
109 my $hex5 = lc pack("U", 0x100000);
110 my $maxm1 = lc pack("U", 0x10FFFE);
111 my $max = lc pack("U", 0x10FFFF);
112 my $nonUnicode = lc(pack("U", 0x110000));
114 my $d7ff = lc pack("U", 0xD7FF);
115 my $d800 = lc pack("U", 0xD800);
116 my $dfff = lc pack("U", 0xDFFF);
117 my $e000 = lc pack("U", 0xE000);
118 my $feff = lc pack("U", 0xFEFF);
119 my $fffd = lc pack("U", 0xFFFD);
120 my $fffe = lc pack("U", 0xFFFE);
121 my $ffff = lc pack("U", 0xFFFF);
122 my $hex4 = lc pack("U", 0x10000);
123 my $hex5 = lc pack("U", 0x100000);
124 my $maxm1 = lc pack("U", 0x10FFFE);
125 my $max = lc pack("U", 0x10FFFF);
126 my $nonUnicode = lc(pack("U", 0x110000));
128 Operation "lc" returns its argument for UTF-16 surrogate U+D800 at - line 3.
129 Operation "lc" returns its argument for UTF-16 surrogate U+DFFF at - line 4.
130 Operation "lc" returns its argument for non-Unicode code point 0x110000 at - line 14.
133 my $d7ff = ucfirst "\x{D7FF}";
134 my $d800 = ucfirst "\x{D800}";
135 my $dfff = ucfirst "\x{DFFF}";
136 my $e000 = ucfirst "\x{E000}";
137 my $feff = ucfirst "\x{FEFF}";
138 my $fffd = ucfirst "\x{FFFD}";
139 my $fffe = ucfirst "\x{FFFE}";
140 my $ffff = ucfirst "\x{FFFF}";
141 my $hex4 = ucfirst "\x{10000}";
142 my $hex5 = ucfirst "\x{100000}";
143 my $maxm1 = ucfirst "\x{10FFFE}";
144 my $max = ucfirst "\x{10FFFF}";
145 my $nonUnicode = ucfirst "\x{110000}";
147 my $d7ff = ucfirst "\x{D7FF}";
148 my $d800 = ucfirst "\x{D800}";
149 my $dfff = ucfirst "\x{DFFF}";
150 my $e000 = ucfirst "\x{E000}";
151 my $feff = ucfirst "\x{FEFF}";
152 my $fffd = ucfirst "\x{FFFD}";
153 my $fffe = ucfirst "\x{FFFE}";
154 my $ffff = ucfirst "\x{FFFF}";
155 my $hex4 = ucfirst "\x{10000}";
156 my $hex5 = ucfirst "\x{100000}";
157 my $maxm1 = ucfirst "\x{10FFFE}";
158 my $max = ucfirst "\x{10FFFF}";
159 my $nonUnicode = ucfirst "\x{110000}";
161 Operation "ucfirst" returns its argument for UTF-16 surrogate U+D800 at - line 3.
162 Operation "ucfirst" returns its argument for UTF-16 surrogate U+DFFF at - line 4.
163 Operation "ucfirst" returns its argument for non-Unicode code point 0x110000 at - line 14.
165 # NAME Matching \p{} against above-Unicode
167 chr(0xD7FF) =~ /\p{Any}/;
168 chr(0xD800) =~ /\p{Any}/;
169 chr(0xDFFF) =~ /\p{Any}/;
170 chr(0xE000) =~ /\p{Any}/;
171 chr(0xFEFF) =~ /\p{Any}/;
172 chr(0xFFFD) =~ /\p{Any}/;
173 chr(0xFFFE) =~ /\p{Any}/;
174 chr(0xFFFF) =~ /\p{Any}/;
175 chr(0x10000) =~ /\p{Any}/;
176 chr(0x100000) =~ /\p{Any}/;
177 chr(0x10FFFE) =~ /\p{Any}/;
178 chr(0x10FFFF) =~ /\p{Any}/;
179 chr(0x110000) =~ /[\p{Any}]/;
180 chr(0x110001) =~ /[\w\p{Any}]/;
181 chr(0x10FFFF) =~ /\p{All}/;
182 chr(0x110002) =~ /[\w\p{All}]/;
183 chr(0x110003) =~ /[\p{XPosixWord}]/;
184 chr(0x110004) =~ /[\P{XPosixWord}]/;
185 chr(0x110005) =~ /^[\p{Unassigned}]/;
186 chr(0x110006) =~ /^[\P{Unassigned}]/;
187 # Only Unicode properties give non-Unicode warnings, and only those properties
188 # which do match above Unicode; and not when something else in the class
189 # matches above Unicode. Below we test three ways where something outside the
190 # property may match non-Unicode: a code point above it, a class \S that we
191 # know at compile time doesn't, and a class \W whose values aren't (at the time
192 # of this writing) specified at compile time, but which wouldn't match.
193 chr(0x110050) =~ /\w/;
194 chr(0x110051) =~ /\W/;
195 chr(0x110052) =~ /\d/;
196 chr(0x110053) =~ /\D/;
197 chr(0x110054) =~ /\s/;
198 chr(0x110055) =~ /\S/;
199 chr(0x110056) =~ /[[:word:]]/;
200 chr(0x110057) =~ /[[:^word:]]/;
201 chr(0x110058) =~ /[[:alnum:]]/;
202 chr(0x110059) =~ /[[:^alnum:]]/;
203 chr(0x11005A) =~ /[[:space:]]/;
204 chr(0x11005B) =~ /[[:^space:]]/;
205 chr(0x11005C) =~ /[[:digit:]]/;
206 chr(0x11005D) =~ /[[:^digit:]]/;
207 chr(0x11005E) =~ /[[:alpha:]]/;
208 chr(0x11005F) =~ /[[:^alpha:]]/;
209 chr(0x110060) =~ /[[:ascii:]]/;
210 chr(0x110061) =~ /[[:^ascii:]]/;
211 chr(0x110062) =~ /[[:cntrl:]]/;
212 chr(0x110063) =~ /[[:^cntrl:]]/;
213 chr(0x110064) =~ /[[:graph:]]/;
214 chr(0x110065) =~ /[[:^graph:]]/;
215 chr(0x110066) =~ /[[:lower:]]/;
216 chr(0x110067) =~ /[[:^lower:]]/;
217 chr(0x110068) =~ /[[:print:]]/;
218 chr(0x110069) =~ /[[:^print:]]/;
219 chr(0x11006A) =~ /[[:punct:]]/;
220 chr(0x11006B) =~ /[[:^punct:]]/;
221 chr(0x11006C) =~ /[[:upper:]]/;
222 chr(0x11006D) =~ /[[:^upper:]]/;
223 chr(0x11006E) =~ /[[:xdigit:]]/;
224 chr(0x11006F) =~ /[[:^xdigit:]]/;
225 chr(0x110070) =~ /[[:blank:]]/;
226 chr(0x110071) =~ /[[:^blank:]]/;
227 chr(0x111010) =~ /[\W\p{Unassigned}]/;
228 chr(0x111011) =~ /[\W\P{Unassigned}]/;
229 chr(0x112010) =~ /[\S\p{Unassigned}]/;
230 chr(0x112011) =~ /[\S\P{Unassigned}]/;
231 chr(0x113010) =~ /[\x{110000}\p{Unassigned}]/;
232 chr(0x113011) =~ /[\x{110000}\P{Unassigned}]/;
234 chr(0xD7FF) =~ /\p{Any}/;
235 chr(0xD800) =~ /\p{Any}/;
236 chr(0xDFFF) =~ /\p{Any}/;
237 chr(0xE000) =~ /\p{Any}/;
238 chr(0xFEFF) =~ /\p{Any}/;
239 chr(0xFFFD) =~ /\p{Any}/;
240 chr(0xFFFE) =~ /\p{Any}/;
241 chr(0xFFFF) =~ /\p{Any}/;
242 chr(0x10000) =~ /\p{Any}/;
243 chr(0x100000) =~ /\p{Any}/;
244 chr(0x10FFFE) =~ /\p{Any}/;
245 chr(0x10FFFF) =~ /\p{Any}/;
246 chr(0x110000) =~ /[\p{Any}]/;
247 chr(0x110001) =~ /[\w\p{Any}]/;
248 chr(0x10FFFF) =~ /\p{All}/;
249 chr(0x110002) =~ /[\w\p{All}]/;
250 chr(0x110003) =~ /[\p{XPosixWord}]/;
251 chr(0x110004) =~ /[\P{XPosixWord}]/;
252 chr(0x110005) =~ /^[\p{Unassigned}]/;
253 chr(0x110006) =~ /^[\P{Unassigned}]/;
254 chr(0x110050) =~ /\w/;
255 chr(0x110051) =~ /\W/;
256 chr(0x110052) =~ /\d/;
257 chr(0x110053) =~ /\D/;
258 chr(0x110054) =~ /\s/;
259 chr(0x110055) =~ /\S/;
260 chr(0x110056) =~ /[[:word:]]/;
261 chr(0x110057) =~ /[[:^word:]]/;
262 chr(0x110058) =~ /[[:alnum:]]/;
263 chr(0x110059) =~ /[[:^alnum:]]/;
264 chr(0x11005A) =~ /[[:space:]]/;
265 chr(0x11005B) =~ /[[:^space:]]/;
266 chr(0x11005C) =~ /[[:digit:]]/;
267 chr(0x11005D) =~ /[[:^digit:]]/;
268 chr(0x11005E) =~ /[[:alpha:]]/;
269 chr(0x11005F) =~ /[[:^alpha:]]/;
270 chr(0x110060) =~ /[[:ascii:]]/;
271 chr(0x110061) =~ /[[:^ascii:]]/;
272 chr(0x110062) =~ /[[:cntrl:]]/;
273 chr(0x110063) =~ /[[:^cntrl:]]/;
274 chr(0x110064) =~ /[[:graph:]]/;
275 chr(0x110065) =~ /[[:^graph:]]/;
276 chr(0x110066) =~ /[[:lower:]]/;
277 chr(0x110067) =~ /[[:^lower:]]/;
278 chr(0x110068) =~ /[[:print:]]/;
279 chr(0x110069) =~ /[[:^print:]]/;
280 chr(0x11006A) =~ /[[:punct:]]/;
281 chr(0x11006B) =~ /[[:^punct:]]/;
282 chr(0x11006C) =~ /[[:upper:]]/;
283 chr(0x11006D) =~ /[[:^upper:]]/;
284 chr(0x11006E) =~ /[[:xdigit:]]/;
285 chr(0x11006F) =~ /[[:^xdigit:]]/;
286 chr(0x110070) =~ /[[:blank:]]/;
287 chr(0x110071) =~ /[[:^blank:]]/;
288 chr(0x111010) =~ /[\W\p{Unassigned}]/;
289 chr(0x111011) =~ /[\W\P{Unassigned}]/;
290 chr(0x112010) =~ /[\S\p{Unassigned}]/;
291 chr(0x112011) =~ /[\S\P{Unassigned}]/;
292 chr(0x113010) =~ /[\x{110000}\p{Unassigned}]/;
293 chr(0x113011) =~ /[\x{110000}\P{Unassigned}]/;
295 Matched non-Unicode code point 0x110005 against Unicode property; may not be portable at - line 20.
296 Matched non-Unicode code point 0x110006 against Unicode property; may not be portable at - line 21.
298 # NAME Matching Unicode property against above-Unicode code point outputs a warning even if optimizer rejects the match (in synthetic start class)
299 # Now have to make FATAL to guarantee being output
300 use warnings FATAL => 'non_unicode';
301 "\x{110000}" =~ /b?\p{Space}/;
303 Matched non-Unicode code point 0x110000 against Unicode property; may not be portable at - line 3.
305 # NAME Matching POSIX class property against above-Unicode code point doesn't output a warning
306 use warnings 'non_unicode';
307 use warnings FATAL => 'non_unicode';
308 "\x{110000}" =~ /b?[[:space:]]/;
312 chr(0x110000) =~ /\p{Any}/;
314 # NAME utf8, non_unicode warnings categories work on Matched non-Unicode code point warning
315 use warnings qw(utf8 non_unicode);
316 chr(0x110000) =~ /^\p{Unassigned}/;
317 no warnings 'non_unicode';
318 chr(0x110001) =~ /\p{Unassigned}/;
319 use warnings 'non_unicode';
321 chr(0x110002) =~ /\p{Unassigned}/;
323 Matched non-Unicode code point 0x110000 against Unicode property; may not be portable at - line 2.
325 # NAME optimizable regnode should still give non_unicode warnings when fatalized
327 use warnings FATAL => 'non_unicode';
328 chr(0x110000) =~ /\p{lb=cr}/;
330 Matched non-Unicode code point 0x110000 against Unicode property; may not be portable at - line 3.
332 # NAME optimizable regnode should not give non_unicode warnings when warnings are off
333 no warnings 'non_unicode';
334 chr(0x110000) =~ /\p{lb=cr}/;
337 # NAME 'All' matches above-Unicode without any warning
338 use warnings qw(utf8 non_unicode);
339 chr(0x110000) =~ /\p{All}/;
342 require "../test.pl";
344 sub Is_Super { return '!utf8::Any' }  # User-defined property for \p{Is_Super}; the leading '!' requests the complement of utf8::Any
345 # The extra character (\x{100}) defeats an optimization that would otherwise
346 # sidestep the problem when the property is the only non-Latin-1 item in a class
347 print "\x{1100000}" =~ /^[\p{Is_Super}\x{100}]$/, "\n";
351 require "../test.pl";
353 my $file = tempfile();
354 open(my $fh, "+>:utf8", $file);
355 print $fh "\x{D7FF}", "\n";
356 print $fh "\x{D800}", "\n";
357 print $fh "\x{D900}", "\n";
358 print $fh "\x{DA00}", "\n";
359 print $fh "\x{DB00}", "\n";
360 print $fh "\x{DC00}", "\n";
361 print $fh "\x{DD00}", "\n";
362 print $fh "\x{DE00}", "\n";
363 print $fh "\x{DF00}", "\n";
364 print $fh "\x{DFFF}", "\n";
365 print $fh "\x{E000}", "\n";
366 print $fh "\x{FDCF}", "\n";
367 print $fh "\x{FDD0}", "\n";
368 print $fh "\x{FDD1}", "\n";
369 print $fh "\x{FDEF}", "\n";
370 print $fh "\x{FDF0}", "\n";
371 print $fh "\x{FDFE}", "\n";
372 print $fh "\x{FDFF}", "\n";
373 print $fh "\x{FE00}", "\n";
374 print $fh "\x{FEFF}", "\n";
375 print $fh "\x{FFFD}", "\n";
376 print $fh "\x{FFFE}", "\n";
377 print $fh "\x{FFFF}", "\n";
378 print $fh "\x{10000}", "\n";
379 print $fh "\x{1FFFD}", "\n";
380 print $fh "\x{1FFFE}", "\n";
381 print $fh "\x{1FFFF}", "\n";
382 print $fh "\x{20000}", "\n";
383 print $fh "\x{2FFFD}", "\n";
384 print $fh "\x{2FFFE}", "\n";
385 print $fh "\x{2FFFF}", "\n";
386 print $fh "\x{30000}", "\n";
387 print $fh "\x{3FFFD}", "\n";
388 print $fh "\x{3FFFE}", "\n";
389 print $fh "\x{3FFFF}", "\n";
390 print $fh "\x{40000}", "\n";
391 print $fh "\x{4FFFD}", "\n";
392 print $fh "\x{4FFFE}", "\n";
393 print $fh "\x{4FFFF}", "\n";
394 print $fh "\x{50000}", "\n";
395 print $fh "\x{5FFFD}", "\n";
396 print $fh "\x{5FFFE}", "\n";
397 print $fh "\x{5FFFF}", "\n";
398 print $fh "\x{60000}", "\n";
399 print $fh "\x{6FFFD}", "\n";
400 print $fh "\x{6FFFE}", "\n";
401 print $fh "\x{6FFFF}", "\n";
402 print $fh "\x{70000}", "\n";
403 print $fh "\x{7FFFD}", "\n";
404 print $fh "\x{7FFFE}", "\n";
405 print $fh "\x{7FFFF}", "\n";
406 print $fh "\x{80000}", "\n";
407 print $fh "\x{8FFFD}", "\n";
408 print $fh "\x{8FFFE}", "\n";
409 print $fh "\x{8FFFF}", "\n";
410 print $fh "\x{90000}", "\n";
411 print $fh "\x{9FFFD}", "\n";
412 print $fh "\x{9FFFE}", "\n";
413 print $fh "\x{9FFFF}", "\n";
414 print $fh "\x{A0000}", "\n";
415 print $fh "\x{AFFFD}", "\n";
416 print $fh "\x{AFFFE}", "\n";
417 print $fh "\x{AFFFF}", "\n";
418 print $fh "\x{B0000}", "\n";
419 print $fh "\x{BFFFD}", "\n";
420 print $fh "\x{BFFFE}", "\n";
421 print $fh "\x{BFFFF}", "\n";
422 print $fh "\x{C0000}", "\n";
423 print $fh "\x{CFFFD}", "\n";
424 print $fh "\x{CFFFE}", "\n";
425 print $fh "\x{CFFFF}", "\n";
426 print $fh "\x{D0000}", "\n";
427 print $fh "\x{DFFFD}", "\n";
428 print $fh "\x{DFFFE}", "\n";
429 print $fh "\x{DFFFF}", "\n";
430 print $fh "\x{E0000}", "\n";
431 print $fh "\x{EFFFD}", "\n";
432 print $fh "\x{EFFFE}", "\n";
433 print $fh "\x{EFFFF}", "\n";
434 print $fh "\x{F0000}", "\n";
435 print $fh "\x{FFFFD}", "\n";
436 print $fh "\x{FFFFE}", "\n";
437 print $fh "\x{FFFFF}", "\n";
438 print $fh "\x{100000}", "\n";
439 print $fh "\x{10FFFD}", "\n";
440 print $fh "\x{10FFFE}", "\n";
441 print $fh "\x{10FFFF}", "\n";
442 print $fh "\x{110000}", "\n";
443 print $fh "\x{11FFFD}", "\n";
444 print $fh "\x{11FFFE}", "\n";
445 print $fh "\x{11FFFF}", "\n";
446 print $fh "\x{120000}", "\n";
449 Unicode surrogate U+D800 is illegal in UTF-8 at - line 6.
450 Unicode surrogate U+D900 is illegal in UTF-8 at - line 7.
451 Unicode surrogate U+DA00 is illegal in UTF-8 at - line 8.
452 Unicode surrogate U+DB00 is illegal in UTF-8 at - line 9.
453 Unicode surrogate U+DC00 is illegal in UTF-8 at - line 10.
454 Unicode surrogate U+DD00 is illegal in UTF-8 at - line 11.
455 Unicode surrogate U+DE00 is illegal in UTF-8 at - line 12.
456 Unicode surrogate U+DF00 is illegal in UTF-8 at - line 13.
457 Unicode surrogate U+DFFF is illegal in UTF-8 at - line 14.
458 Unicode non-character U+FDD0 is not recommended for open interchange in print at - line 17.
459 Unicode non-character U+FDD1 is not recommended for open interchange in print at - line 18.
460 Unicode non-character U+FDEF is not recommended for open interchange in print at - line 19.
461 Unicode non-character U+FFFE is not recommended for open interchange in print at - line 26.
462 Unicode non-character U+FFFF is not recommended for open interchange in print at - line 27.
463 Unicode non-character U+1FFFE is not recommended for open interchange in print at - line 30.
464 Unicode non-character U+1FFFF is not recommended for open interchange in print at - line 31.
465 Unicode non-character U+2FFFE is not recommended for open interchange in print at - line 34.
466 Unicode non-character U+2FFFF is not recommended for open interchange in print at - line 35.
467 Unicode non-character U+3FFFE is not recommended for open interchange in print at - line 38.
468 Unicode non-character U+3FFFF is not recommended for open interchange in print at - line 39.
469 Unicode non-character U+4FFFE is not recommended for open interchange in print at - line 42.
470 Unicode non-character U+4FFFF is not recommended for open interchange in print at - line 43.
471 Unicode non-character U+5FFFE is not recommended for open interchange in print at - line 46.
472 Unicode non-character U+5FFFF is not recommended for open interchange in print at - line 47.
473 Unicode non-character U+6FFFE is not recommended for open interchange in print at - line 50.
474 Unicode non-character U+6FFFF is not recommended for open interchange in print at - line 51.
475 Unicode non-character U+7FFFE is not recommended for open interchange in print at - line 54.
476 Unicode non-character U+7FFFF is not recommended for open interchange in print at - line 55.
477 Unicode non-character U+8FFFE is not recommended for open interchange in print at - line 58.
478 Unicode non-character U+8FFFF is not recommended for open interchange in print at - line 59.
479 Unicode non-character U+9FFFE is not recommended for open interchange in print at - line 62.
480 Unicode non-character U+9FFFF is not recommended for open interchange in print at - line 63.
481 Unicode non-character U+AFFFE is not recommended for open interchange in print at - line 66.
482 Unicode non-character U+AFFFF is not recommended for open interchange in print at - line 67.
483 Unicode non-character U+BFFFE is not recommended for open interchange in print at - line 70.
484 Unicode non-character U+BFFFF is not recommended for open interchange in print at - line 71.
485 Unicode non-character U+CFFFE is not recommended for open interchange in print at - line 74.
486 Unicode non-character U+CFFFF is not recommended for open interchange in print at - line 75.
487 Unicode non-character U+DFFFE is not recommended for open interchange in print at - line 78.
488 Unicode non-character U+DFFFF is not recommended for open interchange in print at - line 79.
489 Unicode non-character U+EFFFE is not recommended for open interchange in print at - line 82.
490 Unicode non-character U+EFFFF is not recommended for open interchange in print at - line 83.
491 Unicode non-character U+FFFFE is not recommended for open interchange in print at - line 86.
492 Unicode non-character U+FFFFF is not recommended for open interchange in print at - line 87.
493 Unicode non-character U+10FFFE is not recommended for open interchange in print at - line 90.
494 Unicode non-character U+10FFFF is not recommended for open interchange in print at - line 91.
495 Code point 0x110000 is not Unicode, may not be portable in print at - line 92.
496 Code point 0x11FFFD is not Unicode, may not be portable in print at - line 93.
497 Code point 0x11FFFE is not Unicode, may not be portable in print at - line 94.
498 Code point 0x11FFFF is not Unicode, may not be portable in print at - line 95.
499 Code point 0x120000 is not Unicode, may not be portable in print at - line 96.
501 require "../test.pl";
503 my $file = tempfile();
504 open(my $fh, "+>:utf8", $file);
505 print $fh "\x{D800}", "\n";
506 print $fh "\x{FFFF}", "\n";
507 print $fh "\x{110000}", "\n";
510 Unicode surrogate U+D800 is illegal in UTF-8 at - line 5.
511 Unicode non-character U+FFFF is not recommended for open interchange in print at - line 6.
512 Code point 0x110000 is not Unicode, may not be portable in print at - line 7.
514 require "../test.pl";
516 no warnings 'surrogate';
517 my $file = tempfile();
518 open(my $fh, "+>:utf8", $file);
519 print $fh "\x{D800}", "\n";
520 print $fh "\x{FFFF}", "\n";
521 print $fh "\x{110000}", "\n";
524 Unicode non-character U+FFFF is not recommended for open interchange in print at - line 7.
525 Code point 0x110000 is not Unicode, may not be portable in print at - line 8.
527 require "../test.pl";
529 no warnings 'nonchar';
530 my $file = tempfile();
531 open(my $fh, "+>:utf8", $file);
532 print $fh "\x{D800}", "\n";
533 print $fh "\x{FFFF}", "\n";
534 print $fh "\x{110000}", "\n";
537 Unicode surrogate U+D800 is illegal in UTF-8 at - line 6.
538 Code point 0x110000 is not Unicode, may not be portable in print at - line 8.
540 require "../test.pl";
542 no warnings 'non_unicode';
543 my $file = tempfile();
544 open(my $fh, "+>:utf8", $file);
545 print $fh "\x{D800}", "\n";
546 print $fh "\x{FFFF}", "\n";
547 print $fh "\x{110000}", "\n";
550 Unicode surrogate U+D800 is illegal in UTF-8 at - line 6.
551 Unicode non-character U+FFFF is not recommended for open interchange in print at - line 7.
553 # NAME C<use warnings "nonchar"> works in isolation
554 require "../test.pl";
555 use warnings 'nonchar';
556 my $file = tempfile();
557 open(my $fh, "+>:utf8", $file);
558 print $fh "\x{FFFF}", "\n";
561 Unicode non-character U+FFFF is not recommended for open interchange in print at - line 5.
563 # NAME C<use warnings "surrogate"> works in isolation
564 require "../test.pl";
565 use warnings 'surrogate';
566 my $file = tempfile();
567 open(my $fh, "+>:utf8", $file);
568 print $fh "\x{D800}", "\n";
571 Unicode surrogate U+D800 is illegal in UTF-8 at - line 5.
573 # NAME C<use warnings "non_unicode"> works in isolation
574 require "../test.pl";
575 use warnings 'non_unicode';
576 my $file = tempfile();
577 open(my $fh, "+>:utf8", $file);
578 print $fh "\x{110000}", "\n";
581 Code point 0x110000 is not Unicode, may not be portable in print at - line 5.
583 require "../test.pl";
585 my $file = tempfile();
586 open(my $fh, "+>:utf8", $file);
587 print $fh "\x{D7FF}", "\n";
588 print $fh "\x{D800}", "\n";
589 print $fh "\x{DFFF}", "\n";
590 print $fh "\x{E000}", "\n";
591 print $fh "\x{FDCF}", "\n";
592 print $fh "\x{FDD0}", "\n";
593 print $fh "\x{FDEF}", "\n";
594 print $fh "\x{FDF0}", "\n";
595 print $fh "\x{FEFF}", "\n";
596 print $fh "\x{FFFD}", "\n";
597 print $fh "\x{FFFE}", "\n";
598 print $fh "\x{FFFF}", "\n";
599 print $fh "\x{10000}", "\n";
600 print $fh "\x{1FFFE}", "\n";
601 print $fh "\x{1FFFF}", "\n";
602 print $fh "\x{2FFFE}", "\n";
603 print $fh "\x{2FFFF}", "\n";
604 print $fh "\x{3FFFE}", "\n";
605 print $fh "\x{3FFFF}", "\n";
606 print $fh "\x{4FFFE}", "\n";
607 print $fh "\x{4FFFF}", "\n";
608 print $fh "\x{5FFFE}", "\n";
609 print $fh "\x{5FFFF}", "\n";
610 print $fh "\x{6FFFE}", "\n";
611 print $fh "\x{6FFFF}", "\n";
612 print $fh "\x{7FFFE}", "\n";
613 print $fh "\x{7FFFF}", "\n";
614 print $fh "\x{8FFFE}", "\n";
615 print $fh "\x{8FFFF}", "\n";
616 print $fh "\x{9FFFE}", "\n";
617 print $fh "\x{9FFFF}", "\n";
618 print $fh "\x{AFFFE}", "\n";
619 print $fh "\x{AFFFF}", "\n";
620 print $fh "\x{BFFFE}", "\n";
621 print $fh "\x{BFFFF}", "\n";
622 print $fh "\x{CFFFE}", "\n";
623 print $fh "\x{CFFFF}", "\n";
624 print $fh "\x{DFFFE}", "\n";
625 print $fh "\x{DFFFF}", "\n";
626 print $fh "\x{EFFFE}", "\n";
627 print $fh "\x{EFFFF}", "\n";
628 print $fh "\x{FFFFE}", "\n";
629 print $fh "\x{FFFFF}", "\n";
630 print $fh "\x{100000}", "\n";
631 print $fh "\x{10FFFE}", "\n";
632 print $fh "\x{10FFFF}", "\n";
633 print $fh "\x{110000}", "\n";
637 # NAME Case change crosses 255/256 under non-UTF8 locale
638 require '../loc_tools.pl';
639 unless (locales_enabled('LC_CTYPE')) {
640 print("SKIPPED\n# locales not available\n"),exit;
642 eval { require POSIX; POSIX->import("locale_h") };
644 print("SKIPPED\n# no POSIX\n"),exit;
646 use warnings 'locale';
649 setlocale(&POSIX::LC_CTYPE, "C");
655 $a = ucfirst("\x{149}");
656 $a = lcfirst("\x{178}");
657 no warnings 'locale';
662 $a = ucfirst("\x{149}");
663 $a = lcfirst("\x{178}");
665 Can't do lc("\x{178}") on non-UTF-8 locale; resolved to "\x{178}". at - line 14.
666 Can't do fc("\x{1E9E}") on non-UTF-8 locale; resolved to "\x{17F}\x{17F}". at - line 15.
667 Can't do fc("\x{FB05}") on non-UTF-8 locale; resolved to "\x{FB06}". at - line 16.
668 Can't do uc("\x{FB00}") on non-UTF-8 locale; resolved to "\x{FB00}". at - line 17.
669 Can't do ucfirst("\x{149}") on non-UTF-8 locale; resolved to "\x{149}". at - line 18.
670 Can't do lcfirst("\x{178}") on non-UTF-8 locale; resolved to "\x{178}". at - line 19.
672 # NAME Wide character in non-UTF-8 locale
673 require '../loc_tools.pl';
674 unless (locales_enabled('LC_CTYPE')) {
675 print("SKIPPED\n# locales not available\n"),exit;
677 eval { require POSIX; POSIX->import("locale_h") };
679 print("SKIPPED\n# no POSIX\n"),exit;
681 use warnings 'locale';
684 setlocale(&POSIX::LC_CTYPE, "C");
687 $a = lcfirst("\x{101}");
690 $a = ucfirst("\x{104}");
691 no warnings 'locale';
693 $a = lcfirst("\x{101}");
696 $a = ucfirst("\x{104}");
698 Wide character (U+100) in lc at - line 14.
699 Wide character (U+101) in lcfirst at - line 15.
700 Wide character (U+102) in fc at - line 16.
701 Wide character (U+103) in uc at - line 17.
702 Wide character (U+104) in ucfirst at - line 18.
704 # NAME Wide character in UTF-8 locale
705 require '../loc_tools.pl';
706 unless (locales_enabled('LC_CTYPE')) {
707 print("SKIPPED\n# locales not available\n"),exit;
709 eval { require POSIX; POSIX->import("locale_h") };
711 print("SKIPPED\n# no POSIX\n"),exit;
713 my @utf8_locales = find_utf8_ctype_locale();
714 unless (@utf8_locales) {
715 print("SKIPPED\n# no UTF-8 locales\n"),exit;
717 use warnings 'locale';
720 setlocale(&POSIX::LC_CTYPE, $utf8_locales[0]);
723 $a = lcfirst("\x{101}");
726 $a = ucfirst("\x{104}");
729 # NAME Deprecation of too-large code points
730 require "../test.pl";
731 use warnings 'non_unicode';
732 my $max_cp = ~0 >> 1;
733 my $max_char = chr $max_cp;
734 my $to_warn_cp = $max_cp + 1;
735 my $to_warn_char = chr $to_warn_cp;
736 $max_char =~ /[\x{110000}\P{Unassigned}]/;
737 $to_warn_char =~ /[\x{110000}\P{Unassigned}]/;
738 my $temp = qr/$max_char/;
739 $temp = qr/$to_warn_char/;
740 $temp = uc($max_char);
741 $temp = uc($to_warn_char);
742 my $file = tempfile();
743 open(my $fh, "+>:utf8", $file);
744 print $fh $max_char, "\n";
745 print $fh $to_warn_char, "\n";
749 Use of code point 0x80+ is deprecated; the permissible max is 0x7F+ at - line \d+.
750 Use of code point 0x80+ is deprecated; the permissible max is 0x7F+ in pattern match \(m//\) at - line \d+.
751 Use of code point 0x80+ is deprecated; the permissible max is 0x7F+ in regexp compilation at - line \d+.
752 Use of code point 0x80+ is deprecated; the permissible max is 0x7F+ in regexp compilation at - line \d+.
753 Use of code point 0x80+ is deprecated; the permissible max is 0x7F+ at - line \d+.
754 Use of code point 0x80+ is deprecated; the permissible max is 0x7F+ in regexp compilation at - line \d+.
755 Operation "uc" returns its argument for non-Unicode code point 0x7F+ at - line \d+.
756 Use of code point 0x80+ is deprecated; the permissible max is 0x7F+ at - line \d+.
757 Operation "uc" returns its argument for non-Unicode code point 0x80+ at - line \d+.
758 Code point 0x7F+ is not Unicode, may not be portable in print at - line \d+.
759 Use of code point 0x80+ is deprecated; the permissible max is 0x7F+ in print at - line \d+.