Commit | Line | Data |
---|---|---|
f0df466a JH |
1 | |
2 | utf8.c AOK | |
3 | ||
4 | [utf8_to_uv] | |
5 | Malformed UTF-8 character | |
6 | my $a = ord "\x80" ; | |
7 | ||
8 | Malformed UTF-8 character | |
9 | my $a = ord "\xf080" ; | |
10 | <<<<<< this warning can't be easily triggered from perl anymore | |
11 | ||
12 | [utf16_to_utf8] | |
13 | Malformed UTF-16 surrogate | |
93f09d7b | 14 | <<<<<< Add a test when something actually calls utf16_to_utf8 |
f0df466a JH |
15 | |
16 | __END__ | |
17 | # utf8.c [utf8_to_uv] -W | |
18 | BEGIN { | |
19 | if (ord('A') == 193) { | |
20 | print "SKIPPED\n# ebcdic platforms do not generate Malformed UTF-8 warnings."; | |
21 | exit 0; | |
22 | } | |
23 | } | |
24 | use utf8 ; | |
25 | my $a = "snøstorm" ; | |
26 | { | |
27 | no warnings 'utf8' ; | |
28 | my $a = "snøstorm"; | |
29 | use warnings 'utf8' ; | |
30 | my $a = "snøstorm"; | |
31 | } | |
32 | EXPECT | |
41432148 JH |
33 | Malformed UTF-8 character (unexpected non-continuation byte 0x73, immediately after start byte 0xf8) at - line 9. |
34 | Malformed UTF-8 character (unexpected non-continuation byte 0x73, immediately after start byte 0xf8) at - line 14. | |
f0df466a | 35 | ######## |
507b9800 | 36 | use warnings 'utf8'; |
9ae3ac1a KW |
37 | my $d7ff = uc(chr(0xD7FF)); |
38 | my $d800 = uc(chr(0xD800)); | |
39 | my $dfff = uc(chr(0xDFFF)); | |
40 | my $e000 = uc(chr(0xE000)); | |
41 | my $feff = uc(chr(0xFEFF)); | |
42 | my $fffd = uc(chr(0xFFFD)); | |
43 | my $fffe = uc(chr(0xFFFE)); | |
44 | my $ffff = uc(chr(0xFFFF)); | |
45 | my $hex4 = uc(chr(0x10000)); | |
46 | my $hex5 = uc(chr(0x100000)); | |
47 | my $maxm1 = uc(chr(0x10FFFE)); | |
48 | my $max = uc(chr(0x10FFFF)); | |
49 | my $nonUnicode = uc(chr(0x110000)); | |
507b9800 | 50 | no warnings 'utf8'; |
9ae3ac1a KW |
51 | my $d7ff = uc(chr(0xD7FF)); |
52 | my $d800 = uc(chr(0xD800)); | |
53 | my $dfff = uc(chr(0xDFFF)); | |
54 | my $e000 = uc(chr(0xE000)); | |
55 | my $feff = uc(chr(0xFEFF)); | |
56 | my $fffd = uc(chr(0xFFFD)); | |
57 | my $fffe = uc(chr(0xFFFE)); | |
58 | my $ffff = uc(chr(0xFFFF)); | |
59 | my $hex4 = uc(chr(0x10000)); | |
60 | my $hex5 = uc(chr(0x100000)); | |
61 | my $maxm1 = uc(chr(0x10FFFE)); | |
62 | my $max = uc(chr(0x10FFFF)); | |
63 | my $nonUnicode = uc(chr(0x110000)); | |
507b9800 | 64 | EXPECT |
9ae3ac1a KW |
65 | Operation "uc" returns its argument for UTF-16 surrogate U+D800 at - line 3. |
66 | Operation "uc" returns its argument for UTF-16 surrogate U+DFFF at - line 4. | |
67 | Operation "uc" returns its argument for non-Unicode code point 0x110000 at - line 14. | |
507b9800 | 68 | ######## |
62961d2e | 69 | use warnings 'utf8'; |
8457b38f KW |
70 | my $d800 = uc(chr(0xD800)); |
71 | my $nonUnicode = uc(chr(0x110000)); | |
72 | no warnings 'surrogate'; | |
73 | my $d800 = uc(chr(0xD800)); | |
74 | my $nonUnicode = uc(chr(0x110000)); | |
75 | EXPECT | |
76 | Operation "uc" returns its argument for UTF-16 surrogate U+D800 at - line 2. | |
77 | Operation "uc" returns its argument for non-Unicode code point 0x110000 at - line 3. | |
78 | Operation "uc" returns its argument for non-Unicode code point 0x110000 at - line 6. | |
79 | ######## | |
80 | use warnings 'utf8'; | |
81 | my $d800 = uc(chr(0xD800)); | |
82 | my $nonUnicode = uc(chr(0x110000)); | |
83 | my $big_nonUnicode = uc(chr(0x8000_0000)); | |
84 | no warnings 'non_unicode'; | |
85 | my $d800 = uc(chr(0xD800)); | |
86 | my $nonUnicode = uc(chr(0x110000)); | |
87 | my $big_nonUnicode = uc(chr(0x8000_0000)); | |
88 | EXPECT | |
89 | Operation "uc" returns its argument for UTF-16 surrogate U+D800 at - line 2. | |
90 | Operation "uc" returns its argument for non-Unicode code point 0x110000 at - line 3. | |
91 | Operation "uc" returns its argument for non-Unicode code point 0x80000000 at - line 4. | |
92 | Operation "uc" returns its argument for UTF-16 surrogate U+D800 at - line 6. | |
93 | ######## | |
94 | use warnings 'utf8'; | |
9ae3ac1a KW |
95 | my $d7ff = lc pack("U", 0xD7FF); |
96 | my $d800 = lc pack("U", 0xD800); | |
97 | my $dfff = lc pack("U", 0xDFFF); | |
98 | my $e000 = lc pack("U", 0xE000); | |
99 | my $feff = lc pack("U", 0xFEFF); | |
100 | my $fffd = lc pack("U", 0xFFFD); | |
101 | my $fffe = lc pack("U", 0xFFFE); | |
102 | my $ffff = lc pack("U", 0xFFFF); | |
103 | my $hex4 = lc pack("U", 0x10000); | |
104 | my $hex5 = lc pack("U", 0x100000); | |
105 | my $maxm1 = lc pack("U", 0x10FFFE); | |
106 | my $max = lc pack("U", 0x10FFFF); | |
107 | my $nonUnicode = lc(pack("U", 0x110000)); | |
62961d2e | 108 | no warnings 'utf8'; |
9ae3ac1a KW |
109 | my $d7ff = lc pack("U", 0xD7FF); |
110 | my $d800 = lc pack("U", 0xD800); | |
111 | my $dfff = lc pack("U", 0xDFFF); | |
112 | my $e000 = lc pack("U", 0xE000); | |
113 | my $feff = lc pack("U", 0xFEFF); | |
114 | my $fffd = lc pack("U", 0xFFFD); | |
115 | my $fffe = lc pack("U", 0xFFFE); | |
116 | my $ffff = lc pack("U", 0xFFFF); | |
117 | my $hex4 = lc pack("U", 0x10000); | |
118 | my $hex5 = lc pack("U", 0x100000); | |
119 | my $maxm1 = lc pack("U", 0x10FFFE); | |
120 | my $max = lc pack("U", 0x10FFFF); | |
121 | my $nonUnicode = lc(pack("U", 0x110000)); | |
62961d2e | 122 | EXPECT |
9ae3ac1a KW |
123 | Operation "lc" returns its argument for UTF-16 surrogate U+D800 at - line 3. |
124 | Operation "lc" returns its argument for UTF-16 surrogate U+DFFF at - line 4. | |
125 | Operation "lc" returns its argument for non-Unicode code point 0x110000 at - line 14. | |
62961d2e JH |
126 | ######## |
127 | use warnings 'utf8'; | |
9ae3ac1a KW |
128 | my $d7ff = ucfirst "\x{D7FF}"; |
129 | my $d800 = ucfirst "\x{D800}"; | |
130 | my $dfff = ucfirst "\x{DFFF}"; | |
131 | my $e000 = ucfirst "\x{E000}"; | |
132 | my $feff = ucfirst "\x{FEFF}"; | |
133 | my $fffd = ucfirst "\x{FFFD}"; | |
134 | my $fffe = ucfirst "\x{FFFE}"; | |
135 | my $ffff = ucfirst "\x{FFFF}"; | |
136 | my $hex4 = ucfirst "\x{10000}"; | |
137 | my $hex5 = ucfirst "\x{100000}"; | |
138 | my $maxm1 = ucfirst "\x{10FFFE}"; | |
139 | my $max = ucfirst "\x{10FFFF}"; | |
140 | my $nonUnicode = ucfirst "\x{110000}"; | |
62961d2e | 141 | no warnings 'utf8'; |
9ae3ac1a KW |
142 | my $d7ff = ucfirst "\x{D7FF}"; |
143 | my $d800 = ucfirst "\x{D800}"; | |
144 | my $dfff = ucfirst "\x{DFFF}"; | |
145 | my $e000 = ucfirst "\x{E000}"; | |
146 | my $feff = ucfirst "\x{FEFF}"; | |
147 | my $fffd = ucfirst "\x{FFFD}"; | |
148 | my $fffe = ucfirst "\x{FFFE}"; | |
149 | my $ffff = ucfirst "\x{FFFF}"; | |
150 | my $hex4 = ucfirst "\x{10000}"; | |
151 | my $hex5 = ucfirst "\x{100000}"; | |
152 | my $maxm1 = ucfirst "\x{10FFFE}"; | |
153 | my $max = ucfirst "\x{10FFFF}"; | |
154 | my $nonUnicode = ucfirst "\x{110000}"; | |
155 | EXPECT | |
156 | Operation "ucfirst" returns its argument for UTF-16 surrogate U+D800 at - line 3. | |
157 | Operation "ucfirst" returns its argument for UTF-16 surrogate U+DFFF at - line 4. | |
158 | Operation "ucfirst" returns its argument for non-Unicode code point 0x110000 at - line 14. | |
159 | ######## | |
160 | use warnings 'utf8'; | |
161 | chr(0xD7FF) =~ /\p{Any}/; | |
162 | chr(0xD800) =~ /\p{Any}/; | |
163 | chr(0xDFFF) =~ /\p{Any}/; | |
164 | chr(0xE000) =~ /\p{Any}/; | |
165 | chr(0xFEFF) =~ /\p{Any}/; | |
166 | chr(0xFFFD) =~ /\p{Any}/; | |
167 | chr(0xFFFE) =~ /\p{Any}/; | |
168 | chr(0xFFFF) =~ /\p{Any}/; | |
169 | chr(0x10000) =~ /\p{Any}/; | |
170 | chr(0x100000) =~ /\p{Any}/; | |
171 | chr(0x10FFFE) =~ /\p{Any}/; | |
172 | chr(0x10FFFF) =~ /\p{Any}/; | |
173 | chr(0x110000) =~ /\p{Any}/; | |
174 | no warnings 'utf8'; | |
175 | chr(0xD7FF) =~ /\p{Any}/; | |
176 | chr(0xD800) =~ /\p{Any}/; | |
177 | chr(0xDFFF) =~ /\p{Any}/; | |
178 | chr(0xE000) =~ /\p{Any}/; | |
179 | chr(0xFEFF) =~ /\p{Any}/; | |
180 | chr(0xFFFD) =~ /\p{Any}/; | |
181 | chr(0xFFFE) =~ /\p{Any}/; | |
182 | chr(0xFFFF) =~ /\p{Any}/; | |
183 | chr(0x10000) =~ /\p{Any}/; | |
184 | chr(0x100000) =~ /\p{Any}/; | |
185 | chr(0x10FFFE) =~ /\p{Any}/; | |
186 | chr(0x10FFFF) =~ /\p{Any}/; | |
187 | chr(0x110000) =~ /\p{Any}/; | |
188 | EXPECT | |
c634fdd3 | 189 | Code point 0x110000 is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 14. |
9ae3ac1a | 190 | ######## |
8457b38f KW |
191 | use warnings 'utf8'; |
192 | chr(0x110000) =~ /\p{Any}/; | |
193 | no warnings 'non_unicode'; | |
194 | chr(0x110000) =~ /\p{Any}/; | |
195 | EXPECT | |
c634fdd3 | 196 | Code point 0x110000 is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 2. |
8457b38f | 197 | ######## |
9ae3ac1a KW |
198 | require "../test.pl"; |
199 | use warnings 'utf8'; | |
200 | my $file = tempfile(); | |
201 | open(my $fh, "+>:utf8", $file); | |
202 | print $fh "\x{D7FF}", "\n"; | |
203 | print $fh "\x{D800}", "\n"; | |
204 | print $fh "\x{DFFF}", "\n"; | |
205 | print $fh "\x{E000}", "\n"; | |
206 | print $fh "\x{FDCF}", "\n"; | |
207 | print $fh "\x{FDD0}", "\n"; | |
208 | print $fh "\x{FDEF}", "\n"; | |
209 | print $fh "\x{FDF0}", "\n"; | |
210 | print $fh "\x{FEFF}", "\n"; | |
211 | print $fh "\x{FFFD}", "\n"; | |
212 | print $fh "\x{FFFE}", "\n"; | |
213 | print $fh "\x{FFFF}", "\n"; | |
214 | print $fh "\x{10000}", "\n"; | |
215 | print $fh "\x{1FFFE}", "\n"; | |
216 | print $fh "\x{1FFFF}", "\n"; | |
217 | print $fh "\x{2FFFE}", "\n"; | |
218 | print $fh "\x{2FFFF}", "\n"; | |
219 | print $fh "\x{3FFFE}", "\n"; | |
220 | print $fh "\x{3FFFF}", "\n"; | |
221 | print $fh "\x{4FFFE}", "\n"; | |
222 | print $fh "\x{4FFFF}", "\n"; | |
223 | print $fh "\x{5FFFE}", "\n"; | |
224 | print $fh "\x{5FFFF}", "\n"; | |
225 | print $fh "\x{6FFFE}", "\n"; | |
226 | print $fh "\x{6FFFF}", "\n"; | |
227 | print $fh "\x{7FFFE}", "\n"; | |
228 | print $fh "\x{7FFFF}", "\n"; | |
229 | print $fh "\x{8FFFE}", "\n"; | |
230 | print $fh "\x{8FFFF}", "\n"; | |
231 | print $fh "\x{9FFFE}", "\n"; | |
232 | print $fh "\x{9FFFF}", "\n"; | |
233 | print $fh "\x{AFFFE}", "\n"; | |
234 | print $fh "\x{AFFFF}", "\n"; | |
235 | print $fh "\x{BFFFE}", "\n"; | |
236 | print $fh "\x{BFFFF}", "\n"; | |
237 | print $fh "\x{CFFFE}", "\n"; | |
238 | print $fh "\x{CFFFF}", "\n"; | |
239 | print $fh "\x{DFFFE}", "\n"; | |
240 | print $fh "\x{DFFFF}", "\n"; | |
241 | print $fh "\x{EFFFE}", "\n"; | |
242 | print $fh "\x{EFFFF}", "\n"; | |
243 | print $fh "\x{FFFFE}", "\n"; | |
244 | print $fh "\x{FFFFF}", "\n"; | |
245 | print $fh "\x{100000}", "\n"; | |
246 | print $fh "\x{10FFFE}", "\n"; | |
247 | print $fh "\x{10FFFF}", "\n"; | |
248 | print $fh "\x{110000}", "\n"; | |
249 | close $fh; | |
250 | EXPECT | |
251 | Unicode surrogate U+D800 is illegal in UTF-8 at - line 6. | |
252 | Unicode surrogate U+DFFF is illegal in UTF-8 at - line 7. | |
253 | Unicode non-character U+FDD0 is illegal for open interchange at - line 10. | |
254 | Unicode non-character U+FDEF is illegal for open interchange at - line 11. | |
255 | Unicode non-character U+FFFE is illegal for open interchange at - line 15. | |
256 | Unicode non-character U+FFFF is illegal for open interchange at - line 16. | |
257 | Unicode non-character U+1FFFE is illegal for open interchange at - line 18. | |
258 | Unicode non-character U+1FFFF is illegal for open interchange at - line 19. | |
259 | Unicode non-character U+2FFFE is illegal for open interchange at - line 20. | |
260 | Unicode non-character U+2FFFF is illegal for open interchange at - line 21. | |
261 | Unicode non-character U+3FFFE is illegal for open interchange at - line 22. | |
262 | Unicode non-character U+3FFFF is illegal for open interchange at - line 23. | |
263 | Unicode non-character U+4FFFE is illegal for open interchange at - line 24. | |
264 | Unicode non-character U+4FFFF is illegal for open interchange at - line 25. | |
265 | Unicode non-character U+5FFFE is illegal for open interchange at - line 26. | |
266 | Unicode non-character U+5FFFF is illegal for open interchange at - line 27. | |
267 | Unicode non-character U+6FFFE is illegal for open interchange at - line 28. | |
268 | Unicode non-character U+6FFFF is illegal for open interchange at - line 29. | |
269 | Unicode non-character U+7FFFE is illegal for open interchange at - line 30. | |
270 | Unicode non-character U+7FFFF is illegal for open interchange at - line 31. | |
271 | Unicode non-character U+8FFFE is illegal for open interchange at - line 32. | |
272 | Unicode non-character U+8FFFF is illegal for open interchange at - line 33. | |
273 | Unicode non-character U+9FFFE is illegal for open interchange at - line 34. | |
274 | Unicode non-character U+9FFFF is illegal for open interchange at - line 35. | |
275 | Unicode non-character U+AFFFE is illegal for open interchange at - line 36. | |
276 | Unicode non-character U+AFFFF is illegal for open interchange at - line 37. | |
277 | Unicode non-character U+BFFFE is illegal for open interchange at - line 38. | |
278 | Unicode non-character U+BFFFF is illegal for open interchange at - line 39. | |
279 | Unicode non-character U+CFFFE is illegal for open interchange at - line 40. | |
280 | Unicode non-character U+CFFFF is illegal for open interchange at - line 41. | |
281 | Unicode non-character U+DFFFE is illegal for open interchange at - line 42. | |
282 | Unicode non-character U+DFFFF is illegal for open interchange at - line 43. | |
283 | Unicode non-character U+EFFFE is illegal for open interchange at - line 44. | |
284 | Unicode non-character U+EFFFF is illegal for open interchange at - line 45. | |
285 | Unicode non-character U+FFFFE is illegal for open interchange at - line 46. | |
286 | Unicode non-character U+FFFFF is illegal for open interchange at - line 47. | |
287 | Unicode non-character U+10FFFE is illegal for open interchange at - line 49. | |
288 | Unicode non-character U+10FFFF is illegal for open interchange at - line 50. | |
289 | Code point 0x110000 is not Unicode, may not be portable at - line 51. | |
290 | ######## | |
291 | require "../test.pl"; | |
8457b38f KW |
292 | use warnings 'utf8'; |
293 | my $file = tempfile(); | |
294 | open(my $fh, "+>:utf8", $file); | |
295 | print $fh "\x{D800}", "\n"; | |
296 | print $fh "\x{FFFF}", "\n"; | |
297 | print $fh "\x{110000}", "\n"; | |
298 | close $fh; | |
299 | EXPECT | |
300 | Unicode surrogate U+D800 is illegal in UTF-8 at - line 5. | |
301 | Unicode non-character U+FFFF is illegal for open interchange at - line 6. | |
302 | Code point 0x110000 is not Unicode, may not be portable at - line 7. | |
303 | ######## | |
304 | require "../test.pl"; | |
305 | use warnings 'utf8'; | |
306 | no warnings 'surrogate'; | |
307 | my $file = tempfile(); | |
308 | open(my $fh, "+>:utf8", $file); | |
309 | print $fh "\x{D800}", "\n"; | |
310 | print $fh "\x{FFFF}", "\n"; | |
311 | print $fh "\x{110000}", "\n"; | |
312 | close $fh; | |
313 | EXPECT | |
314 | Unicode non-character U+FFFF is illegal for open interchange at - line 7. | |
315 | Code point 0x110000 is not Unicode, may not be portable at - line 8. | |
316 | ######## | |
317 | require "../test.pl"; | |
318 | use warnings 'utf8'; | |
319 | no warnings 'nonchar'; | |
320 | my $file = tempfile(); | |
321 | open(my $fh, "+>:utf8", $file); | |
322 | print $fh "\x{D800}", "\n"; | |
323 | print $fh "\x{FFFF}", "\n"; | |
324 | print $fh "\x{110000}", "\n"; | |
325 | close $fh; | |
326 | EXPECT | |
327 | Unicode surrogate U+D800 is illegal in UTF-8 at - line 6. | |
328 | Code point 0x110000 is not Unicode, may not be portable at - line 8. | |
329 | ######## | |
330 | require "../test.pl"; | |
331 | use warnings 'utf8'; | |
332 | no warnings 'non_unicode'; | |
333 | my $file = tempfile(); | |
334 | open(my $fh, "+>:utf8", $file); | |
335 | print $fh "\x{D800}", "\n"; | |
336 | print $fh "\x{FFFF}", "\n"; | |
337 | print $fh "\x{110000}", "\n"; | |
338 | close $fh; | |
339 | EXPECT | |
340 | Unicode surrogate U+D800 is illegal in UTF-8 at - line 6. | |
341 | Unicode non-character U+FFFF is illegal for open interchange at - line 7. | |
342 | ######## | |
343 | require "../test.pl"; | |
9ae3ac1a KW |
344 | no warnings 'utf8'; |
345 | my $file = tempfile(); | |
346 | open(my $fh, "+>:utf8", $file); | |
347 | print $fh "\x{D7FF}", "\n"; | |
348 | print $fh "\x{D800}", "\n"; | |
349 | print $fh "\x{DFFF}", "\n"; | |
350 | print $fh "\x{E000}", "\n"; | |
351 | print $fh "\x{FDCF}", "\n"; | |
352 | print $fh "\x{FDD0}", "\n"; | |
353 | print $fh "\x{FDEF}", "\n"; | |
354 | print $fh "\x{FDF0}", "\n"; | |
355 | print $fh "\x{FEFF}", "\n"; | |
356 | print $fh "\x{FFFD}", "\n"; | |
357 | print $fh "\x{FFFE}", "\n"; | |
358 | print $fh "\x{FFFF}", "\n"; | |
359 | print $fh "\x{10000}", "\n"; | |
360 | print $fh "\x{1FFFE}", "\n"; | |
361 | print $fh "\x{1FFFF}", "\n"; | |
362 | print $fh "\x{2FFFE}", "\n"; | |
363 | print $fh "\x{2FFFF}", "\n"; | |
364 | print $fh "\x{3FFFE}", "\n"; | |
365 | print $fh "\x{3FFFF}", "\n"; | |
366 | print $fh "\x{4FFFE}", "\n"; | |
367 | print $fh "\x{4FFFF}", "\n"; | |
368 | print $fh "\x{5FFFE}", "\n"; | |
369 | print $fh "\x{5FFFF}", "\n"; | |
370 | print $fh "\x{6FFFE}", "\n"; | |
371 | print $fh "\x{6FFFF}", "\n"; | |
372 | print $fh "\x{7FFFE}", "\n"; | |
373 | print $fh "\x{7FFFF}", "\n"; | |
374 | print $fh "\x{8FFFE}", "\n"; | |
375 | print $fh "\x{8FFFF}", "\n"; | |
376 | print $fh "\x{9FFFE}", "\n"; | |
377 | print $fh "\x{9FFFF}", "\n"; | |
378 | print $fh "\x{AFFFE}", "\n"; | |
379 | print $fh "\x{AFFFF}", "\n"; | |
380 | print $fh "\x{BFFFE}", "\n"; | |
381 | print $fh "\x{BFFFF}", "\n"; | |
382 | print $fh "\x{CFFFE}", "\n"; | |
383 | print $fh "\x{CFFFF}", "\n"; | |
384 | print $fh "\x{DFFFE}", "\n"; | |
385 | print $fh "\x{DFFFF}", "\n"; | |
386 | print $fh "\x{EFFFE}", "\n"; | |
387 | print $fh "\x{EFFFF}", "\n"; | |
388 | print $fh "\x{FFFFE}", "\n"; | |
389 | print $fh "\x{FFFFF}", "\n"; | |
390 | print $fh "\x{100000}", "\n"; | |
391 | print $fh "\x{10FFFE}", "\n"; | |
392 | print $fh "\x{10FFFF}", "\n"; | |
393 | print $fh "\x{110000}", "\n"; | |
394 | close $fh; | |
62961d2e | 395 | EXPECT |