Commit | Line | Data |
---|---|---|
f0df466a JH |
1 | |
2 | utf8.c AOK | |
3 | ||
4 | [utf8_to_uv] | |
5 | Malformed UTF-8 character | |
6 | my $a = ord "\x80" ; | |
7 | ||
8 | Malformed UTF-8 character | |
9 | my $a = ord "\xf080" ; | |
10 | <<<<<< this warning can't be easily triggered from perl anymore | |
11 | ||
12 | [utf16_to_utf8] | |
13 | Malformed UTF-16 surrogate | |
93f09d7b | 14 | <<<<<< Add a test when something actually calls utf16_to_utf8 |
f0df466a JH |
15 | |
16 | __END__ | |
17 | # utf8.c [utf8_to_uv] -W | |
18 | BEGIN { | |
19 | if (ord('A') == 193) { | |
20 | print "SKIPPED\n# ebcdic platforms do not generate Malformed UTF-8 warnings."; | |
21 | exit 0; | |
22 | } | |
23 | } | |
24 | use utf8 ; | |
25 | my $a = "snøstorm" ; | |
26 | { | |
27 | no warnings 'utf8' ; | |
28 | my $a = "snøstorm"; | |
29 | use warnings 'utf8' ; | |
30 | my $a = "snøstorm"; | |
31 | } | |
32 | EXPECT | |
41432148 JH |
33 | Malformed UTF-8 character (unexpected non-continuation byte 0x73, immediately after start byte 0xf8) at - line 9. |
34 | Malformed UTF-8 character (unexpected non-continuation byte 0x73, immediately after start byte 0xf8) at - line 14. | |
f0df466a | 35 | ######## |
507b9800 | 36 | use warnings 'utf8'; |
9ae3ac1a KW |
37 | my $d7ff = uc(chr(0xD7FF)); |
38 | my $d800 = uc(chr(0xD800)); | |
39 | my $dfff = uc(chr(0xDFFF)); | |
40 | my $e000 = uc(chr(0xE000)); | |
41 | my $feff = uc(chr(0xFEFF)); | |
42 | my $fffd = uc(chr(0xFFFD)); | |
43 | my $fffe = uc(chr(0xFFFE)); | |
44 | my $ffff = uc(chr(0xFFFF)); | |
45 | my $hex4 = uc(chr(0x10000)); | |
46 | my $hex5 = uc(chr(0x100000)); | |
47 | my $maxm1 = uc(chr(0x10FFFE)); | |
48 | my $max = uc(chr(0x10FFFF)); | |
49 | my $nonUnicode = uc(chr(0x110000)); | |
507b9800 | 50 | no warnings 'utf8'; |
9ae3ac1a KW |
51 | my $d7ff = uc(chr(0xD7FF)); |
52 | my $d800 = uc(chr(0xD800)); | |
53 | my $dfff = uc(chr(0xDFFF)); | |
54 | my $e000 = uc(chr(0xE000)); | |
55 | my $feff = uc(chr(0xFEFF)); | |
56 | my $fffd = uc(chr(0xFFFD)); | |
57 | my $fffe = uc(chr(0xFFFE)); | |
58 | my $ffff = uc(chr(0xFFFF)); | |
59 | my $hex4 = uc(chr(0x10000)); | |
60 | my $hex5 = uc(chr(0x100000)); | |
61 | my $maxm1 = uc(chr(0x10FFFE)); | |
62 | my $max = uc(chr(0x10FFFF)); | |
63 | my $nonUnicode = uc(chr(0x110000)); | |
507b9800 | 64 | EXPECT |
9ae3ac1a KW |
65 | Operation "uc" returns its argument for UTF-16 surrogate U+D800 at - line 3. |
66 | Operation "uc" returns its argument for UTF-16 surrogate U+DFFF at - line 4. | |
67 | Operation "uc" returns its argument for non-Unicode code point 0x110000 at - line 14. | |
507b9800 | 68 | ######## |
62961d2e | 69 | use warnings 'utf8'; |
9ae3ac1a KW |
70 | my $d7ff = lc pack("U", 0xD7FF); |
71 | my $d800 = lc pack("U", 0xD800); | |
72 | my $dfff = lc pack("U", 0xDFFF); | |
73 | my $e000 = lc pack("U", 0xE000); | |
74 | my $feff = lc pack("U", 0xFEFF); | |
75 | my $fffd = lc pack("U", 0xFFFD); | |
76 | my $fffe = lc pack("U", 0xFFFE); | |
77 | my $ffff = lc pack("U", 0xFFFF); | |
78 | my $hex4 = lc pack("U", 0x10000); | |
79 | my $hex5 = lc pack("U", 0x100000); | |
80 | my $maxm1 = lc pack("U", 0x10FFFE); | |
81 | my $max = lc pack("U", 0x10FFFF); | |
82 | my $nonUnicode = lc(pack("U", 0x110000)); | |
62961d2e | 83 | no warnings 'utf8'; |
9ae3ac1a KW |
84 | my $d7ff = lc pack("U", 0xD7FF); |
85 | my $d800 = lc pack("U", 0xD800); | |
86 | my $dfff = lc pack("U", 0xDFFF); | |
87 | my $e000 = lc pack("U", 0xE000); | |
88 | my $feff = lc pack("U", 0xFEFF); | |
89 | my $fffd = lc pack("U", 0xFFFD); | |
90 | my $fffe = lc pack("U", 0xFFFE); | |
91 | my $ffff = lc pack("U", 0xFFFF); | |
92 | my $hex4 = lc pack("U", 0x10000); | |
93 | my $hex5 = lc pack("U", 0x100000); | |
94 | my $maxm1 = lc pack("U", 0x10FFFE); | |
95 | my $max = lc pack("U", 0x10FFFF); | |
96 | my $nonUnicode = lc(pack("U", 0x110000)); | |
62961d2e | 97 | EXPECT |
9ae3ac1a KW |
98 | Operation "lc" returns its argument for UTF-16 surrogate U+D800 at - line 3. |
99 | Operation "lc" returns its argument for UTF-16 surrogate U+DFFF at - line 4. | |
100 | Operation "lc" returns its argument for non-Unicode code point 0x110000 at - line 14. | |
62961d2e JH |
101 | ######## |
102 | use warnings 'utf8'; | |
9ae3ac1a KW |
103 | my $d7ff = ucfirst "\x{D7FF}"; |
104 | my $d800 = ucfirst "\x{D800}"; | |
105 | my $dfff = ucfirst "\x{DFFF}"; | |
106 | my $e000 = ucfirst "\x{E000}"; | |
107 | my $feff = ucfirst "\x{FEFF}"; | |
108 | my $fffd = ucfirst "\x{FFFD}"; | |
109 | my $fffe = ucfirst "\x{FFFE}"; | |
110 | my $ffff = ucfirst "\x{FFFF}"; | |
111 | my $hex4 = ucfirst "\x{10000}"; | |
112 | my $hex5 = ucfirst "\x{100000}"; | |
113 | my $maxm1 = ucfirst "\x{10FFFE}"; | |
114 | my $max = ucfirst "\x{10FFFF}"; | |
115 | my $nonUnicode = ucfirst "\x{110000}"; | |
62961d2e | 116 | no warnings 'utf8'; |
9ae3ac1a KW |
117 | my $d7ff = ucfirst "\x{D7FF}"; |
118 | my $d800 = ucfirst "\x{D800}"; | |
119 | my $dfff = ucfirst "\x{DFFF}"; | |
120 | my $e000 = ucfirst "\x{E000}"; | |
121 | my $feff = ucfirst "\x{FEFF}"; | |
122 | my $fffd = ucfirst "\x{FFFD}"; | |
123 | my $fffe = ucfirst "\x{FFFE}"; | |
124 | my $ffff = ucfirst "\x{FFFF}"; | |
125 | my $hex4 = ucfirst "\x{10000}"; | |
126 | my $hex5 = ucfirst "\x{100000}"; | |
127 | my $maxm1 = ucfirst "\x{10FFFE}"; | |
128 | my $max = ucfirst "\x{10FFFF}"; | |
129 | my $nonUnicode = ucfirst "\x{110000}"; | |
130 | EXPECT | |
131 | Operation "ucfirst" returns its argument for UTF-16 surrogate U+D800 at - line 3. | |
132 | Operation "ucfirst" returns its argument for UTF-16 surrogate U+DFFF at - line 4. | |
133 | Operation "ucfirst" returns its argument for non-Unicode code point 0x110000 at - line 14. | |
134 | ######## | |
135 | use warnings 'utf8'; | |
136 | chr(0xD7FF) =~ /\p{Any}/; | |
137 | chr(0xD800) =~ /\p{Any}/; | |
138 | chr(0xDFFF) =~ /\p{Any}/; | |
139 | chr(0xE000) =~ /\p{Any}/; | |
140 | chr(0xFEFF) =~ /\p{Any}/; | |
141 | chr(0xFFFD) =~ /\p{Any}/; | |
142 | chr(0xFFFE) =~ /\p{Any}/; | |
143 | chr(0xFFFF) =~ /\p{Any}/; | |
144 | chr(0x10000) =~ /\p{Any}/; | |
145 | chr(0x100000) =~ /\p{Any}/; | |
146 | chr(0x10FFFE) =~ /\p{Any}/; | |
147 | chr(0x10FFFF) =~ /\p{Any}/; | |
148 | chr(0x110000) =~ /\p{Any}/; | |
149 | no warnings 'utf8'; | |
150 | chr(0xD7FF) =~ /\p{Any}/; | |
151 | chr(0xD800) =~ /\p{Any}/; | |
152 | chr(0xDFFF) =~ /\p{Any}/; | |
153 | chr(0xE000) =~ /\p{Any}/; | |
154 | chr(0xFEFF) =~ /\p{Any}/; | |
155 | chr(0xFFFD) =~ /\p{Any}/; | |
156 | chr(0xFFFE) =~ /\p{Any}/; | |
157 | chr(0xFFFF) =~ /\p{Any}/; | |
158 | chr(0x10000) =~ /\p{Any}/; | |
159 | chr(0x100000) =~ /\p{Any}/; | |
160 | chr(0x10FFFE) =~ /\p{Any}/; | |
161 | chr(0x10FFFF) =~ /\p{Any}/; | |
162 | chr(0x110000) =~ /\p{Any}/; | |
163 | EXPECT | |
164 | Code point 0x110000 is not Unicode, no properties match it; all inverse properties do at - line 14. | |
165 | ######## | |
166 | require "../test.pl"; | |
167 | use warnings 'utf8'; | |
168 | my $file = tempfile(); | |
169 | open(my $fh, "+>:utf8", $file); | |
170 | print $fh "\x{D7FF}", "\n"; | |
171 | print $fh "\x{D800}", "\n"; | |
172 | print $fh "\x{DFFF}", "\n"; | |
173 | print $fh "\x{E000}", "\n"; | |
174 | print $fh "\x{FDCF}", "\n"; | |
175 | print $fh "\x{FDD0}", "\n"; | |
176 | print $fh "\x{FDEF}", "\n"; | |
177 | print $fh "\x{FDF0}", "\n"; | |
178 | print $fh "\x{FEFF}", "\n"; | |
179 | print $fh "\x{FFFD}", "\n"; | |
180 | print $fh "\x{FFFE}", "\n"; | |
181 | print $fh "\x{FFFF}", "\n"; | |
182 | print $fh "\x{10000}", "\n"; | |
183 | print $fh "\x{1FFFE}", "\n"; | |
184 | print $fh "\x{1FFFF}", "\n"; | |
185 | print $fh "\x{2FFFE}", "\n"; | |
186 | print $fh "\x{2FFFF}", "\n"; | |
187 | print $fh "\x{3FFFE}", "\n"; | |
188 | print $fh "\x{3FFFF}", "\n"; | |
189 | print $fh "\x{4FFFE}", "\n"; | |
190 | print $fh "\x{4FFFF}", "\n"; | |
191 | print $fh "\x{5FFFE}", "\n"; | |
192 | print $fh "\x{5FFFF}", "\n"; | |
193 | print $fh "\x{6FFFE}", "\n"; | |
194 | print $fh "\x{6FFFF}", "\n"; | |
195 | print $fh "\x{7FFFE}", "\n"; | |
196 | print $fh "\x{7FFFF}", "\n"; | |
197 | print $fh "\x{8FFFE}", "\n"; | |
198 | print $fh "\x{8FFFF}", "\n"; | |
199 | print $fh "\x{9FFFE}", "\n"; | |
200 | print $fh "\x{9FFFF}", "\n"; | |
201 | print $fh "\x{AFFFE}", "\n"; | |
202 | print $fh "\x{AFFFF}", "\n"; | |
203 | print $fh "\x{BFFFE}", "\n"; | |
204 | print $fh "\x{BFFFF}", "\n"; | |
205 | print $fh "\x{CFFFE}", "\n"; | |
206 | print $fh "\x{CFFFF}", "\n"; | |
207 | print $fh "\x{DFFFE}", "\n"; | |
208 | print $fh "\x{DFFFF}", "\n"; | |
209 | print $fh "\x{EFFFE}", "\n"; | |
210 | print $fh "\x{EFFFF}", "\n"; | |
211 | print $fh "\x{FFFFE}", "\n"; | |
212 | print $fh "\x{FFFFF}", "\n"; | |
213 | print $fh "\x{100000}", "\n"; | |
214 | print $fh "\x{10FFFE}", "\n"; | |
215 | print $fh "\x{10FFFF}", "\n"; | |
216 | print $fh "\x{110000}", "\n"; | |
217 | close $fh; | |
218 | EXPECT | |
219 | Unicode surrogate U+D800 is illegal in UTF-8 at - line 6. | |
220 | Unicode surrogate U+DFFF is illegal in UTF-8 at - line 7. | |
221 | Unicode non-character U+FDD0 is illegal for open interchange at - line 10. | |
222 | Unicode non-character U+FDEF is illegal for open interchange at - line 11. | |
223 | Unicode non-character U+FFFE is illegal for open interchange at - line 15. | |
224 | Unicode non-character U+FFFF is illegal for open interchange at - line 16. | |
225 | Unicode non-character U+1FFFE is illegal for open interchange at - line 18. | |
226 | Unicode non-character U+1FFFF is illegal for open interchange at - line 19. | |
227 | Unicode non-character U+2FFFE is illegal for open interchange at - line 20. | |
228 | Unicode non-character U+2FFFF is illegal for open interchange at - line 21. | |
229 | Unicode non-character U+3FFFE is illegal for open interchange at - line 22. | |
230 | Unicode non-character U+3FFFF is illegal for open interchange at - line 23. | |
231 | Unicode non-character U+4FFFE is illegal for open interchange at - line 24. | |
232 | Unicode non-character U+4FFFF is illegal for open interchange at - line 25. | |
233 | Unicode non-character U+5FFFE is illegal for open interchange at - line 26. | |
234 | Unicode non-character U+5FFFF is illegal for open interchange at - line 27. | |
235 | Unicode non-character U+6FFFE is illegal for open interchange at - line 28. | |
236 | Unicode non-character U+6FFFF is illegal for open interchange at - line 29. | |
237 | Unicode non-character U+7FFFE is illegal for open interchange at - line 30. | |
238 | Unicode non-character U+7FFFF is illegal for open interchange at - line 31. | |
239 | Unicode non-character U+8FFFE is illegal for open interchange at - line 32. | |
240 | Unicode non-character U+8FFFF is illegal for open interchange at - line 33. | |
241 | Unicode non-character U+9FFFE is illegal for open interchange at - line 34. | |
242 | Unicode non-character U+9FFFF is illegal for open interchange at - line 35. | |
243 | Unicode non-character U+AFFFE is illegal for open interchange at - line 36. | |
244 | Unicode non-character U+AFFFF is illegal for open interchange at - line 37. | |
245 | Unicode non-character U+BFFFE is illegal for open interchange at - line 38. | |
246 | Unicode non-character U+BFFFF is illegal for open interchange at - line 39. | |
247 | Unicode non-character U+CFFFE is illegal for open interchange at - line 40. | |
248 | Unicode non-character U+CFFFF is illegal for open interchange at - line 41. | |
249 | Unicode non-character U+DFFFE is illegal for open interchange at - line 42. | |
250 | Unicode non-character U+DFFFF is illegal for open interchange at - line 43. | |
251 | Unicode non-character U+EFFFE is illegal for open interchange at - line 44. | |
252 | Unicode non-character U+EFFFF is illegal for open interchange at - line 45. | |
253 | Unicode non-character U+FFFFE is illegal for open interchange at - line 46. | |
254 | Unicode non-character U+FFFFF is illegal for open interchange at - line 47. | |
255 | Unicode non-character U+10FFFE is illegal for open interchange at - line 49. | |
256 | Unicode non-character U+10FFFF is illegal for open interchange at - line 50. | |
257 | Code point 0x110000 is not Unicode, may not be portable at - line 51. | |
258 | ######## | |
259 | require "../test.pl"; | |
260 | no warnings 'utf8'; | |
261 | my $file = tempfile(); | |
262 | open(my $fh, "+>:utf8", $file); | |
263 | print $fh "\x{D7FF}", "\n"; | |
264 | print $fh "\x{D800}", "\n"; | |
265 | print $fh "\x{DFFF}", "\n"; | |
266 | print $fh "\x{E000}", "\n"; | |
267 | print $fh "\x{FDCF}", "\n"; | |
268 | print $fh "\x{FDD0}", "\n"; | |
269 | print $fh "\x{FDEF}", "\n"; | |
270 | print $fh "\x{FDF0}", "\n"; | |
271 | print $fh "\x{FEFF}", "\n"; | |
272 | print $fh "\x{FFFD}", "\n"; | |
273 | print $fh "\x{FFFE}", "\n"; | |
274 | print $fh "\x{FFFF}", "\n"; | |
275 | print $fh "\x{10000}", "\n"; | |
276 | print $fh "\x{1FFFE}", "\n"; | |
277 | print $fh "\x{1FFFF}", "\n"; | |
278 | print $fh "\x{2FFFE}", "\n"; | |
279 | print $fh "\x{2FFFF}", "\n"; | |
280 | print $fh "\x{3FFFE}", "\n"; | |
281 | print $fh "\x{3FFFF}", "\n"; | |
282 | print $fh "\x{4FFFE}", "\n"; | |
283 | print $fh "\x{4FFFF}", "\n"; | |
284 | print $fh "\x{5FFFE}", "\n"; | |
285 | print $fh "\x{5FFFF}", "\n"; | |
286 | print $fh "\x{6FFFE}", "\n"; | |
287 | print $fh "\x{6FFFF}", "\n"; | |
288 | print $fh "\x{7FFFE}", "\n"; | |
289 | print $fh "\x{7FFFF}", "\n"; | |
290 | print $fh "\x{8FFFE}", "\n"; | |
291 | print $fh "\x{8FFFF}", "\n"; | |
292 | print $fh "\x{9FFFE}", "\n"; | |
293 | print $fh "\x{9FFFF}", "\n"; | |
294 | print $fh "\x{AFFFE}", "\n"; | |
295 | print $fh "\x{AFFFF}", "\n"; | |
296 | print $fh "\x{BFFFE}", "\n"; | |
297 | print $fh "\x{BFFFF}", "\n"; | |
298 | print $fh "\x{CFFFE}", "\n"; | |
299 | print $fh "\x{CFFFF}", "\n"; | |
300 | print $fh "\x{DFFFE}", "\n"; | |
301 | print $fh "\x{DFFFF}", "\n"; | |
302 | print $fh "\x{EFFFE}", "\n"; | |
303 | print $fh "\x{EFFFF}", "\n"; | |
304 | print $fh "\x{FFFFE}", "\n"; | |
305 | print $fh "\x{FFFFF}", "\n"; | |
306 | print $fh "\x{100000}", "\n"; | |
307 | print $fh "\x{10FFFE}", "\n"; | |
308 | print $fh "\x{10FFFF}", "\n"; | |
309 | print $fh "\x{110000}", "\n"; | |
310 | close $fh; | |
62961d2e | 311 | EXPECT |