Commit | Line | Data |
---|---|---|
f0df466a JH |
1 | |
2 | utf8.c AOK | |
3 | ||
4b88fb76 | 4 | [utf8_to_uvchr_buf] |
f0df466a JH |
5 | Malformed UTF-8 character |
6 | my $a = ord "\x80" ; | |
7 | ||
8 | Malformed UTF-8 character | |
9 | my $a = ord "\xf080" ; | |
10 | <<<<<< this warning can't be easily triggered from perl anymore | |
11 | ||
12 | [utf16_to_utf8] | |
13 | Malformed UTF-16 surrogate | |
93f09d7b | 14 | <<<<<< Add a test when something actually calls utf16_to_utf8 |
f0df466a JH |
15 | |
16 | __END__ | |
4b88fb76 | 17 | # utf8.c [utf8_to_uvchr_buf] -W |
6cdc5cd8 | 18 | # NAME Malformed under 'use utf8' in double-quoted string |
f0df466a JH |
19 | BEGIN { |
20 | if (ord('A') == 193) { | |
72b4e0d1 | 21 | print "SKIPPED\n# ebcdic platforms generates different Malformed UTF-8 warnings."; |
f0df466a JH |
22 | exit 0; |
23 | } | |
24 | } | |
25 | use utf8 ; | |
6cdc5cd8 | 26 | no warnings; # Malformed is a fatal error, so gets output anyway. |
f0df466a | 27 | my $a = "snøstorm" ; |
6cdc5cd8 KW |
28 | EXPECT |
29 | Malformed UTF-8 character: \xf8\x73\x74\x6f\x72 (unexpected non-continuation byte 0x73, immediately after start byte 0xf8; need 5 bytes, got 1) at - line 10. | |
30 | Malformed UTF-8 character (fatal) at - line 10. | |
31 | ######## | |
32 | # NAME Malformed under 'use utf8' in single-quoted string | |
33 | BEGIN { | |
34 | if (ord('A') == 193) { | |
35 | print "SKIPPED\n# ebcdic platforms generates different Malformed UTF-8 warnings."; | |
36 | exit 0; | |
37 | } | |
f0df466a | 38 | } |
6cdc5cd8 KW |
39 | use utf8 ; |
40 | no warnings; # Malformed is a fatal error, so gets output anyway. | |
41 | my $a = 'snøstorm' ; | |
f0df466a | 42 | EXPECT |
7cf8d05d | 43 | Malformed UTF-8 character: \xf8\x73\x74\x6f\x72 (unexpected non-continuation byte 0x73, immediately after start byte 0xf8; need 5 bytes, got 1) at - line 9. |
6cdc5cd8 | 44 | Malformed UTF-8 character (fatal) at - line 9. |
f0df466a | 45 | ######## |
507b9800 | 46 | use warnings 'utf8'; |
9ae3ac1a KW |
47 | my $d7ff = uc(chr(0xD7FF)); |
48 | my $d800 = uc(chr(0xD800)); | |
49 | my $dfff = uc(chr(0xDFFF)); | |
50 | my $e000 = uc(chr(0xE000)); | |
51 | my $feff = uc(chr(0xFEFF)); | |
52 | my $fffd = uc(chr(0xFFFD)); | |
53 | my $fffe = uc(chr(0xFFFE)); | |
54 | my $ffff = uc(chr(0xFFFF)); | |
55 | my $hex4 = uc(chr(0x10000)); | |
56 | my $hex5 = uc(chr(0x100000)); | |
57 | my $maxm1 = uc(chr(0x10FFFE)); | |
58 | my $max = uc(chr(0x10FFFF)); | |
59 | my $nonUnicode = uc(chr(0x110000)); | |
507b9800 | 60 | no warnings 'utf8'; |
9ae3ac1a KW |
61 | my $d7ff = uc(chr(0xD7FF)); |
62 | my $d800 = uc(chr(0xD800)); | |
63 | my $dfff = uc(chr(0xDFFF)); | |
64 | my $e000 = uc(chr(0xE000)); | |
65 | my $feff = uc(chr(0xFEFF)); | |
66 | my $fffd = uc(chr(0xFFFD)); | |
67 | my $fffe = uc(chr(0xFFFE)); | |
68 | my $ffff = uc(chr(0xFFFF)); | |
69 | my $hex4 = uc(chr(0x10000)); | |
70 | my $hex5 = uc(chr(0x100000)); | |
71 | my $maxm1 = uc(chr(0x10FFFE)); | |
72 | my $max = uc(chr(0x10FFFF)); | |
73 | my $nonUnicode = uc(chr(0x110000)); | |
507b9800 | 74 | EXPECT |
9ae3ac1a KW |
75 | Operation "uc" returns its argument for UTF-16 surrogate U+D800 at - line 3. |
76 | Operation "uc" returns its argument for UTF-16 surrogate U+DFFF at - line 4. | |
77 | Operation "uc" returns its argument for non-Unicode code point 0x110000 at - line 14. | |
507b9800 | 78 | ######## |
62961d2e | 79 | use warnings 'utf8'; |
8457b38f KW |
80 | my $d800 = uc(chr(0xD800)); |
81 | my $nonUnicode = uc(chr(0x110000)); | |
82 | no warnings 'surrogate'; | |
83 | my $d800 = uc(chr(0xD800)); | |
84 | my $nonUnicode = uc(chr(0x110000)); | |
85 | EXPECT | |
86 | Operation "uc" returns its argument for UTF-16 surrogate U+D800 at - line 2. | |
87 | Operation "uc" returns its argument for non-Unicode code point 0x110000 at - line 3. | |
88 | Operation "uc" returns its argument for non-Unicode code point 0x110000 at - line 6. | |
89 | ######## | |
90 | use warnings 'utf8'; | |
91 | my $d800 = uc(chr(0xD800)); | |
92 | my $nonUnicode = uc(chr(0x110000)); | |
8457b38f KW |
93 | no warnings 'non_unicode'; |
94 | my $d800 = uc(chr(0xD800)); | |
95 | my $nonUnicode = uc(chr(0x110000)); | |
8457b38f KW |
96 | EXPECT |
97 | Operation "uc" returns its argument for UTF-16 surrogate U+D800 at - line 2. | |
98 | Operation "uc" returns its argument for non-Unicode code point 0x110000 at - line 3. | |
9415f659 KW |
99 | Operation "uc" returns its argument for UTF-16 surrogate U+D800 at - line 5. |
100 | ######## | |
9415f659 | 101 | use warnings 'utf8'; |
51099b64 | 102 | my $big_nonUnicode = uc(chr(0x7fff_ffff)); |
9415f659 | 103 | no warnings 'non_unicode'; |
51099b64 | 104 | my $big_nonUnicode = uc(chr(0x7fff_ffff)); |
9415f659 | 105 | EXPECT |
51099b64 | 106 | Operation "uc" returns its argument for non-Unicode code point 0x7FFFFFFF at - line 2. |
8457b38f KW |
107 | ######## |
108 | use warnings 'utf8'; | |
9ae3ac1a KW |
109 | my $d7ff = lc pack("U", 0xD7FF); |
110 | my $d800 = lc pack("U", 0xD800); | |
111 | my $dfff = lc pack("U", 0xDFFF); | |
112 | my $e000 = lc pack("U", 0xE000); | |
113 | my $feff = lc pack("U", 0xFEFF); | |
114 | my $fffd = lc pack("U", 0xFFFD); | |
115 | my $fffe = lc pack("U", 0xFFFE); | |
116 | my $ffff = lc pack("U", 0xFFFF); | |
117 | my $hex4 = lc pack("U", 0x10000); | |
118 | my $hex5 = lc pack("U", 0x100000); | |
119 | my $maxm1 = lc pack("U", 0x10FFFE); | |
120 | my $max = lc pack("U", 0x10FFFF); | |
121 | my $nonUnicode = lc(pack("U", 0x110000)); | |
62961d2e | 122 | no warnings 'utf8'; |
9ae3ac1a KW |
123 | my $d7ff = lc pack("U", 0xD7FF); |
124 | my $d800 = lc pack("U", 0xD800); | |
125 | my $dfff = lc pack("U", 0xDFFF); | |
126 | my $e000 = lc pack("U", 0xE000); | |
127 | my $feff = lc pack("U", 0xFEFF); | |
128 | my $fffd = lc pack("U", 0xFFFD); | |
129 | my $fffe = lc pack("U", 0xFFFE); | |
130 | my $ffff = lc pack("U", 0xFFFF); | |
131 | my $hex4 = lc pack("U", 0x10000); | |
132 | my $hex5 = lc pack("U", 0x100000); | |
133 | my $maxm1 = lc pack("U", 0x10FFFE); | |
134 | my $max = lc pack("U", 0x10FFFF); | |
135 | my $nonUnicode = lc(pack("U", 0x110000)); | |
62961d2e | 136 | EXPECT |
9ae3ac1a KW |
137 | Operation "lc" returns its argument for UTF-16 surrogate U+D800 at - line 3. |
138 | Operation "lc" returns its argument for UTF-16 surrogate U+DFFF at - line 4. | |
139 | Operation "lc" returns its argument for non-Unicode code point 0x110000 at - line 14. | |
62961d2e JH |
140 | ######## |
141 | use warnings 'utf8'; | |
9ae3ac1a KW |
142 | my $d7ff = ucfirst "\x{D7FF}"; |
143 | my $d800 = ucfirst "\x{D800}"; | |
144 | my $dfff = ucfirst "\x{DFFF}"; | |
145 | my $e000 = ucfirst "\x{E000}"; | |
146 | my $feff = ucfirst "\x{FEFF}"; | |
147 | my $fffd = ucfirst "\x{FFFD}"; | |
148 | my $fffe = ucfirst "\x{FFFE}"; | |
149 | my $ffff = ucfirst "\x{FFFF}"; | |
150 | my $hex4 = ucfirst "\x{10000}"; | |
151 | my $hex5 = ucfirst "\x{100000}"; | |
152 | my $maxm1 = ucfirst "\x{10FFFE}"; | |
153 | my $max = ucfirst "\x{10FFFF}"; | |
154 | my $nonUnicode = ucfirst "\x{110000}"; | |
62961d2e | 155 | no warnings 'utf8'; |
9ae3ac1a KW |
156 | my $d7ff = ucfirst "\x{D7FF}"; |
157 | my $d800 = ucfirst "\x{D800}"; | |
158 | my $dfff = ucfirst "\x{DFFF}"; | |
159 | my $e000 = ucfirst "\x{E000}"; | |
160 | my $feff = ucfirst "\x{FEFF}"; | |
161 | my $fffd = ucfirst "\x{FFFD}"; | |
162 | my $fffe = ucfirst "\x{FFFE}"; | |
163 | my $ffff = ucfirst "\x{FFFF}"; | |
164 | my $hex4 = ucfirst "\x{10000}"; | |
165 | my $hex5 = ucfirst "\x{100000}"; | |
166 | my $maxm1 = ucfirst "\x{10FFFE}"; | |
167 | my $max = ucfirst "\x{10FFFF}"; | |
168 | my $nonUnicode = ucfirst "\x{110000}"; | |
169 | EXPECT | |
170 | Operation "ucfirst" returns its argument for UTF-16 surrogate U+D800 at - line 3. | |
171 | Operation "ucfirst" returns its argument for UTF-16 surrogate U+DFFF at - line 4. | |
172 | Operation "ucfirst" returns its argument for non-Unicode code point 0x110000 at - line 14. | |
173 | ######## | |
2d88a86a | 174 | # NAME Matching \p{} against above-Unicode |
9ae3ac1a KW |
175 | use warnings 'utf8'; |
176 | chr(0xD7FF) =~ /\p{Any}/; | |
177 | chr(0xD800) =~ /\p{Any}/; | |
178 | chr(0xDFFF) =~ /\p{Any}/; | |
179 | chr(0xE000) =~ /\p{Any}/; | |
180 | chr(0xFEFF) =~ /\p{Any}/; | |
181 | chr(0xFFFD) =~ /\p{Any}/; | |
182 | chr(0xFFFE) =~ /\p{Any}/; | |
183 | chr(0xFFFF) =~ /\p{Any}/; | |
184 | chr(0x10000) =~ /\p{Any}/; | |
185 | chr(0x100000) =~ /\p{Any}/; | |
186 | chr(0x10FFFE) =~ /\p{Any}/; | |
187 | chr(0x10FFFF) =~ /\p{Any}/; | |
2d88a86a KW |
188 | chr(0x110000) =~ /[\p{Any}]/; |
189 | chr(0x110001) =~ /[\w\p{Any}]/; | |
190 | chr(0x10FFFF) =~ /\p{All}/; | |
191 | chr(0x110002) =~ /[\w\p{All}]/; | |
192 | chr(0x110003) =~ /[\p{XPosixWord}]/; | |
193 | chr(0x110004) =~ /[\P{XPosixWord}]/; | |
194 | chr(0x110005) =~ /^[\p{Unassigned}]/; | |
195 | chr(0x110006) =~ /^[\P{Unassigned}]/; | |
196 | # Only Unicode properties give non-Unicode warnings, and only those properties | |
197 | # which do match above Unicode; and not when something else in the class | |
198 | # matches above Unicode. Below we test three ways where something outside the | |
199 | # property may match non-Unicode: a code point above it, a class \S that we | |
200 | # know at compile time doesn't, and a class \W whose values aren't (at the time | |
201 | # of this writing) specified at compile time, but which wouldn't match | |
5073ffbd KW |
202 | chr(0x110050) =~ /\w/; |
203 | chr(0x110051) =~ /\W/; | |
204 | chr(0x110052) =~ /\d/; | |
205 | chr(0x110053) =~ /\D/; | |
206 | chr(0x110054) =~ /\s/; | |
207 | chr(0x110055) =~ /\S/; | |
208 | chr(0x110056) =~ /[[:word:]]/; | |
209 | chr(0x110057) =~ /[[:^word:]]/; | |
210 | chr(0x110058) =~ /[[:alnum:]]/; | |
211 | chr(0x110059) =~ /[[:^alnum:]]/; | |
212 | chr(0x11005A) =~ /[[:space:]]/; | |
213 | chr(0x11005B) =~ /[[:^space:]]/; | |
214 | chr(0x11005C) =~ /[[:digit:]]/; | |
215 | chr(0x11005D) =~ /[[:^digit:]]/; | |
216 | chr(0x11005E) =~ /[[:alpha:]]/; | |
217 | chr(0x11005F) =~ /[[:^alpha:]]/; | |
218 | chr(0x110060) =~ /[[:ascii:]]/; | |
219 | chr(0x110061) =~ /[[:^ascii:]]/; | |
220 | chr(0x110062) =~ /[[:cntrl:]]/; | |
221 | chr(0x110063) =~ /[[:^cntrl:]]/; | |
222 | chr(0x110064) =~ /[[:graph:]]/; | |
223 | chr(0x110065) =~ /[[:^graph:]]/; | |
224 | chr(0x110066) =~ /[[:lower:]]/; | |
225 | chr(0x110067) =~ /[[:^lower:]]/; | |
226 | chr(0x110068) =~ /[[:print:]]/; | |
227 | chr(0x110069) =~ /[[:^print:]]/; | |
228 | chr(0x11006A) =~ /[[:punct:]]/; | |
229 | chr(0x11006B) =~ /[[:^punct:]]/; | |
230 | chr(0x11006C) =~ /[[:upper:]]/; | |
231 | chr(0x11006D) =~ /[[:^upper:]]/; | |
232 | chr(0x11006E) =~ /[[:xdigit:]]/; | |
233 | chr(0x11006F) =~ /[[:^xdigit:]]/; | |
234 | chr(0x110070) =~ /[[:blank:]]/; | |
235 | chr(0x110071) =~ /[[:^blank:]]/; | |
2d88a86a KW |
236 | chr(0x111010) =~ /[\W\p{Unassigned}]/; |
237 | chr(0x111011) =~ /[\W\P{Unassigned}]/; | |
238 | chr(0x112010) =~ /[\S\p{Unassigned}]/; | |
239 | chr(0x112011) =~ /[\S\P{Unassigned}]/; | |
240 | chr(0x113010) =~ /[\x{110000}\p{Unassigned}]/; | |
241 | chr(0x113011) =~ /[\x{110000}\P{Unassigned}]/; | |
9ae3ac1a KW |
242 | no warnings 'utf8'; |
243 | chr(0xD7FF) =~ /\p{Any}/; | |
244 | chr(0xD800) =~ /\p{Any}/; | |
245 | chr(0xDFFF) =~ /\p{Any}/; | |
246 | chr(0xE000) =~ /\p{Any}/; | |
247 | chr(0xFEFF) =~ /\p{Any}/; | |
248 | chr(0xFFFD) =~ /\p{Any}/; | |
249 | chr(0xFFFE) =~ /\p{Any}/; | |
250 | chr(0xFFFF) =~ /\p{Any}/; | |
251 | chr(0x10000) =~ /\p{Any}/; | |
252 | chr(0x100000) =~ /\p{Any}/; | |
253 | chr(0x10FFFE) =~ /\p{Any}/; | |
254 | chr(0x10FFFF) =~ /\p{Any}/; | |
2d88a86a KW |
255 | chr(0x110000) =~ /[\p{Any}]/; |
256 | chr(0x110001) =~ /[\w\p{Any}]/; | |
257 | chr(0x10FFFF) =~ /\p{All}/; | |
258 | chr(0x110002) =~ /[\w\p{All}]/; | |
259 | chr(0x110003) =~ /[\p{XPosixWord}]/; | |
260 | chr(0x110004) =~ /[\P{XPosixWord}]/; | |
261 | chr(0x110005) =~ /^[\p{Unassigned}]/; | |
262 | chr(0x110006) =~ /^[\P{Unassigned}]/; | |
5073ffbd KW |
263 | chr(0x110050) =~ /\w/; |
264 | chr(0x110051) =~ /\W/; | |
265 | chr(0x110052) =~ /\d/; | |
266 | chr(0x110053) =~ /\D/; | |
267 | chr(0x110054) =~ /\s/; | |
268 | chr(0x110055) =~ /\S/; | |
269 | chr(0x110056) =~ /[[:word:]]/; | |
270 | chr(0x110057) =~ /[[:^word:]]/; | |
271 | chr(0x110058) =~ /[[:alnum:]]/; | |
272 | chr(0x110059) =~ /[[:^alnum:]]/; | |
273 | chr(0x11005A) =~ /[[:space:]]/; | |
274 | chr(0x11005B) =~ /[[:^space:]]/; | |
275 | chr(0x11005C) =~ /[[:digit:]]/; | |
276 | chr(0x11005D) =~ /[[:^digit:]]/; | |
277 | chr(0x11005E) =~ /[[:alpha:]]/; | |
278 | chr(0x11005F) =~ /[[:^alpha:]]/; | |
279 | chr(0x110060) =~ /[[:ascii:]]/; | |
280 | chr(0x110061) =~ /[[:^ascii:]]/; | |
281 | chr(0x110062) =~ /[[:cntrl:]]/; | |
282 | chr(0x110063) =~ /[[:^cntrl:]]/; | |
283 | chr(0x110064) =~ /[[:graph:]]/; | |
284 | chr(0x110065) =~ /[[:^graph:]]/; | |
285 | chr(0x110066) =~ /[[:lower:]]/; | |
286 | chr(0x110067) =~ /[[:^lower:]]/; | |
287 | chr(0x110068) =~ /[[:print:]]/; | |
288 | chr(0x110069) =~ /[[:^print:]]/; | |
289 | chr(0x11006A) =~ /[[:punct:]]/; | |
290 | chr(0x11006B) =~ /[[:^punct:]]/; | |
291 | chr(0x11006C) =~ /[[:upper:]]/; | |
292 | chr(0x11006D) =~ /[[:^upper:]]/; | |
293 | chr(0x11006E) =~ /[[:xdigit:]]/; | |
294 | chr(0x11006F) =~ /[[:^xdigit:]]/; | |
295 | chr(0x110070) =~ /[[:blank:]]/; | |
296 | chr(0x110071) =~ /[[:^blank:]]/; | |
2d88a86a KW |
297 | chr(0x111010) =~ /[\W\p{Unassigned}]/; |
298 | chr(0x111011) =~ /[\W\P{Unassigned}]/; | |
299 | chr(0x112010) =~ /[\S\p{Unassigned}]/; | |
300 | chr(0x112011) =~ /[\S\P{Unassigned}]/; | |
301 | chr(0x113010) =~ /[\x{110000}\p{Unassigned}]/; | |
302 | chr(0x113011) =~ /[\x{110000}\P{Unassigned}]/; | |
9ae3ac1a | 303 | EXPECT |
2d88a86a KW |
304 | Matched non-Unicode code point 0x110005 against Unicode property; may not be portable at - line 20. |
305 | Matched non-Unicode code point 0x110006 against Unicode property; may not be portable at - line 21. | |
9ae3ac1a | 306 | ######## |
e9b08962 | 307 | # NAME Matching Unicode property against above-Unicode code point outputs a warning even if optimizer rejects the match (in synthetic start class) |
2d88a86a KW |
308 | # Now have to make FATAL to guarantee being output |
309 | use warnings FATAL => 'non_unicode'; | |
ae986089 KW |
310 | "\x{110000}" =~ /b?\p{Space}/; |
311 | EXPECT | |
2d88a86a | 312 | Matched non-Unicode code point 0x110000 against Unicode property; may not be portable at - line 3. |
ae986089 KW |
313 | ######## |
314 | # NAME Matching POSIX class property against above-Unicode code point doesn't output a warning | |
315 | use warnings 'non_unicode'; | |
2d88a86a | 316 | use warnings FATAL => 'non_unicode'; |
ae986089 KW |
317 | "\x{110000}" =~ /b?[[:space:]]/; |
318 | EXPECT | |
319 | ######## | |
8457b38f KW |
320 | use warnings 'utf8'; |
321 | chr(0x110000) =~ /\p{Any}/; | |
2d88a86a KW |
322 | ######## |
323 | # NAME utf8, non_unicode warnings categories work on Matched non-Unicode code point warning | |
324 | use warnings qw(utf8 non_unicode); | |
325 | chr(0x110000) =~ /^\p{Unassigned}/; | |
8457b38f | 326 | no warnings 'non_unicode'; |
2d88a86a KW |
327 | chr(0x110001) =~ /\p{Unassigned}/; |
328 | use warnings 'non_unicode'; | |
329 | no warnings 'utf8'; | |
330 | chr(0x110002) =~ /\p{Unassigned}/; | |
8457b38f | 331 | EXPECT |
2d88a86a | 332 | Matched non-Unicode code point 0x110000 against Unicode property; may not be portable at - line 2. |
8457b38f | 333 | ######## |
f2c2a6ab | 334 | # NAME optimizable regnode should still give non_unicode warnings when fatalized |
5073ffbd | 335 | use warnings 'utf8'; |
f2c2a6ab | 336 | use warnings FATAL => 'non_unicode'; |
845e7aa3 | 337 | chr(0x110000) =~ /\p{lb=cr}/; |
f2c2a6ab | 338 | EXPECT |
2d88a86a | 339 | Matched non-Unicode code point 0x110000 against Unicode property; may not be portable at - line 3. |
f2c2a6ab KW |
340 | ######## |
341 | # NAME optimizable regnode should not give non_unicode warnings when warnings are off | |
5073ffbd | 342 | no warnings 'non_unicode'; |
845e7aa3 | 343 | chr(0x110000) =~ /\p{lb=cr}/; |
5073ffbd | 344 | EXPECT |
5073ffbd | 345 | ######## |
2d88a86a KW |
346 | # NAME 'All' matches above-Unicode without any warning |
347 | use warnings qw(utf8 non_unicode); | |
348 | chr(0x110000) =~ /\p{All}/; | |
349 | EXPECT | |
350 | ######## | |
9ae3ac1a KW |
351 | require "../test.pl"; |
352 | use warnings 'utf8'; | |
a410ec23 | 353 | sub Is_Super { return '!utf8::Any' } |
88d45d28 KW |
354 | # The extra char is to avoid an optimization that avoids the problem when the |
355 | # property is the only non-latin1 char in a class | |
356 | print "\x{1100000}" =~ /^[\p{Is_Super}\x{100}]$/, "\n"; | |
a410ec23 KW |
357 | EXPECT |
358 | 1 | |
359 | ######## | |
360 | require "../test.pl"; | |
361 | use warnings 'utf8'; | |
9ae3ac1a KW |
362 | my $file = tempfile(); |
363 | open(my $fh, "+>:utf8", $file); | |
364 | print $fh "\x{D7FF}", "\n"; | |
365 | print $fh "\x{D800}", "\n"; | |
c87792c3 KW |
366 | print $fh "\x{D900}", "\n"; |
367 | print $fh "\x{DA00}", "\n"; | |
368 | print $fh "\x{DB00}", "\n"; | |
369 | print $fh "\x{DC00}", "\n"; | |
370 | print $fh "\x{DD00}", "\n"; | |
371 | print $fh "\x{DE00}", "\n"; | |
372 | print $fh "\x{DF00}", "\n"; | |
9ae3ac1a KW |
373 | print $fh "\x{DFFF}", "\n"; |
374 | print $fh "\x{E000}", "\n"; | |
375 | print $fh "\x{FDCF}", "\n"; | |
376 | print $fh "\x{FDD0}", "\n"; | |
c87792c3 | 377 | print $fh "\x{FDD1}", "\n"; |
9ae3ac1a KW |
378 | print $fh "\x{FDEF}", "\n"; |
379 | print $fh "\x{FDF0}", "\n"; | |
c87792c3 KW |
380 | print $fh "\x{FDFE}", "\n"; |
381 | print $fh "\x{FDFF}", "\n"; | |
382 | print $fh "\x{FE00}", "\n"; | |
9ae3ac1a KW |
383 | print $fh "\x{FEFF}", "\n"; |
384 | print $fh "\x{FFFD}", "\n"; | |
385 | print $fh "\x{FFFE}", "\n"; | |
386 | print $fh "\x{FFFF}", "\n"; | |
387 | print $fh "\x{10000}", "\n"; | |
c87792c3 | 388 | print $fh "\x{1FFFD}", "\n"; |
9ae3ac1a KW |
389 | print $fh "\x{1FFFE}", "\n"; |
390 | print $fh "\x{1FFFF}", "\n"; | |
c87792c3 KW |
391 | print $fh "\x{20000}", "\n"; |
392 | print $fh "\x{2FFFD}", "\n"; | |
9ae3ac1a KW |
393 | print $fh "\x{2FFFE}", "\n"; |
394 | print $fh "\x{2FFFF}", "\n"; | |
c87792c3 KW |
395 | print $fh "\x{30000}", "\n"; |
396 | print $fh "\x{3FFFD}", "\n"; | |
9ae3ac1a KW |
397 | print $fh "\x{3FFFE}", "\n"; |
398 | print $fh "\x{3FFFF}", "\n"; | |
c87792c3 KW |
399 | print $fh "\x{40000}", "\n"; |
400 | print $fh "\x{4FFFD}", "\n"; | |
9ae3ac1a KW |
401 | print $fh "\x{4FFFE}", "\n"; |
402 | print $fh "\x{4FFFF}", "\n"; | |
c87792c3 KW |
403 | print $fh "\x{50000}", "\n"; |
404 | print $fh "\x{5FFFD}", "\n"; | |
9ae3ac1a KW |
405 | print $fh "\x{5FFFE}", "\n"; |
406 | print $fh "\x{5FFFF}", "\n"; | |
c87792c3 KW |
407 | print $fh "\x{60000}", "\n"; |
408 | print $fh "\x{6FFFD}", "\n"; | |
9ae3ac1a KW |
409 | print $fh "\x{6FFFE}", "\n"; |
410 | print $fh "\x{6FFFF}", "\n"; | |
c87792c3 KW |
411 | print $fh "\x{70000}", "\n"; |
412 | print $fh "\x{7FFFD}", "\n"; | |
9ae3ac1a KW |
413 | print $fh "\x{7FFFE}", "\n"; |
414 | print $fh "\x{7FFFF}", "\n"; | |
c87792c3 KW |
415 | print $fh "\x{80000}", "\n"; |
416 | print $fh "\x{8FFFD}", "\n"; | |
9ae3ac1a KW |
417 | print $fh "\x{8FFFE}", "\n"; |
418 | print $fh "\x{8FFFF}", "\n"; | |
c87792c3 KW |
419 | print $fh "\x{90000}", "\n"; |
420 | print $fh "\x{9FFFD}", "\n"; | |
9ae3ac1a KW |
421 | print $fh "\x{9FFFE}", "\n"; |
422 | print $fh "\x{9FFFF}", "\n"; | |
c87792c3 KW |
423 | print $fh "\x{A0000}", "\n"; |
424 | print $fh "\x{AFFFD}", "\n"; | |
9ae3ac1a KW |
425 | print $fh "\x{AFFFE}", "\n"; |
426 | print $fh "\x{AFFFF}", "\n"; | |
c87792c3 KW |
427 | print $fh "\x{B0000}", "\n"; |
428 | print $fh "\x{BFFFD}", "\n"; | |
9ae3ac1a KW |
429 | print $fh "\x{BFFFE}", "\n"; |
430 | print $fh "\x{BFFFF}", "\n"; | |
c87792c3 KW |
431 | print $fh "\x{C0000}", "\n"; |
432 | print $fh "\x{CFFFD}", "\n"; | |
9ae3ac1a KW |
433 | print $fh "\x{CFFFE}", "\n"; |
434 | print $fh "\x{CFFFF}", "\n"; | |
c87792c3 KW |
435 | print $fh "\x{D0000}", "\n"; |
436 | print $fh "\x{DFFFD}", "\n"; | |
9ae3ac1a KW |
437 | print $fh "\x{DFFFE}", "\n"; |
438 | print $fh "\x{DFFFF}", "\n"; | |
c87792c3 KW |
439 | print $fh "\x{E0000}", "\n"; |
440 | print $fh "\x{EFFFD}", "\n"; | |
9ae3ac1a KW |
441 | print $fh "\x{EFFFE}", "\n"; |
442 | print $fh "\x{EFFFF}", "\n"; | |
c87792c3 KW |
443 | print $fh "\x{F0000}", "\n"; |
444 | print $fh "\x{FFFFD}", "\n"; | |
9ae3ac1a KW |
445 | print $fh "\x{FFFFE}", "\n"; |
446 | print $fh "\x{FFFFF}", "\n"; | |
447 | print $fh "\x{100000}", "\n"; | |
c87792c3 | 448 | print $fh "\x{10FFFD}", "\n"; |
9ae3ac1a KW |
449 | print $fh "\x{10FFFE}", "\n"; |
450 | print $fh "\x{10FFFF}", "\n"; | |
451 | print $fh "\x{110000}", "\n"; | |
c87792c3 KW |
452 | print $fh "\x{11FFFD}", "\n"; |
453 | print $fh "\x{11FFFE}", "\n"; | |
454 | print $fh "\x{11FFFF}", "\n"; | |
455 | print $fh "\x{120000}", "\n"; | |
9ae3ac1a KW |
456 | close $fh; |
457 | EXPECT | |
458 | Unicode surrogate U+D800 is illegal in UTF-8 at - line 6. | |
c87792c3 KW |
459 | Unicode surrogate U+D900 is illegal in UTF-8 at - line 7. |
460 | Unicode surrogate U+DA00 is illegal in UTF-8 at - line 8. | |
461 | Unicode surrogate U+DB00 is illegal in UTF-8 at - line 9. | |
462 | Unicode surrogate U+DC00 is illegal in UTF-8 at - line 10. | |
463 | Unicode surrogate U+DD00 is illegal in UTF-8 at - line 11. | |
464 | Unicode surrogate U+DE00 is illegal in UTF-8 at - line 12. | |
465 | Unicode surrogate U+DF00 is illegal in UTF-8 at - line 13. | |
466 | Unicode surrogate U+DFFF is illegal in UTF-8 at - line 14. | |
467 | Unicode non-character U+FDD0 is not recommended for open interchange in print at - line 17. | |
468 | Unicode non-character U+FDD1 is not recommended for open interchange in print at - line 18. | |
469 | Unicode non-character U+FDEF is not recommended for open interchange in print at - line 19. | |
470 | Unicode non-character U+FFFE is not recommended for open interchange in print at - line 26. | |
471 | Unicode non-character U+FFFF is not recommended for open interchange in print at - line 27. | |
472 | Unicode non-character U+1FFFE is not recommended for open interchange in print at - line 30. | |
473 | Unicode non-character U+1FFFF is not recommended for open interchange in print at - line 31. | |
474 | Unicode non-character U+2FFFE is not recommended for open interchange in print at - line 34. | |
475 | Unicode non-character U+2FFFF is not recommended for open interchange in print at - line 35. | |
476 | Unicode non-character U+3FFFE is not recommended for open interchange in print at - line 38. | |
477 | Unicode non-character U+3FFFF is not recommended for open interchange in print at - line 39. | |
478 | Unicode non-character U+4FFFE is not recommended for open interchange in print at - line 42. | |
479 | Unicode non-character U+4FFFF is not recommended for open interchange in print at - line 43. | |
480 | Unicode non-character U+5FFFE is not recommended for open interchange in print at - line 46. | |
481 | Unicode non-character U+5FFFF is not recommended for open interchange in print at - line 47. | |
482 | Unicode non-character U+6FFFE is not recommended for open interchange in print at - line 50. | |
483 | Unicode non-character U+6FFFF is not recommended for open interchange in print at - line 51. | |
484 | Unicode non-character U+7FFFE is not recommended for open interchange in print at - line 54. | |
485 | Unicode non-character U+7FFFF is not recommended for open interchange in print at - line 55. | |
486 | Unicode non-character U+8FFFE is not recommended for open interchange in print at - line 58. | |
487 | Unicode non-character U+8FFFF is not recommended for open interchange in print at - line 59. | |
488 | Unicode non-character U+9FFFE is not recommended for open interchange in print at - line 62. | |
489 | Unicode non-character U+9FFFF is not recommended for open interchange in print at - line 63. | |
490 | Unicode non-character U+AFFFE is not recommended for open interchange in print at - line 66. | |
491 | Unicode non-character U+AFFFF is not recommended for open interchange in print at - line 67. | |
492 | Unicode non-character U+BFFFE is not recommended for open interchange in print at - line 70. | |
493 | Unicode non-character U+BFFFF is not recommended for open interchange in print at - line 71. | |
494 | Unicode non-character U+CFFFE is not recommended for open interchange in print at - line 74. | |
495 | Unicode non-character U+CFFFF is not recommended for open interchange in print at - line 75. | |
496 | Unicode non-character U+DFFFE is not recommended for open interchange in print at - line 78. | |
497 | Unicode non-character U+DFFFF is not recommended for open interchange in print at - line 79. | |
498 | Unicode non-character U+EFFFE is not recommended for open interchange in print at - line 82. | |
499 | Unicode non-character U+EFFFF is not recommended for open interchange in print at - line 83. | |
500 | Unicode non-character U+FFFFE is not recommended for open interchange in print at - line 86. | |
501 | Unicode non-character U+FFFFF is not recommended for open interchange in print at - line 87. | |
502 | Unicode non-character U+10FFFE is not recommended for open interchange in print at - line 90. | |
503 | Unicode non-character U+10FFFF is not recommended for open interchange in print at - line 91. | |
504 | Code point 0x110000 is not Unicode, may not be portable in print at - line 92. | |
505 | Code point 0x11FFFD is not Unicode, may not be portable in print at - line 93. | |
506 | Code point 0x11FFFE is not Unicode, may not be portable in print at - line 94. | |
507 | Code point 0x11FFFF is not Unicode, may not be portable in print at - line 95. | |
508 | Code point 0x120000 is not Unicode, may not be portable in print at - line 96. | |
9ae3ac1a KW |
509 | ######## |
510 | require "../test.pl"; | |
8457b38f KW |
511 | use warnings 'utf8'; |
512 | my $file = tempfile(); | |
513 | open(my $fh, "+>:utf8", $file); | |
514 | print $fh "\x{D800}", "\n"; | |
515 | print $fh "\x{FFFF}", "\n"; | |
516 | print $fh "\x{110000}", "\n"; | |
517 | close $fh; | |
518 | EXPECT | |
519 | Unicode surrogate U+D800 is illegal in UTF-8 at - line 5. | |
15ca5930 KW |
520 | Unicode non-character U+FFFF is not recommended for open interchange in print at - line 6. |
521 | Code point 0x110000 is not Unicode, may not be portable in print at - line 7. | |
8457b38f KW |
522 | ######## |
523 | require "../test.pl"; | |
524 | use warnings 'utf8'; | |
525 | no warnings 'surrogate'; | |
526 | my $file = tempfile(); | |
527 | open(my $fh, "+>:utf8", $file); | |
528 | print $fh "\x{D800}", "\n"; | |
529 | print $fh "\x{FFFF}", "\n"; | |
530 | print $fh "\x{110000}", "\n"; | |
531 | close $fh; | |
532 | EXPECT | |
15ca5930 KW |
533 | Unicode non-character U+FFFF is not recommended for open interchange in print at - line 7. |
534 | Code point 0x110000 is not Unicode, may not be portable in print at - line 8. | |
8457b38f KW |
535 | ######## |
536 | require "../test.pl"; | |
537 | use warnings 'utf8'; | |
538 | no warnings 'nonchar'; | |
539 | my $file = tempfile(); | |
540 | open(my $fh, "+>:utf8", $file); | |
541 | print $fh "\x{D800}", "\n"; | |
542 | print $fh "\x{FFFF}", "\n"; | |
543 | print $fh "\x{110000}", "\n"; | |
544 | close $fh; | |
545 | EXPECT | |
546 | Unicode surrogate U+D800 is illegal in UTF-8 at - line 6. | |
15ca5930 | 547 | Code point 0x110000 is not Unicode, may not be portable in print at - line 8. |
8457b38f KW |
548 | ######## |
549 | require "../test.pl"; | |
550 | use warnings 'utf8'; | |
551 | no warnings 'non_unicode'; | |
552 | my $file = tempfile(); | |
553 | open(my $fh, "+>:utf8", $file); | |
554 | print $fh "\x{D800}", "\n"; | |
555 | print $fh "\x{FFFF}", "\n"; | |
556 | print $fh "\x{110000}", "\n"; | |
557 | close $fh; | |
558 | EXPECT | |
559 | Unicode surrogate U+D800 is illegal in UTF-8 at - line 6. | |
15ca5930 | 560 | Unicode non-character U+FFFF is not recommended for open interchange in print at - line 7. |
8457b38f | 561 | ######## |
920e47bb AC |
562 | # NAME C<use warnings "nonchar"> works in isolation |
563 | require "../test.pl"; | |
564 | use warnings 'nonchar'; | |
565 | my $file = tempfile(); | |
566 | open(my $fh, "+>:utf8", $file); | |
567 | print $fh "\x{FFFF}", "\n"; | |
568 | close $fh; | |
569 | EXPECT | |
15ca5930 | 570 | Unicode non-character U+FFFF is not recommended for open interchange in print at - line 5. |
920e47bb | 571 | ######## |
920e47bb AC |
572 | # NAME C<use warnings "surrogate"> works in isolation |
573 | require "../test.pl"; | |
574 | use warnings 'surrogate'; | |
575 | my $file = tempfile(); | |
576 | open(my $fh, "+>:utf8", $file); | |
577 | print $fh "\x{D800}", "\n"; | |
578 | close $fh; | |
579 | EXPECT | |
580 | Unicode surrogate U+D800 is illegal in UTF-8 at - line 5. | |
581 | ######## | |
920e47bb AC |
582 | # NAME C<use warnings "non_unicode"> works in isolation |
583 | require "../test.pl"; | |
584 | use warnings 'non_unicode'; | |
585 | my $file = tempfile(); | |
586 | open(my $fh, "+>:utf8", $file); | |
587 | print $fh "\x{110000}", "\n"; | |
588 | close $fh; | |
589 | EXPECT | |
15ca5930 | 590 | Code point 0x110000 is not Unicode, may not be portable in print at - line 5. |
920e47bb | 591 | ######## |
8457b38f | 592 | require "../test.pl"; |
9ae3ac1a KW |
593 | no warnings 'utf8'; |
594 | my $file = tempfile(); | |
595 | open(my $fh, "+>:utf8", $file); | |
596 | print $fh "\x{D7FF}", "\n"; | |
597 | print $fh "\x{D800}", "\n"; | |
598 | print $fh "\x{DFFF}", "\n"; | |
599 | print $fh "\x{E000}", "\n"; | |
600 | print $fh "\x{FDCF}", "\n"; | |
601 | print $fh "\x{FDD0}", "\n"; | |
602 | print $fh "\x{FDEF}", "\n"; | |
603 | print $fh "\x{FDF0}", "\n"; | |
604 | print $fh "\x{FEFF}", "\n"; | |
605 | print $fh "\x{FFFD}", "\n"; | |
606 | print $fh "\x{FFFE}", "\n"; | |
607 | print $fh "\x{FFFF}", "\n"; | |
608 | print $fh "\x{10000}", "\n"; | |
609 | print $fh "\x{1FFFE}", "\n"; | |
610 | print $fh "\x{1FFFF}", "\n"; | |
611 | print $fh "\x{2FFFE}", "\n"; | |
612 | print $fh "\x{2FFFF}", "\n"; | |
613 | print $fh "\x{3FFFE}", "\n"; | |
614 | print $fh "\x{3FFFF}", "\n"; | |
615 | print $fh "\x{4FFFE}", "\n"; | |
616 | print $fh "\x{4FFFF}", "\n"; | |
617 | print $fh "\x{5FFFE}", "\n"; | |
618 | print $fh "\x{5FFFF}", "\n"; | |
619 | print $fh "\x{6FFFE}", "\n"; | |
620 | print $fh "\x{6FFFF}", "\n"; | |
621 | print $fh "\x{7FFFE}", "\n"; | |
622 | print $fh "\x{7FFFF}", "\n"; | |
623 | print $fh "\x{8FFFE}", "\n"; | |
624 | print $fh "\x{8FFFF}", "\n"; | |
625 | print $fh "\x{9FFFE}", "\n"; | |
626 | print $fh "\x{9FFFF}", "\n"; | |
627 | print $fh "\x{AFFFE}", "\n"; | |
628 | print $fh "\x{AFFFF}", "\n"; | |
629 | print $fh "\x{BFFFE}", "\n"; | |
630 | print $fh "\x{BFFFF}", "\n"; | |
631 | print $fh "\x{CFFFE}", "\n"; | |
632 | print $fh "\x{CFFFF}", "\n"; | |
633 | print $fh "\x{DFFFE}", "\n"; | |
634 | print $fh "\x{DFFFF}", "\n"; | |
635 | print $fh "\x{EFFFE}", "\n"; | |
636 | print $fh "\x{EFFFF}", "\n"; | |
637 | print $fh "\x{FFFFE}", "\n"; | |
638 | print $fh "\x{FFFFF}", "\n"; | |
639 | print $fh "\x{100000}", "\n"; | |
640 | print $fh "\x{10FFFE}", "\n"; | |
641 | print $fh "\x{10FFFF}", "\n"; | |
642 | print $fh "\x{110000}", "\n"; | |
643 | close $fh; | |
62961d2e | 644 | EXPECT |
ab0b796c KW |
645 | ######## |
646 | # NAME Case change crosses 255/256 under non-UTF8 locale | |
ef9d5242 KW |
647 | require '../loc_tools.pl'; |
648 | unless (locales_enabled('LC_CTYPE')) { | |
649 | print("SKIPPED\n# locales not available\n"),exit; | |
650 | } | |
ab0b796c KW |
651 | eval { require POSIX; POSIX->import("locale_h") }; |
652 | if ($@) { | |
653 | print("SKIPPED\n# no POSIX\n"),exit; | |
654 | } | |
655 | use warnings 'locale'; | |
656 | use feature 'fc'; | |
657 | use locale; | |
658 | setlocale(&POSIX::LC_CTYPE, "C"); | |
659 | my $a; | |
660 | $a = lc("\x{178}"); | |
661 | $a = fc("\x{1E9E}"); | |
662 | $a = fc("\x{FB05}"); | |
663 | $a = uc("\x{FB00}"); | |
664 | $a = ucfirst("\x{149}"); | |
8bdce394 KW |
665 | $a = lcfirst("\x{178}"); |
666 | no warnings 'locale'; | |
667 | $a = lc("\x{178}"); | |
668 | $a = fc("\x{1E9E}"); | |
669 | $a = fc("\x{FB05}"); | |
670 | $a = uc("\x{FB00}"); | |
671 | $a = ucfirst("\x{149}"); | |
672 | $a = lcfirst("\x{178}"); | |
ab0b796c | 673 | EXPECT |
ef9d5242 KW |
674 | Can't do lc("\x{178}") on non-UTF-8 locale; resolved to "\x{178}". at - line 14. |
675 | Can't do fc("\x{1E9E}") on non-UTF-8 locale; resolved to "\x{17F}\x{17F}". at - line 15. | |
676 | Can't do fc("\x{FB05}") on non-UTF-8 locale; resolved to "\x{FB06}". at - line 16. | |
677 | Can't do uc("\x{FB00}") on non-UTF-8 locale; resolved to "\x{FB00}". at - line 17. | |
678 | Can't do ucfirst("\x{149}") on non-UTF-8 locale; resolved to "\x{149}". at - line 18. | |
679 | Can't do lcfirst("\x{178}") on non-UTF-8 locale; resolved to "\x{178}". at - line 19. | |
613abc6d KW |
680 | ######## |
681 | # NAME Wide character in non-UTF-8 locale | |
ef9d5242 KW |
682 | require '../loc_tools.pl'; |
683 | unless (locales_enabled('LC_CTYPE')) { | |
684 | print("SKIPPED\n# locales not available\n"),exit; | |
685 | } | |
613abc6d KW |
686 | eval { require POSIX; POSIX->import("locale_h") }; |
687 | if ($@) { | |
688 | print("SKIPPED\n# no POSIX\n"),exit; | |
689 | } | |
690 | use warnings 'locale'; | |
691 | use feature 'fc'; | |
692 | use locale; | |
693 | setlocale(&POSIX::LC_CTYPE, "C"); | |
694 | my $a; | |
695 | $a = lc("\x{100}"); | |
696 | $a = lcfirst("\x{101}"); | |
697 | $a = fc("\x{102}"); | |
698 | $a = uc("\x{103}"); | |
699 | $a = ucfirst("\x{104}"); | |
700 | no warnings 'locale'; | |
701 | $a = lc("\x{100}"); | |
702 | $a = lcfirst("\x{101}"); | |
703 | $a = fc("\x{102}"); | |
704 | $a = uc("\x{103}"); | |
705 | $a = ucfirst("\x{104}"); | |
706 | EXPECT | |
ef9d5242 KW |
707 | Wide character (U+100) in lc at - line 14. |
708 | Wide character (U+101) in lcfirst at - line 15. | |
709 | Wide character (U+102) in fc at - line 16. | |
710 | Wide character (U+103) in uc at - line 17. | |
711 | Wide character (U+104) in ucfirst at - line 18. | |
008e8e82 KW |
712 | ######## |
713 | # NAME Wide character in UTF-8 locale | |
714 | require '../loc_tools.pl'; | |
715 | unless (locales_enabled('LC_CTYPE')) { | |
716 | print("SKIPPED\n# locales not available\n"),exit; | |
717 | } | |
718 | eval { require POSIX; POSIX->import("locale_h") }; | |
719 | if ($@) { | |
720 | print("SKIPPED\n# no POSIX\n"),exit; | |
721 | } | |
722 | my @utf8_locales = find_utf8_ctype_locale(); | |
723 | unless (@utf8_locales) { | |
724 | print("SKIPPED\n# no UTF-8 locales\n"),exit; | |
725 | } | |
726 | use warnings 'locale'; | |
727 | use feature 'fc'; | |
728 | use locale; | |
729 | setlocale(&POSIX::LC_CTYPE, $utf8_locales[0]); | |
730 | my $a; | |
731 | $a = lc("\x{100}"); | |
732 | $a = lcfirst("\x{101}"); | |
733 | $a = fc("\x{102}"); | |
734 | $a = uc("\x{103}"); | |
735 | $a = ucfirst("\x{104}"); | |
736 | EXPECT | |
760c7c2f | 737 | ######## |
710740a6 | 738 | # NAME [perl #127262] |
e88136ce KW |
739 | BEGIN{ |
740 | if (ord('A') == 193) { | |
741 | print "SKIPPED\n# ebcdic platforms generates different Malformed UTF-8 warnings."; | |
742 | exit 0; | |
743 | } | |
a8b2934d JH |
744 | use Config; |
745 | unless ($Config{d_double_style_ieee}) { | |
746 | print "SKIPPED\n# non-IEEE fp range."; | |
747 | exit 0; | |
748 | } | |
749 | {};$^H=eval'2**400'}Â | |
710740a6 | 750 | EXPECT |
a8b2934d | 751 | Malformed UTF-8 character: \xc2\x0a (unexpected non-continuation byte 0x0a, immediately after start byte 0xc2; need 2 bytes, got 1) at - line 11. |