Commit | Line | Data |
---|---|---|
f0df466a JH |
1 | |
2 | utf8.c AOK | |
3 | ||
4b88fb76 | 4 | [utf8_to_uvchr_buf] |
f0df466a JH |
5 | Malformed UTF-8 character |
6 | my $a = ord "\x80" ; | |
7 | ||
8 | Malformed UTF-8 character | |
9 | my $a = ord "\xf080" ; | |
10 | <<<<<< this warning can't be easily triggered from perl anymore | |
11 | ||
12 | [utf16_to_utf8] | |
13 | Malformed UTF-16 surrogate | |
93f09d7b | 14 | <<<<<< Add a test when something actually calls utf16_to_utf8 |
f0df466a JH |
15 | |
16 | __END__ | |
4b88fb76 | 17 | # utf8.c [utf8_to_uvchr_buf] -W |
f0df466a JH |
18 | BEGIN { |
19 | if (ord('A') == 193) { | |
72b4e0d1 | 20 | print "SKIPPED\n# ebcdic platforms generates different Malformed UTF-8 warnings."; |
f0df466a JH |
21 | exit 0; |
22 | } | |
23 | } | |
24 | use utf8 ; | |
25 | my $a = "snøstorm" ; | |
26 | { | |
27 | no warnings 'utf8' ; | |
28 | my $a = "snøstorm"; | |
29 | use warnings 'utf8' ; | |
30 | my $a = "snøstorm"; | |
31 | } | |
32 | EXPECT | |
41432148 JH |
33 | Malformed UTF-8 character (unexpected non-continuation byte 0x73, immediately after start byte 0xf8) at - line 9. |
34 | Malformed UTF-8 character (unexpected non-continuation byte 0x73, immediately after start byte 0xf8) at - line 14. | |
f0df466a | 35 | ######## |
507b9800 | 36 | use warnings 'utf8'; |
9ae3ac1a KW |
37 | my $d7ff = uc(chr(0xD7FF)); |
38 | my $d800 = uc(chr(0xD800)); | |
39 | my $dfff = uc(chr(0xDFFF)); | |
40 | my $e000 = uc(chr(0xE000)); | |
41 | my $feff = uc(chr(0xFEFF)); | |
42 | my $fffd = uc(chr(0xFFFD)); | |
43 | my $fffe = uc(chr(0xFFFE)); | |
44 | my $ffff = uc(chr(0xFFFF)); | |
45 | my $hex4 = uc(chr(0x10000)); | |
46 | my $hex5 = uc(chr(0x100000)); | |
47 | my $maxm1 = uc(chr(0x10FFFE)); | |
48 | my $max = uc(chr(0x10FFFF)); | |
49 | my $nonUnicode = uc(chr(0x110000)); | |
507b9800 | 50 | no warnings 'utf8'; |
9ae3ac1a KW |
51 | my $d7ff = uc(chr(0xD7FF)); |
52 | my $d800 = uc(chr(0xD800)); | |
53 | my $dfff = uc(chr(0xDFFF)); | |
54 | my $e000 = uc(chr(0xE000)); | |
55 | my $feff = uc(chr(0xFEFF)); | |
56 | my $fffd = uc(chr(0xFFFD)); | |
57 | my $fffe = uc(chr(0xFFFE)); | |
58 | my $ffff = uc(chr(0xFFFF)); | |
59 | my $hex4 = uc(chr(0x10000)); | |
60 | my $hex5 = uc(chr(0x100000)); | |
61 | my $maxm1 = uc(chr(0x10FFFE)); | |
62 | my $max = uc(chr(0x10FFFF)); | |
63 | my $nonUnicode = uc(chr(0x110000)); | |
507b9800 | 64 | EXPECT |
9ae3ac1a KW |
65 | Operation "uc" returns its argument for UTF-16 surrogate U+D800 at - line 3. |
66 | Operation "uc" returns its argument for UTF-16 surrogate U+DFFF at - line 4. | |
67 | Operation "uc" returns its argument for non-Unicode code point 0x110000 at - line 14. | |
507b9800 | 68 | ######## |
62961d2e | 69 | use warnings 'utf8'; |
8457b38f KW |
70 | my $d800 = uc(chr(0xD800)); |
71 | my $nonUnicode = uc(chr(0x110000)); | |
72 | no warnings 'surrogate'; | |
73 | my $d800 = uc(chr(0xD800)); | |
74 | my $nonUnicode = uc(chr(0x110000)); | |
75 | EXPECT | |
76 | Operation "uc" returns its argument for UTF-16 surrogate U+D800 at - line 2. | |
77 | Operation "uc" returns its argument for non-Unicode code point 0x110000 at - line 3. | |
78 | Operation "uc" returns its argument for non-Unicode code point 0x110000 at - line 6. | |
79 | ######## | |
80 | use warnings 'utf8'; | |
81 | my $d800 = uc(chr(0xD800)); | |
82 | my $nonUnicode = uc(chr(0x110000)); | |
8457b38f KW |
83 | no warnings 'non_unicode'; |
84 | my $d800 = uc(chr(0xD800)); | |
85 | my $nonUnicode = uc(chr(0x110000)); | |
8457b38f KW |
86 | EXPECT |
87 | Operation "uc" returns its argument for UTF-16 surrogate U+D800 at - line 2. | |
88 | Operation "uc" returns its argument for non-Unicode code point 0x110000 at - line 3. | |
9415f659 KW |
89 | Operation "uc" returns its argument for UTF-16 surrogate U+D800 at - line 5. |
90 | ######## | |
91 | BEGIN { | |
92 | if (ord('A') == 193) { | |
93 | print "SKIPPED\n# ebcdic platforms can't handle this large a code point"; | |
94 | exit 0; | |
95 | } | |
96 | } | |
97 | use warnings 'utf8'; | |
98 | my $big_nonUnicode = uc(chr(0x8000_0000)); | |
99 | no warnings 'non_unicode'; | |
100 | my $big_nonUnicode = uc(chr(0x8000_0000)); | |
101 | EXPECT | |
102 | Operation "uc" returns its argument for non-Unicode code point 0x80000000 at - line 8. | |
8457b38f KW |
103 | ######## |
104 | use warnings 'utf8'; | |
9ae3ac1a KW |
105 | my $d7ff = lc pack("U", 0xD7FF); |
106 | my $d800 = lc pack("U", 0xD800); | |
107 | my $dfff = lc pack("U", 0xDFFF); | |
108 | my $e000 = lc pack("U", 0xE000); | |
109 | my $feff = lc pack("U", 0xFEFF); | |
110 | my $fffd = lc pack("U", 0xFFFD); | |
111 | my $fffe = lc pack("U", 0xFFFE); | |
112 | my $ffff = lc pack("U", 0xFFFF); | |
113 | my $hex4 = lc pack("U", 0x10000); | |
114 | my $hex5 = lc pack("U", 0x100000); | |
115 | my $maxm1 = lc pack("U", 0x10FFFE); | |
116 | my $max = lc pack("U", 0x10FFFF); | |
117 | my $nonUnicode = lc(pack("U", 0x110000)); | |
62961d2e | 118 | no warnings 'utf8'; |
9ae3ac1a KW |
119 | my $d7ff = lc pack("U", 0xD7FF); |
120 | my $d800 = lc pack("U", 0xD800); | |
121 | my $dfff = lc pack("U", 0xDFFF); | |
122 | my $e000 = lc pack("U", 0xE000); | |
123 | my $feff = lc pack("U", 0xFEFF); | |
124 | my $fffd = lc pack("U", 0xFFFD); | |
125 | my $fffe = lc pack("U", 0xFFFE); | |
126 | my $ffff = lc pack("U", 0xFFFF); | |
127 | my $hex4 = lc pack("U", 0x10000); | |
128 | my $hex5 = lc pack("U", 0x100000); | |
129 | my $maxm1 = lc pack("U", 0x10FFFE); | |
130 | my $max = lc pack("U", 0x10FFFF); | |
131 | my $nonUnicode = lc(pack("U", 0x110000)); | |
62961d2e | 132 | EXPECT |
9ae3ac1a KW |
133 | Operation "lc" returns its argument for UTF-16 surrogate U+D800 at - line 3. |
134 | Operation "lc" returns its argument for UTF-16 surrogate U+DFFF at - line 4. | |
135 | Operation "lc" returns its argument for non-Unicode code point 0x110000 at - line 14. | |
62961d2e JH |
136 | ######## |
137 | use warnings 'utf8'; | |
9ae3ac1a KW |
138 | my $d7ff = ucfirst "\x{D7FF}"; |
139 | my $d800 = ucfirst "\x{D800}"; | |
140 | my $dfff = ucfirst "\x{DFFF}"; | |
141 | my $e000 = ucfirst "\x{E000}"; | |
142 | my $feff = ucfirst "\x{FEFF}"; | |
143 | my $fffd = ucfirst "\x{FFFD}"; | |
144 | my $fffe = ucfirst "\x{FFFE}"; | |
145 | my $ffff = ucfirst "\x{FFFF}"; | |
146 | my $hex4 = ucfirst "\x{10000}"; | |
147 | my $hex5 = ucfirst "\x{100000}"; | |
148 | my $maxm1 = ucfirst "\x{10FFFE}"; | |
149 | my $max = ucfirst "\x{10FFFF}"; | |
150 | my $nonUnicode = ucfirst "\x{110000}"; | |
62961d2e | 151 | no warnings 'utf8'; |
9ae3ac1a KW |
152 | my $d7ff = ucfirst "\x{D7FF}"; |
153 | my $d800 = ucfirst "\x{D800}"; | |
154 | my $dfff = ucfirst "\x{DFFF}"; | |
155 | my $e000 = ucfirst "\x{E000}"; | |
156 | my $feff = ucfirst "\x{FEFF}"; | |
157 | my $fffd = ucfirst "\x{FFFD}"; | |
158 | my $fffe = ucfirst "\x{FFFE}"; | |
159 | my $ffff = ucfirst "\x{FFFF}"; | |
160 | my $hex4 = ucfirst "\x{10000}"; | |
161 | my $hex5 = ucfirst "\x{100000}"; | |
162 | my $maxm1 = ucfirst "\x{10FFFE}"; | |
163 | my $max = ucfirst "\x{10FFFF}"; | |
164 | my $nonUnicode = ucfirst "\x{110000}"; | |
165 | EXPECT | |
166 | Operation "ucfirst" returns its argument for UTF-16 surrogate U+D800 at - line 3. | |
167 | Operation "ucfirst" returns its argument for UTF-16 surrogate U+DFFF at - line 4. | |
168 | Operation "ucfirst" returns its argument for non-Unicode code point 0x110000 at - line 14. | |
169 | ######## | |
2d88a86a | 170 | # NAME Matching \p{} against above-Unicode |
9ae3ac1a KW |
171 | use warnings 'utf8'; |
172 | chr(0xD7FF) =~ /\p{Any}/; | |
173 | chr(0xD800) =~ /\p{Any}/; | |
174 | chr(0xDFFF) =~ /\p{Any}/; | |
175 | chr(0xE000) =~ /\p{Any}/; | |
176 | chr(0xFEFF) =~ /\p{Any}/; | |
177 | chr(0xFFFD) =~ /\p{Any}/; | |
178 | chr(0xFFFE) =~ /\p{Any}/; | |
179 | chr(0xFFFF) =~ /\p{Any}/; | |
180 | chr(0x10000) =~ /\p{Any}/; | |
181 | chr(0x100000) =~ /\p{Any}/; | |
182 | chr(0x10FFFE) =~ /\p{Any}/; | |
183 | chr(0x10FFFF) =~ /\p{Any}/; | |
2d88a86a KW |
184 | chr(0x110000) =~ /[\p{Any}]/; |
185 | chr(0x110001) =~ /[\w\p{Any}]/; | |
186 | chr(0x10FFFF) =~ /\p{All}/; | |
187 | chr(0x110002) =~ /[\w\p{All}]/; | |
188 | chr(0x110003) =~ /[\p{XPosixWord}]/; | |
189 | chr(0x110004) =~ /[\P{XPosixWord}]/; | |
190 | chr(0x110005) =~ /^[\p{Unassigned}]/; | |
191 | chr(0x110006) =~ /^[\P{Unassigned}]/; | |
192 | # Only Unicode properties give non-Unicode warnings, and only those properties | |
193 | # which do match above Unicode; and not when something else in the class | |
194 | # matches above Unicode. Below we test three ways where something outside the | |
195 | # property may match non-Unicode: a code point above it, a class \S that we | |
196 | # know at compile time doesn't, and a class \W whose values aren't (at the time | |
197 | # of this writing) specified at compile time, but which wouldn't match | |
5073ffbd KW |
198 | chr(0x110050) =~ /\w/; |
199 | chr(0x110051) =~ /\W/; | |
200 | chr(0x110052) =~ /\d/; | |
201 | chr(0x110053) =~ /\D/; | |
202 | chr(0x110054) =~ /\s/; | |
203 | chr(0x110055) =~ /\S/; | |
204 | chr(0x110056) =~ /[[:word:]]/; | |
205 | chr(0x110057) =~ /[[:^word:]]/; | |
206 | chr(0x110058) =~ /[[:alnum:]]/; | |
207 | chr(0x110059) =~ /[[:^alnum:]]/; | |
208 | chr(0x11005A) =~ /[[:space:]]/; | |
209 | chr(0x11005B) =~ /[[:^space:]]/; | |
210 | chr(0x11005C) =~ /[[:digit:]]/; | |
211 | chr(0x11005D) =~ /[[:^digit:]]/; | |
212 | chr(0x11005E) =~ /[[:alpha:]]/; | |
213 | chr(0x11005F) =~ /[[:^alpha:]]/; | |
214 | chr(0x110060) =~ /[[:ascii:]]/; | |
215 | chr(0x110061) =~ /[[:^ascii:]]/; | |
216 | chr(0x110062) =~ /[[:cntrl:]]/; | |
217 | chr(0x110063) =~ /[[:^cntrl:]]/; | |
218 | chr(0x110064) =~ /[[:graph:]]/; | |
219 | chr(0x110065) =~ /[[:^graph:]]/; | |
220 | chr(0x110066) =~ /[[:lower:]]/; | |
221 | chr(0x110067) =~ /[[:^lower:]]/; | |
222 | chr(0x110068) =~ /[[:print:]]/; | |
223 | chr(0x110069) =~ /[[:^print:]]/; | |
224 | chr(0x11006A) =~ /[[:punct:]]/; | |
225 | chr(0x11006B) =~ /[[:^punct:]]/; | |
226 | chr(0x11006C) =~ /[[:upper:]]/; | |
227 | chr(0x11006D) =~ /[[:^upper:]]/; | |
228 | chr(0x11006E) =~ /[[:xdigit:]]/; | |
229 | chr(0x11006F) =~ /[[:^xdigit:]]/; | |
230 | chr(0x110070) =~ /[[:blank:]]/; | |
231 | chr(0x110071) =~ /[[:^blank:]]/; | |
2d88a86a KW |
232 | chr(0x111010) =~ /[\W\p{Unassigned}]/; |
233 | chr(0x111011) =~ /[\W\P{Unassigned}]/; | |
234 | chr(0x112010) =~ /[\S\p{Unassigned}]/; | |
235 | chr(0x112011) =~ /[\S\P{Unassigned}]/; | |
236 | chr(0x113010) =~ /[\x{110000}\p{Unassigned}]/; | |
237 | chr(0x113011) =~ /[\x{110000}\P{Unassigned}]/; | |
9ae3ac1a KW |
238 | no warnings 'utf8'; |
239 | chr(0xD7FF) =~ /\p{Any}/; | |
240 | chr(0xD800) =~ /\p{Any}/; | |
241 | chr(0xDFFF) =~ /\p{Any}/; | |
242 | chr(0xE000) =~ /\p{Any}/; | |
243 | chr(0xFEFF) =~ /\p{Any}/; | |
244 | chr(0xFFFD) =~ /\p{Any}/; | |
245 | chr(0xFFFE) =~ /\p{Any}/; | |
246 | chr(0xFFFF) =~ /\p{Any}/; | |
247 | chr(0x10000) =~ /\p{Any}/; | |
248 | chr(0x100000) =~ /\p{Any}/; | |
249 | chr(0x10FFFE) =~ /\p{Any}/; | |
250 | chr(0x10FFFF) =~ /\p{Any}/; | |
2d88a86a KW |
251 | chr(0x110000) =~ /[\p{Any}]/; |
252 | chr(0x110001) =~ /[\w\p{Any}]/; | |
253 | chr(0x10FFFF) =~ /\p{All}/; | |
254 | chr(0x110002) =~ /[\w\p{All}]/; | |
255 | chr(0x110003) =~ /[\p{XPosixWord}]/; | |
256 | chr(0x110004) =~ /[\P{XPosixWord}]/; | |
257 | chr(0x110005) =~ /^[\p{Unassigned}]/; | |
258 | chr(0x110006) =~ /^[\P{Unassigned}]/; | |
5073ffbd KW |
259 | chr(0x110050) =~ /\w/; |
260 | chr(0x110051) =~ /\W/; | |
261 | chr(0x110052) =~ /\d/; | |
262 | chr(0x110053) =~ /\D/; | |
263 | chr(0x110054) =~ /\s/; | |
264 | chr(0x110055) =~ /\S/; | |
265 | chr(0x110056) =~ /[[:word:]]/; | |
266 | chr(0x110057) =~ /[[:^word:]]/; | |
267 | chr(0x110058) =~ /[[:alnum:]]/; | |
268 | chr(0x110059) =~ /[[:^alnum:]]/; | |
269 | chr(0x11005A) =~ /[[:space:]]/; | |
270 | chr(0x11005B) =~ /[[:^space:]]/; | |
271 | chr(0x11005C) =~ /[[:digit:]]/; | |
272 | chr(0x11005D) =~ /[[:^digit:]]/; | |
273 | chr(0x11005E) =~ /[[:alpha:]]/; | |
274 | chr(0x11005F) =~ /[[:^alpha:]]/; | |
275 | chr(0x110060) =~ /[[:ascii:]]/; | |
276 | chr(0x110061) =~ /[[:^ascii:]]/; | |
277 | chr(0x110062) =~ /[[:cntrl:]]/; | |
278 | chr(0x110063) =~ /[[:^cntrl:]]/; | |
279 | chr(0x110064) =~ /[[:graph:]]/; | |
280 | chr(0x110065) =~ /[[:^graph:]]/; | |
281 | chr(0x110066) =~ /[[:lower:]]/; | |
282 | chr(0x110067) =~ /[[:^lower:]]/; | |
283 | chr(0x110068) =~ /[[:print:]]/; | |
284 | chr(0x110069) =~ /[[:^print:]]/; | |
285 | chr(0x11006A) =~ /[[:punct:]]/; | |
286 | chr(0x11006B) =~ /[[:^punct:]]/; | |
287 | chr(0x11006C) =~ /[[:upper:]]/; | |
288 | chr(0x11006D) =~ /[[:^upper:]]/; | |
289 | chr(0x11006E) =~ /[[:xdigit:]]/; | |
290 | chr(0x11006F) =~ /[[:^xdigit:]]/; | |
291 | chr(0x110070) =~ /[[:blank:]]/; | |
292 | chr(0x110071) =~ /[[:^blank:]]/; | |
2d88a86a KW |
293 | chr(0x111010) =~ /[\W\p{Unassigned}]/; |
294 | chr(0x111011) =~ /[\W\P{Unassigned}]/; | |
295 | chr(0x112010) =~ /[\S\p{Unassigned}]/; | |
296 | chr(0x112011) =~ /[\S\P{Unassigned}]/; | |
297 | chr(0x113010) =~ /[\x{110000}\p{Unassigned}]/; | |
298 | chr(0x113011) =~ /[\x{110000}\P{Unassigned}]/; | |
9ae3ac1a | 299 | EXPECT |
2d88a86a KW |
300 | Matched non-Unicode code point 0x110005 against Unicode property; may not be portable at - line 20. |
301 | Matched non-Unicode code point 0x110006 against Unicode property; may not be portable at - line 21. | |
9ae3ac1a | 302 | ######## |
e9b08962 | 303 | # NAME Matching Unicode property against above-Unicode code point outputs a warning even if optimizer rejects the match (in synthetic start class) |
2d88a86a KW |
304 | # The warning must now be made FATAL to guarantee that it is output | |
305 | use warnings FATAL => 'non_unicode'; | |
ae986089 KW |
306 | "\x{110000}" =~ /b?\p{Space}/; |
307 | EXPECT | |
2d88a86a | 308 | Matched non-Unicode code point 0x110000 against Unicode property; may not be portable at - line 3. |
ae986089 KW |
309 | ######## |
310 | # NAME Matching POSIX class property against above-Unicode code point doesn't output a warning | |
311 | use warnings 'non_unicode'; | |
2d88a86a | 312 | use warnings FATAL => 'non_unicode'; |
ae986089 KW |
313 | "\x{110000}" =~ /b?[[:space:]]/; |
314 | EXPECT | |
315 | ######## | |
8457b38f KW |
316 | use warnings 'utf8'; |
317 | chr(0x110000) =~ /\p{Any}/; | |
2d88a86a KW |
318 | ######## |
319 | # NAME utf8, non_unicode warnings categories work on Matched non-Unicode code point warning | |
320 | use warnings qw(utf8 non_unicode); | |
321 | chr(0x110000) =~ /^\p{Unassigned}/; | |
8457b38f | 322 | no warnings 'non_unicode'; |
2d88a86a KW |
323 | chr(0x110001) =~ /\p{Unassigned}/; |
324 | use warnings 'non_unicode'; | |
325 | no warnings 'utf8'; | |
326 | chr(0x110002) =~ /\p{Unassigned}/; | |
8457b38f | 327 | EXPECT |
2d88a86a | 328 | Matched non-Unicode code point 0x110000 against Unicode property; may not be portable at - line 2. |
8457b38f | 329 | ######## |
f2c2a6ab | 330 | # NAME optimizable regnode should still give non_unicode warnings when fatalized |
5073ffbd | 331 | use warnings 'utf8'; |
f2c2a6ab | 332 | use warnings FATAL => 'non_unicode'; |
845e7aa3 | 333 | chr(0x110000) =~ /\p{lb=cr}/; |
f2c2a6ab | 334 | EXPECT |
2d88a86a | 335 | Matched non-Unicode code point 0x110000 against Unicode property; may not be portable at - line 3. |
f2c2a6ab KW |
336 | ######## |
337 | # NAME optimizable regnode should not give non_unicode warnings when warnings are off | |
5073ffbd | 338 | no warnings 'non_unicode'; |
845e7aa3 | 339 | chr(0x110000) =~ /\p{lb=cr}/; |
5073ffbd | 340 | EXPECT |
5073ffbd | 341 | ######## |
2d88a86a KW |
342 | # NAME 'All' matches above-Unicode without any warning |
343 | use warnings qw(utf8 non_unicode); | |
344 | chr(0x110000) =~ /\p{All}/; | |
345 | EXPECT | |
346 | ######## | |
9ae3ac1a KW |
347 | require "../test.pl"; |
348 | use warnings 'utf8'; | |
a410ec23 | 349 | sub Is_Super { return '!utf8::Any' } |
88d45d28 KW |
350 | # The extra \x{100} defeats an optimization that would otherwise sidestep the | |
351 | # problem when the property is the only non-Latin1 element in the class. | |
352 | print "\x{1100000}" =~ /^[\p{Is_Super}\x{100}]$/, "\n"; | |
a410ec23 KW |
353 | EXPECT |
354 | 1 | |
355 | ######## | |
356 | require "../test.pl"; | |
357 | use warnings 'utf8'; | |
9ae3ac1a KW |
358 | my $file = tempfile(); |
359 | open(my $fh, "+>:utf8", $file); | |
360 | print $fh "\x{D7FF}", "\n"; | |
361 | print $fh "\x{D800}", "\n"; | |
362 | print $fh "\x{DFFF}", "\n"; | |
363 | print $fh "\x{E000}", "\n"; | |
364 | print $fh "\x{FDCF}", "\n"; | |
365 | print $fh "\x{FDD0}", "\n"; | |
366 | print $fh "\x{FDEF}", "\n"; | |
367 | print $fh "\x{FDF0}", "\n"; | |
368 | print $fh "\x{FEFF}", "\n"; | |
369 | print $fh "\x{FFFD}", "\n"; | |
370 | print $fh "\x{FFFE}", "\n"; | |
371 | print $fh "\x{FFFF}", "\n"; | |
372 | print $fh "\x{10000}", "\n"; | |
373 | print $fh "\x{1FFFE}", "\n"; | |
374 | print $fh "\x{1FFFF}", "\n"; | |
375 | print $fh "\x{2FFFE}", "\n"; | |
376 | print $fh "\x{2FFFF}", "\n"; | |
377 | print $fh "\x{3FFFE}", "\n"; | |
378 | print $fh "\x{3FFFF}", "\n"; | |
379 | print $fh "\x{4FFFE}", "\n"; | |
380 | print $fh "\x{4FFFF}", "\n"; | |
381 | print $fh "\x{5FFFE}", "\n"; | |
382 | print $fh "\x{5FFFF}", "\n"; | |
383 | print $fh "\x{6FFFE}", "\n"; | |
384 | print $fh "\x{6FFFF}", "\n"; | |
385 | print $fh "\x{7FFFE}", "\n"; | |
386 | print $fh "\x{7FFFF}", "\n"; | |
387 | print $fh "\x{8FFFE}", "\n"; | |
388 | print $fh "\x{8FFFF}", "\n"; | |
389 | print $fh "\x{9FFFE}", "\n"; | |
390 | print $fh "\x{9FFFF}", "\n"; | |
391 | print $fh "\x{AFFFE}", "\n"; | |
392 | print $fh "\x{AFFFF}", "\n"; | |
393 | print $fh "\x{BFFFE}", "\n"; | |
394 | print $fh "\x{BFFFF}", "\n"; | |
395 | print $fh "\x{CFFFE}", "\n"; | |
396 | print $fh "\x{CFFFF}", "\n"; | |
397 | print $fh "\x{DFFFE}", "\n"; | |
398 | print $fh "\x{DFFFF}", "\n"; | |
399 | print $fh "\x{EFFFE}", "\n"; | |
400 | print $fh "\x{EFFFF}", "\n"; | |
401 | print $fh "\x{FFFFE}", "\n"; | |
402 | print $fh "\x{FFFFF}", "\n"; | |
403 | print $fh "\x{100000}", "\n"; | |
404 | print $fh "\x{10FFFE}", "\n"; | |
405 | print $fh "\x{10FFFF}", "\n"; | |
406 | print $fh "\x{110000}", "\n"; | |
407 | close $fh; | |
408 | EXPECT | |
409 | Unicode surrogate U+D800 is illegal in UTF-8 at - line 6. | |
410 | Unicode surrogate U+DFFF is illegal in UTF-8 at - line 7. | |
411 | Unicode non-character U+FDD0 is illegal for open interchange at - line 10. | |
412 | Unicode non-character U+FDEF is illegal for open interchange at - line 11. | |
413 | Unicode non-character U+FFFE is illegal for open interchange at - line 15. | |
414 | Unicode non-character U+FFFF is illegal for open interchange at - line 16. | |
415 | Unicode non-character U+1FFFE is illegal for open interchange at - line 18. | |
416 | Unicode non-character U+1FFFF is illegal for open interchange at - line 19. | |
417 | Unicode non-character U+2FFFE is illegal for open interchange at - line 20. | |
418 | Unicode non-character U+2FFFF is illegal for open interchange at - line 21. | |
419 | Unicode non-character U+3FFFE is illegal for open interchange at - line 22. | |
420 | Unicode non-character U+3FFFF is illegal for open interchange at - line 23. | |
421 | Unicode non-character U+4FFFE is illegal for open interchange at - line 24. | |
422 | Unicode non-character U+4FFFF is illegal for open interchange at - line 25. | |
423 | Unicode non-character U+5FFFE is illegal for open interchange at - line 26. | |
424 | Unicode non-character U+5FFFF is illegal for open interchange at - line 27. | |
425 | Unicode non-character U+6FFFE is illegal for open interchange at - line 28. | |
426 | Unicode non-character U+6FFFF is illegal for open interchange at - line 29. | |
427 | Unicode non-character U+7FFFE is illegal for open interchange at - line 30. | |
428 | Unicode non-character U+7FFFF is illegal for open interchange at - line 31. | |
429 | Unicode non-character U+8FFFE is illegal for open interchange at - line 32. | |
430 | Unicode non-character U+8FFFF is illegal for open interchange at - line 33. | |
431 | Unicode non-character U+9FFFE is illegal for open interchange at - line 34. | |
432 | Unicode non-character U+9FFFF is illegal for open interchange at - line 35. | |
433 | Unicode non-character U+AFFFE is illegal for open interchange at - line 36. | |
434 | Unicode non-character U+AFFFF is illegal for open interchange at - line 37. | |
435 | Unicode non-character U+BFFFE is illegal for open interchange at - line 38. | |
436 | Unicode non-character U+BFFFF is illegal for open interchange at - line 39. | |
437 | Unicode non-character U+CFFFE is illegal for open interchange at - line 40. | |
438 | Unicode non-character U+CFFFF is illegal for open interchange at - line 41. | |
439 | Unicode non-character U+DFFFE is illegal for open interchange at - line 42. | |
440 | Unicode non-character U+DFFFF is illegal for open interchange at - line 43. | |
441 | Unicode non-character U+EFFFE is illegal for open interchange at - line 44. | |
442 | Unicode non-character U+EFFFF is illegal for open interchange at - line 45. | |
443 | Unicode non-character U+FFFFE is illegal for open interchange at - line 46. | |
444 | Unicode non-character U+FFFFF is illegal for open interchange at - line 47. | |
445 | Unicode non-character U+10FFFE is illegal for open interchange at - line 49. | |
446 | Unicode non-character U+10FFFF is illegal for open interchange at - line 50. | |
447 | Code point 0x110000 is not Unicode, may not be portable at - line 51. | |
448 | ######## | |
449 | require "../test.pl"; | |
8457b38f KW |
450 | use warnings 'utf8'; |
451 | my $file = tempfile(); | |
452 | open(my $fh, "+>:utf8", $file); | |
453 | print $fh "\x{D800}", "\n"; | |
454 | print $fh "\x{FFFF}", "\n"; | |
455 | print $fh "\x{110000}", "\n"; | |
456 | close $fh; | |
457 | EXPECT | |
458 | Unicode surrogate U+D800 is illegal in UTF-8 at - line 5. | |
459 | Unicode non-character U+FFFF is illegal for open interchange at - line 6. | |
460 | Code point 0x110000 is not Unicode, may not be portable at - line 7. | |
461 | ######## | |
462 | require "../test.pl"; | |
463 | use warnings 'utf8'; | |
464 | no warnings 'surrogate'; | |
465 | my $file = tempfile(); | |
466 | open(my $fh, "+>:utf8", $file); | |
467 | print $fh "\x{D800}", "\n"; | |
468 | print $fh "\x{FFFF}", "\n"; | |
469 | print $fh "\x{110000}", "\n"; | |
470 | close $fh; | |
471 | EXPECT | |
472 | Unicode non-character U+FFFF is illegal for open interchange at - line 7. | |
473 | Code point 0x110000 is not Unicode, may not be portable at - line 8. | |
474 | ######## | |
475 | require "../test.pl"; | |
476 | use warnings 'utf8'; | |
477 | no warnings 'nonchar'; | |
478 | my $file = tempfile(); | |
479 | open(my $fh, "+>:utf8", $file); | |
480 | print $fh "\x{D800}", "\n"; | |
481 | print $fh "\x{FFFF}", "\n"; | |
482 | print $fh "\x{110000}", "\n"; | |
483 | close $fh; | |
484 | EXPECT | |
485 | Unicode surrogate U+D800 is illegal in UTF-8 at - line 6. | |
486 | Code point 0x110000 is not Unicode, may not be portable at - line 8. | |
487 | ######## | |
488 | require "../test.pl"; | |
489 | use warnings 'utf8'; | |
490 | no warnings 'non_unicode'; | |
491 | my $file = tempfile(); | |
492 | open(my $fh, "+>:utf8", $file); | |
493 | print $fh "\x{D800}", "\n"; | |
494 | print $fh "\x{FFFF}", "\n"; | |
495 | print $fh "\x{110000}", "\n"; | |
496 | close $fh; | |
497 | EXPECT | |
498 | Unicode surrogate U+D800 is illegal in UTF-8 at - line 6. | |
499 | Unicode non-character U+FFFF is illegal for open interchange at - line 7. | |
500 | ######## | |
920e47bb AC |
501 | # NAME C<use warnings "nonchar"> works in isolation |
502 | require "../test.pl"; | |
503 | use warnings 'nonchar'; | |
504 | my $file = tempfile(); | |
505 | open(my $fh, "+>:utf8", $file); | |
506 | print $fh "\x{FFFF}", "\n"; | |
507 | close $fh; | |
508 | EXPECT | |
509 | Unicode non-character U+FFFF is illegal for open interchange at - line 5. | |
510 | ######## | |
920e47bb AC |
511 | # NAME C<use warnings "surrogate"> works in isolation |
512 | require "../test.pl"; | |
513 | use warnings 'surrogate'; | |
514 | my $file = tempfile(); | |
515 | open(my $fh, "+>:utf8", $file); | |
516 | print $fh "\x{D800}", "\n"; | |
517 | close $fh; | |
518 | EXPECT | |
519 | Unicode surrogate U+D800 is illegal in UTF-8 at - line 5. | |
520 | ######## | |
920e47bb AC |
521 | # NAME C<use warnings "non_unicode"> works in isolation |
522 | require "../test.pl"; | |
523 | use warnings 'non_unicode'; | |
524 | my $file = tempfile(); | |
525 | open(my $fh, "+>:utf8", $file); | |
526 | print $fh "\x{110000}", "\n"; | |
527 | close $fh; | |
528 | EXPECT | |
529 | Code point 0x110000 is not Unicode, may not be portable at - line 5. | |
530 | ######## | |
8457b38f | 531 | require "../test.pl"; |
9ae3ac1a KW |
532 | no warnings 'utf8'; |
533 | my $file = tempfile(); | |
534 | open(my $fh, "+>:utf8", $file); | |
535 | print $fh "\x{D7FF}", "\n"; | |
536 | print $fh "\x{D800}", "\n"; | |
537 | print $fh "\x{DFFF}", "\n"; | |
538 | print $fh "\x{E000}", "\n"; | |
539 | print $fh "\x{FDCF}", "\n"; | |
540 | print $fh "\x{FDD0}", "\n"; | |
541 | print $fh "\x{FDEF}", "\n"; | |
542 | print $fh "\x{FDF0}", "\n"; | |
543 | print $fh "\x{FEFF}", "\n"; | |
544 | print $fh "\x{FFFD}", "\n"; | |
545 | print $fh "\x{FFFE}", "\n"; | |
546 | print $fh "\x{FFFF}", "\n"; | |
547 | print $fh "\x{10000}", "\n"; | |
548 | print $fh "\x{1FFFE}", "\n"; | |
549 | print $fh "\x{1FFFF}", "\n"; | |
550 | print $fh "\x{2FFFE}", "\n"; | |
551 | print $fh "\x{2FFFF}", "\n"; | |
552 | print $fh "\x{3FFFE}", "\n"; | |
553 | print $fh "\x{3FFFF}", "\n"; | |
554 | print $fh "\x{4FFFE}", "\n"; | |
555 | print $fh "\x{4FFFF}", "\n"; | |
556 | print $fh "\x{5FFFE}", "\n"; | |
557 | print $fh "\x{5FFFF}", "\n"; | |
558 | print $fh "\x{6FFFE}", "\n"; | |
559 | print $fh "\x{6FFFF}", "\n"; | |
560 | print $fh "\x{7FFFE}", "\n"; | |
561 | print $fh "\x{7FFFF}", "\n"; | |
562 | print $fh "\x{8FFFE}", "\n"; | |
563 | print $fh "\x{8FFFF}", "\n"; | |
564 | print $fh "\x{9FFFE}", "\n"; | |
565 | print $fh "\x{9FFFF}", "\n"; | |
566 | print $fh "\x{AFFFE}", "\n"; | |
567 | print $fh "\x{AFFFF}", "\n"; | |
568 | print $fh "\x{BFFFE}", "\n"; | |
569 | print $fh "\x{BFFFF}", "\n"; | |
570 | print $fh "\x{CFFFE}", "\n"; | |
571 | print $fh "\x{CFFFF}", "\n"; | |
572 | print $fh "\x{DFFFE}", "\n"; | |
573 | print $fh "\x{DFFFF}", "\n"; | |
574 | print $fh "\x{EFFFE}", "\n"; | |
575 | print $fh "\x{EFFFF}", "\n"; | |
576 | print $fh "\x{FFFFE}", "\n"; | |
577 | print $fh "\x{FFFFF}", "\n"; | |
578 | print $fh "\x{100000}", "\n"; | |
579 | print $fh "\x{10FFFE}", "\n"; | |
580 | print $fh "\x{10FFFF}", "\n"; | |
581 | print $fh "\x{110000}", "\n"; | |
582 | close $fh; | |
62961d2e | 583 | EXPECT |
ab0b796c KW |
584 | ######## |
585 | # NAME Case change crosses 255/256 under non-UTF8 locale | |
586 | eval { require POSIX; POSIX->import("locale_h") }; | |
587 | if ($@) { | |
588 | print("SKIPPED\n# no POSIX\n"),exit; | |
589 | } | |
590 | use warnings 'locale'; | |
591 | use feature 'fc'; | |
592 | use locale; | |
593 | setlocale(&POSIX::LC_CTYPE, "C"); | |
594 | my $a; | |
595 | $a = lc("\x{178}"); | |
596 | $a = fc("\x{1E9E}"); | |
597 | $a = fc("\x{FB05}"); | |
598 | $a = uc("\x{FB00}"); | |
599 | $a = ucfirst("\x{149}"); | |
8bdce394 KW |
600 | $a = lcfirst("\x{178}"); |
601 | no warnings 'locale'; | |
602 | $a = lc("\x{178}"); | |
603 | $a = fc("\x{1E9E}"); | |
604 | $a = fc("\x{FB05}"); | |
605 | $a = uc("\x{FB00}"); | |
606 | $a = ucfirst("\x{149}"); | |
607 | $a = lcfirst("\x{178}"); | |
ab0b796c KW |
608 | EXPECT |
609 | Can't do lc("\x{178}") on non-UTF-8 locale; resolved to "\x{178}". at - line 10. | |
610 | Can't do fc("\x{1E9E}") on non-UTF-8 locale; resolved to "\x{17F}\x{17F}". at - line 11. | |
611 | Can't do fc("\x{FB05}") on non-UTF-8 locale; resolved to "\x{FB06}". at - line 12. | |
612 | Can't do uc("\x{FB00}") on non-UTF-8 locale; resolved to "\x{FB00}". at - line 13. | |
613 | Can't do ucfirst("\x{149}") on non-UTF-8 locale; resolved to "\x{149}". at - line 14. | |
8bdce394 | 614 | Can't do lcfirst("\x{178}") on non-UTF-8 locale; resolved to "\x{178}". at - line 15. |
613abc6d KW |
615 | ######## |
616 | # NAME Wide character in non-UTF-8 locale | |
617 | eval { require POSIX; POSIX->import("locale_h") }; | |
618 | if ($@) { | |
619 | print("SKIPPED\n# no POSIX\n"),exit; | |
620 | } | |
621 | use warnings 'locale'; | |
622 | use feature 'fc'; | |
623 | use locale; | |
624 | setlocale(&POSIX::LC_CTYPE, "C"); | |
625 | my $a; | |
626 | $a = lc("\x{100}"); | |
627 | $a = lcfirst("\x{101}"); | |
628 | $a = fc("\x{102}"); | |
629 | $a = uc("\x{103}"); | |
630 | $a = ucfirst("\x{104}"); | |
631 | no warnings 'locale'; | |
632 | $a = lc("\x{100}"); | |
633 | $a = lcfirst("\x{101}"); | |
634 | $a = fc("\x{102}"); | |
635 | $a = uc("\x{103}"); | |
636 | $a = ucfirst("\x{104}"); | |
637 | EXPECT | |
638 | Wide character (U+100) in lc at - line 10. | |
639 | Wide character (U+101) in lcfirst at - line 11. | |
640 | Wide character (U+102) in fc at - line 12. | |
641 | Wide character (U+103) in uc at - line 13. | |
642 | Wide character (U+104) in ucfirst at - line 14. |