Commit | Line | Data |
---|---|---|
f0df466a JH |
1 | |
2 | utf8.c AOK | |
3 | ||
4b88fb76 | 4 | [utf8_to_uvchr_buf] |
f0df466a JH |
5 | Malformed UTF-8 character |
6 | my $a = ord "\x80" ; | |
7 | ||
8 | Malformed UTF-8 character | |
9 | my $a = ord "\xf080" ; | |
10 | <<<<<< this warning can't be easily triggered from perl anymore | |
11 | ||
12 | [utf16_to_utf8] | |
13 | Malformed UTF-16 surrogate | |
93f09d7b | 14 | <<<<<< Add a test when something actually calls utf16_to_utf8 |
f0df466a JH |
15 | |
16 | __END__ | |
4b88fb76 | 17 | # utf8.c [utf8_to_uvchr_buf] -W |
f0df466a JH |
18 | BEGIN { |
19 | if (ord('A') == 193) { | |
20 | print "SKIPPED\n# ebcdic platforms do not generate Malformed UTF-8 warnings."; | |
21 | exit 0; | |
22 | } | |
23 | } | |
24 | use utf8 ; | |
25 | my $a = "snøstorm" ; | |
26 | { | |
27 | no warnings 'utf8' ; | |
28 | my $a = "snøstorm"; | |
29 | use warnings 'utf8' ; | |
30 | my $a = "snøstorm"; | |
31 | } | |
32 | EXPECT | |
41432148 JH |
33 | Malformed UTF-8 character (unexpected non-continuation byte 0x73, immediately after start byte 0xf8) at - line 9. |
34 | Malformed UTF-8 character (unexpected non-continuation byte 0x73, immediately after start byte 0xf8) at - line 14. | |
f0df466a | 35 | ######## |
507b9800 | 36 | use warnings 'utf8'; |
9ae3ac1a KW |
37 | my $d7ff = uc(chr(0xD7FF)); |
38 | my $d800 = uc(chr(0xD800)); | |
39 | my $dfff = uc(chr(0xDFFF)); | |
40 | my $e000 = uc(chr(0xE000)); | |
41 | my $feff = uc(chr(0xFEFF)); | |
42 | my $fffd = uc(chr(0xFFFD)); | |
43 | my $fffe = uc(chr(0xFFFE)); | |
44 | my $ffff = uc(chr(0xFFFF)); | |
45 | my $hex4 = uc(chr(0x10000)); | |
46 | my $hex5 = uc(chr(0x100000)); | |
47 | my $maxm1 = uc(chr(0x10FFFE)); | |
48 | my $max = uc(chr(0x10FFFF)); | |
49 | my $nonUnicode = uc(chr(0x110000)); | |
507b9800 | 50 | no warnings 'utf8'; |
9ae3ac1a KW |
51 | my $d7ff = uc(chr(0xD7FF)); |
52 | my $d800 = uc(chr(0xD800)); | |
53 | my $dfff = uc(chr(0xDFFF)); | |
54 | my $e000 = uc(chr(0xE000)); | |
55 | my $feff = uc(chr(0xFEFF)); | |
56 | my $fffd = uc(chr(0xFFFD)); | |
57 | my $fffe = uc(chr(0xFFFE)); | |
58 | my $ffff = uc(chr(0xFFFF)); | |
59 | my $hex4 = uc(chr(0x10000)); | |
60 | my $hex5 = uc(chr(0x100000)); | |
61 | my $maxm1 = uc(chr(0x10FFFE)); | |
62 | my $max = uc(chr(0x10FFFF)); | |
63 | my $nonUnicode = uc(chr(0x110000)); | |
507b9800 | 64 | EXPECT |
9ae3ac1a KW |
65 | Operation "uc" returns its argument for UTF-16 surrogate U+D800 at - line 3. |
66 | Operation "uc" returns its argument for UTF-16 surrogate U+DFFF at - line 4. | |
67 | Operation "uc" returns its argument for non-Unicode code point 0x110000 at - line 14. | |
507b9800 | 68 | ######## |
62961d2e | 69 | use warnings 'utf8'; |
8457b38f KW |
70 | my $d800 = uc(chr(0xD800)); |
71 | my $nonUnicode = uc(chr(0x110000)); | |
72 | no warnings 'surrogate'; | |
73 | my $d800 = uc(chr(0xD800)); | |
74 | my $nonUnicode = uc(chr(0x110000)); | |
75 | EXPECT | |
76 | Operation "uc" returns its argument for UTF-16 surrogate U+D800 at - line 2. | |
77 | Operation "uc" returns its argument for non-Unicode code point 0x110000 at - line 3. | |
78 | Operation "uc" returns its argument for non-Unicode code point 0x110000 at - line 6. | |
79 | ######## | |
80 | use warnings 'utf8'; | |
81 | my $d800 = uc(chr(0xD800)); | |
82 | my $nonUnicode = uc(chr(0x110000)); | |
83 | my $big_nonUnicode = uc(chr(0x8000_0000)); | |
84 | no warnings 'non_unicode'; | |
85 | my $d800 = uc(chr(0xD800)); | |
86 | my $nonUnicode = uc(chr(0x110000)); | |
87 | my $big_nonUnicode = uc(chr(0x8000_0000)); | |
88 | EXPECT | |
89 | Operation "uc" returns its argument for UTF-16 surrogate U+D800 at - line 2. | |
90 | Operation "uc" returns its argument for non-Unicode code point 0x110000 at - line 3. | |
91 | Operation "uc" returns its argument for non-Unicode code point 0x80000000 at - line 4. | |
92 | Operation "uc" returns its argument for UTF-16 surrogate U+D800 at - line 6. | |
93 | ######## | |
94 | use warnings 'utf8'; | |
9ae3ac1a KW |
95 | my $d7ff = lc pack("U", 0xD7FF); |
96 | my $d800 = lc pack("U", 0xD800); | |
97 | my $dfff = lc pack("U", 0xDFFF); | |
98 | my $e000 = lc pack("U", 0xE000); | |
99 | my $feff = lc pack("U", 0xFEFF); | |
100 | my $fffd = lc pack("U", 0xFFFD); | |
101 | my $fffe = lc pack("U", 0xFFFE); | |
102 | my $ffff = lc pack("U", 0xFFFF); | |
103 | my $hex4 = lc pack("U", 0x10000); | |
104 | my $hex5 = lc pack("U", 0x100000); | |
105 | my $maxm1 = lc pack("U", 0x10FFFE); | |
106 | my $max = lc pack("U", 0x10FFFF); | |
107 | my $nonUnicode = lc(pack("U", 0x110000)); | |
62961d2e | 108 | no warnings 'utf8'; |
9ae3ac1a KW |
109 | my $d7ff = lc pack("U", 0xD7FF); |
110 | my $d800 = lc pack("U", 0xD800); | |
111 | my $dfff = lc pack("U", 0xDFFF); | |
112 | my $e000 = lc pack("U", 0xE000); | |
113 | my $feff = lc pack("U", 0xFEFF); | |
114 | my $fffd = lc pack("U", 0xFFFD); | |
115 | my $fffe = lc pack("U", 0xFFFE); | |
116 | my $ffff = lc pack("U", 0xFFFF); | |
117 | my $hex4 = lc pack("U", 0x10000); | |
118 | my $hex5 = lc pack("U", 0x100000); | |
119 | my $maxm1 = lc pack("U", 0x10FFFE); | |
120 | my $max = lc pack("U", 0x10FFFF); | |
121 | my $nonUnicode = lc(pack("U", 0x110000)); | |
62961d2e | 122 | EXPECT |
9ae3ac1a KW |
123 | Operation "lc" returns its argument for UTF-16 surrogate U+D800 at - line 3. |
124 | Operation "lc" returns its argument for UTF-16 surrogate U+DFFF at - line 4. | |
125 | Operation "lc" returns its argument for non-Unicode code point 0x110000 at - line 14. | |
62961d2e JH |
126 | ######## |
127 | use warnings 'utf8'; | |
9ae3ac1a KW |
128 | my $d7ff = ucfirst "\x{D7FF}"; |
129 | my $d800 = ucfirst "\x{D800}"; | |
130 | my $dfff = ucfirst "\x{DFFF}"; | |
131 | my $e000 = ucfirst "\x{E000}"; | |
132 | my $feff = ucfirst "\x{FEFF}"; | |
133 | my $fffd = ucfirst "\x{FFFD}"; | |
134 | my $fffe = ucfirst "\x{FFFE}"; | |
135 | my $ffff = ucfirst "\x{FFFF}"; | |
136 | my $hex4 = ucfirst "\x{10000}"; | |
137 | my $hex5 = ucfirst "\x{100000}"; | |
138 | my $maxm1 = ucfirst "\x{10FFFE}"; | |
139 | my $max = ucfirst "\x{10FFFF}"; | |
140 | my $nonUnicode = ucfirst "\x{110000}"; | |
62961d2e | 141 | no warnings 'utf8'; |
9ae3ac1a KW |
142 | my $d7ff = ucfirst "\x{D7FF}"; |
143 | my $d800 = ucfirst "\x{D800}"; | |
144 | my $dfff = ucfirst "\x{DFFF}"; | |
145 | my $e000 = ucfirst "\x{E000}"; | |
146 | my $feff = ucfirst "\x{FEFF}"; | |
147 | my $fffd = ucfirst "\x{FFFD}"; | |
148 | my $fffe = ucfirst "\x{FFFE}"; | |
149 | my $ffff = ucfirst "\x{FFFF}"; | |
150 | my $hex4 = ucfirst "\x{10000}"; | |
151 | my $hex5 = ucfirst "\x{100000}"; | |
152 | my $maxm1 = ucfirst "\x{10FFFE}"; | |
153 | my $max = ucfirst "\x{10FFFF}"; | |
154 | my $nonUnicode = ucfirst "\x{110000}"; | |
155 | EXPECT | |
156 | Operation "ucfirst" returns its argument for UTF-16 surrogate U+D800 at - line 3. | |
157 | Operation "ucfirst" returns its argument for UTF-16 surrogate U+DFFF at - line 4. | |
158 | Operation "ucfirst" returns its argument for non-Unicode code point 0x110000 at - line 14. | |
159 | ######## | |
160 | use warnings 'utf8'; | |
161 | chr(0xD7FF) =~ /\p{Any}/; | |
162 | chr(0xD800) =~ /\p{Any}/; | |
163 | chr(0xDFFF) =~ /\p{Any}/; | |
164 | chr(0xE000) =~ /\p{Any}/; | |
165 | chr(0xFEFF) =~ /\p{Any}/; | |
166 | chr(0xFFFD) =~ /\p{Any}/; | |
167 | chr(0xFFFE) =~ /\p{Any}/; | |
168 | chr(0xFFFF) =~ /\p{Any}/; | |
169 | chr(0x10000) =~ /\p{Any}/; | |
170 | chr(0x100000) =~ /\p{Any}/; | |
171 | chr(0x10FFFE) =~ /\p{Any}/; | |
172 | chr(0x10FFFF) =~ /\p{Any}/; | |
5073ffbd KW |
173 | chr(0x110000) =~ /[\w\p{Any}]/; |
174 | chr(0x110010) =~ /[\w\p{PosixWord}]/; | |
175 | chr(0x110011) =~ /[\w\P{PosixWord}]/; | |
176 | chr(0x110012) =~ /[\w\p{XPosixWord}]/; | |
177 | chr(0x110013) =~ /[\w\P{XPosixWord}]/; | |
178 | chr(0x110014) =~ /[\w\p{PosixAlnum}]/; | |
179 | chr(0x110015) =~ /[\w\P{PosixAlnum}]/; | |
180 | chr(0x110016) =~ /[\w\p{XPosixAlnum}]/; | |
181 | chr(0x110017) =~ /[\w\P{XPosixAlnum}]/; | |
182 | chr(0x110018) =~ /[\w\p{PosixSpace}]/; | |
183 | chr(0x110019) =~ /[\w\P{PosixSpace}]/; | |
184 | chr(0x11001A) =~ /[\w\p{XPosixSpace}]/; | |
185 | chr(0x11001B) =~ /[\w\P{XPosixSpace}]/; | |
186 | chr(0x11001C) =~ /[\w\p{PosixDigit}]/; | |
187 | chr(0x11001D) =~ /[\w\P{PosixDigit}]/; | |
188 | chr(0x11001E) =~ /[\w\p{XPosixDigit}]/; | |
189 | chr(0x11001F) =~ /[\w\P{XPosixDigit}]/; | |
190 | chr(0x110020) =~ /[\w\p{PosixAlpha}]/; | |
191 | chr(0x110021) =~ /[\w\P{PosixAlpha}]/; | |
192 | chr(0x110022) =~ /[\w\p{XPosixAlpha}]/; | |
193 | chr(0x110023) =~ /[\w\P{XPosixAlpha}]/; | |
194 | chr(0x110024) =~ /[\w\p{Ascii}]/; | |
195 | chr(0x110025) =~ /[\w\P{Ascii}]/; | |
196 | chr(0x110026) =~ /[\w\p{PosixCntrl}]/; | |
197 | chr(0x110027) =~ /[\w\P{PosixCntrl}]/; | |
198 | chr(0x110028) =~ /[\w\p{XPosixCntrl}]/; | |
199 | chr(0x110029) =~ /[\w\P{XPosixCntrl}]/; | |
200 | chr(0x11002A) =~ /[\w\p{PosixGraph}]/; | |
201 | chr(0x11002B) =~ /[\w\P{PosixGraph}]/; | |
202 | chr(0x11002C) =~ /[\w\p{XPosixGraph}]/; | |
203 | chr(0x11002D) =~ /[\w\P{XPosixGraph}]/; | |
204 | chr(0x11002E) =~ /[\w\p{PosixLower}]/; | |
205 | chr(0x11002F) =~ /[\w\P{PosixLower}]/; | |
206 | chr(0x110030) =~ /[\w\p{XPosixLower}]/; | |
207 | chr(0x110031) =~ /[\w\P{XPosixLower}]/; | |
208 | chr(0x110032) =~ /[\w\p{PosixPrint}]/; | |
209 | chr(0x110033) =~ /[\w\P{PosixPrint}]/; | |
210 | chr(0x110034) =~ /[\w\p{XPosixPrint}]/; | |
211 | chr(0x110035) =~ /[\w\P{XPosixPrint}]/; | |
212 | chr(0x110036) =~ /[\w\p{PosixPunct}]/; | |
213 | chr(0x110037) =~ /[\w\P{PosixPunct}]/; | |
214 | chr(0x110038) =~ /[\w\p{XPosixPunct}]/; | |
215 | chr(0x110039) =~ /[\w\P{XPosixPunct}]/; | |
216 | chr(0x11003A) =~ /[\w\p{PosixUpper}]/; | |
217 | chr(0x11003B) =~ /[\w\P{PosixUpper}]/; | |
218 | chr(0x11003C) =~ /[\w\p{XPosixUpper}]/; | |
219 | chr(0x11003D) =~ /[\w\P{XPosixUpper}]/; | |
220 | chr(0x11003E) =~ /[\w\p{PosixXdigit}]/; | |
221 | chr(0x11003F) =~ /[\w\P{PosixXdigit}]/; | |
222 | chr(0x110040) =~ /[\w\p{XPosixXdigit}]/; | |
223 | chr(0x110041) =~ /[\w\P{XPosixXdigit}]/; | |
224 | chr(0x110042) =~ /[\w\p{PerlSpace}]/; | |
225 | chr(0x110043) =~ /[\w\P{PerlSpace}]/; | |
226 | chr(0x110044) =~ /[\w\p{XPerlSpace}]/; | |
227 | chr(0x110045) =~ /[\w\P{XPerlSpace}]/; | |
228 | chr(0x110046) =~ /[\w\p{PosixBlank}]/; | |
229 | chr(0x110047) =~ /[\w\P{PosixBlank}]/; | |
230 | chr(0x110048) =~ /[\w\p{XPosixBlank}]/; | |
231 | chr(0x110049) =~ /[\w\P{XPosixBlank}]/; | |
232 | # Currently some warnings from the above are output twice. | |
233 | # Only Unicode properties give non-Unicode warnings, and not when something | |
234 | # else in the class matches above Unicode.  Below we test three ways in which | |
235 | # something outside the property may match non-Unicode: a code point above Unicode, | |
236 | # a class \S that we know at compile time doesn't, and a class \W whose values | |
237 | # aren't (at the time of this writing) specified at compile time, but which | |
238 | # wouldn't match. | |
239 | chr(0x110050) =~ /\w/; | |
240 | chr(0x110051) =~ /\W/; | |
241 | chr(0x110052) =~ /\d/; | |
242 | chr(0x110053) =~ /\D/; | |
243 | chr(0x110054) =~ /\s/; | |
244 | chr(0x110055) =~ /\S/; | |
245 | chr(0x110056) =~ /[[:word:]]/; | |
246 | chr(0x110057) =~ /[[:^word:]]/; | |
247 | chr(0x110058) =~ /[[:alnum:]]/; | |
248 | chr(0x110059) =~ /[[:^alnum:]]/; | |
249 | chr(0x11005A) =~ /[[:space:]]/; | |
250 | chr(0x11005B) =~ /[[:^space:]]/; | |
251 | chr(0x11005C) =~ /[[:digit:]]/; | |
252 | chr(0x11005D) =~ /[[:^digit:]]/; | |
253 | chr(0x11005E) =~ /[[:alpha:]]/; | |
254 | chr(0x11005F) =~ /[[:^alpha:]]/; | |
255 | chr(0x110060) =~ /[[:ascii:]]/; | |
256 | chr(0x110061) =~ /[[:^ascii:]]/; | |
257 | chr(0x110062) =~ /[[:cntrl:]]/; | |
258 | chr(0x110063) =~ /[[:^cntrl:]]/; | |
259 | chr(0x110064) =~ /[[:graph:]]/; | |
260 | chr(0x110065) =~ /[[:^graph:]]/; | |
261 | chr(0x110066) =~ /[[:lower:]]/; | |
262 | chr(0x110067) =~ /[[:^lower:]]/; | |
263 | chr(0x110068) =~ /[[:print:]]/; | |
264 | chr(0x110069) =~ /[[:^print:]]/; | |
265 | chr(0x11006A) =~ /[[:punct:]]/; | |
266 | chr(0x11006B) =~ /[[:^punct:]]/; | |
267 | chr(0x11006C) =~ /[[:upper:]]/; | |
268 | chr(0x11006D) =~ /[[:^upper:]]/; | |
269 | chr(0x11006E) =~ /[[:xdigit:]]/; | |
270 | chr(0x11006F) =~ /[[:^xdigit:]]/; | |
271 | chr(0x110070) =~ /[[:blank:]]/; | |
272 | chr(0x110071) =~ /[[:^blank:]]/; | |
273 | chr(0x111000) =~ /[\W\p{Any}]/; | |
274 | chr(0x111010) =~ /[\W\p{PosixWord}]/; | |
275 | chr(0x111011) =~ /[\W\P{PosixWord}]/; | |
276 | chr(0x111012) =~ /[\W\p{XPosixWord}]/; | |
277 | chr(0x111013) =~ /[\W\P{XPosixWord}]/; | |
278 | chr(0x111014) =~ /[\W\p{PosixAlnum}]/; | |
279 | chr(0x111015) =~ /[\W\P{PosixAlnum}]/; | |
280 | chr(0x111016) =~ /[\W\p{XPosixAlnum}]/; | |
281 | chr(0x111017) =~ /[\W\P{XPosixAlnum}]/; | |
282 | chr(0x111018) =~ /[\W\p{PosixSpace}]/; | |
283 | chr(0x111019) =~ /[\W\P{PosixSpace}]/; | |
284 | chr(0x11101A) =~ /[\W\p{XPosixSpace}]/; | |
285 | chr(0x11101B) =~ /[\W\P{XPosixSpace}]/; | |
286 | chr(0x11101C) =~ /[\W\p{PosixDigit}]/; | |
287 | chr(0x11101D) =~ /[\W\P{PosixDigit}]/; | |
288 | chr(0x11101E) =~ /[\W\p{XPosixDigit}]/; | |
289 | chr(0x11101F) =~ /[\W\P{XPosixDigit}]/; | |
290 | chr(0x111020) =~ /[\W\p{PosixAlpha}]/; | |
291 | chr(0x111021) =~ /[\W\P{PosixAlpha}]/; | |
292 | chr(0x111022) =~ /[\W\p{XPosixAlpha}]/; | |
293 | chr(0x111023) =~ /[\W\P{XPosixAlpha}]/; | |
294 | chr(0x111024) =~ /[\W\p{Ascii}]/; | |
295 | chr(0x111025) =~ /[\W\P{Ascii}]/; | |
296 | chr(0x111026) =~ /[\W\p{PosixCntrl}]/; | |
297 | chr(0x111027) =~ /[\W\P{PosixCntrl}]/; | |
298 | chr(0x111028) =~ /[\W\p{XPosixCntrl}]/; | |
299 | chr(0x111029) =~ /[\W\P{XPosixCntrl}]/; | |
300 | chr(0x11102A) =~ /[\W\p{PosixGraph}]/; | |
301 | chr(0x11102B) =~ /[\W\P{PosixGraph}]/; | |
302 | chr(0x11102C) =~ /[\W\p{XPosixGraph}]/; | |
303 | chr(0x11102D) =~ /[\W\P{XPosixGraph}]/; | |
304 | chr(0x11102E) =~ /[\W\p{PosixLower}]/; | |
305 | chr(0x11102F) =~ /[\W\P{PosixLower}]/; | |
306 | chr(0x111030) =~ /[\W\p{XPosixLower}]/; | |
307 | chr(0x111031) =~ /[\W\P{XPosixLower}]/; | |
308 | chr(0x111032) =~ /[\W\p{PosixPrint}]/; | |
309 | chr(0x111033) =~ /[\W\P{PosixPrint}]/; | |
310 | chr(0x111034) =~ /[\W\p{XPosixPrint}]/; | |
311 | chr(0x111035) =~ /[\W\P{XPosixPrint}]/; | |
312 | chr(0x111036) =~ /[\W\p{PosixPunct}]/; | |
313 | chr(0x111037) =~ /[\W\P{PosixPunct}]/; | |
314 | chr(0x111038) =~ /[\W\p{XPosixPunct}]/; | |
315 | chr(0x111039) =~ /[\W\P{XPosixPunct}]/; | |
316 | chr(0x11103A) =~ /[\W\p{PosixUpper}]/; | |
317 | chr(0x11103B) =~ /[\W\P{PosixUpper}]/; | |
318 | chr(0x11103C) =~ /[\W\p{XPosixUpper}]/; | |
319 | chr(0x11103D) =~ /[\W\P{XPosixUpper}]/; | |
320 | chr(0x11103E) =~ /[\W\p{PosixXdigit}]/; | |
321 | chr(0x11103F) =~ /[\W\P{PosixXdigit}]/; | |
322 | chr(0x111040) =~ /[\W\p{XPosixXdigit}]/; | |
323 | chr(0x111041) =~ /[\W\P{XPosixXdigit}]/; | |
324 | chr(0x111042) =~ /[\W\p{PerlSpace}]/; | |
325 | chr(0x111043) =~ /[\W\P{PerlSpace}]/; | |
326 | chr(0x111044) =~ /[\W\p{XPerlSpace}]/; | |
327 | chr(0x111045) =~ /[\W\P{XPerlSpace}]/; | |
328 | chr(0x111046) =~ /[\W\p{PosixBlank}]/; | |
329 | chr(0x111047) =~ /[\W\P{PosixBlank}]/; | |
330 | chr(0x111048) =~ /[\W\p{XPosixBlank}]/; | |
331 | chr(0x111049) =~ /[\W\P{XPosixBlank}]/; | |
332 | chr(0x112000) =~ /[\S\p{Any}]/; | |
333 | chr(0x112010) =~ /[\S\p{PosixWord}]/; | |
334 | chr(0x112011) =~ /[\S\P{PosixWord}]/; | |
335 | chr(0x112012) =~ /[\S\p{XPosixWord}]/; | |
336 | chr(0x112013) =~ /[\S\P{XPosixWord}]/; | |
337 | chr(0x112014) =~ /[\S\p{PosixAlnum}]/; | |
338 | chr(0x112015) =~ /[\S\P{PosixAlnum}]/; | |
339 | chr(0x112016) =~ /[\S\p{XPosixAlnum}]/; | |
340 | chr(0x112017) =~ /[\S\P{XPosixAlnum}]/; | |
341 | chr(0x112018) =~ /[\S\p{PosixSpace}]/; | |
342 | chr(0x112019) =~ /[\S\P{PosixSpace}]/; | |
343 | chr(0x11201A) =~ /[\S\p{XPosixSpace}]/; | |
344 | chr(0x11201B) =~ /[\S\P{XPosixSpace}]/; | |
345 | chr(0x11201C) =~ /[\S\p{PosixDigit}]/; | |
346 | chr(0x11201D) =~ /[\S\P{PosixDigit}]/; | |
347 | chr(0x11201E) =~ /[\S\p{XPosixDigit}]/; | |
348 | chr(0x11201F) =~ /[\S\P{XPosixDigit}]/; | |
349 | chr(0x112020) =~ /[\S\p{PosixAlpha}]/; | |
350 | chr(0x112021) =~ /[\S\P{PosixAlpha}]/; | |
351 | chr(0x112022) =~ /[\S\p{XPosixAlpha}]/; | |
352 | chr(0x112023) =~ /[\S\P{XPosixAlpha}]/; | |
353 | chr(0x112024) =~ /[\S\p{Ascii}]/; | |
354 | chr(0x112025) =~ /[\S\P{Ascii}]/; | |
355 | chr(0x112026) =~ /[\S\p{PosixCntrl}]/; | |
356 | chr(0x112027) =~ /[\S\P{PosixCntrl}]/; | |
357 | chr(0x112028) =~ /[\S\p{XPosixCntrl}]/; | |
358 | chr(0x112029) =~ /[\S\P{XPosixCntrl}]/; | |
359 | chr(0x11202A) =~ /[\S\p{PosixGraph}]/; | |
360 | chr(0x11202B) =~ /[\S\P{PosixGraph}]/; | |
361 | chr(0x11202C) =~ /[\S\p{XPosixGraph}]/; | |
362 | chr(0x11202D) =~ /[\S\P{XPosixGraph}]/; | |
363 | chr(0x11202E) =~ /[\S\p{PosixLower}]/; | |
364 | chr(0x11202F) =~ /[\S\P{PosixLower}]/; | |
365 | chr(0x112030) =~ /[\S\p{XPosixLower}]/; | |
366 | chr(0x112031) =~ /[\S\P{XPosixLower}]/; | |
367 | chr(0x112032) =~ /[\S\p{PosixPrint}]/; | |
368 | chr(0x112033) =~ /[\S\P{PosixPrint}]/; | |
369 | chr(0x112034) =~ /[\S\p{XPosixPrint}]/; | |
370 | chr(0x112035) =~ /[\S\P{XPosixPrint}]/; | |
371 | chr(0x112036) =~ /[\S\p{PosixPunct}]/; | |
372 | chr(0x112037) =~ /[\S\P{PosixPunct}]/; | |
373 | chr(0x112038) =~ /[\S\p{XPosixPunct}]/; | |
374 | chr(0x112039) =~ /[\S\P{XPosixPunct}]/; | |
375 | chr(0x11203A) =~ /[\S\p{PosixUpper}]/; | |
376 | chr(0x11203B) =~ /[\S\P{PosixUpper}]/; | |
377 | chr(0x11203C) =~ /[\S\p{XPosixUpper}]/; | |
378 | chr(0x11203D) =~ /[\S\P{XPosixUpper}]/; | |
379 | chr(0x11203E) =~ /[\S\p{PosixXdigit}]/; | |
380 | chr(0x11203F) =~ /[\S\P{PosixXdigit}]/; | |
381 | chr(0x112040) =~ /[\S\p{XPosixXdigit}]/; | |
382 | chr(0x112041) =~ /[\S\P{XPosixXdigit}]/; | |
383 | chr(0x112042) =~ /[\S\p{PerlSpace}]/; | |
384 | chr(0x112043) =~ /[\S\P{PerlSpace}]/; | |
385 | chr(0x112044) =~ /[\S\p{XPerlSpace}]/; | |
386 | chr(0x112045) =~ /[\S\P{XPerlSpace}]/; | |
387 | chr(0x112046) =~ /[\S\p{PosixBlank}]/; | |
388 | chr(0x112047) =~ /[\S\P{PosixBlank}]/; | |
389 | chr(0x112048) =~ /[\S\p{XPosixBlank}]/; | |
390 | chr(0x112049) =~ /[\S\P{XPosixBlank}]/; | |
391 | chr(0x113000) =~ /[\x{110000}\p{Any}]/; | |
392 | chr(0x113010) =~ /[\x{110000}\p{PosixWord}]/; | |
393 | chr(0x113011) =~ /[\x{110000}\P{PosixWord}]/; | |
394 | chr(0x113012) =~ /[\x{110000}\p{XPosixWord}]/; | |
395 | chr(0x113013) =~ /[\x{110000}\P{XPosixWord}]/; | |
396 | chr(0x113014) =~ /[\x{110000}\p{PosixAlnum}]/; | |
397 | chr(0x113015) =~ /[\x{110000}\P{PosixAlnum}]/; | |
398 | chr(0x113016) =~ /[\x{110000}\p{XPosixAlnum}]/; | |
399 | chr(0x113017) =~ /[\x{110000}\P{XPosixAlnum}]/; | |
400 | chr(0x113018) =~ /[\x{110000}\p{PosixSpace}]/; | |
401 | chr(0x113019) =~ /[\x{110000}\P{PosixSpace}]/; | |
402 | chr(0x11301A) =~ /[\x{110000}\p{XPosixSpace}]/; | |
403 | chr(0x11301B) =~ /[\x{110000}\P{XPosixSpace}]/; | |
404 | chr(0x11301C) =~ /[\x{110000}\p{PosixDigit}]/; | |
405 | chr(0x11301D) =~ /[\x{110000}\P{PosixDigit}]/; | |
406 | chr(0x11301E) =~ /[\x{110000}\p{XPosixDigit}]/; | |
407 | chr(0x11301F) =~ /[\x{110000}\P{XPosixDigit}]/; | |
408 | chr(0x113020) =~ /[\x{110000}\p{PosixAlpha}]/; | |
409 | chr(0x113021) =~ /[\x{110000}\P{PosixAlpha}]/; | |
410 | chr(0x113022) =~ /[\x{110000}\p{XPosixAlpha}]/; | |
411 | chr(0x113023) =~ /[\x{110000}\P{XPosixAlpha}]/; | |
412 | chr(0x113024) =~ /[\x{110000}\p{Ascii}]/; | |
413 | chr(0x113025) =~ /[\x{110000}\P{Ascii}]/; | |
414 | chr(0x113026) =~ /[\x{110000}\p{PosixCntrl}]/; | |
415 | chr(0x113027) =~ /[\x{110000}\P{PosixCntrl}]/; | |
416 | chr(0x113028) =~ /[\x{110000}\p{XPosixCntrl}]/; | |
417 | chr(0x113029) =~ /[\x{110000}\P{XPosixCntrl}]/; | |
418 | chr(0x11302A) =~ /[\x{110000}\p{PosixGraph}]/; | |
419 | chr(0x11302B) =~ /[\x{110000}\P{PosixGraph}]/; | |
420 | chr(0x11302C) =~ /[\x{110000}\p{XPosixGraph}]/; | |
421 | chr(0x11302D) =~ /[\x{110000}\P{XPosixGraph}]/; | |
422 | chr(0x11302E) =~ /[\x{110000}\p{PosixLower}]/; | |
423 | chr(0x11302F) =~ /[\x{110000}\P{PosixLower}]/; | |
424 | chr(0x113030) =~ /[\x{110000}\p{XPosixLower}]/; | |
425 | chr(0x113031) =~ /[\x{110000}\P{XPosixLower}]/; | |
426 | chr(0x113032) =~ /[\x{110000}\p{PosixPrint}]/; | |
427 | chr(0x113033) =~ /[\x{110000}\P{PosixPrint}]/; | |
428 | chr(0x113034) =~ /[\x{110000}\p{XPosixPrint}]/; | |
429 | chr(0x113035) =~ /[\x{110000}\P{XPosixPrint}]/; | |
430 | chr(0x113036) =~ /[\x{110000}\p{PosixPunct}]/; | |
431 | chr(0x113037) =~ /[\x{110000}\P{PosixPunct}]/; | |
432 | chr(0x113038) =~ /[\x{110000}\p{XPosixPunct}]/; | |
433 | chr(0x113039) =~ /[\x{110000}\P{XPosixPunct}]/; | |
434 | chr(0x11303A) =~ /[\x{110000}\p{PosixUpper}]/; | |
435 | chr(0x11303B) =~ /[\x{110000}\P{PosixUpper}]/; | |
436 | chr(0x11303C) =~ /[\x{110000}\p{XPosixUpper}]/; | |
437 | chr(0x11303D) =~ /[\x{110000}\P{XPosixUpper}]/; | |
438 | chr(0x11303E) =~ /[\x{110000}\p{PosixXdigit}]/; | |
439 | chr(0x11303F) =~ /[\x{110000}\P{PosixXdigit}]/; | |
440 | chr(0x113040) =~ /[\x{110000}\p{XPosixXdigit}]/; | |
441 | chr(0x113041) =~ /[\x{110000}\P{XPosixXdigit}]/; | |
442 | chr(0x113042) =~ /[\x{110000}\p{PerlSpace}]/; | |
443 | chr(0x113043) =~ /[\x{110000}\P{PerlSpace}]/; | |
444 | chr(0x113044) =~ /[\x{110000}\p{XPerlSpace}]/; | |
445 | chr(0x113045) =~ /[\x{110000}\P{XPerlSpace}]/; | |
446 | chr(0x113046) =~ /[\x{110000}\p{PosixBlank}]/; | |
447 | chr(0x113047) =~ /[\x{110000}\P{PosixBlank}]/; | |
448 | chr(0x113048) =~ /[\x{110000}\p{XPosixBlank}]/; | |
449 | chr(0x113049) =~ /[\x{110000}\P{XPosixBlank}]/; | |
9ae3ac1a KW |
450 | no warnings 'utf8'; |
451 | chr(0xD7FF) =~ /\p{Any}/; | |
452 | chr(0xD800) =~ /\p{Any}/; | |
453 | chr(0xDFFF) =~ /\p{Any}/; | |
454 | chr(0xE000) =~ /\p{Any}/; | |
455 | chr(0xFEFF) =~ /\p{Any}/; | |
456 | chr(0xFFFD) =~ /\p{Any}/; | |
457 | chr(0xFFFE) =~ /\p{Any}/; | |
458 | chr(0xFFFF) =~ /\p{Any}/; | |
459 | chr(0x10000) =~ /\p{Any}/; | |
460 | chr(0x100000) =~ /\p{Any}/; | |
461 | chr(0x10FFFE) =~ /\p{Any}/; | |
462 | chr(0x10FFFF) =~ /\p{Any}/; | |
463 | chr(0x110000) =~ /\p{Any}/; | |
5073ffbd KW |
464 | chr(0x110010) =~ /\p{PosixWord}/; |
465 | chr(0x110011) =~ /\P{PosixWord}/; | |
466 | chr(0x110012) =~ /\p{XPosixWord}/; | |
467 | chr(0x110013) =~ /\P{XPosixWord}/; | |
468 | chr(0x110014) =~ /\p{PosixAlnum}/; | |
469 | chr(0x110015) =~ /\P{PosixAlnum}/; | |
470 | chr(0x110016) =~ /\p{XPosixAlnum}/; | |
471 | chr(0x110017) =~ /\P{XPosixAlnum}/; | |
472 | chr(0x110018) =~ /\p{PosixSpace}/; | |
473 | chr(0x110019) =~ /\P{PosixSpace}/; | |
474 | chr(0x11001A) =~ /\p{XPosixSpace}/; | |
475 | chr(0x11001B) =~ /\P{XPosixSpace}/; | |
476 | chr(0x11001C) =~ /\p{PosixDigit}/; | |
477 | chr(0x11001D) =~ /\P{PosixDigit}/; | |
478 | chr(0x11001E) =~ /\p{XPosixDigit}/; | |
479 | chr(0x11001F) =~ /\P{XPosixDigit}/; | |
480 | chr(0x110020) =~ /\p{PosixAlpha}/; | |
481 | chr(0x110021) =~ /\P{PosixAlpha}/; | |
482 | chr(0x110022) =~ /\p{XPosixAlpha}/; | |
483 | chr(0x110023) =~ /\P{XPosixAlpha}/; | |
484 | chr(0x110024) =~ /\p{Ascii}/; | |
485 | chr(0x110025) =~ /\P{Ascii}/; | |
486 | chr(0x110026) =~ /\p{PosixCntrl}/; | |
487 | chr(0x110027) =~ /\P{PosixCntrl}/; | |
488 | chr(0x110028) =~ /\p{XPosixCntrl}/; | |
489 | chr(0x110029) =~ /\P{XPosixCntrl}/; | |
490 | chr(0x11002A) =~ /\p{PosixGraph}/; | |
491 | chr(0x11002B) =~ /\P{PosixGraph}/; | |
492 | chr(0x11002C) =~ /\p{XPosixGraph}/; | |
493 | chr(0x11002D) =~ /\P{XPosixGraph}/; | |
494 | chr(0x11002E) =~ /\p{PosixLower}/; | |
495 | chr(0x11002F) =~ /\P{PosixLower}/; | |
496 | chr(0x110030) =~ /\p{XPosixLower}/; | |
497 | chr(0x110031) =~ /\P{XPosixLower}/; | |
498 | chr(0x110032) =~ /\p{PosixPrint}/; | |
499 | chr(0x110033) =~ /\P{PosixPrint}/; | |
500 | chr(0x110034) =~ /\p{XPosixPrint}/; | |
501 | chr(0x110035) =~ /\P{XPosixPrint}/; | |
502 | chr(0x110036) =~ /\p{PosixPunct}/; | |
503 | chr(0x110037) =~ /\P{PosixPunct}/; | |
504 | chr(0x110038) =~ /\p{XPosixPunct}/; | |
505 | chr(0x110039) =~ /\P{XPosixPunct}/; | |
506 | chr(0x11003A) =~ /\p{PosixUpper}/; | |
507 | chr(0x11003B) =~ /\P{PosixUpper}/; | |
508 | chr(0x11003C) =~ /\p{XPosixUpper}/; | |
509 | chr(0x11003D) =~ /\P{XPosixUpper}/; | |
510 | chr(0x11003E) =~ /\p{PosixXdigit}/; | |
511 | chr(0x11003F) =~ /\P{PosixXdigit}/; | |
512 | chr(0x110040) =~ /\p{XPosixXdigit}/; | |
513 | chr(0x110041) =~ /\P{XPosixXdigit}/; | |
514 | chr(0x110042) =~ /\p{PerlSpace}/; | |
515 | chr(0x110043) =~ /\P{PerlSpace}/; | |
516 | chr(0x110044) =~ /\p{XPerlSpace}/; | |
517 | chr(0x110045) =~ /\P{XPerlSpace}/; | |
518 | chr(0x110046) =~ /\p{PosixBlank}/; | |
519 | chr(0x110047) =~ /\P{PosixBlank}/; | |
520 | chr(0x110048) =~ /\p{XPosixBlank}/; | |
521 | chr(0x110049) =~ /\P{XPosixBlank}/; | |
522 | chr(0x110050) =~ /\w/; | |
523 | chr(0x110051) =~ /\W/; | |
524 | chr(0x110052) =~ /\d/; | |
525 | chr(0x110053) =~ /\D/; | |
526 | chr(0x110054) =~ /\s/; | |
527 | chr(0x110055) =~ /\S/; | |
528 | chr(0x110056) =~ /[[:word:]]/; | |
529 | chr(0x110057) =~ /[[:^word:]]/; | |
530 | chr(0x110058) =~ /[[:alnum:]]/; | |
531 | chr(0x110059) =~ /[[:^alnum:]]/; | |
532 | chr(0x11005A) =~ /[[:space:]]/; | |
533 | chr(0x11005B) =~ /[[:^space:]]/; | |
534 | chr(0x11005C) =~ /[[:digit:]]/; | |
535 | chr(0x11005D) =~ /[[:^digit:]]/; | |
536 | chr(0x11005E) =~ /[[:alpha:]]/; | |
537 | chr(0x11005F) =~ /[[:^alpha:]]/; | |
538 | chr(0x110060) =~ /[[:ascii:]]/; | |
539 | chr(0x110061) =~ /[[:^ascii:]]/; | |
540 | chr(0x110062) =~ /[[:cntrl:]]/; | |
541 | chr(0x110063) =~ /[[:^cntrl:]]/; | |
542 | chr(0x110064) =~ /[[:graph:]]/; | |
543 | chr(0x110065) =~ /[[:^graph:]]/; | |
544 | chr(0x110066) =~ /[[:lower:]]/; | |
545 | chr(0x110067) =~ /[[:^lower:]]/; | |
546 | chr(0x110068) =~ /[[:print:]]/; | |
547 | chr(0x110069) =~ /[[:^print:]]/; | |
548 | chr(0x11006A) =~ /[[:punct:]]/; | |
549 | chr(0x11006B) =~ /[[:^punct:]]/; | |
550 | chr(0x11006C) =~ /[[:upper:]]/; | |
551 | chr(0x11006D) =~ /[[:^upper:]]/; | |
552 | chr(0x11006E) =~ /[[:xdigit:]]/; | |
553 | chr(0x11006F) =~ /[[:^xdigit:]]/; | |
554 | chr(0x110070) =~ /[[:blank:]]/; | |
555 | chr(0x110071) =~ /[[:^blank:]]/; | |
9ae3ac1a | 556 | EXPECT |
c634fdd3 | 557 | Code point 0x110000 is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 14. |
5073ffbd KW |
558 | Code point 0x110010 is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 15. |
559 | Code point 0x110011 is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 16. | |
560 | Code point 0x110011 is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 16. | |
561 | Code point 0x110012 is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 17. | |
562 | Code point 0x110013 is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 18. | |
563 | Code point 0x110013 is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 18. | |
564 | Code point 0x110014 is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 19. | |
565 | Code point 0x110015 is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 20. | |
566 | Code point 0x110015 is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 20. | |
567 | Code point 0x110016 is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 21. | |
568 | Code point 0x110017 is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 22. | |
569 | Code point 0x110017 is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 22. | |
570 | Code point 0x110018 is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 23. | |
571 | Code point 0x110019 is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 24. | |
572 | Code point 0x110019 is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 24. | |
573 | Code point 0x11001A is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 25. | |
574 | Code point 0x11001B is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 26. | |
575 | Code point 0x11001B is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 26. | |
576 | Code point 0x11001C is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 27. | |
577 | Code point 0x11001D is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 28. | |
578 | Code point 0x11001D is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 28. | |
579 | Code point 0x11001E is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 29. | |
580 | Code point 0x11001F is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 30. | |
581 | Code point 0x11001F is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 30. | |
582 | Code point 0x110020 is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 31. | |
583 | Code point 0x110021 is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 32. | |
584 | Code point 0x110021 is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 32. | |
585 | Code point 0x110022 is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 33. | |
586 | Code point 0x110023 is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 34. | |
587 | Code point 0x110023 is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 34. | |
588 | Code point 0x110024 is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 35. | |
589 | Code point 0x110025 is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 36. | |
590 | Code point 0x110025 is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 36. | |
591 | Code point 0x110026 is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 37. | |
592 | Code point 0x110027 is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 38. | |
593 | Code point 0x110027 is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 38. | |
594 | Code point 0x110028 is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 39. | |
595 | Code point 0x110029 is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 40. | |
596 | Code point 0x110029 is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 40. | |
597 | Code point 0x11002A is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 41. | |
598 | Code point 0x11002B is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 42. | |
599 | Code point 0x11002B is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 42. | |
600 | Code point 0x11002C is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 43. | |
601 | Code point 0x11002D is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 44. | |
602 | Code point 0x11002D is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 44. | |
603 | Code point 0x11002E is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 45. | |
604 | Code point 0x11002F is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 46. | |
605 | Code point 0x11002F is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 46. | |
606 | Code point 0x110030 is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 47. | |
607 | Code point 0x110031 is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 48. | |
608 | Code point 0x110031 is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 48. | |
609 | Code point 0x110032 is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 49. | |
610 | Code point 0x110033 is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 50. | |
611 | Code point 0x110033 is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 50. | |
612 | Code point 0x110034 is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 51. | |
613 | Code point 0x110035 is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 52. | |
614 | Code point 0x110035 is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 52. | |
615 | Code point 0x110036 is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 53. | |
616 | Code point 0x110037 is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 54. | |
617 | Code point 0x110037 is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 54. | |
618 | Code point 0x110038 is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 55. | |
619 | Code point 0x110039 is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 56. | |
620 | Code point 0x110039 is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 56. | |
621 | Code point 0x11003A is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 57. | |
622 | Code point 0x11003B is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 58. | |
623 | Code point 0x11003B is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 58. | |
624 | Code point 0x11003C is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 59. | |
625 | Code point 0x11003D is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 60. | |
626 | Code point 0x11003D is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 60. | |
627 | Code point 0x11003E is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 61. | |
628 | Code point 0x11003F is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 62. | |
629 | Code point 0x11003F is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 62. | |
630 | Code point 0x110040 is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 63. | |
631 | Code point 0x110041 is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 64. | |
632 | Code point 0x110041 is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 64. | |
633 | Code point 0x110042 is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 65. | |
634 | Code point 0x110043 is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 66. | |
635 | Code point 0x110043 is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 66. | |
636 | Code point 0x110044 is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 67. | |
637 | Code point 0x110045 is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 68. | |
638 | Code point 0x110045 is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 68. | |
639 | Code point 0x110046 is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 69. | |
640 | Code point 0x110047 is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 70. | |
641 | Code point 0x110047 is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 70. | |
642 | Code point 0x110048 is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 71. | |
643 | Code point 0x110049 is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 72. | |
644 | Code point 0x110049 is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 72. | |
9ae3ac1a | 645 | ######## |
e9b08962 | 646 | # NAME Matching Unicode property against above-Unicode code point outputs a warning even if optimizer rejects the match (in synthetic start class) |
ae986089 KW |
647 | use warnings 'non_unicode'; |
648 | "\x{110000}" =~ /b?\p{Space}/; | |
649 | EXPECT | |
650 | Code point 0x110000 is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 2. | |
651 | ######## | |
652 | # NAME Matching POSIX class property against above-Unicode code point doesn't output a warning | |
653 | use warnings 'non_unicode'; | |
654 | "\x{110000}" =~ /b?[[:space:]]/; | |
655 | EXPECT | |
656 | ######## | |
8457b38f KW |
# NAME C<no warnings "non_unicode"> silences the \p{} warning that C<use warnings "utf8"> enables
657 | use warnings 'utf8'; |
658 | chr(0x110000) =~ /\p{Any}/; | |
659 | no warnings 'non_unicode'; | |
660 | chr(0x110000) =~ /\p{Any}/; | |
661 | EXPECT | |
c634fdd3 | 662 | Code point 0x110000 is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 2. |
8457b38f | 663 | ######## |
5073ffbd KW |
664 | # TODO optimized regnode should still give warnings |
665 | use warnings 'utf8'; | |
845e7aa3 | 666 | chr(0x110000) =~ /\p{lb=cr}/; |
5073ffbd | 667 | no warnings 'non_unicode'; |
845e7aa3 | 668 | chr(0x110000) =~ /\p{lb=cr}/; |
5073ffbd KW |
669 | EXPECT |
670 | Code point 0x110000 is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 2. | |
671 | ######## | |
9ae3ac1a KW |
# NAME Above-Unicode code point matches user-defined \p{Is_Super} inside a bracketed class without warning
672 | require "../test.pl"; |
673 | use warnings 'utf8'; | |
a410ec23 | 674 | sub Is_Super { return '!utf8::Any' } |
88d45d28 KW |
675 | # The extra char is to avoid an optimization that avoids the problem when the |
676 | # property is the only non-latin1 char in a class | |
677 | print "\x{1100000}" =~ /^[\p{Is_Super}\x{100}]$/, "\n"; | |
a410ec23 KW |
678 | EXPECT |
679 | 1 | |
680 | ######## | |
# NAME C<use warnings "utf8"> warns on surrogates, non-characters and above-Unicode code points written through :utf8
681 | require "../test.pl"; | |
682 | use warnings 'utf8'; | |
9ae3ac1a KW |
683 | my $file = tempfile(); |
684 | open(my $fh, "+>:utf8", $file) or die "Cannot open $file: $!"; | |
685 | print $fh "\x{D7FF}", "\n"; | |
686 | print $fh "\x{D800}", "\n"; | |
687 | print $fh "\x{DFFF}", "\n"; | |
688 | print $fh "\x{E000}", "\n"; | |
689 | print $fh "\x{FDCF}", "\n"; | |
690 | print $fh "\x{FDD0}", "\n"; | |
691 | print $fh "\x{FDEF}", "\n"; | |
692 | print $fh "\x{FDF0}", "\n"; | |
693 | print $fh "\x{FEFF}", "\n"; | |
694 | print $fh "\x{FFFD}", "\n"; | |
695 | print $fh "\x{FFFE}", "\n"; | |
696 | print $fh "\x{FFFF}", "\n"; | |
697 | print $fh "\x{10000}", "\n"; | |
698 | print $fh "\x{1FFFE}", "\n"; | |
699 | print $fh "\x{1FFFF}", "\n"; | |
700 | print $fh "\x{2FFFE}", "\n"; | |
701 | print $fh "\x{2FFFF}", "\n"; | |
702 | print $fh "\x{3FFFE}", "\n"; | |
703 | print $fh "\x{3FFFF}", "\n"; | |
704 | print $fh "\x{4FFFE}", "\n"; | |
705 | print $fh "\x{4FFFF}", "\n"; | |
706 | print $fh "\x{5FFFE}", "\n"; | |
707 | print $fh "\x{5FFFF}", "\n"; | |
708 | print $fh "\x{6FFFE}", "\n"; | |
709 | print $fh "\x{6FFFF}", "\n"; | |
710 | print $fh "\x{7FFFE}", "\n"; | |
711 | print $fh "\x{7FFFF}", "\n"; | |
712 | print $fh "\x{8FFFE}", "\n"; | |
713 | print $fh "\x{8FFFF}", "\n"; | |
714 | print $fh "\x{9FFFE}", "\n"; | |
715 | print $fh "\x{9FFFF}", "\n"; | |
716 | print $fh "\x{AFFFE}", "\n"; | |
717 | print $fh "\x{AFFFF}", "\n"; | |
718 | print $fh "\x{BFFFE}", "\n"; | |
719 | print $fh "\x{BFFFF}", "\n"; | |
720 | print $fh "\x{CFFFE}", "\n"; | |
721 | print $fh "\x{CFFFF}", "\n"; | |
722 | print $fh "\x{DFFFE}", "\n"; | |
723 | print $fh "\x{DFFFF}", "\n"; | |
724 | print $fh "\x{EFFFE}", "\n"; | |
725 | print $fh "\x{EFFFF}", "\n"; | |
726 | print $fh "\x{FFFFE}", "\n"; | |
727 | print $fh "\x{FFFFF}", "\n"; | |
728 | print $fh "\x{100000}", "\n"; | |
729 | print $fh "\x{10FFFE}", "\n"; | |
730 | print $fh "\x{10FFFF}", "\n"; | |
731 | print $fh "\x{110000}", "\n"; | |
732 | close $fh; | |
733 | EXPECT | |
734 | Unicode surrogate U+D800 is illegal in UTF-8 at - line 6. | |
735 | Unicode surrogate U+DFFF is illegal in UTF-8 at - line 7. | |
736 | Unicode non-character U+FDD0 is illegal for open interchange at - line 10. | |
737 | Unicode non-character U+FDEF is illegal for open interchange at - line 11. | |
738 | Unicode non-character U+FFFE is illegal for open interchange at - line 15. | |
739 | Unicode non-character U+FFFF is illegal for open interchange at - line 16. | |
740 | Unicode non-character U+1FFFE is illegal for open interchange at - line 18. | |
741 | Unicode non-character U+1FFFF is illegal for open interchange at - line 19. | |
742 | Unicode non-character U+2FFFE is illegal for open interchange at - line 20. | |
743 | Unicode non-character U+2FFFF is illegal for open interchange at - line 21. | |
744 | Unicode non-character U+3FFFE is illegal for open interchange at - line 22. | |
745 | Unicode non-character U+3FFFF is illegal for open interchange at - line 23. | |
746 | Unicode non-character U+4FFFE is illegal for open interchange at - line 24. | |
747 | Unicode non-character U+4FFFF is illegal for open interchange at - line 25. | |
748 | Unicode non-character U+5FFFE is illegal for open interchange at - line 26. | |
749 | Unicode non-character U+5FFFF is illegal for open interchange at - line 27. | |
750 | Unicode non-character U+6FFFE is illegal for open interchange at - line 28. | |
751 | Unicode non-character U+6FFFF is illegal for open interchange at - line 29. | |
752 | Unicode non-character U+7FFFE is illegal for open interchange at - line 30. | |
753 | Unicode non-character U+7FFFF is illegal for open interchange at - line 31. | |
754 | Unicode non-character U+8FFFE is illegal for open interchange at - line 32. | |
755 | Unicode non-character U+8FFFF is illegal for open interchange at - line 33. | |
756 | Unicode non-character U+9FFFE is illegal for open interchange at - line 34. | |
757 | Unicode non-character U+9FFFF is illegal for open interchange at - line 35. | |
758 | Unicode non-character U+AFFFE is illegal for open interchange at - line 36. | |
759 | Unicode non-character U+AFFFF is illegal for open interchange at - line 37. | |
760 | Unicode non-character U+BFFFE is illegal for open interchange at - line 38. | |
761 | Unicode non-character U+BFFFF is illegal for open interchange at - line 39. | |
762 | Unicode non-character U+CFFFE is illegal for open interchange at - line 40. | |
763 | Unicode non-character U+CFFFF is illegal for open interchange at - line 41. | |
764 | Unicode non-character U+DFFFE is illegal for open interchange at - line 42. | |
765 | Unicode non-character U+DFFFF is illegal for open interchange at - line 43. | |
766 | Unicode non-character U+EFFFE is illegal for open interchange at - line 44. | |
767 | Unicode non-character U+EFFFF is illegal for open interchange at - line 45. | |
768 | Unicode non-character U+FFFFE is illegal for open interchange at - line 46. | |
769 | Unicode non-character U+FFFFF is illegal for open interchange at - line 47. | |
770 | Unicode non-character U+10FFFE is illegal for open interchange at - line 49. | |
771 | Unicode non-character U+10FFFF is illegal for open interchange at - line 50. | |
772 | Code point 0x110000 is not Unicode, may not be portable at - line 51. | |
773 | ######## | |
# NAME C<use warnings "utf8"> enables the surrogate, nonchar and non_unicode warnings together
774 | require "../test.pl"; | |
8457b38f KW |
775 | use warnings 'utf8'; |
776 | my $file = tempfile(); | |
777 | open(my $fh, "+>:utf8", $file) or die "Cannot open $file: $!"; | |
778 | print $fh "\x{D800}", "\n"; | |
779 | print $fh "\x{FFFF}", "\n"; | |
780 | print $fh "\x{110000}", "\n"; | |
781 | close $fh; | |
782 | EXPECT | |
783 | Unicode surrogate U+D800 is illegal in UTF-8 at - line 5. | |
784 | Unicode non-character U+FFFF is illegal for open interchange at - line 6. | |
785 | Code point 0x110000 is not Unicode, may not be portable at - line 7. | |
786 | ######## | |
# NAME C<no warnings "surrogate"> under C<use warnings "utf8"> suppresses only the surrogate warning
787 | require "../test.pl"; | |
788 | use warnings 'utf8'; | |
789 | no warnings 'surrogate'; | |
790 | my $file = tempfile(); | |
791 | open(my $fh, "+>:utf8", $file) or die "Cannot open $file: $!"; | |
792 | print $fh "\x{D800}", "\n"; | |
793 | print $fh "\x{FFFF}", "\n"; | |
794 | print $fh "\x{110000}", "\n"; | |
795 | close $fh; | |
796 | EXPECT | |
797 | Unicode non-character U+FFFF is illegal for open interchange at - line 7. | |
798 | Code point 0x110000 is not Unicode, may not be portable at - line 8. | |
799 | ######## | |
# NAME C<no warnings "nonchar"> under C<use warnings "utf8"> suppresses only the non-character warning
800 | require "../test.pl"; | |
801 | use warnings 'utf8'; | |
802 | no warnings 'nonchar'; | |
803 | my $file = tempfile(); | |
804 | open(my $fh, "+>:utf8", $file) or die "Cannot open $file: $!"; | |
805 | print $fh "\x{D800}", "\n"; | |
806 | print $fh "\x{FFFF}", "\n"; | |
807 | print $fh "\x{110000}", "\n"; | |
808 | close $fh; | |
809 | EXPECT | |
810 | Unicode surrogate U+D800 is illegal in UTF-8 at - line 6. | |
811 | Code point 0x110000 is not Unicode, may not be portable at - line 8. | |
812 | ######## | |
# NAME C<no warnings "non_unicode"> under C<use warnings "utf8"> suppresses only the above-Unicode warning
813 | require "../test.pl"; | |
814 | use warnings 'utf8'; | |
815 | no warnings 'non_unicode'; | |
816 | my $file = tempfile(); | |
817 | open(my $fh, "+>:utf8", $file) or die "Cannot open $file: $!"; | |
818 | print $fh "\x{D800}", "\n"; | |
819 | print $fh "\x{FFFF}", "\n"; | |
820 | print $fh "\x{110000}", "\n"; | |
821 | close $fh; | |
822 | EXPECT | |
823 | Unicode surrogate U+D800 is illegal in UTF-8 at - line 6. | |
824 | Unicode non-character U+FFFF is illegal for open interchange at - line 7. | |
825 | ######## | |
920e47bb AC |
826 | # NAME C<use warnings "nonchar"> works in isolation |
827 | require "../test.pl"; | |
828 | use warnings 'nonchar'; | |
829 | my $file = tempfile(); | |
830 | open(my $fh, "+>:utf8", $file) or die "Cannot open $file: $!"; | |
831 | print $fh "\x{FFFF}", "\n"; | |
832 | close $fh; | |
833 | EXPECT | |
834 | Unicode non-character U+FFFF is illegal for open interchange at - line 5. | |
835 | ######## | |
920e47bb AC |
836 | # NAME C<use warnings "surrogate"> works in isolation |
837 | require "../test.pl"; | |
838 | use warnings 'surrogate'; | |
839 | my $file = tempfile(); | |
840 | open(my $fh, "+>:utf8", $file) or die "Cannot open $file: $!"; | |
841 | print $fh "\x{D800}", "\n"; | |
842 | close $fh; | |
843 | EXPECT | |
844 | Unicode surrogate U+D800 is illegal in UTF-8 at - line 5. | |
845 | ######## | |
920e47bb AC |
846 | # NAME C<use warnings "non_unicode"> works in isolation |
847 | require "../test.pl"; | |
848 | use warnings 'non_unicode'; | |
849 | my $file = tempfile(); | |
850 | open(my $fh, "+>:utf8", $file) or die "Cannot open $file: $!"; | |
851 | print $fh "\x{110000}", "\n"; | |
852 | close $fh; | |
853 | EXPECT | |
854 | Code point 0x110000 is not Unicode, may not be portable at - line 5. | |
855 | ######## | |
8457b38f | 856 | require "../test.pl"; |
9ae3ac1a KW |
857 | no warnings 'utf8'; |
858 | my $file = tempfile(); | |
859 | open(my $fh, "+>:utf8", $file); | |
860 | print $fh "\x{D7FF}", "\n"; | |
861 | print $fh "\x{D800}", "\n"; | |
862 | print $fh "\x{DFFF}", "\n"; | |
863 | print $fh "\x{E000}", "\n"; | |
864 | print $fh "\x{FDCF}", "\n"; | |
865 | print $fh "\x{FDD0}", "\n"; | |
866 | print $fh "\x{FDEF}", "\n"; | |
867 | print $fh "\x{FDF0}", "\n"; | |
868 | print $fh "\x{FEFF}", "\n"; | |
869 | print $fh "\x{FFFD}", "\n"; | |
870 | print $fh "\x{FFFE}", "\n"; | |
871 | print $fh "\x{FFFF}", "\n"; | |
872 | print $fh "\x{10000}", "\n"; | |
873 | print $fh "\x{1FFFE}", "\n"; | |
874 | print $fh "\x{1FFFF}", "\n"; | |
875 | print $fh "\x{2FFFE}", "\n"; | |
876 | print $fh "\x{2FFFF}", "\n"; | |
877 | print $fh "\x{3FFFE}", "\n"; | |
878 | print $fh "\x{3FFFF}", "\n"; | |
879 | print $fh "\x{4FFFE}", "\n"; | |
880 | print $fh "\x{4FFFF}", "\n"; | |
881 | print $fh "\x{5FFFE}", "\n"; | |
882 | print $fh "\x{5FFFF}", "\n"; | |
883 | print $fh "\x{6FFFE}", "\n"; | |
884 | print $fh "\x{6FFFF}", "\n"; | |
885 | print $fh "\x{7FFFE}", "\n"; | |
886 | print $fh "\x{7FFFF}", "\n"; | |
887 | print $fh "\x{8FFFE}", "\n"; | |
888 | print $fh "\x{8FFFF}", "\n"; | |
889 | print $fh "\x{9FFFE}", "\n"; | |
890 | print $fh "\x{9FFFF}", "\n"; | |
891 | print $fh "\x{AFFFE}", "\n"; | |
892 | print $fh "\x{AFFFF}", "\n"; | |
893 | print $fh "\x{BFFFE}", "\n"; | |
894 | print $fh "\x{BFFFF}", "\n"; | |
895 | print $fh "\x{CFFFE}", "\n"; | |
896 | print $fh "\x{CFFFF}", "\n"; | |
897 | print $fh "\x{DFFFE}", "\n"; | |
898 | print $fh "\x{DFFFF}", "\n"; | |
899 | print $fh "\x{EFFFE}", "\n"; | |
900 | print $fh "\x{EFFFF}", "\n"; | |
901 | print $fh "\x{FFFFE}", "\n"; | |
902 | print $fh "\x{FFFFF}", "\n"; | |
903 | print $fh "\x{100000}", "\n"; | |
904 | print $fh "\x{10FFFE}", "\n"; | |
905 | print $fh "\x{10FFFF}", "\n"; | |
906 | print $fh "\x{110000}", "\n"; | |
907 | close $fh; | |
62961d2e | 908 | EXPECT |