Commit | Line | Data |
---|---|---|
7899b636 KW |
1 | =provides |
2 | ||
3 | __UNDEFINED__ | |
4 | utf8_to_uvchr_buf | |
5 | ||
6 | =implementation | |
7 | ||
8 | #define D_PPP_MIN(a,b) (((a) <= (b)) ? (a) : (b)) | |
9 | ||
10 | __UNDEFINED__ UNICODE_REPLACEMENT 0xFFFD | |
11 | ||
12 | __UNDEFINED__ UTF8_MAXBYTES UTF8_MAXLEN | |
13 | ||
14 | __UNDEFINED__ UTF8_ALLOW_ANYUV 0 | |
15 | __UNDEFINED__ UTF8_ALLOW_EMPTY 0x0001 | |
16 | __UNDEFINED__ UTF8_ALLOW_CONTINUATION 0x0002 | |
17 | __UNDEFINED__ UTF8_ALLOW_NON_CONTINUATION 0x0004 | |
18 | __UNDEFINED__ UTF8_ALLOW_SHORT 0x0008 | |
19 | __UNDEFINED__ UTF8_ALLOW_LONG 0x0010 | |
20 | __UNDEFINED__ UTF8_ALLOW_OVERFLOW 0x0080 | |
21 | __UNDEFINED__ UTF8_ALLOW_ANY ( UTF8_ALLOW_CONTINUATION \ | |
22 | |UTF8_ALLOW_NON_CONTINUATION \ | |
23 | |UTF8_ALLOW_SHORT \ | |
24 | |UTF8_ALLOW_LONG \ | |
25 | |UTF8_ALLOW_OVERFLOW) | |
26 | ||
27 | #if defined UTF8SKIP | |
28 | ||
29 | /* Don't use official version because it uses MIN, which may not be available */ | |
30 | #undef UTF8_SAFE_SKIP | |
31 | ||
32 | __UNDEFINED__ UTF8_SAFE_SKIP(s, e) ( \ | |
33 | ((((e) - (s)) <= 0) \ | |
34 | ? 0 \ | |
35 | : D_PPP_MIN(((e) - (s)), UTF8SKIP(s)))) | |
36 | #endif | |
37 | ||
d2b44e2b KW |
38 | #ifdef is_ascii_string |
39 | __UNDEFINED__ is_invariant_string(s,l) is_ascii_string(s,l) | |
40 | __UNDEFINED__ is_utf8_invariant_string(s,l) is_ascii_string(s,l) | |
41 | ||
42 | /* Hint: is_utf8_invariant_string | |
43 | Please use this instead of is_ascii_string or is_invariant_string | |
44 | */ | |
45 | #endif | |
46 | ||
2ff9e5e8 KW |
47 | #ifdef ibcmp_utf8 |
48 | __UNDEFINED__ foldEQ_utf8(s1,pe1,l1,u1,s2,pe2,l2,u2) \ | |
49 | cBOOL(! ibcmp_utf8(s1,pe1,l1,u1,s2,pe2,l2,u2)) | |
50 | #endif | |
51 | ||
7899b636 KW |
52 | #if defined(is_utf8_string) && defined(UTF8SKIP) |
53 | __UNDEFINED__ isUTF8_CHAR(s0, e) ( \ | |
54 | (e) <= (s0) || ! is_utf8_string(s0, D_PPP_MIN(UTF8SKIP(s0), (e) - (s0))) \ | |
55 | ? 0 \ | |
56 | : UTF8SKIP(s0)) | |
57 | #endif | |
58 | ||
8fc7db65 KW |
59 | #if 'A' == 65 |
60 | __UNDEFINED__ BOM_UTF8 "\xEF\xBB\xBF" | |
61 | __UNDEFINED__ REPLACEMENT_CHARACTER_UTF8 "\xEF\xBF\xBD" | |
62 | #elif '^' == 95 | |
63 | __UNDEFINED__ BOM_UTF8 "\xDD\x73\x66\x73" | |
64 | __UNDEFINED__ REPLACEMENT_CHARACTER_UTF8 "\xDD\x73\x73\x71" | |
65 | #elif '^' == 176 | |
66 | __UNDEFINED__ BOM_UTF8 "\xDD\x72\x65\x72" | |
67 | __UNDEFINED__ REPLACEMENT_CHARACTER_UTF8 "\xDD\x72\x72\x70" | |
68 | #else | |
69 | # error Unknown character set | |
70 | #endif | |
71 | ||
7899b636 KW |
72 | #if { VERSION < 5.31.3 } |
73 | /* Versions prior to this accepted things that are now considered | |
74 | * malformations, and didn't return -1 on error with warnings enabled | |
75 | * */ | |
76 | # undef utf8_to_uvchr_buf | |
77 | #endif | |
78 | ||
79 | /* This implementation brings modern, generally more restricted standards to | |
80 | * utf8_to_uvchr_buf. Some of these are security related, and clearly must | |
81 | * be done. But its arguable that the others need not, and hence should not. | |
82 | * The reason they're here is that a module that intends to play with the | |
83 | * latest perls should be able to work the same in all releases. An example is | |
84 | * that perl no longer accepts any UV for a code point, but limits them to | |
85 | * IV_MAX or below. This is for future internal use of the larger code points. | |
86 | * If it turns out that some of these changes are breaking code that isn't | |
87 | * intended to work with modern perls, the tighter restrictions could be | |
88 | * relaxed. khw thinks this is unlikely, but has been wrong in the past. */ | |
89 | ||
90 | /* 5.6.0 is the first release with UTF-8, and we don't implement this function | |
91 | * there due to its likely lack of still being in use, and the underlying | |
92 | * implementation is very different from later ones, without the later | |
93 | * safeguards, so would require extra work to deal with */ | |
94 | #if { VERSION >= 5.6.1 } && ! defined(utf8_to_uvchr_buf) | |
95 | /* Choose which underlying implementation to use. At least one must be | |
96 | * present or the perl is too early to handle this function */ | |
97 | # if defined(utf8n_to_uvchr) || defined(utf8_to_uv) | |
98 | # if defined(utf8n_to_uvchr) /* This is the preferred implementation */ | |
99 | # define D_PPP_utf8_to_uvchr_buf_callee utf8n_to_uvchr | |
100 | # else /* Must be at least 5.6.1 from #if above */ | |
101 | # define D_PPP_utf8_to_uvchr_buf_callee(s, curlen, retlen, flags) utf8_to_uv((U8 *)(s), (curlen), (retlen), (flags)) | |
102 | # endif | |
103 | # endif | |
104 | ||
105 | # if { NEED utf8_to_uvchr_buf } | |
106 | ||
107 | UV | |
108 | utf8_to_uvchr_buf(pTHX_ const U8 *s, const U8 *send, STRLEN *retlen) | |
109 | { | |
110 | UV ret; | |
111 | STRLEN curlen; | |
112 | bool overflows = 0; | |
113 | const U8 *cur_s = s; | |
114 | const bool do_warnings = ckWARN_d(WARN_UTF8); | |
115 | # if { VERSION < 5.26.0 } && ! defined(EBCDIC) | |
116 | STRLEN overflow_length = 0; | |
117 | # endif | |
118 | ||
119 | if (send > s) { | |
120 | curlen = send - s; | |
121 | } | |
122 | else { | |
123 | assert(0); /* Modern perls die under this circumstance */ | |
124 | curlen = 0; | |
125 | if (! do_warnings) { /* Handle empty here if no warnings needed */ | |
126 | if (retlen) *retlen = 0; | |
127 | return UNICODE_REPLACEMENT; | |
128 | } | |
129 | } | |
130 | ||
131 | # if { VERSION < 5.26.0 } && ! defined(EBCDIC) | |
132 | ||
133 | /* Perl did not properly detect overflow for much of its history on | |
134 | * non-EBCDIC platforms, often returning an overlong value which may or may | |
135 | * not have been tolerated in the call. Also, earlier versions, when they | |
136 | * did detect overflow, may have disallowed it completely. Modern ones can | |
137 | * replace it with the REPLACEMENT CHARACTER, depending on calling | |
138 | * parameters. Therefore detect it ourselves in releases it was | |
139 | * problematic in. */ | |
140 | ||
141 | if (curlen > 0 && UNLIKELY(*s >= 0xFE)) { | |
142 | ||
143 | /* First, on a 32-bit machine the first byte being at least \xFE | |
144 | * automatically is overflow, as it indicates something requiring more | |
145 | * than 31 bits */ | |
146 | if (sizeof(ret) < 8) { | |
147 | overflows = 1; | |
148 | overflow_length = 7; | |
149 | } | |
150 | else { | |
151 | const U8 highest[] = /* 2*63-1 */ | |
152 | "\xFF\x80\x87\xBF\xBF\xBF\xBF\xBF\xBF\xBF\xBF\xBF\xBF"; | |
153 | const U8 *cur_h = highest; | |
154 | ||
155 | for (cur_s = s; cur_s < send; cur_s++, cur_h++) { | |
156 | if (UNLIKELY(*cur_s == *cur_h)) { | |
157 | continue; | |
158 | } | |
159 | ||
160 | /* If this byte is larger than the corresponding highest UTF-8 | |
161 | * byte, the sequence overflows; otherwise the byte is less | |
162 | * than (as we handled the equality case above), and so the | |
163 | * sequence doesn't overflow */ | |
164 | overflows = *cur_s > *cur_h; | |
165 | break; | |
166 | ||
167 | } | |
168 | ||
169 | /* Here, either we set the bool and broke out of the loop, or got | |
170 | * to the end and all bytes are the same which indicates it doesn't | |
171 | * overflow. If it did overflow, it would be this number of bytes | |
172 | * */ | |
173 | overflow_length = 13; | |
174 | } | |
175 | } | |
176 | ||
177 | if (UNLIKELY(overflows)) { | |
178 | ret = 0; | |
179 | ||
180 | if (! do_warnings && retlen) { | |
181 | *retlen = overflow_length; | |
182 | } | |
183 | } | |
184 | else | |
185 | ||
186 | # endif /* < 5.26 */ | |
187 | ||
188 | /* Here, we are either in a release that properly detects overflow, or | |
189 | * we have checked for overflow and the next statement is executing as | |
190 | * part of the above conditional where we know we don't have overflow. | |
191 | * | |
192 | * The modern versions allow anything that evaluates to a legal UV, but | |
193 | * not overlongs nor an empty input */ | |
194 | ret = D_PPP_utf8_to_uvchr_buf_callee( | |
195 | s, curlen, retlen, (UTF8_ALLOW_ANYUV | |
196 | & ~(UTF8_ALLOW_LONG|UTF8_ALLOW_EMPTY))); | |
197 | ||
198 | # if { VERSION >= 5.26.0 } && { VERSION < 5.28.0 } | |
199 | ||
200 | /* But actually, more modern versions restrict the UV to being no more than | |
201 | * what * an IV can hold, so it could, so it could still have gotten it | |
202 | * wrong about overflowing. */ | |
203 | if (UNLIKELY(ret > IV_MAX)) { | |
204 | overflows = 1; | |
205 | } | |
206 | ||
207 | # endif | |
208 | ||
209 | if (UNLIKELY(overflows)) { | |
210 | if (! do_warnings) { | |
211 | if (retlen) { | |
212 | *retlen = D_PPP_MIN(*retlen, UTF8SKIP(s)); | |
213 | *retlen = D_PPP_MIN(*retlen, curlen); | |
214 | } | |
215 | return UNICODE_REPLACEMENT; | |
216 | } | |
217 | else { | |
218 | ||
219 | /* We use the error message in use from 5.8-5.26 */ | |
220 | Perl_warner(aTHX_ packWARN(WARN_UTF8), | |
221 | "Malformed UTF-8 character (overflow at 0x%" UVxf | |
222 | ", byte 0x%02x, after start byte 0x%02x)", | |
223 | ret, *cur_s, *s); | |
224 | if (retlen) { | |
225 | *retlen = (STRLEN) -1; | |
226 | } | |
227 | return 0; | |
228 | } | |
229 | } | |
230 | ||
231 | /* Here, did not overflow, but if it failed for some other reason, and | |
232 | * warnings are off, to emulate the behavior of the real utf8_to_uvchr(), | |
233 | * try again, allowing anything. (Note a return of 0 is ok if the input | |
234 | * was '\0') */ | |
235 | if (UNLIKELY(ret == 0 && (curlen == 0 || *s != '\0'))) { | |
236 | ||
237 | /* If curlen is 0, we already handled the case where warnings are | |
238 | * disabled, so this 'if' will be true, and so later on, we know that | |
239 | * 's' is dereferencible */ | |
240 | if (do_warnings) { | |
241 | *retlen = (STRLEN) -1; | |
242 | } | |
243 | else { | |
244 | ret = D_PPP_utf8_to_uvchr_buf_callee( | |
245 | s, curlen, retlen, UTF8_ALLOW_ANY); | |
246 | /* Override with the REPLACEMENT character, as that is what the | |
247 | * modern version of this function returns */ | |
248 | ret = UNICODE_REPLACEMENT; | |
249 | ||
250 | # if { VERSION < 5.16.0 } | |
251 | ||
252 | /* Versions earlier than this don't necessarily return the proper | |
253 | * length. It should not extend past the end of string, nor past | |
254 | * what the first byte indicates the length is, nor past the | |
255 | * continuation characters */ | |
256 | if (retlen && *retlen >= 0) { | |
257 | unsigned int i = 1; | |
258 | ||
259 | *retlen = D_PPP_MIN(*retlen, curlen); | |
260 | *retlen = D_PPP_MIN(*retlen, UTF8SKIP(s)); | |
261 | do { | |
262 | if (s[i] < 0x80 || s[i] > 0xBF) { | |
263 | *retlen = i; | |
264 | break; | |
265 | } | |
266 | } while (++i < *retlen); | |
267 | } | |
268 | ||
269 | # endif | |
270 | ||
271 | } | |
272 | } | |
273 | ||
274 | return ret; | |
275 | } | |
276 | ||
277 | # endif | |
278 | #endif | |
279 | ||
280 | #if defined(UTF8SKIP) && defined(utf8_to_uvchr_buf) | |
281 | #undef utf8_to_uvchr /* Always redefine this unsafe function so that it refuses | |
282 | to read past a NUL, making it much less likely to read | |
283 | off the end of the buffer. A NUL indicates the start | |
284 | of the next character anyway. If the input isn't | |
285 | NUL-terminated, the function remains unsafe, as it | |
286 | always has been. */ | |
287 | ||
288 | __UNDEFINED__ utf8_to_uvchr(s, lp) \ | |
289 | ((*(s) == '\0') \ | |
290 | ? utf8_to_uvchr_buf(s,((s)+1), lp) /* Handle single NUL specially */ \ | |
291 | : utf8_to_uvchr_buf(s, (s) + my_strnlen((char *) (s), UTF8SKIP(s)), (lp))) | |
292 | ||
293 | #endif | |
294 | ||
295 | =xsinit | |
296 | ||
297 | #define NEED_utf8_to_uvchr_buf | |
298 | ||
299 | =xsubs | |
300 | ||
301 | #if defined(UTF8_SAFE_SKIP) && defined(UTF8SKIP) | |
302 | ||
303 | STRLEN | |
304 | UTF8_SAFE_SKIP(s, adjustment) | |
305 | char * s | |
306 | int adjustment | |
307 | PREINIT: | |
308 | const char *const_s; | |
309 | CODE: | |
310 | const_s = s; | |
311 | /* Instead of passing in an 'e' ptr, use the real end, adjusted */ | |
312 | RETVAL = UTF8_SAFE_SKIP(const_s, s + UTF8SKIP(s) + adjustment); | |
313 | OUTPUT: | |
314 | RETVAL | |
315 | ||
316 | #endif | |
317 | ||
318 | #ifdef isUTF8_CHAR | |
319 | ||
320 | STRLEN | |
321 | isUTF8_CHAR(s, adjustment) | |
322 | unsigned char * s | |
323 | int adjustment | |
324 | PREINIT: | |
325 | const unsigned char *const_s; | |
326 | const unsigned char *const_e; | |
327 | CODE: | |
328 | const_s = s; | |
329 | /* Instead of passing in an 'e' ptr, use the real end, adjusted */ | |
330 | const_e = const_s + UTF8SKIP(const_s) + adjustment; | |
331 | RETVAL = isUTF8_CHAR(const_s, const_e); | |
332 | OUTPUT: | |
333 | RETVAL | |
334 | ||
335 | #endif | |
336 | ||
2ff9e5e8 KW |
337 | |
338 | #ifdef foldEQ_utf8 | |
339 | ||
340 | STRLEN | |
341 | foldEQ_utf8(s1, l1, u1, s2, l2, u2) | |
342 | char *s1 | |
343 | UV l1 | |
344 | bool u1 | |
345 | char *s2 | |
346 | UV l2 | |
347 | bool u2 | |
348 | PREINIT: | |
349 | const char *const_s1; | |
350 | const char *const_s2; | |
351 | CODE: | |
352 | const_s1 = s1; | |
353 | const_s2 = s2; | |
354 | RETVAL = foldEQ_utf8(const_s1, NULL, l1, u1, const_s2, NULL, l2, u2); | |
355 | OUTPUT: | |
356 | RETVAL | |
357 | ||
358 | #endif | |
359 | ||
7899b636 KW |
360 | #ifdef utf8_to_uvchr_buf |
361 | ||
362 | AV * | |
363 | utf8_to_uvchr_buf(s, adjustment) | |
364 | unsigned char *s | |
365 | int adjustment | |
366 | PREINIT: | |
367 | AV *av; | |
368 | STRLEN len; | |
369 | const unsigned char *const_s; | |
370 | CODE: | |
371 | av = newAV(); | |
372 | const_s = s; | |
373 | av_push(av, newSVuv(utf8_to_uvchr_buf(const_s, | |
374 | s + UTF8SKIP(s) + adjustment, | |
375 | &len))); | |
376 | if (len == (STRLEN) -1) { | |
377 | av_push(av, newSViv(-1)); | |
378 | } | |
379 | else { | |
380 | av_push(av, newSVuv(len)); | |
381 | } | |
382 | RETVAL = av; | |
383 | OUTPUT: | |
384 | RETVAL | |
385 | ||
386 | #endif | |
387 | ||
388 | #ifdef utf8_to_uvchr | |
389 | ||
390 | AV * | |
391 | utf8_to_uvchr(s) | |
392 | unsigned char *s | |
393 | PREINIT: | |
394 | AV *av; | |
395 | STRLEN len; | |
396 | const unsigned char *const_s; | |
397 | CODE: | |
398 | av = newAV(); | |
399 | const_s = s; | |
400 | av_push(av, newSVuv(utf8_to_uvchr(const_s, &len))); | |
401 | if (len == (STRLEN) -1) { | |
402 | av_push(av, newSViv(-1)); | |
403 | } | |
404 | else { | |
405 | av_push(av, newSVuv(len)); | |
406 | } | |
407 | RETVAL = av; | |
408 | OUTPUT: | |
409 | RETVAL | |
410 | ||
411 | #endif | |
412 | ||
2ff9e5e8 | 413 | =tests plan => 58 |
7899b636 KW |
414 | |
415 | BEGIN { require warnings if "$]" gt '5.006' } | |
416 | ||
417 | # skip tests on 5.6.0 and earlier | |
418 | if ("$]" le '5.006') { | |
2ff9e5e8 | 419 | skip 'skip: broken utf8 support', 0 for 1..58; |
7899b636 KW |
420 | exit; |
421 | } | |
422 | ||
423 | ok(&Devel::PPPort::UTF8_SAFE_SKIP("A", 0), 1); | |
424 | ok(&Devel::PPPort::UTF8_SAFE_SKIP("A", -1), 0); | |
425 | ||
426 | ok(&Devel::PPPort::isUTF8_CHAR("A", -1), 0); | |
427 | ok(&Devel::PPPort::isUTF8_CHAR("A", 0), 1); | |
428 | ok(&Devel::PPPort::isUTF8_CHAR("\x{100}", -1), 0); | |
429 | ok(&Devel::PPPort::isUTF8_CHAR("\x{100}", 0), 2); | |
430 | ||
2ff9e5e8 KW |
431 | if ("$]" lt '5.008') { |
432 | ok(1, 1) for 1 ..3 | |
433 | } | |
434 | else { | |
435 | ok(&Devel::PPPort::foldEQ_utf8("A\x{100}", 3, 1, "a\x{101}", 3, 1), 1); | |
436 | ok(&Devel::PPPort::foldEQ_utf8("A\x{100}", 3, 1, "a\x{102}", 3, 1), 0); | |
437 | ok(&Devel::PPPort::foldEQ_utf8("A\x{100}", 3, 1, "b\x{101}", 3, 1), 0); | |
438 | } | |
439 | ||
7899b636 KW |
440 | my $ret = &Devel::PPPort::utf8_to_uvchr("A"); |
441 | ok($ret->[0], ord("A")); | |
442 | ok($ret->[1], 1); | |
443 | ||
444 | $ret = &Devel::PPPort::utf8_to_uvchr("\0"); | |
445 | ok($ret->[0], 0); | |
446 | ok($ret->[1], 1); | |
447 | ||
448 | $ret = &Devel::PPPort::utf8_to_uvchr_buf("A", 0); | |
449 | ok($ret->[0], ord("A")); | |
450 | ok($ret->[1], 1); | |
451 | ||
452 | $ret = &Devel::PPPort::utf8_to_uvchr_buf("\0", 0); | |
453 | ok($ret->[0], 0); | |
454 | ok($ret->[1], 1); | |
455 | ||
456 | if (ord("A") != 65) { # tests not valid for EBCDIC | |
457 | ok(1, 1) for 1 .. (2 + 4 + (7 * 5)); | |
458 | } | |
459 | else { | |
460 | $ret = &Devel::PPPort::utf8_to_uvchr_buf("\xc4\x80", 0); | |
461 | ok($ret->[0], 0x100); | |
462 | ok($ret->[1], 2); | |
463 | ||
464 | my @warnings; | |
465 | local $SIG{__WARN__} = sub { push @warnings, @_; }; | |
466 | ||
467 | { | |
468 | BEGIN { 'warnings'->import('utf8') if "$]" gt '5.006' } | |
469 | $ret = &Devel::PPPort::utf8_to_uvchr("\xe0\0\x80"); | |
470 | ok($ret->[0], 0); | |
471 | ok($ret->[1], -1); | |
472 | ||
473 | BEGIN { 'warnings'->unimport() if "$]" gt '5.006' } | |
474 | $ret = &Devel::PPPort::utf8_to_uvchr("\xe0\0\x80"); | |
475 | ok($ret->[0], 0xFFFD); | |
476 | ok($ret->[1], 1); | |
477 | } | |
478 | ||
479 | my @buf_tests = ( | |
480 | { | |
481 | input => "A", | |
482 | adjustment => -1, | |
483 | warning => qr/empty/, | |
484 | no_warnings_returned_length => 0, | |
485 | }, | |
486 | { | |
487 | input => "\xc4\xc5", | |
488 | adjustment => 0, | |
489 | warning => qr/non-continuation/, | |
490 | no_warnings_returned_length => 1, | |
491 | }, | |
492 | { | |
493 | input => "\xc4\x80", | |
494 | adjustment => -1, | |
495 | warning => qr/short|1 byte, need 2/, | |
496 | no_warnings_returned_length => 1, | |
497 | }, | |
498 | { | |
499 | input => "\xc0\x81", | |
500 | adjustment => 0, | |
501 | warning => qr/overlong|2 bytes, need 1/, | |
502 | no_warnings_returned_length => 2, | |
503 | }, | |
504 | { | |
505 | input => "\xe0\x80\x81", | |
506 | adjustment => 0, | |
507 | warning => qr/overlong|3 bytes, need 1/, | |
508 | no_warnings_returned_length => 3, | |
509 | }, | |
510 | { | |
511 | input => "\xf0\x80\x80\x81", | |
512 | adjustment => 0, | |
513 | warning => qr/overlong|4 bytes, need 1/, | |
514 | no_warnings_returned_length => 4, | |
515 | }, | |
516 | { # Old algorithm failed to detect this | |
517 | input => "\xff\x80\x90\x90\x90\xbf\xbf\xbf\xbf\xbf\xbf\xbf\xbf", | |
518 | adjustment => 0, | |
519 | warning => qr/overflow/, | |
520 | no_warnings_returned_length => 13, | |
521 | }, | |
522 | ); | |
523 | ||
524 | # An empty input is an assertion failure on debugging builds. It is | |
525 | # deliberately the first test. | |
526 | require Config; import Config; | |
527 | use vars '%Config'; | |
528 | if ($Config{ccflags} =~ /-DDEBUGGING/) { | |
529 | shift @buf_tests; | |
530 | ok(1, 1) for 1..5; | |
531 | } | |
532 | ||
533 | for my $test (@buf_tests) { | |
534 | my $input = $test->{'input'}; | |
535 | my $adjustment = $test->{'adjustment'}; | |
536 | my $display = 'utf8_to_uvchr_buf("'; | |
537 | for (my $i = 0; $i < length($input) + $adjustment; $i++) { | |
538 | $display .= sprintf "\\x%02x", ord substr($input, $i, 1); | |
539 | } | |
540 | ||
541 | $display .= '")'; | |
542 | my $warning = $test->{'warning'}; | |
543 | ||
544 | undef @warnings; | |
545 | BEGIN { 'warnings'->import('utf8') if "$]" gt '5.006' } | |
546 | $ret = &Devel::PPPort::utf8_to_uvchr_buf($input, $adjustment); | |
547 | ok($ret->[0], 0, "returned value $display; warnings enabled"); | |
548 | ok($ret->[1], -1, "returned length $display; warnings enabled"); | |
549 | my $all_warnings = join "; ", @warnings; | |
550 | my $contains = grep { $_ =~ $warning } $all_warnings; | |
551 | ok($contains, 1, $display | |
552 | . "; Got: '$all_warnings', which should contain '$warning'"); | |
553 | ||
554 | undef @warnings; | |
555 | BEGIN { 'warnings'->unimport('utf8') if "$]" gt '5.006' } | |
556 | $ret = &Devel::PPPort::utf8_to_uvchr_buf($input, $adjustment); | |
557 | ok($ret->[0], 0xFFFD, "returned value $display; warnings disabled"); | |
558 | ok($ret->[1], $test->{'no_warnings_returned_length'}, | |
559 | "returned length $display; warnings disabled"); | |
560 | } | |
561 | } |