This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
parts/inc/utf8: Don't use postfix for in D:P tests
[perl5.git] / dist / Devel-PPPort / parts / inc / utf8
CommitLineData
7899b636
KW
1=provides
2
3__UNDEFINED__
4utf8_to_uvchr_buf
0570adb7
P
5sv_len_utf8
6sv_len_utf8_nomg
7899b636
KW
7
8=implementation
9
10#define D_PPP_MIN(a,b) (((a) <= (b)) ? (a) : (b))
11
12__UNDEFINED__ UNICODE_REPLACEMENT 0xFFFD
13
14__UNDEFINED__ UTF8_MAXBYTES UTF8_MAXLEN
15
16__UNDEFINED__ UTF8_ALLOW_ANYUV 0
17__UNDEFINED__ UTF8_ALLOW_EMPTY 0x0001
18__UNDEFINED__ UTF8_ALLOW_CONTINUATION 0x0002
19__UNDEFINED__ UTF8_ALLOW_NON_CONTINUATION 0x0004
20__UNDEFINED__ UTF8_ALLOW_SHORT 0x0008
21__UNDEFINED__ UTF8_ALLOW_LONG 0x0010
22__UNDEFINED__ UTF8_ALLOW_OVERFLOW 0x0080
23__UNDEFINED__ UTF8_ALLOW_ANY ( UTF8_ALLOW_CONTINUATION \
24 |UTF8_ALLOW_NON_CONTINUATION \
25 |UTF8_ALLOW_SHORT \
26 |UTF8_ALLOW_LONG \
27 |UTF8_ALLOW_OVERFLOW)
28
29#if defined UTF8SKIP
30
31/* Don't use official version because it uses MIN, which may not be available */
32#undef UTF8_SAFE_SKIP
33
34__UNDEFINED__ UTF8_SAFE_SKIP(s, e) ( \
35 ((((e) - (s)) <= 0) \
36 ? 0 \
37 : D_PPP_MIN(((e) - (s)), UTF8SKIP(s))))
38#endif
39
d2b44e2b
KW
40#ifdef is_ascii_string
41__UNDEFINED__ is_invariant_string(s,l) is_ascii_string(s,l)
42__UNDEFINED__ is_utf8_invariant_string(s,l) is_ascii_string(s,l)
43
44/* Hint: is_utf8_invariant_string
45 Please use this instead of is_ascii_string or is_invariant_string
46*/
47#endif
48
2ff9e5e8
KW
49#ifdef ibcmp_utf8
50__UNDEFINED__ foldEQ_utf8(s1,pe1,l1,u1,s2,pe2,l2,u2) \
51 cBOOL(! ibcmp_utf8(s1,pe1,l1,u1,s2,pe2,l2,u2))
52#endif
53
7899b636
KW
54#if defined(is_utf8_string) && defined(UTF8SKIP)
55__UNDEFINED__ isUTF8_CHAR(s0, e) ( \
56 (e) <= (s0) || ! is_utf8_string(s0, D_PPP_MIN(UTF8SKIP(s0), (e) - (s0))) \
57 ? 0 \
58 : UTF8SKIP(s0))
59#endif
60
8fc7db65
KW
61#if 'A' == 65
62__UNDEFINED__ BOM_UTF8 "\xEF\xBB\xBF"
63__UNDEFINED__ REPLACEMENT_CHARACTER_UTF8 "\xEF\xBF\xBD"
64#elif '^' == 95
65__UNDEFINED__ BOM_UTF8 "\xDD\x73\x66\x73"
66__UNDEFINED__ REPLACEMENT_CHARACTER_UTF8 "\xDD\x73\x73\x71"
67#elif '^' == 176
68__UNDEFINED__ BOM_UTF8 "\xDD\x72\x65\x72"
69__UNDEFINED__ REPLACEMENT_CHARACTER_UTF8 "\xDD\x72\x72\x70"
70#else
71# error Unknown character set
72#endif
73
96656f64 74#if { VERSION < 5.31.4 }
7899b636
KW
75 /* Versions prior to this accepted things that are now considered
76 * malformations, and didn't return -1 on error with warnings enabled
77 * */
78# undef utf8_to_uvchr_buf
79#endif
80
81/* This implementation brings modern, generally more restricted standards to
82 * utf8_to_uvchr_buf. Some of these are security related, and clearly must
83 * be done. But its arguable that the others need not, and hence should not.
84 * The reason they're here is that a module that intends to play with the
85 * latest perls should be able to work the same in all releases. An example is
86 * that perl no longer accepts any UV for a code point, but limits them to
87 * IV_MAX or below. This is for future internal use of the larger code points.
88 * If it turns out that some of these changes are breaking code that isn't
89 * intended to work with modern perls, the tighter restrictions could be
90 * relaxed. khw thinks this is unlikely, but has been wrong in the past. */
91
92/* 5.6.0 is the first release with UTF-8, and we don't implement this function
93 * there due to its likely lack of still being in use, and the underlying
94 * implementation is very different from later ones, without the later
95 * safeguards, so would require extra work to deal with */
96#if { VERSION >= 5.6.1 } && ! defined(utf8_to_uvchr_buf)
97 /* Choose which underlying implementation to use. At least one must be
98 * present or the perl is too early to handle this function */
99# if defined(utf8n_to_uvchr) || defined(utf8_to_uv)
100# if defined(utf8n_to_uvchr) /* This is the preferred implementation */
101# define D_PPP_utf8_to_uvchr_buf_callee utf8n_to_uvchr
102# else /* Must be at least 5.6.1 from #if above */
103# define D_PPP_utf8_to_uvchr_buf_callee(s, curlen, retlen, flags) utf8_to_uv((U8 *)(s), (curlen), (retlen), (flags))
104# endif
105# endif
106
107# if { NEED utf8_to_uvchr_buf }
108
109UV
110utf8_to_uvchr_buf(pTHX_ const U8 *s, const U8 *send, STRLEN *retlen)
111{
112 UV ret;
113 STRLEN curlen;
114 bool overflows = 0;
115 const U8 *cur_s = s;
116 const bool do_warnings = ckWARN_d(WARN_UTF8);
117# if { VERSION < 5.26.0 } && ! defined(EBCDIC)
118 STRLEN overflow_length = 0;
119# endif
120
121 if (send > s) {
122 curlen = send - s;
123 }
124 else {
125 assert(0); /* Modern perls die under this circumstance */
126 curlen = 0;
127 if (! do_warnings) { /* Handle empty here if no warnings needed */
128 if (retlen) *retlen = 0;
129 return UNICODE_REPLACEMENT;
130 }
131 }
132
133# if { VERSION < 5.26.0 } && ! defined(EBCDIC)
134
135 /* Perl did not properly detect overflow for much of its history on
136 * non-EBCDIC platforms, often returning an overlong value which may or may
137 * not have been tolerated in the call. Also, earlier versions, when they
138 * did detect overflow, may have disallowed it completely. Modern ones can
139 * replace it with the REPLACEMENT CHARACTER, depending on calling
140 * parameters. Therefore detect it ourselves in releases it was
141 * problematic in. */
142
143 if (curlen > 0 && UNLIKELY(*s >= 0xFE)) {
144
145 /* First, on a 32-bit machine the first byte being at least \xFE
146 * automatically is overflow, as it indicates something requiring more
147 * than 31 bits */
148 if (sizeof(ret) < 8) {
149 overflows = 1;
150 overflow_length = 7;
151 }
152 else {
153 const U8 highest[] = /* 2*63-1 */
154 "\xFF\x80\x87\xBF\xBF\xBF\xBF\xBF\xBF\xBF\xBF\xBF\xBF";
155 const U8 *cur_h = highest;
156
157 for (cur_s = s; cur_s < send; cur_s++, cur_h++) {
158 if (UNLIKELY(*cur_s == *cur_h)) {
159 continue;
160 }
161
162 /* If this byte is larger than the corresponding highest UTF-8
163 * byte, the sequence overflows; otherwise the byte is less
164 * than (as we handled the equality case above), and so the
165 * sequence doesn't overflow */
166 overflows = *cur_s > *cur_h;
167 break;
168
169 }
170
171 /* Here, either we set the bool and broke out of the loop, or got
172 * to the end and all bytes are the same which indicates it doesn't
173 * overflow. If it did overflow, it would be this number of bytes
174 * */
175 overflow_length = 13;
176 }
177 }
178
179 if (UNLIKELY(overflows)) {
180 ret = 0;
181
182 if (! do_warnings && retlen) {
183 *retlen = overflow_length;
184 }
185 }
186 else
187
188# endif /* < 5.26 */
189
190 /* Here, we are either in a release that properly detects overflow, or
191 * we have checked for overflow and the next statement is executing as
192 * part of the above conditional where we know we don't have overflow.
193 *
194 * The modern versions allow anything that evaluates to a legal UV, but
195 * not overlongs nor an empty input */
196 ret = D_PPP_utf8_to_uvchr_buf_callee(
197 s, curlen, retlen, (UTF8_ALLOW_ANYUV
198 & ~(UTF8_ALLOW_LONG|UTF8_ALLOW_EMPTY)));
199
200# if { VERSION >= 5.26.0 } && { VERSION < 5.28.0 }
201
202 /* But actually, more modern versions restrict the UV to being no more than
203 * what * an IV can hold, so it could, so it could still have gotten it
204 * wrong about overflowing. */
205 if (UNLIKELY(ret > IV_MAX)) {
206 overflows = 1;
207 }
208
209# endif
210
211 if (UNLIKELY(overflows)) {
212 if (! do_warnings) {
213 if (retlen) {
214 *retlen = D_PPP_MIN(*retlen, UTF8SKIP(s));
215 *retlen = D_PPP_MIN(*retlen, curlen);
216 }
217 return UNICODE_REPLACEMENT;
218 }
219 else {
220
221 /* We use the error message in use from 5.8-5.26 */
222 Perl_warner(aTHX_ packWARN(WARN_UTF8),
223 "Malformed UTF-8 character (overflow at 0x%" UVxf
224 ", byte 0x%02x, after start byte 0x%02x)",
225 ret, *cur_s, *s);
226 if (retlen) {
227 *retlen = (STRLEN) -1;
228 }
229 return 0;
230 }
231 }
232
233 /* Here, did not overflow, but if it failed for some other reason, and
234 * warnings are off, to emulate the behavior of the real utf8_to_uvchr(),
235 * try again, allowing anything. (Note a return of 0 is ok if the input
236 * was '\0') */
237 if (UNLIKELY(ret == 0 && (curlen == 0 || *s != '\0'))) {
238
239 /* If curlen is 0, we already handled the case where warnings are
240 * disabled, so this 'if' will be true, and so later on, we know that
241 * 's' is dereferencible */
242 if (do_warnings) {
243 *retlen = (STRLEN) -1;
244 }
245 else {
246 ret = D_PPP_utf8_to_uvchr_buf_callee(
247 s, curlen, retlen, UTF8_ALLOW_ANY);
248 /* Override with the REPLACEMENT character, as that is what the
249 * modern version of this function returns */
250 ret = UNICODE_REPLACEMENT;
251
252# if { VERSION < 5.16.0 }
253
254 /* Versions earlier than this don't necessarily return the proper
255 * length. It should not extend past the end of string, nor past
256 * what the first byte indicates the length is, nor past the
257 * continuation characters */
258 if (retlen && *retlen >= 0) {
259 unsigned int i = 1;
260
261 *retlen = D_PPP_MIN(*retlen, curlen);
262 *retlen = D_PPP_MIN(*retlen, UTF8SKIP(s));
263 do {
264 if (s[i] < 0x80 || s[i] > 0xBF) {
265 *retlen = i;
266 break;
267 }
268 } while (++i < *retlen);
269 }
270
271# endif
272
273 }
274 }
275
276 return ret;
277}
278
279# endif
280#endif
281
282#if defined(UTF8SKIP) && defined(utf8_to_uvchr_buf)
283#undef utf8_to_uvchr /* Always redefine this unsafe function so that it refuses
284 to read past a NUL, making it much less likely to read
285 off the end of the buffer. A NUL indicates the start
286 of the next character anyway. If the input isn't
287 NUL-terminated, the function remains unsafe, as it
288 always has been. */
289
290__UNDEFINED__ utf8_to_uvchr(s, lp) \
291 ((*(s) == '\0') \
292 ? utf8_to_uvchr_buf(s,((s)+1), lp) /* Handle single NUL specially */ \
293 : utf8_to_uvchr_buf(s, (s) + my_strnlen((char *) (s), UTF8SKIP(s)), (lp)))
294
295#endif
296
0570adb7
P
297#ifdef SV_NOSTEAL
298 /* Older Perl versions have broken sv_len_utf8() when passed sv does not have SVf_UTF8 flag set */
299 /* Also note that SvGETMAGIC() may change presence of SVf_UTF8 flag */
300# if { VERSION < 5.17.5 }
301# undef sv_len_utf8
302# if defined(__GNUC__) && !defined(PERL_GCC_BRACE_GROUPS_FORBIDDEN)
303# define sv_len_utf8_nomg(sv) ({ SV *_sv2 = (sv); (SvUTF8(_sv2) ? Perl_sv_len_utf8(aTHX_ (!SvGMAGICAL(_sv2) ? _sv2 : sv_mortalcopy_flags(_sv2, SV_NOSTEAL))) : ({ STRLEN _len; SvPV_nomg(_sv2, _len); _len; })); })
304# define sv_len_utf8(sv) ({ SV *_sv1 = (sv); SvGETMAGIC(_sv1); sv_len_utf8_nomg(_sv1); })
305# else
306# define sv_len_utf8_nomg(sv) (PL_Sv = (sv), (SvUTF8(PL_Sv) ? Perl_sv_len_utf8(aTHX_ (!SvGMAGICAL(PL_Sv) ? PL_Sv : sv_mortalcopy_flags(PL_Sv, SV_NOSTEAL))) : (SvPV_nomg(PL_Sv, PL_na), PL_na)))
307# define sv_len_utf8(sv) (PL_Sv = (sv), SvGETMAGIC(PL_Sv), sv_len_utf8_nomg(PL_Sv))
308# endif
309# endif
310# if defined(__GNUC__) && !defined(PERL_GCC_BRACE_GROUPS_FORBIDDEN)
311 __UNDEFINED__ sv_len_utf8_nomg(sv) ({ SV *_sv = (sv); sv_len_utf8(!SvGMAGICAL(_sv) ? _sv : sv_mortalcopy_flags(_sv, SV_NOSTEAL)); })
312# else
313 __UNDEFINED__ sv_len_utf8_nomg(sv) ((PL_Sv = (sv)), sv_len_utf8(!SvGMAGICAL(PL_Sv) ? PL_Sv : sv_mortalcopy_flags(PL_Sv, SV_NOSTEAL)))
314# endif
315#endif
316
7899b636
KW
317=xsinit
318
319#define NEED_utf8_to_uvchr_buf
320
321=xsubs
322
323#if defined(UTF8_SAFE_SKIP) && defined(UTF8SKIP)
324
325STRLEN
326UTF8_SAFE_SKIP(s, adjustment)
327 char * s
328 int adjustment
329 PREINIT:
330 const char *const_s;
331 CODE:
332 const_s = s;
333 /* Instead of passing in an 'e' ptr, use the real end, adjusted */
334 RETVAL = UTF8_SAFE_SKIP(const_s, s + UTF8SKIP(s) + adjustment);
335 OUTPUT:
336 RETVAL
337
338#endif
339
340#ifdef isUTF8_CHAR
341
342STRLEN
343isUTF8_CHAR(s, adjustment)
344 unsigned char * s
345 int adjustment
346 PREINIT:
347 const unsigned char *const_s;
348 const unsigned char *const_e;
349 CODE:
350 const_s = s;
351 /* Instead of passing in an 'e' ptr, use the real end, adjusted */
352 const_e = const_s + UTF8SKIP(const_s) + adjustment;
353 RETVAL = isUTF8_CHAR(const_s, const_e);
354 OUTPUT:
355 RETVAL
356
357#endif
358
2ff9e5e8
KW
359
360#ifdef foldEQ_utf8
361
362STRLEN
363foldEQ_utf8(s1, l1, u1, s2, l2, u2)
364 char *s1
365 UV l1
366 bool u1
367 char *s2
368 UV l2
369 bool u2
370 PREINIT:
371 const char *const_s1;
372 const char *const_s2;
373 CODE:
374 const_s1 = s1;
375 const_s2 = s2;
376 RETVAL = foldEQ_utf8(const_s1, NULL, l1, u1, const_s2, NULL, l2, u2);
377 OUTPUT:
378 RETVAL
379
380#endif
381
7899b636
KW
382#ifdef utf8_to_uvchr_buf
383
384AV *
385utf8_to_uvchr_buf(s, adjustment)
386 unsigned char *s
387 int adjustment
388 PREINIT:
389 AV *av;
390 STRLEN len;
391 const unsigned char *const_s;
392 CODE:
393 av = newAV();
394 const_s = s;
395 av_push(av, newSVuv(utf8_to_uvchr_buf(const_s,
396 s + UTF8SKIP(s) + adjustment,
397 &len)));
398 if (len == (STRLEN) -1) {
399 av_push(av, newSViv(-1));
400 }
401 else {
402 av_push(av, newSVuv(len));
403 }
404 RETVAL = av;
405 OUTPUT:
406 RETVAL
407
408#endif
409
410#ifdef utf8_to_uvchr
411
412AV *
413utf8_to_uvchr(s)
414 unsigned char *s
415 PREINIT:
416 AV *av;
417 STRLEN len;
418 const unsigned char *const_s;
419 CODE:
420 av = newAV();
421 const_s = s;
422 av_push(av, newSVuv(utf8_to_uvchr(const_s, &len)));
423 if (len == (STRLEN) -1) {
424 av_push(av, newSViv(-1));
425 }
426 else {
427 av_push(av, newSVuv(len));
428 }
429 RETVAL = av;
430 OUTPUT:
431 RETVAL
432
433#endif
434
0570adb7
P
435#ifdef SV_NOSTEAL
436
437STRLEN
438sv_len_utf8(sv)
439 SV *sv
440 CODE:
441 RETVAL = sv_len_utf8(sv);
442 OUTPUT:
443 RETVAL
444
445STRLEN
446sv_len_utf8_nomg(sv)
447 SV *sv
448 CODE:
449 RETVAL = sv_len_utf8_nomg(sv);
450 OUTPUT:
451 RETVAL
452
453#endif
454
455=tests plan => 81
7899b636
KW
456
457BEGIN { require warnings if "$]" gt '5.006' }
458
459# skip tests on 5.6.0 and earlier
460if ("$]" le '5.006') {
0570adb7 461 skip 'skip: broken utf8 support', 0 for 1..81;
7899b636
KW
462 exit;
463}
464
465ok(&Devel::PPPort::UTF8_SAFE_SKIP("A", 0), 1);
466ok(&Devel::PPPort::UTF8_SAFE_SKIP("A", -1), 0);
467
468ok(&Devel::PPPort::isUTF8_CHAR("A", -1), 0);
469ok(&Devel::PPPort::isUTF8_CHAR("A", 0), 1);
470ok(&Devel::PPPort::isUTF8_CHAR("\x{100}", -1), 0);
471ok(&Devel::PPPort::isUTF8_CHAR("\x{100}", 0), 2);
472
2ff9e5e8
KW
473if ("$]" lt '5.008') {
474 ok(1, 1) for 1 ..3
475}
476else {
477 ok(&Devel::PPPort::foldEQ_utf8("A\x{100}", 3, 1, "a\x{101}", 3, 1), 1);
478 ok(&Devel::PPPort::foldEQ_utf8("A\x{100}", 3, 1, "a\x{102}", 3, 1), 0);
479 ok(&Devel::PPPort::foldEQ_utf8("A\x{100}", 3, 1, "b\x{101}", 3, 1), 0);
480}
481
7899b636
KW
482my $ret = &Devel::PPPort::utf8_to_uvchr("A");
483ok($ret->[0], ord("A"));
484ok($ret->[1], 1);
485
486$ret = &Devel::PPPort::utf8_to_uvchr("\0");
487ok($ret->[0], 0);
488ok($ret->[1], 1);
489
490$ret = &Devel::PPPort::utf8_to_uvchr_buf("A", 0);
491ok($ret->[0], ord("A"));
492ok($ret->[1], 1);
493
494$ret = &Devel::PPPort::utf8_to_uvchr_buf("\0", 0);
495ok($ret->[0], 0);
496ok($ret->[1], 1);
497
498if (ord("A") != 65) { # tests not valid for EBCDIC
499 ok(1, 1) for 1 .. (2 + 4 + (7 * 5));
500}
501else {
502 $ret = &Devel::PPPort::utf8_to_uvchr_buf("\xc4\x80", 0);
503 ok($ret->[0], 0x100);
504 ok($ret->[1], 2);
505
506 my @warnings;
507 local $SIG{__WARN__} = sub { push @warnings, @_; };
508
509 {
510 BEGIN { 'warnings'->import('utf8') if "$]" gt '5.006' }
511 $ret = &Devel::PPPort::utf8_to_uvchr("\xe0\0\x80");
512 ok($ret->[0], 0);
513 ok($ret->[1], -1);
514
515 BEGIN { 'warnings'->unimport() if "$]" gt '5.006' }
516 $ret = &Devel::PPPort::utf8_to_uvchr("\xe0\0\x80");
517 ok($ret->[0], 0xFFFD);
518 ok($ret->[1], 1);
519 }
520
521 my @buf_tests = (
522 {
523 input => "A",
524 adjustment => -1,
525 warning => qr/empty/,
526 no_warnings_returned_length => 0,
527 },
528 {
529 input => "\xc4\xc5",
530 adjustment => 0,
531 warning => qr/non-continuation/,
532 no_warnings_returned_length => 1,
533 },
534 {
535 input => "\xc4\x80",
536 adjustment => -1,
537 warning => qr/short|1 byte, need 2/,
538 no_warnings_returned_length => 1,
539 },
540 {
541 input => "\xc0\x81",
542 adjustment => 0,
543 warning => qr/overlong|2 bytes, need 1/,
544 no_warnings_returned_length => 2,
545 },
546 {
547 input => "\xe0\x80\x81",
548 adjustment => 0,
549 warning => qr/overlong|3 bytes, need 1/,
550 no_warnings_returned_length => 3,
551 },
552 {
553 input => "\xf0\x80\x80\x81",
554 adjustment => 0,
555 warning => qr/overlong|4 bytes, need 1/,
556 no_warnings_returned_length => 4,
557 },
558 { # Old algorithm failed to detect this
559 input => "\xff\x80\x90\x90\x90\xbf\xbf\xbf\xbf\xbf\xbf\xbf\xbf",
560 adjustment => 0,
561 warning => qr/overflow/,
562 no_warnings_returned_length => 13,
563 },
564 );
565
566 # An empty input is an assertion failure on debugging builds. It is
567 # deliberately the first test.
568 require Config; import Config;
569 use vars '%Config';
570 if ($Config{ccflags} =~ /-DDEBUGGING/) {
571 shift @buf_tests;
572 ok(1, 1) for 1..5;
573 }
574
575 for my $test (@buf_tests) {
576 my $input = $test->{'input'};
577 my $adjustment = $test->{'adjustment'};
578 my $display = 'utf8_to_uvchr_buf("';
579 for (my $i = 0; $i < length($input) + $adjustment; $i++) {
580 $display .= sprintf "\\x%02x", ord substr($input, $i, 1);
581 }
582
583 $display .= '")';
584 my $warning = $test->{'warning'};
585
586 undef @warnings;
587 BEGIN { 'warnings'->import('utf8') if "$]" gt '5.006' }
588 $ret = &Devel::PPPort::utf8_to_uvchr_buf($input, $adjustment);
589 ok($ret->[0], 0, "returned value $display; warnings enabled");
590 ok($ret->[1], -1, "returned length $display; warnings enabled");
591 my $all_warnings = join "; ", @warnings;
592 my $contains = grep { $_ =~ $warning } $all_warnings;
593 ok($contains, 1, $display
594 . "; Got: '$all_warnings', which should contain '$warning'");
595
596 undef @warnings;
597 BEGIN { 'warnings'->unimport('utf8') if "$]" gt '5.006' }
598 $ret = &Devel::PPPort::utf8_to_uvchr_buf($input, $adjustment);
599 ok($ret->[0], 0xFFFD, "returned value $display; warnings disabled");
600 ok($ret->[1], $test->{'no_warnings_returned_length'},
601 "returned length $display; warnings disabled");
602 }
603}
0570adb7
P
604
605if ("$]" ge '5.008') {
606 BEGIN { if ("$]" ge '5.008') { require utf8; "utf8"->import() } }
607
608 ok(Devel::PPPort::sv_len_utf8("aščť"), 4);
609 ok(Devel::PPPort::sv_len_utf8_nomg("aščť"), 4);
610
611 my $str = "áíé";
612 utf8::downgrade($str);
613 ok(Devel::PPPort::sv_len_utf8($str), 3);
614 utf8::downgrade($str);
615 ok(Devel::PPPort::sv_len_utf8_nomg($str), 3);
616 utf8::upgrade($str);
617 ok(Devel::PPPort::sv_len_utf8($str), 3);
618 utf8::upgrade($str);
619 ok(Devel::PPPort::sv_len_utf8_nomg($str), 3);
620
621 tie my $scalar, 'TieScalarCounter', "é";
622
623 ok(tied($scalar)->{fetch}, 0);
624 ok(tied($scalar)->{store}, 0);
625 ok(Devel::PPPort::sv_len_utf8($scalar), 2);
626 ok(tied($scalar)->{fetch}, 1);
627 ok(tied($scalar)->{store}, 0);
628 ok(Devel::PPPort::sv_len_utf8($scalar), 3);
629 ok(tied($scalar)->{fetch}, 2);
630 ok(tied($scalar)->{store}, 0);
631 ok(Devel::PPPort::sv_len_utf8($scalar), 4);
632 ok(tied($scalar)->{fetch}, 3);
633 ok(tied($scalar)->{store}, 0);
634 ok(Devel::PPPort::sv_len_utf8_nomg($scalar), 4);
635 ok(tied($scalar)->{fetch}, 3);
636 ok(tied($scalar)->{store}, 0);
637 ok(Devel::PPPort::sv_len_utf8_nomg($scalar), 4);
638 ok(tied($scalar)->{fetch}, 3);
639 ok(tied($scalar)->{store}, 0);
640} else {
8583f9b2
KW
641 for (1..23) {
642 skip 'skip: no SV_NOSTEAL support', 0;
643 }
0570adb7
P
644}
645
646package TieScalarCounter;
647
648sub TIESCALAR {
649 my ($class, $value) = @_;
650 return bless { fetch => 0, store => 0, value => $value }, $class;
651}
652
653sub FETCH {
654 BEGIN { if ("$]" ge '5.008') { require utf8; "utf8"->import() } }
655 my ($self) = @_;
656 $self->{fetch}++;
657 return $self->{value} .= "é";
658}
659
660sub STORE {
661 my ($self, $value) = @_;
662 $self->{store}++;
663 $self->{value} = $value;
664}