}
}
+# #80190 update pos, and cached length/position-mapping after
+# utf8 upgrade/downgrade, encode/decode
+
+for my $pos (0..5) {
+
+ my $pos1 = ($pos >= 3) ? 2 : ($pos >= 1) ? 1 : 0;
+ my $pos2 = ($pos1 == 2) ? 3 : $pos1;
+
+ my $p;
+ my $s = "A\xc8\x81\xe8\xab\x86\x{100}";
+ chop($s);
+
+ pos($s) = $pos;
+ # also sets cache
+ is(length($s), 6, "(pos $pos) len before utf8::downgrade");
+ is(pos($s), $pos, "(pos $pos) pos before utf8::downgrade");
+ utf8::downgrade($s);
+ is(length($s), 6, "(pos $pos) len after utf8::downgrade");
+ is(pos($s), $pos, "(pos $pos) pos after utf8::downgrade");
+ is($s, "A\xc8\x81\xe8\xab\x86","(pos $pos) str after utf8::downgrade");
+ utf8::decode($s);
+ is(length($s), 3, "(pos $pos) len after D; utf8::decode");
+ is(pos($s), $pos1, "(pos $pos) pos after D; utf8::decode");
+ is($s, "A\x{201}\x{8ac6}", "(pos $pos) str after D; utf8::decode");
+ utf8::encode($s);
+ is(length($s), 6, "(pos $pos) len after D; utf8::encode");
+ is(pos($s), $pos2, "(pos $pos) pos after D; utf8::encode");
+ is($s, "A\xc8\x81\xe8\xab\x86","(pos $pos) str after D; utf8::encode");
+
+ $s = "A\xc8\x81\xe8\xab\x86";
+
+ pos($s) = $pos;
+ is(length($s), 6, "(pos $pos) len before utf8::upgrade");
+ is(pos($s), $pos, "(pos $pos) pos before utf8::upgrade");
+ utf8::upgrade($s);
+ is(length($s), 6, "(pos $pos) len after utf8::upgrade");
+ is(pos($s), $pos, "(pos $pos) pos after utf8::upgrade");
+ is($s, "A\xc8\x81\xe8\xab\x86","(pos $pos) str after utf8::upgrade");
+ utf8::decode($s);
+ is(length($s), 3, "(pos $pos) len after U; utf8::decode");
+ is(pos($s), $pos1, "(pos $pos) pos after U; utf8::decode");
+ is($s, "A\x{201}\x{8ac6}", "(pos $pos) str after U; utf8::decode");
+ utf8::encode($s);
+ is(length($s), 6, "(pos $pos) len after U; utf8::encode");
+ is(pos($s), $pos2, "(pos $pos) pos after U; utf8::encode");
+ is($s, "A\xc8\x81\xe8\xab\x86","(pos $pos) str after U; utf8::encode");
+}
+
done_testing();
}
}
}
+
+ if (SvTYPE(sv) >= SVt_PVMG && SvMAGIC(sv)) {
+ /* Update pos. We do it at the end rather than during
+ * the upgrade, to avoid slowing down the common case
+ * (upgrade without pos) */
+ MAGIC * mg = mg_find(sv, PERL_MAGIC_regex_global);
+ if (mg) {
+ I32 pos = mg->mg_len;
+ if (pos > 0 && (U32)pos > invariant_head) {
+ U8 *d = (U8*) SvPVX(sv) + invariant_head;
+ STRLEN n = (U32)pos - invariant_head;
+ while (n > 0) {
+ if (UTF8_IS_START(*d))
+ d++;
+ d++;
+ n--;
+ }
+ mg->mg_len = d - (U8*)SvPVX(sv);
+ }
+ }
+ if ((mg = mg_find(sv, PERL_MAGIC_utf8)))
+ magic_setutf8(sv,mg); /* clear UTF8 cache */
+ }
}
}
if (SvCUR(sv)) {
U8 *s;
STRLEN len;
+ int mg_flags = SV_GMAGIC;
if (SvIsCOW(sv)) {
sv_force_normal_flags(sv, 0);
}
- s = (U8 *) SvPV(sv, len);
+ if (SvTYPE(sv) >= SVt_PVMG && SvMAGIC(sv)) {
+ /* update pos */
+ MAGIC * mg = mg_find(sv, PERL_MAGIC_regex_global);
+ if (mg) {
+ I32 pos = mg->mg_len;
+ if (pos > 0) {
+ sv_pos_b2u(sv, &pos);
+ mg_flags = 0; /* sv_pos_b2u does get magic */
+ mg->mg_len = pos;
+ }
+ }
+ if ((mg = mg_find(sv, PERL_MAGIC_utf8)))
+ magic_setutf8(sv,mg); /* clear UTF8 cache */
+
+ }
+ s = (U8 *) SvPV_flags(sv, len, mg_flags);
+
if (!utf8_to_bytes(s, &len)) {
if (fail_ok)
return FALSE;
PERL_ARGS_ASSERT_SV_UTF8_DECODE;
if (SvPOKp(sv)) {
- const U8 *c;
+ const U8 *start, *c;
const U8 *e;
/* The octets may have got themselves encoded - get them back as
/* it is actually just a matter of turning the utf8 flag on, but
* we want to make sure everything inside is valid utf8 first.
*/
- c = (const U8 *) SvPVX_const(sv);
+ c = start = (const U8 *) SvPVX_const(sv);
if (!is_utf8_string(c, SvCUR(sv)+1))
return FALSE;
e = (const U8 *) SvEND(sv);
break;
}
}
+ if (SvTYPE(sv) >= SVt_PVMG && SvMAGIC(sv)) {
+ /* adjust pos to the start of a UTF8 char sequence */
+ MAGIC * mg = mg_find(sv, PERL_MAGIC_regex_global);
+ if (mg) {
+ I32 pos = mg->mg_len;
+ if (pos > 0) {
+ for (c = start + pos; c > start; c--) {
+ if (UTF8_IS_START(*c))
+ break;
+ }
+ mg->mg_len = c - start;
+ }
+ }
+ if ((mg = mg_find(sv, PERL_MAGIC_utf8)))
+ magic_setutf8(sv,mg); /* clear UTF8 cache */
+ }
}
return TRUE;
}
}
FREETMPS;
LEAVE;
+ if (SvTYPE(sv) >= SVt_PVMG && SvMAGIC(sv)) {
+ /* clear pos and any utf8 cache */
+ MAGIC * mg = mg_find(sv, PERL_MAGIC_regex_global);
+ if (mg)
+ mg->mg_len = -1;
+ if ((mg = mg_find(sv, PERL_MAGIC_utf8)))
+ magic_setutf8(sv,mg); /* clear UTF8 cache */
+ }
SvUTF8_on(sv);
return SvPVX(sv);
}