__UNDEFINED__
utf8_to_uvchr_buf
+sv_len_utf8
+sv_len_utf8_nomg
=implementation
__UNDEFINED__ UNICODE_REPLACEMENT 0xFFFD
+#ifdef UTF8_MAXLEN
__UNDEFINED__ UTF8_MAXBYTES UTF8_MAXLEN
+#endif
__UNDEFINED__ UTF8_ALLOW_ANYUV 0
__UNDEFINED__ UTF8_ALLOW_EMPTY 0x0001
__UNDEFINED__ is_invariant_string(s,l) is_ascii_string(s,l)
__UNDEFINED__ is_utf8_invariant_string(s,l) is_ascii_string(s,l)
-/* Hint: is_utf8_invariant_string
- Please use this instead of is_ascii_string or is_invariant_string
-*/
+/* Hint: is_ascii_string, is_invariant_string
+ is_utf8_invariant_string() does the same thing and is preferred because its
+ name is more accurate as to what it does */
+#endif
+
+#ifdef ibcmp_utf8
+__UNDEFINED__ foldEQ_utf8(s1,pe1,l1,u1,s2,pe2,l2,u2) \
+ cBOOL(! ibcmp_utf8(s1,pe1,l1,u1,s2,pe2,l2,u2))
#endif
#if defined(is_utf8_string) && defined(UTF8SKIP)
# error Unknown character set
#endif
-#if { VERSION < 5.31.3 }
+#if { VERSION < 5.31.4 }
/* Versions prior to this accepted things that are now considered
* malformations, and didn't return -1 on error with warnings enabled
* */
#if { VERSION >= 5.6.1 } && ! defined(utf8_to_uvchr_buf)
/* Choose which underlying implementation to use. At least one must be
* present or the perl is too early to handle this function */
-# if defined(utf8n_to_uvchr) || defined(utf8_to_uv)
+# if defined(utf8n_to_uvchr) || defined(utf8_to_uvchr) || defined(utf8_to_uv)
# if defined(utf8n_to_uvchr) /* This is the preferred implementation */
# define D_PPP_utf8_to_uvchr_buf_callee utf8n_to_uvchr
-# else /* Must be at least 5.6.1 from #if above */
-# define D_PPP_utf8_to_uvchr_buf_callee(s, curlen, retlen, flags) utf8_to_uv((U8 *)(s), (curlen), (retlen), (flags))
+# elif /* Must be at least 5.6.1 from #if above; \
+ If have both regular and _simple, regular has all args */ \
+ defined(utf8_to_uv) && defined(utf8_to_uv_simple)
+# define D_PPP_utf8_to_uvchr_buf_callee utf8_to_uv
+# elif defined(utf8_to_uvchr) /* The below won't work well on error input */
+# define D_PPP_utf8_to_uvchr_buf_callee(s, curlen, retlen, flags) \
+ utf8_to_uvchr((U8 *)(s), (retlen))
+# else
+# define D_PPP_utf8_to_uvchr_buf_callee(s, curlen, retlen, flags) \
+ utf8_to_uv((U8 *)(s), (retlen))
# endif
# endif
# if { VERSION >= 5.26.0 } && { VERSION < 5.28.0 }
/* But actually, more modern versions restrict the UV to being no more than
- * what * an IV can hold, so it could, so it could still have gotten it
- * wrong about overflowing. */
+ * what an IV can hold, so it could still have gotten it wrong about
+ * overflowing. */
if (UNLIKELY(ret > IV_MAX)) {
overflows = 1;
}
#endif
+/* Hint: utf8_to_uvchr
+ Use utf8_to_uvchr_buf() instead. But ONLY if you KNOW the upper bound
+ of the input string (not resorting to using UTF8SKIP, etc., to infer it).
+ The backported utf8_to_uvchr() will do a better job to prevent most cases
+ of trying to read beyond the end of the buffer */
+
+/* Replace utf8_to_uvchr with utf8_to_uvchr_buf */
+
+#ifdef SV_NOSTEAL
+ /* Older Perl versions have broken sv_len_utf8() when passed sv does not have SVf_UTF8 flag set */
+ /* Also note that SvGETMAGIC() may change presence of SVf_UTF8 flag */
+# if { VERSION < 5.17.5 }
+# undef sv_len_utf8
+# if defined(__GNUC__) && !defined(PERL_GCC_BRACE_GROUPS_FORBIDDEN)
+# define sv_len_utf8_nomg(sv) ({ SV *_sv2 = (sv); (SvUTF8(_sv2) ? Perl_sv_len_utf8(aTHX_ (!SvGMAGICAL(_sv2) ? _sv2 : sv_mortalcopy_flags(_sv2, SV_NOSTEAL))) : ({ STRLEN _len; SvPV_nomg(_sv2, _len); _len; })); })
+# define sv_len_utf8(sv) ({ SV *_sv1 = (sv); SvGETMAGIC(_sv1); sv_len_utf8_nomg(_sv1); })
+# else
+# define sv_len_utf8_nomg(sv) (PL_Sv = (sv), (SvUTF8(PL_Sv) ? Perl_sv_len_utf8(aTHX_ (!SvGMAGICAL(PL_Sv) ? PL_Sv : sv_mortalcopy_flags(PL_Sv, SV_NOSTEAL))) : (SvPV_nomg(PL_Sv, PL_na), PL_na)))
+# define sv_len_utf8(sv) (PL_Sv = (sv), SvGETMAGIC(PL_Sv), sv_len_utf8_nomg(PL_Sv))
+# endif
+# endif
+# if defined(__GNUC__) && !defined(PERL_GCC_BRACE_GROUPS_FORBIDDEN)
+ __UNDEFINED__ sv_len_utf8_nomg(sv) ({ SV *_sv = (sv); sv_len_utf8(!SvGMAGICAL(_sv) ? _sv : sv_mortalcopy_flags(_sv, SV_NOSTEAL)); })
+# else
+ __UNDEFINED__ sv_len_utf8_nomg(sv) ((PL_Sv = (sv)), sv_len_utf8(!SvGMAGICAL(PL_Sv) ? PL_Sv : sv_mortalcopy_flags(PL_Sv, SV_NOSTEAL)))
+# endif
+#endif
+
=xsinit
#define NEED_utf8_to_uvchr_buf
#endif
+
+#ifdef foldEQ_utf8
+
+STRLEN
+foldEQ_utf8(s1, l1, u1, s2, l2, u2)
+ char *s1
+ UV l1
+ bool u1
+ char *s2
+ UV l2
+ bool u2
+ PREINIT:
+ const char *const_s1;
+ const char *const_s2;
+ CODE:
+ const_s1 = s1;
+ const_s2 = s2;
+ RETVAL = foldEQ_utf8(const_s1, NULL, l1, u1, const_s2, NULL, l2, u2);
+ OUTPUT:
+ RETVAL
+
+#endif
+
#ifdef utf8_to_uvchr_buf
AV *
#endif
-=tests plan => 55
+#ifdef SV_NOSTEAL
-BEGIN { require warnings if "$]" gt '5.006' }
+STRLEN
+sv_len_utf8(sv)
+ SV *sv
+ CODE:
+ RETVAL = sv_len_utf8(sv);
+ OUTPUT:
+ RETVAL
-# skip tests on 5.6.0 and earlier
-if ("$]" le '5.006') {
- skip 'skip: broken utf8 support', 0 for 1..55;
+STRLEN
+sv_len_utf8_nomg(sv)
+ SV *sv
+ CODE:
+ RETVAL = sv_len_utf8_nomg(sv);
+ OUTPUT:
+ RETVAL
+
+#endif
+
+=tests plan => 81
+
+BEGIN { require warnings if "$]" > '5.006' }
+
+# skip tests on 5.6.0 and earlier, plus 7.0
+if ("$]" <= '5.006' || "$]" == '5.007' ) {
+ for (1..81) {
+ skip 'skip: broken utf8 support', 0;
+ }
exit;
}
ok(&Devel::PPPort::isUTF8_CHAR("\x{100}", -1), 0);
ok(&Devel::PPPort::isUTF8_CHAR("\x{100}", 0), 2);
+if ("$]" < '5.008') {
+ for (1 ..3) {
+ ok(1, 1)
+ }
+}
+else {
+ ok(&Devel::PPPort::foldEQ_utf8("A\x{100}", 3, 1, "a\x{101}", 3, 1), 1);
+ ok(&Devel::PPPort::foldEQ_utf8("A\x{100}", 3, 1, "a\x{102}", 3, 1), 0);
+ ok(&Devel::PPPort::foldEQ_utf8("A\x{100}", 3, 1, "b\x{101}", 3, 1), 0);
+}
+
my $ret = &Devel::PPPort::utf8_to_uvchr("A");
ok($ret->[0], ord("A"));
ok($ret->[1], 1);
ok($ret->[1], 1);
if (ord("A") != 65) { # tests not valid for EBCDIC
- ok(1, 1) for 1 .. (2 + 4 + (7 * 5));
+ for (1 .. (2 + 4 + (7 * 5))) {
+ ok(1, 1);
+ }
}
else {
$ret = &Devel::PPPort::utf8_to_uvchr_buf("\xc4\x80", 0);
local $SIG{__WARN__} = sub { push @warnings, @_; };
{
- BEGIN { 'warnings'->import('utf8') if "$]" gt '5.006' }
+ BEGIN { 'warnings'->import('utf8') if "$]" > '5.006' }
$ret = &Devel::PPPort::utf8_to_uvchr("\xe0\0\x80");
ok($ret->[0], 0);
ok($ret->[1], -1);
- BEGIN { 'warnings'->unimport() if "$]" gt '5.006' }
+ BEGIN { 'warnings'->unimport() if "$]" > '5.006' }
$ret = &Devel::PPPort::utf8_to_uvchr("\xe0\0\x80");
ok($ret->[0], 0xFFFD);
ok($ret->[1], 1);
{
input => "A",
adjustment => -1,
- warning => qr/empty/,
+ warning => eval "qr/empty/",
no_warnings_returned_length => 0,
},
{
input => "\xc4\xc5",
adjustment => 0,
- warning => qr/non-continuation/,
+ warning => eval "qr/non-continuation/",
no_warnings_returned_length => 1,
},
{
input => "\xc4\x80",
adjustment => -1,
- warning => qr/short|1 byte, need 2/,
+ warning => eval "qr/short|1 byte, need 2/",
no_warnings_returned_length => 1,
},
{
input => "\xc0\x81",
adjustment => 0,
- warning => qr/overlong|2 bytes, need 1/,
+ warning => eval "qr/overlong|2 bytes, need 1/",
no_warnings_returned_length => 2,
},
{
input => "\xe0\x80\x81",
adjustment => 0,
- warning => qr/overlong|3 bytes, need 1/,
+ warning => eval "qr/overlong|3 bytes, need 1/",
no_warnings_returned_length => 3,
},
{
input => "\xf0\x80\x80\x81",
adjustment => 0,
- warning => qr/overlong|4 bytes, need 1/,
+ warning => eval "qr/overlong|4 bytes, need 1/",
no_warnings_returned_length => 4,
},
{ # Old algorithm failed to detect this
input => "\xff\x80\x90\x90\x90\xbf\xbf\xbf\xbf\xbf\xbf\xbf\xbf",
adjustment => 0,
- warning => qr/overflow/,
+ warning => eval "qr/overflow/",
no_warnings_returned_length => 13,
},
);
use vars '%Config';
if ($Config{ccflags} =~ /-DDEBUGGING/) {
shift @buf_tests;
- ok(1, 1) for 1..5;
+ for (1..5) {
+ ok(1, 1);
+ }
}
- for my $test (@buf_tests) {
+ my $test;
+ for $test (@buf_tests) {
my $input = $test->{'input'};
my $adjustment = $test->{'adjustment'};
my $display = 'utf8_to_uvchr_buf("';
- for (my $i = 0; $i < length($input) + $adjustment; $i++) {
+ my $i;
+ for ($i = 0; $i < length($input) + $adjustment; $i++) {
$display .= sprintf "\\x%02x", ord substr($input, $i, 1);
}
my $warning = $test->{'warning'};
undef @warnings;
- BEGIN { 'warnings'->import('utf8') if "$]" gt '5.006' }
+ BEGIN { 'warnings'->import('utf8') if "$]" > '5.006' }
$ret = &Devel::PPPort::utf8_to_uvchr_buf($input, $adjustment);
ok($ret->[0], 0, "returned value $display; warnings enabled");
ok($ret->[1], -1, "returned length $display; warnings enabled");
. "; Got: '$all_warnings', which should contain '$warning'");
undef @warnings;
- BEGIN { 'warnings'->unimport('utf8') if "$]" gt '5.006' }
+ BEGIN { 'warnings'->unimport('utf8') if "$]" > '5.006' }
$ret = &Devel::PPPort::utf8_to_uvchr_buf($input, $adjustment);
ok($ret->[0], 0xFFFD, "returned value $display; warnings disabled");
ok($ret->[1], $test->{'no_warnings_returned_length'},
"returned length $display; warnings disabled");
}
}
+
+if ("$]" ge '5.008') {
+ BEGIN { if ("$]" ge '5.008') { require utf8; "utf8"->import() } }
+
+ ok(Devel::PPPort::sv_len_utf8("aščť"), 4);
+ ok(Devel::PPPort::sv_len_utf8_nomg("aščť"), 4);
+
+ my $str = "áíé";
+ utf8::downgrade($str);
+ ok(Devel::PPPort::sv_len_utf8($str), 3);
+ utf8::downgrade($str);
+ ok(Devel::PPPort::sv_len_utf8_nomg($str), 3);
+ utf8::upgrade($str);
+ ok(Devel::PPPort::sv_len_utf8($str), 3);
+ utf8::upgrade($str);
+ ok(Devel::PPPort::sv_len_utf8_nomg($str), 3);
+
+ tie my $scalar, 'TieScalarCounter', "é";
+
+ ok(tied($scalar)->{fetch}, 0);
+ ok(tied($scalar)->{store}, 0);
+ ok(Devel::PPPort::sv_len_utf8($scalar), 2);
+ ok(tied($scalar)->{fetch}, 1);
+ ok(tied($scalar)->{store}, 0);
+ ok(Devel::PPPort::sv_len_utf8($scalar), 3);
+ ok(tied($scalar)->{fetch}, 2);
+ ok(tied($scalar)->{store}, 0);
+ ok(Devel::PPPort::sv_len_utf8($scalar), 4);
+ ok(tied($scalar)->{fetch}, 3);
+ ok(tied($scalar)->{store}, 0);
+ ok(Devel::PPPort::sv_len_utf8_nomg($scalar), 4);
+ ok(tied($scalar)->{fetch}, 3);
+ ok(tied($scalar)->{store}, 0);
+ ok(Devel::PPPort::sv_len_utf8_nomg($scalar), 4);
+ ok(tied($scalar)->{fetch}, 3);
+ ok(tied($scalar)->{store}, 0);
+} else {
+ for (1..23) {
+ skip 'skip: no SV_NOSTEAL support', 0;
+ }
+}
+
+package TieScalarCounter;
+
+sub TIESCALAR {
+ my ($class, $value) = @_;
+ return bless { fetch => 0, store => 0, value => $value }, $class;
+}
+
+sub FETCH {
+ BEGIN { if ("$]" ge '5.008') { require utf8; "utf8"->import() } }
+ my ($self) = @_;
+ $self->{fetch}++;
+ return $self->{value} .= "é";
+}
+
+sub STORE {
+ my ($self, $value) = @_;
+ $self->{store}++;
+ $self->{value} = $value;
+}