X-Git-Url: https://perl5.git.perl.org/perl5.git/blobdiff_plain/685289b5657b776e8a3871de68a57785e6ccd797..0c415a7950ced3bdd13d9361e7154695c677851b:/utf8.c diff --git a/utf8.c b/utf8.c index 6600023..184e429 100644 --- a/utf8.c +++ b/utf8.c @@ -507,7 +507,13 @@ determinable reasonable value. The UTF8_CHECK_ONLY flag overrides the behavior when a non-allowed (by other flags) malformation is found. If this flag is set, the routine assumes that the caller will raise a warning, and this function will silently just set -C to C<-1> and return zero. +C to C<-1> (cast to C) and return zero. + +Note that this API requires disambiguation between successful decoding a NUL +character, and an error return (unless the UTF8_CHECK_ONLY flag is set), as +in both cases, 0 is returned. To disambiguate, upon a zero return, see if the +first byte of C is 0 as well. If so, the input was a NUL; if not, the input +had an error. Certain code points are considered problematic. These are Unicode surrogates, Unicode non-characters, and code points above the Unicode maximum of 0x10FFFF. @@ -1096,10 +1102,7 @@ Perl_utf8_length(pTHX_ const U8 *s, const U8 *e) if (e < s) goto warn_and_return; while (s < e) { - if (!UTF8_IS_INVARIANT(*s)) - s += UTF8SKIP(s); - else - s++; + s += UTF8SKIP(s); len++; } @@ -2863,19 +2866,21 @@ Perl__core_swash_init(pTHX_ const char* pkg, const char* name, SV *listsv, I32 m if (!method) { /* demand load utf8 */ ENTER; errsv_save = newSVsv(ERRSV); + SAVEFREESV(errsv_save); /* It is assumed that callers of this routine are not passing in * any user derived data. */ /* Need to do this after save_re_context() as it will set * PL_tainted to 1 while saving $1 etc (see the code after getrx: * in Perl_magic_get). Even line to create errsv_save can turn on * PL_tainted. */ - SAVEBOOL(PL_tainted); - PL_tainted = 0; +#ifndef NO_TAINT_SUPPORT + SAVEBOOL(TAINT_get); + TAINT_NOT; +#endif Perl_load_module(aTHX_ PERL_LOADMOD_NOIMPORT, newSVpvn(pkg,pkg_len), NULL); if (!SvTRUE(ERRSV)) sv_setsv(ERRSV, errsv_save); - SvREFCNT_dec(errsv_save); LEAVE; } SPAGAIN; @@ -2888,6 +2893,7 @@ Perl__core_swash_init(pTHX_ const char* pkg, const char* name, SV *listsv, I32 m mPUSHi(none); PUTBACK; errsv_save = newSVsv(ERRSV); + SAVEFREESV(errsv_save); /* If we already have a pointer to the method, no need to use * call_method() to repeat the lookup. */ if (method ? call_sv(MUTABLE_SV(method), G_SCALAR) @@ -2898,7 +2904,6 @@ Perl__core_swash_init(pTHX_ const char* pkg, const char* name, SV *listsv, I32 m } if (!SvTRUE(ERRSV)) sv_setsv(ERRSV, errsv_save); - SvREFCNT_dec(errsv_save); LEAVE; POPSTACK; if (IN_PERL_COMPILETIME) { @@ -3609,7 +3614,7 @@ HV* Perl__swash_inversion_hash(pTHX_ SV* const swash) { - /* Subject to change or removal. For use only in one place in regcomp.c. + /* Subject to change or removal. For use only in regcomp.c and regexec.c * Can't be used on a property that is subject to user override, as it * relies on the value of SPECIALS in the swash which would be set by * utf8_heavy.pl to the hash in the non-overriden file, and hence is not set @@ -4297,9 +4302,11 @@ scanning won't continue past that goal. Correspondingly for C with respect C. If C is non-NULL and the pointer it points to is not NULL, that pointer is -considered an end pointer beyond which scanning of C will not continue under -any circumstances. This means that if both C and C are specified, and -C +considered an end pointer to the position 1 byte past the maximum point +in C beyond which scanning will not continue under any circumstances. +(This routine assumes that UTF-8 encoded input strings are not malformed; +malformed input can cause it to read past C). +This means that if both C and C are specified, and C is less than C+C, the match will never be successful because it can never get as far as its goal (and in fact is asserted against). Correspondingly for