=cut
*/
+/* helper for Perl__force_out_malformed_utf8_message(). Like
+ * SAVECOMPILEWARNINGS(), but works with PL_curcop rather than
+ * PL_compiling */
+
+static void
+S_restore_cop_warnings(pTHX_ void *p)
+{
+ if (!specialWARN(PL_curcop->cop_warnings))
+ PerlMemShared_free(PL_curcop->cop_warnings);
+ PL_curcop->cop_warnings = (STRLEN*)p;
+}
+
+
void
Perl__force_out_malformed_utf8_message(pTHX_
const U8 *const p, /* First byte in UTF-8 sequence */
PL_dowarn = G_WARN_ALL_ON|G_WARN_ON;
if (PL_curcop) {
+ /* this is like SAVECOMPILEWARNINGS() except with PL_curcop rather
+ * than PL_compiling */
+ SAVEDESTRUCTOR_X(S_restore_cop_warnings,
+ (void*)PL_curcop->cop_warnings);
PL_curcop->cop_warnings = pWARN_ALL;
}
{
PERL_ARGS_ASSERT_UTF8_TO_UVCHR_BUF;
- assert(s < send);
-
- return utf8n_to_uvchr(s, send - s, retlen,
- ckWARN_d(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
+ return _utf8_to_uvchr_buf(s, send, retlen);
}
/* This is marked as deprecated
* the bitops (especially ~) can create illegal UTF-8.
* In other words: in Perl UTF-8 is not just for Unicode. */
- if (e < s)
+ if (UNLIKELY(e < s))
goto warn_and_return;
while (s < e) {
s += UTF8SKIP(s);
len++;
}
- if (e != s) {
+ if (UNLIKELY(e != s)) {
len--;
warn_and_return:
if (PL_op)
bool
Perl__is_uni_FOO(pTHX_ const U8 classnum, const UV c)
{
+ dVAR;
return _invlist_contains_cp(PL_XPosix_ptrs[classnum], c);
}
bool
Perl__is_utf8_idstart(pTHX_ const U8 *p)
{
+ dVAR;
+
PERL_ARGS_ASSERT__IS_UTF8_IDSTART;
if (*p == '_')
bool
Perl__is_uni_perl_idcont(pTHX_ UV c)
{
+ dVAR;
return _invlist_contains_cp(PL_utf8_perl_idcont, c);
}
bool
Perl__is_uni_perl_idstart(pTHX_ UV c)
{
+ dVAR;
return _invlist_contains_cp(PL_utf8_perl_idstart, c);
}
* The ordinal of the first character of the changed version is returned
* (but note, as explained above, that there may be more.) */
+ dVAR;
PERL_ARGS_ASSERT_TO_UNI_UPPER;
if (c < 256) {
UV
Perl_to_uni_title(pTHX_ UV c, U8* p, STRLEN *lenp)
{
+ dVAR;
PERL_ARGS_ASSERT_TO_UNI_TITLE;
if (c < 256) {
UV
Perl_to_uni_lower(pTHX_ UV c, U8* p, STRLEN *lenp)
{
+ dVAR;
PERL_ARGS_ASSERT_TO_UNI_LOWER;
if (c < 256) {
* FOLD_FLAGS_NOMIX_ASCII iff non-ASCII to ASCII folds are prohibited
*/
+ dVAR;
PERL_ARGS_ASSERT__TO_UNI_FOLD_FLAGS;
if (flags & FOLD_FLAGS_LOCALE) {
const char * const file,
const unsigned line)
{
+ dVAR;
PERL_ARGS_ASSERT__IS_UTF8_FOO;
warn_on_first_deprecated_use(name, alternative, use_locale, file, line);
Perl__is_utf8_FOO_with_len(pTHX_ const U8 classnum, const U8 *p,
const U8 * const e)
{
+ dVAR;
PERL_ARGS_ASSERT__IS_UTF8_FOO_WITH_LEN;
return is_utf8_common_with_len(p, e, PL_XPosix_ptrs[classnum]);
bool
Perl__is_utf8_perl_idstart_with_len(pTHX_ const U8 *p, const U8 * const e)
{
+ dVAR;
PERL_ARGS_ASSERT__IS_UTF8_PERL_IDSTART_WITH_LEN;
return is_utf8_common_with_len(p, e, PL_utf8_perl_idstart);
bool
Perl__is_utf8_xidstart(pTHX_ const U8 *p)
{
+ dVAR;
PERL_ARGS_ASSERT__IS_UTF8_XIDSTART;
if (*p == '_')
bool
Perl__is_utf8_perl_idcont_with_len(pTHX_ const U8 *p, const U8 * const e)
{
+ dVAR;
PERL_ARGS_ASSERT__IS_UTF8_PERL_IDCONT_WITH_LEN;
return is_utf8_common_with_len(p, e, PL_utf8_perl_idcont);
bool
Perl__is_utf8_idcont(pTHX_ const U8 *p)
{
+ dVAR;
PERL_ARGS_ASSERT__IS_UTF8_IDCONT;
return is_utf8_common(p, PL_utf8_idcont);
bool
Perl__is_utf8_xidcont(pTHX_ const U8 *p)
{
+ dVAR;
PERL_ARGS_ASSERT__IS_UTF8_XIDCONT;
return is_utf8_common(p, PL_utf8_xidcont);
bool
Perl__is_utf8_mark(pTHX_ const U8 *p)
{
+ dVAR;
PERL_ARGS_ASSERT__IS_UTF8_MARK;
return is_utf8_common(p, PL_utf8_mark);
* the return can point to them, but single code points aren't, so would
* need to be constructed if we didn't employ something like this API */
+ dVAR;
/* 'index' is guaranteed to be non-negative, as this is an inversion map
* that covers all possible inputs. See [perl #133365] */
SSize_t index = _invlist_search(PL_utf8_foldclosures, cp);
* sequence, and the entire sequence will be stored in *ustrp. ustrp will
* contain *lenp bytes */
+ dVAR;
PERL_ARGS_ASSERT_TURKIC_LC;
assert(e > p0);
const char * const file,
const int line)
{
+ dVAR;
UV result;
const U32 utf8n_flags = check_and_deprecate(p, &e, DEPRECATE_TO_UPPER,
cBOOL(flags), file, line);
const char * const file,
const int line)
{
+ dVAR;
UV result;
const U32 utf8n_flags = check_and_deprecate(p, &e, DEPRECATE_TO_TITLE,
cBOOL(flags), file, line);
const char * const file,
const int line)
{
+ dVAR;
UV result;
const U32 utf8n_flags = check_and_deprecate(p, &e, DEPRECATE_TO_LOWER,
cBOOL(flags), file, line);
const char * const file,
const int line)
{
+ dVAR;
UV result;
const U32 utf8n_flags = check_and_deprecate(p, &e, DEPRECATE_TO_FOLD,
cBOOL(flags), file, line);
SAVEBOOL(TAINT_get);
TAINT_NOT;
#endif
- Perl_load_module(aTHX_ PERL_LOADMOD_NOIMPORT, newSVpvn(pkg,pkg_len),
- NULL);
+ require_pv("utf8_heavy.pl");
{
/* Not ERRSV, as there is no need to vivify a scalar we are
about to discard. */
* handled the same way, speeding up this common case */
if (UTF8_IS_INVARIANT(*s)) { /* Assumes 's' contains at least 1 byte */
+ if (retlen) {
+ *retlen = 1;
+ }
return (UV) *s;
}