-/*
+/*
* This file contains mathoms, various binary artifacts from previous
- * versions of Perl. For binary or source compatibility reasons, though,
- * we cannot completely remove them from the core code.
+ * versions of Perl which we cannot completely remove from the core
+ * code. There are two reasons functions should be here:
+ *
+ * 1) A function has been replaced by a macro within a minor release,
+ * so XS modules compiled against an older release will expect to
+ * still be able to link against the function
+ * 2) A function Perl_foo(...) with #define foo Perl_foo(aTHX_ ...)
+ * has been replaced by a macro, e.g. #define foo(...) foo_flags(...,0)
+ * but XS code may still explicitly use the long form, i.e.
+ * Perl_foo(aTHX_ ...)
+ *
+ * NOTE: ALL FUNCTIONS IN THIS FILE should have an entry with the 'b' flag in
+ * embed.fnc.
+ *
+ * To move a function to this file, simply cut and paste it here, and change
+ * its embed.fnc entry to additionally have the 'b' flag. If, for some reason,
+ * a function you'd like to be treated as a mathom can't be moved from its
+ * current place, simply enclose it between
*
- * REMEMBER to update makedef.pl when adding a function to mathoms.c whose
- * name doesn't begin with "Perl_".
+ * #ifndef NO_MATHOMS
+ * ...
+ * #endif
*
- * SMP - Oct. 24, 2005
+ * and add the 'b' flag in embed.fnc.
*
* The compilation of this file can be suppressed; see INSTALL
*
-=head1 Obsolete backwards compatibility functions
+ * Some blurb for perlapi.pod:
+
+ head1 Obsolete backwards compatibility functions
+
Some of these are also deprecated. You can exclude these from
your compiled Perl by adding this option to Configure:
C<-Accflags='-DNO_MATHOMS'>
*/
#else
-/* Not all of these have prototypes elsewhere, so do this to get
- * non-mangled names.
- */
-START_EXTERN_C
-
-PERL_CALLCONV OP * Perl_ref(pTHX_ OP *o, I32 type);
-PERL_CALLCONV void Perl_sv_unref(pTHX_ SV *sv);
-PERL_CALLCONV void Perl_sv_taint(pTHX_ SV *sv);
-PERL_CALLCONV IV Perl_sv_2iv(pTHX_ SV *sv);
-PERL_CALLCONV UV Perl_sv_2uv(pTHX_ SV *sv);
-PERL_CALLCONV NV Perl_sv_2nv(pTHX_ SV *sv);
-PERL_CALLCONV char * Perl_sv_2pv(pTHX_ SV *sv, STRLEN *lp);
-PERL_CALLCONV char * Perl_sv_2pv_nolen(pTHX_ SV *sv);
-PERL_CALLCONV char * Perl_sv_2pvbyte_nolen(pTHX_ SV *sv);
-PERL_CALLCONV char * Perl_sv_2pvutf8_nolen(pTHX_ SV *sv);
-PERL_CALLCONV void Perl_sv_force_normal(pTHX_ SV *sv);
-PERL_CALLCONV void Perl_sv_setsv(pTHX_ SV *dstr, SV *sstr);
-PERL_CALLCONV void Perl_sv_catpvn(pTHX_ SV *dsv, const char* sstr, STRLEN slen);
-PERL_CALLCONV void Perl_sv_catpvn_mg(pTHX_ SV *sv, const char *ptr, STRLEN len);
-PERL_CALLCONV void Perl_sv_catsv(pTHX_ SV *dstr, SV *sstr);
-PERL_CALLCONV void Perl_sv_catsv_mg(pTHX_ SV *dsv, SV *ssv);
-PERL_CALLCONV char * Perl_sv_pv(pTHX_ SV *sv);
-PERL_CALLCONV char * Perl_sv_pvn_force(pTHX_ SV *sv, STRLEN *lp);
-PERL_CALLCONV char * Perl_sv_pvbyte(pTHX_ SV *sv);
-PERL_CALLCONV char * Perl_sv_pvutf8(pTHX_ SV *sv);
-PERL_CALLCONV STRLEN Perl_sv_utf8_upgrade(pTHX_ SV *sv);
-PERL_CALLCONV NV Perl_huge(void);
-PERL_CALLCONV void Perl_gv_fullname3(pTHX_ SV *sv, const GV *gv, const char *prefix);
-PERL_CALLCONV void Perl_gv_efullname3(pTHX_ SV *sv, const GV *gv, const char *prefix);
-PERL_CALLCONV GV * Perl_gv_fetchmethod(pTHX_ HV *stash, const char *name);
-PERL_CALLCONV HE * Perl_hv_iternext(pTHX_ HV *hv);
-PERL_CALLCONV void Perl_hv_magic(pTHX_ HV *hv, GV *gv, int how);
-PERL_CALLCONV bool Perl_do_open(pTHX_ GV *gv, const char *name, I32 len, int as_raw, int rawmode, int rawperm, PerlIO *supplied_fp);
-PERL_CALLCONV bool Perl_do_aexec(pTHX_ SV *really, SV **mark, SV **sp);
-PERL_CALLCONV U8 * Perl_uvuni_to_utf8(pTHX_ U8 *d, UV uv);
-PERL_CALLCONV bool Perl_is_utf8_string_loc(pTHX_ const U8 *s, STRLEN len, const U8 **ep);
-PERL_CALLCONV void Perl_sv_nolocking(pTHX_ SV *sv);
-PERL_CALLCONV void Perl_sv_usepvn_mg(pTHX_ SV *sv, char *ptr, STRLEN len);
-PERL_CALLCONV void Perl_sv_usepvn(pTHX_ SV *sv, char *ptr, STRLEN len);
-PERL_CALLCONV int Perl_fprintf_nocontext(PerlIO *stream, const char *format, ...);
-PERL_CALLCONV int Perl_printf_nocontext(const char *format, ...);
-PERL_CALLCONV int Perl_magic_setglob(pTHX_ SV* sv, MAGIC* mg);
-PERL_CALLCONV AV * Perl_newAV(pTHX);
-PERL_CALLCONV HV * Perl_newHV(pTHX);
-PERL_CALLCONV IO * Perl_newIO(pTHX);
-PERL_CALLCONV I32 Perl_my_stat(pTHX);
-PERL_CALLCONV I32 Perl_my_lstat(pTHX);
-PERL_CALLCONV I32 Perl_sv_eq(pTHX_ SV *sv1, SV *sv2);
-PERL_CALLCONV char * Perl_sv_collxfrm(pTHX_ SV *const sv, STRLEN *const nxp);
-PERL_CALLCONV bool Perl_sv_2bool(pTHX_ SV *const sv);
-PERL_CALLCONV CV * Perl_newSUB(pTHX_ I32 floor, OP* o, OP* proto, OP* block);
-PERL_CALLCONV UV Perl_to_utf8_lower(pTHX_ const U8 *p, U8* ustrp, STRLEN *lenp);
-PERL_CALLCONV UV Perl_to_utf8_title(pTHX_ const U8 *p, U8* ustrp, STRLEN *lenp);
-PERL_CALLCONV UV Perl_to_utf8_upper(pTHX_ const U8 *p, U8* ustrp, STRLEN *lenp);
-PERL_CALLCONV UV Perl_to_utf8_fold(pTHX_ const U8 *p, U8* ustrp, STRLEN *lenp);
-PERL_CALLCONV SV *Perl_sv_mortalcopy(pTHX_ SV *const oldstr);
+/* The functions in this file should be able to call other deprecated functions
+ * without a compiler warning */
+GCC_DIAG_IGNORE(-Wdeprecated-declarations)
/* ref() is now a macro using Perl_doref;
* this version provided for binary compatibility only.
}
/*
+=for apidoc_section $SV
=for apidoc sv_unref
Unsets the RV status of the SV, and decrements the reference count of
*/
void
-Perl_sv_setsv(pTHX_ SV *dstr, SV *sstr)
+Perl_sv_setsv(pTHX_ SV *dsv, SV *ssv)
{
PERL_ARGS_ASSERT_SV_SETSV;
- sv_setsv_flags(dstr, sstr, SV_GMAGIC);
+ sv_setsv_flags(dsv, ssv, SV_GMAGIC);
}
/* sv_catpvn() is now a macro using Perl_sv_catpvn_flags();
sv_catpvn_flags(dsv, sstr, slen, SV_GMAGIC);
}
-/*
-=for apidoc sv_catpvn_mg
-
-Like C<sv_catpvn>, but also handles 'set' magic.
-
-=cut
-*/
-
void
-Perl_sv_catpvn_mg(pTHX_ SV *sv, const char *ptr, STRLEN len)
+Perl_sv_catpvn_mg(pTHX_ SV *dsv, const char *sstr, STRLEN len)
{
PERL_ARGS_ASSERT_SV_CATPVN_MG;
- sv_catpvn_flags(sv,ptr,len,SV_GMAGIC|SV_SMAGIC);
+ sv_catpvn_flags(dsv,sstr,len,SV_GMAGIC|SV_SMAGIC);
}
/* sv_catsv() is now a macro using Perl_sv_catsv_flags();
*/
void
-Perl_sv_catsv(pTHX_ SV *dstr, SV *sstr)
+Perl_sv_catsv(pTHX_ SV *dsv, SV *sstr)
{
PERL_ARGS_ASSERT_SV_CATSV;
- sv_catsv_flags(dstr, sstr, SV_GMAGIC);
+ sv_catsv_flags(dsv, sstr, SV_GMAGIC);
}
-/*
-=for apidoc sv_catsv_mg
-
-Like C<sv_catsv>, but also handles 'set' magic.
-
-=cut
-*/
-
void
-Perl_sv_catsv_mg(pTHX_ SV *dsv, SV *ssv)
+Perl_sv_catsv_mg(pTHX_ SV *dsv, SV *sstr)
{
PERL_ARGS_ASSERT_SV_CATSV_MG;
- sv_catsv_flags(dsv,ssv,SV_GMAGIC|SV_SMAGIC);
+ sv_catsv_flags(dsv,sstr,SV_GMAGIC|SV_SMAGIC);
}
/*
Perl_fprintf_nocontext(PerlIO *stream, const char *format, ...)
{
int ret = 0;
- va_list(arglist);
+ va_list arglist;
/* Easier to special case this here than in embed.pl. (Look at what it
generates for proto.h) */
Perl_printf_nocontext(const char *format, ...)
{
dTHX;
- va_list(arglist);
+ va_list arglist;
int ret = 0;
#ifdef PERL_IMPLICIT_CONTEXT
}
/*
+=for apidoc_section $GV
=for apidoc gv_fetchmethod
See L</gv_fetchmethod_autoload>.
}
bool
-Perl_is_utf8_string_loc(pTHX_ const U8 *s, STRLEN len, const U8 **ep)
+Perl_is_utf8_string_loc(const U8 *s, const STRLEN len, const U8 **ep)
{
PERL_ARGS_ASSERT_IS_UTF8_STRING_LOC;
- PERL_UNUSED_CONTEXT;
return is_utf8_string_loclen(s, len, ep, 0);
}
/*
+=for apidoc_section $SV
=for apidoc sv_nolocking
Dummy routine which "locks" an SV when there is no locking module present.
"Superseded" by C<sv_nosharing()>.
=cut
+
+PERL_UNLOCK_HOOK in intrpvar.h is the macro that refers to this, and guarantees
+that mathoms gets loaded.
+
*/
void
}
void
-Perl_save_iv(pTHX_ IV *ivp)
-{
- PERL_ARGS_ASSERT_SAVE_IV;
-
- SSCHECK(3);
- SSPUSHIV(*ivp);
- SSPUSHPTR(ivp);
- SSPUSHUV(SAVEt_IV);
-}
-
-void
Perl_save_nogv(pTHX_ GV *gv)
{
PERL_ARGS_ASSERT_SAVE_NOGV;
}
/*
+=for apidoc_section $pack
=for apidoc unpack_str
The engine implementing C<unpack()> Perl function. Note: parameters C<strbeg>,
=cut */
-I32
+SSize_t
Perl_unpack_str(pTHX_ const char *pat, const char *patend, const char *s,
const char *strbeg, const char *strend, char **new_s, I32 ocnt,
U32 flags)
The engine implementing C<pack()> Perl function. Note: parameters
C<next_in_list> and C<flags> are not used. This call should not be used; use
-C<packlist> instead.
+C<L</packlist>> instead.
=cut
*/
{
PERL_ARGS_ASSERT_HV_EXISTS_ENT;
- return hv_common(hv, keysv, NULL, 0, 0, HV_FETCH_ISEXISTS, 0, hash)
- ? TRUE : FALSE;
+ return cBOOL(hv_common(hv, keysv, NULL, 0, 0, HV_FETCH_ISEXISTS, 0, hash));
}
HE *
klen = klen_i32;
flags = 0;
}
- return hv_common(hv, NULL, key, klen, flags, HV_FETCH_ISEXISTS, 0, 0)
- ? TRUE : FALSE;
+ return cBOOL(hv_common(hv, NULL, key, klen, flags, HV_FETCH_ISEXISTS, 0, 0));
}
SV**
char *
Perl_sv_collxfrm(pTHX_ SV *const sv, STRLEN *const nxp)
{
+ PERL_ARGS_ASSERT_SV_COLLXFRM;
return sv_collxfrm_flags(sv, nxp, SV_GMAGIC);
}
+
+char *
+Perl_mem_collxfrm(pTHX_ const char *input_string, STRLEN len, STRLEN *xlen)
+{
+ /* This function is retained for compatibility in case someone outside core
+ * is using this (but it is undocumented) */
+
+ PERL_ARGS_ASSERT_MEM_COLLXFRM;
+
+ return _mem_collxfrm(input_string, len, xlen, FALSE);
+}
+
#endif
bool
Perl_sv_2bool(pTHX_ SV *const sv)
{
+ PERL_ARGS_ASSERT_SV_2BOOL;
return sv_2bool_flags(sv, SV_GMAGIC);
}
/*
+=for apidoc_section $custom
=for apidoc custom_op_name
Return the name for a given custom op. This was once used by the C<OP_NAME>
macro, but is no longer: it has only been kept for compatibility, and
return newATTRSUB(floor, o, proto, NULL, block);
}
-UV
-Perl_to_utf8_fold(pTHX_ const U8 *p, U8* ustrp, STRLEN *lenp)
-{
- PERL_ARGS_ASSERT_TO_UTF8_FOLD;
-
- return _to_utf8_fold_flags(p, ustrp, lenp, FOLD_FLAGS_FULL);
-}
-
-UV
-Perl_to_utf8_lower(pTHX_ const U8 *p, U8* ustrp, STRLEN *lenp)
-{
- PERL_ARGS_ASSERT_TO_UTF8_LOWER;
-
- return _to_utf8_lower_flags(p, ustrp, lenp, FALSE);
-}
-
-UV
-Perl_to_utf8_title(pTHX_ const U8 *p, U8* ustrp, STRLEN *lenp)
-{
- PERL_ARGS_ASSERT_TO_UTF8_TITLE;
-
- return _to_utf8_title_flags(p, ustrp, lenp, FALSE);
-}
-
-UV
-Perl_to_utf8_upper(pTHX_ const U8 *p, U8* ustrp, STRLEN *lenp)
-{
- PERL_ARGS_ASSERT_TO_UTF8_UPPER;
-
- return _to_utf8_upper_flags(p, ustrp, lenp, FALSE);
-}
-
SV *
-Perl_sv_mortalcopy(pTHX_ SV *const oldstr)
+Perl_sv_mortalcopy(pTHX_ SV *const oldsv)
{
- return Perl_sv_mortalcopy_flags(aTHX_ oldstr, SV_GMAGIC);
+ return Perl_sv_mortalcopy_flags(aTHX_ oldsv, SV_GMAGIC);
}
void
{
PERL_ARGS_ASSERT_SV_COPYPV;
- sv_copypv_flags(dsv, ssv, 0);
+ sv_copypv_flags(dsv, ssv, SV_GMAGIC);
}
UV /* Made into a function, so can be deprecated */
return ch;
}
-bool /* Made into a function, so can be deprecated */
-Perl_isIDFIRST_lazy(pTHX_ const char* p)
-{
- PERL_ARGS_ASSERT_ISIDFIRST_LAZY;
-
- return isIDFIRST_lazy_if(p,1);
-}
-
-bool /* Made into a function, so can be deprecated */
-Perl_isALNUM_lazy(pTHX_ const char* p)
-{
- PERL_ARGS_ASSERT_ISALNUM_LAZY;
-
- return isALNUM_lazy_if(p,1);
-}
-
-bool
-Perl_is_uni_alnum(pTHX_ UV c)
-{
- return isWORDCHAR_uni(c);
-}
-
-bool
-Perl_is_uni_alnumc(pTHX_ UV c)
-{
- return isALNUM_uni(c);
-}
-
-bool
-Perl_is_uni_alpha(pTHX_ UV c)
-{
- return isALPHA_uni(c);
-}
-
-bool
-Perl_is_uni_ascii(pTHX_ UV c)
-{
- PERL_UNUSED_CONTEXT;
- return isASCII_uni(c);
-}
-
-bool
-Perl_is_uni_blank(pTHX_ UV c)
-{
- PERL_UNUSED_CONTEXT;
- return isBLANK_uni(c);
-}
-
-bool
-Perl_is_uni_space(pTHX_ UV c)
-{
- PERL_UNUSED_CONTEXT;
- return isSPACE_uni(c);
-}
-
-bool
-Perl_is_uni_digit(pTHX_ UV c)
-{
- PERL_UNUSED_CONTEXT;
- return isDIGIT_uni(c);
-}
-
-bool
-Perl_is_uni_upper(pTHX_ UV c)
-{
- PERL_UNUSED_CONTEXT;
- return isUPPER_uni(c);
-}
-
-bool
-Perl_is_uni_lower(pTHX_ UV c)
-{
- PERL_UNUSED_CONTEXT;
- return isLOWER_uni(c);
-}
-
-bool
-Perl_is_uni_cntrl(pTHX_ UV c)
-{
- PERL_UNUSED_CONTEXT;
- return isCNTRL_L1(c);
-}
-
-bool
-Perl_is_uni_graph(pTHX_ UV c)
-{
- PERL_UNUSED_CONTEXT;
- return isGRAPH_uni(c);
-}
-
-bool
-Perl_is_uni_print(pTHX_ UV c)
-{
- PERL_UNUSED_CONTEXT;
- return isPRINT_uni(c);
-}
-
-bool
-Perl_is_uni_punct(pTHX_ UV c)
-{
- PERL_UNUSED_CONTEXT;
- return isPUNCT_uni(c);
-}
-
-bool
-Perl_is_uni_xdigit(pTHX_ UV c)
-{
- PERL_UNUSED_CONTEXT;
- return isXDIGIT_uni(c);
-}
-
-bool
-Perl_is_uni_alnum_lc(pTHX_ UV c)
-{
- PERL_UNUSED_CONTEXT;
- return isWORDCHAR_LC_uvchr(c);
-}
-
-bool
-Perl_is_uni_alnumc_lc(pTHX_ UV c)
-{
- PERL_UNUSED_CONTEXT;
- return isALPHANUMERIC_LC_uvchr(c);
-}
-
-bool
-Perl_is_uni_idfirst_lc(pTHX_ UV c)
-{
- PERL_UNUSED_CONTEXT;
- /* XXX Should probably be something that resolves to the old IDFIRST, but
- * this function is deprecated, so not bothering */
- return isIDFIRST_LC_uvchr(c);
-}
-
-bool
-Perl_is_uni_alpha_lc(pTHX_ UV c)
-{
- PERL_UNUSED_CONTEXT;
- return isALPHA_LC_uvchr(c);
-}
-
-bool
-Perl_is_uni_ascii_lc(pTHX_ UV c)
-{
- PERL_UNUSED_CONTEXT;
- return isASCII_LC_uvchr(c);
-}
-
-bool
-Perl_is_uni_blank_lc(pTHX_ UV c)
-{
- PERL_UNUSED_CONTEXT;
- return isBLANK_LC_uvchr(c);
-}
-
-bool
-Perl_is_uni_space_lc(pTHX_ UV c)
-{
- PERL_UNUSED_CONTEXT;
- return isSPACE_LC_uvchr(c);
-}
-
-bool
-Perl_is_uni_digit_lc(pTHX_ UV c)
-{
- return isDIGIT_LC_uvchr(c);
-}
-
-bool
-Perl_is_uni_idfirst(pTHX_ UV c)
-{
- U8 tmpbuf[UTF8_MAXBYTES+1];
- uvchr_to_utf8(tmpbuf, c);
- return _is_utf8_idstart(tmpbuf);
-}
-
-bool
-Perl_is_utf8_idfirst(pTHX_ const U8 *p) /* The naming is historical. */
-{
- PERL_ARGS_ASSERT_IS_UTF8_IDFIRST;
-
- return _is_utf8_idstart(p);
-}
-
-bool
-Perl_is_utf8_xidfirst(pTHX_ const U8 *p) /* The naming is historical. */
-{
- PERL_ARGS_ASSERT_IS_UTF8_XIDFIRST;
-
- return _is_utf8_xidstart(p);
-}
-
-bool
-Perl_is_utf8_idcont(pTHX_ const U8 *p)
-{
- PERL_ARGS_ASSERT_IS_UTF8_IDCONT;
-
- return _is_utf8_idcont(p);
-}
-
-bool
-Perl_is_utf8_xidcont(pTHX_ const U8 *p)
-{
- PERL_ARGS_ASSERT_IS_UTF8_XIDCONT;
-
- return _is_utf8_xidcont(p);
-}
-
-bool
-Perl_is_uni_upper_lc(pTHX_ UV c)
-{
- return isUPPER_LC_uvchr(c);
-}
-
-bool
-Perl_is_uni_lower_lc(pTHX_ UV c)
-{
- return isLOWER_LC_uvchr(c);
-}
+/*
+=for apidoc_section $unicode
+=for apidoc is_utf8_char
-bool
-Perl_is_uni_cntrl_lc(pTHX_ UV c)
-{
- return isCNTRL_LC_uvchr(c);
-}
+Tests if some arbitrary number of bytes begins in a valid UTF-8
+character. Note that an INVARIANT (i.e. ASCII on non-EBCDIC machines)
+character is a valid UTF-8 character. The actual number of bytes in the UTF-8
+character will be returned if it is valid, otherwise 0.
-bool
-Perl_is_uni_graph_lc(pTHX_ UV c)
-{
- return isGRAPH_LC_uvchr(c);
-}
+This function is deprecated due to the possibility that malformed input could
+cause reading beyond the end of the input buffer. Use L</isUTF8_CHAR>
+instead.
-bool
-Perl_is_uni_print_lc(pTHX_ UV c)
-{
- return isPRINT_LC_uvchr(c);
-}
+=cut */
-bool
-Perl_is_uni_punct_lc(pTHX_ UV c)
+STRLEN
+Perl_is_utf8_char(const U8 *s)
{
- return isPUNCT_LC_uvchr(c);
-}
+ PERL_ARGS_ASSERT_IS_UTF8_CHAR;
-bool
-Perl_is_uni_xdigit_lc(pTHX_ UV c)
-{
- return isXDIGIT_LC_uvchr(c);
+ /* Assumes we have enough space, which is why this is deprecated. But the
+ * UTF8_CHK_SKIP(s) makes it safe for the common case of NUL-terminated
+ * strings */
+ return isUTF8_CHAR(s, s + UTF8_CHK_SKIP(s));
}
-U32
-Perl_to_uni_upper_lc(pTHX_ U32 c)
-{
- /* XXX returns only the first character -- do not use XXX */
- /* XXX no locale support yet */
- STRLEN len;
- U8 tmpbuf[UTF8_MAXBYTES_CASE+1];
- return (U32)to_uni_upper(c, tmpbuf, &len);
-}
+/*
+=for apidoc is_utf8_char_buf
-U32
-Perl_to_uni_title_lc(pTHX_ U32 c)
-{
- /* XXX returns only the first character XXX -- do not use XXX */
- /* XXX no locale support yet */
- STRLEN len;
- U8 tmpbuf[UTF8_MAXBYTES_CASE+1];
- return (U32)to_uni_title(c, tmpbuf, &len);
-}
+This is identical to the macro L<perlapi/isUTF8_CHAR>.
-U32
-Perl_to_uni_lower_lc(pTHX_ U32 c)
-{
- /* XXX returns only the first character -- do not use XXX */
- /* XXX no locale support yet */
- STRLEN len;
- U8 tmpbuf[UTF8_MAXBYTES_CASE+1];
- return (U32)to_uni_lower(c, tmpbuf, &len);
-}
+=cut */
-bool
-Perl_is_utf8_alnum(pTHX_ const U8 *p)
+STRLEN
+Perl_is_utf8_char_buf(const U8 *buf, const U8* buf_end)
{
- PERL_ARGS_ASSERT_IS_UTF8_ALNUM;
- /* NOTE: "IsWord", not "IsAlnum", since Alnum is a true
- * descendant of isalnum(3), in other words, it doesn't
- * contain the '_'. --jhi */
- return isWORDCHAR_utf8(p);
-}
-
-bool
-Perl_is_utf8_alnumc(pTHX_ const U8 *p)
-{
- PERL_ARGS_ASSERT_IS_UTF8_ALNUMC;
+ PERL_ARGS_ASSERT_IS_UTF8_CHAR_BUF;
- return isALPHANUMERIC_utf8(p);
+ return isUTF8_CHAR(buf, buf_end);
}
-bool
-Perl_is_utf8_alpha(pTHX_ const U8 *p)
-{
- PERL_ARGS_ASSERT_IS_UTF8_ALPHA;
-
- return isALPHA_utf8(p);
-}
+/* DEPRECATED!
+ * Like L</utf8_to_uvuni_buf>(), but should only be called when it is known that
+ * there are no malformations in the input UTF-8 string C<s>. Surrogates,
+ * non-character code points, and non-Unicode code points are allowed */
-bool
-Perl_is_utf8_ascii(pTHX_ const U8 *p)
+UV
+Perl_valid_utf8_to_uvuni(pTHX_ const U8 *s, STRLEN *retlen)
{
- PERL_ARGS_ASSERT_IS_UTF8_ASCII;
PERL_UNUSED_CONTEXT;
+ PERL_ARGS_ASSERT_VALID_UTF8_TO_UVUNI;
- return isASCII_utf8(p);
+ return NATIVE_TO_UNI(valid_utf8_to_uvchr(s, retlen));
}
-bool
-Perl_is_utf8_blank(pTHX_ const U8 *p)
-{
- PERL_ARGS_ASSERT_IS_UTF8_BLANK;
- PERL_UNUSED_CONTEXT;
-
- return isBLANK_utf8(p);
-}
+/*
+=for apidoc utf8_to_uvuni
-bool
-Perl_is_utf8_space(pTHX_ const U8 *p)
-{
- PERL_ARGS_ASSERT_IS_UTF8_SPACE;
- PERL_UNUSED_CONTEXT;
+Returns the Unicode code point of the first character in the string C<s>
+which is assumed to be in UTF-8 encoding; C<retlen> will be set to the
+length, in bytes, of that character.
- return isSPACE_utf8(p);
-}
+Some, but not all, UTF-8 malformations are detected, and in fact, some
+malformed input could cause reading beyond the end of the input buffer, which
+is one reason why this function is deprecated. The other is that only in
+extremely limited circumstances should the Unicode versus native code point be
+of any interest to you. See L</utf8_to_uvuni_buf> for alternatives.
-bool
-Perl_is_utf8_perl_space(pTHX_ const U8 *p)
-{
- PERL_ARGS_ASSERT_IS_UTF8_PERL_SPACE;
- PERL_UNUSED_CONTEXT;
+If C<s> points to one of the detected malformations, and UTF8 warnings are
+enabled, zero is returned and C<*retlen> is set (if C<retlen> isn't NULL)
+to -1. If those warnings are off, the computed value, if well-defined (or
+the Unicode REPLACEMENT CHARACTER, if not), is silently returned, and C<*retlen>
+is set (if C<retlen> isn't NULL) so that (S<C<s> + C<*retlen>>) is the
+next possible position in C<s> that could begin a non-malformed character.
+See L<perlapi/utf8n_to_uvchr> for details on when the REPLACEMENT CHARACTER is returned.
- /* Only true if is an ASCII space-like character, and ASCII is invariant
- * under utf8, so can just use the macro */
- return isSPACE_A(*p);
-}
+=cut
+*/
-bool
-Perl_is_utf8_perl_word(pTHX_ const U8 *p)
+UV
+Perl_utf8_to_uvuni(pTHX_ const U8 *s, STRLEN *retlen)
{
- PERL_ARGS_ASSERT_IS_UTF8_PERL_WORD;
PERL_UNUSED_CONTEXT;
+ PERL_ARGS_ASSERT_UTF8_TO_UVUNI;
- /* Only true if is an ASCII word character, and ASCII is invariant
- * under utf8, so can just use the macro */
- return isWORDCHAR_A(*p);
+ return NATIVE_TO_UNI(valid_utf8_to_uvchr(s, retlen));
}
-bool
-Perl_is_utf8_digit(pTHX_ const U8 *p)
-{
- PERL_ARGS_ASSERT_IS_UTF8_DIGIT;
+/*
+=for apidoc pad_compname_type
- return isDIGIT_utf8(p);
-}
+Looks up the type of the lexical variable at position C<po> in the
+currently-compiling pad. If the variable is typed, the stash of the
+class to which it is typed is returned. If not, C<NULL> is returned.
-bool
-Perl_is_utf8_posix_digit(pTHX_ const U8 *p)
-{
- PERL_ARGS_ASSERT_IS_UTF8_POSIX_DIGIT;
- PERL_UNUSED_CONTEXT;
+Use L<perlintern/C<PAD_COMPNAME_TYPE>> instead.
- /* Only true if is an ASCII digit character, and ASCII is invariant
- * under utf8, so can just use the macro */
- return isDIGIT_A(*p);
-}
+=cut
+*/
-bool
-Perl_is_utf8_upper(pTHX_ const U8 *p)
+HV *
+Perl_pad_compname_type(pTHX_ const PADOFFSET po)
{
- PERL_ARGS_ASSERT_IS_UTF8_UPPER;
-
- return isUPPER_utf8(p);
+ return PAD_COMPNAME_TYPE(po);
}
-bool
-Perl_is_utf8_lower(pTHX_ const U8 *p)
-{
- PERL_ARGS_ASSERT_IS_UTF8_LOWER;
-
- return isLOWER_utf8(p);
-}
+/* return ptr to little string in big string, NULL if not found */
+/* The original version of this routine was donated by Corey Satten. */
-bool
-Perl_is_utf8_cntrl(pTHX_ const U8 *p)
+char *
+Perl_instr(const char *big, const char *little)
{
- PERL_ARGS_ASSERT_IS_UTF8_CNTRL;
- PERL_UNUSED_CONTEXT;
+ PERL_ARGS_ASSERT_INSTR;
- return isCNTRL_utf8(p);
+ return instr(big, little);
}
-bool
-Perl_is_utf8_graph(pTHX_ const U8 *p)
+SV *
+Perl_newSVsv(pTHX_ SV *const old)
{
- PERL_ARGS_ASSERT_IS_UTF8_GRAPH;
-
- return isGRAPH_utf8(p);
+ return newSVsv(old);
}
bool
-Perl_is_utf8_print(pTHX_ const U8 *p)
+Perl_sv_utf8_downgrade(pTHX_ SV *const sv, const bool fail_ok)
{
- PERL_ARGS_ASSERT_IS_UTF8_PRINT;
+ PERL_ARGS_ASSERT_SV_UTF8_DOWNGRADE;
- return isPRINT_utf8(p);
+ return sv_utf8_downgrade(sv, fail_ok);
}
-bool
-Perl_is_utf8_punct(pTHX_ const U8 *p)
+char *
+Perl_sv_2pvutf8(pTHX_ SV *sv, STRLEN *const lp)
{
- PERL_ARGS_ASSERT_IS_UTF8_PUNCT;
+ PERL_ARGS_ASSERT_SV_2PVUTF8;
- return isPUNCT_utf8(p);
+ return sv_2pvutf8(sv, lp);
}
-bool
-Perl_is_utf8_xdigit(pTHX_ const U8 *p)
+char *
+Perl_sv_2pvbyte(pTHX_ SV *sv, STRLEN *const lp)
{
- PERL_ARGS_ASSERT_IS_UTF8_XDIGIT;
- PERL_UNUSED_CONTEXT;
+ PERL_ARGS_ASSERT_SV_2PVBYTE;
- return isXDIGIT_utf8(p);
+ return sv_2pvbyte(sv, lp);
}
-bool
-Perl_is_utf8_mark(pTHX_ const U8 *p)
+U8 *
+Perl_uvuni_to_utf8(pTHX_ U8 *d, UV uv)
{
- PERL_ARGS_ASSERT_IS_UTF8_MARK;
+ PERL_ARGS_ASSERT_UVUNI_TO_UTF8;
- return _is_utf8_mark(p);
+ return uvoffuni_to_utf8_flags(d, uv, 0);
}
/*
-=for apidoc is_utf8_char
+=for apidoc utf8n_to_uvuni
-Tests if some arbitrary number of bytes begins in a valid UTF-8
-character. Note that an INVARIANT (i.e. ASCII on non-EBCDIC machines)
-character is a valid UTF-8 character. The actual number of bytes in the UTF-8
-character will be returned if it is valid, otherwise 0.
+Instead use L<perlapi/utf8_to_uvchr_buf>, or rarely, L<perlapi/utf8n_to_uvchr>.
-This function is deprecated due to the possibility that malformed input could
-cause reading beyond the end of the input buffer. Use L</isUTF8_CHAR>
-instead.
+This function was useful for code that wanted to handle both EBCDIC and
+ASCII platforms with Unicode properties, but starting in Perl v5.20, the
+distinctions between the platforms have mostly been made invisible to most
+code, so this function is quite unlikely to be what you want. If you do need
+this precise functionality, use instead
+C<L<NATIVE_TO_UNI(utf8_to_uvchr_buf(...))|perlapi/utf8_to_uvchr_buf>>
+or C<L<NATIVE_TO_UNI(utf8n_to_uvchr(...))|perlapi/utf8n_to_uvchr>>.
-=cut */
+=cut
+*/
-STRLEN
-Perl_is_utf8_char(const U8 *s)
+UV
+Perl_utf8n_to_uvuni(pTHX_ const U8 *s, STRLEN curlen, STRLEN *retlen, U32 flags)
{
- PERL_ARGS_ASSERT_IS_UTF8_CHAR;
+ PERL_ARGS_ASSERT_UTF8N_TO_UVUNI;
- /* Assumes we have enough space, which is why this is deprecated */
- return isUTF8_CHAR(s, s + UTF8SKIP(s));
+ return NATIVE_TO_UNI(utf8n_to_uvchr(s, curlen, retlen, flags));
}
/*
-=for apidoc is_utf8_char_buf
+=for apidoc uvuni_to_utf8_flags
-This is identical to the macro L</isUTF8_CHAR>.
+Instead you almost certainly want to use L<perlapi/uvchr_to_utf8> or
+L<perlapi/uvchr_to_utf8_flags>.
-=cut */
+This function is a deprecated synonym for L</uvoffuni_to_utf8_flags>,
+which itself, while not deprecated, should be used only in isolated
+circumstances. These functions were useful for code that wanted to handle
+both EBCDIC and ASCII platforms with Unicode properties, but starting in Perl
+v5.20, the distinctions between the platforms have mostly been made invisible
+to most code, so this function is quite unlikely to be what you want.
-STRLEN
-Perl_is_utf8_char_buf(const U8 *buf, const U8* buf_end)
-{
-
- PERL_ARGS_ASSERT_IS_UTF8_CHAR_BUF;
-
- return isUTF8_CHAR(buf, buf_end);
-}
-
-/* DEPRECATED!
- * Like L</utf8_to_uvuni_buf>(), but should only be called when it is known that
- * there are no malformations in the input UTF-8 string C<s>. Surrogates,
- * non-character code points, and non-Unicode code points are allowed */
+=cut
+*/
-UV
-Perl_valid_utf8_to_uvuni(pTHX_ const U8 *s, STRLEN *retlen)
+U8 *
+Perl_uvuni_to_utf8_flags(pTHX_ U8 *d, UV uv, UV flags)
{
- PERL_ARGS_ASSERT_VALID_UTF8_TO_UVUNI;
+ PERL_ARGS_ASSERT_UVUNI_TO_UTF8_FLAGS;
- return NATIVE_TO_UNI(valid_utf8_to_uvchr(s, retlen));
+ return uvoffuni_to_utf8_flags(d, uv, flags);
}
/*
{
PERL_ARGS_ASSERT_UTF8_TO_UVCHR;
- return utf8_to_uvchr_buf(s, s + UTF8_MAXBYTES, retlen);
-}
-
-/*
-=for apidoc utf8_to_uvuni
-
-Returns the Unicode code point of the first character in the string C<s>
-which is assumed to be in UTF-8 encoding; C<retlen> will be set to the
-length, in bytes, of that character.
-
-Some, but not all, UTF-8 malformations are detected, and in fact, some
-malformed input could cause reading beyond the end of the input buffer, which
-is one reason why this function is deprecated. The other is that only in
-extremely limited circumstances should the Unicode versus native code point be
-of any interest to you. See L</utf8_to_uvuni_buf> for alternatives.
-
-If C<s> points to one of the detected malformations, and UTF8 warnings are
-enabled, zero is returned and C<*retlen> is set (if C<retlen> doesn't point to
-NULL) to -1. If those warnings are off, the computed value if well-defined (or
-the Unicode REPLACEMENT CHARACTER, if not) is silently returned, and C<*retlen>
-is set (if C<retlen> isn't NULL) so that (S<C<s> + C<*retlen>>) is the
-next possible position in C<s> that could begin a non-malformed character.
-See L</utf8n_to_uvchr> for details on when the REPLACEMENT CHARACTER is returned.
+ /* This function is unsafe if malformed UTF-8 input is given it, which is
+ * why the function is deprecated. If the first byte of the input
+ * indicates that there are more bytes remaining in the sequence that forms
+ * the character than there are in the input buffer, it can read past the
+ * end. But we can make it safe if the input string happens to be
+ * NUL-terminated, as many strings in Perl are, by refusing to read past a
+ * NUL, which is what UTF8_CHK_SKIP() does. A NUL indicates the start of
+ * the next character anyway. If the input isn't NUL-terminated, the
+ * function remains unsafe, as it always has been. */
-=cut
-*/
-
-UV
-Perl_utf8_to_uvuni(pTHX_ const U8 *s, STRLEN *retlen)
-{
- PERL_ARGS_ASSERT_UTF8_TO_UVUNI;
-
- return NATIVE_TO_UNI(valid_utf8_to_uvchr(s, retlen));
+ return utf8_to_uvchr_buf(s, s + UTF8_CHK_SKIP(s), retlen);
}
-/*
-=for apidoc Am|HV *|pad_compname_type|PADOFFSET po
-
-Looks up the type of the lexical variable at position C<po> in the
-currently-compiling pad. If the variable is typed, the stash of the
-class to which it is typed is returned. If not, C<NULL> is returned.
-
-=cut
-*/
-
-HV *
-Perl_pad_compname_type(pTHX_ const PADOFFSET po)
-{
- return PAD_COMPNAME_TYPE(po);
-}
-
-
-END_EXTERN_C
+GCC_DIAG_RESTORE
#endif /* NO_MATHOMS */