X-Git-Url: https://perl5.git.perl.org/perl5.git/blobdiff_plain/534dad482495d5659cc3fdec2b8189067f5c84e5..6e7a7813c390d2121876b9f8af4f37102e8d51fa:/mathoms.c diff --git a/mathoms.c b/mathoms.c index d530cc0..533a737 100644 --- a/mathoms.c +++ b/mathoms.c @@ -19,19 +19,39 @@ -/* +/* * This file contains mathoms, various binary artifacts from previous - * versions of Perl. For binary or source compatibility reasons, though, - * we cannot completely remove them from the core code. + * versions of Perl which we cannot completely remove from the core + * code. There are two reasons functions should be here: + * + * 1) A function has been replaced by a macro within a minor release, + * so XS modules compiled against an older release will expect to + * still be able to link against the function + * 2) A function Perl_foo(...) with #define foo Perl_foo(aTHX_ ...) + * has been replaced by a macro, e.g. #define foo(...) foo_flags(...,0) + * but XS code may still explicitly use the long form, i.e. + * Perl_foo(aTHX_ ...) + * + * NOTE: ALL FUNCTIONS IN THIS FILE should have an entry with the 'b' flag in + * embed.fnc. + * + * To move a function to this file, simply cut and paste it here, and change + * its embed.fnc entry to additionally have the 'b' flag. If, for some reason + * a function you'd like to be treated as mathoms can't be moved from its + * current place, simply enclose it between * - * REMEMBER to update makedef.pl when adding a function to mathoms.c whose - * name doesn't begin with "Perl_". + * #ifndef NO_MATHOMS + * ... + * #endif * - * SMP - Oct. 24, 2005 + * and add the 'b' flag in embed.fnc. * * The compilation of this file can be suppressed; see INSTALL * -=head1 Obsolete backwards compatibility functions + * Some blurb for perlapi.pod: + + head1 Obsolete backwards compatibility functions + Some of these are also deprecated. You can exclude these from your compiled Perl by adding this option to Configure: C<-Accflags='-DNO_MATHOMS'> @@ -51,65 +71,9 @@ C<-Accflags='-DNO_MATHOMS'> */ #else -/* Not all of these have prototypes elsewhere, so do this to get - * non-mangled names. - */ -START_EXTERN_C - -PERL_CALLCONV OP * Perl_ref(pTHX_ OP *o, I32 type); -PERL_CALLCONV void Perl_sv_unref(pTHX_ SV *sv); -PERL_CALLCONV void Perl_sv_taint(pTHX_ SV *sv); -PERL_CALLCONV IV Perl_sv_2iv(pTHX_ SV *sv); -PERL_CALLCONV UV Perl_sv_2uv(pTHX_ SV *sv); -PERL_CALLCONV NV Perl_sv_2nv(pTHX_ SV *sv); -PERL_CALLCONV char * Perl_sv_2pv(pTHX_ SV *sv, STRLEN *lp); -PERL_CALLCONV char * Perl_sv_2pv_nolen(pTHX_ SV *sv); -PERL_CALLCONV char * Perl_sv_2pvbyte_nolen(pTHX_ SV *sv); -PERL_CALLCONV char * Perl_sv_2pvutf8_nolen(pTHX_ SV *sv); -PERL_CALLCONV void Perl_sv_force_normal(pTHX_ SV *sv); -PERL_CALLCONV void Perl_sv_setsv(pTHX_ SV *dstr, SV *sstr); -PERL_CALLCONV void Perl_sv_catpvn(pTHX_ SV *dsv, const char* sstr, STRLEN slen); -PERL_CALLCONV void Perl_sv_catpvn_mg(pTHX_ SV *sv, const char *ptr, STRLEN len); -PERL_CALLCONV void Perl_sv_catsv(pTHX_ SV *dstr, SV *sstr); -PERL_CALLCONV void Perl_sv_catsv_mg(pTHX_ SV *dsv, SV *ssv); -PERL_CALLCONV char * Perl_sv_pv(pTHX_ SV *sv); -PERL_CALLCONV char * Perl_sv_pvn_force(pTHX_ SV *sv, STRLEN *lp); -PERL_CALLCONV char * Perl_sv_pvbyte(pTHX_ SV *sv); -PERL_CALLCONV char * Perl_sv_pvutf8(pTHX_ SV *sv); -PERL_CALLCONV STRLEN Perl_sv_utf8_upgrade(pTHX_ SV *sv); -PERL_CALLCONV NV Perl_huge(void); -PERL_CALLCONV void Perl_gv_fullname3(pTHX_ SV *sv, const GV *gv, const char *prefix); -PERL_CALLCONV void Perl_gv_efullname3(pTHX_ SV *sv, const GV *gv, const char *prefix); -PERL_CALLCONV GV * Perl_gv_fetchmethod(pTHX_ HV *stash, const char *name); -PERL_CALLCONV HE * Perl_hv_iternext(pTHX_ HV *hv); -PERL_CALLCONV void Perl_hv_magic(pTHX_ HV *hv, GV *gv, int how); -PERL_CALLCONV bool Perl_do_open(pTHX_ GV *gv, const char *name, I32 len, int as_raw, int rawmode, int rawperm, PerlIO *supplied_fp); -PERL_CALLCONV bool Perl_do_aexec(pTHX_ SV *really, SV **mark, SV **sp); -PERL_CALLCONV U8 * Perl_uvuni_to_utf8(pTHX_ U8 *d, UV uv); -PERL_CALLCONV bool Perl_is_utf8_string_loc(pTHX_ const U8 *s, STRLEN len, const U8 **ep); -PERL_CALLCONV void Perl_sv_nolocking(pTHX_ SV *sv); -PERL_CALLCONV void Perl_sv_usepvn_mg(pTHX_ SV *sv, char *ptr, STRLEN len); -PERL_CALLCONV void Perl_sv_usepvn(pTHX_ SV *sv, char *ptr, STRLEN len); -PERL_CALLCONV int Perl_fprintf_nocontext(PerlIO *stream, const char *format, ...); -PERL_CALLCONV int Perl_printf_nocontext(const char *format, ...); -PERL_CALLCONV int Perl_magic_setglob(pTHX_ SV* sv, MAGIC* mg); -PERL_CALLCONV AV * Perl_newAV(pTHX); -PERL_CALLCONV HV * Perl_newHV(pTHX); -PERL_CALLCONV IO * Perl_newIO(pTHX); -PERL_CALLCONV I32 Perl_my_stat(pTHX); -PERL_CALLCONV I32 Perl_my_lstat(pTHX); -PERL_CALLCONV I32 Perl_sv_eq(pTHX_ SV *sv1, SV *sv2); -PERL_CALLCONV char * Perl_sv_collxfrm(pTHX_ SV *const sv, STRLEN *const nxp); -PERL_CALLCONV bool Perl_sv_2bool(pTHX_ SV *const sv); -PERL_CALLCONV CV * Perl_newSUB(pTHX_ I32 floor, OP* o, OP* proto, OP* block); -PERL_CALLCONV UV Perl_to_utf8_lower(pTHX_ const U8 *p, U8* ustrp, STRLEN *lenp); -PERL_CALLCONV UV Perl_to_utf8_title(pTHX_ const U8 *p, U8* ustrp, STRLEN *lenp); -PERL_CALLCONV UV Perl_to_utf8_upper(pTHX_ const U8 *p, U8* ustrp, STRLEN *lenp); -PERL_CALLCONV UV Perl_to_utf8_fold(pTHX_ const U8 *p, U8* ustrp, STRLEN *lenp); -PERL_CALLCONV SV *Perl_sv_mortalcopy(pTHX_ SV *const oldstr); -PERL_CALLCONV char* Perl_instr(const char* big, const char* little) - __attribute__warn_unused_result__ - __attribute__pure__; +/* The functions in this file should be able to call other deprecated functions + * without a compiler warning */ +GCC_DIAG_IGNORE(-Wdeprecated-declarations) /* ref() is now a macro using Perl_doref; * this version provided for binary compatibility only. @@ -121,6 +85,7 @@ Perl_ref(pTHX_ OP *o, I32 type) } /* +=for apidoc_section $SV =for apidoc sv_unref Unsets the RV status of the SV, and decrements the reference count of @@ -279,11 +244,11 @@ Perl_sv_force_normal(pTHX_ SV *sv) */ void -Perl_sv_setsv(pTHX_ SV *dstr, SV *sstr) +Perl_sv_setsv(pTHX_ SV *dsv, SV *ssv) { PERL_ARGS_ASSERT_SV_SETSV; - sv_setsv_flags(dstr, sstr, SV_GMAGIC); + sv_setsv_flags(dsv, ssv, SV_GMAGIC); } /* sv_catpvn() is now a macro using Perl_sv_catpvn_flags(); @@ -298,20 +263,12 @@ Perl_sv_catpvn(pTHX_ SV *dsv, const char* sstr, STRLEN slen) sv_catpvn_flags(dsv, sstr, slen, SV_GMAGIC); } -/* -=for apidoc sv_catpvn_mg - -Like C, but also handles 'set' magic. - -=cut -*/ - void -Perl_sv_catpvn_mg(pTHX_ SV *sv, const char *ptr, STRLEN len) +Perl_sv_catpvn_mg(pTHX_ SV *dsv, const char *sstr, STRLEN len) { PERL_ARGS_ASSERT_SV_CATPVN_MG; - sv_catpvn_flags(sv,ptr,len,SV_GMAGIC|SV_SMAGIC); + sv_catpvn_flags(dsv,sstr,len,SV_GMAGIC|SV_SMAGIC); } /* sv_catsv() is now a macro using Perl_sv_catsv_flags(); @@ -319,27 +276,19 @@ Perl_sv_catpvn_mg(pTHX_ SV *sv, const char *ptr, STRLEN len) */ void -Perl_sv_catsv(pTHX_ SV *dstr, SV *sstr) +Perl_sv_catsv(pTHX_ SV *dsv, SV *sstr) { PERL_ARGS_ASSERT_SV_CATSV; - sv_catsv_flags(dstr, sstr, SV_GMAGIC); + sv_catsv_flags(dsv, sstr, SV_GMAGIC); } -/* -=for apidoc sv_catsv_mg - -Like C, but also handles 'set' magic. - -=cut -*/ - void -Perl_sv_catsv_mg(pTHX_ SV *dsv, SV *ssv) +Perl_sv_catsv_mg(pTHX_ SV *dsv, SV *sstr) { PERL_ARGS_ASSERT_SV_CATSV_MG; - sv_catsv_flags(dsv,ssv,SV_GMAGIC|SV_SMAGIC); + sv_catsv_flags(dsv,sstr,SV_GMAGIC|SV_SMAGIC); } /* @@ -357,9 +306,9 @@ Perl_sv_iv(pTHX_ SV *sv) PERL_ARGS_ASSERT_SV_IV; if (SvIOK(sv)) { - if (SvIsUV(sv)) - return (IV)SvUVX(sv); - return SvIVX(sv); + if (SvIsUV(sv)) + return (IV)SvUVX(sv); + return SvIVX(sv); } return sv_2iv(sv); } @@ -379,9 +328,9 @@ Perl_sv_uv(pTHX_ SV *sv) PERL_ARGS_ASSERT_SV_UV; if (SvIOK(sv)) { - if (SvIsUV(sv)) - return SvUVX(sv); - return (UV)SvIVX(sv); + if (SvIsUV(sv)) + return SvUVX(sv); + return (UV)SvIVX(sv); } return sv_2uv(sv); } @@ -401,7 +350,7 @@ Perl_sv_nv(pTHX_ SV *sv) PERL_ARGS_ASSERT_SV_NV; if (SvNOK(sv)) - return SvNVX(sv); + return SvNVX(sv); return sv_2nv(sv); } @@ -424,8 +373,8 @@ Perl_sv_pvn(pTHX_ SV *sv, STRLEN *lp) PERL_ARGS_ASSERT_SV_PVN; if (SvPOK(sv)) { - *lp = SvCUR(sv); - return SvPVX(sv); + *lp = SvCUR(sv); + return SvPVX(sv); } return sv_2pv(sv, lp); } @@ -437,8 +386,8 @@ Perl_sv_pvn_nomg(pTHX_ SV *sv, STRLEN *lp) PERL_ARGS_ASSERT_SV_PVN_NOMG; if (SvPOK(sv)) { - *lp = SvCUR(sv); - return SvPVX(sv); + *lp = SvCUR(sv); + return SvPVX(sv); } return sv_2pv_flags(sv, lp, 0); } @@ -558,11 +507,11 @@ int Perl_fprintf_nocontext(PerlIO *stream, const char *format, ...) { int ret = 0; - va_list(arglist); + va_list arglist; /* Easier to special case this here than in embed.pl. (Look at what it generates for proto.h) */ -#ifdef PERL_IMPLICIT_CONTEXT +#ifdef MULTIPLICITY PERL_ARGS_ASSERT_FPRINTF_NOCONTEXT; #endif @@ -576,10 +525,10 @@ int Perl_printf_nocontext(const char *format, ...) { dTHX; - va_list(arglist); + va_list arglist; int ret = 0; -#ifdef PERL_IMPLICIT_CONTEXT +#ifdef MULTIPLICITY PERL_ARGS_ASSERT_PRINTF_NOCONTEXT; #endif @@ -641,6 +590,7 @@ Perl_gv_efullname3(pTHX_ SV *sv, const GV *gv, const char *prefix) } /* +=for apidoc_section $GV =for apidoc gv_fetchmethod See L. @@ -674,12 +624,12 @@ Perl_hv_magic(pTHX_ HV *hv, GV *gv, int how) bool Perl_do_open(pTHX_ GV *gv, const char *name, I32 len, int as_raw, - int rawmode, int rawperm, PerlIO *supplied_fp) + int rawmode, int rawperm, PerlIO *supplied_fp) { PERL_ARGS_ASSERT_DO_OPEN; return do_openn(gv, name, len, as_raw, rawmode, rawperm, - supplied_fp, (SV **) NULL, 0); + supplied_fp, (SV **) NULL, 0); } bool @@ -730,15 +680,15 @@ Perl_init_i18nl14n(pTHX_ int printwarn) } bool -Perl_is_utf8_string_loc(pTHX_ const U8 *s, STRLEN len, const U8 **ep) +Perl_is_utf8_string_loc(const U8 *s, const STRLEN len, const U8 **ep) { PERL_ARGS_ASSERT_IS_UTF8_STRING_LOC; - PERL_UNUSED_CONTEXT; return is_utf8_string_loclen(s, len, ep, 0); } /* +=for apidoc_section $SV =for apidoc sv_nolocking Dummy routine which "locks" an SV when there is no locking module present. @@ -768,6 +718,10 @@ potentially warn under some level of strict-ness. "Superseded" by C. =cut + +PERL_UNLOCK_HOOK in intrpvar.h is the macro that refers to this, and guarantees +that mathoms gets loaded. + */ void @@ -789,17 +743,6 @@ Perl_save_long(pTHX_ long int *longp) } void -Perl_save_iv(pTHX_ IV *ivp) -{ - PERL_ARGS_ASSERT_SAVE_IV; - - SSCHECK(3); - SSPUSHIV(*ivp); - SSPUSHPTR(ivp); - SSPUSHUV(SAVEt_IV); -} - -void Perl_save_nogv(pTHX_ GV *gv) { PERL_ARGS_ASSERT_SAVE_NOGV; @@ -817,14 +760,14 @@ Perl_save_list(pTHX_ SV **sarg, I32 maxsarg) PERL_ARGS_ASSERT_SAVE_LIST; for (i = 1; i <= maxsarg; i++) { - SV *sv; - SvGETMAGIC(sarg[i]); - sv = newSV(0); - sv_setsv_nomg(sv,sarg[i]); - SSCHECK(3); - SSPUSHPTR(sarg[i]); /* remember the pointer */ - SSPUSHPTR(sv); /* remember the value */ - SSPUSHUV(SAVEt_ITEM); + SV *sv; + SvGETMAGIC(sarg[i]); + sv = newSV(0); + sv_setsv_nomg(sv,sarg[i]); + SSCHECK(3); + SSPUSHPTR(sarg[i]); /* remember the pointer */ + SSPUSHPTR(sv); /* remember the value */ + SSPUSHUV(SAVEt_ITEM); } } @@ -863,6 +806,7 @@ Perl_sv_usepvn(pTHX_ SV *sv, char *ptr, STRLEN len) } /* +=for apidoc_section $pack =for apidoc unpack_str The engine implementing C Perl function. Note: parameters C, @@ -871,10 +815,10 @@ C instead. =cut */ -I32 +SSize_t Perl_unpack_str(pTHX_ const char *pat, const char *patend, const char *s, - const char *strbeg, const char *strend, char **new_s, I32 ocnt, - U32 flags) + const char *strbeg, const char *strend, char **new_s, I32 ocnt, + U32 flags) { PERL_ARGS_ASSERT_UNPACK_STR; @@ -890,7 +834,7 @@ Perl_unpack_str(pTHX_ const char *pat, const char *patend, const char *s, The engine implementing C Perl function. Note: parameters C and C are not used. This call should not be used; use -C instead. +C> instead. =cut */ @@ -917,8 +861,7 @@ Perl_hv_exists_ent(pTHX_ HV *hv, SV *keysv, U32 hash) { PERL_ARGS_ASSERT_HV_EXISTS_ENT; - return hv_common(hv, keysv, NULL, 0, 0, HV_FETCH_ISEXISTS, 0, hash) - ? TRUE : FALSE; + return cBOOL(hv_common(hv, keysv, NULL, 0, 0, HV_FETCH_ISEXISTS, 0, hash)); } HE * @@ -927,7 +870,7 @@ Perl_hv_fetch_ent(pTHX_ HV *hv, SV *keysv, I32 lval, U32 hash) PERL_ARGS_ASSERT_HV_FETCH_ENT; return (HE *)hv_common(hv, keysv, NULL, 0, 0, - (lval ? HV_FETCH_LVALUE : 0), NULL, hash); + (lval ? HV_FETCH_LVALUE : 0), NULL, hash); } SV * @@ -936,15 +879,15 @@ Perl_hv_delete_ent(pTHX_ HV *hv, SV *keysv, I32 flags, U32 hash) PERL_ARGS_ASSERT_HV_DELETE_ENT; return MUTABLE_SV(hv_common(hv, keysv, NULL, 0, 0, flags | HV_DELETE, NULL, - hash)); + hash)); } SV** Perl_hv_store_flags(pTHX_ HV *hv, const char *key, I32 klen, SV *val, U32 hash, - int flags) + int flags) { return (SV**) hv_common(hv, NULL, key, klen, flags, - (HV_FETCH_ISSTORE|HV_FETCH_JUST_SV), val, hash); + (HV_FETCH_ISSTORE|HV_FETCH_JUST_SV), val, hash); } SV** @@ -954,14 +897,14 @@ Perl_hv_store(pTHX_ HV *hv, const char *key, I32 klen_i32, SV *val, U32 hash) int flags; if (klen_i32 < 0) { - klen = -klen_i32; - flags = HVhek_UTF8; + klen = -klen_i32; + flags = HVhek_UTF8; } else { - klen = klen_i32; - flags = 0; + klen = klen_i32; + flags = 0; } return (SV **) hv_common(hv, NULL, key, klen, flags, - (HV_FETCH_ISSTORE|HV_FETCH_JUST_SV), val, hash); + (HV_FETCH_ISSTORE|HV_FETCH_JUST_SV), val, hash); } bool @@ -973,14 +916,13 @@ Perl_hv_exists(pTHX_ HV *hv, const char *key, I32 klen_i32) PERL_ARGS_ASSERT_HV_EXISTS; if (klen_i32 < 0) { - klen = -klen_i32; - flags = HVhek_UTF8; + klen = -klen_i32; + flags = HVhek_UTF8; } else { - klen = klen_i32; - flags = 0; + klen = klen_i32; + flags = 0; } - return hv_common(hv, NULL, key, klen, flags, HV_FETCH_ISEXISTS, 0, 0) - ? TRUE : FALSE; + return cBOOL(hv_common(hv, NULL, key, klen, flags, HV_FETCH_ISEXISTS, 0, 0)); } SV** @@ -992,15 +934,15 @@ Perl_hv_fetch(pTHX_ HV *hv, const char *key, I32 klen_i32, I32 lval) PERL_ARGS_ASSERT_HV_FETCH; if (klen_i32 < 0) { - klen = -klen_i32; - flags = HVhek_UTF8; + klen = -klen_i32; + flags = HVhek_UTF8; } else { - klen = klen_i32; - flags = 0; + klen = klen_i32; + flags = 0; } return (SV **) hv_common(hv, NULL, key, klen, flags, - lval ? (HV_FETCH_JUST_SV | HV_FETCH_LVALUE) - : HV_FETCH_JUST_SV, NULL, 0); + lval ? (HV_FETCH_JUST_SV | HV_FETCH_LVALUE) + : HV_FETCH_JUST_SV, NULL, 0); } SV * @@ -1012,14 +954,14 @@ Perl_hv_delete(pTHX_ HV *hv, const char *key, I32 klen_i32, I32 flags) PERL_ARGS_ASSERT_HV_DELETE; if (klen_i32 < 0) { - klen = -klen_i32; - k_flags = HVhek_UTF8; + klen = -klen_i32; + k_flags = HVhek_UTF8; } else { - klen = klen_i32; - k_flags = 0; + klen = klen_i32; + k_flags = 0; } return MUTABLE_SV(hv_common(hv, NULL, key, klen, k_flags, flags | HV_DELETE, - NULL, 0)); + NULL, 0)); } AV * @@ -1135,18 +1077,33 @@ Perl_sv_eq(pTHX_ SV *sv1, SV *sv2) char * Perl_sv_collxfrm(pTHX_ SV *const sv, STRLEN *const nxp) { + PERL_ARGS_ASSERT_SV_COLLXFRM; return sv_collxfrm_flags(sv, nxp, SV_GMAGIC); } + +char * +Perl_mem_collxfrm(pTHX_ const char *input_string, STRLEN len, STRLEN *xlen) +{ + /* This function is retained for compatibility in case someone outside core + * is using this (but it is undocumented) */ + + PERL_ARGS_ASSERT_MEM_COLLXFRM; + + return _mem_collxfrm(input_string, len, xlen, FALSE); +} + #endif bool Perl_sv_2bool(pTHX_ SV *const sv) { + PERL_ARGS_ASSERT_SV_2BOOL; return sv_2bool_flags(sv, SV_GMAGIC); } /* +=for apidoc_section $custom =for apidoc custom_op_name Return the name for a given custom op. This was once used by the C macro, but is no longer: it has only been kept for compatibility, and @@ -1180,42 +1137,10 @@ Perl_newSUB(pTHX_ I32 floor, OP *o, OP *proto, OP *block) return newATTRSUB(floor, o, proto, NULL, block); } -UV -Perl_to_utf8_fold(pTHX_ const U8 *p, U8* ustrp, STRLEN *lenp) -{ - PERL_ARGS_ASSERT_TO_UTF8_FOLD; - - return _to_utf8_fold_flags(p, ustrp, lenp, FOLD_FLAGS_FULL); -} - -UV -Perl_to_utf8_lower(pTHX_ const U8 *p, U8* ustrp, STRLEN *lenp) -{ - PERL_ARGS_ASSERT_TO_UTF8_LOWER; - - return _to_utf8_lower_flags(p, ustrp, lenp, FALSE); -} - -UV -Perl_to_utf8_title(pTHX_ const U8 *p, U8* ustrp, STRLEN *lenp) -{ - PERL_ARGS_ASSERT_TO_UTF8_TITLE; - - return _to_utf8_title_flags(p, ustrp, lenp, FALSE); -} - -UV -Perl_to_utf8_upper(pTHX_ const U8 *p, U8* ustrp, STRLEN *lenp) -{ - PERL_ARGS_ASSERT_TO_UTF8_UPPER; - - return _to_utf8_upper_flags(p, ustrp, lenp, FALSE); -} - SV * -Perl_sv_mortalcopy(pTHX_ SV *const oldstr) +Perl_sv_mortalcopy(pTHX_ SV *const oldsv) { - return Perl_sv_mortalcopy_flags(aTHX_ oldstr, SV_GMAGIC); + return Perl_sv_mortalcopy_flags(aTHX_ oldsv, SV_GMAGIC); } void @@ -1223,7 +1148,7 @@ Perl_sv_copypv(pTHX_ SV *const dsv, SV *const ssv) { PERL_ARGS_ASSERT_SV_COPYPV; - sv_copypv_flags(dsv, ssv, 0); + sv_copypv_flags(dsv, ssv, SV_GMAGIC); } UV /* Made into a function, so can be deprecated */ @@ -1240,497 +1165,208 @@ ASCII_TO_NEED(const UV enc, const UV ch) return ch; } -bool /* Made into a function, so can be deprecated */ -Perl_isIDFIRST_lazy(pTHX_ const char* p) -{ - PERL_ARGS_ASSERT_ISIDFIRST_LAZY; - - return isIDFIRST_lazy_if(p,1); -} - -bool /* Made into a function, so can be deprecated */ -Perl_isALNUM_lazy(pTHX_ const char* p) -{ - PERL_ARGS_ASSERT_ISALNUM_LAZY; - - return isALNUM_lazy_if(p,1); -} - -bool -Perl_is_uni_alnum(pTHX_ UV c) -{ - return isWORDCHAR_uni(c); -} - -bool -Perl_is_uni_alnumc(pTHX_ UV c) -{ - return isALNUM_uni(c); -} - -bool -Perl_is_uni_alpha(pTHX_ UV c) -{ - return isALPHA_uni(c); -} - -bool -Perl_is_uni_ascii(pTHX_ UV c) -{ - PERL_UNUSED_CONTEXT; - return isASCII_uni(c); -} - -bool -Perl_is_uni_blank(pTHX_ UV c) -{ - PERL_UNUSED_CONTEXT; - return isBLANK_uni(c); -} - -bool -Perl_is_uni_space(pTHX_ UV c) -{ - PERL_UNUSED_CONTEXT; - return isSPACE_uni(c); -} - -bool -Perl_is_uni_digit(pTHX_ UV c) -{ - PERL_UNUSED_CONTEXT; - return isDIGIT_uni(c); -} - -bool -Perl_is_uni_upper(pTHX_ UV c) -{ - PERL_UNUSED_CONTEXT; - return isUPPER_uni(c); -} - -bool -Perl_is_uni_lower(pTHX_ UV c) -{ - PERL_UNUSED_CONTEXT; - return isLOWER_uni(c); -} - -bool -Perl_is_uni_cntrl(pTHX_ UV c) -{ - PERL_UNUSED_CONTEXT; - return isCNTRL_L1(c); -} - -bool -Perl_is_uni_graph(pTHX_ UV c) -{ - PERL_UNUSED_CONTEXT; - return isGRAPH_uni(c); -} - -bool -Perl_is_uni_print(pTHX_ UV c) -{ - PERL_UNUSED_CONTEXT; - return isPRINT_uni(c); -} - -bool -Perl_is_uni_punct(pTHX_ UV c) -{ - PERL_UNUSED_CONTEXT; - return isPUNCT_uni(c); -} - -bool -Perl_is_uni_xdigit(pTHX_ UV c) -{ - PERL_UNUSED_CONTEXT; - return isXDIGIT_uni(c); -} - -bool -Perl_is_uni_alnum_lc(pTHX_ UV c) -{ - PERL_UNUSED_CONTEXT; - return isWORDCHAR_LC_uvchr(c); -} - -bool -Perl_is_uni_alnumc_lc(pTHX_ UV c) -{ - PERL_UNUSED_CONTEXT; - return isALPHANUMERIC_LC_uvchr(c); -} - -bool -Perl_is_uni_idfirst_lc(pTHX_ UV c) -{ - PERL_UNUSED_CONTEXT; - /* XXX Should probably be something that resolves to the old IDFIRST, but - * this function is deprecated, so not bothering */ - return isIDFIRST_LC_uvchr(c); -} - -bool -Perl_is_uni_alpha_lc(pTHX_ UV c) -{ - PERL_UNUSED_CONTEXT; - return isALPHA_LC_uvchr(c); -} - -bool -Perl_is_uni_ascii_lc(pTHX_ UV c) -{ - PERL_UNUSED_CONTEXT; - return isASCII_LC_uvchr(c); -} - -bool -Perl_is_uni_blank_lc(pTHX_ UV c) -{ - PERL_UNUSED_CONTEXT; - return isBLANK_LC_uvchr(c); -} - -bool -Perl_is_uni_space_lc(pTHX_ UV c) -{ - PERL_UNUSED_CONTEXT; - return isSPACE_LC_uvchr(c); -} - -bool -Perl_is_uni_digit_lc(pTHX_ UV c) -{ - return isDIGIT_LC_uvchr(c); -} - -bool -Perl_is_uni_idfirst(pTHX_ UV c) -{ - U8 tmpbuf[UTF8_MAXBYTES+1]; - uvchr_to_utf8(tmpbuf, c); - return _is_utf8_idstart(tmpbuf); -} - -bool -Perl_is_utf8_idfirst(pTHX_ const U8 *p) /* The naming is historical. */ -{ - PERL_ARGS_ASSERT_IS_UTF8_IDFIRST; - - return _is_utf8_idstart(p); -} - -bool -Perl_is_utf8_xidfirst(pTHX_ const U8 *p) /* The naming is historical. */ -{ - PERL_ARGS_ASSERT_IS_UTF8_XIDFIRST; - - return _is_utf8_xidstart(p); -} - -bool -Perl_is_utf8_idcont(pTHX_ const U8 *p) -{ - PERL_ARGS_ASSERT_IS_UTF8_IDCONT; - - return _is_utf8_idcont(p); -} - -bool -Perl_is_utf8_xidcont(pTHX_ const U8 *p) -{ - PERL_ARGS_ASSERT_IS_UTF8_XIDCONT; - - return _is_utf8_xidcont(p); -} - -bool -Perl_is_uni_upper_lc(pTHX_ UV c) -{ - return isUPPER_LC_uvchr(c); -} - -bool -Perl_is_uni_lower_lc(pTHX_ UV c) -{ - return isLOWER_LC_uvchr(c); -} +/* +=for apidoc_section $unicode +=for apidoc is_utf8_char -bool -Perl_is_uni_cntrl_lc(pTHX_ UV c) -{ - return isCNTRL_LC_uvchr(c); -} +Tests if some arbitrary number of bytes begins in a valid UTF-8 +character. Note that an INVARIANT (i.e. ASCII on non-EBCDIC machines) +character is a valid UTF-8 character. The actual number of bytes in the UTF-8 +character will be returned if it is valid, otherwise 0. -bool -Perl_is_uni_graph_lc(pTHX_ UV c) -{ - return isGRAPH_LC_uvchr(c); -} +This function is deprecated due to the possibility that malformed input could +cause reading beyond the end of the input buffer. Use L +instead. -bool -Perl_is_uni_print_lc(pTHX_ UV c) -{ - return isPRINT_LC_uvchr(c); -} +=cut */ -bool -Perl_is_uni_punct_lc(pTHX_ UV c) +STRLEN +Perl_is_utf8_char(const U8 *s) { - return isPUNCT_LC_uvchr(c); -} + PERL_ARGS_ASSERT_IS_UTF8_CHAR; -bool -Perl_is_uni_xdigit_lc(pTHX_ UV c) -{ - return isXDIGIT_LC_uvchr(c); + /* Assumes we have enough space, which is why this is deprecated. But the + * UTF8_CHK_SKIP(s)) makes it safe for the common case of NUL-terminated + * strings */ + return isUTF8_CHAR(s, s + UTF8_CHK_SKIP(s)); } -U32 -Perl_to_uni_upper_lc(pTHX_ U32 c) -{ - /* XXX returns only the first character -- do not use XXX */ - /* XXX no locale support yet */ - STRLEN len; - U8 tmpbuf[UTF8_MAXBYTES_CASE+1]; - return (U32)to_uni_upper(c, tmpbuf, &len); -} +/* +=for apidoc is_utf8_char_buf -U32 -Perl_to_uni_title_lc(pTHX_ U32 c) -{ - /* XXX returns only the first character XXX -- do not use XXX */ - /* XXX no locale support yet */ - STRLEN len; - U8 tmpbuf[UTF8_MAXBYTES_CASE+1]; - return (U32)to_uni_title(c, tmpbuf, &len); -} +This is identical to the macro L. -U32 -Perl_to_uni_lower_lc(pTHX_ U32 c) -{ - /* XXX returns only the first character -- do not use XXX */ - /* XXX no locale support yet */ - STRLEN len; - U8 tmpbuf[UTF8_MAXBYTES_CASE+1]; - return (U32)to_uni_lower(c, tmpbuf, &len); -} +=cut */ -bool -Perl_is_utf8_alnum(pTHX_ const U8 *p) +STRLEN +Perl_is_utf8_char_buf(const U8 *buf, const U8* buf_end) { - PERL_ARGS_ASSERT_IS_UTF8_ALNUM; - - /* NOTE: "IsWord", not "IsAlnum", since Alnum is a true - * descendant of isalnum(3), in other words, it doesn't - * contain the '_'. --jhi */ - return isWORDCHAR_utf8(p); -} -bool -Perl_is_utf8_alnumc(pTHX_ const U8 *p) -{ - PERL_ARGS_ASSERT_IS_UTF8_ALNUMC; + PERL_ARGS_ASSERT_IS_UTF8_CHAR_BUF; - return isALPHANUMERIC_utf8(p); + return isUTF8_CHAR(buf, buf_end); } -bool -Perl_is_utf8_alpha(pTHX_ const U8 *p) -{ - PERL_ARGS_ASSERT_IS_UTF8_ALPHA; - - return isALPHA_utf8(p); -} +/* DEPRECATED! + * Like L(), but should only be called when it is known that + * there are no malformations in the input UTF-8 string C. Surrogates, + * non-character code points, and non-Unicode code points are allowed */ -bool -Perl_is_utf8_ascii(pTHX_ const U8 *p) +UV +Perl_valid_utf8_to_uvuni(pTHX_ const U8 *s, STRLEN *retlen) { - PERL_ARGS_ASSERT_IS_UTF8_ASCII; PERL_UNUSED_CONTEXT; + PERL_ARGS_ASSERT_VALID_UTF8_TO_UVUNI; - return isASCII_utf8(p); + return NATIVE_TO_UNI(valid_utf8_to_uvchr(s, retlen)); } -bool -Perl_is_utf8_blank(pTHX_ const U8 *p) -{ - PERL_ARGS_ASSERT_IS_UTF8_BLANK; - PERL_UNUSED_CONTEXT; - - return isBLANK_utf8(p); -} +/* +=for apidoc utf8_to_uvuni -bool -Perl_is_utf8_space(pTHX_ const U8 *p) -{ - PERL_ARGS_ASSERT_IS_UTF8_SPACE; - PERL_UNUSED_CONTEXT; +Returns the Unicode code point of the first character in the string C +which is assumed to be in UTF-8 encoding; C will be set to the +length, in bytes, of that character. - return isSPACE_utf8(p); -} +Some, but not all, UTF-8 malformations are detected, and in fact, some +malformed input could cause reading beyond the end of the input buffer, which +is one reason why this function is deprecated. The other is that only in +extremely limited circumstances should the Unicode versus native code point be +of any interest to you. See L for alternatives. -bool -Perl_is_utf8_perl_space(pTHX_ const U8 *p) -{ - PERL_ARGS_ASSERT_IS_UTF8_PERL_SPACE; - PERL_UNUSED_CONTEXT; +If C points to one of the detected malformations, and UTF8 warnings are +enabled, zero is returned and C<*retlen> is set (if C doesn't point to +NULL) to -1. If those warnings are off, the computed value if well-defined (or +the Unicode REPLACEMENT CHARACTER, if not) is silently returned, and C<*retlen> +is set (if C isn't NULL) so that (S + C<*retlen>>) is the +next possible position in C that could begin a non-malformed character. +See L for details on when the REPLACEMENT CHARACTER is returned. - /* Only true if is an ASCII space-like character, and ASCII is invariant - * under utf8, so can just use the macro */ - return isSPACE_A(*p); -} +=cut +*/ -bool -Perl_is_utf8_perl_word(pTHX_ const U8 *p) +UV +Perl_utf8_to_uvuni(pTHX_ const U8 *s, STRLEN *retlen) { - PERL_ARGS_ASSERT_IS_UTF8_PERL_WORD; PERL_UNUSED_CONTEXT; + PERL_ARGS_ASSERT_UTF8_TO_UVUNI; - /* Only true if is an ASCII word character, and ASCII is invariant - * under utf8, so can just use the macro */ - return isWORDCHAR_A(*p); + return NATIVE_TO_UNI(valid_utf8_to_uvchr(s, retlen)); } -bool -Perl_is_utf8_digit(pTHX_ const U8 *p) -{ - PERL_ARGS_ASSERT_IS_UTF8_DIGIT; +/* +=for apidoc pad_compname_type - return isDIGIT_utf8(p); -} +Looks up the type of the lexical variable at position C in the +currently-compiling pad. If the variable is typed, the stash of the +class to which it is typed is returned. If not, C is returned. -bool -Perl_is_utf8_posix_digit(pTHX_ const U8 *p) -{ - PERL_ARGS_ASSERT_IS_UTF8_POSIX_DIGIT; - PERL_UNUSED_CONTEXT; +Use L> instead. - /* Only true if is an ASCII digit character, and ASCII is invariant - * under utf8, so can just use the macro */ - return isDIGIT_A(*p); -} +=cut +*/ -bool -Perl_is_utf8_upper(pTHX_ const U8 *p) +HV * +Perl_pad_compname_type(pTHX_ const PADOFFSET po) { - PERL_ARGS_ASSERT_IS_UTF8_UPPER; - - return isUPPER_utf8(p); + return PAD_COMPNAME_TYPE(po); } -bool -Perl_is_utf8_lower(pTHX_ const U8 *p) -{ - PERL_ARGS_ASSERT_IS_UTF8_LOWER; - - return isLOWER_utf8(p); -} +/* return ptr to little string in big string, NULL if not found */ +/* The original version of this routine was donated by Corey Satten. */ -bool -Perl_is_utf8_cntrl(pTHX_ const U8 *p) +char * +Perl_instr(const char *big, const char *little) { - PERL_ARGS_ASSERT_IS_UTF8_CNTRL; - PERL_UNUSED_CONTEXT; + PERL_ARGS_ASSERT_INSTR; - return isCNTRL_utf8(p); + return instr(big, little); } -bool -Perl_is_utf8_graph(pTHX_ const U8 *p) +SV * +Perl_newSVsv(pTHX_ SV *const old) { - PERL_ARGS_ASSERT_IS_UTF8_GRAPH; - - return isGRAPH_utf8(p); + return newSVsv(old); } bool -Perl_is_utf8_print(pTHX_ const U8 *p) +Perl_sv_utf8_downgrade(pTHX_ SV *const sv, const bool fail_ok) { - PERL_ARGS_ASSERT_IS_UTF8_PRINT; + PERL_ARGS_ASSERT_SV_UTF8_DOWNGRADE; - return isPRINT_utf8(p); + return sv_utf8_downgrade(sv, fail_ok); } -bool -Perl_is_utf8_punct(pTHX_ const U8 *p) +char * +Perl_sv_2pvutf8(pTHX_ SV *sv, STRLEN *const lp) { - PERL_ARGS_ASSERT_IS_UTF8_PUNCT; + PERL_ARGS_ASSERT_SV_2PVUTF8; - return isPUNCT_utf8(p); + return sv_2pvutf8(sv, lp); } -bool -Perl_is_utf8_xdigit(pTHX_ const U8 *p) +char * +Perl_sv_2pvbyte(pTHX_ SV *sv, STRLEN *const lp) { - PERL_ARGS_ASSERT_IS_UTF8_XDIGIT; - PERL_UNUSED_CONTEXT; + PERL_ARGS_ASSERT_SV_2PVBYTE; - return isXDIGIT_utf8(p); + return sv_2pvbyte(sv, lp); } -bool -Perl_is_utf8_mark(pTHX_ const U8 *p) +U8 * +Perl_uvuni_to_utf8(pTHX_ U8 *d, UV uv) { - PERL_ARGS_ASSERT_IS_UTF8_MARK; + PERL_ARGS_ASSERT_UVUNI_TO_UTF8; - return _is_utf8_mark(p); + return uvoffuni_to_utf8_flags(d, uv, 0); } /* -=for apidoc is_utf8_char +=for apidoc utf8n_to_uvuni -Tests if some arbitrary number of bytes begins in a valid UTF-8 -character. Note that an INVARIANT (i.e. ASCII on non-EBCDIC machines) -character is a valid UTF-8 character. The actual number of bytes in the UTF-8 -character will be returned if it is valid, otherwise 0. +Instead use L, or rarely, L. -This function is deprecated due to the possibility that malformed input could -cause reading beyond the end of the input buffer. Use L -instead. +This function was useful for code that wanted to handle both EBCDIC and +ASCII platforms with Unicode properties, but starting in Perl v5.20, the +distinctions between the platforms have mostly been made invisible to most +code, so this function is quite unlikely to be what you want. If you do need +this precise functionality, use instead +C> +or C>. -=cut */ +=cut +*/ -STRLEN -Perl_is_utf8_char(const U8 *s) +UV +Perl_utf8n_to_uvuni(pTHX_ const U8 *s, STRLEN curlen, STRLEN *retlen, U32 flags) { - PERL_ARGS_ASSERT_IS_UTF8_CHAR; + PERL_ARGS_ASSERT_UTF8N_TO_UVUNI; - /* Assumes we have enough space, which is why this is deprecated */ - return isUTF8_CHAR(s, s + UTF8SKIP(s)); + return NATIVE_TO_UNI(utf8n_to_uvchr(s, curlen, retlen, flags)); } /* -=for apidoc is_utf8_char_buf +=for apidoc uvuni_to_utf8_flags -This is identical to the macro L. +Instead you almost certainly want to use L or +L. -=cut */ - -STRLEN -Perl_is_utf8_char_buf(const U8 *buf, const U8* buf_end) -{ - - PERL_ARGS_ASSERT_IS_UTF8_CHAR_BUF; - - return isUTF8_CHAR(buf, buf_end); -} +This function is a deprecated synonym for L, +which itself, while not deprecated, should be used only in isolated +circumstances. These functions were useful for code that wanted to handle +both EBCDIC and ASCII platforms with Unicode properties, but starting in Perl +v5.20, the distinctions between the platforms have mostly been made invisible +to most code, so this function is quite unlikely to be what you want. -/* DEPRECATED! - * Like L(), but should only be called when it is known that - * there are no malformations in the input UTF-8 string C. Surrogates, - * non-character code points, and non-Unicode code points are allowed */ +=cut +*/ -UV -Perl_valid_utf8_to_uvuni(pTHX_ const U8 *s, STRLEN *retlen) +U8 * +Perl_uvuni_to_utf8_flags(pTHX_ U8 *d, UV uv, UV flags) { - PERL_ARGS_ASSERT_VALID_UTF8_TO_UVUNI; + PERL_ARGS_ASSERT_UVUNI_TO_UTF8_FLAGS; - return NATIVE_TO_UNI(valid_utf8_to_uvchr(s, retlen)); + return uvoffuni_to_utf8_flags(d, uv, flags); } /* @@ -1760,76 +1396,20 @@ Perl_utf8_to_uvchr(pTHX_ const U8 *s, STRLEN *retlen) { PERL_ARGS_ASSERT_UTF8_TO_UVCHR; - return utf8_to_uvchr_buf(s, s + UTF8_MAXBYTES, retlen); -} - -/* -=for apidoc utf8_to_uvuni - -Returns the Unicode code point of the first character in the string C -which is assumed to be in UTF-8 encoding; C will be set to the -length, in bytes, of that character. - -Some, but not all, UTF-8 malformations are detected, and in fact, some -malformed input could cause reading beyond the end of the input buffer, which -is one reason why this function is deprecated. The other is that only in -extremely limited circumstances should the Unicode versus native code point be -of any interest to you. See L for alternatives. - -If C points to one of the detected malformations, and UTF8 warnings are -enabled, zero is returned and C<*retlen> is set (if C doesn't point to -NULL) to -1. If those warnings are off, the computed value if well-defined (or -the Unicode REPLACEMENT CHARACTER, if not) is silently returned, and C<*retlen> -is set (if C isn't NULL) so that (S + C<*retlen>>) is the -next possible position in C that could begin a non-malformed character. -See L for details on when the REPLACEMENT CHARACTER is returned. - -=cut -*/ - -UV -Perl_utf8_to_uvuni(pTHX_ const U8 *s, STRLEN *retlen) -{ - PERL_ARGS_ASSERT_UTF8_TO_UVUNI; - - return NATIVE_TO_UNI(valid_utf8_to_uvchr(s, retlen)); -} - -/* -=for apidoc Am|HV *|pad_compname_type|PADOFFSET po - -Looks up the type of the lexical variable at position C in the -currently-compiling pad. If the variable is typed, the stash of the -class to which it is typed is returned. If not, C is returned. - -=cut -*/ - -HV * -Perl_pad_compname_type(pTHX_ const PADOFFSET po) -{ - return PAD_COMPNAME_TYPE(po); -} - -/* now a macro */ -/* return ptr to little string in big string, NULL if not found */ -/* This routine was donated by Corey Satten. */ - -char * -Perl_instr(const char *big, const char *little) -{ - /* Porting tests require this macro to be used even though it doesn't exist - * (except for the commented-out version in proto.h). So provide a commented-out - * "use" of the prototype and supply a real version of what it expanded to. - PERL_ARGS_ASSERT_INSTR; - */ - assert(big); - assert(little); + /* This function is unsafe if malformed UTF-8 input is given it, which is + * why the function is deprecated. If the first byte of the input + * indicates that there are more bytes remaining in the sequence that forms + * the character than there are in the input buffer, it can read past the + * end. But we can make it safe if the input string happens to be + * NUL-terminated, as many strings in Perl are, by refusing to read past a + * NUL, which is what UTF8_CHK_SKIP() does. A NUL indicates the start of + * the next character anyway. If the input isn't NUL-terminated, the + * function remains unsafe, as it always has been. */ - return strstr((char*)big, (char*)little); + return utf8_to_uvchr_buf(s, s + UTF8_CHK_SKIP(s), retlen); } -END_EXTERN_C +GCC_DIAG_RESTORE #endif /* NO_MATHOMS */