X-Git-Url: https://perl5.git.perl.org/perl5.git/blobdiff_plain/445bf929f6118f5f2b0e19171f576c3a6d7ada50..1d1f77a4a3d406b218c3066fcecead9a27cb2e11:/mathoms.c

diff --git a/mathoms.c b/mathoms.c
index 73f1e8d..fa60621 100644
--- a/mathoms.c
+++ b/mathoms.c
@@ -28,8 +28,16 @@
  *
  * The compilation of this file can be suppressed; see INSTALL
  *
+=head1 Obsolete backwards compatibility functions
+Some of these are also deprecated.  You can exclude these from
+your compiled Perl by adding this option to Configure:
+C<-Accflags='-DNO_MATHOMS'>
+
+=cut
+
  */
 
+
 #include "EXTERN.h"
 #define PERL_IN_MATHOMS_C
 #include "perl.h"
@@ -543,7 +551,7 @@ Perl_sv_utf8_upgrade(pTHX_ SV *sv)
 int
 Perl_fprintf_nocontext(PerlIO *stream, const char *format, ...)
 {
-    dTHXs;
+    int ret = 0;
     va_list(arglist);
 
     /* Easier to special case this here than in embed.pl. (Look at what it
@@ -553,7 +561,9 @@ Perl_fprintf_nocontext(PerlIO *stream, const char *format, ...)
 #endif
 
     va_start(arglist, format);
-    return PerlIO_vprintf(stream, format, arglist);
+    ret = PerlIO_vprintf(stream, format, arglist);
+    va_end(arglist);
+    return ret;
 }
 
 int
@@ -561,13 +571,16 @@ Perl_printf_nocontext(const char *format, ...)
 {
     dTHX;
     va_list(arglist);
+    int ret = 0;
 
 #ifdef PERL_IMPLICIT_CONTEXT
     PERL_ARGS_ASSERT_PRINTF_NOCONTEXT;
 #endif
 
     va_start(arglist, format);
-    return PerlIO_vprintf(PerlIO_stdout(), format, arglist);
+    ret = PerlIO_vprintf(PerlIO_stdout(), format, arglist);
+    va_end(arglist);
+    return ret;
 }
 
 #if defined(HUGE_VAL) || (defined(USE_LONG_DOUBLE) && defined(HUGE_VALL))
@@ -714,6 +727,7 @@ bool
 Perl_is_utf8_string_loc(pTHX_ const U8 *s, STRLEN len, const U8 **ep)
 {
     PERL_ARGS_ASSERT_IS_UTF8_STRING_LOC;
+    PERL_UNUSED_CONTEXT;
 
     return is_utf8_string_loclen(s, len, ep, 0);
 }
@@ -760,8 +774,6 @@ Perl_sv_nounlocking(pTHX_ SV *sv)
 void
 Perl_save_long(pTHX_ long int *longp)
 {
-    dVAR;
-
     PERL_ARGS_ASSERT_SAVE_LONG;
 
     SSCHECK(3);
@@ -773,8 +785,6 @@ Perl_save_long(pTHX_ long int *longp)
 void
 Perl_save_iv(pTHX_ IV *ivp)
 {
-    dVAR;
-
     PERL_ARGS_ASSERT_SAVE_IV;
 
     SSCHECK(3);
@@ -786,8 +796,6 @@ Perl_save_iv(pTHX_ IV *ivp)
 void
 Perl_save_nogv(pTHX_ GV *gv)
 {
-    dVAR;
-
     PERL_ARGS_ASSERT_SAVE_NOGV;
 
     SSCHECK(2);
@@ -798,7 +806,6 @@ Perl_save_nogv(pTHX_ GV *gv)
 void
 Perl_save_list(pTHX_ SV **sarg, I32 maxsarg)
 {
-    dVAR;
     I32 i;
 
     PERL_ARGS_ASSERT_SAVE_LIST;
@@ -1041,15 +1048,12 @@ Perl_sv_insert(pTHX_ SV *const bigstr, const STRLEN offset, const STRLEN len,
 void
 Perl_save_freesv(pTHX_ SV *sv)
 {
-    dVAR;
     save_freesv(sv);
 }
 
 void
 Perl_save_mortalizesv(pTHX_ SV *sv)
 {
-    dVAR;
-
     PERL_ARGS_ASSERT_SAVE_MORTALIZESV;
 
     save_mortalizesv(sv);
@@ -1058,21 +1062,18 @@ Perl_save_mortalizesv(pTHX_ SV *sv)
 void
 Perl_save_freeop(pTHX_ OP *o)
 {
-    dVAR;
     save_freeop(o);
 }
 
 void
 Perl_save_freepv(pTHX_ char *pv)
 {
-    dVAR;
     save_freepv(pv);
 }
 
 void
 Perl_save_op(pTHX)
 {
-    dVAR;
     save_op();
 }
 
@@ -1227,6 +1228,22 @@ ASCII_TO_NEED(const UV enc, const UV ch)
     return ch;
 }
 
+bool      /* Made into a function, so can be deprecated */
+Perl_isIDFIRST_lazy(pTHX_ const char* p)
+{
+    PERL_ARGS_ASSERT_ISIDFIRST_LAZY;
+
+    return isIDFIRST_lazy_if(p,1);
+}
+
+bool      /* Made into a function, so can be deprecated */
+Perl_isALNUM_lazy(pTHX_ const char* p)
+{
+    PERL_ARGS_ASSERT_ISALNUM_LAZY;
+
+    return isALNUM_lazy_if(p,1);
+}
+
 bool
 Perl_is_uni_alnum(pTHX_ UV c)
 {
@@ -1248,84 +1265,98 @@ Perl_is_uni_alpha(pTHX_ UV c)
 bool
 Perl_is_uni_ascii(pTHX_ UV c)
 {
+    PERL_UNUSED_CONTEXT;
     return isASCII_uni(c);
 }
 
 bool
 Perl_is_uni_blank(pTHX_ UV c)
 {
+    PERL_UNUSED_CONTEXT;
     return isBLANK_uni(c);
 }
 
 bool
 Perl_is_uni_space(pTHX_ UV c)
 {
+    PERL_UNUSED_CONTEXT;
     return isSPACE_uni(c);
 }
 
 bool
 Perl_is_uni_digit(pTHX_ UV c)
 {
+    PERL_UNUSED_CONTEXT;
     return isDIGIT_uni(c);
 }
 
 bool
 Perl_is_uni_upper(pTHX_ UV c)
 {
+    PERL_UNUSED_CONTEXT;
     return isUPPER_uni(c);
 }
 
 bool
 Perl_is_uni_lower(pTHX_ UV c)
 {
+    PERL_UNUSED_CONTEXT;
     return isLOWER_uni(c);
 }
 
 bool
 Perl_is_uni_cntrl(pTHX_ UV c)
 {
+    PERL_UNUSED_CONTEXT;
     return isCNTRL_L1(c);
 }
 
 bool
 Perl_is_uni_graph(pTHX_ UV c)
 {
+    PERL_UNUSED_CONTEXT;
     return isGRAPH_uni(c);
 }
 
 bool
 Perl_is_uni_print(pTHX_ UV c)
 {
+    PERL_UNUSED_CONTEXT;
     return isPRINT_uni(c);
 }
 
 bool
 Perl_is_uni_punct(pTHX_ UV c)
 {
+    PERL_UNUSED_CONTEXT;
     return isPUNCT_uni(c);
 }
 
 bool
 Perl_is_uni_xdigit(pTHX_ UV c)
 {
+    PERL_UNUSED_CONTEXT;
     return isXDIGIT_uni(c);
 }
 
 bool
 Perl_is_uni_alnum_lc(pTHX_ UV c)
 {
+    PERL_UNUSED_CONTEXT;
     return isWORDCHAR_LC_uvchr(c);
 }
 
 bool
 Perl_is_uni_alnumc_lc(pTHX_ UV c)
 {
+    PERL_UNUSED_CONTEXT;
     return isALPHANUMERIC_LC_uvchr(c);
 }
 
 bool
 Perl_is_uni_idfirst_lc(pTHX_ UV c)
 {
+    PERL_UNUSED_CONTEXT;
     /* XXX Should probably be something that resolves to the old IDFIRST, but
      * this function is deprecated, so not bothering */
     return isIDFIRST_LC_uvchr(c);
@@ -1334,24 +1365,28 @@ Perl_is_uni_idfirst_lc(pTHX_ UV c)
 bool
 Perl_is_uni_alpha_lc(pTHX_ UV c)
 {
+    PERL_UNUSED_CONTEXT;
     return isALPHA_LC_uvchr(c);
 }
 
 bool
 Perl_is_uni_ascii_lc(pTHX_ UV c)
 {
+    PERL_UNUSED_CONTEXT;
     return isASCII_LC_uvchr(c);
 }
 
 bool
 Perl_is_uni_blank_lc(pTHX_ UV c)
 {
+    PERL_UNUSED_CONTEXT;
     return isBLANK_LC_uvchr(c);
 }
 
 bool
 Perl_is_uni_space_lc(pTHX_ UV c)
 {
+    PERL_UNUSED_CONTEXT;
     return isSPACE_LC_uvchr(c);
 }
 
@@ -1362,6 +1397,46 @@ Perl_is_uni_digit_lc(pTHX_ UV c)
 }
 
 bool
+Perl_is_uni_idfirst(pTHX_ UV c)
+{
+    U8 tmpbuf[UTF8_MAXBYTES+1];
+    uvchr_to_utf8(tmpbuf, c);
+    return _is_utf8_idstart(tmpbuf);
+}
+
+bool
+Perl_is_utf8_idfirst(pTHX_ const U8 *p) /* The naming is historical. */
+{
+    PERL_ARGS_ASSERT_IS_UTF8_IDFIRST;
+
+    return _is_utf8_idstart(p);
+}
+
+bool
+Perl_is_utf8_xidfirst(pTHX_ const U8 *p) /* The naming is historical. */
+{
+    PERL_ARGS_ASSERT_IS_UTF8_XIDFIRST;
+
+    return _is_utf8_xidstart(p);
+}
+
+bool
+Perl_is_utf8_idcont(pTHX_ const U8 *p)
+{
+    PERL_ARGS_ASSERT_IS_UTF8_IDCONT;
+
+    return _is_utf8_idcont(p);
+}
+
+bool
+Perl_is_utf8_xidcont(pTHX_ const U8 *p)
+{
+    PERL_ARGS_ASSERT_IS_UTF8_XIDCONT;
+
+    return _is_utf8_xidcont(p);
+}
+
+bool
 Perl_is_uni_upper_lc(pTHX_ UV c)
 {
     return isUPPER_LC_uvchr(c);
@@ -1436,8 +1511,6 @@ Perl_to_uni_lower_lc(pTHX_ U32 c)
 bool
 Perl_is_utf8_alnum(pTHX_ const U8 *p)
 {
-    dVAR;
-
     PERL_ARGS_ASSERT_IS_UTF8_ALNUM;
 
     /* NOTE: "IsWord", not "IsAlnum", since Alnum is a true
@@ -1449,8 +1522,6 @@ Perl_is_utf8_alnum(pTHX_ const U8 *p)
 bool
 Perl_is_utf8_alnumc(pTHX_ const U8 *p)
 {
-    dVAR;
-
     PERL_ARGS_ASSERT_IS_UTF8_ALNUMC;
 
     return isALPHANUMERIC_utf8(p);
@@ -1459,8 +1530,6 @@ Perl_is_utf8_alnumc(pTHX_ const U8 *p)
 bool
 Perl_is_utf8_alpha(pTHX_ const U8 *p)
 {
-    dVAR;
-
     PERL_ARGS_ASSERT_IS_UTF8_ALPHA;
 
     return isALPHA_utf8(p);
@@ -1469,9 +1538,8 @@ Perl_is_utf8_alpha(pTHX_ const U8 *p)
 bool
 Perl_is_utf8_ascii(pTHX_ const U8 *p)
 {
-    dVAR;
-
     PERL_ARGS_ASSERT_IS_UTF8_ASCII;
+    PERL_UNUSED_CONTEXT;
 
     return isASCII_utf8(p);
 }
@@ -1479,9 +1547,8 @@ Perl_is_utf8_ascii(pTHX_ const U8 *p)
 bool
 Perl_is_utf8_blank(pTHX_ const U8 *p)
 {
-    dVAR;
-
     PERL_ARGS_ASSERT_IS_UTF8_BLANK;
+    PERL_UNUSED_CONTEXT;
 
     return isBLANK_utf8(p);
 }
@@ -1489,9 +1556,8 @@ Perl_is_utf8_blank(pTHX_ const U8 *p)
 bool
 Perl_is_utf8_space(pTHX_ const U8 *p)
 {
-    dVAR;
-
     PERL_ARGS_ASSERT_IS_UTF8_SPACE;
+    PERL_UNUSED_CONTEXT;
 
     return isSPACE_utf8(p);
 }
@@ -1499,9 +1565,8 @@ Perl_is_utf8_space(pTHX_ const U8 *p)
 bool
 Perl_is_utf8_perl_space(pTHX_ const U8 *p)
 {
-    dVAR;
-
     PERL_ARGS_ASSERT_IS_UTF8_PERL_SPACE;
+    PERL_UNUSED_CONTEXT;
 
     /* Only true if is an ASCII space-like character, and ASCII is invariant
      * under utf8, so can just use the macro */
@@ -1511,9 +1576,8 @@ Perl_is_utf8_perl_space(pTHX_ const U8 *p)
 bool
 Perl_is_utf8_perl_word(pTHX_ const U8 *p)
 {
-    dVAR;
-
     PERL_ARGS_ASSERT_IS_UTF8_PERL_WORD;
+    PERL_UNUSED_CONTEXT;
 
     /* Only true if is an ASCII word character, and ASCII is invariant
      * under utf8, so can just use the macro */
@@ -1523,8 +1587,6 @@ Perl_is_utf8_perl_word(pTHX_ const U8 *p)
 bool
 Perl_is_utf8_digit(pTHX_ const U8 *p)
 {
-    dVAR;
-
     PERL_ARGS_ASSERT_IS_UTF8_DIGIT;
 
     return isDIGIT_utf8(p);
@@ -1533,9 +1595,8 @@ Perl_is_utf8_digit(pTHX_ const U8 *p)
 bool
 Perl_is_utf8_posix_digit(pTHX_ const U8 *p)
 {
-    dVAR;
-
     PERL_ARGS_ASSERT_IS_UTF8_POSIX_DIGIT;
+    PERL_UNUSED_CONTEXT;
 
     /* Only true if is an ASCII digit character, and ASCII is invariant
      * under utf8, so can just use the macro */
@@ -1545,8 +1606,6 @@ Perl_is_utf8_posix_digit(pTHX_ const U8 *p)
 bool
 Perl_is_utf8_upper(pTHX_ const U8 *p)
 {
-    dVAR;
-
     PERL_ARGS_ASSERT_IS_UTF8_UPPER;
 
     return isUPPER_utf8(p);
@@ -1555,8 +1614,6 @@ Perl_is_utf8_upper(pTHX_ const U8 *p)
 bool
 Perl_is_utf8_lower(pTHX_ const U8 *p)
 {
-    dVAR;
-
     PERL_ARGS_ASSERT_IS_UTF8_LOWER;
 
     return isLOWER_utf8(p);
@@ -1565,9 +1622,8 @@ Perl_is_utf8_lower(pTHX_ const U8 *p)
 bool
 Perl_is_utf8_cntrl(pTHX_ const U8 *p)
 {
-    dVAR;
-
     PERL_ARGS_ASSERT_IS_UTF8_CNTRL;
+    PERL_UNUSED_CONTEXT;
 
     return isCNTRL_utf8(p);
 }
@@ -1575,8 +1631,6 @@ Perl_is_utf8_cntrl(pTHX_ const U8 *p)
 bool
 Perl_is_utf8_graph(pTHX_ const U8 *p)
 {
-    dVAR;
-
     PERL_ARGS_ASSERT_IS_UTF8_GRAPH;
 
     return isGRAPH_utf8(p);
@@ -1585,8 +1639,6 @@ Perl_is_utf8_graph(pTHX_ const U8 *p)
 bool
 Perl_is_utf8_print(pTHX_ const U8 *p)
 {
-    dVAR;
-
     PERL_ARGS_ASSERT_IS_UTF8_PRINT;
 
     return isPRINT_utf8(p);
@@ -1595,8 +1647,6 @@ Perl_is_utf8_print(pTHX_ const U8 *p)
 bool
 Perl_is_utf8_punct(pTHX_ const U8 *p)
 {
-    dVAR;
-
     PERL_ARGS_ASSERT_IS_UTF8_PUNCT;
 
     return isPUNCT_utf8(p);
@@ -1605,9 +1655,8 @@ Perl_is_utf8_punct(pTHX_ const U8 *p)
 bool
 Perl_is_utf8_xdigit(pTHX_ const U8 *p)
 {
-    dVAR;
-
     PERL_ARGS_ASSERT_IS_UTF8_XDIGIT;
+    PERL_UNUSED_CONTEXT;
 
     return isXDIGIT_utf8(p);
 }
@@ -1615,13 +1664,116 @@ Perl_is_utf8_xdigit(pTHX_ const U8 *p)
 bool
 Perl_is_utf8_mark(pTHX_ const U8 *p)
 {
-    dVAR;
-
     PERL_ARGS_ASSERT_IS_UTF8_MARK;
 
     return _is_utf8_mark(p);
 }
 
+/*
+=for apidoc is_utf8_char
+
+Tests if some arbitrary number of bytes begins in a valid UTF-8
+character.  Note that an INVARIANT (i.e. ASCII on non-EBCDIC machines)
+character is a valid UTF-8 character.  The actual number of bytes in the UTF-8
+character will be returned if it is valid, otherwise 0.
+
+This function is deprecated due to the possibility that malformed input could
+cause reading beyond the end of the input buffer.  Use L</isUTF8_CHAR>
+instead.
+
+=cut */
+
+STRLEN
+Perl_is_utf8_char(const U8 *s)
+{
+    PERL_ARGS_ASSERT_IS_UTF8_CHAR;
+
+    /* Assumes we have enough space, which is why this is deprecated */
+    return isUTF8_CHAR(s, s + UTF8SKIP(s));
+}
+
+/* DEPRECATED!
+ * Like L</utf8_to_uvuni_buf>(), but should only be called when it is known that
+ * there are no malformations in the input UTF-8 string C<s>.  Surrogates,
+ * non-character code points, and non-Unicode code points are allowed */
+
+UV
+Perl_valid_utf8_to_uvuni(pTHX_ const U8 *s, STRLEN *retlen)
+{
+    PERL_ARGS_ASSERT_VALID_UTF8_TO_UVUNI;
+
+    return NATIVE_TO_UNI(valid_utf8_to_uvchr(s, retlen));
+}
+
+/*
+=for apidoc utf8_to_uvchr
+
+Returns the native code point of the first character in the string C<s>
+which is assumed to be in UTF-8 encoding; C<retlen> will be set to the
+length, in bytes, of that character.
+
+Some, but not all, UTF-8 malformations are detected, and in fact, some
+malformed input could cause reading beyond the end of the input buffer, which
+is why this function is deprecated.  Use L</utf8_to_uvchr_buf> instead.
+
+If C<s> points to one of the detected malformations, and UTF8 warnings are
+enabled, zero is returned and C<*retlen> is set (if C<retlen> isn't
+NULL) to -1.  If those warnings are off, the computed value if well-defined (or
+the Unicode REPLACEMENT CHARACTER, if not) is silently returned, and C<*retlen>
+is set (if C<retlen> isn't NULL) so that (S<C<s> + C<*retlen>>) is the
+next possible position in C<s> that could begin a non-malformed character.
+See L</utf8n_to_uvchr> for details on when the REPLACEMENT CHARACTER is returned.
+
+=cut
+*/
+
+UV
+Perl_utf8_to_uvchr(pTHX_ const U8 *s, STRLEN *retlen)
+{
+    PERL_ARGS_ASSERT_UTF8_TO_UVCHR;
+
+    return utf8_to_uvchr_buf(s, s + UTF8_MAXBYTES, retlen);
+}
+
+/*
+=for apidoc utf8_to_uvuni
+
+Returns the Unicode code point of the first character in the string C<s>
+which is assumed to be in UTF-8 encoding; C<retlen> will be set to the
+length, in bytes, of that character.
+
+Some, but not all, UTF-8 malformations are detected, and in fact, some
+malformed input could cause reading beyond the end of the input buffer, which
+is one reason why this function is deprecated.  The other is that only in
+extremely limited circumstances should the Unicode versus native code point be
+of any interest to you.  See L</utf8_to_uvuni_buf> for alternatives.
+
+If C<s> points to one of the detected malformations, and UTF8 warnings are
+enabled, zero is returned and C<*retlen> is set (if C<retlen> doesn't point to
+NULL) to -1.  If those warnings are off, the computed value if well-defined (or
+the Unicode REPLACEMENT CHARACTER, if not) is silently returned, and C<*retlen>
+is set (if C<retlen> isn't NULL) so that (S<C<s> + C<*retlen>>) is the
+next possible position in C<s> that could begin a non-malformed character.
+See L</utf8n_to_uvchr> for details on when the REPLACEMENT CHARACTER is returned.
+
+=cut
+*/
+
+UV
+Perl_utf8_to_uvuni(pTHX_ const U8 *s, STRLEN *retlen)
+{
+    PERL_ARGS_ASSERT_UTF8_TO_UVUNI;
+
+    return NATIVE_TO_UNI(valid_utf8_to_uvchr(s, retlen));
+}
+
+void
+Perl_save_re_context(pTHX)
+{
+    PERL_UNUSED_CONTEXT;
+}
+
+
 END_EXTERN_C
 
 #endif /* NO_MATHOMS */