X-Git-Url: https://perl5.git.perl.org/perl5.git/blobdiff_plain/2972341c4e56d0a957c701697676b4ccf879c92d..bfde97e001cac6d787c92149426f99821a4c69f0:/perl.h diff --git a/perl.h b/perl.h index 9e263da..bd92bfc 100644 --- a/perl.h +++ b/perl.h @@ -116,11 +116,6 @@ /* <--- here ends the logic shared by perl.h and makedef.pl */ -/* Microsoft Visual C++ 6.0 needs special treatment in numerous places */ -#if defined(WIN32) && defined(_MSC_VER) && _MSC_VER >= 1200 && _MSC_VER < 1300 -# define USING_MSVC6 -#endif - #undef START_EXTERN_C #undef END_EXTERN_C #undef EXTERN_C @@ -246,7 +241,7 @@ Perl_pregfree(aTHX_ (prog)) #define CALLREGFREE_PVT(prog) \ - if(prog) RX_ENGINE(prog)->rxfree(aTHX_ (prog)) + if(prog && RX_ENGINE(prog)) RX_ENGINE(prog)->rxfree(aTHX_ (prog)) #define CALLREG_NUMBUF_FETCH(rx,paren,usesv) \ RX_ENGINE(rx)->numbered_buff_FETCH(aTHX_ (rx),(paren),(usesv)) @@ -299,9 +294,130 @@ : (REGEXP *)NULL) #endif +/* some compilers impersonate gcc */ +#if defined(__GNUC__) && !defined(__clang__) && !defined(__INTEL_COMPILER) +# define PERL_IS_GCC 1 +#endif +/* In case Configure was not used (we are using a "canned config" + * such as Win32, or a cross-compilation setup, for example) try going + * by the gcc major and minor versions. One useful URL is + * http://www.ohse.de/uwe/articles/gcc-attributes.html, + * but contrary to this information warn_unused_result seems + * not to be in gcc 3.3.5, at least. --jhi + * Also, when building extensions with an installed perl, this allows + * the user to upgrade gcc and get the right attributes, rather than + * relying on the list generated at Configure time. --AD + * Set these up now otherwise we get confused when some of the <*thread.h> + * includes below indirectly pull in <perlio.h> (which needs to know if we + * have HASATTRIBUTE_FORMAT). 
+ */ +#ifndef PERL_MICRO +#if defined __GNUC__ && !defined(__INTEL_COMPILER) +# if __GNUC__ == 3 && __GNUC_MINOR__ >= 1 || __GNUC__ > 3 /* 3.1 -> */ +# define HASATTRIBUTE_DEPRECATED +# endif +# if __GNUC__ >= 3 /* 3.0 -> */ /* XXX Verify this version */ +# define HASATTRIBUTE_FORMAT +# if defined __MINGW32__ +# define PRINTF_FORMAT_NULL_OK +# endif +# endif +# if __GNUC__ >= 3 /* 3.0 -> */ +# define HASATTRIBUTE_MALLOC +# endif +# if __GNUC__ == 3 && __GNUC_MINOR__ >= 3 || __GNUC__ > 3 /* 3.3 -> */ +# define HASATTRIBUTE_NONNULL +# endif +# if __GNUC__ == 2 && __GNUC_MINOR__ >= 5 || __GNUC__ > 2 /* 2.5 -> */ +# define HASATTRIBUTE_NORETURN +# endif +# if __GNUC__ >= 3 /* gcc 3.0 -> */ +# define HASATTRIBUTE_PURE +# endif +# if __GNUC__ == 3 && __GNUC_MINOR__ >= 4 || __GNUC__ > 3 /* 3.4 -> */ +# define HASATTRIBUTE_UNUSED +# endif +# if __GNUC__ == 3 && __GNUC_MINOR__ == 3 && !defined(__cplusplus) +# define HASATTRIBUTE_UNUSED /* gcc-3.3, but not g++-3.3. */ +# endif +# if __GNUC__ == 3 && __GNUC_MINOR__ >= 4 || __GNUC__ > 3 /* 3.4 -> */ +# define HASATTRIBUTE_WARN_UNUSED_RESULT +# endif +/* always_inline is buggy in gcc <= 4.6 and causes compilation errors */ +# if __GNUC__ == 4 && __GNUC_MINOR__ >= 7 || __GNUC__ > 4 /* 4.7 -> */ +# define HASATTRIBUTE_ALWAYS_INLINE +# endif +#endif +#endif /* #ifndef PERL_MICRO */ +#ifdef HASATTRIBUTE_DEPRECATED +# define __attribute__deprecated__ __attribute__((deprecated)) +#endif +#ifdef HASATTRIBUTE_FORMAT +# define __attribute__format__(x,y,z) __attribute__((format(x,y,z))) +#endif +#ifdef HASATTRIBUTE_MALLOC +# define __attribute__malloc__ __attribute__((__malloc__)) +#endif +#ifdef HASATTRIBUTE_NONNULL +# define __attribute__nonnull__(a) __attribute__((nonnull(a))) +#endif +#ifdef HASATTRIBUTE_NORETURN +# define __attribute__noreturn__ __attribute__((noreturn)) +#endif +#ifdef HASATTRIBUTE_PURE +# define __attribute__pure__ __attribute__((pure)) +#endif +#ifdef HASATTRIBUTE_UNUSED +# define __attribute__unused__ 
__attribute__((unused)) +#endif +#ifdef HASATTRIBUTE_WARN_UNUSED_RESULT +# define __attribute__warn_unused_result__ __attribute__((warn_unused_result)) +#endif +#ifdef HASATTRIBUTE_ALWAYS_INLINE +/* always_inline is buggy in gcc <= 4.6 and causes compilation errors */ +# if !defined(PERL_IS_GCC) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 7 || __GNUC__ > 4) +# define __attribute__always_inline__ __attribute__((always_inline)) +# endif +#endif + +/* If we haven't defined the attributes yet, define them to blank. */ +#ifndef __attribute__deprecated__ +# define __attribute__deprecated__ +#endif +#ifndef __attribute__format__ +# define __attribute__format__(x,y,z) +#endif +#ifndef __attribute__malloc__ +# define __attribute__malloc__ +#endif +#ifndef __attribute__nonnull__ +# define __attribute__nonnull__(a) +#endif +#ifndef __attribute__noreturn__ +# define __attribute__noreturn__ +#endif +#ifndef __attribute__pure__ +# define __attribute__pure__ +#endif +#ifndef __attribute__unused__ +# define __attribute__unused__ +#endif +#ifndef __attribute__warn_unused_result__ +# define __attribute__warn_unused_result__ +#endif +#ifndef __attribute__always_inline__ +# define __attribute__always_inline__ +#endif + +/* Some OS warn on NULL format to printf */ +#ifdef PRINTF_FORMAT_NULL_OK +# define __attribute__format__null_ok__(x,y,z) __attribute__format__(x,y,z) +#else +# define __attribute__format__null_ok__(x,y,z) +#endif /* * Because of backward compatibility reasons the PERL_UNUSED_DECL @@ -315,20 +431,8 @@ * */ -#if defined(__SYMBIAN32__) && defined(__GNUC__) -# ifdef __cplusplus -# define PERL_UNUSED_DECL -# else -# define PERL_UNUSED_DECL __attribute__((unused)) -# endif -#endif - #ifndef PERL_UNUSED_DECL -# if defined(HASATTRIBUTE_UNUSED) && (!defined(__cplusplus) || __GNUC__ >= 4) -# define PERL_UNUSED_DECL __attribute__unused__ -# else -# define PERL_UNUSED_DECL -# endif +# define PERL_UNUSED_DECL __attribute__unused__ #endif /* gcc -Wall: @@ -395,6 +499,11 @@ # endif 
#endif +#if defined(_MSC_VER) +/* XXX older MSVC versions have a smallish macro buffer */ +#define PERL_SMALL_MACRO_BUFFER +#endif + /* on gcc (and clang), specify that a warning should be temporarily * ignored; e.g. * @@ -448,6 +557,19 @@ #define CLANG_DIAG_IGNORE_STMT(x) CLANG_DIAG_IGNORE(x) NOOP #define CLANG_DIAG_RESTORE_STMT CLANG_DIAG_RESTORE NOOP +#if defined(_MSC_VER) && (_MSC_VER >= 1300) +# define MSVC_DIAG_IGNORE(x) __pragma(warning(push)) \ + __pragma(warning(disable : x)) +# define MSVC_DIAG_RESTORE __pragma(warning(pop)) +#else +# define MSVC_DIAG_IGNORE(x) +# define MSVC_DIAG_RESTORE +#endif +#define MSVC_DIAG_IGNORE_DECL(x) MSVC_DIAG_IGNORE(x) dNOOP +#define MSVC_DIAG_RESTORE_DECL MSVC_DIAG_RESTORE dNOOP +#define MSVC_DIAG_IGNORE_STMT(x) MSVC_DIAG_IGNORE(x) NOOP +#define MSVC_DIAG_RESTORE_STMT MSVC_DIAG_RESTORE NOOP + #define NOOP /*EMPTY*/(void)0 #define dNOOP struct Perl___notused_struct @@ -520,12 +642,25 @@ #endif /* - * STMT_START { statements; } STMT_END; - * can be used as a single statement, as in - * if (x) STMT_START { ... } STMT_END; else ... - * - * Trying to select a version that gives no warnings... - */ +=head1 Miscellaneous Functions + +=for apidoc AmnUu|void|STMT_START + + STMT_START { statements; } STMT_END; + +can be used as a single statement, as in + + if (x) STMT_START { ... } STMT_END; else ... + +These are often used in macro definitions. Note that you can't return a value +out of them. + +=for apidoc AmnUhu|void|STMT_END + +=cut + + Trying to select a version that gives no warnings... +*/ #if !(defined(STMT_START) && defined(STMT_END)) # ifdef PERL_USE_GCC_BRACE_GROUPS # define STMT_START (void)( /* gcc supports "({ STATEMENTS; })" */ @@ -540,11 +675,6 @@ # define BYTEORDER 0x1234 #endif -/* Overall memory policy? 
*/ -#ifndef CONSERVATIVE -# define LIBERAL 1 -#endif - #if 'A' == 65 && 'I' == 73 && 'J' == 74 && 'Z' == 90 #define ASCIIish #else @@ -610,16 +740,24 @@ # define TAINT_WARN_get 0 # define TAINT_WARN_set(s) NOOP #else + /* Set to tainted if we are running under tainting mode */ # define TAINT (PL_tainted = PL_tainting) -# define TAINT_NOT (PL_tainted = FALSE) -# define TAINT_IF(c) if (UNLIKELY(c)) { PL_tainted = PL_tainting; } + +# define TAINT_NOT (PL_tainted = FALSE) /* Untaint */ +# define TAINT_IF(c) if (UNLIKELY(c)) { TAINT; } /* Conditionally taint */ # define TAINT_ENV() if (UNLIKELY(PL_tainting)) { taint_env(); } -# define TAINT_PROPER(s) if (UNLIKELY(PL_tainting)) { taint_proper(NULL, s); } + /* croak or warn if tainting */ +# define TAINT_PROPER(s) if (UNLIKELY(PL_tainting)) { \ + taint_proper(NULL, s); \ + } # define TAINT_set(s) (PL_tainted = (s)) -# define TAINT_get (PL_tainted) -# define TAINTING_get (PL_tainting) +# define TAINT_get (cBOOL(UNLIKELY(PL_tainted))) /* Is something tainted? */ +# define TAINTING_get (cBOOL(UNLIKELY(PL_tainting))) /* Is taint checking enabled? */ # define TAINTING_set(s) (PL_tainting = (s)) -# define TAINT_WARN_get (PL_taint_warn) +# define TAINT_WARN_get (PL_taint_warn) /* FALSE => tainting violations + are fatal + TRUE => they're just + warnings */ # define TAINT_WARN_set(s) (PL_taint_warn = (s)) #endif @@ -693,6 +831,10 @@ # define MB_CUR_MAX 1uL #endif +# ifdef I_WCHAR +# include +# endif + # include #ifdef I_STDINT @@ -719,10 +861,33 @@ # include #endif -#if !defined(NO_LOCALE) && defined(HAS_SETLOCALE) -# define USE_LOCALE +/* If not forbidden, we enable locale handling if either 1) the POSIX 2008 + * functions are available, or 2) just the setlocale() function. This logic is + * repeated in t/loc_tools.pl and makedef.pl; The three should be kept in + * sync. */ +#if ! defined(NO_LOCALE) + +# if ! 
defined(NO_POSIX_2008_LOCALE) \ + && defined(HAS_NEWLOCALE) \ + && defined(HAS_USELOCALE) \ + && defined(HAS_DUPLOCALE) \ + && defined(HAS_FREELOCALE) \ + && defined(LC_ALL_MASK) + + /* For simplicity, the code is written to assume that any platform advanced + * enough to have the Posix 2008 locale functions has LC_ALL. The final + * test above makes sure that assumption is valid */ + +# define HAS_POSIX_2008_LOCALE +# define USE_LOCALE +# elif defined(HAS_SETLOCALE) +# define USE_LOCALE +# endif +#endif + +#ifdef USE_LOCALE # define HAS_SKIP_LOCALE_INIT /* Solely for XS code to test for this - capability */ + #define */ # if !defined(NO_LOCALE_COLLATE) && defined(LC_COLLATE) \ && defined(HAS_STRXFRM) # define USE_LOCALE_COLLATE @@ -757,32 +922,41 @@ # if !defined(NO_LOCALE_TELEPHONE) && defined(LC_TELEPHONE) # define USE_LOCALE_TELEPHONE # endif -#endif /* !NO_LOCALE && HAS_SETLOCALE */ -#ifdef USE_LOCALE /* These locale things are all subject to change */ -# if defined(HAS_NEWLOCALE) \ - && defined(LC_ALL_MASK) \ - && defined(HAS_FREELOCALE) \ - && defined(HAS_USELOCALE) \ - && ! defined(NO_POSIX_2008_LOCALE) - - /* For simplicity, the code is written to assume that any platform advanced - * enough to have the Posix 2008 locale functions has LC_ALL. The test - * above makes sure that assumption is valid */ +/* XXX The next few defines are unfortunately duplicated in makedef.pl, and + * changes here MUST also be made there */ -# define HAS_POSIX_2008_LOCALE -# endif -# if defined(USE_ITHREADS) \ - && ( defined(HAS_POSIX_2008_LOCALE) \ - || (defined(WIN32) && defined(_MSC_VER) && _MSC_VER >= 1400)) \ - && ! defined(NO_THREAD_SAFE_LOCALE) -# define USE_THREAD_SAFE_LOCALE +# if ! 
defined(HAS_SETLOCALE) && defined(HAS_POSIX_2008_LOCALE) +# define USE_POSIX_2008_LOCALE +# ifndef USE_THREAD_SAFE_LOCALE +# define USE_THREAD_SAFE_LOCALE +# endif + /* If compiled with + * -DUSE_THREAD_SAFE_LOCALE, will do so even + * on unthreaded builds */ +# elif (defined(USE_ITHREADS) || defined(USE_THREAD_SAFE_LOCALE)) \ + && ( defined(HAS_POSIX_2008_LOCALE) \ + || (defined(WIN32) && defined(_MSC_VER) && _MSC_VER >= 1400)) \ + && ! defined(NO_THREAD_SAFE_LOCALE) +# ifndef USE_THREAD_SAFE_LOCALE +# define USE_THREAD_SAFE_LOCALE +# endif # ifdef HAS_POSIX_2008_LOCALE # define USE_POSIX_2008_LOCALE # endif # endif #endif +/* Microsoft documentation reads in the change log for VS 2015: + * "The localeconv function declared in locale.h now works correctly when + * per-thread locale is enabled. In previous versions of the library, this + * function would return the lconv data for the global locale, not the + * thread's locale." + */ +#if defined(WIN32) && defined(USE_THREAD_SAFE_LOCALE) && _MSC_VER < 1900 +# define TS_W32_BROKEN_LOCALECONV +#endif + #include #ifdef I_SYS_PARAM @@ -1027,14 +1201,30 @@ EXTERN_C int usleep(unsigned int); # define PERL_STRLEN_EXPAND_SHIFT 2 #endif -#include -#define STRUCT_OFFSET(s,m) offsetof(s,m) +/* This use of offsetof() requires /Zc:offsetof- for VS2017 (and presumably + * onwards) when building Socket.xs, but we can just use a different definition + * for STRUCT_OFFSET instead. */ +#if defined(WIN32) && defined(_MSC_VER) && _MSC_VER >= 1910 +# define STRUCT_OFFSET(s,m) (Size_t)(&(((s *)0)->m)) +#else +# include +# define STRUCT_OFFSET(s,m) offsetof(s,m) +#endif -/* ptrdiff_t is C11, so undef it under pedantic builds */ +/* ptrdiff_t is C11, so undef it under pedantic builds. (Actually it is + * in C89, but apparently there are platforms where it doesn't exist. See + * thread beginning at http://nntp.perl.org/group/perl.perl5.porters/251541.) 
+ * */ #ifdef PERL_GCC_PEDANTIC # undef HAS_PTRDIFF_T #endif +#ifdef HAS_PTRDIFF_T +# define Ptrdiff_t ptrdiff_t +#else +# define Ptrdiff_t SSize_t +#endif + #ifndef __SYMBIAN32__ # include #endif @@ -1059,7 +1249,7 @@ EXTERN_C int usleep(unsigned int); # define saferealloc Perl_realloc # define safefree Perl_mfree # define CHECK_MALLOC_TOO_LATE_FOR_(code) STMT_START { \ - if (!PL_tainting && MallocCfg_ptr[MallocCfg_cfg_env_read]) \ + if (!TAINTING_get && MallocCfg_ptr[MallocCfg_cfg_env_read]) \ code; \ } STMT_END # define CHECK_MALLOC_TOO_LATE_FOR(ch) \ @@ -1205,6 +1395,38 @@ EXTERN_C char *crypt(const char *, const char *); EXTERN_C char *crypt(const char *, const char *); #endif +/* +=head1 Errno + +=for apidoc m|void|SETERRNO|int errcode|int vmserrcode + +Set C<errno>, and on VMS set C<vaxc$errno>. + +=for apidoc mn|void|dSAVEDERRNO + +Declare variables needed to save C<errno> and any operating system +specific error number. + +=for apidoc mn|void|dSAVE_ERRNO + +Declare variables needed to save C<errno> and any operating system +specific error number, and save them for optional later restoration +by C<RESTORE_ERRNO>. + +=for apidoc mn|void|SAVE_ERRNO + +Save C<errno> and any operating system specific error number for +optional later restoration by C<RESTORE_ERRNO>. Requires +C<dSAVEDERRNO> or C<dSAVE_ERRNO> in scope. + +=for apidoc mn|void|RESTORE_ERRNO + +Restore C<errno> and any operating system specific error number that +was saved by C<dSAVE_ERRNO> or C<SAVE_ERRNO>. + +=cut +*/ + #ifdef SETERRNO # undef SETERRNO /* SOCKS might have defined this */ #endif @@ -1276,6 +1498,29 @@ EXTERN_C char *crypt(const char *, const char *); # define RESTORE_ERRNO (errno = saved_errno) #endif +/* +=head1 Warning and Dieing + +=for apidoc Amn|SV *|ERRSV + +Returns the SV for C<$@>, creating it if needed. + +=for apidoc Am|void|CLEAR_ERRSV + +Clear the contents of C<$@>, setting it to the empty string. + +This replaces any read-only SV with a fresh SV and removes any magic. + +=for apidoc Am|void|SANE_ERRSV + +Clean up ERRSV so we can safely set it. 
+ +This replaces any read-only SV with a fresh writable copy and removes +any magic. + +=cut +*/ + #define ERRSV GvSVn(PL_errgv) /* contains inlined gv_add_by_type */ @@ -1296,6 +1541,23 @@ EXTERN_C char *crypt(const char *, const char *); } \ } STMT_END +/* contains inlined gv_add_by_type */ +#define SANE_ERRSV() STMT_START { \ + SV ** const svp = &GvSV(PL_errgv); \ + if (!*svp) { \ + *svp = newSVpvs(""); \ + } else if (SvREADONLY(*svp)) { \ + SV *dupsv = newSVsv(*svp); \ + SvREFCNT_dec_NN(*svp); \ + *svp = dupsv; \ + } else { \ + SV *const errsv = *svp; \ + if (SvMAGICAL(errsv)) { \ + mg_free(errsv); \ + } \ + } \ + } STMT_END + #ifdef PERL_CORE # define DEFSV (0 + GvSVn(PL_defgv)) @@ -1324,7 +1586,7 @@ EXTERN_C char *crypt(const char *, const char *); #define UNKNOWN_ERRNO_MSG "(unknown)" -#if VMS +#ifdef VMS #define Strerror(e) strerror((e), vaxc$errno) #else #define Strerror(e) strerror(e) @@ -1521,9 +1783,17 @@ EXTERN_C char *crypt(const char *, const char *); /* This used to be conditionally defined based on whether we had a sprintf() * that correctly returns the string length (as required by C89), but we no * longer need that. XS modules can (and do) use this name, so it must remain - * a part of the API that's visible to modules. But we no longer document it - * either (because using sprintf() rather than snprintf() is almost always - * a bad idea). */ + * a part of the API that's visible to modules. + +=head1 Miscellaneous Functions + +=for apidoc ATmD|int|my_sprintf|NN char *buffer|NN const char *pat|... + +Do NOT use this due to the possibility of overflowing C<buffer>. 
Instead use +my_snprintf() + +=cut +*/ #define my_sprintf sprintf /* @@ -1606,8 +1876,6 @@ EXTERN_C char *crypt(const char *, const char *); #ifdef HAS_STRLCAT # define my_strlcat strlcat -#else -# define my_strlcat Perl_my_strlcat #endif #if defined(PERL_CORE) || defined(PERL_EXT) @@ -1620,14 +1888,10 @@ EXTERN_C char *crypt(const char *, const char *); #ifdef HAS_STRLCPY # define my_strlcpy strlcpy -#else -# define my_strlcpy Perl_my_strlcpy #endif #ifdef HAS_STRNLEN # define my_strnlen strnlen -#else -# define my_strnlen Perl_my_strnlen #endif /* @@ -1641,13 +1905,13 @@ typedef UVTYPE UV; #if defined(USE_64_BIT_INT) && defined(HAS_QUAD) # if QUADKIND == QUAD_IS_INT64_T && defined(INT64_MAX) -# define IV_MAX INT64_MAX -# define IV_MIN INT64_MIN -# define UV_MAX UINT64_MAX +# define IV_MAX ((IV)INT64_MAX) +# define IV_MIN ((IV)INT64_MIN) +# define UV_MAX ((UV)UINT64_MAX) # ifndef UINT64_MIN # define UINT64_MIN 0 # endif -# define UV_MIN UINT64_MIN +# define UV_MIN ((UV)UINT64_MIN) # else # define IV_MAX PERL_QUAD_MAX # define IV_MIN PERL_QUAD_MIN @@ -1658,17 +1922,17 @@ typedef UVTYPE UV; # define UV_IS_QUAD #else # if defined(INT32_MAX) && IVSIZE == 4 -# define IV_MAX INT32_MAX -# define IV_MIN INT32_MIN +# define IV_MAX ((IV)INT32_MAX) +# define IV_MIN ((IV)INT32_MIN) # ifndef UINT32_MAX_BROKEN /* e.g. HP-UX with gcc messes this up */ -# define UV_MAX UINT32_MAX +# define UV_MAX ((UV)UINT32_MAX) # else -# define UV_MAX 4294967295U +# define UV_MAX ((UV)4294967295U) # endif # ifndef UINT32_MIN # define UINT32_MIN 0 # endif -# define UV_MIN UINT32_MIN +# define UV_MIN ((UV)UINT32_MIN) # else # define IV_MAX PERL_LONG_MAX # define IV_MIN PERL_LONG_MIN @@ -1684,11 +1948,10 @@ typedef UVTYPE UV; # else # undef IV_IS_QUAD # undef UV_IS_QUAD -#if !defined(PERL_CORE) || defined(USING_MSVC6) +#if !defined(PERL_CORE) /* We think that removing this decade-old undef this will cause too much breakage on CPAN for too little gain. 
(See RT #119753) - However, we do need HAS_QUAD in the core for use by the drand48 code, - but not for Win32 VC6 because it has poor __int64 support. */ + However, we do need HAS_QUAD in the core for use by the drand48 code. */ # undef HAS_QUAD #endif # endif @@ -1780,11 +2043,6 @@ typedef NVTYPE NV; # include #endif -#ifdef USING_MSVC6 -/* VC6 has broken NaN semantics: NaN == NaN returns true instead of false, - * and for example NaN < IV_MIN. */ -# define NAN_COMPARE_BROKEN -#endif #if defined(__DECC) && defined(__osf__) /* Also Tru64 cc has broken NaN comparisons. */ # define NAN_COMPARE_BROKEN @@ -1923,6 +2181,7 @@ extern long double Perl_my_frexpl(long double x, int *e); # define Perl_fmod fmodq # define Perl_log logq # define Perl_log10 log10q +# define Perl_signbit signbitq # define Perl_pow powq # define Perl_sin sinq # define Perl_sinh sinhq @@ -2170,7 +2429,7 @@ extern long double Perl_my_frexpl(long double x, int *e); #endif /* Win32: _fpclass(), _isnan(), _finite(). */ -#ifdef WIN32 +#ifdef _MSC_VER # ifndef Perl_isnan # define Perl_isnan(x) _isnan(x) # endif @@ -2228,10 +2487,6 @@ extern long double Perl_my_frexpl(long double x, int *e); (Perl_fp_class_pdenorm(x) || Perl_fp_class_ndenorm(x)) #endif -#ifdef UNDER_CE -int isnan(double d); -#endif - #ifndef Perl_isnan # ifdef Perl_fp_class_nan # define Perl_isnan(x) Perl_fp_class_nan(x) @@ -2308,11 +2563,12 @@ int isnan(double d); #ifdef USE_PERL_ATOF # define Perl_atof(s) Perl_my_atof(s) -# define Perl_atof2(s, n) Perl_my_atof2(aTHX_ (s), &(n)) +# define Perl_atof2(s, n) Perl_my_atof3(aTHX_ (s), &(n), 0) #else # define Perl_atof(s) (NV)atof(s) # define Perl_atof2(s, n) ((n) = atof(s)) #endif +#define my_atof2(a,b) my_atof3(a,b,0) /* * CHAR_MIN and CHAR_MAX are not included here, as the (char) type may be @@ -2351,6 +2607,58 @@ int isnan(double d); # define PERL_QUAD_MIN (-PERL_QUAD_MAX - ((3 & -1) == 3)) #endif +/* +=head1 Numeric functions + +=for apidoc AmnUh||PERL_INT_MIN +=for apidoc 
AmnUh||PERL_LONG_MAX +=for apidoc AmnUh||PERL_LONG_MIN +=for apidoc AmnUh||PERL_QUAD_MAX +=for apidoc AmnUh||PERL_SHORT_MAX +=for apidoc AmnUh||PERL_SHORT_MIN +=for apidoc AmnUh||PERL_UCHAR_MAX +=for apidoc AmnUh||PERL_UCHAR_MIN +=for apidoc AmnUh||PERL_UINT_MAX +=for apidoc AmnUh||PERL_ULONG_MAX +=for apidoc AmnUh||PERL_ULONG_MIN +=for apidoc AmnUh||PERL_UQUAD_MAX +=for apidoc AmnUh||PERL_UQUAD_MIN +=for apidoc AmnUh||PERL_USHORT_MAX +=for apidoc AmnUh||PERL_USHORT_MIN +=for apidoc AmnUh||PERL_QUAD_MIN +=for apidoc AmnU||PERL_INT_MAX +This and +C<PERL_INT_MIN>, +C<PERL_LONG_MAX>, +C<PERL_LONG_MIN>, +C<PERL_QUAD_MAX>, +C<PERL_SHORT_MAX>, +C<PERL_SHORT_MIN>, +C<PERL_UCHAR_MAX>, +C<PERL_UCHAR_MIN>, +C<PERL_UINT_MAX>, +C<PERL_ULONG_MAX>, +C<PERL_ULONG_MIN>, +C<PERL_UQUAD_MAX>, +C<PERL_UQUAD_MIN>, +C<PERL_USHORT_MAX>, +C<PERL_USHORT_MIN>, +C<PERL_QUAD_MIN> +give the largest and smallest number representable in the current +platform in variables of the corresponding types. + +For signed types, the smallest representable number is the most negative +number, the one furthest away from zero. + +For C99 and later compilers, these correspond to things like C<INT_MAX>, which +are available to the C code. But these constants, furnished by Perl, +allow code compiled on earlier compilers to portably have access to the same +constants. + +=cut + +*/ + typedef MEM_SIZE STRLEN; typedef struct op OP; @@ -2608,6 +2916,64 @@ typedef struct padname PADNAME; # define USE_ENVIRON_ARRAY #endif +#ifdef USE_ITHREADS + /* On some platforms it would be safe to use a read/write mutex with many + * readers possible at the same time. On other platforms, notably IBM ones, + * subsequent getenv calls destroy earlier ones. Those platforms would not + * be able to handle simultaneous getenv calls */ +# define ENV_LOCK MUTEX_LOCK(&PL_env_mutex) +# define ENV_UNLOCK MUTEX_UNLOCK(&PL_env_mutex) +# define ENV_INIT MUTEX_INIT(&PL_env_mutex); +# define ENV_TERM MUTEX_DESTROY(&PL_env_mutex); +#else +# define ENV_LOCK NOOP; +# define ENV_UNLOCK NOOP; +# define ENV_INIT NOOP; +# define ENV_TERM NOOP; +#endif + +/* Some critical sections need to lock both the locale and the environment. 
+ * XXX khw intends to change this to lock both mutexes, but that brings up + * issues of potential deadlock, so should be done at the beginning of a + * development cycle. So for now, it just locks the environment. Note that + * many modern platforms are locale-thread-safe anyway, so locking the locale + * mutex is a no-op anyway */ +#define ENV_LOCALE_LOCK ENV_LOCK +#define ENV_LOCALE_UNLOCK ENV_UNLOCK + +/* And some critical sections care only that no one else is writing either the + * locale nor the environment. XXX Again this is for the future. This can be + * simulated with using COND_WAIT in thread.h */ +#define ENV_LOCALE_READ_LOCK ENV_LOCALE_LOCK +#define ENV_LOCALE_READ_UNLOCK ENV_LOCALE_UNLOCK + +#if defined(HAS_SIGACTION) && defined(SA_SIGINFO) + /* having sigaction(2) means that the OS supports both 1-arg and 3-arg + * signal handlers. But the perl core itself only fully supports 1-arg + * handlers, so don't enable for now. + * NB: POSIX::sigaction() supports both. + * + * # define PERL_USE_3ARG_SIGHANDLER + */ +#endif + +/* Siginfo_t: + * This is an alias for the OS's siginfo_t, except that where the OS + * doesn't support it, declare a dummy version instead. This allows us to + * have signal handler functions which always have a Siginfo_t parameter + * regardless of platform, (and which will just be passed a NULL value + * where the OS doesn't support HAS_SIGACTION). + */ + +#if defined(HAS_SIGACTION) && defined(SA_SIGINFO) + typedef siginfo_t Siginfo_t; +#else + typedef struct { + int si_signo; + } Siginfo_t; +#endif + + /* * initialise to avoid floating-point exceptions from overflow, etc */ @@ -2725,55 +3091,6 @@ freeing any remaining Perl interpreters. # endif #endif -/* In case Configure was not used (we are using a "canned config" - * such as Win32, or a cross-compilation setup, for example) try going - * by the gcc major and minor versions. 
One useful URL is - * http://www.ohse.de/uwe/articles/gcc-attributes.html, - * but contrary to this information warn_unused_result seems - * not to be in gcc 3.3.5, at least. --jhi - * Also, when building extensions with an installed perl, this allows - * the user to upgrade gcc and get the right attributes, rather than - * relying on the list generated at Configure time. --AD - * Set these up now otherwise we get confused when some of the <*thread.h> - * includes below indirectly pull in (which needs to know if we - * have HASATTRIBUTE_FORMAT). - */ - -#ifndef PERL_MICRO -#if defined __GNUC__ && !defined(__INTEL_COMPILER) -# if __GNUC__ == 3 && __GNUC_MINOR__ >= 1 || __GNUC__ > 3 /* 3.1 -> */ -# define HASATTRIBUTE_DEPRECATED -# endif -# if __GNUC__ >= 3 /* 3.0 -> */ /* XXX Verify this version */ -# define HASATTRIBUTE_FORMAT -# if defined __MINGW32__ -# define PRINTF_FORMAT_NULL_OK -# endif -# endif -# if __GNUC__ >= 3 /* 3.0 -> */ -# define HASATTRIBUTE_MALLOC -# endif -# if __GNUC__ == 3 && __GNUC_MINOR__ >= 3 || __GNUC__ > 3 /* 3.3 -> */ -# define HASATTRIBUTE_NONNULL -# endif -# if __GNUC__ == 2 && __GNUC_MINOR__ >= 5 || __GNUC__ > 2 /* 2.5 -> */ -# define HASATTRIBUTE_NORETURN -# endif -# if __GNUC__ >= 3 /* gcc 3.0 -> */ -# define HASATTRIBUTE_PURE -# endif -# if __GNUC__ == 3 && __GNUC_MINOR__ >= 4 || __GNUC__ > 3 /* 3.4 -> */ -# define HASATTRIBUTE_UNUSED -# endif -# if __GNUC__ == 3 && __GNUC_MINOR__ == 3 && !defined(__cplusplus) -# define HASATTRIBUTE_UNUSED /* gcc-3.3, but not g++-3.3. 
*/ -# endif -# if __GNUC__ == 3 && __GNUC_MINOR__ >= 4 || __GNUC__ > 3 /* 3.4 -> */ -# define HASATTRIBUTE_WARN_UNUSED_RESULT -# endif -#endif -#endif /* #ifndef PERL_MICRO */ - /* USE_5005THREADS needs to be after unixish.h as includes * which defines NSIG - which will stop inclusion of * this results in many functions being undeclared which bothers C++ @@ -3277,84 +3594,39 @@ EXTERN_C int perl_tsa_mutex_unlock(perl_mutex* mutex) #endif #define SVfARG(p) ((void*)(p)) - -#ifndef HEKf -# define HEKf "2p" -#endif - -/* Not ideal, but we cannot easily include a number in an already-numeric - * format sequence. */ -#ifndef HEKf256 -# define HEKf256 "3p" -#endif - -#define HEKfARG(p) ((void*)(p)) - -/* Takes three arguments: is_utf8, length, str */ -#ifndef UTF8f -# define UTF8f "d%" UVuf "%4p" -#endif -#define UTF8fARG(u,l,p) (int)cBOOL(u), (UV)(l), (void*)(p) - -#define PNf UTF8f -#define PNfARG(pn) (int)1, (UV)PadnameLEN(pn), (void *)PadnamePV(pn) - -#ifdef PERL_CORE -/* not used; but needed for backward compatibility with XS code? - RMB */ -# undef UVf -#elif !defined(UVf) -# define UVf UVuf -#endif - -#ifdef HASATTRIBUTE_DEPRECATED -# define __attribute__deprecated__ __attribute__((deprecated)) -#endif -#ifdef HASATTRIBUTE_FORMAT -# define __attribute__format__(x,y,z) __attribute__((format(x,y,z))) -#endif -#ifdef HASATTRIBUTE_MALLOC -# define __attribute__malloc__ __attribute__((__malloc__)) -#endif -#ifdef HASATTRIBUTE_NONNULL -# define __attribute__nonnull__(a) __attribute__((nonnull(a))) -#endif -#ifdef HASATTRIBUTE_NORETURN -# define __attribute__noreturn__ __attribute__((noreturn)) -#endif -#ifdef HASATTRIBUTE_PURE -# define __attribute__pure__ __attribute__((pure)) -#endif -#ifdef HASATTRIBUTE_UNUSED -# define __attribute__unused__ __attribute__((unused)) -#endif -#ifdef HASATTRIBUTE_WARN_UNUSED_RESULT -# define __attribute__warn_unused_result__ __attribute__((warn_unused_result)) -#endif - -/* If we haven't defined the attributes yet, define them to blank. 
*/ -#ifndef __attribute__deprecated__ -# define __attribute__deprecated__ -#endif -#ifndef __attribute__format__ -# define __attribute__format__(x,y,z) -#endif -#ifndef __attribute__malloc__ -# define __attribute__malloc__ -#endif -#ifndef __attribute__nonnull__ -# define __attribute__nonnull__(a) -#endif -#ifndef __attribute__noreturn__ -# define __attribute__noreturn__ + +#ifndef HEKf +# define HEKf "2p" #endif -#ifndef __attribute__pure__ -# define __attribute__pure__ + +/* Not ideal, but we cannot easily include a number in an already-numeric + * format sequence. */ +#ifndef HEKf256 +# define HEKf256 "3p" #endif -#ifndef __attribute__unused__ -# define __attribute__unused__ + +#define HEKfARG(p) ((void*)(p)) + +/* +=for apidoc Amnh||UTF8f +=for apidoc Amh||UTF8fARG|bool is_utf8|Size_t byte_len|char *str + +=cut + * %4p is a custom format + */ +#ifndef UTF8f +# define UTF8f "d%" UVuf "%4p" #endif -#ifndef __attribute__warn_unused_result__ -# define __attribute__warn_unused_result__ +#define UTF8fARG(u,l,p) (int)cBOOL(u), (UV)(l), (void*)(p) + +#define PNf UTF8f +#define PNfARG(pn) (int)1, (UV)PadnameLEN(pn), (void *)PadnamePV(pn) + +#ifdef PERL_CORE +/* not used; but needed for backward compatibility with XS code? 
- RMB */ +# undef UVf +#elif !defined(UVf) +# define UVf UVuf #endif #if !defined(DEBUGGING) && !defined(NDEBUG) @@ -3371,20 +3643,30 @@ EXTERN_C int perl_tsa_mutex_unlock(perl_mutex* mutex) # define NORETURN_FUNCTION_END NOT_REACHED; return 0 #endif -/* Some OS warn on NULL format to printf */ -#ifdef PRINTF_FORMAT_NULL_OK -# define __attribute__format__null_ok__(x,y,z) __attribute__format__(x,y,z) -#else -# define __attribute__format__null_ok__(x,y,z) -#endif - #ifdef HAS_BUILTIN_EXPECT # define EXPECT(expr,val) __builtin_expect(expr,val) #else # define EXPECT(expr,val) (expr) #endif + +/* +=head1 Miscellaneous Functions + +=for apidoc AmU|bool|LIKELY|const bool expr + +Returns the input unchanged, but at the same time it gives a branch prediction +hint to the compiler that this condition is likely to be true. + +=for apidoc AmU|bool|UNLIKELY|const bool expr + +Returns the input unchanged, but at the same time it gives a branch prediction +hint to the compiler that this condition is likely to be false. + +=cut +*/ #define LIKELY(cond) EXPECT(cBOOL(cond),TRUE) #define UNLIKELY(cond) EXPECT(cBOOL(cond),FALSE) + #ifdef HAS_BUILTIN_CHOOSE_EXPR /* placeholder */ #endif @@ -3401,8 +3683,11 @@ EXTERN_C int perl_tsa_mutex_unlock(perl_mutex* mutex) STATIC_ASSERT_STMT expands to a statement and is suitable for use inside a function. */ -#if (defined(static_assert) || (defined(__cplusplus) && __cplusplus >= 201103L)) && (!defined(__IBMC__) || __IBMC__ >= 1210) -/* static_assert is a macro defined in <assert.h> in C11 or a compiler +#if (! defined(__IBMC__) || __IBMC__ >= 1210) \ + && (( defined(static_assert) && ( defined(_ISOC11_SOURCE) \ + || (__STDC_VERSION__ - 0) >= 201101L)) \ + || (defined(__cplusplus) && __cplusplus >= 201103L)) +/* XXX static_assert is a macro defined in <assert.h> in C11 or a compiler builtin in C++11. But IBM XL C V11 does not support _Static_assert, no matter what <assert.h> says. 
*/ @@ -3458,9 +3743,9 @@ EXTERN_C int perl_tsa_mutex_unlock(perl_mutex* mutex) # define NOT_REACHED #elif defined(DEBUGGING) && (__has_builtin(__builtin_unreachable) \ || (__GNUC__ == 4 && __GNUC_MINOR__ >= 5 || __GNUC__ > 4)) /* 4.5 -> */ -# define NOT_REACHED STMT_START { ASSUME(0); __builtin_unreachable(); } STMT_END +# define NOT_REACHED STMT_START { ASSUME(!"UNREACHABLE"); __builtin_unreachable(); } STMT_END #else -# define NOT_REACHED ASSUME(0) +# define NOT_REACHED ASSUME(!"UNREACHABLE") #endif /* Some unistd.h's give a prototype for pause() even though @@ -3579,6 +3864,14 @@ typedef struct crypt_data { /* straight from /usr/include/crypt.h */ # define PERL_STATIC_INLINE_NO_RET PERL_STATIC_INLINE #endif +#ifndef PERL_STATIC_FORCE_INLINE +# define PERL_STATIC_FORCE_INLINE PERL_STATIC_INLINE +#endif + +#ifndef PERL_STATIC_FORCE_INLINE_NO_RET +# define PERL_STATIC_FORCE_INLINE_NO_RET PERL_STATIC_INLINE +#endif + #if !defined(OS2) # include "iperlsys.h" #endif @@ -3604,13 +3897,13 @@ typedef struct crypt_data { /* straight from /usr/include/crypt.h */ /* macros to define bit-fields in structs. */ #ifndef PERL_BITFIELD8 -# define PERL_BITFIELD8 unsigned +# define PERL_BITFIELD8 U8 #endif #ifndef PERL_BITFIELD16 -# define PERL_BITFIELD16 unsigned +# define PERL_BITFIELD16 U16 #endif #ifndef PERL_BITFIELD32 -# define PERL_BITFIELD32 unsigned +# define PERL_BITFIELD32 U32 #endif #include "sv.h" @@ -3651,11 +3944,10 @@ typedef struct magic_state MGS; /* struct magic_state defined in mg.c */ struct scan_data_t; typedef struct regnode_charclass regnode_charclass; -struct regnode_charclass_class; - /* A hopefully less confusing name. 
The sub-classes are all Posix classes only * used under /l matching */ -typedef struct regnode_charclass_class regnode_charclass_posixl; +typedef struct regnode_charclass_posixl regnode_charclass_class; +typedef struct regnode_charclass_posixl regnode_charclass_posixl; typedef struct regnode_ssc regnode_ssc; typedef struct RExC_state_t RExC_state_t; @@ -3817,7 +4109,9 @@ my_swap16(const U16 x) { #define U_L(what) U_32(what) #ifdef HAS_SIGNBIT -# define Perl_signbit signbit +# ifndef Perl_signbit +# define Perl_signbit signbit +# endif #endif /* These do not care about the fractional part, only about the range. */ @@ -3834,7 +4128,7 @@ my_swap16(const U16 x) { #endif #ifndef __cplusplus -#if !(defined(WIN32) || defined(UNDER_CE) || defined(SYMBIAN)) +#if !(defined(WIN32) || defined(SYMBIAN)) Uid_t getuid (void); Uid_t geteuid (void); Gid_t getgid (void); @@ -3885,7 +4179,8 @@ Gid_t getegid (void); #define DEBUG_B_FLAG 0x02000000 /*33554432*/ #define DEBUG_L_FLAG 0x04000000 /*67108864*/ #define DEBUG_i_FLAG 0x08000000 /*134217728*/ -#define DEBUG_MASK 0x0FFFEFFF /* mask of all the standard flags */ +#define DEBUG_y_FLAG 0x10000000 /*268435456*/ +#define DEBUG_MASK 0x1FFFEFFF /* mask of all the standard flags */ #define DEBUG_DB_RECURSE_FLAG 0x40000000 #define DEBUG_TOP_FLAG 0x80000000 /* -D was given --> PL_debug |= FLAG */ @@ -3917,10 +4212,12 @@ Gid_t getegid (void); # define DEBUG_B_TEST_ UNLIKELY(PL_debug & DEBUG_B_FLAG) # define DEBUG_L_TEST_ UNLIKELY(PL_debug & DEBUG_L_FLAG) # define DEBUG_i_TEST_ UNLIKELY(PL_debug & DEBUG_i_FLAG) +# define DEBUG_y_TEST_ UNLIKELY(PL_debug & DEBUG_y_FLAG) # define DEBUG_Xv_TEST_ (DEBUG_X_TEST_ && DEBUG_v_TEST_) # define DEBUG_Uv_TEST_ (DEBUG_U_TEST_ && DEBUG_v_TEST_) # define DEBUG_Pv_TEST_ (DEBUG_P_TEST_ && DEBUG_v_TEST_) # define DEBUG_Lv_TEST_ (DEBUG_L_TEST_ && DEBUG_v_TEST_) +# define DEBUG_yv_TEST_ (DEBUG_y_TEST_ && DEBUG_v_TEST_) #ifdef DEBUGGING @@ -3951,10 +4248,12 @@ Gid_t getegid (void); # define DEBUG_B_TEST 
DEBUG_B_TEST_ # define DEBUG_L_TEST DEBUG_L_TEST_ # define DEBUG_i_TEST DEBUG_i_TEST_ +# define DEBUG_y_TEST DEBUG_y_TEST_ # define DEBUG_Xv_TEST DEBUG_Xv_TEST_ # define DEBUG_Uv_TEST DEBUG_Uv_TEST_ # define DEBUG_Pv_TEST DEBUG_Pv_TEST_ # define DEBUG_Lv_TEST DEBUG_Lv_TEST_ +# define DEBUG_yv_TEST DEBUG_yv_TEST_ # define PERL_DEB(a) a # define PERL_DEB2(a,b) a @@ -3988,11 +4287,12 @@ Gid_t getegid (void); # define DEBUG_f(a) DEBUG__(DEBUG_f_TEST, a) -#ifndef PERL_EXT_RE_BUILD -# define DEBUG_r(a) DEBUG__(DEBUG_r_TEST, a) -#else -# define DEBUG_r(a) STMT_START {a;} STMT_END -#endif /* PERL_EXT_RE_BUILD */ +/* For re_comp.c, re_exec.c, assume -Dr has been specified */ +# ifdef PERL_EXT_RE_BUILD +# define DEBUG_r(a) STMT_START {a;} STMT_END +# else +# define DEBUG_r(a) DEBUG__(DEBUG_r_TEST, a) +# endif /* PERL_EXT_RE_BUILD */ # define DEBUG_x(a) DEBUG__(DEBUG_x_TEST, a) # define DEBUG_u(a) DEBUG__(DEBUG_u_TEST, a) @@ -4003,6 +4303,7 @@ Gid_t getegid (void); # define DEBUG_Uv(a) DEBUG__(DEBUG_Uv_TEST, a) # define DEBUG_Pv(a) DEBUG__(DEBUG_Pv_TEST, a) # define DEBUG_Lv(a) DEBUG__(DEBUG_Lv_TEST, a) +# define DEBUG_yv(a) DEBUG__(DEBUG_yv_TEST, a) # define DEBUG_S(a) DEBUG__(DEBUG_S_TEST, a) # define DEBUG_T(a) DEBUG__(DEBUG_T_TEST, a) @@ -4015,8 +4316,9 @@ Gid_t getegid (void); # define DEBUG_B(a) DEBUG__(DEBUG_B_TEST, a) # define DEBUG_L(a) DEBUG__(DEBUG_L_TEST, a) # define DEBUG_i(a) DEBUG__(DEBUG_i_TEST, a) +# define DEBUG_y(a) DEBUG__(DEBUG_y_TEST, a) -#else /* DEBUGGING */ +#else /* ! 
DEBUGGING below */ # define DEBUG_p_TEST (0) # define DEBUG_s_TEST (0) @@ -4045,10 +4347,12 @@ Gid_t getegid (void); # define DEBUG_B_TEST (0) # define DEBUG_L_TEST (0) # define DEBUG_i_TEST (0) +# define DEBUG_y_TEST (0) # define DEBUG_Xv_TEST (0) # define DEBUG_Uv_TEST (0) # define DEBUG_Pv_TEST (0) # define DEBUG_Lv_TEST (0) +# define DEBUG_yv_TEST (0) # define PERL_DEB(a) # define PERL_DEB2(a,b) b @@ -4079,10 +4383,12 @@ Gid_t getegid (void); # define DEBUG_B(a) # define DEBUG_L(a) # define DEBUG_i(a) +# define DEBUG_y(a) # define DEBUG_Xv(a) # define DEBUG_Uv(a) # define DEBUG_Pv(a) # define DEBUG_Lv(a) +# define DEBUG_yv(a) #endif /* DEBUGGING */ @@ -4402,12 +4708,14 @@ EXTCONST char PL_no_dir_func[] EXTCONST char PL_no_func[] INIT("The %s function is unimplemented"); EXTCONST char PL_no_myglob[] - INIT("\"%s\" %se %s can't be in a package"); + INIT("\"%s\" %s %s can't be in a package"); EXTCONST char PL_no_localize_ref[] INIT("Can't localize through a reference"); EXTCONST char PL_memory_wrap[] INIT("panic: memory wrap"); - +EXTCONST char PL_extended_cp_format[] + INIT("Code point 0x%" UVXf " is not Unicode, requires a Perl extension," + " and so is not portable"); EXTCONST char PL_Yes[] INIT("1"); EXTCONST char PL_No[] @@ -4417,6 +4725,11 @@ EXTCONST char PL_Zero[] EXTCONST char PL_hexdigit[] INIT("0123456789abcdef0123456789ABCDEF"); +EXTCONST STRLEN PL_WARN_ALL + INIT(0); +EXTCONST STRLEN PL_WARN_NONE + INIT(0); + /* This is constant on most architectures, a global on OS/2 */ #ifndef OS2 EXTCONST char PL_sh_path[] @@ -4633,7 +4946,7 @@ EXTCONST unsigned char PL_mod_latin1_uc[] = { || (UNICODE_MAJOR_VERSION == 2 && UNICODE_DOT_VERSION >= 1 \ && UNICODE_DOT_DOT_VERSION >= 8) 255 /*sharp s*/, -#else /* uc() is itself in early unicode */ +#else /* uc(sharp s) is 'sharp s' itself in early unicode */ 223, #endif 224-32, 225-32, 226-32, 227-32, 228-32, 229-32, 230-32, 231-32, @@ -4653,7 +4966,7 @@ EXTCONST unsigned char PL_latin1_lc[]; #ifndef PERL_GLOBAL_STRUCT 
/* or perlvars.h */ #ifdef DOINIT -EXT unsigned char PL_fold_locale[] = { /* Unfortunately not EXTCONST. */ +EXT unsigned char PL_fold_locale[256] = { /* Unfortunately not EXTCONST. */ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, @@ -4688,7 +5001,7 @@ EXT unsigned char PL_fold_locale[] = { /* Unfortunately not EXTCONST. */ 248, 249, 250, 251, 252, 253, 254, 255 }; #else -EXT unsigned char PL_fold_locale[]; /* Unfortunately not EXTCONST. */ +EXT unsigned char PL_fold_locale[256]; /* Unfortunately not EXTCONST. */ #endif #endif /* !PERL_GLOBAL_STRUCT */ @@ -5389,13 +5702,8 @@ EXTCONST bool PL_valid_types_NV_set[]; * With the U8_NV version you will want to have inner braces, * while with the NV_U8 use just the NV. */ -#ifdef __cplusplus -#define INFNAN_U8_NV_DECL EXTERN_C const union { U8 u8[NVSIZE]; NV nv; } -#define INFNAN_NV_U8_DECL EXTERN_C const union { NV nv; U8 u8[NVSIZE]; } -#else #define INFNAN_U8_NV_DECL EXTCONST union { U8 u8[NVSIZE]; NV nv; } #define INFNAN_NV_U8_DECL EXTCONST union { NV nv; U8 u8[NVSIZE]; } -#endif /* if these never got defined, they need defaults */ #ifndef PERL_SET_CONTEXT @@ -5414,6 +5722,425 @@ EXTCONST bool PL_valid_types_NV_set[]; # define PERL_SET_THX(t) NOOP #endif +#ifndef EBCDIC + +/* The tables below are adapted from + * https://bjoern.hoehrmann.de/utf-8/decoder/dfa/, which requires this copyright + * notice: + +Copyright (c) 2008-2009 Bjoern Hoehrmann + +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +of the Software, and to permit persons to whom the Software is furnished to do +so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial 
portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + +*/ + +# ifdef DOINIT +# if 0 /* This is the original table given in + https://bjoern.hoehrmann.de/utf-8/decoder/dfa/ */ +static U8 utf8d_C9[] = { + /* The first part of the table maps bytes to character classes that + * to reduce the size of the transition table and create bitmasks. */ + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /*-1F*/ + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /*-3F*/ + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /*-5F*/ + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /*-7F*/ + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, /*-9F*/ + 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, /*-BF*/ + 8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, /*-DF*/ + 10,3,3,3,3,3,3,3,3,3,3,3,3,4,3,3, 11,6,6,6,5,8,8,8,8,8,8,8,8,8,8,8, /*-FF*/ + + /* The second part is a transition table that maps a combination + * of a state of the automaton and a character class to a state. 
*/ + 0,12,24,36,60,96,84,12,12,12,48,72, 12,12,12,12,12,12,12,12,12,12,12,12, + 12, 0,12,12,12,12,12, 0,12, 0,12,12, 12,24,12,12,12,12,12,24,12,24,12,12, + 12,12,12,12,12,12,12,24,12,12,12,12, 12,24,12,12,12,12,12,12,12,24,12,12, + 12,12,12,12,12,12,12,36,12,36,12,12, 12,36,12,12,12,12,12,36,12,36,12,12, + 12,36,12,12,12,12,12,12,12,12,12,12 +}; + +# endif + +/* This is a version of the above table customized for Perl that doesn't + * exclude surrogates and accepts start bytes up through FD (FE on 64-bit + * machines). The classes have been renumbered so that the patterns are more + * evident in the table. The class numbers for start bytes are constrained so + * that they can be used as a shift count for masking off the leading one bits. + * It would make the code simpler if start byte FF could also be handled, but + * doing so would mean adding nodes for each of continuation bytes 6-12 + * remaining, and two more nodes for overlong detection (a total of 9), and + * there is room only for 4 more nodes unless we make the array U16 instead of + * U8. + * + * The classes are + * 00-7F 0 + * 80-81 7 Not legal immediately after start bytes E0 F0 F8 FC + * FE + * 82-83 8 Not legal immediately after start bytes E0 F0 F8 FC + * 84-87 9 Not legal immediately after start bytes E0 F0 F8 + * 88-8F 10 Not legal immediately after start bytes E0 F0 + * 90-9F 11 Not legal immediately after start byte E0 + * A0-BF 12 + * C0,C1 1 + * C2-DF 2 + * E0 13 + * E1-EF 3 + * F0 14 + * F1-F7 4 + * F8 15 + * F9-FB 5 + * FC 16 + * FD 6 + * FE 17 (or 1 on 32-bit machines, since it overflows) + * FF 1 + */ + +EXTCONST U8 PL_extended_utf8_dfa_tab[] = { + /* The first part of the table maps bytes to character classes to reduce + * the size of the transition table and create bitmasks. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*00-0F*/ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*10-1F*/ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*20-2F*/ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*30-3F*/ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*40-4F*/ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*50-5F*/ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*60-6F*/ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*70-7F*/ + 7, 7, 8, 8, 9, 9, 9, 9,10,10,10,10,10,10,10,10, /*80-8F*/ + 11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11, /*90-9F*/ + 12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12, /*A0-AF*/ + 12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12, /*B0-BF*/ + 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /*C0-CF*/ + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /*D0-DF*/ + 13, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, /*E0-EF*/ + 14, 4, 4, 4, 4, 4, 4, 4,15, 5, 5, 5,16, 6, /*F0-FD*/ +# ifdef UV_IS_QUAD + 17, /*FE*/ +# else + 1, /*FE*/ +# endif + 1, /*FF*/ + +/* The second part is a transition table that maps a combination + * of a state of the automaton and a character class to a new state, called a + * node. The nodes are: + * N0 The initial state, and final accepting one. + * N1 Any one continuation byte (80-BF) left. This is transitioned to + * immediately when the start byte indicates a two-byte sequence + * N2 Any two continuation bytes left. + * N3 Any three continuation bytes left. + * N4 Any four continuation bytes left. + * N5 Any five continuation bytes left. + * N6 Start byte is E0. Continuation bytes 80-9F are illegal (overlong); + * the other continuations transition to N1 + * N7 Start byte is F0. Continuation bytes 80-8F are illegal (overlong); + * the other continuations transition to N2 + * N8 Start byte is F8. Continuation bytes 80-87 are illegal (overlong); + * the other continuations transition to N3 + * N9 Start byte is FC. 
Continuation bytes 80-83 are illegal (overlong); + * the other continuations transition to N4 + * N10 Start byte is FE. Continuation bytes 80-81 are illegal (overlong); + * the other continuations transition to N5 + * 1 Reject. All transitions not mentioned above (except the single + * byte ones (as they are always legal) are to this state. + */ + +# define NUM_CLASSES 18 +# define N0 0 +# define N1 ((N0) + NUM_CLASSES) +# define N2 ((N1) + NUM_CLASSES) +# define N3 ((N2) + NUM_CLASSES) +# define N4 ((N3) + NUM_CLASSES) +# define N5 ((N4) + NUM_CLASSES) +# define N6 ((N5) + NUM_CLASSES) +# define N7 ((N6) + NUM_CLASSES) +# define N8 ((N7) + NUM_CLASSES) +# define N9 ((N8) + NUM_CLASSES) +# define N10 ((N9) + NUM_CLASSES) + +/*Class: 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 */ +/*N0*/ 0, 1,N1,N2,N3,N4,N5, 1, 1, 1, 1, 1, 1,N6,N7,N8,N9,N10, +/*N1*/ 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, +/*N2*/ 1, 1, 1, 1, 1, 1, 1,N1,N1,N1,N1,N1,N1, 1, 1, 1, 1, 1, +/*N3*/ 1, 1, 1, 1, 1, 1, 1,N2,N2,N2,N2,N2,N2, 1, 1, 1, 1, 1, +/*N4*/ 1, 1, 1, 1, 1, 1, 1,N3,N3,N3,N3,N3,N3, 1, 1, 1, 1, 1, +/*N5*/ 1, 1, 1, 1, 1, 1, 1,N4,N4,N4,N4,N4,N4, 1, 1, 1, 1, 1, + +/*N6*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,N1, 1, 1, 1, 1, 1, +/*N7*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,N2,N2, 1, 1, 1, 1, 1, +/*N8*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,N3,N3,N3, 1, 1, 1, 1, 1, +/*N9*/ 1, 1, 1, 1, 1, 1, 1, 1, 1,N4,N4,N4,N4, 1, 1, 1, 1, 1, +/*N10*/ 1, 1, 1, 1, 1, 1, 1, 1,N5,N5,N5,N5,N5, 1, 1, 1, 1, 1, +}; + +/* And below is a version of the above table that accepts only strict UTF-8. + * Hence no surrogates nor non-characters, nor non-Unicode. Thus, if the input + * passes this dfa, it will be for a well-formed, non-problematic code point + * that can be returned immediately. + * + * The "Implementation details" portion of + * https://bjoern.hoehrmann.de/utf-8/decoder/dfa/ shows how + * the first portion of the table maps each possible byte into a character + * class. 
And that the classes for those bytes which are start bytes have been + * carefully chosen so they serve as well to be used as a shift value to mask + * off the leading 1 bits of the start byte. Unfortunately the addition of + * being able to distinguish non-characters makes this not fully work. This is + * because, now, the start bytes E1-EF have to be broken into 3 classes instead + * of 2: + * 1) ED because it could be a surrogate + * 2) EF because it could be a non-character + * 3) the rest, which can never evaluate to a problematic code point. + * + * Each of E1-EF has three leading 1 bits, then a 0. That means we could use a + * shift (and hence class number) of either 3 or 4 to get a mask that works. + * But that only allows two categories, and we need three. khw made the + * decision to therefore treat the ED start byte as an error, so that the dfa + * drops out immediately for that. In the dfa, classes 3 and 4 are used to + * distinguish EF vs the rest. Then special code is used to deal with ED, + * that's executed only when the dfa drops out. The code points started by ED + * are half surrogates, and half hangul syllables. This means that 2048 of the + * hangul syllables (about 18%) take longer than all other non-problematic + * code points to handle. + * + * The changes to handle non-characters require the addition of states and + * classes to the dfa. (See the section on "Mapping bytes to character + * classes" in the linked-to document for further explanation of the original + * dfa.)
+ * + * The classes are + * 00-7F 0 + * 80-8E 9 + * 8F 10 + * 90-9E 11 + * 9F 12 + * A0-AE 13 + * AF 14 + * B0-B6 15 + * B7 16 + * B8-BD 15 + * BE 17 + * BF 18 + * C0,C1 1 + * C2-DF 2 + * E0 7 + * E1-EC 3 + * ED 1 + * EE 3 + * EF 4 + * F0 8 + * F1-F3 6 (6 bits can be stripped) + * F4 5 (only 5 can be stripped) + * F5-FF 1 + */ + +EXTCONST U8 PL_strict_utf8_dfa_tab[] = { + /* The first part of the table maps bytes to character classes to reduce + * the size of the transition table and create bitmasks. */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*00-0F*/ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*10-1F*/ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*20-2F*/ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*30-3F*/ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*40-4F*/ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*50-5F*/ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*60-6F*/ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*70-7F*/ + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,10, /*80-8F*/ + 11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,12, /*90-9F*/ + 13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,14, /*A0-AF*/ + 15,15,15,15,15,15,15,16,15,15,15,15,15,15,17,18, /*B0-BF*/ + 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /*C0-CF*/ + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /*D0-DF*/ + 7, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 1, 3, 4, /*E0-EF*/ + 8, 6, 6, 6, 5, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*F0-FF*/ + +/* The second part is a transition table that maps a combination + * of a state of the automaton and a character class to a new state, called a + * node. The nodes are: + * N0 The initial state, and final accepting one. + * N1 Any one continuation byte (80-BF) left. This is transitioned to + * immediately when the start byte indicates a two-byte sequence + * N2 Any two continuation bytes left. + * N3 Start byte is E0. 
Continuation bytes 80-9F are illegal (overlong); + * the other continuations transition to state N1 + * N4 Start byte is EF. Continuation byte B7 transitions to N8; BF to N9; + * the other continuations transition to N1 + * N5 Start byte is F0. Continuation bytes 80-8F are illegal (overlong); + * [9AB]F transition to N10; the other continuations to N2. + * N6 Start byte is F[123]. Continuation bytes [89AB]F transition + * to N10; the other continuations to N2. + * N7 Start byte is F4. Continuation bytes 90-BF are illegal + * (non-unicode); 8F transitions to N10; the other continuations to N2 + * N8 Initial sequence is EF B7. Continuation bytes 90-AF are illegal + * (non-characters); the other continuations transition to N0. + * N9 Initial sequence is EF BF. Continuation bytes BE and BF are illegal + * (non-characters); the other continuations transition to N0. + * N10 Initial sequence is one of: F0 [9-B]F; F[123] [8-B]F; or F4 8F. + * Continuation byte BF transitions to N11; the other continuations to + * N1 + * N11 Initial sequence is the two bytes given in N10 followed by BF. + * Continuation bytes BE and BF are illegal (non-characters); the other + * continuations transition to N0. + * 1 Reject. All transitions not mentioned above (except the single + * byte ones (as they are always legal) are to this state.
+ */ + +# undef N0 +# undef N1 +# undef N2 +# undef N3 +# undef N4 +# undef N5 +# undef N6 +# undef N7 +# undef N8 +# undef N9 +# undef NUM_CLASSES +# define NUM_CLASSES 19 +# define N0 0 +# define N1 ((N0) + NUM_CLASSES) +# define N2 ((N1) + NUM_CLASSES) +# define N3 ((N2) + NUM_CLASSES) +# define N4 ((N3) + NUM_CLASSES) +# define N5 ((N4) + NUM_CLASSES) +# define N6 ((N5) + NUM_CLASSES) +# define N7 ((N6) + NUM_CLASSES) +# define N8 ((N7) + NUM_CLASSES) +# define N9 ((N8) + NUM_CLASSES) +# define N10 ((N9) + NUM_CLASSES) +# define N11 ((N10) + NUM_CLASSES) + +/*Class: 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 */ +/*N0*/ 0, 1, N1, N2, N4, N7, N6, N3, N5, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +/*N1*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +/*N2*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, N1, N1, N1, N1, N1, N1, N1, N1, N1, N1, + +/*N3*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, N1, N1, N1, N1, N1, N1, +/*N4*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, N1, N1, N1, N1, N1, N1, N1, N8, N1, N9, +/*N5*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, N2,N10, N2,N10, N2, N2, N2,N10, +/*N6*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, N2,N10, N2,N10, N2,N10, N2, N2, N2,N10, +/*N7*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, N2,N10, 1, 1, 1, 1, 1, 1, 1, 1, +/*N8*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, +/*N9*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, +/*N10*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, N1, N1, N1, N1, N1, N1, N1, N1, N1,N11, +/*N11*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, +}; + +/* And below is yet another version of the above tables that accepts only UTF-8 + * as defined by Corrigendum #9. Hence no surrogates nor non-Unicode, but + * it allows non-characters.
This is isomorphic to the original table + * in https://bjoern.hoehrmann.de/utf-8/decoder/dfa/ + * + * The classes are + * 00-7F 0 + * 80-8F 9 + * 90-9F 10 + * A0-BF 11 + * C0,C1 1 + * C2-DF 2 + * E0 7 + * E1-EC 3 + * ED 4 + * EE-EF 3 + * F0 8 + * F1-F3 6 (6 bits can be stripped) + * F4 5 (only 5 can be stripped) + * F5-FF 1 + */ + +EXTCONST U8 PL_c9_utf8_dfa_tab[] = { + /* The first part of the table maps bytes to character classes to reduce + * the size of the transition table and create bitmasks. */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*00-0F*/ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*10-1F*/ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*20-2F*/ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*30-3F*/ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*40-4F*/ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*50-5F*/ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*60-6F*/ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*70-7F*/ + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, /*80-8F*/ + 10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10, /*90-9F*/ + 11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11, /*A0-AF*/ + 11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11, /*B0-BF*/ + 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /*C0-CF*/ + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /*D0-DF*/ + 7, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 3, 3, /*E0-EF*/ + 8, 6, 6, 6, 5, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*F0-FF*/ + +/* The second part is a transition table that maps a combination + * of a state of the automaton and a character class to a new state, called a + * node. The nodes are: + * N0 The initial state, and final accepting one. + * N1 Any one continuation byte (80-BF) left. This is transitioned to + * immediately when the start byte indicates a two-byte sequence + * N2 Any two continuation bytes left. + * N3 Any three continuation bytes left. + * N4 Start byte is E0. 
Continuation bytes 80-9F are illegal (overlong); + * the other continuations transition to state N1 + * N5 Start byte is ED. Continuation bytes A0-BF all lead to surrogates, + * so are illegal. The other continuations transition to state N1. + * N6 Start byte is F0. Continuation bytes 80-8F are illegal (overlong); + * the other continuations transition to N2 + * N7 Start byte is F4. Continuation bytes 90-BF are illegal + * (non-unicode); the other continuations transition to N2 + * 1 Reject. All transitions not mentioned above (except the single + * byte ones (as they are always legal) are to this state. + */ + +# undef N0 +# undef N1 +# undef N2 +# undef N3 +# undef N4 +# undef N5 +# undef N6 +# undef N7 +# undef NUM_CLASSES +# define NUM_CLASSES 12 +# define N0 0 +# define N1 ((N0) + NUM_CLASSES) +# define N2 ((N1) + NUM_CLASSES) +# define N3 ((N2) + NUM_CLASSES) +# define N4 ((N3) + NUM_CLASSES) +# define N5 ((N4) + NUM_CLASSES) +# define N6 ((N5) + NUM_CLASSES) +# define N7 ((N6) + NUM_CLASSES) + +/*Class: 0 1 2 3 4 5 6 7 8 9 10 11 */ +/*N0*/ 0, 1, N1, N2, N5, N7, N3, N4, N6, 1, 1, 1, +/*N1*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, +/*N2*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, N1, N1, N1, +/*N3*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, N2, N2, N2, + +/*N4*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, N1, +/*N5*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, N1, N1, 1, +/*N6*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, N2, N2, +/*N7*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, N2, 1, 1, +}; + +# else /* End of is DOINIT */ + +EXTCONST U8 PL_extended_utf8_dfa_tab[]; +EXTCONST U8 PL_strict_utf8_dfa_tab[]; +EXTCONST U8 PL_c9_utf8_dfa_tab[]; + +# endif +#endif /* end of isn't EBCDIC */ #ifndef PERL_NO_INLINE_FUNCTIONS /* Static inline funcs that depend on includes and declarations above. 
@@ -5535,11 +6262,19 @@ typedef struct am_table_short AMTS; # define KEYWORD_PLUGIN_MUTEX_INIT MUTEX_INIT(&PL_keyword_plugin_mutex) # define KEYWORD_PLUGIN_MUTEX_LOCK MUTEX_LOCK(&PL_keyword_plugin_mutex) # define KEYWORD_PLUGIN_MUTEX_UNLOCK MUTEX_UNLOCK(&PL_keyword_plugin_mutex) # define KEYWORD_PLUGIN_MUTEX_TERM MUTEX_DESTROY(&PL_keyword_plugin_mutex) +# define USER_PROP_MUTEX_INIT MUTEX_INIT(&PL_user_prop_mutex) +# define USER_PROP_MUTEX_LOCK MUTEX_LOCK(&PL_user_prop_mutex) +# define USER_PROP_MUTEX_UNLOCK MUTEX_UNLOCK(&PL_user_prop_mutex) +# define USER_PROP_MUTEX_TERM MUTEX_DESTROY(&PL_user_prop_mutex) #else # define KEYWORD_PLUGIN_MUTEX_INIT NOOP # define KEYWORD_PLUGIN_MUTEX_LOCK NOOP # define KEYWORD_PLUGIN_MUTEX_UNLOCK NOOP # define KEYWORD_PLUGIN_MUTEX_TERM NOOP +# define USER_PROP_MUTEX_INIT NOOP +# define USER_PROP_MUTEX_LOCK NOOP +# define USER_PROP_MUTEX_UNLOCK NOOP +# define USER_PROP_MUTEX_TERM NOOP #endif #ifdef USE_LOCALE /* These locale things are all subject to change */ @@ -5557,6 +6292,27 @@ typedef struct am_table_short AMTS; # define IN_SOME_LOCALE_FORM_COMPILETIME \ cBOOL(PL_hints & (HINT_LOCALE|HINT_LOCALE_PARTIAL)) +/* +=head1 Locale-related functions and macros + +=for apidoc Amn|bool|IN_LOCALE + +Evaluates to TRUE if the plain locale pragma without a parameter (S<C<use +locale>>) is in effect. + +=for apidoc Amn|bool|IN_LOCALE_COMPILETIME + +Evaluates to TRUE if, when compiling a perl program (including an C<eval>) if +the plain locale pragma without a parameter (S<C<use locale>>) is in effect. + +=for apidoc Amn|bool|IN_LOCALE_RUNTIME + +Evaluates to TRUE if, when executing a perl program (including an C<eval>) if +the plain locale pragma without a parameter (S<C<use locale>>) is in effect. + +=cut +*/ + # define IN_LOCALE \ (IN_PERL_COMPILETIME ? IN_LOCALE_COMPILETIME : IN_LOCALE_RUNTIME) # define IN_SOME_LOCALE_FORM \ @@ -5583,7 +6339,7 @@ typedef struct am_table_short AMTS; # if defined (PERL_CORE) || defined (PERL_IN_XSUB_RE) /* This internal macro should be called from places that operate under - * locale rules.
It there is a problem with the current locale that + * locale rules. If there is a problem with the current locale that * hasn't been raised yet, it will output a warning this time. Because * this will so rarely be true, there is no point to optimize for time; * instead it makes sense to minimize space used and do all the work in @@ -5616,7 +6372,7 @@ typedef struct am_table_short AMTS; # define _CHECK_AND_OUTPUT_WIDE_LOCALE_UTF8_MSG(s, send) \ STMT_START { /* Check if to warn before doing the conversion work */\ if (! PL_in_utf8_CTYPE_locale && ckWARN(WARN_LOCALE)) { \ - UV cp = utf8_to_uvchr_buf((U8 *) s, (U8 *) send, NULL); \ + UV cp = utf8_to_uvchr_buf((U8 *) (s), (U8 *) (send), NULL); \ Perl_warner(aTHX_ packWARN(WARN_LOCALE), \ "Wide character (U+%" UVXf ") in %s", \ (cp == 0) \ @@ -5641,16 +6397,20 @@ typedef struct am_table_short AMTS; # define IN_LC_COMPILETIME(category) 0 # define IN_LC_RUNTIME(category) 0 # define IN_LC(category) 0 +# define _CHECK_AND_WARN_PROBLEMATIC_LOCALE +# define _CHECK_AND_OUTPUT_WIDE_LOCALE_UTF8_MSG(s, send) +# define _CHECK_AND_OUTPUT_WIDE_LOCALE_CP_MSG(c) #endif + /* Locale/thread synchronization macros. These aren't needed if using - * thread-safe locale operations */ + * thread-safe locale operations, except if something is broken */ #if defined(USE_LOCALE) \ && defined(USE_ITHREADS) \ - && ! defined(USE_THREAD_SAFE_LOCALE) + && (! defined(USE_THREAD_SAFE_LOCALE) || defined(TS_W32_BROKEN_LOCALECONV)) /* We have a locale object holding the 'C' locale for Posix 2008 */ -#ifndef USE_POSIX_2008_LOCALE +# ifndef USE_POSIX_2008_LOCALE # define _LOCALE_TERM_POSIX_2008 NOOP # else # define _LOCALE_TERM_POSIX_2008 \ @@ -5664,32 +6424,83 @@ typedef struct am_table_short AMTS; } \ } STMT_END # endif -# define LOCALE_INIT STMT_START { \ + +/* This is used as a generic lock for locale operations. 
For example this is + * used when calling nl_langinfo() so that another thread won't zap the + * contents of its buffer before it gets saved; and it's called when changing + * the locale of LC_MESSAGES. On some systems the latter can cause the + * nl_langinfo buffer to be zapped under a race condition. + * + * If combined with LC_NUMERIC_LOCK, calls to this and its corresponding unlock + * should be contained entirely within the locked portion of LC_NUMERIC. This + * mutex should be used only in very short sections of code, while + * LC_NUMERIC_LOCK may span more operations. By always following this + * convention, deadlock should be impossible. But if necessary, the two + * mutexes could be combined. + * + * Actually, the two macros just below with the '_V' suffixes are used in just + * a few places where there is a broken localeconv(), but otherwise things are + * thread safe, and hence don't need locking. Just below LOCALE_LOCK and + * LOCALE_UNLOCK are defined in terms of these for use everywhere else */ +# define LOCALE_LOCK_V \ + STMT_START { \ + DEBUG_Lv(PerlIO_printf(Perl_debug_log, \ + "%s: %d: locking locale\n", __FILE__, __LINE__)); \ + MUTEX_LOCK(&PL_locale_mutex); \ + } STMT_END +# define LOCALE_UNLOCK_V \ + STMT_START { \ + DEBUG_Lv(PerlIO_printf(Perl_debug_log, \ + "%s: %d: unlocking locale\n", __FILE__, __LINE__)); \ + MUTEX_UNLOCK(&PL_locale_mutex); \ + } STMT_END + +/* On windows, we just need the mutex for LOCALE_LOCK */ +# ifdef TS_W32_BROKEN_LOCALECONV +# define LOCALE_LOCK NOOP +# define LOCALE_UNLOCK NOOP +# define LOCALE_INIT MUTEX_INIT(&PL_locale_mutex); +# define LOCALE_TERM MUTEX_DESTROY(&PL_locale_mutex) +# define LC_NUMERIC_LOCK(cond) +# define LC_NUMERIC_UNLOCK +# else +# define LOCALE_LOCK LOCALE_LOCK_V +# define LOCALE_UNLOCK LOCALE_UNLOCK_V + + /* We also need to lock LC_NUMERIC for non-windows (hence Posix 2008) + * systems */ +# define LOCALE_INIT STMT_START { \ MUTEX_INIT(&PL_locale_mutex); \ MUTEX_INIT(&PL_lc_numeric_mutex); \ } 
STMT_END -/* This mutex is used to create critical sections where we want the LC_NUMERIC - * locale to be locked into either the C (standard) locale, or the underlying - * locale, so that other threads interrupting this one don't change it to the - * wrong state before we've had a chance to complete our operation. It can - * stay locked over an entire printf operation, for example. And so is made - * distinct from the LOCALE_LOCK mutex. - * - * This simulates kind of a general semaphore. The current thread will lock - * the mutex if the per-thread variable is zero, and then increments that - * variable. Each corresponding UNLOCK decrements the variable until it is 0, - * at which point it actually unlocks the mutex. Since the variable is - * per-thread, there is no race with other threads. - * - * The single argument is a condition to test for, and if true, to panic, as - * this would be an attempt to complement the LC_NUMERIC state, and we're not - * supposed to because it's locked. - * - * Clang improperly gives warnings for this, if not silenced: - * https://clang.llvm.org/docs/ThreadSafetyAnalysis.html#conditional-locks - * */ -# define LC_NUMERIC_LOCK(cond_to_panic_if_already_locked) \ +# define LOCALE_TERM STMT_START { \ + MUTEX_DESTROY(&PL_locale_mutex); \ + MUTEX_DESTROY(&PL_lc_numeric_mutex); \ + _LOCALE_TERM_POSIX_2008; \ + } STMT_END + + /* This mutex is used to create critical sections where we want the + * LC_NUMERIC locale to be locked into either the C (standard) locale, or + * the underlying locale, so that other threads interrupting this one don't + * change it to the wrong state before we've had a chance to complete our + * operation. It can stay locked over an entire printf operation, for + * example. And so is made distinct from the LOCALE_LOCK mutex. + * + * This simulates kind of a general semaphore. The current thread will + * lock the mutex if the per-thread variable is zero, and then increments + * that variable. 
Each corresponding UNLOCK decrements the variable until + * it is 0, at which point it actually unlocks the mutex. Since the + * variable is per-thread, there is no race with other threads. + * + * The single argument is a condition to test for, and if true, to panic, + * as this would be an attempt to complement the LC_NUMERIC state, and + * we're not supposed to because it's locked. + * + * Clang improperly gives warnings for this, if not silenced: + * https://clang.llvm.org/docs/ThreadSafetyAnalysis.html#conditional-locks + * */ +# define LC_NUMERIC_LOCK(cond_to_panic_if_already_locked) \ CLANG_DIAG_IGNORE(-Wthread-safety) \ STMT_START { \ if (PL_lc_numeric_mutex_depth <= 0) { \ @@ -5702,7 +6513,7 @@ typedef struct am_table_short AMTS; else { \ PL_lc_numeric_mutex_depth++; \ DEBUG_Lv(PerlIO_printf(Perl_debug_log, \ - "%s: %d: avoided lc_numeric_lock; depth=%d\n", \ + "%s: %d: avoided lc_numeric_lock; new depth=%d\n", \ __FILE__, __LINE__, PL_lc_numeric_mutex_depth)); \ if (cond_to_panic_if_already_locked) { \ Perl_croak_nocontext("panic: %s: %d: Trying to change" \ @@ -5712,7 +6523,7 @@ typedef struct am_table_short AMTS; } \ } STMT_END -# define LC_NUMERIC_UNLOCK \ +# define LC_NUMERIC_UNLOCK \ STMT_START { \ if (PL_lc_numeric_mutex_depth <= 1) { \ MUTEX_UNLOCK(&PL_lc_numeric_mutex); \ @@ -5724,47 +6535,19 @@ typedef struct am_table_short AMTS; else { \ PL_lc_numeric_mutex_depth--; \ DEBUG_Lv(PerlIO_printf(Perl_debug_log, \ - "%s: %d: avoided lc_numeric_unlock; depth=%d\n", \ + "%s: %d: avoided lc_numeric_unlock; new depth=%d\n",\ __FILE__, __LINE__, PL_lc_numeric_mutex_depth)); \ } \ } STMT_END \ CLANG_DIAG_RESTORE -/* This is used as a generic lock for locale operations. For example this is - * used when calling nl_langinfo() so that another thread won't zap the - * contents of its buffer before it gets saved; and it's called when changing - * the locale of LC_MESSAGES. 
On some systems the latter can cause the - * nl_langinfo buffer to be zapped under a race condition. - * - * If combined with LC_NUMERIC_LOCK, calls to this and its corresponding unlock - * should be contained entirely within the locked portion of LC_NUMERIC. This - * mutex should be used only in very short sections of code, while - * LC_NUMERIC_LOCK may span more operations. By always following this - * convention, deadlock should be impossible. But if necessary, the two - * mutexes could be combined */ -# define LOCALE_LOCK \ - STMT_START { \ - DEBUG_Lv(PerlIO_printf(Perl_debug_log, \ - "%s: %d: locking locale\n", __FILE__, __LINE__)); \ - MUTEX_LOCK(&PL_locale_mutex); \ - } STMT_END -# define LOCALE_UNLOCK \ - STMT_START { \ - DEBUG_Lv(PerlIO_printf(Perl_debug_log, \ - "%s: %d: unlocking locale\n", __FILE__, __LINE__)); \ - MUTEX_UNLOCK(&PL_locale_mutex); \ - } STMT_END - -# define LOCALE_TERM \ - STMT_START { \ - MUTEX_DESTROY(&PL_locale_mutex); \ - MUTEX_DESTROY(&PL_lc_numeric_mutex); \ - _LOCALE_TERM_POSIX_2008; \ - } STMT_END +# endif /* End of needs locking LC_NUMERIC */ #else /* Below is no locale sync needed */ # define LOCALE_INIT # define LOCALE_LOCK +# define LOCALE_LOCK_V # define LOCALE_UNLOCK +# define LOCALE_UNLOCK_V # define LC_NUMERIC_LOCK(cond) # define LC_NUMERIC_UNLOCK # define LOCALE_TERM @@ -5773,7 +6556,10 @@ typedef struct am_table_short AMTS; #ifdef USE_LOCALE_NUMERIC /* These macros are for toggling between the underlying locale (UNDERLYING or - * LOCAL) and the C locale (STANDARD). + * LOCAL) and the C locale (STANDARD). (Actually we don't have to use the C + * locale if the underlying locale is indistinguishable from it in the numeric + * operations used by Perl, namely the decimal point, and even the thousands + * separator.) =head1 Locale-related functions and macros @@ -5817,10 +6603,11 @@ close by, and guaranteed to be called. 
=for apidoc Am|void|STORE_LC_NUMERIC_SET_TO_NEEDED -This is used to help wrap XS or C code that that is C locale-aware. -This locale category is generally kept set to the C locale by Perl for -backwards compatibility, and because most XS code that reads floating point -values can cope only with the decimal radix character being a dot. +This is used to help wrap XS or C code that is C locale-aware. +This locale category is generally kept set to a locale where the decimal radix +character is a dot, and the separator between groups of digits is empty. This +is because most XS code that reads floating point numbers is expecting them to +have this syntax. This macro makes sure the current C state is set properly, to be aware of locale if the call to the XS or C code from the Perl program is @@ -5847,7 +6634,15 @@ argument list, like this: On threaded perls not operating with thread-safe functionality, this macro uses a mutex to force a critical section. Therefore the matching RESTORE should be -close by, and guaranteed to be called. +close by, and guaranteed to be called; see L +for a more contained way to ensure that. + +=for apidoc Am|void|STORE_LC_NUMERIC_SET_TO_NEEDED_IN|bool in_lc_numeric + +Same as L with in_lc_numeric provided +as the precalculated value of C. It is the caller's +responsibility to ensure that the status of C and C +cannot have changed since the precalculation. =for apidoc Am|void|RESTORE_LC_NUMERIC @@ -5868,20 +6663,48 @@ expression, but with an empty argument list, like this: ... } +=for apidoc Am|void|WITH_LC_NUMERIC_SET_TO_NEEDED|block + +This macro invokes the supplied statement or block within the context +of a L .. 
L pair +if required, so eg: + + WITH_LC_NUMERIC_SET_TO_NEEDED( + SNPRINTF_G(fv, ebuf, sizeof(ebuf), precis) + ); + +is equivalent to: + + { +#ifdef USE_LOCALE_NUMERIC + DECLARATION_FOR_LC_NUMERIC_MANIPULATION; + STORE_LC_NUMERIC_SET_TO_NEEDED(); +#endif + SNPRINTF_G(fv, ebuf, sizeof(ebuf), precis); +#ifdef USE_LOCALE_NUMERIC + RESTORE_LC_NUMERIC(); +#endif + } + +=for apidoc Am|void|WITH_LC_NUMERIC_SET_TO_NEEDED_IN|bool in_lc_numeric|block + +Same as L with in_lc_numeric provided +as the precalculated value of C. It is the caller's +responsibility to ensure that the status of C and C +cannot have changed since the precalculation. + =cut */ -/* The numeric locale is generally kept in the C locale instead of the - * underlying locale. The current status is known by looking at two words. - * One is non-zero if the current numeric locale is the standard C/POSIX one. - * The other is non-zero if the current locale is the underlying locale. Both - * can be non-zero if, as often happens, the underlying locale is C. - * - * Its slightly more complicated than this, as the PL_numeric_standard variable - * is set if the current numeric locale is indistinguishable from the C locale. - * This happens when the radix character is a dot, and the thousands separator - * is the empty string. +/* If the underlying numeric locale has a non-dot decimal point or has a + * non-empty floating point thousands separator, the current locale is instead + * generally kept in the C locale instead of that underlying locale. The + * current status is known by looking at two words. One is non-zero if the + * current numeric locale is the standard C/POSIX one or is indistinguishable + * from C. The other is non-zero if the current locale is the underlying + * locale. Both can be non-zero if, as often happens, the underlying locale is + * C or indistinguishable from it. 
* * khw believes the reason for the variables instead of the bits in a single * word is to avoid having to have masking instructions. */ @@ -5897,12 +6720,13 @@ expression, but with an empty argument list, like this: # define DECLARATION_FOR_LC_NUMERIC_MANIPULATION \ void (*_restore_LC_NUMERIC_function)(pTHX) = NULL -# define STORE_LC_NUMERIC_SET_TO_NEEDED() \ +# define STORE_LC_NUMERIC_SET_TO_NEEDED_IN(in) \ STMT_START { \ + bool _in_lc_numeric = (in); \ LC_NUMERIC_LOCK( \ - (IN_LC(LC_NUMERIC) && _NOT_IN_NUMERIC_UNDERLYING) \ - || _NOT_IN_NUMERIC_STANDARD); \ - if (IN_LC(LC_NUMERIC)) { \ + ( ( _in_lc_numeric && _NOT_IN_NUMERIC_UNDERLYING) \ + || (! _in_lc_numeric && _NOT_IN_NUMERIC_STANDARD))); \ + if (_in_lc_numeric) { \ if (_NOT_IN_NUMERIC_UNDERLYING) { \ Perl_set_numeric_underlying(aTHX); \ _restore_LC_NUMERIC_function \ @@ -5918,6 +6742,9 @@ expression, but with an empty argument list, like this: } \ } STMT_END +# define STORE_LC_NUMERIC_SET_TO_NEEDED() \ + STORE_LC_NUMERIC_SET_TO_NEEDED_IN(IN_LC(LC_NUMERIC)) + # define RESTORE_LC_NUMERIC() \ STMT_START { \ if (_restore_LC_NUMERIC_function) { \ @@ -5992,6 +6819,17 @@ expression, but with an empty argument list, like this: __FILE__, __LINE__, PL_numeric_standard)); \ } STMT_END +# define WITH_LC_NUMERIC_SET_TO_NEEDED_IN(in_lc_numeric, block) \ + STMT_START { \ + DECLARATION_FOR_LC_NUMERIC_MANIPULATION; \ + STORE_LC_NUMERIC_SET_TO_NEEDED_IN(in_lc_numeric); \ + block; \ + RESTORE_LC_NUMERIC(); \ + } STMT_END; + +# define WITH_LC_NUMERIC_SET_TO_NEEDED(block) \ + WITH_LC_NUMERIC_SET_TO_NEEDED_IN(IN_LC(LC_NUMERIC), block) + #else /* !USE_LOCALE_NUMERIC */ # define SET_NUMERIC_STANDARD() @@ -6000,25 +6838,53 @@ expression, but with an empty argument list, like this: # define DECLARATION_FOR_LC_NUMERIC_MANIPULATION # define STORE_LC_NUMERIC_SET_STANDARD() # define STORE_LC_NUMERIC_FORCE_TO_UNDERLYING() +# define STORE_LC_NUMERIC_SET_TO_NEEDED_IN(in_lc_numeric) # define STORE_LC_NUMERIC_SET_TO_NEEDED() # define 
RESTORE_LC_NUMERIC() # define LOCK_LC_NUMERIC_STANDARD() # define UNLOCK_LC_NUMERIC_STANDARD() +# define WITH_LC_NUMERIC_SET_TO_NEEDED_IN(in_lc_numeric, block) \ + STMT_START { block; } STMT_END +# define WITH_LC_NUMERIC_SET_TO_NEEDED(block) \ + STMT_START { block; } STMT_END #endif /* !USE_LOCALE_NUMERIC */ #define Atof my_atof -#ifdef USE_QUADMATH -# define Perl_strtod(s, e) strtoflt128(s, e) -#elif defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE) -# if defined(HAS_STRTOLD) -# define Perl_strtod(s, e) strtold(s, e) -# elif defined(HAS_STRTOD) -# define Perl_strtod(s, e) (NV)strtod(s, e) /* Unavoidable loss. */ -# endif -#elif defined(HAS_STRTOD) -# define Perl_strtod(s, e) strtod(s, e) +/* + +=head1 Numeric functions + +=for apidoc AmTR|NV|Strtod|NN const char * const s|NULLOK char ** e + +This is a synonym for L. + +=for apidoc AmTR|NV|Strtol|NN const char * const s|NULLOK char ** e|int base + +Platform and configuration independent C. This expands to the +appropriate C-like function based on the platform and F +options>. For example it could expand to C or C instead of +C. + +=for apidoc AmTR|NV|Strtoul|NN const char * const s|NULLOK char ** e|int base + +Platform and configuration independent C. This expands to the +appropriate C-like function based on the platform and F +options>. For example it could expand to C or C instead of +C. 
+ +=cut + +*/ + +#define Strtod my_strtod + +#if defined(HAS_STRTOD) \ + || defined(USE_QUADMATH) \ + || (defined(HAS_STRTOLD) && defined(HAS_LONG_DOUBLE) \ + && defined(USE_LONG_DOUBLE)) +# define Perl_strtod Strtod #endif #if !defined(Strtol) && defined(USE_64_BIT_INT) && defined(IV_IS_QUAD) && \ @@ -6081,6 +6947,14 @@ expression, but with an empty argument list, like this: # define Atoul(s) Strtoul(s, NULL, 10) #endif +#define grok_bin(s,lp,fp,rp) \ + grok_bin_oct_hex(s, lp, fp, rp, 1, _CC_BINDIGIT, 'b') +#define grok_oct(s,lp,fp,rp) \ + (*(fp) |= PERL_SCAN_DISALLOW_PREFIX, \ + grok_bin_oct_hex(s, lp, fp, rp, 3, _CC_OCTDIGIT, '\0')) +#define grok_hex(s,lp,fp,rp) \ + grok_bin_oct_hex(s, lp, fp, rp, 4, _CC_XDIGIT, 'x') + #ifndef PERL_SCRIPT_MODE #define PERL_SCRIPT_MODE "r" #endif @@ -6259,7 +7133,7 @@ expression, but with an empty argument list, like this: #endif #if defined(HAS_FLOCK) && !defined(HAS_FLOCK_PROTO) -int flock(int fd, int op); +EXTERN_C int flock(int fd, int op); #endif #ifndef O_RDONLY @@ -6313,19 +7187,50 @@ int flock(int fd, int op); #define IS_NUMBER_NAN 0x20 /* this is not */ #define IS_NUMBER_TRAILING 0x40 /* number has trailing trash */ +/* +=head1 Numeric functions + +=for apidoc AmdR|bool|GROK_NUMERIC_RADIX|NN const char **sp|NN const char *send + +A synonym for L + +=cut +*/ #define GROK_NUMERIC_RADIX(sp, send) grok_numeric_radix(sp, send) -/* Input flags: */ +/* Number scan flags. All are used for input, the ones used for output are so + * marked */ #define PERL_SCAN_ALLOW_UNDERSCORES 0x01 /* grok_??? accept _ in numbers */ #define PERL_SCAN_DISALLOW_PREFIX 0x02 /* grok_??? reject 0x in hex etc */ -#define PERL_SCAN_SILENT_ILLDIGIT 0x04 /* grok_??? not warn about illegal digits */ -#define PERL_SCAN_SILENT_NON_PORTABLE 0x08 /* grok_??? not warn about very large - numbers which are <= UV_MAX */ + +/* grok_??? input: ignored; output: found overflow */ +#define PERL_SCAN_GREATER_THAN_UV_MAX 0x04 + +/* grok_??? 
don't warn about illegal digits. To preserve total backcompat, + * this isn't set on output if one is found. Instead, see + * PERL_SCAN_NOTIFY_ILLDIGIT. */ +#define PERL_SCAN_SILENT_ILLDIGIT 0x08 + #define PERL_SCAN_TRAILING 0x10 /* grok_number_flags() allow trailing and set IS_NUMBER_TRAILING */ -/* Output flags: */ -#define PERL_SCAN_GREATER_THAN_UV_MAX 0x02 /* should this merge with above? */ +/* These are considered experimental, so not exposed publicly */ +#if defined(PERL_CORE) || defined(PERL_EXT) +/* grok_??? don't warn about very large numbers which are <= UV_MAX; + * output: found such a number */ +# define PERL_SCAN_SILENT_NON_PORTABLE 0x20 + +/* If this is set on input, and no illegal digit is found, it will be cleared + * on output; otherwise unchanged */ +# define PERL_SCAN_NOTIFY_ILLDIGIT 0x40 + +/* Don't warn on overflow; output flag still set */ +# define PERL_SCAN_SILENT_OVERFLOW 0x80 + +/* Forbid a leading underscore, which the other one doesn't */ +# define PERL_SCAN_ALLOW_MEDIAL_UNDERSCORES (0x100|PERL_SCAN_ALLOW_UNDERSCORES) +#endif + /* to let user control profiling */ #ifdef PERL_GPROF_CONTROL @@ -6335,12 +7240,8 @@ extern void moncontrol(int); #define PERL_GPROF_MONCONTROL(x) #endif -#ifdef UNDER_CE -#include "wince.h" -#endif - /* ISO 6429 NEL - C1 control NExt Line */ -/* See http://www.unicode.org/unicode/reports/tr13/ */ +/* See https://www.unicode.org/unicode/reports/tr13/ */ #define NEXT_LINE_CHAR NEXT_LINE_NATIVE #ifndef PIPESOCK_MODE @@ -6397,8 +7298,20 @@ extern void moncontrol(int); #define PERL_SIGNALS_UNSAFE_FLAG 0x0001 -/* Use instead of abs() since abs() forces its argument to be an int, - * but also beware since this evaluates its argument twice, so no x++. */ +/* +=head1 Numeric functions + +=for apidoc Am|int|PERL_ABS|int + +Typeless C or C, I. (The usage below indicates it is for +integers, but it works for any type.) 
Use instead of these, since the C +library ones force their argument to be what it is expecting, potentially +leading to disaster. But also beware that this evaluates its argument twice, +so no C. + +=cut +*/ + #define PERL_ABS(x) ((x) < 0 ? -(x) : (x)) #if defined(__DECC) && defined(__osf__) @@ -6416,9 +7329,19 @@ extern void moncontrol(int); # define do_aexec(really, mark,sp) do_aexec5(really, mark, sp, 0, 0) #endif -/* check embedded \0 characters in pathnames passed to syscalls, - but allow one ending \0 */ -#define IS_SAFE_SYSCALL(p, len, what, op_name) (S_is_safe_syscall(aTHX_ (p), (len), (what), (op_name))) + +/* +=head1 Miscellaneous Functions + +=for apidoc Am|bool|IS_SAFE_SYSCALL|NN const char *pv|STRLEN len|NN const char *what|NN const char *op_name + +Same as L. + +=cut + +Allows one ending \0 +*/ +#define IS_SAFE_SYSCALL(p, len, what, op_name) (Perl_is_safe_syscall(aTHX_ (p), (len), (what), (op_name))) #define IS_SAFE_PATHNAME(p, len, op_name) IS_SAFE_SYSCALL((p), (len), "pathname", (op_name)) @@ -6657,6 +7580,8 @@ extern void moncontrol(int); #ifdef DOUBLE_HAS_NAN +START_EXTERN_C + #ifdef DOINIT /* PL_inf and PL_nan initialization. @@ -6682,7 +7607,9 @@ extern void moncontrol(int); */ /* The quadmath literals are anon structs which -Wc++-compat doesn't like. */ +# ifndef USE_CPLUSPLUS GCC_DIAG_IGNORE_DECL(-Wc++-compat); +# endif # ifdef USE_QUADMATH /* Cannot use HUGE_VALQ for PL_inf because not a compile-time @@ -6752,7 +7679,9 @@ INFNAN_NV_U8_DECL PL_nan = { 0.0/0.0 }; /* keep last */ # endif # endif +# ifndef USE_CPLUSPLUS GCC_DIAG_RESTORE_DECL; +# endif #else @@ -6761,6 +7690,8 @@ INFNAN_NV_U8_DECL PL_nan; #endif +END_EXTERN_C + /* If you have not defined NV_INF/NV_NAN (like for example win32/win32.h), * we will define NV_INF/NV_NAN as the nv part of the global const * PL_inf/PL_nan. Note, however, that the preexisting NV_INF/NV_NAN