X-Git-Url: https://perl5.git.perl.org/perl5.git/blobdiff_plain/8c49c7c16863f886a6a4da3b585463cd16a8b976..7d769928d688d1662c7e4bda7038ebdc70c42bad:/perl.h diff --git a/perl.h b/perl.h index 4d48b04..05dbe0e 100644 --- a/perl.h +++ b/perl.h @@ -52,7 +52,7 @@ /* See L for detailed notes on * PERL_IMPLICIT_CONTEXT and PERL_IMPLICIT_SYS */ -/* Note that from here --> to <-- the same logic is +/* XXX NOTE that from here --> to <-- the same logic is * repeated in makedef.pl, so be certain to update * both places when editing. */ @@ -108,7 +108,8 @@ #endif /* Use the reentrant APIs like localtime_r and getpwent_r */ -/* Win32 has naturally threadsafe libraries, no need to use any _r variants. */ +/* Win32 has naturally threadsafe libraries, no need to use any _r variants. + * XXX KEEP makedef.pl copy of this code in sync */ #if defined(USE_ITHREADS) && !defined(USE_REENTRANT_API) && !defined(NETWARE) && !defined(WIN32) # define USE_REENTRANT_API #endif @@ -202,6 +203,12 @@ # undef PERL_TRACK_MEMPOOL #endif +#ifdef DEBUGGING +# define dTHX_DEBUGGING dTHX +#else +# define dTHX_DEBUGGING dNOOP +#endif + #define STATIC static #ifndef PERL_CORE @@ -239,7 +246,7 @@ Perl_pregfree(aTHX_ (prog)) #define CALLREGFREE_PVT(prog) \ - if(prog) RX_ENGINE(prog)->rxfree(aTHX_ (prog)) + if(prog && RX_ENGINE(prog)) RX_ENGINE(prog)->rxfree(aTHX_ (prog)) #define CALLREG_NUMBUF_FETCH(rx,paren,usesv) \ RX_ENGINE(rx)->numbered_buff_FETCH(aTHX_ (rx),(paren),(usesv)) @@ -391,26 +398,24 @@ /* on gcc (and clang), specify that a warning should be temporarily * ignored; e.g. * - * GCC_DIAG_IGNORE(-Wmultichar); + * GCC_DIAG_IGNORE_DECL(-Wmultichar); * char b = 'ab'; - * GCC_DIAG_RESTORE; + * GCC_DIAG_RESTORE_DECL; * * based on http://dbp-consulting.com/tutorials/SuppressingGCCWarnings.html * * Note that "pragma GCC diagnostic push/pop" was added in GCC 4.6, Mar 2011; * clang only pretends to be GCC 4.2, but still supports push/pop. * - * Note on usage: on non-gcc (or lookalike, like clang) compilers - * one cannot use these with a semicolon at file (global) level without - * warnings since they are defined as empty, which leads into the terminating - * semicolon being left alone on a line: - * ; - * which makes compilers mildly cranky. Therefore at file level one - * should use the GCC_DIAG_IGNORE and GCC_DIAG_RESTORE macros *without* - * the semicolons. + * Note on usage: all macros must be used at a place where a declaration + * or statement can occur, i.e., not in the middle of an expression. + * *_DIAG_IGNORE() and *_DIAG_RESTORE can be used in any such place, but + * must be used without a following semicolon. *_DIAG_IGNORE_DECL() and + * *_DIAG_RESTORE_DECL must be used with a following semicolon, and behave + * syntactically as declarations (like dNOOP). *_DIAG_IGNORE_STMT() + * and *_DIAG_RESTORE_STMT must be used with a following semicolon, + * and behave syntactically as statements (like NOOP). * - * (A dead-on-arrival solution would be to try to define the macros as - * NOOP or dNOOP, those don't work both inside functions and outside.) */ #if defined(__clang__) || defined(__clang) || \ @@ -424,6 +429,10 @@ # define GCC_DIAG_IGNORE(w) # define GCC_DIAG_RESTORE #endif +#define GCC_DIAG_IGNORE_DECL(x) GCC_DIAG_IGNORE(x) dNOOP +#define GCC_DIAG_RESTORE_DECL GCC_DIAG_RESTORE dNOOP +#define GCC_DIAG_IGNORE_STMT(x) GCC_DIAG_IGNORE(x) NOOP +#define GCC_DIAG_RESTORE_STMT GCC_DIAG_RESTORE NOOP /* for clang specific pragmas */ #if defined(__clang__) || defined(__clang) # define CLANG_DIAG_PRAGMA(x) _Pragma (#x) @@ -434,18 +443,26 @@ # define CLANG_DIAG_IGNORE(w) # define CLANG_DIAG_RESTORE #endif +#define CLANG_DIAG_IGNORE_DECL(x) CLANG_DIAG_IGNORE(x) dNOOP +#define CLANG_DIAG_RESTORE_DECL CLANG_DIAG_RESTORE dNOOP +#define CLANG_DIAG_IGNORE_STMT(x) CLANG_DIAG_IGNORE(x) NOOP +#define CLANG_DIAG_RESTORE_STMT CLANG_DIAG_RESTORE NOOP -#define NOOP /*EMPTY*/(void)0 -/* cea2e8a9dd23747f accidentally lost the comment originally from the first - check in of thread.h, explaining why we need dNOOP at all: */ -/* Rats: if dTHR is just blank then the subsequent ";" throws an error */ -/* Declaring a *function*, instead of a variable, ensures that we don't rely - on being able to suppress "unused" warnings. */ -#ifdef __cplusplus -#define dNOOP (void)0 +#if defined(_MSC_VER) && (_MSC_VER >= 1300) +# define MSVC_DIAG_IGNORE(x) __pragma(warning(push)) \ + __pragma(warning(disable : x)) +# define MSVC_DIAG_RESTORE __pragma(warning(pop)) #else -#define dNOOP extern int Perl___notused(void) +# define MSVC_DIAG_IGNORE(x) +# define MSVC_DIAG_RESTORE #endif +#define MSVC_DIAG_IGNORE_DECL(x) MSVC_DIAG_IGNORE(x) dNOOP +#define MSVC_DIAG_RESTORE_DECL MSVC_DIAG_RESTORE dNOOP +#define MSVC_DIAG_IGNORE_STMT(x) MSVC_DIAG_IGNORE(x) NOOP +#define MSVC_DIAG_RESTORE_STMT MSVC_DIAG_RESTORE NOOP + +#define NOOP /*EMPTY*/(void)0 +#define dNOOP struct Perl___notused_struct #ifndef pTHX /* Don't bother defining tTHX ; using it outside @@ -516,12 +533,25 @@ #endif /* - * STMT_START { statements; } STMT_END; - * can be used as a single statement, as in - * if (x) STMT_START { ... } STMT_END; else ... - * - * Trying to select a version that gives no warnings... - */ +=head1 Miscellaneous Functions + +=for apidoc AmnUu|void|STMT_START + + STMT_START { statements; } STMT_END; + +can be used as a single statement, as in + + if (x) STMT_START { ... } STMT_END; else ... + +These are often used in macro definitions. Note that you can't return a value +out of them. + +=for apidoc AmnUhu|void|STMT_END + +=cut + + Trying to select a version that gives no warnings... +*/ #if !(defined(STMT_START) && defined(STMT_END)) # ifdef PERL_USE_GCC_BRACE_GROUPS # define STMT_START (void)( /* gcc supports "({ STATEMENTS; })" */ @@ -606,16 +636,24 @@ # define TAINT_WARN_get 0 # define TAINT_WARN_set(s) NOOP #else + /* Set to tainted if we are running under tainting mode */ # define TAINT (PL_tainted = PL_tainting) -# define TAINT_NOT (PL_tainted = FALSE) -# define TAINT_IF(c) if (UNLIKELY(c)) { PL_tainted = PL_tainting; } + +# define TAINT_NOT (PL_tainted = FALSE) /* Untaint */ +# define TAINT_IF(c) if (UNLIKELY(c)) { TAINT; } /* Conditionally taint */ # define TAINT_ENV() if (UNLIKELY(PL_tainting)) { taint_env(); } -# define TAINT_PROPER(s) if (UNLIKELY(PL_tainting)) { taint_proper(NULL, s); } + /* croak or warn if tainting */ +# define TAINT_PROPER(s) if (UNLIKELY(PL_tainting)) { \ + taint_proper(NULL, s); \ + } # define TAINT_set(s) (PL_tainted = (s)) -# define TAINT_get (PL_tainted) -# define TAINTING_get (PL_tainting) +# define TAINT_get (PL_tainted) /* Is something tainted? */ +# define TAINTING_get (PL_tainting) /* Is taint checking enabled? */ # define TAINTING_set(s) (PL_tainting = (s)) -# define TAINT_WARN_get (PL_taint_warn) +# define TAINT_WARN_get (PL_taint_warn) /* FALSE => tainting violations + are fatal + TRUE => they're just + warnings */ # define TAINT_WARN_set(s) (PL_taint_warn = (s)) #endif @@ -679,16 +717,6 @@ #include -/* EVC 4 SDK headers includes a bad definition of MB_CUR_MAX in stdlib.h - which is included from stdarg.h. Bad definition not present in SD 2008 - SDK headers. wince.h is not yet included, so we cant fix this from there - since by then MB_CUR_MAX will be defined from stdlib.h. - cewchar.h includes a correct definition of MB_CUR_MAX and it is copied here - since cewchar.h can't be included this early */ -#if defined(UNDER_CE) && (_MSC_VER < 1300) -# define MB_CUR_MAX 1 -#endif - # include #ifdef I_STDINT @@ -715,10 +743,33 @@ # include #endif -#if !defined(NO_LOCALE) && defined(HAS_SETLOCALE) -# define USE_LOCALE +/* If not forbidden, we enable locale handling if either 1) the POSIX 2008 + * functions are available, or 2) just the setlocale() function. This logic is + * repeated in t/loc_tools.pl and makedef.pl; The three should be kept in + * sync. */ +#if ! defined(NO_LOCALE) + +# if ! defined(NO_POSIX_2008_LOCALE) \ + && defined(HAS_NEWLOCALE) \ + && defined(HAS_USELOCALE) \ + && defined(HAS_DUPLOCALE) \ + && defined(HAS_FREELOCALE) \ + && defined(LC_ALL_MASK) + + /* For simplicity, the code is written to assume that any platform advanced + * enough to have the Posix 2008 locale functions has LC_ALL. The final + * test above makes sure that assumption is valid */ + +# define HAS_POSIX_2008_LOCALE +# define USE_LOCALE +# elif defined(HAS_SETLOCALE) +# define USE_LOCALE +# endif +#endif + +#ifdef USE_LOCALE # define HAS_SKIP_LOCALE_INIT /* Solely for XS code to test for this - capability */ + #define */ # if !defined(NO_LOCALE_COLLATE) && defined(LC_COLLATE) \ && defined(HAS_STRXFRM) # define USE_LOCALE_COLLATE @@ -738,7 +789,55 @@ # if !defined(NO_LOCALE_TIME) && defined(LC_TIME) # define USE_LOCALE_TIME # endif -#endif /* !NO_LOCALE && HAS_SETLOCALE */ +# if !defined(NO_LOCALE_ADDRESS) && defined(LC_ADDRESS) +# define USE_LOCALE_ADDRESS +# endif +# if !defined(NO_LOCALE_IDENTIFICATION) && defined(LC_IDENTIFICATION) +# define USE_LOCALE_IDENTIFICATION +# endif +# if !defined(NO_LOCALE_MEASUREMENT) && defined(LC_MEASUREMENT) +# define USE_LOCALE_MEASUREMENT +# endif +# if !defined(NO_LOCALE_PAPER) && defined(LC_PAPER) +# define USE_LOCALE_PAPER +# endif +# if !defined(NO_LOCALE_TELEPHONE) && defined(LC_TELEPHONE) +# define USE_LOCALE_TELEPHONE +# endif + +/* XXX The next few defines are unfortunately duplicated in makedef.pl, and + * changes here MUST also be made there */ + +# if ! defined(HAS_SETLOCALE) && defined(HAS_POSIX_2008_LOCALE) +# define USE_POSIX_2008_LOCALE +# ifndef USE_THREAD_SAFE_LOCALE +# define USE_THREAD_SAFE_LOCALE +# endif + /* If compiled with + * -DUSE_THREAD_SAFE_LOCALE, will do so even + * on unthreaded builds */ +# elif (defined(USE_ITHREADS) || defined(USE_THREAD_SAFE_LOCALE)) \ + && ( defined(HAS_POSIX_2008_LOCALE) \ + || (defined(WIN32) && defined(_MSC_VER) && _MSC_VER >= 1400)) \ + && ! defined(NO_THREAD_SAFE_LOCALE) +# ifndef USE_THREAD_SAFE_LOCALE +# define USE_THREAD_SAFE_LOCALE +# endif +# ifdef HAS_POSIX_2008_LOCALE +# define USE_POSIX_2008_LOCALE +# endif +# endif +#endif + +/* Microsoft documentation reads in the change log for VS 2015: + * "The localeconv function declared in locale.h now works correctly when + * per-thread locale is enabled. In previous versions of the library, this + * function would return the lconv data for the global locale, not the + * thread's locale." + */ +#if defined(WIN32) && defined(USE_THREAD_SAFE_LOCALE) && _MSC_VER < 1900 +# define TS_W32_BROKEN_LOCALECONV +#endif #include @@ -798,56 +897,124 @@ EXTERN_C int syscall(int, ...); EXTERN_C int usleep(unsigned int); #endif -#ifdef PERL_CORE +/* macros for correct constant construction. These are in C99 + * (so they will not be available in strict C89 mode), but they are nice, so + * let's define them if necessary. */ +#ifndef UINT16_C +# if INTSIZE >= 2 +# define UINT16_C(x) ((U16_TYPE)x##U) +# else +# define UINT16_C(x) ((U16_TYPE)x##UL) +# endif +#endif -/* macros for correct constant construction */ -# if INTSIZE >= 2 -# define U16_CONST(x) ((U16)x##U) -# else -# define U16_CONST(x) ((U16)x##UL) -# endif +#ifndef UINT32_C +# if INTSIZE >= 4 +# define UINT32_C(x) ((U32_TYPE)x##U) +# else +# define UINT32_C(x) ((U32_TYPE)x##UL) +# endif +#endif -# if INTSIZE >= 4 -# define U32_CONST(x) ((U32)x##U) -# else -# define U32_CONST(x) ((U32)x##UL) -# endif +#ifdef I_STDINT + typedef intmax_t PERL_INTMAX_T; + typedef uintmax_t PERL_UINTMAX_T; +#endif + +/* N.B. We use QUADKIND here instead of HAS_QUAD here, because that doesn't + * actually mean what it has always been documented to mean (see RT #119753) + * and is explicitly turned off outside of core with dire warnings about + * removing the undef. */ + +#if defined(QUADKIND) +# undef PeRl_INT64_C +# undef PeRl_UINT64_C +/* Prefer the native integer types (int and long) over long long + * (which is not C89) and Win32-specific __int64. */ +# if QUADKIND == QUAD_IS_INT && INTSIZE == 8 +# define PeRl_INT64_C(c) (c) +# define PeRl_UINT64_C(c) CAT2(c,U) +# endif +# if QUADKIND == QUAD_IS_LONG && LONGSIZE == 8 +# define PeRl_INT64_C(c) CAT2(c,L) +# define PeRl_UINT64_C(c) CAT2(c,UL) +# endif +# if QUADKIND == QUAD_IS_LONG_LONG && defined(HAS_LONG_LONG) +# define PeRl_INT64_C(c) CAT2(c,LL) +# define PeRl_UINT64_C(c) CAT2(c,ULL) +# endif +# if QUADKIND == QUAD_IS___INT64 +# define PeRl_INT64_C(c) CAT2(c,I64) +# define PeRl_UINT64_C(c) CAT2(c,UI64) +# endif +# ifndef PeRl_INT64_C +# define PeRl_INT64_C(c) ((I64)(c)) /* last resort */ +# define PeRl_UINT64_C(c) ((U64TYPE)(c)) +# endif +/* In OS X the INT64_C/UINT64_C are defined with LL/ULL, which will + * not fly with C89-pedantic gcc, so let's undefine them first so that + * we can redefine them with our native integer preferring versions. */ +# if defined(PERL_DARWIN) && defined(PERL_GCC_PEDANTIC) +# undef INT64_C +# undef UINT64_C +# endif +# ifndef INT64_C +# define INT64_C(c) PeRl_INT64_C(c) +# endif +# ifndef UINT64_C +# define UINT64_C(c) PeRl_UINT64_C(c) +# endif -# ifdef HAS_QUAD -# if INTSIZE >= 8 -# define U64_CONST(x) ((U64)x##U) -# elif LONGSIZE >= 8 -# define U64_CONST(x) ((U64)x##UL) -# elif QUADKIND == QUAD_IS_LONG_LONG -# define U64_CONST(x) ((U64)x##ULL) -# elif QUADKIND == QUAD_IS___INT64 -# define U64_CONST(x) ((U64)x##UI64) -# else /* best guess we can make */ -# define U64_CONST(x) ((U64)x##UL) +# ifndef I_STDINT + typedef I64TYPE PERL_INTMAX_T; + typedef U64TYPE PERL_UINTMAX_T; # endif -# endif +# ifndef INTMAX_C +# define INTMAX_C(c) INT64_C(c) +# endif +# ifndef UINTMAX_C +# define UINTMAX_C(c) UINT64_C(c) +# endif + +#else /* below QUADKIND is undefined */ + +/* Perl doesn't work on 16 bit systems, so must be 32 bit */ +# ifndef I_STDINT + typedef I32TYPE PERL_INTMAX_T; + typedef U32TYPE PERL_UINTMAX_T; +# endif +# ifndef INTMAX_C +# define INTMAX_C(c) INT32_C(c) +# endif +# ifndef UINTMAX_C +# define UINTMAX_C(c) UINT32_C(c) +# endif + +#endif /* no QUADKIND */ + +#ifdef PERL_CORE /* byte-swapping functions for big-/little-endian conversion */ # define _swab_16_(x) ((U16)( \ - (((U16)(x) & U16_CONST(0x00ff)) << 8) | \ - (((U16)(x) & U16_CONST(0xff00)) >> 8) )) + (((U16)(x) & UINT16_C(0x00ff)) << 8) | \ + (((U16)(x) & UINT16_C(0xff00)) >> 8) )) # define _swab_32_(x) ((U32)( \ - (((U32)(x) & U32_CONST(0x000000ff)) << 24) | \ - (((U32)(x) & U32_CONST(0x0000ff00)) << 8) | \ - (((U32)(x) & U32_CONST(0x00ff0000)) >> 8) | \ - (((U32)(x) & U32_CONST(0xff000000)) >> 24) )) + (((U32)(x) & UINT32_C(0x000000ff)) << 24) | \ + (((U32)(x) & UINT32_C(0x0000ff00)) << 8) | \ + (((U32)(x) & UINT32_C(0x00ff0000)) >> 8) | \ + (((U32)(x) & UINT32_C(0xff000000)) >> 24) )) # ifdef HAS_QUAD # define _swab_64_(x) ((U64)( \ - (((U64)(x) & U64_CONST(0x00000000000000ff)) << 56) | \ - (((U64)(x) & U64_CONST(0x000000000000ff00)) << 40) | \ - (((U64)(x) & U64_CONST(0x0000000000ff0000)) << 24) | \ - (((U64)(x) & U64_CONST(0x00000000ff000000)) << 8) | \ - (((U64)(x) & U64_CONST(0x000000ff00000000)) >> 8) | \ - (((U64)(x) & U64_CONST(0x0000ff0000000000)) >> 24) | \ - (((U64)(x) & U64_CONST(0x00ff000000000000)) >> 40) | \ - (((U64)(x) & U64_CONST(0xff00000000000000)) >> 56) )) + (((U64)(x) & UINT64_C(0x00000000000000ff)) << 56) | \ + (((U64)(x) & UINT64_C(0x000000000000ff00)) << 40) | \ + (((U64)(x) & UINT64_C(0x0000000000ff0000)) << 24) | \ + (((U64)(x) & UINT64_C(0x00000000ff000000)) << 8) | \ + (((U64)(x) & UINT64_C(0x000000ff00000000)) >> 8) | \ + (((U64)(x) & UINT64_C(0x0000ff0000000000)) >> 24) | \ + (((U64)(x) & UINT64_C(0x00ff000000000000)) >> 40) | \ + (((U64)(x) & UINT64_C(0xff00000000000000)) >> 56) )) # endif /* The old value was hard coded at 1008. (4096-16) seems to be a bit faster, @@ -916,14 +1083,30 @@ EXTERN_C int usleep(unsigned int); # define PERL_STRLEN_EXPAND_SHIFT 2 #endif -#include -#define STRUCT_OFFSET(s,m) offsetof(s,m) +/* This use of offsetof() requires /Zc:offsetof- for VS2017 (and presumably + * onwards) when building Socket.xs, but we can just use a different definition + * for STRUCT_OFFSET instead. */ +#if defined(WIN32) && defined(_MSC_VER) && _MSC_VER >= 1910 +# define STRUCT_OFFSET(s,m) (Size_t)(&(((s *)0)->m)) +#else +# include +# define STRUCT_OFFSET(s,m) offsetof(s,m) +#endif -/* ptrdiff_t is C11, so undef it under pedantic builds */ +/* ptrdiff_t is C11, so undef it under pedantic builds. (Actually it is + * in C89, but apparently there are platforms where it doesn't exist. See + * thread beginning at http://nntp.perl.org/group/perl.perl5.porters/251541.) + * */ #ifdef PERL_GCC_PEDANTIC # undef HAS_PTRDIFF_T #endif +#ifdef HAS_PTRDIFF_T +# define Ptrdiff_t ptrdiff_t +#else +# define Ptrdiff_t SSize_t +#endif + #ifndef __SYMBIAN32__ # include #endif @@ -948,7 +1131,7 @@ EXTERN_C int usleep(unsigned int); # define saferealloc Perl_realloc # define safefree Perl_mfree # define CHECK_MALLOC_TOO_LATE_FOR_(code) STMT_START { \ - if (!PL_tainting && MallocCfg_ptr[MallocCfg_cfg_env_read]) \ + if (!TAINTING_get && MallocCfg_ptr[MallocCfg_cfg_env_read]) \ code; \ } STMT_END # define CHECK_MALLOC_TOO_LATE_FOR(ch) \ @@ -1094,6 +1277,38 @@ EXTERN_C char *crypt(const char *, const char *); EXTERN_C char *crypt(const char *, const char *); #endif +/* +=head1 Errno + +=for apidoc m|void|SETERRNO|int errcode|int vmserrcode + +Set C, and on VMS set C. + +=for apidoc mn|void|dSAVEDERRNO + +Declare variables needed to save C and any operating system +specific error number. + +=for apidoc mn|void|dSAVE_ERRNO + +Declare variables needed to save C and any operating system +specific error number, and save them for optional later restoration +by C. + +=for apidoc mn|void|SAVE_ERRNO + +Save C and any operating system specific error number for +optional later restoration by C. Requires +C or C in scope. + +=for apidoc mn|void|RESTORE_ERRNO + +Restore C and any operating system specific error number that +was saved by C or C. + +=cut +*/ + #ifdef SETERRNO # undef SETERRNO /* SOCKS might have defined this */ #endif @@ -1165,6 +1380,29 @@ EXTERN_C char *crypt(const char *, const char *); # define RESTORE_ERRNO (errno = saved_errno) #endif +/* +=head1 Warning and Dieing + +=for apidoc Amn|SV *|ERRSV + +Returns the SV for C<$@>, creating it if needed. + +=for apidoc Am|void|CLEAR_ERRSV + +Clear the contents of C<$@>, setting it to the empty string. + +This replaces any read-only SV with a fresh SV and removes any magic. + +=for apidoc Am|void|SANE_ERRSV + +Clean up ERRSV so we can safely set it. + +This replaces any read-only SV with a fresh writable copy and removes +any magic. + +=cut +*/ + #define ERRSV GvSVn(PL_errgv) /* contains inlined gv_add_by_type */ @@ -1185,6 +1423,23 @@ EXTERN_C char *crypt(const char *, const char *); } \ } STMT_END +/* contains inlined gv_add_by_type */ +#define SANE_ERRSV() STMT_START { \ + SV ** const svp = &GvSV(PL_errgv); \ + if (!*svp) { \ + *svp = newSVpvs(""); \ + } else if (SvREADONLY(*svp)) { \ + SV *dupsv = newSVsv(*svp); \ + SvREFCNT_dec_NN(*svp); \ + *svp = dupsv; \ + } else { \ + SV *const errsv = *svp; \ + if (SvMAGICAL(errsv)) { \ + mg_free(errsv); \ + } \ + } \ + } STMT_END + #ifdef PERL_CORE # define DEFSV (0 + GvSVn(PL_defgv)) @@ -1410,9 +1665,17 @@ EXTERN_C char *crypt(const char *, const char *); /* This used to be conditionally defined based on whether we had a sprintf() * that correctly returns the string length (as required by C89), but we no * longer need that. XS modules can (and do) use this name, so it must remain - * a part of the API that's visible to modules. But we no longer document it - * either (because using sprintf() rather than snprintf() is almost always - * a bad idea). */ + * a part of the API that's visible to modules. + +=head1 Miscellaneous Functions + +=for apidoc ATmD|int|my_sprintf|NN char *buffer|NN const char *pat|... + +Do NOT use this due to the possibility of overflowing C. Instead use +my_snprintf() + +=cut +*/ #define my_sprintf sprintf /* @@ -1495,8 +1758,6 @@ EXTERN_C char *crypt(const char *, const char *); #ifdef HAS_STRLCAT # define my_strlcat strlcat -#else -# define my_strlcat Perl_my_strlcat #endif #if defined(PERL_CORE) || defined(PERL_EXT) @@ -1509,14 +1770,10 @@ EXTERN_C char *crypt(const char *, const char *); #ifdef HAS_STRLCPY # define my_strlcpy strlcpy -#else -# define my_strlcpy Perl_my_strlcpy #endif #ifdef HAS_STRNLEN # define my_strnlen strnlen -#else -# define my_strnlen Perl_my_strnlen #endif /* @@ -1812,6 +2069,7 @@ extern long double Perl_my_frexpl(long double x, int *e); # define Perl_fmod fmodq # define Perl_log logq # define Perl_log10 log10q +# define Perl_signbit signbitq # define Perl_pow powq # define Perl_sin sinq # define Perl_sinh sinhq @@ -2059,7 +2317,7 @@ extern long double Perl_my_frexpl(long double x, int *e); #endif /* Win32: _fpclass(), _isnan(), _finite(). */ -#ifdef WIN32 +#ifdef _MSC_VER # ifndef Perl_isnan # define Perl_isnan(x) _isnan(x) # endif @@ -2117,10 +2375,6 @@ extern long double Perl_my_frexpl(long double x, int *e); (Perl_fp_class_pdenorm(x) || Perl_fp_class_ndenorm(x)) #endif -#ifdef UNDER_CE -int isnan(double d); -#endif - #ifndef Perl_isnan # ifdef Perl_fp_class_nan # define Perl_isnan(x) Perl_fp_class_nan(x) @@ -2197,11 +2451,12 @@ int isnan(double d); #ifdef USE_PERL_ATOF # define Perl_atof(s) Perl_my_atof(s) -# define Perl_atof2(s, n) Perl_my_atof2(aTHX_ (s), &(n)) +# define Perl_atof2(s, n) Perl_my_atof3(aTHX_ (s), &(n), 0) #else # define Perl_atof(s) (NV)atof(s) # define Perl_atof2(s, n) ((n) = atof(s)) #endif +#define my_atof2(a,b) my_atof3(a,b,0) /* * CHAR_MIN and CHAR_MAX are not included here, as the (char) type may be @@ -2240,6 +2495,58 @@ int isnan(double d); # define PERL_QUAD_MIN (-PERL_QUAD_MAX - ((3 & -1) == 3)) #endif +/* +=head1 Numeric functions + +=for apidoc AmnUh||PERL_INT_MIN +=for apidoc AmnUh||PERL_LONG_MAX +=for apidoc AmnUh||PERL_LONG_MIN +=for apidoc AmnUh||PERL_QUAD_MAX +=for apidoc AmnUh||PERL_SHORT_MAX +=for apidoc AmnUh||PERL_SHORT_MIN +=for apidoc AmnUh||PERL_UCHAR_MAX +=for apidoc AmnUh||PERL_UCHAR_MIN +=for apidoc AmnUh||PERL_UINT_MAX +=for apidoc AmnUh||PERL_ULONG_MAX +=for apidoc AmnUh||PERL_ULONG_MIN +=for apidoc AmnUh||PERL_UQUAD_MAX +=for apidoc AmnUh||PERL_UQUAD_MIN +=for apidoc AmnUh||PERL_USHORT_MAX +=for apidoc AmnUh||PERL_USHORT_MIN +=for apidoc AmnUh||PERL_QUAD_MIN +=for apidoc AmnU||PERL_INT_MAX +This and +C, +C, +C, +C, +C, +C, +C, +C, +C, +C, +C, +C, +C, +C, +C, +C +give the largest and smallest number representable in the current +platform in variables of the corresponding types. + +For signed types, the smallest representable number is the most negative +number, the one furthest away from zero. + +For C99 and later compilers, these correspond to things like C, which +are available to the C code. But these constants, furnished by Perl, +allow code compiled on earlier compilers to portably have access to the same +constants. + +=cut + +*/ + typedef MEM_SIZE STRLEN; typedef struct op OP; @@ -2311,8 +2618,8 @@ typedef AV PAD; typedef struct padnamelist PADNAMELIST; typedef struct padname PADNAME; -/* enable PERL_OP_PARENT by default */ -#if !defined(PERL_OP_PARENT) && !defined(PERL_NO_OP_PARENT) +/* always enable PERL_OP_PARENT */ +#if !defined(PERL_OP_PARENT) # define PERL_OP_PARENT #endif @@ -3272,8 +3579,25 @@ EXTERN_C int perl_tsa_mutex_unlock(perl_mutex* mutex) #else # define EXPECT(expr,val) (expr) #endif + +/* +=head1 Miscellaneous Functions + +=for apidoc AmU|bool|LIKELY|const bool expr + +Returns the input unchanged, but at the same time it gives a branch prediction +hint to the compiler that this condition is likely to be true. + +=for apidoc AmU|bool|UNLIKELY|const bool expr + +Returns the input unchanged, but at the same time it gives a branch prediction +hint to the compiler that this condition is likely to be false. + +=cut +*/ #define LIKELY(cond) EXPECT(cBOOL(cond),TRUE) #define UNLIKELY(cond) EXPECT(cBOOL(cond),FALSE) + #ifdef HAS_BUILTIN_CHOOSE_EXPR /* placeholder */ #endif @@ -3347,9 +3671,9 @@ EXTERN_C int perl_tsa_mutex_unlock(perl_mutex* mutex) # define NOT_REACHED #elif defined(DEBUGGING) && (__has_builtin(__builtin_unreachable) \ || (__GNUC__ == 4 && __GNUC_MINOR__ >= 5 || __GNUC__ > 4)) /* 4.5 -> */ -# define NOT_REACHED STMT_START { ASSUME(0); __builtin_unreachable(); } STMT_END +# define NOT_REACHED STMT_START { ASSUME(!"UNREACHABLE"); __builtin_unreachable(); } STMT_END #else -# define NOT_REACHED ASSUME(0) +# define NOT_REACHED ASSUME(!"UNREACHABLE") #endif /* Some unistd.h's give a prototype for pause() even though @@ -3540,11 +3864,10 @@ typedef struct magic_state MGS; /* struct magic_state defined in mg.c */ struct scan_data_t; typedef struct regnode_charclass regnode_charclass; -struct regnode_charclass_class; - /* A hopefully less confusing name. The sub-classes are all Posix classes only * used under /l matching */ -typedef struct regnode_charclass_class regnode_charclass_posixl; +typedef struct regnode_charclass_posixl regnode_charclass_class; +typedef struct regnode_charclass_posixl regnode_charclass_posixl; typedef struct regnode_ssc regnode_ssc; typedef struct RExC_state_t RExC_state_t; @@ -3706,7 +4029,9 @@ my_swap16(const U16 x) { #define U_L(what) U_32(what) #ifdef HAS_SIGNBIT -# define Perl_signbit signbit +# ifndef Perl_signbit +# define Perl_signbit signbit +# endif #endif /* These do not care about the fractional part, only about the range. */ @@ -3723,7 +4048,7 @@ my_swap16(const U16 x) { #endif #ifndef __cplusplus -#if !(defined(WIN32) || defined(UNDER_CE) || defined(SYMBIAN)) +#if !(defined(WIN32) || defined(SYMBIAN)) Uid_t getuid (void); Uid_t geteuid (void); Gid_t getgid (void); @@ -3770,7 +4095,7 @@ Gid_t getegid (void); #define DEBUG_C_FLAG 0x00200000 /*2097152 */ #define DEBUG_A_FLAG 0x00400000 /*4194304 */ #define DEBUG_q_FLAG 0x00800000 /*8388608 */ -/* spare 16777216*/ +#define DEBUG_M_FLAG 0x01000000 /*16777216*/ #define DEBUG_B_FLAG 0x02000000 /*33554432*/ #define DEBUG_L_FLAG 0x04000000 /*67108864*/ #define DEBUG_i_FLAG 0x08000000 /*134217728*/ @@ -3802,6 +4127,7 @@ Gid_t getegid (void); # define DEBUG_C_TEST_ UNLIKELY(PL_debug & DEBUG_C_FLAG) # define DEBUG_A_TEST_ UNLIKELY(PL_debug & DEBUG_A_FLAG) # define DEBUG_q_TEST_ UNLIKELY(PL_debug & DEBUG_q_FLAG) +# define DEBUG_M_TEST_ UNLIKELY(PL_debug & DEBUG_M_FLAG) # define DEBUG_B_TEST_ UNLIKELY(PL_debug & DEBUG_B_FLAG) # define DEBUG_L_TEST_ UNLIKELY(PL_debug & DEBUG_L_FLAG) # define DEBUG_i_TEST_ UNLIKELY(PL_debug & DEBUG_i_FLAG) @@ -3835,6 +4161,7 @@ Gid_t getegid (void); # define DEBUG_C_TEST DEBUG_C_TEST_ # define DEBUG_A_TEST DEBUG_A_TEST_ # define DEBUG_q_TEST DEBUG_q_TEST_ +# define DEBUG_M_TEST DEBUG_M_TEST_ # define DEBUG_B_TEST DEBUG_B_TEST_ # define DEBUG_L_TEST DEBUG_L_TEST_ # define DEBUG_i_TEST DEBUG_i_TEST_ @@ -3875,11 +4202,11 @@ Gid_t getegid (void); # define DEBUG_f(a) DEBUG__(DEBUG_f_TEST, a) -#ifndef PERL_EXT_RE_BUILD -# define DEBUG_r(a) DEBUG__(DEBUG_r_TEST, a) -#else -# define DEBUG_r(a) STMT_START {a;} STMT_END -#endif /* PERL_EXT_RE_BUILD */ +# ifndef PERL_EXT_RE_BUILD +# define DEBUG_r(a) DEBUG__(DEBUG_r_TEST, a) +# else +# define DEBUG_r(a) STMT_START {a;} STMT_END +# endif /* PERL_EXT_RE_BUILD */ # define DEBUG_x(a) DEBUG__(DEBUG_x_TEST, a) # define DEBUG_u(a) DEBUG__(DEBUG_u_TEST, a) @@ -3898,11 +4225,12 @@ Gid_t getegid (void); # define DEBUG_C(a) DEBUG__(DEBUG_C_TEST, a) # define DEBUG_A(a) DEBUG__(DEBUG_A_TEST, a) # define DEBUG_q(a) DEBUG__(DEBUG_q_TEST, a) +# define DEBUG_M(a) DEBUG__(DEBUG_M_TEST, a) # define DEBUG_B(a) DEBUG__(DEBUG_B_TEST, a) # define DEBUG_L(a) DEBUG__(DEBUG_L_TEST, a) # define DEBUG_i(a) DEBUG__(DEBUG_i_TEST, a) -#else /* DEBUGGING */ +#else /* ! DEBUGGING below */ # define DEBUG_p_TEST (0) # define DEBUG_s_TEST (0) @@ -3927,6 +4255,7 @@ Gid_t getegid (void); # define DEBUG_C_TEST (0) # define DEBUG_A_TEST (0) # define DEBUG_q_TEST (0) +# define DEBUG_M_TEST (0) # define DEBUG_B_TEST (0) # define DEBUG_L_TEST (0) # define DEBUG_i_TEST (0) @@ -3960,6 +4289,7 @@ Gid_t getegid (void); # define DEBUG_C(a) # define DEBUG_A(a) # define DEBUG_q(a) +# define DEBUG_M(a) # define DEBUG_B(a) # define DEBUG_L(a) # define DEBUG_i(a) @@ -4301,6 +4631,11 @@ EXTCONST char PL_Zero[] EXTCONST char PL_hexdigit[] INIT("0123456789abcdef0123456789ABCDEF"); +EXTCONST STRLEN PL_WARN_ALL + INIT(0); +EXTCONST STRLEN PL_WARN_NONE + INIT(0); + /* This is constant on most architectures, a global on OS/2 */ #ifndef OS2 EXTCONST char PL_sh_path[] @@ -4517,7 +4852,7 @@ EXTCONST unsigned char PL_mod_latin1_uc[] = { || (UNICODE_MAJOR_VERSION == 2 && UNICODE_DOT_VERSION >= 1 \ && UNICODE_DOT_DOT_VERSION >= 8) 255 /*sharp s*/, -#else /* uc() is itself in early unicode */ +#else /* uc(sharp s) is 'sharp s' itself in early unicode */ 223, #endif 224-32, 225-32, 226-32, 227-32, 228-32, 229-32, 230-32, 231-32, @@ -4537,7 +4872,7 @@ EXTCONST unsigned char PL_latin1_lc[]; #ifndef PERL_GLOBAL_STRUCT /* or perlvars.h */ #ifdef DOINIT -EXT unsigned char PL_fold_locale[] = { /* Unfortunately not EXTCONST. */ +EXT unsigned char PL_fold_locale[256] = { /* Unfortunately not EXTCONST. */ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, @@ -4572,7 +4907,7 @@ EXT unsigned char PL_fold_locale[] = { /* Unfortunately not EXTCONST. */ 248, 249, 250, 251, 252, 253, 254, 255 }; #else -EXT unsigned char PL_fold_locale[]; /* Unfortunately not EXTCONST. */ +EXT unsigned char PL_fold_locale[256]; /* Unfortunately not EXTCONST. */ #endif #endif /* !PERL_GLOBAL_STRUCT */ @@ -4660,7 +4995,7 @@ EXTCONST char* const PL_block_type[] = { "NULL", "WHEN", "BLOCK", - "LOOP_GIVEN", + "GIVEN", "LOOP_ARY", "LOOP_LAZYSV", "LOOP_LAZYIV", @@ -4868,8 +5203,8 @@ typedef enum { XREF, XSTATE, XBLOCK, - XATTRBLOCK, - XATTRTERM, + XATTRBLOCK, /* next token should be an attribute or block */ + XATTRTERM, /* next token should be an attribute, or block in a term */ XTERMBLOCK, XBLOCKTERM, XPOSTDEREF, @@ -5273,13 +5608,8 @@ EXTCONST bool PL_valid_types_NV_set[]; * With the U8_NV version you will want to have inner braces, * while with the NV_U8 use just the NV. */ -#ifdef __cplusplus -#define INFNAN_U8_NV_DECL EXTERN_C const union { U8 u8[NVSIZE]; NV nv; } -#define INFNAN_NV_U8_DECL EXTERN_C const union { NV nv; U8 u8[NVSIZE]; } -#else #define INFNAN_U8_NV_DECL EXTCONST union { U8 u8[NVSIZE]; NV nv; } #define INFNAN_NV_U8_DECL EXTCONST union { NV nv; U8 u8[NVSIZE]; } -#endif /* if these never got defined, they need defaults */ #ifndef PERL_SET_CONTEXT @@ -5298,6 +5628,425 @@ EXTCONST bool PL_valid_types_NV_set[]; # define PERL_SET_THX(t) NOOP #endif +#ifndef EBCDIC + +/* The tables below are adapted from + * http://bjoern.hoehrmann.de/utf-8/decoder/dfa/, which requires this copyright + * notice: + +Copyright (c) 2008-2009 Bjoern Hoehrmann + +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +of the Software, and to permit persons to whom the Software is furnished to do +so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + +*/ + +# ifdef DOINIT +# if 0 /* This is the original table given in + http://bjoern.hoehrmann.de/utf-8/decoder/dfa/ */ +static U8 utf8d_C9[] = { + /* The first part of the table maps bytes to character classes that + * to reduce the size of the transition table and create bitmasks. */ + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /*-1F*/ + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /*-3F*/ + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /*-5F*/ + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /*-7F*/ + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, /*-9F*/ + 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, /*-BF*/ + 8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, /*-DF*/ + 10,3,3,3,3,3,3,3,3,3,3,3,3,4,3,3, 11,6,6,6,5,8,8,8,8,8,8,8,8,8,8,8, /*-FF*/ + + /* The second part is a transition table that maps a combination + * of a state of the automaton and a character class to a state. */ + 0,12,24,36,60,96,84,12,12,12,48,72, 12,12,12,12,12,12,12,12,12,12,12,12, + 12, 0,12,12,12,12,12, 0,12, 0,12,12, 12,24,12,12,12,12,12,24,12,24,12,12, + 12,12,12,12,12,12,12,24,12,12,12,12, 12,24,12,12,12,12,12,12,12,24,12,12, + 12,12,12,12,12,12,12,36,12,36,12,12, 12,36,12,12,12,12,12,36,12,36,12,12, + 12,36,12,12,12,12,12,12,12,12,12,12 +}; + +# endif + +/* This is a version of the above table customized for Perl that doesn't + * exclude surrogates and accepts start bytes up through FD (FE on 64-bit + * machines). The classes have been renumbered so that the patterns are more + * evident in the table. The class numbers for start bytes are constrained so + * that they can be used as a shift count for masking off the leading one bits. + * It would make the code simpler if start byte FF could also be handled, but + * doing so would mean adding nodes for each of continuation bytes 6-12 + * remaining, and two more nodes for overlong detection (a total of 9), and + * there is room only for 4 more nodes unless we make the array U16 instead of + * U8. + * + * The classes are + * 00-7F 0 + * 80-81 7 Not legal immediately after start bytes E0 F0 F8 FC + * FE + * 82-83 8 Not legal immediately after start bytes E0 F0 F8 FC + * 84-87 9 Not legal immediately after start bytes E0 F0 F8 + * 88-8F 10 Not legal immediately after start bytes E0 F0 + * 90-9F 11 Not legal immediately after start byte E0 + * A0-BF 12 + * C0,C1 1 + * C2-DF 2 + * E0 13 + * E1-EF 3 + * F0 14 + * F1-F7 4 + * F8 15 + * F9-FB 5 + * FC 16 + * FD 6 + * FE 17 (or 1 on 32-bit machines, since it overflows) + * FF 1 + */ + +EXTCONST U8 PL_extended_utf8_dfa_tab[] = { + /* The first part of the table maps bytes to character classes to reduce + * the size of the transition table and create bitmasks. */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*00-0F*/ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*10-1F*/ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*20-2F*/ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*30-3F*/ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*40-4F*/ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*50-5F*/ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*60-6F*/ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*70-7F*/ + 7, 7, 8, 8, 9, 9, 9, 9,10,10,10,10,10,10,10,10, /*80-8F*/ + 11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11, /*90-9F*/ + 12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12, /*A0-AF*/ + 12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12, /*B0-BF*/ + 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /*C0-CF*/ + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /*D0-DF*/ + 13, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, /*E0-EF*/ + 14, 4, 4, 4, 4, 4, 4, 4,15, 5, 5, 5,16, 6, /*F0-FD*/ +# ifdef UV_IS_QUAD + 17, /*FE*/ +# else + 1, /*FE*/ +# endif + 1, /*FF*/ + +/* The second part is a transition table that maps a combination + * of a state of the automaton and a character class to a new state, called a + * node. The nodes are: + * N0 The initial state, and final accepting one. + * N1 Any one continuation byte (80-BF) left. This is transitioned to + * immediately when the start byte indicates a two-byte sequence + * N2 Any two continuation bytes left. + * N3 Any three continuation bytes left. + * N4 Any four continuation bytes left. + * N5 Any five continuation bytes left. + * N6 Start byte is E0. Continuation bytes 80-9F are illegal (overlong); + * the other continuations transition to N1 + * N7 Start byte is F0. Continuation bytes 80-8F are illegal (overlong); + * the other continuations transition to N2 + * N8 Start byte is F8. Continuation bytes 80-87 are illegal (overlong); + * the other continuations transition to N3 + * N9 Start byte is FC. Continuation bytes 80-83 are illegal (overlong); + * the other continuations transition to N4 + * N10 Start byte is FE. Continuation bytes 80-81 are illegal (overlong); + * the other continuations transition to N5 + * 1 Reject. All transitions not mentioned above (except the single + * byte ones (as they are always legal) are to this state. + */ + +# define NUM_CLASSES 18 +# define N0 0 +# define N1 ((N0) + NUM_CLASSES) +# define N2 ((N1) + NUM_CLASSES) +# define N3 ((N2) + NUM_CLASSES) +# define N4 ((N3) + NUM_CLASSES) +# define N5 ((N4) + NUM_CLASSES) +# define N6 ((N5) + NUM_CLASSES) +# define N7 ((N6) + NUM_CLASSES) +# define N8 ((N7) + NUM_CLASSES) +# define N9 ((N8) + NUM_CLASSES) +# define N10 ((N9) + NUM_CLASSES) + +/*Class: 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 */ +/*N0*/ 0, 1,N1,N2,N3,N4,N5, 1, 1, 1, 1, 1, 1,N6,N7,N8,N9,N10, +/*N1*/ 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, +/*N2*/ 1, 1, 1, 1, 1, 1, 1,N1,N1,N1,N1,N1,N1, 1, 1, 1, 1, 1, +/*N3*/ 1, 1, 1, 1, 1, 1, 1,N2,N2,N2,N2,N2,N2, 1, 1, 1, 1, 1, +/*N4*/ 1, 1, 1, 1, 1, 1, 1,N3,N3,N3,N3,N3,N3, 1, 1, 1, 1, 1, +/*N5*/ 1, 1, 1, 1, 1, 1, 1,N4,N4,N4,N4,N4,N4, 1, 1, 1, 1, 1, + +/*N6*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,N1, 1, 1, 1, 1, 1, +/*N7*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,N2,N2, 1, 1, 1, 1, 1, +/*N8*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,N3,N3,N3, 1, 1, 1, 1, 1, +/*N9*/ 1, 1, 1, 1, 1, 1, 1, 1, 1,N4,N4,N4,N4, 1, 1, 1, 1, 1, +/*N10*/ 1, 1, 1, 1, 1, 1, 1, 1,N5,N5,N5,N5,N5, 1, 1, 1, 1, 1, +}; + +/* And below is a version of the above table that accepts only strict UTF-8. + * Hence no surrogates nor non-characters, nor non-Unicode. Thus, if the input + * passes this dfa, it will be for a well-formed, non-problematic code point + * that can be returned immediately. + * + * The "Implementation details" portion of + * http://bjoern.hoehrmann.de/utf-8/decoder/dfa/ shows how + * the first portion of the table maps each possible byte into a character + * class. And that the classes for those bytes which are start bytes have been + * carefully chosen so they serve as well to be used as a shift value to mask + * off the leading 1 bits of the start byte. Unfortunately the addition of + * being able to distinguish non-characters makes this not fully work. This is + * because, now, the start bytes E1-EF have to be broken into 3 classes instead + * of 2: + * 1) ED because it could be a surrogate + * 2) EF because it could be a non-character + * 3) the rest, which can never evaluate to a problematic code point. + * + * Each of E1-EF has three leading 1 bits, then a 0. That means we could use a + * shift (and hence class number) of either 3 or 4 to get a mask that works. + * But that only allows two categories, and we need three. khw made the + * decision to therefore treat the ED start byte as an error, so that the dfa + * drops out immediately for that. In the dfa, classes 3 and 4 are used to + * distinguish EF vs the rest. Then special code is used to deal with ED, + * that's executed only when the dfa drops out. The code points started by ED + * are half surrogates, and half hangul syllables. This means that 2048 of the + * the hangul syllables (about 18%) take longer than all other non-problematic + * code points to handle. + * + * The changes to handle non-characters requires the addition of states and + * classes to the dfa. (See the section on "Mapping bytes to character + * classes" in the linked-to document for further explanation of the original + * dfa.) + * + * The classes are + * 00-7F 0 + * 80-8E 9 + * 8F 10 + * 90-9E 11 + * 9F 12 + * A0-AE 13 + * AF 14 + * B0-B6 15 + * B7 16 + * B8-BD 15 + * BE 17 + * BF 18 + * C0,C1 1 + * C2-DF 2 + * E0 7 + * E1-EC 3 + * ED 1 + * EE 3 + * EF 4 + * F0 8 + * F1-F3 6 (6 bits can be stripped) + * F4 5 (only 5 can be stripped) + * F5-FF 1 + */ + +EXTCONST U8 PL_strict_utf8_dfa_tab[] = { + /* The first part of the table maps bytes to character classes to reduce + * the size of the transition table and create bitmasks. */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*00-0F*/ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*10-1F*/ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*20-2F*/ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*30-3F*/ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*40-4F*/ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*50-5F*/ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*60-6F*/ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*70-7F*/ + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,10, /*80-8F*/ + 11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,12, /*90-9F*/ + 13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,14, /*A0-AF*/ + 15,15,15,15,15,15,15,16,15,15,15,15,15,15,17,18, /*B0-BF*/ + 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /*C0-CF*/ + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /*D0-DF*/ + 7, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 1, 3, 4, /*E0-EF*/ + 8, 6, 6, 6, 5, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*F0-FF*/ + +/* The second part is a transition table that maps a combination + * of a state of the automaton and a character class to a new state, called a + * node. The nodes are: + * N0 The initial state, and final accepting one. + * N1 Any one continuation byte (80-BF) left. This is transitioned to + * immediately when the start byte indicates a two-byte sequence + * N2 Any two continuation bytes left. + * N3 Start byte is E0. Continuation bytes 80-9F are illegal (overlong); + * the other continuations transition to state N1 + * N4 Start byte is EF. Continuation byte B7 transitions to N8; BF to N9; + * the other continuations transitions to N1 + * N5 Start byte is F0. Continuation bytes 80-8F are illegal (overlong); + * [9AB]F transition to N10; the other continuations to N2. + * N6 Start byte is F[123]. Continuation bytes [89AB]F transition + * to N10; the other continuations to N2. + * N7 Start byte is F4. Continuation bytes 90-BF are illegal + * (non-unicode); 8F transitions to N10; the other continuations to N2 + * N8 Initial sequence is EF B7. Continuation bytes 90-AF are illegal + * (non-characters); the other continuations transition to N0. + * N9 Initial sequence is EF BF. Continuation bytes BE and BF are illegal + * (non-characters); the other continuations transition to N0. + * N10 Initial sequence is one of: F0 [9-B]F; F[123] [8-B]F; or F4 8F. + * Continuation byte BF transitions to N11; the other continuations to + * N1 + * N11 Initial sequence is the two bytes given in N10 followed by BF. + * Continuation bytes BE and BF are illegal (non-characters); the other + * continuations transition to N0. + * 1 Reject. All transitions not mentioned above (except the single + * byte ones (as they are always legal) are to this state. + */ + +# undef N0 +# undef N1 +# undef N2 +# undef N3 +# undef N4 +# undef N5 +# undef N6 +# undef N7 +# undef N8 +# undef N9 +# undef NUM_CLASSES +# define NUM_CLASSES 19 +# define N0 0 +# define N1 ((N0) + NUM_CLASSES) +# define N2 ((N1) + NUM_CLASSES) +# define N3 ((N2) + NUM_CLASSES) +# define N4 ((N3) + NUM_CLASSES) +# define N5 ((N4) + NUM_CLASSES) +# define N6 ((N5) + NUM_CLASSES) +# define N7 ((N6) + NUM_CLASSES) +# define N8 ((N7) + NUM_CLASSES) +# define N9 ((N8) + NUM_CLASSES) +# define N10 ((N9) + NUM_CLASSES) +# define N11 ((N10) + NUM_CLASSES) + +/*Class: 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 */ +/*N0*/ 0, 1, N1, N2, N4, N7, N6, N3, N5, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +/*N1*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +/*N2*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, N1, N1, N1, N1, N1, N1, N1, N1, N1, N1, + +/*N3*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, N1, N1, N1, N1, N1, N1, +/*N4*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, N1, N1, N1, N1, N1, N1, N1, N8, N1, N9, +/*N5*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, N2,N10, N2,N10, N2, N2, N2,N10, +/*N6*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, N2,N10, N2,N10, N2,N10, N2, N2, N2,N10, +/*N7*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, N2,N10, 1, 1, 1, 1, 1, 1, 1, 1, +/*N8*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, +/*N9*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, +/*N10*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, N1, N1, N1, N1, N1, N1, N1, N1, N1,N11, +/*N11*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, +}; + +/* And below is yet another version of the above tables that accepts only UTF-8 + * as defined by Corregidum #9. Hence no surrogates nor non-Unicode, but + * it allows non-characters. This is isomorphic to the original table + * in http://bjoern.hoehrmann.de/utf-8/decoder/dfa/ + * + * The classes are + * 00-7F 0 + * 80-8F 9 + * 90-9F 10 + * A0-BF 11 + * C0,C1 1 + * C2-DF 2 + * E0 7 + * E1-EC 3 + * ED 4 + * EE-EF 3 + * F0 8 + * F1-F3 6 (6 bits can be stripped) + * F4 5 (only 5 can be stripped) + * F5-FF 1 + */ + +EXTCONST U8 PL_c9_utf8_dfa_tab[] = { + /* The first part of the table maps bytes to character classes to reduce + * the size of the transition table and create bitmasks. */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*00-0F*/ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*10-1F*/ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*20-2F*/ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*30-3F*/ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*40-4F*/ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*50-5F*/ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*60-6F*/ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*70-7F*/ + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, /*80-8F*/ + 10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10, /*90-9F*/ + 11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11, /*A0-AF*/ + 11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11, /*B0-BF*/ + 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /*C0-CF*/ + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /*D0-DF*/ + 7, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 3, 3, /*E0-EF*/ + 8, 6, 6, 6, 5, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*F0-FF*/ + +/* The second part is a transition table that maps a combination + * of a state of the automaton and a character class to a new state, called a + * node. The nodes are: + * N0 The initial state, and final accepting one. + * N1 Any one continuation byte (80-BF) left. This is transitioned to + * immediately when the start byte indicates a two-byte sequence + * N2 Any two continuation bytes left. + * N3 Any three continuation bytes left. + * N4 Start byte is E0. Continuation bytes 80-9F are illegal (overlong); + * the other continuations transition to state N1 + * N5 Start byte is ED. Continuation bytes A0-BF all lead to surrogates, + * so are illegal. The other continuations transition to state N1. + * N6 Start byte is F0. Continuation bytes 80-8F are illegal (overlong); + * the other continuations transition to N2 + * N7 Start byte is F4. Continuation bytes 90-BF are illegal + * (non-unicode); the other continuations transition to N2 + * 1 Reject. All transitions not mentioned above (except the single + * byte ones (as they are always legal) are to this state. + */ + +# undef N0 +# undef N1 +# undef N2 +# undef N3 +# undef N4 +# undef N5 +# undef N6 +# undef N7 +# undef NUM_CLASSES +# define NUM_CLASSES 12 +# define N0 0 +# define N1 ((N0) + NUM_CLASSES) +# define N2 ((N1) + NUM_CLASSES) +# define N3 ((N2) + NUM_CLASSES) +# define N4 ((N3) + NUM_CLASSES) +# define N5 ((N4) + NUM_CLASSES) +# define N6 ((N5) + NUM_CLASSES) +# define N7 ((N6) + NUM_CLASSES) + +/*Class: 0 1 2 3 4 5 6 7 8 9 10 11 */ +/*N0*/ 0, 1, N1, N2, N5, N7, N3, N4, N6, 1, 1, 1, +/*N1*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, +/*N2*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, N1, N1, N1, +/*N3*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, N2, N2, N2, + +/*N4*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, N1, +/*N5*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, N1, N1, 1, +/*N6*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, N2, N2, +/*N7*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, N2, 1, 1, +}; + +# else /* End of is DOINIT */ + +EXTCONST U8 PL_extended_utf8_dfa_tab[]; +EXTCONST U8 PL_strict_utf8_dfa_tab[]; +EXTCONST U8 PL_c9_utf8_dfa_tab[]; + +# endif +#endif /* end of isn't EBCDIC */ #ifndef PERL_NO_INLINE_FUNCTIONS /* Static inline funcs that depend on includes and declarations above. @@ -5419,127 +6168,105 @@ typedef struct am_table_short AMTS; # define KEYWORD_PLUGIN_MUTEX_LOCK MUTEX_LOCK(&PL_keyword_plugin_mutex) # define KEYWORD_PLUGIN_MUTEX_UNLOCK MUTEX_UNLOCK(&PL_keyword_plugin_mutex) # define KEYWORD_PLUGIN_MUTEX_TERM MUTEX_DESTROY(&PL_keyword_plugin_mutex) +# define USER_PROP_MUTEX_INIT MUTEX_INIT(&PL_user_prop_mutex) +# define USER_PROP_MUTEX_LOCK MUTEX_LOCK(&PL_user_prop_mutex) +# define USER_PROP_MUTEX_UNLOCK MUTEX_UNLOCK(&PL_user_prop_mutex) +# define USER_PROP_MUTEX_TERM MUTEX_DESTROY(&PL_user_prop_mutex) #else # define KEYWORD_PLUGIN_MUTEX_INIT NOOP # define KEYWORD_PLUGIN_MUTEX_LOCK NOOP # define KEYWORD_PLUGIN_MUTEX_UNLOCK NOOP # define KEYWORD_PLUGIN_MUTEX_TERM NOOP +# define USER_PROP_MUTEX_INIT NOOP +# define USER_PROP_MUTEX_LOCK NOOP +# define USER_PROP_MUTEX_UNLOCK NOOP +# define USER_PROP_MUTEX_TERM NOOP #endif -#ifdef USE_LOCALE -/* These locale things are all subject to change */ +#ifdef USE_LOCALE /* These locale things are all subject to change */ + /* Returns TRUE if the plain locale pragma without a parameter is in effect. + * */ +# define IN_LOCALE_RUNTIME (PL_curcop \ + && CopHINTS_get(PL_curcop) & HINT_LOCALE) -# if defined(HAS_NEWLOCALE) \ - && defined(LC_ALL_MASK) \ - && defined(HAS_FREELOCALE) \ - && defined(HAS_USELOCALE) \ - && ! defined(NO_POSIX_2008_LOCALE) + /* Returns TRUE if either form of the locale pragma is in effect */ +# define IN_SOME_LOCALE_FORM_RUNTIME \ + cBOOL(CopHINTS_get(PL_curcop) & (HINT_LOCALE|HINT_LOCALE_PARTIAL)) - /* The code is written for simplicity to assume that any platform advanced - * enough to have the Posix 2008 locale functions has LC_ALL. The test - * above makes sure that assumption is valid */ +# define IN_LOCALE_COMPILETIME cBOOL(PL_hints & HINT_LOCALE) +# define IN_SOME_LOCALE_FORM_COMPILETIME \ + cBOOL(PL_hints & (HINT_LOCALE|HINT_LOCALE_PARTIAL)) -# define HAS_POSIX_2008_LOCALE -# endif +/* +=head1 Locale-related functions and macros -/* We create a C locale object unconditionally if we have the functions to do - * so; hence must destroy it unconditionally at the end */ -# ifndef HAS_POSIX_2008_LOCALE -# define _LOCALE_TERM_POSIX_2008 NOOP -# else -# define _LOCALE_TERM_POSIX_2008 \ - STMT_START { \ - if (PL_C_locale_obj) { \ - /* Make sure we aren't using the locale \ - * space we are about to free */ \ - uselocale(LC_GLOBAL_LOCALE); \ - freelocale(PL_C_locale_obj); \ - PL_C_locale_obj = (locale_t) NULL; \ - } \ - } STMT_END -# endif +=for apidoc Amn|bool|IN_LOCALE -# ifndef USE_ITHREADS -# define LOCALE_INIT -# define LOCALE_LOCK -# define LOCALE_UNLOCK -# define LOCALE_TERM STMT_START { _LOCALE_TERM_POSIX_2008; } STMT_END -# else /* Below is do use threads */ -# define LOCALE_INIT MUTEX_INIT(&PL_locale_mutex) -# define LOCALE_LOCK MUTEX_LOCK(&PL_locale_mutex) -# define LOCALE_UNLOCK MUTEX_UNLOCK(&PL_locale_mutex) -# define LOCALE_TERM \ - STMT_START { \ - MUTEX_DESTROY(&PL_locale_mutex); \ - _LOCALE_TERM_POSIX_2008; \ - } STMT_END -# ifdef HAS_POSIX_2008_LOCALE -# define USE_POSIX_2008_LOCALE -# define USE_THREAD_SAFE_LOCALE -# endif -# endif +Evaluates to TRUE if the plain locale pragma without a parameter (S>) is in effect. -/* Returns TRUE if the plain locale pragma without a parameter is in effect - */ -# define IN_LOCALE_RUNTIME (PL_curcop \ - && CopHINTS_get(PL_curcop) & HINT_LOCALE) - -/* Returns TRUE if either form of the locale pragma is in effect */ -# define IN_SOME_LOCALE_FORM_RUNTIME \ - cBOOL(CopHINTS_get(PL_curcop) & (HINT_LOCALE|HINT_LOCALE_PARTIAL)) - -# define IN_LOCALE_COMPILETIME cBOOL(PL_hints & HINT_LOCALE) -# define IN_SOME_LOCALE_FORM_COMPILETIME \ - cBOOL(PL_hints & (HINT_LOCALE|HINT_LOCALE_PARTIAL)) - -# define IN_LOCALE \ - (IN_PERL_COMPILETIME ? IN_LOCALE_COMPILETIME : IN_LOCALE_RUNTIME) -# define IN_SOME_LOCALE_FORM \ - (IN_PERL_COMPILETIME ? IN_SOME_LOCALE_FORM_COMPILETIME \ - : IN_SOME_LOCALE_FORM_RUNTIME) - -# define IN_LC_ALL_COMPILETIME IN_LOCALE_COMPILETIME -# define IN_LC_ALL_RUNTIME IN_LOCALE_RUNTIME - -# define IN_LC_PARTIAL_COMPILETIME cBOOL(PL_hints & HINT_LOCALE_PARTIAL) -# define IN_LC_PARTIAL_RUNTIME \ - (PL_curcop && CopHINTS_get(PL_curcop) & HINT_LOCALE_PARTIAL) - -# define IN_LC_COMPILETIME(category) \ - (IN_LC_ALL_COMPILETIME || (IN_LC_PARTIAL_COMPILETIME \ - && Perl__is_in_locale_category(aTHX_ TRUE, (category)))) -# define IN_LC_RUNTIME(category) \ - (IN_LC_ALL_RUNTIME || (IN_LC_PARTIAL_RUNTIME \ - && Perl__is_in_locale_category(aTHX_ FALSE, (category)))) -# define IN_LC(category) \ +=for apidoc Amn|bool|IN_LOCALE_COMPILETIME + +Evaluates to TRUE if, when compiling a perl program (including an C) if +the plain locale pragma without a parameter (S>) is in effect. + +=for apidoc Amn|bool|IN_LOCALE_RUNTIME + +Evaluates to TRUE if, when executing a perl program (including an C) if +the plain locale pragma without a parameter (S>) is in effect. + +=cut +*/ + +# define IN_LOCALE \ + (IN_PERL_COMPILETIME ? IN_LOCALE_COMPILETIME : IN_LOCALE_RUNTIME) +# define IN_SOME_LOCALE_FORM \ + (IN_PERL_COMPILETIME ? IN_SOME_LOCALE_FORM_COMPILETIME \ + : IN_SOME_LOCALE_FORM_RUNTIME) + +# define IN_LC_ALL_COMPILETIME IN_LOCALE_COMPILETIME +# define IN_LC_ALL_RUNTIME IN_LOCALE_RUNTIME + +# define IN_LC_PARTIAL_COMPILETIME cBOOL(PL_hints & HINT_LOCALE_PARTIAL) +# define IN_LC_PARTIAL_RUNTIME \ + (PL_curcop && CopHINTS_get(PL_curcop) & HINT_LOCALE_PARTIAL) + +# define IN_LC_COMPILETIME(category) \ + ( IN_LC_ALL_COMPILETIME \ + || ( IN_LC_PARTIAL_COMPILETIME \ + && Perl__is_in_locale_category(aTHX_ TRUE, (category)))) +# define IN_LC_RUNTIME(category) \ + (IN_LC_ALL_RUNTIME || (IN_LC_PARTIAL_RUNTIME \ + && Perl__is_in_locale_category(aTHX_ FALSE, (category)))) +# define IN_LC(category) \ (IN_LC_COMPILETIME(category) || IN_LC_RUNTIME(category)) -# if defined (PERL_CORE) || defined (PERL_IN_XSUB_RE) +# if defined (PERL_CORE) || defined (PERL_IN_XSUB_RE) - /* This internal macro should be called from places that operate under - * locale rules. It there is a problem with the current locale that - * hasn't been raised yet, it will output a warning this time. Because - * this will so rarely be true, there is no point to optimize for - * time; instead it makes sense to minimize space used and do all the - * work in the rarely called function */ -# ifdef USE_LOCALE_CTYPE -# define _CHECK_AND_WARN_PROBLEMATIC_LOCALE \ + /* This internal macro should be called from places that operate under + * locale rules. If there is a problem with the current locale that + * hasn't been raised yet, it will output a warning this time. Because + * this will so rarely be true, there is no point to optimize for time; + * instead it makes sense to minimize space used and do all the work in + * the rarely called function */ +# ifdef USE_LOCALE_CTYPE +# define _CHECK_AND_WARN_PROBLEMATIC_LOCALE \ STMT_START { \ if (UNLIKELY(PL_warn_locale)) { \ Perl__warn_problematic_locale(); \ } \ } STMT_END -# else -# define _CHECK_AND_WARN_PROBLEMATIC_LOCALE -# endif +# else +# define _CHECK_AND_WARN_PROBLEMATIC_LOCALE +# endif - /* These two internal macros are called when a warning should be raised, - * and will do so if enabled. The first takes a single code point - * argument; the 2nd, is a pointer to the first byte of the UTF-8 encoded - * string, and an end position which it won't try to read past */ -# define _CHECK_AND_OUTPUT_WIDE_LOCALE_CP_MSG(cp) \ + /* These two internal macros are called when a warning should be raised, + * and will do so if enabled. The first takes a single code point + * argument; the 2nd, is a pointer to the first byte of the UTF-8 encoded + * string, and an end position which it won't try to read past */ +# define _CHECK_AND_OUTPUT_WIDE_LOCALE_CP_MSG(cp) \ STMT_START { \ if (! PL_in_utf8_CTYPE_locale && ckWARN(WARN_LOCALE)) { \ Perl_warner(aTHX_ packWARN(WARN_LOCALE), \ @@ -5548,10 +6275,10 @@ typedef struct am_table_short AMTS; } \ } STMT_END -# define _CHECK_AND_OUTPUT_WIDE_LOCALE_UTF8_MSG(s, send) \ +# define _CHECK_AND_OUTPUT_WIDE_LOCALE_UTF8_MSG(s, send) \ STMT_START { /* Check if to warn before doing the conversion work */\ if (! PL_in_utf8_CTYPE_locale && ckWARN(WARN_LOCALE)) { \ - UV cp = utf8_to_uvchr_buf((U8 *) s, (U8 *) send, NULL); \ + UV cp = utf8_to_uvchr_buf((U8 *) (s), (U8 *) (send), NULL); \ Perl_warner(aTHX_ packWARN(WARN_LOCALE), \ "Wide character (U+%" UVXf ") in %s", \ (cp == 0) \ @@ -5561,35 +6288,184 @@ typedef struct am_table_short AMTS; } \ } STMT_END -# endif /* PERL_CORE or PERL_IN_XSUB_RE */ +# endif /* PERL_CORE or PERL_IN_XSUB_RE */ #else /* No locale usage */ -# define LOCALE_INIT -# define LOCALE_TERM -# define LOCALE_LOCK -# define LOCALE_UNLOCK -# define IN_LOCALE_RUNTIME 0 -# define IN_SOME_LOCALE_FORM_RUNTIME 0 -# define IN_LOCALE_COMPILETIME 0 -# define IN_SOME_LOCALE_FORM_COMPILETIME 0 -# define IN_LOCALE 0 -# define IN_SOME_LOCALE_FORM 0 -# define IN_LC_ALL_COMPILETIME 0 -# define IN_LC_ALL_RUNTIME 0 -# define IN_LC_PARTIAL_COMPILETIME 0 -# define IN_LC_PARTIAL_RUNTIME 0 -# define IN_LC_COMPILETIME(category) 0 -# define IN_LC_RUNTIME(category) 0 -# define IN_LC(category) 0 - -# define _CHECK_AND_WARN_PROBLEMATIC_LOCALE -# define _CHECK_AND_OUTPUT_WIDE_LOCALE_CP_MSG(a) -# define _CHECK_AND_OUTPUT_WIDE_LOCALE_UTF8_MSG(a,b) +# define IN_LOCALE_RUNTIME 0 +# define IN_SOME_LOCALE_FORM_RUNTIME 0 +# define IN_LOCALE_COMPILETIME 0 +# define IN_SOME_LOCALE_FORM_COMPILETIME 0 +# define IN_LOCALE 0 +# define IN_SOME_LOCALE_FORM 0 +# define IN_LC_ALL_COMPILETIME 0 +# define IN_LC_ALL_RUNTIME 0 +# define IN_LC_PARTIAL_COMPILETIME 0 +# define IN_LC_PARTIAL_RUNTIME 0 +# define IN_LC_COMPILETIME(category) 0 +# define IN_LC_RUNTIME(category) 0 +# define IN_LC(category) 0 +# define _CHECK_AND_WARN_PROBLEMATIC_LOCALE +# define _CHECK_AND_OUTPUT_WIDE_LOCALE_UTF8_MSG(s, send) +# define _CHECK_AND_OUTPUT_WIDE_LOCALE_CP_MSG(c) +#endif + + +/* Locale/thread synchronization macros. These aren't needed if using + * thread-safe locale operations, except if something is broken */ +#if defined(USE_LOCALE) \ + && defined(USE_ITHREADS) \ + && (! defined(USE_THREAD_SAFE_LOCALE) || defined(TS_W32_BROKEN_LOCALECONV)) + +/* We have a locale object holding the 'C' locale for Posix 2008 */ +# ifndef USE_POSIX_2008_LOCALE +# define _LOCALE_TERM_POSIX_2008 NOOP +# else +# define _LOCALE_TERM_POSIX_2008 \ + STMT_START { \ + if (PL_C_locale_obj) { \ + /* Make sure we aren't using the locale \ + * space we are about to free */ \ + uselocale(LC_GLOBAL_LOCALE); \ + freelocale(PL_C_locale_obj); \ + PL_C_locale_obj = (locale_t) NULL; \ + } \ + } STMT_END +# endif + +/* This is used as a generic lock for locale operations. For example this is + * used when calling nl_langinfo() so that another thread won't zap the + * contents of its buffer before it gets saved; and it's called when changing + * the locale of LC_MESSAGES. On some systems the latter can cause the + * nl_langinfo buffer to be zapped under a race condition. + * + * If combined with LC_NUMERIC_LOCK, calls to this and its corresponding unlock + * should be contained entirely within the locked portion of LC_NUMERIC. This + * mutex should be used only in very short sections of code, while + * LC_NUMERIC_LOCK may span more operations. By always following this + * convention, deadlock should be impossible. But if necessary, the two + * mutexes could be combined. + * + * Actually, the two macros just below with the '_V' suffixes are used in just + * a few places where there is a broken localeconv(), but otherwise things are + * thread safe, and hence don't need locking. Just below LOCALE_LOCK and + * LOCALE_UNLOCK are defined in terms of these for use everywhere else */ +# define LOCALE_LOCK_V \ + STMT_START { \ + DEBUG_Lv(PerlIO_printf(Perl_debug_log, \ + "%s: %d: locking locale\n", __FILE__, __LINE__)); \ + MUTEX_LOCK(&PL_locale_mutex); \ + } STMT_END +# define LOCALE_UNLOCK_V \ + STMT_START { \ + DEBUG_Lv(PerlIO_printf(Perl_debug_log, \ + "%s: %d: unlocking locale\n", __FILE__, __LINE__)); \ + MUTEX_UNLOCK(&PL_locale_mutex); \ + } STMT_END + +/* On windows, we just need the mutex for LOCALE_LOCK */ +# ifdef TS_W32_BROKEN_LOCALECONV +# define LOCALE_LOCK NOOP +# define LOCALE_UNLOCK NOOP +# define LOCALE_INIT MUTEX_INIT(&PL_locale_mutex); +# define LOCALE_TERM MUTEX_DESTROY(&PL_locale_mutex) +# define LC_NUMERIC_LOCK(cond) +# define LC_NUMERIC_UNLOCK +# else +# define LOCALE_LOCK LOCALE_LOCK_V +# define LOCALE_UNLOCK LOCALE_UNLOCK_V + + /* We also need to lock LC_NUMERIC for non-windows (hence Posix 2008) + * systems */ +# define LOCALE_INIT STMT_START { \ + MUTEX_INIT(&PL_locale_mutex); \ + MUTEX_INIT(&PL_lc_numeric_mutex); \ + } STMT_END + +# define LOCALE_TERM STMT_START { \ + MUTEX_DESTROY(&PL_locale_mutex); \ + MUTEX_DESTROY(&PL_lc_numeric_mutex); \ + _LOCALE_TERM_POSIX_2008; \ + } STMT_END + + /* This mutex is used to create critical sections where we want the + * LC_NUMERIC locale to be locked into either the C (standard) locale, or + * the underlying locale, so that other threads interrupting this one don't + * change it to the wrong state before we've had a chance to complete our + * operation. It can stay locked over an entire printf operation, for + * example. And so is made distinct from the LOCALE_LOCK mutex. + * + * This simulates kind of a general semaphore. The current thread will + * lock the mutex if the per-thread variable is zero, and then increments + * that variable. Each corresponding UNLOCK decrements the variable until + * it is 0, at which point it actually unlocks the mutex. Since the + * variable is per-thread, there is no race with other threads. + * + * The single argument is a condition to test for, and if true, to panic, + * as this would be an attempt to complement the LC_NUMERIC state, and + * we're not supposed to because it's locked. + * + * Clang improperly gives warnings for this, if not silenced: + * https://clang.llvm.org/docs/ThreadSafetyAnalysis.html#conditional-locks + * */ +# define LC_NUMERIC_LOCK(cond_to_panic_if_already_locked) \ + CLANG_DIAG_IGNORE(-Wthread-safety) \ + STMT_START { \ + if (PL_lc_numeric_mutex_depth <= 0) { \ + MUTEX_LOCK(&PL_lc_numeric_mutex); \ + PL_lc_numeric_mutex_depth = 1; \ + DEBUG_Lv(PerlIO_printf(Perl_debug_log, \ + "%s: %d: locking lc_numeric; depth=1\n", \ + __FILE__, __LINE__)); \ + } \ + else { \ + PL_lc_numeric_mutex_depth++; \ + DEBUG_Lv(PerlIO_printf(Perl_debug_log, \ + "%s: %d: avoided lc_numeric_lock; new depth=%d\n", \ + __FILE__, __LINE__, PL_lc_numeric_mutex_depth)); \ + if (cond_to_panic_if_already_locked) { \ + Perl_croak_nocontext("panic: %s: %d: Trying to change" \ + " LC_NUMERIC incompatibly", \ + __FILE__, __LINE__); \ + } \ + } \ + } STMT_END + +# define LC_NUMERIC_UNLOCK \ + STMT_START { \ + if (PL_lc_numeric_mutex_depth <= 1) { \ + MUTEX_UNLOCK(&PL_lc_numeric_mutex); \ + PL_lc_numeric_mutex_depth = 0; \ + DEBUG_Lv(PerlIO_printf(Perl_debug_log, \ + "%s: %d: unlocking lc_numeric; depth=0\n", \ + __FILE__, __LINE__)); \ + } \ + else { \ + PL_lc_numeric_mutex_depth--; \ + DEBUG_Lv(PerlIO_printf(Perl_debug_log, \ + "%s: %d: avoided lc_numeric_unlock; new depth=%d\n",\ + __FILE__, __LINE__, PL_lc_numeric_mutex_depth)); \ + } \ + } STMT_END \ + CLANG_DIAG_RESTORE + +# endif /* End of needs locking LC_NUMERIC */ +#else /* Below is no locale sync needed */ +# define LOCALE_INIT +# define LOCALE_LOCK +# define LOCALE_LOCK_V +# define LOCALE_UNLOCK +# define LOCALE_UNLOCK_V +# define LC_NUMERIC_LOCK(cond) +# define LC_NUMERIC_UNLOCK +# define LOCALE_TERM #endif #ifdef USE_LOCALE_NUMERIC /* These macros are for toggling between the underlying locale (UNDERLYING or - * LOCAL) and the C locale (STANDARD). + * LOCAL) and the C locale (STANDARD). (Actually we don't have to use the C + * locale if the underlying locale is indistinguishable from it in the numeric + * operations used by Perl, namely the decimal point, and even the thousands + * separator.) =head1 Locale-related functions and macros @@ -5627,12 +6503,17 @@ argument list, like this: The private variable is used to save the current locale state, so that the requisite matching call to L can restore it. +On threaded perls not operating with thread-safe functionality, this macro uses +a mutex to force a critical section. Therefore the matching RESTORE should be +close by, and guaranteed to be called. + =for apidoc Am|void|STORE_LC_NUMERIC_SET_TO_NEEDED -This is used to help wrap XS or C code that that is C locale-aware. -This locale category is generally kept set to the C locale by Perl for -backwards compatibility, and because most XS code that reads floating point -values can cope only with the decimal radix character being a dot. +This is used to help wrap XS or C code that is C locale-aware. +This locale category is generally kept set to a locale where the decimal radix +character is a dot, and the separator between groups of digits is empty. This +is because most XS code that reads floating point numbers is expecting them to +have this syntax. This macro makes sure the current C state is set properly, to be aware of locale if the call to the XS or C code from the Perl program is @@ -5657,14 +6538,24 @@ argument list, like this: ... } +On threaded perls not operating with thread-safe functionality, this macro uses +a mutex to force a critical section. Therefore the matching RESTORE should be +close by, and guaranteed to be called; see L +for a more contained way to ensure that. + +=for apidoc Am|void|STORE_LC_NUMERIC_SET_TO_NEEDED_IN|bool in_lc_numeric + +Same as L with in_lc_numeric provided +as the precalculated value of C. It is the caller's +responsibility to ensure that the status of C and C +cannot have changed since the precalculation. + =for apidoc Am|void|RESTORE_LC_NUMERIC This is used in conjunction with one of the macros L -and -L - -to properly restore the C state. +and L to properly restore the +C state. A call to L must have been made to declare at compile time a private variable used by this macro and the two @@ -5678,145 +6569,228 @@ expression, but with an empty argument list, like this: ... } +=for apidoc Am|void|WITH_LC_NUMERIC_SET_TO_NEEDED|block + +This macro invokes the supplied statement or block within the context +of a L .. L pair +if required, so eg: + + WITH_LC_NUMERIC_SET_TO_NEEDED( + SNPRINTF_G(fv, ebuf, sizeof(ebuf), precis) + ); + +is equivalent to: + + { +#ifdef USE_LOCALE_NUMERIC + DECLARATION_FOR_LC_NUMERIC_MANIPULATION; + STORE_LC_NUMERIC_SET_TO_NEEDED(); +#endif + SNPRINTF_G(fv, ebuf, sizeof(ebuf), precis); +#ifdef USE_LOCALE_NUMERIC + RESTORE_LC_NUMERIC(); +#endif + } + +=for apidoc Am|void|WITH_LC_NUMERIC_SET_TO_NEEDED_IN|bool in_lc_numeric|block + +Same as L with in_lc_numeric provided +as the precalculated value of C. It is the caller's +responsibility to ensure that the status of C and C +cannot have changed since the precalculation. + =cut */ +/* If the underlying numeric locale has a non-dot decimal point or has a + * non-empty floating point thousands separator, the current locale is instead + * generally kept in the C locale instead of that underlying locale. The + * current status is known by looking at two words. One is non-zero if the + * current numeric locale is the standard C/POSIX one or is indistinguishable + * from C. The other is non-zero if the current locale is the underlying + * locale. Both can be non-zero if, as often happens, the underlying locale is + * C or indistinguishable from it. + * + * khw believes the reason for the variables instead of the bits in a single + * word is to avoid having to have masking instructions. */ + # define _NOT_IN_NUMERIC_STANDARD (! PL_numeric_standard) /* We can lock the category to stay in the C locale, making requests to the * contrary be noops, in the dynamic scope by setting PL_numeric_standard to 2. * */ # define _NOT_IN_NUMERIC_UNDERLYING \ - (! PL_numeric_underlying && PL_numeric_standard < 2) + (! PL_numeric_underlying && PL_numeric_standard < 2) # define DECLARATION_FOR_LC_NUMERIC_MANIPULATION \ void (*_restore_LC_NUMERIC_function)(pTHX) = NULL -# define STORE_LC_NUMERIC_SET_TO_NEEDED() \ - if (IN_LC(LC_NUMERIC)) { \ - if (_NOT_IN_NUMERIC_UNDERLYING) { \ - Perl_set_numeric_underlying(aTHX); \ - _restore_LC_NUMERIC_function = &Perl_set_numeric_standard; \ - } \ - } \ - else { \ - if (_NOT_IN_NUMERIC_STANDARD) { \ - SET_NUMERIC_STANDARD(); \ - _restore_LC_NUMERIC_function = &Perl_set_numeric_underlying; \ - } \ - } +# define STORE_LC_NUMERIC_SET_TO_NEEDED_IN(in) \ + STMT_START { \ + bool _in_lc_numeric = (in); \ + LC_NUMERIC_LOCK( \ + ( ( _in_lc_numeric && _NOT_IN_NUMERIC_UNDERLYING) \ + || (! _in_lc_numeric && _NOT_IN_NUMERIC_STANDARD))); \ + if (_in_lc_numeric) { \ + if (_NOT_IN_NUMERIC_UNDERLYING) { \ + Perl_set_numeric_underlying(aTHX); \ + _restore_LC_NUMERIC_function \ + = &Perl_set_numeric_standard; \ + } \ + } \ + else { \ + if (_NOT_IN_NUMERIC_STANDARD) { \ + Perl_set_numeric_standard(aTHX); \ + _restore_LC_NUMERIC_function \ + = &Perl_set_numeric_underlying; \ + } \ + } \ + } STMT_END + +# define STORE_LC_NUMERIC_SET_TO_NEEDED() \ + STORE_LC_NUMERIC_SET_TO_NEEDED_IN(IN_LC(LC_NUMERIC)) # define RESTORE_LC_NUMERIC() \ - if (_restore_LC_NUMERIC_function) { \ - _restore_LC_NUMERIC_function(aTHX); \ - } + STMT_START { \ + if (_restore_LC_NUMERIC_function) { \ + _restore_LC_NUMERIC_function(aTHX); \ + } \ + LC_NUMERIC_UNLOCK; \ + } STMT_END /* The next two macros set unconditionally. These should be rarely used, and * only after being sure that this is what is needed */ # define SET_NUMERIC_STANDARD() \ - STMT_START { if (_NOT_IN_NUMERIC_STANDARD) \ - Perl_set_numeric_standard(aTHX); \ - } STMT_END + STMT_START { \ + DEBUG_Lv(PerlIO_printf(Perl_debug_log, \ + "%s: %d: lc_numeric standard=%d\n", \ + __FILE__, __LINE__, PL_numeric_standard)); \ + Perl_set_numeric_standard(aTHX); \ + DEBUG_Lv(PerlIO_printf(Perl_debug_log, \ + "%s: %d: lc_numeric standard=%d\n", \ + __FILE__, __LINE__, PL_numeric_standard)); \ + } STMT_END # define SET_NUMERIC_UNDERLYING() \ - STMT_START { if (_NOT_IN_NUMERIC_UNDERLYING) \ - Perl_set_numeric_underlying(aTHX); } STMT_END + STMT_START { \ + if (_NOT_IN_NUMERIC_UNDERLYING) { \ + Perl_set_numeric_underlying(aTHX); \ + } \ + } STMT_END /* The rest of these LC_NUMERIC macros toggle to one or the other state, with * the RESTORE_foo ones called to switch back, but only if need be */ -# define STORE_LC_NUMERIC_UNDERLYING_SET_STANDARD() \ - bool _was_underlying = _NOT_IN_NUMERIC_STANDARD; \ - if (_was_underlying) Perl_set_numeric_standard(aTHX); - -/* Doesn't change to underlying locale unless within the scope of some form of - * 'use locale'. This is the usual desired behavior. */ -# define STORE_LC_NUMERIC_STANDARD_SET_UNDERLYING() \ - bool _was_standard = _NOT_IN_NUMERIC_UNDERLYING \ - && IN_LC(LC_NUMERIC); \ - if (_was_standard) Perl_set_numeric_underlying(aTHX); +# define STORE_LC_NUMERIC_SET_STANDARD() \ + STMT_START { \ + LC_NUMERIC_LOCK(_NOT_IN_NUMERIC_STANDARD); \ + if (_NOT_IN_NUMERIC_STANDARD) { \ + _restore_LC_NUMERIC_function = &Perl_set_numeric_underlying;\ + Perl_set_numeric_standard(aTHX); \ + } \ + } STMT_END /* Rarely, we want to change to the underlying locale even outside of 'use * locale'. This is principally in the POSIX:: functions */ -# define STORE_LC_NUMERIC_FORCE_TO_UNDERLYING() \ - if (_NOT_IN_NUMERIC_UNDERLYING) { \ - Perl_set_numeric_underlying(aTHX); \ - _restore_LC_NUMERIC_function = &Perl_set_numeric_standard; \ - } +# define STORE_LC_NUMERIC_FORCE_TO_UNDERLYING() \ + STMT_START { \ + LC_NUMERIC_LOCK(_NOT_IN_NUMERIC_UNDERLYING); \ + if (_NOT_IN_NUMERIC_UNDERLYING) { \ + Perl_set_numeric_underlying(aTHX); \ + _restore_LC_NUMERIC_function = &Perl_set_numeric_standard; \ + } \ + } STMT_END /* Lock/unlock to the C locale until unlock is called. This needs to be * recursively callable. [perl #128207] */ -# define LOCK_LC_NUMERIC_STANDARD() \ - (__ASSERT_(PL_numeric_standard) \ - PL_numeric_standard++) -# define UNLOCK_LC_NUMERIC_STANDARD() \ - STMT_START { \ - if (PL_numeric_standard > 1) { \ - PL_numeric_standard--; \ - } \ - else { \ - assert(0); \ - } \ - } STMT_END +# define LOCK_LC_NUMERIC_STANDARD() \ + STMT_START { \ + DEBUG_Lv(PerlIO_printf(Perl_debug_log, \ + "%s: %d: lock lc_numeric_standard: new depth=%d\n", \ + __FILE__, __LINE__, PL_numeric_standard + 1)); \ + __ASSERT_(PL_numeric_standard) \ + PL_numeric_standard++; \ + } STMT_END -# define RESTORE_LC_NUMERIC_UNDERLYING() \ - if (_was_underlying) Perl_set_numeric_underlying(aTHX); +# define UNLOCK_LC_NUMERIC_STANDARD() \ + STMT_START { \ + if (PL_numeric_standard > 1) { \ + PL_numeric_standard--; \ + } \ + else { \ + assert(0); \ + } \ + DEBUG_Lv(PerlIO_printf(Perl_debug_log, \ + "%s: %d: lc_numeric_standard decrement lock, new depth=%d\n", \ + __FILE__, __LINE__, PL_numeric_standard)); \ + } STMT_END + +# define WITH_LC_NUMERIC_SET_TO_NEEDED_IN(in_lc_numeric, block) \ + STMT_START { \ + DECLARATION_FOR_LC_NUMERIC_MANIPULATION; \ + STORE_LC_NUMERIC_SET_TO_NEEDED_IN(in_lc_numeric); \ + block; \ + RESTORE_LC_NUMERIC(); \ + } STMT_END; -# define RESTORE_LC_NUMERIC_STANDARD() \ - if (_restore_LC_NUMERIC_function) { \ - _restore_LC_NUMERIC_function(aTHX); \ - } +# define WITH_LC_NUMERIC_SET_TO_NEEDED(block) \ + WITH_LC_NUMERIC_SET_TO_NEEDED_IN(IN_LC(LC_NUMERIC), block) #else /* !USE_LOCALE_NUMERIC */ # define SET_NUMERIC_STANDARD() # define SET_NUMERIC_UNDERLYING() # define IS_NUMERIC_RADIX(a, b) (0) -# define STORE_LC_NUMERIC_UNDERLYING_SET_STANDARD() -# define STORE_LC_NUMERIC_STANDARD_SET_UNDERLYING() -# define STORE_LC_NUMERIC_FORCE_TO_UNDERLYING() -# define RESTORE_LC_NUMERIC_UNDERLYING() -# define RESTORE_LC_NUMERIC_STANDARD() # define DECLARATION_FOR_LC_NUMERIC_MANIPULATION +# define STORE_LC_NUMERIC_SET_STANDARD() +# define STORE_LC_NUMERIC_FORCE_TO_UNDERLYING() +# define STORE_LC_NUMERIC_SET_TO_NEEDED_IN(in_lc_numeric) # define STORE_LC_NUMERIC_SET_TO_NEEDED() # define RESTORE_LC_NUMERIC() # define LOCK_LC_NUMERIC_STANDARD() # define UNLOCK_LC_NUMERIC_STANDARD() +# define WITH_LC_NUMERIC_SET_TO_NEEDED_IN(in_lc_numeric, block) \ + STMT_START { block; } STMT_END +# define WITH_LC_NUMERIC_SET_TO_NEEDED(block) \ + STMT_START { block; } STMT_END #endif /* !USE_LOCALE_NUMERIC */ #define Atof my_atof -/* Back-compat names */ -#define DECLARATION_FOR_STORE_LC_NUMERIC_SET_TO_NEEDED \ - DECLARATION_FOR_LC_NUMERIC_MANIPULATION -#define DECLARE_STORE_LC_NUMERIC_SET_TO_NEEDED() \ - DECLARATION_FOR_STORE_LC_NUMERIC_SET_TO_NEEDED; \ - STORE_LC_NUMERIC_SET_TO_NEEDED(); -#define LOCK_NUMERIC_STANDARD() LOCK_LC_NUMERIC_STANDARD() -#define RESTORE_NUMERIC_LOCAL() RESTORE_LC_NUMERIC_UNDERLYING() -#define RESTORE_NUMERIC_STANDARD() RESTORE_LC_NUMERIC_STANDARD() -#define SET_NUMERIC_LOCAL() SET_NUMERIC_UNDERLYING() -#define STORE_NUMERIC_LOCAL_SET_STANDARD() \ - STORE_LC_NUMERIC_UNDERLYING_SET_STANDARD() -#define STORE_NUMERIC_STANDARD_SET_LOCAL() \ - STORE_LC_NUMERIC_STANDARD_SET_UNDERLYING() -#define STORE_NUMERIC_STANDARD_FORCE_LOCAL() \ - STORE_LC_NUMERIC_FORCE_TO_UNDERLYING() -#define UNLOCK_NUMERIC_STANDARD() UNLOCK_LC_NUMERIC_STANDARD() +/* +=head1 Numeric functions +=for apidoc AmTR|NV|Strtod|NN const char * const s|NULLOK char ** e -#ifdef USE_QUADMATH -# define Perl_strtod(s, e) strtoflt128(s, e) -#elif defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE) -# if defined(HAS_STRTOLD) -# define Perl_strtod(s, e) strtold(s, e) -# elif defined(HAS_STRTOD) -# define Perl_strtod(s, e) (NV)strtod(s, e) /* Unavoidable loss. */ -# endif -#elif defined(HAS_STRTOD) -# define Perl_strtod(s, e) strtod(s, e) +This is a synonym for L. + +=for apidoc AmTR|NV|Strtol|NN const char * const s|NULLOK char ** e|int base + +Platform and configuration independent C. This expands to the +appropriate C-like function based on the platform and F +options>. For example it could expand to C or C instead of +C. + +=for apidoc AmTR|NV|Strtoul|NN const char * const s|NULLOK char ** e|int base + +Platform and configuration independent C. This expands to the +appropriate C-like function based on the platform and F +options>. For example it could expand to C or C instead of +C. + +=cut + +*/ + +#define Strtod my_strtod + +#if defined(HAS_STRTOD) \ + || defined(USE_QUADMATH) \ + || (defined(HAS_STRTOLD) && defined(HAS_LONG_DOUBLE) \ + && defined(USE_LONG_DOUBLE)) +# define Perl_strtod Strtod #endif #if !defined(Strtol) && defined(USE_64_BIT_INT) && defined(IV_IS_QUAD) && \ @@ -6111,6 +7085,15 @@ int flock(int fd, int op); #define IS_NUMBER_NAN 0x20 /* this is not */ #define IS_NUMBER_TRAILING 0x40 /* number has trailing trash */ +/* +=head1 Numeric functions + +=for apidoc AmdR|bool|GROK_NUMERIC_RADIX|NN const char **sp|NN const char *send + +A synonym for L + +=cut +*/ #define GROK_NUMERIC_RADIX(sp, send) grok_numeric_radix(sp, send) /* Input flags: */ @@ -6133,10 +7116,6 @@ extern void moncontrol(int); #define PERL_GPROF_MONCONTROL(x) #endif -#ifdef UNDER_CE -#include "wince.h" -#endif - /* ISO 6429 NEL - C1 control NExt Line */ /* See http://www.unicode.org/unicode/reports/tr13/ */ #define NEXT_LINE_CHAR NEXT_LINE_NATIVE @@ -6195,8 +7174,20 @@ extern void moncontrol(int); #define PERL_SIGNALS_UNSAFE_FLAG 0x0001 -/* Use instead of abs() since abs() forces its argument to be an int, - * but also beware since this evaluates its argument twice, so no x++. */ +/* +=head1 Numeric functions + +=for apidoc Am|int|PERL_ABS|int + +Typeless C or C, I. (The usage below indicates it is for +integers, but it works for any type.) Use instead of these, since the C +library ones force their argument to be what it is expecting, potentially +leading to disaster. But also beware that this evaluates its argument twice, +so no C. + +=cut +*/ + #define PERL_ABS(x) ((x) < 0 ? -(x) : (x)) #if defined(__DECC) && defined(__osf__) @@ -6214,9 +7205,19 @@ extern void moncontrol(int); # define do_aexec(really, mark,sp) do_aexec5(really, mark, sp, 0, 0) #endif -/* check embedded \0 characters in pathnames passed to syscalls, - but allow one ending \0 */ -#define IS_SAFE_SYSCALL(p, len, what, op_name) (S_is_safe_syscall(aTHX_ (p), (len), (what), (op_name))) + +/* +=head1 Miscellaneous Functions + +=for apidoc Am|bool|IS_SAFE_SYSCALL|NN const char *pv|STRLEN len|NN const char *what|NN const char *op_name + +Same as L. + +=cut + +Allows one ending \0 +*/ +#define IS_SAFE_SYSCALL(p, len, what, op_name) (Perl_is_safe_syscall(aTHX_ (p), (len), (what), (op_name))) #define IS_SAFE_PATHNAME(p, len, op_name) IS_SAFE_SYSCALL((p), (len), "pathname", (op_name)) @@ -6455,6 +7456,8 @@ extern void moncontrol(int); #ifdef DOUBLE_HAS_NAN +START_EXTERN_C + #ifdef DOINIT /* PL_inf and PL_nan initialization. @@ -6480,7 +7483,8 @@ extern void moncontrol(int); */ /* The quadmath literals are anon structs which -Wc++-compat doesn't like. */ -GCC_DIAG_IGNORE(-Wc++-compat) +GCC_DIAG_IGNORE_DECL(-Wpragmas); +GCC_DIAG_IGNORE_DECL(-Wc++-compat); # ifdef USE_QUADMATH /* Cannot use HUGE_VALQ for PL_inf because not a compile-time @@ -6550,7 +7554,8 @@ INFNAN_NV_U8_DECL PL_nan = { 0.0/0.0 }; /* keep last */ # endif # endif -GCC_DIAG_RESTORE +GCC_DIAG_RESTORE_DECL; +GCC_DIAG_RESTORE_DECL; #else @@ -6559,6 +7564,8 @@ INFNAN_NV_U8_DECL PL_nan; #endif +END_EXTERN_C + /* If you have not defined NV_INF/NV_NAN (like for example win32/win32.h), * we will define NV_INF/NV_NAN as the nv part of the global const * PL_inf/PL_nan. Note, however, that the preexisting NV_INF/NV_NAN