X-Git-Url: https://perl5.git.perl.org/perl5.git/blobdiff_plain/bd0b697da315a5621a414fa3fa60630fd3735d0e..d78a5caa85b65d42bbf69df8d66aca78bcfdc3cf:/perl.h diff --git a/perl.h b/perl.h index 7531165..4a98718 100644 --- a/perl.h +++ b/perl.h @@ -246,7 +246,7 @@ Perl_pregfree(aTHX_ (prog)) #define CALLREGFREE_PVT(prog) \ - if(prog) RX_ENGINE(prog)->rxfree(aTHX_ (prog)) + if(prog && RX_ENGINE(prog)) RX_ENGINE(prog)->rxfree(aTHX_ (prog)) #define CALLREG_NUMBUF_FETCH(rx,paren,usesv) \ RX_ENGINE(rx)->numbered_buff_FETCH(aTHX_ (rx),(paren),(usesv)) @@ -448,6 +448,19 @@ #define CLANG_DIAG_IGNORE_STMT(x) CLANG_DIAG_IGNORE(x) NOOP #define CLANG_DIAG_RESTORE_STMT CLANG_DIAG_RESTORE NOOP +#if defined(_MSC_VER) && (_MSC_VER >= 1300) +# define MSVC_DIAG_IGNORE(x) __pragma(warning(push)) \ + __pragma(warning(disable : x)) +# define MSVC_DIAG_RESTORE __pragma(warning(pop)) +#else +# define MSVC_DIAG_IGNORE(x) +# define MSVC_DIAG_RESTORE +#endif +#define MSVC_DIAG_IGNORE_DECL(x) MSVC_DIAG_IGNORE(x) dNOOP +#define MSVC_DIAG_RESTORE_DECL MSVC_DIAG_RESTORE dNOOP +#define MSVC_DIAG_IGNORE_STMT(x) MSVC_DIAG_IGNORE(x) NOOP +#define MSVC_DIAG_RESTORE_STMT MSVC_DIAG_RESTORE NOOP + #define NOOP /*EMPTY*/(void)0 #define dNOOP struct Perl___notused_struct @@ -610,16 +623,24 @@ # define TAINT_WARN_get 0 # define TAINT_WARN_set(s) NOOP #else + /* Set to tainted if we are running under tainting mode */ # define TAINT (PL_tainted = PL_tainting) -# define TAINT_NOT (PL_tainted = FALSE) -# define TAINT_IF(c) if (UNLIKELY(c)) { PL_tainted = PL_tainting; } + +# define TAINT_NOT (PL_tainted = FALSE) /* Untaint */ +# define TAINT_IF(c) if (UNLIKELY(c)) { TAINT; } /* Conditionally taint */ # define TAINT_ENV() if (UNLIKELY(PL_tainting)) { taint_env(); } -# define TAINT_PROPER(s) if (UNLIKELY(PL_tainting)) { taint_proper(NULL, s); } + /* croak or warn if tainting */ +# define TAINT_PROPER(s) if (UNLIKELY(PL_tainting)) { \ + taint_proper(NULL, s); \ + } # define TAINT_set(s) (PL_tainted = (s)) -# define 
TAINT_get (PL_tainted) -# define TAINTING_get (PL_tainting) +# define TAINT_get (PL_tainted) /* Is something tainted? */ +# define TAINTING_get (PL_tainting) /* Is taint checking enabled? */ # define TAINTING_set(s) (PL_tainting = (s)) -# define TAINT_WARN_get (PL_taint_warn) +# define TAINT_WARN_get (PL_taint_warn) /* FALSE => tainting violations + are fatal + TRUE => they're just + warnings */ # define TAINT_WARN_set(s) (PL_taint_warn = (s)) #endif @@ -683,16 +704,6 @@ #include -/* EVC 4 SDK headers includes a bad definition of MB_CUR_MAX in stdlib.h - which is included from stdarg.h. Bad definition not present in SD 2008 - SDK headers. wince.h is not yet included, so we cant fix this from there - since by then MB_CUR_MAX will be defined from stdlib.h. - cewchar.h includes a correct definition of MB_CUR_MAX and it is copied here - since cewchar.h can't be included this early */ -#if defined(UNDER_CE) && (_MSC_VER < 1300) -# define MB_CUR_MAX 1uL -#endif - # include #ifdef I_STDINT @@ -719,10 +730,33 @@ # include #endif -#if !defined(NO_LOCALE) && defined(HAS_SETLOCALE) -# define USE_LOCALE +/* If not forbidden, we enable locale handling if either 1) the POSIX 2008 + * functions are available, or 2) just the setlocale() function. This logic is + * repeated in t/loc_tools.pl and makedef.pl; The three should be kept in + * sync. */ +#if ! defined(NO_LOCALE) + +# if ! defined(NO_POSIX_2008_LOCALE) \ + && defined(HAS_NEWLOCALE) \ + && defined(HAS_USELOCALE) \ + && defined(HAS_DUPLOCALE) \ + && defined(HAS_FREELOCALE) \ + && defined(LC_ALL_MASK) + + /* For simplicity, the code is written to assume that any platform advanced + * enough to have the Posix 2008 locale functions has LC_ALL. 
The final + * test above makes sure that assumption is valid */ + +# define HAS_POSIX_2008_LOCALE +# define USE_LOCALE +# elif defined(HAS_SETLOCALE) +# define USE_LOCALE +# endif +#endif + +#ifdef USE_LOCALE # define HAS_SKIP_LOCALE_INIT /* Solely for XS code to test for this - capability */ + #define */ # if !defined(NO_LOCALE_COLLATE) && defined(LC_COLLATE) \ && defined(HAS_STRXFRM) # define USE_LOCALE_COLLATE @@ -757,29 +791,25 @@ # if !defined(NO_LOCALE_TELEPHONE) && defined(LC_TELEPHONE) # define USE_LOCALE_TELEPHONE # endif -#endif /* !NO_LOCALE && HAS_SETLOCALE */ /* XXX The next few defines are unfortunately duplicated in makedef.pl, and * changes here MUST also be made there */ -#ifdef USE_LOCALE /* These locale things are all subject to change */ -# if defined(HAS_NEWLOCALE) \ - && defined(LC_ALL_MASK) \ - && defined(HAS_FREELOCALE) \ - && defined(HAS_USELOCALE) \ - && ! defined(NO_POSIX_2008_LOCALE) - - /* For simplicity, the code is written to assume that any platform advanced - * enough to have the Posix 2008 locale functions has LC_ALL. The test - * above makes sure that assumption is valid */ - -# define HAS_POSIX_2008_LOCALE -# endif -# if defined(USE_ITHREADS) \ - && ( defined(HAS_POSIX_2008_LOCALE) \ - || (defined(WIN32) && defined(_MSC_VER) && _MSC_VER >= 1400)) \ - && ! defined(NO_THREAD_SAFE_LOCALE) -# define USE_THREAD_SAFE_LOCALE +# if ! defined(HAS_SETLOCALE) && defined(HAS_POSIX_2008_LOCALE) +# define USE_POSIX_2008_LOCALE +# ifndef USE_THREAD_SAFE_LOCALE +# define USE_THREAD_SAFE_LOCALE +# endif + /* If compiled with + * -DUSE_THREAD_SAFE_LOCALE, will do so even + * on unthreaded builds */ +# elif (defined(USE_ITHREADS) || defined(USE_THREAD_SAFE_LOCALE)) \ + && ( defined(HAS_POSIX_2008_LOCALE) \ + || (defined(WIN32) && defined(_MSC_VER) && _MSC_VER >= 1400)) \ + && ! 
defined(NO_THREAD_SAFE_LOCALE) +# ifndef USE_THREAD_SAFE_LOCALE +# define USE_THREAD_SAFE_LOCALE +# endif # ifdef HAS_POSIX_2008_LOCALE # define USE_POSIX_2008_LOCALE # endif @@ -1050,11 +1080,20 @@ EXTERN_C int usleep(unsigned int); # define STRUCT_OFFSET(s,m) offsetof(s,m) #endif -/* ptrdiff_t is C11, so undef it under pedantic builds */ +/* ptrdiff_t is C11, so undef it under pedantic builds. (Actually it is + * in C89, but apparently there are platforms where it doesn't exist. See + * thread beginning at http://nntp.perl.org/group/perl.perl5.porters/251541.) + * */ #ifdef PERL_GCC_PEDANTIC # undef HAS_PTRDIFF_T #endif +#ifdef HAS_PTRDIFF_T +# define Ptrdiff_t ptrdiff_t +#else +# define Ptrdiff_t SSize_t +#endif + #ifndef __SYMBIAN32__ # include #endif @@ -1079,7 +1118,7 @@ EXTERN_C int usleep(unsigned int); # define saferealloc Perl_realloc # define safefree Perl_mfree # define CHECK_MALLOC_TOO_LATE_FOR_(code) STMT_START { \ - if (!PL_tainting && MallocCfg_ptr[MallocCfg_cfg_env_read]) \ + if (!TAINTING_get && MallocCfg_ptr[MallocCfg_cfg_env_read]) \ code; \ } STMT_END # define CHECK_MALLOC_TOO_LATE_FOR(ch) \ @@ -1225,6 +1264,38 @@ EXTERN_C char *crypt(const char *, const char *); EXTERN_C char *crypt(const char *, const char *); #endif +/* +=head1 Errno + +=for apidoc m|void|SETERRNO|int errcode|int vmserrcode + +Set C, and on VMS set C. + +=for apidoc mn|void|dSAVEDERRNO + +Declare variables needed to save C and any operating system +specific error number. + +=for apidoc mn|void|dSAVE_ERRNO + +Declare variables needed to save C and any operating system +specific error number, and save them for optional later restoration +by C. + +=for apidoc mn|void|SAVE_ERRNO + +Save C and any operating system specific error number for +optional later restoration by C. Requires +C or C in scope. + +=for apidoc mn|void|RESTORE_ERRNO + +Restore C and any operating system specific error number that +was saved by C or C. 
+ +=cut +*/ + #ifdef SETERRNO # undef SETERRNO /* SOCKS might have defined this */ #endif @@ -1296,6 +1367,29 @@ EXTERN_C char *crypt(const char *, const char *); # define RESTORE_ERRNO (errno = saved_errno) #endif +/* +=head1 Warning and Dieing + +=for apidoc Amn|SV *|ERRSV + +Returns the SV for C<$@>, creating it if needed. + +=for apidoc Am|void|CLEAR_ERRSV + +Clear the contents of C<$@>, setting it to the empty string. + +This replaces any read-only SV with a fresh SV and removes any magic. + +=for apidoc Am|void|SANE_ERRSV + +Clean up ERRSV so we can safely set it. + +This replaces any read-only SV with a fresh writable copy and removes +any magic. + +=cut +*/ + #define ERRSV GvSVn(PL_errgv) /* contains inlined gv_add_by_type */ @@ -1316,6 +1410,23 @@ EXTERN_C char *crypt(const char *, const char *); } \ } STMT_END +/* contains inlined gv_add_by_type */ +#define SANE_ERRSV() STMT_START { \ + SV ** const svp = &GvSV(PL_errgv); \ + if (!*svp) { \ + *svp = newSVpvs(""); \ + } else if (SvREADONLY(*svp)) { \ + SV *dupsv = newSVsv(*svp); \ + SvREFCNT_dec_NN(*svp); \ + *svp = dupsv; \ + } else { \ + SV *const errsv = *svp; \ + if (SvMAGICAL(errsv)) { \ + mg_free(errsv); \ + } \ + } \ + } STMT_END + #ifdef PERL_CORE # define DEFSV (0 + GvSVn(PL_defgv)) @@ -1541,9 +1652,17 @@ EXTERN_C char *crypt(const char *, const char *); /* This used to be conditionally defined based on whether we had a sprintf() * that correctly returns the string length (as required by C89), but we no * longer need that. XS modules can (and do) use this name, so it must remain - * a part of the API that's visible to modules. But we no longer document it - * either (because using sprintf() rather than snprintf() is almost always - * a bad idea). */ + * a part of the API that's visible to modules. + +=head1 Miscellaneous Functions + +=for apidoc ATmD|int|my_sprintf|NN char *buffer|NN const char *pat|... + +Do NOT use this due to the possibility of overflowing C. 
Instead use +my_snprintf() + +=cut +*/ #define my_sprintf sprintf /* @@ -1626,8 +1745,6 @@ EXTERN_C char *crypt(const char *, const char *); #ifdef HAS_STRLCAT # define my_strlcat strlcat -#else -# define my_strlcat Perl_my_strlcat #endif #if defined(PERL_CORE) || defined(PERL_EXT) @@ -1640,14 +1757,10 @@ EXTERN_C char *crypt(const char *, const char *); #ifdef HAS_STRLCPY # define my_strlcpy strlcpy -#else -# define my_strlcpy Perl_my_strlcpy #endif #ifdef HAS_STRNLEN # define my_strnlen strnlen -#else -# define my_strnlen Perl_my_strnlen #endif /* @@ -1943,6 +2056,7 @@ extern long double Perl_my_frexpl(long double x, int *e); # define Perl_fmod fmodq # define Perl_log logq # define Perl_log10 log10q +# define Perl_signbit signbitq # define Perl_pow powq # define Perl_sin sinq # define Perl_sinh sinhq @@ -2190,7 +2304,7 @@ extern long double Perl_my_frexpl(long double x, int *e); #endif /* Win32: _fpclass(), _isnan(), _finite(). */ -#ifdef WIN32 +#ifdef _MSC_VER # ifndef Perl_isnan # define Perl_isnan(x) _isnan(x) # endif @@ -2248,10 +2362,6 @@ extern long double Perl_my_frexpl(long double x, int *e); (Perl_fp_class_pdenorm(x) || Perl_fp_class_ndenorm(x)) #endif -#ifdef UNDER_CE -int isnan(double d); -#endif - #ifndef Perl_isnan # ifdef Perl_fp_class_nan # define Perl_isnan(x) Perl_fp_class_nan(x) @@ -2372,6 +2482,58 @@ int isnan(double d); # define PERL_QUAD_MIN (-PERL_QUAD_MAX - ((3 & -1) == 3)) #endif +/* +=head1 Numeric functions + +=for apidoc AmnUh||PERL_INT_MIN +=for apidoc AmnUh||PERL_LONG_MAX +=for apidoc AmnUh||PERL_LONG_MIN +=for apidoc AmnUh||PERL_QUAD_MAX +=for apidoc AmnUh||PERL_SHORT_MAX +=for apidoc AmnUh||PERL_SHORT_MIN +=for apidoc AmnUh||PERL_UCHAR_MAX +=for apidoc AmnUh||PERL_UCHAR_MIN +=for apidoc AmnUh||PERL_UINT_MAX +=for apidoc AmnUh||PERL_ULONG_MAX +=for apidoc AmnUh||PERL_ULONG_MIN +=for apidoc AmnUh||PERL_UQUAD_MAX +=for apidoc AmnUh||PERL_UQUAD_MIN +=for apidoc AmnUh||PERL_USHORT_MAX +=for apidoc AmnUh||PERL_USHORT_MIN +=for apidoc 
AmnUh||PERL_QUAD_MIN +=for apidoc AmnU||PERL_INT_MAX +This and +C, +C, +C, +C, +C, +C, +C, +C, +C, +C, +C, +C, +C, +C, +C, +C +give the largest and smallest number representable in the current +platform in variables of the corresponding types. + +For signed types, the smallest representable number is the most negative +number, the one furthest away from zero. + +For C99 and later compilers, these correspond to things like C, which +are available to the C code. But these constants, furnished by Perl, +allow code compiled on earlier compilers to portably have access to the same +constants. + +=cut + +*/ + typedef MEM_SIZE STRLEN; typedef struct op OP; @@ -3404,8 +3566,25 @@ EXTERN_C int perl_tsa_mutex_unlock(perl_mutex* mutex) #else # define EXPECT(expr,val) (expr) #endif + +/* +=head1 Miscellaneous Functions + +=for apidoc AmU|bool|LIKELY|const bool expr + +Returns the input unchanged, but at the same time it gives a branch prediction +hint to the compiler that this condition is likely to be true. + +=for apidoc AmU|bool|UNLIKELY|const bool expr + +Returns the input unchanged, but at the same time it gives a branch prediction +hint to the compiler that this condition is likely to be false. 
+ +=cut +*/ #define LIKELY(cond) EXPECT(cBOOL(cond),TRUE) #define UNLIKELY(cond) EXPECT(cBOOL(cond),FALSE) + #ifdef HAS_BUILTIN_CHOOSE_EXPR /* placeholder */ #endif @@ -3479,9 +3658,9 @@ EXTERN_C int perl_tsa_mutex_unlock(perl_mutex* mutex) # define NOT_REACHED #elif defined(DEBUGGING) && (__has_builtin(__builtin_unreachable) \ || (__GNUC__ == 4 && __GNUC_MINOR__ >= 5 || __GNUC__ > 4)) /* 4.5 -> */ -# define NOT_REACHED STMT_START { ASSUME(0); __builtin_unreachable(); } STMT_END +# define NOT_REACHED STMT_START { ASSUME(!"UNREACHABLE"); __builtin_unreachable(); } STMT_END #else -# define NOT_REACHED ASSUME(0) +# define NOT_REACHED ASSUME(!"UNREACHABLE") #endif /* Some unistd.h's give a prototype for pause() even though @@ -3672,11 +3851,10 @@ typedef struct magic_state MGS; /* struct magic_state defined in mg.c */ struct scan_data_t; typedef struct regnode_charclass regnode_charclass; -struct regnode_charclass_class; - /* A hopefully less confusing name. The sub-classes are all Posix classes only * used under /l matching */ -typedef struct regnode_charclass_class regnode_charclass_posixl; +typedef struct regnode_charclass_posixl regnode_charclass_class; +typedef struct regnode_charclass_posixl regnode_charclass_posixl; typedef struct regnode_ssc regnode_ssc; typedef struct RExC_state_t RExC_state_t; @@ -3838,7 +4016,9 @@ my_swap16(const U16 x) { #define U_L(what) U_32(what) #ifdef HAS_SIGNBIT -# define Perl_signbit signbit +# ifndef Perl_signbit +# define Perl_signbit signbit +# endif #endif /* These do not care about the fractional part, only about the range. 
*/ @@ -3855,7 +4035,7 @@ my_swap16(const U16 x) { #endif #ifndef __cplusplus -#if !(defined(WIN32) || defined(UNDER_CE) || defined(SYMBIAN)) +#if !(defined(WIN32) || defined(SYMBIAN)) Uid_t getuid (void); Uid_t geteuid (void); Gid_t getgid (void); @@ -4009,11 +4189,11 @@ Gid_t getegid (void); # define DEBUG_f(a) DEBUG__(DEBUG_f_TEST, a) -#ifndef PERL_EXT_RE_BUILD -# define DEBUG_r(a) DEBUG__(DEBUG_r_TEST, a) -#else -# define DEBUG_r(a) STMT_START {a;} STMT_END -#endif /* PERL_EXT_RE_BUILD */ +# ifndef PERL_EXT_RE_BUILD +# define DEBUG_r(a) DEBUG__(DEBUG_r_TEST, a) +# else +# define DEBUG_r(a) STMT_START {a;} STMT_END +# endif /* PERL_EXT_RE_BUILD */ # define DEBUG_x(a) DEBUG__(DEBUG_x_TEST, a) # define DEBUG_u(a) DEBUG__(DEBUG_u_TEST, a) @@ -4037,7 +4217,7 @@ Gid_t getegid (void); # define DEBUG_L(a) DEBUG__(DEBUG_L_TEST, a) # define DEBUG_i(a) DEBUG__(DEBUG_i_TEST, a) -#else /* DEBUGGING */ +#else /* ! DEBUGGING below */ # define DEBUG_p_TEST (0) # define DEBUG_s_TEST (0) @@ -4438,6 +4618,11 @@ EXTCONST char PL_Zero[] EXTCONST char PL_hexdigit[] INIT("0123456789abcdef0123456789ABCDEF"); +EXTCONST STRLEN PL_WARN_ALL + INIT(0); +EXTCONST STRLEN PL_WARN_NONE + INIT(0); + /* This is constant on most architectures, a global on OS/2 */ #ifndef OS2 EXTCONST char PL_sh_path[] @@ -4674,7 +4859,7 @@ EXTCONST unsigned char PL_latin1_lc[]; #ifndef PERL_GLOBAL_STRUCT /* or perlvars.h */ #ifdef DOINIT -EXT unsigned char PL_fold_locale[] = { /* Unfortunately not EXTCONST. */ +EXT unsigned char PL_fold_locale[256] = { /* Unfortunately not EXTCONST. */ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, @@ -4709,7 +4894,7 @@ EXT unsigned char PL_fold_locale[] = { /* Unfortunately not EXTCONST. */ 248, 249, 250, 251, 252, 253, 254, 255 }; #else -EXT unsigned char PL_fold_locale[]; /* Unfortunately not EXTCONST. */ +EXT unsigned char PL_fold_locale[256]; /* Unfortunately not EXTCONST. 
*/ #endif #endif /* !PERL_GLOBAL_STRUCT */ @@ -5518,7 +5703,7 @@ static U8 utf8d_C9[] = { * FF 1 */ -EXTCONST U8 perl_extended_utf8_dfa_tab[] = { +EXTCONST U8 PL_extended_utf8_dfa_tab[] = { /* The first part of the table maps bytes to character classes to reduce * the size of the transition table and create bitmasks. */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*00-0F*/ @@ -5596,9 +5781,256 @@ EXTCONST U8 perl_extended_utf8_dfa_tab[] = { /*N10*/ 1, 1, 1, 1, 1, 1, 1, 1,N5,N5,N5,N5,N5, 1, 1, 1, 1, 1, }; +/* And below is a version of the above table that accepts only strict UTF-8. + * Hence no surrogates nor non-characters, nor non-Unicode. Thus, if the input + * passes this dfa, it will be for a well-formed, non-problematic code point + * that can be returned immediately. + * + * The "Implementation details" portion of + * http://bjoern.hoehrmann.de/utf-8/decoder/dfa/ shows how + * the first portion of the table maps each possible byte into a character + * class. And that the classes for those bytes which are start bytes have been + * carefully chosen so they serve as well to be used as a shift value to mask + * off the leading 1 bits of the start byte. Unfortunately the addition of + * being able to distinguish non-characters makes this not fully work. This is + * because, now, the start bytes E1-EF have to be broken into 3 classes instead + * of 2: + * 1) ED because it could be a surrogate + * 2) EF because it could be a non-character + * 3) the rest, which can never evaluate to a problematic code point. + * + * Each of E1-EF has three leading 1 bits, then a 0. That means we could use a + * shift (and hence class number) of either 3 or 4 to get a mask that works. + * But that only allows two categories, and we need three. khw made the + * decision to therefore treat the ED start byte as an error, so that the dfa + * drops out immediately for that. In the dfa, classes 3 and 4 are used to + * distinguish EF vs the rest. 
Then special code is used to deal with ED, + * that's executed only when the dfa drops out. The code points started by ED + * are half surrogates, and half hangul syllables. This means that 2048 of the + * the hangul syllables (about 18%) take longer than all other non-problematic + * code points to handle. + * + * The changes to handle non-characters requires the addition of states and + * classes to the dfa. (See the section on "Mapping bytes to character + * classes" in the linked-to document for further explanation of the original + * dfa.) + * + * The classes are + * 00-7F 0 + * 80-8E 9 + * 8F 10 + * 90-9E 11 + * 9F 12 + * A0-AE 13 + * AF 14 + * B0-B6 15 + * B7 16 + * B8-BD 15 + * BE 17 + * BF 18 + * C0,C1 1 + * C2-DF 2 + * E0 7 + * E1-EC 3 + * ED 1 + * EE 3 + * EF 4 + * F0 8 + * F1-F3 6 (6 bits can be stripped) + * F4 5 (only 5 can be stripped) + * F5-FF 1 + */ + +EXTCONST U8 PL_strict_utf8_dfa_tab[] = { + /* The first part of the table maps bytes to character classes to reduce + * the size of the transition table and create bitmasks. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*00-0F*/ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*10-1F*/ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*20-2F*/ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*30-3F*/ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*40-4F*/ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*50-5F*/ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*60-6F*/ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*70-7F*/ + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,10, /*80-8F*/ + 11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,12, /*90-9F*/ + 13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,14, /*A0-AF*/ + 15,15,15,15,15,15,15,16,15,15,15,15,15,15,17,18, /*B0-BF*/ + 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /*C0-CF*/ + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /*D0-DF*/ + 7, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 1, 3, 4, /*E0-EF*/ + 8, 6, 6, 6, 5, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*F0-FF*/ + +/* The second part is a transition table that maps a combination + * of a state of the automaton and a character class to a new state, called a + * node. The nodes are: + * N0 The initial state, and final accepting one. + * N1 Any one continuation byte (80-BF) left. This is transitioned to + * immediately when the start byte indicates a two-byte sequence + * N2 Any two continuation bytes left. + * N3 Start byte is E0. Continuation bytes 80-9F are illegal (overlong); + * the other continuations transition to state N1 + * N4 Start byte is EF. Continuation byte B7 transitions to N8; BF to N9; + * the other continuations transitions to N1 + * N5 Start byte is F0. Continuation bytes 80-8F are illegal (overlong); + * [9AB]F transition to N10; the other continuations to N2. + * N6 Start byte is F[123]. Continuation bytes [89AB]F transition + * to N10; the other continuations to N2. + * N7 Start byte is F4. 
Continuation bytes 90-BF are illegal + * (non-unicode); 8F transitions to N10; the other continuations to N2 + * N8 Initial sequence is EF B7. Continuation bytes 90-AF are illegal + * (non-characters); the other continuations transition to N0. + * N9 Initial sequence is EF BF. Continuation bytes BE and BF are illegal + * (non-characters); the other continuations transition to N0. + * N10 Initial sequence is one of: F0 [9-B]F; F[123] [8-B]F; or F4 8F. + * Continuation byte BF transitions to N11; the other continuations to + * N1 + * N11 Initial sequence is the two bytes given in N10 followed by BF. + * Continuation bytes BE and BF are illegal (non-characters); the other + * continuations transition to N0. + * 1 Reject. All transitions not mentioned above (except the single + * byte ones (as they are always legal) are to this state. + */ + +# undef N0 +# undef N1 +# undef N2 +# undef N3 +# undef N4 +# undef N5 +# undef N6 +# undef N7 +# undef N8 +# undef N9 +# undef NUM_CLASSES +# define NUM_CLASSES 19 +# define N0 0 +# define N1 ((N0) + NUM_CLASSES) +# define N2 ((N1) + NUM_CLASSES) +# define N3 ((N2) + NUM_CLASSES) +# define N4 ((N3) + NUM_CLASSES) +# define N5 ((N4) + NUM_CLASSES) +# define N6 ((N5) + NUM_CLASSES) +# define N7 ((N6) + NUM_CLASSES) +# define N8 ((N7) + NUM_CLASSES) +# define N9 ((N8) + NUM_CLASSES) +# define N10 ((N9) + NUM_CLASSES) +# define N11 ((N10) + NUM_CLASSES) + +/*Class: 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 */ +/*N0*/ 0, 1, N1, N2, N4, N7, N6, N3, N5, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +/*N1*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +/*N2*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, N1, N1, N1, N1, N1, N1, N1, N1, N1, N1, + +/*N3*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, N1, N1, N1, N1, N1, N1, +/*N4*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, N1, N1, N1, N1, N1, N1, N1, N8, N1, N9, +/*N5*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, N2,N10, N2,N10, N2, N2, N2,N10, +/*N6*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, N2,N10, N2,N10, N2,N10, N2, N2, N2,N10, +/*N7*/ 1, 1, 1, 1, 
1, 1, 1, 1, 1, N2,N10, 1, 1, 1, 1, 1, 1, 1, 1, +/*N8*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, +/*N9*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, +/*N10*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, N1, N1, N1, N1, N1, N1, N1, N1, N1,N11, +/*N11*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, +}; + +/* And below is yet another version of the above tables that accepts only UTF-8 + * as defined by Corregidum #9. Hence no surrogates nor non-Unicode, but + * it allows non-characters. This is isomorphic to the original table + * in http://bjoern.hoehrmann.de/utf-8/decoder/dfa/ + * + * The classes are + * 00-7F 0 + * 80-8F 9 + * 90-9F 10 + * A0-BF 11 + * C0,C1 1 + * C2-DF 2 + * E0 7 + * E1-EC 3 + * ED 4 + * EE-EF 3 + * F0 8 + * F1-F3 6 (6 bits can be stripped) + * F4 5 (only 5 can be stripped) + * F5-FF 1 + */ + +EXTCONST U8 PL_c9_utf8_dfa_tab[] = { + /* The first part of the table maps bytes to character classes to reduce + * the size of the transition table and create bitmasks. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*00-0F*/ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*10-1F*/ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*20-2F*/ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*30-3F*/ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*40-4F*/ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*50-5F*/ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*60-6F*/ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*70-7F*/ + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, /*80-8F*/ + 10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10, /*90-9F*/ + 11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11, /*A0-AF*/ + 11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11, /*B0-BF*/ + 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /*C0-CF*/ + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /*D0-DF*/ + 7, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 3, 3, /*E0-EF*/ + 8, 6, 6, 6, 5, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*F0-FF*/ + +/* The second part is a transition table that maps a combination + * of a state of the automaton and a character class to a new state, called a + * node. The nodes are: + * N0 The initial state, and final accepting one. + * N1 Any one continuation byte (80-BF) left. This is transitioned to + * immediately when the start byte indicates a two-byte sequence + * N2 Any two continuation bytes left. + * N3 Any three continuation bytes left. + * N4 Start byte is E0. Continuation bytes 80-9F are illegal (overlong); + * the other continuations transition to state N1 + * N5 Start byte is ED. Continuation bytes A0-BF all lead to surrogates, + * so are illegal. The other continuations transition to state N1. + * N6 Start byte is F0. Continuation bytes 80-8F are illegal (overlong); + * the other continuations transition to N2 + * N7 Start byte is F4. Continuation bytes 90-BF are illegal + * (non-unicode); the other continuations transition to N2 + * 1 Reject. 
All transitions not mentioned above (except the single + * byte ones (as they are always legal) are to this state. + */ + +# undef N0 +# undef N1 +# undef N2 +# undef N3 +# undef N4 +# undef N5 +# undef N6 +# undef N7 +# undef NUM_CLASSES +# define NUM_CLASSES 12 +# define N0 0 +# define N1 ((N0) + NUM_CLASSES) +# define N2 ((N1) + NUM_CLASSES) +# define N3 ((N2) + NUM_CLASSES) +# define N4 ((N3) + NUM_CLASSES) +# define N5 ((N4) + NUM_CLASSES) +# define N6 ((N5) + NUM_CLASSES) +# define N7 ((N6) + NUM_CLASSES) + +/*Class: 0 1 2 3 4 5 6 7 8 9 10 11 */ +/*N0*/ 0, 1, N1, N2, N5, N7, N3, N4, N6, 1, 1, 1, +/*N1*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, +/*N2*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, N1, N1, N1, +/*N3*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, N2, N2, N2, + +/*N4*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, N1, +/*N5*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, N1, N1, 1, +/*N6*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, N2, N2, +/*N7*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, N2, 1, 1, +}; + # else /* End of is DOINIT */ -EXTCONST U8 perl_extended_utf8_dfa_tab[]; +EXTCONST U8 PL_extended_utf8_dfa_tab[]; +EXTCONST U8 PL_strict_utf8_dfa_tab[]; +EXTCONST U8 PL_c9_utf8_dfa_tab[]; # endif #endif /* end of isn't EBCDIC */ @@ -5723,11 +6155,19 @@ typedef struct am_table_short AMTS; # define KEYWORD_PLUGIN_MUTEX_LOCK MUTEX_LOCK(&PL_keyword_plugin_mutex) # define KEYWORD_PLUGIN_MUTEX_UNLOCK MUTEX_UNLOCK(&PL_keyword_plugin_mutex) # define KEYWORD_PLUGIN_MUTEX_TERM MUTEX_DESTROY(&PL_keyword_plugin_mutex) +# define USER_PROP_MUTEX_INIT MUTEX_INIT(&PL_user_prop_mutex) +# define USER_PROP_MUTEX_LOCK MUTEX_LOCK(&PL_user_prop_mutex) +# define USER_PROP_MUTEX_UNLOCK MUTEX_UNLOCK(&PL_user_prop_mutex) +# define USER_PROP_MUTEX_TERM MUTEX_DESTROY(&PL_user_prop_mutex) #else # define KEYWORD_PLUGIN_MUTEX_INIT NOOP # define KEYWORD_PLUGIN_MUTEX_LOCK NOOP # define KEYWORD_PLUGIN_MUTEX_UNLOCK NOOP # define KEYWORD_PLUGIN_MUTEX_TERM NOOP +# define USER_PROP_MUTEX_INIT NOOP +# define USER_PROP_MUTEX_LOCK NOOP +# define USER_PROP_MUTEX_UNLOCK NOOP +# define 
USER_PROP_MUTEX_TERM NOOP #endif #ifdef USE_LOCALE /* These locale things are all subject to change */ @@ -5745,6 +6185,27 @@ typedef struct am_table_short AMTS; # define IN_SOME_LOCALE_FORM_COMPILETIME \ cBOOL(PL_hints & (HINT_LOCALE|HINT_LOCALE_PARTIAL)) +/* +=head1 Locale-related functions and macros + +=for apidoc Amn|bool|IN_LOCALE + +Evaluates to TRUE if the plain locale pragma without a parameter (S>) is in effect. + +=for apidoc Amn|bool|IN_LOCALE_COMPILETIME + +Evaluates to TRUE if, when compiling a perl program (including an C) if +the plain locale pragma without a parameter (S>) is in effect. + +=for apidoc Amn|bool|IN_LOCALE_RUNTIME + +Evaluates to TRUE if, when executing a perl program (including an C) if +the plain locale pragma without a parameter (S>) is in effect. + +=cut +*/ + # define IN_LOCALE \ (IN_PERL_COMPILETIME ? IN_LOCALE_COMPILETIME : IN_LOCALE_RUNTIME) # define IN_SOME_LOCALE_FORM \ @@ -5842,7 +6303,7 @@ typedef struct am_table_short AMTS; && (! defined(USE_THREAD_SAFE_LOCALE) || defined(TS_W32_BROKEN_LOCALECONV)) /* We have a locale object holding the 'C' locale for Posix 2008 */ -#ifndef USE_POSIX_2008_LOCALE +# ifndef USE_POSIX_2008_LOCALE # define _LOCALE_TERM_POSIX_2008 NOOP # else # define _LOCALE_TERM_POSIX_2008 \ @@ -5945,7 +6406,7 @@ typedef struct am_table_short AMTS; else { \ PL_lc_numeric_mutex_depth++; \ DEBUG_Lv(PerlIO_printf(Perl_debug_log, \ - "%s: %d: avoided lc_numeric_lock; depth=%d\n", \ + "%s: %d: avoided lc_numeric_lock; new depth=%d\n", \ __FILE__, __LINE__, PL_lc_numeric_mutex_depth)); \ if (cond_to_panic_if_already_locked) { \ Perl_croak_nocontext("panic: %s: %d: Trying to change" \ @@ -5967,7 +6428,7 @@ typedef struct am_table_short AMTS; else { \ PL_lc_numeric_mutex_depth--; \ DEBUG_Lv(PerlIO_printf(Perl_debug_log, \ - "%s: %d: avoided lc_numeric_unlock; depth=%d\n", \ + "%s: %d: avoided lc_numeric_unlock; new depth=%d\n",\ __FILE__, __LINE__, PL_lc_numeric_mutex_depth)); \ } \ } STMT_END \ @@ -6066,7 +6527,15 
@@ argument list, like this: On threaded perls not operating with thread-safe functionality, this macro uses a mutex to force a critical section. Therefore the matching RESTORE should be -close by, and guaranteed to be called. +close by, and guaranteed to be called; see L</WITH_LC_NUMERIC_SET_TO_NEEDED> +for a more contained way to ensure that. + +=for apidoc Am|void|STORE_LC_NUMERIC_SET_TO_NEEDED_IN|bool in_lc_numeric + +Same as L</STORE_LC_NUMERIC_SET_TO_NEEDED> with in_lc_numeric provided +as the precalculated value of C<IN_LC(LC_NUMERIC)>. It is the caller's +responsibility to ensure that the status of C<PL_compiling> and C<PL_hints> +cannot have changed since the precalculation. =for apidoc Am|void|RESTORE_LC_NUMERIC @@ -6087,6 +6556,36 @@ expression, but with an empty argument list, like this: ... } +=for apidoc Am|void|WITH_LC_NUMERIC_SET_TO_NEEDED + +This macro invokes the supplied statement or block within the context +of a L</STORE_LC_NUMERIC_SET_TO_NEEDED> .. L</RESTORE_LC_NUMERIC> pair +if required, so e.g.: + + WITH_LC_NUMERIC_SET_TO_NEEDED( + SNPRINTF_G(fv, ebuf, sizeof(ebuf), precis) + ); + +is equivalent to: + + { +#ifdef USE_LOCALE_NUMERIC + DECLARATION_FOR_LC_NUMERIC_MANIPULATION; + STORE_LC_NUMERIC_SET_TO_NEEDED(); +#endif + SNPRINTF_G(fv, ebuf, sizeof(ebuf), precis); +#ifdef USE_LOCALE_NUMERIC + RESTORE_LC_NUMERIC(); +#endif + } + +=for apidoc Am|void|WITH_LC_NUMERIC_SET_TO_NEEDED_IN|bool in_lc_numeric + +Same as L</WITH_LC_NUMERIC_SET_TO_NEEDED> with in_lc_numeric provided +as the precalculated value of C<IN_LC(LC_NUMERIC)>. It is the caller's +responsibility to ensure that the status of C<PL_compiling> and C<PL_hints> +cannot have changed since the precalculation.
+ =cut */ @@ -6114,12 +6613,13 @@ expression, but with an empty argument list, like this: # define DECLARATION_FOR_LC_NUMERIC_MANIPULATION \ void (*_restore_LC_NUMERIC_function)(pTHX) = NULL -# define STORE_LC_NUMERIC_SET_TO_NEEDED() \ +# define STORE_LC_NUMERIC_SET_TO_NEEDED_IN(in) \ STMT_START { \ + bool _in_lc_numeric = (in); \ LC_NUMERIC_LOCK( \ - (IN_LC(LC_NUMERIC) && _NOT_IN_NUMERIC_UNDERLYING) \ - || _NOT_IN_NUMERIC_STANDARD); \ - if (IN_LC(LC_NUMERIC)) { \ + ( ( _in_lc_numeric && _NOT_IN_NUMERIC_UNDERLYING) \ + || (! _in_lc_numeric && _NOT_IN_NUMERIC_STANDARD))); \ + if (_in_lc_numeric) { \ if (_NOT_IN_NUMERIC_UNDERLYING) { \ Perl_set_numeric_underlying(aTHX); \ _restore_LC_NUMERIC_function \ @@ -6135,6 +6635,9 @@ expression, but with an empty argument list, like this: } \ } STMT_END +# define STORE_LC_NUMERIC_SET_TO_NEEDED() \ + STORE_LC_NUMERIC_SET_TO_NEEDED_IN(IN_LC(LC_NUMERIC)) + # define RESTORE_LC_NUMERIC() \ STMT_START { \ if (_restore_LC_NUMERIC_function) { \ @@ -6209,6 +6712,17 @@ expression, but with an empty argument list, like this: __FILE__, __LINE__, PL_numeric_standard)); \ } STMT_END +# define WITH_LC_NUMERIC_SET_TO_NEEDED_IN(in_lc_numeric, block) \ + STMT_START { \ + DECLARATION_FOR_LC_NUMERIC_MANIPULATION; \ + STORE_LC_NUMERIC_SET_TO_NEEDED_IN(in_lc_numeric); \ + block; \ + RESTORE_LC_NUMERIC(); \ + } STMT_END /* no ';' here: the invoking statement supplies it */ + +# define WITH_LC_NUMERIC_SET_TO_NEEDED(block) \ + WITH_LC_NUMERIC_SET_TO_NEEDED_IN(IN_LC(LC_NUMERIC), block) + #else /* !USE_LOCALE_NUMERIC */ # define SET_NUMERIC_STANDARD() @@ -6217,25 +6731,53 @@ expression, but with an empty argument list, like this: # define DECLARATION_FOR_LC_NUMERIC_MANIPULATION # define STORE_LC_NUMERIC_SET_STANDARD() # define STORE_LC_NUMERIC_FORCE_TO_UNDERLYING() +# define STORE_LC_NUMERIC_SET_TO_NEEDED_IN(in_lc_numeric) # define STORE_LC_NUMERIC_SET_TO_NEEDED() # define RESTORE_LC_NUMERIC() # define LOCK_LC_NUMERIC_STANDARD() # define UNLOCK_LC_NUMERIC_STANDARD() +# define
WITH_LC_NUMERIC_SET_TO_NEEDED_IN(in_lc_numeric, block) \ STMT_START { block; } STMT_END +# define WITH_LC_NUMERIC_SET_TO_NEEDED(block) \ STMT_START { block; } STMT_END #endif /* !USE_LOCALE_NUMERIC */ #define Atof my_atof -#ifdef USE_QUADMATH -# define Perl_strtod(s, e) strtoflt128(s, e) -#elif defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE) -# if defined(HAS_STRTOLD) -# define Perl_strtod(s, e) strtold(s, e) -# elif defined(HAS_STRTOD) -# define Perl_strtod(s, e) (NV)strtod(s, e) /* Unavoidable loss. */ -# endif -#elif defined(HAS_STRTOD) -# define Perl_strtod(s, e) strtod(s, e) +/* + +=head1 Numeric functions + +=for apidoc AmTR|NV|Strtod|NN const char * const s|NULLOK char ** e + +This is a synonym for L</my_strtod>. + +=for apidoc AmTR|NV|Strtol|NN const char * const s|NULLOK char ** e|int base + +Platform and configuration independent C<strtol>. This expands to the +appropriate C<strtol>-like function based on the platform and F<Configure> +options. For example it could expand to C<strtoll> or C<strtoq> instead of +C<strtol>. + +=for apidoc AmTR|NV|Strtoul|NN const char * const s|NULLOK char ** e|int base + +Platform and configuration independent C<strtoul>. This expands to the +appropriate C<strtoul>-like function based on the platform and F<Configure> +options. For example it could expand to C<strtoull> or C<strtouq> instead of +C<strtoul>.
+ =cut + +*/ + +#define Strtod my_strtod + +#if defined(HAS_STRTOD) \ + || defined(USE_QUADMATH) \ + || (defined(HAS_STRTOLD) && defined(HAS_LONG_DOUBLE) \ + && defined(USE_LONG_DOUBLE)) +# define Perl_strtod Strtod #endif #if !defined(Strtol) && defined(USE_64_BIT_INT) && defined(IV_IS_QUAD) && \ @@ -6530,6 +7072,15 @@ int flock(int fd, int op); #define IS_NUMBER_NAN 0x20 /* this is not */ #define IS_NUMBER_TRAILING 0x40 /* number has trailing trash */ +/* +=head1 Numeric functions + +=for apidoc AmdR|bool|GROK_NUMERIC_RADIX|NN const char **sp|NN const char *send + +A synonym for L</grok_numeric_radix> + +=cut +*/ #define GROK_NUMERIC_RADIX(sp, send) grok_numeric_radix(sp, send) /* Input flags: */ @@ -6552,10 +7103,6 @@ extern void moncontrol(int); #define PERL_GPROF_MONCONTROL(x) #endif -#ifdef UNDER_CE -#include "wince.h" -#endif - /* ISO 6429 NEL - C1 control NExt Line */ /* See http://www.unicode.org/unicode/reports/tr13/ */ #define NEXT_LINE_CHAR NEXT_LINE_NATIVE @@ -6614,8 +7161,20 @@ extern void moncontrol(int); #define PERL_SIGNALS_UNSAFE_FLAG 0x0001 -/* Use instead of abs() since abs() forces its argument to be an int, - * but also beware since this evaluates its argument twice, so no x++. */ +/* +=head1 Numeric functions + +=for apidoc Am|int|PERL_ABS|int + +Typeless C<abs> or C<fabs>, I<etc>. (The usage below indicates it is for +integers, but it works for any type.) Use instead of these, since the C +library ones force their argument to be what they are expecting, potentially +leading to disaster. But also beware that this evaluates its argument twice, +so no C<x++>. + +=cut +*/ + #define PERL_ABS(x) ((x) < 0 ? -(x) : (x)) #if defined(__DECC) && defined(__osf__)