/* See L<perlguts/"The Perl API"> for detailed notes on
* PERL_IMPLICIT_CONTEXT and PERL_IMPLICIT_SYS */
-/* Note that from here --> to <-- the same logic is
+/* XXX NOTE that from here --> to <-- the same logic is
* repeated in makedef.pl, so be certain to update
* both places when editing. */
#endif
/* Use the reentrant APIs like localtime_r and getpwent_r */
-/* Win32 has naturally threadsafe libraries, no need to use any _r variants. */
+/* Win32 has naturally threadsafe libraries, no need to use any _r variants.
+ * XXX KEEP makedef.pl copy of this code in sync */
#if defined(USE_ITHREADS) && !defined(USE_REENTRANT_API) && !defined(NETWARE) && !defined(WIN32)
# define USE_REENTRANT_API
#endif
Perl_pregfree(aTHX_ (prog))
#define CALLREGFREE_PVT(prog) \
- if(prog) RX_ENGINE(prog)->rxfree(aTHX_ (prog))
+ if(prog && RX_ENGINE(prog)) RX_ENGINE(prog)->rxfree(aTHX_ (prog))
#define CALLREG_NUMBUF_FETCH(rx,paren,usesv) \
RX_ENGINE(rx)->numbered_buff_FETCH(aTHX_ (rx),(paren),(usesv))
# include <xlocale.h>
#endif
-#if !defined(NO_LOCALE) && defined(HAS_SETLOCALE)
-# define USE_LOCALE
+/* If not forbidden, we enable locale handling if either 1) the POSIX 2008
+ * functions are available, or 2) just the setlocale() function. This logic is
+ * repeated in t/loc_tools.pl and makedef.pl; the three should be kept in
+ * sync. */
+#if ! defined(NO_LOCALE)
+
+# if ! defined(NO_POSIX_2008_LOCALE) \
+ && defined(HAS_NEWLOCALE) \
+ && defined(HAS_USELOCALE) \
+ && defined(HAS_DUPLOCALE) \
+ && defined(HAS_FREELOCALE) \
+ && defined(LC_ALL_MASK)
+
+ /* For simplicity, the code is written to assume that any platform advanced
+ * enough to have the Posix 2008 locale functions has LC_ALL. The final
+ * test above makes sure that assumption is valid */
+
+# define HAS_POSIX_2008_LOCALE
+# define USE_LOCALE
+# elif defined(HAS_SETLOCALE)
+# define USE_LOCALE
+# endif
+#endif
+
+#ifdef USE_LOCALE
# define HAS_SKIP_LOCALE_INIT /* Solely for XS code to test for this
- capability */
+ #define */
# if !defined(NO_LOCALE_COLLATE) && defined(LC_COLLATE) \
&& defined(HAS_STRXFRM)
# define USE_LOCALE_COLLATE
# if !defined(NO_LOCALE_TELEPHONE) && defined(LC_TELEPHONE)
# define USE_LOCALE_TELEPHONE
# endif
-#endif /* !NO_LOCALE && HAS_SETLOCALE */
-
-#ifdef USE_LOCALE /* These locale things are all subject to change */
-# if defined(HAS_NEWLOCALE) \
- && defined(LC_ALL_MASK) \
- && defined(HAS_FREELOCALE) \
- && defined(HAS_USELOCALE) \
- && ! defined(NO_POSIX_2008_LOCALE)
- /* For simplicity, the code is written to assume that any platform advanced
- * enough to have the Posix 2008 locale functions has LC_ALL. The test
- * above makes sure that assumption is valid */
+/* XXX The next few defines are unfortunately duplicated in makedef.pl, and
+ * changes here MUST also be made there */
-# define HAS_POSIX_2008_LOCALE
-# endif
-# if defined(USE_ITHREADS) \
- && ( defined(HAS_POSIX_2008_LOCALE) \
- || (defined(WIN32) && defined(_MSC_VER) && _MSC_VER >= 1400)) \
- && ! defined(NO_THREAD_SAFE_LOCALE)
-# define USE_THREAD_SAFE_LOCALE
+# if ! defined(HAS_SETLOCALE) && defined(HAS_POSIX_2008_LOCALE)
+# define USE_POSIX_2008_LOCALE
+# ifndef USE_THREAD_SAFE_LOCALE
+# define USE_THREAD_SAFE_LOCALE
+# endif
+ /* If compiled with
+ * -DUSE_THREAD_SAFE_LOCALE, thread-safe locale
+ * handling is requested even on unthreaded builds */
+# elif (defined(USE_ITHREADS) || defined(USE_THREAD_SAFE_LOCALE)) \
+ && ( defined(HAS_POSIX_2008_LOCALE) \
+ || (defined(WIN32) && defined(_MSC_VER) && _MSC_VER >= 1400)) \
+ && ! defined(NO_THREAD_SAFE_LOCALE)
+# ifndef USE_THREAD_SAFE_LOCALE
+# define USE_THREAD_SAFE_LOCALE
+# endif
# ifdef HAS_POSIX_2008_LOCALE
# define USE_POSIX_2008_LOCALE
# endif
# endif
#endif
+/* Microsoft documentation reads in the change log for VS 2015:
+ * "The localeconv function declared in locale.h now works correctly when
+ * per-thread locale is enabled. In previous versions of the library, this
+ * function would return the lconv data for the global locale, not the
+ * thread's locale."
+ *
+ * TS_W32_BROKEN_LOCALECONV therefore gets defined for thread-safe-locale
+ * builds on WIN32 whose _MSC_VER is below 1900.
+ *
+ * NOTE(review): _MSC_VER is compared here without a defined() test. In a
+ * #if expression an undefined identifier evaluates to 0, so a WIN32 build
+ * with a compiler that does not define _MSC_VER also satisfies the
+ * condition (and draws a warning under -Wundef). Confirm that including
+ * such compilers is intended.
+ */
+#if defined(WIN32) && defined(USE_THREAD_SAFE_LOCALE) && _MSC_VER < 1900
+# define TS_W32_BROKEN_LOCALECONV
+#endif
+
#include <setjmp.h>
#ifdef I_SYS_PARAM
# define PERL_STRLEN_EXPAND_SHIFT 2
#endif
-#include <stddef.h>
-#define STRUCT_OFFSET(s,m) offsetof(s,m)
+/* This use of offsetof() requires /Zc:offsetof- for VS2017 (and presumably
+ * onwards) when building Socket.xs, but we can just use a different definition
+ * for STRUCT_OFFSET instead. */
+#if defined(WIN32) && defined(_MSC_VER) && _MSC_VER >= 1910
+# define STRUCT_OFFSET(s,m) (Size_t)(&(((s *)0)->m))
+#else
+# include <stddef.h>
+# define STRUCT_OFFSET(s,m) offsetof(s,m)
+#endif
-/* ptrdiff_t is C11, so undef it under pedantic builds */
+/* ptrdiff_t is C11, so undef it under pedantic builds. (Actually it is
+ * in C89, but apparently there are platforms where it doesn't exist. See
+ * thread beginning at http://nntp.perl.org/group/perl.perl5.porters/251541.)
+ * */
#ifdef PERL_GCC_PEDANTIC
# undef HAS_PTRDIFF_T
#endif
+#ifdef HAS_PTRDIFF_T /* Ptrdiff_t: portable spelling of ptrdiff_t */
+# define Ptrdiff_t ptrdiff_t
+#else /* HAS_PTRDIFF_T is unset (possibly undef'd just above): use SSize_t */
+# define Ptrdiff_t SSize_t
+#endif
+
#ifndef __SYMBIAN32__
# include <string.h>
#endif
# define saferealloc Perl_realloc
# define safefree Perl_mfree
# define CHECK_MALLOC_TOO_LATE_FOR_(code) STMT_START { \
- if (!PL_tainting && MallocCfg_ptr[MallocCfg_cfg_env_read]) \
+ if (!TAINTING_get && MallocCfg_ptr[MallocCfg_cfg_env_read]) \
code; \
} STMT_END
# define CHECK_MALLOC_TOO_LATE_FOR(ch) \
# define Perl_fmod fmodq
# define Perl_log logq
# define Perl_log10 log10q
+# define Perl_signbit signbitq
# define Perl_pow powq
# define Perl_sin sinq
# define Perl_sinh sinhq
#endif
/* Win32: _fpclass(), _isnan(), _finite(). */
-#ifdef WIN32
+#ifdef _MSC_VER
# ifndef Perl_isnan
# define Perl_isnan(x) _isnan(x)
# endif
#ifdef USE_PERL_ATOF
# define Perl_atof(s) Perl_my_atof(s)
-# define Perl_atof2(s, n) Perl_my_atof2(aTHX_ (s), &(n))
+# define Perl_atof2(s, n) Perl_my_atof3(aTHX_ (s), &(n), 0)
#else
# define Perl_atof(s) (NV)atof(s)
# define Perl_atof2(s, n) ((n) = atof(s))
#endif
+#define my_atof2(a,b) my_atof3(a,b,0)
/*
* CHAR_MIN and CHAR_MAX are not included here, as the (char) type may be
struct scan_data_t;
typedef struct regnode_charclass regnode_charclass;
-struct regnode_charclass_class;
-
/* A hopefully less confusing name. The sub-classes are all Posix classes only
* used under /l matching */
-typedef struct regnode_charclass_class regnode_charclass_posixl;
+typedef struct regnode_charclass_posixl regnode_charclass_class;
+typedef struct regnode_charclass_posixl regnode_charclass_posixl;
typedef struct regnode_ssc regnode_ssc;
typedef struct RExC_state_t RExC_state_t;
#define U_L(what) U_32(what)
#ifdef HAS_SIGNBIT
-# define Perl_signbit signbit
+# ifndef Perl_signbit
+# define Perl_signbit signbit
+# endif
#endif
/* These do not care about the fractional part, only about the range. */
# define DEBUG_f(a) DEBUG__(DEBUG_f_TEST, a)
-#ifndef PERL_EXT_RE_BUILD
-# define DEBUG_r(a) DEBUG__(DEBUG_r_TEST, a)
-#else
-# define DEBUG_r(a) STMT_START {a;} STMT_END
-#endif /* PERL_EXT_RE_BUILD */
+# ifndef PERL_EXT_RE_BUILD
+# define DEBUG_r(a) DEBUG__(DEBUG_r_TEST, a)
+# else
+# define DEBUG_r(a) STMT_START {a;} STMT_END
+# endif /* PERL_EXT_RE_BUILD */
# define DEBUG_x(a) DEBUG__(DEBUG_x_TEST, a)
# define DEBUG_u(a) DEBUG__(DEBUG_u_TEST, a)
# define DEBUG_L(a) DEBUG__(DEBUG_L_TEST, a)
# define DEBUG_i(a) DEBUG__(DEBUG_i_TEST, a)
-#else /* DEBUGGING */
+#else /* ! DEBUGGING below */
# define DEBUG_p_TEST (0)
# define DEBUG_s_TEST (0)
EXTCONST char PL_hexdigit[]
INIT("0123456789abcdef0123456789ABCDEF");
+EXTCONST STRLEN PL_WARN_ALL
+ INIT(0);
+EXTCONST STRLEN PL_WARN_NONE
+ INIT(0);
+
/* This is constant on most architectures, a global on OS/2 */
#ifndef OS2
EXTCONST char PL_sh_path[]
|| (UNICODE_MAJOR_VERSION == 2 && UNICODE_DOT_VERSION >= 1 \
&& UNICODE_DOT_DOT_VERSION >= 8)
255 /*sharp s*/,
-#else /* uc() is itself in early unicode */
+#else /* uc(sharp s) is 'sharp s' itself in early unicode */
223,
#endif
224-32, 225-32, 226-32, 227-32, 228-32, 229-32, 230-32, 231-32,
#ifndef PERL_GLOBAL_STRUCT /* or perlvars.h */
#ifdef DOINIT
-EXT unsigned char PL_fold_locale[] = { /* Unfortunately not EXTCONST. */
+EXT unsigned char PL_fold_locale[256] = { /* Unfortunately not EXTCONST. */
0, 1, 2, 3, 4, 5, 6, 7,
8, 9, 10, 11, 12, 13, 14, 15,
16, 17, 18, 19, 20, 21, 22, 23,
248, 249, 250, 251, 252, 253, 254, 255
};
#else
-EXT unsigned char PL_fold_locale[]; /* Unfortunately not EXTCONST. */
+EXT unsigned char PL_fold_locale[256]; /* Unfortunately not EXTCONST. */
#endif
#endif /* !PERL_GLOBAL_STRUCT */
XREF,
XSTATE,
XBLOCK,
- XATTRBLOCK,
- XATTRTERM,
+ XATTRBLOCK, /* next token should be an attribute or block */
+ XATTRTERM, /* next token should be an attribute, or block in a term */
XTERMBLOCK,
XBLOCKTERM,
XPOSTDEREF,
* With the U8_NV version you will want to have inner braces,
* while with the NV_U8 use just the NV. */
-#ifdef __cplusplus
-#define INFNAN_U8_NV_DECL EXTERN_C const union { U8 u8[NVSIZE]; NV nv; }
-#define INFNAN_NV_U8_DECL EXTERN_C const union { NV nv; U8 u8[NVSIZE]; }
-#else
#define INFNAN_U8_NV_DECL EXTCONST union { U8 u8[NVSIZE]; NV nv; }
#define INFNAN_NV_U8_DECL EXTCONST union { NV nv; U8 u8[NVSIZE]; }
-#endif
/* if these never got defined, they need defaults */
#ifndef PERL_SET_CONTEXT
# define PERL_SET_THX(t) NOOP
#endif
+#ifndef EBCDIC
+
+/* The tables below are adapted from
+ * http://bjoern.hoehrmann.de/utf-8/decoder/dfa/, which requires this copyright
+ * notice:
+
+Copyright (c) 2008-2009 Bjoern Hoehrmann <bjoern@hoehrmann.de>
+
+Permission is hereby granted, free of charge, to any person obtaining a copy of
+this software and associated documentation files (the "Software"), to deal in
+the Software without restriction, including without limitation the rights to
+use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+of the Software, and to permit persons to whom the Software is furnished to do
+so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+
+*/
+
+# ifdef DOINIT
+# if 0 /* This is the original table given in
+ http://bjoern.hoehrmann.de/utf-8/decoder/dfa/ */
+static U8 utf8d_C9[] = {
+ /* The first part of the table maps bytes to character classes
+ * to reduce the size of the transition table and create bitmasks. */
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /*-1F*/
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /*-3F*/
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /*-5F*/
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /*-7F*/
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, /*-9F*/
+ 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, /*-BF*/
+ 8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, /*-DF*/
+ 10,3,3,3,3,3,3,3,3,3,3,3,3,4,3,3, 11,6,6,6,5,8,8,8,8,8,8,8,8,8,8,8, /*-FF*/
+
+ /* The second part is a transition table that maps a combination
+ * of a state of the automaton and a character class to a state. */
+ 0,12,24,36,60,96,84,12,12,12,48,72, 12,12,12,12,12,12,12,12,12,12,12,12,
+ 12, 0,12,12,12,12,12, 0,12, 0,12,12, 12,24,12,12,12,12,12,24,12,24,12,12,
+ 12,12,12,12,12,12,12,24,12,12,12,12, 12,24,12,12,12,12,12,12,12,24,12,12,
+ 12,12,12,12,12,12,12,36,12,36,12,12, 12,36,12,12,12,12,12,36,12,36,12,12,
+ 12,36,12,12,12,12,12,12,12,12,12,12
+};
+
+# endif
+
+/* This is a version of the above table customized for Perl that doesn't
+ * exclude surrogates and accepts start bytes up through FD (FE on 64-bit
+ * machines). The classes have been renumbered so that the patterns are more
+ * evident in the table. The class numbers for start bytes are constrained so
+ * that they can be used as a shift count for masking off the leading one bits.
+ * It would make the code simpler if start byte FF could also be handled, but
+ * doing so would mean adding nodes for each of continuation bytes 6-12
+ * remaining, and two more nodes for overlong detection (a total of 9), and
+ * there is room only for 4 more nodes unless we make the array U16 instead of
+ * U8.
+ *
+ * The classes are
+ * 00-7F 0
+ * 80-81 7 Not legal immediately after start bytes E0 F0 F8 FC
+ * FE
+ * 82-83 8 Not legal immediately after start bytes E0 F0 F8 FC
+ * 84-87 9 Not legal immediately after start bytes E0 F0 F8
+ * 88-8F 10 Not legal immediately after start bytes E0 F0
+ * 90-9F 11 Not legal immediately after start byte E0
+ * A0-BF 12
+ * C0,C1 1
+ * C2-DF 2
+ * E0 13
+ * E1-EF 3
+ * F0 14
+ * F1-F7 4
+ * F8 15
+ * F9-FB 5
+ * FC 16
+ * FD 6
+ * FE 17 (or 1 on 32-bit machines, since it overflows)
+ * FF 1
+ */
+
+EXTCONST U8 PL_extended_utf8_dfa_tab[] = {
+ /* The first part of the table maps bytes to character classes to reduce
+ * the size of the transition table and create bitmasks. */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*00-0F*/
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*10-1F*/
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*20-2F*/
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*30-3F*/
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*40-4F*/
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*50-5F*/
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*60-6F*/
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*70-7F*/
+ 7, 7, 8, 8, 9, 9, 9, 9,10,10,10,10,10,10,10,10, /*80-8F*/
+ 11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11, /*90-9F*/
+ 12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12, /*A0-AF*/
+ 12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12, /*B0-BF*/
+ 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /*C0-CF*/
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /*D0-DF*/
+ 13, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, /*E0-EF*/
+ 14, 4, 4, 4, 4, 4, 4, 4,15, 5, 5, 5,16, 6, /*F0-FD*/
+# ifdef UV_IS_QUAD
+ 17, /*FE*/
+# else
+ 1, /*FE*/
+# endif
+ 1, /*FF*/
+
+/* The second part is a transition table that maps a combination
+ * of a state of the automaton and a character class to a new state, called a
+ * node. The nodes are:
+ * N0 The initial state, and final accepting one.
+ * N1 Any one continuation byte (80-BF) left. This is transitioned to
+ * immediately when the start byte indicates a two-byte sequence
+ * N2 Any two continuation bytes left.
+ * N3 Any three continuation bytes left.
+ * N4 Any four continuation bytes left.
+ * N5 Any five continuation bytes left.
+ * N6 Start byte is E0. Continuation bytes 80-9F are illegal (overlong);
+ * the other continuations transition to N1
+ * N7 Start byte is F0. Continuation bytes 80-8F are illegal (overlong);
+ * the other continuations transition to N2
+ * N8 Start byte is F8. Continuation bytes 80-87 are illegal (overlong);
+ * the other continuations transition to N3
+ * N9 Start byte is FC. Continuation bytes 80-83 are illegal (overlong);
+ * the other continuations transition to N4
+ * N10 Start byte is FE. Continuation bytes 80-81 are illegal (overlong);
+ * the other continuations transition to N5
+ * 1 Reject. All transitions not mentioned above (except the single
+ * byte ones, as they are always legal) are to this state.
+ */
+
+# define NUM_CLASSES 18
+# define N0 0
+# define N1 ((N0) + NUM_CLASSES)
+# define N2 ((N1) + NUM_CLASSES)
+# define N3 ((N2) + NUM_CLASSES)
+# define N4 ((N3) + NUM_CLASSES)
+# define N5 ((N4) + NUM_CLASSES)
+# define N6 ((N5) + NUM_CLASSES)
+# define N7 ((N6) + NUM_CLASSES)
+# define N8 ((N7) + NUM_CLASSES)
+# define N9 ((N8) + NUM_CLASSES)
+# define N10 ((N9) + NUM_CLASSES)
+
+/*Class: 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 */
+/*N0*/ 0, 1,N1,N2,N3,N4,N5, 1, 1, 1, 1, 1, 1,N6,N7,N8,N9,N10,
+/*N1*/ 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
+/*N2*/ 1, 1, 1, 1, 1, 1, 1,N1,N1,N1,N1,N1,N1, 1, 1, 1, 1, 1,
+/*N3*/ 1, 1, 1, 1, 1, 1, 1,N2,N2,N2,N2,N2,N2, 1, 1, 1, 1, 1,
+/*N4*/ 1, 1, 1, 1, 1, 1, 1,N3,N3,N3,N3,N3,N3, 1, 1, 1, 1, 1,
+/*N5*/ 1, 1, 1, 1, 1, 1, 1,N4,N4,N4,N4,N4,N4, 1, 1, 1, 1, 1,
+
+/*N6*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,N1, 1, 1, 1, 1, 1,
+/*N7*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,N2,N2, 1, 1, 1, 1, 1,
+/*N8*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,N3,N3,N3, 1, 1, 1, 1, 1,
+/*N9*/ 1, 1, 1, 1, 1, 1, 1, 1, 1,N4,N4,N4,N4, 1, 1, 1, 1, 1,
+/*N10*/ 1, 1, 1, 1, 1, 1, 1, 1,N5,N5,N5,N5,N5, 1, 1, 1, 1, 1,
+};
+
+/* And below is a version of the above table that accepts only strict UTF-8.
+ * Hence no surrogates nor non-characters, nor non-Unicode. Thus, if the input
+ * passes this dfa, it will be for a well-formed, non-problematic code point
+ * that can be returned immediately.
+ *
+ * The "Implementation details" portion of
+ * http://bjoern.hoehrmann.de/utf-8/decoder/dfa/ shows how
+ * the first portion of the table maps each possible byte into a character
+ * class. And that the classes for those bytes which are start bytes have been
+ * carefully chosen so they serve as well to be used as a shift value to mask
+ * off the leading 1 bits of the start byte. Unfortunately the addition of
+ * being able to distinguish non-characters makes this not fully work. This is
+ * because, now, the start bytes E1-EF have to be broken into 3 classes instead
+ * of 2:
+ * 1) ED because it could be a surrogate
+ * 2) EF because it could be a non-character
+ * 3) the rest, which can never evaluate to a problematic code point.
+ *
+ * Each of E1-EF has three leading 1 bits, then a 0. That means we could use a
+ * shift (and hence class number) of either 3 or 4 to get a mask that works.
+ * But that only allows two categories, and we need three. khw made the
+ * decision to therefore treat the ED start byte as an error, so that the dfa
+ * drops out immediately for that. In the dfa, classes 3 and 4 are used to
+ * distinguish EF vs the rest. Then special code is used to deal with ED,
+ * that's executed only when the dfa drops out. The code points started by ED
+ * are half surrogates, and half hangul syllables. This means that 2048 of the
+ * hangul syllables (about 18%) take longer than all other non-problematic
+ * code points to handle.
+ *
+ * The changes to handle non-characters requires the addition of states and
+ * classes to the dfa. (See the section on "Mapping bytes to character
+ * classes" in the linked-to document for further explanation of the original
+ * dfa.)
+ *
+ * The classes are
+ * 00-7F 0
+ * 80-8E 9
+ * 8F 10
+ * 90-9E 11
+ * 9F 12
+ * A0-AE 13
+ * AF 14
+ * B0-B6 15
+ * B7 16
+ * B8-BD 15
+ * BE 17
+ * BF 18
+ * C0,C1 1
+ * C2-DF 2
+ * E0 7
+ * E1-EC 3
+ * ED 1
+ * EE 3
+ * EF 4
+ * F0 8
+ * F1-F3 6 (6 bits can be stripped)
+ * F4 5 (only 5 can be stripped)
+ * F5-FF 1
+ */
+
+EXTCONST U8 PL_strict_utf8_dfa_tab[] = {
+ /* The first part of the table maps bytes to character classes to reduce
+ * the size of the transition table and create bitmasks. */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*00-0F*/
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*10-1F*/
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*20-2F*/
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*30-3F*/
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*40-4F*/
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*50-5F*/
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*60-6F*/
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*70-7F*/
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,10, /*80-8F*/
+ 11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,12, /*90-9F*/
+ 13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,14, /*A0-AF*/
+ 15,15,15,15,15,15,15,16,15,15,15,15,15,15,17,18, /*B0-BF*/
+ 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /*C0-CF*/
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /*D0-DF*/
+ 7, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 1, 3, 4, /*E0-EF*/
+ 8, 6, 6, 6, 5, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*F0-FF*/
+
+/* The second part is a transition table that maps a combination
+ * of a state of the automaton and a character class to a new state, called a
+ * node. The nodes are:
+ * N0 The initial state, and final accepting one.
+ * N1 Any one continuation byte (80-BF) left. This is transitioned to
+ * immediately when the start byte indicates a two-byte sequence
+ * N2 Any two continuation bytes left.
+ * N3 Start byte is E0. Continuation bytes 80-9F are illegal (overlong);
+ * the other continuations transition to state N1
+ * N4 Start byte is EF. Continuation byte B7 transitions to N8; BF to N9;
+ * the other continuations transitions to N1
+ * N5 Start byte is F0. Continuation bytes 80-8F are illegal (overlong);
+ * [9AB]F transition to N10; the other continuations to N2.
+ * N6 Start byte is F[123]. Continuation bytes [89AB]F transition
+ * to N10; the other continuations to N2.
+ * N7 Start byte is F4. Continuation bytes 90-BF are illegal
+ * (non-unicode); 8F transitions to N10; the other continuations to N2
+ * N8 Initial sequence is EF B7. Continuation bytes 90-AF are illegal
+ * (non-characters); the other continuations transition to N0.
+ * N9 Initial sequence is EF BF. Continuation bytes BE and BF are illegal
+ * (non-characters); the other continuations transition to N0.
+ * N10 Initial sequence is one of: F0 [9-B]F; F[123] [8-B]F; or F4 8F.
+ * Continuation byte BF transitions to N11; the other continuations to
+ * N1
+ * N11 Initial sequence is the two bytes given in N10 followed by BF.
+ * Continuation bytes BE and BF are illegal (non-characters); the other
+ * continuations transition to N0.
+ * 1 Reject. All transitions not mentioned above (except the single
+ * byte ones, as they are always legal) are to this state.
+ */
+
+# undef N0
+# undef N1
+# undef N2
+# undef N3
+# undef N4
+# undef N5
+# undef N6
+# undef N7
+# undef N8
+# undef N9
+# undef NUM_CLASSES
+# define NUM_CLASSES 19
+# define N0 0
+# define N1 ((N0) + NUM_CLASSES)
+# define N2 ((N1) + NUM_CLASSES)
+# define N3 ((N2) + NUM_CLASSES)
+# define N4 ((N3) + NUM_CLASSES)
+# define N5 ((N4) + NUM_CLASSES)
+# define N6 ((N5) + NUM_CLASSES)
+# define N7 ((N6) + NUM_CLASSES)
+# define N8 ((N7) + NUM_CLASSES)
+# define N9 ((N8) + NUM_CLASSES)
+# define N10 ((N9) + NUM_CLASSES)
+# define N11 ((N10) + NUM_CLASSES)
+
+/*Class: 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 */
+/*N0*/ 0, 1, N1, N2, N4, N7, N6, N3, N5, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+/*N1*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+/*N2*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, N1, N1, N1, N1, N1, N1, N1, N1, N1, N1,
+
+/*N3*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, N1, N1, N1, N1, N1, N1,
+/*N4*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, N1, N1, N1, N1, N1, N1, N1, N8, N1, N9,
+/*N5*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, N2,N10, N2,N10, N2, N2, N2,N10,
+/*N6*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, N2,N10, N2,N10, N2,N10, N2, N2, N2,N10,
+/*N7*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, N2,N10, 1, 1, 1, 1, 1, 1, 1, 1,
+/*N8*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0,
+/*N9*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1,
+/*N10*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, N1, N1, N1, N1, N1, N1, N1, N1, N1,N11,
+/*N11*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1,
+};
+
+/* And below is yet another version of the above tables that accepts only UTF-8
+ * as defined by Corrigendum #9. Hence no surrogates nor non-Unicode, but
+ * it allows non-characters. This is isomorphic to the original table
+ * in http://bjoern.hoehrmann.de/utf-8/decoder/dfa/
+ *
+ * The classes are
+ * 00-7F 0
+ * 80-8F 9
+ * 90-9F 10
+ * A0-BF 11
+ * C0,C1 1
+ * C2-DF 2
+ * E0 7
+ * E1-EC 3
+ * ED 4
+ * EE-EF 3
+ * F0 8
+ * F1-F3 6 (6 bits can be stripped)
+ * F4 5 (only 5 can be stripped)
+ * F5-FF 1
+ */
+
+EXTCONST U8 PL_c9_utf8_dfa_tab[] = {
+ /* The first part of the table maps bytes to character classes to reduce
+ * the size of the transition table and create bitmasks. */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*00-0F*/
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*10-1F*/
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*20-2F*/
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*30-3F*/
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*40-4F*/
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*50-5F*/
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*60-6F*/
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*70-7F*/
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, /*80-8F*/
+ 10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10, /*90-9F*/
+ 11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11, /*A0-AF*/
+ 11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11, /*B0-BF*/
+ 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /*C0-CF*/
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /*D0-DF*/
+ 7, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 3, 3, /*E0-EF*/
+ 8, 6, 6, 6, 5, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*F0-FF*/
+
+/* The second part is a transition table that maps a combination
+ * of a state of the automaton and a character class to a new state, called a
+ * node. The nodes are:
+ * N0 The initial state, and final accepting one.
+ * N1 Any one continuation byte (80-BF) left. This is transitioned to
+ * immediately when the start byte indicates a two-byte sequence
+ * N2 Any two continuation bytes left.
+ * N3 Any three continuation bytes left.
+ * N4 Start byte is E0. Continuation bytes 80-9F are illegal (overlong);
+ * the other continuations transition to state N1
+ * N5 Start byte is ED. Continuation bytes A0-BF all lead to surrogates,
+ * so are illegal. The other continuations transition to state N1.
+ * N6 Start byte is F0. Continuation bytes 80-8F are illegal (overlong);
+ * the other continuations transition to N2
+ * N7 Start byte is F4. Continuation bytes 90-BF are illegal
+ * (non-unicode); the other continuations transition to N2
+ * 1 Reject. All transitions not mentioned above (except the single
+ * byte ones, as they are always legal) are to this state.
+ */
+
+# undef N0
+# undef N1
+# undef N2
+# undef N3
+# undef N4
+# undef N5
+# undef N6
+# undef N7
+# undef NUM_CLASSES
+# define NUM_CLASSES 12
+# define N0 0
+# define N1 ((N0) + NUM_CLASSES)
+# define N2 ((N1) + NUM_CLASSES)
+# define N3 ((N2) + NUM_CLASSES)
+# define N4 ((N3) + NUM_CLASSES)
+# define N5 ((N4) + NUM_CLASSES)
+# define N6 ((N5) + NUM_CLASSES)
+# define N7 ((N6) + NUM_CLASSES)
+
+/*Class: 0 1 2 3 4 5 6 7 8 9 10 11 */
+/*N0*/ 0, 1, N1, N2, N5, N7, N3, N4, N6, 1, 1, 1,
+/*N1*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0,
+/*N2*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, N1, N1, N1,
+/*N3*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, N2, N2, N2,
+
+/*N4*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, N1,
+/*N5*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, N1, N1, 1,
+/*N6*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, N2, N2,
+/*N7*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, N2, 1, 1,
+};
+
+# else /* End of is DOINIT */
+
+EXTCONST U8 PL_extended_utf8_dfa_tab[];
+EXTCONST U8 PL_strict_utf8_dfa_tab[];
+EXTCONST U8 PL_c9_utf8_dfa_tab[];
+
+# endif
+#endif /* end of isn't EBCDIC */
#ifndef PERL_NO_INLINE_FUNCTIONS
/* Static inline funcs that depend on includes and declarations above.
# define KEYWORD_PLUGIN_MUTEX_LOCK MUTEX_LOCK(&PL_keyword_plugin_mutex)
# define KEYWORD_PLUGIN_MUTEX_UNLOCK MUTEX_UNLOCK(&PL_keyword_plugin_mutex)
# define KEYWORD_PLUGIN_MUTEX_TERM MUTEX_DESTROY(&PL_keyword_plugin_mutex)
+# define USER_PROP_MUTEX_INIT MUTEX_INIT(&PL_user_prop_mutex)
+# define USER_PROP_MUTEX_LOCK MUTEX_LOCK(&PL_user_prop_mutex)
+# define USER_PROP_MUTEX_UNLOCK MUTEX_UNLOCK(&PL_user_prop_mutex)
+# define USER_PROP_MUTEX_TERM MUTEX_DESTROY(&PL_user_prop_mutex)
#else
# define KEYWORD_PLUGIN_MUTEX_INIT NOOP
# define KEYWORD_PLUGIN_MUTEX_LOCK NOOP
# define KEYWORD_PLUGIN_MUTEX_UNLOCK NOOP
# define KEYWORD_PLUGIN_MUTEX_TERM NOOP
+# define USER_PROP_MUTEX_INIT NOOP
+# define USER_PROP_MUTEX_LOCK NOOP
+# define USER_PROP_MUTEX_UNLOCK NOOP
+# define USER_PROP_MUTEX_TERM NOOP
#endif
#ifdef USE_LOCALE /* These locale things are all subject to change */
- /* We create a C locale object unconditionally if we have the functions to
- * do so; hence must destroy it unconditionally at the end */
-# ifndef HAS_POSIX_2008_LOCALE
+ /* Returns TRUE if the plain locale pragma without a parameter is in effect.
+ * A NULL PL_curcop yields FALSE. */
+# define IN_LOCALE_RUNTIME (PL_curcop \
+ && CopHINTS_get(PL_curcop) & HINT_LOCALE)
+
+ /* Returns TRUE if either form of the locale pragma is in effect. Like
+ * IN_LOCALE_RUNTIME and IN_LC_PARTIAL_RUNTIME, this tests PL_curcop before
+ * handing it to CopHINTS_get(), so a NULL PL_curcop yields FALSE */
+# define IN_SOME_LOCALE_FORM_RUNTIME \
+ (PL_curcop \
+ && cBOOL(CopHINTS_get(PL_curcop) & (HINT_LOCALE|HINT_LOCALE_PARTIAL)))
+
+ /* Compile-time counterparts of the two macros above; these test PL_hints
+ * rather than the hints of PL_curcop */
+# define IN_LOCALE_COMPILETIME cBOOL(PL_hints & HINT_LOCALE)
+# define IN_SOME_LOCALE_FORM_COMPILETIME \
+ cBOOL(PL_hints & (HINT_LOCALE|HINT_LOCALE_PARTIAL))
+
+ /* Pick the compile-time or the run-time test according to
+ * IN_PERL_COMPILETIME */
+# define IN_LOCALE \
+ (IN_PERL_COMPILETIME ? IN_LOCALE_COMPILETIME : IN_LOCALE_RUNTIME)
+# define IN_SOME_LOCALE_FORM \
+ (IN_PERL_COMPILETIME ? IN_SOME_LOCALE_FORM_COMPILETIME \
+ : IN_SOME_LOCALE_FORM_RUNTIME)
+
+ /* The LC_ALL forms are synonyms for the plain-pragma tests */
+# define IN_LC_ALL_COMPILETIME IN_LOCALE_COMPILETIME
+# define IN_LC_ALL_RUNTIME IN_LOCALE_RUNTIME
+
+ /* TRUE if the HINT_LOCALE_PARTIAL hint bit is set */
+# define IN_LC_PARTIAL_COMPILETIME cBOOL(PL_hints & HINT_LOCALE_PARTIAL)
+# define IN_LC_PARTIAL_RUNTIME \
+ (PL_curcop && CopHINTS_get(PL_curcop) & HINT_LOCALE_PARTIAL)
+
+ /* TRUE for 'category' if the plain pragma is in effect, or if the partial
+ * form is in effect and Perl__is_in_locale_category() says the category is
+ * included. The function's first argument is TRUE for the compile-time
+ * test and FALSE for the run-time one. */
+# define IN_LC_COMPILETIME(category) \
+ ( IN_LC_ALL_COMPILETIME \
+ || ( IN_LC_PARTIAL_COMPILETIME \
+ && Perl__is_in_locale_category(aTHX_ TRUE, (category))))
+# define IN_LC_RUNTIME(category) \
+ (IN_LC_ALL_RUNTIME || (IN_LC_PARTIAL_RUNTIME \
+ && Perl__is_in_locale_category(aTHX_ FALSE, (category))))
+# define IN_LC(category) \
+ (IN_LC_COMPILETIME(category) || IN_LC_RUNTIME(category))
+
+# if defined (PERL_CORE) || defined (PERL_IN_XSUB_RE)
+
+ /* This internal macro should be called from places that operate under
+ * locale rules. If there is a problem with the current locale that
+ * hasn't been raised yet, it will output a warning this time. Because
+ * this will so rarely be true, there is no point to optimize for time;
+ * instead it makes sense to minimize space used and do all the work in
+ * the rarely called function */
+# ifdef USE_LOCALE_CTYPE
+# define _CHECK_AND_WARN_PROBLEMATIC_LOCALE \
+ STMT_START { \
+ if (UNLIKELY(PL_warn_locale)) { \
+ Perl__warn_problematic_locale(); \
+ } \
+ } STMT_END
+# else
+ /* Without USE_LOCALE_CTYPE the check expands to nothing */
+# define _CHECK_AND_WARN_PROBLEMATIC_LOCALE
+# endif
+
+
+ /* These two internal macros are called when a warning should be raised,
+ * and will do so if enabled. The first takes a single code point
+ * argument; the 2nd, is a pointer to the first byte of the UTF-8 encoded
+ * string, and an end position which it won't try to read past.
+ *
+ * The code point argument is parenthesized before being cast, so that an
+ * expression argument is converted to UV as a whole, matching the "%" UVXf
+ * format it is printed with. */
+# define _CHECK_AND_OUTPUT_WIDE_LOCALE_CP_MSG(cp) \
+ STMT_START { \
+ if (! PL_in_utf8_CTYPE_locale && ckWARN(WARN_LOCALE)) { \
+ Perl_warner(aTHX_ packWARN(WARN_LOCALE), \
+ "Wide character (U+%" UVXf ") in %s",\
+ (UV) (cp), OP_DESC(PL_op)); \
+ } \
+ } STMT_END
+
+ /* Warns about a wide character given as UTF-8. Nothing is done unless
+ * PL_in_utf8_CTYPE_locale is false and WARN_LOCALE warnings are enabled.
+ * In that case the character starting at 's' is decoded with
+ * utf8_to_uvchr_buf(), which is given 'send' as the end of the buffer. A
+ * decoded value of 0 is reported as UNICODE_REPLACEMENT. */
+# define _CHECK_AND_OUTPUT_WIDE_LOCALE_UTF8_MSG(s, send) \
+ STMT_START { /* Check if to warn before doing the conversion work */\
+ if (! PL_in_utf8_CTYPE_locale && ckWARN(WARN_LOCALE)) { \
+ UV cp = utf8_to_uvchr_buf((U8 *) (s), (U8 *) (send), NULL); \
+ Perl_warner(aTHX_ packWARN(WARN_LOCALE), \
+ "Wide character (U+%" UVXf ") in %s", \
+ (cp == 0) \
+ ? UNICODE_REPLACEMENT \
+ : (UV) cp, \
+ OP_DESC(PL_op)); \
+ } \
+ } STMT_END
+
+# endif /* PERL_CORE or PERL_IN_XSUB_RE */
+#else /* No locale usage */
+ /* Locale handling is not compiled in: every "is a locale pragma in effect"
+ * test is the constant 0, and the warning helpers expand to nothing */
+# define IN_LOCALE_RUNTIME 0
+# define IN_SOME_LOCALE_FORM_RUNTIME 0
+# define IN_LOCALE_COMPILETIME 0
+# define IN_SOME_LOCALE_FORM_COMPILETIME 0
+# define IN_LOCALE 0
+# define IN_SOME_LOCALE_FORM 0
+# define IN_LC_ALL_COMPILETIME 0
+# define IN_LC_ALL_RUNTIME 0
+# define IN_LC_PARTIAL_COMPILETIME 0
+# define IN_LC_PARTIAL_RUNTIME 0
+# define IN_LC_COMPILETIME(category) 0
+# define IN_LC_RUNTIME(category) 0
+# define IN_LC(category) 0
+# define _CHECK_AND_WARN_PROBLEMATIC_LOCALE
+# define _CHECK_AND_OUTPUT_WIDE_LOCALE_UTF8_MSG(s, send)
+# define _CHECK_AND_OUTPUT_WIDE_LOCALE_CP_MSG(c)
+#endif
+
+
+/* Locale/thread synchronization macros. These aren't needed if using
+ * thread-safe locale operations, except if something is broken */
+#if defined(USE_LOCALE) \
+ && defined(USE_ITHREADS) \
+ && (! defined(USE_THREAD_SAFE_LOCALE) || defined(TS_W32_BROKEN_LOCALECONV))
+
+/* We have a locale object holding the 'C' locale for Posix 2008 */
+# ifndef USE_POSIX_2008_LOCALE
# define _LOCALE_TERM_POSIX_2008 NOOP
# else
# define _LOCALE_TERM_POSIX_2008 \
} STMT_END
# endif
-# if ! defined(USE_ITHREADS) || defined(USE_THREAD_SAFE_LOCALE)
-# define LOCALE_INIT
-# define LOCALE_LOCK
-# define LOCALE_UNLOCK
+/* This is used as a generic lock for locale operations. For example this is
+ * used when calling nl_langinfo() so that another thread won't zap the
+ * contents of its buffer before it gets saved; and it's called when changing
+ * the locale of LC_MESSAGES. On some systems the latter can cause the
+ * nl_langinfo buffer to be zapped under a race condition.
+ *
+ * If combined with LC_NUMERIC_LOCK, calls to this and its corresponding unlock
+ * should be contained entirely within the locked portion of LC_NUMERIC. This
+ * mutex should be used only in very short sections of code, while
+ * LC_NUMERIC_LOCK may span more operations. By always following this
+ * convention, deadlock should be impossible. But if necessary, the two
+ * mutexes could be combined.
+ *
+ * Actually, the two macros just below with the '_V' suffixes are used in just
+ * a few places where there is a broken localeconv(), but otherwise things are
+ * thread safe, and hence don't need locking. Just below LOCALE_LOCK and
+ * LOCALE_UNLOCK are defined in terms of these for use everywhere else */
+
+/* Emits a DEBUG_Lv trace naming the calling file and line, then locks
+ * PL_locale_mutex */
+# define LOCALE_LOCK_V \
+ STMT_START { \
+ DEBUG_Lv(PerlIO_printf(Perl_debug_log, \
+ "%s: %d: locking locale\n", __FILE__, __LINE__)); \
+ MUTEX_LOCK(&PL_locale_mutex); \
+ } STMT_END
+
+/* Emits a DEBUG_Lv trace naming the calling file and line, then unlocks
+ * PL_locale_mutex */
+# define LOCALE_UNLOCK_V \
+ STMT_START { \
+ DEBUG_Lv(PerlIO_printf(Perl_debug_log, \
+ "%s: %d: unlocking locale\n", __FILE__, __LINE__)); \
+ MUTEX_UNLOCK(&PL_locale_mutex); \
+ } STMT_END
+
+/* On windows, only the '_V' forms (LOCALE_LOCK_V and LOCALE_UNLOCK_V) take
+ * the mutex; plain LOCALE_LOCK and LOCALE_UNLOCK do nothing. The mutex
+ * still has to be created and destroyed. Neither LOCALE_INIT nor
+ * LOCALE_TERM ends in a semicolon; the caller supplies it. */
+# ifdef TS_W32_BROKEN_LOCALECONV
+# define LOCALE_LOCK NOOP
+# define LOCALE_UNLOCK NOOP
+# define LOCALE_INIT MUTEX_INIT(&PL_locale_mutex)
+# define LOCALE_TERM MUTEX_DESTROY(&PL_locale_mutex)
# define LC_NUMERIC_LOCK(cond)
# define LC_NUMERIC_UNLOCK
-# define LOCALE_TERM STMT_START { _LOCALE_TERM_POSIX_2008; } STMT_END
# else
-# define LOCALE_INIT STMT_START { \
+ /* Here the generic lock macros are the mutex-taking '_V' forms */
+# define LOCALE_LOCK LOCALE_LOCK_V
+# define LOCALE_UNLOCK LOCALE_UNLOCK_V
+
+ /* We also need to lock LC_NUMERIC for non-windows (hence Posix 2008)
+ * systems, so two mutexes are created here */
+# define LOCALE_INIT STMT_START { \
 MUTEX_INIT(&PL_locale_mutex); \
 MUTEX_INIT(&PL_lc_numeric_mutex); \
 } STMT_END
-/* This mutex is used to create critical sections where we want the LC_NUMERIC
- * locale to be locked into either the C (standard) locale, or the underlying
- * locale, so that other threads interrupting this one don't change it to the
- * wrong state before we've had a chance to complete our operation. It can
- * stay locked over an entire printf operation, for example. And so is made
- * distinct from the LOCALE_LOCK mutex.
- *
- * This simulates kind of a general semaphore. The current thread will lock
- * the mutex if the per-thread variable is zero, and then increments that
- * variable. Each corresponding UNLOCK decrements the variable until it is 0,
- * at which point it actually unlocks the mutex. Since the variable is
- * per-thread, there is no race with other threads.
- *
- * The single argument is a condition to test for, and if true, to panic, as
- * this would be an attempt to complement the LC_NUMERIC state, and we're not
- * supposed to because it's locked.
- *
- * Clang improperly gives warnings for this, if not silenced:
- * https://clang.llvm.org/docs/ThreadSafetyAnalysis.html#conditional-locks
- * */
+ /* Destroys PL_locale_mutex and PL_lc_numeric_mutex, then runs
+ * _LOCALE_TERM_POSIX_2008 */
+# define LOCALE_TERM STMT_START { \
+ MUTEX_DESTROY(&PL_locale_mutex); \
+ MUTEX_DESTROY(&PL_lc_numeric_mutex); \
+ _LOCALE_TERM_POSIX_2008; \
+ } STMT_END
+
+ /* This mutex is used to create critical sections where we want the
+ * LC_NUMERIC locale to be locked into either the C (standard) locale, or
+ * the underlying locale, so that other threads interrupting this one don't
+ * change it to the wrong state before we've had a chance to complete our
+ * operation. It can stay locked over an entire printf operation, for
+ * example. And so is made distinct from the LOCALE_LOCK mutex.
+ *
+ * This simulates kind of a general semaphore. The current thread will
+ * lock the mutex if the per-thread variable is zero, and then increments
+ * that variable. Each corresponding UNLOCK decrements the variable until
+ * it is 0, at which point it actually unlocks the mutex. Since the
+ * variable is per-thread, there is no race with other threads.
+ *
+ * The single argument is a condition to test for, and if true, to panic,
+ * as this would be an attempt to complement the LC_NUMERIC state, and
+ * we're not supposed to because it's locked.
+ *
+ * Clang improperly gives warnings for this, if not silenced:
+ * https://clang.llvm.org/docs/ThreadSafetyAnalysis.html#conditional-locks
+ * */
# define LC_NUMERIC_LOCK(cond_to_panic_if_already_locked) \
CLANG_DIAG_IGNORE(-Wthread-safety) \
STMT_START { \
} STMT_END \
CLANG_DIAG_RESTORE
-/* This is used as a generic lock for locale operations. For example this is
- * used when calling nl_langinfo() so that another thread won't zap the
- * contents of its buffer before it gets saved; and it's called when changing
- * the locale of LC_MESSAGES. On some systems the latter can cause the
- * nl_langinfo buffer to be zapped under a race condition.
- *
- * If combined with LC_NUMERIC_LOCK, calls to this and its corresponding unlock
- * should be contained entirely within the locked portion of LC_NUMERIC. This
- * mutex should be used only in very short sections of code, while
- * LC_NUMERIC_LOCK may span more operations. By always following this
- * convention, deadlock should be impossible. But if necessary, the two
- * mutexes could be combined */
-# define LOCALE_LOCK \
- STMT_START { \
- DEBUG_Lv(PerlIO_printf(Perl_debug_log, \
- "%s: %d: locking locale\n", __FILE__, __LINE__)); \
- MUTEX_LOCK(&PL_locale_mutex); \
- } STMT_END
-# define LOCALE_UNLOCK \
- STMT_START { \
- DEBUG_Lv(PerlIO_printf(Perl_debug_log, \
- "%s: %d: unlocking locale\n", __FILE__, __LINE__)); \
- MUTEX_UNLOCK(&PL_locale_mutex); \
- } STMT_END
-
-# define LOCALE_TERM \
- STMT_START { \
- MUTEX_DESTROY(&PL_locale_mutex); \
- MUTEX_DESTROY(&PL_lc_numeric_mutex); \
- _LOCALE_TERM_POSIX_2008; \
- } STMT_END
-# endif
-
-/* Returns TRUE if the plain locale pragma without a parameter is in effect
- */
-# define IN_LOCALE_RUNTIME (PL_curcop \
- && CopHINTS_get(PL_curcop) & HINT_LOCALE)
-
-/* Returns TRUE if either form of the locale pragma is in effect */
-# define IN_SOME_LOCALE_FORM_RUNTIME \
- cBOOL(CopHINTS_get(PL_curcop) & (HINT_LOCALE|HINT_LOCALE_PARTIAL))
-
-# define IN_LOCALE_COMPILETIME cBOOL(PL_hints & HINT_LOCALE)
-# define IN_SOME_LOCALE_FORM_COMPILETIME \
- cBOOL(PL_hints & (HINT_LOCALE|HINT_LOCALE_PARTIAL))
-
-# define IN_LOCALE \
- (IN_PERL_COMPILETIME ? IN_LOCALE_COMPILETIME : IN_LOCALE_RUNTIME)
-# define IN_SOME_LOCALE_FORM \
- (IN_PERL_COMPILETIME ? IN_SOME_LOCALE_FORM_COMPILETIME \
- : IN_SOME_LOCALE_FORM_RUNTIME)
-
-# define IN_LC_ALL_COMPILETIME IN_LOCALE_COMPILETIME
-# define IN_LC_ALL_RUNTIME IN_LOCALE_RUNTIME
-
-# define IN_LC_PARTIAL_COMPILETIME cBOOL(PL_hints & HINT_LOCALE_PARTIAL)
-# define IN_LC_PARTIAL_RUNTIME \
- (PL_curcop && CopHINTS_get(PL_curcop) & HINT_LOCALE_PARTIAL)
-
-# define IN_LC_COMPILETIME(category) \
- (IN_LC_ALL_COMPILETIME || (IN_LC_PARTIAL_COMPILETIME \
- && Perl__is_in_locale_category(aTHX_ TRUE, (category))))
-# define IN_LC_RUNTIME(category) \
- (IN_LC_ALL_RUNTIME || (IN_LC_PARTIAL_RUNTIME \
- && Perl__is_in_locale_category(aTHX_ FALSE, (category))))
-# define IN_LC(category) \
- (IN_LC_COMPILETIME(category) || IN_LC_RUNTIME(category))
-
-# if defined (PERL_CORE) || defined (PERL_IN_XSUB_RE)
-
- /* This internal macro should be called from places that operate under
- * locale rules. It there is a problem with the current locale that
- * hasn't been raised yet, it will output a warning this time. Because
- * this will so rarely be true, there is no point to optimize for
- * time; instead it makes sense to minimize space used and do all the
- * work in the rarely called function */
-# ifdef USE_LOCALE_CTYPE
-# define _CHECK_AND_WARN_PROBLEMATIC_LOCALE \
- STMT_START { \
- if (UNLIKELY(PL_warn_locale)) { \
- Perl__warn_problematic_locale(); \
- } \
- } STMT_END
-# else
-# define _CHECK_AND_WARN_PROBLEMATIC_LOCALE
-# endif
-
-
- /* These two internal macros are called when a warning should be raised,
- * and will do so if enabled. The first takes a single code point
- * argument; the 2nd, is a pointer to the first byte of the UTF-8 encoded
- * string, and an end position which it won't try to read past */
-# define _CHECK_AND_OUTPUT_WIDE_LOCALE_CP_MSG(cp) \
- STMT_START { \
- if (! PL_in_utf8_CTYPE_locale && ckWARN(WARN_LOCALE)) { \
- Perl_warner(aTHX_ packWARN(WARN_LOCALE), \
- "Wide character (U+%" UVXf ") in %s",\
- (UV) cp, OP_DESC(PL_op)); \
- } \
- } STMT_END
-
-# define _CHECK_AND_OUTPUT_WIDE_LOCALE_UTF8_MSG(s, send) \
- STMT_START { /* Check if to warn before doing the conversion work */\
- if (! PL_in_utf8_CTYPE_locale && ckWARN(WARN_LOCALE)) { \
- UV cp = utf8_to_uvchr_buf((U8 *) s, (U8 *) send, NULL); \
- Perl_warner(aTHX_ packWARN(WARN_LOCALE), \
- "Wide character (U+%" UVXf ") in %s", \
- (cp == 0) \
- ? UNICODE_REPLACEMENT \
- : (UV) cp, \
- OP_DESC(PL_op)); \
- } \
- } STMT_END
-
-# endif /* PERL_CORE or PERL_IN_XSUB_RE */
-#else /* No locale usage */
-# define LOCALE_INIT
-# define LOCALE_TERM
-# define LOCALE_LOCK
-# define LOCALE_UNLOCK
-# define IN_LOCALE_RUNTIME 0
-# define IN_SOME_LOCALE_FORM_RUNTIME 0
-# define IN_LOCALE_COMPILETIME 0
-# define IN_SOME_LOCALE_FORM_COMPILETIME 0
-# define IN_LOCALE 0
-# define IN_SOME_LOCALE_FORM 0
-# define IN_LC_ALL_COMPILETIME 0
-# define IN_LC_ALL_RUNTIME 0
-# define IN_LC_PARTIAL_COMPILETIME 0
-# define IN_LC_PARTIAL_RUNTIME 0
-# define IN_LC_COMPILETIME(category) 0
-# define IN_LC_RUNTIME(category) 0
-# define IN_LC(category) 0
-
-# define _CHECK_AND_WARN_PROBLEMATIC_LOCALE
-# define _CHECK_AND_OUTPUT_WIDE_LOCALE_CP_MSG(a)
-# define _CHECK_AND_OUTPUT_WIDE_LOCALE_UTF8_MSG(a,b)
+# endif /* End of needs locking LC_NUMERIC */
+#else /* Below is no locale sync needed */
+ /* With no locale synchronization needed, every locking macro, as well as
+ * the init and term ones, expands to nothing */
+# define LOCALE_INIT
+# define LOCALE_LOCK
+# define LOCALE_LOCK_V
+# define LOCALE_UNLOCK
+# define LOCALE_UNLOCK_V
+# define LC_NUMERIC_LOCK(cond)
+# define LC_NUMERIC_UNLOCK
+# define LOCALE_TERM
#endif
#ifdef USE_LOCALE_NUMERIC
/* These macros are for toggling between the underlying locale (UNDERLYING or
- * LOCAL) and the C locale (STANDARD).
+ * LOCAL) and the C locale (STANDARD). (Actually we don't have to use the C
+ * locale if the underlying locale is indistinguishable from it in the numeric
+ * operations used by Perl, namely the decimal point, and even the thousands
+ * separator.)
=head1 Locale-related functions and macros
=for apidoc Am|void|STORE_LC_NUMERIC_SET_TO_NEEDED
-This is used to help wrap XS or C code that that is C<LC_NUMERIC> locale-aware.
-This locale category is generally kept set to the C locale by Perl for
-backwards compatibility, and because most XS code that reads floating point
-values can cope only with the decimal radix character being a dot.
+This is used to help wrap XS or C code that is C<LC_NUMERIC> locale-aware.
+This locale category is generally kept set to a locale where the decimal radix
+character is a dot, and the separator between groups of digits is empty. This
+is because most XS code that reads floating point numbers is expecting them to
+have this syntax.
This macro makes sure the current C<LC_NUMERIC> state is set properly, to be
aware of locale if the call to the XS or C code from the Perl program is
*/
-/* The numeric locale is generally kept in the C locale instead of the
- * underlying locale. The current status is known by looking at two words.
- * One is non-zero if the current numeric locale is the standard C/POSIX one.
- * The other is non-zero if the current locale is the underlying locale. Both
- * can be non-zero if, as often happens, the underlying locale is C.
- *
- * Its slightly more complicated than this, as the PL_numeric_standard variable
- * is set if the current numeric locale is indistinguishable from the C locale.
- * This happens when the radix character is a dot, and the thousands separator
- * is the empty string.
+/* If the underlying numeric locale has a non-dot decimal point or has a
+ * non-empty floating point thousands separator, the current locale is
+ * generally kept in the C locale instead of that underlying locale. The
+ * current status is known by looking at two words. One is non-zero if the
+ * current numeric locale is the standard C/POSIX one or is indistinguishable
+ * from C. The other is non-zero if the current locale is the underlying
+ * locale. Both can be non-zero if, as often happens, the underlying locale is
+ * C or indistinguishable from it.
*
* khw believes the reason for the variables instead of the bits in a single
* word is to avoid having to have masking instructions. */
#ifdef USE_QUADMATH
# define Perl_strtod(s, e) strtoflt128(s, e)
#elif defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
-# if defined(HAS_STRTOLD)
+# if defined(__MINGW64_VERSION_MAJOR) && defined(HAS_STRTOLD)
+ /***********************************************
+ We are unable to use strtold because of
+ https://sourceforge.net/p/mingw-w64/bugs/711/
+ &
+ https://sourceforge.net/p/mingw-w64/bugs/725/
+
+ but __mingw_strtold is fine.
+ ***********************************************/
+# define Perl_strtod(s, e) __mingw_strtold(s, e)
+# elif defined(HAS_STRTOLD)
# define Perl_strtod(s, e) strtold(s, e)
# elif defined(HAS_STRTOD)
# define Perl_strtod(s, e) (NV)strtod(s, e) /* Unavoidable loss. */
#ifdef DOUBLE_HAS_NAN
+START_EXTERN_C
+
#ifdef DOINIT
/* PL_inf and PL_nan initialization.
#endif
+END_EXTERN_C
+
/* If you have not defined NV_INF/NV_NAN (like for example win32/win32.h),
* we will define NV_INF/NV_NAN as the nv part of the global const
* PL_inf/PL_nan. Note, however, that the preexisting NV_INF/NV_NAN