X-Git-Url: https://perl5.git.perl.org/perl5.git/blobdiff_plain/8d72e74e3dc5017c5a3fade48e0c74109c297ebc..24f3e849b5ce9f3bf6b6be5d3e730562e927aa79:/locale.c diff --git a/locale.c b/locale.c index 277e038..bffb812 100644 --- a/locale.c +++ b/locale.c @@ -53,6 +53,9 @@ #ifdef I_WCHAR # include #endif +#ifdef I_WCTYPE +# include +#endif /* If the environment says to, we can output debugging information during * initialization. This is done before option parsing, and before any thread @@ -207,7 +210,7 @@ const int categories[] = { /* The top-most real element is LC_ALL */ -const char * category_names[] = { +const char * const category_names[] = { # ifdef USE_LOCALE_NUMERIC "LC_NUMERIC", @@ -331,7 +334,7 @@ S_category_name(const int category) # define LC_COLLATE_INDEX _DUMMY_CTYPE + 1 # define _DUMMY_COLLATE LC_COLLATE_INDEX # else -# define _DUMMY_COLLATE _DUMMY_COLLATE +# define _DUMMY_COLLATE _DUMMY_CTYPE # endif # ifdef USE_LOCALE_TIME # define LC_TIME_INDEX _DUMMY_COLLATE + 1 @@ -399,6 +402,7 @@ S_category_name(const int category) * known at compile time; "do_setlocale_r", not known until run time */ # define do_setlocale_c(cat, locale) my_setlocale(cat, locale) # define do_setlocale_r(cat, locale) my_setlocale(cat, locale) +# define FIX_GLIBC_LC_MESSAGES_BUG(i) #else /* Below uses POSIX 2008 */ @@ -412,6 +416,22 @@ S_category_name(const int category) emulate_setlocale(cat, locale, cat ## _INDEX, TRUE) # define do_setlocale_r(cat, locale) emulate_setlocale(cat, locale, 0, FALSE) +# if ! defined(__GLIBC__) || ! defined(USE_LOCALE_MESSAGES) + +# define FIX_GLIBC_LC_MESSAGES_BUG(i) + +# else /* Invalidate glibc cache of loaded translations, see [perl #134264] */ + +# include +# define FIX_GLIBC_LC_MESSAGES_BUG(i) \ + STMT_START { \ + if ((i) == LC_MESSAGES_INDEX) { \ + textdomain(textdomain(NULL)); \ + } \ + } STMT_END + +# endif + /* A third array, parallel to the ones above to map from category to its * equivalent mask */ const int category_masks[] = { @@ -586,12 +606,15 @@ S_emulate_setlocale(const int category, /* If this assert fails, adjust the size of curlocales in intrpvar.h */ STATIC_ASSERT_STMT(C_ARRAY_LENGTH(PL_curlocales) > LC_ALL_INDEX); -# if defined(_NL_LOCALE_NAME) && defined(DEBUGGING) - +# if defined(_NL_LOCALE_NAME) \ + && defined(DEBUGGING) \ + && ! defined(SETLOCALE_ACCEPTS_ANY_LOCALE_NAME) + /* On systems that accept any locale name, the real underlying locale + * is often returned by this internal function, so we can't use it */ { /* Internal glibc for querylocale(), but doesn't handle * empty-string ("") locale properly; who knows what other - * glitches. Check it for now, under debug. */ + * glitches. Check for it now, under debug. */ char * temp_name = nl_langinfo_l(_NL_LOCALE_NAME(category), uselocale((locale_t) 0)); @@ -733,58 +756,9 @@ S_emulate_setlocale(const int category, # endif - } - - assert(PL_C_locale_obj); - - /* Otherwise, we are switching locales. This will generally entail freeing - * the current one's space (at the C library's discretion). We need to - * stop using that locale before the switch. So switch to a known locale - * object that we don't otherwise mess with. This returns the locale - * object in effect at the time of the switch. */ - old_obj = uselocale(PL_C_locale_obj); - -# ifdef DEBUGGING - - if (DEBUG_Lv_TEST || debug_initialization) { - PerlIO_printf(Perl_debug_log, "%s:%d: emulate_setlocale was using %p\n", __FILE__, __LINE__, old_obj); - } - -# endif - - if (! old_obj) { - -# ifdef DEBUGGING - - if (DEBUG_L_TEST || debug_initialization) { - dSAVE_ERRNO; - PerlIO_printf(Perl_debug_log, "%s:%d: emulate_setlocale switching to C failed: %d\n", __FILE__, __LINE__, GET_ERRNO); - RESTORE_ERRNO; - } - -# endif - - return NULL; - } - -# ifdef DEBUGGING - - if (DEBUG_Lv_TEST || debug_initialization) { - PerlIO_printf(Perl_debug_log, "%s:%d: emulate_setlocale now using %p\n", __FILE__, __LINE__, PL_C_locale_obj); - } - -# endif - - /* If we weren't in a thread safe locale, set so that newlocale() below - which uses 'old_obj', uses an empty one. Same for our reserved C object. - The latter is defensive coding, so that, even if there is some bug, we - will never end up trying to modify either of these, as if passed to - newlocale(), they can be. */ - if (old_obj == LC_GLOBAL_LOCALE || old_obj == PL_C_locale_obj) { - old_obj = (locale_t) 0; - } + } /* End of this being setlocale(LC_foo, NULL) */ - /* Create the new locale (it may actually modify the current one). */ + /* Here, we are switching locales. */ # ifndef HAS_QUERYLOCALE @@ -812,23 +786,10 @@ S_emulate_setlocale(const int category, const char * default_name; - /* To minimize other threads messing with the environment, we copy - * the variable, making it a temporary. But this doesn't work upon - * program initialization before any scopes are created, and at - * this time, there's nothing else going on that would interfere. - * So skip the copy in that case */ - if (PL_scopestack_ix == 0) { - default_name = PerlEnv_getenv("LANG"); - } - else { - default_name = savepv(PerlEnv_getenv("LANG")); - } + default_name = PerlEnv_getenv("LANG"); if (! default_name || strEQ(default_name, "")) { - default_name = "C"; - } - else if (PL_scopestack_ix != 0) { - SAVEFREEPV(default_name); + default_name = "C"; } if (category != LC_ALL) { @@ -864,22 +825,19 @@ S_emulate_setlocale(const int category, for (i = 0; i < LC_ALL_INDEX; i++) { const char * const env_override - = savepv(PerlEnv_getenv(category_names[i])); + = PerlEnv_getenv(category_names[i]); const char * this_locale = ( env_override && strNE(env_override, "")) ? env_override : default_name; if (! emulate_setlocale(categories[i], this_locale, i, TRUE)) { - Safefree(env_override); return NULL; } if (strNE(this_locale, default_name)) { did_override = TRUE; } - - Safefree(env_override); } /* If all the categories are the same, we can set LC_ALL to @@ -901,7 +859,7 @@ S_emulate_setlocale(const int category, } } } - } + } /* End of this being setlocale(LC_foo, "") */ else if (strchr(locale, ';')) { /* LC_ALL may actually incude a conglomeration of various categories. @@ -910,6 +868,7 @@ S_emulate_setlocale(const int category, * other platforms do it differently, so we have to handle all cases * ourselves */ + unsigned int i; const char * s = locale; const char * e = locale + strlen(locale); const char * p = s; @@ -917,8 +876,17 @@ S_emulate_setlocale(const int category, const char * name_start; const char * name_end; + /* If the string that gives what to set doesn't include all categories, + * the omitted ones get set to "C". To get this behavior, first set + * all the individual categories to "C", and override the furnished + * ones below */ + for (i = 0; i < LC_ALL_INDEX; i++) { + if (! emulate_setlocale(categories[i], "C", i, TRUE)) { + return NULL; + } + } + while (s < e) { - unsigned int i; /* Parse through the category */ while (isWORDCHAR(*p)) { @@ -988,81 +956,181 @@ S_emulate_setlocale(const int category, assert(category == LC_ALL); return do_setlocale_c(LC_ALL, NULL); - } + } /* End of this being setlocale(LC_ALL, + "LC_CTYPE=foo;LC_NUMERIC=bar;...") */ ready_to_set: ; + /* Here at the end of having to deal with the absence of querylocale(). + * Some cases have already been fully handled by recursive calls to this + * function. But at this point, we haven't dealt with those, but are now + * prepared to, knowing what the locale name to set this category to is. + * This would have come for free if this system had had querylocale() */ + # endif /* end of ! querylocale */ - /* Ready to create a new locale by modification of the exising one */ - new_obj = newlocale(mask, locale, old_obj); + assert(PL_C_locale_obj); - if (! new_obj) { - dSAVE_ERRNO; + /* Switching locales generally entails freeing the current one's space (at + * the C library's discretion). We need to stop using that locale before + * the switch. So switch to a known locale object that we don't otherwise + * mess with. This returns the locale object in effect at the time of the + * switch. */ + old_obj = uselocale(PL_C_locale_obj); # ifdef DEBUGGING - if (DEBUG_L_TEST || debug_initialization) { - PerlIO_printf(Perl_debug_log, "%s:%d: emulate_setlocale creating new object failed: %d\n", __FILE__, __LINE__, GET_ERRNO); - } + if (DEBUG_Lv_TEST || debug_initialization) { + PerlIO_printf(Perl_debug_log, "%s:%d: emulate_setlocale was using %p\n", __FILE__, __LINE__, old_obj); + } # endif - if (! uselocale(old_obj)) { + if (! old_obj) { # ifdef DEBUGGING - if (DEBUG_L_TEST || debug_initialization) { - PerlIO_printf(Perl_debug_log, "%s:%d: switching back failed: %d\n", __FILE__, __LINE__, GET_ERRNO); - } + if (DEBUG_L_TEST || debug_initialization) { + dSAVE_ERRNO; + PerlIO_printf(Perl_debug_log, "%s:%d: emulate_setlocale switching to C failed: %d\n", __FILE__, __LINE__, GET_ERRNO); + RESTORE_ERRNO; + } # endif - } - RESTORE_ERRNO; return NULL; } # ifdef DEBUGGING if (DEBUG_Lv_TEST || debug_initialization) { - PerlIO_printf(Perl_debug_log, "%s:%d: emulate_setlocale created %p\n", __FILE__, __LINE__, new_obj); + PerlIO_printf(Perl_debug_log, + "%s:%d: emulate_setlocale now using %p\n", + __FILE__, __LINE__, PL_C_locale_obj); } # endif - /* And switch into it */ - if (! uselocale(new_obj)) { - dSAVE_ERRNO; + /* If this call is to switch to the LC_ALL C locale, it already exists, and + * in fact, we already have switched to it (in preparation for what + * normally is to come). But since we're already there, continue to use + * it instead of trying to create a new locale */ + if (mask == LC_ALL_MASK && isNAME_C_OR_POSIX(locale)) { # ifdef DEBUGGING - if (DEBUG_L_TEST || debug_initialization) { - PerlIO_printf(Perl_debug_log, "%s:%d: emulate_setlocale switching to new object failed\n", __FILE__, __LINE__); + if (DEBUG_Lv_TEST || debug_initialization) { + PerlIO_printf(Perl_debug_log, + "%s:%d: will stay in C object\n", __FILE__, __LINE__); } # endif - if (! uselocale(old_obj)) { + new_obj = PL_C_locale_obj; + + /* We already had switched to the C locale in preparation for freeing + * 'old_obj' */ + if (old_obj != LC_GLOBAL_LOCALE && old_obj != PL_C_locale_obj) { + freelocale(old_obj); + } + } + else { + /* If we weren't in a thread safe locale, set so that newlocale() below + * which uses 'old_obj', uses an empty one. Same for our reserved C + * object. The latter is defensive coding, so that, even if there is + * some bug, we will never end up trying to modify either of these, as + * if passed to newlocale(), they can be. */ + if (old_obj == LC_GLOBAL_LOCALE || old_obj == PL_C_locale_obj) { + old_obj = (locale_t) 0; + } + + /* Ready to create a new locale by modification of the exising one */ + new_obj = newlocale(mask, locale, old_obj); + + if (! new_obj) { + dSAVE_ERRNO; # ifdef DEBUGGING if (DEBUG_L_TEST || debug_initialization) { - PerlIO_printf(Perl_debug_log, "%s:%d: switching back failed: %d\n", __FILE__, __LINE__, GET_ERRNO); + PerlIO_printf(Perl_debug_log, + "%s:%d: emulate_setlocale creating new object" + " failed: %d\n", __FILE__, __LINE__, GET_ERRNO); } # endif + if (! uselocale(old_obj)) { + +# ifdef DEBUGGING + + if (DEBUG_L_TEST || debug_initialization) { + PerlIO_printf(Perl_debug_log, + "%s:%d: switching back failed: %d\n", + __FILE__, __LINE__, GET_ERRNO); + } + +# endif + + } + RESTORE_ERRNO; + return NULL; + } + +# ifdef DEBUGGING + + if (DEBUG_Lv_TEST || debug_initialization) { + PerlIO_printf(Perl_debug_log, + "%s:%d: emulate_setlocale created %p", + __FILE__, __LINE__, new_obj); + if (old_obj) { + PerlIO_printf(Perl_debug_log, + "; should have freed %p", old_obj); + } + PerlIO_printf(Perl_debug_log, "\n"); + } + +# endif + + /* And switch into it */ + if (! uselocale(new_obj)) { + dSAVE_ERRNO; + +# ifdef DEBUGGING + + if (DEBUG_L_TEST || debug_initialization) { + PerlIO_printf(Perl_debug_log, + "%s:%d: emulate_setlocale switching to new object" + " failed\n", __FILE__, __LINE__); + } + +# endif + + if (! uselocale(old_obj)) { + +# ifdef DEBUGGING + + if (DEBUG_L_TEST || debug_initialization) { + PerlIO_printf(Perl_debug_log, + "%s:%d: switching back failed: %d\n", + __FILE__, __LINE__, GET_ERRNO); + } + +# endif + + } + freelocale(new_obj); + RESTORE_ERRNO; + return NULL; } - freelocale(new_obj); - RESTORE_ERRNO; - return NULL; } # ifdef DEBUGGING if (DEBUG_Lv_TEST || debug_initialization) { - PerlIO_printf(Perl_debug_log, "%s:%d: emulate_setlocale now using %p\n", __FILE__, __LINE__, new_obj); + PerlIO_printf(Perl_debug_log, + "%s:%d: emulate_setlocale now using %p\n", + __FILE__, __LINE__, new_obj); } # endif @@ -1094,6 +1162,8 @@ S_emulate_setlocale(const int category, Safefree(PL_curlocales[i]); PL_curlocales[i] = savepv(locale); } + + FIX_GLIBC_LC_MESSAGES_BUG(LC_MESSAGES_INDEX); } else { @@ -1108,6 +1178,8 @@ S_emulate_setlocale(const int category, /* Then update the category's record */ Safefree(PL_curlocales[index]); PL_curlocales[index] = savepv(locale); + + FIX_GLIBC_LC_MESSAGES_BUG(index); } # endif @@ -1174,7 +1246,7 @@ S_locking_setlocale(pTHX_ * ones. This is because as described earlier. If we know on input the * index corresponding to the category into the array where we store the * current locales, we don't have to calculate it. If the caller knows at - * compile time what the index is, it it can pass it, setting + * compile time what the index is, it can pass it, setting * 'is_index_valid' to TRUE; otherwise the index parameter is ignored. * */ @@ -1248,6 +1320,7 @@ S_locking_setlocale(pTHX_ } #endif +#ifdef USE_LOCALE STATIC void S_set_numeric_radix(pTHX_ const bool use_locale) @@ -1283,6 +1356,10 @@ S_set_numeric_radix(pTHX_ const bool use_locale) } # endif +#else + + PERL_UNUSED_ARG(use_locale); + #endif /* USE_LOCALE_NUMERIC and can find the radix char */ } @@ -1344,14 +1421,20 @@ S_new_numeric(pTHX_ const char *newnum) PL_numeric_underlying = TRUE; PL_numeric_standard = isNAME_C_OR_POSIX(save_newnum); +#ifndef TS_W32_BROKEN_LOCALECONV + /* If its name isn't C nor POSIX, it could still be indistinguishable from - * them */ + * them. But on broken Windows systems calling my_nl_langinfo() for + * THOUSEP can currently (but rarely) cause a race, so avoid doing that, + * and just always change the locale if not C nor POSIX on those systems */ if (! PL_numeric_standard) { PL_numeric_standard = cBOOL(strEQ(".", my_nl_langinfo(RADIXCHAR, FALSE /* Don't toggle locale */ )) && strEQ("", my_nl_langinfo(THOUSEP, FALSE))); } +#endif + /* Save the new name if it isn't the same as the previous one, if any */ if (! PL_numeric_name || strNE(PL_numeric_name, save_newnum)) { Safefree(PL_numeric_name); @@ -1401,19 +1484,20 @@ Perl_set_numeric_standard(pTHX) * to our records (which could be wrong if some XS code has changed the * locale behind our back) */ - do_setlocale_c(LC_NUMERIC, "C"); - PL_numeric_standard = TRUE; - PL_numeric_underlying = PL_numeric_underlying_is_standard; - set_numeric_radix(0); - # ifdef DEBUGGING if (DEBUG_L_TEST || debug_initialization) { PerlIO_printf(Perl_debug_log, - "LC_NUMERIC locale now is standard C\n"); + "Setting LC_NUMERIC locale to standard C\n"); } # endif + + do_setlocale_c(LC_NUMERIC, "C"); + PL_numeric_standard = TRUE; + PL_numeric_underlying = PL_numeric_underlying_is_standard; + set_numeric_radix(0); + #endif /* USE_LOCALE_NUMERIC */ } @@ -1430,20 +1514,21 @@ Perl_set_numeric_underlying(pTHX) * if toggling isn't necessary according to our records (which could be * wrong if some XS code has changed the locale behind our back) */ - do_setlocale_c(LC_NUMERIC, PL_numeric_name); - PL_numeric_standard = PL_numeric_underlying_is_standard; - PL_numeric_underlying = TRUE; - set_numeric_radix(! PL_numeric_standard); - # ifdef DEBUGGING if (DEBUG_L_TEST || debug_initialization) { PerlIO_printf(Perl_debug_log, - "LC_NUMERIC locale now is %s\n", + "Setting LC_NUMERIC locale to %s\n", PL_numeric_name); } # endif + + do_setlocale_c(LC_NUMERIC, PL_numeric_name); + PL_numeric_standard = PL_numeric_underlying_is_standard; + PL_numeric_underlying = TRUE; + set_numeric_radix(! PL_numeric_standard); + #endif /* USE_LOCALE_NUMERIC */ } @@ -1457,7 +1542,6 @@ S_new_ctype(pTHX_ const char *newctype) #ifndef USE_LOCALE_CTYPE - PERL_ARGS_ASSERT_NEW_CTYPE; PERL_UNUSED_ARG(newctype); PERL_UNUSED_CONTEXT; @@ -1479,6 +1563,7 @@ S_new_ctype(pTHX_ const char *newctype) /* Don't check for problems if we are suppressing the warnings */ bool check_for_problems = ckWARN_d(WARN_LOCALE) || UNLIKELY(DEBUG_L_TEST); + bool maybe_utf8_turkic = FALSE; PERL_ARGS_ASSERT_NEW_CTYPE; @@ -1495,6 +1580,23 @@ S_new_ctype(pTHX_ const char *newctype) * handle this specially because of the three problematic code points */ if (PL_in_utf8_CTYPE_locale) { Copy(PL_fold_latin1, PL_fold_locale, 256, U8); + + /* UTF-8 locales can have special handling for 'I' and 'i' if they are + * Turkic. Make sure these two are the only anomalies. (We don't use + * towupper and towlower because they aren't in C89.) */ + +#if defined(HAS_TOWUPPER) && defined (HAS_TOWLOWER) + + if (towupper('i') == 0x130 && towlower('I') == 0x131) { + +#else + + if (toupper('i') == 'i' && tolower('I') == 'I') { + +#endif + check_for_problems = TRUE; + maybe_utf8_turkic = TRUE; + } } /* We don't populate the other lists if a UTF-8 locale, but do check that @@ -1532,18 +1634,22 @@ S_new_ctype(pTHX_ const char *newctype) && (isGRAPH_A(i) || isBLANK_A(i) || i == '\n')) { bool is_bad = FALSE; - char name[3] = { '\0' }; + char name[4] = { '\0' }; /* Convert the name into a string */ - if (isPRINT_A(i)) { + if (isGRAPH_A(i)) { name[0] = i; name[1] = '\0'; } else if (i == '\n') { - my_strlcpy(name, "\n", sizeof(name)); + my_strlcpy(name, "\\n", sizeof(name)); + } + else if (i == '\t') { + my_strlcpy(name, "\\t", sizeof(name)); } else { - my_strlcpy(name, "\t", sizeof(name)); + assert(i == ' '); + my_strlcpy(name, "' '", sizeof(name)); } /* Check each possibe class */ @@ -1607,13 +1713,13 @@ S_new_ctype(pTHX_ const char *newctype) "isxdigit('%s') unexpectedly is %d\n", name, cBOOL(isxdigit(i)))); } - if (UNLIKELY(tolower(i) != (int) toLOWER_A(i))) { + if (UNLIKELY(tolower(i) != (int) toLOWER_A(i))) { is_bad = TRUE; DEBUG_L(PerlIO_printf(Perl_debug_log, "tolower('%s')=0x%x instead of the expected 0x%x\n", name, tolower(i), (int) toLOWER_A(i))); } - if (UNLIKELY(toupper(i) != (int) toUPPER_A(i))) { + if (UNLIKELY(toupper(i) != (int) toUPPER_A(i))) { is_bad = TRUE; DEBUG_L(PerlIO_printf(Perl_debug_log, "toupper('%s')=0x%x instead of the expected 0x%x\n", @@ -1636,6 +1742,19 @@ S_new_ctype(pTHX_ const char *newctype) } } + if (bad_count == 2 && maybe_utf8_turkic) { + bad_count = 0; + *bad_chars_list = '\0'; + PL_fold_locale['I'] = 'I'; + PL_fold_locale['i'] = 'i'; + PL_in_utf8_turkic_locale = TRUE; + DEBUG_L(PerlIO_printf(Perl_debug_log, "%s:%d: %s is turkic\n", + __FILE__, __LINE__, newctype)); + } + else { + PL_in_utf8_turkic_locale = FALSE; + } + # ifdef MB_CUR_MAX /* We only handle single-byte locales (outside of UTF-8 ones; so if @@ -1661,7 +1780,10 @@ S_new_ctype(pTHX_ const char *newctype) # endif - if (UNLIKELY(bad_count) || UNLIKELY(multi_byte_locale)) { + /* If we found problems and we want them output, do so */ + if ( (UNLIKELY(bad_count) || UNLIKELY(multi_byte_locale)) + && (LIKELY(ckWARN_d(WARN_LOCALE)) || UNLIKELY(DEBUG_L_TEST))) + { if (UNLIKELY(bad_count) && PL_in_utf8_CTYPE_locale) { PL_warn_locale = Perl_newSVpvf(aTHX_ "Locale '%s' contains (at least) the following characters" @@ -1966,8 +2088,61 @@ S_new_collate(pTHX_ const char *newcoll) } +#endif + #ifdef WIN32 +#define USE_WSETLOCALE + +#ifdef USE_WSETLOCALE + +STATIC char * +S_wrap_wsetlocale(pTHX_ int category, const char *locale) { + wchar_t *wlocale; + wchar_t *wresult; + char *result; + + if (locale) { + int req_size = + MultiByteToWideChar(CP_UTF8, 0, locale, -1, NULL, 0); + + if (!req_size) { + errno = EINVAL; + return NULL; + } + + Newx(wlocale, req_size, wchar_t); + if (!MultiByteToWideChar(CP_UTF8, 0, locale, -1, wlocale, req_size)) { + Safefree(wlocale); + errno = EINVAL; + return NULL; + } + } + else { + wlocale = NULL; + } + wresult = _wsetlocale(category, wlocale); + Safefree(wlocale); + if (wresult) { + int req_size = + WideCharToMultiByte(CP_UTF8, 0, wresult, -1, NULL, 0, NULL, NULL); + Newx(result, req_size, char); + SAVEFREEPV(result); /* is there something better we can do here? */ + if (!WideCharToMultiByte(CP_UTF8, 0, wresult, -1, + result, req_size, NULL, NULL)) { + errno = EINVAL; + return NULL; + } + } + else { + result = NULL; + } + + return result; +} + +#endif + STATIC char * S_win32_setlocale(pTHX_ int category, const char* locale) { @@ -2025,7 +2200,11 @@ S_win32_setlocale(pTHX_ int category, const char* locale) } +#ifdef USE_WSETLOCALE + result = S_wrap_wsetlocale(aTHX_ category, locale); +#else result = setlocale(category, locale); +#endif DEBUG_L(STMT_START { dSAVE_ERRNO; PerlIO_printf(Perl_debug_log, "%s:%d: %s\n", __FILE__, __LINE__, @@ -2046,7 +2225,11 @@ S_win32_setlocale(pTHX_ int category, const char* locale) for (i = 0; i < LC_ALL_INDEX; i++) { result = PerlEnv_getenv(category_names[i]); if (result && strNE(result, "")) { +#ifdef USE_WSETLOCALE + S_wrap_wsetlocale(aTHX_ categories[i], result); +#else setlocale(categories[i], result); +#endif DEBUG_Lv(PerlIO_printf(Perl_debug_log, "%s:%d: %s\n", __FILE__, __LINE__, setlocale_debug_string(categories[i], result, "not captured"))); @@ -2075,15 +2258,19 @@ S_win32_setlocale(pTHX_ int category, const char* locale) This is an (almost) drop-in replacement for the system L>, taking the same parameters, and returning the same information, except that it -returns the correct underlying C locale, instead of C always, as -perl keeps that locale category as C, changing it briefly during the -operations where the underlying one is required. +returns the correct underlying C locale. Regular C will +instead return C if the underlying locale has a non-dot decimal point +character, or a non-empty thousands separator for displaying floating point +numbers. This is because perl keeps that locale category such that it has a +dot and empty separator, changing the locale briefly during the operations +where the underlying one is required. C knows about this, and +compensates; regular C doesn't. Another reason it isn't completely a drop-in replacement is that it is declared to return S>, whereas the system setlocale omits the -C. (If it were being written today, plain setlocale would be declared -const, since it is illegal to change the information it returns; doing so leads -to segfaults.) +C (presumably because its API was specified long ago, and can't be +updated; it is illegal to change the information C returns; doing +so leads to segfaults.) Finally, C works under all circumstances, whereas plain C can be completely ineffective on some platforms under some @@ -2107,18 +2294,28 @@ Perl_setlocale(const int category, const char * locale) { /* This wraps POSIX::setlocale() */ +#ifndef USE_LOCALE + + PERL_UNUSED_ARG(category); + PERL_UNUSED_ARG(locale); + + return "C"; + +#else + const char * retval; const char * newlocale; dSAVEDERRNO; - DECLARATION_FOR_LC_NUMERIC_MANIPULATION; dTHX; + DECLARATION_FOR_LC_NUMERIC_MANIPULATION; #ifdef USE_LOCALE_NUMERIC /* A NULL locale means only query what the current one is. We have the * LC_NUMERIC name saved, because we are normally switched into the C - * locale for it. For an LC_ALL query, switch back to get the correct - * results. All other categories don't require special handling */ + * (or equivalent) locale for it. For an LC_ALL query, switch back to get + * the correct results. All other categories don't require special + * handling */ if (locale == NULL) { if (category == LC_NUMERIC) { @@ -2202,20 +2399,23 @@ Perl_setlocale(const int category, const char * locale) # ifdef USE_LOCALE_CTYPE - newlocale = do_setlocale_c(LC_CTYPE, NULL); + newlocale = savepv(do_setlocale_c(LC_CTYPE, NULL)); new_ctype(newlocale); + Safefree(newlocale); # endif /* USE_LOCALE_CTYPE */ # ifdef USE_LOCALE_COLLATE - newlocale = do_setlocale_c(LC_COLLATE, NULL); + newlocale = savepv(do_setlocale_c(LC_COLLATE, NULL)); new_collate(newlocale); + Safefree(newlocale); # endif # ifdef USE_LOCALE_NUMERIC - newlocale = do_setlocale_c(LC_NUMERIC, NULL); + newlocale = savepv(do_setlocale_c(LC_NUMERIC, NULL)); new_numeric(newlocale); + Safefree(newlocale); # endif /* USE_LOCALE_NUMERIC */ #endif /* LC_ALL */ @@ -2226,6 +2426,8 @@ Perl_setlocale(const int category, const char * locale) return retval; +#endif + } PERL_STATIC_INLINE const char * @@ -2285,13 +2487,14 @@ rather than getting segfaults at runtime. It delivers the correct results for the C and C items, without you having to write extra code. The reason for the extra code would be because these are from the C locale category, which is normally -kept set to the C locale by Perl, no matter what the underlying locale is -supposed to be, and so to get the expected results, you have to temporarily -toggle into the underlying locale, and later toggle back. (You could use plain -C and C> for this but -then you wouldn't get the other advantages of C; not keeping -C in the C locale would break a lot of CPAN, which is expecting the -radix (decimal point) character to be a dot.) +kept set by Perl so that the radix is a dot, and the separator is the empty +string, no matter what the underlying locale is supposed to be, and so to get +the expected results, you have to temporarily toggle into the underlying +locale, and later toggle back. (You could use plain C and +C> for this but then you wouldn't get +the other advantages of C; not keeping C in the C +(or equivalent) locale would break a lot of CPAN, which is expecting the radix +(decimal point) character to be a dot.) =item * @@ -2377,17 +2580,21 @@ S_my_nl_langinfo(const int item, bool toggle) dTHX; const char * retval; +#ifdef USE_LOCALE_NUMERIC + /* We only need to toggle into the underlying LC_NUMERIC locale for these * two items, and only if not already there */ if (toggle && (( item != RADIXCHAR && item != THOUSEP) || PL_numeric_underlying)) - { + +#endif /* No toggling needed if not using LC_NUMERIC */ + toggle = FALSE; - } #if defined(HAS_NL_LANGINFO) /* nl_langinfo() is available. */ # if ! defined(HAS_THREAD_SAFE_NL_LANGINFO_L) \ - || ! defined(HAS_POSIX_2008_LOCALE) + || ! defined(HAS_POSIX_2008_LOCALE) \ + || ! defined(DUPLOCALE) /* Here, use plain nl_langinfo(), switching to the underlying LC_NUMERIC * for those items dependent on it. This must be copied to a buffer before @@ -2430,6 +2637,8 @@ S_my_nl_langinfo(const int item, bool toggle) do_free = TRUE; } +# ifdef USE_LOCALE_NUMERIC + if (toggle) { if (PL_underlying_numeric_obj) { cur = PL_underlying_numeric_obj; @@ -2440,6 +2649,8 @@ S_my_nl_langinfo(const int item, bool toggle) } } +# endif + /* We have to save it to a buffer, because the freelocale() just below * can invalidate the internal one */ retval = save_to_buffer(nl_langinfo_l(item, cur), @@ -2473,6 +2684,16 @@ S_my_nl_langinfo(const int item, bool toggle) const char * temp; DECLARATION_FOR_LC_NUMERIC_MANIPULATION; +# ifdef TS_W32_BROKEN_LOCALECONV + + const char * save_global; + const char * save_thread; + int needed_size; + char * ptr; + char * e; + char * item_start; + +# endif # endif # ifdef HAS_STRFTIME @@ -2552,8 +2773,8 @@ S_my_nl_langinfo(const int item, bool toggle) /* Here everything past the dot is a digit. Treat it as a * code page */ - save_to_buffer("CP", &PL_langinfo_buf, - &PL_langinfo_bufsize, 0); + retval = save_to_buffer("CP", &PL_langinfo_buf, + &PL_langinfo_bufsize, 0); offset = STRLENs("CP"); has_nondigit: @@ -2572,15 +2793,35 @@ S_my_nl_langinfo(const int item, bool toggle) /* We don't bother with localeconv_l() because any system that * has it is likely to also have nl_langinfo() */ - LOCALE_LOCK; /* Prevent interference with other threads - using localeconv() */ + LOCALE_LOCK_V; /* Prevent interference with other threads + using localeconv() */ + +# ifdef TS_W32_BROKEN_LOCALECONV + + /* This is a workaround for a Windows bug prior to VS 15. + * What we do here is, while locked, switch to the global + * locale so localeconv() works; then switch back just before + * the unlock. This can screw things up if some thread is + * already using the global locale while assuming no other is. + * A different workaround would be to call GetCurrencyFormat on + * a known value, and parse it; patches welcome + * + * We have to use LC_ALL instead of LC_MONETARY because of + * another bug in Windows */ + + save_thread = savepv(my_setlocale(LC_ALL, NULL)); + _configthreadlocale(_DISABLE_PER_THREAD_LOCALE); + save_global= savepv(my_setlocale(LC_ALL, NULL)); + my_setlocale(LC_ALL, save_thread); + +# endif lc = localeconv(); if ( ! lc || ! lc->currency_symbol || strEQ("", lc->currency_symbol)) { - LOCALE_UNLOCK; + LOCALE_UNLOCK_V; return ""; } @@ -2600,18 +2841,115 @@ S_my_nl_langinfo(const int item, bool toggle) PL_langinfo_buf[0] = '+'; } - LOCALE_UNLOCK; +# ifdef TS_W32_BROKEN_LOCALECONV + + my_setlocale(LC_ALL, save_global); + _configthreadlocale(_ENABLE_PER_THREAD_LOCALE); + my_setlocale(LC_ALL, save_thread); + Safefree(save_global); + Safefree(save_thread); + +# endif + + LOCALE_UNLOCK_V; break; +# ifdef TS_W32_BROKEN_LOCALECONV + case RADIXCHAR: + + /* For this, we output a known simple floating point number to + * a buffer, and parse it, looking for the radix */ + + if (toggle) { + STORE_LC_NUMERIC_FORCE_TO_UNDERLYING(); + } + + if (PL_langinfo_bufsize < 10) { + PL_langinfo_bufsize = 10; + Renew(PL_langinfo_buf, PL_langinfo_bufsize, char); + } + + needed_size = my_snprintf(PL_langinfo_buf, PL_langinfo_bufsize, + "%.1f", 1.5); + if (needed_size >= (int) PL_langinfo_bufsize) { + PL_langinfo_bufsize = needed_size + 1; + Renew(PL_langinfo_buf, PL_langinfo_bufsize, char); + needed_size = my_snprintf(PL_langinfo_buf, PL_langinfo_bufsize, + "%.1f", 1.5); + assert(needed_size < (int) PL_langinfo_bufsize); + } + + ptr = PL_langinfo_buf; + e = PL_langinfo_buf + PL_langinfo_bufsize; + + /* Find the '1' */ + while (ptr < e && *ptr != '1') { + ptr++; + } + ptr++; + + /* Find the '5' */ + item_start = ptr; + while (ptr < e && *ptr != '5') { + ptr++; + } + + /* Everything in between is the radix string */ + if (ptr >= e) { + PL_langinfo_buf[0] = '?'; + PL_langinfo_buf[1] = '\0'; + } + else { + *ptr = '\0'; + Move(item_start, PL_langinfo_buf, ptr - PL_langinfo_buf, char); + } + + if (toggle) { + RESTORE_LC_NUMERIC(); + } + + retval = PL_langinfo_buf; + break; + +# else + + case RADIXCHAR: /* No special handling needed */ + +# endif + case THOUSEP: if (toggle) { STORE_LC_NUMERIC_FORCE_TO_UNDERLYING(); } - LOCALE_LOCK; /* Prevent interference with other threads - using localeconv() */ + LOCALE_LOCK_V; /* Prevent interference with other threads + using localeconv() */ + +# ifdef TS_W32_BROKEN_LOCALECONV + + /* This should only be for the thousands separator. A + * different work around would be to use GetNumberFormat on a + * known value and parse the result to find the separator */ + save_thread = savepv(my_setlocale(LC_ALL, NULL)); + _configthreadlocale(_DISABLE_PER_THREAD_LOCALE); + save_global = savepv(my_setlocale(LC_ALL, NULL)); + my_setlocale(LC_ALL, save_thread); +# if 0 + /* This is the start of code that for broken Windows replaces + * the above and below code, and instead calls + * GetNumberFormat() and then would parse that to find the + * thousands separator. It needs to handle UTF-16 vs -8 + * issues. */ + + needed_size = GetNumberFormatEx(PL_numeric_name, 0, "1234.5", NULL, PL_langinfo_buf, PL_langinfo_bufsize); + DEBUG_L(PerlIO_printf(Perl_debug_log, + "%s: %d: return from GetNumber, count=%d, val=%s\n", + __FILE__, __LINE__, needed_size, PL_langinfo_buf)); + +# endif +# endif lc = localeconv(); if (! lc) { @@ -2629,7 +2967,17 @@ S_my_nl_langinfo(const int item, bool toggle) retval = save_to_buffer(temp, &PL_langinfo_buf, &PL_langinfo_bufsize, 0); - LOCALE_UNLOCK; +# ifdef TS_W32_BROKEN_LOCALECONV + + my_setlocale(LC_ALL, save_global); + _configthreadlocale(_ENABLE_PER_THREAD_LOCALE); + my_setlocale(LC_ALL, save_thread); + Safefree(save_global); + Safefree(save_thread); + +# endif + + LOCALE_UNLOCK_V; if (toggle) { RESTORE_LC_NUMERIC(); @@ -2938,6 +3286,8 @@ Perl_init_i18nl10n(pTHX_ int printwarn) * values for our db, instead of trying to change them. * */ + dVAR; + int ok = 1; #ifndef USE_LOCALE @@ -2947,7 +3297,7 @@ Perl_init_i18nl10n(pTHX_ int printwarn) #else /* USE_LOCALE */ # ifdef __GLIBC__ - const char * const language = savepv(PerlEnv_getenv("LANGUAGE")); + const char * const language = PerlEnv_getenv("LANGUAGE"); # endif @@ -2957,8 +3307,8 @@ Perl_init_i18nl10n(pTHX_ int printwarn) : ""; const char* trial_locales[5]; /* 5 = 1 each for "", LC_ALL, LANG, "", C */ unsigned int trial_locales_count; - const char * const lc_all = savepv(PerlEnv_getenv("LC_ALL")); - const char * const lang = savepv(PerlEnv_getenv("LANG")); + const char * const lc_all = PerlEnv_getenv("LC_ALL"); + const char * const lang = PerlEnv_getenv("LANG"); bool setlocale_failure = FALSE; unsigned int i; @@ -3098,6 +3448,18 @@ Perl_init_i18nl10n(pTHX_ int printwarn) # endif # endif /* DEBUGGING */ + /* Initialize the per-thread mbrFOO() state variables. See POSIX.xs for + * why these particular incantations are used. */ +#ifdef HAS_MBRLEN + memzero(&PL_mbrlen_ps, sizeof(PL_mbrlen_ps)); +#endif +#ifdef HAS_MBRTOWC + memzero(&PL_mbrtowc_ps, sizeof(PL_mbrtowc_ps)); +#endif +#ifdef HAS_WCTOMBR + wcrtomb(NULL, L'\0', &PL_wcrtomb_ps); +#endif + /* Initialize the cache of the program's UTF-8ness for the always known * locales C and POSIX */ my_strlcpy(PL_locale_utf8ness, C_and_POSIX_utf8ness, @@ -3110,7 +3472,7 @@ Perl_init_i18nl10n(pTHX_ int printwarn) # endif # endif -# if defined(LC_ALL_MASK) && defined(HAS_POSIX_2008_LOCALE) +# ifdef USE_POSIX_2008_LOCALE PL_C_locale_obj = newlocale(LC_ALL_MASK, "C", (locale_t) 0); if (! PL_C_locale_obj) { @@ -3123,8 +3485,12 @@ Perl_init_i18nl10n(pTHX_ int printwarn) # endif +# ifdef USE_LOCALE_NUMERIC + PL_numeric_radix_sv = newSVpvs("."); +# endif + # if defined(USE_POSIX_2008_LOCALE) && ! defined(HAS_QUERYLOCALE) /* Initialize our records. If we have POSIX 2008, we have LC_ALL */ @@ -3530,15 +3896,6 @@ Perl_init_i18nl10n(pTHX_ int printwarn) } # endif -# ifdef __GLIBC__ - - Safefree(language); - -# endif - - Safefree(lc_all); - Safefree(lang); - #endif /* USE_LOCALE */ #ifdef DEBUGGING @@ -3664,6 +4021,7 @@ Perl__mem_collxfrm(pTHX_ const char *input_string, cur_min_x + COLLXFRM_HDR_LEN)) { PL_strxfrm_NUL_replacement = j; + Safefree(cur_min_x); cur_min_x = x; } else { @@ -3819,6 +4177,7 @@ Perl__mem_collxfrm(pTHX_ const char *input_string, cur_max_x + COLLXFRM_HDR_LEN)) { PL_strxfrm_max_cp = j; + Safefree(cur_max_x); cur_max_x = x; } else { @@ -4060,11 +4419,6 @@ Perl__mem_collxfrm(pTHX_ const char *input_string, return xbuf; bad: - Safefree(xbuf); - if (s != input_string) { - Safefree(s); - } - *xlen = 0; # ifdef DEBUGGING @@ -4074,6 +4428,12 @@ Perl__mem_collxfrm(pTHX_ const char *input_string, # endif + Safefree(xbuf); + if (s != input_string) { + Safefree(s); + } + *xlen = 0; + return NULL; } @@ -4104,6 +4464,11 @@ S_print_collxfrm_input_and_return(pTHX_ PerlIO_printf(Perl_debug_log, "'\n"); } +# endif /* DEBUGGING */ +#endif /* USE_LOCALE_COLLATE */ +#ifdef USE_LOCALE +# ifdef DEBUGGING + STATIC void S_print_bytes_for_locale(pTHX_ const char * const s, @@ -4140,9 +4505,6 @@ S_print_bytes_for_locale(pTHX_ } # endif /* #ifdef DEBUGGING */ -#endif /* USE_LOCALE_COLLATE */ - -#ifdef USE_LOCALE STATIC const char * S_switch_category_locale_to_template(pTHX_ const int switch_category, const int template_category, const char * template_locale) @@ -4226,6 +4588,9 @@ S_restore_switched_locale(pTHX_ const int category, const char * const original_ Safefree(original_locale); } +/* is_cur_LC_category_utf8 uses a small char buffer to avoid malloc/free */ +#define CUR_LC_BUFFER_SIZE 64 + bool Perl__is_cur_LC_category_utf8(pTHX_ int category) { @@ -4263,6 +4628,7 @@ Perl__is_cur_LC_category_utf8(pTHX_ int category) the name in the cache */ char * delimited; /* The name plus the delimiters used to store it in the cache */ + char buffer[CUR_LC_BUFFER_SIZE]; /* small buffer */ char * name_pos; /* position of 'delimited' in the cache, or 0 if not there */ @@ -4291,9 +4657,15 @@ Perl__is_cur_LC_category_utf8(pTHX_ int category) * utf8ness digit */ input_name_len_with_overhead = input_name_len + 3; - /* Allocate and populate space for a copy of the name surrounded by the - * delimiters */ - Newx(delimited, input_name_len_with_overhead, char); + if ( input_name_len_with_overhead <= CUR_LC_BUFFER_SIZE ) { + /* we can use the buffer, avoid a malloc */ + delimited = buffer; + } else { /* need a malloc */ + /* Allocate and populate space for a copy of the name surrounded by the + * delimiters */ + Newx(delimited, input_name_len_with_overhead, char); + } + delimited[0] = UTF8NESS_SEP[0]; Copy(save_input_locale, delimited + 1, input_name_len, char); delimited[input_name_len+1] = UTF8NESS_PREFIX[0]; @@ -4327,7 +4699,8 @@ Perl__is_cur_LC_category_utf8(pTHX_ int category) utf8ness_cache[input_name_len_with_overhead - 1] = is_utf8 + '0'; } - Safefree(delimited); + /* free only when not using the buffer */ + if ( delimited != buffer ) Safefree(delimited); Safefree(save_input_locale); return is_utf8; } @@ -4450,11 +4823,12 @@ Perl__is_cur_LC_category_utf8(pTHX_ int category) && wc == (wchar_t) UNICODE_REPLACEMENT); } +# endif + restore_switched_locale(LC_CTYPE, original_ctype_locale); goto finish_and_return; } -# endif # else /* Here, we must have a C89 compiler that doesn't have mbtowc(). Next @@ -4686,9 +5060,9 @@ Perl__is_cur_LC_category_utf8(pTHX_ int category) is_utf8 = TRUE; goto finish_and_return; } - } # endif + } # endif /* Other common encodings are the ISO 8859 series, which aren't UTF-8. But @@ -4752,9 +5126,7 @@ Perl__is_cur_LC_category_utf8(pTHX_ int category) Copy(delimited, utf8ness_cache, input_name_len_with_overhead - 1, char); utf8ness_cache[input_name_len_with_overhead - 1] = is_utf8 + '0'; - if ((PL_locale_utf8ness[strlen(PL_locale_utf8ness)-1] - & (PERL_UINTMAX_T) ~1) != '0') - { + if ((PL_locale_utf8ness[strlen(PL_locale_utf8ness)-1] & ~1) != '0') { Perl_croak(aTHX_ "panic: %s: %d: Corrupt utf8ness_cache=%s\nlen=%zu," " inserted_name=%s, its_len=%zu\n", @@ -4784,6 +5156,7 @@ Perl__is_cur_LC_category_utf8(pTHX_ int category) s++; e = strchr(s, UTF8NESS_PREFIX[0]); if (! e) { + e = PL_locale_utf8ness + strlen(PL_locale_utf8ness); Perl_croak(aTHX_ "panic: %s: %d: Corrupt utf8ness_cache: missing" " separator %.*s<-- HERE %s\n", @@ -4821,7 +5194,8 @@ Perl__is_cur_LC_category_utf8(pTHX_ int category) # endif - Safefree(delimited); + /* free only when not using the buffer */ + if ( delimited != buffer ) Safefree(delimited); Safefree(save_input_locale); return is_utf8; } @@ -4838,15 +5212,15 @@ Perl__is_in_locale_category(pTHX_ const bool compiling, const int category) const COP * const cop = (compiling) ? &PL_compiling : PL_curcop; - SV *categories = cop_hints_fetch_pvs(cop, "locale", 0); - if (! categories || categories == &PL_sv_placeholder) { + SV *these_categories = cop_hints_fetch_pvs(cop, "locale", 0); + if (! these_categories || these_categories == &PL_sv_placeholder) { return FALSE; } /* The pseudo-category 'not_characters' is -1, so just add 1 to each to get * a valid unsigned */ assert(category >= -1); - return cBOOL(SvUV(categories) & (1U << (category + 1))); + return cBOOL(SvUV(these_categories) & (1U << (category + 1))); } char * @@ -4875,19 +5249,34 @@ Perl_my_strerror(pTHX_ const int errnum) const bool within_locale_scope = IN_LC(LC_MESSAGES); -# if defined(HAS_POSIX_2008_LOCALE) && defined(HAS_STRERROR_L) +# ifndef USE_ITHREADS - /* This function is trivial if we don't have to worry about thread safety - * and have strerror_l(), as it handles the switch of locales so we don't - * have to deal with that. We don't have to worry about thread safety if - * this is an unthreaded build, or if strerror_r() is also available. Both - * it and strerror_l() are thread-safe. Plain strerror() isn't thread - * safe. But on threaded builds when strerror_r() is available, the - * apparent call to strerror() below is actually a macro that - * behind-the-scenes calls strerror_r(). - */ + /* This function is trivial without threads. */ + if (within_locale_scope) { + errstr = savepv(strerror(errnum)); + } + else { + const char * save_locale = savepv(do_setlocale_c(LC_MESSAGES, NULL)); + + do_setlocale_c(LC_MESSAGES, "C"); + errstr = savepv(strerror(errnum)); + do_setlocale_c(LC_MESSAGES, save_locale); + Safefree(save_locale); + } -# if ! defined(USE_ITHREADS) || defined(HAS_STRERROR_R) +# elif defined(USE_POSIX_2008_LOCALE) \ + && defined(HAS_STRERROR_L) \ + && defined(HAS_DUPLOCALE) + + /* This function is also trivial if we don't have to worry about thread + * safety and have strerror_l(), as it handles the switch of locales so we + * don't have to deal with that. We don't have to worry about thread + * safety if strerror_r() is also available. Both it and strerror_l() are + * thread-safe. Plain strerror() isn't thread safe. But on threaded + * builds when strerror_r() is available, the apparent call to strerror() + * below is actually a macro that behind-the-scenes calls strerror_r(). */ + +# ifdef HAS_STRERROR_R if (within_locale_scope) { errstr = savepv(strerror(errnum)); @@ -4979,9 +5368,7 @@ Perl_my_strerror(pTHX_ const int errnum) LOCALE_UNLOCK; # endif /* End of doesn't have strerror_l */ -#endif /* End of does have locale messages */ - -#ifdef DEBUGGING +# ifdef DEBUGGING if (DEBUG_Lv_TEST) { PerlIO_printf(Perl_debug_log, "Strerror returned; saving a copy: '"); @@ -4989,7 +5376,8 @@ Perl_my_strerror(pTHX_ const int errnum) PerlIO_printf(Perl_debug_log, "'\n"); } -#endif +# endif +#endif /* End of does have locale messages */ SAVEFREEPV(errstr); return errstr; @@ -4999,7 +5387,7 @@ Perl_my_strerror(pTHX_ const int errnum) =for apidoc switch_to_global_locale -On systems without locale support, or on single-threaded builds, or on +On systems without locale support, or on typical single-threaded builds, or on platforms that do not support per-thread locale operations, this function does nothing. On such systems that do have locale support, only a locale global to the whole program is available. @@ -5011,6 +5399,25 @@ locale operation. As long as only a single thread is so-converted, everything works fine, as all the other threads continue to ignore the global one, so only this thread looks at it. +However, on Windows systems this isn't quite true prior to Visual Studio 15, +at which point Microsoft fixed a bug. A race can occur if you use the +following operations on earlier Windows platforms: + +=over + +=item L + +=item L, items C and C + +=item L, items C and C + +=back + +The first item is not fixable (except by upgrading to a later Visual Studio +release), but it would be possible to work around the latter two items by using +the Windows API functions C and C; patches +welcome. + Without this function call, threads that use the L> system function will not work properly, as all the locale-sensitive functions will look at the per-thread locale, and C will have no effect on this @@ -5092,10 +5499,17 @@ L|perlapi/switch_to_global_locale>. bool Perl_sync_locale() { + +#ifndef USE_LOCALE + + return TRUE; + +#else + const char * newlocale; dTHX; -#ifdef USE_POSIX_2008_LOCALE +# ifdef USE_POSIX_2008_LOCALE bool was_in_global_locale = FALSE; locale_t cur_obj = uselocale((locale_t) 0); @@ -5107,11 +5521,11 @@ Perl_sync_locale() * will affect the */ if (cur_obj == LC_GLOBAL_LOCALE) { -# ifdef HAS_QUERY_LOCALE +# ifdef HAS_QUERY_LOCALE do_setlocale_c(LC_ALL, setlocale(LC_ALL, NULL)); -# else +# else unsigned int i; @@ -5121,45 +5535,51 @@ Perl_sync_locale() do_setlocale_r(categories[i], setlocale(categories[i], NULL)); } -# endif +# endif was_in_global_locale = TRUE; } -#else +# else bool was_in_global_locale = TRUE; -#endif -#ifdef USE_LOCALE_CTYPE +# endif +# ifdef USE_LOCALE_CTYPE - newlocale = do_setlocale_c(LC_CTYPE, NULL); + newlocale = savepv(do_setlocale_c(LC_CTYPE, NULL)); DEBUG_Lv(PerlIO_printf(Perl_debug_log, "%s:%d: %s\n", __FILE__, __LINE__, setlocale_debug_string(LC_CTYPE, NULL, newlocale))); new_ctype(newlocale); + Safefree(newlocale); -#endif /* USE_LOCALE_CTYPE */ -#ifdef USE_LOCALE_COLLATE +# endif /* USE_LOCALE_CTYPE */ +# ifdef USE_LOCALE_COLLATE - newlocale = do_setlocale_c(LC_COLLATE, NULL); + newlocale = savepv(do_setlocale_c(LC_COLLATE, NULL)); DEBUG_Lv(PerlIO_printf(Perl_debug_log, "%s:%d: %s\n", __FILE__, __LINE__, setlocale_debug_string(LC_COLLATE, NULL, newlocale))); new_collate(newlocale); + Safefree(newlocale); -#endif -#ifdef USE_LOCALE_NUMERIC +# endif +# ifdef USE_LOCALE_NUMERIC - newlocale = do_setlocale_c(LC_NUMERIC, NULL); + newlocale = savepv(do_setlocale_c(LC_NUMERIC, NULL)); DEBUG_Lv(PerlIO_printf(Perl_debug_log, "%s:%d: %s\n", __FILE__, __LINE__, setlocale_debug_string(LC_NUMERIC, NULL, newlocale))); new_numeric(newlocale); + Safefree(newlocale); -#endif /* USE_LOCALE_NUMERIC */ +# endif /* USE_LOCALE_NUMERIC */ return was_in_global_locale; + +#endif + } #if defined(DEBUGGING) && defined(USE_LOCALE) @@ -5257,8 +5677,9 @@ Perl_thread_locale_term() # ifndef WIN32 { /* Free up */ + dVAR; locale_t cur_obj = uselocale(LC_GLOBAL_LOCALE); - if (cur_obj != LC_GLOBAL_LOCALE) { + if (cur_obj != LC_GLOBAL_LOCALE && cur_obj != PL_C_locale_obj) { freelocale(cur_obj); } }