X-Git-Url: https://perl5.git.perl.org/perl5.git/blobdiff_plain/11a45381689bcf81dbebf23d9b05a95f187a769b..33f0d9624a416d3eb926048f8054b82304fba159:/locale.c diff --git a/locale.c b/locale.c index cca250b..67a2997 100644 --- a/locale.c +++ b/locale.c @@ -32,6 +32,15 @@ * the desired behavior of those functions at the moment. And, LC_MESSAGES is * switched to the C locale for outputting the message unless within the scope * of 'use locale'. + * + * This code now has multi-thread-safe locale handling on systems that support + * that. This is completely transparent to most XS code. On earlier systems, + * it would be possible to emulate thread-safe locales, but this likely would + * involve a lot of locale switching, and would require XS code changes. + * Macros could be written so that the code wouldn't have to know which type of + * system is being used. It's unlikely that we would ever do that, since most + * modern systems support thread-safe locales, but there was code written to + * this end, and is retained, #ifdef'd out. */ #include "EXTERN.h" @@ -56,6 +65,12 @@ static bool debug_initialization = FALSE; # define DEBUG_INITIALIZATION_set(v) (debug_initialization = v) #endif + +/* Returns the Unix errno portion; ignoring any others. This is a macro here + * instead of putting it into perl.h, because unclear to khw what should be + * done generally. */ +#define GET_ERRNO saved_errno + /* strlen() of a literal string constant. We might want this more general, * but using it in just this file for now. A problem with more generality is * the compiler warnings about comparing unlike signs */ @@ -369,19 +384,886 @@ S_category_name(const int category) # ifdef LC_ALL # define LC_ALL_INDEX _DUMMY_TELEPHONE + 1 # endif -#endif /* ifdef USE_LOCALE */ +#endif /* ifdef USE_LOCALE */ + +/* Windows requres a customized base-level setlocale() */ +#ifdef WIN32 +# define my_setlocale(cat, locale) win32_setlocale(cat, locale) +#else +# define my_setlocale(cat, locale) setlocale(cat, locale) +#endif + +#ifndef USE_POSIX_2008_LOCALE + +/* "do_setlocale_c" is intended to be called when the category is a constant + * known at compile time; "do_setlocale_r", not known until run time */ +# define do_setlocale_c(cat, locale) my_setlocale(cat, locale) +# define do_setlocale_r(cat, locale) my_setlocale(cat, locale) + +#else /* Below uses POSIX 2008 */ + +/* We emulate setlocale with our own function. LC_foo is not valid for the + * POSIX 2008 functions. Instead LC_foo_MASK is used, which we use an array + * lookup to convert to. At compile time we have defined LC_foo_INDEX as the + * proper offset into the array 'category_masks[]'. At runtime, we have to + * search through the array (as the actual numbers may not be small contiguous + * positive integers which would lend themselves to array lookup). */ +# define do_setlocale_c(cat, locale) \ + emulate_setlocale(cat, locale, cat ## _INDEX, TRUE) +# define do_setlocale_r(cat, locale) emulate_setlocale(cat, locale, 0, FALSE) + +/* A third array, parallel to the ones above to map from category to its + * equivalent mask */ +const int category_masks[] = { +# ifdef USE_LOCALE_NUMERIC + LC_NUMERIC_MASK, +# endif +# ifdef USE_LOCALE_CTYPE + LC_CTYPE_MASK, +# endif +# ifdef USE_LOCALE_COLLATE + LC_COLLATE_MASK, +# endif +# ifdef USE_LOCALE_TIME + LC_TIME_MASK, +# endif +# ifdef USE_LOCALE_MESSAGES + LC_MESSAGES_MASK, +# endif +# ifdef USE_LOCALE_MONETARY + LC_MONETARY_MASK, +# endif +# ifdef USE_LOCALE_ADDRESS + LC_ADDRESS_MASK, +# endif +# ifdef USE_LOCALE_IDENTIFICATION + LC_IDENTIFICATION_MASK, +# endif +# ifdef USE_LOCALE_MEASUREMENT + LC_MEASUREMENT_MASK, +# endif +# ifdef USE_LOCALE_PAPER + LC_PAPER_MASK, +# endif +# ifdef USE_LOCALE_TELEPHONE + LC_TELEPHONE_MASK, +# endif + /* LC_ALL can't be turned off by a Configure + * option, and in Posix 2008, should always be + * here, so compile it in unconditionally. + * This could catch some glitches at compile + * time */ + LC_ALL_MASK + }; + +STATIC const char * +S_emulate_setlocale(const int category, + const char * locale, + unsigned int index, + const bool is_index_valid + ) +{ + /* This function effectively performs a setlocale() on just the current + * thread; thus it is thread-safe. It does this by using the POSIX 2008 + * locale functions to emulate the behavior of setlocale(). Similar to + * regular setlocale(), the return from this function points to memory that + * can be overwritten by other system calls, so needs to be copied + * immediately if you need to retain it. The difference here is that + * system calls besides another setlocale() can overwrite it. + * + * By doing this, most locale-sensitive functions become thread-safe. The + * exceptions are mostly those that return a pointer to static memory. + * + * This function takes the same parameters, 'category' and 'locale', that + * the regular setlocale() function does, but it also takes two additional + * ones. This is because the 2008 functions don't use a category; instead + * they use a corresponding mask. Because this function operates in both + * worlds, it may need one or the other or both. This function can + * calculate the mask from the input category, but to avoid this + * calculation, if the caller knows at compile time what the mask is, it + * can pass it, setting 'is_index_valid' to TRUE; otherwise the mask + * parameter is ignored. + * + * POSIX 2008, for some sick reason, chose not to provide a method to find + * the category name of a locale. Some vendors have created a + * querylocale() function to do just that. This function is a lot simpler + * to implement on systems that have this. Otherwise, we have to keep + * track of what the locale has been set to, so that we can return its + * name to emulate setlocale(). It's also possible for C code in some + * library to change the locale without us knowing it, though as of + * September 2017, there are no occurrences in CPAN of uselocale(). Some + * libraries do use setlocale(), but that changes the global locale, and + * threads using per-thread locales will just ignore those changes. + * Another problem is that without querylocale(), we have to guess at what + * was meant by setting a locale of "". We handle this by not actually + * ever setting to "" (unless querylocale exists), but to emulate what we + * think should happen for "". + */ + + int mask; + locale_t old_obj; + locale_t new_obj; + dTHX; + +# ifdef DEBUGGING + + if (DEBUG_Lv_TEST || debug_initialization) { + PerlIO_printf(Perl_debug_log, "%s:%d: emulate_setlocale input=%d (%s), \"%s\", %d, %d\n", __FILE__, __LINE__, category, category_name(category), locale, index, is_index_valid); + } + +# endif + + /* If the input mask might be incorrect, calculate the correct one */ + if (! is_index_valid) { + unsigned int i; + +# ifdef DEBUGGING + + if (DEBUG_Lv_TEST || debug_initialization) { + PerlIO_printf(Perl_debug_log, "%s:%d: finding index of category %d (%s)\n", __FILE__, __LINE__, category, category_name(category)); + } + +# endif + + for (i = 0; i <= LC_ALL_INDEX; i++) { + if (category == categories[i]) { + index = i; + goto found_index; + } + } + + /* Here, we don't know about this category, so can't handle it. + * Fallback to the early POSIX usages */ + Perl_warner(aTHX_ packWARN(WARN_LOCALE), + "Unknown locale category %d; can't set it to %s\n", + category, locale); + return NULL; + + found_index: ; + +# ifdef DEBUGGING + + if (DEBUG_Lv_TEST || debug_initialization) { + PerlIO_printf(Perl_debug_log, "%s:%d: index is %d for %s\n", __FILE__, __LINE__, index, category_name(category)); + } + +# endif + + } + + mask = category_masks[index]; + +# ifdef DEBUGGING + + if (DEBUG_Lv_TEST || debug_initialization) { + PerlIO_printf(Perl_debug_log, "%s:%d: category name is %s; mask is 0x%x\n", __FILE__, __LINE__, category_names[index], mask); + } + +# endif + + /* If just querying what the existing locale is ... */ + if (locale == NULL) { + locale_t cur_obj = uselocale((locale_t) 0); + +# ifdef DEBUGGING + + if (DEBUG_Lv_TEST || debug_initialization) { + PerlIO_printf(Perl_debug_log, "%s:%d: emulate_setlocale querying %p\n", __FILE__, __LINE__, cur_obj); + } + +# endif + + if (cur_obj == LC_GLOBAL_LOCALE) { + return my_setlocale(category, NULL); + } + +# ifdef HAS_QUERYLOCALE + + return (char *) querylocale(mask, cur_obj); + +# else + + /* If this assert fails, adjust the size of curlocales in intrpvar.h */ + STATIC_ASSERT_STMT(C_ARRAY_LENGTH(PL_curlocales) > LC_ALL_INDEX); + +# if defined(_NL_LOCALE_NAME) && defined(DEBUGGING) + + { + /* Internal glibc for querylocale(), but doesn't handle + * empty-string ("") locale properly; who knows what other + * glitches. Check it for now, under debug. */ + + char * temp_name = nl_langinfo_l(_NL_LOCALE_NAME(category), + uselocale((locale_t) 0)); + /* + PerlIO_printf(Perl_debug_log, "%s:%d: temp_name=%s\n", __FILE__, __LINE__, temp_name ? temp_name : "NULL"); + PerlIO_printf(Perl_debug_log, "%s:%d: index=%d\n", __FILE__, __LINE__, index); + PerlIO_printf(Perl_debug_log, "%s:%d: PL_curlocales[index]=%s\n", __FILE__, __LINE__, PL_curlocales[index]); + */ + if (temp_name && PL_curlocales[index] && strNE(temp_name, "")) { + if ( strNE(PL_curlocales[index], temp_name) + && ! ( isNAME_C_OR_POSIX(temp_name) + && isNAME_C_OR_POSIX(PL_curlocales[index]))) { + +# ifdef USE_C_BACKTRACE + + dump_c_backtrace(Perl_debug_log, 20, 1); + +# endif + + Perl_croak(aTHX_ "panic: Mismatch between what Perl thinks %s is" + " (%s) and what internal glibc thinks" + " (%s)\n", category_names[index], + PL_curlocales[index], temp_name); + } + + return temp_name; + } + } + +# endif + + /* Without querylocale(), we have to use our record-keeping we've + * done. */ + + if (category != LC_ALL) { + +# ifdef DEBUGGING + + if (DEBUG_Lv_TEST || debug_initialization) { + PerlIO_printf(Perl_debug_log, "%s:%d: emulate_setlocale returning %s\n", __FILE__, __LINE__, PL_curlocales[index]); + } + +# endif + + return PL_curlocales[index]; + } + else { /* For LC_ALL */ + unsigned int i; + Size_t names_len = 0; + char * all_string; + bool are_all_categories_the_same_locale = TRUE; + + /* If we have a valid LC_ALL value, just return it */ + if (PL_curlocales[LC_ALL_INDEX]) { + +# ifdef DEBUGGING + + if (DEBUG_Lv_TEST || debug_initialization) { + PerlIO_printf(Perl_debug_log, "%s:%d: emulate_setlocale returning %s\n", __FILE__, __LINE__, PL_curlocales[LC_ALL_INDEX]); + } + +# endif + + return PL_curlocales[LC_ALL_INDEX]; + } + + /* Otherwise, we need to construct a string of name=value pairs. + * We use the glibc syntax, like + * LC_NUMERIC=C;LC_TIME=en_US.UTF-8;... + * First calculate the needed size. Along the way, check if all + * the locale names are the same */ + for (i = 0; i < LC_ALL_INDEX; i++) { + +# ifdef DEBUGGING + + if (DEBUG_Lv_TEST || debug_initialization) { + PerlIO_printf(Perl_debug_log, "%s:%d: emulate_setlocale i=%d, name=%s, locale=%s\n", __FILE__, __LINE__, i, category_names[i], PL_curlocales[i]); + } + +# endif + + names_len += strlen(category_names[i]) + + 1 /* '=' */ + + strlen(PL_curlocales[i]) + + 1; /* ';' */ + + if (i > 0 && strNE(PL_curlocales[i], PL_curlocales[i-1])) { + are_all_categories_the_same_locale = FALSE; + } + } + + /* If they are the same, we don't actually have to construct the + * string; we just make the entry in LC_ALL_INDEX valid, and be + * that single name */ + if (are_all_categories_the_same_locale) { + PL_curlocales[LC_ALL_INDEX] = savepv(PL_curlocales[0]); + return PL_curlocales[LC_ALL_INDEX]; + } + + names_len++; /* Trailing '\0' */ + SAVEFREEPV(Newx(all_string, names_len, char)); + *all_string = '\0'; + + /* Then fill in the string */ + for (i = 0; i < LC_ALL_INDEX; i++) { + +# ifdef DEBUGGING + + if (DEBUG_Lv_TEST || debug_initialization) { + PerlIO_printf(Perl_debug_log, "%s:%d: emulate_setlocale i=%d, name=%s, locale=%s\n", __FILE__, __LINE__, i, category_names[i], PL_curlocales[i]); + } + +# endif + + my_strlcat(all_string, category_names[i], names_len); + my_strlcat(all_string, "=", names_len); + my_strlcat(all_string, PL_curlocales[i], names_len); + my_strlcat(all_string, ";", names_len); + } + +# ifdef DEBUGGING + + if (DEBUG_L_TEST || debug_initialization) { + PerlIO_printf(Perl_debug_log, "%s:%d: emulate_setlocale returning %s\n", __FILE__, __LINE__, all_string); + } + + #endif + + return all_string; + } + +# ifdef EINVAL + + SETERRNO(EINVAL, LIB_INVARG); + +# endif + + return NULL; + +# endif + + } + + /* Here, we are switching locales. */ + +# ifndef HAS_QUERYLOCALE + + if (strEQ(locale, "")) { + + /* For non-querylocale() systems, we do the setting of "" ourselves to + * be sure that we really know what's going on. We follow the Linux + * documented behavior (but if that differs from the actual behavior, + * this won't work exactly as the OS implements). We go out and + * examine the environment based on our understanding of how the system + * works, and use that to figure things out */ + + const char * const lc_all = PerlEnv_getenv("LC_ALL"); + + /* Use any "LC_ALL" environment variable, as it overrides everything + * else. */ + if (lc_all && strNE(lc_all, "")) { + locale = lc_all; + } + else { + + /* Otherwise, we need to dig deeper. Unless overridden, the + * default is the LANG environment variable; if it doesn't exist, + * then "C" */ + + const char * default_name; + + /* To minimize other threads messing with the environment, we copy + * the variable, making it a temporary. But this doesn't work upon + * program initialization before any scopes are created, and at + * this time, there's nothing else going on that would interfere. + * So skip the copy in that case */ + if (PL_scopestack_ix == 0) { + default_name = PerlEnv_getenv("LANG"); + } + else { + default_name = savepv(PerlEnv_getenv("LANG")); + } + + if (! default_name || strEQ(default_name, "")) { + default_name = "C"; + } + else if (PL_scopestack_ix != 0) { + SAVEFREEPV(default_name); + } + + if (category != LC_ALL) { + const char * const name = PerlEnv_getenv(category_names[index]); + + /* Here we are setting a single category. Assume will have the + * default name */ + locale = default_name; + + /* But then look for an overriding environment variable */ + if (name && strNE(name, "")) { + locale = name; + } + } + else { + bool did_override = FALSE; + unsigned int i; + + /* Here, we are getting LC_ALL. Any categories that don't have + * a corresponding environment variable set should be set to + * LANG, or to "C" if there is no LANG. If no individual + * categories differ from this, we can just set LC_ALL. This + * is buggy on systems that have extra categories that we don't + * know about. If there is an environment variable that sets + * that category, we won't know to look for it, and so our use + * of LANG or "C" improperly overrides it. On the other hand, + * if we don't do what is done here, and there is no + * environment variable, the category's locale should be set to + * LANG or "C". So there is no good solution. khw thinks the + * best is to look at systems to see what categories they have, + * and include them, and then to assume that we know the + * complete set */ + + for (i = 0; i < LC_ALL_INDEX; i++) { + const char * const env_override + = savepv(PerlEnv_getenv(category_names[i])); + const char * this_locale = ( env_override + && strNE(env_override, "")) + ? env_override + : default_name; + if (! emulate_setlocale(categories[i], this_locale, i, TRUE)) + { + Safefree(env_override); + return NULL; + } + + if (strNE(this_locale, default_name)) { + did_override = TRUE; + } + + Safefree(env_override); + } + + /* If all the categories are the same, we can set LC_ALL to + * that */ + if (! did_override) { + locale = default_name; + } + else { + + /* Here, LC_ALL is no longer valid, as some individual + * categories don't match it. We call ourselves + * recursively, as that will execute the code that + * generates the proper locale string for this situation. + * We don't do the remainder of this function, as that is + * to update our records, and we've just done that for the + * individual categories in the loop above, and doing so + * would cause LC_ALL to be done as well */ + return emulate_setlocale(LC_ALL, NULL, LC_ALL_INDEX, TRUE); + } + } + } + } + else if (strchr(locale, ';')) { + + /* LC_ALL may actually incude a conglomeration of various categories. + * Without querylocale, this code uses the glibc (as of this writing) + * syntax for representing that, but that is not a stable API, and + * other platforms do it differently, so we have to handle all cases + * ourselves */ + + unsigned int i; + const char * s = locale; + const char * e = locale + strlen(locale); + const char * p = s; + const char * category_end; + const char * name_start; + const char * name_end; + + /* If the string that gives what to set doesn't include all categories, + * the omitted ones get set to "C". To get this behavior, first set + * all the individual categories to "C", and override the furnished + * ones below */ + for (i = 0; i < LC_ALL_INDEX; i++) { + if (! emulate_setlocale(categories[i], "C", i, TRUE)) { + return NULL; + } + } + + while (s < e) { + + /* Parse through the category */ + while (isWORDCHAR(*p)) { + p++; + } + category_end = p; + + if (*p++ != '=') { + Perl_croak(aTHX_ + "panic: %s: %d: Unexpected character in locale name '%02X", + __FILE__, __LINE__, *(p-1)); + } + + /* Parse through the locale name */ + name_start = p; + while (p < e && *p != ';') { + if (! isGRAPH(*p)) { + Perl_croak(aTHX_ + "panic: %s: %d: Unexpected character in locale name '%02X", + __FILE__, __LINE__, *(p-1)); + } + p++; + } + name_end = p; + + /* Space past the semi-colon */ + if (p < e) { + p++; + } + + /* Find the index of the category name in our lists */ + for (i = 0; i < LC_ALL_INDEX; i++) { + char * individ_locale; + + /* Keep going if this isn't the index. The strnNE() avoids a + * Perl_form(), but would fail if ever a category name could be + * a substring of another one, like if there were a + * "LC_TIME_DATE" */ + if strnNE(s, category_names[i], category_end - s) { + continue; + } + + /* If this index is for the single category we're changing, we + * have found the locale to set it to. */ + if (category == categories[i]) { + locale = Perl_form(aTHX_ "%.*s", + (int) (name_end - name_start), + name_start); + goto ready_to_set; + } + + assert(category == LC_ALL); + individ_locale = Perl_form(aTHX_ "%.*s", + (int) (name_end - name_start), name_start); + if (! emulate_setlocale(categories[i], individ_locale, i, TRUE)) + { + return NULL; + } + } + + s = p; + } + + /* Here we have set all the individual categories by recursive calls. + * These collectively should have fixed up LC_ALL, so can just query + * what that now is */ + assert(category == LC_ALL); + + return do_setlocale_c(LC_ALL, NULL); + } + + ready_to_set: ; + + /* Here at the end of having to deal with the absence of querylocale(). + * Some cases have already been fully handled by recursive calls to this + * function. But at this point, we haven't dealt with those, but are now + * prepared to, knowing what the locale name to set this category to is. + * This would have come for free if this system had had querylocale() */ + +# endif /* end of ! querylocale */ + + assert(PL_C_locale_obj); + + /* Switching locales generally entails freeing the current one's space (at + * the C library's discretion). We need to stop using that locale before + * the switch. So switch to a known locale object that we don't otherwise + * mess with. This returns the locale object in effect at the time of the + * switch. */ + old_obj = uselocale(PL_C_locale_obj); + +# ifdef DEBUGGING + + if (DEBUG_Lv_TEST || debug_initialization) { + PerlIO_printf(Perl_debug_log, "%s:%d: emulate_setlocale was using %p\n", __FILE__, __LINE__, old_obj); + } + +# endif + + if (! old_obj) { + +# ifdef DEBUGGING + + if (DEBUG_L_TEST || debug_initialization) { + dSAVE_ERRNO; + PerlIO_printf(Perl_debug_log, "%s:%d: emulate_setlocale switching to C failed: %d\n", __FILE__, __LINE__, GET_ERRNO); + RESTORE_ERRNO; + } + +# endif + + return NULL; + } + +# ifdef DEBUGGING + + if (DEBUG_Lv_TEST || debug_initialization) { + PerlIO_printf(Perl_debug_log, "%s:%d: emulate_setlocale now using %p\n", __FILE__, __LINE__, PL_C_locale_obj); + } + +# endif + + /* If we weren't in a thread safe locale, set so that newlocale() below + which uses 'old_obj', uses an empty one. Same for our reserved C object. + The latter is defensive coding, so that, even if there is some bug, we + will never end up trying to modify either of these, as if passed to + newlocale(), they can be. */ + if (old_obj == LC_GLOBAL_LOCALE || old_obj == PL_C_locale_obj) { + old_obj = (locale_t) 0; + } + + /* Ready to create a new locale by modification of the exising one */ + new_obj = newlocale(mask, locale, old_obj); + + if (! new_obj) { + dSAVE_ERRNO; + +# ifdef DEBUGGING + + if (DEBUG_L_TEST || debug_initialization) { + PerlIO_printf(Perl_debug_log, "%s:%d: emulate_setlocale creating new object failed: %d\n", __FILE__, __LINE__, GET_ERRNO); + } + +# endif + + if (! uselocale(old_obj)) { + +# ifdef DEBUGGING + + if (DEBUG_L_TEST || debug_initialization) { + PerlIO_printf(Perl_debug_log, "%s:%d: switching back failed: %d\n", __FILE__, __LINE__, GET_ERRNO); + } + +# endif + + } + RESTORE_ERRNO; + return NULL; + } + +# ifdef DEBUGGING + + if (DEBUG_Lv_TEST || debug_initialization) { + PerlIO_printf(Perl_debug_log, "%s:%d: emulate_setlocale created %p; should have freed %p\n", __FILE__, __LINE__, new_obj, old_obj); + } + +# endif + + /* And switch into it */ + if (! uselocale(new_obj)) { + dSAVE_ERRNO; + +# ifdef DEBUGGING + + if (DEBUG_L_TEST || debug_initialization) { + PerlIO_printf(Perl_debug_log, "%s:%d: emulate_setlocale switching to new object failed\n", __FILE__, __LINE__); + } + +# endif + + if (! uselocale(old_obj)) { + +# ifdef DEBUGGING + + if (DEBUG_L_TEST || debug_initialization) { + PerlIO_printf(Perl_debug_log, "%s:%d: switching back failed: %d\n", __FILE__, __LINE__, GET_ERRNO); + } + +# endif + + } + freelocale(new_obj); + RESTORE_ERRNO; + return NULL; + } + +# ifdef DEBUGGING + + if (DEBUG_Lv_TEST || debug_initialization) { + PerlIO_printf(Perl_debug_log, "%s:%d: emulate_setlocale now using %p\n", __FILE__, __LINE__, new_obj); + } + +# endif + + /* We are done, except for updating our records (if the system doesn't keep + * them) and in the case of locale "", we don't actually know what the + * locale that got switched to is, as it came from the environment. So + * have to find it */ + +# ifdef HAS_QUERYLOCALE + + if (strEQ(locale, "")) { + locale = querylocale(mask, new_obj); + } -/* Windows requres a customized base-level setlocale() */ -# ifdef WIN32 -# define my_setlocale(cat, locale) win32_setlocale(cat, locale) # else -# define my_setlocale(cat, locale) setlocale(cat, locale) + + /* Here, 'locale' is the return value */ + + /* Without querylocale(), we have to update our records */ + + if (category == LC_ALL) { + unsigned int i; + + /* For LC_ALL, we change all individual categories to correspond */ + /* PL_curlocales is a parallel array, so has same + * length as 'categories' */ + for (i = 0; i <= LC_ALL_INDEX; i++) { + Safefree(PL_curlocales[i]); + PL_curlocales[i] = savepv(locale); + } + } + else { + + /* For a single category, if it's not the same as the one in LC_ALL, we + * nullify LC_ALL */ + + if (PL_curlocales[LC_ALL_INDEX] && strNE(PL_curlocales[LC_ALL_INDEX], locale)) { + Safefree(PL_curlocales[LC_ALL_INDEX]); + PL_curlocales[LC_ALL_INDEX] = NULL; + } + + /* Then update the category's record */ + Safefree(PL_curlocales[index]); + PL_curlocales[index] = savepv(locale); + } + # endif -/* Just placeholders for now. "_c" is intended to be called when the category - * is a constant known at compile time; "_r", not known until run time */ -# define do_setlocale_c(category, locale) my_setlocale(category, locale) -# define do_setlocale_r(category, locale) my_setlocale(category, locale) + return locale; +} + +#endif /* USE_POSIX_2008_LOCALE */ + +#if 0 /* Code that was to emulate thread-safe locales on platforms that + didn't natively support them */ + +/* The way this would work is that we would keep a per-thread list of the + * correct locale for that thread. Any operation that was locale-sensitive + * would have to be changed so that it would look like this: + * + * LOCALE_LOCK; + * setlocale to the correct locale for this operation + * do operation + * LOCALE_UNLOCK + * + * This leaves the global locale in the most recently used operation's, but it + * was locked long enough to get the result. If that result is static, it + * needs to be copied before the unlock. + * + * Macros could be written like SETUP_LOCALE_DEPENDENT_OP(category) that did + * the setup, but are no-ops when not needed, and similarly, + * END_LOCALE_DEPENDENT_OP for the tear-down + * + * But every call to a locale-sensitive function would have to be changed, and + * if a module didn't cooperate by using the mutex, things would break. + * + * This code was abandoned before being completed or tested, and is left as-is +*/ + +# define do_setlocale_c(cat, locale) locking_setlocale(cat, locale, cat ## _INDEX, TRUE) +# define do_setlocale_r(cat, locale) locking_setlocale(cat, locale, 0, FALSE) + +STATIC char * +S_locking_setlocale(pTHX_ + const int category, + const char * locale, + int index, + const bool is_index_valid + ) +{ + /* This function kind of performs a setlocale() on just the current thread; + * thus it is kind of thread-safe. It does this by keeping a thread-level + * array of the current locales for each category. Every time a locale is + * switched to, it does the switch globally, but updates the thread's + * array. A query as to what the current locale is just returns the + * appropriate element from the array, and doesn't actually call the system + * setlocale(). The saving into the array is done in an uninterruptible + * section of code, so is unaffected by whatever any other threads might be + * doing. + * + * All locale-sensitive operations must work by first starting a critical + * section, then switching to the thread's locale as kept by this function, + * and then doing the operation, then ending the critical section. Thus, + * each gets done in the appropriate locale. simulating thread-safety. + * + * This function takes the same parameters, 'category' and 'locale', that + * the regular setlocale() function does, but it also takes two additional + * ones. This is because as described earlier. If we know on input the + * index corresponding to the category into the array where we store the + * current locales, we don't have to calculate it. If the caller knows at + * compile time what the index is, it it can pass it, setting + * 'is_index_valid' to TRUE; otherwise the index parameter is ignored. + * + */ + + /* If the input index might be incorrect, calculate the correct one */ + if (! is_index_valid) { + unsigned int i; + + if (DEBUG_Lv_TEST || debug_initialization) { + PerlIO_printf(Perl_debug_log, "%s:%d: converting category %d to index\n", __FILE__, __LINE__, category); + } + + for (i = 0; i <= LC_ALL_INDEX; i++) { + if (category == categories[i]) { + index = i; + goto found_index; + } + } + + /* Here, we don't know about this category, so can't handle it. + * XXX best we can do is to unsafely set this + * XXX warning */ + + return my_setlocale(category, locale); + + found_index: ; + + if (DEBUG_Lv_TEST || debug_initialization) { + PerlIO_printf(Perl_debug_log, "%s:%d: index is 0x%x\n", __FILE__, __LINE__, index); + } + } + + /* For a query, just return what's in our records */ + if (new_locale == NULL) { + return curlocales[index]; + } + + + /* Otherwise, we need to do the switch, and save the result, all in a + * critical section */ + + Safefree(curlocales[[index]]); + + /* It might be that this is called from an already-locked section of code. + * We would have to detect and skip the LOCK/UNLOCK if so */ + LOCALE_LOCK; + + curlocales[index] = savepv(my_setlocale(category, new_locale)); + + if (strEQ(new_locale, "")) { + +#ifdef LC_ALL + + /* The locale values come from the environment, and may not all be the + * same, so for LC_ALL, we have to update all the others, while the + * mutex is still locked */ + + if (category == LC_ALL) { + unsigned int i; + for (i = 0; i < LC_ALL_INDEX) { + curlocales[i] = my_setlocale(categories[i], NULL); + } + } + } + +#endif + + LOCALE_UNLOCK; + + return curlocales[index]; +} + +#endif STATIC void S_set_numeric_radix(pTHX_ const bool use_locale) @@ -393,7 +1275,7 @@ S_set_numeric_radix(pTHX_ const bool use_locale) || defined(HAS_NL_LANGINFO)) const char * radix = (use_locale) - ? my_nl_langinfo(PERL_RADIXCHAR, FALSE) + ? my_nl_langinfo(RADIXCHAR, FALSE) /* FALSE => already in dest locale */ : "."; @@ -421,9 +1303,8 @@ S_set_numeric_radix(pTHX_ const bool use_locale) } - -void -Perl_new_numeric(pTHX_ const char *newnum) +STATIC void +S_new_numeric(pTHX_ const char *newnum) { #ifndef USE_LOCALE_NUMERIC @@ -462,10 +1343,7 @@ Perl_new_numeric(pTHX_ const char *newnum) * variables are true at the same time. (Toggling is a * no-op under these circumstances.) This variable is * used to avoid having to recalculate. - * Any code changing the locale (outside this file) should use - * POSIX::setlocale, which calls this function. Therefore this function - * should be called directly only from this file and from - * POSIX::setlocale() */ + */ char *save_newnum; @@ -482,15 +1360,20 @@ Perl_new_numeric(pTHX_ const char *newnum) PL_numeric_underlying = TRUE; PL_numeric_standard = isNAME_C_OR_POSIX(save_newnum); +#ifndef TS_W32_BROKEN_LOCALECONV + /* If its name isn't C nor POSIX, it could still be indistinguishable from - * them */ + * them. But on broken Windows systems calling my_nl_langinfo() for + * THOUSEP can currently (but rarely) cause a race, so avoid doing that, + * and just always change the locale if not C nor POSIX on those systems */ if (! PL_numeric_standard) { - PL_numeric_standard = cBOOL(strEQ(".", my_nl_langinfo(PERL_RADIXCHAR, + PL_numeric_standard = cBOOL(strEQ(".", my_nl_langinfo(RADIXCHAR, FALSE /* Don't toggle locale */ )) - && strEQ("", my_nl_langinfo(PERL_THOUSEP, - FALSE))); + && strEQ("", my_nl_langinfo(THOUSEP, FALSE))); } +#endif + /* Save the new name if it isn't the same as the previous one, if any */ if (! PL_numeric_name || strNE(PL_numeric_name, save_newnum)) { Safefree(PL_numeric_name); @@ -517,7 +1400,12 @@ Perl_new_numeric(pTHX_ const char *newnum) /* Keep LC_NUMERIC in the C locale. This is for XS modules, so they don't * have to worry about the radix being a non-dot. (Core operations that * need the underlying locale change to it temporarily). */ - set_numeric_standard(); + if (PL_numeric_standard) { + set_numeric_radix(0); + } + else { + set_numeric_standard(); + } #endif /* USE_LOCALE_NUMERIC */ @@ -604,12 +1492,15 @@ S_new_ctype(pTHX_ const char *newctype) * that tofold() is tolc() since fold case is not a concept in POSIX, * * Any code changing the locale (outside this file) should use - * POSIX::setlocale, which calls this function. Therefore this function - * should be called directly only from this file and from + * Perl_setlocale or POSIX::setlocale, which call this function. Therefore + * this function should be called directly only from this file and from * POSIX::setlocale() */ dVAR; - UV i; + unsigned int i; + + /* Don't check for problems if we are suppressing the warnings */ + bool check_for_problems = ckWARN_d(WARN_LOCALE) || UNLIKELY(DEBUG_L_TEST); PERL_ARGS_ASSERT_NEW_CTYPE; @@ -627,27 +1518,28 @@ S_new_ctype(pTHX_ const char *newctype) if (PL_in_utf8_CTYPE_locale) { Copy(PL_fold_latin1, PL_fold_locale, 256, U8); } - else { + + /* We don't populate the other lists if a UTF-8 locale, but do check that + * everything works as expected, unless checking turned off */ + if (check_for_problems || ! PL_in_utf8_CTYPE_locale) { /* Assume enough space for every character being bad. 4 spaces each * for the 94 printable characters that are output like "'x' "; and 5 * spaces each for "'\\' ", "'\t' ", and "'\n' "; plus a terminating * NUL */ - char bad_chars_list[ (94 * 4) + (3 * 5) + 1 ]; - - /* Don't check for problems if we are suppressing the warnings */ - bool check_for_problems = ckWARN_d(WARN_LOCALE) - || UNLIKELY(DEBUG_L_TEST); + char bad_chars_list[ (94 * 4) + (3 * 5) + 1 ] = { '\0' }; bool multi_byte_locale = FALSE; /* Assume is a single-byte locale to start */ unsigned int bad_count = 0; /* Count of bad characters */ for (i = 0; i < 256; i++) { - if (isupper(i)) - PL_fold_locale[i] = (U8) tolower(i); - else if (islower(i)) - PL_fold_locale[i] = (U8) toupper(i); - else - PL_fold_locale[i] = (U8) i; + if (! PL_in_utf8_CTYPE_locale) { + if (isupper(i)) + PL_fold_locale[i] = (U8) tolower(i); + else if (islower(i)) + PL_fold_locale[i] = (U8) toupper(i); + else + PL_fold_locale[i] = (U8) i; + } /* If checking for locale problems, see if the native ASCII-range * printables plus \n and \t are in their expected categories in @@ -661,40 +1553,111 @@ S_new_ctype(pTHX_ const char *newctype) if ( check_for_problems && (isGRAPH_A(i) || isBLANK_A(i) || i == '\n')) { - if ( cBOOL(isalnum(i)) != cBOOL(isALPHANUMERIC_A(i)) - || cBOOL(isalpha(i)) != cBOOL(isALPHA_A(i)) - || cBOOL(isdigit(i)) != cBOOL(isDIGIT_A(i)) - || cBOOL(isgraph(i)) != cBOOL(isGRAPH_A(i)) - || cBOOL(islower(i)) != cBOOL(isLOWER_A(i)) - || cBOOL(isprint(i)) != cBOOL(isPRINT_A(i)) - || cBOOL(ispunct(i)) != cBOOL(isPUNCT_A(i)) - || cBOOL(isspace(i)) != cBOOL(isSPACE_A(i)) - || cBOOL(isupper(i)) != cBOOL(isUPPER_A(i)) - || cBOOL(isxdigit(i))!= cBOOL(isXDIGIT_A(i)) - || tolower(i) != (int) toLOWER_A(i) - || toupper(i) != (int) toUPPER_A(i) - || (i == '\n' && ! isCNTRL_LC(i))) - { - if (bad_count) { /* Separate multiple entries with a - blank */ - bad_chars_list[bad_count++] = ' '; - } - bad_chars_list[bad_count++] = '\''; - if (isPRINT_A(i)) { - bad_chars_list[bad_count++] = (char) i; - } - else { - bad_chars_list[bad_count++] = '\\'; - if (i == '\n') { - bad_chars_list[bad_count++] = 'n'; - } - else { - assert(i == '\t'); - bad_chars_list[bad_count++] = 't'; - } + bool is_bad = FALSE; + char name[4] = { '\0' }; + + /* Convert the name into a string */ + if (isGRAPH_A(i)) { + name[0] = i; + name[1] = '\0'; + } + else if (i == '\n') { + my_strlcpy(name, "\\n", sizeof(name)); + } + else if (i == '\t') { + my_strlcpy(name, "\\t", sizeof(name)); + } + else { + assert(i == ' '); + my_strlcpy(name, "' '", sizeof(name)); + } + + /* Check each possibe class */ + if (UNLIKELY(cBOOL(isalnum(i)) != cBOOL(isALPHANUMERIC_A(i)))) { + is_bad = TRUE; + DEBUG_L(PerlIO_printf(Perl_debug_log, + "isalnum('%s') unexpectedly is %d\n", + name, cBOOL(isalnum(i)))); + } + if (UNLIKELY(cBOOL(isalpha(i)) != cBOOL(isALPHA_A(i)))) { + is_bad = TRUE; + DEBUG_L(PerlIO_printf(Perl_debug_log, + "isalpha('%s') unexpectedly is %d\n", + name, cBOOL(isalpha(i)))); + } + if (UNLIKELY(cBOOL(isdigit(i)) != cBOOL(isDIGIT_A(i)))) { + is_bad = TRUE; + DEBUG_L(PerlIO_printf(Perl_debug_log, + "isdigit('%s') unexpectedly is %d\n", + name, cBOOL(isdigit(i)))); + } + if (UNLIKELY(cBOOL(isgraph(i)) != cBOOL(isGRAPH_A(i)))) { + is_bad = TRUE; + DEBUG_L(PerlIO_printf(Perl_debug_log, + "isgraph('%s') unexpectedly is %d\n", + name, cBOOL(isgraph(i)))); + } + if (UNLIKELY(cBOOL(islower(i)) != cBOOL(isLOWER_A(i)))) { + is_bad = TRUE; + DEBUG_L(PerlIO_printf(Perl_debug_log, + "islower('%s') unexpectedly is %d\n", + name, cBOOL(islower(i)))); + } + if (UNLIKELY(cBOOL(isprint(i)) != cBOOL(isPRINT_A(i)))) { + is_bad = TRUE; + DEBUG_L(PerlIO_printf(Perl_debug_log, + "isprint('%s') unexpectedly is %d\n", + name, cBOOL(isprint(i)))); + } + if (UNLIKELY(cBOOL(ispunct(i)) != cBOOL(isPUNCT_A(i)))) { + is_bad = TRUE; + DEBUG_L(PerlIO_printf(Perl_debug_log, + "ispunct('%s') unexpectedly is %d\n", + name, cBOOL(ispunct(i)))); + } + if (UNLIKELY(cBOOL(isspace(i)) != cBOOL(isSPACE_A(i)))) { + is_bad = TRUE; + DEBUG_L(PerlIO_printf(Perl_debug_log, + "isspace('%s') unexpectedly is %d\n", + name, cBOOL(isspace(i)))); + } + if (UNLIKELY(cBOOL(isupper(i)) != cBOOL(isUPPER_A(i)))) { + is_bad = TRUE; + DEBUG_L(PerlIO_printf(Perl_debug_log, + "isupper('%s') unexpectedly is %d\n", + name, cBOOL(isupper(i)))); + } + if (UNLIKELY(cBOOL(isxdigit(i))!= cBOOL(isXDIGIT_A(i)))) { + is_bad = TRUE; + DEBUG_L(PerlIO_printf(Perl_debug_log, + "isxdigit('%s') unexpectedly is %d\n", + name, cBOOL(isxdigit(i)))); + } + if (UNLIKELY(tolower(i) != (int) toLOWER_A(i))) { + is_bad = TRUE; + DEBUG_L(PerlIO_printf(Perl_debug_log, + "tolower('%s')=0x%x instead of the expected 0x%x\n", + name, tolower(i), (int) toLOWER_A(i))); + } + if (UNLIKELY(toupper(i) != (int) toUPPER_A(i))) { + is_bad = TRUE; + DEBUG_L(PerlIO_printf(Perl_debug_log, + "toupper('%s')=0x%x instead of the expected 0x%x\n", + name, toupper(i), (int) toUPPER_A(i))); + } + if (UNLIKELY((i == '\n' && ! isCNTRL_LC(i)))) { + is_bad = TRUE; + DEBUG_L(PerlIO_printf(Perl_debug_log, + "'\\n' (=%02X) is not a control\n", (int) i)); + } + + /* Add to the list; Separate multiple entries with a blank */ + if (is_bad) { + if (bad_count) { + my_strlcat(bad_chars_list, " ", sizeof(bad_chars_list)); } - bad_chars_list[bad_count++] = '\''; - bad_chars_list[bad_count] = '\0'; + my_strlcat(bad_chars_list, name, sizeof(bad_chars_list)); + bad_count++; } } } @@ -708,7 +1671,8 @@ S_new_ctype(pTHX_ const char *newctype) "%s:%d: check_for_problems=%d, MB_CUR_MAX=%d\n", __FILE__, __LINE__, check_for_problems, (int) MB_CUR_MAX)); - if (check_for_problems && MB_CUR_MAX > 1 + if ( check_for_problems && MB_CUR_MAX > 1 + && ! PL_in_utf8_CTYPE_locale /* Some platforms return MB_CUR_MAX > 1 for even the "C" * locale. Just assume that the implementation for them (plus @@ -723,8 +1687,16 @@ S_new_ctype(pTHX_ const char *newctype) # endif - if (bad_count || multi_byte_locale) { - PL_warn_locale = Perl_newSVpvf(aTHX_ + if (UNLIKELY(bad_count) || UNLIKELY(multi_byte_locale)) { + if (UNLIKELY(bad_count) && PL_in_utf8_CTYPE_locale) { + PL_warn_locale = Perl_newSVpvf(aTHX_ + "Locale '%s' contains (at least) the following characters" + " which have\nunexpected meanings: %s\nThe Perl program" + " will use the expected meanings", + newctype, bad_chars_list); + } + else { + PL_warn_locale = Perl_newSVpvf(aTHX_ "Locale '%s' may not work well.%s%s%s\n", newctype, (multi_byte_locale) @@ -740,6 +1712,18 @@ S_new_ctype(pTHX_ const char *newctype) ? bad_chars_list : "" ); + } + +# ifdef HAS_NL_LANGINFO + + Perl_sv_catpvf(aTHX_ PL_warn_locale, "; codeset=%s", + /* parameter FALSE is a don't care here */ + my_nl_langinfo(CODESET, FALSE)); + +# endif + + Perl_sv_catpvf(aTHX_ PL_warn_locale, "\n"); + /* If we are actually in the scope of the locale or are debugging, * output the message now. If not in that scope, we save the * message to be output at the first operation using this locale, @@ -1068,8 +2052,12 @@ S_win32_setlocale(pTHX_ int category, const char* locale) } result = setlocale(category, locale); - DEBUG_L(PerlIO_printf(Perl_debug_log, "%s:%d: %s\n", __FILE__, __LINE__, - setlocale_debug_string(category, locale, result))); + DEBUG_L(STMT_START { + dSAVE_ERRNO; + PerlIO_printf(Perl_debug_log, "%s:%d: %s\n", __FILE__, __LINE__, + setlocale_debug_string(category, locale, result)); + RESTORE_ERRNO; + } STMT_END); if (! override_LC_ALL) { return result; @@ -1092,40 +2080,88 @@ S_win32_setlocale(pTHX_ int category, const char* locale) } result = setlocale(LC_ALL, NULL); - DEBUG_L(PerlIO_printf(Perl_debug_log, "%s:%d: %s\n", + DEBUG_L(STMT_START { + dSAVE_ERRNO; + PerlIO_printf(Perl_debug_log, "%s:%d: %s\n", __FILE__, __LINE__, - setlocale_debug_string(LC_ALL, NULL, result))); + setlocale_debug_string(LC_ALL, NULL, result)); + RESTORE_ERRNO; + } STMT_END); return result; } #endif -char * -Perl_setlocale(int category, const char * locale) +/* + +=head1 Locale-related functions and macros + +=for apidoc Perl_setlocale + +This is an (almost) drop-in replacement for the system L>, +taking the same parameters, and returning the same information, except that it +returns the correct underlying C locale. Regular C will +instead return C if the underlying locale has a non-dot decimal point +character, or a non-empty thousands separator for displaying floating point +numbers. This is because perl keeps that locale category such that it has a +dot and empty separator, changing the locale briefly during the operations +where the underlying one is required. C knows about this, and +compensates; regular C doesn't. + +Another reason it isn't completely a drop-in replacement is that it is +declared to return S>, whereas the system setlocale omits the +C (presumably because its API was specified long ago, and can't be +updated; it is illegal to change the information C returns; doing +so leads to segfaults.) + +Finally, C works under all circumstances, whereas plain +C can be completely ineffective on some platforms under some +configurations. + +C should not be used to change the locale except on systems +where the predefined variable C<${^SAFE_LOCALES}> is 1. On some such systems, +the system C is ineffective, returning the wrong information, and +failing to actually change the locale. C, however works +properly in all circumstances. + +The return points to a per-thread static buffer, which is overwritten the next +time C is called from the same thread. + +=cut + +*/ + +const char * +Perl_setlocale(const int category, const char * locale) { /* This wraps POSIX::setlocale() */ - char * retval; - char * newlocale; + const char * retval; + const char * newlocale; + dSAVEDERRNO; + DECLARATION_FOR_LC_NUMERIC_MANIPULATION; dTHX; #ifdef USE_LOCALE_NUMERIC /* A NULL locale means only query what the current one is. We have the * LC_NUMERIC name saved, because we are normally switched into the C - * locale for it. For an LC_ALL query, switch back to get the correct - * results. All other categories don't require special handling */ + * (or equivalent) locale for it. For an LC_ALL query, switch back to get + * the correct results. All other categories don't require special + * handling */ if (locale == NULL) { if (category == LC_NUMERIC) { - return savepv(PL_numeric_name); + + /* We don't have to copy this return value, as it is a per-thread + * variable, and won't change until a future setlocale */ + return PL_numeric_name; } # ifdef LC_ALL - else if (category == LC_ALL && ! PL_numeric_underlying) { - - SET_NUMERIC_UNDERLYING(); + else if (category == LC_ALL) { + STORE_LC_NUMERIC_FORCE_TO_UNDERLYING(); } # endif @@ -1134,26 +2170,30 @@ Perl_setlocale(int category, const char * locale) #endif - /* Save retval since subsequent setlocale() calls may overwrite it. */ - retval = savepv(do_setlocale_r(category, locale)); + retval = save_to_buffer(do_setlocale_r(category, locale), + &PL_setlocale_buf, &PL_setlocale_bufsize, 0); + SAVE_ERRNO; + +#if defined(USE_LOCALE_NUMERIC) && defined(LC_ALL) + + if (locale == NULL && category == LC_ALL) { + RESTORE_LC_NUMERIC(); + } + +#endif DEBUG_L(PerlIO_printf(Perl_debug_log, "%s:%d: %s\n", __FILE__, __LINE__, setlocale_debug_string(category, locale, retval))); - if (! retval) { - /* Should never happen that a query would return an error, but be - * sure and reset to C locale */ - if (locale == 0) { - SET_NUMERIC_STANDARD(); - } + RESTORE_ERRNO; + + if (! retval) { return NULL; } - /* If locale == NULL, we are just querying the state, but may have switched - * to NUMERIC_UNDERLYING. Switch back before returning. */ + /* If locale == NULL, we are just querying the state */ if (locale == NULL) { - SET_NUMERIC_STANDARD(); return retval; } @@ -1193,20 +2233,23 @@ Perl_setlocale(int category, const char * locale) # ifdef USE_LOCALE_CTYPE - newlocale = do_setlocale_c(LC_CTYPE, NULL); + newlocale = savepv(do_setlocale_c(LC_CTYPE, NULL)); new_ctype(newlocale); + Safefree(newlocale); # endif /* USE_LOCALE_CTYPE */ # ifdef USE_LOCALE_COLLATE - newlocale = do_setlocale_c(LC_COLLATE, NULL); + newlocale = savepv(do_setlocale_c(LC_COLLATE, NULL)); new_collate(newlocale); + Safefree(newlocale); # endif # ifdef USE_LOCALE_NUMERIC - newlocale = do_setlocale_c(LC_NUMERIC, NULL); + newlocale = savepv(do_setlocale_c(LC_NUMERIC, NULL)); new_numeric(newlocale); + Safefree(newlocale); # endif /* USE_LOCALE_NUMERIC */ #endif /* LC_ALL */ @@ -1217,7 +2260,6 @@ Perl_setlocale(int category, const char * locale) return retval; - } PERL_STATIC_INLINE const char * @@ -1226,10 +2268,16 @@ S_save_to_buffer(const char * string, char **buf, Size_t *buf_size, const Size_t /* Copy the NUL-terminated 'string' to 'buf' + 'offset'. 'buf' has size 'buf_size', * growing it if necessary */ - const Size_t string_size = strlen(string) + offset + 1; + Size_t string_size; PERL_ARGS_ASSERT_SAVE_TO_BUFFER; + if (! string) { + return NULL; + } + + string_size = strlen(string) + offset + 1; + if (*buf_size == 0) { Newx(*buf, string_size, char); *buf_size = string_size; @@ -1245,11 +2293,9 @@ S_save_to_buffer(const char * string, char **buf, Size_t *buf_size, const Size_t /* -=head1 Locale-related functions and macros - =for apidoc Perl_langinfo -This is an (almost ª) drop-in replacement for the system C>, +This is an (almost) drop-in replacement for the system C>, taking the same C parameter values, and returning the same information. But it is more thread-safe than regular C, and hides the quirks of Perl's locale handling from your code, and can be used on systems that lack @@ -1261,88 +2307,66 @@ Expanding on these: =item * -It delivers the correct results for the C and C items, -without you having to write extra code. The reason for the extra code would be -because these are from the C locale category, which is normally -kept set to the C locale by Perl, no matter what the underlying locale is -supposed to be, and so to get the expected results, you have to temporarily -toggle into the underlying locale, and later toggle back. (You could use -plain C and C> for this -but then you wouldn't get the other advantages of C; not -keeping C in the C locale would break a lot of CPAN, which is -expecting the radix (decimal point) character to be a dot.) +The reason it isn't quite a drop-in replacement is actually an advantage. The +only difference is that it returns S>, whereas plain +C returns S>, but you are (only by documentation) +forbidden to write into the buffer. By declaring this C, the compiler +enforces this restriction, so if it is violated, you know at compilation time, +rather than getting segfaults at runtime. =item * -Depending on C, it works on systems that don't have C, hence -makes your code more portable. Of the fifty-some possible items specified by -the POSIX 2008 standard, -L, -only two are completely unimplemented. It uses various techniques to recover -the other items, including calling C>, and C>, -both of which are specified in C89, so should be always be available. Later -C versions have additional capabilities; C<""> is returned for -those not available on your system. - -The details for those items which may differ from what this emulation returns -and what a native C would return are: - -=over - -=item C - -=item C - -Unimplemented, so returns C<"">. - -=item C - -=item C - -=item C - -=item C - -Only the values for English are returned. C and C have been -removed from POSIX 2008, and are retained for backwards compatibility. Your -platform's C may not support them. - -=item C - -Always evaluates to C<%x>, the locale's appropriate date representation. - -=item C - -Always evaluates to C<%X>, the locale's appropriate time representation. - -=item C - -Always evaluates to C<%c>, the locale's appropriate date and time -representation. - -=item C - -The return may be incorrect for those rare locales where the currency symbol -replaces the radix character. -Send email to L if you have examples of it needing -to work differently. - -=item C +It delivers the correct results for the C and C items, +without you having to write extra code. The reason for the extra code would be +because these are from the C locale category, which is normally +kept set by Perl so that the radix is a dot, and the separator is the empty +string, no matter what the underlying locale is supposed to be, and so to get +the expected results, you have to temporarily toggle into the underlying +locale, and later toggle back. (You could use plain C and +C> for this but then you wouldn't get +the other advantages of C; not keeping C in the C +(or equivalent) locale would break a lot of CPAN, which is expecting the radix +(decimal point) character to be a dot.) -Currently this gives the same results as Linux does. -Send email to L if you have examples of it needing -to work differently. +=item * -=item C +The system function it replaces can have its static return buffer trashed, +not only by a subesequent call to that function, but by a C, +C, or other locale change. The returned buffer of this function is +not changed until the next call to it, so the buffer is never in a trashed +state. -=item C +=item * -=item C +Its return buffer is per-thread, so it also is never overwritten by a call to +this function from another thread; unlike the function it replaces. -=item C +=item * -These are derived by using C, and not all versions of that function -know about them. C<""> is returned for these on such systems. +But most importantly, it works on systems that don't have C, such +as Windows, hence makes your code more portable. Of the fifty-some possible +items specified by the POSIX 2008 standard, +L, +only one is completely unimplemented, though on non-Windows platforms, another +significant one is also not implemented). It uses various techniques to +recover the other items, including calling C>, and +C>, both of which are specified in C89, so should be always be +available. Later C versions have additional capabilities; C<""> is +returned for those not available on your system. + +It is important to note that when called with an item that is recovered by +using C, the buffer from any previous explicit call to +C will be overwritten. This means you must save that buffer's +contents if you need to access them after a call to this function. (But note +that you might not want to be using C directly anyway, because of +issues like the ones listed in the second item of this list (above) for +C and C. You can use the methods given in L to +call L and avoid all the issues, but then you have a hash to +unpack). + +The details for those items which may deviate from what this emulation returns +and what a native C would return are specified in +L. =back @@ -1353,29 +2377,8 @@ C, you must before the C C<#include>. You can replace your C C<#include> with this one. (Doing it this way keeps out the symbols that plain -C imports into the namespace for code that doesn't need it.) - -You also should not use the bare C item names, but should preface -them with C, so use C instead of plain C. -The C> versions will also work for this function on systems that do -have a native C. - -=item * - -It is thread-friendly, returning its result in a buffer that won't be -overwritten by another thread, so you don't have to code for that possibility. -The buffer can be overwritten by the next call to C or -C in the same thread. - -=item * - -ª It returns S>, whereas plain C returns S>, but you are (only by documentation) forbidden to write into the buffer. -By declaring this C, the compiler enforces this restriction. The extra -C is why this isn't an unequivocal drop-in replacement for -C. - -=back +C would try to import into the namespace for code that doesn't need +it.) The original impetus for C was so that code that needs to find out the current currency symbol, floating point radix character, or digit @@ -1399,7 +2402,7 @@ Perl_langinfo(const int item) return my_nl_langinfo(item, TRUE); } -const char * +STATIC const char * #ifdef HAS_NL_LANGINFO S_my_nl_langinfo(const nl_item item, bool toggle) #else @@ -1407,17 +2410,20 @@ S_my_nl_langinfo(const int item, bool toggle) #endif { dTHX; + const char * retval; /* We only need to toggle into the underlying LC_NUMERIC locale for these * two items, and only if not already there */ - if (toggle && (( item != PERL_RADIXCHAR && item != PERL_THOUSEP) + if (toggle && (( item != RADIXCHAR && item != THOUSEP) || PL_numeric_underlying)) { toggle = FALSE; } #if defined(HAS_NL_LANGINFO) /* nl_langinfo() is available. */ -#if ! defined(HAS_POSIX_2008_LOCALE) +# if ! defined(HAS_THREAD_SAFE_NL_LANGINFO_L) \ + || ! defined(HAS_POSIX_2008_LOCALE) \ + || ! defined(DUPLOCALE) /* Here, use plain nl_langinfo(), switching to the underlying LC_NUMERIC * for those items dependent on it. This must be copied to a buffer before @@ -1435,8 +2441,12 @@ S_my_nl_langinfo(const int item, bool toggle) this code section (the only call to nl_langinfo in the core) */ - save_to_buffer(nl_langinfo(item), &PL_langinfo_buf, - &PL_langinfo_bufsize, 0); + + /* Copy to a per-thread buffer, which is also one that won't be + * destroyed by a subsequent setlocale(), such as the + * RESTORE_LC_NUMERIC may do just below. */ + retval = save_to_buffer(nl_langinfo(item), + &PL_langinfo_buf, &PL_langinfo_bufsize, 0); LOCALE_UNLOCK; @@ -1466,8 +2476,11 @@ S_my_nl_langinfo(const int item, bool toggle) } } - save_to_buffer(nl_langinfo_l(item, cur), - &PL_langinfo_buf, &PL_langinfo_bufsize, 0); + /* We have to save it to a buffer, because the freelocale() just below + * can invalidate the internal one */ + retval = save_to_buffer(nl_langinfo_l(item, cur), + &PL_langinfo_buf, &PL_langinfo_bufsize, 0); + if (do_free) { freelocale(cur); } @@ -1475,16 +2488,16 @@ S_my_nl_langinfo(const int item, bool toggle) # endif - if (strEQ(PL_langinfo_buf, "")) { - if (item == PERL_YESSTR) { + if (strEQ(retval, "")) { + if (item == YESSTR) { return "yes"; } - if (item == PERL_NOSTR) { + if (item == NOSTR) { return "no"; } } - return PL_langinfo_buf; + return retval; #else /* Below, emulate nl_langinfo as best we can */ @@ -1493,8 +2506,19 @@ S_my_nl_langinfo(const int item, bool toggle) # ifdef HAS_LOCALECONV const struct lconv* lc; + const char * temp; DECLARATION_FOR_LC_NUMERIC_MANIPULATION; +# ifdef TS_W32_BROKEN_LOCALECONV + + const char * save_global; + const char * save_thread; + int needed_size; + char * ptr; + char * e; + char * item_start; + +# endif # endif # ifdef HAS_STRFTIME @@ -1504,96 +2528,281 @@ S_my_nl_langinfo(const int item, bool toggle) # endif - /* We copy the results to a per-thread buffer, even if not - * multi-threaded. This is in part to simplify this code, and partly - * because we need a buffer anyway for strftime(), and partly because a - * call of localeconv() could otherwise wipe out the buffer, and the - * programmer would not be expecting this, as this is a nl_langinfo() - * substitute after all, so s/he might be thinking their localeconv() - * is safe until another localeconv() call. */ + /* We copy the results to a per-thread buffer, even if not + * multi-threaded. This is in part to simplify this code, and partly + * because we need a buffer anyway for strftime(), and partly because a + * call of localeconv() could otherwise wipe out the buffer, and the + * programmer would not be expecting this, as this is a nl_langinfo() + * substitute after all, so s/he might be thinking their localeconv() + * is safe until another localeconv() call. */ + + switch (item) { + Size_t len; + + /* This is unimplemented */ + case ERA: /* For use with strftime() %E modifier */ + + default: + return ""; + + /* We use only an English set, since we don't know any more */ + case YESEXPR: return "^[+1yY]"; + case YESSTR: return "yes"; + case NOEXPR: return "^[-0nN]"; + case NOSTR: return "no"; + + case CODESET: + +# ifndef WIN32 + + /* On non-windows, this is unimplemented, in part because of + * inconsistencies between vendors. The Darwin native + * nl_langinfo() implementation simply looks at everything past + * any dot in the name, but that doesn't work for other + * vendors. Many Linux locales that don't have UTF-8 in their + * names really are UTF-8, for example; z/OS locales that do + * have UTF-8 in their names, aren't really UTF-8 */ + return ""; + +# else + + { /* But on Windows, the name does seem to be consistent, so + use that. */ + const char * p; + const char * first; + Size_t offset = 0; + const char * name = my_setlocale(LC_CTYPE, NULL); + + if (isNAME_C_OR_POSIX(name)) { + return "ANSI_X3.4-1968"; + } + + /* Find the dot in the locale name */ + first = (const char *) strchr(name, '.'); + if (! first) { + first = name; + goto has_nondigit; + } + + /* Look at everything past the dot */ + first++; + p = first; + + while (*p) { + if (! isDIGIT(*p)) { + goto has_nondigit; + } + + p++; + } - switch (item) { - Size_t len; - const char * retval; + /* Here everything past the dot is a digit. Treat it as a + * code page */ + retval = save_to_buffer("CP", &PL_langinfo_buf, + &PL_langinfo_bufsize, 0); + offset = STRLENs("CP"); - /* These 2 are unimplemented */ - case PERL_CODESET: - case PERL_ERA: /* For use with strftime() %E modifier */ + has_nondigit: - default: - return ""; + retval = save_to_buffer(first, &PL_langinfo_buf, + &PL_langinfo_bufsize, offset); + } - /* We use only an English set, since we don't know any more */ - case PERL_YESEXPR: return "^[+1yY]"; - case PERL_YESSTR: return "yes"; - case PERL_NOEXPR: return "^[-0nN]"; - case PERL_NOSTR: return "no"; + break; +# endif # ifdef HAS_LOCALECONV - case PERL_CRNCYSTR: + case CRNCYSTR: /* We don't bother with localeconv_l() because any system that * has it is likely to also have nl_langinfo() */ - LOCALE_LOCK; /* Prevent interference with other threads - using localeconv() */ + LOCALE_LOCK_V; /* Prevent interference with other threads + using localeconv() */ + +# ifdef TS_W32_BROKEN_LOCALECONV + + /* This is a workaround for a Windows bug prior to VS 15. + * What we do here is, while locked, switch to the global + * locale so localeconv() works; then switch back just before + * the unlock. This can screw things up if some thread is + * already using the global locale while assuming no other is. + * A different workaround would be to call GetCurrencyFormat on + * a known value, and parse it; patches welcome + * + * We have to use LC_ALL instead of LC_MONETARY because of + * another bug in Windows */ + + save_thread = savepv(my_setlocale(LC_ALL, NULL)); + _configthreadlocale(_DISABLE_PER_THREAD_LOCALE); + save_global= savepv(my_setlocale(LC_ALL, NULL)); + my_setlocale(LC_ALL, save_thread); + +# endif lc = localeconv(); if ( ! lc || ! lc->currency_symbol || strEQ("", lc->currency_symbol)) { - LOCALE_UNLOCK; + LOCALE_UNLOCK_V; return ""; } /* Leave the first spot empty to be filled in below */ - save_to_buffer(lc->currency_symbol, &PL_langinfo_buf, - &PL_langinfo_bufsize, 1); + retval = save_to_buffer(lc->currency_symbol, &PL_langinfo_buf, + &PL_langinfo_bufsize, 1); if (lc->mon_decimal_point && strEQ(lc->mon_decimal_point, "")) { /* khw couldn't figure out how the localedef specifications would show that the $ should replace the radix; this is just a guess as to how it might work.*/ - *PL_langinfo_buf = '.'; + PL_langinfo_buf[0] = '.'; } else if (lc->p_cs_precedes) { - *PL_langinfo_buf = '-'; + PL_langinfo_buf[0] = '-'; } else { - *PL_langinfo_buf = '+'; + PL_langinfo_buf[0] = '+'; } - LOCALE_UNLOCK; +# ifdef TS_W32_BROKEN_LOCALECONV + + my_setlocale(LC_ALL, save_global); + _configthreadlocale(_ENABLE_PER_THREAD_LOCALE); + my_setlocale(LC_ALL, save_thread); + Safefree(save_global); + Safefree(save_thread); + +# endif + + LOCALE_UNLOCK_V; + break; + +# ifdef TS_W32_BROKEN_LOCALECONV + + case RADIXCHAR: + + /* For this, we output a known simple floating point number to + * a buffer, and parse it, looking for the radix */ + + if (toggle) { + STORE_LC_NUMERIC_FORCE_TO_UNDERLYING(); + } + + if (PL_langinfo_bufsize < 10) { + PL_langinfo_bufsize = 10; + Renew(PL_langinfo_buf, PL_langinfo_bufsize, char); + } + + needed_size = my_snprintf(PL_langinfo_buf, PL_langinfo_bufsize, + "%.1f", 1.5); + if (needed_size >= (int) PL_langinfo_bufsize) { + PL_langinfo_bufsize = needed_size + 1; + Renew(PL_langinfo_buf, PL_langinfo_bufsize, char); + needed_size = my_snprintf(PL_langinfo_buf, PL_langinfo_bufsize, + "%.1f", 1.5); + assert(needed_size < (int) PL_langinfo_bufsize); + } + + ptr = PL_langinfo_buf; + e = PL_langinfo_buf + PL_langinfo_bufsize; + + /* Find the '1' */ + while (ptr < e && *ptr != '1') { + ptr++; + } + ptr++; + + /* Find the '5' */ + item_start = ptr; + while (ptr < e && *ptr != '5') { + ptr++; + } + + /* Everything in between is the radix string */ + if (ptr >= e) { + PL_langinfo_buf[0] = '?'; + PL_langinfo_buf[1] = '\0'; + } + else { + *ptr = '\0'; + Move(item_start, PL_langinfo_buf, ptr - PL_langinfo_buf, char); + } + + if (toggle) { + RESTORE_LC_NUMERIC(); + } + + retval = PL_langinfo_buf; break; - case PERL_RADIXCHAR: - case PERL_THOUSEP: +# else + + case RADIXCHAR: /* No special handling needed */ + +# endif + + case THOUSEP: if (toggle) { STORE_LC_NUMERIC_FORCE_TO_UNDERLYING(); } - LOCALE_LOCK; /* Prevent interference with other threads - using localeconv() */ + LOCALE_LOCK_V; /* Prevent interference with other threads + using localeconv() */ + +# ifdef TS_W32_BROKEN_LOCALECONV + + /* This should only be for the thousands separator. A + * different work around would be to use GetNumberFormat on a + * known value and parse the result to find the separator */ + save_thread = savepv(my_setlocale(LC_ALL, NULL)); + _configthreadlocale(_DISABLE_PER_THREAD_LOCALE); + save_global = savepv(my_setlocale(LC_ALL, NULL)); + my_setlocale(LC_ALL, save_thread); +# if 0 + /* This is the start of code that for broken Windows replaces + * the above and below code, and instead calls + * GetNumberFormat() and then would parse that to find the + * thousands separator. It needs to handle UTF-16 vs -8 + * issues. */ + + needed_size = GetNumberFormatEx(PL_numeric_name, 0, "1234.5", NULL, PL_langinfo_buf, PL_langinfo_bufsize); + DEBUG_L(PerlIO_printf(Perl_debug_log, + "%s: %d: return from GetNumber, count=%d, val=%s\n", + __FILE__, __LINE__, needed_size, PL_langinfo_buf)); + +# endif +# endif lc = localeconv(); if (! lc) { - retval = ""; + temp = ""; } else { - retval = (item == PERL_RADIXCHAR) + temp = (item == RADIXCHAR) ? lc->decimal_point : lc->thousands_sep; - if (! retval) { - retval = ""; + if (! temp) { + temp = ""; } } - save_to_buffer(retval, &PL_langinfo_buf, - &PL_langinfo_bufsize, 0); + retval = save_to_buffer(temp, &PL_langinfo_buf, + &PL_langinfo_bufsize, 0); - LOCALE_UNLOCK; +# ifdef TS_W32_BROKEN_LOCALECONV + + my_setlocale(LC_ALL, save_global); + _configthreadlocale(_ENABLE_PER_THREAD_LOCALE); + my_setlocale(LC_ALL, save_thread); + Safefree(save_global); + Safefree(save_thread); + +# endif + + LOCALE_UNLOCK_V; if (toggle) { RESTORE_LC_NUMERIC(); @@ -1610,30 +2819,26 @@ S_my_nl_langinfo(const int item, bool toggle) * for someone using them as formats (as opposed to trying to parse * them to figure out what the locale says). The other format * items are actually tested to verify they work on the platform */ - case PERL_D_FMT: return "%x"; - case PERL_T_FMT: return "%X"; - case PERL_D_T_FMT: return "%c"; + case D_FMT: return "%x"; + case T_FMT: return "%X"; + case D_T_FMT: return "%c"; /* These formats are only available in later strfmtime's */ - case PERL_ERA_D_FMT: case PERL_ERA_T_FMT: case PERL_ERA_D_T_FMT: - case PERL_T_FMT_AMPM: + case ERA_D_FMT: case ERA_T_FMT: case ERA_D_T_FMT: case T_FMT_AMPM: /* The rest can be gotten from most versions of strftime(). */ - case PERL_ABDAY_1: case PERL_ABDAY_2: case PERL_ABDAY_3: - case PERL_ABDAY_4: case PERL_ABDAY_5: case PERL_ABDAY_6: - case PERL_ABDAY_7: - case PERL_ALT_DIGITS: - case PERL_AM_STR: case PERL_PM_STR: - case PERL_ABMON_1: case PERL_ABMON_2: case PERL_ABMON_3: - case PERL_ABMON_4: case PERL_ABMON_5: case PERL_ABMON_6: - case PERL_ABMON_7: case PERL_ABMON_8: case PERL_ABMON_9: - case PERL_ABMON_10: case PERL_ABMON_11: case PERL_ABMON_12: - case PERL_DAY_1: case PERL_DAY_2: case PERL_DAY_3: case PERL_DAY_4: - case PERL_DAY_5: case PERL_DAY_6: case PERL_DAY_7: - case PERL_MON_1: case PERL_MON_2: case PERL_MON_3: case PERL_MON_4: - case PERL_MON_5: case PERL_MON_6: case PERL_MON_7: case PERL_MON_8: - case PERL_MON_9: case PERL_MON_10: case PERL_MON_11: - case PERL_MON_12: + case ABDAY_1: case ABDAY_2: case ABDAY_3: + case ABDAY_4: case ABDAY_5: case ABDAY_6: case ABDAY_7: + case ALT_DIGITS: + case AM_STR: case PM_STR: + case ABMON_1: case ABMON_2: case ABMON_3: case ABMON_4: + case ABMON_5: case ABMON_6: case ABMON_7: case ABMON_8: + case ABMON_9: case ABMON_10: case ABMON_11: case ABMON_12: + case DAY_1: case DAY_2: case DAY_3: case DAY_4: + case DAY_5: case DAY_6: case DAY_7: + case MON_1: case MON_2: case MON_3: case MON_4: + case MON_5: case MON_6: case MON_7: case MON_8: + case MON_9: case MON_10: case MON_11: case MON_12: LOCALE_LOCK; @@ -1652,82 +2857,82 @@ S_my_nl_langinfo(const int item, bool toggle) __FILE__, __LINE__, item); NOT_REACHED; /* NOTREACHED */ - case PERL_PM_STR: tm.tm_hour = 18; - case PERL_AM_STR: + case PM_STR: tm.tm_hour = 18; + case AM_STR: format = "%p"; break; - case PERL_ABDAY_7: tm.tm_wday++; - case PERL_ABDAY_6: tm.tm_wday++; - case PERL_ABDAY_5: tm.tm_wday++; - case PERL_ABDAY_4: tm.tm_wday++; - case PERL_ABDAY_3: tm.tm_wday++; - case PERL_ABDAY_2: tm.tm_wday++; - case PERL_ABDAY_1: + case ABDAY_7: tm.tm_wday++; + case ABDAY_6: tm.tm_wday++; + case ABDAY_5: tm.tm_wday++; + case ABDAY_4: tm.tm_wday++; + case ABDAY_3: tm.tm_wday++; + case ABDAY_2: tm.tm_wday++; + case ABDAY_1: format = "%a"; break; - case PERL_DAY_7: tm.tm_wday++; - case PERL_DAY_6: tm.tm_wday++; - case PERL_DAY_5: tm.tm_wday++; - case PERL_DAY_4: tm.tm_wday++; - case PERL_DAY_3: tm.tm_wday++; - case PERL_DAY_2: tm.tm_wday++; - case PERL_DAY_1: + case DAY_7: tm.tm_wday++; + case DAY_6: tm.tm_wday++; + case DAY_5: tm.tm_wday++; + case DAY_4: tm.tm_wday++; + case DAY_3: tm.tm_wday++; + case DAY_2: tm.tm_wday++; + case DAY_1: format = "%A"; break; - case PERL_ABMON_12: tm.tm_mon++; - case PERL_ABMON_11: tm.tm_mon++; - case PERL_ABMON_10: tm.tm_mon++; - case PERL_ABMON_9: tm.tm_mon++; - case PERL_ABMON_8: tm.tm_mon++; - case PERL_ABMON_7: tm.tm_mon++; - case PERL_ABMON_6: tm.tm_mon++; - case PERL_ABMON_5: tm.tm_mon++; - case PERL_ABMON_4: tm.tm_mon++; - case PERL_ABMON_3: tm.tm_mon++; - case PERL_ABMON_2: tm.tm_mon++; - case PERL_ABMON_1: + case ABMON_12: tm.tm_mon++; + case ABMON_11: tm.tm_mon++; + case ABMON_10: tm.tm_mon++; + case ABMON_9: tm.tm_mon++; + case ABMON_8: tm.tm_mon++; + case ABMON_7: tm.tm_mon++; + case ABMON_6: tm.tm_mon++; + case ABMON_5: tm.tm_mon++; + case ABMON_4: tm.tm_mon++; + case ABMON_3: tm.tm_mon++; + case ABMON_2: tm.tm_mon++; + case ABMON_1: format = "%b"; break; - case PERL_MON_12: tm.tm_mon++; - case PERL_MON_11: tm.tm_mon++; - case PERL_MON_10: tm.tm_mon++; - case PERL_MON_9: tm.tm_mon++; - case PERL_MON_8: tm.tm_mon++; - case PERL_MON_7: tm.tm_mon++; - case PERL_MON_6: tm.tm_mon++; - case PERL_MON_5: tm.tm_mon++; - case PERL_MON_4: tm.tm_mon++; - case PERL_MON_3: tm.tm_mon++; - case PERL_MON_2: tm.tm_mon++; - case PERL_MON_1: + case MON_12: tm.tm_mon++; + case MON_11: tm.tm_mon++; + case MON_10: tm.tm_mon++; + case MON_9: tm.tm_mon++; + case MON_8: tm.tm_mon++; + case MON_7: tm.tm_mon++; + case MON_6: tm.tm_mon++; + case MON_5: tm.tm_mon++; + case MON_4: tm.tm_mon++; + case MON_3: tm.tm_mon++; + case MON_2: tm.tm_mon++; + case MON_1: format = "%B"; break; - case PERL_T_FMT_AMPM: + case T_FMT_AMPM: format = "%r"; return_format = TRUE; break; - case PERL_ERA_D_FMT: + case ERA_D_FMT: format = "%Ex"; return_format = TRUE; break; - case PERL_ERA_T_FMT: + case ERA_T_FMT: format = "%EX"; return_format = TRUE; break; - case PERL_ERA_D_T_FMT: + case ERA_D_T_FMT: format = "%Ec"; return_format = TRUE; break; - case PERL_ALT_DIGITS: + case ALT_DIGITS: tm.tm_wday = 0; format = "%Ow"; /* Find the alternate digit for 0 */ break; @@ -1795,7 +3000,7 @@ S_my_nl_langinfo(const int item, bool toggle) * alternate format for wday 0. If the value is the same as * the normal 0, there isn't an alternate, so clear the buffer. * */ - if ( item == PERL_ALT_DIGITS + if ( item == ALT_DIGITS && strEQ(PL_langinfo_buf, "0")) { *PL_langinfo_buf = '\0'; @@ -1824,6 +3029,8 @@ S_my_nl_langinfo(const int item, bool toggle) LOCALE_UNLOCK; + retval = PL_langinfo_buf; + /* If to return the format, not the value, overwrite the buffer * with it. But some strftime()s will keep the original format * if illegal, so change those to "" */ @@ -1832,8 +3039,8 @@ S_my_nl_langinfo(const int item, bool toggle) *PL_langinfo_buf = '\0'; } else { - save_to_buffer(format, &PL_langinfo_buf, - &PL_langinfo_bufsize, 0); + retval = save_to_buffer(format, &PL_langinfo_buf, + &PL_langinfo_bufsize, 0); } } @@ -1844,7 +3051,7 @@ S_my_nl_langinfo(const int item, bool toggle) } } - return PL_langinfo_buf; + return retval; #endif @@ -1980,51 +3187,87 @@ Perl_init_i18nl10n(pTHX_ int printwarn) # ifdef USE_LOCALE_NUMERIC assert(categories[LC_NUMERIC_INDEX] == LC_NUMERIC); assert(strEQ(category_names[LC_NUMERIC_INDEX], "LC_NUMERIC")); +# ifdef USE_POSIX_2008_LOCALE + assert(category_masks[LC_NUMERIC_INDEX] == LC_NUMERIC_MASK); +# endif # endif # ifdef USE_LOCALE_CTYPE assert(categories[LC_CTYPE_INDEX] == LC_CTYPE); assert(strEQ(category_names[LC_CTYPE_INDEX], "LC_CTYPE")); +# ifdef USE_POSIX_2008_LOCALE + assert(category_masks[LC_CTYPE_INDEX] == LC_CTYPE_MASK); +# endif # endif # ifdef USE_LOCALE_COLLATE assert(categories[LC_COLLATE_INDEX] == LC_COLLATE); assert(strEQ(category_names[LC_COLLATE_INDEX], "LC_COLLATE")); +# ifdef USE_POSIX_2008_LOCALE + assert(category_masks[LC_COLLATE_INDEX] == LC_COLLATE_MASK); +# endif # endif # ifdef USE_LOCALE_TIME assert(categories[LC_TIME_INDEX] == LC_TIME); assert(strEQ(category_names[LC_TIME_INDEX], "LC_TIME")); +# ifdef USE_POSIX_2008_LOCALE + assert(category_masks[LC_TIME_INDEX] == LC_TIME_MASK); +# endif # endif # ifdef USE_LOCALE_MESSAGES assert(categories[LC_MESSAGES_INDEX] == LC_MESSAGES); assert(strEQ(category_names[LC_MESSAGES_INDEX], "LC_MESSAGES")); +# ifdef USE_POSIX_2008_LOCALE + assert(category_masks[LC_MESSAGES_INDEX] == LC_MESSAGES_MASK); +# endif # endif # ifdef USE_LOCALE_MONETARY assert(categories[LC_MONETARY_INDEX] == LC_MONETARY); assert(strEQ(category_names[LC_MONETARY_INDEX], "LC_MONETARY")); +# ifdef USE_POSIX_2008_LOCALE + assert(category_masks[LC_MONETARY_INDEX] == LC_MONETARY_MASK); +# endif # endif # ifdef USE_LOCALE_ADDRESS assert(categories[LC_ADDRESS_INDEX] == LC_ADDRESS); assert(strEQ(category_names[LC_ADDRESS_INDEX], "LC_ADDRESS")); +# ifdef USE_POSIX_2008_LOCALE + assert(category_masks[LC_ADDRESS_INDEX] == LC_ADDRESS_MASK); +# endif # endif # ifdef USE_LOCALE_IDENTIFICATION assert(categories[LC_IDENTIFICATION_INDEX] == LC_IDENTIFICATION); assert(strEQ(category_names[LC_IDENTIFICATION_INDEX], "LC_IDENTIFICATION")); +# ifdef USE_POSIX_2008_LOCALE + assert(category_masks[LC_IDENTIFICATION_INDEX] == LC_IDENTIFICATION_MASK); +# endif # endif # ifdef USE_LOCALE_MEASUREMENT assert(categories[LC_MEASUREMENT_INDEX] == LC_MEASUREMENT); assert(strEQ(category_names[LC_MEASUREMENT_INDEX], "LC_MEASUREMENT")); +# ifdef USE_POSIX_2008_LOCALE + assert(category_masks[LC_MEASUREMENT_INDEX] == LC_MEASUREMENT_MASK); +# endif # endif # ifdef USE_LOCALE_PAPER assert(categories[LC_PAPER_INDEX] == LC_PAPER); assert(strEQ(category_names[LC_PAPER_INDEX], "LC_PAPER")); +# ifdef USE_POSIX_2008_LOCALE + assert(category_masks[LC_PAPER_INDEX] == LC_PAPER_MASK); +# endif # endif # ifdef USE_LOCALE_TELEPHONE assert(categories[LC_TELEPHONE_INDEX] == LC_TELEPHONE); assert(strEQ(category_names[LC_TELEPHONE_INDEX], "LC_TELEPHONE")); +# ifdef USE_POSIX_2008_LOCALE + assert(category_masks[LC_TELEPHONE_INDEX] == LC_TELEPHONE_MASK); +# endif # endif # ifdef LC_ALL assert(categories[LC_ALL_INDEX] == LC_ALL); assert(strEQ(category_names[LC_ALL_INDEX], "LC_ALL")); assert(NOMINAL_LC_ALL_INDEX == LC_ALL_INDEX); +# ifdef USE_POSIX_2008_LOCALE + assert(category_masks[LC_ALL_INDEX] == LC_ALL_MASK); +# endif # endif # endif /* DEBUGGING */ @@ -2033,8 +3276,34 @@ Perl_init_i18nl10n(pTHX_ int printwarn) my_strlcpy(PL_locale_utf8ness, C_and_POSIX_utf8ness, sizeof(PL_locale_utf8ness)); +# ifdef USE_THREAD_SAFE_LOCALE +# ifdef WIN32 + + _configthreadlocale(_ENABLE_PER_THREAD_LOCALE); + +# endif +# endif +# ifdef USE_POSIX_2008_LOCALE + + PL_C_locale_obj = newlocale(LC_ALL_MASK, "C", (locale_t) 0); + if (! PL_C_locale_obj) { + Perl_croak_nocontext( + "panic: Cannot create POSIX 2008 C locale object; errno=%d", errno); + } + if (DEBUG_Lv_TEST || debug_initialization) { + PerlIO_printf(Perl_debug_log, "%s:%d: created C object %p\n", __FILE__, __LINE__, PL_C_locale_obj); + } + +# endif + PL_numeric_radix_sv = newSVpvs("."); +# if defined(USE_POSIX_2008_LOCALE) && ! defined(HAS_QUERYLOCALE) + + /* Initialize our records. If we have POSIX 2008, we have LC_ALL */ + do_setlocale_c(LC_ALL, my_setlocale(LC_ALL, NULL)); + +# endif # ifdef LOCALE_ENVIRON_REQUIRED /* @@ -2393,18 +3662,19 @@ Perl_init_i18nl10n(pTHX_ int printwarn) for (i = 0; i < NOMINAL_LC_ALL_INDEX; i++) { -# if defined(USE_ITHREADS) +# if defined(USE_ITHREADS) && ! defined(USE_THREAD_SAFE_LOCALE) /* This caches whether each category's locale is UTF-8 or not. This * may involve changing the locale. It is ok to do this at - * initialization time before any threads have started, but not later. + * initialization time before any threads have started, but not later + * unless thread-safe operations are used. * Caching means that if the program heeds our dictate not to change * locales in threaded applications, this data will remain valid, and - * it may get queried without changing locales. If the environment is - * such that all categories have the same locale, this isn't needed, as - * the code will not change the locale; but this handles the uncommon - * case where the environment has disparate locales for the categories - * */ + * it may get queried without having to change locales. If the + * environment is such that all categories have the same locale, this + * isn't needed, as the code will not change the locale; but this + * handles the uncommon case where the environment has disparate + * locales for the categories */ (void) _is_cur_LC_category_utf8(categories[i]); # endif @@ -2645,6 +3915,7 @@ Perl__mem_collxfrm(pTHX_ const char *input_string, /* Make sure the UTF8ness of the string and locale match */ if (utf8 != PL_in_utf8_COLLATE_locale) { + /* XXX convert above Unicode to 10FFFF? */ const char * const t = s; /* Temporary so we can later find where the input was */ @@ -3053,9 +4324,9 @@ S_switch_category_locale_to_template(pTHX_ const int switch_category, const int * LC_'template_category', if they aren't already the same. If not NULL, * 'template_locale' is the locale that 'template_category' is in. * - * Returns the original locale for 'switch_category' so can be switched - * back to with the companion function restore_switched_locale(), (NULL if - * no restoral is necessary.) */ + * Returns a copy of the name of the original locale for 'switch_category' + * so can be switched back to with the companion function + * restore_switched_locale(), (NULL if no restoral is necessary.) */ char * restore_to_locale = NULL; @@ -3110,8 +4381,9 @@ S_switch_category_locale_to_template(pTHX_ const int switch_category, const int STATIC void S_restore_switched_locale(pTHX_ const int category, const char * const original_locale) { - /* Restores the locale for LC_'category' to 'original_locale', or do - * nothing if the latter parameter is NULL */ + /* Restores the locale for LC_'category' to 'original_locale' (which is a + * copy that will be freed by this function), or do nothing if the latter + * parameter is NULL */ if (original_locale == NULL) { return; @@ -3136,7 +4408,14 @@ Perl__is_cur_LC_category_utf8(pTHX_ int category) * could give the wrong result. The result will very likely be correct for * languages that have commonly used non-ASCII characters, but for notably * English, it comes down to if the locale's name ends in something like - * "UTF-8". It errs on the side of not being a UTF-8 locale. */ + * "UTF-8". It errs on the side of not being a UTF-8 locale. + * + * If the platform is early C89, not containing mbtowc(), or we are + * compiled to not pay attention to LC_CTYPE, this employs heuristics. + * These work very well for non-Latin locales or those whose currency + * symbol isn't a '$' nor plain ASCII text. But without LC_CTYPE and at + * least MB_CUR_MAX, English locales with an ASCII currency symbol depend + * on the name containing UTF-8 or not. */ /* Name of current locale corresponding to the input category */ const char *save_input_locale = NULL; @@ -3230,7 +4509,7 @@ Perl__is_cur_LC_category_utf8(pTHX_ int category) * calculate it */ # if defined(USE_LOCALE_CTYPE) \ - && ( (defined(HAS_NL_LANGINFO) && defined(CODESET)) \ + && ( defined(HAS_NL_LANGINFO) \ || (defined(HAS_MBTOWC) || defined(HAS_MBRTOWC))) { @@ -3249,8 +4528,8 @@ Perl__is_cur_LC_category_utf8(pTHX_ int category) /* Standard UTF-8 needs at least 4 bytes to represent the maximum * Unicode code point. */ - DEBUG_L(PerlIO_printf(Perl_debug_log, "\tMB_CUR_MAX=%d\n", - (int) MB_CUR_MAX)); + DEBUG_L(PerlIO_printf(Perl_debug_log, "%s: %d: MB_CUR_MAX=%d\n", + __FILE__, __LINE__, (int) MB_CUR_MAX)); if ((unsigned) MB_CUR_MAX < STRLENs(MAX_UNICODE_UTF8)) { is_utf8 = FALSE; restore_switched_locale(LC_CTYPE, original_ctype_locale); @@ -3258,7 +4537,7 @@ Perl__is_cur_LC_category_utf8(pTHX_ int category) } # endif -# if defined(HAS_NL_LANGINFO) && defined(CODESET) +# if defined(HAS_NL_LANGINFO) { /* The task is easiest if the platform has this POSIX 2001 function. Except on some platforms it can wrongly return "", so have to have @@ -3266,11 +4545,12 @@ Perl__is_cur_LC_category_utf8(pTHX_ int category) variances from that. For example, Turkish locales may use the alternate dotted I rules, and sometimes it appears to be a defective locale definition. XXX We should probably check for - these in the Latin1 range and warn */ - const char *codeset = my_nl_langinfo(PERL_CODESET, FALSE); + these in the Latin1 range and warn (but on glibc, requires + iswalnum() etc. due to their not handling 80-FF correctly */ + const char *codeset = my_nl_langinfo(CODESET, FALSE); /* FALSE => already in dest locale */ - DEBUG_L(PerlIO_printf(Perl_debug_log, + DEBUG_Lv(PerlIO_printf(Perl_debug_log, "\tnllanginfo returned CODESET '%s'\n", codeset)); if (codeset && strNE(codeset, "")) { @@ -3301,6 +4581,7 @@ Perl__is_cur_LC_category_utf8(pTHX_ int category) { wchar_t wc; int len; + dSAVEDERRNO; # if defined(HAS_MBRTOWC) && defined(USE_ITHREADS) @@ -3318,20 +4599,25 @@ Perl__is_cur_LC_category_utf8(pTHX_ int category) memset(&ps, 0, sizeof(ps));; PERL_UNUSED_RESULT(mbrtowc(&wc, NULL, 0, &ps)); /* Reset any shift state */ - errno = 0; + SETERRNO(0, 0); len = mbrtowc(&wc, STR_WITH_LEN(REPLACEMENT_CHARACTER_UTF8), &ps); + SAVE_ERRNO; # else + LOCALE_LOCK; PERL_UNUSED_RESULT(mbtowc(&wc, NULL, 0));/* Reset any shift state */ - errno = 0; + SETERRNO(0, 0); len = mbtowc(&wc, STR_WITH_LEN(REPLACEMENT_CHARACTER_UTF8)); + SAVE_ERRNO; + LOCALE_UNLOCK; # endif - DEBUG_L(PerlIO_printf(Perl_debug_log, + RESTORE_ERRNO; + DEBUG_Lv(PerlIO_printf(Perl_debug_log, "\treturn from mbtowc; len=%d; code_point=%x; errno=%d\n", - len, (unsigned int) wc, errno)); + len, (unsigned int) wc, GET_ERRNO)); is_utf8 = cBOOL( len == STRLENs(REPLACEMENT_CHARACTER_UTF8) && wc == (wchar_t) UNICODE_REPLACEMENT); @@ -3364,7 +4650,7 @@ Perl__is_cur_LC_category_utf8(pTHX_ int category) save_input_locale); bool only_ascii = FALSE; const U8 * currency_string - = (const U8 *) my_nl_langinfo(PERL_CRNCYSTR, FALSE); + = (const U8 *) my_nl_langinfo(CRNCYSTR, FALSE); /* 2nd param not relevant for this item */ const U8 * first_variant; @@ -3390,7 +4676,7 @@ Perl__is_cur_LC_category_utf8(pTHX_ int category) /* It isn't a UTF-8 locale if the symbol is not legal UTF-8; * otherwise assume the locale is UTF-8 if and only if the symbol * is non-ascii UTF-8. */ - DEBUG_L(PerlIO_printf(Perl_debug_log, "\t?Currency symbol for %s is UTF-8=%d\n", + DEBUG_Lv(PerlIO_printf(Perl_debug_log, "\t?Currency symbol for %s is UTF-8=%d\n", save_input_locale, is_utf8)); goto finish_and_return; } @@ -3444,7 +4730,7 @@ Perl__is_cur_LC_category_utf8(pTHX_ int category) * locale if we changed it */ restore_switched_locale(LC_TIME, original_time_locale); - DEBUG_L(PerlIO_printf(Perl_debug_log, "\t?time-related strings for %s are UTF-8=%d\n", + DEBUG_Lv(PerlIO_printf(Perl_debug_log, "\t?time-related strings for %s are UTF-8=%d\n", save_input_locale, is_utf8_string((U8 *) formatted_time, 0))); is_utf8 = is_utf8_string((U8 *) formatted_time, 0); @@ -3455,7 +4741,7 @@ Perl__is_cur_LC_category_utf8(pTHX_ int category) * ASCII. Go on to the next test. If we changed it, restore LC_TIME * to its original locale */ restore_switched_locale(LC_TIME, original_time_locale); - DEBUG_L(PerlIO_printf(Perl_debug_log, "All time-related words for %s contain only ASCII; can't use for determining if UTF-8 locale\n", save_input_locale)); + DEBUG_Lv(PerlIO_printf(Perl_debug_log, "All time-related words for %s contain only ASCII; can't use for determining if UTF-8 locale\n", save_input_locale)); } # endif @@ -3508,7 +4794,7 @@ Perl__is_cur_LC_category_utf8(pTHX_ int category) /* Any non-UTF-8 message means not a UTF-8 locale; if all are valid, * any non-ascii means it is one; otherwise we assume it isn't */ - DEBUG_L(PerlIO_printf(Perl_debug_log, "\t?error messages for %s are UTF-8=%d\n", + DEBUG_Lv(PerlIO_printf(Perl_debug_log, "\t?error messages for %s are UTF-8=%d\n", save_input_locale, is_utf8)); goto finish_and_return; @@ -3653,7 +4939,54 @@ Perl__is_cur_LC_category_utf8(pTHX_ int category) # ifdef DEBUGGING + if (DEBUG_Lv_TEST) { + const char * s = PL_locale_utf8ness; + + /* Audit the structure */ + while (s < PL_locale_utf8ness + strlen(PL_locale_utf8ness)) { + const char *e; + + if (*s != UTF8NESS_SEP[0]) { + Perl_croak(aTHX_ + "panic: %s: %d: Corrupt utf8ness_cache: missing" + " separator %.*s<-- HERE %s\n", + __FILE__, __LINE__, + (int) (s - PL_locale_utf8ness), PL_locale_utf8ness, + s); + } + s++; + e = strchr(s, UTF8NESS_PREFIX[0]); + if (! e) { + Perl_croak(aTHX_ + "panic: %s: %d: Corrupt utf8ness_cache: missing" + " separator %.*s<-- HERE %s\n", + __FILE__, __LINE__, + (int) (e - PL_locale_utf8ness), PL_locale_utf8ness, + e); + } + e++; + if (*e != '0' && *e != '1') { + Perl_croak(aTHX_ + "panic: %s: %d: Corrupt utf8ness_cache: utf8ness" + " must be [01] %.*s<-- HERE %s\n", + __FILE__, __LINE__, + (int) (e + 1 - PL_locale_utf8ness), + PL_locale_utf8ness, e + 1); + } + if (ninstr(PL_locale_utf8ness, s, s-1, e)) { + Perl_croak(aTHX_ + "panic: %s: %d: Corrupt utf8ness_cache: entry" + " has duplicate %.*s<-- HERE %s\n", + __FILE__, __LINE__, + (int) (e - PL_locale_utf8ness), PL_locale_utf8ness, + e); + } + s = e + 1; + } + } + if (DEBUG_Lv_TEST || debug_initialization) { + PerlIO_printf(Perl_debug_log, "PL_locale_utf8ness is now %s; returning %d\n", PL_locale_utf8ness, is_utf8); @@ -3715,19 +5048,34 @@ Perl_my_strerror(pTHX_ const int errnum) const bool within_locale_scope = IN_LC(LC_MESSAGES); -# if defined(HAS_POSIX_2008_LOCALE) && defined(HAS_STRERROR_L) +# ifndef USE_ITHREADS - /* This function is trivial if we don't have to worry about thread safety - * and have strerror_l(), as it handles the switch of locales so we don't - * have to deal with that. We don't have to worry about thread safety if - * this is an unthreaded build, or if strerror_r() is also available. Both - * it and strerror_l() are thread-safe. Plain strerror() isn't thread - * safe. But on threaded builds when strerror_r() is available, the - * apparent call to strerror() below is actually a macro that - * behind-the-scenes calls strerror_r(). - */ + /* This function is trivial without threads. */ + if (within_locale_scope) { + errstr = savepv(strerror(errnum)); + } + else { + const char * save_locale = savepv(do_setlocale_c(LC_MESSAGES, NULL)); + + do_setlocale_c(LC_MESSAGES, "C"); + errstr = savepv(strerror(errnum)); + do_setlocale_c(LC_MESSAGES, save_locale); + Safefree(save_locale); + } + +# elif defined(HAS_POSIX_2008_LOCALE) \ + && defined(HAS_STRERROR_L) \ + && defined(HAS_DUPLOCALE) + + /* This function is also trivial if we don't have to worry about thread + * safety and have strerror_l(), as it handles the switch of locales so we + * don't have to deal with that. We don't have to worry about thread + * safety if strerror_r() is also available. Both it and strerror_l() are + * thread-safe. Plain strerror() isn't thread safe. But on threaded + * builds when strerror_r() is available, the apparent call to strerror() + * below is actually a macro that behind-the-scenes calls strerror_r(). */ -# if ! defined(USE_ITHREADS) || defined(HAS_STRERROR_R) +# ifdef HAS_STRERROR_R if (within_locale_scope) { errstr = savepv(strerror(errnum)); @@ -3765,44 +5113,18 @@ Perl_my_strerror(pTHX_ const int errnum) # endif # else /* Doesn't have strerror_l() */ -# ifdef USE_POSIX_2008_LOCALE - - locale_t save_locale = NULL; - -# else - const char * save_locale = NULL; bool locale_is_C = FALSE; - /* We have a critical section to prevent another thread from changing the - * locale out from under us (or zapping the buffer returned from - * setlocale() ) */ + /* We have a critical section to prevent another thread from executing this + * same code at the same time. (On thread-safe perls, the LOCK is a + * no-op.) Since this is the only place in core that changes LC_MESSAGES + * (unless the user has called setlocale(), this works to prevent races. */ LOCALE_LOCK; -# endif - DEBUG_Lv(PerlIO_printf(Perl_debug_log, "my_strerror called with errnum %d\n", errnum)); if (! within_locale_scope) { - errno = 0; - -# ifdef USE_POSIX_2008_LOCALE /* Use the thread-safe locale functions */ - - DEBUG_Lv(PerlIO_printf(Perl_debug_log, - "Not within locale scope, about to call" - " uselocale(0x%p)\n", PL_C_locale_obj)); - save_locale = uselocale(PL_C_locale_obj); - if (! save_locale) { - DEBUG_L(PerlIO_printf(Perl_debug_log, - "uselocale failed, errno=%d\n", errno)); - } - else { - DEBUG_Lv(PerlIO_printf(Perl_debug_log, - "uselocale returned 0x%p\n", save_locale)); - } - -# else /* Not thread-safe build */ - save_locale = do_setlocale_c(LC_MESSAGES, NULL); if (! save_locale) { Perl_croak(aTHX_ @@ -3821,9 +5143,6 @@ Perl_my_strerror(pTHX_ const int errnum) do_setlocale_c(LC_MESSAGES, "C"); } } - -# endif - } /* end of ! within_locale_scope */ else { DEBUG_Lv(PerlIO_printf(Perl_debug_log, "%s: %d: WITHIN locale scope\n", @@ -3835,21 +5154,6 @@ Perl_my_strerror(pTHX_ const int errnum) errstr = savepv(Strerror(errnum)); if (! within_locale_scope) { - errno = 0; - -# ifdef USE_POSIX_2008_LOCALE - - DEBUG_Lv(PerlIO_printf(Perl_debug_log, - "%s: %d: not within locale scope, restoring the locale\n", - __FILE__, __LINE__)); - if (save_locale && ! uselocale(save_locale)) { - DEBUG_L(PerlIO_printf(Perl_debug_log, - "uselocale restore failed, errno=%d\n", errno)); - } - } - -# else - if (save_locale && ! locale_is_C) { if (! do_setlocale_c(LC_MESSAGES, save_locale)) { Perl_croak(aTHX_ @@ -3862,7 +5166,6 @@ Perl_my_strerror(pTHX_ const int errnum) LOCALE_UNLOCK; -# endif # endif /* End of doesn't have strerror_l */ #endif /* End of does have locale messages */ @@ -3882,49 +5185,191 @@ Perl_my_strerror(pTHX_ const int errnum) /* -=for apidoc sync_locale +=for apidoc switch_to_global_locale -Changing the program's locale should be avoided by XS code. Nevertheless, -certain non-Perl libraries called from XS, such as C do so. When this -happens, Perl needs to be told that the locale has changed. Use this function -to do so, before returning to Perl. +On systems without locale support, or on single-threaded builds, or on +platforms that do not support per-thread locale operations, this function does +nothing. On such systems that do have locale support, only a locale global to +the whole program is available. + +On multi-threaded builds on systems that do have per-thread locale operations, +this function converts the thread it is running in to use the global locale. +This is for code that has not yet or cannot be updated to handle multi-threaded +locale operation. As long as only a single thread is so-converted, everything +works fine, as all the other threads continue to ignore the global one, so only +this thread looks at it. + +However, on Windows systems this isn't quite true prior to Visual Studio 15, +at which point Microsoft fixed a bug. A race can occur if you use the +following operations on earlier Windows platforms: + +=over + +=item L + +=item L, items C and C + +=item L, items C and C + +=back + +The first item is not fixable (except by upgrading to a later Visual Studio +release), but it would be possible to work around the latter two items by using +the Windows API functions C and C; patches +welcome. + +Without this function call, threads that use the L> system +function will not work properly, as all the locale-sensitive functions will +look at the per-thread locale, and C will have no effect on this +thread. + +Perl code should convert to either call +L|perlapi/Perl_setlocale> (which is a drop-in for the system +C) or use the methods given in L to call +L|POSIX/setlocale>. Either one will transparently properly +handle all cases of single- vs multi-thread, POSIX 2008-supported or not. + +Non-Perl libraries, such as C, that call the system C can +continue to work if this function is called before transferring control to the +library. + +Upon return from the code that needs to use the global locale, +L|perlapi/sync_locale> should be called to restore the safe +multi-thread operation. =cut */ void -Perl_sync_locale(pTHX) +Perl_switch_to_global_locale() +{ + +#ifdef USE_THREAD_SAFE_LOCALE +# ifdef WIN32 + + _configthreadlocale(_DISABLE_PER_THREAD_LOCALE); + +# else +# ifdef HAS_QUERYLOCALE + + setlocale(LC_ALL, querylocale(LC_ALL_MASK, uselocale((locale_t) 0))); + +# else + + { + unsigned int i; + + for (i = 0; i < LC_ALL_INDEX; i++) { + setlocale(categories[i], do_setlocale_r(categories[i], NULL)); + } + } + +# endif + + uselocale(LC_GLOBAL_LOCALE); + +# endif +#endif + +} + +/* + +=for apidoc sync_locale + +L|perlapi/Perl_setlocale> can be used at any time to query or +change the locale (though changing the locale is antisocial and dangerous on +multi-threaded systems that don't have multi-thread safe locale operations. +(See L). Using the system +L> should be avoided. Nevertheless, certain non-Perl libraries +called from XS, such as C do so, and this can't be changed. When the +locale is changed by XS code that didn't use +L|perlapi/Perl_setlocale>, Perl needs to be told that the +locale has changed. Use this function to do so, before returning to Perl. + +The return value is a boolean: TRUE if the global locale at the time of call +was in effect; and FALSE if a per-thread locale was in effect. This can be +used by the caller that needs to restore things as-they-were to decide whether +or not to call +L|perlapi/switch_to_global_locale>. + +=cut +*/ + +bool +Perl_sync_locale() { - char * newlocale; + const char * newlocale; + dTHX; + +#ifdef USE_POSIX_2008_LOCALE + + bool was_in_global_locale = FALSE; + locale_t cur_obj = uselocale((locale_t) 0); + + /* On Windows, unless the foreign code has turned off the thread-safe + * locale setting, any plain setlocale() will have affected what we see, so + * no need to worry. Otherwise, If the foreign code has done a plain + * setlocale(), it will only affect the global locale on POSIX systems, but + * will affect the */ + if (cur_obj == LC_GLOBAL_LOCALE) { + +# ifdef HAS_QUERY_LOCALE + + do_setlocale_c(LC_ALL, setlocale(LC_ALL, NULL)); + +# else + + unsigned int i; + + /* We can't trust that we can read the LC_ALL format on the + * platform, so do them individually */ + for (i = 0; i < LC_ALL_INDEX; i++) { + do_setlocale_r(categories[i], setlocale(categories[i], NULL)); + } + +# endif + + was_in_global_locale = TRUE; + } + +#else + + bool was_in_global_locale = TRUE; +#endif #ifdef USE_LOCALE_CTYPE - newlocale = do_setlocale_c(LC_CTYPE, NULL); + newlocale = savepv(do_setlocale_c(LC_CTYPE, NULL)); DEBUG_Lv(PerlIO_printf(Perl_debug_log, "%s:%d: %s\n", __FILE__, __LINE__, setlocale_debug_string(LC_CTYPE, NULL, newlocale))); new_ctype(newlocale); + Safefree(newlocale); #endif /* USE_LOCALE_CTYPE */ #ifdef USE_LOCALE_COLLATE - newlocale = do_setlocale_c(LC_COLLATE, NULL); + newlocale = savepv(do_setlocale_c(LC_COLLATE, NULL)); DEBUG_Lv(PerlIO_printf(Perl_debug_log, "%s:%d: %s\n", __FILE__, __LINE__, setlocale_debug_string(LC_COLLATE, NULL, newlocale))); new_collate(newlocale); + Safefree(newlocale); #endif #ifdef USE_LOCALE_NUMERIC - newlocale = do_setlocale_c(LC_NUMERIC, NULL); + newlocale = savepv(do_setlocale_c(LC_NUMERIC, NULL)); DEBUG_Lv(PerlIO_printf(Perl_debug_log, "%s:%d: %s\n", __FILE__, __LINE__, setlocale_debug_string(LC_NUMERIC, NULL, newlocale))); new_numeric(newlocale); + Safefree(newlocale); #endif /* USE_LOCALE_NUMERIC */ + return was_in_global_locale; } #if defined(DEBUGGING) && defined(USE_LOCALE) @@ -3945,7 +5390,7 @@ S_setlocale_debug_string(const int category, /* category number, /* initialise to a non-null value to keep it out of BSS and so keep * -DPERL_GLOBAL_STRUCT_PRIVATE happy */ - static char ret[128] = "If you can read this, thank your buggy C" + static char ret[256] = "If you can read this, thank your buggy C" " library strlcpy(), and change your hints file" " to undef it"; @@ -3980,6 +5425,58 @@ S_setlocale_debug_string(const int category, /* category number, #endif +void +Perl_thread_locale_init() +{ + /* Called from a thread on startup*/ + +#ifdef USE_THREAD_SAFE_LOCALE + + dTHX_DEBUGGING; + + /* C starts the new thread in the global C locale. If we are thread-safe, + * we want to not be in the global locale */ + + DEBUG_L(PerlIO_printf(Perl_debug_log, + "%s:%d: new thread, initial locale is %s; calling setlocale\n", + __FILE__, __LINE__, setlocale(LC_ALL, NULL))); + +# ifdef WIN32 + + _configthreadlocale(_ENABLE_PER_THREAD_LOCALE); + +# else + + Perl_setlocale(LC_ALL, "C"); + +# endif +#endif + +} + +void +Perl_thread_locale_term() +{ + /* Called from a thread as it gets ready to terminate */ + +#ifdef USE_THREAD_SAFE_LOCALE + + /* C starts the new thread in the global C locale. If we are thread-safe, + * we want to not be in the global locale */ + +# ifndef WIN32 + + { /* Free up */ + locale_t cur_obj = uselocale(LC_GLOBAL_LOCALE); + if (cur_obj != LC_GLOBAL_LOCALE) { + freelocale(cur_obj); + } + } + +# endif +#endif + +} /* * ex: set ts=8 sts=4 sw=4 et: