#ifdef I_WCHAR
# include <wchar.h>
#endif
+#ifdef I_WCTYPE
+# include <wctype.h>
+#endif
/* If the environment says to, we can output debugging information during
* initialization. This is done before option parsing, and before any thread
* creation, so can be a file-level static */
-#if ! defined(DEBUGGING) || defined(PERL_GLOBAL_STRUCT)
+#if ! defined(DEBUGGING)
# define debug_initialization 0
# define DEBUG_INITIALIZATION_set(v)
#else
PERL_ARGS_ASSERT_STDIZE_LOCALE;
if (s) {
- const char * const t = strchr(s, '.');
- okay = FALSE;
- if (t) {
- const char * const u = strchr(t, '\n');
- if (u && (u[1] == 0)) {
- const STRLEN len = u - s;
- Move(s + 1, locs, len, char);
- locs[len] = 0;
- okay = TRUE;
- }
- }
+ const char * const t = strchr(s, '.');
+ okay = FALSE;
+ if (t) {
+ const char * const u = strchr(t, '\n');
+ if (u && (u[1] == 0)) {
+ const STRLEN len = u - s;
+ Move(s + 1, locs, len, char);
+ locs[len] = 0;
+ okay = TRUE;
+ }
+ }
}
if (!okay)
- Perl_croak(aTHX_ "Can't fix broken locale name \"%s\"", locs);
+ Perl_croak(aTHX_ "Can't fix broken locale name \"%s\"", locs);
return locs;
}
# ifdef USE_LOCALE_TELEPHONE
LC_TELEPHONE,
# endif
+# ifdef USE_LOCALE_SYNTAX
+ LC_SYNTAX,
+# endif
+# ifdef USE_LOCALE_TOD
+ LC_TOD,
+# endif
# ifdef LC_ALL
LC_ALL,
# endif
/* The top-most real element is LC_ALL */
-const char * category_names[] = {
+const char * const category_names[] = {
# ifdef USE_LOCALE_NUMERIC
"LC_NUMERIC",
# ifdef USE_LOCALE_TELEPHONE
"LC_TELEPHONE",
# endif
+# ifdef USE_LOCALE_SYNTAX
+ "LC_SYNTAX",
+# endif
+# ifdef USE_LOCALE_TOD
+ "LC_TOD",
+# endif
# ifdef LC_ALL
"LC_ALL",
# endif
# define LC_COLLATE_INDEX _DUMMY_CTYPE + 1
# define _DUMMY_COLLATE LC_COLLATE_INDEX
# else
-# define _DUMMY_COLLATE _DUMMY_COLLATE
+# define _DUMMY_COLLATE _DUMMY_CTYPE
# endif
# ifdef USE_LOCALE_TIME
# define LC_TIME_INDEX _DUMMY_COLLATE + 1
# else
# define _DUMMY_TELEPHONE _DUMMY_PAPER
# endif
+# ifdef USE_LOCALE_SYNTAX
+# define LC_SYNTAX_INDEX _DUMMY_TELEPHONE + 1
+# define _DUMMY_SYNTAX LC_SYNTAX_INDEX
+# else
+# define _DUMMY_SYNTAX _DUMMY_TELEPHONE
+# endif
+# ifdef USE_LOCALE_TOD
+# define LC_TOD_INDEX _DUMMY_SYNTAX + 1
+# define _DUMMY_TOD LC_TOD_INDEX
+# else
+# define _DUMMY_TOD _DUMMY_SYNTAX
+# endif
# ifdef LC_ALL
-# define LC_ALL_INDEX _DUMMY_TELEPHONE + 1
+# define LC_ALL_INDEX _DUMMY_TOD + 1
# endif
#endif /* ifdef USE_LOCALE */
* known at compile time; "do_setlocale_r", not known until run time */
# define do_setlocale_c(cat, locale) my_setlocale(cat, locale)
# define do_setlocale_r(cat, locale) my_setlocale(cat, locale)
+# define FIX_GLIBC_LC_MESSAGES_BUG(i)
#else /* Below uses POSIX 2008 */
emulate_setlocale(cat, locale, cat ## _INDEX, TRUE)
# define do_setlocale_r(cat, locale) emulate_setlocale(cat, locale, 0, FALSE)
+# if ! defined(__GLIBC__) || ! defined(USE_LOCALE_MESSAGES)
+
+# define FIX_GLIBC_LC_MESSAGES_BUG(i)
+
+# else /* Invalidate glibc cache of loaded translations, see [perl #134264] */
+
+# include <libintl.h>
+# define FIX_GLIBC_LC_MESSAGES_BUG(i) \
+ STMT_START { \
+ if ((i) == LC_MESSAGES_INDEX) { \
+ textdomain(textdomain(NULL)); \
+ } \
+ } STMT_END
+
+# endif
+
/* A third array, parallel to the ones above to map from category to its
* equivalent mask */
const int category_masks[] = {
# ifdef USE_LOCALE_TELEPHONE
LC_TELEPHONE_MASK,
# endif
+# ifdef USE_LOCALE_SYNTAX
+ LC_SYNTAX_MASK,
+# endif
+# ifdef USE_LOCALE_TOD
+ LC_TOD_MASK,
+# endif
/* LC_ALL can't be turned off by a Configure
* option, and in Posix 2008, should always be
* here, so compile it in unconditionally.
/* If this assert fails, adjust the size of curlocales in intrpvar.h */
STATIC_ASSERT_STMT(C_ARRAY_LENGTH(PL_curlocales) > LC_ALL_INDEX);
-# if defined(_NL_LOCALE_NAME) && defined(DEBUGGING)
-
+# if defined(_NL_LOCALE_NAME) \
+ && defined(DEBUGGING) \
+ /* On systems that accept any locale name, the real underlying \
+ * locale is often returned by this internal function, so we \
+ * can't use it */ \
+ && ! defined(SETLOCALE_ACCEPTS_ANY_LOCALE_NAME)
{
/* Internal glibc for querylocale(), but doesn't handle
* empty-string ("") locale properly; who knows what other
- * glitches. Check it for now, under debug. */
+ * glitches. Check for it now, under debug. */
char * temp_name = nl_langinfo_l(_NL_LOCALE_NAME(category),
uselocale((locale_t) 0));
# endif
- }
-
- assert(PL_C_locale_obj);
-
- /* Otherwise, we are switching locales. This will generally entail freeing
- * the current one's space (at the C library's discretion). We need to
- * stop using that locale before the switch. So switch to a known locale
- * object that we don't otherwise mess with. This returns the locale
- * object in effect at the time of the switch. */
- old_obj = uselocale(PL_C_locale_obj);
-
-# ifdef DEBUGGING
-
- if (DEBUG_Lv_TEST || debug_initialization) {
- PerlIO_printf(Perl_debug_log, "%s:%d: emulate_setlocale was using %p\n", __FILE__, __LINE__, old_obj);
- }
-
-# endif
-
- if (! old_obj) {
+ } /* End of this being setlocale(LC_foo, NULL) */
-# ifdef DEBUGGING
-
- if (DEBUG_L_TEST || debug_initialization) {
- dSAVE_ERRNO;
- PerlIO_printf(Perl_debug_log, "%s:%d: emulate_setlocale switching to C failed: %d\n", __FILE__, __LINE__, GET_ERRNO);
- RESTORE_ERRNO;
- }
-
-# endif
-
- return NULL;
- }
-
-# ifdef DEBUGGING
-
- if (DEBUG_Lv_TEST || debug_initialization) {
- PerlIO_printf(Perl_debug_log, "%s:%d: emulate_setlocale now using %p\n", __FILE__, __LINE__, PL_C_locale_obj);
- }
-
-# endif
-
- /* If we weren't in a thread safe locale, set so that newlocale() below
- which uses 'old_obj', uses an empty one. Same for our reserved C object.
- The latter is defensive coding, so that, even if there is some bug, we
- will never end up trying to modify either of these, as if passed to
- newlocale(), they can be. */
- if (old_obj == LC_GLOBAL_LOCALE || old_obj == PL_C_locale_obj) {
- old_obj = (locale_t) 0;
- }
-
- /* Create the new locale (it may actually modify the current one). */
+ /* Here, we are switching locales. */
# ifndef HAS_QUERYLOCALE
const char * default_name;
- /* To minimize other threads messing with the environment, we copy
- * the variable, making it a temporary. But this doesn't work upon
- * program initialization before any scopes are created, and at
- * this time, there's nothing else going on that would interfere.
- * So skip the copy in that case */
- if (PL_scopestack_ix == 0) {
- default_name = PerlEnv_getenv("LANG");
- }
- else {
- default_name = savepv(PerlEnv_getenv("LANG"));
- }
+ default_name = PerlEnv_getenv("LANG");
if (! default_name || strEQ(default_name, "")) {
- default_name = "C";
- }
- else if (PL_scopestack_ix != 0) {
- SAVEFREEPV(default_name);
+ default_name = "C";
}
if (category != LC_ALL) {
for (i = 0; i < LC_ALL_INDEX; i++) {
const char * const env_override
- = savepv(PerlEnv_getenv(category_names[i]));
+ = PerlEnv_getenv(category_names[i]);
const char * this_locale = ( env_override
&& strNE(env_override, ""))
? env_override
: default_name;
if (! emulate_setlocale(categories[i], this_locale, i, TRUE))
{
- Safefree(env_override);
return NULL;
}
if (strNE(this_locale, default_name)) {
did_override = TRUE;
}
-
- Safefree(env_override);
}
/* If all the categories are the same, we can set LC_ALL to
}
}
}
- }
+ } /* End of this being setlocale(LC_foo, "") */
else if (strchr(locale, ';')) {
/* LC_ALL may actually incude a conglomeration of various categories.
* other platforms do it differently, so we have to handle all cases
* ourselves */
+ unsigned int i;
const char * s = locale;
const char * e = locale + strlen(locale);
const char * p = s;
const char * name_start;
const char * name_end;
+ /* If the string that gives what to set doesn't include all categories,
+ * the omitted ones get set to "C". To get this behavior, first set
+ * all the individual categories to "C", and override the furnished
+ * ones below */
+ for (i = 0; i < LC_ALL_INDEX; i++) {
+ if (! emulate_setlocale(categories[i], "C", i, TRUE)) {
+ return NULL;
+ }
+ }
+
while (s < e) {
- unsigned int i;
/* Parse through the category */
while (isWORDCHAR(*p)) {
assert(category == LC_ALL);
return do_setlocale_c(LC_ALL, NULL);
- }
+ } /* End of this being setlocale(LC_ALL,
+ "LC_CTYPE=foo;LC_NUMERIC=bar;...") */
ready_to_set: ;
+ /* Here at the end of having to deal with the absence of querylocale().
+ * Some cases have already been fully handled by recursive calls to this
+ * function. But at this point, we haven't dealt with those, but are now
+ * prepared to, knowing what the locale name to set this category to is.
+ * This would have come for free if this system had had querylocale() */
+
# endif /* end of ! querylocale */
- /* Ready to create a new locale by modification of the exising one */
- new_obj = newlocale(mask, locale, old_obj);
+ assert(PL_C_locale_obj);
- if (! new_obj) {
- dSAVE_ERRNO;
+ /* Switching locales generally entails freeing the current one's space (at
+ * the C library's discretion). We need to stop using that locale before
+ * the switch. So switch to a known locale object that we don't otherwise
+ * mess with. This returns the locale object in effect at the time of the
+ * switch. */
+ old_obj = uselocale(PL_C_locale_obj);
# ifdef DEBUGGING
- if (DEBUG_L_TEST || debug_initialization) {
- PerlIO_printf(Perl_debug_log, "%s:%d: emulate_setlocale creating new object failed: %d\n", __FILE__, __LINE__, GET_ERRNO);
- }
+ if (DEBUG_Lv_TEST || debug_initialization) {
+ PerlIO_printf(Perl_debug_log, "%s:%d: emulate_setlocale was using %p\n", __FILE__, __LINE__, old_obj);
+ }
# endif
- if (! uselocale(old_obj)) {
+ if (! old_obj) {
# ifdef DEBUGGING
- if (DEBUG_L_TEST || debug_initialization) {
- PerlIO_printf(Perl_debug_log, "%s:%d: switching back failed: %d\n", __FILE__, __LINE__, GET_ERRNO);
- }
+ if (DEBUG_L_TEST || debug_initialization) {
+ dSAVE_ERRNO;
+ PerlIO_printf(Perl_debug_log, "%s:%d: emulate_setlocale switching to C failed: %d\n", __FILE__, __LINE__, GET_ERRNO);
+ RESTORE_ERRNO;
+ }
# endif
- }
- RESTORE_ERRNO;
return NULL;
}
# ifdef DEBUGGING
if (DEBUG_Lv_TEST || debug_initialization) {
- PerlIO_printf(Perl_debug_log, "%s:%d: emulate_setlocale created %p\n", __FILE__, __LINE__, new_obj);
+ PerlIO_printf(Perl_debug_log,
+ "%s:%d: emulate_setlocale now using %p\n",
+ __FILE__, __LINE__, PL_C_locale_obj);
}
# endif
- /* And switch into it */
- if (! uselocale(new_obj)) {
- dSAVE_ERRNO;
+ /* If this call is to switch to the LC_ALL C locale, it already exists, and
+ * in fact, we already have switched to it (in preparation for what
+ * normally is to come). But since we're already there, continue to use
+ * it instead of trying to create a new locale */
+ if (mask == LC_ALL_MASK && isNAME_C_OR_POSIX(locale)) {
# ifdef DEBUGGING
- if (DEBUG_L_TEST || debug_initialization) {
- PerlIO_printf(Perl_debug_log, "%s:%d: emulate_setlocale switching to new object failed\n", __FILE__, __LINE__);
+ if (DEBUG_Lv_TEST || debug_initialization) {
+ PerlIO_printf(Perl_debug_log,
+ "%s:%d: will stay in C object\n", __FILE__, __LINE__);
}
# endif
- if (! uselocale(old_obj)) {
+ new_obj = PL_C_locale_obj;
+
+ /* We already had switched to the C locale in preparation for freeing
+ * 'old_obj' */
+ if (old_obj != LC_GLOBAL_LOCALE && old_obj != PL_C_locale_obj) {
+ freelocale(old_obj);
+ }
+ }
+ else {
+ /* If we weren't in a thread safe locale, set so that newlocale() below
+ * which uses 'old_obj', uses an empty one. Same for our reserved C
+ * object. The latter is defensive coding, so that, even if there is
+ * some bug, we will never end up trying to modify either of these, as
+ * if passed to newlocale(), they can be. */
+ if (old_obj == LC_GLOBAL_LOCALE || old_obj == PL_C_locale_obj) {
+ old_obj = (locale_t) 0;
+ }
+
+ /* Ready to create a new locale by modification of the exising one */
+ new_obj = newlocale(mask, locale, old_obj);
+
+ if (! new_obj) {
+ dSAVE_ERRNO;
# ifdef DEBUGGING
if (DEBUG_L_TEST || debug_initialization) {
- PerlIO_printf(Perl_debug_log, "%s:%d: switching back failed: %d\n", __FILE__, __LINE__, GET_ERRNO);
+ PerlIO_printf(Perl_debug_log,
+ "%s:%d: emulate_setlocale creating new object"
+ " failed: %d\n", __FILE__, __LINE__, GET_ERRNO);
}
# endif
+ if (! uselocale(old_obj)) {
+
+# ifdef DEBUGGING
+
+ if (DEBUG_L_TEST || debug_initialization) {
+ PerlIO_printf(Perl_debug_log,
+ "%s:%d: switching back failed: %d\n",
+ __FILE__, __LINE__, GET_ERRNO);
+ }
+
+# endif
+
+ }
+ RESTORE_ERRNO;
+ return NULL;
+ }
+
+# ifdef DEBUGGING
+
+ if (DEBUG_Lv_TEST || debug_initialization) {
+ PerlIO_printf(Perl_debug_log,
+ "%s:%d: emulate_setlocale created %p",
+ __FILE__, __LINE__, new_obj);
+ if (old_obj) {
+ PerlIO_printf(Perl_debug_log,
+ "; should have freed %p", old_obj);
+ }
+ PerlIO_printf(Perl_debug_log, "\n");
+ }
+
+# endif
+
+ /* And switch into it */
+ if (! uselocale(new_obj)) {
+ dSAVE_ERRNO;
+
+# ifdef DEBUGGING
+
+ if (DEBUG_L_TEST || debug_initialization) {
+ PerlIO_printf(Perl_debug_log,
+ "%s:%d: emulate_setlocale switching to new object"
+ " failed\n", __FILE__, __LINE__);
+ }
+
+# endif
+
+ if (! uselocale(old_obj)) {
+
+# ifdef DEBUGGING
+
+ if (DEBUG_L_TEST || debug_initialization) {
+ PerlIO_printf(Perl_debug_log,
+ "%s:%d: switching back failed: %d\n",
+ __FILE__, __LINE__, GET_ERRNO);
+ }
+
+# endif
+
+ }
+ freelocale(new_obj);
+ RESTORE_ERRNO;
+ return NULL;
}
- freelocale(new_obj);
- RESTORE_ERRNO;
- return NULL;
}
# ifdef DEBUGGING
if (DEBUG_Lv_TEST || debug_initialization) {
- PerlIO_printf(Perl_debug_log, "%s:%d: emulate_setlocale now using %p\n", __FILE__, __LINE__, new_obj);
+ PerlIO_printf(Perl_debug_log,
+ "%s:%d: emulate_setlocale now using %p\n",
+ __FILE__, __LINE__, new_obj);
}
# endif
Safefree(PL_curlocales[i]);
PL_curlocales[i] = savepv(locale);
}
+
+ FIX_GLIBC_LC_MESSAGES_BUG(LC_MESSAGES_INDEX);
}
else {
/* Then update the category's record */
Safefree(PL_curlocales[index]);
PL_curlocales[index] = savepv(locale);
+
+ FIX_GLIBC_LC_MESSAGES_BUG(index);
}
# endif
#endif /* USE_POSIX_2008_LOCALE */
-#if 0 /* Code that was to emulate thread-safe locales on platforms that
- didn't natively support them */
-
-/* The way this would work is that we would keep a per-thread list of the
- * correct locale for that thread. Any operation that was locale-sensitive
- * would have to be changed so that it would look like this:
- *
- * LOCALE_LOCK;
- * setlocale to the correct locale for this operation
- * do operation
- * LOCALE_UNLOCK
- *
- * This leaves the global locale in the most recently used operation's, but it
- * was locked long enough to get the result. If that result is static, it
- * needs to be copied before the unlock.
- *
- * Macros could be written like SETUP_LOCALE_DEPENDENT_OP(category) that did
- * the setup, but are no-ops when not needed, and similarly,
- * END_LOCALE_DEPENDENT_OP for the tear-down
- *
- * But every call to a locale-sensitive function would have to be changed, and
- * if a module didn't cooperate by using the mutex, things would break.
- *
- * This code was abandoned before being completed or tested, and is left as-is
-*/
-
-# define do_setlocale_c(cat, locale) locking_setlocale(cat, locale, cat ## _INDEX, TRUE)
-# define do_setlocale_r(cat, locale) locking_setlocale(cat, locale, 0, FALSE)
-
-STATIC char *
-S_locking_setlocale(pTHX_
- const int category,
- const char * locale,
- int index,
- const bool is_index_valid
- )
-{
- /* This function kind of performs a setlocale() on just the current thread;
- * thus it is kind of thread-safe. It does this by keeping a thread-level
- * array of the current locales for each category. Every time a locale is
- * switched to, it does the switch globally, but updates the thread's
- * array. A query as to what the current locale is just returns the
- * appropriate element from the array, and doesn't actually call the system
- * setlocale(). The saving into the array is done in an uninterruptible
- * section of code, so is unaffected by whatever any other threads might be
- * doing.
- *
- * All locale-sensitive operations must work by first starting a critical
- * section, then switching to the thread's locale as kept by this function,
- * and then doing the operation, then ending the critical section. Thus,
- * each gets done in the appropriate locale. simulating thread-safety.
- *
- * This function takes the same parameters, 'category' and 'locale', that
- * the regular setlocale() function does, but it also takes two additional
- * ones. This is because as described earlier. If we know on input the
- * index corresponding to the category into the array where we store the
- * current locales, we don't have to calculate it. If the caller knows at
- * compile time what the index is, it it can pass it, setting
- * 'is_index_valid' to TRUE; otherwise the index parameter is ignored.
- *
- */
-
- /* If the input index might be incorrect, calculate the correct one */
- if (! is_index_valid) {
- unsigned int i;
-
- if (DEBUG_Lv_TEST || debug_initialization) {
- PerlIO_printf(Perl_debug_log, "%s:%d: converting category %d to index\n", __FILE__, __LINE__, category);
- }
-
- for (i = 0; i <= LC_ALL_INDEX; i++) {
- if (category == categories[i]) {
- index = i;
- goto found_index;
- }
- }
-
- /* Here, we don't know about this category, so can't handle it.
- * XXX best we can do is to unsafely set this
- * XXX warning */
-
- return my_setlocale(category, locale);
-
- found_index: ;
-
- if (DEBUG_Lv_TEST || debug_initialization) {
- PerlIO_printf(Perl_debug_log, "%s:%d: index is 0x%x\n", __FILE__, __LINE__, index);
- }
- }
-
- /* For a query, just return what's in our records */
- if (new_locale == NULL) {
- return curlocales[index];
- }
-
-
- /* Otherwise, we need to do the switch, and save the result, all in a
- * critical section */
-
- Safefree(curlocales[[index]]);
-
- /* It might be that this is called from an already-locked section of code.
- * We would have to detect and skip the LOCK/UNLOCK if so */
- LOCALE_LOCK;
-
- curlocales[index] = savepv(my_setlocale(category, new_locale));
-
- if (strEQ(new_locale, "")) {
-
-#ifdef LC_ALL
-
- /* The locale values come from the environment, and may not all be the
- * same, so for LC_ALL, we have to update all the others, while the
- * mutex is still locked */
-
- if (category == LC_ALL) {
- unsigned int i;
- for (i = 0; i < LC_ALL_INDEX) {
- curlocales[i] = my_setlocale(categories[i], NULL);
- }
- }
- }
-
-#endif
-
- LOCALE_UNLOCK;
-
- return curlocales[index];
-}
-
-#endif
+#ifdef USE_LOCALE
STATIC void
S_set_numeric_radix(pTHX_ const bool use_locale)
}
# endif
+#else
+
+ PERL_UNUSED_ARG(use_locale);
+
#endif /* USE_LOCALE_NUMERIC and can find the radix char */
}
char *save_newnum;
if (! newnum) {
- Safefree(PL_numeric_name);
- PL_numeric_name = NULL;
- PL_numeric_standard = TRUE;
- PL_numeric_underlying = TRUE;
- PL_numeric_underlying_is_standard = TRUE;
- return;
+ Safefree(PL_numeric_name);
+ PL_numeric_name = NULL;
+ PL_numeric_standard = TRUE;
+ PL_numeric_underlying = TRUE;
+ PL_numeric_underlying_is_standard = TRUE;
+ return;
}
save_newnum = stdize_locale(savepv(newnum));
PL_numeric_underlying = TRUE;
PL_numeric_standard = isNAME_C_OR_POSIX(save_newnum);
+#ifndef TS_W32_BROKEN_LOCALECONV
+
/* If its name isn't C nor POSIX, it could still be indistinguishable from
- * them */
+ * them. But on broken Windows systems calling my_nl_langinfo() for
+ * THOUSEP can currently (but rarely) cause a race, so avoid doing that,
+ * and just always change the locale if not C nor POSIX on those systems */
if (! PL_numeric_standard) {
PL_numeric_standard = cBOOL(strEQ(".", my_nl_langinfo(RADIXCHAR,
FALSE /* Don't toggle locale */ ))
&& strEQ("", my_nl_langinfo(THOUSEP, FALSE)));
}
+#endif
+
/* Save the new name if it isn't the same as the previous one, if any */
if (! PL_numeric_name || strNE(PL_numeric_name, save_newnum)) {
- Safefree(PL_numeric_name);
- PL_numeric_name = save_newnum;
+ Safefree(PL_numeric_name);
+ PL_numeric_name = save_newnum;
}
else {
- Safefree(save_newnum);
+ Safefree(save_newnum);
}
PL_numeric_underlying_is_standard = PL_numeric_standard;
* to our records (which could be wrong if some XS code has changed the
* locale behind our back) */
- do_setlocale_c(LC_NUMERIC, "C");
- PL_numeric_standard = TRUE;
- PL_numeric_underlying = PL_numeric_underlying_is_standard;
- set_numeric_radix(0);
-
# ifdef DEBUGGING
if (DEBUG_L_TEST || debug_initialization) {
PerlIO_printf(Perl_debug_log,
- "LC_NUMERIC locale now is standard C\n");
+ "Setting LC_NUMERIC locale to standard C\n");
}
# endif
+
+ do_setlocale_c(LC_NUMERIC, "C");
+ PL_numeric_standard = TRUE;
+ PL_numeric_underlying = PL_numeric_underlying_is_standard;
+ set_numeric_radix(0);
+
#endif /* USE_LOCALE_NUMERIC */
}
* if toggling isn't necessary according to our records (which could be
* wrong if some XS code has changed the locale behind our back) */
- do_setlocale_c(LC_NUMERIC, PL_numeric_name);
- PL_numeric_standard = PL_numeric_underlying_is_standard;
- PL_numeric_underlying = TRUE;
- set_numeric_radix(! PL_numeric_standard);
-
# ifdef DEBUGGING
if (DEBUG_L_TEST || debug_initialization) {
PerlIO_printf(Perl_debug_log,
- "LC_NUMERIC locale now is %s\n",
+ "Setting LC_NUMERIC locale to %s\n",
PL_numeric_name);
}
# endif
+
+ do_setlocale_c(LC_NUMERIC, PL_numeric_name);
+ PL_numeric_standard = PL_numeric_underlying_is_standard;
+ PL_numeric_underlying = TRUE;
+ set_numeric_radix(! PL_numeric_standard);
+
#endif /* USE_LOCALE_NUMERIC */
}
#ifndef USE_LOCALE_CTYPE
- PERL_ARGS_ASSERT_NEW_CTYPE;
PERL_UNUSED_ARG(newctype);
PERL_UNUSED_CONTEXT;
* this function should be called directly only from this file and from
* POSIX::setlocale() */
- dVAR;
unsigned int i;
/* Don't check for problems if we are suppressing the warnings */
bool check_for_problems = ckWARN_d(WARN_LOCALE) || UNLIKELY(DEBUG_L_TEST);
+ bool maybe_utf8_turkic = FALSE;
PERL_ARGS_ASSERT_NEW_CTYPE;
* handle this specially because of the three problematic code points */
if (PL_in_utf8_CTYPE_locale) {
Copy(PL_fold_latin1, PL_fold_locale, 256, U8);
+
+ /* UTF-8 locales can have special handling for 'I' and 'i' if they are
+ * Turkic. Make sure these two are the only anomalies. (We don't use
+ * towupper and towlower because they aren't in C89.) */
+
+#if defined(HAS_TOWUPPER) && defined (HAS_TOWLOWER)
+
+ if (towupper('i') == 0x130 && towlower('I') == 0x131) {
+
+#else
+
+ if (toupper('i') == 'i' && tolower('I') == 'I') {
+
+#endif
+ check_for_problems = TRUE;
+ maybe_utf8_turkic = TRUE;
+ }
}
/* We don't populate the other lists if a UTF-8 locale, but do check that
&& (isGRAPH_A(i) || isBLANK_A(i) || i == '\n'))
{
bool is_bad = FALSE;
- char name[3] = { '\0' };
+ char name[4] = { '\0' };
/* Convert the name into a string */
- if (isPRINT_A(i)) {
+ if (isGRAPH_A(i)) {
name[0] = i;
name[1] = '\0';
}
else if (i == '\n') {
- my_strlcpy(name, "\n", sizeof(name));
+ my_strlcpy(name, "\\n", sizeof(name));
+ }
+ else if (i == '\t') {
+ my_strlcpy(name, "\\t", sizeof(name));
}
else {
- my_strlcpy(name, "\t", sizeof(name));
+ assert(i == ' ');
+ my_strlcpy(name, "' '", sizeof(name));
}
/* Check each possibe class */
"isxdigit('%s') unexpectedly is %d\n",
name, cBOOL(isxdigit(i))));
}
- if (UNLIKELY(tolower(i) != (int) toLOWER_A(i))) {
+ if (UNLIKELY(tolower(i) != (int) toLOWER_A(i))) {
is_bad = TRUE;
DEBUG_L(PerlIO_printf(Perl_debug_log,
"tolower('%s')=0x%x instead of the expected 0x%x\n",
name, tolower(i), (int) toLOWER_A(i)));
}
- if (UNLIKELY(toupper(i) != (int) toUPPER_A(i))) {
+ if (UNLIKELY(toupper(i) != (int) toUPPER_A(i))) {
is_bad = TRUE;
DEBUG_L(PerlIO_printf(Perl_debug_log,
"toupper('%s')=0x%x instead of the expected 0x%x\n",
}
}
+ if (bad_count == 2 && maybe_utf8_turkic) {
+ bad_count = 0;
+ *bad_chars_list = '\0';
+ PL_fold_locale['I'] = 'I';
+ PL_fold_locale['i'] = 'i';
+ PL_in_utf8_turkic_locale = TRUE;
+ DEBUG_L(PerlIO_printf(Perl_debug_log, "%s:%d: %s is turkic\n",
+ __FILE__, __LINE__, newctype));
+ }
+ else {
+ PL_in_utf8_turkic_locale = FALSE;
+ }
+
# ifdef MB_CUR_MAX
/* We only handle single-byte locales (outside of UTF-8 ones; so if
# endif
- if (UNLIKELY(bad_count) || UNLIKELY(multi_byte_locale)) {
+ /* If we found problems and we want them output, do so */
+ if ( (UNLIKELY(bad_count) || UNLIKELY(multi_byte_locale))
+ && (LIKELY(ckWARN_d(WARN_LOCALE)) || UNLIKELY(DEBUG_L_TEST)))
+ {
if (UNLIKELY(bad_count) && PL_in_utf8_CTYPE_locale) {
PL_warn_locale = Perl_newSVpvf(aTHX_
"Locale '%s' contains (at least) the following characters"
* an unlikely bug */
if (! newcoll) {
- if (PL_collation_name) {
- ++PL_collation_ix;
- Safefree(PL_collation_name);
- PL_collation_name = NULL;
- }
- PL_collation_standard = TRUE;
+ if (PL_collation_name) {
+ ++PL_collation_ix;
+ Safefree(PL_collation_name);
+ PL_collation_name = NULL;
+ }
+ PL_collation_standard = TRUE;
is_standard_collation:
- PL_collxfrm_base = 0;
- PL_collxfrm_mult = 2;
+ PL_collxfrm_base = 0;
+ PL_collxfrm_mult = 2;
PL_in_utf8_COLLATE_locale = FALSE;
PL_strxfrm_NUL_replacement = '\0';
PL_strxfrm_max_cp = 0;
- return;
+ return;
}
/* If this is not the same locale as currently, set the new one up */
if (! PL_collation_name || strNE(PL_collation_name, newcoll)) {
- ++PL_collation_ix;
- Safefree(PL_collation_name);
- PL_collation_name = stdize_locale(savepv(newcoll));
- PL_collation_standard = isNAME_C_OR_POSIX(newcoll);
+ ++PL_collation_ix;
+ Safefree(PL_collation_name);
+ PL_collation_name = stdize_locale(savepv(newcoll));
+ PL_collation_standard = isNAME_C_OR_POSIX(newcoll);
if (PL_collation_standard) {
goto is_standard_collation;
}
* get it right the first time to avoid wasted expensive string
* transformations. */
- {
+ {
/* We use the string below to find how long the tranformation of it
* is. Almost all locales are supersets of ASCII, or at least the
* ASCII letters. We use all of them, half upper half lower,
}
# endif
- }
+ }
}
#endif /* USE_LOCALE_COLLATE */
}
+#endif
+
#ifdef WIN32
+#define USE_WSETLOCALE
+
+#ifdef USE_WSETLOCALE
+
+STATIC char *
+S_wrap_wsetlocale(pTHX_ int category, const char *locale) {
+ wchar_t *wlocale;
+ wchar_t *wresult;
+ char *result;
+
+ if (locale) {
+ int req_size =
+ MultiByteToWideChar(CP_UTF8, 0, locale, -1, NULL, 0);
+
+ if (!req_size) {
+ errno = EINVAL;
+ return NULL;
+ }
+
+ Newx(wlocale, req_size, wchar_t);
+ if (!MultiByteToWideChar(CP_UTF8, 0, locale, -1, wlocale, req_size)) {
+ Safefree(wlocale);
+ errno = EINVAL;
+ return NULL;
+ }
+ }
+ else {
+ wlocale = NULL;
+ }
+ wresult = _wsetlocale(category, wlocale);
+ Safefree(wlocale);
+ if (wresult) {
+ int req_size =
+ WideCharToMultiByte(CP_UTF8, 0, wresult, -1, NULL, 0, NULL, NULL);
+ Newx(result, req_size, char);
+ SAVEFREEPV(result); /* is there something better we can do here? */
+ if (!WideCharToMultiByte(CP_UTF8, 0, wresult, -1,
+ result, req_size, NULL, NULL)) {
+ errno = EINVAL;
+ return NULL;
+ }
+ }
+ else {
+ result = NULL;
+ }
+
+ return result;
+}
+
+#endif
+
STATIC char *
S_win32_setlocale(pTHX_ int category, const char* locale)
{
}
+#ifdef USE_WSETLOCALE
+ result = S_wrap_wsetlocale(aTHX_ category, locale);
+#else
result = setlocale(category, locale);
+#endif
DEBUG_L(STMT_START {
dSAVE_ERRNO;
PerlIO_printf(Perl_debug_log, "%s:%d: %s\n", __FILE__, __LINE__,
for (i = 0; i < LC_ALL_INDEX; i++) {
result = PerlEnv_getenv(category_names[i]);
if (result && strNE(result, "")) {
+#ifdef USE_WSETLOCALE
+ S_wrap_wsetlocale(aTHX_ categories[i], result);
+#else
setlocale(categories[i], result);
+#endif
DEBUG_Lv(PerlIO_printf(Perl_debug_log, "%s:%d: %s\n",
__FILE__, __LINE__,
setlocale_debug_string(categories[i], result, "not captured")));
#endif
/*
-
-=head1 Locale-related functions and macros
-
=for apidoc Perl_setlocale
This is an (almost) drop-in replacement for the system L<C<setlocale(3)>>,
taking the same parameters, and returning the same information, except that it
-returns the correct underlying C<LC_NUMERIC> locale, instead of C<C> always, as
-perl keeps that locale category as C<C>, changing it briefly during the
-operations where the underlying one is required.
+returns the correct underlying C<LC_NUMERIC> locale. Regular C<setlocale> will
+instead return C<C> if the underlying locale has a non-dot decimal point
+character, or a non-empty thousands separator for displaying floating point
+numbers. This is because perl keeps that locale category such that it has a
+dot and empty separator, changing the locale briefly during the operations
+where the underlying one is required. C<Perl_setlocale> knows about this, and
+compensates; regular C<setlocale> doesn't.
Another reason it isn't completely a drop-in replacement is that it is
declared to return S<C<const char *>>, whereas the system setlocale omits the
-C<const>. (If it were being written today, plain setlocale would be declared
-const, since it is illegal to change the information it returns; doing so leads
-to segfaults.)
+C<const> (presumably because its API was specified long ago, and can't be
+updated; it is illegal to change the information C<setlocale> returns; doing
+so leads to segfaults.)
Finally, C<Perl_setlocale> works under all circumstances, whereas plain
C<setlocale> can be completely ineffective on some platforms under some
{
/* This wraps POSIX::setlocale() */
+#ifndef USE_LOCALE
+
+ PERL_UNUSED_ARG(category);
+ PERL_UNUSED_ARG(locale);
+
+ return "C";
+
+#else
+
const char * retval;
const char * newlocale;
dSAVEDERRNO;
- DECLARATION_FOR_LC_NUMERIC_MANIPULATION;
dTHX;
+ DECLARATION_FOR_LC_NUMERIC_MANIPULATION;
#ifdef USE_LOCALE_NUMERIC
/* A NULL locale means only query what the current one is. We have the
* LC_NUMERIC name saved, because we are normally switched into the C
- * locale for it. For an LC_ALL query, switch back to get the correct
- * results. All other categories don't require special handling */
+ * (or equivalent) locale for it. For an LC_ALL query, switch back to get
+ * the correct results. All other categories don't require special
+ * handling */
if (locale == NULL) {
if (category == LC_NUMERIC) {
# ifdef USE_LOCALE_CTYPE
- newlocale = do_setlocale_c(LC_CTYPE, NULL);
+ newlocale = savepv(do_setlocale_c(LC_CTYPE, NULL));
new_ctype(newlocale);
+ Safefree(newlocale);
# endif /* USE_LOCALE_CTYPE */
# ifdef USE_LOCALE_COLLATE
- newlocale = do_setlocale_c(LC_COLLATE, NULL);
+ newlocale = savepv(do_setlocale_c(LC_COLLATE, NULL));
new_collate(newlocale);
+ Safefree(newlocale);
# endif
# ifdef USE_LOCALE_NUMERIC
- newlocale = do_setlocale_c(LC_NUMERIC, NULL);
+ newlocale = savepv(do_setlocale_c(LC_NUMERIC, NULL));
new_numeric(newlocale);
+ Safefree(newlocale);
# endif /* USE_LOCALE_NUMERIC */
#endif /* LC_ALL */
return retval;
+#endif
+
}
PERL_STATIC_INLINE const char *
It delivers the correct results for the C<RADIXCHAR> and C<THOUSEP> items,
without you having to write extra code. The reason for the extra code would be
because these are from the C<LC_NUMERIC> locale category, which is normally
-kept set to the C locale by Perl, no matter what the underlying locale is
-supposed to be, and so to get the expected results, you have to temporarily
-toggle into the underlying locale, and later toggle back. (You could use plain
-C<nl_langinfo> and C<L</STORE_LC_NUMERIC_FORCE_TO_UNDERLYING>> for this but
-then you wouldn't get the other advantages of C<Perl_langinfo()>; not keeping
-C<LC_NUMERIC> in the C locale would break a lot of CPAN, which is expecting the
-radix (decimal point) character to be a dot.)
+kept set by Perl so that the radix is a dot, and the separator is the empty
+string, no matter what the underlying locale is supposed to be, and so to get
+the expected results, you have to temporarily toggle into the underlying
+locale, and later toggle back. (You could use plain C<nl_langinfo> and
+C<L</STORE_LC_NUMERIC_FORCE_TO_UNDERLYING>> for this but then you wouldn't get
+the other advantages of C<Perl_langinfo()>; not keeping C<LC_NUMERIC> in the C
+(or equivalent) locale would break a lot of CPAN, which is expecting the radix
+(decimal point) character to be a dot.)
=item *
The system function it replaces can have its static return buffer trashed,
-not only by a subesequent call to that function, but by a C<freelocale>,
+not only by a subsequent call to that function, but by a C<freelocale>,
C<setlocale>, or other locale change. The returned buffer of this function is
not changed until the next call to it, so the buffer is never in a trashed
state.
as Windows, hence makes your code more portable. Of the fifty-some possible
items specified by the POSIX 2008 standard,
L<http://pubs.opengroup.org/onlinepubs/9699919799/basedefs/langinfo.h.html>,
-only two are completely unimplemented (though the loss of one of these is
-significant). It uses various techniques to recover the other items, including
-calling C<L<localeconv(3)>>, and C<L<strftime(3)>>, both of which are specified
-in C89, so should be always be available. Later C<strftime()> versions have
-additional capabilities; C<""> is returned for those not available on your
-system.
+only one is completely unimplemented, though on non-Windows platforms, another
+significant one is also not implemented). It uses various techniques to
+recover the other items, including calling C<L<localeconv(3)>>, and
+C<L<strftime(3)>>, both of which are specified in C89, so should be always be
+available. Later C<strftime()> versions have additional capabilities; C<""> is
+returned for those not available on your system.
It is important to note that when called with an item that is recovered by
using C<localeconv>, the buffer from any previous explicit call to
C<localeconv> will be overwritten. This means you must save that buffer's
-contents if you need to access them after a call to this function.
-
-The details for those items which may differ from what this emulation returns
-and what a native C<nl_langinfo()> would return are:
-
-=over
-
-=item C<CODESET>
-
-=item C<ERA>
-
-Unimplemented, so returns C<"">.
-
-=item C<YESEXPR>
-
-=item C<YESSTR>
-
-=item C<NOEXPR>
-
-=item C<NOSTR>
-
-Only the values for English are returned. C<YESSTR> and C<NOSTR> have been
-removed from POSIX 2008, and are retained here for backwards compatibility.
-Your platform's C<nl_langinfo> may not support them.
-
-=item C<D_FMT>
-
-Always evaluates to C<%x>, the locale's appropriate date representation.
-
-=item C<T_FMT>
-
-Always evaluates to C<%X>, the locale's appropriate time representation.
-
-=item C<D_T_FMT>
-
-Always evaluates to C<%c>, the locale's appropriate date and time
-representation.
-
-=item C<CRNCYSTR>
-
-The return may be incorrect for those rare locales where the currency symbol
-replaces the radix character.
-Send email to L<mailto:perlbug@perl.org> if you have examples of it needing
-to work differently.
+contents if you need to access them after a call to this function. (But note
+that you might not want to be using C<localeconv()> directly anyway, because of
+issues like the ones listed in the second item of this list (above) for
+C<RADIXCHAR> and C<THOUSEP>. You can use the methods given in L<perlcall> to
+call L<POSIX/localeconv> and avoid all the issues, but then you have a hash to
+unpack).
-=item C<ALT_DIGITS>
-
-Currently this gives the same results as Linux does.
-Send email to L<mailto:perlbug@perl.org> if you have examples of it needing
-to work differently.
-
-=item C<ERA_D_FMT>
-
-=item C<ERA_T_FMT>
-
-=item C<ERA_D_T_FMT>
-
-=item C<T_FMT_AMPM>
-
-These are derived by using C<strftime()>, and not all versions of that function
-know about them. C<""> is returned for these on such systems.
-
-=back
+The details for those items which may deviate from what this emulation returns
+and what a native C<nl_langinfo()> would return are specified in
+L<I18N::Langinfo>.
=back
before the C<perl.h> C<#include>. You can replace your C<langinfo.h>
C<#include> with this one. (Doing it this way keeps out the symbols that plain
-C<langinfo.h> imports into the namespace for code that doesn't need it.)
+C<langinfo.h> would try to import into the namespace for code that doesn't need
+it.)
The original impetus for C<Perl_langinfo()> was so that code that needs to
find out the current currency symbol, floating point radix character, or digit
dTHX;
const char * retval;
+#ifdef USE_LOCALE_NUMERIC
+
/* We only need to toggle into the underlying LC_NUMERIC locale for these
* two items, and only if not already there */
if (toggle && (( item != RADIXCHAR && item != THOUSEP)
|| PL_numeric_underlying))
- {
+
+#endif /* No toggling needed if not using LC_NUMERIC */
+
toggle = FALSE;
- }
#if defined(HAS_NL_LANGINFO) /* nl_langinfo() is available. */
# if ! defined(HAS_THREAD_SAFE_NL_LANGINFO_L) \
STORE_LC_NUMERIC_FORCE_TO_UNDERLYING();
}
- LOCALE_LOCK; /* Prevent interference from another thread executing
- this code section (the only call to nl_langinfo in
- the core) */
-
+ /* Prevent interference from another thread executing this code
+ * section. */
+ NL_LANGINFO_LOCK;
/* Copy to a per-thread buffer, which is also one that won't be
* destroyed by a subsequent setlocale(), such as the
* RESTORE_LC_NUMERIC may do just below. */
retval = save_to_buffer(nl_langinfo(item),
&PL_langinfo_buf, &PL_langinfo_bufsize, 0);
-
- LOCALE_UNLOCK;
+ NL_LANGINFO_UNLOCK;
if (toggle) {
RESTORE_LC_NUMERIC();
do_free = TRUE;
}
+# ifdef USE_LOCALE_NUMERIC
+
if (toggle) {
if (PL_underlying_numeric_obj) {
cur = PL_underlying_numeric_obj;
}
}
+# endif
+
/* We have to save it to a buffer, because the freelocale() just below
* can invalidate the internal one */
retval = save_to_buffer(nl_langinfo_l(item, cur),
const char * temp;
DECLARATION_FOR_LC_NUMERIC_MANIPULATION;
+# ifdef TS_W32_BROKEN_LOCALECONV
+
+ const char * save_global;
+ const char * save_thread;
+ int needed_size;
+ char * ptr;
+ char * e;
+ char * item_start;
+
+# endif
# endif
# ifdef HAS_STRFTIME
switch (item) {
Size_t len;
- /* These 2 are unimplemented */
- case CODESET:
+ /* This is unimplemented */
case ERA: /* For use with strftime() %E modifier */
default:
case NOEXPR: return "^[-0nN]";
case NOSTR: return "no";
+ case CODESET:
+
+# ifndef WIN32
+
+ /* On non-windows, this is unimplemented, in part because of
+ * inconsistencies between vendors. The Darwin native
+ * nl_langinfo() implementation simply looks at everything past
+ * any dot in the name, but that doesn't work for other
+ * vendors. Many Linux locales that don't have UTF-8 in their
+ * names really are UTF-8, for example; z/OS locales that do
+ * have UTF-8 in their names, aren't really UTF-8 */
+ return "";
+
+# else
+
+ { /* But on Windows, the name does seem to be consistent, so
+ use that. */
+ const char * p;
+ const char * first;
+ Size_t offset = 0;
+ const char * name = my_setlocale(LC_CTYPE, NULL);
+ if (isNAME_C_OR_POSIX(name)) {
+ return "ANSI_X3.4-1968";
+ }
+
+ /* Find the dot in the locale name */
+ first = (const char *) strchr(name, '.');
+ if (! first) {
+ first = name;
+ goto has_nondigit;
+ }
+
+ /* Look at everything past the dot */
+ first++;
+ p = first;
+
+ while (*p) {
+ if (! isDIGIT(*p)) {
+ goto has_nondigit;
+ }
+
+ p++;
+ }
+
+ /* Here everything past the dot is a digit. Treat it as a
+ * code page */
+ retval = save_to_buffer("CP", &PL_langinfo_buf,
+ &PL_langinfo_bufsize, 0);
+ offset = STRLENs("CP");
+
+ has_nondigit:
+
+ retval = save_to_buffer(first, &PL_langinfo_buf,
+ &PL_langinfo_bufsize, offset);
+ }
+
+ break;
+
+# endif
# ifdef HAS_LOCALECONV
case CRNCYSTR:
/* We don't bother with localeconv_l() because any system that
* has it is likely to also have nl_langinfo() */
- LOCALE_LOCK; /* Prevent interference with other threads
- using localeconv() */
+ LOCALECONV_LOCK; /* Prevent interference with other threads
+ using localeconv() */
+
+# ifdef TS_W32_BROKEN_LOCALECONV
+
+ /* This is a workaround for a Windows bug prior to VS 15.
+ * What we do here is, while locked, switch to the global
+ * locale so localeconv() works; then switch back just before
+ * the unlock. This can screw things up if some thread is
+ * already using the global locale while assuming no other is.
+ * A different workaround would be to call GetCurrencyFormat on
+ * a known value, and parse it; patches welcome
+ *
+ * We have to use LC_ALL instead of LC_MONETARY because of
+ * another bug in Windows */
+
+ save_thread = savepv(my_setlocale(LC_ALL, NULL));
+ _configthreadlocale(_DISABLE_PER_THREAD_LOCALE);
+ save_global= savepv(my_setlocale(LC_ALL, NULL));
+ my_setlocale(LC_ALL, save_thread);
+
+# endif
lc = localeconv();
if ( ! lc
|| ! lc->currency_symbol
|| strEQ("", lc->currency_symbol))
{
- LOCALE_UNLOCK;
+ LOCALECONV_UNLOCK;
return "";
}
PL_langinfo_buf[0] = '+';
}
- LOCALE_UNLOCK;
+# ifdef TS_W32_BROKEN_LOCALECONV
+
+ my_setlocale(LC_ALL, save_global);
+ _configthreadlocale(_ENABLE_PER_THREAD_LOCALE);
+ my_setlocale(LC_ALL, save_thread);
+ Safefree(save_global);
+ Safefree(save_thread);
+
+# endif
+
+ LOCALECONV_UNLOCK;
break;
+# ifdef TS_W32_BROKEN_LOCALECONV
+
case RADIXCHAR:
+
+ /* For this, we output a known simple floating point number to
+ * a buffer, and parse it, looking for the radix */
+
+ if (toggle) {
+ STORE_LC_NUMERIC_FORCE_TO_UNDERLYING();
+ }
+
+ if (PL_langinfo_bufsize < 10) {
+ PL_langinfo_bufsize = 10;
+ Renew(PL_langinfo_buf, PL_langinfo_bufsize, char);
+ }
+
+ needed_size = my_snprintf(PL_langinfo_buf, PL_langinfo_bufsize,
+ "%.1f", 1.5);
+ if (needed_size >= (int) PL_langinfo_bufsize) {
+ PL_langinfo_bufsize = needed_size + 1;
+ Renew(PL_langinfo_buf, PL_langinfo_bufsize, char);
+ needed_size = my_snprintf(PL_langinfo_buf, PL_langinfo_bufsize,
+ "%.1f", 1.5);
+ assert(needed_size < (int) PL_langinfo_bufsize);
+ }
+
+ ptr = PL_langinfo_buf;
+ e = PL_langinfo_buf + PL_langinfo_bufsize;
+
+ /* Find the '1' */
+ while (ptr < e && *ptr != '1') {
+ ptr++;
+ }
+ ptr++;
+
+ /* Find the '5' */
+ item_start = ptr;
+ while (ptr < e && *ptr != '5') {
+ ptr++;
+ }
+
+ /* Everything in between is the radix string */
+ if (ptr >= e) {
+ PL_langinfo_buf[0] = '?';
+ PL_langinfo_buf[1] = '\0';
+ }
+ else {
+ *ptr = '\0';
+ Move(item_start, PL_langinfo_buf, ptr - PL_langinfo_buf, char);
+ }
+
+ if (toggle) {
+ RESTORE_LC_NUMERIC();
+ }
+
+ retval = PL_langinfo_buf;
+ break;
+
+# else
+
+ case RADIXCHAR: /* No special handling needed */
+
+# endif
+
case THOUSEP:
if (toggle) {
STORE_LC_NUMERIC_FORCE_TO_UNDERLYING();
}
- LOCALE_LOCK; /* Prevent interference with other threads
- using localeconv() */
+ LOCALECONV_LOCK; /* Prevent interference with other threads
+ using localeconv() */
+
+# ifdef TS_W32_BROKEN_LOCALECONV
+
+ /* This should only be for the thousands separator. A
+ * different work around would be to use GetNumberFormat on a
+ * known value and parse the result to find the separator */
+ save_thread = savepv(my_setlocale(LC_ALL, NULL));
+ _configthreadlocale(_DISABLE_PER_THREAD_LOCALE);
+ save_global = savepv(my_setlocale(LC_ALL, NULL));
+ my_setlocale(LC_ALL, save_thread);
+# if 0
+ /* This is the start of code that for broken Windows replaces
+ * the above and below code, and instead calls
+ * GetNumberFormat() and then would parse that to find the
+ * thousands separator. It needs to handle UTF-16 vs -8
+ * issues. */
+
+ needed_size = GetNumberFormatEx(PL_numeric_name, 0, "1234.5", NULL, PL_langinfo_buf, PL_langinfo_bufsize);
+ DEBUG_L(PerlIO_printf(Perl_debug_log,
+ "%s: %d: return from GetNumber, count=%d, val=%s\n",
+ __FILE__, __LINE__, needed_size, PL_langinfo_buf));
+
+# endif
+# endif
lc = localeconv();
if (! lc) {
retval = save_to_buffer(temp, &PL_langinfo_buf,
&PL_langinfo_bufsize, 0);
- LOCALE_UNLOCK;
+# ifdef TS_W32_BROKEN_LOCALECONV
+
+ my_setlocale(LC_ALL, save_global);
+ _configthreadlocale(_ENABLE_PER_THREAD_LOCALE);
+ my_setlocale(LC_ALL, save_thread);
+ Safefree(save_global);
+ Safefree(save_thread);
+
+# endif
+
+ LOCALECONV_UNLOCK;
if (toggle) {
RESTORE_LC_NUMERIC();
case MON_5: case MON_6: case MON_7: case MON_8:
case MON_9: case MON_10: case MON_11: case MON_12:
- LOCALE_LOCK;
-
init_tm(&tm); /* Precaution against core dumps */
tm.tm_sec = 30;
tm.tm_min = 30;
tm.tm_year = 2017 - 1900;
tm.tm_wday = 0;
tm.tm_mon = 0;
+
+ GCC_DIAG_IGNORE_STMT(-Wimplicit-fallthrough);
+
switch (item) {
default:
- LOCALE_UNLOCK;
Perl_croak(aTHX_
"panic: %s: %d: switch case: %d problem",
__FILE__, __LINE__, item);
break;
}
+ GCC_DIAG_RESTORE_STMT;
+
/* We can't use my_strftime() because it doesn't look at
* tm_wday */
while (0 == strftime(PL_langinfo_buf, PL_langinfo_bufsize,
* wday was chosen because its range is all a single digit.
* Things like tm_sec have two digits as the minimum: '00' */
- LOCALE_UNLOCK;
-
retval = PL_langinfo_buf;
/* If to return the format, not the value, overwrite the buffer
* Under -DDEBUGGING, if the environment variable PERL_DEBUG_LOCALE_INIT is
* set, debugging information is output.
*
- * This looks more complicated than it is, mainly due to the #ifdefs.
+ * This looks more complicated than it is, mainly due to the #ifdefs and
+ * error handling.
+ *
+ * Besides some asserts, data structure initialization, and specific
+ * platform complications, this routine is effectively just two things.
+ *
+ * a) setlocale(LC_ALL, "");
+ *
+ * which sets LC_ALL to the values in the current environment.
+ *
+ * And for each individual category 'foo' whose value we care about:
+ *
+ * b) save_foo = setlocale(LC_foo, NULL); handle_foo(save_foo);
+ *
+ * (We don't tend to care about categories like LC_PAPER, for example.)
+ *
+ * But there are complications. On systems without LC_ALL, it emulates
+ * step a) by looping through all the categories, and doing
+ *
+ * setlocale(LC_foo, "");
*
- * We try to set LC_ALL to the value determined by the environment. If
- * there is no LC_ALL on this platform, we try the individual categories we
- * know about. If this works, we are done.
+ * on each.
*
- * But if it doesn't work, we have to do something else. We search the
- * environment variables ourselves instead of relying on the system to do
- * it. We look at, in order, LC_ALL, LANG, a system default locale (if we
- * think there is one), and the ultimate fallback "C". This is all done in
- * the same loop as above to avoid duplicating code, but it makes things
- * more complex. The 'trial_locales' array is initialized with just one
- * element; it causes the behavior described in the paragraph above this to
- * happen. If that fails, we add elements to 'trial_locales', and do extra
- * loop iterations to cause the behavior described in this paragraph.
+ * And it has to deal with if this is an embedded perl, whose locale
+ * doesn't come from the environment, but has been set up by the caller.
+ * This is pretty simply handled: the "" in the setlocale calls is not a
+ * string constant, but a variable which is set to NULL in the embedded
+ * case.
+ *
+ * But the major complication is handling failure and doing fallback.
+ * There is an array, trial_locales, the elements of which are looped over
+ * until the locale is successfully set. The array is initialized with
+ * just one element, for
+ * setlocale(LC_ALL, $NULL_or_empty)
+ * If that works, as it almost always does, there's no more elements and
+ * the loop iterates just the once. Otherwise elements are added for each
+ * of the environment variables that POSIX dictates should control the
+ * program, in priority order, with a final one being "C". The loop is
+ * repeated until the first one succeeds. If all fail, we limp along with
+ * whatever state we got to. If there is no LC_ALL, an inner loop is run
+ * through all categories (making things look complex).
+ *
+ * A further complication is that Windows has an additional fallback, the
+ * user-default ANSI code page obtained from the operating system. This is
+ * added as yet another loop iteration, just before the final "C"
*
* On Ultrix, the locale MUST come from the environment, so there is
* preliminary code to set it. I (khw) am not sure that it is necessary,
* and that this couldn't be folded into the loop, but barring any real
* platforms to test on, it's staying as-is
- *
- * A slight complication is that in embedded Perls, the locale may already
- * be set-up, and we don't want to get it from the normal environment
- * variables. This is handled by having a special environment variable
- * indicate we're in this situation. We simply set setlocale's 2nd
- * parameter to be a NULL instead of "". That indicates to setlocale that
- * it is not to change anything, but to return the current value,
- * effectively initializing perl's db to what the locale already is.
- *
- * We play the same trick with NULL if a LC_ALL succeeds. We call
- * setlocale() on the individual categores with NULL to get their existing
- * values for our db, instead of trying to change them.
- * */
+ */
int ok = 1;
#else /* USE_LOCALE */
# ifdef __GLIBC__
- const char * const language = savepv(PerlEnv_getenv("LANGUAGE"));
+ const char * const language = PerlEnv_getenv("LANGUAGE");
# endif
: "";
const char* trial_locales[5]; /* 5 = 1 each for "", LC_ALL, LANG, "", C */
unsigned int trial_locales_count;
- const char * const lc_all = savepv(PerlEnv_getenv("LC_ALL"));
- const char * const lang = savepv(PerlEnv_getenv("LANG"));
+ const char * const lc_all = PerlEnv_getenv("LC_ALL");
+ const char * const lang = PerlEnv_getenv("LANG");
bool setlocale_failure = FALSE;
unsigned int i;
DEBUG_INITIALIZATION_set(cBOOL(PerlEnv_getenv("PERL_DEBUG_LOCALE_INIT")));
# define DEBUG_LOCALE_INIT(category, locale, result) \
- STMT_START { \
- if (debug_initialization) { \
+ STMT_START { \
+ if (debug_initialization) { \
PerlIO_printf(Perl_debug_log, \
"%s:%d: %s\n", \
__FILE__, __LINE__, \
locale, \
result)); \
} \
- } STMT_END
+ } STMT_END
/* Make sure the parallel arrays are properly set up */
# ifdef USE_LOCALE_NUMERIC
assert(category_masks[LC_TELEPHONE_INDEX] == LC_TELEPHONE_MASK);
# endif
# endif
+# ifdef USE_LOCALE_SYNTAX
+ assert(categories[LC_SYNTAX_INDEX] == LC_SYNTAX);
+ assert(strEQ(category_names[LC_SYNTAX_INDEX], "LC_SYNTAX"));
+# ifdef USE_POSIX_2008_LOCALE
+ assert(category_masks[LC_SYNTAX_INDEX] == LC_SYNTAX_MASK);
+# endif
+# endif
+# ifdef USE_LOCALE_TOD
+ assert(categories[LC_TOD_INDEX] == LC_TOD);
+ assert(strEQ(category_names[LC_TOD_INDEX], "LC_TOD"));
+# ifdef USE_POSIX_2008_LOCALE
+ assert(category_masks[LC_TOD_INDEX] == LC_TOD_MASK);
+# endif
+# endif
# ifdef LC_ALL
assert(categories[LC_ALL_INDEX] == LC_ALL);
assert(strEQ(category_names[LC_ALL_INDEX], "LC_ALL"));
# endif
# endif /* DEBUGGING */
+ /* Initialize the per-thread mbrFOO() state variables. See POSIX.xs for
+ * why these particular incantations are used. */
+#ifdef HAS_MBRLEN
+ memzero(&PL_mbrlen_ps, sizeof(PL_mbrlen_ps));
+#endif
+#ifdef HAS_MBRTOWC
+ memzero(&PL_mbrtowc_ps, sizeof(PL_mbrtowc_ps));
+#endif
+#ifdef HAS_WCTOMBR
+ wcrtomb(NULL, L'\0', &PL_wcrtomb_ps);
+#endif
+
/* Initialize the cache of the program's UTF-8ness for the always known
* locales C and POSIX */
my_strlcpy(PL_locale_utf8ness, C_and_POSIX_utf8ness,
# endif
# endif
-# if defined(LC_ALL_MASK) && defined(HAS_POSIX_2008_LOCALE)
+# ifdef USE_POSIX_2008_LOCALE
PL_C_locale_obj = newlocale(LC_ALL_MASK, "C", (locale_t) 0);
if (! PL_C_locale_obj) {
# endif
+# ifdef USE_LOCALE_NUMERIC
+
PL_numeric_radix_sv = newSVpvs(".");
+# endif
+
# if defined(USE_POSIX_2008_LOCALE) && ! defined(HAS_QUERYLOCALE)
/* Initialize our records. If we have POSIX 2008, we have LC_ALL */
DEBUG_LOCALE_INIT(categories[j], trial_locale, curlocales[j]);
}
- if (! setlocale_failure) { /* All succeeded */
+ if (LIKELY(! setlocale_failure)) { /* All succeeded */
break; /* Exit trial_locales loop */
}
}
This is an alternative to using the -C command line switch
(the -C if present will override this). */
{
- const char *p = PerlEnv_getenv("PERL_UNICODE");
- PL_unicode = p ? parse_unicode_opts(&p) : 0;
- if (PL_unicode & PERL_UNICODE_UTF8CACHEASSERT_FLAG)
- PL_utf8cache = -1;
+ const char *p = PerlEnv_getenv("PERL_UNICODE");
+ PL_unicode = p ? parse_unicode_opts(&p) : 0;
+ if (PL_unicode & PERL_UNICODE_UTF8CACHEASSERT_FLAG)
+ PL_utf8cache = -1;
}
# endif
-# ifdef __GLIBC__
-
- Safefree(language);
-
-# endif
-
- Safefree(lc_all);
- Safefree(lang);
-
#endif /* USE_LOCALE */
#ifdef DEBUGGING
cur_min_x + COLLXFRM_HDR_LEN))
{
PL_strxfrm_NUL_replacement = j;
+ Safefree(cur_min_x);
cur_min_x = x;
}
else {
cur_max_x + COLLXFRM_HDR_LEN))
{
PL_strxfrm_max_cp = j;
+ Safefree(cur_max_x);
cur_max_x = x;
}
else {
if (UNLIKELY(! xbuf)) {
DEBUG_L(PerlIO_printf(Perl_debug_log,
"_mem_collxfrm: Couldn't malloc %zu bytes\n", xAlloc));
- goto bad;
+ goto bad;
}
/* Store the collation id */
if (DEBUG_Lv_TEST || debug_initialization) {
PerlIO_printf(Perl_debug_log,
"_mem_collxfrm required more space than previously calculated"
- " for locale %s, trying again with new guess=%d+%zu\n",
- PL_collation_name, (int) COLLXFRM_HDR_LEN,
+ " for locale %s, trying again with new guess=%zu+%zu\n",
+ PL_collation_name, COLLXFRM_HDR_LEN,
xAlloc - COLLXFRM_HDR_LEN);
}
return xbuf;
bad:
- Safefree(xbuf);
- if (s != input_string) {
- Safefree(s);
- }
- *xlen = 0;
# ifdef DEBUGGING
# endif
+ Safefree(xbuf);
+ if (s != input_string) {
+ Safefree(s);
+ }
+ *xlen = 0;
+
return NULL;
}
PerlIO_printf(Perl_debug_log, "'\n");
}
+# endif /* DEBUGGING */
+#endif /* USE_LOCALE_COLLATE */
+#ifdef USE_LOCALE
+# ifdef DEBUGGING
+
STATIC void
S_print_bytes_for_locale(pTHX_
const char * const s,
}
# endif /* #ifdef DEBUGGING */
-#endif /* USE_LOCALE_COLLATE */
-
-#ifdef USE_LOCALE
STATIC const char *
S_switch_category_locale_to_template(pTHX_ const int switch_category, const int template_category, const char * template_locale)
Safefree(restore_to_locale);
DEBUG_Lv(PerlIO_printf(Perl_debug_log, "%s locale unchanged as %s\n",
- category_name(switch_category), restore_to_locale));
+ category_name(switch_category), template_locale));
return NULL;
}
Safefree(original_locale);
}
+/* is_cur_LC_category_utf8 uses a small char buffer to avoid malloc/free */
+#define CUR_LC_BUFFER_SIZE 64
+
bool
Perl__is_cur_LC_category_utf8(pTHX_ int category)
{
the name in the cache */
char * delimited; /* The name plus the delimiters used to store
it in the cache */
+ char buffer[CUR_LC_BUFFER_SIZE]; /* small buffer */
char * name_pos; /* position of 'delimited' in the cache, or 0
if not there */
* utf8ness digit */
input_name_len_with_overhead = input_name_len + 3;
- /* Allocate and populate space for a copy of the name surrounded by the
- * delimiters */
- Newx(delimited, input_name_len_with_overhead, char);
+ if ( input_name_len_with_overhead <= CUR_LC_BUFFER_SIZE ) {
+ /* we can use the buffer, avoid a malloc */
+ delimited = buffer;
+ } else { /* need a malloc */
+ /* Allocate and populate space for a copy of the name surrounded by the
+ * delimiters */
+ Newx(delimited, input_name_len_with_overhead, char);
+ }
+
delimited[0] = UTF8NESS_SEP[0];
Copy(save_input_locale, delimited + 1, input_name_len, char);
delimited[input_name_len+1] = UTF8NESS_PREFIX[0];
utf8ness_cache[input_name_len_with_overhead - 1] = is_utf8 + '0';
}
- Safefree(delimited);
+ /* free only when not using the buffer */
+ if ( delimited != buffer ) Safefree(delimited);
Safefree(save_input_locale);
return is_utf8;
}
* calculate it */
# if defined(USE_LOCALE_CTYPE) \
- && ( (defined(HAS_NL_LANGINFO) && defined(CODESET)) \
+ && ( defined(HAS_NL_LANGINFO) \
|| (defined(HAS_MBTOWC) || defined(HAS_MBRTOWC)))
{
}
# endif
-# if defined(HAS_NL_LANGINFO) && defined(CODESET)
+# if defined(HAS_NL_LANGINFO)
{ /* The task is easiest if the platform has this POSIX 2001 function.
Except on some platforms it can wrongly return "", so have to have
# if defined(HAS_MBRTOWC) && defined(USE_ITHREADS)
/* Prefer this function if available, as it's reentrant */
- memset(&ps, 0, sizeof(ps));;
+ memzero(&ps, sizeof(ps));;
PERL_UNUSED_RESULT(mbrtowc(&wc, NULL, 0, &ps)); /* Reset any shift
state */
SETERRNO(0, 0);
# else
- LOCALE_LOCK;
+ MBTOWC_LOCK;
PERL_UNUSED_RESULT(mbtowc(&wc, NULL, 0));/* Reset any shift state */
SETERRNO(0, 0);
len = mbtowc(&wc, STR_WITH_LEN(REPLACEMENT_CHARACTER_UTF8));
SAVE_ERRNO;
- LOCALE_UNLOCK;
+ MBTOWC_UNLOCK;
# endif
&& wc == (wchar_t) UNICODE_REPLACEMENT);
}
+# endif
+
restore_switched_locale(LC_CTYPE, original_ctype_locale);
goto finish_and_return;
}
-# endif
# else
/* Here, we must have a C89 compiler that doesn't have mbtowc(). Next
is_utf8 = TRUE;
goto finish_and_return;
}
- }
# endif
+ }
# endif
/* Other common encodings are the ISO 8859 series, which aren't UTF-8. But
Copy(delimited, utf8ness_cache, input_name_len_with_overhead - 1, char);
utf8ness_cache[input_name_len_with_overhead - 1] = is_utf8 + '0';
- if ((PL_locale_utf8ness[strlen(PL_locale_utf8ness)-1]
- & (PERL_UINTMAX_T) ~1) != '0')
- {
+ if ((PL_locale_utf8ness[strlen(PL_locale_utf8ness)-1] & ~1) != '0') {
Perl_croak(aTHX_
"panic: %s: %d: Corrupt utf8ness_cache=%s\nlen=%zu,"
" inserted_name=%s, its_len=%zu\n",
s++;
e = strchr(s, UTF8NESS_PREFIX[0]);
if (! e) {
+ e = PL_locale_utf8ness + strlen(PL_locale_utf8ness);
Perl_croak(aTHX_
"panic: %s: %d: Corrupt utf8ness_cache: missing"
" separator %.*s<-- HERE %s\n",
# endif
- Safefree(delimited);
+ /* free only when not using the buffer */
+ if ( delimited != buffer ) Safefree(delimited);
Safefree(save_input_locale);
return is_utf8;
}
bool
Perl__is_in_locale_category(pTHX_ const bool compiling, const int category)
{
- dVAR;
/* Internal function which returns if we are in the scope of a pragma that
* enables the locale category 'category'. 'compiling' should indicate if
* this is during the compilation phase (TRUE) or not (FALSE). */
const COP * const cop = (compiling) ? &PL_compiling : PL_curcop;
- SV *categories = cop_hints_fetch_pvs(cop, "locale", 0);
- if (! categories || categories == &PL_sv_placeholder) {
+ SV *these_categories = cop_hints_fetch_pvs(cop, "locale", 0);
+ if (! these_categories || these_categories == &PL_sv_placeholder) {
return FALSE;
}
/* The pseudo-category 'not_characters' is -1, so just add 1 to each to get
* a valid unsigned */
assert(category >= -1);
- return cBOOL(SvUV(categories) & (1U << (category + 1)));
+ return cBOOL(SvUV(these_categories) & (1U << (category + 1)));
}
char *
* to the C locale */
char *errstr;
- dVAR;
#ifndef USE_LOCALE_MESSAGES
const bool within_locale_scope = IN_LC(LC_MESSAGES);
-# if defined(HAS_POSIX_2008_LOCALE) && defined(HAS_STRERROR_L)
+# ifndef USE_ITHREADS
- /* This function is trivial if we don't have to worry about thread safety
- * and have strerror_l(), as it handles the switch of locales so we don't
- * have to deal with that. We don't have to worry about thread safety if
- * this is an unthreaded build, or if strerror_r() is also available. Both
- * it and strerror_l() are thread-safe. Plain strerror() isn't thread
- * safe. But on threaded builds when strerror_r() is available, the
- * apparent call to strerror() below is actually a macro that
- * behind-the-scenes calls strerror_r().
- */
+ /* This function is trivial without threads. */
+ if (within_locale_scope) {
+ errstr = savepv(strerror(errnum));
+ }
+ else {
+ const char * save_locale = savepv(do_setlocale_c(LC_MESSAGES, NULL));
+
+ do_setlocale_c(LC_MESSAGES, "C");
+ errstr = savepv(strerror(errnum));
+ do_setlocale_c(LC_MESSAGES, save_locale);
+ Safefree(save_locale);
+ }
-# if ! defined(USE_ITHREADS) || defined(HAS_STRERROR_R)
+# elif defined(USE_POSIX_2008_LOCALE) \
+ && defined(HAS_STRERROR_L)
+
+ /* This function is also trivial if we don't have to worry about thread
+ * safety and have strerror_l(), as it handles the switch of locales so we
+ * don't have to deal with that. We don't have to worry about thread
+ * safety if strerror_r() is also available. Both it and strerror_l() are
+ * thread-safe. Plain strerror() isn't thread safe. But on threaded
+ * builds when strerror_r() is available, the apparent call to strerror()
+ * below is actually a macro that behind-the-scenes calls strerror_r(). */
+
+# ifdef HAS_STRERROR_R
if (within_locale_scope) {
errstr = savepv(strerror(errnum));
* same code at the same time. (On thread-safe perls, the LOCK is a
* no-op.) Since this is the only place in core that changes LC_MESSAGES
* (unless the user has called setlocale(), this works to prevent races. */
- LOCALE_LOCK;
+ SETLOCALE_LOCK;
DEBUG_Lv(PerlIO_printf(Perl_debug_log,
"my_strerror called with errnum %d\n", errnum));
if (! within_locale_scope) {
save_locale = do_setlocale_c(LC_MESSAGES, NULL);
if (! save_locale) {
+ SETLOCALE_UNLOCK;
Perl_croak(aTHX_
"panic: %s: %d: Could not find current LC_MESSAGES locale,"
" errno=%d\n", __FILE__, __LINE__, errno);
/* The setlocale() just below likely will zap 'save_locale', so
* create a copy. */
save_locale = savepv(save_locale);
- do_setlocale_c(LC_MESSAGES, "C");
+ if (! do_setlocale_c(LC_MESSAGES, "C")) {
+
+ /* If, for some reason, the locale change failed, we
+ * soldier on as best as possible under the circumstances,
+ * using the current locale, and clear save_locale, so we
+ * don't try to change back. On z/0S, all setlocale()
+ * calls fail after you've created a thread. This is their
+ * way of making sure the entire process is always a single
+ * locale. This means that 'use locale' is always in place
+ * for messages under these circumstances. */
+ Safefree(save_locale);
+ save_locale = NULL;
+ }
}
}
} /* end of ! within_locale_scope */
if (! within_locale_scope) {
if (save_locale && ! locale_is_C) {
if (! do_setlocale_c(LC_MESSAGES, save_locale)) {
+ SETLOCALE_UNLOCK;
Perl_croak(aTHX_
- "panic: %s: %d: setlocale restore failed, errno=%d\n",
- __FILE__, __LINE__, errno);
+ "panic: %s: %d: setlocale restore to '%s' failed, errno=%d\n",
+ __FILE__, __LINE__, save_locale, errno);
}
Safefree(save_locale);
}
}
- LOCALE_UNLOCK;
+ SETLOCALE_UNLOCK;
# endif /* End of doesn't have strerror_l */
-#endif /* End of does have locale messages */
-
-#ifdef DEBUGGING
+# ifdef DEBUGGING
if (DEBUG_Lv_TEST) {
PerlIO_printf(Perl_debug_log, "Strerror returned; saving a copy: '");
PerlIO_printf(Perl_debug_log, "'\n");
}
-#endif
+# endif
+#endif /* End of does have locale messages */
SAVEFREEPV(errstr);
return errstr;
=for apidoc switch_to_global_locale
-On systems without locale support, or on single-threaded builds, or on
+On systems without locale support, or on typical single-threaded builds, or on
platforms that do not support per-thread locale operations, this function does
nothing. On such systems that do have locale support, only a locale global to
the whole program is available.
works fine, as all the other threads continue to ignore the global one, so only
this thread looks at it.
+However, on Windows systems this isn't quite true prior to Visual Studio 15,
+at which point Microsoft fixed a bug. A race can occur if you use the
+following operations on earlier Windows platforms:
+
+=over
+
+=item L<POSIX::localeconv|POSIX/localeconv>
+
+=item L<I18N::Langinfo>, items C<CRNCYSTR> and C<THOUSEP>
+
+=item L<perlapi/Perl_langinfo>, items C<CRNCYSTR> and C<THOUSEP>
+
+=back
+
+The first item is not fixable (except by upgrading to a later Visual Studio
+release), but it would be possible to work around the latter two items by using
+the Windows API functions C<GetNumberFormat> and C<GetCurrencyFormat>; patches
+welcome.
+
Without this function call, threads that use the L<C<setlocale(3)>> system
function will not work properly, as all the locale-sensitive functions will
look at the per-thread locale, and C<setlocale> will have no effect on this
bool
Perl_sync_locale()
{
+
+#ifndef USE_LOCALE
+
+ return TRUE;
+
+#else
+
const char * newlocale;
dTHX;
-#ifdef USE_POSIX_2008_LOCALE
+# ifdef USE_POSIX_2008_LOCALE
bool was_in_global_locale = FALSE;
locale_t cur_obj = uselocale((locale_t) 0);
* will affect the */
if (cur_obj == LC_GLOBAL_LOCALE) {
-# ifdef HAS_QUERY_LOCALE
+# ifdef HAS_QUERY_LOCALE
do_setlocale_c(LC_ALL, setlocale(LC_ALL, NULL));
-# else
+# else
unsigned int i;
do_setlocale_r(categories[i], setlocale(categories[i], NULL));
}
-# endif
+# endif
was_in_global_locale = TRUE;
}
-#else
+# else
bool was_in_global_locale = TRUE;
-#endif
-#ifdef USE_LOCALE_CTYPE
+# endif
+# ifdef USE_LOCALE_CTYPE
- newlocale = do_setlocale_c(LC_CTYPE, NULL);
+ newlocale = savepv(do_setlocale_c(LC_CTYPE, NULL));
DEBUG_Lv(PerlIO_printf(Perl_debug_log,
"%s:%d: %s\n", __FILE__, __LINE__,
setlocale_debug_string(LC_CTYPE, NULL, newlocale)));
new_ctype(newlocale);
+ Safefree(newlocale);
-#endif /* USE_LOCALE_CTYPE */
-#ifdef USE_LOCALE_COLLATE
+# endif /* USE_LOCALE_CTYPE */
+# ifdef USE_LOCALE_COLLATE
- newlocale = do_setlocale_c(LC_COLLATE, NULL);
+ newlocale = savepv(do_setlocale_c(LC_COLLATE, NULL));
DEBUG_Lv(PerlIO_printf(Perl_debug_log,
"%s:%d: %s\n", __FILE__, __LINE__,
setlocale_debug_string(LC_COLLATE, NULL, newlocale)));
new_collate(newlocale);
+ Safefree(newlocale);
-#endif
-#ifdef USE_LOCALE_NUMERIC
+# endif
+# ifdef USE_LOCALE_NUMERIC
- newlocale = do_setlocale_c(LC_NUMERIC, NULL);
+ newlocale = savepv(do_setlocale_c(LC_NUMERIC, NULL));
DEBUG_Lv(PerlIO_printf(Perl_debug_log,
"%s:%d: %s\n", __FILE__, __LINE__,
setlocale_debug_string(LC_NUMERIC, NULL, newlocale)));
new_numeric(newlocale);
+ Safefree(newlocale);
-#endif /* USE_LOCALE_NUMERIC */
+# endif /* USE_LOCALE_NUMERIC */
return was_in_global_locale;
+
+#endif
+
}
#if defined(DEBUGGING) && defined(USE_LOCALE)
* be overwritten by the next call, so this should be used just to
* formulate a string to immediately print or savepv() on. */
- /* initialise to a non-null value to keep it out of BSS and so keep
- * -DPERL_GLOBAL_STRUCT_PRIVATE happy */
- static char ret[256] = "If you can read this, thank your buggy C"
- " library strlcpy(), and change your hints file"
- " to undef it";
+ static char ret[256];
my_strlcpy(ret, "setlocale(", sizeof(ret));
my_strlcat(ret, category_name(category), sizeof(ret));
{ /* Free up */
locale_t cur_obj = uselocale(LC_GLOBAL_LOCALE);
- if (cur_obj != LC_GLOBAL_LOCALE) {
+ if (cur_obj != LC_GLOBAL_LOCALE && cur_obj != PL_C_locale_obj) {
freelocale(cur_obj);
}
}