+#include "EXTERN.h"
+#define PERL_IN_LOCALE_C
+#include "perl_langinfo.h"
+#include "perl.h"
+
+#include "reentr.h"
+
+#ifdef I_WCHAR
+# include <wchar.h>
+#endif
+
+/* If the environment says to, we can output debugging information during
+ * initialization. This is done before option parsing, and before any thread
+ * creation, so can be a file-level static */
+#if ! defined(DEBUGGING) || defined(PERL_GLOBAL_STRUCT)
+# define debug_initialization 0
+# define DEBUG_INITIALIZATION_set(v)
+#else
+static bool debug_initialization = FALSE;
+# define DEBUG_INITIALIZATION_set(v) (debug_initialization = v)
+#endif
+
+
+/* Returns the Unix errno portion; ignoring any others. This is a macro here
+ * instead of putting it into perl.h, because unclear to khw what should be
+ * done generally. */
+#define GET_ERRNO saved_errno
+
+/* strlen() of a literal string constant. We might want this more general,
+ * but using it in just this file for now. A problem with more generality is
+ * the compiler warnings about comparing unlike signs */
+#define STRLENs(s) (sizeof("" s "") - 1)
+
+/* Is the C string input 'name' "C" or "POSIX"? If so, and 'name' is the
+ * return of setlocale(), then this is extremely likely to be the C or POSIX
+ * locale. However, the output of setlocale() is documented to be opaque, but
+ * the odds are extremely small that it would return these two strings for some
+ * other locale. Note that VMS in these two locales includes many non-ASCII
+ * characters as controls and punctuation (below are hex bytes):
+ * cntrl: 84-97 9B-9F
+ * punct: A1-A3 A5 A7-AB B0-B3 B5-B7 B9-BD BF-CF D1-DD DF-EF F1-FD
+ * Oddly, none there are listed as alphas, though some represent alphabetics
+ * http://www.nntp.perl.org/group/perl.perl5.porters/2013/02/msg198753.html */
+#define isNAME_C_OR_POSIX(name) \
+ ( (name) != NULL \
+ && (( *(name) == 'C' && (*(name + 1)) == '\0') \
+ || strEQ((name), "POSIX")))
+
+#ifdef USE_LOCALE
+
+/* This code keeps a LRU cache of the UTF-8ness of the locales it has so-far
+ * looked up. This is in the form of a C string: */
+
+#define UTF8NESS_SEP "\v"
+#define UTF8NESS_PREFIX "\f"
+
+/* So, the string looks like:
+ *
+ * \vC\a0\vPOSIX\a0\vam_ET\a0\vaf_ZA.utf8\a1\ven_US.UTF-8\a1\0
+ *
+ * where the digit 0 after the \a indicates that the locale starting just
+ * after the preceding \v is not UTF-8, and the digit 1 mean it is. */
+
+STATIC_ASSERT_DECL(STRLENs(UTF8NESS_SEP) == 1);
+STATIC_ASSERT_DECL(STRLENs(UTF8NESS_PREFIX) == 1);
+
+#define C_and_POSIX_utf8ness UTF8NESS_SEP "C" UTF8NESS_PREFIX "0" \
+ UTF8NESS_SEP "POSIX" UTF8NESS_PREFIX "0"
+
+/* The cache is initialized to C_and_POSIX_utf8ness at start up. These are
+ * kept there always. The remining portion of the cache is LRU, with the
+ * oldest looked-up locale at the tail end */
+
+STATIC char *
+S_stdize_locale(pTHX_ char *locs)
+{
+ /* Standardize the locale name from a string returned by 'setlocale',
+ * possibly modifying that string.
+ *
+ * The typical return value of setlocale() is either
+ * (1) "xx_YY" if the first argument of setlocale() is not LC_ALL
+ * (2) "xa_YY xb_YY ..." if the first argument of setlocale() is LC_ALL
+ * (the space-separated values represent the various sublocales,
+ * in some unspecified order). This is not handled by this function.
+ *
+ * In some platforms it has a form like "LC_SOMETHING=Lang_Country.866\n",
+ * which is harmful for further use of the string in setlocale(). This
+ * function removes the trailing new line and everything up through the '='
+ * */
+
+ const char * const s = strchr(locs, '=');
+ bool okay = TRUE;
+
+ PERL_ARGS_ASSERT_STDIZE_LOCALE;
+
+ if (s) {
+ const char * const t = strchr(s, '.');
+ okay = FALSE;
+ if (t) {
+ const char * const u = strchr(t, '\n');
+ if (u && (u[1] == 0)) {
+ const STRLEN len = u - s;
+ Move(s + 1, locs, len, char);
+ locs[len] = 0;
+ okay = TRUE;
+ }
+ }
+ }
+
+ if (!okay)
+ Perl_croak(aTHX_ "Can't fix broken locale name \"%s\"", locs);
+
+ return locs;
+}
+
+/* Two parallel arrays; first the locale categories Perl uses on this system;
+ * the second array is their names. These arrays are in mostly arbitrary
+ * order. */
+
+const int categories[] = {
+
+# ifdef USE_LOCALE_NUMERIC
+ LC_NUMERIC,
+# endif
+# ifdef USE_LOCALE_CTYPE
+ LC_CTYPE,
+# endif
+# ifdef USE_LOCALE_COLLATE
+ LC_COLLATE,
+# endif
+# ifdef USE_LOCALE_TIME
+ LC_TIME,
+# endif
+# ifdef USE_LOCALE_MESSAGES
+ LC_MESSAGES,
+# endif
+# ifdef USE_LOCALE_MONETARY
+ LC_MONETARY,
+# endif
+# ifdef USE_LOCALE_ADDRESS
+ LC_ADDRESS,
+# endif
+# ifdef USE_LOCALE_IDENTIFICATION
+ LC_IDENTIFICATION,
+# endif
+# ifdef USE_LOCALE_MEASUREMENT
+ LC_MEASUREMENT,
+# endif
+# ifdef USE_LOCALE_PAPER
+ LC_PAPER,
+# endif
+# ifdef USE_LOCALE_TELEPHONE
+ LC_TELEPHONE,
+# endif
+# ifdef LC_ALL
+ LC_ALL,
+# endif
+ -1 /* Placeholder because C doesn't allow a
+ trailing comma, and it would get complicated
+ with all the #ifdef's */
+};
+
+/* The top-most real element is LC_ALL */
+
+const char * category_names[] = {
+
+# ifdef USE_LOCALE_NUMERIC
+ "LC_NUMERIC",
+# endif
+# ifdef USE_LOCALE_CTYPE
+ "LC_CTYPE",
+# endif
+# ifdef USE_LOCALE_COLLATE
+ "LC_COLLATE",
+# endif
+# ifdef USE_LOCALE_TIME
+ "LC_TIME",
+# endif
+# ifdef USE_LOCALE_MESSAGES
+ "LC_MESSAGES",
+# endif
+# ifdef USE_LOCALE_MONETARY
+ "LC_MONETARY",
+# endif
+# ifdef USE_LOCALE_ADDRESS
+ "LC_ADDRESS",
+# endif
+# ifdef USE_LOCALE_IDENTIFICATION
+ "LC_IDENTIFICATION",
+# endif
+# ifdef USE_LOCALE_MEASUREMENT
+ "LC_MEASUREMENT",
+# endif
+# ifdef USE_LOCALE_PAPER
+ "LC_PAPER",
+# endif
+# ifdef USE_LOCALE_TELEPHONE
+ "LC_TELEPHONE",
+# endif
+# ifdef LC_ALL
+ "LC_ALL",
+# endif
+ NULL /* Placeholder */
+ };
+
+# ifdef LC_ALL
+
+ /* On systems with LC_ALL, it is kept in the highest index position. (-2
+ * to account for the final unused placeholder element.) */
+# define NOMINAL_LC_ALL_INDEX (C_ARRAY_LENGTH(categories) - 2)
+
+# else
+
+ /* On systems without LC_ALL, we pretend it is there, one beyond the real
+ * top element, hence in the unused placeholder element. */
+# define NOMINAL_LC_ALL_INDEX (C_ARRAY_LENGTH(categories) - 1)
+
+# endif
+
+/* Pretending there is an LC_ALL element just above allows us to avoid most
+ * special cases. Most loops through these arrays in the code below are
+ * written like 'for (i = 0; i < NOMINAL_LC_ALL_INDEX; i++)'. They will work
+ * on either type of system. But the code must be written to not access the
+ * element at 'LC_ALL_INDEX' except on platforms that have it. This can be
+ * checked for at compile time by using the #define LC_ALL_INDEX which is only
+ * defined if we do have LC_ALL. */
+
+STATIC const char *
+S_category_name(const int category)
+{
+ unsigned int i;
+
+#ifdef LC_ALL
+
+ if (category == LC_ALL) {
+ return "LC_ALL";
+ }
+
+#endif
+
+ for (i = 0; i < NOMINAL_LC_ALL_INDEX; i++) {
+ if (category == categories[i]) {
+ return category_names[i];
+ }
+ }
+
+ {
+ const char suffix[] = " (unknown)";
+ int temp = category;
+ Size_t length = sizeof(suffix) + 1;
+ char * unknown;
+ dTHX;
+
+ if (temp < 0) {
+ length++;
+ temp = - temp;
+ }
+
+ /* Calculate the number of digits */
+ while (temp >= 10) {
+ temp /= 10;
+ length++;
+ }
+
+ Newx(unknown, length, char);
+ my_snprintf(unknown, length, "%d%s", category, suffix);
+ SAVEFREEPV(unknown);
+ return unknown;
+ }
+}
+
+/* Now create LC_foo_INDEX #defines for just those categories on this system */
+# ifdef USE_LOCALE_NUMERIC
+# define LC_NUMERIC_INDEX 0
+# define _DUMMY_NUMERIC LC_NUMERIC_INDEX
+# else
+# define _DUMMY_NUMERIC -1
+# endif
+# ifdef USE_LOCALE_CTYPE
+# define LC_CTYPE_INDEX _DUMMY_NUMERIC + 1
+# define _DUMMY_CTYPE LC_CTYPE_INDEX
+# else
+# define _DUMMY_CTYPE _DUMMY_NUMERIC
+# endif
+# ifdef USE_LOCALE_COLLATE
+# define LC_COLLATE_INDEX _DUMMY_CTYPE + 1
+# define _DUMMY_COLLATE LC_COLLATE_INDEX
+# else
+# define _DUMMY_COLLATE _DUMMY_CTYPE
+# endif
+# ifdef USE_LOCALE_TIME
+# define LC_TIME_INDEX _DUMMY_COLLATE + 1
+# define _DUMMY_TIME LC_TIME_INDEX
+# else
+# define _DUMMY_TIME _DUMMY_COLLATE
+# endif
+# ifdef USE_LOCALE_MESSAGES
+# define LC_MESSAGES_INDEX _DUMMY_TIME + 1
+# define _DUMMY_MESSAGES LC_MESSAGES_INDEX
+# else
+# define _DUMMY_MESSAGES _DUMMY_TIME
+# endif
+# ifdef USE_LOCALE_MONETARY
+# define LC_MONETARY_INDEX _DUMMY_MESSAGES + 1
+# define _DUMMY_MONETARY LC_MONETARY_INDEX
+# else
+# define _DUMMY_MONETARY _DUMMY_MESSAGES
+# endif
+# ifdef USE_LOCALE_ADDRESS
+# define LC_ADDRESS_INDEX _DUMMY_MONETARY + 1
+# define _DUMMY_ADDRESS LC_ADDRESS_INDEX
+# else
+# define _DUMMY_ADDRESS _DUMMY_MONETARY
+# endif
+# ifdef USE_LOCALE_IDENTIFICATION
+# define LC_IDENTIFICATION_INDEX _DUMMY_ADDRESS + 1
+# define _DUMMY_IDENTIFICATION LC_IDENTIFICATION_INDEX
+# else
+# define _DUMMY_IDENTIFICATION _DUMMY_ADDRESS
+# endif
+# ifdef USE_LOCALE_MEASUREMENT
+# define LC_MEASUREMENT_INDEX _DUMMY_IDENTIFICATION + 1
+# define _DUMMY_MEASUREMENT LC_MEASUREMENT_INDEX
+# else
+# define _DUMMY_MEASUREMENT _DUMMY_IDENTIFICATION
+# endif
+# ifdef USE_LOCALE_PAPER
+# define LC_PAPER_INDEX _DUMMY_MEASUREMENT + 1
+# define _DUMMY_PAPER LC_PAPER_INDEX
+# else
+# define _DUMMY_PAPER _DUMMY_MEASUREMENT
+# endif
+# ifdef USE_LOCALE_TELEPHONE
+# define LC_TELEPHONE_INDEX _DUMMY_PAPER + 1
+# define _DUMMY_TELEPHONE LC_TELEPHONE_INDEX
+# else
+# define _DUMMY_TELEPHONE _DUMMY_PAPER
+# endif
+# ifdef LC_ALL
+# define LC_ALL_INDEX _DUMMY_TELEPHONE + 1
+# endif
+#endif /* ifdef USE_LOCALE */
+
+/* Windows requres a customized base-level setlocale() */
+#ifdef WIN32
+# define my_setlocale(cat, locale) win32_setlocale(cat, locale)
+#else
+# define my_setlocale(cat, locale) setlocale(cat, locale)
+#endif
+
+#ifndef USE_POSIX_2008_LOCALE
+
+/* "do_setlocale_c" is intended to be called when the category is a constant
+ * known at compile time; "do_setlocale_r", not known until run time */
+# define do_setlocale_c(cat, locale) my_setlocale(cat, locale)
+# define do_setlocale_r(cat, locale) my_setlocale(cat, locale)
+
+#else /* Below uses POSIX 2008 */
+
+/* We emulate setlocale with our own function. LC_foo is not valid for the
+ * POSIX 2008 functions. Instead LC_foo_MASK is used, which we use an array
+ * lookup to convert to. At compile time we have defined LC_foo_INDEX as the
+ * proper offset into the array 'category_masks[]'. At runtime, we have to
+ * search through the array (as the actual numbers may not be small contiguous
+ * positive integers which would lend themselves to array lookup). */
+# define do_setlocale_c(cat, locale) \
+ emulate_setlocale(cat, locale, cat ## _INDEX, TRUE)
+# define do_setlocale_r(cat, locale) emulate_setlocale(cat, locale, 0, FALSE)
+
+/* A third array, parallel to the ones above to map from category to its
+ * equivalent mask */
+const int category_masks[] = {
+# ifdef USE_LOCALE_NUMERIC
+ LC_NUMERIC_MASK,
+# endif
+# ifdef USE_LOCALE_CTYPE
+ LC_CTYPE_MASK,
+# endif
+# ifdef USE_LOCALE_COLLATE
+ LC_COLLATE_MASK,
+# endif
+# ifdef USE_LOCALE_TIME
+ LC_TIME_MASK,
+# endif
+# ifdef USE_LOCALE_MESSAGES
+ LC_MESSAGES_MASK,
+# endif
+# ifdef USE_LOCALE_MONETARY
+ LC_MONETARY_MASK,
+# endif
+# ifdef USE_LOCALE_ADDRESS
+ LC_ADDRESS_MASK,
+# endif
+# ifdef USE_LOCALE_IDENTIFICATION
+ LC_IDENTIFICATION_MASK,
+# endif
+# ifdef USE_LOCALE_MEASUREMENT
+ LC_MEASUREMENT_MASK,
+# endif
+# ifdef USE_LOCALE_PAPER
+ LC_PAPER_MASK,
+# endif
+# ifdef USE_LOCALE_TELEPHONE
+ LC_TELEPHONE_MASK,
+# endif
+ /* LC_ALL can't be turned off by a Configure
+ * option, and in Posix 2008, should always be
+ * here, so compile it in unconditionally.
+ * This could catch some glitches at compile
+ * time */
+ LC_ALL_MASK
+ };
+
+STATIC const char *
+S_emulate_setlocale(const int category,
+ const char * locale,
+ unsigned int index,
+ const bool is_index_valid
+ )
+{
+ /* This function effectively performs a setlocale() on just the current
+ * thread; thus it is thread-safe. It does this by using the POSIX 2008
+ * locale functions to emulate the behavior of setlocale(). Similar to
+ * regular setlocale(), the return from this function points to memory that
+ * can be overwritten by other system calls, so needs to be copied
+ * immediately if you need to retain it. The difference here is that
+ * system calls besides another setlocale() can overwrite it.
+ *
+ * By doing this, most locale-sensitive functions become thread-safe. The
+ * exceptions are mostly those that return a pointer to static memory.
+ *
+ * This function takes the same parameters, 'category' and 'locale', that
+ * the regular setlocale() function does, but it also takes two additional
+ * ones. This is because the 2008 functions don't use a category; instead
+ * they use a corresponding mask. Because this function operates in both
+ * worlds, it may need one or the other or both. This function can
+ * calculate the mask from the input category, but to avoid this
+ * calculation, if the caller knows at compile time what the mask is, it
+ * can pass it, setting 'is_index_valid' to TRUE; otherwise the mask
+ * parameter is ignored.
+ *
+ * POSIX 2008, for some sick reason, chose not to provide a method to find
+ * the category name of a locale. Some vendors have created a
+ * querylocale() function to do just that. This function is a lot simpler
+ * to implement on systems that have this. Otherwise, we have to keep
+ * track of what the locale has been set to, so that we can return its
+ * name to emulate setlocale(). It's also possible for C code in some
+ * library to change the locale without us knowing it, though as of
+ * September 2017, there are no occurrences in CPAN of uselocale(). Some
+ * libraries do use setlocale(), but that changes the global locale, and
+ * threads using per-thread locales will just ignore those changes.
+ * Another problem is that without querylocale(), we have to guess at what
+ * was meant by setting a locale of "". We handle this by not actually
+ * ever setting to "" (unless querylocale exists), but to emulate what we
+ * think should happen for "".
+ */
+
+ int mask;
+ locale_t old_obj;
+ locale_t new_obj;
+ dTHX;
+
+# ifdef DEBUGGING
+
+ if (DEBUG_Lv_TEST || debug_initialization) {
+ PerlIO_printf(Perl_debug_log, "%s:%d: emulate_setlocale input=%d (%s), \"%s\", %d, %d\n", __FILE__, __LINE__, category, category_name(category), locale, index, is_index_valid);
+ }
+
+# endif
+
+ /* If the input mask might be incorrect, calculate the correct one */
+ if (! is_index_valid) {
+ unsigned int i;
+
+# ifdef DEBUGGING
+
+ if (DEBUG_Lv_TEST || debug_initialization) {
+ PerlIO_printf(Perl_debug_log, "%s:%d: finding index of category %d (%s)\n", __FILE__, __LINE__, category, category_name(category));
+ }
+
+# endif
+
+ for (i = 0; i <= LC_ALL_INDEX; i++) {
+ if (category == categories[i]) {
+ index = i;
+ goto found_index;
+ }
+ }
+
+ /* Here, we don't know about this category, so can't handle it.
+ * Fallback to the early POSIX usages */
+ Perl_warner(aTHX_ packWARN(WARN_LOCALE),
+ "Unknown locale category %d; can't set it to %s\n",
+ category, locale);
+ return NULL;
+
+ found_index: ;
+
+# ifdef DEBUGGING
+
+ if (DEBUG_Lv_TEST || debug_initialization) {
+ PerlIO_printf(Perl_debug_log, "%s:%d: index is %d for %s\n", __FILE__, __LINE__, index, category_name(category));
+ }
+
+# endif
+
+ }
+
+ mask = category_masks[index];
+
+# ifdef DEBUGGING
+
+ if (DEBUG_Lv_TEST || debug_initialization) {
+ PerlIO_printf(Perl_debug_log, "%s:%d: category name is %s; mask is 0x%x\n", __FILE__, __LINE__, category_names[index], mask);
+ }
+
+# endif
+
+ /* If just querying what the existing locale is ... */
+ if (locale == NULL) {
+ locale_t cur_obj = uselocale((locale_t) 0);
+
+# ifdef DEBUGGING
+
+ if (DEBUG_Lv_TEST || debug_initialization) {
+ PerlIO_printf(Perl_debug_log, "%s:%d: emulate_setlocale querying %p\n", __FILE__, __LINE__, cur_obj);
+ }
+
+# endif
+
+ if (cur_obj == LC_GLOBAL_LOCALE) {
+ return my_setlocale(category, NULL);
+ }
+
+# ifdef HAS_QUERYLOCALE
+
+ return (char *) querylocale(mask, cur_obj);
+
+# else
+
+ /* If this assert fails, adjust the size of curlocales in intrpvar.h */
+ STATIC_ASSERT_STMT(C_ARRAY_LENGTH(PL_curlocales) > LC_ALL_INDEX);
+
+# if defined(_NL_LOCALE_NAME) && defined(DEBUGGING)
+
+ {
+ /* Internal glibc for querylocale(), but doesn't handle
+ * empty-string ("") locale properly; who knows what other
+ * glitches. Check it for now, under debug. */
+
+ char * temp_name = nl_langinfo_l(_NL_LOCALE_NAME(category),
+ uselocale((locale_t) 0));
+ /*
+ PerlIO_printf(Perl_debug_log, "%s:%d: temp_name=%s\n", __FILE__, __LINE__, temp_name ? temp_name : "NULL");
+ PerlIO_printf(Perl_debug_log, "%s:%d: index=%d\n", __FILE__, __LINE__, index);
+ PerlIO_printf(Perl_debug_log, "%s:%d: PL_curlocales[index]=%s\n", __FILE__, __LINE__, PL_curlocales[index]);
+ */
+ if (temp_name && PL_curlocales[index] && strNE(temp_name, "")) {
+ if ( strNE(PL_curlocales[index], temp_name)
+ && ! ( isNAME_C_OR_POSIX(temp_name)
+ && isNAME_C_OR_POSIX(PL_curlocales[index]))) {
+
+# ifdef USE_C_BACKTRACE
+
+ dump_c_backtrace(Perl_debug_log, 20, 1);
+
+# endif
+
+ Perl_croak(aTHX_ "panic: Mismatch between what Perl thinks %s is"
+ " (%s) and what internal glibc thinks"
+ " (%s)\n", category_names[index],
+ PL_curlocales[index], temp_name);
+ }
+
+ return temp_name;
+ }
+ }
+
+# endif
+
+ /* Without querylocale(), we have to use our record-keeping we've
+ * done. */
+
+ if (category != LC_ALL) {
+
+# ifdef DEBUGGING
+
+ if (DEBUG_Lv_TEST || debug_initialization) {
+ PerlIO_printf(Perl_debug_log, "%s:%d: emulate_setlocale returning %s\n", __FILE__, __LINE__, PL_curlocales[index]);
+ }
+
+# endif
+
+ return PL_curlocales[index];
+ }
+ else { /* For LC_ALL */
+ unsigned int i;
+ Size_t names_len = 0;
+ char * all_string;
+ bool are_all_categories_the_same_locale = TRUE;
+
+ /* If we have a valid LC_ALL value, just return it */
+ if (PL_curlocales[LC_ALL_INDEX]) {
+
+# ifdef DEBUGGING
+
+ if (DEBUG_Lv_TEST || debug_initialization) {
+ PerlIO_printf(Perl_debug_log, "%s:%d: emulate_setlocale returning %s\n", __FILE__, __LINE__, PL_curlocales[LC_ALL_INDEX]);
+ }
+
+# endif
+
+ return PL_curlocales[LC_ALL_INDEX];
+ }
+
+ /* Otherwise, we need to construct a string of name=value pairs.
+ * We use the glibc syntax, like
+ * LC_NUMERIC=C;LC_TIME=en_US.UTF-8;...
+ * First calculate the needed size. Along the way, check if all
+ * the locale names are the same */
+ for (i = 0; i < LC_ALL_INDEX; i++) {
+
+# ifdef DEBUGGING
+
+ if (DEBUG_Lv_TEST || debug_initialization) {
+ PerlIO_printf(Perl_debug_log, "%s:%d: emulate_setlocale i=%d, name=%s, locale=%s\n", __FILE__, __LINE__, i, category_names[i], PL_curlocales[i]);
+ }
+
+# endif
+
+ names_len += strlen(category_names[i])
+ + 1 /* '=' */
+ + strlen(PL_curlocales[i])
+ + 1; /* ';' */
+
+ if (i > 0 && strNE(PL_curlocales[i], PL_curlocales[i-1])) {
+ are_all_categories_the_same_locale = FALSE;
+ }
+ }
+
+ /* If they are the same, we don't actually have to construct the
+ * string; we just make the entry in LC_ALL_INDEX valid, and be
+ * that single name */
+ if (are_all_categories_the_same_locale) {
+ PL_curlocales[LC_ALL_INDEX] = savepv(PL_curlocales[0]);
+ return PL_curlocales[LC_ALL_INDEX];
+ }
+
+ names_len++; /* Trailing '\0' */
+ SAVEFREEPV(Newx(all_string, names_len, char));
+ *all_string = '\0';
+
+ /* Then fill in the string */
+ for (i = 0; i < LC_ALL_INDEX; i++) {
+
+# ifdef DEBUGGING
+
+ if (DEBUG_Lv_TEST || debug_initialization) {
+ PerlIO_printf(Perl_debug_log, "%s:%d: emulate_setlocale i=%d, name=%s, locale=%s\n", __FILE__, __LINE__, i, category_names[i], PL_curlocales[i]);
+ }
+
+# endif
+
+ my_strlcat(all_string, category_names[i], names_len);
+ my_strlcat(all_string, "=", names_len);
+ my_strlcat(all_string, PL_curlocales[i], names_len);
+ my_strlcat(all_string, ";", names_len);
+ }
+
+# ifdef DEBUGGING
+
+ if (DEBUG_L_TEST || debug_initialization) {
+ PerlIO_printf(Perl_debug_log, "%s:%d: emulate_setlocale returning %s\n", __FILE__, __LINE__, all_string);
+ }
+
+ #endif
+
+ return all_string;
+ }
+
+# ifdef EINVAL
+
+ SETERRNO(EINVAL, LIB_INVARG);
+
+# endif
+
+ return NULL;
+
+# endif
+
+ }
+
+ /* Here, we are switching locales. */
+
+# ifndef HAS_QUERYLOCALE
+
+ if (strEQ(locale, "")) {
+
+ /* For non-querylocale() systems, we do the setting of "" ourselves to
+ * be sure that we really know what's going on. We follow the Linux
+ * documented behavior (but if that differs from the actual behavior,
+ * this won't work exactly as the OS implements). We go out and
+ * examine the environment based on our understanding of how the system
+ * works, and use that to figure things out */
+
+ const char * const lc_all = PerlEnv_getenv("LC_ALL");
+
+ /* Use any "LC_ALL" environment variable, as it overrides everything
+ * else. */
+ if (lc_all && strNE(lc_all, "")) {
+ locale = lc_all;
+ }
+ else {
+
+ /* Otherwise, we need to dig deeper. Unless overridden, the
+ * default is the LANG environment variable; if it doesn't exist,
+ * then "C" */
+
+ const char * default_name;
+
+ /* To minimize other threads messing with the environment, we copy
+ * the variable, making it a temporary. But this doesn't work upon
+ * program initialization before any scopes are created, and at
+ * this time, there's nothing else going on that would interfere.
+ * So skip the copy in that case */
+ if (PL_scopestack_ix == 0) {
+ default_name = PerlEnv_getenv("LANG");
+ }
+ else {
+ default_name = savepv(PerlEnv_getenv("LANG"));
+ }
+
+ if (! default_name || strEQ(default_name, "")) {
+ default_name = "C";
+ }
+ else if (PL_scopestack_ix != 0) {
+ SAVEFREEPV(default_name);
+ }
+
+ if (category != LC_ALL) {
+ const char * const name = PerlEnv_getenv(category_names[index]);
+
+ /* Here we are setting a single category. Assume will have the
+ * default name */
+ locale = default_name;
+
+ /* But then look for an overriding environment variable */
+ if (name && strNE(name, "")) {
+ locale = name;
+ }
+ }
+ else {
+ bool did_override = FALSE;
+ unsigned int i;
+
+ /* Here, we are getting LC_ALL. Any categories that don't have
+ * a corresponding environment variable set should be set to
+ * LANG, or to "C" if there is no LANG. If no individual
+ * categories differ from this, we can just set LC_ALL. This
+ * is buggy on systems that have extra categories that we don't
+ * know about. If there is an environment variable that sets
+ * that category, we won't know to look for it, and so our use
+ * of LANG or "C" improperly overrides it. On the other hand,
+ * if we don't do what is done here, and there is no
+ * environment variable, the category's locale should be set to
+ * LANG or "C". So there is no good solution. khw thinks the
+ * best is to look at systems to see what categories they have,
+ * and include them, and then to assume that we know the
+ * complete set */
+
+ for (i = 0; i < LC_ALL_INDEX; i++) {
+ const char * const env_override
+ = savepv(PerlEnv_getenv(category_names[i]));
+ const char * this_locale = ( env_override
+ && strNE(env_override, ""))
+ ? env_override
+ : default_name;
+ if (! emulate_setlocale(categories[i], this_locale, i, TRUE))
+ {
+ Safefree(env_override);
+ return NULL;
+ }
+
+ if (strNE(this_locale, default_name)) {
+ did_override = TRUE;
+ }
+
+ Safefree(env_override);
+ }
+
+ /* If all the categories are the same, we can set LC_ALL to
+ * that */
+ if (! did_override) {
+ locale = default_name;
+ }
+ else {
+
+ /* Here, LC_ALL is no longer valid, as some individual
+ * categories don't match it. We call ourselves
+ * recursively, as that will execute the code that
+ * generates the proper locale string for this situation.
+ * We don't do the remainder of this function, as that is
+ * to update our records, and we've just done that for the
+ * individual categories in the loop above, and doing so
+ * would cause LC_ALL to be done as well */
+ return emulate_setlocale(LC_ALL, NULL, LC_ALL_INDEX, TRUE);
+ }
+ }
+ }
+ }
+ else if (strchr(locale, ';')) {
+
+ /* LC_ALL may actually incude a conglomeration of various categories.
+ * Without querylocale, this code uses the glibc (as of this writing)
+ * syntax for representing that, but that is not a stable API, and
+ * other platforms do it differently, so we have to handle all cases
+ * ourselves */
+
+ unsigned int i;
+ const char * s = locale;
+ const char * e = locale + strlen(locale);
+ const char * p = s;
+ const char * category_end;
+ const char * name_start;
+ const char * name_end;
+
+ /* If the string that gives what to set doesn't include all categories,
+ * the omitted ones get set to "C". To get this behavior, first set
+ * all the individual categories to "C", and override the furnished
+ * ones below */
+ for (i = 0; i < LC_ALL_INDEX; i++) {
+ if (! emulate_setlocale(categories[i], "C", i, TRUE)) {
+ return NULL;
+ }
+ }
+
+ while (s < e) {
+
+ /* Parse through the category */
+ while (isWORDCHAR(*p)) {
+ p++;
+ }
+ category_end = p;
+
+ if (*p++ != '=') {
+ Perl_croak(aTHX_
+ "panic: %s: %d: Unexpected character in locale name '%02X",
+ __FILE__, __LINE__, *(p-1));
+ }
+
+ /* Parse through the locale name */
+ name_start = p;
+ while (p < e && *p != ';') {
+ if (! isGRAPH(*p)) {
+ Perl_croak(aTHX_
+ "panic: %s: %d: Unexpected character in locale name '%02X",
+ __FILE__, __LINE__, *(p-1));
+ }
+ p++;
+ }
+ name_end = p;
+
+ /* Space past the semi-colon */
+ if (p < e) {
+ p++;
+ }
+
+ /* Find the index of the category name in our lists */
+ for (i = 0; i < LC_ALL_INDEX; i++) {
+ char * individ_locale;
+
+ /* Keep going if this isn't the index. The strnNE() avoids a
+ * Perl_form(), but would fail if ever a category name could be
+ * a substring of another one, like if there were a
+ * "LC_TIME_DATE" */
+ if strnNE(s, category_names[i], category_end - s) {
+ continue;
+ }
+
+ /* If this index is for the single category we're changing, we
+ * have found the locale to set it to. */
+ if (category == categories[i]) {
+ locale = Perl_form(aTHX_ "%.*s",
+ (int) (name_end - name_start),
+ name_start);
+ goto ready_to_set;
+ }
+
+ assert(category == LC_ALL);
+ individ_locale = Perl_form(aTHX_ "%.*s",
+ (int) (name_end - name_start), name_start);
+ if (! emulate_setlocale(categories[i], individ_locale, i, TRUE))
+ {
+ return NULL;
+ }
+ }
+
+ s = p;
+ }
+
+ /* Here we have set all the individual categories by recursive calls.
+ * These collectively should have fixed up LC_ALL, so can just query
+ * what that now is */
+ assert(category == LC_ALL);
+
+ return do_setlocale_c(LC_ALL, NULL);
+ }
+
+ ready_to_set: ;
+
+ /* Here at the end of having to deal with the absence of querylocale().
+ * Some cases have already been fully handled by recursive calls to this
+ * function. But at this point, we haven't dealt with those, but are now
+ * prepared to, knowing what the locale name to set this category to is.
+ * This would have come for free if this system had had querylocale() */
+
+# endif /* end of ! querylocale */
+
+ assert(PL_C_locale_obj);
+
+ /* Switching locales generally entails freeing the current one's space (at
+ * the C library's discretion). We need to stop using that locale before
+ * the switch. So switch to a known locale object that we don't otherwise
+ * mess with. This returns the locale object in effect at the time of the
+ * switch. */
+ old_obj = uselocale(PL_C_locale_obj);
+
+# ifdef DEBUGGING
+
+ if (DEBUG_Lv_TEST || debug_initialization) {
+ PerlIO_printf(Perl_debug_log, "%s:%d: emulate_setlocale was using %p\n", __FILE__, __LINE__, old_obj);
+ }
+
+# endif
+
+ if (! old_obj) {
+
+# ifdef DEBUGGING
+
+ if (DEBUG_L_TEST || debug_initialization) {
+ dSAVE_ERRNO;
+ PerlIO_printf(Perl_debug_log, "%s:%d: emulate_setlocale switching to C failed: %d\n", __FILE__, __LINE__, GET_ERRNO);
+ RESTORE_ERRNO;
+ }
+
+# endif
+
+ return NULL;
+ }
+
+# ifdef DEBUGGING
+
+ if (DEBUG_Lv_TEST || debug_initialization) {
+ PerlIO_printf(Perl_debug_log, "%s:%d: emulate_setlocale now using %p\n", __FILE__, __LINE__, PL_C_locale_obj);
+ }
+
+# endif
+
+ /* If we weren't in a thread safe locale, set so that newlocale() below
+ which uses 'old_obj', uses an empty one. Same for our reserved C object.
+ The latter is defensive coding, so that, even if there is some bug, we
+ will never end up trying to modify either of these, as if passed to
+ newlocale(), they can be. */
+ if (old_obj == LC_GLOBAL_LOCALE || old_obj == PL_C_locale_obj) {
+ old_obj = (locale_t) 0;
+ }
+
+ /* Ready to create a new locale by modification of the exising one */
+ new_obj = newlocale(mask, locale, old_obj);