X-Git-Url: https://perl5.git.perl.org/perl5.git/blobdiff_plain/86799d2d7311249b2270db1ccb7d283999810d4f..7a3934240c41fbfc2d9bd119996fc8740d6a6973:/locale.c diff --git a/locale.c b/locale.c index 46b0ec6..e29d0c4 100644 --- a/locale.c +++ b/locale.c @@ -44,37 +44,76 @@ /* If the environment says to, we can output debugging information during * initialization. This is done before option parsing, and before any thread * creation, so can be a file-level static */ -#ifdef DEBUGGING -# ifdef PERL_GLOBAL_STRUCT - /* no global syms allowed */ -# define debug_initialization 0 -# define DEBUG_INITIALIZATION_set(v) -# else +#if ! defined(DEBUGGING) || defined(PERL_GLOBAL_STRUCT) +# define debug_initialization 0 +# define DEBUG_INITIALIZATION_set(v) +#else static bool debug_initialization = FALSE; -# define DEBUG_INITIALIZATION_set(v) (debug_initialization = v) -# endif +# define DEBUG_INITIALIZATION_set(v) (debug_initialization = v) #endif +/* strlen() of a literal string constant. We might want this more general, + * but using it in just this file for now. A problem with more generality is + * the compiler warnings about comparing unlike signs */ +#define STRLENs(s) (sizeof("" s "") - 1) + +/* Is the C string input 'name' "C" or "POSIX"? If so, and 'name' is the + * return of setlocale(), then this is extremely likely to be the C or POSIX + * locale. However, the output of setlocale() is documented to be opaque, but + * the odds are extremely small that it would return these two strings for some + * other locale. Note that VMS in these two locales includes many non-ASCII + * characters as controls and punctuation (below are hex bytes): + * cntrl: 84-97 9B-9F + * punct: A1-A3 A5 A7-AB B0-B3 B5-B7 B9-BD BF-CF D1-DD DF-EF F1-FD + * Oddly, none there are listed as alphas, though some represent alphabetics + * http://www.nntp.perl.org/group/perl.perl5.porters/2013/02/msg198753.html */ +#define isNAME_C_OR_POSIX(name) \ + ( (name) != NULL \ + && (( *(name) == 'C' && (*(name + 1)) == '\0') \ + || strEQ((name), "POSIX"))) + #ifdef USE_LOCALE -/* - * Standardize the locale name from a string returned by 'setlocale', possibly - * modifying that string. - * - * The typical return value of setlocale() is either - * (1) "xx_YY" if the first argument of setlocale() is not LC_ALL - * (2) "xa_YY xb_YY ..." if the first argument of setlocale() is LC_ALL - * (the space-separated values represent the various sublocales, - * in some unspecified order). This is not handled by this function. +/* This code keeps a LRU cache of the UTF-8ness of the locales it has so-far + * looked up. This is in the form of a C string: */ + +#define UTF8NESS_SEP "\v" +#define UTF8NESS_PREFIX "\f" + +/* So, the string looks like: * - * In some platforms it has a form like "LC_SOMETHING=Lang_Country.866\n", - * which is harmful for further use of the string in setlocale(). This - * function removes the trailing new line and everything up through the '=' + * \vC\a0\vPOSIX\a0\vam_ET\a0\vaf_ZA.utf8\a1\ven_US.UTF-8\a1\0 * - */ + * where the digit 0 after the \a indicates that the locale starting just + * after the preceding \v is not UTF-8, and the digit 1 mean it is. */ + +STATIC_ASSERT_DECL(STRLENs(UTF8NESS_SEP) == 1); +STATIC_ASSERT_DECL(STRLENs(UTF8NESS_PREFIX) == 1); + +#define C_and_POSIX_utf8ness UTF8NESS_SEP "C" UTF8NESS_PREFIX "0" \ + UTF8NESS_SEP "POSIX" UTF8NESS_PREFIX "0" + +/* The cache is initialized to C_and_POSIX_utf8ness at start up. These are + * kept there always. The remining portion of the cache is LRU, with the + * oldest looked-up locale at the tail end */ + STATIC char * S_stdize_locale(pTHX_ char *locs) { + /* Standardize the locale name from a string returned by 'setlocale', + * possibly modifying that string. + * + * The typical return value of setlocale() is either + * (1) "xx_YY" if the first argument of setlocale() is not LC_ALL + * (2) "xa_YY xb_YY ..." if the first argument of setlocale() is LC_ALL + * (the space-separated values represent the various sublocales, + * in some unspecified order). This is not handled by this function. + * + * In some platforms it has a form like "LC_SOMETHING=Lang_Country.866\n", + * which is harmful for further use of the string in setlocale(). This + * function removes the trailing new line and everything up through the '=' + * */ + const char * const s = strchr(locs, '='); bool okay = TRUE; @@ -100,8 +139,246 @@ S_stdize_locale(pTHX_ char *locs) return locs; } +/* Two parallel arrays; first the locale categories Perl uses on this system; + * the second array is their names. These arrays are in mostly arbitrary + * order. */ + +const int categories[] = { + +# ifdef USE_LOCALE_NUMERIC + LC_NUMERIC, +# endif +# ifdef USE_LOCALE_CTYPE + LC_CTYPE, +# endif +# ifdef USE_LOCALE_COLLATE + LC_COLLATE, +# endif +# ifdef USE_LOCALE_TIME + LC_TIME, +# endif +# ifdef USE_LOCALE_MESSAGES + LC_MESSAGES, +# endif +# ifdef USE_LOCALE_MONETARY + LC_MONETARY, +# endif +# ifdef USE_LOCALE_ADDRESS + LC_ADDRESS, +# endif +# ifdef USE_LOCALE_IDENTIFICATION + LC_IDENTIFICATION, +# endif +# ifdef USE_LOCALE_MEASUREMENT + LC_MEASUREMENT, +# endif +# ifdef USE_LOCALE_PAPER + LC_PAPER, +# endif +# ifdef USE_LOCALE_TELEPHONE + LC_TELEPHONE, +# endif +# ifdef LC_ALL + LC_ALL, +# endif + -1 /* Placeholder because C doesn't allow a + trailing comma, and it would get complicated + with all the #ifdef's */ +}; + +/* The top-most real element is LC_ALL */ + +const char * category_names[] = { + +# ifdef USE_LOCALE_NUMERIC + "LC_NUMERIC", +# endif +# ifdef USE_LOCALE_CTYPE + "LC_CTYPE", +# endif +# ifdef USE_LOCALE_COLLATE + "LC_COLLATE", +# endif +# ifdef USE_LOCALE_TIME + "LC_TIME", +# endif +# ifdef USE_LOCALE_MESSAGES + "LC_MESSAGES", +# endif +# ifdef USE_LOCALE_MONETARY + "LC_MONETARY", +# endif +# ifdef USE_LOCALE_ADDRESS + "LC_ADDRESS", +# endif +# ifdef USE_LOCALE_IDENTIFICATION + "LC_IDENTIFICATION", +# endif +# ifdef USE_LOCALE_MEASUREMENT + "LC_MEASUREMENT", +# endif +# ifdef USE_LOCALE_PAPER + "LC_PAPER", +# endif +# ifdef USE_LOCALE_TELEPHONE + "LC_TELEPHONE", +# endif +# ifdef LC_ALL + "LC_ALL", +# endif + NULL /* Placeholder */ + }; + +# ifdef LC_ALL + + /* On systems with LC_ALL, it is kept in the highest index position. (-2 + * to account for the final unused placeholder element.) */ +# define NOMINAL_LC_ALL_INDEX (C_ARRAY_LENGTH(categories) - 2) + +# else + + /* On systems without LC_ALL, we pretend it is there, one beyond the real + * top element, hence in the unused placeholder element. */ +# define NOMINAL_LC_ALL_INDEX (C_ARRAY_LENGTH(categories) - 1) + +# endif + +/* Pretending there is an LC_ALL element just above allows us to avoid most + * special cases. Most loops through these arrays in the code below are + * written like 'for (i = 0; i < NOMINAL_LC_ALL_INDEX; i++)'. They will work + * on either type of system. But the code must be written to not access the + * element at 'LC_ALL_INDEX' except on platforms that have it. This can be + * checked for at compile time by using the #define LC_ALL_INDEX which is only + * defined if we do have LC_ALL. */ + +STATIC const char * +S_category_name(const int category) +{ + unsigned int i; + +#ifdef LC_ALL + + if (category == LC_ALL) { + return "LC_ALL"; + } + #endif + for (i = 0; i < NOMINAL_LC_ALL_INDEX; i++) { + if (category == categories[i]) { + return category_names[i]; + } + } + + { + const char suffix[] = " (unknown)"; + int temp = category; + Size_t length = sizeof(suffix) + 1; + char * unknown; + dTHX; + + if (temp < 0) { + length++; + temp = - temp; + } + + /* Calculate the number of digits */ + while (temp >= 10) { + temp /= 10; + length++; + } + + Newx(unknown, length, char); + my_snprintf(unknown, length, "%d%s", category, suffix); + SAVEFREEPV(unknown); + return unknown; + } +} + +/* Now create LC_foo_INDEX #defines for just those categories on this system */ +# ifdef USE_LOCALE_NUMERIC +# define LC_NUMERIC_INDEX 0 +# define _DUMMY_NUMERIC LC_NUMERIC_INDEX +# else +# define _DUMMY_NUMERIC -1 +# endif +# ifdef USE_LOCALE_CTYPE +# define LC_CTYPE_INDEX _DUMMY_NUMERIC + 1 +# define _DUMMY_CTYPE LC_CTYPE_INDEX +# else +# define _DUMMY_CTYPE _DUMMY_NUMERIC +# endif +# ifdef USE_LOCALE_COLLATE +# define LC_COLLATE_INDEX _DUMMY_CTYPE + 1 +# define _DUMMY_COLLATE LC_COLLATE_INDEX +# else +# define _DUMMY_COLLATE _DUMMY_COLLATE +# endif +# ifdef USE_LOCALE_TIME +# define LC_TIME_INDEX _DUMMY_COLLATE + 1 +# define _DUMMY_TIME LC_TIME_INDEX +# else +# define _DUMMY_TIME _DUMMY_COLLATE +# endif +# ifdef USE_LOCALE_MESSAGES +# define LC_MESSAGES_INDEX _DUMMY_TIME + 1 +# define _DUMMY_MESSAGES LC_MESSAGES_INDEX +# else +# define _DUMMY_MESSAGES _DUMMY_TIME +# endif +# ifdef USE_LOCALE_MONETARY +# define LC_MONETARY_INDEX _DUMMY_MESSAGES + 1 +# define _DUMMY_MONETARY LC_MONETARY_INDEX +# else +# define _DUMMY_MONETARY _DUMMY_MESSAGES +# endif +# ifdef USE_LOCALE_ADDRESS +# define LC_ADDRESS_INDEX _DUMMY_MONETARY + 1 +# define _DUMMY_ADDRESS LC_ADDRESS_INDEX +# else +# define _DUMMY_ADDRESS _DUMMY_MONETARY +# endif +# ifdef USE_LOCALE_IDENTIFICATION +# define LC_IDENTIFICATION_INDEX _DUMMY_ADDRESS + 1 +# define _DUMMY_IDENTIFICATION LC_IDENTIFICATION_INDEX +# else +# define _DUMMY_IDENTIFICATION _DUMMY_ADDRESS +# endif +# ifdef USE_LOCALE_MEASUREMENT +# define LC_MEASUREMENT_INDEX _DUMMY_IDENTIFICATION + 1 +# define _DUMMY_MEASUREMENT LC_MEASUREMENT_INDEX +# else +# define _DUMMY_MEASUREMENT _DUMMY_IDENTIFICATION +# endif +# ifdef USE_LOCALE_PAPER +# define LC_PAPER_INDEX _DUMMY_MEASUREMENT + 1 +# define _DUMMY_PAPER LC_PAPER_INDEX +# else +# define _DUMMY_PAPER _DUMMY_MEASUREMENT +# endif +# ifdef USE_LOCALE_TELEPHONE +# define LC_TELEPHONE_INDEX _DUMMY_PAPER + 1 +# define _DUMMY_TELEPHONE LC_TELEPHONE_INDEX +# else +# define _DUMMY_TELEPHONE _DUMMY_PAPER +# endif +# ifdef LC_ALL +# define LC_ALL_INDEX _DUMMY_TELEPHONE + 1 +# endif +#endif /* ifdef USE_LOCALE */ + +/* Windows requres a customized base-level setlocale() */ +# ifdef WIN32 +# define my_setlocale(cat, locale) win32_setlocale(cat, locale) +# else +# define my_setlocale(cat, locale) setlocale(cat, locale) +# endif + +/* Just placeholders for now. "_c" is intended to be called when the category + * is a constant known at compile time; "_r", not known until run time */ +# define do_setlocale_c(category, locale) my_setlocale(category, locale) +# define do_setlocale_r(category, locale) my_setlocale(category, locale) + STATIC void S_set_numeric_radix(pTHX_ const bool use_locale) { @@ -115,9 +392,9 @@ S_set_numeric_radix(pTHX_ const bool use_locale) if (use_locale) { const char * radix = my_nl_langinfo(PERL_RADIXCHAR, FALSE); /* FALSE => already in dest locale */ - /* ... and the character being used isn't a dot */ if (strNE(radix, ".")) { + if (PL_numeric_radix_sv) { sv_setpv(PL_numeric_radix_sv, radix); } @@ -125,10 +402,10 @@ S_set_numeric_radix(pTHX_ const bool use_locale) PL_numeric_radix_sv = newSVpv(radix, 0); } - if ( ! is_utf8_invariant_string( - (U8 *) SvPVX(PL_numeric_radix_sv), SvCUR(PL_numeric_radix_sv)) - && is_utf8_string( - (U8 *) SvPVX(PL_numeric_radix_sv), SvCUR(PL_numeric_radix_sv)) + /* If this is valid UTF-8 that isn't totally ASCII, and we are in + * a UTF-8 locale, then mark the radix as being in UTF-8 */ + if (is_utf8_non_invariant_string((U8 *) SvPVX(PL_numeric_radix_sv), + SvCUR(PL_numeric_radix_sv)) && _is_cur_LC_category_utf8(LC_NUMERIC)) { SvUTF8_on(PL_numeric_radix_sv); @@ -159,20 +436,6 @@ S_set_numeric_radix(pTHX_ const bool use_locale) } -/* Is the C string input 'name' "C" or "POSIX"? If so, and 'name' is the - * return of setlocale(), then this is extremely likely to be the C or POSIX - * locale. However, the output of setlocale() is documented to be opaque, but - * the odds are extremely small that it would return these two strings for some - * other locale. Note that VMS in these two locales includes many non-ASCII - * characters as controls and punctuation (below are hex bytes): - * cntrl: 00-1F 7F 84-97 9B-9F - * punct: 21-2F 3A-40 5B-60 7B-7E A1-A3 A5 A7-AB B0-B3 B5-B7 B9-BD BF-CF D1-DD DF-EF F1-FD - * Oddly, none there are listed as alphas, though some represent alphabetics - * http://www.nntp.perl.org/group/perl.perl5.porters/2013/02/msg198753.html */ -#define isNAME_C_OR_POSIX(name) \ - ( (name) != NULL \ - && (( *(name) == 'C' && (*(name + 1)) == '\0') \ - || strEQ((name), "POSIX"))) void Perl_new_numeric(pTHX_ const char *newnum) @@ -184,13 +447,13 @@ Perl_new_numeric(pTHX_ const char *newnum) #else - /* Called after all libc setlocale() calls affecting LC_NUMERIC, to tell + /* Called after each libc setlocale() call affecting LC_NUMERIC, to tell * core Perl this and that 'newnum' is the name of the new locale. * It installs this locale as the current underlying default. * * The default locale and the C locale can be toggled between by use of the - * set_numeric_local() and set_numeric_standard() functions, which should - * probably not be called directly, but only via macros like + * set_numeric_underlying() and set_numeric_standard() functions, which + * should probably not be called directly, but only via macros like * SET_NUMERIC_STANDARD() in perl.h. * * The toggling is necessary mainly so that a non-dot radix decimal point @@ -199,17 +462,21 @@ Perl_new_numeric(pTHX_ const char *newnum) * * This sets several interpreter-level variables: * PL_numeric_name The underlying locale's name: a copy of 'newnum' - * PL_numeric_local A boolean indicating if the toggled state is such + * PL_numeric_underlying A boolean indicating if the toggled state is such * that the current locale is the program's underlying * locale * PL_numeric_standard An int indicating if the toggled state is such - * that the current locale is the C locale. If non-zero, - * it is in C; if > 1, it means it may not be toggled away + * that the current locale is the C locale or + * indistinguishable from the C locale. If non-zero, it + * is in C; if > 1, it means it may not be toggled away * from C. - * Note that both of the last two variables can be true at the same time, - * if the underlying locale is C. (Toggling is a no-op under these - * circumstances.) - * + * PL_numeric_underlying_is_standard A bool kept by this function + * indicating that the underlying locale and the standard + * C locale are indistinguishable for the purposes of + * LC_NUMERIC. This happens when both of the above two + * variables are true at the same time. (Toggling is a + * no-op under these circumstances.) This variable is + * used to avoid having to recalculate. * Any code changing the locale (outside this file) should use * POSIX::setlocale, which calls this function. Therefore this function * should be called directly only from this file and from @@ -221,15 +488,25 @@ Perl_new_numeric(pTHX_ const char *newnum) Safefree(PL_numeric_name); PL_numeric_name = NULL; PL_numeric_standard = TRUE; - PL_numeric_local = TRUE; + PL_numeric_underlying = TRUE; + PL_numeric_underlying_is_standard = TRUE; return; } save_newnum = stdize_locale(savepv(newnum)); - + PL_numeric_underlying = TRUE; PL_numeric_standard = isNAME_C_OR_POSIX(save_newnum); - PL_numeric_local = TRUE; + /* If its name isn't C nor POSIX, it could still be indistinguishable from + * them */ + if (! PL_numeric_standard) { + PL_numeric_standard = cBOOL(strEQ(".", my_nl_langinfo(PERL_RADIXCHAR, + FALSE /* Don't toggle locale */ )) + && strEQ("", my_nl_langinfo(PERL_THOUSEP, + FALSE))); + } + + /* Save the new name if it isn't the same as the previous one, if any */ if (! PL_numeric_name || strNE(PL_numeric_name, save_newnum)) { Safefree(PL_numeric_name); PL_numeric_name = save_newnum; @@ -238,6 +515,12 @@ Perl_new_numeric(pTHX_ const char *newnum) Safefree(save_newnum); } + PL_numeric_underlying_is_standard = PL_numeric_standard; + + if (DEBUG_L_TEST || debug_initialization) { + PerlIO_printf(Perl_debug_log, "Called new_numeric with %s, PL_numeric_name=%s\n", newnum, PL_numeric_name); + } + /* Keep LC_NUMERIC in the C locale. This is for XS modules, so they don't * have to worry about the radix being a non-dot. (Core operations that * need the underlying locale change to it temporarily). */ @@ -259,16 +542,16 @@ Perl_set_numeric_standard(pTHX) * to our records (which could be wrong if some XS code has changed the * locale behind our back) */ - setlocale(LC_NUMERIC, "C"); + do_setlocale_c(LC_NUMERIC, "C"); PL_numeric_standard = TRUE; - PL_numeric_local = isNAME_C_OR_POSIX(PL_numeric_name); + PL_numeric_underlying = PL_numeric_underlying_is_standard; set_numeric_radix(0); # ifdef DEBUGGING if (DEBUG_L_TEST || debug_initialization) { PerlIO_printf(Perl_debug_log, - "Underlying LC_NUMERIC locale now is C\n"); + "LC_NUMERIC locale now is standard C\n"); } # endif @@ -277,7 +560,7 @@ Perl_set_numeric_standard(pTHX) } void -Perl_set_numeric_local(pTHX) +Perl_set_numeric_underlying(pTHX) { #ifdef USE_LOCALE_NUMERIC @@ -288,16 +571,16 @@ Perl_set_numeric_local(pTHX) * if toggling isn't necessary according to our records (which could be * wrong if some XS code has changed the locale behind our back) */ - setlocale(LC_NUMERIC, PL_numeric_name); - PL_numeric_standard = isNAME_C_OR_POSIX(PL_numeric_name); - PL_numeric_local = TRUE; - set_numeric_radix(1); + do_setlocale_c(LC_NUMERIC, PL_numeric_name); + PL_numeric_standard = PL_numeric_underlying_is_standard; + PL_numeric_underlying = TRUE; + set_numeric_radix(! PL_numeric_standard); # ifdef DEBUGGING if (DEBUG_L_TEST || debug_initialization) { PerlIO_printf(Perl_debug_log, - "Underlying LC_NUMERIC locale now is %s\n", + "LC_NUMERIC locale now is %s\n", PL_numeric_name); } @@ -321,7 +604,7 @@ S_new_ctype(pTHX_ const char *newctype) #else - /* Called after all libc setlocale() calls affecting LC_CTYPE, to tell + /* Called after each libc setlocale() call affecting LC_CTYPE, to tell * core Perl this and that 'newctype' is the name of the new locale. * * This function sets up the folding arrays for all 256 bytes, assuming @@ -366,10 +649,10 @@ S_new_ctype(pTHX_ const char *newctype) unsigned int bad_count = 0; /* Count of bad characters */ for (i = 0; i < 256; i++) { - if (isUPPER_LC((U8) i)) - PL_fold_locale[i] = (U8) toLOWER_LC((U8) i); - else if (isLOWER_LC((U8) i)) - PL_fold_locale[i] = (U8) toUPPER_LC((U8) i); + if (isupper(i)) + PL_fold_locale[i] = (U8) tolower(i); + else if (islower(i)) + PL_fold_locale[i] = (U8) toupper(i); else PL_fold_locale[i] = (U8) i; @@ -385,10 +668,19 @@ S_new_ctype(pTHX_ const char *newctype) if ( check_for_problems && (isGRAPH_A(i) || isBLANK_A(i) || i == '\n')) { - if (( isALPHANUMERIC_A(i) && ! isALPHANUMERIC_LC(i)) - || (isPUNCT_A(i) && ! isPUNCT_LC(i)) - || (isBLANK_A(i) && ! isBLANK_LC(i)) - || (i == '\n' && ! isCNTRL_LC(i))) + if ( cBOOL(isalnum(i)) != cBOOL(isALPHANUMERIC(i)) + || cBOOL(isalpha(i)) != cBOOL(isALPHA_A(i)) + || cBOOL(isdigit(i)) != cBOOL(isDIGIT_A(i)) + || cBOOL(isgraph(i)) != cBOOL(isGRAPH_A(i)) + || cBOOL(islower(i)) != cBOOL(isLOWER_A(i)) + || cBOOL(isprint(i)) != cBOOL(isPRINT_A(i)) + || cBOOL(ispunct(i)) != cBOOL(isPUNCT_A(i)) + || cBOOL(isspace(i)) != cBOOL(isSPACE_A(i)) + || cBOOL(isupper(i)) != cBOOL(isUPPER_A(i)) + || cBOOL(isxdigit(i))!= cBOOL(isXDIGIT_A(i)) + || tolower(i) != (int) toLOWER_A(i) + || toupper(i) != (int) toUPPER_A(i) + || (i == '\n' && ! isCNTRL_LC(i))) { if (bad_count) { /* Separate multiple entries with a blank */ @@ -468,12 +760,12 @@ S_new_ctype(pTHX_ const char *newctype) * here is transparent to this function's caller */ const char * const badlocale = savepv(newctype); - setlocale(LC_CTYPE, "C"); + do_setlocale_c(LC_CTYPE, "C"); /* The '0' below suppresses a bogus gcc compiler warning */ Perl_warner(aTHX_ packWARN(WARN_LOCALE), SvPVX(PL_warn_locale), 0); - setlocale(LC_CTYPE, badlocale); + do_setlocale_c(LC_CTYPE, badlocale); Safefree(badlocale); if (IN_LC(LC_CTYPE)) { @@ -501,11 +793,9 @@ Perl__warn_problematic_locale() * _CHECK_AND_WARN_PROBLEMATIC_LOCALE */ if (PL_warn_locale) { - /*GCC_DIAG_IGNORE(-Wformat-security); Didn't work */ Perl_ck_warner(aTHX_ packWARN(WARN_LOCALE), SvPVX(PL_warn_locale), 0 /* dummy to avoid compiler warning */ ); - /* GCC_DIAG_RESTORE; */ SvREFCNT_dec_NN(PL_warn_locale); PL_warn_locale = NULL; } @@ -525,7 +815,7 @@ S_new_collate(pTHX_ const char *newcoll) #else - /* Called after all libc setlocale() calls affecting LC_COLLATE, to tell + /* Called after each libc setlocale() call affecting LC_COLLATE, to tell * core Perl this and that 'newcoll' is the name of the new locale. * * The design of locale collation is that every locale change is given an @@ -736,14 +1026,10 @@ S_new_collate(pTHX_ const char *newcoll) } -#ifndef WIN32 /* No wrapper except on Windows */ - -# define my_setlocale(a,b) setlocale(a,b) - -#else /* WIN32 */ +#ifdef WIN32 STATIC char * -S_my_setlocale(pTHX_ int category, const char* locale) +S_win32_setlocale(pTHX_ int category, const char* locale) { /* This, for Windows, emulates POSIX setlocale() behavior. There is no * difference between the two unless the input locale is "", which normally @@ -761,6 +1047,7 @@ S_my_setlocale(pTHX_ int category, const char* locale) bool override_LC_ALL = FALSE; char * result; + unsigned int i; if (locale && strEQ(locale, "")) { @@ -768,73 +1055,30 @@ S_my_setlocale(pTHX_ int category, const char* locale) locale = PerlEnv_getenv("LC_ALL"); if (! locale) { + if (category == LC_ALL) { + override_LC_ALL = TRUE; + } + else { # endif - switch (category) { - -# ifdef LC_ALL - case LC_ALL: - override_LC_ALL = TRUE; - break; /* We already know its variable isn't set */ - -# endif -# ifdef USE_LOCALE_TIME - - case LC_TIME: - locale = PerlEnv_getenv("LC_TIME"); - break; - -# endif -# ifdef USE_LOCALE_CTYPE - - case LC_CTYPE: - locale = PerlEnv_getenv("LC_CTYPE"); - break; - -# endif -# ifdef USE_LOCALE_COLLATE - - case LC_COLLATE: - locale = PerlEnv_getenv("LC_COLLATE"); - break; - -# endif -# ifdef USE_LOCALE_MONETARY - - case LC_MONETARY: - locale = PerlEnv_getenv("LC_MONETARY"); - break; - -# endif -# ifdef USE_LOCALE_NUMERIC - - case LC_NUMERIC: - locale = PerlEnv_getenv("LC_NUMERIC"); - break; - -# endif -# ifdef USE_LOCALE_MESSAGES - case LC_MESSAGES: - locale = PerlEnv_getenv("LC_MESSAGES"); - break; - -# endif + for (i = 0; i < NOMINAL_LC_ALL_INDEX; i++) { + if (category == categories[i]) { + locale = PerlEnv_getenv(category_names[i]); + goto found_locale; + } + } - default: - /* This is a category, like PAPER_SIZE that we don't - * know about; and so can't provide a wrapper. */ - break; - } - if (! locale) { locale = PerlEnv_getenv("LANG"); if (! locale) { locale = ""; } - } + + found_locale: ; # ifdef LC_ALL + } } # endif @@ -855,73 +1099,16 @@ S_my_setlocale(pTHX_ int category, const char* locale) * one that is set. (If they are set to "", it means to use the same thing * we just set LC_ALL to, so can skip) */ -# ifdef USE_LOCALE_TIME - - result = PerlEnv_getenv("LC_TIME"); - if (result && strNE(result, "")) { - setlocale(LC_TIME, result); - DEBUG_Lv(PerlIO_printf(Perl_debug_log, "%s:%d: %s\n", - __FILE__, __LINE__, - setlocale_debug_string(LC_TIME, result, "not captured"))); - } - -# endif -# ifdef USE_LOCALE_CTYPE - - result = PerlEnv_getenv("LC_CTYPE"); - if (result && strNE(result, "")) { - setlocale(LC_CTYPE, result); - DEBUG_Lv(PerlIO_printf(Perl_debug_log, "%s:%d: %s\n", - __FILE__, __LINE__, - setlocale_debug_string(LC_CTYPE, result, "not captured"))); - } - -# endif -# ifdef USE_LOCALE_COLLATE - - result = PerlEnv_getenv("LC_COLLATE"); - if (result && strNE(result, "")) { - setlocale(LC_COLLATE, result); - DEBUG_Lv(PerlIO_printf(Perl_debug_log, "%s:%d: %s\n", - __FILE__, __LINE__, - setlocale_debug_string(LC_COLLATE, result, "not captured"))); - } - -# endif -# ifdef USE_LOCALE_MONETARY - - result = PerlEnv_getenv("LC_MONETARY"); - if (result && strNE(result, "")) { - setlocale(LC_MONETARY, result); - DEBUG_Lv(PerlIO_printf(Perl_debug_log, "%s:%d: %s\n", - __FILE__, __LINE__, - setlocale_debug_string(LC_MONETARY, result, "not captured"))); - } - -# endif -# ifdef USE_LOCALE_NUMERIC - - result = PerlEnv_getenv("LC_NUMERIC"); - if (result && strNE(result, "")) { - setlocale(LC_NUMERIC, result); - DEBUG_Lv(PerlIO_printf(Perl_debug_log, "%s:%d: %s\n", - __FILE__, __LINE__, - setlocale_debug_string(LC_NUMERIC, result, "not captured"))); - } - -# endif -# ifdef USE_LOCALE_MESSAGES - - result = PerlEnv_getenv("LC_MESSAGES"); - if (result && strNE(result, "")) { - setlocale(LC_MESSAGES, result); - DEBUG_Lv(PerlIO_printf(Perl_debug_log, "%s:%d: %s\n", - __FILE__, __LINE__, - setlocale_debug_string(LC_MESSAGES, result, "not captured"))); + for (i = 0; i < LC_ALL_INDEX; i++) { + result = PerlEnv_getenv(category_names[i]); + if (result && strNE(result, "")) { + setlocale(categories[i], result); + DEBUG_Lv(PerlIO_printf(Perl_debug_log, "%s:%d: %s\n", + __FILE__, __LINE__, + setlocale_debug_string(categories[i], result, "not captured"))); + } } -# endif - result = setlocale(LC_ALL, NULL); DEBUG_L(PerlIO_printf(Perl_debug_log, "%s:%d: %s\n", __FILE__, __LINE__, @@ -938,15 +1125,15 @@ Perl_setlocale(int category, const char * locale) /* This wraps POSIX::setlocale() */ char * retval; + char * newlocale; dTHX; #ifdef USE_LOCALE_NUMERIC - /* A NULL locale means only query what the current one is. We - * have the LC_NUMERIC name saved, because we are normally switched - * into the C locale for it. Switch back so an LC_ALL query will yield - * the correct results; all other categories don't require special - * handling */ + /* A NULL locale means only query what the current one is. We have the + * LC_NUMERIC name saved, because we are normally switched into the C + * locale for it. For an LC_ALL query, switch back to get the correct + * results. All other categories don't require special handling */ if (locale == NULL) { if (category == LC_NUMERIC) { return savepv(PL_numeric_name); @@ -954,7 +1141,8 @@ Perl_setlocale(int category, const char * locale) # ifdef LC_ALL - else if (category == LC_ALL) { + else if (category == LC_ALL && ! PL_numeric_underlying) { + SET_NUMERIC_UNDERLYING(); } @@ -964,7 +1152,8 @@ Perl_setlocale(int category, const char * locale) #endif - retval = my_setlocale(category, locale); + /* Save retval since subsequent setlocale() calls may overwrite it. */ + retval = savepv(do_setlocale_r(category, locale)); DEBUG_L(PerlIO_printf(Perl_debug_log, "%s:%d: %s\n", __FILE__, __LINE__, @@ -979,112 +1168,69 @@ Perl_setlocale(int category, const char * locale) return NULL; } - /* Save retval since subsequent setlocale() calls may overwrite it. */ - retval = savepv(retval); - /* If locale == NULL, we are just querying the state, but may have switched * to NUMERIC_UNDERLYING. Switch back before returning. */ if (locale == NULL) { SET_NUMERIC_STANDARD(); return retval; } - else { /* Now that have switched locales, we have to update our records to - correspond */ - -#ifdef USE_LOCALE_CTYPE - - if ( category == LC_CTYPE - -# ifdef LC_ALL - || category == LC_ALL + /* Now that have switched locales, we have to update our records to + * correspond. */ -# endif - - ) - { - char *newctype; - -# ifdef LC_ALL - - if (category == LC_ALL) { - newctype = setlocale(LC_CTYPE, NULL); - DEBUG_Lv(PerlIO_printf(Perl_debug_log, - "%s:%d: %s\n", __FILE__, __LINE__, - setlocale_debug_string(LC_CTYPE, NULL, newctype))); - } - else + switch (category) { -# endif +#ifdef USE_LOCALE_CTYPE - newctype = retval; - new_ctype(newctype); - } + case LC_CTYPE: + new_ctype(retval); + break; -#endif /* USE_LOCALE_CTYPE */ +#endif #ifdef USE_LOCALE_COLLATE - if ( category == LC_COLLATE - -# ifdef LC_ALL - - || category == LC_ALL - -# endif - - ) - { - char *newcoll; - -# ifdef LC_ALL - - if (category == LC_ALL) { - newcoll = setlocale(LC_COLLATE, NULL); - DEBUG_Lv(PerlIO_printf(Perl_debug_log, - "%s:%d: %s\n", __FILE__, __LINE__, - setlocale_debug_string(LC_COLLATE, NULL, newcoll))); - } - else - -# endif - - newcoll = retval; - new_collate(newcoll); - } + case LC_COLLATE: + new_collate(retval); + break; -#endif /* USE_LOCALE_COLLATE */ +#endif #ifdef USE_LOCALE_NUMERIC - if ( category == LC_NUMERIC + case LC_NUMERIC: + new_numeric(retval); + break; -# ifdef LC_ALL +#endif +#ifdef LC_ALL - || category == LC_ALL + case LC_ALL: -# endif + /* LC_ALL updates all the things we care about. The values may not + * be the same as 'retval', as the locale "" may have set things + * individually */ - ) - { - char *newnum; +# ifdef USE_LOCALE_CTYPE -# ifdef LC_ALL + newlocale = do_setlocale_c(LC_CTYPE, NULL); + new_ctype(newlocale); - if (category == LC_ALL) { - newnum = setlocale(LC_NUMERIC, NULL); - DEBUG_Lv(PerlIO_printf(Perl_debug_log, - "%s:%d: %s\n", __FILE__, __LINE__, - setlocale_debug_string(LC_NUMERIC, NULL, newnum))); - } - else +# endif /* USE_LOCALE_CTYPE */ +# ifdef USE_LOCALE_COLLATE + + newlocale = do_setlocale_c(LC_COLLATE, NULL); + new_collate(newlocale); # endif +# ifdef USE_LOCALE_NUMERIC - newnum = retval; - new_numeric(newnum); - } + newlocale = do_setlocale_c(LC_NUMERIC, NULL); + new_numeric(newlocale); -#endif /* USE_LOCALE_NUMERIC */ +# endif /* USE_LOCALE_NUMERIC */ +#endif /* LC_ALL */ + default: + break; } return retval; @@ -1169,11 +1315,15 @@ Unimplemented, so returns C<"">. =item C +=item C + =item C -Only the values for English are returned. Earlier POSIX standards also -specified C and C, but these have been removed from POSIX 2008, -and aren't supported by C. +=item C + +Only the values for English are returned. C and C have been +removed from POSIX 2008, and are retained for backwards compatibility. Your +platform's C may not support them. =item C @@ -1276,6 +1426,14 @@ S_my_nl_langinfo(const int item, bool toggle) { dTHX; + /* We only need to toggle into the underlying LC_NUMERIC locale for these + * two items, and only if not already there */ + if (toggle && (( item != PERL_RADIXCHAR && item != PERL_THOUSEP) + || PL_numeric_underlying)) + { + toggle = FALSE; + } + #if defined(HAS_NL_LANGINFO) /* nl_langinfo() is available. */ #if ! defined(HAS_POSIX_2008_LOCALE) @@ -1284,403 +1442,419 @@ S_my_nl_langinfo(const int item, bool toggle) * switching back, as some systems destroy the buffer when setlocale() is * called */ - LOCALE_LOCK; + { + DECLARATION_FOR_LC_NUMERIC_MANIPULATION; - if (toggle) { - if (item == PERL_RADIXCHAR || item == PERL_THOUSEP) { - setlocale(LC_NUMERIC, PL_numeric_name); - } - else { - toggle = FALSE; + if (toggle) { + STORE_LC_NUMERIC_FORCE_TO_UNDERLYING(); } - } - save_to_buffer(nl_langinfo(item), &PL_langinfo_buf, &PL_langinfo_bufsize, 0); + LOCALE_LOCK; /* Prevent interference from another thread executing + this code section (the only call to nl_langinfo in + the core) */ - if (toggle) { - setlocale(LC_NUMERIC, "C"); - } + save_to_buffer(nl_langinfo(item), &PL_langinfo_buf, + &PL_langinfo_bufsize, 0); - LOCALE_UNLOCK; + LOCALE_UNLOCK; - return PL_langinfo_buf; + if (toggle) { + RESTORE_LC_NUMERIC(); + } + } # else /* Use nl_langinfo_l(), avoiding both a mutex and changing the locale */ - bool do_free = FALSE; - locale_t cur = uselocale((locale_t) 0); + { + bool do_free = FALSE; + locale_t cur = uselocale((locale_t) 0); - if (cur == LC_GLOBAL_LOCALE) { - cur = duplocale(LC_GLOBAL_LOCALE); - do_free = TRUE; - } + if (cur == LC_GLOBAL_LOCALE) { + cur = duplocale(LC_GLOBAL_LOCALE); + do_free = TRUE; + } - if ( toggle - && (item == PERL_RADIXCHAR || item == PERL_THOUSEP)) - { - cur = newlocale(LC_NUMERIC_MASK, PL_numeric_name, cur); - do_free = TRUE; + if (toggle) { + cur = newlocale(LC_NUMERIC_MASK, PL_numeric_name, cur); + do_free = TRUE; + } + + save_to_buffer(nl_langinfo_l(item, cur), + &PL_langinfo_buf, &PL_langinfo_bufsize, 0); + if (do_free) { + freelocale(cur); + } } - save_to_buffer(nl_langinfo_l(item, cur), - &PL_langinfo_buf, &PL_langinfo_bufsize, 0); - if (do_free) { - freelocale(cur); +# endif + + if (strEQ(PL_langinfo_buf, "")) { + if (item == PERL_YESSTR) { + return "yes"; + } + if (item == PERL_NOSTR) { + return "no"; + } } return PL_langinfo_buf; -# endif #else /* Below, emulate nl_langinfo as best we can */ + + { + # ifdef HAS_LOCALECONV - const struct lconv* lc; + const struct lconv* lc; + DECLARATION_FOR_LC_NUMERIC_MANIPULATION; # endif # ifdef HAS_STRFTIME - struct tm tm; - bool return_format = FALSE; /* Return the %format, not the value */ - const char * format; + struct tm tm; + bool return_format = FALSE; /* Return the %format, not the value */ + const char * format; # endif - /* We copy the results to a per-thread buffer, even if not multi-threaded. - * This is in part to simplify this code, and partly because we need a - * buffer anyway for strftime(), and partly because a call of localeconv() - * could otherwise wipe out the buffer, and the programmer would not be - * expecting this, as this is a nl_langinfo() substitute after all, so s/he - * might be thinking their localeconv() is safe until another localeconv() - * call. */ + /* We copy the results to a per-thread buffer, even if not + * multi-threaded. This is in part to simplify this code, and partly + * because we need a buffer anyway for strftime(), and partly because a + * call of localeconv() could otherwise wipe out the buffer, and the + * programmer would not be expecting this, as this is a nl_langinfo() + * substitute after all, so s/he might be thinking their localeconv() + * is safe until another localeconv() call. */ - switch (item) { - Size_t len; - const char * retval; + switch (item) { + Size_t len; + const char * retval; - /* These 2 are unimplemented */ - case PERL_CODESET: - case PERL_ERA: /* For use with strftime() %E modifier */ + /* These 2 are unimplemented */ + case PERL_CODESET: + case PERL_ERA: /* For use with strftime() %E modifier */ - default: - return ""; + default: + return ""; - /* We use only an English set, since we don't know any more */ - case PERL_YESEXPR: return "^[+1yY]"; - case PERL_NOEXPR: return "^[-0nN]"; + /* We use only an English set, since we don't know any more */ + case PERL_YESEXPR: return "^[+1yY]"; + case PERL_YESSTR: return "yes"; + case PERL_NOEXPR: return "^[-0nN]"; + case PERL_NOSTR: return "no"; # ifdef HAS_LOCALECONV - case PERL_CRNCYSTR: + case PERL_CRNCYSTR: - LOCALE_LOCK; + /* We don't bother with localeconv_l() because any system that + * has it is likely to also have nl_langinfo() */ - lc = localeconv(); - if (! lc || ! lc->currency_symbol || strEQ("", lc->currency_symbol)) - { - LOCALE_UNLOCK; - return ""; - } + LOCALE_LOCK; /* Prevent interference with other threads + using localeconv() */ - /* Leave the first spot empty to be filled in below */ - save_to_buffer(lc->currency_symbol, &PL_langinfo_buf, - &PL_langinfo_bufsize, 1); - if (lc->mon_decimal_point && strEQ(lc->mon_decimal_point, "")) - { /* khw couldn't figure out how the localedef specifications - would show that the $ should replace the radix; this is - just a guess as to how it might work.*/ - *PL_langinfo_buf = '.'; - } - else if (lc->p_cs_precedes) { - *PL_langinfo_buf = '-'; - } - else { - *PL_langinfo_buf = '+'; - } + lc = localeconv(); + if ( ! lc + || ! lc->currency_symbol + || strEQ("", lc->currency_symbol)) + { + LOCALE_UNLOCK; + return ""; + } - LOCALE_UNLOCK; - break; + /* Leave the first spot empty to be filled in below */ + save_to_buffer(lc->currency_symbol, &PL_langinfo_buf, + &PL_langinfo_bufsize, 1); + if (lc->mon_decimal_point && strEQ(lc->mon_decimal_point, "")) + { /* khw couldn't figure out how the localedef specifications + would show that the $ should replace the radix; this is + just a guess as to how it might work.*/ + *PL_langinfo_buf = '.'; + } + else if (lc->p_cs_precedes) { + *PL_langinfo_buf = '-'; + } + else { + *PL_langinfo_buf = '+'; + } - case PERL_RADIXCHAR: - case PERL_THOUSEP: + LOCALE_UNLOCK; + break; - LOCALE_LOCK; + case PERL_RADIXCHAR: + case PERL_THOUSEP: - if (toggle) { - setlocale(LC_NUMERIC, PL_numeric_name); - } + if (toggle) { + STORE_LC_NUMERIC_FORCE_TO_UNDERLYING(); + } - lc = localeconv(); - if (! lc) { - retval = ""; - } - else switch (item) { - case PERL_RADIXCHAR: - if (! lc->decimal_point) { - retval = ""; - } - else { - retval = lc->decimal_point; - } - break; + LOCALE_LOCK; /* Prevent interference with other threads + using localeconv() */ - case PERL_THOUSEP: - if (! lc->thousands_sep || strEQ("", lc->thousands_sep)) { + lc = localeconv(); + if (! lc) { + retval = ""; + } + else { + retval = (item == PERL_RADIXCHAR) + ? lc->decimal_point + : lc->thousands_sep; + if (! retval) { retval = ""; } - else { - retval = lc->thousands_sep; - } - break; - - default: - LOCALE_UNLOCK; - Perl_croak(aTHX_ "panic: %s: %d: switch case: %d problem", - __FILE__, __LINE__, item); - } + } - save_to_buffer(retval, &PL_langinfo_buf, &PL_langinfo_bufsize, 0); + save_to_buffer(retval, &PL_langinfo_buf, + &PL_langinfo_bufsize, 0); - if (toggle) { - setlocale(LC_NUMERIC, "C"); - } + LOCALE_UNLOCK; - LOCALE_UNLOCK; + if (toggle) { + RESTORE_LC_NUMERIC(); + } - break; + break; # endif # ifdef HAS_STRFTIME - /* These are defined by C89, so we assume that strftime supports them, - * and so are returned unconditionally; they may not be what the locale - * actually says, but should give good enough results for someone using - * them as formats (as opposed to trying to parse them to figure out - * what the locale says). The other format items are actually tested to - * verify they work on the platform */ - case PERL_D_FMT: return "%x"; - case PERL_T_FMT: return "%X"; - case PERL_D_T_FMT: return "%c"; - - /* These formats are only available in later strfmtime's */ - case PERL_ERA_D_FMT: case PERL_ERA_T_FMT: case PERL_ERA_D_T_FMT: - case PERL_T_FMT_AMPM: - - /* The rest can be gotten from most versions of strftime(). */ - case PERL_ABDAY_1: case PERL_ABDAY_2: case PERL_ABDAY_3: - case PERL_ABDAY_4: case PERL_ABDAY_5: case PERL_ABDAY_6: - case PERL_ABDAY_7: - case PERL_ALT_DIGITS: - case PERL_AM_STR: case PERL_PM_STR: - case PERL_ABMON_1: case PERL_ABMON_2: case PERL_ABMON_3: - case PERL_ABMON_4: case PERL_ABMON_5: case PERL_ABMON_6: - case PERL_ABMON_7: case PERL_ABMON_8: case PERL_ABMON_9: - case PERL_ABMON_10: case PERL_ABMON_11: case PERL_ABMON_12: - case PERL_DAY_1: case PERL_DAY_2: case PERL_DAY_3: case PERL_DAY_4: - case PERL_DAY_5: case PERL_DAY_6: case PERL_DAY_7: - case PERL_MON_1: case PERL_MON_2: case PERL_MON_3: case PERL_MON_4: - case PERL_MON_5: case PERL_MON_6: case PERL_MON_7: case PERL_MON_8: - case PERL_MON_9: case PERL_MON_10: case PERL_MON_11: case PERL_MON_12: - - LOCALE_LOCK; - - init_tm(&tm); /* Precaution against core dumps */ - tm.tm_sec = 30; - tm.tm_min = 30; - tm.tm_hour = 6; - tm.tm_year = 2017 - 1900; - tm.tm_wday = 0; - tm.tm_mon = 0; - switch (item) { - default: - LOCALE_UNLOCK; - Perl_croak(aTHX_ "panic: %s: %d: switch case: %d problem", - __FILE__, __LINE__, item); - NOT_REACHED; /* NOTREACHED */ - - case PERL_PM_STR: tm.tm_hour = 18; - case PERL_AM_STR: - format = "%p"; - break; - - case PERL_ABDAY_7: tm.tm_wday++; - case PERL_ABDAY_6: tm.tm_wday++; - case PERL_ABDAY_5: tm.tm_wday++; - case PERL_ABDAY_4: tm.tm_wday++; - case PERL_ABDAY_3: tm.tm_wday++; - case PERL_ABDAY_2: tm.tm_wday++; - case PERL_ABDAY_1: - format = "%a"; - break; - - case PERL_DAY_7: tm.tm_wday++; - case PERL_DAY_6: tm.tm_wday++; - case PERL_DAY_5: tm.tm_wday++; - case PERL_DAY_4: tm.tm_wday++; - case PERL_DAY_3: tm.tm_wday++; - case PERL_DAY_2: tm.tm_wday++; - case PERL_DAY_1: - format = "%A"; - break; - - case PERL_ABMON_12: tm.tm_mon++; - case PERL_ABMON_11: tm.tm_mon++; - case PERL_ABMON_10: tm.tm_mon++; - case PERL_ABMON_9: tm.tm_mon++; - case PERL_ABMON_8: tm.tm_mon++; - case PERL_ABMON_7: tm.tm_mon++; - case PERL_ABMON_6: tm.tm_mon++; - case PERL_ABMON_5: tm.tm_mon++; - case PERL_ABMON_4: tm.tm_mon++; - case PERL_ABMON_3: tm.tm_mon++; - case PERL_ABMON_2: tm.tm_mon++; - case PERL_ABMON_1: - format = "%b"; - break; - - case PERL_MON_12: tm.tm_mon++; - case PERL_MON_11: tm.tm_mon++; - case PERL_MON_10: tm.tm_mon++; - case PERL_MON_9: tm.tm_mon++; - case PERL_MON_8: tm.tm_mon++; - case PERL_MON_7: tm.tm_mon++; - case PERL_MON_6: tm.tm_mon++; - case PERL_MON_5: tm.tm_mon++; - case PERL_MON_4: tm.tm_mon++; - case PERL_MON_3: tm.tm_mon++; - case PERL_MON_2: tm.tm_mon++; - case PERL_MON_1: - format = "%B"; - break; + /* These are defined by C89, so we assume that strftime supports + * them, and so are returned unconditionally; they may not be what + * the locale actually says, but should give good enough results + * for someone using them as formats (as opposed to trying to parse + * them to figure out what the locale says). The other format + * items are actually tested to verify they work on the platform */ + case PERL_D_FMT: return "%x"; + case PERL_T_FMT: return "%X"; + case PERL_D_T_FMT: return "%c"; + + /* These formats are only available in later strfmtime's */ + case PERL_ERA_D_FMT: case PERL_ERA_T_FMT: case PERL_ERA_D_T_FMT: + case PERL_T_FMT_AMPM: + + /* The rest can be gotten from most versions of strftime(). */ + case PERL_ABDAY_1: case PERL_ABDAY_2: case PERL_ABDAY_3: + case PERL_ABDAY_4: case PERL_ABDAY_5: case PERL_ABDAY_6: + case PERL_ABDAY_7: + case PERL_ALT_DIGITS: + case PERL_AM_STR: case PERL_PM_STR: + case PERL_ABMON_1: case PERL_ABMON_2: case PERL_ABMON_3: + case PERL_ABMON_4: case PERL_ABMON_5: case PERL_ABMON_6: + case PERL_ABMON_7: case PERL_ABMON_8: case PERL_ABMON_9: + case PERL_ABMON_10: case PERL_ABMON_11: case PERL_ABMON_12: + case PERL_DAY_1: case PERL_DAY_2: case PERL_DAY_3: case PERL_DAY_4: + case PERL_DAY_5: case PERL_DAY_6: case PERL_DAY_7: + case PERL_MON_1: case PERL_MON_2: case PERL_MON_3: case PERL_MON_4: + case PERL_MON_5: case PERL_MON_6: case PERL_MON_7: case PERL_MON_8: + case PERL_MON_9: case PERL_MON_10: case PERL_MON_11: + case PERL_MON_12: + + LOCALE_LOCK; + + init_tm(&tm); /* Precaution against core dumps */ + tm.tm_sec = 30; + tm.tm_min = 30; + tm.tm_hour = 6; + tm.tm_year = 2017 - 1900; + tm.tm_wday = 0; + tm.tm_mon = 0; + switch (item) { + default: + LOCALE_UNLOCK; + Perl_croak(aTHX_ + "panic: %s: %d: switch case: %d problem", + __FILE__, __LINE__, item); + NOT_REACHED; /* NOTREACHED */ + + case PERL_PM_STR: tm.tm_hour = 18; + case PERL_AM_STR: + format = "%p"; + break; + + case PERL_ABDAY_7: tm.tm_wday++; + case PERL_ABDAY_6: tm.tm_wday++; + case PERL_ABDAY_5: tm.tm_wday++; + case PERL_ABDAY_4: tm.tm_wday++; + case PERL_ABDAY_3: tm.tm_wday++; + case PERL_ABDAY_2: tm.tm_wday++; + case PERL_ABDAY_1: + format = "%a"; + break; + + case PERL_DAY_7: tm.tm_wday++; + case PERL_DAY_6: tm.tm_wday++; + case PERL_DAY_5: tm.tm_wday++; + case PERL_DAY_4: tm.tm_wday++; + case PERL_DAY_3: tm.tm_wday++; + case PERL_DAY_2: tm.tm_wday++; + case PERL_DAY_1: + format = "%A"; + break; + + case PERL_ABMON_12: tm.tm_mon++; + case PERL_ABMON_11: tm.tm_mon++; + case PERL_ABMON_10: tm.tm_mon++; + case PERL_ABMON_9: tm.tm_mon++; + case PERL_ABMON_8: tm.tm_mon++; + case PERL_ABMON_7: tm.tm_mon++; + case PERL_ABMON_6: tm.tm_mon++; + case PERL_ABMON_5: tm.tm_mon++; + case PERL_ABMON_4: tm.tm_mon++; + case PERL_ABMON_3: tm.tm_mon++; + case PERL_ABMON_2: tm.tm_mon++; + case PERL_ABMON_1: + format = "%b"; + break; + + case PERL_MON_12: tm.tm_mon++; + case PERL_MON_11: tm.tm_mon++; + case PERL_MON_10: tm.tm_mon++; + case PERL_MON_9: tm.tm_mon++; + case PERL_MON_8: tm.tm_mon++; + case PERL_MON_7: tm.tm_mon++; + case PERL_MON_6: tm.tm_mon++; + case PERL_MON_5: tm.tm_mon++; + case PERL_MON_4: tm.tm_mon++; + case PERL_MON_3: tm.tm_mon++; + case PERL_MON_2: tm.tm_mon++; + case PERL_MON_1: + format = "%B"; + break; + + case PERL_T_FMT_AMPM: + format = "%r"; + return_format = TRUE; + break; + + case PERL_ERA_D_FMT: + format = "%Ex"; + return_format = TRUE; + break; + + case PERL_ERA_T_FMT: + format = "%EX"; + return_format = TRUE; + break; + + case PERL_ERA_D_T_FMT: + format = "%Ec"; + return_format = TRUE; + break; + + case PERL_ALT_DIGITS: + tm.tm_wday = 0; + format = "%Ow"; /* Find the alternate digit for 0 */ + break; + } - case PERL_T_FMT_AMPM: - format = "%r"; - return_format = TRUE; - break; + /* We can't use my_strftime() because it doesn't look at + * tm_wday */ + while (0 == strftime(PL_langinfo_buf, PL_langinfo_bufsize, + format, &tm)) + { + /* A zero return means one of: + * a) there wasn't enough space in PL_langinfo_buf + * b) the format, like a plain %p, returns empty + * c) it was an illegal format, though some + * implementations of strftime will just return the + * illegal format as a plain character sequence. + * + * To quickly test for case 'b)', try again but precede + * the format with a plain character. If that result is + * still empty, the problem is either 'a)' or 'c)' */ + + Size_t format_size = strlen(format) + 1; + Size_t mod_size = format_size + 1; + char * mod_format; + char * temp_result; + + Newx(mod_format, mod_size, char); + Newx(temp_result, PL_langinfo_bufsize, char); + *mod_format = ' '; + my_strlcpy(mod_format + 1, format, mod_size); + len = strftime(temp_result, + PL_langinfo_bufsize, + mod_format, &tm); + Safefree(mod_format); + Safefree(temp_result); + + /* If 'len' is non-zero, it means that we had a case like + * %p which means the current locale doesn't use a.m. or + * p.m., and that is valid */ + if (len == 0) { + + /* Here, still didn't work. If we get well beyond a + * reasonable size, bail out to prevent an infinite + * loop. */ + + if (PL_langinfo_bufsize > 100 * format_size) { + *PL_langinfo_buf = '\0'; + } + else { + /* Double the buffer size to retry; Add 1 in case + * original was 0, so we aren't stuck at 0. */ + PL_langinfo_bufsize *= 2; + PL_langinfo_bufsize++; + Renew(PL_langinfo_buf, PL_langinfo_bufsize, char); + continue; + } + } - case PERL_ERA_D_FMT: - format = "%Ex"; - return_format = TRUE; break; + } - case PERL_ERA_T_FMT: - format = "%EX"; - return_format = TRUE; - break; + /* Here, we got a result. + * + * If the item is 'ALT_DIGITS', PL_langinfo_buf contains the + * alternate format for wday 0. If the value is the same as + * the normal 0, there isn't an alternate, so clear the buffer. + * */ + if ( item == PERL_ALT_DIGITS + && strEQ(PL_langinfo_buf, "0")) + { + *PL_langinfo_buf = '\0'; + } - case PERL_ERA_D_T_FMT: - format = "%Ec"; - return_format = TRUE; - break; + /* ALT_DIGITS is problematic. Experiments on it showed that + * strftime() did not always work properly when going from + * alt-9 to alt-10. Only a few locales have this item defined, + * and in all of them on Linux that khw was able to find, + * nl_langinfo() merely returned the alt-0 character, possibly + * doubled. Most Unicode digits are in blocks of 10 + * consecutive code points, so that is sufficient information + * for those scripts, as we can infer alt-1, alt-2, .... But + * for a Japanese locale, a CJK ideographic 0 is returned, and + * the CJK digits are not in code point order, so you can't + * really infer anything. The localedef for this locale did + * specify the succeeding digits, so that strftime() works + * properly on them, without needing to infer anything. But + * the nl_langinfo() return did not give sufficient information + * for the caller to understand what's going on. So until + * there is evidence that it should work differently, this + * returns the alt-0 string for ALT_DIGITS. + * + * wday was chosen because its range is all a single digit. + * Things like tm_sec have two digits as the minimum: '00' */ - case PERL_ALT_DIGITS: - tm.tm_wday = 0; - format = "%Ow"; /* Find the alternate digit for 0 */ - break; - } + LOCALE_UNLOCK; - /* We can't use my_strftime() because it doesn't look at tm_wday */ - while (0 == strftime(PL_langinfo_buf, PL_langinfo_bufsize, - format, &tm)) - { - /* A zero return means one of: - * a) there wasn't enough space in PL_langinfo_buf - * b) the format, like a plain %p, returns empty - * c) it was an illegal format, though some implementations of - * strftime will just return the illegal format as a plain - * character sequence. - * - * To quickly test for case 'b)', try again but precede the - * format with a plain character. If that result is still - * empty, the problem is either 'a)' or 'c)' */ - - Size_t format_size = strlen(format) + 1; - Size_t mod_size = format_size + 1; - char * mod_format; - char * temp_result; - - Newx(mod_format, mod_size, char); - Newx(temp_result, PL_langinfo_bufsize, char); - *mod_format = '\a'; - my_strlcpy(mod_format + 1, format, mod_size); - len = strftime(temp_result, - PL_langinfo_bufsize, - mod_format, &tm); - Safefree(mod_format); - Safefree(temp_result); - - /* If 'len' is non-zero, it means that we had a case like %p - * which means the current locale doesn't use a.m. or p.m., and - * that is valid */ - if (len == 0) { - - /* Here, still didn't work. If we get well beyond a - * reasonable size, bail out to prevent an infinite loop. */ - - if (PL_langinfo_bufsize > 100 * format_size) { + /* If to return the format, not the value, overwrite the buffer + * with it. But some strftime()s will keep the original format + * if illegal, so change those to "" */ + if (return_format) { + if (strEQ(PL_langinfo_buf, format)) { *PL_langinfo_buf = '\0'; } - else { /* Double the buffer size to retry; Add 1 in case - original was 0, so we aren't stuck at 0. */ - PL_langinfo_bufsize *= 2; - PL_langinfo_bufsize++; - Renew(PL_langinfo_buf, PL_langinfo_bufsize, char); - continue; + else { + save_to_buffer(format, &PL_langinfo_buf, + &PL_langinfo_bufsize, 0); } } break; - } - - /* Here, we got a result. - * - * If the item is 'ALT_DIGITS', PL_langinfo_buf contains the - * alternate format for wday 0. If the value is the same as the - * normal 0, there isn't an alternate, so clear the buffer. */ - if ( item == PERL_ALT_DIGITS - && strEQ(PL_langinfo_buf, "0")) - { - *PL_langinfo_buf = '\0'; - } - - /* ALT_DIGITS is problematic. Experiments on it showed that - * strftime() did not always work properly when going from alt-9 to - * alt-10. Only a few locales have this item defined, and in all - * of them on Linux that khw was able to find, nl_langinfo() merely - * returned the alt-0 character, possibly doubled. Most Unicode - * digits are in blocks of 10 consecutive code points, so that is - * sufficient information for those scripts, as we can infer alt-1, - * alt-2, .... But for a Japanese locale, a CJK ideographic 0 is - * returned, and the CJK digits are not in code point order, so you - * can't really infer anything. The localedef for this locale did - * specify the succeeding digits, so that strftime() works properly - * on them, without needing to infer anything. But the - * nl_langinfo() return did not give sufficient information for the - * caller to understand what's going on. So until there is - * evidence that it should work differently, this returns the alt-0 - * string for ALT_DIGITS. - * - * wday was chosen because its range is all a single digit. Things - * like tm_sec have two digits as the minimum: '00' */ - - LOCALE_UNLOCK; - - /* If to return the format, not the value, overwrite the buffer - * with it. But some strftime()s will keep the original format if - * illegal, so change those to "" */ - if (return_format) { - if (strEQ(PL_langinfo_buf, format)) { - *PL_langinfo_buf = '\0'; - } - else { - save_to_buffer(format, &PL_langinfo_buf, - &PL_langinfo_bufsize, 0); - } - } - - break; # endif + } } return PL_langinfo_buf; @@ -1749,22 +1923,7 @@ Perl_init_i18nl10n(pTHX_ int printwarn) PERL_UNUSED_ARG(printwarn); -#else /* USE_LOCALE */ -# ifdef USE_LOCALE_CTYPE - - char *curctype = NULL; - -# endif /* USE_LOCALE_CTYPE */ -# ifdef USE_LOCALE_COLLATE - - char *curcoll = NULL; - -# endif /* USE_LOCALE_COLLATE */ -# ifdef USE_LOCALE_NUMERIC - - char *curnum = NULL; - -# endif /* USE_LOCALE_NUMERIC */ +#else /* USE_LOCALE */ # ifdef __GLIBC__ const char * const language = savepv(PerlEnv_getenv("LANGUAGE")); @@ -1786,15 +1945,19 @@ Perl_init_i18nl10n(pTHX_ int printwarn) const char * const bad_lang_use_once = PerlEnv_getenv("PERL_BADLANG"); const bool locwarn = (printwarn > 1 - || (printwarn - && (! bad_lang_use_once + || ( printwarn + && ( ! bad_lang_use_once || ( - /* disallow with "" or "0" */ - *bad_lang_use_once - && strNE("0", bad_lang_use_once))))); - bool done = FALSE; - char * sl_result; /* return from setlocale() */ - char * locale_param; + /* disallow with "" or "0" */ + *bad_lang_use_once + && strNE("0", bad_lang_use_once))))); + + /* setlocale() return vals; not copied so must be looked at immediately */ + const char * sl_result[NOMINAL_LC_ALL_INDEX + 1]; + + /* current locale for given category; should have been copied so aren't + * volatile */ + const char * curlocales[NOMINAL_LC_ALL_INDEX + 1]; # ifdef WIN32 @@ -1807,7 +1970,10 @@ Perl_init_i18nl10n(pTHX_ int printwarn) const char *system_default_locale = NULL; # endif -# ifdef DEBUGGING + +# ifndef DEBUGGING +# define DEBUG_LOCALE_INIT(a,b,c) +# else DEBUG_INITIALIZATION_set(cBOOL(PerlEnv_getenv("PERL_DEBUG_LOCALE_INIT"))); @@ -1823,102 +1989,101 @@ Perl_init_i18nl10n(pTHX_ int printwarn) } \ } STMT_END -# else -# define DEBUG_LOCALE_INIT(a,b,c) -# endif - -# ifndef LOCALE_ENVIRON_REQUIRED +/* Make sure the parallel arrays are properly set up */ +# ifdef USE_LOCALE_NUMERIC + assert(categories[LC_NUMERIC_INDEX] == LC_NUMERIC); + assert(strEQ(category_names[LC_NUMERIC_INDEX], "LC_NUMERIC")); +# endif +# ifdef USE_LOCALE_CTYPE + assert(categories[LC_CTYPE_INDEX] == LC_CTYPE); + assert(strEQ(category_names[LC_CTYPE_INDEX], "LC_CTYPE")); +# endif +# ifdef USE_LOCALE_COLLATE + assert(categories[LC_COLLATE_INDEX] == LC_COLLATE); + assert(strEQ(category_names[LC_COLLATE_INDEX], "LC_COLLATE")); +# endif +# ifdef USE_LOCALE_TIME + assert(categories[LC_TIME_INDEX] == LC_TIME); + assert(strEQ(category_names[LC_TIME_INDEX], "LC_TIME")); +# endif +# ifdef USE_LOCALE_MESSAGES + assert(categories[LC_MESSAGES_INDEX] == LC_MESSAGES); + assert(strEQ(category_names[LC_MESSAGES_INDEX], "LC_MESSAGES")); +# endif +# ifdef USE_LOCALE_MONETARY + assert(categories[LC_MONETARY_INDEX] == LC_MONETARY); + assert(strEQ(category_names[LC_MONETARY_INDEX], "LC_MONETARY")); +# endif +# ifdef USE_LOCALE_ADDRESS + assert(categories[LC_ADDRESS_INDEX] == LC_ADDRESS); + assert(strEQ(category_names[LC_ADDRESS_INDEX], "LC_ADDRESS")); +# endif +# ifdef USE_LOCALE_IDENTIFICATION + assert(categories[LC_IDENTIFICATION_INDEX] == LC_IDENTIFICATION); + assert(strEQ(category_names[LC_IDENTIFICATION_INDEX], "LC_IDENTIFICATION")); +# endif +# ifdef USE_LOCALE_MEASUREMENT + assert(categories[LC_MEASUREMENT_INDEX] == LC_MEASUREMENT); + assert(strEQ(category_names[LC_MEASUREMENT_INDEX], "LC_MEASUREMENT")); +# endif +# ifdef USE_LOCALE_PAPER + assert(categories[LC_PAPER_INDEX] == LC_PAPER); + assert(strEQ(category_names[LC_PAPER_INDEX], "LC_PAPER")); +# endif +# ifdef USE_LOCALE_TELEPHONE + assert(categories[LC_TELEPHONE_INDEX] == LC_TELEPHONE); + assert(strEQ(category_names[LC_TELEPHONE_INDEX], "LC_TELEPHONE")); +# endif +# ifdef LC_ALL + assert(categories[LC_ALL_INDEX] == LC_ALL); + assert(strEQ(category_names[LC_ALL_INDEX], "LC_ALL")); + assert(NOMINAL_LC_ALL_INDEX == LC_ALL_INDEX); +# endif +# endif /* DEBUGGING */ - PERL_UNUSED_VAR(done); - PERL_UNUSED_VAR(locale_param); + /* Initialize the cache of the program's UTF-8ness for the always known + * locales C and POSIX */ + my_strlcpy(PL_locale_utf8ness, C_and_POSIX_utf8ness, + sizeof(PL_locale_utf8ness)); -# else +# ifdef LOCALE_ENVIRON_REQUIRED /* * Ultrix setlocale(..., "") fails if there are no environment * variables from which to get a locale name. */ -# ifdef LC_ALL - - if (lang) { - sl_result = my_setlocale(LC_ALL, setlocale_init); - DEBUG_LOCALE_INIT(LC_ALL, setlocale_init, sl_result); - if (sl_result) - done = TRUE; - else - setlocale_failure = TRUE; - } - if (! setlocale_failure) { - -# ifdef USE_LOCALE_CTYPE - - locale_param = (! done && (lang || PerlEnv_getenv("LC_CTYPE"))) - ? setlocale_init - : NULL; - curctype = my_setlocale(LC_CTYPE, locale_param); - DEBUG_LOCALE_INIT(LC_CTYPE, locale_param, sl_result); - if (! curctype) - setlocale_failure = TRUE; - else - curctype = savepv(curctype); - -# endif /* USE_LOCALE_CTYPE */ -# ifdef USE_LOCALE_COLLATE - - locale_param = (! done && (lang || PerlEnv_getenv("LC_COLLATE"))) - ? setlocale_init - : NULL; - curcoll = my_setlocale(LC_COLLATE, locale_param); - DEBUG_LOCALE_INIT(LC_COLLATE, locale_param, sl_result); - if (! curcoll) - setlocale_failure = TRUE; - else - curcoll = savepv(curcoll); - -# endif /* USE_LOCALE_COLLATE */ -# ifdef USE_LOCALE_NUMERIC - - locale_param = (! done && (lang || PerlEnv_getenv("LC_NUMERIC"))) - ? setlocale_init - : NULL; - curnum = my_setlocale(LC_NUMERIC, locale_param); - DEBUG_LOCALE_INIT(LC_NUMERIC, locale_param, sl_result); - if (! curnum) - setlocale_failure = TRUE; - else - curnum = savepv(curnum); - -# endif /* USE_LOCALE_NUMERIC */ -# ifdef USE_LOCALE_MESSAGES - - locale_param = (! done && (lang || PerlEnv_getenv("LC_MESSAGES"))) - ? setlocale_init - : NULL; - sl_result = my_setlocale(LC_MESSAGES, locale_param); - DEBUG_LOCALE_INIT(LC_MESSAGES, locale_param, sl_result); - if (! sl_result) { - setlocale_failure = TRUE; - } - -# endif /* USE_LOCALE_MESSAGES */ -# ifdef USE_LOCALE_MONETARY +# ifndef LC_ALL +# error Ultrix without LC_ALL not implemented +# else - locale_param = (! done && (lang || PerlEnv_getenv("LC_MONETARY"))) - ? setlocale_init - : NULL; - sl_result = my_setlocale(LC_MONETARY, locale_param); - DEBUG_LOCALE_INIT(LC_MONETARY, locale_param, sl_result); - if (! sl_result) { - setlocale_failure = TRUE; + { + bool done = FALSE; + if (lang) { + sl_result[LC_ALL_INDEX] = do_setlocale_c(LC_ALL, setlocale_init); + DEBUG_LOCALE_INIT(LC_ALL, setlocale_init, sl_result[LC_ALL_INDEX]); + if (sl_result[LC_ALL_INDEX]) + done = TRUE; + else + setlocale_failure = TRUE; + } + if (! setlocale_failure) { + const char * locale_param; + for (i = 0; i < LC_ALL_INDEX; i++) { + locale_param = (! done && (lang || PerlEnv_getenv(category_names[i]))) + ? setlocale_init + : NULL; + sl_result[i] = do_setlocale_r(categories[i], locale_param); + if (! sl_result[i]) { + setlocale_failure = TRUE; + } + DEBUG_LOCALE_INIT(categories[i], locale_param, sl_result[i]); + } } - -# endif /* USE_LOCALE_MONETARY */ - } # endif /* LC_ALL */ -# endif /* !LOCALE_ENVIRON_REQUIRED */ +# endif /* LOCALE_ENVIRON_REQUIRED */ /* We try each locale in the list until we get one that works, or exhaust * the list. Normally the loop is executed just once. But if setting the @@ -1938,7 +2103,7 @@ Perl_init_i18nl10n(pTHX_ int printwarn) setlocale_failure = FALSE; # ifdef SYSTEM_DEFAULT_LOCALE -# ifdef WIN32 +# ifdef WIN32 /* Note that assumes Win32 has LC_ALL */ /* On Windows machines, an entry of "" after the 0th means to use * the system default locale, which we now proceed to get. */ @@ -1947,7 +2112,7 @@ Perl_init_i18nl10n(pTHX_ int printwarn) /* Note that this may change the locale, but we are going to do * that anyway just below */ - system_default_locale = setlocale(LC_ALL, ""); + system_default_locale = do_setlocale_c(LC_ALL, ""); DEBUG_LOCALE_INIT(LC_ALL, "", system_default_locale); /* Skip if invalid or if it's already on the list of locales to @@ -1963,15 +2128,18 @@ Perl_init_i18nl10n(pTHX_ int printwarn) trial_locale = system_default_locale; } -# endif /* WIN32 */ +# else +# error SYSTEM_DEFAULT_LOCALE only implemented for Win32 +# endif # endif /* SYSTEM_DEFAULT_LOCALE */ - } + + } /* For i > 0 */ # ifdef LC_ALL - sl_result = my_setlocale(LC_ALL, trial_locale); - DEBUG_LOCALE_INIT(LC_ALL, trial_locale, sl_result); - if (! sl_result) { + sl_result[LC_ALL_INDEX] = do_setlocale_c(LC_ALL, trial_locale); + DEBUG_LOCALE_INIT(LC_ALL, trial_locale, sl_result[LC_ALL_INDEX]); + if (! sl_result[LC_ALL_INDEX]) { setlocale_failure = TRUE; } else { @@ -1987,60 +2155,19 @@ Perl_init_i18nl10n(pTHX_ int printwarn) # endif /* LC_ALL */ - if (!setlocale_failure) { - -# ifdef USE_LOCALE_CTYPE - - Safefree(curctype); - curctype = my_setlocale(LC_CTYPE, trial_locale); - DEBUG_LOCALE_INIT(LC_CTYPE, trial_locale, curctype); - if (! curctype) - setlocale_failure = TRUE; - else - curctype = savepv(curctype); - -# endif /* USE_LOCALE_CTYPE */ -# ifdef USE_LOCALE_COLLATE - - Safefree(curcoll); - curcoll = my_setlocale(LC_COLLATE, trial_locale); - DEBUG_LOCALE_INIT(LC_COLLATE, trial_locale, curcoll); - if (! curcoll) - setlocale_failure = TRUE; - else - curcoll = savepv(curcoll); - -# endif /* USE_LOCALE_COLLATE */ -# ifdef USE_LOCALE_NUMERIC - - Safefree(curnum); - curnum = my_setlocale(LC_NUMERIC, trial_locale); - DEBUG_LOCALE_INIT(LC_NUMERIC, trial_locale, curnum); - if (! curnum) - setlocale_failure = TRUE; - else - curnum = savepv(curnum); - -# endif /* USE_LOCALE_NUMERIC */ -# ifdef USE_LOCALE_MESSAGES - - sl_result = my_setlocale(LC_MESSAGES, trial_locale); - DEBUG_LOCALE_INIT(LC_MESSAGES, trial_locale, sl_result); - if (! (sl_result)) - setlocale_failure = TRUE; - -# endif /* USE_LOCALE_MESSAGES */ -# ifdef USE_LOCALE_MONETARY - - sl_result = my_setlocale(LC_MONETARY, trial_locale); - DEBUG_LOCALE_INIT(LC_MONETARY, trial_locale, sl_result); - if (! (sl_result)) - setlocale_failure = TRUE; - -# endif /* USE_LOCALE_MONETARY */ + if (! setlocale_failure) { + unsigned int j; + for (j = 0; j < NOMINAL_LC_ALL_INDEX; j++) { + curlocales[j] + = savepv(do_setlocale_r(categories[j], trial_locale)); + if (! curlocales[j]) { + setlocale_failure = TRUE; + } + DEBUG_LOCALE_INIT(categories[j], trial_locale, curlocales[j]); + } - if (! setlocale_failure) { /* Success */ - break; + if (! setlocale_failure) { /* All succeeded */ + break; /* Exit trial_locales loop */ } } @@ -2062,25 +2189,14 @@ Perl_init_i18nl10n(pTHX_ int printwarn) PerlIO_printf(Perl_error_log, "perl: warning: Setting locale failed for the categories:\n\t"); -# ifdef USE_LOCALE_CTYPE - - if (! curctype) - PerlIO_printf(Perl_error_log, "LC_CTYPE "); - -# endif /* USE_LOCALE_CTYPE */ -# ifdef USE_LOCALE_COLLATE - if (! curcoll) - PerlIO_printf(Perl_error_log, "LC_COLLATE "); - -# endif /* USE_LOCALE_COLLATE */ -# ifdef USE_LOCALE_NUMERIC - - if (! curnum) - PerlIO_printf(Perl_error_log, "LC_NUMERIC "); - -# endif /* USE_LOCALE_NUMERIC */ - - PerlIO_printf(Perl_error_log, "and possibly others\n"); + for (j = 0; j < NOMINAL_LC_ALL_INDEX; j++) { + if (! curlocales[j]) { + PerlIO_printf(Perl_error_log, category_names[j]); + } + else { + Safefree(curlocales[j]); + } + } # endif /* LC_ALL */ @@ -2216,6 +2332,7 @@ Perl_init_i18nl10n(pTHX_ int printwarn) msg = "Falling back to"; } else { /* fallback failed */ + unsigned int j; /* We dropped off the end of the loop, so have to decrement i to * get back to the value the last time through */ @@ -2226,28 +2343,11 @@ Perl_init_i18nl10n(pTHX_ int printwarn) /* To continue, we should use whatever values we've got */ -# ifdef USE_LOCALE_CTYPE - - Safefree(curctype); - curctype = savepv(setlocale(LC_CTYPE, NULL)); - DEBUG_LOCALE_INIT(LC_CTYPE, NULL, curctype); - -# endif /* USE_LOCALE_CTYPE */ -# ifdef USE_LOCALE_COLLATE - - Safefree(curcoll); - curcoll = savepv(setlocale(LC_COLLATE, NULL)); - DEBUG_LOCALE_INIT(LC_COLLATE, NULL, curcoll); - -# endif /* USE_LOCALE_COLLATE */ -# ifdef USE_LOCALE_NUMERIC - - Safefree(curnum); - curnum = savepv(setlocale(LC_NUMERIC, NULL)); - DEBUG_LOCALE_INIT(LC_NUMERIC, NULL, curnum); - -# endif /* USE_LOCALE_NUMERIC */ - + for (j = 0; j < NOMINAL_LC_ALL_INDEX; j++) { + Safefree(curlocales[j]); + curlocales[j] = savepv(do_setlocale_r(categories[j], NULL)); + DEBUG_LOCALE_INIT(categories[j], NULL, curlocales[j]); + } } if (locwarn) { @@ -2284,29 +2384,54 @@ Perl_init_i18nl10n(pTHX_ int printwarn) } } /* End of tried to fallback */ + /* Done with finding the locales; update our records */ + # ifdef USE_LOCALE_CTYPE - new_ctype(curctype); + new_ctype(curlocales[LC_CTYPE_INDEX]); -# endif /* USE_LOCALE_CTYPE */ +# endif # ifdef USE_LOCALE_COLLATE - new_collate(curcoll); + new_collate(curlocales[LC_COLLATE_INDEX]); -# endif /* USE_LOCALE_COLLATE */ +# endif # ifdef USE_LOCALE_NUMERIC - new_numeric(curnum); + new_numeric(curlocales[LC_NUMERIC_INDEX]); + +# endif + + for (i = 0; i < NOMINAL_LC_ALL_INDEX; i++) { + +# if defined(USE_ITHREADS) + + /* This caches whether each category's locale is UTF-8 or not. This + * may involve changing the locale. It is ok to do this at + * initialization time before any threads have started, but not later. + * Caching means that if the program heeds our dictate not to change + * locales in threaded applications, this data will remain valid, and + * it may get queried without changing locales. If the environment is + * such that all categories have the same locale, this isn't needed, as + * the code will not change the locale; but this handles the uncommon + * case where the environment has disparate locales for the categories + * */ + (void) _is_cur_LC_category_utf8(categories[i]); + +# endif + + Safefree(curlocales[i]); + } -# endif /* USE_LOCALE_NUMERIC */ # if defined(USE_PERLIO) && defined(USE_LOCALE_CTYPE) /* Set PL_utf8locale to TRUE if using PerlIO _and_ the current LC_CTYPE - * locale is UTF-8. If PL_utf8locale and PL_unicode (set by -C or by - * $ENV{PERL_UNICODE}) are true, perl.c:S_parse_body() will turn on the - * PerlIO :utf8 layer on STDIN, STDOUT, STDERR, _and_ the default open - * discipline. */ - PL_utf8locale = _is_cur_LC_category_utf8(LC_CTYPE); + * locale is UTF-8. The call to new_ctype() just above has already + * calculated the latter value and saved it in PL_in_utf8_CTYPE_locale. If + * both PL_utf8locale and PL_unicode (set by -C or by $ENV{PERL_UNICODE}) + * are true, perl.c:S_parse_body() will turn on the PerlIO :utf8 layer on + * STDIN, STDOUT, STDERR, _and_ the default open discipline. */ + PL_utf8locale = PL_in_utf8_CTYPE_locale; /* Set PL_unicode to $ENV{PERL_UNICODE} if using PerlIO. This is an alternative to using the -C command line switch @@ -2319,22 +2444,6 @@ Perl_init_i18nl10n(pTHX_ int printwarn) } # endif -# ifdef USE_LOCALE_CTYPE - - Safefree(curctype); - -# endif /* USE_LOCALE_CTYPE */ -# ifdef USE_LOCALE_COLLATE - - Safefree(curcoll); - -# endif /* USE_LOCALE_COLLATE */ -# ifdef USE_LOCALE_NUMERIC - - Safefree(curnum); - -# endif /* USE_LOCALE_NUMERIC */ - # ifdef __GLIBC__ Safefree(language); @@ -2503,7 +2612,7 @@ Perl__mem_collxfrm(pTHX_ const char *input_string, } /* End of determining the character that is to replace NULs */ /* If the replacement is variant under UTF-8, it must match the - * UTF8-ness as the original */ + * UTF8-ness of the original */ if ( ! UVCHR_IS_INVARIANT(PL_strxfrm_NUL_replacement) && utf8) { this_replacement_char[0] = UTF8_EIGHT_BIT_HI(PL_strxfrm_NUL_replacement); @@ -2959,44 +3068,110 @@ Perl__is_cur_LC_category_utf8(pTHX_ int category) * English, it comes down to if the locale's name ends in something like * "UTF-8". It errs on the side of not being a UTF-8 locale. */ - char *save_input_locale = NULL; + /* Name of current locale corresponding to the input category */ + const char *save_input_locale = NULL; + + bool is_utf8 = FALSE; /* The return value */ STRLEN final_pos; + /* The variables below are for the cache of previous lookups using this + * function. The cache is a C string, described at the definition for + * 'C_and_POSIX_utf8ness'. + * + * The first part of the cache is fixed, for the C and POSIX locales. The + * varying part starts just after them. */ + char * utf8ness_cache = PL_locale_utf8ness + STRLENs(C_and_POSIX_utf8ness); + + Size_t utf8ness_cache_size; /* Size of the varying portion */ + Size_t input_name_len; /* Length in bytes of save_input_locale */ + Size_t input_name_len_with_overhead; /* plus extra chars used to store + the name in the cache */ + char * delimited; /* The name plus the delimiters used to store + it in the cache */ + char * name_pos; /* position of 'delimited' in the cache, or 0 + if not there */ + + # ifdef LC_ALL assert(category != LC_ALL); # endif - /* First dispose of the trivial cases */ - save_input_locale = setlocale(category, NULL); + /* Get the desired category's locale */ + save_input_locale = do_setlocale_r(category, NULL); if (! save_input_locale) { - DEBUG_L(PerlIO_printf(Perl_debug_log, - "Could not find current locale for category %d\n", - category)); - return FALSE; /* XXX maybe should croak */ + Perl_croak(aTHX_ + "panic: %s: %d: Could not find current locale for %s\n", + __FILE__, __LINE__, category_name(category)); } + save_input_locale = stdize_locale(savepv(save_input_locale)); - if (isNAME_C_OR_POSIX(save_input_locale)) { - DEBUG_L(PerlIO_printf(Perl_debug_log, - "Current locale for category %d is %s\n", - category, save_input_locale)); + DEBUG_L(PerlIO_printf(Perl_debug_log, + "Current locale for %s is %s\n", + category_name(category), save_input_locale)); + + input_name_len = strlen(save_input_locale); + + /* In our cache, each name is accompanied by two delimiters and a single + * utf8ness digit */ + input_name_len_with_overhead = input_name_len + 3; + + /* Allocate and populate space for a copy of the name surrounded by the + * delimiters */ + Newx(delimited, input_name_len_with_overhead, char); + delimited[0] = UTF8NESS_SEP[0]; + Copy(save_input_locale, delimited + 1, input_name_len, char); + delimited[input_name_len+1] = UTF8NESS_PREFIX[0]; + delimited[input_name_len+2] = '\0'; + + /* And see if that is in the cache */ + name_pos = instr(PL_locale_utf8ness, delimited); + if (name_pos) { + is_utf8 = *(name_pos + input_name_len_with_overhead - 1) - '0'; + +# ifdef DEBUGGING + + if (DEBUG_Lv_TEST || debug_initialization) { + PerlIO_printf(Perl_debug_log, "UTF8ness for locale %s=%d, \n", + save_input_locale, is_utf8); + } + +# endif + + /* And, if not already in that position, move it to the beginning of + * the non-constant portion of the list, since it is the most recently + * used. (We don't have to worry about overflow, since just moving + * existing names around) */ + if (name_pos > utf8ness_cache) { + Move(utf8ness_cache, + utf8ness_cache + input_name_len_with_overhead, + name_pos - utf8ness_cache, char); + Copy(delimited, + utf8ness_cache, + input_name_len_with_overhead - 1, char); + utf8ness_cache[input_name_len_with_overhead - 1] = is_utf8 + '0'; + } + + Safefree(delimited); Safefree(save_input_locale); - return FALSE; + return is_utf8; } + /* Here we don't have stored the utf8ness for the input locale. We have to + * calculate it */ + # if defined(USE_LOCALE_CTYPE) \ && (defined(MB_CUR_MAX) || (defined(HAS_NL_LANGINFO) && defined(CODESET))) { /* Next try nl_langinfo or MB_CUR_MAX if available */ char *save_ctype_locale = NULL; - bool is_utf8; if (category != LC_CTYPE) { /* These work only on LC_CTYPE */ /* Get the current LC_CTYPE locale */ - save_ctype_locale = setlocale(LC_CTYPE, NULL); + save_ctype_locale = do_setlocale_c(LC_CTYPE, NULL); if (! save_ctype_locale) { DEBUG_L(PerlIO_printf(Perl_debug_log, "Could not find current locale for LC_CTYPE\n")); @@ -3012,7 +3187,7 @@ Perl__is_cur_LC_category_utf8(pTHX_ int category) Safefree(save_ctype_locale); save_ctype_locale = NULL; } - else if (! setlocale(LC_CTYPE, save_input_locale)) { + else if (! do_setlocale_c(LC_CTYPE, save_input_locale)) { DEBUG_L(PerlIO_printf(Perl_debug_log, "Could not change LC_CTYPE locale to %s\n", save_input_locale)); @@ -3030,26 +3205,32 @@ Perl__is_cur_LC_category_utf8(pTHX_ int category) # if defined(HAS_NL_LANGINFO) && defined(CODESET) - { - char *codeset = nl_langinfo(CODESET); - if (codeset && strNE(codeset, "")) { - codeset = savepv(codeset); + { /* The task is easiest if the platform has this POSIX 2001 function */ + const char *codeset = my_nl_langinfo(PERL_CODESET, FALSE); + /* FALSE => already in dest locale */ + DEBUG_L(PerlIO_printf(Perl_debug_log, + "\tnllanginfo returned CODESET '%s'\n", codeset)); + + if (codeset && strNE(codeset, "")) { /* If we switched LC_CTYPE, switch back */ if (save_ctype_locale) { - setlocale(LC_CTYPE, save_ctype_locale); + do_setlocale_c(LC_CTYPE, save_ctype_locale); Safefree(save_ctype_locale); } - is_utf8 = foldEQ(codeset, STR_WITH_LEN("UTF-8")) - || foldEQ(codeset, STR_WITH_LEN("UTF8")); + /* If the implementation of foldEQ() somehow were + * to change to not go byte-by-byte, this could + * read past end of string, as only one length is + * checked. But currently, a premature NUL will + * compare false, and it will stop there */ + is_utf8 = cBOOL( foldEQ(codeset, STR_WITH_LEN("UTF-8")) + || foldEQ(codeset, STR_WITH_LEN("UTF8"))); DEBUG_L(PerlIO_printf(Perl_debug_log, "\tnllanginfo returned CODESET '%s'; ?UTF8 locale=%d\n", codeset, is_utf8)); - Safefree(codeset); - Safefree(save_input_locale); - return is_utf8; + goto finish_and_return; } } @@ -3063,7 +3244,7 @@ Perl__is_cur_LC_category_utf8(pTHX_ int category) * Unicode code point. Since UTF-8 is the only non-single byte * encoding we handle, we just say any such encoding is UTF-8, and if * turns out to be wrong, other things will fail */ - is_utf8 = MB_CUR_MAX >= 4; + is_utf8 = (unsigned) MB_CUR_MAX >= STRLENs(MAX_UNICODE_UTF8); DEBUG_L(PerlIO_printf(Perl_debug_log, "\tMB_CUR_MAX=%d; ?UTF8 locale=%d\n", @@ -3079,17 +3260,22 @@ Perl__is_cur_LC_category_utf8(pTHX_ int category) * result */ if (is_utf8) { wchar_t wc; + int len; + PERL_UNUSED_RESULT(mbtowc(&wc, NULL, 0));/* Reset any shift state */ errno = 0; - if ((size_t)mbtowc(&wc, HYPHEN_UTF8, strlen(HYPHEN_UTF8)) - != strlen(HYPHEN_UTF8) - || wc != (wchar_t) 0x2010) + len = mbtowc(&wc, STR_WITH_LEN(REPLACEMENT_CHARACTER_UTF8)); + + + if ( len != STRLENs(REPLACEMENT_CHARACTER_UTF8) + || wc != (wchar_t) UNICODE_REPLACEMENT) { is_utf8 = FALSE; - DEBUG_L(PerlIO_printf(Perl_debug_log, "\thyphen=U+%x\n", (unsigned int)wc)); + DEBUG_L(PerlIO_printf(Perl_debug_log, "\replacement=U+%x\n", + (unsigned int)wc)); DEBUG_L(PerlIO_printf(Perl_debug_log, "\treturn from mbtowc=%d; errno=%d; ?UTF8 locale=0\n", - mbtowc(&wc, HYPHEN_UTF8, strlen(HYPHEN_UTF8)), errno)); + len, errno)); } } @@ -3097,11 +3283,11 @@ Perl__is_cur_LC_category_utf8(pTHX_ int category) /* If we switched LC_CTYPE, switch back */ if (save_ctype_locale) { - setlocale(LC_CTYPE, save_ctype_locale); + do_setlocale_c(LC_CTYPE, save_ctype_locale); Safefree(save_ctype_locale); } - return is_utf8; + goto finish_and_return; # endif @@ -3126,7 +3312,6 @@ Perl__is_cur_LC_category_utf8(pTHX_ int category) { char *save_monetary_locale = NULL; bool only_ascii = FALSE; - bool is_utf8 = FALSE; struct lconv* lc; /* Like above for LC_CTYPE, we first set LC_MONETARY to the locale of @@ -3134,7 +3319,7 @@ Perl__is_cur_LC_category_utf8(pTHX_ int category) if (category != LC_MONETARY) { - save_monetary_locale = setlocale(LC_MONETARY, NULL); + save_monetary_locale = do_setlocale_c(LC_MONETARY, NULL); if (! save_monetary_locale) { DEBUG_L(PerlIO_printf(Perl_debug_log, "Could not find current locale for LC_MONETARY\n")); @@ -3146,7 +3331,7 @@ Perl__is_cur_LC_category_utf8(pTHX_ int category) Safefree(save_monetary_locale); save_monetary_locale = NULL; } - else if (! setlocale(LC_MONETARY, save_input_locale)) { + else if (! do_setlocale_c(LC_MONETARY, save_input_locale)) { DEBUG_L(PerlIO_printf(Perl_debug_log, "Could not change LC_MONETARY locale to %s\n", save_input_locale)); @@ -3172,7 +3357,7 @@ Perl__is_cur_LC_category_utf8(pTHX_ int category) /* If we changed it, restore LC_MONETARY to its original locale */ if (save_monetary_locale) { - setlocale(LC_MONETARY, save_monetary_locale); + do_setlocale_c(LC_MONETARY, save_monetary_locale); Safefree(save_monetary_locale); } @@ -3183,8 +3368,7 @@ Perl__is_cur_LC_category_utf8(pTHX_ int category) * is non-ascii UTF-8. */ DEBUG_L(PerlIO_printf(Perl_debug_log, "\t?Currency symbol for %s is UTF-8=%d\n", save_input_locale, is_utf8)); - Safefree(save_input_locale); - return is_utf8; + goto finish_and_return; } } cant_use_monetary: @@ -3211,7 +3395,7 @@ Perl__is_cur_LC_category_utf8(pTHX_ int category) if (category != LC_TIME) { - save_time_locale = setlocale(LC_TIME, NULL); + save_time_locale = do_setlocale_c(LC_TIME, NULL); if (! save_time_locale) { DEBUG_L(PerlIO_printf(Perl_debug_log, "Could not find current locale for LC_TIME\n")); @@ -3223,7 +3407,7 @@ Perl__is_cur_LC_category_utf8(pTHX_ int category) Safefree(save_time_locale); save_time_locale = NULL; } - else if (! setlocale(LC_TIME, save_input_locale)) { + else if (! do_setlocale_c(LC_TIME, save_input_locale)) { DEBUG_L(PerlIO_printf(Perl_debug_log, "Could not change LC_TIME locale to %s\n", save_input_locale)); @@ -3262,22 +3446,22 @@ Perl__is_cur_LC_category_utf8(pTHX_ int category) * false otherwise. But first, restore LC_TIME to its original * locale if we changed it */ if (save_time_locale) { - setlocale(LC_TIME, save_time_locale); + do_setlocale_c(LC_TIME, save_time_locale); Safefree(save_time_locale); } DEBUG_L(PerlIO_printf(Perl_debug_log, "\t?time-related strings for %s are UTF-8=%d\n", save_input_locale, is_utf8_string((U8 *) formatted_time, 0))); - Safefree(save_input_locale); - return is_utf8_string((U8 *) formatted_time, 0); + is_utf8 = is_utf8_string((U8 *) formatted_time, 0); + goto finish_and_return; } /* Falling off the end of the loop indicates all the names were just * ASCII. Go on to the next test. If we changed it, restore LC_TIME * to its original locale */ if (save_time_locale) { - setlocale(LC_TIME, save_time_locale); + do_setlocale_c(LC_TIME, save_time_locale); Safefree(save_time_locale); } DEBUG_L(PerlIO_printf(Perl_debug_log, "All time-related words for %s contain only ASCII; can't use for determining if UTF-8 locale\n", save_input_locale)); @@ -3301,7 +3485,6 @@ Perl__is_cur_LC_category_utf8(pTHX_ int category) * are much more likely to have been translated. */ { int e; - bool is_utf8 = FALSE; bool non_ascii = FALSE; char *save_messages_locale = NULL; const char * errmsg = NULL; @@ -3311,7 +3494,7 @@ Perl__is_cur_LC_category_utf8(pTHX_ int category) if (category != LC_MESSAGES) { - save_messages_locale = setlocale(LC_MESSAGES, NULL); + save_messages_locale = do_setlocale_c(LC_MESSAGES, NULL); if (! save_messages_locale) { DEBUG_L(PerlIO_printf(Perl_debug_log, "Could not find current locale for LC_MESSAGES\n")); @@ -3323,7 +3506,7 @@ Perl__is_cur_LC_category_utf8(pTHX_ int category) Safefree(save_messages_locale); save_messages_locale = NULL; } - else if (! setlocale(LC_MESSAGES, save_input_locale)) { + else if (! do_setlocale_c(LC_MESSAGES, save_input_locale)) { DEBUG_L(PerlIO_printf(Perl_debug_log, "Could not change LC_MESSAGES locale to %s\n", save_input_locale)); @@ -3354,7 +3537,7 @@ Perl__is_cur_LC_category_utf8(pTHX_ int category) /* And, if we changed it, restore LC_MESSAGES to its original locale */ if (save_messages_locale) { - setlocale(LC_MESSAGES, save_messages_locale); + do_setlocale_c(LC_MESSAGES, save_messages_locale); Safefree(save_messages_locale); } @@ -3365,8 +3548,7 @@ Perl__is_cur_LC_category_utf8(pTHX_ int category) DEBUG_L(PerlIO_printf(Perl_debug_log, "\t?error messages for %s are UTF-8=%d\n", save_input_locale, is_utf8)); - Safefree(save_input_locale); - return is_utf8; + goto finish_and_return; } DEBUG_L(PerlIO_printf(Perl_debug_log, "All error messages for %s contain only ASCII; can't use for determining if UTF-8 locale\n", save_input_locale)); @@ -3388,13 +3570,13 @@ Perl__is_cur_LC_category_utf8(pTHX_ int category) final_pos = strlen(save_input_locale) - 1; if (final_pos >= 3) { - char *name = save_input_locale; + const char *name = save_input_locale; /* Find next 'U' or 'u' and look from there */ while ((name += strcspn(name, "Uu") + 1) <= save_input_locale + final_pos - 2) { - if ( !isALPHA_FOLD_NE(*name, 't') + if ( isALPHA_FOLD_NE(*name, 't') || isALPHA_FOLD_NE(*(name + 1), 'f')) { continue; @@ -3410,8 +3592,8 @@ Perl__is_cur_LC_category_utf8(pTHX_ int category) DEBUG_L(PerlIO_printf(Perl_debug_log, "Locale %s ends with UTF-8 in name\n", save_input_locale)); - Safefree(save_input_locale); - return TRUE; + is_utf8 = TRUE; + goto finish_and_return; } } DEBUG_L(PerlIO_printf(Perl_debug_log, @@ -3419,19 +3601,19 @@ Perl__is_cur_LC_category_utf8(pTHX_ int category) save_input_locale)); } -# endif -# ifdef WIN32 +# ifdef WIN32 /* http://msdn.microsoft.com/en-us/library/windows/desktop/dd317756.aspx */ if (memENDs(save_input_locale, final_pos, "65001")) { DEBUG_L(PerlIO_printf(Perl_debug_log, "Locale %s ends with 65001 in name, is UTF-8 locale\n", save_input_locale)); - Safefree(save_input_locale); - return TRUE; + is_utf8 = TRUE; + goto finish_and_return; } -# endif +# endif +# endif /* Other common encodings are the ISO 8859 series, which aren't UTF-8. But * since we are about to return FALSE anyway, there is no point in doing @@ -3442,16 +3624,81 @@ Perl__is_cur_LC_category_utf8(pTHX_ int category) DEBUG_L(PerlIO_printf(Perl_debug_log, "Locale %s has 8859 in name, not UTF-8 locale\n", save_input_locale)); - Safefree(save_input_locale); - return FALSE; + is_utf8 = FALSE; + goto finish_and_return; } # endif DEBUG_L(PerlIO_printf(Perl_debug_log, "Assuming locale %s is not a UTF-8 locale\n", save_input_locale)); + is_utf8 = FALSE; + + finish_and_return: + + /* Cache this result so we don't have to go through all this next time. */ + utf8ness_cache_size = sizeof(PL_locale_utf8ness) + - (utf8ness_cache - PL_locale_utf8ness); + + /* But we can't save it if it is too large for the total space available */ + if (LIKELY(input_name_len_with_overhead < utf8ness_cache_size)) { + Size_t utf8ness_cache_len = strlen(utf8ness_cache); + + /* Here it can fit, but we may need to clear out the oldest cached + * result(s) to do so. Check */ + if (utf8ness_cache_len + input_name_len_with_overhead + >= utf8ness_cache_size) + { + /* Here we have to clear something out to make room for this. + * Start looking at the rightmost place where it could fit and find + * the beginning of the entry that extends past that. */ + char * cutoff = (char *) my_memrchr(utf8ness_cache, + UTF8NESS_SEP[0], + utf8ness_cache_size + - input_name_len_with_overhead); + + assert(cutoff); + assert(cutoff >= utf8ness_cache); + + /* This and all subsequent entries must be removed */ + *cutoff = '\0'; + utf8ness_cache_len = strlen(utf8ness_cache); + } + + /* Make space for the new entry */ + Move(utf8ness_cache, + utf8ness_cache + input_name_len_with_overhead, + utf8ness_cache_len + 1 /* Incl. trailing NUL */, char); + + /* And insert it */ + Copy(delimited, utf8ness_cache, input_name_len_with_overhead - 1, char); + utf8ness_cache[input_name_len_with_overhead - 1] = is_utf8 + '0'; + + if ((PL_locale_utf8ness[strlen(PL_locale_utf8ness)-1] + & (PERL_UINTMAX_T) ~1) != '0') + { + Perl_croak(aTHX_ + "panic: %s: %d: Corrupt utf8ness_cache=%s\nlen=%u," + " inserted_name=%s, its_len=%u\n", + __FILE__, __LINE__, + PL_locale_utf8ness, strlen(PL_locale_utf8ness), + delimited, input_name_len_with_overhead); + } + } + +# ifdef DEBUGGING + + if (DEBUG_Lv_TEST || debug_initialization) { + PerlIO_printf(Perl_debug_log, + "PL_locale_utf8ness is now %s; returning %d\n", + PL_locale_utf8ness, is_utf8); + } + +# endif + + Safefree(delimited); Safefree(save_input_locale); - return FALSE; + return is_utf8; } #endif @@ -3506,18 +3753,53 @@ Perl_my_strerror(pTHX_ const int errnum) # if defined(HAS_POSIX_2008_LOCALE) && defined(HAS_STRERROR_L) - /* This function is trivial if we have strerror_l() */ + /* This function is trivial if we don't have to worry about thread safety + * and have strerror_l(), as it handles the switch of locales so we don't + * have to deal with that. We don't have to worry about thread safety if + * this is an unthreaded build, or if strerror_r() is also available. Both + * it and strerror_l() are thread-safe. Plain strerror() isn't thread + * safe. But on threaded builds when strerror_r() is available, the + * apparent call to strerror() below is actually a macro that + * behind-the-scenes calls strerror_r(). + */ + +# if ! defined(USE_ITHREADS) || defined(HAS_STRERROR_R) if (within_locale_scope) { - errstr = strerror(errnum); + errstr = savepv(strerror(errnum)); } else { - errstr = strerror_l(errnum, PL_C_locale_obj); + errstr = savepv(strerror_l(errnum, PL_C_locale_obj)); + } + +# else + + /* Here we have strerror_l(), but not strerror_r() and we are on a + * threaded-build. We use strerror_l() for everything, constructing a + * locale to pass to it if necessary */ + + bool do_free = FALSE; + locale_t locale_to_use; + + if (within_locale_scope) { + locale_to_use = uselocale((locale_t) 0); + if (locale_to_use == LC_GLOBAL_LOCALE) { + locale_to_use = duplocale(LC_GLOBAL_LOCALE); + do_free = TRUE; + } + } + else { /* Use C locale if not within 'use locale' scope */ + locale_to_use = PL_C_locale_obj; } - errstr = savepv(errstr); + errstr = savepv(strerror_l(errnum, locale_to_use)); -# else /* Doesn't have strerror_l(). */ + if (do_free) { + freelocale(locale_to_use); + } + +# endif +# else /* Doesn't have strerror_l() */ # ifdef USE_POSIX_2008_LOCALE @@ -3525,7 +3807,7 @@ Perl_my_strerror(pTHX_ const int errnum) # else - char * save_locale = NULL; + const char * save_locale = NULL; bool locale_is_C = FALSE; /* We have a critical section to prevent another thread from changing the @@ -3557,7 +3839,7 @@ Perl_my_strerror(pTHX_ const int errnum) # else /* Not thread-safe build */ - save_locale = setlocale(LC_MESSAGES, NULL); + save_locale = do_setlocale_c(LC_MESSAGES, NULL); if (! save_locale) { DEBUG_L(PerlIO_printf(Perl_debug_log, "setlocale failed, errno=%d\n", errno)); @@ -3571,7 +3853,7 @@ Perl_my_strerror(pTHX_ const int errnum) /* The setlocale() just below likely will zap 'save_locale', so * create a copy. */ save_locale = savepv(save_locale); - setlocale(LC_MESSAGES, "C"); + do_setlocale_c(LC_MESSAGES, "C"); } } @@ -3604,7 +3886,7 @@ Perl_my_strerror(pTHX_ const int errnum) # else if (save_locale && ! locale_is_C) { - if (! setlocale(LC_MESSAGES, save_locale)) { + if (! do_setlocale_c(LC_MESSAGES, save_locale)) { DEBUG_L(PerlIO_printf(Perl_debug_log, "setlocale restore failed, errno=%d\n", errno)); } @@ -3647,21 +3929,33 @@ to do so, before returning to Perl. void Perl_sync_locale(pTHX) { + char * newlocale; #ifdef USE_LOCALE_CTYPE - new_ctype(setlocale(LC_CTYPE, NULL)); + newlocale = do_setlocale_c(LC_CTYPE, NULL); + DEBUG_Lv(PerlIO_printf(Perl_debug_log, + "%s:%d: %s\n", __FILE__, __LINE__, + setlocale_debug_string(LC_CTYPE, NULL, newlocale))); + new_ctype(newlocale); #endif /* USE_LOCALE_CTYPE */ #ifdef USE_LOCALE_COLLATE - new_collate(setlocale(LC_COLLATE, NULL)); + newlocale = do_setlocale_c(LC_COLLATE, NULL); + DEBUG_Lv(PerlIO_printf(Perl_debug_log, + "%s:%d: %s\n", __FILE__, __LINE__, + setlocale_debug_string(LC_COLLATE, NULL, newlocale))); + new_collate(newlocale); #endif #ifdef USE_LOCALE_NUMERIC - set_numeric_local(); /* Switch from "C" to underlying LC_NUMERIC */ - new_numeric(setlocale(LC_NUMERIC, NULL)); + newlocale = do_setlocale_c(LC_NUMERIC, NULL); + DEBUG_Lv(PerlIO_printf(Perl_debug_log, + "%s:%d: %s\n", __FILE__, __LINE__, + setlocale_debug_string(LC_NUMERIC, NULL, newlocale))); + new_numeric(newlocale); #endif /* USE_LOCALE_NUMERIC */ @@ -3688,65 +3982,9 @@ S_setlocale_debug_string(const int category, /* category number, static char ret[128] = "If you can read this, thank your buggy C" " library strlcpy(), and change your hints file" " to undef it"; - my_strlcpy(ret, "setlocale(", sizeof(ret)); - - switch (category) { - default: - my_snprintf(ret, sizeof(ret), "%s? %d", ret, category); - break; - -# ifdef LC_ALL - - case LC_ALL: - my_strlcat(ret, "LC_ALL", sizeof(ret)); - break; - -# endif -# ifdef LC_CTYPE - - case LC_CTYPE: - my_strlcat(ret, "LC_CTYPE", sizeof(ret)); - break; - -# endif -# ifdef LC_NUMERIC - - case LC_NUMERIC: - my_strlcat(ret, "LC_NUMERIC", sizeof(ret)); - break; - -# endif -# ifdef LC_COLLATE - - case LC_COLLATE: - my_strlcat(ret, "LC_COLLATE", sizeof(ret)); - break; - -# endif -# ifdef LC_TIME - - case LC_TIME: - my_strlcat(ret, "LC_TIME", sizeof(ret)); - break; - -# endif -# ifdef LC_MONETARY - - case LC_MONETARY: - my_strlcat(ret, "LC_MONETARY", sizeof(ret)); - break; - -# endif -# ifdef LC_MESSAGES - - case LC_MESSAGES: - my_strlcat(ret, "LC_MESSAGES", sizeof(ret)); - break; - -# endif - - } + my_strlcpy(ret, "setlocale(", sizeof(ret)); + my_strlcat(ret, category_name(category), sizeof(ret)); my_strlcat(ret, ", ", sizeof(ret)); if (locale) {