X-Git-Url: https://perl5.git.perl.org/perl5.git/blobdiff_plain/92c0a900a3381af22f6d14091dbaffaa47ad592c..d35fca5f9e7ec48bb82d2a2d4566dfb68f955e6e:/locale.c diff --git a/locale.c b/locale.c index 0bf234c..0bf8057 100644 --- a/locale.c +++ b/locale.c @@ -29,7 +29,9 @@ * in such scope than if not. However, various libc functions called by Perl * are affected by the LC_NUMERIC category, so there are macros in perl.h that * are used to toggle between the current locale and the C locale depending on - * the desired behavior of those functions at the moment. + * the desired behavior of those functions at the moment. And, LC_MESSAGES is + * switched to the C locale for outputting the message unless within the scope + * of 'use locale'. */ #include "EXTERN.h" @@ -106,7 +108,7 @@ Perl_set_numeric_radix(pTHX) sv_setpv(PL_numeric_radix_sv, lc->decimal_point); else PL_numeric_radix_sv = newSVpv(lc->decimal_point, 0); - if (! is_ascii_string((U8 *) lc->decimal_point, 0) + if (! is_invariant_string((U8 *) lc->decimal_point, 0) && is_utf8_string((U8 *) lc->decimal_point, 0) && _is_cur_LC_category_utf8(LC_NUMERIC)) { @@ -117,10 +119,13 @@ Perl_set_numeric_radix(pTHX) else PL_numeric_radix_sv = NULL; - DEBUG_L(PerlIO_printf(Perl_debug_log, "Locale radix is %s\n", + DEBUG_L(PerlIO_printf(Perl_debug_log, "Locale radix is '%s', ?UTF-8=%d\n", (PL_numeric_radix_sv) - ? lc->decimal_point - : "NULL")); + ? SvPVX(PL_numeric_radix_sv) + : "NULL", + (PL_numeric_radix_sv) + ? cBOOL(SvUTF8(PL_numeric_radix_sv)) + : 0)); # endif /* HAS_LOCALECONV */ #endif /* USE_LOCALE_NUMERIC */ @@ -187,13 +192,17 @@ Perl_new_numeric(pTHX_ const char *newnum) } save_newnum = stdize_locale(savepv(newnum)); + + PL_numeric_standard = isNAME_C_OR_POSIX(save_newnum); + PL_numeric_local = TRUE; + if (! PL_numeric_name || strNE(PL_numeric_name, save_newnum)) { Safefree(PL_numeric_name); PL_numeric_name = save_newnum; } - - PL_numeric_standard = isNAME_C_OR_POSIX(save_newnum); - PL_numeric_local = TRUE; + else { + Safefree(save_newnum); + } /* Keep LC_NUMERIC in the C locale. This is for XS modules, so they don't * have to worry about the radix being a non-dot. (Core operations that @@ -272,6 +281,13 @@ Perl_new_ctype(pTHX_ const char *newctype) PERL_ARGS_ASSERT_NEW_CTYPE; + /* We will replace any bad locale warning with 1) nothing if the new one is + * ok; or 2) a new warning for the bad new locale */ + if (PL_warn_locale) { + SvREFCNT_dec_NN(PL_warn_locale); + PL_warn_locale = NULL; + } + PL_in_utf8_CTYPE_locale = _is_cur_LC_category_utf8(LC_CTYPE); /* A UTF-8 locale gets standard rules. But note that code still has to @@ -280,6 +296,18 @@ Perl_new_ctype(pTHX_ const char *newctype) Copy(PL_fold_latin1, PL_fold_locale, 256, U8); } else { + /* Assume enough space for every character being bad. 4 spaces each + * for the 94 printable characters that are output like "'x' "; and 5 + * spaces each for "'\\' ", "'\t' ", and "'\n' "; plus a terminating + * NUL */ + char bad_chars_list[ (94 * 4) + (3 * 5) + 1 ]; + + bool check_for_problems = ckWARN_d(WARN_LOCALE); /* No warnings means + no check */ + bool multi_byte_locale = FALSE; /* Assume is a single-byte locale + to start */ + unsigned int bad_count = 0; /* Count of bad characters */ + for (i = 0; i < 256; i++) { if (isUPPER_LC((U8) i)) PL_fold_locale[i] = (U8) toLOWER_LC((U8) i); @@ -287,6 +315,104 @@ Perl_new_ctype(pTHX_ const char *newctype) PL_fold_locale[i] = (U8) toUPPER_LC((U8) i); else PL_fold_locale[i] = (U8) i; + + /* If checking for locale problems, see if the native ASCII-range + * printables plus \n and \t are in their expected categories in + * the new locale. If not, this could mean big trouble, upending + * Perl's and most programs' assumptions, like having a + * metacharacter with special meaning become a \w. Fortunately, + * it's very rare to find locales that aren't supersets of ASCII + * nowadays. It isn't a problem for most controls to be changed + * into something else; we check only \n and \t, though perhaps \r + * could be an issue as well. */ + if (check_for_problems + && (isGRAPH_A(i) || isBLANK_A(i) || i == '\n')) + { + if ((isALPHANUMERIC_A(i) && ! isALPHANUMERIC_LC(i)) + || (isPUNCT_A(i) && ! isPUNCT_LC(i)) + || (isBLANK_A(i) && ! isBLANK_LC(i)) + || (i == '\n' && ! isCNTRL_LC(i))) + { + if (bad_count) { /* Separate multiple entries with a + blank */ + bad_chars_list[bad_count++] = ' '; + } + bad_chars_list[bad_count++] = '\''; + if (isPRINT_A(i)) { + bad_chars_list[bad_count++] = (char) i; + } + else { + bad_chars_list[bad_count++] = '\\'; + if (i == '\n') { + bad_chars_list[bad_count++] = 'n'; + } + else { + assert(i == '\t'); + bad_chars_list[bad_count++] = 't'; + } + } + bad_chars_list[bad_count++] = '\''; + bad_chars_list[bad_count] = '\0'; + } + } + } + +#ifdef MB_CUR_MAX + /* We only handle single-byte locales (outside of UTF-8 ones; so if + * this locale requires more than one byte, there are going to be + * problems. */ + if (check_for_problems && MB_CUR_MAX > 1 + + /* Some platforms return MB_CUR_MAX > 1 for even the "C" + * locale. Just assume that the implementation for them (plus + * for POSIX) is correct and the > 1 value is spurious. (Since + * these are specially handled to never be considered UTF-8 + * locales, as long as this is the only problem, everything + * should work fine */ + && strNE(newctype, "C") && strNE(newctype, "POSIX")) + { + multi_byte_locale = TRUE; + } +#endif + + if (bad_count || multi_byte_locale) { + PL_warn_locale = Perl_newSVpvf(aTHX_ + "Locale '%s' may not work well.%s%s%s\n", + newctype, + (multi_byte_locale) + ? " Some characters in it are not recognized by" + " Perl." + : "", + (bad_count) + ? "\nThe following characters (and maybe others)" + " may not have the same meaning as the Perl" + " program expects:\n" + : "", + (bad_count) + ? bad_chars_list + : "" + ); + /* If we are actually in the scope of the locale, output the + * message now. Otherwise we save it to be output at the first + * operation using this locale, if that actually happens. Most + * programs don't use locales, so they are immune to bad ones */ + if (IN_LC(LC_CTYPE)) { + + /* We have to save 'newctype' because the setlocale() just + * below may destroy it. The next setlocale() further down + * should restore it properly so that the intermediate change + * here is transparent to this function's caller */ + const char * const badlocale = savepv(newctype); + + setlocale(LC_CTYPE, "C"); + + /* The '0' below suppresses a bogus gcc compiler warning */ + Perl_warner(aTHX_ packWARN(WARN_LOCALE), SvPVX(PL_warn_locale), 0); + setlocale(LC_CTYPE, badlocale); + Safefree(badlocale); + SvREFCNT_dec_NN(PL_warn_locale); + PL_warn_locale = NULL; + } } } @@ -297,6 +423,32 @@ Perl_new_ctype(pTHX_ const char *newctype) } void +Perl__warn_problematic_locale() +{ + +#ifdef USE_LOCALE_CTYPE + + dTHX; + + /* Internal-to-core function that outputs the message in PL_warn_locale, + * and then NULLS it. Should be called only through the macro + * _CHECK_AND_WARN_PROBLEMATIC_LOCALE */ + + if (PL_warn_locale) { + /*GCC_DIAG_IGNORE(-Wformat-security); Didn't work */ + Perl_ck_warner(aTHX_ packWARN(WARN_LOCALE), + SvPVX(PL_warn_locale), + 0 /* dummy to avoid compiler warning */ ); + /* GCC_DIAG_RESTORE; */ + SvREFCNT_dec_NN(PL_warn_locale); + PL_warn_locale = NULL; + } + +#endif + +} + +void Perl_new_collate(pTHX_ const char *newcoll) { #ifdef USE_LOCALE_COLLATE @@ -307,7 +459,21 @@ Perl_new_collate(pTHX_ const char *newcoll) * Any code changing the locale (outside this file) should use * POSIX::setlocale, which calls this function. Therefore this function * should be called directly only from this file and from - * POSIX::setlocale() */ + * POSIX::setlocale(). + * + * The design of locale collation is that every locale change is given an + * index 'PL_collation_ix'. The first time a string particpates in an + * operation that requires collation while locale collation is active, it + * is given PERL_MAGIC_collxfrm magic (via sv_collxfrm_flags()). That + * magic includes the collation index, and the transformation of the string + * by strxfrm(), q.v. That transformation is used when doing comparisons, + * instead of the string itself. If a string changes, the magic is + * cleared. The next time the locale changes, the index is incremented, + * and so we know during a comparison that the transformation is not + * necessarily still valid, and so is recomputed. Note that if the locale + * changes enough times, the index could wrap (a U32), and it is possible + * that a transformation would improperly be considered valid, leading to + * an unlikely bug */ if (! newcoll) { if (PL_collation_name) { @@ -321,6 +487,7 @@ Perl_new_collate(pTHX_ const char *newcoll) return; } + /* If this is not the same locale as currently, set the new one up */ if (! PL_collation_name || strNE(PL_collation_name, newcoll)) { ++PL_collation_ix; Safefree(PL_collation_name); @@ -328,6 +495,32 @@ Perl_new_collate(pTHX_ const char *newcoll) PL_collation_standard = isNAME_C_OR_POSIX(newcoll); { + /* A locale collation definition includes primary, secondary, + * tertiary, etc. weights for each character. To sort, the primary + * weights are used, and only if they compare equal, then the + * secondary weights are used, and only if they compare equal, then + * the tertiary, etc. strxfrm() works by taking the input string, + * say ABC, and creating an output string consisting of first the + * primary weights, A¹B¹C¹ followed by the secondary ones, A²B²C²; + * and then the tertiary, etc, yielding A¹B¹C¹A²B²C²A³B³C³.... + * Some characters may not have weights at every level. In our + * example, let's say B doesn't have a tertiary weight, and A + * doesn't have a secondary weight. The constructed string is then + * going to be A¹B¹C¹B²C²A³C³.... This has the desired + * characteristics that strcmp() will look at the secondary or + * tertiary weights only if the strings compare equal at all higher + * priority weights. The length of the transformed string is + * roughly a linear function of the input string. It's not exactly + * linear because some characters don't have weights at all levels, + * and there are some complications, so there is often per-string + * overhead. When we call strxfrm() we have to allocate some + * memory to hold the transformed string. The calculations below + * try to find constants for this locale 'm' and 'b' so that m*x + + * b equals how much space we need given the size of the input + * string in 'x'. If we calculate too small, we increase the size + * as needed, and call strxfrm() again, but it is better to get it + * right the first time to avoid wasted expensive string + * transformations. */ /* 2: at most so many chars ('a', 'b'). */ /* 50: surely no system expands a char more. */ #define XFRMBUFSIZE (2 * 50) @@ -365,7 +558,7 @@ Perl_my_setlocale(pTHX_ int category, const char* locale) * otherwise to use the particular category's variable if set; otherwise to * use the LANG variable. */ - bool override_LC_ALL = 0; + bool override_LC_ALL = FALSE; char * result; if (locale && strEQ(locale, "")) { @@ -426,12 +619,14 @@ Perl_my_setlocale(pTHX_ int category, const char* locale) } result = setlocale(category, locale); + DEBUG_L(PerlIO_printf(Perl_debug_log, "%s:%d: %s\n", __FILE__, __LINE__, + _setlocale_debug_string(category, locale, result))); if (! override_LC_ALL) { return result; } - /* Here the input locale was LC_ALL, and we have set it to what is in the + /* Here the input category was LC_ALL, and we have set it to what is in the * LANG variable or the system default if there is no LANG. But these have * lower priority than the other LC_foo variables, so override it for each * one that is set. (If they are set to "", it means to use the same thing @@ -440,41 +635,63 @@ Perl_my_setlocale(pTHX_ int category, const char* locale) result = PerlEnv_getenv("LC_TIME"); if (result && strNE(result, "")) { setlocale(LC_TIME, result); + DEBUG_Lv(PerlIO_printf(Perl_debug_log, "%s:%d: %s\n", + __FILE__, __LINE__, + _setlocale_debug_string(LC_TIME, result, "not captured"))); } # endif # ifdef USE_LOCALE_CTYPE result = PerlEnv_getenv("LC_CTYPE"); if (result && strNE(result, "")) { setlocale(LC_CTYPE, result); + DEBUG_Lv(PerlIO_printf(Perl_debug_log, "%s:%d: %s\n", + __FILE__, __LINE__, + _setlocale_debug_string(LC_CTYPE, result, "not captured"))); } # endif # ifdef USE_LOCALE_COLLATE result = PerlEnv_getenv("LC_COLLATE"); if (result && strNE(result, "")) { setlocale(LC_COLLATE, result); + DEBUG_Lv(PerlIO_printf(Perl_debug_log, "%s:%d: %s\n", + __FILE__, __LINE__, + _setlocale_debug_string(LC_COLLATE, result, "not captured"))); } # endif # ifdef USE_LOCALE_MONETARY result = PerlEnv_getenv("LC_MONETARY"); if (result && strNE(result, "")) { setlocale(LC_MONETARY, result); + DEBUG_Lv(PerlIO_printf(Perl_debug_log, "%s:%d: %s\n", + __FILE__, __LINE__, + _setlocale_debug_string(LC_MONETARY, result, "not captured"))); } # endif # ifdef USE_LOCALE_NUMERIC result = PerlEnv_getenv("LC_NUMERIC"); if (result && strNE(result, "")) { setlocale(LC_NUMERIC, result); + DEBUG_Lv(PerlIO_printf(Perl_debug_log, "%s:%d: %s\n", + __FILE__, __LINE__, + _setlocale_debug_string(LC_NUMERIC, result, "not captured"))); } # endif # ifdef USE_LOCALE_MESSAGES result = PerlEnv_getenv("LC_MESSAGES"); if (result && strNE(result, "")) { setlocale(LC_MESSAGES, result); + DEBUG_Lv(PerlIO_printf(Perl_debug_log, "%s:%d: %s\n", + __FILE__, __LINE__, + _setlocale_debug_string(LC_MESSAGES, result, "not captured"))); } # endif - return setlocale(LC_ALL, NULL); + result = setlocale(LC_ALL, NULL); + DEBUG_L(PerlIO_printf(Perl_debug_log, "%s:%d: %s\n", + __FILE__, __LINE__, + _setlocale_debug_string(LC_ALL, NULL, result))); + return result; } #endif @@ -496,7 +713,42 @@ Perl_init_i18nl10n(pTHX_ int printwarn) * 1 = set ok or not applicable, * 0 = fallback to a locale of lower priority * -1 = fallback to all locales failed, not even to the C locale - */ + * + * Under -DDEBUGGING, if the environment variable PERL_DEBUG_LOCALE_INIT is + * set, debugging information is output. + * + * This looks more complicated than it is, mainly due to the #ifdefs. + * + * We try to set LC_ALL to the value determined by the environment. If + * there is no LC_ALL on this platform, we try the individual categories we + * know about. If this works, we are done. + * + * But if it doesn't work, we have to do something else. We search the + * environment variables ourselves instead of relying on the system to do + * it. We look at, in order, LC_ALL, LANG, a system default locale (if we + * think there is one), and the ultimate fallback "C". This is all done in + * the same loop as above to avoid duplicating code, but it makes things + * more complex. After the original failure, we add the fallback + * possibilities to the list of locales to try, and iterate the loop + * through them all until one succeeds. + * + * On Ultrix, the locale MUST come from the environment, so there is + * preliminary code to set it. I (khw) am not sure that it is necessary, + * and that this couldn't be folded into the loop, but barring any real + * platforms to test on, it's staying as-is + * + * A slight complication is that in embedded Perls, the locale may already + * be set-up, and we don't want to get it from the normal environment + * variables. This is handled by having a special environment variable + * indicate we're in this situation. We simply set setlocale's 2nd + * parameter to be a NULL instead of "". That indicates to setlocale that + * it is not to change anything, but to return the current value, + * effectively initializing perl's db to what the locale already is. + * + * We play the same trick with NULL if a LC_ALL succeeds. We call + * setlocale() on the individual categores with NULL to get their existing + * values for our db, instead of trying to change them. + * */ int ok = 1; @@ -511,25 +763,52 @@ Perl_init_i18nl10n(pTHX_ int printwarn) char *curnum = NULL; #endif /* USE_LOCALE_NUMERIC */ #ifdef __GLIBC__ - char * const language = PerlEnv_getenv("LANGUAGE"); + const char * const language = savepv(PerlEnv_getenv("LANGUAGE")); #endif /* NULL uses the existing already set up locale */ const char * const setlocale_init = (PerlEnv_getenv("PERL_SKIP_LOCALE_INIT")) ? NULL : ""; +#ifdef DEBUGGING + const bool debug = (PerlEnv_getenv("PERL_DEBUG_LOCALE_INIT")) + ? TRUE + : FALSE; +# define DEBUG_LOCALE_INIT(category, locale, result) \ + STMT_START { \ + if (debug) { \ + PerlIO_printf(Perl_debug_log, \ + "%s:%d: %s\n", \ + __FILE__, __LINE__, \ + _setlocale_debug_string(category, \ + locale, \ + result)); \ + } \ + } STMT_END +#else +# define DEBUG_LOCALE_INIT(a,b,c) +#endif const char* trial_locales[5]; /* 5 = 1 each for "", LC_ALL, LANG, "", C */ unsigned int trial_locales_count; - char * const lc_all = PerlEnv_getenv("LC_ALL"); - char * const lang = PerlEnv_getenv("LANG"); + const char * const lc_all = savepv(PerlEnv_getenv("LC_ALL")); + const char * const lang = savepv(PerlEnv_getenv("LANG")); bool setlocale_failure = FALSE; unsigned int i; char *p; - const bool locwarn = (printwarn > 1 || - (printwarn && - (!(p = PerlEnv_getenv("PERL_BADLANG")) || - grok_atou(p, NULL)))); + + /* A later getenv() could zap this, so only use here */ + const char * const bad_lang_use_once = PerlEnv_getenv("PERL_BADLANG"); + + const bool locwarn = (printwarn > 1 + || (printwarn + && (! bad_lang_use_once + || ( + /* disallow with "" or "0" */ + *bad_lang_use_once + && strNE("0", bad_lang_use_once))))); bool done = FALSE; + char * sl_result; /* return from setlocale() */ + char * locale_param; #ifdef WIN32 /* In some systems you can find out the system default locale * and use that as the fallback locale. */ @@ -541,6 +820,7 @@ Perl_init_i18nl10n(pTHX_ int printwarn) #ifndef LOCALE_ENVIRON_REQUIRED PERL_UNUSED_VAR(done); + PERL_UNUSED_VAR(locale_param); #else /* @@ -550,55 +830,64 @@ Perl_init_i18nl10n(pTHX_ int printwarn) # ifdef LC_ALL if (lang) { - if (my_setlocale(LC_ALL, setlocale_init)) + sl_result = my_setlocale(LC_ALL, setlocale_init); + DEBUG_LOCALE_INIT(LC_ALL, setlocale_init, sl_result); + if (sl_result) done = TRUE; else setlocale_failure = TRUE; } - if (!setlocale_failure) { + if (! setlocale_failure) { # ifdef USE_LOCALE_CTYPE - Safefree(curctype); - if (! (curctype = - my_setlocale(LC_CTYPE, - (!done && (lang || PerlEnv_getenv("LC_CTYPE"))) - ? setlocale_init : NULL))) + locale_param = (! done && (lang || PerlEnv_getenv("LC_CTYPE"))) + ? setlocale_init + : NULL; + curctype = my_setlocale(LC_CTYPE, locale_param); + DEBUG_LOCALE_INIT(LC_CTYPE, locale_param, sl_result); + if (! curctype) setlocale_failure = TRUE; else curctype = savepv(curctype); # endif /* USE_LOCALE_CTYPE */ # ifdef USE_LOCALE_COLLATE - Safefree(curcoll); - if (! (curcoll = - my_setlocale(LC_COLLATE, - (!done && (lang || PerlEnv_getenv("LC_COLLATE"))) - ? setlocale_init : NULL))) + locale_param = (! done && (lang || PerlEnv_getenv("LC_COLLATE"))) + ? setlocale_init + : NULL; + curcoll = my_setlocale(LC_COLLATE, locale_param); + DEBUG_LOCALE_INIT(LC_COLLATE, locale_param, sl_result); + if (! curcoll) setlocale_failure = TRUE; else curcoll = savepv(curcoll); # endif /* USE_LOCALE_COLLATE */ # ifdef USE_LOCALE_NUMERIC - Safefree(curnum); - if (! (curnum = - my_setlocale(LC_NUMERIC, - (!done && (lang || PerlEnv_getenv("LC_NUMERIC"))) - ? setlocale_init : NULL))) + locale_param = (! done && (lang || PerlEnv_getenv("LC_NUMERIC"))) + ? setlocale_init + : NULL; + curnum = my_setlocale(LC_NUMERIC, locale_param); + DEBUG_LOCALE_INIT(LC_NUMERIC, locale_param, sl_result); + if (! curnum) setlocale_failure = TRUE; else curnum = savepv(curnum); # endif /* USE_LOCALE_NUMERIC */ # ifdef USE_LOCALE_MESSAGES - if (! my_setlocale(LC_MESSAGES, - (!done && (lang || PerlEnv_getenv("LC_MESSAGES"))) - ? setlocale_init : NULL)) - { + locale_param = (! done && (lang || PerlEnv_getenv("LC_MESSAGES"))) + ? setlocale_init + : NULL; + sl_result = my_setlocale(LC_MESSAGES, locale_param); + DEBUG_LOCALE_INIT(LC_MESSAGES, locale_param, sl_result); + if (! sl_result) setlocale_failure = TRUE; } # endif /* USE_LOCALE_MESSAGES */ # ifdef USE_LOCALE_MONETARY - if (! my_setlocale(LC_MONETARY, - (!done && (lang || PerlEnv_getenv("LC_MONETARY"))) - ? setlocale_init : NULL)) - { + locale_param = (! done && (lang || PerlEnv_getenv("LC_MONETARY"))) + ? setlocale_init + : NULL; + sl_result = my_setlocale(LC_MONETARY, locale_param); + DEBUG_LOCALE_INIT(LC_MONETARY, locale_param, sl_result); + if (! sl_result) { setlocale_failure = TRUE; } # endif /* USE_LOCALE_MONETARY */ @@ -609,7 +898,9 @@ Perl_init_i18nl10n(pTHX_ int printwarn) #endif /* !LOCALE_ENVIRON_REQUIRED */ /* We try each locale in the list until we get one that works, or exhaust - * the list */ + * the list. Normally the loop is executed just once. But if setting the + * locale fails, inside the loop we add fallback trials to the array and so + * will execute the loop multiple times */ trial_locales[0] = setlocale_init; trial_locales_count = 1; for (i= 0; i < trial_locales_count; i++) { @@ -632,6 +923,7 @@ Perl_init_i18nl10n(pTHX_ int printwarn) /* Note that this may change the locale, but we are going to do * that anyway just below */ system_default_locale = setlocale(LC_ALL, ""); + DEBUG_LOCALE_INIT(LC_ALL, "", system_default_locale); /* Skip if invalid or it's already on the list of locales to * try */ @@ -651,7 +943,9 @@ Perl_init_i18nl10n(pTHX_ int printwarn) } #ifdef LC_ALL - if (! my_setlocale(LC_ALL, trial_locale)) { + sl_result = my_setlocale(LC_ALL, trial_locale); + DEBUG_LOCALE_INIT(LC_ALL, trial_locale, sl_result); + if (! sl_result) { setlocale_failure = TRUE; } else { @@ -669,31 +963,41 @@ Perl_init_i18nl10n(pTHX_ int printwarn) if (!setlocale_failure) { #ifdef USE_LOCALE_CTYPE Safefree(curctype); - if (! (curctype = my_setlocale(LC_CTYPE, trial_locale))) + curctype = my_setlocale(LC_CTYPE, trial_locale); + DEBUG_LOCALE_INIT(LC_CTYPE, trial_locale, curctype); + if (! curctype) setlocale_failure = TRUE; else curctype = savepv(curctype); #endif /* USE_LOCALE_CTYPE */ #ifdef USE_LOCALE_COLLATE Safefree(curcoll); - if (! (curcoll = my_setlocale(LC_COLLATE, trial_locale))) + curcoll = my_setlocale(LC_COLLATE, trial_locale); + DEBUG_LOCALE_INIT(LC_COLLATE, trial_locale, curcoll); + if (! curcoll) setlocale_failure = TRUE; else curcoll = savepv(curcoll); #endif /* USE_LOCALE_COLLATE */ #ifdef USE_LOCALE_NUMERIC Safefree(curnum); - if (! (curnum = my_setlocale(LC_NUMERIC, trial_locale))) + curnum = my_setlocale(LC_NUMERIC, trial_locale); + DEBUG_LOCALE_INIT(LC_NUMERIC, trial_locale, curnum); + if (! curnum) setlocale_failure = TRUE; else curnum = savepv(curnum); #endif /* USE_LOCALE_NUMERIC */ #ifdef USE_LOCALE_MESSAGES - if (! (my_setlocale(LC_MESSAGES, trial_locale))) + sl_result = my_setlocale(LC_MESSAGES, trial_locale); + DEBUG_LOCALE_INIT(LC_MESSAGES, trial_locale, sl_result); + if (! (sl_result)) setlocale_failure = TRUE; #endif /* USE_LOCALE_MESSAGES */ #ifdef USE_LOCALE_MONETARY - if (! (my_setlocale(LC_MONETARY, trial_locale))) + sl_result = my_setlocale(LC_MONETARY, trial_locale); + DEBUG_LOCALE_INIT(LC_MONETARY, trial_locale, sl_result); + if (! (sl_result)) setlocale_failure = TRUE; #endif /* USE_LOCALE_MONETARY */ @@ -718,18 +1022,18 @@ Perl_init_i18nl10n(pTHX_ int printwarn) PerlIO_printf(Perl_error_log, "perl: warning: Setting locale failed for the categories:\n\t"); -#ifdef USE_LOCALE_CTYPE +# ifdef USE_LOCALE_CTYPE if (! curctype) PerlIO_printf(Perl_error_log, "LC_CTYPE "); -#endif /* USE_LOCALE_CTYPE */ -#ifdef USE_LOCALE_COLLATE +# endif /* USE_LOCALE_CTYPE */ +# ifdef USE_LOCALE_COLLATE if (! curcoll) PerlIO_printf(Perl_error_log, "LC_COLLATE "); -#endif /* USE_LOCALE_COLLATE */ -#ifdef USE_LOCALE_NUMERIC +# endif /* USE_LOCALE_COLLATE */ +# ifdef USE_LOCALE_NUMERIC if (! curnum) PerlIO_printf(Perl_error_log, "LC_NUMERIC "); -#endif /* USE_LOCALE_NUMERIC */ +# endif /* USE_LOCALE_NUMERIC */ PerlIO_printf(Perl_error_log, "and possibly others\n"); #endif /* LC_ALL */ @@ -778,7 +1082,7 @@ Perl_init_i18nl10n(pTHX_ int printwarn) } /* Calculate what fallback locales to try. We have avoided this - * until we have to, becuase failure is quite unlikely. This will + * until we have to, because failure is quite unlikely. This will * usually change the upper bound of the loop we are in. * * Since the system's default way of setting the locale has not @@ -786,7 +1090,12 @@ Perl_init_i18nl10n(pTHX_ int printwarn) * LANG, and the C locale. We don't try the same locale twice, so * don't add to the list if already there. (On POSIX systems, the * LC_ALL element will likely be a repeat of the 0th element "", - * but there's no harm done by doing it explicitly */ + * but there's no harm done by doing it explicitly. + * + * Note that this tries the LC_ALL environment variable even on + * systems which have no LC_ALL locale setting. This may or may + * not have been originally intentional, but there's no real need + * to change the behavior. */ if (lc_all) { for (j = 0; j < trial_locales_count; j++) { if (strEQ(lc_all, trial_locales[j])) { @@ -851,14 +1160,17 @@ Perl_init_i18nl10n(pTHX_ int printwarn) #ifdef USE_LOCALE_CTYPE Safefree(curctype); curctype = savepv(setlocale(LC_CTYPE, NULL)); + DEBUG_LOCALE_INIT(LC_CTYPE, NULL, curctype); #endif /* USE_LOCALE_CTYPE */ #ifdef USE_LOCALE_COLLATE Safefree(curcoll); curcoll = savepv(setlocale(LC_COLLATE, NULL)); + DEBUG_LOCALE_INIT(LC_COLLATE, NULL, curcoll); #endif /* USE_LOCALE_COLLATE */ #ifdef USE_LOCALE_NUMERIC Safefree(curnum); curnum = savepv(setlocale(LC_NUMERIC, NULL)); + DEBUG_LOCALE_INIT(LC_NUMERIC, NULL, curnum); #endif /* USE_LOCALE_NUMERIC */ } @@ -933,6 +1245,13 @@ Perl_init_i18nl10n(pTHX_ int printwarn) Safefree(curnum); #endif /* USE_LOCALE_NUMERIC */ +#ifdef __GLIBC__ + Safefree(language); +#endif + + Safefree(lc_all); + Safefree(lang); + #else /* !USE_LOCALE */ PERL_UNUSED_ARG(printwarn); #endif /* USE_LOCALE */ @@ -948,6 +1267,8 @@ Perl_init_i18nl10n(pTHX_ int printwarn) * differences. First, it handles embedded NULs. Second, it allocates * a bit more memory than needed for the transformed data itself. * The real transformed data begins at offset sizeof(collationix). + * *xlen is set to the length of that, and doesn't include the collation index + * size. * Please see sv_collxfrm() to see how this is used. */ @@ -964,23 +1285,36 @@ Perl_mem_collxfrm(pTHX_ const char *s, STRLEN len, STRLEN *xlen) xAlloc = sizeof(PL_collation_ix) + PL_collxfrm_base + (PL_collxfrm_mult * len) + 1; Newx(xbuf, xAlloc, char); - if (! xbuf) + if (UNLIKELY(! xbuf)) goto bad; + /* Store the collation id */ *(U32*)xbuf = PL_collation_ix; xout = sizeof(PL_collation_ix); + + /* Then the transformation of the input. We loop until successful, or we + * give up */ for (xin = 0; xin < len; ) { Size_t xused; for (;;) { xused = strxfrm(xbuf + xout, s + xin, xAlloc - xout); - if (xused >= PERL_INT_MAX) - goto bad; + + /* If the transformed string occupies less space than we told + * strxfrm() was available, it means it successfully transformed + * the whole string. */ if ((STRLEN)xused < xAlloc - xout) break; + + if (UNLIKELY(xused >= PERL_INT_MAX)) + goto bad; + + /* Otherwise it should be that the transformation stopped in the + * middle because it ran out of space. Malloc more, and try again. + * */ xAlloc = (2 * xAlloc) + 1; Renew(xbuf, xAlloc, char); - if (! xbuf) + if (UNLIKELY(! xbuf)) goto bad; } @@ -1140,7 +1474,7 @@ Perl__is_cur_LC_category_utf8(pTHX_ int category) || wc != (wchar_t) 0x2010) { is_utf8 = FALSE; - DEBUG_L(PerlIO_printf(Perl_debug_log, "\thyphen=U+%x\n", wc)); + DEBUG_L(PerlIO_printf(Perl_debug_log, "\thyphen=U+%x\n", (unsigned int)wc)); DEBUG_L(PerlIO_printf(Perl_debug_log, "\treturn from mbtowc=%d; errno=%d; ?UTF8 locale=0\n", mbtowc(&wc, HYPHEN_UTF8, strlen(HYPHEN_UTF8)), errno)); @@ -1211,7 +1545,7 @@ Perl__is_cur_LC_category_utf8(pTHX_ int category) lc = localeconv(); if (! lc || ! lc->currency_symbol - || is_ascii_string((U8 *) lc->currency_symbol, 0)) + || is_invariant_string((U8 *) lc->currency_symbol, 0)) { DEBUG_L(PerlIO_printf(Perl_debug_log, "Couldn't get currency symbol for %s, or contains only ASCII; can't use for determining if UTF-8 locale\n", save_input_locale)); only_ascii = TRUE; @@ -1284,14 +1618,14 @@ Perl__is_cur_LC_category_utf8(pTHX_ int category) /* Here the current LC_TIME is set to the locale of the category * whose information is desired. Look at all the days of the week and - * month names, and the timezone and am/pm indicator for non-ASCII + * month names, and the timezone and am/pm indicator for UTF-8 variant * characters. The first such a one found will tell us if the locale * is UTF-8 or not */ for (i = 0; i < 7 + 12; i++) { /* 7 days; 12 months */ formatted_time = my_strftime("%A %B %Z %p", 0, 0, hour, dom, month, 112, 0, 0, is_dst); - if (! formatted_time || is_ascii_string((U8 *) formatted_time, 0)) { + if (! formatted_time || is_invariant_string((U8 *) formatted_time, 0)) { /* Here, we didn't find a non-ASCII. Try the next time through * with the complemented dst and am/pm, and try with the next @@ -1392,7 +1726,7 @@ Perl__is_cur_LC_category_utf8(pTHX_ int category) break; } errmsg = savepv(errmsg); - if (! is_ascii_string((U8 *) errmsg, 0)) { + if (! is_invariant_string((U8 *) errmsg, 0)) { non_ascii = TRUE; is_utf8 = is_utf8_string((U8 *) errmsg, 0); break; @@ -1531,13 +1865,21 @@ Perl__is_in_locale_category(pTHX_ const bool compiling, const int category) char * Perl_my_strerror(pTHX_ const int errnum) { + dVAR; /* Uses C locale for the error text unless within scope of 'use locale' for * LC_MESSAGES */ #ifdef USE_LOCALE_MESSAGES if (! IN_LC(LC_MESSAGES)) { - char * save_locale = setlocale(LC_MESSAGES, NULL); + char * save_locale; + + /* We have a critical section to prevent another thread from changing + * the locale out from under us (or zapping the buffer returned from + * setlocale() ) */ + LOCALE_LOCK; + + save_locale = setlocale(LC_MESSAGES, NULL); if (! isNAME_C_OR_POSIX(save_locale)) { char *errstr; @@ -1552,8 +1894,13 @@ Perl_my_strerror(pTHX_ const int errnum) { setlocale(LC_MESSAGES, save_locale); Safefree(save_locale); + + LOCALE_UNLOCK; + return errstr; } + + LOCALE_UNLOCK; } #endif @@ -1593,14 +1940,99 @@ Perl_sync_locale(pTHX) } +#if defined(DEBUGGING) && defined(USE_LOCALE) + +char * +Perl__setlocale_debug_string(const int category, /* category number, + like LC_ALL */ + const char* const locale, /* locale name */ + + /* return value from setlocale() when attempting to + * set 'category' to 'locale' */ + const char* const retval) +{ + /* Returns a pointer to a NUL-terminated string in static storage with + * added text about the info passed in. This is not thread safe and will + * be overwritten by the next call, so this should be used just to + * formulate a string to immediately print or savepv() on. */ + + /* initialise to a non-null value to keep it out of BSS and so keep + * -DPERL_GLOBAL_STRUCT_PRIVATE happy */ + static char ret[128] = "x"; + + my_strlcpy(ret, "setlocale(", sizeof(ret)); + + switch (category) { + default: + my_snprintf(ret, sizeof(ret), "%s? %d", ret, category); + break; +# ifdef LC_ALL + case LC_ALL: + my_strlcat(ret, "LC_ALL", sizeof(ret)); + break; +# endif +# ifdef LC_CTYPE + case LC_CTYPE: + my_strlcat(ret, "LC_CTYPE", sizeof(ret)); + break; +# endif +# ifdef LC_NUMERIC + case LC_NUMERIC: + my_strlcat(ret, "LC_NUMERIC", sizeof(ret)); + break; +# endif +# ifdef LC_COLLATE + case LC_COLLATE: + my_strlcat(ret, "LC_COLLATE", sizeof(ret)); + break; +# endif +# ifdef LC_TIME + case LC_TIME: + my_strlcat(ret, "LC_TIME", sizeof(ret)); + break; +# endif +# ifdef LC_MONETARY + case LC_MONETARY: + my_strlcat(ret, "LC_MONETARY", sizeof(ret)); + break; +# endif +# ifdef LC_MESSAGES + case LC_MESSAGES: + my_strlcat(ret, "LC_MESSAGES", sizeof(ret)); + break; +# endif + } + + my_strlcat(ret, ", ", sizeof(ret)); + + if (locale) { + my_strlcat(ret, "\"", sizeof(ret)); + my_strlcat(ret, locale, sizeof(ret)); + my_strlcat(ret, "\"", sizeof(ret)); + } + else { + my_strlcat(ret, "NULL", sizeof(ret)); + } + + my_strlcat(ret, ") returned ", sizeof(ret)); + + if (retval) { + my_strlcat(ret, "\"", sizeof(ret)); + my_strlcat(ret, retval, sizeof(ret)); + my_strlcat(ret, "\"", sizeof(ret)); + } + else { + my_strlcat(ret, "NULL", sizeof(ret)); + } + + assert(strlen(ret) < sizeof(ret)); + + return ret; +} + +#endif /* - * Local variables: - * c-indentation-style: bsd - * c-basic-offset: 4 - * indent-tabs-mode: nil - * End: - * * ex: set ts=8 sts=4 sw=4 et: */