X-Git-Url: https://perl5.git.perl.org/perl5.git/blobdiff_plain/7bb8f702f9ddfc65ea019531908a111e0d4d8351..b8a2649a502a2760c6bc90d32f71a03c232eee81:/locale.c diff --git a/locale.c b/locale.c index 7653340..5d6566f 100644 --- a/locale.c +++ b/locale.c @@ -53,11 +53,14 @@ #ifdef I_WCHAR # include #endif +#ifdef I_WCTYPE +# include +#endif /* If the environment says to, we can output debugging information during * initialization. This is done before option parsing, and before any thread * creation, so can be a file-level static */ -#if ! defined(DEBUGGING) || defined(PERL_GLOBAL_STRUCT) +#if ! defined(DEBUGGING) # define debug_initialization 0 # define DEBUG_INITIALIZATION_set(v) #else @@ -197,6 +200,12 @@ const int categories[] = { # ifdef USE_LOCALE_TELEPHONE LC_TELEPHONE, # endif +# ifdef USE_LOCALE_SYNTAX + LC_SYNTAX, +# endif +# ifdef USE_LOCALE_TOD + LC_TOD, +# endif # ifdef LC_ALL LC_ALL, # endif @@ -207,7 +216,7 @@ const int categories[] = { /* The top-most real element is LC_ALL */ -const char * category_names[] = { +const char * const category_names[] = { # ifdef USE_LOCALE_NUMERIC "LC_NUMERIC", @@ -242,6 +251,12 @@ const char * category_names[] = { # ifdef USE_LOCALE_TELEPHONE "LC_TELEPHONE", # endif +# ifdef USE_LOCALE_SYNTAX + "LC_SYNTAX", +# endif +# ifdef USE_LOCALE_TOD + "LC_TOD", +# endif # ifdef LC_ALL "LC_ALL", # endif @@ -331,7 +346,7 @@ S_category_name(const int category) # define LC_COLLATE_INDEX _DUMMY_CTYPE + 1 # define _DUMMY_COLLATE LC_COLLATE_INDEX # else -# define _DUMMY_COLLATE _DUMMY_COLLATE +# define _DUMMY_COLLATE _DUMMY_CTYPE # endif # ifdef USE_LOCALE_TIME # define LC_TIME_INDEX _DUMMY_COLLATE + 1 @@ -381,8 +396,20 @@ S_category_name(const int category) # else # define _DUMMY_TELEPHONE _DUMMY_PAPER # endif +# ifdef USE_LOCALE_SYNTAX +# define LC_SYNTAX_INDEX _DUMMY_TELEPHONE + 1 +# define _DUMMY_SYNTAX LC_SYNTAX_INDEX +# else +# define _DUMMY_SYNTAX _DUMMY_TELEPHONE +# endif +# ifdef USE_LOCALE_TOD +# define LC_TOD_INDEX _DUMMY_SYNTAX + 1 +# define _DUMMY_TOD LC_TOD_INDEX +# else +# define _DUMMY_TOD _DUMMY_SYNTAX +# endif # ifdef LC_ALL -# define LC_ALL_INDEX _DUMMY_TELEPHONE + 1 +# define LC_ALL_INDEX _DUMMY_TOD + 1 # endif #endif /* ifdef USE_LOCALE */ @@ -399,6 +426,7 @@ S_category_name(const int category) * known at compile time; "do_setlocale_r", not known until run time */ # define do_setlocale_c(cat, locale) my_setlocale(cat, locale) # define do_setlocale_r(cat, locale) my_setlocale(cat, locale) +# define FIX_GLIBC_LC_MESSAGES_BUG(i) #else /* Below uses POSIX 2008 */ @@ -412,6 +440,22 @@ S_category_name(const int category) emulate_setlocale(cat, locale, cat ## _INDEX, TRUE) # define do_setlocale_r(cat, locale) emulate_setlocale(cat, locale, 0, FALSE) +# if ! defined(__GLIBC__) || ! defined(USE_LOCALE_MESSAGES) + +# define FIX_GLIBC_LC_MESSAGES_BUG(i) + +# else /* Invalidate glibc cache of loaded translations, see [perl #134264] */ + +# include +# define FIX_GLIBC_LC_MESSAGES_BUG(i) \ + STMT_START { \ + if ((i) == LC_MESSAGES_INDEX) { \ + textdomain(textdomain(NULL)); \ + } \ + } STMT_END + +# endif + /* A third array, parallel to the ones above to map from category to its * equivalent mask */ const int category_masks[] = { @@ -448,6 +492,12 @@ const int category_masks[] = { # ifdef USE_LOCALE_TELEPHONE LC_TELEPHONE_MASK, # endif +# ifdef USE_LOCALE_SYNTAX + LC_SYNTAX_MASK, +# endif +# ifdef USE_LOCALE_TOD + LC_TOD_MASK, +# endif /* LC_ALL can't be turned off by a Configure * option, and in Posix 2008, should always be * here, so compile it in unconditionally. @@ -586,12 +636,15 @@ S_emulate_setlocale(const int category, /* If this assert fails, adjust the size of curlocales in intrpvar.h */ STATIC_ASSERT_STMT(C_ARRAY_LENGTH(PL_curlocales) > LC_ALL_INDEX); -# if defined(_NL_LOCALE_NAME) && defined(DEBUGGING) - +# if defined(_NL_LOCALE_NAME) \ + && defined(DEBUGGING) \ + && ! defined(SETLOCALE_ACCEPTS_ANY_LOCALE_NAME) + /* On systems that accept any locale name, the real underlying locale + * is often returned by this internal function, so we can't use it */ { /* Internal glibc for querylocale(), but doesn't handle * empty-string ("") locale properly; who knows what other - * glitches. Check it for now, under debug. */ + * glitches. Check for it now, under debug. */ char * temp_name = nl_langinfo_l(_NL_LOCALE_NAME(category), uselocale((locale_t) 0)); @@ -733,7 +786,7 @@ S_emulate_setlocale(const int category, # endif - } + } /* End of this being setlocale(LC_foo, NULL) */ /* Here, we are switching locales. */ @@ -763,24 +816,11 @@ S_emulate_setlocale(const int category, const char * default_name; - /* To minimize other threads messing with the environment, we copy - * the variable, making it a temporary. But this doesn't work upon - * program initialization before any scopes are created, and at - * this time, there's nothing else going on that would interfere. - * So skip the copy in that case */ - if (PL_scopestack_ix == 0) { - default_name = PerlEnv_getenv("LANG"); - } - else { - default_name = savepv(PerlEnv_getenv("LANG")); - } + default_name = PerlEnv_getenv("LANG"); if (! default_name || strEQ(default_name, "")) { default_name = "C"; } - else if (PL_scopestack_ix != 0) { - SAVEFREEPV(default_name); - } if (category != LC_ALL) { const char * const name = PerlEnv_getenv(category_names[index]); @@ -815,22 +855,19 @@ S_emulate_setlocale(const int category, for (i = 0; i < LC_ALL_INDEX; i++) { const char * const env_override - = savepv(PerlEnv_getenv(category_names[i])); + = PerlEnv_getenv(category_names[i]); const char * this_locale = ( env_override && strNE(env_override, "")) ? env_override : default_name; if (! emulate_setlocale(categories[i], this_locale, i, TRUE)) { - Safefree(env_override); return NULL; } if (strNE(this_locale, default_name)) { did_override = TRUE; } - - Safefree(env_override); } /* If all the categories are the same, we can set LC_ALL to @@ -852,7 +889,7 @@ S_emulate_setlocale(const int category, } } } - } + } /* End of this being setlocale(LC_foo, "") */ else if (strchr(locale, ';')) { /* LC_ALL may actually incude a conglomeration of various categories. @@ -949,7 +986,8 @@ S_emulate_setlocale(const int category, assert(category == LC_ALL); return do_setlocale_c(LC_ALL, NULL); - } + } /* End of this being setlocale(LC_ALL, + "LC_CTYPE=foo;LC_NUMERIC=bar;...") */ ready_to_set: ; @@ -996,89 +1034,133 @@ S_emulate_setlocale(const int category, # ifdef DEBUGGING if (DEBUG_Lv_TEST || debug_initialization) { - PerlIO_printf(Perl_debug_log, "%s:%d: emulate_setlocale now using %p\n", __FILE__, __LINE__, PL_C_locale_obj); + PerlIO_printf(Perl_debug_log, + "%s:%d: emulate_setlocale now using %p\n", + __FILE__, __LINE__, PL_C_locale_obj); } # endif - /* If we weren't in a thread safe locale, set so that newlocale() below - which uses 'old_obj', uses an empty one. Same for our reserved C object. - The latter is defensive coding, so that, even if there is some bug, we - will never end up trying to modify either of these, as if passed to - newlocale(), they can be. */ - if (old_obj == LC_GLOBAL_LOCALE || old_obj == PL_C_locale_obj) { - old_obj = (locale_t) 0; - } - - /* Ready to create a new locale by modification of the exising one */ - new_obj = newlocale(mask, locale, old_obj); - - if (! new_obj) { - dSAVE_ERRNO; + /* If this call is to switch to the LC_ALL C locale, it already exists, and + * in fact, we already have switched to it (in preparation for what + * normally is to come). But since we're already there, continue to use + * it instead of trying to create a new locale */ + if (mask == LC_ALL_MASK && isNAME_C_OR_POSIX(locale)) { # ifdef DEBUGGING - if (DEBUG_L_TEST || debug_initialization) { - PerlIO_printf(Perl_debug_log, "%s:%d: emulate_setlocale creating new object failed: %d\n", __FILE__, __LINE__, GET_ERRNO); + if (DEBUG_Lv_TEST || debug_initialization) { + PerlIO_printf(Perl_debug_log, + "%s:%d: will stay in C object\n", __FILE__, __LINE__); } # endif - if (! uselocale(old_obj)) { + new_obj = PL_C_locale_obj; + + /* We already had switched to the C locale in preparation for freeing + * 'old_obj' */ + if (old_obj != LC_GLOBAL_LOCALE && old_obj != PL_C_locale_obj) { + freelocale(old_obj); + } + } + else { + /* If we weren't in a thread safe locale, set so that newlocale() below + * which uses 'old_obj', uses an empty one. Same for our reserved C + * object. The latter is defensive coding, so that, even if there is + * some bug, we will never end up trying to modify either of these, as + * if passed to newlocale(), they can be. */ + if (old_obj == LC_GLOBAL_LOCALE || old_obj == PL_C_locale_obj) { + old_obj = (locale_t) 0; + } + + /* Ready to create a new locale by modification of the exising one */ + new_obj = newlocale(mask, locale, old_obj); + + if (! new_obj) { + dSAVE_ERRNO; # ifdef DEBUGGING if (DEBUG_L_TEST || debug_initialization) { - PerlIO_printf(Perl_debug_log, "%s:%d: switching back failed: %d\n", __FILE__, __LINE__, GET_ERRNO); + PerlIO_printf(Perl_debug_log, + "%s:%d: emulate_setlocale creating new object" + " failed: %d\n", __FILE__, __LINE__, GET_ERRNO); } # endif - } - RESTORE_ERRNO; - return NULL; - } + if (! uselocale(old_obj)) { # ifdef DEBUGGING - if (DEBUG_Lv_TEST || debug_initialization) { - PerlIO_printf(Perl_debug_log, "%s:%d: emulate_setlocale created %p; should have freed %p\n", __FILE__, __LINE__, new_obj, old_obj); - } + if (DEBUG_L_TEST || debug_initialization) { + PerlIO_printf(Perl_debug_log, + "%s:%d: switching back failed: %d\n", + __FILE__, __LINE__, GET_ERRNO); + } # endif - /* And switch into it */ - if (! uselocale(new_obj)) { - dSAVE_ERRNO; + } + RESTORE_ERRNO; + return NULL; + } # ifdef DEBUGGING - if (DEBUG_L_TEST || debug_initialization) { - PerlIO_printf(Perl_debug_log, "%s:%d: emulate_setlocale switching to new object failed\n", __FILE__, __LINE__); + if (DEBUG_Lv_TEST || debug_initialization) { + PerlIO_printf(Perl_debug_log, + "%s:%d: emulate_setlocale created %p", + __FILE__, __LINE__, new_obj); + if (old_obj) { + PerlIO_printf(Perl_debug_log, + "; should have freed %p", old_obj); + } + PerlIO_printf(Perl_debug_log, "\n"); } # endif - if (! uselocale(old_obj)) { + /* And switch into it */ + if (! uselocale(new_obj)) { + dSAVE_ERRNO; # ifdef DEBUGGING if (DEBUG_L_TEST || debug_initialization) { - PerlIO_printf(Perl_debug_log, "%s:%d: switching back failed: %d\n", __FILE__, __LINE__, GET_ERRNO); + PerlIO_printf(Perl_debug_log, + "%s:%d: emulate_setlocale switching to new object" + " failed\n", __FILE__, __LINE__); } # endif + if (! uselocale(old_obj)) { + +# ifdef DEBUGGING + + if (DEBUG_L_TEST || debug_initialization) { + PerlIO_printf(Perl_debug_log, + "%s:%d: switching back failed: %d\n", + __FILE__, __LINE__, GET_ERRNO); + } + +# endif + + } + freelocale(new_obj); + RESTORE_ERRNO; + return NULL; } - freelocale(new_obj); - RESTORE_ERRNO; - return NULL; } # ifdef DEBUGGING if (DEBUG_Lv_TEST || debug_initialization) { - PerlIO_printf(Perl_debug_log, "%s:%d: emulate_setlocale now using %p\n", __FILE__, __LINE__, new_obj); + PerlIO_printf(Perl_debug_log, + "%s:%d: emulate_setlocale now using %p\n", + __FILE__, __LINE__, new_obj); } # endif @@ -1110,6 +1192,8 @@ S_emulate_setlocale(const int category, Safefree(PL_curlocales[i]); PL_curlocales[i] = savepv(locale); } + + FIX_GLIBC_LC_MESSAGES_BUG(LC_MESSAGES_INDEX); } else { @@ -1124,6 +1208,8 @@ S_emulate_setlocale(const int category, /* Then update the category's record */ Safefree(PL_curlocales[index]); PL_curlocales[index] = savepv(locale); + + FIX_GLIBC_LC_MESSAGES_BUG(index); } # endif @@ -1190,7 +1276,7 @@ S_locking_setlocale(pTHX_ * ones. This is because as described earlier. If we know on input the * index corresponding to the category into the array where we store the * current locales, we don't have to calculate it. If the caller knows at - * compile time what the index is, it it can pass it, setting + * compile time what the index is, it can pass it, setting * 'is_index_valid' to TRUE; otherwise the index parameter is ignored. * */ @@ -1264,6 +1350,7 @@ S_locking_setlocale(pTHX_ } #endif +#ifdef USE_LOCALE STATIC void S_set_numeric_radix(pTHX_ const bool use_locale) @@ -1299,6 +1386,10 @@ S_set_numeric_radix(pTHX_ const bool use_locale) } # endif +#else + + PERL_UNUSED_ARG(use_locale); + #endif /* USE_LOCALE_NUMERIC and can find the radix char */ } @@ -1481,7 +1572,6 @@ S_new_ctype(pTHX_ const char *newctype) #ifndef USE_LOCALE_CTYPE - PERL_ARGS_ASSERT_NEW_CTYPE; PERL_UNUSED_ARG(newctype); PERL_UNUSED_CONTEXT; @@ -1498,11 +1588,11 @@ S_new_ctype(pTHX_ const char *newctype) * this function should be called directly only from this file and from * POSIX::setlocale() */ - dVAR; unsigned int i; /* Don't check for problems if we are suppressing the warnings */ bool check_for_problems = ckWARN_d(WARN_LOCALE) || UNLIKELY(DEBUG_L_TEST); + bool maybe_utf8_turkic = FALSE; PERL_ARGS_ASSERT_NEW_CTYPE; @@ -1519,6 +1609,23 @@ S_new_ctype(pTHX_ const char *newctype) * handle this specially because of the three problematic code points */ if (PL_in_utf8_CTYPE_locale) { Copy(PL_fold_latin1, PL_fold_locale, 256, U8); + + /* UTF-8 locales can have special handling for 'I' and 'i' if they are + * Turkic. Make sure these two are the only anomalies. (We don't use + * towupper and towlower because they aren't in C89.) */ + +#if defined(HAS_TOWUPPER) && defined (HAS_TOWLOWER) + + if (towupper('i') == 0x130 && towlower('I') == 0x131) { + +#else + + if (toupper('i') == 'i' && tolower('I') == 'I') { + +#endif + check_for_problems = TRUE; + maybe_utf8_turkic = TRUE; + } } /* We don't populate the other lists if a UTF-8 locale, but do check that @@ -1635,13 +1742,13 @@ S_new_ctype(pTHX_ const char *newctype) "isxdigit('%s') unexpectedly is %d\n", name, cBOOL(isxdigit(i)))); } - if (UNLIKELY(tolower(i) != (int) toLOWER_A(i))) { + if (UNLIKELY(tolower(i) != (int) toLOWER_A(i))) { is_bad = TRUE; DEBUG_L(PerlIO_printf(Perl_debug_log, "tolower('%s')=0x%x instead of the expected 0x%x\n", name, tolower(i), (int) toLOWER_A(i))); } - if (UNLIKELY(toupper(i) != (int) toUPPER_A(i))) { + if (UNLIKELY(toupper(i) != (int) toUPPER_A(i))) { is_bad = TRUE; DEBUG_L(PerlIO_printf(Perl_debug_log, "toupper('%s')=0x%x instead of the expected 0x%x\n", @@ -1664,6 +1771,19 @@ S_new_ctype(pTHX_ const char *newctype) } } + if (bad_count == 2 && maybe_utf8_turkic) { + bad_count = 0; + *bad_chars_list = '\0'; + PL_fold_locale['I'] = 'I'; + PL_fold_locale['i'] = 'i'; + PL_in_utf8_turkic_locale = TRUE; + DEBUG_L(PerlIO_printf(Perl_debug_log, "%s:%d: %s is turkic\n", + __FILE__, __LINE__, newctype)); + } + else { + PL_in_utf8_turkic_locale = FALSE; + } + # ifdef MB_CUR_MAX /* We only handle single-byte locales (outside of UTF-8 ones; so if @@ -1689,7 +1809,10 @@ S_new_ctype(pTHX_ const char *newctype) # endif - if (UNLIKELY(bad_count) || UNLIKELY(multi_byte_locale)) { + /* If we found problems and we want them output, do so */ + if ( (UNLIKELY(bad_count) || UNLIKELY(multi_byte_locale)) + && (LIKELY(ckWARN_d(WARN_LOCALE)) || UNLIKELY(DEBUG_L_TEST))) + { if (UNLIKELY(bad_count) && PL_in_utf8_CTYPE_locale) { PL_warn_locale = Perl_newSVpvf(aTHX_ "Locale '%s' contains (at least) the following characters" @@ -1994,8 +2117,61 @@ S_new_collate(pTHX_ const char *newcoll) } +#endif + #ifdef WIN32 +#define USE_WSETLOCALE + +#ifdef USE_WSETLOCALE + +STATIC char * +S_wrap_wsetlocale(pTHX_ int category, const char *locale) { + wchar_t *wlocale; + wchar_t *wresult; + char *result; + + if (locale) { + int req_size = + MultiByteToWideChar(CP_UTF8, 0, locale, -1, NULL, 0); + + if (!req_size) { + errno = EINVAL; + return NULL; + } + + Newx(wlocale, req_size, wchar_t); + if (!MultiByteToWideChar(CP_UTF8, 0, locale, -1, wlocale, req_size)) { + Safefree(wlocale); + errno = EINVAL; + return NULL; + } + } + else { + wlocale = NULL; + } + wresult = _wsetlocale(category, wlocale); + Safefree(wlocale); + if (wresult) { + int req_size = + WideCharToMultiByte(CP_UTF8, 0, wresult, -1, NULL, 0, NULL, NULL); + Newx(result, req_size, char); + SAVEFREEPV(result); /* is there something better we can do here? */ + if (!WideCharToMultiByte(CP_UTF8, 0, wresult, -1, + result, req_size, NULL, NULL)) { + errno = EINVAL; + return NULL; + } + } + else { + result = NULL; + } + + return result; +} + +#endif + STATIC char * S_win32_setlocale(pTHX_ int category, const char* locale) { @@ -2053,7 +2229,11 @@ S_win32_setlocale(pTHX_ int category, const char* locale) } +#ifdef USE_WSETLOCALE + result = S_wrap_wsetlocale(aTHX_ category, locale); +#else result = setlocale(category, locale); +#endif DEBUG_L(STMT_START { dSAVE_ERRNO; PerlIO_printf(Perl_debug_log, "%s:%d: %s\n", __FILE__, __LINE__, @@ -2074,7 +2254,11 @@ S_win32_setlocale(pTHX_ int category, const char* locale) for (i = 0; i < LC_ALL_INDEX; i++) { result = PerlEnv_getenv(category_names[i]); if (result && strNE(result, "")) { +#ifdef USE_WSETLOCALE + S_wrap_wsetlocale(aTHX_ categories[i], result); +#else setlocale(categories[i], result); +#endif DEBUG_Lv(PerlIO_printf(Perl_debug_log, "%s:%d: %s\n", __FILE__, __LINE__, setlocale_debug_string(categories[i], result, "not captured"))); @@ -2097,7 +2281,7 @@ S_win32_setlocale(pTHX_ int category, const char* locale) /* -=head1 Locale-related functions and macros +=for apidoc_section Locales =for apidoc Perl_setlocale @@ -2139,11 +2323,20 @@ Perl_setlocale(const int category, const char * locale) { /* This wraps POSIX::setlocale() */ +#ifndef USE_LOCALE + + PERL_UNUSED_ARG(category); + PERL_UNUSED_ARG(locale); + + return "C"; + +#else + const char * retval; const char * newlocale; dSAVEDERRNO; - DECLARATION_FOR_LC_NUMERIC_MANIPULATION; dTHX; + DECLARATION_FOR_LC_NUMERIC_MANIPULATION; #ifdef USE_LOCALE_NUMERIC @@ -2262,6 +2455,8 @@ Perl_setlocale(const int category, const char * locale) return retval; +#endif + } PERL_STATIC_INLINE const char * @@ -2414,13 +2609,16 @@ S_my_nl_langinfo(const int item, bool toggle) dTHX; const char * retval; +#ifdef USE_LOCALE_NUMERIC + /* We only need to toggle into the underlying LC_NUMERIC locale for these * two items, and only if not already there */ if (toggle && (( item != RADIXCHAR && item != THOUSEP) || PL_numeric_underlying)) - { + +#endif /* No toggling needed if not using LC_NUMERIC */ + toggle = FALSE; - } #if defined(HAS_NL_LANGINFO) /* nl_langinfo() is available. */ # if ! defined(HAS_THREAD_SAFE_NL_LANGINFO_L) \ @@ -2468,6 +2666,8 @@ S_my_nl_langinfo(const int item, bool toggle) do_free = TRUE; } +# ifdef USE_LOCALE_NUMERIC + if (toggle) { if (PL_underlying_numeric_obj) { cur = PL_underlying_numeric_obj; @@ -2478,6 +2678,8 @@ S_my_nl_langinfo(const int item, bool toggle) } } +# endif + /* We have to save it to a buffer, because the freelocale() just below * can invalidate the internal one */ retval = save_to_buffer(nl_langinfo_l(item, cur), @@ -3113,6 +3315,7 @@ Perl_init_i18nl10n(pTHX_ int printwarn) * values for our db, instead of trying to change them. * */ + int ok = 1; #ifndef USE_LOCALE @@ -3122,7 +3325,7 @@ Perl_init_i18nl10n(pTHX_ int printwarn) #else /* USE_LOCALE */ # ifdef __GLIBC__ - const char * const language = savepv(PerlEnv_getenv("LANGUAGE")); + const char * const language = PerlEnv_getenv("LANGUAGE"); # endif @@ -3132,8 +3335,8 @@ Perl_init_i18nl10n(pTHX_ int printwarn) : ""; const char* trial_locales[5]; /* 5 = 1 each for "", LC_ALL, LANG, "", C */ unsigned int trial_locales_count; - const char * const lc_all = savepv(PerlEnv_getenv("LC_ALL")); - const char * const lang = savepv(PerlEnv_getenv("LANG")); + const char * const lc_all = PerlEnv_getenv("LC_ALL"); + const char * const lang = PerlEnv_getenv("LANG"); bool setlocale_failure = FALSE; unsigned int i; @@ -3263,6 +3466,20 @@ Perl_init_i18nl10n(pTHX_ int printwarn) assert(category_masks[LC_TELEPHONE_INDEX] == LC_TELEPHONE_MASK); # endif # endif +# ifdef USE_LOCALE_SYNTAX + assert(categories[LC_SYNTAX_INDEX] == LC_SYNTAX); + assert(strEQ(category_names[LC_SYNTAX_INDEX], "LC_SYNTAX")); +# ifdef USE_POSIX_2008_LOCALE + assert(category_masks[LC_SYNTAX_INDEX] == LC_SYNTAX_MASK); +# endif +# endif +# ifdef USE_LOCALE_TOD + assert(categories[LC_TOD_INDEX] == LC_TOD); + assert(strEQ(category_names[LC_TOD_INDEX], "LC_TOD")); +# ifdef USE_POSIX_2008_LOCALE + assert(category_masks[LC_TOD_INDEX] == LC_TOD_MASK); +# endif +# endif # ifdef LC_ALL assert(categories[LC_ALL_INDEX] == LC_ALL); assert(strEQ(category_names[LC_ALL_INDEX], "LC_ALL")); @@ -3273,6 +3490,18 @@ Perl_init_i18nl10n(pTHX_ int printwarn) # endif # endif /* DEBUGGING */ + /* Initialize the per-thread mbrFOO() state variables. See POSIX.xs for + * why these particular incantations are used. */ +#ifdef HAS_MBRLEN + memzero(&PL_mbrlen_ps, sizeof(PL_mbrlen_ps)); +#endif +#ifdef HAS_MBRTOWC + memzero(&PL_mbrtowc_ps, sizeof(PL_mbrtowc_ps)); +#endif +#ifdef HAS_WCTOMBR + wcrtomb(NULL, L'\0', &PL_wcrtomb_ps); +#endif + /* Initialize the cache of the program's UTF-8ness for the always known * locales C and POSIX */ my_strlcpy(PL_locale_utf8ness, C_and_POSIX_utf8ness, @@ -3298,8 +3527,12 @@ Perl_init_i18nl10n(pTHX_ int printwarn) # endif +# ifdef USE_LOCALE_NUMERIC + PL_numeric_radix_sv = newSVpvs("."); +# endif + # if defined(USE_POSIX_2008_LOCALE) && ! defined(HAS_QUERYLOCALE) /* Initialize our records. If we have POSIX 2008, we have LC_ALL */ @@ -3705,15 +3938,6 @@ Perl_init_i18nl10n(pTHX_ int printwarn) } # endif -# ifdef __GLIBC__ - - Safefree(language); - -# endif - - Safefree(lc_all); - Safefree(lang); - #endif /* USE_LOCALE */ #ifdef DEBUGGING @@ -3839,6 +4063,7 @@ Perl__mem_collxfrm(pTHX_ const char *input_string, cur_min_x + COLLXFRM_HDR_LEN)) { PL_strxfrm_NUL_replacement = j; + Safefree(cur_min_x); cur_min_x = x; } else { @@ -3994,6 +4219,7 @@ Perl__mem_collxfrm(pTHX_ const char *input_string, cur_max_x + COLLXFRM_HDR_LEN)) { PL_strxfrm_max_cp = j; + Safefree(cur_max_x); cur_max_x = x; } else { @@ -4235,11 +4461,6 @@ Perl__mem_collxfrm(pTHX_ const char *input_string, return xbuf; bad: - Safefree(xbuf); - if (s != input_string) { - Safefree(s); - } - *xlen = 0; # ifdef DEBUGGING @@ -4249,6 +4470,12 @@ Perl__mem_collxfrm(pTHX_ const char *input_string, # endif + Safefree(xbuf); + if (s != input_string) { + Safefree(s); + } + *xlen = 0; + return NULL; } @@ -4279,6 +4506,11 @@ S_print_collxfrm_input_and_return(pTHX_ PerlIO_printf(Perl_debug_log, "'\n"); } +# endif /* DEBUGGING */ +#endif /* USE_LOCALE_COLLATE */ +#ifdef USE_LOCALE +# ifdef DEBUGGING + STATIC void S_print_bytes_for_locale(pTHX_ const char * const s, @@ -4315,9 +4547,6 @@ S_print_bytes_for_locale(pTHX_ } # endif /* #ifdef DEBUGGING */ -#endif /* USE_LOCALE_COLLATE */ - -#ifdef USE_LOCALE STATIC const char * S_switch_category_locale_to_template(pTHX_ const int switch_category, const int template_category, const char * template_locale) @@ -4361,7 +4590,7 @@ S_switch_category_locale_to_template(pTHX_ const int switch_category, const int Safefree(restore_to_locale); DEBUG_Lv(PerlIO_printf(Perl_debug_log, "%s locale unchanged as %s\n", - category_name(switch_category), restore_to_locale)); + category_name(switch_category), template_locale)); return NULL; } @@ -4401,6 +4630,9 @@ S_restore_switched_locale(pTHX_ const int category, const char * const original_ Safefree(original_locale); } +/* is_cur_LC_category_utf8 uses a small char buffer to avoid malloc/free */ +#define CUR_LC_BUFFER_SIZE 64 + bool Perl__is_cur_LC_category_utf8(pTHX_ int category) { @@ -4438,6 +4670,7 @@ Perl__is_cur_LC_category_utf8(pTHX_ int category) the name in the cache */ char * delimited; /* The name plus the delimiters used to store it in the cache */ + char buffer[CUR_LC_BUFFER_SIZE]; /* small buffer */ char * name_pos; /* position of 'delimited' in the cache, or 0 if not there */ @@ -4466,9 +4699,15 @@ Perl__is_cur_LC_category_utf8(pTHX_ int category) * utf8ness digit */ input_name_len_with_overhead = input_name_len + 3; - /* Allocate and populate space for a copy of the name surrounded by the - * delimiters */ - Newx(delimited, input_name_len_with_overhead, char); + if ( input_name_len_with_overhead <= CUR_LC_BUFFER_SIZE ) { + /* we can use the buffer, avoid a malloc */ + delimited = buffer; + } else { /* need a malloc */ + /* Allocate and populate space for a copy of the name surrounded by the + * delimiters */ + Newx(delimited, input_name_len_with_overhead, char); + } + delimited[0] = UTF8NESS_SEP[0]; Copy(save_input_locale, delimited + 1, input_name_len, char); delimited[input_name_len+1] = UTF8NESS_PREFIX[0]; @@ -4502,7 +4741,8 @@ Perl__is_cur_LC_category_utf8(pTHX_ int category) utf8ness_cache[input_name_len_with_overhead - 1] = is_utf8 + '0'; } - Safefree(delimited); + /* free only when not using the buffer */ + if ( delimited != buffer ) Safefree(delimited); Safefree(save_input_locale); return is_utf8; } @@ -4625,11 +4865,12 @@ Perl__is_cur_LC_category_utf8(pTHX_ int category) && wc == (wchar_t) UNICODE_REPLACEMENT); } +# endif + restore_switched_locale(LC_CTYPE, original_ctype_locale); goto finish_and_return; } -# endif # else /* Here, we must have a C89 compiler that doesn't have mbtowc(). Next @@ -4861,9 +5102,9 @@ Perl__is_cur_LC_category_utf8(pTHX_ int category) is_utf8 = TRUE; goto finish_and_return; } - } # endif + } # endif /* Other common encodings are the ISO 8859 series, which aren't UTF-8. But @@ -4927,9 +5168,7 @@ Perl__is_cur_LC_category_utf8(pTHX_ int category) Copy(delimited, utf8ness_cache, input_name_len_with_overhead - 1, char); utf8ness_cache[input_name_len_with_overhead - 1] = is_utf8 + '0'; - if ((PL_locale_utf8ness[strlen(PL_locale_utf8ness)-1] - & (PERL_UINTMAX_T) ~1) != '0') - { + if ((PL_locale_utf8ness[strlen(PL_locale_utf8ness)-1] & ~1) != '0') { Perl_croak(aTHX_ "panic: %s: %d: Corrupt utf8ness_cache=%s\nlen=%zu," " inserted_name=%s, its_len=%zu\n", @@ -4959,6 +5198,7 @@ Perl__is_cur_LC_category_utf8(pTHX_ int category) s++; e = strchr(s, UTF8NESS_PREFIX[0]); if (! e) { + e = PL_locale_utf8ness + strlen(PL_locale_utf8ness); Perl_croak(aTHX_ "panic: %s: %d: Corrupt utf8ness_cache: missing" " separator %.*s<-- HERE %s\n", @@ -4996,7 +5236,8 @@ Perl__is_cur_LC_category_utf8(pTHX_ int category) # endif - Safefree(delimited); + /* free only when not using the buffer */ + if ( delimited != buffer ) Safefree(delimited); Safefree(save_input_locale); return is_utf8; } @@ -5006,22 +5247,21 @@ Perl__is_cur_LC_category_utf8(pTHX_ int category) bool Perl__is_in_locale_category(pTHX_ const bool compiling, const int category) { - dVAR; /* Internal function which returns if we are in the scope of a pragma that * enables the locale category 'category'. 'compiling' should indicate if * this is during the compilation phase (TRUE) or not (FALSE). */ const COP * const cop = (compiling) ? &PL_compiling : PL_curcop; - SV *categories = cop_hints_fetch_pvs(cop, "locale", 0); - if (! categories || categories == &PL_sv_placeholder) { + SV *these_categories = cop_hints_fetch_pvs(cop, "locale", 0); + if (! these_categories || these_categories == &PL_sv_placeholder) { return FALSE; } /* The pseudo-category 'not_characters' is -1, so just add 1 to each to get * a valid unsigned */ assert(category >= -1); - return cBOOL(SvUV(categories) & (1U << (category + 1))); + return cBOOL(SvUV(these_categories) & (1U << (category + 1))); } char * @@ -5037,7 +5277,6 @@ Perl_my_strerror(pTHX_ const int errnum) * to the C locale */ char *errstr; - dVAR; #ifndef USE_LOCALE_MESSAGES @@ -5065,9 +5304,9 @@ Perl_my_strerror(pTHX_ const int errnum) Safefree(save_locale); } -# elif defined(HAS_POSIX_2008_LOCALE) \ - && defined(HAS_STRERROR_L) \ - && defined(HAS_DUPLOCALE) +# elif defined(USE_POSIX_2008_LOCALE) \ + && defined(HAS_STRERROR_L) \ + && defined(HAS_DUPLOCALE) /* This function is also trivial if we don't have to worry about thread * safety and have strerror_l(), as it handles the switch of locales so we @@ -5169,9 +5408,7 @@ Perl_my_strerror(pTHX_ const int errnum) LOCALE_UNLOCK; # endif /* End of doesn't have strerror_l */ -#endif /* End of does have locale messages */ - -#ifdef DEBUGGING +# ifdef DEBUGGING if (DEBUG_Lv_TEST) { PerlIO_printf(Perl_debug_log, "Strerror returned; saving a copy: '"); @@ -5179,7 +5416,8 @@ Perl_my_strerror(pTHX_ const int errnum) PerlIO_printf(Perl_debug_log, "'\n"); } -#endif +# endif +#endif /* End of does have locale messages */ SAVEFREEPV(errstr); return errstr; @@ -5189,7 +5427,7 @@ Perl_my_strerror(pTHX_ const int errnum) =for apidoc switch_to_global_locale -On systems without locale support, or on single-threaded builds, or on +On systems without locale support, or on typical single-threaded builds, or on platforms that do not support per-thread locale operations, this function does nothing. On such systems that do have locale support, only a locale global to the whole program is available. @@ -5301,10 +5539,17 @@ L|perlapi/switch_to_global_locale>. bool Perl_sync_locale() { + +#ifndef USE_LOCALE + + return TRUE; + +#else + const char * newlocale; dTHX; -#ifdef USE_POSIX_2008_LOCALE +# ifdef USE_POSIX_2008_LOCALE bool was_in_global_locale = FALSE; locale_t cur_obj = uselocale((locale_t) 0); @@ -5316,11 +5561,11 @@ Perl_sync_locale() * will affect the */ if (cur_obj == LC_GLOBAL_LOCALE) { -# ifdef HAS_QUERY_LOCALE +# ifdef HAS_QUERY_LOCALE do_setlocale_c(LC_ALL, setlocale(LC_ALL, NULL)); -# else +# else unsigned int i; @@ -5330,17 +5575,17 @@ Perl_sync_locale() do_setlocale_r(categories[i], setlocale(categories[i], NULL)); } -# endif +# endif was_in_global_locale = TRUE; } -#else +# else bool was_in_global_locale = TRUE; -#endif -#ifdef USE_LOCALE_CTYPE +# endif +# ifdef USE_LOCALE_CTYPE newlocale = savepv(do_setlocale_c(LC_CTYPE, NULL)); DEBUG_Lv(PerlIO_printf(Perl_debug_log, @@ -5349,8 +5594,8 @@ Perl_sync_locale() new_ctype(newlocale); Safefree(newlocale); -#endif /* USE_LOCALE_CTYPE */ -#ifdef USE_LOCALE_COLLATE +# endif /* USE_LOCALE_CTYPE */ +# ifdef USE_LOCALE_COLLATE newlocale = savepv(do_setlocale_c(LC_COLLATE, NULL)); DEBUG_Lv(PerlIO_printf(Perl_debug_log, @@ -5359,8 +5604,8 @@ Perl_sync_locale() new_collate(newlocale); Safefree(newlocale); -#endif -#ifdef USE_LOCALE_NUMERIC +# endif +# ifdef USE_LOCALE_NUMERIC newlocale = savepv(do_setlocale_c(LC_NUMERIC, NULL)); DEBUG_Lv(PerlIO_printf(Perl_debug_log, @@ -5369,9 +5614,12 @@ Perl_sync_locale() new_numeric(newlocale); Safefree(newlocale); -#endif /* USE_LOCALE_NUMERIC */ +# endif /* USE_LOCALE_NUMERIC */ return was_in_global_locale; + +#endif + } #if defined(DEBUGGING) && defined(USE_LOCALE) @@ -5390,11 +5638,7 @@ S_setlocale_debug_string(const int category, /* category number, * be overwritten by the next call, so this should be used just to * formulate a string to immediately print or savepv() on. */ - /* initialise to a non-null value to keep it out of BSS and so keep - * -DPERL_GLOBAL_STRUCT_PRIVATE happy */ - static char ret[256] = "If you can read this, thank your buggy C" - " library strlcpy(), and change your hints file" - " to undef it"; + static char ret[256]; my_strlcpy(ret, "setlocale(", sizeof(ret)); my_strlcat(ret, category_name(category), sizeof(ret)); @@ -5470,7 +5714,7 @@ Perl_thread_locale_term() { /* Free up */ locale_t cur_obj = uselocale(LC_GLOBAL_LOCALE); - if (cur_obj != LC_GLOBAL_LOCALE) { + if (cur_obj != LC_GLOBAL_LOCALE && cur_obj != PL_C_locale_obj) { freelocale(cur_obj); } }