X-Git-Url: https://perl5.git.perl.org/perl5.git/blobdiff_plain/7bb8f702f9ddfc65ea019531908a111e0d4d8351..8e9f3eef1e2836fe28e16465c5a7bb39af947d81:/locale.c diff --git a/locale.c b/locale.c index 7653340..81aa00e 100644 --- a/locale.c +++ b/locale.c @@ -53,6 +53,9 @@ #ifdef I_WCHAR # include #endif +#ifdef I_WCTYPE +# include +#endif /* If the environment says to, we can output debugging information during * initialization. This is done before option parsing, and before any thread @@ -207,7 +210,7 @@ const int categories[] = { /* The top-most real element is LC_ALL */ -const char * category_names[] = { +const char * const category_names[] = { # ifdef USE_LOCALE_NUMERIC "LC_NUMERIC", @@ -331,7 +334,7 @@ S_category_name(const int category) # define LC_COLLATE_INDEX _DUMMY_CTYPE + 1 # define _DUMMY_COLLATE LC_COLLATE_INDEX # else -# define _DUMMY_COLLATE _DUMMY_COLLATE +# define _DUMMY_COLLATE _DUMMY_CTYPE # endif # ifdef USE_LOCALE_TIME # define LC_TIME_INDEX _DUMMY_COLLATE + 1 @@ -586,12 +589,15 @@ S_emulate_setlocale(const int category, /* If this assert fails, adjust the size of curlocales in intrpvar.h */ STATIC_ASSERT_STMT(C_ARRAY_LENGTH(PL_curlocales) > LC_ALL_INDEX); -# if defined(_NL_LOCALE_NAME) && defined(DEBUGGING) - +# if defined(_NL_LOCALE_NAME) \ + && defined(DEBUGGING) \ + && ! defined(SETLOCALE_ACCEPTS_ANY_LOCALE_NAME) + /* On systems that accept any locale name, the real underlying locale + * is often returned by this internal function, so we can't use it */ { /* Internal glibc for querylocale(), but doesn't handle * empty-string ("") locale properly; who knows what other - * glitches. Check it for now, under debug. */ + * glitches. Check for it now, under debug. */ char * temp_name = nl_langinfo_l(_NL_LOCALE_NAME(category), uselocale((locale_t) 0)); @@ -996,89 +1002,131 @@ S_emulate_setlocale(const int category, # ifdef DEBUGGING if (DEBUG_Lv_TEST || debug_initialization) { - PerlIO_printf(Perl_debug_log, "%s:%d: emulate_setlocale now using %p\n", __FILE__, __LINE__, PL_C_locale_obj); + PerlIO_printf(Perl_debug_log, + "%s:%d: emulate_setlocale now using %p\n", + __FILE__, __LINE__, PL_C_locale_obj); } # endif - /* If we weren't in a thread safe locale, set so that newlocale() below - which uses 'old_obj', uses an empty one. Same for our reserved C object. - The latter is defensive coding, so that, even if there is some bug, we - will never end up trying to modify either of these, as if passed to - newlocale(), they can be. */ - if (old_obj == LC_GLOBAL_LOCALE || old_obj == PL_C_locale_obj) { - old_obj = (locale_t) 0; - } - - /* Ready to create a new locale by modification of the exising one */ - new_obj = newlocale(mask, locale, old_obj); - - if (! new_obj) { - dSAVE_ERRNO; + /* If we are switching to the LC_ALL C locale, it already exists. Use + * it instead of trying to create a new locale */ + if (mask == LC_ALL_MASK && isNAME_C_OR_POSIX(locale)) { # ifdef DEBUGGING - if (DEBUG_L_TEST || debug_initialization) { - PerlIO_printf(Perl_debug_log, "%s:%d: emulate_setlocale creating new object failed: %d\n", __FILE__, __LINE__, GET_ERRNO); + if (DEBUG_Lv_TEST || debug_initialization) { + PerlIO_printf(Perl_debug_log, + "%s:%d: will stay in C object\n", __FILE__, __LINE__); } # endif - if (! uselocale(old_obj)) { + new_obj = PL_C_locale_obj; + + /* We already had switched to the C locale in preparation for freeing + * 'old_obj' */ + if (old_obj != LC_GLOBAL_LOCALE && old_obj != PL_C_locale_obj) { + freelocale(old_obj); + } + } + else { + /* If we weren't in a thread safe locale, set so that newlocale() below + * which uses 'old_obj', uses an empty one. Same for our reserved C + * object. The latter is defensive coding, so that, even if there is + * some bug, we will never end up trying to modify either of these, as + * if passed to newlocale(), they can be. */ + if (old_obj == LC_GLOBAL_LOCALE || old_obj == PL_C_locale_obj) { + old_obj = (locale_t) 0; + } + + /* Ready to create a new locale by modification of the exising one */ + new_obj = newlocale(mask, locale, old_obj); + + if (! new_obj) { + dSAVE_ERRNO; # ifdef DEBUGGING if (DEBUG_L_TEST || debug_initialization) { - PerlIO_printf(Perl_debug_log, "%s:%d: switching back failed: %d\n", __FILE__, __LINE__, GET_ERRNO); + PerlIO_printf(Perl_debug_log, + "%s:%d: emulate_setlocale creating new object" + " failed: %d\n", __FILE__, __LINE__, GET_ERRNO); } # endif - } - RESTORE_ERRNO; - return NULL; - } + if (! uselocale(old_obj)) { # ifdef DEBUGGING - if (DEBUG_Lv_TEST || debug_initialization) { - PerlIO_printf(Perl_debug_log, "%s:%d: emulate_setlocale created %p; should have freed %p\n", __FILE__, __LINE__, new_obj, old_obj); - } + if (DEBUG_L_TEST || debug_initialization) { + PerlIO_printf(Perl_debug_log, + "%s:%d: switching back failed: %d\n", + __FILE__, __LINE__, GET_ERRNO); + } # endif - /* And switch into it */ - if (! uselocale(new_obj)) { - dSAVE_ERRNO; + } + RESTORE_ERRNO; + return NULL; + } # ifdef DEBUGGING - if (DEBUG_L_TEST || debug_initialization) { - PerlIO_printf(Perl_debug_log, "%s:%d: emulate_setlocale switching to new object failed\n", __FILE__, __LINE__); + if (DEBUG_Lv_TEST || debug_initialization) { + PerlIO_printf(Perl_debug_log, + "%s:%d: emulate_setlocale created %p", + __FILE__, __LINE__, new_obj); + if (old_obj) { + PerlIO_printf(Perl_debug_log, + "; should have freed %p", old_obj); + } + PerlIO_printf(Perl_debug_log, "\n"); } # endif - if (! uselocale(old_obj)) { + /* And switch into it */ + if (! uselocale(new_obj)) { + dSAVE_ERRNO; # ifdef DEBUGGING if (DEBUG_L_TEST || debug_initialization) { - PerlIO_printf(Perl_debug_log, "%s:%d: switching back failed: %d\n", __FILE__, __LINE__, GET_ERRNO); + PerlIO_printf(Perl_debug_log, + "%s:%d: emulate_setlocale switching to new object" + " failed\n", __FILE__, __LINE__); } # endif + if (! uselocale(old_obj)) { + +# ifdef DEBUGGING + + if (DEBUG_L_TEST || debug_initialization) { + PerlIO_printf(Perl_debug_log, + "%s:%d: switching back failed: %d\n", + __FILE__, __LINE__, GET_ERRNO); + } + +# endif + + } + freelocale(new_obj); + RESTORE_ERRNO; + return NULL; } - freelocale(new_obj); - RESTORE_ERRNO; - return NULL; } # ifdef DEBUGGING if (DEBUG_Lv_TEST || debug_initialization) { - PerlIO_printf(Perl_debug_log, "%s:%d: emulate_setlocale now using %p\n", __FILE__, __LINE__, new_obj); + PerlIO_printf(Perl_debug_log, + "%s:%d: emulate_setlocale now using %p\n", + __FILE__, __LINE__, new_obj); } # endif @@ -1264,6 +1312,7 @@ S_locking_setlocale(pTHX_ } #endif +#ifdef USE_LOCALE STATIC void S_set_numeric_radix(pTHX_ const bool use_locale) @@ -1299,6 +1348,10 @@ S_set_numeric_radix(pTHX_ const bool use_locale) } # endif +#else + + PERL_UNUSED_ARG(use_locale); + #endif /* USE_LOCALE_NUMERIC and can find the radix char */ } @@ -1481,7 +1534,6 @@ S_new_ctype(pTHX_ const char *newctype) #ifndef USE_LOCALE_CTYPE - PERL_ARGS_ASSERT_NEW_CTYPE; PERL_UNUSED_ARG(newctype); PERL_UNUSED_CONTEXT; @@ -1503,6 +1555,7 @@ S_new_ctype(pTHX_ const char *newctype) /* Don't check for problems if we are suppressing the warnings */ bool check_for_problems = ckWARN_d(WARN_LOCALE) || UNLIKELY(DEBUG_L_TEST); + bool maybe_utf8_turkic = FALSE; PERL_ARGS_ASSERT_NEW_CTYPE; @@ -1519,6 +1572,23 @@ S_new_ctype(pTHX_ const char *newctype) * handle this specially because of the three problematic code points */ if (PL_in_utf8_CTYPE_locale) { Copy(PL_fold_latin1, PL_fold_locale, 256, U8); + + /* UTF-8 locales can have special handling for 'I' and 'i' if they are + * Turkic. Make sure these two are the only anomalies. (We don't use + * towupper and towlower because they aren't in C89.) */ + +#if defined(HAS_TOWUPPER) && defined (HAS_TOWLOWER) + + if (towupper('i') == 0x130 && towlower('I') == 0x131) { + +#else + + if (toupper('i') == 'i' && tolower('I') == 'I') { + +#endif + check_for_problems = TRUE; + maybe_utf8_turkic = TRUE; + } } /* We don't populate the other lists if a UTF-8 locale, but do check that @@ -1664,6 +1734,19 @@ S_new_ctype(pTHX_ const char *newctype) } } + if (bad_count == 2 && maybe_utf8_turkic) { + bad_count = 0; + *bad_chars_list = '\0'; + PL_fold_locale['I'] = 'I'; + PL_fold_locale['i'] = 'i'; + PL_in_utf8_turkic_locale = TRUE; + DEBUG_L(PerlIO_printf(Perl_debug_log, "%s:%d: %s is turkic\n", + __FILE__, __LINE__, newctype)); + } + else { + PL_in_utf8_turkic_locale = FALSE; + } + # ifdef MB_CUR_MAX /* We only handle single-byte locales (outside of UTF-8 ones; so if @@ -1689,7 +1772,10 @@ S_new_ctype(pTHX_ const char *newctype) # endif - if (UNLIKELY(bad_count) || UNLIKELY(multi_byte_locale)) { + /* If we found problems and we want them output, do so */ + if ( (UNLIKELY(bad_count) || UNLIKELY(multi_byte_locale)) + && (LIKELY(ckWARN_d(WARN_LOCALE)) || UNLIKELY(DEBUG_L_TEST))) + { if (UNLIKELY(bad_count) && PL_in_utf8_CTYPE_locale) { PL_warn_locale = Perl_newSVpvf(aTHX_ "Locale '%s' contains (at least) the following characters" @@ -1994,6 +2080,8 @@ S_new_collate(pTHX_ const char *newcoll) } +#endif + #ifdef WIN32 STATIC char * @@ -2139,11 +2227,20 @@ Perl_setlocale(const int category, const char * locale) { /* This wraps POSIX::setlocale() */ +#ifdef NO_LOCALE + + PERL_UNUSED_ARG(category); + PERL_UNUSED_ARG(locale); + + return "C"; + +#else + const char * retval; const char * newlocale; dSAVEDERRNO; - DECLARATION_FOR_LC_NUMERIC_MANIPULATION; dTHX; + DECLARATION_FOR_LC_NUMERIC_MANIPULATION; #ifdef USE_LOCALE_NUMERIC @@ -2262,6 +2359,8 @@ Perl_setlocale(const int category, const char * locale) return retval; +#endif + } PERL_STATIC_INLINE const char * @@ -2414,13 +2513,16 @@ S_my_nl_langinfo(const int item, bool toggle) dTHX; const char * retval; +#ifdef USE_LOCALE_NUMERIC + /* We only need to toggle into the underlying LC_NUMERIC locale for these * two items, and only if not already there */ if (toggle && (( item != RADIXCHAR && item != THOUSEP) || PL_numeric_underlying)) - { + +#endif /* No toggling needed if not using LC_NUMERIC */ + toggle = FALSE; - } #if defined(HAS_NL_LANGINFO) /* nl_langinfo() is available. */ # if ! defined(HAS_THREAD_SAFE_NL_LANGINFO_L) \ @@ -2468,6 +2570,8 @@ S_my_nl_langinfo(const int item, bool toggle) do_free = TRUE; } +# ifdef USE_LOCALE_NUMERIC + if (toggle) { if (PL_underlying_numeric_obj) { cur = PL_underlying_numeric_obj; @@ -2478,6 +2582,8 @@ S_my_nl_langinfo(const int item, bool toggle) } } +# endif + /* We have to save it to a buffer, because the freelocale() just below * can invalidate the internal one */ retval = save_to_buffer(nl_langinfo_l(item, cur), @@ -3113,6 +3219,8 @@ Perl_init_i18nl10n(pTHX_ int printwarn) * values for our db, instead of trying to change them. * */ + dVAR; + int ok = 1; #ifndef USE_LOCALE @@ -3298,8 +3406,12 @@ Perl_init_i18nl10n(pTHX_ int printwarn) # endif +# ifdef USE_LOCALE_NUMERIC + PL_numeric_radix_sv = newSVpvs("."); +# endif + # if defined(USE_POSIX_2008_LOCALE) && ! defined(HAS_QUERYLOCALE) /* Initialize our records. If we have POSIX 2008, we have LC_ALL */ @@ -4279,6 +4391,11 @@ S_print_collxfrm_input_and_return(pTHX_ PerlIO_printf(Perl_debug_log, "'\n"); } +# endif /* DEBUGGING */ +#endif /* USE_LOCALE_COLLATE */ +#ifdef USE_LOCALE +# ifdef DEBUGGING + STATIC void S_print_bytes_for_locale(pTHX_ const char * const s, @@ -4315,9 +4432,6 @@ S_print_bytes_for_locale(pTHX_ } # endif /* #ifdef DEBUGGING */ -#endif /* USE_LOCALE_COLLATE */ - -#ifdef USE_LOCALE STATIC const char * S_switch_category_locale_to_template(pTHX_ const int switch_category, const int template_category, const char * template_locale) @@ -4401,6 +4515,9 @@ S_restore_switched_locale(pTHX_ const int category, const char * const original_ Safefree(original_locale); } +/* is_cur_LC_category_utf8 uses a small char buffer to avoid malloc/free */ +#define CUR_LC_BUFFER_SIZE 64 + bool Perl__is_cur_LC_category_utf8(pTHX_ int category) { @@ -4438,6 +4555,7 @@ Perl__is_cur_LC_category_utf8(pTHX_ int category) the name in the cache */ char * delimited; /* The name plus the delimiters used to store it in the cache */ + char buffer[CUR_LC_BUFFER_SIZE]; /* small buffer */ char * name_pos; /* position of 'delimited' in the cache, or 0 if not there */ @@ -4466,9 +4584,15 @@ Perl__is_cur_LC_category_utf8(pTHX_ int category) * utf8ness digit */ input_name_len_with_overhead = input_name_len + 3; - /* Allocate and populate space for a copy of the name surrounded by the - * delimiters */ - Newx(delimited, input_name_len_with_overhead, char); + if ( input_name_len_with_overhead <= CUR_LC_BUFFER_SIZE ) { + /* we can use the buffer, avoid a malloc */ + delimited = buffer; + } else { /* need a malloc */ + /* Allocate and populate space for a copy of the name surrounded by the + * delimiters */ + Newx(delimited, input_name_len_with_overhead, char); + } + delimited[0] = UTF8NESS_SEP[0]; Copy(save_input_locale, delimited + 1, input_name_len, char); delimited[input_name_len+1] = UTF8NESS_PREFIX[0]; @@ -4502,7 +4626,8 @@ Perl__is_cur_LC_category_utf8(pTHX_ int category) utf8ness_cache[input_name_len_with_overhead - 1] = is_utf8 + '0'; } - Safefree(delimited); + /* free only when not using the buffer */ + if ( delimited != buffer ) Safefree(delimited); Safefree(save_input_locale); return is_utf8; } @@ -4625,11 +4750,12 @@ Perl__is_cur_LC_category_utf8(pTHX_ int category) && wc == (wchar_t) UNICODE_REPLACEMENT); } +# endif + restore_switched_locale(LC_CTYPE, original_ctype_locale); goto finish_and_return; } -# endif # else /* Here, we must have a C89 compiler that doesn't have mbtowc(). Next @@ -4861,9 +4987,9 @@ Perl__is_cur_LC_category_utf8(pTHX_ int category) is_utf8 = TRUE; goto finish_and_return; } - } # endif + } # endif /* Other common encodings are the ISO 8859 series, which aren't UTF-8. But @@ -4996,7 +5122,8 @@ Perl__is_cur_LC_category_utf8(pTHX_ int category) # endif - Safefree(delimited); + /* free only when not using the buffer */ + if ( delimited != buffer ) Safefree(delimited); Safefree(save_input_locale); return is_utf8; } @@ -5013,15 +5140,15 @@ Perl__is_in_locale_category(pTHX_ const bool compiling, const int category) const COP * const cop = (compiling) ? &PL_compiling : PL_curcop; - SV *categories = cop_hints_fetch_pvs(cop, "locale", 0); - if (! categories || categories == &PL_sv_placeholder) { + SV *these_categories = cop_hints_fetch_pvs(cop, "locale", 0); + if (! these_categories || these_categories == &PL_sv_placeholder) { return FALSE; } /* The pseudo-category 'not_characters' is -1, so just add 1 to each to get * a valid unsigned */ assert(category >= -1); - return cBOOL(SvUV(categories) & (1U << (category + 1))); + return cBOOL(SvUV(these_categories) & (1U << (category + 1))); } char * @@ -5169,9 +5296,7 @@ Perl_my_strerror(pTHX_ const int errnum) LOCALE_UNLOCK; # endif /* End of doesn't have strerror_l */ -#endif /* End of does have locale messages */ - -#ifdef DEBUGGING +# ifdef DEBUGGING if (DEBUG_Lv_TEST) { PerlIO_printf(Perl_debug_log, "Strerror returned; saving a copy: '"); @@ -5179,7 +5304,8 @@ Perl_my_strerror(pTHX_ const int errnum) PerlIO_printf(Perl_debug_log, "'\n"); } -#endif +# endif +#endif /* End of does have locale messages */ SAVEFREEPV(errstr); return errstr; @@ -5189,7 +5315,7 @@ Perl_my_strerror(pTHX_ const int errnum) =for apidoc switch_to_global_locale -On systems without locale support, or on single-threaded builds, or on +On systems without locale support, or on typical single-threaded builds, or on platforms that do not support per-thread locale operations, this function does nothing. On such systems that do have locale support, only a locale global to the whole program is available. @@ -5301,10 +5427,17 @@ L|perlapi/switch_to_global_locale>. bool Perl_sync_locale() { + +#ifndef USE_LOCALE + + return TRUE; + +#else + const char * newlocale; dTHX; -#ifdef USE_POSIX_2008_LOCALE +# ifdef USE_POSIX_2008_LOCALE bool was_in_global_locale = FALSE; locale_t cur_obj = uselocale((locale_t) 0); @@ -5316,11 +5449,11 @@ Perl_sync_locale() * will affect the */ if (cur_obj == LC_GLOBAL_LOCALE) { -# ifdef HAS_QUERY_LOCALE +# ifdef HAS_QUERY_LOCALE do_setlocale_c(LC_ALL, setlocale(LC_ALL, NULL)); -# else +# else unsigned int i; @@ -5330,17 +5463,17 @@ Perl_sync_locale() do_setlocale_r(categories[i], setlocale(categories[i], NULL)); } -# endif +# endif was_in_global_locale = TRUE; } -#else +# else bool was_in_global_locale = TRUE; -#endif -#ifdef USE_LOCALE_CTYPE +# endif +# ifdef USE_LOCALE_CTYPE newlocale = savepv(do_setlocale_c(LC_CTYPE, NULL)); DEBUG_Lv(PerlIO_printf(Perl_debug_log, @@ -5349,8 +5482,8 @@ Perl_sync_locale() new_ctype(newlocale); Safefree(newlocale); -#endif /* USE_LOCALE_CTYPE */ -#ifdef USE_LOCALE_COLLATE +# endif /* USE_LOCALE_CTYPE */ +# ifdef USE_LOCALE_COLLATE newlocale = savepv(do_setlocale_c(LC_COLLATE, NULL)); DEBUG_Lv(PerlIO_printf(Perl_debug_log, @@ -5359,8 +5492,8 @@ Perl_sync_locale() new_collate(newlocale); Safefree(newlocale); -#endif -#ifdef USE_LOCALE_NUMERIC +# endif +# ifdef USE_LOCALE_NUMERIC newlocale = savepv(do_setlocale_c(LC_NUMERIC, NULL)); DEBUG_Lv(PerlIO_printf(Perl_debug_log, @@ -5369,9 +5502,12 @@ Perl_sync_locale() new_numeric(newlocale); Safefree(newlocale); -#endif /* USE_LOCALE_NUMERIC */ +# endif /* USE_LOCALE_NUMERIC */ return was_in_global_locale; + +#endif + } #if defined(DEBUGGING) && defined(USE_LOCALE) @@ -5469,8 +5605,9 @@ Perl_thread_locale_term() # ifndef WIN32 { /* Free up */ + dVAR; locale_t cur_obj = uselocale(LC_GLOBAL_LOCALE); - if (cur_obj != LC_GLOBAL_LOCALE) { + if (cur_obj != LC_GLOBAL_LOCALE && cur_obj != PL_C_locale_obj) { freelocale(cur_obj); } }