X-Git-Url: https://perl5.git.perl.org/perl5.git/blobdiff_plain/80306bb6fd3a203fe3583693a061e7cf3bdce42a..e72200e71a601e2c7882a03502d6a68aaa59985c:/locale.c diff --git a/locale.c b/locale.c index 6a4e012..89dc07f 100644 --- a/locale.c +++ b/locale.c @@ -53,6 +53,9 @@ #ifdef I_WCHAR # include #endif +#ifdef I_WCTYPE +# include +#endif /* If the environment says to, we can output debugging information during * initialization. This is done before option parsing, and before any thread @@ -207,7 +210,7 @@ const int categories[] = { /* The top-most real element is LC_ALL */ -const char * category_names[] = { +const char * const category_names[] = { # ifdef USE_LOCALE_NUMERIC "LC_NUMERIC", @@ -331,7 +334,7 @@ S_category_name(const int category) # define LC_COLLATE_INDEX _DUMMY_CTYPE + 1 # define _DUMMY_COLLATE LC_COLLATE_INDEX # else -# define _DUMMY_COLLATE _DUMMY_COLLATE +# define _DUMMY_COLLATE _DUMMY_CTYPE # endif # ifdef USE_LOCALE_TIME # define LC_TIME_INDEX _DUMMY_COLLATE + 1 @@ -586,12 +589,15 @@ S_emulate_setlocale(const int category, /* If this assert fails, adjust the size of curlocales in intrpvar.h */ STATIC_ASSERT_STMT(C_ARRAY_LENGTH(PL_curlocales) > LC_ALL_INDEX); -# if defined(_NL_LOCALE_NAME) && defined(DEBUGGING) - +# if defined(_NL_LOCALE_NAME) \ + && defined(DEBUGGING) \ + && ! defined(SETLOCALE_ACCEPTS_ANY_LOCALE_NAME) + /* On systems that accept any locale name, the real underlying locale + * is often returned by this internal function, so we can't use it */ { /* Internal glibc for querylocale(), but doesn't handle * empty-string ("") locale properly; who knows what other - * glitches. Check it for now, under debug. */ + * glitches. Check for it now, under debug. */ char * temp_name = nl_langinfo_l(_NL_LOCALE_NAME(category), uselocale((locale_t) 0)); @@ -735,56 +741,7 @@ S_emulate_setlocale(const int category, } - assert(PL_C_locale_obj); - - /* Otherwise, we are switching locales. This will generally entail freeing - * the current one's space (at the C library's discretion). We need to - * stop using that locale before the switch. So switch to a known locale - * object that we don't otherwise mess with. This returns the locale - * object in effect at the time of the switch. */ - old_obj = uselocale(PL_C_locale_obj); - -# ifdef DEBUGGING - - if (DEBUG_Lv_TEST || debug_initialization) { - PerlIO_printf(Perl_debug_log, "%s:%d: emulate_setlocale was using %p\n", __FILE__, __LINE__, old_obj); - } - -# endif - - if (! old_obj) { - -# ifdef DEBUGGING - - if (DEBUG_L_TEST || debug_initialization) { - dSAVE_ERRNO; - PerlIO_printf(Perl_debug_log, "%s:%d: emulate_setlocale switching to C failed: %d\n", __FILE__, __LINE__, GET_ERRNO); - RESTORE_ERRNO; - } - -# endif - - return NULL; - } - -# ifdef DEBUGGING - - if (DEBUG_Lv_TEST || debug_initialization) { - PerlIO_printf(Perl_debug_log, "%s:%d: emulate_setlocale now using %p\n", __FILE__, __LINE__, PL_C_locale_obj); - } - -# endif - - /* If we weren't in a thread safe locale, set so that newlocale() below - which uses 'old_obj', uses an empty one. Same for our reserved C object. - The latter is defensive coding, so that, even if there is some bug, we - will never end up trying to modify either of these, as if passed to - newlocale(), they can be. */ - if (old_obj == LC_GLOBAL_LOCALE || old_obj == PL_C_locale_obj) { - old_obj = (locale_t) 0; - } - - /* Create the new locale (it may actually modify the current one). */ + /* Here, we are switching locales. */ # ifndef HAS_QUERYLOCALE @@ -910,6 +867,7 @@ S_emulate_setlocale(const int category, * other platforms do it differently, so we have to handle all cases * ourselves */ + unsigned int i; const char * s = locale; const char * e = locale + strlen(locale); const char * p = s; @@ -917,8 +875,17 @@ S_emulate_setlocale(const int category, const char * name_start; const char * name_end; + /* If the string that gives what to set doesn't include all categories, + * the omitted ones get set to "C". To get this behavior, first set + * all the individual categories to "C", and override the furnished + * ones below */ + for (i = 0; i < LC_ALL_INDEX; i++) { + if (! emulate_setlocale(categories[i], "C", i, TRUE)) { + return NULL; + } + } + while (s < e) { - unsigned int i; /* Parse through the category */ while (isWORDCHAR(*p)) { @@ -992,77 +959,174 @@ S_emulate_setlocale(const int category, ready_to_set: ; + /* Here at the end of having to deal with the absence of querylocale(). + * Some cases have already been fully handled by recursive calls to this + * function. But at this point, we haven't dealt with those, but are now + * prepared to, knowing what the locale name to set this category to is. + * This would have come for free if this system had had querylocale() */ + # endif /* end of ! querylocale */ - /* Ready to create a new locale by modification of the exising one */ - new_obj = newlocale(mask, locale, old_obj); + assert(PL_C_locale_obj); - if (! new_obj) { - dSAVE_ERRNO; + /* Switching locales generally entails freeing the current one's space (at + * the C library's discretion). We need to stop using that locale before + * the switch. So switch to a known locale object that we don't otherwise + * mess with. This returns the locale object in effect at the time of the + * switch. */ + old_obj = uselocale(PL_C_locale_obj); # ifdef DEBUGGING - if (DEBUG_L_TEST || debug_initialization) { - PerlIO_printf(Perl_debug_log, "%s:%d: emulate_setlocale creating new object failed: %d\n", __FILE__, __LINE__, GET_ERRNO); - } + if (DEBUG_Lv_TEST || debug_initialization) { + PerlIO_printf(Perl_debug_log, "%s:%d: emulate_setlocale was using %p\n", __FILE__, __LINE__, old_obj); + } # endif - if (! uselocale(old_obj)) { + if (! old_obj) { # ifdef DEBUGGING - if (DEBUG_L_TEST || debug_initialization) { - PerlIO_printf(Perl_debug_log, "%s:%d: switching back failed: %d\n", __FILE__, __LINE__, GET_ERRNO); - } + if (DEBUG_L_TEST || debug_initialization) { + dSAVE_ERRNO; + PerlIO_printf(Perl_debug_log, "%s:%d: emulate_setlocale switching to C failed: %d\n", __FILE__, __LINE__, GET_ERRNO); + RESTORE_ERRNO; + } # endif - } - RESTORE_ERRNO; return NULL; } # ifdef DEBUGGING if (DEBUG_Lv_TEST || debug_initialization) { - PerlIO_printf(Perl_debug_log, "%s:%d: emulate_setlocale created %p\n", __FILE__, __LINE__, new_obj); + PerlIO_printf(Perl_debug_log, + "%s:%d: emulate_setlocale now using %p\n", + __FILE__, __LINE__, PL_C_locale_obj); } # endif - /* And switch into it */ - if (! uselocale(new_obj)) { - dSAVE_ERRNO; + /* If we are switching to the LC_ALL C locale, it already exists. Use + * it instead of trying to create a new locale */ + if (mask == LC_ALL_MASK && isNAME_C_OR_POSIX(locale)) { # ifdef DEBUGGING - if (DEBUG_L_TEST || debug_initialization) { - PerlIO_printf(Perl_debug_log, "%s:%d: emulate_setlocale switching to new object failed\n", __FILE__, __LINE__); + if (DEBUG_Lv_TEST || debug_initialization) { + PerlIO_printf(Perl_debug_log, + "%s:%d: will stay in C object\n", __FILE__, __LINE__); } # endif - if (! uselocale(old_obj)) { + new_obj = PL_C_locale_obj; + + /* We already had switched to the C locale in preparation for freeing + * 'old_obj' */ + if (old_obj != LC_GLOBAL_LOCALE && old_obj != PL_C_locale_obj) { + freelocale(old_obj); + } + } + else { + /* If we weren't in a thread safe locale, set so that newlocale() below + * which uses 'old_obj', uses an empty one. Same for our reserved C + * object. The latter is defensive coding, so that, even if there is + * some bug, we will never end up trying to modify either of these, as + * if passed to newlocale(), they can be. */ + if (old_obj == LC_GLOBAL_LOCALE || old_obj == PL_C_locale_obj) { + old_obj = (locale_t) 0; + } + + /* Ready to create a new locale by modification of the exising one */ + new_obj = newlocale(mask, locale, old_obj); + + if (! new_obj) { + dSAVE_ERRNO; # ifdef DEBUGGING if (DEBUG_L_TEST || debug_initialization) { - PerlIO_printf(Perl_debug_log, "%s:%d: switching back failed: %d\n", __FILE__, __LINE__, GET_ERRNO); + PerlIO_printf(Perl_debug_log, + "%s:%d: emulate_setlocale creating new object" + " failed: %d\n", __FILE__, __LINE__, GET_ERRNO); } # endif + if (! uselocale(old_obj)) { + +# ifdef DEBUGGING + + if (DEBUG_L_TEST || debug_initialization) { + PerlIO_printf(Perl_debug_log, + "%s:%d: switching back failed: %d\n", + __FILE__, __LINE__, GET_ERRNO); + } + +# endif + + } + RESTORE_ERRNO; + return NULL; + } + +# ifdef DEBUGGING + + if (DEBUG_Lv_TEST || debug_initialization) { + PerlIO_printf(Perl_debug_log, + "%s:%d: emulate_setlocale created %p", + __FILE__, __LINE__, new_obj); + if (old_obj) { + PerlIO_printf(Perl_debug_log, + "; should have freed %p", old_obj); + } + PerlIO_printf(Perl_debug_log, "\n"); + } + +# endif + + /* And switch into it */ + if (! uselocale(new_obj)) { + dSAVE_ERRNO; + +# ifdef DEBUGGING + + if (DEBUG_L_TEST || debug_initialization) { + PerlIO_printf(Perl_debug_log, + "%s:%d: emulate_setlocale switching to new object" + " failed\n", __FILE__, __LINE__); + } + +# endif + + if (! uselocale(old_obj)) { + +# ifdef DEBUGGING + + if (DEBUG_L_TEST || debug_initialization) { + PerlIO_printf(Perl_debug_log, + "%s:%d: switching back failed: %d\n", + __FILE__, __LINE__, GET_ERRNO); + } + +# endif + + } + freelocale(new_obj); + RESTORE_ERRNO; + return NULL; } - freelocale(new_obj); - RESTORE_ERRNO; - return NULL; } # ifdef DEBUGGING if (DEBUG_Lv_TEST || debug_initialization) { - PerlIO_printf(Perl_debug_log, "%s:%d: emulate_setlocale now using %p\n", __FILE__, __LINE__, new_obj); + PerlIO_printf(Perl_debug_log, + "%s:%d: emulate_setlocale now using %p\n", + __FILE__, __LINE__, new_obj); } # endif @@ -1248,6 +1312,7 @@ S_locking_setlocale(pTHX_ } #endif +#ifdef USE_LOCALE STATIC void S_set_numeric_radix(pTHX_ const bool use_locale) @@ -1283,6 +1348,10 @@ S_set_numeric_radix(pTHX_ const bool use_locale) } # endif +#else + + PERL_UNUSED_ARG(use_locale); + #endif /* USE_LOCALE_NUMERIC and can find the radix char */ } @@ -1407,19 +1476,20 @@ Perl_set_numeric_standard(pTHX) * to our records (which could be wrong if some XS code has changed the * locale behind our back) */ - do_setlocale_c(LC_NUMERIC, "C"); - PL_numeric_standard = TRUE; - PL_numeric_underlying = PL_numeric_underlying_is_standard; - set_numeric_radix(0); - # ifdef DEBUGGING if (DEBUG_L_TEST || debug_initialization) { PerlIO_printf(Perl_debug_log, - "LC_NUMERIC locale now is standard C\n"); + "Setting LC_NUMERIC locale to standard C\n"); } # endif + + do_setlocale_c(LC_NUMERIC, "C"); + PL_numeric_standard = TRUE; + PL_numeric_underlying = PL_numeric_underlying_is_standard; + set_numeric_radix(0); + #endif /* USE_LOCALE_NUMERIC */ } @@ -1436,20 +1506,21 @@ Perl_set_numeric_underlying(pTHX) * if toggling isn't necessary according to our records (which could be * wrong if some XS code has changed the locale behind our back) */ - do_setlocale_c(LC_NUMERIC, PL_numeric_name); - PL_numeric_standard = PL_numeric_underlying_is_standard; - PL_numeric_underlying = TRUE; - set_numeric_radix(! PL_numeric_standard); - # ifdef DEBUGGING if (DEBUG_L_TEST || debug_initialization) { PerlIO_printf(Perl_debug_log, - "LC_NUMERIC locale now is %s\n", + "Setting LC_NUMERIC locale to %s\n", PL_numeric_name); } # endif + + do_setlocale_c(LC_NUMERIC, PL_numeric_name); + PL_numeric_standard = PL_numeric_underlying_is_standard; + PL_numeric_underlying = TRUE; + set_numeric_radix(! PL_numeric_standard); + #endif /* USE_LOCALE_NUMERIC */ } @@ -1463,7 +1534,6 @@ S_new_ctype(pTHX_ const char *newctype) #ifndef USE_LOCALE_CTYPE - PERL_ARGS_ASSERT_NEW_CTYPE; PERL_UNUSED_ARG(newctype); PERL_UNUSED_CONTEXT; @@ -1485,6 +1555,7 @@ S_new_ctype(pTHX_ const char *newctype) /* Don't check for problems if we are suppressing the warnings */ bool check_for_problems = ckWARN_d(WARN_LOCALE) || UNLIKELY(DEBUG_L_TEST); + bool maybe_utf8_turkic = FALSE; PERL_ARGS_ASSERT_NEW_CTYPE; @@ -1501,6 +1572,23 @@ S_new_ctype(pTHX_ const char *newctype) * handle this specially because of the three problematic code points */ if (PL_in_utf8_CTYPE_locale) { Copy(PL_fold_latin1, PL_fold_locale, 256, U8); + + /* UTF-8 locales can have special handling for 'I' and 'i' if they are + * Turkic. Make sure these two are the only anomalies. (We don't use + * towupper and towlower because they aren't in C89.) */ + +#if defined(HAS_TOWUPPER) && defined (HAS_TOWLOWER) + + if (towupper('i') == 0x130 && towlower('I') == 0x131) { + +#else + + if (toupper('i') == 'i' && tolower('I') == 'I') { + +#endif + check_for_problems = TRUE; + maybe_utf8_turkic = TRUE; + } } /* We don't populate the other lists if a UTF-8 locale, but do check that @@ -1538,18 +1626,22 @@ S_new_ctype(pTHX_ const char *newctype) && (isGRAPH_A(i) || isBLANK_A(i) || i == '\n')) { bool is_bad = FALSE; - char name[3] = { '\0' }; + char name[4] = { '\0' }; /* Convert the name into a string */ - if (isPRINT_A(i)) { + if (isGRAPH_A(i)) { name[0] = i; name[1] = '\0'; } else if (i == '\n') { - my_strlcpy(name, "\n", sizeof(name)); + my_strlcpy(name, "\\n", sizeof(name)); + } + else if (i == '\t') { + my_strlcpy(name, "\\t", sizeof(name)); } else { - my_strlcpy(name, "\t", sizeof(name)); + assert(i == ' '); + my_strlcpy(name, "' '", sizeof(name)); } /* Check each possibe class */ @@ -1642,6 +1734,19 @@ S_new_ctype(pTHX_ const char *newctype) } } + if (bad_count == 2 && maybe_utf8_turkic) { + bad_count = 0; + *bad_chars_list = '\0'; + PL_fold_locale['I'] = 'I'; + PL_fold_locale['i'] = 'i'; + PL_in_utf8_turkic_locale = TRUE; + DEBUG_L(PerlIO_printf(Perl_debug_log, "%s:%d: %s is turkic\n", + __FILE__, __LINE__, newctype)); + } + else { + PL_in_utf8_turkic_locale = FALSE; + } + # ifdef MB_CUR_MAX /* We only handle single-byte locales (outside of UTF-8 ones; so if @@ -1667,7 +1772,10 @@ S_new_ctype(pTHX_ const char *newctype) # endif - if (UNLIKELY(bad_count) || UNLIKELY(multi_byte_locale)) { + /* If we found problems and we want them output, do so */ + if ( (UNLIKELY(bad_count) || UNLIKELY(multi_byte_locale)) + && (LIKELY(ckWARN_d(WARN_LOCALE)) || UNLIKELY(DEBUG_L_TEST))) + { if (UNLIKELY(bad_count) && PL_in_utf8_CTYPE_locale) { PL_warn_locale = Perl_newSVpvf(aTHX_ "Locale '%s' contains (at least) the following characters" @@ -1972,6 +2080,8 @@ S_new_collate(pTHX_ const char *newcoll) } +#endif + #ifdef WIN32 STATIC char * @@ -2081,15 +2191,19 @@ S_win32_setlocale(pTHX_ int category, const char* locale) This is an (almost) drop-in replacement for the system L>, taking the same parameters, and returning the same information, except that it -returns the correct underlying C locale, instead of C always, as -perl keeps that locale category as C, changing it briefly during the -operations where the underlying one is required. +returns the correct underlying C locale. Regular C will +instead return C if the underlying locale has a non-dot decimal point +character, or a non-empty thousands separator for displaying floating point +numbers. This is because perl keeps that locale category such that it has a +dot and empty separator, changing the locale briefly during the operations +where the underlying one is required. C knows about this, and +compensates; regular C doesn't. Another reason it isn't completely a drop-in replacement is that it is declared to return S>, whereas the system setlocale omits the -C. (If it were being written today, plain setlocale would be declared -const, since it is illegal to change the information it returns; doing so leads -to segfaults.) +C (presumably because its API was specified long ago, and can't be +updated; it is illegal to change the information C returns; doing +so leads to segfaults.) Finally, C works under all circumstances, whereas plain C can be completely ineffective on some platforms under some @@ -2113,18 +2227,28 @@ Perl_setlocale(const int category, const char * locale) { /* This wraps POSIX::setlocale() */ +#ifdef NO_LOCALE + + PERL_UNUSED_ARG(category); + PERL_UNUSED_ARG(locale); + + return "C"; + +#else + const char * retval; const char * newlocale; dSAVEDERRNO; - DECLARATION_FOR_LC_NUMERIC_MANIPULATION; dTHX; + DECLARATION_FOR_LC_NUMERIC_MANIPULATION; #ifdef USE_LOCALE_NUMERIC /* A NULL locale means only query what the current one is. We have the * LC_NUMERIC name saved, because we are normally switched into the C - * locale for it. For an LC_ALL query, switch back to get the correct - * results. All other categories don't require special handling */ + * (or equivalent) locale for it. For an LC_ALL query, switch back to get + * the correct results. All other categories don't require special + * handling */ if (locale == NULL) { if (category == LC_NUMERIC) { @@ -2208,20 +2332,23 @@ Perl_setlocale(const int category, const char * locale) # ifdef USE_LOCALE_CTYPE - newlocale = do_setlocale_c(LC_CTYPE, NULL); + newlocale = savepv(do_setlocale_c(LC_CTYPE, NULL)); new_ctype(newlocale); + Safefree(newlocale); # endif /* USE_LOCALE_CTYPE */ # ifdef USE_LOCALE_COLLATE - newlocale = do_setlocale_c(LC_COLLATE, NULL); + newlocale = savepv(do_setlocale_c(LC_COLLATE, NULL)); new_collate(newlocale); + Safefree(newlocale); # endif # ifdef USE_LOCALE_NUMERIC - newlocale = do_setlocale_c(LC_NUMERIC, NULL); + newlocale = savepv(do_setlocale_c(LC_NUMERIC, NULL)); new_numeric(newlocale); + Safefree(newlocale); # endif /* USE_LOCALE_NUMERIC */ #endif /* LC_ALL */ @@ -2232,6 +2359,8 @@ Perl_setlocale(const int category, const char * locale) return retval; +#endif + } PERL_STATIC_INLINE const char * @@ -2291,13 +2420,14 @@ rather than getting segfaults at runtime. It delivers the correct results for the C and C items, without you having to write extra code. The reason for the extra code would be because these are from the C locale category, which is normally -kept set to the C locale by Perl, no matter what the underlying locale is -supposed to be, and so to get the expected results, you have to temporarily -toggle into the underlying locale, and later toggle back. (You could use plain -C and C> for this but -then you wouldn't get the other advantages of C; not keeping -C in the C locale would break a lot of CPAN, which is expecting the -radix (decimal point) character to be a dot.) +kept set by Perl so that the radix is a dot, and the separator is the empty +string, no matter what the underlying locale is supposed to be, and so to get +the expected results, you have to temporarily toggle into the underlying +locale, and later toggle back. (You could use plain C and +C> for this but then you wouldn't get +the other advantages of C; not keeping C in the C +(or equivalent) locale would break a lot of CPAN, which is expecting the radix +(decimal point) character to be a dot.) =item * @@ -2383,13 +2513,16 @@ S_my_nl_langinfo(const int item, bool toggle) dTHX; const char * retval; +#ifdef USE_LOCALE_NUMERIC + /* We only need to toggle into the underlying LC_NUMERIC locale for these * two items, and only if not already there */ if (toggle && (( item != RADIXCHAR && item != THOUSEP) || PL_numeric_underlying)) - { + +#endif /* No toggling needed if not using LC_NUMERIC */ + toggle = FALSE; - } #if defined(HAS_NL_LANGINFO) /* nl_langinfo() is available. */ # if ! defined(HAS_THREAD_SAFE_NL_LANGINFO_L) \ @@ -2437,6 +2570,8 @@ S_my_nl_langinfo(const int item, bool toggle) do_free = TRUE; } +# ifdef USE_LOCALE_NUMERIC + if (toggle) { if (PL_underlying_numeric_obj) { cur = PL_underlying_numeric_obj; @@ -2447,6 +2582,8 @@ S_my_nl_langinfo(const int item, bool toggle) } } +# endif + /* We have to save it to a buffer, because the freelocale() just below * can invalidate the internal one */ retval = save_to_buffer(nl_langinfo_l(item, cur), @@ -2569,8 +2706,8 @@ S_my_nl_langinfo(const int item, bool toggle) /* Here everything past the dot is a digit. Treat it as a * code page */ - save_to_buffer("CP", &PL_langinfo_buf, - &PL_langinfo_bufsize, 0); + retval = save_to_buffer("CP", &PL_langinfo_buf, + &PL_langinfo_bufsize, 0); offset = STRLENs("CP"); has_nondigit: @@ -3082,6 +3219,8 @@ Perl_init_i18nl10n(pTHX_ int printwarn) * values for our db, instead of trying to change them. * */ + dVAR; + int ok = 1; #ifndef USE_LOCALE @@ -3267,8 +3406,12 @@ Perl_init_i18nl10n(pTHX_ int printwarn) # endif +# ifdef USE_LOCALE_NUMERIC + PL_numeric_radix_sv = newSVpvs("."); +# endif + # if defined(USE_POSIX_2008_LOCALE) && ! defined(HAS_QUERYLOCALE) /* Initialize our records. If we have POSIX 2008, we have LC_ALL */ @@ -4248,6 +4391,11 @@ S_print_collxfrm_input_and_return(pTHX_ PerlIO_printf(Perl_debug_log, "'\n"); } +# endif /* DEBUGGING */ +#endif /* USE_LOCALE_COLLATE */ +#ifdef USE_LOCALE +# ifdef DEBUGGING + STATIC void S_print_bytes_for_locale(pTHX_ const char * const s, @@ -4284,9 +4432,6 @@ S_print_bytes_for_locale(pTHX_ } # endif /* #ifdef DEBUGGING */ -#endif /* USE_LOCALE_COLLATE */ - -#ifdef USE_LOCALE STATIC const char * S_switch_category_locale_to_template(pTHX_ const int switch_category, const int template_category, const char * template_locale) @@ -4370,6 +4515,9 @@ S_restore_switched_locale(pTHX_ const int category, const char * const original_ Safefree(original_locale); } +/* is_cur_LC_category_utf8 uses a small char buffer to avoid malloc/free */ +#define CUR_LC_BUFFER_SIZE 64 + bool Perl__is_cur_LC_category_utf8(pTHX_ int category) { @@ -4407,6 +4555,7 @@ Perl__is_cur_LC_category_utf8(pTHX_ int category) the name in the cache */ char * delimited; /* The name plus the delimiters used to store it in the cache */ + char buffer[CUR_LC_BUFFER_SIZE]; /* small buffer */ char * name_pos; /* position of 'delimited' in the cache, or 0 if not there */ @@ -4435,9 +4584,15 @@ Perl__is_cur_LC_category_utf8(pTHX_ int category) * utf8ness digit */ input_name_len_with_overhead = input_name_len + 3; - /* Allocate and populate space for a copy of the name surrounded by the - * delimiters */ - Newx(delimited, input_name_len_with_overhead, char); + if ( input_name_len_with_overhead <= CUR_LC_BUFFER_SIZE ) { + /* we can use the buffer, avoid a malloc */ + delimited = buffer; + } else { /* need a malloc */ + /* Allocate and populate space for a copy of the name surrounded by the + * delimiters */ + Newx(delimited, input_name_len_with_overhead, char); + } + delimited[0] = UTF8NESS_SEP[0]; Copy(save_input_locale, delimited + 1, input_name_len, char); delimited[input_name_len+1] = UTF8NESS_PREFIX[0]; @@ -4471,7 +4626,8 @@ Perl__is_cur_LC_category_utf8(pTHX_ int category) utf8ness_cache[input_name_len_with_overhead - 1] = is_utf8 + '0'; } - Safefree(delimited); + /* free only when not using the buffer */ + if ( delimited != buffer ) Safefree(delimited); Safefree(save_input_locale); return is_utf8; } @@ -4594,11 +4750,12 @@ Perl__is_cur_LC_category_utf8(pTHX_ int category) && wc == (wchar_t) UNICODE_REPLACEMENT); } +# endif + restore_switched_locale(LC_CTYPE, original_ctype_locale); goto finish_and_return; } -# endif # else /* Here, we must have a C89 compiler that doesn't have mbtowc(). Next @@ -4830,9 +4987,9 @@ Perl__is_cur_LC_category_utf8(pTHX_ int category) is_utf8 = TRUE; goto finish_and_return; } - } # endif + } # endif /* Other common encodings are the ISO 8859 series, which aren't UTF-8. But @@ -4965,7 +5122,8 @@ Perl__is_cur_LC_category_utf8(pTHX_ int category) # endif - Safefree(delimited); + /* free only when not using the buffer */ + if ( delimited != buffer ) Safefree(delimited); Safefree(save_input_locale); return is_utf8; } @@ -4982,15 +5140,15 @@ Perl__is_in_locale_category(pTHX_ const bool compiling, const int category) const COP * const cop = (compiling) ? &PL_compiling : PL_curcop; - SV *categories = cop_hints_fetch_pvs(cop, "locale", 0); - if (! categories || categories == &PL_sv_placeholder) { + SV *these_categories = cop_hints_fetch_pvs(cop, "locale", 0); + if (! these_categories || these_categories == &PL_sv_placeholder) { return FALSE; } /* The pseudo-category 'not_characters' is -1, so just add 1 to each to get * a valid unsigned */ assert(category >= -1); - return cBOOL(SvUV(categories) & (1U << (category + 1))); + return cBOOL(SvUV(these_categories) & (1U << (category + 1))); } char * @@ -5026,11 +5184,12 @@ Perl_my_strerror(pTHX_ const int errnum) errstr = savepv(strerror(errnum)); } else { - const char * save_locale = do_setlocale_c(LC_MESSAGES, NULL); + const char * save_locale = savepv(do_setlocale_c(LC_MESSAGES, NULL)); do_setlocale_c(LC_MESSAGES, "C"); errstr = savepv(strerror(errnum)); do_setlocale_c(LC_MESSAGES, save_locale); + Safefree(save_locale); } # elif defined(HAS_POSIX_2008_LOCALE) \ @@ -5137,9 +5296,7 @@ Perl_my_strerror(pTHX_ const int errnum) LOCALE_UNLOCK; # endif /* End of doesn't have strerror_l */ -#endif /* End of does have locale messages */ - -#ifdef DEBUGGING +# ifdef DEBUGGING if (DEBUG_Lv_TEST) { PerlIO_printf(Perl_debug_log, "Strerror returned; saving a copy: '"); @@ -5147,7 +5304,8 @@ Perl_my_strerror(pTHX_ const int errnum) PerlIO_printf(Perl_debug_log, "'\n"); } -#endif +# endif +#endif /* End of does have locale messages */ SAVEFREEPV(errstr); return errstr; @@ -5269,10 +5427,17 @@ L|perlapi/switch_to_global_locale>. bool Perl_sync_locale() { + +#ifndef USE_LOCALE + + return TRUE; + +#else + const char * newlocale; dTHX; -#ifdef USE_POSIX_2008_LOCALE +# ifdef USE_POSIX_2008_LOCALE bool was_in_global_locale = FALSE; locale_t cur_obj = uselocale((locale_t) 0); @@ -5284,11 +5449,11 @@ Perl_sync_locale() * will affect the */ if (cur_obj == LC_GLOBAL_LOCALE) { -# ifdef HAS_QUERY_LOCALE +# ifdef HAS_QUERY_LOCALE do_setlocale_c(LC_ALL, setlocale(LC_ALL, NULL)); -# else +# else unsigned int i; @@ -5298,45 +5463,51 @@ Perl_sync_locale() do_setlocale_r(categories[i], setlocale(categories[i], NULL)); } -# endif +# endif was_in_global_locale = TRUE; } -#else +# else bool was_in_global_locale = TRUE; -#endif -#ifdef USE_LOCALE_CTYPE +# endif +# ifdef USE_LOCALE_CTYPE - newlocale = do_setlocale_c(LC_CTYPE, NULL); + newlocale = savepv(do_setlocale_c(LC_CTYPE, NULL)); DEBUG_Lv(PerlIO_printf(Perl_debug_log, "%s:%d: %s\n", __FILE__, __LINE__, setlocale_debug_string(LC_CTYPE, NULL, newlocale))); new_ctype(newlocale); + Safefree(newlocale); -#endif /* USE_LOCALE_CTYPE */ -#ifdef USE_LOCALE_COLLATE +# endif /* USE_LOCALE_CTYPE */ +# ifdef USE_LOCALE_COLLATE - newlocale = do_setlocale_c(LC_COLLATE, NULL); + newlocale = savepv(do_setlocale_c(LC_COLLATE, NULL)); DEBUG_Lv(PerlIO_printf(Perl_debug_log, "%s:%d: %s\n", __FILE__, __LINE__, setlocale_debug_string(LC_COLLATE, NULL, newlocale))); new_collate(newlocale); + Safefree(newlocale); -#endif -#ifdef USE_LOCALE_NUMERIC +# endif +# ifdef USE_LOCALE_NUMERIC - newlocale = do_setlocale_c(LC_NUMERIC, NULL); + newlocale = savepv(do_setlocale_c(LC_NUMERIC, NULL)); DEBUG_Lv(PerlIO_printf(Perl_debug_log, "%s:%d: %s\n", __FILE__, __LINE__, setlocale_debug_string(LC_NUMERIC, NULL, newlocale))); new_numeric(newlocale); + Safefree(newlocale); -#endif /* USE_LOCALE_NUMERIC */ +# endif /* USE_LOCALE_NUMERIC */ return was_in_global_locale; + +#endif + } #if defined(DEBUGGING) && defined(USE_LOCALE) @@ -5435,7 +5606,7 @@ Perl_thread_locale_term() { /* Free up */ locale_t cur_obj = uselocale(LC_GLOBAL_LOCALE); - if (cur_obj != LC_GLOBAL_LOCALE) { + if (cur_obj != LC_GLOBAL_LOCALE && cur_obj != PL_C_locale_obj) { freelocale(cur_obj); } }