X-Git-Url: https://perl5.git.perl.org/perl5.git/blobdiff_plain/e5f10d4955ff54b3fe492cd78c412eb54b0ae45a..50bf02bdec7e84f5ebd09f3f200e5939e80e4e19:/locale.c diff --git a/locale.c b/locale.c index 0454382..50bcaea 100644 --- a/locale.c +++ b/locale.c @@ -44,21 +44,34 @@ /* If the environment says to, we can output debugging information during * initialization. This is done before option parsing, and before any thread * creation, so can be a file-level static */ -#ifdef DEBUGGING -# ifdef PERL_GLOBAL_STRUCT - /* no global syms allowed */ -# define debug_initialization 0 -# define DEBUG_INITIALIZATION_set(v) -# else +#if ! defined(DEBUGGING) || defined(PERL_GLOBAL_STRUCT) +# define debug_initialization 0 +# define DEBUG_INITIALIZATION_set(v) +#else static bool debug_initialization = FALSE; -# define DEBUG_INITIALIZATION_set(v) (debug_initialization = v) -# endif +# define DEBUG_INITIALIZATION_set(v) (debug_initialization = v) #endif -/* strlen() of a literal string constant. XXX We might want this more general, - * but using it in just this file for now */ +/* strlen() of a literal string constant. We might want this more general, + * but using it in just this file for now. A problem with more generality is + * the compiler warnings about comparing unlike signs */ #define STRLENs(s) (sizeof("" s "") - 1) +/* Is the C string input 'name' "C" or "POSIX"? If so, and 'name' is the + * return of setlocale(), then this is extremely likely to be the C or POSIX + * locale. However, the output of setlocale() is documented to be opaque, but + * the odds are extremely small that it would return these two strings for some + * other locale. Note that VMS in these two locales includes many non-ASCII + * characters as controls and punctuation (below are hex bytes): + * cntrl: 84-97 9B-9F + * punct: A1-A3 A5 A7-AB B0-B3 B5-B7 B9-BD BF-CF D1-DD DF-EF F1-FD + * Oddly, none there are listed as alphas, though some represent alphabetics + * http://www.nntp.perl.org/group/perl.perl5.porters/2013/02/msg198753.html */ +#define isNAME_C_OR_POSIX(name) \ + ( (name) != NULL \ + && (( *(name) == 'C' && (*(name + 1)) == '\0') \ + || strEQ((name), "POSIX"))) + #ifdef USE_LOCALE /* @@ -128,6 +141,21 @@ const int categories[] = { # ifdef USE_LOCALE_MONETARY LC_MONETARY, # endif +# ifdef USE_LOCALE_ADDRESS + LC_ADDRESS, +# endif +# ifdef USE_LOCALE_IDENTIFICATION + LC_IDENTIFICATION, +# endif +# ifdef USE_LOCALE_MEASUREMENT + LC_MEASUREMENT, +# endif +# ifdef USE_LOCALE_PAPER + LC_PAPER, +# endif +# ifdef USE_LOCALE_TELEPHONE + LC_TELEPHONE, +# endif # ifdef LC_ALL LC_ALL, # endif @@ -158,6 +186,21 @@ const char * category_names[] = { # ifdef USE_LOCALE_MONETARY "LC_MONETARY", # endif +# ifdef USE_LOCALE_ADDRESS + "LC_ADDRESS", +# endif +# ifdef USE_LOCALE_IDENTIFICATION + "LC_IDENTIFICATION", +# endif +# ifdef USE_LOCALE_MEASUREMENT + "LC_MEASUREMENT", +# endif +# ifdef USE_LOCALE_PAPER + "LC_PAPER", +# endif +# ifdef USE_LOCALE_TELEPHONE + "LC_TELEPHONE", +# endif # ifdef LC_ALL "LC_ALL", # endif @@ -182,10 +225,126 @@ const char * category_names[] = { * special cases. Most loops through these arrays in the code below are * written like 'for (i = 0; i < NOMINAL_LC_ALL_INDEX; i++)'. They will work * on either type of system. But the code must be written to not access the - * element at 'NOMINAL_LC_ALL_INDEX' except on platforms that have it. */ + * element at 'LC_ALL_INDEX' except on platforms that have it. This can be + * checked for at compile time by using the #define LC_ALL_INDEX which is only + * defined if we do have LC_ALL. */ + +STATIC const char * +S_category_name(const int category) +{ + unsigned int i; + +#ifdef LC_ALL + + if (category == LC_ALL) { + return "LC_ALL"; + } #endif + for (i = 0; i < NOMINAL_LC_ALL_INDEX; i++) { + if (category == categories[i]) { + return category_names[i]; + } + } + + { + const char suffix[] = " (unknown)"; + int temp = category; + Size_t length = sizeof(suffix) + 1; + char * unknown; + dTHX; + + if (temp < 0) { + length++; + temp = - temp; + } + + /* Calculate the number of digits */ + while (temp >= 10) { + temp /= 10; + length++; + } + + Newx(unknown, length, char); + my_snprintf(unknown, length, "%d%s", category, suffix); + SAVEFREEPV(unknown); + return unknown; + } +} + +/* Now create LC_foo_INDEX #defines for just those categories on this system */ +# ifdef USE_LOCALE_NUMERIC +# define LC_NUMERIC_INDEX 0 +# define _DUMMY_NUMERIC LC_NUMERIC_INDEX +# else +# define _DUMMY_NUMERIC -1 +# endif +# ifdef USE_LOCALE_CTYPE +# define LC_CTYPE_INDEX _DUMMY_NUMERIC + 1 +# define _DUMMY_CTYPE LC_CTYPE_INDEX +# else +# define _DUMMY_CTYPE _DUMMY_NUMERIC +# endif +# ifdef USE_LOCALE_COLLATE +# define LC_COLLATE_INDEX _DUMMY_CTYPE + 1 +# define _DUMMY_COLLATE LC_COLLATE_INDEX +# else +# define _DUMMY_COLLATE _DUMMY_COLLATE +# endif +# ifdef USE_LOCALE_TIME +# define LC_TIME_INDEX _DUMMY_COLLATE + 1 +# define _DUMMY_TIME LC_TIME_INDEX +# else +# define _DUMMY_TIME _DUMMY_COLLATE +# endif +# ifdef USE_LOCALE_MESSAGES +# define LC_MESSAGES_INDEX _DUMMY_TIME + 1 +# define _DUMMY_MESSAGES LC_MESSAGES_INDEX +# else +# define _DUMMY_MESSAGES _DUMMY_TIME +# endif +# ifdef USE_LOCALE_MONETARY +# define LC_MONETARY_INDEX _DUMMY_MESSAGES + 1 +# define _DUMMY_MONETARY LC_MONETARY_INDEX +# else +# define _DUMMY_MONETARY _DUMMY_MESSAGES +# endif +# ifdef USE_LOCALE_ADDRESS +# define LC_ADDRESS_INDEX _DUMMY_MONETARY + 1 +# define _DUMMY_ADDRESS LC_ADDRESS_INDEX +# else +# define _DUMMY_ADDRESS _DUMMY_MONETARY +# endif +# ifdef USE_LOCALE_IDENTIFICATION +# define LC_IDENTIFICATION_INDEX _DUMMY_ADDRESS + 1 +# define _DUMMY_IDENTIFICATION LC_IDENTIFICATION_INDEX +# else +# define _DUMMY_IDENTIFICATION _DUMMY_ADDRESS +# endif +# ifdef USE_LOCALE_MEASUREMENT +# define LC_MEASUREMENT_INDEX _DUMMY_IDENTIFICATION + 1 +# define _DUMMY_MEASUREMENT LC_MEASUREMENT_INDEX +# else +# define _DUMMY_MEASUREMENT _DUMMY_IDENTIFICATION +# endif +# ifdef USE_LOCALE_PAPER +# define LC_PAPER_INDEX _DUMMY_MEASUREMENT + 1 +# define _DUMMY_PAPER LC_PAPER_INDEX +# else +# define _DUMMY_PAPER _DUMMY_MEASUREMENT +# endif +# ifdef USE_LOCALE_TELEPHONE +# define LC_TELEPHONE_INDEX _DUMMY_PAPER + 1 +# define _DUMMY_TELEPHONE LC_TELEPHONE_INDEX +# else +# define _DUMMY_TELEPHONE _DUMMY_PAPER +# endif +# ifdef LC_ALL +# define LC_ALL_INDEX _DUMMY_TELEPHONE + 1 +# endif +#endif /* ifdef USE_LOCALE */ + /* Windows requres a customized base-level setlocale() */ # ifdef WIN32 # define my_setlocale(cat, locale) win32_setlocale(cat, locale) @@ -211,9 +370,10 @@ S_set_numeric_radix(pTHX_ const bool use_locale) if (use_locale) { const char * radix = my_nl_langinfo(PERL_RADIXCHAR, FALSE); /* FALSE => already in dest locale */ - /* ... and the character being used isn't a dot */ if (strNE(radix, ".")) { + const U8 * first_variant; + if (PL_numeric_radix_sv) { sv_setpv(PL_numeric_radix_sv, radix); } @@ -221,10 +381,17 @@ S_set_numeric_radix(pTHX_ const bool use_locale) PL_numeric_radix_sv = newSVpv(radix, 0); } - if ( ! is_utf8_invariant_string( - (U8 *) SvPVX(PL_numeric_radix_sv), SvCUR(PL_numeric_radix_sv)) - && is_utf8_string( - (U8 *) SvPVX(PL_numeric_radix_sv), SvCUR(PL_numeric_radix_sv)) + /* If there is a byte variant under UTF-8, and if the remainder of + * the string starting there is valid UTF-8, and we are in a UTF-8 + * locale, then mark the radix as being in UTF-8 */ + if ( ! is_utf8_invariant_string_loc( + (U8 *) SvPVX(PL_numeric_radix_sv), + SvCUR(PL_numeric_radix_sv), + &first_variant) + && is_utf8_string(first_variant, + SvCUR(PL_numeric_radix_sv) + - ((char *) first_variant + - SvPVX(PL_numeric_radix_sv))) && _is_cur_LC_category_utf8(LC_NUMERIC)) { SvUTF8_on(PL_numeric_radix_sv); @@ -255,20 +422,6 @@ S_set_numeric_radix(pTHX_ const bool use_locale) } -/* Is the C string input 'name' "C" or "POSIX"? If so, and 'name' is the - * return of setlocale(), then this is extremely likely to be the C or POSIX - * locale. However, the output of setlocale() is documented to be opaque, but - * the odds are extremely small that it would return these two strings for some - * other locale. Note that VMS in these two locales includes many non-ASCII - * characters as controls and punctuation (below are hex bytes): - * cntrl: 84-97 9B-9F - * punct: A1-A3 A5 A7-AB B0-B3 B5-B7 B9-BD BF-CF D1-DD DF-EF F1-FD - * Oddly, none there are listed as alphas, though some represent alphabetics - * http://www.nntp.perl.org/group/perl.perl5.porters/2013/02/msg198753.html */ -#define isNAME_C_OR_POSIX(name) \ - ( (name) != NULL \ - && (( *(name) == 'C' && (*(name + 1)) == '\0') \ - || strEQ((name), "POSIX"))) void Perl_new_numeric(pTHX_ const char *newnum) @@ -280,7 +433,7 @@ Perl_new_numeric(pTHX_ const char *newnum) #else - /* Called after all libc setlocale() calls affecting LC_NUMERIC, to tell + /* Called after each libc setlocale() call affecting LC_NUMERIC, to tell * core Perl this and that 'newnum' is the name of the new locale. * It installs this locale as the current underlying default. * @@ -299,13 +452,17 @@ Perl_new_numeric(pTHX_ const char *newnum) * that the current locale is the program's underlying * locale * PL_numeric_standard An int indicating if the toggled state is such - * that the current locale is the C locale. If non-zero, - * it is in C; if > 1, it means it may not be toggled away + * that the current locale is the C locale or + * indistinguishable from the C locale. If non-zero, it + * is in C; if > 1, it means it may not be toggled away * from C. - * Note that both of the last two variables can be true at the same time, - * if the underlying locale is C. (Toggling is a no-op under these - * circumstances.) - * + * PL_numeric_underlying_is_standard A bool kept by this function + * indicating that the underlying locale and the standard + * C locale are indistinguishable for the purposes of + * LC_NUMERIC. This happens when both of the above two + * variables are true at the same time. (Toggling is a + * no-op under these circumstances.) This variable is + * used to avoid having to recalculate. * Any code changing the locale (outside this file) should use * POSIX::setlocale, which calls this function. Therefore this function * should be called directly only from this file and from @@ -318,14 +475,24 @@ Perl_new_numeric(pTHX_ const char *newnum) PL_numeric_name = NULL; PL_numeric_standard = TRUE; PL_numeric_underlying = TRUE; + PL_numeric_underlying_is_standard = TRUE; return; } save_newnum = stdize_locale(savepv(newnum)); - - PL_numeric_standard = isNAME_C_OR_POSIX(save_newnum); PL_numeric_underlying = TRUE; + PL_numeric_standard = isNAME_C_OR_POSIX(save_newnum); + /* If its name isn't C nor POSIX, it could still be indistinguishable from + * them */ + if (! PL_numeric_standard) { + PL_numeric_standard = cBOOL(strEQ(".", my_nl_langinfo(PERL_RADIXCHAR, + FALSE /* Don't toggle locale */ )) + && strEQ("", my_nl_langinfo(PERL_THOUSEP, + FALSE))); + } + + /* Save the new name if it isn't the same as the previous one, if any */ if (! PL_numeric_name || strNE(PL_numeric_name, save_newnum)) { Safefree(PL_numeric_name); PL_numeric_name = save_newnum; @@ -334,6 +501,12 @@ Perl_new_numeric(pTHX_ const char *newnum) Safefree(save_newnum); } + PL_numeric_underlying_is_standard = PL_numeric_standard; + + if (DEBUG_L_TEST || debug_initialization) { + PerlIO_printf(Perl_debug_log, "Called new_numeric with %s, PL_numeric_name=%s\n", newnum, PL_numeric_name); + } + /* Keep LC_NUMERIC in the C locale. This is for XS modules, so they don't * have to worry about the radix being a non-dot. (Core operations that * need the underlying locale change to it temporarily). */ @@ -357,7 +530,7 @@ Perl_set_numeric_standard(pTHX) do_setlocale_c(LC_NUMERIC, "C"); PL_numeric_standard = TRUE; - PL_numeric_underlying = isNAME_C_OR_POSIX(PL_numeric_name); + PL_numeric_underlying = PL_numeric_underlying_is_standard; set_numeric_radix(0); # ifdef DEBUGGING @@ -385,9 +558,9 @@ Perl_set_numeric_underlying(pTHX) * wrong if some XS code has changed the locale behind our back) */ do_setlocale_c(LC_NUMERIC, PL_numeric_name); - PL_numeric_standard = isNAME_C_OR_POSIX(PL_numeric_name); + PL_numeric_standard = PL_numeric_underlying_is_standard; PL_numeric_underlying = TRUE; - set_numeric_radix(1); + set_numeric_radix(! PL_numeric_standard); # ifdef DEBUGGING @@ -417,7 +590,7 @@ S_new_ctype(pTHX_ const char *newctype) #else - /* Called after all libc setlocale() calls affecting LC_CTYPE, to tell + /* Called after each libc setlocale() call affecting LC_CTYPE, to tell * core Perl this and that 'newctype' is the name of the new locale. * * This function sets up the folding arrays for all 256 bytes, assuming @@ -462,10 +635,10 @@ S_new_ctype(pTHX_ const char *newctype) unsigned int bad_count = 0; /* Count of bad characters */ for (i = 0; i < 256; i++) { - if (isUPPER_LC((U8) i)) - PL_fold_locale[i] = (U8) toLOWER_LC((U8) i); - else if (isLOWER_LC((U8) i)) - PL_fold_locale[i] = (U8) toUPPER_LC((U8) i); + if (isupper(i)) + PL_fold_locale[i] = (U8) tolower(i); + else if (islower(i)) + PL_fold_locale[i] = (U8) toupper(i); else PL_fold_locale[i] = (U8) i; @@ -481,10 +654,19 @@ S_new_ctype(pTHX_ const char *newctype) if ( check_for_problems && (isGRAPH_A(i) || isBLANK_A(i) || i == '\n')) { - if (( isALPHANUMERIC_A(i) && ! isALPHANUMERIC_LC(i)) - || (isPUNCT_A(i) && ! isPUNCT_LC(i)) - || (isBLANK_A(i) && ! isBLANK_LC(i)) - || (i == '\n' && ! isCNTRL_LC(i))) + if ( cBOOL(isalnum(i)) != cBOOL(isALPHANUMERIC(i)) + || cBOOL(isalpha(i)) != cBOOL(isALPHA_A(i)) + || cBOOL(isdigit(i)) != cBOOL(isDIGIT_A(i)) + || cBOOL(isgraph(i)) != cBOOL(isGRAPH_A(i)) + || cBOOL(islower(i)) != cBOOL(isLOWER_A(i)) + || cBOOL(isprint(i)) != cBOOL(isPRINT_A(i)) + || cBOOL(ispunct(i)) != cBOOL(isPUNCT_A(i)) + || cBOOL(isspace(i)) != cBOOL(isSPACE_A(i)) + || cBOOL(isupper(i)) != cBOOL(isUPPER_A(i)) + || cBOOL(isxdigit(i))!= cBOOL(isXDIGIT_A(i)) + || tolower(i) != (int) toLOWER_A(i) + || toupper(i) != (int) toUPPER_A(i) + || (i == '\n' && ! isCNTRL_LC(i))) { if (bad_count) { /* Separate multiple entries with a blank */ @@ -597,11 +779,9 @@ Perl__warn_problematic_locale() * _CHECK_AND_WARN_PROBLEMATIC_LOCALE */ if (PL_warn_locale) { - /*GCC_DIAG_IGNORE(-Wformat-security); Didn't work */ Perl_ck_warner(aTHX_ packWARN(WARN_LOCALE), SvPVX(PL_warn_locale), 0 /* dummy to avoid compiler warning */ ); - /* GCC_DIAG_RESTORE; */ SvREFCNT_dec_NN(PL_warn_locale); PL_warn_locale = NULL; } @@ -621,7 +801,7 @@ S_new_collate(pTHX_ const char *newcoll) #else - /* Called after all libc setlocale() calls affecting LC_COLLATE, to tell + /* Called after each libc setlocale() call affecting LC_COLLATE, to tell * core Perl this and that 'newcoll' is the name of the new locale. * * The design of locale collation is that every locale change is given an @@ -905,7 +1085,7 @@ S_win32_setlocale(pTHX_ int category, const char* locale) * one that is set. (If they are set to "", it means to use the same thing * we just set LC_ALL to, so can skip) */ - for (i = 0; i < NOMINAL_LC_ALL_INDEX; i++) { + for (i = 0; i < LC_ALL_INDEX; i++) { result = PerlEnv_getenv(category_names[i]); if (result && strNE(result, "")) { setlocale(categories[i], result); @@ -936,11 +1116,10 @@ Perl_setlocale(int category, const char * locale) #ifdef USE_LOCALE_NUMERIC - /* A NULL locale means only query what the current one is. We - * have the LC_NUMERIC name saved, because we are normally switched - * into the C locale for it. Switch back so an LC_ALL query will yield - * the correct results; all other categories don't require special - * handling */ + /* A NULL locale means only query what the current one is. We have the + * LC_NUMERIC name saved, because we are normally switched into the C + * locale for it. For an LC_ALL query, switch back to get the correct + * results. All other categories don't require special handling */ if (locale == NULL) { if (category == LC_NUMERIC) { return savepv(PL_numeric_name); @@ -948,7 +1127,8 @@ Perl_setlocale(int category, const char * locale) # ifdef LC_ALL - else if (category == LC_ALL) { + else if (category == LC_ALL && ! PL_numeric_underlying) { + SET_NUMERIC_UNDERLYING(); } @@ -1121,11 +1301,15 @@ Unimplemented, so returns C<"">. =item C +=item C + =item C -Only the values for English are returned. Earlier POSIX standards also -specified C and C, but these have been removed from POSIX 2008, -and aren't supported by C. +=item C + +Only the values for English are returned. C and C have been +removed from POSIX 2008, and are retained for backwards compatibility. Your +platform's C may not support them. =item C @@ -1239,7 +1423,9 @@ S_my_nl_langinfo(const int item, bool toggle) LOCALE_LOCK; if (toggle) { - if (item == PERL_RADIXCHAR || item == PERL_THOUSEP) { + if ( ! PL_numeric_underlying + && (item == PERL_RADIXCHAR || item == PERL_THOUSEP)) + { do_setlocale_c(LC_NUMERIC, PL_numeric_name); } else { @@ -1255,8 +1441,6 @@ S_my_nl_langinfo(const int item, bool toggle) LOCALE_UNLOCK; - return PL_langinfo_buf; - # else /* Use nl_langinfo_l(), avoiding both a mutex and changing the locale */ bool do_free = FALSE; @@ -1267,9 +1451,7 @@ S_my_nl_langinfo(const int item, bool toggle) do_free = TRUE; } - if ( toggle - && (item == PERL_RADIXCHAR || item == PERL_THOUSEP)) - { + if (toggle) { cur = newlocale(LC_NUMERIC_MASK, PL_numeric_name, cur); do_free = TRUE; } @@ -1280,359 +1462,377 @@ S_my_nl_langinfo(const int item, bool toggle) freelocale(cur); } +# endif + + if (strEQ(PL_langinfo_buf, "")) { + if (item == PERL_YESSTR) { + return "yes"; + } + if (item == PERL_NOSTR) { + return "no"; + } + } + return PL_langinfo_buf; -# endif #else /* Below, emulate nl_langinfo as best we can */ + + { + # ifdef HAS_LOCALECONV - const struct lconv* lc; + const struct lconv* lc; # endif # ifdef HAS_STRFTIME - struct tm tm; - bool return_format = FALSE; /* Return the %format, not the value */ - const char * format; + struct tm tm; + bool return_format = FALSE; /* Return the %format, not the value */ + const char * format; # endif - /* We copy the results to a per-thread buffer, even if not multi-threaded. - * This is in part to simplify this code, and partly because we need a - * buffer anyway for strftime(), and partly because a call of localeconv() - * could otherwise wipe out the buffer, and the programmer would not be - * expecting this, as this is a nl_langinfo() substitute after all, so s/he - * might be thinking their localeconv() is safe until another localeconv() - * call. */ + /* We copy the results to a per-thread buffer, even if not + * multi-threaded. This is in part to simplify this code, and partly + * because we need a buffer anyway for strftime(), and partly because a + * call of localeconv() could otherwise wipe out the buffer, and the + * programmer would not be expecting this, as this is a nl_langinfo() + * substitute after all, so s/he might be thinking their localeconv() + * is safe until another localeconv() call. */ - switch (item) { - Size_t len; - const char * retval; + switch (item) { + Size_t len; + const char * retval; - /* These 2 are unimplemented */ - case PERL_CODESET: - case PERL_ERA: /* For use with strftime() %E modifier */ + /* These 2 are unimplemented */ + case PERL_CODESET: + case PERL_ERA: /* For use with strftime() %E modifier */ - default: - return ""; + default: + return ""; - /* We use only an English set, since we don't know any more */ - case PERL_YESEXPR: return "^[+1yY]"; - case PERL_NOEXPR: return "^[-0nN]"; + /* We use only an English set, since we don't know any more */ + case PERL_YESEXPR: return "^[+1yY]"; + case PERL_YESSTR: return "yes"; + case PERL_NOEXPR: return "^[-0nN]"; + case PERL_NOSTR: return "no"; # ifdef HAS_LOCALECONV - case PERL_CRNCYSTR: + case PERL_CRNCYSTR: - LOCALE_LOCK; + LOCALE_LOCK; - lc = localeconv(); - if (! lc || ! lc->currency_symbol || strEQ("", lc->currency_symbol)) - { - LOCALE_UNLOCK; - return ""; - } + /* We don't bother with localeconv_l() because any system that + * has it is likely to also have nl_langinfo() */ - /* Leave the first spot empty to be filled in below */ - save_to_buffer(lc->currency_symbol, &PL_langinfo_buf, - &PL_langinfo_bufsize, 1); - if (lc->mon_decimal_point && strEQ(lc->mon_decimal_point, "")) - { /* khw couldn't figure out how the localedef specifications - would show that the $ should replace the radix; this is - just a guess as to how it might work.*/ - *PL_langinfo_buf = '.'; - } - else if (lc->p_cs_precedes) { - *PL_langinfo_buf = '-'; - } - else { - *PL_langinfo_buf = '+'; - } + lc = localeconv(); + if ( ! lc + || ! lc->currency_symbol + || strEQ("", lc->currency_symbol)) + { + LOCALE_UNLOCK; + return ""; + } - LOCALE_UNLOCK; - break; + /* Leave the first spot empty to be filled in below */ + save_to_buffer(lc->currency_symbol, &PL_langinfo_buf, + &PL_langinfo_bufsize, 1); + if (lc->mon_decimal_point && strEQ(lc->mon_decimal_point, "")) + { /* khw couldn't figure out how the localedef specifications + would show that the $ should replace the radix; this is + just a guess as to how it might work.*/ + *PL_langinfo_buf = '.'; + } + else if (lc->p_cs_precedes) { + *PL_langinfo_buf = '-'; + } + else { + *PL_langinfo_buf = '+'; + } - case PERL_RADIXCHAR: - case PERL_THOUSEP: + LOCALE_UNLOCK; + break; - LOCALE_LOCK; + case PERL_RADIXCHAR: + case PERL_THOUSEP: - if (toggle) { - do_setlocale_c(LC_NUMERIC, PL_numeric_name); - } + LOCALE_LOCK; - lc = localeconv(); - if (! lc) { - retval = ""; - } - else switch (item) { - case PERL_RADIXCHAR: - if (! lc->decimal_point) { - retval = ""; + if (toggle) { + if (! PL_numeric_underlying) { + do_setlocale_c(LC_NUMERIC, PL_numeric_name); } else { - retval = lc->decimal_point; + toggle = FALSE; } - break; + } - case PERL_THOUSEP: - if (! lc->thousands_sep || strEQ("", lc->thousands_sep)) { + lc = localeconv(); + if (! lc) { + retval = ""; + } + else { + retval = (item == PERL_RADIXCHAR) + ? lc->decimal_point + : lc->thousands_sep; + if (! retval) { retval = ""; } - else { - retval = lc->thousands_sep; - } - break; - - default: - LOCALE_UNLOCK; - Perl_croak(aTHX_ "panic: %s: %d: switch case: %d problem", - __FILE__, __LINE__, item); - } + } - save_to_buffer(retval, &PL_langinfo_buf, &PL_langinfo_bufsize, 0); + save_to_buffer(retval, &PL_langinfo_buf, + &PL_langinfo_bufsize, 0); - if (toggle) { - do_setlocale_c(LC_NUMERIC, "C"); - } + if (toggle) { + do_setlocale_c(LC_NUMERIC, "C"); + } - LOCALE_UNLOCK; + LOCALE_UNLOCK; - break; + break; # endif # ifdef HAS_STRFTIME - /* These are defined by C89, so we assume that strftime supports them, - * and so are returned unconditionally; they may not be what the locale - * actually says, but should give good enough results for someone using - * them as formats (as opposed to trying to parse them to figure out - * what the locale says). The other format items are actually tested to - * verify they work on the platform */ - case PERL_D_FMT: return "%x"; - case PERL_T_FMT: return "%X"; - case PERL_D_T_FMT: return "%c"; - - /* These formats are only available in later strfmtime's */ - case PERL_ERA_D_FMT: case PERL_ERA_T_FMT: case PERL_ERA_D_T_FMT: - case PERL_T_FMT_AMPM: - - /* The rest can be gotten from most versions of strftime(). */ - case PERL_ABDAY_1: case PERL_ABDAY_2: case PERL_ABDAY_3: - case PERL_ABDAY_4: case PERL_ABDAY_5: case PERL_ABDAY_6: - case PERL_ABDAY_7: - case PERL_ALT_DIGITS: - case PERL_AM_STR: case PERL_PM_STR: - case PERL_ABMON_1: case PERL_ABMON_2: case PERL_ABMON_3: - case PERL_ABMON_4: case PERL_ABMON_5: case PERL_ABMON_6: - case PERL_ABMON_7: case PERL_ABMON_8: case PERL_ABMON_9: - case PERL_ABMON_10: case PERL_ABMON_11: case PERL_ABMON_12: - case PERL_DAY_1: case PERL_DAY_2: case PERL_DAY_3: case PERL_DAY_4: - case PERL_DAY_5: case PERL_DAY_6: case PERL_DAY_7: - case PERL_MON_1: case PERL_MON_2: case PERL_MON_3: case PERL_MON_4: - case PERL_MON_5: case PERL_MON_6: case PERL_MON_7: case PERL_MON_8: - case PERL_MON_9: case PERL_MON_10: case PERL_MON_11: case PERL_MON_12: - - LOCALE_LOCK; - - init_tm(&tm); /* Precaution against core dumps */ - tm.tm_sec = 30; - tm.tm_min = 30; - tm.tm_hour = 6; - tm.tm_year = 2017 - 1900; - tm.tm_wday = 0; - tm.tm_mon = 0; - switch (item) { - default: - LOCALE_UNLOCK; - Perl_croak(aTHX_ "panic: %s: %d: switch case: %d problem", - __FILE__, __LINE__, item); - NOT_REACHED; /* NOTREACHED */ - - case PERL_PM_STR: tm.tm_hour = 18; - case PERL_AM_STR: - format = "%p"; - break; - - case PERL_ABDAY_7: tm.tm_wday++; - case PERL_ABDAY_6: tm.tm_wday++; - case PERL_ABDAY_5: tm.tm_wday++; - case PERL_ABDAY_4: tm.tm_wday++; - case PERL_ABDAY_3: tm.tm_wday++; - case PERL_ABDAY_2: tm.tm_wday++; - case PERL_ABDAY_1: - format = "%a"; - break; - - case PERL_DAY_7: tm.tm_wday++; - case PERL_DAY_6: tm.tm_wday++; - case PERL_DAY_5: tm.tm_wday++; - case PERL_DAY_4: tm.tm_wday++; - case PERL_DAY_3: tm.tm_wday++; - case PERL_DAY_2: tm.tm_wday++; - case PERL_DAY_1: - format = "%A"; - break; - - case PERL_ABMON_12: tm.tm_mon++; - case PERL_ABMON_11: tm.tm_mon++; - case PERL_ABMON_10: tm.tm_mon++; - case PERL_ABMON_9: tm.tm_mon++; - case PERL_ABMON_8: tm.tm_mon++; - case PERL_ABMON_7: tm.tm_mon++; - case PERL_ABMON_6: tm.tm_mon++; - case PERL_ABMON_5: tm.tm_mon++; - case PERL_ABMON_4: tm.tm_mon++; - case PERL_ABMON_3: tm.tm_mon++; - case PERL_ABMON_2: tm.tm_mon++; - case PERL_ABMON_1: - format = "%b"; - break; - - case PERL_MON_12: tm.tm_mon++; - case PERL_MON_11: tm.tm_mon++; - case PERL_MON_10: tm.tm_mon++; - case PERL_MON_9: tm.tm_mon++; - case PERL_MON_8: tm.tm_mon++; - case PERL_MON_7: tm.tm_mon++; - case PERL_MON_6: tm.tm_mon++; - case PERL_MON_5: tm.tm_mon++; - case PERL_MON_4: tm.tm_mon++; - case PERL_MON_3: tm.tm_mon++; - case PERL_MON_2: tm.tm_mon++; - case PERL_MON_1: - format = "%B"; - break; + /* These are defined by C89, so we assume that strftime supports + * them, and so are returned unconditionally; they may not be what + * the locale actually says, but should give good enough results + * for someone using them as formats (as opposed to trying to parse + * them to figure out what the locale says). The other format + * items are actually tested to verify they work on the platform */ + case PERL_D_FMT: return "%x"; + case PERL_T_FMT: return "%X"; + case PERL_D_T_FMT: return "%c"; + + /* These formats are only available in later strfmtime's */ + case PERL_ERA_D_FMT: case PERL_ERA_T_FMT: case PERL_ERA_D_T_FMT: + case PERL_T_FMT_AMPM: + + /* The rest can be gotten from most versions of strftime(). */ + case PERL_ABDAY_1: case PERL_ABDAY_2: case PERL_ABDAY_3: + case PERL_ABDAY_4: case PERL_ABDAY_5: case PERL_ABDAY_6: + case PERL_ABDAY_7: + case PERL_ALT_DIGITS: + case PERL_AM_STR: case PERL_PM_STR: + case PERL_ABMON_1: case PERL_ABMON_2: case PERL_ABMON_3: + case PERL_ABMON_4: case PERL_ABMON_5: case PERL_ABMON_6: + case PERL_ABMON_7: case PERL_ABMON_8: case PERL_ABMON_9: + case PERL_ABMON_10: case PERL_ABMON_11: case PERL_ABMON_12: + case PERL_DAY_1: case PERL_DAY_2: case PERL_DAY_3: case PERL_DAY_4: + case PERL_DAY_5: case PERL_DAY_6: case PERL_DAY_7: + case PERL_MON_1: case PERL_MON_2: case PERL_MON_3: case PERL_MON_4: + case PERL_MON_5: case PERL_MON_6: case PERL_MON_7: case PERL_MON_8: + case PERL_MON_9: case PERL_MON_10: case PERL_MON_11: + case PERL_MON_12: + + LOCALE_LOCK; + + init_tm(&tm); /* Precaution against core dumps */ + tm.tm_sec = 30; + tm.tm_min = 30; + tm.tm_hour = 6; + tm.tm_year = 2017 - 1900; + tm.tm_wday = 0; + tm.tm_mon = 0; + switch (item) { + default: + LOCALE_UNLOCK; + Perl_croak(aTHX_ + "panic: %s: %d: switch case: %d problem", + __FILE__, __LINE__, item); + NOT_REACHED; /* NOTREACHED */ + + case PERL_PM_STR: tm.tm_hour = 18; + case PERL_AM_STR: + format = "%p"; + break; + + case PERL_ABDAY_7: tm.tm_wday++; + case PERL_ABDAY_6: tm.tm_wday++; + case PERL_ABDAY_5: tm.tm_wday++; + case PERL_ABDAY_4: tm.tm_wday++; + case PERL_ABDAY_3: tm.tm_wday++; + case PERL_ABDAY_2: tm.tm_wday++; + case PERL_ABDAY_1: + format = "%a"; + break; + + case PERL_DAY_7: tm.tm_wday++; + case PERL_DAY_6: tm.tm_wday++; + case PERL_DAY_5: tm.tm_wday++; + case PERL_DAY_4: tm.tm_wday++; + case PERL_DAY_3: tm.tm_wday++; + case PERL_DAY_2: tm.tm_wday++; + case PERL_DAY_1: + format = "%A"; + break; + + case PERL_ABMON_12: tm.tm_mon++; + case PERL_ABMON_11: tm.tm_mon++; + case PERL_ABMON_10: tm.tm_mon++; + case PERL_ABMON_9: tm.tm_mon++; + case PERL_ABMON_8: tm.tm_mon++; + case PERL_ABMON_7: tm.tm_mon++; + case PERL_ABMON_6: tm.tm_mon++; + case PERL_ABMON_5: tm.tm_mon++; + case PERL_ABMON_4: tm.tm_mon++; + case PERL_ABMON_3: tm.tm_mon++; + case PERL_ABMON_2: tm.tm_mon++; + case PERL_ABMON_1: + format = "%b"; + break; + + case PERL_MON_12: tm.tm_mon++; + case PERL_MON_11: tm.tm_mon++; + case PERL_MON_10: tm.tm_mon++; + case PERL_MON_9: tm.tm_mon++; + case PERL_MON_8: tm.tm_mon++; + case PERL_MON_7: tm.tm_mon++; + case PERL_MON_6: tm.tm_mon++; + case PERL_MON_5: tm.tm_mon++; + case PERL_MON_4: tm.tm_mon++; + case PERL_MON_3: tm.tm_mon++; + case PERL_MON_2: tm.tm_mon++; + case PERL_MON_1: + format = "%B"; + break; + + case PERL_T_FMT_AMPM: + format = "%r"; + return_format = TRUE; + break; + + case PERL_ERA_D_FMT: + format = "%Ex"; + return_format = TRUE; + break; + + case PERL_ERA_T_FMT: + format = "%EX"; + return_format = TRUE; + break; + + case PERL_ERA_D_T_FMT: + format = "%Ec"; + return_format = TRUE; + break; + + case PERL_ALT_DIGITS: + tm.tm_wday = 0; + format = "%Ow"; /* Find the alternate digit for 0 */ + break; + } - case PERL_T_FMT_AMPM: - format = "%r"; - return_format = TRUE; - break; + /* We can't use my_strftime() because it doesn't look at + * tm_wday */ + while (0 == strftime(PL_langinfo_buf, PL_langinfo_bufsize, + format, &tm)) + { + /* A zero return means one of: + * a) there wasn't enough space in PL_langinfo_buf + * b) the format, like a plain %p, returns empty + * c) it was an illegal format, though some + * implementations of strftime will just return the + * illegal format as a plain character sequence. + * + * To quickly test for case 'b)', try again but precede + * the format with a plain character. If that result is + * still empty, the problem is either 'a)' or 'c)' */ + + Size_t format_size = strlen(format) + 1; + Size_t mod_size = format_size + 1; + char * mod_format; + char * temp_result; + + Newx(mod_format, mod_size, char); + Newx(temp_result, PL_langinfo_bufsize, char); + *mod_format = ' '; + my_strlcpy(mod_format + 1, format, mod_size); + len = strftime(temp_result, + PL_langinfo_bufsize, + mod_format, &tm); + Safefree(mod_format); + Safefree(temp_result); + + /* If 'len' is non-zero, it means that we had a case like + * %p which means the current locale doesn't use a.m. or + * p.m., and that is valid */ + if (len == 0) { + + /* Here, still didn't work. If we get well beyond a + * reasonable size, bail out to prevent an infinite + * loop. */ + + if (PL_langinfo_bufsize > 100 * format_size) { + *PL_langinfo_buf = '\0'; + } + else { + /* Double the buffer size to retry; Add 1 in case + * original was 0, so we aren't stuck at 0. */ + PL_langinfo_bufsize *= 2; + PL_langinfo_bufsize++; + Renew(PL_langinfo_buf, PL_langinfo_bufsize, char); + continue; + } + } - case PERL_ERA_D_FMT: - format = "%Ex"; - return_format = TRUE; break; + } - case PERL_ERA_T_FMT: - format = "%EX"; - return_format = TRUE; - break; + /* Here, we got a result. + * + * If the item is 'ALT_DIGITS', PL_langinfo_buf contains the + * alternate format for wday 0. If the value is the same as + * the normal 0, there isn't an alternate, so clear the buffer. + * */ + if ( item == PERL_ALT_DIGITS + && strEQ(PL_langinfo_buf, "0")) + { + *PL_langinfo_buf = '\0'; + } - case PERL_ERA_D_T_FMT: - format = "%Ec"; - return_format = TRUE; - break; + /* ALT_DIGITS is problematic. Experiments on it showed that + * strftime() did not always work properly when going from + * alt-9 to alt-10. Only a few locales have this item defined, + * and in all of them on Linux that khw was able to find, + * nl_langinfo() merely returned the alt-0 character, possibly + * doubled. Most Unicode digits are in blocks of 10 + * consecutive code points, so that is sufficient information + * for those scripts, as we can infer alt-1, alt-2, .... But + * for a Japanese locale, a CJK ideographic 0 is returned, and + * the CJK digits are not in code point order, so you can't + * really infer anything. The localedef for this locale did + * specify the succeeding digits, so that strftime() works + * properly on them, without needing to infer anything. But + * the nl_langinfo() return did not give sufficient information + * for the caller to understand what's going on. So until + * there is evidence that it should work differently, this + * returns the alt-0 string for ALT_DIGITS. + * + * wday was chosen because its range is all a single digit. + * Things like tm_sec have two digits as the minimum: '00' */ - case PERL_ALT_DIGITS: - tm.tm_wday = 0; - format = "%Ow"; /* Find the alternate digit for 0 */ - break; - } + LOCALE_UNLOCK; - /* We can't use my_strftime() because it doesn't look at tm_wday */ - while (0 == strftime(PL_langinfo_buf, PL_langinfo_bufsize, - format, &tm)) - { - /* A zero return means one of: - * a) there wasn't enough space in PL_langinfo_buf - * b) the format, like a plain %p, returns empty - * c) it was an illegal format, though some implementations of - * strftime will just return the illegal format as a plain - * character sequence. - * - * To quickly test for case 'b)', try again but precede the - * format with a plain character. If that result is still - * empty, the problem is either 'a)' or 'c)' */ - - Size_t format_size = strlen(format) + 1; - Size_t mod_size = format_size + 1; - char * mod_format; - char * temp_result; - - Newx(mod_format, mod_size, char); - Newx(temp_result, PL_langinfo_bufsize, char); - *mod_format = '\a'; - my_strlcpy(mod_format + 1, format, mod_size); - len = strftime(temp_result, - PL_langinfo_bufsize, - mod_format, &tm); - Safefree(mod_format); - Safefree(temp_result); - - /* If 'len' is non-zero, it means that we had a case like %p - * which means the current locale doesn't use a.m. or p.m., and - * that is valid */ - if (len == 0) { - - /* Here, still didn't work. If we get well beyond a - * reasonable size, bail out to prevent an infinite loop. */ - - if (PL_langinfo_bufsize > 100 * format_size) { + /* If to return the format, not the value, overwrite the buffer + * with it. But some strftime()s will keep the original format + * if illegal, so change those to "" */ + if (return_format) { + if (strEQ(PL_langinfo_buf, format)) { *PL_langinfo_buf = '\0'; } - else { /* Double the buffer size to retry; Add 1 in case - original was 0, so we aren't stuck at 0. */ - PL_langinfo_bufsize *= 2; - PL_langinfo_bufsize++; - Renew(PL_langinfo_buf, PL_langinfo_bufsize, char); - continue; + else { + save_to_buffer(format, &PL_langinfo_buf, + &PL_langinfo_bufsize, 0); } } break; - } - - /* Here, we got a result. - * - * If the item is 'ALT_DIGITS', PL_langinfo_buf contains the - * alternate format for wday 0. If the value is the same as the - * normal 0, there isn't an alternate, so clear the buffer. */ - if ( item == PERL_ALT_DIGITS - && strEQ(PL_langinfo_buf, "0")) - { - *PL_langinfo_buf = '\0'; - } - - /* ALT_DIGITS is problematic. Experiments on it showed that - * strftime() did not always work properly when going from alt-9 to - * alt-10. Only a few locales have this item defined, and in all - * of them on Linux that khw was able to find, nl_langinfo() merely - * returned the alt-0 character, possibly doubled. Most Unicode - * digits are in blocks of 10 consecutive code points, so that is - * sufficient information for those scripts, as we can infer alt-1, - * alt-2, .... But for a Japanese locale, a CJK ideographic 0 is - * returned, and the CJK digits are not in code point order, so you - * can't really infer anything. The localedef for this locale did - * specify the succeeding digits, so that strftime() works properly - * on them, without needing to infer anything. But the - * nl_langinfo() return did not give sufficient information for the - * caller to understand what's going on. So until there is - * evidence that it should work differently, this returns the alt-0 - * string for ALT_DIGITS. - * - * wday was chosen because its range is all a single digit. Things - * like tm_sec have two digits as the minimum: '00' */ - - LOCALE_UNLOCK; - - /* If to return the format, not the value, overwrite the buffer - * with it. But some strftime()s will keep the original format if - * illegal, so change those to "" */ - if (return_format) { - if (strEQ(PL_langinfo_buf, format)) { - *PL_langinfo_buf = '\0'; - } - else { - save_to_buffer(format, &PL_langinfo_buf, - &PL_langinfo_bufsize, 0); - } - } - - break; # endif + } } return PL_langinfo_buf; @@ -1729,15 +1929,13 @@ Perl_init_i18nl10n(pTHX_ int printwarn) /* disallow with "" or "0" */ *bad_lang_use_once && strNE("0", bad_lang_use_once))))); - bool done = FALSE; - char * sl_result[NOMINAL_LC_ALL_INDEX + 1]; /* setlocale() return vals; - not copied so must be - looked at immediately */ - char * curlocales[NOMINAL_LC_ALL_INDEX + 1]; /* current locale for given - category; should have been - copied so aren't volatile - */ - char * locale_param; + + /* setlocale() return vals; not copied so must be looked at immediately */ + const char * sl_result[NOMINAL_LC_ALL_INDEX + 1]; + + /* current locale for given category; should have been copied so aren't + * volatile */ + const char * curlocales[NOMINAL_LC_ALL_INDEX + 1]; # ifdef WIN32 @@ -1750,7 +1948,10 @@ Perl_init_i18nl10n(pTHX_ int printwarn) const char *system_default_locale = NULL; # endif -# ifdef DEBUGGING + +# ifndef DEBUGGING +# define DEBUG_LOCALE_INIT(a,b,c) +# else DEBUG_INITIALIZATION_set(cBOOL(PerlEnv_getenv("PERL_DEBUG_LOCALE_INIT"))); @@ -1766,42 +1967,90 @@ Perl_init_i18nl10n(pTHX_ int printwarn) } \ } STMT_END -# else -# define DEBUG_LOCALE_INIT(a,b,c) -# endif - -# ifndef LOCALE_ENVIRON_REQUIRED - - PERL_UNUSED_VAR(done); - PERL_UNUSED_VAR(locale_param); - -# else +/* Make sure the parallel arrays are properly set up */ +# ifdef USE_LOCALE_NUMERIC + assert(categories[LC_NUMERIC_INDEX] == LC_NUMERIC); + assert(strEQ(category_names[LC_NUMERIC_INDEX], "LC_NUMERIC")); +# endif +# ifdef USE_LOCALE_CTYPE + assert(categories[LC_CTYPE_INDEX] == LC_CTYPE); + assert(strEQ(category_names[LC_CTYPE_INDEX], "LC_CTYPE")); +# endif +# ifdef USE_LOCALE_COLLATE + assert(categories[LC_COLLATE_INDEX] == LC_COLLATE); + assert(strEQ(category_names[LC_COLLATE_INDEX], "LC_COLLATE")); +# endif +# ifdef USE_LOCALE_TIME + assert(categories[LC_TIME_INDEX] == LC_TIME); + assert(strEQ(category_names[LC_TIME_INDEX], "LC_TIME")); +# endif +# ifdef USE_LOCALE_MESSAGES + assert(categories[LC_MESSAGES_INDEX] == LC_MESSAGES); + assert(strEQ(category_names[LC_MESSAGES_INDEX], "LC_MESSAGES")); +# endif +# ifdef USE_LOCALE_MONETARY + assert(categories[LC_MONETARY_INDEX] == LC_MONETARY); + assert(strEQ(category_names[LC_MONETARY_INDEX], "LC_MONETARY")); +# endif +# ifdef USE_LOCALE_ADDRESS + assert(categories[LC_ADDRESS_INDEX] == LC_ADDRESS); + assert(strEQ(category_names[LC_ADDRESS_INDEX], "LC_ADDRESS")); +# endif +# ifdef USE_LOCALE_IDENTIFICATION + assert(categories[LC_IDENTIFICATION_INDEX] == LC_IDENTIFICATION); + assert(strEQ(category_names[LC_IDENTIFICATION_INDEX], "LC_IDENTIFICATION")); +# endif +# ifdef USE_LOCALE_MEASUREMENT + assert(categories[LC_MEASUREMENT_INDEX] == LC_MEASUREMENT); + assert(strEQ(category_names[LC_MEASUREMENT_INDEX], "LC_MEASUREMENT")); +# endif +# ifdef USE_LOCALE_PAPER + assert(categories[LC_PAPER_INDEX] == LC_PAPER); + assert(strEQ(category_names[LC_PAPER_INDEX], "LC_PAPER")); +# endif +# ifdef USE_LOCALE_TELEPHONE + assert(categories[LC_TELEPHONE_INDEX] == LC_TELEPHONE); + assert(strEQ(category_names[LC_TELEPHONE_INDEX], "LC_TELEPHONE")); +# endif +# ifdef LC_ALL + assert(categories[LC_ALL_INDEX] == LC_ALL); + assert(strEQ(category_names[LC_ALL_INDEX], "LC_ALL")); + assert(NOMINAL_LC_ALL_INDEX == LC_ALL_INDEX); +# endif +# endif /* DEBUGGING */ +# ifdef LOCALE_ENVIRON_REQUIRED /* * Ultrix setlocale(..., "") fails if there are no environment * variables from which to get a locale name. */ -# ifdef LC_ALL +# ifndef LC_ALL +# error Ultrix without LC_ALL not implemented +# else - if (lang) { - sl_result[NOMINAL_LC_ALL_INDEX] = do_setlocale_c(LC_ALL, setlocale_init); - DEBUG_LOCALE_INIT(LC_ALL, setlocale_init, sl_result[NOMINAL_LC_ALL_INDEX]); - if (sl_result[NOMINAL_LC_ALL_INDEX]) - done = TRUE; - else - setlocale_failure = TRUE; - } - if (! setlocale_failure) { - for (i = 0; i < NOMINAL_LC_ALL_INDEX; i++) { - locale_param = (! done && (lang || PerlEnv_getenv(category_names[i]))) - ? setlocale_init - : NULL; - sl_result[i] = do_setlocale_r(categories[i], locale_param); - if (! sl_result[i]) { + { + bool done = FALSE; + if (lang) { + sl_result[LC_ALL_INDEX] = do_setlocale_c(LC_ALL, setlocale_init); + DEBUG_LOCALE_INIT(LC_ALL, setlocale_init, sl_result[LC_ALL_INDEX]); + if (sl_result[LC_ALL_INDEX]) + done = TRUE; + else setlocale_failure = TRUE; + } + if (! setlocale_failure) { + const char * locale_param; + for (i = 0; i < LC_ALL_INDEX; i++) { + locale_param = (! done && (lang || PerlEnv_getenv(category_names[i]))) + ? setlocale_init + : NULL; + sl_result[i] = do_setlocale_r(categories[i], locale_param); + if (! sl_result[i]) { + setlocale_failure = TRUE; + } + DEBUG_LOCALE_INIT(categories[i], locale_param, sl_result[i]); } - DEBUG_LOCALE_INIT(categories[i], locale_param, sl_result[i]); } } @@ -1826,7 +2075,7 @@ Perl_init_i18nl10n(pTHX_ int printwarn) setlocale_failure = FALSE; # ifdef SYSTEM_DEFAULT_LOCALE -# ifdef WIN32 +# ifdef WIN32 /* Note that assumes Win32 has LC_ALL */ /* On Windows machines, an entry of "" after the 0th means to use * the system default locale, which we now proceed to get. */ @@ -1851,15 +2100,18 @@ Perl_init_i18nl10n(pTHX_ int printwarn) trial_locale = system_default_locale; } -# endif /* WIN32 */ +# else +# error SYSTEM_DEFAULT_LOCALE only implemented for Win32 +# endif # endif /* SYSTEM_DEFAULT_LOCALE */ - } + + } /* For i > 0 */ # ifdef LC_ALL - sl_result[NOMINAL_LC_ALL_INDEX] = do_setlocale_c(LC_ALL, trial_locale); - DEBUG_LOCALE_INIT(LC_ALL, trial_locale, sl_result[NOMINAL_LC_ALL_INDEX]); - if (! sl_result[NOMINAL_LC_ALL_INDEX]) { + sl_result[LC_ALL_INDEX] = do_setlocale_c(LC_ALL, trial_locale); + DEBUG_LOCALE_INIT(LC_ALL, trial_locale, sl_result[LC_ALL_INDEX]); + if (! sl_result[LC_ALL_INDEX]) { setlocale_failure = TRUE; } else { @@ -1918,8 +2170,6 @@ Perl_init_i18nl10n(pTHX_ int printwarn) } } - PerlIO_printf(Perl_error_log, "and possibly others\n"); - # endif /* LC_ALL */ PerlIO_printf(Perl_error_log, @@ -2108,45 +2358,36 @@ Perl_init_i18nl10n(pTHX_ int printwarn) /* Done with finding the locales; update our records */ - for (i = 0; i < NOMINAL_LC_ALL_INDEX; i++) { - switch (categories[i]) { - # ifdef USE_LOCALE_CTYPE - case LC_CTYPE: - new_ctype(curlocales[i]); - break; + new_ctype(curlocales[LC_CTYPE_INDEX]); # endif # ifdef USE_LOCALE_COLLATE - case LC_COLLATE: - new_collate(curlocales[i]); - break; + new_collate(curlocales[LC_COLLATE_INDEX]); # endif # ifdef USE_LOCALE_NUMERIC - case LC_NUMERIC: - new_numeric(curlocales[i]); - break; + new_numeric(curlocales[LC_NUMERIC_INDEX]); # endif - default: ; - } + for (i = 0; i < NOMINAL_LC_ALL_INDEX; i++) { Safefree(curlocales[i]); } # if defined(USE_PERLIO) && defined(USE_LOCALE_CTYPE) /* Set PL_utf8locale to TRUE if using PerlIO _and_ the current LC_CTYPE - * locale is UTF-8. If PL_utf8locale and PL_unicode (set by -C or by - * $ENV{PERL_UNICODE}) are true, perl.c:S_parse_body() will turn on the - * PerlIO :utf8 layer on STDIN, STDOUT, STDERR, _and_ the default open - * discipline. */ - PL_utf8locale = _is_cur_LC_category_utf8(LC_CTYPE); + * locale is UTF-8. The call to new_ctype() just above has already + * calculated the latter value and saved it in PL_in_utf8_CTYPE_locale. If + * both PL_utf8locale and PL_unicode (set by -C or by $ENV{PERL_UNICODE}) + * are true, perl.c:S_parse_body() will turn on the PerlIO :utf8 layer on + * STDIN, STDOUT, STDERR, _and_ the default open discipline. */ + PL_utf8locale = PL_in_utf8_CTYPE_locale; /* Set PL_unicode to $ENV{PERL_UNICODE} if using PerlIO. This is an alternative to using the -C command line switch @@ -2327,7 +2568,7 @@ Perl__mem_collxfrm(pTHX_ const char *input_string, } /* End of determining the character that is to replace NULs */ /* If the replacement is variant under UTF-8, it must match the - * UTF8-ness as the original */ + * UTF8-ness of the original */ if ( ! UVCHR_IS_INVARIANT(PL_strxfrm_NUL_replacement) && utf8) { this_replacement_char[0] = UTF8_EIGHT_BIT_HI(PL_strxfrm_NUL_replacement); @@ -2783,7 +3024,7 @@ Perl__is_cur_LC_category_utf8(pTHX_ int category) * English, it comes down to if the locale's name ends in something like * "UTF-8". It errs on the side of not being a UTF-8 locale. */ - char *save_input_locale = NULL; + const char *save_input_locale = NULL; STRLEN final_pos; # ifdef LC_ALL @@ -2853,9 +3094,8 @@ Perl__is_cur_LC_category_utf8(pTHX_ int category) * should give the correct results */ # if defined(HAS_NL_LANGINFO) && defined(CODESET) - /* The task is easiest if has this POSIX 2001 function */ - { + { /* The task is easiest if the platform has this POSIX 2001 function */ const char *codeset = my_nl_langinfo(PERL_CODESET, FALSE); /* FALSE => already in dest locale */ @@ -2892,7 +3132,7 @@ Perl__is_cur_LC_category_utf8(pTHX_ int category) * Unicode code point. Since UTF-8 is the only non-single byte * encoding we handle, we just say any such encoding is UTF-8, and if * turns out to be wrong, other things will fail */ - is_utf8 = MB_CUR_MAX >= 4; + is_utf8 = (unsigned) MB_CUR_MAX >= STRLENs(MAX_UNICODE_UTF8); DEBUG_L(PerlIO_printf(Perl_debug_log, "\tMB_CUR_MAX=%d; ?UTF8 locale=%d\n", @@ -2916,7 +3156,7 @@ Perl__is_cur_LC_category_utf8(pTHX_ int category) if ( len != STRLENs(REPLACEMENT_CHARACTER_UTF8) - || wc != (wchar_t) 0xFFFD) + || wc != (wchar_t) UNICODE_REPLACEMENT) { is_utf8 = FALSE; DEBUG_L(PerlIO_printf(Perl_debug_log, "\replacement=U+%x\n", @@ -3222,7 +3462,7 @@ Perl__is_cur_LC_category_utf8(pTHX_ int category) final_pos = strlen(save_input_locale) - 1; if (final_pos >= 3) { - char *name = save_input_locale; + const char *name = save_input_locale; /* Find next 'U' or 'u' and look from there */ while ((name += strcspn(name, "Uu") + 1) @@ -3340,7 +3580,17 @@ Perl_my_strerror(pTHX_ const int errnum) # if defined(HAS_POSIX_2008_LOCALE) && defined(HAS_STRERROR_L) - /* This function is trivial if we have strerror_l() */ + /* This function is trivial if we don't have to worry about thread safety + * and have strerror_l(), as it handles the switch of locales so we don't + * have to deal with that. We don't have to worry about thread safety if + * this is an unthreaded build, or if strerror_r() is also available. Both + * it and strerror_l() are thread-safe. Plain strerror() isn't thread + * safe. But on threaded builds when strerror_r() is available, the + * apparent call to strerror() below is actually a macro that + * behind-the-scenes calls strerror_r(). + */ + +# if ! defined(USE_ITHREADS) || defined(HAS_STRERROR_R) if (within_locale_scope) { errstr = savepv(strerror(errnum)); @@ -3349,7 +3599,34 @@ Perl_my_strerror(pTHX_ const int errnum) errstr = savepv(strerror_l(errnum, PL_C_locale_obj)); } -# else /* Doesn't have strerror_l(). */ +# else + + /* Here we have strerror_l(), but not strerror_r() and we are on a + * threaded-build. We use strerror_l() for everything, constructing a + * locale to pass to it if necessary */ + + bool do_free = FALSE; + locale_t locale_to_use; + + if (within_locale_scope) { + locale_to_use = uselocale((locale_t) 0); + if (locale_to_use == LC_GLOBAL_LOCALE) { + locale_to_use = duplocale(LC_GLOBAL_LOCALE); + do_free = TRUE; + } + } + else { /* Use C locale if not within 'use locale' scope */ + locale_to_use = PL_C_locale_obj; + } + + errstr = savepv(strerror_l(errnum, locale_to_use)); + + if (do_free) { + freelocale(locale_to_use); + } + +# endif +# else /* Doesn't have strerror_l() */ # ifdef USE_POSIX_2008_LOCALE @@ -3357,7 +3634,7 @@ Perl_my_strerror(pTHX_ const int errnum) # else - char * save_locale = NULL; + const char * save_locale = NULL; bool locale_is_C = FALSE; /* We have a critical section to prevent another thread from changing the @@ -3532,25 +3809,9 @@ S_setlocale_debug_string(const int category, /* category number, static char ret[128] = "If you can read this, thank your buggy C" " library strlcpy(), and change your hints file" " to undef it"; - unsigned int i; - - const unsigned int highest_index = NOMINAL_LC_ALL_INDEX; my_strlcpy(ret, "setlocale(", sizeof(ret)); - - /* Look for category in our list, and if found, add its name */ - for (i = 0; i <= highest_index; i++) { - if (category == categories[i]) { - my_strlcat(ret, category_names[i], sizeof(ret)); - goto found_category; - } - } - - /* Unknown category to us */ - my_snprintf(ret, sizeof(ret), "%s? %d", ret, category); - - found_category: - + my_strlcat(ret, category_name(category), sizeof(ret)); my_strlcat(ret, ", ", sizeof(ret)); if (locale) {