X-Git-Url: https://perl5.git.perl.org/perl5.git/blobdiff_plain/fca394fcca6f8b19f183027c66da8a99bb43312d..6274c42ee9c605682ee45a1d474f47b0642e608c:/locale.c diff --git a/locale.c b/locale.c index 0986614..4631b86 100644 --- a/locale.c +++ b/locale.c @@ -1,6 +1,7 @@ /* locale.c * - * Copyright (c) 2001-2002, Larry Wall + * Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, + * 2002, 2003, 2005, 2006, 2007, 2008 by Larry Wall and others * * You may distribute under the terms of either the GNU General Public * License or the Artistic License, as specified in the README file. @@ -8,13 +9,19 @@ */ /* - * A Elbereth Gilthoniel, - * silivren penna míriel - * o menel aglar elenath! - * Na-chaered palan-díriel - * o galadhremmin ennorath, - * Fanuilos, le linnathon - * nef aear, si nef aearon! + * A Elbereth Gilthoniel, + * silivren penna míriel + * o menel aglar elenath! + * Na-chaered palan-díriel + * o galadhremmin ennorath, + * Fanuilos, le linnathon + * nef aear, si nef aearon! + * + * [p.238 of _The Lord of the Rings_, II/i: "Many Meetings"] + */ + +/* utility functions for handling locale-specific stuff like what + * character represents the decimal point. */ #include "EXTERN.h" @@ -31,6 +38,7 @@ #include "reentr.h" +#if defined(USE_LOCALE_NUMERIC) || defined(USE_LOCALE_COLLATE) /* * Standardize the locale name from a string returned by 'setlocale'. * @@ -38,7 +46,7 @@ * (1) "xx_YY" if the first argument of setlocale() is not LC_ALL * (2) "xa_YY xb_YY ..." if the first argument of setlocale() is LC_ALL * (the space-separated values represent the various sublocales, - * in some unspecificed order) + * in some unspecified order) * * In some platforms it has a form like "LC_SOMETHING=Lang_Country.866\n", * which is harmful for further use of the string in setlocale(). @@ -47,24 +55,21 @@ STATIC char * S_stdize_locale(pTHX_ char *locs) { - char *s; + const char * const s = strchr(locs, '='); bool okay = TRUE; - if ((s = strchr(locs, '='))) { - char *t; + PERL_ARGS_ASSERT_STDIZE_LOCALE; + if (s) { + const char * const t = strchr(s, '.'); okay = FALSE; - if ((t = strchr(s, '.'))) { - char *u; - - if ((u = strchr(t, '\n'))) { - - if (u[1] == 0) { - STRLEN len = u - s; - Move(s + 1, locs, len, char); - locs[len] = 0; - okay = TRUE; - } + if (t) { + const char * const u = strchr(t, '\n'); + if (u && (u[1] == 0)) { + const STRLEN len = u - s; + Move(s + 1, locs, len, char); + locs[len] = 0; + okay = TRUE; } } } @@ -74,19 +79,20 @@ S_stdize_locale(pTHX_ char *locs) return locs; } +#endif void Perl_set_numeric_radix(pTHX) { #ifdef USE_LOCALE_NUMERIC + dVAR; # ifdef HAS_LOCALECONV - struct lconv* lc; + const struct lconv* const lc = localeconv(); - lc = localeconv(); if (lc && lc->decimal_point) { if (lc->decimal_point[0] == '.' && lc->decimal_point[1] == 0) { SvREFCNT_dec(PL_numeric_radix_sv); - PL_numeric_radix_sv = Nullsv; + PL_numeric_radix_sv = NULL; } else { if (PL_numeric_radix_sv) @@ -96,7 +102,7 @@ Perl_set_numeric_radix(pTHX) } } else - PL_numeric_radix_sv = Nullsv; + PL_numeric_radix_sv = NULL; # endif /* HAS_LOCALECONV */ #endif /* USE_LOCALE_NUMERIC */ } @@ -105,15 +111,14 @@ Perl_set_numeric_radix(pTHX) * Set up for a new numeric locale. */ void -Perl_new_numeric(pTHX_ char *newnum) +Perl_new_numeric(pTHX_ const char *newnum) { #ifdef USE_LOCALE_NUMERIC + dVAR; if (! newnum) { - if (PL_numeric_name) { - Safefree(PL_numeric_name); - PL_numeric_name = NULL; - } + Safefree(PL_numeric_name); + PL_numeric_name = NULL; PL_numeric_standard = TRUE; PL_numeric_local = TRUE; return; @@ -122,7 +127,8 @@ Perl_new_numeric(pTHX_ char *newnum) if (! PL_numeric_name || strNE(PL_numeric_name, newnum)) { Safefree(PL_numeric_name); PL_numeric_name = stdize_locale(savepv(newnum)); - PL_numeric_standard = (strEQ(newnum, "C") || strEQ(newnum, "POSIX")); + PL_numeric_standard = ((*newnum == 'C' && newnum[1] == '\0') + || strEQ(newnum, "POSIX")); PL_numeric_local = TRUE; set_numeric_radix(); } @@ -134,6 +140,7 @@ void Perl_set_numeric_standard(pTHX) { #ifdef USE_LOCALE_NUMERIC + dVAR; if (! PL_numeric_standard) { setlocale(LC_NUMERIC, "C"); @@ -149,6 +156,7 @@ void Perl_set_numeric_local(pTHX) { #ifdef USE_LOCALE_NUMERIC + dVAR; if (! PL_numeric_local) { setlocale(LC_NUMERIC, PL_numeric_name); @@ -164,12 +172,14 @@ Perl_set_numeric_local(pTHX) * Set up for a new ctype locale. */ void -Perl_new_ctype(pTHX_ char *newctype) +Perl_new_ctype(pTHX_ const char *newctype) { #ifdef USE_LOCALE_CTYPE - + dVAR; int i; + PERL_ARGS_ASSERT_NEW_CTYPE; + for (i = 0; i < 256; i++) { if (isUPPER_LC(i)) PL_fold_locale[i] = toLOWER_LC(i); @@ -180,15 +190,19 @@ Perl_new_ctype(pTHX_ char *newctype) } #endif /* USE_LOCALE_CTYPE */ + PERL_ARGS_ASSERT_NEW_CTYPE; + PERL_UNUSED_ARG(newctype); + PERL_UNUSED_CONTEXT; } /* * Set up for a new collation locale. */ void -Perl_new_collate(pTHX_ char *newcoll) +Perl_new_collate(pTHX_ const char *newcoll) { #ifdef USE_LOCALE_COLLATE + dVAR; if (! newcoll) { if (PL_collation_name) { @@ -206,16 +220,17 @@ Perl_new_collate(pTHX_ char *newcoll) ++PL_collation_ix; Safefree(PL_collation_name); PL_collation_name = stdize_locale(savepv(newcoll)); - PL_collation_standard = (strEQ(newcoll, "C") || strEQ(newcoll, "POSIX")); + PL_collation_standard = ((*newcoll == 'C' && newcoll[1] == '\0') + || strEQ(newcoll, "POSIX")); { /* 2: at most so many chars ('a', 'b'). */ /* 50: surely no system expands a char more. */ #define XFRMBUFSIZE (2 * 50) char xbuf[XFRMBUFSIZE]; - Size_t fa = strxfrm(xbuf, "a", XFRMBUFSIZE); - Size_t fb = strxfrm(xbuf, "ab", XFRMBUFSIZE); - SSize_t mult = fb - fa; + const Size_t fa = strxfrm(xbuf, "a", XFRMBUFSIZE); + const Size_t fb = strxfrm(xbuf, "ab", XFRMBUFSIZE); + const SSize_t mult = fb - fa; if (mult < 1) Perl_croak(aTHX_ "strxfrm() gets absurd"); PL_collxfrm_base = (fa > (Size_t)mult) ? (fa - mult) : 0; @@ -240,6 +255,7 @@ Perl_init_i18nl10n(pTHX_ int printwarn) */ #if defined(USE_LOCALE) + dVAR; #ifdef USE_LOCALE_CTYPE char *curctype = NULL; @@ -251,10 +267,10 @@ Perl_init_i18nl10n(pTHX_ int printwarn) char *curnum = NULL; #endif /* USE_LOCALE_NUMERIC */ #ifdef __GLIBC__ - char *language = PerlEnv_getenv("LANGUAGE"); + char * const language = PerlEnv_getenv("LANGUAGE"); #endif - char *lc_all = PerlEnv_getenv("LC_ALL"); - char *lang = PerlEnv_getenv("LANG"); + char * const lc_all = PerlEnv_getenv("LC_ALL"); + char * const lang = PerlEnv_getenv("LANG"); bool setlocale_failure = FALSE; #ifdef LOCALE_ENVIRON_REQUIRED @@ -275,28 +291,31 @@ Perl_init_i18nl10n(pTHX_ int printwarn) } if (!setlocale_failure) { #ifdef USE_LOCALE_CTYPE + Safefree(curctype); if (! (curctype = setlocale(LC_CTYPE, (!done && (lang || PerlEnv_getenv("LC_CTYPE"))) - ? "" : Nullch))) + ? "" : NULL))) setlocale_failure = TRUE; else curctype = savepv(curctype); #endif /* USE_LOCALE_CTYPE */ #ifdef USE_LOCALE_COLLATE + Safefree(curcoll); if (! (curcoll = setlocale(LC_COLLATE, (!done && (lang || PerlEnv_getenv("LC_COLLATE"))) - ? "" : Nullch))) + ? "" : NULL))) setlocale_failure = TRUE; else curcoll = savepv(curcoll); #endif /* USE_LOCALE_COLLATE */ #ifdef USE_LOCALE_NUMERIC + Safefree(curnum); if (! (curnum = setlocale(LC_NUMERIC, (!done && (lang || PerlEnv_getenv("LC_NUMERIC"))) - ? "" : Nullch))) + ? "" : NULL))) setlocale_failure = TRUE; else curnum = savepv(curnum); @@ -314,18 +333,21 @@ Perl_init_i18nl10n(pTHX_ int printwarn) if (!setlocale_failure) { #ifdef USE_LOCALE_CTYPE + Safefree(curctype); if (! (curctype = setlocale(LC_CTYPE, ""))) setlocale_failure = TRUE; else curctype = savepv(curctype); #endif /* USE_LOCALE_CTYPE */ #ifdef USE_LOCALE_COLLATE + Safefree(curcoll); if (! (curcoll = setlocale(LC_COLLATE, ""))) setlocale_failure = TRUE; else curcoll = savepv(curcoll); #endif /* USE_LOCALE_COLLATE */ #ifdef USE_LOCALE_NUMERIC + Safefree(curnum); if (! (curnum = setlocale(LC_NUMERIC, ""))) setlocale_failure = TRUE; else @@ -335,7 +357,7 @@ Perl_init_i18nl10n(pTHX_ int printwarn) if (setlocale_failure) { char *p; - bool locwarn = (printwarn > 1 || + const bool locwarn = (printwarn > 1 || (printwarn && (!(p = PerlEnv_getenv("PERL_BADLANG")) || atoi(p)))); @@ -446,13 +468,16 @@ Perl_init_i18nl10n(pTHX_ int printwarn) #endif /* ! LC_ALL */ #ifdef USE_LOCALE_CTYPE - curctype = savepv(setlocale(LC_CTYPE, Nullch)); + Safefree(curctype); + curctype = savepv(setlocale(LC_CTYPE, NULL)); #endif /* USE_LOCALE_CTYPE */ #ifdef USE_LOCALE_COLLATE - curcoll = savepv(setlocale(LC_COLLATE, Nullch)); + Safefree(curcoll); + curcoll = savepv(setlocale(LC_COLLATE, NULL)); #endif /* USE_LOCALE_COLLATE */ #ifdef USE_LOCALE_NUMERIC - curnum = savepv(setlocale(LC_NUMERIC, Nullch)); + Safefree(curnum); + curnum = savepv(setlocale(LC_NUMERIC, NULL)); #endif /* USE_LOCALE_NUMERIC */ } else { @@ -475,75 +500,73 @@ Perl_init_i18nl10n(pTHX_ int printwarn) #ifdef USE_PERLIO { - /* Set PL_wantutf8 to TRUE if using PerlIO _and_ + /* Set PL_utf8locale to TRUE if using PerlIO _and_ any of the following are true: - nl_langinfo(CODESET) contains /^utf-?8/i - - $ENV{LANGUAGE} contains /^utf-?8/i (only if using glibc) - - $ENV{LC_CALL} contains /^utf-?8/i + - $ENV{LC_ALL} contains /^utf-?8/i - $ENV{LC_CTYPE} contains /^utf-?8/i - - $ENV{LANG} contains /^utf-?8/i - If PL_wantutf8 is true, perl.c:S_parse_body() - will turn on the PerlIO :utf8 discipline on STDIN, STDOUT, - STDERR, _and_ the default open discipline. + - $ENV{LANG} contains /^utf-?8/i + The LC_ALL, LC_CTYPE, LANG obey the usual override + hierarchy of locale environment variables. (LANGUAGE + affects only LC_MESSAGES only under glibc.) (If present, + it overrides LC_MESSAGES for GNU gettext, and it also + can have more than one locale, separated by spaces, + in case you need to know.) + If PL_utf8locale and PL_unicode (set by -C or by $ENV{PERL_UNICODE}) + are true, perl.c:S_parse_body() will turn on the PerlIO :utf8 layer + on STDIN, STDOUT, STDERR, _and_ the default open discipline. */ - bool wantutf8 = FALSE; + bool utf8locale = FALSE; char *codeset = NULL; #if defined(HAS_NL_LANGINFO) && defined(CODESET) codeset = nl_langinfo(CODESET); #endif - if (codeset && - (ibcmp(codeset, "UTF-8", 5) == 0 || - ibcmp(codeset, "UTF8", 4) == 0)) - wantutf8 = TRUE; + if (codeset) + utf8locale = (foldEQ(codeset, STR_WITH_LEN("UTF-8")) + || foldEQ(codeset, STR_WITH_LEN("UTF8") )); #if defined(USE_LOCALE) -#ifdef __GLIBC__ - if (!wantutf8 && language && - (ibcmp(language, "UTF-8", 5) == 0 || - ibcmp(language, "UTF8", 4) == 0)) - wantutf8 = TRUE; -#endif - if (!wantutf8 && lc_all && - (ibcmp(lc_all, "UTF-8", 5) == 0 || - ibcmp(lc_all, "UTF8", 4) == 0)) - wantutf8 = TRUE; + else { /* nl_langinfo(CODESET) is supposed to correctly + * interpret the locale environment variables, + * but just in case it fails, let's do this manually. */ + if (lang) + utf8locale = (foldEQ(lang, STR_WITH_LEN("UTF-8")) + || foldEQ(lang, STR_WITH_LEN("UTF8") )); #ifdef USE_LOCALE_CTYPE - if (!wantutf8 && curctype && - (ibcmp(curctype, "UTF-8", 5) == 0 || - ibcmp(curctype, "UTF8", 4) == 0)) - wantutf8 = TRUE; + if (curctype) + utf8locale = (foldEQ(curctype, STR_WITH_LEN("UTF-8")) + || foldEQ(curctype, STR_WITH_LEN("UTF8") )); #endif - if (!wantutf8 && lang && - (ibcmp(lang, "UTF-8", 5) == 0 || - ibcmp(lang, "UTF8", 4) == 0)) - wantutf8 = TRUE; + if (lc_all) + utf8locale = (foldEQ(lc_all, STR_WITH_LEN("UTF-8")) + || foldEQ(lc_all, STR_WITH_LEN("UTF8") )); + } #endif /* USE_LOCALE */ - if (wantutf8) - PL_wantutf8 = TRUE; + if (utf8locale) + PL_utf8locale = TRUE; + } + /* Set PL_unicode to $ENV{PERL_UNICODE} if using PerlIO. + This is an alternative to using the -C command line switch + (the -C if present will override this). */ + { + const char *p = PerlEnv_getenv("PERL_UNICODE"); + PL_unicode = p ? parse_unicode_opts(&p) : 0; + if (PL_unicode & PERL_UNICODE_UTF8CACHEASSERT_FLAG) + PL_utf8cache = -1; } #endif #ifdef USE_LOCALE_CTYPE - if (curctype != NULL) - Safefree(curctype); + Safefree(curctype); #endif /* USE_LOCALE_CTYPE */ #ifdef USE_LOCALE_COLLATE - if (curcoll != NULL) - Safefree(curcoll); + Safefree(curcoll); #endif /* USE_LOCALE_COLLATE */ #ifdef USE_LOCALE_NUMERIC - if (curnum != NULL) - Safefree(curnum); + Safefree(curnum); #endif /* USE_LOCALE_NUMERIC */ return ok; } -/* Backwards compatibility. */ -int -Perl_init_i18nl14n(pTHX_ int printwarn) -{ - return init_i18nl10n(printwarn); -} - #ifdef USE_LOCALE_COLLATE /* @@ -553,28 +576,32 @@ Perl_init_i18nl14n(pTHX_ int printwarn) * The real transformed data begins at offset sizeof(collationix). * Please see sv_collxfrm() to see how this is used. */ + char * Perl_mem_collxfrm(pTHX_ const char *s, STRLEN len, STRLEN *xlen) { + dVAR; char *xbuf; STRLEN xAlloc, xin, xout; /* xalloc is a reserved word in VC */ + PERL_ARGS_ASSERT_MEM_COLLXFRM; + /* the first sizeof(collationix) bytes are used by sv_collxfrm(). */ /* the +1 is for the terminating NUL. */ xAlloc = sizeof(PL_collation_ix) + PL_collxfrm_base + (PL_collxfrm_mult * len) + 1; - New(171, xbuf, xAlloc, char); + Newx(xbuf, xAlloc, char); if (! xbuf) goto bad; *(U32*)xbuf = PL_collation_ix; xout = sizeof(PL_collation_ix); for (xin = 0; xin < len; ) { - SSize_t xused; + Size_t xused; for (;;) { xused = strxfrm(xbuf + xout, s + xin, xAlloc - xout); - if (xused == -1) + if (xused >= PERL_INT_MAX) goto bad; if ((STRLEN)xused < xAlloc - xout) break; @@ -603,3 +630,12 @@ Perl_mem_collxfrm(pTHX_ const char *s, STRLEN len, STRLEN *xlen) #endif /* USE_LOCALE_COLLATE */ +/* + * Local variables: + * c-indentation-style: bsd + * c-basic-offset: 4 + * indent-tabs-mode: t + * End: + * + * ex: set ts=8 sts=4 sw=4 noet: + */