X-Git-Url: https://perl5.git.perl.org/perl5.git/blobdiff_plain/96a5add60f1f39d38341c09c11f0542e68f782b0..d1a30ea2dc426e354909eff54f683522681ab603:/locale.c diff --git a/locale.c b/locale.c index dd3060f..b7a78fb 100644 --- a/locale.c +++ b/locale.c @@ -1,7 +1,7 @@ /* locale.c * - * Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999, - * 2000, 2001, 2002, 2003, 2005, 2006, by Larry Wall and others + * Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, + * 2002, 2003, 2005, 2006, 2007, 2008 by Larry Wall and others * * You may distribute under the terms of either the GNU General Public * License or the Artistic License, as specified in the README file. @@ -9,13 +9,15 @@ */ /* - * A Elbereth Gilthoniel, - * silivren penna míriel - * o menel aglar elenath! - * Na-chaered palan-díriel - * o galadhremmin ennorath, - * Fanuilos, le linnathon - * nef aear, si nef aearon! + * A Elbereth Gilthoniel, + * silivren penna míriel + * o menel aglar elenath! + * Na-chaered palan-díriel + * o galadhremmin ennorath, + * Fanuilos, le linnathon + * nef aear, si nef aearon! + * + * [p.238 of _The Lord of the Rings_, II/i: "Many Meetings"] */ /* utility functions for handling locale-specific stuff like what @@ -26,10 +28,6 @@ #define PERL_IN_LOCALE_C #include "perl.h" -#ifdef I_LOCALE -# include -#endif - #ifdef I_LANGINFO # include #endif @@ -44,7 +42,7 @@ * (1) "xx_YY" if the first argument of setlocale() is not LC_ALL * (2) "xa_YY xb_YY ..." if the first argument of setlocale() is LC_ALL * (the space-separated values represent the various sublocales, - * in some unspecificed order) + * in some unspecified order) * * In some platforms it has a form like "LC_SOMETHING=Lang_Country.866\n", * which is harmful for further use of the string in setlocale(). @@ -56,6 +54,8 @@ S_stdize_locale(pTHX_ char *locs) const char * const s = strchr(locs, '='); bool okay = TRUE; + PERL_ARGS_ASSERT_STDIZE_LOCALE; + if (s) { const char * const t = strchr(s, '.'); okay = FALSE; @@ -174,6 +174,8 @@ Perl_new_ctype(pTHX_ const char *newctype) dVAR; int i; + PERL_ARGS_ASSERT_NEW_CTYPE; + for (i = 0; i < 256; i++) { if (isUPPER_LC(i)) PL_fold_locale[i] = toLOWER_LC(i); @@ -184,6 +186,7 @@ Perl_new_ctype(pTHX_ const char *newctype) } #endif /* USE_LOCALE_CTYPE */ + PERL_ARGS_ASSERT_NEW_CTYPE; PERL_UNUSED_ARG(newctype); PERL_UNUSED_CONTEXT; } @@ -224,8 +227,9 @@ Perl_new_collate(pTHX_ const char *newcoll) const Size_t fa = strxfrm(xbuf, "a", XFRMBUFSIZE); const Size_t fb = strxfrm(xbuf, "ab", XFRMBUFSIZE); const SSize_t mult = fb - fa; - if (mult < 1) - Perl_croak(aTHX_ "strxfrm() gets absurd"); + if (mult < 1 && !(fa == 0 && fb == 0)) + Perl_croak(aTHX_ "panic: strxfrm() gets absurd - a => %"UVuf", ab => %"UVuf, + (UV) fa, (UV) fb); PL_collxfrm_base = (fa > (Size_t)mult) ? (fa - mult) : 0; PL_collxfrm_mult = mult; } @@ -284,6 +288,7 @@ Perl_init_i18nl10n(pTHX_ int printwarn) } if (!setlocale_failure) { #ifdef USE_LOCALE_CTYPE + Safefree(curctype); if (! (curctype = setlocale(LC_CTYPE, (!done && (lang || PerlEnv_getenv("LC_CTYPE"))) @@ -293,6 +298,7 @@ Perl_init_i18nl10n(pTHX_ int printwarn) curctype = savepv(curctype); #endif /* USE_LOCALE_CTYPE */ #ifdef USE_LOCALE_COLLATE + Safefree(curcoll); if (! (curcoll = setlocale(LC_COLLATE, (!done && (lang || PerlEnv_getenv("LC_COLLATE"))) @@ -302,6 +308,7 @@ Perl_init_i18nl10n(pTHX_ int printwarn) curcoll = savepv(curcoll); #endif /* USE_LOCALE_COLLATE */ #ifdef USE_LOCALE_NUMERIC + Safefree(curnum); if (! (curnum = setlocale(LC_NUMERIC, (!done && (lang || PerlEnv_getenv("LC_NUMERIC"))) @@ -323,18 +330,21 @@ Perl_init_i18nl10n(pTHX_ int printwarn) if (!setlocale_failure) { #ifdef USE_LOCALE_CTYPE + Safefree(curctype); if (! (curctype = setlocale(LC_CTYPE, ""))) setlocale_failure = TRUE; else curctype = savepv(curctype); #endif /* USE_LOCALE_CTYPE */ #ifdef USE_LOCALE_COLLATE + Safefree(curcoll); if (! (curcoll = setlocale(LC_COLLATE, ""))) setlocale_failure = TRUE; else curcoll = savepv(curcoll); #endif /* USE_LOCALE_COLLATE */ #ifdef USE_LOCALE_NUMERIC + Safefree(curnum); if (! (curnum = setlocale(LC_NUMERIC, ""))) setlocale_failure = TRUE; else @@ -455,12 +465,15 @@ Perl_init_i18nl10n(pTHX_ int printwarn) #endif /* ! LC_ALL */ #ifdef USE_LOCALE_CTYPE + Safefree(curctype); curctype = savepv(setlocale(LC_CTYPE, NULL)); #endif /* USE_LOCALE_CTYPE */ #ifdef USE_LOCALE_COLLATE + Safefree(curcoll); curcoll = savepv(setlocale(LC_COLLATE, NULL)); #endif /* USE_LOCALE_COLLATE */ #ifdef USE_LOCALE_NUMERIC + Safefree(curnum); curnum = savepv(setlocale(LC_NUMERIC, NULL)); #endif /* USE_LOCALE_NUMERIC */ } @@ -506,23 +519,23 @@ Perl_init_i18nl10n(pTHX_ int printwarn) codeset = nl_langinfo(CODESET); #endif if (codeset) - utf8locale = (Perl_ibcmp(aTHX_ codeset, STR_WITH_LEN("UTF-8")) == 0 || - Perl_ibcmp(aTHX_ codeset, STR_WITH_LEN("UTF8") ) == 0); + utf8locale = (foldEQ(codeset, STR_WITH_LEN("UTF-8")) + || foldEQ(codeset, STR_WITH_LEN("UTF8") )); #if defined(USE_LOCALE) else { /* nl_langinfo(CODESET) is supposed to correctly * interpret the locale environment variables, * but just in case it fails, let's do this manually. */ if (lang) - utf8locale = (Perl_ibcmp(aTHX_ lang, STR_WITH_LEN("UTF-8")) == 0 || - Perl_ibcmp(aTHX_ lang, STR_WITH_LEN("UTF8") ) == 0); + utf8locale = (foldEQ(lang, STR_WITH_LEN("UTF-8")) + || foldEQ(lang, STR_WITH_LEN("UTF8") )); #ifdef USE_LOCALE_CTYPE if (curctype) - utf8locale = (Perl_ibcmp(aTHX_ curctype, STR_WITH_LEN("UTF-8")) == 0 || - Perl_ibcmp(aTHX_ curctype, STR_WITH_LEN("UTF8") ) == 0); + utf8locale = (foldEQ(curctype, STR_WITH_LEN("UTF-8")) + || foldEQ(curctype, STR_WITH_LEN("UTF8") )); #endif if (lc_all) - utf8locale = (Perl_ibcmp(aTHX_ lc_all, STR_WITH_LEN("UTF-8")) == 0 || - Perl_ibcmp(aTHX_ lc_all, STR_WITH_LEN("UTF8") ) == 0); + utf8locale = (foldEQ(lc_all, STR_WITH_LEN("UTF-8")) + || foldEQ(lc_all, STR_WITH_LEN("UTF8") )); } #endif /* USE_LOCALE */ if (utf8locale) @@ -534,6 +547,8 @@ Perl_init_i18nl10n(pTHX_ int printwarn) { const char *p = PerlEnv_getenv("PERL_UNICODE"); PL_unicode = p ? parse_unicode_opts(&p) : 0; + if (PL_unicode & PERL_UNICODE_UTF8CACHEASSERT_FLAG) + PL_utf8cache = -1; } #endif @@ -566,6 +581,8 @@ Perl_mem_collxfrm(pTHX_ const char *s, STRLEN len, STRLEN *xlen) char *xbuf; STRLEN xAlloc, xin, xout; /* xalloc is a reserved word in VC */ + PERL_ARGS_ASSERT_MEM_COLLXFRM; + /* the first sizeof(collationix) bytes are used by sv_collxfrm(). */ /* the +1 is for the terminating NUL. */ @@ -577,11 +594,11 @@ Perl_mem_collxfrm(pTHX_ const char *s, STRLEN len, STRLEN *xlen) *(U32*)xbuf = PL_collation_ix; xout = sizeof(PL_collation_ix); for (xin = 0; xin < len; ) { - SSize_t xused; + Size_t xused; for (;;) { xused = strxfrm(xbuf + xout, s + xin, xAlloc - xout); - if (xused == -1) + if (xused >= PERL_INT_MAX) goto bad; if ((STRLEN)xused < xAlloc - xout) break;