X-Git-Url: https://perl5.git.perl.org/perl5.git/blobdiff_plain/be3c0a43e1e6b1244032726df02a3ab450a3c4be..8006bbc377c15cd3d89b49736744acd8c1396f24:/locale.c diff --git a/locale.c b/locale.c index 22e9030..6f5f016 100644 --- a/locale.c +++ b/locale.c @@ -1,6 +1,7 @@ /* locale.c * - * Copyright (c) 2001-2002, Larry Wall + * Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999, + * 2000, 2001, 2002, 2003, by Larry Wall and others * * You may distribute under the terms of either the GNU General Public * License or the Artistic License, as specified in the README file. @@ -25,6 +26,12 @@ # include #endif +#ifdef I_LANGINFO +# include +#endif + +#include "reentr.h" + /* * Standardize the locale name from a string returned by 'setlocale'. * @@ -212,7 +219,7 @@ Perl_new_collate(pTHX_ char *newcoll) SSize_t mult = fb - fa; if (mult < 1) Perl_croak(aTHX_ "strxfrm() gets absurd"); - PL_collxfrm_base = (fa > mult) ? (fa - mult) : 0; + PL_collxfrm_base = (fa > (Size_t)mult) ? (fa - mult) : 0; PL_collxfrm_mult = mult; } } @@ -462,10 +469,66 @@ Perl_init_i18nl10n(pTHX_ int printwarn) #ifdef USE_LOCALE_NUMERIC new_numeric(curnum); #endif /* USE_LOCALE_NUMERIC */ + } #endif /* USE_LOCALE */ +#ifdef USE_PERLIO + { + /* Set PL_utf8locale to TRUE if using PerlIO _and_ + any of the following are true: + - nl_langinfo(CODESET) contains /^utf-?8/i + - $ENV{LC_ALL} contains /^utf-?8/i + - $ENV{LC_CTYPE} contains /^utf-?8/i + - $ENV{LANG} contains /^utf-?8/i + The LC_ALL, LC_CTYPE, LANG obey the usual override + hierarchy of locale environment variables. (LANGUAGE + affects only LC_MESSAGES only under glibc.) (If present, + it overrides LC_MESSAGES for GNU gettext, and it also + can have more than one locale, separated by spaces, + in case you need to know.) + If PL_utf8locale and PL_unicode (set by -C or by $ENV{PERL_UNICODE}) + are true, perl.c:S_parse_body() will turn on the PerlIO :utf8 layer + on STDIN, STDOUT, STDERR, _and_ the default open discipline. + */ + bool utf8locale = FALSE; + char *codeset = NULL; +#if defined(HAS_NL_LANGINFO) && defined(CODESET) + codeset = nl_langinfo(CODESET); +#endif + if (codeset) + utf8locale = (ibcmp(codeset, "UTF-8", 5) == 0 || + ibcmp(codeset, "UTF8", 4) == 0); +#if defined(USE_LOCALE) + else { /* nl_langinfo(CODESET) is supposed to correctly + * interpret the locale environment variables, + * but just in case it fails, let's do this manually. */ + if (lang) + utf8locale = (ibcmp(lang, "UTF-8", 5) == 0 || + ibcmp(lang, "UTF8", 4) == 0); +#ifdef USE_LOCALE_CTYPE + if (curctype) + utf8locale = (ibcmp(curctype, "UTF-8", 5) == 0 || + ibcmp(curctype, "UTF8", 4) == 0); +#endif + if (lc_all) + utf8locale = (ibcmp(lc_all, "UTF-8", 5) == 0 || + ibcmp(lc_all, "UTF8", 4) == 0); + } +#endif /* USE_LOCALE */ + if (utf8locale) + PL_utf8locale = TRUE; + } + /* Set PL_unicode to $ENV{PERL_UNICODE} if using PerlIO. + This is an alternative to using the -C command line switch + (the -C if present will override this). */ + { + char *p = PerlEnv_getenv("PERL_UNICODE"); + PL_unicode = p ? parse_unicode_opts(&p) : 0; + } +#endif + #ifdef USE_LOCALE_CTYPE if (curctype != NULL) Safefree(curctype); @@ -520,7 +583,7 @@ Perl_mem_collxfrm(pTHX_ const char *s, STRLEN len, STRLEN *xlen) xused = strxfrm(xbuf + xout, s + xin, xAlloc - xout); if (xused == -1) goto bad; - if (xused < xAlloc - xout) + if ((STRLEN)xused < xAlloc - xout) break; xAlloc = (2 * xAlloc) + 1; Renew(xbuf, xAlloc, char);