perl5.git.perl.org Git - perl5.git/blame_incremental

... / ...

Commit	Line	Data
	1	/* locale.c
	2	*
	3	* Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
	4	* 2002, 2003, 2005, 2006, 2007, 2008 by Larry Wall and others
	5	*
	6	* You may distribute under the terms of either the GNU General Public
	7	* License or the Artistic License, as specified in the README file.
	8	*
	9	*/
	10
	11	/*
	12	* A Elbereth Gilthoniel,
	13	* silivren penna míriel
	14	* o menel aglar elenath!
	15	* Na-chaered palan-díriel
	16	* o galadhremmin ennorath,
	17	* Fanuilos, le linnathon
	18	* nef aear, si nef aearon!
	19	*
	20	* [p.238 of _The Lord of the Rings_, II/i: "Many Meetings"]
	21	*/
	22
	23	/* utility functions for handling locale-specific stuff like what
	24	* character represents the decimal point.
	25	*
	26	* All C programs have an underlying locale. Perl code generally doesn't pay
	27	* any attention to it except within the scope of a 'use locale'. For most
	28	* categories, it accomplishes this by just using different operations if it is
	29	* in such scope than if not. However, various libc functions called by Perl
	30	* are affected by the LC_NUMERIC category, so there are macros in perl.h that
	31	* are used to toggle between the current locale and the C locale depending on
	32	* the desired behavior of those functions at the moment. And, LC_MESSAGES is
	33	* switched to the C locale for outputting the message unless within the scope
	34	* of 'use locale'.
	35	*/
	36
	37	#include "EXTERN.h"
	38	#define PERL_IN_LOCALE_C
	39	#include "perl_langinfo.h"
	40	#include "perl.h"
	41
	42	#include "reentr.h"
	43
	44	/* If the environment says to, we can output debugging information during
	45	* initialization. This is done before option parsing, and before any thread
	46	* creation, so can be a file-level static */
	47	#ifdef DEBUGGING
	48	# ifdef PERL_GLOBAL_STRUCT
	49	/* no global syms allowed */
	50	# define debug_initialization 0
	51	# define DEBUG_INITIALIZATION_set(v)
	52	# else
	53	static bool debug_initialization = FALSE;
	54	# define DEBUG_INITIALIZATION_set(v) (debug_initialization = v)
	55	# endif
	56	#endif
	57
	58	/* strlen() of a literal string constant. XXX We might want this more general,
	59	* but using it in just this file for now */
	60	#define STRLENs(s) (sizeof("" s "") - 1)
	61
	62	/* Is the C string input 'name' "C" or "POSIX"? If so, and 'name' is the
	63	* return of setlocale(), then this is extremely likely to be the C or POSIX
	64	* locale. However, the output of setlocale() is documented to be opaque, but
	65	* the odds are extremely small that it would return these two strings for some
	66	* other locale. Note that VMS in these two locales includes many non-ASCII
	67	* characters as controls and punctuation (below are hex bytes):
	68	* cntrl: 84-97 9B-9F
	69	* punct: A1-A3 A5 A7-AB B0-B3 B5-B7 B9-BD BF-CF D1-DD DF-EF F1-FD
	70	* Oddly, none there are listed as alphas, though some represent alphabetics
	71	* http://www.nntp.perl.org/group/perl.perl5.porters/2013/02/msg198753.html */
	72	#define isNAME_C_OR_POSIX(name) \
	73	( (name) != NULL \
	74	&& (( (name) == 'C' && ((name + 1)) == '\0') \
	75	\|\| strEQ((name), "POSIX")))
	76
	77	#ifdef USE_LOCALE
	78
	79	/*
	80	* Standardize the locale name from a string returned by 'setlocale', possibly
	81	* modifying that string.
	82	*
	83	* The typical return value of setlocale() is either
	84	* (1) "xx_YY" if the first argument of setlocale() is not LC_ALL
	85	* (2) "xa_YY xb_YY ..." if the first argument of setlocale() is LC_ALL
	86	* (the space-separated values represent the various sublocales,
	87	* in some unspecified order). This is not handled by this function.
	88	*
	89	* In some platforms it has a form like "LC_SOMETHING=Lang_Country.866\n",
	90	* which is harmful for further use of the string in setlocale(). This
	91	* function removes the trailing new line and everything up through the '='
	92	*
	93	*/
	94	STATIC char *
	95	S_stdize_locale(pTHX_ char *locs)
	96	{
	97	const char * const s = strchr(locs, '=');
	98	bool okay = TRUE;
	99
	100	PERL_ARGS_ASSERT_STDIZE_LOCALE;
	101
	102	if (s) {
	103	const char * const t = strchr(s, '.');
	104	okay = FALSE;
	105	if (t) {
	106	const char * const u = strchr(t, '\n');
	107	if (u && (u[1] == 0)) {
	108	const STRLEN len = u - s;
	109	Move(s + 1, locs, len, char);
	110	locs[len] = 0;
	111	okay = TRUE;
	112	}
	113	}
	114	}
	115
	116	if (!okay)
	117	Perl_croak(aTHX_ "Can't fix broken locale name \"%s\"", locs);
	118
	119	return locs;
	120	}
	121
	122	/* Two parallel arrays; first the locale categories Perl uses on this system;
	123	* the second array is their names. These arrays are in mostly arbitrary
	124	* order. */
	125
	126	const int categories[] = {
	127
	128	# ifdef USE_LOCALE_NUMERIC
	129	LC_NUMERIC,
	130	# endif
	131	# ifdef USE_LOCALE_CTYPE
	132	LC_CTYPE,
	133	# endif
	134	# ifdef USE_LOCALE_COLLATE
	135	LC_COLLATE,
	136	# endif
	137	# ifdef USE_LOCALE_TIME
	138	LC_TIME,
	139	# endif
	140	# ifdef USE_LOCALE_MESSAGES
	141	LC_MESSAGES,
	142	# endif
	143	# ifdef USE_LOCALE_MONETARY
	144	LC_MONETARY,
	145	# endif
	146	# ifdef LC_ALL
	147	LC_ALL,
	148	# endif
	149	-1 /* Placeholder because C doesn't allow a
	150	trailing comma, and it would get complicated
	151	with all the #ifdef's */
	152	};
	153
	154	/* The top-most real element is LC_ALL */
	155
	156	const char * category_names[] = {
	157
	158	# ifdef USE_LOCALE_NUMERIC
	159	"LC_NUMERIC",
	160	# endif
	161	# ifdef USE_LOCALE_CTYPE
	162	"LC_CTYPE",
	163	# endif
	164	# ifdef USE_LOCALE_COLLATE
	165	"LC_COLLATE",
	166	# endif
	167	# ifdef USE_LOCALE_TIME
	168	"LC_TIME",
	169	# endif
	170	# ifdef USE_LOCALE_MESSAGES
	171	"LC_MESSAGES",
	172	# endif
	173	# ifdef USE_LOCALE_MONETARY
	174	"LC_MONETARY",
	175	# endif
	176	# ifdef LC_ALL
	177	"LC_ALL",
	178	# endif
	179	NULL /* Placeholder */
	180	};
	181
	182	# ifdef LC_ALL
	183
	184	/* On systems with LC_ALL, it is kept in the highest index position. (-2
	185	* to account for the final unused placeholder element.) */
	186	# define NOMINAL_LC_ALL_INDEX (C_ARRAY_LENGTH(categories) - 2)
	187
	188	# else
	189
	190	/* On systems without LC_ALL, we pretend it is there, one beyond the real
	191	* top element, hence in the unused placeholder element. */
	192	# define NOMINAL_LC_ALL_INDEX (C_ARRAY_LENGTH(categories) - 1)
	193
	194	# endif
	195
	196	/* Pretending there is an LC_ALL element just above allows us to avoid most
	197	* special cases. Most loops through these arrays in the code below are
	198	* written like 'for (i = 0; i < NOMINAL_LC_ALL_INDEX; i++)'. They will work
	199	* on either type of system. But the code must be written to not access the
	200	* element at 'LC_ALL_INDEX' except on platforms that have it. This can be
	201	* checked for at compile time by using the #define LC_ALL_INDEX which is only
	202	* defined if we do have LC_ALL. */
	203
	204	STATIC const char *
	205	S_category_name(const int category)
	206	{
	207	unsigned int i;
	208
	209	#ifdef LC_ALL
	210
	211	if (category == LC_ALL) {
	212	return "LC_ALL";
	213	}
	214
	215	#endif
	216
	217	for (i = 0; i < NOMINAL_LC_ALL_INDEX; i++) {
	218	if (category == categories[i]) {
	219	return category_names[i];
	220	}
	221	}
	222
	223	{
	224	const char suffix[] = " (unknown)";
	225	int temp = category;
	226	Size_t length = sizeof(suffix) + 1;
	227	char * unknown;
	228	dTHX;
	229
	230	if (temp < 0) {
	231	length++;
	232	temp = - temp;
	233	}
	234
	235	/* Calculate the number of digits */
	236	while (temp >= 10) {
	237	temp /= 10;
	238	length++;
	239	}
	240
	241	Newx(unknown, length, char);
	242	my_snprintf(unknown, length, "%d%s", category, suffix);
	243	SAVEFREEPV(unknown);
	244	return unknown;
	245	}
	246	}
	247
	248	/* Now create LC_foo_INDEX #defines for just those categories on this system */
	249	# ifdef USE_LOCALE_NUMERIC
	250	# define LC_NUMERIC_INDEX 0
	251	# define _DUMMY_NUMERIC LC_NUMERIC_INDEX
	252	# else
	253	# define _DUMMY_NUMERIC -1
	254	# endif
	255	# ifdef USE_LOCALE_CTYPE
	256	# define LC_CTYPE_INDEX _DUMMY_NUMERIC + 1
	257	# define _DUMMY_CTYPE LC_CTYPE_INDEX
	258	# else
	259	# define _DUMMY_CTYPE _DUMMY_NUMERIC
	260	# endif
	261	# ifdef USE_LOCALE_COLLATE
	262	# define LC_COLLATE_INDEX _DUMMY_CTYPE + 1
	263	# define _DUMMY_COLLATE LC_COLLATE_INDEX
	264	# else
	265	# define _DUMMY_COLLATE _DUMMY_COLLATE
	266	# endif
	267	# ifdef USE_LOCALE_TIME
	268	# define LC_TIME_INDEX _DUMMY_COLLATE + 1
	269	# define _DUMMY_TIME LC_TIME_INDEX
	270	# else
	271	# define _DUMMY_TIME _DUMMY_COLLATE
	272	# endif
	273	# ifdef USE_LOCALE_MESSAGES
	274	# define LC_MESSAGES_INDEX _DUMMY_TIME + 1
	275	# define _DUMMY_MESSAGES LC_MESSAGES_INDEX
	276	# else
	277	# define _DUMMY_MESSAGES _DUMMY_TIME
	278	# endif
	279	# ifdef USE_LOCALE_MONETARY
	280	# define LC_MONETARY_INDEX _DUMMY_MESSAGES + 1
	281	# define _DUMMY_MONETARY LC_MONETARY_INDEX
	282	# else
	283	# define _DUMMY_MONETARY _DUMMY_MESSAGES
	284	# endif
	285	# ifdef LC_ALL
	286	# define LC_ALL_INDEX _DUMMY_MONETARY + 1
	287	# endif
	288	#endif /* ifdef USE_LOCALE */
	289
	290	/* Windows requres a customized base-level setlocale() */
	291	# ifdef WIN32
	292	# define my_setlocale(cat, locale) win32_setlocale(cat, locale)
	293	# else
	294	# define my_setlocale(cat, locale) setlocale(cat, locale)
	295	# endif
	296
	297	/* Just placeholders for now. "_c" is intended to be called when the category
	298	* is a constant known at compile time; "_r", not known until run time */
	299	# define do_setlocale_c(category, locale) my_setlocale(category, locale)
	300	# define do_setlocale_r(category, locale) my_setlocale(category, locale)
	301
	302	STATIC void
	303	S_set_numeric_radix(pTHX_ const bool use_locale)
	304	{
	305	/* If 'use_locale' is FALSE, set to use a dot for the radix character. If
	306	* TRUE, use the radix character derived from the current locale */
	307
	308	#if defined(USE_LOCALE_NUMERIC) && ( defined(HAS_LOCALECONV) \
	309	\|\| defined(HAS_NL_LANGINFO))
	310
	311	/* We only set up the radix SV if we are to use a locale radix ... */
	312	if (use_locale) {
	313	const char * radix = my_nl_langinfo(PERL_RADIXCHAR, FALSE);
	314	/* FALSE => already in dest locale */
	315
	316	/* ... and the character being used isn't a dot */
	317	if (strNE(radix, ".")) {
	318	if (PL_numeric_radix_sv) {
	319	sv_setpv(PL_numeric_radix_sv, radix);
	320	}
	321	else {
	322	PL_numeric_radix_sv = newSVpv(radix, 0);
	323	}
	324
	325	if ( ! is_utf8_invariant_string(
	326	(U8 *) SvPVX(PL_numeric_radix_sv), SvCUR(PL_numeric_radix_sv))
	327	&& is_utf8_string(
	328	(U8 *) SvPVX(PL_numeric_radix_sv), SvCUR(PL_numeric_radix_sv))
	329	&& _is_cur_LC_category_utf8(LC_NUMERIC))
	330	{
	331	SvUTF8_on(PL_numeric_radix_sv);
	332	}
	333	goto done;
	334	}
	335	}
	336
	337	SvREFCNT_dec(PL_numeric_radix_sv);
	338	PL_numeric_radix_sv = NULL;
	339
	340	done: ;
	341
	342	# ifdef DEBUGGING
	343
	344	if (DEBUG_L_TEST \|\| debug_initialization) {
	345	PerlIO_printf(Perl_debug_log, "Locale radix is '%s', ?UTF-8=%d\n",
	346	(PL_numeric_radix_sv)
	347	? SvPVX(PL_numeric_radix_sv)
	348	: "NULL",
	349	(PL_numeric_radix_sv)
	350	? cBOOL(SvUTF8(PL_numeric_radix_sv))
	351	: 0);
	352	}
	353
	354	# endif
	355	#endif /* USE_LOCALE_NUMERIC and can find the radix char */
	356
	357	}
	358
	359
	360	void
	361	Perl_new_numeric(pTHX_ const char *newnum)
	362	{
	363
	364	#ifndef USE_LOCALE_NUMERIC
	365
	366	PERL_UNUSED_ARG(newnum);
	367
	368	#else
	369
	370	/* Called after all libc setlocale() calls affecting LC_NUMERIC, to tell
	371	* core Perl this and that 'newnum' is the name of the new locale.
	372	* It installs this locale as the current underlying default.
	373	*
	374	* The default locale and the C locale can be toggled between by use of the
	375	* set_numeric_underlying() and set_numeric_standard() functions, which
	376	* should probably not be called directly, but only via macros like
	377	* SET_NUMERIC_STANDARD() in perl.h.
	378	*
	379	* The toggling is necessary mainly so that a non-dot radix decimal point
	380	* character can be output, while allowing internal calculations to use a
	381	* dot.
	382	*
	383	* This sets several interpreter-level variables:
	384	* PL_numeric_name The underlying locale's name: a copy of 'newnum'
	385	* PL_numeric_underlying A boolean indicating if the toggled state is such
	386	* that the current locale is the program's underlying
	387	* locale
	388	* PL_numeric_standard An int indicating if the toggled state is such
	389	* that the current locale is the C locale. If non-zero,
	390	* it is in C; if > 1, it means it may not be toggled away
	391	* from C.
	392	* Note that both of the last two variables can be true at the same time,
	393	* if the underlying locale is C. (Toggling is a no-op under these
	394	* circumstances.)
	395	*
	396	* Any code changing the locale (outside this file) should use
	397	* POSIX::setlocale, which calls this function. Therefore this function
	398	* should be called directly only from this file and from
	399	* POSIX::setlocale() */
	400
	401	char *save_newnum;
	402
	403	if (! newnum) {
	404	Safefree(PL_numeric_name);
	405	PL_numeric_name = NULL;
	406	PL_numeric_standard = TRUE;
	407	PL_numeric_underlying = TRUE;
	408	return;
	409	}
	410
	411	save_newnum = stdize_locale(savepv(newnum));
	412
	413	PL_numeric_standard = isNAME_C_OR_POSIX(save_newnum);
	414	PL_numeric_underlying = TRUE;
	415
	416	if (! PL_numeric_name \|\| strNE(PL_numeric_name, save_newnum)) {
	417	Safefree(PL_numeric_name);
	418	PL_numeric_name = save_newnum;
	419	}
	420	else {
	421	Safefree(save_newnum);
	422	}
	423
	424	/* Keep LC_NUMERIC in the C locale. This is for XS modules, so they don't
	425	* have to worry about the radix being a non-dot. (Core operations that
	426	* need the underlying locale change to it temporarily). */
	427	set_numeric_standard();
	428
	429	#endif /* USE_LOCALE_NUMERIC */
	430
	431	}
	432
	433	void
	434	Perl_set_numeric_standard(pTHX)
	435	{
	436
	437	#ifdef USE_LOCALE_NUMERIC
	438
	439	/* Toggle the LC_NUMERIC locale to C. Most code should use the macros like
	440	* SET_NUMERIC_STANDARD() in perl.h instead of calling this directly. The
	441	* macro avoids calling this routine if toggling isn't necessary according
	442	* to our records (which could be wrong if some XS code has changed the
	443	* locale behind our back) */
	444
	445	do_setlocale_c(LC_NUMERIC, "C");
	446	PL_numeric_standard = TRUE;
	447	PL_numeric_underlying = isNAME_C_OR_POSIX(PL_numeric_name);
	448	set_numeric_radix(0);
	449
	450	# ifdef DEBUGGING
	451
	452	if (DEBUG_L_TEST \|\| debug_initialization) {
	453	PerlIO_printf(Perl_debug_log,
	454	"LC_NUMERIC locale now is standard C\n");
	455	}
	456
	457	# endif
	458	#endif /* USE_LOCALE_NUMERIC */
	459
	460	}
	461
	462	void
	463	Perl_set_numeric_underlying(pTHX)
	464	{
	465
	466	#ifdef USE_LOCALE_NUMERIC
	467
	468	/* Toggle the LC_NUMERIC locale to the current underlying default. Most
	469	* code should use the macros like SET_NUMERIC_UNDERLYING() in perl.h
	470	* instead of calling this directly. The macro avoids calling this routine
	471	* if toggling isn't necessary according to our records (which could be
	472	* wrong if some XS code has changed the locale behind our back) */
	473
	474	do_setlocale_c(LC_NUMERIC, PL_numeric_name);
	475	PL_numeric_standard = isNAME_C_OR_POSIX(PL_numeric_name);
	476	PL_numeric_underlying = TRUE;
	477	set_numeric_radix(1);
	478
	479	# ifdef DEBUGGING
	480
	481	if (DEBUG_L_TEST \|\| debug_initialization) {
	482	PerlIO_printf(Perl_debug_log,
	483	"LC_NUMERIC locale now is %s\n",
	484	PL_numeric_name);
	485	}
	486
	487	# endif
	488	#endif /* USE_LOCALE_NUMERIC */
	489
	490	}
	491
	492	/*
	493	* Set up for a new ctype locale.
	494	*/
	495	STATIC void
	496	S_new_ctype(pTHX_ const char *newctype)
	497	{
	498
	499	#ifndef USE_LOCALE_CTYPE
	500
	501	PERL_ARGS_ASSERT_NEW_CTYPE;
	502	PERL_UNUSED_ARG(newctype);
	503	PERL_UNUSED_CONTEXT;
	504
	505	#else
	506
	507	/* Called after all libc setlocale() calls affecting LC_CTYPE, to tell
	508	* core Perl this and that 'newctype' is the name of the new locale.
	509	*
	510	* This function sets up the folding arrays for all 256 bytes, assuming
	511	* that tofold() is tolc() since fold case is not a concept in POSIX,
	512	*
	513	* Any code changing the locale (outside this file) should use
	514	* POSIX::setlocale, which calls this function. Therefore this function
	515	* should be called directly only from this file and from
	516	* POSIX::setlocale() */
	517
	518	dVAR;
	519	UV i;
	520
	521	PERL_ARGS_ASSERT_NEW_CTYPE;
	522
	523	/* We will replace any bad locale warning with 1) nothing if the new one is
	524	* ok; or 2) a new warning for the bad new locale */
	525	if (PL_warn_locale) {
	526	SvREFCNT_dec_NN(PL_warn_locale);
	527	PL_warn_locale = NULL;
	528	}
	529
	530	PL_in_utf8_CTYPE_locale = _is_cur_LC_category_utf8(LC_CTYPE);
	531
	532	/* A UTF-8 locale gets standard rules. But note that code still has to
	533	* handle this specially because of the three problematic code points */
	534	if (PL_in_utf8_CTYPE_locale) {
	535	Copy(PL_fold_latin1, PL_fold_locale, 256, U8);
	536	}
	537	else {
	538	/* Assume enough space for every character being bad. 4 spaces each
	539	* for the 94 printable characters that are output like "'x' "; and 5
	540	* spaces each for "'\\' ", "'\t' ", and "'\n' "; plus a terminating
	541	* NUL */
	542	char bad_chars_list[ (94 * 4) + (3 * 5) + 1 ];
	543
	544	/* Don't check for problems if we are suppressing the warnings */
	545	bool check_for_problems = ckWARN_d(WARN_LOCALE)
	546	\|\| UNLIKELY(DEBUG_L_TEST);
	547	bool multi_byte_locale = FALSE; /* Assume is a single-byte locale
	548	to start */
	549	unsigned int bad_count = 0; /* Count of bad characters */
	550
	551	for (i = 0; i < 256; i++) {
	552	if (isUPPER_LC((U8) i))
	553	PL_fold_locale[i] = (U8) toLOWER_LC((U8) i);
	554	else if (isLOWER_LC((U8) i))
	555	PL_fold_locale[i] = (U8) toUPPER_LC((U8) i);
	556	else
	557	PL_fold_locale[i] = (U8) i;
	558
	559	/* If checking for locale problems, see if the native ASCII-range
	560	* printables plus \n and \t are in their expected categories in
	561	* the new locale. If not, this could mean big trouble, upending
	562	* Perl's and most programs' assumptions, like having a
	563	* metacharacter with special meaning become a \w. Fortunately,
	564	* it's very rare to find locales that aren't supersets of ASCII
	565	* nowadays. It isn't a problem for most controls to be changed
	566	* into something else; we check only \n and \t, though perhaps \r
	567	* could be an issue as well. */
	568	if ( check_for_problems
	569	&& (isGRAPH_A(i) \|\| isBLANK_A(i) \|\| i == '\n'))
	570	{
	571	if ( cBOOL(isalnum(i)) != cBOOL(isALPHANUMERIC(i))
	572	\|\| cBOOL(isalpha(i)) != cBOOL(isALPHA_A(i))
	573	\|\| cBOOL(isdigit(i)) != cBOOL(isDIGIT_A(i))
	574	\|\| cBOOL(isgraph(i)) != cBOOL(isGRAPH_A(i))
	575	\|\| cBOOL(islower(i)) != cBOOL(isLOWER_A(i))
	576	\|\| cBOOL(isprint(i)) != cBOOL(isPRINT_A(i))
	577	\|\| cBOOL(ispunct(i)) != cBOOL(isPUNCT_A(i))
	578	\|\| cBOOL(isspace(i)) != cBOOL(isSPACE_A(i))
	579	\|\| cBOOL(isupper(i)) != cBOOL(isUPPER_A(i))
	580	\|\| cBOOL(isxdigit(i))!= cBOOL(isXDIGIT_A(i))
	581	\|\| tolower(i) != (int) toLOWER_A(i)
	582	\|\| toupper(i) != (int) toUPPER_A(i)
	583	\|\| (i == '\n' && ! isCNTRL_LC(i)))
	584	{
	585	if (bad_count) { /* Separate multiple entries with a
	586	blank */
	587	bad_chars_list[bad_count++] = ' ';
	588	}
	589	bad_chars_list[bad_count++] = '\'';
	590	if (isPRINT_A(i)) {
	591	bad_chars_list[bad_count++] = (char) i;
	592	}
	593	else {
	594	bad_chars_list[bad_count++] = '\\';
	595	if (i == '\n') {
	596	bad_chars_list[bad_count++] = 'n';
	597	}
	598	else {
	599	assert(i == '\t');
	600	bad_chars_list[bad_count++] = 't';
	601	}
	602	}
	603	bad_chars_list[bad_count++] = '\'';
	604	bad_chars_list[bad_count] = '\0';
	605	}
	606	}
	607	}
	608
	609	# ifdef MB_CUR_MAX
	610
	611	/* We only handle single-byte locales (outside of UTF-8 ones; so if
	612	* this locale requires more than one byte, there are going to be
	613	* problems. */
	614	DEBUG_Lv(PerlIO_printf(Perl_debug_log,
	615	"%s:%d: check_for_problems=%d, MB_CUR_MAX=%d\n",
	616	__FILE__, __LINE__, check_for_problems, (int) MB_CUR_MAX));
	617
	618	if (check_for_problems && MB_CUR_MAX > 1
	619
	620	/* Some platforms return MB_CUR_MAX > 1 for even the "C"
	621	* locale. Just assume that the implementation for them (plus
	622	* for POSIX) is correct and the > 1 value is spurious. (Since
	623	* these are specially handled to never be considered UTF-8
	624	* locales, as long as this is the only problem, everything
	625	* should work fine */
	626	&& strNE(newctype, "C") && strNE(newctype, "POSIX"))
	627	{
	628	multi_byte_locale = TRUE;
	629	}
	630
	631	# endif
	632
	633	if (bad_count \|\| multi_byte_locale) {
	634	PL_warn_locale = Perl_newSVpvf(aTHX_
	635	"Locale '%s' may not work well.%s%s%s\n",
	636	newctype,
	637	(multi_byte_locale)
	638	? " Some characters in it are not recognized by"
	639	" Perl."
	640	: "",
	641	(bad_count)
	642	? "\nThe following characters (and maybe others)"
	643	" may not have the same meaning as the Perl"
	644	" program expects:\n"
	645	: "",
	646	(bad_count)
	647	? bad_chars_list
	648	: ""
	649	);
	650	/* If we are actually in the scope of the locale or are debugging,
	651	* output the message now. If not in that scope, we save the
	652	* message to be output at the first operation using this locale,
	653	* if that actually happens. Most programs don't use locales, so
	654	* they are immune to bad ones. */
	655	if (IN_LC(LC_CTYPE) \|\| UNLIKELY(DEBUG_L_TEST)) {
	656
	657	/* We have to save 'newctype' because the setlocale() just
	658	* below may destroy it. The next setlocale() further down
	659	* should restore it properly so that the intermediate change
	660	* here is transparent to this function's caller */
	661	const char * const badlocale = savepv(newctype);
	662
	663	do_setlocale_c(LC_CTYPE, "C");
	664
	665	/* The '0' below suppresses a bogus gcc compiler warning */
	666	Perl_warner(aTHX_ packWARN(WARN_LOCALE), SvPVX(PL_warn_locale), 0);
	667
	668	do_setlocale_c(LC_CTYPE, badlocale);
	669	Safefree(badlocale);
	670
	671	if (IN_LC(LC_CTYPE)) {
	672	SvREFCNT_dec_NN(PL_warn_locale);
	673	PL_warn_locale = NULL;
	674	}
	675	}
	676	}
	677	}
	678
	679	#endif /* USE_LOCALE_CTYPE */
	680
	681	}
	682
	683	void
	684	Perl__warn_problematic_locale()
	685	{
	686
	687	#ifdef USE_LOCALE_CTYPE
	688
	689	dTHX;
	690
	691	/* Internal-to-core function that outputs the message in PL_warn_locale,
	692	* and then NULLS it. Should be called only through the macro
	693	* _CHECK_AND_WARN_PROBLEMATIC_LOCALE */
	694
	695	if (PL_warn_locale) {
	696	Perl_ck_warner(aTHX_ packWARN(WARN_LOCALE),
	697	SvPVX(PL_warn_locale),
	698	0 /* dummy to avoid compiler warning */ );
	699	SvREFCNT_dec_NN(PL_warn_locale);
	700	PL_warn_locale = NULL;
	701	}
	702
	703	#endif
	704
	705	}
	706
	707	STATIC void
	708	S_new_collate(pTHX_ const char *newcoll)
	709	{
	710
	711	#ifndef USE_LOCALE_COLLATE
	712
	713	PERL_UNUSED_ARG(newcoll);
	714	PERL_UNUSED_CONTEXT;
	715
	716	#else
	717
	718	/* Called after all libc setlocale() calls affecting LC_COLLATE, to tell
	719	* core Perl this and that 'newcoll' is the name of the new locale.
	720	*
	721	* The design of locale collation is that every locale change is given an
	722	* index 'PL_collation_ix'. The first time a string particpates in an
	723	* operation that requires collation while locale collation is active, it
	724	* is given PERL_MAGIC_collxfrm magic (via sv_collxfrm_flags()). That
	725	* magic includes the collation index, and the transformation of the string
	726	* by strxfrm(), q.v. That transformation is used when doing comparisons,
	727	* instead of the string itself. If a string changes, the magic is
	728	* cleared. The next time the locale changes, the index is incremented,
	729	* and so we know during a comparison that the transformation is not
	730	* necessarily still valid, and so is recomputed. Note that if the locale
	731	* changes enough times, the index could wrap (a U32), and it is possible
	732	* that a transformation would improperly be considered valid, leading to
	733	* an unlikely bug */
	734
	735	if (! newcoll) {
	736	if (PL_collation_name) {
	737	++PL_collation_ix;
	738	Safefree(PL_collation_name);
	739	PL_collation_name = NULL;
	740	}
	741	PL_collation_standard = TRUE;
	742	is_standard_collation:
	743	PL_collxfrm_base = 0;
	744	PL_collxfrm_mult = 2;
	745	PL_in_utf8_COLLATE_locale = FALSE;
	746	PL_strxfrm_NUL_replacement = '\0';
	747	PL_strxfrm_max_cp = 0;
	748	return;
	749	}
	750
	751	/* If this is not the same locale as currently, set the new one up */
	752	if (! PL_collation_name \|\| strNE(PL_collation_name, newcoll)) {
	753	++PL_collation_ix;
	754	Safefree(PL_collation_name);
	755	PL_collation_name = stdize_locale(savepv(newcoll));
	756	PL_collation_standard = isNAME_C_OR_POSIX(newcoll);
	757	if (PL_collation_standard) {
	758	goto is_standard_collation;
	759	}
	760
	761	PL_in_utf8_COLLATE_locale = _is_cur_LC_category_utf8(LC_COLLATE);
	762	PL_strxfrm_NUL_replacement = '\0';
	763	PL_strxfrm_max_cp = 0;
	764
	765	/* A locale collation definition includes primary, secondary, tertiary,
	766	* etc. weights for each character. To sort, the primary weights are
	767	* used, and only if they compare equal, then the secondary weights are
	768	* used, and only if they compare equal, then the tertiary, etc.
	769	*
	770	* strxfrm() works by taking the input string, say ABC, and creating an
	771	* output transformed string consisting of first the primary weights,
	772	* A¹B¹C¹ followed by the secondary ones, A²B²C²; and then the
	773	* tertiary, etc, yielding A¹B¹C¹ A²B²C² A³B³C³ .... Some characters
	774	* may not have weights at every level. In our example, let's say B
	775	* doesn't have a tertiary weight, and A doesn't have a secondary
	776	* weight. The constructed string is then going to be
	777	* A¹B¹C¹ B²C² A³C³ ....
	778	* This has the desired effect that strcmp() will look at the secondary
	779	* or tertiary weights only if the strings compare equal at all higher
	780	* priority weights. The spaces shown here, like in
	781	* "A¹B¹C¹ A²B²C² "
	782	* are not just for readability. In the general case, these must
	783	* actually be bytes, which we will call here 'separator weights'; and
	784	* they must be smaller than any other weight value, but since these
	785	* are C strings, only the terminating one can be a NUL (some
	786	* implementations may include a non-NUL separator weight just before
	787	* the NUL). Implementations tend to reserve 01 for the separator
	788	* weights. They are needed so that a shorter string's secondary
	789	* weights won't be misconstrued as primary weights of a longer string,
	790	* etc. By making them smaller than any other weight, the shorter
	791	* string will sort first. (Actually, if all secondary weights are
	792	* smaller than all primary ones, there is no need for a separator
	793	* weight between those two levels, etc.)
	794	*
	795	* The length of the transformed string is roughly a linear function of
	796	* the input string. It's not exactly linear because some characters
	797	* don't have weights at all levels. When we call strxfrm() we have to
	798	* allocate some memory to hold the transformed string. The
	799	* calculations below try to find coefficients 'm' and 'b' for this
	800	* locale so that m*x + b equals how much space we need, given the size
	801	* of the input string in 'x'. If we calculate too small, we increase
	802	* the size as needed, and call strxfrm() again, but it is better to
	803	* get it right the first time to avoid wasted expensive string
	804	* transformations. */
	805
	806	{
	807	/* We use the string below to find how long the tranformation of it
	808	* is. Almost all locales are supersets of ASCII, or at least the
	809	* ASCII letters. We use all of them, half upper half lower,
	810	* because if we used fewer, we might hit just the ones that are
	811	* outliers in a particular locale. Most of the strings being
	812	* collated will contain a preponderance of letters, and even if
	813	* they are above-ASCII, they are likely to have the same number of
	814	* weight levels as the ASCII ones. It turns out that digits tend
	815	* to have fewer levels, and some punctuation has more, but those
	816	* are relatively sparse in text, and khw believes this gives a
	817	* reasonable result, but it could be changed if experience so
	818	* dictates. */
	819	const char longer[] = "ABCDEFGHIJKLMnopqrstuvwxyz";
	820	char * x_longer; /* Transformed 'longer' */
	821	Size_t x_len_longer; /* Length of 'x_longer' */
	822
	823	char * x_shorter; /* We also transform a substring of 'longer' */
	824	Size_t x_len_shorter;
	825
	826	/* _mem_collxfrm() is used get the transformation (though here we
	827	* are interested only in its length). It is used because it has
	828	* the intelligence to handle all cases, but to work, it needs some
	829	* values of 'm' and 'b' to get it started. For the purposes of
	830	* this calculation we use a very conservative estimate of 'm' and
	831	* 'b'. This assumes a weight can be multiple bytes, enough to
	832	* hold any UV on the platform, and there are 5 levels, 4 weight
	833	* bytes, and a trailing NUL. */
	834	PL_collxfrm_base = 5;
	835	PL_collxfrm_mult = 5 * sizeof(UV);
	836
	837	/* Find out how long the transformation really is */
	838	x_longer = _mem_collxfrm(longer,
	839	sizeof(longer) - 1,
	840	&x_len_longer,
	841
	842	/* We avoid converting to UTF-8 in the
	843	* called function by telling it the
	844	* string is in UTF-8 if the locale is a
	845	* UTF-8 one. Since the string passed
	846	* here is invariant under UTF-8, we can
	847	* claim it's UTF-8 even though it isn't.
	848	* */
	849	PL_in_utf8_COLLATE_locale);
	850	Safefree(x_longer);
	851
	852	/* Find out how long the transformation of a substring of 'longer'
	853	* is. Together the lengths of these transformations are
	854	* sufficient to calculate 'm' and 'b'. The substring is all of
	855	* 'longer' except the first character. This minimizes the chances
	856	* of being swayed by outliers */
	857	x_shorter = _mem_collxfrm(longer + 1,
	858	sizeof(longer) - 2,
	859	&x_len_shorter,
	860	PL_in_utf8_COLLATE_locale);
	861	Safefree(x_shorter);
	862
	863	/* If the results are nonsensical for this simple test, the whole
	864	* locale definition is suspect. Mark it so that locale collation
	865	* is not active at all for it. XXX Should we warn? */
	866	if ( x_len_shorter == 0
	867	\|\| x_len_longer == 0
	868	\|\| x_len_shorter >= x_len_longer)
	869	{
	870	PL_collxfrm_mult = 0;
	871	PL_collxfrm_base = 0;
	872	}
	873	else {
	874	SSize_t base; /* Temporary */
	875
	876	/* We have both: m * strlen(longer) + b = x_len_longer
	877	* m * strlen(shorter) + b = x_len_shorter;
	878	* subtracting yields:
	879	* m * (strlen(longer) - strlen(shorter))
	880	* = x_len_longer - x_len_shorter
	881	* But we have set things up so that 'shorter' is 1 byte smaller
	882	* than 'longer'. Hence:
	883	* m = x_len_longer - x_len_shorter
	884	*
	885	* But if something went wrong, make sure the multiplier is at
	886	* least 1.
	887	*/
	888	if (x_len_longer > x_len_shorter) {
	889	PL_collxfrm_mult = (STRLEN) x_len_longer - x_len_shorter;
	890	}
	891	else {
	892	PL_collxfrm_mult = 1;
	893	}
	894
	895	/* mx + b = len
	896	* so: b = len - mx
	897	* but in case something has gone wrong, make sure it is
	898	* non-negative */
	899	base = x_len_longer - PL_collxfrm_mult * (sizeof(longer) - 1);
	900	if (base < 0) {
	901	base = 0;
	902	}
	903
	904	/* Add 1 for the trailing NUL */
	905	PL_collxfrm_base = base + 1;
	906	}
	907
	908	# ifdef DEBUGGING
	909
	910	if (DEBUG_L_TEST \|\| debug_initialization) {
	911	PerlIO_printf(Perl_debug_log,
	912	"%s:%d: ?UTF-8 locale=%d; x_len_shorter=%zu, "
	913	"x_len_longer=%zu,"
	914	" collate multipler=%zu, collate base=%zu\n",
	915	__FILE__, __LINE__,
	916	PL_in_utf8_COLLATE_locale,
	917	x_len_shorter, x_len_longer,
	918	PL_collxfrm_mult, PL_collxfrm_base);
	919	}
	920	# endif
	921
	922	}
	923	}
	924
	925	#endif /* USE_LOCALE_COLLATE */
	926
	927	}
	928
	929	#ifdef WIN32
	930
	931	STATIC char *
	932	S_win32_setlocale(pTHX_ int category, const char* locale)
	933	{
	934	/* This, for Windows, emulates POSIX setlocale() behavior. There is no
	935	* difference between the two unless the input locale is "", which normally
	936	* means on Windows to get the machine default, which is set via the
	937	* computer's "Regional and Language Options" (or its current equivalent).
	938	* In POSIX, it instead means to find the locale from the user's
	939	* environment. This routine changes the Windows behavior to first look in
	940	* the environment, and, if anything is found, use that instead of going to
	941	* the machine default. If there is no environment override, the machine
	942	* default is used, by calling the real setlocale() with "".
	943	*
	944	* The POSIX behavior is to use the LC_ALL variable if set; otherwise to
	945	* use the particular category's variable if set; otherwise to use the LANG
	946	* variable. */
	947
	948	bool override_LC_ALL = FALSE;
	949	char * result;
	950	unsigned int i;
	951
	952	if (locale && strEQ(locale, "")) {
	953
	954	# ifdef LC_ALL
	955
	956	locale = PerlEnv_getenv("LC_ALL");
	957	if (! locale) {
	958	if (category == LC_ALL) {
	959	override_LC_ALL = TRUE;
	960	}
	961	else {
	962
	963	# endif
	964
	965	for (i = 0; i < NOMINAL_LC_ALL_INDEX; i++) {
	966	if (category == categories[i]) {
	967	locale = PerlEnv_getenv(category_names[i]);
	968	goto found_locale;
	969	}
	970	}
	971
	972	locale = PerlEnv_getenv("LANG");
	973	if (! locale) {
	974	locale = "";
	975	}
	976
	977	found_locale: ;
	978
	979	# ifdef LC_ALL
	980
	981	}
	982	}
	983
	984	# endif
	985
	986	}
	987
	988	result = setlocale(category, locale);
	989	DEBUG_L(PerlIO_printf(Perl_debug_log, "%s:%d: %s\n", __FILE__, __LINE__,
	990	setlocale_debug_string(category, locale, result)));
	991
	992	if (! override_LC_ALL) {
	993	return result;
	994	}
	995
	996	/* Here the input category was LC_ALL, and we have set it to what is in the
	997	* LANG variable or the system default if there is no LANG. But these have
	998	* lower priority than the other LC_foo variables, so override it for each
	999	* one that is set. (If they are set to "", it means to use the same thing
	1000	* we just set LC_ALL to, so can skip) */
	1001
	1002	for (i = 0; i < LC_ALL_INDEX; i++) {
	1003	result = PerlEnv_getenv(category_names[i]);
	1004	if (result && strNE(result, "")) {
	1005	setlocale(categories[i], result);
	1006	DEBUG_Lv(PerlIO_printf(Perl_debug_log, "%s:%d: %s\n",
	1007	__FILE__, __LINE__,
	1008	setlocale_debug_string(categories[i], result, "not captured")));
	1009	}
	1010	}
	1011
	1012	result = setlocale(LC_ALL, NULL);
	1013	DEBUG_L(PerlIO_printf(Perl_debug_log, "%s:%d: %s\n",
	1014	__FILE__, __LINE__,
	1015	setlocale_debug_string(LC_ALL, NULL, result)));
	1016
	1017	return result;
	1018	}
	1019
	1020	#endif
	1021
	1022	char *
	1023	Perl_setlocale(int category, const char * locale)
	1024	{
	1025	/* This wraps POSIX::setlocale() */
	1026
	1027	char * retval;
	1028	char * newlocale;
	1029	dTHX;
	1030
	1031	#ifdef USE_LOCALE_NUMERIC
	1032
	1033	/* A NULL locale means only query what the current one is. We
	1034	* have the LC_NUMERIC name saved, because we are normally switched
	1035	* into the C locale for it. Switch back so an LC_ALL query will yield
	1036	* the correct results; all other categories don't require special
	1037	* handling */
	1038	if (locale == NULL) {
	1039	if (category == LC_NUMERIC) {
	1040	return savepv(PL_numeric_name);
	1041	}
	1042
	1043	# ifdef LC_ALL
	1044
	1045	else if (category == LC_ALL) {
	1046	SET_NUMERIC_UNDERLYING();
	1047	}
	1048
	1049	# endif
	1050
	1051	}
	1052
	1053	#endif
	1054
	1055	/* Save retval since subsequent setlocale() calls may overwrite it. */
	1056	retval = savepv(do_setlocale_r(category, locale));
	1057
	1058	DEBUG_L(PerlIO_printf(Perl_debug_log,
	1059	"%s:%d: %s\n", __FILE__, __LINE__,
	1060	setlocale_debug_string(category, locale, retval)));
	1061	if (! retval) {
	1062	/* Should never happen that a query would return an error, but be
	1063	* sure and reset to C locale */
	1064	if (locale == 0) {
	1065	SET_NUMERIC_STANDARD();
	1066	}
	1067
	1068	return NULL;
	1069	}
	1070
	1071	/* If locale == NULL, we are just querying the state, but may have switched
	1072	* to NUMERIC_UNDERLYING. Switch back before returning. */
	1073	if (locale == NULL) {
	1074	SET_NUMERIC_STANDARD();
	1075	return retval;
	1076	}
	1077
	1078	/* Now that have switched locales, we have to update our records to
	1079	* correspond. */
	1080
	1081	switch (category) {
	1082
	1083	#ifdef USE_LOCALE_CTYPE
	1084
	1085	case LC_CTYPE:
	1086	new_ctype(retval);
	1087	break;
	1088
	1089	#endif
	1090	#ifdef USE_LOCALE_COLLATE
	1091
	1092	case LC_COLLATE:
	1093	new_collate(retval);
	1094	break;
	1095
	1096	#endif
	1097	#ifdef USE_LOCALE_NUMERIC
	1098
	1099	case LC_NUMERIC:
	1100	new_numeric(retval);
	1101	break;
	1102
	1103	#endif
	1104	#ifdef LC_ALL
	1105
	1106	case LC_ALL:
	1107
	1108	/* LC_ALL updates all the things we care about. The values may not
	1109	* be the same as 'retval', as the locale "" may have set things
	1110	* individually */
	1111
	1112	# ifdef USE_LOCALE_CTYPE
	1113
	1114	newlocale = do_setlocale_c(LC_CTYPE, NULL);
	1115	new_ctype(newlocale);
	1116
	1117	# endif /* USE_LOCALE_CTYPE */
	1118	# ifdef USE_LOCALE_COLLATE
	1119
	1120	newlocale = do_setlocale_c(LC_COLLATE, NULL);
	1121	new_collate(newlocale);
	1122
	1123	# endif
	1124	# ifdef USE_LOCALE_NUMERIC
	1125
	1126	newlocale = do_setlocale_c(LC_NUMERIC, NULL);
	1127	new_numeric(newlocale);
	1128
	1129	# endif /* USE_LOCALE_NUMERIC */
	1130	#endif /* LC_ALL */
	1131
	1132	default:
	1133	break;
	1134	}
	1135
	1136	return retval;
	1137
	1138
	1139	}
	1140
	1141	PERL_STATIC_INLINE const char *
	1142	S_save_to_buffer(const char * string, char *buf, Size_t buf_size, const Size_t offset)
	1143	{
	1144	/* Copy the NUL-terminated 'string' to 'buf' + 'offset'. 'buf' has size 'buf_size',
	1145	* growing it if necessary */
	1146
	1147	const Size_t string_size = strlen(string) + offset + 1;
	1148
	1149	PERL_ARGS_ASSERT_SAVE_TO_BUFFER;
	1150
	1151	if (*buf_size == 0) {
	1152	Newx(*buf, string_size, char);
	1153	*buf_size = string_size;
	1154	}
	1155	else if (string_size > *buf_size) {
	1156	Renew(*buf, string_size, char);
	1157	*buf_size = string_size;
	1158	}
	1159
	1160	Copy(string, *buf + offset, string_size - offset, char);
	1161	return *buf;
	1162	}
	1163
	1164	/*
	1165
	1166	=head1 Locale-related functions and macros
	1167
	1168	=for apidoc Perl_langinfo
	1169
	1170	This is an (almost ª) drop-in replacement for the system C<L<nl_langinfo(3)>>,
	1171	taking the same C<item> parameter values, and returning the same information.
	1172	But it is more thread-safe than regular C<nl_langinfo()>, and hides the quirks
	1173	of Perl's locale handling from your code, and can be used on systems that lack
	1174	a native C<nl_langinfo>.
	1175
	1176	Expanding on these:
	1177
	1178	=over
	1179
	1180	=item *
	1181
	1182	It delivers the correct results for the C<RADIXCHAR> and C<THOUSEP> items,
	1183	without you having to write extra code. The reason for the extra code would be
	1184	because these are from the C<LC_NUMERIC> locale category, which is normally
	1185	kept set to the C locale by Perl, no matter what the underlying locale is
	1186	supposed to be, and so to get the expected results, you have to temporarily
	1187	toggle into the underlying locale, and later toggle back. (You could use
	1188	plain C<nl_langinfo> and C<L</STORE_LC_NUMERIC_FORCE_TO_UNDERLYING>> for this
	1189	but then you wouldn't get the other advantages of C<Perl_langinfo()>; not
	1190	keeping C<LC_NUMERIC> in the C locale would break a lot of CPAN, which is
	1191	expecting the radix (decimal point) character to be a dot.)
	1192
	1193	=item *
	1194
	1195	Depending on C<item>, it works on systems that don't have C<nl_langinfo>, hence
	1196	makes your code more portable. Of the fifty-some possible items specified by
	1197	the POSIX 2008 standard,
	1198	L<http://pubs.opengroup.org/onlinepubs/9699919799/basedefs/langinfo.h.html>,
	1199	only two are completely unimplemented. It uses various techniques to recover
	1200	the other items, including calling C<L<localeconv(3)>>, and C<L<strftime(3)>>,
	1201	both of which are specified in C89, so should always be available. Later
	1202	C<strftime()> versions have additional capabilities; C<""> is returned for
	1203	those not available on your system.
	1204
	1205	The details for those items which may differ from what this emulation returns
	1206	and what a native C<nl_langinfo()> would return are:
	1207
	1208	=over
	1209
	1210	=item C<CODESET>
	1211
	1212	=item C<ERA>
	1213
	1214	Unimplemented, so returns C<"">.
	1215
	1216	=item C<YESEXPR>
	1217
	1218	=item C<NOEXPR>
	1219
	1220	Only the values for English are returned. Earlier POSIX standards also
	1221	specified C<YESSTR> and C<NOSTR>, but these have been removed from POSIX 2008,
	1222	and aren't supported by C<Perl_langinfo>.
	1223
	1224	=item C<D_FMT>
	1225
	1226	Always evaluates to C<%x>, the locale's appropriate date representation.
	1227
	1228	=item C<T_FMT>
	1229
	1230	Always evaluates to C<%X>, the locale's appropriate time representation.
	1231
	1232	=item C<D_T_FMT>
	1233
	1234	Always evaluates to C<%c>, the locale's appropriate date and time
	1235	representation.
	1236
	1237	=item C<CRNCYSTR>
	1238
	1239	The return may be incorrect for those rare locales where the currency symbol
	1240	replaces the radix character.
	1241	Send email to L<mailto:perlbug@perl.org> if you have examples of it needing
	1242	to work differently.
	1243
	1244	=item C<ALT_DIGITS>
	1245
	1246	Currently this gives the same results as Linux does.
	1247	Send email to L<mailto:perlbug@perl.org> if you have examples of it needing
	1248	to work differently.
	1249
	1250	=item C<ERA_D_FMT>
	1251
	1252	=item C<ERA_T_FMT>
	1253
	1254	=item C<ERA_D_T_FMT>
	1255
	1256	=item C<T_FMT_AMPM>
	1257
	1258	These are derived by using C<strftime()>, and not all versions of that function
	1259	know about them. C<""> is returned for these on such systems.
	1260
	1261	=back
	1262
	1263	When using C<Perl_langinfo> on systems that don't have a native
	1264	C<nl_langinfo()>, you must
	1265
	1266	#include "perl_langinfo.h"
	1267
	1268	before the C<perl.h> C<#include>. You can replace your C<langinfo.h>
	1269	C<#include> with this one. (Doing it this way keeps out the symbols that plain
	1270	C<langinfo.h> imports into the namespace for code that doesn't need it.)
	1271
	1272	You also should not use the bare C<langinfo.h> item names, but should preface
	1273	them with C<PERL_>, so use C<PERL_RADIXCHAR> instead of plain C<RADIXCHAR>.
	1274	The C<PERL_I<foo>> versions will also work for this function on systems that do
	1275	have a native C<nl_langinfo>.
	1276
	1277	=item *
	1278
	1279	It is thread-friendly, returning its result in a buffer that won't be
	1280	overwritten by another thread, so you don't have to code for that possibility.
	1281	The buffer can be overwritten by the next call to C<nl_langinfo> or
	1282	C<Perl_langinfo> in the same thread.
	1283
	1284	=item *
	1285
	1286	ª It returns S<C<const char *>>, whereas plain C<nl_langinfo()> returns S<C<char
	1287	*>>, but you are (only by documentation) forbidden to write into the buffer.
	1288	By declaring this C<const>, the compiler enforces this restriction. The extra
	1289	C<const> is why this isn't an unequivocal drop-in replacement for
	1290	C<nl_langinfo>.
	1291
	1292	=back
	1293
	1294	The original impetus for C<Perl_langinfo()> was so that code that needs to
	1295	find out the current currency symbol, floating point radix character, or digit
	1296	grouping separator can use, on all systems, the simpler and more
	1297	thread-friendly C<nl_langinfo> API instead of C<L<localeconv(3)>> which is a
	1298	pain to make thread-friendly. For other fields returned by C<localeconv>, it
	1299	is better to use the methods given in L<perlcall> to call
	1300	L<C<POSIX::localeconv()>\|POSIX/localeconv>, which is thread-friendly.
	1301
	1302	=cut
	1303
	1304	*/
	1305
	1306	const char *
	1307	#ifdef HAS_NL_LANGINFO
	1308	Perl_langinfo(const nl_item item)
	1309	#else
	1310	Perl_langinfo(const int item)
	1311	#endif
	1312	{
	1313	return my_nl_langinfo(item, TRUE);
	1314	}
	1315
	1316	const char *
	1317	#ifdef HAS_NL_LANGINFO
	1318	S_my_nl_langinfo(const nl_item item, bool toggle)
	1319	#else
	1320	S_my_nl_langinfo(const int item, bool toggle)
	1321	#endif
	1322	{
	1323	dTHX;
	1324
	1325	#if defined(HAS_NL_LANGINFO) /* nl_langinfo() is available. */
	1326	#if ! defined(HAS_POSIX_2008_LOCALE)
	1327
	1328	/* Here, use plain nl_langinfo(), switching to the underlying LC_NUMERIC
	1329	* for those items dependent on it. This must be copied to a buffer before
	1330	* switching back, as some systems destroy the buffer when setlocale() is
	1331	* called */
	1332
	1333	LOCALE_LOCK;
	1334
	1335	if (toggle) {
	1336	if (item == PERL_RADIXCHAR \|\| item == PERL_THOUSEP) {
	1337	do_setlocale_c(LC_NUMERIC, PL_numeric_name);
	1338	}
	1339	else {
	1340	toggle = FALSE;
	1341	}
	1342	}
	1343
	1344	save_to_buffer(nl_langinfo(item), &PL_langinfo_buf, &PL_langinfo_bufsize, 0);
	1345
	1346	if (toggle) {
	1347	do_setlocale_c(LC_NUMERIC, "C");
	1348	}
	1349
	1350	LOCALE_UNLOCK;
	1351
	1352	return PL_langinfo_buf;
	1353
	1354	# else /* Use nl_langinfo_l(), avoiding both a mutex and changing the locale */
	1355
	1356	bool do_free = FALSE;
	1357	locale_t cur = uselocale((locale_t) 0);
	1358
	1359	if (cur == LC_GLOBAL_LOCALE) {
	1360	cur = duplocale(LC_GLOBAL_LOCALE);
	1361	do_free = TRUE;
	1362	}
	1363
	1364	if ( toggle
	1365	&& (item == PERL_RADIXCHAR \|\| item == PERL_THOUSEP))
	1366	{
	1367	cur = newlocale(LC_NUMERIC_MASK, PL_numeric_name, cur);
	1368	do_free = TRUE;
	1369	}
	1370
	1371	save_to_buffer(nl_langinfo_l(item, cur),
	1372	&PL_langinfo_buf, &PL_langinfo_bufsize, 0);
	1373	if (do_free) {
	1374	freelocale(cur);
	1375	}
	1376
	1377	return PL_langinfo_buf;
	1378
	1379	# endif
	1380	#else /* Below, emulate nl_langinfo as best we can */
	1381	# ifdef HAS_LOCALECONV
	1382
	1383	const struct lconv* lc;
	1384
	1385	# endif
	1386	# ifdef HAS_STRFTIME
	1387
	1388	struct tm tm;
	1389	bool return_format = FALSE; /* Return the %format, not the value */
	1390	const char * format;
	1391
	1392	# endif
	1393
	1394	/* We copy the results to a per-thread buffer, even if not multi-threaded.
	1395	* This is in part to simplify this code, and partly because we need a
	1396	* buffer anyway for strftime(), and partly because a call of localeconv()
	1397	* could otherwise wipe out the buffer, and the programmer would not be
	1398	* expecting this, as this is a nl_langinfo() substitute after all, so s/he
	1399	* might be thinking their localeconv() is safe until another localeconv()
	1400	* call. */
	1401
	1402	switch (item) {
	1403	Size_t len;
	1404	const char * retval;
	1405
	1406	/* These 2 are unimplemented */
	1407	case PERL_CODESET:
	1408	case PERL_ERA: /* For use with strftime() %E modifier */
	1409
	1410	default:
	1411	return "";
	1412
	1413	/* We use only an English set, since we don't know any more */
	1414	case PERL_YESEXPR: return "^[+1yY]";
	1415	case PERL_NOEXPR: return "^[-0nN]";
	1416
	1417	# ifdef HAS_LOCALECONV
	1418
	1419	case PERL_CRNCYSTR:
	1420
	1421	LOCALE_LOCK;
	1422
	1423	lc = localeconv();
	1424	if (! lc \|\| ! lc->currency_symbol \|\| strEQ("", lc->currency_symbol))
	1425	{
	1426	LOCALE_UNLOCK;
	1427	return "";
	1428	}
	1429
	1430	/* Leave the first spot empty to be filled in below */
	1431	save_to_buffer(lc->currency_symbol, &PL_langinfo_buf,
	1432	&PL_langinfo_bufsize, 1);
	1433	if (lc->mon_decimal_point && strEQ(lc->mon_decimal_point, ""))
	1434	{ /* khw couldn't figure out how the localedef specifications
	1435	would show that the $ should replace the radix; this is
	1436	just a guess as to how it might work.*/
	1437	*PL_langinfo_buf = '.';
	1438	}
	1439	else if (lc->p_cs_precedes) {
	1440	*PL_langinfo_buf = '-';
	1441	}
	1442	else {
	1443	*PL_langinfo_buf = '+';
	1444	}
	1445
	1446	LOCALE_UNLOCK;
	1447	break;
	1448
	1449	case PERL_RADIXCHAR:
	1450	case PERL_THOUSEP:
	1451
	1452	LOCALE_LOCK;
	1453
	1454	if (toggle) {
	1455	do_setlocale_c(LC_NUMERIC, PL_numeric_name);
	1456	}
	1457
	1458	lc = localeconv();
	1459	if (! lc) {
	1460	retval = "";
	1461	}
	1462	else {
	1463	retval = (item == PERL_RADIXCHAR)
	1464	? lc->decimal_point
	1465	: lc->thousands_sep;
	1466	if (! retval) {
	1467	retval = "";
	1468	}
	1469	}
	1470
	1471	save_to_buffer(retval, &PL_langinfo_buf, &PL_langinfo_bufsize, 0);
	1472
	1473	if (toggle) {
	1474	do_setlocale_c(LC_NUMERIC, "C");
	1475	}
	1476
	1477	LOCALE_UNLOCK;
	1478
	1479	break;
	1480
	1481	# endif
	1482	# ifdef HAS_STRFTIME
	1483
	1484	/* These are defined by C89, so we assume that strftime supports them,
	1485	* and so are returned unconditionally; they may not be what the locale
	1486	* actually says, but should give good enough results for someone using
	1487	* them as formats (as opposed to trying to parse them to figure out
	1488	* what the locale says). The other format items are actually tested to
	1489	* verify they work on the platform */
	1490	case PERL_D_FMT: return "%x";
	1491	case PERL_T_FMT: return "%X";
	1492	case PERL_D_T_FMT: return "%c";
	1493
	1494	/* These formats are only available in later strfmtime's */
	1495	case PERL_ERA_D_FMT: case PERL_ERA_T_FMT: case PERL_ERA_D_T_FMT:
	1496	case PERL_T_FMT_AMPM:
	1497
	1498	/* The rest can be gotten from most versions of strftime(). */
	1499	case PERL_ABDAY_1: case PERL_ABDAY_2: case PERL_ABDAY_3:
	1500	case PERL_ABDAY_4: case PERL_ABDAY_5: case PERL_ABDAY_6:
	1501	case PERL_ABDAY_7:
	1502	case PERL_ALT_DIGITS:
	1503	case PERL_AM_STR: case PERL_PM_STR:
	1504	case PERL_ABMON_1: case PERL_ABMON_2: case PERL_ABMON_3:
	1505	case PERL_ABMON_4: case PERL_ABMON_5: case PERL_ABMON_6:
	1506	case PERL_ABMON_7: case PERL_ABMON_8: case PERL_ABMON_9:
	1507	case PERL_ABMON_10: case PERL_ABMON_11: case PERL_ABMON_12:
	1508	case PERL_DAY_1: case PERL_DAY_2: case PERL_DAY_3: case PERL_DAY_4:
	1509	case PERL_DAY_5: case PERL_DAY_6: case PERL_DAY_7:
	1510	case PERL_MON_1: case PERL_MON_2: case PERL_MON_3: case PERL_MON_4:
	1511	case PERL_MON_5: case PERL_MON_6: case PERL_MON_7: case PERL_MON_8:
	1512	case PERL_MON_9: case PERL_MON_10: case PERL_MON_11: case PERL_MON_12:
	1513
	1514	LOCALE_LOCK;
	1515
	1516	init_tm(&tm); /* Precaution against core dumps */
	1517	tm.tm_sec = 30;
	1518	tm.tm_min = 30;
	1519	tm.tm_hour = 6;
	1520	tm.tm_year = 2017 - 1900;
	1521	tm.tm_wday = 0;
	1522	tm.tm_mon = 0;
	1523	switch (item) {
	1524	default:
	1525	LOCALE_UNLOCK;
	1526	Perl_croak(aTHX_ "panic: %s: %d: switch case: %d problem",
	1527	__FILE__, __LINE__, item);
	1528	NOT_REACHED; /* NOTREACHED */
	1529
	1530	case PERL_PM_STR: tm.tm_hour = 18;
	1531	case PERL_AM_STR:
	1532	format = "%p";
	1533	break;
	1534
	1535	case PERL_ABDAY_7: tm.tm_wday++;
	1536	case PERL_ABDAY_6: tm.tm_wday++;
	1537	case PERL_ABDAY_5: tm.tm_wday++;
	1538	case PERL_ABDAY_4: tm.tm_wday++;
	1539	case PERL_ABDAY_3: tm.tm_wday++;
	1540	case PERL_ABDAY_2: tm.tm_wday++;
	1541	case PERL_ABDAY_1:
	1542	format = "%a";
	1543	break;
	1544
	1545	case PERL_DAY_7: tm.tm_wday++;
	1546	case PERL_DAY_6: tm.tm_wday++;
	1547	case PERL_DAY_5: tm.tm_wday++;
	1548	case PERL_DAY_4: tm.tm_wday++;
	1549	case PERL_DAY_3: tm.tm_wday++;
	1550	case PERL_DAY_2: tm.tm_wday++;
	1551	case PERL_DAY_1:
	1552	format = "%A";
	1553	break;
	1554
	1555	case PERL_ABMON_12: tm.tm_mon++;
	1556	case PERL_ABMON_11: tm.tm_mon++;
	1557	case PERL_ABMON_10: tm.tm_mon++;
	1558	case PERL_ABMON_9: tm.tm_mon++;
	1559	case PERL_ABMON_8: tm.tm_mon++;
	1560	case PERL_ABMON_7: tm.tm_mon++;
	1561	case PERL_ABMON_6: tm.tm_mon++;
	1562	case PERL_ABMON_5: tm.tm_mon++;
	1563	case PERL_ABMON_4: tm.tm_mon++;
	1564	case PERL_ABMON_3: tm.tm_mon++;
	1565	case PERL_ABMON_2: tm.tm_mon++;
	1566	case PERL_ABMON_1:
	1567	format = "%b";
	1568	break;
	1569
	1570	case PERL_MON_12: tm.tm_mon++;
	1571	case PERL_MON_11: tm.tm_mon++;
	1572	case PERL_MON_10: tm.tm_mon++;
	1573	case PERL_MON_9: tm.tm_mon++;
	1574	case PERL_MON_8: tm.tm_mon++;
	1575	case PERL_MON_7: tm.tm_mon++;
	1576	case PERL_MON_6: tm.tm_mon++;
	1577	case PERL_MON_5: tm.tm_mon++;
	1578	case PERL_MON_4: tm.tm_mon++;
	1579	case PERL_MON_3: tm.tm_mon++;
	1580	case PERL_MON_2: tm.tm_mon++;
	1581	case PERL_MON_1:
	1582	format = "%B";
	1583	break;
	1584
	1585	case PERL_T_FMT_AMPM:
	1586	format = "%r";
	1587	return_format = TRUE;
	1588	break;
	1589
	1590	case PERL_ERA_D_FMT:
	1591	format = "%Ex";
	1592	return_format = TRUE;
	1593	break;
	1594
	1595	case PERL_ERA_T_FMT:
	1596	format = "%EX";
	1597	return_format = TRUE;
	1598	break;
	1599
	1600	case PERL_ERA_D_T_FMT:
	1601	format = "%Ec";
	1602	return_format = TRUE;
	1603	break;
	1604
	1605	case PERL_ALT_DIGITS:
	1606	tm.tm_wday = 0;
	1607	format = "%Ow"; /* Find the alternate digit for 0 */
	1608	break;
	1609	}
	1610
	1611	/* We can't use my_strftime() because it doesn't look at tm_wday */
	1612	while (0 == strftime(PL_langinfo_buf, PL_langinfo_bufsize,
	1613	format, &tm))
	1614	{
	1615	/* A zero return means one of:
	1616	* a) there wasn't enough space in PL_langinfo_buf
	1617	* b) the format, like a plain %p, returns empty
	1618	* c) it was an illegal format, though some implementations of
	1619	* strftime will just return the illegal format as a plain
	1620	* character sequence.
	1621	*
	1622	* To quickly test for case 'b)', try again but precede the
	1623	* format with a plain character. If that result is still
	1624	* empty, the problem is either 'a)' or 'c)' */
	1625
	1626	Size_t format_size = strlen(format) + 1;
	1627	Size_t mod_size = format_size + 1;
	1628	char * mod_format;
	1629	char * temp_result;
	1630
	1631	Newx(mod_format, mod_size, char);
	1632	Newx(temp_result, PL_langinfo_bufsize, char);
	1633	*mod_format = '\a';
	1634	my_strlcpy(mod_format + 1, format, mod_size);
	1635	len = strftime(temp_result,
	1636	PL_langinfo_bufsize,
	1637	mod_format, &tm);
	1638	Safefree(mod_format);
	1639	Safefree(temp_result);
	1640
	1641	/* If 'len' is non-zero, it means that we had a case like %p
	1642	* which means the current locale doesn't use a.m. or p.m., and
	1643	* that is valid */
	1644	if (len == 0) {
	1645
	1646	/* Here, still didn't work. If we get well beyond a
	1647	* reasonable size, bail out to prevent an infinite loop. */
	1648
	1649	if (PL_langinfo_bufsize > 100 * format_size) {
	1650	*PL_langinfo_buf = '\0';
	1651	}
	1652	else { /* Double the buffer size to retry; Add 1 in case
	1653	original was 0, so we aren't stuck at 0. */
	1654	PL_langinfo_bufsize *= 2;
	1655	PL_langinfo_bufsize++;
	1656	Renew(PL_langinfo_buf, PL_langinfo_bufsize, char);
	1657	continue;
	1658	}
	1659	}
	1660
	1661	break;
	1662	}
	1663
	1664	/* Here, we got a result.
	1665	*
	1666	* If the item is 'ALT_DIGITS', PL_langinfo_buf contains the
	1667	* alternate format for wday 0. If the value is the same as the
	1668	* normal 0, there isn't an alternate, so clear the buffer. */
	1669	if ( item == PERL_ALT_DIGITS
	1670	&& strEQ(PL_langinfo_buf, "0"))
	1671	{
	1672	*PL_langinfo_buf = '\0';
	1673	}
	1674
	1675	/* ALT_DIGITS is problematic. Experiments on it showed that
	1676	* strftime() did not always work properly when going from alt-9 to
	1677	* alt-10. Only a few locales have this item defined, and in all
	1678	* of them on Linux that khw was able to find, nl_langinfo() merely
	1679	* returned the alt-0 character, possibly doubled. Most Unicode
	1680	* digits are in blocks of 10 consecutive code points, so that is
	1681	* sufficient information for those scripts, as we can infer alt-1,
	1682	* alt-2, .... But for a Japanese locale, a CJK ideographic 0 is
	1683	* returned, and the CJK digits are not in code point order, so you
	1684	* can't really infer anything. The localedef for this locale did
	1685	* specify the succeeding digits, so that strftime() works properly
	1686	* on them, without needing to infer anything. But the
	1687	* nl_langinfo() return did not give sufficient information for the
	1688	* caller to understand what's going on. So until there is
	1689	* evidence that it should work differently, this returns the alt-0
	1690	* string for ALT_DIGITS.
	1691	*
	1692	* wday was chosen because its range is all a single digit. Things
	1693	* like tm_sec have two digits as the minimum: '00' */
	1694
	1695	LOCALE_UNLOCK;
	1696
	1697	/* If to return the format, not the value, overwrite the buffer
	1698	* with it. But some strftime()s will keep the original format if
	1699	* illegal, so change those to "" */
	1700	if (return_format) {
	1701	if (strEQ(PL_langinfo_buf, format)) {
	1702	*PL_langinfo_buf = '\0';
	1703	}
	1704	else {
	1705	save_to_buffer(format, &PL_langinfo_buf,
	1706	&PL_langinfo_bufsize, 0);
	1707	}
	1708	}
	1709
	1710	break;
	1711
	1712	# endif
	1713
	1714	}
	1715
	1716	return PL_langinfo_buf;
	1717
	1718	#endif
	1719
	1720	}
	1721
	1722	/*
	1723	* Initialize locale awareness.
	1724	*/
	1725	int
	1726	Perl_init_i18nl10n(pTHX_ int printwarn)
	1727	{
	1728	/* printwarn is
	1729	*
	1730	* 0 if not to output warning when setup locale is bad
	1731	* 1 if to output warning based on value of PERL_BADLANG
	1732	* >1 if to output regardless of PERL_BADLANG
	1733	*
	1734	* returns
	1735	* 1 = set ok or not applicable,
	1736	* 0 = fallback to a locale of lower priority
	1737	* -1 = fallback to all locales failed, not even to the C locale
	1738	*
	1739	* Under -DDEBUGGING, if the environment variable PERL_DEBUG_LOCALE_INIT is
	1740	* set, debugging information is output.
	1741	*
	1742	* This looks more complicated than it is, mainly due to the #ifdefs.
	1743	*
	1744	* We try to set LC_ALL to the value determined by the environment. If
	1745	* there is no LC_ALL on this platform, we try the individual categories we
	1746	* know about. If this works, we are done.
	1747	*
	1748	* But if it doesn't work, we have to do something else. We search the
	1749	* environment variables ourselves instead of relying on the system to do
	1750	* it. We look at, in order, LC_ALL, LANG, a system default locale (if we
	1751	* think there is one), and the ultimate fallback "C". This is all done in
	1752	* the same loop as above to avoid duplicating code, but it makes things
	1753	* more complex. The 'trial_locales' array is initialized with just one
	1754	* element; it causes the behavior described in the paragraph above this to
	1755	* happen. If that fails, we add elements to 'trial_locales', and do extra
	1756	* loop iterations to cause the behavior described in this paragraph.
	1757	*
	1758	* On Ultrix, the locale MUST come from the environment, so there is
	1759	* preliminary code to set it. I (khw) am not sure that it is necessary,
	1760	* and that this couldn't be folded into the loop, but barring any real
	1761	* platforms to test on, it's staying as-is
	1762	*
	1763	* A slight complication is that in embedded Perls, the locale may already
	1764	* be set-up, and we don't want to get it from the normal environment
	1765	* variables. This is handled by having a special environment variable
	1766	* indicate we're in this situation. We simply set setlocale's 2nd
	1767	* parameter to be a NULL instead of "". That indicates to setlocale that
	1768	* it is not to change anything, but to return the current value,
	1769	* effectively initializing perl's db to what the locale already is.
	1770	*
	1771	* We play the same trick with NULL if a LC_ALL succeeds. We call
	1772	* setlocale() on the individual categories with NULL to get their existing
	1773	* values for our db, instead of trying to change them.
	1774	* */
	1775
	1776	int ok = 1;
	1777
	1778	#ifndef USE_LOCALE
	1779
	1780	PERL_UNUSED_ARG(printwarn);
	1781
	1782	#else /* USE_LOCALE */
	1783	# ifdef __GLIBC__
	1784
	1785	const char * const language = savepv(PerlEnv_getenv("LANGUAGE"));
	1786
	1787	# endif
	1788
	1789	/* NULL uses the existing already set up locale */
	1790	const char * const setlocale_init = (PerlEnv_getenv("PERL_SKIP_LOCALE_INIT"))
	1791	? NULL
	1792	: "";
	1793	const char* trial_locales[5]; /* 5 = 1 each for "", LC_ALL, LANG, "", C */
	1794	unsigned int trial_locales_count;
	1795	const char * const lc_all = savepv(PerlEnv_getenv("LC_ALL"));
	1796	const char * const lang = savepv(PerlEnv_getenv("LANG"));
	1797	bool setlocale_failure = FALSE;
	1798	unsigned int i;
	1799
	1800	/* A later getenv() could zap this, so only use here */
	1801	const char * const bad_lang_use_once = PerlEnv_getenv("PERL_BADLANG");
	1802
	1803	const bool locwarn = (printwarn > 1
	1804	\|\| ( printwarn
	1805	&& ( ! bad_lang_use_once
	1806	\|\| (
	1807	/* disallow with "" or "0" */
	1808	*bad_lang_use_once
	1809	&& strNE("0", bad_lang_use_once)))));
	1810	bool done = FALSE;
	1811	char * sl_result[NOMINAL_LC_ALL_INDEX + 1]; /* setlocale() return vals;
	1812	not copied so must be
	1813	looked at immediately */
	1814	char * curlocales[NOMINAL_LC_ALL_INDEX + 1]; /* current locale for given
	1815	category; should have been
	1816	copied so aren't volatile
	1817	*/
	1818	char * locale_param;
	1819
	1820	# ifdef WIN32
	1821
	1822	/* In some systems you can find out the system default locale
	1823	* and use that as the fallback locale. */
	1824	# define SYSTEM_DEFAULT_LOCALE
	1825	# endif
	1826	# ifdef SYSTEM_DEFAULT_LOCALE
	1827
	1828	const char *system_default_locale = NULL;
	1829
	1830	# endif
	1831
	1832	# ifndef DEBUGGING
	1833	# define DEBUG_LOCALE_INIT(a,b,c)
	1834	# else
	1835
	1836	DEBUG_INITIALIZATION_set(cBOOL(PerlEnv_getenv("PERL_DEBUG_LOCALE_INIT")));
	1837
	1838	# define DEBUG_LOCALE_INIT(category, locale, result) \
	1839	STMT_START { \
	1840	if (debug_initialization) { \
	1841	PerlIO_printf(Perl_debug_log, \
	1842	"%s:%d: %s\n", \
	1843	__FILE__, __LINE__, \
	1844	setlocale_debug_string(category, \
	1845	locale, \
	1846	result)); \
	1847	} \
	1848	} STMT_END
	1849
	1850	/* Make sure the parallel arrays are properly set up */
	1851	# ifdef USE_LOCALE_NUMERIC
	1852	assert(categories[LC_NUMERIC_INDEX] == LC_NUMERIC);
	1853	assert(strEQ(category_names[LC_NUMERIC_INDEX], "LC_NUMERIC"));
	1854	# endif
	1855	# ifdef USE_LOCALE_CTYPE
	1856	assert(categories[LC_CTYPE_INDEX] == LC_CTYPE);
	1857	assert(strEQ(category_names[LC_CTYPE_INDEX], "LC_CTYPE"));
	1858	# endif
	1859	# ifdef USE_LOCALE_COLLATE
	1860	assert(categories[LC_COLLATE_INDEX] == LC_COLLATE);
	1861	assert(strEQ(category_names[LC_COLLATE_INDEX], "LC_COLLATE"));
	1862	# endif
	1863	# ifdef USE_LOCALE_TIME
	1864	assert(categories[LC_TIME_INDEX] == LC_TIME);
	1865	assert(strEQ(category_names[LC_TIME_INDEX], "LC_TIME"));
	1866	# endif
	1867	# ifdef USE_LOCALE_MESSAGES
	1868	assert(categories[LC_MESSAGES_INDEX] == LC_MESSAGES);
	1869	assert(strEQ(category_names[LC_MESSAGES_INDEX], "LC_MESSAGES"));
	1870	# endif
	1871	# ifdef USE_LOCALE_MONETARY
	1872	assert(categories[LC_MONETARY_INDEX] == LC_MONETARY);
	1873	assert(strEQ(category_names[LC_MONETARY_INDEX], "LC_MONETARY"));
	1874	# endif
	1875	# ifdef LC_ALL
	1876	assert(categories[LC_ALL_INDEX] == LC_ALL);
	1877	assert(strEQ(category_names[LC_ALL_INDEX], "LC_ALL"));
	1878	assert(NOMINAL_LC_ALL_INDEX == LC_ALL_INDEX);
	1879	# endif
	1880	# endif /* DEBUGGING */
	1881	# ifndef LOCALE_ENVIRON_REQUIRED
	1882
	1883	PERL_UNUSED_VAR(done);
	1884	PERL_UNUSED_VAR(locale_param);
	1885
	1886	# else
	1887
	1888	/*
	1889	* Ultrix setlocale(..., "") fails if there are no environment
	1890	* variables from which to get a locale name.
	1891	*/
	1892
	1893	# ifdef LC_ALL
	1894
	1895	if (lang) {
	1896	sl_result[LC_ALL_INDEX] = do_setlocale_c(LC_ALL, setlocale_init);
	1897	DEBUG_LOCALE_INIT(LC_ALL, setlocale_init, sl_result[LC_ALL_INDEX]);
	1898	if (sl_result[LC_ALL_INDEX])
	1899	done = TRUE;
	1900	else
	1901	setlocale_failure = TRUE;
	1902	}
	1903	if (! setlocale_failure) {
	1904	for (i = 0; i < LC_ALL_INDEX; i++) {
	1905	locale_param = (! done && (lang \|\| PerlEnv_getenv(category_names[i])))
	1906	? setlocale_init
	1907	: NULL;
	1908	sl_result[i] = do_setlocale_r(categories[i], locale_param);
	1909	if (! sl_result[i]) {
	1910	setlocale_failure = TRUE;
	1911	}
	1912	DEBUG_LOCALE_INIT(categories[i], locale_param, sl_result[i]);
	1913	}
	1914	}
	1915
	1916	# endif /* LC_ALL */
	1917	# endif /* LOCALE_ENVIRON_REQUIRED */
	1918
	1919	/* We try each locale in the list until we get one that works, or exhaust
	1920	* the list. Normally the loop is executed just once. But if setting the
	1921	* locale fails, inside the loop we add fallback trials to the array and so
	1922	* will execute the loop multiple times */
	1923	trial_locales[0] = setlocale_init;
	1924	trial_locales_count = 1;
	1925
	1926	for (i= 0; i < trial_locales_count; i++) {
	1927	const char * trial_locale = trial_locales[i];
	1928
	1929	if (i > 0) {
	1930
	1931	/* XXX This is to preserve old behavior for LOCALE_ENVIRON_REQUIRED
	1932	* when i==0, but I (khw) don't think that behavior makes much
	1933	* sense */
	1934	setlocale_failure = FALSE;
	1935
	1936	# ifdef SYSTEM_DEFAULT_LOCALE
	1937	# ifdef WIN32
	1938
	1939	/* On Windows machines, an entry of "" after the 0th means to use
	1940	* the system default locale, which we now proceed to get. */
	1941	if (strEQ(trial_locale, "")) {
	1942	unsigned int j;
	1943
	1944	/* Note that this may change the locale, but we are going to do
	1945	* that anyway just below */
	1946	system_default_locale = do_setlocale_c(LC_ALL, "");
	1947	DEBUG_LOCALE_INIT(LC_ALL, "", system_default_locale);
	1948
	1949	/* Skip if invalid or if it's already on the list of locales to
	1950	* try */
	1951	if (! system_default_locale) {
	1952	goto next_iteration;
	1953	}
	1954	for (j = 0; j < trial_locales_count; j++) {
	1955	if (strEQ(system_default_locale, trial_locales[j])) {
	1956	goto next_iteration;
	1957	}
	1958	}
	1959
	1960	trial_locale = system_default_locale;
	1961	}
	1962	# endif /* WIN32 */
	1963	# endif /* SYSTEM_DEFAULT_LOCALE */
	1964	}
	1965
	1966	# ifdef LC_ALL
	1967
	1968	sl_result[LC_ALL_INDEX] = do_setlocale_c(LC_ALL, trial_locale);
	1969	DEBUG_LOCALE_INIT(LC_ALL, trial_locale, sl_result[LC_ALL_INDEX]);
	1970	if (! sl_result[LC_ALL_INDEX]) {
	1971	setlocale_failure = TRUE;
	1972	}
	1973	else {
	1974	/* Since LC_ALL succeeded, it should have changed all the other
	1975	* categories it can to its value; so we massage things so that the
	1976	* setlocales below just return their category's current values.
	1977	* This adequately handles the case in NetBSD where LC_COLLATE may
	1978	* not be defined for a locale, and setting it individually will
	1979	* fail, whereas setting LC_ALL succeeds, leaving LC_COLLATE set to
	1980	* the POSIX locale. */
	1981	trial_locale = NULL;
	1982	}
	1983
	1984	# endif /* LC_ALL */
	1985
	1986	if (! setlocale_failure) {
	1987	unsigned int j;
	1988	for (j = 0; j < NOMINAL_LC_ALL_INDEX; j++) {
	1989	curlocales[j]
	1990	= savepv(do_setlocale_r(categories[j], trial_locale));
	1991	if (! curlocales[j]) {
	1992	setlocale_failure = TRUE;
	1993	}
	1994	DEBUG_LOCALE_INIT(categories[j], trial_locale, curlocales[j]);
	1995	}
	1996
	1997	if (! setlocale_failure) { /* All succeeded */
	1998	break; /* Exit trial_locales loop */
	1999	}
	2000	}
	2001
	2002	/* Here, something failed; will need to try a fallback. */
	2003	ok = 0;
	2004
	2005	if (i == 0) {
	2006	unsigned int j;
	2007
	2008	if (locwarn) { /* Output failure info only on the first one */
	2009
	2010	# ifdef LC_ALL
	2011
	2012	PerlIO_printf(Perl_error_log,
	2013	"perl: warning: Setting locale failed.\n");
	2014
	2015	# else /* !LC_ALL */
	2016
	2017	PerlIO_printf(Perl_error_log,
	2018	"perl: warning: Setting locale failed for the categories:\n\t");
	2019
	2020	for (j = 0; j < NOMINAL_LC_ALL_INDEX; j++) {
	2021	if (! curlocales[j]) {
	2022	PerlIO_printf(Perl_error_log, category_names[j]);
	2023	}
	2024	else {
	2025	Safefree(curlocales[j]);
	2026	}
	2027	}
	2028
	2029	PerlIO_printf(Perl_error_log, "and possibly others\n");
	2030
	2031	# endif /* LC_ALL */
	2032
	2033	PerlIO_printf(Perl_error_log,
	2034	"perl: warning: Please check that your locale settings:\n");
	2035
	2036	# ifdef __GLIBC__
	2037
	2038	PerlIO_printf(Perl_error_log,
	2039	"\tLANGUAGE = %c%s%c,\n",
	2040	language ? '"' : '(',
	2041	language ? language : "unset",
	2042	language ? '"' : ')');
	2043	# endif
	2044
	2045	PerlIO_printf(Perl_error_log,
	2046	"\tLC_ALL = %c%s%c,\n",
	2047	lc_all ? '"' : '(',
	2048	lc_all ? lc_all : "unset",
	2049	lc_all ? '"' : ')');
	2050
	2051	# if defined(USE_ENVIRON_ARRAY)
	2052
	2053	{
	2054	char **e;
	2055
	2056	/* Look through the environment for any variables of the
	2057	* form qr/ ^ LC_ [A-Z]+ = /x, except LC_ALL which was
	2058	* already handled above. These are assumed to be locale
	2059	* settings. Output them and their values. */
	2060	for (e = environ; *e; e++) {
	2061	const STRLEN prefix_len = sizeof("LC_") - 1;
	2062	STRLEN uppers_len;
	2063
	2064	if ( strBEGINs(*e, "LC_")
	2065	&& ! strBEGINs(*e, "LC_ALL=")
	2066	&& (uppers_len = strspn(*e + prefix_len,
	2067	"ABCDEFGHIJKLMNOPQRSTUVWXYZ"))
	2068	&& ((*e)[prefix_len + uppers_len] == '='))
	2069	{
	2070	PerlIO_printf(Perl_error_log, "\t%.*s = \"%s\",\n",
	2071	(int) (prefix_len + uppers_len), *e,
	2072	*e + prefix_len + uppers_len + 1);
	2073	}
	2074	}
	2075	}
	2076
	2077	# else
	2078
	2079	PerlIO_printf(Perl_error_log,
	2080	"\t(possibly more locale environment variables)\n");
	2081
	2082	# endif
	2083
	2084	PerlIO_printf(Perl_error_log,
	2085	"\tLANG = %c%s%c\n",
	2086	lang ? '"' : '(',
	2087	lang ? lang : "unset",
	2088	lang ? '"' : ')');
	2089
	2090	PerlIO_printf(Perl_error_log,
	2091	" are supported and installed on your system.\n");
	2092	}
	2093
	2094	/* Calculate what fallback locales to try. We have avoided this
	2095	* until we have to, because failure is quite unlikely. This will
	2096	* usually change the upper bound of the loop we are in.
	2097	*
	2098	* Since the system's default way of setting the locale has not
	2099	* found one that works, We use Perl's defined ordering: LC_ALL,
	2100	* LANG, and the C locale. We don't try the same locale twice, so
	2101	* don't add to the list if already there. (On POSIX systems, the
	2102	* LC_ALL element will likely be a repeat of the 0th element "",
	2103	* but there's no harm done by doing it explicitly.
	2104	*
	2105	* Note that this tries the LC_ALL environment variable even on
	2106	* systems which have no LC_ALL locale setting. This may or may
	2107	* not have been originally intentional, but there's no real need
	2108	* to change the behavior. */
	2109	if (lc_all) {
	2110	for (j = 0; j < trial_locales_count; j++) {
	2111	if (strEQ(lc_all, trial_locales[j])) {
	2112	goto done_lc_all;
	2113	}
	2114	}
	2115	trial_locales[trial_locales_count++] = lc_all;
	2116	}
	2117	done_lc_all:
	2118
	2119	if (lang) {
	2120	for (j = 0; j < trial_locales_count; j++) {
	2121	if (strEQ(lang, trial_locales[j])) {
	2122	goto done_lang;
	2123	}
	2124	}
	2125	trial_locales[trial_locales_count++] = lang;
	2126	}
	2127	done_lang:
	2128
	2129	# if defined(WIN32) && defined(LC_ALL)
	2130
	2131	/* For Windows, we also try the system default locale before "C".
	2132	* (If there exists a Windows without LC_ALL we skip this because
	2133	* it gets too complicated. For those, the "C" is the next
	2134	* fallback possibility). The "" is the same as the 0th element of
	2135	* the array, but the code at the loop above knows to treat it
	2136	* differently when not the 0th */
	2137	trial_locales[trial_locales_count++] = "";
	2138
	2139	# endif
	2140
	2141	for (j = 0; j < trial_locales_count; j++) {
	2142	if (strEQ("C", trial_locales[j])) {
	2143	goto done_C;
	2144	}
	2145	}
	2146	trial_locales[trial_locales_count++] = "C";
	2147
	2148	done_C: ;
	2149	} /* end of first time through the loop */
	2150
	2151	# ifdef WIN32
	2152
	2153	next_iteration: ;
	2154
	2155	# endif
	2156
	2157	} /* end of looping through the trial locales */
	2158
	2159	if (ok < 1) { /* If we tried to fallback */
	2160	const char* msg;
	2161	if (! setlocale_failure) { /* fallback succeeded */
	2162	msg = "Falling back to";
	2163	}
	2164	else { /* fallback failed */
	2165	unsigned int j;
	2166
	2167	/* We dropped off the end of the loop, so have to decrement i to
	2168	* get back to the value the last time through */
	2169	i--;
	2170
	2171	ok = -1;
	2172	msg = "Failed to fall back to";
	2173
	2174	/* To continue, we should use whatever values we've got */
	2175
	2176	for (j = 0; j < NOMINAL_LC_ALL_INDEX; j++) {
	2177	Safefree(curlocales[j]);
	2178	curlocales[j] = savepv(do_setlocale_r(categories[j], NULL));
	2179	DEBUG_LOCALE_INIT(categories[j], NULL, curlocales[j]);
	2180	}
	2181	}
	2182
	2183	if (locwarn) {
	2184	const char * description;
	2185	const char * name = "";
	2186	if (strEQ(trial_locales[i], "C")) {
	2187	description = "the standard locale";
	2188	name = "C";
	2189	}
	2190
	2191	# ifdef SYSTEM_DEFAULT_LOCALE
	2192
	2193	else if (strEQ(trial_locales[i], "")) {
	2194	description = "the system default locale";
	2195	if (system_default_locale) {
	2196	name = system_default_locale;
	2197	}
	2198	}
	2199
	2200	# endif /* SYSTEM_DEFAULT_LOCALE */
	2201
	2202	else {
	2203	description = "a fallback locale";
	2204	name = trial_locales[i];
	2205	}
	2206	if (name && strNE(name, "")) {
	2207	PerlIO_printf(Perl_error_log,
	2208	"perl: warning: %s %s (\"%s\").\n", msg, description, name);
	2209	}
	2210	else {
	2211	PerlIO_printf(Perl_error_log,
	2212	"perl: warning: %s %s.\n", msg, description);
	2213	}
	2214	}
	2215	} /* End of tried to fallback */
	2216
	2217	/* Done with finding the locales; update our records */
	2218
	2219	# ifdef USE_LOCALE_CTYPE
	2220
	2221	new_ctype(curlocales[LC_CTYPE_INDEX]);
	2222
	2223	# endif
	2224	# ifdef USE_LOCALE_COLLATE
	2225
	2226	new_collate(curlocales[LC_COLLATE_INDEX]);
	2227
	2228	# endif
	2229	# ifdef USE_LOCALE_NUMERIC
	2230
	2231	new_numeric(curlocales[LC_NUMERIC_INDEX]);
	2232
	2233	# endif
	2234
	2235
	2236	for (i = 0; i < NOMINAL_LC_ALL_INDEX; i++) {
	2237	Safefree(curlocales[i]);
	2238	}
	2239
	2240	# if defined(USE_PERLIO) && defined(USE_LOCALE_CTYPE)
	2241
	2242	/* Set PL_utf8locale to TRUE if using PerlIO _and_ the current LC_CTYPE
	2243	* locale is UTF-8. If PL_utf8locale and PL_unicode (set by -C or by
	2244	* $ENV{PERL_UNICODE}) are true, perl.c:S_parse_body() will turn on the
	2245	* PerlIO :utf8 layer on STDIN, STDOUT, STDERR, _and_ the default open
	2246	* discipline. */
	2247	PL_utf8locale = _is_cur_LC_category_utf8(LC_CTYPE);
	2248
	2249	/* Set PL_unicode to $ENV{PERL_UNICODE} if using PerlIO.
	2250	This is an alternative to using the -C command line switch
	2251	(the -C if present will override this). */
	2252	{
	2253	const char *p = PerlEnv_getenv("PERL_UNICODE");
	2254	PL_unicode = p ? parse_unicode_opts(&p) : 0;
	2255	if (PL_unicode & PERL_UNICODE_UTF8CACHEASSERT_FLAG)
	2256	PL_utf8cache = -1;
	2257	}
	2258
	2259	# endif
	2260	# ifdef __GLIBC__
	2261
	2262	Safefree(language);
	2263
	2264	# endif
	2265
	2266	Safefree(lc_all);
	2267	Safefree(lang);
	2268
	2269	#endif /* USE_LOCALE */
	2270	#ifdef DEBUGGING
	2271
	2272	/* So won't continue to output stuff */
	2273	DEBUG_INITIALIZATION_set(FALSE);
	2274
	2275	#endif
	2276
	2277	return ok;
	2278	}
	2279
	2280	#ifdef USE_LOCALE_COLLATE
	2281
	2282	char *
	2283	Perl__mem_collxfrm(pTHX_ const char *input_string,
	2284	STRLEN len, /* Length of 'input_string' */
	2285	STRLEN xlen, / Set to length of returned string
	2286	(not including the collation index
	2287	prefix) */
	2288	bool utf8 /* Is the input in UTF-8? */
	2289	)
	2290	{
	2291
	2292	/* _mem_collxfrm() is a bit like strxfrm() but with two important
	2293	* differences. First, it handles embedded NULs. Second, it allocates a bit
	2294	* more memory than needed for the transformed data itself. The real
	2295	* transformed data begins at offset COLLXFRM_HDR_LEN. *xlen is set to
	2296	* the length of that, and doesn't include the collation index size.
	2297	* Please see sv_collxfrm() to see how this is used. */
	2298
	2299	#define COLLXFRM_HDR_LEN sizeof(PL_collation_ix)
	2300
	2301	char * s = (char *) input_string;
	2302	STRLEN s_strlen = strlen(input_string);
	2303	char *xbuf = NULL;
	2304	STRLEN xAlloc; /* xalloc is a reserved word in VC */
	2305	STRLEN length_in_chars;
	2306	bool first_time = TRUE; /* Cleared after first loop iteration */
	2307
	2308	PERL_ARGS_ASSERT__MEM_COLLXFRM;
	2309
	2310	/* Must be NUL-terminated */
	2311	assert(*(input_string + len) == '\0');
	2312
	2313	/* If this locale has defective collation, skip */
	2314	if (PL_collxfrm_base == 0 && PL_collxfrm_mult == 0) {
	2315	DEBUG_L(PerlIO_printf(Perl_debug_log,
	2316	"_mem_collxfrm: locale's collation is defective\n"));
	2317	goto bad;
	2318	}
	2319
	2320	/* Replace any embedded NULs with the control that sorts before any others.
	2321	* This will give as good as possible results on strings that don't
	2322	* otherwise contain that character, but otherwise there may be
	2323	* less-than-perfect results with that character and NUL. This is
	2324	* unavoidable unless we replace strxfrm with our own implementation. */
	2325	if (UNLIKELY(s_strlen < len)) { /* Only execute if there is an embedded
	2326	NUL */
	2327	char * e = s + len;
	2328	char * sans_nuls;
	2329	STRLEN sans_nuls_len;
	2330	int try_non_controls;
	2331	char this_replacement_char[] = "?\0"; /* Room for a two-byte string,
	2332	making sure 2nd byte is NUL.
	2333	*/
	2334	STRLEN this_replacement_len;
	2335
	2336	/* If we don't know what non-NUL control character sorts lowest for
	2337	* this locale, find it */
	2338	if (PL_strxfrm_NUL_replacement == '\0') {
	2339	int j;
	2340	char * cur_min_x = NULL; /* The min_char's xfrm, (except it also
	2341	includes the collation index
	2342	prefixed. */
	2343
	2344	DEBUG_Lv(PerlIO_printf(Perl_debug_log, "Looking to replace NUL\n"));
	2345
	2346	/* Unlikely, but it may be that no control will work to replace
	2347	* NUL, in which case we instead look for any character. Controls
	2348	* are preferred because collation order is, in general, context
	2349	* sensitive, with adjoining characters affecting the order, and
	2350	* controls are less likely to have such interactions, allowing the
	2351	* NUL-replacement to stand on its own. (Another way to look at it
	2352	* is to imagine what would happen if the NUL were replaced by a
	2353	* combining character; it wouldn't work out all that well.) */
	2354	for (try_non_controls = 0;
	2355	try_non_controls < 2;
	2356	try_non_controls++)
	2357	{
	2358	/* Look through all legal code points (NUL isn't) */
	2359	for (j = 1; j < 256; j++) {
	2360	char * x; /* j's xfrm plus collation index */
	2361	STRLEN x_len; /* length of 'x' */
	2362	STRLEN trial_len = 1;
	2363	char cur_source[] = { '\0', '\0' };
	2364
	2365	/* Skip non-controls the first time through the loop. The
	2366	* controls in a UTF-8 locale are the L1 ones */
	2367	if (! try_non_controls && (PL_in_utf8_COLLATE_locale)
	2368	? ! isCNTRL_L1(j)
	2369	: ! isCNTRL_LC(j))
	2370	{
	2371	continue;
	2372	}
	2373
	2374	/* Create a 1-char string of the current code point */
	2375	cur_source[0] = (char) j;
	2376
	2377	/* Then transform it */
	2378	x = _mem_collxfrm(cur_source, trial_len, &x_len,
	2379	0 /* The string is not in UTF-8 */);
	2380
	2381	/* Ignore any character that didn't successfully transform.
	2382	* */
	2383	if (! x) {
	2384	continue;
	2385	}
	2386
	2387	/* If this character's transformation is lower than
	2388	* the current lowest, this one becomes the lowest */
	2389	if ( cur_min_x == NULL
	2390	\|\| strLT(x + COLLXFRM_HDR_LEN,
	2391	cur_min_x + COLLXFRM_HDR_LEN))
	2392	{
	2393	PL_strxfrm_NUL_replacement = j;
	2394	cur_min_x = x;
	2395	}
	2396	else {
	2397	Safefree(x);
	2398	}
	2399	} /* end of loop through all 255 characters */
	2400
	2401	/* Stop looking if found */
	2402	if (cur_min_x) {
	2403	break;
	2404	}
	2405
	2406	/* Unlikely, but possible, if there aren't any controls that
	2407	* work in the locale, repeat the loop, looking for any
	2408	* character that works */
	2409	DEBUG_L(PerlIO_printf(Perl_debug_log,
	2410	"_mem_collxfrm: No control worked. Trying non-controls\n"));
	2411	} /* End of loop to try first the controls, then any char */
	2412
	2413	if (! cur_min_x) {
	2414	DEBUG_L(PerlIO_printf(Perl_debug_log,
	2415	"_mem_collxfrm: Couldn't find any character to replace"
	2416	" embedded NULs in locale %s with", PL_collation_name));
	2417	goto bad;
	2418	}
	2419
	2420	DEBUG_L(PerlIO_printf(Perl_debug_log,
	2421	"_mem_collxfrm: Replacing embedded NULs in locale %s with "
	2422	"0x%02X\n", PL_collation_name, PL_strxfrm_NUL_replacement));
	2423
	2424	Safefree(cur_min_x);
	2425	} /* End of determining the character that is to replace NULs */
	2426
	2427	/* If the replacement is variant under UTF-8, it must match the
	2428	* UTF8-ness as the original */
	2429	if ( ! UVCHR_IS_INVARIANT(PL_strxfrm_NUL_replacement) && utf8) {
	2430	this_replacement_char[0] =
	2431	UTF8_EIGHT_BIT_HI(PL_strxfrm_NUL_replacement);
	2432	this_replacement_char[1] =
	2433	UTF8_EIGHT_BIT_LO(PL_strxfrm_NUL_replacement);
	2434	this_replacement_len = 2;
	2435	}
	2436	else {
	2437	this_replacement_char[0] = PL_strxfrm_NUL_replacement;
	2438	/* this_replacement_char[1] = '\0' was done at initialization */
	2439	this_replacement_len = 1;
	2440	}
	2441
	2442	/* The worst case length for the replaced string would be if every
	2443	* character in it is NUL. Multiply that by the length of each
	2444	* replacement, and allow for a trailing NUL */
	2445	sans_nuls_len = (len * this_replacement_len) + 1;
	2446	Newx(sans_nuls, sans_nuls_len, char);
	2447	*sans_nuls = '\0';
	2448
	2449	/* Replace each NUL with the lowest collating control. Loop until have
	2450	* exhausted all the NULs */
	2451	while (s + s_strlen < e) {
	2452	my_strlcat(sans_nuls, s, sans_nuls_len);
	2453
	2454	/* Do the actual replacement */
	2455	my_strlcat(sans_nuls, this_replacement_char, sans_nuls_len);
	2456
	2457	/* Move past the input NUL */
	2458	s += s_strlen + 1;
	2459	s_strlen = strlen(s);
	2460	}
	2461
	2462	/* And add anything that trails the final NUL */
	2463	my_strlcat(sans_nuls, s, sans_nuls_len);
	2464
	2465	/* Switch so below we transform this modified string */
	2466	s = sans_nuls;
	2467	len = strlen(s);
	2468	} /* End of replacing NULs */
	2469
	2470	/* Make sure the UTF8ness of the string and locale match */
	2471	if (utf8 != PL_in_utf8_COLLATE_locale) {
	2472	const char * const t = s; /* Temporary so we can later find where the
	2473	input was */
	2474
	2475	/* Here they don't match. Change the string's to be what the locale is
	2476	* expecting */
	2477
	2478	if (! utf8) { /* locale is UTF-8, but input isn't; upgrade the input */
	2479	s = (char ) bytes_to_utf8((const U8 ) s, &len);
	2480	utf8 = TRUE;
	2481	}
	2482	else { /* locale is not UTF-8; but input is; downgrade the input */
	2483
	2484	s = (char ) bytes_from_utf8((const U8 ) s, &len, &utf8);
	2485
	2486	/* If the downgrade was successful we are done, but if the input
	2487	* contains things that require UTF-8 to represent, have to do
	2488	* damage control ... */
	2489	if (UNLIKELY(utf8)) {
	2490
	2491	/* What we do is construct a non-UTF-8 string with
	2492	* 1) the characters representable by a single byte converted
	2493	* to be so (if necessary);
	2494	* 2) and the rest converted to collate the same as the
	2495	* highest collating representable character. That makes
	2496	* them collate at the end. This is similar to how we
	2497	* handle embedded NULs, but we use the highest collating
	2498	* code point instead of the smallest. Like the NUL case,
	2499	* this isn't perfect, but is the best we can reasonably
	2500	* do. Every above-255 code point will sort the same as
	2501	* the highest-sorting 0-255 code point. If that code
	2502	* point can combine in a sequence with some other code
	2503	* points for weight calculations, us changing something to
	2504	* be it can adversely affect the results. But in most
	2505	* cases, it should work reasonably. And note that this is
	2506	* really an illegal situation: using code points above 255
	2507	* on a locale where only 0-255 are valid. If two strings
	2508	* sort entirely equal, then the sort order for the
	2509	* above-255 code points will be in code point order. */
	2510
	2511	utf8 = FALSE;
	2512
	2513	/* If we haven't calculated the code point with the maximum
	2514	* collating order for this locale, do so now */
	2515	if (! PL_strxfrm_max_cp) {
	2516	int j;
	2517
	2518	/* The current transformed string that collates the
	2519	* highest (except it also includes the prefixed collation
	2520	* index. */
	2521	char * cur_max_x = NULL;
	2522
	2523	/* Look through all legal code points (NUL isn't) */
	2524	for (j = 1; j < 256; j++) {
	2525	char * x;
	2526	STRLEN x_len;
	2527	char cur_source[] = { '\0', '\0' };
	2528
	2529	/* Create a 1-char string of the current code point */
	2530	cur_source[0] = (char) j;
	2531
	2532	/* Then transform it */
	2533	x = _mem_collxfrm(cur_source, 1, &x_len, FALSE);
	2534
	2535	/* If something went wrong (which it shouldn't), just
	2536	* ignore this code point */
	2537	if (! x) {
	2538	continue;
	2539	}
	2540
	2541	/* If this character's transformation is higher than
	2542	* the current highest, this one becomes the highest */
	2543	if ( cur_max_x == NULL
	2544	\|\| strGT(x + COLLXFRM_HDR_LEN,
	2545	cur_max_x + COLLXFRM_HDR_LEN))
	2546	{
	2547	PL_strxfrm_max_cp = j;
	2548	cur_max_x = x;
	2549	}
	2550	else {
	2551	Safefree(x);
	2552	}
	2553	}
	2554
	2555	if (! cur_max_x) {
	2556	DEBUG_L(PerlIO_printf(Perl_debug_log,
	2557	"_mem_collxfrm: Couldn't find any character to"
	2558	" replace above-Latin1 chars in locale %s with",
	2559	PL_collation_name));
	2560	goto bad;
	2561	}
	2562
	2563	DEBUG_L(PerlIO_printf(Perl_debug_log,
	2564	"_mem_collxfrm: highest 1-byte collating character"
	2565	" in locale %s is 0x%02X\n",
	2566	PL_collation_name,
	2567	PL_strxfrm_max_cp));
	2568
	2569	Safefree(cur_max_x);
	2570	}
	2571
	2572	/* Here we know which legal code point collates the highest.
	2573	* We are ready to construct the non-UTF-8 string. The length
	2574	* will be at least 1 byte smaller than the input string
	2575	* (because we changed at least one 2-byte character into a
	2576	* single byte), but that is eaten up by the trailing NUL */
	2577	Newx(s, len, char);
	2578
	2579	{
	2580	STRLEN i;
	2581	STRLEN d= 0;
	2582	char * e = (char *) t + len;
	2583
	2584	for (i = 0; i < len; i+= UTF8SKIP(t + i)) {
	2585	U8 cur_char = t[i];
	2586	if (UTF8_IS_INVARIANT(cur_char)) {
	2587	s[d++] = cur_char;
	2588	}
	2589	else if (UTF8_IS_NEXT_CHAR_DOWNGRADEABLE(t + i, e)) {
	2590	s[d++] = EIGHT_BIT_UTF8_TO_NATIVE(cur_char, t[i+1]);
	2591	}
	2592	else { /* Replace illegal cp with highest collating
	2593	one */
	2594	s[d++] = PL_strxfrm_max_cp;
	2595	}
	2596	}
	2597	s[d++] = '\0';
	2598	Renew(s, d, char); /* Free up unused space */
	2599	}
	2600	}
	2601	}
	2602
	2603	/* Here, we have constructed a modified version of the input. It could
	2604	* be that we already had a modified copy before we did this version.
	2605	* If so, that copy is no longer needed */
	2606	if (t != input_string) {
	2607	Safefree(t);
	2608	}
	2609	}
	2610
	2611	length_in_chars = (utf8)
	2612	? utf8_length((U8 ) s, (U8 ) s + len)
	2613	: len;
	2614
	2615	/* The first element in the output is the collation id, used by
	2616	* sv_collxfrm(); then comes the space for the transformed string. The
	2617	* equation should give us a good estimate as to how much is needed */
	2618	xAlloc = COLLXFRM_HDR_LEN
	2619	+ PL_collxfrm_base
	2620	+ (PL_collxfrm_mult * length_in_chars);
	2621	Newx(xbuf, xAlloc, char);
	2622	if (UNLIKELY(! xbuf)) {
	2623	DEBUG_L(PerlIO_printf(Perl_debug_log,
	2624	"_mem_collxfrm: Couldn't malloc %zu bytes\n", xAlloc));
	2625	goto bad;
	2626	}
	2627
	2628	/* Store the collation id */
	2629	(U32)xbuf = PL_collation_ix;
	2630
	2631	/* Then the transformation of the input. We loop until successful, or we
	2632	* give up */
	2633	for (;;) {
	2634
	2635	*xlen = strxfrm(xbuf + COLLXFRM_HDR_LEN, s, xAlloc - COLLXFRM_HDR_LEN);
	2636
	2637	/* If the transformed string occupies less space than we told strxfrm()
	2638	* was available, it means it successfully transformed the whole
	2639	* string. */
	2640	if (*xlen < xAlloc - COLLXFRM_HDR_LEN) {
	2641
	2642	/* Some systems include a trailing NUL in the returned length.
	2643	* Ignore it, using a loop in case multiple trailing NULs are
	2644	* returned. */
	2645	while ( (*xlen) > 0
	2646	&& (xbuf + COLLXFRM_HDR_LEN + (xlen) - 1) == '\0')
	2647	{
	2648	(*xlen)--;
	2649	}
	2650
	2651	/* If the first try didn't get it, it means our prediction was low.
	2652	* Modify the coefficients so that we predict a larger value in any
	2653	* future transformations */
	2654	if (! first_time) {
	2655	STRLEN needed = xlen + 1; / +1 For trailing NUL */
	2656	STRLEN computed_guess = PL_collxfrm_base
	2657	+ (PL_collxfrm_mult * length_in_chars);
	2658
	2659	/* On zero-length input, just keep current slope instead of
	2660	* dividing by 0 */
	2661	const STRLEN new_m = (length_in_chars != 0)
	2662	? needed / length_in_chars
	2663	: PL_collxfrm_mult;
	2664
	2665	DEBUG_Lv(PerlIO_printf(Perl_debug_log,
	2666	"%s: %d: initial size of %zu bytes for a length "
	2667	"%zu string was insufficient, %zu needed\n",
	2668	__FILE__, __LINE__,
	2669	computed_guess, length_in_chars, needed));
	2670
	2671	/* If slope increased, use it, but discard this result for
	2672	* length 1 strings, as we can't be sure that it's a real slope
	2673	* change */
	2674	if (length_in_chars > 1 && new_m > PL_collxfrm_mult) {
	2675
	2676	# ifdef DEBUGGING
	2677
	2678	STRLEN old_m = PL_collxfrm_mult;
	2679	STRLEN old_b = PL_collxfrm_base;
	2680
	2681	# endif
	2682
	2683	PL_collxfrm_mult = new_m;
	2684	PL_collxfrm_base = 1; /* +1 For trailing NUL */
	2685	computed_guess = PL_collxfrm_base
	2686	+ (PL_collxfrm_mult * length_in_chars);
	2687	if (computed_guess < needed) {
	2688	PL_collxfrm_base += needed - computed_guess;
	2689	}
	2690
	2691	DEBUG_Lv(PerlIO_printf(Perl_debug_log,
	2692	"%s: %d: slope is now %zu; was %zu, base "
	2693	"is now %zu; was %zu\n",
	2694	__FILE__, __LINE__,
	2695	PL_collxfrm_mult, old_m,
	2696	PL_collxfrm_base, old_b));
	2697	}
	2698	else { /* Slope didn't change, but 'b' did */
	2699	const STRLEN new_b = needed
	2700	- computed_guess
	2701	+ PL_collxfrm_base;
	2702	DEBUG_Lv(PerlIO_printf(Perl_debug_log,
	2703	"%s: %d: base is now %zu; was %zu\n",
	2704	__FILE__, __LINE__,
	2705	new_b, PL_collxfrm_base));
	2706	PL_collxfrm_base = new_b;
	2707	}
	2708	}
	2709
	2710	break;
	2711	}
	2712
	2713	if (UNLIKELY(*xlen >= PERL_INT_MAX)) {
	2714	DEBUG_L(PerlIO_printf(Perl_debug_log,
	2715	"_mem_collxfrm: Needed %zu bytes, max permissible is %u\n",
	2716	*xlen, PERL_INT_MAX));
	2717	goto bad;
	2718	}
	2719
	2720	/* A well-behaved strxfrm() returns exactly how much space it needs
	2721	* (usually not including the trailing NUL) when it fails due to not
	2722	* enough space being provided. Assume that this is the case unless
	2723	* it's been proven otherwise */
	2724	if (LIKELY(PL_strxfrm_is_behaved) && first_time) {
	2725	xAlloc = *xlen + COLLXFRM_HDR_LEN + 1;
	2726	}
	2727	else { /* Here, either:
	2728	* 1) The strxfrm() has previously shown bad behavior; or
	2729	* 2) It isn't the first time through the loop, which means
	2730	* that the strxfrm() is now showing bad behavior, because
	2731	* we gave it what it said was needed in the previous
	2732	* iteration, and it came back saying it needed still more.
	2733	* (Many versions of cygwin fit this. When the buffer size
	2734	* isn't sufficient, they return the input size instead of
	2735	* how much is needed.)
	2736	* Increase the buffer size by a fixed percentage and try again.
	2737	* */
	2738	xAlloc += (xAlloc / 4) + 1;
	2739	PL_strxfrm_is_behaved = FALSE;
	2740
	2741	# ifdef DEBUGGING
	2742
	2743	if (DEBUG_Lv_TEST \|\| debug_initialization) {
	2744	PerlIO_printf(Perl_debug_log,
	2745	"_mem_collxfrm required more space than previously calculated"
	2746	" for locale %s, trying again with new guess=%d+%zu\n",
	2747	PL_collation_name, (int) COLLXFRM_HDR_LEN,
	2748	xAlloc - COLLXFRM_HDR_LEN);
	2749	}
	2750
	2751	# endif
	2752
	2753	}
	2754
	2755	Renew(xbuf, xAlloc, char);
	2756	if (UNLIKELY(! xbuf)) {
	2757	DEBUG_L(PerlIO_printf(Perl_debug_log,
	2758	"_mem_collxfrm: Couldn't realloc %zu bytes\n", xAlloc));
	2759	goto bad;
	2760	}
	2761
	2762	first_time = FALSE;
	2763	}
	2764
	2765
	2766	# ifdef DEBUGGING
	2767
	2768	if (DEBUG_Lv_TEST \|\| debug_initialization) {
	2769
	2770	print_collxfrm_input_and_return(s, s + len, xlen, utf8);
	2771	PerlIO_printf(Perl_debug_log, "Its xfrm is:");
	2772	PerlIO_printf(Perl_debug_log, "%s\n",
	2773	_byte_dump_string((U8 *) xbuf + COLLXFRM_HDR_LEN,
	2774	*xlen, 1));
	2775	}
	2776
	2777	# endif
	2778
	2779	/* Free up unneeded space; retain ehough for trailing NUL */
	2780	Renew(xbuf, COLLXFRM_HDR_LEN + *xlen + 1, char);
	2781
	2782	if (s != input_string) {
	2783	Safefree(s);
	2784	}
	2785
	2786	return xbuf;
	2787
	2788	bad:
	2789	Safefree(xbuf);
	2790	if (s != input_string) {
	2791	Safefree(s);
	2792	}
	2793	*xlen = 0;
	2794
	2795	# ifdef DEBUGGING
	2796
	2797	if (DEBUG_Lv_TEST \|\| debug_initialization) {
	2798	print_collxfrm_input_and_return(s, s + len, NULL, utf8);
	2799	}
	2800
	2801	# endif
	2802
	2803	return NULL;
	2804	}
	2805
	2806	# ifdef DEBUGGING
	2807
	2808	STATIC void
	2809	S_print_collxfrm_input_and_return(pTHX_
	2810	const char * const s,
	2811	const char * const e,
	2812	const STRLEN * const xlen,
	2813	const bool is_utf8)
	2814	{
	2815
	2816	PERL_ARGS_ASSERT_PRINT_COLLXFRM_INPUT_AND_RETURN;
	2817
	2818	PerlIO_printf(Perl_debug_log, "_mem_collxfrm[%" UVuf "]: returning ",
	2819	(UV)PL_collation_ix);
	2820	if (xlen) {
	2821	PerlIO_printf(Perl_debug_log, "%zu", *xlen);
	2822	}
	2823	else {
	2824	PerlIO_printf(Perl_debug_log, "NULL");
	2825	}
	2826	PerlIO_printf(Perl_debug_log, " for locale '%s', string='",
	2827	PL_collation_name);
	2828	print_bytes_for_locale(s, e, is_utf8);
	2829
	2830	PerlIO_printf(Perl_debug_log, "'\n");
	2831	}
	2832
	2833	STATIC void
	2834	S_print_bytes_for_locale(pTHX_
	2835	const char * const s,
	2836	const char * const e,
	2837	const bool is_utf8)
	2838	{
	2839	const char * t = s;
	2840	bool prev_was_printable = TRUE;
	2841	bool first_time = TRUE;
	2842
	2843	PERL_ARGS_ASSERT_PRINT_BYTES_FOR_LOCALE;
	2844
	2845	while (t < e) {
	2846	UV cp = (is_utf8)
	2847	? utf8_to_uvchr_buf((U8 *) t, e, NULL)
	2848	: * (U8 *) t;
	2849	if (isPRINT(cp)) {
	2850	if (! prev_was_printable) {
	2851	PerlIO_printf(Perl_debug_log, " ");
	2852	}
	2853	PerlIO_printf(Perl_debug_log, "%c", (U8) cp);
	2854	prev_was_printable = TRUE;
	2855	}
	2856	else {
	2857	if (! first_time) {
	2858	PerlIO_printf(Perl_debug_log, " ");
	2859	}
	2860	PerlIO_printf(Perl_debug_log, "%02" UVXf, cp);
	2861	prev_was_printable = FALSE;
	2862	}
	2863	t += (is_utf8) ? UTF8SKIP(t) : 1;
	2864	first_time = FALSE;
	2865	}
	2866	}
	2867
	2868	# endif /* #ifdef DEBUGGING */
	2869	#endif /* USE_LOCALE_COLLATE */
	2870
	2871	#ifdef USE_LOCALE
	2872
	2873	bool
	2874	Perl__is_cur_LC_category_utf8(pTHX_ int category)
	2875	{
	2876	/* Returns TRUE if the current locale for 'category' is UTF-8; FALSE
	2877	* otherwise. 'category' may not be LC_ALL. If the platform doesn't have
	2878	* nl_langinfo(), nor MB_CUR_MAX, this employs a heuristic, which hence
	2879	* could give the wrong result. The result will very likely be correct for
	2880	* languages that have commonly used non-ASCII characters, but for notably
	2881	* English, it comes down to if the locale's name ends in something like
	2882	* "UTF-8". It errs on the side of not being a UTF-8 locale. */
	2883
	2884	char *save_input_locale = NULL;
	2885	STRLEN final_pos;
	2886
	2887	# ifdef LC_ALL
	2888
	2889	assert(category != LC_ALL);
	2890
	2891	# endif
	2892
	2893	/* First dispose of the trivial cases */
	2894	save_input_locale = do_setlocale_r(category, NULL);
	2895	if (! save_input_locale) {
	2896	DEBUG_L(PerlIO_printf(Perl_debug_log,
	2897	"Could not find current locale for category %d\n",
	2898	category));
	2899	return FALSE; /* XXX maybe should croak */
	2900	}
	2901	save_input_locale = stdize_locale(savepv(save_input_locale));
	2902	if (isNAME_C_OR_POSIX(save_input_locale)) {
	2903	DEBUG_L(PerlIO_printf(Perl_debug_log,
	2904	"Current locale for category %d is %s\n",
	2905	category, save_input_locale));
	2906	Safefree(save_input_locale);
	2907	return FALSE;
	2908	}
	2909
	2910	# if defined(USE_LOCALE_CTYPE) \
	2911	&& (defined(MB_CUR_MAX) \|\| (defined(HAS_NL_LANGINFO) && defined(CODESET)))
	2912
	2913	{ /* Next try nl_langinfo or MB_CUR_MAX if available */
	2914
	2915	char *save_ctype_locale = NULL;
	2916	bool is_utf8;
	2917
	2918	if (category != LC_CTYPE) { /* These work only on LC_CTYPE */
	2919
	2920	/* Get the current LC_CTYPE locale */
	2921	save_ctype_locale = do_setlocale_c(LC_CTYPE, NULL);
	2922	if (! save_ctype_locale) {
	2923	DEBUG_L(PerlIO_printf(Perl_debug_log,
	2924	"Could not find current locale for LC_CTYPE\n"));
	2925	goto cant_use_nllanginfo;
	2926	}
	2927	save_ctype_locale = stdize_locale(savepv(save_ctype_locale));
	2928
	2929	/* If LC_CTYPE and the desired category use the same locale, this
	2930	* means that finding the value for LC_CTYPE is the same as finding
	2931	* the value for the desired category. Otherwise, switch LC_CTYPE
	2932	* to the desired category's locale */
	2933	if (strEQ(save_ctype_locale, save_input_locale)) {
	2934	Safefree(save_ctype_locale);
	2935	save_ctype_locale = NULL;
	2936	}
	2937	else if (! do_setlocale_c(LC_CTYPE, save_input_locale)) {
	2938	DEBUG_L(PerlIO_printf(Perl_debug_log,
	2939	"Could not change LC_CTYPE locale to %s\n",
	2940	save_input_locale));
	2941	Safefree(save_ctype_locale);
	2942	goto cant_use_nllanginfo;
	2943	}
	2944	}
	2945
	2946	DEBUG_L(PerlIO_printf(Perl_debug_log, "Current LC_CTYPE locale=%s\n",
	2947	save_input_locale));
	2948
	2949	/* Here the current LC_CTYPE is set to the locale of the category whose
	2950	* information is desired. This means that nl_langinfo() and MB_CUR_MAX
	2951	* should give the correct results */
	2952
	2953	# if defined(HAS_NL_LANGINFO) && defined(CODESET)
	2954	/* The task is easiest if has this POSIX 2001 function */
	2955
	2956	{
	2957	const char *codeset = my_nl_langinfo(PERL_CODESET, FALSE);
	2958	/* FALSE => already in dest locale */
	2959
	2960	DEBUG_L(PerlIO_printf(Perl_debug_log,
	2961	"\tnllanginfo returned CODESET '%s'\n", codeset));
	2962
	2963	if (codeset && strNE(codeset, "")) {
	2964	/* If we switched LC_CTYPE, switch back */
	2965	if (save_ctype_locale) {
	2966	do_setlocale_c(LC_CTYPE, save_ctype_locale);
	2967	Safefree(save_ctype_locale);
	2968	}
	2969
	2970	is_utf8 = ( ( strlen(codeset) == STRLENs("UTF-8")
	2971	&& foldEQ(codeset, STR_WITH_LEN("UTF-8")))
	2972	\|\| ( strlen(codeset) == STRLENs("UTF8")
	2973	&& foldEQ(codeset, STR_WITH_LEN("UTF8"))));
	2974
	2975	DEBUG_L(PerlIO_printf(Perl_debug_log,
	2976	"\tnllanginfo returned CODESET '%s'; ?UTF8 locale=%d\n",
	2977	codeset, is_utf8));
	2978	Safefree(save_input_locale);
	2979	return is_utf8;
	2980	}
	2981	}
	2982
	2983	# endif
	2984	# ifdef MB_CUR_MAX
	2985
	2986	/* Here, either we don't have nl_langinfo, or it didn't return a
	2987	* codeset. Try MB_CUR_MAX */
	2988
	2989	/* Standard UTF-8 needs at least 4 bytes to represent the maximum
	2990	* Unicode code point. Since UTF-8 is the only non-single byte
	2991	* encoding we handle, we just say any such encoding is UTF-8, and if
	2992	* turns out to be wrong, other things will fail */
	2993	is_utf8 = (unsigned) MB_CUR_MAX >= STRLENs(MAX_UNICODE_UTF8);
	2994
	2995	DEBUG_L(PerlIO_printf(Perl_debug_log,
	2996	"\tMB_CUR_MAX=%d; ?UTF8 locale=%d\n",
	2997	(int) MB_CUR_MAX, is_utf8));
	2998
	2999	Safefree(save_input_locale);
	3000
	3001	# ifdef HAS_MBTOWC
	3002
	3003	/* ... But, most system that have MB_CUR_MAX will also have mbtowc(),
	3004	* since they are both in the C99 standard. We can feed a known byte
	3005	* string to the latter function, and check that it gives the expected
	3006	* result */
	3007	if (is_utf8) {
	3008	wchar_t wc;
	3009	int len;
	3010
	3011	PERL_UNUSED_RESULT(mbtowc(&wc, NULL, 0));/* Reset any shift state */
	3012	errno = 0;
	3013	len = mbtowc(&wc, STR_WITH_LEN(REPLACEMENT_CHARACTER_UTF8));
	3014
	3015
	3016	if ( len != STRLENs(REPLACEMENT_CHARACTER_UTF8)
	3017	\|\| wc != (wchar_t) UNICODE_REPLACEMENT)
	3018	{
	3019	is_utf8 = FALSE;
	3020	DEBUG_L(PerlIO_printf(Perl_debug_log, "\replacement=U+%x\n",
	3021	(unsigned int)wc));
	3022	DEBUG_L(PerlIO_printf(Perl_debug_log,
	3023	"\treturn from mbtowc=%d; errno=%d; ?UTF8 locale=0\n",
	3024	len, errno));
	3025	}
	3026	}
	3027
	3028	# endif
	3029
	3030	/* If we switched LC_CTYPE, switch back */
	3031	if (save_ctype_locale) {
	3032	do_setlocale_c(LC_CTYPE, save_ctype_locale);
	3033	Safefree(save_ctype_locale);
	3034	}
	3035
	3036	return is_utf8;
	3037
	3038	# endif
	3039
	3040	}
	3041
	3042	cant_use_nllanginfo:
	3043
	3044	# else /* nl_langinfo should work if available, so don't bother compiling this
	3045	fallback code. The final fallback of looking at the name is
	3046	compiled, and will be executed if nl_langinfo fails */
	3047
	3048	/* nl_langinfo not available or failed somehow. Next try looking at the
	3049	* currency symbol to see if it disambiguates things. Often that will be
	3050	* in the native script, and if the symbol isn't in UTF-8, we know that the
	3051	* locale isn't. If it is non-ASCII UTF-8, we infer that the locale is
	3052	* too, as the odds of a non-UTF8 string being valid UTF-8 are quite small
	3053	* */
	3054
	3055	# ifdef HAS_LOCALECONV
	3056	# ifdef USE_LOCALE_MONETARY
	3057
	3058	{
	3059	char *save_monetary_locale = NULL;
	3060	bool only_ascii = FALSE;
	3061	bool is_utf8 = FALSE;
	3062	struct lconv* lc;
	3063
	3064	/* Like above for LC_CTYPE, we first set LC_MONETARY to the locale of
	3065	* the desired category, if it isn't that locale already */
	3066
	3067	if (category != LC_MONETARY) {
	3068
	3069	save_monetary_locale = do_setlocale_c(LC_MONETARY, NULL);
	3070	if (! save_monetary_locale) {
	3071	DEBUG_L(PerlIO_printf(Perl_debug_log,
	3072	"Could not find current locale for LC_MONETARY\n"));
	3073	goto cant_use_monetary;
	3074	}
	3075	save_monetary_locale = stdize_locale(savepv(save_monetary_locale));
	3076
	3077	if (strEQ(save_monetary_locale, save_input_locale)) {
	3078	Safefree(save_monetary_locale);
	3079	save_monetary_locale = NULL;
	3080	}
	3081	else if (! do_setlocale_c(LC_MONETARY, save_input_locale)) {
	3082	DEBUG_L(PerlIO_printf(Perl_debug_log,
	3083	"Could not change LC_MONETARY locale to %s\n",
	3084	save_input_locale));
	3085	Safefree(save_monetary_locale);
	3086	goto cant_use_monetary;
	3087	}
	3088	}
	3089
	3090	/* Here the current LC_MONETARY is set to the locale of the category
	3091	* whose information is desired. */
	3092
	3093	lc = localeconv();
	3094	if (! lc
	3095	\|\| ! lc->currency_symbol
	3096	\|\| is_utf8_invariant_string((U8 *) lc->currency_symbol, 0))
	3097	{
	3098	DEBUG_L(PerlIO_printf(Perl_debug_log, "Couldn't get currency symbol for %s, or contains only ASCII; can't use for determining if UTF-8 locale\n", save_input_locale));
	3099	only_ascii = TRUE;
	3100	}
	3101	else {
	3102	is_utf8 = is_utf8_string((U8 *) lc->currency_symbol, 0);
	3103	}
	3104
	3105	/* If we changed it, restore LC_MONETARY to its original locale */
	3106	if (save_monetary_locale) {
	3107	do_setlocale_c(LC_MONETARY, save_monetary_locale);
	3108	Safefree(save_monetary_locale);
	3109	}
	3110
	3111	if (! only_ascii) {
	3112
	3113	/* It isn't a UTF-8 locale if the symbol is not legal UTF-8;
	3114	* otherwise assume the locale is UTF-8 if and only if the symbol
	3115	* is non-ascii UTF-8. */
	3116	DEBUG_L(PerlIO_printf(Perl_debug_log, "\t?Currency symbol for %s is UTF-8=%d\n",
	3117	save_input_locale, is_utf8));
	3118	Safefree(save_input_locale);
	3119	return is_utf8;
	3120	}
	3121	}
	3122	cant_use_monetary:
	3123
	3124	# endif /* USE_LOCALE_MONETARY */
	3125	# endif /* HAS_LOCALECONV */
	3126
	3127	# if defined(HAS_STRFTIME) && defined(USE_LOCALE_TIME)
	3128
	3129	/* Still haven't found a non-ASCII string to disambiguate UTF-8 or not. Try
	3130	* the names of the months and weekdays, timezone, and am/pm indicator */
	3131	{
	3132	char *save_time_locale = NULL;
	3133	int hour = 10;
	3134	bool is_dst = FALSE;
	3135	int dom = 1;
	3136	int month = 0;
	3137	int i;
	3138	char * formatted_time;
	3139
	3140
	3141	/* Like above for LC_MONETARY, we set LC_TIME to the locale of the
	3142	* desired category, if it isn't that locale already */
	3143
	3144	if (category != LC_TIME) {
	3145
	3146	save_time_locale = do_setlocale_c(LC_TIME, NULL);
	3147	if (! save_time_locale) {
	3148	DEBUG_L(PerlIO_printf(Perl_debug_log,
	3149	"Could not find current locale for LC_TIME\n"));
	3150	goto cant_use_time;
	3151	}
	3152	save_time_locale = stdize_locale(savepv(save_time_locale));
	3153
	3154	if (strEQ(save_time_locale, save_input_locale)) {
	3155	Safefree(save_time_locale);
	3156	save_time_locale = NULL;
	3157	}
	3158	else if (! do_setlocale_c(LC_TIME, save_input_locale)) {
	3159	DEBUG_L(PerlIO_printf(Perl_debug_log,
	3160	"Could not change LC_TIME locale to %s\n",
	3161	save_input_locale));
	3162	Safefree(save_time_locale);
	3163	goto cant_use_time;
	3164	}
	3165	}
	3166
	3167	/* Here the current LC_TIME is set to the locale of the category
	3168	* whose information is desired. Look at all the days of the week and
	3169	* month names, and the timezone and am/pm indicator for UTF-8 variant
	3170	* characters. The first such a one found will tell us if the locale
	3171	* is UTF-8 or not */
	3172
	3173	for (i = 0; i < 7 + 12; i++) { /* 7 days; 12 months */
	3174	formatted_time = my_strftime("%A %B %Z %p",
	3175	0, 0, hour, dom, month, 2012 - 1900, 0, 0, is_dst);
	3176	if ( ! formatted_time
	3177	\|\| is_utf8_invariant_string((U8 *) formatted_time, 0))
	3178	{
	3179
	3180	/* Here, we didn't find a non-ASCII. Try the next time through
	3181	* with the complemented dst and am/pm, and try with the next
	3182	* weekday. After we have gotten all weekdays, try the next
	3183	* month */
	3184	is_dst = ! is_dst;
	3185	hour = (hour + 12) % 24;
	3186	dom++;
	3187	if (i > 6) {
	3188	month++;
	3189	}
	3190	continue;
	3191	}
	3192
	3193	/* Here, we have a non-ASCII. Return TRUE is it is valid UTF8;
	3194	* false otherwise. But first, restore LC_TIME to its original
	3195	* locale if we changed it */
	3196	if (save_time_locale) {
	3197	do_setlocale_c(LC_TIME, save_time_locale);
	3198	Safefree(save_time_locale);
	3199	}
	3200
	3201	DEBUG_L(PerlIO_printf(Perl_debug_log, "\t?time-related strings for %s are UTF-8=%d\n",
	3202	save_input_locale,
	3203	is_utf8_string((U8 *) formatted_time, 0)));
	3204	Safefree(save_input_locale);
	3205	return is_utf8_string((U8 *) formatted_time, 0);
	3206	}
	3207
	3208	/* Falling off the end of the loop indicates all the names were just
	3209	* ASCII. Go on to the next test. If we changed it, restore LC_TIME
	3210	* to its original locale */
	3211	if (save_time_locale) {
	3212	do_setlocale_c(LC_TIME, save_time_locale);
	3213	Safefree(save_time_locale);
	3214	}
	3215	DEBUG_L(PerlIO_printf(Perl_debug_log, "All time-related words for %s contain only ASCII; can't use for determining if UTF-8 locale\n", save_input_locale));
	3216	}
	3217	cant_use_time:
	3218
	3219	# endif
	3220
	3221	# if 0 && defined(USE_LOCALE_MESSAGES) && defined(HAS_SYS_ERRLIST)
	3222
	3223	/* This code is ifdefd out because it was found to not be necessary in testing
	3224	* on our dromedary test machine, which has over 700 locales. There, this
	3225	* added no value to looking at the currency symbol and the time strings. I
	3226	* left it in so as to avoid rewriting it if real-world experience indicates
	3227	* that dromedary is an outlier. Essentially, instead of returning abpve if we
	3228	* haven't found illegal utf8, we continue on and examine all the strerror()
	3229	* messages on the platform for utf8ness. If all are ASCII, we still don't
	3230	* know the answer; but otherwise we have a pretty good indication of the
	3231	* utf8ness. The reason this doesn't help much is that the messages may not
	3232	* have been translated into the locale. The currency symbol and time strings
	3233	* are much more likely to have been translated. */
	3234	{
	3235	int e;
	3236	bool is_utf8 = FALSE;
	3237	bool non_ascii = FALSE;
	3238	char *save_messages_locale = NULL;
	3239	const char * errmsg = NULL;
	3240
	3241	/* Like above, we set LC_MESSAGES to the locale of the desired
	3242	* category, if it isn't that locale already */
	3243
	3244	if (category != LC_MESSAGES) {
	3245
	3246	save_messages_locale = do_setlocale_c(LC_MESSAGES, NULL);
	3247	if (! save_messages_locale) {
	3248	DEBUG_L(PerlIO_printf(Perl_debug_log,
	3249	"Could not find current locale for LC_MESSAGES\n"));
	3250	goto cant_use_messages;
	3251	}
	3252	save_messages_locale = stdize_locale(savepv(save_messages_locale));
	3253
	3254	if (strEQ(save_messages_locale, save_input_locale)) {
	3255	Safefree(save_messages_locale);
	3256	save_messages_locale = NULL;
	3257	}
	3258	else if (! do_setlocale_c(LC_MESSAGES, save_input_locale)) {
	3259	DEBUG_L(PerlIO_printf(Perl_debug_log,
	3260	"Could not change LC_MESSAGES locale to %s\n",
	3261	save_input_locale));
	3262	Safefree(save_messages_locale);
	3263	goto cant_use_messages;
	3264	}
	3265	}
	3266
	3267	/* Here the current LC_MESSAGES is set to the locale of the category
	3268	* whose information is desired. Look through all the messages. We
	3269	* can't use Strerror() here because it may expand to code that
	3270	* segfaults in miniperl */
	3271
	3272	for (e = 0; e <= sys_nerr; e++) {
	3273	errno = 0;
	3274	errmsg = sys_errlist[e];
	3275	if (errno \|\| !errmsg) {
	3276	break;
	3277	}
	3278	errmsg = savepv(errmsg);
	3279	if (! is_utf8_invariant_string((U8 *) errmsg, 0)) {
	3280	non_ascii = TRUE;
	3281	is_utf8 = is_utf8_string((U8 *) errmsg, 0);
	3282	break;
	3283	}
	3284	}
	3285	Safefree(errmsg);
	3286
	3287	/* And, if we changed it, restore LC_MESSAGES to its original locale */
	3288	if (save_messages_locale) {
	3289	do_setlocale_c(LC_MESSAGES, save_messages_locale);
	3290	Safefree(save_messages_locale);
	3291	}
	3292
	3293	if (non_ascii) {
	3294
	3295	/* Any non-UTF-8 message means not a UTF-8 locale; if all are valid,
	3296	* any non-ascii means it is one; otherwise we assume it isn't */
	3297	DEBUG_L(PerlIO_printf(Perl_debug_log, "\t?error messages for %s are UTF-8=%d\n",
	3298	save_input_locale,
	3299	is_utf8));
	3300	Safefree(save_input_locale);
	3301	return is_utf8;
	3302	}
	3303
	3304	DEBUG_L(PerlIO_printf(Perl_debug_log, "All error messages for %s contain only ASCII; can't use for determining if UTF-8 locale\n", save_input_locale));
	3305	}
	3306	cant_use_messages:
	3307
	3308	# endif
	3309	# endif /* the code that is compiled when no nl_langinfo */
	3310
	3311	# ifndef EBCDIC /* On os390, even if the name ends with "UTF-8', it isn't a
	3312	UTF-8 locale */
	3313
	3314	/* As a last resort, look at the locale name to see if it matches
	3315	* qr/UTF -? * 8 /ix, or some other common locale names. This "name", the
	3316	* return of setlocale(), is actually defined to be opaque, so we can't
	3317	* really rely on the absence of various substrings in the name to indicate
	3318	* its UTF-8ness, but if it has UTF8 in the name, it is extremely likely to
	3319	* be a UTF-8 locale. Similarly for the other common names */
	3320
	3321	final_pos = strlen(save_input_locale) - 1;
	3322	if (final_pos >= 3) {
	3323	char *name = save_input_locale;
	3324
	3325	/* Find next 'U' or 'u' and look from there */
	3326	while ((name += strcspn(name, "Uu") + 1)
	3327	<= save_input_locale + final_pos - 2)
	3328	{
	3329	if ( isALPHA_FOLD_NE(*name, 't')
	3330	\|\| isALPHA_FOLD_NE(*(name + 1), 'f'))
	3331	{
	3332	continue;
	3333	}
	3334	name += 2;
	3335	if (*(name) == '-') {
	3336	if ((name > save_input_locale + final_pos - 1)) {
	3337	break;
	3338	}
	3339	name++;
	3340	}
	3341	if (*(name) == '8') {
	3342	DEBUG_L(PerlIO_printf(Perl_debug_log,
	3343	"Locale %s ends with UTF-8 in name\n",
	3344	save_input_locale));
	3345	Safefree(save_input_locale);
	3346	return TRUE;
	3347	}
	3348	}
	3349	DEBUG_L(PerlIO_printf(Perl_debug_log,
	3350	"Locale %s doesn't end with UTF-8 in name\n",
	3351	save_input_locale));
	3352	}
	3353
	3354	# endif
	3355	# ifdef WIN32
	3356
	3357	/* http://msdn.microsoft.com/en-us/library/windows/desktop/dd317756.aspx */
	3358	if (memENDs(save_input_locale, final_pos, "65001")) {
	3359	DEBUG_L(PerlIO_printf(Perl_debug_log,
	3360	"Locale %s ends with 65001 in name, is UTF-8 locale\n",
	3361	save_input_locale));
	3362	Safefree(save_input_locale);
	3363	return TRUE;
	3364	}
	3365
	3366	# endif
	3367
	3368	/* Other common encodings are the ISO 8859 series, which aren't UTF-8. But
	3369	* since we are about to return FALSE anyway, there is no point in doing
	3370	* this extra work */
	3371
	3372	# if 0
	3373	if (instr(save_input_locale, "8859")) {
	3374	DEBUG_L(PerlIO_printf(Perl_debug_log,
	3375	"Locale %s has 8859 in name, not UTF-8 locale\n",
	3376	save_input_locale));
	3377	Safefree(save_input_locale);
	3378	return FALSE;
	3379	}
	3380	# endif
	3381
	3382	DEBUG_L(PerlIO_printf(Perl_debug_log,
	3383	"Assuming locale %s is not a UTF-8 locale\n",
	3384	save_input_locale));
	3385	Safefree(save_input_locale);
	3386	return FALSE;
	3387	}
	3388
	3389	#endif
	3390
	3391
	3392	bool
	3393	Perl__is_in_locale_category(pTHX_ const bool compiling, const int category)
	3394	{
	3395	dVAR;
	3396	/* Internal function which returns if we are in the scope of a pragma that
	3397	* enables the locale category 'category'. 'compiling' should indicate if
	3398	* this is during the compilation phase (TRUE) or not (FALSE). */
	3399
	3400	const COP * const cop = (compiling) ? &PL_compiling : PL_curcop;
	3401
	3402	SV *categories = cop_hints_fetch_pvs(cop, "locale", 0);
	3403	if (! categories \|\| categories == &PL_sv_placeholder) {
	3404	return FALSE;
	3405	}
	3406
	3407	/* The pseudo-category 'not_characters' is -1, so just add 1 to each to get
	3408	* a valid unsigned */
	3409	assert(category >= -1);
	3410	return cBOOL(SvUV(categories) & (1U << (category + 1)));
	3411	}
	3412
	3413	char *
	3414	Perl_my_strerror(pTHX_ const int errnum)
	3415	{
	3416	/* Returns a mortalized copy of the text of the error message associated
	3417	* with 'errnum'. It uses the current locale's text unless the platform
	3418	* doesn't have the LC_MESSAGES category or we are not being called from
	3419	* within the scope of 'use locale'. In the former case, it uses whatever
	3420	* strerror returns; in the latter case it uses the text from the C locale.
	3421	*
	3422	* The function just calls strerror(), but temporarily switches, if needed,
	3423	* to the C locale */
	3424
	3425	char *errstr;
	3426	dVAR;
	3427
	3428	#ifndef USE_LOCALE_MESSAGES
	3429
	3430	/* If platform doesn't have messages category, we don't do any switching to
	3431	* the C locale; we just use whatever strerror() returns */
	3432
	3433	errstr = savepv(Strerror(errnum));
	3434
	3435	#else /* Has locale messages */
	3436
	3437	const bool within_locale_scope = IN_LC(LC_MESSAGES);
	3438
	3439	# if defined(HAS_POSIX_2008_LOCALE) && defined(HAS_STRERROR_L)
	3440
	3441	/* This function is trivial if we don't have to worry about thread safety
	3442	* and have strerror_l(), as it handles the switch of locales so we don't
	3443	* have to deal with that. We don't have to worry about thread safety if
	3444	* this is an unthreaded build, or if strerror_r() is also available. Both
	3445	* it and strerror_l() are thread-safe. Plain strerror() isn't thread
	3446	* safe. But on threaded builds when strerror_r() is available, the
	3447	* apparent call to strerror() below is actually a macro that
	3448	* behind-the-scenes calls strerror_r().
	3449	*/
	3450
	3451	# if ! defined(USE_ITHREADS) \|\| defined(HAS_STRERROR_R)
	3452
	3453	if (within_locale_scope) {
	3454	errstr = savepv(strerror(errnum));
	3455	}
	3456	else {
	3457	errstr = savepv(strerror_l(errnum, PL_C_locale_obj));
	3458	}
	3459
	3460	# else
	3461
	3462	/* Here we have strerror_l(), but not strerror_r() and we are on a
	3463	* threaded-build. We use strerror_l() for everything, constructing a
	3464	* locale to pass to it if necessary */
	3465
	3466	bool do_free = FALSE;
	3467	locale_t locale_to_use;
	3468
	3469	if (within_locale_scope) {
	3470	locale_to_use = uselocale((locale_t) 0);
	3471	if (locale_to_use == LC_GLOBAL_LOCALE) {
	3472	locale_to_use = duplocale(LC_GLOBAL_LOCALE);
	3473	do_free = TRUE;
	3474	}
	3475	}
	3476	else { /* Use C locale if not within 'use locale' scope */
	3477	locale_to_use = PL_C_locale_obj;
	3478	}
	3479
	3480	errstr = savepv(strerror_l(errnum, locale_to_use));
	3481
	3482	if (do_free) {
	3483	freelocale(locale_to_use);
	3484	}
	3485
	3486	# endif
	3487	# else /* Doesn't have strerror_l() */
	3488
	3489	# ifdef USE_POSIX_2008_LOCALE
	3490
	3491	locale_t save_locale = NULL;
	3492
	3493	# else
	3494
	3495	char * save_locale = NULL;
	3496	bool locale_is_C = FALSE;
	3497
	3498	/* We have a critical section to prevent another thread from changing the
	3499	* locale out from under us (or zapping the buffer returned from
	3500	* setlocale() ) */
	3501	LOCALE_LOCK;
	3502
	3503	# endif
	3504
	3505	DEBUG_Lv(PerlIO_printf(Perl_debug_log,
	3506	"my_strerror called with errnum %d\n", errnum));
	3507	if (! within_locale_scope) {
	3508	errno = 0;
	3509
	3510	# ifdef USE_POSIX_2008_LOCALE /* Use the thread-safe locale functions */
	3511
	3512	DEBUG_Lv(PerlIO_printf(Perl_debug_log,
	3513	"Not within locale scope, about to call"
	3514	" uselocale(0x%p)\n", PL_C_locale_obj));
	3515	save_locale = uselocale(PL_C_locale_obj);
	3516	if (! save_locale) {
	3517	DEBUG_L(PerlIO_printf(Perl_debug_log,
	3518	"uselocale failed, errno=%d\n", errno));
	3519	}
	3520	else {
	3521	DEBUG_Lv(PerlIO_printf(Perl_debug_log,
	3522	"uselocale returned 0x%p\n", save_locale));
	3523	}
	3524
	3525	# else /* Not thread-safe build */
	3526
	3527	save_locale = do_setlocale_c(LC_MESSAGES, NULL);
	3528	if (! save_locale) {
	3529	DEBUG_L(PerlIO_printf(Perl_debug_log,
	3530	"setlocale failed, errno=%d\n", errno));
	3531	}
	3532	else {
	3533	locale_is_C = isNAME_C_OR_POSIX(save_locale);
	3534
	3535	/* Switch to the C locale if not already in it */
	3536	if (! locale_is_C) {
	3537
	3538	/* The setlocale() just below likely will zap 'save_locale', so
	3539	* create a copy. */
	3540	save_locale = savepv(save_locale);
	3541	do_setlocale_c(LC_MESSAGES, "C");
	3542	}
	3543	}
	3544
	3545	# endif
	3546
	3547	} /* end of ! within_locale_scope */
	3548	else {
	3549	DEBUG_Lv(PerlIO_printf(Perl_debug_log, "%s: %d: WITHIN locale scope\n",
	3550	__FILE__, __LINE__));
	3551	}
	3552
	3553	DEBUG_Lv(PerlIO_printf(Perl_debug_log,
	3554	"Any locale change has been done; about to call Strerror\n"));
	3555	errstr = savepv(Strerror(errnum));
	3556
	3557	if (! within_locale_scope) {
	3558	errno = 0;
	3559
	3560	# ifdef USE_POSIX_2008_LOCALE
	3561
	3562	DEBUG_Lv(PerlIO_printf(Perl_debug_log,
	3563	"%s: %d: not within locale scope, restoring the locale\n",
	3564	__FILE__, __LINE__));
	3565	if (save_locale && ! uselocale(save_locale)) {
	3566	DEBUG_L(PerlIO_printf(Perl_debug_log,
	3567	"uselocale restore failed, errno=%d\n", errno));
	3568	}
	3569	}
	3570
	3571	# else
	3572
	3573	if (save_locale && ! locale_is_C) {
	3574	if (! do_setlocale_c(LC_MESSAGES, save_locale)) {
	3575	DEBUG_L(PerlIO_printf(Perl_debug_log,
	3576	"setlocale restore failed, errno=%d\n", errno));
	3577	}
	3578	Safefree(save_locale);
	3579	}
	3580	}
	3581
	3582	LOCALE_UNLOCK;
	3583
	3584	# endif
	3585	# endif /* End of doesn't have strerror_l */
	3586	#endif /* End of does have locale messages */
	3587
	3588	#ifdef DEBUGGING
	3589
	3590	if (DEBUG_Lv_TEST) {
	3591	PerlIO_printf(Perl_debug_log, "Strerror returned; saving a copy: '");
	3592	print_bytes_for_locale(errstr, errstr + strlen(errstr), 0);
	3593	PerlIO_printf(Perl_debug_log, "'\n");
	3594	}
	3595
	3596	#endif
	3597
	3598	SAVEFREEPV(errstr);
	3599	return errstr;
	3600	}
	3601
	3602	/*
	3603
	3604	=for apidoc sync_locale
	3605
	3606	Changing the program's locale should be avoided by XS code. Nevertheless,
	3607	certain non-Perl libraries called from XS, such as C<Gtk> do so. When this
	3608	happens, Perl needs to be told that the locale has changed. Use this function
	3609	to do so, before returning to Perl.
	3610
	3611	=cut
	3612	*/
	3613
	3614	void
	3615	Perl_sync_locale(pTHX)
	3616	{
	3617	char * newlocale;
	3618
	3619	#ifdef USE_LOCALE_CTYPE
	3620
	3621	newlocale = do_setlocale_c(LC_CTYPE, NULL);
	3622	DEBUG_Lv(PerlIO_printf(Perl_debug_log,
	3623	"%s:%d: %s\n", __FILE__, __LINE__,
	3624	setlocale_debug_string(LC_CTYPE, NULL, newlocale)));
	3625	new_ctype(newlocale);
	3626
	3627	#endif /* USE_LOCALE_CTYPE */
	3628	#ifdef USE_LOCALE_COLLATE
	3629
	3630	newlocale = do_setlocale_c(LC_COLLATE, NULL);
	3631	DEBUG_Lv(PerlIO_printf(Perl_debug_log,
	3632	"%s:%d: %s\n", __FILE__, __LINE__,
	3633	setlocale_debug_string(LC_COLLATE, NULL, newlocale)));
	3634	new_collate(newlocale);
	3635
	3636	#endif
	3637	#ifdef USE_LOCALE_NUMERIC
	3638
	3639	newlocale = do_setlocale_c(LC_NUMERIC, NULL);
	3640	DEBUG_Lv(PerlIO_printf(Perl_debug_log,
	3641	"%s:%d: %s\n", __FILE__, __LINE__,
	3642	setlocale_debug_string(LC_NUMERIC, NULL, newlocale)));
	3643	new_numeric(newlocale);
	3644
	3645	#endif /* USE_LOCALE_NUMERIC */
	3646
	3647	}
	3648
	3649	#if defined(DEBUGGING) && defined(USE_LOCALE)
	3650
	3651	STATIC char *
	3652	S_setlocale_debug_string(const int category, /* category number,
	3653	like LC_ALL */
	3654	const char* const locale, /* locale name */
	3655
	3656	/* return value from setlocale() when attempting to
	3657	* set 'category' to 'locale' */
	3658	const char* const retval)
	3659	{
	3660	/* Returns a pointer to a NUL-terminated string in static storage with
	3661	* added text about the info passed in. This is not thread safe and will
	3662	* be overwritten by the next call, so this should be used just to
	3663	* formulate a string to immediately print or savepv() on. */
	3664
	3665	/* initialise to a non-null value to keep it out of BSS and so keep
	3666	* -DPERL_GLOBAL_STRUCT_PRIVATE happy */
	3667	static char ret[128] = "If you can read this, thank your buggy C"
	3668	" library strlcpy(), and change your hints file"
	3669	" to undef it";
	3670
	3671	my_strlcpy(ret, "setlocale(", sizeof(ret));
	3672	my_strlcat(ret, category_name(category), sizeof(ret));
	3673	my_strlcat(ret, ", ", sizeof(ret));
	3674
	3675	if (locale) {
	3676	my_strlcat(ret, "\"", sizeof(ret));
	3677	my_strlcat(ret, locale, sizeof(ret));
	3678	my_strlcat(ret, "\"", sizeof(ret));
	3679	}
	3680	else {
	3681	my_strlcat(ret, "NULL", sizeof(ret));
	3682	}
	3683
	3684	my_strlcat(ret, ") returned ", sizeof(ret));
	3685
	3686	if (retval) {
	3687	my_strlcat(ret, "\"", sizeof(ret));
	3688	my_strlcat(ret, retval, sizeof(ret));
	3689	my_strlcat(ret, "\"", sizeof(ret));
	3690	}
	3691	else {
	3692	my_strlcat(ret, "NULL", sizeof(ret));
	3693	}
	3694
	3695	assert(strlen(ret) < sizeof(ret));
	3696
	3697	return ret;
	3698	}
	3699
	3700	#endif
	3701
	3702
	3703	/*
	3704	* ex: set ts=8 sts=4 sw=4 et:
	3705	*/