perl5.git.perl.org Git - perl5.git/blame_incremental

... / ...

Commit	Line	Data
	1	/* locale.c
	2	*
	3	* Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
	4	* 2002, 2003, 2005, 2006, 2007, 2008 by Larry Wall and others
	5	*
	6	* You may distribute under the terms of either the GNU General Public
	7	* License or the Artistic License, as specified in the README file.
	8	*
	9	*/
	10
	11	/*
	12	* A Elbereth Gilthoniel,
	13	* silivren penna míriel
	14	* o menel aglar elenath!
	15	* Na-chaered palan-díriel
	16	* o galadhremmin ennorath,
	17	* Fanuilos, le linnathon
	18	* nef aear, si nef aearon!
	19	*
	20	* [p.238 of _The Lord of the Rings_, II/i: "Many Meetings"]
	21	*/
	22
	23	/* utility functions for handling locale-specific stuff like what
	24	* character represents the decimal point.
	25	*
	26	* All C programs have an underlying locale. Perl code generally doesn't pay
	27	* any attention to it except within the scope of a 'use locale'. For most
	28	* categories, it accomplishes this by just using different operations if it is
	29	* in such scope than if not. However, various libc functions called by Perl
	30	* are affected by the LC_NUMERIC category, so there are macros in perl.h that
	31	* are used to toggle between the current locale and the C locale depending on
	32	* the desired behavior of those functions at the moment. And, LC_MESSAGES is
	33	* switched to the C locale for outputting the message unless within the scope
	34	* of 'use locale'.
	35	*
	36	* This code now has multi-thread-safe locale handling on systems that support
	37	* that. This is completely transparent to most XS code. On earlier systems,
	38	* it would be possible to emulate thread-safe locales, but this likely would
	39	* involve a lot of locale switching, and would require XS code changes.
	40	* Macros could be written so that the code wouldn't have to know which type of
	41	* system is being used. It's unlikely that we would ever do that, since most
	42	* modern systems support thread-safe locales, but there was code written to
	43	* this end, and is retained, #ifdef'd out.
	44	*/
	45
	46	#include "EXTERN.h"
	47	#define PERL_IN_LOCALE_C
	48	#include "perl_langinfo.h"
	49	#include "perl.h"
	50
	51	#include "reentr.h"
	52
	53	#ifdef I_WCHAR
	54	# include <wchar.h>
	55	#endif
	56	#ifdef I_WCTYPE
	57	# include <wctype.h>
	58	#endif
	59
	60	/* If the environment says to, we can output debugging information during
	61	* initialization. This is done before option parsing, and before any thread
	62	* creation, so can be a file-level static */
	63	#if ! defined(DEBUGGING) \|\| defined(PERL_GLOBAL_STRUCT)
	64	# define debug_initialization 0
	65	# define DEBUG_INITIALIZATION_set(v)
	66	#else
	67	static bool debug_initialization = FALSE;
	68	# define DEBUG_INITIALIZATION_set(v) (debug_initialization = v)
	69	#endif
	70
	71
	72	/* Returns the Unix errno portion; ignoring any others. This is a macro here
	73	* instead of putting it into perl.h, because unclear to khw what should be
	74	* done generally. */
	75	#define GET_ERRNO saved_errno
	76
	77	/* strlen() of a literal string constant. We might want this more general,
	78	* but using it in just this file for now. A problem with more generality is
	79	* the compiler warnings about comparing unlike signs */
	80	#define STRLENs(s) (sizeof("" s "") - 1)
	81
	82	/* Is the C string input 'name' "C" or "POSIX"? If so, and 'name' is the
	83	* return of setlocale(), then this is extremely likely to be the C or POSIX
	84	* locale. However, the output of setlocale() is documented to be opaque, but
	85	* the odds are extremely small that it would return these two strings for some
	86	* other locale. Note that VMS in these two locales includes many non-ASCII
	87	* characters as controls and punctuation (below are hex bytes):
	88	* cntrl: 84-97 9B-9F
	89	* punct: A1-A3 A5 A7-AB B0-B3 B5-B7 B9-BD BF-CF D1-DD DF-EF F1-FD
	90	* Oddly, none there are listed as alphas, though some represent alphabetics
	91	* http://www.nntp.perl.org/group/perl.perl5.porters/2013/02/msg198753.html */
	92	#define isNAME_C_OR_POSIX(name) \
	93	( (name) != NULL \
	94	&& (( (name) == 'C' && ((name + 1)) == '\0') \
	95	\|\| strEQ((name), "POSIX")))
	96
	97	#ifdef USE_LOCALE
	98
	99	/* This code keeps a LRU cache of the UTF-8ness of the locales it has so-far
	100	* looked up. This is in the form of a C string: */
	101
	102	#define UTF8NESS_SEP "\v"
	103	#define UTF8NESS_PREFIX "\f"
	104
	105	/* So, the string looks like:
	106	*
	107	* \vC\a0\vPOSIX\a0\vam_ET\a0\vaf_ZA.utf8\a1\ven_US.UTF-8\a1\0
	108	*
	109	* where the digit 0 after the \a indicates that the locale starting just
	110	* after the preceding \v is not UTF-8, and the digit 1 mean it is. */
	111
	112	STATIC_ASSERT_DECL(STRLENs(UTF8NESS_SEP) == 1);
	113	STATIC_ASSERT_DECL(STRLENs(UTF8NESS_PREFIX) == 1);
	114
	115	#define C_and_POSIX_utf8ness UTF8NESS_SEP "C" UTF8NESS_PREFIX "0" \
	116	UTF8NESS_SEP "POSIX" UTF8NESS_PREFIX "0"
	117
	118	/* The cache is initialized to C_and_POSIX_utf8ness at start up. These are
	119	* kept there always. The remining portion of the cache is LRU, with the
	120	* oldest looked-up locale at the tail end */
	121
	122	STATIC char *
	123	S_stdize_locale(pTHX_ char *locs)
	124	{
	125	/* Standardize the locale name from a string returned by 'setlocale',
	126	* possibly modifying that string.
	127	*
	128	* The typical return value of setlocale() is either
	129	* (1) "xx_YY" if the first argument of setlocale() is not LC_ALL
	130	* (2) "xa_YY xb_YY ..." if the first argument of setlocale() is LC_ALL
	131	* (the space-separated values represent the various sublocales,
	132	* in some unspecified order). This is not handled by this function.
	133	*
	134	* In some platforms it has a form like "LC_SOMETHING=Lang_Country.866\n",
	135	* which is harmful for further use of the string in setlocale(). This
	136	* function removes the trailing new line and everything up through the '='
	137	* */
	138
	139	const char * const s = strchr(locs, '=');
	140	bool okay = TRUE;
	141
	142	PERL_ARGS_ASSERT_STDIZE_LOCALE;
	143
	144	if (s) {
	145	const char * const t = strchr(s, '.');
	146	okay = FALSE;
	147	if (t) {
	148	const char * const u = strchr(t, '\n');
	149	if (u && (u[1] == 0)) {
	150	const STRLEN len = u - s;
	151	Move(s + 1, locs, len, char);
	152	locs[len] = 0;
	153	okay = TRUE;
	154	}
	155	}
	156	}
	157
	158	if (!okay)
	159	Perl_croak(aTHX_ "Can't fix broken locale name \"%s\"", locs);
	160
	161	return locs;
	162	}
	163
	164	/* Two parallel arrays; first the locale categories Perl uses on this system;
	165	* the second array is their names. These arrays are in mostly arbitrary
	166	* order. */
	167
	168	const int categories[] = {
	169
	170	# ifdef USE_LOCALE_NUMERIC
	171	LC_NUMERIC,
	172	# endif
	173	# ifdef USE_LOCALE_CTYPE
	174	LC_CTYPE,
	175	# endif
	176	# ifdef USE_LOCALE_COLLATE
	177	LC_COLLATE,
	178	# endif
	179	# ifdef USE_LOCALE_TIME
	180	LC_TIME,
	181	# endif
	182	# ifdef USE_LOCALE_MESSAGES
	183	LC_MESSAGES,
	184	# endif
	185	# ifdef USE_LOCALE_MONETARY
	186	LC_MONETARY,
	187	# endif
	188	# ifdef USE_LOCALE_ADDRESS
	189	LC_ADDRESS,
	190	# endif
	191	# ifdef USE_LOCALE_IDENTIFICATION
	192	LC_IDENTIFICATION,
	193	# endif
	194	# ifdef USE_LOCALE_MEASUREMENT
	195	LC_MEASUREMENT,
	196	# endif
	197	# ifdef USE_LOCALE_PAPER
	198	LC_PAPER,
	199	# endif
	200	# ifdef USE_LOCALE_TELEPHONE
	201	LC_TELEPHONE,
	202	# endif
	203	# ifdef LC_ALL
	204	LC_ALL,
	205	# endif
	206	-1 /* Placeholder because C doesn't allow a
	207	trailing comma, and it would get complicated
	208	with all the #ifdef's */
	209	};
	210
	211	/* The top-most real element is LC_ALL */
	212
	213	const char * const category_names[] = {
	214
	215	# ifdef USE_LOCALE_NUMERIC
	216	"LC_NUMERIC",
	217	# endif
	218	# ifdef USE_LOCALE_CTYPE
	219	"LC_CTYPE",
	220	# endif
	221	# ifdef USE_LOCALE_COLLATE
	222	"LC_COLLATE",
	223	# endif
	224	# ifdef USE_LOCALE_TIME
	225	"LC_TIME",
	226	# endif
	227	# ifdef USE_LOCALE_MESSAGES
	228	"LC_MESSAGES",
	229	# endif
	230	# ifdef USE_LOCALE_MONETARY
	231	"LC_MONETARY",
	232	# endif
	233	# ifdef USE_LOCALE_ADDRESS
	234	"LC_ADDRESS",
	235	# endif
	236	# ifdef USE_LOCALE_IDENTIFICATION
	237	"LC_IDENTIFICATION",
	238	# endif
	239	# ifdef USE_LOCALE_MEASUREMENT
	240	"LC_MEASUREMENT",
	241	# endif
	242	# ifdef USE_LOCALE_PAPER
	243	"LC_PAPER",
	244	# endif
	245	# ifdef USE_LOCALE_TELEPHONE
	246	"LC_TELEPHONE",
	247	# endif
	248	# ifdef LC_ALL
	249	"LC_ALL",
	250	# endif
	251	NULL /* Placeholder */
	252	};
	253
	254	# ifdef LC_ALL
	255
	256	/* On systems with LC_ALL, it is kept in the highest index position. (-2
	257	* to account for the final unused placeholder element.) */
	258	# define NOMINAL_LC_ALL_INDEX (C_ARRAY_LENGTH(categories) - 2)
	259
	260	# else
	261
	262	/* On systems without LC_ALL, we pretend it is there, one beyond the real
	263	* top element, hence in the unused placeholder element. */
	264	# define NOMINAL_LC_ALL_INDEX (C_ARRAY_LENGTH(categories) - 1)
	265
	266	# endif
	267
	268	/* Pretending there is an LC_ALL element just above allows us to avoid most
	269	* special cases. Most loops through these arrays in the code below are
	270	* written like 'for (i = 0; i < NOMINAL_LC_ALL_INDEX; i++)'. They will work
	271	* on either type of system. But the code must be written to not access the
	272	* element at 'LC_ALL_INDEX' except on platforms that have it. This can be
	273	* checked for at compile time by using the #define LC_ALL_INDEX which is only
	274	* defined if we do have LC_ALL. */
	275
	276	STATIC const char *
	277	S_category_name(const int category)
	278	{
	279	unsigned int i;
	280
	281	#ifdef LC_ALL
	282
	283	if (category == LC_ALL) {
	284	return "LC_ALL";
	285	}
	286
	287	#endif
	288
	289	for (i = 0; i < NOMINAL_LC_ALL_INDEX; i++) {
	290	if (category == categories[i]) {
	291	return category_names[i];
	292	}
	293	}
	294
	295	{
	296	const char suffix[] = " (unknown)";
	297	int temp = category;
	298	Size_t length = sizeof(suffix) + 1;
	299	char * unknown;
	300	dTHX;
	301
	302	if (temp < 0) {
	303	length++;
	304	temp = - temp;
	305	}
	306
	307	/* Calculate the number of digits */
	308	while (temp >= 10) {
	309	temp /= 10;
	310	length++;
	311	}
	312
	313	Newx(unknown, length, char);
	314	my_snprintf(unknown, length, "%d%s", category, suffix);
	315	SAVEFREEPV(unknown);
	316	return unknown;
	317	}
	318	}
	319
	320	/* Now create LC_foo_INDEX #defines for just those categories on this system */
	321	# ifdef USE_LOCALE_NUMERIC
	322	# define LC_NUMERIC_INDEX 0
	323	# define _DUMMY_NUMERIC LC_NUMERIC_INDEX
	324	# else
	325	# define _DUMMY_NUMERIC -1
	326	# endif
	327	# ifdef USE_LOCALE_CTYPE
	328	# define LC_CTYPE_INDEX _DUMMY_NUMERIC + 1
	329	# define _DUMMY_CTYPE LC_CTYPE_INDEX
	330	# else
	331	# define _DUMMY_CTYPE _DUMMY_NUMERIC
	332	# endif
	333	# ifdef USE_LOCALE_COLLATE
	334	# define LC_COLLATE_INDEX _DUMMY_CTYPE + 1
	335	# define _DUMMY_COLLATE LC_COLLATE_INDEX
	336	# else
	337	# define _DUMMY_COLLATE _DUMMY_CTYPE
	338	# endif
	339	# ifdef USE_LOCALE_TIME
	340	# define LC_TIME_INDEX _DUMMY_COLLATE + 1
	341	# define _DUMMY_TIME LC_TIME_INDEX
	342	# else
	343	# define _DUMMY_TIME _DUMMY_COLLATE
	344	# endif
	345	# ifdef USE_LOCALE_MESSAGES
	346	# define LC_MESSAGES_INDEX _DUMMY_TIME + 1
	347	# define _DUMMY_MESSAGES LC_MESSAGES_INDEX
	348	# else
	349	# define _DUMMY_MESSAGES _DUMMY_TIME
	350	# endif
	351	# ifdef USE_LOCALE_MONETARY
	352	# define LC_MONETARY_INDEX _DUMMY_MESSAGES + 1
	353	# define _DUMMY_MONETARY LC_MONETARY_INDEX
	354	# else
	355	# define _DUMMY_MONETARY _DUMMY_MESSAGES
	356	# endif
	357	# ifdef USE_LOCALE_ADDRESS
	358	# define LC_ADDRESS_INDEX _DUMMY_MONETARY + 1
	359	# define _DUMMY_ADDRESS LC_ADDRESS_INDEX
	360	# else
	361	# define _DUMMY_ADDRESS _DUMMY_MONETARY
	362	# endif
	363	# ifdef USE_LOCALE_IDENTIFICATION
	364	# define LC_IDENTIFICATION_INDEX _DUMMY_ADDRESS + 1
	365	# define _DUMMY_IDENTIFICATION LC_IDENTIFICATION_INDEX
	366	# else
	367	# define _DUMMY_IDENTIFICATION _DUMMY_ADDRESS
	368	# endif
	369	# ifdef USE_LOCALE_MEASUREMENT
	370	# define LC_MEASUREMENT_INDEX _DUMMY_IDENTIFICATION + 1
	371	# define _DUMMY_MEASUREMENT LC_MEASUREMENT_INDEX
	372	# else
	373	# define _DUMMY_MEASUREMENT _DUMMY_IDENTIFICATION
	374	# endif
	375	# ifdef USE_LOCALE_PAPER
	376	# define LC_PAPER_INDEX _DUMMY_MEASUREMENT + 1
	377	# define _DUMMY_PAPER LC_PAPER_INDEX
	378	# else
	379	# define _DUMMY_PAPER _DUMMY_MEASUREMENT
	380	# endif
	381	# ifdef USE_LOCALE_TELEPHONE
	382	# define LC_TELEPHONE_INDEX _DUMMY_PAPER + 1
	383	# define _DUMMY_TELEPHONE LC_TELEPHONE_INDEX
	384	# else
	385	# define _DUMMY_TELEPHONE _DUMMY_PAPER
	386	# endif
	387	# ifdef LC_ALL
	388	# define LC_ALL_INDEX _DUMMY_TELEPHONE + 1
	389	# endif
	390	#endif /* ifdef USE_LOCALE */
	391
	392	/* Windows requres a customized base-level setlocale() */
	393	#ifdef WIN32
	394	# define my_setlocale(cat, locale) win32_setlocale(cat, locale)
	395	#else
	396	# define my_setlocale(cat, locale) setlocale(cat, locale)
	397	#endif
	398
	399	#ifndef USE_POSIX_2008_LOCALE
	400
	401	/* "do_setlocale_c" is intended to be called when the category is a constant
	402	* known at compile time; "do_setlocale_r", not known until run time */
	403	# define do_setlocale_c(cat, locale) my_setlocale(cat, locale)
	404	# define do_setlocale_r(cat, locale) my_setlocale(cat, locale)
	405
	406	#else /* Below uses POSIX 2008 */
	407
	408	/* We emulate setlocale with our own function. LC_foo is not valid for the
	409	* POSIX 2008 functions. Instead LC_foo_MASK is used, which we use an array
	410	* lookup to convert to. At compile time we have defined LC_foo_INDEX as the
	411	* proper offset into the array 'category_masks[]'. At runtime, we have to
	412	* search through the array (as the actual numbers may not be small contiguous
	413	* positive integers which would lend themselves to array lookup). */
	414	# define do_setlocale_c(cat, locale) \
	415	emulate_setlocale(cat, locale, cat ## _INDEX, TRUE)
	416	# define do_setlocale_r(cat, locale) emulate_setlocale(cat, locale, 0, FALSE)
	417
	418	/* A third array, parallel to the ones above to map from category to its
	419	* equivalent mask */
	420	const int category_masks[] = {
	421	# ifdef USE_LOCALE_NUMERIC
	422	LC_NUMERIC_MASK,
	423	# endif
	424	# ifdef USE_LOCALE_CTYPE
	425	LC_CTYPE_MASK,
	426	# endif
	427	# ifdef USE_LOCALE_COLLATE
	428	LC_COLLATE_MASK,
	429	# endif
	430	# ifdef USE_LOCALE_TIME
	431	LC_TIME_MASK,
	432	# endif
	433	# ifdef USE_LOCALE_MESSAGES
	434	LC_MESSAGES_MASK,
	435	# endif
	436	# ifdef USE_LOCALE_MONETARY
	437	LC_MONETARY_MASK,
	438	# endif
	439	# ifdef USE_LOCALE_ADDRESS
	440	LC_ADDRESS_MASK,
	441	# endif
	442	# ifdef USE_LOCALE_IDENTIFICATION
	443	LC_IDENTIFICATION_MASK,
	444	# endif
	445	# ifdef USE_LOCALE_MEASUREMENT
	446	LC_MEASUREMENT_MASK,
	447	# endif
	448	# ifdef USE_LOCALE_PAPER
	449	LC_PAPER_MASK,
	450	# endif
	451	# ifdef USE_LOCALE_TELEPHONE
	452	LC_TELEPHONE_MASK,
	453	# endif
	454	/* LC_ALL can't be turned off by a Configure
	455	* option, and in Posix 2008, should always be
	456	* here, so compile it in unconditionally.
	457	* This could catch some glitches at compile
	458	* time */
	459	LC_ALL_MASK
	460	};
	461
	462	STATIC const char *
	463	S_emulate_setlocale(const int category,
	464	const char * locale,
	465	unsigned int index,
	466	const bool is_index_valid
	467	)
	468	{
	469	/* This function effectively performs a setlocale() on just the current
	470	* thread; thus it is thread-safe. It does this by using the POSIX 2008
	471	* locale functions to emulate the behavior of setlocale(). Similar to
	472	* regular setlocale(), the return from this function points to memory that
	473	* can be overwritten by other system calls, so needs to be copied
	474	* immediately if you need to retain it. The difference here is that
	475	* system calls besides another setlocale() can overwrite it.
	476	*
	477	* By doing this, most locale-sensitive functions become thread-safe. The
	478	* exceptions are mostly those that return a pointer to static memory.
	479	*
	480	* This function takes the same parameters, 'category' and 'locale', that
	481	* the regular setlocale() function does, but it also takes two additional
	482	* ones. This is because the 2008 functions don't use a category; instead
	483	* they use a corresponding mask. Because this function operates in both
	484	* worlds, it may need one or the other or both. This function can
	485	* calculate the mask from the input category, but to avoid this
	486	* calculation, if the caller knows at compile time what the mask is, it
	487	* can pass it, setting 'is_index_valid' to TRUE; otherwise the mask
	488	* parameter is ignored.
	489	*
	490	* POSIX 2008, for some sick reason, chose not to provide a method to find
	491	* the category name of a locale. Some vendors have created a
	492	* querylocale() function to do just that. This function is a lot simpler
	493	* to implement on systems that have this. Otherwise, we have to keep
	494	* track of what the locale has been set to, so that we can return its
	495	* name to emulate setlocale(). It's also possible for C code in some
	496	* library to change the locale without us knowing it, though as of
	497	* September 2017, there are no occurrences in CPAN of uselocale(). Some
	498	* libraries do use setlocale(), but that changes the global locale, and
	499	* threads using per-thread locales will just ignore those changes.
	500	* Another problem is that without querylocale(), we have to guess at what
	501	* was meant by setting a locale of "". We handle this by not actually
	502	* ever setting to "" (unless querylocale exists), but to emulate what we
	503	* think should happen for "".
	504	*/
	505
	506	int mask;
	507	locale_t old_obj;
	508	locale_t new_obj;
	509	dTHX;
	510
	511	# ifdef DEBUGGING
	512
	513	if (DEBUG_Lv_TEST \|\| debug_initialization) {
	514	PerlIO_printf(Perl_debug_log, "%s:%d: emulate_setlocale input=%d (%s), \"%s\", %d, %d\n", __FILE__, __LINE__, category, category_name(category), locale, index, is_index_valid);
	515	}
	516
	517	# endif
	518
	519	/* If the input mask might be incorrect, calculate the correct one */
	520	if (! is_index_valid) {
	521	unsigned int i;
	522
	523	# ifdef DEBUGGING
	524
	525	if (DEBUG_Lv_TEST \|\| debug_initialization) {
	526	PerlIO_printf(Perl_debug_log, "%s:%d: finding index of category %d (%s)\n", __FILE__, __LINE__, category, category_name(category));
	527	}
	528
	529	# endif
	530
	531	for (i = 0; i <= LC_ALL_INDEX; i++) {
	532	if (category == categories[i]) {
	533	index = i;
	534	goto found_index;
	535	}
	536	}
	537
	538	/* Here, we don't know about this category, so can't handle it.
	539	* Fallback to the early POSIX usages */
	540	Perl_warner(aTHX_ packWARN(WARN_LOCALE),
	541	"Unknown locale category %d; can't set it to %s\n",
	542	category, locale);
	543	return NULL;
	544
	545	found_index: ;
	546
	547	# ifdef DEBUGGING
	548
	549	if (DEBUG_Lv_TEST \|\| debug_initialization) {
	550	PerlIO_printf(Perl_debug_log, "%s:%d: index is %d for %s\n", __FILE__, __LINE__, index, category_name(category));
	551	}
	552
	553	# endif
	554
	555	}
	556
	557	mask = category_masks[index];
	558
	559	# ifdef DEBUGGING
	560
	561	if (DEBUG_Lv_TEST \|\| debug_initialization) {
	562	PerlIO_printf(Perl_debug_log, "%s:%d: category name is %s; mask is 0x%x\n", __FILE__, __LINE__, category_names[index], mask);
	563	}
	564
	565	# endif
	566
	567	/* If just querying what the existing locale is ... */
	568	if (locale == NULL) {
	569	locale_t cur_obj = uselocale((locale_t) 0);
	570
	571	# ifdef DEBUGGING
	572
	573	if (DEBUG_Lv_TEST \|\| debug_initialization) {
	574	PerlIO_printf(Perl_debug_log, "%s:%d: emulate_setlocale querying %p\n", __FILE__, __LINE__, cur_obj);
	575	}
	576
	577	# endif
	578
	579	if (cur_obj == LC_GLOBAL_LOCALE) {
	580	return my_setlocale(category, NULL);
	581	}
	582
	583	# ifdef HAS_QUERYLOCALE
	584
	585	return (char *) querylocale(mask, cur_obj);
	586
	587	# else
	588
	589	/* If this assert fails, adjust the size of curlocales in intrpvar.h */
	590	STATIC_ASSERT_STMT(C_ARRAY_LENGTH(PL_curlocales) > LC_ALL_INDEX);
	591
	592	# if defined(_NL_LOCALE_NAME) \
	593	&& defined(DEBUGGING) \
	594	&& ! defined(SETLOCALE_ACCEPTS_ANY_LOCALE_NAME)
	595	/* On systems that accept any locale name, the real underlying locale
	596	* is often returned by this internal function, so we can't use it */
	597	{
	598	/* Internal glibc for querylocale(), but doesn't handle
	599	* empty-string ("") locale properly; who knows what other
	600	* glitches. Check for it now, under debug. */
	601
	602	char * temp_name = nl_langinfo_l(_NL_LOCALE_NAME(category),
	603	uselocale((locale_t) 0));
	604	/*
	605	PerlIO_printf(Perl_debug_log, "%s:%d: temp_name=%s\n", __FILE__, __LINE__, temp_name ? temp_name : "NULL");
	606	PerlIO_printf(Perl_debug_log, "%s:%d: index=%d\n", __FILE__, __LINE__, index);
	607	PerlIO_printf(Perl_debug_log, "%s:%d: PL_curlocales[index]=%s\n", __FILE__, __LINE__, PL_curlocales[index]);
	608	*/
	609	if (temp_name && PL_curlocales[index] && strNE(temp_name, "")) {
	610	if ( strNE(PL_curlocales[index], temp_name)
	611	&& ! ( isNAME_C_OR_POSIX(temp_name)
	612	&& isNAME_C_OR_POSIX(PL_curlocales[index]))) {
	613
	614	# ifdef USE_C_BACKTRACE
	615
	616	dump_c_backtrace(Perl_debug_log, 20, 1);
	617
	618	# endif
	619
	620	Perl_croak(aTHX_ "panic: Mismatch between what Perl thinks %s is"
	621	" (%s) and what internal glibc thinks"
	622	" (%s)\n", category_names[index],
	623	PL_curlocales[index], temp_name);
	624	}
	625
	626	return temp_name;
	627	}
	628	}
	629
	630	# endif
	631
	632	/* Without querylocale(), we have to use our record-keeping we've
	633	* done. */
	634
	635	if (category != LC_ALL) {
	636
	637	# ifdef DEBUGGING
	638
	639	if (DEBUG_Lv_TEST \|\| debug_initialization) {
	640	PerlIO_printf(Perl_debug_log, "%s:%d: emulate_setlocale returning %s\n", __FILE__, __LINE__, PL_curlocales[index]);
	641	}
	642
	643	# endif
	644
	645	return PL_curlocales[index];
	646	}
	647	else { /* For LC_ALL */
	648	unsigned int i;
	649	Size_t names_len = 0;
	650	char * all_string;
	651	bool are_all_categories_the_same_locale = TRUE;
	652
	653	/* If we have a valid LC_ALL value, just return it */
	654	if (PL_curlocales[LC_ALL_INDEX]) {
	655
	656	# ifdef DEBUGGING
	657
	658	if (DEBUG_Lv_TEST \|\| debug_initialization) {
	659	PerlIO_printf(Perl_debug_log, "%s:%d: emulate_setlocale returning %s\n", __FILE__, __LINE__, PL_curlocales[LC_ALL_INDEX]);
	660	}
	661
	662	# endif
	663
	664	return PL_curlocales[LC_ALL_INDEX];
	665	}
	666
	667	/* Otherwise, we need to construct a string of name=value pairs.
	668	* We use the glibc syntax, like
	669	* LC_NUMERIC=C;LC_TIME=en_US.UTF-8;...
	670	* First calculate the needed size. Along the way, check if all
	671	* the locale names are the same */
	672	for (i = 0; i < LC_ALL_INDEX; i++) {
	673
	674	# ifdef DEBUGGING
	675
	676	if (DEBUG_Lv_TEST \|\| debug_initialization) {
	677	PerlIO_printf(Perl_debug_log, "%s:%d: emulate_setlocale i=%d, name=%s, locale=%s\n", __FILE__, __LINE__, i, category_names[i], PL_curlocales[i]);
	678	}
	679
	680	# endif
	681
	682	names_len += strlen(category_names[i])
	683	+ 1 /* '=' */
	684	+ strlen(PL_curlocales[i])
	685	+ 1; /* ';' */
	686
	687	if (i > 0 && strNE(PL_curlocales[i], PL_curlocales[i-1])) {
	688	are_all_categories_the_same_locale = FALSE;
	689	}
	690	}
	691
	692	/* If they are the same, we don't actually have to construct the
	693	* string; we just make the entry in LC_ALL_INDEX valid, and be
	694	* that single name */
	695	if (are_all_categories_the_same_locale) {
	696	PL_curlocales[LC_ALL_INDEX] = savepv(PL_curlocales[0]);
	697	return PL_curlocales[LC_ALL_INDEX];
	698	}
	699
	700	names_len++; /* Trailing '\0' */
	701	SAVEFREEPV(Newx(all_string, names_len, char));
	702	*all_string = '\0';
	703
	704	/* Then fill in the string */
	705	for (i = 0; i < LC_ALL_INDEX; i++) {
	706
	707	# ifdef DEBUGGING
	708
	709	if (DEBUG_Lv_TEST \|\| debug_initialization) {
	710	PerlIO_printf(Perl_debug_log, "%s:%d: emulate_setlocale i=%d, name=%s, locale=%s\n", __FILE__, __LINE__, i, category_names[i], PL_curlocales[i]);
	711	}
	712
	713	# endif
	714
	715	my_strlcat(all_string, category_names[i], names_len);
	716	my_strlcat(all_string, "=", names_len);
	717	my_strlcat(all_string, PL_curlocales[i], names_len);
	718	my_strlcat(all_string, ";", names_len);
	719	}
	720
	721	# ifdef DEBUGGING
	722
	723	if (DEBUG_L_TEST \|\| debug_initialization) {
	724	PerlIO_printf(Perl_debug_log, "%s:%d: emulate_setlocale returning %s\n", __FILE__, __LINE__, all_string);
	725	}
	726
	727	#endif
	728
	729	return all_string;
	730	}
	731
	732	# ifdef EINVAL
	733
	734	SETERRNO(EINVAL, LIB_INVARG);
	735
	736	# endif
	737
	738	return NULL;
	739
	740	# endif
	741
	742	} /* End of this being setlocale(LC_foo, NULL) */
	743
	744	/* Here, we are switching locales. */
	745
	746	# ifndef HAS_QUERYLOCALE
	747
	748	if (strEQ(locale, "")) {
	749
	750	/* For non-querylocale() systems, we do the setting of "" ourselves to
	751	* be sure that we really know what's going on. We follow the Linux
	752	* documented behavior (but if that differs from the actual behavior,
	753	* this won't work exactly as the OS implements). We go out and
	754	* examine the environment based on our understanding of how the system
	755	* works, and use that to figure things out */
	756
	757	const char * const lc_all = PerlEnv_getenv("LC_ALL");
	758
	759	/* Use any "LC_ALL" environment variable, as it overrides everything
	760	* else. */
	761	if (lc_all && strNE(lc_all, "")) {
	762	locale = lc_all;
	763	}
	764	else {
	765
	766	/* Otherwise, we need to dig deeper. Unless overridden, the
	767	* default is the LANG environment variable; if it doesn't exist,
	768	* then "C" */
	769
	770	const char * default_name;
	771
	772	default_name = PerlEnv_getenv("LANG");
	773
	774	if (! default_name \|\| strEQ(default_name, "")) {
	775	default_name = "C";
	776	}
	777	else if (PL_scopestack_ix != 0) {
	778	/* To minimize other threads messing with the environment,
	779	* we copy the variable, making it a temporary. But this
	780	* doesn't work upon program initialization before any
	781	* scopes are created, and at this time, there's nothing
	782	* else going on that would interfere. So skip the copy
	783	* in that case */
	784	default_name = savepv(default_name);
	785	SAVEFREEPV(default_name);
	786	}
	787
	788	if (category != LC_ALL) {
	789	const char * const name = PerlEnv_getenv(category_names[index]);
	790
	791	/* Here we are setting a single category. Assume will have the
	792	* default name */
	793	locale = default_name;
	794
	795	/* But then look for an overriding environment variable */
	796	if (name && strNE(name, "")) {
	797	locale = name;
	798	}
	799	}
	800	else {
	801	bool did_override = FALSE;
	802	unsigned int i;
	803
	804	/* Here, we are getting LC_ALL. Any categories that don't have
	805	* a corresponding environment variable set should be set to
	806	* LANG, or to "C" if there is no LANG. If no individual
	807	* categories differ from this, we can just set LC_ALL. This
	808	* is buggy on systems that have extra categories that we don't
	809	* know about. If there is an environment variable that sets
	810	* that category, we won't know to look for it, and so our use
	811	* of LANG or "C" improperly overrides it. On the other hand,
	812	* if we don't do what is done here, and there is no
	813	* environment variable, the category's locale should be set to
	814	* LANG or "C". So there is no good solution. khw thinks the
	815	* best is to look at systems to see what categories they have,
	816	* and include them, and then to assume that we know the
	817	* complete set */
	818
	819	for (i = 0; i < LC_ALL_INDEX; i++) {
	820	const char * const env_override
	821	= savepv(PerlEnv_getenv(category_names[i]));
	822	const char * this_locale = ( env_override
	823	&& strNE(env_override, ""))
	824	? env_override
	825	: default_name;
	826	if (! emulate_setlocale(categories[i], this_locale, i, TRUE))
	827	{
	828	Safefree(env_override);
	829	return NULL;
	830	}
	831
	832	if (strNE(this_locale, default_name)) {
	833	did_override = TRUE;
	834	}
	835
	836	Safefree(env_override);
	837	}
	838
	839	/* If all the categories are the same, we can set LC_ALL to
	840	* that */
	841	if (! did_override) {
	842	locale = default_name;
	843	}
	844	else {
	845
	846	/* Here, LC_ALL is no longer valid, as some individual
	847	* categories don't match it. We call ourselves
	848	* recursively, as that will execute the code that
	849	* generates the proper locale string for this situation.
	850	* We don't do the remainder of this function, as that is
	851	* to update our records, and we've just done that for the
	852	* individual categories in the loop above, and doing so
	853	* would cause LC_ALL to be done as well */
	854	return emulate_setlocale(LC_ALL, NULL, LC_ALL_INDEX, TRUE);
	855	}
	856	}
	857	}
	858	} /* End of this being setlocale(LC_foo, "") */
	859	else if (strchr(locale, ';')) {
	860
	861	/* LC_ALL may actually include a conglomeration of various categories.
	862	* Without querylocale, this code uses the glibc (as of this writing)
	863	* syntax for representing that, but that is not a stable API, and
	864	* other platforms do it differently, so we have to handle all cases
	865	* ourselves */
	866
	867	unsigned int i;
	868	const char * s = locale;
	869	const char * e = locale + strlen(locale);
	870	const char * p = s;
	871	const char * category_end;
	872	const char * name_start;
	873	const char * name_end;
	874
	875	/* If the string that gives what to set doesn't include all categories,
	876	* the omitted ones get set to "C". To get this behavior, first set
	877	* all the individual categories to "C", and override the furnished
	878	* ones below */
	879	for (i = 0; i < LC_ALL_INDEX; i++) {
	880	if (! emulate_setlocale(categories[i], "C", i, TRUE)) {
	881	return NULL;
	882	}
	883	}
	884
	885	while (s < e) {
	886
	887	/* Parse through the category */
	888	while (isWORDCHAR(*p)) {
	889	p++;
	890	}
	891	category_end = p;
	892
	893	if (*p++ != '=') {
	894	Perl_croak(aTHX_
	895	"panic: %s: %d: Unexpected character in locale name '%02X",
	896	__FILE__, __LINE__, *(p-1));
	897	}
	898
	899	/* Parse through the locale name */
	900	name_start = p;
	901	while (p < e && *p != ';') {
	902	if (! isGRAPH(*p)) {
	903	Perl_croak(aTHX_
	904	"panic: %s: %d: Unexpected character in locale name '%02X",
	905	__FILE__, __LINE__, *(p-1));
	906	}
	907	p++;
	908	}
	909	name_end = p;
	910
	911	/* Space past the semi-colon */
	912	if (p < e) {
	913	p++;
	914	}
	915
	916	/* Find the index of the category name in our lists */
	917	for (i = 0; i < LC_ALL_INDEX; i++) {
	918	char * individ_locale;
	919
	920	/* Keep going if this isn't the index. The strnNE() avoids a
	921	* Perl_form(), but would fail if ever a category name could be
	922	* a substring of another one, like if there were a
	923	* "LC_TIME_DATE" */
	924	if strnNE(s, category_names[i], category_end - s) {
	925	continue;
	926	}
	927
	928	/* If this index is for the single category we're changing, we
	929	* have found the locale to set it to. */
	930	if (category == categories[i]) {
	931	locale = Perl_form(aTHX_ "%.*s",
	932	(int) (name_end - name_start),
	933	name_start);
	934	goto ready_to_set;
	935	}
	936
	937	assert(category == LC_ALL);
	938	individ_locale = Perl_form(aTHX_ "%.*s",
	939	(int) (name_end - name_start), name_start);
	940	if (! emulate_setlocale(categories[i], individ_locale, i, TRUE))
	941	{
	942	return NULL;
	943	}
	944	}
	945
	946	s = p;
	947	}
	948
	949	/* Here we have set all the individual categories by recursive calls.
	950	* These collectively should have fixed up LC_ALL, so can just query
	951	* what that now is */
	952	assert(category == LC_ALL);
	953
	954	return do_setlocale_c(LC_ALL, NULL);
	955	} /* End of this being setlocale(LC_ALL,
	956	"LC_CTYPE=foo;LC_NUMERIC=bar;...") */
	957
	958	ready_to_set: ;
	959
	960	/* Here at the end of having to deal with the absence of querylocale().
	961	* Some cases have already been fully handled by recursive calls to this
	962	* function. But at this point, we haven't dealt with those, but are now
	963	* prepared to, knowing what the locale name to set this category to is.
	964	* This would have come for free if this system had had querylocale() */
	965
	966	# endif /* end of ! querylocale */
	967
	968	assert(PL_C_locale_obj);
	969
	970	/* Switching locales generally entails freeing the current one's space (at
	971	* the C library's discretion). We need to stop using that locale before
	972	* the switch. So switch to a known locale object that we don't otherwise
	973	* mess with. This returns the locale object in effect at the time of the
	974	* switch. */
	975	old_obj = uselocale(PL_C_locale_obj);
	976
	977	# ifdef DEBUGGING
	978
	979	if (DEBUG_Lv_TEST \|\| debug_initialization) {
	980	PerlIO_printf(Perl_debug_log, "%s:%d: emulate_setlocale was using %p\n", __FILE__, __LINE__, old_obj);
	981	}
	982
	983	# endif
	984
	985	if (! old_obj) {
	986
	987	# ifdef DEBUGGING
	988
	989	if (DEBUG_L_TEST \|\| debug_initialization) {
	990	dSAVE_ERRNO;
	991	PerlIO_printf(Perl_debug_log, "%s:%d: emulate_setlocale switching to C failed: %d\n", __FILE__, __LINE__, GET_ERRNO);
	992	RESTORE_ERRNO;
	993	}
	994
	995	# endif
	996
	997	return NULL;
	998	}
	999
	1000	# ifdef DEBUGGING
	1001
	1002	if (DEBUG_Lv_TEST \|\| debug_initialization) {
	1003	PerlIO_printf(Perl_debug_log,
	1004	"%s:%d: emulate_setlocale now using %p\n",
	1005	__FILE__, __LINE__, PL_C_locale_obj);
	1006	}
	1007
	1008	# endif
	1009
	1010	/* If this call is to switch to the LC_ALL C locale, it already exists, and
	1011	* in fact, we already have switched to it (in preparation for what
	1012	* normally is to come). But since we're already there, continue to use
	1013	* it instead of trying to create a new locale */
	1014	if (mask == LC_ALL_MASK && isNAME_C_OR_POSIX(locale)) {
	1015
	1016	# ifdef DEBUGGING
	1017
	1018	if (DEBUG_Lv_TEST \|\| debug_initialization) {
	1019	PerlIO_printf(Perl_debug_log,
	1020	"%s:%d: will stay in C object\n", __FILE__, __LINE__);
	1021	}
	1022
	1023	# endif
	1024
	1025	new_obj = PL_C_locale_obj;
	1026
	1027	/* We already had switched to the C locale in preparation for freeing
	1028	* 'old_obj' */
	1029	if (old_obj != LC_GLOBAL_LOCALE && old_obj != PL_C_locale_obj) {
	1030	freelocale(old_obj);
	1031	}
	1032	}
	1033	else {
	1034	/* If we weren't in a thread safe locale, set so that newlocale() below
	1035	* which uses 'old_obj', uses an empty one. Same for our reserved C
	1036	* object. The latter is defensive coding, so that, even if there is
	1037	* some bug, we will never end up trying to modify either of these, as
	1038	* if passed to newlocale(), they can be. */
	1039	if (old_obj == LC_GLOBAL_LOCALE \|\| old_obj == PL_C_locale_obj) {
	1040	old_obj = (locale_t) 0;
	1041	}
	1042
	1043	/* Ready to create a new locale by modification of the exising one */
	1044	new_obj = newlocale(mask, locale, old_obj);
	1045
	1046	if (! new_obj) {
	1047	dSAVE_ERRNO;
	1048
	1049	# ifdef DEBUGGING
	1050
	1051	if (DEBUG_L_TEST \|\| debug_initialization) {
	1052	PerlIO_printf(Perl_debug_log,
	1053	"%s:%d: emulate_setlocale creating new object"
	1054	" failed: %d\n", __FILE__, __LINE__, GET_ERRNO);
	1055	}
	1056
	1057	# endif
	1058
	1059	if (! uselocale(old_obj)) {
	1060
	1061	# ifdef DEBUGGING
	1062
	1063	if (DEBUG_L_TEST \|\| debug_initialization) {
	1064	PerlIO_printf(Perl_debug_log,
	1065	"%s:%d: switching back failed: %d\n",
	1066	__FILE__, __LINE__, GET_ERRNO);
	1067	}
	1068
	1069	# endif
	1070
	1071	}
	1072	RESTORE_ERRNO;
	1073	return NULL;
	1074	}
	1075
	1076	# ifdef DEBUGGING
	1077
	1078	if (DEBUG_Lv_TEST \|\| debug_initialization) {
	1079	PerlIO_printf(Perl_debug_log,
	1080	"%s:%d: emulate_setlocale created %p",
	1081	__FILE__, __LINE__, new_obj);
	1082	if (old_obj) {
	1083	PerlIO_printf(Perl_debug_log,
	1084	"; should have freed %p", old_obj);
	1085	}
	1086	PerlIO_printf(Perl_debug_log, "\n");
	1087	}
	1088
	1089	# endif
	1090
	1091	/* And switch into it */
	1092	if (! uselocale(new_obj)) {
	1093	dSAVE_ERRNO;
	1094
	1095	# ifdef DEBUGGING
	1096
	1097	if (DEBUG_L_TEST \|\| debug_initialization) {
	1098	PerlIO_printf(Perl_debug_log,
	1099	"%s:%d: emulate_setlocale switching to new object"
	1100	" failed\n", __FILE__, __LINE__);
	1101	}
	1102
	1103	# endif
	1104
	1105	if (! uselocale(old_obj)) {
	1106
	1107	# ifdef DEBUGGING
	1108
	1109	if (DEBUG_L_TEST \|\| debug_initialization) {
	1110	PerlIO_printf(Perl_debug_log,
	1111	"%s:%d: switching back failed: %d\n",
	1112	__FILE__, __LINE__, GET_ERRNO);
	1113	}
	1114
	1115	# endif
	1116
	1117	}
	1118	freelocale(new_obj);
	1119	RESTORE_ERRNO;
	1120	return NULL;
	1121	}
	1122	}
	1123
	1124	# ifdef DEBUGGING
	1125
	1126	if (DEBUG_Lv_TEST \|\| debug_initialization) {
	1127	PerlIO_printf(Perl_debug_log,
	1128	"%s:%d: emulate_setlocale now using %p\n",
	1129	__FILE__, __LINE__, new_obj);
	1130	}
	1131
	1132	# endif
	1133
	1134	/* We are done, except for updating our records (if the system doesn't keep
	1135	* them) and in the case of locale "", we don't actually know what the
	1136	* locale that got switched to is, as it came from the environment. So
	1137	* have to find it */
	1138
	1139	# ifdef HAS_QUERYLOCALE
	1140
	1141	if (strEQ(locale, "")) {
	1142	locale = querylocale(mask, new_obj);
	1143	}
	1144
	1145	# else
	1146
	1147	/* Here, 'locale' is the return value */
	1148
	1149	/* Without querylocale(), we have to update our records */
	1150
	1151	if (category == LC_ALL) {
	1152	unsigned int i;
	1153
	1154	/* For LC_ALL, we change all individual categories to correspond */
	1155	/* PL_curlocales is a parallel array, so has same
	1156	* length as 'categories' */
	1157	for (i = 0; i <= LC_ALL_INDEX; i++) {
	1158	Safefree(PL_curlocales[i]);
	1159	PL_curlocales[i] = savepv(locale);
	1160	}
	1161	}
	1162	else {
	1163
	1164	/* For a single category, if it's not the same as the one in LC_ALL, we
	1165	* nullify LC_ALL */
	1166
	1167	if (PL_curlocales[LC_ALL_INDEX] && strNE(PL_curlocales[LC_ALL_INDEX], locale)) {
	1168	Safefree(PL_curlocales[LC_ALL_INDEX]);
	1169	PL_curlocales[LC_ALL_INDEX] = NULL;
	1170	}
	1171
	1172	/* Then update the category's record */
	1173	Safefree(PL_curlocales[index]);
	1174	PL_curlocales[index] = savepv(locale);
	1175	}
	1176
	1177	# endif
	1178
	1179	return locale;
	1180	}
	1181
	1182	#endif /* USE_POSIX_2008_LOCALE */
	1183
	1184	#if 0 /* Code that was to emulate thread-safe locales on platforms that
	1185	didn't natively support them */
	1186
	1187	/* The way this would work is that we would keep a per-thread list of the
	1188	* correct locale for that thread. Any operation that was locale-sensitive
	1189	* would have to be changed so that it would look like this:
	1190	*
	1191	* LOCALE_LOCK;
	1192	* setlocale to the correct locale for this operation
	1193	* do operation
	1194	* LOCALE_UNLOCK
	1195	*
	1196	* This leaves the global locale in the most recently used operation's, but it
	1197	* was locked long enough to get the result. If that result is static, it
	1198	* needs to be copied before the unlock.
	1199	*
	1200	* Macros could be written like SETUP_LOCALE_DEPENDENT_OP(category) that did
	1201	* the setup, but are no-ops when not needed, and similarly,
	1202	* END_LOCALE_DEPENDENT_OP for the tear-down
	1203	*
	1204	* But every call to a locale-sensitive function would have to be changed, and
	1205	* if a module didn't cooperate by using the mutex, things would break.
	1206	*
	1207	* This code was abandoned before being completed or tested, and is left as-is
	1208	*/
	1209
	1210	# define do_setlocale_c(cat, locale) locking_setlocale(cat, locale, cat ## _INDEX, TRUE)
	1211	# define do_setlocale_r(cat, locale) locking_setlocale(cat, locale, 0, FALSE)
	1212
	1213	STATIC char *
	1214	S_locking_setlocale(pTHX_
	1215	const int category,
	1216	const char * locale,
	1217	int index,
	1218	const bool is_index_valid
	1219	)
	1220	{
	1221	/* This function kind of performs a setlocale() on just the current thread;
	1222	* thus it is kind of thread-safe. It does this by keeping a thread-level
	1223	* array of the current locales for each category. Every time a locale is
	1224	* switched to, it does the switch globally, but updates the thread's
	1225	* array. A query as to what the current locale is just returns the
	1226	* appropriate element from the array, and doesn't actually call the system
	1227	* setlocale(). The saving into the array is done in an uninterruptible
	1228	* section of code, so is unaffected by whatever any other threads might be
	1229	* doing.
	1230	*
	1231	* All locale-sensitive operations must work by first starting a critical
	1232	* section, then switching to the thread's locale as kept by this function,
	1233	* and then doing the operation, then ending the critical section. Thus,
	1234	* each gets done in the appropriate locale. simulating thread-safety.
	1235	*
	1236	* This function takes the same parameters, 'category' and 'locale', that
	1237	* the regular setlocale() function does, but it also takes two additional
	1238	* ones. This is because as described earlier. If we know on input the
	1239	* index corresponding to the category into the array where we store the
	1240	* current locales, we don't have to calculate it. If the caller knows at
	1241	* compile time what the index is, it it can pass it, setting
	1242	* 'is_index_valid' to TRUE; otherwise the index parameter is ignored.
	1243	*
	1244	*/
	1245
	1246	/* If the input index might be incorrect, calculate the correct one */
	1247	if (! is_index_valid) {
	1248	unsigned int i;
	1249
	1250	if (DEBUG_Lv_TEST \|\| debug_initialization) {
	1251	PerlIO_printf(Perl_debug_log, "%s:%d: converting category %d to index\n", __FILE__, __LINE__, category);
	1252	}
	1253
	1254	for (i = 0; i <= LC_ALL_INDEX; i++) {
	1255	if (category == categories[i]) {
	1256	index = i;
	1257	goto found_index;
	1258	}
	1259	}
	1260
	1261	/* Here, we don't know about this category, so can't handle it.
	1262	* XXX best we can do is to unsafely set this
	1263	* XXX warning */
	1264
	1265	return my_setlocale(category, locale);
	1266
	1267	found_index: ;
	1268
	1269	if (DEBUG_Lv_TEST \|\| debug_initialization) {
	1270	PerlIO_printf(Perl_debug_log, "%s:%d: index is 0x%x\n", __FILE__, __LINE__, index);
	1271	}
	1272	}
	1273
	1274	/* For a query, just return what's in our records */
	1275	if (new_locale == NULL) {
	1276	return curlocales[index];
	1277	}
	1278
	1279
	1280	/* Otherwise, we need to do the switch, and save the result, all in a
	1281	* critical section */
	1282
	1283	Safefree(curlocales[[index]]);
	1284
	1285	/* It might be that this is called from an already-locked section of code.
	1286	* We would have to detect and skip the LOCK/UNLOCK if so */
	1287	LOCALE_LOCK;
	1288
	1289	curlocales[index] = savepv(my_setlocale(category, new_locale));
	1290
	1291	if (strEQ(new_locale, "")) {
	1292
	1293	#ifdef LC_ALL
	1294
	1295	/* The locale values come from the environment, and may not all be the
	1296	* same, so for LC_ALL, we have to update all the others, while the
	1297	* mutex is still locked */
	1298
	1299	if (category == LC_ALL) {
	1300	unsigned int i;
	1301	for (i = 0; i < LC_ALL_INDEX) {
	1302	curlocales[i] = my_setlocale(categories[i], NULL);
	1303	}
	1304	}
	1305	}
	1306
	1307	#endif
	1308
	1309	LOCALE_UNLOCK;
	1310
	1311	return curlocales[index];
	1312	}
	1313
	1314	#endif
	1315	#ifdef USE_LOCALE
	1316
	1317	STATIC void
	1318	S_set_numeric_radix(pTHX_ const bool use_locale)
	1319	{
	1320	/* If 'use_locale' is FALSE, set to use a dot for the radix character. If
	1321	* TRUE, use the radix character derived from the current locale */
	1322
	1323	#if defined(USE_LOCALE_NUMERIC) && ( defined(HAS_LOCALECONV) \
	1324	\|\| defined(HAS_NL_LANGINFO))
	1325
	1326	const char * radix = (use_locale)
	1327	? my_nl_langinfo(RADIXCHAR, FALSE)
	1328	/* FALSE => already in dest locale */
	1329	: ".";
	1330
	1331	sv_setpv(PL_numeric_radix_sv, radix);
	1332
	1333	/* If this is valid UTF-8 that isn't totally ASCII, and we are in
	1334	* a UTF-8 locale, then mark the radix as being in UTF-8 */
	1335	if (is_utf8_non_invariant_string((U8 *) SvPVX(PL_numeric_radix_sv),
	1336	SvCUR(PL_numeric_radix_sv))
	1337	&& _is_cur_LC_category_utf8(LC_NUMERIC))
	1338	{
	1339	SvUTF8_on(PL_numeric_radix_sv);
	1340	}
	1341
	1342	# ifdef DEBUGGING
	1343
	1344	if (DEBUG_L_TEST \|\| debug_initialization) {
	1345	PerlIO_printf(Perl_debug_log, "Locale radix is '%s', ?UTF-8=%d\n",
	1346	SvPVX(PL_numeric_radix_sv),
	1347	cBOOL(SvUTF8(PL_numeric_radix_sv)));
	1348	}
	1349
	1350	# endif
	1351	#else
	1352
	1353	PERL_UNUSED_ARG(use_locale);
	1354
	1355	#endif /* USE_LOCALE_NUMERIC and can find the radix char */
	1356
	1357	}
	1358
	1359	STATIC void
	1360	S_new_numeric(pTHX_ const char *newnum)
	1361	{
	1362
	1363	#ifndef USE_LOCALE_NUMERIC
	1364
	1365	PERL_UNUSED_ARG(newnum);
	1366
	1367	#else
	1368
	1369	/* Called after each libc setlocale() call affecting LC_NUMERIC, to tell
	1370	* core Perl this and that 'newnum' is the name of the new locale.
	1371	* It installs this locale as the current underlying default.
	1372	*
	1373	* The default locale and the C locale can be toggled between by use of the
	1374	* set_numeric_underlying() and set_numeric_standard() functions, which
	1375	* should probably not be called directly, but only via macros like
	1376	* SET_NUMERIC_STANDARD() in perl.h.
	1377	*
	1378	* The toggling is necessary mainly so that a non-dot radix decimal point
	1379	* character can be output, while allowing internal calculations to use a
	1380	* dot.
	1381	*
	1382	* This sets several interpreter-level variables:
	1383	* PL_numeric_name The underlying locale's name: a copy of 'newnum'
	1384	* PL_numeric_underlying A boolean indicating if the toggled state is such
	1385	* that the current locale is the program's underlying
	1386	* locale
	1387	* PL_numeric_standard An int indicating if the toggled state is such
	1388	* that the current locale is the C locale or
	1389	* indistinguishable from the C locale. If non-zero, it
	1390	* is in C; if > 1, it means it may not be toggled away
	1391	* from C.
	1392	* PL_numeric_underlying_is_standard A bool kept by this function
	1393	* indicating that the underlying locale and the standard
	1394	* C locale are indistinguishable for the purposes of
	1395	* LC_NUMERIC. This happens when both of the above two
	1396	* variables are true at the same time. (Toggling is a
	1397	* no-op under these circumstances.) This variable is
	1398	* used to avoid having to recalculate.
	1399	*/
	1400
	1401	char *save_newnum;
	1402
	1403	if (! newnum) {
	1404	Safefree(PL_numeric_name);
	1405	PL_numeric_name = NULL;
	1406	PL_numeric_standard = TRUE;
	1407	PL_numeric_underlying = TRUE;
	1408	PL_numeric_underlying_is_standard = TRUE;
	1409	return;
	1410	}
	1411
	1412	save_newnum = stdize_locale(savepv(newnum));
	1413	PL_numeric_underlying = TRUE;
	1414	PL_numeric_standard = isNAME_C_OR_POSIX(save_newnum);
	1415
	1416	#ifndef TS_W32_BROKEN_LOCALECONV
	1417
	1418	/* If its name isn't C nor POSIX, it could still be indistinguishable from
	1419	* them. But on broken Windows systems calling my_nl_langinfo() for
	1420	* THOUSEP can currently (but rarely) cause a race, so avoid doing that,
	1421	* and just always change the locale if not C nor POSIX on those systems */
	1422	if (! PL_numeric_standard) {
	1423	PL_numeric_standard = cBOOL(strEQ(".", my_nl_langinfo(RADIXCHAR,
	1424	FALSE /* Don't toggle locale */ ))
	1425	&& strEQ("", my_nl_langinfo(THOUSEP, FALSE)));
	1426	}
	1427
	1428	#endif
	1429
	1430	/* Save the new name if it isn't the same as the previous one, if any */
	1431	if (! PL_numeric_name \|\| strNE(PL_numeric_name, save_newnum)) {
	1432	Safefree(PL_numeric_name);
	1433	PL_numeric_name = save_newnum;
	1434	}
	1435	else {
	1436	Safefree(save_newnum);
	1437	}
	1438
	1439	PL_numeric_underlying_is_standard = PL_numeric_standard;
	1440
	1441	# ifdef HAS_POSIX_2008_LOCALE
	1442
	1443	PL_underlying_numeric_obj = newlocale(LC_NUMERIC_MASK,
	1444	PL_numeric_name,
	1445	PL_underlying_numeric_obj);
	1446
	1447	#endif
	1448
	1449	if (DEBUG_L_TEST \|\| debug_initialization) {
	1450	PerlIO_printf(Perl_debug_log, "Called new_numeric with %s, PL_numeric_name=%s\n", newnum, PL_numeric_name);
	1451	}
	1452
	1453	/* Keep LC_NUMERIC in the C locale. This is for XS modules, so they don't
	1454	* have to worry about the radix being a non-dot. (Core operations that
	1455	* need the underlying locale change to it temporarily). */
	1456	if (PL_numeric_standard) {
	1457	set_numeric_radix(0);
	1458	}
	1459	else {
	1460	set_numeric_standard();
	1461	}
	1462
	1463	#endif /* USE_LOCALE_NUMERIC */
	1464
	1465	}
	1466
	1467	void
	1468	Perl_set_numeric_standard(pTHX)
	1469	{
	1470
	1471	#ifdef USE_LOCALE_NUMERIC
	1472
	1473	/* Toggle the LC_NUMERIC locale to C. Most code should use the macros like
	1474	* SET_NUMERIC_STANDARD() in perl.h instead of calling this directly. The
	1475	* macro avoids calling this routine if toggling isn't necessary according
	1476	* to our records (which could be wrong if some XS code has changed the
	1477	* locale behind our back) */
	1478
	1479	# ifdef DEBUGGING
	1480
	1481	if (DEBUG_L_TEST \|\| debug_initialization) {
	1482	PerlIO_printf(Perl_debug_log,
	1483	"Setting LC_NUMERIC locale to standard C\n");
	1484	}
	1485
	1486	# endif
	1487
	1488	do_setlocale_c(LC_NUMERIC, "C");
	1489	PL_numeric_standard = TRUE;
	1490	PL_numeric_underlying = PL_numeric_underlying_is_standard;
	1491	set_numeric_radix(0);
	1492
	1493	#endif /* USE_LOCALE_NUMERIC */
	1494
	1495	}
	1496
	1497	void
	1498	Perl_set_numeric_underlying(pTHX)
	1499	{
	1500
	1501	#ifdef USE_LOCALE_NUMERIC
	1502
	1503	/* Toggle the LC_NUMERIC locale to the current underlying default. Most
	1504	* code should use the macros like SET_NUMERIC_UNDERLYING() in perl.h
	1505	* instead of calling this directly. The macro avoids calling this routine
	1506	* if toggling isn't necessary according to our records (which could be
	1507	* wrong if some XS code has changed the locale behind our back) */
	1508
	1509	# ifdef DEBUGGING
	1510
	1511	if (DEBUG_L_TEST \|\| debug_initialization) {
	1512	PerlIO_printf(Perl_debug_log,
	1513	"Setting LC_NUMERIC locale to %s\n",
	1514	PL_numeric_name);
	1515	}
	1516
	1517	# endif
	1518
	1519	do_setlocale_c(LC_NUMERIC, PL_numeric_name);
	1520	PL_numeric_standard = PL_numeric_underlying_is_standard;
	1521	PL_numeric_underlying = TRUE;
	1522	set_numeric_radix(! PL_numeric_standard);
	1523
	1524	#endif /* USE_LOCALE_NUMERIC */
	1525
	1526	}
	1527
	1528	/*
	1529	* Set up for a new ctype locale.
	1530	*/
	1531	STATIC void
	1532	S_new_ctype(pTHX_ const char *newctype)
	1533	{
	1534
	1535	#ifndef USE_LOCALE_CTYPE
	1536
	1537	PERL_UNUSED_ARG(newctype);
	1538	PERL_UNUSED_CONTEXT;
	1539
	1540	#else
	1541
	1542	/* Called after each libc setlocale() call affecting LC_CTYPE, to tell
	1543	* core Perl this and that 'newctype' is the name of the new locale.
	1544	*
	1545	* This function sets up the folding arrays for all 256 bytes, assuming
	1546	* that tofold() is tolc() since fold case is not a concept in POSIX,
	1547	*
	1548	* Any code changing the locale (outside this file) should use
	1549	* Perl_setlocale or POSIX::setlocale, which call this function. Therefore
	1550	* this function should be called directly only from this file and from
	1551	* POSIX::setlocale() */
	1552
	1553	dVAR;
	1554	unsigned int i;
	1555
	1556	/* Don't check for problems if we are suppressing the warnings */
	1557	bool check_for_problems = ckWARN_d(WARN_LOCALE) \|\| UNLIKELY(DEBUG_L_TEST);
	1558	bool maybe_utf8_turkic = FALSE;
	1559
	1560	PERL_ARGS_ASSERT_NEW_CTYPE;
	1561
	1562	/* We will replace any bad locale warning with 1) nothing if the new one is
	1563	* ok; or 2) a new warning for the bad new locale */
	1564	if (PL_warn_locale) {
	1565	SvREFCNT_dec_NN(PL_warn_locale);
	1566	PL_warn_locale = NULL;
	1567	}
	1568
	1569	PL_in_utf8_CTYPE_locale = _is_cur_LC_category_utf8(LC_CTYPE);
	1570
	1571	/* A UTF-8 locale gets standard rules. But note that code still has to
	1572	* handle this specially because of the three problematic code points */
	1573	if (PL_in_utf8_CTYPE_locale) {
	1574	Copy(PL_fold_latin1, PL_fold_locale, 256, U8);
	1575
	1576	/* UTF-8 locales can have special handling for 'I' and 'i' if they are
	1577	* Turkic. Make sure these two are the only anomalies. (We don't use
	1578	* towupper and towlower because they aren't in C89.) */
	1579
	1580	#if defined(HAS_TOWUPPER) && defined (HAS_TOWLOWER)
	1581
	1582	if (towupper('i') == 0x130 && towlower('I') == 0x131) {
	1583
	1584	#else
	1585
	1586	if (toupper('i') == 'i' && tolower('I') == 'I') {
	1587
	1588	#endif
	1589	check_for_problems = TRUE;
	1590	maybe_utf8_turkic = TRUE;
	1591	}
	1592	}
	1593
	1594	/* We don't populate the other lists if a UTF-8 locale, but do check that
	1595	* everything works as expected, unless checking turned off */
	1596	if (check_for_problems \|\| ! PL_in_utf8_CTYPE_locale) {
	1597	/* Assume enough space for every character being bad. 4 spaces each
	1598	* for the 94 printable characters that are output like "'x' "; and 5
	1599	* spaces each for "'\\' ", "'\t' ", and "'\n' "; plus a terminating
	1600	* NUL */
	1601	char bad_chars_list[ (94 * 4) + (3 * 5) + 1 ] = { '\0' };
	1602	bool multi_byte_locale = FALSE; /* Assume is a single-byte locale
	1603	to start */
	1604	unsigned int bad_count = 0; /* Count of bad characters */
	1605
	1606	for (i = 0; i < 256; i++) {
	1607	if (! PL_in_utf8_CTYPE_locale) {
	1608	if (isupper(i))
	1609	PL_fold_locale[i] = (U8) tolower(i);
	1610	else if (islower(i))
	1611	PL_fold_locale[i] = (U8) toupper(i);
	1612	else
	1613	PL_fold_locale[i] = (U8) i;
	1614	}
	1615
	1616	/* If checking for locale problems, see if the native ASCII-range
	1617	* printables plus \n and \t are in their expected categories in
	1618	* the new locale. If not, this could mean big trouble, upending
	1619	* Perl's and most programs' assumptions, like having a
	1620	* metacharacter with special meaning become a \w. Fortunately,
	1621	* it's very rare to find locales that aren't supersets of ASCII
	1622	* nowadays. It isn't a problem for most controls to be changed
	1623	* into something else; we check only \n and \t, though perhaps \r
	1624	* could be an issue as well. */
	1625	if ( check_for_problems
	1626	&& (isGRAPH_A(i) \|\| isBLANK_A(i) \|\| i == '\n'))
	1627	{
	1628	bool is_bad = FALSE;
	1629	char name[4] = { '\0' };
	1630
	1631	/* Convert the name into a string */
	1632	if (isGRAPH_A(i)) {
	1633	name[0] = i;
	1634	name[1] = '\0';
	1635	}
	1636	else if (i == '\n') {
	1637	my_strlcpy(name, "\\n", sizeof(name));
	1638	}
	1639	else if (i == '\t') {
	1640	my_strlcpy(name, "\\t", sizeof(name));
	1641	}
	1642	else {
	1643	assert(i == ' ');
	1644	my_strlcpy(name, "' '", sizeof(name));
	1645	}
	1646
	1647	/* Check each possibe class */
	1648	if (UNLIKELY(cBOOL(isalnum(i)) != cBOOL(isALPHANUMERIC_A(i)))) {
	1649	is_bad = TRUE;
	1650	DEBUG_L(PerlIO_printf(Perl_debug_log,
	1651	"isalnum('%s') unexpectedly is %d\n",
	1652	name, cBOOL(isalnum(i))));
	1653	}
	1654	if (UNLIKELY(cBOOL(isalpha(i)) != cBOOL(isALPHA_A(i)))) {
	1655	is_bad = TRUE;
	1656	DEBUG_L(PerlIO_printf(Perl_debug_log,
	1657	"isalpha('%s') unexpectedly is %d\n",
	1658	name, cBOOL(isalpha(i))));
	1659	}
	1660	if (UNLIKELY(cBOOL(isdigit(i)) != cBOOL(isDIGIT_A(i)))) {
	1661	is_bad = TRUE;
	1662	DEBUG_L(PerlIO_printf(Perl_debug_log,
	1663	"isdigit('%s') unexpectedly is %d\n",
	1664	name, cBOOL(isdigit(i))));
	1665	}
	1666	if (UNLIKELY(cBOOL(isgraph(i)) != cBOOL(isGRAPH_A(i)))) {
	1667	is_bad = TRUE;
	1668	DEBUG_L(PerlIO_printf(Perl_debug_log,
	1669	"isgraph('%s') unexpectedly is %d\n",
	1670	name, cBOOL(isgraph(i))));
	1671	}
	1672	if (UNLIKELY(cBOOL(islower(i)) != cBOOL(isLOWER_A(i)))) {
	1673	is_bad = TRUE;
	1674	DEBUG_L(PerlIO_printf(Perl_debug_log,
	1675	"islower('%s') unexpectedly is %d\n",
	1676	name, cBOOL(islower(i))));
	1677	}
	1678	if (UNLIKELY(cBOOL(isprint(i)) != cBOOL(isPRINT_A(i)))) {
	1679	is_bad = TRUE;
	1680	DEBUG_L(PerlIO_printf(Perl_debug_log,
	1681	"isprint('%s') unexpectedly is %d\n",
	1682	name, cBOOL(isprint(i))));
	1683	}
	1684	if (UNLIKELY(cBOOL(ispunct(i)) != cBOOL(isPUNCT_A(i)))) {
	1685	is_bad = TRUE;
	1686	DEBUG_L(PerlIO_printf(Perl_debug_log,
	1687	"ispunct('%s') unexpectedly is %d\n",
	1688	name, cBOOL(ispunct(i))));
	1689	}
	1690	if (UNLIKELY(cBOOL(isspace(i)) != cBOOL(isSPACE_A(i)))) {
	1691	is_bad = TRUE;
	1692	DEBUG_L(PerlIO_printf(Perl_debug_log,
	1693	"isspace('%s') unexpectedly is %d\n",
	1694	name, cBOOL(isspace(i))));
	1695	}
	1696	if (UNLIKELY(cBOOL(isupper(i)) != cBOOL(isUPPER_A(i)))) {
	1697	is_bad = TRUE;
	1698	DEBUG_L(PerlIO_printf(Perl_debug_log,
	1699	"isupper('%s') unexpectedly is %d\n",
	1700	name, cBOOL(isupper(i))));
	1701	}
	1702	if (UNLIKELY(cBOOL(isxdigit(i))!= cBOOL(isXDIGIT_A(i)))) {
	1703	is_bad = TRUE;
	1704	DEBUG_L(PerlIO_printf(Perl_debug_log,
	1705	"isxdigit('%s') unexpectedly is %d\n",
	1706	name, cBOOL(isxdigit(i))));
	1707	}
	1708	if (UNLIKELY(tolower(i) != (int) toLOWER_A(i))) {
	1709	is_bad = TRUE;
	1710	DEBUG_L(PerlIO_printf(Perl_debug_log,
	1711	"tolower('%s')=0x%x instead of the expected 0x%x\n",
	1712	name, tolower(i), (int) toLOWER_A(i)));
	1713	}
	1714	if (UNLIKELY(toupper(i) != (int) toUPPER_A(i))) {
	1715	is_bad = TRUE;
	1716	DEBUG_L(PerlIO_printf(Perl_debug_log,
	1717	"toupper('%s')=0x%x instead of the expected 0x%x\n",
	1718	name, toupper(i), (int) toUPPER_A(i)));
	1719	}
	1720	if (UNLIKELY((i == '\n' && ! isCNTRL_LC(i)))) {
	1721	is_bad = TRUE;
	1722	DEBUG_L(PerlIO_printf(Perl_debug_log,
	1723	"'\\n' (=%02X) is not a control\n", (int) i));
	1724	}
	1725
	1726	/* Add to the list; Separate multiple entries with a blank */
	1727	if (is_bad) {
	1728	if (bad_count) {
	1729	my_strlcat(bad_chars_list, " ", sizeof(bad_chars_list));
	1730	}
	1731	my_strlcat(bad_chars_list, name, sizeof(bad_chars_list));
	1732	bad_count++;
	1733	}
	1734	}
	1735	}
	1736
	1737	if (bad_count == 2 && maybe_utf8_turkic) {
	1738	bad_count = 0;
	1739	*bad_chars_list = '\0';
	1740	PL_fold_locale['I'] = 'I';
	1741	PL_fold_locale['i'] = 'i';
	1742	PL_in_utf8_turkic_locale = TRUE;
	1743	DEBUG_L(PerlIO_printf(Perl_debug_log, "%s:%d: %s is turkic\n",
	1744	__FILE__, __LINE__, newctype));
	1745	}
	1746	else {
	1747	PL_in_utf8_turkic_locale = FALSE;
	1748	}
	1749
	1750	# ifdef MB_CUR_MAX
	1751
	1752	/* We only handle single-byte locales (outside of UTF-8 ones; so if
	1753	* this locale requires more than one byte, there are going to be
	1754	* problems. */
	1755	DEBUG_Lv(PerlIO_printf(Perl_debug_log,
	1756	"%s:%d: check_for_problems=%d, MB_CUR_MAX=%d\n",
	1757	__FILE__, __LINE__, check_for_problems, (int) MB_CUR_MAX));
	1758
	1759	if ( check_for_problems && MB_CUR_MAX > 1
	1760	&& ! PL_in_utf8_CTYPE_locale
	1761
	1762	/* Some platforms return MB_CUR_MAX > 1 for even the "C"
	1763	* locale. Just assume that the implementation for them (plus
	1764	* for POSIX) is correct and the > 1 value is spurious. (Since
	1765	* these are specially handled to never be considered UTF-8
	1766	* locales, as long as this is the only problem, everything
	1767	* should work fine */
	1768	&& strNE(newctype, "C") && strNE(newctype, "POSIX"))
	1769	{
	1770	multi_byte_locale = TRUE;
	1771	}
	1772
	1773	# endif
	1774
	1775	/* If we found problems and we want them output, do so */
	1776	if ( (UNLIKELY(bad_count) \|\| UNLIKELY(multi_byte_locale))
	1777	&& (LIKELY(ckWARN_d(WARN_LOCALE)) \|\| UNLIKELY(DEBUG_L_TEST)))
	1778	{
	1779	if (UNLIKELY(bad_count) && PL_in_utf8_CTYPE_locale) {
	1780	PL_warn_locale = Perl_newSVpvf(aTHX_
	1781	"Locale '%s' contains (at least) the following characters"
	1782	" which have\nunexpected meanings: %s\nThe Perl program"
	1783	" will use the expected meanings",
	1784	newctype, bad_chars_list);
	1785	}
	1786	else {
	1787	PL_warn_locale = Perl_newSVpvf(aTHX_
	1788	"Locale '%s' may not work well.%s%s%s\n",
	1789	newctype,
	1790	(multi_byte_locale)
	1791	? " Some characters in it are not recognized by"
	1792	" Perl."
	1793	: "",
	1794	(bad_count)
	1795	? "\nThe following characters (and maybe others)"
	1796	" may not have the same meaning as the Perl"
	1797	" program expects:\n"
	1798	: "",
	1799	(bad_count)
	1800	? bad_chars_list
	1801	: ""
	1802	);
	1803	}
	1804
	1805	# ifdef HAS_NL_LANGINFO
	1806
	1807	Perl_sv_catpvf(aTHX_ PL_warn_locale, "; codeset=%s",
	1808	/* parameter FALSE is a don't care here */
	1809	my_nl_langinfo(CODESET, FALSE));
	1810
	1811	# endif
	1812
	1813	Perl_sv_catpvf(aTHX_ PL_warn_locale, "\n");
	1814
	1815	/* If we are actually in the scope of the locale or are debugging,
	1816	* output the message now. If not in that scope, we save the
	1817	* message to be output at the first operation using this locale,
	1818	* if that actually happens. Most programs don't use locales, so
	1819	* they are immune to bad ones. */
	1820	if (IN_LC(LC_CTYPE) \|\| UNLIKELY(DEBUG_L_TEST)) {
	1821
	1822	/* The '0' below suppresses a bogus gcc compiler warning */
	1823	Perl_warner(aTHX_ packWARN(WARN_LOCALE), SvPVX(PL_warn_locale), 0);
	1824
	1825	if (IN_LC(LC_CTYPE)) {
	1826	SvREFCNT_dec_NN(PL_warn_locale);
	1827	PL_warn_locale = NULL;
	1828	}
	1829	}
	1830	}
	1831	}
	1832
	1833	#endif /* USE_LOCALE_CTYPE */
	1834
	1835	}
	1836
	1837	void
	1838	Perl__warn_problematic_locale()
	1839	{
	1840
	1841	#ifdef USE_LOCALE_CTYPE
	1842
	1843	dTHX;
	1844
	1845	/* Internal-to-core function that outputs the message in PL_warn_locale,
	1846	* and then NULLS it. Should be called only through the macro
	1847	* _CHECK_AND_WARN_PROBLEMATIC_LOCALE */
	1848
	1849	if (PL_warn_locale) {
	1850	Perl_ck_warner(aTHX_ packWARN(WARN_LOCALE),
	1851	SvPVX(PL_warn_locale),
	1852	0 /* dummy to avoid compiler warning */ );
	1853	SvREFCNT_dec_NN(PL_warn_locale);
	1854	PL_warn_locale = NULL;
	1855	}
	1856
	1857	#endif
	1858
	1859	}
	1860
	1861	STATIC void
	1862	S_new_collate(pTHX_ const char *newcoll)
	1863	{
	1864
	1865	#ifndef USE_LOCALE_COLLATE
	1866
	1867	PERL_UNUSED_ARG(newcoll);
	1868	PERL_UNUSED_CONTEXT;
	1869
	1870	#else
	1871
	1872	/* Called after each libc setlocale() call affecting LC_COLLATE, to tell
	1873	* core Perl this and that 'newcoll' is the name of the new locale.
	1874	*
	1875	* The design of locale collation is that every locale change is given an
	1876	* index 'PL_collation_ix'. The first time a string particpates in an
	1877	* operation that requires collation while locale collation is active, it
	1878	* is given PERL_MAGIC_collxfrm magic (via sv_collxfrm_flags()). That
	1879	* magic includes the collation index, and the transformation of the string
	1880	* by strxfrm(), q.v. That transformation is used when doing comparisons,
	1881	* instead of the string itself. If a string changes, the magic is
	1882	* cleared. The next time the locale changes, the index is incremented,
	1883	* and so we know during a comparison that the transformation is not
	1884	* necessarily still valid, and so is recomputed. Note that if the locale
	1885	* changes enough times, the index could wrap (a U32), and it is possible
	1886	* that a transformation would improperly be considered valid, leading to
	1887	* an unlikely bug */
	1888
	1889	if (! newcoll) {
	1890	if (PL_collation_name) {
	1891	++PL_collation_ix;
	1892	Safefree(PL_collation_name);
	1893	PL_collation_name = NULL;
	1894	}
	1895	PL_collation_standard = TRUE;
	1896	is_standard_collation:
	1897	PL_collxfrm_base = 0;
	1898	PL_collxfrm_mult = 2;
	1899	PL_in_utf8_COLLATE_locale = FALSE;
	1900	PL_strxfrm_NUL_replacement = '\0';
	1901	PL_strxfrm_max_cp = 0;
	1902	return;
	1903	}
	1904
	1905	/* If this is not the same locale as currently, set the new one up */
	1906	if (! PL_collation_name \|\| strNE(PL_collation_name, newcoll)) {
	1907	++PL_collation_ix;
	1908	Safefree(PL_collation_name);
	1909	PL_collation_name = stdize_locale(savepv(newcoll));
	1910	PL_collation_standard = isNAME_C_OR_POSIX(newcoll);
	1911	if (PL_collation_standard) {
	1912	goto is_standard_collation;
	1913	}
	1914
	1915	PL_in_utf8_COLLATE_locale = _is_cur_LC_category_utf8(LC_COLLATE);
	1916	PL_strxfrm_NUL_replacement = '\0';
	1917	PL_strxfrm_max_cp = 0;
	1918
	1919	/* A locale collation definition includes primary, secondary, tertiary,
	1920	* etc. weights for each character. To sort, the primary weights are
	1921	* used, and only if they compare equal, then the secondary weights are
	1922	* used, and only if they compare equal, then the tertiary, etc.
	1923	*
	1924	* strxfrm() works by taking the input string, say ABC, and creating an
	1925	* output transformed string consisting of first the primary weights,
	1926	* A¹B¹C¹ followed by the secondary ones, A²B²C²; and then the
	1927	* tertiary, etc, yielding A¹B¹C¹ A²B²C² A³B³C³ .... Some characters
	1928	* may not have weights at every level. In our example, let's say B
	1929	* doesn't have a tertiary weight, and A doesn't have a secondary
	1930	* weight. The constructed string is then going to be
	1931	* A¹B¹C¹ B²C² A³C³ ....
	1932	* This has the desired effect that strcmp() will look at the secondary
	1933	* or tertiary weights only if the strings compare equal at all higher
	1934	* priority weights. The spaces shown here, like in
	1935	* "A¹B¹C¹ A²B²C² "
	1936	* are not just for readability. In the general case, these must
	1937	* actually be bytes, which we will call here 'separator weights'; and
	1938	* they must be smaller than any other weight value, but since these
	1939	* are C strings, only the terminating one can be a NUL (some
	1940	* implementations may include a non-NUL separator weight just before
	1941	* the NUL). Implementations tend to reserve 01 for the separator
	1942	* weights. They are needed so that a shorter string's secondary
	1943	* weights won't be misconstrued as primary weights of a longer string,
	1944	* etc. By making them smaller than any other weight, the shorter
	1945	* string will sort first. (Actually, if all secondary weights are
	1946	* smaller than all primary ones, there is no need for a separator
	1947	* weight between those two levels, etc.)
	1948	*
	1949	* The length of the transformed string is roughly a linear function of
	1950	* the input string. It's not exactly linear because some characters
	1951	* don't have weights at all levels. When we call strxfrm() we have to
	1952	* allocate some memory to hold the transformed string. The
	1953	* calculations below try to find coefficients 'm' and 'b' for this
	1954	* locale so that m*x + b equals how much space we need, given the size
	1955	* of the input string in 'x'. If we calculate too small, we increase
	1956	* the size as needed, and call strxfrm() again, but it is better to
	1957	* get it right the first time to avoid wasted expensive string
	1958	* transformations. */
	1959
	1960	{
	1961	/* We use the string below to find how long the transformation of it
	1962	* is. Almost all locales are supersets of ASCII, or at least the
	1963	* ASCII letters. We use all of them, half upper half lower,
	1964	* because if we used fewer, we might hit just the ones that are
	1965	* outliers in a particular locale. Most of the strings being
	1966	* collated will contain a preponderance of letters, and even if
	1967	* they are above-ASCII, they are likely to have the same number of
	1968	* weight levels as the ASCII ones. It turns out that digits tend
	1969	* to have fewer levels, and some punctuation has more, but those
	1970	* are relatively sparse in text, and khw believes this gives a
	1971	* reasonable result, but it could be changed if experience so
	1972	* dictates. */
	1973	const char longer[] = "ABCDEFGHIJKLMnopqrstuvwxyz";
	1974	char * x_longer; /* Transformed 'longer' */
	1975	Size_t x_len_longer; /* Length of 'x_longer' */
	1976
	1977	char * x_shorter; /* We also transform a substring of 'longer' */
	1978	Size_t x_len_shorter;
	1979
	1980	/* _mem_collxfrm() is used to get the transformation (though here we
	1981	* are interested only in its length). It is used because it has
	1982	* the intelligence to handle all cases, but to work, it needs some
	1983	* values of 'm' and 'b' to get it started. For the purposes of
	1984	* this calculation we use a very conservative estimate of 'm' and
	1985	* 'b'. This assumes a weight can be multiple bytes, enough to
	1986	* hold any UV on the platform, and there are 5 levels, 4 weight
	1987	* bytes, and a trailing NUL. */
	1988	PL_collxfrm_base = 5;
	1989	PL_collxfrm_mult = 5 * sizeof(UV);
	1990
	1991	/* Find out how long the transformation really is */
	1992	x_longer = _mem_collxfrm(longer,
	1993	sizeof(longer) - 1,
	1994	&x_len_longer,
	1995
	1996	/* We avoid converting to UTF-8 in the
	1997	* called function by telling it the
	1998	* string is in UTF-8 if the locale is a
	1999	* UTF-8 one. Since the string passed
	2000	* here is invariant under UTF-8, we can
	2001	* claim it's UTF-8 even though it isn't.
	2002	* */
	2003	PL_in_utf8_COLLATE_locale);
	2004	Safefree(x_longer);
	2005
	2006	/* Find out how long the transformation of a substring of 'longer'
	2007	* is. Together the lengths of these transformations are
	2008	* sufficient to calculate 'm' and 'b'. The substring is all of
	2009	* 'longer' except the first character. This minimizes the chances
	2010	* of being swayed by outliers */
	2011	x_shorter = _mem_collxfrm(longer + 1,
	2012	sizeof(longer) - 2,
	2013	&x_len_shorter,
	2014	PL_in_utf8_COLLATE_locale);
	2015	Safefree(x_shorter);
	2016
	2017	/* If the results are nonsensical for this simple test, the whole
	2018	* locale definition is suspect. Mark it so that locale collation
	2019	* is not active at all for it. XXX Should we warn? */
	2020	if ( x_len_shorter == 0
	2021	\|\| x_len_longer == 0
	2022	\|\| x_len_shorter >= x_len_longer)
	2023	{
	2024	PL_collxfrm_mult = 0;
	2025	PL_collxfrm_base = 0;
	2026	}
	2027	else {
	2028	SSize_t base; /* Temporary */
	2029
	2030	/* We have both: m * strlen(longer) + b = x_len_longer
	2031	* m * strlen(shorter) + b = x_len_shorter;
	2032	* subtracting yields:
	2033	* m * (strlen(longer) - strlen(shorter))
	2034	* = x_len_longer - x_len_shorter
	2035	* But we have set things up so that 'shorter' is 1 byte smaller
	2036	* than 'longer'. Hence:
	2037	* m = x_len_longer - x_len_shorter
	2038	*
	2039	* But if something went wrong, make sure the multiplier is at
	2040	* least 1.
	2041	*/
	2042	if (x_len_longer > x_len_shorter) {
	2043	PL_collxfrm_mult = (STRLEN) x_len_longer - x_len_shorter;
	2044	}
	2045	else {
	2046	PL_collxfrm_mult = 1;
	2047	}
	2048
	2049	/* mx + b = len
	2050	* so: b = len - mx
	2051	* but in case something has gone wrong, make sure it is
	2052	* non-negative */
	2053	base = x_len_longer - PL_collxfrm_mult * (sizeof(longer) - 1);
	2054	if (base < 0) {
	2055	base = 0;
	2056	}
	2057
	2058	/* Add 1 for the trailing NUL */
	2059	PL_collxfrm_base = base + 1;
	2060	}
	2061
	2062	# ifdef DEBUGGING
	2063
	2064	if (DEBUG_L_TEST \|\| debug_initialization) {
	2065	PerlIO_printf(Perl_debug_log,
	2066	"%s:%d: ?UTF-8 locale=%d; x_len_shorter=%zu, "
	2067	"x_len_longer=%zu,"
	2068	" collate multipler=%zu, collate base=%zu\n",
	2069	__FILE__, __LINE__,
	2070	PL_in_utf8_COLLATE_locale,
	2071	x_len_shorter, x_len_longer,
	2072	PL_collxfrm_mult, PL_collxfrm_base);
	2073	}
	2074	# endif
	2075
	2076	}
	2077	}
	2078
	2079	#endif /* USE_LOCALE_COLLATE */
	2080
	2081	}
	2082
	2083	#endif
	2084
	2085	#ifdef WIN32
	2086
	2087	STATIC char *
	2088	S_win32_setlocale(pTHX_ int category, const char* locale)
	2089	{
	2090	/* This, for Windows, emulates POSIX setlocale() behavior. There is no
	2091	* difference between the two unless the input locale is "", which normally
	2092	* means on Windows to get the machine default, which is set via the
	2093	* computer's "Regional and Language Options" (or its current equivalent).
	2094	* In POSIX, it instead means to find the locale from the user's
	2095	* environment. This routine changes the Windows behavior to first look in
	2096	* the environment, and, if anything is found, use that instead of going to
	2097	* the machine default. If there is no environment override, the machine
	2098	* default is used, by calling the real setlocale() with "".
	2099	*
	2100	* The POSIX behavior is to use the LC_ALL variable if set; otherwise to
	2101	* use the particular category's variable if set; otherwise to use the LANG
	2102	* variable. */
	2103
	2104	bool override_LC_ALL = FALSE;
	2105	char * result;
	2106	unsigned int i;
	2107
	2108	if (locale && strEQ(locale, "")) {
	2109
	2110	# ifdef LC_ALL
	2111
	2112	locale = PerlEnv_getenv("LC_ALL");
	2113	if (! locale) {
	2114	if (category == LC_ALL) {
	2115	override_LC_ALL = TRUE;
	2116	}
	2117	else {
	2118
	2119	# endif
	2120
	2121	for (i = 0; i < NOMINAL_LC_ALL_INDEX; i++) {
	2122	if (category == categories[i]) {
	2123	locale = PerlEnv_getenv(category_names[i]);
	2124	goto found_locale;
	2125	}
	2126	}
	2127
	2128	locale = PerlEnv_getenv("LANG");
	2129	if (! locale) {
	2130	locale = "";
	2131	}
	2132
	2133	found_locale: ;
	2134
	2135	# ifdef LC_ALL
	2136
	2137	}
	2138	}
	2139
	2140	# endif
	2141
	2142	}
	2143
	2144	result = setlocale(category, locale);
	2145	DEBUG_L(STMT_START {
	2146	dSAVE_ERRNO;
	2147	PerlIO_printf(Perl_debug_log, "%s:%d: %s\n", __FILE__, __LINE__,
	2148	setlocale_debug_string(category, locale, result));
	2149	RESTORE_ERRNO;
	2150	} STMT_END);
	2151
	2152	if (! override_LC_ALL) {
	2153	return result;
	2154	}
	2155
	2156	/* Here the input category was LC_ALL, and we have set it to what is in the
	2157	* LANG variable or the system default if there is no LANG. But these have
	2158	* lower priority than the other LC_foo variables, so override it for each
	2159	* one that is set. (If they are set to "", it means to use the same thing
	2160	* we just set LC_ALL to, so can skip) */
	2161
	2162	for (i = 0; i < LC_ALL_INDEX; i++) {
	2163	result = PerlEnv_getenv(category_names[i]);
	2164	if (result && strNE(result, "")) {
	2165	setlocale(categories[i], result);
	2166	DEBUG_Lv(PerlIO_printf(Perl_debug_log, "%s:%d: %s\n",
	2167	__FILE__, __LINE__,
	2168	setlocale_debug_string(categories[i], result, "not captured")));
	2169	}
	2170	}
	2171
	2172	result = setlocale(LC_ALL, NULL);
	2173	DEBUG_L(STMT_START {
	2174	dSAVE_ERRNO;
	2175	PerlIO_printf(Perl_debug_log, "%s:%d: %s\n",
	2176	__FILE__, __LINE__,
	2177	setlocale_debug_string(LC_ALL, NULL, result));
	2178	RESTORE_ERRNO;
	2179	} STMT_END);
	2180
	2181	return result;
	2182	}
	2183
	2184	#endif
	2185
	2186	/*
	2187
	2188	=head1 Locale-related functions and macros
	2189
	2190	=for apidoc Perl_setlocale
	2191
	2192	This is an (almost) drop-in replacement for the system L<C<setlocale(3)>>,
	2193	taking the same parameters, and returning the same information, except that it
	2194	returns the correct underlying C<LC_NUMERIC> locale. Regular C<setlocale> will
	2195	instead return C<C> if the underlying locale has a non-dot decimal point
	2196	character, or a non-empty thousands separator for displaying floating point
	2197	numbers. This is because perl keeps that locale category such that it has a
	2198	dot and empty separator, changing the locale briefly during the operations
	2199	where the underlying one is required. C<Perl_setlocale> knows about this, and
	2200	compensates; regular C<setlocale> doesn't.
	2201
	2202	Another reason it isn't completely a drop-in replacement is that it is
	2203	declared to return S<C<const char *>>, whereas the system setlocale omits the
	2204	C<const> (presumably because its API was specified long ago, and can't be
	2205	updated; it is illegal to change the information C<setlocale> returns; doing
	2206	so leads to segfaults.)
	2207
	2208	Finally, C<Perl_setlocale> works under all circumstances, whereas plain
	2209	C<setlocale> can be completely ineffective on some platforms under some
	2210	configurations.
	2211
	2212	C<Perl_setlocale> should not be used to change the locale except on systems
	2213	where the predefined variable C<${^SAFE_LOCALES}> is 1. On some such systems,
	2214	the system C<setlocale()> is ineffective, returning the wrong information, and
	2215	failing to actually change the locale. C<Perl_setlocale>, however, works
	2216	properly in all circumstances.
	2217
	2218	The return points to a per-thread static buffer, which is overwritten the next
	2219	time C<Perl_setlocale> is called from the same thread.
	2220
	2221	=cut
	2222
	2223	*/
	2224
	const char *
	Perl_setlocale(const int category, const char * locale)
	{
	    /* This wraps POSIX::setlocale()
	     *
	     * 'category' is the LC_foo category to operate on.  'locale' is the name
	     * of the locale to switch to, or NULL to merely ask what the current one
	     * is.
	     *
	     * Returns "C" when locale handling is compiled out; NULL when the
	     * underlying call yielded NULL; otherwise the resulting locale name.
	     * Apart from the LC_NUMERIC query shortcut below, that name is the copy
	     * kept in PL_setlocale_buf. */

	#ifndef USE_LOCALE

	    PERL_UNUSED_ARG(category);
	    PERL_UNUSED_ARG(locale);

	    return "C";

	#else

	    const char * retval;
	    const char * cat_locale;    /* Name of one category, for LC_ALL */
	    dSAVEDERRNO;
	    dTHX;
	    DECLARATION_FOR_LC_NUMERIC_MANIPULATION;

	#  ifdef USE_LOCALE_NUMERIC

	    /* A NULL locale means only query what the current one is.  We have the
	     * LC_NUMERIC name saved, because we are normally switched into the C
	     * (or equivalent) locale for it.  No copy is needed for that return, as
	     * it is a per-thread variable that won't change until a future
	     * setlocale. */
	    if (locale == NULL && category == LC_NUMERIC) {
	        return PL_numeric_name;
	    }

	#    ifdef LC_ALL

	    /* For an LC_ALL query, switch back to the underlying LC_NUMERIC so that
	     * the answer is correct.  All other categories don't require special
	     * handling */
	    if (locale == NULL && category == LC_ALL) {
	        STORE_LC_NUMERIC_FORCE_TO_UNDERLYING();
	    }

	#    endif
	#  endif

	    /* Do the real work, copying the answer into our own buffer, and
	     * capturing errno before anything below can disturb it */
	    retval = save_to_buffer(do_setlocale_r(category, locale),
	                            &PL_setlocale_buf, &PL_setlocale_bufsize, 0);
	    SAVE_ERRNO;

	#  if defined(USE_LOCALE_NUMERIC) && defined(LC_ALL)

	    /* Undo the toggle made above for the LC_ALL query */
	    if (locale == NULL && category == LC_ALL) {
	        RESTORE_LC_NUMERIC();
	    }

	#  endif

	    DEBUG_L(PerlIO_printf(Perl_debug_log,
	        "%s:%d: %s\n", __FILE__, __LINE__,
	            setlocale_debug_string(category, locale, retval)));

	    RESTORE_ERRNO;

	    /* Nothing further to do on failure (retval is NULL), nor when this was
	     * just a query of the state (locale is NULL) */
	    if (retval == NULL || locale == NULL) {
	        return retval;
	    }

	    /* The locale has been switched; bring our records into line with it. */

	#  ifdef USE_LOCALE_CTYPE

	    if (category == LC_CTYPE) {
	        new_ctype(retval);
	        return retval;
	    }

	#  endif
	#  ifdef USE_LOCALE_COLLATE

	    if (category == LC_COLLATE) {
	        new_collate(retval);
	        return retval;
	    }

	#  endif
	#  ifdef USE_LOCALE_NUMERIC

	    if (category == LC_NUMERIC) {
	        new_numeric(retval);
	        return retval;
	    }

	#  endif
	#  ifdef LC_ALL

	    if (category == LC_ALL) {

	        /* LC_ALL updates all the things we care about.  The values may not
	         * be the same as 'retval', as the locale "" may have set things
	         * individually, so each category is queried afresh */

	#    ifdef USE_LOCALE_CTYPE

	        cat_locale = savepv(do_setlocale_c(LC_CTYPE, NULL));
	        new_ctype(cat_locale);
	        Safefree(cat_locale);

	#    endif /* USE_LOCALE_CTYPE */
	#    ifdef USE_LOCALE_COLLATE

	        cat_locale = savepv(do_setlocale_c(LC_COLLATE, NULL));
	        new_collate(cat_locale);
	        Safefree(cat_locale);

	#    endif
	#    ifdef USE_LOCALE_NUMERIC

	        cat_locale = savepv(do_setlocale_c(LC_NUMERIC, NULL));
	        new_numeric(cat_locale);
	        Safefree(cat_locale);

	#    endif /* USE_LOCALE_NUMERIC */

	    }

	#  endif /* LC_ALL */

	    /* Any other category needs no bookkeeping */
	    return retval;

	#endif

	}
	2365
	2366	PERL_STATIC_INLINE const char *
	2367	S_save_to_buffer(const char * string, char *buf, Size_t buf_size, const Size_t offset)
	2368	{
	2369	/* Copy the NUL-terminated 'string' to 'buf' + 'offset'. 'buf' has size 'buf_size',
	2370	* growing it if necessary */
	2371
	2372	Size_t string_size;
	2373
	2374	PERL_ARGS_ASSERT_SAVE_TO_BUFFER;
	2375
	2376	if (! string) {
	2377	return NULL;
	2378	}
	2379
	2380	string_size = strlen(string) + offset + 1;
	2381
	2382	if (*buf_size == 0) {
	2383	Newx(*buf, string_size, char);
	2384	*buf_size = string_size;
	2385	}
	2386	else if (string_size > *buf_size) {
	2387	Renew(*buf, string_size, char);
	2388	*buf_size = string_size;
	2389	}
	2390
	2391	Copy(string, *buf + offset, string_size - offset, char);
	2392	return *buf;
	2393	}
	2394
	2395	/*
	2396
	2397	=for apidoc Perl_langinfo
	2398
	2399	This is an (almost) drop-in replacement for the system C<L<nl_langinfo(3)>>,
	2400	taking the same C<item> parameter values, and returning the same information.
	2401	But it is more thread-safe than regular C<nl_langinfo()>, and hides the quirks
	2402	of Perl's locale handling from your code, and can be used on systems that lack
	2403	a native C<nl_langinfo>.
	2404
	2405	Expanding on these:
	2406
	2407	=over
	2408
	2409	=item *
	2410
	2411	The reason it isn't quite a drop-in replacement is actually an advantage. The
	2412	only difference is that it returns S<C<const char *>>, whereas plain
	2413	C<nl_langinfo()> returns S<C<char *>>, but you are (only by documentation)
	2414	forbidden to write into the buffer. By declaring this C<const>, the compiler
	2415	enforces this restriction, so if it is violated, you know at compilation time,
	2416	rather than getting segfaults at runtime.
	2417
	2418	=item *
	2419
	2420	It delivers the correct results for the C<RADIXCHAR> and C<THOUSEP> items,
	2421	without you having to write extra code. The reason for the extra code would be
	2422	because these are from the C<LC_NUMERIC> locale category, which is normally
	2423	kept set by Perl so that the radix is a dot, and the separator is the empty
	2424	string, no matter what the underlying locale is supposed to be, and so to get
	2425	the expected results, you have to temporarily toggle into the underlying
	2426	locale, and later toggle back. (You could use plain C<nl_langinfo> and
	2427	C<L</STORE_LC_NUMERIC_FORCE_TO_UNDERLYING>> for this but then you wouldn't get
	2428	the other advantages of C<Perl_langinfo()>; not keeping C<LC_NUMERIC> in the C
	2429	(or equivalent) locale would break a lot of CPAN, which is expecting the radix
	2430	(decimal point) character to be a dot.)
	2431
	2432	=item *
	2433
	2434	The system function it replaces can have its static return buffer trashed,
	2435	not only by a subsequent call to that function, but by a C<freelocale>,
	2436	C<setlocale>, or other locale change. The returned buffer of this function is
	2437	not changed until the next call to it, so the buffer is never in a trashed
	2438	state.
	2439
	2440	=item *
	2441
	2442	Its return buffer is per-thread, so it also is never overwritten by a call to
	2443	this function from another thread; unlike the function it replaces.
	2444
	2445	=item *
	2446
	2447	But most importantly, it works on systems that don't have C<nl_langinfo>, such
	2448	as Windows, hence makes your code more portable. Of the fifty-some possible
	2449	items specified by the POSIX 2008 standard,
	2450	L<http://pubs.opengroup.org/onlinepubs/9699919799/basedefs/langinfo.h.html>,
	2451	only one is completely unimplemented (though on non-Windows platforms, another
	2452	significant one is also not implemented). It uses various techniques to
	2453	recover the other items, including calling C<L<localeconv(3)>>, and
	2454	C<L<strftime(3)>>, both of which are specified in C89, so should always be
	2455	available. Later C<strftime()> versions have additional capabilities; C<""> is
	2456	returned for those not available on your system.
	2457
	2458	It is important to note that when called with an item that is recovered by
	2459	using C<localeconv>, the buffer from any previous explicit call to
	2460	C<localeconv> will be overwritten. This means you must save that buffer's
	2461	contents if you need to access them after a call to this function. (But note
	2462	that you might not want to be using C<localeconv()> directly anyway, because of
	2463	issues like the ones listed in the second item of this list (above) for
	2464	C<RADIXCHAR> and C<THOUSEP>. You can use the methods given in L<perlcall> to
	2465	call L<POSIX/localeconv> and avoid all the issues, but then you have a hash to
	2466	unpack).
	2467
	2468	The details for those items which may deviate from what this emulation returns
	2469	and what a native C<nl_langinfo()> would return are specified in
	2470	L<I18N::Langinfo>.
	2471
	2472	=back
	2473
	2474	When using C<Perl_langinfo> on systems that don't have a native
	2475	C<nl_langinfo()>, you must
	2476
	2477	#include "perl_langinfo.h"
	2478
	2479	before the C<perl.h> C<#include>. You can replace your C<langinfo.h>
	2480	C<#include> with this one. (Doing it this way keeps out the symbols that plain
	2481	C<langinfo.h> would try to import into the namespace for code that doesn't need
	2482	it.)
	2483
	2484	The original impetus for C<Perl_langinfo()> was so that code that needs to
	2485	find out the current currency symbol, floating point radix character, or digit
	2486	grouping separator can use, on all systems, the simpler and more
	2487	thread-friendly C<nl_langinfo> API instead of C<L<localeconv(3)>> which is a
	2488	pain to make thread-friendly. For other fields returned by C<localeconv>, it
	2489	is better to use the methods given in L<perlcall> to call
	2490	L<C<POSIX::localeconv()>\|POSIX/localeconv>, which is thread-friendly.
	2491
	2492	=cut
	2493
	2494	*/
	2495
	2496	const char *
	2497	#ifdef HAS_NL_LANGINFO
	2498	Perl_langinfo(const nl_item item)
	2499	#else
	2500	Perl_langinfo(const int item)
	2501	#endif
	2502	{
	2503	return my_nl_langinfo(item, TRUE);
	2504	}
	2505
	2506	STATIC const char *
	2507	#ifdef HAS_NL_LANGINFO
	2508	S_my_nl_langinfo(const nl_item item, bool toggle)
	2509	#else
	2510	S_my_nl_langinfo(const int item, bool toggle)
	2511	#endif
	2512	{
	2513	dTHX;
	2514	const char * retval;
	2515
	2516	#ifdef USE_LOCALE_NUMERIC
	2517
	2518	/* We only need to toggle into the underlying LC_NUMERIC locale for these
	2519	* two items, and only if not already there */
	2520	if (toggle && (( item != RADIXCHAR && item != THOUSEP)
	2521	\|\| PL_numeric_underlying))
	2522
	2523	#endif /* No toggling needed if not using LC_NUMERIC */
	2524
	2525	toggle = FALSE;
	2526
	2527	#if defined(HAS_NL_LANGINFO) /* nl_langinfo() is available. */
	2528	# if ! defined(HAS_THREAD_SAFE_NL_LANGINFO_L) \
	2529	\|\| ! defined(HAS_POSIX_2008_LOCALE) \
	2530	\|\| ! defined(DUPLOCALE)
	2531
	2532	/* Here, use plain nl_langinfo(), switching to the underlying LC_NUMERIC
	2533	* for those items dependent on it. This must be copied to a buffer before
	2534	* switching back, as some systems destroy the buffer when setlocale() is
	2535	* called */
	2536
	2537	{
	2538	DECLARATION_FOR_LC_NUMERIC_MANIPULATION;
	2539
	2540	if (toggle) {
	2541	STORE_LC_NUMERIC_FORCE_TO_UNDERLYING();
	2542	}
	2543
	2544	LOCALE_LOCK; /* Prevent interference from another thread executing
	2545	this code section (the only call to nl_langinfo in
	2546	the core) */
	2547
	2548
	2549	/* Copy to a per-thread buffer, which is also one that won't be
	2550	* destroyed by a subsequent setlocale(), such as the
	2551	* RESTORE_LC_NUMERIC may do just below. */
	2552	retval = save_to_buffer(nl_langinfo(item),
	2553	&PL_langinfo_buf, &PL_langinfo_bufsize, 0);
	2554
	2555	LOCALE_UNLOCK;
	2556
	2557	if (toggle) {
	2558	RESTORE_LC_NUMERIC();
	2559	}
	2560	}
	2561
	2562	# else /* Use nl_langinfo_l(), avoiding both a mutex and changing the locale */
	2563
	2564	{
	2565	bool do_free = FALSE;
	2566	locale_t cur = uselocale((locale_t) 0);
	2567
	2568	if (cur == LC_GLOBAL_LOCALE) {
	2569	cur = duplocale(LC_GLOBAL_LOCALE);
	2570	do_free = TRUE;
	2571	}
	2572
	2573	# ifdef USE_LOCALE_NUMERIC
	2574
	2575	if (toggle) {
	2576	if (PL_underlying_numeric_obj) {
	2577	cur = PL_underlying_numeric_obj;
	2578	}
	2579	else {
	2580	cur = newlocale(LC_NUMERIC_MASK, PL_numeric_name, cur);
	2581	do_free = TRUE;
	2582	}
	2583	}
	2584
	2585	# endif
	2586
	2587	/* We have to save it to a buffer, because the freelocale() just below
	2588	* can invalidate the internal one */
	2589	retval = save_to_buffer(nl_langinfo_l(item, cur),
	2590	&PL_langinfo_buf, &PL_langinfo_bufsize, 0);
	2591
	2592	if (do_free) {
	2593	freelocale(cur);
	2594	}
	2595	}
	2596
	2597	# endif
	2598
	2599	if (strEQ(retval, "")) {
	2600	if (item == YESSTR) {
	2601	return "yes";
	2602	}
	2603	if (item == NOSTR) {
	2604	return "no";
	2605	}
	2606	}
	2607
	2608	return retval;
	2609
	2610	#else /* Below, emulate nl_langinfo as best we can */
	2611
	2612	{
	2613
	2614	# ifdef HAS_LOCALECONV
	2615
	2616	const struct lconv* lc;
	2617	const char * temp;
	2618	DECLARATION_FOR_LC_NUMERIC_MANIPULATION;
	2619
	2620	# ifdef TS_W32_BROKEN_LOCALECONV
	2621
	2622	const char * save_global;
	2623	const char * save_thread;
	2624	int needed_size;
	2625	char * ptr;
	2626	char * e;
	2627	char * item_start;
	2628
	2629	# endif
	2630	# endif
	2631	# ifdef HAS_STRFTIME
	2632
	2633	struct tm tm;
	2634	bool return_format = FALSE; /* Return the %format, not the value */
	2635	const char * format;
	2636
	2637	# endif
	2638
	2639	/* We copy the results to a per-thread buffer, even if not
	2640	* multi-threaded. This is in part to simplify this code, and partly
	2641	* because we need a buffer anyway for strftime(), and partly because a
	2642	* call of localeconv() could otherwise wipe out the buffer, and the
	2643	* programmer would not be expecting this, as this is a nl_langinfo()
	2644	* substitute after all, so s/he might be thinking their localeconv()
	2645	* is safe until another localeconv() call. */
	2646
	2647	switch (item) {
	2648	Size_t len;
	2649
	2650	/* This is unimplemented */
	2651	case ERA: /* For use with strftime() %E modifier */
	2652
	2653	default:
	2654	return "";
	2655
	2656	/* We use only an English set, since we don't know any more */
	2657	case YESEXPR: return "^[+1yY]";
	2658	case YESSTR: return "yes";
	2659	case NOEXPR: return "^[-0nN]";
	2660	case NOSTR: return "no";
	2661
	2662	case CODESET:
	2663
	2664	# ifndef WIN32
	2665
	2666	/* On non-windows, this is unimplemented, in part because of
	2667	* inconsistencies between vendors. The Darwin native
	2668	* nl_langinfo() implementation simply looks at everything past
	2669	* any dot in the name, but that doesn't work for other
	2670	* vendors. Many Linux locales that don't have UTF-8 in their
	2671	* names really are UTF-8, for example; z/OS locales that do
	2672	* have UTF-8 in their names, aren't really UTF-8 */
	2673	return "";
	2674
	2675	# else
	2676
	2677	{ /* But on Windows, the name does seem to be consistent, so
	2678	use that. */
	2679	const char * p;
	2680	const char * first;
	2681	Size_t offset = 0;
	2682	const char * name = my_setlocale(LC_CTYPE, NULL);
	2683
	2684	if (isNAME_C_OR_POSIX(name)) {
	2685	return "ANSI_X3.4-1968";
	2686	}
	2687
	2688	/* Find the dot in the locale name */
	2689	first = (const char *) strchr(name, '.');
	2690	if (! first) {
	2691	first = name;
	2692	goto has_nondigit;
	2693	}
	2694
	2695	/* Look at everything past the dot */
	2696	first++;
	2697	p = first;
	2698
	2699	while (*p) {
	2700	if (! isDIGIT(*p)) {
	2701	goto has_nondigit;
	2702	}
	2703
	2704	p++;
	2705	}
	2706
	2707	/* Here everything past the dot is a digit. Treat it as a
	2708	* code page */
	2709	retval = save_to_buffer("CP", &PL_langinfo_buf,
	2710	&PL_langinfo_bufsize, 0);
	2711	offset = STRLENs("CP");
	2712
	2713	has_nondigit:
	2714
	2715	retval = save_to_buffer(first, &PL_langinfo_buf,
	2716	&PL_langinfo_bufsize, offset);
	2717	}
	2718
	2719	break;
	2720
	2721	# endif
	2722	# ifdef HAS_LOCALECONV
	2723
	2724	case CRNCYSTR:
	2725
	2726	/* We don't bother with localeconv_l() because any system that
	2727	* has it is likely to also have nl_langinfo() */
	2728
	2729	LOCALE_LOCK_V; /* Prevent interference with other threads
	2730	using localeconv() */
	2731
	2732	# ifdef TS_W32_BROKEN_LOCALECONV
	2733
	2734	/* This is a workaround for a Windows bug prior to VS 15.
	2735	* What we do here is, while locked, switch to the global
	2736	* locale so localeconv() works; then switch back just before
	2737	* the unlock. This can screw things up if some thread is
	2738	* already using the global locale while assuming no other is.
	2739	* A different workaround would be to call GetCurrencyFormat on
	2740	* a known value, and parse it; patches welcome
	2741	*
	2742	* We have to use LC_ALL instead of LC_MONETARY because of
	2743	* another bug in Windows */
	2744
	2745	save_thread = savepv(my_setlocale(LC_ALL, NULL));
	2746	_configthreadlocale(_DISABLE_PER_THREAD_LOCALE);
	2747	save_global= savepv(my_setlocale(LC_ALL, NULL));
	2748	my_setlocale(LC_ALL, save_thread);
	2749
	2750	# endif
	2751
	2752	lc = localeconv();
	2753	if ( ! lc
	2754	\|\| ! lc->currency_symbol
	2755	\|\| strEQ("", lc->currency_symbol))
	2756	{
	2757	LOCALE_UNLOCK_V;
	2758	return "";
	2759	}
	2760
	2761	/* Leave the first spot empty to be filled in below */
	2762	retval = save_to_buffer(lc->currency_symbol, &PL_langinfo_buf,
	2763	&PL_langinfo_bufsize, 1);
	2764	if (lc->mon_decimal_point && strEQ(lc->mon_decimal_point, ""))
	2765	{ /* khw couldn't figure out how the localedef specifications
	2766	would show that the $ should replace the radix; this is
	2767	just a guess as to how it might work.*/
	2768	PL_langinfo_buf[0] = '.';
	2769	}
	2770	else if (lc->p_cs_precedes) {
	2771	PL_langinfo_buf[0] = '-';
	2772	}
	2773	else {
	2774	PL_langinfo_buf[0] = '+';
	2775	}
	2776
	2777	# ifdef TS_W32_BROKEN_LOCALECONV
	2778
	2779	my_setlocale(LC_ALL, save_global);
	2780	_configthreadlocale(_ENABLE_PER_THREAD_LOCALE);
	2781	my_setlocale(LC_ALL, save_thread);
	2782	Safefree(save_global);
	2783	Safefree(save_thread);
	2784
	2785	# endif
	2786
	2787	LOCALE_UNLOCK_V;
	2788	break;
	2789
	2790	# ifdef TS_W32_BROKEN_LOCALECONV
	2791
	2792	case RADIXCHAR:
	2793
	2794	/* For this, we output a known simple floating point number to
	2795	* a buffer, and parse it, looking for the radix */
	2796
	2797	if (toggle) {
	2798	STORE_LC_NUMERIC_FORCE_TO_UNDERLYING();
	2799	}
	2800
	2801	if (PL_langinfo_bufsize < 10) {
	2802	PL_langinfo_bufsize = 10;
	2803	Renew(PL_langinfo_buf, PL_langinfo_bufsize, char);
	2804	}
	2805
	2806	needed_size = my_snprintf(PL_langinfo_buf, PL_langinfo_bufsize,
	2807	"%.1f", 1.5);
	2808	if (needed_size >= (int) PL_langinfo_bufsize) {
	2809	PL_langinfo_bufsize = needed_size + 1;
	2810	Renew(PL_langinfo_buf, PL_langinfo_bufsize, char);
	2811	needed_size = my_snprintf(PL_langinfo_buf, PL_langinfo_bufsize,
	2812	"%.1f", 1.5);
	2813	assert(needed_size < (int) PL_langinfo_bufsize);
	2814	}
	2815
	2816	ptr = PL_langinfo_buf;
	2817	e = PL_langinfo_buf + PL_langinfo_bufsize;
	2818
	2819	/* Find the '1' */
	2820	while (ptr < e && *ptr != '1') {
	2821	ptr++;
	2822	}
	2823	ptr++;
	2824
	2825	/* Find the '5' */
	2826	item_start = ptr;
	2827	while (ptr < e && *ptr != '5') {
	2828	ptr++;
	2829	}
	2830
	2831	/* Everything in between is the radix string */
	2832	if (ptr >= e) {
	2833	PL_langinfo_buf[0] = '?';
	2834	PL_langinfo_buf[1] = '\0';
	2835	}
	2836	else {
	2837	*ptr = '\0';
	2838	Move(item_start, PL_langinfo_buf, ptr - PL_langinfo_buf, char);
	2839	}
	2840
	2841	if (toggle) {
	2842	RESTORE_LC_NUMERIC();
	2843	}
	2844
	2845	retval = PL_langinfo_buf;
	2846	break;
	2847
	2848	# else
	2849
	2850	case RADIXCHAR: /* No special handling needed */
	2851
	2852	# endif
	2853
	2854	case THOUSEP:
	2855
	2856	if (toggle) {
	2857	STORE_LC_NUMERIC_FORCE_TO_UNDERLYING();
	2858	}
	2859
	2860	LOCALE_LOCK_V; /* Prevent interference with other threads
	2861	using localeconv() */
	2862
	2863	# ifdef TS_W32_BROKEN_LOCALECONV
	2864
	2865	/* This should only be for the thousands separator. A
	2866	* different work around would be to use GetNumberFormat on a
	2867	* known value and parse the result to find the separator */
	2868	save_thread = savepv(my_setlocale(LC_ALL, NULL));
	2869	_configthreadlocale(_DISABLE_PER_THREAD_LOCALE);
	2870	save_global = savepv(my_setlocale(LC_ALL, NULL));
	2871	my_setlocale(LC_ALL, save_thread);
	2872	# if 0
	2873	/* This is the start of code that for broken Windows replaces
	2874	* the above and below code, and instead calls
	2875	* GetNumberFormat() and then would parse that to find the
	2876	* thousands separator. It needs to handle UTF-16 vs -8
	2877	* issues. */
	2878
	2879	needed_size = GetNumberFormatEx(PL_numeric_name, 0, "1234.5", NULL, PL_langinfo_buf, PL_langinfo_bufsize);
	2880	DEBUG_L(PerlIO_printf(Perl_debug_log,
	2881	"%s: %d: return from GetNumber, count=%d, val=%s\n",
	2882	__FILE__, __LINE__, needed_size, PL_langinfo_buf));
	2883
	2884	# endif
	2885	# endif
	2886
	2887	lc = localeconv();
	2888	if (! lc) {
	2889	temp = "";
	2890	}
	2891	else {
	2892	temp = (item == RADIXCHAR)
	2893	? lc->decimal_point
	2894	: lc->thousands_sep;
	2895	if (! temp) {
	2896	temp = "";
	2897	}
	2898	}
	2899
	2900	retval = save_to_buffer(temp, &PL_langinfo_buf,
	2901	&PL_langinfo_bufsize, 0);
	2902
	2903	# ifdef TS_W32_BROKEN_LOCALECONV
	2904
	2905	my_setlocale(LC_ALL, save_global);
	2906	_configthreadlocale(_ENABLE_PER_THREAD_LOCALE);
	2907	my_setlocale(LC_ALL, save_thread);
	2908	Safefree(save_global);
	2909	Safefree(save_thread);
	2910
	2911	# endif
	2912
	2913	LOCALE_UNLOCK_V;
	2914
	2915	if (toggle) {
	2916	RESTORE_LC_NUMERIC();
	2917	}
	2918
	2919	break;
	2920
	2921	# endif
	2922	# ifdef HAS_STRFTIME
	2923
	2924	/* These are defined by C89, so we assume that strftime supports
	2925	* them, and so are returned unconditionally; they may not be what
	2926	* the locale actually says, but should give good enough results
	2927	* for someone using them as formats (as opposed to trying to parse
	2928	* them to figure out what the locale says). The other format
	2929	* items are actually tested to verify they work on the platform */
	2930	case D_FMT: return "%x";
	2931	case T_FMT: return "%X";
	2932	case D_T_FMT: return "%c";
	2933
	2934	/* These formats are only available in later strfmtime's */
	2935	case ERA_D_FMT: case ERA_T_FMT: case ERA_D_T_FMT: case T_FMT_AMPM:
	2936
	2937	/* The rest can be gotten from most versions of strftime(). */
	2938	case ABDAY_1: case ABDAY_2: case ABDAY_3:
	2939	case ABDAY_4: case ABDAY_5: case ABDAY_6: case ABDAY_7:
	2940	case ALT_DIGITS:
	2941	case AM_STR: case PM_STR:
	2942	case ABMON_1: case ABMON_2: case ABMON_3: case ABMON_4:
	2943	case ABMON_5: case ABMON_6: case ABMON_7: case ABMON_8:
	2944	case ABMON_9: case ABMON_10: case ABMON_11: case ABMON_12:
	2945	case DAY_1: case DAY_2: case DAY_3: case DAY_4:
	2946	case DAY_5: case DAY_6: case DAY_7:
	2947	case MON_1: case MON_2: case MON_3: case MON_4:
	2948	case MON_5: case MON_6: case MON_7: case MON_8:
	2949	case MON_9: case MON_10: case MON_11: case MON_12:
	2950
	2951	LOCALE_LOCK;
	2952
	2953	init_tm(&tm); /* Precaution against core dumps */
	2954	tm.tm_sec = 30;
	2955	tm.tm_min = 30;
	2956	tm.tm_hour = 6;
	2957	tm.tm_year = 2017 - 1900;
	2958	tm.tm_wday = 0;
	2959	tm.tm_mon = 0;
	2960	switch (item) {
	2961	default:
	2962	LOCALE_UNLOCK;
	2963	Perl_croak(aTHX_
	2964	"panic: %s: %d: switch case: %d problem",
	2965	__FILE__, __LINE__, item);
	2966	NOT_REACHED; /* NOTREACHED */
	2967
	2968	case PM_STR: tm.tm_hour = 18;
	2969	case AM_STR:
	2970	format = "%p";
	2971	break;
	2972
	2973	case ABDAY_7: tm.tm_wday++;
	2974	case ABDAY_6: tm.tm_wday++;
	2975	case ABDAY_5: tm.tm_wday++;
	2976	case ABDAY_4: tm.tm_wday++;
	2977	case ABDAY_3: tm.tm_wday++;
	2978	case ABDAY_2: tm.tm_wday++;
	2979	case ABDAY_1:
	2980	format = "%a";
	2981	break;
	2982
	2983	case DAY_7: tm.tm_wday++;
	2984	case DAY_6: tm.tm_wday++;
	2985	case DAY_5: tm.tm_wday++;
	2986	case DAY_4: tm.tm_wday++;
	2987	case DAY_3: tm.tm_wday++;
	2988	case DAY_2: tm.tm_wday++;
	2989	case DAY_1:
	2990	format = "%A";
	2991	break;
	2992
	2993	case ABMON_12: tm.tm_mon++;
	2994	case ABMON_11: tm.tm_mon++;
	2995	case ABMON_10: tm.tm_mon++;
	2996	case ABMON_9: tm.tm_mon++;
	2997	case ABMON_8: tm.tm_mon++;
	2998	case ABMON_7: tm.tm_mon++;
	2999	case ABMON_6: tm.tm_mon++;
	3000	case ABMON_5: tm.tm_mon++;
	3001	case ABMON_4: tm.tm_mon++;
	3002	case ABMON_3: tm.tm_mon++;
	3003	case ABMON_2: tm.tm_mon++;
	3004	case ABMON_1:
	3005	format = "%b";
	3006	break;
	3007
	3008	case MON_12: tm.tm_mon++;
	3009	case MON_11: tm.tm_mon++;
	3010	case MON_10: tm.tm_mon++;
	3011	case MON_9: tm.tm_mon++;
	3012	case MON_8: tm.tm_mon++;
	3013	case MON_7: tm.tm_mon++;
	3014	case MON_6: tm.tm_mon++;
	3015	case MON_5: tm.tm_mon++;
	3016	case MON_4: tm.tm_mon++;
	3017	case MON_3: tm.tm_mon++;
	3018	case MON_2: tm.tm_mon++;
	3019	case MON_1:
	3020	format = "%B";
	3021	break;
	3022
	3023	case T_FMT_AMPM:
	3024	format = "%r";
	3025	return_format = TRUE;
	3026	break;
	3027
	3028	case ERA_D_FMT:
	3029	format = "%Ex";
	3030	return_format = TRUE;
	3031	break;
	3032
	3033	case ERA_T_FMT:
	3034	format = "%EX";
	3035	return_format = TRUE;
	3036	break;
	3037
	3038	case ERA_D_T_FMT:
	3039	format = "%Ec";
	3040	return_format = TRUE;
	3041	break;
	3042
	3043	case ALT_DIGITS:
	3044	tm.tm_wday = 0;
	3045	format = "%Ow"; /* Find the alternate digit for 0 */
	3046	break;
	3047	}
	3048
	3049	/* We can't use my_strftime() because it doesn't look at
	3050	* tm_wday */
	3051	while (0 == strftime(PL_langinfo_buf, PL_langinfo_bufsize,
	3052	format, &tm))
	3053	{
	3054	/* A zero return means one of:
	3055	* a) there wasn't enough space in PL_langinfo_buf
	3056	* b) the format, like a plain %p, returns empty
	3057	* c) it was an illegal format, though some
	3058	* implementations of strftime will just return the
	3059	* illegal format as a plain character sequence.
	3060	*
	3061	* To quickly test for case 'b)', try again but precede
	3062	* the format with a plain character. If that result is
	3063	* still empty, the problem is either 'a)' or 'c)' */
	3064
	3065	Size_t format_size = strlen(format) + 1;
	3066	Size_t mod_size = format_size + 1;
	3067	char * mod_format;
	3068	char * temp_result;
	3069
	3070	Newx(mod_format, mod_size, char);
	3071	Newx(temp_result, PL_langinfo_bufsize, char);
	3072	*mod_format = ' ';
	3073	my_strlcpy(mod_format + 1, format, mod_size);
	3074	len = strftime(temp_result,
	3075	PL_langinfo_bufsize,
	3076	mod_format, &tm);
	3077	Safefree(mod_format);
	3078	Safefree(temp_result);
	3079
	3080	/* If 'len' is non-zero, it means that we had a case like
	3081	* %p which means the current locale doesn't use a.m. or
	3082	* p.m., and that is valid */
	3083	if (len == 0) {
	3084
	3085	/* Here, still didn't work. If we get well beyond a
	3086	* reasonable size, bail out to prevent an infinite
	3087	* loop. */
	3088
	3089	if (PL_langinfo_bufsize > 100 * format_size) {
	3090	*PL_langinfo_buf = '\0';
	3091	}
	3092	else {
	3093	/* Double the buffer size to retry; Add 1 in case
	3094	* original was 0, so we aren't stuck at 0. */
	3095	PL_langinfo_bufsize *= 2;
	3096	PL_langinfo_bufsize++;
	3097	Renew(PL_langinfo_buf, PL_langinfo_bufsize, char);
	3098	continue;
	3099	}
	3100	}
	3101
	3102	break;
	3103	}
	3104
	3105	/* Here, we got a result.
	3106	*
	3107	* If the item is 'ALT_DIGITS', PL_langinfo_buf contains the
	3108	* alternate format for wday 0. If the value is the same as
	3109	* the normal 0, there isn't an alternate, so clear the buffer.
	3110	* */
	3111	if ( item == ALT_DIGITS
	3112	&& strEQ(PL_langinfo_buf, "0"))
	3113	{
	3114	*PL_langinfo_buf = '\0';
	3115	}
	3116
	3117	/* ALT_DIGITS is problematic. Experiments on it showed that
	3118	* strftime() did not always work properly when going from
	3119	* alt-9 to alt-10. Only a few locales have this item defined,
	3120	* and in all of them on Linux that khw was able to find,
	3121	* nl_langinfo() merely returned the alt-0 character, possibly
	3122	* doubled. Most Unicode digits are in blocks of 10
	3123	* consecutive code points, so that is sufficient information
	3124	* for those scripts, as we can infer alt-1, alt-2, .... But
	3125	* for a Japanese locale, a CJK ideographic 0 is returned, and
	3126	* the CJK digits are not in code point order, so you can't
	3127	* really infer anything. The localedef for this locale did
	3128	* specify the succeeding digits, so that strftime() works
	3129	* properly on them, without needing to infer anything. But
	3130	* the nl_langinfo() return did not give sufficient information
	3131	* for the caller to understand what's going on. So until
	3132	* there is evidence that it should work differently, this
	3133	* returns the alt-0 string for ALT_DIGITS.
	3134	*
	3135	* wday was chosen because its range is all a single digit.
	3136	* Things like tm_sec have two digits as the minimum: '00' */
	3137
	3138	LOCALE_UNLOCK;
	3139
	3140	retval = PL_langinfo_buf;
	3141
	3142	/* If to return the format, not the value, overwrite the buffer
	3143	* with it. But some strftime()s will keep the original format
	3144	* if illegal, so change those to "" */
	3145	if (return_format) {
	3146	if (strEQ(PL_langinfo_buf, format)) {
	3147	*PL_langinfo_buf = '\0';
	3148	}
	3149	else {
	3150	retval = save_to_buffer(format, &PL_langinfo_buf,
	3151	&PL_langinfo_bufsize, 0);
	3152	}
	3153	}
	3154
	3155	break;
	3156
	3157	# endif
	3158
	3159	}
	3160	}
	3161
	3162	return retval;
	3163
	3164	#endif
	3165
	3166	}
	3167
	3168	/*
	3169	* Initialize locale awareness.
	3170	*/
	3171	int
	3172	Perl_init_i18nl10n(pTHX_ int printwarn)
	3173	{
	3174	/* printwarn is
	3175	*
	3176	* 0 if not to output warning when setup locale is bad
	3177	* 1 if to output warning based on value of PERL_BADLANG
	3178	* >1 if to output regardless of PERL_BADLANG
	3179	*
	3180	* returns
	3181	* 1 = set ok or not applicable,
	3182	* 0 = fallback to a locale of lower priority
	3183	* -1 = fallback to all locales failed, not even to the C locale
	3184	*
	3185	* Under -DDEBUGGING, if the environment variable PERL_DEBUG_LOCALE_INIT is
	3186	* set, debugging information is output.
	3187	*
	3188	* This looks more complicated than it is, mainly due to the #ifdefs.
	3189	*
	3190	* We try to set LC_ALL to the value determined by the environment. If
	3191	* there is no LC_ALL on this platform, we try the individual categories we
	3192	* know about. If this works, we are done.
	3193	*
	3194	* But if it doesn't work, we have to do something else. We search the
	3195	* environment variables ourselves instead of relying on the system to do
	3196	* it. We look at, in order, LC_ALL, LANG, a system default locale (if we
	3197	* think there is one), and the ultimate fallback "C". This is all done in
	3198	* the same loop as above to avoid duplicating code, but it makes things
	3199	* more complex. The 'trial_locales' array is initialized with just one
	3200	* element; it causes the behavior described in the paragraph above this to
	3201	* happen. If that fails, we add elements to 'trial_locales', and do extra
	3202	* loop iterations to cause the behavior described in this paragraph.
	3203	*
	3204	* On Ultrix, the locale MUST come from the environment, so there is
	3205	* preliminary code to set it. I (khw) am not sure that it is necessary,
	3206	* and that this couldn't be folded into the loop, but barring any real
	3207	* platforms to test on, it's staying as-is
	3208	*
	3209	* A slight complication is that in embedded Perls, the locale may already
	3210	* be set-up, and we don't want to get it from the normal environment
	3211	* variables. This is handled by having a special environment variable
	3212	* indicate we're in this situation. We simply set setlocale's 2nd
	3213	* parameter to be a NULL instead of "". That indicates to setlocale that
	3214	* it is not to change anything, but to return the current value,
	3215	* effectively initializing perl's db to what the locale already is.
	3216	*
	3217	* We play the same trick with NULL if a LC_ALL succeeds. We call
	3218	* setlocale() on the individual categores with NULL to get their existing
	3219	* values for our db, instead of trying to change them.
	3220	* */
	3221
	3222	dVAR;
	3223
	3224	int ok = 1;
	3225
	3226	#ifndef USE_LOCALE
	3227
	3228	PERL_UNUSED_ARG(printwarn);
	3229
	3230	#else /* USE_LOCALE */
	3231	# ifdef __GLIBC__
	3232
	3233	const char * const language = savepv(PerlEnv_getenv("LANGUAGE"));
	3234
	3235	# endif
	3236
	3237	/* NULL uses the existing already set up locale */
	3238	const char * const setlocale_init = (PerlEnv_getenv("PERL_SKIP_LOCALE_INIT"))
	3239	? NULL
	3240	: "";
	3241	const char* trial_locales[5]; /* 5 = 1 each for "", LC_ALL, LANG, "", C */
	3242	unsigned int trial_locales_count;
	3243	const char * const lc_all = savepv(PerlEnv_getenv("LC_ALL"));
	3244	const char * const lang = savepv(PerlEnv_getenv("LANG"));
	3245	bool setlocale_failure = FALSE;
	3246	unsigned int i;
	3247
	3248	/* A later getenv() could zap this, so only use here */
	3249	const char * const bad_lang_use_once = PerlEnv_getenv("PERL_BADLANG");
	3250
	3251	const bool locwarn = (printwarn > 1
	3252	\|\| ( printwarn
	3253	&& ( ! bad_lang_use_once
	3254	\|\| (
	3255	/* disallow with "" or "0" */
	3256	*bad_lang_use_once
	3257	&& strNE("0", bad_lang_use_once)))));
	3258
	3259	/* setlocale() return vals; not copied so must be looked at immediately */
	3260	const char * sl_result[NOMINAL_LC_ALL_INDEX + 1];
	3261
	3262	/* current locale for given category; should have been copied so aren't
	3263	* volatile */
	3264	const char * curlocales[NOMINAL_LC_ALL_INDEX + 1];
	3265
	3266	# ifdef WIN32
	3267
	3268	/* In some systems you can find out the system default locale
	3269	* and use that as the fallback locale. */
	3270	# define SYSTEM_DEFAULT_LOCALE
	3271	# endif
	3272	# ifdef SYSTEM_DEFAULT_LOCALE
	3273
	3274	const char *system_default_locale = NULL;
	3275
	3276	# endif
	3277
	3278	# ifndef DEBUGGING
	3279	# define DEBUG_LOCALE_INIT(a,b,c)
	3280	# else
	3281
	3282	DEBUG_INITIALIZATION_set(cBOOL(PerlEnv_getenv("PERL_DEBUG_LOCALE_INIT")));
	3283
	3284	# define DEBUG_LOCALE_INIT(category, locale, result) \
	3285	STMT_START { \
	3286	if (debug_initialization) { \
	3287	PerlIO_printf(Perl_debug_log, \
	3288	"%s:%d: %s\n", \
	3289	__FILE__, __LINE__, \
	3290	setlocale_debug_string(category, \
	3291	locale, \
	3292	result)); \
	3293	} \
	3294	} STMT_END
	3295
	3296	/* Make sure the parallel arrays are properly set up */
	3297	# ifdef USE_LOCALE_NUMERIC
	3298	assert(categories[LC_NUMERIC_INDEX] == LC_NUMERIC);
	3299	assert(strEQ(category_names[LC_NUMERIC_INDEX], "LC_NUMERIC"));
	3300	# ifdef USE_POSIX_2008_LOCALE
	3301	assert(category_masks[LC_NUMERIC_INDEX] == LC_NUMERIC_MASK);
	3302	# endif
	3303	# endif
	3304	# ifdef USE_LOCALE_CTYPE
	3305	assert(categories[LC_CTYPE_INDEX] == LC_CTYPE);
	3306	assert(strEQ(category_names[LC_CTYPE_INDEX], "LC_CTYPE"));
	3307	# ifdef USE_POSIX_2008_LOCALE
	3308	assert(category_masks[LC_CTYPE_INDEX] == LC_CTYPE_MASK);
	3309	# endif
	3310	# endif
	3311	# ifdef USE_LOCALE_COLLATE
	3312	assert(categories[LC_COLLATE_INDEX] == LC_COLLATE);
	3313	assert(strEQ(category_names[LC_COLLATE_INDEX], "LC_COLLATE"));
	3314	# ifdef USE_POSIX_2008_LOCALE
	3315	assert(category_masks[LC_COLLATE_INDEX] == LC_COLLATE_MASK);
	3316	# endif
	3317	# endif
	3318	# ifdef USE_LOCALE_TIME
	3319	assert(categories[LC_TIME_INDEX] == LC_TIME);
	3320	assert(strEQ(category_names[LC_TIME_INDEX], "LC_TIME"));
	3321	# ifdef USE_POSIX_2008_LOCALE
	3322	assert(category_masks[LC_TIME_INDEX] == LC_TIME_MASK);
	3323	# endif
	3324	# endif
	3325	# ifdef USE_LOCALE_MESSAGES
	3326	assert(categories[LC_MESSAGES_INDEX] == LC_MESSAGES);
	3327	assert(strEQ(category_names[LC_MESSAGES_INDEX], "LC_MESSAGES"));
	3328	# ifdef USE_POSIX_2008_LOCALE
	3329	assert(category_masks[LC_MESSAGES_INDEX] == LC_MESSAGES_MASK);
	3330	# endif
	3331	# endif
	3332	# ifdef USE_LOCALE_MONETARY
	3333	assert(categories[LC_MONETARY_INDEX] == LC_MONETARY);
	3334	assert(strEQ(category_names[LC_MONETARY_INDEX], "LC_MONETARY"));
	3335	# ifdef USE_POSIX_2008_LOCALE
	3336	assert(category_masks[LC_MONETARY_INDEX] == LC_MONETARY_MASK);
	3337	# endif
	3338	# endif
	3339	# ifdef USE_LOCALE_ADDRESS
	3340	assert(categories[LC_ADDRESS_INDEX] == LC_ADDRESS);
	3341	assert(strEQ(category_names[LC_ADDRESS_INDEX], "LC_ADDRESS"));
	3342	# ifdef USE_POSIX_2008_LOCALE
	3343	assert(category_masks[LC_ADDRESS_INDEX] == LC_ADDRESS_MASK);
	3344	# endif
	3345	# endif
	3346	# ifdef USE_LOCALE_IDENTIFICATION
	3347	assert(categories[LC_IDENTIFICATION_INDEX] == LC_IDENTIFICATION);
	3348	assert(strEQ(category_names[LC_IDENTIFICATION_INDEX], "LC_IDENTIFICATION"));
	3349	# ifdef USE_POSIX_2008_LOCALE
	3350	assert(category_masks[LC_IDENTIFICATION_INDEX] == LC_IDENTIFICATION_MASK);
	3351	# endif
	3352	# endif
	3353	# ifdef USE_LOCALE_MEASUREMENT
	3354	assert(categories[LC_MEASUREMENT_INDEX] == LC_MEASUREMENT);
	3355	assert(strEQ(category_names[LC_MEASUREMENT_INDEX], "LC_MEASUREMENT"));
	3356	# ifdef USE_POSIX_2008_LOCALE
	3357	assert(category_masks[LC_MEASUREMENT_INDEX] == LC_MEASUREMENT_MASK);
	3358	# endif
	3359	# endif
	3360	# ifdef USE_LOCALE_PAPER
	3361	assert(categories[LC_PAPER_INDEX] == LC_PAPER);
	3362	assert(strEQ(category_names[LC_PAPER_INDEX], "LC_PAPER"));
	3363	# ifdef USE_POSIX_2008_LOCALE
	3364	assert(category_masks[LC_PAPER_INDEX] == LC_PAPER_MASK);
	3365	# endif
	3366	# endif
	3367	# ifdef USE_LOCALE_TELEPHONE
	3368	assert(categories[LC_TELEPHONE_INDEX] == LC_TELEPHONE);
	3369	assert(strEQ(category_names[LC_TELEPHONE_INDEX], "LC_TELEPHONE"));
	3370	# ifdef USE_POSIX_2008_LOCALE
	3371	assert(category_masks[LC_TELEPHONE_INDEX] == LC_TELEPHONE_MASK);
	3372	# endif
	3373	# endif
	3374	# ifdef LC_ALL
	3375	assert(categories[LC_ALL_INDEX] == LC_ALL);
	3376	assert(strEQ(category_names[LC_ALL_INDEX], "LC_ALL"));
	3377	assert(NOMINAL_LC_ALL_INDEX == LC_ALL_INDEX);
	3378	# ifdef USE_POSIX_2008_LOCALE
	3379	assert(category_masks[LC_ALL_INDEX] == LC_ALL_MASK);
	3380	# endif
	3381	# endif
	3382	# endif /* DEBUGGING */
	3383
	3384	/* Initialize the cache of the program's UTF-8ness for the always known
	3385	* locales C and POSIX */
	3386	my_strlcpy(PL_locale_utf8ness, C_and_POSIX_utf8ness,
	3387	sizeof(PL_locale_utf8ness));
	3388
	3389	# ifdef USE_THREAD_SAFE_LOCALE
	3390	# ifdef WIN32
	3391
	3392	_configthreadlocale(_ENABLE_PER_THREAD_LOCALE);
	3393
	3394	# endif
	3395	# endif
	3396	# ifdef USE_POSIX_2008_LOCALE
	3397
	3398	PL_C_locale_obj = newlocale(LC_ALL_MASK, "C", (locale_t) 0);
	3399	if (! PL_C_locale_obj) {
	3400	Perl_croak_nocontext(
	3401	"panic: Cannot create POSIX 2008 C locale object; errno=%d", errno);
	3402	}
	3403	if (DEBUG_Lv_TEST \|\| debug_initialization) {
	3404	PerlIO_printf(Perl_debug_log, "%s:%d: created C object %p\n", __FILE__, __LINE__, PL_C_locale_obj);
	3405	}
	3406
	3407	# endif
	3408
	3409	# ifdef USE_LOCALE_NUMERIC
	3410
	3411	PL_numeric_radix_sv = newSVpvs(".");
	3412
	3413	# endif
	3414
	3415	# if defined(USE_POSIX_2008_LOCALE) && ! defined(HAS_QUERYLOCALE)
	3416
	3417	/* Initialize our records. If we have POSIX 2008, we have LC_ALL */
	3418	do_setlocale_c(LC_ALL, my_setlocale(LC_ALL, NULL));
	3419
	3420	# endif
	3421	# ifdef LOCALE_ENVIRON_REQUIRED
	3422
	3423	/*
	3424	* Ultrix setlocale(..., "") fails if there are no environment
	3425	* variables from which to get a locale name.
	3426	*/
	3427
	3428	# ifndef LC_ALL
	3429	# error Ultrix without LC_ALL not implemented
	3430	# else
	3431
	3432	{
	3433	bool done = FALSE;
	3434	if (lang) {
	3435	sl_result[LC_ALL_INDEX] = do_setlocale_c(LC_ALL, setlocale_init);
	3436	DEBUG_LOCALE_INIT(LC_ALL, setlocale_init, sl_result[LC_ALL_INDEX]);
	3437	if (sl_result[LC_ALL_INDEX])
	3438	done = TRUE;
	3439	else
	3440	setlocale_failure = TRUE;
	3441	}
	3442	if (! setlocale_failure) {
	3443	const char * locale_param;
	3444	for (i = 0; i < LC_ALL_INDEX; i++) {
	3445	locale_param = (! done && (lang \|\| PerlEnv_getenv(category_names[i])))
	3446	? setlocale_init
	3447	: NULL;
	3448	sl_result[i] = do_setlocale_r(categories[i], locale_param);
	3449	if (! sl_result[i]) {
	3450	setlocale_failure = TRUE;
	3451	}
	3452	DEBUG_LOCALE_INIT(categories[i], locale_param, sl_result[i]);
	3453	}
	3454	}
	3455	}
	3456
	3457	# endif /* LC_ALL */
	3458	# endif /* LOCALE_ENVIRON_REQUIRED */
	3459
	3460	/* We try each locale in the list until we get one that works, or exhaust
	3461	* the list. Normally the loop is executed just once. But if setting the
	3462	* locale fails, inside the loop we add fallback trials to the array and so
	3463	* will execute the loop multiple times */
	3464	trial_locales[0] = setlocale_init;
	3465	trial_locales_count = 1;
	3466
	3467	for (i= 0; i < trial_locales_count; i++) {
	3468	const char * trial_locale = trial_locales[i];
	3469
	3470	if (i > 0) {
	3471
	3472	/* XXX This is to preserve old behavior for LOCALE_ENVIRON_REQUIRED
	3473	* when i==0, but I (khw) don't think that behavior makes much
	3474	* sense */
	3475	setlocale_failure = FALSE;
	3476
	3477	# ifdef SYSTEM_DEFAULT_LOCALE
	3478	# ifdef WIN32 /* Note that assumes Win32 has LC_ALL */
	3479
	3480	/* On Windows machines, an entry of "" after the 0th means to use
	3481	* the system default locale, which we now proceed to get. */
	3482	if (strEQ(trial_locale, "")) {
	3483	unsigned int j;
	3484
	3485	/* Note that this may change the locale, but we are going to do
	3486	* that anyway just below */
	3487	system_default_locale = do_setlocale_c(LC_ALL, "");
	3488	DEBUG_LOCALE_INIT(LC_ALL, "", system_default_locale);
	3489
	3490	/* Skip if invalid or if it's already on the list of locales to
	3491	* try */
	3492	if (! system_default_locale) {
	3493	goto next_iteration;
	3494	}
	3495	for (j = 0; j < trial_locales_count; j++) {
	3496	if (strEQ(system_default_locale, trial_locales[j])) {
	3497	goto next_iteration;
	3498	}
	3499	}
	3500
	3501	trial_locale = system_default_locale;
	3502	}
	3503	# else
	3504	# error SYSTEM_DEFAULT_LOCALE only implemented for Win32
	3505	# endif
	3506	# endif /* SYSTEM_DEFAULT_LOCALE */
	3507
	3508	} /* For i > 0 */
	3509
	3510	# ifdef LC_ALL
	3511
	3512	sl_result[LC_ALL_INDEX] = do_setlocale_c(LC_ALL, trial_locale);
	3513	DEBUG_LOCALE_INIT(LC_ALL, trial_locale, sl_result[LC_ALL_INDEX]);
	3514	if (! sl_result[LC_ALL_INDEX]) {
	3515	setlocale_failure = TRUE;
	3516	}
	3517	else {
	3518	/* Since LC_ALL succeeded, it should have changed all the other
	3519	* categories it can to its value; so we massage things so that the
	3520	* setlocales below just return their category's current values.
	3521	* This adequately handles the case in NetBSD where LC_COLLATE may
	3522	* not be defined for a locale, and setting it individually will
	3523	* fail, whereas setting LC_ALL succeeds, leaving LC_COLLATE set to
	3524	* the POSIX locale. */
	3525	trial_locale = NULL;
	3526	}
	3527
	3528	# endif /* LC_ALL */
	3529
	3530	if (! setlocale_failure) {
	3531	unsigned int j;
	3532	for (j = 0; j < NOMINAL_LC_ALL_INDEX; j++) {
	3533	curlocales[j]
	3534	= savepv(do_setlocale_r(categories[j], trial_locale));
	3535	if (! curlocales[j]) {
	3536	setlocale_failure = TRUE;
	3537	}
	3538	DEBUG_LOCALE_INIT(categories[j], trial_locale, curlocales[j]);
	3539	}
	3540
	3541	if (! setlocale_failure) { /* All succeeded */
	3542	break; /* Exit trial_locales loop */
	3543	}
	3544	}
	3545
	3546	/* Here, something failed; will need to try a fallback. */
	3547	ok = 0;
	3548
	3549	if (i == 0) {
	3550	unsigned int j;
	3551
	3552	if (locwarn) { /* Output failure info only on the first one */
	3553
	3554	# ifdef LC_ALL
	3555
	3556	PerlIO_printf(Perl_error_log,
	3557	"perl: warning: Setting locale failed.\n");
	3558
	3559	# else /* !LC_ALL */
	3560
	3561	PerlIO_printf(Perl_error_log,
	3562	"perl: warning: Setting locale failed for the categories:\n\t");
	3563
	3564	for (j = 0; j < NOMINAL_LC_ALL_INDEX; j++) {
	3565	if (! curlocales[j]) {
	3566	PerlIO_printf(Perl_error_log, category_names[j]);
	3567	}
	3568	else {
	3569	Safefree(curlocales[j]);
	3570	}
	3571	}
	3572
	3573	# endif /* LC_ALL */
	3574
	3575	PerlIO_printf(Perl_error_log,
	3576	"perl: warning: Please check that your locale settings:\n");
	3577
	3578	# ifdef __GLIBC__
	3579
	3580	PerlIO_printf(Perl_error_log,
	3581	"\tLANGUAGE = %c%s%c,\n",
	3582	language ? '"' : '(',
	3583	language ? language : "unset",
	3584	language ? '"' : ')');
	3585	# endif
	3586
	3587	PerlIO_printf(Perl_error_log,
	3588	"\tLC_ALL = %c%s%c,\n",
	3589	lc_all ? '"' : '(',
	3590	lc_all ? lc_all : "unset",
	3591	lc_all ? '"' : ')');
	3592
	3593	# if defined(USE_ENVIRON_ARRAY)
	3594
	3595	{
	3596	char **e;
	3597
	3598	/* Look through the environment for any variables of the
	3599	* form qr/ ^ LC_ [A-Z]+ = /x, except LC_ALL which was
	3600	* already handled above. These are assumed to be locale
	3601	* settings. Output them and their values. */
	3602	for (e = environ; *e; e++) {
	3603	const STRLEN prefix_len = sizeof("LC_") - 1;
	3604	STRLEN uppers_len;
	3605
	3606	if ( strBEGINs(*e, "LC_")
	3607	&& ! strBEGINs(*e, "LC_ALL=")
	3608	&& (uppers_len = strspn(*e + prefix_len,
	3609	"ABCDEFGHIJKLMNOPQRSTUVWXYZ"))
	3610	&& ((*e)[prefix_len + uppers_len] == '='))
	3611	{
	3612	PerlIO_printf(Perl_error_log, "\t%.*s = \"%s\",\n",
	3613	(int) (prefix_len + uppers_len), *e,
	3614	*e + prefix_len + uppers_len + 1);
	3615	}
	3616	}
	3617	}
	3618
	3619	# else
	3620
	3621	PerlIO_printf(Perl_error_log,
	3622	"\t(possibly more locale environment variables)\n");
	3623
	3624	# endif
	3625
	3626	PerlIO_printf(Perl_error_log,
	3627	"\tLANG = %c%s%c\n",
	3628	lang ? '"' : '(',
	3629	lang ? lang : "unset",
	3630	lang ? '"' : ')');
	3631
	3632	PerlIO_printf(Perl_error_log,
	3633	" are supported and installed on your system.\n");
	3634	}
	3635
	3636	/* Calculate what fallback locales to try. We have avoided this
	3637	* until we have to, because failure is quite unlikely. This will
	3638	* usually change the upper bound of the loop we are in.
	3639	*
	3640	* Since the system's default way of setting the locale has not
	3641	* found one that works, We use Perl's defined ordering: LC_ALL,
	3642	* LANG, and the C locale. We don't try the same locale twice, so
	3643	* don't add to the list if already there. (On POSIX systems, the
	3644	* LC_ALL element will likely be a repeat of the 0th element "",
	3645	* but there's no harm done by doing it explicitly.
	3646	*
	3647	* Note that this tries the LC_ALL environment variable even on
	3648	* systems which have no LC_ALL locale setting. This may or may
	3649	* not have been originally intentional, but there's no real need
	3650	* to change the behavior. */
	3651	if (lc_all) {
	3652	for (j = 0; j < trial_locales_count; j++) {
	3653	if (strEQ(lc_all, trial_locales[j])) {
	3654	goto done_lc_all;
	3655	}
	3656	}
	3657	trial_locales[trial_locales_count++] = lc_all;
	3658	}
	3659	done_lc_all:
	3660
	3661	if (lang) {
	3662	for (j = 0; j < trial_locales_count; j++) {
	3663	if (strEQ(lang, trial_locales[j])) {
	3664	goto done_lang;
	3665	}
	3666	}
	3667	trial_locales[trial_locales_count++] = lang;
	3668	}
	3669	done_lang:
	3670
	3671	# if defined(WIN32) && defined(LC_ALL)
	3672
	3673	/* For Windows, we also try the system default locale before "C".
	3674	* (If there exists a Windows without LC_ALL we skip this because
	3675	* it gets too complicated. For those, the "C" is the next
	3676	* fallback possibility). The "" is the same as the 0th element of
	3677	* the array, but the code at the loop above knows to treat it
	3678	* differently when not the 0th */
	3679	trial_locales[trial_locales_count++] = "";
	3680
	3681	# endif
	3682
	3683	for (j = 0; j < trial_locales_count; j++) {
	3684	if (strEQ("C", trial_locales[j])) {
	3685	goto done_C;
	3686	}
	3687	}
	3688	trial_locales[trial_locales_count++] = "C";
	3689
	3690	done_C: ;
	3691	} /* end of first time through the loop */
	3692
	3693	# ifdef WIN32
	3694
	3695	next_iteration: ;
	3696
	3697	# endif
	3698
	3699	} /* end of looping through the trial locales */
	3700
	3701	if (ok < 1) { /* If we tried to fallback */
	3702	const char* msg;
	3703	if (! setlocale_failure) { /* fallback succeeded */
	3704	msg = "Falling back to";
	3705	}
	3706	else { /* fallback failed */
	3707	unsigned int j;
	3708
	3709	/* We dropped off the end of the loop, so have to decrement i to
	3710	* get back to the value the last time through */
	3711	i--;
	3712
	3713	ok = -1;
	3714	msg = "Failed to fall back to";
	3715
	3716	/* To continue, we should use whatever values we've got */
	3717
	3718	for (j = 0; j < NOMINAL_LC_ALL_INDEX; j++) {
	3719	Safefree(curlocales[j]);
	3720	curlocales[j] = savepv(do_setlocale_r(categories[j], NULL));
	3721	DEBUG_LOCALE_INIT(categories[j], NULL, curlocales[j]);
	3722	}
	3723	}
	3724
	3725	if (locwarn) {
	3726	const char * description;
	3727	const char * name = "";
	3728	if (strEQ(trial_locales[i], "C")) {
	3729	description = "the standard locale";
	3730	name = "C";
	3731	}
	3732
	3733	# ifdef SYSTEM_DEFAULT_LOCALE
	3734
	3735	else if (strEQ(trial_locales[i], "")) {
	3736	description = "the system default locale";
	3737	if (system_default_locale) {
	3738	name = system_default_locale;
	3739	}
	3740	}
	3741
	3742	# endif /* SYSTEM_DEFAULT_LOCALE */
	3743
	3744	else {
	3745	description = "a fallback locale";
	3746	name = trial_locales[i];
	3747	}
	3748	if (name && strNE(name, "")) {
	3749	PerlIO_printf(Perl_error_log,
	3750	"perl: warning: %s %s (\"%s\").\n", msg, description, name);
	3751	}
	3752	else {
	3753	PerlIO_printf(Perl_error_log,
	3754	"perl: warning: %s %s.\n", msg, description);
	3755	}
	3756	}
	3757	} /* End of tried to fallback */
	3758
	3759	/* Done with finding the locales; update our records */
	3760
	3761	# ifdef USE_LOCALE_CTYPE
	3762
	3763	new_ctype(curlocales[LC_CTYPE_INDEX]);
	3764
	3765	# endif
	3766	# ifdef USE_LOCALE_COLLATE
	3767
	3768	new_collate(curlocales[LC_COLLATE_INDEX]);
	3769
	3770	# endif
	3771	# ifdef USE_LOCALE_NUMERIC
	3772
	3773	new_numeric(curlocales[LC_NUMERIC_INDEX]);
	3774
	3775	# endif
	3776
	3777	for (i = 0; i < NOMINAL_LC_ALL_INDEX; i++) {
	3778
	3779	# if defined(USE_ITHREADS) && ! defined(USE_THREAD_SAFE_LOCALE)
	3780
	3781	/* This caches whether each category's locale is UTF-8 or not. This
	3782	* may involve changing the locale. It is ok to do this at
	3783	* initialization time before any threads have started, but not later
	3784	* unless thread-safe operations are used.
	3785	* Caching means that if the program heeds our dictate not to change
	3786	* locales in threaded applications, this data will remain valid, and
	3787	* it may get queried without having to change locales. If the
	3788	* environment is such that all categories have the same locale, this
	3789	* isn't needed, as the code will not change the locale; but this
	3790	* handles the uncommon case where the environment has disparate
	3791	* locales for the categories */
	3792	(void) _is_cur_LC_category_utf8(categories[i]);
	3793
	3794	# endif
	3795
	3796	Safefree(curlocales[i]);
	3797	}
	3798
	3799	# if defined(USE_PERLIO) && defined(USE_LOCALE_CTYPE)
	3800
	3801	/* Set PL_utf8locale to TRUE if using PerlIO _and_ the current LC_CTYPE
	3802	* locale is UTF-8. The call to new_ctype() just above has already
	3803	* calculated the latter value and saved it in PL_in_utf8_CTYPE_locale. If
	3804	* both PL_utf8locale and PL_unicode (set by -C or by $ENV{PERL_UNICODE})
	3805	* are true, perl.c:S_parse_body() will turn on the PerlIO :utf8 layer on
	3806	* STDIN, STDOUT, STDERR, _and_ the default open discipline. */
	3807	PL_utf8locale = PL_in_utf8_CTYPE_locale;
	3808
	3809	/* Set PL_unicode to $ENV{PERL_UNICODE} if using PerlIO.
	3810	This is an alternative to using the -C command line switch
	3811	(the -C if present will override this). */
	3812	{
	3813	const char *p = PerlEnv_getenv("PERL_UNICODE");
	3814	PL_unicode = p ? parse_unicode_opts(&p) : 0;
	3815	if (PL_unicode & PERL_UNICODE_UTF8CACHEASSERT_FLAG)
	3816	PL_utf8cache = -1;
	3817	}
	3818
	3819	# endif
	3820	# ifdef __GLIBC__
	3821
	3822	Safefree(language);
	3823
	3824	# endif
	3825
	3826	Safefree(lc_all);
	3827	Safefree(lang);
	3828
	3829	#endif /* USE_LOCALE */
	3830	#ifdef DEBUGGING
	3831
	3832	/* So won't continue to output stuff */
	3833	DEBUG_INITIALIZATION_set(FALSE);
	3834
	3835	#endif
	3836
	3837	return ok;
	3838	}
	3839
	3840	#ifdef USE_LOCALE_COLLATE
	3841
	3842	char *
	3843	Perl__mem_collxfrm(pTHX_ const char *input_string,
	3844	STRLEN len, /* Length of 'input_string' */
	3845	STRLEN xlen, / Set to length of returned string
	3846	(not including the collation index
	3847	prefix) */
	3848	bool utf8 /* Is the input in UTF-8? */
	3849	)
	3850	{
	3851
	3852	/* _mem_collxfrm() is a bit like strxfrm() but with two important
	3853	* differences. First, it handles embedded NULs. Second, it allocates a bit
	3854	* more memory than needed for the transformed data itself. The real
	3855	* transformed data begins at offset COLLXFRM_HDR_LEN. *xlen is set to
	3856	* the length of that, and doesn't include the collation index size.
	3857	* Please see sv_collxfrm() to see how this is used. */
	3858
	3859	#define COLLXFRM_HDR_LEN sizeof(PL_collation_ix)
	3860
	3861	char * s = (char *) input_string;
	3862	STRLEN s_strlen = strlen(input_string);
	3863	char *xbuf = NULL;
	3864	STRLEN xAlloc; /* xalloc is a reserved word in VC */
	3865	STRLEN length_in_chars;
	3866	bool first_time = TRUE; /* Cleared after first loop iteration */
	3867
	3868	PERL_ARGS_ASSERT__MEM_COLLXFRM;
	3869
	3870	/* Must be NUL-terminated */
	3871	assert(*(input_string + len) == '\0');
	3872
	3873	/* If this locale has defective collation, skip */
	3874	if (PL_collxfrm_base == 0 && PL_collxfrm_mult == 0) {
	3875	DEBUG_L(PerlIO_printf(Perl_debug_log,
	3876	"_mem_collxfrm: locale's collation is defective\n"));
	3877	goto bad;
	3878	}
	3879
	3880	/* Replace any embedded NULs with the control that sorts before any others.
	3881	* This will give as good as possible results on strings that don't
	3882	* otherwise contain that character, but otherwise there may be
	3883	* less-than-perfect results with that character and NUL. This is
	3884	* unavoidable unless we replace strxfrm with our own implementation. */
	3885	if (UNLIKELY(s_strlen < len)) { /* Only execute if there is an embedded
	3886	NUL */
	3887	char * e = s + len;
	3888	char * sans_nuls;
	3889	STRLEN sans_nuls_len;
	3890	int try_non_controls;
	3891	char this_replacement_char[] = "?\0"; /* Room for a two-byte string,
	3892	making sure 2nd byte is NUL.
	3893	*/
	3894	STRLEN this_replacement_len;
	3895
	3896	/* If we don't know what non-NUL control character sorts lowest for
	3897	* this locale, find it */
	3898	if (PL_strxfrm_NUL_replacement == '\0') {
	3899	int j;
	3900	char * cur_min_x = NULL; /* The min_char's xfrm, (except it also
	3901	includes the collation index
	3902	prefixed. */
	3903
	3904	DEBUG_Lv(PerlIO_printf(Perl_debug_log, "Looking to replace NUL\n"));
	3905
	3906	/* Unlikely, but it may be that no control will work to replace
	3907	* NUL, in which case we instead look for any character. Controls
	3908	* are preferred because collation order is, in general, context
	3909	* sensitive, with adjoining characters affecting the order, and
	3910	* controls are less likely to have such interactions, allowing the
	3911	* NUL-replacement to stand on its own. (Another way to look at it
	3912	* is to imagine what would happen if the NUL were replaced by a
	3913	* combining character; it wouldn't work out all that well.) */
	3914	for (try_non_controls = 0;
	3915	try_non_controls < 2;
	3916	try_non_controls++)
	3917	{
	3918	/* Look through all legal code points (NUL isn't) */
	3919	for (j = 1; j < 256; j++) {
	3920	char * x; /* j's xfrm plus collation index */
	3921	STRLEN x_len; /* length of 'x' */
	3922	STRLEN trial_len = 1;
	3923	char cur_source[] = { '\0', '\0' };
	3924
	3925	/* Skip non-controls the first time through the loop. The
	3926	* controls in a UTF-8 locale are the L1 ones */
	3927	if (! try_non_controls && (PL_in_utf8_COLLATE_locale)
	3928	? ! isCNTRL_L1(j)
	3929	: ! isCNTRL_LC(j))
	3930	{
	3931	continue;
	3932	}
	3933
	3934	/* Create a 1-char string of the current code point */
	3935	cur_source[0] = (char) j;
	3936
	3937	/* Then transform it */
	3938	x = _mem_collxfrm(cur_source, trial_len, &x_len,
	3939	0 /* The string is not in UTF-8 */);
	3940
	3941	/* Ignore any character that didn't successfully transform.
	3942	* */
	3943	if (! x) {
	3944	continue;
	3945	}
	3946
	3947	/* If this character's transformation is lower than
	3948	* the current lowest, this one becomes the lowest */
	3949	if ( cur_min_x == NULL
	3950	\|\| strLT(x + COLLXFRM_HDR_LEN,
	3951	cur_min_x + COLLXFRM_HDR_LEN))
	3952	{
	3953	PL_strxfrm_NUL_replacement = j;
	3954	Safefree(cur_min_x);
	3955	cur_min_x = x;
	3956	}
	3957	else {
	3958	Safefree(x);
	3959	}
	3960	} /* end of loop through all 255 characters */
	3961
	3962	/* Stop looking if found */
	3963	if (cur_min_x) {
	3964	break;
	3965	}
	3966
	3967	/* Unlikely, but possible, if there aren't any controls that
	3968	* work in the locale, repeat the loop, looking for any
	3969	* character that works */
	3970	DEBUG_L(PerlIO_printf(Perl_debug_log,
	3971	"_mem_collxfrm: No control worked. Trying non-controls\n"));
	3972	} /* End of loop to try first the controls, then any char */
	3973
	3974	if (! cur_min_x) {
	3975	DEBUG_L(PerlIO_printf(Perl_debug_log,
	3976	"_mem_collxfrm: Couldn't find any character to replace"
	3977	" embedded NULs in locale %s with", PL_collation_name));
	3978	goto bad;
	3979	}
	3980
	3981	DEBUG_L(PerlIO_printf(Perl_debug_log,
	3982	"_mem_collxfrm: Replacing embedded NULs in locale %s with "
	3983	"0x%02X\n", PL_collation_name, PL_strxfrm_NUL_replacement));
	3984
	3985	Safefree(cur_min_x);
	3986	} /* End of determining the character that is to replace NULs */
	3987
	3988	/* If the replacement is variant under UTF-8, it must match the
	3989	* UTF8-ness of the original */
	3990	if ( ! UVCHR_IS_INVARIANT(PL_strxfrm_NUL_replacement) && utf8) {
	3991	this_replacement_char[0] =
	3992	UTF8_EIGHT_BIT_HI(PL_strxfrm_NUL_replacement);
	3993	this_replacement_char[1] =
	3994	UTF8_EIGHT_BIT_LO(PL_strxfrm_NUL_replacement);
	3995	this_replacement_len = 2;
	3996	}
	3997	else {
	3998	this_replacement_char[0] = PL_strxfrm_NUL_replacement;
	3999	/* this_replacement_char[1] = '\0' was done at initialization */
	4000	this_replacement_len = 1;
	4001	}
	4002
	4003	/* The worst case length for the replaced string would be if every
	4004	* character in it is NUL. Multiply that by the length of each
	4005	* replacement, and allow for a trailing NUL */
	4006	sans_nuls_len = (len * this_replacement_len) + 1;
	4007	Newx(sans_nuls, sans_nuls_len, char);
	4008	*sans_nuls = '\0';
	4009
	4010	/* Replace each NUL with the lowest collating control. Loop until have
	4011	* exhausted all the NULs */
	4012	while (s + s_strlen < e) {
	4013	my_strlcat(sans_nuls, s, sans_nuls_len);
	4014
	4015	/* Do the actual replacement */
	4016	my_strlcat(sans_nuls, this_replacement_char, sans_nuls_len);
	4017
	4018	/* Move past the input NUL */
	4019	s += s_strlen + 1;
	4020	s_strlen = strlen(s);
	4021	}
	4022
	4023	/* And add anything that trails the final NUL */
	4024	my_strlcat(sans_nuls, s, sans_nuls_len);
	4025
	4026	/* Switch so below we transform this modified string */
	4027	s = sans_nuls;
	4028	len = strlen(s);
	4029	} /* End of replacing NULs */
	4030
	4031	/* Make sure the UTF8ness of the string and locale match */
	4032	if (utf8 != PL_in_utf8_COLLATE_locale) {
	4033	/* XXX convert above Unicode to 10FFFF? */
	4034	const char * const t = s; /* Temporary so we can later find where the
	4035	input was */
	4036
	4037	/* Here they don't match. Change the string's to be what the locale is
	4038	* expecting */
	4039
	4040	if (! utf8) { /* locale is UTF-8, but input isn't; upgrade the input */
	4041	s = (char ) bytes_to_utf8((const U8 ) s, &len);
	4042	utf8 = TRUE;
	4043	}
	4044	else { /* locale is not UTF-8; but input is; downgrade the input */
	4045
	4046	s = (char ) bytes_from_utf8((const U8 ) s, &len, &utf8);
	4047
	4048	/* If the downgrade was successful we are done, but if the input
	4049	* contains things that require UTF-8 to represent, have to do
	4050	* damage control ... */
	4051	if (UNLIKELY(utf8)) {
	4052
	4053	/* What we do is construct a non-UTF-8 string with
	4054	* 1) the characters representable by a single byte converted
	4055	* to be so (if necessary);
	4056	* 2) and the rest converted to collate the same as the
	4057	* highest collating representable character. That makes
	4058	* them collate at the end. This is similar to how we
	4059	* handle embedded NULs, but we use the highest collating
	4060	* code point instead of the smallest. Like the NUL case,
	4061	* this isn't perfect, but is the best we can reasonably
	4062	* do. Every above-255 code point will sort the same as
	4063	* the highest-sorting 0-255 code point. If that code
	4064	* point can combine in a sequence with some other code
	4065	* points for weight calculations, us changing something to
	4066	* be it can adversely affect the results. But in most
	4067	* cases, it should work reasonably. And note that this is
	4068	* really an illegal situation: using code points above 255
	4069	* on a locale where only 0-255 are valid. If two strings
	4070	* sort entirely equal, then the sort order for the
	4071	* above-255 code points will be in code point order. */
	4072
	4073	utf8 = FALSE;
	4074
	4075	/* If we haven't calculated the code point with the maximum
	4076	* collating order for this locale, do so now */
	4077	if (! PL_strxfrm_max_cp) {
	4078	int j;
	4079
	4080	/* The current transformed string that collates the
	4081	* highest (except it also includes the prefixed collation
	4082	* index. */
	4083	char * cur_max_x = NULL;
	4084
	4085	/* Look through all legal code points (NUL isn't) */
	4086	for (j = 1; j < 256; j++) {
	4087	char * x;
	4088	STRLEN x_len;
	4089	char cur_source[] = { '\0', '\0' };
	4090
	4091	/* Create a 1-char string of the current code point */
	4092	cur_source[0] = (char) j;
	4093
	4094	/* Then transform it */
	4095	x = _mem_collxfrm(cur_source, 1, &x_len, FALSE);
	4096
	4097	/* If something went wrong (which it shouldn't), just
	4098	* ignore this code point */
	4099	if (! x) {
	4100	continue;
	4101	}
	4102
	4103	/* If this character's transformation is higher than
	4104	* the current highest, this one becomes the highest */
	4105	if ( cur_max_x == NULL
	4106	\|\| strGT(x + COLLXFRM_HDR_LEN,
	4107	cur_max_x + COLLXFRM_HDR_LEN))
	4108	{
	4109	PL_strxfrm_max_cp = j;
	4110	Safefree(cur_max_x);
	4111	cur_max_x = x;
	4112	}
	4113	else {
	4114	Safefree(x);
	4115	}
	4116	}
	4117
	4118	if (! cur_max_x) {
	4119	DEBUG_L(PerlIO_printf(Perl_debug_log,
	4120	"_mem_collxfrm: Couldn't find any character to"
	4121	" replace above-Latin1 chars in locale %s with",
	4122	PL_collation_name));
	4123	goto bad;
	4124	}
	4125
	4126	DEBUG_L(PerlIO_printf(Perl_debug_log,
	4127	"_mem_collxfrm: highest 1-byte collating character"
	4128	" in locale %s is 0x%02X\n",
	4129	PL_collation_name,
	4130	PL_strxfrm_max_cp));
	4131
	4132	Safefree(cur_max_x);
	4133	}
	4134
	4135	/* Here we know which legal code point collates the highest.
	4136	* We are ready to construct the non-UTF-8 string. The length
	4137	* will be at least 1 byte smaller than the input string
	4138	* (because we changed at least one 2-byte character into a
	4139	* single byte), but that is eaten up by the trailing NUL */
	4140	Newx(s, len, char);
	4141
	4142	{
	4143	STRLEN i;
	4144	STRLEN d= 0;
	4145	char * e = (char *) t + len;
	4146
	4147	for (i = 0; i < len; i+= UTF8SKIP(t + i)) {
	4148	U8 cur_char = t[i];
	4149	if (UTF8_IS_INVARIANT(cur_char)) {
	4150	s[d++] = cur_char;
	4151	}
	4152	else if (UTF8_IS_NEXT_CHAR_DOWNGRADEABLE(t + i, e)) {
	4153	s[d++] = EIGHT_BIT_UTF8_TO_NATIVE(cur_char, t[i+1]);
	4154	}
	4155	else { /* Replace illegal cp with highest collating
	4156	one */
	4157	s[d++] = PL_strxfrm_max_cp;
	4158	}
	4159	}
	4160	s[d++] = '\0';
	4161	Renew(s, d, char); /* Free up unused space */
	4162	}
	4163	}
	4164	}
	4165
	4166	/* Here, we have constructed a modified version of the input. It could
	4167	* be that we already had a modified copy before we did this version.
	4168	* If so, that copy is no longer needed */
	4169	if (t != input_string) {
	4170	Safefree(t);
	4171	}
	4172	}
	4173
	4174	length_in_chars = (utf8)
	4175	? utf8_length((U8 ) s, (U8 ) s + len)
	4176	: len;
	4177
	4178	/* The first element in the output is the collation id, used by
	4179	* sv_collxfrm(); then comes the space for the transformed string. The
	4180	* equation should give us a good estimate as to how much is needed */
	4181	xAlloc = COLLXFRM_HDR_LEN
	4182	+ PL_collxfrm_base
	4183	+ (PL_collxfrm_mult * length_in_chars);
	4184	Newx(xbuf, xAlloc, char);
	4185	if (UNLIKELY(! xbuf)) {
	4186	DEBUG_L(PerlIO_printf(Perl_debug_log,
	4187	"_mem_collxfrm: Couldn't malloc %zu bytes\n", xAlloc));
	4188	goto bad;
	4189	}
	4190
	4191	/* Store the collation id */
	4192	(U32)xbuf = PL_collation_ix;
	4193
	4194	/* Then the transformation of the input. We loop until successful, or we
	4195	* give up */
	4196	for (;;) {
	4197
	4198	*xlen = strxfrm(xbuf + COLLXFRM_HDR_LEN, s, xAlloc - COLLXFRM_HDR_LEN);
	4199
	4200	/* If the transformed string occupies less space than we told strxfrm()
	4201	* was available, it means it successfully transformed the whole
	4202	* string. */
	4203	if (*xlen < xAlloc - COLLXFRM_HDR_LEN) {
	4204
	4205	/* Some systems include a trailing NUL in the returned length.
	4206	* Ignore it, using a loop in case multiple trailing NULs are
	4207	* returned. */
	4208	while ( (*xlen) > 0
	4209	&& (xbuf + COLLXFRM_HDR_LEN + (xlen) - 1) == '\0')
	4210	{
	4211	(*xlen)--;
	4212	}
	4213
	4214	/* If the first try didn't get it, it means our prediction was low.
	4215	* Modify the coefficients so that we predict a larger value in any
	4216	* future transformations */
	4217	if (! first_time) {
	4218	STRLEN needed = xlen + 1; / +1 For trailing NUL */
	4219	STRLEN computed_guess = PL_collxfrm_base
	4220	+ (PL_collxfrm_mult * length_in_chars);
	4221
	4222	/* On zero-length input, just keep current slope instead of
	4223	* dividing by 0 */
	4224	const STRLEN new_m = (length_in_chars != 0)
	4225	? needed / length_in_chars
	4226	: PL_collxfrm_mult;
	4227
	4228	DEBUG_Lv(PerlIO_printf(Perl_debug_log,
	4229	"%s: %d: initial size of %zu bytes for a length "
	4230	"%zu string was insufficient, %zu needed\n",
	4231	__FILE__, __LINE__,
	4232	computed_guess, length_in_chars, needed));
	4233
	4234	/* If slope increased, use it, but discard this result for
	4235	* length 1 strings, as we can't be sure that it's a real slope
	4236	* change */
	4237	if (length_in_chars > 1 && new_m > PL_collxfrm_mult) {
	4238
	4239	# ifdef DEBUGGING
	4240
	4241	STRLEN old_m = PL_collxfrm_mult;
	4242	STRLEN old_b = PL_collxfrm_base;
	4243
	4244	# endif
	4245
	4246	PL_collxfrm_mult = new_m;
	4247	PL_collxfrm_base = 1; /* +1 For trailing NUL */
	4248	computed_guess = PL_collxfrm_base
	4249	+ (PL_collxfrm_mult * length_in_chars);
	4250	if (computed_guess < needed) {
	4251	PL_collxfrm_base += needed - computed_guess;
	4252	}
	4253
	4254	DEBUG_Lv(PerlIO_printf(Perl_debug_log,
	4255	"%s: %d: slope is now %zu; was %zu, base "
	4256	"is now %zu; was %zu\n",
	4257	__FILE__, __LINE__,
	4258	PL_collxfrm_mult, old_m,
	4259	PL_collxfrm_base, old_b));
	4260	}
	4261	else { /* Slope didn't change, but 'b' did */
	4262	const STRLEN new_b = needed
	4263	- computed_guess
	4264	+ PL_collxfrm_base;
	4265	DEBUG_Lv(PerlIO_printf(Perl_debug_log,
	4266	"%s: %d: base is now %zu; was %zu\n",
	4267	__FILE__, __LINE__,
	4268	new_b, PL_collxfrm_base));
	4269	PL_collxfrm_base = new_b;
	4270	}
	4271	}
	4272
	4273	break;
	4274	}
	4275
	4276	if (UNLIKELY(*xlen >= PERL_INT_MAX)) {
	4277	DEBUG_L(PerlIO_printf(Perl_debug_log,
	4278	"_mem_collxfrm: Needed %zu bytes, max permissible is %u\n",
	4279	*xlen, PERL_INT_MAX));
	4280	goto bad;
	4281	}
	4282
	4283	/* A well-behaved strxfrm() returns exactly how much space it needs
	4284	* (usually not including the trailing NUL) when it fails due to not
	4285	* enough space being provided. Assume that this is the case unless
	4286	* it's been proven otherwise */
	4287	if (LIKELY(PL_strxfrm_is_behaved) && first_time) {
	4288	xAlloc = *xlen + COLLXFRM_HDR_LEN + 1;
	4289	}
	4290	else { /* Here, either:
	4291	* 1) The strxfrm() has previously shown bad behavior; or
	4292	* 2) It isn't the first time through the loop, which means
	4293	* that the strxfrm() is now showing bad behavior, because
	4294	* we gave it what it said was needed in the previous
	4295	* iteration, and it came back saying it needed still more.
	4296	* (Many versions of cygwin fit this. When the buffer size
	4297	* isn't sufficient, they return the input size instead of
	4298	* how much is needed.)
	4299	* Increase the buffer size by a fixed percentage and try again.
	4300	* */
	4301	xAlloc += (xAlloc / 4) + 1;
	4302	PL_strxfrm_is_behaved = FALSE;
	4303
	4304	# ifdef DEBUGGING
	4305
	4306	if (DEBUG_Lv_TEST \|\| debug_initialization) {
	4307	PerlIO_printf(Perl_debug_log,
	4308	"_mem_collxfrm required more space than previously calculated"
	4309	" for locale %s, trying again with new guess=%d+%zu\n",
	4310	PL_collation_name, (int) COLLXFRM_HDR_LEN,
	4311	xAlloc - COLLXFRM_HDR_LEN);
	4312	}
	4313
	4314	# endif
	4315
	4316	}
	4317
	4318	Renew(xbuf, xAlloc, char);
	4319	if (UNLIKELY(! xbuf)) {
	4320	DEBUG_L(PerlIO_printf(Perl_debug_log,
	4321	"_mem_collxfrm: Couldn't realloc %zu bytes\n", xAlloc));
	4322	goto bad;
	4323	}
	4324
	4325	first_time = FALSE;
	4326	}
	4327
	4328
	4329	# ifdef DEBUGGING
	4330
	4331	if (DEBUG_Lv_TEST \|\| debug_initialization) {
	4332
	4333	print_collxfrm_input_and_return(s, s + len, xlen, utf8);
	4334	PerlIO_printf(Perl_debug_log, "Its xfrm is:");
	4335	PerlIO_printf(Perl_debug_log, "%s\n",
	4336	_byte_dump_string((U8 *) xbuf + COLLXFRM_HDR_LEN,
	4337	*xlen, 1));
	4338	}
	4339
	4340	# endif
	4341
	4342	/* Free up unneeded space; retain ehough for trailing NUL */
	4343	Renew(xbuf, COLLXFRM_HDR_LEN + *xlen + 1, char);
	4344
	4345	if (s != input_string) {
	4346	Safefree(s);
	4347	}
	4348
	4349	return xbuf;
	4350
	4351	bad:
	4352	Safefree(xbuf);
	4353	if (s != input_string) {
	4354	Safefree(s);
	4355	}
	4356	*xlen = 0;
	4357
	4358	# ifdef DEBUGGING
	4359
	4360	if (DEBUG_Lv_TEST \|\| debug_initialization) {
	4361	print_collxfrm_input_and_return(s, s + len, NULL, utf8);
	4362	}
	4363
	4364	# endif
	4365
	4366	return NULL;
	4367	}
	4368
	4369	# ifdef DEBUGGING
	4370
	4371	STATIC void
	4372	S_print_collxfrm_input_and_return(pTHX_
	4373	const char * const s,
	4374	const char * const e,
	4375	const STRLEN * const xlen,
	4376	const bool is_utf8)
	4377	{
	4378
	4379	PERL_ARGS_ASSERT_PRINT_COLLXFRM_INPUT_AND_RETURN;
	4380
	4381	PerlIO_printf(Perl_debug_log, "_mem_collxfrm[%" UVuf "]: returning ",
	4382	(UV)PL_collation_ix);
	4383	if (xlen) {
	4384	PerlIO_printf(Perl_debug_log, "%zu", *xlen);
	4385	}
	4386	else {
	4387	PerlIO_printf(Perl_debug_log, "NULL");
	4388	}
	4389	PerlIO_printf(Perl_debug_log, " for locale '%s', string='",
	4390	PL_collation_name);
	4391	print_bytes_for_locale(s, e, is_utf8);
	4392
	4393	PerlIO_printf(Perl_debug_log, "'\n");
	4394	}
	4395
	4396	# endif /* DEBUGGING */
	4397	#endif /* USE_LOCALE_COLLATE */
	4398	#ifdef USE_LOCALE
	4399	# ifdef DEBUGGING
	4400
	4401	STATIC void
	4402	S_print_bytes_for_locale(pTHX_
	4403	const char * const s,
	4404	const char * const e,
	4405	const bool is_utf8)
	4406	{
	4407	const char * t = s;
	4408	bool prev_was_printable = TRUE;
	4409	bool first_time = TRUE;
	4410
	4411	PERL_ARGS_ASSERT_PRINT_BYTES_FOR_LOCALE;
	4412
	4413	while (t < e) {
	4414	UV cp = (is_utf8)
	4415	? utf8_to_uvchr_buf((U8 *) t, e, NULL)
	4416	: * (U8 *) t;
	4417	if (isPRINT(cp)) {
	4418	if (! prev_was_printable) {
	4419	PerlIO_printf(Perl_debug_log, " ");
	4420	}
	4421	PerlIO_printf(Perl_debug_log, "%c", (U8) cp);
	4422	prev_was_printable = TRUE;
	4423	}
	4424	else {
	4425	if (! first_time) {
	4426	PerlIO_printf(Perl_debug_log, " ");
	4427	}
	4428	PerlIO_printf(Perl_debug_log, "%02" UVXf, cp);
	4429	prev_was_printable = FALSE;
	4430	}
	4431	t += (is_utf8) ? UTF8SKIP(t) : 1;
	4432	first_time = FALSE;
	4433	}
	4434	}
	4435
	4436	# endif /* #ifdef DEBUGGING */
	4437
	4438	STATIC const char *
	4439	S_switch_category_locale_to_template(pTHX_ const int switch_category, const int template_category, const char * template_locale)
	4440	{
	4441	/* Changes the locale for LC_'switch_category" to that of
	4442	* LC_'template_category', if they aren't already the same. If not NULL,
	4443	* 'template_locale' is the locale that 'template_category' is in.
	4444	*
	4445	* Returns a copy of the name of the original locale for 'switch_category'
	4446	* so can be switched back to with the companion function
	4447	* restore_switched_locale(), (NULL if no restoral is necessary.) */
	4448
	4449	char * restore_to_locale = NULL;
	4450
	4451	if (switch_category == template_category) { /* No changes needed */
	4452	return NULL;
	4453	}
	4454
	4455	/* Find the original locale of the category we may need to change, so that
	4456	* it can be restored to later */
	4457	restore_to_locale = stdize_locale(savepv(do_setlocale_r(switch_category,
	4458	NULL)));
	4459	if (! restore_to_locale) {
	4460	Perl_croak(aTHX_
	4461	"panic: %s: %d: Could not find current %s locale, errno=%d\n",
	4462	__FILE__, __LINE__, category_name(switch_category), errno);
	4463	}
	4464
	4465	/* If the locale of the template category wasn't passed in, find it now */
	4466	if (template_locale == NULL) {
	4467	template_locale = do_setlocale_r(template_category, NULL);
	4468	if (! template_locale) {
	4469	Perl_croak(aTHX_
	4470	"panic: %s: %d: Could not find current %s locale, errno=%d\n",
	4471	__FILE__, __LINE__, category_name(template_category), errno);
	4472	}
	4473	}
	4474
	4475	/* It the locales are the same, there's nothing to do */
	4476	if (strEQ(restore_to_locale, template_locale)) {
	4477	Safefree(restore_to_locale);
	4478
	4479	DEBUG_Lv(PerlIO_printf(Perl_debug_log, "%s locale unchanged as %s\n",
	4480	category_name(switch_category), restore_to_locale));
	4481
	4482	return NULL;
	4483	}
	4484
	4485	/* Finally, change the locale to the template one */
	4486	if (! do_setlocale_r(switch_category, template_locale)) {
	4487	Perl_croak(aTHX_
	4488	"panic: %s: %d: Could not change %s locale to %s, errno=%d\n",
	4489	__FILE__, __LINE__, category_name(switch_category),
	4490	template_locale, errno);
	4491	}
	4492
	4493	DEBUG_Lv(PerlIO_printf(Perl_debug_log, "%s locale switched to %s\n",
	4494	category_name(switch_category), template_locale));
	4495
	4496	return restore_to_locale;
	4497	}
	4498
	4499	STATIC void
	4500	S_restore_switched_locale(pTHX_ const int category, const char * const original_locale)
	4501	{
	4502	/* Restores the locale for LC_'category' to 'original_locale' (which is a
	4503	* copy that will be freed by this function), or do nothing if the latter
	4504	* parameter is NULL */
	4505
	4506	if (original_locale == NULL) {
	4507	return;
	4508	}
	4509
	4510	if (! do_setlocale_r(category, original_locale)) {
	4511	Perl_croak(aTHX_
	4512	"panic: %s: %d: setlocale %s restore to %s failed, errno=%d\n",
	4513	__FILE__, __LINE__,
	4514	category_name(category), original_locale, errno);
	4515	}
	4516
	4517	Safefree(original_locale);
	4518	}
	4519
	4520	/* is_cur_LC_category_utf8 uses a small char buffer to avoid malloc/free */
	4521	#define CUR_LC_BUFFER_SIZE 64
	4522
	4523	bool
	4524	Perl__is_cur_LC_category_utf8(pTHX_ int category)
	4525	{
	4526	/* Returns TRUE if the current locale for 'category' is UTF-8; FALSE
	4527	* otherwise. 'category' may not be LC_ALL. If the platform doesn't have
	4528	* nl_langinfo(), nor MB_CUR_MAX, this employs a heuristic, which hence
	4529	* could give the wrong result. The result will very likely be correct for
	4530	* languages that have commonly used non-ASCII characters, but for notably
	4531	* English, it comes down to if the locale's name ends in something like
	4532	* "UTF-8". It errs on the side of not being a UTF-8 locale.
	4533	*
	4534	* If the platform is early C89, not containing mbtowc(), or we are
	4535	* compiled to not pay attention to LC_CTYPE, this employs heuristics.
	4536	* These work very well for non-Latin locales or those whose currency
	4537	* symbol isn't a '$' nor plain ASCII text. But without LC_CTYPE and at
	4538	* least MB_CUR_MAX, English locales with an ASCII currency symbol depend
	4539	* on the name containing UTF-8 or not. */
	4540
	4541	/* Name of current locale corresponding to the input category */
	4542	const char *save_input_locale = NULL;
	4543
	4544	bool is_utf8 = FALSE; /* The return value */
	4545
	4546	/* The variables below are for the cache of previous lookups using this
	4547	* function. The cache is a C string, described at the definition for
	4548	* 'C_and_POSIX_utf8ness'.
	4549	*
	4550	* The first part of the cache is fixed, for the C and POSIX locales. The
	4551	* varying part starts just after them. */
	4552	char * utf8ness_cache = PL_locale_utf8ness + STRLENs(C_and_POSIX_utf8ness);
	4553
	4554	Size_t utf8ness_cache_size; /* Size of the varying portion */
	4555	Size_t input_name_len; /* Length in bytes of save_input_locale */
	4556	Size_t input_name_len_with_overhead; /* plus extra chars used to store
	4557	the name in the cache */
	4558	char * delimited; /* The name plus the delimiters used to store
	4559	it in the cache */
	4560	char buffer[CUR_LC_BUFFER_SIZE]; /* small buffer */
	4561	char * name_pos; /* position of 'delimited' in the cache, or 0
	4562	if not there */
	4563
	4564
	4565	# ifdef LC_ALL
	4566
	4567	assert(category != LC_ALL);
	4568
	4569	# endif
	4570
	4571	/* Get the desired category's locale */
	4572	save_input_locale = stdize_locale(savepv(do_setlocale_r(category, NULL)));
	4573	if (! save_input_locale) {
	4574	Perl_croak(aTHX_
	4575	"panic: %s: %d: Could not find current %s locale, errno=%d\n",
	4576	__FILE__, __LINE__, category_name(category), errno);
	4577	}
	4578
	4579	DEBUG_L(PerlIO_printf(Perl_debug_log,
	4580	"Current locale for %s is %s\n",
	4581	category_name(category), save_input_locale));
	4582
	4583	input_name_len = strlen(save_input_locale);
	4584
	4585	/* In our cache, each name is accompanied by two delimiters and a single
	4586	* utf8ness digit */
	4587	input_name_len_with_overhead = input_name_len + 3;
	4588
	4589	if ( input_name_len_with_overhead <= CUR_LC_BUFFER_SIZE ) {
	4590	/* we can use the buffer, avoid a malloc */
	4591	delimited = buffer;
	4592	} else { /* need a malloc */
	4593	/* Allocate and populate space for a copy of the name surrounded by the
	4594	* delimiters */
	4595	Newx(delimited, input_name_len_with_overhead, char);
	4596	}
	4597
	4598	delimited[0] = UTF8NESS_SEP[0];
	4599	Copy(save_input_locale, delimited + 1, input_name_len, char);
	4600	delimited[input_name_len+1] = UTF8NESS_PREFIX[0];
	4601	delimited[input_name_len+2] = '\0';
	4602
	4603	/* And see if that is in the cache */
	4604	name_pos = instr(PL_locale_utf8ness, delimited);
	4605	if (name_pos) {
	4606	is_utf8 = *(name_pos + input_name_len_with_overhead - 1) - '0';
	4607
	4608	# ifdef DEBUGGING
	4609
	4610	if (DEBUG_Lv_TEST \|\| debug_initialization) {
	4611	PerlIO_printf(Perl_debug_log, "UTF8ness for locale %s=%d, \n",
	4612	save_input_locale, is_utf8);
	4613	}
	4614
	4615	# endif
	4616
	4617	/* And, if not already in that position, move it to the beginning of
	4618	* the non-constant portion of the list, since it is the most recently
	4619	* used. (We don't have to worry about overflow, since just moving
	4620	* existing names around) */
	4621	if (name_pos > utf8ness_cache) {
	4622	Move(utf8ness_cache,
	4623	utf8ness_cache + input_name_len_with_overhead,
	4624	name_pos - utf8ness_cache, char);
	4625	Copy(delimited,
	4626	utf8ness_cache,
	4627	input_name_len_with_overhead - 1, char);
	4628	utf8ness_cache[input_name_len_with_overhead - 1] = is_utf8 + '0';
	4629	}
	4630
	4631	/* free only when not using the buffer */
	4632	if ( delimited != buffer ) Safefree(delimited);
	4633	Safefree(save_input_locale);
	4634	return is_utf8;
	4635	}
	4636
	4637	/* Here we don't have stored the utf8ness for the input locale. We have to
	4638	* calculate it */
	4639
	4640	# if defined(USE_LOCALE_CTYPE) \
	4641	&& ( defined(HAS_NL_LANGINFO) \
	4642	\|\| (defined(HAS_MBTOWC) \|\| defined(HAS_MBRTOWC)))
	4643
	4644	{
	4645	const char *original_ctype_locale
	4646	= switch_category_locale_to_template(LC_CTYPE,
	4647	category,
	4648	save_input_locale);
	4649
	4650	/* Here the current LC_CTYPE is set to the locale of the category whose
	4651	* information is desired. This means that nl_langinfo() and mbtowc()
	4652	* should give the correct results */
	4653
	4654	# ifdef MB_CUR_MAX /* But we can potentially rule out UTF-8ness, avoiding
	4655	calling the functions if we have this */
	4656
	4657	/* Standard UTF-8 needs at least 4 bytes to represent the maximum
	4658	* Unicode code point. */
	4659
	4660	DEBUG_L(PerlIO_printf(Perl_debug_log, "%s: %d: MB_CUR_MAX=%d\n",
	4661	__FILE__, __LINE__, (int) MB_CUR_MAX));
	4662	if ((unsigned) MB_CUR_MAX < STRLENs(MAX_UNICODE_UTF8)) {
	4663	is_utf8 = FALSE;
	4664	restore_switched_locale(LC_CTYPE, original_ctype_locale);
	4665	goto finish_and_return;
	4666	}
	4667
	4668	# endif
	4669	# if defined(HAS_NL_LANGINFO)
	4670
	4671	{ /* The task is easiest if the platform has this POSIX 2001 function.
	4672	Except on some platforms it can wrongly return "", so have to have
	4673	a fallback. And it can return that it's UTF-8, even if there are
	4674	variances from that. For example, Turkish locales may use the
	4675	alternate dotted I rules, and sometimes it appears to be a
	4676	defective locale definition. XXX We should probably check for
	4677	these in the Latin1 range and warn (but on glibc, requires
	4678	iswalnum() etc. due to their not handling 80-FF correctly */
	4679	const char *codeset = my_nl_langinfo(CODESET, FALSE);
	4680	/* FALSE => already in dest locale */
	4681
	4682	DEBUG_Lv(PerlIO_printf(Perl_debug_log,
	4683	"\tnllanginfo returned CODESET '%s'\n", codeset));
	4684
	4685	if (codeset && strNE(codeset, "")) {
	4686
	4687	/* If the implementation of foldEQ() somehow were
	4688	* to change to not go byte-by-byte, this could
	4689	* read past end of string, as only one length is
	4690	* checked. But currently, a premature NUL will
	4691	* compare false, and it will stop there */
	4692	is_utf8 = cBOOL( foldEQ(codeset, STR_WITH_LEN("UTF-8"))
	4693	\|\| foldEQ(codeset, STR_WITH_LEN("UTF8")));
	4694
	4695	DEBUG_L(PerlIO_printf(Perl_debug_log,
	4696	"\tnllanginfo returned CODESET '%s'; ?UTF8 locale=%d\n",
	4697	codeset, is_utf8));
	4698	restore_switched_locale(LC_CTYPE, original_ctype_locale);
	4699	goto finish_and_return;
	4700	}
	4701	}
	4702
	4703	# endif
	4704	# if defined(HAS_MBTOWC) \|\| defined(HAS_MBRTOWC)
	4705	/* We can see if this is a UTF-8-like locale if have mbtowc(). It was a
	4706	* late adder to C89, so very likely to have it. However, testing has
	4707	* shown that, like nl_langinfo() above, there are locales that are not
	4708	* strictly UTF-8 that this will return that they are */
	4709
	4710	{
	4711	wchar_t wc;
	4712	int len;
	4713	dSAVEDERRNO;
	4714
	4715	# if defined(HAS_MBRTOWC) && defined(USE_ITHREADS)
	4716
	4717	mbstate_t ps;
	4718
	4719	# endif
	4720
	4721	/* mbrtowc() and mbtowc() convert a byte string to a wide
	4722	* character. Feed a byte string to one of them and check that the
	4723	* result is the expected Unicode code point */
	4724
	4725	# if defined(HAS_MBRTOWC) && defined(USE_ITHREADS)
	4726	/* Prefer this function if available, as it's reentrant */
	4727
	4728	memset(&ps, 0, sizeof(ps));;
	4729	PERL_UNUSED_RESULT(mbrtowc(&wc, NULL, 0, &ps)); /* Reset any shift
	4730	state */
	4731	SETERRNO(0, 0);
	4732	len = mbrtowc(&wc, STR_WITH_LEN(REPLACEMENT_CHARACTER_UTF8), &ps);
	4733	SAVE_ERRNO;
	4734
	4735	# else
	4736
	4737	LOCALE_LOCK;
	4738	PERL_UNUSED_RESULT(mbtowc(&wc, NULL, 0));/* Reset any shift state */
	4739	SETERRNO(0, 0);
	4740	len = mbtowc(&wc, STR_WITH_LEN(REPLACEMENT_CHARACTER_UTF8));
	4741	SAVE_ERRNO;
	4742	LOCALE_UNLOCK;
	4743
	4744	# endif
	4745
	4746	RESTORE_ERRNO;
	4747	DEBUG_Lv(PerlIO_printf(Perl_debug_log,
	4748	"\treturn from mbtowc; len=%d; code_point=%x; errno=%d\n",
	4749	len, (unsigned int) wc, GET_ERRNO));
	4750
	4751	is_utf8 = cBOOL( len == STRLENs(REPLACEMENT_CHARACTER_UTF8)
	4752	&& wc == (wchar_t) UNICODE_REPLACEMENT);
	4753	}
	4754
	4755	# endif
	4756
	4757	restore_switched_locale(LC_CTYPE, original_ctype_locale);
	4758	goto finish_and_return;
	4759	}
	4760
	4761	# else
	4762
	4763	/* Here, we must have a C89 compiler that doesn't have mbtowc(). Next
	4764	* try looking at the currency symbol to see if it disambiguates
	4765	* things. Often that will be in the native script, and if the symbol
	4766	* isn't in UTF-8, we know that the locale isn't. If it is non-ASCII
	4767	* UTF-8, we infer that the locale is too, as the odds of a non-UTF8
	4768	* string being valid UTF-8 are quite small */
	4769
	4770	# ifdef USE_LOCALE_MONETARY
	4771
	4772	/* If have LC_MONETARY, we can look at the currency symbol. Often that
	4773	* will be in the native script. We do this one first because there is
	4774	* just one string to examine, so potentially avoids work */
	4775
	4776	{
	4777	const char *original_monetary_locale
	4778	= switch_category_locale_to_template(LC_MONETARY,
	4779	category,
	4780	save_input_locale);
	4781	bool only_ascii = FALSE;
	4782	const U8 * currency_string
	4783	= (const U8 *) my_nl_langinfo(CRNCYSTR, FALSE);
	4784	/* 2nd param not relevant for this item */
	4785	const U8 * first_variant;
	4786
	4787	assert( *currency_string == '-'
	4788	\|\| *currency_string == '+'
	4789	\|\| *currency_string == '.');
	4790
	4791	currency_string++;
	4792
	4793	if (is_utf8_invariant_string_loc(currency_string, 0, &first_variant))
	4794	{
	4795	DEBUG_L(PerlIO_printf(Perl_debug_log, "Couldn't get currency symbol for %s, or contains only ASCII; can't use for determining if UTF-8 locale\n", save_input_locale));
	4796	only_ascii = TRUE;
	4797	}
	4798	else {
	4799	is_utf8 = is_strict_utf8_string(first_variant, 0);
	4800	}
	4801
	4802	restore_switched_locale(LC_MONETARY, original_monetary_locale);
	4803
	4804	if (! only_ascii) {
	4805
	4806	/* It isn't a UTF-8 locale if the symbol is not legal UTF-8;
	4807	* otherwise assume the locale is UTF-8 if and only if the symbol
	4808	* is non-ascii UTF-8. */
	4809	DEBUG_Lv(PerlIO_printf(Perl_debug_log, "\t?Currency symbol for %s is UTF-8=%d\n",
	4810	save_input_locale, is_utf8));
	4811	goto finish_and_return;
	4812	}
	4813	}
	4814
	4815	# endif /* USE_LOCALE_MONETARY */
	4816	# if defined(HAS_STRFTIME) && defined(USE_LOCALE_TIME)
	4817
	4818	/* Still haven't found a non-ASCII string to disambiguate UTF-8 or not. Try
	4819	* the names of the months and weekdays, timezone, and am/pm indicator */
	4820	{
	4821	const char *original_time_locale
	4822	= switch_category_locale_to_template(LC_TIME,
	4823	category,
	4824	save_input_locale);
	4825	int hour = 10;
	4826	bool is_dst = FALSE;
	4827	int dom = 1;
	4828	int month = 0;
	4829	int i;
	4830	char * formatted_time;
	4831
	4832	/* Here the current LC_TIME is set to the locale of the category
	4833	* whose information is desired. Look at all the days of the week and
	4834	* month names, and the timezone and am/pm indicator for UTF-8 variant
	4835	* characters. The first such a one found will tell us if the locale
	4836	* is UTF-8 or not */
	4837
	4838	for (i = 0; i < 7 + 12; i++) { /* 7 days; 12 months */
	4839	formatted_time = my_strftime("%A %B %Z %p",
	4840	0, 0, hour, dom, month, 2012 - 1900, 0, 0, is_dst);
	4841	if ( ! formatted_time
	4842	\|\| is_utf8_invariant_string((U8 *) formatted_time, 0))
	4843	{
	4844
	4845	/* Here, we didn't find a non-ASCII. Try the next time through
	4846	* with the complemented dst and am/pm, and try with the next
	4847	* weekday. After we have gotten all weekdays, try the next
	4848	* month */
	4849	is_dst = ! is_dst;
	4850	hour = (hour + 12) % 24;
	4851	dom++;
	4852	if (i > 6) {
	4853	month++;
	4854	}
	4855	continue;
	4856	}
	4857
	4858	/* Here, we have a non-ASCII. Return TRUE is it is valid UTF8;
	4859	* false otherwise. But first, restore LC_TIME to its original
	4860	* locale if we changed it */
	4861	restore_switched_locale(LC_TIME, original_time_locale);
	4862
	4863	DEBUG_Lv(PerlIO_printf(Perl_debug_log, "\t?time-related strings for %s are UTF-8=%d\n",
	4864	save_input_locale,
	4865	is_utf8_string((U8 *) formatted_time, 0)));
	4866	is_utf8 = is_utf8_string((U8 *) formatted_time, 0);
	4867	goto finish_and_return;
	4868	}
	4869
	4870	/* Falling off the end of the loop indicates all the names were just
	4871	* ASCII. Go on to the next test. If we changed it, restore LC_TIME
	4872	* to its original locale */
	4873	restore_switched_locale(LC_TIME, original_time_locale);
	4874	DEBUG_Lv(PerlIO_printf(Perl_debug_log, "All time-related words for %s contain only ASCII; can't use for determining if UTF-8 locale\n", save_input_locale));
	4875	}
	4876
	4877	# endif
	4878
	4879	# if 0 && defined(USE_LOCALE_MESSAGES) && defined(HAS_SYS_ERRLIST)
	4880
	4881	/* This code is ifdefd out because it was found to not be necessary in testing
	4882	* on our dromedary test machine, which has over 700 locales. There, this
	4883	* added no value to looking at the currency symbol and the time strings. I
	4884	* left it in so as to avoid rewriting it if real-world experience indicates
	4885	* that dromedary is an outlier. Essentially, instead of returning abpve if we
	4886	* haven't found illegal utf8, we continue on and examine all the strerror()
	4887	* messages on the platform for utf8ness. If all are ASCII, we still don't
	4888	* know the answer; but otherwise we have a pretty good indication of the
	4889	* utf8ness. The reason this doesn't help much is that the messages may not
	4890	* have been translated into the locale. The currency symbol and time strings
	4891	* are much more likely to have been translated. */
	4892	{
	4893	int e;
	4894	bool non_ascii = FALSE;
	4895	const char *original_messages_locale
	4896	= switch_category_locale_to_template(LC_MESSAGES,
	4897	category,
	4898	save_input_locale);
	4899	const char * errmsg = NULL;
	4900
	4901	/* Here the current LC_MESSAGES is set to the locale of the category
	4902	* whose information is desired. Look through all the messages. We
	4903	* can't use Strerror() here because it may expand to code that
	4904	* segfaults in miniperl */
	4905
	4906	for (e = 0; e <= sys_nerr; e++) {
	4907	errno = 0;
	4908	errmsg = sys_errlist[e];
	4909	if (errno \|\| !errmsg) {
	4910	break;
	4911	}
	4912	errmsg = savepv(errmsg);
	4913	if (! is_utf8_invariant_string((U8 *) errmsg, 0)) {
	4914	non_ascii = TRUE;
	4915	is_utf8 = is_utf8_string((U8 *) errmsg, 0);
	4916	break;
	4917	}
	4918	}
	4919	Safefree(errmsg);
	4920
	4921	restore_switched_locale(LC_MESSAGES, original_messages_locale);
	4922
	4923	if (non_ascii) {
	4924
	4925	/* Any non-UTF-8 message means not a UTF-8 locale; if all are valid,
	4926	* any non-ascii means it is one; otherwise we assume it isn't */
	4927	DEBUG_Lv(PerlIO_printf(Perl_debug_log, "\t?error messages for %s are UTF-8=%d\n",
	4928	save_input_locale,
	4929	is_utf8));
	4930	goto finish_and_return;
	4931	}
	4932
	4933	DEBUG_L(PerlIO_printf(Perl_debug_log, "All error messages for %s contain only ASCII; can't use for determining if UTF-8 locale\n", save_input_locale));
	4934	}
	4935
	4936	# endif
	4937	# ifndef EBCDIC /* On os390, even if the name ends with "UTF-8', it isn't a
	4938	UTF-8 locale */
	4939
	4940	/* As a last resort, look at the locale name to see if it matches
	4941	* qr/UTF -? * 8 /ix, or some other common locale names. This "name", the
	4942	* return of setlocale(), is actually defined to be opaque, so we can't
	4943	* really rely on the absence of various substrings in the name to indicate
	4944	* its UTF-8ness, but if it has UTF8 in the name, it is extremely likely to
	4945	* be a UTF-8 locale. Similarly for the other common names */
	4946
	4947	{
	4948	const Size_t final_pos = strlen(save_input_locale) - 1;
	4949
	4950	if (final_pos >= 3) {
	4951	const char *name = save_input_locale;
	4952
	4953	/* Find next 'U' or 'u' and look from there */
	4954	while ((name += strcspn(name, "Uu") + 1)
	4955	<= save_input_locale + final_pos - 2)
	4956	{
	4957	if ( isALPHA_FOLD_NE(*name, 't')
	4958	\|\| isALPHA_FOLD_NE(*(name + 1), 'f'))
	4959	{
	4960	continue;
	4961	}
	4962	name += 2;
	4963	if (*(name) == '-') {
	4964	if ((name > save_input_locale + final_pos - 1)) {
	4965	break;
	4966	}
	4967	name++;
	4968	}
	4969	if (*(name) == '8') {
	4970	DEBUG_L(PerlIO_printf(Perl_debug_log,
	4971	"Locale %s ends with UTF-8 in name\n",
	4972	save_input_locale));
	4973	is_utf8 = TRUE;
	4974	goto finish_and_return;
	4975	}
	4976	}
	4977	DEBUG_L(PerlIO_printf(Perl_debug_log,
	4978	"Locale %s doesn't end with UTF-8 in name\n",
	4979	save_input_locale));
	4980	}
	4981
	4982	# ifdef WIN32
	4983
	4984	/* http://msdn.microsoft.com/en-us/library/windows/desktop/dd317756.aspx */
	4985	if (memENDs(save_input_locale, final_pos, "65001")) {
	4986	DEBUG_L(PerlIO_printf(Perl_debug_log,
	4987	"Locale %s ends with 65001 in name, is UTF-8 locale\n",
	4988	save_input_locale));
	4989	is_utf8 = TRUE;
	4990	goto finish_and_return;
	4991	}
	4992
	4993	# endif
	4994	}
	4995	# endif
	4996
	4997	/* Other common encodings are the ISO 8859 series, which aren't UTF-8. But
	4998	* since we are about to return FALSE anyway, there is no point in doing
	4999	* this extra work */
	5000
	5001	# if 0
	5002	if (instr(save_input_locale, "8859")) {
	5003	DEBUG_L(PerlIO_printf(Perl_debug_log,
	5004	"Locale %s has 8859 in name, not UTF-8 locale\n",
	5005	save_input_locale));
	5006	is_utf8 = FALSE;
	5007	goto finish_and_return;
	5008	}
	5009	# endif
	5010
	5011	DEBUG_L(PerlIO_printf(Perl_debug_log,
	5012	"Assuming locale %s is not a UTF-8 locale\n",
	5013	save_input_locale));
	5014	is_utf8 = FALSE;
	5015
	5016	# endif /* the code that is compiled when no modern LC_CTYPE */
	5017
	5018	finish_and_return:
	5019
	5020	/* Cache this result so we don't have to go through all this next time. */
	5021	utf8ness_cache_size = sizeof(PL_locale_utf8ness)
	5022	- (utf8ness_cache - PL_locale_utf8ness);
	5023
	5024	/* But we can't save it if it is too large for the total space available */
	5025	if (LIKELY(input_name_len_with_overhead < utf8ness_cache_size)) {
	5026	Size_t utf8ness_cache_len = strlen(utf8ness_cache);
	5027
	5028	/* Here it can fit, but we may need to clear out the oldest cached
	5029	* result(s) to do so. Check */
	5030	if (utf8ness_cache_len + input_name_len_with_overhead
	5031	>= utf8ness_cache_size)
	5032	{
	5033	/* Here we have to clear something out to make room for this.
	5034	* Start looking at the rightmost place where it could fit and find
	5035	* the beginning of the entry that extends past that. */
	5036	char * cutoff = (char *) my_memrchr(utf8ness_cache,
	5037	UTF8NESS_SEP[0],
	5038	utf8ness_cache_size
	5039	- input_name_len_with_overhead);
	5040
	5041	assert(cutoff);
	5042	assert(cutoff >= utf8ness_cache);
	5043
	5044	/* This and all subsequent entries must be removed */
	5045	*cutoff = '\0';
	5046	utf8ness_cache_len = strlen(utf8ness_cache);
	5047	}
	5048
	5049	/* Make space for the new entry */
	5050	Move(utf8ness_cache,
	5051	utf8ness_cache + input_name_len_with_overhead,
	5052	utf8ness_cache_len + 1 /* Incl. trailing NUL */, char);
	5053
	5054	/* And insert it */
	5055	Copy(delimited, utf8ness_cache, input_name_len_with_overhead - 1, char);
	5056	utf8ness_cache[input_name_len_with_overhead - 1] = is_utf8 + '0';
	5057
	5058	if ((PL_locale_utf8ness[strlen(PL_locale_utf8ness)-1] & ~1) != '0') {
	5059	Perl_croak(aTHX_
	5060	"panic: %s: %d: Corrupt utf8ness_cache=%s\nlen=%zu,"
	5061	" inserted_name=%s, its_len=%zu\n",
	5062	__FILE__, __LINE__,
	5063	PL_locale_utf8ness, strlen(PL_locale_utf8ness),
	5064	delimited, input_name_len_with_overhead);
	5065	}
	5066	}
	5067
	5068	# ifdef DEBUGGING
	5069
	5070	if (DEBUG_Lv_TEST) {
	5071	const char * s = PL_locale_utf8ness;
	5072
	5073	/* Audit the structure */
	5074	while (s < PL_locale_utf8ness + strlen(PL_locale_utf8ness)) {
	5075	const char *e;
	5076
	5077	if (*s != UTF8NESS_SEP[0]) {
	5078	Perl_croak(aTHX_
	5079	"panic: %s: %d: Corrupt utf8ness_cache: missing"
	5080	" separator %.*s<-- HERE %s\n",
	5081	__FILE__, __LINE__,
	5082	(int) (s - PL_locale_utf8ness), PL_locale_utf8ness,
	5083	s);
	5084	}
	5085	s++;
	5086	e = strchr(s, UTF8NESS_PREFIX[0]);
	5087	if (! e) {
	5088	e = PL_locale_utf8ness + strlen(PL_locale_utf8ness);
	5089	Perl_croak(aTHX_
	5090	"panic: %s: %d: Corrupt utf8ness_cache: missing"
	5091	" separator %.*s<-- HERE %s\n",
	5092	__FILE__, __LINE__,
	5093	(int) (e - PL_locale_utf8ness), PL_locale_utf8ness,
	5094	e);
	5095	}
	5096	e++;
	5097	if (e != '0' && e != '1') {
	5098	Perl_croak(aTHX_
	5099	"panic: %s: %d: Corrupt utf8ness_cache: utf8ness"
	5100	" must be [01] %.*s<-- HERE %s\n",
	5101	__FILE__, __LINE__,
	5102	(int) (e + 1 - PL_locale_utf8ness),
	5103	PL_locale_utf8ness, e + 1);
	5104	}
	5105	if (ninstr(PL_locale_utf8ness, s, s-1, e)) {
	5106	Perl_croak(aTHX_
	5107	"panic: %s: %d: Corrupt utf8ness_cache: entry"
	5108	" has duplicate %.*s<-- HERE %s\n",
	5109	__FILE__, __LINE__,
	5110	(int) (e - PL_locale_utf8ness), PL_locale_utf8ness,
	5111	e);
	5112	}
	5113	s = e + 1;
	5114	}
	5115	}
	5116
	5117	if (DEBUG_Lv_TEST \|\| debug_initialization) {
	5118
	5119	PerlIO_printf(Perl_debug_log,
	5120	"PL_locale_utf8ness is now %s; returning %d\n",
	5121	PL_locale_utf8ness, is_utf8);
	5122	}
	5123
	5124	# endif
	5125
	5126	/* free only when not using the buffer */
	5127	if ( delimited != buffer ) Safefree(delimited);
	5128	Safefree(save_input_locale);
	5129	return is_utf8;
	5130	}
	5131
	5132	#endif
	5133
	5134	bool
	5135	Perl__is_in_locale_category(pTHX_ const bool compiling, const int category)
	5136	{
	5137	dVAR;
	5138	/* Internal function which returns if we are in the scope of a pragma that
	5139	* enables the locale category 'category'. 'compiling' should indicate if
	5140	* this is during the compilation phase (TRUE) or not (FALSE). */
	5141
	5142	const COP * const cop = (compiling) ? &PL_compiling : PL_curcop;
	5143
	5144	SV *these_categories = cop_hints_fetch_pvs(cop, "locale", 0);
	5145	if (! these_categories \|\| these_categories == &PL_sv_placeholder) {
	5146	return FALSE;
	5147	}
	5148
	5149	/* The pseudo-category 'not_characters' is -1, so just add 1 to each to get
	5150	* a valid unsigned */
	5151	assert(category >= -1);
	5152	return cBOOL(SvUV(these_categories) & (1U << (category + 1)));
	5153	}
	5154
	5155	char *
	5156	Perl_my_strerror(pTHX_ const int errnum)
	5157	{
	5158	/* Returns a mortalized copy of the text of the error message associated
	5159	* with 'errnum'. It uses the current locale's text unless the platform
	5160	* doesn't have the LC_MESSAGES category or we are not being called from
	5161	* within the scope of 'use locale'. In the former case, it uses whatever
	5162	* strerror returns; in the latter case it uses the text from the C locale.
	5163	*
	5164	* The function just calls strerror(), but temporarily switches, if needed,
	5165	* to the C locale */
	5166
	5167	char *errstr;
	5168	dVAR;
	5169
	5170	#ifndef USE_LOCALE_MESSAGES
	5171
	5172	/* If platform doesn't have messages category, we don't do any switching to
	5173	* the C locale; we just use whatever strerror() returns */
	5174
	5175	errstr = savepv(Strerror(errnum));
	5176
	5177	#else /* Has locale messages */
	5178
	5179	const bool within_locale_scope = IN_LC(LC_MESSAGES);
	5180
	5181	# ifndef USE_ITHREADS
	5182
	5183	/* This function is trivial without threads. */
	5184	if (within_locale_scope) {
	5185	errstr = savepv(strerror(errnum));
	5186	}
	5187	else {
	5188	const char * save_locale = savepv(do_setlocale_c(LC_MESSAGES, NULL));
	5189
	5190	do_setlocale_c(LC_MESSAGES, "C");
	5191	errstr = savepv(strerror(errnum));
	5192	do_setlocale_c(LC_MESSAGES, save_locale);
	5193	Safefree(save_locale);
	5194	}
	5195
	5196	# elif defined(HAS_POSIX_2008_LOCALE) \
	5197	&& defined(HAS_STRERROR_L) \
	5198	&& defined(HAS_DUPLOCALE)
	5199
	5200	/* This function is also trivial if we don't have to worry about thread
	5201	* safety and have strerror_l(), as it handles the switch of locales so we
	5202	* don't have to deal with that. We don't have to worry about thread
	5203	* safety if strerror_r() is also available. Both it and strerror_l() are
	5204	* thread-safe. Plain strerror() isn't thread safe. But on threaded
	5205	* builds when strerror_r() is available, the apparent call to strerror()
	5206	* below is actually a macro that behind-the-scenes calls strerror_r(). */
	5207
	5208	# ifdef HAS_STRERROR_R
	5209
	5210	if (within_locale_scope) {
	5211	errstr = savepv(strerror(errnum));
	5212	}
	5213	else {
	5214	errstr = savepv(strerror_l(errnum, PL_C_locale_obj));
	5215	}
	5216
	5217	# else
	5218
	5219	/* Here we have strerror_l(), but not strerror_r() and we are on a
	5220	* threaded-build. We use strerror_l() for everything, constructing a
	5221	* locale to pass to it if necessary */
	5222
	5223	bool do_free = FALSE;
	5224	locale_t locale_to_use;
	5225
	5226	if (within_locale_scope) {
	5227	locale_to_use = uselocale((locale_t) 0);
	5228	if (locale_to_use == LC_GLOBAL_LOCALE) {
	5229	locale_to_use = duplocale(LC_GLOBAL_LOCALE);
	5230	do_free = TRUE;
	5231	}
	5232	}
	5233	else { /* Use C locale if not within 'use locale' scope */
	5234	locale_to_use = PL_C_locale_obj;
	5235	}
	5236
	5237	errstr = savepv(strerror_l(errnum, locale_to_use));
	5238
	5239	if (do_free) {
	5240	freelocale(locale_to_use);
	5241	}
	5242
	5243	# endif
	5244	# else /* Doesn't have strerror_l() */
	5245
	5246	const char * save_locale = NULL;
	5247	bool locale_is_C = FALSE;
	5248
	5249	/* We have a critical section to prevent another thread from executing this
	5250	* same code at the same time. (On thread-safe perls, the LOCK is a
	5251	* no-op.) Since this is the only place in core that changes LC_MESSAGES
	5252	* (unless the user has called setlocale(), this works to prevent races. */
	5253	LOCALE_LOCK;
	5254
	5255	DEBUG_Lv(PerlIO_printf(Perl_debug_log,
	5256	"my_strerror called with errnum %d\n", errnum));
	5257	if (! within_locale_scope) {
	5258	save_locale = do_setlocale_c(LC_MESSAGES, NULL);
	5259	if (! save_locale) {
	5260	Perl_croak(aTHX_
	5261	"panic: %s: %d: Could not find current LC_MESSAGES locale,"
	5262	" errno=%d\n", __FILE__, __LINE__, errno);
	5263	}
	5264	else {
	5265	locale_is_C = isNAME_C_OR_POSIX(save_locale);
	5266
	5267	/* Switch to the C locale if not already in it */
	5268	if (! locale_is_C) {
	5269
	5270	/* The setlocale() just below likely will zap 'save_locale', so
	5271	* create a copy. */
	5272	save_locale = savepv(save_locale);
	5273	do_setlocale_c(LC_MESSAGES, "C");
	5274	}
	5275	}
	5276	} /* end of ! within_locale_scope */
	5277	else {
	5278	DEBUG_Lv(PerlIO_printf(Perl_debug_log, "%s: %d: WITHIN locale scope\n",
	5279	__FILE__, __LINE__));
	5280	}
	5281
	5282	DEBUG_Lv(PerlIO_printf(Perl_debug_log,
	5283	"Any locale change has been done; about to call Strerror\n"));
	5284	errstr = savepv(Strerror(errnum));
	5285
	5286	if (! within_locale_scope) {
	5287	if (save_locale && ! locale_is_C) {
	5288	if (! do_setlocale_c(LC_MESSAGES, save_locale)) {
	5289	Perl_croak(aTHX_
	5290	"panic: %s: %d: setlocale restore failed, errno=%d\n",
	5291	__FILE__, __LINE__, errno);
	5292	}
	5293	Safefree(save_locale);
	5294	}
	5295	}
	5296
	5297	LOCALE_UNLOCK;
	5298
	5299	# endif /* End of doesn't have strerror_l */
	5300	# ifdef DEBUGGING
	5301
	5302	if (DEBUG_Lv_TEST) {
	5303	PerlIO_printf(Perl_debug_log, "Strerror returned; saving a copy: '");
	5304	print_bytes_for_locale(errstr, errstr + strlen(errstr), 0);
	5305	PerlIO_printf(Perl_debug_log, "'\n");
	5306	}
	5307
	5308	# endif
	5309	#endif /* End of does have locale messages */
	5310
	5311	SAVEFREEPV(errstr);
	5312	return errstr;
	5313	}
	5314
	5315	/*
	5316
	5317	=for apidoc switch_to_global_locale
	5318
	5319	On systems without locale support, or on typical single-threaded builds, or on
	5320	platforms that do not support per-thread locale operations, this function does
	5321	nothing. On such systems that do have locale support, only a locale global to
	5322	the whole program is available.
	5323
	5324	On multi-threaded builds on systems that do have per-thread locale operations,
	5325	this function converts the thread it is running in to use the global locale.
	5326	This is for code that has not yet or cannot be updated to handle multi-threaded
	5327	locale operation. As long as only a single thread is so-converted, everything
	5328	works fine, as all the other threads continue to ignore the global one, so only
	5329	this thread looks at it.
	5330
	5331	However, on Windows systems this isn't quite true prior to Visual Studio 15,
	5332	at which point Microsoft fixed a bug. A race can occur if you use the
	5333	following operations on earlier Windows platforms:
	5334
	5335	=over
	5336
	5337	=item L<POSIX::localeconv\|POSIX/localeconv>
	5338
	5339	=item L<I18N::Langinfo>, items C<CRNCYSTR> and C<THOUSEP>
	5340
	5341	=item L<perlapi/Perl_langinfo>, items C<CRNCYSTR> and C<THOUSEP>
	5342
	5343	=back
	5344
	5345	The first item is not fixable (except by upgrading to a later Visual Studio
	5346	release), but it would be possible to work around the latter two items by using
	5347	the Windows API functions C<GetNumberFormat> and C<GetCurrencyFormat>; patches
	5348	welcome.
	5349
	5350	Without this function call, threads that use the L<C<setlocale(3)>> system
	5351	function will not work properly, as all the locale-sensitive functions will
	5352	look at the per-thread locale, and C<setlocale> will have no effect on this
	5353	thread.
	5354
	5355	Perl code should convert to either call
	5356	L<C<Perl_setlocale>\|perlapi/Perl_setlocale> (which is a drop-in for the system
	5357	C<setlocale>) or use the methods given in L<perlcall> to call
	5358	L<C<POSIX::setlocale>\|POSIX/setlocale>. Either one will transparently properly
	5359	handle all cases of single- vs multi-thread, POSIX 2008-supported or not.
	5360
	5361	Non-Perl libraries, such as C<gtk>, that call the system C<setlocale> can
	5362	continue to work if this function is called before transferring control to the
	5363	library.
	5364
	5365	Upon return from the code that needs to use the global locale,
	5366	L<C<sync_locale()>\|perlapi/sync_locale> should be called to restore the safe
	5367	multi-thread operation.
	5368
	5369	=cut
	5370	*/
	5371
	void
	Perl_switch_to_global_locale()
	{
	    /* Makes the calling thread use the global locale.  Does nothing unless
	     * USE_THREAD_SAFE_LOCALE is defined. */

	#ifdef USE_THREAD_SAFE_LOCALE
	#  ifndef WIN32
	#    ifndef HAS_QUERYLOCALE

	    {
	        /* No querylocale(): copy the thread's setting into the global
	         * locale one category at a time, for every category below
	         * LC_ALL_INDEX */
	        unsigned int cat_idx = 0;

	        while (cat_idx < LC_ALL_INDEX) {
	            setlocale(categories[cat_idx],
	                      do_setlocale_r(categories[cat_idx], NULL));
	            cat_idx++;
	        }
	    }

	#    else

	    /* Ask for the name of the thread's current locale object and install
	     * that as the global locale in one go */
	    setlocale(LC_ALL, querylocale(LC_ALL_MASK, uselocale((locale_t) 0)));

	#    endif

	    /* The global locale now has the thread's settings; switch this thread
	     * over to it */
	    uselocale(LC_GLOBAL_LOCALE);

	#  else

	    _configthreadlocale(_DISABLE_PER_THREAD_LOCALE);

	#  endif
	#endif

	}
	5404
	5405	/*
	5406
	5407	=for apidoc sync_locale
	5408
	5409	L<C<Perl_setlocale>\|perlapi/Perl_setlocale> can be used at any time to query or
	5410	change the locale (though changing the locale is antisocial and dangerous on
	5411	multi-threaded systems that don't have multi-thread safe locale operations;
	5412	see L<perllocale/Multi-threaded operation>). Using the system
	5413	L<C<setlocale(3)>> should be avoided. Nevertheless, certain non-Perl libraries
	5414	called from XS, such as C<Gtk> do so, and this can't be changed. When the
	5415	locale is changed by XS code that didn't use
	5416	L<C<Perl_setlocale>\|perlapi/Perl_setlocale>, Perl needs to be told that the
	5417	locale has changed. Use this function to do so, before returning to Perl.
	5418
	5419	The return value is a boolean: TRUE if the global locale at the time of call
	5420	was in effect; and FALSE if a per-thread locale was in effect. This can be
	5421	used by the caller that needs to restore things as-they-were to decide whether
	5422	or not to call
	5423	L<C<Perl_switch_to_global_locale>\|perlapi/switch_to_global_locale>.
	5424
	5425	=cut
	5426	*/
	5427
	5428	bool
	5429	Perl_sync_locale()
	5430	{
	5431
	5432	#ifndef USE_LOCALE
	5433
	5434	return TRUE;
	5435
	5436	#else
	5437
	5438	const char * newlocale;
	5439	dTHX;
	5440
	5441	# ifdef USE_POSIX_2008_LOCALE
	5442
	5443	bool was_in_global_locale = FALSE;
	5444	locale_t cur_obj = uselocale((locale_t) 0);
	5445
	5446	/* On Windows, unless the foreign code has turned off the thread-safe
	5447	* locale setting, any plain setlocale() will have affected what we see, so
	5448	* no need to worry. Otherwise, If the foreign code has done a plain
	5449	* setlocale(), it will only affect the global locale on POSIX systems, but
	5450	* will affect the */
	5451	if (cur_obj == LC_GLOBAL_LOCALE) {
	5452
	5453	# ifdef HAS_QUERY_LOCALE
	5454
	5455	do_setlocale_c(LC_ALL, setlocale(LC_ALL, NULL));
	5456
	5457	# else
	5458
	5459	unsigned int i;
	5460
	5461	/* We can't trust that we can read the LC_ALL format on the
	5462	* platform, so do them individually */
	5463	for (i = 0; i < LC_ALL_INDEX; i++) {
	5464	do_setlocale_r(categories[i], setlocale(categories[i], NULL));
	5465	}
	5466
	5467	# endif
	5468
	5469	was_in_global_locale = TRUE;
	5470	}
	5471
	5472	# else
	5473
	5474	bool was_in_global_locale = TRUE;
	5475
	5476	# endif
	5477	# ifdef USE_LOCALE_CTYPE
	5478
	5479	newlocale = savepv(do_setlocale_c(LC_CTYPE, NULL));
	5480	DEBUG_Lv(PerlIO_printf(Perl_debug_log,
	5481	"%s:%d: %s\n", __FILE__, __LINE__,
	5482	setlocale_debug_string(LC_CTYPE, NULL, newlocale)));
	5483	new_ctype(newlocale);
	5484	Safefree(newlocale);
	5485
	5486	# endif /* USE_LOCALE_CTYPE */
	5487	# ifdef USE_LOCALE_COLLATE
	5488
	5489	newlocale = savepv(do_setlocale_c(LC_COLLATE, NULL));
	5490	DEBUG_Lv(PerlIO_printf(Perl_debug_log,
	5491	"%s:%d: %s\n", __FILE__, __LINE__,
	5492	setlocale_debug_string(LC_COLLATE, NULL, newlocale)));
	5493	new_collate(newlocale);
	5494	Safefree(newlocale);
	5495
	5496	# endif
	5497	# ifdef USE_LOCALE_NUMERIC
	5498
	5499	newlocale = savepv(do_setlocale_c(LC_NUMERIC, NULL));
	5500	DEBUG_Lv(PerlIO_printf(Perl_debug_log,
	5501	"%s:%d: %s\n", __FILE__, __LINE__,
	5502	setlocale_debug_string(LC_NUMERIC, NULL, newlocale)));
	5503	new_numeric(newlocale);
	5504	Safefree(newlocale);
	5505
	5506	# endif /* USE_LOCALE_NUMERIC */
	5507
	5508	return was_in_global_locale;
	5509
	5510	#endif
	5511
	5512	}
	5513
	5514	#if defined(DEBUGGING) && defined(USE_LOCALE)
	5515
	5516	STATIC char *
	5517	S_setlocale_debug_string(const int category, /* category number,
	5518	like LC_ALL */
	5519	const char* const locale, /* locale name */
	5520
	5521	/* return value from setlocale() when attempting to
	5522	* set 'category' to 'locale' */
	5523	const char* const retval)
	5524	{
	5525	/* Returns a pointer to a NUL-terminated string in static storage with
	5526	* added text about the info passed in. This is not thread safe and will
	5527	* be overwritten by the next call, so this should be used just to
	5528	* formulate a string to immediately print or savepv() on. */
	5529
	5530	/* initialise to a non-null value to keep it out of BSS and so keep
	5531	* -DPERL_GLOBAL_STRUCT_PRIVATE happy */
	5532	static char ret[256] = "If you can read this, thank your buggy C"
	5533	" library strlcpy(), and change your hints file"
	5534	" to undef it";
	5535
	5536	my_strlcpy(ret, "setlocale(", sizeof(ret));
	5537	my_strlcat(ret, category_name(category), sizeof(ret));
	5538	my_strlcat(ret, ", ", sizeof(ret));
	5539
	5540	if (locale) {
	5541	my_strlcat(ret, "\"", sizeof(ret));
	5542	my_strlcat(ret, locale, sizeof(ret));
	5543	my_strlcat(ret, "\"", sizeof(ret));
	5544	}
	5545	else {
	5546	my_strlcat(ret, "NULL", sizeof(ret));
	5547	}
	5548
	5549	my_strlcat(ret, ") returned ", sizeof(ret));
	5550
	5551	if (retval) {
	5552	my_strlcat(ret, "\"", sizeof(ret));
	5553	my_strlcat(ret, retval, sizeof(ret));
	5554	my_strlcat(ret, "\"", sizeof(ret));
	5555	}
	5556	else {
	5557	my_strlcat(ret, "NULL", sizeof(ret));
	5558	}
	5559
	5560	assert(strlen(ret) < sizeof(ret));
	5561
	5562	return ret;
	5563	}
	5564
	5565	#endif
	5566
	void
	Perl_thread_locale_init()
	{
	    /* Run by a thread as it starts up.  Does nothing unless
	     * USE_THREAD_SAFE_LOCALE is defined. */

	#ifdef USE_THREAD_SAFE_LOCALE

	    dTHX_DEBUGGING;

	    /* A new thread begins life in the global C locale.  When thread-safe
	     * locales are in use, it should not stay in the global locale */

	    DEBUG_L(PerlIO_printf(Perl_debug_log,
	            "%s:%d: new thread, initial locale is %s; calling setlocale\n",
	            __FILE__, __LINE__, setlocale(LC_ALL, NULL)));

	#  ifndef WIN32

	    Perl_setlocale(LC_ALL, "C");

	#  else

	    _configthreadlocale(_ENABLE_PER_THREAD_LOCALE);

	#  endif
	#endif

	}
	5595
	void
	Perl_thread_locale_term()
	{
	    /* Run by a thread when it is about to terminate.  Does nothing unless
	     * USE_THREAD_SAFE_LOCALE is defined and the platform is not WIN32. */

	#if defined(USE_THREAD_SAFE_LOCALE) && ! defined(WIN32)

	    {   /* Free up */
	        dVAR;

	        /* Move this thread onto the global locale; uselocale() hands back
	         * the object that had been in use until now */
	        locale_t departing_obj = uselocale(LC_GLOBAL_LOCALE);

	        /* Release that object, except when it is the global locale itself
	         * or PL_C_locale_obj */
	        if (! (   departing_obj == LC_GLOBAL_LOCALE
	               || departing_obj == PL_C_locale_obj))
	        {
	            freelocale(departing_obj);
	        }
	    }

	#endif

	}
	5620
	5621	/*
	5622	* ex: set ts=8 sts=4 sw=4 et:
	5623	*/