+ /* ( = NULL silences a compiler warning; would segfault if it could
+ * actually happen.) */
+ const struct lconv_offset *strings = NULL;
+
+# ifdef USE_LOCALE_NUMERIC
+ if (cat_index == LC_NUMERIC_INDEX_) {
+ locale_is_utf8 = numeric_locale_is_utf8;
+ strings = lconv_numeric_strings;
+ }
+# else
+ PERL_UNUSED_ARG(numeric_locale_is_utf8);
+# endif
+# ifdef USE_LOCALE_MONETARY
+ if (cat_index == LC_MONETARY_INDEX_) {
+ locale_is_utf8 = monetary_locale_is_utf8;
+ strings = lconv_monetary_strings;
+ }
+# else
+ PERL_UNUSED_ARG(monetary_locale_is_utf8);
+# endif
+
+ assert(locale_is_utf8 != LOCALE_UTF8NESS_UNKNOWN);
+
+ /* Iterate over the strings structure for this category */
+ locale = querylocale_i(cat_index);
+
+ while (strings->name) {
+ const char *value = *((const char **)(ptr + strings->offset));
+ if (value && *value) {
+ bool is_utf8 = /* Only make UTF-8 if required to */
+ (UTF8NESS_YES == (get_locale_string_utf8ness_i(locale,
+ cat_index,
+ value,
+ locale_is_utf8)));
+ (void) hv_store(retval,
+ strings->name,
+ strlen(strings->name),
+ newSVpvn_utf8(value, strlen(value), is_utf8),
+ 0);
+ }
+
+ strings++;
+ }
+ }
+
+ while (integers->name) {
+ const char value = *((const char *)(ptr + integers->offset));
+
+ if (value != CHAR_MAX)
+ (void) hv_store(retval, integers->name,
+ strlen(integers->name), newSViv(value), 0);
+ integers++;
+ }
+
+ return retval;
+}
+
+# ifndef HAS_SOME_LANGINFO
+
+STATIC HV *
+S_get_nl_item_from_localeconv(pTHX_ const struct lconv *lcbuf,
+ const int item,
+ const locale_utf8ness_t unused1,
+ const locale_utf8ness_t unused2)
+{
+ /* This is a helper function for my_localeconv(), which is called from
+ * my_langinfo() to emulate the libc nl_langinfo() function on platforms
+ * that don't have it available.
+ *
+ * This function acts as an extension to my_langinfo(), the intermediate
+ * my_localeconv() call is to set up the locks and switch into the proper
+ * locale. That logic exists for other reasons, and by doing it this way,
+ * it doesn't have to be duplicated.
+ *
+ * This function extracts the current value of 'item' in the current locale
+ * using the localconv() result also passed in, via 'lcbuf'. The other
+ * parameter is unused, a placeholder so the signature of this function
+ * matches another that does need it, and so the two functions can be
+ * referred to by a single function pointer, to simplify the code below */
+
+ const char * prefix = "";
+ const char * temp = NULL;
+
+ PERL_ARGS_ASSERT_GET_NL_ITEM_FROM_LOCALECONV;
+ PERL_UNUSED_ARG(unused1);
+ PERL_UNUSED_ARG(unused2);
+
+ switch (item) {
+ case CRNCYSTR:
+ temp = lcbuf->currency_symbol;
+
+ if (lcbuf->p_cs_precedes) {
+
+ /* khw couldn't find any documentation that CHAR_MAX is the signal,
+ * but cygwin uses it thusly */
+ if (lcbuf->p_cs_precedes == CHAR_MAX) {
+ prefix = ".";
+ }
+ else {
+ prefix = "-";
+ }
+ }
+ else {
+ prefix = "+";
+ }
+
+ break;
+
+ case RADIXCHAR:
+ temp = lcbuf->decimal_point;
+ break;
+
+ case THOUSEP:
+ temp = lcbuf->thousands_sep;
+ break;
+
+ default:
+ locale_panic_(Perl_form(aTHX_
+ "Unexpected item passed to populate_localeconv: %d", item));
+ }
+
+ return (HV *) Perl_newSVpvf(aTHX_ "%s%s", prefix, temp);
+}
+
+# endif /* ! Has some form of langinfo() */
+#endif /* Has some form of localeconv() and paying attn to a category it
+ traffics in */
+
+#ifndef HAS_SOME_LANGINFO
+
+typedef int nl_item; /* Substitute 'int' for emulated nl_langinfo() */
+
+#endif
+
+/*
+
+=for apidoc Perl_langinfo
+=for apidoc_item Perl_langinfo8
+
+C<Perl_langinfo> is an (almost) drop-in replacement for the system
+C<L<nl_langinfo(3)>>, taking the same C<item> parameter values, and returning
+the same information. But it is more thread-safe than regular
+C<nl_langinfo()>, and hides the quirks of Perl's locale handling from your
+code, and can be used on systems that lack a native C<nl_langinfo>.
+
+However, you should instead use the improved version of this:
+L</Perl_langinfo8>, which behaves identically except for an additional
+parameter, a pointer to a variable declared as L</C<utf8ness_t>>, into which it
+returns to you how you should treat the returned string with regards to it
+being encoded in UTF-8 or not.
+
+Concerning the differences between these and plain C<nl_langinfo()>:
+
+=over
+
+=item a.
+
+C<Perl_langinfo8> has an extra parameter, described above. Besides this, the
+other reasons they aren't quite a drop-in replacement is actually an advantage.
+The C<const>ness of the return allows the compiler to catch attempts to write
+into the returned buffer, which is illegal and could cause run-time crashes.
+
+=item b.
+
+They deliver the correct results for the C<RADIXCHAR> and C<THOUSEP> items,
+without you having to write extra code. The reason for the extra code would be
+because these are from the C<LC_NUMERIC> locale category, which is normally
+kept set by Perl so that the radix is a dot, and the separator is the empty
+string, no matter what the underlying locale is supposed to be, and so to get
+the expected results, you have to temporarily toggle into the underlying
+locale, and later toggle back. (You could use plain C<nl_langinfo> and
+C<L</STORE_LC_NUMERIC_FORCE_TO_UNDERLYING>> for this but then you wouldn't get
+the other advantages of C<Perl_langinfo()>; not keeping C<LC_NUMERIC> in the C
+(or equivalent) locale would break a lot of CPAN, which is expecting the radix
+(decimal point) character to be a dot.)
+
+=item c.
+
+The system function they replace can have its static return buffer trashed,
+not only by a subsequent call to that function, but by a C<freelocale>,
+C<setlocale>, or other locale change. The returned buffer of these functions
+is not changed until the next call to one or the other, so the buffer is never
+in a trashed state.
+
+=item d.
+
+The return buffer is per-thread, so it also is never overwritten by a call to
+these functions from another thread; unlike the function it replaces.
+
+=item e.
+
+But most importantly, they work on systems that don't have C<nl_langinfo>, such
+as Windows, hence making your code more portable. Of the fifty-some possible
+items specified by the POSIX 2008 standard,
+L<http://pubs.opengroup.org/onlinepubs/9699919799/basedefs/langinfo.h.html>,
+only one is completely unimplemented, though on non-Windows platforms, another
+significant one is not fully implemented). They use various techniques to
+recover the other items, including calling C<L<localeconv(3)>>, and
+C<L<strftime(3)>>, both of which are specified in C89, so should be always be
+available. Later C<strftime()> versions have additional capabilities; C<""> is
+returned for any item not available on your system.
+
+It is important to note that, when called with an item that is recovered by
+using C<localeconv>, the buffer from any previous explicit call to
+C<L<localeconv(3)>> will be overwritten. But you shouldn't be using
+C<localeconv> anyway because it is is very much not thread-safe, and suffers
+from the same problems outlined in item 'b.' above for the fields it returns that
+are controlled by the LC_NUMERIC locale category. Instead, avoid all of those
+problems by calling L</Perl_localeconv>, which is thread-safe; or by using the
+methods given in L<perlcall> to call
+L<C<POSIX::localeconv()>|POSIX/localeconv>, which is also thread-safe.
+
+=back
+
+The details for those items which may deviate from what this emulation returns
+and what a native C<nl_langinfo()> would return are specified in
+L<I18N::Langinfo>.
+
+When using C<Perl_langinfo8> (or plain C<Perl_langinfo>) on systems that don't
+have a native C<nl_langinfo()>, you must
+
+ #include "perl_langinfo.h"
+
+before the C<perl.h> C<#include>. You can replace your C<langinfo.h>
+C<#include> with this one. (Doing it this way keeps out the symbols that plain
+C<langinfo.h> would try to import into the namespace for code that doesn't need
+it.)
+
+=cut
+
+*/
+
+const char *
+Perl_langinfo(const nl_item item)
+{
+ return Perl_langinfo8(item, NULL);
+}
+
+const char *
+Perl_langinfo8(const nl_item item, utf8ness_t * utf8ness)
+{
+ dTHX;
+ unsigned cat_index;
+
+ PERL_ARGS_ASSERT_PERL_LANGINFO8;
+
+ if (utf8ness) { /* Assume for now */
+ *utf8ness = UTF8NESS_IMMATERIAL;
+ }
+
+ /* Find the locale category that controls the input 'item'. If we are not
+ * paying attention to that category, instead return a default value. Also
+ * return the default value if there is no way for us to figure out the
+ * correct value. If we have some form of nl_langinfo(), we can always
+ * figure it out, but lacking that, there may be alternative methods that
+ * can be used to recover most of the possible items. Some of those
+ * methods need libc functions, which may or may not be available. If
+ * unavailable, we can't compute the correct value, so must here return the
+ * default. */
+ switch (item) {
+
+ case CODESET:
+
+#ifdef USE_LOCALE_CTYPE
+
+ cat_index = LC_CTYPE_INDEX_;
+ break;
+
+#else
+ return C_codeset;
+#endif
+#if defined(USE_LOCALE_MESSAGES) && defined(HAS_SOME_LANGINFO)
+
+ case YESEXPR: case YESSTR: case NOEXPR: case NOSTR:
+ cat_index = LC_MESSAGES_INDEX_;
+ break;
+#else
+ case YESEXPR: return "^[+1yY]";
+ case YESSTR: return "yes";
+ case NOEXPR: return "^[-0nN]";
+ case NOSTR: return "no";
+#endif
+
+ case CRNCYSTR:
+
+#if defined(USE_LOCALE_MONETARY) \
+ && (defined(HAS_SOME_LANGINFO) || defined(HAS_SOME_LOCALECONV))
+
+ cat_index = LC_MONETARY_INDEX_;
+ break;
+#else
+ return "-";
+#endif
+
+ case RADIXCHAR:
+
+#ifdef CAN_CALCULATE_RADIX
+
+ cat_index = LC_NUMERIC_INDEX_;
+ break;
+#else
+ return C_decimal_point;
+#endif
+
+ case THOUSEP:
+
+#if defined(USE_LOCALE_NUMERIC) \
+ && (defined(HAS_SOME_LANGINFO) || defined(HAS_SOME_LOCALECONV))
+
+ cat_index = LC_NUMERIC_INDEX_;
+ break;
+#else
+ return C_thousands_sep;
+#endif
+
+/* The other possible items are all in LC_TIME. */
+#ifdef USE_LOCALE_TIME
+
+ default:
+ cat_index = LC_TIME_INDEX_;
+ break;
+
+#endif
+#if ! defined(USE_LOCALE_TIME) || ! defined(HAS_SOME_LANGINFO)
+
+ /* If not using LC_TIME, hard code the rest. Or, if there is no
+ * nl_langinfo(), we use strftime() as an alternative, and it is missing
+ * functionality to get every single one, so hard-code those */
+
+ case ERA: return ""; /* Unimplemented; for use with strftime() %E
+ modifier */
+
+ /* These formats are defined by C89, so we assume that strftime supports
+ * them, and so are returned unconditionally; they may not be what the
+ * locale actually says, but should give good enough results for someone
+ * using them as formats (as opposed to trying to parse them to figure
+ * out what the locale says). The other format items are actually tested
+ * to verify they work on the platform */
+ case D_FMT: return "%x";
+ case T_FMT: return "%X";
+ case D_T_FMT: return "%c";
+
+# if defined(WIN32) || ! defined(USE_LOCALE_TIME)
+
+ /* strftime() on Windows doesn't have the POSIX (beyond C89) extensions
+ * that would allow it to recover these */
+ case ERA_D_FMT: return "%x";
+ case ERA_T_FMT: return "%X";
+ case ERA_D_T_FMT: return "%c";
+ case ALT_DIGITS: return "0";
+
+# endif
+# ifndef USE_LOCALE_TIME
+
+ case T_FMT_AMPM: return "%r";
+ case ABDAY_1: return "Sun";
+ case ABDAY_2: return "Mon";
+ case ABDAY_3: return "Tue";
+ case ABDAY_4: return "Wed";
+ case ABDAY_5: return "Thu";
+ case ABDAY_6: return "Fri";
+ case ABDAY_7: return "Sat";
+ case AM_STR: return "AM";
+ case PM_STR: return "PM";
+ case ABMON_1: return "Jan";
+ case ABMON_2: return "Feb";
+ case ABMON_3: return "Mar";
+ case ABMON_4: return "Apr";
+ case ABMON_5: return "May";
+ case ABMON_6: return "Jun";
+ case ABMON_7: return "Jul";
+ case ABMON_8: return "Aug";
+ case ABMON_9: return "Sep";
+ case ABMON_10: return "Oct";
+ case ABMON_11: return "Nov";
+ case ABMON_12: return "Dec";
+ case DAY_1: return "Sunday";
+ case DAY_2: return "Monday";
+ case DAY_3: return "Tuesday";
+ case DAY_4: return "Wednesday";
+ case DAY_5: return "Thursday";
+ case DAY_6: return "Friday";
+ case DAY_7: return "Saturday";
+ case MON_1: return "January";
+ case MON_2: return "February";
+ case MON_3: return "March";
+ case MON_4: return "April";
+ case MON_5: return "May";
+ case MON_6: return "June";
+ case MON_7: return "July";
+ case MON_8: return "August";
+ case MON_9: return "September";
+ case MON_10: return "October";
+ case MON_11: return "November";
+ case MON_12: return "December";
+
+# endif
+#endif
+
+ } /* End of switch on item */
+
+#ifndef USE_LOCALE
+
+ Perl_croak_nocontext("panic: Unexpected nl_langinfo() item %d", item);
+ NOT_REACHED; /* NOTREACHED */
+ PERL_UNUSED_VAR(cat_index);
+
+#else
+# ifdef USE_LOCALE_NUMERIC
+
+ /* Use either the underlying numeric, or the other underlying categories */
+ if (cat_index == LC_NUMERIC_INDEX_) {
+ return my_langinfo_c(item, LC_NUMERIC, PL_numeric_name,
+ &PL_langinfo_buf, &PL_langinfo_bufsize, utf8ness);
+ }
+ else
+
+# endif
+
+ {
+ return my_langinfo_i(item, cat_index, querylocale_i(cat_index),
+ &PL_langinfo_buf, &PL_langinfo_bufsize, utf8ness);
+ }
+
+#endif
+
+}
+
+#ifdef USE_LOCALE
+
+/* There are several implementations of my_langinfo, depending on the
+ * Configuration. They all share the same beginning of the function */
+STATIC const char *
+S_my_langinfo_i(pTHX_
+ const nl_item item, /* The item to look up */
+ const unsigned int cat_index, /* The locale category that
+ controls it */
+ /* The locale to look up 'item' in. */
+ const char * locale,
+
+ /* Where to store the result, and where the size of that buffer
+ * is stored, updated on exit. retbuf_sizep may be NULL for an
+ * empty-on-entry, single use buffer whose size we don't need
+ * to keep track of */
+ const char ** retbufp,
+ Size_t * retbuf_sizep,
+
+ /* If not NULL, the location to store the UTF8-ness of 'item's
+ * value, as documented */
+ utf8ness_t * utf8ness)
+{
+ const char * retval = NULL;
+
+ PERL_ARGS_ASSERT_MY_LANGINFO_I;
+ assert(cat_index <= NOMINAL_LC_ALL_INDEX);
+
+ DEBUG_Lv(PerlIO_printf(Perl_debug_log,
+ "Entering my_langinfo item=%d, using locale %s\n",
+ item, locale));
+/*--------------------------------------------------------------------------*/
+/* Above is the common beginning to all the implementations of my_langinfo().
+ * Below are the various completions.
+ *
+ * Some platforms don't deal well with non-ASCII strings in locale X when
+ * LC_CTYPE is not in X. (Actually it is probably when X is UTF-8 and LC_CTYPE
+ * isn't, or vice versa). There is explicit code to bring the categories into
+ * sync. This doesn't seem to be a problem with nl_langinfo(), so that
+ * implementation doesn't currently worry about it. But it is a problem on
+ * Windows boxes, which don't have nl_langinfo(). */
+
+# if defined(HAS_THREAD_SAFE_NL_LANGINFO_L) && defined(USE_POSIX_2008_LOCALE)
+
+ /* Simplest is if we can use nl_langinfo_l()
+ *
+ * With it, we can change LC_CTYPE in the same call as the other category */
+# ifdef USE_LOCALE_CTYPE
+# define CTYPE_SAFETY_MASK LC_CTYPE_MASK
+# else
+# define CTYPE_SAFETY_MASK 0
+# endif
+
+ locale_t cur = newlocale((category_masks[cat_index] | CTYPE_SAFETY_MASK),
+ locale, (locale_t) 0);
+
+ retval = save_to_buffer(nl_langinfo_l(item, cur), retbufp, retbuf_sizep);
+ if (utf8ness) {
+ *utf8ness = get_locale_string_utf8ness_i(locale, cat_index, retval,
+ LOCALE_UTF8NESS_UNKNOWN);
+ }
+
+ freelocale(cur);
+
+ return retval;
+/*--------------------------------------------------------------------------*/
+# elif defined(HAS_NL_LANGINFO) /* nl_langinfo() is available. */
+
+ /* The second version of my_langinfo() is if we have plain nl_langinfo() */
+
+# ifdef USE_LOCALE_CTYPE
+
+ /* Ths function sorts out if things actually have to be switched or not,
+ * for both calls. */
+ const char * orig_CTYPE_locale = toggle_locale_c(LC_CTYPE, locale);
+
+# endif
+
+ const char * orig_switched_locale = toggle_locale_i(cat_index, locale);
+
+ NL_LANGINFO_LOCK;
+ retval = save_to_buffer(nl_langinfo(item), retbufp, retbuf_sizep);
+ NL_LANGINFO_UNLOCK;
+
+ if (utf8ness) {
+ *utf8ness = get_locale_string_utf8ness_i(locale, cat_index,
+ retval, LOCALE_UTF8NESS_UNKNOWN);
+ }
+
+ restore_toggled_locale_i(cat_index, orig_switched_locale);
+
+# ifdef USE_LOCALE_CTYPE
+ restore_toggled_locale_c(LC_CTYPE, orig_CTYPE_locale);
+# endif
+
+ return retval;
+/*--------------------------------------------------------------------------*/
+# else /* Below, emulate nl_langinfo as best we can */
+
+ /* And the third and final completion is where we have to emulate
+ * nl_langinfo(). There are various possibilities depending on the
+ * Configuration */
+
+# ifdef USE_LOCALE_CTYPE
+
+ const char * orig_CTYPE_locale = toggle_locale_c(LC_CTYPE, locale);
+
+# endif
+
+ const char * orig_switched_locale = toggle_locale_i(cat_index, locale);
+
+ /* Here, we are in the locale we want information about */
+
+ /* Almost all the items will have ASCII return values. Set that here, and
+ * override if necessary */
+ utf8ness_t is_utf8 = UTF8NESS_IMMATERIAL;
+
+ switch (item) {
+ default:
+ retval = "";
+ break;
+
+ case RADIXCHAR:
+
+# if defined(HAS_SNPRINTF) \
+ && (! defined(HAS_SOME_LOCALECONV) || defined(TS_W32_BROKEN_LOCALECONV))
+
+ {
+ /* snprintf() can be used to find the radix character by outputting
+ * a known simple floating point number to a buffer, and parsing
+ * it, inferring the radix as the bytes separating the integer and
+ * fractional parts. But localeconv() is more direct, not
+ * requiring inference, so use it instead of the code just below,
+ * if (likely) it is available and works ok */
+
+ char * floatbuf = NULL;
+ const Size_t initial_size = 10;
+
+ Newx(floatbuf, initial_size, char);
+
+ /* 1.5 is exactly representable on binary computers */
+ Size_t needed_size = snprintf(floatbuf, initial_size, "%.1f", 1.5);
+
+ /* If our guess wasn't big enough, increase and try again, based on
+ * the real number that strnprintf() is supposed to return */
+ if (UNLIKELY(needed_size >= initial_size)) {
+ needed_size++; /* insurance */
+ Renew(floatbuf, needed_size, char);
+ Size_t new_needed = snprintf(floatbuf, needed_size, "%.1f", 1.5);
+ assert(new_needed <= needed_size);
+ needed_size = new_needed;
+ }
+
+ char * s = floatbuf;
+ char * e = floatbuf + needed_size;
+
+ /* Find the '1' */
+ while (s < e && *s != '1') {
+ s++;
+ }
+
+ if (LIKELY(s < e)) {
+ s++;
+ }
+
+ /* Find the '5' */
+ char * item_start = s;
+ while (s < e && *s != '5') {
+ s++;
+ }
+
+ /* Everything in between is the radix string */
+ if (LIKELY(s < e)) {
+ *s = '\0';
+ retval = save_to_buffer(item_start,
+ (const char **) &PL_langinfo_buf,
+ &PL_langinfo_bufsize);
+ Safefree(floatbuf);
+
+ if (utf8ness) {
+ is_utf8 = get_locale_string_utf8ness_i(locale, cat_index,
+ retval,
+ LOCALE_UTF8NESS_UNKNOWN);