locale.c: Fix bug in getting langinfo(CRNCYSTR)

[perl5.git] / locale.c
diff --git a/locale.c b/locale.c

index 242fa75..bf381d9 100644 (file)
--- a/locale.c
+++ b/locale.c
@@ -94,8 +94,8 @@
   *          setlocale() knows about, there is a layer to cope with that.
   *      b)  stdized_setlocale() is a layer above a) that fixes some vagaries in
   *          the return value of the libc setlocale().  On most platforms this
- *          layer is empty; it requires perl to be Configured with a parameter
- *          indicating the platform's defect, in order to be activated.  The
+ *          layer is empty; in order to be activated, it requires perl to be
+ *          Configured with a parameter indicating the platform's defect.  The
   *          current ones are listed at the definition of the macro.
   *
   * 2) An implementation that adds a minimal layer above implementation 1),
@@ -252,13 +252,22 @@
   *          querylocale() is called only while the locale mutex is locked, and
   *          the result is copied to a per-thread place before unlocking.
   *
- *      -Accflags=-DNO_LOCALE_CTYE
+ *      -Accflags=-DUSE_NL_LOCALE_NAME
+ *          glibc has an undocumented equivalent function to querylocale().  It
+ *          currently isn't used by default because it is undocumented.  But
+ *          testing hasn't found any problems with it.  Using this Configure
+ *          option enables it on systems that have it (with no effect on
+ *          systems lacking it).  Enabling this removes the need for perl
+ *          to keep its own records, hence is more efficient and guaranteed to
+ *          be accurate.
+ *
+ *      -Accflags=-DNO_LOCALE_CTYPE
   *      -Accflags=-DNO_LOCALE_NUMERIC
   *          etc.
   *
   *          If the named category(ies) does(do) not exist on this platform,
   *          these have no effect.  Otherwise they cause perl to be compiled to
- *          always keep the named category(ies) in the C locale XXX
+ *          always keep the named category(ies) in the C locale.
   *
   *      -Accflags=-DHAS_BROKEN_SETLOCALE_QUERY_LC_ALL
   *          This would be set in a hints file to tell perl that doing a libc
@@ -273,6 +282,23 @@
   *          to be stripped off.  khw believes there aren't any such platforms
   *          still in existence.
   *
+ *      -Accflags=-DLIBC_HANDLES_MISMATCHED_CTYPE
+ *          Consider the name of a month in some language, Chinese for example.
+ *          If LC_TIME has been set to a Chinese locale, strftime() can be used
+ *          to generate the Chinese month name for any given date, by using the
+ *          %B format.  But also suppose that LC_CTYPE is set to, say, "C".
+ *          The return from strftime() on many platforms will be mojibake given
+ *          that no Chinese month name is composed of just ASCII characters.
+ *          Perl handles this for you by automatically toggling LC_CTYPE to
+ *          whatever LC_TIME is during the execution of strftime(), and
+ *          afterwards restoring it to its prior value.  But the strftime()
+ *          (and similar functions) in some libc implementations already do
+ *          this toggle, meaning perl's action is redundant.  You can tell perl
+ *          that a libc does this by setting this Configure option, and it will
+ *          skip its syncing LC_CTYPE and whatever the other locale is.
+ *          Currently, perl ignores this Configuration option and  syncs anyway
+ *          for LC_COLLATE-related operations, due to perl's internal needs.
+ *
   *      -Accflags=USE_FAKE_LC_ALL_POSITIONAL_NOTATION
   *          This is used when developing Perl on a platform that uses
   *          'name=value;' notation to represent LC_ALL when not all categories
@@ -358,18 +384,29 @@ static int debug_initialization = 0;
  #define PERL_IN_LOCALE_C
  #include "perl.h"
  
+/* Some platforms require LC_CTYPE to be congruent with the category we are
+ * looking for.  XXX This still presumes that we have to match COLLATE and
+ * CTYPE even on platforms that apparently handle this. */
+#if defined(USE_LOCALE_CTYPE) && ! defined(LIBC_HANDLES_MISMATCHED_CTYPE)
+#  define WE_MUST_DEAL_WITH_MISMATCHED_CTYPE
+#endif
+
+#if PERL_VERSION_GT(5,39,9)
+#  error Revert the commit that added this line
+#endif
+
  #ifdef WIN32_USE_FAKE_OLD_MINGW_LOCALES
  
     /* Use -Accflags=-DWIN32_USE_FAKE_OLD_MINGW_LOCALES on a POSIX or *nix box
      * to get a semblance of pretending the locale handling is that of a MingW
      * that doesn't use UCRT (hence 'OLD' in the name).  This exercizes code
-    * paths that are not compiled on non-Windows boxes, and allows for ASAN.
-    * This is thus a way to see if locale.c on Windows is likely going to
-    * compile, without having to use a real Win32 box.  And running the test
-    * suite will verify to a large extent our logic and memory allocation
-    * handling for such boxes.  And access to ASAN and PERL_MEMLOG Of course the underlying calls are to the POSIX
-    * libc, so any differences in implementation between those and the Windows
-    * versions will not be caught by this. */
+    * paths that are not compiled on non-Windows boxes, and allows for ASAN
+    * and PERL_MEMLOG.  This is thus a way to see if locale.c on Windows is
+    * likely going to compile, without having to use a real Win32 box.  And
+    * running the test suite will verify to a large extent our logic and memory
+    * allocation handling for such boxes.  Of course the underlying calls are
+    * to the POSIX libc, so any differences in implementation between those and
+    * the Windows versions will not be caught by this. */
  
  #  define WIN32
  #  undef P_CS_PRECEDES
@@ -379,17 +416,24 @@ static int debug_initialization = 0;
  #  define _configthreadlocale(arg) NOOP
  
  #  define MultiByteToWideChar(cp, flags, byte_string, m1, wstring, req_size) \
-                    (mbsrtowcs(wstring, &(byte_string), req_size, NULL) + 1)
+                    (PERL_UNUSED_ARG(cp),                                    \
+                     mbsrtowcs(wstring, &(byte_string), req_size, NULL) + 1)
  #  define WideCharToMultiByte(cp, flags, wstring, m1, byte_string,          \
                                req_size, default_char, found_default_char)   \
-                    (wcsrtombs(byte_string, &(wstring), req_size, NULL) + 1)
+                    (PERL_UNUSED_ARG(cp),                                   \
+                     wcsrtombs(byte_string, &(wstring), req_size, NULL) + 1)
  
  #  ifdef USE_LOCALE
  
  static const wchar_t * wsetlocale_buf = NULL;
  static Size_t wsetlocale_buf_size = 0;
+
+#    ifdef MULTIPLICITY
+
  static PerlInterpreter * wsetlocale_buf_aTHX = NULL;
  
+#    endif
+
  STATIC
  const wchar_t *
  S_wsetlocale(const int category, const wchar_t * wlocale)
@@ -574,15 +618,6 @@ S_positional_newlocale(int mask, const char * locale, locale_t base)
  #  define SET_EINVAL
  #endif
  
-/* If we have any of these library functions, we can reliably determine is a
- * locale is a UTF-8 one or not.  And if we aren't using locales at all, we act
- * as if everything is the C locale, so the answer there is always "No, it
- * isn't UTF-8"; this too is reliably accurate */
-#if   defined(HAS_SOME_LANGINFO) || defined(HAS_MBTOWC)                 \
-   || defined(HAS_MBRTOWC) || ! defined(USE_LOCALE)
-#  define HAS_RELIABLE_UTF8NESS_DETERMINATION
-#endif
-
  /* This is a starting guess as to when this is true.  It definititely isn't
   * true on *BSD where positional LC_ALL notation is used.  Likely this will end
   * up being defined in hints files. */
@@ -655,6 +690,10 @@ S_mortalized_pv_copy(pTHX_ const char * const pv)
                                        a single instance, so is a #define */
  static const char C_decimal_point[] = ".";
  
+#if defined(HAS_NL_LANGINFO_L) || defined(HAS_NL_LANGINFO)
+#  define HAS_SOME_LANGINFO
+#endif
+
  #if (defined(USE_LOCALE_NUMERIC) && ! defined(TS_W32_BROKEN_LOCALECONV))    \
   || ! (   defined(USE_LOCALE_NUMERIC)                                       \
         && (defined(HAS_SOME_LANGINFO) || defined(HAS_LOCALECONV)))
@@ -676,10 +715,6 @@ static const char C_thousands_sep[] = "";
                                && (( *(name) == 'C' && (*(name + 1)) == '\0') \
                                     || strEQ((name), "POSIX")))
  
-#if defined(HAS_NL_LANGINFO_L) || defined(HAS_NL_LANGINFO)
-#  define HAS_SOME_LANGINFO
-#endif
-
  #define my_langinfo_c(item, category, locale, retbufp, retbuf_sizep, utf8ness) \
              my_langinfo_i(item, category##_INDEX_, locale, retbufp,            \
                                                        retbuf_sizep,  utf8ness)
@@ -814,7 +849,7 @@ STATIC const bool category_available[] = {
  #  ifdef LC_ALL
      true,
  #  else
-    false
+    false,
  #  endif
  
      false   /* LC_UNKNOWN_AVAIL_ */
@@ -1065,7 +1100,7 @@ Perl_locale_panic(const char * msg,
  #define setlocale_failure_panic_c(cat, cur, fail, line, higher_line)        \
     setlocale_failure_panic_i(cat##_INDEX_, cur, fail, line, higher_line)
  
-#if defined(LC_ALL) && defined(USE_LOCALE)
+#if defined(USE_LOCALE)
  
  /* Expands to the code to
   *      result = savepvn(s, len)
@@ -3423,15 +3458,15 @@ S_new_numeric(pTHX_ const char *newnum, bool force)
       * get this value, which doesn't appear to be used in any of the Microsoft
       * library routines anyway. */
  
-    char * scratch_buffer = NULL;
      if (PL_numeric_underlying_is_standard) {
+        char * scratch_buffer = NULL;
          PL_numeric_underlying_is_standard = strEQ(C_thousands_sep,
                                               my_langinfo_c(THOUSEP, LC_NUMERIC,
                                                             PL_numeric_name,
                                                             &scratch_buffer,
                                                             NULL, NULL));
+        Safefree(scratch_buffer);
      }
-    Safefree(scratch_buffer);
  
  #    endif
  
@@ -3949,17 +3984,7 @@ Perl_warn_problematic_locale()
  #  endif /* USE_LOCALE_CTYPE */
  
  STATIC void
-
-#  ifdef LC_ALL
-
  S_new_LC_ALL(pTHX_ const char *lc_all, bool force)
-
-#  else
-
-S_new_LC_ALL(pTHX_ const char ** individ_locales, bool force)
-
-#  endif
-
  {
      PERL_ARGS_ASSERT_NEW_LC_ALL;
  
@@ -3968,8 +3993,6 @@ S_new_LC_ALL(pTHX_ const char ** individ_locales, bool force)
       * set, and not the nominal one (should they differ, as they may in
       * LC_NUMERIC). */
  
-#  ifdef LC_ALL
-
      const char * individ_locales[LC_ALL_INDEX_] = { NULL };
  
      switch (parse_LC_ALL_string(lc_all,
@@ -3981,29 +4004,22 @@ S_new_LC_ALL(pTHX_ const char ** individ_locales, bool force)
                                             earlier had to have succeeded */
                                  __LINE__))
      {
-        case invalid:
-        case no_array:
-        case only_element_0:
+      case invalid:
+      case no_array:
+      case only_element_0:
          locale_panic_("Unexpected return from parse_LC_ALL_string");
  
-        case full_array:
+      case full_array:
          break;
      }
  
-#  endif
-
      for_all_individual_category_indexes(i) {
          if (update_functions[i]) {
              const char * this_locale = individ_locales[i];
              update_functions[i](aTHX_ this_locale, force);
          }
  
-#  ifdef LC_ALL
-
          Safefree(individ_locales[i]);
-
-#  endif
-
      }
  }
  
@@ -4511,20 +4527,15 @@ S_get_locale_string_utf8ness_i(pTHX_ const char * string,
       * If we already know the UTF-8ness of the locale, then we immediately know
       * what the string is */
      if (UNLIKELY(known_utf8 != LOCALE_UTF8NESS_UNKNOWN)) {
-        if (known_utf8 == LOCALE_IS_UTF8) {
-            return UTF8NESS_YES;
-        }
-        else {
-            return UTF8NESS_NO;
-        }
+        return (known_utf8 == LOCALE_IS_UTF8) ? UTF8NESS_YES : UTF8NESS_NO;
      }
  
-#    ifdef HAS_RELIABLE_UTF8NESS_DETERMINATION
+    if (locale == NULL) {
+        locale = querylocale_i(cat_index);
+    }
  
-    /* Here, we have available the libc functions that can be used to
-     * accurately determine the UTF8ness of the underlying locale.  If it is a
-     * UTF-8 locale, the string is UTF-8;  otherwise it was coincidental that
-     * the string is legal UTF-8
+    /* If the locale is UTF-8, the string is UTF-8;  otherwise it was
+     * coincidental that the string is legal UTF-8
       *
       * However, if the perl is compiled to not pay attention to the category
       * being passed in, you might think that that locale is essentially always
@@ -4535,31 +4546,8 @@ S_get_locale_string_utf8ness_i(pTHX_ const char * string,
       * messages really could be UTF-8, and given that the odds are rather small
       * of something not being UTF-8 but being syntactically valid UTF-8, khw
       * has decided to call such strings as UTF-8. */
+    return (is_locale_utf8(locale)) ? UTF8NESS_YES : UTF8NESS_NO;
  
-    if (locale == NULL) {
-        locale = querylocale_i(cat_index);
-    }
-
-    if (is_locale_utf8(locale)) {
-        return UTF8NESS_YES;
-    }
-
-    return UTF8NESS_NO;
-
-#    else
-
-    /* Here, we have a valid UTF-8 string containing non-ASCII characters, and
-     * don't have access to functions to check if the locale is UTF-8 or not.
-     * Assume that it is.  khw tried adding a check that the string is entirely
-     * in a single Unicode script, but discovered the strftime() timezone is
-     * user-settable through the environment, which may be in a different
-     * script than the locale-expected value. */
-    PERL_UNUSED_ARG(locale);
-    PERL_UNUSED_ARG(cat_index);
-
-    return UTF8NESS_YES;
-
-#    endif
  #  endif
  
  }
@@ -4567,16 +4555,9 @@ S_get_locale_string_utf8ness_i(pTHX_ const char * string,
  STATIC bool
  S_is_locale_utf8(pTHX_ const char * locale)
  {
-    /* Returns TRUE if the locale 'locale' is UTF-8; FALSE otherwise.  It uses
-     * my_langinfo(), which employs various methods to get this information
-     * if nl_langinfo() isn't available, using heuristics as a last resort, in
-     * which case, the result will very likely be correct for locales for
-     * languages that have commonly used non-ASCII characters, but for notably
-     * English, it comes down to if the locale's name ends in something like
-     * "UTF-8".  It errs on the side of not being a UTF-8 locale.
-     *
-     * Systems conforming to C99 should have the needed libc calls to give us a
-     * completely reliable result. */
+    PERL_ARGS_ASSERT_IS_LOCALE_UTF8;
+
+    /* Returns TRUE if the locale 'locale' is UTF-8; FALSE otherwise. */
  
  #  if ! defined(USE_LOCALE)                                                   \
     || ! defined(USE_LOCALE_CTYPE)                                             \
@@ -4586,21 +4567,99 @@ S_is_locale_utf8(pTHX_ const char * locale)
  
      return FALSE;
  
+     /* Definitively, can't be UTF-8 */
+#    define HAS_DEFINITIVE_UTF8NESS_DETERMINATION
  #  else
  
-    char * scratch_buffer = NULL;
-    const char * codeset;
-    bool retval;
-
-    PERL_ARGS_ASSERT_IS_LOCALE_UTF8;
-
+    /* If the input happens to be the same locale as we are currently setup
+     * for, the answer has already been cached. */
      if (strEQ(locale, PL_ctype_name)) {
          return PL_in_utf8_CTYPE_locale;
      }
  
-    codeset = my_langinfo_c(CODESET, LC_CTYPE, locale,
-                            &scratch_buffer, NULL, NULL);
-    retval = is_codeset_name_UTF8(codeset);
+    if (isNAME_C_OR_POSIX(locale)) {
+        return false;
+    }
+
+#    if ! defined(HAS_SOME_LANGINFO) && ! defined(WIN32)
+
+    /* On non-Windows without nl_langinfo(), we have to do some digging to get
+     * the answer.  First, toggle to the desired locale so can query its state
+     * */
+    const char * orig_CTYPE_locale = toggle_locale_c(LC_CTYPE, locale);
+
+#      define TEARDOWN_FOR_IS_LOCALE_UTF8                                   \
+                      restore_toggled_locale_c(LC_CTYPE, orig_CTYPE_locale)
+
+#      ifdef MB_CUR_MAX
+
+    /* If there are fewer bytes available in this locale than are required
+     * to represent the largest legal UTF-8 code point, this isn't a UTF-8
+     * locale. */
+    const int mb_cur_max = MB_CUR_MAX;
+    if (mb_cur_max < (int) UNISKIP(PERL_UNICODE_MAX)) {
+        TEARDOWN_FOR_IS_LOCALE_UTF8;
+        return false;
+    }
+
+#      endif
+#      if defined(HAS_MBTOWC) || defined(HAS_MBRTOWC)
+
+         /* With these functions, we can definitively determine a locale's
+          * UTF-8ness */
+#        define HAS_DEFINITIVE_UTF8NESS_DETERMINATION
+
+    /* If libc mbtowc() evaluates the bytes that form the REPLACEMENT CHARACTER
+     * as that Unicode code point, this has to be a UTF-8 locale; otherwise it
+     * can't be  */
+    wchar_t wc = 0;
+    (void) Perl_mbtowc_(aTHX_ NULL, NULL, 0);/* Reset shift state */
+    int mbtowc_ret = Perl_mbtowc_(aTHX_ &wc,
+                                  STR_WITH_LEN(REPLACEMENT_CHARACTER_UTF8));
+    TEARDOWN_FOR_IS_LOCALE_UTF8;
+    return (   mbtowc_ret == STRLENs(REPLACEMENT_CHARACTER_UTF8)
+            && wc == UNICODE_REPLACEMENT);
+
+#      else
+
+        /* If the above two C99 functions aren't working, you could try some
+         * different methods.  It seems likely that the obvious choices,
+         * wctomb() and wcrtomb(), wouldn't be working either.  But you could
+         * choose one of the dozen-ish Unicode titlecase triples and verify
+         * that towupper/towlower work as expected.
+         *
+         * But, our emulation of nl_langinfo() works quite well, so avoid the
+         * extra code until forced to by some weird non-conforming platform. */
+#        define USE_LANGINFO_FOR_UTF8NESS
+#        undef HAS_DEFINITIVE_UTF8NESS_DETERMINATION
+#      endif
+#    else
+
+     /* On Windows or on platforms with nl_langinfo(), there is a direct way to
+      * get the locale's codeset, which will be some form of 'UTF-8' for a
+      * UTF-8 locale.  my_langinfo_i() handles this, and we will call that
+      * below */
+#      define HAS_DEFINITIVE_UTF8NESS_DETERMINATION
+#      define USE_LANGINFO_FOR_UTF8NESS
+#      define TEARDOWN_FOR_IS_LOCALE_UTF8
+#    endif  /* USE_LANGINFO_FOR_UTF8NESS */
+
+     /* If the above compiled into code, it found the locale's UTF-8ness,
+      * nothing more to do; if it didn't get compiled,
+      * USE_LANGINFO_FOR_UTF8NESS is defined.  There are two possible reasons:
+      *   1)  it is the preferred method because it knows directly for sure
+      *       what the codeset is because the platform has libc functions that
+      *       return this; or
+      *   2)  the functions the above code section would compile to use don't
+      *       exist or are unreliable on this platform; we are less sure of the
+      *       my_langinfo() result, though it is very unlikely to be wrong
+      *       about if it is UTF-8 or not */
+#    ifdef USE_LANGINFO_FOR_UTF8NESS
+
+    char * scratch_buffer = NULL;
+    const char * codeset = my_langinfo_c(CODESET, LC_CTYPE, locale,
+                                         &scratch_buffer, NULL, NULL);
+    bool retval = is_codeset_name_UTF8(codeset);
  
      DEBUG_Lv(PerlIO_printf(Perl_debug_log,
                             "found codeset=%s, is_utf8=%d\n", codeset, retval));
@@ -4609,9 +4668,11 @@ S_is_locale_utf8(pTHX_ const char * locale)
  
      DEBUG_Lv(PerlIO_printf(Perl_debug_log, "is_locale_utf8(%s) returning %d\n",
                                                              locale, retval));
+    TEARDOWN_FOR_IS_LOCALE_UTF8;
      return retval;
  
-#  endif
+#    endif
+#  endif      /* End of the #else clause, for the non-trivial case */
  
  }
  
@@ -4711,10 +4772,20 @@ Perl_get_win32_message_utf8ness(pTHX_ const char * string)
  
  #    ifdef USE_LOCALE_CTYPE
  
+    /* We don't know the locale utf8ness here, and not even the locale itself.
+     * Since Windows uses a different mechanism to specify message language
+     * output than the locale system, it is going to be problematic deciding
+     * if we are to store it as UTF-8 or not.  By specifying LOCALE_IS_UTF8, we
+     * are telling the called function to return true iff the string has
+     * non-ASCII characters in it that are all syntactically UTF-8.  We are
+     * thus relying on the fact that a string that is syntactically valid UTF-8
+     * is likely to be UTF-8.  Should this ever cause problems, this function
+     * could be replaced by something more Windows-specific */
      return get_locale_string_utf8ness_i(string, LOCALE_IS_UTF8,
                                          NULL, LC_CTYPE_INDEX_);
  #    else
  
+    PERL_UNUSED_ARG(string);
      return false;
  
  #    endif
@@ -4815,7 +4886,7 @@ Perl_localeconv(pTHX)
  
  }
  
-#if  defined(HAS_LOCALECONV)
+#if defined(HAS_LOCALECONV)
  
  HV *
  S_my_localeconv(pTHX_ const int item)
@@ -4823,14 +4894,31 @@ S_my_localeconv(pTHX_ const int item)
      PERL_ARGS_ASSERT_MY_LOCALECONV;
  
      /* This returns a mortalized hash containing all or certain elements
-     * returned by localeconv().  It is used by Perl_localeconv() and
-     * POSIX::localeconv() and is thread-safe.
+     * returned by localeconv(). */
+    HV * hv = newHV();      /* The returned hash, initially empty */
+    sv_2mortal((SV*)hv);
+
+    /* The function is used by Perl_localeconv() and POSIX::localeconv(), or
+     * internally from this file, and is thread-safe.
       *
-     * There are two use cases:
-     * 1) Called from POSIX::locale_conv().  This returns the lconv structure
-     *    copied to a hash, based on the current underlying locales for
-     *    LC_NUMERIC and LC_MONETARY. An input item==0 signifies this case, or
-     *    on many platforms it is the only use case compiled.
+     * localeconv() returns items from two different locale categories,
+     * LC_MONETARY and LC_NUMERIC.  Various data structures in this function
+     * are arrays with two elements, one for each category, and these indexes
+     * indicate which array element applies to which category */
+#define NUMERIC_OFFSET   0
+#define MONETARY_OFFSET  1
+
+    /* Some operations apply to one or the other category, or both.  A mask
+     * is used to specify all the possibilities.  This macro converts from the
+     * category offset to its bit position in the mask. */
+#define OFFSET_TO_BIT(i)  (1 << (i))
+
+    /* There are two use cases for this function:
+     * 1) Called as Perl_localeconv(), or from POSIX::locale_conv().  This
+     *    returns the lconv structure copied to a hash, based on the current
+     *    underlying locales for LC_NUMERIC and LC_MONETARY. An input item==0
+     *    signifies this case, or on many platforms it is the only use case
+     *    compiled.
       * 2) Certain items that nl_langinfo() provides are also derivable from
       *    the return of localeconv().  Windows notably doesn't have
       *    nl_langinfo(), so on that, and actually any platform lacking it,
@@ -4863,7 +4951,9 @@ S_my_localeconv(pTHX_ const int item)
          LCONV_NUMERIC_ENTRY(grouping),
  #   endif
          LCONV_NUMERIC_ENTRY(thousands_sep),
+#   define THOUSANDS_SEP_LITERAL  "thousands_sep"
          LCONV_NUMERIC_ENTRY(decimal_point),
+#   define DECIMAL_POINT_LITERAL "decimal_point"
          {NULL, 0}
      };
  
@@ -4892,6 +4982,7 @@ S_my_localeconv(pTHX_ const int item)
          LCONV_MONETARY_ENTRY(positive_sign),
          LCONV_MONETARY_ENTRY(negative_sign),
          LCONV_MONETARY_ENTRY(currency_symbol),
+#  define CURRENCY_SYMBOL_LITERAL  "currency_symbol"
          {NULL, 0}
      };
  
@@ -4916,6 +5007,7 @@ S_my_localeconv(pTHX_ const int item)
          LCONV_ENTRY(int_p_sign_posn),
          LCONV_ENTRY(int_n_sign_posn),
  #  endif
+#      define P_CS_PRECEDES_LITERAL    "p_cs_precedes"
          LCONV_ENTRY(p_cs_precedes),
          {NULL, 0}
      };
@@ -4924,27 +5016,49 @@ S_my_localeconv(pTHX_ const int item)
  #  define P_CS_PRECEDES_ADDRESS                                       \
        &lconv_integers[(C_ARRAY_LENGTH(lconv_integers) - 2)]
  
-    /* If we aren't paying attention to a given category, use LC_CTYPE instead;
-     * If not paying attention to that either, the code below should end up not
-     * using this.  Make sure that things blow up if that avoidance gets lost,
-     * by setting the category to an out-of-bounds value */
-    locale_category_index numeric_index;
-    locale_category_index monetary_index;
+    /* The actual populating of the hash is done by two sub functions that get
+     * passed an array of length two containing the data structure they are
+     * supposed to use to get the key names to fill the hash with.  One element
+     * is always for the NUMERIC strings (or NULL if none to use), and the
+     * other element similarly for the MONETARY ones. */
+    const lconv_offset_t * strings[2] = { lconv_numeric_strings,
+                                          lconv_monetary_strings
+                                        };
+
+    /* The LC_MONETARY category also has some integer-valued fields, whose
+     * information is kept in a separate parallel array to 'strings' */
+    const lconv_offset_t * integers[2] = {
+                                           NULL,
+                                           lconv_integers
+                                         };
+
+#  if ! defined(USE_LOCALE_NUMERIC) && ! defined(USE_LOCALE_MONETARY)
+
+    /* If both NUMERIC and MONETARY must be the "C" locale, simply populate the
+     * hash using the function that works on just that locale. */
+    populate_hash_from_C_localeconv(hv,
+                                    "C",
+                                    (  OFFSET_TO_BIT(NUMERIC_OFFSET)
+                                     | OFFSET_TO_BIT(MONETARY_OFFSET)),
+                                     strings, integers);
+
+    /* We shouldn't get to here for the case of an individual item, as
+     * preprocessor directives elsewhere in this file should have filled in the
+     * correct values at a higher level */
+    assert(item == 0);
+    PERL_UNUSED_ARG(item);
+
+    return hv;
  
-#  ifdef USE_LOCALE_NUMERIC
-    numeric_index = LC_NUMERIC_INDEX_;
-#  elif defined(USE_LOCALE_CTYPE)
-    numeric_index = LC_CTYPE_INDEX_;
-#  else
-    numeric_index = LC_ALL_INDEX_;      /* Out-of-bounds */
-#  endif
-#  ifdef USE_LOCALE_MONETARY
-    monetary_index = LC_MONETARY_INDEX_;
-#  elif defined(USE_LOCALE_CTYPE)
-    monetary_index = LC_CTYPE_INDEX_;
  #  else
-    monetary_index = LC_ALL_INDEX_;     /* Out-of-bounds */
-#  endif
+
+    /* From here to the end of this function, at least one of NUMERIC or
+     * MONETARY can be non-C */
+
+    /* This is a mask, with one bit to tell the populate functions to populate
+     * the NUMERIC items; another bit for the MONETARY ones.  This way they can
+     * choose which (or both) to populate from */
+    U32 index_bits = 0;
  
      /* Some platforms, for correct non-mojibake results, require LC_CTYPE's
       * locale to match LC_NUMERIC's for the numeric fields, and LC_MONETARY's
@@ -4955,36 +5069,19 @@ S_my_localeconv(pTHX_ const int item)
       * a second call.  Assume this is the case unless overridden below */
      bool requires_2nd_localeconv = false;
  
-    /* The actual hash populating is done by S_populate_hash_from_localeconv().
-     * It gets passed an array of length two containing the data structure it
-     * is supposed to use to get the key names to fill the hash with.  One
-     * element is always for the NUMERIC strings (or NULL if none to use), and
-     * the other element similarly for the MONETARY ones. */
-#  define NUMERIC_STRING_OFFSET   0
-#  define MONETARY_STRING_OFFSET  1
-    const lconv_offset_t * strings[2] = { NULL, NULL };
-
-    /* This is a mask, with one bit to tell S_populate_hash_from_localeconv to
-     * populate the NUMERIC items; another bit for the MONETARY ones.  This way
-     * it can choose which (or both) to populate from */
-    U32 index_bits = 0;
-
-    /* This converts from a locale index to its bit position in the above mask.
-     * */
-#  define INDEX_TO_BIT(i)  (1 << (i))
+    /* The actual hash populating is done by one of the two populate functions.
+     * Which one is appropriate for either the MONETARY_OFFSET or the
+     * NUMERIC_OFFSET is calculated and then stored in this table */
+    void (*populate[2]) (pTHX_
+                         HV * ,
+                         const char *,
+                         const U32,
+                         const lconv_offset_t **,
+                         const lconv_offset_t **);
  
-    /* The two categories can have disparate locales.  Initialize them to C and
-     * override later whichever one(s) we pay attention to */
-    const char * numeric_locale = "C";
-    const char * monetary_locale = "C";
-
-    /* This will be either 'numeric_locale' or 'monetary_locale' depending on
-     * what we are working on at the moment */
-    const char * locale;
-
-    /* The LC_MONETARY category also has some integer-valued fields, whose
-     * information is kept in a separate list */
-    const lconv_offset_t * integers;
+    /* This gives the locale to use for the corresponding OFFSET, like the
+     * 'populate' array above */
+    const char * locales[2];
  
  #  ifdef HAS_SOME_LANGINFO
  
@@ -4992,12 +5089,10 @@ S_my_localeconv(pTHX_ const int item)
       * parameter is ignored. */
      PERL_UNUSED_ARG(item);
  
-#  else
-
-    /* This only gets compiled for the use-case of using localeconv() to
-     * emulate an nl_langinfo() missing from the platform. */
+#  else     /* This only gets compiled for the use-case of using localeconv()
+               to emulate nl_langinfo() when missing from the platform. */
  
-#    ifdef USE_LOCALE_NUMERIC
+#      ifdef USE_LOCALE_NUMERIC
  
      /* We need this substructure to only return this field for the THOUSEP
       * item.  The other items also need substructures, but they were handled
@@ -5010,125 +5105,172 @@ S_my_localeconv(pTHX_ const int item)
          {NULL, 0}
      };
  
-#    endif
+#      endif
  
      /* End of all the initialization of data structures.  Now for actual code.
       *
-     * Without nl_langinfo(), the call to my_localeconv() could be for just one
-     * of the following 3 items to emulate nl_langinfo().  This is compiled
-     * only when using perl_langinfo.h, which we control, and it has been
-     * constructed so that no item is numbered 0.
+     * Without nl_langinfo(), the call to my_localeconv() could be for all of
+     * the localeconv() items or for just one of the following 3 items to
+     * emulate nl_langinfo().
       *
-     * For each, set up the appropriate parameters for the call below to
-     * S_populate_hash_from_localeconv() */
-    if (item != 0) switch (item) {
-      default:
-        locale_panic_(Perl_form(aTHX_
-                    "Unexpected item passed to my_localeconv: %d", item));
-        break;
+     * This is compiled only when using perl_langinfo.h, which we control, and
+     * it has been constructed so that no item is numbered 0.
+     *
+     * For each individual item, either return the known value if the current
+     * locale is "C", or set up the appropriate parameters for the call below
+     * to the populate function */
+    if (item != 0) {
+        const char *locale;
+
+        switch (item) {
+          default:
+            locale_panic_(Perl_form(aTHX_
+                          "Unexpected item passed to my_localeconv: %d", item));
+            break;
  
-#    ifdef USE_LOCALE_NUMERIC
+#      ifdef USE_LOCALE_NUMERIC
  
-      case RADIXCHAR:
-        locale = numeric_locale = PL_numeric_name;
-        index_bits = INDEX_TO_BIT(LC_NUMERIC_INDEX_);
-        strings[NUMERIC_STRING_OFFSET] = DECIMAL_POINT_ADDRESS;
-        integers = NULL;
-        break;
+          case RADIXCHAR:
+            if (isNAME_C_OR_POSIX(PL_numeric_name)) {
+                (void) hv_stores(hv, DECIMAL_POINT_LITERAL, newSVpvs("."));
+                return hv;
+            }
  
-      case THOUSEP:
-        index_bits = INDEX_TO_BIT(LC_NUMERIC_INDEX_);
-        locale = numeric_locale = PL_numeric_name;
-        strings[NUMERIC_STRING_OFFSET] = thousands_sep_string;
-        integers = NULL;
-        break;
+            strings[NUMERIC_OFFSET] = DECIMAL_POINT_ADDRESS;
+            goto numeric_common;
  
-#    endif
-#    ifdef USE_LOCALE_MONETARY
+          case THOUSEP:
+            if (isNAME_C_OR_POSIX(PL_numeric_name)) {
+                (void) hv_stores(hv, THOUSANDS_SEP_LITERAL, newSVpvs(""));
+                return hv;
+            }
  
-      case CRNCYSTR:
-        index_bits = INDEX_TO_BIT(LC_MONETARY_INDEX_);
-        locale = monetary_locale = querylocale_i(LC_MONETARY_INDEX_);
+            strings[NUMERIC_OFFSET] = thousands_sep_string;
  
-        /* This item needs the values for both the currency symbol, and another
-         * one used to construct the nl_langino()-compatible return */
-        strings[MONETARY_STRING_OFFSET] = CURRENCY_SYMBOL_ADDRESS;
-        integers = P_CS_PRECEDES_ADDRESS;
-        break;
+          numeric_common:
+            index_bits = OFFSET_TO_BIT(NUMERIC_OFFSET);
+            locale = PL_numeric_name;
+            break;
+
+#      endif
+#      ifdef USE_LOCALE_MONETARY
+
+          case CRNCYSTR:    /* This item needs the values for both the currency
+                               symbol, and another one used to construct the
+                               nl_langino()-compatible return. */
+
+            locale = querylocale_c(LC_MONETARY);
+            if (isNAME_C_OR_POSIX(locale)) {
+                (void) hv_stores(hv, CURRENCY_SYMBOL_LITERAL, newSVpvs(""));
+                (void) hv_stores(hv, P_CS_PRECEDES_LITERAL, newSViv(-1));
+                return hv;
+            }
+
+            strings[MONETARY_OFFSET] = CURRENCY_SYMBOL_ADDRESS;
+            integers[MONETARY_OFFSET] = P_CS_PRECEDES_ADDRESS;
+
+            index_bits = OFFSET_TO_BIT(MONETARY_OFFSET);
+            break;
+
+#      endif
+
+        } /* End of switch() */
+
+        /* There's only one item, so only one of each of these will get used,
+         * but cheap to initialize both */
+        populate[MONETARY_OFFSET] =
+        populate[NUMERIC_OFFSET]  = S_populate_hash_from_localeconv;
+        locales[MONETARY_OFFSET] = locales[NUMERIC_OFFSET]  = locale;
+    }
+    else   /* End of for just one item to emulate nl_langinfo() */
  
  #    endif
  
-    } /* End of switch() */
+    {
+        /* Here, the call is for all of localeconv().  It has a bunch of
+         * items.  The first function call always gets the MONETARY values */
+        index_bits = OFFSET_TO_BIT(MONETARY_OFFSET);
  
-    else    /* End of for just one item to emulate nl_langinfo() */
+#    ifdef USE_LOCALE_MONETARY
  
-#  endif
+        locales[MONETARY_OFFSET] = querylocale_c(LC_MONETARY);
+        populate[MONETARY_OFFSET] =
+                                (isNAME_C_OR_POSIX(locales[MONETARY_OFFSET]))
+                                ?  S_populate_hash_from_C_localeconv
+                                :  S_populate_hash_from_localeconv;
  
-    {   /* Here, the call is for all of localeconv().  It has a bunch of
-         * items.  As in the individual item case, set up the parameters for
-         * S_populate_hash_from_localeconv(); */
+#    else
  
-#  ifdef USE_LOCALE_NUMERIC
-        numeric_locale = PL_numeric_name;
-#  elif defined(USE_LOCALE_CTYPE)
-        numeric_locale = querylocale_i(numeric_index);
-#  endif
-#  if defined(USE_LOCALE_MONETARY) || defined(USE_LOCALE_CTYPE)
-        monetary_locale = querylocale_i(monetary_index);
-#  endif
+        locales[MONETARY_OFFSET] = "C";
+        populate[MONETARY_OFFSET] = S_populate_hash_from_C_localeconv;
  
-        /* The first call to S_populate_hash_from_localeconv() will be for the
-         * MONETARY values */
-        index_bits = INDEX_TO_BIT(monetary_index);
-        locale = monetary_locale;
+#    endif
+#    ifdef USE_LOCALE_NUMERIC
  
          /* And if the locales for the two categories are the same, we can also
           * do the NUMERIC values in the same call */
-        if (strEQ(numeric_locale, monetary_locale)) {
-            index_bits |= INDEX_TO_BIT(numeric_index);
+        if (strEQ(PL_numeric_name, locales[MONETARY_OFFSET])) {
+            index_bits |= OFFSET_TO_BIT(NUMERIC_OFFSET);
+            locales[NUMERIC_OFFSET] = locales[MONETARY_OFFSET];
+            populate[NUMERIC_OFFSET] = populate[MONETARY_OFFSET];
          }
          else {
              requires_2nd_localeconv = true;
+            locales[NUMERIC_OFFSET] = PL_numeric_name;
+            populate[NUMERIC_OFFSET] = (isNAME_C_OR_POSIX(PL_numeric_name))
+                                       ?  S_populate_hash_from_C_localeconv
+                                       :  S_populate_hash_from_localeconv;
          }
  
-        /* We always pass both sets of strings. 'index_bits' tells
-         * S_populate_hash_from_localeconv which to actually look at */
-        strings[NUMERIC_STRING_OFFSET] = lconv_numeric_strings;
-        strings[MONETARY_STRING_OFFSET] = lconv_monetary_strings;
+#    else
+
+        /* When LC_NUMERIC is confined to "C", the two locales are the same
+           iff LC_MONETARY in this case is also "C".  We set up the function
+           for that case above, so fastest to test just its address */
+        locales[NUMERIC_OFFSET] = "C";
+        if (populate[MONETARY_OFFSET] == S_populate_hash_from_C_localeconv) {
+            index_bits |= OFFSET_TO_BIT(NUMERIC_OFFSET);
+            populate[NUMERIC_OFFSET] = populate[MONETARY_OFFSET];
+        }
+        else {
+            requires_2nd_localeconv = true;
+            populate[NUMERIC_OFFSET] = S_populate_hash_from_C_localeconv;
+        }
  
-        /* And pass the integer values to populate; again 'index_bits' will
-         * say to use them or not */
-        integers = lconv_integers;
+#    endif
  
      }   /* End of call is for localeconv() */
  
-    /* The code above has determined the parameters to
-       S_populate_hash_from_localeconv() for both cases of an individual item
-       and for the entire structure.  Below is code common to both */
+    /* Call the proper populate function (which may call localeconv()) and copy
+     * its results into the hash.  All the parameters have been initialized
+     * above */
+    (*populate[MONETARY_OFFSET])(aTHX_
+                                 hv, locales[MONETARY_OFFSET],
+                                 index_bits, strings, integers);
  
-    HV * hv = newHV();      /* The returned hash, initially empty */
-    sv_2mortal((SV*)hv);
+#    ifndef HAS_SOME_LANGINFO  /* Could be using this function to emulate
+                                nl_langinfo() */
+
+    /* We are done when called with an individual item.  There are no integer
+     * items to adjust, and it's best for the caller to determine if this
+     * string item is UTF-8 or not.  This is because the locale's UTF-8ness is
+     * calculated below, and in some Configurations, that can lead to a
+     * recursive call to here, which could recurse infinitely. */
+    if (item != 0) {
+        return hv;
+    }
  
-    /* Call localeconv() and copy its results into the hash.  All the
-     * parameters have been initialized above */
-    populate_hash_from_localeconv(hv,
-                                  locale,
-                                  index_bits,
-                                  strings,
-                                  integers
-                                 );
+#    endif
  
      /* The above call may have done all the hash fields, but not always, as
       * already explained.  If we need a second call it is always for the
       * NUMERIC fields */
      if (requires_2nd_localeconv) {
-        populate_hash_from_localeconv(hv,
-                                      numeric_locale,
-                                      INDEX_TO_BIT(numeric_index),
-                                      strings,
-                                      NULL      /* There are no NUMERIC integer
-                                                   fields */
-                                     );
+        (*populate[NUMERIC_OFFSET])(aTHX_
+                                    hv,
+                                    locales[NUMERIC_OFFSET],
+                                    OFFSET_TO_BIT(NUMERIC_OFFSET),
+                                    strings, integers);
      }
  
      /* Here, the hash has been completely populated.
@@ -5150,66 +5292,42 @@ S_my_localeconv(pTHX_ const int item)
       * corrections determined at hash population time, at an extra maintenance
       * cost which khw doesn't think is worth it
       */
-    for (unsigned int i = 0; i < 2; i++) {  /* Try both types of strings */
-        if (! strings[i]) {     /* Skip if no strings of this type */
-            continue;
-        }
-
-        locale = (i == NUMERIC_STRING_OFFSET)
-                 ? numeric_locale
-                 : monetary_locale;
-
-        locale_utf8ness_t locale_is_utf8 = LOCALE_UTF8NESS_UNKNOWN;
-
-#  ifdef HAS_RELIABLE_UTF8NESS_DETERMINATION
  
-        /* It saves time in the loop below to have predetermined the UTF8ness
-         * of the locale.  But only do so if the platform reliably has this
-         * information; otherwise it's better to do it only it should become
-         * necessary, which happens on a per-element basis in the loop. */
-
-        locale_is_utf8 = (is_locale_utf8(locale))
-                         ? LOCALE_IS_UTF8
-                         : LOCALE_NOT_UTF8;
+    for (unsigned int i = 0; i < 2; i++) {  /* Try both types of strings */
  
-        if (locale_is_utf8 == LOCALE_NOT_UTF8) {
-            continue;   /* No string can be UTF-8 if the locale isn't */
+        /* The return from this function is already adjusted */
+        if (populate[i] == S_populate_hash_from_C_localeconv) {
+            continue;
          }
  
-#  endif
-
          /* Examine each string */
-        while (1) {
-            const char * name = strings[i]->name;
-
-            if (! name) {   /* Reached the end */
-                break;
-            }
+        for (const lconv_offset_t *strp = strings[i]; strp->name; strp++) {
+            const char * name = strp->name;
  
              /* 'value' will contain the string that may need to be marked as
               * UTF-8 */
              SV ** value = hv_fetch(hv, name, strlen(name), true);
-            if (! value) {
+            if (value == NULL) {
                  continue;
              }
  
              /* Determine if the string should be marked as UTF-8. */
              if (UTF8NESS_YES == (get_locale_string_utf8ness_i(SvPVX(*value),
-                                                  locale_is_utf8,
-                                                  NULL,
-                                                  (locale_category_index) 0)))
+                                                  LOCALE_UTF8NESS_UNKNOWN,
+                                                  locales[i],
+                                                  LC_ALL_INDEX_ /* OOB */)))
              {
                  SvUTF8_on(*value);
              }
-
-            strings[i]++;   /* Iterate */
          }
-    }   /* End of fixing up UTF8ness */
  
+        if (integers[i] == NULL) {
+            continue;
+        }
  
-    /* Examine each integer */
-    if (integers) while (1) {
-        const char * name = integers->name;
+        /* And each integer */
+        for (const lconv_offset_t *intp = integers[i]; intp->name; intp++) {
+            const char * name = intp->name;
  
          if (! name) {   /* Reached the end */
              break;
@@ -5224,13 +5342,97 @@ S_my_localeconv(pTHX_ const int item)
          if (SvIV(*value) == CHAR_MAX) {
              sv_setiv(*value, -1);
          }
-
-        integers++;   /* Iterate */
+        }
      }
  
      return hv;
+
+#  endif    /* End of must have one or both USE_MONETARY, USE_NUMERIC */
+
+}
+
+STATIC void
+S_populate_hash_from_C_localeconv(pTHX_ HV * hv,
+                                        const char * locale,  /* Unused */
+
+                                        /* bit mask of which categories to
+                                         * populate */
+                                        const U32 which_mask,
+
+                                        /* The string type values to return;
+                                         * one element for numeric; the other
+                                         * for monetary */
+                                        const lconv_offset_t * strings[2],
+
+                                        /* And the integer fields */
+                                        const lconv_offset_t * integers[2])
+{
+    PERL_ARGS_ASSERT_POPULATE_HASH_FROM_C_LOCALECONV;
+    PERL_UNUSED_ARG(locale);
+    assert(isNAME_C_OR_POSIX(locale));
+
+    /* Fill hv with the values that localeconv() is supposed to return for
+     * the C locale */
+
+    U32 working_mask = which_mask;
+    while (working_mask) {
+
+        /* Get the bit position of the next lowest set bit.  That is the
+         * index into the 'strings' array of the category we use in this loop
+         * iteration.  Turn the bit off so we don't work on this category
+         * again in this function call. */
+        const PERL_UINT_FAST8_T i = lsbit_pos(working_mask);
+        working_mask &= ~ (1 << i);
+
+        /* This category's string fields */
+        const lconv_offset_t * category_strings = strings[i];
+
+#  ifndef HAS_SOME_LANGINFO /* This doesn't work properly if called on a single
+                               item, which could only happen when there isn't
+                               nl_langinfo on the platform */
+        assert(category_strings[1].name != NULL);
+#  endif
+
+        /* All string fields are empty except for one NUMERIC one.  That one
+         * has been initialized to be the final one in the NUMERIC strings, so
+         * stop the loop early in that case.  Otherwise, we would store an
+         * empty string to the hash, and immediately overwrite it with the
+         * correct value */
+        const unsigned int stop_early = (i == NUMERIC_OFFSET) ? 1 : 0;
+
+        /* A NULL element terminates the list */
+        while ((category_strings + stop_early)->name) {
+            (void) hv_store(hv,
+                            category_strings->name,
+                            strlen(category_strings->name),
+                            newSVpvs(""),
+                            0);
+
+            category_strings++;
+        }
+
+        /* And fill in the NUMERIC exception */
+        if (i == NUMERIC_OFFSET) {
+            (void) hv_stores(hv, "decimal_point", newSVpvs("."));
+            category_strings++;
+        }
+
+        /* Add any int fields.  In the C locale, all are -1 */
+        if (integers[i]) {
+            const lconv_offset_t * current = integers[i];
+            while (current->name) {
+                (void) hv_store(hv,
+                                current->name, strlen(current->name),
+                                newSViv(-1),
+                                0);
+                current++;
+            }
+        }
+    }
  }
  
+#  if defined(USE_LOCALE_NUMERIC) || defined(USE_LOCALE_MONETARY)
+
  STATIC void
  S_populate_hash_from_localeconv(pTHX_ HV * hv,
  
@@ -5242,19 +5444,17 @@ S_populate_hash_from_localeconv(pTHX_ HV * hv,
                                         * populate */
                                        const U32 which_mask,
  
-                                      /* strings[0] points to the numeric
-                                       * string fields; [1] to the monetary */
+                                      /* The string type values to return; one
+                                       * element for numeric; the other for
+                                       * monetary */
                                        const lconv_offset_t * strings[2],
  
-                                      /* And to the monetary integer fields */
-                                      const lconv_offset_t * integers)
+                                      /* And similarly the integer fields */
+                                      const lconv_offset_t * integers[2])
  {
      PERL_ARGS_ASSERT_POPULATE_HASH_FROM_LOCALECONV;
      PERL_UNUSED_ARG(which_mask);    /* Some configurations don't use this;
                                         complicated to figure out which */
-#  ifndef USE_LOCALE
-    PERL_UNUSED_ARG(locale);
-#  endif
  
      /* Run localeconv() and copy some or all of its results to the input 'hv'
       * hash.  Most localeconv() implementations return the values in a global
@@ -5264,63 +5464,57 @@ S_populate_hash_from_localeconv(pTHX_ HV * hv,
       * global static buffer.  Some locks might be no-ops on this platform, but
       * not others.  We need to lock if any one isn't a no-op. */
  
-#  ifdef USE_LOCALE_CTYPE
+#    ifdef WE_MUST_DEAL_WITH_MISMATCHED_CTYPE
  
-    /* Some platforms require LC_CTYPE to be congruent with the category we are
-     * looking for */
      const char * orig_CTYPE_locale = toggle_locale_c(LC_CTYPE, locale);
  
-#  endif
-#  ifdef USE_LOCALE_NUMERIC
+#    endif
+#    ifdef USE_LOCALE_NUMERIC
  
      /* We need to toggle to the underlying NUMERIC locale if we are getting
       * NUMERIC strings */
      const char * orig_NUMERIC_locale = NULL;
-    if (which_mask & INDEX_TO_BIT(LC_NUMERIC_INDEX_)) {
+    if (which_mask & OFFSET_TO_BIT(NUMERIC_OFFSET)) {
          LC_NUMERIC_LOCK(0);
  
-#    if defined(WIN32)
+#      if defined(WIN32)
  
          /* There is a bug in Windows in which setting LC_CTYPE after the others
           * doesn't actually take effect for localeconv().  See commit
           * 418efacd1950763f74ed3cc22f8cf9206661b892 for details.  Thus we have
           * to make sure that the locale we want is set after LC_CTYPE.  We
           * unconditionally toggle away from and back to the current locale
-         * prior to calling localeconv().
-         *
-         * This code will have no effect if we already are in C, but khw
-         * hasn't seen any cases where this causes problems when we are in the
-         * C locale. */
-        orig_NUMERIC_locale = toggle_locale_i(LC_NUMERIC_INDEX_, "C");
-        toggle_locale_i(LC_NUMERIC_INDEX_, locale);
+         * prior to calling localeconv(). */
+        orig_NUMERIC_locale = toggle_locale_c(LC_NUMERIC, "C");
+        toggle_locale_c(LC_NUMERIC, locale);
  
-#    else
+#      else
  
          /* No need for the extra toggle when not on Windows */
-        orig_NUMERIC_locale = toggle_locale_i(LC_NUMERIC_INDEX_, locale);
+        orig_NUMERIC_locale = toggle_locale_c(LC_NUMERIC, locale);
  
-#    endif
+#      endif
  
      }
  
-#  endif
-#  if defined(USE_LOCALE_MONETARY) && defined(WIN32)
+#    endif
+#    if defined(USE_LOCALE_MONETARY) && defined(WIN32)
  
      /* Same Windows bug as described just above for NUMERIC.  Otherwise, no
       * need to toggle LC_MONETARY, as it is kept in the underlying locale */
      const char * orig_MONETARY_locale = NULL;
-    if (which_mask & INDEX_TO_BIT(LC_MONETARY_INDEX_)) {
-        orig_MONETARY_locale = toggle_locale_i(LC_MONETARY_INDEX_, "C");
-        toggle_locale_i(LC_MONETARY_INDEX_, locale);
+    if (which_mask & OFFSET_TO_BIT(MONETARY_OFFSET)) {
+        orig_MONETARY_locale = toggle_locale_c(LC_MONETARY, "C");
+        toggle_locale_c(LC_MONETARY, locale);
      }
  
-#  endif
+#    endif
  
      /* Finally ready to do the actual localeconv().  Lock to prevent other
       * accesses until we have made a copy of its returned static buffer */
      gwLOCALE_LOCK;
  
-#  if defined(TS_W32_BROKEN_LOCALECONV) && defined(USE_THREAD_SAFE_LOCALE)
+#    if defined(TS_W32_BROKEN_LOCALECONV) && defined(USE_THREAD_SAFE_LOCALE)
  
      /* This is a workaround for another bug in Windows.  localeconv() was
       * broken with thread-safe locales prior to VS 15.  It looks at the global
@@ -5359,87 +5553,61 @@ S_populate_hash_from_localeconv(pTHX_ HV * hv,
      const char * save_global = querylocale_c(LC_ALL);
      void_setlocale_c(LC_ALL, save_thread);
  
-#  endif  /* TS_W32_BROKEN_LOCALECONV */
+#    endif  /* TS_W32_BROKEN_LOCALECONV */
  
      /* Finally, do the actual localeconv */
      const char *lcbuf_as_string = (const char *) localeconv();
  
-    /* Fill in the string fields of the HV* */
-    for (unsigned int i = 0; i < 2; i++) {
-
-        /* One iteration is only for the numeric string fields.  Skip these
-         * unless we are compiled to care about those fields and the input
-         * parameters indicate we want their values */
-        if (   i == NUMERIC_STRING_OFFSET
+    /* Copy its results for each desired category as determined by
+     * 'which_mask' */
+    U32 working_mask = which_mask;
+    while (working_mask) {
  
-#  ifdef USE_LOCALE_NUMERIC
-
-            && (which_mask & INDEX_TO_BIT(LC_NUMERIC_INDEX_)) == 0
-
-#  endif
-
-        ) {
-            continue;
-        }
+        /* Get the bit position of the next lowest set bit.  That is the
+         * index into the 'strings' array of the category we use in this loop
+         * iteration.  Turn the bit off so we don't work on this category
+         * again in this function call. */
+        const PERL_UINT_FAST8_T i = lsbit_pos32(working_mask);
+        working_mask &= ~ (1 << i);
  
-        /* The other iteration is only for the monetary string fields.  Again
-         * skip it unless we want those values */
-        if (   i == MONETARY_STRING_OFFSET
-
-#  ifdef USE_LOCALE_MONETARY
-
-            && (which_mask & INDEX_TO_BIT(LC_MONETARY_INDEX_)) == 0
-
-#  endif
-        ) {
-
-            continue;
-        }
-
-        /* For each field for the given category ... */
+        /* Point to the string field list for the given category ... */
          const lconv_offset_t * category_strings = strings[i];
-        while (1) {
-            const char * name = category_strings->name;
-            if (! name) {   /* Quit at the end */
-                break;
-            }
+        while (category_strings->name) {
  
-            /* we have set things up so that we know where in the returned
+            /* We have set things up so that we know where in the returned
               * structure, when viewed as a string, the corresponding value is.
               * */
              const char *value = *((const char **)(  lcbuf_as_string
                                                    + category_strings->offset));
-
-            /* Set to get next string on next iteration */
-            category_strings++;
-
-            /* Skip if this platform doesn't have this field. */
-            if (! value) {
-                continue;
+            if (value) {    /* Copy to the hash */
+                (void) hv_store(hv,
+                                category_strings->name,
+                                strlen(category_strings->name),
+                                newSVpv(value, strlen(value)),
+                                0);
              }
  
-            /* Copy to the hash */
-            (void) hv_store(hv,
-                            name, strlen(name),
-                            newSVpv(value, strlen(value)),
-                            0);
+            category_strings++;
          }
  
-        /* Add any int fields to the HV* */
-        if (i == MONETARY_STRING_OFFSET && integers) {
-            while (integers->name) {
+        /* Add any int fields to the HV*. */
+        if (integers[i]) {
+            const lconv_offset_t * current = integers[i];
+            while (current->name) {
                  const char value = *((const char *)(  lcbuf_as_string
-                                                    + integers->offset));
-                (void) hv_store(hv, integers->name,
-                                strlen(integers->name), newSViv(value), 0);
-                integers++;
+                                                    + current->offset));
+                (void) hv_store(hv,
+                                current->name, strlen(current->name),
+                                newSViv(value),
+                                0);
+                current++;
              }
          }
      }   /* End of loop through the fields */
  
      /* Done with copying to the hash.  Can unwind the critical section locks */
  
-#  if defined(TS_W32_BROKEN_LOCALECONV) && defined(USE_THREAD_SAFE_LOCALE)
+#    if defined(TS_W32_BROKEN_LOCALECONV) && defined(USE_THREAD_SAFE_LOCALE)
  
      /* Restore the global locale's prior state */
      void_setlocale_c(LC_ALL, save_global);
@@ -5454,32 +5622,33 @@ S_populate_hash_from_localeconv(pTHX_ HV * hv,
      /* Restore the per-thread locale state */
      void_setlocale_c(LC_ALL, save_thread);
  
-#  endif  /* TS_W32_BROKEN_LOCALECONV */
+#    endif  /* TS_W32_BROKEN_LOCALECONV */
  
      gwLOCALE_UNLOCK;    /* Finished with the critical section of a
                             globally-accessible buffer */
  
-#  if defined(USE_LOCALE_MONETARY) && defined(WIN32)
+#    if defined(USE_LOCALE_MONETARY) && defined(WIN32)
  
-    restore_toggled_locale_i(LC_MONETARY_INDEX_, orig_MONETARY_locale);
+    restore_toggled_locale_c(LC_MONETARY, orig_MONETARY_locale);
  
-#  endif
-#  ifdef USE_LOCALE_NUMERIC
+#    endif
+#    ifdef USE_LOCALE_NUMERIC
  
-    restore_toggled_locale_i(LC_NUMERIC_INDEX_, orig_NUMERIC_locale);
-    if (which_mask & INDEX_TO_BIT(LC_NUMERIC_INDEX_)) {
+    restore_toggled_locale_c(LC_NUMERIC, orig_NUMERIC_locale);
+    if (which_mask & OFFSET_TO_BIT(NUMERIC_OFFSET)) {
          LC_NUMERIC_UNLOCK;
      }
  
-#  endif
-#  ifdef USE_LOCALE_CTYPE
+#    endif
+#    ifdef WE_MUST_DEAL_WITH_MISMATCHED_CTYPE
  
      restore_toggled_locale_c(LC_CTYPE, orig_CTYPE_locale);
  
-#  endif
+#    endif
  
  }
  
+#  endif    /* defined(USE_LOCALE_NUMERIC) || defined(USE_LOCALE_MONETARY) */
  #endif /* defined(HAS_LOCALECONV) */
  #ifndef HAS_SOME_LANGINFO
  
@@ -5800,9 +5969,8 @@ Perl_sv_strftime_tm(pTHX_ SV * fmt, const struct tm * mytm)
                               );
      SV * sv = NULL;
      if (retval) {
-        STRLEN len = strlen(retval);
-        sv = newSV(len);
-        sv_usepvn_flags(sv, retval, len, SV_HAS_TRAILING_NUL);
+        sv = newSV_type(SVt_PV);
+        sv_usepvn_flags(sv, retval, strlen(retval), SV_HAS_TRAILING_NUL);
  
          if (result_utf8ness == UTF8NESS_YES) {
              SvUTF8_on(sv);
@@ -5826,6 +5994,16 @@ Perl_sv_strftime_ints(pTHX_ SV * fmt, int sec, int min, int hour,
  }
  
  #ifdef USE_LOCALE
+#  ifndef HAS_DEFINITIVE_UTF8NESS_DETERMINATION
+
+/* Forward declaration of function that we don't put into embed.fnc so as to
+ * make its removal easier, as there may not be any extant platforms that need
+ * it; and the function is located after S_my_langinfo_i() because it's easier
+ * to understand when placed in the context of that code */
+STATIC const char * S_override_codeset_if_utf8_found(pTHX_
+                                                     const char *codeset,
+                                                     const char *locale);
+#  endif
  
  /* There are several implementations of my_langinfo, depending on the
   * Configuration.  They all share the same beginning of the function */
@@ -5848,8 +6026,6 @@ S_my_langinfo_i(pTHX_
                   * value, as documented */
                  utf8ness_t * utf8ness)
  {
-    const char * retval = NULL;
-
      PERL_ARGS_ASSERT_MY_LANGINFO_I;
      assert(cat_index < LC_ALL_INDEX_);
  
@@ -5865,11 +6041,17 @@ S_my_langinfo_i(pTHX_
   * isn't, or vice versa).  There is explicit code to bring the categories into
   * sync.  This doesn't seem to be a problem with nl_langinfo(), so that
   * implementation doesn't currently worry about it.  But it is a problem on
- * Windows boxes, which don't have nl_langinfo(). */
+ * Windows boxes, which don't have nl_langinfo().
+ *
+ * One might be tempted to avoid any toggling by instead using nl_langinfo_l()
+ * on platforms that have it.  This would entail creating a locale object with
+ * newlocale() and freeing it afterwards.  But doing so runs significantly
+ * slower than just doing the toggle ourselves.  lib/locale_threads.t was
+ * slowed down by 25% on Ubuntu 22.04 */
  
  /*--------------------------------------------------------------------------*/
  #  if defined(HAS_NL_LANGINFO) /* nl_langinfo() is available.  */
-#    ifdef USE_LOCALE_CTYPE
+#    ifdef WE_MUST_DEAL_WITH_MISMATCHED_CTYPE
  
      /* This function sorts out if things actually have to be switched or not,
       * for both save and restore. */
@@ -5880,7 +6062,7 @@ S_my_langinfo_i(pTHX_
      const char * orig_switched_locale = toggle_locale_i(cat_index, locale);
  
      gwLOCALE_LOCK;
-    retval = save_to_buffer(nl_langinfo(item), retbufp, retbuf_sizep);
+    const char * retval = save_to_buffer(nl_langinfo(item), retbufp, retbuf_sizep);
      gwLOCALE_UNLOCK;
  
      if (utf8ness) {
@@ -5891,7 +6073,7 @@ S_my_langinfo_i(pTHX_
  
      restore_toggled_locale_i(cat_index, orig_switched_locale);
  
-#    ifdef USE_LOCALE_CTYPE
+#    ifdef WE_MUST_DEAL_WITH_MISMATCHED_CTYPE
  
      restore_toggled_locale_c(LC_CTYPE, orig_CTYPE_locale);
  
@@ -5902,21 +6084,20 @@ S_my_langinfo_i(pTHX_
  #  else   /* Below, emulate nl_langinfo as best we can */
  
      /* The other completion is where we have to emulate nl_langinfo().  There
-     * are various possibilities depending on the Configuration */
-
-#    ifdef USE_LOCALE_CTYPE
-
-    const char * orig_CTYPE_locale = toggle_locale_c(LC_CTYPE, locale);
-
-#    endif
-
-    const char * orig_switched_locale = toggle_locale_i(cat_index, locale);
-
-    /* Here, we are in the locale we want information about */
-
+     * are various possibilities depending on the Configuration.   The major
+     * platform lacking nl_langinfo is Windows.  It does have GetLocaleInfoEx()
+     * that could be used to get most of the items, but it (and other similar
+     * Windows API functions) use what MS calls "locale names", whereas the C
+     * functions use what MS calls "locale strings".  The locale string
+     * "English_United_States.1252" is equivalent to the locale name "en_US".
+     * There are tables inside Windows that translate between the two forms,
+     * but they are not exposed.  Also calling setlocale(), then calling
+     * GetThreadLocale() doesn't work, as the former doesn't change the
+     * latter's return.  Therefore we are stuck using the mechanisms below. */
      /* Almost all the items will have ASCII return values.  Set that here, and
       * override if necessary */
      utf8ness_t is_utf8 = UTF8NESS_IMMATERIAL;
+    const char * retval = NULL;
  
      switch (item) {
        default:
@@ -5942,6 +6123,12 @@ S_my_langinfo_i(pTHX_
  
              Newx(floatbuf, initial_size, char);
  
+#      if defined(WE_MUST_DEAL_WITH_MISMATCHED_CTYPE)
+            const char * orig_CTYPE_locale = toggle_locale_c(LC_CTYPE, locale);
+#      endif
+
+            const char * orig_NUMERIC_locale = toggle_locale_c(LC_NUMERIC,
+                                                               locale);
              /* 1.5 is exactly representable on binary computers */
              Size_t needed_size = snprintf(floatbuf, initial_size, "%.1f", 1.5);
  
@@ -5955,6 +6142,12 @@ S_my_langinfo_i(pTHX_
                  needed_size = new_needed;
              }
  
+            restore_toggled_locale_c(LC_NUMERIC, orig_NUMERIC_locale);
+
+#      if defined(WE_MUST_DEAL_WITH_MISMATCHED_CTYPE)
+            restore_toggled_locale_c(LC_CTYPE, orig_CTYPE_locale);
+#      endif
+
              char * s = floatbuf;
              char * e = floatbuf + needed_size;
  
@@ -6005,12 +6198,7 @@ S_my_langinfo_i(pTHX_
  #    endif
  #    ifdef HAS_LOCALECONV
  
-    /* These items are available from localeconv().  (To avoid using
-     * TS_W32_BROKEN_LOCALECONV, one could use GetNumberFormat and
-     * GetCurrencyFormat; patches welcome) */
-
-#      define P_CS_PRECEDES    "p_cs_precedes"
-#      define CURRENCY_SYMBOL  "currency_symbol"
+    /* These items are available from localeconv(). */
  
     /* case RADIXCHAR:   // May drop down to here in some configurations */
        case THOUSEP:
@@ -6037,24 +6225,23 @@ S_my_langinfo_i(pTHX_
               * with exactly both fields.  Delete this one, leaving just the
               * CRNCYSTR one in the hash */
              SV* precedes = hv_delete(result_hv,
-                                     P_CS_PRECEDES, STRLENs(P_CS_PRECEDES),
+                                     P_CS_PRECEDES_LITERAL,
+                                     STRLENs(P_CS_PRECEDES_LITERAL),
                                       0);
              if (! precedes) {
                  locale_panic_("my_localeconv() unexpectedly didn't return"
-                              " a value for " P_CS_PRECEDES);
+                              " a value for " P_CS_PRECEDES_LITERAL);
              }
  
              /* The modification is to prefix the localeconv() return with a
               * single byte, calculated as follows: */
-            char prefix = (LIKELY(SvIV(precedes) != -1))
-                          ? ((precedes != 0) ?  '-' : '+')
-
-                            /* khw couldn't find any documentation that
-                             * CHAR_MAX (which we modify to -1) is the signal,
-                             * but cygwin uses it thusly, and it makes sense
-                             * given that CHAR_MAX indicates the value isn't
-                             * used, so it neither precedes nor succeeds */
-                          : '.';
+            const char * prefix = (LIKELY(SvIV(precedes) != -1))
+                                   ? ((precedes != 0) ?  "-" : "+")
+                                   : ".";
+            /* (khw couldn't find any documentation that the dot is signalled
+             * by CHAR_MAX (which we modify to -1), but cygwin uses it thusly,
+             * and it makes sense given that CHAR_MAX indicates the value isn't
+             * used, so it neither precedes nor succeeds) */
  
              /* Now get CRNCYSTR */
              (void) hv_iterinit(result_hv);
@@ -6062,19 +6249,17 @@ S_my_langinfo_i(pTHX_
              string = hv_iterval(result_hv, entry);
  
              /* And perform the modification */
-            Perl_sv_setpvf(aTHX_ string, "%c%s", prefix, SvPV_nolen(string));
+            sv_insert(string, 0, 0, prefix, 1);
          }
  
          /* Here, 'string' contains the value we want to return */
          retval = save_to_buffer(SvPV_nolen(string), retbufp, retbuf_sizep);
  
          if (utf8ness) {
-            is_utf8 = (SvUTF8(string))
-                      ? UTF8NESS_YES
-                      : (is_utf8_invariant_string( (U8 *) retval,
-                                                  strlen(retval)))
-                        ? UTF8NESS_IMMATERIAL
-                        : UTF8NESS_NO;
+            is_utf8 = get_locale_string_utf8ness_i(retval,
+                                                   LOCALE_UTF8NESS_UNKNOWN,
+                                                   locale,
+                                                   cat_index);
          }
  
          break;
@@ -6191,6 +6376,9 @@ S_my_langinfo_i(pTHX_
               * first day of the week.  Since we're only getting one thing at a
               * time, it all works */
              struct tm  mytm;
+
+            const char * orig_TIME_locale = toggle_locale_c(LC_TIME, locale);
+
              ints_to_tm(&mytm, 30, 30, hour, mday, mon, 2011, 0, 0, 0);
              char * temp;
              if (utf8ness) {
@@ -6207,6 +6395,8 @@ S_my_langinfo_i(pTHX_
                  temp = strftime_tm(format, &mytm);
              }
  
+            restore_toggled_locale_c(LC_TIME, orig_TIME_locale);
+
              retval = save_to_buffer(temp, retbufp, retbuf_sizep);
              Safefree(temp);
  
@@ -6268,14 +6458,20 @@ S_my_langinfo_i(pTHX_
              break;
          }
  
+        /* If this happens to match our cached value */
+        if (PL_in_utf8_CTYPE_locale && strEQ(locale, PL_ctype_name)) {
+            retval = "UTF-8";
+            break;
+        }
+
  #      ifdef WIN32
  
-        /* This function retrieves the code page.  It is subject to change, but
-         * is documented and has been stable for many releases */
-        UINT ___lc_codepage_func(void);
+        const char * orig_CTYPE_locale = toggle_locale_c(LC_CTYPE, locale);
  
  #        ifndef WIN32_USE_FAKE_OLD_MINGW_LOCALES
  
+        /* This function retrieves the code page.  It is subject to change, but
+         * is documented and has been stable for many releases */
          retval = save_to_buffer(Perl_form(aTHX_ "%d", ___lc_codepage_func()),
                                  retbufp, retbuf_sizep);
  #        else
@@ -6284,6 +6480,8 @@ S_my_langinfo_i(pTHX_
                                  retbufp, retbuf_sizep);
  #        endif
  
+        restore_toggled_locale_c(LC_CTYPE, orig_CTYPE_locale);
+
          DEBUG_Lv(PerlIO_printf(Perl_debug_log, "locale='%s' cp=%s\n",
                                                 locale, retval));
          break;
@@ -6296,234 +6494,348 @@ S_my_langinfo_i(pTHX_
           * UTF-8 locale or not.  If it is UTF-8, we (correctly) use that for
           * the code set. */
  
-#        if defined(HAS_MBTOWC) || defined(HAS_MBRTOWC)
+#        ifdef HAS_DEFINITIVE_UTF8NESS_DETERMINATION
  
-        /* If libc mbtowc() evaluates the bytes that form the REPLACEMENT
-         * CHARACTER as that Unicode code point, this has to be a UTF-8 locale.
-         * */
-        wchar_t wc = 0;
-        (void) Perl_mbtowc_(aTHX_ NULL, NULL, 0);/* Reset shift state */
-        int mbtowc_ret = Perl_mbtowc_(aTHX_ &wc,
-                                      STR_WITH_LEN(REPLACEMENT_CHARACTER_UTF8));
-        if (mbtowc_ret >= 0 && wc == UNICODE_REPLACEMENT) {
-            DEBUG_Lv(PerlIO_printf(Perl_debug_log,
-                                   "mbtowc returned REPLACEMENT\n"));
+        if (is_locale_utf8(locale)) {
              retval = "UTF-8";
              break;
          }
  
-        /* Here, it isn't a UTF-8 locale. */
-
-#        else   /* mbtowc() is not available.  The chances of this code getting
-                   compiled are very small, as it is a C99 required function,
-                   and we are now requiring C99; perhaps if it is a defective
-                   implementation.  But if so, there are other libc functions
-                   that could be used instead. */
+#        endif
  
-        /* Sling together several possibilities, depending on platform
-         * capabilities and what we found.
+        /* Here, the code set has not been found.  The only other option khw
+         * could think of is to see if the codeset is part of the locale name.
+         * This is very less than ideal; often there is no code set in the
+         * name; and at other times they even lie.
           *
-         * For non-English locales or non-dollar currency locales, we likely
-         * will find out whether a locale is UTF-8 or not */
-
-        utf8ness_t is_utf8 = UTF8NESS_UNKNOWN;
-        char * scratch_buf = NULL;
-
-#          if defined(USE_LOCALE_MONETARY) && defined(HAS_LOCALECONV)
-#            define LANGINFO_RECURSED_MONETARY  0x1
-#            define LANGINFO_RECURSED_TIME      0x2
-
-        /* Can't use this method unless localeconv() is available, as that's
-         * the way we find out the currency symbol.
+         * But there is an XPG standard syntax, which many locales follow:
           *
-         * First try looking at the currency symbol (via a recursive call) to
-         * see if it disambiguates things.  Often that will be in the native
-         * script, and if the symbol isn't legal UTF-8, we know that the locale
-         * isn't either.
+         *    language[_territory[.codeset]][@modifier]
           *
-         * The recursion calls my_localeconv() to find CRNCYSTR, and that can
-         * call is_locale_utf8() which will call my_langinfo(CODESET) which
-         * will get to here again, ad infinitum.  The guard prevents that.
-         */
-        if ((PL_langinfo_recursed & LANGINFO_RECURSED_MONETARY) == 0) {
-            PL_langinfo_recursed |= LANGINFO_RECURSED_MONETARY;
-            (void) my_langinfo_c(CRNCYSTR, LC_MONETARY, locale, &scratch_buf,
-                                 NULL, &is_utf8);
-            PL_langinfo_recursed &= ~LANGINFO_RECURSED_MONETARY;
+         * So we take the part between the dot and any '@' */
+        retval = strchr(locale, '.');
+        if (! retval) {
+            retval = "";  /* Alas, no dot */
          }
+        else {
  
-        Safefree(scratch_buf);
+            /* Don't include the dot */
+            retval++;
+
+            /* And stop before any '@' */
+            const char * modifier = strchr(retval, '@');
+            if (modifier) {
+                char * code_set_name;
+                const Size_t name_len = modifier - retval;
+                Newx(code_set_name, name_len + 1, char);    /* +1 for NUL */
+                my_strlcpy(code_set_name, retval, name_len + 1);
+                SAVEFREEPV(code_set_name);
+                retval = code_set_name;
+            }
  
-#          endif
-#          ifdef USE_LOCALE_TIME
+            /* The code set name is considered to be everything between the dot
+             * and the '@' */
+            retval = save_to_buffer(retval, retbufp, retbuf_sizep);
+        }
  
-        /* If we have ruled out being UTF-8, no point in checking further. */
-        if (   is_utf8 != UTF8NESS_NO
-            && (PL_langinfo_recursed & LANGINFO_RECURSED_TIME) == 0)
-        {
-            /* But otherwise do check more.  This is done even if the currency
-             * symbol looks to be UTF-8, just in case that's a false positive.
-             *
-             * Look at the LC_TIME entries, like the names of the months or
-             * weekdays.  We quit at the first one that is illegal UTF-8
-             *
-             * The recursion guard is because the recursed my_langinfo_c() will
-             * call strftime8() to find the LC_TIME value passed to it, and
-             * that will call my_langinfo(CODESET) for non-ASCII returns,
-             * which will get here again, ad infinitum
-             */
+#        ifndef HAS_DEFINITIVE_UTF8NESS_DETERMINATION
+
+        /* Here, 'retval' contains any codeset name derived from the locale
+         * name.  That derived name may be empty or not necessarily indicative
+         * of the real codeset.  But we can often determine if it should be
+         * UTF-8, regardless of what the name is.  On most platforms, that
+         * determination is definitive, and was already done.  But for this
+         * code to be compiled, this platform is not one of them.  However,
+         * there are typically tools available to make a very good guess, and
+         * knowing the derived codeset name improves the quality of that guess.
+         * The following function overrides the derived codeset name when it
+         * guesses that it actually should be UTF-8.  It could be inlined here,
+         * but was moved out of this switch() so as to make the switch()
+         * control flow easier to follow */
+        retval = S_override_codeset_if_utf8_found(aTHX_ retval, locale);
  
-            utf8ness_t this_is_utf8 = UTF8NESS_UNKNOWN;
-            const int times[] = {
-                DAY_1, DAY_2, DAY_3, DAY_4, DAY_5, DAY_6, DAY_7,
-                MON_1, MON_2, MON_3, MON_4, MON_5, MON_6, MON_7, MON_8,
-                                            MON_9, MON_10, MON_11, MON_12,
-                ALT_DIGITS, AM_STR, PM_STR,
-                ABDAY_1, ABDAY_2, ABDAY_3, ABDAY_4, ABDAY_5, ABDAY_6,
-                                                             ABDAY_7,
-                ABMON_1, ABMON_2, ABMON_3, ABMON_4, ABMON_5, ABMON_6,
-                ABMON_7, ABMON_8, ABMON_9, ABMON_10, ABMON_11, ABMON_12
-            };
-
-            /* The code in the recursive call can handle switching the locales,
-             * but by doing it here, we avoid switching each iteration of the
-             * loop */
-            const char * orig_TIME_locale = toggle_locale_c(LC_TIME, locale);
+#        endif
  
-            PL_langinfo_recursed |= LANGINFO_RECURSED_TIME;
-            for (PERL_UINT_FAST8_T i = 0; i < C_ARRAY_LENGTH(times); i++) {
-                scratch_buf = NULL;
-                (void) my_langinfo_c(times[i], LC_TIME, locale, &scratch_buf,
-                                     NULL, &this_is_utf8);
-                Safefree(scratch_buf);
-                if (this_is_utf8 == UTF8NESS_NO) {
-                    is_utf8 = UTF8NESS_NO;
-                    break;
-                }
+        break;  /* All done */
  
-                if (this_is_utf8 == UTF8NESS_YES) {
-                    is_utf8 = UTF8NESS_YES;
-                }
-            }
-            PL_langinfo_recursed &= ~LANGINFO_RECURSED_TIME;
+#      endif    /* ! WIN32 */
+#    endif      /* USE_LOCALE_CTYPE */
  
-            /* Here we have gone through all the LC_TIME elements.  is_utf8 has
-             * been set as follows:
-             *      UTF8NESS_NO           If at least one isn't legal UTF-8
-             *      UTF8NESS_IMMMATERIAL  If all are ASCII
-             *      UTF8NESS_YES          If all are legal UTF-8 (including
-             *                            ASCII), and at least one isn't
-             *                            ASCII. */
+    } /* Giant switch() of nl_langinfo() items */
  
-            restore_toggled_locale_c(LC_TIME, orig_TIME_locale);
-        }
+    if (utf8ness) {
+        *utf8ness = is_utf8;
+    }
  
-#          endif    /* LC_TIME */
+    return retval;
  
-        /* If nothing examined above rules out it being UTF-8, and at least one
-         * thing fits as UTF-8 (and not plain ASCII), assume the codeset is
-         * UTF-8. */
-        if (is_utf8 == UTF8NESS_YES) {
-            retval = "UTF-8";
-            break;
+#  endif    /* All the implementations of my_langinfo() */
+
+/*--------------------------------------------------------------------------*/
+
+}   /* my_langinfo() */
+
+#  ifndef HAS_DEFINITIVE_UTF8NESS_DETERMINATION
+
+STATIC const char *
+S_override_codeset_if_utf8_found(pTHX_ const char * codeset,
+                                       const char * locale)
+{
+#    define NAME_INDICATES_UTF8       0x1
+#    define MB_CUR_MAX_SUGGESTS_UTF8  0x2
+
+    /* Override 'codeset' with UTF-8 if this routine guesses that it should be.
+     * Conversely (but rarely), "UTF-8" in the locale name might be wrong.  We
+     * return "" as the code set name if we find that to be the case.  */
+
+    unsigned int lean_towards_being_utf8 = 0;
+    if (is_codeset_name_UTF8(codeset)) {
+        lean_towards_being_utf8 |= NAME_INDICATES_UTF8;
+    }
+
+    /* For this portion of the file to compile, some C99 functions aren't
+     * available to us, even though we now require C99.  So, something must be
+     * wrong with them.  The code here should be good enough to work around
+     * this issue, but should the need arise, comments in S_is_locale_utf8()
+     * list some alternative C99 functions that could be tried.
+     *
+     * But MB_CUR_MAX is a C89 construct that helps a lot, is simple for a
+     * vendor to implement, and our experience with it is that it works well on
+     * a variety of platforms.  We have found that it returns a too-large
+     * number on some platforms for the C locale, but for no others.  That
+     * locale was already ruled out in the code that called this function.  (If
+     * MB_CUR_MAX returned too small a number, that would break a lot of
+     * things, and likely would be quickly corrected by the vendor.)  khw has
+     * some confidence that it doesn't return >1 when 1 is meant, as that would
+     * trigger a Perl warning, and we've had no reports of invalid occurrences
+     * of such. */
+#    ifdef MB_CUR_MAX
+
+    /* If there are fewer bytes available in this locale than are required to
+     * represent the largest legal UTF-8 code point, this definitely isn't a
+     * UTF-8 locale, even if the locale name says it is. */
+    const int mb_cur_max = MB_CUR_MAX;
+    if (mb_cur_max < (int) UNISKIP(PERL_UNICODE_MAX)) {
+        if (lean_towards_being_utf8 & NAME_INDICATES_UTF8) {
+            return "";    /* The name is wrong; override */
          }
  
-        /* Here, nothing examined indicates that the codeset is UTF-8.  But
-         * what is it?  The other locale categories are not likely to be of
-         * further help:
-         *
-         * LC_NUMERIC   Only a few locales in the world have a non-ASCII radix
-         *              or group separator.
-         * LC_CTYPE     This code wouldn't be compiled if mbtowc() existed and
-         *              was reliable.  This is unlikely in C99.  There are
-         *              other functions that could be used instead, but are
-         *              they going to exist, and be able to distinguish between
-         *              UTF-8 and 8859-1?  Deal with this only if it becomes
-         *              necessary.
-         * LC_MESSAGES  The strings returned from strerror() would seem likely
-         *              candidates, but experience has shown that many systems
-         *              don't actually have translations installed for them.
-         *              They are instead always in English, so everything in
-         *              them is ASCII, which is of no help to us.  A Configure
-         *              probe could possibly be written to see if this platform
-         *              has non-ASCII error messages.  But again, wait until it
-         *              turns out to be an actual problem. */
-
-#        endif    /* ! mbtowc() */
-
-        /* Rejoin the mbtowc available/not-available cases.
-         *
-         * We got here only because we haven't been able to find the codeset.
-         * The only other option khw could think of is to see if the codeset is
-         * part of the locale name.  This is very less than ideal; often there
-         * is no code set in the name; and at other times they even lie.
-         *
-         * But there is an XPG standard syntax, which many locales follow:
-         *
-         * language[_territory[.codeset]][@modifier]
+        return codeset;
+    }
+
+    /* But if the locale could be UTF-8, and also the name corroborates this,
+     * assume it is so */
+    if (lean_towards_being_utf8 & NAME_INDICATES_UTF8) {
+        return codeset;
+    }
+
+    /* Here, the name doesn't indicate UTF-8, but MB_CUR_MAX indicates it could
+     * be.  khw knows of only two other locales in the world, EUC-TW and GB
+     * 18030, that legitimately require this many bytes (4).  In both, the
+     * single byte characters are the same as ASCII.  No multi-byte character
+     * in EUC-TW is legal UTF-8 (since the first byte of each is a
+     * continuation).  GB 18030 has no three byte sequences, and none of the
+     * four byte ones is legal UTF-8 (as the second byte for these is a
+     * non-continuation).  But every legal UTF-8 two byte sequence is also
+     * legal in GB 18030, though none have the same meaning, and no Han code
+     * point expressed in UTF-8 is two byte.  So the further tests below which
+     * look for native expressions of currency and time will not return two
+     * byte sequences, hence they will reliably rule out this locale as being
+     * UTF-8.  So, if we get this far, the result is almost certainly UTF-8.
+     * But to be really sure, also check that there is no illegal UTF-8. */
+    lean_towards_being_utf8 |= MB_CUR_MAX_SUGGESTS_UTF8;
+
+#    endif    /* has MB_CUR_MAX */
+
+    /* Here, MB_CUR_MAX is not available, or was inconclusive.  What we do is
+     * to look at various strings associated with the locale:
+     *  1)  If any are illegal UTF-8, the locale can't be UTF-8.
+     *  2)  If all are legal UTF-8, and some non-ASCII characters are present,
+     *      it is likely to be UTF-8, because of the strictness of UTF-8
+     *      syntax. So assume it is UTF-8
+     *  3)  If all are ASCII and the locale name and/or MB_CUR_MAX indicate
+     *      UTF-8, assume the locale is UTF-8.
+     *  4)  Otherwise, assume the locale isn't UTF-8
+     *
+     * To save cycles, if the locale name indicates it is a UTF-8 locale, we
+     * stop looking at the first instance with legal non-ASCII UTF-8.  It is
+     * very unlikely this combination is coincidental. */
+
+    utf8ness_t strings_utf8ness = UTF8NESS_UNKNOWN;
+    char * scratch_buf = NULL;
+    Size_t scratch_buf_size = 0;
+
+    /* List of strings to look at */
+    const int trials[] = {
+
+#    if defined(USE_LOCALE_MONETARY) && defined(HAS_LOCALECONV)
+
+        /* The first string tried is the locale currency name.  Often that will
+         * be in the native script.
           *
-         * So we take the part between the dot and any '@' */
-        retval = (const char *) strchr(locale, '.');
-        if (! retval) {
-            retval = "";  /* Alas, no dot */
-            break;
-        }
+         * But this is usable only if localeconv() is available, as that's the
+         * way we find out the currency symbol. */
  
-        /* Don't include the dot */
-        retval++;
-
-        /* And stop before any '@' */
-        const char * modifier = strchr(retval, '@');
-        if (modifier) {
-            char * code_set_name;
-            const Size_t name_len = modifier - retval;
-            Newx(code_set_name, name_len + 1, char);         /* +1 for NUL */
-            my_strlcpy(code_set_name, retval, name_len + 1);
-            SAVEFREEPV(code_set_name);
-            retval = code_set_name;
-        }
+        CRNCYSTR,
  
-#        if defined(HAS_MBTOWC) || defined(HAS_MBRTOWC)
+#    endif
+#    ifdef USE_LOCALE_TIME
  
-        /* When these functions, are available, they were tried earlier and
-         * indicated that the locale did not act like a proper UTF-8 one.  So
-         * if it claims to be UTF-8, it is a lie */
-        if (is_codeset_name_UTF8(retval)) {
-            retval = "";
-            break;
-        }
+    /* We can also try various strings associated with LC_TIME, like the names
+     * of months or days of the week */
  
-#        endif
+        DAY_1, DAY_2, DAY_3, DAY_4, DAY_5, DAY_6, DAY_7,
+        MON_1, MON_2, MON_3, MON_4, MON_5, MON_6, MON_7, MON_8,
+                                    MON_9, MON_10, MON_11, MON_12,
+        ALT_DIGITS, AM_STR, PM_STR,
+        ABDAY_1, ABDAY_2, ABDAY_3, ABDAY_4, ABDAY_5, ABDAY_6, ABDAY_7,
+        ABMON_1, ABMON_2, ABMON_3, ABMON_4, ABMON_5, ABMON_6,
+        ABMON_7, ABMON_8, ABMON_9, ABMON_10, ABMON_11, ABMON_12
  
-        /* Otherwise the code set name is considered to be everything between
-         * the dot and the '@' */
-        retval = save_to_buffer(retval, retbufp, retbuf_sizep);
+#    endif
  
-        break;
+    };
  
-#      endif    /* ! WIN32 */
-#    endif      /* USE_LOCALE_CTYPE */
+#    ifdef USE_LOCALE_TIME
  
-    } /* Giant switch() of nl_langinfo() items */
+    /* The code in the recursive call below can handle switching the locales,
+     * but by doing it now here, that code will check and discover that there
+     * is no need to switch then restore, avoiding those each loop iteration */
+    const char * orig_TIME_locale = toggle_locale_c(LC_TIME, locale);
  
-    restore_toggled_locale_i(cat_index, orig_switched_locale);
+#    endif
+
+    /* The trials array may consist of strings from two different locale
+     * categories.  The call to my_langinfo_i() below needs to pass the proper
+     * category for each string.  There is a max of 1 trial for LC_MONETARY;
+     * the rest are LC_TIME.  So the array is arranged so the LC_MONETARY item
+     * (if any) is first, and all subsequent iterations will use LC_TIME.
+     * These #ifdefs set up the values for all possible combinations. */
+#    if defined(USE_LOCALE_MONETARY) && defined(HAS_LOCALECONV)
+
+    locale_category_index  cat_index = LC_MONETARY_INDEX_;
+
+#      ifdef USE_LOCALE_TIME
+
+    const locale_category_index  follow_on_cat_index = LC_TIME_INDEX_;
+    assert(trials[1] == DAY_1); /* Make sure only a single non-time entry */
+
+#      else
+
+    /* Effectively out-of-bounds, as there is only the monetary entry */
+    const locale_category_index  follow_on_cat_index = LC_ALL_INDEX_;
+
+#      endif
+#    elif defined(USE_LOCALE_TIME)
+
+    locale_category_index  cat_index = LC_TIME_INDEX_;
+    const locale_category_index  follow_on_cat_index = LC_TIME_INDEX_;
+
+#    else
+
+    /* Effectively out-of-bounds, as here there are no trial entries at all.
+     * This allows this code to compile, but there are no strings to test, and
+     * so the answer will always be non-UTF-8. */
+    locale_category_index  cat_index = LC_ALL_INDEX_;
+    const locale_category_index  follow_on_cat_index = LC_ALL_INDEX_;
  
-#    ifdef USE_LOCALE_CTYPE
-    restore_toggled_locale_c(LC_CTYPE, orig_CTYPE_locale);
  #    endif
  
-    if (utf8ness) {
-        *utf8ness = is_utf8;
+    /* Everything set up; look through all the strings */
+    for (PERL_UINT_FAST8_T i = 0; i < C_ARRAY_LENGTH(trials); i++) {
+        (void) my_langinfo_i(trials[i], cat_index, locale,
+                             &scratch_buf, &scratch_buf_size, NULL);
+        cat_index = follow_on_cat_index;
+
+        /* To prevent infinite recursive calls, we don't ask for the UTF-8ness
+         * of the string (in 'trials[i]') above.  Instead we examine the
+         * returned string here */
+        const Size_t len = strlen(scratch_buf);
+        const U8 * first_variant;
+
+        /* If the string is identical whether or not it is encoded as UTF-8, it
+         * isn't helpful in determining UTF8ness. */
+        if (is_utf8_invariant_string_loc((U8 *) scratch_buf, len,
+                                         &first_variant))
+        {
+            continue;
+        }
+
+        /* Here, has non-ASCII.  If not legal UTF-8, isn't a UTF-8 locale */
+        if (! is_utf8_string(first_variant,
+                             len - (first_variant - (U8 *) scratch_buf)))
+        {
+            strings_utf8ness = UTF8NESS_NO;
+            break;
+        }
+
+        /* Here, is a legal non-ASCII UTF-8 string; tentatively set the return
+         * to YES; possibly overridden by later iterations */
+        strings_utf8ness = UTF8NESS_YES;
+
+        /* But if this corroborates our expectation, quit now */
+        if (lean_towards_being_utf8 & NAME_INDICATES_UTF8) {
+            break;
+        }
      }
  
-    return retval;
+#    ifdef USE_LOCALE_TIME
  
-#  endif    /* All the implementations of my_langinfo() */
+    restore_toggled_locale_c(LC_TIME, orig_TIME_locale);
  
-/*--------------------------------------------------------------------------*/
+#    endif
  
-}   /* my_langinfo() */
+    Safefree(scratch_buf);
+    scratch_buf = NULL;
+
+    if (strings_utf8ness == UTF8NESS_NO) {
+        return codeset;     /* No override */
+    }
+
+    /* Here all tested strings are legal UTF-8.
+     *
+     * Above we set UTF8NESS_YES if any string wasn't ASCII.  But even if they
+     * are all ascii, and the locale name indicates it is a UTF-8 locale,
+     * assume the locale is UTF-8. */
+    if (lean_towards_being_utf8) {
+        strings_utf8ness = UTF8NESS_YES;
+    }
+
+    if (strings_utf8ness == UTF8NESS_YES) {
+        return "UTF-8";
+    }
+
+    /* Here, nothing examined indicates that the codeset is or isn't UTF-8.
+     * But what is it?  The other locale categories are not likely to be of
+     * further help:
+     *
+     * LC_NUMERIC   Only a few locales in the world have a non-ASCII radix or
+     *              group separator.
+     * LC_CTYPE     This code wouldn't be compiled if mbtowc() existed and was
+     *              reliable.  This is unlikely in C99.  There are other
+     *              functions that could be used instead, but are they going to
+     *              exist, and be able to distinguish between UTF-8 and 8859-1?
+     *              Deal with this only if it becomes necessary.
+     * LC_MESSAGES  The strings returned from strerror() would seem likely
+     *              candidates, but experience has shown that many systems
+     *              don't actually have translations installed for them.  They
+     *              are instead always in English, so everything in them is
+     *              ASCII, which is of no help to us.  A Configure probe could
+     *              possibly be written to see if this platform has non-ASCII
+     *              error messages.  But again, wait until it turns out to be
+     *              an actual problem.
+     *
+     *              Things like YESSTR, NOSTR, might not be in ASCII, but need
+     *              nl_langinfo() to access, which we don't have.
+     */
+
+    /* Otherwise, assume the locale isn't UTF-8.  This can be wrong if we don't
+     * have MB_CUR_MAX, and the locale is English without UTF-8 in its name,
+     * and with a dollar currency symbol. */
+    return codeset;     /* No override */
+}
  
+#  endif /* ! HAS_DEFINITIVE_UTF8NESS_DETERMINATION */
  #endif      /* USE_LOCALE */
  
  /*
@@ -6547,7 +6859,7 @@ only in the form of their inputs.  C<sv_strftime_tm> takes a filled-in
  S<C<struct tm>> parameter.  C<sv_strftime_ints> takes a bunch of integer
  parameters that together completely define a given time.
  
-C<my_strftime> is kept for backwards compatibility.  Knowing if the result
+C<my_strftime> is kept for backwards compatibility.  Knowing if its result
  should be considered UTF-8 or not requires significant extra logic.
  
  Note that C<yday> and C<wday> effectively are ignored by C<sv_strftime_ints>
@@ -6617,7 +6929,7 @@ S_strftime_tm(pTHX_ const char *fmt, const struct tm *mytm)
  #ifndef HAS_STRFTIME
      Perl_croak(aTHX_ "panic: no strftime");
  #else
-#  if defined(USE_LOCALE_CTYPE) && defined(USE_LOCALE_TIME)
+#  if defined(WE_MUST_DEAL_WITH_MISMATCHED_CTYPE) && defined(USE_LOCALE_TIME)
  
      const char * orig_CTYPE_LOCALE = toggle_locale_c(LC_CTYPE,
                                                       querylocale_c(LC_TIME));
@@ -6698,7 +7010,7 @@ S_strftime_tm(pTHX_ const char *fmt, const struct tm *mytm)
  
    strftime_return:
  
-#  if defined(USE_LOCALE_CTYPE) && defined(USE_LOCALE_TIME)
+#  if defined(WE_MUST_DEAL_WITH_MISMATCHED_CTYPE) && defined(USE_LOCALE_TIME)
  
      restore_toggled_locale_c(LC_CTYPE, orig_CTYPE_LOCALE);
  
@@ -6749,8 +7061,7 @@ S_strftime8(pTHX_ const char * fmt,
          break;
  
        case UTF8NESS_YES:    /* Known to be UTF-8; must be UTF-8 locale if can't
-                               downgrade.  But downgrading assumes the locale
-                               is latin 1.  Maybe just fail XXX */
+                               downgrade. */
          if (! is_locale_utf8(locale)) {
              locale_utf8ness = LOCALE_NOT_UTF8;
  
@@ -6870,9 +7181,13 @@ S_give_perl_locale_control(pTHX_
  
  #    else
  
-    new_LC_ALL(locales, true);
-
+    new_LC_ALL(calculate_LC_ALL_string(locales,
+                                       INTERNAL_FORMAT,
+                                       WANT_TEMP_PV,
+                                       caller_line),
+               true);
  #    endif
+
  }
  
  STATIC void
@@ -7197,7 +7512,7 @@ Perl_init_i18nl10n(pTHX_ int printwarn)
      } trials;
  
      trials trial;
-    SSize_t already_checked = 0;
+    unsigned int already_checked = 0;
      const char * checked[C_trial];
  
  #  ifdef LC_ALL
@@ -7283,6 +7598,7 @@ Perl_init_i18nl10n(pTHX_ int printwarn)
              }
  
              /* And, for future iterations, indicate we've tried this locale */
+            assert(already_checked < C_ARRAY_LENGTH(checked));
              checked[already_checked] = savepv(locale);
              SAVEFREEPV(checked[already_checked]);
              already_checked++;
@@ -8372,10 +8688,10 @@ S_toggle_locale_i(pTHX_ const locale_category_index cat_index,
      const char * locale_to_restore_to = querylocale_i(cat_index);
  
      DEBUG_Lv(PerlIO_printf(Perl_debug_log,
-             "(%" LINE_Tf "): toggle_locale_i: index=%d(%s), wanted=%s,"
-             " actual=%s\n",
-             caller_line, cat_index, category_names[cat_index],
-             new_locale, locale_to_restore_to));
+                           "Entering toggle_locale_i: index=%d(%s),"        \
+                           " wanted=%s, actual=%s; called from %" LINE_Tf   \
+                           "\n", cat_index, category_names[cat_index],
+                           new_locale, locale_to_restore_to, caller_line));
  
      if (! locale_to_restore_to) {
          locale_panic_via_(Perl_form(aTHX_
@@ -8386,10 +8702,9 @@ S_toggle_locale_i(pTHX_ const locale_category_index cat_index,
  
      /* If the locales are the same, there's nothing to do */
      if (strEQ(locale_to_restore_to, new_locale)) {
-        DEBUG_Lv(PerlIO_printf(Perl_debug_log,
-                               "(%" LINE_Tf "): %s locale unchanged as %s\n",
-                               caller_line, category_names[cat_index],
-                               new_locale));
+        DEBUG_Lv(PerlIO_printf(Perl_debug_log, "%s locale unchanged as %s\n",
+                                               category_names[cat_index],
+                                               new_locale));
  
          return NULL;
      }
@@ -8398,8 +8713,8 @@ S_toggle_locale_i(pTHX_ const locale_category_index cat_index,
      void_setlocale_i_with_caller(cat_index, new_locale, __FILE__, caller_line);
  
      DEBUG_Lv(PerlIO_printf(Perl_debug_log,
-                           "(%" LINE_Tf "): %s locale switched to %s\n",
-                           caller_line, category_names[cat_index], new_locale));
+                           "%s locale switched to %s\n",
+                           category_names[cat_index], new_locale));
  
      return locale_to_restore_to;
  
@@ -8423,15 +8738,17 @@ S_restore_toggled_locale_i(pTHX_ const locale_category_index cat_index,
  
      if (restore_locale == NULL) {
          DEBUG_Lv(PerlIO_printf(Perl_debug_log,
-                               "(%" LINE_Tf "): No need to restore %s\n",
-                               caller_line, category_names[cat_index]));
+                               "restore_toggled_locale_i: No need to"       \
+                               " restore %s; called from %" LINE_Tf "\n",   \
+                               category_names[cat_index], caller_line));
          return;
      }
  
      DEBUG_Lv(PerlIO_printf(Perl_debug_log,
-                           "(%" LINE_Tf "): %s restoring locale to %s\n",
-                           caller_line, category_names[cat_index],
-                           restore_locale));
+                           "restore_toggled_locale_i: restoring locale for" \
+                           " %s to  %s; called from %" LINE_Tf "\n",        \
+                           category_names[cat_index], restore_locale,
+                           caller_line));
  
      void_setlocale_i_with_caller(cat_index, restore_locale,
                                    __FILE__, caller_line);
@@ -9092,9 +9409,8 @@ Perl_switch_locale_context(pTHX)
       *              per-thread variables:
       *                  PL_controls_locale  indicates if this thread is using
       *                                      per-thread locales or not
-     *                  PL_cur_LC_ALL       indicates what the the locale
-     *                                      should be if it is a per-thread
-     *                                      locale.
+     *                  PL_cur_LC_ALL       indicates what the locale should be
+     *                                      if it is a per-thread locale.
       */
  
      if (UNLIKELY(   PL_veto_switch_non_tTHX_context