locale.c: Replace by function that does the same thing

[perl5.git] / locale.c
diff --git a/locale.c b/locale.c

index 4e0015e..e29d0c4 100644 (file)
--- a/locale.c
+++ b/locale.c
@@ -74,24 +74,46 @@ static bool debug_initialization = FALSE;
  
  #ifdef USE_LOCALE
  
-/*
- * Standardize the locale name from a string returned by 'setlocale', possibly
- * modifying that string.
- *
- * The typical return value of setlocale() is either
- * (1) "xx_YY" if the first argument of setlocale() is not LC_ALL
- * (2) "xa_YY xb_YY ..." if the first argument of setlocale() is LC_ALL
- *     (the space-separated values represent the various sublocales,
- *      in some unspecified order).  This is not handled by this function.
+/* This code keeps a LRU cache of the UTF-8ness of the locales it has so-far
+ * looked up.  This is in the form of a C string:  */
+
+#define UTF8NESS_SEP     "\v"
+#define UTF8NESS_PREFIX  "\f"
+
+/* So, the string looks like:
   *
- * In some platforms it has a form like "LC_SOMETHING=Lang_Country.866\n",
- * which is harmful for further use of the string in setlocale().  This
- * function removes the trailing new line and everything up through the '='
+ *      \vC\a0\vPOSIX\a0\vam_ET\a0\vaf_ZA.utf8\a1\ven_US.UTF-8\a1\0
   *
- */
+ * where the digit 0 after the \a indicates that the locale starting just
+ * after the preceding \v is not UTF-8, and the digit 1 mean it is. */
+
+STATIC_ASSERT_DECL(STRLENs(UTF8NESS_SEP) == 1);
+STATIC_ASSERT_DECL(STRLENs(UTF8NESS_PREFIX) == 1);
+
+#define C_and_POSIX_utf8ness    UTF8NESS_SEP "C"     UTF8NESS_PREFIX "0"    \
+                                UTF8NESS_SEP "POSIX" UTF8NESS_PREFIX "0"
+
+/* The cache is initialized to C_and_POSIX_utf8ness at start up.  These are
+ * kept there always.  The remining portion of the cache is LRU, with the
+ * oldest looked-up locale at the tail end */
+
  STATIC char *
  S_stdize_locale(pTHX_ char *locs)
  {
+    /* Standardize the locale name from a string returned by 'setlocale',
+     * possibly modifying that string.
+     *
+     * The typical return value of setlocale() is either
+     * (1) "xx_YY" if the first argument of setlocale() is not LC_ALL
+     * (2) "xa_YY xb_YY ..." if the first argument of setlocale() is LC_ALL
+     *     (the space-separated values represent the various sublocales,
+     *      in some unspecified order).  This is not handled by this function.
+     *
+     * In some platforms it has a form like "LC_SOMETHING=Lang_Country.866\n",
+     * which is harmful for further use of the string in setlocale().  This
+     * function removes the trailing new line and everything up through the '='
+     * */
+
      const char * const s = strchr(locs, '=');
      bool okay = TRUE;
  
@@ -372,7 +394,6 @@ S_set_numeric_radix(pTHX_ const bool use_locale)
                                            /* FALSE => already in dest locale */
          /* ... and the character being used isn't a dot */
          if (strNE(radix, ".")) {
-            const U8 * first_variant;
  
              if (PL_numeric_radix_sv) {
                  sv_setpv(PL_numeric_radix_sv, radix);
@@ -381,17 +402,10 @@ S_set_numeric_radix(pTHX_ const bool use_locale)
                  PL_numeric_radix_sv = newSVpv(radix, 0);
              }
  
-            /* If there is a byte variant under UTF-8, and if the remainder of
-             * the string starting there is valid UTF-8, and we are in a UTF-8
-             * locale, then mark the radix as being in UTF-8 */
-            if ( !  is_utf8_invariant_string_loc(
-                                            (U8 *) SvPVX(PL_numeric_radix_sv),
-                                            SvCUR(PL_numeric_radix_sv),
-                                            &first_variant)
-                &&  is_utf8_string(first_variant,
-                                   SvCUR(PL_numeric_radix_sv)
-                                 - ((char *) first_variant
-                                 - SvPVX(PL_numeric_radix_sv)))
+            /* If this is valid UTF-8 that isn't totally ASCII, and we are in
+             * a UTF-8 locale, then mark the radix as being in UTF-8 */
+            if (is_utf8_non_invariant_string((U8 *) SvPVX(PL_numeric_radix_sv),
+                                            SvCUR(PL_numeric_radix_sv))
                  && _is_cur_LC_category_utf8(LC_NUMERIC))
              {
                  SvUTF8_on(PL_numeric_radix_sv);
@@ -560,7 +574,7 @@ Perl_set_numeric_underlying(pTHX)
      do_setlocale_c(LC_NUMERIC, PL_numeric_name);
      PL_numeric_standard = PL_numeric_underlying_is_standard;
      PL_numeric_underlying = TRUE;
-    set_numeric_radix(1);
+    set_numeric_radix(! PL_numeric_standard);
  
  #  ifdef DEBUGGING
  
@@ -1412,6 +1426,14 @@ S_my_nl_langinfo(const int item, bool toggle)
  {
      dTHX;
  
+    /* We only need to toggle into the underlying LC_NUMERIC locale for these
+     * two items, and only if not already there */
+    if (toggle && ((   item != PERL_RADIXCHAR && item != PERL_THOUSEP)
+                    || PL_numeric_underlying))
+    {
+        toggle = FALSE;
+    }
+
  #if defined(HAS_NL_LANGINFO) /* nl_langinfo() is available.  */
  #if   ! defined(HAS_POSIX_2008_LOCALE)
  
@@ -1420,46 +1442,48 @@ S_my_nl_langinfo(const int item, bool toggle)
       * switching back, as some systems destroy the buffer when setlocale() is
       * called */
  
-    LOCALE_LOCK;
+    {
+        DECLARATION_FOR_LC_NUMERIC_MANIPULATION;
  
-    if (toggle) {
-        if (  ! PL_numeric_underlying
-            && (item == PERL_RADIXCHAR || item == PERL_THOUSEP))
-        {
-            do_setlocale_c(LC_NUMERIC, PL_numeric_name);
+        if (toggle) {
+            STORE_LC_NUMERIC_FORCE_TO_UNDERLYING();
          }
-        else {
-            toggle = FALSE;
-        }
-    }
  
-    save_to_buffer(nl_langinfo(item), &PL_langinfo_buf, &PL_langinfo_bufsize, 0);
+        LOCALE_LOCK;    /* Prevent interference from another thread executing
+                           this code section (the only call to nl_langinfo in
+                           the core) */
  
-    if (toggle) {
-        do_setlocale_c(LC_NUMERIC, "C");
-    }
+        save_to_buffer(nl_langinfo(item), &PL_langinfo_buf,
+                                          &PL_langinfo_bufsize, 0);
  
-    LOCALE_UNLOCK;
+        LOCALE_UNLOCK;
+
+        if (toggle) {
+            RESTORE_LC_NUMERIC();
+        }
+    }
  
  #  else /* Use nl_langinfo_l(), avoiding both a mutex and changing the locale */
  
-    bool do_free = FALSE;
-    locale_t cur = uselocale((locale_t) 0);
+    {
+        bool do_free = FALSE;
+        locale_t cur = uselocale((locale_t) 0);
  
-    if (cur == LC_GLOBAL_LOCALE) {
-        cur = duplocale(LC_GLOBAL_LOCALE);
-        do_free = TRUE;
-    }
+        if (cur == LC_GLOBAL_LOCALE) {
+            cur = duplocale(LC_GLOBAL_LOCALE);
+            do_free = TRUE;
+        }
  
-    if (toggle) {
-        cur = newlocale(LC_NUMERIC_MASK, PL_numeric_name, cur);
-        do_free = TRUE;
-    }
+        if (toggle) {
+            cur = newlocale(LC_NUMERIC_MASK, PL_numeric_name, cur);
+            do_free = TRUE;
+        }
  
-    save_to_buffer(nl_langinfo_l(item, cur),
-                   &PL_langinfo_buf, &PL_langinfo_bufsize, 0);
-    if (do_free) {
-        freelocale(cur);
+        save_to_buffer(nl_langinfo_l(item, cur),
+                       &PL_langinfo_buf, &PL_langinfo_bufsize, 0);
+        if (do_free) {
+            freelocale(cur);
+        }
      }
  
  #  endif
@@ -1482,6 +1506,7 @@ S_my_nl_langinfo(const int item, bool toggle)
  #  ifdef HAS_LOCALECONV
  
          const struct lconv* lc;
+        DECLARATION_FOR_LC_NUMERIC_MANIPULATION;
  
  #  endif
  #  ifdef HAS_STRFTIME
@@ -1521,11 +1546,12 @@ S_my_nl_langinfo(const int item, bool toggle)
  
              case PERL_CRNCYSTR:
  
-                LOCALE_LOCK;
-
                  /* We don't bother with localeconv_l() because any system that
                   * has it is likely to also have nl_langinfo() */
  
+                LOCALE_LOCK;    /* Prevent interference with other threads
+                                   using localeconv() */
+
                  lc = localeconv();
                  if (   ! lc
                      || ! lc->currency_symbol
@@ -1557,17 +1583,13 @@ S_my_nl_langinfo(const int item, bool toggle)
              case PERL_RADIXCHAR:
              case PERL_THOUSEP:
  
-                LOCALE_LOCK;
-
                  if (toggle) {
-                    if (! PL_numeric_underlying) {
-                        do_setlocale_c(LC_NUMERIC, PL_numeric_name);
-                    }
-                    else {
-                        toggle = FALSE;
-                    }
+                    STORE_LC_NUMERIC_FORCE_TO_UNDERLYING();
                  }
  
+                LOCALE_LOCK;    /* Prevent interference with other threads
+                                   using localeconv() */
+
                  lc = localeconv();
                  if (! lc) {
                      retval = "";
@@ -1584,12 +1606,12 @@ S_my_nl_langinfo(const int item, bool toggle)
                  save_to_buffer(retval, &PL_langinfo_buf,
                                 &PL_langinfo_bufsize, 0);
  
+                LOCALE_UNLOCK;
+
                  if (toggle) {
-                    do_setlocale_c(LC_NUMERIC, "C");
+                    RESTORE_LC_NUMERIC();
                  }
  
-                LOCALE_UNLOCK;
-
                  break;
  
  #  endif
@@ -2018,6 +2040,12 @@ Perl_init_i18nl10n(pTHX_ int printwarn)
      assert(NOMINAL_LC_ALL_INDEX == LC_ALL_INDEX);
  #    endif
  #  endif    /* DEBUGGING */
+
+    /* Initialize the cache of the program's UTF-8ness for the always known
+     * locales C and POSIX */
+    my_strlcpy(PL_locale_utf8ness, C_and_POSIX_utf8ness,
+               sizeof(PL_locale_utf8ness));
+
  #  ifdef LOCALE_ENVIRON_REQUIRED
  
      /*
@@ -2374,19 +2402,36 @@ Perl_init_i18nl10n(pTHX_ int printwarn)
  
  #  endif
  
-
      for (i = 0; i < NOMINAL_LC_ALL_INDEX; i++) {
+
+#  if defined(USE_ITHREADS)
+
+        /* This caches whether each category's locale is UTF-8 or not.  This
+         * may involve changing the locale.  It is ok to do this at
+         * initialization time before any threads have started, but not later.
+         * Caching means that if the program heeds our dictate not to change
+         * locales in threaded applications, this data will remain valid, and
+         * it may get queried without changing locales.  If the environment is
+         * such that all categories have the same locale, this isn't needed, as
+         * the code will not change the locale; but this handles the uncommon
+         * case where the environment has disparate locales for the categories
+         * */
+        (void) _is_cur_LC_category_utf8(categories[i]);
+
+#  endif
+
          Safefree(curlocales[i]);
      }
  
  #  if defined(USE_PERLIO) && defined(USE_LOCALE_CTYPE)
  
      /* Set PL_utf8locale to TRUE if using PerlIO _and_ the current LC_CTYPE
-     * locale is UTF-8.  If PL_utf8locale and PL_unicode (set by -C or by
-     * $ENV{PERL_UNICODE}) are true, perl.c:S_parse_body() will turn on the
-     * PerlIO :utf8 layer on STDIN, STDOUT, STDERR, _and_ the default open
-     * discipline.  */
-    PL_utf8locale = _is_cur_LC_category_utf8(LC_CTYPE);
+     * locale is UTF-8.  The call to new_ctype() just above has already
+     * calculated the latter value and saved it in PL_in_utf8_CTYPE_locale. If
+     * both PL_utf8locale and PL_unicode (set by -C or by $ENV{PERL_UNICODE})
+     * are true, perl.c:S_parse_body() will turn on the PerlIO :utf8 layer on
+     * STDIN, STDOUT, STDERR, _and_ the default open discipline.  */
+    PL_utf8locale = PL_in_utf8_CTYPE_locale;
  
      /* Set PL_unicode to $ENV{PERL_UNICODE} if using PerlIO.
         This is an alternative to using the -C command line switch
@@ -3023,39 +3068,105 @@ Perl__is_cur_LC_category_utf8(pTHX_ int category)
       * English, it comes down to if the locale's name ends in something like
       * "UTF-8".  It errs on the side of not being a UTF-8 locale. */
  
+    /* Name of current locale corresponding to the input category */
      const char *save_input_locale = NULL;
+
+    bool is_utf8 = FALSE;                /* The return value */
      STRLEN final_pos;
  
+    /* The variables below are for the cache of previous lookups using this
+     * function.  The cache is a C string, described at the definition for
+     * 'C_and_POSIX_utf8ness'.
+     *
+     * The first part of the cache is fixed, for the C and POSIX locales.  The
+     * varying part starts just after them. */
+    char * utf8ness_cache = PL_locale_utf8ness + STRLENs(C_and_POSIX_utf8ness);
+
+    Size_t utf8ness_cache_size; /* Size of the varying portion */
+    Size_t input_name_len;      /* Length in bytes of save_input_locale */
+    Size_t input_name_len_with_overhead;    /* plus extra chars used to store
+                                               the name in the cache */
+    char * delimited;           /* The name plus the delimiters used to store
+                                   it in the cache */
+    char * name_pos;            /* position of 'delimited' in the cache, or 0
+                                   if not there */
+
+
  #  ifdef LC_ALL
  
      assert(category != LC_ALL);
  
  #  endif
  
-    /* First dispose of the trivial cases */
+    /* Get the desired category's locale */
      save_input_locale = do_setlocale_r(category, NULL);
      if (! save_input_locale) {
-        DEBUG_L(PerlIO_printf(Perl_debug_log,
-                              "Could not find current locale for category %d\n",
-                              category));
-        return FALSE;   /* XXX maybe should croak */
+        Perl_croak(aTHX_
+             "panic: %s: %d: Could not find current locale for %s\n",
+                     __FILE__, __LINE__, category_name(category));
      }
+
      save_input_locale = stdize_locale(savepv(save_input_locale));
-    if (isNAME_C_OR_POSIX(save_input_locale)) {
-        DEBUG_L(PerlIO_printf(Perl_debug_log,
-                              "Current locale for category %d is %s\n",
-                              category, save_input_locale));
+    DEBUG_L(PerlIO_printf(Perl_debug_log,
+                          "Current locale for %s is %s\n",
+                          category_name(category), save_input_locale));
+
+    input_name_len = strlen(save_input_locale);
+
+    /* In our cache, each name is accompanied by two delimiters and a single
+     * utf8ness digit */
+    input_name_len_with_overhead = input_name_len + 3;
+
+    /* Allocate and populate space for a copy of the name surrounded by the
+     * delimiters */
+    Newx(delimited, input_name_len_with_overhead, char);
+    delimited[0] = UTF8NESS_SEP[0];
+    Copy(save_input_locale, delimited + 1, input_name_len, char);
+    delimited[input_name_len+1] = UTF8NESS_PREFIX[0];
+    delimited[input_name_len+2] = '\0';
+
+    /* And see if that is in the cache */
+    name_pos = instr(PL_locale_utf8ness, delimited);
+    if (name_pos) {
+        is_utf8 = *(name_pos + input_name_len_with_overhead - 1) - '0';
+
+#  ifdef DEBUGGING
+
+        if (DEBUG_Lv_TEST || debug_initialization) {
+            PerlIO_printf(Perl_debug_log, "UTF8ness for locale %s=%d, \n",
+                                          save_input_locale, is_utf8);
+        }
+
+#  endif
+
+        /* And, if not already in that position, move it to the beginning of
+         * the non-constant portion of the list, since it is the most recently
+         * used.  (We don't have to worry about overflow, since just moving
+         * existing names around) */
+        if (name_pos > utf8ness_cache) {
+            Move(utf8ness_cache,
+                 utf8ness_cache + input_name_len_with_overhead,
+                 name_pos - utf8ness_cache, char);
+            Copy(delimited,
+                 utf8ness_cache,
+                 input_name_len_with_overhead - 1, char);
+            utf8ness_cache[input_name_len_with_overhead - 1] = is_utf8 + '0';
+        }
+
+        Safefree(delimited);
          Safefree(save_input_locale);
-        return FALSE;
+        return is_utf8;
      }
  
+    /* Here we don't have stored the utf8ness for the input locale.  We have to
+     * calculate it */
+
  #  if defined(USE_LOCALE_CTYPE)    \
      && (defined(MB_CUR_MAX) || (defined(HAS_NL_LANGINFO) && defined(CODESET)))
  
      { /* Next try nl_langinfo or MB_CUR_MAX if available */
  
          char *save_ctype_locale = NULL;
-        bool is_utf8;
  
          if (category != LC_CTYPE) { /* These work only on LC_CTYPE */
  
@@ -3108,16 +3219,18 @@ Perl__is_cur_LC_category_utf8(pTHX_ int category)
                      Safefree(save_ctype_locale);
                  }
  
-                is_utf8 = (   (   strlen(codeset) == STRLENs("UTF-8")
-                               && foldEQ(codeset, STR_WITH_LEN("UTF-8")))
-                           || (   strlen(codeset) == STRLENs("UTF8")
-                               && foldEQ(codeset, STR_WITH_LEN("UTF8"))));
+                              /* If the implementation of foldEQ() somehow were
+                               * to change to not go byte-by-byte, this could
+                               * read past end of string, as only one length is
+                               * checked.  But currently, a premature NUL will
+                               * compare false, and it will stop there */
+                is_utf8 = cBOOL(   foldEQ(codeset, STR_WITH_LEN("UTF-8"))
+                                || foldEQ(codeset, STR_WITH_LEN("UTF8")));
  
                  DEBUG_L(PerlIO_printf(Perl_debug_log,
                         "\tnllanginfo returned CODESET '%s'; ?UTF8 locale=%d\n",
                                                       codeset,         is_utf8));
-                Safefree(save_input_locale);
-                return is_utf8;
+                goto finish_and_return;
              }
          }
  
@@ -3174,7 +3287,7 @@ Perl__is_cur_LC_category_utf8(pTHX_ int category)
              Safefree(save_ctype_locale);
          }
  
-        return is_utf8;
+        goto finish_and_return;
  
  #    endif
  
@@ -3199,7 +3312,6 @@ Perl__is_cur_LC_category_utf8(pTHX_ int category)
      {
          char *save_monetary_locale = NULL;
          bool only_ascii = FALSE;
-        bool is_utf8 = FALSE;
          struct lconv* lc;
  
          /* Like above for LC_CTYPE, we first set LC_MONETARY to the locale of
@@ -3256,8 +3368,7 @@ Perl__is_cur_LC_category_utf8(pTHX_ int category)
               * is non-ascii UTF-8. */
              DEBUG_L(PerlIO_printf(Perl_debug_log, "\t?Currency symbol for %s is UTF-8=%d\n",
                                      save_input_locale, is_utf8));
-            Safefree(save_input_locale);
-            return is_utf8;
+            goto finish_and_return;
          }
      }
    cant_use_monetary:
@@ -3342,8 +3453,8 @@ Perl__is_cur_LC_category_utf8(pTHX_ int category)
              DEBUG_L(PerlIO_printf(Perl_debug_log, "\t?time-related strings for %s are UTF-8=%d\n",
                                  save_input_locale,
                                  is_utf8_string((U8 *) formatted_time, 0)));
-            Safefree(save_input_locale);
-            return is_utf8_string((U8 *) formatted_time, 0);
+            is_utf8 = is_utf8_string((U8 *) formatted_time, 0);
+            goto finish_and_return;
          }
  
          /* Falling off the end of the loop indicates all the names were just
@@ -3374,7 +3485,6 @@ Perl__is_cur_LC_category_utf8(pTHX_ int category)
   * are much more likely to have been translated.  */
      {
          int e;
-        bool is_utf8 = FALSE;
          bool non_ascii = FALSE;
          char *save_messages_locale = NULL;
          const char * errmsg = NULL;
@@ -3438,8 +3548,7 @@ Perl__is_cur_LC_category_utf8(pTHX_ int category)
              DEBUG_L(PerlIO_printf(Perl_debug_log, "\t?error messages for %s are UTF-8=%d\n",
                                  save_input_locale,
                                  is_utf8));
-            Safefree(save_input_locale);
-            return is_utf8;
+            goto finish_and_return;
          }
  
          DEBUG_L(PerlIO_printf(Perl_debug_log, "All error messages for %s contain only ASCII; can't use for determining if UTF-8 locale\n", save_input_locale));
@@ -3483,8 +3592,8 @@ Perl__is_cur_LC_category_utf8(pTHX_ int category)
                  DEBUG_L(PerlIO_printf(Perl_debug_log,
                                        "Locale %s ends with UTF-8 in name\n",
                                        save_input_locale));
-                Safefree(save_input_locale);
-                return TRUE;
+                is_utf8 = TRUE;
+                goto finish_and_return;
              }
          }
          DEBUG_L(PerlIO_printf(Perl_debug_log,
@@ -3492,19 +3601,19 @@ Perl__is_cur_LC_category_utf8(pTHX_ int category)
                                  save_input_locale));
      }
  
-#  endif
-#  ifdef WIN32
+#      ifdef WIN32
  
      /* http://msdn.microsoft.com/en-us/library/windows/desktop/dd317756.aspx */
      if (memENDs(save_input_locale, final_pos, "65001")) {
          DEBUG_L(PerlIO_printf(Perl_debug_log,
                          "Locale %s ends with 65001 in name, is UTF-8 locale\n",
                          save_input_locale));
-        Safefree(save_input_locale);
-        return TRUE;
+        is_utf8 = TRUE;
+        goto finish_and_return;
      }
  
-#  endif
+#      endif
+#    endif
  
      /* Other common encodings are the ISO 8859 series, which aren't UTF-8.  But
       * since we are about to return FALSE anyway, there is no point in doing
@@ -3515,16 +3624,81 @@ Perl__is_cur_LC_category_utf8(pTHX_ int category)
          DEBUG_L(PerlIO_printf(Perl_debug_log,
                               "Locale %s has 8859 in name, not UTF-8 locale\n",
                               save_input_locale));
-        Safefree(save_input_locale);
-        return FALSE;
+        is_utf8 = FALSE;
+        goto finish_and_return;
      }
  #  endif
  
      DEBUG_L(PerlIO_printf(Perl_debug_log,
                            "Assuming locale %s is not a UTF-8 locale\n",
                                      save_input_locale));
+    is_utf8 = FALSE;
+
+  finish_and_return:
+
+    /* Cache this result so we don't have to go through all this next time. */
+    utf8ness_cache_size = sizeof(PL_locale_utf8ness)
+                       - (utf8ness_cache - PL_locale_utf8ness);
+
+    /* But we can't save it if it is too large for the total space available */
+    if (LIKELY(input_name_len_with_overhead < utf8ness_cache_size)) {
+        Size_t utf8ness_cache_len = strlen(utf8ness_cache);
+
+        /* Here it can fit, but we may need to clear out the oldest cached
+         * result(s) to do so.  Check */
+        if (utf8ness_cache_len + input_name_len_with_overhead
+                                                        >= utf8ness_cache_size)
+        {
+            /* Here we have to clear something out to make room for this.
+             * Start looking at the rightmost place where it could fit and find
+             * the beginning of the entry that extends past that. */
+            char * cutoff = (char *) my_memrchr(utf8ness_cache,
+                                                UTF8NESS_SEP[0],
+                                                utf8ness_cache_size
+                                              - input_name_len_with_overhead);
+
+            assert(cutoff);
+            assert(cutoff >= utf8ness_cache);
+
+            /* This and all subsequent entries must be removed */
+            *cutoff = '\0';
+            utf8ness_cache_len = strlen(utf8ness_cache);
+        }
+
+        /* Make space for the new entry */
+        Move(utf8ness_cache,
+             utf8ness_cache + input_name_len_with_overhead,
+             utf8ness_cache_len + 1 /* Incl. trailing NUL */, char);
+
+        /* And insert it */
+        Copy(delimited, utf8ness_cache, input_name_len_with_overhead - 1, char);
+        utf8ness_cache[input_name_len_with_overhead - 1] = is_utf8 + '0';
+
+        if ((PL_locale_utf8ness[strlen(PL_locale_utf8ness)-1]
+                                                & (PERL_UINTMAX_T) ~1) != '0')
+        {
+            Perl_croak(aTHX_
+             "panic: %s: %d: Corrupt utf8ness_cache=%s\nlen=%u,"
+             " inserted_name=%s, its_len=%u\n",
+                __FILE__, __LINE__,
+                PL_locale_utf8ness, strlen(PL_locale_utf8ness),
+                delimited, input_name_len_with_overhead);
+        }
+    }
+
+#  ifdef DEBUGGING
+
+    if (DEBUG_Lv_TEST || debug_initialization) {
+        PerlIO_printf(Perl_debug_log,
+                "PL_locale_utf8ness is now %s; returning %d\n",
+                                     PL_locale_utf8ness, is_utf8);
+    }
+
+#  endif
+
+    Safefree(delimited);
      Safefree(save_input_locale);
-    return FALSE;
+    return is_utf8;
  }
  
  #endif