amigaos4: translate $^X to unix style

[perl5.git] / locale.c
diff --git a/locale.c b/locale.c

index 291e178..9b0979d 100644 (file)
--- a/locale.c
+++ b/locale.c
@@ -106,7 +106,7 @@ Perl_set_numeric_radix(pTHX)
                 sv_setpv(PL_numeric_radix_sv, lc->decimal_point);
             else
                 PL_numeric_radix_sv = newSVpv(lc->decimal_point, 0);
-            if (! is_ascii_string((U8 *) lc->decimal_point, 0)
+            if (! is_invariant_string((U8 *) lc->decimal_point, 0)
                  && is_utf8_string((U8 *) lc->decimal_point, 0)
                  && _is_cur_LC_category_utf8(LC_NUMERIC))
              {
@@ -159,7 +159,7 @@ Perl_new_numeric(pTHX_ const char *newnum)
       * dot.
       *
       * This sets several interpreter-level variables:
-     * PL_numeric_name  The default locale's name: a copy of 'newnum'
+     * PL_numeric_name  The underlying locale's name: a copy of 'newnum'
       * PL_numeric_local A boolean indicating if the toggled state is such
       *                  that the current locale is the program's underlying
       *                  locale
@@ -272,6 +272,13 @@ Perl_new_ctype(pTHX_ const char *newctype)
  
      PERL_ARGS_ASSERT_NEW_CTYPE;
  
+    /* We will replace any bad locale warning with 1) nothing if the new one is
+     * ok; or 2) a new warning for the bad new locale */
+    if (PL_warn_locale) {
+        SvREFCNT_dec_NN(PL_warn_locale);
+        PL_warn_locale = NULL;
+    }
+
      PL_in_utf8_CTYPE_locale = _is_cur_LC_category_utf8(LC_CTYPE);
  
      /* A UTF-8 locale gets standard rules.  But note that code still has to
@@ -280,6 +287,18 @@ Perl_new_ctype(pTHX_ const char *newctype)
          Copy(PL_fold_latin1, PL_fold_locale, 256, U8);
      }
      else {
+        /* Assume enough space for every character being bad.  4 spaces each
+         * for the 94 printable characters that are output like "'x' "; and 5
+         * spaces each for "'\\' ", "'\t' ", and "'\n' "; plus a terminating
+         * NUL */
+        char bad_chars_list[ (94 * 4) + (3 * 5) + 1 ];
+
+        bool check_for_problems = ckWARN_d(WARN_LOCALE); /* No warnings means
+                                                            no check */
+        bool multi_byte_locale = FALSE;     /* Assume is a single-byte locale
+                                               to start */
+        unsigned int bad_count = 0;         /* Count of bad characters */
+
          for (i = 0; i < 256; i++) {
              if (isUPPER_LC((U8) i))
                  PL_fold_locale[i] = (U8) toLOWER_LC((U8) i);
@@ -287,6 +306,104 @@ Perl_new_ctype(pTHX_ const char *newctype)
                  PL_fold_locale[i] = (U8) toUPPER_LC((U8) i);
              else
                  PL_fold_locale[i] = (U8) i;
+
+            /* If checking for locale problems, see if the native ASCII-range
+             * printables plus \n and \t are in their expected categories in
+             * the new locale.  If not, this could mean big trouble, upending
+             * Perl's and most programs' assumptions, like having a
+             * metacharacter with special meaning become a \w.  Fortunately,
+             * it's very rare to find locales that aren't supersets of ASCII
+             * nowadays.  It isn't a problem for most controls to be changed
+             * into something else; we check only \n and \t, though perhaps \r
+             * could be an issue as well. */
+            if (check_for_problems
+                && (isGRAPH_A(i) || isBLANK_A(i) || i == '\n'))
+            {
+                if ((isALPHANUMERIC_A(i) && ! isALPHANUMERIC_LC(i))
+                     || (isPUNCT_A(i) && ! isPUNCT_LC(i))
+                     || (isBLANK_A(i) && ! isBLANK_LC(i))
+                     || (i == '\n' && ! isCNTRL_LC(i)))
+                {
+                    if (bad_count) {    /* Separate multiple entries with a
+                                           blank */
+                        bad_chars_list[bad_count++] = ' ';
+                    }
+                    bad_chars_list[bad_count++] = '\'';
+                    if (isPRINT_A(i)) {
+                        bad_chars_list[bad_count++] = (char) i;
+                    }
+                    else {
+                        bad_chars_list[bad_count++] = '\\';
+                        if (i == '\n') {
+                            bad_chars_list[bad_count++] = 'n';
+                        }
+                        else {
+                            assert(i == '\t');
+                            bad_chars_list[bad_count++] = 't';
+                        }
+                    }
+                    bad_chars_list[bad_count++] = '\'';
+                    bad_chars_list[bad_count] = '\0';
+                }
+            }
+        }
+
+#ifdef MB_CUR_MAX
+        /* We only handle single-byte locales (outside of UTF-8 ones; so if
+         * this locale requires than one byte, there are going to be
+         * problems. */
+        if (check_for_problems && MB_CUR_MAX > 1
+
+               /* Some platforms return MB_CUR_MAX > 1 for even the "C"
+                * locale.  Just assume that the implementation for them (plus
+                * for POSIX) is correct and the > 1 value is spurious.  (Since
+                * these are specially handled to never be considered UTF-8
+                * locales, as long as this is the only problem, everything
+                * should work fine */
+            && strNE(newctype, "C") && strNE(newctype, "POSIX"))
+        {
+            multi_byte_locale = TRUE;
+        }
+#endif
+
+        if (bad_count || multi_byte_locale) {
+            PL_warn_locale = Perl_newSVpvf(aTHX_
+                             "Locale '%s' may not work well.%s%s%s\n",
+                             newctype,
+                             (multi_byte_locale)
+                              ? "  Some characters in it are not recognized by"
+                                " Perl."
+                              : "",
+                             (bad_count)
+                              ? "\nThe following characters (and maybe others)"
+                                " may not have the same meaning as the Perl"
+                                " program expects:\n"
+                              : "",
+                             (bad_count)
+                              ? bad_chars_list
+                              : ""
+                            );
+            /* If we are actually in the scope of the locale, output the
+             * message now.  Otherwise we save it to be output at the first
+             * operation using this locale, if that actually happens.  Most
+             * programs don't use locales, so they are immune to bad ones */
+            if (IN_LC(LC_CTYPE)) {
+
+                /* We have to save 'newctype' because the setlocale() just
+                 * below may destroy it.  The next setlocale() further down
+                 * should restore it properly so that the intermediate change
+                 * here is transparent to this function's caller */
+                const char * const badlocale = savepv(newctype);
+
+                setlocale(LC_CTYPE, "C");
+
+                /* The '0' below suppresses a bogus gcc compiler warning */
+                Perl_warner(aTHX_ packWARN(WARN_LOCALE), SvPVX(PL_warn_locale), 0);
+                setlocale(LC_CTYPE, badlocale);
+                Safefree(badlocale);
+                SvREFCNT_dec_NN(PL_warn_locale);
+                PL_warn_locale = NULL;
+            }
          }
      }
  
@@ -297,6 +414,32 @@ Perl_new_ctype(pTHX_ const char *newctype)
  }
  
  void
+Perl__warn_problematic_locale()
+{
+
+#ifdef USE_LOCALE_CTYPE
+
+    dTHX;
+
+    /* Internal-to-core function that outputs the message in PL_warn_locale,
+     * and then NULLS it.  Should be called only through the macro
+     * _CHECK_AND_WARN_PROBLEMATIC_LOCALE */
+
+    if (PL_warn_locale) {
+        /*GCC_DIAG_IGNORE(-Wformat-security);   Didn't work */
+        Perl_ck_warner(aTHX_ packWARN(WARN_LOCALE),
+                             SvPVX(PL_warn_locale),
+                             0 /* dummy to avoid compiler warning */ );
+        /* GCC_DIAG_RESTORE; */
+        SvREFCNT_dec_NN(PL_warn_locale);
+        PL_warn_locale = NULL;
+    }
+
+#endif
+
+}
+
+void
  Perl_new_collate(pTHX_ const char *newcoll)
  {
  #ifdef USE_LOCALE_COLLATE
@@ -365,7 +508,7 @@ Perl_my_setlocale(pTHX_ int category, const char* locale)
       * otherwise to use the particular category's variable if set; otherwise to
       * use the LANG variable. */
  
-    bool override_LC_ALL = 0;
+    bool override_LC_ALL = FALSE;
      char * result;
  
      if (locale && strEQ(locale, "")) {
@@ -431,7 +574,7 @@ Perl_my_setlocale(pTHX_ int category, const char* locale)
          return result;
      }
  
-    /* Here the input locale was LC_ALL, and we have set it to what is in the
+    /* Here the input category was LC_ALL, and we have set it to what is in the
       * LANG variable or the system default if there is no LANG.  But these have
       * lower priority than the other LC_foo variables, so override it for each
       * one that is set.  (If they are set to "", it means to use the same thing
@@ -511,7 +654,7 @@ Perl_init_i18nl10n(pTHX_ int printwarn)
      char *curnum     = NULL;
  #endif /* USE_LOCALE_NUMERIC */
  #ifdef __GLIBC__
-    char * const language   = PerlEnv_getenv("LANGUAGE");
+    const char * const language   = savepv(PerlEnv_getenv("LANGUAGE"));
  #endif
  
      /* NULL uses the existing already set up locale */
@@ -520,14 +663,22 @@ Perl_init_i18nl10n(pTHX_ int printwarn)
                                          : "";
      const char* trial_locales[5];   /* 5 = 1 each for "", LC_ALL, LANG, "", C */
      unsigned int trial_locales_count;
-    char * const lc_all     = PerlEnv_getenv("LC_ALL");
-    char * const lang       = PerlEnv_getenv("LANG");
+    const char * const lc_all     = savepv(PerlEnv_getenv("LC_ALL"));
+    const char * const lang       = savepv(PerlEnv_getenv("LANG"));
      bool setlocale_failure = FALSE;
      unsigned int i;
      char *p;
-    const bool locwarn = (printwarn > 1 ||
-                    (printwarn &&
-                     (!(p = PerlEnv_getenv("PERL_BADLANG")) || atoi(p))));
+
+    /* A later getenv() could zap this, so only use here */
+    const char * const bad_lang_use_once = PerlEnv_getenv("PERL_BADLANG");
+
+    const bool locwarn = (printwarn > 1
+                          || (printwarn
+                              && (! bad_lang_use_once
+                                  || (
+                                    /* disallow with "" or "0" */
+                                    *bad_lang_use_once
+                                    && strNE("0", bad_lang_use_once)))));
      bool done = FALSE;
  #ifdef WIN32
      /* In some systems you can find out the system default locale
@@ -556,7 +707,6 @@ Perl_init_i18nl10n(pTHX_ int printwarn)
      }
      if (!setlocale_failure) {
  #       ifdef USE_LOCALE_CTYPE
-       Safefree(curctype);
         if (! (curctype =
                my_setlocale(LC_CTYPE,
                          (!done && (lang || PerlEnv_getenv("LC_CTYPE")))
@@ -566,7 +716,6 @@ Perl_init_i18nl10n(pTHX_ int printwarn)
             curctype = savepv(curctype);
  #       endif /* USE_LOCALE_CTYPE */
  #       ifdef USE_LOCALE_COLLATE
-       Safefree(curcoll);
         if (! (curcoll =
                my_setlocale(LC_COLLATE,
                          (!done && (lang || PerlEnv_getenv("LC_COLLATE")))
@@ -576,7 +725,6 @@ Perl_init_i18nl10n(pTHX_ int printwarn)
             curcoll = savepv(curcoll);
  #       endif /* USE_LOCALE_COLLATE */
  #       ifdef USE_LOCALE_NUMERIC
-       Safefree(curnum);
         if (! (curnum =
                my_setlocale(LC_NUMERIC,
                          (!done && (lang || PerlEnv_getenv("LC_NUMERIC")))
@@ -608,7 +756,9 @@ Perl_init_i18nl10n(pTHX_ int printwarn)
  #endif /* !LOCALE_ENVIRON_REQUIRED */
  
      /* We try each locale in the list until we get one that works, or exhaust
-     * the list */
+     * the list.  Normally the loop is executed just once.  But if setting the
+     * locale fails, inside the loop we add fallback trials to the array and so
+     * will execute the loop multiple times */
      trial_locales[0] = setlocale_init;
      trial_locales_count = 1;
      for (i= 0; i < trial_locales_count; i++) {
@@ -717,18 +867,18 @@ Perl_init_i18nl10n(pTHX_ int printwarn)
  
                  PerlIO_printf(Perl_error_log,
                  "perl: warning: Setting locale failed for the categories:\n\t");
-#ifdef USE_LOCALE_CTYPE
+#  ifdef USE_LOCALE_CTYPE
                  if (! curctype)
                      PerlIO_printf(Perl_error_log, "LC_CTYPE ");
-#endif /* USE_LOCALE_CTYPE */
-#ifdef USE_LOCALE_COLLATE
+#  endif /* USE_LOCALE_CTYPE */
+#  ifdef USE_LOCALE_COLLATE
                  if (! curcoll)
                      PerlIO_printf(Perl_error_log, "LC_COLLATE ");
-#endif /* USE_LOCALE_COLLATE */
-#ifdef USE_LOCALE_NUMERIC
+#  endif /* USE_LOCALE_COLLATE */
+#  ifdef USE_LOCALE_NUMERIC
                  if (! curnum)
                      PerlIO_printf(Perl_error_log, "LC_NUMERIC ");
-#endif /* USE_LOCALE_NUMERIC */
+#  endif /* USE_LOCALE_NUMERIC */
                  PerlIO_printf(Perl_error_log, "and possibly others\n");
  
  #endif /* LC_ALL */
@@ -777,7 +927,7 @@ Perl_init_i18nl10n(pTHX_ int printwarn)
              }
  
              /* Calculate what fallback locales to try.  We have avoided this
-             * until we have to, becuase failure is quite unlikely.  This will
+             * until we have to, because failure is quite unlikely.  This will
               * usually change the upper bound of the loop we are in.
               *
               * Since the system's default way of setting the locale has not
@@ -932,6 +1082,13 @@ Perl_init_i18nl10n(pTHX_ int printwarn)
      Safefree(curnum);
  #endif /* USE_LOCALE_NUMERIC */
  
+#ifdef __GLIBC__
+    Safefree(language);
+#endif
+
+    Safefree(lc_all);
+    Safefree(lang);
+
  #else  /* !USE_LOCALE */
      PERL_UNUSED_ARG(printwarn);
  #endif /* USE_LOCALE */
@@ -1010,8 +1167,10 @@ Perl__is_cur_LC_category_utf8(pTHX_ int category)
      /* Returns TRUE if the current locale for 'category' is UTF-8; FALSE
       * otherwise. 'category' may not be LC_ALL.  If the platform doesn't have
       * nl_langinfo(), nor MB_CUR_MAX, this employs a heuristic, which hence
-     * could give the wrong result.  It errs on the side of not being a UTF-8
-     * locale. */
+     * could give the wrong result.  The result will very likely be correct for
+     * languages that have commonly used non-ASCII characters, but for notably
+     * English, it comes down to if the locale's name ends in something like
+     * "UTF-8".  It errs on the side of not being a UTF-8 locale. */
  
      char *save_input_locale = NULL;
      STRLEN final_pos;
@@ -1130,16 +1289,14 @@ Perl__is_cur_LC_category_utf8(pTHX_ int category)
           * result */
          if (is_utf8) {
              wchar_t wc;
-            GCC_DIAG_IGNORE(-Wunused-result);
-            (void) mbtowc(&wc, NULL, 0);    /* Reset any shift state */
-            GCC_DIAG_RESTORE;
+            PERL_UNUSED_RESULT(mbtowc(&wc, NULL, 0));/* Reset any shift state */
              errno = 0;
              if ((size_t)mbtowc(&wc, HYPHEN_UTF8, strlen(HYPHEN_UTF8))
                                                          != strlen(HYPHEN_UTF8)
                  || wc != (wchar_t) 0x2010)
              {
                  is_utf8 = FALSE;
-                DEBUG_L(PerlIO_printf(Perl_debug_log, "\thyphen=U+%x\n", wc));
+                DEBUG_L(PerlIO_printf(Perl_debug_log, "\thyphen=U+%x\n", (unsigned int)wc));
                  DEBUG_L(PerlIO_printf(Perl_debug_log,
                          "\treturn from mbtowc=%d; errno=%d; ?UTF8 locale=0\n",
                          mbtowc(&wc, HYPHEN_UTF8, strlen(HYPHEN_UTF8)), errno));
@@ -1167,7 +1324,8 @@ Perl__is_cur_LC_category_utf8(pTHX_ int category)
       * currency symbol to see if it disambiguates things.  Often that will be
       * in the native script, and if the symbol isn't in UTF-8, we know that the
       * locale isn't.  If it is non-ASCII UTF-8, we infer that the locale is
-     * too. */
+     * too, as the odds of a non-UTF8 string being valid UTF-8 are quite small
+     * */
  
  #ifdef HAS_LOCALECONV
  #   ifdef USE_LOCALE_MONETARY
@@ -1209,7 +1367,7 @@ Perl__is_cur_LC_category_utf8(pTHX_ int category)
          lc = localeconv();
          if (! lc
              || ! lc->currency_symbol
-            || is_ascii_string((U8 *) lc->currency_symbol, 0))
+            || is_invariant_string((U8 *) lc->currency_symbol, 0))
          {
              DEBUG_L(PerlIO_printf(Perl_debug_log, "Couldn't get currency symbol for %s, or contains only ASCII; can't use for determining if UTF-8 locale\n", save_input_locale));
              only_ascii = TRUE;
@@ -1282,14 +1440,14 @@ Perl__is_cur_LC_category_utf8(pTHX_ int category)
  
          /* Here the current LC_TIME is set to the locale of the category
           * whose information is desired.  Look at all the days of the week and
-         * month names, and the timezone and am/pm indicator for non-ASCII
+         * month names, and the timezone and am/pm indicator for UTF-8 variant
           * characters.  The first such a one found will tell us if the locale
           * is UTF-8 or not */
  
          for (i = 0; i < 7 + 12; i++) {  /* 7 days; 12 months */
              formatted_time = my_strftime("%A %B %Z %p",
                                      0, 0, hour, dom, month, 112, 0, 0, is_dst);
-            if (! formatted_time || is_ascii_string((U8 *) formatted_time, 0)) {
+            if (! formatted_time || is_invariant_string((U8 *) formatted_time, 0)) {
  
                  /* Here, we didn't find a non-ASCII.  Try the next time through
                   * with the complemented dst and am/pm, and try with the next
@@ -1335,63 +1493,68 @@ Perl__is_cur_LC_category_utf8(pTHX_ int category)
  #if 0 && defined(USE_LOCALE_MESSAGES) && defined(HAS_SYS_ERRLIST)
  
  /* This code is ifdefd out because it was found to not be necessary in testing
- * on our dromedary test machine, which has over 700 locales.  There, looking
- * at just the currency symbol gave essentially the same results as doing this
- * extra work.  Executing this also caused segfaults in miniperl.  I left it in
- * so as to avoid rewriting it if real-world experience indicates that
- * dromedary is an outlier.  Essentially, instead of returning abpve if we
+ * on our dromedary test machine, which has over 700 locales.  There, this
+ * added no value to looking at the currency symbol and the time strings.  I
+ * left it in so as to avoid rewriting it if real-world experience indicates
+ * that dromedary is an outlier.  Essentially, instead of returning abpve if we
   * haven't found illegal utf8, we continue on and examine all the strerror()
   * messages on the platform for utf8ness.  If all are ASCII, we still don't
   * know the answer; but otherwise we have a pretty good indication of the
- * utf8ness.  The reason this doesn't necessarily help much is that the
- * messages may not have been translated into the locale.  The currency symbol
- * is much more likely to have been translated.  The code below would need to
- * be altered somewhat to just be a continuation of testing the currency
- * symbol. */
+ * utf8ness.  The reason this doesn't help much is that the messages may not
+ * have been translated into the locale.  The currency symbol and time strings
+ * are much more likely to have been translated.  */
+    {
          int e;
-        unsigned int failures = 0, non_ascii = 0;
+        bool is_utf8 = FALSE;
+        bool non_ascii = FALSE;
          char *save_messages_locale = NULL;
+        const char * errmsg = NULL;
  
-        /* Like above for LC_CTYPE, we set LC_MESSAGES to the locale of the
-         * desired category, if it isn't that locale already */
+        /* Like above, we set LC_MESSAGES to the locale of the desired
+         * category, if it isn't that locale already */
  
          if (category != LC_MESSAGES) {
  
-            save_messages_locale = stdize_locale(savepv(setlocale(LC_MESSAGES,
-                                                                  NULL)));
+            save_messages_locale = setlocale(LC_MESSAGES, NULL);
              if (! save_messages_locale) {
+                DEBUG_L(PerlIO_printf(Perl_debug_log,
+                            "Could not find current locale for LC_MESSAGES\n"));
                  goto cant_use_messages;
              }
+            save_messages_locale = stdize_locale(savepv(save_messages_locale));
  
              if (strEQ(save_messages_locale, save_input_locale)) {
-                Safefree(save_input_locale);
+                Safefree(save_messages_locale);
+                save_messages_locale = NULL;
              }
              else if (! setlocale(LC_MESSAGES, save_input_locale)) {
+                DEBUG_L(PerlIO_printf(Perl_debug_log,
+                            "Could not change LC_MESSAGES locale to %s\n",
+                                                        save_input_locale));
                  Safefree(save_messages_locale);
                  goto cant_use_messages;
              }
          }
  
          /* Here the current LC_MESSAGES is set to the locale of the category
-         * whose information is desired.  Look through all the messages */
+         * whose information is desired.  Look through all the messages.  We
+         * can't use Strerror() here because it may expand to code that
+         * segfaults in miniperl */
  
-        for (e = 0;
-#ifdef HAS_SYS_ERRLIST
-             e <= sys_nerr
-#endif
-             ; e++)
-        {
-            const U8* const errmsg = (U8 *) Strerror(e) ;
-            if (!errmsg)
-                break;
-            if (! is_utf8_string(errmsg, 0)) {
-                failures++;
+        for (e = 0; e <= sys_nerr; e++) {
+            errno = 0;
+            errmsg = sys_errlist[e];
+            if (errno || !errmsg) {
                  break;
              }
-            else if (! is_ascii_string(errmsg, 0)) {
-                non_ascii++;
+            errmsg = savepv(errmsg);
+            if (! is_invariant_string((U8 *) errmsg, 0)) {
+                non_ascii = TRUE;
+                is_utf8 = is_utf8_string((U8 *) errmsg, 0);
+                break;
              }
          }
+        Safefree(errmsg);
  
          /* And, if we changed it, restore LC_MESSAGES to its original locale */
          if (save_messages_locale) {
@@ -1399,10 +1562,18 @@ Perl__is_cur_LC_category_utf8(pTHX_ int category)
              Safefree(save_messages_locale);
          }
  
-        /* Any non-UTF-8 message means not a UTF-8 locale; if all are valid,
-         * any non-ascii means it is one; otherwise we assume it isn't */
-        return (failures) ? FALSE : non_ascii;
+        if (non_ascii) {
+
+            /* Any non-UTF-8 message means not a UTF-8 locale; if all are valid,
+             * any non-ascii means it is one; otherwise we assume it isn't */
+            DEBUG_L(PerlIO_printf(Perl_debug_log, "\t?error messages for %s are UTF-8=%d\n",
+                                save_input_locale,
+                                is_utf8));
+            Safefree(save_input_locale);
+            return is_utf8;
+        }
  
+        DEBUG_L(PerlIO_printf(Perl_debug_log, "All error messages for %s contain only ASCII; can't use for determining if UTF-8 locale\n", save_input_locale));
      }
    cant_use_messages:
  
@@ -1410,6 +1581,8 @@ Perl__is_cur_LC_category_utf8(pTHX_ int category)
  
  #endif /* the code that is compiled when no nl_langinfo */
  
+#ifndef EBCDIC  /* On os390, even if the name ends with "UTF-8', it isn't a
+                   UTF-8 locale */
      /* As a last resort, look at the locale name to see if it matches
       * qr/UTF -?  * 8 /ix, or some other common locale names.  This "name", the
       * return of setlocale(), is actually defined to be opaque, so we can't
@@ -1425,8 +1598,8 @@ Perl__is_cur_LC_category_utf8(pTHX_ int category)
          while ((name += strcspn(name, "Uu") + 1)
                                              <= save_input_locale + final_pos - 2)
          {
-            if (toFOLD(*(name)) != 't'
-                || toFOLD(*(name + 1)) != 'f')
+            if (!isALPHA_FOLD_NE(*name, 't')
+                || isALPHA_FOLD_NE(*(name + 1), 'f'))
              {
                  continue;
              }
@@ -1449,6 +1622,7 @@ Perl__is_cur_LC_category_utf8(pTHX_ int category)
                                "Locale %s doesn't end with UTF-8 in name\n",
                                  save_input_locale));
      }
+#endif
  
  #ifdef WIN32
      /* http://msdn.microsoft.com/en-us/library/windows/desktop/dd317756.aspx */
@@ -1543,11 +1717,40 @@ Perl_my_strerror(pTHX_ const int errnum) {
  }
  
  /*
- * Local variables:
- * c-indentation-style: bsd
- * c-basic-offset: 4
- * indent-tabs-mode: nil
- * End:
- *
+
+=head1 Locale-related functions and macros
+
+=for apidoc sync_locale
+
+Changing the program's locale should be avoided by XS code.  Nevertheless,
+certain non-Perl libraries called from XS, such as C<Gtk> do so.  When this
+happens, Perl needs to be told that the locale has changed.  Use this function
+to do so, before returning to Perl.
+
+=cut
+*/
+
+void
+Perl_sync_locale(pTHX)
+{
+
+#ifdef USE_LOCALE_CTYPE
+    new_ctype(setlocale(LC_CTYPE, NULL));
+#endif /* USE_LOCALE_CTYPE */
+
+#ifdef USE_LOCALE_COLLATE
+    new_collate(setlocale(LC_COLLATE, NULL));
+#endif
+
+#ifdef USE_LOCALE_NUMERIC
+    set_numeric_local();    /* Switch from "C" to underlying LC_NUMERIC */
+    new_numeric(setlocale(LC_NUMERIC, NULL));
+#endif /* USE_LOCALE_NUMERIC */
+
+}
+
+
+
+/*
   * ex: set ts=8 sts=4 sw=4 et:
   */