Put back an #if-0-ed chunk 7053d92 removed.

author Jarkko Hietaniemi <jhi@iki.fi>

Fri, 13 Jun 2014 19:58:09 +0000 (15:58 -0400)

committer Jarkko Hietaniemi <jhi@iki.fi>

Fri, 13 Jun 2014 19:58:09 +0000 (15:58 -0400)
author Jarkko Hietaniemi <jhi@iki.fi>
Fri, 13 Jun 2014 19:58:09 +0000 (15:58 -0400)
committer Jarkko Hietaniemi <jhi@iki.fi>
Fri, 13 Jun 2014 19:58:09 +0000 (15:58 -0400)
diff --git a/locale.c b/locale.c

index 2b66c8c..64c0d8d 100644 (file)
--- a/locale.c
+++ b/locale.c
@@ -1300,6 +1300,81 @@ Perl__is_cur_LC_category_utf8(pTHX_ int category)
  #   endif /* USE_LOCALE_MONETARY */
  #endif /* HAS_LOCALECONV */
  
+#if 0 && defined(HAS_STRERROR) && defined(USE_LOCALE_MESSAGES)
+
+/* This code is ifdefd out because it was found to not be necessary in testing
+ * on our dromedary test machine, which has over 700 locales.  There, looking
+ * at just the currency symbol gave essentially the same results as doing this
+ * extra work.  Executing this also caused segfaults in miniperl.  I left it in
+ * so as to avoid rewriting it if real-world experience indicates that
+ * dromedary is an outlier.  Essentially, instead of returning above if we
+ * haven't found illegal utf8, we continue on and examine all the strerror()
+ * messages on the platform for utf8ness.  If all are ASCII, we still don't
+ * know the answer; but otherwise we have a pretty good indication of the
+ * utf8ness.  The reason this doesn't necessarily help much is that the
+ * messages may not have been translated into the locale.  The currency symbol
+ * is much more likely to have been translated.  The code below would need to
+ * be altered somewhat to just be a continuation of testing the currency
+ * symbol. */
+        int e;
+        unsigned int failures = 0, non_ascii = 0;
+        char *save_messages_locale = NULL;
+
+        /* Like above for LC_CTYPE, we set LC_MESSAGES to the locale of the
+         * desired category, if it isn't that locale already */
+
+        if (category != LC_MESSAGES) {
+
+            save_messages_locale = stdize_locale(savepv(setlocale(LC_MESSAGES,
+                                                                  NULL)));
+            if (! save_messages_locale) {
+                goto cant_use_messages;
+            }
+
+            if (strEQ(save_messages_locale, save_input_locale)) {
+                Safefree(save_input_locale);
+            }
+            else if (! setlocale(LC_MESSAGES, save_input_locale)) {
+                Safefree(save_messages_locale);
+                goto cant_use_messages;
+            }
+        }
+
+        /* Here the current LC_MESSAGES is set to the locale of the category
+         * whose information is desired.  Look through all the messages */
+
+        for (e = 0;
+#ifdef HAS_SYS_ERRLIST
+             e <= sys_nerr
+#endif
+             ; e++)
+        {
+            const U8* const errmsg = (U8 *) Strerror(e) ;
+            if (!errmsg)
+                break;
+            if (! is_utf8_string(errmsg, 0)) {
+                failures++;
+                break;
+            }
+            else if (! is_ascii_string(errmsg, 0)) {
+                non_ascii++;
+            }
+        }
+
+        /* And, if we changed it, restore LC_MESSAGES to its original locale */
+        if (save_messages_locale) {
+            setlocale(LC_MESSAGES, save_messages_locale);
+            Safefree(save_messages_locale);
+        }
+
+        /* Any non-UTF-8 message means not a UTF-8 locale; if all are valid,
+         * any non-ascii means it is one; otherwise we assume it isn't */
+        return (failures) ? FALSE : non_ascii;
+
+    }
+  cant_use_messages:
+
+#endif
  
      DEBUG_L(PerlIO_printf(Perl_debug_log,
                            "Assuming locale %s is not a UTF-8 locale\n",
author	Jarkko Hietaniemi <jhi@iki.fi>
	Fri, 13 Jun 2014 19:58:09 +0000 (15:58 -0400)
committer	Jarkko Hietaniemi <jhi@iki.fi>
	Fri, 13 Jun 2014 19:58:09 +0000 (15:58 -0400)