This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
locale.c: Prefer mbrtowc(), as its reentrant
authorKarl Williamson <khw@cpan.org>
Sat, 6 Jan 2018 04:41:27 +0000 (21:41 -0700)
committerKarl Williamson <khw@cpan.org>
Wed, 31 Jan 2018 13:33:02 +0000 (06:33 -0700)
If it's available and this is a threaded build, it's preferred.

locale.c

index f49dc56..66c64cc 100644 (file)
--- a/locale.c
+++ b/locale.c
@@ -3160,9 +3160,9 @@ Perl__is_cur_LC_category_utf8(pTHX_ int category)
     /* Here we don't have stored the utf8ness for the input locale.  We have to
      * calculate it */
 
-#  if         defined(USE_LOCALE_CTYPE)                                  \
-      && (   (defined(HAS_NL_LANGINFO) && defined(CODESET))              \
-          ||  defined(HAS_MBTOWC))
+#  if        defined(USE_LOCALE_CTYPE)                                  \
+     && (   (defined(HAS_NL_LANGINFO) && defined(CODESET))              \
+         || (defined(HAS_MBTOWC) || defined(HAS_MBRTOWC)))
 
     {
         const char *save_ctype_locale = NULL;
@@ -3248,7 +3248,7 @@ Perl__is_cur_LC_category_utf8(pTHX_ int category)
         }
 
 #    endif
-#    if defined(HAS_MBTOWC)
+#    if defined(HAS_MBTOWC) || defined(HAS_MBRTOWC)
      /* We can see if this is a UTF-8-like locale if have mbtowc().  It was a
       * late adder to C89, so very likely to have it.  However, testing has
       * shown that, like nl_langinfo() above, there are locales that are not
@@ -3258,14 +3258,33 @@ Perl__is_cur_LC_category_utf8(pTHX_ int category)
             wchar_t wc;
             int len;
 
-           /* mbtowc() converts a byte string to a wide character.  Feed a byte
-            * string to it and check that the result is the expected Unicode
-            * code point */
+#      if defined(HAS_MBRTOWC) && defined(USE_ITHREADS)
+
+            mbstate_t ps;
+
+#      endif
+
+            /* mbrtowc() and mbtowc() convert a byte string to a wide
+             * character.  Feed a byte string to one of them and check that the
+             * result is the expected Unicode code point */
+
+#      if defined(HAS_MBRTOWC) && defined(USE_ITHREADS)
+            /* Prefer this function if available, as it's reentrant */
+
+            memset(&ps, 0, sizeof(ps));;
+            PERL_UNUSED_RESULT(mbrtowc(&wc, NULL, 0, &ps)); /* Reset any shift
+                                                               state */
+            errno = 0;
+            len = mbrtowc(&wc, STR_WITH_LEN(REPLACEMENT_CHARACTER_UTF8), &ps);
+
+#      else
 
             PERL_UNUSED_RESULT(mbtowc(&wc, NULL, 0));/* Reset any shift state */
             errno = 0;
             len = mbtowc(&wc, STR_WITH_LEN(REPLACEMENT_CHARACTER_UTF8));
 
+#      endif
+
             DEBUG_L(PerlIO_printf(Perl_debug_log,
                     "\treturn from mbtowc; len=%d; code_point=%x; errno=%d\n",
                                    len,      (unsigned int) wc, errno));