PATCH: [perl #127288] I18N::Langinfo sets UTF-8 bit

author Karl Williamson <khw@cpan.org>

Thu, 8 Mar 2018 05:48:55 +0000 (22:48 -0700)

committer Karl Williamson <khw@cpan.org>

Mon, 12 Mar 2018 16:17:14 +0000 (10:17 -0600)
author Karl Williamson <khw@cpan.org>
Thu, 8 Mar 2018 05:48:55 +0000 (22:48 -0700)
committer Karl Williamson <khw@cpan.org>
Mon, 12 Mar 2018 16:17:14 +0000 (10:17 -0600)
diff --git a/embed.fnc b/embed.fnc

index 6c4f859..5adc705 100644 (file)
--- a/embed.fnc
+++ b/embed.fnc
@@ -2831,7 +2831,8 @@ snR       |char * |setlocale_debug_string |const int category                 \
  #if        defined(USE_LOCALE)         \
      && (   defined(PERL_IN_LOCALE_C)   \
          || defined(PERL_IN_MG_C)       \
-       || defined (PERL_EXT_POSIX))
+       || defined (PERL_EXT_POSIX)     \
+       || defined (PERL_EXT_LANGINFO))
  ApM    |bool   |_is_cur_LC_category_utf8|int category
  #endif
  
diff --git a/embed.h b/embed.h

index 9bc1fdb..b95410c 100644 (file)
--- a/embed.h
+++ b/embed.h
@@ -877,7 +877,7 @@
  #define sv_dup(a,b)            Perl_sv_dup(aTHX_ a,b)
  #define sv_dup_inc(a,b)                Perl_sv_dup_inc(aTHX_ a,b)
  #endif
-#if defined(USE_LOCALE)                    && (   defined(PERL_IN_LOCALE_C)            || defined(PERL_IN_MG_C)                || defined (PERL_EXT_POSIX))
+#if defined(USE_LOCALE)                    && (   defined(PERL_IN_LOCALE_C)            || defined(PERL_IN_MG_C)                || defined (PERL_EXT_POSIX)             || defined (PERL_EXT_LANGINFO))
  #define _is_cur_LC_category_utf8(a)    Perl__is_cur_LC_category_utf8(aTHX_ a)
  #endif
  #if defined(USE_LOCALE_COLLATE)
diff --git a/ext/I18N-Langinfo/Langinfo.xs b/ext/I18N-Langinfo/Langinfo.xs

index 663cb2a..904b424 100644 (file)
--- a/ext/I18N-Langinfo/Langinfo.xs
+++ b/ext/I18N-Langinfo/Langinfo.xs
@@ -1,4 +1,6 @@
  #define PERL_NO_GET_CONTEXT
+#define PERL_EXT
+#define PERL_EXT_LANGINFO
  
  #include "EXTERN.h"
  #include "perl.h"
@@ -22,17 +24,77 @@ INCLUDE: const-xs.inc
  SV*
  langinfo(code)
         int     code
+  PREINIT:
+        const   char * value;
+        STRLEN  len;
    PROTOTYPE: _
    CODE:
  #ifdef HAS_NL_LANGINFO
         if (code < 0) {
             SETERRNO(EINVAL, LIB_INVARG);
             RETVAL = &PL_sv_undef;
-       } else {
-            RETVAL = newSVpv(Perl_langinfo(code), 0);
-        }
-#else
-        RETVAL = newSVpv(Perl_langinfo(code), 0);
+       } else
  #endif
+        {
+            value = Perl_langinfo(code);
+            len = strlen(value);
+            RETVAL = newSVpvn(Perl_langinfo(code), len);
+
+            /* Now see if the UTF-8 flag should be turned on */
+#ifdef USE_LOCALE_CTYPE     /* No utf8 strings if not using LC_CTYPE */
+
+            /* If 'value' is ASCII or not legal UTF-8, the flag doesn't get
+             * turned on, so skip the followin code */
+            if (is_utf8_non_invariant_string((U8 *) value, len)) {
+                int category;
+
+                /* Check if the locale is a UTF-8 one.  The returns from
+                 * Perl_langinfo() are in different locale categories, so check the
+                 * category corresponding to this item */
+                switch (code) {
+
+                    /* This should always return ASCII, so we could instead
+                     * legitimately panic here, but soldier on */
+                    case CODESET:
+                        category = LC_CTYPE;
+                        break;
+
+                    case RADIXCHAR:
+                    case THOUSEP:
+#  ifdef USE_LOCALE_NUMERIC
+                        category = LC_NUMERIC;
+#  else
+                        /* Not ideal, but the best we can do on such a platform */
+                        category = LC_CTYPE;
+#  endif
+                        break;
+
+                    case CRNCYSTR:
+#  ifdef USE_LOCALE_MONETARY
+                        category = LC_MONETARY;
+#  else
+                        category = LC_CTYPE;
+#  endif
+                        break;
+
+                    default:
+#  ifdef USE_LOCALE_TIME
+                        category = LC_TIME;
+#  else
+                        category = LC_CTYPE;
+#  endif
+                        break;
+                }
+
+                /* Here the return is legal UTF-8.  Turn on that flag if the
+                 * locale is UTF-8.  (Otherwise, could just be a coincidence.)
+                 * */
+                if (_is_cur_LC_category_utf8(category)) {
+                    SvUTF8_on(RETVAL);
+                }
+            }
+#endif /* USE_LOCALE_CTYPE */
+        }
+
    OUTPUT:
-       RETVAL
+        RETVAL
diff --git a/ext/I18N-Langinfo/t/Langinfo.t b/ext/I18N-Langinfo/t/Langinfo.t

index 10a660e..a26abb5 100644 (file)
--- a/ext/I18N-Langinfo/t/Langinfo.t
+++ b/ext/I18N-Langinfo/t/Langinfo.t
@@ -7,7 +7,12 @@ require "../../t/loc_tools.pl";
  plan skip_all => "I18N::Langinfo or POSIX unavailable" 
      if $Config{'extensions'} !~ m!\bI18N/Langinfo\b!;
  
-my @constants = qw(ABDAY_1 DAY_1 ABMON_1 MON_1 RADIXCHAR AM_STR THOUSEP D_T_FMT D_FMT T_FMT);
+my @times  = qw( MON_1 MON_2 MON_3 MON_4 MON_5 MON_6 MON_7
+                 MON_8 MON_9 MON_10 MON_11 MON_12
+                 DAY_1 DAY_2 DAY_3 DAY_4 DAY_5 DAY_6 DAY_7);
+my @constants = qw(ABDAY_1 DAY_1 ABMON_1 RADIXCHAR AM_STR THOUSEP D_T_FMT
+                   D_FMT T_FMT);
+push @constants, @times;
  
  my %want =
      (
@@ -21,9 +26,9 @@ my %want =
  
  my @want = sort keys %want;
  
-plan tests => 1 + 3 * @constants + keys(@want) + 1;
+plan tests => 1 + 3 * @constants + keys(@want) + 1 + 2;
  
-use_ok('I18N::Langinfo', 'langinfo', @constants);
+use_ok('I18N::Langinfo', 'langinfo', @constants, 'CRNCYSTR');
  
  use POSIX;
  setlocale(LC_ALL, "C");
@@ -69,3 +74,72 @@ SKIP: {
      is (langinfo(&RADIXCHAR), ",",
          "Returns ',' for decimal pt for locale '$comma_locale'");
  }
+
+SKIP: {
+
+    my $found_time = 0;
+    my $found_monetary = 0;
+    my @locales = find_locales( [ 'LC_TIME', 'LC_CTYPE', 'LC_MONETARY' ]);
+
+    while (defined (my $utf8_locale = find_utf8_ctype_locale(\@locales))) {
+        if (! $found_time) {
+            setlocale(&LC_TIME, $utf8_locale);
+            foreach my $time_item (@times) {
+                my $eval_string = "langinfo(&$time_item)";
+                my $time_name = eval $eval_string;
+                if ($@) {
+                    fail("'$eval_string' failed: $@");
+                    last SKIP;
+                }
+                if (! defined $time_name) {
+                    fail("'$eval_string' returned undef");
+                    last SKIP;
+                }
+                if ($time_name eq "") {
+                    fail("'$eval_string' returned an empty name");
+                    last SKIP;
+                }
+
+                if ($time_name =~ /\P{ASCII}/) {
+                    ok(utf8::is_utf8($time_name), "The name for '$time_item' in $utf8_locale is a UTF8 string");
+                    $found_time = 1;
+                    last;
+                }
+            }
+        }
+
+        if (! $found_monetary) {
+            setlocale(&LC_MONETARY, $utf8_locale);
+            my $eval_string = "langinfo(&CRNCYSTR)";
+            my $symbol = eval $eval_string;
+            if ($@) {
+                fail("'$eval_string' failed: $@");
+                last SKIP;
+            }
+            if (! defined $symbol) {
+                fail("'$eval_string' returned undef");
+                last SKIP;
+            }
+            if ($symbol =~ /\P{ASCII}/) {
+                ok(utf8::is_utf8($symbol), "The name for 'CRNCYSTR' in $utf8_locale is a UTF8 string");
+                $found_monetary = 1;
+            }
+        }
+
+        last if $found_monetary && $found_time;
+
+        # Remove this locale from the list, and loop to find another utf8
+        # locale
+        @locales = grep { $_ ne $utf8_locale } @locales;
+    }
+
+    if ($found_time + $found_monetary < 2) {
+        my $message = "";
+        $message .= "time name" unless $found_time;
+        if (! $found_monetary) {
+            $message .= " nor" if $message;
+            "monetary name";
+        }
+        skip("Couldn't find a locale with a non-ascii $message", 2 - $found_time - $found_monetary);
+    }
+}
diff --git a/pod/perldelta.pod b/pod/perldelta.pod

index 3bf0bd4..42db70d 100644 (file)
--- a/pod/perldelta.pod
+++ b/pod/perldelta.pod
@@ -143,6 +143,9 @@ L<detailed in its documentation|I18N::Langinfo>, the most severe being
  that the C<CODESET> item is not implemented on those systems, always
  returning C<"">.
  
+It now sets the UTF-8 flag in its returned scalar if the string contains
+legal non-ASCII UTF-8, and the locale is UTF-8 ([perl #127288].
+
  =back
  
  =head2 Removed Modules and Pragmata
diff --git a/proto.h b/proto.h

index 2259c77..e711e10 100644 (file)
--- a/proto.h
+++ b/proto.h
@@ -6221,7 +6221,7 @@ PERL_CALLCONV SV* Perl_sv_dup_inc(pTHX_ const SV *const sstr, CLONE_PARAMS *cons
         assert(param)
  
  #endif
-#if defined(USE_LOCALE)                    && (   defined(PERL_IN_LOCALE_C)            || defined(PERL_IN_MG_C)                || defined (PERL_EXT_POSIX))
+#if defined(USE_LOCALE)                    && (   defined(PERL_IN_LOCALE_C)            || defined(PERL_IN_MG_C)                || defined (PERL_EXT_POSIX)             || defined (PERL_EXT_LANGINFO))
  PERL_CALLCONV bool     Perl__is_cur_LC_category_utf8(pTHX_ int category);
  #endif
  #if defined(USE_LOCALE_COLLATE)
author	Karl Williamson <khw@cpan.org>
	Thu, 8 Mar 2018 05:48:55 +0000 (22:48 -0700)
committer	Karl Williamson <khw@cpan.org>
	Mon, 12 Mar 2018 16:17:14 +0000 (10:17 -0600)
embed.fnc		patch \| blob \| blame \| history
embed.h		patch \| blob \| blame \| history
ext/I18N-Langinfo/Langinfo.xs		patch \| blob \| blame \| history
ext/I18N-Langinfo/t/Langinfo.t		patch \| blob \| blame \| history
pod/perldelta.pod		patch \| blob \| blame \| history
proto.h		patch \| blob \| blame \| history