Langinfo: Implement CODESET on Windows

author Karl Williamson <khw@cpan.org>

Thu, 8 Mar 2018 20:00:40 +0000 (13:00 -0700)

committer Karl Williamson <khw@cpan.org>

Mon, 12 Mar 2018 16:17:14 +0000 (10:17 -0600)
author Karl Williamson <khw@cpan.org>
Thu, 8 Mar 2018 20:00:40 +0000 (13:00 -0700)
committer Karl Williamson <khw@cpan.org>
Mon, 12 Mar 2018 16:17:14 +0000 (10:17 -0600)
diff --git a/ext/I18N-Langinfo/Langinfo.pm b/ext/I18N-Langinfo/Langinfo.pm

index bcc1527..e9e84d2 100644 (file)
--- a/ext/I18N-Langinfo/Langinfo.pm
+++ b/ext/I18N-Langinfo/Langinfo.pm
@@ -168,12 +168,15 @@ glitches.  These are the items that could be different:
  
  =over
  
-=item C<CODESET>
-
  =item C<ERA>
  
  Unimplemented, so returns C<"">.
  
+=item C<CODESET>
+
+Unimplemented, except on Windows, due to the vagaries of vendor locale names,
+returning C<""> on non-Windows.
+
  =item C<YESEXPR>
  
  =item C<YESSTR>
diff --git a/locale.c b/locale.c

index b90d69f..277e038 100644 (file)
--- a/locale.c
+++ b/locale.c
@@ -2312,12 +2312,12 @@ But most importantly, it works on systems that don't have C<nl_langinfo>, such
  as Windows, hence makes your code more portable.  Of the fifty-some possible
  items specified by the POSIX 2008 standard,
  L<http://pubs.opengroup.org/onlinepubs/9699919799/basedefs/langinfo.h.html>,
-only two are completely unimplemented (though the loss of one of these is
-significant).  It uses various techniques to recover the other items, including
-calling C<L<localeconv(3)>>, and C<L<strftime(3)>>, both of which are specified
-in C89, so should be always be available.  Later C<strftime()> versions have
-additional capabilities; C<""> is returned for those not available on your
-system.
+only one is completely unimplemented (though on non-Windows platforms, another
+significant one is also not implemented).  It uses various techniques to
+recover the other items, including calling C<L<localeconv(3)>>, and
+C<L<strftime(3)>>, both of which are specified in C89, so should always be
+available.  Later C<strftime()> versions have additional capabilities; C<""> is
+returned for those not available on your system.
  
  It is important to note that when called with an item that is recovered by
  using C<localeconv>, the buffer from any previous explicit call to
@@ -2493,8 +2493,7 @@ S_my_nl_langinfo(const int item, bool toggle)
          switch (item) {
              Size_t len;
  
-            /* These 2 are unimplemented */
-            case CODESET:
+            /* This is unimplemented */
              case ERA:      /* For use with strftime() %E modifier */
  
              default:
@@ -2506,7 +2505,66 @@ S_my_nl_langinfo(const int item, bool toggle)
              case NOEXPR:    return "^[-0nN]";
              case NOSTR:     return "no";
  
+            case CODESET:
+
+#  ifndef WIN32
+
+                /* On non-windows, this is unimplemented, in part because of
+                 * inconsistencies between vendors.  The Darwin native
+                 * nl_langinfo() implementation simply looks at everything past
+                 * any dot in the name, but that doesn't work for other
+                 * vendors.  Many Linux locales that don't have UTF-8 in their
+                 * names really are UTF-8, for example; z/OS locales that do
+                 * have UTF-8 in their names, aren't really UTF-8 */
+                return "";
+
+#  else
+
+                {   /* But on Windows, the name does seem to be consistent, so
+                       use that. */
+                    const char * p;
+                    const char * first;
+                    Size_t offset = 0;
+                    const char * name = my_setlocale(LC_CTYPE, NULL);
+
+                    if (isNAME_C_OR_POSIX(name)) {
+                        return "ANSI_X3.4-1968";
+                    }
+
+                    /* Find the dot in the locale name */
+                    first = (const char *) strchr(name, '.');
+                    if (! first) {
+                        first = name;
+                        goto has_nondigit;
+                    }
  
+                    /* Look at everything past the dot */
+                    first++;
+                    p = first;
+
+                    while (*p) {
+                        if (! isDIGIT(*p)) {
+                            goto has_nondigit;
+                        }
+
+                        p++;
+                    }
+
+                    /* Here everything past the dot is a digit.  Treat it as a
+                     * code page */
+                    save_to_buffer("CP", &PL_langinfo_buf,
+                                         &PL_langinfo_bufsize, 0);
+                    offset = STRLENs("CP");
+
+                  has_nondigit:
+
+                    retval = save_to_buffer(first, &PL_langinfo_buf,
+                                            &PL_langinfo_bufsize, offset);
+                }
+
+                break;
+
+#  endif
  #  ifdef HAS_LOCALECONV
  
              case CRNCYSTR:
diff --git a/pod/perldelta.pod b/pod/perldelta.pod

index 42db70d..9baa05a 100644 (file)
--- a/pod/perldelta.pod
+++ b/pod/perldelta.pod
@@ -140,8 +140,8 @@ L<I18N::Langinfo> has been upgraded from version 0.15 to 0.16.
  This module is now available on all platforms, emulating the system
  L<nl_langinfo(3)> on systems that lack it.  Some caveats apply, as
  L<detailed in its documentation|I18N::Langinfo>, the most severe being
-that the C<CODESET> item is not implemented on those systems, always
-returning C<"">.
+that, except for MS Windows, the C<CODESET> item is not implemented on
+those systems, always returning C<"">.
  
  It now sets the UTF-8 flag in its returned scalar if the string contains
  legal non-ASCII UTF-8, and the locale is UTF-8 ([perl #127288]).
author	Karl Williamson <khw@cpan.org>
	Thu, 8 Mar 2018 20:00:40 +0000 (13:00 -0700)
committer	Karl Williamson <khw@cpan.org>
	Mon, 12 Mar 2018 16:17:14 +0000 (10:17 -0600)
ext/I18N-Langinfo/Langinfo.pm		patch \| blob \| blame \| history
locale.c		patch \| blob \| blame \| history
pod/perldelta.pod		patch \| blob \| blame \| history