Skip test on Basque locales on Mac OS X 10.5

[perl5.git] / lib / locale.t
diff --git a/lib/locale.t b/lib/locale.t

index 679aaf0..aa8aa07 100644 (file)
--- a/lib/locale.t
+++ b/lib/locale.t
@@ -43,13 +43,21 @@ eval {
  
  # Visual C's CRT goes silly on strings of the form "en_US.ISO8859-1"
  # and mingw32 uses said silly CRT
-$have_setlocale = 0 if (($^O eq 'MSWin32' || $^O eq 'NetWare') && $Config{cc} =~ /^(cl|gcc)/i);
+# This doesn't seem to be an issue any more, at least on Windows XP,
+# so re-enable the tests for Windows XP onwards.
+my $winxp = ($^O eq 'MSWin32' && defined &Win32::GetOSVersion &&
+               join('.', (Win32::GetOSVersion())[1..2]) >= 5.1);
+$have_setlocale = 0 if ((($^O eq 'MSWin32' && !$winxp) || $^O eq 'NetWare') &&
+               $Config{cc} =~ /^(cl|gcc)/i);
+
+# UWIN seems to loop after test 98, just skip for now
+$have_setlocale = 0 if ($^O =~ /^uwin/);
  
  my $last = $have_setlocale ? &last : &last_without_setlocale;
  
  print "1..$last\n";
  
-use vars qw(&LC_ALL);
+sub LC_ALL ();
  
  $a = 'abc %';
  
@@ -379,6 +387,10 @@ delete @ENV{qw(IFS CDPATH ENV BASH_ENV)};
  
  if (-x "/usr/bin/locale" && open(LOCALES, "/usr/bin/locale -a 2>/dev/null|")) {
      while (<LOCALES>) {
+       # It seems that /usr/bin/locale steadfastly outputs 8 bit data, which
+       # ain't great when we're running this testPERL_UNICODE= so that utf8
+       # locales will cause all IO hadles to default to (assume) utf8
+       next unless utf8::valid($_);
          chomp;
         trylocale($_);
      }
@@ -392,6 +404,17 @@ if (-x "/usr/bin/locale" && open(LOCALES, "/usr/bin/locale -a 2>/dev/null|")) {
          trylocale($_);
      }
      close(LOCALES);
+} elsif ($^O eq 'openbsd' && -e '/usr/share/locale') {
+
+   # OpenBSD doesn't have a locale executable, so reading /usr/share/locale
+   # is much easier and faster than the last resort method.
+
+    opendir(LOCALES, '/usr/share/locale');
+    while ($_ = readdir(LOCALES)) {
+        chomp;
+        trylocale($_);
+    }
+    close(LOCALES);
  } else {
  
      # This is going to be slow.
@@ -430,6 +453,16 @@ if (-x "/usr/bin/locale" && open(LOCALES, "/usr/bin/locale -a 2>/dev/null|")) {
  
  setlocale(LC_ALL, "C");
  
+if ($^O eq 'darwin') {
+    # Darwin 8/Mac OS X 10.4 and 10.5 have bad Basque locales: perl bug #35895,
+    # Apple bug ID# 4139653. It also has a problem in Byelorussian.
+    (my $v) = $Config{osvers} =~ /^(\d+)/;
+    if ($v >= 8 and $v < 10) {
+       debug "# Skipping eu_ES, be_BY locales -- buggy in Darwin\n";
+       @Locale = grep ! m/^(eu_ES|be_BY.CP1131$)/, @Locale;
+    }
+}
+
  @Locale = sort @Locale;
  
  debug "# Locales =\n";
@@ -523,7 +556,17 @@ foreach $Locale (@Locale) {
      
         my $word = join('', @Neoalpha);
  
-       if ($Locale =~ /utf-?8/i) {
+       my $badutf8;
+       {
+           local $SIG{__WARN__} = sub {
+               $badutf8 = $_[0] =~ /Malformed UTF-8/;
+           };
+           $Locale =~ /utf-?8/i;
+       }
+
+       if ($badutf8) {
+           debug "# Locale name contains bad UTF-8, skipping test 99 for locale '$Locale'\n";
+       } elsif ($Locale =~ /utf-?8/i) {
             debug "# unknown whether locale and Unicode have the same \\w, skipping test 99 for locale '$Locale'\n";
             push @{$Okay{99}}, $Locale;
         } else {
@@ -725,6 +768,7 @@ foreach $Locale (@Locale) {
             print "# UPPER $x lc $y ",
             $x =~ /$y/i ? 1 : 0, " ",
             $y =~ /$x/i ? 1 : 0, "\n" if 0;
+           #
             # If $x and $y contain regular expression characters
             # AND THEY lowercase (/i) to regular expression characters,
             # regcomp() will be mightily confused.  No, the \Q doesn't
@@ -732,12 +776,22 @@ foreach $Locale (@Locale) {
             # is done after the \Q?)  An example of this happening is
             # the bg_BG (Bulgarian) locale under EBCDIC (OS/390 USS):
             # the chr(173) (the "[") is the lowercase of the chr(235).
+           #
             # Similarly losing EBCDIC locales include cs_cz, cs_CZ,
             # el_gr, el_GR, en_us.IBM-037 (!), en_US.IBM-037 (!),
             # et_ee, et_EE, hr_hr, hr_HR, hu_hu, hu_HU, lt_LT,
             # mk_mk, mk_MK, nl_nl.IBM-037, nl_NL.IBM-037,
             # pl_pl, pl_PL, ro_ro, ro_RO, ru_ru, ru_RU,
             # sk_sk, sk_SK, sl_si, sl_SI, tr_tr, tr_TR.
+           #
+           # Similar things can happen even under (bastardised)
+           # non-EBCDIC locales: in many European countries before the
+           # advent of ISO 8859-x nationally customised versions of
+           # ISO 646 were devised, reusing certain punctuation
+           # characters for modified characters needed by the
+           # country/language.  For example, the "|" might have
+           # stood for U+00F6 or LATIN SMALL LETTER O WITH DIAERESIS.
+           #
             if ($x =~ $re || $y =~ $re) {
                 print "# Regex characters in '$x' or '$y', skipping test 117 for locale '$Locale'\n";
                 next;