X-Git-Url: https://perl5.git.perl.org/perl5.git/blobdiff_plain/b94d11d663eee9a5d693a9ebb9cdad0f6ba6b881..aed13984243c935bb8b73650a63e730cbc85c26a:/t/loc_tools.pl diff --git a/t/loc_tools.pl b/t/loc_tools.pl index 90865e4..bead7c7 100644 --- a/t/loc_tools.pl +++ b/t/loc_tools.pl @@ -8,27 +8,42 @@ # anyway later during the scanning process (and besides, some clueless # vendor might have them capitalized erroneously anyway). -sub _trylocale { # Adds the locale given by the first parameter to the list - # given by the 3rd iff the platform supports the locale in - # each of the categories given by the 2nd parameter, which - # is either a single category or a reference to a list of - # categories +# Functions whose names begin with underscore are internal helper functions +# for this file, and are not to be used by outside callers. + +eval { require POSIX; import POSIX 'locale_h'; }; +$has_locale_h = ! $@; + +sub _trylocale ($$$$) { # For use only by other functions in this file! + + # Adds the locale given by the first parameter to the list given by the + # 3rd iff the platform supports the locale in each of the categories given + # by the 2nd parameter, which is either a single category or a reference + # to a list of categories The 4th parameter is true if to reject locales + # that aren't apparently fully compatible with Perl. + my $locale = shift; my $categories = shift; my $list = shift; - return if grep { $locale eq $_ } @$list; + my $only_plays_well = shift; + + return if ! $locale || grep { $locale eq $_ } @$list; $categories = [ $categories ] unless ref $categories; + my $badutf8 = 0; + my $plays_well = 1; + + use warnings 'locale'; + + local $SIG{__WARN__} = sub { + $badutf8 = 1 if $_[0] =~ /Malformed UTF-8/; + $plays_well = 0 if $_[0] =~ /Locale .* may not work well/i + }; + foreach my $category (@$categories) { return unless setlocale($category, $locale); - } - - my $badutf8; - { - local $SIG{__WARN__} = sub { - $badutf8 = $_[0] =~ /Malformed UTF-8/; - }; + return if $only_plays_well && ! $plays_well; } if ($badutf8) { @@ -38,7 +53,7 @@ sub _trylocale { # Adds the locale given by the first parameter to the list push @$list, $locale; } -sub _decode_encodings { +sub _decode_encodings { # For use only by other functions in this file! my @enc; foreach (split(/ /, shift)) { @@ -53,6 +68,7 @@ sub _decode_encodings { push @enc, "$_.65001"; # Windows UTF-8 push @enc, "$_.ACP"; # Windows ANSI code page push @enc, "$_.OCP"; # Windows OEM code page + push @enc, "$_.1252"; # Windows } } if ($^O eq 'os390') { @@ -64,15 +80,119 @@ sub _decode_encodings { return @enc; } -sub find_locales ($) { # Returns an array of all the locales we found on the - # system. The parameter is either a single locale - # category or a reference to a list of categories to - # find valid locales for it (or in the case of - # multiple) for all of them +# LC_ALL can be -1 on some platforms. And, in fact the implementors could +# legally use any integer to represent any category. But it makes the most +# sense for them to have used small integers. Below, we create new locale +# numbers for ones missing from this machine. We make them very negative, +# hopefully more negative than anything likely to be a valid category on the +# platform, but also below is a check to be sure that our guess is valid. +my $max_bad_category_number = -1000000; + +# Initialize this hash so that it looks like e.g., +# 6 => 'CTYPE', +# where 6 is the value of &POSIX::LC_CTYPE +my %category_name; +unless ($@) { + my $number_for_missing_category = $max_bad_category_number; + foreach my $name (qw(ALL COLLATE CTYPE MESSAGES MONETARY NUMERIC TIME)) { + my $number = eval "&POSIX::LC_$name"; + + if ($@) { + # Use a negative number (smaller than any legitimate category + # number) if the platform doesn't support this category, so we + # have an entry for all the ones that might be specified in calls + # to us. + $number = $number_for_missing_category-- if $@; + } + elsif ( $number !~ / ^ -? \d+ $ /x + || $number <= $max_bad_category_number) + { + # We think this should be an int. And it has to be larger than + # any of our synthetic numbers. + die "Unexpected locale category number '$number' for LC_$name" + } + + $category_name{$number} = "$name"; + } +} + +sub locales_enabled(;$) { + # Returns 0 if no locale handling is available on this platform; otherwise + # 1. + # + # The optional parameter is a reference to a list of individual POSIX + # locale categories. If present, this function also returns 0 if any of + # them are individually not available on this platform; otherwise 1. + # Actually, it is acceptable for the list to be just a simple scalar + # denoting a single category. + # + # If any of the individual categories specified by the optional parameter + # is all digits (and an optional leading minus), it is taken to be the C + # enum for the category (e.g., &POSIX::LC_CTYPE). Otherwise it should be + # a string name of the category, like 'LC_TIME'. The initial 'LC_' is + # optional. It is a fatal error to call this with something that isn't a + # known category + + use Config; + + return 0 unless $Config{d_setlocale} + # I (khw) cargo-culted the '?' in the pattern on the + # next line. + && $Config{ccflags} !~ /\bD?NO_LOCALE\b/ + && $has_locale_h; + + # Done with the global possibilities. Now check if any passed in category + # is disabled. + my $categories_ref = shift; + if (defined $categories_ref) { + $categories_ref = [ $categories_ref ] if ! ref $categories_ref; + my @local_categories_copy = @$categories_ref; + for my $category_name_or_number (@local_categories_copy) { + my $name; + my $number; + if ($category_name_or_number =~ / ^ -? \d+ $ /x) { + $number = $category_name_or_number; + die "Invalid locale category number '$number'" + unless grep { $number == $_ } keys %category_name; + $name = $category_name{$number}; + } + else { + $name = $category_name_or_number; + $name =~ s/ ^ LC_ //x; + foreach my $trial (keys %category_name) { + if ($category_name{$trial} eq $name) { + $number = $trial; + last; + } + } + die "Invalid locale category name '$name'" + unless defined $number; + } + + return 0 if $number <= $max_bad_category_number + || $Config{ccflags} =~ /\bD?NO_LOCALE_$name\b/; + + eval "defined &POSIX::LC_$name"; + return 0 if $@; + } + } + + return 1; +} + + +sub find_locales ($;$) { # Returns an array of all the locales we found on the + # system. If the optional 2nd parameter is + # non-zero, the list is restricted to those locales + # that play well with Perl. + # The first parameter is either a single locale + # category or a reference to a list of categories to + # find valid locales for it (or in the case of + # multiple) for all of them. my $categories = shift; + my $only_plays_well = shift // 0; - use Config;; - my $have_setlocale = $Config{d_setlocale}; + return unless locales_enabled($categories); # Visual C's CRT goes silly on strings of the form "en_US.ISO8859-1" # and mingw32 uses said silly CRT @@ -80,28 +200,22 @@ sub find_locales ($) { # Returns an array of all the locales we found on the # so re-enable the tests for Windows XP onwards. my $winxp = ($^O eq 'MSWin32' && defined &Win32::GetOSVersion && join('.', (Win32::GetOSVersion())[1..2]) >= 5.1); - $have_setlocale = 0 if ((($^O eq 'MSWin32' && !$winxp) || $^O eq 'NetWare') && - $Config{cc} =~ /^(cl|gcc|g\+\+|ici)/i); + return if ((($^O eq 'MSWin32' && !$winxp) || $^O eq 'NetWare') + && $Config{cc} =~ /^(cl|gcc|g\+\+|ici)/i); # UWIN seems to loop after taint tests, just skip for now - $have_setlocale = 0 if ($^O =~ /^uwin/); - - return unless $have_setlocale; + return if ($^O =~ /^uwin/); - # Done this way in case this is 'required' in the caller before seeing if - # this is miniperl. - require POSIX; import POSIX 'locale_h'; - - _trylocale("C", $categories, \@Locale); - _trylocale("POSIX", $categories, \@Locale); + _trylocale("C", $categories, \@Locale, $only_plays_well); + _trylocale("POSIX", $categories, \@Locale, $only_plays_well); foreach (0..15) { - _trylocale("ISO8859-$_", $categories, \@Locale); - _trylocale("iso8859$_", $categories, \@Locale); - _trylocale("iso8859-$_", $categories, \@Locale); - _trylocale("iso_8859_$_", $categories, \@Locale); - _trylocale("isolatin$_", $categories, \@Locale); - _trylocale("isolatin-$_", $categories, \@Locale); - _trylocale("iso_latin_$_", $categories, \@Locale); + _trylocale("ISO8859-$_", $categories, \@Locale, $only_plays_well); + _trylocale("iso8859$_", $categories, \@Locale, $only_plays_well); + _trylocale("iso8859-$_", $categories, \@Locale, $only_plays_well); + _trylocale("iso_8859_$_", $categories, \@Locale, $only_plays_well); + _trylocale("isolatin$_", $categories, \@Locale, $only_plays_well); + _trylocale("isolatin-$_", $categories, \@Locale, $only_plays_well); + _trylocale("iso_latin_$_", $categories, \@Locale, $only_plays_well); } # Sanitize the environment so that we can run the external 'locale' @@ -122,7 +236,7 @@ sub find_locales ($) { # Returns an array of all the locales we found on the # locales will cause all IO hadles to default to (assume) utf8 next unless utf8::valid($_); chomp; - _trylocale($_, $categories, \@Locale); + _trylocale($_, $categories, \@Locale, $only_plays_well); } close(LOCALES); } elsif ($^O eq 'VMS' @@ -134,18 +248,19 @@ sub find_locales ($) { # Returns an array of all the locales we found on the opendir(LOCALES, "SYS\$I18N_LOCALE:"); while ($_ = readdir(LOCALES)) { chomp; - _trylocale($_, $categories, \@Locale); + _trylocale($_, $categories, \@Locale, $only_plays_well); } close(LOCALES); } elsif (($^O eq 'openbsd' || $^O eq 'bitrig' ) && -e '/usr/share/locale') { - # OpenBSD doesn't have a locale executable, so reading /usr/share/locale - # is much easier and faster than the last resort method. + # OpenBSD doesn't have a locale executable, so reading + # /usr/share/locale is much easier and faster than the last resort + # method. opendir(LOCALES, '/usr/share/locale'); while ($_ = readdir(LOCALES)) { chomp; - _trylocale($_, $categories, \@Locale); + _trylocale($_, $categories, \@Locale, $only_plays_well); } close(LOCALES); } else { # Final fallback. Try our list of locales hard-coded here @@ -153,7 +268,6 @@ sub find_locales ($) { # Returns an array of all the locales we found on the # This is going to be slow. my @Data; - # Locales whose name differs if the utf8 bit is on are stored in these two # files with appropriate encodings. if ($^H & 0x08 || (${^OPEN} || "") =~ /:utf8/) { @@ -170,27 +284,31 @@ sub find_locales ($) { # Returns an array of all the locales we found on the split /:/, $line; my @enc = _decode_encodings($encodings); foreach my $loc (split(/ /, $locale_name)) { - _trylocale($loc, $categories, \@Locale); + _trylocale($loc, $categories, \@Locale, $only_plays_well); foreach my $enc (@enc) { - _trylocale("$loc.$enc", $categories, \@Locale); + _trylocale("$loc.$enc", $categories, \@Locale, + $only_plays_well); } $loc = lc $loc; foreach my $enc (@enc) { - _trylocale("$loc.$enc", $categories, \@Locale); + _trylocale("$loc.$enc", $categories, \@Locale, + $only_plays_well); } } foreach my $lang (split(/ /, $language_codes)) { - _trylocale($lang, $categories, \@Locale); + _trylocale($lang, $categories, \@Locale, $only_plays_well); foreach my $country (split(/ /, $country_codes)) { my $lc = "${lang}_${country}"; - _trylocale($lc, $categories, \@Locale); + _trylocale($lc, $categories, \@Locale, $only_plays_well); foreach my $enc (@enc) { - _trylocale("$lc.$enc", $categories, \@Locale); + _trylocale("$lc.$enc", $categories, \@Locale, + $only_plays_well); } my $lC = "${lang}_\U${country}"; - _trylocale($lC, $categories, \@Locale); + _trylocale($lC, $categories, \@Locale, $only_plays_well); foreach my $enc (@enc) { - _trylocale("$lC.$enc", $categories, \@Locale); + _trylocale("$lC.$enc", $categories, \@Locale, + $only_plays_well); } } } @@ -200,15 +318,20 @@ sub find_locales ($) { # Returns an array of all the locales we found on the @Locale = sort @Locale; return @Locale; - - } sub is_locale_utf8 ($) { # Return a boolean as to if core Perl thinks the input # is a UTF-8 locale + + # On z/OS, even locales marked as UTF-8 aren't. + return 0 if ord "A" != 65; + + return 0 unless locales_enabled('LC_CTYPE'); + my $locale = shift; use locale; + no warnings 'locale'; # We may be trying out a weird locale my $save_locale = setlocale(&POSIX::LC_CTYPE()); if (! $save_locale) { @@ -243,16 +366,21 @@ sub is_locale_utf8 ($) { # Return a boolean as to if core Perl thinks the input return $ret; } -sub find_utf8_ctype_locale (;$) { # Return the name of locale that core Perl +sub find_utf8_ctype_locale (;$) { # Return the name of a locale that core Perl # thinks is a UTF-8 LC_CTYPE locale. # Optional parameter is a reference to a # list of locales to try; if omitted, this # tries all locales it can find on the # platform + return unless locales_enabled('LC_CTYPE'); + my $locales_ref = shift; - return if !defined &POSIX::LC_CTYPE; + if (! defined $locales_ref) { - my @locales = find_locales(&POSIX::LC_CTYPE()); + + my @locales = find_locales(&POSIX::LC_CTYPE(), + 1 # Reject iffy locales. + ); $locales_ref = \@locales; }