X-Git-Url: https://perl5.git.perl.org/perl5.git/blobdiff_plain/2cc6a9db5a91276f3ff662b3e5befa6799fde3ed..caf06cdc26767e627e2fd43f5ba3280a3ebcf760:/t/loc_tools.pl diff --git a/t/loc_tools.pl b/t/loc_tools.pl index ae8ff64..4b99c45 100644 --- a/t/loc_tools.pl +++ b/t/loc_tools.pl @@ -8,17 +8,70 @@ # anyway later during the scanning process (and besides, some clueless # vendor might have them capitalized erroneously anyway). -sub _trylocale ($$$$) { # Adds the locale given by the first parameter to the - # list given by the 3rd iff the platform supports the - # locale in each of the categories given by the 2nd - # parameter, which is either a single category or a - # reference to a list of categories - # The 4th parameter is true if to reject locales that - # aren't apparently fully compatible with Perl. +# Functions whose names begin with underscore are internal helper functions +# for this file, and are not to be used by outside callers. + +use strict; + +eval { require POSIX; import POSIX 'locale_h'; }; +my $has_locale_h = ! $@; + +# LC_ALL can be -1 on some platforms. And, in fact the implementors could +# legally use any integer to represent any category. But it makes the most +# sense for them to have used small integers. Below, we create new locale +# numbers for ones missing from this machine. We make them very negative, +# hopefully more negative than anything likely to be a valid category on the +# platform, but also below is a check to be sure that our guess is valid. +my $max_bad_category_number = -1000000; + +# Initialize this hash so that it looks like e.g., +# 6 => 'CTYPE', +# where 6 is the value of &POSIX::LC_CTYPE +my %category_name; +my %category_number; +unless ($@) { + my $number_for_missing_category = $max_bad_category_number; + foreach my $name (qw(ALL COLLATE CTYPE MESSAGES MONETARY NUMERIC TIME)) { + my $number = eval "&POSIX::LC_$name"; + + if ($@) { + # Use a negative number (smaller than any legitimate category + # number) if the platform doesn't support this category, so we + # have an entry for all the ones that might be specified in calls + # to us. + $number = $number_for_missing_category-- if $@; + } + elsif ( $number !~ / ^ -? \d+ $ /x + || $number <= $max_bad_category_number) + { + # We think this should be an int. And it has to be larger than + # any of our synthetic numbers. + die "Unexpected locale category number '$number' for LC_$name" + } + + $category_name{$number} = "$name"; + $category_number{$name} = $number; + } +} + +sub _trylocale ($$$$) { # For use only by other functions in this file! + + # Adds the locale given by the first parameter to the list given by the + # 3rd iff the platform supports the locale in each of the category numbers + # given by the 2nd parameter, which is either a single category or a + # reference to a list of categories. The list MUST be sorted so that + # CTYPE is first, COLLATE is last unless ALL is present, in which case + # that comes after COLLATE. This is because locale.c detects bad locales + # only with CTYPE, and COLLATE on some platforms can core dump if it is a + # bad locale. + # + # The 4th parameter is true if to accept locales that aren't apparently + # fully compatible with Perl. + my $locale = shift; my $categories = shift; my $list = shift; - my $only_plays_well = shift; + my $allow_incompatible = shift; return if ! $locale || grep { $locale eq $_ } @$list; @@ -30,23 +83,29 @@ sub _trylocale ($$$$) { # Adds the locale given by the first parameter to the use warnings 'locale'; local $SIG{__WARN__} = sub { - $badutf8 = 1 if $_[0] =~ /Malformed UTF-8/; - $plays_well = 0 if $_[0] =~ /Locale .* may not work well/i + $badutf8 = 1 if grep { /Malformed UTF-8/ } @_; + $plays_well = 0 if grep { /Locale .* may not work well/i } @_; }; + # Incompatible locales aren't warned about unless using locales. + use locale; + foreach my $category (@$categories) { + die "category '$category' must instead be a number" + unless $category =~ / ^ -? \d+ $ /x; + return unless setlocale($category, $locale); - return if $only_plays_well && ! $plays_well; + last if $badutf8 || ! $plays_well; } if ($badutf8) { ok(0, "Verify locale name doesn't contain malformed utf8"); return; } - push @$list, $locale; + push @$list, $locale if $plays_well || $allow_incompatible; } -sub _decode_encodings { +sub _decode_encodings { # For use only by other functions in this file! my @enc; foreach (split(/ /, shift)) { @@ -73,83 +132,137 @@ sub _decode_encodings { return @enc; } -# Initialize this hash so that it looks like e.g., -# 6 => 'CTYPE', -# where 6 is the value of &POSIX::LC_CTYPE -my %category_name; -eval { require POSIX; import POSIX 'locale_h'; }; -unless ($@) { - my $number_for_missing_category = 0; - foreach my $name (qw(ALL COLLATE CTYPE MESSAGES MONETARY NUMERIC TIME)) { - my $number = eval "&POSIX::LC_$name"; - - # Use a negative number if the platform doesn't support this category, - # so we have an entry for all ones that might be specified in calls to - # us. - $number = --$number_for_missing_category if $@; - - $category_name{$number} = "$name"; - } -} - sub locales_enabled(;$) { # Returns 0 if no locale handling is available on this platform; otherwise # 1. # # The optional parameter is a reference to a list of individual POSIX - # locale categories. If present, this function also returns 0 if any of - # them are individually not available on this platform; otherwise 1. - # Actually, it is acceptable for the list to be just a simple scalar - # denoting a single category. + # locale categories. If any of the individual categories specified by the + # optional parameter is all digits (and an optional leading minus), it is + # taken to be the C enum for the category (e.g., &POSIX::LC_CTYPE). + # Otherwise it should be a string name of the category, like 'LC_TIME'. + # The initial 'LC_' is optional. It is a fatal error to call this with + # something that isn't a known category to the platform. # - # If any of the individual categories specified by the optional parameter - # is all digits, it is taken to be the C enum for the category (e.g., - # &POSIX::LC_CTYPE). Otherwise it should be a string name of the - # category, like 'LC_TIME'. The initial 'LC_' is optional. It is a fatal - # error to call this with something that isn't a known category + # This optional parameter denotes which POSIX locale categories must be + # available on the platform. If any aren't available, this function + # returns 0; otherwise it returns 1 and changes the list for the caller so + # that any category names are converted into their equivalent numbers, and + # sorts it to match the expectations of _trylocale. + # + # It is acceptable for the second parameter to be just a simple scalar + # denoting a single category (either name or number). No conversion into + # a number is done in this case. - use Config;; + use Config; return 0 unless $Config{d_setlocale} # I (khw) cargo-culted the '?' in the pattern on the # next line. - && $Config{ccflags} !~ /\bD?NO_LOCALE\b/; + && $Config{ccflags} !~ /\bD?NO_LOCALE\b/ + && $has_locale_h; # Done with the global possibilities. Now check if any passed in category # is disabled. + my $categories_ref = shift; - $categories_ref = [ $categories_ref ] if ! ref $categories_ref; - for my $category (@$categories_ref) { - if ($category =~ /\D/) { - $category =~ s/ ^ LC_ //x; - die "Invalid locale category name '$category'" - unless grep { $category eq $_ } values %category_name; + my $return_categories_numbers = 0; + my @categories_numbers; + my $has_LC_ALL = 0; + my $has_LC_COLLATE = 0; + + if (defined $categories_ref) { + my @local_categories_copy; + + if (ref $categories_ref) { + @local_categories_copy = @$$categories_ref; + $return_categories_numbers = 1; } - else { - die "Invalid locale category number '$category'" - unless grep { $category == $_ } keys %category_name; - $category = $category_name{$category}; + else { # Single category passed in + @local_categories_copy = $categories_ref; } - return 0 if $Config{ccflags} =~ /\bD?NO_LOCALE_$category\b/; + for my $category_name_or_number (@local_categories_copy) { + my $name; + my $number; + if ($category_name_or_number =~ / ^ -? \d+ $ /x) { + $number = $category_name_or_number; + die "Invalid locale category number '$number'" + unless grep { $number == $_ } keys %category_name; + $name = $category_name{$number}; + } + else { + $name = $category_name_or_number; + $name =~ s/ ^ LC_ //x; + foreach my $trial (keys %category_name) { + if ($category_name{$trial} eq $name) { + $number = $trial; + last; + } + } + die "Invalid locale category name '$name'" + unless defined $number; + } + + return 0 if $number <= $max_bad_category_number + || $Config{ccflags} =~ /\bD?NO_LOCALE_$name\b/; + + eval "defined &POSIX::LC_$name"; + return 0 if $@; + + if ($return_categories_numbers) { + if ($name eq 'CTYPE') { + unshift @categories_numbers, $number; # Always first + } + elsif ($name eq 'ALL') { + $has_LC_ALL = 1; + } + elsif ($name eq 'COLLATE') { + $has_LC_COLLATE = 1; + } + else { + push @categories_numbers, $number; + } + } + } + } + + if ($return_categories_numbers) { + + # COLLATE comes after all other locales except ALL, which comes last + if ($has_LC_COLLATE) { + push @categories_numbers, $category_number{'COLLATE'}; + } + if ($has_LC_ALL) { + push @categories_numbers, $category_number{'ALL'}; + } + $$categories_ref = \@categories_numbers; } return 1; } -sub find_locales ($;$) { # Returns an array of all the locales we found on the - # system. If the optional 2nd parameter is - # non-zero, the list is restricted to those locales - # that play well with Perl. - # The first parameter is either a single locale - # category or a reference to a list of categories to - # find valid locales for it (or in the case of - # multiple) for all of them. +sub find_locales ($;$) { + + # Returns an array of all the locales we found on the system. If the + # optional 2nd parameter is non-zero, the list includes all found locales; + # otherwise it is restricted to those locales that play well with Perl, as + # far as we can easily determine. + # + # The first parameter is either a single locale category or a reference to + # a list of categories to find valid locales for it (or in the case of + # multiple) for all of them. Each category can be a name (like 'LC_ALL' + # or simply 'ALL') or the C enum value for the category. + my $categories = shift; - my $only_plays_well = shift // 0; + my $allow_incompatible = shift // 0; + + $categories = [ $categories ] unless ref $categories; + return unless locales_enabled(\$categories); - return unless locales_enabled($categories); + # Note, the subroutine call above converts the $categories into a form + # suitable for _trylocale(). # Visual C's CRT goes silly on strings of the form "en_US.ISO8859-1" # and mingw32 uses said silly CRT @@ -163,23 +276,17 @@ sub find_locales ($;$) { # Returns an array of all the locales we found on the # UWIN seems to loop after taint tests, just skip for now return if ($^O =~ /^uwin/); - # Done this way in case this is 'required' in the caller before seeing if - # this is miniperl. - eval { require POSIX; import POSIX 'locale_h'; }; - unless (defined &POSIX::LC_CTYPE) { - return; - } - - _trylocale("C", $categories, \@Locale, $only_plays_well); - _trylocale("POSIX", $categories, \@Locale, $only_plays_well); + my @Locale; + _trylocale("C", $categories, \@Locale, $allow_incompatible); + _trylocale("POSIX", $categories, \@Locale, $allow_incompatible); foreach (0..15) { - _trylocale("ISO8859-$_", $categories, \@Locale, $only_plays_well); - _trylocale("iso8859$_", $categories, \@Locale, $only_plays_well); - _trylocale("iso8859-$_", $categories, \@Locale, $only_plays_well); - _trylocale("iso_8859_$_", $categories, \@Locale, $only_plays_well); - _trylocale("isolatin$_", $categories, \@Locale, $only_plays_well); - _trylocale("isolatin-$_", $categories, \@Locale, $only_plays_well); - _trylocale("iso_latin_$_", $categories, \@Locale, $only_plays_well); + _trylocale("ISO8859-$_", $categories, \@Locale, $allow_incompatible); + _trylocale("iso8859$_", $categories, \@Locale, $allow_incompatible); + _trylocale("iso8859-$_", $categories, \@Locale, $allow_incompatible); + _trylocale("iso_8859_$_", $categories, \@Locale, $allow_incompatible); + _trylocale("isolatin$_", $categories, \@Locale, $allow_incompatible); + _trylocale("isolatin-$_", $categories, \@Locale, $allow_incompatible); + _trylocale("iso_latin_$_", $categories, \@Locale, $allow_incompatible); } # Sanitize the environment so that we can run the external 'locale' @@ -192,7 +299,7 @@ sub find_locales ($;$) { # Returns an array of all the locales we found on the delete local @ENV{qw(IFS CDPATH ENV BASH_ENV)}; if (-x "/usr/bin/locale" - && open(LOCALES, "/usr/bin/locale -a 2>/dev/null|")) + && open(LOCALES, '-|', "/usr/bin/locale -a 2>/dev/null")) { while () { # It seems that /usr/bin/locale steadfastly outputs 8 bit data, which @@ -200,7 +307,7 @@ sub find_locales ($;$) { # Returns an array of all the locales we found on the # locales will cause all IO hadles to default to (assume) utf8 next unless utf8::valid($_); chomp; - _trylocale($_, $categories, \@Locale, $only_plays_well); + _trylocale($_, $categories, \@Locale, $allow_incompatible); } close(LOCALES); } elsif ($^O eq 'VMS' @@ -212,18 +319,19 @@ sub find_locales ($;$) { # Returns an array of all the locales we found on the opendir(LOCALES, "SYS\$I18N_LOCALE:"); while ($_ = readdir(LOCALES)) { chomp; - _trylocale($_, $categories, \@Locale, $only_plays_well); + _trylocale($_, $categories, \@Locale, $allow_incompatible); } close(LOCALES); } elsif (($^O eq 'openbsd' || $^O eq 'bitrig' ) && -e '/usr/share/locale') { - # OpenBSD doesn't have a locale executable, so reading /usr/share/locale - # is much easier and faster than the last resort method. + # OpenBSD doesn't have a locale executable, so reading + # /usr/share/locale is much easier and faster than the last resort + # method. opendir(LOCALES, '/usr/share/locale'); while ($_ = readdir(LOCALES)) { chomp; - _trylocale($_, $categories, \@Locale, $only_plays_well); + _trylocale($_, $categories, \@Locale, $allow_incompatible); } close(LOCALES); } else { # Final fallback. Try our list of locales hard-coded here @@ -247,31 +355,31 @@ sub find_locales ($;$) { # Returns an array of all the locales we found on the split /:/, $line; my @enc = _decode_encodings($encodings); foreach my $loc (split(/ /, $locale_name)) { - _trylocale($loc, $categories, \@Locale, $only_plays_well); + _trylocale($loc, $categories, \@Locale, $allow_incompatible); foreach my $enc (@enc) { _trylocale("$loc.$enc", $categories, \@Locale, - $only_plays_well); + $allow_incompatible); } $loc = lc $loc; foreach my $enc (@enc) { _trylocale("$loc.$enc", $categories, \@Locale, - $only_plays_well); + $allow_incompatible); } } foreach my $lang (split(/ /, $language_codes)) { - _trylocale($lang, $categories, \@Locale, $only_plays_well); + _trylocale($lang, $categories, \@Locale, $allow_incompatible); foreach my $country (split(/ /, $country_codes)) { my $lc = "${lang}_${country}"; - _trylocale($lc, $categories, \@Locale, $only_plays_well); + _trylocale($lc, $categories, \@Locale, $allow_incompatible); foreach my $enc (@enc) { _trylocale("$lc.$enc", $categories, \@Locale, - $only_plays_well); + $allow_incompatible); } my $lC = "${lang}_\U${country}"; - _trylocale($lC, $categories, \@Locale, $only_plays_well); + _trylocale($lC, $categories, \@Locale, $allow_incompatible); foreach my $enc (@enc) { _trylocale("$lC.$enc", $categories, \@Locale, - $only_plays_well); + $allow_incompatible); } } } @@ -289,9 +397,7 @@ sub is_locale_utf8 ($) { # Return a boolean as to if core Perl thinks the input # On z/OS, even locales marked as UTF-8 aren't. return 0 if ord "A" != 65; - eval { require POSIX; import POSIX 'locale_h'; }; - return 0 if ! defined &POSIX::LC_CTYPE; - return 0 if ! locales_enabled('LC_CTYPE'); + return 0 unless locales_enabled('LC_CTYPE'); my $locale = shift; @@ -337,15 +443,13 @@ sub find_utf8_ctype_locale (;$) { # Return the name of a locale that core Perl # list of locales to try; if omitted, this # tries all locales it can find on the # platform + return unless locales_enabled('LC_CTYPE'); + my $locales_ref = shift; if (! defined $locales_ref) { - eval { require POSIX; import POSIX 'locale_h'; }; - return if ! defined &POSIX::LC_CTYPE; - my @locales = find_locales(&POSIX::LC_CTYPE(), - 1 # Reject iffy locales. - ); + my @locales = find_locales(&POSIX::LC_CTYPE()); $locales_ref = \@locales; }