X-Git-Url: https://perl5.git.perl.org/perl5.git/blobdiff_plain/2c6c88ec63f27f7733250293ba1c0b4621f88bce..36dbc955f3cb37b884c4e6b5e96cd676aef3d047:/t/loc_tools.pl diff --git a/t/loc_tools.pl b/t/loc_tools.pl index 0f782f8..5d11d06 100644 --- a/t/loc_tools.pl +++ b/t/loc_tools.pl @@ -1,6 +1,5 @@ -# Common tools for test files files to find the locales which exist on the -# system. Caller should have defined ok() for the unlikely event that setup -# here fails, and should have verified that this isn't miniperl before calling +# Common tools for test files to find the locales which exist on the +# system. Caller should have verified that this isn't miniperl before calling # the functions. # Note that it's okay that some languages have their native names @@ -11,16 +10,94 @@ # Functions whose names begin with underscore are internal helper functions # for this file, and are not to be used by outside callers. +use Config; +use strict; + eval { require POSIX; import POSIX 'locale_h'; }; -$has_locale_h = ! $@; +my $has_locale_h = ! $@; + +my @known_categories = ( qw(LC_ALL LC_COLLATE LC_CTYPE LC_MESSAGES LC_MONETARY + LC_NUMERIC LC_TIME LC_ADDRESS LC_IDENTIFICATION + LC_MEASUREMENT LC_PAPER LC_TELEPHONE LC_SYNTAX + LC_TOD)); +my @platform_categories; + +# LC_ALL can be -1 on some platforms. And, in fact the implementors could +# legally use any integer to represent any category. But it makes the most +# sense for them to have used small integers. Below, we create new locale +# numbers for ones missing from this machine. We make them very negative, +# hopefully more negative than anything likely to be a valid category on the +# platform, but also below is a check to be sure that our guess is valid. +my $max_bad_category_number = -1000000; + +# Initialize this hash so that it looks like e.g., +# 6 => 'CTYPE', +# where 6 is the value of &POSIX::LC_CTYPE +my %category_name; +my %category_number; +if ($has_locale_h) { + my $number_for_missing_category = $max_bad_category_number; + foreach my $name (@known_categories) { + my $number = eval "&POSIX::$name"; + if ($@) { + # Use a negative number (smaller than any legitimate category + # number) if the platform doesn't support this category, so we + # have an entry for all the ones that might be specified in calls + # to us. + $number = $number_for_missing_category-- if $@; + } + elsif ( $number !~ / ^ -? \d+ $ /x + || $number <= $max_bad_category_number) + { + # We think this should be an int. And it has to be larger than + # any of our synthetic numbers. + die "Unexpected locale category number '$number' for $name" + } + else { + push @platform_categories, $name; + } + + $name =~ s/LC_//; + $category_name{$number} = "$name"; + $category_number{$name} = $number; + } +} + +sub _my_diag($) { + my $message = shift; + if (defined &main::diag) { + diag($message); + } + else { + local($\, $", $,) = (undef, ' ', ''); + print STDERR $message, "\n"; + } +} + +sub _my_fail($) { + my $message = shift; + if (defined &main::fail) { + fail($message); + } + else { + local($\, $", $,) = (undef, ' ', ''); + print "not ok 0 $message\n"; + } +} sub _trylocale ($$$$) { # For use only by other functions in this file! # Adds the locale given by the first parameter to the list given by the - # 3rd iff the platform supports the locale in each of the categories given - # by the 2nd parameter, which is either a single category or a reference - # to a list of categories The 4th parameter is true if to accept locales - # that aren't apparently fully compatible with Perl. + # 3rd iff the platform supports the locale in each of the category numbers + # given by the 2nd parameter, which is either a single category or a + # reference to a list of categories. The list MUST be sorted so that + # CTYPE is first, COLLATE is last unless ALL is present, in which case + # that comes after COLLATE. This is because locale.c detects bad locales + # only with CTYPE, and COLLATE on some platforms can core dump if it is a + # bad locale. + # + # The 4th parameter is true if to accept locales that aren't apparently + # fully compatible with Perl. my $locale = shift; my $categories = shift; @@ -29,6 +106,19 @@ sub _trylocale ($$$$) { # For use only by other functions in this file! return if ! $locale || grep { $locale eq $_ } @$list; + # This is a toy (pig latin) locale that is not fully implemented on some + # systems + return if $locale =~ / ^ pig $ /ix; + + # Certain platforms have a crippled locale system in which setlocale + # returns success for just about any possible locale name, but if anything + # actually happens as a result of the call, it is that the underlying + # locale is set to a system default, likely C or C.UTF-8. We can't test + # such systems fully, but we shouldn't disable the user from using + # locales, as it may work out for them (or not). + return if defined $Config{d_setlocale_accepts_any_locale_name} + && $locale !~ / ^ (?: C | POSIX | C\.UTF-8 ) $/ix; + $categories = [ $categories ] unless ref $categories; my $badutf8 = 0; @@ -37,23 +127,29 @@ sub _trylocale ($$$$) { # For use only by other functions in this file! use warnings 'locale'; local $SIG{__WARN__} = sub { - $badutf8 = 1 if $_[0] =~ /Malformed UTF-8/; - $plays_well = 0 if $_[0] =~ /Locale .* may not work well/i + $badutf8 = 1 if grep { /Malformed UTF-8/ } @_; + $plays_well = 0 if grep { + /Locale .* may not work well(?# + )|The Perl program will use the expected meanings/i + } @_; }; # Incompatible locales aren't warned about unless using locales. use locale; foreach my $category (@$categories) { + die "category '$category' must instead be a number" + unless $category =~ / ^ -? \d+ $ /x; + return unless setlocale($category, $locale); - return if ! $plays_well && ! $allow_incompatible; + last if $badutf8 || ! $plays_well; } if ($badutf8) { - ok(0, "Verify locale name doesn't contain malformed utf8"); + _my_fail("Verify locale name doesn't contain malformed utf8"); return; } - push @$list, $locale; + push @$list, $locale if $plays_well || $allow_incompatible; } sub _decode_encodings { # For use only by other functions in this file! @@ -83,40 +179,11 @@ sub _decode_encodings { # For use only by other functions in this file! return @enc; } -# LC_ALL can be -1 on some platforms. And, in fact the implementors could -# legally use any integer to represent any category. But it makes the most -# sense for them to have used small integers. Below, we create new locale -# numbers for ones missing from this machine. We make them very negative, -# hopefully more negative than anything likely to be a valid category on the -# platform, but also below is a check to be sure that our guess is valid. -my $max_bad_category_number = -1000000; - -# Initialize this hash so that it looks like e.g., -# 6 => 'CTYPE', -# where 6 is the value of &POSIX::LC_CTYPE -my %category_name; -unless ($@) { - my $number_for_missing_category = $max_bad_category_number; - foreach my $name (qw(ALL COLLATE CTYPE MESSAGES MONETARY NUMERIC TIME)) { - my $number = eval "&POSIX::LC_$name"; - - if ($@) { - # Use a negative number (smaller than any legitimate category - # number) if the platform doesn't support this category, so we - # have an entry for all the ones that might be specified in calls - # to us. - $number = $number_for_missing_category-- if $@; - } - elsif ( $number !~ / ^ -? \d+ $ /x - || $number <= $max_bad_category_number) - { - # We think this should be an int. And it has to be larger than - # any of our synthetic numbers. - die "Unexpected locale category number '$number' for LC_$name" - } +sub valid_locale_categories() { + # Returns a list of the locale categories (expressed as strings, like + # "LC_ALL) known to this program that are available on this platform. - $category_name{$number} = "$name"; - } + return @platform_categories; } sub locales_enabled(;$) { @@ -124,32 +191,58 @@ sub locales_enabled(;$) { # 1. # # The optional parameter is a reference to a list of individual POSIX - # locale categories. If present, this function also returns 0 if any of - # them are individually not available on this platform; otherwise 1. - # Actually, it is acceptable for the list to be just a simple scalar - # denoting a single category. + # locale categories. If any of the individual categories specified by the + # optional parameter is all digits (and an optional leading minus), it is + # taken to be the C enum for the category (e.g., &POSIX::LC_CTYPE). + # Otherwise it should be a string name of the category, like 'LC_TIME'. + # The initial 'LC_' is optional. It is a fatal error to call this with + # something that isn't a known category to this file. # - # If any of the individual categories specified by the optional parameter - # is all digits (and an optional leading minus), it is taken to be the C - # enum for the category (e.g., &POSIX::LC_CTYPE). Otherwise it should be - # a string name of the category, like 'LC_TIME'. The initial 'LC_' is - # optional. It is a fatal error to call this with something that isn't a - # known category - - use Config; - - return 0 unless $Config{d_setlocale} - # I (khw) cargo-culted the '?' in the pattern on the - # next line. - && $Config{ccflags} !~ /\bD?NO_LOCALE\b/ - && $has_locale_h; + # This optional parameter denotes which POSIX locale categories must be + # available on the platform. If any aren't available, this function + # returns 0; otherwise it returns 1 and changes the list for the caller so + # that any category names are converted into their equivalent numbers, and + # sorts it to match the expectations of _trylocale. + # + # It is acceptable for the second parameter to be just a simple scalar + # denoting a single category (either name or number). No conversion into + # a number is done in this case. + + # khw cargo-culted the '?' in the pattern on the next line. + return 0 if $Config{ccflags} =~ /\bD?NO_LOCALE\b/; + + # If we can't load the POSIX XS module, we can't have locales even if they + # normally would be available + return 0 if ! defined &DynaLoader::boot_DynaLoader; + + if (! $Config{d_setlocale}) { + return 0 if $Config{ccflags} =~ /\bD?NO_POSIX_2008_LOCALE\b/; + return 0 unless $Config{d_newlocale}; + return 0 unless $Config{d_uselocale}; + return 0 unless $Config{d_duplocale}; + return 0 unless $Config{d_freelocale}; + } # Done with the global possibilities. Now check if any passed in category # is disabled. + my $categories_ref = shift; + my $return_categories_numbers = 0; + my @categories_numbers; + my $has_LC_ALL = 0; + my $has_LC_COLLATE = 0; + if (defined $categories_ref) { - $categories_ref = [ $categories_ref ] if ! ref $categories_ref; - my @local_categories_copy = @$categories_ref; + my @local_categories_copy; + + if (ref $categories_ref) { + @local_categories_copy = @$$categories_ref; + $return_categories_numbers = 1; + } + else { # Single category passed in + @local_categories_copy = $categories_ref; + } + for my $category_name_or_number (@local_categories_copy) { my $name; my $number; @@ -177,27 +270,60 @@ sub locales_enabled(;$) { eval "defined &POSIX::LC_$name"; return 0 if $@; + + if ($return_categories_numbers) { + if ($name eq 'CTYPE') { + unshift @categories_numbers, $number; # Always first + } + elsif ($name eq 'ALL') { + $has_LC_ALL = 1; + } + elsif ($name eq 'COLLATE') { + $has_LC_COLLATE = 1; + } + else { + push @categories_numbers, $number; + } + } + } + } + + if ($return_categories_numbers) { + + # COLLATE comes after all other locales except ALL, which comes last + if ($has_LC_COLLATE) { + push @categories_numbers, $category_number{'COLLATE'}; + } + if ($has_LC_ALL) { + push @categories_numbers, $category_number{'ALL'}; } + $$categories_ref = \@categories_numbers; } return 1; } -sub find_locales ($;$) { # Returns an array of all the locales we found on the - # system. If the optional 2nd parameter is - # non-zero, the list includes all found locales; - # otherwise it is restricted to those locales - # that play well with Perl, as far as we can - # easily determine. - # The first parameter is either a single locale - # category or a reference to a list of categories to - # find valid locales for it (or in the case of - # multiple) for all of them. +sub find_locales ($;$) { + + # Returns an array of all the locales we found on the system. If the + # optional 2nd parameter is non-zero, the list includes all found locales; + # otherwise it is restricted to those locales that play well with Perl, as + # far as we can easily determine. + # + # The first parameter is either a single locale category or a reference to + # a list of categories to find valid locales for it (or in the case of + # multiple) for all of them. Each category can be a name (like 'LC_ALL' + # or simply 'ALL') or the C enum value for the category. + my $categories = shift; my $allow_incompatible = shift // 0; - return unless locales_enabled($categories); + $categories = [ $categories ] unless ref $categories; + return unless locales_enabled(\$categories); + + # Note, the subroutine call above converts the $categories into a form + # suitable for _trylocale(). # Visual C's CRT goes silly on strings of the form "en_US.ISO8859-1" # and mingw32 uses said silly CRT @@ -211,9 +337,19 @@ sub find_locales ($;$) { # Returns an array of all the locales we found on the # UWIN seems to loop after taint tests, just skip for now return if ($^O =~ /^uwin/); + my @Locale; _trylocale("C", $categories, \@Locale, $allow_incompatible); _trylocale("POSIX", $categories, \@Locale, $allow_incompatible); - foreach (0..15) { + + if ($Config{d_has_C_UTF8} && $Config{d_has_C_UTF8} eq 'true') { + _trylocale("C.UTF-8", $categories, \@Locale, $allow_incompatible); + } + + # There's no point in looking at anything more if we know that setlocale + # will return success on any garbage or non-garbage name. + return sort @Locale if defined $Config{d_setlocale_accepts_any_locale_name}; + + foreach (1..16) { _trylocale("ISO8859-$_", $categories, \@Locale, $allow_incompatible); _trylocale("iso8859$_", $categories, \@Locale, $allow_incompatible); _trylocale("iso8859-$_", $categories, \@Locale, $allow_incompatible); @@ -233,7 +369,7 @@ sub find_locales ($;$) { # Returns an array of all the locales we found on the delete local @ENV{qw(IFS CDPATH ENV BASH_ENV)}; if (-x "/usr/bin/locale" - && open(LOCALES, "/usr/bin/locale -a 2>/dev/null|")) + && open(LOCALES, '-|', "/usr/bin/locale -a 2>/dev/null")) { while () { # It seems that /usr/bin/locale steadfastly outputs 8 bit data, which @@ -273,20 +409,27 @@ sub find_locales ($;$) { # Returns an array of all the locales we found on the # This is going to be slow. my @Data; - # Locales whose name differs if the utf8 bit is on are stored in these two - # files with appropriate encodings. - if ($^H & 0x08 || (${^OPEN} || "") =~ /:utf8/) { - @Data = do "lib/locale/utf8"; - } else { - @Data = do "lib/locale/latin1"; + # Locales whose name differs if the utf8 bit is on are stored in these + # two files with appropriate encodings. + my $data_file = ($^H & 0x08 || (${^OPEN} || "") =~ /:utf8/) + ? _source_location() . "/lib/locale/utf8" + : _source_location() . "/lib/locale/latin1"; + if (-e $data_file) { + @Data = do $data_file; + } + else { + _my_diag(__FILE__ . ":" . __LINE__ . ": '$data_file' doesn't exist"); } # The rest of the locales are in this file. - push @Data, ; + push @Data, ; close DATA; foreach my $line (@Data) { + chomp $line; my ($locale_name, $language_codes, $country_codes, $encodings) = split /:/, $line; + _my_diag(__FILE__ . ":" . __LINE__ . ": Unexpected syntax in '$line'") + unless defined $locale_name; my @enc = _decode_encodings($encodings); foreach my $loc (split(/ /, $locale_name)) { _trylocale($loc, $categories, \@Locale, $allow_incompatible); @@ -371,8 +514,8 @@ sub is_locale_utf8 ($) { # Return a boolean as to if core Perl thinks the input return $ret; } -sub find_utf8_ctype_locale (;$) { # Return the name of a locale that core Perl - # thinks is a UTF-8 LC_CTYPE locale. +sub find_utf8_ctype_locales (;$) { # Return the names of the locales that core + # Perl thinks are UTF-8 LC_CTYPE locales. # Optional parameter is a reference to a # list of locales to try; if omitted, this # tries all locales it can find on the @@ -380,6 +523,7 @@ sub find_utf8_ctype_locale (;$) { # Return the name of a locale that core Perl return unless locales_enabled('LC_CTYPE'); my $locales_ref = shift; + my @return; if (! defined $locales_ref) { @@ -388,12 +532,82 @@ sub find_utf8_ctype_locale (;$) { # Return the name of a locale that core Perl } foreach my $locale (@$locales_ref) { - return $locale if is_locale_utf8($locale); + push @return, $locale if is_locale_utf8($locale); + } + + return @return; +} + + +sub find_utf8_ctype_locale (;$) { # Return the name of a locale that core Perl + # thinks is a UTF-8 LC_CTYPE non-turkic + # locale. + # Optional parameter is a reference to a + # list of locales to try; if omitted, this + # tries all locales it can find on the + # platform + my $try_locales_ref = shift; + + my @utf8_locales = find_utf8_ctype_locales($try_locales_ref); + my @turkic_locales = find_utf8_turkic_locales($try_locales_ref); + + my %seen_turkic; + + # Create undef elements in the hash for turkic locales + @seen_turkic{@turkic_locales} = (); + + foreach my $locale (@utf8_locales) { + return $locale unless exists $seen_turkic{$locale}; } return; } +sub find_utf8_turkic_locales (;$) { + + # Return the name of all the locales that core Perl thinks are UTF-8 + # Turkic LC_CTYPE. Optional parameter is a reference to a list of locales + # to try; if omitted, this tries all locales it can find on the platform + + my @return; + + return unless locales_enabled('LC_CTYPE'); + + my $save_locale = setlocale(&POSIX::LC_CTYPE()); + foreach my $locale (find_utf8_ctype_locales(shift)) { + use locale; + setlocale(&POSIX::LC_CTYPE(), $locale); + push @return, $locale if uc('i') eq "\x{130}"; + } + setlocale(&POSIX::LC_CTYPE(), $save_locale); + + return @return; +} + +sub find_utf8_turkic_locale (;$) { + my @turkics = find_utf8_turkic_locales(shift); + + return unless @turkics; + return $turkics[0] +} + + +# returns full path to the directory containing the current source +# file, inspired by mauke's Dir::Self +sub _source_location { + require File::Spec; + + my $caller_filename = (caller)[1]; + + my $loc = File::Spec->rel2abs( + File::Spec->catpath( + (File::Spec->splitpath($caller_filename))[0, 1], '' + ) + ); + + return ($loc =~ /^(.*)$/)[0]; # untaint +} + 1 # Format of data is: locale_name, language_codes, country_codes, encodings