Commit | Line | Data |
---|---|---|
73fc293b KW |
1 | # Common tools for test files files to find the locales which exist on the |
2 | # system. | |
3 | ||
4 | # Note that it's okay that some languages have their native names | |
5 | # capitalized here even though that's not "right". They are lowercased | |
6 | # anyway later during the scanning process (and besides, some clueless | |
7 | # vendor might have them capitalized erroneously anyway). | |
8 | ||
9 | ||
10 | sub _trylocale { # Adds the locale given by the first parameter to the list | |
11 | # given by the 2nd iff the platform supports the locale, | |
12 | # and it is not already on the list | |
13 | my $locale = shift; | |
14 | my $list = shift; | |
15 | return if grep { $locale eq $_ } @$list; | |
16 | return unless setlocale(&POSIX::LC_ALL, $locale); | |
17 | my $badutf8; | |
18 | { | |
19 | local $SIG{__WARN__} = sub { | |
20 | $badutf8 = $_[0] =~ /Malformed UTF-8/; | |
21 | }; | |
22 | } | |
23 | ||
24 | if ($badutf8) { | |
25 | ok(0, "Verify locale name doesn't contain malformed utf8"); | |
26 | return; | |
27 | } | |
28 | push @$list, $locale; | |
29 | } | |
30 | ||
31 | sub _decode_encodings { | |
32 | my @enc; | |
33 | ||
34 | foreach (split(/ /, shift)) { | |
35 | if (/^(\d+)$/) { | |
36 | push @enc, "ISO8859-$1"; | |
37 | push @enc, "iso8859$1"; # HP | |
38 | if ($1 eq '1') { | |
39 | push @enc, "roman8"; # HP | |
40 | } | |
41 | push @enc, $_; | |
42 | push @enc, "$_.UTF-8"; | |
d646bffe KW |
43 | push @enc, "$_.65001"; # Windows UTF-8 |
44 | push @enc, "$_.ACP"; # Windows ANSI code page | |
45 | push @enc, "$_.OCP"; # Windows OEM code page | |
73fc293b KW |
46 | } |
47 | } | |
48 | if ($^O eq 'os390') { | |
49 | push @enc, qw(IBM-037 IBM-819 IBM-1047); | |
50 | } | |
51 | push @enc, "UTF-8"; | |
d646bffe | 52 | push @enc, "65001"; # Windows UTF-8 |
73fc293b KW |
53 | |
54 | return @enc; | |
55 | } | |
56 | ||
57 | sub find_locales { # Returns an array of all the locales we found on the | |
58 | # system | |
59 | ||
d369fd5b KW |
60 | |
61 | my $have_setlocale = 0; | |
62 | eval { | |
63 | require POSIX; | |
64 | import POSIX ':locale_h'; | |
65 | $have_setlocale++; | |
66 | }; | |
67 | ||
68 | # Visual C's CRT goes silly on strings of the form "en_US.ISO8859-1" | |
69 | # and mingw32 uses said silly CRT | |
70 | # This doesn't seem to be an issue any more, at least on Windows XP, | |
71 | # so re-enable the tests for Windows XP onwards. | |
72 | my $winxp = ($^O eq 'MSWin32' && defined &Win32::GetOSVersion && | |
73 | join('.', (Win32::GetOSVersion())[1..2]) >= 5.1); | |
74 | $have_setlocale = 0 if ((($^O eq 'MSWin32' && !$winxp) || $^O eq 'NetWare') && | |
75 | $Config{cc} =~ /^(cl|gcc|g\+\+|ici)/i); | |
76 | ||
77 | # UWIN seems to loop after taint tests, just skip for now | |
78 | $have_setlocale = 0 if ($^O =~ /^uwin/); | |
79 | ||
80 | return unless $have_setlocale; | |
81 | ||
73fc293b KW |
82 | _trylocale("C", \@Locale); |
83 | _trylocale("POSIX", \@Locale); | |
84 | foreach (0..15) { | |
85 | _trylocale("ISO8859-$_", \@Locale); | |
86 | _trylocale("iso8859$_", \@Locale); | |
87 | _trylocale("iso8859-$_", \@Locale); | |
88 | _trylocale("iso_8859_$_", \@Locale); | |
89 | _trylocale("isolatin$_", \@Locale); | |
90 | _trylocale("isolatin-$_", \@Locale); | |
91 | _trylocale("iso_latin_$_", \@Locale); | |
92 | } | |
93 | ||
94 | # Sanitize the environment so that we can run the external 'locale' | |
95 | # program without the taint mode getting grumpy. | |
96 | ||
97 | # $ENV{PATH} is special in VMS. | |
98 | delete $ENV{PATH} if $^O ne 'VMS' or $Config{d_setenv}; | |
99 | ||
100 | # Other subversive stuff. | |
101 | delete @ENV{qw(IFS CDPATH ENV BASH_ENV)}; | |
102 | ||
103 | if (-x "/usr/bin/locale" | |
104 | && open(LOCALES, "/usr/bin/locale -a 2>/dev/null|")) | |
105 | { | |
106 | while (<LOCALES>) { | |
107 | # It seems that /usr/bin/locale steadfastly outputs 8 bit data, which | |
108 | # ain't great when we're running this testPERL_UNICODE= so that utf8 | |
109 | # locales will cause all IO hadles to default to (assume) utf8 | |
110 | next unless utf8::valid($_); | |
111 | chomp; | |
112 | _trylocale($_, \@Locale); | |
113 | } | |
114 | close(LOCALES); | |
115 | } elsif ($^O eq 'VMS' | |
116 | && defined($ENV{'SYS$I18N_LOCALE'}) | |
117 | && -d 'SYS$I18N_LOCALE') | |
118 | { | |
119 | # The SYS$I18N_LOCALE logical name search list was not present on | |
120 | # VAX VMS V5.5-12, but was on AXP && VAX VMS V6.2 as well as later versions. | |
121 | opendir(LOCALES, "SYS\$I18N_LOCALE:"); | |
122 | while ($_ = readdir(LOCALES)) { | |
123 | chomp; | |
124 | _trylocale($_, \@Locale); | |
125 | } | |
126 | close(LOCALES); | |
127 | } elsif (($^O eq 'openbsd' || $^O eq 'bitrig' ) && -e '/usr/share/locale') { | |
128 | ||
129 | # OpenBSD doesn't have a locale executable, so reading /usr/share/locale | |
130 | # is much easier and faster than the last resort method. | |
131 | ||
132 | opendir(LOCALES, '/usr/share/locale'); | |
133 | while ($_ = readdir(LOCALES)) { | |
134 | chomp; | |
135 | _trylocale($_, \@Locale); | |
136 | } | |
137 | close(LOCALES); | |
138 | } else { # Final fallback. Try our list of locales hard-coded here | |
139 | ||
140 | # This is going to be slow. | |
141 | my @Data; | |
142 | ||
143 | ||
144 | # Locales whose name differs if the utf8 bit is on are stored in these two | |
145 | # files with appropriate encodings. | |
146 | if ($^H & 0x08 || (${^OPEN} || "") =~ /:utf8/) { | |
147 | @Data = do "lib/locale/utf8"; | |
148 | } else { | |
149 | @Data = do "lib/locale/latin1"; | |
150 | } | |
151 | ||
152 | # The rest of the locales are in this file. | |
153 | push @Data, <DATA>; | |
154 | ||
155 | foreach my $line (@Data) { | |
156 | my ($locale_name, $language_codes, $country_codes, $encodings) = | |
157 | split /:/, $line; | |
158 | my @enc = _decode_encodings($encodings); | |
159 | foreach my $loc (split(/ /, $locale_name)) { | |
160 | _trylocale($loc, \@Locale); | |
161 | foreach my $enc (@enc) { | |
162 | _trylocale("$loc.$enc", \@Locale); | |
163 | } | |
164 | $loc = lc $loc; | |
165 | foreach my $enc (@enc) { | |
166 | _trylocale("$loc.$enc", \@Locale); | |
167 | } | |
168 | } | |
169 | foreach my $lang (split(/ /, $language_codes)) { | |
170 | _trylocale($lang, \@Locale); | |
171 | foreach my $country (split(/ /, $country_codes)) { | |
172 | my $lc = "${lang}_${country}"; | |
173 | _trylocale($lc, \@Locale); | |
174 | foreach my $enc (@enc) { | |
175 | _trylocale("$lc.$enc", \@Locale); | |
176 | } | |
177 | my $lC = "${lang}_\U${country}"; | |
178 | _trylocale($lC, \@Locale); | |
179 | foreach my $enc (@enc) { | |
180 | _trylocale("$lC.$enc", \@Locale); | |
181 | } | |
182 | } | |
183 | } | |
184 | } | |
185 | } | |
186 | ||
187 | @Locale = sort @Locale; | |
188 | ||
189 | return @Locale; | |
190 | ||
191 | ||
192 | } | |
193 | ||
194 | 1 | |
195 | ||
196 | # Format of data is: locale_name, language_codes, country_codes, encodings | |
197 | __DATA__ | |
198 | Afrikaans:af:za:1 15 | |
199 | Arabic:ar:dz eg sa:6 arabic8 | |
200 | Brezhoneg Breton:br:fr:1 15 | |
201 | Bulgarski Bulgarian:bg:bg:5 | |
202 | Chinese:zh:cn tw:cn.EUC eucCN eucTW euc.CN euc.TW Big5 GB2312 tw.EUC | |
203 | Hrvatski Croatian:hr:hr:2 | |
204 | Cymraeg Welsh:cy:cy:1 14 15 | |
205 | Czech:cs:cz:2 | |
206 | Dansk Danish:da:dk:1 15 | |
207 | Nederlands Dutch:nl:be nl:1 15 | |
208 | English American British:en:au ca gb ie nz us uk zw:1 15 cp850 | |
209 | Esperanto:eo:eo:3 | |
210 | Eesti Estonian:et:ee:4 6 13 | |
211 | Suomi Finnish:fi:fi:1 15 | |
212 | Flamish::fl:1 15 | |
213 | Deutsch German:de:at be ch de lu:1 15 | |
214 | Euskaraz Basque:eu:es fr:1 15 | |
215 | Galego Galician:gl:es:1 15 | |
216 | Ellada Greek:el:gr:7 g8 | |
217 | Frysk:fy:nl:1 15 | |
218 | Greenlandic:kl:gl:4 6 | |
219 | Hebrew:iw:il:8 hebrew8 | |
220 | Hungarian:hu:hu:2 | |
221 | Indonesian:id:id:1 15 | |
222 | Gaeilge Irish:ga:IE:1 14 15 | |
223 | Italiano Italian:it:ch it:1 15 | |
224 | Nihongo Japanese:ja:jp:euc eucJP jp.EUC sjis | |
225 | Korean:ko:kr: | |
226 | Latine Latin:la:va:1 15 | |
227 | Latvian:lv:lv:4 6 13 | |
228 | Lithuanian:lt:lt:4 6 13 | |
229 | Macedonian:mk:mk:1 15 | |
230 | Maltese:mt:mt:3 | |
231 | Moldovan:mo:mo:2 | |
232 | Norsk Norwegian:no no\@nynorsk nb nn:no:1 15 | |
233 | Occitan:oc:es:1 15 | |
234 | Polski Polish:pl:pl:2 | |
235 | Rumanian:ro:ro:2 | |
236 | Russki Russian:ru:ru su ua:5 koi8 koi8r KOI8-R koi8u cp1251 cp866 | |
237 | Serbski Serbian:sr:yu:5 | |
238 | Slovak:sk:sk:2 | |
239 | Slovene Slovenian:sl:si:2 | |
240 | Sqhip Albanian:sq:sq:1 15 | |
241 | Svenska Swedish:sv:fi se:1 15 | |
242 | Thai:th:th:11 tis620 | |
243 | Turkish:tr:tr:9 turkish8 | |
244 | Yiddish:yi::1 15 |