This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Fix multicharacter titlecase (ucfirst).
[perl5.git] / locale.c
CommitLineData
98994639
HS
1/* locale.c
2 *
3 * Copyright (c) 2001, Larry Wall
4 *
5 * You may distribute under the terms of either the GNU General Public
6 * License or the Artistic License, as specified in the README file.
7 *
8 */
9
10/*
11 * A Elbereth Gilthoniel,
12 * silivren penna míriel
13 * o menel aglar elenath!
14 * Na-chaered palan-díriel
15 * o galadhremmin ennorath,
16 * Fanuilos, le linnathon
17 * nef aear, si nef aearon!
18 */
19
20#include "EXTERN.h"
21#define PERL_IN_LOCALE_C
22#include "perl.h"
23
24#ifdef I_LOCALE
25# include <locale.h>
26#endif
27
28/*
29 * Standardize the locale name from a string returned by 'setlocale'.
30 *
31 * The standard return value of setlocale() is either
32 * (1) "xx_YY" if the first argument of setlocale() is not LC_ALL
33 * (2) "xa_YY xb_YY ..." if the first argument of setlocale() is LC_ALL
34 * (the space-separated values represent the various sublocales,
35 * in some unspecificed order)
36 *
37 * In some platforms it has a form like "LC_SOMETHING=Lang_Country.866\n",
38 * which is harmful for further use of the string in setlocale().
39 *
40 */
41STATIC char *
42S_stdize_locale(pTHX_ char *locs)
43{
44 char *s;
45 bool okay = TRUE;
46
47 if ((s = strchr(locs, '='))) {
48 char *t;
49
50 okay = FALSE;
51 if ((t = strchr(s, '.'))) {
52 char *u;
53
54 if ((u = strchr(t, '\n'))) {
55
56 if (u[1] == 0) {
57 STRLEN len = u - s;
58 Move(s + 1, locs, len, char);
59 locs[len] = 0;
60 okay = TRUE;
61 }
62 }
63 }
64 }
65
66 if (!okay)
67 Perl_croak(aTHX_ "Can't fix broken locale name \"%s\"", locs);
68
69 return locs;
70}
71
72void
73Perl_set_numeric_radix(pTHX)
74{
75#ifdef USE_LOCALE_NUMERIC
76# ifdef HAS_LOCALECONV
77 struct lconv* lc;
78
79 lc = localeconv();
80 if (lc && lc->decimal_point) {
81 if (lc->decimal_point[0] == '.' && lc->decimal_point[1] == 0) {
82 SvREFCNT_dec(PL_numeric_radix_sv);
83 PL_numeric_radix_sv = Nullsv;
84 }
85 else {
86 if (PL_numeric_radix_sv)
87 sv_setpv(PL_numeric_radix_sv, lc->decimal_point);
88 else
89 PL_numeric_radix_sv = newSVpv(lc->decimal_point, 0);
90 }
91 }
92 else
93 PL_numeric_radix_sv = Nullsv;
94# endif /* HAS_LOCALECONV */
95#endif /* USE_LOCALE_NUMERIC */
96}
97
98/*
99 * Set up for a new numeric locale.
100 */
101void
102Perl_new_numeric(pTHX_ char *newnum)
103{
104#ifdef USE_LOCALE_NUMERIC
105
106 if (! newnum) {
107 if (PL_numeric_name) {
108 Safefree(PL_numeric_name);
109 PL_numeric_name = NULL;
110 }
111 PL_numeric_standard = TRUE;
112 PL_numeric_local = TRUE;
113 return;
114 }
115
116 if (! PL_numeric_name || strNE(PL_numeric_name, newnum)) {
117 Safefree(PL_numeric_name);
118 PL_numeric_name = stdize_locale(savepv(newnum));
119 PL_numeric_standard = (strEQ(newnum, "C") || strEQ(newnum, "POSIX"));
120 PL_numeric_local = TRUE;
121 set_numeric_radix();
122 }
123
124#endif /* USE_LOCALE_NUMERIC */
125}
126
127void
128Perl_set_numeric_standard(pTHX)
129{
130#ifdef USE_LOCALE_NUMERIC
131
132 if (! PL_numeric_standard) {
133 setlocale(LC_NUMERIC, "C");
134 PL_numeric_standard = TRUE;
135 PL_numeric_local = FALSE;
136 set_numeric_radix();
137 }
138
139#endif /* USE_LOCALE_NUMERIC */
140}
141
142void
143Perl_set_numeric_local(pTHX)
144{
145#ifdef USE_LOCALE_NUMERIC
146
147 if (! PL_numeric_local) {
148 setlocale(LC_NUMERIC, PL_numeric_name);
149 PL_numeric_standard = FALSE;
150 PL_numeric_local = TRUE;
151 set_numeric_radix();
152 }
153
154#endif /* USE_LOCALE_NUMERIC */
155}
156
157/*
158 * Set up for a new ctype locale.
159 */
160void
161Perl_new_ctype(pTHX_ char *newctype)
162{
163#ifdef USE_LOCALE_CTYPE
164
165 int i;
166
167 for (i = 0; i < 256; i++) {
168 if (isUPPER_LC(i))
169 PL_fold_locale[i] = toLOWER_LC(i);
170 else if (isLOWER_LC(i))
171 PL_fold_locale[i] = toUPPER_LC(i);
172 else
173 PL_fold_locale[i] = i;
174 }
175
176#endif /* USE_LOCALE_CTYPE */
177}
178
179/*
180 * Set up for a new collation locale.
181 */
182void
183Perl_new_collate(pTHX_ char *newcoll)
184{
185#ifdef USE_LOCALE_COLLATE
186
187 if (! newcoll) {
188 if (PL_collation_name) {
189 ++PL_collation_ix;
190 Safefree(PL_collation_name);
191 PL_collation_name = NULL;
192 }
193 PL_collation_standard = TRUE;
194 PL_collxfrm_base = 0;
195 PL_collxfrm_mult = 2;
196 return;
197 }
198
199 if (! PL_collation_name || strNE(PL_collation_name, newcoll)) {
200 ++PL_collation_ix;
201 Safefree(PL_collation_name);
202 PL_collation_name = stdize_locale(savepv(newcoll));
203 PL_collation_standard = (strEQ(newcoll, "C") || strEQ(newcoll, "POSIX"));
204
205 {
206 /* 2: at most so many chars ('a', 'b'). */
207 /* 50: surely no system expands a char more. */
208#define XFRMBUFSIZE (2 * 50)
209 char xbuf[XFRMBUFSIZE];
210 Size_t fa = strxfrm(xbuf, "a", XFRMBUFSIZE);
211 Size_t fb = strxfrm(xbuf, "ab", XFRMBUFSIZE);
212 SSize_t mult = fb - fa;
213 if (mult < 1)
214 Perl_croak(aTHX_ "strxfrm() gets absurd");
215 PL_collxfrm_base = (fa > mult) ? (fa - mult) : 0;
216 PL_collxfrm_mult = mult;
217 }
218 }
219
220#endif /* USE_LOCALE_COLLATE */
221}
222
223/*
224 * Initialize locale awareness.
225 */
226int
227Perl_init_i18nl10n(pTHX_ int printwarn)
228{
229 int ok = 1;
230 /* returns
231 * 1 = set ok or not applicable,
232 * 0 = fallback to C locale,
233 * -1 = fallback to C locale failed
234 */
235
236#if defined(USE_LOCALE)
237
238#ifdef USE_LOCALE_CTYPE
239 char *curctype = NULL;
240#endif /* USE_LOCALE_CTYPE */
241#ifdef USE_LOCALE_COLLATE
242 char *curcoll = NULL;
243#endif /* USE_LOCALE_COLLATE */
244#ifdef USE_LOCALE_NUMERIC
245 char *curnum = NULL;
246#endif /* USE_LOCALE_NUMERIC */
247#ifdef __GLIBC__
248 char *language = PerlEnv_getenv("LANGUAGE");
249#endif
250 char *lc_all = PerlEnv_getenv("LC_ALL");
251 char *lang = PerlEnv_getenv("LANG");
252 bool setlocale_failure = FALSE;
253
254#ifdef LOCALE_ENVIRON_REQUIRED
255
256 /*
257 * Ultrix setlocale(..., "") fails if there are no environment
258 * variables from which to get a locale name.
259 */
260
261 bool done = FALSE;
262
263#ifdef LC_ALL
264 if (lang) {
265 if (setlocale(LC_ALL, ""))
266 done = TRUE;
267 else
268 setlocale_failure = TRUE;
269 }
270 if (!setlocale_failure) {
271#ifdef USE_LOCALE_CTYPE
272 if (! (curctype =
273 setlocale(LC_CTYPE,
274 (!done && (lang || PerlEnv_getenv("LC_CTYPE")))
275 ? "" : Nullch)))
276 setlocale_failure = TRUE;
277 else
278 curctype = savepv(curctype);
279#endif /* USE_LOCALE_CTYPE */
280#ifdef USE_LOCALE_COLLATE
281 if (! (curcoll =
282 setlocale(LC_COLLATE,
283 (!done && (lang || PerlEnv_getenv("LC_COLLATE")))
284 ? "" : Nullch)))
285 setlocale_failure = TRUE;
286 else
287 curcoll = savepv(curcoll);
288#endif /* USE_LOCALE_COLLATE */
289#ifdef USE_LOCALE_NUMERIC
290 if (! (curnum =
291 setlocale(LC_NUMERIC,
292 (!done && (lang || PerlEnv_getenv("LC_NUMERIC")))
293 ? "" : Nullch)))
294 setlocale_failure = TRUE;
295 else
296 curnum = savepv(curnum);
297#endif /* USE_LOCALE_NUMERIC */
298 }
299
300#endif /* LC_ALL */
301
302#endif /* !LOCALE_ENVIRON_REQUIRED */
303
304#ifdef LC_ALL
305 if (! setlocale(LC_ALL, ""))
306 setlocale_failure = TRUE;
307#endif /* LC_ALL */
308
309 if (!setlocale_failure) {
310#ifdef USE_LOCALE_CTYPE
311 if (! (curctype = setlocale(LC_CTYPE, "")))
312 setlocale_failure = TRUE;
313 else
314 curctype = savepv(curctype);
315#endif /* USE_LOCALE_CTYPE */
316#ifdef USE_LOCALE_COLLATE
317 if (! (curcoll = setlocale(LC_COLLATE, "")))
318 setlocale_failure = TRUE;
319 else
320 curcoll = savepv(curcoll);
321#endif /* USE_LOCALE_COLLATE */
322#ifdef USE_LOCALE_NUMERIC
323 if (! (curnum = setlocale(LC_NUMERIC, "")))
324 setlocale_failure = TRUE;
325 else
326 curnum = savepv(curnum);
327#endif /* USE_LOCALE_NUMERIC */
328 }
329
330 if (setlocale_failure) {
331 char *p;
332 bool locwarn = (printwarn > 1 ||
333 (printwarn &&
334 (!(p = PerlEnv_getenv("PERL_BADLANG")) || atoi(p))));
335
336 if (locwarn) {
337#ifdef LC_ALL
338
339 PerlIO_printf(Perl_error_log,
340 "perl: warning: Setting locale failed.\n");
341
342#else /* !LC_ALL */
343
344 PerlIO_printf(Perl_error_log,
345 "perl: warning: Setting locale failed for the categories:\n\t");
346#ifdef USE_LOCALE_CTYPE
347 if (! curctype)
348 PerlIO_printf(Perl_error_log, "LC_CTYPE ");
349#endif /* USE_LOCALE_CTYPE */
350#ifdef USE_LOCALE_COLLATE
351 if (! curcoll)
352 PerlIO_printf(Perl_error_log, "LC_COLLATE ");
353#endif /* USE_LOCALE_COLLATE */
354#ifdef USE_LOCALE_NUMERIC
355 if (! curnum)
356 PerlIO_printf(Perl_error_log, "LC_NUMERIC ");
357#endif /* USE_LOCALE_NUMERIC */
358 PerlIO_printf(Perl_error_log, "\n");
359
360#endif /* LC_ALL */
361
362 PerlIO_printf(Perl_error_log,
363 "perl: warning: Please check that your locale settings:\n");
364
365#ifdef __GLIBC__
366 PerlIO_printf(Perl_error_log,
367 "\tLANGUAGE = %c%s%c,\n",
368 language ? '"' : '(',
369 language ? language : "unset",
370 language ? '"' : ')');
371#endif
372
373 PerlIO_printf(Perl_error_log,
374 "\tLC_ALL = %c%s%c,\n",
375 lc_all ? '"' : '(',
376 lc_all ? lc_all : "unset",
377 lc_all ? '"' : ')');
378
379#if defined(USE_ENVIRON_ARRAY)
380 {
381 char **e;
382 for (e = environ; *e; e++) {
383 if (strnEQ(*e, "LC_", 3)
384 && strnNE(*e, "LC_ALL=", 7)
385 && (p = strchr(*e, '=')))
386 PerlIO_printf(Perl_error_log, "\t%.*s = \"%s\",\n",
387 (int)(p - *e), *e, p + 1);
388 }
389 }
390#else
391 PerlIO_printf(Perl_error_log,
392 "\t(possibly more locale environment variables)\n");
393#endif
394
395 PerlIO_printf(Perl_error_log,
396 "\tLANG = %c%s%c\n",
397 lang ? '"' : '(',
398 lang ? lang : "unset",
399 lang ? '"' : ')');
400
401 PerlIO_printf(Perl_error_log,
402 " are supported and installed on your system.\n");
403 }
404
405#ifdef LC_ALL
406
407 if (setlocale(LC_ALL, "C")) {
408 if (locwarn)
409 PerlIO_printf(Perl_error_log,
410 "perl: warning: Falling back to the standard locale (\"C\").\n");
411 ok = 0;
412 }
413 else {
414 if (locwarn)
415 PerlIO_printf(Perl_error_log,
416 "perl: warning: Failed to fall back to the standard locale (\"C\").\n");
417 ok = -1;
418 }
419
420#else /* ! LC_ALL */
421
422 if (0
423#ifdef USE_LOCALE_CTYPE
424 || !(curctype || setlocale(LC_CTYPE, "C"))
425#endif /* USE_LOCALE_CTYPE */
426#ifdef USE_LOCALE_COLLATE
427 || !(curcoll || setlocale(LC_COLLATE, "C"))
428#endif /* USE_LOCALE_COLLATE */
429#ifdef USE_LOCALE_NUMERIC
430 || !(curnum || setlocale(LC_NUMERIC, "C"))
431#endif /* USE_LOCALE_NUMERIC */
432 )
433 {
434 if (locwarn)
435 PerlIO_printf(Perl_error_log,
436 "perl: warning: Cannot fall back to the standard locale (\"C\").\n");
437 ok = -1;
438 }
439
440#endif /* ! LC_ALL */
441
442#ifdef USE_LOCALE_CTYPE
443 curctype = savepv(setlocale(LC_CTYPE, Nullch));
444#endif /* USE_LOCALE_CTYPE */
445#ifdef USE_LOCALE_COLLATE
446 curcoll = savepv(setlocale(LC_COLLATE, Nullch));
447#endif /* USE_LOCALE_COLLATE */
448#ifdef USE_LOCALE_NUMERIC
449 curnum = savepv(setlocale(LC_NUMERIC, Nullch));
450#endif /* USE_LOCALE_NUMERIC */
451 }
452 else {
453
454#ifdef USE_LOCALE_CTYPE
455 new_ctype(curctype);
456#endif /* USE_LOCALE_CTYPE */
457
458#ifdef USE_LOCALE_COLLATE
459 new_collate(curcoll);
460#endif /* USE_LOCALE_COLLATE */
461
462#ifdef USE_LOCALE_NUMERIC
463 new_numeric(curnum);
464#endif /* USE_LOCALE_NUMERIC */
465 }
466
467#endif /* USE_LOCALE */
468
469#ifdef USE_LOCALE_CTYPE
470 if (curctype != NULL)
471 Safefree(curctype);
472#endif /* USE_LOCALE_CTYPE */
473#ifdef USE_LOCALE_COLLATE
474 if (curcoll != NULL)
475 Safefree(curcoll);
476#endif /* USE_LOCALE_COLLATE */
477#ifdef USE_LOCALE_NUMERIC
478 if (curnum != NULL)
479 Safefree(curnum);
480#endif /* USE_LOCALE_NUMERIC */
481 return ok;
482}
483
484/* Backwards compatibility. */
485int
486Perl_init_i18nl14n(pTHX_ int printwarn)
487{
488 return init_i18nl10n(printwarn);
489}
490
491#ifdef USE_LOCALE_COLLATE
492
493/*
494 * mem_collxfrm() is a bit like strxfrm() but with two important
495 * differences. First, it handles embedded NULs. Second, it allocates
496 * a bit more memory than needed for the transformed data itself.
497 * The real transformed data begins at offset sizeof(collationix).
498 * Please see sv_collxfrm() to see how this is used.
499 */
500char *
501Perl_mem_collxfrm(pTHX_ const char *s, STRLEN len, STRLEN *xlen)
502{
503 char *xbuf;
504 STRLEN xAlloc, xin, xout; /* xalloc is a reserved word in VC */
505
506 /* the first sizeof(collationix) bytes are used by sv_collxfrm(). */
507 /* the +1 is for the terminating NUL. */
508
509 xAlloc = sizeof(PL_collation_ix) + PL_collxfrm_base + (PL_collxfrm_mult * len) + 1;
510 New(171, xbuf, xAlloc, char);
511 if (! xbuf)
512 goto bad;
513
514 *(U32*)xbuf = PL_collation_ix;
515 xout = sizeof(PL_collation_ix);
516 for (xin = 0; xin < len; ) {
517 SSize_t xused;
518
519 for (;;) {
520 xused = strxfrm(xbuf + xout, s + xin, xAlloc - xout);
521 if (xused == -1)
522 goto bad;
523 if (xused < xAlloc - xout)
524 break;
525 xAlloc = (2 * xAlloc) + 1;
526 Renew(xbuf, xAlloc, char);
527 if (! xbuf)
528 goto bad;
529 }
530
531 xin += strlen(s + xin) + 1;
532 xout += xused;
533
534 /* Embedded NULs are understood but silently skipped
535 * because they make no sense in locale collation. */
536 }
537
538 xbuf[xout] = '\0';
539 *xlen = xout - sizeof(PL_collation_ix);
540 return xbuf;
541
542 bad:
543 Safefree(xbuf);
544 *xlen = 0;
545 return NULL;
546}
547
548#endif /* USE_LOCALE_COLLATE */
549