This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
XS-APItest/t/handy.t: Change output message
[perl5.git] / ext / XS-APItest / t / handy.t
CommitLineData
bdd8600f
KW
1#!perl -w
2
31f05a37 3BEGIN {
9b0711ee 4 require 'loc_tools.pl'; # Contains find_utf8_ctype_locale()
31f05a37
KW
5}
6
bdd8600f
KW
7use strict;
8use Test::More;
569f7fc5 9use Config;
bdd8600f
KW
10
11use XS::APItest;
12
2e1414ce 13use Unicode::UCD qw(prop_invlist prop_invmap);
c9c05358 14
5073ffbd 15sub truth($) { # Converts values so is() works
e2efe419 16 return (shift) ? 1 : 0;
c9c05358
KW
17}
18
569f7fc5 19my $locale;
31f05a37 20my $utf8_locale;
569f7fc5
JR
21if($Config{d_setlocale}) {
22 require POSIX;
23 $locale = POSIX::setlocale( &POSIX::LC_ALL, "C");
24 if (defined $locale && $locale eq 'C') {
25 BEGIN {
26 if($Config{d_setlocale}) {
27 require locale; import locale; # make \w work right in non-ASCII lands
28 }
29 }
30
31 # Some locale implementations don't have the 128-255 characters all
32 # mean nothing. Skip the locale tests in that situation
33 for my $i (128 .. 255) {
34 if (chr($i) =~ /[[:print:]]/) {
35 undef $locale;
36 last;
37 }
981746b9 38 }
31f05a37 39
9b0711ee 40 $utf8_locale = find_utf8_ctype_locale();
981746b9
KW
41 }
42}
43
e2efe419
KW
44my %properties = (
45 # name => Lookup-property name
46 alnum => 'Word',
981746b9 47 wordchar => 'Word',
15861f94 48 alphanumeric => 'Alnum',
e2efe419
KW
49 alpha => 'Alpha',
50 ascii => 'ASCII',
51 blank => 'Blank',
52 cntrl => 'Control',
53 digit => 'Digit',
54 graph => 'Graph',
55 idfirst => '_Perl_IDStart',
eba68aa0 56 idcont => '_Perl_IDCont',
e2efe419
KW
57 lower => 'Lower',
58 print => 'Print',
59 psxspc => 'XPosixSpace',
60 punct => 'XPosixPunct',
61 quotemeta => '_Perl_Quotemeta',
62 space => 'XPerlSpace',
840f8e92 63 vertws => 'VertSpace',
e2efe419
KW
64 upper => 'Upper',
65 xdigit => 'XDigit',
66 );
67
5073ffbd
KW
68my @warnings;
69local $SIG{__WARN__} = sub { push @warnings, @_ };
70
c9c05358 71use charnames ();
e2efe419
KW
72foreach my $name (sort keys %properties) {
73 my $property = $properties{$name};
74 my @invlist = prop_invlist($property, '_perl_core_internal_ok');
75 if (! @invlist) {
76 fail("No inversion list found for $property");
77 next;
78 }
79
80 # Include all the Latin1 code points, plus 0x100.
81 my @code_points = (0 .. 256);
82
83 # Then include the next few boundaries above those from this property
84 my $above_latins = 0;
85 foreach my $range_start (@invlist) {
86 next if $range_start < 257;
87 push @code_points, $range_start - 1, $range_start;
88 $above_latins++;
89 last if $above_latins > 5;
90 }
91
eba68aa0
KW
92 # This makes sure we are using the Perl definition of idfirst and idcont,
93 # and not the Unicode. There are a few differences.
94 push @code_points, ord "\N{ESTIMATED SYMBOL}" if $name =~ /^id(first|cont)/;
2f4622f0
KW
95 if ($name eq "idcont") { # And some that are continuation but not start
96 push @code_points, ord("\N{GREEK ANO TELEIA}"),
97 ord("\N{COMBINING GRAVE ACCENT}");
98 }
f91dcd13 99
e2efe419
KW
100 # And finally one non-Unicode code point.
101 push @code_points, 0x110000; # Above Unicode, no prop should match
4f6ef7cc 102 no warnings 'non_unicode';
e2efe419 103
2e1414ce
KW
104 for my $j (@code_points) {
105 my $i = utf8::native_to_unicode($j);
c9c05358 106 my $function = uc($name);
c9c05358 107
1fdd5e53 108 my $matches = Unicode::UCD::search_invlist(\@invlist, $i);
e2efe419
KW
109 if (! defined $matches) {
110 $matches = 0;
111 }
112 else {
113 $matches = truth(! ($matches % 2));
114 }
5073ffbd 115
e2efe419
KW
116 my $ret;
117 my $char_name = charnames::viacode($i) // "No name";
94a62bdc 118 my $display_name = sprintf "\\x{%02X, %s}", $i, $char_name;
e2efe419
KW
119
120 if ($name eq 'quotemeta') { # There is only one macro for this, and is
121 # defined only for Latin1 range
122 $ret = truth eval "test_is${function}($i)";
123 if ($@) {
124 fail $@;
125 }
126 else {
127 my $truth = truth($matches && $i < 256);
128 is ($ret, $truth, "is${function}( $display_name ) == $truth");
129 }
c9c05358
KW
130 next;
131 }
981746b9
KW
132
133 # vertws is always all of Unicode; ALNUM_A and ALNUM_L1 are not
134 # defined as they were added later, after WORDCHAR was created to be a
135 # clearer synonym for ALNUM
840f8e92 136 if ($name ne 'vertws') {
981746b9 137 if ($name ne 'alnum') {
195d0f3b
KW
138 $ret = truth eval "test_is${function}_A($i)";
139 if ($@) {
140 fail($@);
141 }
142 else {
143 my $truth = truth($matches && $i < 128);
144 is ($ret, $truth, "is${function}_A( $display_name ) == $truth");
145 }
146 $ret = truth eval "test_is${function}_L1($i)";
147 if ($@) {
148 fail($@);
149 }
150 else {
151 my $truth = truth($matches && $i < 256);
152 is ($ret, $truth, "is${function}_L1( $display_name ) == $truth");
153 }
981746b9
KW
154 }
155
8ff203a1 156 if (defined $locale) {
569f7fc5 157 require locale; import locale;
981746b9 158
31f05a37 159 POSIX::setlocale( &POSIX::LC_ALL, "C");
31a09021
KW
160 $ret = truth eval "test_is${function}_LC($i)";
161 if ($@) {
162 fail($@);
163 }
164 else {
165 my $truth = truth($matches && $i < 128);
31f05a37
KW
166 is ($ret, $truth, "is${function}_LC( $display_name ) == $truth (C locale)");
167 }
168 }
169
170 if (defined $utf8_locale) {
171 use locale;
172
173 POSIX::setlocale( &POSIX::LC_ALL, $utf8_locale);
174 $ret = truth eval "test_is${function}_LC($i)";
175 if ($@) {
176 fail($@);
177 }
178 else {
179
180 # UTF-8 locale works on full range 0-255
181 my $truth = truth($matches && $i < 256);
182 is ($ret, $truth, "is${function}_LC( $display_name ) == $truth ($utf8_locale)");
31a09021 183 }
8ff203a1 184 }
840f8e92 185 }
c9c05358 186
e2efe419
KW
187 $ret = truth eval "test_is${function}_uni($i)";
188 if ($@) {
189 fail($@);
190 }
191 else {
192 is ($ret, $matches, "is${function}_uni( $display_name ) == $matches");
193 }
c9c05358 194
981746b9 195 if (defined $locale && $name ne 'vertws') {
569f7fc5 196 require locale; import locale;
981746b9 197
31f05a37 198 POSIX::setlocale( &POSIX::LC_ALL, "C");
981746b9
KW
199 $ret = truth eval "test_is${function}_LC_uvchr('$i')";
200 if ($@) {
201 fail($@);
202 }
203 else {
204 my $truth = truth($matches && ($i < 128 || $i > 255));
31f05a37
KW
205 is ($ret, $truth, "is${function}_LC_uvchr( $display_name ) == $truth (C locale)");
206 }
207 }
208
209 if (defined $utf8_locale && $name ne 'vertws') {
210 use locale;
211
212 POSIX::setlocale( &POSIX::LC_ALL, $utf8_locale);
213 $ret = truth eval "test_is${function}_LC_uvchr('$i')";
214 if ($@) {
215 fail($@);
216 }
217 else {
218 my $truth = truth($matches);
219 is ($ret, $truth, "is${function}_LC_uvchr( $display_name ) == $truth ($utf8_locale)");
981746b9
KW
220 }
221 }
222
c9c05358
KW
223 my $char = chr($i);
224 utf8::upgrade($char);
225 $char = quotemeta $char if $char eq '\\' || $char eq "'";
e2efe419
KW
226 $ret = truth eval "test_is${function}_utf8('$char')";
227 if ($@) {
228 fail($@);
229 }
230 else {
231 is ($ret, $matches, "is${function}_utf8( $display_name ) == $matches");
232 }
981746b9 233
8ff203a1 234 if ($name ne 'vertws' && defined $locale) {
569f7fc5 235 require locale; import locale;
981746b9 236
31f05a37 237 POSIX::setlocale( &POSIX::LC_ALL, "C");
31a09021
KW
238 $ret = truth eval "test_is${function}_LC_utf8('$char')";
239 if ($@) {
240 fail($@);
241 }
242 else {
243 my $truth = truth($matches && ($i < 128 || $i > 255));
31f05a37
KW
244 is ($ret, $truth, "is${function}_LC_utf8( $display_name ) == $truth (C locale)");
245 }
246 }
247
248 if ($name ne 'vertws' && defined $utf8_locale) {
249 use locale;
250
251 POSIX::setlocale( &POSIX::LC_ALL, $utf8_locale);
252 $ret = truth eval "test_is${function}_LC_utf8('$char')";
253 if ($@) {
254 fail($@);
255 }
256 else {
257 my $truth = truth($matches);
258 is ($ret, $truth, "is${function}_LC_utf8( $display_name ) == $truth ($utf8_locale)");
31a09021 259 }
8ff203a1 260 }
c9c05358
KW
261 }
262}
263
2e1414ce
KW
264my %to_properties = (
265 FOLD => 'Case_Folding',
266 LOWER => 'Lowercase_Mapping',
267 TITLE => 'Titlecase_Mapping',
268 UPPER => 'Uppercase_Mapping',
269 );
270
271
272foreach my $name (sort keys %to_properties) {
273 my $property = $to_properties{$name};
274 my ($list_ref, $map_ref, $format, $missing)
275 = prop_invmap($property, );
276 if (! $list_ref || ! $map_ref) {
277 fail("No inversion map found for $property");
278 next;
279 }
280 if ($format ne "al") {
281 fail("Unexpected inversion map format ('$format') found for $property");
282 next;
283 }
284
285 # Include all the Latin1 code points, plus 0x100.
286 my @code_points = (0 .. 256);
287
288 # Then include the next few multi-char folds above those from this
289 # property, and include the next few single folds as well
290 my $above_latins = 0;
291 my $multi_char = 0;
292 for my $i (0 .. @{$list_ref} - 1) {
293 my $range_start = $list_ref->[$i];
294 next if $range_start < 257;
295 if (ref $map_ref->[$i] && $multi_char < 5) {
296 push @code_points, $range_start - 1 if $code_points[-1] != $range_start - 1;
297 push @code_points, $range_start;
298 $multi_char++;
299 }
300 elsif ($above_latins < 5) {
301 push @code_points, $range_start - 1 if $code_points[-1] != $range_start - 1;
302 push @code_points, $range_start;
303 $above_latins++;
304 }
305 last if $above_latins >= 5 && $multi_char >= 5;
306 }
307
308 # And finally one non-Unicode code point.
309 push @code_points, 0x110000; # Above Unicode, no prop should match
310 no warnings 'non_unicode';
311
312 # $j is native; $i unicode.
313 for my $j (@code_points) {
314 my $i = utf8::native_to_unicode($j);
315 my $function = $name;
316
1fdd5e53 317 my $index = Unicode::UCD::search_invlist(\@{$list_ref}, $j);
2e1414ce
KW
318
319 my $ret;
320 my $char_name = charnames::viacode($j) // "No name";
321 my $display_name = sprintf "\\N{U+%02X, %s}", $i, $char_name;
322
323 # Test the base function
324 $ret = eval "test_to${function}($j)";
325 if ($@) {
326 fail($@);
327 }
328 else {
329 my $should_be = ($i < 128 && $map_ref->[$index] != $missing)
330 ? $map_ref->[$index] + $j - $list_ref->[$index]
331 : $j;
332 is ($ret, $should_be, sprintf("to${function}( $display_name ) == 0x%02X", $should_be));
333 }
334
335 # Test _L1
336 if ($name eq 'LOWER') {
337 $ret = eval "test_to${function}_L1($j)";
338 if ($@) {
339 fail($@);
340 }
341 else {
342 my $should_be = ($i < 256 && $map_ref->[$index] != $missing)
343 ? $map_ref->[$index] + $j - $list_ref->[$index]
344 : $j;
345 is ($ret, $should_be, sprintf("to${function}_L1( $display_name ) == 0x%02X", $should_be));
346 }
347 }
348
31f05a37
KW
349 if ($name ne 'TITLE') { # Test _LC; titlecase is not defined in locales.
350 if (defined $locale) {
baa60164 351 require locale; import locale;
2e1414ce 352
baa60164
KW
353 POSIX::setlocale( &POSIX::LC_ALL, "C");
354 $ret = eval "test_to${function}_LC($j)";
355 if ($@) {
356 fail($@);
357 }
358 else {
359 my $should_be = ($i < 128 && $map_ref->[$index] != $missing)
2e1414ce
KW
360 ? $map_ref->[$index] + $j - $list_ref->[$index]
361 : $j;
baa60164
KW
362 is ($ret, $should_be, sprintf("to${function}_LC( $display_name ) == 0x%02X (C locale)", $should_be));
363 }
31f05a37
KW
364 }
365
366 if (defined $utf8_locale) {
367 use locale;
368
369 SKIP: {
370 skip "to${property}_LC does not work for LATIN SMALL LETTER SHARP S", 1
371 if $j == 0xDF && ($name eq 'FOLD' || $name eq 'UPPER');
372
373 POSIX::setlocale( &POSIX::LC_ALL, $utf8_locale);
374 $ret = eval "test_to${function}_LC($j)";
375 if ($@) {
376 fail($@);
377 }
378 else {
379 my $should_be = ($i < 256
380 && ! ref $map_ref->[$index]
381 && $map_ref->[$index] != $missing
382 )
383 ? $map_ref->[$index] + $j - $list_ref->[$index]
384 : $j;
385 is ($ret, $should_be, sprintf("to${function}_LC( $display_name ) == 0x%02X ($utf8_locale)", $should_be));
386 }
387 }
2e1414ce
KW
388 }
389 }
390
391 # The _uni and _utf8 functions return both the ordinal of the first
392 # code point of the result, and the result in utf8. The .xs tests
393 # return these in an array, in [0] and [1] respectively, with [2] the
394 # length of the utf8 in bytes.
395 my $utf8_should_be = "";
396 my $first_ord_should_be;
397 if (ref $map_ref->[$index]) { # A multi-char result
398 for my $j (0 .. @{$map_ref->[$index]} - 1) {
399 $utf8_should_be .= chr $map_ref->[$index][$j];
400 }
401
402 $first_ord_should_be = $map_ref->[$index][0];
403 }
404 else { # A single-char result
405 $first_ord_should_be = ($map_ref->[$index] != $missing)
406 ? $map_ref->[$index] + $j - $list_ref->[$index]
407 : $j;
408 $utf8_should_be = chr $first_ord_should_be;
409 }
410 utf8::upgrade($utf8_should_be);
411
412 # Test _uni
413 my $s;
414 my $len;
415 $ret = eval "test_to${function}_uni($j)";
416 if ($@) {
417 fail($@);
418 }
419 else {
420 is ($ret->[0], $first_ord_should_be, sprintf("to${function}_uni( $display_name ) == 0x%02X", $first_ord_should_be));
421 is ($ret->[1], $utf8_should_be, sprintf("utf8 of to${function}_uni( $display_name )"));
422 use bytes;
423 is ($ret->[2], length $utf8_should_be, sprintf("number of bytes in utf8 of to${function}_uni( $display_name )"));
424 }
425
426 # Test _utf8
427 my $char = chr($j);
428 utf8::upgrade($char);
429 $char = quotemeta $char if $char eq '\\' || $char eq "'";
430 $ret = eval "test_to${function}_utf8('$char')";
431 if ($@) {
432 fail($@);
433 }
434 else {
435 is ($ret->[0], $first_ord_should_be, sprintf("to${function}_utf8( $display_name ) == 0x%02X", $first_ord_should_be));
436 is ($ret->[1], $utf8_should_be, sprintf("utf8 of to${function}_utf8( $display_name )"));
437 use bytes;
438 is ($ret->[2], length $utf8_should_be, sprintf("number of bytes in utf8 of to${function}_uni( $display_name )"));
439 }
440
441 }
442}
443
5073ffbd
KW
444# This is primarily to make sure that no non-Unicode warnings get generated
445is(scalar @warnings, 0, "No warnings were generated " . join ", ", @warnings);
446
bdd8600f 447done_testing;