Commit | Line | Data |
---|---|---|
bdd8600f KW |
1 | #!perl -w |
2 | ||
31f05a37 | 3 | BEGIN { |
629eeaee KW |
4 | require 'loc_tools.pl'; # Contains locales_enabled() and |
5 | # find_utf8_ctype_locale() | |
31f05a37 KW |
6 | } |
7 | ||
bdd8600f KW |
8 | use strict; |
9 | use Test::More; | |
569f7fc5 | 10 | use Config; |
bdd8600f KW |
11 | |
12 | use XS::APItest; | |
13 | ||
01a11ab9 KW |
14 | my $tab = " " x 4; # Indent subsidiary tests this much |
15 | ||
4acecd39 KW |
16 | use Unicode::UCD qw(search_invlist prop_invmap prop_invlist); |
17 | my ($charname_list, $charname_map, $format, $default) = prop_invmap("Name Alias"); | |
18 | ||
19 | sub get_charname($) { | |
20 | my $cp = shift; | |
21 | ||
22 | # If there is a an abbreviation for the code point name, use it | |
23 | my $name_index = search_invlist(\@{$charname_list}, $cp); | |
24 | if (defined $name_index) { | |
25 | my $synonyms = $charname_map->[$name_index]; | |
26 | if (ref $synonyms) { | |
27 | my $pat = qr/: abbreviation/; | |
28 | my @abbreviations = grep { $_ =~ $pat } @$synonyms; | |
29 | if (@abbreviations) { | |
30 | return $abbreviations[0] =~ s/$pat//r; | |
31 | } | |
32 | } | |
33 | } | |
34 | ||
35 | # Otherwise, use the full name | |
36 | use charnames (); | |
37 | return charnames::viacode($cp) // "No name"; | |
38 | } | |
39 | ||
5073ffbd | 40 | sub truth($) { # Converts values so is() works |
e2efe419 | 41 | return (shift) ? 1 : 0; |
c9c05358 KW |
42 | } |
43 | ||
01a11ab9 | 44 | my $base_locale; |
31f05a37 | 45 | my $utf8_locale; |
629eeaee | 46 | if(locales_enabled('LC_ALL')) { |
569f7fc5 | 47 | require POSIX; |
01a11ab9 KW |
48 | $base_locale = POSIX::setlocale( &POSIX::LC_ALL, "C"); |
49 | if (defined $base_locale && $base_locale eq 'C') { | |
5f1269ab | 50 | use locale; # make \w work right in non-ASCII lands |
569f7fc5 JR |
51 | |
52 | # Some locale implementations don't have the 128-255 characters all | |
53 | # mean nothing. Skip the locale tests in that situation | |
54 | for my $i (128 .. 255) { | |
b5c3a14a | 55 | if (chr(utf8::unicode_to_native($i)) =~ /[[:print:]]/) { |
01a11ab9 | 56 | undef $base_locale; |
569f7fc5 JR |
57 | last; |
58 | } | |
981746b9 | 59 | } |
31f05a37 | 60 | |
01a11ab9 | 61 | $utf8_locale = find_utf8_ctype_locale() if $base_locale; |
981746b9 KW |
62 | } |
63 | } | |
64 | ||
01a11ab9 KW |
65 | sub get_display_locale_or_skip($$) { |
66 | ||
67 | # Helper function intimately tied to its callers. It knows the loop | |
68 | # iterates with a locale of "", meaning don't use locale; $base_locale | |
69 | # meaning to use a non-UTF-8 locale; and $utf8_locale. | |
70 | # | |
71 | # It checks to see if the current test should be skipped or executed, | |
72 | # returning an empty list for the former, and for the latter: | |
73 | # ( 'locale display name', | |
74 | # bool of is this a UTF-8 locale ) | |
75 | # | |
76 | # The display name is the empty string if not using locale. Functions | |
77 | # with _LC in their name are skipped unless in locale, and functions | |
78 | # without _LC are executed only outside locale. However, if no locales at | |
79 | # all are on the system, the _LC functions are executed outside locale. | |
80 | ||
81 | my ($locale, $suffix) = @_; | |
82 | ||
83 | # The test should be skipped if the input is for a non-existent locale | |
84 | return unless defined $locale; | |
85 | ||
86 | # Here the input is defined, either a locale name or "". If the test is | |
87 | # for not using locales, we want to do the test for non-LC functions, | |
88 | # and skip it for LC ones (except if there are no locales on the system, | |
89 | # we do it for LC ones as if they weren't LC). | |
90 | if ($locale eq "") { | |
91 | return ("", 0) if $suffix !~ /LC/ || ! defined $base_locale; | |
92 | return; | |
93 | } | |
94 | ||
95 | # Here the input is for a real locale. We don't test the non-LC functions | |
96 | # for locales. | |
97 | return if $suffix !~ /LC/; | |
98 | ||
99 | # Here is for a LC function and a real locale. The base locale is not | |
100 | # UTF-8. | |
101 | return (" ($locale locale)", 0) if $locale eq $base_locale; | |
102 | ||
103 | # The only other possibility is that we have a UTF-8 locale | |
104 | return (" ($locale)", 1); | |
105 | } | |
106 | ||
da8c1a98 KW |
107 | sub try_malforming($$$) |
108 | { | |
109 | # Determines if the tests for malformed UTF-8 should be done. When done, | |
110 | # the .xs code creates malformations by pretending the length is shorter | |
111 | # than it actually is. Some things can't be malformed, and sometimes this | |
112 | # test knows that the current code doesn't look for a malformation under | |
113 | # various circumstances. | |
114 | ||
115 | my ($i, $function, $using_locale) = @_; | |
51238c0c | 116 | # $i is unicode code point; |
da8c1a98 KW |
117 | |
118 | # Single bytes can't be malformed | |
119 | return 0 if $i < ((ord "A" == 65) ? 128 : 160); | |
120 | ||
121 | # ASCII doesn't need to ever look beyond the first byte. | |
122 | return 0 if $function eq "ASCII"; | |
123 | ||
124 | # No controls above 255, so the code doesn't look at those | |
125 | return 0 if $i > 255 && $function eq "CNTRL"; | |
126 | ||
127 | # No non-ASCII digits below 256, except if using locales. | |
128 | return 0 if $i < 256 && ! $using_locale && $function =~ /X?DIGIT/; | |
129 | ||
130 | return 1; | |
131 | } | |
132 | ||
e2efe419 KW |
133 | my %properties = ( |
134 | # name => Lookup-property name | |
135 | alnum => 'Word', | |
981746b9 | 136 | wordchar => 'Word', |
15861f94 | 137 | alphanumeric => 'Alnum', |
ebdbc726 | 138 | alpha => 'XPosixAlpha', |
e2efe419 KW |
139 | ascii => 'ASCII', |
140 | blank => 'Blank', | |
141 | cntrl => 'Control', | |
142 | digit => 'Digit', | |
143 | graph => 'Graph', | |
144 | idfirst => '_Perl_IDStart', | |
eba68aa0 | 145 | idcont => '_Perl_IDCont', |
ebdbc726 | 146 | lower => 'XPosixLower', |
e2efe419 KW |
147 | print => 'Print', |
148 | psxspc => 'XPosixSpace', | |
149 | punct => 'XPosixPunct', | |
150 | quotemeta => '_Perl_Quotemeta', | |
151 | space => 'XPerlSpace', | |
840f8e92 | 152 | vertws => 'VertSpace', |
ebdbc726 | 153 | upper => 'XPosixUpper', |
e2efe419 KW |
154 | xdigit => 'XDigit', |
155 | ); | |
156 | ||
34aeb2e9 | 157 | my %seen; |
5073ffbd KW |
158 | my @warnings; |
159 | local $SIG{__WARN__} = sub { push @warnings, @_ }; | |
160 | ||
da8c1a98 KW |
161 | my %utf8_param_code = ( |
162 | "_safe" => 0, | |
163 | "_safe, malformed" => 1, | |
34aeb2e9 | 164 | "deprecated unsafe" => -1, |
607313a1 | 165 | "deprecated mathoms" => -2, |
da8c1a98 | 166 | ); |
4acecd39 | 167 | |
8dc91396 KW |
168 | foreach my $name (sort keys %properties, 'octal') { |
169 | my @invlist; | |
170 | if ($name eq 'octal') { | |
171 | # Hand-roll an inversion list with 0-7 in it and nothing else. | |
172 | push @invlist, ord "0", ord "8"; | |
173 | } | |
174 | else { | |
9a009363 KW |
175 | my $property = $properties{$name}; |
176 | @invlist = prop_invlist($property, '_perl_core_internal_ok'); | |
177 | if (! @invlist) { | |
178 | ||
179 | # An empty return could mean an unknown property, or merely that | |
180 | # it is empty. Call in scalar context to differentiate | |
181 | if (! prop_invlist($property, '_perl_core_internal_ok')) { | |
182 | fail("No inversion list found for $property"); | |
183 | next; | |
184 | } | |
ebdbc726 | 185 | } |
e2efe419 KW |
186 | } |
187 | ||
188 | # Include all the Latin1 code points, plus 0x100. | |
189 | my @code_points = (0 .. 256); | |
190 | ||
191 | # Then include the next few boundaries above those from this property | |
192 | my $above_latins = 0; | |
193 | foreach my $range_start (@invlist) { | |
194 | next if $range_start < 257; | |
195 | push @code_points, $range_start - 1, $range_start; | |
196 | $above_latins++; | |
197 | last if $above_latins > 5; | |
198 | } | |
199 | ||
eba68aa0 KW |
200 | # This makes sure we are using the Perl definition of idfirst and idcont, |
201 | # and not the Unicode. There are a few differences. | |
202 | push @code_points, ord "\N{ESTIMATED SYMBOL}" if $name =~ /^id(first|cont)/; | |
2f4622f0 KW |
203 | if ($name eq "idcont") { # And some that are continuation but not start |
204 | push @code_points, ord("\N{GREEK ANO TELEIA}"), | |
205 | ord("\N{COMBINING GRAVE ACCENT}"); | |
206 | } | |
f91dcd13 | 207 | |
e2efe419 KW |
208 | # And finally one non-Unicode code point. |
209 | push @code_points, 0x110000; # Above Unicode, no prop should match | |
4f6ef7cc | 210 | no warnings 'non_unicode'; |
e2efe419 | 211 | |
2e1414ce KW |
212 | for my $j (@code_points) { |
213 | my $i = utf8::native_to_unicode($j); | |
c9c05358 | 214 | my $function = uc($name); |
c9c05358 | 215 | |
01a11ab9 KW |
216 | is (@warnings, 0, "Got no unexpected warnings in previous iteration") |
217 | or diag("@warnings"); | |
218 | undef @warnings; | |
219 | ||
51238c0c | 220 | my $matches = search_invlist(\@invlist, $j); |
e2efe419 KW |
221 | if (! defined $matches) { |
222 | $matches = 0; | |
223 | } | |
224 | else { | |
225 | $matches = truth(! ($matches % 2)); | |
226 | } | |
5073ffbd | 227 | |
e2efe419 | 228 | my $ret; |
4acecd39 | 229 | my $char_name = get_charname($j); |
51238c0c | 230 | my $display_name = sprintf "\\x{%02X, %s}", $j, $char_name; |
01a11ab9 | 231 | my $display_call = "is${function}( $display_name )"; |
e2efe419 | 232 | |
a7fe8528 KW |
233 | foreach my $suffix ("", "_A", "_L1", "_LC", "_uni", "_uvchr", |
234 | "_LC_uvchr", "_utf8", "_LC_utf8") | |
ee9e5f10 | 235 | { |
31f05a37 | 236 | |
01a11ab9 KW |
237 | # Not all possible macros have been defined |
238 | if ($name eq 'vertws') { | |
31f05a37 | 239 | |
01a11ab9 | 240 | # vertws is always all of Unicode |
a7fe8528 | 241 | next if $suffix !~ / ^ _ ( uni | uvchr | utf8 ) $ /x; |
981746b9 | 242 | } |
01a11ab9 | 243 | elsif ($name eq 'alnum') { |
981746b9 | 244 | |
a7fe8528 KW |
245 | # ALNUM_A, ALNUM_L1, and ALNUM_uvchr are not defined as these |
246 | # suffixes were added later, after WORDCHAR was created to be | |
247 | # a clearer synonym for ALNUM | |
248 | next if $suffix eq '_A' | |
249 | || $suffix eq '_L1' | |
250 | || $suffix eq '_uvchr'; | |
31a09021 | 251 | } |
8dc91396 | 252 | elsif ($name eq 'octal') { |
ee9e5f10 | 253 | next if $suffix ne "" && $suffix ne '_A' && $suffix ne '_L1'; |
8dc91396 | 254 | } |
42c03a9a KW |
255 | elsif ($name eq 'quotemeta') { |
256 | # There is only one macro for this, and is defined only for | |
257 | # Latin1 range | |
258 | next if $suffix ne "" | |
259 | } | |
a9aff56e | 260 | |
01a11ab9 KW |
261 | foreach my $locale ("", $base_locale, $utf8_locale) { |
262 | ||
263 | my ($display_locale, $locale_is_utf8) | |
264 | = get_display_locale_or_skip($locale, $suffix); | |
265 | next unless defined $display_locale; | |
266 | ||
267 | use if $locale, "locale"; | |
268 | POSIX::setlocale( &POSIX::LC_ALL, $locale) if $locale; | |
269 | ||
270 | if ($suffix !~ /utf8/) { # _utf8 has to handled specially | |
271 | my $display_call | |
272 | = "is${function}$suffix( $display_name )$display_locale"; | |
51238c0c | 273 | $ret = truth eval "test_is${function}$suffix($j)"; |
01a11ab9 KW |
274 | if (is ($@, "", "$display_call didn't give error")) { |
275 | my $truth = $matches; | |
276 | if ($truth) { | |
277 | ||
278 | # The single byte functions are false for | |
279 | # above-Latin1 | |
51238c0c | 280 | if ($j >= 256) { |
ee9e5f10 KW |
281 | $truth = 0 |
282 | if $suffix=~ / ^ ( _A | _L [1C] )? $ /x; | |
01a11ab9 | 283 | } |
51238c0c | 284 | elsif ( $i >= 128 |
42c03a9a KW |
285 | && $name ne 'quotemeta') |
286 | { | |
01a11ab9 | 287 | |
ee9e5f10 | 288 | # The no-suffix and _A functions are false |
01a11ab9 KW |
289 | # for non-ASCII. So are _LC functions on a |
290 | # non-UTF-8 locale | |
291 | $truth = 0 if $suffix eq "_A" | |
ee9e5f10 | 292 | || $suffix eq "" |
01a11ab9 KW |
293 | || ( $suffix =~ /LC/ |
294 | && ! $locale_is_utf8); | |
295 | } | |
296 | } | |
297 | ||
298 | is ($ret, $truth, "${tab}And correctly returns $truth"); | |
299 | } | |
300 | } | |
301 | else { # _utf8 suffix | |
51238c0c | 302 | my $char = chr($j); |
01a11ab9 KW |
303 | utf8::upgrade($char); |
304 | $char = quotemeta $char if $char eq '\\' || $char eq "'"; | |
305 | my $truth; | |
306 | if ( $suffix =~ /LC/ | |
307 | && ! $locale_is_utf8 | |
51238c0c KW |
308 | && $j < 256 |
309 | && $i >= 128) | |
01a11ab9 KW |
310 | { # The C-locale _LC function returns FALSE for Latin1 |
311 | # above ASCII | |
312 | $truth = 0; | |
313 | } | |
314 | else { | |
315 | $truth = $matches; | |
316 | } | |
31f05a37 | 317 | |
da8c1a98 KW |
318 | foreach my $utf8_param("_safe", |
319 | "_safe, malformed", | |
34aeb2e9 | 320 | "deprecated unsafe" |
da8c1a98 KW |
321 | ) |
322 | { | |
323 | my $utf8_param_code = $utf8_param_code{$utf8_param}; | |
324 | my $expect_error = $utf8_param_code > 0; | |
325 | next if $expect_error | |
34aeb2e9 KW |
326 | && ! try_malforming($i, $function, |
327 | $suffix =~ /LC/); | |
da8c1a98 KW |
328 | |
329 | my $display_call = "is${function}$suffix( $display_name" | |
330 | . ", $utf8_param )$display_locale"; | |
331 | $ret = truth eval "test_is${function}$suffix('$char'," | |
332 | . " $utf8_param_code)"; | |
333 | if ($expect_error) { | |
334 | isnt ($@, "", | |
335 | "expected and got error in $display_call"); | |
336 | like($@, qr/Malformed UTF-8 character/, | |
337 | "${tab}And got expected message"); | |
338 | if (is (@warnings, 1, | |
339 | "${tab}Got a single warning besides")) | |
340 | { | |
341 | like($warnings[0], | |
342 | qr/Malformed UTF-8 character.*short/, | |
343 | "${tab}Got expected warning"); | |
344 | } | |
345 | else { | |
346 | diag("@warnings"); | |
347 | } | |
348 | undef @warnings; | |
349 | } | |
350 | elsif (is ($@, "", "$display_call didn't give error")) { | |
01a11ab9 KW |
351 | is ($ret, $truth, |
352 | "${tab}And correctly returned $truth"); | |
34aeb2e9 KW |
353 | if ($utf8_param_code < 0) { |
354 | my $warnings_ok; | |
355 | my $unique_function = "is" . $function . $suffix; | |
356 | if (! $seen{$unique_function}++) { | |
357 | $warnings_ok = is(@warnings, 1, | |
358 | "${tab}This is first call to" | |
359 | . " $unique_function; Got a single" | |
360 | . " warning"); | |
361 | if ($warnings_ok) { | |
362 | $warnings_ok = like($warnings[0], | |
363 | qr/starting in Perl .* will require an additional parameter/, | |
364 | "${tab}The warning was the expected" | |
365 | . " deprecation one"); | |
366 | } | |
367 | } | |
368 | else { | |
369 | $warnings_ok = is(@warnings, 0, | |
370 | "${tab}This subsequent call to" | |
371 | . " $unique_function did not warn"); | |
372 | } | |
373 | $warnings_ok or diag("@warnings"); | |
374 | undef @warnings; | |
375 | } | |
01a11ab9 | 376 | } |
da8c1a98 | 377 | } |
01a11ab9 | 378 | } |
31a09021 | 379 | } |
8ff203a1 | 380 | } |
c9c05358 KW |
381 | } |
382 | } | |
383 | ||
2e1414ce | 384 | my %to_properties = ( |
01a11ab9 | 385 | FOLD => 'Case_Folding', |
2e1414ce KW |
386 | LOWER => 'Lowercase_Mapping', |
387 | TITLE => 'Titlecase_Mapping', | |
388 | UPPER => 'Uppercase_Mapping', | |
389 | ); | |
390 | ||
391 | ||
392 | foreach my $name (sort keys %to_properties) { | |
393 | my $property = $to_properties{$name}; | |
394 | my ($list_ref, $map_ref, $format, $missing) | |
395 | = prop_invmap($property, ); | |
396 | if (! $list_ref || ! $map_ref) { | |
397 | fail("No inversion map found for $property"); | |
398 | next; | |
399 | } | |
ebdbc726 | 400 | if ($format !~ / ^ a l? $ /x) { |
2e1414ce KW |
401 | fail("Unexpected inversion map format ('$format') found for $property"); |
402 | next; | |
403 | } | |
404 | ||
405 | # Include all the Latin1 code points, plus 0x100. | |
406 | my @code_points = (0 .. 256); | |
407 | ||
408 | # Then include the next few multi-char folds above those from this | |
409 | # property, and include the next few single folds as well | |
410 | my $above_latins = 0; | |
411 | my $multi_char = 0; | |
412 | for my $i (0 .. @{$list_ref} - 1) { | |
413 | my $range_start = $list_ref->[$i]; | |
414 | next if $range_start < 257; | |
415 | if (ref $map_ref->[$i] && $multi_char < 5) { | |
01a11ab9 KW |
416 | push @code_points, $range_start - 1 |
417 | if $code_points[-1] != $range_start - 1; | |
2e1414ce KW |
418 | push @code_points, $range_start; |
419 | $multi_char++; | |
420 | } | |
421 | elsif ($above_latins < 5) { | |
01a11ab9 KW |
422 | push @code_points, $range_start - 1 |
423 | if $code_points[-1] != $range_start - 1; | |
2e1414ce KW |
424 | push @code_points, $range_start; |
425 | $above_latins++; | |
426 | } | |
427 | last if $above_latins >= 5 && $multi_char >= 5; | |
428 | } | |
429 | ||
430 | # And finally one non-Unicode code point. | |
431 | push @code_points, 0x110000; # Above Unicode, no prop should match | |
432 | no warnings 'non_unicode'; | |
433 | ||
434 | # $j is native; $i unicode. | |
435 | for my $j (@code_points) { | |
436 | my $i = utf8::native_to_unicode($j); | |
437 | my $function = $name; | |
438 | ||
4acecd39 | 439 | my $index = search_invlist(\@{$list_ref}, $j); |
2e1414ce KW |
440 | |
441 | my $ret; | |
4acecd39 | 442 | my $char_name = get_charname($j); |
51238c0c | 443 | my $display_name = sprintf "\\N{U+%02X, %s}", $j, $char_name; |
2e1414ce | 444 | |
01a11ab9 | 445 | foreach my $suffix ("", "_L1", "_LC") { |
2e1414ce | 446 | |
01a11ab9 KW |
447 | # This is the only macro defined for L1 |
448 | next if $suffix eq "_L1" && $function ne "LOWER"; | |
2e1414ce | 449 | |
01a11ab9 KW |
450 | SKIP: |
451 | foreach my $locale ("", $base_locale, $utf8_locale) { | |
31f05a37 | 452 | |
01a11ab9 KW |
453 | # titlecase is not defined in locales. |
454 | next if $name eq 'TITLE' && $suffix eq "_LC"; | |
31f05a37 | 455 | |
01a11ab9 KW |
456 | my ($display_locale, $locale_is_utf8) |
457 | = get_display_locale_or_skip($locale, $suffix); | |
458 | next unless defined $display_locale; | |
31f05a37 | 459 | |
01a11ab9 KW |
460 | skip("to${name}_LC does not work for LATIN SMALL LETTER SHARP S" |
461 | . "$display_locale", 1) | |
462 | if $i == 0xDF && $name =~ / FOLD | UPPER /x | |
463 | && $suffix eq "_LC" && $locale_is_utf8; | |
464 | ||
465 | use if $locale, "locale"; | |
466 | POSIX::setlocale( &POSIX::LC_ALL, $locale) if $locale; | |
467 | ||
468 | my $display_call = "to${function}$suffix(" | |
469 | . " $display_name )$display_locale"; | |
470 | $ret = eval "test_to${function}$suffix($j)"; | |
471 | if (is ($@, "", "$display_call didn't give error")) { | |
472 | my $should_be; | |
51238c0c | 473 | if ($j > 255) { |
01a11ab9 KW |
474 | $should_be = $j; |
475 | } | |
51238c0c | 476 | elsif ( $i > 127 |
01a11ab9 KW |
477 | && ( $suffix eq "" |
478 | || ($suffix eq "_LC" && ! $locale_is_utf8))) | |
479 | { | |
480 | $should_be = $j; | |
481 | } | |
482 | elsif ($map_ref->[$index] != $missing) { | |
483 | $should_be = $map_ref->[$index] + $j - $list_ref->[$index] | |
31f05a37 KW |
484 | } |
485 | else { | |
01a11ab9 | 486 | $should_be = $j; |
31f05a37 | 487 | } |
01a11ab9 KW |
488 | |
489 | is ($ret, $should_be, | |
490 | sprintf("${tab}And correctly returned 0x%02X", | |
491 | $should_be)); | |
31f05a37 | 492 | } |
2e1414ce KW |
493 | } |
494 | } | |
495 | ||
a7fe8528 KW |
496 | # The _uni, uvchr, and _utf8 functions return both the ordinal of the |
497 | # first code point of the result, and the result in utf8. The .xs | |
498 | # tests return these in an array, in [0] and [1] respectively, with | |
499 | # [2] the length of the utf8 in bytes. | |
2e1414ce KW |
500 | my $utf8_should_be = ""; |
501 | my $first_ord_should_be; | |
502 | if (ref $map_ref->[$index]) { # A multi-char result | |
503 | for my $j (0 .. @{$map_ref->[$index]} - 1) { | |
504 | $utf8_should_be .= chr $map_ref->[$index][$j]; | |
505 | } | |
506 | ||
507 | $first_ord_should_be = $map_ref->[$index][0]; | |
508 | } | |
509 | else { # A single-char result | |
510 | $first_ord_should_be = ($map_ref->[$index] != $missing) | |
01a11ab9 KW |
511 | ? $map_ref->[$index] + $j |
512 | - $list_ref->[$index] | |
2e1414ce KW |
513 | : $j; |
514 | $utf8_should_be = chr $first_ord_should_be; | |
515 | } | |
516 | utf8::upgrade($utf8_should_be); | |
517 | ||
a7fe8528 KW |
518 | # Test _uni, uvchr |
519 | foreach my $suffix ('_uni', '_uvchr') { | |
c5546fac KW |
520 | my $s; |
521 | my $len; | |
522 | my $display_call = "to${function}$suffix( $display_name )"; | |
523 | $ret = eval "test_to${function}$suffix($j)"; | |
524 | if (is ($@, "", "$display_call didn't give error")) { | |
525 | is ($ret->[0], $first_ord_should_be, | |
526 | sprintf("${tab}And correctly returned 0x%02X", | |
527 | $first_ord_should_be)); | |
528 | is ($ret->[1], $utf8_should_be, "${tab}Got correct utf8"); | |
529 | use bytes; | |
530 | is ($ret->[2], length $utf8_should_be, | |
531 | "${tab}Got correct number of bytes for utf8 length"); | |
532 | } | |
a7fe8528 | 533 | } |
2e1414ce KW |
534 | |
535 | # Test _utf8 | |
536 | my $char = chr($j); | |
537 | utf8::upgrade($char); | |
538 | $char = quotemeta $char if $char eq '\\' || $char eq "'"; | |
a239b1e2 KW |
539 | foreach my $utf8_param("_safe", |
540 | "_safe, malformed", | |
607313a1 KW |
541 | "deprecated unsafe", |
542 | "deprecated mathoms", | |
a239b1e2 | 543 | ) |
01a11ab9 | 544 | { |
607313a1 KW |
545 | use Config; |
546 | next if $utf8_param eq 'deprecated mathoms' | |
547 | && $Config{'ccflags'} =~ /-DNO_MATHOMS/; | |
548 | ||
a239b1e2 KW |
549 | my $utf8_param_code = $utf8_param_code{$utf8_param}; |
550 | my $expect_error = $utf8_param_code > 0; | |
551 | ||
552 | # Skip if can't malform (because is a UTF-8 invariant) | |
553 | next if $expect_error && $i < ((ord "A" == 65) ? 128 : 160); | |
554 | ||
555 | my $display_call = "to${function}_utf8($display_name, $utf8_param )"; | |
556 | $ret = eval "test_to${function}_utf8('$char', $utf8_param_code)"; | |
557 | if ($expect_error) { | |
558 | isnt ($@, "", "expected and got error in $display_call"); | |
559 | like($@, qr/Malformed UTF-8 character/, | |
560 | "${tab}And got expected message"); | |
561 | undef @warnings; | |
562 | } | |
563 | elsif (is ($@, "", "$display_call didn't give error")) { | |
01a11ab9 KW |
564 | is ($ret->[0], $first_ord_should_be, |
565 | sprintf("${tab}And correctly returned 0x%02X", | |
566 | $first_ord_should_be)); | |
567 | is ($ret->[1], $utf8_should_be, "${tab}Got correct utf8"); | |
568 | use bytes; | |
569 | is ($ret->[2], length $utf8_should_be, | |
570 | "${tab}Got correct number of bytes for utf8 length"); | |
607313a1 KW |
571 | if ($utf8_param_code < 0) { |
572 | my $warnings_ok; | |
573 | if (! $seen{"${function}_utf8$utf8_param"}++) { | |
574 | $warnings_ok = is(@warnings, 1, | |
575 | "${tab}Got a single warning"); | |
576 | if ($warnings_ok) { | |
577 | my $expected; | |
578 | if ($utf8_param_code == -2) { | |
579 | my $lc_func = lc $function; | |
580 | $expected | |
581 | = qr/starting in Perl .* to_utf8_$lc_func\(\) will be removed/; | |
582 | } | |
583 | else { | |
584 | $expected | |
585 | = qr/starting in Perl .* will require an additional parameter/; | |
586 | } | |
587 | $warnings_ok = like($warnings[0], $expected, | |
588 | "${tab}Got expected deprecation warning"); | |
589 | } | |
590 | } | |
591 | else { | |
592 | $warnings_ok = is(@warnings, 0, | |
593 | "${tab}Deprecation warned only the one time"); | |
594 | } | |
595 | $warnings_ok or diag("@warnings"); | |
596 | undef @warnings; | |
597 | } | |
01a11ab9 | 598 | } |
2e1414ce | 599 | } |
2e1414ce KW |
600 | } |
601 | } | |
602 | ||
5073ffbd | 603 | # This is primarily to make sure that no non-Unicode warnings get generated |
01a11ab9 KW |
604 | is(scalar @warnings, 0, "No unexpected warnings were generated in the tests") |
605 | or diag @warnings; | |
5073ffbd | 606 | |
bdd8600f | 607 | done_testing; |