RETVAL = _isQUOTEMETA(ord);
OUTPUT:
RETVAL
+
+UV
+test_toLOWER(UV ord)
+ CODE:
+ RETVAL = toLOWER(ord);
+ OUTPUT:
+ RETVAL
+
+UV
+test_toLOWER_L1(UV ord)
+ CODE:
+ RETVAL = toLOWER_L1(ord);
+ OUTPUT:
+ RETVAL
+
+UV
+test_toLOWER_LC(UV ord)
+ CODE:
+ RETVAL = toLOWER_LC(ord);
+ OUTPUT:
+ RETVAL
+
+AV *
+test_toLOWER_uni(UV ord)
+ PREINIT:
+ U8 s[UTF8_MAXBYTES_CASE + 1];
+ STRLEN len;
+ AV *av;
+ SV *utf8;
+ CODE:
+ av = newAV();
+ av_push(av, newSVuv(toLOWER_uni(ord, s, &len)));
+
+ utf8 = newSVpvn((char *) s, len);
+ SvUTF8_on(utf8);
+ av_push(av, utf8);
+
+ av_push(av, newSVuv(len));
+ RETVAL = av;
+ OUTPUT:
+ RETVAL
+
+AV *
+test_toLOWER_utf8(SV * p)
+ PREINIT:
+ U8 *input;
+ U8 s[UTF8_MAXBYTES_CASE + 1];
+ STRLEN len;
+ AV *av;
+ SV *utf8;
+ CODE:
+ input = (U8 *) SvPV(p, len);
+ av = newAV();
+ av_push(av, newSVuv(toLOWER_utf8(input, s, &len)));
+
+ utf8 = newSVpvn((char *) s, len);
+ SvUTF8_on(utf8);
+ av_push(av, utf8);
+
+ av_push(av, newSVuv(len));
+ RETVAL = av;
+ OUTPUT:
+ RETVAL
+
+UV
+test_toFOLD(UV ord)
+ CODE:
+ RETVAL = toFOLD(ord);
+ OUTPUT:
+ RETVAL
+
+UV
+test_toFOLD_LC(UV ord)
+ CODE:
+ RETVAL = toFOLD_LC(ord);
+ OUTPUT:
+ RETVAL
+
+AV *
+test_toFOLD_uni(UV ord)
+ PREINIT:
+ U8 s[UTF8_MAXBYTES_CASE + 1];
+ STRLEN len;
+ AV *av;
+ SV *utf8;
+ CODE:
+ av = newAV();
+ av_push(av, newSVuv(toFOLD_uni(ord, s, &len)));
+
+ utf8 = newSVpvn((char *) s, len);
+ SvUTF8_on(utf8);
+ av_push(av, utf8);
+
+ av_push(av, newSVuv(len));
+ RETVAL = av;
+ OUTPUT:
+ RETVAL
+
+AV *
+test_toFOLD_utf8(SV * p)
+ PREINIT:
+ U8 *input;
+ U8 s[UTF8_MAXBYTES_CASE + 1];
+ STRLEN len;
+ AV *av;
+ SV *utf8;
+ CODE:
+ input = (U8 *) SvPV(p, len);
+ av = newAV();
+ av_push(av, newSVuv(toFOLD_utf8(input, s, &len)));
+
+ utf8 = newSVpvn((char *) s, len);
+ SvUTF8_on(utf8);
+ av_push(av, utf8);
+
+ av_push(av, newSVuv(len));
+ RETVAL = av;
+ OUTPUT:
+ RETVAL
+
+UV
+test_toUPPER(UV ord)
+ CODE:
+ RETVAL = toUPPER(ord);
+ OUTPUT:
+ RETVAL
+
+UV
+test_toUPPER_LC(UV ord)
+ CODE:
+ RETVAL = toUPPER_LC(ord);
+ OUTPUT:
+ RETVAL
+
+AV *
+test_toUPPER_uni(UV ord)
+ PREINIT:
+ U8 s[UTF8_MAXBYTES_CASE + 1];
+ STRLEN len;
+ AV *av;
+ SV *utf8;
+ CODE:
+ av = newAV();
+ av_push(av, newSVuv(toUPPER_uni(ord, s, &len)));
+
+ utf8 = newSVpvn((char *) s, len);
+ SvUTF8_on(utf8);
+ av_push(av, utf8);
+
+ av_push(av, newSVuv(len));
+ RETVAL = av;
+ OUTPUT:
+ RETVAL
+
+AV *
+test_toUPPER_utf8(SV * p)
+ PREINIT:
+ U8 *input;
+ U8 s[UTF8_MAXBYTES_CASE + 1];
+ STRLEN len;
+ AV *av;
+ SV *utf8;
+ CODE:
+ input = (U8 *) SvPV(p, len);
+ av = newAV();
+ av_push(av, newSVuv(toUPPER_utf8(input, s, &len)));
+
+ utf8 = newSVpvn((char *) s, len);
+ SvUTF8_on(utf8);
+ av_push(av, utf8);
+
+ av_push(av, newSVuv(len));
+ RETVAL = av;
+ OUTPUT:
+ RETVAL
+
+UV
+test_toTITLE(UV ord)
+ CODE:
+ RETVAL = toTITLE(ord);
+ OUTPUT:
+ RETVAL
+
+AV *
+test_toTITLE_uni(UV ord)
+ PREINIT:
+ U8 s[UTF8_MAXBYTES_CASE + 1];
+ STRLEN len;
+ AV *av;
+ SV *utf8;
+ CODE:
+ av = newAV();
+ av_push(av, newSVuv(toTITLE_uni(ord, s, &len)));
+
+ utf8 = newSVpvn((char *) s, len);
+ SvUTF8_on(utf8);
+ av_push(av, utf8);
+
+ av_push(av, newSVuv(len));
+ RETVAL = av;
+ OUTPUT:
+ RETVAL
+
+AV *
+test_toTITLE_utf8(SV * p)
+ PREINIT:
+ U8 *input;
+ U8 s[UTF8_MAXBYTES_CASE + 1];
+ STRLEN len;
+ AV *av;
+ SV *utf8;
+ CODE:
+ input = (U8 *) SvPV(p, len);
+ av = newAV();
+ av_push(av, newSVuv(toTITLE_utf8(input, s, &len)));
+
+ utf8 = newSVpvn((char *) s, len);
+ SvUTF8_on(utf8);
+ av_push(av, utf8);
+
+ av_push(av, newSVuv(len));
+ RETVAL = av;
+ OUTPUT:
+ RETVAL
use XS::APItest;
-use Unicode::UCD qw(prop_invlist);
+use Unicode::UCD qw(prop_invlist prop_invmap);
sub truth($) { # Converts values so is() works
return (shift) ? 1 : 0;
push @code_points, 0x110000; # Above Unicode, no prop should match
no warnings 'non_unicode';
- for my $i (@code_points) {
+ for my $j (@code_points) {
+ my $i = utf8::native_to_unicode($j);
my $function = uc($name);
my $matches = Unicode::UCD::_search_invlist(\@invlist, $i);
}
}
+my %to_properties = (
+ FOLD => 'Case_Folding',
+ LOWER => 'Lowercase_Mapping',
+ TITLE => 'Titlecase_Mapping',
+ UPPER => 'Uppercase_Mapping',
+ );
+
+
+foreach my $name (sort keys %to_properties) {
+ my $property = $to_properties{$name};
+ my ($list_ref, $map_ref, $format, $missing)
+ = prop_invmap($property, );
+ if (! $list_ref || ! $map_ref) {
+ fail("No inversion map found for $property");
+ next;
+ }
+ if ($format ne "al") {
+ fail("Unexpected inversion map format ('$format') found for $property");
+ next;
+ }
+
+ # Include all the Latin1 code points, plus 0x100.
+ my @code_points = (0 .. 256);
+
+ # Then include the next few multi-char folds above those from this
+ # property, and include the next few single folds as well
+ my $above_latins = 0;
+ my $multi_char = 0;
+ for my $i (0 .. @{$list_ref} - 1) {
+ my $range_start = $list_ref->[$i];
+ next if $range_start < 257;
+ if (ref $map_ref->[$i] && $multi_char < 5) {
+ push @code_points, $range_start - 1 if $code_points[-1] != $range_start - 1;
+ push @code_points, $range_start;
+ $multi_char++;
+ }
+ elsif ($above_latins < 5) {
+ push @code_points, $range_start - 1 if $code_points[-1] != $range_start - 1;
+ push @code_points, $range_start;
+ $above_latins++;
+ }
+ last if $above_latins >= 5 && $multi_char >= 5;
+ }
+
+ # And finally one non-Unicode code point.
+ push @code_points, 0x110000; # Above Unicode, no prop should match
+ no warnings 'non_unicode';
+
+ # $j is native; $i unicode.
+ for my $j (@code_points) {
+ my $i = utf8::native_to_unicode($j);
+ my $function = $name;
+
+ my $index = Unicode::UCD::_search_invlist(\@{$list_ref}, $j);
+
+ my $ret;
+ my $char_name = charnames::viacode($j) // "No name";
+ my $display_name = sprintf "\\N{U+%02X, %s}", $i, $char_name;
+
+ # Test the base function
+ $ret = eval "test_to${function}($j)";
+ if ($@) {
+ fail($@);
+ }
+ else {
+ my $should_be = ($i < 128 && $map_ref->[$index] != $missing)
+ ? $map_ref->[$index] + $j - $list_ref->[$index]
+ : $j;
+ is ($ret, $should_be, sprintf("to${function}( $display_name ) == 0x%02X", $should_be));
+ }
+
+ # Test _L1
+ if ($name eq 'LOWER') {
+ $ret = eval "test_to${function}_L1($j)";
+ if ($@) {
+ fail($@);
+ }
+ else {
+ my $should_be = ($i < 256 && $map_ref->[$index] != $missing)
+ ? $map_ref->[$index] + $j - $list_ref->[$index]
+ : $j;
+ is ($ret, $should_be, sprintf("to${function}_L1( $display_name ) == 0x%02X", $should_be));
+ }
+ }
+
+ if ($name ne 'TITLE' && defined $locale) {
+ require locale; import locale;
+
+ # Test _LC; titlecase is not defined in locales.
+ $ret = eval "test_to${function}_LC($j)";
+ if ($@) {
+ fail($@);
+ }
+ else {
+ my $should_be = ($i < 128 && $map_ref->[$index] != $missing)
+ ? $map_ref->[$index] + $j - $list_ref->[$index]
+ : $j;
+ is ($ret, $should_be, sprintf("to${function}_LC( $display_name ) == 0x%02X", $should_be));
+ }
+ }
+
+ # The _uni and _utf8 functions return both the ordinal of the first
+ # code point of the result, and the result in utf8. The .xs tests
+ # return these in an array, in [0] and [1] respectively, with [2] the
+ # length of the utf8 in bytes.
+ my $utf8_should_be = "";
+ my $first_ord_should_be;
+ if (ref $map_ref->[$index]) { # A multi-char result
+ for my $j (0 .. @{$map_ref->[$index]} - 1) {
+ $utf8_should_be .= chr $map_ref->[$index][$j];
+ }
+
+ $first_ord_should_be = $map_ref->[$index][0];
+ }
+ else { # A single-char result
+ $first_ord_should_be = ($map_ref->[$index] != $missing)
+ ? $map_ref->[$index] + $j - $list_ref->[$index]
+ : $j;
+ $utf8_should_be = chr $first_ord_should_be;
+ }
+ utf8::upgrade($utf8_should_be);
+
+ # Test _uni
+ my $s;
+ my $len;
+ $ret = eval "test_to${function}_uni($j)";
+ if ($@) {
+ fail($@);
+ }
+ else {
+ is ($ret->[0], $first_ord_should_be, sprintf("to${function}_uni( $display_name ) == 0x%02X", $first_ord_should_be));
+ is ($ret->[1], $utf8_should_be, sprintf("utf8 of to${function}_uni( $display_name )"));
+ use bytes;
+ is ($ret->[2], length $utf8_should_be, sprintf("number of bytes in utf8 of to${function}_uni( $display_name )"));
+ }
+
+ # Test _utf8
+ my $char = chr($j);
+ utf8::upgrade($char);
+ $char = quotemeta $char if $char eq '\\' || $char eq "'";
+ $ret = eval "test_to${function}_utf8('$char')";
+ if ($@) {
+ fail($@);
+ }
+ else {
+ is ($ret->[0], $first_ord_should_be, sprintf("to${function}_utf8( $display_name ) == 0x%02X", $first_ord_should_be));
+ is ($ret->[1], $utf8_should_be, sprintf("utf8 of to${function}_utf8( $display_name )"));
+ use bytes;
+ is ($ret->[2], length $utf8_should_be, sprintf("number of bytes in utf8 of to${function}_uni( $display_name )"));
+ }
+
+ }
+}
+
# This is primarily to make sure that no non-Unicode warnings get generated
is(scalar @warnings, 0, "No warnings were generated " . join ", ", @warnings);