This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Add tests for case-change macros API
authorKarl Williamson <public@khwilliamson.com>
Mon, 20 May 2013 16:39:56 +0000 (10:39 -0600)
committerKarl Williamson <public@khwilliamson.com>
Mon, 20 May 2013 17:01:50 +0000 (11:01 -0600)
ext/XS-APItest/APItest.pm
ext/XS-APItest/APItest.xs
ext/XS-APItest/t/handy.t

index 410477a..b87c4e1 100644 (file)
@@ -5,7 +5,7 @@ use strict;
 use warnings;
 use Carp;
 
-our $VERSION = '0.51';
+our $VERSION = '0.52';
 
 require XSLoader;
 
index dbb4f50..3f76dd7 100644 (file)
@@ -4443,3 +4443,227 @@ test_isQUOTEMETA(UV ord)
         RETVAL = _isQUOTEMETA(ord);
     OUTPUT:
         RETVAL
+
+UV
+test_toLOWER(UV ord)
+    CODE:
+        RETVAL = toLOWER(ord);
+    OUTPUT:
+        RETVAL
+
+UV
+test_toLOWER_L1(UV ord)
+    CODE:
+        RETVAL = toLOWER_L1(ord);
+    OUTPUT:
+        RETVAL
+
+UV
+test_toLOWER_LC(UV ord)
+    CODE:
+        RETVAL = toLOWER_LC(ord);
+    OUTPUT:
+        RETVAL
+
+AV *
+test_toLOWER_uni(UV ord)
+    PREINIT:
+        U8 s[UTF8_MAXBYTES_CASE + 1];
+        STRLEN len;
+        AV *av;
+        SV *utf8;
+    CODE:
+        av = newAV();
+        av_push(av, newSVuv(toLOWER_uni(ord, s, &len)));
+
+        utf8 = newSVpvn((char *) s, len);
+        SvUTF8_on(utf8);
+        av_push(av, utf8);
+
+        av_push(av, newSVuv(len));
+        RETVAL = av;
+    OUTPUT:
+        RETVAL
+
+AV *
+test_toLOWER_utf8(SV * p)
+    PREINIT:
+        U8 *input;
+        U8 s[UTF8_MAXBYTES_CASE + 1];
+        STRLEN len;
+        AV *av;
+        SV *utf8;
+    CODE:
+        input = (U8 *) SvPV(p, len);
+        av = newAV();
+        av_push(av, newSVuv(toLOWER_utf8(input, s, &len)));
+
+        utf8 = newSVpvn((char *) s, len);
+        SvUTF8_on(utf8);
+        av_push(av, utf8);
+
+        av_push(av, newSVuv(len));
+        RETVAL = av;
+    OUTPUT:
+        RETVAL
+
+UV
+test_toFOLD(UV ord)
+    CODE:
+        RETVAL = toFOLD(ord);
+    OUTPUT:
+        RETVAL
+
+UV
+test_toFOLD_LC(UV ord)
+    CODE:
+        RETVAL = toFOLD_LC(ord);
+    OUTPUT:
+        RETVAL
+
+AV *
+test_toFOLD_uni(UV ord)
+    PREINIT:
+        U8 s[UTF8_MAXBYTES_CASE + 1];
+        STRLEN len;
+        AV *av;
+        SV *utf8;
+    CODE:
+        av = newAV();
+        av_push(av, newSVuv(toFOLD_uni(ord, s, &len)));
+
+        utf8 = newSVpvn((char *) s, len);
+        SvUTF8_on(utf8);
+        av_push(av, utf8);
+
+        av_push(av, newSVuv(len));
+        RETVAL = av;
+    OUTPUT:
+        RETVAL
+
+AV *
+test_toFOLD_utf8(SV * p)
+    PREINIT:
+        U8 *input;
+        U8 s[UTF8_MAXBYTES_CASE + 1];
+        STRLEN len;
+        AV *av;
+        SV *utf8;
+    CODE:
+        input = (U8 *) SvPV(p, len);
+        av = newAV();
+        av_push(av, newSVuv(toFOLD_utf8(input, s, &len)));
+
+        utf8 = newSVpvn((char *) s, len);
+        SvUTF8_on(utf8);
+        av_push(av, utf8);
+
+        av_push(av, newSVuv(len));
+        RETVAL = av;
+    OUTPUT:
+        RETVAL
+
+UV
+test_toUPPER(UV ord)
+    CODE:
+        RETVAL = toUPPER(ord);
+    OUTPUT:
+        RETVAL
+
+UV
+test_toUPPER_LC(UV ord)
+    CODE:
+        RETVAL = toUPPER_LC(ord);
+    OUTPUT:
+        RETVAL
+
+AV *
+test_toUPPER_uni(UV ord)
+    PREINIT:
+        U8 s[UTF8_MAXBYTES_CASE + 1];
+        STRLEN len;
+        AV *av;
+        SV *utf8;
+    CODE:
+        av = newAV();
+        av_push(av, newSVuv(toUPPER_uni(ord, s, &len)));
+
+        utf8 = newSVpvn((char *) s, len);
+        SvUTF8_on(utf8);
+        av_push(av, utf8);
+
+        av_push(av, newSVuv(len));
+        RETVAL = av;
+    OUTPUT:
+        RETVAL
+
+AV *
+test_toUPPER_utf8(SV * p)
+    PREINIT:
+        U8 *input;
+        U8 s[UTF8_MAXBYTES_CASE + 1];
+        STRLEN len;
+        AV *av;
+        SV *utf8;
+    CODE:
+        input = (U8 *) SvPV(p, len);
+        av = newAV();
+        av_push(av, newSVuv(toUPPER_utf8(input, s, &len)));
+
+        utf8 = newSVpvn((char *) s, len);
+        SvUTF8_on(utf8);
+        av_push(av, utf8);
+
+        av_push(av, newSVuv(len));
+        RETVAL = av;
+    OUTPUT:
+        RETVAL
+
+UV
+test_toTITLE(UV ord)
+    CODE:
+        RETVAL = toTITLE(ord);
+    OUTPUT:
+        RETVAL
+
+AV *
+test_toTITLE_uni(UV ord)
+    PREINIT:
+        U8 s[UTF8_MAXBYTES_CASE + 1];
+        STRLEN len;
+        AV *av;
+        SV *utf8;
+    CODE:
+        av = newAV();
+        av_push(av, newSVuv(toTITLE_uni(ord, s, &len)));
+
+        utf8 = newSVpvn((char *) s, len);
+        SvUTF8_on(utf8);
+        av_push(av, utf8);
+
+        av_push(av, newSVuv(len));
+        RETVAL = av;
+    OUTPUT:
+        RETVAL
+
+AV *
+test_toTITLE_utf8(SV * p)
+    PREINIT:
+        U8 *input;
+        U8 s[UTF8_MAXBYTES_CASE + 1];
+        STRLEN len;
+        AV *av;
+        SV *utf8;
+    CODE:
+        input = (U8 *) SvPV(p, len);
+        av = newAV();
+        av_push(av, newSVuv(toTITLE_utf8(input, s, &len)));
+
+        utf8 = newSVpvn((char *) s, len);
+        SvUTF8_on(utf8);
+        av_push(av, utf8);
+
+        av_push(av, newSVuv(len));
+        RETVAL = av;
+    OUTPUT:
+        RETVAL
index eb620ec..84ffb02 100644 (file)
@@ -6,7 +6,7 @@ use Config;
 
 use XS::APItest;
 
-use Unicode::UCD qw(prop_invlist);
+use Unicode::UCD qw(prop_invlist prop_invmap);
 
 sub truth($) {  # Converts values so is() works
     return (shift) ? 1 : 0;
@@ -94,7 +94,8 @@ foreach my $name (sort keys %properties) {
     push @code_points, 0x110000;    # Above Unicode, no prop should match
     no warnings 'non_unicode';
 
-    for my $i (@code_points) {
+    for my $j (@code_points) {
+        my $i = utf8::native_to_unicode($j);
         my $function = uc($name);
 
         my $matches = Unicode::UCD::_search_invlist(\@invlist, $i);
@@ -206,6 +207,160 @@ foreach my $name (sort keys %properties) {
     }
 }
 
+my %to_properties = (
+                FOLD => 'Case_Folding',
+                LOWER => 'Lowercase_Mapping',
+                TITLE => 'Titlecase_Mapping',
+                UPPER => 'Uppercase_Mapping',
+            );
+
+
+foreach my $name (sort keys %to_properties) {
+    my $property = $to_properties{$name};
+    my ($list_ref, $map_ref, $format, $missing)
+                                      = prop_invmap($property, );
+    if (! $list_ref || ! $map_ref) {
+        fail("No inversion map found for $property");
+        next;
+    }
+    if ($format ne "al") {
+        fail("Unexpected inversion map format ('$format') found for $property");
+        next;
+    }
+
+    # Include all the Latin1 code points, plus 0x100.
+    my @code_points = (0 .. 256);
+
+    # Then include the next few multi-char folds above those from this
+    # property, and include the next few single folds as well
+    my $above_latins = 0;
+    my $multi_char = 0;
+    for my $i (0 .. @{$list_ref} - 1) {
+        my $range_start = $list_ref->[$i];
+        next if $range_start < 257;
+        if (ref $map_ref->[$i] && $multi_char < 5)  {
+            push @code_points, $range_start - 1 if $code_points[-1] != $range_start - 1;
+            push @code_points, $range_start;
+            $multi_char++;
+        }
+        elsif ($above_latins < 5) {
+            push @code_points, $range_start - 1 if $code_points[-1] != $range_start - 1;
+            push @code_points, $range_start;
+            $above_latins++;
+        }
+        last if $above_latins >= 5 && $multi_char >= 5;
+    }
+
+    # And finally one non-Unicode code point.
+    push @code_points, 0x110000;    # Above Unicode, no prop should match
+    no warnings 'non_unicode';
+
+    # $j is native; $i unicode.
+    for my $j (@code_points) {
+        my $i = utf8::native_to_unicode($j);
+        my $function = $name;
+
+        my $index = Unicode::UCD::_search_invlist(\@{$list_ref}, $j);
+
+        my $ret;
+        my $char_name = charnames::viacode($j) // "No name";
+        my $display_name = sprintf "\\N{U+%02X, %s}", $i, $char_name;
+
+        # Test the base function
+        $ret = eval "test_to${function}($j)";
+        if ($@) {
+            fail($@);
+        }
+        else {
+            my $should_be = ($i < 128 && $map_ref->[$index] != $missing)
+                             ? $map_ref->[$index] + $j - $list_ref->[$index]
+                             : $j;
+            is ($ret, $should_be, sprintf("to${function}( $display_name ) == 0x%02X", $should_be));
+        }
+
+        # Test _L1
+        if ($name eq 'LOWER') {
+            $ret = eval "test_to${function}_L1($j)";
+            if ($@) {
+                fail($@);
+            }
+            else {
+                my $should_be = ($i < 256 && $map_ref->[$index] != $missing)
+                                ? $map_ref->[$index] + $j - $list_ref->[$index]
+                                : $j;
+                is ($ret, $should_be, sprintf("to${function}_L1( $display_name ) == 0x%02X", $should_be));
+            }
+        }
+
+        if ($name ne 'TITLE' && defined $locale) {
+            require locale; import locale;
+
+            # Test _LC;  titlecase is not defined in locales.
+            $ret = eval "test_to${function}_LC($j)";
+            if ($@) {
+                fail($@);
+            }
+            else {
+                my $should_be = ($i < 128 && $map_ref->[$index] != $missing)
+                                ? $map_ref->[$index] + $j - $list_ref->[$index]
+                                : $j;
+                is ($ret, $should_be, sprintf("to${function}_LC( $display_name ) == 0x%02X", $should_be));
+            }
+        }
+
+        # The _uni and _utf8 functions return both the ordinal of the first
+        # code point of the result, and the result in utf8.  The .xs tests
+        # return these in an array, in [0] and [1] respectively, with [2] the
+        # length of the utf8 in bytes.
+        my $utf8_should_be = "";
+        my $first_ord_should_be;
+        if (ref $map_ref->[$index]) {   # A multi-char result
+            for my $j (0 .. @{$map_ref->[$index]} - 1) {
+                $utf8_should_be .= chr $map_ref->[$index][$j];
+            }
+
+            $first_ord_should_be = $map_ref->[$index][0];
+        }
+        else {  # A single-char result
+            $first_ord_should_be = ($map_ref->[$index] != $missing)
+                                    ? $map_ref->[$index] + $j - $list_ref->[$index]
+                                    : $j;
+            $utf8_should_be = chr $first_ord_should_be;
+        }
+        utf8::upgrade($utf8_should_be);
+
+        # Test _uni
+        my $s;
+        my $len;
+        $ret = eval "test_to${function}_uni($j)";
+        if ($@) {
+            fail($@);
+        }
+        else {
+            is ($ret->[0], $first_ord_should_be, sprintf("to${function}_uni( $display_name ) == 0x%02X", $first_ord_should_be));
+            is ($ret->[1], $utf8_should_be, sprintf("utf8 of to${function}_uni( $display_name )"));
+            use bytes;
+            is ($ret->[2], length $utf8_should_be, sprintf("number of bytes in utf8 of to${function}_uni( $display_name )"));
+        }
+
+        # Test _utf8
+        my $char = chr($j);
+        utf8::upgrade($char);
+        $char = quotemeta $char if $char eq '\\' || $char eq "'";
+        $ret = eval "test_to${function}_utf8('$char')";
+        if ($@) {
+            fail($@);
+        }
+        else {
+            is ($ret->[0], $first_ord_should_be, sprintf("to${function}_utf8( $display_name ) == 0x%02X", $first_ord_should_be));
+            is ($ret->[1], $utf8_should_be, sprintf("utf8 of to${function}_utf8( $display_name )"));
+            use bytes;
+            is ($ret->[2], length $utf8_should_be, sprintf("number of bytes in utf8 of to${function}_uni( $display_name )"));
+        }
+
+    }
+}
+
 # This is primarily to make sure that no non-Unicode warnings get generated
 is(scalar @warnings, 0, "No warnings were generated " . join ", ", @warnings);