Add all_casefolds()
author Karl Williamson <public@khwilliamson.com>
Wed, 28 Mar 2012 13:50:12 +0000 (07:50 -0600)
committer Karl Williamson <public@khwilliamson.com>
Sat, 2 Jun 2012 14:29:16 +0000 (08:29 -0600)
This function returns the entire structure that casefold() builds.  It
is useful for a .t.

lib/Unicode/UCD.pm
pod/perldelta.pod

index 3dbd059..67bedd0 100644 (file)
@@ -17,7 +17,7 @@ our @EXPORT_OK = qw(charinfo
                    charinrange
                    general_categories bidi_types
                    compexcl
-                   casefold casespec
+                   casefold all_casefolds casespec
                    namedseq
                     num
                     prop_aliases
@@ -41,6 +41,9 @@ Unicode::UCD - Unicode character database
     use Unicode::UCD 'casefold';
     my $casefold = casefold(0xFB00);
 
+    use Unicode::UCD 'all_casefolds';
+    my $all_casefolds_ref = all_casefolds();
+
     use Unicode::UCD 'casespec';
     my $casespec = casespec(0xFB00);
 
@@ -1153,6 +1156,55 @@ sub casefold {
     return $CASEFOLD{$code};
 }
 
+=head2 B<all_casefolds()>
+
+
+    use Unicode::UCD 'all_casefolds';
+
+    my $all_folds_ref = all_casefolds();
+    foreach my $char_with_casefold (sort { $a <=> $b }
+                                    keys %$all_folds_ref)
+    {
+        printf "%04X:", $char_with_casefold;
+        my $casefold = $all_folds_ref->{$char_with_casefold};
+
+        # Get folds for $char_with_casefold
+
+        my @full_fold_hex = split / /, $casefold->{'full'};
+        my $full_fold_string =
+                    join "", map {chr(hex($_))} @full_fold_hex;
+        print " full=", join " ", @full_fold_hex;
+        my @turkic_fold_hex =
+                        split / /, ($casefold->{'turkic'} ne "")
+                                        ? $casefold->{'turkic'}
+                                        : $casefold->{'full'};
+        my $turkic_fold_string =
+                        join "", map {chr(hex($_))} @turkic_fold_hex;
+        print "; turkic=", join " ", @turkic_fold_hex;
+        if (defined $casefold && $casefold->{'simple'} ne "") {
+            my $simple_fold_hex = $casefold->{'simple'};
+            my $simple_fold_string = chr(hex($simple_fold_hex));
+            print "; simple=$simple_fold_hex";
+        }
+        print "\n";
+    }
+
+This returns all the case foldings in the current version of Unicode in the
+form of a reference to a hash.  Each key to the hash is the decimal
+representation of a Unicode character that has a casefold to other than
+itself.  The casefold of a semi-colon is itself, so it isn't in the hash;
+likewise for a lowercase "a", but there is an entry for a capital "A".  The
+hash value for each key is another hash, identical to what is returned by
+L</casefold()> if called with that code point as its argument.  So the value
+C<< all_casefolds()->{ord("A")} >> is equivalent to C<casefold(ord("A"))>.
+
+=cut
+
+sub all_casefolds () {
+    %CASEFOLD or _casefold();    # fill %CASEFOLD if it is still empty
+    return _dclone(\%CASEFOLD);  # presumably a deep copy -- TODO confirm
+}
+
 =head2 B<casespec()>
 
     use Unicode::UCD 'casespec';
index b05718c..f49aaff 100644 (file)
@@ -101,6 +101,12 @@ L<IO> has been upgraded from version 1.25_06 to version 1.25_07.
 
 C<sync()> can now be called on read only file handles [perl #64772].
 
+=item *
+
+L<Unicode::UCD> has been upgraded from version 0.43 to version 0.44.
+This adds a function L<all_casefolds()|Unicode::UCD/all_casefolds()>
+that returns all the casefolds.
+
 =back
 
 =head2 Removed Modules and Pragmata