lib/Unicode/UCD.pm: Clarify pod

[perl5.git] / lib / Unicode / UCD.pm
diff --git a/lib/Unicode/UCD.pm b/lib/Unicode/UCD.pm

index 81d729e..2ec57ad 100644 (file)
--- a/lib/Unicode/UCD.pm
+++ b/lib/Unicode/UCD.pm
@@ -5,7 +5,7 @@ use warnings;
  no warnings 'surrogate';    # surrogates can be inputs to this
  use charnames ();
  
-our $VERSION = '0.44';
+our $VERSION = '0.49';
  
  require Exporter;
  
@@ -1006,12 +1006,12 @@ dotless lowercase i:
  
  =over
  
-=item B<*> If you use this C<I> mapping
+=item Z<>B<*> If you use this C<I> mapping
  
  the result is case-insensitive,
  but dotless and dotted I's are not distinguished
  
-=item B<*> If you exclude this C<I> mapping
+=item Z<>B<*> If you exclude this C<I> mapping
  
  the result is not fully case-insensitive, but
  dotless and dotted I's are distinguished
@@ -2017,7 +2017,8 @@ by the input parameter string:
   prints:
   0, 1114112
  
-An empty list is returned if the input is unknown; the number of elements in
+If the input is unknown, C<undef> is returned in scalar context, and an empty
+list in list context.  If the input is known, the number of elements in
  the list is returned if called in scalar context.
  
  L<perluniprops|perluniprops/Properties accessible through \p{} and \P{}> gives
@@ -2127,8 +2128,12 @@ properties, and will return C<undef> if called with one of those.
  our %loose_defaults;
  our $MAX_UNICODE_CODEPOINT;
  
-sub prop_invlist ($) {
+sub prop_invlist ($;$) {
      my $prop = $_[0];
+
+    # Undocumented way to get at Perl internal properties
+    my $internal_ok = defined $_[1] && $_[1] eq '_perl_core_internal_ok';
+
      return if ! defined $prop;
  
      require "utf8_heavy.pl";
@@ -2145,7 +2150,7 @@ sub prop_invlist ($) {
                || ref $swash eq ""
                || $swash->{'BITS'} != 1
                || $swash->{'USER_DEFINED'}
-              || $prop =~ /^\s*_/;
+              || (! $internal_ok && $prop =~ /^\s*_/);
  
      if ($swash->{'EXTRAS'}) {
          carp __PACKAGE__, "::prop_invlist: swash returned for $prop unexpectedly has EXTRAS magic";
@@ -2250,7 +2255,8 @@ sub prop_invlist ($) {
  
  sub _search_invlist {
      # Find the range in the inversion list which contains a code point; that
-    # is, find i such that l[i] <= code_point < l[i+1]
+    # is, find i such that l[i] <= code_point < l[i+1].  Returns undef if no
+    # such i.
  
      # If this is ever made public, could use to speed up .t specials.  Would
      # need to use code point argument, as in other functions in this pm
@@ -2260,7 +2266,10 @@ sub _search_invlist {
      # Verify non-neg numeric  XXX
  
      my $max_element = @$list_ref - 1;
-    return if ! $max_element < 0;     # Undef if list is empty.
+
+    # Return undef if list is empty or requested item is before the first element.
+    return if $max_element < 0;
+    return if $code_point < $list_ref->[0];
  
      # Short cut something at the far-end of the table.  This also allows us to
      # refer to element [$i+1] without fear of being out-of-bounds in the loop
@@ -3531,7 +3540,8 @@ sub UnicodeVersion {
  The difference between a block and a script is that scripts are closer
  to the linguistic notion of a set of code points required to present
  languages, while block is more of an artifact of the Unicode code point
-numbering and separation into blocks of (mostly) 256 code points.
+numbering and separation into blocks of consecutive code points (so far the
+size of a block is some multiple of 16, like 128 or 256).
  
  For example the Latin B<script> is spread over several B<blocks>, such
  as C<Basic Latin>, C<Latin 1 Supplement>, C<Latin Extended-A>, and