Present_In => 0, # Suppress, as easily computed from Age
Block => 0, # Suppress, as Blocks.txt is retained.
+
+ # Suppress, as mapping can be found instead from the
+ # Perl_Decomposition_Mapping file
+ Decomposition_Type => 0,
);
# Properties that this program ignores.
'Other_Lowercase' => $contributory,
'Other_Math' => $contributory,
'Other_Uppercase' => $contributory,
+ 'Expands_On_NFC' => $why_no_expand,
+ 'Expands_On_NFD' => $why_no_expand,
+ 'Expands_On_NFKC' => $why_no_expand,
+ 'Expands_On_NFKD' => $why_no_expand,
);
%why_suppressed = (
'Name_Alias' => "Accessible via 'use charnames;'",
FC_NFKC_Closure => 'Supplanted in usage by NFKC_Casefold; otherwise not useful',
- Expands_On_NFC => $why_no_expand,
- Expands_On_NFD => $why_no_expand,
- Expands_On_NFKC => $why_no_expand,
- Expands_On_NFKD => $why_no_expand,
);
# The following are suppressed because they were made contributory or
# deprecated by Unicode before Perl ever thought about supporting them.
- foreach my $property ('Jamo_Short_Name', 'Grapheme_Link') {
+ foreach my $property ('Jamo_Short_Name',
+ 'Grapheme_Link',
+ 'Expands_On_NFC',
+ 'Expands_On_NFD',
+ 'Expands_On_NFKC',
+ 'Expands_On_NFKD'
+ ) {
$why_suppressed{$property} = $why_deprecated{$property};
}
if ($v_version ge 5.2.0 && $v_version lt 6.0.0) {
$why_obsolete{'ISO_Comment'} = 'Code points for it have been removed';
if ($v_version ge 6.0.0) {
- $why_deprecated{'ISO_Comment'} = 'No longer needed for chart generation; otherwise not useful, and code points for it have been removed';
+ $why_deprecated{'ISO_Comment'} = 'No longer needed for Unicode\'s internal chart generation; otherwise not useful, and code points for it have been removed';
}
}
# Below are files that Unicode furnishes, but this program ignores, and why
my %ignored_files = (
- 'CJKRadicals.txt' => 'Unihan data',
- 'Index.txt' => 'An index, not actual data',
- 'NamedSqProv.txt' => 'Not officially part of the Unicode standard; Append it to NamedSequences.txt if you want to process the contents.',
- 'NamesList.txt' => 'Just adds commentary',
- 'NormalizationCorrections.txt' => 'Data is already in other files.',
- 'Props.txt' => 'Adds nothing to PropList.txt; only in very early releases',
- 'ReadMe.txt' => 'Just comments',
- 'README.TXT' => 'Just comments',
- 'StandardizedVariants.txt' => 'Only for glyph changes, not a Unicode character property. Does not fit into current scheme where one code point is mapped',
- 'EmojiSources.txt' => 'Not of general utility: for Japanese legacy cell-phone applications',
- 'IndicMatraCategory.txt' => 'Provisional',
- 'IndicSyllabicCategory.txt' => 'Provisional',
+ 'CJKRadicals.txt' => 'Maps the kRSUnicode property values to corresponding code points',
+ 'Index.txt' => 'Alphabetical index of Unicode characters',
+ 'NamedSqProv.txt' => 'Named sequences proposed for inclusion in a later version of the Unicode Standard; if you need them now, you can append this file to F<NamedSequences.txt> and recompile perl',
+ 'NamesList.txt' => 'Annotated list of characters',
+ 'NormalizationCorrections.txt' => 'Documentation of corrections already incorporated into the Unicode data base',
+ 'Props.txt' => 'Only in very early releases; is a subset of F<PropList.txt> (which is used instead)',
+ 'ReadMe.txt' => 'Documentation',
+ 'StandardizedVariants.txt' => 'Certain glyph variations for character display are standardized. This lists the non-Unihan ones; the Unihan ones are also not used by Perl, and are in a separate Unicode data base L<http://www.unicode.org/ivd>',
+ 'EmojiSources.txt' => 'Maps certain Unicode code points to their legacy Japanese cell-phone values',
+ 'IndicMatraCategory.txt' => 'Provisional; for the analysis and processing of Indic scripts',
+ 'IndicSyllabicCategory.txt' => 'Provisional; for the analysis and processing of Indic scripts',
+ 'auxiliary/WordBreakTest.html' => 'Documentation of validation tests',
+ 'auxiliary/SentenceBreakTest.html' => 'Documentation of validation tests',
+ 'auxiliary/GraphemeBreakTest.html' => 'Documentation of validation tests',
+ 'auxiliary/LineBreakTest.html' => 'Documentation of validation tests',
);
### End of externally interesting definitions, except for @input_file_objects
EOF
-my $LAST_UNICODE_CODEPOINT_STRING = "10FFFF";
-my $LAST_UNICODE_CODEPOINT = hex $LAST_UNICODE_CODEPOINT_STRING;
-my $MAX_UNICODE_CODEPOINTS = $LAST_UNICODE_CODEPOINT + 1;
+my $MAX_UNICODE_CODEPOINT_STRING = "10FFFF";
+my $MAX_UNICODE_CODEPOINT = hex $MAX_UNICODE_CODEPOINT_STRING;
+my $MAX_UNICODE_CODEPOINTS = $MAX_UNICODE_CODEPOINT + 1;
# Matches legal code point. 4-6 hex numbers, If there are 6, the first
# two must be 10; if there are 5, the first must not be a 0. Written this way
-# to decrease backtracking. The first one allows the code point to be at the
-# end of a word, but to work properly, the word shouldn't end with a valid hex
-# character. The second one won't match a code point at the end of a word,
-# and doesn't have the run-on issue
+# to decrease backtracking. The first regex allows the code point to be at
+# the end of a word, but to work properly, the word shouldn't end with a valid
+# hex character. The second one won't match a code point at the end of a
+# word, and doesn't have the run-on issue
my $run_on_code_point_re =
qr/ (?: 10[0-9A-F]{4} | [1-9A-F][0-9A-F]{4} | [0-9A-F]{4} ) \b/x;
my $code_point_re = qr/\b$run_on_code_point_re/;
# depends on this ending with a semi-colon, so it can assume it is a valid
# field when the line is split() by semi-colons
my $missing_defaults_prefix =
- qr/^#\s+\@missing:\s+0000\.\.$LAST_UNICODE_CODEPOINT_STRING\s*;/;
+ qr/^#\s+\@missing:\s+0000\.\.$MAX_UNICODE_CODEPOINT_STRING\s*;/;
# Property types. Unicode has more types, but these are sufficient for our
# purposes.
my %Jamo_V; # Vowels
my %Jamo_T; # Trailing consonants
+# For code points whose name contains its ordinal as a '-ABCD' suffix.
+# The key is the base name of the code point, and the value is an
+# array giving all the ranges that use this base name. Each range
+# is actually a hash giving the 'low' and 'high' values of it.
+my %names_ending_in_code_point;
+my %loose_names_ending_in_code_point; # Same as above, but has blanks and
+ # dashes removed from the names
+# Inverse mapping. The list of ranges that have these kinds of
+# names. Each element contains the low, high, and base names in an
+# anonymous hash.
+my @code_points_ending_in_code_point;
+
+# Boolean: does this Unicode version have the hangul syllables, and are we
+# writing out a table for them?
+my $has_hangul_syllables = 0;
+
+# Does this Unicode version have code points whose names end in their
+# respective code points, and are we writing out a table for them? 0 for no;
+# otherwise points to the first property that needs a table for them, so
+# that if multiple tables are needed, we don't create duplicates.
+my $needing_code_points_ending_in_code_point = 0;
+
my @backslash_X_tests; # List of tests read in for testing \X
my @unhandled_properties; # Will contain a list of properties found in
# the input that we didn't process.
main::set_access('non_skip', \%non_skip, 'c');
my %skip;
- # This is used to skip processing of this input file semi-permanently.
- # It is used for files that we aren't planning to process anytime soon,
- # but want to allow to be in the directory and not raise a message that we
- # are not handling. Mostly for test files. This is in contrast to the
- # non_skip element, which is supposed to be used very temporarily for
- # debugging. Sets 'optional' to 1
+ # This is used to skip processing of this input file semi-permanently,
+ # when it evaluates to true. The value should be the reason the file is
+ # being skipped. It is used for files that we aren't planning to process
+ # anytime soon, but want to allow to be in the directory and not raise a
+ # message that we are not handling. Mostly for test files. This is in
+ # contrast to the non_skip element, which is supposed to be used very
+ # temporarily for debugging. Sets 'optional' to 1. Also, files that we
+ # pretty much will never look at can be placed in the global
+ # %ignored_files instead. Ones used here will be added to that list.
main::set_access('skip', \%skip, 'c');
my %each_line_handler;
print "Warning: " . __PACKAGE__ . " constructor for $file{$addr} has useless 'non_skip' in it\n";
}
- $optional{$addr} = 1 if $skip{$addr};
+ # If skipping, set to optional, and add to list of ignored files,
+ # including its reason
+ if ($skip{$addr}) {
+ $optional{$addr} = 1;
+ $ignored_files{$file{$addr}} = $skip{$addr}
+ }
return $self;
}
# Should this name match loosely or not.
main::set_access('loose_match', \%loose_match, 'r');
- my %make_pod_entry;
- # Some aliases should not get their own entries because they are covered
- # by a wild-card, and some we want to discourage use of. Binary
- main::set_access('make_pod_entry', \%make_pod_entry, 'r');
+ my %make_re_pod_entry;
+ # Some aliases should not get their own entries in the re section of the
+ # pod, because they are covered by a wild-card, and some we want to
+ # discourage use of. Binary
+ main::set_access('make_re_pod_entry', \%make_re_pod_entry, 'r');
my %status;
# Aliases have a status, like deprecated, or even suppressed (which means
$name{$addr} = shift;
$loose_match{$addr} = shift;
- $make_pod_entry{$addr} = shift;
+ $make_re_pod_entry{$addr} = shift;
$externally_ok{$addr} = shift;
$status{$addr} = shift;
# If the range list is empty, return a large value that isn't adjacent
# to any that could be in the range list, for simpler tests
- return $LAST_UNICODE_CODEPOINT + 2 unless scalar @{$ranges{$addr}};
+ return $MAX_UNICODE_CODEPOINT + 2 unless scalar @{$ranges{$addr}};
return $ranges{$addr}->[0]->start;
}
# And finally, add the gap from the end of the table to the max
# possible code point
- if ($max < $LAST_UNICODE_CODEPOINT) {
- $new->add_range($max + 1, $LAST_UNICODE_CODEPOINT);
+ if ($max < $MAX_UNICODE_CODEPOINT) {
+ $new->add_range($max + 1, $MAX_UNICODE_CODEPOINT);
}
return $new;
}
return $try_hard if $code >= 0xFDD0 && $code <= 0xFDEF;
return $try_hard if ($code & 0xFFFE) == 0xFFFE; # includes FFFF
- return $try_hard if $code > $LAST_UNICODE_CODEPOINT; # keep in range
+ return $try_hard if $code > $MAX_UNICODE_CODEPOINT; # keep in range
return $try_hard if $code >= 0xD800 && $code <= 0xDFFF; # no surrogate
return 1;
my %internal_only;
# Boolean; if set this table is for internal core Perl only use.
- main::set_access('internal_only', \%internal_only);
+ main::set_access('internal_only', \%internal_only, 'r');
my %find_table_from_alias;
# The parent property passes this pointer to a hash which this class adds
sub new {
# All arguments are key => value pairs, which you can see below, most
- # of which match fields documented above. Otherwise: Pod_Entry,
+ # of which match fields documented above. Otherwise: Re_Pod_Entry,
# Externally_Ok, and Fuzzy apply to the names of the table, and are
# documented in the Alias package
my $complete_name = $complete_name{$addr}
= delete $args{'Complete_Name'};
$format{$addr} = delete $args{'Format'};
- $internal_only{$addr} = delete $args{'Internal_Only_Warning'} || 0;
+ $internal_only{$addr} = delete $args{'Internal_Only'} || 0;
$output_range_counts{$addr} = delete $args{'Output_Range_Counts'};
$property{$addr} = delete $args{'_Property'};
$range_list{$addr} = delete $args{'_Range_List'};
my $externally_ok = delete $args{'Externally_Ok'};
my $loose_match = delete $args{'Fuzzy'};
my $note = delete $args{'Note'};
- my $make_pod_entry = delete $args{'Pod_Entry'};
+ my $make_re_pod_entry = delete $args{'Re_Pod_Entry'};
my $perl_extension = delete $args{'Perl_Extension'};
# Shouldn't have any left over
# A placeholder table doesn't get documented, is a perl extension,
# and quite likely will be empty
- $make_pod_entry = 0 if ! defined $make_pod_entry;
+ $make_re_pod_entry = 0 if ! defined $make_re_pod_entry;
$perl_extension = 1 if ! defined $perl_extension;
push @tables_that_may_be_empty, $complete_name{$addr};
}
$perl_extension{$addr} = $perl_extension || 0;
+ # Don't list a property by default that is internal only
+ $make_re_pod_entry = 0 if ! defined $make_re_pod_entry
+ && $internal_only{$addr};
+
# By convention what typically gets printed only or first is what's
# first in the list, so put the full name there for good output
# clarity. Other routines rely on the full name being first on the
$self->add_alias($full_name{$addr},
Externally_Ok => $externally_ok,
Fuzzy => $loose_match,
- Pod_Entry => $make_pod_entry,
+ Re_Pod_Entry => $make_re_pod_entry,
Status => $status{$addr},
);
$self->add_alias($name{$addr},
Externally_Ok => $externally_ok,
Fuzzy => $loose_match,
- Pod_Entry => $make_pod_entry,
+ Re_Pod_Entry => $make_re_pod_entry,
Status => $status{$addr},
);
}
my %args = @_;
my $loose_match = delete $args{'Fuzzy'};
- my $make_pod_entry = delete $args{'Pod_Entry'};
- $make_pod_entry = $YES unless defined $make_pod_entry;
+ my $make_re_pod_entry = delete $args{'Re_Pod_Entry'};
+ $make_re_pod_entry = $YES unless defined $make_re_pod_entry;
my $externally_ok = delete $args{'Externally_Ok'};
$externally_ok = 1 unless defined $externally_ok;
splice @$list,
$insert_position,
0,
- Alias->new($name, $loose_match, $make_pod_entry,
+ Alias->new($name, $loose_match, $make_re_pod_entry,
$externally_ok, $status);
# This name may be shorter than any existing ones, so clear the cache
my $return = "";
$return .= $DEVELOPMENT_ONLY if $compare_versions;
$return .= $HEADER;
- no overloading;
- $return .= $INTERNAL_ONLY if $internal_only{pack 'J', $self};
return $return;
}
if defined $global_to_output_map{$full_name};
# If table says to output, do so; if says to suppress it, do so.
+ return $INTERNAL_MAP if $self->internal_only;
return $EXTERNAL_MAP if grep { $_ eq $full_name } @output_mapped_properties;
return 0 if $self->status eq $SUPPRESSED;
# The remaining variables are temporaries used while writing each table,
# to output special ranges.
- my $has_hangul_syllables;
my @multi_code_point_maps; # Map is to more than one code point.
- # The key is the base name of the code point, and the value is an
- # array giving all the ranges that use this base name. Each range
- # is actually a hash giving the 'low' and 'high' values of it.
- my %names_ending_in_code_point;
- my %loose_names_ending_in_code_point;
-
- # Inverse mapping. The list of ranges that have these kinds of
- # names. Each element contains the low, high, and base names in an
- # anonymous hash.
- my @code_points_ending_in_code_point;
-
sub handle_special_range {
# Called in the middle of write when it finds a range it doesn't know
# how to handle.
# No need to output the range if it maps to the default.
return if $map eq $default_map{$addr};
+ my $property = $self->property;
+
# Switch based on the map type...
if ($type == $HANGUL_SYLLABLE) {
# These are entirely algorithmically determinable based on
# some constants furnished by Unicode; for now, just set a
# flag to indicate that have them. After everything is figured
- # out, we will output the code that does the algorithm.
- $has_hangul_syllables = 1;
+ # out, we will output the code that does the algorithm. (Don't
+ # output them if not needed because we are suppressing this
+ # property.)
+ $has_hangul_syllables = 1 if $property->to_output_map;
}
elsif ($type == $CP_IN_NAME) {
- # Code points whose the name ends in their code point are also
+ # Code points whose name ends in their code point are also
# algorithmically determinable, but need information about the map
# to do so. Both the map and its inverse are stored in data
- # structures output in the file.
- push @{$names_ending_in_code_point{$map}->{'low'}}, $low;
- push @{$names_ending_in_code_point{$map}->{'high'}}, $high;
-
- my $squeezed = $map =~ s/[-\s]+//gr;
- push @{$loose_names_ending_in_code_point{$squeezed}->{'low'}}, $low;
- push @{$loose_names_ending_in_code_point{$squeezed}->{'high'}}, $high;
-
- push @code_points_ending_in_code_point, { low => $low,
- high => $high,
- name => $map
- };
+ # structures output in the file. They are stored in the meantime
+ # in global lists. The lists will be written out later into Name.pm,
+ # which is created only if needed. In order to prevent duplicates
+ # in the list, only add to them for one property, should multiple
+ # ones need them.
+ if ($needing_code_points_ending_in_code_point == 0) {
+ $needing_code_points_ending_in_code_point = $property;
+ }
+ if ($property == $needing_code_points_ending_in_code_point) {
+ push @{$names_ending_in_code_point{$map}->{'low'}}, $low;
+ push @{$names_ending_in_code_point{$map}->{'high'}}, $high;
+
+ my $squeezed = $map =~ s/[-\s]+//gr;
+ push @{$loose_names_ending_in_code_point{$squeezed}->{'low'}},
+ $low;
+ push @{$loose_names_ending_in_code_point{$squeezed}->{'high'}},
+ $high;
+
+ push @code_points_ending_in_code_point, { low => $low,
+ high => $high,
+ name => $map
+ };
+ }
}
elsif ($range->type == $MULTI_CP || $range->type == $NULL) {
$pre_body .= join("\n", @multi_code_point_maps) . "\n);\n";
}
- if ($has_hangul_syllables || @code_points_ending_in_code_point) {
-
- # Convert these structures to output format.
- my $code_points_ending_in_code_point =
- main::simple_dumper(\@code_points_ending_in_code_point,
- ' ' x 8);
- my $names = main::simple_dumper(\%names_ending_in_code_point,
- ' ' x 8);
- my $loose_names = main::simple_dumper(\%loose_names_ending_in_code_point,
- ' ' x 8);
-
- # Do the same with the Hangul names,
- my $jamo;
- my $jamo_l;
- my $jamo_v;
- my $jamo_t;
- my $jamo_re;
- if ($has_hangul_syllables) {
-
- # Construct a regular expression of all the possible
- # combinations of the Hangul syllables.
- my @L_re; # Leading consonants
- for my $i ($LBase .. $LBase + $LCount - 1) {
- push @L_re, $Jamo{$i}
- }
- my @V_re; # Middle vowels
- for my $i ($VBase .. $VBase + $VCount - 1) {
- push @V_re, $Jamo{$i}
- }
- my @T_re; # Trailing consonants
- for my $i ($TBase + 1 .. $TBase + $TCount - 1) {
- push @T_re, $Jamo{$i}
- }
-
- # The whole re is made up of the L V T combination.
- $jamo_re = '('
- . join ('|', sort @L_re)
- . ')('
- . join ('|', sort @V_re)
- . ')('
- . join ('|', sort @T_re)
- . ')?';
-
- # These hashes needed by the algorithm were generated
- # during reading of the Jamo.txt file
- $jamo = main::simple_dumper(\%Jamo, ' ' x 8);
- $jamo_l = main::simple_dumper(\%Jamo_L, ' ' x 8);
- $jamo_v = main::simple_dumper(\%Jamo_V, ' ' x 8);
- $jamo_t = main::simple_dumper(\%Jamo_T, ' ' x 8);
- }
-
- $pre_body .= <<END;
-
-# To achieve significant memory savings when this file is read in,
-# algorithmically derivable code points are omitted from the main body below.
-# Instead, the following routines can be used to translate between name and
-# code point and vice versa
-
-{ # Closure
-
- # Matches legal code point. 4-6 hex numbers, If there are 6, the
- # first two must be '10'; if there are 5, the first must not be a '0'.
- # First can match at the end of a word provided that the end of the
- # word doesn't look like a hex number.
- my \$run_on_code_point_re = qr/$run_on_code_point_re/;
- my \$code_point_re = qr/$code_point_re/;
-
- # In the following hash, the keys are the bases of names which includes
- # the code point in the name, like CJK UNIFIED IDEOGRAPH-4E01. The values
- # of each key is another hash which is used to get the low and high ends
- # for each range of code points that apply to the name.
- my %names_ending_in_code_point = (
-$names
- );
-
- # The following hash is a copy of the previous one, except is for loose
- # matching, so each name has blanks and dashes squeezed out
- my %loose_names_ending_in_code_point = (
-$loose_names
- );
-
- # And the following array gives the inverse mapping from code points to
- # names. Lowest code points are first
- my \@code_points_ending_in_code_point = (
-$code_points_ending_in_code_point
- );
-END
- # Earlier releases didn't have Jamos. No sense outputting
- # them unless will be used.
- if ($has_hangul_syllables) {
- $pre_body .= <<END;
-
- # Convert from code point to Jamo short name for use in composing Hangul
- # syllable names
- my %Jamo = (
-$jamo
- );
-
- # Leading consonant (can be null)
- my %Jamo_L = (
-$jamo_l
- );
-
- # Vowel
- my %Jamo_V = (
-$jamo_v
- );
-
- # Optional trailing consonant
- my %Jamo_T = (
-$jamo_t
- );
-
- # Computed re that splits up a Hangul name into LVT or LV syllables
- my \$syllable_re = qr/$jamo_re/;
-
- my \$HANGUL_SYLLABLE = "HANGUL SYLLABLE ";
- my \$loose_HANGUL_SYLLABLE = "HANGULSYLLABLE";
-
- # These constants names and values were taken from the Unicode standard,
- # version 5.1, section 3.12. They are used in conjunction with Hangul
- # syllables
- my \$SBase = $SBase_string;
- my \$LBase = $LBase_string;
- my \$VBase = $VBase_string;
- my \$TBase = $TBase_string;
- my \$SCount = $SCount;
- my \$LCount = $LCount;
- my \$VCount = $VCount;
- my \$TCount = $TCount;
- my \$NCount = \$VCount * \$TCount;
-END
- } # End of has Jamos
-
- $pre_body .= << 'END';
-
- sub name_to_code_point_special {
- my ($name, $loose) = @_;
-
- # Returns undef if not one of the specially handled names; otherwise
- # returns the code point equivalent to the input name
- # $loose is non-zero if to use loose matching, 'name' in that case
- # must be input as upper case with all blanks and dashes squeezed out.
-END
- if ($has_hangul_syllables) {
- $pre_body .= << 'END';
-
- if ((! $loose && $name =~ s/$HANGUL_SYLLABLE//)
- || ($loose && $name =~ s/$loose_HANGUL_SYLLABLE//))
- {
- return if $name !~ qr/^$syllable_re$/;
- my $L = $Jamo_L{$1};
- my $V = $Jamo_V{$2};
- my $T = (defined $3) ? $Jamo_T{$3} : 0;
- return ($L * $VCount + $V) * $TCount + $T + $SBase;
- }
-END
- }
- $pre_body .= << 'END';
-
- # Name must end in 'code_point' for this to handle.
- return if (($loose && $name !~ /^ (.*?) ($run_on_code_point_re) $/x)
- || (! $loose && $name !~ /^ (.*) ($code_point_re) $/x));
-
- my $base = $1;
- my $code_point = CORE::hex $2;
- my $names_ref;
-
- if ($loose) {
- $names_ref = \%loose_names_ending_in_code_point;
- }
- else {
- return if $base !~ s/-$//;
- $names_ref = \%names_ending_in_code_point;
- }
-
- # Name must be one of the ones which has the code point in it.
- return if ! $names_ref->{$base};
-
- # Look through the list of ranges that apply to this name to see if
- # the code point is in one of them.
- for (my $i = 0; $i < scalar @{$names_ref->{$base}{'low'}}; $i++) {
- return if $names_ref->{$base}{'low'}->[$i] > $code_point;
- next if $names_ref->{$base}{'high'}->[$i] < $code_point;
-
- # Here, the code point is in the range.
- return $code_point;
- }
-
- # Here, looked like the name had a code point number in it, but
- # did not match one of the valid ones.
- return;
- }
-
- sub code_point_to_name_special {
- my $code_point = shift;
-
- # Returns the name of a code point if algorithmically determinable;
- # undef if not
-END
- if ($has_hangul_syllables) {
- $pre_body .= << 'END';
-
- # If in the Hangul range, calculate the name based on Unicode's
- # algorithm
- if ($code_point >= $SBase && $code_point <= $SBase + $SCount -1) {
- use integer;
- my $SIndex = $code_point - $SBase;
- my $L = $LBase + $SIndex / $NCount;
- my $V = $VBase + ($SIndex % $NCount) / $TCount;
- my $T = $TBase + $SIndex % $TCount;
- $name = "$HANGUL_SYLLABLE$Jamo{$L}$Jamo{$V}";
- $name .= $Jamo{$T} if $T != $TBase;
- return $name;
- }
-END
- }
- $pre_body .= << 'END';
-
- # Look through list of these code points for one in range.
- foreach my $hash (@code_points_ending_in_code_point) {
- return if $code_point < $hash->{'low'};
- if ($code_point <= $hash->{'high'}) {
- return sprintf("%s-%04X", $hash->{'name'}, $code_point);
- }
- }
- return; # None found
- }
-} # End closure
-
-END
- } # End of has hangul or code point in name maps.
-
my $format = $self->format;
my $return = <<END;
my $addr = do { no overloading; pack 'J', $self; };
# Clear the temporaries
- $has_hangul_syllables = 0;
undef @multi_code_point_maps;
- undef %names_ending_in_code_point;
- undef %loose_names_ending_in_code_point;
- undef @code_points_ending_in_code_point;
# Calculate the format of the table if not already done.
my $format = $self->format;
return $self->_range_list->add_range(@_);
}
+ sub header {
+ my $self = shift;
+ Carp::carp_extra_args(\@_) if main::DEBUG && @_;
+
+ # All match tables are to be used only by the Perl core.
+ return $self->SUPER::header() . $INTERNAL_ONLY;
+ }
+
sub pre_body { # Does nothing for match tables.
return
}
# gets property's status by default
Status => $self->status,
_Status_Info => $self->status_info,
- %args,
- Internal_Only_Warning => 1); # Override any
- # input param
+ %args);
return unless defined $table;
}
my $copy = $item;
$copy = $UNDEF unless defined $copy;
- # Quote non-numbers (numbers also have optional leading '-' and
- # fractions)
- if ($copy eq "" || $copy !~ /^ -? \d+ ( \. \d+ )? $/x) {
+ # Quote non-integers (integers also have optional leading '-')
+ if ($copy eq "" || $copy !~ /^ -? \d+ $/x) {
# Escape apostrophe and backslash
$copy =~ s/ ( ['\\] ) /\\$1/xg;
Default_Map => "",
Directory => File::Spec->curdir(),
File => 'Name',
- Internal_Only_Warning => 1,
+ Internal_Only => 1,
Perl_Extension => 1,
Range_Size_1 => \&output_perl_charnames_line,
Type => $STRING,
Directory => File::Spec->curdir(),
File => 'Decomposition',
Format => $DECOMP_STRING_FORMAT,
- Internal_Only_Warning => 1,
+ Internal_Only => 1,
Perl_Extension => 1,
Default_Map => $CODE_POINT,
my ($code_point, @fields) = split /\s*;\s*/, $_, -1;
if ($code_point eq '0007') {
- $fields[$CHARNAME] = "ALERT";
+ $fields[$CHARNAME] = "";
}
elsif ($code_point eq '070F') { # Unicode Corrigendum #8; see
# http://www.unicode.org/versions/corrigendum8.html
Type => $STRING,
Default_Map => $CODE_POINT,
Perl_Extension => 1,
+ Internal_Only => 1,
Description => "The simple mappings for $case for code points that have full mappings as well");
$simple_only->set_to_output_map($INTERNAL_MAP);
$simple_only->add_comment(join_lines( <<END
return;
}
+sub setup_v6_name_alias {
+ property_ref('Name_Alias')->add_map(7, 7, "ALERT");
+}
+
sub finish_Unicode() {
# This routine should be called after all the Unicode files have been read
# in. It:
# This fills in any missing values with the default. It's not
# necessary to do this with binary properties, as the default
# is defined completely in terms of the Y table.
- $property->add_map(0, $LAST_UNICODE_CODEPOINT,
+ $property->add_map(0, $MAX_UNICODE_CODEPOINT,
$default_map, Replace => $NO);
}
}
# 'Any' is all code points. As an error check, instead of just setting it
# to be that, construct it to be the union of all the major categories
$Any = $perl->add_match_table('Any',
- Description => "[\\x{0000}-\\x{$LAST_UNICODE_CODEPOINT_STRING}]",
+ Description => "[\\x{0000}-\\x{$MAX_UNICODE_CODEPOINT_STRING}]",
Matches_All => 1);
foreach my $major_table ($gc->tables) {
$Any += $major_table;
}
- if ($Any->max != $LAST_UNICODE_CODEPOINT) {
+ if ($Any->max != $MAX_UNICODE_CODEPOINT) {
Carp::my_carp_bug("Generated highest code point ("
. sprintf("%X", $Any->max)
- . ") doesn't match expected value $LAST_UNICODE_CODEPOINT_STRING.")
+ . ") doesn't match expected value $MAX_UNICODE_CODEPOINT_STRING.")
}
if ($Any->range_count != 1 || $Any->min != 0) {
Carp::my_carp_bug("Generated table 'Any' doesn't match all code points.")
);
# Our internal-only property should be treated as more than just a
- # synonym.
- $perl->add_match_table('_CombAbove')
+ # synonym; grandfather it in to the pod.
+ $perl->add_match_table('_CombAbove', Re_Pod_Entry => 1)
->set_equivalent_to(property_ref('ccc')->table('Above'),
Related => 1);
# one of Nonspacing_Mark (Mn), Enclosing_Mark (Me), Format (Cf),
# Modifier_Letter (Lm), or Modifier_Symbol (Sk).
- # Perl has long had an internal-only alias for this property.
- my $perl_case_ignorable = $perl->add_match_table('_Case_Ignorable');
+ # Perl has long had an internal-only alias for this property; grandfather
+ # it in to the pod, but discourage its use.
+ my $perl_case_ignorable = $perl->add_match_table('_Case_Ignorable',
+ Re_Pod_Entry => 1);
my $case_ignorable = property_ref('Case_Ignorable');
if (defined $case_ignorable && ! $case_ignorable->is_empty) {
$perl_case_ignorable->set_equivalent_to($case_ignorable->table('Y'),
Description => '\p{Punct} + ASCII-range \p{Symbol}',
Initialize => $gc->table('Punctuation')
+ ($ASCII & $gc->table('Symbol')),
+ Perl_Extension => 1
);
- $perl->add_match_table('PosixPunct',
+ $perl->add_match_table('PosixPunct', Perl_Extension => 1,
Description => '[-!"#$%&\'()*+,./:;<>?@[\\\]^_`{|}~]',
Initialize => $ASCII & $XPosixPunct,
);
# _CanonDCIJ is equivalent to Soft_Dotted, but if on a release earlier
# than SD appeared, construct it ourselves, based on the first release SD
- # was in.
- my $CanonDCIJ = $perl->add_match_table('_CanonDCIJ');
+ # was in. A pod entry is grandfathered in for it
+ my $CanonDCIJ = $perl->add_match_table('_CanonDCIJ', Re_Pod_Entry => 1,
+ Perl_Extension => 1, Internal_Only => 1);
my $soft_dotted = property_ref('Soft_Dotted');
if (defined $soft_dotted && ! $soft_dotted->is_empty) {
$CanonDCIJ->set_equivalent_to($soft_dotted->table('Y'), Related => 1);
}
# These are used in Unicode's definition of \X
- my $begin = $perl->add_match_table('_X_Begin', Perl_Extension => 1);
- my $extend = $perl->add_match_table('_X_Extend', Perl_Extension => 1);
+ my $begin = $perl->add_match_table('_X_Begin', Perl_Extension => 1,
+ Internal_Only => 1);
+ my $extend = $perl->add_match_table('_X_Extend', Perl_Extension => 1,
+ Internal_Only => 1);
# For backward compatibility, Perl has its own definition for IDStart
# First, we include the underscore, and then the regular XID_Start also
# More GCB. If we found some hangul syllables, populate a combined
# table.
- my $lv_lvt_v = $perl->add_match_table('_X_LV_LVT_V');
+ my $lv_lvt_v = $perl->add_match_table('_X_LV_LVT_V', Perl_Extension => 1, Internal_Only => 1);
my $LV = $gcb->table('LV');
if ($LV->is_empty) {
push @tables_that_may_be_empty, $lv_lvt_v->complete_name;
Unicode_1_Name entries are used only for otherwise nameless code
points.
$alias_sentence
+This file doesn't include the algorithmically determinable names. For those,
+use 'unicore/Name.pm'
+END
+ ));
+ property_ref('Name')->add_comment(join_lines( <<END
+This file doesn't include the algorithmically determinable names. For those,
+use 'unicore/Name.pm'
END
));
my $in = Property->new('In',
Default_Map => $default_map,
Full_Name => "Present_In",
- Internal_Only_Warning => 1,
Perl_Extension => 1,
Type => $ENUM,
Initialize => $age,
foreach my $alias ($table->aliases) {
next if $alias->name =~ /^_/;
$table->add_alias('Is_' . $alias->name,
- Pod_Entry => 0,
+ Re_Pod_Entry => 0,
Status => $alias->status,
Externally_Ok => 0);
}
Initialize => $gc->table('Unassigned')
& property_ref('Noncharacter_Code_Point')->table('N'));
- for (my $i = 0; $i <= $LAST_UNICODE_CODEPOINT; $i++ ) {
+ for (my $i = 0; $i <= $MAX_UNICODE_CODEPOINT; $i++ ) {
$i = populate_char_info($i); # Note sets $i so may cause skips
}
}
# No name collision, so ok to add the perl synonym.
- my $make_pod_entry;
+ my $make_re_pod_entry;
my $externally_ok;
my $status = $alias->status;
if ($nominal_property == $block) {
# we don't want people using the name without the
# 'In', so discourage that.
if ($prefix eq "") {
- $make_pod_entry = 1;
+ $make_re_pod_entry = 1;
$status = $status || $DISCOURAGED;
$externally_ok = 0;
}
elsif ($prefix eq 'In_') {
- $make_pod_entry = 0;
+ $make_re_pod_entry = 0;
$status = $status || $NORMAL;
$externally_ok = 1;
}
else {
- $make_pod_entry = 0;
+ $make_re_pod_entry = 0;
$status = $status || $DISCOURAGED;
$externally_ok = 0;
}
# The 'Is' prefix is handled in the pod by a wild
# card, and we won't use it for an external name
- $make_pod_entry = 0;
+ $make_re_pod_entry = 0;
$status = $status || $NORMAL;
$externally_ok = 0;
}
# Here, is an empty prefix, non block. This gets its
# own pod entry and can be used for an external name.
- $make_pod_entry = 1;
+ $make_re_pod_entry = 1;
$status = $status || $NORMAL;
$externally_ok = 1;
}
# Here, have found a table for $perl. Add this alias
# to it, and are done with this prefix.
$equivalent->add_alias($proposed_name,
- Pod_Entry => $make_pod_entry,
+ Re_Pod_Entry => $make_re_pod_entry,
Status => $status,
Externally_Ok => $externally_ok);
trace "adding alias perl=$proposed_name to $equivalent" if main::DEBUG && $to_trace;
# Here, $perl doesn't already have a table that is a
# synonym for this property, add one.
my $added_table = $perl->add_match_table($proposed_name,
- Pod_Entry => $make_pod_entry,
+ Re_Pod_Entry => $make_re_pod_entry,
Status => $status,
Externally_Ok => $externally_ok);
# And it will be related to the actual table, since it is
foreach my $alias ($table->aliases) {
# Skip if not to go in pod.
- next unless $alias->make_pod_entry;
+ next unless $alias->make_re_pod_entry;
# Start gathering all the components for the entry
my $name = $alias->name;
} # End of looping through each reason.
+ # Similarly, generate a list of files that we don't use, grouped by the
+ # reasons why. First, create a hash whose keys are the reasons, and whose
+ # values are anonymous arrays of all the files that share that reason.
+ my %grouped_by_reason;
+ foreach my $file (keys %ignored_files) {
+ push @{$grouped_by_reason{$ignored_files{$file}}}, $file;
+ }
+
+ # Then, sort each group.
+ foreach my $group (keys %grouped_by_reason) {
+ @{$grouped_by_reason{$group}} = sort { lc $a cmp lc $b }
+ @{$grouped_by_reason{$group}} ;
+ }
+
+ # Finally, create the output text. For each reason (sorted by the
+ # alphabetically first file that has that reason)...
+ my @unused_files;
+ foreach my $reason (sort { lc $grouped_by_reason{$a}->[0]
+ cmp lc $grouped_by_reason{$b}->[0]
+ }
+ keys %grouped_by_reason)
+ {
+ # Add all the files that have that reason to the output. Start
+ # with an empty line.
+ push @unused_files, "\n\n";
+ push @unused_files, map { "\n=item F<$_> \n" }
+ @{$grouped_by_reason{$reason}};
+ # And add the reason under the list of files
+ push @unused_files, "\n$reason\n";
+ }
+
# Generate a list of the properties whose map table we output, from the
# global @map_properties.
my @map_tables_actually_output;
=head1 NAME
-$pod_file - Index of Unicode Version $string_version properties in Perl
+$pod_file - Index of Unicode Version $string_version character properties in Perl
=head1 DESCRIPTION
-There are many properties in Unicode, and Perl provides access to almost all of
-them, as well as some additional extensions and short-cut synonyms.
+This document provides information about the portion of the Unicode database
+that deals with character properties, that is the portion that is defined on
+single code points. (L</Other information in the Unicode data base>
+below briefly mentions other data that Unicode provides.)
-And just about all of the few that aren't accessible through the Perl
-core are accessible through the modules: L<Unicode::Normalize> and
-L<Unicode::UCD>, and for Unihan properties, via the CPAN module
-L<Unicode::Unihan>.
+Perl can provide access to all non-provisional Unicode character properties,
+though not all are enabled by default. The omitted ones are the Unihan
+properties (accessible via the CPAN module L<Unicode::Unihan>) and certain
+deprecated or Unicode-internal properties. (An installation may choose to
+recompile Perl's tables to change this. See L<Unicode regular expression
+properties that are NOT accepted by Perl>.)
+
+Perl also provides some additional extensions and short-cut synonyms
+for Unicode properties.
This document merely lists all available properties and does not attempt to
explain what each property really means. There is a brief description of each
=item Stabilized
-Obsolete properties may be stabilized. Such a determination does not indicate
+A property may be stabilized. Such a determination does not indicate
that the property should or should not be used; instead it is a declaration
that the property will not be maintained nor extended for newly encoded
characters. Such properties are marked with $a_bold_stabilized in the
=item Deprecated
-An obsolete property may be deprecated, perhaps because its original intent
+A property may be deprecated, perhaps because its original intent
has been replaced by another property, or because its specification was
somehow defective. This means that its use is strongly
discouraged, so much so that a warning will be issued if used, unless the
\$utf8::SwashInfo{'ToNAME'}{'specials_name'} = 'utf8::ToSpecNAME';
+
+=head1 Other information in the Unicode data base
+
+The Unicode data base is delivered in two different formats. The XML version
+is valid for more modern Unicode releases. The other version is a collection
+of files. The two are intended to give equivalent information. Perl uses the
+older form; this allows you to recompile Perl to use early Unicode releases.
+
+The only non-character property that Perl currently supports is Named
+Sequences, in which a sequence of code points
+is given a name and generally treated as a single entity. (Perl supports
+these via the C<\\N{...}> double-quotish construct,
+L<charnames/charnames::string_vianame(name)>, and L<Unicode::UCD/namedseq()>.)
+
+Below is a list of the files in the Unicode data base that Perl doesn't
+currently use, along with very brief descriptions of their purposes.
+Some of the names of the files have been shortened from those that Unicode
+uses, in order to allow them to be distinguishable from similarly named files
+on file systems for which only the first 8 characters of a name are
+significant.
+
+=over 4
+
+@unused_files
+
+=back
+
=head1 SEE ALSO
L<$unicode_reference_url>
return;
}
+sub make_Name_pm () {
+    # Create and write Name.pm, which contains subroutines and data to use in
+    # conjunction with Name.pl.
+    #
+    # The generated module is assembled piecewise in @name:
+    #   1) the standard header lines ($HEADER, $INTERNAL_ONLY);
+    #   2) dumps of the tables for names that end in their own code point
+    #      (@code_points_ending_in_code_point and the strict and loose
+    #      %...names_ending_in_code_point hashes);
+    #   3) only if $has_hangul_syllables: dumps of the Jamo tables, the
+    #      syllable-splitting regex, and the Hangul constants;
+    #   4) the routines name_to_code_point_special() and
+    #      code_point_to_name_special() that use the above.
+    #
+    # Takes no arguments and returns nothing.  The assembled text is handed
+    # to main::write() under the name "Name.pm".
+
+    # Maybe there's nothing to do.
+    return unless $has_hangul_syllables || @code_points_ending_in_code_point;
+
+    my @name = <<END;
+$HEADER
+$INTERNAL_ONLY
+END
+
+    # Convert these structures to output format.
+    my $code_points_ending_in_code_point =
+                      main::simple_dumper(\@code_points_ending_in_code_point,
+                                          ' ' x 8);
+    my $names = main::simple_dumper(\%names_ending_in_code_point,
+                                    ' ' x 8);
+    my $loose_names = main::simple_dumper(\%loose_names_ending_in_code_point,
+                                          ' ' x 8);
+
+    # Do the same with the Hangul names,
+    my $jamo;
+    my $jamo_l;
+    my $jamo_v;
+    my $jamo_t;
+    my $jamo_re;
+    if ($has_hangul_syllables) {
+
+        # Construct a regular expression of all the possible
+        # combinations of the Hangul syllables.  Each list is the run of
+        # Jamo short names for the corresponding range of code points.
+        my @L_re = @Jamo{ $LBase .. $LBase + $LCount - 1 }; # Leading consonants
+        my @V_re = @Jamo{ $VBase .. $VBase + $VCount - 1 }; # Middle vowels
+
+        # Trailing consonants.  The range starts one past $TBase; the
+        # generated code below treats $T == $TBase as "no trailing
+        # consonant", so that entry is not an alternative in the regex.
+        my @T_re = @Jamo{ $TBase + 1 .. $TBase + $TCount - 1 };
+
+        # The whole re is made up of the L V T combination.
+        $jamo_re = '('
+                    . join ('|', sort @L_re)
+                    . ')('
+                    . join ('|', sort @V_re)
+                    . ')('
+                    . join ('|', sort @T_re)
+                    . ')?';
+
+        # These hashes needed by the algorithm were generated
+        # during reading of the Jamo.txt file
+        $jamo = main::simple_dumper(\%Jamo, ' ' x 8);
+        $jamo_l = main::simple_dumper(\%Jamo_L, ' ' x 8);
+        $jamo_v = main::simple_dumper(\%Jamo_V, ' ' x 8);
+        $jamo_t = main::simple_dumper(\%Jamo_T, ' ' x 8);
+    }
+
+    # This heredoc interpolates: unescaped scalars are filled in now, while
+    # the backslashed ones (\$..., \@...) become variables of the generated
+    # module.
+    push @name, <<END;
+
+# This module contains machine-generated tables and code for the
+# algorithmically-determinable Unicode character names. The following
+# routines can be used to translate between name and code point and vice versa
+
+{ # Closure
+
+    # Matches legal code point. 4-6 hex numbers, If there are 6, the first
+    # two must be 10; if there are 5, the first must not be a 0. Written this
+    # way to decrease backtracking. The first regex allows the code point to
+    # be at the end of a word, but to work properly, the word shouldn't end
+    # with a valid hex character. The second one won't match a code point at
+    # the end of a word, and doesn't have the run-on issue
+    my \$run_on_code_point_re = qr/$run_on_code_point_re/;
+    my \$code_point_re = qr/$code_point_re/;
+
+    # In the following hash, the keys are the bases of names which includes
+    # the code point in the name, like CJK UNIFIED IDEOGRAPH-4E01. The values
+    # of each key is another hash which is used to get the low and high ends
+    # for each range of code points that apply to the name.
+    my %names_ending_in_code_point = (
+$names
+    );
+
+    # The following hash is a copy of the previous one, except is for loose
+    # matching, so each name has blanks and dashes squeezed out
+    my %loose_names_ending_in_code_point = (
+$loose_names
+    );
+
+    # And the following array gives the inverse mapping from code points to
+    # names. Lowest code points are first
+    my \@code_points_ending_in_code_point = (
+$code_points_ending_in_code_point
+    );
+END
+    # Earlier releases didn't have Jamos. No sense outputting
+    # them unless will be used.
+    if ($has_hangul_syllables) {
+        push @name, <<END;
+
+    # Convert from code point to Jamo short name for use in composing Hangul
+    # syllable names
+    my %Jamo = (
+$jamo
+    );
+
+    # Leading consonant (can be null)
+    my %Jamo_L = (
+$jamo_l
+    );
+
+    # Vowel
+    my %Jamo_V = (
+$jamo_v
+    );
+
+    # Optional trailing consonant
+    my %Jamo_T = (
+$jamo_t
+    );
+
+    # Computed re that splits up a Hangul name into LVT or LV syllables
+    my \$syllable_re = qr/$jamo_re/;
+
+    my \$HANGUL_SYLLABLE = "HANGUL SYLLABLE ";
+    my \$loose_HANGUL_SYLLABLE = "HANGULSYLLABLE";
+
+    # These constants names and values were taken from the Unicode standard,
+    # version 5.1, section 3.12. They are used in conjunction with Hangul
+    # syllables
+    my \$SBase = $SBase_string;
+    my \$LBase = $LBase_string;
+    my \$VBase = $VBase_string;
+    my \$TBase = $TBase_string;
+    my \$SCount = $SCount;
+    my \$LCount = $LCount;
+    my \$VCount = $VCount;
+    my \$TCount = $TCount;
+    my \$NCount = \$VCount * \$TCount;
+END
+    } # End of has Jamos
+
+    # From here on the heredocs are single-quoted: nothing is interpolated
+    # at generation time, the text is emitted verbatim as code.
+    push @name, << 'END';
+
+    sub name_to_code_point_special {
+        my ($name, $loose) = @_;
+
+        # Returns undef if not one of the specially handled names; otherwise
+        # returns the code point equivalent to the input name
+        # $loose is non-zero if to use loose matching, 'name' in that case
+        # must be input as upper case with all blanks and dashes squeezed out.
+END
+    if ($has_hangul_syllables) {
+
+        # The Hangul prefix is stripped only when it begins the name.
+        # Without the '^' anchor, removing the prefix from the middle of a
+        # name could splice the surrounding text into something that looks
+        # like a syllable (e.g. "GHANGUL SYLLABLE A" would become "GA") and
+        # be wrongly accepted.
+        push @name, << 'END';
+
+        if ((! $loose && $name =~ s/^$HANGUL_SYLLABLE//)
+            || ($loose && $name =~ s/^$loose_HANGUL_SYLLABLE//))
+        {
+            return if $name !~ qr/^$syllable_re$/;
+            my $L = $Jamo_L{$1};
+            my $V = $Jamo_V{$2};
+            my $T = (defined $3) ? $Jamo_T{$3} : 0;
+            return ($L * $VCount + $V) * $TCount + $T + $SBase;
+        }
+END
+    }
+    push @name, << 'END';
+
+        # Name must end in 'code_point' for this to handle.
+        return if (($loose && $name !~ /^ (.*?) ($run_on_code_point_re) $/x)
+                   || (! $loose && $name !~ /^ (.*) ($code_point_re) $/x));
+
+        my $base = $1;
+        my $code_point = CORE::hex $2;
+        my $names_ref;
+
+        if ($loose) {
+            $names_ref = \%loose_names_ending_in_code_point;
+        }
+        else {
+            return if $base !~ s/-$//;
+            $names_ref = \%names_ending_in_code_point;
+        }
+
+        # Name must be one of the ones which has the code point in it.
+        return if ! $names_ref->{$base};
+
+        # Look through the list of ranges that apply to this name to see if
+        # the code point is in one of them.
+        for (my $i = 0; $i < scalar @{$names_ref->{$base}{'low'}}; $i++) {
+            return if $names_ref->{$base}{'low'}->[$i] > $code_point;
+            next if $names_ref->{$base}{'high'}->[$i] < $code_point;
+
+            # Here, the code point is in the range.
+            return $code_point;
+        }
+
+        # Here, looked like the name had a code point number in it, but
+        # did not match one of the valid ones.
+        return;
+    }
+
+    sub code_point_to_name_special {
+        my $code_point = shift;
+
+        # Returns the name of a code point if algorithmically determinable;
+        # undef if not
+END
+    if ($has_hangul_syllables) {
+
+        # $name is declared lexically in the generated code so that the
+        # routine does not write to a package global.
+        push @name, << 'END';
+
+        # If in the Hangul range, calculate the name based on Unicode's
+        # algorithm
+        if ($code_point >= $SBase && $code_point <= $SBase + $SCount -1) {
+            use integer;
+            my $SIndex = $code_point - $SBase;
+            my $L = $LBase + $SIndex / $NCount;
+            my $V = $VBase + ($SIndex % $NCount) / $TCount;
+            my $T = $TBase + $SIndex % $TCount;
+            my $name = "$HANGUL_SYLLABLE$Jamo{$L}$Jamo{$V}";
+            $name .= $Jamo{$T} if $T != $TBase;
+            return $name;
+        }
+END
+    }
+    push @name, << 'END';
+
+        # Look through list of these code points for one in range.
+        foreach my $hash (@code_points_ending_in_code_point) {
+            return if $code_point < $hash->{'low'};
+            if ($code_point <= $hash->{'high'}) {
+                return sprintf("%s-%04X", $hash->{'name'}, $code_point);
+            }
+        }
+        return; # None found
+    }
+} # End closure
+
+1;
+END
+
+    main::write("Name.pm", 0, \@name); # The 0 means no utf8.
+    return;
+}
+
+
sub write_all_tables() {
# Write out all the tables generated by this program to files, as well as
# the supporting data structures, pod file, and .t file.
# this one.
next if $i == 0
|| ! defined $pod_directory
- || ! $alias->make_pod_entry;
+ || ! $alias->make_re_pod_entry;
my $rhs = $full_property_name;
if ($property != $perl && $table->perl_extension) {
# Write out the pod file
make_pod;
- # And Heavy.pl
+ # And Heavy.pl, Name.pm
make_Heavy;
+ make_Name_pm;
make_property_test_script() if $make_test_script;
return;
# pre-existing one.
push @property_aliases, map { Alias->new("Is_" . $_->name,
$_->loose_match,
- $_->make_pod_entry,
+ $_->make_re_pod_entry,
$_->externally_ok,
$_->status)
} @property_aliases;
Property => 'Bidi_Mirroring_Glyph',
),
Input_file->new("NormalizationTest.txt", v3.0.1,
- Skip => 1,
+ Skip => 'Validation Tests',
),
Input_file->new('CaseFolding.txt', v3.0.1,
Pre_Handler => \&setup_case_folding,
Handler => \&process_GCB_test,
),
Input_file->new("$AUXILIARY/LBTest.txt", v4.1.0,
- Skip => 1,
+ Skip => 'Validation Tests',
),
Input_file->new("$AUXILIARY/SBTest.txt", v4.1.0,
- Skip => 1,
+ Skip => 'Validation Tests',
),
Input_file->new("$AUXILIARY/WBTest.txt", v4.1.0,
- Skip => 1,
+ Skip => 'Validation Tests',
),
Input_file->new("$AUXILIARY/SentenceBreakProperty.txt", v4.1.0,
Property => 'Sentence_Break',
),
Input_file->new('NameAliases.txt', v5.0.0,
Property => 'Name_Alias',
+ Pre_Handler => ($v_version ge v6.0.0)
+ ? \&setup_v6_name_alias
+ : undef,
),
Input_file->new("BidiTest.txt", v5.2.0,
- Skip => 1,
+ Skip => 'Validation Tests',
),
Input_file->new('UnihanIndicesDictionary.txt', v5.2.0,
Optional => 1,