# existence is not noted in the comment.
'Decomposition_Mapping' => 'Accessible via Unicode::Normalize or Unicode::UCD::prop_invmap()',
+ 'Indic_Matra_Category' => "Provisional",
+ 'Indic_Syllabic_Category' => "Provisional",
+
# Don't suppress ISO_Comment, as otherwise special handling is needed
# to differentiate between it and gc=c, which can be written as 'isc',
# which is the same characters as ISO_Comment's short name.
- 'Name' => "Accessible via 'use charnames;' or Unicode::UCD::prop_invmap()",
+ 'Name' => "Accessible via \\N{...} or 'use charnames;' or Unicode::UCD::prop_invmap()",
'Simple_Case_Folding' => "$simple. Can access this through Unicode::UCD::casefold or Unicode::UCD::prop_invmap()",
'Simple_Lowercase_Mapping' => "$simple. Can access this through Unicode::UCD::charinfo or Unicode::UCD::prop_invmap()",
FC_NFKC_Closure => 'Supplanted in usage by NFKC_Casefold; otherwise not useful',
);
- # The following are suppressed because they were made contributory or
- # deprecated by Unicode before Perl ever thought about supporting them.
- foreach my $property ('Jamo_Short_Name',
- 'Grapheme_Link',
- 'Expands_On_NFC',
- 'Expands_On_NFD',
- 'Expands_On_NFKC',
- 'Expands_On_NFKD'
+ foreach my $property (
+
+ # The following are suppressed because they were made contributory
+ # or deprecated by Unicode before Perl ever thought about
+ # supporting them.
+ 'Jamo_Short_Name',
+ 'Grapheme_Link',
+ 'Expands_On_NFC',
+ 'Expands_On_NFD',
+ 'Expands_On_NFKC',
+ 'Expands_On_NFKD',
+
+ # The following are suppressed because they have been marked
+ # as deprecated for a sufficient amount of time.
+ 'Other_Alphabetic',
+ 'Other_Default_Ignorable_Code_Point',
+ 'Other_Grapheme_Extend',
+ 'Other_ID_Continue',
+ 'Other_ID_Start',
+ 'Other_Lowercase',
+ 'Other_Math',
+ 'Other_Uppercase',
) {
$why_suppressed{$property} = $why_deprecated{$property};
}
'ReadMe.txt' => 'Documentation',
'StandardizedVariants.txt' => 'Certain glyph variations for character display are standardized. This lists the non-Unihan ones; the Unihan ones are also not used by Perl, and are in a separate Unicode data base L<http://www.unicode.org/ivd>',
'EmojiSources.txt' => 'Maps certain Unicode code points to their legacy Japanese cell-phone values',
- 'IndicMatraCategory.txt' => 'Provisional; for the analysis and processing of Indic scripts',
- 'IndicSyllabicCategory.txt' => 'Provisional; for the analysis and processing of Indic scripts',
'auxiliary/WordBreakTest.html' => 'Documentation of validation tests',
'auxiliary/SentenceBreakTest.html' => 'Documentation of validation tests',
'auxiliary/GraphemeBreakTest.html' => 'Documentation of validation tests',
'auxiliary/LineBreakTest.html' => 'Documentation of validation tests',
);
+my %skipped_files; # List of files that we skip
+
### End of externally interesting definitions, except for @input_file_objects
my $HEADER=<<"EOF";
# contrast to the non_skip element, which is supposed to be used very
# temporarily for debugging. Sets 'optional' to 1. Also, files that we
# pretty much will never look at can be placed in the global
- # %ignored_files instead. Ones used here will be added to that list.
+ # %ignored_files instead. Ones used here will be added to %skipped_files.
main::set_access('skip', \%skip, 'c');
my %each_line_handler;
# including its reason
if ($skip{$addr}) {
$optional{$addr} = 1;
- $ignored_files{$file{$addr}} = $skip{$addr}
+ $skipped_files{$file{$addr}} = $skip{$addr}
}
return $self;
# they are deleted from the hash, so any that remain at the
# end of the program are files that we didn't process.
my $fkey = File::Spec->rel2abs($file);
- my $expecting = delete $potential_files{$fkey};
- $expecting = delete $potential_files{lc($fkey)} unless defined $expecting;
+ my $expecting = delete $potential_files{lc($fkey)};
+
Carp::my_carp("Was not expecting '$file'.") if
! $expecting
&& ! defined $handle{$addr};
# the code point and name on each line. This was actually the hardest
# thing to design around. The code points in those ranges may actually
# have real maps not given by these two lines. These maps will either
- # be algorithmically determinable, or in the extracted files furnished
+ # be algorithmically determinable, or be in the extracted files furnished
# with the UCD. In the event of conflicts between these extracted files,
# and this one, Unicode says that this one prevails. But it shouldn't
# prevail for conflicts that occur in these ranges. The data from the
# need to be finished up.
next if $property == $perl;
+ # Nor do we need to do anything with properties that aren't going to
+ # be output.
+ next if $property->fate == $SUPPRESSED;
+
# Handle the properties that have more than one possible default
if (ref $property->default_map) {
my $default_map = $property->default_map;
$Posix_Lower->set_caseless_equivalent($Posix_Alpha);
my $Alnum = $perl->add_match_table('Alnum',
- Description => 'Alphabetic and (Decimal) Numeric',
+ Description => 'Alphabetic and (decimal) Numeric',
Initialize => $Alpha + $gc->table('Decimal_Number'),
);
$Alnum->add_alias('XPosixAlnum');
foreach my $file (keys %ignored_files) {
push @{$grouped_by_reason{$ignored_files{$file}}}, $file;
}
+ foreach my $file (keys %skipped_files) {
+ push @{$grouped_by_reason{$skipped_files{$file}}}, $file;
+ }
# Then, sort each group.
foreach my $group (keys %grouped_by_reason) {
expressions.
And, the Name and Name_Aliases properties are accessible through the C<\\N{}>
-interpolation in double-quoted strings and regular expressions, but both
-usages require a L<use charnames;|charnames> to be specified, which also
-contains related functions viacode(), vianame(), and string_vianame().
+interpolation in double-quoted strings and regular expressions; and functions
+C<charnames::viacode()>, C<charnames::vianame()>, and
+C<charnames::string_vianame()> (which require a C<use charnames ();> to be
+specified).
Finally, most properties related to decomposition are accessible via
L<Unicode::Normalize>.
push @name, <<END;
+package charnames;
+
# This module contains machine-generated tables and code for the
# algorithmically-determinable Unicode character names. The following
# routines can be used to translate between name and code point and vice versa
|| ($table == $property->table('N')
&& $property->table('Y')->is_empty));
-
- # Some tables should match everything
- my $expected_full =
- ($is_property)
- ? # All these types of map tables will be full because
- # they will have been populated with defaults
- ($type == $ENUM || $type == $FORCED_BINARY)
-
- : # A match table should match everything if its method
- # shows it should
- ($table->matches_all
-
- # The complement of an empty binary table will match
- # everything
- || $is_complement_of_empty_binary
- )
- ;
-
if ($table->is_empty) {
if ($suppress_if_empty_warn_if_not) {
Carp::my_carp("Not expecting property $table$because. Generating file for it anyway.");
}
+ # Some tables should match everything
+ my $expected_full =
+ ($table->fate == $SUPPRESSED)
+ ? 0
+ : ($is_property)
+ ? # All these types of map tables will be full because
+ # they will have been populated with defaults
+ ($type == $ENUM || $type == $FORCED_BINARY)
+
+ : # A match table should match everything if its method
+ # shows it should
+ ($table->matches_all
+
+ # The complement of an empty binary table will match
+ # everything
+ || $is_complement_of_empty_binary
+ )
+ ;
+
my $count = $table->count;
if ($expected_full) {
if ($count != $MAX_UNICODE_CODEPOINTS) {
Pre_Handler => \&setup_script_extensions,
Each_Line_Handler => \&filter_script_extensions_line,
),
+ # The two Indic files are actually available starting in v6.0.0, but their
+ # property values are missing from PropValueAliases.txt in that release,
+ # so that further work would have to be done to get them to work properly
+ # for that release.
+ Input_file->new('IndicMatraCategory.txt', v6.1.0,
+ Property => 'Indic_Matra_Category',
+ Has_Missings_Defaults => $NOT_IGNORED,
+ Skip => "Provisional; for the analysis and processing of Indic scripts",
+ ),
+ Input_file->new('IndicSyllabicCategory.txt', v6.1.0,
+ Property => 'Indic_Syllabic_Category',
+ Has_Missings_Defaults => $NOT_IGNORED,
+ Skip => "Provisional; for the analysis and processing of Indic scripts",
+ ),
);
# End of all the preliminaries.
# The paths are stored with relative names, and with '/' as the
# delimiter; convert to absolute on this machine
my $full = lc(File::Spec->rel2abs(internal_file_to_platform($input)));
- $potential_files{$full} = 1
- if ! grep { lc($full) eq lc($_) } @ignored_files_full_names;
+ $potential_files{lc $full} = 1
+ if ! grep { lc($full) eq lc($_) } @ignored_files_full_names;
}
}
}
my @unknown_input_files;
- foreach my $file (keys %potential_files) {
- next if grep { lc($file) eq lc($_) } @known_files;
+ foreach my $file (keys %potential_files) { # The keys are stored in lc
+ next if grep { $file eq lc($_) } @known_files;
# Here, the file is unknown to us. Get relative path name
$file = File::Spec->abs2rel($file);