X-Git-Url: https://perl5.git.perl.org/perl5.git/blobdiff_plain/9f877a47f789742824ee225ae9b6b73c2a70f515..35a865d48fd5b7517c276e673daf417f657c5c88:/lib/unicore/mktables diff --git a/lib/unicore/mktables b/lib/unicore/mktables index 970d0bc..9b9dd7f 100644 --- a/lib/unicore/mktables +++ b/lib/unicore/mktables @@ -716,8 +716,8 @@ usage: $0 [-c|-p|-q|-v|-w] [-C dir] [-L filelist] [ -P pod_dir ] -makelist : Rewrite the file list $file_list based on current setup -annotate : Output an annotation for each character in the table files; useful for debugging mktables, looking at diffs; but is slow, - memory intensive; resulting tables are usable but slow and - very large. + memory intensive; resulting tables are usable but are slow and + very large (and currently fail the Unicode::UCD.t tests). -check A B : Executes $0 only if A and B are the same END } @@ -768,6 +768,8 @@ push @tables_that_may_be_empty, 'Script=Katakana_Or_Hiragana' if $v_version ge v4.1.0; push @tables_that_may_be_empty, 'Script_Extensions=Katakana_Or_Hiragana' if $v_version ge v6.0.0; +push @tables_that_may_be_empty, 'Grapheme_Cluster_Break=Prepend' + if $v_version ge v6.1.0; # The lists below are hashes, so the key is the item in the list, and the # value is the reason why it is in the list. This makes generation of @@ -831,6 +833,7 @@ if ($v_version ge v5.2.0) { # Enum values for to_output_map() method in the Map_Table package. my $EXTERNAL_MAP = 1; my $INTERNAL_MAP = 2; +my $OUTPUT_DELTAS = 3; # To override computed values for writing the map tables for these properties. # The default for enum map tables is to write them out, so that the Unicode @@ -897,6 +900,9 @@ my %why_obsolete; # Documentation only # existence is not noted in the comment. 'Decomposition_Mapping' => 'Accessible via Unicode::Normalize or Unicode::UCD::prop_invmap()', + 'Indic_Matra_Category' => "Provisional", + 'Indic_Syllabic_Category' => "Provisional", + # Don't suppress ISO_Comment, as otherwise special handling is needed # to differentiate between it and gc=c, which can be written as 'isc', # which is the same characters as ISO_Comment's short name. @@ -1080,14 +1086,14 @@ my %ignored_files = ( 'ReadMe.txt' => 'Documentation', 'StandardizedVariants.txt' => 'Certain glyph variations for character display are standardized. This lists the non-Unihan ones; the Unihan ones are also not used by Perl, and are in a separate Unicode data base L', 'EmojiSources.txt' => 'Maps certain Unicode code points to their legacy Japanese cell-phone values', - 'IndicMatraCategory.txt' => 'Provisional; for the analysis and processing of Indic scripts', - 'IndicSyllabicCategory.txt' => 'Provisional; for the analysis and processing of Indic scripts', 'auxiliary/WordBreakTest.html' => 'Documentation of validation tests', 'auxiliary/SentenceBreakTest.html' => 'Documentation of validation tests', 'auxiliary/GraphemeBreakTest.html' => 'Documentation of validation tests', 'auxiliary/LineBreakTest.html' => 'Documentation of validation tests', ); +my %skipped_files; # List of files that we skip + ### End of externally interesting definitions, except for @input_file_objects my $HEADER=<<"EOF"; @@ -1183,9 +1189,11 @@ my $YES = 1; my $IF_NOT_EQUIVALENT = 1; # Replace only under certain conditions; details in # the comments at the subroutine definition. my $UNCONDITIONALLY = 2; # Replace without conditions. -my $MULTIPLE = 4; # Don't replace, but add a duplicate record if +my $MULTIPLE_BEFORE = 4; # Don't replace, but add a duplicate record if + # already there +my $MULTIPLE_AFTER = 5; # Don't replace, but add a duplicate record if # already there -my $CROAK = 5; # Die with an error if is already there +my $CROAK = 6; # Die with an error if is already there # Flags to give property statuses. The phrases are to remind maintainers that # if the flag is changed, the indefinite article referring to it in the @@ -2026,7 +2034,7 @@ sub trace { return main::trace(@_); } # contrast to the non_skip element, which is supposed to be used very # temporarily for debugging. Sets 'optional' to 1. Also, files that we # pretty much will never look at can be placed in the global - # %ignored_files instead. Ones used here will be added to that list. + # %ignored_files instead. Ones used here will be added to %skipped files main::set_access('skip', \%skip, 'c'); my %each_line_handler; @@ -2153,7 +2161,7 @@ sub trace { return main::trace(@_); } # including its reason if ($skip{$addr}) { $optional{$addr} = 1; - $ignored_files{$file{$addr}} = $skip{$addr} + $skipped_files{$file{$addr}} = $skip{$addr} } return $self; @@ -2241,7 +2249,7 @@ sub trace { return main::trace(@_); } # its name if ($seen_non_extracted_non_age) { if ($file =~ /$EXTRACTED/i) { - Carp::my_carp_bug(join_lines(<rel2abs($file); - my $expecting = delete $potential_files{$fkey}; - $expecting = delete $potential_files{lc($fkey)} unless defined $expecting; + my $expecting = delete $potential_files{lc($fkey)}; + Carp::my_carp("Was not expecting '$file'.") if ! $expecting && ! defined $handle{$addr}; @@ -2429,7 +2437,8 @@ END || @defaults > 2 || ($default =~ /^$/i - && $default !~ /^$/i)) + && $default !~ /^$/i + && $default !~ /^