X-Git-Url: https://perl5.git.perl.org/perl5.git/blobdiff_plain/25c180b8be73d8bfc857b4d0db9c82c968b82ba2..3282ee13c09f8fd1009f35571059ec6e8204b2ec:/lib/unicore/mktables diff --git a/lib/unicore/mktables b/lib/unicore/mktables index e531b44..ee214d1 100644 --- a/lib/unicore/mktables +++ b/lib/unicore/mktables @@ -31,7 +31,7 @@ use Text::Tabs; use re "/aa"; use feature 'state'; -sub DEBUG () { 0 } # Set to 0 for production; 1 for development +sub DEBUG () { 1 } # Set to 0 for production; 1 for development my $debugging_build = $Config{"ccflags"} =~ /-DDEBUGGING/; sub NON_ASCII_PLATFORM { ord("A") != 65 } @@ -45,7 +45,7 @@ sub NON_ASCII_PLATFORM { ord("A") != 65 } # expected, a warning will be generated. If an older version is being # compiled, any bounds tests that fail in the generated test file (-maketest # option) will be marked as TODO. -my $version_of_mk_invlist_bounds = v10.0.0; +my $version_of_mk_invlist_bounds = v12.0.0; ########################################################################## # @@ -652,7 +652,7 @@ sub stack_trace() { # to use the -annotate option when using this. Run this program on a unicore # containing the starting release you want to compare. Save that output # structure. Then, switching to a unicore with the ending release, change the -# 0 in the $string_compare_versions definition just below to a string +# "" in the $string_compare_versions definition just below to a string # containing a SINGLE dotted Unicode release number (e.g. "2.1") corresponding # to the starting release. This program will then compile, but throw away all # code points introduced after the starting release. Finally use a diff tool @@ -660,7 +660,7 @@ sub stack_trace() { # common to both releases, and you can see the changes caused just by the # underlying release semantic changes. For versions earlier than 3.2, you # must copy a version of DAge.txt into the directory. 
-my $string_compare_versions = DEBUG && ""; # e.g., "2.1"; +my $string_compare_versions = DEBUG && ""; my $compare_versions = DEBUG && $string_compare_versions && pack "C*", split /\./, $string_compare_versions; @@ -895,6 +895,19 @@ if ($v_version gt v3.2.0) { 'Canonical_Combining_Class=Attached_Below_Left' } +# Obsoleted +if ($v_version ge v11.0.0) { + push @tables_that_may_be_empty, qw( + Grapheme_Cluster_Break=E_Base + Grapheme_Cluster_Break=E_Base_GAZ + Grapheme_Cluster_Break=E_Modifier + Grapheme_Cluster_Break=Glue_After_Zwj + Word_Break=E_Base + Word_Break=E_Base_GAZ + Word_Break=E_Modifier + Word_Break=Glue_After_Zwj); +} + # Enum values for to_output_map() method in the Map_Table package. (0 is don't # output) my $EXTERNAL_MAP = 1; @@ -1436,10 +1449,10 @@ my @missing_early_files; # Generated list of absent files that we need to my @files_actually_output; # List of files we generated. my @more_Names; # Some code point names are compound; this is used # to store the extra components of them. -my $MIN_FRACTION_LENGTH = 3; # How many digits of a floating point number at - # the minimum before we consider it equivalent to a - # candidate rational -my $MAX_FLOATING_SLOP = 10 ** - $MIN_FRACTION_LENGTH; # And in floating terms +my $E_FLOAT_PRECISION = 2; # The minimum number of digits after the decimal + # point of a normalized floating point number + # needed to match before we consider it equivalent + # to a candidate rational # These store references to certain commonly used property objects my $age; @@ -10717,7 +10730,6 @@ END ); } - # Add any explicit cjk values $file->insert_lines(@cjk_property_values); @@ -12955,6 +12967,20 @@ sub register_fraction($) { my $rational = shift; my $float = eval $rational; + $float = sprintf "%.*e", $E_FLOAT_PRECISION, $float; + + # Strip off any leading zeros beyond 2 digits to make it C99 compliant. + # (Windows has 3 digit exponents, contrary to C99) + $float =~ s/ ( .* e [-+] ) 0* ( \d{2,}? 
) /$1$2/x; + + if ( defined $nv_floating_to_rational{$float} + && $nv_floating_to_rational{$float} ne $rational) + { + die Carp::my_carp_bug("Both '$rational' and" + . " '$nv_floating_to_rational{$float}' evaluate to" + . " the same floating point number." + . " \$E_FLOAT_PRECISION must be increased"); + } $nv_floating_to_rational{$float} = $rational; return; } @@ -13466,6 +13492,24 @@ sub filter_script_extensions_line { return; } +sub setup_emojidata { + my $prop_ref = Property->new('XPG', + Full_Name => 'Extended_Pictographic', + ); + $prop_ref->set_fate($PLACEHOLDER, + "Not part of the Unicode Character Database"); +} + +sub filter_emojidata_line { + # We only are interested in this single property from this non-UCD data + # file, and we turn it into a Perl property, so that it isn't accessible + # to the users + + $_ = "" unless /\bExtended_Pictographic\b/; + + return; +} + sub generate_hst { # Populates the Hangul Syllable Type property from first principles @@ -14469,13 +14513,6 @@ sub compile_perl() { calculate_Assigned(); - # Our internal-only property should be treated as more than just a - # synonym; grandfather it in to the pod. - $perl->add_match_table('_CombAbove', Re_Pod_Entry => 1, - Fate => $INTERNAL_ONLY, Status => $DISCOURAGED) - ->set_equivalent_to(property_ref('ccc')->table('Above'), - Related => 1); - my $ASCII = $perl->add_match_table('ASCII'); if (defined $block) { # This is equivalent to the block if have it. my $Unicode_ASCII = $block->table('Basic_Latin'); @@ -14617,56 +14654,6 @@ sub compile_perl() { $Lower->set_caseless_equivalent($cased); } - # Similarly, set up our own Case_Ignorable property if this Unicode - # version doesn't have it. 
From Unicode 5.1: Definition D121: A character - # C is defined to be case-ignorable if C has the value MidLetter or the - # value MidNumLet for the Word_Break property or its General_Category is - # one of Nonspacing_Mark (Mn), Enclosing_Mark (Me), Format (Cf), - # Modifier_Letter (Lm), or Modifier_Symbol (Sk). - - # Perl has long had an internal-only alias for this property; grandfather - # it in to the pod, but discourage its use. - my $perl_case_ignorable = $perl->add_match_table('_Case_Ignorable', - Re_Pod_Entry => 1, - Fate => $INTERNAL_ONLY, - Status => $DISCOURAGED); - my $case_ignorable = property_ref('Case_Ignorable'); - if (defined $case_ignorable && ! $case_ignorable->is_empty) { - $perl_case_ignorable->set_equivalent_to($case_ignorable->table('Y'), - Related => 1); - } - else { - - $perl_case_ignorable->initialize($gc->table('Mn') + $gc->table('Lm')); - - # The following three properties are not in early releases - $perl_case_ignorable += $gc->table('Me') if defined $gc->table('Me'); - $perl_case_ignorable += $gc->table('Cf') if defined $gc->table('Cf'); - $perl_case_ignorable += $gc->table('Sk') if defined $gc->table('Sk'); - - # For versions 4.1 - 5.0, there is no MidNumLet property, and - # correspondingly the case-ignorable definition lacks that one. 
For - # 4.0, it appears that it was meant to be the same definition, but was - # inadvertently omitted from the standard's text, so add it if the - # property actually is there - my $wb = property_ref('Word_Break'); - if (defined $wb) { - my $midlet = $wb->table('MidLetter'); - $perl_case_ignorable += $midlet if defined $midlet; - my $midnumlet = $wb->table('MidNumLet'); - $perl_case_ignorable += $midnumlet if defined $midnumlet; - } - else { - - # In earlier versions of the standard, instead of the above two - # properties , just the following characters were used: - $perl_case_ignorable += - ord("'") - + utf8::unicode_to_native(0xAD) # SOFT HYPHEN (SHY) - + 0x2019; # RIGHT SINGLE QUOTATION MARK - } - } - # The remaining perl defined tables are mostly based on Unicode TR 18, # "Annex C: Compatibility Properties". All of these have two versions, # one whose name generally begins with Posix that is posix-compliant, and @@ -14954,6 +14941,8 @@ sub compile_perl() { # Every character 0-255 is problematic because what each folds to depends # on the current locale $loc_problem_folds->add_range(0, 255); + $loc_problem_folds->add_range(0x130, 0x131); # These are problematic in + # Turkic locales $loc_problem_folds_start += $loc_problem_folds; # Also problematic are anything these fold to outside the range. 
Likely @@ -14993,8 +14982,19 @@ sub compile_perl() { Description => "Code points whose fold is a string of more than one character", ); + my $in_multi_fold = $perl->add_match_table( + "_Perl_Is_In_Multi_Char_Fold", + Description => + "Code points that are in some multiple character fold", + ); + my $non_final_fold = $perl->add_match_table( + "_Perl_Non_Final_Folds", + Description => "Code points that are in some multiple character fold, but not in the final position", + ); if ($v_version lt v3.0.1) { - push @tables_that_may_be_empty, '_Perl_Folds_To_Multi_Char'; + push @tables_that_may_be_empty, '_Perl_Folds_To_Multi_Char', + '_Perl_Is_In_Multi_Char_Fold', + '_Perl_Non_Final_Folds'; } # Look through all the known folds to populate these tables. @@ -15022,6 +15022,12 @@ sub compile_perl() { $loc_problem_folds->add_range($start, $end); $found_locale_problematic = 1; } + + if (@hex_folds > 1) { + $in_multi_fold->add_range($cp, $cp); + next if $i < @hex_folds - 1; + $non_final_fold->add_range($cp, $cp); + } } # If this is a problematic fold, add to the start chars the @@ -15040,33 +15046,6 @@ sub compile_perl() { Note => 'Union of all non-canonical decompositions', ); - # _CanonDCIJ is equivalent to Soft_Dotted, but if on a release earlier - # than SD appeared, construct it ourselves, based on the first release SD - # was in. A pod entry is grandfathered in for it - my $CanonDCIJ = $perl->add_match_table('_CanonDCIJ', Re_Pod_Entry => 1, - Perl_Extension => 1, - Fate => $INTERNAL_ONLY, - Status => $DISCOURAGED); - my $soft_dotted = property_ref('Soft_Dotted'); - if (defined $soft_dotted && ! 
$soft_dotted->is_empty) { - $CanonDCIJ->set_equivalent_to($soft_dotted->table('Y'), Related => 1); - } - else { - - # This list came from 3.2 Soft_Dotted; all of these code points are in - # all releases - $CanonDCIJ->initialize([ ord('i'), - ord('j'), - 0x012F, - 0x0268, - 0x0456, - 0x0458, - 0x1E2D, - 0x1ECB, - ]); - $CanonDCIJ = $CanonDCIJ & $Assigned; - } - # For backward compatibility, Perl has its own definition for IDStart. # It is regular XID_Start plus the underscore, but all characters must be # Word characters as well @@ -15522,33 +15501,52 @@ END } # Perl tailors the WordBreak property so that \b{wb} doesn't split - # adjacent spaces into separate words. First create a copy of the regular - # WB property as '_Perl_WB'. (On Unicode releases earlier than when WB - # was defined for, this will already have been done by the substitute file - # portion for 'Input_file' code for WB.) + # adjacent spaces into separate words. Unicode 11.0 moved in that + # direction, but left TAB, FIGURE SPACE (U+2007), and (ironically) NO + # BREAK SPACE as breaking, so we retained the original Perl customization. + # To do this, in the Perl copy of WB, simply replace the mappings of + # horizontal space characters that otherwise would map to the default or + # the 11.0 'WSegSpace' to instead map to our tailoring. my $perl_wb = property_ref('_Perl_WB'); - if (! defined $perl_wb) { - $perl_wb = Property->new('_Perl_WB', - Fate => $INTERNAL_ONLY, - Perl_Extension => 1, - Directory => $map_directory, - Type => $STRING); - my $wb = property_ref('Word_Break'); - $perl_wb->initialize($wb); - $perl_wb->set_default_map($wb->default_map); - } - - # And simply replace the mappings of horizontal space characters that - # otherwise would map to the default to instead map to our tailoring. my $default = $perl_wb->default_map; for my $range ($Blank->ranges) { for my $i ($range->start .. 
$range->end) { - next unless $perl_wb->value_of($i) eq $default; + my $value = $perl_wb->value_of($i); + + next unless $value eq $default || $value eq 'WSegSpace'; $perl_wb->add_map($i, $i, 'Perl_Tailored_HSpace', Replace => $UNCONDITIONALLY); } } + # Also starting in Unicode 11.0, rules for some of the boundary types are + # based on a non-UCD property (which we have read in if it exists). + # Recall that these boundary properties partition the code points into + # equivalence classes (represented as enums). + # + # The loop below goes through each code point that matches the non-UCD + # property, and for each current equivalence class containing such a code + # point, splits it so that those that are in both are now in a newly + # created equivalence class whose name is a combination of the property + # and the old class name, leaving unchanged everything that doesn't match + # the non-UCD property. + my $pictographic_emoji = property_ref('XPG'); + if (defined $pictographic_emoji) { + foreach my $base_property (property_ref('GCB'), + property_ref('WB')) + { + my $property = property_ref('_Perl_' . $base_property->name); + foreach my $range ($pictographic_emoji->table('Y')->ranges) { + foreach my $i ($range->start .. $range->end) { + my $current = $property->value_of($i); + $current = $property->table($current)->short_name; + $property->add_map($i, $i, 'XPG_' . $current, + Replace => $UNCONDITIONALLY); + } + } + } + } + # Create a version of the LineBreak property with the mappings that are # omitted in the default algorithm remapped to what # http://www.unicode.org/reports/tr14 says they should be. @@ -17265,7 +17263,7 @@ Perl can provide access to all non-provisional Unicode character properties, though not all are enabled by default. The omitted ones are the Unihan properties (accessible via the CPAN module L) and certain deprecated or Unicode-internal properties. (An installation may choose to -recompile Perl's tables to change this. See L.) 
For most purposes, access to Unicode properties from the Perl core is through @@ -17740,10 +17738,10 @@ $loose_to_file_of $nv_floating_to_rational ); -# If a floating point number doesn't have enough digits in it to get this -# close to a fraction, it isn't considered to be that fraction even if all the -# digits it does have match. -\$utf8::max_floating_slop = $MAX_FLOATING_SLOP; +# If a %e floating point number doesn't have this number of digits in it after +# the decimal point to get this close to a fraction, it isn't considered to be +# that fraction even if all the digits it does have match. +\$utf8::e_precision = $E_FLOAT_PRECISION; # Deprecated tables to generate a warning for. The key is the file containing # the table, so as to avoid duplication, as many property names can map to the @@ -18091,7 +18089,7 @@ sub make_UCD () { next unless $alias->ucd; next unless $alias->ok_as_filename; push @{$perlprop_to_aliases{standardize($alias->name)}}, - @aliases_list; + uniques @aliases_list; } } @@ -18874,6 +18872,34 @@ EOC return @output; } +sub generate_wildcard_tests($$$$$) { + # This is used only for making the test script. It generates wildcard + # matching test cases that are expected to compile successfully in perl.
+ + my $lhs = shift; # The property: what's to the left of the colon + # or equals separator + my $rhs = shift; # The property value; what's to the right + my $valid_code = shift; # A code point that's known to be in the + # table given by LHS=RHS; undef if table is + # empty + my $invalid_code = shift; # A code point known to not be in the table; + # undef if the table is all code points + my $warning = shift; + + return if $lhs eq ""; + return if $lhs =~ / ^ Is_ /x; # These are not currently supported + + # Generate a standardized pattern, with colon being the delimiter + my $wildcard = "$lhs=:\\A$rhs\\z:"; + + my @output; + push @output, "Expect(1, $valid_code, '\\p{$wildcard}', $warning);" + if defined $valid_code; + push @output, "Expect(0, $invalid_code, '\\p{$wildcard}', $warning);" + if defined $invalid_code; + return @output; +} + sub generate_error($$$) { # This used only for making the test script. It generates test cases that # are expected to not only not match, but to be syntax or similar errors @@ -19066,21 +19092,12 @@ sub make_property_test_script() { $t_path = 'TestProp.pl' unless defined $t_path; # the traditional name - # Keep going down an order of magnitude - # until find that adding this quantity to - # 1 remains 1; but put an upper limit on - # this so in case this algorithm doesn't - # work properly on some platform, that we - # won't loop forever. - my $digits = 0; - my $min_floating_slop = 1; - while (1+ $min_floating_slop != 1 - && $digits++ < 50) - { - my $next = $min_floating_slop / 10; - last if $next == 0; # If underflows, - # use previous one - $min_floating_slop = $next; + # Create a list of what the %f representation is for each rational number. + # This will be used below.
+ my @valid_base_floats = '0.0'; + foreach my $e_representation (keys %nv_floating_to_rational) { + push @valid_base_floats, + eval $nv_floating_to_rational{$e_representation}; } # It doesn't matter whether the elements of this array contain single lines @@ -19110,7 +19127,7 @@ EOF_CODE } property_ref('*')) { # Non-binary properties should not match \p{}; Test all for that. - if ($property->type != $BINARY) { + if ($property->type != $BINARY && $property->type != $FORCED_BINARY) { my @property_aliases = grep { $_->status ne $INTERNAL_ALIAS } $property->aliases; foreach my $property_alias ($property->aliases) { @@ -19176,6 +19193,11 @@ EOF_CODE # already guaranteed to be in error my $already_error = ! $table->file_path; + # A table that begins with these could actually be a + # user-defined property, so won't be compile time errors, as + # the definitions of those can be deferred until runtime + next if $already_error && $table_name =~ / ^ I[ns] /x; + # Generate error cases for this alias. push @output, generate_error($property_name, $table_name, @@ -19185,7 +19207,16 @@ EOF_CODE # quit now without generating success cases. next if $already_error; - # Now for the success cases. + # Now for the success cases. First, wildcard matching, as it + # shouldn't have any randomization. + if ($table_alias->status eq $NORMAL) { + push @output, generate_wildcard_tests($property_name, + $table_name, + $valid, + $invalid, + $warning, + ); + } my $random; if ($loose_match) { @@ -19206,6 +19237,15 @@ EOF_CODE $invalid, $warning, ); + if ($table_alias->status eq $NORMAL) { + push @output, generate_wildcard_tests( + $property_name, + $standard, + $valid, + $invalid, + $warning, + ); + } } $random = randomize_loose_name($table_name) } @@ -19224,77 +19264,112 @@ EOF_CODE $warning, ); - # If the name is a rational number, add tests for the - # floating point equivalent. 
- if ($table_name =~ qr{/}) { + if ($property->name eq 'nv') { + if ($table_name !~ qr{/}) { + push @output, generate_tests($property_name, + sprintf("%.15e", $table_name), + $valid, + $invalid, + $warning, + ); + } + else { + # If the name is a rational number, add tests for a + # non-reduced form, and for a floating point equivalent. + + # 60 is a number divisible by a bunch of things + my ($numerator, $denominator) = $table_name + =~ m! (.+) / (.+) !x; + $numerator *= 60; + $denominator *= 60; + push @output, generate_tests($property_name, + "$numerator/$denominator", + $valid, + $invalid, + $warning, + ); - # Calculate the float, and find just the fraction. + # Calculate the float, and the %e representation my $float = eval $table_name; - my ($whole, $fraction) - = $float =~ / (.*) \. (.*) /x; - - # Starting with one digit after the decimal point, - # create a test for each possible precision (number of - # digits past the decimal point) until well beyond the - # native number found on this machine. (If we started - # with 0 digits, it would be an integer, which could - # well match an unrelated table) - PLACE: - for my $i (1 .. $min_floating_slop + 3) { - my $table_name = sprintf("%.*f", $i, $float); - if ($i < $MIN_FRACTION_LENGTH) { - - # If the test case has fewer digits than the - # minimum acceptable precision, it shouldn't - # succeed, so we expect an error for it. - # E.g., 2/3 = .7 at one decimal point, and we - # shouldn't say it matches .7. We should make - # it be .667 at least before agreeing that the - # intent was to match 2/3. But at the - # less-than- acceptable level of precision, it - # might actually match an unrelated number. - # So don't generate a test case if this - # conflating is possible. In our example, we - # don't want 2/3 matching 7/10, if there is - # a 7/10 code point. - - # First, integers are not in the rationals - # table. Don't generate an error if this - # rounds to an integer using the given - # precision. 
- my $round = sprintf "%.0f", $table_name; - next PLACE if abs($table_name - $round) - < $MAX_FLOATING_SLOP; - - # Here, isn't close enough to an integer to be - # confusable with one. Now, see it it's - # "close" to a known rational - for my $existing - (keys %nv_floating_to_rational) + my $e_representation = sprintf("%.*e", + $E_FLOAT_PRECISION, $float); + # Parse that + my ($non_zeros, $zeros, $exponent_sign, $exponent) + = $e_representation + =~ / -? [1-9] \. (\d*?) (0*) e ([+-]) (\d+) /x; + my $min_e_precision; + my $min_f_precision; + + if ($exponent_sign eq '+' && $exponent != 0) { + Carp::my_carp_bug("Not yet equipped to handle" + . " positive exponents"); + return; + } + else { + # We're trying to find the minimum precision that + # is needed to indicate this particular rational + # for the given $E_FLOAT_PRECISION. For %e, any + # trailing zeros, like 1.500e-02 aren't needed, so + # the correct value is how many non-trailing zeros + # there are after the decimal point. + $min_e_precision = length $non_zeros; + + # For %f, like .01500, we want at least + # $E_FLOAT_PRECISION digits, but any trailing + # zeros aren't needed, so we can subtract the + # length of those. But we also need to include + # the zeros after the decimal point, but before + # the first significant digit. + $min_f_precision = $E_FLOAT_PRECISION + + $exponent + - length $zeros; + } + + # Make tests for each possible precision from 1 to + # just past the worst case. + my $upper_limit = ($min_e_precision > $min_f_precision) + ? $min_e_precision + : $min_f_precision; + + for my $i (1 .. $upper_limit + 1) { + for my $format ("e", "f") { + my $this_table + = sprintf("%.*$format", $i, $float); + + # If we don't have enough precision digits, + # make a fail test; otherwise a pass test. + my $pass = ($format eq "e") + ? 
$i >= $min_e_precision + : $i >= $min_f_precision; + if ($pass) { + push @output, generate_tests($property_name, + $this_table, + $valid, + $invalid, + $warning, + ); + } + elsif ( $format eq "e" + + # Here we would fail, but in the %f + # case, the representation at this + # precision could actually be a + # valid one for some other rational + || ! grep { $this_table + =~ / ^ $_ 0* $ /x } + @valid_base_floats) { - next PLACE - if abs($table_name - $existing) - < $MAX_FLOATING_SLOP; + push @output, + generate_error($property_name, + $this_table, + 1 # 1 => already an + # error + ); } - push @output, generate_error($property_name, - $table_name, - 1 # 1 => already an error - ); - } - else { - - # Here the number of digits exceeds the - # minimum we think is needed. So generate a - # success test case for it. - push @output, generate_tests($property_name, - $table_name, - $valid, - $invalid, - $warning, - ); } } } + } } } $table->DESTROY(); @@ -19770,12 +19845,7 @@ my @input_file_objects = ( Skip => $Documentation, ), Input_file->new("$AUXILIARY/WordBreakProperty.txt", v4.1.0, - Early => [ "WBsubst.txt", '_Perl_WB', 'ALetter', - - # Don't use _Perl_WB as a synonym for - # Word_Break in later perls, as it is tailored - # and isn't the same as Word_Break - 'ONLY_EARLY' ], + Early => [ "WBsubst.txt", '_Perl_WB', 'ALetter' ], Property => 'Word_Break', Has_Missings_Defaults => $NOT_IGNORED, ), @@ -19918,6 +19988,10 @@ my @input_file_objects = ( Skip => 'Maps certain Unicode code points to their ' . 
'legacy Japanese cell-phone values', ), + # This file is actually not usable as-is until 6.1.0, because the property + # is provisional, so its name is missing from PropertyAliases.txt until + # that release, so that further work would have to be done to get it to + # work properly Input_file->new('ScriptExtensions.txt', v6.0.0, Property => 'Script_Extensions', Early => [ sub {} ], # Doesn't do anything but ensures @@ -19930,10 +20004,9 @@ my @input_file_objects = ( : $IGNORED), ), # These two Indic files are actually not usable as-is until 6.1.0, - # because their property values are missing from PropValueAliases.txt - # until that release, so that further work would have to be done to get - # them to work properly, which isn't worth it because of them being - # provisional. + # because they are provisional, so their property values are missing from + # PropValueAliases.txt until that release, so that further work would have + # to be done to get them to work properly. Input_file->new('IndicMatraCategory.txt', v6.0.0, Withdrawn => v8.0.0, Property => 'Indic_Matra_Category', @@ -19981,6 +20054,19 @@ my @input_file_objects = ( Input_file->new('NushuSources.txt', v10.0.0, Skip => 'Specifies source material for Nushu characters', ), + Input_file->new('EquivalentUnifiedIdeograph.txt', v11.0.0, + Property => 'Equivalent_Unified_Ideograph', + Has_Missings_Defaults => $NOT_IGNORED, + ), + Input_file->new('EmojiData.txt', v11.0.0, + # Is in UAX #51 and not the UCD, so must be updated + # separately, and the first line edited to indicate the + # UCD release we're pretending it to be in. The UTC says + # this is a transitional state. + Pre_Handler => \&setup_emojidata, + Has_Missings_Defaults => $NOT_IGNORED, + Each_Line_Handler => \&filter_emojidata_line, + ), ); # End of all the preliminaries. @@ -20317,7 +20403,7 @@ if ($verbosity >= $NORMAL_VERBOSITY && ! 
$debug_skip) { if ($version_of_mk_invlist_bounds lt $v_version) { Carp::my_carp("WARNING: \\b{} algorithms (regen/mk_invlist.pl) need" . " to be checked and possibly updated to Unicode" - . " $string_version"); + . " $string_version. Failing tests will be marked TODO"); } exit(0); @@ -20327,6 +20413,7 @@ __DATA__ use strict; use warnings; +no warnings 'experimental::uniprop_wildcards'; # Test qr/\X/ and the \p{} regular expression constructs. This file is # constructed by mktables from the tables it generates, so if mktables is