# changed 0+$self to pack 'J', $self.)
my $start_time;
-BEGIN { # Get the time the script started running; do it at compiliation to
+BEGIN { # Get the time the script started running; do it at compilation to
# get it as close as possible
$start_time= time;
}
use strict;
use warnings;
use Carp;
+use Config;
use File::Find;
use File::Path;
use File::Spec;
use Text::Tabs;
sub DEBUG () { 0 } # Set to 0 for production; 1 for development
+my $debugging_build = $Config{"ccflags"} =~ /-DDEBUGGING/;
##########################################################################
#
# the small actual loop to process the input files and finish up; then
# a __DATA__ section, for the .t tests
#
-# This program works on all releases of Unicode through at least 5.2. The
+# This program works on all releases of Unicode through at least 6.0. The
# outputs have been scrutinized most intently for release 5.1. The others
# have been checked for somewhat more than just sanity. It can handle all
# existing Unicode character properties in those releases.
# out. But all the ones which can be used in regular expression \p{} and \P{}
# constructs will. Generally a property will have either its map table or its
# match tables written but not both. Again, what gets written is controlled
-# by lists which can easily be changed.
+# by lists which can easily be changed. Properties have a 'Type', like
+# binary, or string, or enum depending on how many match tables there are and
+# the content of the maps. This 'Type' is different than a range 'Type', so
+# don't get confused by the two concepts having the same name.
#
# For information about the Unicode properties, see Unicode's UAX44 document:
# More information on Unicode version glitches is further down in these
# introductory comments.
#
-# This program works on all properties as of 5.2, though the files for some
-# are suppressed from apparent lack of demand for them. You can change which
-# are output by changing lists in this program.
+# This program works on all non-provisional properties as of 6.0, though the
+# files for some are suppressed from apparent lack of demand for them. You
+# can change which are output by changing lists in this program.
#
-# The old version of mktables emphasized the term "Fuzzy" to mean Unocde's
+# The old version of mktables emphasized the term "Fuzzy" to mean Unicode's
# loose matchings rules (from Unicode TR18):
#
# The recommended names for UCD properties and property values are in
# Unicode_Radical_Stroke was listed in those files, so if the Unihan database
# is present in the directory, a table will be generated for that property.
# In 5.2, several more properties were added. For your convenience, the two
-# arrays are initialized with all the 5.2 listed properties that are also in
+# arrays are initialized with all the 6.0 listed properties that are also in
# earlier releases. But these are commented out. You can just uncomment the
# ones you want, or use them as a template for adding entries for other
# properties.
#
# Here are some observations about some of the issues in early versions:
#
-# The number of code points in \p{alpha} halve in 2.1.9. It turns out that
+# The number of code points in \p{alpha} halved in 2.1.9. It turns out that
# the reason is that the CJK block starting at 4E00 was removed from PropList,
# and was not put back in until 3.1.0
#
# special things
my $glob_list = 0; # ? Should we try to include unknown .txt files
# in the input.
-my $output_range_counts = 1; # ? Should we include the number of code points
- # in ranges in the output
+my $output_range_counts = $debugging_build; # ? Should we include the number
+ # of code points in ranges in
+ # the output
my $annotate = 0; # ? Should character names be in the output
# Verbosity levels; 0 is quiet
}
elsif ($arg eq '-annotate') {
$annotate = 1;
+ $debugging_build = 1;
+ $output_range_counts = 1;
}
else {
my $with_c = 'with';
'Canonical_Combining_Class=Attached_Below_Left'
}
-# These are listed in the Property aliases file in 5.2, but Unihan is ignored
+# These are listed in the Property aliases file in 6.0, but Unihan is ignored
# unless explicitly added.
if ($v_version ge v5.2.0) {
my $unihan = 'Unihan; remove from list if using Unihan';
my $other_properties = 'other properties';
my $contributory = "Used by Unicode internally for generating $other_properties and not intended to be used stand-alone";
- my $why_no_expand = "Easily computed, and yet doesn't cover the common encoding forms (UTF-16/8)",
+ # Common explanation used for each of the Expands_On_* entries.  This is
+ # terminated by a semicolon so that it is a statement in its own right,
+ # rather than being joined by the comma operator to the assignment that
+ # follows it.
+ my $why_no_expand = "Deprecated by Unicode: less useful than UTF-specific calculations";
%why_deprecated = (
- 'Grapheme_Link' => 'Deprecated by Unicode. Use ccc=vr (Canonical_Combining_Class=Virama) instead',
+ 'Grapheme_Link' => 'Deprecated by Unicode: Duplicates ccc=vr (Canonical_Combining_Class=Virama)',
'Jamo_Short_Name' => $contributory,
'Line_Break=Surrogate' => 'Deprecated by Unicode because surrogates should never appear in well-formed text, and therefore shouldn\'t be the basis for line breaking',
'Other_Alphabetic' => $contributory,
);
%why_suppressed = (
- # There is a lib/unicore/Decomposition.pl (used by normalize.pm) which
+ # There is a lib/unicore/Decomposition.pl (used by Normalize.pm) which
# contains the same information, but without the algorithmically
# determinable Hangul syllables'. This file is not published, so it's
# existence is not noted in the comment.
'Name' => "Accessible via 'use charnames;'",
'Name_Alias' => "Accessible via 'use charnames;'",
- # These are sort of jumping the gun; deprecation is proposed for
- # Unicode version 6.0, but they have never been exposed by Perl, and
- # likely are soon to be deprecated, so best not to expose them.
- FC_NFKC_Closure => 'Use NFKC_Casefold instead',
+ FC_NFKC_Closure => 'Supplanted in usage by NFKC_Casefold; otherwise not useful',
Expands_On_NFC => $why_no_expand,
Expands_On_NFD => $why_no_expand,
Expands_On_NFKC => $why_no_expand,
if ($v_version ge 4.0.0) {
$why_stabilized{'Hyphen'} = 'Use the Line_Break property instead; see www.unicode.org/reports/tr14';
+ if ($v_version ge 6.0.0) {
+ $why_deprecated{'Hyphen'} = 'Supplanted by Line_Break property values; see www.unicode.org/reports/tr14';
+ }
}
-if ($v_version ge 5.2.0) {
+# ISO_Comment is flagged obsolete for Unicode releases from 5.2 up to, but not
+# including, 6.0; from 6.0 onwards it is flagged deprecated instead.
+if ($v_version ge 5.2.0 && $v_version lt 6.0.0) {
$why_obsolete{'ISO_Comment'} = 'Code points for it have been removed';
+}
+elsif ($v_version ge 6.0.0) {
+    # This has to be a separate branch: if nested inside the 'lt 6.0.0' test
+    # above, it could never be executed.
+    $why_deprecated{'ISO_Comment'} = 'No longer needed for chart generation; otherwise not useful, and code points for it have been removed';
}
# Probably obsolete forever
# If you are using the Unihan database, you need to add the properties that
# you want to extract from it to this table. For your convenience, the
-# properties in the 5.2 PropertyAliases.txt file are listed, commented out
+# properties in the 6.0 PropertyAliases.txt file are listed, commented out
my @cjk_properties = split "\n", <<'END';
#cjkAccountingNumeric; kAccountingNumeric
#cjkOtherNumeric; kOtherNumeric
END
# Similarly for the property values. For your convenience, the lines in the
-# 5.2 PropertyAliases.txt file are listed. Just remove the first BUT NOT both
+# 6.0 PropertyAliases.txt file are listed. Just remove the first BUT NOT both
# '#' marks
my @cjk_property_values = split "\n", <<'END';
## @missing: 0000..10FFFF; cjkAccountingNumeric; NaN
'ReadMe.txt' => 'Just comments',
'README.TXT' => 'Just comments',
'StandardizedVariants.txt' => 'Only for glyph changes, not a Unicode character property. Does not fit into current scheme where one code point is mapped',
+ 'EmojiSources.txt' => 'Not of general utility: for Japanese legacy cell-phone applications',
+ 'IndicMatraCategory.txt' => 'Provisional',
+ 'IndicSyllabicCategory.txt' => 'Provisional',
+ 'ScriptExtensions.txt' => 'Provisional',
);
### End of externally interesting definitions, except for @input_file_objects
my $UNCONDITIONALLY = 2; # Replace without conditions.
my $MULTIPLE = 4; # Don't replace, but add a duplicate record if
# already there
+my $CROAK = 5; # Die with an error if already there
# Flags to give property statuses. The phrases are to remind maintainers that
# if the flag is changed, the indefinite article referring to it in the
my @annotate_char_type; # Contains a type of those characters, specifically
# for the purposes of annotation.
my $annotate_ranges; # A map of ranges of code points that have the same
- # name for the purposes of annoation. They map to the
+ # name for the purposes of annotation. They map to the
# upper edge of the range, so that the end point can
# be immediately found. This is used to skip ahead to
# the end of a range, and avoid processing each
$end = min($block->containing_range($i)->end,
$unassigned_sans_noncharacters-> containing_range($i)->
end);
- } else {
- my_carp_bug("Can't figure out how to annotate"
- . sprintf("U+%04X", $i)
- . "Proceeding anyway.");
+ }
+ else {
+ Carp::my_carp_bug("Can't figure out how to annotate "
+ . sprintf("U+%04X", $i)
+ . ". Proceeding anyway.");
$viacode[$i] = 'UNKNOWN';
$annotate_char_type[$i] = $UNKNOWN_TYPE;
$printable[$i] = 0;
if ($clean_insert) {
if ($r->[$j]->standard_form ne $standard_form) {
$clean_insert = 0;
+ if ($replace == $CROAK) {
+ main::croak("The range to add "
+ . sprintf("%04X", $start)
+ . '-'
+ . sprintf("%04X", $end)
+ . " with value '$value' overlaps an existing range $r->[$j]");
+ }
}
else {
$extends_above = ($j+1 < $range_list_size
&& $r->[$j+1]->start == $end +1
&& $r->[$j+1]->standard_form eq $standard_form
- && $r->[$j-1]->type == $type);
+ && $r->[$j+1]->type == $type);
}
if ($extends_below && $extends_above) { # Adds to both
$splice_start--; # start replace at element below
trace "i =[", $i, "]", $r->[$i];
trace 'i+1=[', $i+1, ']', $r->[$i+1] if $i < @$r - 1;
trace 'i+2=[', $i+2, ']', $r->[$i+2] if $i < @$r - 2;
- trace "removed @return";
+ trace "removed ", @return if @return;
}
# An actual deletion could have changed the maximum in the list.
# the character very frequently used.
return $try_hard if $code == 0x0000;
- return 0 if $try_hard; # XXX Temporary until fix utf8.c
-
# shun non-character code points.
return $try_hard if $code >= 0xFDD0 && $code <= 0xFDEF;
return $try_hard if ($code & 0xFFFE) == 0xFFFE; # includes FFFF
# not, is normal. The lists are prioritized so the most serious
# ones are checked first
if (exists $why_suppressed{$complete_name}
- # Don't suppress if overriden
+ # Don't suppress if overridden
&& ! grep { $_ eq $complete_name{$addr} }
@output_mapped_properties)
{
sub add_comment { # Adds the parameter as a comment.
+ return unless $debugging_build;
+
my $self = shift;
my $comment = shift;
Carp::carp_extra_args(\@_) if main::DEBUG && @_;
# If there is a range and doesn't need a single point range
# output
if ($start != $end && ! $range_size_1) {
- push @OUT, sprintf "%04X\t%04X\t%s", $start, $end, $value;
+ push @OUT, sprintf "%04X\t%04X", $start, $end;
+ $OUT[-1] .= "\t$value" if $value ne "";
# Add a comment with the size of the range, if requested.
# Expand Tabs to make sure they all start in the same
# Just before output, create the comment that heads the file
# containing this table.
+ return unless $debugging_build;
+
my $self = shift;
Carp::carp_extra_args(\@_) if main::DEBUG && @_;
# The pack() below can't cope with surrogates.
if ($code_point >= 0xD800 && $code_point <= 0xDFFF) {
- Carp::my_carp("Surrogage code point '$code_point' in mapping to '$map' in $self. No map created");
+ Carp::my_carp("Surrogate code point '$code_point' in mapping to '$map' in $self. No map created");
next;
}
# not quite so many.
# If they are related, one must be a perl extension. This is because
# we can't guarantee that Unicode won't change one or the other in a
- # later release even if they are idential now.
+ # later release even if they are identical now.
my $self = shift;
my $other = shift;
# ones that share the same file. It lists all such tables, ordered so
# that related ones are together.
+ return unless $debugging_build;
+
my $leader = shift; # Should only be called on the leader table of
# an equivalent group
Carp::carp_extra_args(\@_) if main::DEBUG && @_;
# each of them is stored in %alias_to_property_of as they are defined.
# But it's possible that this subroutine will be called with some
# variant, so if the initial lookup fails, it is repeated with the
- # standarized form of the input name. If found, besides returning the
+ # standardized form of the input name. If found, besides returning the
# result, the input name is added to the list so future calls won't
# have to do the conversion again.
. " argument to '-='. Subtraction ignored.");
return $self;
}
- elsif ($reversed) { # Shouldnt happen in a -=, but just in case
+ elsif ($reversed) { # Shouldn't happen in a -=, but just in case
Carp::my_carp_bug("Can't cope with a "
. __PACKAGE__
. " being the first parameter in a '-='. Subtraction ignored.");
# A blank separates the joined lines except if there is a break; an extra
# blank is inserted after a period ending a line.
- # Intialize the return with the first line.
+ # Initialize the return with the first line.
my ($return, @lines) = split "\n", shift;
# If the first line is null, it was an empty line, add the \n back in
$name =~ s/^\s+//g;
$name =~ s/\s+$//g;
- # Convert interior white space and hypens into underscores.
+ # Convert interior white space and hyphens into underscores.
$name =~ s/ (?<= .) [ -]+ (.) /_$1/xg;
# Capitalize the letter following an underscore, and convert a sequence of
return lc $name;
}
+sub utf8_heavy_name ($$) {
+    # Returns the name that utf8_heavy.pl will use to find a table.  XXX
+    # perhaps this function should be placed somewhere, like Heavy.pl so that
+    # utf8_heavy can use it directly without duplicating code that can get
+    # out of sync.
+    #
+    # The parameters are the table and the alias of it whose name is wanted.
+    # The result is the alias name preceded by the table's property name
+    # and an '=', except that nothing precedes it when the property is
+    # $perl.  If the alias is loosely matched, both parts are in the form
+    # returned by standardize(); otherwise the property part is
+    # standardize()d, the alias name is appended as-is, and the whole is
+    # lowercased.
+
+    my $table = shift;
+    my $alias = shift;
+    Carp::carp_extra_args(\@_) if main::DEBUG && @_;
+
+    my $property = $table->property;
+    $property = ($property == $perl)
+                ? ""                # 'perl' is never explicitly stated
+                : standardize($property->name) . '=';
+
+    # Each arm returns, so no trailing 'return' is needed after this.
+    if ($alias->loose_match) {
+        return $property . standardize($alias->name);
+    }
+    else {
+        return lc ($property . $alias->name);
+    }
+}
+
{ # Closure
my $indent_increment = " " x 2;
}
}
- # This entry is still missing as of 5.2, perhaps because no short name for
+ # This entry is still missing as of 6.0, perhaps because no short name for
# it.
if (-e 'NameAliases.txt') {
my $aliases = property_ref('Name_Alias');
my $fold = property_ref('Case_Folding');
$fold->set_file('Fold') if defined $fold;
- # utf8.c can't currently cope with non range-size-1 for these, and even if
- # it were changed to do so, someone else may be using them, expecting the
- # old style
+ # utf8.c has a different meaning for non range-size-1 for map properties
+ # that this program doesn't currently handle; and even if it were changed
+ # to do so, some other code may be using them expecting range size 1.
foreach my $property (qw {
Case_Folding
Lowercase_Mapping
#
# meaning the codepoints in the range all have the value 'map' under
# 'property'.
- # Beginning and trailing white space in each field are not signficant.
+ # Beginning and trailing white space in each field are not significant.
# Note there is not a trailing semi-colon in the above. A trailing
# semi-colon means the map is a null-string. An omitted map, as
# opposed to a null-string, is assumed to be 'Y', based on Unicode
# file, in any order, interspersed in any way. The first time a
# property is seen, it gets information about that property and
# caches it for quick retrieval later. It also normalizes the maps
- # so that only one of many synonym is stored. The Unicode input files
- # do use some multiple synonyms.
+ # so that only one of many synonyms is stored. The Unicode input
+ # files do use some multiple synonyms.
my $file = shift;
Carp::carp_extra_args(\@_) if main::DEBUG && @_;
# If the map begins with a special command to us (enclosed in
# delimiters), extract the command(s).
- if (substr($map, 0, 1) eq $CMD_DELIM) {
- while ($map =~ s/ ^ $CMD_DELIM (.*?) $CMD_DELIM //x) {
- my $command = $1;
- if ($command =~ / ^ $REPLACE_CMD= (.*) /x) {
- $replace = $1;
- }
- elsif ($command =~ / ^ $MAP_TYPE_CMD= (.*) /x) {
- $map_type = $1;
- }
- else {
- $file->carp_bad_line("Unknown command line: '$1'");
- next LINE;
- }
+ while ($map =~ s/ ^ $CMD_DELIM (.*?) $CMD_DELIM //x) {
+ my $command = $1;
+ if ($command =~ / ^ $REPLACE_CMD= (.*) /x) {
+ $replace = $1;
+ }
+ elsif ($command =~ / ^ $MAP_TYPE_CMD= (.*) /x) {
+ $map_type = $1;
+ }
+ else {
+ $file->carp_bad_line("Unknown command line: '$1'");
+ next LINE;
}
}
}
# the code point and name on each line. This was actually the hardest
# thing to design around. The code points in those ranges may actually
# have real maps not given by these two lines. These maps will either
- # be algorthimically determinable, or in the extracted files furnished
+ # be algorithmically determinable, or in the extracted files furnished
# with the UCD. In the event of conflicts between these extracted files,
# and this one, Unicode says that this one prevails. But it shouldn't
# prevail for conflicts that occur in these ranges. The data from the
}
return;
}
+
+ sub filter_v6_ucd {
+
+     # Adjusts the input line in $_ for the three code points that need
+     # special handling; any other line is left completely alone.
+     #   U+0007   Unicode 6.0 co-opted the name BELL for U+1F514, so pretend
+     #            that this code point is named ALERT instead.
+     #   U+070F   Gets the Bidi value given by Unicode Corrigendum #8; see
+     #            http://www.unicode.org/versions/corrigendum8.html
+     #   U+1F514  For Perl 5.14, the BELL name is not allowed for this code
+     #            point, so that the old usage can be deprecated for one
+     #            cycle.  For 5.16 will convert to use Unicode's name.
+
+     return unless /^(?:0007|1F514|070F);/;
+
+     my ($cp, @rest) = split /\s*;\s*/, $_, -1;
+
+     if ($cp eq '070F') {
+         $rest[$BIDI] = "AL";
+     }
+     elsif ($cp eq '0007') {
+         $rest[$CHARNAME] = "ALERT";
+     }
+     elsif ($^V lt v5.15.0) {
+
+         # The pattern above lets only three code points through, so this
+         # one has to be U+1F514.
+         $rest[$CHARNAME] = "";
+     }
+
+     # The line is reassembled even when no field was changed.
+     $_ = join ';', $cp, @rest;
+
+     return;
+ }
} # End closure for UnicodeData
sub process_GCB_test {
# implemented, it would be by hard-coding in the casing functions in the
# Perl core, not through tables. But if there is a new condition we don't
# know about, output a warning. We know about all the conditions through
- # 5.2
+ # 6.0
if ($fields[4] ne "") {
my @conditions = split ' ', $fields[4];
if ($conditions[0] ne 'tr' # We know that these languages have
# one.
# Titlecase duplicates UnicodeData.txt: gc=lt
# Unassigned Code Value duplicates UnicodeData.txt: gc=cc
- # Zero-width never made into offical property;
+ # Zero-width never made into official property;
# subset of gc=cf
# Most of the properties have the same names in this file as in later
# versions, but a couple do not.
# Add mappings to the property for each code point in the list
foreach my $range ($list->ranges) {
- $property->add_map($range->start, $range->end, $default);
+ $property->add_map($range->start, $range->end, $default,
+ Replace => $CROAK);
}
}
}
# Add any remaining code points to the mapping, using the default for
- # missing code points
+ # missing code points.
if (defined (my $default_map = $property->default_map)) {
- foreach my $range ($property->inverse_list->ranges) {
- $property->add_map($range->start, $range->end, $default_map);
- }
+
+ # This fills in any missing values with the default.
+ $property->add_map(0, $LAST_UNICODE_CODEPOINT,
+ $default_map, Replace => $NO);
# Make sure there is a match table for the default
if (! defined $property->table($default_map)) {
$LC->add_description('[\p{Ll}\p{Lu}\p{Lt}]');
my $Cs = $gc->table('Cs');
- if (defined $Cs) {
- $Cs->add_note('Mostly not usable in Perl.');
- $Cs->add_comment(join_lines(<<END
-Surrogates are used exclusively for I/O in UTF-16, and should not appear in
-Unicode text, and hence their use will generate (usually fatal) messages
-END
- ));
- }
# Folding information was introduced later into Unicode data. To get
);
my $Word = $perl->add_match_table('Word',
- Description => '\w, including beyond ASCII',
+ Description => '\w, including beyond ASCII;'
+ . ' = \p{Alnum} + \pM + \p{Pc}',
Initialize => $Alnum + $gc->table('Mark'),
);
$Word->add_alias('XPosixWord');
my $description_start = "Code point's usage introduced in version ";
$first_age->add_description($description_start . $first_age->name);
- # To construct the accumlated values, for each of the age tables
+ # To construct the accumulated values, for each of the age tables
# starting with the 2nd earliest, merge the earliest with it, to get
# all those code points existing in the 2nd earliest. Repeat merging
# the new 2nd earliest with the 3rd earliest to get all those existing
sub register_file_for_name($$$) {
# Given info about a table and a datafile that it should be associated
- # with, register that assocation
+ # with, register that association
my $table = shift;
my $directory_ref = shift; # Array of the directory path for the file
# goes through all aliases in the UCD that we generate regex match
# files for
foreach my $alias ($table->aliases) {
- my $name = $alias->name;
+ my $standard = utf8_heavy_name($table, $alias);
# Generate an entry in either the loose or strict hashes, which
# will translate the property and alias names combination into the
# file where the table for them is stored.
- my $standard;
if ($alias->loose_match) {
- $standard = $property . standardize($alias->name);
if (exists $loose_to_file_of{$standard}) {
Carp::my_carp("Can't change file registered to $loose_to_file_of{$standard} to '$sub_filename'.");
}
}
}
else {
- $standard = lc ($property . $name);
if (exists $stricter_to_file_of{$standard}) {
Carp::my_carp("Can't change file registered to $stricter_to_file_of{$standard} to '$sub_filename'.");
}
# will work. Also note that this assumes that such a
# number is matched strictly; so if that were to change,
# this would be wrong.
- if ((my $integer_name = $name)
+ if ((my $integer_name = $alias->name)
=~ s/^ ( -? \d+ ) \.0+ $ /$1/x)
{
$stricter_to_file_of{$property . $integer_name}
# expression, but with only one of 'Single', 'Short' if there
# are both items.
if ($short_name || $single_form || $table->conflicting) {
- $parenthesized .= '(';
$parenthesized .= "Short: $short_name" if $short_name;
if ($short_name && $single_form) {
$parenthesized .= ', ';
# to go on every entry.
my $conflicting = join " NOR ", $table->conflicting;
if ($conflicting) {
- $parenthesized .= '(' if ! $parenthesized;
- $parenthesized .= '; ' if $parenthesized ne '(';
+ $parenthesized .= '; ' if $parenthesized ne "";
$parenthesized .= "NOT $conflicting";
}
- $parenthesized .= ')' if $parenthesized;
- push @info, $parenthesized if $parenthesized;
+ push @info, "($parenthesized)" if $parenthesized;
if ($table_property != $perl && $table->perl_extension) {
push @info, '(Perl extension)';
}
- push @info, "($string_count)" if $output_range_counts;
+ push @info, "($string_count)";
# Now, we have both the entry and info so add them to the
# list of all the properties.
B<Compound forms> consist of two components, separated by an equals sign or a
colon. The first component is the property name, and the second component is
the particular value of the property to match against, for example,
-'\\p{Script: Greek}' or '\\p{Script=Greek}' both mean to match characters
+'\\p{Script: Greek}' and '\\p{Script=Greek}' both mean to match characters
whose Script property is Greek.
B<Single forms>, like '\\p{Greek}', are mostly Perl-defined shortcuts for
=back
Some properties are considered obsolete, but still available. There are
-several varieties of obsolesence:
+several varieties of obsolescence:
=over 4
=item Obsolete
Properties marked with $a_bold_obsolete in the table are considered
-obsolete. At the time of this writing (Unicode version 5.2) there is no
-information in the Unicode standard about the implications of a property being
obsolete.
=item Stabilized
-Obsolete properties may be stabilized. This means that they are not actively
-maintained by Unicode, and will not be extended as new characters are added to
-the standard. Such properties are marked with $a_bold_stabilized in the
-table. At the time of this writing (Unicode version 5.2) there is no further
-information in the Unicode standard about the implications of a property being
-stabilized.
+Obsolete properties may be stabilized. Such a determination does not indicate
+that the property should or should not be used; instead it is a declaration
+that the property will not be maintained nor extended for newly encoded
+characters. Such properties are marked with $a_bold_stabilized in the
+table.
=item Deprecated
-Obsolete properties may be deprecated. This means that their use is strongly
+An obsolete property may be deprecated, perhaps because its original intent
+has been replaced by another property or because its specification was somehow
+defective. This means that its use is strongly
discouraged, so much so that a warning will be issued if used, unless the
regular expression is in the scope of a C<S<no warnings 'deprecated'>>
statement. $A_bold_deprecated flags each such entry in the table, and
@block_warning
The table below has two columns. The left column contains the \\p{}
-constructs to look up, possibly preceeded by the flags mentioned above; and
+constructs to look up, possibly preceded by the flags mentioned above; and
the right column contains information about them, like a description, or
synonyms. It shows both the single and compound forms for each property that
has them. If the left column is a short name for a property, the right column
=back
An installation can choose to allow any of these to be matched by changing the
-controlling lists contained in the program C<\$Config{privlib}>/F<unicore/$0>
-and then re-running F<$0>. (C<\%Config> is available from the Config module).
+controlling lists contained in the program
+C<\$Config{privlib}>/F<unicore/mktables> and then re-running F<mktables>.
+(C<\%Config> is available from the Config module).
=head1 Files in the I<To> directory (for serious hackers only)
@map_tables_actually_output
An installation can choose to change which files are generated by changing the
-controlling lists contained in the program C<\$Config{privlib}>/F<unicore/$0>
-and then re-running F<$0>.
+controlling lists contained in the program
+C<\$Config{privlib}>/F<unicore/mktables> and then re-running F<mktables>.
Each of these files defines two hash entries to help reading programs decipher
it. One of them looks like this:
$filename = $table->file;
}
- # Use specified filename if avaliable, or default to property's
+ # Use specified filename if available, or default to property's
# shortest name. We need an 8.3 safe filename (which means "an 8
# safe" filename, since after the dot is only 'pl', which is < 3)
# The 2nd parameter is if the filename shouldn't be changed, and
? \&filter_v1_ucd
: ($v_version eq v2.1.5)
? \&filter_v2_1_5_ucd
- : undef),
+
+ # And for 5.14 Perls with Unicode 6.0,
+ # we also have to make changes
+ : ($v_version ge v6.0.0)
+ ? \&filter_v6_ucd
+ : undef),
# And the main filter
\&filter_UnicodeData_line,
#
# - First section is input files
# ($0 itself is not listed but is automatically considered an input)
-# - Section seperator is /^=+\$/
+# - Section separator is /^=+\$/
# - Second section is a list of output files.
# - Lines matching /^\\s*#/ are treated as comments
# which along with blank lines are ignored.