# version. But manual intervention to decide what the actual behavior
# should be may be required should this happen. The introductory comments
# have more to say about this.
+#
+# 4) Definition. This is a string for human consumption that specifies the
+# code points that this table matches. This is used only for the generated
+# pod file.
# Shorthand wrapper: passes only its first argument to main::standardize()
# and returns whatever that returns.  Any further arguments are ignored.
# NOTE(review): presumably exists so code in this package can call
# standardize() without the main:: qualifier -- confirm against the
# enclosing package declaration.
sub standardize { return main::standardize($_[0]); }
# Shorthand wrapper: forwards every argument, unchanged, to main::trace()
# and returns whatever that returns.
sub trace { return main::trace(@_); }
# none.
main::set_access('complement', \%complement, 'r');
+ my %definition;
+ # Human readable string of the code points matched by this table
+ main::set_access('definition', \%definition, 'r', 's');
+
sub new {
my $class = shift;
my $initialize = delete $args{'Initialize'};
my $matches_all = delete $args{'Matches_All'} || 0;
my $format = delete $args{'Format'};
+ my $definition = delete $args{'Definition'} // "";
# Rest of parameters passed on.
my $range_list = Range_List->new(Initialize => $initialize,
$leader{$addr} = $self;
$parent{$addr} = $self;
$complement{$addr} = 0;
+ $definition{$addr} = $definition;
if (defined $format && $format ne $EMPTY_FORMAT) {
Carp::my_carp_bug("'Format' must be '$EMPTY_FORMAT' in a match table instead of '$format'. Using '$EMPTY_FORMAT'");
containing_range
count
default_map
+ definition
delete_range
description
each_range
}
my $Any = $perl->add_match_table('Any',
- Description => "All Unicode code points: [\\x{0000}-\\x{$MAX_UNICODE_CODEPOINT_STRING}]",
- );
+ Description => "All Unicode code points",
+ Definition => "[\\x{0000}-\\x{$MAX_UNICODE_CODEPOINT_STRING}]");
$Any->add_range(0, $MAX_UNICODE_CODEPOINT);
$Any->add_alias('Unicode');
->set_equivalent_to(property_ref('ccc')->table('Above'),
Related => 1);
- my $ASCII = $perl->add_match_table('ASCII', Description => '[[:ASCII:]]');
+ my $ASCII = $perl->add_match_table('ASCII');
if (defined $block) { # This is equivalent to the block if have it.
my $Unicode_ASCII = $block->table('Basic_Latin');
if (defined $Unicode_ASCII && ! $Unicode_ASCII->is_empty) {
$Lower += $temp & $Assigned;
}
my $Posix_Lower = $perl->add_match_table("PosixLower",
- Description => "[a-z]",
+ Definition => "[a-z]",
Initialize => $Lower & $ASCII,
);
$Upper->add_range(0x24B6, 0x24CF); # Circled Latin upper case letters
}
my $Posix_Upper = $perl->add_match_table("PosixUpper",
- Description => "[A-Z]",
+ Definition => "[A-Z]",
Initialize => $Upper & $ASCII,
);
$Alpha->add_alias('Alphabetic');
}
my $Posix_Alpha = $perl->add_match_table("PosixAlpha",
- Description => "[A-Za-z]",
+ Definition => "[A-Za-z]",
Initialize => $Alpha & $ASCII,
);
$Posix_Upper->set_caseless_equivalent($Posix_Alpha);
Initialize => $Alpha + $gc->table('Decimal_Number'),
);
$perl->add_match_table("PosixAlnum",
- Description => "[A-Za-z0-9]",
+ Definition => "[A-Za-z0-9]",
Initialize => $Alnum & $ASCII,
);
# This is a Perl extension.  The table itself is named PosixWord; the alias
# PerlWord added below is the name that doesn't begin with Posix.
my $PerlWord = $perl->add_match_table('PosixWord',
- Description => '\w, restricted to ASCII = [A-Za-z0-9_]',
+ Description => '\w, restricted to ASCII',
+ Definition => '[A-Za-z0-9_]',
Initialize => $Word & $ASCII,
);
$PerlWord->add_alias('PerlWord');
);
$Blank->add_alias('HorizSpace'); # Another name for it.
$perl->add_match_table("PosixBlank",
- Description => "\\t and ' '",
+ Definition => "\\t and ' '",
Initialize => $Blank & $ASCII,
);
$Space->add_alias('Space') if $v_version lt v4.1.0;
my $Posix_space = $perl->add_match_table("PosixSpace",
- Description => "\\t, \\n, \\cK, \\f, \\r, and ' '. (\\cK is vertical tab)",
+ Definition => "\\t, \\n, \\cK, \\f, \\r, and ' '. (\\cK is vertical tab)",
Initialize => $Space & $ASCII,
);
$Posix_space->add_alias('PerlSpace'); # A pre-existing synonym
Description => 'Control characters');
$Cntrl->set_equivalent_to($gc->table('Cc'), Related => 1);
$perl->add_match_table("PosixCntrl",
- Description => "ASCII control characters: NUL, SOH, STX, ETX, EOT, ENQ, ACK, BEL, BS, HT, LF, VT, FF, CR, SO, SI, DLE, DC1, DC2, DC3, DC4, NAK, SYN, ETB, CAN, EOM, SUB, ESC, FS, GS, RS, US, and DEL",
+ Description => "ASCII control characters",
+ Definition => "ACK, BEL, BS, CAN, CR, DC1, DC2,"
+ . " DC3, DC4, DEL, DLE, ENQ, EOM,"
+ . " EOT, ESC, ETB, ETX, FF, FS, GS,"
+ . " HT, LF, NAK, NUL, RS, SI, SO,"
+ . " SOH, STX, SUB, SYN, US, VT",
Initialize => $Cntrl & $ASCII,
);
Initialize => ~ ($Space + $controls),
);
$perl->add_match_table("PosixGraph",
- Description =>
+ Definition =>
'[-!"#$%&\'()*+,./:;<=>?@[\\\]^_`{|}~0-9A-Za-z]',
Initialize => $Graph & $ASCII,
);
Initialize => $Blank + $Graph - $gc->table('Control'),
);
$perl->add_match_table("PosixPrint",
- Description =>
+ Definition =>
'[- 0-9A-Za-z!"#$%&\'()*+,./:;<=>?@[\\\]^_`{|}~]',
Initialize => $print & $ASCII,
);
Perl_Extension => 1
);
$perl->add_match_table('PosixPunct', Perl_Extension => 1,
- Description => '[-!"#$%&\'()*+,./:;<=>?@[\\\]^_`{|}~]',
+ Definition => '[-!"#$%&\'()*+,./:;<=>?@[\\\]^_`{|}~]',
Initialize => $ASCII & $XPosixPunct,
);
Description => '[0-9] + all other decimal digits');
$Digit->set_equivalent_to($gc->table('Decimal_Number'), Related => 1);
my $PosixDigit = $perl->add_match_table("PosixDigit",
- Description => '[0-9]',
+ Definition => '[0-9]',
Initialize => $Digit & $ASCII,
);
ord('A') .. ord('F'),
ord('a') .. ord('f'),
0xFF10..0xFF19, 0xFF21..0xFF26, 0xFF41..0xFF46]);
- $Xdigit->add_description('[0-9A-Fa-f] and corresponding fullwidth versions, like U+FF10: FULLWIDTH DIGIT ZERO');
+ $Xdigit->set_definition('[0-9A-Fa-f] and corresponding fullwidth versions, like U+FF10: FULLWIDTH DIGIT ZERO');
}
# AHex was not present in early releases
$PosixXDigit->add_alias('AHex');
$PosixXDigit->add_alias('Ascii_Hex_Digit');
}
- $PosixXDigit->add_description('[0-9A-Fa-f]');
+ $PosixXDigit->set_definition('[0-9A-Fa-f]');
my $any_folds = $perl->add_match_table("_Perl_Any_Folds",
Description => "Code points that particpate in some fold",
if ($table_property != $perl && $table->perl_extension) {
push @info, '(Perl extension)';
}
- push @info, "($string_count)";
+ my $definition = $table->definition // "";
+ $definition = "" if $entry_for_first_alias;
+ $definition = ": $definition" if $definition;
+ push @info, "($string_count$definition)";
# Now, we have both the entry and info so add them to the
# list of all the properties.
: $table->parent->property;
my $perl_extension = $table->perl_extension;
+ my $is_perl_extension_match_table_but_not_dollar_perl
+ = $property != $perl
+ && $perl_extension
+ && $property != $table;
# Get the more official name for perl extensions that aren't
# stand-alone properties
- if ($perl_extension && $property != $table) {
- if ($property == $perl ||$property->type == $BINARY) {
- $meaning = $table->complete_name;
+ if ($is_perl_extension_match_table_but_not_dollar_perl) {
+ if ($property->type == $BINARY) {
+ $meaning = $property->full_name;
}
else {
- $meaning = $property->full_name . "=$full_name";
+ $meaning = $table->parent->complete_name;
}
}
# There are three types of info column. One for the short name, one for
# the full name, and one for everything else. They mostly are the same,
# so initialize in the same loop.
+
foreach my $info_ref (\$full_info, \$short_info, \$other_info) {
- if ($perl_extension && $property != $table) {
+ if ($info_ref != \$full_info) {
+
+ # The non-full name columns include the full name
+ $$info_ref .= $full_name;
+ }
+
+
+ if ($is_perl_extension_match_table_but_not_dollar_perl) {
# Add the synonymous name for the non-full name entries; and to
# the full-name entry if it adds extra information
- if ($info_ref == \$other_info
- || ($info_ref == \$short_info
- && $standard_short_name ne $standard_full_name)
- || standardize($meaning) ne $standard_full_name
- ) {
- $$info_ref .= "$meaning.";
+ if ( standardize($meaning) ne $standard_full_name
+ || $info_ref == \$other_info
+ || $info_ref == \$short_info)
+ {
+ my $parenthesized = $info_ref != \$full_info;
+ $$info_ref .= " " if $$info_ref && $parenthesized;
+ $$info_ref .= "(=" if $parenthesized;
+ $$info_ref .= "$meaning";
+ $$info_ref .= ")" if $parenthesized;
+ $$info_ref .= ".";
}
}
- elsif ($info_ref != \$full_info) {
-
- # Otherwise, the non-full name columns include the full name
- $$info_ref .= $full_name;
- }
# And the full-name entry includes the short name, if shorter
if ($info_ref == \$full_info