##
## Process UnicodeData.txt (Categories, etc.)
##
-sub Unicode_Txt()
+sub UnicodeData_Txt()
{
my $Bidi = Table->New();
my $Deco = Table->New();
## Initialize Perl-generated categories
## (Categories from UnicodeData.txt are auto-initialized in gencat)
- $Cat{Alnum} = Table->New(Is => 'Alnum', Desc => "[[:Alnum:]]", Fuzzy => 0);
- $Cat{Alpha} = Table->New(Is => 'Alpha', Desc => "[[:Alpha:]]", Fuzzy => 0);
- $Cat{ASCII} = Table->New(Is => 'ASCII', Desc => "[[:ASCII:]]", Fuzzy => 0);
- $Cat{Blank} = Table->New(Is => 'Blank', Desc => "[[:Blank:]]", Fuzzy => 0);
- $Cat{Cntrl} = Table->New(Is => 'Cntrl', Desc => "[[:Cntrl:]]", Fuzzy => 0);
- $Cat{Digit} = Table->New(Is => 'Digit', Desc => "[[:Digit:]]", Fuzzy => 0);
- $Cat{Graph} = Table->New(Is => 'Graph', Desc => "[[:Graph:]]", Fuzzy => 0);
- $Cat{Lower} = Table->New(Is => 'Lower', Desc => "[[:Lower:]]", Fuzzy => 0);
- $Cat{Print} = Table->New(Is => 'Print', Desc => "[[:Print:]]", Fuzzy => 0);
- $Cat{Punct} = Table->New(Is => 'Punct', Desc => "[[:Punct:]]", Fuzzy => 0);
- $Cat{Space} = Table->New(Is => 'Space', Desc => "[[:Space:]]", Fuzzy => 0);
- $Cat{Title} = Table->New(Is => 'Title', Desc => "[[:Title:]]", Fuzzy => 0);
- $Cat{Upper} = Table->New(Is => 'Upper', Desc => "[[:Upper:]]", Fuzzy => 0);
- $Cat{XDigit} = Table->New(Is => 'XDigit', Desc => "[[:XDigit:]]", Fuzzy => 0);
- $Cat{Word} = Table->New(Is => 'Word', Desc => "[[:Word:]]", Fuzzy => 0);
- $Cat{SpacePerl} = Table->New(Is => 'SpacePerl', Desc => '\s', Fuzzy => 0);
+ $Cat{Alnum} =
+ Table->New(Is => 'Alnum', Desc => "[[:Alnum:]]", Fuzzy => 0);
+ $Cat{Alpha} =
+ Table->New(Is => 'Alpha', Desc => "[[:Alpha:]]", Fuzzy => 0);
+ $Cat{ASCII} =
+ Table->New(Is => 'ASCII', Desc => "[[:ASCII:]]", Fuzzy => 0);
+ $Cat{Blank} =
+ Table->New(Is => 'Blank', Desc => "[[:Blank:]]", Fuzzy => 0);
+ $Cat{Cntrl} =
+ Table->New(Is => 'Cntrl', Desc => "[[:Cntrl:]]", Fuzzy => 0);
+ $Cat{Digit} =
+ Table->New(Is => 'Digit', Desc => "[[:Digit:]]", Fuzzy => 0);
+ $Cat{Graph} =
+ Table->New(Is => 'Graph', Desc => "[[:Graph:]]", Fuzzy => 0);
+ $Cat{Lower} =
+ Table->New(Is => 'Lower', Desc => "[[:Lower:]]", Fuzzy => 0);
+ $Cat{Print} =
+ Table->New(Is => 'Print', Desc => "[[:Print:]]", Fuzzy => 0);
+ $Cat{Punct} =
+ Table->New(Is => 'Punct', Desc => "[[:Punct:]]", Fuzzy => 0);
+ $Cat{Space} =
+ Table->New(Is => 'Space', Desc => "[[:Space:]]", Fuzzy => 0);
+ $Cat{Title} =
+ Table->New(Is => 'Title', Desc => "[[:Title:]]", Fuzzy => 0);
+ $Cat{Upper} =
+ Table->New(Is => 'Upper', Desc => "[[:Upper:]]", Fuzzy => 0);
+ $Cat{XDigit} =
+ Table->New(Is => 'XDigit', Desc => "[[:XDigit:]]", Fuzzy => 0);
+ $Cat{Word} =
+ Table->New(Is => 'Word', Desc => "[[:Word:]]", Fuzzy => 0);
+ $Cat{SpacePerl} =
+ Table->New(Is => 'SpacePerl', Desc => '\s', Fuzzy => 0);
my %To;
$To{Upper} = Table->New();
$title, ## titlecase mapping
) = split(/\s*;\s*/);
+ # Note that in Unicode 3.2 there will be names like
+ # LINE FEED (LF), which probably means that \N{} needs
+ # to cope also with LINE FEED and LF.
+ $name = $unicode10 if $name eq '<control>' && $unicode10 ne '';
+
my $code = hex($hexcode);
if ($comb and $comb == 230) {
# Wait until all the special cases have been read since
# they are not listed in numeric order.
my $ix = hex($code);
- push @{$CaseInfo{Lower}}, [ $ix, $code, $lower ];
- push @{$CaseInfo{Title}}, [ $ix, $code, $title ];
- push @{$CaseInfo{Upper}}, [ $ix, $code, $upper ];
+ push @{$CaseInfo{Lower}}, [ $ix, $code, $lower ]
+ unless $code eq $lower;
+ push @{$CaseInfo{Title}}, [ $ix, $code, $title ]
+ unless $code eq $title;
+ push @{$CaseInfo{Upper}}, [ $ix, $code, $upper ]
+ unless $code eq $upper;
}
close IN;
my $tostr =
join "", map { sprintf "\\x{%s}", $_ } split ' ', $to;
push @OUT, sprintf qq['%04X' => "$tostr",\n], $ix;
+ # Remove any single-character mappings for
+ # the same character since we are going for
+ # the special casing rules.
+ $NormalCase =~ s/^$code\t\t\w+\n//m;
}
push @OUT, (
");\n\n",
## Do it....
-Unicode_Txt();
+UnicodeData_Txt();
Make_GC_Aliases();
PropList_txt();