/* -*- buffer-read-only: t -*-
* !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
- * This file is built by regen/mk_PL_charclass.pl from property definitions
- * and lib/unicore/CaseFolding.txt.
+ * This file is built by regen/mk_PL_charclass.pl from property definitions.
* Any changes made here will be lost!
*/
# Read in the case fold mappings.
my %folded_closure;
my $file="lib/unicore/CaseFolding.txt";
-open my $fh, "<", $file or die "Failed to read '$file': $!";
-while (<$fh>) {
+my @folds;
+use Unicode::UCD;
+
+# Use the Unicode data file if we are on an ASCII platform (which its data is
+# for), and it is in the modern format (starting in Unicode 3.1.0) and it is
+# available. This avoids being affected by potential bugs introduced by other
+# layers of Perl
+if (ord('A') == 65
+ && pack("C*", split /\./, Unicode::UCD::UnicodeVersion()) ge v3.1.0
+ && open my $fh, "<", $file)
+{
+ @folds = <$fh>;
+}
+else {
+ my ($invlist_ref, $invmap_ref, undef, $default)
+ = Unicode::UCD::prop_invmap('Case_Folding');
+ for my $i (0 .. @$invlist_ref - 1 - 1) {
+ next if $invmap_ref->[$i] == $default;
+ my $adjust = -1;
+ for my $j ($invlist_ref->[$i] .. $invlist_ref->[$i+1] -1) {
+ $adjust++;
+
+ # Single-code point maps go to a 'C' type
+ if (! ref $invmap_ref->[$i]) {
+ push @folds, sprintf("%04X; C; %04X\n",
+ $j,
+ $invmap_ref->[$i] + $adjust);
+ }
+ else { # Multi-code point maps go to 'F'. prop_invmap()
+ # guarantees that no adjustment is needed for these,
+ # as the range will contain just one element
+ push @folds, sprintf("%04X; F; %s\n",
+ $j,
+ join " ", map { sprintf "%04X", $_ }
+ @{$invmap_ref->[$i]});
+ }
+ }
+ }
+}
+
+for (@folds) {
chomp;
# Lines look like (without the initial '#'
my $out_fh = open_new('l1_char_class_tab.h', '>',
{style => '*', by => $0,
- from => "property definitions and $file"});
+ from => "property definitions"});
# Output the table using fairly short names for each char.
for my $ord (0..255) {