+ # The remaining variables are temporaries used while writing each table,
+ # to output special ranges. handle_special_range() is what fills them in.
+ my $has_hangul_syllables;  # Set to 1 once a Hangul syllable range is seen.
+ my @multi_code_point_maps; # One output line per code point whose map is to more than one code point (or is null).
+
+ # The key is the base name of the code point, and the value is a hash
+ # holding two parallel arrays, 'low' and 'high'; the i-th entries of
+ # each give the bounds of the i-th range that uses this base name.
+ my %names_ending_in_code_point;
+
+ # Inverse mapping. The list of ranges that have these kinds of
+ # names. Each element is a hash with the keys 'low', 'high', and
+ # 'name' (the base name).
+ my @code_points_ending_in_code_point;
+
+ sub handle_special_range {
+     # Called in the middle of write when it finds a range it doesn't know
+     # how to handle.  Depending on the range's type this either sets
+     # $has_hangul_syllables, records the range in both
+     # %names_ending_in_code_point and @code_points_ending_in_code_point,
+     # or appends one formatted line per code point to
+     # @multi_code_point_maps.  A range whose map equals the default map
+     # is ignored; an unrecognized type only produces a warning.
+     #
+     # Parameters: $self is the object whose write is in progress; $range
+     # is the range object (its type, start, end and value methods are
+     # used).  Always returns nothing.
+
+     my $self = shift;
+     my $range = shift;
+     Carp::carp_extra_args(\@_) if main::DEBUG && @_;
+
+     # Pack the reference's numeric value, with overloading turned off, to
+     # get the key under which this object's entry in %default_map is kept.
+     my $addr = do { no overloading; pack 'J', $self; };
+
+     my $type = $range->type;
+
+     my $low = $range->start;
+     my $high = $range->end;
+     my $map = $range->value;
+
+     # No need to output the range if it maps to the default.
+     return if $map eq $default_map{$addr};
+
+     # Switch based on the map type...
+     if ($type == $HANGUL_SYLLABLE) {
+
+         # These are entirely algorithmically determinable based on
+         # some constants furnished by Unicode; for now, just set a
+         # flag to indicate that we have them.  After everything is
+         # figured out, we will output the code that does the algorithm.
+         $has_hangul_syllables = 1;
+     }
+     elsif ($type == $CP_IN_NAME) {
+
+         # Code points whose name ends in their code point are also
+         # algorithmically determinable, but need information about the map
+         # to do so.  Both the map and its inverse are stored in data
+         # structures output in the file.  The forward structure keeps
+         # parallel 'low' and 'high' arrays for each base name.
+         push @{$names_ending_in_code_point{$map}->{'low'}}, $low;
+         push @{$names_ending_in_code_point{$map}->{'high'}}, $high;
+
+         push @code_points_ending_in_code_point, { low => $low,
+                                                   high => $high,
+                                                   name => $map
+                                                 };
+     }
+     elsif ($type == $MULTI_CP || $type == $NULL) {
+
+         # Multi-code point maps and null string maps have an entry
+         # for each code point in the range.  They use the same
+         # output format.
+         CODE_POINT:
+         for my $code_point ($low .. $high) {
+
+             # The pack() below can't cope with surrogates.
+             if ($code_point >= 0xD800 && $code_point <= 0xDFFF) {
+                 Carp::my_carp("Surrogate code point '$code_point' in mapping to '$map' in $self. No map created");
+                 next CODE_POINT;
+             }
+
+             # Generate the hash entries for these in the form that
+             # utf8.c understands.
+             my $tostr = "";
+             foreach my $to (split " ", $map) {
+                 if ($to !~ /^$code_point_re$/) {
+                     Carp::my_carp("Illegal code point '$to' in mapping '$map' from $code_point in $self. No map created");
+
+                     # Abandon the whole entry, as the message promises,
+                     # rather than emitting a map that lacks this piece.
+                     next CODE_POINT;
+                 }
+                 $tostr .= sprintf "\\x{%s}", $to;
+             }
+
+             # The key is the code point's encoded bytes, each written as
+             # a \xHH escape: pack("U") builds the one-character string,
+             # and unpack("U0C*") then yields its bytes one at a time (see
+             # "pack" in perlfunc for how U0 makes unpack look at the
+             # UTF-8 encoding).  The value is the \x{...} list built above.
+             my $utf8_bytes = join "", map { sprintf "\\x%02X", $_ }
+                                       unpack("U0C*", pack("U", $code_point));
+             my $utf8 = sprintf qq["%s" => "%s",], $utf8_bytes, $tostr;
+
+             # Add a comment so that a human reader can more easily
+             # see what's going on.
+             push @multi_code_point_maps,
+                  sprintf("%-45s # U+%04X", $utf8, $code_point) . " => $map";
+         }
+     }
+     else {
+
+         # Note that a method call is not interpolated inside a string, so
+         # the already-fetched $type is used here.
+         Carp::my_carp("Unrecognized map type '$type' in '$range' in $self. Not written");
+     }
+
+     return;
+ }
+