+ sub _next_line_with_remapped_range {
+     my $self = shift;
+     Carp::carp_extra_args(\@_) if main::DEBUG && @_;
+
+     # Like _next_line(), but for use on non-ASCII platforms.  It sets $_
+     # to be the next logical input line, if any.  Returns 1 if such a
+     # line exists, and 0 if not.  'logical' means that any lines that
+     # have been added via insert_lines() will be returned in $_ before
+     # the file is read again.
+     #
+     # The difference from _next_line() is that this remaps the Unicode
+     # code points in the input to those of the native platform.  Each
+     # input line contains a single code point, or a single contiguous
+     # range of them.  This routine splits each range into its individual
+     # code points and caches them.  It returns the cached values,
+     # translated into their native equivalents, one at a time, for each
+     # call, before reading the next line.  Since native values can only be
+     # a single byte wide, no translation is needed for code points above
+     # 0xFF, and ranges that are entirely above that number are not split.
+     # If an input line contains the range 254-1000, it would be split into
+     # three elements: 254, 255, and 256-1000.  (The downstream table
+     # insertion code will sort and coalesce the individual code points
+     # into appropriate ranges.)
+     #
+     # An input line that does not have the expected
+     # 'range; property [; map]' shape is reported via my_carp_bug() and
+     # skipped; the next line is then tried.
+
+     # Key used to look up this object's entries in the per-object hashes
+     # (%remapped_lines, %handler).  Overloading is turned off while the
+     # object reference is packed.
+     my $addr = do { no overloading; pack 'J', $self; };
+
+     while (1) {
+
+         # Look in cache before reading the next line.  Return any cached
+         # value, translated.  Cached entries have the form
+         # 'DECIMAL_CODE_POINT; tail'; the leading number is replaced by
+         # its native equivalent, in hex.
+         my $inserted = shift @{$remapped_lines{$addr}};
+         if (defined $inserted) {
+             trace $inserted if main::DEBUG && $to_trace;
+             $_ = $inserted =~ s/^ ( \d+ ) /sprintf("%04X", utf8::unicode_to_native($1))/xer;
+             trace $_ if main::DEBUG && $to_trace;
+             return 1;
+         }
+
+         # Get the next line.
+         return 0 unless _next_line($self);
+
+         # If there is a special handler for it, return the line,
+         # untranslated.  This should happen only for files that are
+         # special, not being code-point related, such as property names.
+         return 1 if $handler{$addr}
+                     != \&main::process_generic_property_file;
+
+         my ($range, $property_name, $map, @remainder)
+             = split /\s*;\s*/, $_, -1; # -1 => retain trailing null fields
+
+         if (@remainder
+             || ! defined $property_name
+             || $range !~ /^ ($code_point_re) (?:\.\. ($code_point_re) )? $/x)
+         {
+             Carp::my_carp_bug("Unrecognized input line '$_'. Ignored");
+
+             # Really ignore it.  Falling through would use $1 and $2,
+             # which are not set by this line when it fails to parse (the
+             # pattern may not even have been tried, because of the
+             # short-circuiting '||' above).
+             next;
+         }
+
+         # $1 and $2 are the captures from the successful match of $range
+         # in the condition just above.
+         my $low = hex $1;
+         my $high = (defined $2) ? hex $2 : $low;
+
+         # If the input maps the range to another code point, remap the
+         # target if it is between 0 and 255.
+         my $tail;
+         if (defined $map) {
+             $map =~ s/\b 00 ( [0-9A-F]{2} ) \b/sprintf("%04X", utf8::unicode_to_native(hex $1))/gxe;
+             $tail = "$property_name; $map";
+             $_ = "$range; $tail";
+         }
+         else {
+             $tail = $property_name;
+         }
+
+         # If entire range is above 255, just return it, unchanged (except
+         # any mapped-to code point, already changed above)
+         return 1 if $low > 255;
+
+         # Cache an entry for every code point <= 255.  For those in the
+         # range above 255, return a dummy entry for just that portion of
+         # the range.  Note that this will be out-of-order, but that is not
+         # a problem.  If the whole range is <= 255, nothing is returned
+         # here; the next loop iteration returns the first cached entry.
+         foreach my $code_point ($low .. $high) {
+             if ($code_point > 255) {
+                 $_ = sprintf "%04X..%04X; $tail", $code_point, $high;
+                 return 1;
+             }
+             push @{$remapped_lines{$addr}}, "$code_point; $tail";
+         }
+     } # End of looping through lines.
+
+     # NOTREACHED
+ }
+