Commit | Line | Data |
---|---|---|
d73e5302 JH |
1 | #!/usr/bin/perl -w |
2 | ||
3 | # | |
4 | # mktables -- create the runtime Perl Unicode files (lib/unicore/**/*.pl) | |
5 | # from the Unicode database files (lib/unicore/*.txt). | |
6 | # | |
7 | ||
8 | my $LastUnicodeCodepoint = 0x10FFFF; # As of Unicode 3.1.1. | |
9 | ||
10 | use strict; | |
11 | ||
12 | mkdir("In", 0755); | |
13 | mkdir("Is", 0755); | |
14 | mkdir("To", 0755); | |
15 | ||
16 | sub extend { | |
17 | my ($table, $last) = @_; | |
18 | ||
19 | $table->[-1]->[1] = $last; | |
20 | } | |
21 | ||
22 | sub append { | |
23 | my ($table, $code, $name) = @_; | |
24 | if (@$table && | |
25 | hex($table->[-1]->[1]) == hex($code) - 1 && | |
26 | (!defined $name || $table->[-1]->[2] eq $name)) { | |
27 | extend($table, $code); | |
28 | } else { | |
29 | push @$table, [$code, $code, $name]; | |
30 | } | |
31 | } | |
32 | ||
33 | sub inverse { | |
34 | my ($table) = @_; | |
35 | my $inverse = []; | |
36 | my ($first, $last); | |
37 | if ($table->[0]->[0]) { | |
38 | $last = hex($table->[0]->[0]); | |
39 | push @$inverse, [ "0000", | |
40 | sprintf("%04X", $last - 1) ]; | |
41 | } | |
42 | for my $i (0..$#$table-1) { | |
43 | $first = defined $table->[$i ]->[1] ? | |
44 | hex($table->[$i ]->[1]) : 0; | |
45 | $last = defined $table->[$i + 1]->[0] ? | |
46 | hex($table->[$i + 1]->[0]) : $first; | |
47 | push @$inverse, [ sprintf("%04X", $first + 1), | |
48 | sprintf("%04X", $last - 1) ] | |
49 | unless $first + 1 == $last; | |
50 | } | |
51 | return $inverse; | |
52 | } | |
53 | ||
54 | sub header { | |
55 | my $fh = shift; | |
56 | ||
57 | print $fh <<EOT; | |
58 | # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! | |
59 | # This file is built by $0 from e.g. Unicode.txt. | |
60 | # Any changes made here will be lost! | |
61 | EOT | |
62 | } | |
63 | ||
64 | sub begin { | |
65 | my $fh = shift; | |
66 | ||
67 | print $fh <<EOT; | |
68 | return <<'END'; | |
69 | EOT | |
70 | } | |
71 | ||
72 | sub end { | |
73 | my $fh = shift; | |
74 | ||
75 | print $fh <<EOT; | |
76 | END | |
77 | EOT | |
78 | } | |
79 | ||
80 | sub flush { | |
81 | my ($table, $file) = @_; | |
82 | print "$file\n"; | |
83 | if (open(my $fh, ">$file")) { | |
84 | header($fh); | |
85 | begin($fh); | |
86 | for my $i (@$table) { | |
87 | print $fh $i->[0], "\t", | |
88 | $i->[1] ne $i->[0] ? $i->[1] : "", "\t", | |
89 | defined $i->[2] ? $i->[2] : "", "\n"; | |
90 | } | |
91 | end($fh); | |
92 | close($fh); | |
93 | } else { | |
94 | die "$0: $file: $!\n"; | |
95 | } | |
96 | } | |
97 | ||
98 | # | |
99 | # The %In contains the mapping of the script/block name into a number. | |
100 | # | |
101 | ||
102 | my %In; | |
103 | my $InId = 0; | |
104 | my %InIn; | |
105 | ||
106 | # | |
107 | # Read in the Unicode.txt, the main Unicode database. | |
108 | # | |
109 | ||
110 | my %Cat; | |
111 | my %General; | |
112 | my @General; | |
113 | ||
114 | if (open(my $Unicode, "Unicode.txt")) { | |
115 | my @Name; | |
116 | my @Bidi; | |
117 | my %Bidi; | |
118 | my @Comb; | |
119 | my @Deco; | |
120 | my %Deco; | |
121 | my %DC; | |
122 | my @Number; | |
123 | my @Mirrored; | |
124 | my %To; | |
125 | while (<$Unicode>) { | |
126 | next if /^\#/ || /^\s*$/; | |
127 | next unless /^[0-9a-f]+\s*;/i; | |
128 | s/\s+$//; | |
129 | my ($code, $name, $cat, $comb, $bidi, $deco, | |
130 | $decimal, $digit, $number, | |
131 | $mirrored, $unicode10, $comment, | |
132 | $upper, $lower, $title) = split(/\s*;\s*/); | |
133 | ||
134 | if ($name =~ /^<(.+), (First|Last)>$/) { | |
135 | $name = $1; | |
136 | if ($2 eq 'First') { | |
137 | append($General{$name} ||= [], $code, $name); | |
138 | } else { | |
139 | extend($General{$name} , $code); | |
140 | } | |
141 | unless (defined $In{$name}) { | |
142 | $In{$name} = $InId++; | |
143 | $InIn{$name} = $General{$name}; | |
144 | } | |
145 | append($Cat{$cat} ||= [], $code); | |
146 | append($Cat{substr($cat, 0, 1)} | |
147 | ||= [], $code); | |
148 | } else { | |
149 | append(\@Name, $code, $name); | |
150 | ||
151 | append(\@General, $code, $cat); | |
152 | ||
153 | append($Cat{$cat} ||= [], $code); | |
154 | append($Cat{substr($cat, 0, 1)} | |
155 | ||= [], $code); | |
156 | # 005F: SPACING UNDERSCORE | |
157 | append($Cat{Word} ||= [], $code) | |
158 | if $cat =~ /^[LMN]/ or $code eq "005F"; | |
159 | append($Cat{Alnum} ||= [], $code) | |
160 | if $cat =~ /^[LMN]/; | |
161 | append($Cat{Alpha} ||= [], $code) | |
162 | if $cat =~ /^[LM]/; | |
163 | # 0009: HORIZONTAL TABULATION | |
164 | # 000A: LINE FEED | |
165 | # 000B: VERTICAL TABULATION | |
166 | # 000C: FORM FEED | |
167 | # 000D: CARRIAGE RETURN | |
168 | # 0020: SPACE | |
169 | append($Cat{Space} ||= [], $code) | |
170 | if $cat =~ /^Z/ || | |
171 | $code =~ /^(0009|000A|000B|000C|000D)$/; | |
172 | append($Cat{SpacePerl} ||= [], $code) | |
173 | if $cat =~ /^Z/ || | |
174 | $code =~ /^(0009|000A|000C|000D)$/; | |
175 | append($Cat{Blank} ||= [], $code) | |
176 | if $code =~ /^(0020|0009)$/ || | |
177 | $cat =~ /^Z[^lp]$/; | |
178 | append($Cat{Digit} ||= [], $code) if $cat eq "Nd"; | |
179 | append($Cat{Upper} ||= [], $code) if $cat eq "Lu"; | |
180 | append($Cat{Lower} ||= [], $code) if $cat eq "Ll"; | |
181 | append($Cat{Title} ||= [], $code) if $cat eq "Lt"; | |
182 | append($Cat{ASCII} ||= [], $code) if $code le "007F"; | |
183 | append($Cat{Cntrl} ||= [], $code) if $cat =~ /^C/; | |
184 | append($Cat{Graph} ||= [], $code) if $cat =~ /^([LMNPS]|Co)/; | |
185 | append($Cat{Print} ||= [], $code) if $cat =~ /^([LMNPS]|Co|Zs)/; | |
186 | append($Cat{Punct} ||= [], $code) if $cat =~ /^P/; | |
187 | # 003[0-9]: DIGIT ZERO..NINE, 00[46][1-6]: A..F, a..f | |
188 | append($Cat{XDigit} ||= [], $code) | |
189 | if $code =~ /^00(3[0-9]|[46][1-6])$/; | |
190 | ||
191 | append($To{Upper} ||= [], $code, $upper) if $upper; | |
192 | append($To{Lower} ||= [], $code, $lower) if $lower; | |
193 | append($To{Title} ||= [], $code, $title) if $title; | |
194 | append($To{Digit} ||= [], $code, $decimal) if $decimal; | |
195 | ||
196 | append(\@Bidi, $code, $bidi); | |
197 | append($Bidi{$bidi} ||= [], $code); | |
198 | ||
199 | append(\@Comb, $code, $comb) if $comb; | |
200 | ||
201 | if ($deco) { | |
202 | append(\@Deco, $code, $deco); | |
203 | if ($deco =~/^<(\w+)>/) { | |
204 | append($Deco{Compat} ||= [], $code); | |
205 | append($DC{$1} ||= [], $code); | |
206 | } else { | |
207 | append($Deco{Canon} ||= [], $code); | |
208 | } | |
209 | } | |
210 | ||
211 | append(\@Number, $code, $number) if $number; | |
212 | ||
213 | append(\@Mirrored, $code) if $mirrored eq "Y"; | |
214 | } | |
215 | } | |
216 | ||
217 | flush(\@Name, "Name.pl"); | |
218 | ||
219 | foreach my $cat (sort keys %Cat) { | |
220 | flush($Cat{$cat}, "Is/$cat.pl"); | |
221 | } | |
222 | ||
223 | foreach my $to (sort keys %To) { | |
224 | flush($To{$to}, "To/$to.pl"); | |
225 | } | |
226 | ||
227 | flush(\@Bidi, "Bidirectional.pl"); | |
228 | foreach my $bidi (sort keys %Bidi) { | |
229 | flush($Bidi{$bidi}, "Is/Bidi$bidi.pl"); | |
230 | } | |
231 | ||
232 | flush(\@Comb, "CombiningClass.pl"); | |
233 | ||
234 | flush(\@Deco, "Decomposition.pl"); | |
235 | foreach my $deco (sort keys %Deco) { | |
236 | flush($Deco{$deco}, "Is/Deco$deco.pl"); | |
237 | } | |
238 | foreach my $dc (sort keys %DC) { | |
239 | flush($DC{$dc}, "Is/DC$dc.pl"); | |
240 | } | |
241 | ||
242 | flush(\@Number, "Number.pl"); | |
243 | ||
244 | flush(\@Mirrored, "Is/Mirrored.pl"); | |
245 | } else { | |
246 | die "$0: Unicode.txt: $!\n"; | |
247 | } | |
248 | ||
249 | # The general cateory can be written out already now. | |
250 | ||
251 | flush(\@General, "Category.pl"); | |
252 | ||
253 | # | |
254 | # Read in the LineBrk.txt. | |
255 | # | |
256 | ||
257 | if (open(my $LineBrk, "LineBrk.txt")) { | |
258 | my @Lbrk; | |
259 | my %Lbrk; | |
260 | ||
261 | while (<$LineBrk>) { | |
262 | next if /^\#/ || /^\s*$/; | |
263 | s/\s+$//; | |
264 | s/\s*\#.*//; | |
265 | next unless /^([0-9a-f]+)(?:\.\.([0-9a-f]+))?\s*;\s*(.+)$/i; | |
266 | ||
267 | my ($first, $last, $lbrk) = ($1, $2, $3); | |
268 | ||
269 | append(\@Lbrk, $first, $lbrk); | |
270 | append($Lbrk{$lbrk} ||= [], $first); | |
271 | if (defined $last) { | |
272 | extend(\@Lbrk, $last); | |
273 | extend($Lbrk{$lbrk}, $last); | |
274 | } | |
275 | } | |
276 | ||
277 | flush(\@Lbrk, "Lbrk.pl"); | |
278 | foreach my $lbrk (sort keys %Lbrk) { | |
279 | flush($Lbrk{$lbrk}, "Is/Lbrk$lbrk.pl"); | |
280 | } | |
281 | } else { | |
282 | die "$0: LineBrk.txt: $!\n"; | |
283 | } | |
284 | ||
285 | # | |
286 | # Read in the ArabShap.txt. | |
287 | # | |
288 | ||
289 | if (open(my $ArabShap, "ArabShap.txt")) { | |
290 | my @ArabLink; | |
291 | my @ArabLinkGroup; | |
292 | ||
293 | while (<$ArabShap>) { | |
294 | next if /^\#/ || /^\s*$/; | |
295 | next unless /^[0-9a-f]+\s*;/i; | |
296 | s/\s+$//; | |
297 | my ($code, $name, $link, $linkgroup) = split(/\s*;\s*/); | |
298 | ||
299 | append(\@ArabLink, $code, $link); | |
300 | append(\@ArabLinkGroup, $code, $linkgroup); | |
301 | } | |
302 | ||
303 | flush(\@ArabLink, "ArabLink.pl"); | |
304 | flush(\@ArabLinkGroup, "ArabLnkGrp.pl"); | |
305 | } else { | |
306 | die "$0: ArabShap.txt: $!\n"; | |
307 | } | |
308 | ||
309 | # | |
310 | # Read in the Jamo.txt. | |
311 | # | |
312 | ||
313 | if (open(my $Jamo, "Jamo.txt")) { | |
314 | my @Short; | |
315 | ||
316 | while (<$Jamo>) { | |
317 | next if /^\#/ || /^\s*$/; | |
318 | next unless /^[0-9a-f]+\s*;/i; | |
319 | s/\s*\#.*//; | |
320 | s/\s+$//; | |
321 | my ($code, $short) = split(/\s*;\s*/); | |
322 | ||
323 | append(\@Short, $code, $short); | |
324 | } | |
325 | ||
326 | flush(\@Short, "JamoShort.pl"); | |
327 | } else { | |
328 | die "$0: Jamo.txt: $!\n"; | |
329 | } | |
330 | ||
331 | # | |
332 | # Read in the Scripts.txt. | |
333 | # | |
334 | ||
335 | my @Scripts; | |
336 | ||
337 | if (open(my $Scripts, "Scripts.txt")) { | |
338 | while (<$Scripts>) { | |
339 | next if /^\#/ || /^\s*$/; | |
340 | s/\s*\#.*//; | |
341 | s/\s+$//; | |
342 | next unless /^([0-9a-f]+)(?:\.\.([0-9a-f]+))?\s*;\s*(.+)$/i; | |
343 | ||
344 | # Wait until all the scripts have been read since | |
345 | # they are not listed in numeric order. | |
346 | push @Scripts, [ hex($1), $1, $2, $3 ]; | |
347 | } | |
348 | } else { | |
349 | die "$0: Scripts.txt: $!\n"; | |
350 | } | |
351 | ||
352 | # Now append the scripts properties in their code point order. | |
353 | ||
354 | my %Script; | |
355 | my $Scripts = []; | |
356 | ||
357 | for my $script (sort { $a->[0] <=> $b->[0] } @Scripts) { | |
358 | my ($code, $first, $last, $name) = @$script; | |
359 | append($Scripts, $first, $name); | |
360 | append($Script{$name} ||= [], $first, $name); | |
361 | if (defined $last) { | |
362 | extend($Scripts, $last); | |
363 | extend($Script{$name}, $last); | |
364 | } | |
365 | unless (defined $In{$name}) { | |
366 | $In{$name} = $InId++; | |
367 | $InIn{$name} = $Script{$name}; | |
368 | } | |
369 | } | |
370 | ||
371 | # Scripts.pl can be written out already now. | |
372 | ||
373 | flush(\@Scripts, "Scripts.pl"); | |
374 | ||
375 | # Common is everything not explicitly assigned to a Script | |
376 | ||
377 | $In{Common} = $InId++; | |
378 | my $Common = inverse($Scripts); | |
379 | $InIn{Common} = $Common; | |
380 | ||
381 | # | |
382 | # Read in the Blocks.txt. | |
383 | # | |
384 | ||
385 | my @Blocks; | |
386 | my %Blocks; | |
387 | ||
388 | if (open(my $Blocks, "Blocks.txt")) { | |
389 | while (<$Blocks>) { | |
390 | next if /^\#/ || /^\s*$/; | |
391 | s/\s*\#.*//; | |
392 | s/\s+$//; | |
393 | next unless /^([0-9a-f]+)\.\.([0-9a-f]+)\s*;\s*(.+)$/i; | |
394 | ||
395 | my ($first, $last, $name) = ($1, $2, $3); | |
396 | ||
397 | # If there's a naming conflict (the script names are | |
398 | # in uppercase), the name of the block has " Block" | |
399 | # appended to it. | |
400 | $name = "$name Block" if defined $In{"\U$name"}; | |
401 | ||
402 | append(\@Blocks, $first, $name); | |
403 | append($Blocks{$name} ||= [], $first, $name); | |
404 | if (defined $last) { | |
405 | extend(\@Blocks, $last); | |
406 | extend($Blocks{$name}, $last); | |
407 | } | |
408 | unless (defined $In{$name}) { | |
409 | $In{$name} = $InId++; | |
410 | $InIn{$name} = $Blocks{$name}; | |
411 | } | |
412 | } | |
413 | } else { | |
414 | die "$0: Blocks.txt: $!\n"; | |
415 | } | |
416 | ||
417 | # Blocks.pl can be written out already now. | |
418 | ||
419 | flush(\@Blocks, "Blocks.pl"); | |
420 | ||
421 | # | |
422 | # Read in the PropList.txt. It contains extended properties not | |
423 | # listed in the Unicode.txt, such as 'Other_Alphabetic': | |
424 | # alphabetic but not of the general category L; many modifiers | |
425 | # belong to this extended property category: while they are not | |
426 | # alphabets, they are alphabetic in nature. | |
427 | # | |
428 | ||
429 | my @Props; | |
430 | ||
431 | if (open(my $Props, "PropList.txt")) { | |
432 | while (<$Props>) { | |
433 | next if /^\#/ || /^\s*$/; | |
434 | s/\s*\#.*//; | |
435 | s/\s+$//; | |
436 | next unless /^([0-9a-f]+)(?:\.\.([0-9a-f]+))?\s*;\s*(\w+)/i; | |
437 | ||
438 | # Wait until all the extended properties have been read since | |
439 | # they are not listed in numeric order. | |
440 | push @Props, [ hex($1), $1, $2, $3 ]; | |
441 | } | |
442 | } else { | |
443 | die "$0: PropList.txt: $!\n"; | |
444 | } | |
445 | ||
446 | # Now append the extended properties in their code point order. | |
447 | ||
448 | my %Prop; | |
449 | my $Props = []; | |
450 | ||
451 | for my $prop (sort { $a->[0] <=> $b->[0] } @Props) { | |
452 | my ($code, $first, $last, $name) = @$prop; | |
453 | append($Props, $first, $name); | |
454 | append($Prop{$name} ||= [], $first, $name); | |
455 | if (defined $last) { | |
456 | extend($Props, $last); | |
457 | extend($Prop{$name}, $last); | |
458 | } | |
459 | unless (defined $In{$name}) { | |
460 | $In{$name} = $InId++; | |
461 | $InIn{$name} = $Prop{$name}; | |
462 | } | |
463 | } | |
464 | ||
465 | # Assigned is everything not Cn aka Noncharacter_Code_Point | |
466 | ||
467 | $In{Assigned} = $InId++; | |
468 | my $Assigned = inverse($Prop{Noncharacter_Code_Point}); | |
469 | $InIn{Assigned} = $Assigned; | |
470 | ||
471 | sub merge_general_and_extended { | |
472 | my ($name, $general, $extended) = @_; | |
473 | my $merged; | |
474 | ||
475 | push @$merged, | |
476 | map { pop @{$_}; $_ } | |
477 | sort { $a->[2] <=> $b->[2] } | |
478 | map { [ $_->[0], $_->[1], hex($_->[0]) ] } | |
479 | ($general ? | |
480 | map { ref $_ ? @$_ : $_ } | |
481 | @Cat {ref $general ? @$general : $general } : | |
482 | (), | |
483 | $extended ? | |
484 | map { ref $_ ? @$_ : $_ } | |
485 | @Prop{ref $extended ? @$extended : $extended} : | |
486 | ()); | |
487 | ||
488 | $In{$name} = $InId++; | |
489 | $InIn{$name} = $merged; | |
490 | ||
491 | return $merged; | |
492 | } | |
493 | ||
494 | # Alphabetic is L and Other_Alphabetic. | |
495 | ||
496 | my $Alphabetic = | |
497 | merge_general_and_extended('Alphabetic', 'L', 'Other_Alphabetic'); | |
498 | ||
499 | # Lowercase is Ll and Other_Lowercase. | |
500 | ||
501 | my $Lowercase = | |
502 | merge_general_and_extended('Lowercase', 'Ll', 'Other_Lowercase'); | |
503 | ||
504 | # Uppercase is Lu and Other_Uppercase. | |
505 | ||
506 | my $Uppercase = | |
507 | merge_general_and_extended('Uppercase', 'Lu', 'Other_Uppercase'); | |
508 | ||
509 | # Math is Sm and Other_Math. | |
510 | ||
511 | my $Math = | |
512 | merge_general_and_extended('Math', 'Sm', 'Other_Math'); | |
513 | ||
514 | # Lampersand is Ll, Lu, and Lt. | |
515 | ||
516 | my $Lampersand = | |
517 | merge_general_and_extended('Lampersand', [ qw(Ll Lu Lt) ]); | |
518 | ||
519 | # ID_Start is Ll, Lu, Lt, Lm, Lo, and Nl. | |
520 | ||
521 | my $ID_Start = | |
522 | merge_general_and_extended('ID_Start', [ qw(Ll Lu Lt Lm Lo Nl) ]); | |
523 | ||
524 | # ID_Continue is ID_Start, Mn, Mc, Nd, and Pc. | |
525 | ||
526 | my $ID_Continue = | |
527 | merge_general_and_extended('ID_Continue', [ qw(Ll Lu Lt Lm Lo Nl | |
528 | Mn Mc Nd Pc) ]); | |
529 | ||
530 | # | |
531 | # Any is any. | |
532 | # | |
533 | ||
534 | $In{Any} = $InId++; | |
535 | my $Any = [ [ 0, sprintf("%04X", $LastUnicodeCodepoint) ] ]; | |
536 | $InIn{Any} = $Any; | |
537 | ||
538 | # | |
539 | # mapping() will be used to write out the In and Is virtual mappings. | |
540 | # | |
541 | ||
542 | sub mapping { | |
543 | my ($map, $name) = @_; | |
544 | ||
545 | if (open(my $fh, ">$name.pl")) { | |
546 | print "$name.pl\n"; | |
547 | header($fh); | |
548 | ||
549 | # The %pat will hold a hash that maps the first two | |
550 | # lowercased letters of a class to a 'fuzzified' regular | |
551 | # expression that points to the real mapping. | |
552 | ||
553 | my %pat; | |
554 | ||
555 | # But first write out the offical name to real name | |
556 | # (the filename) mapping. | |
557 | ||
558 | print $fh <<EOT; | |
559 | %utf8::${name} = | |
560 | ( | |
561 | EOT | |
562 | for my $i (sort keys %$map) { | |
563 | my $pat = $i; | |
564 | # Here is the 'fuzzification': accept any space, | |
565 | # dash, or underbar where in the official name | |
566 | # there is space or a dash (or underbar, but | |
567 | # there never is). | |
568 | $pat =~ s/([- _])/(?:[-_]|\\s+)?/g; | |
569 | # The prefix length of 2 is enough spread, | |
570 | # and besides, we have 'Yi' as an In category. | |
571 | push @{$pat{lc(substr($i, 0, 2))}}, [ $i, $pat ]; | |
572 | print $fh "'$i' => '$map->{$i}',\n"; | |
573 | } | |
574 | print $fh <<EOT; | |
575 | ); | |
576 | EOT | |
577 | ||
578 | # Now write out the %pat mapping. | |
579 | ||
580 | print $fh <<EOT; | |
581 | %utf8::${name}Pat = | |
582 | ( | |
583 | EOT | |
584 | foreach my $prefix (sort keys %pat) { | |
585 | print $fh "'$prefix' => {\n"; | |
586 | foreach my $ipat (@{$pat{$prefix}}) { | |
587 | my ($i, $pat) = @$ipat; | |
588 | print $fh "\t'$pat' => '$map->{$i}',\n"; | |
589 | } | |
590 | print $fh "},\n"; | |
591 | } | |
592 | print $fh <<EOT; | |
593 | ); | |
594 | EOT | |
595 | ||
596 | close($fh); | |
597 | } else { | |
598 | die "$0: $name.pl: $!\n"; | |
599 | } | |
600 | } | |
601 | ||
602 | # | |
603 | # Write out the virtual In mappings. | |
604 | # | |
605 | ||
606 | mapping(\%In, "In"); | |
607 | ||
608 | # Easy low-calorie cheat. | |
609 | use File::Copy; | |
610 | copy("In/$In{Noncharacter_Code_Point}.pl", "Is/Cn.pl"); | |
611 | ||
612 | # | |
613 | # Write out the real In mappings | |
614 | # (the In.pl written out just above has the virtual In mappings) | |
615 | # | |
616 | ||
617 | foreach my $in (sort { $In{$a} <=> $In{$b} } keys %In) { | |
618 | flush($InIn{$in}, "In/$In{$in}.pl"); | |
619 | } | |
620 | ||
621 | # | |
622 | # The mapping from General Category long forms to short forms is | |
623 | # currently hardwired here since no simple data file in the UCD | |
624 | # seems to do that. | |
625 | # | |
626 | ||
627 | my %Is = ( | |
628 | 'Letter' => 'L', | |
629 | 'Uppercase Letter' => 'Lu', | |
630 | 'Lowercase Letter' => 'Ll', | |
631 | 'Titlecase Letter' => 'Lt', | |
632 | 'Modifier Letter' => 'Lm', | |
633 | 'Other Letter' => 'Lo', | |
634 | ||
635 | 'Mark' => 'M', | |
636 | 'Non-Spacing Mark' => 'Mn', | |
637 | 'Spacing Combining Mark' => 'Mc', | |
638 | 'Enclosing Mark' => 'Me', | |
639 | ||
640 | 'Separator' => 'Z', | |
641 | 'Space Separator' => 'Zs', | |
642 | 'Line Separator' => 'Zl', | |
643 | 'Paragraph Separator' => 'Zp', | |
644 | ||
645 | 'Number' => 'N', | |
646 | 'Decimal Digit Number' => 'Nd', | |
647 | 'Letter Number' => 'Nl', | |
648 | 'Other Number' => 'No', | |
649 | ||
650 | 'Punctuation' => 'P', | |
651 | 'Connector Punctuation' => 'Pc', | |
652 | 'Dash Punctuation' => 'Pd', | |
653 | 'Open Punctuation' => 'Ps', | |
654 | 'Close Punctuation' => 'Pe', | |
655 | 'Initial Punctuation' => 'Pi', | |
656 | 'Final Punctuation' => 'Pf', | |
657 | 'Other Punctuation' => 'Po', | |
658 | ||
659 | 'Symbol' => 'S', | |
660 | 'Math Symbol' => 'Sm', | |
661 | 'Currency Symbol' => 'Sc', | |
662 | 'Modifier Symbol' => 'Sk', | |
663 | 'Other Symbol' => 'So', | |
664 | ||
665 | 'Other' => 'C', | |
666 | 'Control' => 'Cc', | |
667 | 'Format' => 'Cf', | |
668 | 'Surrogate' => 'Cs', | |
669 | 'Private Use' => 'Co', | |
670 | 'Not Assigned' => 'Cn', | |
671 | # 'Other' aliases | |
672 | 'Other Control' => 'Cc', | |
673 | 'Other Format' => 'Cf', | |
674 | 'Other Surrogate' => 'Cs', | |
675 | 'Other Private Use' => 'Co', | |
676 | 'Other Not Assigned' => 'Cn', | |
677 | ); | |
678 | ||
679 | # | |
680 | # Write out the virtual Is mappings. | |
681 | # | |
682 | ||
683 | mapping(\%Is, "Is"); | |
684 | ||
685 | # That's all, folks! | |
686 |