Commit | Line | Data |
---|---|---|
ac5ea531 JH |
1 | package Unicode::Normalize; |
2 | ||
BEGIN {
    # Compile-time sanity check: abort loading unless pack('U', 0x41)
    # yields the one-character string "A".
    if ("A" ne pack('U', 0x41)) {
        die "Unicode::Normalize cannot stringify a Unicode code point\n";
    }
}
8 | ||
ac5ea531 JH |
9 | use 5.006; |
10 | use strict; | |
11 | use warnings; | |
12 | use Carp; | |
13 | ||
e524f5b2 NC |
14 | no warnings 'utf8'; |
15 | ||
51683ce6 | 16 | our $VERSION = '1.03'; |
ac5ea531 JH |
17 | our $PACKAGE = __PACKAGE__; |
18 | ||
19 | require Exporter; | |
20 | require DynaLoader; | |
ac5ea531 JH |
21 | |
22 | our @ISA = qw(Exporter DynaLoader); | |
23 | our @EXPORT = qw( NFC NFD NFKC NFKD ); | |
2a204b45 JH |
24 | our @EXPORT_OK = qw( |
25 | normalize decompose reorder compose | |
8f118dcd JH |
26 | checkNFD checkNFKD checkNFC checkNFKC check |
27 | getCanon getCompat getComposite getCombinClass | |
28 | isExclusion isSingleton isNonStDecomp isComp2nd isComp_Ex | |
29 | isNFD_NO isNFC_NO isNFC_MAYBE isNFKD_NO isNFKC_NO isNFKC_MAYBE | |
82e740b6 NC |
30 | FCD checkFCD FCC checkFCC composeContiguous |
31 | splitOnLastStarter | |
8f118dcd JH |
32 | ); |
33 | our %EXPORT_TAGS = ( | |
34 | all => [ @EXPORT, @EXPORT_OK ], | |
35 | normalize => [ @EXPORT, qw/normalize decompose reorder compose/ ], | |
36 | check => [ qw/checkNFD checkNFKD checkNFC checkNFKC check/ ], | |
82e740b6 | 37 | fast => [ qw/FCD checkFCD FCC checkFCC composeContiguous/ ], |
2a204b45 | 38 | ); |
ac5ea531 | 39 | |
82e740b6 NC |
40 | ###### |
41 | ||
ac5ea531 JH |
42 | bootstrap Unicode::Normalize $VERSION; |
43 | ||
82e740b6 NC |
44 | ###### |
45 | ||
fe067ad9 SP |
46 | ## |
47 | ## utilities for tests | |
48 | ## | |
49 | ||
# pack_U(@code_points)
#   Packs every argument with the 'U*' template and returns the
#   resulting string (one character per code point given).
sub pack_U {
    my @code_points = @_;
    return pack('U*', @code_points);
}
53 | ||
# unpack_U($string)
#   Returns the values obtained by unpacking $string with the 'U*'
#   template (in scalar context, whatever unpack itself yields there).
sub unpack_U {
    my $string = shift(@_);

    # An empty U-packed string is appended before unpacking.
    # NOTE(review): presumably this forces character semantics on the
    # argument so that 'U*' sees code points -- confirm against perlfunc.
    my $empty_u_string = pack('U*');

    return unpack('U*', $string . $empty_u_string);
}
57 | ||
82e740b6 NC |
58 | |
59 | ## | |
60 | ## normalization forms | |
61 | ## | |
62 | ||
82e740b6 NC |
# FCD($string)
#   Hands the string back untouched when checkFCD() reports true for it;
#   otherwise returns NFD() of the string.
sub FCD ($) {
    my $str = shift(@_);

    return NFD($str) unless checkFCD($str);
    return $str;
}
82e740b6 NC |
67 | |
# Dispatch table for normalize(): form name => normalizer.
# Both the long names (NFC, NFD, NFKC, NFKD) and the short names
# (C, D, KC, KD) are accepted; FCD and FCC have one spelling only.
our %formNorm = (
    NFC  => \&NFC,
    NFD  => \&NFD,
    NFKC => \&NFKC,
    NFKD => \&NFKD,
    C    => \&NFC,
    D    => \&NFD,
    KC   => \&NFKC,
    KD   => \&NFKD,
    FCD  => \&FCD,
    FCC  => \&FCC,
);

# normalize($form_name, $string)
#   Looks $form_name up in %formNorm and returns the result of calling
#   the matching normalizer on $string.
#   Croaks when $form_name is not a key of %formNorm.
sub normalize($$)
{
    my ($form, $str) = (shift, shift);

    croak($PACKAGE."::normalize: invalid form name: $form")
        unless exists $formNorm{$form};

    return $formNorm{$form}->($str);
}
85 | ||
82e740b6 NC |
86 | |
87 | ## | |
88 | ## quick check | |
89 | ## | |
90 | ||
# Dispatch table for check(): form name => quick-check routine.
# Both the long names (NFC, NFD, NFKC, NFKD) and the short names
# (C, D, KC, KD) are accepted; FCD and FCC have one spelling only.
our %formCheck = (
    NFC  => \&checkNFC,
    NFD  => \&checkNFD,
    NFKC => \&checkNFKC,
    NFKD => \&checkNFKD,
    C    => \&checkNFC,
    D    => \&checkNFD,
    KC   => \&checkNFKC,
    KD   => \&checkNFKD,
    FCD  => \&checkFCD,
    FCC  => \&checkFCC,
);

# check($form_name, $string)
#   Looks $form_name up in %formCheck and returns the result of calling
#   the matching quick-check routine on $string.
#   Croaks when $form_name is not a key of %formCheck.
sub check($$)
{
    my ($form, $str) = (shift, shift);

    croak($PACKAGE."::check: invalid form name: $form")
        unless exists $formCheck{$form};

    return $formCheck{$form}->($str);
}
108 | ||
ac5ea531 JH |
109 | 1; |
110 | __END__ | |
2a204b45 JH |
111 | |
112 | =head1 NAME | |
113 | ||
f027f502 | 114 | Unicode::Normalize - Unicode Normalization Forms |
2a204b45 JH |
115 | |
116 | =head1 SYNOPSIS | |
117 | ||
a092bcfd RGS |
118 | (1) using function names exported by default: |
119 | ||
2a204b45 JH |
120 | use Unicode::Normalize; |
121 | ||
8f118dcd JH |
122 | $NFD_string = NFD($string); # Normalization Form D |
123 | $NFC_string = NFC($string); # Normalization Form C | |
124 | $NFKD_string = NFKD($string); # Normalization Form KD | |
125 | $NFKC_string = NFKC($string); # Normalization Form KC | |
2a204b45 | 126 | |
a092bcfd | 127 | (2) using function names exported on request: |
2a204b45 JH |
128 | |
129 | use Unicode::Normalize 'normalize'; | |
130 | ||
8f118dcd JH |
131 | $NFD_string = normalize('D', $string); # Normalization Form D |
132 | $NFC_string = normalize('C', $string); # Normalization Form C | |
133 | $NFKD_string = normalize('KD', $string); # Normalization Form KD | |
134 | $NFKC_string = normalize('KC', $string); # Normalization Form KC | |
2a204b45 JH |
135 | |
136 | =head1 DESCRIPTION | |
137 | ||
00f2676f JH |
138 | Parameters: |
139 | ||
fe067ad9 | 140 | C<$string> is used as a string under character semantics (see F<perlunicode>). |
00f2676f | 141 | |
fe067ad9 | 142 | C<$code_point> should be an unsigned integer representing a Unicode code point. |
00f2676f | 143 | |
628bbff0 | 144 | Note: Between XSUB and pure Perl, there is an incompatibility |
fe067ad9 SP |
145 | about the interpretation of C<$code_point> as a decimal number. |
146 | XSUB converts C<$code_point> to an unsigned integer, but pure Perl does not. | |
147 | Do not use a floating point number or a negative sign in C<$code_point>. | |
00f2676f | 148 | |
d85850a7 | 149 | =head2 Normalization Forms |
2a204b45 JH |
150 | |
151 | =over 4 | |
152 | ||
8f118dcd | 153 | =item C<$NFD_string = NFD($string)> |
2a204b45 | 154 | |
fe067ad9 | 155 | It returns the Normalization Form D (formed by canonical decomposition). |
2a204b45 | 156 | |
8f118dcd | 157 | =item C<$NFC_string = NFC($string)> |
2a204b45 | 158 | |
fe067ad9 | 159 | It returns the Normalization Form C (formed by canonical decomposition |
2a204b45 JH |
160 | followed by canonical composition). |
161 | ||
8f118dcd | 162 | =item C<$NFKD_string = NFKD($string)> |
2a204b45 | 163 | |
fe067ad9 | 164 | It returns the Normalization Form KD (formed by compatibility decomposition). |
2a204b45 | 165 | |
8f118dcd | 166 | =item C<$NFKC_string = NFKC($string)> |
2a204b45 | 167 | |
fe067ad9 | 168 | It returns the Normalization Form KC (formed by compatibility decomposition |
2a204b45 JH |
169 | followed by B<canonical> composition). |
170 | ||
82e740b6 NC |
171 | =item C<$FCD_string = FCD($string)> |
172 | ||
173 | If the given string is in FCD ("Fast C or D" form; cf. UTN #5), | |
fe067ad9 | 174 | it returns the string without modification; otherwise it returns an FCD string. |
82e740b6 NC |
175 | |
176 | Note: FCD is not always unique; several distinct forms may be equivalent | |
177 | to each other. C<FCD()> will return one of these equivalent forms. | |
178 | ||
179 | =item C<$FCC_string = FCC($string)> | |
180 | ||
fe067ad9 | 181 | It returns the FCC form ("Fast C Contiguous"; cf. UTN #5). |
82e740b6 | 182 | |
e524f5b2 | 183 | Note: FCC is unique, as are the four normalization forms (NF*). |
82e740b6 | 184 | |
8f118dcd | 185 | =item C<$normalized_string = normalize($form_name, $string)> |
2a204b45 | 186 | |
fe067ad9 SP |
187 | It returns the normalization form of C<$form_name>. |
188 | ||
2a204b45 JH |
189 | As C<$form_name>, one of the following names must be given. |
190 | ||
82e740b6 NC |
191 | 'C' or 'NFC' for Normalization Form C (UAX #15) |
192 | 'D' or 'NFD' for Normalization Form D (UAX #15) | |
193 | 'KC' or 'NFKC' for Normalization Form KC (UAX #15) | |
194 | 'KD' or 'NFKD' for Normalization Form KD (UAX #15) | |
195 | ||
196 | 'FCD' for "Fast C or D" Form (UTN #5) | |
197 | 'FCC' for "Fast C Contiguous" (UTN #5) | |
2a204b45 JH |
198 | |
199 | =back | |
200 | ||
8f118dcd JH |
201 | =head2 Decomposition and Composition |
202 | ||
203 | =over 4 | |
204 | ||
fe067ad9 | 205 | =item C<$decomposed_string = decompose($string [, $useCompatMapping])> |
8f118dcd | 206 | |
fe067ad9 SP |
207 | It returns the concatenation of the decomposition of each character |
208 | in the string. | |
8f118dcd | 209 | |
fe067ad9 SP |
210 | If the second parameter (a boolean) is omitted or false, |
211 | the decomposition is canonical decomposition; | |
212 | if the second parameter (a boolean) is true, | |
213 | the decomposition is compatibility decomposition. | |
8f118dcd | 214 | |
fe067ad9 | 215 | The string returned is not always in NFD/NFKD. Reordering may be required. |
8f118dcd JH |
216 | |
217 | $NFD_string = reorder(decompose($string)); # eq. to NFD() | |
218 | $NFKD_string = reorder(decompose($string, TRUE)); # eq. to NFKD() | |
219 | ||
fe067ad9 | 220 | =item C<$reordered_string = reorder($string)> |
8f118dcd | 221 | |
fe067ad9 SP |
222 | It returns the result of reordering the combining characters |
223 | according to Canonical Ordering Behavior. | |
8f118dcd | 224 | |
fe067ad9 SP |
225 | For example, when you have a list of NFD/NFKD strings, |
226 | you can get the concatenated NFD/NFKD string from them, by saying | |
8f118dcd JH |
227 | |
228 | $concat_NFD = reorder(join '', @NFD_strings); | |
229 | $concat_NFKD = reorder(join '', @NFKD_strings); | |
230 | ||
fe067ad9 | 231 | =item C<$composed_string = compose($string)> |
8f118dcd | 232 | |
fe067ad9 SP |
233 | It returns the result of canonical composition |
234 | without applying any decomposition. | |
8f118dcd | 235 | |
fe067ad9 SP |
236 | For example, when you have a NFD/NFKD string, |
237 | you can get its NFC/NFKC string, by saying | |
8f118dcd JH |
238 | |
239 | $NFC_string = compose($NFD_string); | |
240 | $NFKC_string = compose($NFKD_string); | |
241 | ||
242 | =back | |
243 | ||
244 | =head2 Quick Check | |
245 | ||
82e740b6 | 246 | (see Annex 8, UAX #15; and F<DerivedNormalizationProps.txt>) |
8f118dcd JH |
247 | |
248 | The following functions check whether the string is in that normalization form. | |
249 | ||
fe067ad9 | 250 | The result returned will be one of the following: |
8f118dcd JH |
251 | |
252 | YES The string is in that normalization form. | |
253 | NO The string is not in that normalization form. | |
254 | MAYBE Dubious. Maybe yes, maybe no. | |
255 | ||
256 | =over 4 | |
257 | ||
258 | =item C<$result = checkNFD($string)> | |
259 | ||
fe067ad9 | 260 | It returns true (C<1>) if C<YES>; false (C<empty string>) if C<NO>. |
8f118dcd JH |
261 | |
262 | =item C<$result = checkNFC($string)> | |
263 | ||
fe067ad9 | 264 | It returns true (C<1>) if C<YES>; false (C<empty string>) if C<NO>; |
628bbff0 | 265 | C<undef> if C<MAYBE>. |
8f118dcd JH |
266 | |
267 | =item C<$result = checkNFKD($string)> | |
268 | ||
fe067ad9 | 269 | It returns true (C<1>) if C<YES>; false (C<empty string>) if C<NO>. |
8f118dcd JH |
270 | |
271 | =item C<$result = checkNFKC($string)> | |
272 | ||
fe067ad9 | 273 | It returns true (C<1>) if C<YES>; false (C<empty string>) if C<NO>; |
628bbff0 | 274 | C<undef> if C<MAYBE>. |
8f118dcd | 275 | |
82e740b6 NC |
276 | =item C<$result = checkFCD($string)> |
277 | ||
fe067ad9 | 278 | It returns true (C<1>) if C<YES>; false (C<empty string>) if C<NO>. |
82e740b6 NC |
279 | |
280 | =item C<$result = checkFCC($string)> | |
281 | ||
fe067ad9 | 282 | It returns true (C<1>) if C<YES>; false (C<empty string>) if C<NO>; |
628bbff0 | 283 | C<undef> if C<MAYBE>. |
82e740b6 | 284 | |
fe067ad9 | 285 | Note: If a string is not in FCD, it must not be in FCC. |
82e740b6 NC |
286 | So C<checkFCC($not_FCD_string)> should return C<NO>. |
287 | ||
8f118dcd JH |
288 | =item C<$result = check($form_name, $string)> |
289 | ||
fe067ad9 | 290 | It returns true (C<1>) if C<YES>; false (C<empty string>) if C<NO>; |
628bbff0 | 291 | C<undef> if C<MAYBE>. |
8f118dcd | 292 | |
628bbff0 RGS |
293 | As C<$form_name>, one of the following names must be given. |
294 | ||
295 | 'C' or 'NFC' for Normalization Form C (UAX #15) | |
296 | 'D' or 'NFD' for Normalization Form D (UAX #15) | |
297 | 'KC' or 'NFKC' for Normalization Form KC (UAX #15) | |
298 | 'KD' or 'NFKD' for Normalization Form KD (UAX #15) | |
299 | ||
300 | 'FCD' for "Fast C or D" Form (UTN #5) | |
301 | 'FCC' for "Fast C Contiguous" (UTN #5) | |
8f118dcd JH |
302 | |
303 | =back | |
304 | ||
305 | B<Note> | |
306 | ||
82e740b6 NC |
307 | In the cases of NFD, NFKD, and FCD, the answer must be |
308 | either C<YES> or C<NO>. The answer C<MAYBE> may be returned | |
309 | in the cases of NFC, NFKC, and FCC. | |
8f118dcd | 310 | |
82e740b6 NC |
311 | A C<MAYBE> string should contain at least one combining character |
312 | or the like. For example, C<COMBINING ACUTE ACCENT> has | |
8f118dcd | 313 | the MAYBE_NFC/MAYBE_NFKC property. |
82e740b6 | 314 | |
8f118dcd JH |
315 | Both C<checkNFC("A\N{COMBINING ACUTE ACCENT}")> |
316 | and C<checkNFC("B\N{COMBINING ACUTE ACCENT}")> will return C<MAYBE>. | |
f027f502 | 317 | C<"A\N{COMBINING ACUTE ACCENT}"> is not in NFC |
8f118dcd JH |
318 | (its NFC is C<"\N{LATIN CAPITAL LETTER A WITH ACUTE}">), |
319 | while C<"B\N{COMBINING ACUTE ACCENT}"> is in NFC. | |
320 | ||
628bbff0 RGS |
321 | If you want to check exactly, compare the string with its NFC/NFKC/FCC. |
322 | ||
323 | if ($string eq NFC($string)) { | |
324 | # $string is exactly normalized in NFC; | |
325 | } else { | |
326 | # $string is not normalized in NFC; | |
327 | } | |
8f118dcd | 328 | |
628bbff0 RGS |
329 | if ($string eq NFKC($string)) { |
330 | # $string is exactly normalized in NFKC; | |
331 | } else { | |
332 | # $string is not normalized in NFKC; | |
333 | } | |
8f118dcd | 334 | |
2a204b45 JH |
335 | =head2 Character Data |
336 | ||
337 | These functions are an interface to the character data used internally. | |
d0ed0342 BG |
338 | If you only want to get Unicode normalization forms, you don't need |
339 | to call them yourself. | |
2a204b45 JH |
340 | |
341 | =over 4 | |
342 | ||
fe067ad9 | 343 | =item C<$canonical_decomposition = getCanon($code_point)> |
2a204b45 | 344 | |
fe067ad9 SP |
345 | If the character is canonically decomposable (including Hangul Syllables), |
346 | it returns the (full) canonical decomposition as a string. | |
347 | Otherwise it returns C<undef>. | |
8f118dcd | 348 | |
fe067ad9 SP |
349 | B<Note:> According to the Unicode standard, the canonical decomposition |
350 | of a character that is not canonically decomposable is the same as | |
351 | the character itself. | |
8f118dcd | 352 | |
fe067ad9 | 353 | =item C<$compatibility_decomposition = getCompat($code_point)> |
2a204b45 | 354 | |
fe067ad9 SP |
355 | If the character is compatibility decomposable (including Hangul Syllables), |
356 | it returns the (full) compatibility decomposition as a string. | |
357 | Otherwise it returns C<undef>. | |
2a204b45 | 358 | |
fe067ad9 SP |
359 | B<Note:> According to the Unicode standard, the compatibility decomposition |
360 | of a character that is not compatibility decomposable is the same as | |
361 | the character itself. | |
2a204b45 | 362 | |
fe067ad9 | 363 | =item C<$code_point_composite = getComposite($code_point_here, $code_point_next)> |
2a204b45 | 364 | |
fe067ad9 | 365 | If two characters here and next (as code points) are composable |
8f118dcd | 366 | (including Hangul Jamo/Syllables and Composition Exclusions), |
fe067ad9 SP |
367 | it returns the code point of the composite. |
368 | ||
369 | If they are not composable, it returns C<undef>. | |
2a204b45 | 370 | |
fe067ad9 | 371 | =item C<$combining_class = getCombinClass($code_point)> |
2a204b45 | 372 | |
fe067ad9 | 373 | It returns the combining class (as an integer) of the character. |
2a204b45 | 374 | |
fe067ad9 | 375 | =item C<$may_be_composed_with_prev_char = isComp2nd($code_point)> |
2a204b45 | 376 | |
fe067ad9 SP |
377 | It returns a boolean whether the character of the specified codepoint |
378 | may be composed with the previous one in a certain composition | |
379 | (including Hangul Compositions, but excluding | |
380 | Composition Exclusions and Non-Starter Decompositions). | |
2a204b45 | 381 | |
fe067ad9 | 382 | =item C<$is_exclusion = isExclusion($code_point)> |
8f118dcd | 383 | |
fe067ad9 | 384 | It returns a boolean whether the code point is a composition exclusion. |
8f118dcd | 385 | |
fe067ad9 | 386 | =item C<$is_singleton = isSingleton($code_point)> |
8f118dcd | 387 | |
fe067ad9 | 388 | It returns a boolean whether the code point is a singleton. |
8f118dcd | 389 | |
fe067ad9 | 390 | =item C<$is_non_starter_decomposition = isNonStDecomp($code_point)> |
8f118dcd | 391 | |
fe067ad9 | 392 | It returns a boolean whether the code point has Non-Starter Decomposition. |
8f118dcd | 393 | |
fe067ad9 SP |
394 | =item C<$is_Full_Composition_Exclusion = isComp_Ex($code_point)> |
395 | ||
396 | It returns a boolean of the derived property Comp_Ex | |
397 | (Full_Composition_Exclusion). This property is generated from | |
398 | Composition Exclusions + Singletons + Non-Starter Decompositions. | |
399 | ||
400 | =item C<$NFD_is_NO = isNFD_NO($code_point)> | |
401 | ||
402 | It returns a boolean of the derived property NFD_NO | |
403 | (NFD_Quick_Check=No). | |
404 | ||
405 | =item C<$NFC_is_NO = isNFC_NO($code_point)> | |
406 | ||
407 | It returns a boolean of the derived property NFC_NO | |
408 | (NFC_Quick_Check=No). | |
409 | ||
410 | =item C<$NFC_is_MAYBE = isNFC_MAYBE($code_point)> | |
411 | ||
412 | It returns a boolean of the derived property NFC_MAYBE | |
413 | (NFC_Quick_Check=Maybe). | |
414 | ||
415 | =item C<$NFKD_is_NO = isNFKD_NO($code_point)> | |
416 | ||
417 | It returns a boolean of the derived property NFKD_NO | |
418 | (NFKD_Quick_Check=No). | |
419 | ||
420 | =item C<$NFKC_is_NO = isNFKC_NO($code_point)> | |
421 | ||
422 | It returns a boolean of the derived property NFKC_NO | |
423 | (NFKC_Quick_Check=No). | |
424 | ||
425 | =item C<$NFKC_is_MAYBE = isNFKC_MAYBE($code_point)> | |
426 | ||
427 | It returns a boolean of the derived property NFKC_MAYBE | |
428 | (NFKC_Quick_Check=Maybe). | |
2a204b45 JH |
429 | |
430 | =back | |
431 | ||
628bbff0 | 432 | =head1 EXPORT |
2a204b45 JH |
433 | |
434 | C<NFC>, C<NFD>, C<NFKC>, C<NFKD>: by default. | |
435 | ||
436 | C<normalize> and some other functions: on request. | |
437 | ||
628bbff0 RGS |
438 | =head1 CAVEATS |
439 | ||
440 | =over 4 | |
441 | ||
442 | =item Perl's version vs. Unicode version | |
443 | ||
444 | Since this module refers to perl core's Unicode database in the directory | |
445 | F</lib/unicore> (or formerly F</lib/unicode>), the Unicode version of | |
446 | normalization implemented by this module depends on your perl's version. | |
447 | ||
fe067ad9 SP |
448 | perl's version implemented Unicode version |
449 | 5.6.1 3.0.1 | |
450 | 5.7.2 3.1.0 | |
451 | 5.7.3 3.1.1 (normalization is same as 3.1.0) | |
452 | 5.8.0 3.2.0 | |
453 | 5.8.1-5.8.3 4.0.0 | |
454 | 5.8.4-5.8.6 4.0.1 (normalization is same as 4.0.0) | |
455 | 5.8.7-5.8.8 4.1.0 | |
51683ce6 TS |
456 | 5.10.0 5.0.0 |
457 | 5.8.9 5.1.0 | |
628bbff0 RGS |
458 | |
459 | =item Correction of decomposition mapping | |
460 | ||
461 | In older Unicode versions, a small number of characters (all of which are | |
462 | CJK compatibility ideographs as far as they have been found) may have | |
463 | an erroneous decomposition mapping (see F<NormalizationCorrections.txt>). | |
464 | Anyhow, this module will neither refer to F<NormalizationCorrections.txt> | |
465 | nor provide any specific version of normalization. Therefore this module | |
466 | running on an older perl with an older Unicode database may use | |
467 | the erroneous decomposition mapping blindly conforming to the Unicode database. | |
468 | ||
469 | =item Revised definition of canonical composition | |
470 | ||
471 | In Unicode 4.1.0, the definition D2 of canonical composition (which | |
472 | affects NFC and NFKC) has been changed (see Public Review Issue #29 | |
473 | and recent UAX #15). This module has used the newer definition | |
474 | since the version 0.07 (Oct 31, 2001). | |
2b8d773d | 475 | This module will not support the normalization according to the older |
628bbff0 RGS |
476 | definition, even if the Unicode version implemented by perl is |
477 | lower than 4.1.0. | |
478 | ||
479 | =back | |
480 | ||
2a204b45 JH |
481 | =head1 AUTHOR |
482 | ||
a092bcfd | 483 | SADAHIRO Tomoyuki <SADAHIRO@cpan.org> |
2a204b45 | 484 | |
2b8d773d | 485 | Copyright(C) 2001-2007, SADAHIRO Tomoyuki. Japan. All rights reserved. |
2a204b45 | 486 | |
628bbff0 RGS |
487 | This module is free software; you can redistribute it |
488 | and/or modify it under the same terms as Perl itself. | |
2a204b45 JH |
489 | |
490 | =head1 SEE ALSO | |
491 | ||
492 | =over 4 | |
493 | ||
e524f5b2 | 494 | =item http://www.unicode.org/reports/tr15/ |
2a204b45 JH |
495 | |
496 | Unicode Normalization Forms - UAX #15 | |
497 | ||
fe067ad9 SP |
498 | =item http://www.unicode.org/Public/UNIDATA/CompositionExclusions.txt |
499 | ||
500 | Composition Exclusion Table | |
501 | ||
14e6b36c | 502 | =item http://www.unicode.org/Public/UNIDATA/DerivedNormalizationProps.txt |
8f118dcd JH |
503 | |
504 | Derived Normalization Properties | |
505 | ||
628bbff0 RGS |
506 | =item http://www.unicode.org/Public/UNIDATA/NormalizationCorrections.txt |
507 | ||
508 | Normalization Corrections | |
509 | ||
510 | =item http://www.unicode.org/review/pr-29.html | |
511 | ||
512 | Public Review Issue #29: Normalization Issue | |
513 | ||
82e740b6 NC |
514 | =item http://www.unicode.org/notes/tn5/ |
515 | ||
516 | Canonical Equivalence in Applications - UTN #5 | |
517 | ||
2a204b45 JH |
518 | =back |
519 | ||
520 | =cut |