This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
No normalization form DNF.
[perl5.git] / ext / Unicode / Normalize / Normalize.pm
CommitLineData
ac5ea531
JH
1package Unicode::Normalize;
2
3use 5.006;
4use strict;
5use warnings;
6use Carp;
7
2a204b45 8our $VERSION = '0.12';
ac5ea531
JH
9our $PACKAGE = __PACKAGE__;
10
11require Exporter;
12require DynaLoader;
13require AutoLoader;
14
# Inherit import() from Exporter and bootstrap() from DynaLoader.
our @ISA = qw(Exporter DynaLoader);

# The four normalization-form functions are exported by default.
our @EXPORT = qw( NFC NFD NFKC NFKD );

# Exported only on request: the generic normalize() front end, the
# individual processing steps, and the character-data accessors.
our @EXPORT_OK = qw(
    normalize decompose reorder compose
    getCanon getCompat getComposite getCombinClass isExclusion
);

# The ":all" tag covers everything listed in @EXPORT and @EXPORT_OK.
our %EXPORT_TAGS = ( all => [ @EXPORT, @EXPORT_OK ] );

# Load the compiled part of the module through DynaLoader. Written as an
# explicit class-method call instead of indirect-object syntax
# ("bootstrap Unicode::Normalize $VERSION"), which depends on how the
# parser guesses at the bareword.
__PACKAGE__->bootstrap($VERSION);

# Values passed as the second argument to decompose() to choose between
# canonical and compatibility decomposition.
use constant CANON  => 0;
use constant COMPAT => 1;
# NFD($string): Normalization Form D.
# Decomposes the argument with the CANON setting, then hands the result to
# reorder() and returns whatever reorder() produces.
sub NFD ($) {
    return reorder(
        decompose($_[0], CANON)
    );
}
ac5ea531
JH
# NFKD($string): Normalization Form KD.
# Decomposes the argument with the COMPAT setting, then hands the result to
# reorder() and returns whatever reorder() produces.
sub NFKD ($) {
    return reorder(
        decompose($_[0], COMPAT)
    );
}
30
# NFC($string): Normalization Form C.
# Same pipeline as NFD (decompose with CANON, then reorder), followed by
# compose() on the result.
sub NFC ($) {
    return compose( NFD($_[0]) );
}
ac5ea531
JH
# NFKC($string): Normalization Form KC.
# Same pipeline as NFKD (decompose with COMPAT, then reorder), followed by
# compose() on the result.
sub NFKC ($) {
    return compose( NFKD($_[0]) );
}
33
# normalize($form_name, $raw_string)
#
# Generic front end for the four normalization functions. $form_name may be
# given with or without the leading "NF": 'D'/'NFD', 'C'/'NFC', 'KD'/'NFKD',
# 'KC'/'NFKC'. Returns the result of the matching NFD/NFC/NFKD/NFKC call.
#
# Croaks if $form_name is not one of those names. The message quotes the
# name exactly as the caller passed it, so normalize('NFX', ...) complains
# about 'NFX' and not about the prefix-stripped 'X'.
sub normalize($$)
{
    my $form = shift;

    # Strip the optional prefix from a copy; $form itself is kept intact
    # for the error message below.
    (my $key = $form) =~ s/^NF//;

    return
        $key eq 'D'  ? NFD ($_[0]) :
        $key eq 'C'  ? NFC ($_[0]) :
        $key eq 'KD' ? NFKD($_[0]) :
        $key eq 'KC' ? NFKC($_[0]) :
        croak($PACKAGE."::normalize: invalid form name: $form");
}
44
451;
46__END__
2a204b45
JH
47
48=head1 NAME
49
50Unicode::Normalize - normalized forms of Unicode text
51
52=head1 SYNOPSIS
53
54 use Unicode::Normalize;
55
56 $string_NFD = NFD($raw_string); # Normalization Form D
57 $string_NFC = NFC($raw_string); # Normalization Form C
58 $string_NFKD = NFKD($raw_string); # Normalization Form KD
59 $string_NFKC = NFKC($raw_string); # Normalization Form KC
60
61 or
62
63 use Unicode::Normalize 'normalize';
64
65 $string_NFD = normalize('D', $raw_string); # Normalization Form D
66 $string_NFC = normalize('C', $raw_string); # Normalization Form C
67 $string_NFKD = normalize('KD', $raw_string); # Normalization Form KD
68 $string_NFKC = normalize('KC', $raw_string); # Normalization Form KC
69
70=head1 DESCRIPTION
71
72=head2 Normalization
73
74=over 4
75
76=item C<$string_NFD = NFD($raw_string)>
77
78returns the Normalization Form D (formed by canonical decomposition).
79
80
81=item C<$string_NFC = NFC($raw_string)>
82
83returns the Normalization Form C (formed by canonical decomposition
84followed by canonical composition).
85
86=item C<$string_NFKD = NFKD($raw_string)>
87
88returns the Normalization Form KD (formed by compatibility decomposition).
89
90=item C<$string_NFKC = NFKC($raw_string)>
91
92returns the Normalization Form KC (formed by compatibility decomposition
93followed by B<canonical> composition).
94
95=item C<$normalized_string = normalize($form_name, $raw_string)>
96
97C<$form_name> must be one of the following names.
98
99 'C' or 'NFC' for Normalization Form C
100 'D' or 'NFD' for Normalization Form D
101 'KC' or 'NFKC' for Normalization Form KC
102 'KD' or 'NFKD' for Normalization Form KD
103
104=back
105
106=head2 Character Data
107
108These functions are interfaces to the character data used internally.
109If you only want to get Unicode normalization forms,
110you do not need to call them yourself.
111
112=over 4
113
114=item C<$canonical_decomposed = getCanon($codepoint)>
115
116=item C<$compatibility_decomposed = getCompat($codepoint)>
117
118If the character of the specified codepoint is canonically or
119compatibility decomposable (including Hangul Syllables),
120returns the B<completely decomposed> string equivalent to it.
121
122If it is not decomposable, returns undef.
123
124=item C<$uv_composite = getComposite($uv_here, $uv_next)>
125
126If the pair of characters here and next (given as codepoints) is composable
127(including Hangul Jamo/Syllables and Exclusions),
128returns the codepoint of the composite.
129
130If they are not composable, returns undef.
131
132=item C<$combining_class = getCombinClass($codepoint)>
133
134Returns the combining class of the character as an integer.
135
136=item C<$is_exclusion = isExclusion($codepoint)>
137
138Returns a boolean indicating whether the character of the specified codepoint is
139a composition exclusion.
140
141=back
142
143=head2 EXPORT
144
145C<NFC>, C<NFD>, C<NFKC>, C<NFKD>: by default.
146
147C<normalize> and some other functions: on request.
148
149=head1 AUTHOR
150
151SADAHIRO Tomoyuki, E<lt>SADAHIRO@cpan.orgE<gt>
152
153 http://homepage1.nifty.com/nomenclator/perl/
154
155 Copyright(C) 2001, SADAHIRO Tomoyuki. Japan. All rights reserved.
156
157 This program is free software; you can redistribute it and/or
158 modify it under the same terms as Perl itself.
159
160=head1 SEE ALSO
161
162=over 4
163
164=item http://www.unicode.org/unicode/reports/tr15/
165
166Unicode Normalization Forms - UAX #15
167
168=back
169
170=cut
171