This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Prepare for Unicode 10.0
[perl5.git] / lib / unicore / CompositionExclusions.txt
CommitLineData
f2bfbac0
UC
1# CompositionExclusions-9.0.0.txt
2# Date: 2016-01-21, 22:00:00 GMT [KW, LI]
3# © 2016 Unicode®, Inc.
4# For terms of use, see http://www.unicode.org/terms_of_use.html
5#
6# Unicode Character Database
7# For documentation, see http://www.unicode.org/reports/tr44/
8836d2a5 8#
98fbe989
JH
9# This file lists the characters for the Composition Exclusion Table
10# defined in UAX #15, Unicode Normalization Forms.
d357d9fe 11#
a2bd7410
JH
12# This file is a normative contributory data file in the
13# Unicode Character Database.
14#
d357d9fe 15# For more information, see
bd84d130 16# http://www.unicode.org/unicode/reports/tr15/#Primary_Exclusion_List_Table
98fbe989
JH
17#
18# For a full derivation of composition exclusions, see the derived property
19# Full_Composition_Exclusion in DerivedNormalizationProps.txt
20#
d357d9fe 21
98fbe989 22# ================================================
d357d9fe 23# (1) Script Specifics
98fbe989
JH
24#
25# This list of characters cannot be derived from the UnicodeData.txt file.
822ebcc8
JH
26# ================================================
27
28 0958 # DEVANAGARI LETTER QA
29 0959 # DEVANAGARI LETTER KHHA
30 095A # DEVANAGARI LETTER GHHA
31 095B # DEVANAGARI LETTER ZA
32 095C # DEVANAGARI LETTER DDDHA
33 095D # DEVANAGARI LETTER RHA
34 095E # DEVANAGARI LETTER FA
35 095F # DEVANAGARI LETTER YYA
36 09DC # BENGALI LETTER RRA
37 09DD # BENGALI LETTER RHA
38 09DF # BENGALI LETTER YYA
39 0A33 # GURMUKHI LETTER LLA
40 0A36 # GURMUKHI LETTER SHA
41 0A59 # GURMUKHI LETTER KHHA
42 0A5A # GURMUKHI LETTER GHHA
43 0A5B # GURMUKHI LETTER ZA
44 0A5E # GURMUKHI LETTER FA
45 0B5C # ORIYA LETTER RRA
46 0B5D # ORIYA LETTER RHA
47 0F43 # TIBETAN LETTER GHA
48 0F4D # TIBETAN LETTER DDHA
49 0F52 # TIBETAN LETTER DHA
50 0F57 # TIBETAN LETTER BHA
51 0F5C # TIBETAN LETTER DZHA
52 0F69 # TIBETAN LETTER KSSA
53 0F76 # TIBETAN VOWEL SIGN VOCALIC R
54 0F78 # TIBETAN VOWEL SIGN VOCALIC L
55 0F93 # TIBETAN SUBJOINED LETTER GHA
56 0F9D # TIBETAN SUBJOINED LETTER DDHA
57 0FA2 # TIBETAN SUBJOINED LETTER DHA
58 0FA7 # TIBETAN SUBJOINED LETTER BHA
59 0FAC # TIBETAN SUBJOINED LETTER DZHA
60 0FB9 # TIBETAN SUBJOINED LETTER KSSA
61 FB1D # HEBREW LETTER YOD WITH HIRIQ
62 FB1F # HEBREW LIGATURE YIDDISH YOD YOD PATAH
63 FB2A # HEBREW LETTER SHIN WITH SHIN DOT
64 FB2B # HEBREW LETTER SHIN WITH SIN DOT
65 FB2C # HEBREW LETTER SHIN WITH DAGESH AND SHIN DOT
66 FB2D # HEBREW LETTER SHIN WITH DAGESH AND SIN DOT
67 FB2E # HEBREW LETTER ALEF WITH PATAH
68 FB2F # HEBREW LETTER ALEF WITH QAMATS
69 FB30 # HEBREW LETTER ALEF WITH MAPIQ
70 FB31 # HEBREW LETTER BET WITH DAGESH
71 FB32 # HEBREW LETTER GIMEL WITH DAGESH
72 FB33 # HEBREW LETTER DALET WITH DAGESH
73 FB34 # HEBREW LETTER HE WITH MAPIQ
74 FB35 # HEBREW LETTER VAV WITH DAGESH
75 FB36 # HEBREW LETTER ZAYIN WITH DAGESH
76 FB38 # HEBREW LETTER TET WITH DAGESH
77 FB39 # HEBREW LETTER YOD WITH DAGESH
78 FB3A # HEBREW LETTER FINAL KAF WITH DAGESH
79 FB3B # HEBREW LETTER KAF WITH DAGESH
80 FB3C # HEBREW LETTER LAMED WITH DAGESH
81 FB3E # HEBREW LETTER MEM WITH DAGESH
82 FB40 # HEBREW LETTER NUN WITH DAGESH
83 FB41 # HEBREW LETTER SAMEKH WITH DAGESH
84 FB43 # HEBREW LETTER FINAL PE WITH DAGESH
85 FB44 # HEBREW LETTER PE WITH DAGESH
86 FB46 # HEBREW LETTER TSADI WITH DAGESH
87 FB47 # HEBREW LETTER QOF WITH DAGESH
88 FB48 # HEBREW LETTER RESH WITH DAGESH
89 FB49 # HEBREW LETTER SHIN WITH DAGESH
90 FB4A # HEBREW LETTER TAV WITH DAGESH
91 FB4B # HEBREW LETTER VAV WITH HOLAM
92 FB4C # HEBREW LETTER BET WITH RAFE
93 FB4D # HEBREW LETTER KAF WITH RAFE
94 FB4E # HEBREW LETTER PE WITH RAFE
d357d9fe 95
822ebcc8 96# Total code points: 67
d357d9fe 97
822ebcc8 98# ================================================
190eec7c 99# (2) Post Composition Version precomposed characters
98fbe989 100#
822ebcc8
JH
101# These characters cannot be derived solely from the UnicodeData.txt file
102# in this version of Unicode.
98fbe989
JH
103#
104# Note that characters added to the standard after the
105# Composition Version and which have canonical decomposition mappings
106# are not automatically added to this list of Post Composition
107# Version precomposed characters.
822ebcc8
JH
108# ================================================
109
110 2ADC # FORKING
111 1D15E # MUSICAL SYMBOL HALF NOTE
112 1D15F # MUSICAL SYMBOL QUARTER NOTE
113 1D160 # MUSICAL SYMBOL EIGHTH NOTE
114 1D161 # MUSICAL SYMBOL SIXTEENTH NOTE
115 1D162 # MUSICAL SYMBOL THIRTY-SECOND NOTE
116 1D163 # MUSICAL SYMBOL SIXTY-FOURTH NOTE
117 1D164 # MUSICAL SYMBOL ONE HUNDRED TWENTY-EIGHTH NOTE
118 1D1BB # MUSICAL SYMBOL MINIMA
119 1D1BC # MUSICAL SYMBOL MINIMA BLACK
120 1D1BD # MUSICAL SYMBOL SEMIMINIMA WHITE
121 1D1BE # MUSICAL SYMBOL SEMIMINIMA BLACK
122 1D1BF # MUSICAL SYMBOL FUSA WHITE
123 1D1C0 # MUSICAL SYMBOL FUSA BLACK
190eec7c 124
822ebcc8 125# Total code points: 14
d357d9fe 126
822ebcc8 127# ================================================
d357d9fe 128# (3) Singleton Decompositions
98fbe989
JH
129#
130# These characters can be derived from the UnicodeData.txt file
bd84d130
KW
131# by including all canonically decomposable characters whose
132# canonical decomposition consists of a single character.
98fbe989 133#
d357d9fe 134# These characters are simply quoted here for reference.
98fbe989 135# See also Full_Composition_Exclusion in DerivedNormalizationProps.txt
822ebcc8 136# ================================================
d357d9fe 137
822ebcc8
JH
138# 0340..0341 [2] COMBINING GRAVE TONE MARK..COMBINING ACUTE TONE MARK
139# 0343 COMBINING GREEK KORONIS
140# 0374 GREEK NUMERAL SIGN
141# 037E GREEK QUESTION MARK
142# 0387 GREEK ANO TELEIA
143# 1F71 GREEK SMALL LETTER ALPHA WITH OXIA
144# 1F73 GREEK SMALL LETTER EPSILON WITH OXIA
145# 1F75 GREEK SMALL LETTER ETA WITH OXIA
146# 1F77 GREEK SMALL LETTER IOTA WITH OXIA
147# 1F79 GREEK SMALL LETTER OMICRON WITH OXIA
148# 1F7B GREEK SMALL LETTER UPSILON WITH OXIA
149# 1F7D GREEK SMALL LETTER OMEGA WITH OXIA
150# 1FBB GREEK CAPITAL LETTER ALPHA WITH OXIA
151# 1FBE GREEK PROSGEGRAMMENI
152# 1FC9 GREEK CAPITAL LETTER EPSILON WITH OXIA
153# 1FCB GREEK CAPITAL LETTER ETA WITH OXIA
154# 1FD3 GREEK SMALL LETTER IOTA WITH DIALYTIKA AND OXIA
155# 1FDB GREEK CAPITAL LETTER IOTA WITH OXIA
156# 1FE3 GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND OXIA
157# 1FEB GREEK CAPITAL LETTER UPSILON WITH OXIA
158# 1FEE..1FEF [2] GREEK DIALYTIKA AND OXIA..GREEK VARIA
159# 1FF9 GREEK CAPITAL LETTER OMICRON WITH OXIA
160# 1FFB GREEK CAPITAL LETTER OMEGA WITH OXIA
161# 1FFD GREEK OXIA
162# 2000..2001 [2] EN QUAD..EM QUAD
163# 2126 OHM SIGN
164# 212A..212B [2] KELVIN SIGN..ANGSTROM SIGN
165# 2329 LEFT-POINTING ANGLE BRACKET
166# 232A RIGHT-POINTING ANGLE BRACKET
167# F900..FA0D [270] CJK COMPATIBILITY IDEOGRAPH-F900..CJK COMPATIBILITY IDEOGRAPH-FA0D
168# FA10 CJK COMPATIBILITY IDEOGRAPH-FA10
169# FA12 CJK COMPATIBILITY IDEOGRAPH-FA12
170# FA15..FA1E [10] CJK COMPATIBILITY IDEOGRAPH-FA15..CJK COMPATIBILITY IDEOGRAPH-FA1E
171# FA20 CJK COMPATIBILITY IDEOGRAPH-FA20
172# FA22 CJK COMPATIBILITY IDEOGRAPH-FA22
173# FA25..FA26 [2] CJK COMPATIBILITY IDEOGRAPH-FA25..CJK COMPATIBILITY IDEOGRAPH-FA26
7620cb10 174# FA2A..FA6D [68] CJK COMPATIBILITY IDEOGRAPH-FA2A..CJK COMPATIBILITY IDEOGRAPH-FA6D
a2bd7410 175# FA70..FAD9 [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COMPATIBILITY IDEOGRAPH-FAD9
822ebcc8 176# 2F800..2FA1D [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D
d357d9fe 177
7620cb10 178# Total code points: 1035
822ebcc8
JH
179
180# ================================================
d357d9fe 181# (4) Non-Starter Decompositions
98fbe989 182#
7620cb10 183# These characters can be derived from the UnicodeData.txt file
bd84d130
KW
184# by including each expanding canonical decomposition
185# (i.e., those which canonically decompose to a sequence
186# of characters instead of a single character), such that:
187#
188# A. The character is not a Starter.
189#
190# OR (inclusive)
191#
192# B. The character's canonical decomposition begins
193# with a character that is not a Starter.
194#
195# Note that a "Starter" is any character with a zero combining class.
98fbe989 196#
d357d9fe 197# These characters are simply quoted here for reference.
98fbe989 198# See also Full_Composition_Exclusion in DerivedNormalizationProps.txt
822ebcc8
JH
199# ================================================
200
201# 0344 COMBINING GREEK DIALYTIKA TONOS
202# 0F73 TIBETAN VOWEL SIGN II
203# 0F75 TIBETAN VOWEL SIGN UU
204# 0F81 TIBETAN VOWEL SIGN REVERSED II
205
206# Total code points: 4
d357d9fe 207
83d881f0 208# EOF