| 1 | # CompositionExclusions-6.3.0.txt |
| 2 | # Date: 2012-12-11, 11:23:00 GMT [KW, LI] |
| 3 | # |
| 4 | # This file lists the characters for the Composition Exclusion Table |
| 5 | # defined in UAX #15, Unicode Normalization Forms. |
| 6 | # |
| 7 | # This file is a normative contributory data file in the |
| 8 | # Unicode Character Database. |
| 9 | # |
| 10 | # Copyright (c) 1991-2012 Unicode, Inc. |
| 11 | # For terms of use, see http://www.unicode.org/terms_of_use.html |
| 12 | # |
| 13 | # For more information, see |
| 14 | # http://www.unicode.org/reports/tr15/#Primary_Exclusion_List_Table |
| 15 | # |
| 16 | # For a full derivation of composition exclusions, see the derived property |
| 17 | # Full_Composition_Exclusion in DerivedNormalizationProps.txt |
| 18 | # |
| 19 | |
| 20 | # ================================================ |
| 21 | # (1) Script Specifics |
| 22 | # |
| 23 | # This list of characters cannot be derived from the UnicodeData.txt file. |
| 24 | # ================================================ |
| 25 | |
| 26 | 0958 # DEVANAGARI LETTER QA |
| 27 | 0959 # DEVANAGARI LETTER KHHA |
| 28 | 095A # DEVANAGARI LETTER GHHA |
| 29 | 095B # DEVANAGARI LETTER ZA |
| 30 | 095C # DEVANAGARI LETTER DDDHA |
| 31 | 095D # DEVANAGARI LETTER RHA |
| 32 | 095E # DEVANAGARI LETTER FA |
| 33 | 095F # DEVANAGARI LETTER YYA |
| 34 | 09DC # BENGALI LETTER RRA |
| 35 | 09DD # BENGALI LETTER RHA |
| 36 | 09DF # BENGALI LETTER YYA |
| 37 | 0A33 # GURMUKHI LETTER LLA |
| 38 | 0A36 # GURMUKHI LETTER SHA |
| 39 | 0A59 # GURMUKHI LETTER KHHA |
| 40 | 0A5A # GURMUKHI LETTER GHHA |
| 41 | 0A5B # GURMUKHI LETTER ZA |
| 42 | 0A5E # GURMUKHI LETTER FA |
| 43 | 0B5C # ORIYA LETTER RRA |
| 44 | 0B5D # ORIYA LETTER RHA |
| 45 | 0F43 # TIBETAN LETTER GHA |
| 46 | 0F4D # TIBETAN LETTER DDHA |
| 47 | 0F52 # TIBETAN LETTER DHA |
| 48 | 0F57 # TIBETAN LETTER BHA |
| 49 | 0F5C # TIBETAN LETTER DZHA |
| 50 | 0F69 # TIBETAN LETTER KSSA |
| 51 | 0F76 # TIBETAN VOWEL SIGN VOCALIC R |
| 52 | 0F78 # TIBETAN VOWEL SIGN VOCALIC L |
| 53 | 0F93 # TIBETAN SUBJOINED LETTER GHA |
| 54 | 0F9D # TIBETAN SUBJOINED LETTER DDHA |
| 55 | 0FA2 # TIBETAN SUBJOINED LETTER DHA |
| 56 | 0FA7 # TIBETAN SUBJOINED LETTER BHA |
| 57 | 0FAC # TIBETAN SUBJOINED LETTER DZHA |
| 58 | 0FB9 # TIBETAN SUBJOINED LETTER KSSA |
| 59 | FB1D # HEBREW LETTER YOD WITH HIRIQ |
| 60 | FB1F # HEBREW LIGATURE YIDDISH YOD YOD PATAH |
| 61 | FB2A # HEBREW LETTER SHIN WITH SHIN DOT |
| 62 | FB2B # HEBREW LETTER SHIN WITH SIN DOT |
| 63 | FB2C # HEBREW LETTER SHIN WITH DAGESH AND SHIN DOT |
| 64 | FB2D # HEBREW LETTER SHIN WITH DAGESH AND SIN DOT |
| 65 | FB2E # HEBREW LETTER ALEF WITH PATAH |
| 66 | FB2F # HEBREW LETTER ALEF WITH QAMATS |
| 67 | FB30 # HEBREW LETTER ALEF WITH MAPIQ |
| 68 | FB31 # HEBREW LETTER BET WITH DAGESH |
| 69 | FB32 # HEBREW LETTER GIMEL WITH DAGESH |
| 70 | FB33 # HEBREW LETTER DALET WITH DAGESH |
| 71 | FB34 # HEBREW LETTER HE WITH MAPIQ |
| 72 | FB35 # HEBREW LETTER VAV WITH DAGESH |
| 73 | FB36 # HEBREW LETTER ZAYIN WITH DAGESH |
| 74 | FB38 # HEBREW LETTER TET WITH DAGESH |
| 75 | FB39 # HEBREW LETTER YOD WITH DAGESH |
| 76 | FB3A # HEBREW LETTER FINAL KAF WITH DAGESH |
| 77 | FB3B # HEBREW LETTER KAF WITH DAGESH |
| 78 | FB3C # HEBREW LETTER LAMED WITH DAGESH |
| 79 | FB3E # HEBREW LETTER MEM WITH DAGESH |
| 80 | FB40 # HEBREW LETTER NUN WITH DAGESH |
| 81 | FB41 # HEBREW LETTER SAMEKH WITH DAGESH |
| 82 | FB43 # HEBREW LETTER FINAL PE WITH DAGESH |
| 83 | FB44 # HEBREW LETTER PE WITH DAGESH |
| 84 | FB46 # HEBREW LETTER TSADI WITH DAGESH |
| 85 | FB47 # HEBREW LETTER QOF WITH DAGESH |
| 86 | FB48 # HEBREW LETTER RESH WITH DAGESH |
| 87 | FB49 # HEBREW LETTER SHIN WITH DAGESH |
| 88 | FB4A # HEBREW LETTER TAV WITH DAGESH |
| 89 | FB4B # HEBREW LETTER VAV WITH HOLAM |
| 90 | FB4C # HEBREW LETTER BET WITH RAFE |
| 91 | FB4D # HEBREW LETTER KAF WITH RAFE |
| 92 | FB4E # HEBREW LETTER PE WITH RAFE |
| 93 | |
| 94 | # Total code points: 67 |
| 95 | |
| 96 | # ================================================ |
| 97 | # (2) Post Composition Version precomposed characters |
| 98 | # |
| 99 | # These characters cannot be derived solely from the UnicodeData.txt file |
| 100 | # in this version of Unicode. |
| 101 | # |
| 102 | # Note that characters which were added to the standard after the |
| 103 | # Composition Version and which have canonical decomposition mappings |
| 104 | # are not automatically added to this list of Post Composition |
| 105 | # Version precomposed characters. |
| 106 | # ================================================ |
| 107 | |
| 108 | 2ADC # FORKING |
| 109 | 1D15E # MUSICAL SYMBOL HALF NOTE |
| 110 | 1D15F # MUSICAL SYMBOL QUARTER NOTE |
| 111 | 1D160 # MUSICAL SYMBOL EIGHTH NOTE |
| 112 | 1D161 # MUSICAL SYMBOL SIXTEENTH NOTE |
| 113 | 1D162 # MUSICAL SYMBOL THIRTY-SECOND NOTE |
| 114 | 1D163 # MUSICAL SYMBOL SIXTY-FOURTH NOTE |
| 115 | 1D164 # MUSICAL SYMBOL ONE HUNDRED TWENTY-EIGHTH NOTE |
| 116 | 1D1BB # MUSICAL SYMBOL MINIMA |
| 117 | 1D1BC # MUSICAL SYMBOL MINIMA BLACK |
| 118 | 1D1BD # MUSICAL SYMBOL SEMIMINIMA WHITE |
| 119 | 1D1BE # MUSICAL SYMBOL SEMIMINIMA BLACK |
| 120 | 1D1BF # MUSICAL SYMBOL FUSA WHITE |
| 121 | 1D1C0 # MUSICAL SYMBOL FUSA BLACK |
| 122 | |
| 123 | # Total code points: 14 |
| 124 | |
| 125 | # ================================================ |
| 126 | # (3) Singleton Decompositions |
| 127 | # |
| 128 | # These characters can be derived from the UnicodeData.txt file |
| 129 | # by including all canonically decomposable characters whose |
| 130 | # canonical decomposition consists of a single character. |
| 131 | # |
| 132 | # These characters are simply quoted here for reference. |
| 133 | # See also Full_Composition_Exclusion in DerivedNormalizationProps.txt |
| 134 | # ================================================ |
| 135 | |
| 136 | # 0340..0341 [2] COMBINING GRAVE TONE MARK..COMBINING ACUTE TONE MARK |
| 137 | # 0343 COMBINING GREEK KORONIS |
| 138 | # 0374 GREEK NUMERAL SIGN |
| 139 | # 037E GREEK QUESTION MARK |
| 140 | # 0387 GREEK ANO TELEIA |
| 141 | # 1F71 GREEK SMALL LETTER ALPHA WITH OXIA |
| 142 | # 1F73 GREEK SMALL LETTER EPSILON WITH OXIA |
| 143 | # 1F75 GREEK SMALL LETTER ETA WITH OXIA |
| 144 | # 1F77 GREEK SMALL LETTER IOTA WITH OXIA |
| 145 | # 1F79 GREEK SMALL LETTER OMICRON WITH OXIA |
| 146 | # 1F7B GREEK SMALL LETTER UPSILON WITH OXIA |
| 147 | # 1F7D GREEK SMALL LETTER OMEGA WITH OXIA |
| 148 | # 1FBB GREEK CAPITAL LETTER ALPHA WITH OXIA |
| 149 | # 1FBE GREEK PROSGEGRAMMENI |
| 150 | # 1FC9 GREEK CAPITAL LETTER EPSILON WITH OXIA |
| 151 | # 1FCB GREEK CAPITAL LETTER ETA WITH OXIA |
| 152 | # 1FD3 GREEK SMALL LETTER IOTA WITH DIALYTIKA AND OXIA |
| 153 | # 1FDB GREEK CAPITAL LETTER IOTA WITH OXIA |
| 154 | # 1FE3 GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND OXIA |
| 155 | # 1FEB GREEK CAPITAL LETTER UPSILON WITH OXIA |
| 156 | # 1FEE..1FEF [2] GREEK DIALYTIKA AND OXIA..GREEK VARIA |
| 157 | # 1FF9 GREEK CAPITAL LETTER OMICRON WITH OXIA |
| 158 | # 1FFB GREEK CAPITAL LETTER OMEGA WITH OXIA |
| 159 | # 1FFD GREEK OXIA |
| 160 | # 2000..2001 [2] EN QUAD..EM QUAD |
| 161 | # 2126 OHM SIGN |
| 162 | # 212A..212B [2] KELVIN SIGN..ANGSTROM SIGN |
| 163 | # 2329 LEFT-POINTING ANGLE BRACKET |
| 164 | # 232A RIGHT-POINTING ANGLE BRACKET |
| 165 | # F900..FA0D [270] CJK COMPATIBILITY IDEOGRAPH-F900..CJK COMPATIBILITY IDEOGRAPH-FA0D |
| 166 | # FA10 CJK COMPATIBILITY IDEOGRAPH-FA10 |
| 167 | # FA12 CJK COMPATIBILITY IDEOGRAPH-FA12 |
| 168 | # FA15..FA1E [10] CJK COMPATIBILITY IDEOGRAPH-FA15..CJK COMPATIBILITY IDEOGRAPH-FA1E |
| 169 | # FA20 CJK COMPATIBILITY IDEOGRAPH-FA20 |
| 170 | # FA22 CJK COMPATIBILITY IDEOGRAPH-FA22 |
| 171 | # FA25..FA26 [2] CJK COMPATIBILITY IDEOGRAPH-FA25..CJK COMPATIBILITY IDEOGRAPH-FA26 |
| 172 | # FA2A..FA6D [68] CJK COMPATIBILITY IDEOGRAPH-FA2A..CJK COMPATIBILITY IDEOGRAPH-FA6D |
| 173 | # FA70..FAD9 [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COMPATIBILITY IDEOGRAPH-FAD9 |
| 174 | # 2F800..2FA1D [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D |
| 175 | |
| 176 | # Total code points: 1035 |
| 177 | |
| 178 | # ================================================ |
| 179 | # (4) Non-Starter Decompositions |
| 180 | # |
| 181 | # These characters can be derived from the UnicodeData.txt file |
| 182 | # by including each expanding canonical decomposition |
| 183 | # (i.e., those which canonically decompose to a sequence |
| 184 | # of characters instead of a single character), such that: |
| 185 | # |
| 186 | # A. The character is not a Starter. |
| 187 | # |
| 188 | # OR (inclusive) |
| 189 | # |
| 190 | # B. The character's canonical decomposition begins |
| 191 | # with a character that is not a Starter. |
| 192 | # |
| 193 | # Note that a "Starter" is any character with a zero combining class. |
| 194 | # |
| 195 | # These characters are simply quoted here for reference. |
| 196 | # See also Full_Composition_Exclusion in DerivedNormalizationProps.txt |
| 197 | # ================================================ |
| 198 | |
| 199 | # 0344 COMBINING GREEK DIALYTIKA TONOS |
| 200 | # 0F73 TIBETAN VOWEL SIGN II |
| 201 | # 0F75 TIBETAN VOWEL SIGN UU |
| 202 | # 0F81 TIBETAN VOWEL SIGN REVERSED II |
| 203 | |
| 204 | # Total code points: 4 |
| 205 | |
| 206 | # EOF |