Use new Unicode 6.2 beta

[perl5.git] / lib / unicore / CompositionExclusions.txt
diff --git a/lib/unicore/CompositionExclusions.txt b/lib/unicore/CompositionExclusions.txt

index 07a60b8..cd19f42 100644 (file)
--- a/lib/unicore/CompositionExclusions.txt
+++ b/lib/unicore/CompositionExclusions.txt
@@ -1,18 +1,26 @@
-# CompositionExclusions-3.2.0.txt
-# Date: 2002-03-19,23:30:28 GMT [MD]
+# CompositionExclusions-6.2.0.txt
+# Date: 2012-05-15, 22:21:00 GMT [KW, LI]
  #
-# This file lists the characters from the UAX #15 Composition Exclusion Table.
+# This file lists the characters for the Composition Exclusion Table
+# defined in UAX #15, Unicode Normalization Forms.
  #
-# The format of the comments in this file has been updated since the last version,
-# CompositionExclusions-3.txt. The only substantive change to this file between that
-# version and this one is the addition of U+2ADC FORKING.
+# This file is a normative contributory data file in the
+# Unicode Character Database.
+#
+# Copyright (c) 1991-2012 Unicode, Inc.
+# For terms of use, see http://www.unicode.org/terms_of_use.html
  #
  # For more information, see
-# http://www.unicode.org/unicode/reports/tr15/#Primary Exclusion List Table
-# ================================================
+# http://www.unicode.org/unicode/reports/tr15/#Primary_Exclusion_List_Table
+#
+# For a full derivation of composition exclusions, see the derived property
+# Full_Composition_Exclusion in DerivedNormalizationProps.txt
+#
  
+# ================================================
  # (1) Script Specifics
-# This list of characters cannot be derived from the UnicodeData file.
+#
+# This list of characters cannot be derived from the UnicodeData.txt file.
  # ================================================
  
  0958    #  DEVANAGARI LETTER QA
@@ -87,8 +95,14 @@ FB4E    #  HEBREW LETTER PE WITH RAFE
  
  # ================================================
  # (2) Post Composition Version precomposed characters
+#
  # These characters cannot be derived solely from the UnicodeData.txt file
  # in this version of Unicode.
+#
+# Note that characters which were added to the standard after the
+# Composition Version, and which have canonical decomposition mappings,
+# are not automatically added to this list of Post Composition
+# Version precomposed characters.
  # ================================================
  
  2ADC    #  FORKING
@@ -110,10 +124,13 @@ FB4E    #  HEBREW LETTER PE WITH RAFE
  
  # ================================================
  # (3) Singleton Decompositions
-# These characters can be derived from the UnicodeData file
-# by including all characters whose canonical decomposition
-# consists of a single character.
+#
+# These characters can be derived from the UnicodeData.txt file
+# by including all canonically decomposable characters whose
+# canonical decomposition consists of a single character.
+#
  # These characters are simply quoted here for reference.
+# See also Full_Composition_Exclusion in DerivedNormalizationProps.txt
  # ================================================
  
  # 0340..0341       [2] COMBINING GRAVE TONE MARK..COMBINING ACUTE TONE MARK
@@ -152,19 +169,31 @@ FB4E    #  HEBREW LETTER PE WITH RAFE
  # FA20                 CJK COMPATIBILITY IDEOGRAPH-FA20
  # FA22                 CJK COMPATIBILITY IDEOGRAPH-FA22
  # FA25..FA26       [2] CJK COMPATIBILITY IDEOGRAPH-FA25..CJK COMPATIBILITY IDEOGRAPH-FA26
-# FA2A..FA2D       [4] CJK COMPATIBILITY IDEOGRAPH-FA2A..CJK COMPATIBILITY IDEOGRAPH-FA2D
-# FA30..FA6A      [59] CJK COMPATIBILITY IDEOGRAPH-FA30..CJK COMPATIBILITY IDEOGRAPH-FA6A
+# FA2A..FA6D      [68] CJK COMPATIBILITY IDEOGRAPH-FA2A..CJK COMPATIBILITY IDEOGRAPH-FA6D
+# FA70..FAD9     [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COMPATIBILITY IDEOGRAPH-FAD9
  # 2F800..2FA1D   [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D
  
-# Total code points: 924
+# Total code points: 1035
  
  # ================================================
  # (4) Non-Starter Decompositions
-# These characters can be derived from the UnicodeData file
-# by including all characters whose canonical decomposition consists
-# of a sequence of characters, the first of which has a non-zero
-# combining class.
+#
+# These characters can be derived from the UnicodeData.txt file
+# by including each character with an expanding canonical decomposition
+# (i.e., one which canonically decomposes to a sequence
+# of characters instead of a single character), such that:
+#
+# A. The character is not a Starter.
+#
+# OR (inclusive)
+#
+# B. The character's canonical decomposition begins
+# with a character that is not a Starter.
+#
+# Note that a "Starter" is any character with a zero combining class.
+#
  # These characters are simply quoted here for reference.
+# See also Full_Composition_Exclusion in DerivedNormalizationProps.txt
  # ================================================
  
  # 0344                 COMBINING GREEK DIALYTIKA TONOS
@@ -174,3 +203,4 @@ FB4E    #  HEBREW LETTER PE WITH RAFE
  
  # Total code points: 4
  
+# EOF