X-Git-Url: https://perl5.git.perl.org/perl5.git/blobdiff_plain/8a4f037860d9d14554f7acac3b5d56635b0f7ce8..20e8a3a35e61c7fcc6a4173969d7b685e762aef7:/lib/unicore/SpecialCasing.txt

diff --git a/lib/unicore/SpecialCasing.txt b/lib/unicore/SpecialCasing.txt
index 4bfe148..92e70a4 100644
--- a/lib/unicore/SpecialCasing.txt
+++ b/lib/unicore/SpecialCasing.txt
@@ -1,8 +1,8 @@
-# SpecialCasing-5.0.0.txt
-# Date: 2006-03-03, 08:23:36 GMT [MD]
+# SpecialCasing-5.1.0.txt
+# Date: 2008-03-03, 21:58:10 GMT [MD]
 #
 # Unicode Character Database
-# Copyright (c) 1991-2006 Unicode, Inc.
+# Copyright (c) 1991-2008 Unicode, Inc.
 # For terms of use, see http://www.unicode.org/terms_of_use.html
 # For documentation, see UCD.html
 #
@@ -11,7 +11,7 @@
 # This file is a supplement to the UnicodeData file.
 # It contains additional information about the casing of Unicode characters.
 # (For compatibility, the UnicodeData.txt file only contains case mappings for
-# characters where they are 1-1, and does not have locale-specific mappings.)
+# characters where they are 1-1, and independent of context and language.)
 # For more information, see the discussion of Case Mappings in the Unicode Standard.
 #
 # All code points not listed in this file that do not have a simple case mappings
@@ -27,16 +27,16 @@
 # than one character, they are separated by spaces. Other than as used to separate 
 # elements, spaces are to be ignored.
 #
-# The <condition_list> is optional. Where present, it consists of one or more locale IDs
+# The <condition_list> is optional. Where present, it consists of one or more language IDs
 # or contexts, separated by spaces. In these conditions:
 # - A condition list overrides the normal behavior if all of the listed conditions are true.
 # - The context is always the context of the characters in the original string,
 #   NOT in the resulting string.
 # - Case distinctions in the condition list are not significant.
 # - Conditions preceded by "Not_" represent the negation of the condition.
+# The condition list is not represented in the UCD as a formal property.
 #
-# A locale ID is defined by taking any language tag as defined by
-# RFC 3066 (or its successor), and replacing '-' by '_'.
+# A language ID is defined by BCP 47, with '-' and '_' treated equivalently.
 #
 # A context for a character C is defined by Section 3.13 Default Case 
 # Operations, of The Unicode Standard, Version 5.0.
@@ -47,7 +47,7 @@
 #  * Additional contexts
 #  * Additional fields
 # ================================================================================
-
+# @missing 0000..10FFFF; <slc>; <stc>; <suc>
 # ================================================================================
 # Unconditional mappings
 # ================================================================================
@@ -189,7 +189,14 @@ FB17; FB17; 0544 056D; 0544 053D; # ARMENIAN SMALL LIGATURE MEN XEH
 1FF7; 1FF7; 03A9 0342 0345; 03A9 0342 0399; # GREEK SMALL LETTER OMEGA WITH PERISPOMENI AND YPOGEGRAMMENI
 
 # ================================================================================
-# Conditional mappings
+# Conditional Mappings
+# The remainder of this file provides conditional casing data used to produce 
+# full case mappings.
+# ================================================================================
+# Language-Insensitive Mappings
+# These are characters whose full case mappings do not depend on language, but do
+# depend on context (which characters come before or after). For more information
+# see the header of this file and the Unicode Standard.
 # ================================================================================
 
 # Special case for final form of sigma
@@ -208,7 +215,10 @@ FB17; FB17; 0544 056D; 0544 053D; # ARMENIAN SMALL LIGATURE MEN XEH
 # 03C2; 03C3; 03A3; 03A3; Not_Final_Sigma; # GREEK SMALL LETTER FINAL SIGMA
 
 # ================================================================================
-# Locale-sensitive mappings
+# Language-Sensitive Mappings
+# These are characters whose full case mappings depend on language and perhaps also
+# context (which characters come before or after). For more information
+# see the header of this file and the Unicode Standard.
 # ================================================================================
 
 # Lithuanian