This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Allow perl to work again with all Unicode releases
authorKarl Williamson <khw@cpan.org>
Wed, 29 Jul 2015 04:17:01 +0000 (22:17 -0600)
committerKarl Williamson <khw@cpan.org>
Wed, 29 Jul 2015 04:17:01 +0000 (22:17 -0600)
Compiling perl to work with early Unicode releases is a goal, but has
been broken since 5.18.  This sequence of commits gets this working
again, actually better than before.

24 files changed:
charclass_invlists.h
ext/XS-APItest/t/handy.t
lib/Unicode/UCD.pm
lib/Unicode/UCD.t
lib/locale.t
lib/unicore/README.perl
lib/unicore/mktables
lib/utf8_heavy.pl
perl.c
perl.h
pod/perldelta.pod
pp.c
regcharclass.h
regcomp.c
regen/mk_PL_charclass.pl
regen/mk_invlists.pl
regen/regcharclass.pl
regen/regcharclass_multi_char_folds.pl
regen/unicode_constants.pl
regexec.c
t/uni/variables.t
unicode_constants.h
utf8.c
utf8.h

index 076d223..64406bf 100644 (file)
@@ -312,7 +312,387 @@ static const UV Cased_invlist[] = { /* for ASCII/Latin1 */
        0x1F18A
 };
 
-static const UV Grapheme_Cluster_Break_invlist[] = { /* for ASCII/Latin1 */
+#endif /* defined(PERL_IN_PERL_C) */
+
+#if defined(PERL_IN_REGCOMP_C)
+
+static const UV NonL1_Perl_Non_Final_Folds_invlist[] = { /* for ASCII/Latin1 */
+       45,     /* Number of elements */
+       148565664, /* Version and data structure type */
+       1,      /* 0 if the list starts at 0;
+                  1 if it starts at the element beyond 0 */
+       0x0,
+       0x2BC,
+       0x2BD,
+       0x308,
+       0x309,
+       0x313,
+       0x314,
+       0x342,
+       0x343,
+       0x3AC,
+       0x3AD,
+       0x3AE,
+       0x3AF,
+       0x3B1,
+       0x3B2,
+       0x3B7,
+       0x3B8,
+       0x3B9,
+       0x3BA,
+       0x3C1,
+       0x3C2,
+       0x3C5,
+       0x3C6,
+       0x3C9,
+       0x3CA,
+       0x3CE,
+       0x3CF,
+       0x565,
+       0x566,
+       0x574,
+       0x575,
+       0x57E,
+       0x57F,
+       0x1F00,
+       0x1F08,
+       0x1F20,
+       0x1F28,
+       0x1F60,
+       0x1F68,
+       0x1F70,
+       0x1F71,
+       0x1F74,
+       0x1F75,
+       0x1F7C,
+       0x1F7D
+};
+
+static const UV _Perl_Any_Folds_invlist[] = { /* for ASCII/Latin1 */
+       247,    /* Number of elements */
+       148565664, /* Version and data structure type */
+       1,      /* 0 if the list starts at 0;
+                  1 if it starts at the element beyond 0 */
+       0x0,
+       0x41,
+       0x5B,
+       0x61,
+       0x7B,
+       0xB5,
+       0xB6,
+       0xC0,
+       0xD7,
+       0xD8,
+       0xF7,
+       0xF8,
+       0x131,
+       0x132,
+       0x138,
+       0x139,
+       0x18D,
+       0x18E,
+       0x19B,
+       0x19C,
+       0x1AA,
+       0x1AC,
+       0x1BA,
+       0x1BC,
+       0x1BE,
+       0x1BF,
+       0x1C0,
+       0x1C4,
+       0x221,
+       0x222,
+       0x234,
+       0x23A,
+       0x255,
+       0x256,
+       0x258,
+       0x259,
+       0x25A,
+       0x25B,
+       0x25D,
+       0x260,
+       0x262,
+       0x263,
+       0x264,
+       0x265,
+       0x267,
+       0x268,
+       0x26A,
+       0x26B,
+       0x26D,
+       0x26F,
+       0x270,
+       0x271,
+       0x273,
+       0x275,
+       0x276,
+       0x27D,
+       0x27E,
+       0x280,
+       0x281,
+       0x283,
+       0x284,
+       0x287,
+       0x28D,
+       0x292,
+       0x293,
+       0x29D,
+       0x29F,
+       0x2BC,
+       0x2BD,
+       0x2BE,
+       0x2BF,
+       0x300,
+       0x302,
+       0x307,
+       0x309,
+       0x30A,
+       0x30B,
+       0x30C,
+       0x30D,
+       0x313,
+       0x314,
+       0x331,
+       0x332,
+       0x342,
+       0x343,
+       0x345,
+       0x346,
+       0x370,
+       0x374,
+       0x376,
+       0x378,
+       0x37B,
+       0x37E,
+       0x37F,
+       0x380,
+       0x386,
+       0x387,
+       0x388,
+       0x38B,
+       0x38C,
+       0x38D,
+       0x38E,
+       0x3A2,
+       0x3A3,
+       0x3D2,
+       0x3D5,
+       0x3F6,
+       0x3F7,
+       0x3FC,
+       0x3FD,
+       0x482,
+       0x48A,
+       0x530,
+       0x531,
+       0x557,
+       0x561,
+       0x588,
+       0x10A0,
+       0x10C6,
+       0x10C7,
+       0x10C8,
+       0x10CD,
+       0x10CE,
+       0x13A0,
+       0x13F6,
+       0x13F8,
+       0x13FE,
+       0x1D79,
+       0x1D7A,
+       0x1D7D,
+       0x1D7E,
+       0x1E00,
+       0x1E9C,
+       0x1E9E,
+       0x1E9F,
+       0x1EA0,
+       0x1F16,
+       0x1F18,
+       0x1F1E,
+       0x1F20,
+       0x1F46,
+       0x1F48,
+       0x1F4E,
+       0x1F50,
+       0x1F58,
+       0x1F59,
+       0x1F5A,
+       0x1F5B,
+       0x1F5C,
+       0x1F5D,
+       0x1F5E,
+       0x1F5F,
+       0x1F7E,
+       0x1F80,
+       0x1FB5,
+       0x1FB6,
+       0x1FBD,
+       0x1FBE,
+       0x1FBF,
+       0x1FC2,
+       0x1FC5,
+       0x1FC6,
+       0x1FCD,
+       0x1FD0,
+       0x1FD4,
+       0x1FD6,
+       0x1FDC,
+       0x1FE0,
+       0x1FED,
+       0x1FF2,
+       0x1FF5,
+       0x1FF6,
+       0x1FFD,
+       0x2126,
+       0x2127,
+       0x212A,
+       0x212C,
+       0x2132,
+       0x2133,
+       0x214E,
+       0x214F,
+       0x2160,
+       0x2180,
+       0x2183,
+       0x2185,
+       0x24B6,
+       0x24EA,
+       0x2C00,
+       0x2C2F,
+       0x2C30,
+       0x2C5F,
+       0x2C60,
+       0x2C71,
+       0x2C72,
+       0x2C74,
+       0x2C75,
+       0x2C77,
+       0x2C7E,
+       0x2CE4,
+       0x2CEB,
+       0x2CEF,
+       0x2CF2,
+       0x2CF4,
+       0x2D00,
+       0x2D26,
+       0x2D27,
+       0x2D28,
+       0x2D2D,
+       0x2D2E,
+       0xA640,
+       0xA66E,
+       0xA680,
+       0xA69C,
+       0xA722,
+       0xA730,
+       0xA732,
+       0xA770,
+       0xA779,
+       0xA788,
+       0xA78B,
+       0xA78E,
+       0xA790,
+       0xA794,
+       0xA796,
+       0xA7AE,
+       0xA7B0,
+       0xA7B8,
+       0xAB53,
+       0xAB54,
+       0xAB70,
+       0xABC0,
+       0xFB00,
+       0xFB07,
+       0xFB13,
+       0xFB18,
+       0xFF21,
+       0xFF3B,
+       0xFF41,
+       0xFF5B,
+       0x10400,
+       0x10450,
+       0x10C80,
+       0x10CB3,
+       0x10CC0,
+       0x10CF3,
+       0x118A0,
+       0x118E0
+};
+
+static const UV _Perl_Folds_To_Multi_Char_invlist[] = { /* for ASCII/Latin1 */
+       59,     /* Number of elements */
+       148565664, /* Version and data structure type */
+       1,      /* 0 if the list starts at 0;
+                  1 if it starts at the element beyond 0 */
+       0x0,
+       0xDF,
+       0xE0,
+       0x130,
+       0x131,
+       0x149,
+       0x14A,
+       0x1F0,
+       0x1F1,
+       0x390,
+       0x391,
+       0x3B0,
+       0x3B1,
+       0x587,
+       0x588,
+       0x1E96,
+       0x1E9B,
+       0x1E9E,
+       0x1E9F,
+       0x1F50,
+       0x1F51,
+       0x1F52,
+       0x1F53,
+       0x1F54,
+       0x1F55,
+       0x1F56,
+       0x1F57,
+       0x1F80,
+       0x1FB0,
+       0x1FB2,
+       0x1FB5,
+       0x1FB6,
+       0x1FB8,
+       0x1FBC,
+       0x1FBD,
+       0x1FC2,
+       0x1FC5,
+       0x1FC6,
+       0x1FC8,
+       0x1FCC,
+       0x1FCD,
+       0x1FD2,
+       0x1FD4,
+       0x1FD6,
+       0x1FD8,
+       0x1FE2,
+       0x1FE5,
+       0x1FE6,
+       0x1FE8,
+       0x1FF2,
+       0x1FF5,
+       0x1FF6,
+       0x1FF8,
+       0x1FFC,
+       0x1FFD,
+       0xFB00,
+       0xFB07,
+       0xFB13,
+       0xFB18
+};
+
+#endif /* defined(PERL_IN_REGCOMP_C) */
+
+#if defined(PERL_IN_PERL_C)
+
+static const UV _Perl_GCB_invlist[] = { /* for ASCII/Latin1 */
        1502,   /* Number of elements */
        148565664, /* Version and data structure type */
        0,      /* 0 if the list starts at 0;
@@ -1844,7 +2224,7 @@ typedef enum {
        GCB_V = 13
 } GCB_enum;
 
-static const GCB_enum Grapheme_Cluster_Break_invmap[] = { /* for ASCII/Latin1 */
+static const GCB_enum _Perl_GCB_invmap[] = { /* for ASCII/Latin1 */
        GCB_Control,
        GCB_LF,
        GCB_Control,
@@ -3351,382 +3731,6 @@ static const GCB_enum Grapheme_Cluster_Break_invmap[] = { /* for ASCII/Latin1 */
 
 #endif /* defined(PERL_IN_REGEXEC_C) */
 
-#if defined(PERL_IN_REGCOMP_C)
-
-static const UV NonL1_Perl_Non_Final_Folds_invlist[] = { /* for ASCII/Latin1 */
-       45,     /* Number of elements */
-       148565664, /* Version and data structure type */
-       1,      /* 0 if the list starts at 0;
-                  1 if it starts at the element beyond 0 */
-       0x0,
-       0x2BC,
-       0x2BD,
-       0x308,
-       0x309,
-       0x313,
-       0x314,
-       0x342,
-       0x343,
-       0x3AC,
-       0x3AD,
-       0x3AE,
-       0x3AF,
-       0x3B1,
-       0x3B2,
-       0x3B7,
-       0x3B8,
-       0x3B9,
-       0x3BA,
-       0x3C1,
-       0x3C2,
-       0x3C5,
-       0x3C6,
-       0x3C9,
-       0x3CA,
-       0x3CE,
-       0x3CF,
-       0x565,
-       0x566,
-       0x574,
-       0x575,
-       0x57E,
-       0x57F,
-       0x1F00,
-       0x1F08,
-       0x1F20,
-       0x1F28,
-       0x1F60,
-       0x1F68,
-       0x1F70,
-       0x1F71,
-       0x1F74,
-       0x1F75,
-       0x1F7C,
-       0x1F7D
-};
-
-static const UV _Perl_Any_Folds_invlist[] = { /* for ASCII/Latin1 */
-       247,    /* Number of elements */
-       148565664, /* Version and data structure type */
-       1,      /* 0 if the list starts at 0;
-                  1 if it starts at the element beyond 0 */
-       0x0,
-       0x41,
-       0x5B,
-       0x61,
-       0x7B,
-       0xB5,
-       0xB6,
-       0xC0,
-       0xD7,
-       0xD8,
-       0xF7,
-       0xF8,
-       0x131,
-       0x132,
-       0x138,
-       0x139,
-       0x18D,
-       0x18E,
-       0x19B,
-       0x19C,
-       0x1AA,
-       0x1AC,
-       0x1BA,
-       0x1BC,
-       0x1BE,
-       0x1BF,
-       0x1C0,
-       0x1C4,
-       0x221,
-       0x222,
-       0x234,
-       0x23A,
-       0x255,
-       0x256,
-       0x258,
-       0x259,
-       0x25A,
-       0x25B,
-       0x25D,
-       0x260,
-       0x262,
-       0x263,
-       0x264,
-       0x265,
-       0x267,
-       0x268,
-       0x26A,
-       0x26B,
-       0x26D,
-       0x26F,
-       0x270,
-       0x271,
-       0x273,
-       0x275,
-       0x276,
-       0x27D,
-       0x27E,
-       0x280,
-       0x281,
-       0x283,
-       0x284,
-       0x287,
-       0x28D,
-       0x292,
-       0x293,
-       0x29D,
-       0x29F,
-       0x2BC,
-       0x2BD,
-       0x2BE,
-       0x2BF,
-       0x300,
-       0x302,
-       0x307,
-       0x309,
-       0x30A,
-       0x30B,
-       0x30C,
-       0x30D,
-       0x313,
-       0x314,
-       0x331,
-       0x332,
-       0x342,
-       0x343,
-       0x345,
-       0x346,
-       0x370,
-       0x374,
-       0x376,
-       0x378,
-       0x37B,
-       0x37E,
-       0x37F,
-       0x380,
-       0x386,
-       0x387,
-       0x388,
-       0x38B,
-       0x38C,
-       0x38D,
-       0x38E,
-       0x3A2,
-       0x3A3,
-       0x3D2,
-       0x3D5,
-       0x3F6,
-       0x3F7,
-       0x3FC,
-       0x3FD,
-       0x482,
-       0x48A,
-       0x530,
-       0x531,
-       0x557,
-       0x561,
-       0x588,
-       0x10A0,
-       0x10C6,
-       0x10C7,
-       0x10C8,
-       0x10CD,
-       0x10CE,
-       0x13A0,
-       0x13F6,
-       0x13F8,
-       0x13FE,
-       0x1D79,
-       0x1D7A,
-       0x1D7D,
-       0x1D7E,
-       0x1E00,
-       0x1E9C,
-       0x1E9E,
-       0x1E9F,
-       0x1EA0,
-       0x1F16,
-       0x1F18,
-       0x1F1E,
-       0x1F20,
-       0x1F46,
-       0x1F48,
-       0x1F4E,
-       0x1F50,
-       0x1F58,
-       0x1F59,
-       0x1F5A,
-       0x1F5B,
-       0x1F5C,
-       0x1F5D,
-       0x1F5E,
-       0x1F5F,
-       0x1F7E,
-       0x1F80,
-       0x1FB5,
-       0x1FB6,
-       0x1FBD,
-       0x1FBE,
-       0x1FBF,
-       0x1FC2,
-       0x1FC5,
-       0x1FC6,
-       0x1FCD,
-       0x1FD0,
-       0x1FD4,
-       0x1FD6,
-       0x1FDC,
-       0x1FE0,
-       0x1FED,
-       0x1FF2,
-       0x1FF5,
-       0x1FF6,
-       0x1FFD,
-       0x2126,
-       0x2127,
-       0x212A,
-       0x212C,
-       0x2132,
-       0x2133,
-       0x214E,
-       0x214F,
-       0x2160,
-       0x2180,
-       0x2183,
-       0x2185,
-       0x24B6,
-       0x24EA,
-       0x2C00,
-       0x2C2F,
-       0x2C30,
-       0x2C5F,
-       0x2C60,
-       0x2C71,
-       0x2C72,
-       0x2C74,
-       0x2C75,
-       0x2C77,
-       0x2C7E,
-       0x2CE4,
-       0x2CEB,
-       0x2CEF,
-       0x2CF2,
-       0x2CF4,
-       0x2D00,
-       0x2D26,
-       0x2D27,
-       0x2D28,
-       0x2D2D,
-       0x2D2E,
-       0xA640,
-       0xA66E,
-       0xA680,
-       0xA69C,
-       0xA722,
-       0xA730,
-       0xA732,
-       0xA770,
-       0xA779,
-       0xA788,
-       0xA78B,
-       0xA78E,
-       0xA790,
-       0xA794,
-       0xA796,
-       0xA7AE,
-       0xA7B0,
-       0xA7B8,
-       0xAB53,
-       0xAB54,
-       0xAB70,
-       0xABC0,
-       0xFB00,
-       0xFB07,
-       0xFB13,
-       0xFB18,
-       0xFF21,
-       0xFF3B,
-       0xFF41,
-       0xFF5B,
-       0x10400,
-       0x10450,
-       0x10C80,
-       0x10CB3,
-       0x10CC0,
-       0x10CF3,
-       0x118A0,
-       0x118E0
-};
-
-static const UV _Perl_Folds_To_Multi_Char_invlist[] = { /* for ASCII/Latin1 */
-       59,     /* Number of elements */
-       148565664, /* Version and data structure type */
-       1,      /* 0 if the list starts at 0;
-                  1 if it starts at the element beyond 0 */
-       0x0,
-       0xDF,
-       0xE0,
-       0x130,
-       0x131,
-       0x149,
-       0x14A,
-       0x1F0,
-       0x1F1,
-       0x390,
-       0x391,
-       0x3B0,
-       0x3B1,
-       0x587,
-       0x588,
-       0x1E96,
-       0x1E9B,
-       0x1E9E,
-       0x1E9F,
-       0x1F50,
-       0x1F51,
-       0x1F52,
-       0x1F53,
-       0x1F54,
-       0x1F55,
-       0x1F56,
-       0x1F57,
-       0x1F80,
-       0x1FB0,
-       0x1FB2,
-       0x1FB5,
-       0x1FB6,
-       0x1FB8,
-       0x1FBC,
-       0x1FBD,
-       0x1FC2,
-       0x1FC5,
-       0x1FC6,
-       0x1FC8,
-       0x1FCC,
-       0x1FCD,
-       0x1FD2,
-       0x1FD4,
-       0x1FD6,
-       0x1FD8,
-       0x1FE2,
-       0x1FE5,
-       0x1FE6,
-       0x1FE8,
-       0x1FF2,
-       0x1FF5,
-       0x1FF6,
-       0x1FF8,
-       0x1FFC,
-       0x1FFD,
-       0xFB00,
-       0xFB07,
-       0xFB13,
-       0xFB18
-};
-
-#endif /* defined(PERL_IN_REGCOMP_C) */
-
 #if defined(PERL_IN_UTF8_C)
 
 static const UV _Perl_IDCont_invlist[] = { /* for ASCII/Latin1 */
@@ -6193,7 +6197,7 @@ static const UV _Perl_IDStart_invlist[] = { /* for ASCII/Latin1 */
 
 #if defined(PERL_IN_PERL_C)
 
-static const UV Sentence_Break_invlist[] = { /* for ASCII/Latin1 */
+static const UV _Perl_SB_invlist[] = { /* for ASCII/Latin1 */
        2896,   /* Number of elements */
        148565664, /* Version and data structure type */
        0,      /* 0 if the list starts at 0;
@@ -9121,7 +9125,7 @@ typedef enum {
        SB_Upper = 15
 } SB_enum;
 
-static const SB_enum Sentence_Break_invmap[] = { /* for ASCII/Latin1 */
+static const SB_enum _Perl_SB_invmap[] = { /* for ASCII/Latin1 */
        SB_Other,
        SB_Sp,
        SB_LF,
@@ -12022,37 +12026,9 @@ static const SB_enum Sentence_Break_invmap[] = { /* for ASCII/Latin1 */
 
 #endif /* defined(PERL_IN_REGEXEC_C) */
 
-#if defined(PERL_IN_REGCOMP_C)
-
-static const UV UpperLatin1_invlist[] = { /* for ASCII/Latin1 */
-       3,      /* Number of elements */
-       148565664, /* Version and data structure type */
-       1,      /* 0 if the list starts at 0;
-                  1 if it starts at the element beyond 0 */
-       0x0,
-       0x80,
-       0x100
-};
-
-#endif /* defined(PERL_IN_REGCOMP_C) */
-
 #if defined(PERL_IN_PERL_C)
 
-static const UV VertSpace_invlist[] = { /* for ASCII/Latin1 */
-       7,      /* Number of elements */
-       148565664, /* Version and data structure type */
-       1,      /* 0 if the list starts at 0;
-                  1 if it starts at the element beyond 0 */
-       0x0,
-       0xA,
-       0xE,
-       0x85,
-       0x86,
-       0x2028,
-       0x202A
-};
-
-static const UV Word_Break_invlist[] = { /* for ASCII/Latin1 */
+static const UV _Perl_WB_invlist[] = { /* for ASCII/Latin1 */
        1524,   /* Number of elements */
        148565664, /* Version and data structure type */
        0,      /* 0 if the list starts at 0;
@@ -13611,7 +13587,7 @@ typedef enum {
        WB_UNKNOWN = 18
 } WB_enum;
 
-static const WB_enum Word_Break_invmap[] = { /* for ASCII/Latin1 */
+static const WB_enum _Perl_WB_invmap[] = { /* for ASCII/Latin1 */
        WB_Other,
        WB_LF,
        WB_Newline,
@@ -15140,8 +15116,36 @@ static const WB_enum Word_Break_invmap[] = { /* for ASCII/Latin1 */
 
 #endif /* defined(PERL_IN_REGEXEC_C) */
 
+#if defined(PERL_IN_REGCOMP_C)
+
+static const UV UpperLatin1_invlist[] = { /* for ASCII/Latin1 */
+       3,      /* Number of elements */
+       148565664, /* Version and data structure type */
+       1,      /* 0 if the list starts at 0;
+                  1 if it starts at the element beyond 0 */
+       0x0,
+       0x80,
+       0x100
+};
+
+#endif /* defined(PERL_IN_REGCOMP_C) */
+
 #if defined(PERL_IN_PERL_C)
 
+static const UV VertSpace_invlist[] = { /* for ASCII/Latin1 */
+       7,      /* Number of elements */
+       148565664, /* Version and data structure type */
+       1,      /* 0 if the list starts at 0;
+                  1 if it starts at the element beyond 0 */
+       0x0,
+       0xA,
+       0xE,
+       0x85,
+       0x86,
+       0x2028,
+       0x202A
+};
+
 static const UV XPerlSpace_invlist[] = { /* for ASCII/Latin1 */
        21,     /* Number of elements */
        148565664, /* Version and data structure type */
@@ -24947,7 +24951,419 @@ static const UV Cased_invlist[] = { /* for EBCDIC 1047 */
        0x1F18A
 };
 
-static const UV Grapheme_Cluster_Break_invlist[] = { /* for EBCDIC 1047 */
+#endif /* defined(PERL_IN_PERL_C) */
+
+#if defined(PERL_IN_REGCOMP_C)
+
+static const UV NonL1_Perl_Non_Final_Folds_invlist[] = { /* for EBCDIC 1047 */
+       45,     /* Number of elements */
+       148565664, /* Version and data structure type */
+       1,      /* 0 if the list starts at 0;
+                  1 if it starts at the element beyond 0 */
+       0x0,
+       0x2BC,
+       0x2BD,
+       0x308,
+       0x309,
+       0x313,
+       0x314,
+       0x342,
+       0x343,
+       0x3AC,
+       0x3AD,
+       0x3AE,
+       0x3AF,
+       0x3B1,
+       0x3B2,
+       0x3B7,
+       0x3B8,
+       0x3B9,
+       0x3BA,
+       0x3C1,
+       0x3C2,
+       0x3C5,
+       0x3C6,
+       0x3C9,
+       0x3CA,
+       0x3CE,
+       0x3CF,
+       0x565,
+       0x566,
+       0x574,
+       0x575,
+       0x57E,
+       0x57F,
+       0x1F00,
+       0x1F08,
+       0x1F20,
+       0x1F28,
+       0x1F60,
+       0x1F68,
+       0x1F70,
+       0x1F71,
+       0x1F74,
+       0x1F75,
+       0x1F7C,
+       0x1F7D
+};
+
+static const UV _Perl_Any_Folds_invlist[] = { /* for EBCDIC 1047 */
+       279,    /* Number of elements */
+       148565664, /* Version and data structure type */
+       1,      /* 0 if the list starts at 0;
+                  1 if it starts at the element beyond 0 */
+       0x0,
+       0x42,
+       0x4A,
+       0x51,
+       0x5A,
+       0x62,
+       0x6A,
+       0x70,
+       0x79,
+       0x80,
+       0x8A,
+       0x8C,
+       0x8F,
+       0x91,
+       0x9A,
+       0x9C,
+       0x9D,
+       0x9E,
+       0x9F,
+       0xA0,
+       0xA1,
+       0xA2,
+       0xAA,
+       0xAC,
+       0xAD,
+       0xAE,
+       0xAF,
+       0xBA,
+       0xBB,
+       0xC1,
+       0xCA,
+       0xCB,
+       0xD0,
+       0xD1,
+       0xDA,
+       0xDB,
+       0xE0,
+       0xE2,
+       0xEA,
+       0xEB,
+       0xF0,
+       0xFB,
+       0xFF,
+       0x100,
+       0x131,
+       0x132,
+       0x138,
+       0x139,
+       0x18D,
+       0x18E,
+       0x19B,
+       0x19C,
+       0x1AA,
+       0x1AC,
+       0x1BA,
+       0x1BC,
+       0x1BE,
+       0x1BF,
+       0x1C0,
+       0x1C4,
+       0x221,
+       0x222,
+       0x234,
+       0x23A,
+       0x255,
+       0x256,
+       0x258,
+       0x259,
+       0x25A,
+       0x25B,
+       0x25D,
+       0x260,
+       0x262,
+       0x263,
+       0x264,
+       0x265,
+       0x267,
+       0x268,
+       0x26A,
+       0x26B,
+       0x26D,
+       0x26F,
+       0x270,
+       0x271,
+       0x273,
+       0x275,
+       0x276,
+       0x27D,
+       0x27E,
+       0x280,
+       0x281,
+       0x283,
+       0x284,
+       0x287,
+       0x28D,
+       0x292,
+       0x293,
+       0x29D,
+       0x29F,
+       0x2BC,
+       0x2BD,
+       0x2BE,
+       0x2BF,
+       0x300,
+       0x302,
+       0x307,
+       0x309,
+       0x30A,
+       0x30B,
+       0x30C,
+       0x30D,
+       0x313,
+       0x314,
+       0x331,
+       0x332,
+       0x342,
+       0x343,
+       0x345,
+       0x346,
+       0x370,
+       0x374,
+       0x376,
+       0x378,
+       0x37B,
+       0x37E,
+       0x37F,
+       0x380,
+       0x386,
+       0x387,
+       0x388,
+       0x38B,
+       0x38C,
+       0x38D,
+       0x38E,
+       0x3A2,
+       0x3A3,
+       0x3D2,
+       0x3D5,
+       0x3F6,
+       0x3F7,
+       0x3FC,
+       0x3FD,
+       0x482,
+       0x48A,
+       0x530,
+       0x531,
+       0x557,
+       0x561,
+       0x588,
+       0x10A0,
+       0x10C6,
+       0x10C7,
+       0x10C8,
+       0x10CD,
+       0x10CE,
+       0x13A0,
+       0x13F6,
+       0x13F8,
+       0x13FE,
+       0x1D79,
+       0x1D7A,
+       0x1D7D,
+       0x1D7E,
+       0x1E00,
+       0x1E9C,
+       0x1E9E,
+       0x1E9F,
+       0x1EA0,
+       0x1F16,
+       0x1F18,
+       0x1F1E,
+       0x1F20,
+       0x1F46,
+       0x1F48,
+       0x1F4E,
+       0x1F50,
+       0x1F58,
+       0x1F59,
+       0x1F5A,
+       0x1F5B,
+       0x1F5C,
+       0x1F5D,
+       0x1F5E,
+       0x1F5F,
+       0x1F7E,
+       0x1F80,
+       0x1FB5,
+       0x1FB6,
+       0x1FBD,
+       0x1FBE,
+       0x1FBF,
+       0x1FC2,
+       0x1FC5,
+       0x1FC6,
+       0x1FCD,
+       0x1FD0,
+       0x1FD4,
+       0x1FD6,
+       0x1FDC,
+       0x1FE0,
+       0x1FED,
+       0x1FF2,
+       0x1FF5,
+       0x1FF6,
+       0x1FFD,
+       0x2126,
+       0x2127,
+       0x212A,
+       0x212C,
+       0x2132,
+       0x2133,
+       0x214E,
+       0x214F,
+       0x2160,
+       0x2180,
+       0x2183,
+       0x2185,
+       0x24B6,
+       0x24EA,
+       0x2C00,
+       0x2C2F,
+       0x2C30,
+       0x2C5F,
+       0x2C60,
+       0x2C71,
+       0x2C72,
+       0x2C74,
+       0x2C75,
+       0x2C77,
+       0x2C7E,
+       0x2CE4,
+       0x2CEB,
+       0x2CEF,
+       0x2CF2,
+       0x2CF4,
+       0x2D00,
+       0x2D26,
+       0x2D27,
+       0x2D28,
+       0x2D2D,
+       0x2D2E,
+       0xA640,
+       0xA66E,
+       0xA680,
+       0xA69C,
+       0xA722,
+       0xA730,
+       0xA732,
+       0xA770,
+       0xA779,
+       0xA788,
+       0xA78B,
+       0xA78E,
+       0xA790,
+       0xA794,
+       0xA796,
+       0xA7AE,
+       0xA7B0,
+       0xA7B8,
+       0xAB53,
+       0xAB54,
+       0xAB70,
+       0xABC0,
+       0xFB00,
+       0xFB07,
+       0xFB13,
+       0xFB18,
+       0xFF21,
+       0xFF3B,
+       0xFF41,
+       0xFF5B,
+       0x10400,
+       0x10450,
+       0x10C80,
+       0x10CB3,
+       0x10CC0,
+       0x10CF3,
+       0x118A0,
+       0x118E0
+};
+
+static const UV _Perl_Folds_To_Multi_Char_invlist[] = { /* for EBCDIC 1047 */
+       59,     /* Number of elements */
+       148565664, /* Version and data structure type */
+       1,      /* 0 if the list starts at 0;
+                  1 if it starts at the element beyond 0 */
+       0x0,
+       0x59,
+       0x5A,
+       0x130,
+       0x131,
+       0x149,
+       0x14A,
+       0x1F0,
+       0x1F1,
+       0x390,
+       0x391,
+       0x3B0,
+       0x3B1,
+       0x587,
+       0x588,
+       0x1E96,
+       0x1E9B,
+       0x1E9E,
+       0x1E9F,
+       0x1F50,
+       0x1F51,
+       0x1F52,
+       0x1F53,
+       0x1F54,
+       0x1F55,
+       0x1F56,
+       0x1F57,
+       0x1F80,
+       0x1FB0,
+       0x1FB2,
+       0x1FB5,
+       0x1FB6,
+       0x1FB8,
+       0x1FBC,
+       0x1FBD,
+       0x1FC2,
+       0x1FC5,
+       0x1FC6,
+       0x1FC8,
+       0x1FCC,
+       0x1FCD,
+       0x1FD2,
+       0x1FD4,
+       0x1FD6,
+       0x1FD8,
+       0x1FE2,
+       0x1FE5,
+       0x1FE6,
+       0x1FE8,
+       0x1FF2,
+       0x1FF5,
+       0x1FF6,
+       0x1FF8,
+       0x1FFC,
+       0x1FFD,
+       0xFB00,
+       0xFB07,
+       0xFB13,
+       0xFB18
+};
+
+#endif /* defined(PERL_IN_REGCOMP_C) */
+
+#if defined(PERL_IN_PERL_C)
+
+static const UV _Perl_GCB_invlist[] = { /* for EBCDIC 1047 */
        1502,   /* Number of elements */
        148565664, /* Version and data structure type */
        0,      /* 0 if the list starts at 0;
@@ -26479,7 +26895,7 @@ typedef enum {
        GCB_V = 13
 } GCB_enum;
 
-static const GCB_enum Grapheme_Cluster_Break_invmap[] = { /* for EBCDIC 1047 */
+static const GCB_enum _Perl_GCB_invmap[] = { /* for EBCDIC 1047 */
        GCB_Control,
        GCB_CR,
        GCB_Control,
@@ -27986,414 +28402,6 @@ static const GCB_enum Grapheme_Cluster_Break_invmap[] = { /* for EBCDIC 1047 */
 
 #endif /* defined(PERL_IN_REGEXEC_C) */
 
-#if defined(PERL_IN_REGCOMP_C)
-
-static const UV NonL1_Perl_Non_Final_Folds_invlist[] = { /* for EBCDIC 1047 */
-       45,     /* Number of elements */
-       148565664, /* Version and data structure type */
-       1,      /* 0 if the list starts at 0;
-                  1 if it starts at the element beyond 0 */
-       0x0,
-       0x2BC,
-       0x2BD,
-       0x308,
-       0x309,
-       0x313,
-       0x314,
-       0x342,
-       0x343,
-       0x3AC,
-       0x3AD,
-       0x3AE,
-       0x3AF,
-       0x3B1,
-       0x3B2,
-       0x3B7,
-       0x3B8,
-       0x3B9,
-       0x3BA,
-       0x3C1,
-       0x3C2,
-       0x3C5,
-       0x3C6,
-       0x3C9,
-       0x3CA,
-       0x3CE,
-       0x3CF,
-       0x565,
-       0x566,
-       0x574,
-       0x575,
-       0x57E,
-       0x57F,
-       0x1F00,
-       0x1F08,
-       0x1F20,
-       0x1F28,
-       0x1F60,
-       0x1F68,
-       0x1F70,
-       0x1F71,
-       0x1F74,
-       0x1F75,
-       0x1F7C,
-       0x1F7D
-};
-
-static const UV _Perl_Any_Folds_invlist[] = { /* for EBCDIC 1047 */
-       279,    /* Number of elements */
-       148565664, /* Version and data structure type */
-       1,      /* 0 if the list starts at 0;
-                  1 if it starts at the element beyond 0 */
-       0x0,
-       0x42,
-       0x4A,
-       0x51,
-       0x5A,
-       0x62,
-       0x6A,
-       0x70,
-       0x79,
-       0x80,
-       0x8A,
-       0x8C,
-       0x8F,
-       0x91,
-       0x9A,
-       0x9C,
-       0x9D,
-       0x9E,
-       0x9F,
-       0xA0,
-       0xA1,
-       0xA2,
-       0xAA,
-       0xAC,
-       0xAD,
-       0xAE,
-       0xAF,
-       0xBA,
-       0xBB,
-       0xC1,
-       0xCA,
-       0xCB,
-       0xD0,
-       0xD1,
-       0xDA,
-       0xDB,
-       0xE0,
-       0xE2,
-       0xEA,
-       0xEB,
-       0xF0,
-       0xFB,
-       0xFF,
-       0x100,
-       0x131,
-       0x132,
-       0x138,
-       0x139,
-       0x18D,
-       0x18E,
-       0x19B,
-       0x19C,
-       0x1AA,
-       0x1AC,
-       0x1BA,
-       0x1BC,
-       0x1BE,
-       0x1BF,
-       0x1C0,
-       0x1C4,
-       0x221,
-       0x222,
-       0x234,
-       0x23A,
-       0x255,
-       0x256,
-       0x258,
-       0x259,
-       0x25A,
-       0x25B,
-       0x25D,
-       0x260,
-       0x262,
-       0x263,
-       0x264,
-       0x265,
-       0x267,
-       0x268,
-       0x26A,
-       0x26B,
-       0x26D,
-       0x26F,
-       0x270,
-       0x271,
-       0x273,
-       0x275,
-       0x276,
-       0x27D,
-       0x27E,
-       0x280,
-       0x281,
-       0x283,
-       0x284,
-       0x287,
-       0x28D,
-       0x292,
-       0x293,
-       0x29D,
-       0x29F,
-       0x2BC,
-       0x2BD,
-       0x2BE,
-       0x2BF,
-       0x300,
-       0x302,
-       0x307,
-       0x309,
-       0x30A,
-       0x30B,
-       0x30C,
-       0x30D,
-       0x313,
-       0x314,
-       0x331,
-       0x332,
-       0x342,
-       0x343,
-       0x345,
-       0x346,
-       0x370,
-       0x374,
-       0x376,
-       0x378,
-       0x37B,
-       0x37E,
-       0x37F,
-       0x380,
-       0x386,
-       0x387,
-       0x388,
-       0x38B,
-       0x38C,
-       0x38D,
-       0x38E,
-       0x3A2,
-       0x3A3,
-       0x3D2,
-       0x3D5,
-       0x3F6,
-       0x3F7,
-       0x3FC,
-       0x3FD,
-       0x482,
-       0x48A,
-       0x530,
-       0x531,
-       0x557,
-       0x561,
-       0x588,
-       0x10A0,
-       0x10C6,
-       0x10C7,
-       0x10C8,
-       0x10CD,
-       0x10CE,
-       0x13A0,
-       0x13F6,
-       0x13F8,
-       0x13FE,
-       0x1D79,
-       0x1D7A,
-       0x1D7D,
-       0x1D7E,
-       0x1E00,
-       0x1E9C,
-       0x1E9E,
-       0x1E9F,
-       0x1EA0,
-       0x1F16,
-       0x1F18,
-       0x1F1E,
-       0x1F20,
-       0x1F46,
-       0x1F48,
-       0x1F4E,
-       0x1F50,
-       0x1F58,
-       0x1F59,
-       0x1F5A,
-       0x1F5B,
-       0x1F5C,
-       0x1F5D,
-       0x1F5E,
-       0x1F5F,
-       0x1F7E,
-       0x1F80,
-       0x1FB5,
-       0x1FB6,
-       0x1FBD,
-       0x1FBE,
-       0x1FBF,
-       0x1FC2,
-       0x1FC5,
-       0x1FC6,
-       0x1FCD,
-       0x1FD0,
-       0x1FD4,
-       0x1FD6,
-       0x1FDC,
-       0x1FE0,
-       0x1FED,
-       0x1FF2,
-       0x1FF5,
-       0x1FF6,
-       0x1FFD,
-       0x2126,
-       0x2127,
-       0x212A,
-       0x212C,
-       0x2132,
-       0x2133,
-       0x214E,
-       0x214F,
-       0x2160,
-       0x2180,
-       0x2183,
-       0x2185,
-       0x24B6,
-       0x24EA,
-       0x2C00,
-       0x2C2F,
-       0x2C30,
-       0x2C5F,
-       0x2C60,
-       0x2C71,
-       0x2C72,
-       0x2C74,
-       0x2C75,
-       0x2C77,
-       0x2C7E,
-       0x2CE4,
-       0x2CEB,
-       0x2CEF,
-       0x2CF2,
-       0x2CF4,
-       0x2D00,
-       0x2D26,
-       0x2D27,
-       0x2D28,
-       0x2D2D,
-       0x2D2E,
-       0xA640,
-       0xA66E,
-       0xA680,
-       0xA69C,
-       0xA722,
-       0xA730,
-       0xA732,
-       0xA770,
-       0xA779,
-       0xA788,
-       0xA78B,
-       0xA78E,
-       0xA790,
-       0xA794,
-       0xA796,
-       0xA7AE,
-       0xA7B0,
-       0xA7B8,
-       0xAB53,
-       0xAB54,
-       0xAB70,
-       0xABC0,
-       0xFB00,
-       0xFB07,
-       0xFB13,
-       0xFB18,
-       0xFF21,
-       0xFF3B,
-       0xFF41,
-       0xFF5B,
-       0x10400,
-       0x10450,
-       0x10C80,
-       0x10CB3,
-       0x10CC0,
-       0x10CF3,
-       0x118A0,
-       0x118E0
-};
-
-static const UV _Perl_Folds_To_Multi_Char_invlist[] = { /* for EBCDIC 1047 */
-       59,     /* Number of elements */
-       148565664, /* Version and data structure type */
-       1,      /* 0 if the list starts at 0;
-                  1 if it starts at the element beyond 0 */
-       0x0,
-       0x59,
-       0x5A,
-       0x130,
-       0x131,
-       0x149,
-       0x14A,
-       0x1F0,
-       0x1F1,
-       0x390,
-       0x391,
-       0x3B0,
-       0x3B1,
-       0x587,
-       0x588,
-       0x1E96,
-       0x1E9B,
-       0x1E9E,
-       0x1E9F,
-       0x1F50,
-       0x1F51,
-       0x1F52,
-       0x1F53,
-       0x1F54,
-       0x1F55,
-       0x1F56,
-       0x1F57,
-       0x1F80,
-       0x1FB0,
-       0x1FB2,
-       0x1FB5,
-       0x1FB6,
-       0x1FB8,
-       0x1FBC,
-       0x1FBD,
-       0x1FC2,
-       0x1FC5,
-       0x1FC6,
-       0x1FC8,
-       0x1FCC,
-       0x1FCD,
-       0x1FD2,
-       0x1FD4,
-       0x1FD6,
-       0x1FD8,
-       0x1FE2,
-       0x1FE5,
-       0x1FE6,
-       0x1FE8,
-       0x1FF2,
-       0x1FF5,
-       0x1FF6,
-       0x1FF8,
-       0x1FFC,
-       0x1FFD,
-       0xFB00,
-       0xFB07,
-       0xFB13,
-       0xFB18
-};
-
-#endif /* defined(PERL_IN_REGCOMP_C) */
-
 #if defined(PERL_IN_UTF8_C)
 
 static const UV _Perl_IDCont_invlist[] = { /* for EBCDIC 1047 */
@@ -30910,7 +30918,7 @@ static const UV _Perl_IDStart_invlist[] = { /* for EBCDIC 1047 */
 
 #if defined(PERL_IN_PERL_C)
 
-static const UV Sentence_Break_invlist[] = { /* for EBCDIC 1047 */
+static const UV _Perl_SB_invlist[] = { /* for EBCDIC 1047 */
        2920,   /* Number of elements */
        148565664, /* Version and data structure type */
        0,      /* 0 if the list starts at 0;
@@ -33862,7 +33870,7 @@ typedef enum {
        SB_Upper = 15
 } SB_enum;
 
-static const SB_enum Sentence_Break_invmap[] = { /* for EBCDIC 1047 */
+static const SB_enum _Perl_SB_invmap[] = { /* for EBCDIC 1047 */
        SB_Other,
        SB_Sp,
        SB_Other,
@@ -36787,91 +36795,9 @@ static const SB_enum Sentence_Break_invmap[] = { /* for EBCDIC 1047 */
 
 #endif /* defined(PERL_IN_REGEXEC_C) */
 
-#if defined(PERL_IN_REGCOMP_C)
-
-static const UV UpperLatin1_invlist[] = { /* for EBCDIC 1047 */
-       55,     /* Number of elements */
-       148565664, /* Version and data structure type */
-       1,      /* 0 if the list starts at 0;
-                  1 if it starts at the element beyond 0 */
-       0x0,
-       0x4,
-       0x5,
-       0x6,
-       0x7,
-       0x8,
-       0xB,
-       0x14,
-       0x15,
-       0x17,
-       0x18,
-       0x1A,
-       0x1C,
-       0x20,
-       0x26,
-       0x28,
-       0x2D,
-       0x30,
-       0x32,
-       0x33,
-       0x37,
-       0x38,
-       0x3C,
-       0x3E,
-       0x3F,
-       0x41,
-       0x4B,
-       0x51,
-       0x5A,
-       0x62,
-       0x6B,
-       0x70,
-       0x79,
-       0x80,
-       0x81,
-       0x8A,
-       0x91,
-       0x9A,
-       0xA1,
-       0xAA,
-       0xAD,
-       0xAE,
-       0xBD,
-       0xBE,
-       0xC0,
-       0xCA,
-       0xD0,
-       0xDA,
-       0xE0,
-       0xE1,
-       0xE2,
-       0xEA,
-       0xF0,
-       0xFA,
-       0x100
-};
-
-#endif /* defined(PERL_IN_REGCOMP_C) */
-
 #if defined(PERL_IN_PERL_C)
 
-static const UV VertSpace_invlist[] = { /* for EBCDIC 1047 */
-       9,      /* Number of elements */
-       148565664, /* Version and data structure type */
-       1,      /* 0 if the list starts at 0;
-                  1 if it starts at the element beyond 0 */
-       0x0,
-       0xB,
-       0xE,
-       0x15,
-       0x16,
-       0x25,
-       0x26,
-       0x2028,
-       0x202A
-};
-
-static const UV Word_Break_invlist[] = { /* for EBCDIC 1047 */
+static const UV _Perl_WB_invlist[] = { /* for EBCDIC 1047 */
        1549,   /* Number of elements */
        148565664, /* Version and data structure type */
        0,      /* 0 if the list starts at 0;
@@ -38455,7 +38381,7 @@ typedef enum {
        WB_UNKNOWN = 18
 } WB_enum;
 
-static const WB_enum Word_Break_invmap[] = { /* for EBCDIC 1047 */
+static const WB_enum _Perl_WB_invmap[] = { /* for EBCDIC 1047 */
        WB_Other,
        WB_Newline,
        WB_CR,
@@ -40009,8 +39935,90 @@ static const WB_enum Word_Break_invmap[] = { /* for EBCDIC 1047 */
 
 #endif /* defined(PERL_IN_REGEXEC_C) */
 
+#if defined(PERL_IN_REGCOMP_C)
+
+static const UV UpperLatin1_invlist[] = { /* for EBCDIC 1047 */
+       55,     /* Number of elements */
+       148565664, /* Version and data structure type */
+       1,      /* 0 if the list starts at 0;
+                  1 if it starts at the element beyond 0 */
+       0x0,
+       0x4,
+       0x5,
+       0x6,
+       0x7,
+       0x8,
+       0xB,
+       0x14,
+       0x15,
+       0x17,
+       0x18,
+       0x1A,
+       0x1C,
+       0x20,
+       0x26,
+       0x28,
+       0x2D,
+       0x30,
+       0x32,
+       0x33,
+       0x37,
+       0x38,
+       0x3C,
+       0x3E,
+       0x3F,
+       0x41,
+       0x4B,
+       0x51,
+       0x5A,
+       0x62,
+       0x6B,
+       0x70,
+       0x79,
+       0x80,
+       0x81,
+       0x8A,
+       0x91,
+       0x9A,
+       0xA1,
+       0xAA,
+       0xAD,
+       0xAE,
+       0xBD,
+       0xBE,
+       0xC0,
+       0xCA,
+       0xD0,
+       0xDA,
+       0xE0,
+       0xE1,
+       0xE2,
+       0xEA,
+       0xF0,
+       0xFA,
+       0x100
+};
+
+#endif /* defined(PERL_IN_REGCOMP_C) */
+
 #if defined(PERL_IN_PERL_C)
 
+static const UV VertSpace_invlist[] = { /* for EBCDIC 1047 */
+       9,      /* Number of elements */
+       148565664, /* Version and data structure type */
+       1,      /* 0 if the list starts at 0;
+                  1 if it starts at the element beyond 0 */
+       0x0,
+       0xB,
+       0xE,
+       0x15,
+       0x16,
+       0x25,
+       0x26,
+       0x2028,
+       0x202A
+};
+
 static const UV XPerlSpace_invlist[] = { /* for EBCDIC 1047 */
        23,     /* Number of elements */
        148565664, /* Version and data structure type */
@@ -49924,174 +49932,582 @@ static const UV Cased_invlist[] = { /* for EBCDIC 037 */
        0x1F18A
 };
 
-static const UV Grapheme_Cluster_Break_invlist[] = { /* for EBCDIC 037 */
-       1502,   /* Number of elements */
+#endif /* defined(PERL_IN_PERL_C) */
+
+#if defined(PERL_IN_REGCOMP_C)
+
+static const UV NonL1_Perl_Non_Final_Folds_invlist[] = { /* for EBCDIC 037 */
+       45,     /* Number of elements */
        148565664, /* Version and data structure type */
-       0,      /* 0 if the list starts at 0;
+       1,      /* 0 if the list starts at 0;
                   1 if it starts at the element beyond 0 */
        0x0,
-       0xD,
-       0xE,
-       0x25,
-       0x26,
-       0x40,
+       0x2BC,
+       0x2BD,
+       0x308,
+       0x309,
+       0x313,
+       0x314,
+       0x342,
+       0x343,
+       0x3AC,
+       0x3AD,
+       0x3AE,
+       0x3AF,
+       0x3B1,
+       0x3B2,
+       0x3B7,
+       0x3B8,
+       0x3B9,
+       0x3BA,
+       0x3C1,
+       0x3C2,
+       0x3C5,
+       0x3C6,
+       0x3C9,
+       0x3CA,
+       0x3CE,
+       0x3CF,
+       0x565,
+       0x566,
+       0x574,
+       0x575,
+       0x57E,
+       0x57F,
+       0x1F00,
+       0x1F08,
+       0x1F20,
+       0x1F28,
+       0x1F60,
+       0x1F68,
+       0x1F70,
+       0x1F71,
+       0x1F74,
+       0x1F75,
+       0x1F7C,
+       0x1F7D
+};
+
+static const UV _Perl_Any_Folds_invlist[] = { /* for EBCDIC 037 */
+       275,    /* Number of elements */
+       148565664, /* Version and data structure type */
+       1,      /* 0 if the list starts at 0;
+                  1 if it starts at the element beyond 0 */
+       0x0,
+       0x42,
+       0x4A,
+       0x51,
+       0x5A,
+       0x62,
+       0x6A,
+       0x70,
+       0x79,
+       0x80,
+       0x8A,
+       0x8C,
+       0x8F,
+       0x91,
+       0x9A,
+       0x9C,
+       0x9D,
+       0x9E,
+       0x9F,
+       0xA0,
+       0xA1,
+       0xA2,
+       0xAA,
+       0xAC,
+       0xAF,
+       0xC1,
        0xCA,
        0xCB,
+       0xD0,
+       0xD1,
+       0xDA,
+       0xDB,
+       0xE0,
+       0xE2,
+       0xEA,
+       0xEB,
+       0xF0,
+       0xFB,
        0xFF,
        0x100,
+       0x131,
+       0x132,
+       0x138,
+       0x139,
+       0x18D,
+       0x18E,
+       0x19B,
+       0x19C,
+       0x1AA,
+       0x1AC,
+       0x1BA,
+       0x1BC,
+       0x1BE,
+       0x1BF,
+       0x1C0,
+       0x1C4,
+       0x221,
+       0x222,
+       0x234,
+       0x23A,
+       0x255,
+       0x256,
+       0x258,
+       0x259,
+       0x25A,
+       0x25B,
+       0x25D,
+       0x260,
+       0x262,
+       0x263,
+       0x264,
+       0x265,
+       0x267,
+       0x268,
+       0x26A,
+       0x26B,
+       0x26D,
+       0x26F,
+       0x270,
+       0x271,
+       0x273,
+       0x275,
+       0x276,
+       0x27D,
+       0x27E,
+       0x280,
+       0x281,
+       0x283,
+       0x284,
+       0x287,
+       0x28D,
+       0x292,
+       0x293,
+       0x29D,
+       0x29F,
+       0x2BC,
+       0x2BD,
+       0x2BE,
+       0x2BF,
        0x300,
+       0x302,
+       0x307,
+       0x309,
+       0x30A,
+       0x30B,
+       0x30C,
+       0x30D,
+       0x313,
+       0x314,
+       0x331,
+       0x332,
+       0x342,
+       0x343,
+       0x345,
+       0x346,
        0x370,
-       0x483,
+       0x374,
+       0x376,
+       0x378,
+       0x37B,
+       0x37E,
+       0x37F,
+       0x380,
+       0x386,
+       0x387,
+       0x388,
+       0x38B,
+       0x38C,
+       0x38D,
+       0x38E,
+       0x3A2,
+       0x3A3,
+       0x3D2,
+       0x3D5,
+       0x3F6,
+       0x3F7,
+       0x3FC,
+       0x3FD,
+       0x482,
        0x48A,
-       0x591,
-       0x5BE,
-       0x5BF,
-       0x5C0,
-       0x5C1,
-       0x5C3,
-       0x5C4,
-       0x5C6,
-       0x5C7,
-       0x5C8,
-       0x600,
-       0x606,
-       0x610,
-       0x61B,
-       0x61C,
-       0x61D,
-       0x64B,
-       0x660,
-       0x670,
-       0x671,
-       0x6D6,
-       0x6DD,
-       0x6DE,
-       0x6DF,
-       0x6E5,
-       0x6E7,
-       0x6E9,
-       0x6EA,
-       0x6EE,
-       0x70F,
-       0x710,
-       0x711,
-       0x712,
-       0x730,
-       0x74B,
-       0x7A6,
-       0x7B1,
-       0x7EB,
-       0x7F4,
-       0x816,
-       0x81A,
-       0x81B,
-       0x824,
-       0x825,
-       0x828,
-       0x829,
-       0x82E,
-       0x859,
-       0x85C,
-       0x8E3,
-       0x903,
-       0x904,
-       0x93A,
-       0x93B,
-       0x93C,
-       0x93D,
-       0x93E,
-       0x941,
-       0x949,
-       0x94D,
-       0x94E,
-       0x950,
-       0x951,
-       0x958,
-       0x962,
-       0x964,
-       0x981,
-       0x982,
-       0x984,
-       0x9BC,
-       0x9BD,
-       0x9BE,
-       0x9BF,
-       0x9C1,
-       0x9C5,
-       0x9C7,
-       0x9C9,
-       0x9CB,
-       0x9CD,
-       0x9CE,
-       0x9D7,
-       0x9D8,
-       0x9E2,
-       0x9E4,
-       0xA01,
-       0xA03,
-       0xA04,
-       0xA3C,
-       0xA3D,
-       0xA3E,
-       0xA41,
-       0xA43,
-       0xA47,
-       0xA49,
-       0xA4B,
-       0xA4E,
-       0xA51,
-       0xA52,
-       0xA70,
-       0xA72,
-       0xA75,
-       0xA76,
-       0xA81,
-       0xA83,
-       0xA84,
-       0xABC,
-       0xABD,
-       0xABE,
-       0xAC1,
-       0xAC6,
-       0xAC7,
-       0xAC9,
-       0xACA,
-       0xACB,
-       0xACD,
-       0xACE,
-       0xAE2,
-       0xAE4,
-       0xB01,
-       0xB02,
-       0xB04,
-       0xB3C,
-       0xB3D,
-       0xB3E,
-       0xB40,
-       0xB41,
-       0xB45,
-       0xB47,
-       0xB49,
-       0xB4B,
-       0xB4D,
-       0xB4E,
-       0xB56,
-       0xB58,
-       0xB62,
-       0xB64,
-       0xB82,
-       0xB83,
-       0xBBE,
-       0xBBF,
-       0xBC0,
-       0xBC1,
-       0xBC3,
-       0xBC6,
-       0xBC9,
-       0xBCA,
-       0xBCD,
-       0xBCE,
-       0xBD7,
+       0x530,
+       0x531,
+       0x557,
+       0x561,
+       0x588,
+       0x10A0,
+       0x10C6,
+       0x10C7,
+       0x10C8,
+       0x10CD,
+       0x10CE,
+       0x13A0,
+       0x13F6,
+       0x13F8,
+       0x13FE,
+       0x1D79,
+       0x1D7A,
+       0x1D7D,
+       0x1D7E,
+       0x1E00,
+       0x1E9C,
+       0x1E9E,
+       0x1E9F,
+       0x1EA0,
+       0x1F16,
+       0x1F18,
+       0x1F1E,
+       0x1F20,
+       0x1F46,
+       0x1F48,
+       0x1F4E,
+       0x1F50,
+       0x1F58,
+       0x1F59,
+       0x1F5A,
+       0x1F5B,
+       0x1F5C,
+       0x1F5D,
+       0x1F5E,
+       0x1F5F,
+       0x1F7E,
+       0x1F80,
+       0x1FB5,
+       0x1FB6,
+       0x1FBD,
+       0x1FBE,
+       0x1FBF,
+       0x1FC2,
+       0x1FC5,
+       0x1FC6,
+       0x1FCD,
+       0x1FD0,
+       0x1FD4,
+       0x1FD6,
+       0x1FDC,
+       0x1FE0,
+       0x1FED,
+       0x1FF2,
+       0x1FF5,
+       0x1FF6,
+       0x1FFD,
+       0x2126,
+       0x2127,
+       0x212A,
+       0x212C,
+       0x2132,
+       0x2133,
+       0x214E,
+       0x214F,
+       0x2160,
+       0x2180,
+       0x2183,
+       0x2185,
+       0x24B6,
+       0x24EA,
+       0x2C00,
+       0x2C2F,
+       0x2C30,
+       0x2C5F,
+       0x2C60,
+       0x2C71,
+       0x2C72,
+       0x2C74,
+       0x2C75,
+       0x2C77,
+       0x2C7E,
+       0x2CE4,
+       0x2CEB,
+       0x2CEF,
+       0x2CF2,
+       0x2CF4,
+       0x2D00,
+       0x2D26,
+       0x2D27,
+       0x2D28,
+       0x2D2D,
+       0x2D2E,
+       0xA640,
+       0xA66E,
+       0xA680,
+       0xA69C,
+       0xA722,
+       0xA730,
+       0xA732,
+       0xA770,
+       0xA779,
+       0xA788,
+       0xA78B,
+       0xA78E,
+       0xA790,
+       0xA794,
+       0xA796,
+       0xA7AE,
+       0xA7B0,
+       0xA7B8,
+       0xAB53,
+       0xAB54,
+       0xAB70,
+       0xABC0,
+       0xFB00,
+       0xFB07,
+       0xFB13,
+       0xFB18,
+       0xFF21,
+       0xFF3B,
+       0xFF41,
+       0xFF5B,
+       0x10400,
+       0x10450,
+       0x10C80,
+       0x10CB3,
+       0x10CC0,
+       0x10CF3,
+       0x118A0,
+       0x118E0
+};
+
+static const UV _Perl_Folds_To_Multi_Char_invlist[] = { /* for EBCDIC 037 */
+       59,     /* Number of elements */
+       148565664, /* Version and data structure type */
+       1,      /* 0 if the list starts at 0;
+                  1 if it starts at the element beyond 0 */
+       0x0,
+       0x59,
+       0x5A,
+       0x130,
+       0x131,
+       0x149,
+       0x14A,
+       0x1F0,
+       0x1F1,
+       0x390,
+       0x391,
+       0x3B0,
+       0x3B1,
+       0x587,
+       0x588,
+       0x1E96,
+       0x1E9B,
+       0x1E9E,
+       0x1E9F,
+       0x1F50,
+       0x1F51,
+       0x1F52,
+       0x1F53,
+       0x1F54,
+       0x1F55,
+       0x1F56,
+       0x1F57,
+       0x1F80,
+       0x1FB0,
+       0x1FB2,
+       0x1FB5,
+       0x1FB6,
+       0x1FB8,
+       0x1FBC,
+       0x1FBD,
+       0x1FC2,
+       0x1FC5,
+       0x1FC6,
+       0x1FC8,
+       0x1FCC,
+       0x1FCD,
+       0x1FD2,
+       0x1FD4,
+       0x1FD6,
+       0x1FD8,
+       0x1FE2,
+       0x1FE5,
+       0x1FE6,
+       0x1FE8,
+       0x1FF2,
+       0x1FF5,
+       0x1FF6,
+       0x1FF8,
+       0x1FFC,
+       0x1FFD,
+       0xFB00,
+       0xFB07,
+       0xFB13,
+       0xFB18
+};
+
+#endif /* defined(PERL_IN_REGCOMP_C) */
+
+#if defined(PERL_IN_PERL_C)
+
+static const UV _Perl_GCB_invlist[] = { /* for EBCDIC 037 */
+       1502,   /* Number of elements */
+       148565664, /* Version and data structure type */
+       0,      /* 0 if the list starts at 0;
+                  1 if it starts at the element beyond 0 */
+       0x0,
+       0xD,
+       0xE,
+       0x25,
+       0x26,
+       0x40,
+       0xCA,
+       0xCB,
+       0xFF,
+       0x100,
+       0x300,
+       0x370,
+       0x483,
+       0x48A,
+       0x591,
+       0x5BE,
+       0x5BF,
+       0x5C0,
+       0x5C1,
+       0x5C3,
+       0x5C4,
+       0x5C6,
+       0x5C7,
+       0x5C8,
+       0x600,
+       0x606,
+       0x610,
+       0x61B,
+       0x61C,
+       0x61D,
+       0x64B,
+       0x660,
+       0x670,
+       0x671,
+       0x6D6,
+       0x6DD,
+       0x6DE,
+       0x6DF,
+       0x6E5,
+       0x6E7,
+       0x6E9,
+       0x6EA,
+       0x6EE,
+       0x70F,
+       0x710,
+       0x711,
+       0x712,
+       0x730,
+       0x74B,
+       0x7A6,
+       0x7B1,
+       0x7EB,
+       0x7F4,
+       0x816,
+       0x81A,
+       0x81B,
+       0x824,
+       0x825,
+       0x828,
+       0x829,
+       0x82E,
+       0x859,
+       0x85C,
+       0x8E3,
+       0x903,
+       0x904,
+       0x93A,
+       0x93B,
+       0x93C,
+       0x93D,
+       0x93E,
+       0x941,
+       0x949,
+       0x94D,
+       0x94E,
+       0x950,
+       0x951,
+       0x958,
+       0x962,
+       0x964,
+       0x981,
+       0x982,
+       0x984,
+       0x9BC,
+       0x9BD,
+       0x9BE,
+       0x9BF,
+       0x9C1,
+       0x9C5,
+       0x9C7,
+       0x9C9,
+       0x9CB,
+       0x9CD,
+       0x9CE,
+       0x9D7,
+       0x9D8,
+       0x9E2,
+       0x9E4,
+       0xA01,
+       0xA03,
+       0xA04,
+       0xA3C,
+       0xA3D,
+       0xA3E,
+       0xA41,
+       0xA43,
+       0xA47,
+       0xA49,
+       0xA4B,
+       0xA4E,
+       0xA51,
+       0xA52,
+       0xA70,
+       0xA72,
+       0xA75,
+       0xA76,
+       0xA81,
+       0xA83,
+       0xA84,
+       0xABC,
+       0xABD,
+       0xABE,
+       0xAC1,
+       0xAC6,
+       0xAC7,
+       0xAC9,
+       0xACA,
+       0xACB,
+       0xACD,
+       0xACE,
+       0xAE2,
+       0xAE4,
+       0xB01,
+       0xB02,
+       0xB04,
+       0xB3C,
+       0xB3D,
+       0xB3E,
+       0xB40,
+       0xB41,
+       0xB45,
+       0xB47,
+       0xB49,
+       0xB4B,
+       0xB4D,
+       0xB4E,
+       0xB56,
+       0xB58,
+       0xB62,
+       0xB64,
+       0xB82,
+       0xB83,
+       0xBBE,
+       0xBBF,
+       0xBC0,
+       0xBC1,
+       0xBC3,
+       0xBC6,
+       0xBC9,
+       0xBCA,
+       0xBCD,
+       0xBCE,
+       0xBD7,
        0xBD8,
        0xC00,
        0xC01,
@@ -51456,7 +51872,7 @@ typedef enum {
        GCB_V = 13
 } GCB_enum;
 
-static const GCB_enum Grapheme_Cluster_Break_invmap[] = { /* for EBCDIC 037 */
+static const GCB_enum _Perl_GCB_invmap[] = { /* for EBCDIC 037 */
        GCB_Control,
        GCB_CR,
        GCB_Control,
@@ -52963,410 +53379,6 @@ static const GCB_enum Grapheme_Cluster_Break_invmap[] = { /* for EBCDIC 037 */
 
 #endif /* defined(PERL_IN_REGEXEC_C) */
 
-#if defined(PERL_IN_REGCOMP_C)
-
-static const UV NonL1_Perl_Non_Final_Folds_invlist[] = { /* for EBCDIC 037 */
-       45,     /* Number of elements */
-       148565664, /* Version and data structure type */
-       1,      /* 0 if the list starts at 0;
-                  1 if it starts at the element beyond 0 */
-       0x0,
-       0x2BC,
-       0x2BD,
-       0x308,
-       0x309,
-       0x313,
-       0x314,
-       0x342,
-       0x343,
-       0x3AC,
-       0x3AD,
-       0x3AE,
-       0x3AF,
-       0x3B1,
-       0x3B2,
-       0x3B7,
-       0x3B8,
-       0x3B9,
-       0x3BA,
-       0x3C1,
-       0x3C2,
-       0x3C5,
-       0x3C6,
-       0x3C9,
-       0x3CA,
-       0x3CE,
-       0x3CF,
-       0x565,
-       0x566,
-       0x574,
-       0x575,
-       0x57E,
-       0x57F,
-       0x1F00,
-       0x1F08,
-       0x1F20,
-       0x1F28,
-       0x1F60,
-       0x1F68,
-       0x1F70,
-       0x1F71,
-       0x1F74,
-       0x1F75,
-       0x1F7C,
-       0x1F7D
-};
-
-static const UV _Perl_Any_Folds_invlist[] = { /* for EBCDIC 037 */
-       275,    /* Number of elements */
-       148565664, /* Version and data structure type */
-       1,      /* 0 if the list starts at 0;
-                  1 if it starts at the element beyond 0 */
-       0x0,
-       0x42,
-       0x4A,
-       0x51,
-       0x5A,
-       0x62,
-       0x6A,
-       0x70,
-       0x79,
-       0x80,
-       0x8A,
-       0x8C,
-       0x8F,
-       0x91,
-       0x9A,
-       0x9C,
-       0x9D,
-       0x9E,
-       0x9F,
-       0xA0,
-       0xA1,
-       0xA2,
-       0xAA,
-       0xAC,
-       0xAF,
-       0xC1,
-       0xCA,
-       0xCB,
-       0xD0,
-       0xD1,
-       0xDA,
-       0xDB,
-       0xE0,
-       0xE2,
-       0xEA,
-       0xEB,
-       0xF0,
-       0xFB,
-       0xFF,
-       0x100,
-       0x131,
-       0x132,
-       0x138,
-       0x139,
-       0x18D,
-       0x18E,
-       0x19B,
-       0x19C,
-       0x1AA,
-       0x1AC,
-       0x1BA,
-       0x1BC,
-       0x1BE,
-       0x1BF,
-       0x1C0,
-       0x1C4,
-       0x221,
-       0x222,
-       0x234,
-       0x23A,
-       0x255,
-       0x256,
-       0x258,
-       0x259,
-       0x25A,
-       0x25B,
-       0x25D,
-       0x260,
-       0x262,
-       0x263,
-       0x264,
-       0x265,
-       0x267,
-       0x268,
-       0x26A,
-       0x26B,
-       0x26D,
-       0x26F,
-       0x270,
-       0x271,
-       0x273,
-       0x275,
-       0x276,
-       0x27D,
-       0x27E,
-       0x280,
-       0x281,
-       0x283,
-       0x284,
-       0x287,
-       0x28D,
-       0x292,
-       0x293,
-       0x29D,
-       0x29F,
-       0x2BC,
-       0x2BD,
-       0x2BE,
-       0x2BF,
-       0x300,
-       0x302,
-       0x307,
-       0x309,
-       0x30A,
-       0x30B,
-       0x30C,
-       0x30D,
-       0x313,
-       0x314,
-       0x331,
-       0x332,
-       0x342,
-       0x343,
-       0x345,
-       0x346,
-       0x370,
-       0x374,
-       0x376,
-       0x378,
-       0x37B,
-       0x37E,
-       0x37F,
-       0x380,
-       0x386,
-       0x387,
-       0x388,
-       0x38B,
-       0x38C,
-       0x38D,
-       0x38E,
-       0x3A2,
-       0x3A3,
-       0x3D2,
-       0x3D5,
-       0x3F6,
-       0x3F7,
-       0x3FC,
-       0x3FD,
-       0x482,
-       0x48A,
-       0x530,
-       0x531,
-       0x557,
-       0x561,
-       0x588,
-       0x10A0,
-       0x10C6,
-       0x10C7,
-       0x10C8,
-       0x10CD,
-       0x10CE,
-       0x13A0,
-       0x13F6,
-       0x13F8,
-       0x13FE,
-       0x1D79,
-       0x1D7A,
-       0x1D7D,
-       0x1D7E,
-       0x1E00,
-       0x1E9C,
-       0x1E9E,
-       0x1E9F,
-       0x1EA0,
-       0x1F16,
-       0x1F18,
-       0x1F1E,
-       0x1F20,
-       0x1F46,
-       0x1F48,
-       0x1F4E,
-       0x1F50,
-       0x1F58,
-       0x1F59,
-       0x1F5A,
-       0x1F5B,
-       0x1F5C,
-       0x1F5D,
-       0x1F5E,
-       0x1F5F,
-       0x1F7E,
-       0x1F80,
-       0x1FB5,
-       0x1FB6,
-       0x1FBD,
-       0x1FBE,
-       0x1FBF,
-       0x1FC2,
-       0x1FC5,
-       0x1FC6,
-       0x1FCD,
-       0x1FD0,
-       0x1FD4,
-       0x1FD6,
-       0x1FDC,
-       0x1FE0,
-       0x1FED,
-       0x1FF2,
-       0x1FF5,
-       0x1FF6,
-       0x1FFD,
-       0x2126,
-       0x2127,
-       0x212A,
-       0x212C,
-       0x2132,
-       0x2133,
-       0x214E,
-       0x214F,
-       0x2160,
-       0x2180,
-       0x2183,
-       0x2185,
-       0x24B6,
-       0x24EA,
-       0x2C00,
-       0x2C2F,
-       0x2C30,
-       0x2C5F,
-       0x2C60,
-       0x2C71,
-       0x2C72,
-       0x2C74,
-       0x2C75,
-       0x2C77,
-       0x2C7E,
-       0x2CE4,
-       0x2CEB,
-       0x2CEF,
-       0x2CF2,
-       0x2CF4,
-       0x2D00,
-       0x2D26,
-       0x2D27,
-       0x2D28,
-       0x2D2D,
-       0x2D2E,
-       0xA640,
-       0xA66E,
-       0xA680,
-       0xA69C,
-       0xA722,
-       0xA730,
-       0xA732,
-       0xA770,
-       0xA779,
-       0xA788,
-       0xA78B,
-       0xA78E,
-       0xA790,
-       0xA794,
-       0xA796,
-       0xA7AE,
-       0xA7B0,
-       0xA7B8,
-       0xAB53,
-       0xAB54,
-       0xAB70,
-       0xABC0,
-       0xFB00,
-       0xFB07,
-       0xFB13,
-       0xFB18,
-       0xFF21,
-       0xFF3B,
-       0xFF41,
-       0xFF5B,
-       0x10400,
-       0x10450,
-       0x10C80,
-       0x10CB3,
-       0x10CC0,
-       0x10CF3,
-       0x118A0,
-       0x118E0
-};
-
-static const UV _Perl_Folds_To_Multi_Char_invlist[] = { /* for EBCDIC 037 */
-       59,     /* Number of elements */
-       148565664, /* Version and data structure type */
-       1,      /* 0 if the list starts at 0;
-                  1 if it starts at the element beyond 0 */
-       0x0,
-       0x59,
-       0x5A,
-       0x130,
-       0x131,
-       0x149,
-       0x14A,
-       0x1F0,
-       0x1F1,
-       0x390,
-       0x391,
-       0x3B0,
-       0x3B1,
-       0x587,
-       0x588,
-       0x1E96,
-       0x1E9B,
-       0x1E9E,
-       0x1E9F,
-       0x1F50,
-       0x1F51,
-       0x1F52,
-       0x1F53,
-       0x1F54,
-       0x1F55,
-       0x1F56,
-       0x1F57,
-       0x1F80,
-       0x1FB0,
-       0x1FB2,
-       0x1FB5,
-       0x1FB6,
-       0x1FB8,
-       0x1FBC,
-       0x1FBD,
-       0x1FC2,
-       0x1FC5,
-       0x1FC6,
-       0x1FC8,
-       0x1FCC,
-       0x1FCD,
-       0x1FD2,
-       0x1FD4,
-       0x1FD6,
-       0x1FD8,
-       0x1FE2,
-       0x1FE5,
-       0x1FE6,
-       0x1FE8,
-       0x1FF2,
-       0x1FF5,
-       0x1FF6,
-       0x1FF8,
-       0x1FFC,
-       0x1FFD,
-       0xFB00,
-       0xFB07,
-       0xFB13,
-       0xFB18
-};
-
-#endif /* defined(PERL_IN_REGCOMP_C) */
-
 #if defined(PERL_IN_UTF8_C)
 
 static const UV _Perl_IDCont_invlist[] = { /* for EBCDIC 037 */
@@ -55875,7 +55887,7 @@ static const UV _Perl_IDStart_invlist[] = { /* for EBCDIC 037 */
 
 #if defined(PERL_IN_PERL_C)
 
-static const UV Sentence_Break_invlist[] = { /* for EBCDIC 037 */
+static const UV _Perl_SB_invlist[] = { /* for EBCDIC 037 */
        2916,   /* Number of elements */
        148565664, /* Version and data structure type */
        0,      /* 0 if the list starts at 0;
@@ -58823,7 +58835,7 @@ typedef enum {
        SB_Upper = 15
 } SB_enum;
 
-static const SB_enum Sentence_Break_invmap[] = { /* for EBCDIC 037 */
+static const SB_enum _Perl_SB_invmap[] = { /* for EBCDIC 037 */
        SB_Other,
        SB_Sp,
        SB_Other,
@@ -61744,93 +61756,9 @@ static const SB_enum Sentence_Break_invmap[] = { /* for EBCDIC 037 */
 
 #endif /* defined(PERL_IN_REGEXEC_C) */
 
-#if defined(PERL_IN_REGCOMP_C)
-
-static const UV UpperLatin1_invlist[] = { /* for EBCDIC 037 */
-       57,     /* Number of elements */
-       148565664, /* Version and data structure type */
-       1,      /* 0 if the list starts at 0;
-                  1 if it starts at the element beyond 0 */
-       0x0,
-       0x4,
-       0x5,
-       0x6,
-       0x7,
-       0x8,
-       0xB,
-       0x14,
-       0x16,
-       0x17,
-       0x18,
-       0x1A,
-       0x1C,
-       0x20,
-       0x25,
-       0x28,
-       0x2D,
-       0x30,
-       0x32,
-       0x33,
-       0x37,
-       0x38,
-       0x3C,
-       0x3E,
-       0x3F,
-       0x41,
-       0x4B,
-       0x51,
-       0x5A,
-       0x5F,
-       0x60,
-       0x62,
-       0x6B,
-       0x70,
-       0x79,
-       0x80,
-       0x81,
-       0x8A,
-       0x91,
-       0x9A,
-       0xA1,
-       0xAA,
-       0xB0,
-       0xB1,
-       0xBA,
-       0xBC,
-       0xC0,
-       0xCA,
-       0xD0,
-       0xDA,
-       0xE0,
-       0xE1,
-       0xE2,
-       0xEA,
-       0xF0,
-       0xFA,
-       0x100
-};
-
-#endif /* defined(PERL_IN_REGCOMP_C) */
-
 #if defined(PERL_IN_PERL_C)
 
-static const UV VertSpace_invlist[] = { /* for EBCDIC 037 */
-       9,      /* Number of elements */
-       148565664, /* Version and data structure type */
-       1,      /* 0 if the list starts at 0;
-                  1 if it starts at the element beyond 0 */
-       0x0,
-       0xB,
-       0xE,
-       0x15,
-       0x16,
-       0x25,
-       0x26,
-       0x2028,
-       0x202A
-};
-
-static const UV Word_Break_invlist[] = { /* for EBCDIC 037 */
+static const UV _Perl_WB_invlist[] = { /* for EBCDIC 037 */
        1545,   /* Number of elements */
        148565664, /* Version and data structure type */
        0,      /* 0 if the list starts at 0;
@@ -63410,7 +63338,7 @@ typedef enum {
        WB_UNKNOWN = 18
 } WB_enum;
 
-static const WB_enum Word_Break_invmap[] = { /* for EBCDIC 037 */
+static const WB_enum _Perl_WB_invmap[] = { /* for EBCDIC 037 */
        WB_Other,
        WB_Newline,
        WB_CR,
@@ -64960,8 +64888,92 @@ static const WB_enum Word_Break_invmap[] = { /* for EBCDIC 037 */
 
 #endif /* defined(PERL_IN_REGEXEC_C) */
 
+#if defined(PERL_IN_REGCOMP_C)
+
+static const UV UpperLatin1_invlist[] = { /* for EBCDIC 037 */
+       57,     /* Number of elements */
+       148565664, /* Version and data structure type */
+       1,      /* 0 if the list starts at 0;
+                  1 if it starts at the element beyond 0 */
+       0x0,
+       0x4,
+       0x5,
+       0x6,
+       0x7,
+       0x8,
+       0xB,
+       0x14,
+       0x16,
+       0x17,
+       0x18,
+       0x1A,
+       0x1C,
+       0x20,
+       0x25,
+       0x28,
+       0x2D,
+       0x30,
+       0x32,
+       0x33,
+       0x37,
+       0x38,
+       0x3C,
+       0x3E,
+       0x3F,
+       0x41,
+       0x4B,
+       0x51,
+       0x5A,
+       0x5F,
+       0x60,
+       0x62,
+       0x6B,
+       0x70,
+       0x79,
+       0x80,
+       0x81,
+       0x8A,
+       0x91,
+       0x9A,
+       0xA1,
+       0xAA,
+       0xB0,
+       0xB1,
+       0xBA,
+       0xBC,
+       0xC0,
+       0xCA,
+       0xD0,
+       0xDA,
+       0xE0,
+       0xE1,
+       0xE2,
+       0xEA,
+       0xF0,
+       0xFA,
+       0x100
+};
+
+#endif /* defined(PERL_IN_REGCOMP_C) */
+
 #if defined(PERL_IN_PERL_C)
 
+static const UV VertSpace_invlist[] = { /* for EBCDIC 037 */
+       9,      /* Number of elements */
+       148565664, /* Version and data structure type */
+       1,      /* 0 if the list starts at 0;
+                  1 if it starts at the element beyond 0 */
+       0x0,
+       0xB,
+       0xE,
+       0x15,
+       0x16,
+       0x25,
+       0x26,
+       0x2028,
+       0x202A
+};
+
 static const UV XPerlSpace_invlist[] = { /* for EBCDIC 037 */
        23,     /* Number of elements */
        148565664, /* Version and data structure type */
@@ -74865,164 +74877,574 @@ static const UV Cased_invlist[] = { /* for EBCDIC POSIX-BC */
        0x1F18A
 };
 
-static const UV Grapheme_Cluster_Break_invlist[] = { /* for EBCDIC POSIX-BC */
-       1502,   /* Number of elements */
+#endif /* defined(PERL_IN_PERL_C) */
+
+#if defined(PERL_IN_REGCOMP_C)
+
+static const UV NonL1_Perl_Non_Final_Folds_invlist[] = { /* for EBCDIC POSIX-BC */
+       45,     /* Number of elements */
        148565664, /* Version and data structure type */
-       0,      /* 0 if the list starts at 0;
+       1,      /* 0 if the list starts at 0;
                   1 if it starts at the element beyond 0 */
        0x0,
-       0xD,
-       0xE,
-       0x15,
-       0x16,
-       0x40,
-       0x5F,
-       0x60,
+       0x2BC,
+       0x2BD,
+       0x308,
+       0x309,
+       0x313,
+       0x314,
+       0x342,
+       0x343,
+       0x3AC,
+       0x3AD,
+       0x3AE,
+       0x3AF,
+       0x3B1,
+       0x3B2,
+       0x3B7,
+       0x3B8,
+       0x3B9,
+       0x3BA,
+       0x3C1,
+       0x3C2,
+       0x3C5,
+       0x3C6,
+       0x3C9,
+       0x3CA,
+       0x3CE,
+       0x3CF,
+       0x565,
+       0x566,
+       0x574,
+       0x575,
+       0x57E,
+       0x57F,
+       0x1F00,
+       0x1F08,
+       0x1F20,
+       0x1F28,
+       0x1F60,
+       0x1F68,
+       0x1F70,
+       0x1F71,
+       0x1F74,
+       0x1F75,
+       0x1F7C,
+       0x1F7D
+};
+
+static const UV _Perl_Any_Folds_invlist[] = { /* for EBCDIC POSIX-BC */
+       277,    /* Number of elements */
+       148565664, /* Version and data structure type */
+       1,      /* 0 if the list starts at 0;
+                  1 if it starts at the element beyond 0 */
+       0x0,
+       0x42,
+       0x4A,
+       0x51,
+       0x5A,
+       0x62,
+       0x6A,
+       0x70,
+       0x79,
+       0x80,
+       0x8A,
+       0x8C,
+       0x8F,
+       0x91,
+       0x9A,
+       0x9C,
+       0x9D,
+       0x9E,
+       0x9F,
+       0xA0,
+       0xA1,
+       0xA2,
+       0xAA,
+       0xAC,
+       0xAF,
+       0xC0,
        0xCA,
        0xCB,
+       0xD0,
+       0xD1,
+       0xDA,
+       0xDB,
+       0xE1,
+       0xE2,
+       0xEA,
+       0xEB,
+       0xF0,
+       0xFC,
+       0xFD,
+       0xFE,
+       0xFF,
+       0x100,
+       0x131,
+       0x132,
+       0x138,
+       0x139,
+       0x18D,
+       0x18E,
+       0x19B,
+       0x19C,
+       0x1AA,
+       0x1AC,
+       0x1BA,
+       0x1BC,
+       0x1BE,
+       0x1BF,
+       0x1C0,
+       0x1C4,
+       0x221,
+       0x222,
+       0x234,
+       0x23A,
+       0x255,
+       0x256,
+       0x258,
+       0x259,
+       0x25A,
+       0x25B,
+       0x25D,
+       0x260,
+       0x262,
+       0x263,
+       0x264,
+       0x265,
+       0x267,
+       0x268,
+       0x26A,
+       0x26B,
+       0x26D,
+       0x26F,
+       0x270,
+       0x271,
+       0x273,
+       0x275,
+       0x276,
+       0x27D,
+       0x27E,
+       0x280,
+       0x281,
+       0x283,
+       0x284,
+       0x287,
+       0x28D,
+       0x292,
+       0x293,
+       0x29D,
+       0x29F,
+       0x2BC,
+       0x2BD,
+       0x2BE,
+       0x2BF,
        0x300,
+       0x302,
+       0x307,
+       0x309,
+       0x30A,
+       0x30B,
+       0x30C,
+       0x30D,
+       0x313,
+       0x314,
+       0x331,
+       0x332,
+       0x342,
+       0x343,
+       0x345,
+       0x346,
        0x370,
-       0x483,
+       0x374,
+       0x376,
+       0x378,
+       0x37B,
+       0x37E,
+       0x37F,
+       0x380,
+       0x386,
+       0x387,
+       0x388,
+       0x38B,
+       0x38C,
+       0x38D,
+       0x38E,
+       0x3A2,
+       0x3A3,
+       0x3D2,
+       0x3D5,
+       0x3F6,
+       0x3F7,
+       0x3FC,
+       0x3FD,
+       0x482,
        0x48A,
-       0x591,
-       0x5BE,
-       0x5BF,
-       0x5C0,
-       0x5C1,
-       0x5C3,
-       0x5C4,
-       0x5C6,
-       0x5C7,
-       0x5C8,
-       0x600,
-       0x606,
-       0x610,
-       0x61B,
-       0x61C,
-       0x61D,
-       0x64B,
-       0x660,
-       0x670,
-       0x671,
-       0x6D6,
-       0x6DD,
-       0x6DE,
-       0x6DF,
-       0x6E5,
-       0x6E7,
-       0x6E9,
-       0x6EA,
-       0x6EE,
-       0x70F,
-       0x710,
-       0x711,
-       0x712,
-       0x730,
-       0x74B,
-       0x7A6,
-       0x7B1,
-       0x7EB,
-       0x7F4,
-       0x816,
-       0x81A,
-       0x81B,
-       0x824,
-       0x825,
-       0x828,
-       0x829,
-       0x82E,
-       0x859,
-       0x85C,
-       0x8E3,
-       0x903,
-       0x904,
-       0x93A,
-       0x93B,
-       0x93C,
-       0x93D,
-       0x93E,
-       0x941,
-       0x949,
-       0x94D,
-       0x94E,
-       0x950,
-       0x951,
-       0x958,
-       0x962,
-       0x964,
-       0x981,
-       0x982,
-       0x984,
-       0x9BC,
-       0x9BD,
-       0x9BE,
-       0x9BF,
-       0x9C1,
-       0x9C5,
-       0x9C7,
-       0x9C9,
-       0x9CB,
-       0x9CD,
-       0x9CE,
-       0x9D7,
-       0x9D8,
-       0x9E2,
-       0x9E4,
-       0xA01,
-       0xA03,
-       0xA04,
-       0xA3C,
-       0xA3D,
-       0xA3E,
-       0xA41,
-       0xA43,
-       0xA47,
-       0xA49,
-       0xA4B,
-       0xA4E,
-       0xA51,
-       0xA52,
-       0xA70,
-       0xA72,
-       0xA75,
-       0xA76,
-       0xA81,
-       0xA83,
-       0xA84,
-       0xABC,
-       0xABD,
-       0xABE,
-       0xAC1,
-       0xAC6,
-       0xAC7,
-       0xAC9,
-       0xACA,
-       0xACB,
-       0xACD,
-       0xACE,
-       0xAE2,
-       0xAE4,
-       0xB01,
-       0xB02,
-       0xB04,
-       0xB3C,
-       0xB3D,
-       0xB3E,
-       0xB40,
-       0xB41,
-       0xB45,
-       0xB47,
-       0xB49,
-       0xB4B,
-       0xB4D,
-       0xB4E,
-       0xB56,
-       0xB58,
-       0xB62,
-       0xB64,
-       0xB82,
-       0xB83,
-       0xBBE,
+       0x530,
+       0x531,
+       0x557,
+       0x561,
+       0x588,
+       0x10A0,
+       0x10C6,
+       0x10C7,
+       0x10C8,
+       0x10CD,
+       0x10CE,
+       0x13A0,
+       0x13F6,
+       0x13F8,
+       0x13FE,
+       0x1D79,
+       0x1D7A,
+       0x1D7D,
+       0x1D7E,
+       0x1E00,
+       0x1E9C,
+       0x1E9E,
+       0x1E9F,
+       0x1EA0,
+       0x1F16,
+       0x1F18,
+       0x1F1E,
+       0x1F20,
+       0x1F46,
+       0x1F48,
+       0x1F4E,
+       0x1F50,
+       0x1F58,
+       0x1F59,
+       0x1F5A,
+       0x1F5B,
+       0x1F5C,
+       0x1F5D,
+       0x1F5E,
+       0x1F5F,
+       0x1F7E,
+       0x1F80,
+       0x1FB5,
+       0x1FB6,
+       0x1FBD,
+       0x1FBE,
+       0x1FBF,
+       0x1FC2,
+       0x1FC5,
+       0x1FC6,
+       0x1FCD,
+       0x1FD0,
+       0x1FD4,
+       0x1FD6,
+       0x1FDC,
+       0x1FE0,
+       0x1FED,
+       0x1FF2,
+       0x1FF5,
+       0x1FF6,
+       0x1FFD,
+       0x2126,
+       0x2127,
+       0x212A,
+       0x212C,
+       0x2132,
+       0x2133,
+       0x214E,
+       0x214F,
+       0x2160,
+       0x2180,
+       0x2183,
+       0x2185,
+       0x24B6,
+       0x24EA,
+       0x2C00,
+       0x2C2F,
+       0x2C30,
+       0x2C5F,
+       0x2C60,
+       0x2C71,
+       0x2C72,
+       0x2C74,
+       0x2C75,
+       0x2C77,
+       0x2C7E,
+       0x2CE4,
+       0x2CEB,
+       0x2CEF,
+       0x2CF2,
+       0x2CF4,
+       0x2D00,
+       0x2D26,
+       0x2D27,
+       0x2D28,
+       0x2D2D,
+       0x2D2E,
+       0xA640,
+       0xA66E,
+       0xA680,
+       0xA69C,
+       0xA722,
+       0xA730,
+       0xA732,
+       0xA770,
+       0xA779,
+       0xA788,
+       0xA78B,
+       0xA78E,
+       0xA790,
+       0xA794,
+       0xA796,
+       0xA7AE,
+       0xA7B0,
+       0xA7B8,
+       0xAB53,
+       0xAB54,
+       0xAB70,
+       0xABC0,
+       0xFB00,
+       0xFB07,
+       0xFB13,
+       0xFB18,
+       0xFF21,
+       0xFF3B,
+       0xFF41,
+       0xFF5B,
+       0x10400,
+       0x10450,
+       0x10C80,
+       0x10CB3,
+       0x10CC0,
+       0x10CF3,
+       0x118A0,
+       0x118E0
+};
+
+static const UV _Perl_Folds_To_Multi_Char_invlist[] = { /* for EBCDIC POSIX-BC */
+       59,     /* Number of elements */
+       148565664, /* Version and data structure type */
+       1,      /* 0 if the list starts at 0;
+                  1 if it starts at the element beyond 0 */
+       0x0,
+       0x59,
+       0x5A,
+       0x130,
+       0x131,
+       0x149,
+       0x14A,
+       0x1F0,
+       0x1F1,
+       0x390,
+       0x391,
+       0x3B0,
+       0x3B1,
+       0x587,
+       0x588,
+       0x1E96,
+       0x1E9B,
+       0x1E9E,
+       0x1E9F,
+       0x1F50,
+       0x1F51,
+       0x1F52,
+       0x1F53,
+       0x1F54,
+       0x1F55,
+       0x1F56,
+       0x1F57,
+       0x1F80,
+       0x1FB0,
+       0x1FB2,
+       0x1FB5,
+       0x1FB6,
+       0x1FB8,
+       0x1FBC,
+       0x1FBD,
+       0x1FC2,
+       0x1FC5,
+       0x1FC6,
+       0x1FC8,
+       0x1FCC,
+       0x1FCD,
+       0x1FD2,
+       0x1FD4,
+       0x1FD6,
+       0x1FD8,
+       0x1FE2,
+       0x1FE5,
+       0x1FE6,
+       0x1FE8,
+       0x1FF2,
+       0x1FF5,
+       0x1FF6,
+       0x1FF8,
+       0x1FFC,
+       0x1FFD,
+       0xFB00,
+       0xFB07,
+       0xFB13,
+       0xFB18
+};
+
+#endif /* defined(PERL_IN_REGCOMP_C) */
+
+#if defined(PERL_IN_PERL_C)
+
+static const UV _Perl_GCB_invlist[] = { /* for EBCDIC POSIX-BC */
+       1502,   /* Number of elements */
+       148565664, /* Version and data structure type */
+       0,      /* 0 if the list starts at 0;
+                  1 if it starts at the element beyond 0 */
+       0x0,
+       0xD,
+       0xE,
+       0x15,
+       0x16,
+       0x40,
+       0x5F,
+       0x60,
+       0xCA,
+       0xCB,
+       0x300,
+       0x370,
+       0x483,
+       0x48A,
+       0x591,
+       0x5BE,
+       0x5BF,
+       0x5C0,
+       0x5C1,
+       0x5C3,
+       0x5C4,
+       0x5C6,
+       0x5C7,
+       0x5C8,
+       0x600,
+       0x606,
+       0x610,
+       0x61B,
+       0x61C,
+       0x61D,
+       0x64B,
+       0x660,
+       0x670,
+       0x671,
+       0x6D6,
+       0x6DD,
+       0x6DE,
+       0x6DF,
+       0x6E5,
+       0x6E7,
+       0x6E9,
+       0x6EA,
+       0x6EE,
+       0x70F,
+       0x710,
+       0x711,
+       0x712,
+       0x730,
+       0x74B,
+       0x7A6,
+       0x7B1,
+       0x7EB,
+       0x7F4,
+       0x816,
+       0x81A,
+       0x81B,
+       0x824,
+       0x825,
+       0x828,
+       0x829,
+       0x82E,
+       0x859,
+       0x85C,
+       0x8E3,
+       0x903,
+       0x904,
+       0x93A,
+       0x93B,
+       0x93C,
+       0x93D,
+       0x93E,
+       0x941,
+       0x949,
+       0x94D,
+       0x94E,
+       0x950,
+       0x951,
+       0x958,
+       0x962,
+       0x964,
+       0x981,
+       0x982,
+       0x984,
+       0x9BC,
+       0x9BD,
+       0x9BE,
+       0x9BF,
+       0x9C1,
+       0x9C5,
+       0x9C7,
+       0x9C9,
+       0x9CB,
+       0x9CD,
+       0x9CE,
+       0x9D7,
+       0x9D8,
+       0x9E2,
+       0x9E4,
+       0xA01,
+       0xA03,
+       0xA04,
+       0xA3C,
+       0xA3D,
+       0xA3E,
+       0xA41,
+       0xA43,
+       0xA47,
+       0xA49,
+       0xA4B,
+       0xA4E,
+       0xA51,
+       0xA52,
+       0xA70,
+       0xA72,
+       0xA75,
+       0xA76,
+       0xA81,
+       0xA83,
+       0xA84,
+       0xABC,
+       0xABD,
+       0xABE,
+       0xAC1,
+       0xAC6,
+       0xAC7,
+       0xAC9,
+       0xACA,
+       0xACB,
+       0xACD,
+       0xACE,
+       0xAE2,
+       0xAE4,
+       0xB01,
+       0xB02,
+       0xB04,
+       0xB3C,
+       0xB3D,
+       0xB3E,
+       0xB40,
+       0xB41,
+       0xB45,
+       0xB47,
+       0xB49,
+       0xB4B,
+       0xB4D,
+       0xB4E,
+       0xB56,
+       0xB58,
+       0xB62,
+       0xB64,
+       0xB82,
+       0xB83,
+       0xBBE,
        0xBBF,
        0xBC0,
        0xBC1,
@@ -76397,7 +76819,7 @@ typedef enum {
        GCB_V = 13
 } GCB_enum;
 
-static const GCB_enum Grapheme_Cluster_Break_invmap[] = { /* for EBCDIC POSIX-BC */
+static const GCB_enum _Perl_GCB_invmap[] = { /* for EBCDIC POSIX-BC */
        GCB_Control,
        GCB_CR,
        GCB_Control,
@@ -77904,412 +78326,6 @@ static const GCB_enum Grapheme_Cluster_Break_invmap[] = { /* for EBCDIC POSIX-BC
 
 #endif /* defined(PERL_IN_REGEXEC_C) */
 
-#if defined(PERL_IN_REGCOMP_C)
-
-static const UV NonL1_Perl_Non_Final_Folds_invlist[] = { /* for EBCDIC POSIX-BC */
-       45,     /* Number of elements */
-       148565664, /* Version and data structure type */
-       1,      /* 0 if the list starts at 0;
-                  1 if it starts at the element beyond 0 */
-       0x0,
-       0x2BC,
-       0x2BD,
-       0x308,
-       0x309,
-       0x313,
-       0x314,
-       0x342,
-       0x343,
-       0x3AC,
-       0x3AD,
-       0x3AE,
-       0x3AF,
-       0x3B1,
-       0x3B2,
-       0x3B7,
-       0x3B8,
-       0x3B9,
-       0x3BA,
-       0x3C1,
-       0x3C2,
-       0x3C5,
-       0x3C6,
-       0x3C9,
-       0x3CA,
-       0x3CE,
-       0x3CF,
-       0x565,
-       0x566,
-       0x574,
-       0x575,
-       0x57E,
-       0x57F,
-       0x1F00,
-       0x1F08,
-       0x1F20,
-       0x1F28,
-       0x1F60,
-       0x1F68,
-       0x1F70,
-       0x1F71,
-       0x1F74,
-       0x1F75,
-       0x1F7C,
-       0x1F7D
-};
-
-static const UV _Perl_Any_Folds_invlist[] = { /* for EBCDIC POSIX-BC */
-       277,    /* Number of elements */
-       148565664, /* Version and data structure type */
-       1,      /* 0 if the list starts at 0;
-                  1 if it starts at the element beyond 0 */
-       0x0,
-       0x42,
-       0x4A,
-       0x51,
-       0x5A,
-       0x62,
-       0x6A,
-       0x70,
-       0x79,
-       0x80,
-       0x8A,
-       0x8C,
-       0x8F,
-       0x91,
-       0x9A,
-       0x9C,
-       0x9D,
-       0x9E,
-       0x9F,
-       0xA0,
-       0xA1,
-       0xA2,
-       0xAA,
-       0xAC,
-       0xAF,
-       0xC0,
-       0xCA,
-       0xCB,
-       0xD0,
-       0xD1,
-       0xDA,
-       0xDB,
-       0xE1,
-       0xE2,
-       0xEA,
-       0xEB,
-       0xF0,
-       0xFC,
-       0xFD,
-       0xFE,
-       0xFF,
-       0x100,
-       0x131,
-       0x132,
-       0x138,
-       0x139,
-       0x18D,
-       0x18E,
-       0x19B,
-       0x19C,
-       0x1AA,
-       0x1AC,
-       0x1BA,
-       0x1BC,
-       0x1BE,
-       0x1BF,
-       0x1C0,
-       0x1C4,
-       0x221,
-       0x222,
-       0x234,
-       0x23A,
-       0x255,
-       0x256,
-       0x258,
-       0x259,
-       0x25A,
-       0x25B,
-       0x25D,
-       0x260,
-       0x262,
-       0x263,
-       0x264,
-       0x265,
-       0x267,
-       0x268,
-       0x26A,
-       0x26B,
-       0x26D,
-       0x26F,
-       0x270,
-       0x271,
-       0x273,
-       0x275,
-       0x276,
-       0x27D,
-       0x27E,
-       0x280,
-       0x281,
-       0x283,
-       0x284,
-       0x287,
-       0x28D,
-       0x292,
-       0x293,
-       0x29D,
-       0x29F,
-       0x2BC,
-       0x2BD,
-       0x2BE,
-       0x2BF,
-       0x300,
-       0x302,
-       0x307,
-       0x309,
-       0x30A,
-       0x30B,
-       0x30C,
-       0x30D,
-       0x313,
-       0x314,
-       0x331,
-       0x332,
-       0x342,
-       0x343,
-       0x345,
-       0x346,
-       0x370,
-       0x374,
-       0x376,
-       0x378,
-       0x37B,
-       0x37E,
-       0x37F,
-       0x380,
-       0x386,
-       0x387,
-       0x388,
-       0x38B,
-       0x38C,
-       0x38D,
-       0x38E,
-       0x3A2,
-       0x3A3,
-       0x3D2,
-       0x3D5,
-       0x3F6,
-       0x3F7,
-       0x3FC,
-       0x3FD,
-       0x482,
-       0x48A,
-       0x530,
-       0x531,
-       0x557,
-       0x561,
-       0x588,
-       0x10A0,
-       0x10C6,
-       0x10C7,
-       0x10C8,
-       0x10CD,
-       0x10CE,
-       0x13A0,
-       0x13F6,
-       0x13F8,
-       0x13FE,
-       0x1D79,
-       0x1D7A,
-       0x1D7D,
-       0x1D7E,
-       0x1E00,
-       0x1E9C,
-       0x1E9E,
-       0x1E9F,
-       0x1EA0,
-       0x1F16,
-       0x1F18,
-       0x1F1E,
-       0x1F20,
-       0x1F46,
-       0x1F48,
-       0x1F4E,
-       0x1F50,
-       0x1F58,
-       0x1F59,
-       0x1F5A,
-       0x1F5B,
-       0x1F5C,
-       0x1F5D,
-       0x1F5E,
-       0x1F5F,
-       0x1F7E,
-       0x1F80,
-       0x1FB5,
-       0x1FB6,
-       0x1FBD,
-       0x1FBE,
-       0x1FBF,
-       0x1FC2,
-       0x1FC5,
-       0x1FC6,
-       0x1FCD,
-       0x1FD0,
-       0x1FD4,
-       0x1FD6,
-       0x1FDC,
-       0x1FE0,
-       0x1FED,
-       0x1FF2,
-       0x1FF5,
-       0x1FF6,
-       0x1FFD,
-       0x2126,
-       0x2127,
-       0x212A,
-       0x212C,
-       0x2132,
-       0x2133,
-       0x214E,
-       0x214F,
-       0x2160,
-       0x2180,
-       0x2183,
-       0x2185,
-       0x24B6,
-       0x24EA,
-       0x2C00,
-       0x2C2F,
-       0x2C30,
-       0x2C5F,
-       0x2C60,
-       0x2C71,
-       0x2C72,
-       0x2C74,
-       0x2C75,
-       0x2C77,
-       0x2C7E,
-       0x2CE4,
-       0x2CEB,
-       0x2CEF,
-       0x2CF2,
-       0x2CF4,
-       0x2D00,
-       0x2D26,
-       0x2D27,
-       0x2D28,
-       0x2D2D,
-       0x2D2E,
-       0xA640,
-       0xA66E,
-       0xA680,
-       0xA69C,
-       0xA722,
-       0xA730,
-       0xA732,
-       0xA770,
-       0xA779,
-       0xA788,
-       0xA78B,
-       0xA78E,
-       0xA790,
-       0xA794,
-       0xA796,
-       0xA7AE,
-       0xA7B0,
-       0xA7B8,
-       0xAB53,
-       0xAB54,
-       0xAB70,
-       0xABC0,
-       0xFB00,
-       0xFB07,
-       0xFB13,
-       0xFB18,
-       0xFF21,
-       0xFF3B,
-       0xFF41,
-       0xFF5B,
-       0x10400,
-       0x10450,
-       0x10C80,
-       0x10CB3,
-       0x10CC0,
-       0x10CF3,
-       0x118A0,
-       0x118E0
-};
-
-static const UV _Perl_Folds_To_Multi_Char_invlist[] = { /* for EBCDIC POSIX-BC */
-       59,     /* Number of elements */
-       148565664, /* Version and data structure type */
-       1,      /* 0 if the list starts at 0;
-                  1 if it starts at the element beyond 0 */
-       0x0,
-       0x59,
-       0x5A,
-       0x130,
-       0x131,
-       0x149,
-       0x14A,
-       0x1F0,
-       0x1F1,
-       0x390,
-       0x391,
-       0x3B0,
-       0x3B1,
-       0x587,
-       0x588,
-       0x1E96,
-       0x1E9B,
-       0x1E9E,
-       0x1E9F,
-       0x1F50,
-       0x1F51,
-       0x1F52,
-       0x1F53,
-       0x1F54,
-       0x1F55,
-       0x1F56,
-       0x1F57,
-       0x1F80,
-       0x1FB0,
-       0x1FB2,
-       0x1FB5,
-       0x1FB6,
-       0x1FB8,
-       0x1FBC,
-       0x1FBD,
-       0x1FC2,
-       0x1FC5,
-       0x1FC6,
-       0x1FC8,
-       0x1FCC,
-       0x1FCD,
-       0x1FD2,
-       0x1FD4,
-       0x1FD6,
-       0x1FD8,
-       0x1FE2,
-       0x1FE5,
-       0x1FE6,
-       0x1FE8,
-       0x1FF2,
-       0x1FF5,
-       0x1FF6,
-       0x1FF8,
-       0x1FFC,
-       0x1FFD,
-       0xFB00,
-       0xFB07,
-       0xFB13,
-       0xFB18
-};
-
-#endif /* defined(PERL_IN_REGCOMP_C) */
-
 #if defined(PERL_IN_UTF8_C)
 
 static const UV _Perl_IDCont_invlist[] = { /* for EBCDIC POSIX-BC */
@@ -80822,7 +80838,7 @@ static const UV _Perl_IDStart_invlist[] = { /* for EBCDIC POSIX-BC */
 
 #if defined(PERL_IN_PERL_C)
 
-static const UV Sentence_Break_invlist[] = { /* for EBCDIC POSIX-BC */
+static const UV _Perl_SB_invlist[] = { /* for EBCDIC POSIX-BC */
        2924,   /* Number of elements */
        148565664, /* Version and data structure type */
        0,      /* 0 if the list starts at 0;
@@ -83778,7 +83794,7 @@ typedef enum {
        SB_Upper = 15
 } SB_enum;
 
-static const SB_enum Sentence_Break_invmap[] = { /* for EBCDIC POSIX-BC */
+static const SB_enum _Perl_SB_invmap[] = { /* for EBCDIC POSIX-BC */
        SB_Other,
        SB_Sp,
        SB_Other,
@@ -86707,93 +86723,9 @@ static const SB_enum Sentence_Break_invmap[] = { /* for EBCDIC POSIX-BC */
 
 #endif /* defined(PERL_IN_REGEXEC_C) */
 
-#if defined(PERL_IN_REGCOMP_C)
-
-static const UV UpperLatin1_invlist[] = { /* for EBCDIC POSIX-BC */
-       57,     /* Number of elements */
-       148565664, /* Version and data structure type */
-       1,      /* 0 if the list starts at 0;
-                  1 if it starts at the element beyond 0 */
-       0x0,
-       0x4,
-       0x5,
-       0x6,
-       0x7,
-       0x8,
-       0xB,
-       0x14,
-       0x15,
-       0x17,
-       0x18,
-       0x1A,
-       0x1C,
-       0x20,
-       0x26,
-       0x28,
-       0x2D,
-       0x30,
-       0x32,
-       0x33,
-       0x37,
-       0x38,
-       0x3C,
-       0x3E,
-       0x3F,
-       0x41,
-       0x4A,
-       0x51,
-       0x5A,
-       0x5F,
-       0x60,
-       0x62,
-       0x6A,
-       0x70,
-       0x7A,
-       0x80,
-       0x81,
-       0x8A,
-       0x91,
-       0x9A,
-       0xA2,
-       0xAA,
-       0xBB,
-       0xBE,
-       0xC1,
-       0xCA,
-       0xD1,
-       0xDA,
-       0xE2,
-       0xEA,
-       0xF0,
-       0xFA,
-       0xFB,
-       0xFC,
-       0xFD,
-       0xFE,
-       0xFF
-};
-
-#endif /* defined(PERL_IN_REGCOMP_C) */
-
 #if defined(PERL_IN_PERL_C)
 
-static const UV VertSpace_invlist[] = { /* for EBCDIC POSIX-BC */
-       9,      /* Number of elements */
-       148565664, /* Version and data structure type */
-       1,      /* 0 if the list starts at 0;
-                  1 if it starts at the element beyond 0 */
-       0x0,
-       0xB,
-       0xE,
-       0x15,
-       0x16,
-       0x25,
-       0x26,
-       0x2028,
-       0x202A
-};
-
-static const UV Word_Break_invlist[] = { /* for EBCDIC POSIX-BC */
+static const UV _Perl_WB_invlist[] = { /* for EBCDIC POSIX-BC */
        1547,   /* Number of elements */
        148565664, /* Version and data structure type */
        0,      /* 0 if the list starts at 0;
@@ -88375,7 +88307,7 @@ typedef enum {
        WB_UNKNOWN = 18
 } WB_enum;
 
-static const WB_enum Word_Break_invmap[] = { /* for EBCDIC POSIX-BC */
+static const WB_enum _Perl_WB_invmap[] = { /* for EBCDIC POSIX-BC */
        WB_Other,
        WB_Newline,
        WB_CR,
@@ -89927,8 +89859,92 @@ static const WB_enum Word_Break_invmap[] = { /* for EBCDIC POSIX-BC */
 
 #endif /* defined(PERL_IN_REGEXEC_C) */
 
+#if defined(PERL_IN_REGCOMP_C)
+
+static const UV UpperLatin1_invlist[] = { /* for EBCDIC POSIX-BC */
+       57,     /* Number of elements */
+       148565664, /* Version and data structure type */
+       1,      /* 0 if the list starts at 0;
+                  1 if it starts at the element beyond 0 */
+       0x0,
+       0x4,
+       0x5,
+       0x6,
+       0x7,
+       0x8,
+       0xB,
+       0x14,
+       0x15,
+       0x17,
+       0x18,
+       0x1A,
+       0x1C,
+       0x20,
+       0x26,
+       0x28,
+       0x2D,
+       0x30,
+       0x32,
+       0x33,
+       0x37,
+       0x38,
+       0x3C,
+       0x3E,
+       0x3F,
+       0x41,
+       0x4A,
+       0x51,
+       0x5A,
+       0x5F,
+       0x60,
+       0x62,
+       0x6A,
+       0x70,
+       0x7A,
+       0x80,
+       0x81,
+       0x8A,
+       0x91,
+       0x9A,
+       0xA2,
+       0xAA,
+       0xBB,
+       0xBE,
+       0xC1,
+       0xCA,
+       0xD1,
+       0xDA,
+       0xE2,
+       0xEA,
+       0xF0,
+       0xFA,
+       0xFB,
+       0xFC,
+       0xFD,
+       0xFE,
+       0xFF
+};
+
+#endif /* defined(PERL_IN_REGCOMP_C) */
+
 #if defined(PERL_IN_PERL_C)
 
+static const UV VertSpace_invlist[] = { /* for EBCDIC POSIX-BC */
+       9,      /* Number of elements */
+       148565664, /* Version and data structure type */
+       1,      /* 0 if the list starts at 0;
+                  1 if it starts at the element beyond 0 */
+       0x0,
+       0xB,
+       0xE,
+       0x15,
+       0x16,
+       0x25,
+       0x26,
+       0x2028,
+       0x202A
+};
+
 static const UV XPerlSpace_invlist[] = { /* for EBCDIC POSIX-BC */
        23,     /* Number of elements */
        148565664, /* Version and data structure type */
@@ -99479,7 +99495,7 @@ static const UV XPosixXDigit_invlist[] = { /* for EBCDIC POSIX-BC */
 #endif /* EBCDIC POSIX-BC */
 
 /* Generated from:
- * 083180df694deb1fc173361406c1a75619fb8376403db3a76dc585c1e3951eca lib/Unicode/UCD.pm
+ * 0bca60a25eb4ccf2e04f50446db5f882322f50a9c61dc57bb806ccfc9b2e26a4 lib/Unicode/UCD.pm
  * ae98bec7e4f0564758eed81eca5015481ba32581f8a735a825b71b3bba714450 lib/unicore/ArabicShaping.txt
  * 1687fe5994eb7e5c0dab8503fc2a1b3b479d91af9d3b8055941c9bd791f7d0b5 lib/unicore/BidiBrackets.txt
  * 350d1302116194b0b21def287434b55c5088098fbc726e879f7420a391965643 lib/unicore/BidiMirroring.txt
@@ -99521,8 +99537,8 @@ static const UV XPosixXDigit_invlist[] = { /* for EBCDIC POSIX-BC */
  * 1a0687fb9c6c4567e853913549df0944fe40821279a3e9cdaa6ab8679bc286fd lib/unicore/extracted/DLineBreak.txt
  * 40bcfed3ca727c19e1331f6c33806231d5f7eeeabd2e6a9e06a3740c85d0c250 lib/unicore/extracted/DNumType.txt
  * a18d502bad39d527ac5586d7bc93e29f565859e3bcc24ada627eff606d6f5fed lib/unicore/extracted/DNumValues.txt
- * c9326eab8d7861c3543963e555d5b927348f4467c93071db23154dece7619654 lib/unicore/mktables
+ * 46f739fb5c9daf6fb457ed67f821d88d9eadd2df17b098f385b3b50f99c01acf lib/unicore/mktables
  * 462c9aaa608fb2014cd9649af1c5c009485c60b9c8b15b89401fdc10cf6161c6 lib/unicore/version
  * c6884f4d629f04d1316f3476cb1050b6a1b98ca30c903262955d4eae337c6b1e regen/charset_translations.pl
- * f199f92c0b5f87882b0198936ea8ef3dc43627b57a77ac3eb9250bd2664bbd88 regen/mk_invlists.pl
+ * 8a097f8f726bb1619af2f27f149ab87e60a1602f790147e3a561358be16abd27 regen/mk_invlists.pl
  * ex: set ro: */
index 9ebe0d3..eea81e8 100644 (file)
@@ -42,7 +42,7 @@ my %properties = (
                    alnum => 'Word',
                    wordchar => 'Word',
                    alphanumeric => 'Alnum',
-                   alpha => 'Alpha',
+                   alpha => 'XPosixAlpha',
                    ascii => 'ASCII',
                    blank => 'Blank',
                    cntrl => 'Control',
@@ -50,14 +50,14 @@ my %properties = (
                    graph => 'Graph',
                    idfirst => '_Perl_IDStart',
                    idcont => '_Perl_IDCont',
-                   lower => 'Lower',
+                   lower => 'XPosixLower',
                    print => 'Print',
                    psxspc => 'XPosixSpace',
                    punct => 'XPosixPunct',
                    quotemeta => '_Perl_Quotemeta',
                    space => 'XPerlSpace',
                    vertws => 'VertSpace',
-                   upper => 'Upper',
+                   upper => 'XPosixUpper',
                    xdigit => 'XDigit',
                 );
 
@@ -69,8 +69,13 @@ foreach my $name (sort keys %properties) {
     my $property = $properties{$name};
     my @invlist = prop_invlist($property, '_perl_core_internal_ok');
     if (! @invlist) {
-        fail("No inversion list found for $property");
-        next;
+
+        # An empty return could mean an unknown property, or merely that the
+        # property is empty.  Call again in scalar context to differentiate.
+        if (! prop_invlist($property, '_perl_core_internal_ok')) {
+            fail("No inversion list found for $property");
+            next;
+        }
     }
 
     # Include all the Latin1 code points, plus 0x100.
@@ -270,7 +275,7 @@ foreach my $name (sort keys %to_properties) {
         fail("No inversion map found for $property");
         next;
     }
-    if ($format ne "al") {
+    if ($format !~ / ^ a l? $ /x) {
         fail("Unexpected inversion map format ('$format') found for $property");
         next;
     }
index 06fbfd1..1854982 100644 (file)
@@ -5,7 +5,7 @@ use warnings;
 no warnings 'surrogate';    # surrogates can be inputs to this
 use charnames ();
 
-our $VERSION = '0.61';
+our $VERSION = '0.62';
 
 require Exporter;
 
@@ -775,7 +775,6 @@ sub charprop ($$) {
     }
     else {
         croak __PACKAGE__, "::charprop: Internal error: unknown format '$format'.  Please perlbug this";
-        return undef;
     }
 }
 
@@ -877,6 +876,10 @@ sub _charblocks {
            local $_;
            local $/ = "\n";
            while (<$BLOCKSFH>) {
+
+                # Old versions used a different syntax to mark the range.
+                $_ =~ s/;\s+/../ if $v_unicode_version lt v3.1.0;
+
                if (/^([0-9A-F]+)\.\.([0-9A-F]+);\s+(.+)/) {
                    my ($lo, $hi) = (hex($1), hex($2));
                    my $subrange = [ $lo, $hi, $3 ];
@@ -2652,9 +2655,11 @@ or even better, C<"Gc=LC">).
 
 Many Unicode properties have more than one name (or alias).  C<prop_invmap>
 understands all of these, including Perl extensions to them.  Ambiguities are
-resolved as described above for L</prop_aliases()>.  The Perl internal
-property "Perl_Decimal_Digit, described below, is also accepted.  An empty
-list is returned if the property name is unknown.
+resolved as described above for L</prop_aliases()> (except if a property has
+both a complete mapping, and a binary C<Y>/C<N> mapping, then specifying the
+property name prefixed by C<"is"> causes the binary one to be returned).  The
+Perl internal property "Perl_Decimal_Digit", described below, is also accepted.
+An empty list is returned if the property name is unknown.
 See L<perluniprops/Properties accessible through Unicode::UCD> for the
 properties acceptable as inputs to this function.
 
@@ -3253,8 +3258,8 @@ RETRY:
             # we need to also read in that table.  Create a hash with the keys
             # being the code points, and the values being a list of the
             # aliases for the code point key.
-            my ($aliases_code_points, $aliases_maps, undef, undef) =
-                                                &prop_invmap('Name_Alias');
+            my ($aliases_code_points, $aliases_maps, undef, undef)
+                  = &prop_invmap("_Perl_Name_Alias", '_perl_core_internal_ok');
             my %aliases;
             for (my $i = 0; $i < @$aliases_code_points; $i++) {
                 my $code_point = $aliases_code_points->[$i];
@@ -3545,7 +3550,19 @@ RETRY:
 
     if ($swash->{'LIST'} =~ /^V/) {
         @invlist = split "\n", $swash->{'LIST'} =~ s/ \s* (?: \# .* )? $ //xmgr;
-        shift @invlist;
+
+        shift @invlist;     # Get rid of 'V';
+
+        # Could need to be inverted: add or subtract a 0 at the beginning of
+        # the list.
+        if ($swash->{'INVERT_IT'}) {
+            if (@invlist && $invlist[0] == 0) {
+                shift @invlist;
+            }
+            else {
+                unshift @invlist, 0;
+            }
+        }
         foreach my $i (0 .. @invlist - 1) {
             $invmap[$i] = ($i % 2 == 0) ? 'Y' : 'N'
         }
@@ -3558,6 +3575,10 @@ RETRY:
         }
     }
     else {
+        if ($swash->{'INVERT_IT'}) {
+            croak __PACKAGE__, ":prop_invmap: Don't know how to deal with inverted";
+        }
+
         # The LIST input lines look like:
         # ...
         # 0374\t\tCommon
@@ -3873,7 +3894,7 @@ RETRY:
         map { $_ = [ split " ", $_  ] if $_ =~ / / } @invmap;
         $format = 'sl';
     }
-    elsif ($returned_prop eq 'ToNameAlias') {
+    elsif ($returned_prop =~ / To ( _Perl )? NameAlias/x) {
 
         # This property currently doesn't have any lists, but theoretically
         # could
@@ -3888,7 +3909,14 @@ RETRY:
         # to indicate that need to add code point to it.
         $format = 'ar';
     }
-    elsif ($format ne 'n' && $format ne 'a') {
+    elsif ($format eq 'ax') {
+
+        # Normally 'ax' properties have overrides, and will have been handled
+        # above, but if not, they still need adjustment, and the hex values
+        # have already been converted to decimal
+        $format = 'a';
+    }
+    elsif ($format ne 'n' && $format !~ / ^ a /x) {
 
         # All others are simple scalars
         $format = 's';
@@ -4079,6 +4107,15 @@ for its block using C<charblock>).
 Note that starting in Unicode 6.1, many of the block names have shorter
 synonyms.  These are always given in the new style.
 
+=head2 Use with older Unicode versions
+
+The functions in this module work as well as can be expected when
+used on earlier Unicode versions.  But, obviously, they use the available data
+from that Unicode version.  For example, if the Unicode version predates the
+definition of the script property (Unicode 3.1), then any function that deals
+with scripts is going to return C<undef> for the script portion of the return
+value.
+
 =head1 AUTHOR
 
 Jarkko Hietaniemi.  Now maintained by perl5 porters.
index a799dd0..22b2edb 100644 (file)
@@ -19,14 +19,22 @@ use Test::More;
 
 use Unicode::UCD qw(charinfo charprop charprops_all);
 
+my $expected_version = '8.0.0';
+my $current_version = Unicode::UCD::UnicodeVersion;
+my $v_unicode_version = pack "C*", split /\./, $current_version;
+my $unknown_script = ($v_unicode_version lt v5.0.0)
+                     ? 'Common'
+                     : 'Unknown';
 my $input_record_separator = 7; # Make sure Unicode::UCD isn't affected by
 $/ = $input_record_separator;   # setting this.
 
 my $charinfo;
 
 is(charinfo(0x110000), undef, "Verify charinfo() of non-unicode is undef");
-is(charprop(0x110000, 'age'), "Unassigned", "Verify charprop(age) of non-unicode is Unassigned");
-is(charprop(0x110000, 'in'), "Unassigned", "Verify charprop(in), a bipartite Perl extension, works");
+if ($v_unicode_version ge v3.2.0) {
+    is(lc charprop(0x110000, 'age'), lc "Unassigned", "Verify charprop(age) of non-unicode is Unassigned");
+    is(charprop(0x110000, 'in'), "Unassigned", "Verify charprop(in), a bipartite Perl extension, works");
+}
 is(charprop(0x110000, 'Any'), undef, "Verify charprop of non-bipartite Perl extension returns undef");
 
 my $cp = 0;
@@ -37,9 +45,10 @@ is($charinfo->{code},           "0000",
 is($charinfo->{name},           "<control>");
 is(charprop($cp, "name"),       "");
 
-# This gets a sl-type property returning a flattened list
-is(charprop($cp, "name_alias"), "NULL: control,NUL: abbreviation");
-
+if ($v_unicode_version ge v6.1.0) {
+    # This gets a sl-type property returning a flattened list
+    is(charprop($cp, "name_alias"), "NULL: control,NUL: abbreviation");
+}
 is($charinfo->{category},       "Cc");
 is(charprop($cp, "category"),   "Control");
 is($charinfo->{combining},      "0");
@@ -66,8 +75,8 @@ is($charinfo->{title},          "");
 is(charprop($cp, "tc"),         "\0");
 is($charinfo->{block},          "Basic Latin");
 is(charprop($cp, "block"),      "Basic_Latin");
-is($charinfo->{script},         "Common");
-is(charprop($cp, "script"),     "Common");
+is($charinfo->{script},         "Common") if $v_unicode_version gt v3.0.1;
+is(charprop($cp, "script"),     "Common") if $v_unicode_version gt v3.0.1;
 
 $cp = utf8::unicode_to_native(0x41);
 my $A_code = sprintf("%04X", ord("A"));
@@ -103,8 +112,8 @@ is($charinfo->{title},          "");
 is(charprop($cp, 'tc'),         "A");
 is($charinfo->{block},          "Basic Latin");
 is(charprop($cp, 'block'),      "Basic_Latin");
-is($charinfo->{script},         "Latin");
-is(charprop($cp, 'script'),     "Latin");
+is($charinfo->{script},         "Latin") if $v_unicode_version gt v3.0.1;
+is(charprop($cp, 'script'),     "Latin") if $v_unicode_version gt v3.0.1;
 
 $cp = 0x100;
 $charinfo = charinfo($cp);
@@ -138,8 +147,8 @@ is($charinfo->{title},          "");
 is(charprop($cp, 'tc'),         "\x{100}");
 is($charinfo->{block},          "Latin Extended-A");
 is(charprop($cp, 'block'),      "Latin_Extended_A");
-is($charinfo->{script},         "Latin");
-is(charprop($cp, 'script'),     "Latin");
+is($charinfo->{script},         "Latin") if $v_unicode_version gt v3.0.1;
+is(charprop($cp, 'script'),     "Latin") if $v_unicode_version gt v3.0.1;
 
 $cp = 0x590;               # 0x0590 is in the Hebrew block but unused.
 $charinfo = charinfo($cp);
@@ -152,7 +161,9 @@ is(charprop($cp, 'gc'),         "Unassigned");
 is($charinfo->{combining},      undef);
 is(charprop($cp, 'ccc'),        "Not_Reordered");
 is($charinfo->{bidi},           undef);
-is(charprop($cp, 'bc'),         "Right_To_Left");
+if ($v_unicode_version gt v3.2.0) {
+    is(charprop($cp, 'bc'),         "Right_To_Left");
+}
 is($charinfo->{decomposition},  undef);
 is(charprop($cp, 'dm'),         "\x{590}");
 is($charinfo->{decimal},        undef);
@@ -174,7 +185,8 @@ is(charprop($cp, 'tc'),         "\x{590}");
 is($charinfo->{block},          undef);
 is(charprop($cp, 'block'),      "Hebrew");
 is($charinfo->{script},         undef);
-is(charprop($cp, 'script'),     "Unknown");
+is(charprop($cp, 'script'),     $unknown_script)
+                                        if $v_unicode_version gt v3.0.1;
 
 # 0x05d0 is in the Hebrew block and used.
 
@@ -210,8 +222,8 @@ is($charinfo->{title},          "");
 is(charprop($cp, 'tc'),         "\x{5d0}");
 is($charinfo->{block},          "Hebrew");
 is(charprop($cp, 'block'),      "Hebrew");
-is($charinfo->{script},         "Hebrew");
-is(charprop($cp, 'script'),     "Hebrew");
+is($charinfo->{script},         "Hebrew") if $v_unicode_version gt v3.0.1;
+is(charprop($cp, 'script'),     "Hebrew") if $v_unicode_version gt v3.0.1;
 
 # An open syllable in Hangul.
 
@@ -247,8 +259,8 @@ is($charinfo->{title},          "");
 is(charprop($cp, 'tc'),         "\x{AC00}");
 is($charinfo->{block},          "Hangul Syllables");
 is(charprop($cp, 'block'),      "Hangul_Syllables");
-is($charinfo->{script},         "Hangul");
-is(charprop($cp, 'script'),     "Hangul");
+is($charinfo->{script},         "Hangul") if $v_unicode_version gt v3.0.1;
+is(charprop($cp, 'script'),     "Hangul") if $v_unicode_version gt v3.0.1;
 
 # A closed syllable in Hangul.
 
@@ -284,85 +296,89 @@ is($charinfo->{title},          "");
 is(charprop($cp, 'tc'),         "\x{AE00}");
 is($charinfo->{block},          "Hangul Syllables");
 is(charprop($cp, 'block'),      "Hangul_Syllables");
-is($charinfo->{script},         "Hangul");
-is(charprop($cp, 'script'),     "Hangul");
-
-$cp = 0x1D400;
-$charinfo = charinfo($cp);
+is($charinfo->{script},         "Hangul") if $v_unicode_version gt v3.0.1;
+is(charprop($cp, 'script'),     "Hangul") if $v_unicode_version gt v3.0.1;
+
+if ($v_unicode_version gt v3.0.1) {
+    $cp = 0x1D400;
+    $charinfo = charinfo($cp);
+
+    is($charinfo->{code},           "1D400", "MATHEMATICAL BOLD CAPITAL A");
+    is($charinfo->{name},           "MATHEMATICAL BOLD CAPITAL A");
+    is(charprop($cp, 'name'),       "MATHEMATICAL BOLD CAPITAL A");
+    is($charinfo->{category},       "Lu");
+    is(charprop($cp, 'gc'),         "Uppercase_Letter");
+    is($charinfo->{combining},      "0");
+    is(charprop($cp, 'ccc'),        "Not_Reordered");
+    is($charinfo->{bidi},           "L");
+    is(charprop($cp, 'bc'),         "Left_To_Right");
+    is($charinfo->{decomposition},  "<font> $A_code");
+    is(charprop($cp, 'dm'),         "A");
+    is($charinfo->{decimal},        "");
+    is($charinfo->{digit},          "");
+    is($charinfo->{numeric},        "");
+    is(charprop($cp, 'nv'),         "NaN");
+    is($charinfo->{mirrored},       "N");
+    is(charprop($cp, 'bidim'),      "No");
+    is($charinfo->{unicode10},      "");
+    is(charprop($cp, 'na1'),        "");
+    is($charinfo->{comment},        "");
+    is(charprop($cp, 'isc'),        "");
+    is($charinfo->{upper},          "");
+    is(charprop($cp, 'uc'),         "\x{1D400}");
+    is($charinfo->{lower},          "");
+    is(charprop($cp, 'lc'),         "\x{1D400}");
+    is($charinfo->{title},          "");
+    is(charprop($cp, 'tc'),         "\x{1D400}");
+    is($charinfo->{block},          "Mathematical Alphanumeric Symbols");
+    is(charprop($cp, 'block'),      "Mathematical_Alphanumeric_Symbols");
+    is($charinfo->{script},         "Common");
+    is(charprop($cp, 'script'),     "Common");
+}
 
-is($charinfo->{code},           "1D400", "MATHEMATICAL BOLD CAPITAL A");
-is($charinfo->{name},           "MATHEMATICAL BOLD CAPITAL A");
-is(charprop($cp, 'name'),       "MATHEMATICAL BOLD CAPITAL A");
-is($charinfo->{category},       "Lu");
-is(charprop($cp, 'gc'),         "Uppercase_Letter");
-is($charinfo->{combining},      "0");
-is(charprop($cp, 'ccc'),        "Not_Reordered");
-is($charinfo->{bidi},           "L");
-is(charprop($cp, 'bc'),         "Left_To_Right");
-is($charinfo->{decomposition},  "<font> $A_code");
-is(charprop($cp, 'dm'),         "A");
-is($charinfo->{decimal},        "");
-is($charinfo->{digit},          "");
-is($charinfo->{numeric},        "");
-is(charprop($cp, 'nv'),         "NaN");
-is($charinfo->{mirrored},       "N");
-is(charprop($cp, 'bidim'),      "No");
-is($charinfo->{unicode10},      "");
-is(charprop($cp, 'na1'),        "");
-is($charinfo->{comment},        "");
-is(charprop($cp, 'isc'),        "");
-is($charinfo->{upper},          "");
-is(charprop($cp, 'uc'),         "\x{1D400}");
-is($charinfo->{lower},          "");
-is(charprop($cp, 'lc'),         "\x{1D400}");
-is($charinfo->{title},          "");
-is(charprop($cp, 'tc'),         "\x{1D400}");
-is($charinfo->{block},          "Mathematical Alphanumeric Symbols");
-is(charprop($cp, 'block'),      "Mathematical_Alphanumeric_Symbols");
-is($charinfo->{script},         "Common");
-is(charprop($cp, 'script'),     "Common");
-
-$cp = 0x9FBA;                  #Bug 58428
-$charinfo = charinfo(0x9FBA);
-
-is($charinfo->{code},           "9FBA", "U+9FBA");
-is($charinfo->{name},           "CJK UNIFIED IDEOGRAPH-9FBA");
-is(charprop($cp, 'name'),       "CJK UNIFIED IDEOGRAPH-9FBA");
-is($charinfo->{category},       "Lo");
-is(charprop($cp, 'gc'),         "Other_Letter");
-is($charinfo->{combining},      "0");
-is(charprop($cp, 'ccc'),        "Not_Reordered");
-is($charinfo->{bidi},           "L");
-is(charprop($cp, 'bc'),         "Left_To_Right");
-is($charinfo->{decomposition},  "");
-is(charprop($cp, 'dm'),         "\x{9FBA}");
-is($charinfo->{decimal},        "");
-is($charinfo->{digit},          "");
-is($charinfo->{numeric},        "");
-is(charprop($cp, 'nv'),         "NaN");
-is($charinfo->{mirrored},       "N");
-is(charprop($cp, 'bidim'),      "No");
-is($charinfo->{unicode10},      "");
-is(charprop($cp, 'na1'),        "");
-is($charinfo->{comment},        "");
-is(charprop($cp, 'isc'),        "");
-is($charinfo->{upper},          "");
-is(charprop($cp, 'uc'),         "\x{9FBA}");
-is($charinfo->{lower},          "");
-is(charprop($cp, 'lc'),         "\x{9FBA}");
-is($charinfo->{title},          "");
-is(charprop($cp, 'tc'),         "\x{9FBA}");
-is($charinfo->{block},          "CJK Unified Ideographs");
-is(charprop($cp, 'block'),      "CJK_Unified_Ideographs");
-is($charinfo->{script},         "Han");
-is(charprop($cp, 'script'),     "Han");
+if ($v_unicode_version ge v4.1.0) {
+    $cp = 0x9FBA;                      #Bug 58428
+    $charinfo = charinfo(0x9FBA);
+
+    is($charinfo->{code},           "9FBA", "U+9FBA");
+    is($charinfo->{name},           "CJK UNIFIED IDEOGRAPH-9FBA");
+    is(charprop($cp, 'name'),       "CJK UNIFIED IDEOGRAPH-9FBA");
+    is($charinfo->{category},       "Lo");
+    is(charprop($cp, 'gc'),         "Other_Letter");
+    is($charinfo->{combining},      "0");
+    is(charprop($cp, 'ccc'),        "Not_Reordered");
+    is($charinfo->{bidi},           "L");
+    is(charprop($cp, 'bc'),         "Left_To_Right");
+    is($charinfo->{decomposition},  "");
+    is(charprop($cp, 'dm'),         "\x{9FBA}");
+    is($charinfo->{decimal},        "");
+    is($charinfo->{digit},          "");
+    is($charinfo->{numeric},        "");
+    is(charprop($cp, 'nv'),         "NaN");
+    is($charinfo->{mirrored},       "N");
+    is(charprop($cp, 'bidim'),      "No");
+    is($charinfo->{unicode10},      "");
+    is(charprop($cp, 'na1'),        "");
+    is($charinfo->{comment},        "");
+    is(charprop($cp, 'isc'),        "");
+    is($charinfo->{upper},          "");
+    is(charprop($cp, 'uc'),         "\x{9FBA}");
+    is($charinfo->{lower},          "");
+    is(charprop($cp, 'lc'),         "\x{9FBA}");
+    is($charinfo->{title},          "");
+    is(charprop($cp, 'tc'),         "\x{9FBA}");
+    is($charinfo->{block},          "CJK Unified Ideographs");
+    is(charprop($cp, 'block'),      "CJK_Unified_Ideographs");
+    is($charinfo->{script},         "Han");
+    is(charprop($cp, 'script'),     "Han");
+}
 
 use Unicode::UCD qw(charblock charscript);
 
 # 0x0590 is in the Hebrew block but unused.
 
 is(charblock(0x590),          "Hebrew", "0x0590 - Hebrew unused charblock");
-is(charscript(0x590),         "Unknown",    "0x0590 - Hebrew unused charscript");
+is(charscript(0x590),         $unknown_script, "0x0590 - Hebrew unused charscript") if $v_unicode_version gt v3.0.1;
 is(charblock(0x1FFFF),        "No_Block", "0x1FFFF - unused charblock");
 
 my $fraction_3_4_code = sprintf("%04X", utf8::unicode_to_native(0xbe));
@@ -401,8 +417,8 @@ is($charinfo->{title},          "");
 is(charprop($cp, 'tc'),         chr hex $cp);
 is($charinfo->{block},          "Latin-1 Supplement");
 is(charprop($cp, 'block'),      "Latin_1_Supplement");
-is($charinfo->{script},         "Common");
-is(charprop($cp, 'script'),     "Common");
+is($charinfo->{script},         "Common") if $v_unicode_version gt v3.0.1;
+is(charprop($cp, 'script'),     "Common") if $v_unicode_version gt v3.0.1;
 
 # This is to test a case where both simple and full lowercases exist and
 # differ
@@ -435,13 +451,13 @@ is(charprop($cp, 'isc'),        "");
 is($charinfo->{upper},          "");
 is(charprop($cp, 'uc'),         "\x{130}");
 is($charinfo->{lower},          $i_code);
-is(charprop($cp, 'lc'),         "i\x{307}");
+is(charprop($cp, 'lc'),         "i\x{307}") if $v_unicode_version ge v3.2.0;
 is($charinfo->{title},          "");
 is(charprop($cp, 'tc'),         "\x{130}");
 is($charinfo->{block},          "Latin Extended-A");
 is(charprop($cp, 'block'),      "Latin_Extended_A");
-is($charinfo->{script},         "Latin");
-is(charprop($cp, 'script'),     "Latin");
+is($charinfo->{script},         "Latin") if $v_unicode_version gt v3.0.1;
+is(charprop($cp, 'script'),     "Latin") if $v_unicode_version gt v3.0.1;
 
 # This is to test a case where both simple and full uppercases exist and
 # differ
@@ -478,19 +494,23 @@ is($charinfo->{title},          "1F88");
 is(charprop($cp, "tc"),         "\x{1F88}");
 is($charinfo->{block},          "Greek Extended");
 is(charprop($cp, "block"),      "Greek_Extended");
-is($charinfo->{script},         "Greek");
-is(charprop($cp, "script"),     "Greek");
+is($charinfo->{script},         "Greek") if $v_unicode_version gt v3.0.1;
+is(charprop($cp, "script"),     "Greek") if $v_unicode_version gt v3.0.1;
 
 is(charprop(ord("A"), "foo"),    undef,
                         "Verify charprop of unknown property returns <undef>");
 
 # These were created from inspection of the code to exercise the branches
-is(charprop(ord("("), "bpb"),    ")",
+if ($v_unicode_version ge v6.3.0) {
+    is(charprop(ord("("), "bpb"),    ")",
             "Verify charprop figures out that s-type properties can be char");
+}
 is(charprop(ord("9"), "nv"),     9,
                             "Verify charprop can adjust an ar-type property");
-is(charprop(utf8::unicode_to_native(0xAD), "NFKC_Casefold"), "",
+if ($v_unicode_version ge v5.2.0) {
+    is(charprop(utf8::unicode_to_native(0xAD), "NFKC_Casefold"), "",
                     "Verify charprop can handle an \"\" in ae-type property");
+}
 
 my $mark_props_ref = charprops_all(0x300);
 is($mark_props_ref->{'Bidi_Class'}, "Nonspacing_Mark",
@@ -499,9 +519,13 @@ is($mark_props_ref->{'Bidi_Mirrored'}, "No");
 is($mark_props_ref->{'Canonical_Combining_Class'}, "Above");
 is($mark_props_ref->{'Case_Folding'}, "\x{300}");
 is($mark_props_ref->{'Decomposition_Mapping'}, "\x{300}");
-is($mark_props_ref->{'Decomposition_Type'}, "None");
+is($mark_props_ref->{'Decomposition_Type'}, ($v_unicode_version le v4.0.0)
+                                             ? "none"
+                                             : "None");
 is($mark_props_ref->{'General_Category'}, "Nonspacing_Mark");
-is($mark_props_ref->{'ISO_Comment'}, "");
+if ($v_unicode_version gt v5.1.0) {
+    is($mark_props_ref->{'ISO_Comment'}, "");
+}
 is($mark_props_ref->{'Lowercase_Mapping'}, "\x{300}");
 is($mark_props_ref->{'Name'}, "COMBINING GRAVE ACCENT");
 is($mark_props_ref->{'Numeric_Type'}, "None");
@@ -522,36 +546,40 @@ ok(exists $charblocks->{Thai}, 'Thai charblock exists');
 is($charblocks->{Thai}->[0]->[0], hex('0e00'));
 ok(!exists $charblocks->{PigLatin}, 'PigLatin charblock does not exist');
 
-my $charscripts = charscripts();
+if ($v_unicode_version gt v3.0.1) {
+    my $charscripts = charscripts();
 
-ok(exists $charscripts->{Armenian}, 'Armenian charscript exists');
-is($charscripts->{Armenian}->[0]->[0], hex('0531'));
-ok(!exists $charscripts->{PigLatin}, 'PigLatin charscript does not exist');
+    ok(exists $charscripts->{Armenian}, 'Armenian charscript exists');
+    is($charscripts->{Armenian}->[0]->[0], hex('0531'));
+    ok(!exists $charscripts->{PigLatin}, 'PigLatin charscript does not exist');
 
-my $charscript;
+    my $charscript;
 
-$charscript = charscript("12ab");
-is($charscript, 'Ethiopic', 'Ethiopic charscript');
+    $charscript = charscript("12ab");
+    is($charscript, 'Ethiopic', 'Ethiopic charscript');
 
-$charscript = charscript("0x12ab");
-is($charscript, 'Ethiopic');
+    $charscript = charscript("0x12ab");
+    is($charscript, 'Ethiopic');
 
-$charscript = charscript("U+12ab");
-is($charscript, 'Ethiopic');
+    $charscript = charscript("U+12ab");
+    is($charscript, 'Ethiopic');
 
-my $ranges;
+    my $ranges;
 
-$ranges = charscript('Ogham');
-is($ranges->[0]->[0], hex('1680'), 'Ogham charscript');
-is($ranges->[0]->[1], hex('169C'));
+    if ($v_unicode_version gt v4.0.0) {
+        $ranges = charscript('Ogham');
+        is($ranges->[0]->[0], hex('1680'), 'Ogham charscript');
+        is($ranges->[0]->[1], hex('169C'));
+    }
 
-use Unicode::UCD qw(charinrange);
+    use Unicode::UCD qw(charinrange);
 
-$ranges = charscript('Cherokee');
-ok(!charinrange($ranges, "139f"), 'Cherokee charscript');
-ok( charinrange($ranges, "13a0"));
-ok( charinrange($ranges, "13f4"));
-ok(!charinrange($ranges, "13ff"));
+    $ranges = charscript('Cherokee');
+    ok(!charinrange($ranges, "139f"), 'Cherokee charscript');
+    ok( charinrange($ranges, "13a0"));
+    ok( charinrange($ranges, "13f4"));
+    ok(!charinrange($ranges, "13ff"));
+}
 
 use Unicode::UCD qw(general_categories);
 
@@ -571,7 +599,8 @@ is($bt->{AL}, 'Right-to-Left Arabic', 'AL is Right-to-Left Arabic');
 
 # If this fails, then maybe one should look at the Unicode changes to see
 # what else might need to be updated.
-is(Unicode::UCD::UnicodeVersion, '8.0.0', 'UnicodeVersion');
+ok($v_unicode_version le pack("C*", split /\./, $expected_version), # per-component compare
+                    "Verify there isn't a new Unicode version to upgrade to");
 
 use Unicode::UCD qw(compexcl);
 
@@ -593,66 +622,93 @@ is($casefold->{full}, $a_code, 'casefold native(0x41) full');
 is($casefold->{simple}, $a_code, 'casefold native(0x41) simple');
 is($casefold->{turkic}, "", 'casefold native(0x41) turkic');
 
-$casefold = casefold(utf8::unicode_to_native(0xdf));
 my $sharp_s_code = sprintf("%04X", utf8::unicode_to_native(0xdf));
 my $S_code = sprintf("%04X", ord "S");
 my $s_code = sprintf("%04X", ord "s");
 
-is($casefold->{code}, $sharp_s_code, 'casefold native(0xDF) code');
-is($casefold->{status}, 'F', 'casefold native(0xDF) status');
-is($casefold->{mapping}, "$s_code $s_code", 'casefold native(0xDF) mapping');
-is($casefold->{full}, "$s_code $s_code", 'casefold native(0xDF) full');
-is($casefold->{simple}, "", 'casefold native(0xDF) simple');
-is($casefold->{turkic}, "", 'casefold native(0xDF) turkic');
-
-# Do different tests depending on if version < 3.2, or not.
-my $v_unicode_version = pack "C*", split /\./, Unicode::UCD::UnicodeVersion();
-if ($v_unicode_version lt v3.2.0) {
-       $casefold = casefold(0x130);
-
-       is($casefold->{code}, '0130', 'casefold 0x130 code');
-       is($casefold->{status}, 'I' , 'casefold 0x130 status');
-       is($casefold->{mapping}, $i_code, 'casefold 0x130 mapping');
-       is($casefold->{full}, $i_code, 'casefold 0x130 full');
-       is($casefold->{simple}, $i_code, 'casefold 0x130 simple');
-       is($casefold->{turkic}, $i_code, 'casefold 0x130 turkic');
-
-       $casefold = casefold(0x131);
-
-       is($casefold->{code}, '0131', 'casefold 0x131 code');
-       is($casefold->{status}, 'I' , 'casefold 0x131 status');
-       is($casefold->{mapping}, $i_code, 'casefold 0x131 mapping');
-       is($casefold->{full}, $i_code, 'casefold 0x131 full');
-       is($casefold->{simple}, $i_code, 'casefold 0x131 simple');
-       is($casefold->{turkic}, $i_code, 'casefold 0x131 turkic');
-} else {
-       $casefold = casefold(utf8::unicode_to_native(0x49));
-
-       is($casefold->{code}, $I_code, 'casefold native(0x49) code');
-       is($casefold->{status}, 'C' , 'casefold native(0x49) status');
-       is($casefold->{mapping}, $i_code, 'casefold native(0x49) mapping');
-       is($casefold->{full}, $i_code, 'casefold native(0x49) full');
-       is($casefold->{simple}, $i_code, 'casefold native(0x49) simple');
-       is($casefold->{turkic}, "0131", 'casefold native(0x49) turkic');
-
-       $casefold = casefold(0x130);
-
-       is($casefold->{code}, '0130', 'casefold 0x130 code');
-       is($casefold->{status}, 'F' , 'casefold 0x130 status');
-       is($casefold->{mapping}, "$i_code 0307", 'casefold 0x130 mapping');
-       is($casefold->{full}, "$i_code 0307", 'casefold 0x130 full');
-       is($casefold->{simple}, "", 'casefold 0x130 simple');
-       is($casefold->{turkic}, $i_code, 'casefold 0x130 turkic');
-}
+if ($v_unicode_version gt v3.0.0) { # These special ones don't work on early
+                                    # perls
+    $casefold = casefold(utf8::unicode_to_native(0xdf));
+
+    is($casefold->{code}, $sharp_s_code, 'casefold native(0xDF) code');
+    is($casefold->{status}, 'F', 'casefold native(0xDF) status');
+    is($casefold->{mapping}, "$s_code $s_code", 'casefold native(0xDF) mapping');
+    is($casefold->{full}, "$s_code $s_code", 'casefold native(0xDF) full');
+    is($casefold->{simple}, "", 'casefold native(0xDF) simple');
+    is($casefold->{turkic}, "", 'casefold native(0xDF) turkic');
+
+    # Do different tests depending on if version is 3.0.1, < 3.2, or later.
+    if ($v_unicode_version eq v3.0.1) {
+            # In this release, there were no special Turkic values.
+            # Both 0x130 and 0x131 folded to 'i'.
+
+            $casefold = casefold(0x130);
+
+            is($casefold->{code}, '0130', 'casefold 0x130 code');
+            is($casefold->{status}, 'C' , 'casefold 0x130 status');
+            is($casefold->{mapping}, $i_code, 'casefold 0x130 mapping');
+            is($casefold->{full}, $i_code, 'casefold 0x130 full');
+            is($casefold->{simple}, $i_code, 'casefold 0x130 simple');
+            is($casefold->{turkic}, "", 'casefold 0x130 turkic');
+
+            $casefold = casefold(0x131);
+
+            is($casefold->{code}, '0131', 'casefold 0x131 code');
+            is($casefold->{status}, 'C' , 'casefold 0x131 status');
+            is($casefold->{mapping}, $i_code, 'casefold 0x131 mapping');
+            is($casefold->{full}, $i_code, 'casefold 0x131 full');
+            is($casefold->{simple}, $i_code, 'casefold 0x131 simple');
+            is($casefold->{turkic}, "", 'casefold 0x131 turkic');
+    }
+    elsif ($v_unicode_version lt v3.2.0) {
+            $casefold = casefold(0x130);
+
+            is($casefold->{code}, '0130', 'casefold 0x130 code');
+            is($casefold->{status}, 'I' , 'casefold 0x130 status');
+            is($casefold->{mapping}, $i_code, 'casefold 0x130 mapping');
+            is($casefold->{full}, $i_code, 'casefold 0x130 full');
+            is($casefold->{simple}, $i_code, 'casefold 0x130 simple');
+            is($casefold->{turkic}, $i_code, 'casefold 0x130 turkic');
+
+            $casefold = casefold(0x131);
+
+            is($casefold->{code}, '0131', 'casefold 0x131 code');
+            is($casefold->{status}, 'I' , 'casefold 0x131 status');
+            is($casefold->{mapping}, $i_code, 'casefold 0x131 mapping');
+            is($casefold->{full}, $i_code, 'casefold 0x131 full');
+            is($casefold->{simple}, $i_code, 'casefold 0x131 simple');
+            is($casefold->{turkic}, $i_code, 'casefold 0x131 turkic');
+    } else {
+            $casefold = casefold(utf8::unicode_to_native(0x49));
+
+            is($casefold->{code}, $I_code, 'casefold native(0x49) code');
+            is($casefold->{status}, 'C' , 'casefold native(0x49) status');
+            is($casefold->{mapping}, $i_code, 'casefold native(0x49) mapping');
+            is($casefold->{full}, $i_code, 'casefold native(0x49) full');
+            is($casefold->{simple}, $i_code, 'casefold native(0x49) simple');
+            is($casefold->{turkic}, "0131", 'casefold native(0x49) turkic');
+
+            $casefold = casefold(0x130);
+
+            is($casefold->{code}, '0130', 'casefold 0x130 code');
+            is($casefold->{status}, 'F' , 'casefold 0x130 status');
+            is($casefold->{mapping}, "$i_code 0307", 'casefold 0x130 mapping');
+            is($casefold->{full}, "$i_code 0307", 'casefold 0x130 full');
+            is($casefold->{simple}, "", 'casefold 0x130 simple');
+            is($casefold->{turkic}, $i_code, 'casefold 0x130 turkic');
+    }
 
-$casefold = casefold(0x1F88);
+    if ($v_unicode_version gt v3.0.1) {
+        $casefold = casefold(0x1F88);
 
-is($casefold->{code}, '1F88', 'casefold 0x1F88 code');
-is($casefold->{status}, 'S' , 'casefold 0x1F88 status');
-is($casefold->{mapping}, '1F80', 'casefold 0x1F88 mapping');
-is($casefold->{full}, '1F00 03B9', 'casefold 0x1F88 full');
-is($casefold->{simple}, '1F80', 'casefold 0x1F88 simple');
-is($casefold->{turkic}, "", 'casefold 0x1F88 turkic');
+        is($casefold->{code}, '1F88', 'casefold 0x1F88 code');
+        is($casefold->{status}, 'S' , 'casefold 0x1F88 status');
+        is($casefold->{mapping}, '1F80', 'casefold 0x1F88 mapping');
+        is($casefold->{full}, '1F00 03B9', 'casefold 0x1F88 full');
+        is($casefold->{simple}, '1F80', 'casefold 0x1F88 simple');
+        is($casefold->{turkic}, "", 'casefold 0x1F88 turkic');
+    }
+}
 
 ok(!casefold(utf8::unicode_to_native(0x20)));
 
@@ -672,12 +728,16 @@ ok($casespec->{code} eq $sharp_s_code &&
 
 $casespec = casespec(0x307);
 
-ok($casespec->{az}->{code} eq '0307' &&
-   !defined $casespec->{az}->{lower} &&
-   $casespec->{az}->{title} eq '0307'  &&
-   $casespec->{az}->{upper} eq '0307' &&
-   $casespec->{az}->{condition} eq 'az After_I',
-   'casespec 0x307');
+if ($v_unicode_version gt v3.1.0) {
+    ok($casespec->{az}->{code} eq '0307'
+    && !defined $casespec->{az}->{lower}
+    && $casespec->{az}->{title} eq '0307'
+    && $casespec->{az}->{upper} eq '0307'
+    && $casespec->{az}->{condition} eq (($v_unicode_version le v3.2)
+                                        ? 'az After_Soft_Dotted'
+                                        : 'az After_I'), # parens: ?: binds looser than eq
+    'casespec 0x307');
+}
 
 # perl #7305 UnicodeCD::compexcl is weird
 
@@ -699,11 +759,15 @@ is(Unicode::UCD::_getcode('x123'),    undef, "_getcode(x123)");
 is(Unicode::UCD::_getcode('0x123x'),  undef, "_getcode(x123)");
 is(Unicode::UCD::_getcode('U+123x'),  undef, "_getcode(x123)");
 
+SKIP:
 {
+    skip("Script property not in this release", 3) if $v_unicode_version lt v3.1.0;
     my $r1 = charscript('Latin');
     if (ok(defined $r1, "Found Latin script")) {
+        skip("Latin range count will be wrong when using older Unicode release",
+             2) if $v_unicode_version lt pack("C*", split /\./, $expected_version);
         my $n1 = @$r1;
-        is($n1, 31, "number of ranges in Latin script (Unicode 7.0.0)") if $::IS_ASCII;
+        is($n1, 31, "number of ranges in Latin script (Unicode $expected_version)") if $::IS_ASCII;
         shift @$r1 while @$r1;
         my $r2 = charscript('Latin');
         is(@$r2, $n1, "modifying results should not mess up internal caches");
@@ -714,38 +778,72 @@ is(Unicode::UCD::_getcode('U+123x'),  undef, "_getcode(x123)");
        is(charinfo(0xdeadbeef), undef, "[perl #23273] warnings in Unicode::UCD");
 }
 
-use Unicode::UCD qw(namedseq);
-
-is(namedseq("KATAKANA LETTER AINU P"), "\x{31F7}\x{309A}", "namedseq");
-is(namedseq("KATAKANA LETTER AINU Q"), undef);
-is(namedseq(), undef);
-is(namedseq(qw(foo bar)), undef);
-my @ns = namedseq("KATAKANA LETTER AINU P");
-is(scalar @ns, 2);
-is($ns[0], 0x31F7);
-is($ns[1], 0x309A);
-my %ns = namedseq();
-is($ns{"KATAKANA LETTER AINU P"}, "\x{31F7}\x{309A}");
-@ns = namedseq(42);
-is(@ns, 0);
+if ($v_unicode_version ge v4.1.0) {
+    use Unicode::UCD qw(namedseq);
+
+    is(namedseq("KATAKANA LETTER AINU P"), "\x{31F7}\x{309A}", "namedseq");
+    is(namedseq("KATAKANA LETTER AINU Q"), undef);
+    is(namedseq(), undef);
+    is(namedseq(qw(foo bar)), undef);
+    my @ns = namedseq("KATAKANA LETTER AINU P");
+    is(scalar @ns, 2);
+    is($ns[0], 0x31F7);
+    is($ns[1], 0x309A);
+    my %ns = namedseq();
+    is($ns{"KATAKANA LETTER AINU P"}, "\x{31F7}\x{309A}");
+    @ns = namedseq(42);
+    is(@ns, 0);
+}
 
 use Unicode::UCD qw(num);
-use charnames ":full";
+use charnames ();   # Don't use \N{} on things not in original Unicode
+                    # version; else will get a compilation error when this .t
+                    # is run on an older version.
 
 is(num("0"), 0, 'Verify num("0") == 0');
 is(num("98765"), 98765, 'Verify num("98765") == 98765');
-ok(! defined num("98765\N{FULLWIDTH DIGIT FOUR}"), 'Verify num("98765\N{FULLWIDTH DIGIT FOUR}") isnt defined');
-is(num("\N{NEW TAI LUE DIGIT TWO}"), 2, 'Verify num("\N{NEW TAI LUE DIGIT TWO}") == 2');
-is(num("\N{NEW TAI LUE DIGIT ONE}"), 1, 'Verify num("\N{NEW TAI LUE DIGIT ONE}") == 1');
-is(num("\N{NEW TAI LUE DIGIT TWO}\N{NEW TAI LUE DIGIT ONE}"), 21, 'Verify num("\N{NEW TAI LUE DIGIT TWO}\N{NEW TAI LUE DIGIT ONE}") == 21');
-ok(! defined num("\N{NEW TAI LUE DIGIT TWO}\N{NEW TAI LUE THAM DIGIT ONE}"), 'Verify num("\N{NEW TAI LUE DIGIT TWO}\N{NEW TAI LUE THAM DIGIT ONE}") isnt defined');
-is(num("\N{CHAM DIGIT ZERO}\N{CHAM DIGIT THREE}"), 3, 'Verify num("\N{CHAM DIGIT ZERO}\N{CHAM DIGIT THREE}") == 3');
-ok(! defined num("\N{CHAM DIGIT ZERO}\N{JAVANESE DIGIT NINE}"), 'Verify num("\N{CHAM DIGIT ZERO}\N{JAVANESE DIGIT NINE}") isnt defined');
+ok(! defined num("98765\N{FULLWIDTH DIGIT FOUR}"),
+   'Verify num("98765\N{FULLWIDTH DIGIT FOUR}") isnt defined');
+my $tai_lue_2;
+if ($v_unicode_version ge v4.1.0) {
+    my $tai_lue_1 = charnames::string_vianame("NEW TAI LUE DIGIT ONE");
+    $tai_lue_2 = charnames::string_vianame("NEW TAI LUE DIGIT TWO");
+    is(num($tai_lue_2), 2, 'Verify num("\N{NEW TAI LUE DIGIT TWO}") == 2');
+    is(num($tai_lue_1), 1, 'Verify num("\N{NEW TAI LUE DIGIT ONE}") == 1');
+    is(num($tai_lue_2 . $tai_lue_1), 21,
+       'Verify num("\N{NEW TAI LUE DIGIT TWO}\N{NEW TAI LUE DIGIT ONE}") == 21');
+}
+if ($v_unicode_version ge v5.2.0) {
+    ok(! defined num($tai_lue_2
+         . charnames::string_vianame("NEW TAI LUE THAM DIGIT ONE")),
+         'Verify num("\N{NEW TAI LUE DIGIT TWO}\N{NEW TAI LUE THAM DIGIT ONE}") isnt defined');
+}
+if ($v_unicode_version ge v5.1.0) {
+    my $cham_0 = charnames::string_vianame("CHAM DIGIT ZERO");
+    is(num($cham_0 . charnames::string_vianame("CHAM DIGIT THREE")), 3,
+       'Verify num("\N{CHAM DIGIT ZERO}\N{CHAM DIGIT THREE}") == 3');
+    if ($v_unicode_version ge v5.2.0) {
+        ok(! defined num(  $cham_0
+                         . charnames::string_vianame("JAVANESE DIGIT NINE")),
+        'Verify num("\N{CHAM DIGIT ZERO}\N{JAVANESE DIGIT NINE}") isnt defined');
+    }
+}
 is(num("\N{SUPERSCRIPT TWO}"), 2, 'Verify num("\N{SUPERSCRIPT TWO} == 2');
-is(num("\N{ETHIOPIC NUMBER TEN THOUSAND}"), 10000, 'Verify num("\N{ETHIOPIC NUMBER TEN THOUSAND}") == 10000');
-is(num("\N{NORTH INDIC FRACTION ONE HALF}"), .5, 'Verify num("\N{NORTH INDIC FRACTION ONE HALF}") == .5');
-is(num("\N{U+12448}"), 9, 'Verify num("\N{U+12448}") == 9');
-is(num("\N{U+5146}"), 1000000000000, 'Verify num("\N{U+5146}") == 1000000000000');
+if ($v_unicode_version ge v3.0.0) {
+    is(num(charnames::string_vianame("ETHIOPIC NUMBER TEN THOUSAND")), 10000,
+       'Verify num("\N{ETHIOPIC NUMBER TEN THOUSAND}") == 10000');
+}
+if ($v_unicode_version ge v5.2.0) {
+    is(num(charnames::string_vianame("NORTH INDIC FRACTION ONE HALF")),
+       .5,
+       'Verify num("\N{NORTH INDIC FRACTION ONE HALF}") == .5');
+    is(num("\N{U+12448}"), 9, 'Verify num("\N{U+12448}") == 9');
+}
+if ($v_unicode_version gt v3.2.0) { # Is missing from non-Unihan files before
+                                    # this
+    is(num("\N{U+5146}"), 1000000000000,
+                                'Verify num("\N{U+5146}") == 1000000000000');
+}
 
 # Create a user-defined property
 sub InKana {<<'END'}
@@ -979,6 +1077,12 @@ while (<$propvalues>) {
     my @fields = split /\s*;\s*/; # Fields are separated by semi-colons
     my $prop = shift @fields;   # 0th field is the property,
 
+    # 'qc' is short in early versions of the file for any of the quick check
+    # properties.  Choose one of them.
+    if ($prop eq 'qc' && $v_unicode_version le v4.0.0) {
+        $prop = "NFKC_QC";
+    }
+
     # When changing properties, we examine the accumulated values for the old
     # one to see if our function that returns them matches.
     if ($prev_prop ne $prop) {
@@ -986,6 +1090,11 @@ while (<$propvalues>) {
             my @ucd_function_values = prop_values($prev_prop);
             @ucd_function_values = () unless @ucd_function_values;
 
+            # The file didn't include strictly numeric values until after this
+            if ($prev_prop eq 'ccc' && $v_unicode_version le v6.0.0) {
+                @ucd_function_values = grep { /\D/ } @ucd_function_values;
+            }
+
             # This perl extension doesn't appear in the official file
             push @this_prop_values, "Non_Canon" if $prev_prop eq 'dt';
 
@@ -1008,6 +1117,12 @@ while (<$propvalues>) {
     # characters that are ignored under loose matching to test that
     my $mod_prop = "$extra_chars$prop";
 
+    if ($prop eq 'blk' && $v_unicode_version le v5.0.0) {
+        foreach my $element (@fields) {
+            $element =~ s/-/_/g;
+        }
+    }
+
     if ($fields[0] eq 'n/a') {  # See comments in input file, essentially
                                 # means full name and short name are identical
         $fields[0] = $fields[1];
@@ -1190,36 +1305,39 @@ if ($::IS_ASCII) { # On EBCDIC, other things will come first, and can vary
 
     $prop = "lc";
     ($invlist_ref, $invmap_ref, $format, $missing) = prop_invmap($prop);
-    is($format, 'al', "prop_invmap() format of '$prop' is 'al'");
+    my $lc_format = ($v_unicode_version ge v3.2.0) ? 'al' : 'a';
+    is($format, $lc_format, "prop_invmap() format of '$prop' is '$lc_format'");
     is($missing, '0', "prop_invmap() missing of '$prop' is '0'");
     is($invlist_ref->[1], 0x41, "prop_invmap('$prop') list[1] is 0x41");
     is($invmap_ref->[1], 0x61, "prop_invmap('$prop') map[1] is 0x61");
 }
 
 # This property is stable and small, so can test all of it
-$prop = "ASCII_Hex_Digit";
-($invlist_ref, $invmap_ref, $format, $missing) = prop_invmap($prop);
-is($format, 's', "prop_invmap() format of '$prop' is 's'");
-is($missing, 'N', "prop_invmap() missing of '$prop' is 'N'");
-if ($::IS_ASCII) {
-    is_deeply($invlist_ref, [ 0x0000, 0x0030, 0x003A,
-                              0x0041, 0x0047,
-                              0x0061, 0x0067, 0x110000
-                            ],
-          "prop_invmap('$prop') code point list is correct");
-}
-elsif ($::IS_EBCDIC) {
-    is_deeply($invlist_ref, [
-            utf8::unicode_to_native(0x0000),
-            utf8::unicode_to_native(0x0061), utf8::unicode_to_native(0x0066) + 1,
-            utf8::unicode_to_native(0x0041), utf8::unicode_to_native(0x0046) + 1,
-            utf8::unicode_to_native(0x0030), utf8::unicode_to_native(0x0039) + 1,
-            utf8::unicode_to_native(0x110000)
-          ],
-          "prop_invmap('$prop') code point list is correct");
+if ($v_unicode_version gt v3.1.0) {
+    $prop = "ASCII_Hex_Digit";
+    ($invlist_ref, $invmap_ref, $format, $missing) = prop_invmap($prop);
+    is($format, 's', "prop_invmap() format of '$prop' is 's'");
+    is($missing, 'N', "prop_invmap() missing of '$prop' is 'N'");
+    if ($::IS_ASCII) {
+        is_deeply($invlist_ref, [ 0x0000, 0x0030, 0x003A,
+                                0x0041, 0x0047,
+                                0x0061, 0x0067, 0x110000
+                                ],
+            "prop_invmap('$prop') code point list is correct");
+    }
+    elsif ($::IS_EBCDIC) {
+        is_deeply($invlist_ref, [
+                utf8::unicode_to_native(0x0000),
+                utf8::unicode_to_native(0x0061), utf8::unicode_to_native(0x0066) + 1,
+                utf8::unicode_to_native(0x0041), utf8::unicode_to_native(0x0046) + 1,
+                utf8::unicode_to_native(0x0030), utf8::unicode_to_native(0x0039) + 1,
+                utf8::unicode_to_native(0x110000)
+            ],
+            "prop_invmap('$prop') code point list is correct");
+    }
+    is_deeply($invmap_ref, [ 'N', 'Y', 'N', 'Y', 'N', 'Y', 'N', 'N' ] ,
+            "prop_invmap('$prop') map list is correct");
 }
-is_deeply($invmap_ref, [ 'N', 'Y', 'N', 'Y', 'N', 'Y', 'N', 'N' ] ,
-          "prop_invmap('$prop') map list is correct");
 
 is(prop_invlist("Unknown property"), undef, "prop_invlist(<Unknown property>) returns undef");
 is(prop_invlist(undef), undef, "prop_invlist(undef) returns undef");
@@ -1240,36 +1358,38 @@ is(prop_invlist("InKana"), undef, "prop_invlist(<user-defined property returns u
 # are there in the files.  As a small hedge against that, test some
 # prop_invlist() tables fully with the known correct result.  We choose
 # ASCII_Hex_Digit again, as it is stable.
-if ($::IS_ASCII) {
-    @invlist = prop_invlist("AHex");
-    is_deeply(\@invlist, [ 0x0030, 0x003A, 0x0041,
-                                 0x0047, 0x0061, 0x0067 ],
-          "prop_invlist('AHex') is exactly the expected set of points");
-    @invlist = prop_invlist("AHex=f");
-    is_deeply(\@invlist, [ 0x0000, 0x0030, 0x003A, 0x0041,
-                                 0x0047, 0x0061, 0x0067 ],
-          "prop_invlist('AHex=f') is exactly the expected set of points");
-}
-elsif ($::IS_EBCDIC) { # Relies on the ranges 0-9, a-f, and A-F each being
-                    # contiguous
-    @invlist = prop_invlist("AHex");
-    is_deeply(\@invlist, [
-            utf8::unicode_to_native(0x0061), utf8::unicode_to_native(0x0066) + 1,
-            utf8::unicode_to_native(0x0041), utf8::unicode_to_native(0x0046) + 1,
-            utf8::unicode_to_native(0x0030), utf8::unicode_to_native(0x0039) + 1,
-       ],
-       "prop_invlist('AHex') is exactly the expected set of points");
-    @invlist = prop_invlist("AHex=f");
-    is_deeply(\@invlist, [
-            utf8::unicode_to_native(0x0000),
-            utf8::unicode_to_native(0x0061),
-            utf8::unicode_to_native(0x0066) + 1,
-            utf8::unicode_to_native(0x0041),
-            utf8::unicode_to_native(0x0046) + 1,
-            utf8::unicode_to_native(0x0030),
-            utf8::unicode_to_native(0x0039) + 1,
-       ],
-       "prop_invlist('AHex=f') is exactly the expected set of points");
+if ($v_unicode_version gt v3.1.0) {
+    if ($::IS_ASCII) {
+        @invlist = prop_invlist("AHex");
+        is_deeply(\@invlist, [ 0x0030, 0x003A, 0x0041,
+                                    0x0047, 0x0061, 0x0067 ],
+            "prop_invlist('AHex') is exactly the expected set of points");
+        @invlist = prop_invlist("AHex=f");
+        is_deeply(\@invlist, [ 0x0000, 0x0030, 0x003A, 0x0041,
+                                    0x0047, 0x0061, 0x0067 ],
+            "prop_invlist('AHex=f') is exactly the expected set of points");
+    }
+    elsif ($::IS_EBCDIC) { # Relies on the ranges 0-9, a-f, and A-F each being
+                        # contiguous
+        @invlist = prop_invlist("AHex");
+        is_deeply(\@invlist, [
+                utf8::unicode_to_native(0x0061), utf8::unicode_to_native(0x0066) + 1,
+                utf8::unicode_to_native(0x0041), utf8::unicode_to_native(0x0046) + 1,
+                utf8::unicode_to_native(0x0030), utf8::unicode_to_native(0x0039) + 1,
+        ],
+        "prop_invlist('AHex') is exactly the expected set of points");
+        @invlist = prop_invlist("AHex=f");
+        is_deeply(\@invlist, [
+                utf8::unicode_to_native(0x0000),
+                utf8::unicode_to_native(0x0061),
+                utf8::unicode_to_native(0x0066) + 1,
+                utf8::unicode_to_native(0x0041),
+                utf8::unicode_to_native(0x0046) + 1,
+                utf8::unicode_to_native(0x0030),
+                utf8::unicode_to_native(0x0039) + 1,
+        ],
+        "prop_invlist('AHex=f') is exactly the expected set of points");
+    }
 }
 
 sub fail_with_diff ($$$$) {
@@ -1547,7 +1667,7 @@ foreach my $prop (sort(keys %props), sort keys %legacy_props) {
                 fail("prop_invmap('$prop')");
                 diag("is unknown to prop_aliases(), and we need it in order to test prop_invmap");
             }
-        next PROPERTY;
+            next PROPERTY;
         }
     }
 
@@ -1555,6 +1675,14 @@ foreach my $prop (sort(keys %props), sort keys %legacy_props) {
     # normalized version.
     $name = &utf8::_loose_name(lc $name);
 
+    # In the case of a combination property, both a map table and a match
+    # table are generated.  For all the tests except prop_invmap(), this is
+    # irrelevant, but for prop_invmap, having an 'is' prefix forces it to
+    # return the match table; otherwise the map.  We thus need to distinguish
+    # between the two forms.  The property name is what has this information.
+    $name = &utf8::_loose_name(lc $prop)
+                         if exists $Unicode::UCD::combination_property{$name};
+
     # Add in the characters that are supposed to be ignored to test loose
     # matching, which the tested function applies to all properties
     $display_prop = "$extra_chars$prop" unless $display_prop;
@@ -2309,7 +2437,7 @@ foreach my $prop (sort(keys %props), sort keys %legacy_props) {
         # And remove the aliases.  We read in the Name_Alias property, and go
         # through them one by one.
         my ($aliases_code_points, $aliases_maps, undef, undef)
-                                                = &prop_invmap('Name_Alias');
+                = &prop_invmap('_Perl_Name_Alias', '_perl_core_internal_ok');
         for (my $i = 0; $i < @$aliases_code_points; $i++) {
             my $code_point = $aliases_code_points->[$i];
 
@@ -2534,11 +2662,13 @@ foreach my $prop (sort(keys %props), sort keys %legacy_props) {
 # A few tests of search_invlist
 use Unicode::UCD qw(search_invlist);
 
-my ($scripts_ranges_ref, $scripts_map_ref) = prop_invmap("Script");
-my $index = search_invlist($scripts_ranges_ref, 0x390);
-is($scripts_map_ref->[$index], "Greek", "U+0390 is Greek");
-my @alpha_invlist = prop_invlist("Alpha");
-is(search_invlist(\@alpha_invlist, ord("\t")), undef, "search_invlist returns undef for code points before first one on the list");
+if ($v_unicode_version ge v3.1.0) { # No Script property before this
+    my ($scripts_ranges_ref, $scripts_map_ref) = prop_invmap("Script");
+    my $index = search_invlist($scripts_ranges_ref, 0x390);
+    is($scripts_map_ref->[$index], "Greek", "U+0390 is Greek");
+    my @alpha_invlist = prop_invlist("Alpha");
+    is(search_invlist(\@alpha_invlist, ord("\t")), undef, "search_invlist returns undef for code points before first one on the list");
+}
 
 ok($/ eq $input_record_separator,  "The record separator didn't get overridden");
 
index 1ebd0ce..1b510d2 100644 (file)
@@ -848,7 +848,8 @@ sub disp_str ($) {
             }
             else {
                 $result .= "  " unless $prev_was_punct;
-                $result .= charnames::viacode(ord $char);
+                my $name = charnames::viacode(ord $char);
+                $result .= (defined $name) ? $name : ':unknown:';
                 $prev_was_punct = 0;
             }
         }
index ef5fec6..f892334 100644 (file)
@@ -1,15 +1,12 @@
 # The goal is for perl to compile and reasonably run any version of Unicode.
-# But in v5.22, the earliest version that this works for is Unicode 5.1.
 # Working reasonably well doesn't mean that the test suite will run without
-# showing errors.  You may be able to compile an earlier version, and get
-# things to sort-of work. A few of the very-Unicode specific test files have been
+# showing errors.  A few of the very-Unicode specific test files have been
 # modified to account for different versions, but most have not.  For example,
 # some tests use characters that aren't encoded in all Unicode versions; others
 # have hard-coded the General Categories for a code point that were correct at
 # the time the test was written.  Perl itself will not compile under Unicode
 # releases prior to 3.0 without a simple change to Unicode::Normalize.
-# mktables contains instructions for this, as well as other hints for using
-# older Unicode versions.
+# mktables contains instructions for this.
 
 # The *.txt files were copied from
 
 
 # (which always points to the latest version) with subdirectories 'extracted' and
 # 'auxiliary'.  Older versions are located under Public with an appropriate name.
+# They are also available via http at www.unicode.org/versions/
+#
 
 # The Unihan files were not included due to space considerations.  Also NOT
-# included were any *.html files.  It is possible to add the Unihan files, and
-# edit mktables (see instructions near its beginning) to look at them.
+# included were any *.html files.  It is possible to add the Unihan files and
+# have some properties from them automatically compiled.  By editing mktables
+# (see instructions near its beginning) you can add other Unihan properties.
 
 # The file named 'version' should exist and be a single line with the Unicode
 # version, like:
+#
 # 5.2.0
+#
+# (without the initial '# ')
 
 # To be 8.3 filesystem friendly, the names of some of the input files have been
 # changed from the values that are in the Unicode DB.  Not all of the Test
@@ -58,6 +61,8 @@ mv extracted/DerivedJoiningType.txt extracted/DJoinType.txt
 mv extracted/DerivedLineBreak.txt extracted/DLineBreak.txt
 mv extracted/DerivedNumericType.txt extracted/DNumType.txt
 mv extracted/DerivedNumericValues.txt extracted/DNumValues.txt
+rmdir extracted 2>/dev/null     # Will fail if non-empty; if it is empty, this
+                                # was an early release that didn't have it.
 
 mv auxiliary/GraphemeBreakTest.txt auxiliary/GCBTest.txt
 mv auxiliary/LineBreakTest.txt auxiliary/LBTest.txt
@@ -81,14 +86,31 @@ mv Unihan_Variants.txt UnihanVariants.txt
 # filesystems.
 
 # mktables is used to generate the tables used by the rest of Perl.  It will
-# warn you about any *.txt files in the directory substructure that it doesn't
-# know about.  You should remove any so-identified, or edit mktables to add
-# them to its lists to process.  You can run
+# warn you about any *.txt and *.html files in the directory substructure that
+# it doesn't know about.  You should remove any so-identified, or edit mktables
+# to add them to its lists to process.  You can run
 #
 #    mktables -globlist
 #
-#to have it try to process these tables generically.
-#
+# to have it try to process these tables generically.
+
+# COMPILING ON OLDER UNICODE VERSIONS
+#
+# To compile perl for use with an older Unicode release, delete everything in
+# the lib/unicore directory except mktables and Makefile.  Then download the
+# Unicode-supplied files for the desired version to that directory (a URL for
+# these is given earlier in this file).  Then create the 'version' file with a
+# single line, like '6.1.0'.  Do a 'make test' from the project level.  You
+# will get some porting errors for needing to regen.  Regenerate what it tells
+# you is needed, and make test again.  If you compile an old enough version,
+# you will also have to download a few files from later Unicode versions,
+# following the instructions that will be given if warranted.  It should
+# compile in any release without warnings, except for some casing conflicts
+# in Unicode 2.1.8, and some extraneous files will show up in very early
+# releases of the form qr/diff.*\.txt/.  If you add Unihan.txt, one line is in error in
+#
+# Other glitches are noted in mktables under 'UNICODE VERSIONS NOTES'
+
 # FOR PUMPKINS
 #
 # The files are inter-related.  If you take the latest UnicodeData.txt, for
index 572c299..27cb45a 100644 (file)
@@ -4,17 +4,9 @@
 # Any files created or read by this program should be listed in 'mktables.lst'
 # Use -makelist to regenerate it.
 
-# Needs 'no overloading' to run faster on miniperl.  Code commented out at the
-# subroutine objaddr can be used instead to work as far back (untested) as
-# 5.8: needs pack "U".  But almost all occurrences of objaddr have been
-# removed in favor of using 'no overloading'.  You also would have to go
-# through and replace occurrences like:
-#       my $addr = do { no overloading; pack 'J', $self; }
-# with
-#       my $addr = main::objaddr $self;
-# (or reverse commit 9b01bafde4b022706c3d6f947a0963f821b2e50b
-# that instituted the change to main::objaddr, and subsequent commits that
-# changed 0+$self to pack 'J', $self.)
+# There was an attempt when this was first rewritten to make it 5.8
+# compatible, but that has now been abandoned, and newer constructs are used
+# as convenient.
 
 my $start_time;
 BEGIN { # Get the time the script started running; do it at compilation to
@@ -32,6 +24,7 @@ use File::Path;
 use File::Spec;
 use Text::Tabs;
 use re "/aa";
+use feature 'state';
 
 sub DEBUG () { 0 }  # Set to 0 for production; 1 for development
 my $debugging_build = $Config{"ccflags"} =~ /-DDEBUGGING/;
@@ -292,8 +285,8 @@ my $unicode_reference_url = 'http://www.unicode.org/reports/tr44/';
 # As mentioned earlier, some properties are given in more than one file.  In
 # particular, the files in the extracted directory are supposedly just
 # reformattings of the others.  But they contain information not easily
-# derivable from the other files, including results for Unihan, which this
-# program doesn't ordinarily look at, and for unassigned code points.  They
+# derivable from the other files, including results for Unihan (which isn't
+# usually available to this program) and for unassigned code points.  They
 # also have historically had errors or been incomplete.  In an attempt to
 # create the best possible data, this program thus processes them first to
 # glean information missing from the other files; then processes those other
@@ -411,24 +404,19 @@ my $unicode_reference_url = 'http://www.unicode.org/reports/tr44/';
 #
 # A NOTE ON UNIHAN
 #
-# This program can generate tables from the Unihan database.  But it doesn't
-# by default, letting the CPAN module Unicode::Unihan handle them.  Prior to
-# version 5.2, this database was in a single file, Unihan.txt.  In 5.2 the
-# database was split into 8 different files, all beginning with the letters
-# 'Unihan'.  This program will read those file(s) if present, but it needs to
-# know which of the many properties in the file(s) should have tables created
-# for them.  It will create tables for any properties listed in
-# PropertyAliases.txt and PropValueAliases.txt, plus any listed in the
-# @cjk_properties array and the @cjk_property_values array.  Thus, if a
-# property you want is not in those files of the release you are building
-# against, you must add it to those two arrays.  Starting in 4.0, the
-# Unicode_Radical_Stroke was listed in those files, so if the Unihan database
-# is present in the directory, a table will be generated for that property.
-# In 5.2, several more properties were added.  For your convenience, the two
-# arrays are initialized with all the 6.0 listed properties that are also in
-# earlier releases.  But these are commented out.  You can just uncomment the
-# ones you want, or use them as a template for adding entries for other
-# properties.
+# This program can generate tables from the Unihan database.  But that db
+# isn't normally available, so it is marked as optional.  Prior to version
+# 5.2, this database was in a single file, Unihan.txt.  In 5.2 the database
+# was split into 8 different files, all beginning with the letters 'Unihan'.
+# If you plunk those files down into the directory mktables ($0) is in, this
+# program will read them and automatically create tables for the properties
+# from it that are listed in PropertyAliases.txt and PropValueAliases.txt,
+# plus any you add to the @cjk_properties array and the @cjk_property_values
+# array, being sure to add necessary '# @missing' lines to the latter.  For
+# Unicode versions earlier than 5.2, most of the Unihan properties are not
+# listed at all in PropertyAliases or PropValueAliases.  This program assumes
+# for these early releases that you want the properties that are specified in
+# the 5.2 release.
 #
 # You may need to adjust the entries to suit your purposes.  setup_unihan(),
 # and filter_unihan_line() are the functions where this is done.  This program
@@ -437,8 +425,8 @@ my $unicode_reference_url = 'http://www.unicode.org/reports/tr44/';
 #
 # There is a bug in the 3.2 data file in which some values for the
 # kPrimaryNumeric property have commas and an unexpected comment.  A filter
-# could be added for these; or for a particular installation, the Unihan.txt
-# file could be edited to fix them.
+# could be added to correct these; or for a particular installation, the
+# Unihan.txt file could be edited to fix them.
 #
 # HOW TO ADD A FILE TO BE PROCESSED
 #
@@ -484,13 +472,13 @@ my $unicode_reference_url = 'http://www.unicode.org/reports/tr44/';
 # handled by Unicode::Normalize, nor will it compile when presented a version
 # that has them.  However, you can trivially get it to compile by simply
 # ignoring those decompositions, by changing the croak to a carp.  At the time
-# of this writing, the line (in cpan/Unicode-Normalize/mkheader) reads
+# of this writing, the line (in cpan/Unicode-Normalize/Normalize.pm or
+# cpan/Unicode-Normalize/mkheader) reads
 #
 #   croak("Weird Canonical Decomposition of U+$h");
 #
 # Simply comment it out.  It will compile, but will not know about any three
-# character decompositions.  If using the .pm version, there is a similar
-# line.
+# character decompositions.
 
 # The number of code points in \p{alpha=True} halved in 2.1.9.  It turns out
 # that the reason is that the CJK block starting at 4E00 was removed from
@@ -513,10 +501,13 @@ my $unicode_reference_url = 'http://www.unicode.org/reports/tr44/';
 # name for the class, it would not have been affected, but if it used the
 # mnemonic, it would have been.
 #
-# \p{Script=Hrkt} (Katakana_Or_Hiragana) came in 4.0.1.  Before that code
+# \p{Script=Hrkt} (Katakana_Or_Hiragana) came in 4.0.1.  Before that, code
 # points which eventually came to have this script property value, instead
 # mapped to "Unknown".  But in the next release all these code points were
 # moved to \p{sc=common} instead.
+
+# The tests furnished by Unicode for testing WordBreak and SentenceBreak
+# generate errors in 5.0 and earlier.
 #
 # The default for missing code points for BidiClass is complicated.  Starting
 # in 3.1.1, the derived file DBidiClass.txt handles this, but this program
@@ -596,8 +587,8 @@ our $to_trace = 0;
                                     || $caller_name eq 'trace');
 
         my $output = "";
+        #print STDERR __LINE__, ": ", join ", ", @input, "\n";
         foreach my $string (@input) {
-            #print STDERR __LINE__, ": ", join ", ", @input, "\n";
             if (ref $string eq 'ARRAY' || ref $string eq 'HASH') {
                 $output .= simple_dumper($string);
             }
@@ -623,10 +614,11 @@ our $to_trace = 0;
 
 # This is for a rarely used development feature that allows you to compare two
 # versions of the Unicode standard without having to deal with changes caused
-# by the code points introduced in the later version.  Change the 0 to a
-# string containing a SINGLE dotted Unicode release number (e.g. "2.1").  Only
-# code points introduced in that release and earlier will be used; later ones
-# are thrown away.  You use the version number of the earliest one you want to
+# by the code points introduced in the later version.  You probably also want
+# to use the -annotate option when using this.  Change the 0 to a string
+# containing a SINGLE dotted Unicode release number (e.g. "2.1").  Only code
+# points introduced in that release and earlier will be used; later ones are
+# thrown away.  You use the version number of the earliest one you want to
 # compare; then run this program on directory structures containing each
 # release, and compare the outputs.  These outputs will therefore include only
 # the code points common to both releases, and you can see the changes caused
@@ -861,33 +853,8 @@ if ($v_version gt v3.2.0) {
                                 'Canonical_Combining_Class=Attached_Below_Left'
 }
 
-# These are listed in the Property aliases file in 6.0, but Unihan is ignored
-# unless explicitly added.
-if ($v_version ge v5.2.0) {
-    my $unihan = 'Unihan; remove from list if using Unihan';
-    foreach my $table (qw (
-                           kAccountingNumeric
-                           kOtherNumeric
-                           kPrimaryNumeric
-                           kCompatibilityVariant
-                           kIICore
-                           kIRG_GSource
-                           kIRG_HSource
-                           kIRG_JSource
-                           kIRG_KPSource
-                           kIRG_MSource
-                           kIRG_KSource
-                           kIRG_TSource
-                           kIRG_USource
-                           kIRG_VSource
-                           kRSUnicode
-                        ))
-    {
-        $why_suppress_if_empty_warn_if_not{$table} = $unihan;
-    }
-}
-
-# Enum values for to_output_map() method in the Map_Table package.
+# Enum values for to_output_map() method in the Map_Table package.  (0 means
+# don't output.)
 my $EXTERNAL_MAP = 1;
 my $INTERNAL_MAP = 2;
 my $OUTPUT_ADJUSTED = 3;
@@ -913,13 +880,6 @@ my %global_to_output_map = (
     Decomposition_Type => 0,
 );
 
-# Properties that this program ignores.
-my @unimplemented_properties;
-
-# With this release, it is automatically handled if the Unihan db is
-# downloaded
-push @unimplemented_properties, 'Unicode_Radical_Stroke' if $v_version le v5.2.0;
-
 # There are several types of obsolete properties defined by Unicode.  These
 # must be hand-edited for every new Unicode release.
 my %why_deprecated;  # Generates a deprecated warning message if used.
@@ -959,8 +919,6 @@ my %why_obsolete;    # Documentation only
         # existence is not noted in the comment.
         'Decomposition_Mapping' => 'Accessible via Unicode::Normalize or prop_invmap() or charprop() in Unicode::UCD::',
 
-        'Indic_Matra_Category' => "Withdrawn by Unicode while still provisional",
-
         # Don't suppress ISO_Comment, as otherwise special handling is needed
         # to differentiate between it and gc=c, which can be written as 'isc',
         # which is the same characters as ISO_Comment's short name.
@@ -1046,45 +1004,13 @@ if ($v_version ge v6.0.0) {
 my @output_mapped_properties = split "\n", <<END;
 END
 
-# If you are using the Unihan database in a Unicode version before 5.2, you
-# need to add the properties that you want to extract from it to this table.
-# For your convenience, the properties in the 6.0 PropertyAliases.txt file are
-# listed, commented out
+# If you want more Unihan properties than the default, you need to add them to
+# these arrays.  Depending on the property type, @missing lines might have to
+# be added to the second array.  A sample entry would be (including the '#'):
+# @missing: 0000..10FFFF; cjkAccountingNumeric; NaN
 my @cjk_properties = split "\n", <<'END';
-#cjkAccountingNumeric; kAccountingNumeric
-#cjkOtherNumeric; kOtherNumeric
-#cjkPrimaryNumeric; kPrimaryNumeric
-#cjkCompatibilityVariant; kCompatibilityVariant
-#cjkIICore ; kIICore
-#cjkIRG_GSource; kIRG_GSource
-#cjkIRG_HSource; kIRG_HSource
-#cjkIRG_JSource; kIRG_JSource
-#cjkIRG_KPSource; kIRG_KPSource
-#cjkIRG_KSource; kIRG_KSource
-#cjkIRG_TSource; kIRG_TSource
-#cjkIRG_USource; kIRG_USource
-#cjkIRG_VSource; kIRG_VSource
-#cjkRSUnicode; kRSUnicode                ; Unicode_Radical_Stroke; URS
 END
-
-# Similarly for the property values.  For your convenience, the lines in the
-# 6.0 PropertyAliases.txt file are listed.  Just remove the first BUT NOT both
-# '#' marks (for Unicode versions before 5.2)
 my @cjk_property_values = split "\n", <<'END';
-## @missing: 0000..10FFFF; cjkAccountingNumeric; NaN
-## @missing: 0000..10FFFF; cjkCompatibilityVariant; <code point>
-## @missing: 0000..10FFFF; cjkIICore; <none>
-## @missing: 0000..10FFFF; cjkIRG_GSource; <none>
-## @missing: 0000..10FFFF; cjkIRG_HSource; <none>
-## @missing: 0000..10FFFF; cjkIRG_JSource; <none>
-## @missing: 0000..10FFFF; cjkIRG_KPSource; <none>
-## @missing: 0000..10FFFF; cjkIRG_KSource; <none>
-## @missing: 0000..10FFFF; cjkIRG_TSource; <none>
-## @missing: 0000..10FFFF; cjkIRG_USource; <none>
-## @missing: 0000..10FFFF; cjkIRG_VSource; <none>
-## @missing: 0000..10FFFF; cjkOtherNumeric; NaN
-## @missing: 0000..10FFFF; cjkPrimaryNumeric; NaN
-## @missing: 0000..10FFFF; cjkRSUnicode; <none>
 END
 
 # The input files don't list every code point.  Those not listed are to be
@@ -1109,7 +1035,7 @@ my %default_mapping = (
     Decomposition_Type => 'None',
     East_Asian_Width => "Neutral",
     FC_NFKC_Closure => $CODE_POINT,
-    General_Category => 'Cn',
+    General_Category => ($v_version le 6.3.0) ? 'Cn' : 'Unassigned',
     Grapheme_Cluster_Break => 'Other',
     Hangul_Syllable_Type => 'NA',
     ISO_Comment => "",
@@ -1140,39 +1066,6 @@ my %default_mapping = (
     Word_Break => 'Other',
 );
 
-# Below are files that Unicode furnishes, but this program ignores, and why.
-# NormalizationCorrections.txt requires some more explanation.  It documents
-# the cumulative fixes to erroneous normalizations in earlier Unicode
-# versions.  Its main purpose is so that someone running on an earlier version
-# can use this file to override what got published in that earlier release.
-# It would be easy for mktables to read and handle this file.  But all the
-# corrections in it should already be in the other files for the release it
-# is.  To get it to actually mean something useful, someone would have to be
-# using an earlier Unicode release, and copy it to the files for that release
-# and recomplile.  So far there has been no demand to do that, so this hasn't
-# been implemented.
-my %ignored_files = (
-    'CJKRadicals.txt' => 'Maps the kRSUnicode property values to corresponding code points',
-    'Index.txt' => 'Alphabetical index of Unicode characters',
-    'NamedSqProv.txt' => 'Named sequences proposed for inclusion in a later version of the Unicode Standard; if you need them now, you can append this file to F<NamedSequences.txt> and recompile perl',
-    'NamesList.txt' => 'Annotated list of characters',
-    'NamesList.html' => 'Describes the format and contents of F<NamesList.txt>',
-    'NormalizationCorrections.txt' => 'Documentation of corrections already incorporated into the Unicode data base',
-    'Props.txt' => 'Only in very early releases; is a subset of F<PropList.txt> (which is used instead)',
-    'ReadMe.txt' => 'Documentation',
-    'StandardizedVariants.txt' => 'Certain glyph variations for character display are standardized.  This lists the non-Unihan ones; the Unihan ones are also not used by Perl, and are in a separate Unicode data base L<http://www.unicode.org/ivd>',
-    'StandardizedVariants.html' => 'Provides a visual display of the standard variant sequences derived from F<StandardizedVariants.txt>.',
-    'EmojiSources.txt' => 'Maps certain Unicode code points to their legacy Japanese cell-phone values',
-    'USourceData.txt' => 'Documentation of status and cross reference of proposals for encoding by Unicode of Unihan characters',
-    'USourceGlyphs.pdf' => 'Pictures of the characters in F<USourceData.txt>',
-    'auxiliary/WordBreakTest.html' => 'Documentation of validation tests',
-    'auxiliary/SentenceBreakTest.html' => 'Documentation of validation tests',
-    'auxiliary/GraphemeBreakTest.html' => 'Documentation of validation tests',
-    'auxiliary/LineBreakTest.html' => 'Documentation of validation tests',
-);
-
-my %skipped_files;  # List of files that we skip
-
 ### End of externally interesting definitions, except for @input_file_objects
 
 my $HEADER=<<"EOF";
@@ -1199,7 +1092,9 @@ my $DEVELOPMENT_ONLY=<<"EOF";
 
 EOF
 
-my $MAX_UNICODE_CODEPOINT_STRING = "10FFFF";
+my $MAX_UNICODE_CODEPOINT_STRING = ($v_version ge v2.0.0)
+                                   ? "10FFFF"
+                                   : "FFFF";
 my $MAX_UNICODE_CODEPOINT = hex $MAX_UNICODE_CODEPOINT_STRING;
 my $MAX_UNICODE_CODEPOINTS = $MAX_UNICODE_CODEPOINT + 1;
 
@@ -1229,8 +1124,7 @@ my $code_point_re = qr/\b$run_on_code_point_re/;
 # defaults for code points not listed (i.e., missing) in the file.  The code
 # depends on this ending with a semi-colon, so it can assume it is a valid
 # field when the line is split() by semi-colons
-my $missing_defaults_prefix =
-            qr/^#\s+\@missing:\s+0000\.\.$MAX_UNICODE_CODEPOINT_STRING\s*;/;
+my $missing_defaults_prefix = qr/^#\s+\@missing:\s+0000\.\.10FFFF\s*;/;
 
 # Property types.  Unicode has more types, but these are sufficient for our
 # purposes.
@@ -1307,11 +1201,15 @@ my $OBSOLETE = 'O';
 my $a_bold_obsolete = "an 'B<$OBSOLETE>'";
 my $A_bold_obsolete = "An 'B<$OBSOLETE>'";
 
+# Aliases can also have an extra status:
+my $INTERNAL_ALIAS = 'P';
+
 my %status_past_participles = (
     $DISCOURAGED => 'discouraged',
     $STABILIZED => 'stabilized',
     $OBSOLETE => 'obsolete',
     $DEPRECATED => 'deprecated',
+    $INTERNAL_ALIAS => 'reserved for Perl core internal use only',
 );
 
 # Table fates.  These are somewhat ordered, so that fates < $MAP_PROXIED should be
@@ -1374,18 +1272,21 @@ my %loose_to_file_of;       # loosely maps table names to their respective
                             # files
 my %stricter_to_file_of;    # same; but for stricter mapping.
 my %loose_property_to_file_of; # Maps a loose property name to its map file
+my %strict_property_to_file_of; # Same, but strict
 my @inline_definitions = "V0"; # Each element gives a definition of a unique
                             # inversion list.  When a definition is inlined,
                             # its value in the hash it's in (one of the two
                             # defined just above) will include an index into
                             # this array.  The 0th element is initialized to
-                            # the definition for a zero length invwersion list
+                            # the definition for a zero length inversion list
 my %file_to_swash_name;     # Maps the file name to its corresponding key name
                             # in the hash %utf8::SwashInfo
 my %nv_floating_to_rational; # maps numeric values floating point numbers to
                              # their rational equivalent
 my %loose_property_name_of; # Loosely maps (non_string) property names to
                             # standard form
+my %strict_property_name_of; # Strictly maps (non_string) property names to
+                            # standard form
 my %string_property_loose_to_name; # Same, for string properties.
 my %loose_defaults;         # keys are of form "prop=value", where 'prop' is
                             # the property name in standard loose form, and
@@ -1395,12 +1296,16 @@ my %loose_to_standard_value; # loosely maps table names to the canonical
                             # alias for them
 my %ambiguous_names;        # keys are alias names (in standard form) that
                             # have more than one possible meaning.
+my %combination_property;   # keys are alias names (in standard form) that
+                            # have both a map table, and a binary one that
+                            # yields true for all non-null maps.
 my %prop_aliases;           # Keys are standard property name; values are each
                             # one's aliases
 my %prop_value_aliases;     # Keys of top level are standard property name;
                             # values are keys to another hash,  Each one is
                             # one of the property's values, in standard form.
                             # The values are that prop-val's aliases.
+my %skipped_files;          # List of files that we skip
 my %ucd_pod;    # Holds entries that will go into the UCD section of the pod
 
 # Most properties are immune to caseless matching, otherwise you would get
@@ -1477,6 +1382,8 @@ my @named_sequences;       # NamedSequences.txt contents.
 my %potential_files;       # Generated list of all .txt files in the directory
                            # structure so we can warn if something is being
                            # ignored.
+my @missing_early_files;   # Generated list of absent files that we need to
+                           # proceed in compiling this early Unicode version
 my @files_actually_output; # List of files we generated.
 my @more_Names;            # Some code point names are compound; this is used
                            # to store the extra components of them.
@@ -1493,6 +1400,7 @@ my $block;
 my $perl_charname;
 my $print;
 my $All;
+my $Assigned;   # All assigned characters in this Unicode release
 my $script;
 
 # Are there conflicting names because of beginning with 'In_', or 'Is_'
@@ -1616,7 +1524,7 @@ sub populate_char_info ($) {
         elsif ($gc-> table('Unassigned')->contains($i)) {
             $annotate_char_type[$i] = $UNASSIGNED_TYPE;
             $printable[$i] = 0;
-            if ($v_version lt v2.0.0) { # No blocks in earliest releases
+            if (defined $block) { # No blocks in earliest releases
                 $viacode[$i] = 'Unassigned';
                 $end = $gc-> table('Unassigned')->containing_range($i)->end;
             }
@@ -1633,12 +1541,7 @@ sub populate_char_info ($) {
                                                     containing_range($i)->end);
             }
         }
-        elsif ($v_version lt v2.0.0) {  # No surrogates in earliest releases
-            $viacode[$i] = $gc->value_of($i);
-            $annotate_char_type[$i] = $UNKNOWN_TYPE;
-            $printable[$i] = 0;
-        }
-        elsif ($gc-> table('Surrogate')->contains($i)) {
+        elsif ($perl->table('_Perl_Surrogate')->contains($i)) {
             $viacode[$i] = 'Surrogate';
             $annotate_char_type[$i] = $SURROGATE_TYPE;
             $printable[$i] = 0;
@@ -2110,6 +2013,7 @@ package Input_file;
 # while(next_line()) {...} loop.
 #
 # You can also set up handlers to
+#   0) call during object construction time, after everything else is done
 #   1) call before the first line is read, for pre processing
 #   2) call to adjust each line of the input before the main handler gets
 #      them.  This can be automatically generated, if appropriately simple
@@ -2121,19 +2025,29 @@ package Input_file;
 # each_line_handler()s.  So, if the format of the line is not in the desired
 # format for the main handler, these are used to do that adjusting.  They can
 # be stacked (by enclosing them in an [ anonymous array ] in the constructor,
-# so the $_ output of one is used as the input to the next.  None of the other
-# handlers are stackable, but could easily be changed to be so.
+# so the $_ output of one is used as the input to the next.  The eof handler
+# is also stackable; none of the others are, though they could easily be
+# changed to be so.
+#
+# Some properties are used by the Perl core but aren't defined until later
+# Unicode releases.  The perl interpreter would have problems working when
+# compiled with an earlier Unicode version that doesn't have them, so we need
+# to define them somehow for those releases.  The 'Early' constructor
+# parameter can be used to automatically handle this.  It is essentially
+# ignored if the Unicode version being compiled has a data file for this
+# property.  Either code to execute or a file to read can be specified.
+# Details are at the %early definition.
 #
 # Most of the handlers can call insert_lines() or insert_adjusted_lines()
 # which insert the parameters as lines to be processed before the next input
-# file line is read.  This allows the EOF handler to flush buffers, for
+# file line is read.  This allows the EOF handler(s) to flush buffers, for
 # example.  The difference between the two routines is that the lines inserted
 # by insert_lines() are subjected to the each_line_handler()s.  (So if you
-# called it from such a handler, you would get infinite recursion.)  Lines
-# inserted by insert_adjusted_lines() go directly to the main handler without
-# any adjustments.  If the  post-processing handler calls any of these, there
-# will be no effect.  Some error checking for these conditions could be added,
-# but it hasn't been done.
+# called it from such a handler, you would get infinite recursion without some
+# mechanism to prevent that.)  Lines inserted by insert_adjusted_lines() go
+# directly to the main handler without any adjustments.  If the
+# post-processing handler calls any of these, there will be no effect.  Some
+# error checking for these conditions could be added, but it hasn't been done.
 #
 # carp_bad_line() should be called to warn of bad input lines, which clears $_
 # to prevent further processing of the line.  This routine will output the
@@ -2169,10 +2083,16 @@ sub trace { return main::trace(@_); }
     main::set_access('property', \%property, qw{ c r });
 
     my %optional;
-    # If this is true, the file is optional.  If not present, no warning is
-    # output.  If it is present, the string given by this parameter is
-    # evaluated, and if false the file is not processed.
-    main::set_access('optional', \%optional, 'c', 'r');
+    # This is either an unsigned number, or a list of property names.  In the
+    # former case, if it is non-zero, it means the file is optional, so if the
+    # file is absent, no warning about that is output.  In the latter case, it
+    # is a list of properties that the file (exclusively) defines.  If the
+    # file is present, tables for those properties will be produced; if
+    # absent, none will, even if they are listed elsewhere (namely
+    # PropertyAliases.txt and PropValueAliases.txt) as being in this release,
+    # and no warnings will be raised about them not being available.  (And no
+    # warning about the file itself will be raised.)
+    main::set_access('optional', \%optional, qw{ c readable_array } );
 
     my %non_skip;
     # This is used for debugging, to skip processing of all but a few input
@@ -2181,16 +2101,19 @@ sub trace { return main::trace(@_); }
     main::set_access('non_skip', \%non_skip, 'c');
 
     my %skip;
-    # This is used to skip processing of this input file semi-permanently,
-    # when it evaluates to true.  The value should be the reason the file is
-    # being skipped.  It is used for files that we aren't planning to process
-    # anytime soon, but want to allow to be in the directory and not raise a
-    # message that we are not handling.  Mostly for test files.  This is in
-    # contrast to the non_skip element, which is supposed to be used very
-    # temporarily for debugging.  Sets 'optional' to 1.  Also, files that we
-    # pretty much will never look at can be placed in the global
-    # %ignored_files instead.  Ones used here will be added to %skipped files
-    main::set_access('skip', \%skip, 'c');
+    # This is used to skip processing of this input file (semi-) permanently.
+    # The value should be the reason the file is being skipped.  It is used
+    # for files that we aren't planning to process anytime soon, but want to
+    # allow to be in the directory and be checked for their names not
+    # conflicting with any other files on a DOS 8.3 name filesystem, but to
+    # not otherwise be processed, and to not raise a warning about not being
+    # handled.  In the constructor call, any value that evaluates to a numeric
+    # 0 or undef means don't skip.  Any other value is a string giving the
+    # reason it is being skipped, and this will appear in generated pod.
+    # However, an empty string reason will suppress the pod entry.
+    # Internally, calls that evaluate to numeric 0 are changed into undef to
+    # distinguish them from an empty string call.
+    main::set_access('skip', \%skip, 'c', 'r');
 
     my %each_line_handler;
     # list of subroutines to look at and filter each non-comment line in the
@@ -2221,21 +2144,28 @@ sub trace { return main::trace(@_); }
     main::set_access('has_missings_defaults',
                                         \%has_missings_defaults, qw{ c r });
 
+    my %construction_time_handler;
+    # Subroutine to call at the end of the new method.  If undef, no such
+    # handler is called.
+    main::set_access('construction_time_handler',
+                                        \%construction_time_handler, qw{ c });
+
     my %pre_handler;
     # Subroutine to call before doing anything else in the file.  If undef, no
     # such handler is called.
     main::set_access('pre_handler', \%pre_handler, qw{ c });
 
     my %eof_handler;
-    # Subroutine to call upon getting an EOF on the input file, but before
+    # Subroutines to call upon getting an EOF on the input file, but before
     # that is returned to the main handler.  This is to allow buffers to be
     # flushed.  The handler is expected to call insert_lines() or
     # insert_adjusted() with the buffered material
-    main::set_access('eof_handler', \%eof_handler, qw{ c });
+    main::set_access('eof_handler', \%eof_handler, qw{ c });
 
     my %post_handler;
     # Subroutine to call after all the lines of the file are read in and
-    # processed.  If undef, no such handler is called.
+    # processed.  If undef, no such handler is called.  Note that this cannot
+    # add lines to be processed; instead use eof_handler
     main::set_access('post_handler', \%post_handler, qw{ c });
 
     my %progress_message;
@@ -2263,6 +2193,69 @@ sub trace { return main::trace(@_); }
     # storage of '@missing' defaults lines
     main::set_access('missings', \%missings);
 
+    my %early;
+    # Used for properties that must be defined (for Perl's purposes) on
+    # versions of Unicode earlier than Unicode itself defines them.  The
+    # parameter is an array (it would be better to be a hash, but not worth
+    # bothering about due to its rare use).
+    #
+    # The first element is either a code reference to call when in a release
+    # earlier than the Unicode file is available in, or it is an alternate
+    # file to use instead of the non-existent one.  This file must have been
+    # plunked down in the same directory as mktables.  Should you be compiling
+    # on a release that needs such a file, mktables will abort the
+    # compilation, and tell you where to get the necessary file(s), and what
+    # name(s) to use to store them as.
+    # In the case of specifying an alternate file, the array must contain two
+    # further elements:
+    #
+    # [1] is the name of the property that will be generated by this file.
+    # The class automatically takes the input file and excludes any code
+    # points in it that were not assigned in the Unicode version being
+    # compiled.  It then uses this result to define the property in the given
+    # version.  Since the property doesn't actually exist in the Unicode
+    # version being compiled, this should be a name accessible only by core
+    # perl.  If it is the same name as the regular property, the constructor
+    # will mark the output table as a $PLACEHOLDER so that it doesn't actually
+    # get output, and so will be unusable by non-core code.  Otherwise it gets
+    # marked as $INTERNAL_ONLY.
+    #
+    # [2] is a property value to assign (only when compiling Unicode 1.1.5) to
+    # the Hangul syllables in that release (which were ripped out in version
+    # 2) for the given property.  (Hence it is ignored except when compiling
+    # version 1.  You only get one value that applies to all of them, which
+    # may not be the actual reality, but probably nobody cares anyway for
+    # these obsolete characters.)
+    #
+    # Not all files can be handled in the above way, and so the code ref
+    # alternative is available.  It can do whatever it needs to.  The other
+    # array elements are optional in this case, and the code is free to use or
+    # ignore them if they are present.
+    #
+    # Internally, the constructor unshifts a 0 or 1 onto this array to
+    # indicate if an early alternative is actually being used or not.  This
+    # makes for easier testing later on.
+    main::set_access('early', \%early, 'c');
+
+    my %required_even_in_debug_skip;
+    # debug_skip is used to speed up compilation during debugging by skipping
+    # processing files that are not needed for the task at hand.  However,
+    # some files pretty much can never be skipped, and this is used to specify
+    # that this is one of them.  In order to skip this file, the call to the
+    # constructor must be edited to comment out this parameter.
+    main::set_access('required_even_in_debug_skip',
+                     \%required_even_in_debug_skip, 'c');
+
+    my %withdrawn;
+    # Some files get removed from the Unicode DB.  This is a version object
+    # giving the first release without this file.
+    main::set_access('withdrawn', \%withdrawn, 'c');
+
+    my %in_this_release;
+    # Calculated value from %first_released and %withdrawn.  Are we compiling
+    # a Unicode release which includes this file?
+    main::set_access('in_this_release', \%in_this_release);
+
     sub _next_line;
     sub _next_line_with_remapped_range;
 
@@ -2275,22 +2268,23 @@ sub trace { return main::trace(@_); }
         # Set defaults
         $handler{$addr} = \&main::process_generic_property_file;
         $non_skip{$addr} = 0;
-        $skip{$addr} = 0;
+        $skip{$addr} = undef;
         $has_missings_defaults{$addr} = $NO_DEFAULTS;
         $handle{$addr} = undef;
         $added_lines{$addr} = [ ];
         $remapped_lines{$addr} = [ ];
         $each_line_handler{$addr} = [ ];
+        $eof_handler{$addr} = [ ];
         $errors{$addr} = { };
         $missings{$addr} = [ ];
+        $early{$addr} = [ ];
+        $optional{$addr} = [ ];
 
         # Two positional parameters.
         return Carp::carp_too_few_args(\@_, 2) if main::DEBUG && @_ < 2;
         $file{$addr} = main::internal_file_to_platform(shift);
         $first_released{$addr} = shift;
 
-        undef $file{$addr} if $first_released{$addr} gt $v_version;
-
         # The rest of the arguments are key => value pairs
         # %constructor_fields has been set up earlier to list all possible
         # ones.  Either set or push, depending on how the default has been set
@@ -2322,30 +2316,206 @@ sub trace { return main::trace(@_); }
             delete $args{$key};
         };
 
-        # If the file has a property for it, it means that the property is not
-        # listed in the file's entries.  So add a handler to the list of line
-        # handlers to insert the property name into the lines, to provide a
-        # uniform interface to the final processing subroutine.
-        # the final code doesn't have to worry about that.
-        if ($property{$addr}) {
-            push @{$each_line_handler{$addr}}, \&_insert_property_into_line;
+        $non_skip{$addr} = 1 if $required_even_in_debug_skip{$addr};
+
+        # Convert 0 (meaning don't skip) to undef
+        undef $skip{$addr} unless $skip{$addr};
+
+        # Handle the case where this file is optional
+        my $pod_message_for_non_existent_optional = "";
+        if ($optional{$addr}->@*) {
+
+            # First element is the pod message
+            $pod_message_for_non_existent_optional
+                                                = shift $optional{$addr}->@*;
+            # Convert a 0 'Optional' argument to an empty list to make later
+            # code more concise.
+            if (   $optional{$addr}->@*
+                && $optional{$addr}->@* == 1
+                && $optional{$addr}[0] ne ""
+                && $optional{$addr}[0] !~ /\D/
+                && $optional{$addr}[0] == 0)
+            {
+                $optional{$addr} = [ ];
+            }
+            else {  # But if the only element doesn't evaluate to 0, make sure
+                    # that this file is indeed considered optional below.
+                unshift $optional{$addr}->@*, 1;
+            }
+        }
+
+        my $progress;
+        my $function_instead_of_file = 0;
+
+        # If we are compiling a Unicode release earlier than the file became
+        # available, the constructor may have supplied a substitute
+        if ($first_released{$addr} gt $v_version && $early{$addr}->@*) {
+
+            # Yes, we have a substitute, that we will use; mark it so
+            unshift $early{$addr}->@*, 1;
+
+            # See the definition of %early for what the array elements mean.
+            # If we have a property this defines, create a table and default
+            # map for it now (at essentially compile time), so that it will be
+            # available for the whole of run time.  (We will want to add this
+            # name as an alias when we are using the official property name;
+            # but this must be deferred until run(), because at construction
+            # time the official names have yet to be defined.)
+            if ($early{$addr}[2]) {
+                my $fate = ($property{$addr}
+                            && $property{$addr} eq $early{$addr}[2])
+                          ? $PLACEHOLDER
+                          : $INTERNAL_ONLY;
+                my $prop_object = Property->new($early{$addr}[2],
+                                                Fate => $fate,
+                                                Perl_Extension => 1,
+                                                );
+
+                # Use the default mapping for the regular property for this
+                # substitute one.
+                if (    defined $property{$addr}
+                    &&  defined $default_mapping{$property{$addr}})
+                {
+                    $prop_object
+                        ->set_default_map($default_mapping{$property{$addr}});
+                }
+            }
+
+            if (ref $early{$addr}[1] eq 'CODE') {
+                $function_instead_of_file = 1;
+
+                # If the first element of the array is a code ref, the others
+                # are optional.
+                $handler{$addr} = $early{$addr}[1];
+                $property{$addr} = $early{$addr}[2]
+                                                if defined $early{$addr}[2];
+                $progress = "substitute $file{$addr}";
+
+                undef $file{$addr};
+            }
+            else {  # Specifying a substitute file
+
+                if (! main::file_exists($early{$addr}[1])) {
+
+                    # If we don't see the substitute file, generate an error
+                    # message giving the needed things, and add it to the list
+                    # of such to output before actual processing happens
+                    # (hence the user finds out all of them in one run).
+                    # Instead of creating a general method for NameAliases,
+                    # hard-code it here, as there is unlikely to ever be a
+                    # second one which needs special handling.
+                    my $string_version = ($file{$addr} eq "NameAliases.txt")
+                                    ? 'at least 6.1 (the later, the better)'
+                                    : sprintf "%vd", $first_released{$addr};
+                    push @missing_early_files, <<END;
+'$file{$addr}' version $string_version should be copied to '$early{$addr}[1]'.
+END
+                    ;
+                    return;
+                }
+                $progress = $early{$addr}[1];
+                $progress .= ", substituting for $file{$addr}" if $file{$addr};
+                $file{$addr} = $early{$addr}[1];
+                $property{$addr} = $early{$addr}[2];
+
+                # Ignore code points not in the version being compiled
+                push $each_line_handler{$addr}->@*, \&_exclude_unassigned;
+
+                if (   $v_version lt v2.0        # Hanguls in this release ...
+                    && defined $early{$addr}[3]) # ... need special treatment
+                {
+                    push $eof_handler{$addr}->@*, \&_fixup_obsolete_hanguls;
+                }
+            }
+
+            # And this substitute is valid for all releases.
+            $first_released{$addr} = v0;
+        }
+        else {  # Normal behavior
+            $progress = $file{$addr};
+            unshift $early{$addr}->@*, 0; # No substitute
         }
 
-        if ($non_skip{$addr} && ! $debug_skip && $verbosity) {
-            print "Warning: " . __PACKAGE__ . " constructor for $file{$addr} has useless 'non_skip' in it\n";
+        my $file = $file{$addr};
+        $progress_message{$addr} = "Processing $progress"
+                                            unless $progress_message{$addr};
+
+        # A file should be there if it is within the window of versions for
+        # which Unicode supplies it
+        if ($withdrawn{$addr} && $withdrawn{$addr} le $v_version) {
+            $in_this_release{$addr} = 0;
+            $skip{$addr} = "";
         }
+        else {
+            $in_this_release{$addr} = $first_released{$addr} le $v_version;
 
-        # If skipping, set to optional, and add to list of ignored files,
-        # including its reason
-        if ($skip{$addr}) {
-            $optional{$addr} = 1;
-            $skipped_files{$file{$addr}} = $skip{$addr} if $file{$addr};
+            # Check that the file for this object (possibly using a substitute
+            # for early releases) exists or we have a function alternative
+            if (   ! $function_instead_of_file
+                && ! main::file_exists($file))
+            {
+                # Here there is nothing available for this release.  This is
+                # fine if we aren't expecting anything in this release.
+                if (! $in_this_release{$addr}) {
+                    $skip{$addr} = "";  # Don't remark since we expected
+                                        # nothing and got nothing
+                }
+                elsif ($optional{$addr}->@*) {
+
+                    # Here the file is optional in this release; Use the
+                    # passed in text to document this case in the pod.
+                    $skip{$addr} = $pod_message_for_non_existent_optional;
+                }
+                elsif (   $in_this_release{$addr}
+                       && ! defined $skip{$addr}
+                       && defined $file)
+                { # Doesn't exist but should.
+                    $skip{$addr} = "'$file' not found.  Possibly Big problems";
+                    Carp::my_carp($skip{$addr});
+                }
+            }
+            elsif ($debug_skip && ! defined $skip{$addr} && ! $non_skip{$addr})
+            {
+
+                # The file exists; if not skipped for another reason, and we are
+                # skipping most everything during debugging builds, use that as
+                # the skip reason.
+                $skip{$addr} = '$debug_skip is on'
+            }
+        }
+
+        if (   ! $debug_skip
+            && $non_skip{$addr}
+            && ! $required_even_in_debug_skip{$addr}
+            && $verbosity)
+        {
+            print "Warning: " . __PACKAGE__ . " constructor for $file has useless 'non_skip' in it\n";
+        }
+
+        # Here, we have figured out if we will be skipping this file or not.
+        # If so, we add any single property it defines to any passed in
+        # optional property list.  These will be dealt with at run time.
+        if (defined $skip{$addr}) {
+            if ($property{$addr}) {
+                push $optional{$addr}->@*, $property{$addr};
+            }
+        } # Otherwise, are going to process the file.
+        elsif ($property{$addr}) {
+
+            # If the file has a property defined in the constructor for it, it
+            # means that the property is not listed in the file's entries.  So
+            # add a handler (to the list of line handlers) to insert the
+            # property name into the lines, to provide a uniform interface to
+            # the final processing subroutine.
+            push @{$each_line_handler{$addr}}, \&_insert_property_into_line;
         }
         elsif ($properties{$addr}) {
 
-            # Add a handler for each line in the input so that it creates a
-            # separate input line for each property in those input lines, thus
-            # making them suitable for process_generic_property_file().
+            # Similarly, there may be more than one property represented on
+            # each line, with no clue but the constructor input what those
+            # might be.  Add a handler for each line in the input so that it
+            # creates a separate input line for each property in those input
+            # lines, thus making them suitable to handle generically.
 
             push @{$each_line_handler{$addr}},
                  sub {
@@ -2376,7 +2546,7 @@ sub trace { return main::trace(@_); }
                 };
         }
 
-        {   # On non-ascii platforms, we use a special handler
+        {   # On non-ascii platforms, we use a special pre-handler
             no strict;
             no warnings 'once';
             *next_line = (main::NON_ASCII_PLATFORM)
@@ -2384,6 +2554,9 @@ sub trace { return main::trace(@_); }
                          : *_next_line;
         }
 
+        &{$construction_time_handler{$addr}}($self)
+                                        if $construction_time_handler{$addr};
+
         return $self;
     }
 
@@ -2401,13 +2574,13 @@ sub trace { return main::trace(@_); }
         return __PACKAGE__ . " object for " . $self->file;
     }
 
-    # flag to make sure extracted files are processed early
-    my $seen_non_extracted_non_age = 0;
-
     sub run {
         # Process the input object $self.  This opens and closes the file and
         # calls all the handlers for it.  Currently,  this can only be called
-        # once per file, as it destroy's the EOF handler
+        # once per file, as it destroys the EOF handlers
+
+        # flag to make sure extracted files are processed early
+        state $seen_non_extracted_non_age = 0;
 
         my $self = shift;
         Carp::carp_extra_args(\@_) if main::DEBUG && @_;
@@ -2416,61 +2589,14 @@ sub trace { return main::trace(@_); }
 
         my $file = $file{$addr};
 
-        # Don't process if not expecting this file (because released later
-        # than this Unicode version), and isn't there.  This means if someone
-        # copies it into an earlier version's directory, we will go ahead and
-        # process it.
-        return if $first_released{$addr} gt $v_version
-                  && (! defined $file || ! -e $file);
-
-        # If in debugging mode and this file doesn't have the non-skip
-        # flag set, and isn't one of the critical files, skip it.
-        if ($debug_skip
-            && $first_released{$addr} ne v0
-            && ! $non_skip{$addr})
-        {
-            print "Skipping $file in debugging\n" if $verbosity;
-            return;
-        }
-
-        # File could be optional
-        if ($optional{$addr}) {
-            return unless -e $file;
-            my $result = eval $optional{$addr};
-            if (! defined $result) {
-                Carp::my_carp_bug("Got '$@' when tried to eval $optional{$addr}.  $file Skipped.");
-                return;
-            }
-            if (! $result) {
-                if ($verbosity) {
-                    print STDERR "Skipping processing input file '$file' because '$optional{$addr}' is not true\n";
-                }
-                return;
-            }
-        }
-
-        if (! defined $file || ! -e $file) {
-
-            # If the file doesn't exist, see if have internal data for it
-            # (based on first_released being 0).
-            if ($first_released{$addr} eq v0) {
-                $handle{$addr} = 'pretend_is_open';
-            }
-            else {
-                if (! $optional{$addr}  # File could be optional
-                    && $v_version ge $first_released{$addr})
-                {
-                    print STDERR "Skipping processing input file '$file' because not found\n" if $v_version ge $first_released{$addr};
-                }
-                return;
-            }
+        if (! $file) {
+            $handle{$addr} = 'pretend_is_open';
         }
         else {
-
-            # Here, the file exists.  Some platforms may change the case of
-            # its name
             if ($seen_non_extracted_non_age) {
-                if ($file =~ /$EXTRACTED/i) {
+                if ($file =~ /$EXTRACTED/i) # Some platforms may change the
+                                            # case of the file's name
+                {
                     Carp::my_carp_bug(main::join_lines(<<END
 $file should be processed just after the 'Prop...Alias' files, and before
 anything not in the $EXTRACTED_DIR directory.  Proceeding, but the results may
@@ -2480,7 +2606,10 @@ END
                 }
             }
             elsif ($EXTRACTED_DIR
-                    && $first_released{$addr} ne v0
+
+                    # We only do this check for generic property files
+                    && $handler{$addr} == \&main::process_generic_property_file
+
                     && $file !~ /$EXTRACTED/i
                     && lc($file) ne 'dage.txt')
             {
@@ -2490,71 +2619,137 @@ END
                 $seen_non_extracted_non_age = 1;
             }
 
-            # And mark the file as having being processed, and warn if it
+            # Mark the file as having been processed, and warn if it
             # isn't a file we are expecting.  As we process the files,
             # they are deleted from the hash, so any that remain at the
             # end of the program are files that we didn't process.
             my $fkey = File::Spec->rel2abs($file);
-            my $expecting = delete $potential_files{lc($fkey)};
+            my $exists = delete $potential_files{lc($fkey)};
+
+            Carp::my_carp("Was not expecting '$file'.")
+                                    if $exists && ! $in_this_release{$addr};
+
+            # If there is special handling for compiling Unicode releases
+            # earlier than the first one in which Unicode defines this
+            # property ...
+            if ($early{$addr}->@* > 1) {
+
+                # Mark as processed any substitute file that would be used in
+                # such a release
+                $fkey = File::Spec->rel2abs($early{$addr}[1]);
+                delete $potential_files{lc($fkey)};
+
+                # As commented in the constructor code, when using the
+                # official property, we still have to allow the publicly
+                # inaccessible early name so that the core code which uses it
+                # will work regardless.
+                if (! $early{$addr}[0] && $early{$addr}->@* > 2) {
+                    my $early_property_name = $early{$addr}[2];
+                    if ($property{$addr} ne $early_property_name) {
+                        main::property_ref($property{$addr})
+                                            ->add_alias($early_property_name);
+                    }
+                }
+            }
+
+            # We may be skipping this file ...
+            if (defined $skip{$addr}) {
 
-            Carp::my_carp("Was not expecting '$file'.") if
-                    ! $expecting
-                    && ! defined $handle{$addr};
+                # If the file isn't supposed to be in this release, there is
+                # nothing to do
+                if ($in_this_release{$addr}) {
+
+                    # But otherwise, we may print a message
+                    if ($debug_skip) {
+                        print STDERR "Skipping input file '$file'",
+                                     " because '$skip{$addr}'\n";
+                    }
+
+                    # And add it to the list of skipped files, which is later
+                    # used to make the pod
+                    $skipped_files{$file} = $skip{$addr};
+
+                    # The 'optional' list contains properties that are also to
+                    # be skipped along with the file.  (There may also be
+                    # digits which are just placeholders to make sure it isn't
+                    # an empty list.)
+                    foreach my $property ($optional{$addr}->@*) {
+                        next unless $property =~ /\D/;
+                        my $prop_object = main::property_ref($property);
+                        next unless defined $prop_object;
+                        $prop_object->set_fate($SUPPRESSED, $skip{$addr});
+                    }
+                }
 
-            # Having deleted from expected files, we can quit if not to do
-            # anything.  Don't print progress unless really want verbosity
-            if ($skip{$addr}) {
-                print "Skipping $file.\n" if $verbosity >= $VERBOSE;
                 return;
             }
 
-            # Open the file, converting the slashes used in this program
-            # into the proper form for the OS
+            # Here, we are going to process the file.  Open it, converting the
+            # slashes used in this program into the proper form for the OS
             my $file_handle;
             if (not open $file_handle, "<", $file) {
                 Carp::my_carp("Can't open $file.  Skipping: $!");
-                return 0;
+                return;
             }
             $handle{$addr} = $file_handle; # Cache the open file handle
 
-            if ($v_version ge v3.2.0 && lc($file) ne 'unicodedata.txt') {
+            # If possible, make sure that the file is the correct version.
+            # (This data isn't available on early Unicode releases or in
+            # UnicodeData.txt.)  We don't do this check if we are using a
+            # substitute file instead of the official one (though the code
+            # could be extended to do so).
+            if ($in_this_release{$addr}
+                && ! $early{$addr}[0]
+                && lc($file) ne 'unicodedata.txt')
+            {
                 if ($file !~ /^Unihan/i) {
-                    $_ = <$file_handle>;
-                    if ($_ !~ / - $string_version \. /x) {
-                        chomp;
-                        $_ =~ s/^#\s*//;
-                        die Carp::my_carp("File '$file' is version '$_'.  It should be version $string_version");
+
+                    # The non-Unihan files started getting version numbers in
+                    # 3.2, but some files in 4.0 are unchanged from 3.2, and
+                    # marked as 3.2.  4.0.1 is the first version where there
+                    # are no files marked as being from less than 4.0, though
+                    # some are marked as 4.0.  In versions after that, the
+                    # numbers are correct.
+                    if ($v_version ge v4.0.1) {
+                        $_ = <$file_handle>;    # The version number is in the
+                                                # very first line
+                        if ($_ !~ / - $string_version \. /x) {
+                            chomp;
+                            $_ =~ s/^#\s*//;
+
+                            # 4.0.1 had some valid files that weren't updated.
+                            if (! ($v_version eq v4.0.1 && $_ =~ /4\.0\.0/)) {
+                                die Carp::my_carp("File '$file' is version "
+                                                . "'$_'.  It should be "
+                                                . "version $string_version");
+                            }
+                        }
                     }
                 }
-                else {
+                elsif ($v_version ge v6.0.0) { # Unihan
+
+                    # Unihan files didn't get accurate version numbers until
+                    # 6.0.  The version is somewhere in the first comment
+                    # block
                     while (<$file_handle>) {
                         if ($_ !~ /^#/) {
-                            Carp::my_carp_bug("Could not find the expected version info in file '$file'");
+                            Carp::my_carp_bug("Could not find the expected "
+                                            . "version info in file '$file'");
                             last;
                         }
                         chomp;
                         $_ =~ s/^#\s*//;
                         next if $_ !~ / version: /x;
                         last if $_ =~ /$string_version/;
-                        die Carp::my_carp("File '$file' is '$_'.  It should be version $string_version");
+                        die Carp::my_carp("File '$file' is version "
+                                        . "'$_'.  It should be "
+                                        . "version $string_version");
                     }
                 }
             }
         }
 
-        if ($verbosity >= $PROGRESS) {
-            if ($progress_message{$addr}) {
-                print "$progress_message{$addr}\n";
-            }
-            else {
-                # If using a virtual file, say so.
-                print "Processing ", (-e $file)
-                                       ? $file
-                                       : "substitute $file",
-                                     "\n";
-            }
-        }
-
+        print "$progress_message{$addr}\n" if $verbosity >= $PROGRESS;
 
         # Call any special handler for before the file.
         &{$pre_handler{$addr}}($self) if $pre_handler{$addr};
@@ -2742,11 +2937,11 @@ END
             return 1;
         } # End of looping through lines.
 
-        # If there is an EOF handler, call it (only once) and if it generates
+        # If there are EOF handlers, call each (only once) and if it generates
         # more lines to process go back in the loop to handle them.
-        if ($eof_handler{$addr}) {
-            &{$eof_handler{$addr}}($self);
-            $eof_handler{$addr} = "";   # Currently only get one shot at it.
+        while ($eof_handler{$addr}->@*) {
+            &{$eof_handler{$addr}[0]}($self);
+            shift $eof_handler{$addr}->@*;   # Currently only get one shot at it.
             goto LINE if $added_lines{$addr};
         }
 
@@ -2943,6 +3138,82 @@ END
         return @return;
     }
 
+    sub _exclude_unassigned {
+
+        # Takes the range in $_ and excludes code points that aren't assigned
+        # in this release
+
+        state $skip_inserted_count = 0;
+
+        # Ignore recursive calls.
+        if ($skip_inserted_count) {
+            $skip_inserted_count--;
+            return;
+        }
+
+        # Find what code points are assigned in this release
+        main::calculate_Assigned() if ! defined $Assigned;
+
+        my $self = shift;
+        my $addr = do { no overloading; pack 'J', $self; };
+        Carp::carp_extra_args(\@_) if main::DEBUG && @_;
+
+        my ($range, @remainder)
+            = split /\s*;\s*/, $_, -1; # -1 => retain trailing null fields
+
+        # Examine the range.
+        if ($range =~ /^ ($code_point_re) (?:\.\. ($code_point_re) )? $/x)
+        {
+            my $low = hex $1;
+            my $high = (defined $2) ? hex $2 : $low;
+
+            # Split the range into subranges of just those code points in it
+            # that are assigned.
+            my @ranges = (Range_List->new(Initialize
+                              => Range->new($low, $high)) & $Assigned)->ranges;
+
+            # Do nothing if nothing in the original range is assigned in this
+            # release; handle normally if everything is in this release.
+            if (! @ranges) {
+                $_ = "";
+            }
+            elsif (@ranges != 1) {
+
+                # Here, some code points in the original range aren't in this
+                # release; @ranges gives the ones that are.  Create fake input
+                # lines for each of the ranges, and set things up so that when
+                # this routine is called on that fake input, it will do
+                # nothing.
+                $skip_inserted_count = @ranges;
+                my $remainder = join ";", @remainder;
+                for my $range (@ranges) {
+                    $self->insert_lines(sprintf("%04X..%04X;%s",
+                                    $range->start, $range->end, $remainder));
+                }
+                $_ = "";    # The original range is now defunct.
+            }
+        }
+
+        return;
+    }
+
+    sub _fixup_obsolete_hanguls {
+
+        # This is called only when compiling Unicode version 1.  All Unicode
+        # data for subsequent releases assumes that the code points that were
+        # Hangul syllables in this release only are something else, so if
+        # using such data, we have to override it
+
+        my $self = shift;
+        my $addr = do { no overloading; pack 'J', $self; };
+        Carp::carp_extra_args(\@_) if main::DEBUG && @_;
+
+        my $object = main::property_ref($property{$addr});
+        $object->add_map(0x3400, 0x4DFF,
+                         $early{$addr}[3],  # Passed-in value for these
+                         Replace => $UNCONDITIONALLY);
+    }
+
     sub _insert_property_into_line {
         # Add a property field to $_, if this file requires it.
 
@@ -3027,6 +3298,8 @@ package Multi_Default;
         #        .
         #        .
         #        'U'));
+        # It is best to have the final value be the one that matches the
+        # above-Unicode code points.
 
         my $class = shift;
 
@@ -3732,7 +4005,7 @@ sub trace { return main::trace(@_); }
         #       => $MULTIPLE_BEFORE means that if this range duplicates an
         #                         existing one, but has a different value,
         #                         don't replace the existing one, but insert
-        #                         this, one so that the same range can occur
+        #                         this one so that the same range can occur
         #                         multiple times.  They are stored LIFO, so
         #                         that the final one inserted is the first one
         #                         returned in an ordered search of the table.
@@ -3747,6 +4020,7 @@ sub trace { return main::trace(@_); }
         #                         existing range, this one is discarded
         #                         (leaving the existing one in its original,
         #                         higher priority position
+        #       => $CROAK         Die with an error if it is already there
         #       => anything else  is the same as => $IF_NOT_EQUIVALENT
         #
         # "same value" means identical for non-type-0 ranges, and it means
@@ -3835,7 +4109,7 @@ sub trace { return main::trace(@_); }
 
                 # Here, the new range starts just after the current highest in
                 # the range list, and they have the same type and value.
-                # Extend the current range to incorporate the new one.
+                # Extend the existing range to incorporate the new one.
                 @{$r}[-1]->set_end($end);
             }
 
@@ -4228,7 +4502,7 @@ sub trace { return main::trace(@_); }
             # In other words,
             #   r[$i-1]->end < $start <= r[$i]->end
             # And:
-            #   r[$i-1]->end < $start <= $end <= r[$j+1]->start
+            #   r[$i-1]->end < $start <= $end < r[$j+1]->start
             #
             # Also:
             #   $clean_insert is a boolean which is set true if and only if
@@ -5093,6 +5367,7 @@ sub trace { return main::trace(@_); }
         my $note = delete $args{'Note'};
         my $make_re_pod_entry = delete $args{'Re_Pod_Entry'};
         my $perl_extension = delete $args{'Perl_Extension'};
+        my $suppression_reason = delete $args{'Suppression_Reason'};
 
         # Shouldn't have any left over
         Carp::carp_extra_args(\%args) if main::DEBUG && %args;
@@ -5134,11 +5409,12 @@ END
         {
             $fate{$addr} = $SUPPRESSED;
         }
-        elsif ($fate{$addr} == $SUPPRESSED
-               && ! exists $why_suppressed{$property{$addr}->complete_name})
-        {
-            Carp::my_carp_bug("There is no current capability to set the reason for suppressing.");
-            # perhaps Fate => [ $SUPPRESSED, "reason" ]
+        elsif ($fate{$addr} == $SUPPRESSED) {
+            Carp::my_carp_bug("Need reason for suppressing") unless $suppression_reason;
+            # Though currently unused
+        }
+        elsif ($suppression_reason) {
+            Carp::my_carp_bug("A reason was given for suppressing, but not suppressed");
         }
 
         # If hasn't set its status already, see if it is on one of the
@@ -5266,17 +5542,18 @@ END
         my %args = @_;
         my $loose_match = delete $args{'Fuzzy'};
 
-        my $make_re_pod_entry = delete $args{'Re_Pod_Entry'};
-        $make_re_pod_entry = $YES unless defined $make_re_pod_entry;
-
         my $ok_as_filename = delete $args{'OK_as_Filename'};
         $ok_as_filename = 1 unless defined $ok_as_filename;
 
-        my $status = delete $args{'Status'};
-        $status = $NORMAL unless defined $status;
-
         # An internal name does not get documented, unless overridden by the
-        # input.
+        # input; same for making tests for it.
+        my $status = delete $args{'Status'} || (($name =~ /^_/)
+                                                ? $INTERNAL_ALIAS
+                                                : $NORMAL);
+        my $make_re_pod_entry = delete $args{'Re_Pod_Entry'}
+                                            // (($status ne $INTERNAL_ALIAS)
+                                               ? (($name =~ /^_/) ? $NO : $YES)
+                                               : $NO);
         my $ucd = delete $args{'UCD'} // (($name =~ /^_/) ? 0 : 1);
 
         Carp::carp_extra_args(\%args) if main::DEBUG && %args;
@@ -5346,7 +5623,7 @@ END
                 $insert_position,
                 0,
                 Alias->new($name, $loose_match, $make_re_pod_entry,
-                                                $ok_as_filename, $status, $ucd);
+                           $ok_as_filename, $status, $ucd);
 
         # This name may be shorter than any existing ones, so clear the cache
         # of the shortest, so will have to be recalculated.
@@ -6267,7 +6544,8 @@ END
         }
 
         # Save the reason for suppression for output
-        if ($fate == $SUPPRESSED && defined $reason) {
+        if ($fate >= $SUPPRESSED) {
+            $reason = "" unless defined $reason;
             $why_suppressed{$complete_name{$addr}} = $reason;
         }
 
@@ -6763,7 +7041,7 @@ END
         # The ranges that map to the default aren't output, so subtract that
         # to get those actually output.  A property with matching tables
         # already has the information calculated.
-        if ($property->type != $STRING) {
+        if ($property->type != $STRING && $property->type != $FORCED_BINARY) {
             $count -= $property->table($default_map)->count;
         }
         elsif (defined $default_map) {
@@ -6839,9 +7117,11 @@ END
         $comment .= "This file returns the $mapping:\n";
 
         my $ucd_accessible_name = "";
+        my $has_underscore_name = 0;
         my $full_name = $self->property->full_name;
         for my $i (0 .. @property_aliases - 1) {
             my $name = $property_aliases[$i]->name;
+            $has_underscore_name = 1 if $name =~ /^_/;
             $comment .= sprintf("%-8s%s\n", " ", $name . '(cp)');
             if ($property_aliases[$i]->ucd) {
                 if ($name eq $full_name) {
@@ -6854,7 +7134,12 @@ END
         }
         $comment .= "\nwhere 'cp' is $cp.";
         if ($ucd_accessible_name) {
-            $comment .= "  Note that $these_mappings $are accessible via the functions prop_invmap('$full_name') or charprop() in Unicode::UCD";
+            $comment .= "  Note that $these_mappings";
+            if ($has_underscore_name) {
+                $comment .= " (except for the one(s) that begin with an underscore)";
+            }
+            $comment .= " $are accessible via the functions prop_invmap('$full_name') or charprop() in Unicode::UCD";
+
         }
 
         # And append any commentary already set from the actual property.
@@ -6888,7 +7173,7 @@ END
                 # There are tables which end up only having one element per
                 # range, but it is not worth keeping track of for making just
                 # this comment a little better.
-                $comment.= <<END;
+                $comment .= <<END;
 non-comment portions of the main body of lines of this file is:
 START\\tSTOP\\tMAPPING where START is the starting code point of the
 range, in hex; STOP is the ending point, or if omitted, the range has just one
@@ -7848,7 +8133,6 @@ END
                             main::uniques($leader, @{$equivalents{$addr}});
         my $has_unrelated = (@parents >= 2);  # boolean, ? are there unrelated
                                               # tables
-
         for my $parent (@parents) {
 
             my $property = $parent->property;
@@ -7906,7 +8190,7 @@ END
                 # commentary that the other combinations are possible.
                 # Because regular expressions don't recognize things like
                 # \p{jsn=}, only look at non-null right-hand-sides
-                my @property_aliases = $table_property->aliases;
+                my @property_aliases = grep { $_->status ne $INTERNAL_ALIAS } $table_property->aliases;
                 my @table_aliases = grep { $_->name ne "" } $table->aliases;
 
                 # The alias lists above are already ordered in the order we
@@ -7918,8 +8202,7 @@ END
                                     ?  main::max(scalar @table_aliases,
                                                  scalar @property_aliases)
                                     : 0;
-                trace "$listed_combos, tables=", scalar @table_aliases, "; names=", scalar @property_aliases if main::DEBUG;
-
+                trace "$listed_combos, tables=", scalar @table_aliases, "; property names=", scalar @property_aliases if main::DEBUG;
 
                 my $property_had_compound_name = 0;
 
@@ -8070,8 +8353,14 @@ END
             foreach my $flag (sort keys %flags) {
                 $comment .= <<END;
 '$flag' below means that this form is $flags{$flag}.
-Consult $pod_file.pod
 END
+                if ($flag eq $INTERNAL_ALIAS) {
+                    $comment .= "DO NOT USE!!!";
+                }
+                else {
+                    $comment .= "Consult $pod_file.pod";
+                }
+                $comment .= "\n";
             }
             $comment .= "\n";
         }
@@ -8436,18 +8725,27 @@ sub trace { return main::trace(@_) if main::DEBUG && $to_trace }
             $perl_extension
                         = $self->perl_extension if ! defined $perl_extension;
 
+            my $fate;
+            my $suppression_reason = "";
+            if ($self->name =~ /^_/) {
+                $fate = $SUPPRESSED;
+                $suppression_reason = "Parent property is internal only";
+            }
+            elsif ($self->fate >= $SUPPRESSED) {
+                $fate = $self->fate;
+                $suppression_reason = $why_suppressed{$self->complete_name};
+
+            }
+            elsif ($name =~ /^_/) {
+                $fate = $INTERNAL_ONLY;
+            }
             $table = Match_Table->new(
                                 Name => $name,
                                 Perl_Extension => $perl_extension,
                                 _Alias_Hash => $table_ref{$addr},
                                 _Property => $self,
-
-                                # gets property's fate and status by default,
-                                # except if the name begind with an
-                                # underscore, default it to internal
-                                Fate => ($name =~ /^_/)
-                                         ? $INTERNAL_ONLY
-                                         : $self->fate,
+                                Fate => $fate,
+                                Suppression_Reason => $suppression_reason,
                                 Status => $self->status,
                                 _Status_Info => $self->status_info,
                                 %args);
@@ -8562,10 +8860,13 @@ sub trace { return main::trace(@_) if main::DEBUG && $to_trace }
         # Swash names are used only on either
         # 1) legacy-only properties, because the formats for these are
         #    unchangeable, and they have had these lines in them; or
-        # 2) regular map tables; otherwise there should be no access to the
+        # 2) regular or internal-only map tables.
+        #    Otherwise there should be no access to the
         #    property map table from other parts of Perl.
         return if $map{$addr}->fate != $ORDINARY
-                  && $map{$addr}->fate != $LEGACY_ONLY;
+                  && $map{$addr}->fate != $LEGACY_ONLY
+                  && ! ($map{$addr}->name =~ /^_/
+                        && $map{$addr}->fate == $INTERNAL_ONLY);
 
         return $file{$addr} if defined $file{$addr};
         return $map{$addr}->external_name;
@@ -8589,9 +8890,6 @@ sub trace { return main::trace(@_) if main::DEBUG && $to_trace }
         # to it.
         return 0 if $type{$addr} == $STRING;
 
-        # Don't generate anything for unimplemented properties.
-        return 0 if grep { $self->complete_name eq $_ }
-                                                    @unimplemented_properties;
         # Otherwise, do.
         return 1;
     }
@@ -8808,7 +9106,7 @@ sub trace { return main::trace(@_) if main::DEBUG && $to_trace }
         Carp::carp_extra_args(\@_) if main::DEBUG && @_;
 
         my $addr = do { no overloading; pack 'J', $self; };
-        if ($fate == $SUPPRESSED) {
+        if ($fate >= $SUPPRESSED) {
             $why_suppressed{$self->complete_name} = $reason;
         }
 
@@ -8894,15 +9192,15 @@ sub trace { return main::trace(@_) if main::DEBUG && $to_trace }
 
 package main;
 
-    sub display_chr {
-        # Converts an ordinal printable character value to a displayable
-        # string, using a dotted circle to hold combining characters.
+sub display_chr {
+    # Converts an ordinal printable character value to a displayable string,
+    # using a dotted circle to hold combining characters.
 
-        my $ord = shift;
-        my $chr = chr $ord;
-        return $chr if $ccc->table(0)->contains($ord);
-        return "\x{25CC}$chr";
-    }
+    my $ord = shift;
+    my $chr = chr $ord;
+    return $chr if $ccc->table(0)->contains($ord);
+    return "\x{25CC}$chr";
+}
 
 sub join_lines($) {
     # Returns lines of the input joined together, so that they can be folded
@@ -9443,7 +9741,6 @@ sub dump_inside_out {
 
     my $object = shift;
     my $fields_ref = shift;
-    Carp::carp_extra_args(\@_) if main::DEBUG && @_;
 
     my $addr = do { no overloading; pack 'J', $object; };
 
@@ -9520,6 +9817,17 @@ sub _operator_not_equal {
     return ! _operator_equal($self, $other);
 }
 
+sub substitute_PropertyAliases($) {
+    # Deal with early releases that don't have the crucial PropertyAliases.txt
+    # file.
+
+    my $file_object = shift;
+    $file_object->insert_lines(get_old_property_aliases());
+
+    process_PropertyAliases($file_object);
+}
+
+
 sub process_PropertyAliases($) {
     # This reads in the PropertyAliases.txt file, which contains almost all
     # the character properties in Unicode and their equivalent aliases:
@@ -9532,11 +9840,6 @@ sub process_PropertyAliases($) {
     my $file= shift;
     Carp::carp_extra_args(\@_) if main::DEBUG && @_;
 
-    # This whole file was non-existent in early releases, so use our own
-    # internal one.
-    $file->insert_lines(get_old_property_aliases())
-                                                if ! -e 'PropertyAliases.txt';
-
     # Add any cjk properties that may have been defined.
     $file->insert_lines(@cjk_properties);
 
@@ -9546,8 +9849,17 @@ sub process_PropertyAliases($) {
 
         my $full = $data[1];
 
+        # This line is defective in early Perls.  The property in Unihan.txt
+        # is kRSUnicode.
+        if ($full eq 'Unicode_Radical_Stroke' && @data < 3) {
+            push @data, qw(cjkRSUnicode kRSUnicode);
+        }
+
         my $this = Property->new($data[0], Full_Name => $full);
 
+        $this->set_fate($SUPPRESSED, $why_suppressed{$full})
+                                                    if $why_suppressed{$full};
+
         # Start looking for more aliases after these two.
         for my $i (2 .. @data - 1) {
             $this->add_alias($data[$i]);
@@ -9573,18 +9885,6 @@ sub finish_property_setup {
         Property->new('JSN', Full_Name => 'Jamo_Short_Name');
     }
 
-    # These two properties must be defined in all releases so we can generate
-    # the tables from them to make regex \X work, but suppress their output so
-    # aren't application visible prior to releases where they should be
-    if (! defined property_ref('GCB')) {
-        Property->new('GCB', Full_Name => 'Grapheme_Cluster_Break',
-                      Fate => $PLACEHOLDER);
-    }
-    if (! defined property_ref('hst')) {
-        Property->new('hst', Full_Name => 'Hangul_Syllable_Type',
-                      Fate => $PLACEHOLDER);
-    }
-
     # These are used so much, that we set globals for them.
     $gc = property_ref('General_Category');
     $block = property_ref('Block');
@@ -9701,22 +10001,15 @@ sub finish_property_setup {
     # for non-assigned code points; 'AL' for assigned.
     if (file_exists("${EXTRACTED}DLineBreak.txt") || -e 'LineBreak.txt') {
         my $lb = property_ref('Line_Break');
-        if ($v_version gt 3.2.0) {
+        if (file_exists("${EXTRACTED}DLineBreak.txt")) {
             $lb->set_default_map('Unknown');
         }
         else {
-            my $default = Multi_Default->new( 'Unknown' => '$gc->table("Cn")',
-                                              'AL');
+            my $default = Multi_Default->new('AL' => '~ $gc->table("Cn")',
+                                             'Unknown',
+                                            );
             $lb->set_default_map($default);
         }
-
-        # If has the URS property, make sure that the standard aliases are in
-        # it, since not in the input tables in some versions.
-        my $urs = property_ref('Unicode_Radical_Stroke');
-        if (defined $urs) {
-            $urs->add_alias('cjkRSUnicode');
-            $urs->add_alias('kRSUnicode');
-        }
     }
 
     # For backwards compatibility with applications that may read the mapping
@@ -9874,6 +10167,16 @@ END
     return @return;
 }
 
+sub substitute_PropValueAliases($) {
+    # Deal with early releases that don't have the crucial
+    # PropValueAliases.txt file.
+
+    my $file_object = shift;
+    $file_object->insert_lines(get_old_property_value_aliases());
+
+    process_PropValueAliases($file_object);
+}
+
 sub process_PropValueAliases {
     # This file contains values that properties look like:
     # bc ; AL        ; Arabic_Letter
@@ -9899,35 +10202,29 @@ sub process_PropValueAliases {
     my $file= shift;
     Carp::carp_extra_args(\@_) if main::DEBUG && @_;
 
-    # This whole file was non-existent in early releases, so use our own
-    # internal one if necessary.
-    if (! -e 'PropValueAliases.txt') {
-        $file->insert_lines(get_old_property_value_aliases());
-    }
-
     if ($v_version lt 4.0.0) {
         $file->insert_lines(split /\n/, <<'END'
-hst; L                                ; Leading_Jamo
-hst; LV                               ; LV_Syllable
-hst; LVT                              ; LVT_Syllable
-hst; NA                               ; Not_Applicable
-hst; T                                ; Trailing_Jamo
-hst; V                                ; Vowel_Jamo
+Hangul_Syllable_Type; L                                ; Leading_Jamo
+Hangul_Syllable_Type; LV                               ; LV_Syllable
+Hangul_Syllable_Type; LVT                              ; LVT_Syllable
+Hangul_Syllable_Type; NA                               ; Not_Applicable
+Hangul_Syllable_Type; T                                ; Trailing_Jamo
+Hangul_Syllable_Type; V                                ; Vowel_Jamo
 END
         );
     }
     if ($v_version lt 4.1.0) {
         $file->insert_lines(split /\n/, <<'END'
-GCB; CN                               ; Control
-GCB; CR                               ; CR
-GCB; EX                               ; Extend
-GCB; L                                ; L
-GCB; LF                               ; LF
-GCB; LV                               ; LV
-GCB; LVT                              ; LVT
-GCB; T                                ; T
-GCB; V                                ; V
-GCB; XX                               ; Other
+_Perl_GCB; CN                               ; Control
+_Perl_GCB; CR                               ; CR
+_Perl_GCB; EX                               ; Extend
+_Perl_GCB; L                                ; L
+_Perl_GCB; LF                               ; LF
+_Perl_GCB; LV                               ; LV
+_Perl_GCB; LVT                              ; LVT
+_Perl_GCB; T                                ; T
+_Perl_GCB; V                                ; V
+_Perl_GCB; XX                               ; Other
 END
         );
     }
@@ -9942,7 +10239,6 @@ END
     # program generates for this block property value
     #$file->insert_lines('blk; n/a; Herited');
 
-
     # Process each line of the file ...
     while ($file->next_line) {
 
@@ -9959,6 +10255,11 @@ END
         # thus shifting the former field 0 to after them.)
         splice (@data, 0, 0, splice(@data, 1, 2)) if $property eq 'ccc';
 
+        if ($v_version le v5.0.0 && $property eq 'blk' && $data[1] =~ /-/) {
+            my $new_style = $data[1] =~ s/-/_/gr;
+            splice @data, 1, 0, $new_style;
+        }
+
         # Field 0 is a short name unless "n/a"; field 1 is the full name.  If
         # there is no short name, use the full one in element 1
         if ($data[0] eq "n/a") {
@@ -10552,7 +10853,8 @@ sub output_perl_charnames_line ($$) {
                                                             $line));
                     }
 
-                    # And process the first range, like any other.
+                    # And set things up so that the below will process this first
+                    # range, like any other.
                     $low = $this_range->start;
                     $high = $this_range->end;
                 }
@@ -10963,11 +11265,12 @@ END
         my $file = shift;
         Carp::carp_extra_args(\@_) if main::DEBUG && @_;
 
-        # Create a new property specially located that is a combination of the
+        # Create a new property specially located that is a combination of
         # various Name properties: Name, Unicode_1_Name, Named Sequences, and
-        # Name_Alias properties.  (The final duplicates elements of the
-        # first.)  A comment for it will later be constructed based on the
-        # actual properties present and used
+        # _Perl_Name_Alias properties.  (The final one duplicates elements of the
+        # first, and starting in v6.1, is the same as the 'Name_Alias'
+        # property.)  A comment for the new property will later be constructed
+        # based on the actual properties present and used
         $perl_charname = Property->new('Perl_Charnames',
                        Default_Map => "",
                        Directory => File::Spec->curdir(),
@@ -12028,10 +12331,8 @@ sub filter_old_style_case_folding {
     Carp::carp_extra_args(\@_) if main::DEBUG && @_;
 
     my @fields = split /\s*;\s*/;
-    if ($fields[0] =~ /^ 013 [01] $/x) { # The two turkish fields
-        $fields[1] = 'I';
-    }
-    elsif ($fields[1] eq 'L') {
+
+    if ($fields[1] eq 'L') {
         $fields[1] = 'C';             # L => C always
     }
     elsif ($fields[1] eq 'E') {
@@ -12339,6 +12640,68 @@ sub filter_numeric_value_line {
 { # Closure
     my %unihan_properties;
 
+    sub construct_unihan {
+        # Adds Unihan entries to @cjk_properties and @cjk_property_values.
+        my $file_object = shift;    # only its ->file is consulted here
+
+        return unless file_exists($file_object->file);  # else add nothing
+        # Each group below is pushed only when its $v_version test passes.
+        if ($v_version lt v4.0.0) {
+            push @cjk_properties, 'URS ; Unicode_Radical_Stroke';
+            push @cjk_property_values, split "\n", <<'END';
+# @missing: 0000..10FFFF; Unicode_Radical_Stroke; <none>
+END
+        }
+
+        if ($v_version ge v3.0.0) {
+            push @cjk_properties, split "\n", <<'END';
+cjkIRG_GSource; kIRG_GSource
+cjkIRG_JSource; kIRG_JSource
+cjkIRG_KSource; kIRG_KSource
+cjkIRG_TSource; kIRG_TSource
+cjkIRG_VSource; kIRG_VSource
+END
+        push @cjk_property_values, split "\n", <<'END';
+# @missing: 0000..10FFFF; cjkIRG_GSource; <none>
+# @missing: 0000..10FFFF; cjkIRG_JSource; <none>
+# @missing: 0000..10FFFF; cjkIRG_KSource; <none>
+# @missing: 0000..10FFFF; cjkIRG_TSource; <none>
+# @missing: 0000..10FFFF; cjkIRG_VSource; <none>
+END
+        }
+        if ($v_version ge v3.1.0) {
+            push @cjk_properties, 'cjkIRG_HSource; kIRG_HSource';
+            push @cjk_property_values, '# @missing: 0000..10FFFF; cjkIRG_HSource; <none>';
+        }
+        if ($v_version ge v3.1.1) {
+            push @cjk_properties, 'cjkIRG_KPSource; kIRG_KPSource';
+            push @cjk_property_values, '# @missing: 0000..10FFFF; cjkIRG_KPSource; <none>';
+        }
+        if ($v_version ge v3.2.0) {
+            push @cjk_properties, split "\n", <<'END';
+cjkAccountingNumeric; kAccountingNumeric
+cjkCompatibilityVariant; kCompatibilityVariant
+cjkOtherNumeric; kOtherNumeric
+cjkPrimaryNumeric; kPrimaryNumeric
+END
+            push @cjk_property_values, split "\n", <<'END';
+# @missing: 0000..10FFFF; cjkAccountingNumeric; NaN
+# @missing: 0000..10FFFF; cjkCompatibilityVariant; <code point>
+# @missing: 0000..10FFFF; cjkOtherNumeric; NaN
+# @missing: 0000..10FFFF; cjkPrimaryNumeric; NaN
+END
+        }
+        if ($v_version gt v4.0.0) {
+            push @cjk_properties, 'cjkIRG_USource; kIRG_USource';
+            push @cjk_property_values, '# @missing: 0000..10FFFF; cjkIRG_USource; <none>';
+        }
+
+        if ($v_version ge v4.1.0) {
+            push @cjk_properties, 'cjkIICore ; kIICore';
+            push @cjk_property_values, '# @missing: 0000..10FFFF; cjkIICore; <none>';
+        }
+    }
+
     sub setup_unihan {
         # Do any special setup for Unihan properties.
 
@@ -12351,16 +12714,16 @@ sub filter_numeric_value_line {
         my $iicore = property_ref('kIICore');
         if (defined $iicore) {
             $iicore->set_type($FORCED_BINARY);
-            $iicore->table("Y")->add_note("Forced to a binary property as per unicode.org UAX #38.");
+            $iicore->table("Y")->add_note("Matches any code point which has a non-null value for this property; see unicode.org UAX #38.");
 
             # Unicode doesn't include the maps for this property, so don't
             # warn that they are missing.
             $iicore->set_pre_declared_maps(0);
             $iicore->add_comment(join_lines( <<END
-This property contains enum values, but Unicode UAX #38 says it should be
-interpreted as binary, so Perl creates tables for both 1) its enum values,
-plus 2) true/false tables in which it is considered true for all code points
-that have a non-null value
+This property contains string values, but any non-empty ones are considered to
+be 'core', so Perl creates tables for both: 1) its string values, plus 2)
+tables so that \\p{kIICore} matches any code point which has a non-empty
+value for this property.
 END
             ));
         }
@@ -12439,7 +12802,7 @@ sub filter_blocks_lines {
 
     # Change hyphens and blanks in the block name field only
     $fields[1] =~ s/[ -]/_/g;
-    $fields[1] =~ s/_ ( [a-z] ) /_\u$1/g;   # Capitalize first letter of word
+    $fields[1] =~ s/_ ( [a-z] ) /_\u$1/xg;   # Capitalize first letter of word
 
     $_ = join("; ", @fields);
     return;
@@ -12657,12 +13020,22 @@ sub generate_hst {
 END
 );
 
-    # The Hangul syllables in version 1 are completely different than what came
-    # after, so just ignore them there.
+    # The Hangul syllables in version 1 are at different code points than
+    # those that came along starting in version 2, and have different names;
+    # they comprise about 60% of the code points of the later version.
+    # From my (khw) research on them (see <558493EB.4000807@att.net>), the
+    # initial set is a subset of the later version, with different English
+    # transliterations.  I did not see an easy mapping between them.  The
+    # later set includes essentially all possibilities, even ones that aren't
+    # in modern use (if they ever were), and over 96% of the new ones are type
+    # LVT.  Mathematically, the early set must also contain a preponderance of
+    # LVT values.  In lieu of doing nothing, we just set them all to LVT, and
+    # expect that this will be right most of the time, which is better than
+    # not being right at all.
     if ($v_version lt v2.0.0) {
         my $property = property_ref($file->property);
+        $file->insert_lines("3400..4DFF; LVT\n");
         push @tables_that_may_be_empty, $property->table('LV')->complete_name;
-        push @tables_that_may_be_empty, $property->table('LVT')->complete_name;
         return;
     }
 
@@ -12712,7 +13085,6 @@ sub generate_GCB {
 
     # Also from http://www.unicode.org/reports/tr29/tr29-3.html.
     foreach my $code_point ( qw{
-                                40000
                                 09BE 09D7 0B3E 0B57 0BBE 0BD7 0CC2 0CD5 0CD6
                                 0D3E 0D57 0DCF 0DDF FF9E FF9F 1D165 1D16E 1D16F
                                 }
@@ -12735,275 +13107,50 @@ sub generate_GCB {
         generate_hst($file);
     }
 
-    return;
+    main::process_generic_property_file($file);
 }
 
-sub setup_early_name_alias {
-    my $file= shift;
-    Carp::carp_extra_args(\@_) if main::DEBUG && @_;
 
-    # This has the effect of pretending that the Name_Alias property was
-    # available in all Unicode releases.  Strictly speaking, this property
-    # should not be availabe in early releases, but doing this allows
-    # charnames.pm to work on older releases without change.  Prior to v5.16
-    # it had these names hard-coded inside it.  Unicode 6.1 came along and
-    # created these names, and so they were removed from charnames.
+sub fixup_early_perl_name_alias {
 
-    my $aliases = property_ref('Name_Alias');
-    if (! defined $aliases) {
-        $aliases = Property->new('Name_Alias', Default_Map => "");
-    }
+    # Different versions of Unicode have varying support for the name synonyms
+    # below.  Just include everything.  As of 6.1, all these are correct in
+    # the Unicode-supplied file.
 
-    $file->insert_lines(get_old_name_aliases());
+    my $file= shift;
+    Carp::carp_extra_args(\@_) if main::DEBUG && @_;
 
-    return;
-}
 
-sub get_old_name_aliases () {
+    # ALERT did not come along until 6.0, at which point it became preferred
+    # over BELL.  By inserting it last in early releases, BELL is preferred
+    # over it; and vice-versa in 6.0
+    my $type_for_bell = ($v_version lt v6.0.0)
+               ? 'correction'
+               : 'alternate';
+    $file->insert_lines(split /\n/, <<END
+0007;BELL; $type_for_bell
+000A;LINE FEED (LF);alternate
+000C;FORM FEED (FF);alternate
+000D;CARRIAGE RETURN (CR);alternate
+0085;NEXT LINE (NEL);alternate
+END
+
+    );
 
-    # The Unicode_1_Name field, contains most of these names.  One would
-    # expect, given the field's name, that its values would be fixed across
-    # versions, giving the true Unicode version 1 name for the character.
-    # Sadly, this is not the case.  Actually Version 1.1.5 had no names for
-    # any of the controls; Version 2.0 introduced names for the C0 controls,
-    # and 3.0 introduced C1 names.  3.0.1 removed the name INDEX; and 3.2
-    # changed some names: it
+    # One might think that the 'Unicode_1_Name' field could work for most
+    # of the above names, but sadly that field varies depending on the
+    # release.  Version 1.1.5 had no names for any of the controls; Version
+    # 2.0 introduced names for the C0 controls, and 3.0 introduced C1 names.
+    # 3.0.1 removed the name INDEX; and 3.2 changed some names:
     #   changed to parenthesized versions like "NEXT LINE" to
     #       "NEXT LINE (NEL)";
     #   changed PARTIAL LINE DOWN to PARTIAL LINE FORWARD
     #   changed PARTIAL LINE UP to PARTIAL LINE BACKWARD;;
     #   changed e.g. FILE SEPARATOR to INFORMATION SEPARATOR FOUR
-    # This list contains all the names that were defined so that
-    # charnames::vianame(), etc. understand them all EVEN if this version of
-    # Unicode didn't specify them (this could be construed as a bug).
-    # mktables elsewhere gives preference to the Unicode_1_Name field over
-    # these names, so that viacode() will return the correct value for that
-    # version of Unicode, except when that version doesn't define a name,
-    # viacode() will return one anyway (this also could be construed as a
-    # bug).  But these potential "bugs" allow for the smooth working of code
-    # on earlier Unicode releases.
-
-    my @return = split /\n/, <<'END';
-0000;NULL;control
-0000;NUL;abbreviation
-0001;START OF HEADING;control
-0001;SOH;abbreviation
-0002;START OF TEXT;control
-0002;STX;abbreviation
-0003;END OF TEXT;control
-0003;ETX;abbreviation
-0004;END OF TRANSMISSION;control
-0004;EOT;abbreviation
-0005;ENQUIRY;control
-0005;ENQ;abbreviation
-0006;ACKNOWLEDGE;control
-0006;ACK;abbreviation
-0007;BELL;control
-0007;BEL;abbreviation
-0008;BACKSPACE;control
-0008;BS;abbreviation
-0009;CHARACTER TABULATION;control
-0009;HORIZONTAL TABULATION;control
-0009;HT;abbreviation
-0009;TAB;abbreviation
-000A;LINE FEED;control
-000A;LINE FEED (LF);control
-000A;NEW LINE;control
-000A;END OF LINE;control
-000A;LF;abbreviation
-000A;NL;abbreviation
-000A;EOL;abbreviation
-000B;LINE TABULATION;control
-000B;VERTICAL TABULATION;control
-000B;VT;abbreviation
-000C;FORM FEED;control
-000C;FORM FEED (FF);control
-000C;FF;abbreviation
-000D;CARRIAGE RETURN;control
-000D;CARRIAGE RETURN (CR);control
-000D;CR;abbreviation
-000E;SHIFT OUT;control
-000E;LOCKING-SHIFT ONE;control
-000E;SO;abbreviation
-000F;SHIFT IN;control
-000F;LOCKING-SHIFT ZERO;control
-000F;SI;abbreviation
-0010;DATA LINK ESCAPE;control
-0010;DLE;abbreviation
-0011;DEVICE CONTROL ONE;control
-0011;DC1;abbreviation
-0012;DEVICE CONTROL TWO;control
-0012;DC2;abbreviation
-0013;DEVICE CONTROL THREE;control
-0013;DC3;abbreviation
-0014;DEVICE CONTROL FOUR;control
-0014;DC4;abbreviation
-0015;NEGATIVE ACKNOWLEDGE;control
-0015;NAK;abbreviation
-0016;SYNCHRONOUS IDLE;control
-0016;SYN;abbreviation
-0017;END OF TRANSMISSION BLOCK;control
-0017;ETB;abbreviation
-0018;CANCEL;control
-0018;CAN;abbreviation
-0019;END OF MEDIUM;control
-0019;EOM;abbreviation
-001A;SUBSTITUTE;control
-001A;SUB;abbreviation
-001B;ESCAPE;control
-001B;ESC;abbreviation
-001C;INFORMATION SEPARATOR FOUR;control
-001C;FILE SEPARATOR;control
-001C;FS;abbreviation
-001D;INFORMATION SEPARATOR THREE;control
-001D;GROUP SEPARATOR;control
-001D;GS;abbreviation
-001E;INFORMATION SEPARATOR TWO;control
-001E;RECORD SEPARATOR;control
-001E;RS;abbreviation
-001F;INFORMATION SEPARATOR ONE;control
-001F;UNIT SEPARATOR;control
-001F;US;abbreviation
-0020;SP;abbreviation
-007F;DELETE;control
-007F;DEL;abbreviation
-0080;PADDING CHARACTER;figment
-0080;PAD;abbreviation
-0081;HIGH OCTET PRESET;figment
-0081;HOP;abbreviation
-0082;BREAK PERMITTED HERE;control
-0082;BPH;abbreviation
-0083;NO BREAK HERE;control
-0083;NBH;abbreviation
-0084;INDEX;control
-0084;IND;abbreviation
-0085;NEXT LINE;control
-0085;NEXT LINE (NEL);control
-0085;NEL;abbreviation
-0086;START OF SELECTED AREA;control
-0086;SSA;abbreviation
-0087;END OF SELECTED AREA;control
-0087;ESA;abbreviation
-0088;CHARACTER TABULATION SET;control
-0088;HORIZONTAL TABULATION SET;control
-0088;HTS;abbreviation
-0089;CHARACTER TABULATION WITH JUSTIFICATION;control
-0089;HORIZONTAL TABULATION WITH JUSTIFICATION;control
-0089;HTJ;abbreviation
-008A;LINE TABULATION SET;control
-008A;VERTICAL TABULATION SET;control
-008A;VTS;abbreviation
-008B;PARTIAL LINE FORWARD;control
-008B;PARTIAL LINE DOWN;control
-008B;PLD;abbreviation
-008C;PARTIAL LINE BACKWARD;control
-008C;PARTIAL LINE UP;control
-008C;PLU;abbreviation
-008D;REVERSE LINE FEED;control
-008D;REVERSE INDEX;control
-008D;RI;abbreviation
-008E;SINGLE SHIFT TWO;control
-008E;SINGLE-SHIFT-2;control
-008E;SS2;abbreviation
-008F;SINGLE SHIFT THREE;control
-008F;SINGLE-SHIFT-3;control
-008F;SS3;abbreviation
-0090;DEVICE CONTROL STRING;control
-0090;DCS;abbreviation
-0091;PRIVATE USE ONE;control
-0091;PRIVATE USE-1;control
-0091;PU1;abbreviation
-0092;PRIVATE USE TWO;control
-0092;PRIVATE USE-2;control
-0092;PU2;abbreviation
-0093;SET TRANSMIT STATE;control
-0093;STS;abbreviation
-0094;CANCEL CHARACTER;control
-0094;CCH;abbreviation
-0095;MESSAGE WAITING;control
-0095;MW;abbreviation
-0096;START OF GUARDED AREA;control
-0096;START OF PROTECTED AREA;control
-0096;SPA;abbreviation
-0097;END OF GUARDED AREA;control
-0097;END OF PROTECTED AREA;control
-0097;EPA;abbreviation
-0098;START OF STRING;control
-0098;SOS;abbreviation
-0099;SINGLE GRAPHIC CHARACTER INTRODUCER;figment
-0099;SGC;abbreviation
-009A;SINGLE CHARACTER INTRODUCER;control
-009A;SCI;abbreviation
-009B;CONTROL SEQUENCE INTRODUCER;control
-009B;CSI;abbreviation
-009C;STRING TERMINATOR;control
-009C;ST;abbreviation
-009D;OPERATING SYSTEM COMMAND;control
-009D;OSC;abbreviation
-009E;PRIVACY MESSAGE;control
-009E;PM;abbreviation
-009F;APPLICATION PROGRAM COMMAND;control
-009F;APC;abbreviation
-00A0;NBSP;abbreviation
-00AD;SHY;abbreviation
-200B;ZWSP;abbreviation
-200C;ZWNJ;abbreviation
-200D;ZWJ;abbreviation
-200E;LRM;abbreviation
-200F;RLM;abbreviation
-202A;LRE;abbreviation
-202B;RLE;abbreviation
-202C;PDF;abbreviation
-202D;LRO;abbreviation
-202E;RLO;abbreviation
-FEFF;BYTE ORDER MARK;alternate
-FEFF;BOM;abbreviation
-FEFF;ZWNBSP;abbreviation
-END
-
-    if ($v_version ge v3.0.0) {
-        push @return, split /\n/, <<'END';
-180B; FVS1; abbreviation
-180C; FVS2; abbreviation
-180D; FVS3; abbreviation
-180E; MVS; abbreviation
-202F; NNBSP; abbreviation
-END
-    }
-
-    if ($v_version ge v3.2.0) {
-        push @return, split /\n/, <<'END';
-034F; CGJ; abbreviation
-205F; MMSP; abbreviation
-2060; WJ; abbreviation
-END
-        # Add in VS1..VS16
-        my $cp = 0xFE00 - 1;
-        for my $i (1..16) {
-            push @return, sprintf("%04X; VS%d; abbreviation", $cp + $i, $i);
-        }
-    }
-    if ($v_version ge v4.0.0) { # Add in VS17..VS256
-        my $cp = 0xE0100 - 17;
-        for my $i (17..256) {
-            push @return, sprintf("%04X; VS%d; abbreviation", $cp + $i, $i);
-        }
-    }
-
-    # ALERT did not come along until 6.0, at which point it became preferred
-    # over BELL, and was never in the Unicode_1_Name field.  For the same
-    # reasons, that the other names are made known to all releases by this
-    # function, we make ALERT known too.  By inserting it
-    # last in early releases, BELL is preferred over it; and vice-vers in 6.0
-    my $alert = '0007; ALERT; control';
-    if ($v_version lt v6.0.0) {
-        push @return, $alert;
-    }
-    else {
-        unshift @return, $alert;
-    }
+    #
+    # All these are present in the 6.1 NameAliases.txt
 
-    return @return;
+    return;
 }
 
 sub filter_later_version_name_alias_line {
@@ -13030,14 +13177,29 @@ sub filter_later_version_name_alias_line {
 sub filter_early_version_name_alias_line {
 
     # Early versions did not have the trailing alias type field; implicitly it
-    # was 'correction'.   But our synthetic lines we add in this program do
-    # have it, so test for the type field.
-    $_ .= "; correction" if $_ !~ /;.*;/;
+    # was 'correction'.
+    $_ .= "; correction";
 
     filter_later_version_name_alias_line;
     return;
 }
 
+sub filter_all_caps_script_names {
+    # Reads the line in $_ and rewrites it in place; @_ is not examined.
+    # Some early Unicode releases had the script names in all CAPS.  This
+    # converts them to just the first letter of each word being capital.
+    # Field 1 is taken as the script name; its words are separated by '_'.
+    my ($range, $script, @remainder)
+        = split /\s*;\s*/, $_, -1; # -1 => retain trailing null fields
+    my @words = split "_", $script;
+    for my $word (@words) {     # $word aliases each element of @words
+        $word =
+            ucfirst(lc($word)) if $word ne 'CJK';   # 'CJK' stays all caps
+    }
+    $script = join "_", @words;
+    $_ = join ";", $range, $script, @remainder; # rejoined with a bare ';'
+}
+
 sub finish_Unicode() {
     # This routine should be called after all the Unicode files have been read
     # in.  It:
@@ -13188,72 +13350,81 @@ END
         # Add any remaining code points to the mapping, using the default for
         # missing code points.
         my $default_table;
-        if (defined (my $default_map = $property->default_map)) {
+        my $default_map = $property->default_map;
+        if ($property_type == $FORCED_BINARY) {
 
-            # Make sure there is a match table for the default
-            if (! defined ($default_table = $property->table($default_map))) {
-                $default_table = $property->add_match_table($default_map);
+            # A forced binary property creates a 'Y' table that matches all
+            # non-default values.  The actual string values are also written out
+            # as a map table.  (The default value will almost certainly be the
+            # empty string, so the pod glosses over the distinction, and just
+            # talks about empty vs non-empty.)
+            my $yes = $property->table("Y");
+            foreach my $range ($property->ranges) {
+                next if $range->value eq $default_map;
+                $yes->add_range($range->start, $range->end);
             }
+            $property->table("N")->set_complement($yes);
+        }
+        else {
+            if (defined $default_map) {
 
-            # And, if the property is binary, the default table will just
-            # be the complement of the other table.
-            if ($property_type == $BINARY) {
-                my $non_default_table;
-
-                # Find the non-default table.
-                for my $table ($property->tables) {
-                    next if $table == $default_table;
-                    $non_default_table = $table;
+                # Make sure there is a match table for the default
+                if (! defined ($default_table = $property->table($default_map)))
+                {
+                    $default_table = $property->add_match_table($default_map);
                 }
-                $default_table->set_complement($non_default_table);
-            }
-            else {
 
-                # This fills in any missing values with the default.  It's not
-                # necessary to do this with binary properties, as the default
-                # is defined completely in terms of the Y table.
-                $property->add_map(0, $MAX_WORKING_CODEPOINT,
-                                   $default_map, Replace => $NO);
-            }
-        }
+                # And, if the property is binary, the default table will just
+                # be the complement of the other table.
+                if ($property_type == $BINARY) {
+                    my $non_default_table;
 
-        # Have all we need to populate the match tables.
-        my $maps_should_be_defined = $property->pre_declared_maps;
-        foreach my $range ($property->ranges) {
-            my $map = $range->value;
-            my $table = $property->table($map);
-            if (! defined $table) {
+                    # Find the non-default table.
+                    for my $table ($property->tables) {
+                        if ($table == $default_table) {
+                            if ($v_version le v5.0.0) {
+                                $table->add_alias($_) for qw(N No F False);
+                            }
+                            next;
+                        } elsif ($v_version le v5.0.0) {
+                            $table->add_alias($_) for qw(Y Yes T True);
+                        }
+                        $non_default_table = $table;
+                    }
+                    $default_table->set_complement($non_default_table);
+                }
+                else {
 
-                # Integral and rational property values are not necessarily
-                # defined in PropValueAliases, but whether all the other ones
-                # should be depends on the property.
-                if ($maps_should_be_defined
-                    && $map !~ /^ -? \d+ ( \/ \d+ )? $/x)
-                {
-                    Carp::my_carp("Table '$property_name=$map' should have been defined.  Defining it now.")
+                    # This fills in any missing values with the default.  It's
+                    # not necessary to do this with binary properties, as the
+                    # default is defined completely in terms of the Y table.
+                    $property->add_map(0, $MAX_WORKING_CODEPOINT,
+                                    $default_map, Replace => $NO);
                 }
-                $table = $property->add_match_table($map);
             }
 
-            next if $table->complement != 0;    # Don't need to populate these
-            $table->add_range($range->start, $range->end);
-        }
+            # Have all we need to populate the match tables.
+            my $maps_should_be_defined = $property->pre_declared_maps;
+            foreach my $range ($property->ranges) {
+                my $map = $range->value;
+                my $table = $property->table($map);
+                if (! defined $table) {
 
-        # A forced binary property has additional true/false tables which
-        # should have been set up when it was forced into binary.  The false
-        # table matches exactly the same set as the property's default table.
-        # The true table matches the complement of that.  The false table is
-        # not the same as an additional set of aliases on top of the default
-        # table, so use 'set_equivalent_to'.  If it were implemented as
-        # additional aliases, various things would have to be adjusted, but
-        # especially, if the user wants to get a list of names for the table
-        # using Unicode::UCD::prop_value_aliases(), s/he should get a
-        # different set depending on whether they want the default table or
-        # the false table.
-        if ($property_type == $FORCED_BINARY) {
-            $property->table('N')->set_equivalent_to($default_table,
-                                                     Related => 1);
-            $property->table('Y')->set_complement($default_table);
+                    # Integral and rational property values are not
+                    # necessarily defined in PropValueAliases, but whether all
+                    # the other ones should be depends on the property.
+                    if ($maps_should_be_defined
+                        && $map !~ /^ -? \d+ ( \/ \d+ )? $/x)
+                    {
+                        Carp::my_carp("Table '$property_name=$map' should "
+                                    . "have been defined.  Defining it now.")
+                    }
+                    $table = $property->add_match_table($map);
+                }
+
+                next if $table->complement != 0; # Don't need to populate these
+                $table->add_range($range->start, $range->end);
+            }
         }
 
         # For Perl 5.6 compatibility, all properties matchable in regexes can
@@ -13321,8 +13492,6 @@ END
     $gc->table('Ll')->set_caseless_equivalent($LC);
     $gc->table('Lu')->set_caseless_equivalent($LC);
 
-    my $Cs = $gc->table('Cs');
-
     # Create digit and case fold tables with the original file names for
     # backwards compatibility with applications that read them directly.
     my $Digit = Property->new("Legacy_Perl_Decimal_Digit",
@@ -13408,6 +13577,26 @@ sub pre_3_dot_1_Nl () {
     return $Nl;
 }
 
+sub calculate_Assigned() {  # Sets $Assigned to the gc != Cn code points; may
+                            # be called before the Cn's are completely filled.
+                            # Works on Unicodes earlier than ones that
+                            # explicitly specify Cn.
+    return if defined $Assigned;    # already calculated; nothing more to do
+    # NOTE(review): the message interpolates $gc (maybe undef) -- confirm.
+    if (! defined $gc || $gc->is_empty()) {
+        Carp::my_carp_bug("calculate_Assigned() called before $gc is populated");
+    }
+    # Add every $gc range whose standardized value isn't 'cn'/'unassigned'.
+    $Assigned = $perl->add_match_table('Assigned',
+                                Description  => "All assigned code points",
+                                );
+    while (defined (my $range = $gc->each_range())) {
+        my $standard_value = standardize($range->value);
+        next if $standard_value eq 'cn' || $standard_value eq 'unassigned';
+        $Assigned->add_range($range->start, $range->end);
+    }
+}
+
 sub compile_perl() {
     # Create perl-defined tables.  Almost all are part of the pseudo-property
     # named 'perl' internally to this program.  Many of these are recommended
@@ -13446,16 +13635,12 @@ sub compile_perl() {
     }
 
     my $Any = $perl->add_match_table('Any',
-                                     Description  => "All Unicode code points: [\\x{0000}-\\x{10FFFF}]",
+                                     Description  => "All Unicode code points: [\\x{0000}-\\x{$MAX_UNICODE_CODEPOINT_STRING}]",
                                      );
-    $Any->add_range(0, 0x10FFFF);
+    $Any->add_range(0, $MAX_UNICODE_CODEPOINT);
     $Any->add_alias('Unicode');
 
-    # Assigned is the opposite of gc=unassigned
-    my $Assigned = $perl->add_match_table('Assigned',
-                                Description  => "All assigned code points",
-                                Initialize => ~ $gc->table('Unassigned'),
-                                );
+    calculate_Assigned();
 
     # Our internal-only property should be treated as more than just a
     # synonym; grandfather it in to the pod.
@@ -13501,32 +13686,27 @@ sub compile_perl() {
 
         # There are quite a few code points in Lower, that aren't in gc=lc,
         # and not all are in all releases.
-        foreach my $code_point (    utf8::unicode_to_native(0xAA),
-                                    utf8::unicode_to_native(0xBA),
-                                    0x02B0 .. 0x02B8,
-                                    0x02C0 .. 0x02C1,
-                                    0x02E0 .. 0x02E4,
-                                    0x0345,
-                                    0x037A,
-                                    0x1D2C .. 0x1D6A,
-                                    0x1D78,
-                                    0x1D9B .. 0x1DBF,
-                                    0x2071,
-                                    0x207F,
-                                    0x2090 .. 0x209C,
-                                    0x2170 .. 0x217F,
-                                    0x24D0 .. 0x24E9,
-                                    0x2C7C .. 0x2C7D,
-                                    0xA770,
-                                    0xA7F8 .. 0xA7F9,
-        ) {
-            # Don't include the code point unless it is assigned in this
-            # release
-            my $category = $gc->value_of(hex $code_point);
-            next if ! defined $category || $category eq 'Cn';
-
-            $Lower += $code_point;
-        }
+        my $temp = Range_List->new(Initialize => [
+                                                utf8::unicode_to_native(0xAA),
+                                                utf8::unicode_to_native(0xBA),
+                                                0x02B0 .. 0x02B8,
+                                                0x02C0 .. 0x02C1,
+                                                0x02E0 .. 0x02E4,
+                                                0x0345,
+                                                0x037A,
+                                                0x1D2C .. 0x1D6A,
+                                                0x1D78,
+                                                0x1D9B .. 0x1DBF,
+                                                0x2071,
+                                                0x207F,
+                                                0x2090 .. 0x209C,
+                                                0x2170 .. 0x217F,
+                                                0x24D0 .. 0x24E9,
+                                                0x2C7C .. 0x2C7D,
+                                                0xA770,
+                                                0xA7F8 .. 0xA7F9,
+                                ]);
+        $Lower += $temp & $Assigned;
     }
     my $Posix_Lower = $perl->add_match_table("PosixLower",
                             Description => "[a-z]",
@@ -13832,6 +14012,7 @@ sub compile_perl() {
     );
     $Space->add_alias('XPerlSpace');    # Pre-existing synonyms
     $Space->add_alias('SpacePerl');
+    $Space->add_alias('Space') if $v_version lt v4.1.0;
 
     my $Posix_space = $perl->add_match_table("PosixSpace",
                             Description => "\\t, \\n, \\cK, \\f, \\r, and ' '.  (\\cK is vertical tab)",
@@ -13847,11 +14028,18 @@ sub compile_perl() {
                             Initialize => $Cntrl & $ASCII,
                             );
 
+    my $perl_surrogate = $perl->add_match_table('_Perl_Surrogate');
+    if (defined (my $Cs = $gc->table('Cs'))) {
+        $perl_surrogate += $Cs;
+    }
+    else {
+        push @tables_that_may_be_empty, '_Perl_Surrogate';
+    }
+
     # $controls is a temporary used to construct Graph.
     my $controls = Range_List->new(Initialize => $gc->table('Unassigned')
-                                                + $gc->table('Control'));
-    # Cs not in release 1
-    $controls += $gc->table('Surrogate') if defined $gc->table('Surrogate');
+                                                + $gc->table('Control')
+                                                + $perl_surrogate);
 
     # Graph is  ~space &  ~(Cc|Cs|Cn) = ~(space + $controls)
     my $Graph = $perl->add_match_table('Graph', Full_Name => 'XPosixGraph',
@@ -13957,14 +14145,21 @@ sub compile_perl() {
     # 31f05a37c4e9c37a7263491f2fc0237d836e1a80 for a more complete description
     # of the MU issue.
     foreach my $range ($loc_problem_folds->ranges) {
-        foreach my $code_point($range->start .. $range->end) {
+        foreach my $code_point ($range->start .. $range->end) {
             my $fold_range = $cf->containing_range($code_point);
             next unless defined $fold_range;
 
+            # Skip if folds to itself
+            next if $fold_range->value eq $CODE_POINT;
+
             my @hex_folds = split " ", $fold_range->value;
-            my $start_cp = hex $hex_folds[0];
+            my $start_cp = $hex_folds[0];
+            next if $start_cp eq $CODE_POINT;
+            $start_cp = hex $start_cp;
             foreach my $i (0 .. @hex_folds - 1) {
-                my $cp = hex $hex_folds[$i];
+                my $cp = $hex_folds[$i];
+                next if $cp eq $CODE_POINT;
+                $cp = hex $cp;
                 next unless $cp > 255;    # Already have the < 256 ones
 
                 $loc_problem_folds->add_range($cp, $cp);
@@ -13978,9 +14173,13 @@ sub compile_perl() {
          Description =>
               "Code points whose fold is a string of more than one character",
     );
+    if ($v_version lt v3.0.1) {
+        push @tables_that_may_be_empty, '_Perl_Folds_To_Multi_Char';
+    }
 
     # Look through all the known folds to populate these tables.
     foreach my $range ($cf->ranges) {
+        next if $range->value eq $CODE_POINT;
         my $start = $range->start;
         my $end = $range->end;
         $any_folds->add_range($start, $end);
@@ -14190,7 +14389,7 @@ sub compile_perl() {
                                     + utf8::unicode_to_native(0xA0) # NBSP
                         );
 
-    my @composition = ('Name', 'Unicode_1_Name', 'Name_Alias');
+    my @composition = ('Name', 'Unicode_1_Name', '_Perl_Name_Alias');
 
     if (@named_sequences) {
         push @composition, 'Named_Sequence';
@@ -14201,15 +14400,15 @@ sub compile_perl() {
 
     my $alias_sentence = "";
     my %abbreviations;
-    my $alias = property_ref('Name_Alias');
-    $perl_charname->set_proxy_for('Name_Alias');
-
-    # Add each entry in Name_Alias to Perl_Charnames.  Where these go with
-    # respect to any existing entry depends on the entry type.  Corrections go
-    # before said entry, as they should be returned in preference over the
-    # existing entry.  (A correction to a correction should be later in the
-    # Name_Alias table, so it will correctly precede the erroneous correction
-    # in Perl_Charnames.)
+    my $alias = property_ref('_Perl_Name_Alias');
+    $perl_charname->set_proxy_for('_Perl_Name_Alias');
+
+    # Add each entry in _Perl_Name_Alias to Perl_Charnames.  Where these go
+    # with respect to any existing entry depends on the entry type.
+    # Corrections go before said entry, as they should be returned in
+    # preference over the existing entry.  (A correction to a correction
+    # should be later in the _Perl_Name_Alias table, so it will correctly
+    # precede the erroneous correction in Perl_Charnames.)
     #
     # Abbreviations go after everything else, so they are saved temporarily in
     # a hash for later.
@@ -14244,7 +14443,7 @@ sub compile_perl() {
         $perl_charname->add_duplicate($code_point, $value, Replace => $replace_type);
     }
     $alias_sentence = <<END;
-The Name_Alias property adds duplicate code point entries that are
+The _Perl_Name_Alias property adds duplicate code point entries that are
 alternatives to the original name.  If an addition is a corrected
 name, it will be physically first in the table.  The original (less correct,
 but still valid) name will be next; then any alternatives, in no particular
@@ -14252,8 +14451,9 @@ order; and finally any abbreviations, again in no particular order.
 END
 
     # Now add the Unicode_1 names for the controls.  The Unicode_1 names had
-    # precedence before 6.1, so should be first in the file; the other names
-    # have precedence starting in 6.1,
+    # precedence before 6.1, including the awful ones like "LINE FEED (LF)",
+    # so should be first in the file; the other names have precedence starting
+    # in 6.1.
     my $before_or_after = ($v_version lt v6.1.0)
                           ? $MULTIPLE_BEFORE
                           : $MULTIPLE_AFTER;
@@ -14283,12 +14483,6 @@ END
                                         Replace => $before_or_after);
     }
 
-    # But in this version only, the ALERT has precedence over BELL, the
-    # Unicode_1_Name that would otherwise have precedence.
-    if ($v_version eq v6.0.0) {
-        $perl_charname->add_duplicate(7, 'ALERT', Replace => $MULTIPLE_BEFORE);
-    }
-
     # Now that have everything added, add in abbreviations after
     # everything else.  Sort so results don't change between runs of this
     # program
@@ -14398,6 +14592,25 @@ END
         $unassigned->set_equivalent_to($age_default, Related => 1);
     }
 
+    my $patws = $perl->add_match_table('_Perl_PatWS',
+                                       Perl_Extension => 1,
+                                       Fate => $INTERNAL_ONLY);
+    if (defined (my $off_patws = property_ref('Pattern_White_Space'))) {
+        $patws->initialize($off_patws->table('Y'));
+    }
+    else {
+        $patws->initialize([ ord("\t"),
+                             ord("\n"),
+                             utf8::unicode_to_native(0x0B), # VT
+                             ord("\f"),
+                             ord("\r"),
+                             ord(" "),
+                             utf8::unicode_to_native(0x85), # NEL
+                             0x200E..0x200F,             # Left, Right marks
+                             0x2028..0x2029              # Line, Paragraph seps
+                           ] );
+    }
+
     # See L<perlfunc/quotemeta>
     my $quotemeta = $perl->add_match_table('_Perl_Quotemeta',
                                            Perl_Extension => 1,
@@ -14406,22 +14619,78 @@ END
                                            # Initialize to what's common in
                                            # all Unicode releases.
                                            Initialize =>
-                                                $Space
-                                                + $gc->table('Control')
+                                                  $gc->table('Control')
+                                                + $Space
+                                                + $patws
+                                                + ((~ $Word) & $ASCII)
                            );
 
-    # In early releases without the proper Unicode properties, just set to \W.
-    if (! defined (my $patsyn = property_ref('Pattern_Syntax'))
-        || ! defined (my $patws = property_ref('Pattern_White_Space'))
-        || ! defined (my $di = property_ref('Default_Ignorable_Code_Point')))
-    {
-        $quotemeta += ~ $Word;
+    if (defined (my $patsyn = property_ref('Pattern_Syntax'))) {
+        $quotemeta += $patsyn->table('Y');
     }
     else {
-        $quotemeta += $patsyn->table('Y')
-                   + $patws->table('Y')
-                   + $di->table('Y')
-                   + ((~ $Word) & $ASCII);
+        $quotemeta += ((~ $Word) & Range->new(0, 255))
+                    - utf8::unicode_to_native(0xA8)
+                    - utf8::unicode_to_native(0xAF)
+                    - utf8::unicode_to_native(0xB2)
+                    - utf8::unicode_to_native(0xB3)
+                    - utf8::unicode_to_native(0xB4)
+                    - utf8::unicode_to_native(0xB7)
+                    - utf8::unicode_to_native(0xB8)
+                    - utf8::unicode_to_native(0xB9)
+                    - utf8::unicode_to_native(0xBC)
+                    - utf8::unicode_to_native(0xBD)
+                    - utf8::unicode_to_native(0xBE);
+        $quotemeta += [ # These are above-Latin1 patsyn; hence should be the
+                        # same in all releases
+                        0x2010 .. 0x2027,
+                        0x2030 .. 0x203E,
+                        0x2041 .. 0x2053,
+                        0x2055 .. 0x205E,
+                        0x2190 .. 0x245F,
+                        0x2500 .. 0x2775,
+                        0x2794 .. 0x2BFF,
+                        0x2E00 .. 0x2E7F,
+                        0x3001 .. 0x3003,
+                        0x3008 .. 0x3020,
+                        0x3030 .. 0x3030,
+                        0xFD3E .. 0xFD3F,
+                        0xFE45 .. 0xFE46
+                      ];
+    }
+
+    if (defined (my $di = property_ref('Default_Ignorable_Code_Point'))) {
+        $quotemeta += $di->table('Y')
+    }
+    else {
+        if ($v_version ge v2.0) {
+            $quotemeta += $gc->table('Cf')
+                       +  $gc->table('Cs');
+        }
+        $quotemeta += $gc->table('Cc')
+                    - $Space;
+        my $temp = Range_List->new(Initialize => [ 0x180B .. 0x180D,
+                                                   0x2060 .. 0x206F,
+                                                   0xFE00 .. 0xFE0F,
+                                                   0xFFF0 .. 0xFFFB,
+                                                   0xE0000 .. 0xE0FFF,
+                                                  ]);
+        $quotemeta += $temp & $Assigned;
+    }
+
+    my $nchar = $perl->add_match_table('_Perl_Nchar',
+                                       Perl_Extension => 1,
+                                       Fate => $INTERNAL_ONLY);
+    if (defined (my $off_nchar = property_ref('Nchar'))) {
+        $nchar->initialize($off_nchar->table('Y'));
+    }
+    else {
+        $nchar->initialize([ 0xFFFE .. 0xFFFF ]);
+        if ($v_version ge v2.0) {   # First release with these nchars
+            for (my $i = 0x1FFFE; $i <= 0x10FFFE; $i += 0x10000) {
+                $nchar += [ $i .. $i+1 ];
+            }
+        }
     }
 
     # Finished creating all the perl properties.  All non-internal non-string
@@ -14743,14 +15012,15 @@ sub register_file_for_name($$$) {
     my $file = shift;            # The file name in the final directory.
     Carp::carp_extra_args(\@_) if main::DEBUG && @_;
 
-    trace "table=$table, file=$file, directory=@$directory_ref" if main::DEBUG && $to_trace;
+    trace "table=$table, file=$file, directory=@$directory_ref, fate=", $table->fate if main::DEBUG && $to_trace;
 
     if ($table->isa('Property')) {
         $table->set_file_path(@$directory_ref, $file);
         push @map_properties, $table;
 
         # No swash means don't do the rest of this.
-        return if $table->fate != $ORDINARY;
+        return if $table->fate != $ORDINARY
+                  && ! ($table->name =~ /^_/ && $table->fate == $INTERNAL_ONLY);
 
         # Get the path to the file
         my @path = $table->file_path;
@@ -14764,7 +15034,12 @@ sub register_file_for_name($$$) {
         # property's map table
         foreach my $alias ($table->aliases) {
             my $name = $alias->name;
-            $loose_property_to_file_of{standardize($name)} = $file;
+            if ($name =~ /^_/) {
+                $strict_property_to_file_of{lc $name} = $file;
+            }
+            else {
+                $loose_property_to_file_of{standardize($name)} = $file;
+            }
         }
 
         # And a way for utf8_heavy to find the proper key in the SwashInfo
@@ -14972,7 +15247,22 @@ sub register_file_for_name($$$) {
         # Remove interior underscores.
         (my $filename = $name) =~ s/ (?<=.) _ (?=.) //xg;
 
-        # Change any non-word character into an underscore, and truncate to 8.
+        # Convert the dot in floating point numbers to an underscore
+        $filename =~ s/\./_/ if $filename =~ / ^ \d+ \. \d+ $ /x;
+
+        my $suffix = "";
+
+        # Extract any suffix, delete any non-word character, and truncate to 3
+        # after the dot
+        if ($filename =~ m/ ( .*? ) ( \. .* ) /x) {
+            $filename = $1;
+            $suffix = $2;
+            $suffix =~ s/\W+//g;
+            substr($suffix, 4) = "" if length($suffix) > 4;
+        }
+
+        # Change any non-word character outside the suffix into an underscore,
+        # and truncate to 8.
         $filename =~ s/\W+/_/g;   # eg., "L&" -> "L_"
         substr($filename, 8) = "" if length($filename) > 8;
 
@@ -14984,7 +15274,7 @@ sub register_file_for_name($$$) {
         #     InGreekE
         #     InGreek2
         my $warned = 0;
-        while (my $num = $base_names{$path}{lc $filename}++) {
+        while (my $num = $base_names{$path}{lc "$filename$suffix"}++) {
             $num++; # so basenames with numbers start with '2', which
                     # just looks more natural.
 
@@ -15491,9 +15781,9 @@ sub make_ucd_table_pod_entries {
             $$info_ref .= $full_name;
         }
 
-        # And the full-name entry includes the short name, if different
+        # And the full-name entry includes the short name, if shorter
         if ($info_ref == \$full_info
-            && $standard_short_name ne $standard_full_name)
+            && length $standard_short_name < length $standard_full_name)
         {
             $full_info =~ s/\.\Z//;
             $full_info .= "  " if $full_info;
@@ -15517,6 +15807,17 @@ sub make_ucd_table_pod_entries {
         $full_info .= ".  " if $full_info;
         $full_info .= $more_info;
     }
+    if ($table->property->type == $FORCED_BINARY) {
+        if ($full_info) {
+            $full_info =~ s/\.\Z//;
+            $full_info .= ".  ";
+        }
+        $full_info .= "This is a combination property which has both:"
+                    . " 1) a map to various string values; and"
+                    . " 2) a map to boolean Y/N, where 'Y' means the"
+                    . " string value is non-empty.  Add the prefix 'is'"
+                    . " to the prop_invmap() call to get the latter";
+    }
 
     # These keep track if have created full and short name pod entries for the
     # property
@@ -15548,6 +15849,9 @@ sub make_ucd_table_pod_entries {
             $info = $other_info;
         }
 
+        $combination_property{$standard} = 1
+                                  if $table->property->type == $FORCED_BINARY;
+
         # Here, we have set up the two columns for this entry.  But if an
         # entry already exists for this name, we have to decide which one
         # we're going to later output.
@@ -15614,9 +15918,9 @@ sub pod_alphanumeric_sort {
     # The first few character columns are filler, plus the '\p{'; and get rid
     # of all the trailing stuff, starting with the trailing '}', so as to sort
     # on just 'Name=Value'
-    (my $a = lc $a) =~ s/^ .*? { //x;
+    (my $a = lc $a) =~ s/^ .*? \{ //x;
     $a =~ s/}.*//;
-    (my $b = lc $b) =~ s/^ .*? { //x;
+    (my $b = lc $b) =~ s/^ .*? \{ //x;
     $b =~ s/}.*//;
 
     # Determine if the two operands are both internal only or both not.
@@ -15761,6 +16065,7 @@ END
     # The sort will cause the alphabetically first properties to be added to
     # each list first, so each list will be sorted.
     foreach my $property (sort keys %why_suppressed) {
+        next unless $why_suppressed{$property};
         push @{$why_list{$why_suppressed{$property}}}, $property;
     }
 
@@ -15823,13 +16128,12 @@ END
     }
 
     # Similarly, generate a list of files that we don't use, grouped by the
-    # reasons why.  First, create a hash whose keys are the reasons, and whose
-    # values are anonymous arrays of all the files that share that reason.
+    # reasons why (Don't output if the reason is empty).  First, create a hash
+    # whose keys are the reasons, and whose values are anonymous arrays of all
+    # the files that share that reason.
     my %grouped_by_reason;
-    foreach my $file (keys %ignored_files) {
-        push @{$grouped_by_reason{$ignored_files{$file}}}, $file;
-    }
     foreach my $file (keys %skipped_files) {
+        next unless $skipped_files{$file};
         push @{$grouped_by_reason{$skipped_files{$file}}}, $file;
     }
 
@@ -16284,6 +16588,10 @@ sub make_Heavy () {
                            = simple_dumper(\%loose_property_name_of, ' ' x 4);
     chomp $loose_property_name_of;
 
+    my $strict_property_name_of
+                           = simple_dumper(\%strict_property_name_of, ' ' x 4);
+    chomp $strict_property_name_of;
+
     my $stricter_to_file_of = simple_dumper(\%stricter_to_file_of, ' ' x 4);
     chomp $stricter_to_file_of;
 
@@ -16322,6 +16630,10 @@ sub make_Heavy () {
                         = simple_dumper(\%loose_property_to_file_of, ' ' x 4);
     chomp $loose_property_to_file_of;
 
+    my $strict_property_to_file_of
+                        = simple_dumper(\%strict_property_to_file_of, ' ' x 4);
+    chomp $strict_property_to_file_of;
+
     my $file_to_swash_name = simple_dumper(\%file_to_swash_name, ' ' x 4);
     chomp $file_to_swash_name;
 
@@ -16337,6 +16649,11 @@ $INTERNAL_ONLY_HEADER
 $loose_property_name_of
 );
 
+# Same, but strict names
+\%utf8::strict_property_name_of = (
+$strict_property_name_of
+);
+
 # Gives the definitions (in the form of inversion lists) for those properties
 # whose definitions aren't kept in files
 \@utf8::inline_definitions = (
@@ -16385,6 +16702,11 @@ $caseless_equivalent_to
 $loose_property_to_file_of
 );
 
+# Property names to mapping files
+\%utf8::strict_property_to_file_of = (
+$strict_property_to_file