This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
charclass_invlists.h: Add some inverse folds.
author Karl Williamson <khw@cpan.org>
Wed, 18 Mar 2020 15:12:34 +0000 (09:12 -0600)
committer Karl Williamson <khw@cpan.org>
Fri, 16 Oct 2020 13:01:41 +0000 (07:01 -0600)
The MICRO SIGN folds to a code point above the Latin1 range; it is the only
Latin1-range character that does so in Unicode (or is ever likely to).  This
requires special handling.  This commit reduces some of the need for that
handling by adding its inverse fold to the inversion map, which can be used
in certain instances in pattern matching without needing a special case.
The actual use of this will come in a future commit.

charclass_invlists.h
lib/unicore/uni_keywords.pl
regen/mk_invlists.pl
uni_keywords.h

index 9a6a9cf..78d773d 100644 (file)
@@ -29629,7 +29629,7 @@ static const GCB_enum _Perl_GCB_invmap[] = {  /* for EBCDIC 037 */
 #  if 'A' == 65 /* ASCII/Latin1 */
 
 static const UV _Perl_IVCF_invlist[] = {  /* for ASCII/Latin1 */
-       1316,   /* Number of elements */
+       1318,   /* Number of elements */
        148565664, /* Version and data structure type */
        0,      /* 0 if the list starts at 0;
                   1 if it starts at the element beyond 0 */
@@ -29640,6 +29640,8 @@ static const UV _Perl_IVCF_invlist[] = {  /* for ASCII/Latin1 */
        0x73,
        0x74,
        0x7B,
+       0xB5,
+       0xB6,
        0xDF,
        0xE0,
        0xE5,
@@ -30991,7 +30993,8 @@ typedef enum {
        IVCF_use_AUX_TABLE_24 = -24,
        IVCF_use_AUX_TABLE_25 = -25,
        IVCF_use_AUX_TABLE_26 = -26,
-       IVCF_use_AUX_TABLE_27 = -27
+       IVCF_use_AUX_TABLE_27 = -27,
+       IVCF_use_AUX_TABLE_28 = -28
 } IVCF_enum;
 
 #define HAS_IVCF_AUX_TABLES
@@ -31007,129 +31010,134 @@ static const U32 IVCF_AUX_TABLE_2[] = {
 };
 
 static const U32 IVCF_AUX_TABLE_3[] = {
+       0x39C,
+       0x3BC
+};
+
+static const U32 IVCF_AUX_TABLE_4[] = {
        0xC5,
        0x212B
 };
 
-static const U32 IVCF_AUX_TABLE_4[] = {
+static const U32 IVCF_AUX_TABLE_5[] = {
        0x1C4,
        0x1C5
 };
 
-static const U32 IVCF_AUX_TABLE_5[] = {
+static const U32 IVCF_AUX_TABLE_6[] = {
        0x1C7,
        0x1C8
 };
 
-static const U32 IVCF_AUX_TABLE_6[] = {
+static const U32 IVCF_AUX_TABLE_7[] = {
        0x1CA,
        0x1CB
 };
 
-static const U32 IVCF_AUX_TABLE_7[] = {
+static const U32 IVCF_AUX_TABLE_8[] = {
        0x1F1,
        0x1F2
 };
 
-static const U32 IVCF_AUX_TABLE_8[] = {
+static const U32 IVCF_AUX_TABLE_9[] = {
        0x392,
        0x3D0
 };
 
-static const U32 IVCF_AUX_TABLE_9[] = {
+static const U32 IVCF_AUX_TABLE_10[] = {
        0x395,
        0x3F5
 };
 
-static const U32 IVCF_AUX_TABLE_10[] = {
+static const U32 IVCF_AUX_TABLE_11[] = {
        0x398,
        0x3D1,
        0x3F4
 };
 
-static const U32 IVCF_AUX_TABLE_11[] = {
+static const U32 IVCF_AUX_TABLE_12[] = {
        0x345,
        0x399,
        0x1FBE
 };
 
-static const U32 IVCF_AUX_TABLE_12[] = {
+static const U32 IVCF_AUX_TABLE_13[] = {
        0x39A,
        0x3F0
 };
 
-static const U32 IVCF_AUX_TABLE_13[] = {
+static const U32 IVCF_AUX_TABLE_14[] = {
        0xB5,
        0x39C
 };
 
-static const U32 IVCF_AUX_TABLE_14[] = {
+static const U32 IVCF_AUX_TABLE_15[] = {
        0x3A0,
        0x3D6
 };
 
-static const U32 IVCF_AUX_TABLE_15[] = {
+static const U32 IVCF_AUX_TABLE_16[] = {
        0x3A1,
        0x3F1
 };
 
-static const U32 IVCF_AUX_TABLE_16[] = {
+static const U32 IVCF_AUX_TABLE_17[] = {
        0x3A3,
        0x3C2
 };
 
-static const U32 IVCF_AUX_TABLE_17[] = {
+static const U32 IVCF_AUX_TABLE_18[] = {
        0x3A6,
        0x3D5
 };
 
-static const U32 IVCF_AUX_TABLE_18[] = {
+static const U32 IVCF_AUX_TABLE_19[] = {
        0x3A9,
        0x2126
 };
 
-static const U32 IVCF_AUX_TABLE_19[] = {
+static const U32 IVCF_AUX_TABLE_20[] = {
        0x412,
        0x1C80
 };
 
-static const U32 IVCF_AUX_TABLE_20[] = {
+static const U32 IVCF_AUX_TABLE_21[] = {
        0x414,
        0x1C81
 };
 
-static const U32 IVCF_AUX_TABLE_21[] = {
+static const U32 IVCF_AUX_TABLE_22[] = {
        0x41E,
        0x1C82
 };
 
-static const U32 IVCF_AUX_TABLE_22[] = {
+static const U32 IVCF_AUX_TABLE_23[] = {
        0x421,
        0x1C83
 };
 
-static const U32 IVCF_AUX_TABLE_23[] = {
+static const U32 IVCF_AUX_TABLE_24[] = {
        0x422,
        0x1C84,
        0x1C85
 };
 
-static const U32 IVCF_AUX_TABLE_24[] = {
+static const U32 IVCF_AUX_TABLE_25[] = {
        0x42A,
        0x1C86
 };
 
-static const U32 IVCF_AUX_TABLE_25[] = {
+static const U32 IVCF_AUX_TABLE_26[] = {
        0x462,
        0x1C87
 };
 
-static const U32 IVCF_AUX_TABLE_26[] = {
+static const U32 IVCF_AUX_TABLE_27[] = {
        0x1E60,
        0x1E9B
 };
 
-static const U32 IVCF_AUX_TABLE_27[] = {
+static const U32 IVCF_AUX_TABLE_28[] = {
        0x1C88,
        0xA64A
 };
@@ -31162,7 +31170,8 @@ static const U32 * const IVCF_AUX_TABLE_ptrs[] = {
        IVCF_AUX_TABLE_24,
        IVCF_AUX_TABLE_25,
        IVCF_AUX_TABLE_26,
-       IVCF_AUX_TABLE_27
+       IVCF_AUX_TABLE_27,
+       IVCF_AUX_TABLE_28
 };
 
 /* Parallel table to the above, giving the number of elements in each table
@@ -31179,9 +31188,9 @@ static const U8 IVCF_AUX_TABLE_lengths[] = {
        2       /* IVCF_AUX_TABLE_7 */,
        2       /* IVCF_AUX_TABLE_8 */,
        2       /* IVCF_AUX_TABLE_9 */,
-       3       /* IVCF_AUX_TABLE_10 */,
+       2       /* IVCF_AUX_TABLE_10 */,
        3       /* IVCF_AUX_TABLE_11 */,
-       2       /* IVCF_AUX_TABLE_12 */,
+       3       /* IVCF_AUX_TABLE_12 */,
        2       /* IVCF_AUX_TABLE_13 */,
        2       /* IVCF_AUX_TABLE_14 */,
        2       /* IVCF_AUX_TABLE_15 */,
@@ -31192,11 +31201,12 @@ static const U8 IVCF_AUX_TABLE_lengths[] = {
        2       /* IVCF_AUX_TABLE_20 */,
        2       /* IVCF_AUX_TABLE_21 */,
        2       /* IVCF_AUX_TABLE_22 */,
-       3       /* IVCF_AUX_TABLE_23 */,
-       2       /* IVCF_AUX_TABLE_24 */,
+       2       /* IVCF_AUX_TABLE_23 */,
+       3       /* IVCF_AUX_TABLE_24 */,
        2       /* IVCF_AUX_TABLE_25 */,
        2       /* IVCF_AUX_TABLE_26 */,
-       2       /* IVCF_AUX_TABLE_27 */
+       2       /* IVCF_AUX_TABLE_27 */,
+       2       /* IVCF_AUX_TABLE_28 */
 };
 
 static const I32 _Perl_IVCF_invmap[] = {  /* for ASCII/Latin1 */
@@ -31207,9 +31217,11 @@ static const I32 _Perl_IVCF_invmap[] = {  /* for ASCII/Latin1 */
        IVCF_use_AUX_TABLE_2,
        0x54,
        0,
+       IVCF_use_AUX_TABLE_3,
+       0,
        0x1E9E,
        0xC0,
-       IVCF_use_AUX_TABLE_3,
+       IVCF_use_AUX_TABLE_4,
        0xC6,
        0,
        0xD8,
@@ -31378,12 +31390,12 @@ static const I32 _Perl_IVCF_invmap[] = {  /* for ASCII/Latin1 */
        0,
        0x1F7,
        0,
-       IVCF_use_AUX_TABLE_4,
-       0,
        IVCF_use_AUX_TABLE_5,
        0,
        IVCF_use_AUX_TABLE_6,
        0,
+       IVCF_use_AUX_TABLE_7,
+       0,
        0x1CD,
        0,
        0x1CF,
@@ -31419,7 +31431,7 @@ static const I32 _Perl_IVCF_invmap[] = {  /* for ASCII/Latin1 */
        0,
        0x1EE,
        0,
-       IVCF_use_AUX_TABLE_7,
+       IVCF_use_AUX_TABLE_8,
        0,
        0x1F4,
        0,
@@ -31562,24 +31574,24 @@ static const I32 _Perl_IVCF_invmap[] = {  /* for ASCII/Latin1 */
        0x388,
        0x1FE3,
        0x391,
-       IVCF_use_AUX_TABLE_8,
-       0x393,
        IVCF_use_AUX_TABLE_9,
-       0x396,
+       0x393,
        IVCF_use_AUX_TABLE_10,
+       0x396,
        IVCF_use_AUX_TABLE_11,
        IVCF_use_AUX_TABLE_12,
-       0x39B,
        IVCF_use_AUX_TABLE_13,
-       0x39D,
+       0x39B,
        IVCF_use_AUX_TABLE_14,
+       0x39D,
        IVCF_use_AUX_TABLE_15,
-       0,
        IVCF_use_AUX_TABLE_16,
-       0x3A4,
+       0,
        IVCF_use_AUX_TABLE_17,
-       0x3A7,
+       0x3A4,
        IVCF_use_AUX_TABLE_18,
+       0x3A7,
+       IVCF_use_AUX_TABLE_19,
        0x3AA,
        0x38C,
        0x38E,
@@ -31618,22 +31630,22 @@ static const I32 _Perl_IVCF_invmap[] = {  /* for ASCII/Latin1 */
        0x3FA,
        0,
        0x410,
-       IVCF_use_AUX_TABLE_19,
-       0x413,
        IVCF_use_AUX_TABLE_20,
-       0x415,
+       0x413,
        IVCF_use_AUX_TABLE_21,
-       0x41F,
+       0x415,
        IVCF_use_AUX_TABLE_22,
+       0x41F,
        IVCF_use_AUX_TABLE_23,
-       0x423,
        IVCF_use_AUX_TABLE_24,
+       0x423,
+       IVCF_use_AUX_TABLE_25,
        0x42B,
        0x400,
        0,
        0x460,
        0,
-       IVCF_use_AUX_TABLE_25,
+       IVCF_use_AUX_TABLE_26,
        0,
        0x464,
        0,
@@ -31941,7 +31953,7 @@ static const I32 _Perl_IVCF_invmap[] = {  /* for ASCII/Latin1 */
        0,
        0x1E5E,
        0,
-       IVCF_use_AUX_TABLE_26,
+       IVCF_use_AUX_TABLE_27,
        0,
        0x1E62,
        0,
@@ -32296,7 +32308,7 @@ static const I32 _Perl_IVCF_invmap[] = {  /* for ASCII/Latin1 */
        0,
        0xA648,
        0,
-       IVCF_use_AUX_TABLE_27,
+       IVCF_use_AUX_TABLE_28,
        0,
        0xA64C,
        0,
@@ -32530,7 +32542,7 @@ static const I32 _Perl_IVCF_invmap[] = {  /* for ASCII/Latin1 */
      && '$' == 91 && '@' == 124 && '`' == 121 && '\n' == 21
 
 static const UV _Perl_IVCF_invlist[] = {  /* for EBCDIC 1047 */
-       1331,   /* Number of elements */
+       1333,   /* Number of elements */
        148565664, /* Version and data structure type */
        0,      /* 0 if the list starts at 0;
                   1 if it starts at the element beyond 0 */
@@ -32556,6 +32568,8 @@ static const UV _Perl_IVCF_invlist[] = {  /* for EBCDIC 1047 */
        0x9A,
        0x9C,
        0x9D,
+       0xA0,
+       0xA1,
        0xA2,
        0xA3,
        0xAA,
@@ -33910,7 +33924,8 @@ typedef enum {
        IVCF_use_AUX_TABLE_24 = -24,
        IVCF_use_AUX_TABLE_25 = -25,
        IVCF_use_AUX_TABLE_26 = -26,
-       IVCF_use_AUX_TABLE_27 = -27
+       IVCF_use_AUX_TABLE_27 = -27,
+       IVCF_use_AUX_TABLE_28 = -28
 } IVCF_enum;
 
 #define HAS_IVCF_AUX_TABLES
@@ -33926,129 +33941,134 @@ static const U32 IVCF_AUX_TABLE_2[] = {
 };
 
 static const U32 IVCF_AUX_TABLE_3[] = {
+       0x39C,
+       0x3BC
+};
+
+static const U32 IVCF_AUX_TABLE_4[] = {
        0xE2,
        0x17F
 };
 
-static const U32 IVCF_AUX_TABLE_4[] = {
+static const U32 IVCF_AUX_TABLE_5[] = {
        0x1C4,
        0x1C5
 };
 
-static const U32 IVCF_AUX_TABLE_5[] = {
+static const U32 IVCF_AUX_TABLE_6[] = {
        0x1C7,
        0x1C8
 };
 
-static const U32 IVCF_AUX_TABLE_6[] = {
+static const U32 IVCF_AUX_TABLE_7[] = {
        0x1CA,
        0x1CB
 };
 
-static const U32 IVCF_AUX_TABLE_7[] = {
+static const U32 IVCF_AUX_TABLE_8[] = {
        0x1F1,
        0x1F2
 };
 
-static const U32 IVCF_AUX_TABLE_8[] = {
+static const U32 IVCF_AUX_TABLE_9[] = {
        0x392,
        0x3D0
 };
 
-static const U32 IVCF_AUX_TABLE_9[] = {
+static const U32 IVCF_AUX_TABLE_10[] = {
        0x395,
        0x3F5
 };
 
-static const U32 IVCF_AUX_TABLE_10[] = {
+static const U32 IVCF_AUX_TABLE_11[] = {
        0x398,
        0x3D1,
        0x3F4
 };
 
-static const U32 IVCF_AUX_TABLE_11[] = {
+static const U32 IVCF_AUX_TABLE_12[] = {
        0x345,
        0x399,
        0x1FBE
 };
 
-static const U32 IVCF_AUX_TABLE_12[] = {
+static const U32 IVCF_AUX_TABLE_13[] = {
        0x39A,
        0x3F0
 };
 
-static const U32 IVCF_AUX_TABLE_13[] = {
+static const U32 IVCF_AUX_TABLE_14[] = {
        0xA0,
        0x39C
 };
 
-static const U32 IVCF_AUX_TABLE_14[] = {
+static const U32 IVCF_AUX_TABLE_15[] = {
        0x3A0,
        0x3D6
 };
 
-static const U32 IVCF_AUX_TABLE_15[] = {
+static const U32 IVCF_AUX_TABLE_16[] = {
        0x3A1,
        0x3F1
 };
 
-static const U32 IVCF_AUX_TABLE_16[] = {
+static const U32 IVCF_AUX_TABLE_17[] = {
        0x3A3,
        0x3C2
 };
 
-static const U32 IVCF_AUX_TABLE_17[] = {
+static const U32 IVCF_AUX_TABLE_18[] = {
        0x3A6,
        0x3D5
 };
 
-static const U32 IVCF_AUX_TABLE_18[] = {
+static const U32 IVCF_AUX_TABLE_19[] = {
        0x3A9,
        0x2126
 };
 
-static const U32 IVCF_AUX_TABLE_19[] = {
+static const U32 IVCF_AUX_TABLE_20[] = {
        0x412,
        0x1C80
 };
 
-static const U32 IVCF_AUX_TABLE_20[] = {
+static const U32 IVCF_AUX_TABLE_21[] = {
        0x414,
        0x1C81
 };
 
-static const U32 IVCF_AUX_TABLE_21[] = {
+static const U32 IVCF_AUX_TABLE_22[] = {
        0x41E,
        0x1C82
 };
 
-static const U32 IVCF_AUX_TABLE_22[] = {
+static const U32 IVCF_AUX_TABLE_23[] = {
        0x421,
        0x1C83
 };
 
-static const U32 IVCF_AUX_TABLE_23[] = {
+static const U32 IVCF_AUX_TABLE_24[] = {
        0x422,
        0x1C84,
        0x1C85
 };
 
-static const U32 IVCF_AUX_TABLE_24[] = {
+static const U32 IVCF_AUX_TABLE_25[] = {
        0x42A,
        0x1C86
 };
 
-static const U32 IVCF_AUX_TABLE_25[] = {
+static const U32 IVCF_AUX_TABLE_26[] = {
        0x462,
        0x1C87
 };
 
-static const U32 IVCF_AUX_TABLE_26[] = {
+static const U32 IVCF_AUX_TABLE_27[] = {
        0x1E60,
        0x1E9B
 };
 
-static const U32 IVCF_AUX_TABLE_27[] = {
+static const U32 IVCF_AUX_TABLE_28[] = {
        0x1C88,
        0xA64A
 };
@@ -34081,7 +34101,8 @@ static const U32 * const IVCF_AUX_TABLE_ptrs[] = {
        IVCF_AUX_TABLE_24,
        IVCF_AUX_TABLE_25,
        IVCF_AUX_TABLE_26,
-       IVCF_AUX_TABLE_27
+       IVCF_AUX_TABLE_27,
+       IVCF_AUX_TABLE_28
 };
 
 /* Parallel table to the above, giving the number of elements in each table
@@ -34098,9 +34119,9 @@ static const U8 IVCF_AUX_TABLE_lengths[] = {
        2       /* IVCF_AUX_TABLE_7 */,
        2       /* IVCF_AUX_TABLE_8 */,
        2       /* IVCF_AUX_TABLE_9 */,
-       3       /* IVCF_AUX_TABLE_10 */,
+       2       /* IVCF_AUX_TABLE_10 */,
        3       /* IVCF_AUX_TABLE_11 */,
-       2       /* IVCF_AUX_TABLE_12 */,
+       3       /* IVCF_AUX_TABLE_12 */,
        2       /* IVCF_AUX_TABLE_13 */,
        2       /* IVCF_AUX_TABLE_14 */,
        2       /* IVCF_AUX_TABLE_15 */,
@@ -34111,11 +34132,12 @@ static const U8 IVCF_AUX_TABLE_lengths[] = {
        2       /* IVCF_AUX_TABLE_20 */,
        2       /* IVCF_AUX_TABLE_21 */,
        2       /* IVCF_AUX_TABLE_22 */,
-       3       /* IVCF_AUX_TABLE_23 */,
-       2       /* IVCF_AUX_TABLE_24 */,
+       2       /* IVCF_AUX_TABLE_23 */,
+       3       /* IVCF_AUX_TABLE_24 */,
        2       /* IVCF_AUX_TABLE_25 */,
        2       /* IVCF_AUX_TABLE_26 */,
-       2       /* IVCF_AUX_TABLE_27 */
+       2       /* IVCF_AUX_TABLE_27 */,
+       2       /* IVCF_AUX_TABLE_28 */
 };
 
 static const I32 _Perl_IVCF_invmap[] = {  /* for EBCDIC 1047 */
@@ -34142,6 +34164,8 @@ static const I32 _Perl_IVCF_invmap[] = {  /* for EBCDIC 1047 */
        0x9E,
        0,
        IVCF_use_AUX_TABLE_3,
+       0,
+       IVCF_use_AUX_TABLE_4,
        0xE3,
        0,
        0xEB,
@@ -34312,12 +34336,12 @@ static const I32 _Perl_IVCF_invmap[] = {  /* for EBCDIC 1047 */
        0,
        0x1F7,
        0,
-       IVCF_use_AUX_TABLE_4,
-       0,
        IVCF_use_AUX_TABLE_5,
        0,
        IVCF_use_AUX_TABLE_6,
        0,
+       IVCF_use_AUX_TABLE_7,
+       0,
        0x1CD,
        0,
        0x1CF,
@@ -34353,7 +34377,7 @@ static const I32 _Perl_IVCF_invmap[] = {  /* for EBCDIC 1047 */
        0,
        0x1EE,
        0,
-       IVCF_use_AUX_TABLE_7,
+       IVCF_use_AUX_TABLE_8,
        0,
        0x1F4,
        0,
@@ -34496,24 +34520,24 @@ static const I32 _Perl_IVCF_invmap[] = {  /* for EBCDIC 1047 */
        0x388,
        0x1FE3,
        0x391,
-       IVCF_use_AUX_TABLE_8,
-       0x393,
        IVCF_use_AUX_TABLE_9,
-       0x396,
+       0x393,
        IVCF_use_AUX_TABLE_10,
+       0x396,
        IVCF_use_AUX_TABLE_11,
        IVCF_use_AUX_TABLE_12,
-       0x39B,
        IVCF_use_AUX_TABLE_13,
-       0x39D,
+       0x39B,
        IVCF_use_AUX_TABLE_14,
+       0x39D,
        IVCF_use_AUX_TABLE_15,
-       0,
        IVCF_use_AUX_TABLE_16,
-       0x3A4,
+       0,
        IVCF_use_AUX_TABLE_17,
-       0x3A7,
+       0x3A4,
        IVCF_use_AUX_TABLE_18,
+       0x3A7,
+       IVCF_use_AUX_TABLE_19,
        0x3AA,
        0x38C,
        0x38E,
@@ -34552,22 +34576,22 @@ static const I32 _Perl_IVCF_invmap[] = {  /* for EBCDIC 1047 */
        0x3FA,
        0,
        0x410,
-       IVCF_use_AUX_TABLE_19,
-       0x413,
        IVCF_use_AUX_TABLE_20,
-       0x415,
+       0x413,
        IVCF_use_AUX_TABLE_21,
-       0x41F,
+       0x415,
        IVCF_use_AUX_TABLE_22,
+       0x41F,
        IVCF_use_AUX_TABLE_23,
-       0x423,
        IVCF_use_AUX_TABLE_24,
+       0x423,
+       IVCF_use_AUX_TABLE_25,
        0x42B,
        0x400,
        0,
        0x460,
        0,
-       IVCF_use_AUX_TABLE_25,
+       IVCF_use_AUX_TABLE_26,
        0,
        0x464,
        0,
@@ -34875,7 +34899,7 @@ static const I32 _Perl_IVCF_invmap[] = {  /* for EBCDIC 1047 */
        0,
        0x1E5E,
        0,
-       IVCF_use_AUX_TABLE_26,
+       IVCF_use_AUX_TABLE_27,
        0,
        0x1E62,
        0,
@@ -35230,7 +35254,7 @@ static const I32 _Perl_IVCF_invmap[] = {  /* for EBCDIC 1047 */
        0,
        0xA648,
        0,
-       IVCF_use_AUX_TABLE_27,
+       IVCF_use_AUX_TABLE_28,
        0,
        0xA64C,
        0,
@@ -35464,7 +35488,7 @@ static const I32 _Perl_IVCF_invmap[] = {  /* for EBCDIC 1047 */
      && '$' == 91 && '@' == 124 && '`' == 121 && '\n' == 37
 
 static const UV _Perl_IVCF_invlist[] = {  /* for EBCDIC 037 */
-       1329,   /* Number of elements */
+       1331,   /* Number of elements */
        148565664, /* Version and data structure type */
        0,      /* 0 if the list starts at 0;
                   1 if it starts at the element beyond 0 */
@@ -35488,6 +35512,8 @@ static const UV _Perl_IVCF_invlist[] = {  /* for EBCDIC 037 */
        0x9A,
        0x9C,
        0x9D,
+       0xA0,
+       0xA1,
        0xA2,
        0xA3,
        0xAA,
@@ -36842,7 +36868,8 @@ typedef enum {
        IVCF_use_AUX_TABLE_24 = -24,
        IVCF_use_AUX_TABLE_25 = -25,
        IVCF_use_AUX_TABLE_26 = -26,
-       IVCF_use_AUX_TABLE_27 = -27
+       IVCF_use_AUX_TABLE_27 = -27,
+       IVCF_use_AUX_TABLE_28 = -28
 } IVCF_enum;
 
 #define HAS_IVCF_AUX_TABLES
@@ -36858,129 +36885,134 @@ static const U32 IVCF_AUX_TABLE_2[] = {
 };
 
 static const U32 IVCF_AUX_TABLE_3[] = {
+       0x39C,
+       0x3BC
+};
+
+static const U32 IVCF_AUX_TABLE_4[] = {
        0xE2,
        0x17F
 };
 
-static const U32 IVCF_AUX_TABLE_4[] = {
+static const U32 IVCF_AUX_TABLE_5[] = {
        0x1C4,
        0x1C5
 };
 
-static const U32 IVCF_AUX_TABLE_5[] = {
+static const U32 IVCF_AUX_TABLE_6[] = {
        0x1C7,
        0x1C8
 };
 
-static const U32 IVCF_AUX_TABLE_6[] = {
+static const U32 IVCF_AUX_TABLE_7[] = {
        0x1CA,
        0x1CB
 };
 
-static const U32 IVCF_AUX_TABLE_7[] = {
+static const U32 IVCF_AUX_TABLE_8[] = {
        0x1F1,
        0x1F2
 };
 
-static const U32 IVCF_AUX_TABLE_8[] = {
+static const U32 IVCF_AUX_TABLE_9[] = {
        0x392,
        0x3D0
 };
 
-static const U32 IVCF_AUX_TABLE_9[] = {
+static const U32 IVCF_AUX_TABLE_10[] = {
        0x395,
        0x3F5
 };
 
-static const U32 IVCF_AUX_TABLE_10[] = {
+static const U32 IVCF_AUX_TABLE_11[] = {
        0x398,
        0x3D1,
        0x3F4
 };
 
-static const U32 IVCF_AUX_TABLE_11[] = {
+static const U32 IVCF_AUX_TABLE_12[] = {
        0x345,
        0x399,
        0x1FBE
 };
 
-static const U32 IVCF_AUX_TABLE_12[] = {
+static const U32 IVCF_AUX_TABLE_13[] = {
        0x39A,
        0x3F0
 };
 
-static const U32 IVCF_AUX_TABLE_13[] = {
+static const U32 IVCF_AUX_TABLE_14[] = {
        0xA0,
        0x39C
 };
 
-static const U32 IVCF_AUX_TABLE_14[] = {
+static const U32 IVCF_AUX_TABLE_15[] = {
        0x3A0,
        0x3D6
 };
 
-static const U32 IVCF_AUX_TABLE_15[] = {
+static const U32 IVCF_AUX_TABLE_16[] = {
        0x3A1,
        0x3F1
 };
 
-static const U32 IVCF_AUX_TABLE_16[] = {
+static const U32 IVCF_AUX_TABLE_17[] = {
        0x3A3,
        0x3C2
 };
 
-static const U32 IVCF_AUX_TABLE_17[] = {
+static const U32 IVCF_AUX_TABLE_18[] = {
        0x3A6,
        0x3D5
 };
 
-static const U32 IVCF_AUX_TABLE_18[] = {
+static const U32 IVCF_AUX_TABLE_19[] = {
        0x3A9,
        0x2126
 };
 
-static const U32 IVCF_AUX_TABLE_19[] = {
+static const U32 IVCF_AUX_TABLE_20[] = {
        0x412,
        0x1C80
 };
 
-static const U32 IVCF_AUX_TABLE_20[] = {
+static const U32 IVCF_AUX_TABLE_21[] = {
        0x414,
        0x1C81
 };
 
-static const U32 IVCF_AUX_TABLE_21[] = {
+static const U32 IVCF_AUX_TABLE_22[] = {
        0x41E,
        0x1C82
 };
 
-static const U32 IVCF_AUX_TABLE_22[] = {
+static const U32 IVCF_AUX_TABLE_23[] = {
        0x421,
        0x1C83
 };
 
-static const U32 IVCF_AUX_TABLE_23[] = {
+static const U32 IVCF_AUX_TABLE_24[] = {
        0x422,
        0x1C84,
        0x1C85
 };
 
-static const U32 IVCF_AUX_TABLE_24[] = {
+static const U32 IVCF_AUX_TABLE_25[] = {
        0x42A,
        0x1C86
 };
 
-static const U32 IVCF_AUX_TABLE_25[] = {
+static const U32 IVCF_AUX_TABLE_26[] = {
        0x462,
        0x1C87
 };
 
-static const U32 IVCF_AUX_TABLE_26[] = {
+static const U32 IVCF_AUX_TABLE_27[] = {
        0x1E60,
        0x1E9B
 };
 
-static const U32 IVCF_AUX_TABLE_27[] = {
+static const U32 IVCF_AUX_TABLE_28[] = {
        0x1C88,
        0xA64A
 };
@@ -37013,7 +37045,8 @@ static const U32 * const IVCF_AUX_TABLE_ptrs[] = {
        IVCF_AUX_TABLE_24,
        IVCF_AUX_TABLE_25,
        IVCF_AUX_TABLE_26,
-       IVCF_AUX_TABLE_27
+       IVCF_AUX_TABLE_27,
+       IVCF_AUX_TABLE_28
 };
 
 /* Parallel table to the above, giving the number of elements in each table
@@ -37030,9 +37063,9 @@ static const U8 IVCF_AUX_TABLE_lengths[] = {
        2       /* IVCF_AUX_TABLE_7 */,
        2       /* IVCF_AUX_TABLE_8 */,
        2       /* IVCF_AUX_TABLE_9 */,
-       3       /* IVCF_AUX_TABLE_10 */,
+       2       /* IVCF_AUX_TABLE_10 */,
        3       /* IVCF_AUX_TABLE_11 */,
-       2       /* IVCF_AUX_TABLE_12 */,
+       3       /* IVCF_AUX_TABLE_12 */,
        2       /* IVCF_AUX_TABLE_13 */,
        2       /* IVCF_AUX_TABLE_14 */,
        2       /* IVCF_AUX_TABLE_15 */,
@@ -37043,11 +37076,12 @@ static const U8 IVCF_AUX_TABLE_lengths[] = {
        2       /* IVCF_AUX_TABLE_20 */,
        2       /* IVCF_AUX_TABLE_21 */,
        2       /* IVCF_AUX_TABLE_22 */,
-       3       /* IVCF_AUX_TABLE_23 */,
-       2       /* IVCF_AUX_TABLE_24 */,
+       2       /* IVCF_AUX_TABLE_23 */,
+       3       /* IVCF_AUX_TABLE_24 */,
        2       /* IVCF_AUX_TABLE_25 */,
        2       /* IVCF_AUX_TABLE_26 */,
-       2       /* IVCF_AUX_TABLE_27 */
+       2       /* IVCF_AUX_TABLE_27 */,
+       2       /* IVCF_AUX_TABLE_28 */
 };
 
 static const I32 _Perl_IVCF_invmap[] = {  /* for EBCDIC 037 */
@@ -37072,6 +37106,8 @@ static const I32 _Perl_IVCF_invmap[] = {  /* for EBCDIC 037 */
        0x9E,
        0,
        IVCF_use_AUX_TABLE_3,
+       0,
+       IVCF_use_AUX_TABLE_4,
        0xE3,
        0,
        0xEB,
@@ -37242,12 +37278,12 @@ static const I32 _Perl_IVCF_invmap[] = {  /* for EBCDIC 037 */
        0,
        0x1F7,
        0,
-       IVCF_use_AUX_TABLE_4,
-       0,
        IVCF_use_AUX_TABLE_5,
        0,
        IVCF_use_AUX_TABLE_6,
        0,
+       IVCF_use_AUX_TABLE_7,
+       0,
        0x1CD,
        0,
        0x1CF,
@@ -37283,7 +37319,7 @@ static const I32 _Perl_IVCF_invmap[] = {  /* for EBCDIC 037 */
        0,
        0x1EE,
        0,
-       IVCF_use_AUX_TABLE_7,
+       IVCF_use_AUX_TABLE_8,
        0,
        0x1F4,
        0,
@@ -37426,24 +37462,24 @@ static const I32 _Perl_IVCF_invmap[] = {  /* for EBCDIC 037 */
        0x388,
        0x1FE3,
        0x391,
-       IVCF_use_AUX_TABLE_8,
-       0x393,
        IVCF_use_AUX_TABLE_9,
-       0x396,
+       0x393,
        IVCF_use_AUX_TABLE_10,
+       0x396,
        IVCF_use_AUX_TABLE_11,
        IVCF_use_AUX_TABLE_12,
-       0x39B,
        IVCF_use_AUX_TABLE_13,
-       0x39D,
+       0x39B,
        IVCF_use_AUX_TABLE_14,
+       0x39D,
        IVCF_use_AUX_TABLE_15,
-       0,
        IVCF_use_AUX_TABLE_16,
-       0x3A4,
+       0,
        IVCF_use_AUX_TABLE_17,
-       0x3A7,
+       0x3A4,
        IVCF_use_AUX_TABLE_18,
+       0x3A7,
+       IVCF_use_AUX_TABLE_19,
        0x3AA,
        0x38C,
        0x38E,
@@ -37482,22 +37518,22 @@ static const I32 _Perl_IVCF_invmap[] = {  /* for EBCDIC 037 */
        0x3FA,
        0,
        0x410,
-       IVCF_use_AUX_TABLE_19,
-       0x413,
        IVCF_use_AUX_TABLE_20,
-       0x415,
+       0x413,
        IVCF_use_AUX_TABLE_21,
-       0x41F,
+       0x415,
        IVCF_use_AUX_TABLE_22,
+       0x41F,
        IVCF_use_AUX_TABLE_23,
-       0x423,
        IVCF_use_AUX_TABLE_24,
+       0x423,
+       IVCF_use_AUX_TABLE_25,
        0x42B,
        0x400,
        0,
        0x460,
        0,
-       IVCF_use_AUX_TABLE_25,
+       IVCF_use_AUX_TABLE_26,
        0,
        0x464,
        0,
@@ -37805,7 +37841,7 @@ static const I32 _Perl_IVCF_invmap[] = {  /* for EBCDIC 037 */
        0,
        0x1E5E,
        0,
-       IVCF_use_AUX_TABLE_26,
+       IVCF_use_AUX_TABLE_27,
        0,
        0x1E62,
        0,
@@ -38160,7 +38196,7 @@ static const I32 _Perl_IVCF_invmap[] = {  /* for EBCDIC 037 */
        0,
        0xA648,
        0,
-       IVCF_use_AUX_TABLE_27,
+       IVCF_use_AUX_TABLE_28,
        0,
        0xA64C,
        0,
@@ -419868,5 +419904,5 @@ static const U8 WB_table[23][23] = {
  * 50b85a67451145545a65cea370dab8d3444fbfe07e9c34cef560c5b7da9d3eef lib/unicore/version
  * 2680b9254eb236c5c090f11b149605043e8c8433661b96efc4a42fb4709342a5 regen/charset_translations.pl
  * 6bbad21de0848e0236b02f34f5fa0edd3cdae9ba8173cc9469a5513936b9e728 regen/mk_PL_charclass.pl
- * 7a7e778e903508b1c244a77bae57ec57606ab775bb5f81719dbb122eb66d4259 regen/mk_invlists.pl
+ * d99eae7d3b60d8ed3af56e6fdc41ab53b22288238749812aa1cd01f847fe9d5f regen/mk_invlists.pl
  * ex: set ro: */
index 2b57230..dda925b 100644 (file)
 # 50b85a67451145545a65cea370dab8d3444fbfe07e9c34cef560c5b7da9d3eef lib/unicore/version
 # 2680b9254eb236c5c090f11b149605043e8c8433661b96efc4a42fb4709342a5 regen/charset_translations.pl
 # 6bbad21de0848e0236b02f34f5fa0edd3cdae9ba8173cc9469a5513936b9e728 regen/mk_PL_charclass.pl
-# 7a7e778e903508b1c244a77bae57ec57606ab775bb5f81719dbb122eb66d4259 regen/mk_invlists.pl
+# d99eae7d3b60d8ed3af56e6fdc41ab53b22288238749812aa1cd01f847fe9d5f regen/mk_invlists.pl
 # ex: set ro:
index 8c848cc..7df5b7d 100644 (file)
@@ -1029,13 +1029,20 @@ sub _Perl_IVCF {
     }
 
     # Now go through and make some adjustments.  We add synthetic entries for
-    # two cases.
-    # 1) Two or more code points can fold to the same multiple character,
+    # three cases.
+    # 1) If the fold of a Latin1-range character is above that range, some
+    #    coding in regexec.c can be saved by creating a reverse map here.  The
+    #    impetus for this is that U+B5 (MICRO SIGN) folds to the Greek small
+    #    mu (U+3BC).  That fold isn't done at regex pattern compilation time
+    #    if it means that the pattern would have to be translated into UTF-8,
+    #    whose operation is slower.  At run time, having this reverse
+    #    translation eliminates some special cases in the code.
+    # 2) Two or more code points can fold to the same multiple character,
     #    sequence, as U+FB05 and U+FB06 both fold to 'st'.  This code is only
     #    for single character folds, but FB05 and FB06 are single characters
     #    that are equivalent folded, so we add entries so that they are
     #    considered to fold to each other
-    # 2) If two or more above-Latin1 code points fold to the same Latin1 range
+    # 3) If two or more above-Latin1 code points fold to the same Latin1 range
     #    one, we also add entries so that they are considered to fold to each
     #    other.  This is so that under /aa or /l matching, where folding to
     #    their Latin1 range code point is illegal, they still can fold to each
@@ -1048,9 +1055,28 @@ sub _Perl_IVCF {
         # scalar
         if (scalar $new{$fold}->@* == 1) {
             $new{$fold} = $new{$fold}[0];
+
+            # Handle case 1) above: if there were a Latin1 range code point
+            # whose fold is above that range, this creates an extra entry that
+            # maps the other direction, and would save some special case code.
+            # (The one current case of this is handled in the else clause
+            # below.)
+            $new{$new{$fold}} = $fold if $new{$fold} < 256 && $fold > 255;
         }
         else {
 
+            # Handle case 1) when there are multiple things that fold to an
+            # above-Latin1 code point, at least one of which is in Latin1.
+            if (! $folds_to_string && $fold > 255) {
+                foreach my $cp ($new{$fold}->@*) {
+                    if ($cp < 256) {
+                        my @new_entry = grep { $_ != $cp } $new{$fold}->@*;
+                        push @new_entry, $fold;
+                        $new{$cp}->@* = @new_entry;
+                    }
+                }
+            }
+                
             # Otherwise, sort numerically.  This places the highest code point
             # in the list at the tail end.  This is because Unicode keeps the
             # lowercase code points as higher ordinals than the uppercase, at
index a36d8bc..57f0f97 100644 (file)
@@ -7541,6 +7541,6 @@ MPH_VALt match_uniprop( const unsigned char * const key, const U16 key_len ) {
  * 50b85a67451145545a65cea370dab8d3444fbfe07e9c34cef560c5b7da9d3eef lib/unicore/version
  * 2680b9254eb236c5c090f11b149605043e8c8433661b96efc4a42fb4709342a5 regen/charset_translations.pl
  * 6bbad21de0848e0236b02f34f5fa0edd3cdae9ba8173cc9469a5513936b9e728 regen/mk_PL_charclass.pl
- * 7a7e778e903508b1c244a77bae57ec57606ab775bb5f81719dbb122eb66d4259 regen/mk_invlists.pl
+ * d99eae7d3b60d8ed3af56e6fdc41ab53b22288238749812aa1cd01f847fe9d5f regen/mk_invlists.pl
  * cf1d68efb7d919d302c4005641eae8d36da6d7850816ad374b0c00b45e609f43 regen/mph.pl
  * ex: set ro: */