regen/mk_invlists.pl: Create new inversion list
author Karl Williamson <khw@cpan.org>
Fri, 1 Feb 2019 18:22:15 +0000 (11:22 -0700)
committer Karl Williamson <khw@cpan.org>
Tue, 5 Feb 2019 18:44:29 +0000 (11:44 -0700)
This will be used in a future commit.

charclass_invlists.h
embedvar.h
perlapi.h
perlvars.h
regcomp.c
regen/mk_invlists.pl
uni_keywords.h

index eb40f5e..b1dd443 100644 (file)
@@ -18043,6 +18043,360 @@ static const int Lowercase_Mapping_invmap[] = {  /* for EBCDIC 037 */
 
 #if (defined(PERL_IN_REGCOMP_C) && ! defined(PERL_IN_XSUB_RE))
 
+static const UV _Perl_CCC_non0_non230_invlist[] = {  /* for all charsets */
+       347,    /* Number of elements */
+       148565664, /* Version and data structure type */
+       1,      /* 0 if the list starts at 0;
+                  1 if it starts at the element beyond 0 */
+       0x0,
+       0x315,
+       0x33D,
+       0x345,
+       0x346,
+       0x347,
+       0x34A,
+       0x34D,
+       0x34F,
+       0x353,
+       0x357,
+       0x358,
+       0x35B,
+       0x35C,
+       0x363,
+       0x591,
+       0x592,
+       0x596,
+       0x597,
+       0x59A,
+       0x59C,
+       0x5A2,
+       0x5A8,
+       0x5AA,
+       0x5AB,
+       0x5AD,
+       0x5AF,
+       0x5B0,
+       0x5BE,
+       0x5BF,
+       0x5C0,
+       0x5C1,
+       0x5C3,
+       0x5C5,
+       0x5C6,
+       0x5C7,
+       0x5C8,
+       0x618,
+       0x61B,
+       0x64B,
+       0x653,
+       0x655,
+       0x657,
+       0x65C,
+       0x65D,
+       0x65F,
+       0x660,
+       0x670,
+       0x671,
+       0x6E3,
+       0x6E4,
+       0x6EA,
+       0x6EB,
+       0x6ED,
+       0x6EE,
+       0x711,
+       0x712,
+       0x731,
+       0x732,
+       0x734,
+       0x735,
+       0x737,
+       0x73A,
+       0x73B,
+       0x73D,
+       0x73E,
+       0x73F,
+       0x742,
+       0x743,
+       0x744,
+       0x745,
+       0x746,
+       0x747,
+       0x748,
+       0x749,
+       0x7F2,
+       0x7F3,
+       0x7FD,
+       0x7FE,
+       0x859,
+       0x85C,
+       0x8D3,
+       0x8D4,
+       0x8E3,
+       0x8E4,
+       0x8E6,
+       0x8E7,
+       0x8E9,
+       0x8EA,
+       0x8ED,
+       0x8F3,
+       0x8F6,
+       0x8F7,
+       0x8F9,
+       0x8FB,
+       0x93C,
+       0x93D,
+       0x94D,
+       0x94E,
+       0x952,
+       0x953,
+       0x9BC,
+       0x9BD,
+       0x9CD,
+       0x9CE,
+       0xA3C,
+       0xA3D,
+       0xA4D,
+       0xA4E,
+       0xABC,
+       0xABD,
+       0xACD,
+       0xACE,
+       0xB3C,
+       0xB3D,
+       0xB4D,
+       0xB4E,
+       0xBCD,
+       0xBCE,
+       0xC4D,
+       0xC4E,
+       0xC55,
+       0xC57,
+       0xCBC,
+       0xCBD,
+       0xCCD,
+       0xCCE,
+       0xD3B,
+       0xD3D,
+       0xD4D,
+       0xD4E,
+       0xDCA,
+       0xDCB,
+       0xE38,
+       0xE3B,
+       0xE48,
+       0xE4C,
+       0xEB8,
+       0xEBA,
+       0xEC8,
+       0xECC,
+       0xF18,
+       0xF1A,
+       0xF35,
+       0xF36,
+       0xF37,
+       0xF38,
+       0xF39,
+       0xF3A,
+       0xF71,
+       0xF73,
+       0xF74,
+       0xF75,
+       0xF7A,
+       0xF7E,
+       0xF80,
+       0xF81,
+       0xF84,
+       0xF85,
+       0xFC6,
+       0xFC7,
+       0x1037,
+       0x1038,
+       0x1039,
+       0x103B,
+       0x108D,
+       0x108E,
+       0x1714,
+       0x1715,
+       0x1734,
+       0x1735,
+       0x17D2,
+       0x17D3,
+       0x18A9,
+       0x18AA,
+       0x1939,
+       0x193A,
+       0x193B,
+       0x193C,
+       0x1A18,
+       0x1A19,
+       0x1A60,
+       0x1A61,
+       0x1A7F,
+       0x1A80,
+       0x1AB5,
+       0x1ABB,
+       0x1ABD,
+       0x1ABE,
+       0x1B34,
+       0x1B35,
+       0x1B44,
+       0x1B45,
+       0x1B6C,
+       0x1B6D,
+       0x1BAA,
+       0x1BAC,
+       0x1BE6,
+       0x1BE7,
+       0x1BF2,
+       0x1BF4,
+       0x1C37,
+       0x1C38,
+       0x1CD4,
+       0x1CDA,
+       0x1CDC,
+       0x1CE0,
+       0x1CE2,
+       0x1CE9,
+       0x1CED,
+       0x1CEE,
+       0x1DC2,
+       0x1DC3,
+       0x1DCA,
+       0x1DCB,
+       0x1DCD,
+       0x1DD1,
+       0x1DF6,
+       0x1DFA,
+       0x1DFC,
+       0x1DFE,
+       0x1DFF,
+       0x1E00,
+       0x20D2,
+       0x20D4,
+       0x20D8,
+       0x20DB,
+       0x20E5,
+       0x20E7,
+       0x20E8,
+       0x20E9,
+       0x20EA,
+       0x20F0,
+       0x2D7F,
+       0x2D80,
+       0x302A,
+       0x3030,
+       0x3099,
+       0x309B,
+       0xA806,
+       0xA807,
+       0xA8C4,
+       0xA8C5,
+       0xA92B,
+       0xA92E,
+       0xA953,
+       0xA954,
+       0xA9B3,
+       0xA9B4,
+       0xA9C0,
+       0xA9C1,
+       0xAAB4,
+       0xAAB5,
+       0xAAF6,
+       0xAAF7,
+       0xABED,
+       0xABEE,
+       0xFB1E,
+       0xFB1F,
+       0xFE27,
+       0xFE2E,
+       0x101FD,
+       0x101FE,
+       0x102E0,
+       0x102E1,
+       0x10A0D,
+       0x10A0E,
+       0x10A39,
+       0x10A3B,
+       0x10A3F,
+       0x10A40,
+       0x10AE6,
+       0x10AE7,
+       0x10F46,
+       0x10F48,
+       0x10F4B,
+       0x10F4C,
+       0x10F4D,
+       0x10F51,
+       0x11046,
+       0x11047,
+       0x1107F,
+       0x11080,
+       0x110B9,
+       0x110BB,
+       0x11133,
+       0x11135,
+       0x11173,
+       0x11174,
+       0x111C0,
+       0x111C1,
+       0x111CA,
+       0x111CB,
+       0x11235,
+       0x11237,
+       0x112E9,
+       0x112EB,
+       0x1133B,
+       0x1133D,
+       0x1134D,
+       0x1134E,
+       0x11442,
+       0x11443,
+       0x11446,
+       0x11447,
+       0x114C2,
+       0x114C4,
+       0x115BF,
+       0x115C1,
+       0x1163F,
+       0x11640,
+       0x116B6,
+       0x116B8,
+       0x1172B,
+       0x1172C,
+       0x11839,
+       0x1183B,
+       0x11A34,
+       0x11A35,
+       0x11A47,
+       0x11A48,
+       0x11A99,
+       0x11A9A,
+       0x11C3F,
+       0x11C40,
+       0x11D42,
+       0x11D43,
+       0x11D44,
+       0x11D46,
+       0x11D97,
+       0x11D98,
+       0x16AF0,
+       0x16AF5,
+       0x1BC9E,
+       0x1BC9F,
+       0x1D165,
+       0x1D16A,
+       0x1D16D,
+       0x1D173,
+       0x1D17B,
+       0x1D183,
+       0x1D18A,
+       0x1D18C,
+       0x1E8D0,
+       0x1E8D7,
+       0x1E94A,
+       0x1E94B
+};
+
 #  if 'A' == 65 /* ASCII/Latin1 */
 
 static const UV _Perl_GCB_invlist[] = {  /* for ASCII/Latin1 */
@@ -383696,5 +384050,5 @@ static const U8 WB_table[23][23] = {
  * 7bd6bcbe3813e0cd55e0998053d182b7bc8c97dcfd0b85028e9f7f55af4ad61b lib/unicore/version
  * 4bb677187a1a64e39d48f2e341b5ecb6c99857e49d7a79cf503bd8a3c709999b regen/charset_translations.pl
  * 03e51b0f07beebd5da62ab943899aa4934eee1f792fa27c1fb638c33bf4ac6ea regen/mk_PL_charclass.pl
- * 1fdcc4c0ed94008c13daeb934b40cbd9f5b2871201dce7a9f0530be4145ea026 regen/mk_invlists.pl
+ * 8ae37f2b5bbc7d215f63e8d1189754d83a16c6156fd353847f6fcced90c513d5 regen/mk_invlists.pl
  * ex: set ro: */
index 539fc5a..ad7aae8 100644 (file)
 #define PL_GAboveLatin1                (my_vars->GAboveLatin1)
 #define PL_Assigned_invlist    (my_vars->GAssigned_invlist)
 #define PL_GAssigned_invlist   (my_vars->GAssigned_invlist)
+#define PL_CCC_non0_non230     (my_vars->GCCC_non0_non230)
+#define PL_GCCC_non0_non230    (my_vars->GCCC_non0_non230)
 #define PL_C_locale_obj                (my_vars->GC_locale_obj)
 #define PL_GC_locale_obj       (my_vars->GC_locale_obj)
 #define PL_GCB_invlist         (my_vars->GGCB_invlist)
index cb77694..de4267a 100644 (file)
--- a/perlapi.h
+++ b/perlapi.h
@@ -103,6 +103,8 @@ END_EXTERN_C
 #define PL_AboveLatin1         (*Perl_GAboveLatin1_ptr(NULL))
 #undef  PL_Assigned_invlist
 #define PL_Assigned_invlist    (*Perl_GAssigned_invlist_ptr(NULL))
+#undef  PL_CCC_non0_non230
+#define PL_CCC_non0_non230     (*Perl_GCCC_non0_non230_ptr(NULL))
 #undef  PL_C_locale_obj
 #define PL_C_locale_obj                (*Perl_GC_locale_obj_ptr(NULL))
 #undef  PL_GCB_invlist
index e8064eb..8a4ff6a 100644 (file)
@@ -305,6 +305,7 @@ PERLVAR(G, utf8_charname_begin, SV *)
 PERLVAR(G, utf8_charname_continue, SV *)
 PERLVAR(G, utf8_mark,  SV *)
 PERLVAR(G, InBitmap,   SV *)
+PERLVAR(G, CCC_non0_non230,    SV *)
 
 /* Everything that folds to a given character, for case insensitivity regex
  * matching */
index 66cc6e0..4fa1224 100644 (file)
--- a/regcomp.c
+++ b/regcomp.c
@@ -21947,6 +21947,7 @@ Perl_init_uniprops(pTHX)
     PL_utf8_tosimplefold = _new_invlist_C_array(Simple_Case_Folding_invlist);
     PL_utf8_foldclosures = _new_invlist_C_array(_Perl_IVCF_invlist);
     PL_utf8_mark = _new_invlist_C_array(uni_prop_ptrs[UNI_M]);
+    PL_CCC_non0_non230 = _new_invlist_C_array(_Perl_CCC_non0_non230_invlist);
 
 #ifdef UNI_XIDC
     /* The below are used only by deprecated functions.  They could be removed */
index dd6a032..55c4afb 100644 (file)
@@ -1092,6 +1092,35 @@ sub UpperLatin1 {
     return \@return;
 }
 
+sub _Perl_CCC_non0_non230 {
+
+    # Returns a reference to an inversion list of the code points whose
+    # canonical combining class (ccc) is neither 0 nor 230.  This set is
+    # part of a Unicode Standard rule
+
+    my @nonzeros = prop_invlist("ccc=0");
+    shift @nonzeros;    # Invert so is "ccc != 0"
+
+    my @return;
+    my %number_of;  # Cache: ccc value name => numeric class (final alias)
+
+    # Expand into list of code points, while excluding those with ccc == 230
+    for (my $i = 0; $i < @nonzeros; $i += 2) {
+        my $upper = ($i + 1) < @nonzeros
+                    ? $nonzeros[$i+1] - 1      # In range
+                    : $Unicode::UCD::MAX_CP;  # To infinity.
+        for my $j ($nonzeros[$i] .. $upper) {
+            my $ccc = charprop($j, "ccc");
+            $number_of{$ccc} = (prop_value_aliases("ccc", $ccc))[-1]
+                                        unless exists $number_of{$ccc};
+            push @return, $j if $number_of{$ccc} != 230;
+        }
+    }
+    # Ranges are visited in ascending order, so @return is already sorted
+    @return = mk_invlist_from_sorted_cp_list(\@return);
+    return \@return;
+}
+
 sub output_table_common {
 
     # Common subroutine to actually output the generated rules table.
@@ -2319,6 +2348,7 @@ push @props, sort { prop_name_for_cmp($a) cmp prop_name_for_cmp($b) } qw(
                     Simple_Case_Folding
                     Case_Folding
                     &_Perl_IVCF
+                    &_Perl_CCC_non0_non230
                 );
                 # NOTE that the convention is that extra enum values come
                 # after the property name, separated by commas, with the enums
index c2fceb5..9d2b881 100644 (file)
@@ -6996,6 +6996,6 @@ MPH_VALt match_uniprop( const unsigned char * const key, const U16 key_len ) {
  * 7bd6bcbe3813e0cd55e0998053d182b7bc8c97dcfd0b85028e9f7f55af4ad61b lib/unicore/version
  * 4bb677187a1a64e39d48f2e341b5ecb6c99857e49d7a79cf503bd8a3c709999b regen/charset_translations.pl
  * 03e51b0f07beebd5da62ab943899aa4934eee1f792fa27c1fb638c33bf4ac6ea regen/mk_PL_charclass.pl
- * 1fdcc4c0ed94008c13daeb934b40cbd9f5b2871201dce7a9f0530be4145ea026 regen/mk_invlists.pl
+ * 8ae37f2b5bbc7d215f63e8d1189754d83a16c6156fd353847f6fcced90c513d5 regen/mk_invlists.pl
  * c56b78df81e0f96632246052d71580b212546ca02ba4075158965e11d892f21e regen/mph.pl
  * ex: set ro: */