This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Use array for some inversion lists
author: Karl Williamson <public@khwilliamson.com>
Thu, 13 Dec 2012 03:26:08 +0000 (20:26 -0700)
committer: Karl Williamson <public@khwilliamson.com>
Sat, 22 Dec 2012 18:11:28 +0000 (11:11 -0700)
This patch creates an array pointing to the inversion lists that cover
the Latin-1 ranges for Posix character classes, and uses it instead of
the individual variables previously referred to.

embedvar.h
handy.h
intrpvar.h
perl.c
regcomp.c
sv.c

index 270ce19..09e0810 100644 (file)
 #define PL_Env                 (vTHX->IEnv)
 #define PL_HasMultiCharFold    (vTHX->IHasMultiCharFold)
 #define PL_L1Cased             (vTHX->IL1Cased)
-#define PL_L1PosixAlnum                (vTHX->IL1PosixAlnum)
-#define PL_L1PosixAlpha                (vTHX->IL1PosixAlpha)
-#define PL_L1PosixGraph                (vTHX->IL1PosixGraph)
-#define PL_L1PosixLower                (vTHX->IL1PosixLower)
-#define PL_L1PosixPrint                (vTHX->IL1PosixPrint)
-#define PL_L1PosixPunct                (vTHX->IL1PosixPunct)
-#define PL_L1PosixUpper                (vTHX->IL1PosixUpper)
-#define PL_L1PosixWord         (vTHX->IL1PosixWord)
+#define PL_L1Posix_ptrs                (vTHX->IL1Posix_ptrs)
 #define PL_LIO                 (vTHX->ILIO)
 #define PL_Latin1              (vTHX->ILatin1)
 #define PL_Mem                 (vTHX->IMem)
diff --git a/handy.h b/handy.h
index c8edc2b..10f483a 100644 (file)
--- a/handy.h
+++ b/handy.h
@@ -801,6 +801,7 @@ typedef enum {
 #endif
 
 #define POSIX_SWASH_COUNT _FIRST_NON_SWASH_CC
+#define POSIX_CC_COUNT    (_HIGHEST_REGCOMP_DOT_H_SYNC + 1)
 
 #if defined(PERL_IN_UTF8_C) || defined(PERL_IN_REGCOMP_C)
 #   if _CC_WORDCHAR != 0 || _CC_DIGIT != 1 || _CC_ALPHA != 2 || _CC_LOWER != 3 \
index a50ad51..5fd84b4 100644 (file)
@@ -569,10 +569,8 @@ PERLVAR(I, AboveLatin1,    SV *)
 PERLVAR(I, PerlSpace,  SV *)
 PERLVAR(I, XPerlSpace, SV *)
 
-PERLVAR(I, L1PosixAlnum,SV *)
 PERLVAR(I, PosixAlnum, SV *)
 
-PERLVAR(I, L1PosixAlpha,SV *)
 PERLVAR(I, PosixAlpha, SV *)
 
 PERLVAR(I, PosixBlank, SV *)
@@ -585,25 +583,19 @@ PERLVAR(I, XPosixCntrl,   SV *)
 
 PERLVAR(I, PosixDigit, SV *)
 
-PERLVAR(I, L1PosixGraph,SV *)
 PERLVAR(I, PosixGraph, SV *)
 
-PERLVAR(I, L1PosixLower,SV *)
 PERLVAR(I, PosixLower, SV *)
 
-PERLVAR(I, L1PosixPrint,SV *)
 PERLVAR(I, PosixPrint, SV *)
 
-PERLVAR(I, L1PosixPunct,SV *)
 PERLVAR(I, PosixPunct, SV *)
 
 PERLVAR(I, PosixSpace, SV *)
 PERLVAR(I, XPosixSpace,        SV *)
 
-PERLVAR(I, L1PosixUpper,SV *)
 PERLVAR(I, PosixUpper, SV *)
 
-PERLVAR(I, L1PosixWord,        SV *)
 PERLVAR(I, PosixWord,  SV *)
 
 PERLVAR(I, PosixXDigit,        SV *)
@@ -625,6 +617,7 @@ PERLVAR(I, utf8_tofold,     SV *)
 PERLVAR(I, utf8_charname_begin, SV *)
 PERLVAR(I, utf8_charname_continue, SV *)
 PERLVARA(I, utf8_swash_ptrs, POSIX_SWASH_COUNT, SV *)
+PERLVARA(I, L1Posix_ptrs, POSIX_CC_COUNT, SV *)
 PERLVAR(I, last_swash_hv, HV *)
 PERLVAR(I, last_swash_tmps, U8 *)
 PERLVAR(I, last_swash_slen, STRLEN)
diff --git a/perl.c b/perl.c
index 03e80a4..f8d9e8f 100644 (file)
--- a/perl.c
+++ b/perl.c
@@ -1002,6 +1002,10 @@ perl_destruct(pTHXx)
     PL_utf8_idstart    = NULL;
     PL_utf8_idcont     = NULL;
     PL_utf8_foldclosures = NULL;
+    for (i = 0; i < POSIX_CC_COUNT; i++) {
+        SvREFCNT_dec(PL_L1Posix_ptrs[i]);
+        PL_L1Posix_ptrs[i] = NULL;
+    }
 
     if (!specialWARN(PL_compiling.cop_warnings))
        PerlMemShared_free(PL_compiling.cop_warnings);
index 31466b4..aa3608a 100644 (file)
--- a/regcomp.c
+++ b/regcomp.c
@@ -5334,15 +5334,16 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
        PL_ASCII = _new_invlist_C_array(ASCII_invlist);
        PL_Latin1 = _new_invlist_C_array(Latin1_invlist);
 
-       PL_L1PosixAlnum = _new_invlist_C_array(L1PosixAlnum_invlist);
+       PL_L1Posix_ptrs[_CC_ALPHANUMERIC]
+                                = _new_invlist_C_array(L1PosixAlnum_invlist);
        PL_PosixAlnum = _new_invlist_C_array(PosixAlnum_invlist);
 
-       PL_L1PosixAlpha = _new_invlist_C_array(L1PosixAlpha_invlist);
+       PL_L1Posix_ptrs[_CC_ALPHA]
+                                = _new_invlist_C_array(L1PosixAlpha_invlist);
        PL_PosixAlpha = _new_invlist_C_array(PosixAlpha_invlist);
 
        PL_PosixBlank = _new_invlist_C_array(PosixBlank_invlist);
        PL_XPosixBlank = _new_invlist_C_array(XPosixBlank_invlist);
-
        PL_L1Cased = _new_invlist_C_array(L1Cased_invlist);
 
        PL_PosixCntrl = _new_invlist_C_array(PosixCntrl_invlist);
@@ -5350,31 +5351,31 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
 
        PL_PosixDigit = _new_invlist_C_array(PosixDigit_invlist);
 
-       PL_L1PosixGraph = _new_invlist_C_array(L1PosixGraph_invlist);
+       PL_L1Posix_ptrs[_CC_GRAPH] = _new_invlist_C_array(L1PosixGraph_invlist);
        PL_PosixGraph = _new_invlist_C_array(PosixGraph_invlist);
 
-       PL_L1PosixLower = _new_invlist_C_array(L1PosixLower_invlist);
+       PL_L1Posix_ptrs[_CC_LOWER] = _new_invlist_C_array(L1PosixLower_invlist);
        PL_PosixLower = _new_invlist_C_array(PosixLower_invlist);
 
-       PL_L1PosixPrint = _new_invlist_C_array(L1PosixPrint_invlist);
+       PL_L1Posix_ptrs[_CC_PRINT] = _new_invlist_C_array(L1PosixPrint_invlist);
        PL_PosixPrint = _new_invlist_C_array(PosixPrint_invlist);
 
-       PL_L1PosixPunct = _new_invlist_C_array(L1PosixPunct_invlist);
+       PL_L1Posix_ptrs[_CC_PUNCT] = _new_invlist_C_array(L1PosixPunct_invlist);
        PL_PosixPunct = _new_invlist_C_array(PosixPunct_invlist);
 
        PL_PerlSpace = _new_invlist_C_array(PerlSpace_invlist);
        PL_XPerlSpace = _new_invlist_C_array(XPerlSpace_invlist);
-
        PL_PosixSpace = _new_invlist_C_array(PosixSpace_invlist);
        PL_XPosixSpace = _new_invlist_C_array(XPosixSpace_invlist);
 
-       PL_L1PosixUpper = _new_invlist_C_array(L1PosixUpper_invlist);
+       PL_L1Posix_ptrs[_CC_UPPER] = _new_invlist_C_array(L1PosixUpper_invlist);
        PL_PosixUpper = _new_invlist_C_array(PosixUpper_invlist);
 
        PL_VertSpace = _new_invlist_C_array(VertSpace_invlist);
 
        PL_PosixWord = _new_invlist_C_array(PosixWord_invlist);
-       PL_L1PosixWord = _new_invlist_C_array(L1PosixWord_invlist);
+       PL_L1Posix_ptrs[_CC_WORDCHAR]
+                                = _new_invlist_C_array(L1PosixWord_invlist);
 
        PL_PosixXDigit = _new_invlist_C_array(PosixXDigit_invlist);
        PL_XPosixXDigit = _new_invlist_C_array(XPosixXDigit_invlist);
@@ -11925,20 +11926,20 @@ parseit:
 
                case ANYOF_ALPHANUMERIC: /* C's alnum, in contrast to \w */
                    DO_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, posixes,
-                        PL_PosixAlnum, PL_utf8_swash_ptrs[_CC_ALPHANUMERIC], PL_L1PosixAlnum, swash_property_names[_CC_ALPHANUMERIC], listsv);
+                        PL_PosixAlnum, PL_utf8_swash_ptrs[_CC_ALPHANUMERIC], PL_L1Posix_ptrs[_CC_ALPHANUMERIC], swash_property_names[_CC_ALPHANUMERIC], listsv);
                    break;
                case ANYOF_NALPHANUMERIC:
                    DO_N_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, posixes,
-                        PL_PosixAlnum, PL_L1PosixAlnum, swash_property_names[_CC_ALPHANUMERIC], listsv,
+                        PL_PosixAlnum, PL_L1Posix_ptrs[_CC_ALPHANUMERIC], swash_property_names[_CC_ALPHANUMERIC], listsv,
                         runtime_posix_matches_above_Unicode);
                    break;
                case ANYOF_ALPHA:
                    DO_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, posixes,
-                        PL_PosixAlpha, PL_utf8_swash_ptrs[_CC_ALPHA], PL_L1PosixAlpha, swash_property_names[_CC_ALPHA], listsv);
+                        PL_PosixAlpha, PL_utf8_swash_ptrs[_CC_ALPHA], PL_L1Posix_ptrs[_CC_ALPHA], swash_property_names[_CC_ALPHA], listsv);
                    break;
                case ANYOF_NALPHA:
                    DO_N_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, posixes,
-                        PL_PosixAlpha, PL_L1PosixAlpha, swash_property_names[_CC_ALPHA], listsv,
+                        PL_PosixAlpha, PL_L1Posix_ptrs[_CC_ALPHA], swash_property_names[_CC_ALPHA], listsv,
                         runtime_posix_matches_above_Unicode);
                    break;
                case ANYOF_ASCII:
@@ -12045,11 +12046,11 @@ parseit:
                    break;
                case ANYOF_GRAPH:
                    DO_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, posixes,
-                        PL_PosixGraph, PL_utf8_swash_ptrs[_CC_GRAPH], PL_L1PosixGraph, swash_property_names[_CC_GRAPH], listsv);
+                        PL_PosixGraph, PL_utf8_swash_ptrs[_CC_GRAPH], PL_L1Posix_ptrs[_CC_GRAPH], swash_property_names[_CC_GRAPH], listsv);
                    break;
                case ANYOF_NGRAPH:
                    DO_N_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, posixes,
-                        PL_PosixGraph, PL_L1PosixGraph, swash_property_names[_CC_GRAPH], listsv,
+                        PL_PosixGraph, PL_L1Posix_ptrs[_CC_GRAPH], swash_property_names[_CC_GRAPH], listsv,
                         runtime_posix_matches_above_Unicode);
                    break;
                case ANYOF_HORIZWS:
@@ -12083,7 +12084,7 @@ parseit:
                    }
                    else {
                        ascii_source = PL_PosixLower;
-                       l1_source = PL_L1PosixLower;
+                       l1_source = PL_L1Posix_ptrs[_CC_LOWER];
                        Xname = swash_property_names[_CC_LOWER];
                         swash = PL_utf8_swash_ptrs[_CC_LOWER];
                    }
@@ -12100,20 +12101,20 @@ parseit:
                }
                case ANYOF_PRINT:
                    DO_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, posixes,
-                        PL_PosixPrint, PL_utf8_swash_ptrs[_CC_PRINT], PL_L1PosixPrint, swash_property_names[_CC_PRINT], listsv);
+                        PL_PosixPrint, PL_utf8_swash_ptrs[_CC_PRINT], PL_L1Posix_ptrs[_CC_PRINT], swash_property_names[_CC_PRINT], listsv);
                    break;
                case ANYOF_NPRINT:
                    DO_N_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, posixes,
-                        PL_PosixPrint, PL_L1PosixPrint, swash_property_names[_CC_PRINT], listsv,
+                        PL_PosixPrint, PL_L1Posix_ptrs[_CC_PRINT], swash_property_names[_CC_PRINT], listsv,
                         runtime_posix_matches_above_Unicode);
                    break;
                case ANYOF_PUNCT:
                    DO_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, posixes,
-                        PL_PosixPunct, PL_utf8_swash_ptrs[_CC_PUNCT], PL_L1PosixPunct, swash_property_names[_CC_PUNCT], listsv);
+                        PL_PosixPunct, PL_utf8_swash_ptrs[_CC_PUNCT], PL_L1Posix_ptrs[_CC_PUNCT], swash_property_names[_CC_PUNCT], listsv);
                    break;
                case ANYOF_NPUNCT:
                    DO_N_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, posixes,
-                        PL_PosixPunct, PL_L1PosixPunct, swash_property_names[_CC_PUNCT], listsv,
+                        PL_PosixPunct, PL_L1Posix_ptrs[_CC_PUNCT], swash_property_names[_CC_PUNCT], listsv,
                         runtime_posix_matches_above_Unicode);
                    break;
                case ANYOF_PSXSPC:
@@ -12148,7 +12149,7 @@ parseit:
                    }
                    else {
                        ascii_source = PL_PosixUpper;
-                       l1_source = PL_L1PosixUpper;
+                       l1_source = PL_L1Posix_ptrs[_CC_UPPER];
                        Xname = swash_property_names[_CC_UPPER];
                         swash = PL_utf8_swash_ptrs[_CC_UPPER];
                    }
@@ -12165,11 +12166,11 @@ parseit:
                }
                case ANYOF_WORDCHAR:
                    DO_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, posixes,
-                            PL_PosixWord, PL_utf8_swash_ptrs[_CC_WORDCHAR], PL_L1PosixWord, swash_property_names[_CC_WORDCHAR], listsv);
+                            PL_PosixWord, PL_utf8_swash_ptrs[_CC_WORDCHAR], PL_L1Posix_ptrs[_CC_WORDCHAR], swash_property_names[_CC_WORDCHAR], listsv);
                    break;
                case ANYOF_NWORDCHAR:
                    DO_N_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, posixes,
-                            PL_PosixWord, PL_L1PosixWord, swash_property_names[_CC_WORDCHAR], listsv,
+                            PL_PosixWord, PL_L1Posix_ptrs[_CC_WORDCHAR], swash_property_names[_CC_WORDCHAR], listsv,
                             runtime_posix_matches_above_Unicode);
                    break;
                case ANYOF_VERTWS:
@@ -12652,7 +12653,7 @@ parseit:
          * indicators, which are weeded out below using the
          * IS_IN_SOME_FOLD_L1() macro */
         if (invlist_highest(cp_list) < 256) {
-            _invlist_intersection(PL_L1PosixAlpha, cp_list, &fold_intersection);
+            _invlist_intersection(PL_L1Posix_ptrs[_CC_ALPHA], cp_list, &fold_intersection);
         }
         else {
 
diff --git a/sv.c b/sv.c
index 360bd7b..0dea39e 100644 (file)
--- a/sv.c
+++ b/sv.c
@@ -13591,10 +13591,8 @@ perl_clone_using(PerlInterpreter *proto_perl, UV flags,
     PL_PerlSpace       = sv_dup_inc(proto_perl->IPerlSpace, param);
     PL_XPerlSpace      = sv_dup_inc(proto_perl->IXPerlSpace, param);
 
-    PL_L1PosixAlnum    = sv_dup_inc(proto_perl->IL1PosixAlnum, param);
     PL_PosixAlnum      = sv_dup_inc(proto_perl->IPosixAlnum, param);
 
-    PL_L1PosixAlpha    = sv_dup_inc(proto_perl->IL1PosixAlpha, param);
     PL_PosixAlpha      = sv_dup_inc(proto_perl->IPosixAlpha, param);
 
     PL_PosixBlank      = sv_dup_inc(proto_perl->IPosixBlank, param);
@@ -13607,25 +13605,19 @@ perl_clone_using(PerlInterpreter *proto_perl, UV flags,
 
     PL_PosixDigit      = sv_dup_inc(proto_perl->IPosixDigit, param);
 
-    PL_L1PosixGraph    = sv_dup_inc(proto_perl->IL1PosixGraph, param);
     PL_PosixGraph      = sv_dup_inc(proto_perl->IPosixGraph, param);
 
-    PL_L1PosixLower    = sv_dup_inc(proto_perl->IL1PosixLower, param);
     PL_PosixLower      = sv_dup_inc(proto_perl->IPosixLower, param);
 
-    PL_L1PosixPrint    = sv_dup_inc(proto_perl->IL1PosixPrint, param);
     PL_PosixPrint      = sv_dup_inc(proto_perl->IPosixPrint, param);
 
-    PL_L1PosixPunct    = sv_dup_inc(proto_perl->IL1PosixPunct, param);
     PL_PosixPunct      = sv_dup_inc(proto_perl->IPosixPunct, param);
 
     PL_PosixSpace      = sv_dup_inc(proto_perl->IPosixSpace, param);
     PL_XPosixSpace     = sv_dup_inc(proto_perl->IXPosixSpace, param);
 
-    PL_L1PosixUpper    = sv_dup_inc(proto_perl->IL1PosixUpper, param);
     PL_PosixUpper      = sv_dup_inc(proto_perl->IPosixUpper, param);
 
-    PL_L1PosixWord     = sv_dup_inc(proto_perl->IL1PosixWord, param);
     PL_PosixWord       = sv_dup_inc(proto_perl->IPosixWord, param);
 
     PL_PosixXDigit     = sv_dup_inc(proto_perl->IPosixXDigit, param);
@@ -13640,6 +13632,9 @@ perl_clone_using(PerlInterpreter *proto_perl, UV flags,
     for (i = 0; i < POSIX_SWASH_COUNT; i++) {
         PL_utf8_swash_ptrs[i] = sv_dup_inc(proto_perl->Iutf8_swash_ptrs[i], param);
     }
+    for (i = 0; i < POSIX_CC_COUNT; i++) {
+        PL_L1Posix_ptrs[i] = sv_dup_inc(proto_perl->IL1Posix_ptrs[i], param);
+    }
     PL_utf8_mark       = sv_dup_inc(proto_perl->Iutf8_mark, param);
     PL_utf8_X_regular_begin    = sv_dup_inc(proto_perl->Iutf8_X_regular_begin, param);
     PL_utf8_X_extend   = sv_dup_inc(proto_perl->Iutf8_X_extend, param);