This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
regcomp.c: Use compile-time invlists
author: Karl Williamson <public@khwilliamson.com>
Sat, 4 Feb 2012 05:01:03 +0000 (22:01 -0700)
committer: Karl Williamson <public@khwilliamson.com>
Thu, 9 Feb 2012 17:13:57 +0000 (10:13 -0700)
This creates three simple compile-time inversion lists from the data
that was generated in a previous commit, and uses all three of them.
Three PL_ variables are used to store them.

embedvar.h
intrpvar.h
regcomp.c
sv.c

index f618aef..0aa20e1 100644 (file)
@@ -40,6 +40,8 @@
 #    define vTHX       PERL_GET_INTERP
 #  endif
 
+#define PL_ASCII               (vTHX->IASCII)
+#define PL_AboveLatin1         (vTHX->IAboveLatin1)
 #define PL_Argv                        (vTHX->IArgv)
 #define PL_Cmd                 (vTHX->ICmd)
 #define PL_DBcv                        (vTHX->IDBcv)
@@ -52,6 +54,7 @@
 #define PL_Dir                 (vTHX->IDir)
 #define PL_Env                 (vTHX->IEnv)
 #define PL_LIO                 (vTHX->ILIO)
+#define PL_Latin1              (vTHX->ILatin1)
 #define PL_Mem                 (vTHX->IMem)
 #define PL_MemParse            (vTHX->IMemParse)
 #define PL_MemShared           (vTHX->IMemShared)
index c921112..96125b6 100644 (file)
@@ -565,7 +565,12 @@ PERLVAR(I, numeric_radix_sv, SV *) /* The radix separator if not '.' */
 
 #endif /* !USE_LOCALE_NUMERIC */
 
-/* utf8 character classes */
+/* Unicode inversion lists */
+PERLVAR(I, ASCII,      SV *)
+PERLVAR(I, Latin1,     SV *)
+PERLVAR(I, AboveLatin1,        SV *)
+
+/* utf8 character class swashes */
 PERLVAR(I, utf8_alnum, SV *)
 PERLVAR(I, utf8_alpha, SV *)
 PERLVAR(I, utf8_space, SV *)
index d809b17..d5c5ff2 100644 (file)
--- a/regcomp.c
+++ b/regcomp.c
@@ -86,6 +86,7 @@
 #endif
 
 #include "dquote_static.c"
+#include "charclass_invlists.h"
 
 #ifdef op
 #undef op
@@ -4813,6 +4814,14 @@ Perl_re_compile(pTHX_ SV * const pattern, U32 orig_pm_flags)
 
     DEBUG_r(if (!PL_colorset) reginitcolors());
 
+    /* Initialize these here instead of as-needed, as doing so is quick and
+     * avoids having to test for them at each point of use otherwise */
+    if (! PL_AboveLatin1) {
+       PL_AboveLatin1 = _new_invlist_C_array(AboveLatin1_invlist);
+       PL_ASCII = _new_invlist_C_array(ASCII_invlist);
+       PL_Latin1 = _new_invlist_C_array(Latin1_invlist);
+    }
+
     exp = SvPV(pattern, plen);
 
     if (plen == 0) { /* ignore the utf8ness if the pattern is 0 length */
@@ -11113,13 +11122,14 @@ parseit:
            }
        }
 
-       /* Done with loop; set <nonbitmap> to not include any code points that
-        * are in the bitmap */
+        /* Done with loop; remove any code points that are in the bitmap from
+         * <nonbitmap> */
        if (change_invlist) {
-           SV* keep_list = _new_invlist(2);
-           _append_range_to_invlist(keep_list, max_cp_to_set + 1, UV_MAX);
-           _invlist_intersection(nonbitmap, keep_list, &nonbitmap);
-           SvREFCNT_dec(keep_list);
+           _invlist_subtract(nonbitmap,
+                             (DEPENDS_SEMANTICS)
+                               ? PL_ASCII
+                               : PL_Latin1,
+                              &nonbitmap);
        }
 
        /* If have completely emptied it, remove it completely */
@@ -11233,10 +11243,7 @@ parseit:
            else {
                /* There is no overlap for non-/d, so just delete anything
                 * below 256 */
-               SV* keep_list = _new_invlist(2);
-               _append_range_to_invlist(keep_list, 256, UV_MAX);
-               _invlist_intersection(nonbitmap, keep_list, &nonbitmap);
-               SvREFCNT_dec(keep_list);
+               _invlist_intersection(nonbitmap, PL_AboveLatin1, &nonbitmap);
            }
        }
 
diff --git a/sv.c b/sv.c
index 47e71ec..fe10362 100644 (file)
--- a/sv.c
+++ b/sv.c
@@ -13414,6 +13414,9 @@ perl_clone_using(PerlInterpreter *proto_perl, UV flags,
     PL_utf8_idcont     = sv_dup_inc(proto_perl->Iutf8_idcont, param);
     PL_utf8_xidcont    = sv_dup_inc(proto_perl->Iutf8_xidcont, param);
     PL_utf8_foldable   = sv_dup_inc(proto_perl->Iutf8_foldable, param);
+    PL_ASCII           = sv_dup_inc(proto_perl->IASCII, param);
+    PL_AboveLatin1     = sv_dup_inc(proto_perl->IAboveLatin1, param);
+    PL_Latin1          = sv_dup_inc(proto_perl->ILatin1, param);
 
 
     if (proto_perl->Ipsig_pend) {