Use charnames inversion lists
authorKarl Williamson <khw@cpan.org>
Sat, 31 Mar 2018 17:18:38 +0000 (11:18 -0600)
committerKarl Williamson <khw@cpan.org>
Sat, 31 Mar 2018 21:36:45 +0000 (15:36 -0600)
This commit makes the inversion lists for parsing character name global
instead of interpreter level, so can be initialized once per process,
and no copies are created upon new thread instantiation.  More
importantly, this is another instance where utf8_heavy.pl no longer
needs to be loaded, and the definition files read from disk.

embed.fnc
embed.h
embedvar.h
intrpvar.h
invlist_inline.h
perl.c
perlapi.h
perlvars.h
proto.h
sv.c
toke.c

index 095fe3e..d2f52d4 100644 (file)
--- a/embed.fnc
+++ b/embed.fnc
@@ -1738,7 +1738,7 @@ EXp       |SV*    |_core_swash_init|NN const char* pkg|NN const char* name \
                |NN SV* listsv|I32 minbits|I32 none \
                |NULLOK SV* invlist|NULLOK U8* const flags_p
 #endif
-#if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_REGEXEC_C) || defined(PERL_IN_UTF8_C)
+#if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_REGEXEC_C) || defined(PERL_IN_TOKE_C) || defined(PERL_IN_UTF8_C)
 EiMRn  |UV*    |invlist_array  |NN SV* const invlist
 EiMRn  |bool*  |get_invlist_offset_addr|NN SV* invlist
 EiMRn  |UV     |_invlist_len   |NN SV* const invlist
diff --git a/embed.h b/embed.h
index f77aa4e..e71262a 100644 (file)
--- a/embed.h
+++ b/embed.h
 #endif
 #define regprop(a,b,c,d,e)     Perl_regprop(aTHX_ a,b,c,d,e)
 #  endif
-#  if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_REGEXEC_C) || defined(PERL_IN_UTF8_C)
+#  if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_REGEXEC_C) || defined(PERL_IN_TOKE_C) || defined(PERL_IN_UTF8_C)
 #define _get_swash_invlist(a)  Perl__get_swash_invlist(aTHX_ a)
 #define _invlist_contains_cp   S__invlist_contains_cp
 #define _invlist_len           S__invlist_len
index e344965..890a8b4 100644 (file)
 #define PL_unitcheckav_save    (vTHX->Iunitcheckav_save)
 #define PL_unlockhook          (vTHX->Iunlockhook)
 #define PL_unsafe              (vTHX->Iunsafe)
-#define PL_utf8_charname_begin (vTHX->Iutf8_charname_begin)
-#define PL_utf8_charname_continue      (vTHX->Iutf8_charname_continue)
 #define PL_utf8_foldclosures   (vTHX->Iutf8_foldclosures)
 #define PL_utf8_mark           (vTHX->Iutf8_mark)
 #define PL_utf8_swash_ptrs     (vTHX->Iutf8_swash_ptrs)
 #define PL_Gtimesbase          (my_vars->Gtimesbase)
 #define PL_use_safe_putenv     (my_vars->Guse_safe_putenv)
 #define PL_Guse_safe_putenv    (my_vars->Guse_safe_putenv)
+#define PL_utf8_charname_begin (my_vars->Gutf8_charname_begin)
+#define PL_Gutf8_charname_begin        (my_vars->Gutf8_charname_begin)
+#define PL_utf8_charname_continue      (my_vars->Gutf8_charname_continue)
+#define PL_Gutf8_charname_continue     (my_vars->Gutf8_charname_continue)
 #define PL_utf8_foldable       (my_vars->Gutf8_foldable)
 #define PL_Gutf8_foldable      (my_vars->Gutf8_foldable)
 #define PL_utf8_idcont         (my_vars->Gutf8_idcont)
index 00d612a..a05e847 100644 (file)
@@ -645,8 +645,6 @@ PERLVAR(I, InBitmap,        SV *)
 
 /* utf8 character class swashes */
 PERLVAR(I, utf8_mark,  SV *)
-PERLVAR(I, utf8_charname_begin, SV *)
-PERLVAR(I, utf8_charname_continue, SV *)
 PERLVARA(I, utf8_swash_ptrs, POSIX_SWASH_COUNT, SV *)
 PERLVAR(I, seen_deprecated_macro, HV *)
 
index 4ce04f9..3a1afc6 100644 (file)
@@ -6,7 +6,7 @@
  *    License or the Artistic License, as specified in the README file.
  */
 
-#if defined(PERL_IN_UTF8_C) || defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_REGEXEC_C)
+#if defined(PERL_IN_UTF8_C) || defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_REGEXEC_C) || defined(PERL_IN_TOKE_C)
 
 /* An element is in an inversion list iff its index is even numbered: 0, 2, 4,
  * etc */
diff --git a/perl.c b/perl.c
index e364f22..ff66b4e 100644 (file)
--- a/perl.c
+++ b/perl.c
@@ -334,6 +334,8 @@ perl_construct(pTHXx)
                                             _Perl_Folds_To_Multi_Char_invlist);
     PL_NonL1NonFinalFold = _new_invlist_C_array(
                                             NonL1_Perl_Non_Final_Folds_invlist);
+    PL_utf8_charname_begin = _new_invlist_C_array(_Perl_Charname_Begin_invlist);
+    PL_utf8_charname_continue = _new_invlist_C_array(_Perl_Charname_Continue_invlist);
 
 
 #if defined(LOCAL_PATCH_COUNT)
index c24f8ea..6b90055 100644 (file)
--- a/perlapi.h
+++ b/perlapi.h
@@ -205,6 +205,10 @@ END_EXTERN_C
 #define PL_timesbase           (*Perl_Gtimesbase_ptr(NULL))
 #undef  PL_use_safe_putenv
 #define PL_use_safe_putenv     (*Perl_Guse_safe_putenv_ptr(NULL))
+#undef  PL_utf8_charname_begin
+#define PL_utf8_charname_begin (*Perl_Gutf8_charname_begin_ptr(NULL))
+#undef  PL_utf8_charname_continue
+#define PL_utf8_charname_continue      (*Perl_Gutf8_charname_continue_ptr(NULL))
 #undef  PL_utf8_foldable
 #define PL_utf8_foldable       (*Perl_Gutf8_foldable_ptr(NULL))
 #undef  PL_utf8_idcont
index a3ba851..af48fa8 100644 (file)
@@ -300,3 +300,5 @@ PERLVAR(G, utf8_totitle, SV *)
 PERLVAR(G, utf8_tolower, SV *)
 PERLVAR(G, utf8_tofold,        SV *)
 PERLVAR(G, utf8_tosimplefold,  SV *)
+PERLVAR(G, utf8_charname_begin, SV *)
+PERLVAR(G, utf8_charname_continue, SV *)
diff --git a/proto.h b/proto.h
index 6e2f646..91bcd6d 100644 (file)
--- a/proto.h
+++ b/proto.h
@@ -5444,7 +5444,7 @@ PERL_CALLCONV void        Perl_regprop(pTHX_ const regexp *prog, SV* sv, const regnode*
 #define PERL_ARGS_ASSERT_REGPROP       \
        assert(sv); assert(o)
 #endif
-#if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_REGEXEC_C) || defined(PERL_IN_UTF8_C)
+#if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_REGEXEC_C) || defined(PERL_IN_TOKE_C) || defined(PERL_IN_UTF8_C)
 PERL_CALLCONV SV*      Perl__get_swash_invlist(pTHX_ SV* const swash)
                        __attribute__warn_unused_result__;
 #define PERL_ARGS_ASSERT__GET_SWASH_INVLIST    \
diff --git a/sv.c b/sv.c
index fb89ac0..54a4508 100644 (file)
--- a/sv.c
+++ b/sv.c
@@ -15592,8 +15592,6 @@ perl_clone_using(PerlInterpreter *proto_perl, UV flags,
     }
     PL_seen_deprecated_macro = hv_dup_inc(proto_perl->Iseen_deprecated_macro, param);
     PL_utf8_mark       = sv_dup_inc(proto_perl->Iutf8_mark, param);
-    PL_utf8_charname_begin = sv_dup_inc(proto_perl->Iutf8_charname_begin, param);
-    PL_utf8_charname_continue = sv_dup_inc(proto_perl->Iutf8_charname_continue, param);
 
     if (proto_perl->Ipsig_pend) {
        Newxz(PL_psig_pend, SIG_SIZE, int);
diff --git a/toke.c b/toke.c
index 0ef3341..3405dc6 100644 (file)
--- a/toke.c
+++ b/toke.c
@@ -39,6 +39,7 @@ Individual members of C<PL_parser> have their own documentation.
 #define PERL_IN_TOKE_C
 #include "perl.h"
 #include "dquote_inline.h"
+#include "invlist_inline.h"
 
 #define new_constant(a,b,c,d,e,f,g)    \
        S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g)
@@ -2683,14 +2684,11 @@ S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e)
             s += 2;
         }
         else {
-            if (! PL_utf8_charname_begin) {
-                U8 flags = _CORE_SWASH_INIT_ACCEPT_INVLIST;
-                PL_utf8_charname_begin = _core_swash_init("utf8",
-                                                        "_Perl_Charname_Begin",
-                                                        &PL_sv_undef,
-                                                        1, 0, NULL, &flags);
-            }
-            if (! swash_fetch(PL_utf8_charname_begin, (U8 *) s, TRUE)) {
+            if (! _invlist_contains_cp(PL_utf8_charname_begin,
+                                       utf8_to_uvchr_buf((U8 *) s,
+                                                         (U8 *) e,
+                                                         NULL)))
+            {
                 goto bad_charname;
             }
             s += UTF8SKIP(s);
@@ -2714,14 +2712,11 @@ S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e)
                 s += 2;
             }
             else {
-                if (! PL_utf8_charname_continue) {
-                    U8 flags = _CORE_SWASH_INIT_ACCEPT_INVLIST;
-                    PL_utf8_charname_continue = _core_swash_init("utf8",
-                                                "_Perl_Charname_Continue",
-                                                &PL_sv_undef,
-                                                1, 0, NULL, &flags);
-                }
-                if (! swash_fetch(PL_utf8_charname_continue, (U8 *) s, TRUE)) {
+                if (! _invlist_contains_cp(PL_utf8_charname_continue,
+                                           utf8_to_uvchr_buf((U8 *) s,
+                                                             (U8 *) e,
+                                                             NULL)))
+                {
                     goto bad_charname;
                 }
                 s += UTF8SKIP(s);