This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
regexec.c: More efficient Korean \X processing
authorKarl Williamson <public@khwilliamson.com>
Sat, 15 Dec 2012 16:53:19 +0000 (09:53 -0700)
committerKarl Williamson <public@khwilliamson.com>
Sun, 16 Dec 2012 18:03:29 +0000 (11:03 -0700)
This refactors the code slightly that checks for Korean precomposed
syllables in \X.  It eliminates the PL_variable formerly used to keep
track of things.

embed.fnc
embed.h
embedvar.h
intrpvar.h
proto.h
regexec.c
sv.c

index 0a382f6..2be18ad 100644 (file)
--- a/embed.fnc
+++ b/embed.fnc
@@ -2037,8 +2037,6 @@ ERsn      |U8*    |reghop3        |NN U8 *s|I32 off|NN const U8 *lim
 ERsM   |SV*    |core_regclass_swash|NULLOK const regexp *prog \
                                |NN const struct regnode *node|bool doinit \
                                |NULLOK SV **listsvp
-:not currently used EiR        |bool   |is_utf8_X_LV           |NN const U8 *p
-EiR    |bool   |is_utf8_X_LVT          |NN const U8 *p
 #ifdef XXX_dmq
 ERsn   |U8*    |reghop4        |NN U8 *s|I32 off|NN const U8 *llim \
                                |NN const U8 *rlim
diff --git a/embed.h b/embed.h
index d6b1c2f..4ae36e3 100644 (file)
--- a/embed.h
+++ b/embed.h
 #define core_regclass_swash(a,b,c,d)   S_core_regclass_swash(aTHX_ a,b,c,d)
 #define find_byclass(a,b,c,d,e)        S_find_byclass(aTHX_ a,b,c,d,e)
 #define isFOO_lc(a,b)          S_isFOO_lc(aTHX_ a,b)
-#define is_utf8_X_LVT(a)       S_is_utf8_X_LVT(aTHX_ a)
 #define reg_check_named_buff_matched(a,b)      S_reg_check_named_buff_matched(aTHX_ a,b)
 #define regcppop(a)            S_regcppop(aTHX_ a)
 #define regcppush(a,b)         S_regcppush(aTHX_ a,b)
index 9fc6709..87791b4 100644 (file)
 #define PL_unitcheckav_save    (vTHX->Iunitcheckav_save)
 #define PL_unlockhook          (vTHX->Iunlockhook)
 #define PL_unsafe              (vTHX->Iunsafe)
-#define PL_utf8_X_LVT          (vTHX->Iutf8_X_LVT)
 #define PL_utf8_X_extend       (vTHX->Iutf8_X_extend)
 #define PL_utf8_X_regular_begin        (vTHX->Iutf8_X_regular_begin)
 #define PL_utf8_alnum          (vTHX->Iutf8_alnum)
index 004989c..b513d22 100644 (file)
@@ -625,7 +625,6 @@ PERLVAR(I, utf8_punct,      SV *)
 PERLVAR(I, utf8_mark,  SV *)
 PERLVAR(I, utf8_X_regular_begin, SV *)
 PERLVAR(I, utf8_X_extend, SV *)
-PERLVAR(I, utf8_X_LVT, SV *)
 PERLVAR(I, utf8_toupper, SV *)
 PERLVAR(I, utf8_totitle, SV *)
 PERLVAR(I, utf8_tolower, SV *)
diff --git a/proto.h b/proto.h
index 7f4942e..2ab4429 100644 (file)
--- a/proto.h
+++ b/proto.h
@@ -6790,12 +6790,6 @@ STATIC char*     S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s, cons
 STATIC bool    S_isFOO_lc(pTHX_ const U8 classnum, const U8 character)
                        __attribute__warn_unused_result__;
 
-PERL_STATIC_INLINE bool        S_is_utf8_X_LVT(pTHX_ const U8 *p)
-                       __attribute__warn_unused_result__
-                       __attribute__nonnull__(pTHX_1);
-#define PERL_ARGS_ASSERT_IS_UTF8_X_LVT \
-       assert(p)
-
 STATIC I32     S_reg_check_named_buff_matched(pTHX_ const regexp *rex, const regnode *scan)
                        __attribute__warn_unused_result__
                        __attribute__nonnull__(pTHX_1)
index 25e2d7e..c4b949b 100644 (file)
--- a/regexec.c
+++ b/regexec.c
@@ -4601,10 +4601,15 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
                             if (locinput < PL_regeol
                                 && is_GCB_LV_LVT_V_utf8(locinput))
                             {
-
                                 /* Otherwise keep going.  Must be LV, LVT or V.
-                                 * See if LVT */
-                                if (is_utf8_X_LVT((U8*)locinput)) {
+                                 * See if LVT, by first ruling out V, then LV */
+                                if (! is_GCB_V_utf8(locinput)
+                                        /* All but every TCount one is LV */
+                                    && (valid_utf8_to_uvchr((U8 *) locinput,
+                                                                         NULL)
+                                                                        - SBASE)
+                                        % TCount != 0)
+                                {
                                     locinput += UTF8SKIP(locinput);
                                 } else {
 
@@ -7741,64 +7746,6 @@ S_to_byte_substr(pTHX_ regexp *prog)
     return TRUE;
 }
 
-#if 0   /* This routine is not currently used */
-PERL_STATIC_INLINE bool
-S_is_utf8_X_LV(pTHX_ const U8 *p)
-{
-    /* Unlike most other similarly named routines here, this does not create a
-     * swash, so swash_fetch() cannot be used on PL_utf8_X_LV. */
-
-    dVAR;
-
-    UV cp = valid_utf8_to_uvchr(p, NULL);
-
-    PERL_ARGS_ASSERT_IS_UTF8_X_LV;
-
-    /* The earliest Unicode releases did not have these precomposed Hangul
-     * syllables.  Set to point to undef in that case, so will return false on
-     * every call */
-    if (! PL_utf8_X_LV) {   /* Set up if this is the first time called */
-        PL_utf8_X_LV = swash_init("utf8", "_X_GCB_LV", &PL_sv_undef, 1, 0);
-        if (_invlist_len(_get_swash_invlist(PL_utf8_X_LV)) == 0) {
-            SvREFCNT_dec(PL_utf8_X_LV);
-            PL_utf8_X_LV = &PL_sv_undef;
-        }
-    }
-
-    return (PL_utf8_X_LV != &PL_sv_undef
-            && cp >= SBASE && cp < SBASE + SCount
-            && (cp - SBASE) % TCount == 0); /* Only every TCount one is LV */
-}
-#endif
-
-PERL_STATIC_INLINE bool
-S_is_utf8_X_LVT(pTHX_ const U8 *p)
-{
-    /* Unlike most other similarly named routines here, this does not create a
-     * swash, so swash_fetch() cannot be used on PL_utf8_X_LVT. */
-
-    dVAR;
-
-    UV cp = valid_utf8_to_uvchr(p, NULL);
-
-    PERL_ARGS_ASSERT_IS_UTF8_X_LVT;
-
-    /* The earliest Unicode releases did not have these precomposed Hangul
-     * syllables.  Set to point to undef in that case, so will return false on
-     * every call */
-    if (! PL_utf8_X_LVT) {   /* Set up if this is the first time called */
-        PL_utf8_X_LVT = swash_init("utf8", "_X_GCB_LVT", &PL_sv_undef, 1, 0);
-        if (_invlist_len(_get_swash_invlist(PL_utf8_X_LVT)) == 0) {
-            SvREFCNT_dec(PL_utf8_X_LVT);
-            PL_utf8_X_LVT = &PL_sv_undef;
-        }
-    }
-
-    return (PL_utf8_X_LVT != &PL_sv_undef
-            && cp >= SBASE && cp < SBASE + SCount
-            && (cp - SBASE) % TCount != 0); /* All but every TCount one is LV */
-}
-
 /*
  * Local variables:
  * c-indentation-style: bsd
diff --git a/sv.c b/sv.c
index 73fa710..50f8e66 100644 (file)
--- a/sv.c
+++ b/sv.c
@@ -13649,7 +13649,6 @@ perl_clone_using(PerlInterpreter *proto_perl, UV flags,
     PL_utf8_mark       = sv_dup_inc(proto_perl->Iutf8_mark, param);
     PL_utf8_X_regular_begin    = sv_dup_inc(proto_perl->Iutf8_X_regular_begin, param);
     PL_utf8_X_extend   = sv_dup_inc(proto_perl->Iutf8_X_extend, param);
-    PL_utf8_X_LVT      = sv_dup_inc(proto_perl->Iutf8_X_LVT, param);
     PL_utf8_toupper    = sv_dup_inc(proto_perl->Iutf8_toupper, param);
     PL_utf8_totitle    = sv_dup_inc(proto_perl->Iutf8_totitle, param);
     PL_utf8_tolower    = sv_dup_inc(proto_perl->Iutf8_tolower, param);