This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Ooops. Retract the regex parts of #14090.
author Jarkko Hietaniemi <jhi@iki.fi>
Sat, 5 Jan 2002 17:21:12 +0000 (17:21 +0000)
committer Jarkko Hietaniemi <jhi@iki.fi>
Sat, 5 Jan 2002 17:21:12 +0000 (17:21 +0000)
p4raw-id: //depot/perl@14091

embed.fnc
embed.h
proto.h
regcomp.c
regcomp.h
regexec.c

index e534f52..da7e2ce 100644 (file)
--- a/embed.fnc
+++ b/embed.fnc
@@ -584,7 +584,7 @@ Ap  |void   |push_scope
 p      |OP*    |ref            |OP* o|I32 type
 p      |OP*    |refkids        |OP* o|I32 type
 Ap     |void   |regdump        |regexp* r
-Ap     |SV*    |regclass_swash |struct regnode *n|bool doinit|SV **listsvp|SV **altsvp
+Ap     |SV*    |regclass_swash |struct regnode *n|bool doinit|SV **initsvp
 Ap     |I32    |pregexec       |regexp* prog|char* stringarg \
                                |char* strend|char* strbeg|I32 minend \
                                |SV* screamer|U32 nosave
@@ -1134,7 +1134,6 @@ s |I32    |regrepeat      |regnode *p|I32 max
 s      |I32    |regrepeat_hard |regnode *p|I32 max|I32 *lp
 s      |I32    |regtry         |regexp *prog|char *startpos
 s      |bool   |reginclass     |regnode *n|U8 *p|bool do_utf8sv_is_utf8
-s      |bool   |reginclasslen  |regnode *n|U8 *p|STRLEN *lenp|bool do_utf8sv_is_utf8
 s      |CHECKPOINT|regcppush   |I32 parenfloor
 s      |char*|regcppop
 s      |char*|regcp_set_to     |I32 ss
diff --git a/embed.h b/embed.h
index 6203634..8a5cc4e 100644 (file)
--- a/embed.h
+++ b/embed.h
 #define regrepeat_hard         S_regrepeat_hard
 #define regtry                 S_regtry
 #define reginclass             S_reginclass
-#define reginclasslen          S_reginclasslen
 #define regcppush              S_regcppush
 #define regcppop               S_regcppop
 #define regcp_set_to           S_regcp_set_to
 #define ref(a,b)               Perl_ref(aTHX_ a,b)
 #define refkids(a,b)           Perl_refkids(aTHX_ a,b)
 #define regdump(a)             Perl_regdump(aTHX_ a)
-#define regclass_swash(a,b,c,d)        Perl_regclass_swash(aTHX_ a,b,c,d)
+#define regclass_swash(a,b,c)  Perl_regclass_swash(aTHX_ a,b,c)
 #define pregexec(a,b,c,d,e,f,g)        Perl_pregexec(aTHX_ a,b,c,d,e,f,g)
 #define pregfree(a)            Perl_pregfree(aTHX_ a)
 #define pregcomp(a,b,c)                Perl_pregcomp(aTHX_ a,b,c)
 #define regrepeat_hard(a,b,c)  S_regrepeat_hard(aTHX_ a,b,c)
 #define regtry(a,b)            S_regtry(aTHX_ a,b)
 #define reginclass(a,b,c)      S_reginclass(aTHX_ a,b,c)
-#define reginclasslen(a,b,c,d) S_reginclasslen(aTHX_ a,b,c,d)
 #define regcppush(a)           S_regcppush(aTHX_ a)
 #define regcppop()             S_regcppop(aTHX)
 #define regcp_set_to(a)                S_regcp_set_to(aTHX_ a)
diff --git a/proto.h b/proto.h
index ea837ec..52d634e 100644 (file)
--- a/proto.h
+++ b/proto.h
@@ -619,7 +619,7 @@ PERL_CALLCONV void  Perl_push_scope(pTHX);
 PERL_CALLCONV OP*      Perl_ref(pTHX_ OP* o, I32 type);
 PERL_CALLCONV OP*      Perl_refkids(pTHX_ OP* o, I32 type);
 PERL_CALLCONV void     Perl_regdump(pTHX_ regexp* r);
-PERL_CALLCONV SV*      Perl_regclass_swash(pTHX_ struct regnode *n, bool doinit, SV **listsvp, SV **altsvp);
+PERL_CALLCONV SV*      Perl_regclass_swash(pTHX_ struct regnode *n, bool doinit, SV **initsvp);
 PERL_CALLCONV I32      Perl_pregexec(pTHX_ regexp* prog, char* stringarg, char* strend, char* strbeg, I32 minend, SV* screamer, U32 nosave);
 PERL_CALLCONV void     Perl_pregfree(pTHX_ struct regexp* r);
 PERL_CALLCONV regexp*  Perl_pregcomp(pTHX_ char* exp, char* xend, PMOP* pm);
@@ -1164,7 +1164,6 @@ STATIC I32        S_regrepeat(pTHX_ regnode *p, I32 max);
 STATIC I32     S_regrepeat_hard(pTHX_ regnode *p, I32 max, I32 *lp);
 STATIC I32     S_regtry(pTHX_ regexp *prog, char *startpos);
 STATIC bool    S_reginclass(pTHX_ regnode *n, U8 *p, bool do_utf8sv_is_utf8);
-STATIC bool    S_reginclasslen(pTHX_ regnode *n, U8 *p, STRLEN *lenp, bool do_utf8sv_is_utf8);
 STATIC CHECKPOINT      S_regcppush(pTHX_ I32 parenfloor);
 STATIC char*   S_regcppop(pTHX);
 STATIC char*   S_regcp_set_to(pTHX_ I32 ss);
index e81bc0a..aacae22 100644 (file)
--- a/regcomp.c
+++ b/regcomp.c
@@ -3427,8 +3427,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
     SV *listsv = Nullsv;
     register char *e;
     UV n;
-    bool optimize_invert   = TRUE;
-    AV* unicode_alternate  = 0;
+    bool optimize_invert = TRUE;
 
     ret = reganode(pRExC_state, ANYOF, 0);
 
@@ -4029,35 +4028,18 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
                         /* If folding and foldable and a single
                          * character, insert also the folded version
                          * to the charclass. */
-                        if (f != value) {
-                             if (foldlen == UNISKIP(f))
-                                 Perl_sv_catpvf(aTHX_ listsv,
-                                                "%04"UVxf"\n", f);
-                             else {
-                                 /* Any multicharacter foldings
-                                  * require the following transform:
-                                  * [ABCDEF] -> (?:[ABCabcDEFd]|pq|rst)
-                                  * where E folds into "pq" and F folds
-                                  * into "rst", all other characters
-                                  * fold to single characters. */
-                                 SV *sv;
-
-                                 if (!unicode_alternate)
-                                     unicode_alternate = newAV();
-                                 sv = newSVpvn((char*)foldbuf, foldlen);
-                                 SvUTF8_on(sv);
-                                 av_push(unicode_alternate, sv);
-                             }
-                        }
+                        if (f != value && foldlen == UNISKIP(f))
+                             Perl_sv_catpvf(aTHX_ listsv, "%04"UVxf"\n", f);
 
                         /* If folding and the value is one of the Greek
                          * sigmas insert a few more sigmas to make the
                          * folding rules of the sigmas to work right.
                          * Note that not all the possible combinations
                          * are handled here: some of them are handled
-                         * by the standard folding rules, and some of
-                         * them (literal or EXACTF cases) are handled
-                         * during runtime in regexec.c:S_find_byclass(). */
+                         * by the standard folding rules, and some
+                         * of them (literal or EXACTF cases) are
+                         * handled during runtime in
+                         * regexec.c:S_find_byclass(). */
                         if (value == UNICODE_GREEK_SMALL_LETTER_FINAL_SIGMA) {
                              Perl_sv_catpvf(aTHX_ listsv, "%04"UVxf"\n",
                                             (UV)UNICODE_GREEK_CAPITAL_LETTER_SIGMA);
@@ -4114,7 +4096,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
 
        av_store(av, 0, listsv);
        av_store(av, 1, NULL);
-       av_store(av, 2, (SV*)unicode_alternate);
        rv = newRV_noinc((SV*)av);
        n = add_data(pRExC_state, 1, "s");
        RExC_rx->data->data[n] = (void*)rv;
@@ -4644,7 +4625,7 @@ Perl_regprop(pTHX_ SV *sv, regnode *o)
 
        {
            SV *lv;
-           SV *sw = regclass_swash(o, FALSE, &lv, 0);
+           SV *sw = regclass_swash(o, FALSE, &lv);
        
            if (lv) {
                if (sw) {
@@ -4798,7 +4779,7 @@ Perl_pregfree(pTHX_ struct regexp *r)
                new_comppad = NULL;
                break;
            case 'n':
-               break;
+               break;
            default:
                Perl_croak(aTHX_ "panic: regfree data code '%c'", r->data->what[n]);
            }
index 9053242..16cf957 100644 (file)
--- a/regcomp.h
+++ b/regcomp.h
@@ -365,9 +365,7 @@ typedef struct re_scream_pos_data_s
  *   n - Root of op tree for (?{EVAL}) item
  *   o - Start op for (?{EVAL}) item
  *   p - Pad for (?{EVAL} item
- *   s - swash for unicode-style character class, and the multicharacter
- *       strings resulting from casefolding the single-character entries
- *       in the character class
+ *   s - swash for unicode-style character class
  * 20010712 mjd@plover.com
  * (Remember to update re_dup() and pregfree() if you add any items.)
  */
index 5f25888..fe9ad4b 100644 (file)
--- a/regexec.c
+++ b/regexec.c
@@ -2369,13 +2369,11 @@ S_regmatch(pTHX_ regnode *prog)
            break;
        case ANYOF:
            if (do_utf8) {
-               STRLEN inclasslen = PL_regeol - locinput;
-
-               if (!reginclasslen(scan, (U8*)locinput, &inclasslen, do_utf8))
+               if (!reginclass(scan, (U8*)locinput, do_utf8))
                    sayNO;
                if (locinput >= PL_regeol)
                    sayNO;
-               locinput += inclasslen;
+               locinput += PL_utf8skip[nextchr];
                nextchr = UCHARAT(locinput);
            }
            else {
@@ -4109,11 +4107,10 @@ S_regrepeat_hard(pTHX_ regnode *p, I32 max, I32 *lp)
 */
 
 SV *
-Perl_regclass_swash(pTHX_ register regnode* node, bool doinit, SV** listsvp, SV **altsvp)
+Perl_regclass_swash(pTHX_ register regnode* node, bool doinit, SV** initsvp)
 {
-    SV *sw  = NULL;
-    SV *si  = NULL;
-    SV *alt = NULL;
+    SV *sw = NULL;
+    SV *si = NULL;
 
     if (PL_regdata && PL_regdata->count) {
        U32 n = ARG(node);
@@ -4121,11 +4118,10 @@ Perl_regclass_swash(pTHX_ register regnode* node, bool doinit, SV** listsvp, SV
        if (PL_regdata->what[n] == 's') {
            SV *rv = (SV*)PL_regdata->data[n];
            AV *av = (AV*)SvRV((SV*)rv);
-           SV **a, **b;
+           SV **a;
        
-           si  = *av_fetch(av, 0, FALSE);
-           a   =  av_fetch(av, 1, FALSE);
-           b   =  av_fetch(av, 2, FALSE);
+           si = *av_fetch(av, 0, FALSE);
+           a  =  av_fetch(av, 1, FALSE);
        
            if (a)
                sw = *a;
@@ -4133,15 +4129,11 @@ Perl_regclass_swash(pTHX_ register regnode* node, bool doinit, SV** listsvp, SV
                sw = swash_init("utf8", "", si, 1, 0);
                (void)av_store(av, 1, sw);
            }
-           if (b)
-               alt = *b;
        }
     }
        
-    if (listsvp)
-       *listsvp = si;
-    if (altsvp)
-       *altsvp  = alt;
+    if (initsvp)
+       *initsvp = si;
 
     return sw;
 }
@@ -4151,20 +4143,16 @@ Perl_regclass_swash(pTHX_ register regnode* node, bool doinit, SV** listsvp, SV
  */
 
 STATIC bool
-S_reginclasslen(pTHX_ register regnode *n, register U8* p, STRLEN* lenp, register bool do_utf8)
+S_reginclass(pTHX_ register regnode *n, register U8* p, register bool do_utf8)
 {
     char flags = ANYOF_FLAGS(n);
     bool match = FALSE;
     UV c;
     STRLEN len = 0;
-    STRLEN plen;
 
     c = do_utf8 ? utf8_to_uvchr(p, &len) : *p;
 
-    plen = lenp ? *lenp : UNISKIP(c);
     if (do_utf8 || (flags & ANYOF_UNICODE)) {
-        if (lenp)
-           *lenp = 0;
        if (do_utf8 && !ANYOF_RUNTIME(n)) {
            if (len != (STRLEN)-1 && c < 256 && ANYOF_BITMAP_TEST(n, c))
                match = TRUE;
@@ -4172,46 +4160,24 @@ S_reginclasslen(pTHX_ register regnode *n, register U8* p, STRLEN* lenp, registe
        if (!match && do_utf8 && (flags & ANYOF_UNICODE_ALL) && c >= 256)
            match = TRUE;
        if (!match) {
-           AV *av;
-           SV *sw = regclass_swash(n, TRUE, 0, (SV**)&av);
+           SV *sw = regclass_swash(n, TRUE, 0);
        
            if (sw) {
                if (swash_fetch(sw, p, do_utf8))
                    match = TRUE;
                else if (flags & ANYOF_FOLD) {
-                   U8 tmpbuf[UTF8_MAXLEN_FOLD+1];
-                   STRLEN tmplen;
-
-                   if (!match && lenp && av) {
-                       I32 i;
-                     
-                       for (i = 0; i <= av_len(av); i++) {
-                           SV* sv = *av_fetch(av, i, FALSE);
-                           STRLEN len;
-                           char *s = SvPV(sv, len);
-                       
-                           if (len <= plen && memEQ(s, p, len)) {
-                               *lenp = len;
-                               match = TRUE;
-                               break;
-                           }
-                       }
-                   }
-                   if (!match) {
-                       to_utf8_fold(p, tmpbuf, &tmplen);
-                       if (swash_fetch(sw, tmpbuf, do_utf8))
-                           match = TRUE;
-                   }
-                   if (!match) {
-                       to_utf8_upper(p, tmpbuf, &tmplen);
-                       if (swash_fetch(sw, tmpbuf, do_utf8))
-                           match = TRUE;
-                   }
+                   U8 foldbuf[UTF8_MAXLEN_FOLD+1];
+                   STRLEN foldlen;
+
+                   to_utf8_fold(p, foldbuf, &foldlen);
+                   if (swash_fetch(sw, foldbuf, do_utf8))
+                       match = TRUE;
+                   to_utf8_upper(p, foldbuf, &foldlen);
+                   if (swash_fetch(sw, foldbuf, do_utf8))
+                       match = TRUE;
                }
            }
        }
-       if (match && lenp && *lenp == 0)
-           *lenp = UNISKIP(c);
     }
     if (!match && c < 256) {
        if (ANYOF_BITMAP_TEST(n, c))
@@ -4272,12 +4238,6 @@ S_reginclasslen(pTHX_ register regnode *n, register U8* p, STRLEN* lenp, registe
     return (flags & ANYOF_INVERT) ? !match : match;
 }
 
-STATIC bool
-S_reginclass(pTHX_ register regnode *n, register U8* p, register bool do_utf8)
-{
-    return S_reginclasslen(aTHX_ n, p, 0, do_utf8);
-}
-
 STATIC U8 *
 S_reghop(pTHX_ U8 *s, I32 off)
 {