consistent commands for perl5db.pl etc.

[perl5.git] / regexec.c
diff --git a/regexec.c b/regexec.c

index c3948ac..61d175a 100644 (file)
--- a/regexec.c
+++ b/regexec.c
@@ -67,7 +67,7 @@
   *
   ****    Alterations to Henry's code are...
   ****
- ****    Copyright (c) 1991-2001, Larry Wall
+ ****    Copyright (c) 1991-2002, Larry Wall
   ****
   ****    You may distribute under the terms of either the GNU General Public
   ****    License or the Artistic License, as specified in the README file.
@@ -924,8 +924,11 @@ S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *sta
                 STRLEN skip = do_utf8 ? UTF8SKIP(s) : 1;
  
                 if (reginclass(c, (U8*)s, do_utf8) ||
-                   (ANYOF_UNICODE_FOLD_SHARP_S(c, s, strend) &&
-                    (skip = 2))) {
+                   (ANYOF_FOLD_SHARP_S(c, s, strend) &&
+                    /* The assignment of 2 is intentional:
+                     * for the sharp s, the skip is 2. */
+                    (skip = SHARP_S_SKIP)
+                    )) {
                     if (tmp && (norun || regtry(prog, s)))
                         goto got_it;
                     else
@@ -956,8 +959,8 @@ S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *sta
                 to_utf8_lower((U8*)m, tmpbuf1, &ulen1);
                 to_utf8_upper((U8*)m, tmpbuf2, &ulen2);
  
-               c1 = utf8_to_uvuni(tmpbuf1, 0);
-               c2 = utf8_to_uvuni(tmpbuf2, 0);
+               c1 = utf8_to_uvchr(tmpbuf1, 0);
+               c2 = utf8_to_uvchr(tmpbuf2, 0);
             }
             else {
                 c1 = *(U8*)m;
@@ -981,9 +984,10 @@ S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *sta
              * text of the node.  The c1 and c2 are the first
              * characters (though in Unicode it gets a bit
              * more complicated because there are more cases
-            * than just upper and lower: one is really supposed
-            * to use the so-called folding case for case-insensitive
-            * matching (called "loose matching" in Unicode).  */
+            * than just upper and lower: one needs to use
+            * the so-called folding case for case-insensitive
+            * matching (called "loose matching" in Unicode)).
+            * ibcmp_utf8() will do just that. */
  
             if (do_utf8) {
                 UV c, f;
@@ -1043,7 +1047,7 @@ S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *sta
                              if ( f != c
                                   && (f == c1 || f == c2)
                                   && (ln == foldlen ||
-                                     !ibcmp_utf8((char *)foldbuf,
+                                     !ibcmp_utf8((char *) foldbuf,
                                                   (char **)0, foldlen, do_utf8,
                                                   m,
                                                   (char **)0, ln,      UTF))
@@ -2332,7 +2336,7 @@ S_regmatch(pTHX_ regnode *prog)
                         if (l >= PL_regeol)
                              sayNO;
                         if (NATIVE_TO_UNI(*(U8*)s) !=
-                           utf8_to_uvchr((U8*)l, &ulen))
+                           utf8_to_uvuni((U8*)l, &ulen))
                              sayNO;
                         l += ulen;
                         s ++;
@@ -2344,7 +2348,7 @@ S_regmatch(pTHX_ regnode *prog)
                         if (l >= PL_regeol)
                             sayNO;
                         if (NATIVE_TO_UNI(*((U8*)l)) !=
-                           utf8_to_uvchr((U8*)s, &ulen))
+                           utf8_to_uvuni((U8*)s, &ulen))
                             sayNO;
                         s += ulen;
                         l ++;
@@ -2377,8 +2381,8 @@ S_regmatch(pTHX_ regnode *prog)
                 char *l = locinput;
                 char *e = PL_regeol;
  
-               if (ibcmp_utf8(s, 0,  ln, do_utf8,
-                              l, &e, 0,  UTF)) {
+               if (ibcmp_utf8(s, 0,  ln, UTF,
+                              l, &e, 0,  do_utf8)) {
                      /* One more case for the sharp s:
                       * pack("U0U*", 0xDF) =~ /ss/i,
                       * the 0xC3 0x9F are the UTF-8
@@ -2439,8 +2443,8 @@ S_regmatch(pTHX_ regnode *prog)
             /* If we might have the case of the German sharp s
              * in a casefolding Unicode character class. */
  
-           if (ANYOF_UNICODE_FOLD_SHARP_S(scan, locinput, PL_regeol)) {
-                locinput += 2;
+           if (ANYOF_FOLD_SHARP_S(scan, locinput, PL_regeol)) {
+                locinput += SHARP_S_SKIP;
                  nextchr = UCHARAT(locinput);
             }
             else
@@ -4256,9 +4260,6 @@ S_reginclasslen(pTHX_ register regnode *n, register U8* p, STRLEN* lenp, registe
                 if (swash_fetch(sw, p, do_utf8))
                     match = TRUE;
                 else if (flags & ANYOF_FOLD) {
-                   U8 tmpbuf[UTF8_MAXLEN_FOLD+1];
-                   STRLEN tmplen;
-
                     if (!match && lenp && av) {
                         I32 i;
                       
@@ -4275,15 +4276,13 @@ S_reginclasslen(pTHX_ register regnode *n, register U8* p, STRLEN* lenp, registe
                         }
                     }
                     if (!match) {
+                       U8 tmpbuf[UTF8_MAXLEN_FOLD+1];
+                       STRLEN tmplen;
+
                         to_utf8_fold(p, tmpbuf, &tmplen);
                         if (swash_fetch(sw, tmpbuf, do_utf8))
                             match = TRUE;
                     }
-                   if (!match) {
-                       to_utf8_upper(p, tmpbuf, &tmplen);
-                       if (swash_fetch(sw, tmpbuf, do_utf8))
-                           match = TRUE;
-                   }
                 }
             }
         }