Integrate:

author Nick Ing-Simmons <nik@tiuk.ti.com>

Fri, 8 Aug 2003 23:05:49 +0000 (00:05 +0100)

committer Jarkko Hietaniemi <jhi@iki.fi>

Fri, 8 Aug 2003 21:07:09 +0000 (21:07 +0000)
author Nick Ing-Simmons <nik@tiuk.ti.com>
Fri, 8 Aug 2003 23:05:49 +0000 (00:05 +0100)
committer Jarkko Hietaniemi <jhi@iki.fi>
Fri, 8 Aug 2003 21:07:09 +0000 (21:07 +0000)
diff --git a/regexec.c b/regexec.c

index 5cae8b2..e6af021 100644 (file)
--- a/regexec.c
+++ b/regexec.c
@@ -953,6 +953,7 @@ S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *sta
         I32 doevery = (prog->reganch & ROPT_SKIP) == 0;
         char *m;
         STRLEN ln;
+       STRLEN lnc;
         unsigned int c1;
         unsigned int c2;
         char *e;
@@ -1008,10 +1009,12 @@ S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *sta
             }
             break;
         case EXACTF:
-           m = STRING(c);
-           ln = STR_LEN(c);
+           m   = STRING(c);
+           ln  = STR_LEN(c);   /* length to match in octets/bytes */
+           lnc = (I32) ln;     /* length to match in characters */
             if (UTF) {
                 STRLEN ulen1, ulen2;
+               U8 *sm = (U8 *) m;
                 U8 tmpbuf1[UTF8_MAXLEN_UCLC+1];
                 U8 tmpbuf2[UTF8_MAXLEN_UCLC+1];
  
@@ -1022,6 +1025,11 @@ S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *sta
                                     0, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
                 c2 = utf8n_to_uvchr(tmpbuf2, UTF8_MAXLEN_UCLC,
                                     0, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
+               lnc = 0;
+               while (sm < ((U8 *) m + ln)) {
+                   lnc++;
+                   sm += UTF8SKIP(sm);
+               }
             }
             else {
                 c1 = *(U8*)m;
@@ -1029,14 +1037,13 @@ S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *sta
             }
             goto do_exactf;
         case EXACTFL:
-           m = STRING(c);
-           ln = STR_LEN(c);
+           m   = STRING(c);
+           ln  = STR_LEN(c);
+           lnc = (I32) ln;
             c1 = *(U8*)m;
             c2 = PL_fold_locale[c1];
           do_exactf:
-           /* The last byte to try is ln-1 characters before strend
-            * since the strend points one byte past the string. */
-           e = HOP3c(strend, (I32)1 - (I32)ln, s);
+           e = HOP3c(strend, -lnc, s);
  
             if (norun && e < s)
                 e = s;                  /* Due to minlen logic of intuit() */
@@ -1059,6 +1066,8 @@ S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *sta
                 STRLEN len, foldlen;
                 
                 if (c1 == c2) {
+                   /* Upper and lower of 1st char are equal -
+                    * probably not a "letter". */
                     while (s <= e) {
                         c = utf8n_to_uvchr((U8*)s, UTF8_MAXLEN, &len,
                                            ckWARN(WARN_UTF8) ?
diff --git a/t/op/pat.t b/t/op/pat.t

index 69b47b8..a4ab2b1 100755 (executable)
--- a/t/op/pat.t
+++ b/t/op/pat.t
@@ -6,7 +6,7 @@
  
  $| = 1;
  
-print "1..1009\n";
+print "1..1010\n";
  
  BEGIN {
      chdir 't' if -d 't';
@@ -3192,5 +3192,7 @@ ok("123\x{100}" =~ /^.*1.*23\x{100}$/, 'uft8 + multiple floating substr');
  ok("  \x{101}" =~ qr/\x{100}/i,
     "<20030808193656.5109.1@llama.ni-s.u-net.com>");
  
-# last test 1008
+ok("  \x{1E01}" =~ qr/\x{1E00}/i,
+   "<20030808193656.5109.1@llama.ni-s.u-net.com>");
  
+# last test 1010
author	Nick Ing-Simmons <nik@tiuk.ti.com>
	Fri, 8 Aug 2003 23:05:49 +0000 (00:05 +0100)
committer	Jarkko Hietaniemi <jhi@iki.fi>
	Fri, 8 Aug 2003 21:07:09 +0000 (21:07 +0000)
regexec.c		patch | blob | blame | history
t/op/pat.t		patch | blob | blame | history