DynaLoader.doc was deleted 20 years ago in 3b35bae3

[perl5.git] / regexec.c
diff --git a/regexec.c b/regexec.c

index c88f467..5535a0e 100644 (file)
--- a/regexec.c
+++ b/regexec.c
@@ -1205,10 +1205,10 @@ Perl_re_intuit_start(pTHX_
               * didn't contradict, so just retry the anchored "other"
               * substr */
              DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
-                "  Found /%s^%s/m, rescanning for anchored from offset %ld (rx_origin now %"IVdf")...\n",
+                "  Found /%s^%s/m, rescanning for anchored from offset %"IVdf" (rx_origin now %"IVdf")...\n",
                  PL_colors[0], PL_colors[1],
-                (long)(rx_origin - strbeg + prog->anchored_offset),
-                (long)(rx_origin - strbeg)
+                (IV)(rx_origin - strbeg + prog->anchored_offset),
+                (IV)(rx_origin - strbeg)
              ));
              goto do_other_substr;
          }
@@ -5526,6 +5526,8 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
              /* FALLTHROUGH */
  
         case BOUNDL:  /*  /\b/l  */
+        {
+            bool b1, b2;
              _CHECK_AND_WARN_PROBLEMATIC_LOCALE;
  
              if (FLAGS(scan) != TRADITIONAL_BOUND) {
@@ -5538,27 +5540,28 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
  
             if (utf8_target) {
                 if (locinput == reginfo->strbeg)
-                   ln = isWORDCHAR_LC('\n');
+                   b1 = isWORDCHAR_LC('\n');
                 else {
-                    ln = isWORDCHAR_LC_utf8(reghop3((U8*)locinput, -1,
+                    b1 = isWORDCHAR_LC_utf8(reghop3((U8*)locinput, -1,
                                                          (U8*)(reginfo->strbeg)));
                 }
-                n = (NEXTCHR_IS_EOS)
+                b2 = (NEXTCHR_IS_EOS)
                      ? isWORDCHAR_LC('\n')
                      : isWORDCHAR_LC_utf8((U8*)locinput);
             }
             else { /* Here the string isn't utf8 */
-               ln = (locinput == reginfo->strbeg)
+               b1 = (locinput == reginfo->strbeg)
                       ? isWORDCHAR_LC('\n')
                       : isWORDCHAR_LC(UCHARAT(locinput - 1));
-                n = (NEXTCHR_IS_EOS)
+                b2 = (NEXTCHR_IS_EOS)
                      ? isWORDCHAR_LC('\n')
                      : isWORDCHAR_LC(nextchr);
             }
-            if (to_complement ^ (ln == n)) {
+            if (to_complement ^ (b1 == b2)) {
                  sayNO;
              }
             break;
+        }
  
         case NBOUND:  /*  /\B/   */
              to_complement = 1;
@@ -5575,6 +5578,8 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
              /* FALLTHROUGH */
  
         case BOUNDA:  /*  /\b/a  */
+        {
+            bool b1, b2;
  
            bound_ascii_match_only:
              /* Here the string isn't utf8, or is utf8 and only ascii characters
@@ -5586,16 +5591,17 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
               * 2) it is a multi-byte character, in which case the final byte is
               *    never mistakable for ASCII, and so the test will say it is
               *    not a word character, which is the correct answer. */
-            ln = (locinput == reginfo->strbeg)
+            b1 = (locinput == reginfo->strbeg)
                   ? isWORDCHAR_A('\n')
                   : isWORDCHAR_A(UCHARAT(locinput - 1));
-            n = (NEXTCHR_IS_EOS)
+            b2 = (NEXTCHR_IS_EOS)
                  ? isWORDCHAR_A('\n')
                  : isWORDCHAR_A(nextchr);
-            if (to_complement ^ (ln == n)) {
+            if (to_complement ^ (b1 == b2)) {
                  sayNO;
              }
             break;
+        }
  
         case NBOUNDU: /*  /\B/u  */
              to_complement = 1;
@@ -5609,15 +5615,18 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
                bound_utf8:
                  switch((bound_type) FLAGS(scan)) {
                      case TRADITIONAL_BOUND:
-                        ln = (locinput == reginfo->strbeg)
+                    {
+                        bool b1, b2;
+                        b1 = (locinput == reginfo->strbeg)
                               ? 0 /* isWORDCHAR_L1('\n') */
                               : isWORDCHAR_utf8(reghop3((U8*)locinput, -1,
                                                                  (U8*)(reginfo->strbeg)));
-                        n = (NEXTCHR_IS_EOS)
+                        b2 = (NEXTCHR_IS_EOS)
                              ? 0 /* isWORDCHAR_L1('\n') */
                              : isWORDCHAR_utf8((U8*)locinput);
-                        match = cBOOL(ln != n);
+                        match = cBOOL(b1 != b2);
                          break;
+                    }
                      case GCB_BOUND:
                          if (locinput == reginfo->strbeg || NEXTCHR_IS_EOS) {
                              match = TRUE; /* GCB always matches at begin and
@@ -5679,14 +5688,17 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
             else {  /* Not utf8 target */
                  switch((bound_type) FLAGS(scan)) {
                      case TRADITIONAL_BOUND:
-                        ln = (locinput == reginfo->strbeg)
+                    {
+                        bool b1, b2;
+                        b1 = (locinput == reginfo->strbeg)
                              ? 0 /* isWORDCHAR_L1('\n') */
                              : isWORDCHAR_L1(UCHARAT(locinput - 1));
-                        n = (NEXTCHR_IS_EOS)
+                        b2 = (NEXTCHR_IS_EOS)
                              ? 0 /* isWORDCHAR_L1('\n') */
                              : isWORDCHAR_L1(nextchr);
-                        match = cBOOL(ln != n);
+                        match = cBOOL(b1 != b2);
                          break;
+                    }
  
                      case GCB_BOUND:
                          if (locinput == reginfo->strbeg || NEXTCHR_IS_EOS) {
@@ -6529,7 +6541,9 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
             break;
  
          case ACCEPT:  /*  (*ACCEPT)  */
-            if (ARG(scan)){
+            if (scan->flags)
+                sv_yes_mark = MUTABLE_SV(rexi->data->data[ ARG( scan ) ]);
+            if (ARG2L(scan)){
                  regnode *cursor;
                  for (cursor=scan;
                       cursor && OP(cursor)!=END; 
@@ -7001,8 +7015,9 @@ NULL
             NOT_REACHED; /* NOTREACHED */
  
          case CUTGROUP:  /*  /(*THEN)/  */
-            sv_yes_mark = st->u.mark.mark_name = scan->flags ? NULL :
-                MUTABLE_SV(rexi->data->data[ ARG( scan ) ]);
+            sv_yes_mark = st->u.mark.mark_name = scan->flags
+                ? MUTABLE_SV(rexi->data->data[ ARG( scan ) ])
+                : NULL;
              PUSH_STATE_GOTO(CUTGROUP_next, next, locinput);
              /* NOTREACHED */
              NOT_REACHED; /* NOTREACHED */
@@ -7699,7 +7714,7 @@ NULL
             /* FALLTHROUGH */
  
         case PRUNE:   /*  (*PRUNE)   */
-           if (!scan->flags)
+            if (scan->flags)
                 sv_yes_mark = sv_commit = MUTABLE_SV(rexi->data->data[ ARG( scan ) ]);
             PUSH_STATE_GOTO(COMMIT_next, next, locinput);
              /* NOTREACHED */
@@ -7708,9 +7723,21 @@ NULL
         case COMMIT_next_fail:
             no_final = 1;    
             /* FALLTHROUGH */       
+            sayNO;
+            NOT_REACHED; /* NOTREACHED */
  
         case OPFAIL:   /* (*FAIL)  */
-           sayNO;
+            if (scan->flags)
+                sv_commit = MUTABLE_SV(rexi->data->data[ ARG( scan ) ]);
+            if (logical) {
+                /* deal with (?(?!)X|Y) properly,
+                 * make sure we trigger the no branch
+                 * of the trailing IFTHEN structure*/
+                sw= 0;
+                break;
+            } else {
+                sayNO;
+            }
              /* NOTREACHED */
             NOT_REACHED; /* NOTREACHED */
  
@@ -7754,7 +7781,7 @@ NULL
              NOT_REACHED; /* NOTREACHED */
  
          case SKIP:  /*  (*SKIP)  */
-            if (scan->flags) {
+            if (!scan->flags) {
                  /* (*SKIP) : if we fail we cut here*/
                  ST.mark_name = NULL;
                  ST.mark_loc = locinput;
@@ -8761,6 +8788,9 @@ S_reghop3(U8 *s, SSize_t off, const U8* lim)
              if (UTF8_IS_CONTINUED(*s)) {
                  while (s > lim && UTF8_IS_CONTINUATION(*s))
                      s--;
+                if (! UTF8_IS_START(*s)) {
+                    Perl_croak_nocontext("Malformed UTF-8 character (fatal)");
+                }
             }
              /* XXX could check well-formedness here */
         }
@@ -8785,6 +8815,9 @@ S_reghop4(U8 *s, SSize_t off, const U8* llim, const U8* rlim)
              if (UTF8_IS_CONTINUED(*s)) {
                  while (s > llim && UTF8_IS_CONTINUATION(*s))
                      s--;
+                if (! UTF8_IS_START(*s)) {
+                    Perl_croak_nocontext("Malformed UTF-8 character (fatal)");
+                }
              }
              /* XXX could check well-formedness here */
          }
@@ -8814,6 +8847,9 @@ S_reghopmaybe3(U8* s, SSize_t off, const U8* lim)
              if (UTF8_IS_CONTINUED(*s)) {
                  while (s > lim && UTF8_IS_CONTINUATION(*s))
                      s--;
+                if (! UTF8_IS_START(*s)) {
+                    Perl_croak_nocontext("Malformed UTF-8 character (fatal)");
+                }
             }
              /* XXX could check well-formedness here */
         }