This is a live mirror of the Perl 5 development repository currently hosted at https://github.com/perl/perl5
locale.c: Remove vars unused on some platforms
[perl5.git] / regcomp.c
index fa8a61a..a82171a 100644 (file)
--- a/regcomp.c
+++ b/regcomp.c
@@ -160,7 +160,7 @@ struct RExC_state_t {
     int                num_code_blocks;        /* size of code_blocks[] */
     int                code_index;             /* next code_blocks[] slot */
     SSize_t     maxlen;                        /* mininum possible number of chars in string to match */
-#if ADD_TO_REGEXEC
+#ifdef ADD_TO_REGEXEC
     char       *starttry;              /* -Dr: where regtry was called. */
 #define RExC_starttry  (pRExC_state->starttry)
 #endif
@@ -3545,8 +3545,6 @@ typedef struct scan_frame {
 } scan_frame;
 
 
-#define SCAN_COMMIT(s, data, m) scan_commit(s, data, m, is_inf)
-
 STATIC SSize_t
 S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                         SSize_t *minlenp, SSize_t *deltap,
@@ -3672,10 +3670,11 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                regnode_ssc accum;
                regnode * const startbranch=scan;
 
-               if (flags & SCF_DO_SUBSTR)
-                    SCAN_COMMIT(pRExC_state, data, minlenp); /* Cannot merge
-                                                                strings after
-                                                            this. */
+                if (flags & SCF_DO_SUBSTR) {
+                    /* Cannot merge strings after this. */
+                    scan_commit(pRExC_state, data, minlenp, is_inf);
+                }
+
                 if (flags & SCF_DO_STCLASS)
                    ssc_init_zero(pRExC_state, &accum);
 
@@ -4145,7 +4144,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                     /* some form of infinite recursion, assume infinite length
                      * */
                     if (flags & SCF_DO_SUBSTR) {
-                        SCAN_COMMIT(pRExC_state,data,minlenp);
+                        scan_commit(pRExC_state, data, minlenp, is_inf);
                         data->longest = &(data->longest_float);
                     }
                     is_inf = is_inf_internal = 1;
@@ -4242,7 +4241,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
            /* Search for fixed substrings supports EXACT only. */
            if (flags & SCF_DO_SUBSTR) {
                assert(data);
-               SCAN_COMMIT(pRExC_state, data, minlenp);
+                scan_commit(pRExC_state, data, minlenp, is_inf);
            }
            if (UTF) {
                const U8 * const s = (U8 *)STRING(scan);
@@ -4389,13 +4388,13 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                    scan = NEXTOPER(scan);
                    goto do_curly;
                }
-               is_inf = is_inf_internal = 1;
-               scan = regnext(scan);
                if (flags & SCF_DO_SUBSTR) {
-                    SCAN_COMMIT(pRExC_state, data, minlenp);
+                    scan_commit(pRExC_state, data, minlenp, is_inf);
                     /* Cannot extend fixed substrings */
                    data->longest = &(data->longest_float);
                }
+                is_inf = is_inf_internal = 1;
+                scan = regnext(scan);
                goto optimize_curly_tail;
            case CURLY:
                if (stopparen>0 && (OP(scan)==CURLYN || OP(scan)==CURLYM)
@@ -4416,7 +4415,8 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                next_is_eval = (OP(scan) == EVAL);
              do_curly:
                if (flags & SCF_DO_SUBSTR) {
-                   if (mincount == 0) SCAN_COMMIT(pRExC_state,data,minlenp);
+                    if (mincount == 0)
+                        scan_commit(pRExC_state, data, minlenp, is_inf);
                     /* Cannot extend fixed substrings */
                    pos_before = data->pos_min;
                }
@@ -4630,6 +4630,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                    pars++;
                if (flags & SCF_DO_SUBSTR) {
                    SV *last_str = NULL;
+                    STRLEN last_chrs = 0;
                    int counted = mincount != 0;
 
                     if (data->last_end > 0 && mincount != 0) { /* Ends with a
@@ -4645,9 +4646,12 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                        l -= old;
                        /* Get the added string: */
                        last_str = newSVpvn_utf8(s  + old, l, UTF);
+                        last_chrs = UTF ? utf8_length((U8*)(s + old),
+                                            (U8*)(s + old + l)) : l;
                        if (deltanext == 0 && pos_before == b) {
                            /* What was added is a constant string */
                            if (mincount > 1) {
+
                                SvGROW(last_str, (mincount * l) + 1);
                                repeatcpy(SvPVX(last_str) + l,
                                          SvPVX_const(last_str), l,
@@ -4663,8 +4667,9 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                                        SvUTF8(sv) && SvMAGICAL(sv) ?
                                        mg_find(sv, PERL_MAGIC_utf8) : NULL;
                                    if (mg && mg->mg_len >= 0)
-                                       mg->mg_len += CHR_SVLEN(last_str) - l;
+                                       mg->mg_len += last_chrs * (mincount-1);
                                }
+                                last_chrs *= mincount;
                                data->last_end += l * (mincount - 1);
                            }
                        } else {
@@ -4696,7 +4701,7 @@ PerlIO_printf(Perl_debug_log, "LHS=%"UVdf" RHS=%"UVdf"\n",
                    if (mincount != maxcount) {
                         /* Cannot extend fixed substrings found inside
                            the group.  */
-                       SCAN_COMMIT(pRExC_state,data,minlenp);
+                        scan_commit(pRExC_state, data, minlenp, is_inf);
                        if (mincount && last_str) {
                            SV * const sv = data->last_found;
                            MAGIC * const mg = SvUTF8(sv) && SvMAGICAL(sv) ?
@@ -4706,12 +4711,10 @@ PerlIO_printf(Perl_debug_log, "LHS=%"UVdf" RHS=%"UVdf"\n",
                                mg->mg_len = -1;
                            sv_setsv(sv, last_str);
                            data->last_end = data->pos_min;
-                           data->last_start_min =
-                               data->pos_min - CHR_SVLEN(last_str);
+                           data->last_start_min = data->pos_min - last_chrs;
                            data->last_start_max = is_inf
                                ? SSize_t_MAX
-                               : data->pos_min + data->pos_delta
-                               - CHR_SVLEN(last_str);
+                               : data->pos_min + data->pos_delta - last_chrs;
                        }
                        data->longest = &(data->longest_float);
                    }
@@ -4735,8 +4738,8 @@ PerlIO_printf(Perl_debug_log, "LHS=%"UVdf" RHS=%"UVdf"\n",
             case REF:
             case CLUMP:
                if (flags & SCF_DO_SUBSTR) {
-                    SCAN_COMMIT(pRExC_state,data,minlenp); /* Cannot expect
-                                                              anything... */
+                    /* Cannot expect anything... */
+                    scan_commit(pRExC_state, data, minlenp, is_inf);
                    data->longest = &(data->longest_float);
                }
                is_inf = is_inf_internal = 1;
@@ -4777,8 +4780,8 @@ PerlIO_printf(Perl_debug_log, "LHS=%"UVdf" RHS=%"UVdf"\n",
            min++;
            delta++;    /* Because of the 2 char string cr-lf */
             if (flags & SCF_DO_SUBSTR) {
-                SCAN_COMMIT(pRExC_state,data,minlenp); /* Cannot expect
-                                                           anything... */
+                /* Cannot expect anything... */
+                scan_commit(pRExC_state, data, minlenp, is_inf);
                data->pos_min += 1;
                data->pos_delta += 1;
                data->longest = &(data->longest_float);
@@ -4787,7 +4790,7 @@ PerlIO_printf(Perl_debug_log, "LHS=%"UVdf" RHS=%"UVdf"\n",
        else if (REGNODE_SIMPLE(OP(scan))) {
 
            if (flags & SCF_DO_SUBSTR) {
-               SCAN_COMMIT(pRExC_state,data,minlenp);
+                scan_commit(pRExC_state, data, minlenp, is_inf);
                data->pos_min++;
            }
            min++;
@@ -4945,7 +4948,7 @@ PerlIO_printf(Perl_debug_log, "LHS=%"UVdf" RHS=%"UVdf"\n",
            data->flags |= (OP(scan) == MEOL
                            ? SF_BEFORE_MEOL
                            : SF_BEFORE_SEOL);
-           SCAN_COMMIT(pRExC_state, data, minlenp);
+            scan_commit(pRExC_state, data, minlenp, is_inf);
 
        }
        else if (  PL_regkind[OP(scan)] == BRANCHJ
@@ -5072,7 +5075,7 @@ PerlIO_printf(Perl_debug_log, "LHS=%"UVdf" RHS=%"UVdf"\n",
                     if ((flags & SCF_DO_SUBSTR) && data->last_found) {
                         f |= SCF_DO_SUBSTR;
                         if (scan->flags)
-                            SCAN_COMMIT(pRExC_state, &data_fake,minlenp);
+                            scan_commit(pRExC_state, &data_fake, minlenp, is_inf);
                         data_fake.last_found=newSVsv(data->last_found);
                     }
                 }
@@ -5122,7 +5125,7 @@ PerlIO_printf(Perl_debug_log, "LHS=%"UVdf" RHS=%"UVdf"\n",
                     if ((flags & SCF_DO_SUBSTR) && data_fake.last_found) {
                         if (RExC_rx->minlen<*minnextp)
                             RExC_rx->minlen=*minnextp;
-                        SCAN_COMMIT(pRExC_state, &data_fake, minnextp);
+                        scan_commit(pRExC_state, &data_fake, minnextp, is_inf);
                         SvREFCNT_dec_NN(data_fake.last_found);
 
                         if ( data_fake.minlen_fixed != minlenp )
@@ -5166,7 +5169,7 @@ PerlIO_printf(Perl_debug_log, "LHS=%"UVdf" RHS=%"UVdf"\n",
        }
        else if ( PL_regkind[OP(scan)] == ENDLIKE ) {
            if (flags & SCF_DO_SUBSTR) {
-               SCAN_COMMIT(pRExC_state,data,minlenp);
+                scan_commit(pRExC_state, data, minlenp, is_inf);
                flags &= ~SCF_DO_SUBSTR;
            }
            if (data && OP(scan)==ACCEPT) {
@@ -5178,7 +5181,7 @@ PerlIO_printf(Perl_debug_log, "LHS=%"UVdf" RHS=%"UVdf"\n",
        else if (OP(scan) == LOGICAL && scan->flags == 2) /* Embedded follows */
        {
                if (flags & SCF_DO_SUBSTR) {
-                   SCAN_COMMIT(pRExC_state,data,minlenp);
+                    scan_commit(pRExC_state, data, minlenp, is_inf);
                    data->longest = &(data->longest_float);
                }
                is_inf = is_inf_internal = 1;
@@ -5211,9 +5214,10 @@ PerlIO_printf(Perl_debug_log, "LHS=%"UVdf" RHS=%"UVdf"\n",
             SSize_t max1 = 0, min1 = SSize_t_MAX;
             regnode_ssc accum;
 
-            if (flags & SCF_DO_SUBSTR) /* XXXX Add !SUSPEND? */
-                SCAN_COMMIT(pRExC_state, data,minlenp); /* Cannot merge strings
-                                                           after this. */
+            if (flags & SCF_DO_SUBSTR) { /* XXXX Add !SUSPEND? */
+                /* Cannot merge strings after this. */
+                scan_commit(pRExC_state, data, minlenp, is_inf);
+            }
             if (flags & SCF_DO_STCLASS)
                 ssc_init_zero(pRExC_state, &accum);
 
@@ -5327,8 +5331,8 @@ PerlIO_printf(Perl_debug_log, "LHS=%"UVdf" RHS=%"UVdf"\n",
            delta += (trie->maxlen - trie->minlen);
            flags &= ~SCF_DO_STCLASS; /* xxx */
             if (flags & SCF_DO_SUBSTR) {
-                SCAN_COMMIT(pRExC_state,data,minlenp); /* Cannot expect
-                                                           anything... */
+                /* Cannot expect anything... */
+                scan_commit(pRExC_state, data, minlenp, is_inf);
                data->pos_min += trie->minlen;
                data->pos_delta += (trie->maxlen - trie->minlen);
                if (trie->maxlen != trie->minlen)
@@ -6949,14 +6953,16 @@ reStudy:
         /* A temporary algorithm prefers floated substr to fixed one to dig
          * more info. */
        if (longest_fixed_length > longest_float_length) {
+           r->substrs->check_ix = 0;
            r->check_end_shift = r->anchored_end_shift;
            r->check_substr = r->anchored_substr;
            r->check_utf8 = r->anchored_utf8;
            r->check_offset_min = r->check_offset_max = r->anchored_offset;
-            if (r->intflags & PREGf_ANCH_SINGLE)
+            if (r->intflags & (PREGf_ANCH_SBOL|PREGf_ANCH_GPOS))
                 r->intflags |= PREGf_NOSCAN;
        }
        else {
+           r->substrs->check_ix = 1;
            r->check_end_shift = r->float_end_shift;
            r->check_substr = r->float_substr;
            r->check_utf8 = r->float_utf8;
@@ -6968,6 +6974,8 @@ reStudy:
            if (SvTAIL(r->check_substr ? r->check_substr : r->check_utf8))
                r->extflags |= RXf_INTUIT_TAIL;
        }
+        r->substrs->data[0].max_offset = r->substrs->data[0].min_offset;
+
        /* XXX Unneeded? dmq (shouldn't as this is handled elsewhere)
        if ( (STRLEN)minlen < longest_float_length )
             minlen= longest_float_length;
@@ -7432,7 +7440,7 @@ Perl_reg_numbered_buff_fetch(pTHX_ REGEXP * const r, const I32 paren,
     assert(s >= rx->subbeg);
     assert((STRLEN)rx->sublen >= (STRLEN)((s - rx->subbeg) + i) );
     if (i >= 0) {
-#if NO_TAINT_SUPPORT
+#ifdef NO_TAINT_SUPPORT
         sv_setpvn(sv, s, i);
 #else
         const int oldtainted = TAINT_get;
@@ -11276,7 +11284,8 @@ tryagain:
            FLAGS(ret) = get_regex_charset(RExC_flags);
            *flagp |= SIMPLE;
            if (! SIZE_ONLY && (U8) *(RExC_parse + 1) == '{') {
-               ckWARNdep(RExC_parse, "\"\\b{\" is deprecated; use \"\\b\\{\" or \"\\b[{]\" instead");
+                /* diag_listed_as: Use "%s" instead of "%s" */
+               vFAIL("Use \"\\b\\{\" instead of \"\\b{\"");
            }
            goto finish_meta_pat;
        case 'B':
@@ -11290,7 +11299,8 @@ tryagain:
            FLAGS(ret) = get_regex_charset(RExC_flags);
            *flagp |= SIMPLE;
            if (! SIZE_ONLY && (U8) *(RExC_parse + 1) == '{') {
-               ckWARNdep(RExC_parse, "\"\\B{\" is deprecated; use \"\\B\\{\" or \"\\B[{]\" instead");
+                /* diag_listed_as: Use "%s" instead of "%s" */
+               vFAIL("Use \"\\B\\{\" instead of \"\\B{\"");
            }
            goto finish_meta_pat;
 
@@ -11784,7 +11794,7 @@ tryagain:
                        }
                    case 'c':
                        p++;
-                       ender = grok_bslash_c(*p++, UTF, SIZE_ONLY);
+                       ender = grok_bslash_c(*p++, SIZE_ONLY);
                        break;
                     case '8': case '9': /* must be a backreference */
                         --p;
@@ -13527,7 +13537,7 @@ parseit:
                    goto recode_encoding;
                break;
            case 'c':
-               value = grok_bslash_c(*RExC_parse++, UTF, SIZE_ONLY);
+               value = grok_bslash_c(*RExC_parse++, SIZE_ONLY);
                break;
            case '0': case '1': case '2': case '3': case '4':
            case '5': case '6': case '7':