X-Git-Url: https://perl5.git.perl.org/perl5.git/blobdiff_plain/377243219089fca10a12fb0bd3f960d785ad8517..b977d03a544315f8953421cd86f9236a4b3157de:/regcomp.c diff --git a/regcomp.c b/regcomp.c index b02e342..0da8c1b 100644 --- a/regcomp.c +++ b/regcomp.c @@ -282,27 +282,6 @@ static const scan_data_t zero_scan_data = } STMT_END /* - * Calls SAVEDESTRUCTOR_X if needed, then calls Perl_croak with the given - * args. Show regex, up to a maximum length. If it's too long, chop and add - * "...". - */ -#define FAIL2(pat,msg) STMT_START { \ - const char *ellipses = ""; \ - IV len = RExC_end - RExC_precomp; \ - \ - if (!SIZE_ONLY) \ - SAVEDESTRUCTOR_X(clear_re,(void*)RExC_rx); \ - if (len > RegexLengthToShowInErrorMessages) { \ - /* chop 10 shorter than the max, to ensure meaning of "..." */ \ - len = RegexLengthToShowInErrorMessages - 10; \ - ellipses = "..."; \ - } \ - S_re_croak2(aTHX_ pat, " in regex m/%.*s%s/", \ - msg, (int)len, RExC_precomp, ellipses); \ -} STMT_END - - -/* * Simple_vFAIL -- like FAIL, but marks the current location in the scan */ #define Simple_vFAIL(m) STMT_START { \ @@ -712,7 +691,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, I32 *deltap, reg } else if (stringok) { const int oldl = STR_LEN(scan); - regnode *nnext = regnext(n); + regnode * const nnext = regnext(n); if (oldl + STR_LEN(n) > U8_MAX) break; @@ -757,8 +736,9 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, I32 *deltap, reg another valid sequence of UTF-8 bytes. */ - char *s0 = STRING(scan), *s, *t; - char *s1 = s0 + STR_LEN(scan) - 1, *s2 = s1 - 4; + char * const s0 = STRING(scan), *s, *t; + char * const s1 = s0 + STR_LEN(scan) - 1; + char * const s2 = s1 - 4; const char * const t0 = "\xcc\x88\xcc\x81"; const char * const t1 = t0 + 3; @@ -908,11 +888,13 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, I32 *deltap, reg } else if (OP(scan) == EXACT) { I32 l = STR_LEN(scan); - UV uc = *((U8*)STRING(scan)); + UV uc; if (UTF) { const U8 * const s = (U8*)STRING(scan); l = utf8_length((U8 *)s, (U8 *)s + l); uc = utf8_to_uvchr((U8 *)s, NULL); + } else { + uc = *((U8*)STRING(scan)); } min += l; if (flags & SCF_DO_SUBSTR) { /* Update longest substr. */ @@ -976,7 +958,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, I32 *deltap, reg if (flags & SCF_DO_SUBSTR) scan_commit(pRExC_state, data); if (UTF) { - U8 *s = (U8 *)STRING(scan); + U8 * const s = (U8 *)STRING(scan); l = utf8_length(s, s + l); uc = utf8_to_uvchr(s, NULL); } @@ -1258,7 +1240,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, I32 *deltap, reg if (data && fl & (SF_HAS_PAR|SF_IN_PAR)) pars++; if (flags & SCF_DO_SUBSTR) { - SV *last_str = Nullsv; + SV *last_str = NULL; int counted = mincount != 0; if (data->last_end > 0 && mincount != 0) { /* Ends with a string. */ @@ -1786,7 +1768,7 @@ Perl_pregcomp(pTHX_ char *exp, char *xend, PMOP *pm) REGC((U8)REG_MAGIC, (char*)RExC_emit); #endif if (reg(pRExC_state, 0, &flags) == NULL) { - RExC_precomp = Nullch; + RExC_precomp = NULL; return(NULL); } DEBUG_r(PerlIO_printf(Perl_debug_log, "size %"IVdf" ", (IV)RExC_size)); @@ -1948,9 +1930,9 @@ Perl_pregcomp(pTHX_ char *exp, char *xend, PMOP *pm) */ minlen = 0; - data.longest_fixed = newSVpvn("",0); - data.longest_float = newSVpvn("",0); - data.last_found = newSVpvn("",0); + data.longest_fixed = newSVpvs(""); + data.longest_float = newSVpvs(""); + data.last_found = newSVpvs(""); data.longest = &(data.longest_fixed); first = scan; if (!r->regstclass) { @@ -1985,10 +1967,10 @@ Perl_pregcomp(pTHX_ char *exp, char *xend, PMOP *pm) if (SvUTF8(data.longest_float)) { r->float_utf8 = data.longest_float; - r->float_substr = Nullsv; + r->float_substr = NULL; } else { r->float_substr = data.longest_float; - r->float_utf8 = Nullsv; + r->float_utf8 = NULL; } r->float_min_offset = data.offset_float_min; r->float_max_offset = data.offset_float_max; @@ -1999,7 +1981,7 @@ Perl_pregcomp(pTHX_ char *exp, char *xend, PMOP *pm) } else { remove_float: - r->float_substr = r->float_utf8 = Nullsv; + r->float_substr = r->float_utf8 = NULL; SvREFCNT_dec(data.longest_float); longest_float_length = 0; } @@ -2013,10 +1995,10 @@ Perl_pregcomp(pTHX_ char *exp, char *xend, PMOP *pm) if (SvUTF8(data.longest_fixed)) { r->anchored_utf8 = data.longest_fixed; - r->anchored_substr = Nullsv; + r->anchored_substr = NULL; } else { r->anchored_substr = data.longest_fixed; - r->anchored_utf8 = Nullsv; + r->anchored_utf8 = NULL; } r->anchored_offset = data.offset_fixed; t = (data.flags & SF_FIX_BEFORE_EOL /* Can't have SEOL and MULTI */ @@ -2025,7 +2007,7 @@ Perl_pregcomp(pTHX_ char *exp, char *xend, PMOP *pm) fbm_compile(data.longest_fixed, t ? FBMcf_TAIL : 0); } else { - r->anchored_substr = r->anchored_utf8 = Nullsv; + r->anchored_substr = r->anchored_utf8 = NULL; SvREFCNT_dec(data.longest_fixed); longest_fixed_length = 0; } @@ -2089,7 +2071,7 @@ Perl_pregcomp(pTHX_ char *exp, char *xend, PMOP *pm) data.last_closep = &last_close; minlen = study_chunk(pRExC_state, &scan, &fake, scan + RExC_size, &data, SCF_DO_STCLASS_AND|SCF_WHILEM_VISITED_POS); r->check_substr = r->check_utf8 = r->anchored_substr = r->anchored_utf8 - = r->float_substr = r->float_utf8 = Nullsv; + = r->float_substr = r->float_utf8 = NULL; if (!(data.start_class->flags & ANYOF_EOS) && !cl_is_anything(data.start_class)) { @@ -2142,22 +2124,24 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp) register regnode *ret; /* Will be the head of the group. */ register regnode *br; register regnode *lastbr; - register regnode *ender = 0; + register regnode *ender = NULL; register I32 parno = 0; - I32 flags, oregflags = RExC_flags, have_branch = 0, open = 0; + I32 flags; + const I32 oregflags = RExC_flags; + bool have_branch = 0; + bool is_open = 0; /* for (?g), (?gc), and (?o) warnings; warning about (?c) will warn about (?g) -- japhy */ - I32 wastedflags = 0x00, - wasted_o = 0x01, - wasted_g = 0x02, - wasted_gc = 0x02 | 0x04, - wasted_c = 0x04; +#define WASTED_O 0x01 +#define WASTED_G 0x02 +#define WASTED_C 0x04 +#define WASTED_GC (0x02|0x04) + I32 wastedflags = 0x00; char * parse_start = RExC_parse; /* MJD */ char * const oregcomp_parse = RExC_parse; - char c; *flagp = 0; /* Tentatively. */ @@ -2167,7 +2151,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp) if (*RExC_parse == '?') { /* (?...) */ U32 posflags = 0, negflags = 0; U32 *flagsp = &posflags; - int logical = 0; + bool is_logical = 0; const char * const seqstart = RExC_parse; RExC_parse++; @@ -2204,7 +2188,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp) vWARNdep(RExC_parse, "(?p{}) is deprecated - use (??{})"); /* FALL THROUGH*/ case '?': /* (??...) */ - logical = 1; + is_logical = 1; if (*RExC_parse != '{') goto unknown; paren = *RExC_parse++; @@ -2214,32 +2198,28 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp) I32 count = 1, n = 0; char c; char *s = RExC_parse; - SV *sv; - OP_4tree *sop, *rop; RExC_seen_zerolen++; RExC_seen |= REG_SEEN_EVAL; while (count && (c = *RExC_parse)) { - if (c == '\\' && RExC_parse[1]) - RExC_parse++; + if (c == '\\') { + if (RExC_parse[1]) + RExC_parse++; + } else if (c == '{') count++; else if (c == '}') count--; RExC_parse++; } - if (*RExC_parse != ')') - { + if (*RExC_parse != ')') { RExC_parse = s; vFAIL("Sequence (?{...}) not terminated or not {}-balanced"); } if (!SIZE_ONLY) { PAD *pad; - - if (RExC_parse - 1 - s) - sv = newSVpvn(s, RExC_parse - 1 - s); - else - sv = newSVpvn("", 0); + OP_4tree *sop, *rop; + SV * const sv = newSVpvn(s, RExC_parse - 1 - s); ENTER; Perl_save_re_context(aTHX); @@ -2266,7 +2246,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp) } nextchar(pRExC_state); - if (logical) { + if (is_logical) { ret = reg_node(pRExC_state, LOGICAL); if (!SIZE_ONLY) ret->flags = 2; @@ -2296,6 +2276,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp) } else if (RExC_parse[0] >= '1' && RExC_parse[0] <= '9' ) { /* (?(1)...) */ + char c; parno = atoi(RExC_parse++); while (isDIGIT(*RExC_parse)) @@ -2353,7 +2334,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp) if (*RExC_parse == 'o' || *RExC_parse == 'g') { if (SIZE_ONLY && ckWARN(WARN_REGEXP)) { - I32 wflagbit = *RExC_parse == 'o' ? wasted_o : wasted_g; + const I32 wflagbit = *RExC_parse == 'o' ? WASTED_O : WASTED_G; if (! (wastedflags & wflagbit) ) { wastedflags |= wflagbit; vWARN5( @@ -2369,8 +2350,8 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp) } else if (*RExC_parse == 'c') { if (SIZE_ONLY && ckWARN(WARN_REGEXP)) { - if (! (wastedflags & wasted_c) ) { - wastedflags |= wasted_gc; + if (! (wastedflags & WASTED_C) ) { + wastedflags |= WASTED_GC; vWARN3( RExC_parse + 1, "Useless (%sc) - %suse /gc modifier", @@ -2413,7 +2394,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp) ret = reganode(pRExC_state, OPEN, parno); Set_Node_Length(ret, 1); /* MJD */ Set_Node_Offset(ret, RExC_parse); /* MJD */ - open = 1; + is_open = 1; } } else /* ! paren */ @@ -2442,7 +2423,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp) else if (paren == ':') { *flagp |= flags&SIMPLE; } - if (open) { /* Starts with OPEN. */ + if (is_open) { /* Starts with OPEN. */ regtail(pRExC_state, ret, br); /* OPEN -> first. */ } else if (paren != '?') /* Not Conditional */ @@ -2638,7 +2619,7 @@ S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp) if (op == '{' && regcurly(RExC_parse)) { parse_start = RExC_parse; /* MJD */ next = RExC_parse + 1; - maxpos = Nullch; + maxpos = NULL; while (isDIGIT(*next) || *next == ',') { if (*next == ',') { if (maxpos) @@ -2764,7 +2745,7 @@ S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp) if (!SIZE_ONLY && !(flags&HASWIDTH) && max > REG_INFTY/3 && ckWARN(WARN_REGEXP)) { vWARN3(RExC_parse, "%.*s matches null string many times", - RExC_parse - origparse, + (int)(RExC_parse >= origparse ? RExC_parse - origparse : 0), origparse); } @@ -2793,7 +2774,7 @@ S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp) STATIC regnode * S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp) { - register regnode *ret = 0; + register regnode *ret = NULL; I32 flags; char *parse_start = RExC_parse; @@ -3574,13 +3555,13 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state) register regnode *ret; STRLEN numlen; IV namedclass; - char *rangebegin = 0; + char *rangebegin = NULL; bool need_class = 0; - SV *listsv = Nullsv; + SV *listsv = NULL; register char *e; UV n; bool optimize_invert = TRUE; - AV* unicode_alternate = 0; + AV* unicode_alternate = NULL; #ifdef EBCDIC UV literal_endpoint = 0; #endif @@ -3606,7 +3587,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state) if (LOC) ANYOF_FLAGS(ret) |= ANYOF_LOCALE; ANYOF_BITMAP_ZERO(ret); - listsv = newSVpvn("# comment\n", 10); + listsv = newSVpvs("# comment\n"); } nextvalue = RExC_parse < RExC_end ? UCHARAT(RExC_parse) : 0; @@ -3762,12 +3743,16 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state) /* a bad range like a-\d, a-[:digit:] ? */ if (range) { if (!SIZE_ONLY) { - if (ckWARN(WARN_REGEXP)) + if (ckWARN(WARN_REGEXP)) { + int w = + RExC_parse >= rangebegin ? + RExC_parse - rangebegin : 0; vWARN4(RExC_parse, "False [] range \"%*.*s\"", - RExC_parse - rangebegin, - RExC_parse - rangebegin, + w, + w, rangebegin); + } if (prevvalue < 256) { ANYOF_BITMAP_SET(ret, prevvalue); ANYOF_BITMAP_SET(ret, '-'); @@ -4171,12 +4156,16 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state) /* a bad range like \w-, [:word:]- ? */ if (namedclass > OOB_NAMEDCLASS) { - if (ckWARN(WARN_REGEXP)) + if (ckWARN(WARN_REGEXP)) { + int w = + RExC_parse >= rangebegin ? + RExC_parse - rangebegin : 0; vWARN4(RExC_parse, "False [] range \"%*.*s\"", - RExC_parse - rangebegin, - RExC_parse - rangebegin, + w, + w, rangebegin); + } if (!SIZE_ONLY) ANYOF_BITMAP_SET(ret, '-'); } else @@ -4586,7 +4575,7 @@ void Perl_regdump(pTHX_ regexp *r) { #ifdef DEBUGGING - SV *sv = sv_newmortal(); + SV * const sv = sv_newmortal(); (void)dumpuntil(r->program, r->program + 1, NULL, sv, 0); @@ -4675,6 +4664,8 @@ Perl_regdump(pTHX_ regexp *r) (UV)r->offsets[i*2]); PerlIO_printf(Perl_debug_log, "\n"); } +#else + PERL_UNUSED_ARG(r); #endif /* DEBUGGING */ } @@ -4697,7 +4688,7 @@ Perl_regprop(pTHX_ SV *sv, regnode *o) k = PL_regkind[(U8)OP(o)]; if (k == EXACT) { - SV * const dsv = sv_2mortal(newSVpvn("", 0)); + SV * const dsv = sv_2mortal(newSVpvs("")); /* Using is_utf8_string() is a crude hack but it may * be the best for now since we have no flag "this EXACTish * node was UTF-8" --jhi */ @@ -4728,8 +4719,9 @@ Perl_regprop(pTHX_ SV *sv, regnode *o) else if (k == ANYOF) { int i, rangestart = -1; const U8 flags = ANYOF_FLAGS(o); - const char * const anyofs[] = { /* Should be synchronized with - * ANYOF_ #xdefines in regcomp.h */ + + /* Should be synchronized with * ANYOF_ #xdefines in regcomp.h */ + static const char * const anyofs[] = { "\\w", "\\W", "\\s", @@ -4763,12 +4755,12 @@ Perl_regprop(pTHX_ SV *sv, regnode *o) }; if (flags & ANYOF_LOCALE) - sv_catpv(sv, "{loc}"); + sv_catpvs(sv, "{loc}"); if (flags & ANYOF_FOLD) - sv_catpv(sv, "{i}"); + sv_catpvs(sv, "{i}"); Perl_sv_catpvf(aTHX_ sv, "[%s", PL_colors[0]); if (flags & ANYOF_INVERT) - sv_catpv(sv, "^"); + sv_catpvs(sv, "^"); for (i = 0; i <= 256; i++) { if (i < 256 && ANYOF_BITMAP_TEST(o,i)) { if (rangestart == -1) @@ -4779,7 +4771,7 @@ Perl_regprop(pTHX_ SV *sv, regnode *o) put_byte(sv, rangestart); else { put_byte(sv, rangestart); - sv_catpv(sv, "-"); + sv_catpvs(sv, "-"); put_byte(sv, i - 1); } rangestart = -1; @@ -4792,9 +4784,9 @@ Perl_regprop(pTHX_ SV *sv, regnode *o) sv_catpv(sv, anyofs[i]); if (flags & ANYOF_UNICODE) - sv_catpv(sv, "{unicode}"); + sv_catpvs(sv, "{unicode}"); else if (flags & ANYOF_UNICODE_ALL) - sv_catpv(sv, "{unicode_all}"); + sv_catpvs(sv, "{unicode_all}"); { SV *lv; @@ -4823,7 +4815,7 @@ Perl_regprop(pTHX_ SV *sv, regnode *o) U8 *p; for (p = s; p < e; p++) put_byte(sv, *p); - sv_catpvn(sv, "-", 1); + sv_catpvs(sv, "-"); e = uvchr_to_utf8(s, i-1); for (p = s; p < e; p++) put_byte(sv, *p); @@ -4832,12 +4824,12 @@ Perl_regprop(pTHX_ SV *sv, regnode *o) } } - sv_catpv(sv, "..."); /* et cetera */ + sv_catpvs(sv, "..."); /* et cetera */ } { char *s = savesvpv(lv); - char *origs = s; + char * const origs = s; while(*s && *s != '\n') s++; @@ -4864,6 +4856,9 @@ Perl_regprop(pTHX_ SV *sv, regnode *o) } else if (k == BRANCHJ && (OP(o) == UNLESSM || OP(o) == IFMATCH)) Perl_sv_catpvf(aTHX_ sv, "[-%d]", o->flags); +#else + PERL_UNUSED_ARG(sv); + PERL_UNUSED_ARG(o); #endif /* DEBUGGING */ } @@ -4893,13 +4888,13 @@ void Perl_pregfree(pTHX_ struct regexp *r) { #ifdef DEBUGGING - SV *dsv = PERL_DEBUG_PAD_ZERO(0); + SV * const dsv = PERL_DEBUG_PAD_ZERO(0); #endif if (!r || (--r->refcnt > 0)) return; DEBUG_r({ - const char *s = (r->reganch & ROPT_UTF8) + const char * const s = (r->reganch & ROPT_UTF8) ? pv_uni_display(dsv, (U8*)r->precomp, r->prelen, 60, UNI_DISPLAY_REGEX) : pv_display(dsv, r->precomp, r->prelen, 0, 60); const int len = SvCUR(dsv); @@ -4953,8 +4948,7 @@ Perl_pregfree(pTHX_ struct regexp *r) Perl_croak(aTHX_ "panic: pregfree comppad"); PAD_SAVE_LOCAL(old_comppad, /* Watch out for global destruction's random ordering. */ - (SvTYPE(new_comppad) == SVt_PVAV) ? - new_comppad : Null(PAD *) + (SvTYPE(new_comppad) == SVt_PVAV) ? new_comppad : NULL ); OP_REFCNT_LOCK; refcnt = OpREFCNT_dec((OP_4tree*)r->data->data[n]); @@ -5092,24 +5086,27 @@ Perl_save_re_context(pTHX) PL_reg_start_tmp = 0; PL_reg_start_tmpl = 0; - PL_reg_oldsaved = Nullch; + PL_reg_oldsaved = NULL; PL_reg_oldsavedlen = 0; PL_reg_maxiter = 0; PL_reg_leftiter = 0; - PL_reg_poscache = Nullch; + PL_reg_poscache = NULL; PL_reg_poscache_size = 0; - { - /* Save $1..$n (#18107: UTF-8 s/(\w+)/uc($1)/e); AMS 20021106. */ - REGEXP *rx; - - if (PL_curpm && (rx = PM_GETRE(PL_curpm))) { + /* Save $1..$n (#18107: UTF-8 s/(\w+)/uc($1)/e); AMS 20021106. */ + if (PL_curpm) { + const REGEXP * const rx = PM_GETRE(PL_curpm); + if (rx) { U32 i; for (i = 1; i <= rx->nparens; i++) { - GV *mgv; char digits[TYPE_CHARS(long)]; +#ifdef USE_SNPRINTF + const STRLEN len = snprintf(digits, sizeof(digits), "%lu", (long)i); +#else const STRLEN len = my_sprintf(digits, "%lu", (long)i); - if ((mgv = gv_fetchpvn_flags(digits, len, 0, SVt_PV))) +#endif /* #ifdef USE_SNPRINTF */ + GV * const mgv = gv_fetchpvn_flags(digits, len, 0, SVt_PV); + if (mgv) save_scalar(mgv); } }