From f0ab9afb53ef594bb6fb8989153fbfba9762816f Mon Sep 17 00:00:00 2001 From: Nicholas Clark Date: Mon, 26 Mar 2007 22:52:18 +0000 Subject: [PATCH] In struct regexp replace the two arrays of I32s accessed via startp and endp with a single array of struct regexp_paren_pair, which has 2 I32 members. PL_regstartp and PL_regendp are replaced with a pointer to regexp_paren_pair. The regexp swap structure now only has one member, so abolish it and store the pointer to the swap array directly. Hopefully keeping the corresponding start and end adjacent in memory will help with cache coherency. p4raw-id: //depot/perl@30769 --- mg.c | 21 +++++------ pp.c | 8 ++--- pp_ctl.c | 12 +++---- pp_hot.c | 63 ++++++++++++++++----------------- regcomp.c | 42 +++++++++------------- regexec.c | 113 ++++++++++++++++++++++++++++-------------------------------- regexp.h | 20 +++++------ sv.c | 7 ++-- universal.c | 8 ++--- 9 files changed, 137 insertions(+), 157 deletions(-) diff --git a/mg.c b/mg.c index c603073..1aaf0ac 100644 --- a/mg.c +++ b/mg.c @@ -521,7 +521,8 @@ Perl_magic_regdata_cnt(pTHX_ SV *sv, MAGIC *mg) /* return the last filled */ while ( paren >= 0 - && (rx->startp[paren] == -1 || rx->endp[paren] == -1) ) + && (rx->offs[paren].start == -1 + || rx->offs[paren].end == -1) ) paren--; return (U32)paren; } @@ -544,8 +545,8 @@ Perl_magic_regdatum_get(pTHX_ SV *sv, MAGIC *mg) if (paren < 0) return 0; if (paren <= (I32)rx->nparens && - (s = rx->startp[paren]) != -1 && - (t = rx->endp[paren]) != -1) + (s = rx->offs[paren].start) != -1 && + (t = rx->offs[paren].end) != -1) { register I32 i; if (mg->mg_obj) /* @+ */ @@ -592,8 +593,8 @@ Perl_magic_len(pTHX_ SV *sv, MAGIC *mg) paren = atoi(mg->mg_ptr); /* $& is in [0] */ getparen: if (paren <= (I32)rx->nparens && - (s1 = rx->startp[paren]) != -1 && - (t1 = rx->endp[paren]) != -1) + (s1 = rx->offs[paren].start) != -1 && + (t1 = rx->offs[paren].end) != -1) { i = t1 - s1; getlen: @@ -636,8 +637,8 @@ Perl_magic_len(pTHX_ SV *sv, MAGIC *mg) return 0; case '`': if (PL_curpm && (rx = PM_GETRE(PL_curpm))) { - if (rx->startp[0] != -1) { - i = rx->startp[0]; + if (rx->offs[0].start != -1) { + i = rx->offs[0].start; if (i > 0) { s1 = 0; t1 = i; @@ -648,10 +649,10 @@ Perl_magic_len(pTHX_ SV *sv, MAGIC *mg) return 0; case '\'': if (PL_curpm && (rx = PM_GETRE(PL_curpm))) { - if (rx->endp[0] != -1) { - i = rx->sublen - rx->endp[0]; + if (rx->offs[0].end != -1) { + i = rx->sublen - rx->offs[0].end; if (i > 0) { - s1 = rx->endp[0]; + s1 = rx->offs[0].end; t1 = rx->sublen; goto getlen; } diff --git a/pp.c b/pp.c index b5c696c..173f81d 100644 --- a/pp.c +++ b/pp.c @@ -4762,7 +4762,7 @@ PP(pp_split) s = orig + (m - s); strend = s + (strend - m); } - m = rx->startp[0] + orig; + m = rx->offs[0].start + orig; dstr = newSVpvn(s, m-s); if (make_mortal) sv_2mortal(dstr); @@ -4772,8 +4772,8 @@ PP(pp_split) if (rx->nparens) { I32 i; for (i = 1; i <= (I32)rx->nparens; i++) { - s = rx->startp[i] + orig; - m = rx->endp[i] + orig; + s = rx->offs[i].start + orig; + m = rx->offs[i].end + orig; /* japhy (07/27/01) -- the (m && s) test doesn't catch parens that didn't match -- they should be set to @@ -4790,7 +4790,7 @@ PP(pp_split) XPUSHs(dstr); } } - s = rx->endp[0] + orig; + s = rx->offs[0].end + orig; } } diff --git a/pp_ctl.c b/pp_ctl.c index 124a40f..3d4992f 100644 --- a/pp_ctl.c +++ b/pp_ctl.c @@ -279,14 +279,14 @@ PP(pp_substcont) s = orig + (m - s); cx->sb_strend = s + (cx->sb_strend - m); } - cx->sb_m = m = rx->startp[0] + orig; + cx->sb_m = m = rx->offs[0].start + orig; if (m > s) { if (DO_UTF8(dstr) && !SvUTF8(cx->sb_targ)) sv_catpvn_utf8_upgrade(dstr, s, m - s, nsv); else sv_catpvn(dstr, s, m-s); } - cx->sb_s = rx->endp[0] + orig; + cx->sb_s = rx->offs[0].end + orig; { /* Update the pos() information. */ SV * const sv = cx->sb_targ; MAGIC *mg; @@ -345,8 +345,8 @@ Perl_rxres_save(pTHX_ void **rsp, REGEXP *rx) *p++ = PTR2UV(rx->subbeg); *p++ = (UV)rx->sublen; for (i = 0; i <= rx->nparens; ++i) { - *p++ = (UV)rx->startp[i]; - *p++ = (UV)rx->endp[i]; + *p++ = (UV)rx->offs[i].start; + *p++ = (UV)rx->offs[i].end; } } @@ -373,8 +373,8 @@ Perl_rxres_restore(pTHX_ void **rsp, REGEXP *rx) rx->subbeg = INT2PTR(char*,*p++); rx->sublen = (I32)(*p++); for (i = 0; i <= rx->nparens; ++i) { - rx->startp[i] = (I32)(*p++); - rx->endp[i] = (I32)(*p++); + rx->offs[i].start = (I32)(*p++); + rx->offs[i].end = (I32)(*p++); } } diff --git a/pp_hot.c b/pp_hot.c index 10caecb..f01a3e0 100644 --- a/pp_hot.c +++ b/pp_hot.c @@ -1246,19 +1246,19 @@ PP(pp_match) /* XXXX What part of this is needed with true \G-support? */ if ((global = dynpm->op_pmflags & PMf_GLOBAL)) { - rx->startp[0] = -1; + rx->offs[0].start = -1; if (SvTYPE(TARG) >= SVt_PVMG && SvMAGIC(TARG)) { MAGIC* const mg = mg_find(TARG, PERL_MAGIC_regex_global); if (mg && mg->mg_len >= 0) { if (!(rx->extflags & RXf_GPOS_SEEN)) - rx->endp[0] = rx->startp[0] = mg->mg_len; + rx->offs[0].end = rx->offs[0].start = mg->mg_len; else if (rx->extflags & RXf_ANCH_GPOS) { r_flags |= REXEC_IGNOREPOS; - rx->endp[0] = rx->startp[0] = mg->mg_len; + rx->offs[0].end = rx->offs[0].start = mg->mg_len; } else if (rx->extflags & RXf_GPOS_FLOAT) gpos = mg->mg_len; else - rx->endp[0] = rx->startp[0] = mg->mg_len; + rx->offs[0].end = rx->offs[0].start = mg->mg_len; minmatch = (mg->mg_flags & MGf_MINMATCH) ? rx->gofs + 1 : 0; update_minmatch = 0; } @@ -1274,8 +1274,8 @@ PP(pp_match) r_flags |= REXEC_SCREAM; play_it_again: - if (global && rx->startp[0] != -1) { - t = s = rx->endp[0] + truebase - rx->gofs; + if (global && rx->offs[0].start != -1) { + t = s = rx->offs[0].end + truebase - rx->gofs; if ((s + rx->minlen) > strend || s < truebase) goto nope; if (update_minmatch++) @@ -1322,10 +1322,10 @@ play_it_again: EXTEND_MORTAL(nparens + i); for (i = !i; i <= nparens; i++) { PUSHs(sv_newmortal()); - if ((rx->startp[i] != -1) && rx->endp[i] != -1 ) { - const I32 len = rx->endp[i] - rx->startp[i]; - s = rx->startp[i] + truebase; - if (rx->endp[i] < 0 || rx->startp[i] < 0 || + if ((rx->offs[i].start != -1) && rx->offs[i].end != -1 ) { + const I32 len = rx->offs[i].end - rx->offs[i].start; + s = rx->offs[i].start + truebase; + if (rx->offs[i].end < 0 || rx->offs[i].start < 0 || len < 0 || len > strend - s) DIE(aTHX_ "panic: pp_match start/end pointers"); sv_setpvn(*SP, s, len); @@ -1346,16 +1346,17 @@ play_it_again: mg = sv_magicext(TARG, NULL, PERL_MAGIC_regex_global, &PL_vtbl_mglob, NULL, 0); } - if (rx->startp[0] != -1) { - mg->mg_len = rx->endp[0]; - if (rx->startp[0] + rx->gofs == (UV)rx->endp[0]) + if (rx->offs[0].start != -1) { + mg->mg_len = rx->offs[0].end; + if (rx->offs[0].start + rx->gofs == (UV)rx->offs[0].end) mg->mg_flags |= MGf_MINMATCH; else mg->mg_flags &= ~MGf_MINMATCH; } } - had_zerolen = (rx->startp[0] != -1 - && rx->startp[0] + rx->gofs == (UV)rx->endp[0]); + had_zerolen = (rx->offs[0].start != -1 + && (rx->offs[0].start + rx->gofs + == (UV)rx->offs[0].end)); PUTBACK; /* EVAL blocks may use stack */ r_flags |= REXEC_IGNOREPOS | REXEC_NOT_FIRST; goto play_it_again; @@ -1380,9 +1381,9 @@ play_it_again: mg = sv_magicext(TARG, NULL, PERL_MAGIC_regex_global, &PL_vtbl_mglob, NULL, 0); } - if (rx->startp[0] != -1) { - mg->mg_len = rx->endp[0]; - if (rx->startp[0] + rx->gofs == (UV)rx->endp[0]) + if (rx->offs[0].start != -1) { + mg->mg_len = rx->offs[0].end; + if (rx->offs[0].start + rx->gofs == (UV)rx->offs[0].end) mg->mg_flags |= MGf_MINMATCH; else mg->mg_flags &= ~MGf_MINMATCH; @@ -1406,13 +1407,13 @@ yup: /* Confirmed by INTUIT */ if (global) { /* FIXME - should rx->subbeg be const char *? */ rx->subbeg = (char *) truebase; - rx->startp[0] = s - truebase; + rx->offs[0].start = s - truebase; if (RX_MATCH_UTF8(rx)) { char * const t = (char*)utf8_hop((U8*)s, rx->minlenret); - rx->endp[0] = t - truebase; + rx->offs[0].end = t - truebase; } else { - rx->endp[0] = s - truebase + rx->minlenret; + rx->offs[0].end = s - truebase + rx->minlenret; } rx->sublen = strend - truebase; goto gotcha; @@ -1441,12 +1442,12 @@ yup: /* Confirmed by INTUIT */ } rx->sublen = strend - t; RX_MATCH_COPIED_on(rx); - off = rx->startp[0] = s - t; - rx->endp[0] = off + rx->minlenret; + off = rx->offs[0].start = s - t; + rx->offs[0].end = off + rx->minlenret; } else { /* startp/endp are used by @- @+. */ - rx->startp[0] = s - truebase; - rx->endp[0] = s - truebase + rx->minlenret; + rx->offs[0].start = s - truebase; + rx->offs[0].end = s - truebase + rx->minlenret; } /* including rx->nparens in the below code seems highly suspicious. -dmq */ @@ -2135,8 +2136,8 @@ PP(pp_subst) SvSCREAM_off(TARG); /* disable possible screamer */ if (once) { rxtainted |= RX_MATCH_TAINTED(rx); - m = orig + rx->startp[0]; - d = orig + rx->endp[0]; + m = orig + rx->offs[0].start; + d = orig + rx->offs[0].end; s = orig; if (m - s > strend - d) { /* faster to shorten from end */ if (clen) { @@ -2178,7 +2179,7 @@ PP(pp_subst) if (iters++ > maxiters) DIE(aTHX_ "Substitution loop"); rxtainted |= RX_MATCH_TAINTED(rx); - m = rx->startp[0] + orig; + m = rx->offs[0].start + orig; if ((i = m - s)) { if (s != d) Move(s, d, i, char); @@ -2188,7 +2189,7 @@ PP(pp_subst) Copy(c, d, clen, char); d += clen; } - s = rx->endp[0] + orig; + s = rx->offs[0].end + orig; } while (CALLREGEXEC(rx, s, strend, orig, s == m, TARG, NULL, /* don't match same null twice */ @@ -2251,12 +2252,12 @@ PP(pp_subst) s = orig + (m - s); strend = s + (strend - m); } - m = rx->startp[0] + orig; + m = rx->offs[0].start + orig; if (doutf8 && !SvUTF8(dstr)) sv_catpvn_utf8_upgrade(dstr, s, m - s, nsv); else sv_catpvn(dstr, s, m-s); - s = rx->endp[0] + orig; + s = rx->offs[0].end + orig; if (clen) sv_catpvn(dstr, c, clen); if (once) diff --git a/regcomp.c b/regcomp.c index c58a784..3519c8d 100644 --- a/regcomp.c +++ b/regcomp.c @@ -4695,8 +4695,7 @@ reStudy: ARG2L_SET( scan, RExC_open_parens[ARG(scan)-1] - scan ); } } - Newxz(r->startp, RExC_npar * 2, I32); - r->endp = r->startp + RExC_npar; + Newxz(r->offs, RExC_npar, regexp_paren_pair); /* assume we don't need to swap parens around before we match */ DEBUG_DUMP_r({ @@ -4739,8 +4738,8 @@ Perl_reg_named_buff_get(pTHX_ const REGEXP * const rx, SV* namesv, U32 flags) I32 *nums=(I32*)SvPVX(sv_dat); for ( i=0; inparens) >= nums[i] - && rx->startp[nums[i]] != -1 - && rx->endp[nums[i]] != -1) + && rx->offs[nums[i]].start != -1 + && rx->offs[nums[i]].end != -1) { ret = CALLREG_NUMBUF(rx,nums[i],NULL); if (!retarray) @@ -4773,21 +4772,21 @@ Perl_reg_numbered_buff_get(pTHX_ const REGEXP * const rx, I32 paren, SV* usesv) return sv; } else - if (paren == -2 && rx->startp[0] != -1) { + if (paren == -2 && rx->offs[0].start != -1) { /* $` */ - i = rx->startp[0]; + i = rx->offs[0].start; s = rx->subbeg; } else - if (paren == -1 && rx->endp[0] != -1) { + if (paren == -1 && rx->offs[0].end != -1) { /* $' */ - s = rx->subbeg + rx->endp[0]; - i = rx->sublen - rx->endp[0]; + s = rx->subbeg + rx->offs[0].end; + i = rx->sublen - rx->offs[0].end; } else if ( 0 <= paren && paren <= (I32)rx->nparens && - (s1 = rx->startp[paren]) != -1 && - (t1 = rx->endp[paren]) != -1) + (s1 = rx->offs[paren].start) != -1 && + (t1 = rx->offs[paren].end) != -1) { /* $& $1 ... */ i = t1 - s1; @@ -8686,11 +8685,8 @@ Perl_pregfree(pTHX_ struct regexp *r) if (r->saved_copy) SvREFCNT_dec(r->saved_copy); #endif - if (r->swap) { - Safefree(r->swap->startp); - Safefree(r->swap); - } - Safefree(r->startp); + Safefree(r->swap); + Safefree(r->offs); Safefree(r); } @@ -8718,9 +8714,8 @@ Perl_reg_temp_copy (pTHX_ struct regexp *r) { (void)ReREFCNT_inc(r); Newx(ret, 1, regexp); StructCopy(r, ret, regexp); - Newx(ret->startp, npar * 2, I32); - Copy(r->startp, ret->startp, npar * 2, I32); - ret->endp = ret->startp + npar; + Newx(ret->offs, npar, regexp_paren_pair); + Copy(r->offs, ret->offs, npar, regexp_paren_pair); ret->refcnt = 1; if (r->substrs) { Newx(ret->substrs, 1, struct reg_substr_data); @@ -8911,14 +8906,11 @@ Perl_re_dup(pTHX_ const regexp *r, CLONE_PARAMS *param) npar = r->nparens+1; Newx(ret, 1, regexp); StructCopy(r, ret, regexp); - Newx(ret->startp, npar * 2, I32); - Copy(r->startp, ret->startp, npar * 2, I32); - ret->endp = ret->startp + npar; + Newx(ret->offs, npar, regexp_paren_pair); + Copy(r->offs, ret->offs, npar, regexp_paren_pair); if(ret->swap) { - Newx(ret->swap, 1, regexp_paren_ofs); /* no need to copy these */ - Newx(ret->swap->startp, npar * 2, I32); - ret->swap->endp = ret->swap->startp + npar; + Newx(ret->swap, npar, regexp_paren_pair); } if (ret->substrs) { diff --git a/regexec.c b/regexec.c index e382a2a..b9ce5f9 100644 --- a/regexec.c +++ b/regexec.c @@ -184,25 +184,24 @@ S_regcppush(pTHX_ I32 parenfloor) if (paren_elems_to_push < 0) Perl_croak(aTHX_ "panic: paren_elems_to_push < 0"); -#define REGCP_OTHER_ELEMS 8 +#define REGCP_OTHER_ELEMS 7 SSGROW(paren_elems_to_push + REGCP_OTHER_ELEMS); for (p = PL_regsize; p > parenfloor; p--) { /* REGCP_PARENS_ELEMS are pushed per pairs of parentheses. */ - SSPUSHINT(PL_regendp[p]); - SSPUSHINT(PL_regstartp[p]); + SSPUSHINT(PL_regoffs[p].end); + SSPUSHINT(PL_regoffs[p].start); SSPUSHPTR(PL_reg_start_tmp[p]); SSPUSHINT(p); DEBUG_BUFFERS_r(PerlIO_printf(Perl_debug_log, " saving \\%"UVuf" %"IVdf"(%"IVdf")..%"IVdf"\n", - (UV)p, (IV)PL_regstartp[p], + (UV)p, (IV)PL_regoffs[p].start, (IV)(PL_reg_start_tmp[p] - PL_bostr), - (IV)PL_regendp[p] + (IV)PL_regoffs[p].end )); } /* REGCP_OTHER_ELEMS are pushed in any case, parentheses or no. */ - SSPUSHPTR(PL_regstartp); - SSPUSHPTR(PL_regendp); + SSPUSHPTR(PL_regoffs); SSPUSHINT(PL_regsize); SSPUSHINT(*PL_reglastparen); SSPUSHINT(*PL_reglastcloseparen); @@ -249,8 +248,7 @@ S_regcppop(pTHX_ const regexp *rex) *PL_reglastcloseparen = SSPOPINT; *PL_reglastparen = SSPOPINT; PL_regsize = SSPOPINT; - PL_regendp=(I32 *) SSPOPPTR; - PL_regstartp=(I32 *) SSPOPPTR; + PL_regoffs=(regexp_paren_pair *) SSPOPPTR; /* Now restore the parentheses context. */ @@ -259,16 +257,16 @@ S_regcppop(pTHX_ const regexp *rex) I32 tmps; U32 paren = (U32)SSPOPINT; PL_reg_start_tmp[paren] = (char *) SSPOPPTR; - PL_regstartp[paren] = SSPOPINT; + PL_regoffs[paren].start = SSPOPINT; tmps = SSPOPINT; if (paren <= *PL_reglastparen) - PL_regendp[paren] = tmps; + PL_regoffs[paren].end = tmps; DEBUG_BUFFERS_r( PerlIO_printf(Perl_debug_log, " restoring \\%"UVuf" to %"IVdf"(%"IVdf")..%"IVdf"%s\n", - (UV)paren, (IV)PL_regstartp[paren], + (UV)paren, (IV)PL_regoffs[paren].start, (IV)(PL_reg_start_tmp[paren] - PL_bostr), - (IV)PL_regendp[paren], + (IV)PL_regoffs[paren].end, (paren > *PL_reglastparen ? "(no)" : "")); ); } @@ -292,8 +290,8 @@ S_regcppop(pTHX_ const regexp *rex) * --jhi */ for (i = *PL_reglastparen + 1; i <= rex->nparens; i++) { if (i > PL_regsize) - PL_regstartp[i] = -1; - PL_regendp[i] = -1; + PL_regoffs[i].start = -1; + PL_regoffs[i].end = -1; } #endif return input; @@ -1651,7 +1649,7 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s, static void S_swap_match_buff (pTHX_ regexp *prog) { - I32 *t; + regexp_paren_pair *t; if (!prog->swap) { /* We have to be careful. If the previous successful match @@ -1661,17 +1659,11 @@ S_swap_match_buff (pTHX_ regexp *prog) { to the re, and switch the buffer each match. If we fail we switch it back, otherwise we leave it swapped. */ - Newxz(prog->swap, 1, regexp_paren_ofs); - /* no need to copy these */ - Newxz(prog->swap->startp, 2 * (prog->nparens + 1), I32); - prog->swap->endp = prog->swap->startp + prog->nparens + 1; + Newxz(prog->swap, (prog->nparens + 1), regexp_paren_pair); } - t = prog->swap->startp; - prog->swap->startp = prog->startp; - prog->startp = t; - t = prog->swap->endp; - prog->swap->endp = prog->endp; - prog->endp = t; + t = prog->swap; + prog->swap = prog->offs; + prog->offs = t; } @@ -2225,15 +2217,14 @@ S_regtry(pTHX_ regmatch_info *reginfo, char **startpos) prog->sublen = PL_regeol - PL_bostr; /* strend may have been modified */ } DEBUG_EXECUTE_r(PL_reg_starttry = *startpos); - prog->startp[0] = *startpos - PL_bostr; + prog->offs[0].start = *startpos - PL_bostr; PL_reginput = *startpos; PL_reglastparen = &prog->lastparen; PL_reglastcloseparen = &prog->lastcloseparen; prog->lastparen = 0; prog->lastcloseparen = 0; PL_regsize = 0; - PL_regstartp = prog->startp; - PL_regendp = prog->endp; + PL_regoffs = prog->offs; if (PL_reg_start_tmpl <= prog->nparens) { PL_reg_start_tmpl = prog->nparens*3/2 + 3; if(PL_reg_start_tmp) @@ -2258,18 +2249,18 @@ S_regtry(pTHX_ regmatch_info *reginfo, char **startpos) * --jhi */ #if 1 if (prog->nparens) { - I32 *sp = PL_regstartp; - I32 *ep = PL_regendp; + regexp_paren_pair *pp = PL_regoffs; register I32 i; for (i = prog->nparens; i > (I32)*PL_reglastparen; i--) { - *++sp = -1; - *++ep = -1; + ++pp; + pp->start = -1; + pp->end = -1; } } #endif REGCP_SET(lastcp); if (regmatch(reginfo, progi->program + 1)) { - PL_regendp[0] = PL_reginput - PL_bostr; + PL_regoffs[0].end = PL_reginput - PL_bostr; return 1; } if (reginfo->cutpoint) @@ -2600,7 +2591,7 @@ S_reg_check_named_buff_matched(pTHX_ const regexp *rex, const regnode *scan) { I32 *nums=(I32*)SvPVX(sv_dat); for ( n=0; n= nums[n] && - PL_regendp[nums[n]] != -1) + PL_regoffs[nums[n]].end != -1) { return nums[n]; } @@ -2757,14 +2748,14 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog) case KEEPS: /* update the startpoint */ - st->u.keeper.val = PL_regstartp[0]; + st->u.keeper.val = PL_regoffs[0].start; PL_reginput = locinput; - PL_regstartp[0] = locinput - PL_bostr; + PL_regoffs[0].start = locinput - PL_bostr; PUSH_STATE_GOTO(KEEPS_next, next); /*NOT-REACHED*/ case KEEPS_next_fail: /* rollback the start point change */ - PL_regstartp[0] = st->u.keeper.val; + PL_regoffs[0].start = st->u.keeper.val; sayNO_SILENT; /*NOT-REACHED*/ case EOL: @@ -2983,7 +2974,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog) if ( ST.jump) { REGCP_UNWIND(ST.cp); for (n = *PL_reglastparen; n > ST.lastparen; n--) - PL_regendp[n] = -1; + PL_regoffs[n].end = -1; *PL_reglastparen = n; } trie_first_try: @@ -3498,17 +3489,17 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog) n = ARG(scan); /* which paren pair */ type = OP(scan); do_ref: - ln = PL_regstartp[n]; + ln = PL_regoffs[n].start; PL_reg_leftiter = PL_reg_maxiter; /* Void cache */ if (*PL_reglastparen < n || ln == -1) sayNO; /* Do not match unless seen CLOSEn. */ - if (ln == PL_regendp[n]) + if (ln == PL_regoffs[n].end) break; s = PL_bostr + ln; if (do_utf8 && type != REF) { /* REF can do byte comparison */ char *l = locinput; - const char *e = PL_bostr + PL_regendp[n]; + const char *e = PL_bostr + PL_regoffs[n].end; /* * Note that we can't do the "other character" lookup trick as * in the 8-bit case (no pun intended) because in Unicode we @@ -3541,7 +3532,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog) (UCHARAT(s) != (type == REFF ? PL_fold : PL_fold_locale)[nextchr]))) sayNO; - ln = PL_regendp[n] - ln; + ln = PL_regoffs[n].end - ln; if (locinput + ln > PL_regeol) sayNO; if (ln > 1 && (type == REF @@ -3612,7 +3603,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog) DEBUG_STATE_r( PerlIO_printf(Perl_debug_log, " re_eval 0x%"UVxf"\n", PTR2UV(PL_op)) ); PAD_SAVE_LOCAL(old_comppad, (PAD*)rexi->data->data[n + 2]); - PL_regendp[0] = PL_reg_magic->mg_len = locinput - PL_bostr; + PL_regoffs[0].end = PL_reg_magic->mg_len = locinput - PL_bostr; if (sv_yes_mark) { SV *sv_mrk = get_sv("REGMARK", 1); @@ -3698,8 +3689,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog) ST.cp = regcppush(0); /* Save *all* the positions. */ REGCP_SET(ST.lastcp); - PL_regstartp = re->startp; /* essentially NOOP on GOSUB */ - PL_regendp = re->endp; /* essentially NOOP on GOSUB */ + PL_regoffs = re->offs; /* essentially NOOP on GOSUB */ *PL_reglastparen = 0; *PL_reglastcloseparen = 0; @@ -3777,8 +3767,8 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog) break; case CLOSE: n = ARG(scan); /* which paren pair */ - PL_regstartp[n] = PL_reg_start_tmp[n] - PL_bostr; - PL_regendp[n] = locinput - PL_bostr; + PL_regoffs[n].start = PL_reg_start_tmp[n] - PL_bostr; + PL_regoffs[n].end = locinput - PL_bostr; /*if (n > PL_regsize) PL_regsize = n;*/ if (n > *PL_reglastparen) @@ -3798,8 +3788,9 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog) if ( OP(cursor)==CLOSE ){ n = ARG(cursor); if ( n <= lastopen ) { - PL_regstartp[n] = PL_reg_start_tmp[n] - PL_bostr; - PL_regendp[n] = locinput - PL_bostr; + PL_regoffs[n].start + = PL_reg_start_tmp[n] - PL_bostr; + PL_regoffs[n].end = locinput - PL_bostr; /*if (n > PL_regsize) PL_regsize = n;*/ if (n > *PL_reglastparen) @@ -3816,7 +3807,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog) /*NOTREACHED*/ case GROUPP: n = ARG(scan); /* which paren pair */ - sw = (bool)(*PL_reglastparen >= n && PL_regendp[n] != -1); + sw = (bool)(*PL_reglastparen >= n && PL_regoffs[n].end != -1); break; case NGROUPP: /* reg_check_named_buff_matched returns 0 for no match */ @@ -4234,7 +4225,7 @@ NULL } REGCP_UNWIND(ST.cp); for (n = *PL_reglastparen; n > ST.lastparen; n--) - PL_regendp[n] = -1; + PL_regoffs[n].end = -1; *PL_reglastparen = n; /*dmq: *PL_reglastcloseparen = n; */ scan = ST.next_branch; @@ -4399,13 +4390,13 @@ NULL /* mark current A as captured */ I32 paren = ST.me->flags; if (ST.count) { - PL_regstartp[paren] + PL_regoffs[paren].start = HOPc(PL_reginput, -ST.alen) - PL_bostr; - PL_regendp[paren] = PL_reginput - PL_bostr; + PL_regoffs[paren].end = PL_reginput - PL_bostr; /*dmq: *PL_reglastcloseparen = paren; */ } else - PL_regendp[paren] = -1; + PL_regoffs[paren].end = -1; if (cur_eval && cur_eval->u.eval.close_paren && cur_eval->u.eval.close_paren == (U32)ST.me->flags) { @@ -4439,12 +4430,12 @@ NULL #define CURLY_SETPAREN(paren, success) \ if (paren) { \ if (success) { \ - PL_regstartp[paren] = HOPc(locinput, -1) - PL_bostr; \ - PL_regendp[paren] = locinput - PL_bostr; \ + PL_regoffs[paren].start = HOPc(locinput, -1) - PL_bostr; \ + PL_regoffs[paren].end = locinput - PL_bostr; \ *PL_reglastcloseparen = paren; \ } \ else \ - PL_regendp[paren] = -1; \ + PL_regoffs[paren].end = -1; \ } case STAR: /* /A*B/ where A is width 1 */ @@ -4617,7 +4608,7 @@ NULL case CURLY_B_min_known_fail: /* failed to find B in a non-greedy match where c1,c2 valid */ if (ST.paren && ST.count) - PL_regendp[ST.paren] = -1; + PL_regoffs[ST.paren].end = -1; PL_reginput = locinput; /* Could be reset... */ REGCP_UNWIND(ST.cp); @@ -4695,7 +4686,7 @@ NULL case CURLY_B_min_fail: /* failed to find B in a non-greedy match where c1,c2 invalid */ if (ST.paren && ST.count) - PL_regendp[ST.paren] = -1; + PL_regoffs[ST.paren].end = -1; REGCP_UNWIND(ST.cp); /* failed -- move forward one */ @@ -4742,7 +4733,7 @@ NULL case CURLY_B_max_fail: /* failed to find B in a greedy match */ if (ST.paren && ST.count) - PL_regendp[ST.paren] = -1; + PL_regoffs[ST.paren].end = -1; REGCP_UNWIND(ST.cp); /* back up. */ diff --git a/regexp.h b/regexp.h index 9cf324f..e17b2c9 100644 --- a/regexp.h +++ b/regexp.h @@ -50,11 +50,10 @@ struct reg_substr_data { #define SV_SAVED_COPY #endif -/* swap buffer for paren structs */ -typedef struct regexp_paren_ofs { - I32 *startp; - I32 *endp; -} regexp_paren_ofs; +typedef struct regexp_paren_pair { + I32 start; + I32 end; +} regexp_paren_pair; /* this is ordered such that the most commonly used fields are at the start of the struct */ @@ -80,9 +79,8 @@ typedef struct regexp { /* Data about the last/current match. These are modified during matching*/ U32 lastparen; /* last open paren matched */ U32 lastcloseparen; /* last close paren matched */ - regexp_paren_ofs *swap; /* Swap copy of *startp / *endp */ - I32 *startp; /* Array of offsets from start of string (@-) */ - I32 *endp; /* Array of offsets from start of string (@+) */ + regexp_paren_pair *swap; /* Swap copy of *offs */ + regexp_paren_pair *offs; /* Array of offsets for (@-) and (@+) */ char *subbeg; /* saved or original string so \digit works forever. */ @@ -469,8 +467,7 @@ typedef struct regmatch_slab { #define PL_bostr PL_reg_state.re_state_bostr #define PL_reginput PL_reg_state.re_state_reginput #define PL_regeol PL_reg_state.re_state_regeol -#define PL_regstartp PL_reg_state.re_state_regstartp -#define PL_regendp PL_reg_state.re_state_regendp +#define PL_regoffs PL_reg_state.re_state_regoffs #define PL_reglastparen PL_reg_state.re_state_reglastparen #define PL_reglastcloseparen PL_reg_state.re_state_reglastcloseparen #define PL_reg_start_tmp PL_reg_state.re_state_reg_start_tmp @@ -496,8 +493,7 @@ struct re_save_state { char *re_state_bostr; char *re_state_reginput; /* String-input pointer. */ char *re_state_regeol; /* End of input, for $ check. */ - I32 *re_state_regstartp; /* Pointer to startp array. */ - I32 *re_state_regendp; /* Ditto for endp. */ + regexp_paren_pair *re_state_regoffs; /* Pointer to start/end pairs */ U32 *re_state_reglastparen; /* Similarly for lastparen. */ U32 *re_state_reglastcloseparen; /* Similarly for lastcloseparen. */ char **re_state_reg_start_tmp; /* from regexec.c */ diff --git a/sv.c b/sv.c index c10eb93..88dcd96 100644 --- a/sv.c +++ b/sv.c @@ -10616,10 +10616,9 @@ Perl_ss_dup(pTHX_ PerlInterpreter *proto_perl, CLONE_PARAMS* param) = pv_dup(old_state->re_state_reginput); new_state->re_state_regeol = pv_dup(old_state->re_state_regeol); - new_state->re_state_regstartp - = (I32*) any_dup(old_state->re_state_regstartp, proto_perl); - new_state->re_state_regendp - = (I32*) any_dup(old_state->re_state_regendp, proto_perl); + new_state->re_state_regoffs + = (regexp_paren_pair*) + any_dup(old_state->re_state_regoffs, proto_perl); new_state->re_state_reglastparen = (U32*) any_dup(old_state->re_state_reglastparen, proto_perl); diff --git a/universal.c b/universal.c index 6fdf8b9..adfddb5 100644 --- a/universal.c +++ b/universal.c @@ -1209,8 +1209,8 @@ XS(XS_re_regnames) I32 *nums = (I32*)SvPVX(sv_dat); for ( i = 0; i < SvIVX(sv_dat); i++ ) { if ((I32)(re->lastcloseparen) >= nums[i] && - re->startp[nums[i]] != -1 && - re->endp[nums[i]] != -1) + re->offs[nums[i]].start != -1 && + re->offs[nums[i]].end != -1) { parno = nums[i]; break; @@ -1291,8 +1291,8 @@ XS(XS_re_regnames_iternext) I32 *nums = (I32*)SvPVX(sv_dat); for ( i = 0; i < SvIVX(sv_dat); i++ ) { if ((I32)(re->lastcloseparen) >= nums[i] && - re->startp[nums[i]] != -1 && - re->endp[nums[i]] != -1) + re->offs[nums[i]].start != -1 && + re->offs[nums[i]].end != -1) { parno = nums[i]; break; -- 1.8.3.1