**** Alterations to Henry's code are...
****
**** Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
- **** 2000, 2001, 2002, 2003, 2004, 2005, 2006, by Larry Wall and others
+ **** 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007 by Larry Wall and others
****
**** You may distribute under the terms of either the GNU General Public
**** License or the Artistic License, as specified in the README file.
I32 utf8;
HV *charnames; /* cache of named sequences */
HV *paren_names; /* Paren names */
+
regnode **recurse; /* Recurse regops */
I32 recurse_count; /* Number of recurse regops */
#if ADD_TO_REGEXEC
#ifdef DEBUGGING
const char *lastparse;
I32 lastnum;
+ AV *paren_name_list; /* idx -> name */
#define RExC_lastparse (pRExC_state->lastparse)
#define RExC_lastnum (pRExC_state->lastnum)
+#define RExC_paren_name_list (pRExC_state->paren_name_list)
#endif
} RExC_state_t;
#define RExC_recurse (pRExC_state->recurse)
#define RExC_recurse_count (pRExC_state->recurse_count)
+
#define ISMULT1(c) ((c) == '*' || (c) == '+' || (c) == '?')
#define ISMULT2(s) ((*s) == '*' || (*s) == '+' || (*s) == '?' || \
((*s) == '{' && regcurly(s)))
\
if ( noper_next < tail ) { \
if (!trie->jump) \
- trie->jump = PerlMemShared_calloc( word_count + 1, sizeof(U16) ); \
+ trie->jump = (U16 *) PerlMemShared_calloc( word_count + 1, sizeof(U16) ); \
trie->jump[curword] = (U16)(noper_next - convert); \
if (!jumper) \
jumper = noper_next; \
/* we only allocate the nextword buffer when there */\
/* a dupe, so first time we have to do the allocation */\
if (!trie->nextword) \
- trie->nextword = \
+ trie->nextword = (U16 *) \
PerlMemShared_calloc( word_count + 1, sizeof(U16)); \
while ( trie->nextword[dupe] ) \
dupe= trie->nextword[dupe]; \
PERL_UNUSED_ARG(depth);
#endif
- trie = PerlMemShared_calloc( 1, sizeof(reg_trie_data) );
+ trie = (reg_trie_data *) PerlMemShared_calloc( 1, sizeof(reg_trie_data) );
trie->refcount = 1;
trie->startstate = 1;
trie->wordcount = word_count;
RExC_rxi->data->data[ data_slot ] = (void*)trie;
- trie->charmap = PerlMemShared_calloc( 256, sizeof(U16) );
+ trie->charmap = (U16 *) PerlMemShared_calloc( 256, sizeof(U16) );
if (!(UTF && folder))
- trie->bitmap = PerlMemShared_calloc( ANYOF_BITMAP_SIZE, 1 );
+ trie->bitmap = (char *) PerlMemShared_calloc( ANYOF_BITMAP_SIZE, 1 );
DEBUG_r({
trie_words = newAV();
});
(int)TRIE_CHARCOUNT(trie), trie->uniquecharcount,
(int)trie->minlen, (int)trie->maxlen )
);
- trie->wordlen = PerlMemShared_calloc( word_count, sizeof(U32) );
+ trie->wordlen = (U32 *) PerlMemShared_calloc( word_count, sizeof(U32) );
/*
We now know what we are dealing with in terms of unique chars and
"%*sCompiling trie using list compiler\n",
(int)depth * 2 + 2, ""));
- trie->states = PerlMemShared_calloc( TRIE_CHARCOUNT(trie) + 2,
- sizeof(reg_trie_state) );
+ trie->states = (reg_trie_state *)
+ PerlMemShared_calloc( TRIE_CHARCOUNT(trie) + 2,
+ sizeof(reg_trie_state) );
TRIE_LIST_NEW(1);
next_alloc = 2;
/* next alloc is the NEXT state to be allocated */
trie->statecount = next_alloc;
- trie->states = PerlMemShared_realloc( trie->states, next_alloc
- * sizeof(reg_trie_state) );
+ trie->states = (reg_trie_state *)
+ PerlMemShared_realloc( trie->states,
+ next_alloc
+ * sizeof(reg_trie_state) );
/* and now dump it out before we compress it */
DEBUG_TRIE_COMPILE_MORE_r(dump_trie_interim_list(trie, widecharmap,
depth+1)
);
- trie->trans
- = PerlMemShared_calloc( transcount, sizeof(reg_trie_trans) );
+ trie->trans = (reg_trie_trans *)
+ PerlMemShared_calloc( transcount, sizeof(reg_trie_trans) );
{
U32 state;
U32 tp = 0;
}
if ( transcount < tp + maxid - minid + 1) {
transcount *= 2;
- trie->trans
- = PerlMemShared_realloc( trie->trans,
+ trie->trans = (reg_trie_trans *)
+ PerlMemShared_realloc( trie->trans,
transcount
* sizeof(reg_trie_trans) );
Zero( trie->trans + (transcount / 2), transcount / 2 , reg_trie_trans );
"%*sCompiling trie using table compiler\n",
(int)depth * 2 + 2, ""));
- trie->trans = PerlMemShared_calloc( ( TRIE_CHARCOUNT(trie) + 1 )
- * trie->uniquecharcount + 1,
- sizeof(reg_trie_trans) );
- trie->states = PerlMemShared_calloc( TRIE_CHARCOUNT(trie) + 2,
- sizeof(reg_trie_state) );
+ trie->trans = (reg_trie_trans *)
+ PerlMemShared_calloc( ( TRIE_CHARCOUNT(trie) + 1 )
+ * trie->uniquecharcount + 1,
+ sizeof(reg_trie_trans) );
+ trie->states = (reg_trie_state *)
+ PerlMemShared_calloc( TRIE_CHARCOUNT(trie) + 2,
+ sizeof(reg_trie_state) );
next_alloc = trie->uniquecharcount + 1;
}
}
trie->lasttrans = pos + 1;
- trie->states = PerlMemShared_realloc( trie->states, laststate
- * sizeof(reg_trie_state) );
+ trie->states = (reg_trie_state *)
+ PerlMemShared_realloc( trie->states, laststate
+ * sizeof(reg_trie_state) );
DEBUG_TRIE_COMPILE_MORE_r(
PerlIO_printf( Perl_debug_log,
"%*sAlloc: %d Orig: %"IVdf" elements, Final:%"IVdf". Savings of %%%5.2f\n",
(UV)trie->lasttrans)
);
/* resize the trans array to remove unused space */
- trie->trans = PerlMemShared_realloc( trie->trans, trie->lasttrans
- * sizeof(reg_trie_trans) );
+ trie->trans = (reg_trie_trans *)
+ PerlMemShared_realloc( trie->trans, trie->lasttrans
+ * sizeof(reg_trie_trans) );
/* and now dump out the compressed format */
DEBUG_TRIE_COMPILE_r(dump_trie(trie, widecharmap, revcharmap, depth+1));
ARG_SET( stclass, data_slot );
- aho = PerlMemShared_calloc( 1, sizeof(reg_ac_data) );
+ aho = (reg_ac_data *) PerlMemShared_calloc( 1, sizeof(reg_ac_data) );
RExC_rxi->data->data[ data_slot ] = (void*)aho;
aho->trie=trie_offset;
aho->states=(reg_trie_state *)PerlMemShared_malloc( numstates * sizeof(reg_trie_state) );
Copy( trie->states, aho->states, numstates, reg_trie_state );
Newxz( q, numstates, U32);
- aho->fail = PerlMemShared_calloc( numstates, sizeof(U32) );
+ aho->fail = (U32 *) PerlMemShared_calloc( numstates, sizeof(U32) );
aho->refcount = 1;
fail = aho->fail;
/* initialize fail[0..1] to be 1 so that we always have
extern const struct regexp_engine my_reg_engine;
#define RE_ENGINE_PTR &my_reg_engine
#endif
-/* these make a few things look better, to avoid indentation */
-#define BEGIN_BLOCK {
-#define END_BLOCK }
-
+
+#ifndef PERL_IN_XSUB_RE
regexp *
Perl_pregcomp(pTHX_ char *exp, char *xend, PMOP *pm)
{
dVAR;
- GET_RE_DEBUG_FLAGS_DECL;
- DEBUG_r(if (!PL_colorset) reginitcolors());
-#ifndef PERL_IN_XSUB_RE
- BEGIN_BLOCK
+ HV * const table = GvHV(PL_hintgv);
/* Dispatch a request to compile a regexp to correct
regexp engine. */
- HV * const table = GvHV(PL_hintgv);
if (table) {
SV **ptr= hv_fetchs(table, "regcomp", FALSE);
+ GET_RE_DEBUG_FLAGS_DECL;
if (ptr && SvIOK(*ptr) && SvIV(*ptr)) {
const regexp_engine *eng=INT2PTR(regexp_engine*,SvIV(*ptr));
DEBUG_COMPILE_r({
return CALLREGCOMP_ENG(eng, exp, xend, pm);
}
}
- END_BLOCK
+ return Perl_re_compile(aTHX_ exp, xend, pm);
+}
#endif
- BEGIN_BLOCK
+
+regexp *
+Perl_re_compile(pTHX_ char *exp, char *xend, PMOP *pm)
+{
+ dVAR;
register regexp *r;
register regexp_internal *ri;
regnode *scan;
int restudied= 0;
RExC_state_t copyRExC_state;
#endif
+ GET_RE_DEBUG_FLAGS_DECL;
+ DEBUG_r(if (!PL_colorset) reginitcolors());
+
if (exp == NULL)
FAIL("NULL regexp argument");
RExC_close_parens = NULL;
RExC_opend = NULL;
RExC_paren_names = NULL;
+#ifdef DEBUGGING
+ RExC_paren_name_list = NULL;
+#endif
RExC_recurse = NULL;
RExC_recurse_count = 0;
regnode *trie_op;
/* this can happen only on restudy */
if ( OP(first) == TRIE ) {
- struct regnode_1 *trieop =
+ struct regnode_1 *trieop = (struct regnode_1 *)
PerlMemShared_calloc(1, sizeof(struct regnode_1));
StructCopy(first,trieop,struct regnode_1);
trie_op=(regnode *)trieop;
} else {
- struct regnode_charclass *trieop =
+ struct regnode_charclass *trieop = (struct regnode_charclass *)
PerlMemShared_calloc(1, sizeof(struct regnode_charclass));
StructCopy(first,trieop,struct regnode_charclass);
trie_op=(regnode *)trieop;
r->paren_names = (HV*)SvREFCNT_inc(RExC_paren_names);
else
r->paren_names = NULL;
-
+ if (r->prelen == 3 && strEQ("\\s+", r->precomp))
+ r->extflags |= RXf_WHITE;
+ else if (r->prelen == 1 && r->precomp[0] == '^')
+ r->extflags |= RXf_START_ONLY;
+
+#ifdef DEBUGGING
+ if (RExC_paren_names) {
+ ri->name_list_idx = add_data( pRExC_state, 1, "p" );
+ ri->data->data[ri->name_list_idx] = (void*)SvREFCNT_inc(RExC_paren_name_list);
+ } else
+#endif
+ ri->name_list_idx = 0;
+
if (RExC_recurse_count) {
for ( ; RExC_recurse_count ; RExC_recurse_count-- ) {
const regnode *scan = RExC_recurse[RExC_recurse_count-1];
PerlIO_printf(Perl_debug_log, "\n");
});
return(r);
- END_BLOCK
}
#undef CORE_ONLY_BLOCK
-#undef END_BLOCK
#undef RE_ENGINE_PTR
#ifndef PERL_IN_XSUB_RE
SV*
-Perl_reg_named_buff_sv(pTHX_ SV* namesv)
+Perl_reg_named_buff_get(pTHX_ SV* namesv, const REGEXP * const from_re, U32 flags)
{
- I32 parno = 0; /* no match */
- if (PL_curpm) {
- const REGEXP * const rx = PM_GETRE(PL_curpm);
+ AV *retarray = NULL;
+ SV *ret;
+ if (flags & 1)
+ retarray=newAV();
+
+ if (from_re || PL_curpm) {
+ const REGEXP * const rx = from_re ? from_re : PM_GETRE(PL_curpm);
if (rx && rx->paren_names) {
HE *he_str = hv_fetch_ent( rx->paren_names, namesv, 0, 0 );
if (he_str) {
if ((I32)(rx->lastparen) >= nums[i] &&
rx->endp[nums[i]] != -1)
{
- parno = nums[i];
- break;
+ ret = reg_numbered_buff_get(nums[i],rx,NULL,0);
+ if (!retarray)
+ return ret;
+ } else {
+ ret = newSVsv(&PL_sv_undef);
+ }
+ if (retarray) {
+ SvREFCNT_inc(ret);
+ av_push(retarray, ret);
}
}
+ if (retarray)
+ return (SV*)retarray;
}
}
}
- if ( !parno ) {
- return 0;
+ return NULL;
+}
+
+SV*
+Perl_reg_numbered_buff_get(pTHX_ I32 paren, const REGEXP * const rx, SV* usesv, U32 flags)
+{
+ char *s = NULL;
+ I32 i = 0;
+ I32 s1, t1;
+ SV *sv = usesv ? usesv : newSVpvs("");
+ PERL_UNUSED_ARG(flags);
+
+ if (!rx->subbeg) {
+ sv_setsv(sv,&PL_sv_undef);
+ return sv;
+ }
+ else
+ if (paren == -2 && rx->startp[0] != -1) {
+ /* $` */
+ i = rx->startp[0];
+ s = rx->subbeg;
+ }
+ else
+ if (paren == -1 && rx->endp[0] != -1) {
+ /* $' */
+ s = rx->subbeg + rx->endp[0];
+ i = rx->sublen - rx->endp[0];
+ }
+ else
+ if ( 0 <= paren && paren <= (I32)rx->nparens &&
+ (s1 = rx->startp[paren]) != -1 &&
+ (t1 = rx->endp[paren]) != -1)
+ {
+ /* $& $1 ... */
+ i = t1 - s1;
+ s = rx->subbeg + s1;
+ } else {
+ sv_setsv(sv,&PL_sv_undef);
+ return sv;
+ }
+ assert(rx->sublen >= (s - rx->subbeg) + i );
+ if (i >= 0) {
+ const int oldtainted = PL_tainted;
+ TAINT_NOT;
+ sv_setpvn(sv, s, i);
+ PL_tainted = oldtainted;
+ if ( (rx->extflags & RXf_CANY_SEEN)
+ ? (RX_MATCH_UTF8(rx)
+ && (!i || is_utf8_string((U8*)s, i)))
+ : (RX_MATCH_UTF8(rx)) )
+ {
+ SvUTF8_on(sv);
+ }
+ else
+ SvUTF8_off(sv);
+ if (PL_tainting) {
+ if (RX_MATCH_TAINTED(rx)) {
+ if (SvTYPE(sv) >= SVt_PVMG) {
+ MAGIC* const mg = SvMAGIC(sv);
+ MAGIC* mgt;
+ PL_tainted = 1;
+ SvMAGIC_set(sv, mg->mg_moremagic);
+ SvTAINT(sv);
+ if ((mgt = SvMAGIC(sv))) {
+ mg->mg_moremagic = mgt;
+ SvMAGIC_set(sv, mg);
+ }
+ } else {
+ PL_tainted = 1;
+ SvTAINT(sv);
+ }
+ } else
+ SvTAINTED_off(sv);
+ }
} else {
- GV *gv_paren;
- SV *sv= sv_newmortal();
- Perl_sv_setpvf(aTHX_ sv, "%"IVdf,(IV)parno);
- gv_paren= Perl_gv_fetchsv(aTHX_ sv, GV_ADD, SVt_PVGV);
- return GvSVn(gv_paren);
+ sv_setsv(sv,&PL_sv_undef);
}
+ return sv;
}
#endif
STATIC SV*
S_reg_scan_name(pTHX_ RExC_state_t *pRExC_state, U32 flags) {
char *name_start = RExC_parse;
- if ( UTF ) {
- STRLEN numlen;
- while( isIDFIRST_uni(utf8n_to_uvchr((U8*)RExC_parse,
- RExC_end - RExC_parse, &numlen, UTF8_ALLOW_DEFAULT)))
- {
- RExC_parse += numlen;
- }
- } else {
- while( isIDFIRST(*RExC_parse) )
- RExC_parse++;
+
+ if (isIDFIRST_lazy_if(RExC_parse, UTF)) {
+ /* skip IDFIRST by using do...while */
+ if (UTF)
+ do {
+ RExC_parse += UTF8SKIP(RExC_parse);
+ } while (isALNUM_utf8((U8*)RExC_parse));
+ else
+ do {
+ RExC_parse++;
+ } while (isALNUM(*RExC_parse));
}
+
if ( flags ) {
SV* sv_name = sv_2mortal(Perl_newSVpvn(aTHX_ name_start,
(int)(RExC_parse - name_start)));
return ret;
} else
if (*RExC_parse == '?') { /* (?...) */
- U32 posflags = 0, negflags = 0;
- U32 *flagsp = &posflags;
bool is_logical = 0;
const char * const seqstart = RExC_parse;
ret = NULL; /* For look-ahead/behind. */
switch (paren) {
+ case 'P': /* (?P...) variants for those used to PCRE/Python */
+ paren = *RExC_parse++;
+ if ( paren == '<') /* (?P<...>) named capture */
+ goto named_capture;
+ else if (paren == '>') { /* (?P>name) named recursion */
+ goto named_recursion;
+ }
+ else if (paren == '=') { /* (?P=...) named backref */
+ /* this pretty much dupes the code for \k<NAME> in regatom(), if
+ you change this make sure you change that */
+ char* name_start = RExC_parse;
+ U32 num = 0;
+ SV *sv_dat = reg_scan_name(pRExC_state,
+ SIZE_ONLY ? REG_RSN_RETURN_NULL : REG_RSN_RETURN_DATA);
+ if (RExC_parse == name_start || *RExC_parse != ')')
+ vFAIL2("Sequence %.3s... not terminated",parse_start);
+
+ if (!SIZE_ONLY) {
+ num = add_data( pRExC_state, 1, "S" );
+ RExC_rxi->data->data[num]=(void*)sv_dat;
+ SvREFCNT_inc(sv_dat);
+ }
+ RExC_sawback = 1;
+ ret = reganode(pRExC_state,
+ (U8)(FOLD ? (LOC ? NREFFL : NREFF) : NREF),
+ num);
+ *flagp |= HASWIDTH;
+
+ Set_Node_Offset(ret, parse_start+1);
+ Set_Node_Cur_Length(ret); /* MJD */
+
+ nextchar(pRExC_state);
+ return ret;
+ }
+ goto unknown;
case '<': /* (?<...) */
if (*RExC_parse == '!')
paren = ',';
else if (*RExC_parse != '=')
+ named_capture:
{ /* (?<...>) */
char *name_start;
SV *svname;
if (!RExC_paren_names) {
RExC_paren_names= newHV();
sv_2mortal((SV*)RExC_paren_names);
+#ifdef DEBUGGING
+ RExC_paren_name_list= newAV();
+ sv_2mortal((SV*)RExC_paren_name_list);
+#endif
}
he_str = hv_fetch_ent( RExC_paren_names, svname, 1, 0 );
if ( he_str )
SvIOK_on(sv_dat);
SvIVX(sv_dat)= 1;
}
+#ifdef DEBUGGING
+ if (!av_store(RExC_paren_name_list, RExC_npar, SvREFCNT_inc(svname)))
+ SvREFCNT_dec(svname);
+#endif
/*sv_dump(sv_dat);*/
}
/*notreached*/
{ /* named and numeric backreferences */
I32 num;
- char * parse_start;
case '&': /* (?&NAME) */
parse_start = RExC_parse - 1;
+ named_recursion:
{
SV *sv_dat = reg_scan_name(pRExC_state,
SIZE_ONLY ? REG_RSN_RETURN_NULL : REG_RSN_RETURN_DATA);
vFAIL("Sequence (? incomplete");
break;
default:
- --RExC_parse;
- parse_flags: /* (?i) */
- while (*RExC_parse && strchr("iogcmsx", *RExC_parse)) {
+ --RExC_parse;
+ parse_flags: /* (?i) */
+ {
+ U32 posflags = 0, negflags = 0;
+ U32 *flagsp = &posflags;
+
+ while (*RExC_parse) {
+ /* && strchr("iogcmsx", *RExC_parse) */
/* (?g), (?gc) and (?o) are useless here
and must be globally applied -- japhy */
-
- if (*RExC_parse == 'o' || *RExC_parse == 'g') {
+ switch (*RExC_parse) {
+ CASE_STD_PMMOD_FLAGS_PARSE_SET(flagsp);
+ case 'o':
+ case 'g':
if (SIZE_ONLY && ckWARN(WARN_REGEXP)) {
const I32 wflagbit = *RExC_parse == 'o' ? WASTED_O : WASTED_G;
if (! (wastedflags & wflagbit) ) {
);
}
}
- }
- else if (*RExC_parse == 'c') {
+ break;
+
+ case 'c':
if (SIZE_ONLY && ckWARN(WARN_REGEXP)) {
if (! (wastedflags & WASTED_C) ) {
wastedflags |= WASTED_GC;
);
}
}
- }
- else { pmflag(flagsp, *RExC_parse); }
-
- ++RExC_parse;
- }
- if (*RExC_parse == '-') {
- flagsp = &negflags;
- wastedflags = 0; /* reset so (?g-c) warns twice */
+ break;
+ case 'k':
+ if (flagsp == &negflags) {
+ if (SIZE_ONLY && ckWARN(WARN_REGEXP))
+ vWARN(RExC_parse + 1,"Useless use of (?-k)");
+ } else {
+ *flagsp |= RXf_PMf_KEEPCOPY;
+ }
+ break;
+ case '-':
+ if (flagsp == &negflags)
+ goto unknown;
+ flagsp = &negflags;
+ wastedflags = 0; /* reset so (?g-c) warns twice */
+ break;
+ case ':':
+ paren = ':';
+ /*FALLTHROUGH*/
+ case ')':
+ RExC_flags |= posflags;
+ RExC_flags &= ~negflags;
+ nextchar(pRExC_state);
+ if (paren != ':') {
+ *flagp = TRYAGAIN;
+ return NULL;
+ } else {
+ ret = NULL;
+ goto parse_rest;
+ }
+ /*NOTREACHED*/
+ default:
+ unknown:
+ RExC_parse++;
+ vFAIL3("Sequence (%.*s...) not recognized", RExC_parse-seqstart, seqstart);
+ /*NOTREACHED*/
+ }
++RExC_parse;
- goto parse_flags;
- }
- RExC_flags |= posflags;
- RExC_flags &= ~negflags;
- if (*RExC_parse == ':') {
- RExC_parse++;
- paren = ':';
- break;
- }
- unknown:
- if (*RExC_parse != ')') {
- RExC_parse++;
- vFAIL3("Sequence (%.*s...) not recognized", RExC_parse-seqstart, seqstart);
}
- nextchar(pRExC_state);
- *flagp = TRYAGAIN;
- return NULL;
- }
+ }} /* one for the default block, one for the switch */
}
else { /* (...) */
capturing_parens:
}
else /* ! paren */
ret = NULL;
-
+
+ parse_rest:
/* Pick up the branches, linking them together. */
parse_start = RExC_parse; /* MJD */
br = regbranch(pRExC_state, &flags, 1,depth+1);
/*
- regatom - the lowest level
- *
- * Optimization: gobbles an entire sequence of ordinary characters so that
- * it can turn them into a single node, which is smaller to store and
- * faster to run. Backslashed characters are exceptions, each becoming a
- * separate node; the code is simpler that way and it's not worth fixing.
- *
- * [Yes, it is worth fixing, some scripts can run twice the speed.]
- * [It looks like its ok, as in S_study_chunk we merge adjacent EXACT nodes]
- */
+
+ Try to identify anything special at the start of the pattern. If there
+ is, then handle it as required. This may involve generating a single regop,
+ such as for an assertion; or it may involve recursing, such as to
+ handle a () structure.
+
+ If the string doesn't start with something special then we gobble up
+ as much literal text as we can.
+
+ Once we have been able to handle whatever type of thing started the
+ sequence, we return.
+
+ Note: we have to be careful with escapes, as they can be both literal
+ and special, and in the case of \10 and friends can either, depending
+ on context. Specifically there are two seperate switches for handling
+ escape sequences, with the one for handling literal escapes requiring
+ a dummy entry for all of the special escapes that are actually handled
+ by the other.
+*/
+
STATIC regnode *
S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
{
DEBUG_PARSE("atom");
*flagp = WORST; /* Tentatively. */
+
tryagain:
switch (*RExC_parse) {
case '^':
vFAIL("Quantifier follows nothing");
break;
case '\\':
+ /* Special Escapes
+
+ This switch handles escape sequences that resolve to some kind
+ of special regop and not to literal text. Escape sequnces that
+ resolve to literal text are handled below in the switch marked
+ "Literal Escapes".
+
+ Every entry in this switch *must* have a corresponding entry
+ in the literal escape switch. However, the opposite is not
+ required, as the default for this switch is to jump to the
+ literal text handling code.
+ */
switch (*++RExC_parse) {
+ /* Special Escapes */
case 'A':
RExC_seen_zerolen++;
ret = reg_node(pRExC_state, SBOL);
*flagp |= SIMPLE;
- nextchar(pRExC_state);
- Set_Node_Length(ret, 2); /* MJD */
- break;
+ goto finish_meta_pat;
case 'G':
ret = reg_node(pRExC_state, GPOS);
RExC_seen |= REG_SEEN_GPOS;
*flagp |= SIMPLE;
- nextchar(pRExC_state);
- Set_Node_Length(ret, 2); /* MJD */
- break;
+ goto finish_meta_pat;
+ case 'K':
+ RExC_seen_zerolen++;
+ ret = reg_node(pRExC_state, KEEPS);
+ *flagp |= SIMPLE;
+ goto finish_meta_pat;
case 'Z':
ret = reg_node(pRExC_state, SEOL);
*flagp |= SIMPLE;
RExC_seen_zerolen++; /* Do not optimize RE away */
- nextchar(pRExC_state);
- break;
+ goto finish_meta_pat;
case 'z':
ret = reg_node(pRExC_state, EOS);
*flagp |= SIMPLE;
RExC_seen_zerolen++; /* Do not optimize RE away */
- nextchar(pRExC_state);
- Set_Node_Length(ret, 2); /* MJD */
- break;
+ goto finish_meta_pat;
case 'C':
ret = reg_node(pRExC_state, CANY);
RExC_seen |= REG_SEEN_CANY;
*flagp |= HASWIDTH|SIMPLE;
- nextchar(pRExC_state);
- Set_Node_Length(ret, 2); /* MJD */
- break;
+ goto finish_meta_pat;
case 'X':
ret = reg_node(pRExC_state, CLUMP);
*flagp |= HASWIDTH;
- nextchar(pRExC_state);
- Set_Node_Length(ret, 2); /* MJD */
- break;
+ goto finish_meta_pat;
case 'w':
ret = reg_node(pRExC_state, (U8)(LOC ? ALNUML : ALNUM));
*flagp |= HASWIDTH|SIMPLE;
- nextchar(pRExC_state);
- Set_Node_Length(ret, 2); /* MJD */
- break;
+ goto finish_meta_pat;
case 'W':
ret = reg_node(pRExC_state, (U8)(LOC ? NALNUML : NALNUM));
*flagp |= HASWIDTH|SIMPLE;
- nextchar(pRExC_state);
- Set_Node_Length(ret, 2); /* MJD */
- break;
+ goto finish_meta_pat;
case 'b':
RExC_seen_zerolen++;
RExC_seen |= REG_SEEN_LOOKBEHIND;
ret = reg_node(pRExC_state, (U8)(LOC ? BOUNDL : BOUND));
*flagp |= SIMPLE;
- nextchar(pRExC_state);
- Set_Node_Length(ret, 2); /* MJD */
- break;
+ goto finish_meta_pat;
case 'B':
RExC_seen_zerolen++;
RExC_seen |= REG_SEEN_LOOKBEHIND;
ret = reg_node(pRExC_state, (U8)(LOC ? NBOUNDL : NBOUND));
*flagp |= SIMPLE;
- nextchar(pRExC_state);
- Set_Node_Length(ret, 2); /* MJD */
- break;
+ goto finish_meta_pat;
case 's':
ret = reg_node(pRExC_state, (U8)(LOC ? SPACEL : SPACE));
*flagp |= HASWIDTH|SIMPLE;
- nextchar(pRExC_state);
- Set_Node_Length(ret, 2); /* MJD */
- break;
+ goto finish_meta_pat;
case 'S':
ret = reg_node(pRExC_state, (U8)(LOC ? NSPACEL : NSPACE));
*flagp |= HASWIDTH|SIMPLE;
- nextchar(pRExC_state);
- Set_Node_Length(ret, 2); /* MJD */
- break;
+ goto finish_meta_pat;
case 'd':
ret = reg_node(pRExC_state, DIGIT);
*flagp |= HASWIDTH|SIMPLE;
- nextchar(pRExC_state);
- Set_Node_Length(ret, 2); /* MJD */
- break;
+ goto finish_meta_pat;
case 'D':
ret = reg_node(pRExC_state, NDIGIT);
*flagp |= HASWIDTH|SIMPLE;
+ goto finish_meta_pat;
+ case 'v':
+ ret = reganode(pRExC_state, PRUNE, 0);
+ ret->flags = 1;
+ *flagp |= SIMPLE;
+ goto finish_meta_pat;
+ case 'V':
+ ret = reganode(pRExC_state, SKIP, 0);
+ ret->flags = 1;
+ *flagp |= SIMPLE;
+ finish_meta_pat:
nextchar(pRExC_state);
Set_Node_Length(ret, 2); /* MJD */
- break;
+ break;
case 'p':
case 'P':
{
ret= reg_namedseq(pRExC_state, NULL);
break;
case 'k': /* Handle \k<NAME> and \k'NAME' */
+ parse_named_seq:
{
char ch= RExC_parse[1];
- if (ch != '<' && ch != '\'') {
- if (SIZE_ONLY)
- vWARN( RExC_parse + 1,
- "Possible broken named back reference treated as literal k");
- parse_start--;
- goto defchar;
+ if (ch != '<' && ch != '\'' && ch != '{') {
+ RExC_parse++;
+ vFAIL2("Sequence %.2s... not terminated",parse_start);
} else {
+ /* this pretty much dupes the code for (?P=...) in reg(), if
+ you change this make sure you change that */
char* name_start = (RExC_parse += 2);
U32 num = 0;
SV *sv_dat = reg_scan_name(pRExC_state,
SIZE_ONLY ? REG_RSN_RETURN_NULL : REG_RSN_RETURN_DATA);
- ch= (ch == '<') ? '>' : '\'';
-
+ ch= (ch == '<') ? '>' : (ch == '{') ? '}' : '\'';
if (RExC_parse == name_start || *RExC_parse != ch)
- vFAIL2("Sequence \\k%c... not terminated",
- (ch == '>' ? '<' : ch));
-
+ vFAIL2("Sequence %.3s... not terminated",parse_start);
+
+ if (!SIZE_ONLY) {
+ num = add_data( pRExC_state, 1, "S" );
+ RExC_rxi->data->data[num]=(void*)sv_dat;
+ SvREFCNT_inc(sv_dat);
+ }
+
RExC_sawback = 1;
ret = reganode(pRExC_state,
(U8)(FOLD ? (LOC ? NREFFL : NREFF) : NREF),
num);
*flagp |= HASWIDTH;
-
-
- if (!SIZE_ONLY) {
- num = add_data( pRExC_state, 1, "S" );
- ARG_SET(ret,num);
- RExC_rxi->data->data[num]=(void*)sv_dat;
- SvREFCNT_inc(sv_dat);
- }
+
/* override incorrect value set in reganode MJD */
Set_Node_Offset(ret, parse_start+1);
Set_Node_Cur_Length(ret); /* MJD */
nextchar(pRExC_state);
-
+
}
break;
- }
- case 'n':
- case 'r':
- case 't':
- case 'f':
- case 'e':
- case 'a':
- case 'x':
- case 'c':
- case '0':
- goto defchar;
+ }
case 'g':
case '1': case '2': case '3': case '4':
case '5': case '6': case '7': case '8': case '9':
RExC_parse++;
isrel = 1;
}
- }
+ if (hasbrace && !isDIGIT(*RExC_parse)) {
+ if (isrel) RExC_parse--;
+ RExC_parse -= 2;
+ goto parse_named_seq;
+ } }
num = atoi(RExC_parse);
if (isrel) {
num = RExC_npar - num;
char * const parse_start = RExC_parse - 1; /* MJD */
while (isDIGIT(*RExC_parse))
RExC_parse++;
+ if (parse_start == RExC_parse - 1)
+ vFAIL("Unterminated \\g... pattern");
if (hasbrace) {
if (*RExC_parse != '}')
vFAIL("Unterminated \\g{...} pattern");
case '|':
goto loopdone;
case '\\':
+ /* Literal Escapes Switch
+
+ This switch is meant to handle escape sequences that
+ resolve to a literal character.
+
+ Every escape sequence that represents something
+ else, like an assertion or a char class, is handled
+ in the switch marked 'Special Escapes' above in this
+ routine, but also has an entry here as anything that
+ isn't explicitly mentioned here will be treated as
+ an unescaped equivalent literal.
+ */
+
switch (*++p) {
- case 'A':
- case 'C':
- case 'X':
- case 'G':
- case 'g':
- case 'Z':
- case 'z':
- case 'w':
- case 'W':
- case 'b':
- case 'B':
- case 's':
- case 'S':
- case 'd':
- case 'D':
- case 'p':
- case 'P':
- case 'N':
- case 'R':
- case 'k':
+ /* These are all the special escapes. */
+ case 'A': /* Start assertion */
+ case 'b': case 'B': /* Word-boundary assertion*/
+ case 'C': /* Single char !DANGEROUS! */
+ case 'd': case 'D': /* digit class */
+ case 'g': case 'G': /* generic-backref, pos assertion */
+ case 'k': case 'K': /* named backref, keep marker */
+ case 'N': /* named char sequence */
+ case 'p': case 'P': /* unicode property */
+ case 's': case 'S': /* space class */
+ case 'v': case 'V': /* (*PRUNE) and (*SKIP) */
+ case 'w': case 'W': /* word class */
+ case 'X': /* eXtended Unicode "combining character sequence" */
+ case 'z': case 'Z': /* End of line/string assertion */
--p;
goto loopdone;
+
+ /* Anything after here is an escape that resolves to a
+ literal. (Except digits, which may or may not)
+ */
case 'n':
ender = '\n';
p++;
}
+#define _C_C_T_(NAME,TEST,WORD) \
+ANYOF_##NAME: \
+ if (LOC) \
+ ANYOF_CLASS_SET(ret, ANYOF_##NAME); \
+ else { \
+ for (value = 0; value < 256; value++) \
+ if (TEST) \
+ ANYOF_BITMAP_SET(ret, value); \
+ } \
+ yesno = '+'; \
+ what = WORD; \
+ break; \
+case ANYOF_N##NAME: \
+ if (LOC) \
+ ANYOF_CLASS_SET(ret, ANYOF_N##NAME); \
+ else { \
+ for (value = 0; value < 256; value++) \
+ if (!TEST) \
+ ANYOF_BITMAP_SET(ret, value); \
+ } \
+ yesno = '!'; \
+ what = WORD; \
+ break
+
+
/*
parse a class specification and produce either an ANYOF node that
- matches the pattern. If the pattern matches a single char only and
- that char is < 256 then we produce an EXACT node instead.
+ matches the pattern or if the pattern matches a single char only and
+ that char is < 256 and we are case insensitive then we produce an
+ EXACT node instead.
*/
+
STATIC regnode *
S_regclass(pTHX_ RExC_state_t *pRExC_state, U32 depth)
{
range = 0; /* this was not a true range */
}
+
+
if (!SIZE_ONLY) {
const char *what = NULL;
char yesno = 0;
* A similar issue a little earlier when switching on value.
* --jhi */
switch ((I32)namedclass) {
- case ANYOF_ALNUM:
- if (LOC)
- ANYOF_CLASS_SET(ret, ANYOF_ALNUM);
- else {
- for (value = 0; value < 256; value++)
- if (isALNUM(value))
- ANYOF_BITMAP_SET(ret, value);
- }
- yesno = '+';
- what = "Word";
- break;
- case ANYOF_NALNUM:
- if (LOC)
- ANYOF_CLASS_SET(ret, ANYOF_NALNUM);
- else {
- for (value = 0; value < 256; value++)
- if (!isALNUM(value))
- ANYOF_BITMAP_SET(ret, value);
- }
- yesno = '!';
- what = "Word";
- break;
- case ANYOF_ALNUMC:
- if (LOC)
- ANYOF_CLASS_SET(ret, ANYOF_ALNUMC);
- else {
- for (value = 0; value < 256; value++)
- if (isALNUMC(value))
- ANYOF_BITMAP_SET(ret, value);
- }
- yesno = '+';
- what = "Alnum";
- break;
- case ANYOF_NALNUMC:
- if (LOC)
- ANYOF_CLASS_SET(ret, ANYOF_NALNUMC);
- else {
- for (value = 0; value < 256; value++)
- if (!isALNUMC(value))
- ANYOF_BITMAP_SET(ret, value);
- }
- yesno = '!';
- what = "Alnum";
- break;
- case ANYOF_ALPHA:
- if (LOC)
- ANYOF_CLASS_SET(ret, ANYOF_ALPHA);
- else {
- for (value = 0; value < 256; value++)
- if (isALPHA(value))
- ANYOF_BITMAP_SET(ret, value);
- }
- yesno = '+';
- what = "Alpha";
- break;
- case ANYOF_NALPHA:
- if (LOC)
- ANYOF_CLASS_SET(ret, ANYOF_NALPHA);
- else {
- for (value = 0; value < 256; value++)
- if (!isALPHA(value))
- ANYOF_BITMAP_SET(ret, value);
- }
- yesno = '!';
- what = "Alpha";
- break;
+ case _C_C_T_(ALNUM, isALNUM(value), "Word");
+ case _C_C_T_(ALNUMC, isALNUMC(value), "Alnum");
+ case _C_C_T_(ALPHA, isALPHA(value), "Alpha");
+ case _C_C_T_(BLANK, isBLANK(value), "Blank");
+ case _C_C_T_(CNTRL, isCNTRL(value), "Cntrl");
+ case _C_C_T_(GRAPH, isGRAPH(value), "Graph");
+ case _C_C_T_(LOWER, isLOWER(value), "Lower");
+ case _C_C_T_(PRINT, isPRINT(value), "Print");
+ case _C_C_T_(PSXSPC, isPSXSPC(value), "Space");
+ case _C_C_T_(PUNCT, isPUNCT(value), "Punct");
+ case _C_C_T_(SPACE, isSPACE(value), "SpacePerl");
+ case _C_C_T_(UPPER, isUPPER(value), "Upper");
+ case _C_C_T_(XDIGIT, isXDIGIT(value), "XDigit");
case ANYOF_ASCII:
if (LOC)
ANYOF_CLASS_SET(ret, ANYOF_ASCII);
}
yesno = '!';
what = "ASCII";
- break;
- case ANYOF_BLANK:
- if (LOC)
- ANYOF_CLASS_SET(ret, ANYOF_BLANK);
- else {
- for (value = 0; value < 256; value++)
- if (isBLANK(value))
- ANYOF_BITMAP_SET(ret, value);
- }
- yesno = '+';
- what = "Blank";
- break;
- case ANYOF_NBLANK:
- if (LOC)
- ANYOF_CLASS_SET(ret, ANYOF_NBLANK);
- else {
- for (value = 0; value < 256; value++)
- if (!isBLANK(value))
- ANYOF_BITMAP_SET(ret, value);
- }
- yesno = '!';
- what = "Blank";
- break;
- case ANYOF_CNTRL:
- if (LOC)
- ANYOF_CLASS_SET(ret, ANYOF_CNTRL);
- else {
- for (value = 0; value < 256; value++)
- if (isCNTRL(value))
- ANYOF_BITMAP_SET(ret, value);
- }
- yesno = '+';
- what = "Cntrl";
- break;
- case ANYOF_NCNTRL:
- if (LOC)
- ANYOF_CLASS_SET(ret, ANYOF_NCNTRL);
- else {
- for (value = 0; value < 256; value++)
- if (!isCNTRL(value))
- ANYOF_BITMAP_SET(ret, value);
- }
- yesno = '!';
- what = "Cntrl";
- break;
+ break;
case ANYOF_DIGIT:
if (LOC)
ANYOF_CLASS_SET(ret, ANYOF_DIGIT);
}
yesno = '!';
what = "Digit";
- break;
- case ANYOF_GRAPH:
- if (LOC)
- ANYOF_CLASS_SET(ret, ANYOF_GRAPH);
- else {
- for (value = 0; value < 256; value++)
- if (isGRAPH(value))
- ANYOF_BITMAP_SET(ret, value);
- }
- yesno = '+';
- what = "Graph";
- break;
- case ANYOF_NGRAPH:
- if (LOC)
- ANYOF_CLASS_SET(ret, ANYOF_NGRAPH);
- else {
- for (value = 0; value < 256; value++)
- if (!isGRAPH(value))
- ANYOF_BITMAP_SET(ret, value);
- }
- yesno = '!';
- what = "Graph";
- break;
- case ANYOF_LOWER:
- if (LOC)
- ANYOF_CLASS_SET(ret, ANYOF_LOWER);
- else {
- for (value = 0; value < 256; value++)
- if (isLOWER(value))
- ANYOF_BITMAP_SET(ret, value);
- }
- yesno = '+';
- what = "Lower";
- break;
- case ANYOF_NLOWER:
- if (LOC)
- ANYOF_CLASS_SET(ret, ANYOF_NLOWER);
- else {
- for (value = 0; value < 256; value++)
- if (!isLOWER(value))
- ANYOF_BITMAP_SET(ret, value);
- }
- yesno = '!';
- what = "Lower";
- break;
- case ANYOF_PRINT:
- if (LOC)
- ANYOF_CLASS_SET(ret, ANYOF_PRINT);
- else {
- for (value = 0; value < 256; value++)
- if (isPRINT(value))
- ANYOF_BITMAP_SET(ret, value);
- }
- yesno = '+';
- what = "Print";
- break;
- case ANYOF_NPRINT:
- if (LOC)
- ANYOF_CLASS_SET(ret, ANYOF_NPRINT);
- else {
- for (value = 0; value < 256; value++)
- if (!isPRINT(value))
- ANYOF_BITMAP_SET(ret, value);
- }
- yesno = '!';
- what = "Print";
- break;
- case ANYOF_PSXSPC:
- if (LOC)
- ANYOF_CLASS_SET(ret, ANYOF_PSXSPC);
- else {
- for (value = 0; value < 256; value++)
- if (isPSXSPC(value))
- ANYOF_BITMAP_SET(ret, value);
- }
- yesno = '+';
- what = "Space";
- break;
- case ANYOF_NPSXSPC:
- if (LOC)
- ANYOF_CLASS_SET(ret, ANYOF_NPSXSPC);
- else {
- for (value = 0; value < 256; value++)
- if (!isPSXSPC(value))
- ANYOF_BITMAP_SET(ret, value);
- }
- yesno = '!';
- what = "Space";
- break;
- case ANYOF_PUNCT:
- if (LOC)
- ANYOF_CLASS_SET(ret, ANYOF_PUNCT);
- else {
- for (value = 0; value < 256; value++)
- if (isPUNCT(value))
- ANYOF_BITMAP_SET(ret, value);
- }
- yesno = '+';
- what = "Punct";
- break;
- case ANYOF_NPUNCT:
- if (LOC)
- ANYOF_CLASS_SET(ret, ANYOF_NPUNCT);
- else {
- for (value = 0; value < 256; value++)
- if (!isPUNCT(value))
- ANYOF_BITMAP_SET(ret, value);
- }
- yesno = '!';
- what = "Punct";
- break;
- case ANYOF_SPACE:
- if (LOC)
- ANYOF_CLASS_SET(ret, ANYOF_SPACE);
- else {
- for (value = 0; value < 256; value++)
- if (isSPACE(value))
- ANYOF_BITMAP_SET(ret, value);
- }
- yesno = '+';
- what = "SpacePerl";
- break;
- case ANYOF_NSPACE:
- if (LOC)
- ANYOF_CLASS_SET(ret, ANYOF_NSPACE);
- else {
- for (value = 0; value < 256; value++)
- if (!isSPACE(value))
- ANYOF_BITMAP_SET(ret, value);
- }
- yesno = '!';
- what = "SpacePerl";
- break;
- case ANYOF_UPPER:
- if (LOC)
- ANYOF_CLASS_SET(ret, ANYOF_UPPER);
- else {
- for (value = 0; value < 256; value++)
- if (isUPPER(value))
- ANYOF_BITMAP_SET(ret, value);
- }
- yesno = '+';
- what = "Upper";
- break;
- case ANYOF_NUPPER:
- if (LOC)
- ANYOF_CLASS_SET(ret, ANYOF_NUPPER);
- else {
- for (value = 0; value < 256; value++)
- if (!isUPPER(value))
- ANYOF_BITMAP_SET(ret, value);
- }
- yesno = '!';
- what = "Upper";
- break;
- case ANYOF_XDIGIT:
- if (LOC)
- ANYOF_CLASS_SET(ret, ANYOF_XDIGIT);
- else {
- for (value = 0; value < 256; value++)
- if (isXDIGIT(value))
- ANYOF_BITMAP_SET(ret, value);
- }
- yesno = '+';
- what = "XDigit";
- break;
- case ANYOF_NXDIGIT:
- if (LOC)
- ANYOF_CLASS_SET(ret, ANYOF_NXDIGIT);
- else {
- for (value = 0; value < 256; value++)
- if (!isXDIGIT(value))
- ANYOF_BITMAP_SET(ret, value);
- }
- yesno = '!';
- what = "XDigit";
- break;
+ break;
case ANYOF_MAX:
/* this is to handle \p and \P */
break;
}
return ret;
}
+#undef _C_C_T_
+
STATIC char*
S_nextchar(pTHX_ RExC_state_t *pRExC_state)
/* print the details of the trie in dumpuntil instead, as
* progi->data isn't available here */
const char op = OP(o);
- const I32 n = ARG(o);
+ const U32 n = ARG(o);
const reg_ac_data * const ac = IS_TRIE_AC(op) ?
(reg_ac_data *)progi->data->data[n] :
NULL;
}
else if (k == WHILEM && o->flags) /* Ordinal/of */
Perl_sv_catpvf(aTHX_ sv, "[%d/%d]", o->flags & 0xf, o->flags>>4);
- else if (k == REF || k == OPEN || k == CLOSE || k == GROUPP || OP(o)==ACCEPT)
+ else if (k == REF || k == OPEN || k == CLOSE || k == GROUPP || OP(o)==ACCEPT) {
Perl_sv_catpvf(aTHX_ sv, "%d", (int)ARG(o)); /* Parenth number */
- else if (k == GOSUB)
+ if ( prog->paren_names ) {
+ if ( k != REF || OP(o) < NREF) {
+ AV *list= (AV *)progi->data->data[progi->name_list_idx];
+ SV **name= av_fetch(list, ARG(o), 0 );
+ if (name)
+ Perl_sv_catpvf(aTHX_ sv, " '%"SVf"'", SVfARG(*name));
+ }
+ else {
+ AV *list= (AV *)progi->data->data[ progi->name_list_idx ];
+ SV *sv_dat=(SV*)progi->data->data[ ARG( o ) ];
+ I32 *nums=(I32*)SvPVX(sv_dat);
+ SV **name= av_fetch(list, nums[0], 0 );
+ I32 n;
+ if (name) {
+ for ( n=0; n<SvIVX(sv_dat); n++ ) {
+ Perl_sv_catpvf(aTHX_ sv, "%s%"IVdf,
+ (n ? "," : ""), (IV)nums[n]);
+ }
+ Perl_sv_catpvf(aTHX_ sv, " '%"SVf"'", SVfARG(*name));
+ }
+ }
+ }
+ } else if (k == GOSUB)
Perl_sv_catpvf(aTHX_ sv, "%d[%+d]", (int)ARG(o),(int)ARG2L(o)); /* Paren and offset */
else if (k == VERB) {
if (!o->flags)
Perl_sv_catpvf(aTHX_ sv, ":%"SVf,
- (SV*)progi->data->data[ ARG( o ) ]);
+ SVfARG((SV*)progi->data->data[ ARG( o ) ]));
} else if (k == LOGICAL)
Perl_sv_catpvf(aTHX_ sv, "[%d]", o->flags); /* 2: embedded, otherwise 1 */
else if (k == ANYOF) {
Newx(ret->endp, npar, I32);
Copy(r->endp, ret->endp, npar, I32);
- Newx(ret->substrs, 1, struct reg_substr_data);
- for (s = ret->substrs->data, i = 0; i < 3; i++, s++) {
- s->min_offset = r->substrs->data[i].min_offset;
- s->max_offset = r->substrs->data[i].max_offset;
- s->end_shift = r->substrs->data[i].end_shift;
- s->substr = sv_dup_inc(r->substrs->data[i].substr, param);
- s->utf8_substr = sv_dup_inc(r->substrs->data[i].utf8_substr, param);
- }
-
+ if (r->substrs) {
+ Newx(ret->substrs, 1, struct reg_substr_data);
+ for (s = ret->substrs->data, i = 0; i < 3; i++, s++) {
+ s->min_offset = r->substrs->data[i].min_offset;
+ s->max_offset = r->substrs->data[i].max_offset;
+ s->end_shift = r->substrs->data[i].end_shift;
+ s->substr = sv_dup_inc(r->substrs->data[i].substr, param);
+ s->utf8_substr = sv_dup_inc(r->substrs->data[i].utf8_substr, param);
+ }
+ } else
+ ret->substrs = NULL;
ret->precomp = SAVEPVN(r->precomp, r->prelen);
ret->refcnt = r->refcnt;
#endif
ret->pprivate = r->pprivate;
- RXi_SET(ret,CALLREGDUPE_PVT(ret,param));
+ if (ret->pprivate)
+ RXi_SET(ret,CALLREGDUPE_PVT(ret,param));
ptr_table_store(PL_ptr_table, r, ret);
return ret;
else
reti->data = NULL;
+ reti->name_list_idx = ri->name_list_idx;
+
Newx(reti->offsets, 2*len+1, U32);
Copy(ri->offsets, reti->offsets, 2*len+1, U32);
if (!mg->mg_ptr) {
const char *fptr = "msix";
- char reflags[6];
+ char reflags[7];
char ch;
- int left = 0;
- int right = 4;
- bool need_newline = 0;
- U16 reganch = (U16)((re->extflags & RXf_PMf_COMPILETIME) >> 12);
-
+ bool hask = ((re->extflags & RXf_PMf_KEEPCOPY) == RXf_PMf_KEEPCOPY);
+ bool hasm = ((re->extflags & RXf_PMf_STD_PMMOD) != RXf_PMf_STD_PMMOD);
+ U16 reganch = (U16)((re->extflags & RXf_PMf_STD_PMMOD) >> 12);
+ bool need_newline = 0;
+ int left = 0;
+ int right = 4 + hask;
+ if (hask)
+ reflags[left++]='k';
while((ch = *fptr++)) {
if(reganch & 1) {
reflags[left++] = ch;
}
reganch >>= 1;
}
- if(left != 4) {
+ if(hasm) {
reflags[left] = '-';
- left = 5;
+ left = 5 + hask;
}
-
+ /* printf("[%*.7s]\n",left,reflags); */
mg->mg_len = re->prelen + 4 + left;
/*
* If /x was used, we have to worry about a regex ending with a
register U8 op = PSEUDO; /* Arbitrary non-END op. */
register const regnode *next;
const regnode *optstart= NULL;
+
RXi_GET_DECL(r,ri);
GET_RE_DEBUG_FLAGS_DECL;
-
+
#ifdef DEBUG_DUMPUNTIL
PerlIO_printf(Perl_debug_log, "--- %d : %d - %d - %d\n",indent,node-start,
last ? last-start : 0,plast ? plast-start : 0);
while (PL_regkind[op] != END && (!last || node < last)) {
/* While that wasn't END last time... */
-
NODE_ALIGN(node);
op = OP(node);
if (op == CLOSE || op == WHILEM)
indent--;
next = regnext((regnode *)node);
-
+
/* Where, what. */
if (OP(node) == OPTIMIZED) {
if (!optstart && RE_DEBUG_FLAG(RE_DEBUG_COMPILE_OPTIMISE))
goto after_print;
} else
CLEAR_OPTSTART;
-
+
regprop(r, sv, node);
PerlIO_printf(Perl_debug_log, "%4"IVdf":%*s%s", (IV)(node - start),
(int)(2*indent + 1), "", SvPVX_const(sv));
-
- if (OP(node) != OPTIMIZED) {
- if (next == NULL) /* Next ptr. */
- PerlIO_printf(Perl_debug_log, "(0)");
- else if (PL_regkind[(U8)op] == BRANCH && PL_regkind[OP(next)] != BRANCH )
- PerlIO_printf(Perl_debug_log, "(FAIL)");
- else
- PerlIO_printf(Perl_debug_log, "(%"IVdf")", (IV)(next - start));
-
- /*if (PL_regkind[(U8)op] != TRIE)*/
- (void)PerlIO_putc(Perl_debug_log, '\n');
- }
-
+
+ if (OP(node) != OPTIMIZED) {
+ if (next == NULL) /* Next ptr. */
+ PerlIO_printf(Perl_debug_log, " (0)");
+ else if (PL_regkind[(U8)op] == BRANCH && PL_regkind[OP(next)] != BRANCH )
+ PerlIO_printf(Perl_debug_log, " (FAIL)");
+ else
+ PerlIO_printf(Perl_debug_log, " (%"IVdf")", (IV)(next - start));
+ (void)PerlIO_putc(Perl_debug_log, '\n');
+ }
+
after_print:
if (PL_regkind[(U8)op] == BRANCHJ) {
assert(next);
else if ( PL_regkind[(U8)op] == TRIE ) {
const regnode *this_trie = node;
const char op = OP(node);
- const I32 n = ARG(node);
+ const U32 n = ARG(node);
const reg_ac_data * const ac = op>=AHOCORASICK ?
(reg_ac_data *)ri->data->data[n] :
NULL;