typedef struct RExC_state_t {
U32 flags; /* are we folding, multilining? */
char *precomp; /* uncompiled string. */
- regexp *rx;
+ regexp *rx; /* perl core regexp structure */
+ regexp_internal *rxi; /* internal data for regexp object pprivate field */
char *start; /* Start of input for compile */
char *end; /* End of input for compile */
char *parse; /* Input-scan pointer. */
#define RExC_flags (pRExC_state->flags)
#define RExC_precomp (pRExC_state->precomp)
#define RExC_rx (pRExC_state->rx)
+#define RExC_rxi (pRExC_state->rxi)
#define RExC_start (pRExC_state->start)
#define RExC_end (pRExC_state->end)
#define RExC_parse (pRExC_state->parse)
#define RExC_whilem_seen (pRExC_state->whilem_seen)
-#define RExC_offsets (pRExC_state->rx->offsets) /* I am not like the others */
+#define RExC_offsets (pRExC_state->rxi->offsets) /* I am not like the others */
#define RExC_emit (pRExC_state->emit)
#define RExC_emit_start (pRExC_state->emit_start)
#define RExC_naughty (pRExC_state->naughty)
#define RExC_seen (pRExC_state->seen)
#define RExC_size (pRExC_state->size)
#define RExC_npar (pRExC_state->npar)
-#define RExC_cpar (pRExC_state->cpar)
#define RExC_nestroot (pRExC_state->nestroot)
#define RExC_extralen (pRExC_state->extralen)
#define RExC_seen_zerolen (pRExC_state->seen_zerolen)
#define EXPERIMENTAL_INPLACESCAN
#endif
-#define DEBUG_STUDYDATA(data,depth) \
-DEBUG_OPTIMISE_MORE_r(if(data){ \
+#define DEBUG_STUDYDATA(str,data,depth) \
+DEBUG_OPTIMISE_MORE_r(if(data){ \
PerlIO_printf(Perl_debug_log, \
- "%*s"/* Len:%"IVdf"/%"IVdf" */"Pos:%"IVdf"/%"IVdf \
- " Flags: %"IVdf" Whilem_c: %"IVdf" Lcp: %"IVdf" ", \
+ "%*s" str "Pos:%"IVdf"/%"IVdf \
+ " Flags: 0x%"UVXf" Whilem_c: %"IVdf" Lcp: %"IVdf" %s", \
(int)(depth)*2, "", \
(IV)((data)->pos_min), \
(IV)((data)->pos_delta), \
- (IV)((data)->flags), \
+ (UV)((data)->flags), \
(IV)((data)->whilem_c), \
- (IV)((data)->last_closep ? *((data)->last_closep) : -1) \
+ (IV)((data)->last_closep ? *((data)->last_closep) : -1), \
+ is_inf ? "INF " : "" \
); \
if ((data)->last_found) \
PerlIO_printf(Perl_debug_log, \
floating substrings if needed. */
STATIC void
-S_scan_commit(pTHX_ const RExC_state_t *pRExC_state, scan_data_t *data, I32 *minlenp)
+S_scan_commit(pTHX_ const RExC_state_t *pRExC_state, scan_data_t *data, I32 *minlenp, int is_inf)
{
const STRLEN l = CHR_SVLEN(data->last_found);
const STRLEN old_l = CHR_SVLEN(*data->longest);
data->minlen_fixed=minlenp;
data->lookbehind_fixed=0;
}
- else {
+ else { /* *data->longest == data->longest_float */
data->offset_float_min = l ? data->last_start_min : data->pos_min;
data->offset_float_max = (l
? data->last_start_max
: data->pos_min + data->pos_delta);
- if ((U32)data->offset_float_max > (U32)I32_MAX)
+ if (is_inf || (U32)data->offset_float_max > (U32)I32_MAX)
data->offset_float_max = I32_MAX;
if (data->flags & SF_BEFORE_EOL)
data->flags
}
data->last_end = -1;
data->flags &= ~SF_BEFORE_EOL;
- DEBUG_STUDYDATA(data,0);
+ DEBUG_STUDYDATA("cl_anything: ",data,0);
}
/* Can match anything (initialization) */
#ifdef DEBUGGING
/*
- dump_trie(trie)
- dump_trie_interim_list(trie,next_alloc)
- dump_trie_interim_table(trie,next_alloc)
+ dump_trie(trie,widecharmap,revcharmap)
+ dump_trie_interim_list(trie,widecharmap,revcharmap,next_alloc)
+ dump_trie_interim_table(trie,widecharmap,revcharmap,next_alloc)
These routines dump out a trie in a somewhat readable format.
The _interim_ variants are used for debugging the interim
*/
/*
- dump_trie(trie)
Dumps the final compressed table form of the trie to Perl_debug_log.
Used for debugging make_trie().
*/
STATIC void
-S_dump_trie(pTHX_ const struct _reg_trie_data *trie,U32 depth)
+S_dump_trie(pTHX_ const struct _reg_trie_data *trie, HV *widecharmap,
+ AV *revcharmap, U32 depth)
{
U32 state;
SV *sv=sv_newmortal();
- int colwidth= trie->widecharmap ? 6 : 4;
+ int colwidth= widecharmap ? 6 : 4;
GET_RE_DEBUG_FLAGS_DECL;
"Match","Base","Ofs" );
for( state = 0 ; state < trie->uniquecharcount ; state++ ) {
- SV ** const tmp = av_fetch( trie->revcharmap, state, 0);
+ SV ** const tmp = av_fetch( revcharmap, state, 0);
if ( tmp ) {
PerlIO_printf( Perl_debug_log, "%*s",
colwidth,
}
}
/*
- dump_trie_interim_list(trie,next_alloc)
Dumps a fully constructed but uncompressed trie in list form.
List tries normally only are used for construction when the number of
possible chars (trie->uniquecharcount) is very high.
Used for debugging make_trie().
*/
STATIC void
-S_dump_trie_interim_list(pTHX_ const struct _reg_trie_data *trie, U32 next_alloc,U32 depth)
+S_dump_trie_interim_list(pTHX_ const struct _reg_trie_data *trie,
+ HV *widecharmap, AV *revcharmap, U32 next_alloc,
+ U32 depth)
{
U32 state;
SV *sv=sv_newmortal();
- int colwidth= trie->widecharmap ? 6 : 4;
+ int colwidth= widecharmap ? 6 : 4;
GET_RE_DEBUG_FLAGS_DECL;
/* print out the table precompression. */
PerlIO_printf( Perl_debug_log, "%*sState :Word | Transition Data\n%*s%s",
);
}
for( charid = 1 ; charid <= TRIE_LIST_USED( state ) ; charid++ ) {
- SV ** const tmp = av_fetch( trie->revcharmap, TRIE_LIST_ITEM(state,charid).forid, 0);
+ SV ** const tmp = av_fetch( revcharmap, TRIE_LIST_ITEM(state,charid).forid, 0);
if ( tmp ) {
PerlIO_printf( Perl_debug_log, "%*s:%3X=%4"UVXf" | ",
colwidth,
}
/*
- dump_trie_interim_table(trie,next_alloc)
Dumps a fully constructed but uncompressed trie in table form.
This is the normal DFA style state transition table, with a few
twists to facilitate compression later.
Used for debugging make_trie().
*/
STATIC void
-S_dump_trie_interim_table(pTHX_ const struct _reg_trie_data *trie, U32 next_alloc, U32 depth)
+S_dump_trie_interim_table(pTHX_ const struct _reg_trie_data *trie,
+ HV *widecharmap, AV *revcharmap, U32 next_alloc,
+ U32 depth)
{
U32 state;
U16 charid;
SV *sv=sv_newmortal();
- int colwidth= trie->widecharmap ? 6 : 4;
+ int colwidth= widecharmap ? 6 : 4;
GET_RE_DEBUG_FLAGS_DECL;
/*
PerlIO_printf( Perl_debug_log, "%*sChar : ",(int)depth * 2 + 2,"" );
for( charid = 0 ; charid < trie->uniquecharcount ; charid++ ) {
- SV ** const tmp = av_fetch( trie->revcharmap, charid, 0);
+ SV ** const tmp = av_fetch( revcharmap, charid, 0);
if ( tmp ) {
PerlIO_printf( Perl_debug_log, "%*s",
colwidth,
SV *tmp = newSVpvs(""); \
if (UTF) SvUTF8_on(tmp); \
Perl_sv_catpvf( aTHX_ tmp, "%c", (int)uvc ); \
- av_push( TRIE_REVCHARMAP(trie), tmp ); \
+ av_push( revcharmap, tmp ); \
} STMT_END
#define TRIE_READ_CHAR STMT_START { \
else \
tmp = newSVpvn( "", 0 ); \
if ( UTF ) SvUTF8_on( tmp ); \
- av_push( trie->words, tmp ); \
+ av_push( trie_words, tmp ); \
}); \
\
curword++; \
\
if ( noper_next < tail ) { \
if (!trie->jump) \
- Newxz( trie->jump, word_count + 1, U16); \
+ trie->jump = PerlMemShared_calloc( word_count + 1, sizeof(U16) ); \
trie->jump[curword] = (U16)(noper_next - convert); \
if (!jumper) \
jumper = noper_next; \
/* we only allocate the nextword buffer when there */\
/* a dupe, so first time we have to do the allocation */\
if (!trie->nextword) \
- Newxz( trie->nextword, word_count + 1, U16); \
+ trie->nextword = \
+ PerlMemShared_calloc( word_count + 1, sizeof(U16)); \
while ( trie->nextword[dupe] ) \
dupe= trie->nextword[dupe]; \
trie->nextword[dupe]= curword; \
dVAR;
/* first pass, loop through and scan words */
reg_trie_data *trie;
+ HV *widecharmap = NULL;
+ AV *revcharmap = newAV();
regnode *cur;
const U32 uniflags = UTF8_ALLOW_DEFAULT;
STRLEN len = 0;
)
);
- const U32 data_slot = add_data( pRExC_state, 1, "t" );
- SV *re_trie_maxbuff;
-#ifndef DEBUGGING
- /* these are only used during construction but are useful during
- * debugging so we store them in the struct when debugging.
+#ifdef DEBUGGING
+ const U32 data_slot = add_data( pRExC_state, 4, "tuuu" );
+ AV *trie_words = NULL;
+ /* along with revcharmap, this only used during construction but both are
+ * useful during debugging so we store them in the struct when debugging.
*/
+#else
+ const U32 data_slot = add_data( pRExC_state, 2, "tu" );
STRLEN trie_charcount=0;
- AV *trie_revcharmap;
#endif
+ SV *re_trie_maxbuff;
GET_RE_DEBUG_FLAGS_DECL;
#ifndef DEBUGGING
PERL_UNUSED_ARG(depth);
#endif
- Newxz( trie, 1, reg_trie_data );
+ trie = PerlMemShared_calloc( 1, sizeof(reg_trie_data) );
trie->refcount = 1;
trie->startstate = 1;
trie->wordcount = word_count;
- RExC_rx->data->data[ data_slot ] = (void*)trie;
- Newxz( trie->charmap, 256, U16 );
+ RExC_rxi->data->data[ data_slot ] = (void*)trie;
+ trie->charmap = PerlMemShared_calloc( 256, sizeof(U16) );
if (!(UTF && folder))
- Newxz( trie->bitmap, ANYOF_BITMAP_SIZE, char );
+ trie->bitmap = PerlMemShared_calloc( ANYOF_BITMAP_SIZE, 1 );
DEBUG_r({
- trie->words = newAV();
+ trie_words = newAV();
});
- TRIE_REVCHARMAP(trie) = newAV();
re_trie_maxbuff = get_sv(RE_TRIE_MAXBUF_NAME, 1);
if (!SvIOK(re_trie_maxbuff)) {
}
} else {
SV** svpp;
- if ( !trie->widecharmap )
- trie->widecharmap = newHV();
+ if ( !widecharmap )
+ widecharmap = newHV();
- svpp = hv_fetch( trie->widecharmap, (char*)&uvc, sizeof( UV ), 1 );
+ svpp = hv_fetch( widecharmap, (char*)&uvc, sizeof( UV ), 1 );
if ( !svpp )
Perl_croak( aTHX_ "error creating/fetching widecharmap entry for 0x%"UVXf, uvc );
DEBUG_TRIE_COMPILE_r(
PerlIO_printf( Perl_debug_log, "%*sTRIE(%s): W:%d C:%d Uq:%d Min:%d Max:%d\n",
(int)depth * 2 + 2,"",
- ( trie->widecharmap ? "UTF8" : "NATIVE" ), (int)word_count,
+ ( widecharmap ? "UTF8" : "NATIVE" ), (int)word_count,
(int)TRIE_CHARCOUNT(trie), trie->uniquecharcount,
(int)trie->minlen, (int)trie->maxlen )
);
- Newxz( trie->wordlen, word_count, U32 );
+ trie->wordlen = PerlMemShared_calloc( word_count, sizeof(U32) );
/*
We now know what we are dealing with in terms of unique chars and
DEBUG_TRIE_COMPILE_MORE_r( PerlIO_printf( Perl_debug_log,
"%*sCompiling trie using list compiler\n",
(int)depth * 2 + 2, ""));
-
- Newxz( trie->states, TRIE_CHARCOUNT(trie) + 2, reg_trie_state );
+
+ trie->states = PerlMemShared_calloc( TRIE_CHARCOUNT(trie) + 2,
+ sizeof(reg_trie_state) );
TRIE_LIST_NEW(1);
next_alloc = 2;
if ( uvc < 256 ) {
charid = trie->charmap[ uvc ];
} else {
- SV** const svpp = hv_fetch( trie->widecharmap, (char*)&uvc, sizeof( UV ), 0);
+ SV** const svpp = hv_fetch( widecharmap, (char*)&uvc, sizeof( UV ), 0);
if ( !svpp ) {
charid = 0;
} else {
/* next alloc is the NEXT state to be allocated */
trie->statecount = next_alloc;
- Renew( trie->states, next_alloc, reg_trie_state );
+ trie->states = PerlMemShared_realloc( trie->states, next_alloc
+ * sizeof(reg_trie_state) );
/* and now dump it out before we compress it */
- DEBUG_TRIE_COMPILE_MORE_r(
- dump_trie_interim_list(trie,next_alloc,depth+1)
+ DEBUG_TRIE_COMPILE_MORE_r(dump_trie_interim_list(trie, widecharmap,
+ revcharmap, next_alloc,
+ depth+1)
);
- Newxz( trie->trans, transcount ,reg_trie_trans );
+ trie->trans
+ = PerlMemShared_calloc( transcount, sizeof(reg_trie_trans) );
{
U32 state;
U32 tp = 0;
}
if ( transcount < tp + maxid - minid + 1) {
transcount *= 2;
- Renew( trie->trans, transcount, reg_trie_trans );
+ trie->trans
+ = PerlMemShared_realloc( trie->trans,
+ transcount
+ * sizeof(reg_trie_trans) );
Zero( trie->trans + (transcount / 2), transcount / 2 , reg_trie_trans );
}
base = trie->uniquecharcount + tp - minid;
"%*sCompiling trie using table compiler\n",
(int)depth * 2 + 2, ""));
- Newxz( trie->trans, ( TRIE_CHARCOUNT(trie) + 1 ) * trie->uniquecharcount + 1,
- reg_trie_trans );
- Newxz( trie->states, TRIE_CHARCOUNT(trie) + 2, reg_trie_state );
+ trie->trans = PerlMemShared_calloc( ( TRIE_CHARCOUNT(trie) + 1 )
+ * trie->uniquecharcount + 1,
+ sizeof(reg_trie_trans) );
+ trie->states = PerlMemShared_calloc( TRIE_CHARCOUNT(trie) + 2,
+ sizeof(reg_trie_state) );
next_alloc = trie->uniquecharcount + 1;
if ( uvc < 256 ) {
charid = trie->charmap[ uvc ];
} else {
- SV* const * const svpp = hv_fetch( trie->widecharmap, (char*)&uvc, sizeof( UV ), 0);
+ SV* const * const svpp = hv_fetch( widecharmap, (char*)&uvc, sizeof( UV ), 0);
charid = svpp ? (U16)SvIV(*svpp) : 0;
}
if ( charid ) {
} /* end second pass */
/* and now dump it out before we compress it */
- DEBUG_TRIE_COMPILE_MORE_r(
- dump_trie_interim_table(trie,next_alloc,depth+1)
- );
+ DEBUG_TRIE_COMPILE_MORE_r(dump_trie_interim_table(trie, widecharmap,
+ revcharmap,
+ next_alloc, depth+1));
{
/*
}
}
trie->lasttrans = pos + 1;
- Renew( trie->states, laststate, reg_trie_state);
+ trie->states = PerlMemShared_realloc( trie->states, laststate
+ * sizeof(reg_trie_state) );
DEBUG_TRIE_COMPILE_MORE_r(
PerlIO_printf( Perl_debug_log,
"%*sAlloc: %d Orig: %"IVdf" elements, Final:%"IVdf". Savings of %%%5.2f\n",
(UV)trie->lasttrans)
);
/* resize the trans array to remove unused space */
- Renew( trie->trans, trie->lasttrans, reg_trie_trans);
+ trie->trans = PerlMemShared_realloc( trie->trans, trie->lasttrans
+ * sizeof(reg_trie_trans) );
/* and now dump out the compressed format */
- DEBUG_TRIE_COMPILE_r(
- dump_trie(trie,depth+1)
- );
+ DEBUG_TRIE_COMPILE_r(dump_trie(trie, widecharmap, revcharmap, depth+1));
{ /* Modify the program and insert the new TRIE node*/
U8 nodetype =(U8)(flags & 0xFF);
/* But first we check to see if there is a common prefix we can
split out as an EXACT and put in front of the TRIE node. */
trie->startstate= 1;
- if ( trie->bitmap && !trie->widecharmap && !trie->jump ) {
+ if ( trie->bitmap && !widecharmap && !trie->jump ) {
U32 state;
for ( state = 1 ; state < trie->statecount-1 ; state++ ) {
U32 ofs = 0;
trie->trans[ base + ofs - trie->uniquecharcount ].check == state )
{
if ( ++count > 1 ) {
- SV **tmp = av_fetch( TRIE_REVCHARMAP(trie), ofs, 0);
+ SV **tmp = av_fetch( revcharmap, ofs, 0);
const U8 *ch = (U8*)SvPV_nolen_const( *tmp );
if ( state == 1 ) break;
if ( count == 2 ) {
(int)depth * 2 + 2, "",
(UV)state));
if (idx >= 0) {
- SV ** const tmp = av_fetch( TRIE_REVCHARMAP(trie), idx, 0);
+ SV ** const tmp = av_fetch( revcharmap, idx, 0);
const U8 * const ch = (U8*)SvPV_nolen_const( *tmp );
TRIE_BITMAP_SET(trie,*ch);
}
}
if ( count == 1 ) {
- SV **tmp = av_fetch( TRIE_REVCHARMAP(trie), idx, 0);
+ SV **tmp = av_fetch( revcharmap, idx, 0);
char *ch = SvPV_nolen( *tmp );
DEBUG_OPTIMISE_r({
SV *sv=sv_newmortal();
Set_Node_Offset_Length(fix, 0, 0);
}
while (word--) {
- SV ** const tmp = av_fetch( trie->words, word, 0 );
+ SV ** const tmp = av_fetch( trie_words, word, 0 );
if (tmp) {
if ( STR_LEN(convert) <= SvCUR(*tmp) )
sv_chop(*tmp, SvPV_nolen(*tmp) + STR_LEN(convert));
{
OP( convert ) = TRIEC;
Copy(trie->bitmap, ((struct regnode_charclass *)convert)->bitmap, ANYOF_BITMAP_SIZE, char);
- Safefree(trie->bitmap);
+ PerlMemShared_free(trie->bitmap);
trie->bitmap= NULL;
} else
OP( convert ) = TRIE;
Set_Node_Offset_Length(convert,mjd_offset,mjd_nodelen);
});
} /* end node insert */
-#ifndef DEBUGGING
- SvREFCNT_dec(TRIE_REVCHARMAP(trie));
+ RExC_rxi->data->data[ data_slot + 1 ] = (void*)widecharmap;
+#ifdef DEBUGGING
+ RExC_rxi->data->data[ data_slot + TRIE_WORDS_OFFSET ] = (void*)trie_words;
+ RExC_rxi->data->data[ data_slot + 3 ] = (void*)revcharmap;
+#else
+ SvREFCNT_dec(revcharmap);
#endif
return trie->jump
? MADE_JUMP_TRIE
try 'g' and succeed, prodceding to match 'cdgu'.
*/
/* add a fail transition */
- reg_trie_data *trie=(reg_trie_data *)RExC_rx->data->data[ARG(source)];
+ const U32 trie_offset = ARG(source);
+ reg_trie_data *trie=(reg_trie_data *)RExC_rxi->data->data[trie_offset];
U32 *q;
const U32 ucharcount = trie->uniquecharcount;
const U32 numstates = trie->statecount;
ARG_SET( stclass, data_slot );
- Newxz( aho, 1, reg_ac_data );
- RExC_rx->data->data[ data_slot ] = (void*)aho;
- aho->trie=trie;
- aho->states=(reg_trie_state *)savepvn((const char*)trie->states,
- numstates * sizeof(reg_trie_state));
+ aho = PerlMemShared_calloc( 1, sizeof(reg_ac_data) );
+ RExC_rxi->data->data[ data_slot ] = (void*)aho;
+ aho->trie=trie_offset;
+ aho->states=(reg_trie_state *)PerlMemShared_malloc( numstates * sizeof(reg_trie_state) );
+ Copy( trie->states, aho->states, numstates, reg_trie_state );
Newxz( q, numstates, U32);
- Newxz( aho->fail, numstates, U32 );
+ aho->fail = PerlMemShared_calloc( numstates, sizeof(U32) );
aho->refcount = 1;
fail = aho->fail;
/* initialize fail[0..1] to be 1 so that we always have
I32 stop; /* what stopparen do we use */
} scan_frame;
+
+#define SCAN_COMMIT(s, data, m) scan_commit(s, data, m, is_inf)
+
STATIC I32
S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
I32 *minlenp, I32 *deltap,
fake_study_recurse:
while ( scan && OP(scan) != END && scan < last ){
/* Peephole optimizer: */
- DEBUG_STUDYDATA(data,depth);
+ DEBUG_STUDYDATA("Peep:", data,depth);
DEBUG_PEEP("Peep",scan,depth);
JOIN_EXACT(scan,&min,0);
regnode * const startbranch=scan;
if (flags & SCF_DO_SUBSTR)
- scan_commit(pRExC_state, data, minlenp); /* Cannot merge strings after this. */
+ SCAN_COMMIT(pRExC_state, data, minlenp); /* Cannot merge strings after this. */
if (flags & SCF_DO_STCLASS)
cl_init_zero(pRExC_state, &accum);
end = RExC_close_parens[paren-1];
} else {
paren = 0;
- start = RExC_rx->program + 1;
+ start = RExC_rxi->program + 1;
end = RExC_opend;
}
if (!recursed) {
Newx(newframe,1,scan_frame);
} else {
if (flags & SCF_DO_SUBSTR) {
- scan_commit(pRExC_state,data,minlenp);
+ SCAN_COMMIT(pRExC_state,data,minlenp);
data->longest = &(data->longest_float);
}
is_inf = is_inf_internal = 1;
/* Search for fixed substrings supports EXACT only. */
if (flags & SCF_DO_SUBSTR) {
assert(data);
- scan_commit(pRExC_state, data, minlenp);
+ SCAN_COMMIT(pRExC_state, data, minlenp);
}
if (UTF) {
const U8 * const s = (U8 *)STRING(scan);
is_inf = is_inf_internal = 1;
scan = regnext(scan);
if (flags & SCF_DO_SUBSTR) {
- scan_commit(pRExC_state, data, minlenp); /* Cannot extend fixed substrings */
+ SCAN_COMMIT(pRExC_state, data, minlenp); /* Cannot extend fixed substrings */
data->longest = &(data->longest_float);
}
goto optimize_curly_tail;
next_is_eval = (OP(scan) == EVAL);
do_curly:
if (flags & SCF_DO_SUBSTR) {
- if (mincount == 0) scan_commit(pRExC_state,data,minlenp); /* Cannot extend fixed substrings */
+ if (mincount == 0) SCAN_COMMIT(pRExC_state,data,minlenp); /* Cannot extend fixed substrings */
pos_before = data->pos_min;
}
if (data) {
if (mincount != maxcount) {
/* Cannot extend fixed substrings found inside
the group. */
- scan_commit(pRExC_state,data,minlenp);
+ SCAN_COMMIT(pRExC_state,data,minlenp);
if (mincount && last_str) {
SV * const sv = data->last_found;
MAGIC * const mg = SvUTF8(sv) && SvMAGICAL(sv) ?
continue;
default: /* REF and CLUMP only? */
if (flags & SCF_DO_SUBSTR) {
- scan_commit(pRExC_state,data,minlenp); /* Cannot expect anything... */
+ SCAN_COMMIT(pRExC_state,data,minlenp); /* Cannot expect anything... */
data->longest = &(data->longest_float);
}
is_inf = is_inf_internal = 1;
int value = 0;
if (flags & SCF_DO_SUBSTR) {
- scan_commit(pRExC_state,data,minlenp);
+ SCAN_COMMIT(pRExC_state,data,minlenp);
data->pos_min++;
}
min++;
if ((flags & SCF_DO_SUBSTR) && data->last_found) {
f |= SCF_DO_SUBSTR;
if (scan->flags)
- scan_commit(pRExC_state, &data_fake,minlenp);
+ SCAN_COMMIT(pRExC_state, &data_fake,minlenp);
data_fake.last_found=newSVsv(data->last_found);
}
}
if ((flags & SCF_DO_SUBSTR) && data_fake.last_found) {
if (RExC_rx->minlen<*minnextp)
RExC_rx->minlen=*minnextp;
- scan_commit(pRExC_state, &data_fake, minnextp);
+ SCAN_COMMIT(pRExC_state, &data_fake, minnextp);
SvREFCNT_dec(data_fake.last_found);
if ( data_fake.minlen_fixed != minlenp )
}
else if ( PL_regkind[OP(scan)] == ENDLIKE ) {
if (flags & SCF_DO_SUBSTR) {
- scan_commit(pRExC_state,data,minlenp);
+ SCAN_COMMIT(pRExC_state,data,minlenp);
flags &= ~SCF_DO_SUBSTR;
}
if (data && OP(scan)==ACCEPT) {
else if (OP(scan) == LOGICAL && scan->flags == 2) /* Embedded follows */
{
if (flags & SCF_DO_SUBSTR) {
- scan_commit(pRExC_state,data,minlenp);
+ SCAN_COMMIT(pRExC_state,data,minlenp);
data->longest = &(data->longest_float);
}
is_inf = is_inf_internal = 1;
check there too. */
regnode *trie_node= scan;
regnode *tail= regnext(scan);
- reg_trie_data *trie = (reg_trie_data*)RExC_rx->data->data[ ARG(scan) ];
+ reg_trie_data *trie = (reg_trie_data*)RExC_rxi->data->data[ ARG(scan) ];
I32 max1 = 0, min1 = I32_MAX;
struct regnode_charclass_class accum;
if (flags & SCF_DO_SUBSTR) /* XXXX Add !SUSPEND? */
- scan_commit(pRExC_state, data,minlenp); /* Cannot merge strings after this. */
+ SCAN_COMMIT(pRExC_state, data,minlenp); /* Cannot merge strings after this. */
if (flags & SCF_DO_STCLASS)
cl_init_zero(pRExC_state, &accum);
}
#else
else if (PL_regkind[OP(scan)] == TRIE) {
- reg_trie_data *trie = (reg_trie_data*)RExC_rx->data->data[ ARG(scan) ];
+ reg_trie_data *trie = (reg_trie_data*)RExC_rxi->data->data[ ARG(scan) ];
U8*bang=NULL;
min += trie->minlen;
delta += (trie->maxlen - trie->minlen);
flags &= ~SCF_DO_STCLASS; /* xxx */
if (flags & SCF_DO_SUBSTR) {
- scan_commit(pRExC_state,data,minlenp); /* Cannot expect anything... */
+ SCAN_COMMIT(pRExC_state,data,minlenp); /* Cannot expect anything... */
data->pos_min += trie->minlen;
data->pos_delta += (trie->maxlen - trie->minlen);
if (trie->maxlen != trie->minlen)
finish:
assert(!frame);
+ DEBUG_STUDYDATA("pre-fin:",data,depth);
*scanp = scan;
*deltap = is_inf_internal ? I32_MAX : delta;
if (flags & SCF_TRIE_RESTUDY)
data->flags |= SCF_TRIE_RESTUDY;
- DEBUG_STUDYDATA(data,depth);
+ DEBUG_STUDYDATA("post-fin:",data,depth);
return min < stopmin ? min : stopmin;
}
-STATIC I32
-S_add_data(RExC_state_t *pRExC_state, I32 n, const char *s)
+STATIC U32
+S_add_data(RExC_state_t *pRExC_state, U32 n, const char *s)
{
- if (RExC_rx->data) {
- const U32 count = RExC_rx->data->count;
- Renewc(RExC_rx->data,
- sizeof(*RExC_rx->data) + sizeof(void*) * (count + n - 1),
- char, struct reg_data);
- Renew(RExC_rx->data->what, count + n, U8);
- RExC_rx->data->count += n;
- }
- else {
- Newxc(RExC_rx->data, sizeof(*RExC_rx->data) + sizeof(void*) * (n - 1),
- char, struct reg_data);
- Newx(RExC_rx->data->what, n, U8);
- RExC_rx->data->count = n;
- }
- Copy(s, RExC_rx->data->what + RExC_rx->data->count - n, n, U8);
- return RExC_rx->data->count - n;
+ U32 count = RExC_rxi->data ? RExC_rxi->data->count : 0;
+
+ Renewc(RExC_rxi->data,
+ sizeof(*RExC_rxi->data) + sizeof(void*) * (count + n - 1),
+ char, struct reg_data);
+ if(count)
+ Renew(RExC_rxi->data->what, count + n, U8);
+ else
+ Newx(RExC_rxi->data->what, n, U8);
+ RExC_rxi->data->count = count + n;
+ Copy(s, RExC_rxi->data->what + count, n, U8);
+ return count;
}
+/*XXX: todo make this not included in a non debugging perl */
#ifndef PERL_IN_XSUB_RE
void
Perl_reginitcolors(pTHX)
#endif
BEGIN_BLOCK
register regexp *r;
+ register regexp_internal *ri;
regnode *scan;
regnode *first;
I32 flags;
RExC_end = xend;
RExC_naughty = 0;
RExC_npar = 1;
- RExC_cpar = 1;
RExC_nestroot = 0;
RExC_size = 0L;
RExC_emit = &PL_regdummy;
/* Allocate space and zero-initialize. Note, the two step process
of zeroing when in debug mode, thus anything assigned has to
happen after that */
- Newxc(r, sizeof(regexp) + (unsigned)RExC_size * sizeof(regnode),
- char, regexp);
- if (r == NULL)
+ Newxz(r, 1, regexp);
+ Newxc(ri, sizeof(regexp_internal) + (unsigned)RExC_size * sizeof(regnode),
+ char, regexp_internal);
+ if ( r == NULL || ri == NULL )
FAIL("Regexp out of space");
#ifdef DEBUGGING
/* avoid reading uninitialized memory in DEBUGGING code in study_chunk() */
- Zero(r, sizeof(regexp) + (unsigned)RExC_size * sizeof(regnode), char);
+ Zero(ri, sizeof(regexp_internal) + (unsigned)RExC_size * sizeof(regnode), char);
#else
- /* bulk initialize fields with 0. */
- Zero(r, sizeof(regexp), char);
+ /* bulk initialize base fields with 0. */
+ Zero(ri, sizeof(regexp_internal), char);
#endif
/* non-zero initialization begins here */
+ RXi_SET( r, ri );
r->engine= RE_ENGINE_PTR;
r->refcnt = 1;
r->prelen = xend - exp;
}
/* Useful during FAIL. */
- Newxz(r->offsets, 2*RExC_size+1, U32); /* MJD 20001228 */
- if (r->offsets) {
- r->offsets[0] = RExC_size;
+ Newxz(ri->offsets, 2*RExC_size+1, U32); /* MJD 20001228 */
+ if (ri->offsets) {
+ ri->offsets[0] = RExC_size;
}
DEBUG_OFFSETS_r(PerlIO_printf(Perl_debug_log,
"%s %"UVuf" bytes for offset annotations.\n",
- r->offsets ? "Got" : "Couldn't get",
+ ri->offsets ? "Got" : "Couldn't get",
(UV)((2*RExC_size+1) * sizeof(U32))));
RExC_rx = r;
+ RExC_rxi = ri;
/* Second pass: emit code. */
RExC_flags = pm->op_pmflags; /* don't let top level (?i) bleed */
RExC_end = xend;
RExC_naughty = 0;
RExC_npar = 1;
- RExC_cpar = 1;
- RExC_emit_start = r->program;
- RExC_emit = r->program;
+ RExC_emit_start = ri->program;
+ RExC_emit = ri->program;
#ifdef DEBUGGING
/* put a sentinal on the end of the program so we can check for
overwrites */
- r->program[RExC_size].type = 255;
+ ri->program[RExC_size].type = 255;
#endif
/* Store the count of eval-groups for security checks: */
- RExC_emit->next_off = (RExC_seen_evals > (I32)U16_MAX) ? U16_MAX : (U16)RExC_seen_evals;
+ RExC_rx->seen_evals = RExC_seen_evals;
REGC((U8)REG_MAGIC, (char*) RExC_emit++);
- r->data = 0;
if (reg(pRExC_state, 0, &flags,1) == NULL)
return(NULL);
pm->op_pmflags = RExC_flags;
if (UTF)
r->extflags |= RXf_UTF8; /* Unicode in it? */
- r->regstclass = NULL;
+ ri->regstclass = NULL;
if (RExC_naughty >= 10) /* Probably an expensive pattern. */
r->intflags |= PREGf_NAUGHTY;
- scan = r->program + 1; /* First BRANCH. */
+ scan = ri->program + 1; /* First BRANCH. */
/* testing for BRANCH here tells us whether there is "must appear"
data in the pattern. If there is then we can use it for optimisations */
if (OP(first) == EXACT)
NOOP; /* Empty, get anchored substr later. */
else if ((OP(first) == EXACTF || OP(first) == EXACTFL))
- r->regstclass = first;
+ ri->regstclass = first;
}
#ifdef TRIE_STCLASS
else if (PL_regkind[OP(first)] == TRIE &&
- ((reg_trie_data *)r->data->data[ ARG(first) ])->minlen>0)
+ ((reg_trie_data *)ri->data->data[ ARG(first) ])->minlen>0)
{
regnode *trie_op;
/* this can happen only on restudy */
if ( OP(first) == TRIE ) {
- struct regnode_1 *trieop;
- Newxz(trieop,1,struct regnode_1);
+ struct regnode_1 *trieop =
+ PerlMemShared_calloc(1, sizeof(struct regnode_1));
StructCopy(first,trieop,struct regnode_1);
trie_op=(regnode *)trieop;
} else {
- struct regnode_charclass *trieop;
- Newxz(trieop,1,struct regnode_charclass);
+ struct regnode_charclass *trieop =
+ PerlMemShared_calloc(1, sizeof(struct regnode_charclass));
StructCopy(first,trieop,struct regnode_charclass);
trie_op=(regnode *)trieop;
}
OP(trie_op)+=2;
make_trie_failtable(pRExC_state, (regnode *)first, trie_op, 0);
- r->regstclass = trie_op;
+ ri->regstclass = trie_op;
}
#endif
else if (strchr((const char*)PL_simple,OP(first)))
- r->regstclass = first;
+ ri->regstclass = first;
else if (PL_regkind[OP(first)] == BOUND ||
PL_regkind[OP(first)] == NBOUND)
- r->regstclass = first;
+ ri->regstclass = first;
else if (PL_regkind[OP(first)] == BOL) {
r->extflags |= (OP(first) == MBOL
? RXf_ANCH_MBOL
data.last_found = newSVpvs("");
data.longest = &(data.longest_fixed);
first = scan;
- if (!r->regstclass) {
+ if (!ri->regstclass) {
cl_init(pRExC_state, &ch_class);
data.start_class = &ch_class;
stclass_flag = SCF_DO_STCLASS_AND;
&& !RExC_seen_zerolen
&& (!(RExC_seen & REG_SEEN_GPOS) || (r->extflags & RXf_ANCH_GPOS)))
r->extflags |= RXf_CHECK_ALL;
- scan_commit(pRExC_state, &data,&minlen);
+ scan_commit(pRExC_state, &data,&minlen,0);
SvREFCNT_dec(data.last_found);
/* Note that code very similar to this but for anchored string
SvREFCNT_dec(data.longest_fixed);
longest_fixed_length = 0;
}
- if (r->regstclass
- && (OP(r->regstclass) == REG_ANY || OP(r->regstclass) == SANY))
- r->regstclass = NULL;
+ if (ri->regstclass
+ && (OP(ri->regstclass) == REG_ANY || OP(ri->regstclass) == SANY))
+ ri->regstclass = NULL;
if ((!(r->anchored_substr || r->anchored_utf8) || r->anchored_offset)
&& stclass_flag
&& !(data.start_class->flags & ANYOF_EOS)
&& !cl_is_anything(data.start_class))
{
- const I32 n = add_data(pRExC_state, 1, "f");
+ const U32 n = add_data(pRExC_state, 1, "f");
- Newx(RExC_rx->data->data[n], 1,
+ Newx(RExC_rxi->data->data[n], 1,
struct regnode_charclass_class);
StructCopy(data.start_class,
- (struct regnode_charclass_class*)RExC_rx->data->data[n],
+ (struct regnode_charclass_class*)RExC_rxi->data->data[n],
struct regnode_charclass_class);
- r->regstclass = (regnode*)RExC_rx->data->data[n];
+ ri->regstclass = (regnode*)RExC_rxi->data->data[n];
r->intflags &= ~PREGf_SKIP; /* Used in find_byclass(). */
DEBUG_COMPILE_r({ SV *sv = sv_newmortal();
regprop(r, sv, (regnode*)data.start_class);
DEBUG_PARSE_r(PerlIO_printf(Perl_debug_log, "\nMulti Top Level\n"));
- scan = r->program + 1;
+ scan = ri->program + 1;
cl_init(pRExC_state, &ch_class);
data.start_class = &ch_class;
data.last_closep = &last_close;
if (!(data.start_class->flags & ANYOF_EOS)
&& !cl_is_anything(data.start_class))
{
- const I32 n = add_data(pRExC_state, 1, "f");
+ const U32 n = add_data(pRExC_state, 1, "f");
- Newx(RExC_rx->data->data[n], 1,
+ Newx(RExC_rxi->data->data[n], 1,
struct regnode_charclass_class);
StructCopy(data.start_class,
- (struct regnode_charclass_class*)RExC_rx->data->data[n],
+ (struct regnode_charclass_class*)RExC_rxi->data->data[n],
struct regnode_charclass_class);
- r->regstclass = (regnode*)RExC_rx->data->data[n];
+ ri->regstclass = (regnode*)RExC_rxi->data->data[n];
r->intflags &= ~PREGf_SKIP; /* Used in find_byclass(). */
DEBUG_COMPILE_r({ SV* sv = sv_newmortal();
regprop(r, sv, (regnode*)data.start_class);
the "real" pattern. */
DEBUG_OPTIMISE_r({
PerlIO_printf(Perl_debug_log,"minlen: %"IVdf" r->minlen:%"IVdf"\n",
- minlen, r->minlen);
+ (IV)minlen, (IV)r->minlen);
});
r->minlenret = minlen;
if (r->minlen < minlen)
PerlIO_printf(Perl_debug_log,"Final program:\n");
regdump(r);
});
- DEBUG_OFFSETS_r(if (r->offsets) {
- const U32 len = r->offsets[0];
+ DEBUG_OFFSETS_r(if (ri->offsets) {
+ const U32 len = ri->offsets[0];
U32 i;
GET_RE_DEBUG_FLAGS_DECL;
- PerlIO_printf(Perl_debug_log, "Offsets: [%"UVuf"]\n\t", (UV)r->offsets[0]);
+ PerlIO_printf(Perl_debug_log, "Offsets: [%"UVuf"]\n\t", (UV)ri->offsets[0]);
for (i = 1; i <= len; i++) {
- if (r->offsets[i*2-1] || r->offsets[i*2])
+ if (ri->offsets[i*2-1] || ri->offsets[i*2])
PerlIO_printf(Perl_debug_log, "%"UVuf":%"UVuf"[%"UVuf"] ",
- (UV)i, (UV)r->offsets[i*2-1], (UV)r->offsets[i*2]);
+ (UV)i, (UV)ri->offsets[i*2-1], (UV)ri->offsets[i*2]);
}
PerlIO_printf(Perl_debug_log, "\n");
});
if (start_arg) {
SV *sv = newSVpvn( start_arg, RExC_parse - start_arg);
ARG(ret) = add_data( pRExC_state, 1, "S" );
- RExC_rx->data->data[ARG(ret)]=(void*)sv;
+ RExC_rxi->data->data[ARG(ret)]=(void*)sv;
ret->flags = 0;
} else {
ret->flags = 1;
/* FALL THROUGH */
case '{': /* (?{...}) */
{
- I32 count = 1, n = 0;
+ I32 count = 1;
+ U32 n = 0;
char c;
char *s = RExC_parse;
LEAVE;
n = add_data(pRExC_state, 3, "nop");
- RExC_rx->data->data[n] = (void*)rop;
- RExC_rx->data->data[n+1] = (void*)sop;
- RExC_rx->data->data[n+2] = (void*)pad;
+ RExC_rxi->data->data[n] = (void*)rop;
+ RExC_rxi->data->data[n+1] = (void*)sop;
+ RExC_rxi->data->data[n+2] = (void*)pad;
SvREFCNT_dec(sv);
}
else { /* First pass */
{
char ch = RExC_parse[0] == '<' ? '>' : '\'';
char *name_start= RExC_parse++;
- I32 num = 0;
+ U32 num = 0;
SV *sv_dat=reg_scan_name(pRExC_state,
SIZE_ONLY ? REG_RSN_RETURN_NULL : REG_RSN_RETURN_DATA);
if (RExC_parse == name_start || *RExC_parse != ch)
RExC_parse++;
if (!SIZE_ONLY) {
num = add_data( pRExC_state, 1, "S" );
- RExC_rx->data->data[num]=(void*)sv_dat;
+ RExC_rxi->data->data[num]=(void*)sv_dat;
SvREFCNT_inc(sv_dat);
}
ret = reganode(pRExC_state,NGROUPP,num);
ender = reg_node(pRExC_state, TAIL);
break;
case 1:
- RExC_cpar++;
ender = reganode(pRExC_state, CLOSE, parno);
if (!SIZE_ONLY && RExC_seen & REG_SEEN_RECURSE) {
DEBUG_OPTIMISE_MORE_r(PerlIO_printf(Perl_debug_log,
goto defchar;
} else {
char* name_start = (RExC_parse += 2);
- I32 num = 0;
+ U32 num = 0;
SV *sv_dat = reg_scan_name(pRExC_state,
SIZE_ONLY ? REG_RSN_RETURN_NULL : REG_RSN_RETURN_DATA);
ch= (ch == '<') ? '>' : '\'';
if (!SIZE_ONLY) {
num = add_data( pRExC_state, 1, "S" );
ARG_SET(ret,num);
- RExC_rx->data->data[num]=(void*)sv_dat;
+ RExC_rxi->data->data[num]=(void*)sv_dat;
SvREFCNT_inc(sv_dat);
}
/* override incorrect value set in reganode MJD */
RExC_parse++;
num = atoi(RExC_parse);
if (isrel) {
- num = RExC_cpar - num;
+ num = RExC_npar - num;
if (num < 1)
vFAIL("Reference to nonexistent or unclosed group");
}
if (!SIZE_ONLY) {
if (num > (I32)RExC_rx->nparens)
vFAIL("Reference to nonexistent group");
- /* People make this error all the time apparently.
- So we cant fail on it, even though we should
-
- else if (num >= RExC_cpar)
- vFAIL("Reference to unclosed group will always match");
- */
}
RExC_sawback = 1;
ret = reganode(pRExC_state,
case 'P':
case 'N':
case 'R':
+ case 'k':
--p;
goto loopdone;
case 'n':
av_store(av, 2, (SV*)unicode_alternate);
rv = newRV_noinc((SV*)av);
n = add_data(pRExC_state, 1, "s");
- RExC_rx->data->data[n] = (void*)rv;
+ RExC_rxi->data->data[n] = (void*)rv;
ARG_SET(ret, n);
}
return ret;
SV * const mysv_val=sv_newmortal();
DEBUG_PARSE_MSG("");
regprop(RExC_rx, mysv_val, val);
- PerlIO_printf(Perl_debug_log, "~ attach to %s (%d) offset to %d\n",
- SvPV_nolen_const(mysv_val),
- REG_NODE_NUM(val),
- val - scan
+ PerlIO_printf(Perl_debug_log, "~ attach to %s (%"IVdf") offset to %"IVdf"\n",
+ SvPV_nolen_const(mysv_val),
+ (IV)REG_NODE_NUM(val),
+ (IV)(val - scan)
);
});
if (reg_off_by_arg[OP(scan)]) {
dVAR;
SV * const sv = sv_newmortal();
SV *dsv= sv_newmortal();
+ RXi_GET_DECL(r,ri);
- (void)dumpuntil(r, r->program, r->program + 1, NULL, NULL, sv, 0, 0);
+ (void)dumpuntil(r, ri->program, ri->program + 1, NULL, NULL, sv, 0, 0);
/* Header fields of interest. */
if (r->anchored_substr) {
if (r->check_substr || r->check_utf8)
PerlIO_printf(Perl_debug_log, ") ");
- if (r->regstclass) {
- regprop(r, sv, r->regstclass);
+ if (ri->regstclass) {
+ regprop(r, sv, ri->regstclass);
PerlIO_printf(Perl_debug_log, "stclass %s ", SvPVX_const(sv));
}
if (r->extflags & RXf_ANCH) {
PerlIO_putc(Perl_debug_log, ' ');
}
if (r->extflags & RXf_GPOS_SEEN)
- PerlIO_printf(Perl_debug_log, "GPOS:%"UVuf" ", r->gofs);
+ PerlIO_printf(Perl_debug_log, "GPOS:%"UVuf" ", (UV)r->gofs);
if (r->intflags & PREGf_SKIP)
PerlIO_printf(Perl_debug_log, "plus ");
if (r->intflags & PREGf_IMPLICIT)
PerlIO_printf(Perl_debug_log, "implicit ");
- PerlIO_printf(Perl_debug_log, "minlen %ld ", (long) r->minlen);
+ PerlIO_printf(Perl_debug_log, "minlen %"IVdf" ", (IV)r->minlen);
if (r->extflags & RXf_EVAL_SEEN)
PerlIO_printf(Perl_debug_log, "with eval ");
PerlIO_printf(Perl_debug_log, "\n");
#ifdef DEBUGGING
dVAR;
register int k;
+ RXi_GET_DECL(prog,progi);
GET_RE_DEBUG_FLAGS_DECL;
+
sv_setpvn(sv, "", 0);
Perl_sv_catpvf(aTHX_ sv, " %s", s );
} else if (k == TRIE) {
/* print the details of the trie in dumpuntil instead, as
- * prog->data isn't available here */
+ * progi->data isn't available here */
const char op = OP(o);
const I32 n = ARG(o);
const reg_ac_data * const ac = IS_TRIE_AC(op) ?
- (reg_ac_data *)prog->data->data[n] :
+ (reg_ac_data *)progi->data->data[n] :
NULL;
- const reg_trie_data * const trie = !IS_TRIE_AC(op) ?
- (reg_trie_data*)prog->data->data[n] :
- ac->trie;
+ const reg_trie_data * const trie
+ = (reg_trie_data*)progi->data->data[!IS_TRIE_AC(op) ? n : ac->trie];
Perl_sv_catpvf(aTHX_ sv, "-%s",reg_name[o->flags]);
DEBUG_TRIE_COMPILE_r(
else if (k == VERB) {
if (!o->flags)
Perl_sv_catpvf(aTHX_ sv, ":%"SVf,
- (SV*)prog->data->data[ ARG( o ) ]);
+ (SV*)progi->data->data[ ARG( o ) ]);
} else if (k == LOGICAL)
Perl_sv_catpvf(aTHX_ sv, "[%d]", o->flags); /* 2: embedded, otherwise 1 */
else if (k == ANYOF) {
}
/*
- pregfree - free a regexp
+ pregfree()
+
+ handles refcounting and freeing the perl core regexp structure. When
+ it is necessary to actually free the structure the first thing it
+ does is call the 'free' method of the regexp_engine associated to to
+ the regexp, allowing the handling of the void *pprivate; member
+ first. (This routine is not overridable by extensions, which is why
+ the extensions free is called first.)
- See regdupe below if you change anything here.
+ See regdupe and regdupe_internal if you change anything here.
*/
-
+#ifndef PERL_IN_XSUB_RE
void
Perl_pregfree(pTHX_ struct regexp *r)
{
dVAR;
-
GET_RE_DEBUG_FLAGS_DECL;
if (!r || (--r->refcnt > 0))
return;
- DEBUG_COMPILE_r({
- if (!PL_colorset)
- reginitcolors();
- {
- SV *dsv= sv_newmortal();
- RE_PV_QUOTED_DECL(s, (r->extflags & RXf_UTF8),
- dsv, r->precomp, r->prelen, 60);
- PerlIO_printf(Perl_debug_log,"%sFreeing REx:%s %s\n",
- PL_colors[4],PL_colors[5],s);
- }
- });
-
+
+ CALLREGFREE_PVT(r); /* free the private data */
+
/* gcov results gave these as non-null 100% of the time, so there's no
optimisation in checking them before calling Safefree */
Safefree(r->precomp);
- Safefree(r->offsets); /* 20010421 MJD */
RX_MATCH_COPY_FREE(r);
#ifdef PERL_OLD_COPY_ON_WRITE
if (r->saved_copy)
}
if (r->paren_names)
SvREFCNT_dec(r->paren_names);
- if (r->data) {
- int n = r->data->count;
+
+ Safefree(r->startp);
+ Safefree(r->endp);
+ Safefree(r);
+}
+#endif
+
+/* regfree_internal()
+
+ Free the private data in a regexp. This is overloadable by
+ extensions. Perl takes care of the regexp structure in pregfree(),
+ this covers the *pprivate pointer which technically perldoesnt
+ know about, however of course we have to handle the
+ regexp_internal structure when no extension is in use.
+
+ Note this is called before freeing anything in the regexp
+ structure.
+ */
+
+void
+Perl_regfree_internal(pTHX_ struct regexp *r)
+{
+ dVAR;
+ RXi_GET_DECL(r,ri);
+ GET_RE_DEBUG_FLAGS_DECL;
+
+ DEBUG_COMPILE_r({
+ if (!PL_colorset)
+ reginitcolors();
+ {
+ SV *dsv= sv_newmortal();
+ RE_PV_QUOTED_DECL(s, (r->extflags & RXf_UTF8),
+ dsv, r->precomp, r->prelen, 60);
+ PerlIO_printf(Perl_debug_log,"%sFreeing REx:%s %s\n",
+ PL_colors[4],PL_colors[5],s);
+ }
+ });
+
+ Safefree(ri->offsets); /* 20010421 MJD */
+ if (ri->data) {
+ int n = ri->data->count;
PAD* new_comppad = NULL;
PAD* old_comppad;
PADOFFSET refcnt;
while (--n >= 0) {
/* If you add a ->what type here, update the comment in regcomp.h */
- switch (r->data->what[n]) {
+ switch (ri->data->what[n]) {
case 's':
case 'S':
- SvREFCNT_dec((SV*)r->data->data[n]);
+ case 'u':
+ SvREFCNT_dec((SV*)ri->data->data[n]);
break;
case 'f':
- Safefree(r->data->data[n]);
+ Safefree(ri->data->data[n]);
break;
case 'p':
- new_comppad = (AV*)r->data->data[n];
+ new_comppad = (AV*)ri->data->data[n];
break;
case 'o':
if (new_comppad == NULL)
(SvTYPE(new_comppad) == SVt_PVAV) ? new_comppad : NULL
);
OP_REFCNT_LOCK;
- refcnt = OpREFCNT_dec((OP_4tree*)r->data->data[n]);
+ refcnt = OpREFCNT_dec((OP_4tree*)ri->data->data[n]);
OP_REFCNT_UNLOCK;
if (!refcnt)
- op_free((OP_4tree*)r->data->data[n]);
+ op_free((OP_4tree*)ri->data->data[n]);
PAD_RESTORE_LOCAL(old_comppad);
SvREFCNT_dec((SV*)new_comppad);
{ /* Aho Corasick add-on structure for a trie node.
Used in stclass optimization only */
U32 refcount;
- reg_ac_data *aho=(reg_ac_data*)r->data->data[n];
+ reg_ac_data *aho=(reg_ac_data*)ri->data->data[n];
OP_REFCNT_LOCK;
refcount = --aho->refcount;
OP_REFCNT_UNLOCK;
if ( !refcount ) {
- Safefree(aho->states);
- Safefree(aho->fail);
- aho->trie=NULL; /* not necessary to free this as it is
- handled by the 't' case */
- Safefree(r->data->data[n]); /* do this last!!!! */
- Safefree(r->regstclass);
+ PerlMemShared_free(aho->states);
+ PerlMemShared_free(aho->fail);
+ /* do this last!!!! */
+ PerlMemShared_free(ri->data->data[n]);
+ PerlMemShared_free(ri->regstclass);
}
}
break;
{
/* trie structure. */
U32 refcount;
- reg_trie_data *trie=(reg_trie_data*)r->data->data[n];
+ reg_trie_data *trie=(reg_trie_data*)ri->data->data[n];
OP_REFCNT_LOCK;
refcount = --trie->refcount;
OP_REFCNT_UNLOCK;
if ( !refcount ) {
- Safefree(trie->charmap);
- if (trie->widecharmap)
- SvREFCNT_dec((SV*)trie->widecharmap);
- Safefree(trie->states);
- Safefree(trie->trans);
+ PerlMemShared_free(trie->charmap);
+ PerlMemShared_free(trie->states);
+ PerlMemShared_free(trie->trans);
if (trie->bitmap)
- Safefree(trie->bitmap);
+ PerlMemShared_free(trie->bitmap);
if (trie->wordlen)
- Safefree(trie->wordlen);
+ PerlMemShared_free(trie->wordlen);
if (trie->jump)
- Safefree(trie->jump);
+ PerlMemShared_free(trie->jump);
if (trie->nextword)
- Safefree(trie->nextword);
-#ifdef DEBUGGING
- if (trie->words)
- SvREFCNT_dec((SV*)trie->words);
- if (trie->revcharmap)
- SvREFCNT_dec((SV*)trie->revcharmap);
-#endif
- Safefree(r->data->data[n]); /* do this last!!!! */
+ PerlMemShared_free(trie->nextword);
+ /* do this last!!!! */
+ PerlMemShared_free(ri->data->data[n]);
}
}
break;
default:
- Perl_croak(aTHX_ "panic: regfree data code '%c'", r->data->what[n]);
+ Perl_croak(aTHX_ "panic: regfree data code '%c'", ri->data->what[n]);
}
}
- Safefree(r->data->what);
- Safefree(r->data);
+ Safefree(ri->data->what);
+ Safefree(ri->data);
}
- Safefree(r->startp);
- Safefree(r->endp);
- if (r->swap) {
- Safefree(r->swap->startp);
- Safefree(r->swap->endp);
- Safefree(r->swap);
+ if (ri->swap) {
+ Safefree(ri->swap->startp);
+ Safefree(ri->swap->endp);
+ Safefree(ri->swap);
}
- Safefree(r);
+ Safefree(ri);
}
#define sv_dup_inc(s,t) SvREFCNT_inc(sv_dup(s,t))
given regexp structure. It is a no-op when not under USE_ITHREADS.
(Originally this *was* re_dup() for change history see sv.c)
- See pregfree() above if you change anything here.
+ After all of the core data stored in struct regexp is duplicated
+ the regexp_engine.dupe method is used to copy any private data
+ stored in the *pprivate pointer. This allows extensions to handle
+ any duplication it needs to do.
+
+ See pregfree() and regfree_internal() if you change anything here.
*/
#if defined(USE_ITHREADS)
+#ifndef PERL_IN_XSUB_RE
regexp *
-Perl_regdupe(pTHX_ const regexp *r, CLONE_PARAMS *param)
+Perl_re_dup(pTHX_ const regexp *r, CLONE_PARAMS *param)
{
dVAR;
- REGEXP *ret;
- int i, len, npar;
+ regexp *ret;
+ int i, npar;
struct reg_substr_datum *s;
if (!r)
if ((ret = (REGEXP *)ptr_table_fetch(PL_ptr_table, r)))
return ret;
- len = r->offsets[0];
+
npar = r->nparens+1;
-
- Newxc(ret, sizeof(regexp) + (len+1)*sizeof(regnode), char, regexp);
- Copy(r->program, ret->program, len+1, regnode);
-
+ Newxz(ret, 1, regexp);
Newx(ret->startp, npar, I32);
Copy(r->startp, ret->startp, npar, I32);
Newx(ret->endp, npar, I32);
- Copy(r->startp, ret->startp, npar, I32);
- if(r->swap) {
- Newx(ret->swap, 1, regexp_paren_ofs);
- /* no need to copy these */
- Newx(ret->swap->startp, npar, I32);
- Newx(ret->swap->endp, npar, I32);
- } else {
- ret->swap = NULL;
- }
+ Copy(r->endp, ret->endp, npar, I32);
Newx(ret->substrs, 1, struct reg_substr_data);
for (s = ret->substrs->data, i = 0; i < 3; i++, s++) {
s->substr = sv_dup_inc(r->substrs->data[i].substr, param);
s->utf8_substr = sv_dup_inc(r->substrs->data[i].utf8_substr, param);
}
+
- ret->regstclass = NULL;
- if (r->data) {
+ ret->precomp = SAVEPVN(r->precomp, r->prelen);
+ ret->refcnt = r->refcnt;
+ ret->minlen = r->minlen;
+ ret->minlenret = r->minlenret;
+ ret->prelen = r->prelen;
+ ret->nparens = r->nparens;
+ ret->lastparen = r->lastparen;
+ ret->lastcloseparen = r->lastcloseparen;
+ ret->intflags = r->intflags;
+ ret->extflags = r->extflags;
+
+ ret->sublen = r->sublen;
+
+ ret->engine = r->engine;
+
+ ret->paren_names = hv_dup_inc(r->paren_names, param);
+
+ if (RX_MATCH_COPIED(ret))
+ ret->subbeg = SAVEPVN(r->subbeg, r->sublen);
+ else
+ ret->subbeg = NULL;
+#ifdef PERL_OLD_COPY_ON_WRITE
+ ret->saved_copy = NULL;
+#endif
+
+ ret->pprivate = r->pprivate;
+ RXi_SET(ret,CALLREGDUPE_PVT(ret,param));
+
+ ptr_table_store(PL_ptr_table, r, ret);
+ return ret;
+}
+#endif /* PERL_IN_XSUB_RE */
+
+/*
+ regdupe_internal()
+
+ This is the internal complement to regdupe() which is used to copy
+ the structure pointed to by the *pprivate pointer in the regexp.
+ This is the core version of the extension overridable cloning hook.
+ The regexp structure being duplicated will be copied by perl prior
+ to this and will be provided as the regexp *r argument, however
+ with the /old/ structures pprivate pointer value. Thus this routine
+ may override any copying normally done by perl.
+
+ It returns a pointer to the new regexp_internal structure.
+*/
+
+void *
+Perl_regdupe_internal(pTHX_ const regexp *r, CLONE_PARAMS *param)
+{
+ dVAR;
+ regexp_internal *reti;
+ int len, npar;
+ RXi_GET_DECL(r,ri);
+
+ npar = r->nparens+1;
+ len = ri->offsets[0];
+
+ Newxc(reti, sizeof(regexp_internal) + (len+1)*sizeof(regnode), char, regexp_internal);
+ Copy(ri->program, reti->program, len+1, regnode);
+
+ if(ri->swap) {
+ Newx(reti->swap, 1, regexp_paren_ofs);
+ /* no need to copy these */
+ Newx(reti->swap->startp, npar, I32);
+ Newx(reti->swap->endp, npar, I32);
+ } else {
+ reti->swap = NULL;
+ }
+
+
+ reti->regstclass = NULL;
+ if (ri->data) {
struct reg_data *d;
- const int count = r->data->count;
+ const int count = ri->data->count;
int i;
Newxc(d, sizeof(struct reg_data) + count*sizeof(void *),
d->count = count;
for (i = 0; i < count; i++) {
- d->what[i] = r->data->what[i];
+ d->what[i] = ri->data->what[i];
switch (d->what[i]) {
- /* legal options are one of: sSfpontT
+ /* legal options are one of: sSfpontTu
see also regcomp.h and pregfree() */
case 's':
case 'S':
- d->data[i] = sv_dup_inc((SV *)r->data->data[i], param);
- break;
- case 'p':
- d->data[i] = av_dup_inc((AV *)r->data->data[i], param);
+ case 'p': /* actually an AV, but the dup function is identical. */
+ case 'u': /* actually an HV, but the dup function is identical. */
+ d->data[i] = sv_dup_inc((SV *)ri->data->data[i], param);
break;
case 'f':
/* This is cheating. */
Newx(d->data[i], 1, struct regnode_charclass_class);
- StructCopy(r->data->data[i], d->data[i],
+ StructCopy(ri->data->data[i], d->data[i],
struct regnode_charclass_class);
- ret->regstclass = (regnode*)d->data[i];
+ reti->regstclass = (regnode*)d->data[i];
break;
case 'o':
/* Compiled op trees are readonly and in shared memory,
and can thus be shared without duplication. */
OP_REFCNT_LOCK;
- d->data[i] = (void*)OpREFCNT_inc((OP*)r->data->data[i]);
- OP_REFCNT_UNLOCK;
- break;
- case 'n':
- d->data[i] = r->data->data[i];
- break;
- case 't':
- d->data[i] = r->data->data[i];
- OP_REFCNT_LOCK;
- ((reg_trie_data*)d->data[i])->refcount++;
+ d->data[i] = (void*)OpREFCNT_inc((OP*)ri->data->data[i]);
OP_REFCNT_UNLOCK;
break;
case 'T':
- d->data[i] = r->data->data[i];
- OP_REFCNT_LOCK;
- ((reg_ac_data*)d->data[i])->refcount++;
- OP_REFCNT_UNLOCK;
/* Trie stclasses are readonly and can thus be shared
* without duplication. We free the stclass in pregfree
* when the corresponding reg_ac_data struct is freed.
*/
- ret->regstclass= r->regstclass;
+ reti->regstclass= ri->regstclass;
+ /* Fall through */
+ case 't':
+ OP_REFCNT_LOCK;
+ ((reg_trie_data*)ri->data->data[i])->refcount++;
+ OP_REFCNT_UNLOCK;
+ /* Fall through */
+ case 'n':
+ d->data[i] = ri->data->data[i];
break;
default:
- Perl_croak(aTHX_ "panic: re_dup unknown data code '%c'", r->data->what[i]);
+ Perl_croak(aTHX_ "panic: re_dup unknown data code '%c'", ri->data->what[i]);
}
}
- ret->data = d;
+ reti->data = d;
}
else
- ret->data = NULL;
-
- Newx(ret->offsets, 2*len+1, U32);
- Copy(r->offsets, ret->offsets, 2*len+1, U32);
+ reti->data = NULL;
- ret->precomp = SAVEPVN(r->precomp, r->prelen);
- ret->refcnt = r->refcnt;
- ret->minlen = r->minlen;
- ret->minlenret = r->minlenret;
- ret->prelen = r->prelen;
- ret->nparens = r->nparens;
- ret->lastparen = r->lastparen;
- ret->lastcloseparen = r->lastcloseparen;
- ret->intflags = r->intflags;
- ret->extflags = r->extflags;
-
- ret->sublen = r->sublen;
-
- ret->engine = r->engine;
+ Newx(reti->offsets, 2*len+1, U32);
+ Copy(ri->offsets, reti->offsets, 2*len+1, U32);
- ret->paren_names = hv_dup_inc(r->paren_names, param);
-
- if (RX_MATCH_COPIED(ret))
- ret->subbeg = SAVEPVN(r->subbeg, r->sublen);
- else
- ret->subbeg = NULL;
-#ifdef PERL_OLD_COPY_ON_WRITE
- ret->saved_copy = NULL;
-#endif
-
- ptr_table_store(PL_ptr_table, r, ret);
- return ret;
+ return (void*)reti;
}
-#endif
+
+#endif /* USE_ITHREADS */
/*
reg_stringify()
resulting string
If flags is nonnull and the returned string contains UTF8 then
- (flags & 1) will be true.
+ (*flags & 1) will be true.
If haseval is nonnull then it is used to return whether the pattern
contains evals.
Normally called via macro:
- CALLREG_STRINGIFY(mg,0,0);
+ CALLREG_STRINGIFY(mg,&len,&utf8);
And internally with
- CALLREG_AS_STR(mg,lp,flags,haseval)
+ CALLREG_AS_STR(mg,&lp,&flags,&haseval)
See sv_2pv_flags() in sv.c for an example of internal usage.
*/
-
+#ifndef PERL_IN_XSUB_RE
char *
Perl_reg_stringify(pTHX_ MAGIC *mg, STRLEN *lp, U32 *flags, I32 *haseval ) {
dVAR;
mg->mg_ptr[mg->mg_len] = 0;
}
if (haseval)
- *haseval = re->program[0].next_off;
+ *haseval = re->seen_evals;
if (flags)
*flags = ((re->extflags & RXf_UTF8) ? 1 : 0);
return mg->mg_ptr;
}
-
-#ifndef PERL_IN_XSUB_RE
/*
- regnext - dig the "next" pointer out of a node
*/
dVAR;
register I32 offset;
- if (p == &PL_regdummy)
+ if (!p)
return(NULL);
offset = (reg_off_by_arg[OP(p)] ? ARG(p) : NEXT_OFF(p));
#define CLEAR_OPTSTART \
if (optstart) STMT_START { \
- DEBUG_OPTIMISE_r(PerlIO_printf(Perl_debug_log, " (%d nodes)\n", node - optstart)); \
+ DEBUG_OPTIMISE_r(PerlIO_printf(Perl_debug_log, " (%"IVdf" nodes)\n", (IV)(node - optstart))); \
optstart=NULL; \
} STMT_END
register U8 op = PSEUDO; /* Arbitrary non-END op. */
register const regnode *next;
const regnode *optstart= NULL;
+ RXi_GET_DECL(r,ri);
GET_RE_DEBUG_FLAGS_DECL;
#ifdef DEBUG_DUMPUNTIL
const char op = OP(node);
const I32 n = ARG(node);
const reg_ac_data * const ac = op>=AHOCORASICK ?
- (reg_ac_data *)r->data->data[n] :
+ (reg_ac_data *)ri->data->data[n] :
NULL;
- const reg_trie_data * const trie = op<AHOCORASICK ?
- (reg_trie_data*)r->data->data[n] :
- ac->trie;
+ const reg_trie_data * const trie =
+ (reg_trie_data*)ri->data->data[op<AHOCORASICK ? n : ac->trie];
+#ifdef DEBUGGING
+ AV *const trie_words = (AV *) ri->data->data[n + TRIE_WORDS_OFFSET];
+#endif
const regnode *nextbranch= NULL;
I32 word_idx;
sv_setpvn(sv, "", 0);
for (word_idx= 0; word_idx < (I32)trie->wordcount; word_idx++) {
- SV ** const elem_ptr = av_fetch(trie->words,word_idx,0);
+ SV ** const elem_ptr = av_fetch(trie_words,word_idx,0);
PerlIO_printf(Perl_debug_log, "%*s%s ",
(int)(2*(indent+3)), "",
);
if (trie->jump) {
U16 dist= trie->jump[word_idx+1];
- PerlIO_printf(Perl_debug_log, "(%u)\n",
- (dist ? this_trie + dist : next) - start);
+ PerlIO_printf(Perl_debug_log, "(%"UVuf")\n",
+ (UV)((dist ? this_trie + dist : next) - start));
if (dist) {
if (!nextbranch)
nextbranch= this_trie + trie->jump[0];
}
CLEAR_OPTSTART;
#ifdef DEBUG_DUMPUNTIL
- PerlIO_printf(Perl_debug_log, "--- %d\n",indent);
+ PerlIO_printf(Perl_debug_log, "--- %d\n", (int)indent);
#endif
return node;
}