X-Git-Url: https://perl5.git.perl.org/perl5.git/blobdiff_plain/5bd2d46ea3f06ba4e06c713635d5f83a331c4af0..1cb6cce332ab216d091c7d3cbabf621acd293a71:/regcomp.c diff --git a/regcomp.c b/regcomp.c index f8f4b91..5dff17c 100644 --- a/regcomp.c +++ b/regcomp.c @@ -168,7 +168,7 @@ struct RExC_state_t { I32 seen_zerolen; regnode **open_parens; /* pointers to open parens */ regnode **close_parens; /* pointers to close parens */ - regnode *end_op; /* END node in program */ + regnode *end_op; /* END node in program */ I32 utf8; /* whether the pattern is utf8 or not */ I32 orig_utf8; /* whether the pattern was originally in utf8 */ /* XXX use this for future optimisation of case @@ -179,7 +179,7 @@ struct RExC_state_t { HV *paren_names; /* Paren names */ regnode **recurse; /* Recurse regops */ - I32 recurse_count; /* Number of recurse regops */ + I32 recurse_count; /* Number of recurse regops we have generated */ U8 *study_chunk_recursed; /* bitmap of which subs we have moved through */ U32 study_chunk_recursed_bytes; /* bytes in bitmap */ @@ -898,52 +898,78 @@ static const scan_data_t zero_scan_data = #define EXPERIMENTAL_INPLACESCAN #endif /*PERL_ENABLE_EXPERIMENTAL_REGEX_OPTIMISATIONS*/ -#define DEBUG_RExC_seen() \ +#ifdef DEBUGGING +int +Perl_re_printf(pTHX_ const char *fmt, ...) +{ + va_list ap; + int result; + PerlIO *f= Perl_debug_log; + PERL_ARGS_ASSERT_RE_PRINTF; + va_start(ap, fmt); + result = PerlIO_vprintf(f, fmt, ap); + va_end(ap); + return result; +} + +int +Perl_re_indentf(pTHX_ const char *fmt, U32 depth, ...) +{ + va_list ap; + int result; + PerlIO *f= Perl_debug_log; + PERL_ARGS_ASSERT_RE_INDENTF; + va_start(ap, depth); + PerlIO_printf(f, "%*s", ( depth % 20 ) * 2, ""); + result = PerlIO_vprintf(f, fmt, ap); + va_end(ap); + return result; +} +#endif /* DEBUGGING */ + +#define DEBUG_RExC_seen() \ DEBUG_OPTIMISE_MORE_r({ \ - PerlIO_printf(Perl_debug_log,"RExC_seen: "); \ + Perl_re_printf( aTHX_ "RExC_seen: "); \ \ if (RExC_seen & REG_ZERO_LEN_SEEN) \ - PerlIO_printf(Perl_debug_log,"REG_ZERO_LEN_SEEN "); \ + Perl_re_printf( aTHX_ "REG_ZERO_LEN_SEEN "); \ \ if (RExC_seen & REG_LOOKBEHIND_SEEN) \ - PerlIO_printf(Perl_debug_log,"REG_LOOKBEHIND_SEEN "); \ + Perl_re_printf( aTHX_ "REG_LOOKBEHIND_SEEN "); \ \ if (RExC_seen & REG_GPOS_SEEN) \ - PerlIO_printf(Perl_debug_log,"REG_GPOS_SEEN "); \ + Perl_re_printf( aTHX_ "REG_GPOS_SEEN "); \ \ if (RExC_seen & REG_RECURSE_SEEN) \ - PerlIO_printf(Perl_debug_log,"REG_RECURSE_SEEN "); \ + Perl_re_printf( aTHX_ "REG_RECURSE_SEEN "); \ \ - if (RExC_seen & REG_TOP_LEVEL_BRANCHES_SEEN) \ - PerlIO_printf(Perl_debug_log,"REG_TOP_LEVEL_BRANCHES_SEEN "); \ + if (RExC_seen & REG_TOP_LEVEL_BRANCHES_SEEN) \ + Perl_re_printf( aTHX_ "REG_TOP_LEVEL_BRANCHES_SEEN "); \ \ if (RExC_seen & REG_VERBARG_SEEN) \ - PerlIO_printf(Perl_debug_log,"REG_VERBARG_SEEN "); \ + Perl_re_printf( aTHX_ "REG_VERBARG_SEEN "); \ \ if (RExC_seen & REG_CUTGROUP_SEEN) \ - PerlIO_printf(Perl_debug_log,"REG_CUTGROUP_SEEN "); \ + Perl_re_printf( aTHX_ "REG_CUTGROUP_SEEN "); \ \ if (RExC_seen & REG_RUN_ON_COMMENT_SEEN) \ - PerlIO_printf(Perl_debug_log,"REG_RUN_ON_COMMENT_SEEN "); \ + Perl_re_printf( aTHX_ "REG_RUN_ON_COMMENT_SEEN "); \ \ if (RExC_seen & REG_UNFOLDED_MULTI_SEEN) \ - PerlIO_printf(Perl_debug_log,"REG_UNFOLDED_MULTI_SEEN "); \ - \ - if (RExC_seen & REG_GOSTART_SEEN) \ - PerlIO_printf(Perl_debug_log,"REG_GOSTART_SEEN "); \ + Perl_re_printf( aTHX_ "REG_UNFOLDED_MULTI_SEEN "); \ \ - if (RExC_seen & REG_UNBOUNDED_QUANTIFIER_SEEN) \ - PerlIO_printf(Perl_debug_log,"REG_UNBOUNDED_QUANTIFIER_SEEN "); \ + if (RExC_seen & REG_UNBOUNDED_QUANTIFIER_SEEN) \ + Perl_re_printf( aTHX_ "REG_UNBOUNDED_QUANTIFIER_SEEN "); \ \ - PerlIO_printf(Perl_debug_log,"\n"); \ + Perl_re_printf( aTHX_ "\n"); \ }); #define DEBUG_SHOW_STUDY_FLAG(flags,flag) \ - if ((flags) & flag) PerlIO_printf(Perl_debug_log, "%s ", #flag) + if ((flags) & flag) Perl_re_printf( aTHX_ "%s ", #flag) #define DEBUG_SHOW_STUDY_FLAGS(flags,open_str,close_str) \ if ( ( flags ) ) { \ - PerlIO_printf(Perl_debug_log, "%s", open_str); \ + Perl_re_printf( aTHX_ "%s", open_str); \ DEBUG_SHOW_STUDY_FLAG(flags,SF_FL_BEFORE_SEOL); \ DEBUG_SHOW_STUDY_FLAG(flags,SF_FL_BEFORE_MEOL); \ DEBUG_SHOW_STUDY_FLAG(flags,SF_IS_INF); \ @@ -959,29 +985,28 @@ static const scan_data_t zero_scan_data = DEBUG_SHOW_STUDY_FLAG(flags,SCF_SEEN_ACCEPT); \ DEBUG_SHOW_STUDY_FLAG(flags,SCF_TRIE_DOING_RESTUDY); \ DEBUG_SHOW_STUDY_FLAG(flags,SCF_IN_DEFINE); \ - PerlIO_printf(Perl_debug_log, "%s", close_str); \ + Perl_re_printf( aTHX_ "%s", close_str); \ } #define DEBUG_STUDYDATA(str,data,depth) \ DEBUG_OPTIMISE_MORE_r(if(data){ \ - PerlIO_printf(Perl_debug_log, \ - "%*s" str "Pos:%"IVdf"/%"IVdf \ + Perl_re_indentf( aTHX_ "" str "Pos:%"IVdf"/%"IVdf \ " Flags: 0x%"UVXf, \ - (int)(depth)*2, "", \ + depth, \ (IV)((data)->pos_min), \ (IV)((data)->pos_delta), \ (UV)((data)->flags) \ ); \ DEBUG_SHOW_STUDY_FLAGS((data)->flags," [ ","]"); \ - PerlIO_printf(Perl_debug_log, \ + Perl_re_printf( aTHX_ \ " Whilem_c: %"IVdf" Lcp: %"IVdf" %s", \ (IV)((data)->whilem_c), \ (IV)((data)->last_closep ? *((data)->last_closep) : -1), \ is_inf ? "INF " : "" \ ); \ if ((data)->last_found) \ - PerlIO_printf(Perl_debug_log, \ + Perl_re_printf( aTHX_ \ "Last:'%s' %"IVdf":%"IVdf"/%"IVdf" %sFixed:'%s' @ %"IVdf \ " %sFloat: '%s' @ %"IVdf"/%"IVdf"", \ SvPVX_const((data)->last_found), \ @@ -998,9 +1023,10 @@ DEBUG_OPTIMISE_MORE_r(if(data){ \ (IV)((data)->offset_float_min), \ (IV)((data)->offset_float_max) \ ); \ - PerlIO_printf(Perl_debug_log,"\n"); \ + Perl_re_printf( aTHX_ "\n"); \ }); + /* ========================================================= * BEGIN edit_distance stuff. * @@ -1951,14 +1977,13 @@ S_dump_trie(pTHX_ const struct _reg_trie_data *trie, HV *widecharmap, PERL_ARGS_ASSERT_DUMP_TRIE; - PerlIO_printf( Perl_debug_log, "%*sChar : %-6s%-6s%-4s ", - (int)depth * 2 + 2,"", - "Match","Base","Ofs" ); + Perl_re_indentf( aTHX_ "Char : %-6s%-6s%-4s ", + depth+1, "Match","Base","Ofs" ); for( state = 0 ; state < trie->uniquecharcount ; state++ ) { SV ** const tmp = av_fetch( revcharmap, state, 0); if ( tmp ) { - PerlIO_printf( Perl_debug_log, "%*s", + Perl_re_printf( aTHX_ "%*s", colwidth, pv_pretty(sv, SvPV_nolen_const(*tmp), SvCUR(*tmp), colwidth, PL_colors[0], PL_colors[1], @@ -1968,27 +1993,25 @@ S_dump_trie(pTHX_ const struct _reg_trie_data *trie, HV *widecharmap, ); } } - PerlIO_printf( Perl_debug_log, "\n%*sState|-----------------------", - (int)depth * 2 + 2,""); + Perl_re_printf( aTHX_ "\n"); + Perl_re_indentf( aTHX_ "State|-----------------------", depth+1); for( state = 0 ; state < trie->uniquecharcount ; state++ ) - PerlIO_printf( Perl_debug_log, "%.*s", colwidth, "--------"); - PerlIO_printf( Perl_debug_log, "\n"); + Perl_re_printf( aTHX_ "%.*s", colwidth, "--------"); + Perl_re_printf( aTHX_ "\n"); for( state = 1 ; state < trie->statecount ; state++ ) { const U32 base = trie->states[ state ].trans.base; - PerlIO_printf( Perl_debug_log, "%*s#%4"UVXf"|", - (int)depth * 2 + 2,"", (UV)state); + Perl_re_indentf( aTHX_ "#%4"UVXf"|", depth+1, (UV)state); if ( trie->states[ state ].wordnum ) { - PerlIO_printf( Perl_debug_log, " W%4X", - trie->states[ state ].wordnum ); + Perl_re_printf( aTHX_ " W%4X", trie->states[ state ].wordnum ); } else { - PerlIO_printf( Perl_debug_log, "%6s", "" ); + Perl_re_printf( aTHX_ "%6s", "" ); } - PerlIO_printf( Perl_debug_log, " @%4"UVXf" ", (UV)base ); + Perl_re_printf( aTHX_ " @%4"UVXf" ", (UV)base ); if ( base ) { U32 ofs = 0; @@ -1999,7 +2022,7 @@ S_dump_trie(pTHX_ const struct _reg_trie_data *trie, HV *widecharmap, != state)) ofs++; - PerlIO_printf( Perl_debug_log, "+%2"UVXf"[ ", (UV)ofs); + Perl_re_printf( aTHX_ "+%2"UVXf"[ ", (UV)ofs); for ( ofs = 0 ; ofs < trie->uniquecharcount ; ofs++ ) { if ( ( base + ofs >= trie->uniquecharcount ) @@ -2008,28 +2031,27 @@ S_dump_trie(pTHX_ const struct _reg_trie_data *trie, HV *widecharmap, && trie->trans[ base + ofs - trie->uniquecharcount ].check == state ) { - PerlIO_printf( Perl_debug_log, "%*"UVXf, - colwidth, - (UV)trie->trans[ base + ofs - - trie->uniquecharcount ].next ); + Perl_re_printf( aTHX_ "%*"UVXf, colwidth, + (UV)trie->trans[ base + ofs - trie->uniquecharcount ].next + ); } else { - PerlIO_printf( Perl_debug_log, "%*s",colwidth," ." ); + Perl_re_printf( aTHX_ "%*s",colwidth," ." ); } } - PerlIO_printf( Perl_debug_log, "]"); + Perl_re_printf( aTHX_ "]"); } - PerlIO_printf( Perl_debug_log, "\n" ); + Perl_re_printf( aTHX_ "\n" ); } - PerlIO_printf(Perl_debug_log, "%*sword_info N:(prev,len)=", - (int)depth*2, ""); + Perl_re_indentf( aTHX_ "word_info N:(prev,len)=", + depth); for (word=1; word <= trie->wordcount; word++) { - PerlIO_printf(Perl_debug_log, " %d:(%d,%d)", + Perl_re_printf( aTHX_ " %d:(%d,%d)", (int)word, (int)(trie->wordinfo[word].prev), (int)(trie->wordinfo[word].len)); } - PerlIO_printf(Perl_debug_log, "\n" ); + Perl_re_printf( aTHX_ "\n" ); } /* Dumps a fully constructed but uncompressed trie in list form. @@ -2050,19 +2072,20 @@ S_dump_trie_interim_list(pTHX_ const struct _reg_trie_data *trie, PERL_ARGS_ASSERT_DUMP_TRIE_INTERIM_LIST; /* print out the table precompression. */ - PerlIO_printf( Perl_debug_log, "%*sState :Word | Transition Data\n%*s%s", - (int)depth * 2 + 2,"", (int)depth * 2 + 2,"", - "------:-----+-----------------\n" ); + Perl_re_indentf( aTHX_ "State :Word | Transition Data\n", + depth+1 ); + Perl_re_indentf( aTHX_ "%s", + depth+1, "------:-----+-----------------\n" ); for( state=1 ; state < next_alloc ; state ++ ) { U16 charid; - PerlIO_printf( Perl_debug_log, "%*s %4"UVXf" :", - (int)depth * 2 + 2,"", (UV)state ); + Perl_re_indentf( aTHX_ " %4"UVXf" :", + depth+1, (UV)state ); if ( ! trie->states[ state ].wordnum ) { - PerlIO_printf( Perl_debug_log, "%5s| ",""); + Perl_re_printf( aTHX_ "%5s| ",""); } else { - PerlIO_printf( Perl_debug_log, "W%4x| ", + Perl_re_printf( aTHX_ "W%4x| ", trie->states[ state ].wordnum ); } @@ -2070,7 +2093,7 @@ S_dump_trie_interim_list(pTHX_ const struct _reg_trie_data *trie, SV ** const tmp = av_fetch( revcharmap, TRIE_LIST_ITEM(state,charid).forid, 0); if ( tmp ) { - PerlIO_printf( Perl_debug_log, "%*s:%3X=%4"UVXf" | ", + Perl_re_printf( aTHX_ "%*s:%3X=%4"UVXf" | ", colwidth, pv_pretty(sv, SvPV_nolen_const(*tmp), SvCUR(*tmp), colwidth, @@ -2082,11 +2105,11 @@ S_dump_trie_interim_list(pTHX_ const struct _reg_trie_data *trie, (UV)TRIE_LIST_ITEM(state,charid).newstate ); if (!(charid % 10)) - PerlIO_printf(Perl_debug_log, "\n%*s| ", + Perl_re_printf( aTHX_ "\n%*s| ", (int)((depth * 2) + 14), ""); } } - PerlIO_printf( Perl_debug_log, "\n"); + Perl_re_printf( aTHX_ "\n"); } } @@ -2114,12 +2137,12 @@ S_dump_trie_interim_table(pTHX_ const struct _reg_trie_data *trie, that they are identical. */ - PerlIO_printf( Perl_debug_log, "%*sChar : ",(int)depth * 2 + 2,"" ); + Perl_re_indentf( aTHX_ "Char : ", depth+1 ); for( charid = 0 ; charid < trie->uniquecharcount ; charid++ ) { SV ** const tmp = av_fetch( revcharmap, charid, 0); if ( tmp ) { - PerlIO_printf( Perl_debug_log, "%*s", + Perl_re_printf( aTHX_ "%*s", colwidth, pv_pretty(sv, SvPV_nolen_const(*tmp), SvCUR(*tmp), colwidth, PL_colors[0], PL_colors[1], @@ -2130,32 +2153,32 @@ S_dump_trie_interim_table(pTHX_ const struct _reg_trie_data *trie, } } - PerlIO_printf( Perl_debug_log, "\n%*sState+-",(int)depth * 2 + 2,"" ); + Perl_re_printf( aTHX_ "\n%*sState+-",depth+1 ); for( charid=0 ; charid < trie->uniquecharcount ; charid++ ) { - PerlIO_printf( Perl_debug_log, "%.*s", colwidth,"--------"); + Perl_re_printf( aTHX_ "%.*s", colwidth,"--------"); } - PerlIO_printf( Perl_debug_log, "\n" ); + Perl_re_printf( aTHX_ "\n" ); for( state=1 ; state < next_alloc ; state += trie->uniquecharcount ) { - PerlIO_printf( Perl_debug_log, "%*s%4"UVXf" : ", - (int)depth * 2 + 2,"", + Perl_re_indentf( aTHX_ "%4"UVXf" : ", + depth+1, (UV)TRIE_NODENUM( state ) ); for( charid = 0 ; charid < trie->uniquecharcount ; charid++ ) { UV v=(UV)SAFE_TRIE_NODENUM( trie->trans[ state + charid ].next ); if (v) - PerlIO_printf( Perl_debug_log, "%*"UVXf, colwidth, v ); + Perl_re_printf( aTHX_ "%*"UVXf, colwidth, v ); else - PerlIO_printf( Perl_debug_log, "%*s", colwidth, "." ); + Perl_re_printf( aTHX_ "%*s", colwidth, "." ); } if ( ! trie->states[ TRIE_NODENUM( state ) ].wordnum ) { - PerlIO_printf( Perl_debug_log, " (%4"UVXf")\n", + Perl_re_printf( aTHX_ " (%4"UVXf")\n", (UV)trie->trans[ state ].check ); } else { - PerlIO_printf( Perl_debug_log, " (%4"UVXf") W%4X\n", + Perl_re_printf( aTHX_ " (%4"UVXf") W%4X\n", (UV)trie->trans[ state ].check, trie->states[ TRIE_NODENUM( state ) ].wordnum ); } @@ -2467,9 +2490,9 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, sv_setiv(re_trie_maxbuff, RE_TRIE_MAXBUF_INIT); } DEBUG_TRIE_COMPILE_r({ - PerlIO_printf( Perl_debug_log, - "%*smake_trie start==%d, first==%d, last==%d, tail==%d depth=%d\n", - (int)depth * 2 + 2, "", + Perl_re_indentf( aTHX_ + "make_trie start==%d, first==%d, last==%d, tail==%d depth=%d\n", + depth+1, REG_NODE_NUM(startbranch),REG_NODE_NUM(first), REG_NODE_NUM(last), REG_NODE_NUM(tail), (int)depth); }); @@ -2508,8 +2531,8 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, for ( cur = first ; cur < last ; cur = regnext( cur ) ) { regnode *noper = NEXTOPER( cur ); - const U8 *uc = (U8*)STRING( noper ); - const U8 *e = uc + STR_LEN( noper ); + const U8 *uc; + const U8 *e; int foldlen = 0; U32 wordlen = 0; /* required init */ STRLEN minchars = 0; @@ -2519,17 +2542,19 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, if (OP(noper) == NOTHING) { regnode *noper_next= regnext(noper); - if (noper_next != tail && OP(noper_next) == flags) { - noper = noper_next; - uc= (U8*)STRING(noper); - e= uc + STR_LEN(noper); - trie->minlen= STR_LEN(noper); - } else { - trie->minlen= 0; - continue; - } + if (noper_next < tail) + noper= noper_next; + } + + if ( noper < tail && ( OP(noper) == flags || ( flags == EXACTFU && OP(noper) == EXACTFU_SS ) ) ) { + uc= (U8*)STRING(noper); + e= uc + STR_LEN(noper); + } else { + trie->minlen= 0; + continue; } + if ( set_bit ) { /* bitmap only alloced when !(UTF&&Folding) */ TRIE_BITMAP_SET(trie,*uc); /* store the raw first byte regardless of encoding */ @@ -2672,9 +2697,9 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, } } /* end first pass */ DEBUG_TRIE_COMPILE_r( - PerlIO_printf( Perl_debug_log, - "%*sTRIE(%s): W:%d C:%d Uq:%d Min:%d Max:%d\n", - (int)depth * 2 + 2,"", + Perl_re_indentf( aTHX_ + "TRIE(%s): W:%d C:%d Uq:%d Min:%d Max:%d\n", + depth+1, ( widecharmap ? "UTF8" : "NATIVE" ), (int)word_count, (int)TRIE_CHARCOUNT(trie), trie->uniquecharcount, (int)trie->minlen, (int)trie->maxlen ) @@ -2722,9 +2747,8 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, STRLEN transcount = 1; - DEBUG_TRIE_COMPILE_MORE_r( PerlIO_printf( Perl_debug_log, - "%*sCompiling trie using list compiler\n", - (int)depth * 2 + 2, "")); + DEBUG_TRIE_COMPILE_MORE_r( Perl_re_indentf( aTHX_ "Compiling trie using list compiler\n", + depth+1)); trie->states = (reg_trie_state *) PerlMemShared_calloc( TRIE_CHARCOUNT(trie) + 2, @@ -2735,22 +2759,20 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, for ( cur = first ; cur < last ; cur = regnext( cur ) ) { regnode *noper = NEXTOPER( cur ); - U8 *uc = (U8*)STRING( noper ); - const U8 *e = uc + STR_LEN( noper ); U32 state = 1; /* required init */ U16 charid = 0; /* sanity init */ U32 wordlen = 0; /* required init */ if (OP(noper) == NOTHING) { regnode *noper_next= regnext(noper); - if (noper_next != tail && OP(noper_next) == flags) { - noper = noper_next; - uc= (U8*)STRING(noper); - e= uc + STR_LEN(noper); - } + if (noper_next < tail) + noper= noper_next; } - if (OP(noper) != NOTHING) { + if ( noper < tail && ( OP(noper) == flags || ( flags == EXACTFU && OP(noper) == EXACTFU_SS ) ) ) { + const U8 *uc= (U8*)STRING(noper); + const U8 *e= uc + STR_LEN(noper); + for ( ; uc < e ; uc += len ) { TRIE_READ_CHAR; @@ -2832,7 +2854,7 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, /* DEBUG_TRIE_COMPILE_MORE_r( - PerlIO_printf( Perl_debug_log, "tp: %d zp: %d ",tp,zp) + Perl_re_printf( aTHX_ "tp: %d zp: %d ",tp,zp) ); */ @@ -2894,7 +2916,7 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, } /* DEBUG_TRIE_COMPILE_MORE_r( - PerlIO_printf( Perl_debug_log, " base: %d\n",base); + Perl_re_printf( aTHX_ " base: %d\n",base); ); */ trie->states[ state ].trans.base=base; @@ -2937,9 +2959,8 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, we have to use TRIE_NODENUM() to convert. */ - DEBUG_TRIE_COMPILE_MORE_r( PerlIO_printf( Perl_debug_log, - "%*sCompiling trie using table compiler\n", - (int)depth * 2 + 2, "")); + DEBUG_TRIE_COMPILE_MORE_r( Perl_re_indentf( aTHX_ "Compiling trie using table compiler\n", + depth+1)); trie->trans = (reg_trie_trans *) PerlMemShared_calloc( ( TRIE_CHARCOUNT(trie) + 1 ) @@ -2954,8 +2975,6 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, for ( cur = first ; cur < last ; cur = regnext( cur ) ) { regnode *noper = NEXTOPER( cur ); - const U8 *uc = (U8*)STRING( noper ); - const U8 *e = uc + STR_LEN( noper ); U32 state = 1; /* required init */ @@ -2966,14 +2985,14 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, if (OP(noper) == NOTHING) { regnode *noper_next= regnext(noper); - if (noper_next != tail && OP(noper_next) == flags) { - noper = noper_next; - uc= (U8*)STRING(noper); - e= uc + STR_LEN(noper); - } + if (noper_next < tail) + noper= noper_next; } - if ( OP(noper) != NOTHING ) { + if ( noper < tail && ( OP(noper) == flags || ( flags == EXACTFU && OP(noper) == EXACTFU_SS ) ) ) { + const U8 *uc= (U8*)STRING(noper); + const U8 *e= uc + STR_LEN(noper); + for ( ; uc < e ; uc += len ) { TRIE_READ_CHAR; @@ -3131,9 +3150,8 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, PerlMemShared_realloc( trie->states, laststate * sizeof(reg_trie_state) ); DEBUG_TRIE_COMPILE_MORE_r( - PerlIO_printf( Perl_debug_log, - "%*sAlloc: %d Orig: %"IVdf" elements, Final:%"IVdf". Savings of %%%5.2f\n", - (int)depth * 2 + 2,"", + Perl_re_indentf( aTHX_ "Alloc: %d Orig: %"IVdf" elements, Final:%"IVdf". Savings of %%%5.2f\n", + depth+1, (int)( ( TRIE_CHARCOUNT(trie) + 1 ) * trie->uniquecharcount + 1 ), (IV)next_alloc, @@ -3144,9 +3162,8 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, } /* end table compress */ } DEBUG_TRIE_COMPILE_MORE_r( - PerlIO_printf(Perl_debug_log, - "%*sStatecount:%"UVxf" Lasttrans:%"UVxf"\n", - (int)depth * 2 + 2, "", + Perl_re_indentf( aTHX_ "Statecount:%"UVxf" Lasttrans:%"UVxf"\n", + depth+1, (UV)trie->statecount, (UV)trie->lasttrans) ); @@ -3196,9 +3213,8 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, }); } DEBUG_OPTIMISE_r( - PerlIO_printf(Perl_debug_log, - "%*sMJD offset:%"UVuf" MJD length:%"UVuf"\n", - (int)depth * 2 + 2, "", + Perl_re_indentf( aTHX_ "MJD offset:%"UVuf" MJD length:%"UVuf"\n", + depth+1, (UV)mjd_offset, (UV)mjd_nodelen) ); #endif @@ -3228,9 +3244,8 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, if ( count == 2 ) { Zero(trie->bitmap, ANYOF_BITMAP_SIZE, char); DEBUG_OPTIMISE_r( - PerlIO_printf(Perl_debug_log, - "%*sNew Start State=%"UVuf" Class: [", - (int)depth * 2 + 2, "", + Perl_re_indentf( aTHX_ "New Start State=%"UVuf" Class: [", + depth+1, (UV)state)); if (idx >= 0) { SV ** const tmp = av_fetch( revcharmap, idx, 0); @@ -3240,14 +3255,14 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, if ( folder ) TRIE_BITMAP_SET(trie, folder[ *ch ]); DEBUG_OPTIMISE_r( - PerlIO_printf(Perl_debug_log, "%s", (char*)ch) + Perl_re_printf( aTHX_ "%s", (char*)ch) ); } } TRIE_BITMAP_SET(trie,*ch); if ( folder ) TRIE_BITMAP_SET(trie,folder[ *ch ]); - DEBUG_OPTIMISE_r(PerlIO_printf( Perl_debug_log,"%s", ch)); + DEBUG_OPTIMISE_r(Perl_re_printf( aTHX_ "%s", ch)); } idx = ofs; } @@ -3258,9 +3273,8 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, char *ch = SvPV( *tmp, len ); DEBUG_OPTIMISE_r({ SV *sv=sv_newmortal(); - PerlIO_printf( Perl_debug_log, - "%*sPrefix State: %"UVuf" Idx:%"UVuf" Char='%s'\n", - (int)depth * 2 + 2, "", + Perl_re_indentf( aTHX_ "Prefix State: %"UVuf" Idx:%"UVuf" Char='%s'\n", + depth+1, (UV)state, (UV)idx, pv_pretty(sv, SvPV_nolen_const(*tmp), SvCUR(*tmp), 6, PL_colors[0], PL_colors[1], @@ -3280,7 +3294,7 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, } else { #ifdef DEBUGGING if (state>1) - DEBUG_OPTIMISE_r(PerlIO_printf( Perl_debug_log,"]\n")); + DEBUG_OPTIMISE_r(Perl_re_printf( aTHX_ "]\n")); #endif break; } @@ -3552,14 +3566,13 @@ S_construct_ahocorasick_from_trie(pTHX_ RExC_state_t *pRExC_state, regnode *sour */ fail[ 0 ] = fail[ 1 ] = 0; DEBUG_TRIE_COMPILE_r({ - PerlIO_printf(Perl_debug_log, - "%*sStclass Failtable (%"UVuf" states): 0", - (int)(depth * 2), "", (UV)numstates + Perl_re_indentf( aTHX_ "Stclass Failtable (%"UVuf" states): 0", + depth, (UV)numstates ); for( q_read=1; q_read%3d: %s (%d)", \ - (int)depth*2, "", REG_NODE_NUM(scan), SvPV_nolen_const(RExC_mysv),\ - Next ? (REG_NODE_NUM(Next)) : 0 ); \ +#define DEBUG_PEEP(str,scan,depth) \ + DEBUG_OPTIMISE_r({if (scan){ \ + regnode *Next = regnext(scan); \ + regprop(RExC_rx, RExC_mysv, scan, NULL, pRExC_state);\ + Perl_re_indentf( aTHX_ "" str ">%3d: %s (%d)", \ + depth, REG_NODE_NUM(scan), SvPV_nolen_const(RExC_mysv),\ + Next ? (REG_NODE_NUM(Next)) : 0 );\ DEBUG_SHOW_STUDY_FLAGS(flags," [ ","]");\ - PerlIO_printf(Perl_debug_log, "\n"); \ + Perl_re_printf( aTHX_ "\n"); \ }}); /* The below joins as many adjacent EXACTish nodes as possible into a single @@ -4089,9 +4102,8 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, ); DEBUG_OPTIMISE_MORE_r( { - PerlIO_printf(Perl_debug_log, - "%*sstudy_chunk stopparen=%ld recursed_count=%lu depth=%lu recursed_depth=%lu scan=%p last=%p", - (int)(depth*2), "", (long)stopparen, + Perl_re_indentf( aTHX_ "study_chunk stopparen=%ld recursed_count=%lu depth=%lu recursed_depth=%lu scan=%p last=%p", + depth, (long)stopparen, (unsigned long)RExC_study_chunk_recursed_count, (unsigned long)depth, (unsigned long)recursed_depth, scan, @@ -4110,16 +4122,16 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, (( j - 1 ) * RExC_study_chunk_recursed_bytes), i) ) ) { - PerlIO_printf(Perl_debug_log," %d",(int)i); + Perl_re_printf( aTHX_ " %d",(int)i); break; } } if ( j + 1 < recursed_depth ) { - PerlIO_printf(Perl_debug_log, ","); + Perl_re_printf( aTHX_ ","); } } } - PerlIO_printf(Perl_debug_log,"\n"); + Perl_re_printf( aTHX_ "\n"); } ); while ( scan && OP(scan) != END && scan < last ){ @@ -4395,9 +4407,10 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, DEBUG_TRIE_COMPILE_r({ regprop(RExC_rx, RExC_mysv, tail, NULL, pRExC_state); - PerlIO_printf( Perl_debug_log, "%*s%s%s\n", - (int)depth * 2 + 2, "", - "Looking for TRIE'able sequences. Tail node is: ", + Perl_re_indentf( aTHX_ "%s %"UVuf":%s\n", + depth+1, + "Looking for TRIE'able sequences. Tail node is ", + (UV)(tail - RExC_emit_start), SvPV_nolen_const( RExC_mysv ) ); }); @@ -4480,25 +4493,26 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, U8 noper_trietype = TRIE_TYPE( noper_type ); #if defined(DEBUGGING) || defined(NOJUMPTRIE) regnode * const noper_next = regnext( noper ); - U8 noper_next_type = (noper_next && noper_next != tail) ? OP(noper_next) : 0; - U8 noper_next_trietype = (noper_next && noper_next != tail) ? TRIE_TYPE( noper_next_type ) :0; + U8 noper_next_type = (noper_next && noper_next < tail) ? OP(noper_next) : 0; + U8 noper_next_trietype = (noper_next && noper_next < tail) ? TRIE_TYPE( noper_next_type ) :0; #endif DEBUG_TRIE_COMPILE_r({ regprop(RExC_rx, RExC_mysv, cur, NULL, pRExC_state); - PerlIO_printf( Perl_debug_log, "%*s- %s (%d)", - (int)depth * 2 + 2,"", SvPV_nolen_const( RExC_mysv ), REG_NODE_NUM(cur) ); + Perl_re_indentf( aTHX_ "- %d:%s (%d)", + depth+1, + REG_NODE_NUM(cur), SvPV_nolen_const( RExC_mysv ), REG_NODE_NUM(cur) ); regprop(RExC_rx, RExC_mysv, noper, NULL, pRExC_state); - PerlIO_printf( Perl_debug_log, " -> %s", - SvPV_nolen_const(RExC_mysv)); + Perl_re_printf( aTHX_ " -> %d:%s", + REG_NODE_NUM(noper), SvPV_nolen_const(RExC_mysv)); if ( noper_next ) { regprop(RExC_rx, RExC_mysv, noper_next, NULL, pRExC_state); - PerlIO_printf( Perl_debug_log,"\t=> %s\t", - SvPV_nolen_const(RExC_mysv)); + Perl_re_printf( aTHX_ "\t=> %d:%s\t", + REG_NODE_NUM(noper_next), SvPV_nolen_const(RExC_mysv)); } - PerlIO_printf( Perl_debug_log, "(First==%d,Last==%d,Cur==%d,tt==%s,nt==%s,nnt==%s)\n", + Perl_re_printf( aTHX_ "(First==%d,Last==%d,Cur==%d,tt==%s,ntt==%s,nntt==%s)\n", REG_NODE_NUM(first), REG_NODE_NUM(last), REG_NODE_NUM(cur), PL_reg_name[trietype], PL_reg_name[noper_trietype], PL_reg_name[noper_next_trietype] ); @@ -4509,12 +4523,12 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, if ( noper_trietype && ( - ( noper_trietype == NOTHING) + ( noper_trietype == NOTHING ) || ( trietype == NOTHING ) || ( trietype == noper_trietype ) ) #ifdef NOJUMPTRIE - && noper_next == tail + && noper_next >= tail #endif && count < U16_MAX) { @@ -4527,7 +4541,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, if ( noper_trietype == NOTHING ) { #if !defined(DEBUGGING) && !defined(NOJUMPTRIE) regnode * const noper_next = regnext( noper ); - U8 noper_next_type = (noper_next && noper_next!=tail) ? OP(noper_next) : 0; + U8 noper_next_type = (noper_next && noper_next < tail) ? OP(noper_next) : 0; U8 noper_next_trietype = noper_next_type ? TRIE_TYPE( noper_next_type ) :0; #endif @@ -4574,7 +4588,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, } if ( noper_trietype #ifdef NOJUMPTRIE - && noper_next == tail + && noper_next >= tail #endif ){ /* noper is triable, so we can start a new @@ -4594,10 +4608,12 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, } /* loop over branches */ DEBUG_TRIE_COMPILE_r({ regprop(RExC_rx, RExC_mysv, cur, NULL, pRExC_state); - PerlIO_printf( Perl_debug_log, - "%*s- %s (%d) \n", - (int)depth * 2 + 2, - "", SvPV_nolen_const( RExC_mysv ),REG_NODE_NUM(cur)); + Perl_re_indentf( aTHX_ "- %s (%d) ", + depth+1, SvPV_nolen_const( RExC_mysv ),REG_NODE_NUM(cur)); + Perl_re_printf( aTHX_ "(First==%d, Last==%d, Cur==%d, tt==%s)\n", + REG_NODE_NUM(first), REG_NODE_NUM(last), REG_NODE_NUM(cur), + PL_reg_name[trietype] + ); }); if ( last && trietype ) { @@ -4615,7 +4631,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, depth==0 ) { flags |= SCF_TRIE_RESTUDY; if ( startbranch == first - && scan == tail ) + && scan >= tail ) { RExC_seen &=~REG_TOP_LEVEL_BRANCHES_SEEN; } @@ -4634,9 +4650,9 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, * turn it into a plain NOTHING op. */ DEBUG_TRIE_COMPILE_r({ regprop(RExC_rx, RExC_mysv, cur, NULL, pRExC_state); - PerlIO_printf( Perl_debug_log, - "%*s- %s (%d) \n", (int)depth * 2 + 2, - "", SvPV_nolen_const( RExC_mysv ),REG_NODE_NUM(cur)); + Perl_re_indentf( aTHX_ "- %s (%d) \n", + depth+1, + SvPV_nolen_const( RExC_mysv ),REG_NODE_NUM(cur)); }); OP(startbranch)= NOTHING; @@ -4656,29 +4672,24 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, } else /* single branch is optimized. */ scan = NEXTOPER(scan); continue; - } else if (OP(scan) == SUSPEND || OP(scan) == GOSUB || OP(scan) == GOSTART) { + } else if (OP(scan) == SUSPEND || OP(scan) == GOSUB) { I32 paren = 0; regnode *start = NULL; regnode *end = NULL; U32 my_recursed_depth= recursed_depth; - - if (OP(scan) != SUSPEND) { /* GOSUB/GOSTART */ + if (OP(scan) != SUSPEND) { /* GOSUB */ /* Do setup, note this code has side effects beyond * the rest of this block. Specifically setting * RExC_recurse[] must happen at least once during * study_chunk(). */ - if (OP(scan) == GOSUB) { - paren = ARG(scan); - RExC_recurse[ARG2L(scan)] = scan; - start = RExC_open_parens[paren-1]; - end = RExC_close_parens[paren-1]; - } else { - start = RExC_rxi->program + 1; - end = RExC_end_op; - } + paren = ARG(scan); + RExC_recurse[ARG2L(scan)] = scan; + start = RExC_open_parens[paren]; + end = RExC_close_parens[paren]; + /* NOTE we MUST always execute the above code, even - * if we do nothing with a GOSUB/GOSTART */ + * if we do nothing with a GOSUB */ if ( ( flags & SCF_IN_DEFINE ) || @@ -4727,11 +4738,11 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, RExC_study_chunk_recursed_bytes, U8); } /* we havent recursed into this paren yet, so recurse into it */ - DEBUG_STUDYDATA("set:", data,depth); + DEBUG_STUDYDATA("gosub-set:", data,depth); PAREN_SET(RExC_study_chunk_recursed + (recursed_depth * RExC_study_chunk_recursed_bytes), paren); my_recursed_depth= recursed_depth + 1; } else { - DEBUG_STUDYDATA("inf:", data,depth); + DEBUG_STUDYDATA("gosub-inf:", data,depth); /* some form of infinite recursion, assume infinite length * */ if (flags & SCF_DO_SUBSTR) { @@ -5074,8 +5085,8 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, if (OP(nxt) != CLOSE) goto nogo; if (RExC_open_parens) { - RExC_open_parens[ARG(nxt1)-1]=oscan; /*open->CURLYM*/ - RExC_close_parens[ARG(nxt1)-1]=nxt+2; /*close->while*/ + RExC_open_parens[ARG(nxt1)]=oscan; /*open->CURLYM*/ + RExC_close_parens[ARG(nxt1)]=nxt+2; /*close->while*/ } /* Now we know that nxt2 is the only contents: */ oscan->flags = (U8)ARG(nxt); @@ -5121,8 +5132,8 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, oscan->flags = (U8)ARG(nxt); if (RExC_open_parens) { - RExC_open_parens[ARG(nxt1)-1]=oscan; /*open->CURLYM*/ - RExC_close_parens[ARG(nxt1)-1]=nxt2+1; /*close->NOTHING*/ + RExC_open_parens[ARG(nxt1)]=oscan; /*open->CURLYM*/ + RExC_close_parens[ARG(nxt1)]=nxt2+1; /*close->NOTHING*/ } OP(nxt1) = OPTIMIZED; /* was OPEN. */ OP(nxt) = OPTIMIZED; /* was CLOSE. */ @@ -5229,13 +5240,13 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, /* It is counted once already... */ data->pos_min += minnext * (mincount - counted); #if 0 -PerlIO_printf(Perl_debug_log, "counted=%"UVuf" deltanext=%"UVuf +Perl_re_printf( aTHX_ "counted=%"UVuf" deltanext=%"UVuf " SSize_t_MAX=%"UVuf" minnext=%"UVuf " maxcount=%"UVuf" mincount=%"UVuf"\n", (UV)counted, (UV)deltanext, (UV)SSize_t_MAX, (UV)minnext, (UV)maxcount, (UV)mincount); if (deltanext != SSize_t_MAX) -PerlIO_printf(Perl_debug_log, "LHS=%"UVuf" RHS=%"UVuf"\n", +Perl_re_printf( aTHX_ "LHS=%"UVuf" RHS=%"UVuf"\n", (UV)(-counted * deltanext + (minnext + deltanext) * maxcount - minnext * mincount), (UV)(SSize_t_MAX - data->pos_delta)); #endif @@ -6049,7 +6060,7 @@ Perl_pregcomp(pTHX_ SV * const pattern, const U32 flags) /* Dispatch a request to compile a regexp to correct regexp engine. */ DEBUG_COMPILE_r({ - PerlIO_printf(Perl_debug_log, "Using engine %"UVxf"\n", + Perl_re_printf( aTHX_ "Using engine %"UVxf"\n", PTR2UV(eng)); }); return CALLREGCOMP_ENG(eng, pattern, flags); @@ -6094,7 +6105,7 @@ S_pat_upgrade_to_utf8(pTHX_ RExC_state_t * const pRExC_state, bool do_end = 0; GET_RE_DEBUG_FLAGS_DECL; - DEBUG_PARSE_r(PerlIO_printf(Perl_debug_log, + DEBUG_PARSE_r(Perl_re_printf( aTHX_ "UTF8 mismatch! Converting to utf8 for resizing and compile\n")); Newx(dst, *plen_p * 2 + 1, U8); @@ -6462,7 +6473,7 @@ S_compile_runtime_code(pTHX_ RExC_state_t * const pRExC_state, *p++ = 'x'; *p++ = '\0'; DEBUG_COMPILE_r({ - PerlIO_printf(Perl_debug_log, + Perl_re_printf( aTHX_ "%sre-parsing pattern for runtime code:%s %s\n", PL_colors[4],PL_colors[5],newpat); }); @@ -6797,7 +6808,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count, } - DEBUG_PARSE_r(PerlIO_printf(Perl_debug_log, + DEBUG_PARSE_r(Perl_re_printf( aTHX_ "Assembling pattern from %d elements%s\n", pat_count, orig_rx_flags & RXf_SPLIT ? " for split" : "")); @@ -6826,7 +6837,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count, *is_bare_re = TRUE; SvREFCNT_inc(re); Safefree(pRExC_state->code_blocks); - DEBUG_PARSE_r(PerlIO_printf(Perl_debug_log, + DEBUG_PARSE_r(Perl_re_printf( aTHX_ "Precompiled pattern%s\n", orig_rx_flags & RXf_SPLIT ? " for split" : "")); @@ -6869,7 +6880,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count, DEBUG_COMPILE_r({ SV *dsv= sv_newmortal(); RE_PV_QUOTED_DECL(s, RExC_utf8, dsv, exp, plen, 60); - PerlIO_printf(Perl_debug_log, "%sCompiling REx%s %s\n", + Perl_re_printf( aTHX_ "%sCompiling REx%s %s\n", PL_colors[4],PL_colors[5],s); }); @@ -6985,7 +6996,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count, assert(*RExC_end == '\0'); DEBUG_PARSE_r( - PerlIO_printf(Perl_debug_log, "Starting first pass (sizing)\n"); + Perl_re_printf( aTHX_ "Starting first pass (sizing)\n"); RExC_lastnum=0; RExC_lastparse=NULL; ); @@ -7015,7 +7026,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count, pRExC_state->num_code_blocks); } else { - DEBUG_PARSE_r(PerlIO_printf(Perl_debug_log, + DEBUG_PARSE_r(Perl_re_printf( aTHX_ "Need to redo pass 1\n")); } @@ -7027,7 +7038,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count, SvLEN_set(code_blocksv,0); /* no you can't have it, sv_clear */ DEBUG_PARSE_r({ - PerlIO_printf(Perl_debug_log, + Perl_re_printf( aTHX_ "Required size %"IVdf" nodes\n" "Starting second pass (creation)\n", (IV)RExC_size); @@ -7162,28 +7173,10 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count, r->intflags = 0; r->nparens = RExC_npar - 1; /* set early to validate backrefs */ - /* setup various meta data about recursion, this all requires - * RExC_npar to be correctly set, and a bit later on we clear it */ - if (RExC_seen & REG_RECURSE_SEEN) { - Newxz(RExC_open_parens, RExC_npar,regnode *); - SAVEFREEPV(RExC_open_parens); - Newxz(RExC_close_parens,RExC_npar,regnode *); - SAVEFREEPV(RExC_close_parens); - } - if (RExC_seen & (REG_RECURSE_SEEN | REG_GOSTART_SEEN)) { - /* Note, RExC_npar is 1 + the number of parens in a pattern. - * So its 1 if there are no parens. */ - RExC_study_chunk_recursed_bytes= (RExC_npar >> 3) + - ((RExC_npar & 0x07) != 0); - Newx(RExC_study_chunk_recursed, - RExC_study_chunk_recursed_bytes * RExC_npar, U8); - SAVEFREEPV(RExC_study_chunk_recursed); - } - /* Useful during FAIL. */ #ifdef RE_TRACK_PATTERN_OFFSETS Newxz(ri->u.offsets, 2*RExC_size+1, U32); /* MJD 20001228 */ - DEBUG_OFFSETS_r(PerlIO_printf(Perl_debug_log, + DEBUG_OFFSETS_r(Perl_re_printf( aTHX_ "%s %"UVuf" bytes for offset annotations.\n", ri->u.offsets ? "Got" : "Couldn't get", (UV)((2*RExC_size+1) * sizeof(U32)))); @@ -7199,17 +7192,51 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count, RExC_parse = exp; RExC_end = exp + plen; RExC_naughty = 0; - RExC_npar = 1; RExC_emit_start = ri->program; RExC_emit = ri->program; RExC_emit_bound = ri->program + RExC_size + 1; pRExC_state->code_index = 0; *((char*) RExC_emit++) = (char) REG_MAGIC; + /* setup various meta data about recursion, this all requires + * RExC_npar to be correctly set, and a bit later on we clear it */ + if (RExC_seen & REG_RECURSE_SEEN) { + DEBUG_OPTIMISE_MORE_r(Perl_re_printf( aTHX_ + "%*s%*s Setting up open/close parens\n", + 22, "| |", (int)(0 * 2 + 1), "")); + + /* setup RExC_open_parens, which holds the address of each + * OPEN tag, and to make things simpler for the 0 index + * the start of the program - this is used later for offsets */ + Newxz(RExC_open_parens, RExC_npar,regnode *); + SAVEFREEPV(RExC_open_parens); + RExC_open_parens[0] = RExC_emit; + + /* setup RExC_close_parens, which holds the address of each + * CLOSE tag, and to make things simpler for the 0 index + * the end of the program - this is used later for offsets */ + Newxz(RExC_close_parens, RExC_npar,regnode *); + SAVEFREEPV(RExC_close_parens); + /* we dont know where end op starts yet, so we dont + * need to set RExC_close_parens[0] like we do RExC_open_parens[0] above */ + + /* Note, RExC_npar is 1 + the number of parens in a pattern. + * So its 1 if there are no parens. */ + RExC_study_chunk_recursed_bytes= (RExC_npar >> 3) + + ((RExC_npar & 0x07) != 0); + Newx(RExC_study_chunk_recursed, + RExC_study_chunk_recursed_bytes * RExC_npar, U8); + SAVEFREEPV(RExC_study_chunk_recursed); + } + RExC_npar = 1; if (reg(pRExC_state, 0, &flags,1) == NULL) { ReREFCNT_dec(rx); Perl_croak(aTHX_ "panic: reg returned NULL to re_op_compile for generation pass, flags=%#"UVxf"", (UV) flags); } + DEBUG_OPTIMISE_r( + Perl_re_printf( aTHX_ "Starting post parse optimization\n"); + ); + /* XXXX To minimize changes to RE engine we always allocate 3-units-long substrs field. */ Newx(r->substrs, 1, struct reg_substr_data); @@ -7236,7 +7263,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count, copyRExC_state = RExC_state; } else { U32 seen=RExC_seen; - DEBUG_OPTIMISE_r(PerlIO_printf(Perl_debug_log,"Restudying\n")); + DEBUG_OPTIMISE_r(Perl_re_printf( aTHX_ "Restudying\n")); RExC_state = copyRExC_state; if (seen & REG_TOP_LEVEL_BRANCHES_SEEN) @@ -7370,12 +7397,12 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count, #ifdef TRIE_STUDY_OPT DEBUG_PARSE_r( if (!restudied) - PerlIO_printf(Perl_debug_log, "first at %"IVdf"\n", + Perl_re_printf( aTHX_ "first at %"IVdf"\n", (IV)(first - scan + 1)) ); #else DEBUG_PARSE_r( - PerlIO_printf(Perl_debug_log, "first at %"IVdf"\n", + Perl_re_printf( aTHX_ "first at %"IVdf"\n", (IV)(first - scan + 1)) ); #endif @@ -7504,7 +7531,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count, r->intflags &= ~PREGf_SKIP; /* Used in find_byclass(). */ DEBUG_COMPILE_r({ SV *sv = sv_newmortal(); regprop(r, sv, (regnode*)data.start_class, NULL, pRExC_state); - PerlIO_printf(Perl_debug_log, + Perl_re_printf( aTHX_ "synthetic stclass \"%s\".\n", SvPVX_const(sv));}); data.start_class = NULL; @@ -7549,7 +7576,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count, regnode_ssc ch_class; SSize_t last_close = 0; - DEBUG_PARSE_r(PerlIO_printf(Perl_debug_log, "\nMulti Top Level\n")); + DEBUG_PARSE_r(Perl_re_printf( aTHX_ "\nMulti Top Level\n")); scan = ri->program + 1; ssc_init(pRExC_state, &ch_class); @@ -7584,7 +7611,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count, r->intflags &= ~PREGf_SKIP; /* Used in find_byclass(). */ DEBUG_COMPILE_r({ SV* sv = sv_newmortal(); regprop(r, sv, (regnode*)data.start_class, NULL, pRExC_state); - PerlIO_printf(Perl_debug_log, + Perl_re_printf( aTHX_ "synthetic stclass \"%s\".\n", SvPVX_const(sv));}); data.start_class = NULL; @@ -7602,13 +7629,17 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count, /* Guard against an embedded (?=) or (?<=) with a longer minlen than the "real" pattern. */ DEBUG_OPTIMISE_r({ - PerlIO_printf(Perl_debug_log,"minlen: %"IVdf" r->minlen:%"IVdf" maxlen:%"IVdf"\n", + Perl_re_printf( aTHX_ "minlen: %"IVdf" r->minlen:%"IVdf" maxlen:%"IVdf"\n", (IV)minlen, (IV)r->minlen, (IV)RExC_maxlen); }); r->minlenret = minlen; if (r->minlen < minlen) r->minlen = minlen; + if (RExC_seen & REG_RECURSE_SEEN ) { + r->intflags |= PREGf_RECURSE_SEEN; + Newxz(r->recurse_locinput, r->nparens + 1, char *); + } if (RExC_seen & REG_GPOS_SEEN) r->intflags |= PREGf_GPOS_SEEN; if (RExC_seen & REG_LOOKBEHIND_SEEN) @@ -7682,23 +7713,22 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count, = (void*)SvREFCNT_inc(RExC_paren_name_list); } else #endif - ri->name_list_idx = 0; + ri->name_list_idx = 0; - if (RExC_recurse_count) { - for ( ; RExC_recurse_count ; RExC_recurse_count-- ) { - const regnode *scan = RExC_recurse[RExC_recurse_count-1]; - ARG2L_SET( scan, RExC_open_parens[ARG(scan)-1] - scan ); - } + while ( RExC_recurse_count > 0 ) { + const regnode *scan = RExC_recurse[ --RExC_recurse_count ]; + ARG2L_SET( scan, RExC_open_parens[ARG(scan)] - scan ); } + Newxz(r->offs, RExC_npar, regexp_paren_pair); /* assume we don't need to swap parens around before we match */ DEBUG_TEST_r({ - PerlIO_printf(Perl_debug_log,"study_chunk_recursed_count: %lu\n", + Perl_re_printf( aTHX_ "study_chunk_recursed_count: %lu\n", (unsigned long)RExC_study_chunk_recursed_count); }); DEBUG_DUMP_r({ DEBUG_RExC_seen(); - PerlIO_printf(Perl_debug_log,"Final program:\n"); + Perl_re_printf( aTHX_ "Final program:\n"); regdump(r); }); #ifdef RE_TRACK_PATTERN_OFFSETS @@ -7706,14 +7736,14 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count, const STRLEN len = ri->u.offsets[0]; STRLEN i; GET_RE_DEBUG_FLAGS_DECL; - PerlIO_printf(Perl_debug_log, + Perl_re_printf( aTHX_ "Offsets: [%"UVuf"]\n\t", (UV)ri->u.offsets[0]); for (i = 1; i <= len; i++) { if (ri->u.offsets[i*2-1] || ri->u.offsets[i*2]) - PerlIO_printf(Perl_debug_log, "%"UVuf":%"UVuf"[%"UVuf"] ", + Perl_re_printf( aTHX_ "%"UVuf":%"UVuf"[%"UVuf"] ", (UV)i, (UV)ri->u.offsets[i*2-1], (UV)ri->u.offsets[i*2]); } - PerlIO_printf(Perl_debug_log, "\n"); + Perl_re_printf( aTHX_ "\n"); }); #endif @@ -8231,7 +8261,7 @@ S_reg_scan_name(pTHX_ RExC_state_t *pRExC_state, U32 flags) #define DEBUG_PARSE_MSG(funcname) DEBUG_PARSE_r({ \ int num; \ if (RExC_lastparse!=RExC_parse) { \ - PerlIO_printf(Perl_debug_log, "%s", \ + Perl_re_printf( aTHX_ "%s", \ Perl_pv_pretty(aTHX_ RExC_mysv1, RExC_parse, \ RExC_end - RExC_parse, 16, \ "", "", \ @@ -8243,17 +8273,17 @@ S_reg_scan_name(pTHX_ RExC_state_t *pRExC_state, U32 flags) ) \ ); \ } else \ - PerlIO_printf(Perl_debug_log,"%16s",""); \ + Perl_re_printf( aTHX_ "%16s",""); \ \ if (SIZE_ONLY) \ num = RExC_size + 1; \ else \ num=REG_NODE_NUM(RExC_emit); \ if (RExC_lastnum!=num) \ - PerlIO_printf(Perl_debug_log,"|%4d",num); \ + Perl_re_printf( aTHX_ "|%4d",num); \ else \ - PerlIO_printf(Perl_debug_log,"|%4s",""); \ - PerlIO_printf(Perl_debug_log,"|%*s%-4s", \ + Perl_re_printf( aTHX_ "|%4s",""); \ + Perl_re_printf( aTHX_ "|%*s%-4s", \ (int)((depth*2)), "", \ (funcname) \ ); \ @@ -8265,11 +8295,11 @@ S_reg_scan_name(pTHX_ RExC_state_t *pRExC_state, U32 flags) #define DEBUG_PARSE(funcname) DEBUG_PARSE_r({ \ DEBUG_PARSE_MSG((funcname)); \ - PerlIO_printf(Perl_debug_log,"%4s","\n"); \ + Perl_re_printf( aTHX_ "%4s","\n"); \ }) -#define DEBUG_PARSE_FMT(funcname,fmt,args) DEBUG_PARSE_r({ \ +#define DEBUG_PARSE_FMT(funcname,fmt,args) DEBUG_PARSE_r({\ DEBUG_PARSE_MSG((funcname)); \ - PerlIO_printf(Perl_debug_log,fmt "\n",args); \ + Perl_re_printf( aTHX_ fmt "\n",args); \ }) /* This section of code defines the inversion list object and its methods. The @@ -8903,7 +8933,7 @@ Perl__invlist_union_maybe_complement_2nd(pTHX_ SV* const a, SV* const b, SV* u; /* the resulting union */ UV* array_u; - UV len_u; + UV len_u = 0; UV i_a = 0; /* current index into a's array */ UV i_b = 0; @@ -9215,7 +9245,7 @@ Perl__invlist_intersection_maybe_complement_2nd(pTHX_ SV* const a, SV* const b, SV* r; /* the resulting intersection */ UV* array_r; - UV len_r; + UV len_r = 0; UV i_a = 0; /* current index into a's array */ UV i_b = 0; @@ -9970,7 +10000,7 @@ S__make_exactf_invlist(pTHX_ RExC_state_t *pRExC_state, regnode *node) { AV* list = (AV*) *listp; IV k; - for (k = 0; k <= av_tindex(list); k++) { + for (k = 0; k <= av_tindex_nomg(list); k++) { SV** c_p = av_fetch(list, k, FALSE); UV c; assert(c_p); @@ -10605,13 +10635,12 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth) break; case '0' : /* (?0) */ case 'R' : /* (?R) */ - if (*RExC_parse != ')') + if (RExC_parse == RExC_end || *RExC_parse != ')') FAIL("Sequence (?R) not terminated"); - ret = reg_node(pRExC_state, GOSTART); - RExC_seen |= REG_GOSTART_SEEN; + num = 0; + RExC_seen |= REG_RECURSE_SEEN; *flagp |= POSTPONED; - nextchar(pRExC_state); - return ret; + goto gen_recurse_regop; /*notreached*/ /* named and numeric backreferences */ case '&': /* (?&NAME) */ @@ -10687,6 +10716,14 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth) } else if ( paren == '+' ) { num = RExC_npar + num - 1; } + /* We keep track how many GOSUB items we have produced. + To start off the ARG2L() of the GOSUB holds its "id", + which is used later in conjunction with RExC_recurse + to calculate the offset we need to jump for the GOSUB, + which it will store in the final representation. + We have to defer the actual calculation until much later + as the regop may move. + */ ret = reg2Lanode(pRExC_state, GOSUB, num, RExC_recurse_count); if (!SIZE_ONLY) { @@ -10695,16 +10732,18 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth) vFAIL("Reference to nonexistent group"); } RExC_recurse_count++; - DEBUG_OPTIMISE_MORE_r(PerlIO_printf(Perl_debug_log, + DEBUG_OPTIMISE_MORE_r(Perl_re_printf( aTHX_ "%*s%*s Recurse #%"UVuf" to %"IVdf"\n", 22, "| |", (int)(depth * 2 + 1), "", (UV)ARG(ret), (IV)ARG2L(ret))); } RExC_seen |= REG_RECURSE_SEEN; + Set_Node_Length(ret, 1 + regarglen[OP(ret)]); /* MJD */ Set_Node_Offset(ret, parse_start); /* MJD */ *flagp |= POSTPONED; + assert(*RExC_parse == ')'); nextchar(pRExC_state); return ret; @@ -10848,7 +10887,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth) else if (RExC_parse[0] == 'R') { RExC_parse++; /* parno == 0 => /(?(R)YES|NO)/ "in any form of recursion OR eval" - * parno == 1 => /(?(R0)YES|NO)/ "in GOSTART (?0) / (?R)" + * parno == 1 => /(?(R0)YES|NO)/ "in GOSUB (?0) / (?R)" * parno == 2 => /(?(R1)YES|NO)/ "in GOSUB (?1) (parno-1)" */ parno = 0; @@ -10881,7 +10920,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth) * will return something, and when SIZE_ONLY is * true, reg_scan_name() just parses the string, * and doesnt return anything. (in theory) */ - assert(SIZE_ONLY ? !sv_dat : sv_dat); + assert(SIZE_ONLY ? !sv_dat : !!sv_dat); if (sv_dat) parno = 1 + *((I32 *)SvPVX(sv_dat)); @@ -11004,14 +11043,13 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth) if (!SIZE_ONLY ){ if (!RExC_nestroot) RExC_nestroot = parno; - if (RExC_seen & REG_RECURSE_SEEN - && !RExC_open_parens[parno-1]) + if (RExC_open_parens && !RExC_open_parens[parno]) { - DEBUG_OPTIMISE_MORE_r(PerlIO_printf(Perl_debug_log, + DEBUG_OPTIMISE_MORE_r(Perl_re_printf( aTHX_ "%*s%*s Setting open paren #%"IVdf" to %d\n", 22, "| |", (int)(depth * 2 + 1), "", (IV)parno, REG_NODE_NUM(ret))); - RExC_open_parens[parno-1]= ret; + RExC_open_parens[parno]= ret; } } Set_Node_Length(ret, 1); /* MJD */ @@ -11100,11 +11138,11 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth) break; case 1: case 2: ender = reganode(pRExC_state, CLOSE, parno); - if (!SIZE_ONLY && RExC_seen & REG_RECURSE_SEEN) { - DEBUG_OPTIMISE_MORE_r(PerlIO_printf(Perl_debug_log, + if ( RExC_close_parens ) { + DEBUG_OPTIMISE_MORE_r(Perl_re_printf( aTHX_ "%*s%*s Setting close paren #%"IVdf" to %d\n", 22, "| |", (int)(depth * 2 + 1), "", (IV)parno, REG_NODE_NUM(ender))); - RExC_close_parens[parno-1]= ender; + RExC_close_parens[parno]= ender; if (RExC_nestroot == parno) RExC_nestroot = 0; } @@ -11125,6 +11163,13 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth) if (!SIZE_ONLY) { assert(!RExC_end_op); /* there can only be one! */ RExC_end_op = ender; + if (RExC_close_parens) { + DEBUG_OPTIMISE_MORE_r(Perl_re_printf( aTHX_ + "%*s%*s Setting close paren #0 (END) to %d\n", + 22, "| |", (int)(depth * 2 + 1), "", REG_NODE_NUM(ender))); + + RExC_close_parens[0]= ender; + } } break; } @@ -11132,7 +11177,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth) DEBUG_PARSE_MSG("lsbr"); regprop(RExC_rx, RExC_mysv1, lastbr, NULL, pRExC_state); regprop(RExC_rx, RExC_mysv2, ender, NULL, pRExC_state); - PerlIO_printf(Perl_debug_log, "~ tying lastbr %s (%"IVdf") to ender %s (%"IVdf") offset %"IVdf"\n", + Perl_re_printf( aTHX_ "~ tying lastbr %s (%"IVdf") to ender %s (%"IVdf") offset %"IVdf"\n", SvPV_nolen_const(RExC_mysv1), (IV)REG_NODE_NUM(lastbr), SvPV_nolen_const(RExC_mysv2), @@ -11171,7 +11216,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth) DEBUG_PARSE_MSG("NADA"); regprop(RExC_rx, RExC_mysv1, ret, NULL, pRExC_state); regprop(RExC_rx, RExC_mysv2, ender, NULL, pRExC_state); - PerlIO_printf(Perl_debug_log, "~ converting ret %s (%"IVdf") to ender %s (%"IVdf") offset %"IVdf"\n", + Perl_re_printf( aTHX_ "~ converting ret %s (%"IVdf") to ender %s (%"IVdf") offset %"IVdf"\n", SvPV_nolen_const(RExC_mysv1), (IV)REG_NODE_NUM(ret), SvPV_nolen_const(RExC_mysv2), @@ -14610,7 +14655,7 @@ S_handle_regex_sets(pTHX_ RExC_state_t *pRExC_state, SV** return_invlist, no_close: /* We output the messages even if warnings are off, because we'll fail * the very next thing, and these give a likely diagnosis for that */ - if (posix_warnings && av_tindex(posix_warnings) >= 0) { + if (posix_warnings && av_tindex_nomg(posix_warnings) >= 0) { output_or_return_posix_warnings(pRExC_state, posix_warnings, NULL); } @@ -14719,7 +14764,7 @@ S_handle_regex_sets(pTHX_ RExC_state_t *pRExC_state, SV** return_invlist, redo_curchar: - top_index = av_tindex(stack); + top_index = av_tindex_nomg(stack); switch (curchar) { SV** stacked_ptr; /* Ptr to something already on 'stack' */ @@ -14897,7 +14942,7 @@ redo_curchar: goto done; case ')': - if (av_tindex(fence_stack) < 0) { + if (av_tindex_nomg(fence_stack) < 0) { RExC_parse++; vFAIL("Unexpected ')'"); } @@ -15089,10 +15134,25 @@ redo_curchar: handle_operand: /* Here 'current' is the operand. If something is already on the - * stack, we have to check if it is a !. */ - top_index = av_tindex(stack); /* Code above may have altered the - * stack in the time since we - * earlier set 'top_index'. */ + * stack, we have to check if it is a !. But first, the code above + * may have altered the stack in the time since we earlier set + * 'top_index'. */ + + { + /* Work round an optimiser bug in Solaris Studio 12.3: + * for some reason, the presence of the __assert() in + * av_tindex_nomg() causes the value of fence to get + * corrupted, even though the assert is never called. So + * save the value then restore afterwards. + * Note that in fact merely accessing the value of fence + * prior to the statement containing the assert is enough + * to make the bug go away. + */ + IV f = fence; + top_index = av_tindex_nomg(stack); + fence = f; + } + if (top_index - fence >= 0) { /* If the top entry on the stack is an operator, it had better * be a '!', otherwise the entry below the top operand should @@ -15114,7 +15174,6 @@ redo_curchar: only_to_avoid_leaks = av_pop(stack); SvREFCNT_dec(only_to_avoid_leaks); - top_index = av_tindex(stack); /* And we redo with the inverted operand. This allows * handling multiple ! in a row */ @@ -15144,15 +15203,15 @@ redo_curchar: } /* End of loop parsing through the construct */ done: - if (av_tindex(fence_stack) >= 0) { + if (av_tindex_nomg(fence_stack) >= 0) { vFAIL("Unmatched ("); } - if (av_tindex(stack) < 0 /* Was empty */ + if (av_tindex_nomg(stack) < 0 /* Was empty */ || ((final = av_pop(stack)) == NULL) || ! IS_OPERAND(final) || SvTYPE(final) != SVt_INVLIST - || av_tindex(stack) >= 0) /* More left on stack */ + || av_tindex_nomg(stack) >= 0) /* More left on stack */ { bad_syntax: SvREFCNT_dec(final); @@ -15653,7 +15712,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth, while (1) { if ( posix_warnings - && av_tindex(posix_warnings) >= 0 + && av_tindex_nomg(posix_warnings) >= 0 && RExC_parse > not_posix_region_end) { /* Warnings about posix class issues are considered tentative until @@ -15709,7 +15768,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth, * posix class, and it failed, it was a false alarm, as this * successful one proves */ if ( posix_warnings - && av_tindex(posix_warnings) >= 0 + && av_tindex_nomg(posix_warnings) >= 0 && not_posix_region_end >= RExC_parse && not_posix_region_end <= posix_class_end) { @@ -16655,7 +16714,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth, } /* End of loop through all the text within the brackets */ - if ( posix_warnings && av_tindex(posix_warnings) >= 0) { + if ( posix_warnings && av_tindex_nomg(posix_warnings) >= 0) { output_or_return_posix_warnings(pRExC_state, posix_warnings, return_posix_warnings); } @@ -16688,7 +16747,10 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth, #endif /* Look at the longest folds first */ - for (cp_count = av_tindex(multi_char_matches); cp_count > 0; cp_count--) { + for (cp_count = av_tindex_nomg(multi_char_matches); + cp_count > 0; + cp_count--) + { if (av_exists(multi_char_matches, cp_count)) { AV** this_array_ptr; @@ -17069,7 +17131,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth, { AV* list = (AV*) *listp; IV k; - for (k = 0; k <= av_tindex(list); k++) { + for (k = 0; k <= av_tindex_nomg(list); k++) { SV** c_p = av_fetch(list, k, FALSE); UV c; assert(c_p); @@ -17749,7 +17811,7 @@ Perl__get_regclass_nonbitmap_data(pTHX_ const regexp *prog, si = *ary; /* ary[0] = the string to initialize the swash with */ - if (av_tindex(av) >= 2) { + if (av_tindex_nomg(av) >= 2) { if (only_utf8_locale_ptr && ary[2] && ary[2] != &PL_sv_undef) @@ -17765,7 +17827,7 @@ Perl__get_regclass_nonbitmap_data(pTHX_ const regexp *prog, * is any inversion list generated at compile time; [4] * indicates if that inversion list has any user-defined * properties in it. */ - if (av_tindex(av) >= 3) { + if (av_tindex_nomg(av) >= 3) { invlist = ary[3]; if (SvUV(ary[4])) { swash_init_flags |= _CORE_SWASH_INIT_USER_DEFINED_PROPERTY; @@ -18182,6 +18244,9 @@ S_reginsert(pTHX_ RExC_state_t *pRExC_state, U8 op, regnode *opnd, U32 depth) if (RExC_open_parens) { int paren; /*DEBUG_PARSE_FMT("inst"," - %"IVdf, (IV)RExC_npar);*/ + /* remember that RExC_npar is rex->nparens + 1, + * iow it is 1 more than the number of parens seen in + * the pattern so far. */ for ( paren=0 ; paren < RExC_npar ; paren++ ) { if ( RExC_open_parens[paren] >= opnd ) { /*DEBUG_PARSE_FMT("open"," - %d",size);*/ @@ -18197,6 +18262,8 @@ S_reginsert(pTHX_ RExC_state_t *pRExC_state, U8 op, regnode *opnd, U32 depth) } } } + if (RExC_end_op) + RExC_end_op += size; while (src > opnd) { StructCopy(--src, --dst, regnode); @@ -18269,7 +18336,7 @@ S_regtail(pTHX_ RExC_state_t * pRExC_state, DEBUG_PARSE_r({ DEBUG_PARSE_MSG((scan==p ? "tail" : "")); regprop(RExC_rx, RExC_mysv, scan, NULL, pRExC_state); - PerlIO_printf(Perl_debug_log, "~ %s (%d) %s %s\n", + Perl_re_printf( aTHX_ "~ %s (%d) %s %s\n", SvPV_nolen_const(RExC_mysv), REG_NODE_NUM(scan), (temp == NULL ? "->" : ""), (temp == NULL ? PL_reg_name[OP(val)] : "") @@ -18359,7 +18426,7 @@ S_regtail_study(pTHX_ RExC_state_t *pRExC_state, regnode *p, DEBUG_PARSE_r({ DEBUG_PARSE_MSG((scan==p ? "tsdy" : "")); regprop(RExC_rx, RExC_mysv, scan, NULL, pRExC_state); - PerlIO_printf(Perl_debug_log, "~ %s (%d) -> %s\n", + Perl_re_printf( aTHX_ "~ %s (%d) -> %s\n", SvPV_nolen_const(RExC_mysv), REG_NODE_NUM(scan), PL_reg_name[exact]); @@ -18371,7 +18438,7 @@ S_regtail_study(pTHX_ RExC_state_t *pRExC_state, regnode *p, DEBUG_PARSE_r({ DEBUG_PARSE_MSG(""); regprop(RExC_rx, RExC_mysv, val, NULL, pRExC_state); - PerlIO_printf(Perl_debug_log, + Perl_re_printf( aTHX_ "~ attach to %s (%"IVdf") offset to %"IVdf"\n", SvPV_nolen_const(RExC_mysv), (IV)REG_NODE_NUM(val), @@ -18405,15 +18472,15 @@ S_regdump_intflags(pTHX_ const char *lead, const U32 flags) for (bit=0; bitanchored_substr) { RE_PV_QUOTED_DECL(s, 0, dsv, SvPVX_const(r->anchored_substr), RE_SV_DUMPLEN(r->anchored_substr), 30); - PerlIO_printf(Perl_debug_log, + Perl_re_printf( aTHX_ "anchored %s%s at %"IVdf" ", s, RE_SV_TAIL(r->anchored_substr), (IV)r->anchored_offset); } else if (r->anchored_utf8) { RE_PV_QUOTED_DECL(s, 1, dsv, SvPVX_const(r->anchored_utf8), RE_SV_DUMPLEN(r->anchored_utf8), 30); - PerlIO_printf(Perl_debug_log, + Perl_re_printf( aTHX_ "anchored utf8 %s%s at %"IVdf" ", s, RE_SV_TAIL(r->anchored_utf8), (IV)r->anchored_offset); @@ -18499,55 +18566,55 @@ Perl_regdump(pTHX_ const regexp *r) if (r->float_substr) { RE_PV_QUOTED_DECL(s, 0, dsv, SvPVX_const(r->float_substr), RE_SV_DUMPLEN(r->float_substr), 30); - PerlIO_printf(Perl_debug_log, + Perl_re_printf( aTHX_ "floating %s%s at %"IVdf"..%"UVuf" ", s, RE_SV_TAIL(r->float_substr), (IV)r->float_min_offset, (UV)r->float_max_offset); } else if (r->float_utf8) { RE_PV_QUOTED_DECL(s, 1, dsv, SvPVX_const(r->float_utf8), RE_SV_DUMPLEN(r->float_utf8), 30); - PerlIO_printf(Perl_debug_log, + Perl_re_printf( aTHX_ "floating utf8 %s%s at %"IVdf"..%"UVuf" ", s, RE_SV_TAIL(r->float_utf8), (IV)r->float_min_offset, (UV)r->float_max_offset); } if (r->check_substr || r->check_utf8) - PerlIO_printf(Perl_debug_log, + Perl_re_printf( aTHX_ (const char *) (r->check_substr == r->float_substr && r->check_utf8 == r->float_utf8 ? "(checking floating" : "(checking anchored")); if (r->intflags & PREGf_NOSCAN) - PerlIO_printf(Perl_debug_log, " noscan"); + Perl_re_printf( aTHX_ " noscan"); if (r->extflags & RXf_CHECK_ALL) - PerlIO_printf(Perl_debug_log, " isall"); + Perl_re_printf( aTHX_ " isall"); if (r->check_substr || r->check_utf8) - PerlIO_printf(Perl_debug_log, ") "); + Perl_re_printf( aTHX_ ") "); if (ri->regstclass) { regprop(r, sv, ri->regstclass, NULL, NULL); - PerlIO_printf(Perl_debug_log, "stclass %s ", SvPVX_const(sv)); + Perl_re_printf( aTHX_ "stclass %s ", SvPVX_const(sv)); } if (r->intflags & PREGf_ANCH) { - PerlIO_printf(Perl_debug_log, "anchored"); + Perl_re_printf( aTHX_ "anchored"); if (r->intflags & PREGf_ANCH_MBOL) - PerlIO_printf(Perl_debug_log, "(MBOL)"); + Perl_re_printf( aTHX_ "(MBOL)"); if (r->intflags & PREGf_ANCH_SBOL) - PerlIO_printf(Perl_debug_log, "(SBOL)"); + Perl_re_printf( aTHX_ "(SBOL)"); if (r->intflags & PREGf_ANCH_GPOS) - PerlIO_printf(Perl_debug_log, "(GPOS)"); - (void)PerlIO_putc(Perl_debug_log, ' '); + Perl_re_printf( aTHX_ "(GPOS)"); + Perl_re_printf( aTHX_ " "); } if (r->intflags & PREGf_GPOS_SEEN) - PerlIO_printf(Perl_debug_log, "GPOS:%"UVuf" ", (UV)r->gofs); + Perl_re_printf( aTHX_ "GPOS:%"UVuf" ", (UV)r->gofs); if (r->intflags & PREGf_SKIP) - PerlIO_printf(Perl_debug_log, "plus "); + Perl_re_printf( aTHX_ "plus "); if (r->intflags & PREGf_IMPLICIT) - PerlIO_printf(Perl_debug_log, "implicit "); - PerlIO_printf(Perl_debug_log, "minlen %"IVdf" ", (IV)r->minlen); + Perl_re_printf( aTHX_ "implicit "); + Perl_re_printf( aTHX_ "minlen %"IVdf" ", (IV)r->minlen); if (r->extflags & RXf_EVAL_SEEN) - PerlIO_printf(Perl_debug_log, "with eval "); - PerlIO_printf(Perl_debug_log, "\n"); + Perl_re_printf( aTHX_ "with eval "); + Perl_re_printf( aTHX_ "\n"); DEBUG_FLAGS_r({ regdump_extflags("r->extflags: ",r->extflags); regdump_intflags("r->intflags: ",r->intflags); @@ -18748,7 +18815,8 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_ } /* Paren and offset */ - Perl_sv_catpvf(aTHX_ sv, "%d[%+d]", (int)ARG(o),(int)ARG2L(o)); + Perl_sv_catpvf(aTHX_ sv, "%d[%+d:%d]", (int)ARG(o),(int)ARG2L(o), + (int)((o + (int)ARG2L(o)) - progi->program) ); if (name_list) { SV **name= av_fetch(name_list, ARG(o), 0 ); if (name) @@ -18953,7 +19021,7 @@ Perl_re_intuit_string(pTHX_ REGEXP * const r) ? prog->check_utf8 : prog->check_substr); if (!PL_colorset) reginitcolors(); - PerlIO_printf(Perl_debug_log, + Perl_re_printf( aTHX_ "%sUsing REx %ssubstr:%s \"%s%.60s%s%s\"\n", PL_colors[4], RX_UTF8(r) ? "utf8 " : "", @@ -19014,6 +19082,8 @@ Perl_pregfree2(pTHX_ REGEXP *rx) #endif Safefree(r->offs); SvREFCNT_dec(r->qr_anoncv); + if (r->recurse_locinput) + Safefree(r->recurse_locinput); rx->sv_u.svu_rx = 0; } @@ -19097,6 +19167,8 @@ Perl_reg_temp_copy (pTHX_ REGEXP *ret_x, REGEXP *rx) #endif ret->mother_re = ReREFCNT_inc(r->mother_re ? r->mother_re : rx); SvREFCNT_inc_void(ret->qr_anoncv); + if (r->recurse_locinput) + Newxz(ret->recurse_locinput,r->nparens + 1,char *); return ret_x; } @@ -19130,7 +19202,7 @@ Perl_regfree_internal(pTHX_ REGEXP * const rx) SV *dsv= sv_newmortal(); RE_PV_QUOTED_DECL(s, RX_UTF8(rx), dsv, RX_PRECOMP(rx), RX_PRELEN(rx), 60); - PerlIO_printf(Perl_debug_log,"%sFreeing REx:%s %s\n", + Perl_re_printf( aTHX_ "%sFreeing REx:%s %s\n", PL_colors[4],PL_colors[5],s); } }); @@ -19235,7 +19307,7 @@ Perl_regfree_internal(pTHX_ REGEXP * const rx) #define SAVEPVN(p,n) ((p) ? savepvn(p,n) : NULL) /* - re_dup - duplicate a regexp. + re_dup_guts - duplicate a regexp. This routine is expected to clone a given regexp structure. It is only compiled under USE_ITHREADS. @@ -19303,6 +19375,8 @@ Perl_re_dup_guts(pTHX_ const REGEXP *sstr, REGEXP *dstr, CLONE_PARAMS *param) RXp_PAREN_NAMES(ret) = hv_dup_inc(RXp_PAREN_NAMES(ret), param); ret->qr_anoncv = MUTABLE_CV(sv_dup_inc((const SV *)ret->qr_anoncv, param)); + if (r->recurse_locinput) + Newxz(ret->recurse_locinput,r->nparens + 1,char *); if (ret->pprivate) RXi_SET(ret,CALLREGDUPE_PVT(dstr,param)); @@ -19357,6 +19431,7 @@ Perl_regdupe_internal(pTHX_ REGEXP * const rx, CLONE_PARAMS *param) char, regexp_internal); Copy(ri->program, reti->program, len+1, regnode); + reti->num_code_blocks = ri->num_code_blocks; if (ri->code_blocks) { int n; @@ -19417,7 +19492,7 @@ Perl_regdupe_internal(pTHX_ REGEXP * const rx, CLONE_PARAMS *param) d->data[i] = ri->data->data[i]; break; default: - Perl_croak(aTHX_ "panic: re_dup unknown data code '%c'", + Perl_croak(aTHX_ "panic: re_dup_guts unknown data code '%c'", ri->data->what[i]); } } @@ -20122,11 +20197,11 @@ S_put_charclass_bitmap_innards(pTHX_ SV *sv, return SvCUR(sv) > orig_sv_cur; } -#define CLEAR_OPTSTART \ +#define CLEAR_OPTSTART \ if (optstart) STMT_START { \ - DEBUG_OPTIMISE_r(PerlIO_printf(Perl_debug_log, \ + DEBUG_OPTIMISE_r(Perl_re_printf( aTHX_ \ " (%"IVdf" nodes)\n", (IV)(node - optstart))); \ - optstart=NULL; \ + optstart=NULL; \ } STMT_END #define DUMPUNTIL(b,e) \ @@ -20148,7 +20223,7 @@ S_dumpuntil(pTHX_ const regexp *r, const regnode *start, const regnode *node, PERL_ARGS_ASSERT_DUMPUNTIL; #ifdef DEBUG_DUMPUNTIL - PerlIO_printf(Perl_debug_log, "--- %d : %d - %d - %d\n",indent,node-start, + Perl_re_printf( aTHX_ "--- %d : %d - %d - %d\n",indent,node-start, last ? last-start : 0,plast ? plast-start : 0); #endif @@ -20174,18 +20249,18 @@ S_dumpuntil(pTHX_ const regexp *r, const regnode *start, const regnode *node, CLEAR_OPTSTART; regprop(r, sv, node, NULL, NULL); - PerlIO_printf(Perl_debug_log, "%4"IVdf":%*s%s", (IV)(node - start), + Perl_re_printf( aTHX_ "%4"IVdf":%*s%s", (IV)(node - start), (int)(2*indent + 1), "", SvPVX_const(sv)); if (OP(node) != OPTIMIZED) { if (next == NULL) /* Next ptr. */ - PerlIO_printf(Perl_debug_log, " (0)"); + Perl_re_printf( aTHX_ " (0)"); else if (PL_regkind[(U8)op] == BRANCH && PL_regkind[OP(next)] != BRANCH ) - PerlIO_printf(Perl_debug_log, " (FAIL)"); + Perl_re_printf( aTHX_ " (FAIL)"); else - PerlIO_printf(Perl_debug_log, " (%"IVdf")", (IV)(next - start)); - (void)PerlIO_putc(Perl_debug_log, '\n'); + Perl_re_printf( aTHX_ " (%"IVdf")", (IV)(next - start)); + Perl_re_printf( aTHX_ "\n"); } after_print: @@ -20223,8 +20298,8 @@ S_dumpuntil(pTHX_ const regexp *r, const regnode *start, const regnode *node, for (word_idx= 0; word_idx < (I32)trie->wordcount; word_idx++) { SV ** const elem_ptr = av_fetch(trie_words,word_idx,0); - PerlIO_printf(Perl_debug_log, "%*s%s ", - (int)(2*(indent+3)), "", + Perl_re_indentf( aTHX_ "%s ", + indent+3, elem_ptr ? pv_pretty(sv, SvPV_nolen_const(*elem_ptr), SvCUR(*elem_ptr), 60, @@ -20239,7 +20314,7 @@ S_dumpuntil(pTHX_ const regexp *r, const regnode *start, const regnode *node, ); if (trie->jump) { U16 dist= trie->jump[word_idx+1]; - PerlIO_printf(Perl_debug_log, "(%"UVuf")\n", + Perl_re_printf( aTHX_ "(%"UVuf")\n", (UV)((dist ? this_trie + dist : next) - start)); if (dist) { if (!nextbranch) @@ -20249,7 +20324,7 @@ S_dumpuntil(pTHX_ const regexp *r, const regnode *start, const regnode *node, if (nextbranch && PL_regkind[OP(nextbranch)]==BRANCH) nextbranch= regnext((regnode *)nextbranch); } else { - PerlIO_printf(Perl_debug_log, "\n"); + Perl_re_printf( aTHX_ "\n"); } } if (last && next > last) @@ -20289,7 +20364,7 @@ S_dumpuntil(pTHX_ const regexp *r, const regnode *start, const regnode *node, } CLEAR_OPTSTART; #ifdef DEBUG_DUMPUNTIL - PerlIO_printf(Perl_debug_log, "--- %d\n", (int)indent); + Perl_re_printf( aTHX_ "--- %d\n", (int)indent); #endif return node; }