X-Git-Url: https://perl5.git.perl.org/perl5.git/blobdiff_plain/ee0dfd0b018301d385524a3a56b30f5bbf76ac7d..a4368cc338f9178e7f4861a61ab56710050e50dc:/regcomp.c diff --git a/regcomp.c b/regcomp.c index 484f7f6..2cbc94d 100644 --- a/regcomp.c +++ b/regcomp.c @@ -177,7 +177,6 @@ struct RExC_state_t { U32 study_chunk_recursed_bytes; /* bytes in bitmap */ I32 in_lookbehind; I32 contains_locale; - I32 contains_i; I32 override_recoding; #ifdef EBCDIC I32 recode_x_to_native; @@ -272,8 +271,6 @@ struct RExC_state_t { (pRExC_state->study_chunk_recursed_bytes) #define RExC_in_lookbehind (pRExC_state->in_lookbehind) #define RExC_contains_locale (pRExC_state->contains_locale) -#define RExC_contains_i (pRExC_state->contains_i) -#define RExC_override_recoding (pRExC_state->override_recoding) #ifdef EBCDIC # define RExC_recode_x_to_native (pRExC_state->recode_x_to_native) #endif @@ -556,7 +553,6 @@ static const scan_data_t zero_scan_data = #define OOB_UNICODE 0xDEADBEEF #define CHR_SVLEN(sv) (UTF ? sv_len_utf8(sv) : SvCUR(sv)) -#define CHR_DIST(a,b) (UTF ? utf8_distance(a,b) : a - b) /* length of regex to show in messages that don't mark a position within */ @@ -571,7 +567,7 @@ static const scan_data_t zero_scan_data = #define MARKER2 " <-- HERE " /* marker as it appears within the regex */ #define REPORT_LOCATION " in regex; marked by " MARKER1 \ - " in m/%"UTF8f MARKER2 "%"UTF8f"/" + " in m/%" UTF8f MARKER2 "%" UTF8f "/" /* The code in this file in places uses one level of recursion with parsing * rebased to an alternate string constructed by us in memory. 
This can take @@ -659,11 +655,11 @@ static const scan_data_t zero_scan_data = } STMT_END #define FAIL(msg) _FAIL( \ - Perl_croak(aTHX_ "%s in regex m/%"UTF8f"%s/", \ + Perl_croak(aTHX_ "%s in regex m/%" UTF8f "%s/", \ msg, UTF8fARG(UTF, len, RExC_precomp), ellipses)) #define FAIL2(msg,arg) _FAIL( \ - Perl_croak(aTHX_ msg " in regex m/%"UTF8f"%s/", \ + Perl_croak(aTHX_ msg " in regex m/%" UTF8f "%s/", \ arg, UTF8fARG(UTF, len, RExC_precomp), ellipses)) /* @@ -1000,24 +996,25 @@ Perl_re_indentf(pTHX_ const char *fmt, U32 depth, ...) #define DEBUG_STUDYDATA(str,data,depth) \ DEBUG_OPTIMISE_MORE_r(if(data){ \ - Perl_re_indentf( aTHX_ "" str "Pos:%"IVdf"/%"IVdf \ - " Flags: 0x%"UVXf, \ + Perl_re_indentf( aTHX_ "" str "Pos:%" IVdf "/%" IVdf \ + " Flags: 0x%" UVXf, \ depth, \ (IV)((data)->pos_min), \ (IV)((data)->pos_delta), \ (UV)((data)->flags) \ ); \ DEBUG_SHOW_STUDY_FLAGS((data)->flags," [ ","]"); \ - Perl_re_printf( aTHX_ \ - " Whilem_c: %"IVdf" Lcp: %"IVdf" %s", \ + Perl_re_printf( aTHX_ \ + " Whilem_c: %" IVdf " Lcp: %" IVdf " %s", \ (IV)((data)->whilem_c), \ (IV)((data)->last_closep ? *((data)->last_closep) : -1), \ is_inf ? 
"INF " : "" \ ); \ if ((data)->last_found) \ - Perl_re_printf( aTHX_ \ - "Last:'%s' %"IVdf":%"IVdf"/%"IVdf" %sFixed:'%s' @ %"IVdf \ - " %sFloat: '%s' @ %"IVdf"/%"IVdf"", \ + Perl_re_printf( aTHX_ \ + "Last:'%s' %" IVdf ":%" IVdf "/%" IVdf \ + " %sFixed:'%s' @ %" IVdf \ + " %sFloat: '%s' @ %" IVdf "/%" IVdf, \ SvPVX_const((data)->last_found), \ (IV)((data)->last_end), \ (IV)((data)->last_start_min), \ @@ -2012,7 +2009,7 @@ S_dump_trie(pTHX_ const struct _reg_trie_data *trie, HV *widecharmap, for( state = 1 ; state < trie->statecount ; state++ ) { const U32 base = trie->states[ state ].trans.base; - Perl_re_indentf( aTHX_ "#%4"UVXf"|", depth+1, (UV)state); + Perl_re_indentf( aTHX_ "#%4" UVXf "|", depth+1, (UV)state); if ( trie->states[ state ].wordnum ) { Perl_re_printf( aTHX_ " W%4X", trie->states[ state ].wordnum ); @@ -2020,7 +2017,7 @@ S_dump_trie(pTHX_ const struct _reg_trie_data *trie, HV *widecharmap, Perl_re_printf( aTHX_ "%6s", "" ); } - Perl_re_printf( aTHX_ " @%4"UVXf" ", (UV)base ); + Perl_re_printf( aTHX_ " @%4" UVXf " ", (UV)base ); if ( base ) { U32 ofs = 0; @@ -2031,7 +2028,7 @@ S_dump_trie(pTHX_ const struct _reg_trie_data *trie, HV *widecharmap, != state)) ofs++; - Perl_re_printf( aTHX_ "+%2"UVXf"[ ", (UV)ofs); + Perl_re_printf( aTHX_ "+%2" UVXf "[ ", (UV)ofs); for ( ofs = 0 ; ofs < trie->uniquecharcount ; ofs++ ) { if ( ( base + ofs >= trie->uniquecharcount ) @@ -2040,7 +2037,7 @@ S_dump_trie(pTHX_ const struct _reg_trie_data *trie, HV *widecharmap, && trie->trans[ base + ofs - trie->uniquecharcount ].check == state ) { - Perl_re_printf( aTHX_ "%*"UVXf, colwidth, + Perl_re_printf( aTHX_ "%*" UVXf, colwidth, (UV)trie->trans[ base + ofs - trie->uniquecharcount ].next ); } else { @@ -2089,7 +2086,7 @@ S_dump_trie_interim_list(pTHX_ const struct _reg_trie_data *trie, for( state=1 ; state < next_alloc ; state ++ ) { U16 charid; - Perl_re_indentf( aTHX_ " %4"UVXf" :", + Perl_re_indentf( aTHX_ " %4" UVXf " :", depth+1, (UV)state ); if ( ! 
trie->states[ state ].wordnum ) { Perl_re_printf( aTHX_ "%5s| ",""); @@ -2102,7 +2099,7 @@ S_dump_trie_interim_list(pTHX_ const struct _reg_trie_data *trie, SV ** const tmp = av_fetch( revcharmap, TRIE_LIST_ITEM(state,charid).forid, 0); if ( tmp ) { - Perl_re_printf( aTHX_ "%*s:%3X=%4"UVXf" | ", + Perl_re_printf( aTHX_ "%*s:%3X=%4" UVXf " | ", colwidth, pv_pretty(sv, SvPV_nolen_const(*tmp), SvCUR(*tmp), colwidth, @@ -2173,22 +2170,22 @@ S_dump_trie_interim_table(pTHX_ const struct _reg_trie_data *trie, for( state=1 ; state < next_alloc ; state += trie->uniquecharcount ) { - Perl_re_indentf( aTHX_ "%4"UVXf" : ", + Perl_re_indentf( aTHX_ "%4" UVXf " : ", depth+1, (UV)TRIE_NODENUM( state ) ); for( charid = 0 ; charid < trie->uniquecharcount ; charid++ ) { UV v=(UV)SAFE_TRIE_NODENUM( trie->trans[ state + charid ].next ); if (v) - Perl_re_printf( aTHX_ "%*"UVXf, colwidth, v ); + Perl_re_printf( aTHX_ "%*" UVXf, colwidth, v ); else Perl_re_printf( aTHX_ "%*s", colwidth, "." ); } if ( ! trie->states[ TRIE_NODENUM( state ) ].wordnum ) { - Perl_re_printf( aTHX_ " (%4"UVXf")\n", + Perl_re_printf( aTHX_ " (%4" UVXf ")\n", (UV)trie->trans[ state ].check ); } else { - Perl_re_printf( aTHX_ " (%4"UVXf") W%4X\n", + Perl_re_printf( aTHX_ " (%4" UVXf ") W%4X\n", (UV)trie->trans[ state ].check, trie->states[ TRIE_NODENUM( state ) ].wordnum ); } @@ -2702,7 +2699,7 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, svpp = hv_fetch( widecharmap, (char*)&uvc, sizeof( UV ), 1 ); if ( !svpp ) - Perl_croak( aTHX_ "error creating/fetching widecharmap entry for 0x%"UVXf, uvc ); + Perl_croak( aTHX_ "error creating/fetching widecharmap entry for 0x%" UVXf, uvc ); if ( !SvTRUE( *svpp ) ) { sv_setiv( *svpp, ++trie->uniquecharcount ); @@ -2846,7 +2843,7 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, } state = newstate; } else { - Perl_croak( aTHX_ "panic! In trie construction, no char mapping for %"IVdf, uvc ); + Perl_croak( aTHX_ "panic! 
In trie construction, no char mapping for %" IVdf, uvc ); } } } @@ -3043,7 +3040,7 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, } state = trie->trans[ state + charid ].next; } else { - Perl_croak( aTHX_ "panic! In trie construction, no char mapping for %"IVdf, uvc ); + Perl_croak( aTHX_ "panic! In trie construction, no char mapping for %" IVdf, uvc ); } /* charid is now 0 if we dont know the char read, or * nonzero if we do */ @@ -3176,7 +3173,7 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, PerlMemShared_realloc( trie->states, laststate * sizeof(reg_trie_state) ); DEBUG_TRIE_COMPILE_MORE_r( - Perl_re_indentf( aTHX_ "Alloc: %d Orig: %"IVdf" elements, Final:%"IVdf". Savings of %%%5.2f\n", + Perl_re_indentf( aTHX_ "Alloc: %d Orig: %" IVdf " elements, Final:%" IVdf ". Savings of %%%5.2f\n", depth+1, (int)( ( TRIE_CHARCOUNT(trie) + 1 ) * trie->uniquecharcount + 1 ), @@ -3188,7 +3185,7 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, } /* end table compress */ } DEBUG_TRIE_COMPILE_MORE_r( - Perl_re_indentf( aTHX_ "Statecount:%"UVxf" Lasttrans:%"UVxf"\n", + Perl_re_indentf( aTHX_ "Statecount:%" UVxf " Lasttrans:%" UVxf "\n", depth+1, (UV)trie->statecount, (UV)trie->lasttrans) @@ -3239,7 +3236,7 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, }); } DEBUG_OPTIMISE_r( - Perl_re_indentf( aTHX_ "MJD offset:%"UVuf" MJD length:%"UVuf"\n", + Perl_re_indentf( aTHX_ "MJD offset:%" UVuf " MJD length:%" UVuf "\n", depth+1, (UV)mjd_offset, (UV)mjd_nodelen) ); @@ -3248,6 +3245,10 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, split out as an EXACT and put in front of the TRIE node. */ trie->startstate= 1; if ( trie->bitmap && !widecharmap && !trie->jump ) { + /* we want to find the first state that has more than + * one transition, if that state is not the first state + * then we have a common prefix which we can remove. 
+ */ U32 state; for ( state = 1 ; state < trie->statecount-1 ; state++ ) { U32 ofs = 0; @@ -3256,6 +3257,7 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, U32 count = 0; const U32 base = trie->states[ state ].trans.base; + /* does this state terminate an alternation? */ if ( trie->states[state].wordnum ) count = 1; @@ -3265,13 +3267,23 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, trie->trans[ base + ofs - trie->uniquecharcount ].check == state ) { if ( ++count > 1 ) { - SV **tmp = av_fetch( revcharmap, ofs, 0); - const U8 *ch = (U8*)SvPV_nolen_const( *tmp ); + /* we have more than one transition */ + SV **tmp; + U8 *ch; + /* if this is the first state there is no common prefix + * to extract, so we can exit */ if ( state == 1 ) break; + tmp = av_fetch( revcharmap, ofs, 0); + ch = (U8*)SvPV_nolen_const( *tmp ); + + /* if we are on count 2 then we need to initialize the + * bitmap, and store the previous char if there was one + * in it*/ if ( count == 2 ) { + /* clear the bitmap */ Zero(trie->bitmap, ANYOF_BITMAP_SIZE, char); DEBUG_OPTIMISE_r( - Perl_re_indentf( aTHX_ "New Start State=%"UVuf" Class: [", + Perl_re_indentf( aTHX_ "New Start State=%" UVuf " Class: [", depth+1, (UV)state)); if (first_ofs >= 0) { @@ -3292,12 +3304,15 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, } } if ( count == 1 ) { + /* This state has only one transition, its transition is part + * of a common prefix - we need to concatenate the char it + * represents to what we have so far. 
*/ SV **tmp = av_fetch( revcharmap, first_ofs, 0); STRLEN len; char *ch = SvPV( *tmp, len ); DEBUG_OPTIMISE_r({ SV *sv=sv_newmortal(); - Perl_re_indentf( aTHX_ "Prefix State: %"UVuf" Ofs:%"UVuf" Char='%s'\n", + Perl_re_indentf( aTHX_ "Prefix State: %" UVuf " Ofs:%" UVuf " Char='%s'\n", depth+1, (UV)state, (UV)first_ofs, pv_pretty(sv, SvPV_nolen_const(*tmp), SvCUR(*tmp), 6, @@ -3590,11 +3605,11 @@ S_construct_ahocorasick_from_trie(pTHX_ RExC_state_t *pRExC_state, regnode *sour */ fail[ 0 ] = fail[ 1 ] = 0; DEBUG_TRIE_COMPILE_r({ - Perl_re_indentf( aTHX_ "Stclass Failtable (%"UVuf" states): 0", + Perl_re_indentf( aTHX_ "Stclass Failtable (%" UVuf " states): 0", depth, (UV)numstates ); for( q_read=1; q_readpos_min += minnext * (mincount - counted); #if 0 -Perl_re_printf( aTHX_ "counted=%"UVuf" deltanext=%"UVuf - " SSize_t_MAX=%"UVuf" minnext=%"UVuf - " maxcount=%"UVuf" mincount=%"UVuf"\n", +Perl_re_printf( aTHX_ "counted=%" UVuf " deltanext=%" UVuf + " SSize_t_MAX=%" UVuf " minnext=%" UVuf + " maxcount=%" UVuf " mincount=%" UVuf "\n", (UV)counted, (UV)deltanext, (UV)SSize_t_MAX, (UV)minnext, (UV)maxcount, (UV)mincount); if (deltanext != SSize_t_MAX) -Perl_re_printf( aTHX_ "LHS=%"UVuf" RHS=%"UVuf"\n", +Perl_re_printf( aTHX_ "LHS=%" UVuf " RHS=%" UVuf "\n", (UV)(-counted * deltanext + (minnext + deltanext) * maxcount - minnext * mincount), (UV)(SSize_t_MAX - data->pos_delta)); #endif @@ -5577,7 +5592,7 @@ Perl_re_printf( aTHX_ "LHS=%"UVuf" RHS=%"UVuf"\n", FAIL("Variable length lookbehind not implemented"); } else if (minnext > (I32)U8_MAX) { - FAIL2("Lookbehind longer than %"UVuf" not implemented", + FAIL2("Lookbehind longer than %" UVuf " not implemented", (UV)U8_MAX); } scan->flags = (U8)minnext; @@ -5666,7 +5681,7 @@ Perl_re_printf( aTHX_ "LHS=%"UVuf" RHS=%"UVuf"\n", FAIL("Variable length lookbehind not implemented"); } else if (*minnextp > (I32)U8_MAX) { - FAIL2("Lookbehind longer than %"UVuf" not implemented", + FAIL2("Lookbehind longer than %" UVuf " not 
implemented", (UV)U8_MAX); } scan->flags = (U8)*minnextp; @@ -6080,7 +6095,7 @@ Perl_pregcomp(pTHX_ SV * const pattern, const U32 flags) /* Dispatch a request to compile a regexp to correct regexp engine. */ DEBUG_COMPILE_r({ - Perl_re_printf( aTHX_ "Using engine %"UVxf"\n", + Perl_re_printf( aTHX_ "Using engine %" UVxf "\n", PTR2UV(eng)); }); return CALLREGCOMP_ENG(eng, pattern, flags); @@ -6501,8 +6516,12 @@ S_compile_runtime_code(pTHX_ RExC_state_t * const pRExC_state, *p++ = pat[s]; } *p++ = '\''; - if (pRExC_state->pm_flags & RXf_PMf_EXTENDED) + if (pRExC_state->pm_flags & RXf_PMf_EXTENDED) { *p++ = 'x'; + if (pRExC_state->pm_flags & RXf_PMf_EXTENDED_MORE) { + *p++ = 'x'; + } + } *p++ = '\0'; DEBUG_COMPILE_r({ Perl_re_printf( aTHX_ @@ -6531,7 +6550,7 @@ S_compile_runtime_code(pTHX_ RExC_state_t * const pRExC_state, { Safefree(pRExC_state->code_blocks); /* use croak_sv ? */ - Perl_croak_nocontext("%"SVf, SVfARG(errsv)); + Perl_croak_nocontext("%" SVf, SVfARG(errsv)); } } assert(SvROK(qr_ref)); @@ -6659,7 +6678,11 @@ S_setup_longest(pTHX_ RExC_state_t *pRExC_state, SV* sv_longest, calculate it.*/ ml = minlen ? *(minlen) : (SSize_t)longest_length; *rx_end_shift = ml - offset - - longest_length + (SvTAIL(sv_longest) != 0) + - longest_length + /* XXX SvTAIL is always false here - did you mean FBMcf_TAIL + * intead? 
- DAPM + + (SvTAIL(sv_longest) != 0) + */ + lookbehind; t = (eol/* Can't have SEOL and MULTI */ @@ -6899,7 +6922,6 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count, RExC_uni_semantics = 0; RExC_seen_unfolded_sharp_s = 0; RExC_contains_locale = 0; - RExC_contains_i = 0; RExC_strict = cBOOL(pm_flags & RXf_PMf_STRICT); RExC_study_started = 0; pRExC_state->runtime_code_qr = NULL; @@ -6951,9 +6973,6 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count, rx_flags = orig_rx_flags; - if (rx_flags & PMf_FOLD) { - RExC_contains_i = 1; - } if ( initial_charset == REGEX_DEPENDS_CHARSET && (RExC_utf8 ||RExC_uni_semantics)) { @@ -6990,7 +7009,6 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count, RExC_in_lookbehind = 0; RExC_seen_zerolen = *exp == '^' ? -1 : 0; RExC_extralen = 0; - RExC_override_recoding = 0; #ifdef EBCDIC RExC_recode_x_to_native = 0; #endif @@ -7066,14 +7084,14 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count, goto redo_first_pass; } - Perl_croak(aTHX_ "panic: reg returned NULL to re_op_compile for sizing pass, flags=%#"UVxf"", (UV) flags); + Perl_croak(aTHX_ "panic: reg returned NULL to re_op_compile for sizing pass, flags=%#" UVxf, (UV) flags); } if (code_blocksv) SvLEN_set(code_blocksv,0); /* no you can't have it, sv_clear */ DEBUG_PARSE_r({ Perl_re_printf( aTHX_ - "Required size %"IVdf" nodes\n" + "Required size %" IVdf " nodes\n" "Starting second pass (creation)\n", (IV)RExC_size); RExC_lastnum=0; @@ -7148,7 +7166,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count, == REG_RUN_ON_COMMENT_SEEN); U8 reganch = (U8)((r->extflags & RXf_PMf_STD_PMMOD) >> RXf_PMf_STD_PMMOD_SHIFT); - const char *fptr = STD_PAT_MODS; /*"msixn"*/ + const char *fptr = STD_PAT_MODS; /*"msixxn"*/ char *p; /* We output all the necessary flags; we never output a minus, as all @@ -7211,7 +7229,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count, #ifdef RE_TRACK_PATTERN_OFFSETS Newxz(ri->u.offsets, 
2*RExC_size+1, U32); /* MJD 20001228 */ DEBUG_OFFSETS_r(Perl_re_printf( aTHX_ - "%s %"UVuf" bytes for offset annotations.\n", + "%s %" UVuf " bytes for offset annotations.\n", ri->u.offsets ? "Got" : "Couldn't get", (UV)((2*RExC_size+1) * sizeof(U32)))); #endif @@ -7265,7 +7283,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count, RExC_npar = 1; if (reg(pRExC_state, 0, &flags,1) == NULL) { ReREFCNT_dec(rx); - Perl_croak(aTHX_ "panic: reg returned NULL to re_op_compile for generation pass, flags=%#"UVxf"", (UV) flags); + Perl_croak(aTHX_ "panic: reg returned NULL to re_op_compile for generation pass, flags=%#" UVxf, (UV) flags); } DEBUG_OPTIMISE_r( Perl_re_printf( aTHX_ "Starting post parse optimization\n"); @@ -7431,12 +7449,12 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count, #ifdef TRIE_STUDY_OPT DEBUG_PARSE_r( if (!restudied) - Perl_re_printf( aTHX_ "first at %"IVdf"\n", + Perl_re_printf( aTHX_ "first at %" IVdf "\n", (IV)(first - scan + 1)) ); #else DEBUG_PARSE_r( - Perl_re_printf( aTHX_ "first at %"IVdf"\n", + Perl_re_printf( aTHX_ "first at %" IVdf "\n", (IV)(first - scan + 1)) ); #endif @@ -7663,7 +7681,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count, /* Guard against an embedded (?=) or (?<=) with a longer minlen than the "real" pattern. 
*/ DEBUG_OPTIMISE_r({ - Perl_re_printf( aTHX_ "minlen: %"IVdf" r->minlen:%"IVdf" maxlen:%"IVdf"\n", + Perl_re_printf( aTHX_ "minlen: %" IVdf " r->minlen:%" IVdf " maxlen:%" IVdf "\n", (IV)minlen, (IV)r->minlen, (IV)RExC_maxlen); }); r->minlenret = minlen; @@ -7771,10 +7789,10 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count, STRLEN i; GET_RE_DEBUG_FLAGS_DECL; Perl_re_printf( aTHX_ - "Offsets: [%"UVuf"]\n\t", (UV)ri->u.offsets[0]); + "Offsets: [%" UVuf "]\n\t", (UV)ri->u.offsets[0]); for (i = 1; i <= len; i++) { if (ri->u.offsets[i*2-1] || ri->u.offsets[i*2]) - Perl_re_printf( aTHX_ "%"UVuf":%"UVuf"[%"UVuf"] ", + Perl_re_printf( aTHX_ "%" UVuf ":%" UVuf "[%" UVuf "] ", (UV)i, (UV)ri->u.offsets[i*2-1], (UV)ri->u.offsets[i*2]); } Perl_re_printf( aTHX_ "\n"); @@ -8116,7 +8134,7 @@ Perl_reg_numbered_buff_fetch(pTHX_ REGEXP * const r, const I32 paren, } } else { ret_undef: - sv_setsv(sv,&PL_sv_undef); + sv_set_undef(sv); return; } } @@ -8248,17 +8266,18 @@ S_reg_scan_name(pTHX_ RExC_state_t *pRExC_state, U32 flags) assert (RExC_parse <= RExC_end); if (RExC_parse == RExC_end) NOOP; - else if (isIDFIRST_lazy_if(RExC_parse, UTF)) { + else if (isIDFIRST_lazy_if_safe(RExC_parse, RExC_end, UTF)) { /* Note that the code here assumes well-formed UTF-8. 
Skip IDFIRST by * using do...while */ if (UTF) do { RExC_parse += UTF8SKIP(RExC_parse); - } while (isWORDCHAR_utf8((U8*)RExC_parse)); + } while ( RExC_parse < RExC_end + && isWORDCHAR_utf8_safe((U8*)RExC_parse, (U8*) RExC_end)); else do { RExC_parse++; - } while (isWORDCHAR(*RExC_parse)); + } while (RExC_parse < RExC_end && isWORDCHAR(*RExC_parse)); } else { RExC_parse++; /* so the <- from the vFAIL is after the offending character */ @@ -8709,7 +8728,7 @@ S__append_range_to_invlist(pTHX_ SV* const invlist, if ( array[final_element] > start || ELEMENT_RANGE_MATCHES_INVLIST(final_element)) { - Perl_croak(aTHX_ "panic: attempting to append to an inversion list, but wasn't at the end of the list, final=%"UVuf", start=%"UVuf", match=%c", + Perl_croak(aTHX_ "panic: attempting to append to an inversion list, but wasn't at the end of the list, final=%" UVuf ", start=%" UVuf ", match=%c", array[final_element], start, ELEMENT_RANGE_MATCHES_INVLIST(final_element) ? 't' : 'f'); } @@ -9938,18 +9957,18 @@ S_invlist_contents(pTHX_ SV* const invlist, const bool traditional_style) invlist_iterinit(invlist); while (invlist_iternext(invlist, &start, &end)) { if (end == UV_MAX) { - Perl_sv_catpvf(aTHX_ output, "%04"UVXf"%cINFINITY%c", + Perl_sv_catpvf(aTHX_ output, "%04" UVXf "%cINFINITY%c", start, intra_range_delimiter, inter_range_delimiter); } else if (end != start) { - Perl_sv_catpvf(aTHX_ output, "%04"UVXf"%c%04"UVXf"%c", + Perl_sv_catpvf(aTHX_ output, "%04" UVXf "%c%04" UVXf "%c", start, intra_range_delimiter, end, inter_range_delimiter); } else { - Perl_sv_catpvf(aTHX_ output, "%04"UVXf"%c", + Perl_sv_catpvf(aTHX_ output, "%04" UVXf "%c", start, inter_range_delimiter); } } @@ -9996,16 +10015,16 @@ Perl__invlist_dump(pTHX_ PerlIO *file, I32 level, while (invlist_iternext(invlist, &start, &end)) { if (end == UV_MAX) { Perl_dump_indent(aTHX_ level, file, - "%s[%"UVuf"] 0x%04"UVXf" .. INFINITY\n", + "%s[%" UVuf "] 0x%04" UVXf " .. 
INFINITY\n", indent, (UV)count, start); } else if (end != start) { Perl_dump_indent(aTHX_ level, file, - "%s[%"UVuf"] 0x%04"UVXf" .. 0x%04"UVXf"\n", + "%s[%" UVuf "] 0x%04" UVXf " .. 0x%04" UVXf "\n", indent, (UV)count, start, end); } else { - Perl_dump_indent(aTHX_ level, file, "%s[%"UVuf"] 0x%04"UVXf"\n", + Perl_dump_indent(aTHX_ level, file, "%s[%" UVuf "] 0x%04" UVXf "\n", indent, (UV)count, start); } count += 2; @@ -10021,9 +10040,10 @@ Perl__load_PL_utf8_foldclosures (pTHX) * to force that */ if (! PL_utf8_tofold) { U8 dummy[UTF8_MAXBYTES_CASE+1]; + const U8 hyphen[] = HYPHEN_UTF8; /* This string is just a short named one above \xff */ - to_utf8_fold((U8*) HYPHEN_UTF8, dummy, NULL); + toFOLD_utf8_safe(hyphen, hyphen + sizeof(hyphen) - 1, dummy, NULL); assert(PL_utf8_tofold); /* Verify that worked */ } PL_utf8_foldclosures = _swash_inversion_hash(PL_utf8_tofold); @@ -10043,9 +10063,6 @@ Perl__invlistEQ(pTHX_ SV* const a, SV* const b, const bool complement_b) UV len_a = _invlist_len(a); UV len_b = _invlist_len(b); - UV i = 0; /* current index into the arrays */ - bool retval = TRUE; /* Assume are identical until proven otherwise */ - PERL_ARGS_ASSERT__INVLISTEQ; /* If are to compare 'a' with the complement of b, set it @@ -10075,20 +10092,9 @@ Perl__invlistEQ(pTHX_ SV* const a, SV* const b, const bool complement_b) } } - /* Make sure that the lengths are the same, as well as the final element - * before looping through the remainder. 
(Thus we test the length, final, - * and first elements right off the bat) */ - if (len_a != len_b || array_a[len_a-1] != array_b[len_a-1]) { - retval = FALSE; - } - else for (i = 0; i < len_a - 1; i++) { - if (array_a[i] != array_b[i]) { - retval = FALSE; - break; - } - } + return len_a == len_b + && memEQ(array_a, array_b, len_a * sizeof(array_a[0])); - return retval; } #endif @@ -10174,7 +10180,7 @@ S__make_exactf_invlist(pTHX_ RExC_state_t *pRExC_state, regnode *node) } else { STRLEN len; - to_utf8_fold(s, d, &len); + toFOLD_utf8_safe(s, e, d, &len); d += len; s += UTF8SKIP(s); } @@ -10424,26 +10430,28 @@ S_parse_lparen_question_flags(pTHX_ RExC_state_t *pRExC_state) } flagsp = &negflags; wastedflags = 0; /* reset so (?g-c) warns twice */ + x_mod_count = 0; break; case ':': case ')': + + if ((posflags & (RXf_PMf_EXTENDED|RXf_PMf_EXTENDED_MORE)) == RXf_PMf_EXTENDED) { + negflags |= RXf_PMf_EXTENDED_MORE; + } RExC_flags |= posflags; + + if (negflags & RXf_PMf_EXTENDED) { + negflags |= RXf_PMf_EXTENDED_MORE; + } RExC_flags &= ~negflags; set_regex_charset(&RExC_flags, cs); - if (RExC_flags & RXf_PMf_FOLD) { - RExC_contains_i = 1; - } - if (UNLIKELY((x_mod_count) > 1)) { - vFAIL("Only one /x regex modifier is allowed"); - } return; - /*NOTREACHED*/ default: fail_modifiers: RExC_parse += SKIP_IF_CHAR(RExC_parse); /* diag_listed_as: Sequence (?%s...) not recognized in regex; marked by <-- HERE in m/%s/ */ - vFAIL2utf8f("Sequence (%"UTF8f"...) not recognized", + vFAIL2utf8f("Sequence (%" UTF8f "...) not recognized", UTF8fARG(UTF, RExC_parse-seqstart, seqstart)); NOT_REACHED; /*NOTREACHED*/ } @@ -10655,7 +10663,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth) if ( ! op ) { RExC_parse += UTF ? 
UTF8SKIP(RExC_parse) : 1; vFAIL2utf8f( - "Unknown verb pattern '%"UTF8f"'", + "Unknown verb pattern '%" UTF8f "'", UTF8fARG(UTF, verb_len, start_verb)); } if ( arg_required && !start_arg ) { @@ -10952,7 +10960,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth) } RExC_recurse_count++; DEBUG_OPTIMISE_MORE_r(Perl_re_printf( aTHX_ - "%*s%*s Recurse #%"UVuf" to %"IVdf"\n", + "%*s%*s Recurse #%" UVuf " to %" IVdf "\n", 22, "| |", (int)(depth * 2 + 1), "", (UV)ARG(ret), (IV)ARG2L(ret))); } @@ -10974,7 +10982,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth) RExC_parse += SKIP_IF_CHAR(RExC_parse); /* diag_listed_as: Sequence (?%s...) not recognized in regex; marked by <-- HERE in m/%s/ */ vFAIL2utf8f( - "Sequence (%"UTF8f"...) not recognized", + "Sequence (%" UTF8f "...) not recognized", UTF8fARG(UTF, RExC_parse-seqstart, seqstart)); NOT_REACHED; /*NOTREACHED*/ } @@ -11176,7 +11184,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth) *flagp = flags & (RESTART_PASS1|NEED_UTF8); return NULL; } - FAIL2("panic: regbranch returned NULL, flags=%#"UVxf"", + FAIL2("panic: regbranch returned NULL, flags=%#" UVxf, (UV) flags); } else REGTAIL(pRExC_state, br, reganode(pRExC_state, @@ -11197,7 +11205,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth) *flagp = flags & (RESTART_PASS1|NEED_UTF8); return NULL; } - FAIL2("panic: regbranch returned NULL, flags=%#"UVxf"", + FAIL2("panic: regbranch returned NULL, flags=%#" UVxf, (UV) flags); } REGTAIL(pRExC_state, ret, lastbr); @@ -11265,7 +11273,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth) if (RExC_open_parens && !RExC_open_parens[parno]) { DEBUG_OPTIMISE_MORE_r(Perl_re_printf( aTHX_ - "%*s%*s Setting open paren #%"IVdf" to %d\n", + "%*s%*s Setting open paren #%" IVdf " to %d\n", 22, "| |", (int)(depth * 2 + 1), "", (IV)parno, REG_NODE_NUM(ret))); RExC_open_parens[parno]= ret; @@ -11295,7 +11303,7 @@ 
S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth) *flagp = flags & (RESTART_PASS1|NEED_UTF8); return NULL; } - FAIL2("panic: regbranch returned NULL, flags=%#"UVxf"", (UV) flags); + FAIL2("panic: regbranch returned NULL, flags=%#" UVxf, (UV) flags); } if (*RExC_parse == '|') { if (!SIZE_ONLY && RExC_extralen) { @@ -11342,7 +11350,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth) *flagp = flags & (RESTART_PASS1|NEED_UTF8); return NULL; } - FAIL2("panic: regbranch returned NULL, flags=%#"UVxf"", (UV) flags); + FAIL2("panic: regbranch returned NULL, flags=%#" UVxf, (UV) flags); } REGTAIL(pRExC_state, lastbr, br); /* BRANCH -> BRANCH. */ lastbr = br; @@ -11359,7 +11367,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth) ender = reganode(pRExC_state, CLOSE, parno); if ( RExC_close_parens ) { DEBUG_OPTIMISE_MORE_r(Perl_re_printf( aTHX_ - "%*s%*s Setting close paren #%"IVdf" to %d\n", + "%*s%*s Setting close paren #%" IVdf " to %d\n", 22, "| |", (int)(depth * 2 + 1), "", (IV)parno, REG_NODE_NUM(ender))); RExC_close_parens[parno]= ender; if (RExC_nestroot == parno) @@ -11396,7 +11404,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth) DEBUG_PARSE_MSG("lsbr"); regprop(RExC_rx, RExC_mysv1, lastbr, NULL, pRExC_state); regprop(RExC_rx, RExC_mysv2, ender, NULL, pRExC_state); - Perl_re_printf( aTHX_ "~ tying lastbr %s (%"IVdf") to ender %s (%"IVdf") offset %"IVdf"\n", + Perl_re_printf( aTHX_ "~ tying lastbr %s (%" IVdf ") to ender %s (%" IVdf ") offset %" IVdf "\n", SvPV_nolen_const(RExC_mysv1), (IV)REG_NODE_NUM(lastbr), SvPV_nolen_const(RExC_mysv2), @@ -11435,7 +11443,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth) DEBUG_PARSE_MSG("NADA"); regprop(RExC_rx, RExC_mysv1, ret, NULL, pRExC_state); regprop(RExC_rx, RExC_mysv2, ender, NULL, pRExC_state); - Perl_re_printf( aTHX_ "~ converting ret %s (%"IVdf") to ender %s (%"IVdf") offset %"IVdf"\n", + 
Perl_re_printf( aTHX_ "~ converting ret %s (%" IVdf ") to ender %s (%" IVdf ") offset %" IVdf "\n", SvPV_nolen_const(RExC_mysv1), (IV)REG_NODE_NUM(ret), SvPV_nolen_const(RExC_mysv2), @@ -11556,7 +11564,7 @@ S_regbranch(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, I32 first, U32 depth) *flagp = flags & (RESTART_PASS1|NEED_UTF8); return NULL; } - FAIL2("panic: regpiece returned NULL, flags=%#"UVxf"", (UV) flags); + FAIL2("panic: regpiece returned NULL, flags=%#" UVxf, (UV) flags); } else if (ret == NULL) ret = latest; @@ -11585,7 +11593,7 @@ S_regbranch(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, I32 first, U32 depth) } /* - - regpiece - something followed by possible [*+?] + - regpiece - something followed by possible quantifier * + ? {n,m} * * Note that the branching code sequences used for ? and the general cases * of * and + are somewhat optimized: they use the same NOTHING node as @@ -11628,7 +11636,7 @@ S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth) if (flags & (TRYAGAIN|RESTART_PASS1|NEED_UTF8)) *flagp |= flags & (TRYAGAIN|RESTART_PASS1|NEED_UTF8); else - FAIL2("panic: regatom returned NULL, flags=%#"UVxf"", (UV) flags); + FAIL2("panic: regatom returned NULL, flags=%#" UVxf, (UV) flags); return(NULL); } @@ -11809,7 +11817,7 @@ S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth) if (!SIZE_ONLY && !(flags&(HASWIDTH|POSTPONED)) && max > REG_INFTY/3) { SAVEFREESV(RExC_rx_sv); /* in case of fatal warnings */ ckWARN2reg(RExC_parse, - "%"UTF8f" matches null string many times", + "%" UTF8f " matches null string many times", UTF8fARG(UTF, (RExC_parse >= origparse ? RExC_parse - origparse : 0), @@ -11983,13 +11991,15 @@ S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state, RExC_parse++; /* Skip past the '{' */ - if (! (endbrace = strchr(RExC_parse, '}')) /* no trailing brace */ - || ! (endbrace == RExC_parse /* nothing between the {} */ + if (! 
(endbrace = strchr(RExC_parse, '}'))) { /* no trailing brace */ + vFAIL2("Missing right brace on \\%c{}", 'N'); + } + else if(!(endbrace == RExC_parse /* nothing between the {} */ || (endbrace - RExC_parse >= 2 /* U+ (bad hex is checked... */ && strnEQ(RExC_parse, "U+", 2)))) /* ... below for a better error msg) */ { - if (endbrace) RExC_parse = endbrace; /* position msg's '<--HERE' */ + RExC_parse = endbrace; /* position msg's '<--HERE' */ vFAIL("\\N{NAME} must be resolved by the lexer"); } @@ -12135,7 +12145,6 @@ S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state, /* The values are Unicode, and therefore not subject to recoding, but * have to be converted to native on a non-Unicode (meaning non-ASCII) * platform. */ - RExC_override_recoding = 1; #ifdef EBCDIC RExC_recode_x_to_native = 1; #endif @@ -12146,7 +12155,7 @@ S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state, *flagp = flags & (RESTART_PASS1|NEED_UTF8); return FALSE; } - FAIL2("panic: reg returned NULL to grok_bslash_N, flags=%#"UVxf"", + FAIL2("panic: reg returned NULL to grok_bslash_N, flags=%#" UVxf, (UV) flags); } *flagp |= flags&(HASWIDTH|SPSTART|SIMPLE|POSTPONED); @@ -12156,7 +12165,6 @@ S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state, RExC_start = RExC_adjusted_start = save_start; RExC_parse = endbrace; RExC_end = orig_end; - RExC_override_recoding = 0; #ifdef EBCDIC RExC_recode_x_to_native = 0; #endif @@ -12512,7 +12520,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth) if (ret == NULL) { if (*flagp & (RESTART_PASS1|NEED_UTF8)) return NULL; - FAIL2("panic: regclass returned NULL to regatom, flags=%#"UVxf"", + FAIL2("panic: regclass returned NULL to regatom, flags=%#" UVxf, (UV) *flagp); } if (*RExC_parse != ']') { @@ -12539,7 +12547,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth) *flagp = flags & (RESTART_PASS1|NEED_UTF8); return NULL; } - FAIL2("panic: reg returned NULL to regatom, flags=%#"UVxf"", + FAIL2("panic: reg returned NULL to regatom, flags=%#" UVxf, 
(UV) flags); } *flagp |= flags&(HASWIDTH|SPSTART|SIMPLE|POSTPONED); @@ -12702,7 +12710,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth) bad_bound_type: RExC_parse = endbrace; vFAIL2utf8f( - "'%"UTF8f"' is an unknown bound type", + "'%" UTF8f "' is an unknown bound type", UTF8fARG(UTF, length, endbrace - length)); NOT_REACHED; /*NOTREACHED*/ } @@ -12820,7 +12828,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth) /* regclass() can only return RESTART_PASS1 and NEED_UTF8 if * multi-char folds are allowed. */ if (!ret) - FAIL2("panic: regclass returned NULL to regatom, flags=%#"UVxf"", + FAIL2("panic: regclass returned NULL to regatom, flags=%#" UVxf, (UV) *flagp); RExC_parse--; @@ -13355,6 +13363,12 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth) RExC_parse = p + 1; vFAIL("Unescaped left brace in regex is illegal here"); } + goto normal_default; + case '}': + case ']': + if (PASS2 && p > RExC_parse && RExC_strict) { + ckWARN2reg(p + 1, "Unescaped literal '%c'", *p); + } /*FALLTHROUGH*/ default: /* A literal character */ normal_default: @@ -14652,7 +14666,7 @@ S_handle_possible_posix(pTHX_ RExC_state_t *pRExC_state, ? 
"^" : ""; RExC_parse = (char *) p; - vFAIL3utf8f("POSIX class [:%s%"UTF8f":] unknown", + vFAIL3utf8f("POSIX class [:%s%" UTF8f ":] unknown", complement_string, UTF8fARG(UTF, RExC_parse - name_start - 2, name_start)); } @@ -14795,7 +14809,7 @@ S_handle_regex_sets(pTHX_ RExC_state_t *pRExC_state, SV** return_invlist, &posix_warnings )) FAIL2("panic: regclass returned NULL to handle_sets, " - "flags=%#"UVxf"", (UV) *flagp); + "flags=%#" UVxf, (UV) *flagp); /* function call leaves parse pointing to the ']', except * if we faked it */ @@ -15064,7 +15078,7 @@ redo_curchar: NULL)) { FAIL2("panic: regclass returned NULL to handle_sets, " - "flags=%#"UVxf"", (UV) *flagp); + "flags=%#" UVxf, (UV) *flagp); } /* regclass() will return with parsing just the \ sequence, @@ -15103,7 +15117,7 @@ redo_curchar: )) { FAIL2("panic: regclass returned NULL to handle_sets, " - "flags=%#"UVxf"", (UV) *flagp); + "flags=%#" UVxf, (UV) *flagp); } /* function call leaves parse pointing to the ']', except if we @@ -15399,10 +15413,10 @@ redo_curchar: result_string = newSVpvs(""); while (invlist_iternext(final, &start, &end)) { if (start == end) { - Perl_sv_catpvf(aTHX_ result_string, "\\x{%"UVXf"}", start); + Perl_sv_catpvf(aTHX_ result_string, "\\x{%" UVXf "}", start); } else { - Perl_sv_catpvf(aTHX_ result_string, "\\x{%"UVXf"}-\\x{%"UVXf"}", + Perl_sv_catpvf(aTHX_ result_string, "\\x{%" UVXf "}-\\x{%" UVXf "}", start, end); } } @@ -15432,7 +15446,7 @@ redo_curchar: NULL ); if (!node) - FAIL2("panic: regclass returned NULL to handle_sets, flags=%#"UVxf, + FAIL2("panic: regclass returned NULL to handle_sets, flags=%#" UVxf, PTR2UV(flagp)); /* Fix up the node type if we are in locale. (We have pretended we are @@ -15789,8 +15803,10 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth, character; used under /i */ UV n; char * stop_ptr = RExC_end; /* where to stop parsing */ - const bool skip_white = cBOOL(ret_invlist); /* ignore unescaped white - space? 
*/ + + /* ignore unescaped whitespace? */ + const bool skip_white = cBOOL( ret_invlist + || (RExC_flags & RXf_PMf_EXTENDED_MORE)); /* Unicode properties are stored in a swash; this holds the current one * being parsed. If this swash is the only above-latin1 component of the @@ -16261,7 +16277,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth, RExC_parse = e + 1; /* diag_listed_as: Can't find Unicode property definition "%s" */ - vFAIL3utf8f("%s \"%"UTF8f"\"", + vFAIL3utf8f("%s \"%" UTF8f "\"", msg, UTF8fARG(UTF, n, name)); } @@ -16280,7 +16296,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth, SAVEFREEPV(name); } } - Perl_sv_catpvf(aTHX_ listsv, "%cutf8::%s%"UTF8f"%s\n", + Perl_sv_catpvf(aTHX_ listsv, "%cutf8::%s%" UTF8f "%s\n", (value == 'p' ? '+' : '!'), (FOLD) ? "__" : "", UTF8fARG(UTF, n, name), @@ -16450,13 +16466,13 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth, : 0; if (strict) { vFAIL2utf8f( - "False [] range \"%"UTF8f"\"", + "False [] range \"%" UTF8f "\"", UTF8fARG(UTF, w, rangebegin)); } else { SAVEFREESV(RExC_rx_sv); /* in case of fatal warnings */ ckWARN2reg(RExC_parse, - "False [] range \"%"UTF8f"\"", + "False [] range \"%" UTF8f "\"", UTF8fARG(UTF, w, rangebegin)); (void)ReREFCNT_inc(RExC_rx_sv); cp_list = add_cp_to_invlist(cp_list, '-'); @@ -16644,7 +16660,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth, #endif w = RExC_parse - rangebegin; vFAIL2utf8f( - "Invalid [] range \"%"UTF8f"\"", + "Invalid [] range \"%" UTF8f "\"", UTF8fARG(UTF, w, rangebegin)); NOT_REACHED; /* NOTREACHED */ } @@ -16749,7 +16765,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth, foldbuf + foldlen); SV* multi_fold = sv_2mortal(newSVpvs("")); - Perl_sv_catpvf(aTHX_ multi_fold, "\\x{%"UVXf"}", value); + Perl_sv_catpvf(aTHX_ multi_fold, "\\x{%" UVXf "}", value); multi_char_matches = add_multi_match(multi_char_matches, @@ -17005,7 +17021,6 @@ S_regclass(pTHX_ RExC_state_t 
*pRExC_state, I32 *flagp, U32 depth, RExC_adjusted_start = RExC_start + prefix_end; RExC_end = RExC_parse + len; RExC_in_multi_char_class = 1; - RExC_override_recoding = 1; RExC_emit = (regnode *)orig_emit; ret = reg(pRExC_state, 1, &reg_flags, depth+1); @@ -17018,7 +17033,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth, RExC_precomp_adj = 0; RExC_end = save_end; RExC_in_multi_char_class = 0; - RExC_override_recoding = 0; SvREFCNT_dec_NN(multi_char_matches); return ret; } @@ -18353,7 +18367,7 @@ S_regnode_guts(pTHX_ RExC_state_t *pRExC_state, const U8 op, const STRLEN extra_ #else if (RExC_offsets) { /* MJD */ MJD_OFFSET_DEBUG( - ("%s:%d: (op %s) %s %"UVuf" (len %"UVuf") (max %"UVuf").\n", + ("%s:%d: (op %s) %s %" UVuf " (len %" UVuf ") (max %" UVuf ").\n", name, __LINE__, PL_reg_name[op], (UV)(RExC_emit - RExC_emit_start) > RExC_offsets[0] @@ -18458,7 +18472,7 @@ S_reginsert(pTHX_ RExC_state_t *pRExC_state, U8 op, regnode *opnd, U32 depth) dst = RExC_emit; if (RExC_open_parens) { int paren; - /*DEBUG_PARSE_FMT("inst"," - %"IVdf, (IV)RExC_npar);*/ + /*DEBUG_PARSE_FMT("inst"," - %" IVdf, (IV)RExC_npar);*/ /* remember that RExC_npar is rex->nparens + 1, * iow it is 1 more than the number of parens seen in * the pattern so far.
*/ @@ -18488,7 +18502,7 @@ S_reginsert(pTHX_ RExC_state_t *pRExC_state, U8 op, regnode *opnd, U32 depth) #ifdef RE_TRACK_PATTERN_OFFSETS if (RExC_offsets) { /* MJD 20010112 */ MJD_OFFSET_DEBUG( - ("%s(%d): (op %s) %s copy %"UVuf" -> %"UVuf" (max %"UVuf").\n", + ("%s(%d): (op %s) %s copy %" UVuf " -> %" UVuf " (max %" UVuf ").\n", "reg_insert", __LINE__, PL_reg_name[op], @@ -18508,7 +18522,7 @@ S_reginsert(pTHX_ RExC_state_t *pRExC_state, U8 op, regnode *opnd, U32 depth) #ifdef RE_TRACK_PATTERN_OFFSETS if (RExC_offsets) { /* MJD */ MJD_OFFSET_DEBUG( - ("%s(%d): (op %s) %s %"UVuf" <- %"UVuf" (max %"UVuf").\n", + ("%s(%d): (op %s) %s %" UVuf " <- %" UVuf " (max %" UVuf ").\n", "reginsert", __LINE__, PL_reg_name[op], @@ -18657,7 +18671,7 @@ S_regtail_study(pTHX_ RExC_state_t *pRExC_state, regnode *p, DEBUG_PARSE_MSG(""); regprop(RExC_rx, RExC_mysv, val, NULL, pRExC_state); Perl_re_printf( aTHX_ - "~ attach to %s (%"IVdf") offset to %"IVdf"\n", + "~ attach to %s (%" IVdf ") offset to %" IVdf "\n", SvPV_nolen_const(RExC_mysv), (IV)REG_NODE_NUM(val), (IV)(val - scan) @@ -18770,14 +18784,14 @@ Perl_regdump(pTHX_ const regexp *r) RE_PV_QUOTED_DECL(s, 0, dsv, SvPVX_const(r->anchored_substr), RE_SV_DUMPLEN(r->anchored_substr), 30); Perl_re_printf( aTHX_ - "anchored %s%s at %"IVdf" ", + "anchored %s%s at %" IVdf " ", s, RE_SV_TAIL(r->anchored_substr), (IV)r->anchored_offset); } else if (r->anchored_utf8) { RE_PV_QUOTED_DECL(s, 1, dsv, SvPVX_const(r->anchored_utf8), RE_SV_DUMPLEN(r->anchored_utf8), 30); Perl_re_printf( aTHX_ - "anchored utf8 %s%s at %"IVdf" ", + "anchored utf8 %s%s at %" IVdf " ", s, RE_SV_TAIL(r->anchored_utf8), (IV)r->anchored_offset); } @@ -18785,14 +18799,14 @@ Perl_regdump(pTHX_ const regexp *r) RE_PV_QUOTED_DECL(s, 0, dsv, SvPVX_const(r->float_substr), RE_SV_DUMPLEN(r->float_substr), 30); Perl_re_printf( aTHX_ - "floating %s%s at %"IVdf"..%"UVuf" ", + "floating %s%s at %" IVdf "..%" UVuf " ", s, RE_SV_TAIL(r->float_substr), (IV)r->float_min_offset, 
(UV)r->float_max_offset); } else if (r->float_utf8) { RE_PV_QUOTED_DECL(s, 1, dsv, SvPVX_const(r->float_utf8), RE_SV_DUMPLEN(r->float_utf8), 30); Perl_re_printf( aTHX_ - "floating utf8 %s%s at %"IVdf"..%"UVuf" ", + "floating utf8 %s%s at %" IVdf "..%" UVuf " ", s, RE_SV_TAIL(r->float_utf8), (IV)r->float_min_offset, (UV)r->float_max_offset); } @@ -18824,12 +18838,12 @@ Perl_regdump(pTHX_ const regexp *r) Perl_re_printf( aTHX_ " "); } if (r->intflags & PREGf_GPOS_SEEN) - Perl_re_printf( aTHX_ "GPOS:%"UVuf" ", (UV)r->gofs); + Perl_re_printf( aTHX_ "GPOS:%" UVuf " ", (UV)r->gofs); if (r->intflags & PREGf_SKIP) Perl_re_printf( aTHX_ "plus "); if (r->intflags & PREGf_IMPLICIT) Perl_re_printf( aTHX_ "implicit "); - Perl_re_printf( aTHX_ "minlen %"IVdf" ", (IV)r->minlen); + Perl_re_printf( aTHX_ "minlen %" IVdf " ", (IV)r->minlen); if (r->extflags & RXf_EVAL_SEEN) Perl_re_printf( aTHX_ "with eval "); Perl_re_printf( aTHX_ "\n"); @@ -18945,7 +18959,7 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_ if (trie->jump) sv_catpvs(sv, "(JUMP)"); Perl_sv_catpvf(aTHX_ sv, - "", + "", (UV)trie->startstate, (IV)trie->statecount-1, /* -1 because of the unused 0 element */ (UV)trie->wordcount, @@ -18986,7 +19000,7 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_ { AV *name_list= NULL; U32 parno= OP(o) == ACCEPT ? 
(U32)ARG2L(o) : ARG(o); - Perl_sv_catpvf(aTHX_ sv, "%"UVuf, (UV)parno); /* Parenth number */ + Perl_sv_catpvf(aTHX_ sv, "%" UVuf, (UV)parno); /* Parenth number */ if ( RXp_PAREN_NAMES(prog) ) { name_list= MUTABLE_AV(progi->data->data[progi->name_list_idx]); } else if ( pRExC_state ) { @@ -18996,7 +19010,7 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_ if ( k != REF || (OP(o) < NREF)) { SV **name= av_fetch(name_list, parno, 0 ); if (name) - Perl_sv_catpvf(aTHX_ sv, " '%"SVf"'", SVfARG(*name)); + Perl_sv_catpvf(aTHX_ sv, " '%" SVf "'", SVfARG(*name)); } else { SV *sv_dat= MUTABLE_SV(progi->data->data[ parno ]); @@ -19005,10 +19019,10 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_ I32 n; if (name) { for ( n=0; nflags) { - Perl_sv_catpvf(aTHX_ sv, ":%"SVf, + Perl_sv_catpvf(aTHX_ sv, ":%" SVf, SVfARG((MUTABLE_SV(progi->data->data[ ARG( o ) ])))); } #else @@ -19814,7 +19828,7 @@ S_re_croak2(pTHX_ bool utf8, const char* pat1,const char* pat2,...) l1 = 512; Copy(message, buf, l1 , char); /* l1-1 to avoid \n */ - Perl_croak(aTHX_ "%"UTF8f, UTF8fARG(utf8, l1-1, buf)); + Perl_croak(aTHX_ "%" UTF8f, UTF8fARG(utf8, l1-1, buf)); } /* XXX Here's a total kludge. But we need to re-enter for swash routines. */ @@ -19866,7 +19880,7 @@ S_put_code_point(pTHX_ SV *sv, UV c) PERL_ARGS_ASSERT_PUT_CODE_POINT; if (c > 255) { - Perl_sv_catpvf(aTHX_ sv, "\\x{%04"UVXf"}", c); + Perl_sv_catpvf(aTHX_ sv, "\\x{%04" UVXf "}", c); } else if (isPRINT(c)) { const char string = (char) c; @@ -20050,10 +20064,10 @@ S_put_range(pTHX_ SV *sv, UV start, const UV end, const bool allow_literals) : NUM_ANYOF_CODE_POINTS - 1; #if NUM_ANYOF_CODE_POINTS > 256 format = (this_end < 256) - ? "\\x%02"UVXf"-\\x%02"UVXf"" - : "\\x{%04"UVXf"}-\\x{%04"UVXf"}"; + ? 
"\\x%02" UVXf "-\\x%02" UVXf + : "\\x{%04" UVXf "}-\\x{%04" UVXf "}"; #else - format = "\\x%02"UVXf"-\\x%02"UVXf""; + format = "\\x%02" UVXf "-\\x%02" UVXf; #endif GCC_DIAG_IGNORE(-Wformat-nonliteral); Perl_sv_catpvf(aTHX_ sv, format, start, this_end); @@ -20455,7 +20469,7 @@ S_put_charclass_bitmap_innards(pTHX_ SV *sv, #define CLEAR_OPTSTART \ if (optstart) STMT_START { \ DEBUG_OPTIMISE_r(Perl_re_printf( aTHX_ \ - " (%"IVdf" nodes)\n", (IV)(node - optstart))); \ + " (%" IVdf " nodes)\n", (IV)(node - optstart))); \ optstart=NULL; \ } STMT_END @@ -20504,7 +20518,7 @@ S_dumpuntil(pTHX_ const regexp *r, const regnode *start, const regnode *node, CLEAR_OPTSTART; regprop(r, sv, node, NULL, NULL); - Perl_re_printf( aTHX_ "%4"IVdf":%*s%s", (IV)(node - start), + Perl_re_printf( aTHX_ "%4" IVdf ":%*s%s", (IV)(node - start), (int)(2*indent + 1), "", SvPVX_const(sv)); if (OP(node) != OPTIMIZED) { @@ -20514,7 +20528,7 @@ S_dumpuntil(pTHX_ const regexp *r, const regnode *start, const regnode *node, && PL_regkind[OP(next)] != BRANCH ) Perl_re_printf( aTHX_ " (FAIL)"); else - Perl_re_printf( aTHX_ " (%"IVdf")", (IV)(next - start)); + Perl_re_printf( aTHX_ " (%" IVdf ")", (IV)(next - start)); Perl_re_printf( aTHX_ "\n"); } @@ -20569,7 +20583,7 @@ S_dumpuntil(pTHX_ const regexp *r, const regnode *start, const regnode *node, ); if (trie->jump) { U16 dist= trie->jump[word_idx+1]; - Perl_re_printf( aTHX_ "(%"UVuf")\n", + Perl_re_printf( aTHX_ "(%" UVuf ")\n", (UV)((dist ? this_trie + dist : next) - start)); if (dist) { if (!nextbranch)