X-Git-Url: https://perl5.git.perl.org/perl5.git/blobdiff_plain/b94eb3d40b0d2ecf5510a08ef19e565105f6f7fe..f6555ff3f309865a88085cd93354cd93bfb96fd3:/regcomp.c diff --git a/regcomp.c b/regcomp.c index ab0085f..e7c6662 100644 --- a/regcomp.c +++ b/regcomp.c @@ -93,6 +93,8 @@ EXTERN_C const struct regexp_engine my_reg_engine; #define HAS_NONLATIN1_FOLD_CLOSURE(i) \ _HAS_NONLATIN1_FOLD_CLOSURE_ONLY_FOR_USE_BY_REGCOMP_DOT_C_AND_REGEXEC_DOT_C(i) +#define HAS_NONLATIN1_SIMPLE_FOLD_CLOSURE(i) \ + _HAS_NONLATIN1_SIMPLE_FOLD_CLOSURE_ONLY_FOR_USE_BY_REGCOMP_DOT_C_AND_REGEXEC_DOT_C(i) #define IS_NON_FINAL_FOLD(c) _IS_NON_FINAL_FOLD_ONLY_FOR_USE_BY_REGCOMP_DOT_C(c) #define IS_IN_SOME_FOLD_L1(c) _IS_IN_SOME_FOLD_ONLY_FOR_USE_BY_REGCOMP_DOT_C(c) @@ -1718,7 +1720,7 @@ S_dump_trie_interim_table(pTHX_ const struct _reg_trie_data *trie, May be the same as tail. tail : item following the branch sequence count : words in the sequence - flags : currently the OP() type we will be building one of /EXACT(|F|Fl)/ + flags : currently the OP() type we will be building one of /EXACT(|F|FA|FU|FU_SS)/ depth : indent depth Inplace optimizes a sequence of 2 or more Branch-Exact nodes into a TRIE node. @@ -2982,8 +2984,8 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, : MADE_TRIE; } -STATIC void -S_make_trie_failtable(pTHX_ RExC_state_t *pRExC_state, regnode *source, regnode *stclass, U32 depth) +STATIC regnode * +S_construct_ahocorasick_from_trie(pTHX_ RExC_state_t *pRExC_state, regnode *source, U32 depth) { /* The Trie is constructed and compressed now so we can build a fail array if * it's needed @@ -3021,13 +3023,26 @@ S_make_trie_failtable(pTHX_ RExC_state_t *pRExC_state, regnode *source, regnode U32 *fail; reg_ac_data *aho; const U32 data_slot = add_data( pRExC_state, STR_WITH_LEN("T")); + regnode *stclass; GET_RE_DEBUG_FLAGS_DECL; - PERL_ARGS_ASSERT_MAKE_TRIE_FAILTABLE; + PERL_ARGS_ASSERT_CONSTRUCT_AHOCORASICK_FROM_TRIE; #ifndef DEBUGGING PERL_UNUSED_ARG(depth); #endif + if ( OP(source) == TRIE ) { + struct regnode_1 *op = (struct regnode_1 *) + PerlMemShared_calloc(1, sizeof(struct regnode_1)); + StructCopy(source,op,struct regnode_1); + stclass = (regnode *)op; + } else { + struct regnode_charclass *op = (struct regnode_charclass *) + PerlMemShared_calloc(1, sizeof(struct regnode_charclass)); + StructCopy(source,op,struct regnode_charclass); + stclass = (regnode *)op; + } + OP(stclass)+=2; /* covert the TRIE type to its AHO-CORASICK equivalent */ ARG_SET( stclass, data_slot ); aho = (reg_ac_data *) PerlMemShared_calloc( 1, sizeof(reg_ac_data) ); @@ -3094,6 +3109,7 @@ S_make_trie_failtable(pTHX_ RExC_state_t *pRExC_state, regnode *source, regnode }); Safefree(q); /*RExC_seen |= REG_TRIEDFA_SEEN;*/ + return stclass; } @@ -4322,7 +4338,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, /* Some characters match above-Latin1 ones under /i. This * is true of EXACTFL ones when the locale is UTF-8 */ - if (HAS_NONLATIN1_FOLD_CLOSURE(uc) + if (HAS_NONLATIN1_SIMPLE_FOLD_CLOSURE(uc) && (! isASCII(uc) || (OP(scan) != EXACTFA && OP(scan) != EXACTFA_NO_TRIE))) { @@ -4763,13 +4779,13 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, /* It is counted once already... */ data->pos_min += minnext * (mincount - counted); #if 0 -PerlIO_printf(Perl_debug_log, "counted=%"UVdf" deltanext=%"UVdf - " SSize_t_MAX=%"UVdf" minnext=%"UVdf - " maxcount=%"UVdf" mincount=%"UVdf"\n", +PerlIO_printf(Perl_debug_log, "counted=%"UVuf" deltanext=%"UVuf + " SSize_t_MAX=%"UVuf" minnext=%"UVuf + " maxcount=%"UVuf" mincount=%"UVuf"\n", (UV)counted, (UV)deltanext, (UV)SSize_t_MAX, (UV)minnext, (UV)maxcount, (UV)mincount); if (deltanext != SSize_t_MAX) -PerlIO_printf(Perl_debug_log, "LHS=%"UVdf" RHS=%"UVdf"\n", +PerlIO_printf(Perl_debug_log, "LHS=%"UVuf" RHS=%"UVuf"\n", (UV)(-counted * deltanext + (minnext + deltanext) * maxcount - minnext * mincount), (UV)(SSize_t_MAX - data->pos_delta)); #endif @@ -5704,7 +5720,7 @@ S_concat_pat(pTHX_ RExC_state_t * const pRExC_state, /* if we know we have at least two args, create an empty string, * then concatenate args to that. For no args, return an empty string */ if (!pat && pat_count != 1) { - pat = newSVpvn("", 0); + pat = newSVpvs(""); SAVEFREESV(pat); alloced = TRUE; } @@ -6822,22 +6838,8 @@ reStudy: else if (PL_regkind[OP(first)] == TRIE && ((reg_trie_data *)ri->data->data[ ARG(first) ])->minlen>0) { - regnode *trie_op; - /* this can happen only on restudy */ - if ( OP(first) == TRIE ) { - struct regnode_1 *trieop = (struct regnode_1 *) - PerlMemShared_calloc(1, sizeof(struct regnode_1)); - StructCopy(first,trieop,struct regnode_1); - trie_op=(regnode *)trieop; - } else { - struct regnode_charclass *trieop = (struct regnode_charclass *) - PerlMemShared_calloc(1, sizeof(struct regnode_charclass)); - StructCopy(first,trieop,struct regnode_charclass); - trie_op=(regnode *)trieop; - } - OP(trie_op)+=2; - make_trie_failtable(pRExC_state, (regnode *)first, trie_op, 0); - ri->regstclass = trie_op; + /* this can happen only on restudy */ + ri->regstclass = construct_ahocorasick_from_trie(pRExC_state, (regnode *)first, 0); } #endif else if (REGNODE_SIMPLE(OP(first))) @@ -13564,7 +13566,6 @@ parseit: } if (!SIZE_ONLY) { SV* invlist; - char* formatted; char* name; if (UCHARAT(RExC_parse) == '^') { @@ -13585,14 +13586,13 @@ parseit: * will have its name be <__NAME_i>. The design is * discussed in commit * 2f833f5208e26b208886e51e09e2c072b5eabb46 */ - formatted = Perl_form(aTHX_ + name = savepv(Perl_form(aTHX_ "%s%.*s%s\n", (FOLD) ? "__" : "", (int)n, RExC_parse, (FOLD) ? "_i" : "" - ); - name = savepvn(formatted, strlen(formatted)); + )); /* Look up the property name, and get its swash and * inversion list, if the property is found */ @@ -13621,6 +13621,19 @@ parseit: "Property '%"UTF8f"' is unknown", UTF8fARG(UTF, n, name)); } + + /* If the property name doesn't already have a package + * name, add the current one to it so that it can be + * referred to outside it. [perl #121777] */ + if (! instr(name, "::") && PL_curstash) { + char* full_name = Perl_form(aTHX_ + "%s::%s", + HvNAME(PL_curstash), + name); + n = strlen(full_name); + Safefree(name); + name = savepvn(full_name, n); + } Perl_sv_catpvf(aTHX_ listsv, "%cutf8::%"UTF8f"\n", (value == 'p' ? '+' : '!'), UTF8fARG(UTF, n, name)); @@ -14075,7 +14088,7 @@ parseit: AV* this_array; STRLEN cp_count = utf8_length(foldbuf, foldbuf + foldlen); - SV* multi_fold = sv_2mortal(newSVpvn("", 0)); + SV* multi_fold = sv_2mortal(newSVpvs("")); Perl_sv_catpvf(aTHX_ multi_fold, "\\x{%"UVXf"}", value); @@ -14142,7 +14155,7 @@ parseit: && ((prevvalue >= 'a' && value <= 'z') || (prevvalue >= 'A' && value <= 'Z'))) { - _invlist_intersection(this_range, PL_ASCII, + _invlist_intersection(this_range, PL_XPosix_ptrs[_CC_ASCII], &this_range); /* Since this above only contains ascii, the intersection of it @@ -14498,7 +14511,7 @@ parseit: } } - if (HAS_NONLATIN1_FOLD_CLOSURE(j) + if (HAS_NONLATIN1_SIMPLE_FOLD_CLOSURE(j) && (! isASCII(j) || ! ASCII_FOLD_RESTRICTED)) { add_above_Latin1_folds(pRExC_state, @@ -16179,7 +16192,16 @@ Perl_regfree_internal(pTHX_ REGEXP * const rx) PerlMemShared_free(aho->fail); /* do this last!!!! */ PerlMemShared_free(ri->data->data[n]); - PerlMemShared_free(ri->regstclass); + /* we should only ever get called once, so + * assert as much, and also guard the free + * which /might/ happen twice. At the least + * it will make code anlyzers happy and it + * doesn't cost much. - Yves */ + assert(ri->regstclass); + if (ri->regstclass) { + PerlMemShared_free(ri->regstclass); + ri->regstclass = 0; + } } } break;