#define HAS_NONLATIN1_FOLD_CLOSURE(i) \
_HAS_NONLATIN1_FOLD_CLOSURE_ONLY_FOR_USE_BY_REGCOMP_DOT_C_AND_REGEXEC_DOT_C(i)
+#define HAS_NONLATIN1_SIMPLE_FOLD_CLOSURE(i) \
+ _HAS_NONLATIN1_SIMPLE_FOLD_CLOSURE_ONLY_FOR_USE_BY_REGCOMP_DOT_C_AND_REGEXEC_DOT_C(i)
#define IS_NON_FINAL_FOLD(c) _IS_NON_FINAL_FOLD_ONLY_FOR_USE_BY_REGCOMP_DOT_C(c)
#define IS_IN_SOME_FOLD_L1(c) _IS_IN_SOME_FOLD_ONLY_FOR_USE_BY_REGCOMP_DOT_C(c)
May be the same as tail.
tail : item following the branch sequence
count : words in the sequence
- flags : currently the OP() type we will be building one of /EXACT(|F|Fl)/
+ flags : currently the OP() type we will be building one of /EXACT(|F|FA|FU|FU_SS)/
depth : indent depth
Inplace optimizes a sequence of 2 or more Branch-Exact nodes into a TRIE node.
: MADE_TRIE;
}
-STATIC void
-S_make_trie_failtable(pTHX_ RExC_state_t *pRExC_state, regnode *source, regnode *stclass, U32 depth)
+STATIC regnode *
+S_construct_ahocorasick_from_trie(pTHX_ RExC_state_t *pRExC_state, regnode *source, U32 depth)
{
/* The Trie is constructed and compressed now so we can build a fail array if
* it's needed
U32 *fail;
reg_ac_data *aho;
const U32 data_slot = add_data( pRExC_state, STR_WITH_LEN("T"));
+ regnode *stclass;
GET_RE_DEBUG_FLAGS_DECL;
- PERL_ARGS_ASSERT_MAKE_TRIE_FAILTABLE;
+ PERL_ARGS_ASSERT_CONSTRUCT_AHOCORASICK_FROM_TRIE;
#ifndef DEBUGGING
PERL_UNUSED_ARG(depth);
#endif
+ if ( OP(source) == TRIE ) {
+ struct regnode_1 *op = (struct regnode_1 *)
+ PerlMemShared_calloc(1, sizeof(struct regnode_1));
+ StructCopy(source,op,struct regnode_1);
+ stclass = (regnode *)op;
+ } else {
+ struct regnode_charclass *op = (struct regnode_charclass *)
+ PerlMemShared_calloc(1, sizeof(struct regnode_charclass));
+ StructCopy(source,op,struct regnode_charclass);
+ stclass = (regnode *)op;
+ }
+ OP(stclass)+=2; /* covert the TRIE type to its AHO-CORASICK equivalent */
ARG_SET( stclass, data_slot );
aho = (reg_ac_data *) PerlMemShared_calloc( 1, sizeof(reg_ac_data) );
});
Safefree(q);
/*RExC_seen |= REG_TRIEDFA_SEEN;*/
+ return stclass;
}
/* Some characters match above-Latin1 ones under /i. This
* is true of EXACTFL ones when the locale is UTF-8 */
- if (HAS_NONLATIN1_FOLD_CLOSURE(uc)
+ if (HAS_NONLATIN1_SIMPLE_FOLD_CLOSURE(uc)
&& (! isASCII(uc) || (OP(scan) != EXACTFA
&& OP(scan) != EXACTFA_NO_TRIE)))
{
/* It is counted once already... */
data->pos_min += minnext * (mincount - counted);
#if 0
-PerlIO_printf(Perl_debug_log, "counted=%"UVdf" deltanext=%"UVdf
- " SSize_t_MAX=%"UVdf" minnext=%"UVdf
- " maxcount=%"UVdf" mincount=%"UVdf"\n",
+PerlIO_printf(Perl_debug_log, "counted=%"UVuf" deltanext=%"UVuf
+ " SSize_t_MAX=%"UVuf" minnext=%"UVuf
+ " maxcount=%"UVuf" mincount=%"UVuf"\n",
(UV)counted, (UV)deltanext, (UV)SSize_t_MAX, (UV)minnext, (UV)maxcount,
(UV)mincount);
if (deltanext != SSize_t_MAX)
-PerlIO_printf(Perl_debug_log, "LHS=%"UVdf" RHS=%"UVdf"\n",
+PerlIO_printf(Perl_debug_log, "LHS=%"UVuf" RHS=%"UVuf"\n",
(UV)(-counted * deltanext + (minnext + deltanext) * maxcount
- minnext * mincount), (UV)(SSize_t_MAX - data->pos_delta));
#endif
/* if we know we have at least two args, create an empty string,
* then concatenate args to that. For no args, return an empty string */
if (!pat && pat_count != 1) {
- pat = newSVpvn("", 0);
+ pat = newSVpvs("");
SAVEFREESV(pat);
alloced = TRUE;
}
else if (PL_regkind[OP(first)] == TRIE &&
((reg_trie_data *)ri->data->data[ ARG(first) ])->minlen>0)
{
- regnode *trie_op;
- /* this can happen only on restudy */
- if ( OP(first) == TRIE ) {
- struct regnode_1 *trieop = (struct regnode_1 *)
- PerlMemShared_calloc(1, sizeof(struct regnode_1));
- StructCopy(first,trieop,struct regnode_1);
- trie_op=(regnode *)trieop;
- } else {
- struct regnode_charclass *trieop = (struct regnode_charclass *)
- PerlMemShared_calloc(1, sizeof(struct regnode_charclass));
- StructCopy(first,trieop,struct regnode_charclass);
- trie_op=(regnode *)trieop;
- }
- OP(trie_op)+=2;
- make_trie_failtable(pRExC_state, (regnode *)first, trie_op, 0);
- ri->regstclass = trie_op;
+ /* this can happen only on restudy */
+ ri->regstclass = construct_ahocorasick_from_trie(pRExC_state, (regnode *)first, 0);
}
#endif
else if (REGNODE_SIMPLE(OP(first)))
}
if (!SIZE_ONLY) {
SV* invlist;
- char* formatted;
char* name;
if (UCHARAT(RExC_parse) == '^') {
* will have its name be <__NAME_i>. The design is
* discussed in commit
* 2f833f5208e26b208886e51e09e2c072b5eabb46 */
- formatted = Perl_form(aTHX_
+ name = savepv(Perl_form(aTHX_
"%s%.*s%s\n",
(FOLD) ? "__" : "",
(int)n,
RExC_parse,
(FOLD) ? "_i" : ""
- );
- name = savepvn(formatted, strlen(formatted));
+ ));
/* Look up the property name, and get its swash and
* inversion list, if the property is found */
"Property '%"UTF8f"' is unknown",
UTF8fARG(UTF, n, name));
}
+
+ /* If the property name doesn't already have a package
+ * name, add the current one to it so that it can be
+ * referred to outside it. [perl #121777] */
+ if (! instr(name, "::") && PL_curstash) {
+ char* full_name = Perl_form(aTHX_
+ "%s::%s",
+ HvNAME(PL_curstash),
+ name);
+ n = strlen(full_name);
+ Safefree(name);
+ name = savepvn(full_name, n);
+ }
Perl_sv_catpvf(aTHX_ listsv, "%cutf8::%"UTF8f"\n",
(value == 'p' ? '+' : '!'),
UTF8fARG(UTF, n, name));
AV* this_array;
STRLEN cp_count = utf8_length(foldbuf,
foldbuf + foldlen);
- SV* multi_fold = sv_2mortal(newSVpvn("", 0));
+ SV* multi_fold = sv_2mortal(newSVpvs(""));
Perl_sv_catpvf(aTHX_ multi_fold, "\\x{%"UVXf"}", value);
&& ((prevvalue >= 'a' && value <= 'z')
|| (prevvalue >= 'A' && value <= 'Z')))
{
- _invlist_intersection(this_range, PL_ASCII,
+ _invlist_intersection(this_range, PL_XPosix_ptrs[_CC_ASCII],
&this_range);
/* Since this above only contains ascii, the intersection of it
}
}
- if (HAS_NONLATIN1_FOLD_CLOSURE(j)
+ if (HAS_NONLATIN1_SIMPLE_FOLD_CLOSURE(j)
&& (! isASCII(j) || ! ASCII_FOLD_RESTRICTED))
{
add_above_Latin1_folds(pRExC_state,
PerlMemShared_free(aho->fail);
/* do this last!!!! */
PerlMemShared_free(ri->data->data[n]);
- PerlMemShared_free(ri->regstclass);
+ /* we should only ever get called once, so
+ * assert as much, and also guard the free
+ * which /might/ happen twice. At the least
+ * it will make code anlyzers happy and it
+ * doesn't cost much. - Yves */
+ assert(ri->regstclass);
+ if (ri->regstclass) {
+ PerlMemShared_free(ri->regstclass);
+ ri->regstclass = 0;
+ }
}
}
break;