|NN I32 *flagp|I32 first|U32 depth
Es |STRLEN |reguni |NN const struct RExC_state_t *pRExC_state \
|UV uv|NN char *s
-Es |regnode*|regclass |NN struct RExC_state_t *pRExC_state|U32 depth
+Es |regnode*|regclass |NN struct RExC_state_t *pRExC_state \
+ |NN I32 *flagp|U32 depth
Es |regnode*|reg_node |NN struct RExC_state_t *pRExC_state|U8 op
Es |UV |reg_recode |const char value|NN SV **encp
Es |regnode*|regpiece |NN struct RExC_state_t *pRExC_state \
|NN I32 *flagp|U32 depth
Es |bool |grok_bslash_N |NN struct RExC_state_t *pRExC_state \
|NULLOK regnode** nodep|NULLOK UV *valuep \
- |NULLOK I32 *flagp|U32 depth|bool in_char_class
+ |NN I32 *flagp|U32 depth|bool in_char_class
Es |void |reginsert |NN struct RExC_state_t *pRExC_state \
|U8 op|NN regnode *opnd|U32 depth
Es |void |regtail |NN struct RExC_state_t *pRExC_state \
EsRn |char * |regwhite |NN struct RExC_state_t *pRExC_state \
|NN char *p
Ei |void |alloc_maybe_populate_EXACT|NN struct RExC_state_t *pRExC_state \
- |NN regnode *node|STRLEN len|UV code_point
+ |NN regnode *node|NN I32 *flagp|STRLEN len \
+ |UV code_point
Ei |U8 |compute_EXACTish|NN struct RExC_state_t *pRExC_state
Es |char * |nextchar |NN struct RExC_state_t *pRExC_state
Es |bool |reg_skipcomment|NN struct RExC_state_t *pRExC_state
#define add_alternate(a,b,c) S_add_alternate(aTHX_ a,b,c)
#define add_cp_to_invlist(a,b) S_add_cp_to_invlist(aTHX_ a,b)
#define add_data S_add_data
-#define alloc_maybe_populate_EXACT(a,b,c,d) S_alloc_maybe_populate_EXACT(aTHX_ a,b,c,d)
+#define alloc_maybe_populate_EXACT(a,b,c,d,e) S_alloc_maybe_populate_EXACT(aTHX_ a,b,c,d,e)
#define checkposixcc(a) S_checkposixcc(aTHX_ a)
#define cl_and S_cl_and
#define cl_anything S_cl_anything
#define reganode(a,b,c) S_reganode(aTHX_ a,b,c)
#define regatom(a,b,c) S_regatom(aTHX_ a,b,c)
#define regbranch(a,b,c,d) S_regbranch(aTHX_ a,b,c,d)
-#define regclass(a,b) S_regclass(aTHX_ a,b)
+#define regclass(a,b,c) S_regclass(aTHX_ a,b,c)
#define reginsert(a,b,c,d) S_reginsert(aTHX_ a,b,c,d)
#define regpiece(a,b,c) S_regpiece(aTHX_ a,b,c)
#define regpposixcc(a,b) S_regpposixcc(aTHX_ a,b)
#define PERL_ARGS_ASSERT_ADD_DATA \
assert(pRExC_state); assert(s)
-PERL_STATIC_INLINE void S_alloc_maybe_populate_EXACT(pTHX_ struct RExC_state_t *pRExC_state, regnode *node, STRLEN len, UV code_point)
+PERL_STATIC_INLINE void S_alloc_maybe_populate_EXACT(pTHX_ struct RExC_state_t *pRExC_state, regnode *node, I32 *flagp, STRLEN len, UV code_point)
__attribute__nonnull__(pTHX_1)
- __attribute__nonnull__(pTHX_2);
+ __attribute__nonnull__(pTHX_2)
+ __attribute__nonnull__(pTHX_3);
#define PERL_ARGS_ASSERT_ALLOC_MAYBE_POPULATE_EXACT \
- assert(pRExC_state); assert(node)
+ assert(pRExC_state); assert(node); assert(flagp)
STATIC void S_checkposixcc(pTHX_ struct RExC_state_t *pRExC_state)
__attribute__nonnull__(pTHX_1);
assert(invlist)
STATIC bool S_grok_bslash_N(pTHX_ struct RExC_state_t *pRExC_state, regnode** nodep, UV *valuep, I32 *flagp, U32 depth, bool in_char_class)
- __attribute__nonnull__(pTHX_1);
+ __attribute__nonnull__(pTHX_1)
+ __attribute__nonnull__(pTHX_4);
#define PERL_ARGS_ASSERT_GROK_BSLASH_N \
- assert(pRExC_state)
+ assert(pRExC_state); assert(flagp)
PERL_STATIC_INLINE UV* S_invlist_array(pTHX_ SV* const invlist)
__attribute__warn_unused_result__
#define PERL_ARGS_ASSERT_REGBRANCH \
assert(pRExC_state); assert(flagp)
-STATIC regnode* S_regclass(pTHX_ struct RExC_state_t *pRExC_state, U32 depth)
- __attribute__nonnull__(pTHX_1);
+STATIC regnode* S_regclass(pTHX_ struct RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
+ __attribute__nonnull__(pTHX_1)
+ __attribute__nonnull__(pTHX_2);
#define PERL_ARGS_ASSERT_REGCLASS \
- assert(pRExC_state)
+ assert(pRExC_state); assert(flagp)
STATIC void S_reginsert(pTHX_ struct RExC_state_t *pRExC_state, U8 op, regnode *opnd, U32 depth)
__attribute__nonnull__(pTHX_1)
}
PERL_STATIC_INLINE void
-S_alloc_maybe_populate_EXACT(pTHX_ RExC_state_t *pRExC_state, regnode *node, STRLEN len, UV code_point)
+S_alloc_maybe_populate_EXACT(pTHX_ RExC_state_t *pRExC_state, regnode *node, I32* flagp, STRLEN len, UV code_point)
{
- /* This knows the details about sizing an EXACTish node, and potentially
- * populating it with a single character. If <len> is non-zero, it assumes
- * that the node has already been populated, and just does the sizing,
- * ignoring <code_point>. Otherwise it looks at <code_point> and
- * calculates what <len> should be. In pass 1, it sizes the node
- * appropriately. In pass 2, it additionally will populate the node's
- * STRING with <code_point>, if <len> is 0.
+ /* This knows the details about sizing an EXACTish node, setting flags for
+ * it (by setting <*flagp>), and potentially populating it with a single
+ * character.
+ *
+ * If <len> is non-zero, this function assumes that the node has already
+ * been populated, and just does the sizing. In this case <code_point>
+ * should be the final code point that has already been placed into the
+ * node. This value will be ignored except that under some circumstances
+ * <*flagp> is set based on it.
+ *
+ * If <len> is zero, the function assumes that the node is to contain only
+ * the single character given by <code_point> and calculates what <len>
+ * should be. In pass 1, it sizes the node appropriately. In pass 2, it
+ * additionally will populate the node's STRING with <code_point>, if <len>
+ * is 0. In both cases <*flagp> is appropriately set.
*
* It knows that under FOLD, UTF characters and the Latin Sharp S must be
* folded (the latter only when the rules indicate it can match 'ss') */
Copy((char *) character, STRING(node), len, char);
}
}
+
+ *flagp |= HASWIDTH;
+ if (len == 1 && UNI_IS_INVARIANT(code_point))
+ *flagp |= SIMPLE;
}
/*
case '[':
{
char * const oregcomp_parse = ++RExC_parse;
- ret = regclass(pRExC_state,depth+1);
+ ret = regclass(pRExC_state, flagp,depth+1);
if (*RExC_parse != ']') {
RExC_parse = oregcomp_parse;
vFAIL("Unmatched [");
}
nextchar(pRExC_state);
- *flagp |= HASWIDTH|SIMPLE;
Set_Node_Length(ret, RExC_parse - oregcomp_parse + 1); /* MJD */
break;
}
}
RExC_parse--;
- ret = regclass(pRExC_state,depth+1);
+ ret = regclass(pRExC_state, flagp,depth+1);
RExC_end = oldregxend;
RExC_parse--;
Set_Node_Offset(ret, parse_start + 2);
Set_Node_Cur_Length(ret);
nextchar(pRExC_state);
- *flagp |= HASWIDTH|SIMPLE;
}
break;
case 'N':
loopdone: /* Jumped to when encounters something that shouldn't be in
the node */
+
+ alloc_maybe_populate_EXACT(pRExC_state, ret, flagp, len, ender);
+
RExC_parse = p - 1;
Set_Node_Cur_Length(ret); /* MJD */
nextchar(pRExC_state);
if (iv < 0)
vFAIL("Internal disaster");
}
- if (len > 0)
- *flagp |= HASWIDTH;
- if (len == 1 && UNI_IS_INVARIANT(ender))
- *flagp |= SIMPLE;
- alloc_maybe_populate_EXACT(pRExC_state, ret, len, 0);
} /* End of label 'defchar:' */
break;
} /* End of giant switch on input character */
above 255, a range list is used */
STATIC regnode *
-S_regclass(pTHX_ RExC_state_t *pRExC_state, U32 depth)
+S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
{
dVAR;
register UV nextvalue;
if this makes sense as it does change the behaviour
from earlier versions, OTOH that behaviour was broken
as well. */
- if (! grok_bslash_N(pRExC_state, NULL, &value, NULL, depth,
+ if (! grok_bslash_N(pRExC_state, NULL, &value, flagp, depth,
TRUE /* => charclass */))
{
goto parseit;
if (invert) {
op += NALNUM - ALNUM;
}
+ *flagp |= HASWIDTH|SIMPLE;
break;
/* The second group doesn't depend of the charset modifiers.
case ANYOF_HORIZWS:
is_horizws:
op = (invert) ? NHORIZWS : HORIZWS;
+ *flagp |= HASWIDTH|SIMPLE;
break;
case ANYOF_NVERTWS:
/* FALLTHROUGH */
case ANYOF_VERTWS:
op = (invert) ? NVERTWS : VERTWS;
+ *flagp |= HASWIDTH|SIMPLE;
break;
case ANYOF_MAX:
if (invert) {
if (! LOC && value == '\n') {
op = REG_ANY; /* Optimize [^\n] */
+ *flagp |= HASWIDTH|SIMPLE;
+ RExC_naughty++;
}
}
else if (value < 256 || UTF) {
if (prevvalue == '0') {
if (value == '9') {
op = (invert) ? NDIGITA : DIGITA;
+ *flagp |= HASWIDTH|SIMPLE;
}
}
}
if (! SIZE_ONLY) {
FLAGS(ret) = arg;
}
+ *flagp |= HASWIDTH|SIMPLE;
}
else if (PL_regkind[op] == EXACT) {
- alloc_maybe_populate_EXACT(pRExC_state, ret, 0, value);
+ alloc_maybe_populate_EXACT(pRExC_state, ret, flagp, 0, value);
}
RExC_parse = (char *) cur_parse;
* it doesn't match anything. (perluniprops.pod notes such
* properties) */
op = OPFAIL;
+ *flagp |= HASWIDTH|SIMPLE;
}
else if (start == end) { /* The range is a single code point */
if (! invlist_iternext(cp_list, &start, &end)
else if (start == 0) {
if (end == UV_MAX) {
op = SANY;
+ *flagp |= HASWIDTH|SIMPLE;
+ RExC_naughty++;
}
else if (end == '\n' - 1
&& invlist_iternext(cp_list, &start, &end)
&& start == '\n' + 1 && end == UV_MAX)
{
op = REG_ANY;
+ *flagp |= HASWIDTH|SIMPLE;
+ RExC_naughty++;
}
}
RExC_parse = (char *)cur_parse;
if (PL_regkind[op] == EXACT) {
- alloc_maybe_populate_EXACT(pRExC_state, ret, 0, value);
+ alloc_maybe_populate_EXACT(pRExC_state, ret, flagp, 0, value);
}
SvREFCNT_dec(listsv);
RExC_rxi->data->data[n] = (void*)rv;
ARG_SET(ret, n);
}
+
+ *flagp |= HASWIDTH|SIMPLE;
return ret;
}
#undef HAS_NONLOCALE_RUNTIME_PROPERTY_DEFINITION