Es |UV |reg_recode |const char value|NN SV **encp
Es |regnode*|regpiece |NN struct RExC_state_t *pRExC_state \
|NN I32 *flagp|U32 depth
-Es |bool |grok_bslash_N |NN struct RExC_state_t *pRExC_state \
- |NULLOK regnode** nodep|NULLOK UV *valuep \
- |NN I32 *flagp|U32 depth|bool in_char_class
+Es |bool |grok_bslash_N |NN struct RExC_state_t *pRExC_state \
+ |NULLOK regnode** nodep|NULLOK UV *valuep \
+ |NN I32 *flagp|U32 depth|bool in_char_class \
+ |const bool strict
Es |void |reginsert |NN struct RExC_state_t *pRExC_state \
|U8 op|NN regnode *opnd|U32 depth
Es |void |regtail |NN struct RExC_state_t *pRExC_state \
#define get_invlist_previous_index_addr(a) S_get_invlist_previous_index_addr(aTHX_ a)
#define get_invlist_version_id_addr(a) S_get_invlist_version_id_addr(aTHX_ a)
#define get_invlist_zero_addr(a) S_get_invlist_zero_addr(aTHX_ a)
-#define grok_bslash_N(a,b,c,d,e,f) S_grok_bslash_N(aTHX_ a,b,c,d,e,f)
+#define grok_bslash_N(a,b,c,d,e,f,g) S_grok_bslash_N(aTHX_ a,b,c,d,e,f,g)
#define handle_sets(a,b,c,d) S_handle_sets(aTHX_ a,b,c,d)
#define invlist_array(a) S_invlist_array(aTHX_ a)
#define invlist_clone(a) S_invlist_clone(aTHX_ a)
#define PERL_ARGS_ASSERT_GET_INVLIST_ZERO_ADDR \
assert(invlist)
-STATIC bool S_grok_bslash_N(pTHX_ struct RExC_state_t *pRExC_state, regnode** nodep, UV *valuep, I32 *flagp, U32 depth, bool in_char_class)
+STATIC bool S_grok_bslash_N(pTHX_ struct RExC_state_t *pRExC_state, regnode** nodep, UV *valuep, I32 *flagp, U32 depth, bool in_char_class, const bool strict)
__attribute__nonnull__(pTHX_1)
__attribute__nonnull__(pTHX_4);
#define PERL_ARGS_ASSERT_GROK_BSLASH_N \
}
STATIC bool
-S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state, regnode** node_p, UV *valuep, I32 *flagp, U32 depth, bool in_char_class)
+S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state, regnode** node_p, UV *valuep, I32 *flagp, U32 depth, bool in_char_class,
+ const bool strict /* Apply stricter parsing rules? */
+ )
{
/* This is expected to be called by a parser routine that has recognized '\N'
}
else if (in_char_class) {
if (SIZE_ONLY && in_char_class) {
- ckWARNreg(RExC_parse,
- "Ignoring zero length \\N{} in character class"
- );
+ if (strict) {
+ RExC_parse++; /* Position after the "}" */
+ vFAIL("Zero length \\N{}");
+ }
+ else {
+ ckWARNreg(RExC_parse,
+ "Ignoring zero length \\N{} in character class");
+ }
}
ret = FALSE;
}
}
if (in_char_class && has_multiple_chars) {
- ckWARNreg(endchar, "Using just the first character returned by \\N{} in character class");
+ if (strict) {
+ RExC_parse = endbrace;
+ vFAIL("\\N{} in character class restricted to one character");
+ }
+ else {
+ ckWARNreg(endchar, "Using just the first character returned by \\N{} in character class");
+ }
}
RExC_parse = endbrace + 1;
* special treatment for quantifiers is not needed for such single
* character sequences */
++RExC_parse;
- if (! grok_bslash_N(pRExC_state, &ret, NULL, flagp, depth, FALSE)) {
+ if (! grok_bslash_N(pRExC_state, &ret, NULL, flagp, depth, FALSE,
+ FALSE /* not strict */ )) {
RExC_parse--;
goto defchar;
}
* */
RExC_parse = p + 1;
if (! grok_bslash_N(pRExC_state, NULL, &ender,
- flagp, depth, FALSE))
+ flagp, depth, FALSE,
+ FALSE /* not strict */ ))
{
RExC_parse = p = oldp;
goto loopdone;
from earlier versions, OTOH that behaviour was broken
as well. */
if (! grok_bslash_N(pRExC_state, NULL, &value, flagp, depth,
- TRUE /* => charclass */))
+ TRUE, /* => charclass */
+ strict))
{
goto parseit;
}
Unicode surrogate U+%X is illegal in UTF-8
UTF-16 surrogate U+%X
False [] range "%s" in regex; marked by <-- HERE in m/%s/
+\N{} in character class restricted to one character in regex; marked by <-- HERE in m/%s/
+Zero length \N{} in regex; marked by <-- HERE in m/%s/
ok "\N{LONG-STR}" =~ /^\N{LONG-STR}$/, 'Verify that long string works';
ok "\N{LONG-STR}" =~ /^\N{LONG-STR}$/i, 'Verify under folding that long string works';
+ eval '/(?[[\N{EMPTY-STR}]])/';
+ ok $@ && $@ =~ /Zero length \\N\{}/;
+
undef $w;
eval q [is("\N{TOO MANY SPACES}", "TOO MANY SPACES", "Multiple spaces in character name works")];
like ($w, qr/A sequence of multiple spaces in a charnames alias definition is deprecated/, "... but returns a deprecation warning");
'm/(?[[\w-x]])/' => 'False [] range "\w-" in regex; marked by {#} in m/(?[[\w-{#}x]])/',
'm/(?[[a-\pM]])/' => 'False [] range "a-\pM" in regex; marked by {#} in m/(?[[a-\pM{#}]])/',
'm/(?[[\pM-x]])/' => 'False [] range "\pM-" in regex; marked by {#} in m/(?[[\pM-{#}x]])/',
+ 'm/(?[[\N{LATIN CAPITAL LETTER A WITH MACRON AND GRAVE}]])/' => '\N{} in character class restricted to one character in regex; marked by {#} in m/(?[[\N{U+100.300{#}}]])/',
);
+# Tests involving a user-defined charnames translator are in pat_advanced.t
##
## Key-value pairs of code/error of code that should have non-fatal warnings.