ANYOFHr sv 1 Like ANYOFH, but the flags field contains
packed bounds for all matchable UTF-8 start
bytes.
- ANYOFHs sv anyofhs Like ANYOFHb, but has a string field that
+ ANYOFHs sv 1 Like ANYOFHb, but has a string field that
gives the leading matchable UTF-8 bytes;
flags field is len
ANYOFR packed 1 Matches any character in the range given by
regarglen[op] + STR_SZ(len),
"anyofhs");
FILL_NODE(ret, op);
- RExC_emit += 1 + regarglen[op]
- - 1 + STR_SZ(len); /* Replace the [1]
- element of the struct
- by the real value */
- REGNODE_p(ret)->flags = len;
+ ((struct regnode_anyofhs *) REGNODE_p(ret))->str_len
+ = len;
Copy(low_utf8, /* Add the common bytes */
((struct regnode_anyofhs *) REGNODE_p(ret))->string,
len, U8);
- NEXT_OFF(REGNODE_p(ret)) = regarglen[op] + STR_SZ(len);
+ RExC_emit += NODE_SZ_STR(REGNODE_p(ret));
set_ANYOF_arg(pRExC_state, REGNODE_p(ret), cp_list,
NULL, only_utf8_locale_list);
goto not_anyof;
else if ( op == PLUS || op == STAR) {
DUMPUNTIL(NEXTOPER(node), NEXTOPER(node) + 1);
}
- else if (PL_regkind[(U8)op] == EXACT) {
+ else if (PL_regkind[(U8)op] == EXACT || op == ANYOFHs) {
/* Literal string, where present. */
node += NODE_SZ_STR(node) - 1;
node = NEXTOPER(node);
};
struct regnode_anyofhs { /* Constructed this way to keep the string aligned. */
- U8 flags;
+ U8 str_len;
U8 type;
U16 next_off;
U32 arg1; /* set by set_ANYOF_arg() */
ANYOFH ANYOF, sv 1 S ; Like ANYOF, but only has "High" matches, none in the bitmap; the flags field contains the lowest matchable UTF-8 start byte
ANYOFHb ANYOF, sv 1 S ; Like ANYOFH, but all matches share the same UTF-8 start byte, given in the flags field
ANYOFHr ANYOF, sv 1 S ; Like ANYOFH, but the flags field contains packed bounds for all matchable UTF-8 start bytes.
-ANYOFHs ANYOF, sv anyofhs S ; Like ANYOFHb, but has a string field that gives the leading matchable UTF-8 bytes; flags field is len
+ANYOFHs ANYOF, sv 1 S ; Like ANYOFHb, but has a string field that gives the leading matchable UTF-8 bytes; flags field is len
ANYOFR ANYOFR, packed 1 S ; Matches any character in the range given by its packed args: upper 12 bits is the max delta from the base lower 20; the flags field contains the lowest matchable UTF-8 start byte
ANYOFRb ANYOFR, packed 1 S ; Like ANYOFR, but all matches share the same UTF-8 start byte, given in the flags field
# There is no ANYOFRr because khw doesn't think there are likely to be real-world cases where such a large range is used.
EXTRA_SIZE(struct regnode_1), /* ANYOFH */
EXTRA_SIZE(struct regnode_1), /* ANYOFHb */
EXTRA_SIZE(struct regnode_1), /* ANYOFHr */
- EXTRA_SIZE(struct regnode_anyofhs), /* ANYOFHs */
+ EXTRA_SIZE(struct regnode_1), /* ANYOFHs */
EXTRA_SIZE(struct regnode_1), /* ANYOFR */
EXTRA_SIZE(struct regnode_1), /* ANYOFRb */
EXTRA_SIZE(struct regnode_1), /* ANYOFM */
skip_all('no re module') unless defined &DynaLoader::boot_DynaLoader;
skip_all_without_unicode_tables();
-plan tests => 1005; # Update this when adding/deleting tests.
+plan tests => 1011; # Update this when adding/deleting tests.
run_tests() unless caller;
unlike("\x{4000004}", $pat, "4000004 isn't in pattern");
like("\x{4000005}", $pat, "4000005 is in pattern");
unlike("\x{4000006}", $pat, "4000006 isn't in pattern");
+
+ # gh #17319
+ $pat = qr/[\N{U+200D}\N{U+2000}]()/;
+ unlike("\x{1FFF}", $pat, "1FFF isn't in pattern");
+ like("\x{2000}", $pat, "2000 is in pattern");
+ unlike("\x{2001}", $pat, "2001 isn't in pattern");
+ unlike("\x{200C}", $pat, "200C isn't in pattern");
+ like("\x{200D}", $pat, "200 is in pattern");
+ unlike("\x{200E}", $pat, "200E isn't in pattern");
}
} # End of sub run_tests