use utf8;
-plan tests => 215;
+plan tests => 216;
# Test this first before we extend the stack with other operations.
# This caused an asan failure due to a bad write past the end of the stack.
}
+{ # [perl #130656] This bug happens when the tr is split across lines, so
+ # that the first line causes it to go into UTF-8, and the 2nd is only
+ # things like \x
+ my $x = "\x{E235}";
+ $x =~ tr
+ [\x{E234}-\x{E342}\x{E5B5}-\x{E5DF}]
+ [\x{E5CD}-\x{E5DF}\x{EA80}-\x{EAFA}\x{EB0E}-\x{EB8E}\x{EAFB}-\x{EB0D}\x{E5B5}-\x{E5CC}];
+
+ is $x, "\x{E5CE}", '[perl #130656]';
+}
1;
bool didrange = FALSE; /* did we just finish a range? */
bool in_charclass = FALSE; /* within /[...]/ */
bool has_utf8 = FALSE; /* Output constant is UTF8 */
- bool has_above_latin1 = FALSE; /* does something require special
- handling in tr/// ? */
bool this_utf8 = cBOOL(UTF); /* Is the source string assumed to be
UTF8? But, this can show as true
when the source isn't utf8, as for
STRLEN offset_to_max; /* The offset in the output to where the range
high-end character is temporarily placed */
+ /* Does something require special handling in tr/// ? This avoids extra
+ * work in a less likely case. As such, khw didn't feel it was worth
+ * adding any branches to the more mainline code to handle this, which
+ * means that this doesn't get set in some circumstances when things like
+ * \x{100} get expanded out. As a result there needs to be extra testing
+ * done in the tr code */
+ bool has_above_latin1 = FALSE;
+
/* Note on sizing: The scanned constant is placed into sv, which is
* initialized by newSV() assuming one byte of output for every byte of
* input. This routine expects newSV() to allocate an extra byte for a
/* The tests here for being above Latin1 and similar ones
* in the following 'else' suffice to find all such
* occurences in the constant, except those added by a
- * backslash escape sequence, like \x{100}. And all those
+ * backslash escape sequence, like \x{100}. Mostly, those
* set 'has_above_latin1' as appropriate */
if (this_utf8 && UTF8_IS_ABOVE_LATIN1(*s)) {
has_above_latin1 = TRUE;
min_ptr = (char*) utf8_hop( (U8*) max_ptr, -1);
range_min = valid_utf8_to_uvchr( (U8*) min_ptr, NULL);
range_max = valid_utf8_to_uvchr( (U8*) max_ptr, NULL);
+
+ /* This compensates for not all code setting
+ * 'has_above_latin1', so that we don't skip stuff that
+ * should be executed */
+ if (range_max > 255) {
+ has_above_latin1 = TRUE;
+ }
}
else {
min_ptr = max_ptr - 1;