use utf8;
-plan tests => 214;
+plan tests => 215;
# Test this first before we extend the stack with other operations.
# This caused an asan failure due to a bad write past the end of the stack.
tr/b-y/B-Y/;
is($_, "aBCDEFGHIJKLMNOPQRSTUVWXYz", 'partial uc');
+tr/a-a/AB/;
+is($_, "ABCDEFGHIJKLMNOPQRSTUVWXYz", 'single char range a-a');
+
eval 'tr/a/\N{KATAKANA LETTER AINU P}/;';
like $@,
qr/\\N\{KATAKANA LETTER AINU P\} must not be a named sequence in transliteration operator/,
* 'offset_to_max' is the offset in 'sv' at which the character
* (the range's maximum end point) before 'd' begins.
*/
- const char * max_ptr = SvPVX_const(sv) + offset_to_max;
+ char * max_ptr = SvPVX(sv) + offset_to_max;
const char * min_ptr;
IV range_min;
IV range_max; /* last character in range */
range_max = * (U8*) max_ptr;
}
+ /* If the range is just a single code point, like tr/a-a/.../,
+ * that code point is already in the output, twice. We can
+ * just back up over the second instance and avoid all the rest
+ * of the work. But if it is a variant character, it has been
+ * counted twice in utf8_variant_count, so decrement that count. */
+ if (UNLIKELY(range_max == range_min)) {
+ d = max_ptr;
+ if (! has_utf8 && ! UVCHR_IS_INVARIANT(range_max)) {
+ utf8_variant_count--;
+ }
+ goto range_done;
+ }
+
#ifdef EBCDIC
/* On EBCDIC platforms, we may have to deal with portable
* ranges. These happen if at least one range endpoint is a