use Cname;
ok 'fooB' =~ /\N{foo}[\N{B}\N{b}]/, "Passthrough charname";
+ my $name = "foo\xDF";
+ my $result = eval "'A${name}B' =~ /^A\\N{$name}B\$/";
+ ok !$@ && $result, "Passthrough charname of non-ASCII, Latin1";
#
# Why doesn't must_warn work here?
#
ok $@ && $@ =~ /Invalid character/, 'Verify that leading digit in name gives error';
eval 'q() =~ /\N{COM,MA}/';
ok $@ && $@ =~ /Invalid character/, 'Verify that comma in name gives error';
- my $name = "A\x{D7}O";
+ $name = "A\x{D7}O";
eval "q(W) =~ /\\N{$name}/";
ok $@ && $@ =~ /Invalid character/, 'Verify that latin1 symbol in name gives error';
my $utf8_name = "7 CITIES OF GOLD";
}
}
- /* A custom translator can leave res not in UTF-8, so make sure. XXX This
- * can be revisited to not use utf8 for characters that don't need it when
- * regexes don't have to be in utf8 for Unicode semantics. If doing so,
- * remember EBCDIC */
- if (! SvUTF8(res)) {
- sv_utf8_upgrade(res);
- }
- else { /* Don't accept malformed input */
+ if (SvUTF8(res)) { /* Don't accept malformed input */
const U8* first_bad_char_loc;
STRLEN len;
const char* const str = SvPV_const(res, len);
/* Here it looks like a named character */
- if (PL_lex_inpat) {
-
- /* XXX This block is temporary code. \N{} implies that the
- * pattern is to have Unicode semantics, and therefore
- * currently has to be encoded in utf8. By putting it in
- * utf8 now, we save a whole pass in the regular expression
- * compiler. Once that code is changed so Unicode
- * semantics doesn't necessarily have to be in utf8, this
- * block should be removed. However, the code that parses
- * the output of this would have to be changed to not
- * necessarily expect utf8 */
- if (!has_utf8) {
- SvCUR_set(sv, d - SvPVX_const(sv));
- SvPOK_on(sv);
- *d = '\0';
- /* See Note on sizing above. */
- sv_utf8_upgrade_flags_grow(sv,
- SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
- /* 5 = '\N{' + cur char + NUL */
- (STRLEN)(send - s) + 5);
- d = SvPVX(sv) + SvCUR(sv);
- has_utf8 = TRUE;
- }
- }
-
if (*s == 'U' && s[1] == '+') { /* \N{U+...} */
I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
| PERL_SCAN_DISALLOW_PREFIX;
const char *str_end = str + len;
const STRLEN off = d - SvPVX_const(sv);
+
+ if (! SvUTF8(res)) {
+ /* For the non-UTF-8 case, we can determine the
+ * exact length needed without having to parse
+ * through the string. Each character takes up
+ * 2 hex digits plus either a trailing dot or
+ * the "}" */
+ d = off + SvGROW(sv, off
+ + 3 * len
+ + 6 /* For the "\N{U+", and
+ trailing NUL */
+ + (STRLEN)(send - e));
+ Copy("\\N{U+", d, 5, char);
+ d += 5;
+ while (str < str_end) {
+ char hex_string[4];
+ my_snprintf(hex_string, sizeof(hex_string),
+ "%02X.", (U8) *str);
+ Copy(hex_string, d, 3, char);
+ d += 3;
+ str++;
+ }
+ d--; /* We will overwrite below the final
+ dot with a right brace */
+ }
+ else {
STRLEN char_length; /* cur char's byte length */
STRLEN output_length; /* and the number of bytes
after this is translated
into hex digits */
-
/* 2 hex per byte; 2 chars for '\N'; 2 chars for
* max('U+', '.'); and 1 for NUL */
char hex_string[2 * UTF8_MAXBYTES + 5];
Copy(hex_string, d, output_length, char);
d += output_length;
}
+ }
*d++ = '}'; /* Done. Add the trailing brace */
}