void
Perl_lex_stuff_pvn(pTHX_ char *pv, STRLEN len, U32 flags)
{
+ dVAR;
char *bufptr;
PERL_ARGS_ASSERT_LEX_STUFF_PVN;
if (flags & ~(LEX_STUFF_UTF8))
I32
Perl_lex_peek_unichar(pTHX_ U32 flags)
{
+ dVAR;
char *s, *bufend;
if (flags & ~(LEX_KEEP_PREVIOUS))
Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_peek_unichar");
* errors and upgrading to utf8) is:
* Further disambiguate between the two meanings of \N, and if
* not a charname, go process it elsewhere
- * If of form \N{U+...}, pass it through if a pattern; otherwise
- * convert to utf8
- * Otherwise must be \N{NAME}: convert to \N{U+c1.c2...} if a pattern;
- * otherwise convert to utf8 */
+ * If of form \N{U+...}, pass it through if a pattern;
+ * otherwise convert to utf8
+ * Otherwise must be \N{NAME}: convert to \N{U+c1.c2...} if a
+ * pattern; otherwise convert to utf8 */
/* Here, s points to the 'N'; the test below is guaranteed to
* succeed if we are being called on a pattern as we already
}
s++;
- /* If there is no matching '}', it is an error outside of a
- * pattern, or ambiguous inside. */
+ /* If there is no matching '}', it is an error. */
if (! (e = strchr(s, '}'))) {
if (! PL_lex_inpat) {
yyerror("Missing right brace on \\N{}");
- continue;
- }
- else {
-
- /* A missing brace means it can't be a legal character
- * name, and it could be a legal "match non-newline".
- * But it's kind of weird without an unescaped left
- * brace, so warn. */
- if (ckWARN(WARN_SYNTAX)) {
- Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
- "Missing right brace on \\N{} or unescaped left brace after \\N. Assuming the latter");
- }
- s -= 3; /* Backup over cur char, {, N, to the '\' */
- *d++ = NATIVE_TO_NEED(has_utf8,'\\');
- goto default_action;
+ } else {
+ yyerror("Missing right brace on \\N{} or unescaped left brace after \\N.");
}
+ continue;
}
/* Here it looks like a named character */
/* Pass through to the regex compiler unchanged. The
* reason we evaluated the number above is to make sure
- * there wasn't a syntax error. It also makes sure
- * that the syntax created below, \N{Uc1.c2...}, is
- * internal-only */
+ * there wasn't a syntax error. */
s -= 5; /* Include the '\N{U+' */
Copy(s, d, e - s + 1, char); /* 1 = include the } */
d += e - s + 1;
d += len;
}
SvREFCNT_dec(res);
- }
+
+ /* Deprecate non-approved name syntax */
+ if (ckWARN_d(WARN_DEPRECATED)) {
+ bool problematic = FALSE;
+ char* i = s;
+
+ /* For non-ut8 input, look to see that the first
+ * character is an alpha, then loop through the rest
+ * checking that each is a continuation */
+ if (! this_utf8) {
+ if (! isALPHAU(*i)) problematic = TRUE;
+ else for (i = s + 1; i < e; i++) {
+ if (isCHARNAME_CONT(*i)) continue;
+ problematic = TRUE;
+ break;
+ }
+ }
+ else {
+ /* Similarly for utf8. For invariants can check
+ * directly. We accept anything above the latin1
+ * range because it is immaterial to Perl if it is
+ * correct or not, and is expensive to check. But
+ * it is fairly easy in the latin1 range to convert
+ * the variants into a single character and check
+ * those */
+ if (UTF8_IS_INVARIANT(*i)) {
+ if (! isALPHAU(*i)) problematic = TRUE;
+ } else if (UTF8_IS_DOWNGRADEABLE_START(*i)) {
+ if (! isALPHAU(UNI_TO_NATIVE(UTF8_ACCUMULATE(*i,
+ *(i+1)))))
+ {
+ problematic = TRUE;
+ }
+ }
+ if (! problematic) for (i = s + UTF8SKIP(s);
+ i < e;
+ i+= UTF8SKIP(i))
+ {
+ if (UTF8_IS_INVARIANT(*i)) {
+ if (isCHARNAME_CONT(*i)) continue;
+ } else if (! UTF8_IS_DOWNGRADEABLE_START(*i)) {
+ continue;
+ } else if (isCHARNAME_CONT(
+ UNI_TO_NATIVE(
+ UTF8_ACCUMULATE(*i, *(i+1)))))
+ {
+ continue;
+ }
+ problematic = TRUE;
+ break;
+ }
+ }
+ if (problematic) {
+ char *string;
+ Newx(string, e - i + 1, char);
+ Copy(i, string, e - i, char);
+ string[e - i] = '\0';
+ Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
+ "Deprecated character(s) in \\N{...} starting at '%s'",
+ string);
+ Safefree(string);
+ }
+ }
+ } /* End \N{NAME} */
#ifdef EBCDIC
if (!dorange)
native_range = FALSE; /* \N{} is defined to be Unicode */
}
/* charnames doesn't work well if there have been errors found */
- if (PL_error_count > 0 && strEQ(key,"charnames")) return res;
+ if (PL_error_count > 0 && strEQ(key,"charnames"))
+ return &PL_sv_undef;
cvp = hv_fetch(table, key, keylen, FALSE);
if (!cvp || !SvOK(*cvp)) {