From 0a96133fa53fe1b2ee03b5cce4cdd9c7c96a0867 Mon Sep 17 00:00:00 2001 From: Karl Williamson Date: Thu, 18 Feb 2010 15:06:51 -0700 Subject: [PATCH] Make a missing right brace on \N{ fatal It was decided that this should be a fatal error instead of a warning. Also some comments were updated. --- pod/perldiag.pod | 30 +++++++++++++++--------------- toke.c | 33 +++++++++------------------------ 2 files changed, 24 insertions(+), 39 deletions(-) diff --git a/pod/perldiag.pod b/pod/perldiag.pod index 486a515..4a12889 100644 --- a/pod/perldiag.pod +++ b/pod/perldiag.pod @@ -2512,32 +2512,32 @@ can vary from one line to the next. (F) Missing right brace in C<\x{...}>, C<\p{...}>, C<\P{...}>, or C<\N{...}>. -=item Missing right brace on \\N{} or unescaped left brace after \\N. Assuming the latter +=item Missing right brace on \\N{} or unescaped left brace after \\N -(W syntax) -C<\N> has traditionally been followed by a name enclosed in braces, -meaning the character (or sequence of characters) given by that name. +(F) +C<\N> has two meanings. + +The traditional one has it followed by a name enclosed +in braces, meaning the character (or sequence of characters) given by that name. Thus C<\N{ASTERISK}> is another way of writing C<*>, valid in both -double-quoted strings and regular expression patterns. -In patterns, it doesn't have the meaning an unescaped C<*> does. +double-quoted strings and regular expression patterns. In patterns, it doesn't +have the meaning an unescaped C<*> does. -Starting in Perl 5.12.0, C<\N> also can have an additional meaning in patterns, -namely to match a non-newline character. (This is like C<.> but is not -affected by the C</s> modifier.) +Starting in Perl 5.12.0, C<\N> also can have an additional meaning (only) in +patterns, namely to match a non-newline character. (This is like C<.> but is +not affected by the C</s> modifier.) This can lead to some ambiguities. 
When C<\N> is not followed immediately by a left brace, Perl assumes the "match non-newline character" meaning. Also, if the braces form a valid quantifier such as C<\N{3}> or C<\N{5,}>, Perl assumes that this means to match the given quantity of non-newlines (in these examples, -3, and 5 or more, respectively). In all other case, where there is a C<\N{> +3; and 5 or more, respectively). In all other cases, where there is a C<\N{> and a matching C<}>, Perl assumes that a character name is desired. However, if there is no matching C<}>, Perl doesn't know if it was mistakenly -omitted, or if "match non-newline" followed by "match a C<{>" was desired. -It assumes the latter because that is actually a valid interpretation as -written, unlike the other case. If you meant the former, you need to add the -matching right brace. If you did mean the latter, you can silence this warning -by writing instead C<\N\{>. +omitted, or if "match non-newline" followed by "match a C<{>" was desired, and +raises this error. 
If you meant the former, add the right brace; if you meant +the latter, escape the brace with a backslash, like so: C<\N\{>. =item Missing right curly or square bracket diff --git a/toke.c b/toke.c index fcfdd71..997b46a 100644 --- a/toke.c +++ b/toke.c @@ -2968,10 +2968,10 @@ S_scan_const(pTHX_ char *start) * errors and upgrading to utf8) is: * Further disambiguate between the two meanings of \N, and if * not a charname, go process it elsewhere - * If of form \N{U+...}, pass it through if a pattern; otherwise - * convert to utf8 - * Otherwise must be \N{NAME}: convert to \N{U+c1.c2...} if a pattern; - * otherwise convert to utf8 */ + * If of form \N{U+...}, pass it through if a pattern; + * otherwise convert to utf8 + * Otherwise must be \N{NAME}: convert to \N{U+c1.c2...} if a + * pattern; otherwise convert to utf8 */ /* Here, s points to the 'N'; the test below is guaranteed to * succeed if we are being called on a pattern as we already @@ -2985,27 +2985,14 @@ S_scan_const(pTHX_ char *start) } s++; - /* If there is no matching '}', it is an error outside of a - * pattern, or ambiguous inside. */ + /* If there is no matching '}', it is an error. */ if (! (e = strchr(s, '}'))) { if (! PL_lex_inpat) { yyerror("Missing right brace on \\N{}"); - continue; - } - else { - - /* A missing brace means it can't be a legal character - * name, and it could be a legal "match non-newline". - * But it's kind of weird without an unescaped left - * brace, so warn. */ - if (ckWARN(WARN_SYNTAX)) { - Perl_warner(aTHX_ packWARN(WARN_SYNTAX), - "Missing right brace on \\N{} or unescaped left brace after \\N. 
Assuming the latter"); - } - s -= 3; /* Backup over cur char, {, N, to the '\' */ - *d++ = NATIVE_TO_NEED(has_utf8,'\\'); - goto default_action; + } else { + yyerror("Missing right brace on \\N{} or unescaped left brace after \\N."); } + continue; } /* Here it looks like a named character */ @@ -3053,9 +3040,7 @@ S_scan_const(pTHX_ char *start) /* Pass through to the regex compiler unchanged. The * reason we evaluated the number above is to make sure - * there wasn't a syntax error. It also makes sure - * that the syntax created below, \N{Uc1.c2...}, is - * internal-only */ + * there wasn't a syntax error. */ s -= 5; /* Include the '\N{U+' */ Copy(s, d, e - s + 1, char); /* 1 = include the } */ d += e - s + 1; -- 1.8.3.1