This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Fix Module::CoreList's own $VERSION in 5.11.4
[perl5.git] / toke.c
diff --git a/toke.c b/toke.c
index fcfdd71..7167004 100644 (file)
--- a/toke.c
+++ b/toke.c
@@ -938,6 +938,7 @@ function is more convenient.
 void
 Perl_lex_stuff_pvn(pTHX_ char *pv, STRLEN len, U32 flags)
 {
+    dVAR;
     char *bufptr;
     PERL_ARGS_ASSERT_LEX_STUFF_PVN;
     if (flags & ~(LEX_STUFF_UTF8))
@@ -1302,6 +1303,7 @@ is encountered, an exception is generated.
 I32
 Perl_lex_peek_unichar(pTHX_ U32 flags)
 {
+    dVAR;
     char *s, *bufend;
     if (flags & ~(LEX_KEEP_PREVIOUS))
        Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_peek_unichar");
@@ -2968,10 +2970,10 @@ S_scan_const(pTHX_ char *start)
                 * errors and upgrading to utf8) is:
                 *  Further disambiguate between the two meanings of \N, and if
                 *      not a charname, go process it elsewhere
-                *  If of form \N{U+...}, pass it through if a pattern; otherwise
-                *      convert to utf8
-                *  Otherwise must be \N{NAME}: convert to \N{U+c1.c2...} if a pattern;
-                *      otherwise convert to utf8 */
+                *  If of form \N{U+...}, pass it through if a pattern;
+                *      otherwise convert to utf8
+                *  Otherwise must be \N{NAME}: convert to \N{U+c1.c2...} if a
+                *  pattern; otherwise convert to utf8 */
 
                /* Here, s points to the 'N'; the test below is guaranteed to
                 * succeed if we are being called on a pattern as we already
@@ -2985,27 +2987,14 @@ S_scan_const(pTHX_ char *start)
                }
                s++;
 
-               /* If there is no matching '}', it is an error outside of a
-                * pattern, or ambiguous inside. */
+               /* If there is no matching '}', it is an error. */
                if (! (e = strchr(s, '}'))) {
                    if (! PL_lex_inpat) {
                        yyerror("Missing right brace on \\N{}");
-                       continue;
-                   }
-                   else {
-
-                       /* A missing brace means it can't be a legal character
-                        * name, and it could be a legal "match non-newline".
-                        * But it's kind of weird without an unescaped left
-                        * brace, so warn. */
-                       if (ckWARN(WARN_SYNTAX)) {
-                           Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
-                                   "Missing right brace on \\N{} or unescaped left brace after \\N.  Assuming the latter");
-                       }
-                       s -= 3; /* Backup over cur char, {, N, to the '\' */
-                       *d++ = NATIVE_TO_NEED(has_utf8,'\\');
-                       goto default_action;
+                   } else {
+                       yyerror("Missing right brace on \\N{} or unescaped left brace after \\N.");
                    }
+                   continue;
                }
 
                /* Here it looks like a named character */
@@ -3053,9 +3042,7 @@ S_scan_const(pTHX_ char *start)
 
                        /* Pass through to the regex compiler unchanged.  The
                         * reason we evaluated the number above is to make sure
-                        * there wasn't a syntax error.  It also makes sure
-                        * that the syntax created below, \N{Uc1.c2...}, is
-                        * internal-only */
+                        * there wasn't a syntax error. */
                        s -= 5;     /* Include the '\N{U+' */
                        Copy(s, d, e - s + 1, char);    /* 1 = include the } */
                        d += e - s + 1;
@@ -3219,7 +3206,70 @@ S_scan_const(pTHX_ char *start)
                        d += len;
                    }
                    SvREFCNT_dec(res);
-               }
+
+                   /* Deprecate non-approved name syntax */
+                   if (ckWARN_d(WARN_DEPRECATED)) {
+                       bool problematic = FALSE;
+                       char* i = s;
+
+                       /* For non-ut8 input, look to see that the first
+                        * character is an alpha, then loop through the rest
+                        * checking that each is a continuation */
+                       if (! this_utf8) {
+                           if (! isALPHAU(*i)) problematic = TRUE;
+                           else for (i = s + 1; i < e; i++) {
+                               if (isCHARNAME_CONT(*i)) continue;
+                               problematic = TRUE;
+                               break;
+                           }
+                       }
+                       else {
+                           /* Similarly for utf8.  For invariants can check
+                            * directly.  We accept anything above the latin1
+                            * range because it is immaterial to Perl if it is
+                            * correct or not, and is expensive to check.  But
+                            * it is fairly easy in the latin1 range to convert
+                            * the variants into a single character and check
+                            * those */
+                           if (UTF8_IS_INVARIANT(*i)) {
+                               if (! isALPHAU(*i)) problematic = TRUE;
+                           } else if (UTF8_IS_DOWNGRADEABLE_START(*i)) {
+                               if (! isALPHAU(UNI_TO_NATIVE(UTF8_ACCUMULATE(*i,
+                                                                           *(i+1)))))
+                               {
+                                   problematic = TRUE;
+                               }
+                           }
+                           if (! problematic) for (i = s + UTF8SKIP(s);
+                                                   i < e;
+                                                   i+= UTF8SKIP(i))
+                           {
+                               if (UTF8_IS_INVARIANT(*i)) {
+                                   if (isCHARNAME_CONT(*i)) continue;
+                               } else if (! UTF8_IS_DOWNGRADEABLE_START(*i)) {
+                                   continue;
+                               } else if (isCHARNAME_CONT(
+                                           UNI_TO_NATIVE(
+                                           UTF8_ACCUMULATE(*i, *(i+1)))))
+                               {
+                                   continue;
+                               }
+                               problematic = TRUE;
+                               break;
+                           }
+                       }
+                       if (problematic) {
+                           char *string;
+                           Newx(string, e - i + 1, char);
+                           Copy(i, string, e - i, char);
+                           string[e - i] = '\0';
+                           Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
+                               "Deprecated character(s) in \\N{...} starting at '%s'",
+                               string);
+                           Safefree(string);
+                       }
+                   }
+               } /* End \N{NAME} */
 #ifdef EBCDIC
                if (!dorange) 
                    native_range = FALSE; /* \N{} is defined to be Unicode */
@@ -11520,7 +11570,8 @@ S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen,
     }
 
     /* charnames doesn't work well if there have been errors found */
-    if (PL_error_count > 0 && strEQ(key,"charnames")) return res;
+    if (PL_error_count > 0 && strEQ(key,"charnames"))
+       return &PL_sv_undef;
 
     cvp = hv_fetch(table, key, keylen, FALSE);
     if (!cvp || !SvOK(*cvp)) {