This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Add rt69056.t to MANIFEST
[perl5.git] / toke.c
diff --git a/toke.c b/toke.c
index ca18af1..de163fb 100644 (file)
--- a/toke.c
+++ b/toke.c
@@ -124,16 +124,14 @@ static I32 utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen);
 #   define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8))
 #endif
 
+/* The maximum number of characters preceding the unrecognized one to display */
+#define UNRECOGNIZED_PRECEDE_COUNT 10
+
 /* In variables named $^X, these are the legal values for X.
  * 1999-02-27 mjd-perl-patch@plover.com */
 #define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
 
-/* On MacOS, respect nonbreaking spaces */
-#ifdef MACOS_TRADITIONAL
-#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\312'||(c)=='\t')
-#else
 #define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
-#endif
 
 /* LEX_* are values for PL_lex_state, the state of the lexer.
  * They are arranged oddly so that the guard on the switch statement
@@ -1386,7 +1384,9 @@ S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len)
 {
     dVAR;
     SV * const sv = newSVpvn_utf8(start, len,
-                                 UTF && !IN_BYTES
+                                 !IN_BYTES
+                                 && UTF
+                                 && !is_ascii_string((const U8*)start, len)
                                  && is_utf8_string((const U8*)start, len));
     return sv;
 }
@@ -2826,7 +2826,7 @@ S_intuit_method(pTHX_ char *start, GV *gv, CV *cv)
       bare_package:
            start_force(PL_curforce);
            NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0,
-                                                  newSVpvn(tmpbuf,len));
+                                                 S_newSV_maybe_utf8(aTHX_ tmpbuf, len));
            NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
            if (PL_madskills)
                curmad('X', newSVpvn(start,SvPVX(PL_linestr) + soff - start));
@@ -3657,8 +3657,17 @@ Perl_yylex(pTHX)
     default:
        if (isIDFIRST_lazy_if(s,UTF))
            goto keylookup;
-       len = UTF ? Perl_utf8_length(aTHX_ (U8 *) PL_linestart, (U8 *) s) : (STRLEN) (s - PL_linestart);
-       Perl_croak(aTHX_ "Unrecognized character \\x%02X in column %d", *s & 255, (int) len + 1);
+       {
+        unsigned char c = *s;
+        len = UTF ? Perl_utf8_length(aTHX_ (U8 *) PL_linestart, (U8 *) s) : (STRLEN) (s - PL_linestart);
+        if (len > UNRECOGNIZED_PRECEDE_COUNT) {
+            d = UTF ? (char *) Perl_utf8_hop(aTHX_ (U8 *) s, -UNRECOGNIZED_PRECEDE_COUNT) : s - UNRECOGNIZED_PRECEDE_COUNT;
+        } else {
+            d = PL_linestart;
+        }      
+        *s = '\0';
+        Perl_croak(aTHX_ "Unrecognized character \\x%02X; marked by <-- HERE after %s<-- HERE near column %d", c, d, (int) len + 1);
+    }
     case 4:
     case 26:
        goto fake_eof;                  /* emulate EOF on ^D or ^Z */
@@ -3948,7 +3957,6 @@ Perl_yylex(pTHX)
                        *s = '#';       /* Don't try to parse shebang line */
                }
 #endif /* ALTERNATE_SHEBANG */
-#ifndef MACOS_TRADITIONAL
                if (!d &&
                    *s == '#' &&
                    ipathend > ipath &&
@@ -3964,7 +3972,7 @@ Perl_yylex(pTHX)
                    while (s < PL_bufend && isSPACE(*s))
                        s++;
                    if (s < PL_bufend) {
-                       Newxz(newargv,PL_origargc+3,char*);
+                       Newx(newargv,PL_origargc+3,char*);
                        newargv[1] = s;
                        while (s < PL_bufend && !isSPACE(*s))
                            s++;
@@ -3979,7 +3987,6 @@ Perl_yylex(pTHX)
                    PERL_FPU_POST_EXEC
                    Perl_croak(aTHX_ "Can't exec %s", ipath);
                }
-#endif
                if (d) {
                    while (*d && !isSPACE(*d))
                        d++;
@@ -3994,7 +4001,14 @@ Perl_yylex(pTHX)
                        const char *d1 = d;
 
                        do {
-                           if (*d1 == 'M' || *d1 == 'm' || *d1 == 'C') {
+                           bool baduni = FALSE;
+                           if (*d1 == 'C') {
+                               const char *d2 = d1 + 1;
+                               if (parse_unicode_opts((const char **)&d2)
+                                   != PL_unicode)
+                                   baduni = TRUE;
+                           }
+                           if (baduni || *d1 == 'M' || *d1 == 'm') {
                                const char * const m = d1;
                                while (*d1 && !isSPACE(*d1))
                                    d1++;
@@ -4042,9 +4056,6 @@ Perl_yylex(pTHX)
       "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
 #endif
     case ' ': case '\t': case '\f': case 013:
-#ifdef MACOS_TRADITIONAL
-    case '\312':
-#endif
 #ifdef PERL_MAD
        PL_realtokenstart = -1;
        if (!PL_thiswhite)
@@ -4376,11 +4387,6 @@ Perl_yylex(pTHX)
                    if (len == 6 && strnEQ(SvPVX(sv), "unique", len)) {
                        sv_free(sv);
                        if (PL_in_my == KEY_our) {
-#ifdef USE_ITHREADS
-                           GvUNIQUE_on(cGVOPx_gv(pl_yylval.opval));
-#else
-                           /* skip to avoid loading attributes.pm */
-#endif
                            deprecate(":unique");
                        }
                        else
@@ -4808,10 +4814,6 @@ Perl_yylex(pTHX)
        pl_yylval.ival = 0;
        OPERATOR(ASSIGNOP);
     case '!':
-       if (PL_expect == XSTATE && s[1] == '!' && s[2] == '!') {
-           s += 3;
-           LOP(OP_DIE,XTERM);
-       }
        s++;
        {
            const char tmp = *s++;
@@ -5063,10 +5065,6 @@ Perl_yylex(pTHX)
            AOPERATOR(DORDOR);
        }
      case '?':                 /* may either be conditional or pattern */
-       if (PL_expect == XSTATE && s[1] == '?' && s[2] == '?') {
-           s += 3;
-           LOP(OP_WARN,XTERM);
-       }
        if (PL_expect == XOPERATOR) {
             char tmp = *s++;
             if(tmp == '?') {
@@ -5282,14 +5280,17 @@ Perl_yylex(pTHX)
        /* Is this a label? */
        if (!tmp && PL_expect == XSTATE
              && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
+           tmp = keyword(PL_tokenbuf, len, 0);
+           if (tmp)
+               Perl_croak(aTHX_ "Can't use keyword '%s' as a label", PL_tokenbuf);
            s = d + 1;
            pl_yylval.pval = CopLABEL_alloc(PL_tokenbuf);
            CLINE;
            TOKEN(LABEL);
        }
-
-       /* Check for keywords */
-       tmp = keyword(PL_tokenbuf, len, 0);
+       else
+           /* Check for keywords */
+           tmp = keyword(PL_tokenbuf, len, 0);
 
        /* Is this a word before a => operator? */
        if (*d == '=' && d[1] == '>') {
@@ -5700,10 +5701,22 @@ Perl_yylex(pTHX)
 
                /* Call it a bare word */
 
-               bareword:
                if (PL_hints & HINT_STRICT_SUBS)
                    pl_yylval.opval->op_private |= OPpCONST_STRICT;
                else {
+               bareword:
+                   /* after "print" and similar functions (corresponding to
+                    * "F? L" in opcode.pl), whatever wasn't already parsed as
+                    * a filehandle should be subject to "strict subs".
+                    * Likewise for the optional indirect-object argument to system
+                    * or exec, which can't be a bareword */
+                   if ((PL_last_lop_op == OP_PRINT
+                           || PL_last_lop_op == OP_PRTF
+                           || PL_last_lop_op == OP_SAY
+                           || PL_last_lop_op == OP_SYSTEM
+                           || PL_last_lop_op == OP_EXEC)
+                           && (PL_hints & HINT_STRICT_SUBS))
+                       pl_yylval.opval->op_private |= OPpCONST_STRICT;
                    if (lastchar != '-') {
                        if (ckWARN(WARN_RESERVED)) {
                            d = PL_tokenbuf;