This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
podcheck.t: Move a declaration
[perl5.git] / toke.c
diff --git a/toke.c b/toke.c
index d2a7e7c..2c6cd85 100644 (file)
--- a/toke.c
+++ b/toke.c
@@ -52,7 +52,6 @@ Individual members of C<PL_parser> have their own documentation.
 #define PL_lex_brackstack      (PL_parser->lex_brackstack)
 #define PL_lex_casemods                (PL_parser->lex_casemods)
 #define PL_lex_casestack        (PL_parser->lex_casestack)
-#define PL_lex_defer           (PL_parser->lex_defer)
 #define PL_lex_dojoin          (PL_parser->lex_dojoin)
 #define PL_lex_formbrack        (PL_parser->lex_formbrack)
 #define PL_lex_inpat           (PL_parser->lex_inpat)
@@ -142,7 +141,6 @@ static const char* const ident_too_long = "Identifier too long";
                                        string or after \E, $foo, etc       */
 #define LEX_INTERPCONST                 2 /* NOT USED */
 #define LEX_FORMLINE            1 /* expecting a format line               */
-#define LEX_KNOWNEXT            0 /* next token known; just return it      */
 
 
 #ifdef DEBUGGING
@@ -1694,7 +1692,7 @@ S_incline(pTHX_ const char *s)
     }
     else {
        t = s;
-       while (!isSPACE(*t))
+       while (*t && !isSPACE(*t))
            t++;
        e = t;
     }
@@ -1919,10 +1917,6 @@ S_force_next(pTHX_ I32 type)
     assert(PL_nexttoke < C_ARRAY_LENGTH(PL_nexttype));
     PL_nexttype[PL_nexttoke] = type;
     PL_nexttoke++;
-    if (PL_lex_state != LEX_KNOWNEXT) {
-       PL_lex_defer = PL_lex_state;
-       PL_lex_state = LEX_KNOWNEXT;
-    }
 }
 
 /*
@@ -1938,13 +1932,13 @@ static int
 S_postderef(pTHX_ int const funny, char const next)
 {
     assert(funny == DOLSHARP || strchr("$@%&*", funny));
-    assert(strchr("*[{", next));
     if (next == '*') {
        PL_expect = XOPERATOR;
        if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
            assert('@' == funny || '$' == funny || DOLSHARP == funny);
            PL_lex_state = LEX_INTERPEND;
-           force_next(POSTJOIN);
+           if ('@' == funny)
+               force_next(POSTJOIN);
        }
        force_next(next);
        PL_bufptr+=2;
@@ -2346,7 +2340,6 @@ S_sublex_push(pTHX)
     SAVEI32(PL_lex_casemods);
     SAVEI32(PL_lex_starts);
     SAVEI8(PL_lex_state);
-    SAVEI8(PL_lex_defer);
     SAVESPTR(PL_lex_repl);
     SAVEVPTR(PL_lex_inpat);
     SAVEI16(PL_lex_inwhat);
@@ -2518,8 +2511,11 @@ S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e)
 
     PERL_ARGS_ASSERT_GET_AND_CHECK_BACKSLASH_N_NAME;
 
-    if (!SvCUR(res))
+    if (!SvCUR(res)) {
+        Perl_ck_warner_d(aTHX_ packWARN(WARN_DEPRECATED),
+                       "Unknown charname '' is deprecated");
         return res;
+    }
 
     if (UTF && ! is_utf8_string_loc((U8 *) backslash_ptr,
                                      e - backslash_ptr,
@@ -2585,11 +2581,6 @@ S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e)
            if (*s == ' ' && *(s-1) == ' ') {
                 goto multi_spaces;
             }
-           if ((U8) *s == NBSP_NATIVE && ckWARN_d(WARN_DEPRECATED)) {
-                Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
-                           "NO-BREAK SPACE in a charnames "
-                           "alias definition is deprecated");
-            }
             s++;
         }
     }
@@ -2637,14 +2628,6 @@ S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e)
                 {
                     goto bad_charname;
                 }
-                if (*s == *NBSP_UTF8
-                    && *(s+1) == *(NBSP_UTF8+1)
-                    && ckWARN_d(WARN_DEPRECATED))
-                {
-                    Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
-                                "NO-BREAK SPACE in a charnames "
-                                "alias definition is deprecated");
-                }
                 s += 2;
             }
             else {
@@ -4450,15 +4433,15 @@ S_check_scalar_slice(pTHX_ char *s)
     The type of the next token
 
   Structure:
+      Check if we have already built the token; if so, use it.
       Switch based on the current state:
-         - if we already built the token before, use it
          - if we have a case modifier in a string, deal with that
          - handle other cases of interpolation inside a string
          - scan the next line if we are inside a format
-      In the normal state switch on the next character:
+      In the normal state, switch on the next character:
          - default:
            if alphabetic, go to key lookup
-           unrecoginized character - croak
+           unrecognized character - croak
          - 0/4/26: handle end-of-line or EOF
          - cases for whitespace
          - \n and #: handle comments and line numbers
@@ -4517,10 +4500,6 @@ Perl_yylex(pTHX)
     if (PL_nexttoke) {
        PL_nexttoke--;
        pl_yylval = PL_nextval[PL_nexttoke];
-       if (!PL_nexttoke) {
-           PL_lex_state = PL_lex_defer;
-           PL_lex_defer = LEX_NORMAL;
-       }
        {
            I32 next_type;
            next_type = PL_nexttype[PL_nexttoke];
@@ -4694,14 +4673,6 @@ Perl_yylex(pTHX)
        /* FALLTHROUGH */
 
     case LEX_INTERPEND:
-       /* Treat state as LEX_NORMAL if we have no inner lexing scope.
-          XXX This hack can be removed if we stop setting PL_lex_state to
-          LEX_KNOWNEXT, as can the hack under LEX_INTREPCONCAT below.  */
-       if (UNLIKELY(!PL_lex_inwhat)) {
-           PL_lex_state = LEX_NORMAL;
-           break;
-       }
-
        if (PL_lex_dojoin) {
            const U8 dojoin_was = PL_lex_dojoin;
            PL_lex_dojoin = FALSE;
@@ -4753,14 +4724,6 @@ Perl_yylex(pTHX)
            Perl_croak(aTHX_ "panic: INTERPCONCAT, lex_brackets=%ld",
                       (long) PL_lex_brackets);
 #endif
-       /* Treat state as LEX_NORMAL when not in an inner lexing scope.
-          XXX This hack can be removed if we stop setting PL_lex_state to
-          LEX_KNOWNEXT.  */
-       if (UNLIKELY(!PL_lex_inwhat)) {
-           PL_lex_state = LEX_NORMAL;
-           break;
-       }
-
        if (PL_bufptr == PL_bufend)
            return REPORT(sublex_done());
 
@@ -4818,9 +4781,22 @@ Perl_yylex(pTHX)
   retry:
     switch (*s) {
     default:
-       if (UTF ? isIDFIRST_utf8((U8*)s) : isALNUMC(*s))
+       if (UTF) {
+            if (! isUTF8_CHAR((U8 *) s, (U8 *) PL_bufend)) {
+                ENTER;
+                SAVESPTR(PL_warnhook);
+                PL_warnhook = PERL_WARNHOOK_FATAL;
+                utf8n_to_uvchr((U8*)s, PL_bufend-s, NULL, 0);
+                LEAVE;
+            }
+            if (isIDFIRST_utf8((U8*)s)) {
+                goto keylookup;
+            }
+        }
+        else if (isALNUMC(*s)) {
            goto keylookup;
-       {
+       }
+    {
         SV *dsv = newSVpvs_flags("", SVs_TEMP);
         const char *c = UTF ? sv_uni_display(dsv, newSVpvn_flags(s,
                                                     UTF8SKIP(s),
@@ -5856,12 +5832,12 @@ Perl_yylex(pTHX)
                    else
                        /* skip plain q word */
                        while (t < PL_bufend && isWORDCHAR_lazy_if(t,UTF))
-                            t += UTF8SKIP(t);
+                           t += UTF ? UTF8SKIP(t) : 1;
                }
                else if (isWORDCHAR_lazy_if(t,UTF)) {
-                   t += UTF8SKIP(t);
+                   t += UTF ? UTF8SKIP(t) : 1;
                    while (t < PL_bufend && isWORDCHAR_lazy_if(t,UTF))
-                        t += UTF8SKIP(t);
+                       t += UTF ? UTF8SKIP(t) : 1;
                }
                while (t < PL_bufend && isSPACE(*t))
                    t++;
@@ -6016,6 +5992,8 @@ Perl_yylex(pTHX)
        {
            const char tmp = *s++;
            if (tmp == '=') {
+               if ((s == PL_linestart+2 || s[-3] == '\n') && strnEQ(s, "=====", 5))
+                   Perl_croak(aTHX_ "Version control conflict marker '%.*s'", 7, s - 2);
                if (!PL_lex_allbrackets
                     && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
                 {
@@ -6130,8 +6108,11 @@ Perl_yylex(pTHX)
        if (PL_expect != XOPERATOR) {
            if (s[1] != '<' && !strchr(s,'>'))
                check_uni();
-           if (s[1] == '<' && s[2] != '>')
+           if (s[1] == '<' && s[2] != '>') {
+               if ((s == PL_linestart || s[-1] == '\n') && strnEQ(s+2, "<<<<<", 5))
+                   Perl_croak(aTHX_ "Version control conflict marker '%.*s'", 7, s);
                s = scan_heredoc(s);
+           }
            else
                s = scan_inputsymbol(s);
            PL_expect = XOPERATOR;
@@ -6141,6 +6122,8 @@ Perl_yylex(pTHX)
        {
            char tmp = *s++;
            if (tmp == '<') {
+               if ((s == PL_linestart+2 || s[-3] == '\n') && strnEQ(s, "<<<<<", 5))
+                   Perl_croak(aTHX_ "Version control conflict marker '%.*s'", 7, s - 2);
                if (*s == '=' && !PL_lex_allbrackets
                     && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
                 {
@@ -6181,6 +6164,8 @@ Perl_yylex(pTHX)
        {
            const char tmp = *s++;
            if (tmp == '>') {
+               if ((s == PL_linestart+2 || s[-3] == '\n') && strnEQ(s, ">>>>>", 5))
+                   Perl_croak(aTHX_ "Version control conflict marker '%.*s'", 7, s - 2);
                if (*s == '=' && !PL_lex_allbrackets
                     && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
                 {
@@ -7733,7 +7718,6 @@ Perl_yylex(pTHX)
            UNI(OP_LCFIRST);
 
        case KEY_local:
-           pl_yylval.ival = 0;
            OPERATOR(LOCAL);
 
        case KEY_length:
@@ -7805,17 +7789,7 @@ Perl_yylex(pTHX)
            if (isIDFIRST_lazy_if(s,UTF)) {
                s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
                if (len == 3 && strnEQ(PL_tokenbuf, "sub", 3))
-               {
-                   if (!FEATURE_LEXSUBS_IS_ENABLED)
-                       Perl_croak(aTHX_
-                                 "Experimental \"%s\" subs not enabled",
-                                  tmp == KEY_my    ? "my"    :
-                                  tmp == KEY_state ? "state" : "our");
-                   Perl_ck_warner_d(aTHX_
-                       packWARN(WARN_EXPERIMENTAL__LEXICAL_SUBS),
-                       "The lexical_subs feature is experimental");
                    goto really_sub;
-               }
                PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
                if (!PL_in_my_stash) {
                    char tmpbuf[1024];
@@ -7826,7 +7800,6 @@ Perl_yylex(pTHX)
                    yyerror_pv(tmpbuf, UTF ? SVf_UTF8 : 0);
                }
            }
-           pl_yylval.ival = 1;
            OPERATOR(MY);
 
        case KEY_next:
@@ -8888,26 +8861,17 @@ S_scan_word(pTHX_ char *s, char *dest, STRLEN destlen, int allow_package, STRLEN
  *          2) '{'
  *     The final case currently doesn't get this far in the program, so we
  *     don't test for it.  If that were to change, it would be ok to allow it.
- *  c) When not under Unicode rules, any upper Latin1 character
- *  d) Otherwise, when unicode rules are used, all XIDS characters.
+ *  b) When not under Unicode rules, any upper Latin1 character
+ *  c) Otherwise, when unicode rules are used, all XIDS characters.
  *
  *      Because all ASCII characters have the same representation whether
  *      encoded in UTF-8 or not, we can use the foo_A macros below and '\0' and
- *      '{' without knowing if is UTF-8 or not.
- * EBCDIC already uses the rules that ASCII platforms will use after the
- * deprecation cycle; see comment below about the deprecation. */
-#ifdef EBCDIC
-#   define VALID_LEN_ONE_IDENT(s, is_utf8)                                    \
+ *      '{' without knowing if is UTF-8 or not. */
+#define VALID_LEN_ONE_IDENT(s, is_utf8)                                       \
     (isGRAPH_A(*(s)) || ((is_utf8)                                            \
                          ? isIDFIRST_utf8((U8*) (s))                          \
                          : (isGRAPH_L1(*s)                                    \
                             && LIKELY((U8) *(s) != LATIN1_TO_NATIVE(0xAD)))))
-#else
-#   define VALID_LEN_ONE_IDENT(s, is_utf8)                                    \
-    (isGRAPH_A(*(s)) || ((is_utf8)                                            \
-                         ? isIDFIRST_utf8((U8*) (s))                          \
-                         : ! isASCII_utf8((U8*) (s))))
-#endif
 
 STATIC char *
 S_scan_ident(pTHX_ char *s, char *dest, STRLEN destlen, I32 ck_uni)
@@ -8972,18 +8936,6 @@ S_scan_ident(pTHX_ char *s, char *dest, STRLEN destlen, I32 ck_uni)
                           : 1)
         && VALID_LEN_ONE_IDENT(s, is_utf8))
     {
-        /* Deprecate all non-graphic characters.  Include SHY as a non-graphic,
-         * because often it has no graphic representation.  (We can't get to
-         * here with SHY when 'is_utf8' is true, so no need to include a UTF-8
-         * test for it.) */
-        if ((is_utf8)
-            ? ! isGRAPH_utf8( (U8*) s)
-            : (! isGRAPH_L1( (U8) *s)
-               || UNLIKELY((U8) *(s) == LATIN1_TO_NATIVE(0xAD))))
-        {
-            deprecate("literal non-graphic characters in variable names");
-        }
-
         if (is_utf8) {
             const STRLEN skip = UTF8SKIP(s);
             STRLEN i;
@@ -9266,7 +9218,9 @@ S_scan_pat(pTHX_ char *start, I32 type)
                       "Use of /c modifier is meaningless without /g" );
     }
 
-    STD_PMMOD_FLAGS_PARSE_X_WARN(x_mod_count);
+    if (UNLIKELY((x_mod_count) > 1)) {
+        yyerror("Only one /x regex modifier is allowed");
+    }
 
     PL_lex_op = (OP*)pm;
     pl_yylval.ival = OP_MATCH;
@@ -9321,7 +9275,9 @@ S_scan_subst(pTHX_ char *start)
        }
     }
 
-    STD_PMMOD_FLAGS_PARSE_X_WARN(x_mod_count);
+    if (UNLIKELY((x_mod_count) > 1)) {
+        yyerror("Only one /x regex modifier is allowed");
+    }
 
     if ((pm->op_pmflags & PMf_CONTINUE)) {
         Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" );
@@ -9542,7 +9498,7 @@ S_scan_heredoc(pTHX_ char *s)
        SV *linestr;
        char *bufend;
        char * const olds = s;
-       PERL_CONTEXT * const cx = &cxstack[cxstack_ix];
+       PERL_CONTEXT * const cx = CX_CUR();
        /* These two fields are not set until an inner lexing scope is
           entered.  But we need them set here. */
        shared->ls_bufptr  = s;
@@ -9577,9 +9533,10 @@ S_scan_heredoc(pTHX_ char *s)
                goto streaming;
            }
          }
-       else {  /* eval */
+       else {  /* eval or we've already hit EOF */
            s = (char*)memchr((void*)s, '\n', PL_bufend - s);
-           assert(s);
+           if (!s)
+                goto interminable;
        }
        linestr = shared->ls_linestr;
        bufend = SvEND(linestr);
@@ -11077,8 +11034,7 @@ Perl_yyerror_pvn(pTHX_ const char *const s, STRLEN len, U32 flags)
     else if (yychar > 255)
        sv_catpvs(where_sv, "next token ???");
     else if (yychar == YYEMPTY) {
-       if (    PL_lex_state == LEX_NORMAL
-            || (PL_lex_state == LEX_KNOWNEXT && PL_lex_defer == LEX_NORMAL))
+       if (PL_lex_state == LEX_NORMAL)
            sv_catpvs(where_sv, "at end of line");
        else if (PL_lex_inpat)
            sv_catpvs(where_sv, "within pattern");
@@ -11747,7 +11703,7 @@ Perl_parse_label(pTHX_ U32 flags)
 {
     if (flags & ~PARSE_OPTIONAL)
        Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_label");
-    if (PL_lex_state == LEX_KNOWNEXT) {
+    if (PL_nexttoke) {
        PL_parser->yychar = yylex();
        if (PL_parser->yychar == LABEL) {
            char * const lpv = pl_yylval.pval;