[perl #125373] set $! in chdir() if env not set, clarify docs

[perl5.git] / toke.c
diff --git a/toke.c b/toke.c

index fa4de96..763baa5 100644 (file)
--- a/toke.c
+++ b/toke.c
@@ -1252,7 +1252,7 @@ buffer has reached the end of the input text.
  */
  
  #define LEX_FAKE_EOF 0x80000000
-#define LEX_NO_TERM  0x40000000
+#define LEX_NO_TERM  0x40000000 /* here-doc */
  
  bool
  Perl_lex_next_chunk(pTHX_ U32 flags)
@@ -1266,6 +1266,8 @@ Perl_lex_next_chunk(pTHX_ U32 flags)
      bool got_some;
      if (flags & ~(LEX_KEEP_PREVIOUS|LEX_FAKE_EOF|LEX_NO_TERM))
         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_next_chunk");
+    if (!(flags & LEX_NO_TERM) && PL_lex_inwhat)
+       return FALSE;
      linestr = PL_parser->linestr;
      buf = SvPVX(linestr);
      if (!(flags & LEX_KEEP_PREVIOUS) &&
@@ -1647,6 +1649,7 @@ S_incline(pTHX_ const char *s)
      const char *n;
      const char *e;
      line_t line_num;
+    UV uv;
  
      PERL_ARGS_ASSERT_INCLINE;
  
@@ -1696,7 +1699,9 @@ S_incline(pTHX_ const char *s)
      if (*e != '\n' && *e != '\0')
         return;         /* false alarm */
  
-    line_num = grok_atou(n, &e) - 1;
+    if (!grok_atoUV(n, &uv, &e))
+        return;
+    line_num = ((line_t)uv) - 1;
  
      if (t - s > 0) {
         const STRLEN len = t - s;
@@ -1804,7 +1809,7 @@ S_skipspace_flags(pTHX_ char *s, U32 flags)
         STRLEN bufptr_pos = PL_bufptr - SvPVX(PL_linestr);
         PL_bufptr = s;
         lex_read_space(flags | LEX_KEEP_PREVIOUS |
-               (PL_sublex_info.sub_inwhat || PL_lex_state == LEX_FORMLINE ?
+               (PL_lex_inwhat || PL_lex_state == LEX_FORMLINE ?
                     LEX_NO_NEXT_CHUNK : 0));
         s = PL_bufptr;
         PL_bufptr = SvPVX(PL_linestr) + bufptr_pos;
@@ -1836,13 +1841,13 @@ S_check_uni(pTHX)
         PL_last_uni++;
      s = PL_last_uni;
      while (isWORDCHAR_lazy_if(s,UTF) || *s == '-')
-       s++;
+       s += UTF ? UTF8SKIP(s) : 1;
      if ((t = strchr(s, '(')) && t < PL_bufptr)
         return;
  
      Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
-                    "Warning: Use of \"%.*s\" without parentheses is ambiguous",
-                    (int)(s - PL_last_uni), PL_last_uni);
+                    "Warning: Use of \"%"UTF8f"\" without parentheses is ambiguous",
+                    UTF8fARG(UTF, (int)(s - PL_last_uni), PL_last_uni));
  }
  
  /*
@@ -1907,6 +1912,7 @@ S_force_next(pTHX_ I32 type)
         tokereport(type, &NEXTVAL_NEXTTOKE);
      }
  #endif
+    assert(PL_nexttoke < C_ARRAY_LENGTH(PL_nexttype));
      PL_nexttype[PL_nexttoke] = type;
      PL_nexttoke++;
      if (PL_lex_state != LEX_KNOWNEXT) {
@@ -2336,6 +2342,7 @@ S_sublex_push(pTHX)
      SAVEI32(PL_lex_casemods);
      SAVEI32(PL_lex_starts);
      SAVEI8(PL_lex_state);
+    SAVEI8(PL_lex_defer);
      SAVESPTR(PL_lex_repl);
      SAVEVPTR(PL_lex_inpat);
      SAVEI16(PL_lex_inwhat);
@@ -2378,6 +2385,7 @@ S_sublex_push(pTHX)
         popping.  We must not have a PL_lex_stuff value left dangling, as
         that breaks assumptions elsewhere.  See bug #123617.  */
      SAVEGENERICSV(PL_lex_stuff);
+    SAVEGENERICSV(PL_sublex_info.repl);
  
      PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
         = SvPVX(PL_linestr);
@@ -2483,7 +2491,6 @@ S_sublex_done(pTHX)
         PL_bufend = SvPVX(PL_linestr);
         PL_bufend += SvCUR(PL_linestr);
         PL_expect = XOPERATOR;
-       PL_sublex_info.sub_inwhat = 0;
         return ')';
      }
  }
@@ -2523,9 +2530,10 @@ S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e)
          /* We deliberately don't try to print the malformed character, which
           * might not print very well; it also may be just the first of many
           * malformations, so don't print what comes after it */
-        yyerror(Perl_form(aTHX_
+        yyerror_pv(Perl_form(aTHX_
              "Malformed UTF-8 character immediately after '%.*s'",
-            (int) (first_bad_char_loc - (U8 *) backslash_ptr), backslash_ptr));
+            (int) (first_bad_char_loc - (U8 *) backslash_ptr), backslash_ptr),
+                   SVf_UTF8);
         return NULL;
      }
  
@@ -3050,7 +3058,7 @@ S_scan_const(pTHX_ char *start)
            (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
            */
         else if (*s == '@' && s[1]) {
-           if (isWORDCHAR_lazy_if(s+1,UTF))
+           if (UTF ? isIDFIRST_utf8((U8*)s+1) : isWORDCHAR_A(s[1]))
                 break;
             if (strchr(":'{$", s[1]))
                 break;
@@ -3270,12 +3278,7 @@ S_scan_const(pTHX_ char *start)
                   *  Otherwise must be some \N{NAME}: convert to \N{U+c1.c2...}
                   *      if a pattern; otherwise convert to utf8
                   *
-                 * If the regex compiler should ever need to differentiate
-                 * between the \N{U+...} and \N{name} forms, that could easily
-                 * be done here by stripping any leading zeros from the
-                 * \N{U+...} case, and adding them to the other one. */
-
-                /* Here, 's' points to the 'N'; the test below is guaranteed to
+                 * Here, 's' points to the 'N'; the test below is guaranteed to
                  * succeed if we are being called on a pattern, as we already
                   * know from a test above that the next character is a '{'.  A
                   * non-pattern \N must mean 'named character', which requires
@@ -3407,9 +3410,15 @@ S_scan_const(pTHX_ char *start)
                                      char hex_string[4];
                                      int len =
                                          my_snprintf(hex_string,
-                                                    sizeof(hex_string),
-                                                    "%02X.", (U8) *str);
-                                    PERL_MY_SNPRINTF_POST_GUARD(len, sizeof(hex_string));
+                                                  sizeof(hex_string),
+                                                  "%02X.",
+
+                                                  /* The regex compiler is
+                                                   * expecting Unicode, not
+                                                   * native */
+                                                  (U8) NATIVE_TO_LATIN1(*str));
+                                    PERL_MY_SNPRINTF_POST_GUARD(len,
+                                                           sizeof(hex_string));
                                      Copy(hex_string, d, 3, char);
                                      d += 3;
                                      str++;
@@ -3433,12 +3442,12 @@ S_scan_const(pTHX_ char *start)
                                                          len,
                                                          &char_length,
                                                          UTF8_ALLOW_ANYUV);
-                                /* Convert first code point to hex, including
-                                 * the boiler plate before it. */
+                                /* Convert first code point to Unicode hex,
+                                 * including the boiler plate before it. */
                                  output_length =
                                      my_snprintf(hex_string, sizeof(hex_string),
-                                                "\\N{U+%X",
-                                                (unsigned int) uv);
+                                             "\\N{U+%X",
+                                             (unsigned int) NATIVE_TO_UNI(uv));
  
                                  /* Make sure there is enough space to hold it */
                                  d = off + SvGROW(sv, off
@@ -3450,7 +3459,7 @@ S_scan_const(pTHX_ char *start)
                                  d += output_length;
  
                                  /* For each subsequent character, append dot and
-                                * its ordinal in hex */
+                                * its Unicode code point in hex */
                                  while ((str += char_length) < str_end) {
                                      const STRLEN off = d - SvPVX_const(sv);
                                      U32 uv = utf8n_to_uvchr((U8 *) str,
@@ -3459,9 +3468,9 @@ S_scan_const(pTHX_ char *start)
                                                              UTF8_ALLOW_ANYUV);
                                      output_length =
                                          my_snprintf(hex_string,
-                                                    sizeof(hex_string),
-                                                    ".%X",
-                                                    (unsigned int) uv);
+                                             sizeof(hex_string),
+                                             ".%X",
+                                             (unsigned int) NATIVE_TO_UNI(uv));
  
                                      d = off + SvGROW(sv, off
                                                          + output_length
@@ -4311,13 +4320,8 @@ Perl_yylex(pTHX)
         SvREFCNT_dec(tmp);
      } );
  
-    switch (PL_lex_state) {
-    case LEX_NORMAL:
-    case LEX_INTERPNORMAL:
-       break;
-
      /* when we've already built the next token, just pull it out of the queue */
-    case LEX_KNOWNEXT:
+    if (PL_nexttoke) {
         PL_nexttoke--;
         pl_yylval = PL_nextval[PL_nexttoke];
         if (!PL_nexttoke) {
@@ -4342,6 +4346,12 @@ Perl_yylex(pTHX)
             }
             return REPORT(next_type == 'p' ? pending_ident() : next_type);
         }
+    }
+
+    switch (PL_lex_state) {
+    case LEX_NORMAL:
+    case LEX_INTERPNORMAL:
+       break;
  
      /* interpolated case modifiers like \L \U, including \Q and \E.
         when we get here, PL_bufptr is at the \
@@ -4490,6 +4500,14 @@ Perl_yylex(pTHX)
         /* FALLTHROUGH */
  
      case LEX_INTERPEND:
+       /* Treat state as LEX_NORMAL if we have no inner lexing scope.
+          XXX This hack can be removed if we stop setting PL_lex_state to
+          LEX_KNOWNEXT, as can the hack under LEX_INTREPCONCAT below.  */
+       if (UNLIKELY(!PL_lex_inwhat)) {
+           PL_lex_state = LEX_NORMAL;
+           break;
+       }
+
         if (PL_lex_dojoin) {
             const U8 dojoin_was = PL_lex_dojoin;
             PL_lex_dojoin = FALSE;
@@ -4541,6 +4559,14 @@ Perl_yylex(pTHX)
             Perl_croak(aTHX_ "panic: INTERPCONCAT, lex_brackets=%ld",
                        (long) PL_lex_brackets);
  #endif
+       /* Treat state as LEX_NORMAL when not in an inner lexing scope.
+          XXX This hack can be removed if we stop setting PL_lex_state to
+          LEX_KNOWNEXT.  */
+       if (UNLIKELY(!PL_lex_inwhat)) {
+           PL_lex_state = LEX_NORMAL;
+           break;
+       }
+
         if (PL_bufptr == PL_bufend)
             return REPORT(sublex_done());
  
@@ -4621,7 +4647,8 @@ Perl_yylex(pTHX)
      case 26:
         goto fake_eof;                  /* emulate EOF on ^D or ^Z */
      case 0:
-       if (!PL_rsfp && (!PL_parser->filtered || s+1 < PL_bufend)) {
+       if ((!PL_rsfp || PL_lex_inwhat)
+        && (!PL_parser->filtered || s+1 < PL_bufend)) {
             PL_last_uni = 0;
             PL_last_lop = 0;
             if (PL_lex_brackets &&
@@ -4836,6 +4863,8 @@ Perl_yylex(pTHX)
                 d = instr(s,"perl -");
                 if (!d) {
                     d = instr(s,"perl");
+                    if (d && d[4] == '6')
+                        d = NULL;
  #if defined(DOSISH)
                     /* avoid getting into infinite loops when shebang
                      * line contains "Perl" rather than "perl" */
@@ -4960,7 +4989,6 @@ Perl_yylex(pTHX)
         }
         if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
             PL_lex_state = LEX_FORMLINE;
-           NEXTVAL_NEXTTOKE.ival = 0;
             force_next(FORMRBRACK);
             TOKEN(';');
         }
@@ -5003,7 +5031,6 @@ Perl_yylex(pTHX)
                  incline(s);
             if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
                 PL_lex_state = LEX_FORMLINE;
-               NEXTVAL_NEXTTOKE.ival = 0;
                 force_next(FORMRBRACK);
                 TOKEN(';');
             }
@@ -5113,10 +5140,6 @@ Perl_yylex(pTHX)
                   ||(*s == '*' && (s[1] == '*' || s[1] == '{'))
                  ))
                 {
-                   Perl_ck_warner_d(aTHX_
-                       packWARN(WARN_EXPERIMENTAL__POSTDEREF),
-                       "Postfix dereference is experimental"
-                   );
                     PL_expect = XPOSTDEREF;
                     TOKEN(ARROW);
                 }
@@ -5752,12 +5775,12 @@ Perl_yylex(pTHX)
         PL_tokenbuf[0] = '&';
         s = scan_ident(s - 1, PL_tokenbuf + 1,
                        sizeof PL_tokenbuf - 1, TRUE);
+       pl_yylval.ival = (OPpENTERSUB_AMPER<<8);
         if (PL_tokenbuf[1]) {
             force_ident_maybe_lex('&');
         }
         else
             PREREF('&');
-       pl_yylval.ival = (OPpENTERSUB_AMPER<<8);
         TERM('&');
  
      case '|':
@@ -5986,8 +6009,14 @@ Perl_yylex(pTHX)
             PL_tokenbuf[0] = '@';
             s = scan_ident(s + 1, PL_tokenbuf + 1,
                            sizeof PL_tokenbuf - 1, FALSE);
-           if (PL_expect == XOPERATOR)
-               no_op("Array length", s);
+            if (PL_expect == XOPERATOR) {
+                d = s;
+                if (PL_bufptr > s) {
+                    d = PL_bufptr-1;
+                    PL_bufptr = PL_oldbufptr;
+                }
+               no_op("Array length", d);
+            }
             if (!PL_tokenbuf[1])
                 PREREF(DOLSHARP);
             PL_expect = XOPERATOR;
@@ -6026,14 +6055,14 @@ Perl_yylex(pTHX)
                         char *t = s+1;
  
                         while (isSPACE(*t) || isWORDCHAR_lazy_if(t,UTF) || *t == '$')
-                           t++;
+                           t += UTF ? UTF8SKIP(t) : 1;
                         if (*t++ == ',') {
                             PL_bufptr = skipspace(PL_bufptr); /* XXX can realloc */
                             while (t < PL_bufend && *t != ']')
                                 t++;
                             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
-                                       "Multidimensional syntax %.*s not supported",
-                                   (int)((t - PL_bufptr) + 1), PL_bufptr);
+                                       "Multidimensional syntax %"UTF8f" not supported",
+                                        UTF8fARG(UTF,(int)((t - PL_bufptr) + 1), PL_bufptr));
                         }
                     }
                 }
@@ -9187,10 +9216,14 @@ S_scan_heredoc(pTHX_ char *s)
             term = '"';
         if (!isWORDCHAR_lazy_if(s,UTF))
             deprecate("bare << to mean <<\"\"");
-       for (; isWORDCHAR_lazy_if(s,UTF); s++) {
-           if (d < e)
-               *d++ = *s;
+       peek = s;
+       while (isWORDCHAR_lazy_if(peek,UTF)) {
+           peek += UTF ? UTF8SKIP(peek) : 1;
         }
+       len = (peek - s >= e - d) ? (e - d) : (peek - s);
+       Copy(s, d, len, char);
+       s += len;
+       d += len;
      }
      if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
         Perl_croak(aTHX_ "Delimiter for here document is too long");
@@ -9261,8 +9294,13 @@ S_scan_heredoc(pTHX_ char *s)
                lexing scope.  In a file, we will have broken out of the
                loop in the previous iteration.  In an eval, the string buf-
                fer ends with "\n;", so the while condition above will have
-              evaluated to false.  So shared can never be null. */
-           assert(shared);
+              evaluated to false.  So shared can never be null.  Or so you
+              might think.  Odd syntax errors like s;@{<<; can gobble up
+              the implicit semicolon at the end of a flie, causing the
+              file handle to be closed even when we are not in a string
+              eval.  So shared may be null in that case.  */
+           if (UNLIKELY(!shared))
+               goto interminable;
             /* A LEXSHARED struct with a null ls_prev pointer is the outer-
                most lexing scope.  In a file, shared->ls_linestr at that
                level is just one line, so there is no body to steal. */
@@ -10444,7 +10482,7 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
                floatit = TRUE;
          }
         if (floatit) {
-            STORE_NUMERIC_LOCAL_SET_STANDARD();
+            STORE_LC_NUMERIC_UNDERLYING_SET_STANDARD();
             /* terminate the string */
             *d = '\0';
              if (UNLIKELY(hexfp)) {
@@ -10461,7 +10499,7 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
              } else {
                  nv = Atof(PL_tokenbuf);
              }
-            RESTORE_NUMERIC_LOCAL();
+            RESTORE_LC_NUMERIC_UNDERLYING();
              sv = newSVnv(nv);
         }
  
@@ -11709,11 +11747,5 @@ Perl_parse_subsignature(pTHX)
  }
  
  /*
- * Local variables:
- * c-indentation-style: bsd
- * c-basic-offset: 4
- * indent-tabs-mode: nil
- * End:
- *
   * ex: set ts=8 sts=4 sw=4 et:
   */