remove unused perldelta sections

[perl5.git] / toke.c
diff --git a/toke.c b/toke.c

index 3add418..694b008 100644 (file)
--- a/toke.c
+++ b/toke.c
@@ -371,6 +371,8 @@ static struct debug_tokens {
      { SHIFTOP,         TOKENTYPE_OPNUM,        "SHIFTOP" },
      { SIGSUB,          TOKENTYPE_NONE,         "SIGSUB" },
      { SUB,             TOKENTYPE_NONE,         "SUB" },
+    { SUBLEXEND,       TOKENTYPE_NONE,         "SUBLEXEND" },
+    { SUBLEXSTART,     TOKENTYPE_NONE,         "SUBLEXSTART" },
      { THING,           TOKENTYPE_OPVAL,        "THING" },
      { UMINUS,          TOKENTYPE_NONE,         "UMINUS" },
      { UNIOP,           TOKENTYPE_OPNUM,        "UNIOP" },
@@ -666,7 +668,7 @@ S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
  #endif
  
  /*
-=for apidoc Amx|void|lex_start|SV *line|PerlIO *rsfp|U32 flags
+=for apidoc lex_start
  
  Creates and initialises a new lexer/parser state object, supplying
  a context in which to lex and parse from a new source of Perl code.
@@ -729,7 +731,7 @@ Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, U32 flags)
      parser->lex_state = LEX_NORMAL;
      parser->expect = XSTATE;
      parser->rsfp = rsfp;
-    parser->recheck_utf8_validity = FALSE;
+    parser->recheck_utf8_validity = TRUE;
      parser->rsfp_filters =
        !(flags & LEX_START_SAME_FILTER) || !oparser
          ? NULL
@@ -831,7 +833,7 @@ Perl_parser_free_nexttoke_ops(pTHX_  yy_parser *parser, OPSLAB *slab)
  
  
  /*
-=for apidoc AmxU|SV *|PL_parser-E<gt>linestr
+=for apidoc AmxUN|SV *|PL_parser-E<gt>linestr
  
  Buffer scalar containing the chunk currently under consideration of the
  text currently being lexed.  This is always a plain string scalar (for
@@ -858,7 +860,7 @@ lexing position is pointed to by L</PL_parser-E<gt>bufptr>.  Direct use
  of these pointers is usually preferable to examination of the scalar
  through normal scalar means.
  
-=for apidoc AmxU|char *|PL_parser-E<gt>bufend
+=for apidoc AmxUN|char *|PL_parser-E<gt>bufend
  
  Direct pointer to the end of the chunk of text currently being lexed, the
  end of the lexer buffer.  This is equal to C<SvPVX(PL_parser-E<gt>linestr)
@@ -866,7 +868,7 @@ end of the lexer buffer.  This is equal to C<SvPVX(PL_parser-E<gt>linestr)
  always located at the end of the buffer, and does not count as part of
  the buffer's contents.
  
-=for apidoc AmxU|char *|PL_parser-E<gt>bufptr
+=for apidoc AmxUN|char *|PL_parser-E<gt>bufptr
  
  Points to the current position of lexing inside the lexer buffer.
  Characters around this point may be freely examined, within
@@ -884,7 +886,7 @@ Interpretation of the buffer's octets can be abstracted out by
  using the slightly higher-level functions L</lex_peek_unichar> and
  L</lex_read_unichar>.
  
-=for apidoc AmxU|char *|PL_parser-E<gt>linestart
+=for apidoc AmxUN|char *|PL_parser-E<gt>linestart
  
  Points to the start of the current line inside the lexer buffer.
  This is useful for indicating at which column an error occurred, and
@@ -895,7 +897,7 @@ a newline; the function L</lex_read_to> handles this detail.
  */
  
  /*
-=for apidoc Amx|bool|lex_bufutf8
+=for apidoc lex_bufutf8
  
  Indicates whether the octets in the lexer buffer
  (L</PL_parser-E<gt>linestr>) should be interpreted as the UTF-8 encoding
@@ -926,7 +928,7 @@ Perl_lex_bufutf8(pTHX)
  }
  
  /*
-=for apidoc Amx|char *|lex_grow_linestr|STRLEN len
+=for apidoc lex_grow_linestr
  
  Reallocates the lexer buffer (L</PL_parser-E<gt>linestr>) to accommodate
  at least C<len> octets (including terminating C<NUL>).  Returns a
@@ -989,7 +991,7 @@ Perl_lex_grow_linestr(pTHX_ STRLEN len)
  }
  
  /*
-=for apidoc Amx|void|lex_stuff_pvn|const char *pv|STRLEN len|U32 flags
+=for apidoc lex_stuff_pvn
  
  Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
  immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
@@ -1084,7 +1086,7 @@ Perl_lex_stuff_pvn(pTHX_ const char *pv, STRLEN len, U32 flags)
  }
  
  /*
-=for apidoc Amx|void|lex_stuff_pv|const char *pv|U32 flags
+=for apidoc lex_stuff_pv
  
  Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
  immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
@@ -1113,7 +1115,7 @@ Perl_lex_stuff_pv(pTHX_ const char *pv, U32 flags)
  }
  
  /*
-=for apidoc Amx|void|lex_stuff_sv|SV *sv|U32 flags
+=for apidoc lex_stuff_sv
  
  Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
  immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
@@ -1145,7 +1147,7 @@ Perl_lex_stuff_sv(pTHX_ SV *sv, U32 flags)
  }
  
  /*
-=for apidoc Amx|void|lex_unstuff|char *ptr
+=for apidoc lex_unstuff
  
  Discards text about to be lexed, from L</PL_parser-E<gt>bufptr> up to
  C<ptr>.  Text following C<ptr> will be moved, and the buffer shortened.
@@ -1179,7 +1181,7 @@ Perl_lex_unstuff(pTHX_ char *ptr)
  }
  
  /*
-=for apidoc Amx|void|lex_read_to|char *ptr
+=for apidoc lex_read_to
  
  Consume text in the lexer buffer, from L</PL_parser-E<gt>bufptr> up
  to C<ptr>.  This advances L</PL_parser-E<gt>bufptr> to match C<ptr>,
@@ -1210,7 +1212,7 @@ Perl_lex_read_to(pTHX_ char *ptr)
  }
  
  /*
-=for apidoc Amx|void|lex_discard_to|char *ptr
+=for apidoc lex_discard_to
  
  Discards the first part of the L</PL_parser-E<gt>linestr> buffer,
  up to C<ptr>.  The remaining content of the buffer will be moved, and
@@ -1282,7 +1284,7 @@ Perl_notify_parser_that_changed_to_utf8(pTHX)
  }
  
  /*
-=for apidoc Amx|bool|lex_next_chunk|U32 flags
+=for apidoc lex_next_chunk
  
  Reads in the next chunk of text to be lexed, appending it to
  L</PL_parser-E<gt>linestr>.  This should be called when lexing code has
@@ -1334,7 +1336,7 @@ Perl_lex_next_chunk(pTHX_ U32 flags)
             PL_parser->last_lop = NULL;
         last_uni_pos = last_lop_pos = 0;
         *buf = 0;
-       SvCUR(linestr) = 0;
+       SvCUR_set(linestr, 0);
      } else {
         old_bufend_pos = PL_parser->bufend - buf;
         bufptr_pos = PL_parser->bufptr - buf;
@@ -1422,7 +1424,7 @@ Perl_lex_next_chunk(pTHX_ U32 flags)
  }
  
  /*
-=for apidoc Amx|I32|lex_peek_unichar|U32 flags
+=for apidoc lex_peek_unichar
  
  Looks ahead one (Unicode) character in the text currently being lexed.
  Returns the codepoint (unsigned integer value) of the next character,
@@ -1491,7 +1493,7 @@ Perl_lex_peek_unichar(pTHX_ U32 flags)
  }
  
  /*
-=for apidoc Amx|I32|lex_read_unichar|U32 flags
+=for apidoc lex_read_unichar
  
  Reads the next (Unicode) character in the text currently being lexed.
  Returns the codepoint (unsigned integer value) of the character read,
@@ -1529,7 +1531,7 @@ Perl_lex_read_unichar(pTHX_ U32 flags)
  }
  
  /*
-=for apidoc Amx|void|lex_read_space|U32 flags
+=for apidoc lex_read_space
  
  Reads optional spaces, in Perl style, in the text currently being
  lexed.  The spaces may include ordinary whitespace characters and
@@ -1604,7 +1606,7 @@ Perl_lex_read_space(pTHX_ U32 flags)
  
  /*
  
-=for apidoc EXMp|bool|validate_proto|SV *name|SV *proto|bool warn
+=for apidoc validate_proto
  
  This function performs syntax checking on a prototype, C<proto>.
  If C<warn> is true, any illegal characters or mismatched brackets
@@ -1885,8 +1887,8 @@ S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len)
  #define skipspace(s) skipspace_flags(s, 0)
  #define peekspace(s) skipspace_flags(s, LEX_NO_INCLINE)
  
-STATIC char *
-S_skipspace_flags(pTHX_ char *s, U32 flags)
+char *
+Perl_skipspace_flags(pTHX_ char *s, U32 flags)
  {
      PERL_ARGS_ASSERT_SKIPSPACE_FLAGS;
      if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
@@ -2507,7 +2509,7 @@ S_sublex_push(pTHX)
      PL_parser->lex_re_reparsing = cBOOL(PL_in_eval & EVAL_RE_REPARSING);
      PL_in_eval &= ~EVAL_RE_REPARSING;
  
-    return '(';
+    return SUBLEXSTART;
  }
  
  /*
@@ -2582,7 +2584,7 @@ S_sublex_done(pTHX)
         PL_bufend = SvPVX(PL_linestr);
         PL_bufend += SvCUR(PL_linestr);
         PL_expect = XOPERATOR;
-       return ')';
+       return SUBLEXEND;
      }
  }
  
@@ -3194,11 +3196,21 @@ S_scan_const(pTHX_ char *start)
                         && (range_min > 255 || ! convert_unicode)
  #endif
                      ) {
+                        const STRLEN off = d - SvPVX(sv);
+                        const STRLEN extra = 1 + (send - s) + 1;
+                        char *e;
+
                          /* Move the high character one byte to the right; then
                           * insert between it and the range begin, an illegal
                           * byte which serves to indicate this is a range (using
                           * a '-' would be ambiguous). */
-                        char *e = d++;
+
+                        if (off + extra > SvLEN(sv)) {
+                            d = off + SvGROW(sv, off + extra);
+                            max_ptr = d - off + offset_to_max;
+                        }
+
+                        e = d++;
                          while (e-- > max_ptr) {
                              *(e + 1) = *e;
                          }
@@ -3399,8 +3411,19 @@ S_scan_const(pTHX_ char *start)
               * friends */
         else if (*s == '(' && PL_lex_inpat && s[1] == '?' && !in_charclass) {
             if (s[2] == '#') {
-               while (s+1 < send && *s != ')')
-                   *d++ = *s++;
+                if (s_is_utf8) {
+                    PERL_UINT_FAST8_T  len = UTF8SKIP(s);
+
+                    while (s + len < send && *s != ')') {
+                        Copy(s, d, len, U8);
+                        d += len;
+                        s += len;
+                        len = UTF8_SAFE_SKIP(s, send);
+                    }
+                }
+                else while (s+1 < send && *s != ')') {
+                    *d++ = *s++;
+                }
             }
             else if (!PL_lex_casemods
                       && (    s[2] == '{' /* This should match regcomp.c */
@@ -4076,10 +4099,12 @@ S_scan_const(pTHX_ char *start)
              goto default_action; /* Redo, having upgraded so both are UTF-8 */
          }
          else {  /* UTF8ness matters: convert this non-UTF8 source char to
-                   UTF-8 for output.  It will occupy 2 bytes */
-            if (d + 2 >= SvEND(sv)) {
-                const STRLEN extra = 2 + (send - s - 1) + 1;
-               const STRLEN off = d - SvPVX_const(sv);
+                   UTF-8 for output.  It will occupy 2 bytes, but don't include
+                   the input byte since we haven't incremented 's' yet. See
+                   Note on sizing above. */
+            const STRLEN off = d - SvPVX(sv);
+            const STRLEN extra = 2 + (send - s - 1) + 1;
+            if (off + extra > SvLEN(sv)) {
                 d = off + SvGROW(sv, off + extra);
             }
              *d++ = UTF8_EIGHT_BIT_HI(*s);
@@ -4088,12 +4113,31 @@ S_scan_const(pTHX_ char *start)
         }
      } /* while loop to process each character */
  
+    {
+        const STRLEN off = d - SvPVX(sv);
+
+        /* See if room for the terminating NUL */
+        if (UNLIKELY(off >= SvLEN(sv))) {
+
+#ifndef DEBUGGING
+
+            if (off > SvLEN(sv))
+#endif
+                Perl_croak(aTHX_ "panic: constant overflowed allocated space,"
+                        " %" UVuf " >= %" UVuf, (UV)off, (UV)SvLEN(sv));
+
+            /* Whew!  Here we don't have room for the terminating NUL, but
+             * everything else so far has fit.  It's not too late to grow
+             * to fit the NUL and continue on.  But it is a bug, as the code
+             * above was supposed to have made room for this, so under
+             * DEBUGGING builds, we panic anyway.  */
+            d = off + SvGROW(sv, off + 1);
+        }
+    }
+
      /* terminate the string and set up the sv */
      *d = '\0';
      SvCUR_set(sv, d - SvPVX_const(sv));
-    if (SvCUR(sv) >= SvLEN(sv))
-       Perl_croak(aTHX_ "panic: constant overflowed allocated space, %" UVuf
-                  " >= %" UVuf, (UV)SvCUR(sv), (UV)SvLEN(sv));
  
      SvPOK_on(sv);
      if (d_is_utf8) {
@@ -5949,7 +5993,7 @@ Perl_yylex(pTHX)
  
         switch (PL_expect) {
         case XOPERATOR:
-           if (!PL_in_my || PL_lex_state != LEX_NORMAL)
+           if (!PL_in_my || (PL_lex_state != LEX_NORMAL && !PL_lex_brackets))
                 break;
             PL_bufptr = s;      /* update in case we back off */
             if (*s == '=') {
@@ -6796,7 +6840,7 @@ Perl_yylex(pTHX)
             }
  
             PL_expect = XOPERATOR;
-           if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
+           if ((PL_lex_state == LEX_NORMAL || PL_lex_brackets) && isSPACE((char)tmp)) {
                 const bool islop = (PL_last_lop == PL_oldoldbufptr);
                 if (!islop || PL_last_lop_op == OP_GREPSTART)
                     PL_expect = XOPERATOR;
@@ -6869,7 +6913,7 @@ Perl_yylex(pTHX)
         if (!PL_tokenbuf[1]) {
             PREREF('@');
         }
-       if (PL_lex_state == LEX_NORMAL)
+       if (PL_lex_state == LEX_NORMAL || PL_lex_brackets)
             s = skipspace(s);
         if (   (PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
              && intuit_more(s, PL_bufend))
@@ -9423,8 +9467,8 @@ S_parse_ident(pTHX_ char **s, char **d, char * const e, int allow_package,
  /* Returns a NUL terminated string, with the length of the string written to
     *slp
     */
-STATIC char *
-S_scan_word(pTHX_ char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
+char *
+Perl_scan_word(pTHX_ char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
  {
      char *d = dest;
      char * const e = d + destlen - 3;  /* two-character token, ending NUL */
@@ -9618,7 +9662,7 @@ S_scan_ident(pTHX_ char *s, char *dest, STRLEN destlen, I32 ck_uni)
                 PL_lex_state = LEX_INTERPEND;
                 PL_expect = XREF;
             }
-           if (PL_lex_state == LEX_NORMAL) {
+           if (PL_lex_state == LEX_NORMAL || PL_lex_brackets) {
                 if (ckWARN(WARN_AMBIGUOUS)
                      && (keyword(dest, d - dest, 0)
                         || get_cvn_flags(dest, d - dest, is_utf8
@@ -10653,8 +10697,8 @@ S_scan_inputsymbol(pTHX_ char *start)
     SvIVX of the SV.
  */
  
-STATIC char *
-S_scan_str(pTHX_ char *start, int keep_bracketed_quoted, int keep_delims, int re_reparse,
+char *
+Perl_scan_str(pTHX_ char *start, int keep_bracketed_quoted, int keep_delims, int re_reparse,
                  char **delimp
      )
  {
@@ -10983,6 +11027,7 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
             I32 shift;
             bool overflowed = FALSE;
             bool just_zero  = TRUE;     /* just plain 0 or binary number? */
+            bool has_digs = FALSE;
             static const NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 };
             static const char* const bases[5] =
               { "", "binary", "", "octal", "hexadecimal" };
@@ -11074,6 +11119,7 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
  
                   digit:
                     just_zero = FALSE;
+                    has_digs = TRUE;
                     if (!overflowed) {
                         assert(shift >= 0);
                         x = u << shift; /* make room for the digit */
@@ -11289,6 +11335,13 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
                  }
              }
  
+            if (shift != 3 && !has_digs) {
+                /* 0x or 0b with no digits, treat it as if the x or b is part of the
+                   next token
+                */
+                s = start + 1;
+            }
+
             if (overflowed) {
                 if (n > 4294967295.0)
                     Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
@@ -12226,7 +12279,7 @@ Perl_keyword_plugin_standard(pTHX_
  }
  
  /*
-=for apidoc Amx|void|wrap_keyword_plugin|Perl_keyword_plugin_t new_plugin|Perl_keyword_plugin_t *old_plugin_p
+=for apidoc wrap_keyword_plugin
  
  Puts a C function into the chain of keyword plugins.  This is the
  preferred way to manipulate the L</PL_keyword_plugin> variable.
@@ -12345,7 +12398,7 @@ S_parse_expr(pTHX_ I32 fakeeof, U32 flags)
  }
  
  /*
-=for apidoc Amx|OP *|parse_arithexpr|U32 flags
+=for apidoc parse_arithexpr
  
  Parse a Perl arithmetic expression.  This may contain operators of precedence
  down to the bit shift operators.  The expression must be followed (and thus
@@ -12377,7 +12430,7 @@ Perl_parse_arithexpr(pTHX_ U32 flags)
  }
  
  /*
-=for apidoc Amx|OP *|parse_termexpr|U32 flags
+=for apidoc parse_termexpr
  
  Parse a Perl term expression.  This may contain operators of precedence
  down to the assignment operators.  The expression must be followed (and thus
@@ -12409,7 +12462,7 @@ Perl_parse_termexpr(pTHX_ U32 flags)
  }
  
  /*
-=for apidoc Amx|OP *|parse_listexpr|U32 flags
+=for apidoc parse_listexpr
  
  Parse a Perl list expression.  This may contain operators of precedence
  down to the comma operator.  The expression must be followed (and thus
@@ -12441,7 +12494,7 @@ Perl_parse_listexpr(pTHX_ U32 flags)
  }
  
  /*
-=for apidoc Amx|OP *|parse_fullexpr|U32 flags
+=for apidoc parse_fullexpr
  
  Parse a single complete Perl expression.  This allows the full
  expression grammar, including the lowest-precedence operators such
@@ -12474,7 +12527,7 @@ Perl_parse_fullexpr(pTHX_ U32 flags)
  }
  
  /*
-=for apidoc Amx|OP *|parse_block|U32 flags
+=for apidoc parse_block
  
  Parse a single complete Perl code block.  This consists of an opening
  brace, a sequence of statements, and a closing brace.  The block
@@ -12510,7 +12563,7 @@ Perl_parse_block(pTHX_ U32 flags)
  }
  
  /*
-=for apidoc Amx|OP *|parse_barestmt|U32 flags
+=for apidoc parse_barestmt
  
  Parse a single unadorned Perl statement.  This may be a normal imperative
  statement or a declaration that has compile-time effect.  It does not
@@ -12548,7 +12601,7 @@ Perl_parse_barestmt(pTHX_ U32 flags)
  }
  
  /*
-=for apidoc Amx|SV *|parse_label|U32 flags
+=for apidoc parse_label
  
  Parse a single label, possibly optional, of the type that may prefix a
  Perl statement.  It is up to the caller to ensure that the dynamic parser
@@ -12618,7 +12671,7 @@ Perl_parse_label(pTHX_ U32 flags)
  }
  
  /*
-=for apidoc Amx|OP *|parse_fullstmt|U32 flags
+=for apidoc parse_fullstmt
  
  Parse a single complete Perl statement.  This may be a normal imperative
  statement or a declaration that has compile-time effect, and may include
@@ -12653,7 +12706,7 @@ Perl_parse_fullstmt(pTHX_ U32 flags)
  }
  
  /*
-=for apidoc Amx|OP *|parse_stmtseq|U32 flags
+=for apidoc parse_stmtseq
  
  Parse a sequence of zero or more Perl statements.  These may be normal
  imperative statements, including optional labels, or declarations