{ SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" },
{ SIGSUB, TOKENTYPE_NONE, "SIGSUB" },
{ SUB, TOKENTYPE_NONE, "SUB" },
+ { SUBLEXEND, TOKENTYPE_NONE, "SUBLEXEND" },
+ { SUBLEXSTART, TOKENTYPE_NONE, "SUBLEXSTART" },
{ THING, TOKENTYPE_OPVAL, "THING" },
{ UMINUS, TOKENTYPE_NONE, "UMINUS" },
{ UNIOP, TOKENTYPE_OPNUM, "UNIOP" },
#endif
/*
-=for apidoc Amx|void|lex_start|SV *line|PerlIO *rsfp|U32 flags
+=for apidoc lex_start
Creates and initialises a new lexer/parser state object, supplying
a context in which to lex and parse from a new source of Perl code.
parser->lex_state = LEX_NORMAL;
parser->expect = XSTATE;
parser->rsfp = rsfp;
- parser->recheck_utf8_validity = FALSE;
+ parser->recheck_utf8_validity = TRUE;
parser->rsfp_filters =
!(flags & LEX_START_SAME_FILTER) || !oparser
? NULL
/*
-=for apidoc AmxU|SV *|PL_parser-E<gt>linestr
+=for apidoc AmxUN|SV *|PL_parser-E<gt>linestr
Buffer scalar containing the chunk currently under consideration of the
text currently being lexed. This is always a plain string scalar (for
of these pointers is usually preferable to examination of the scalar
through normal scalar means.
-=for apidoc AmxU|char *|PL_parser-E<gt>bufend
+=for apidoc AmxUN|char *|PL_parser-E<gt>bufend
Direct pointer to the end of the chunk of text currently being lexed, the
end of the lexer buffer. This is equal to C<SvPVX(PL_parser-E<gt>linestr)
always located at the end of the buffer, and does not count as part of
the buffer's contents.
-=for apidoc AmxU|char *|PL_parser-E<gt>bufptr
+=for apidoc AmxUN|char *|PL_parser-E<gt>bufptr
Points to the current position of lexing inside the lexer buffer.
Characters around this point may be freely examined, within
using the slightly higher-level functions L</lex_peek_unichar> and
L</lex_read_unichar>.
-=for apidoc AmxU|char *|PL_parser-E<gt>linestart
+=for apidoc AmxUN|char *|PL_parser-E<gt>linestart
Points to the start of the current line inside the lexer buffer.
This is useful for indicating at which column an error occurred, and
*/
/*
-=for apidoc Amx|bool|lex_bufutf8
+=for apidoc lex_bufutf8
Indicates whether the octets in the lexer buffer
(L</PL_parser-E<gt>linestr>) should be interpreted as the UTF-8 encoding
}
/*
-=for apidoc Amx|char *|lex_grow_linestr|STRLEN len
+=for apidoc lex_grow_linestr
Reallocates the lexer buffer (L</PL_parser-E<gt>linestr>) to accommodate
at least C<len> octets (including terminating C<NUL>). Returns a
}
/*
-=for apidoc Amx|void|lex_stuff_pvn|const char *pv|STRLEN len|U32 flags
+=for apidoc lex_stuff_pvn
Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
}
/*
-=for apidoc Amx|void|lex_stuff_pv|const char *pv|U32 flags
+=for apidoc lex_stuff_pv
Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
}
/*
-=for apidoc Amx|void|lex_stuff_sv|SV *sv|U32 flags
+=for apidoc lex_stuff_sv
Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
}
/*
-=for apidoc Amx|void|lex_unstuff|char *ptr
+=for apidoc lex_unstuff
Discards text about to be lexed, from L</PL_parser-E<gt>bufptr> up to
C<ptr>. Text following C<ptr> will be moved, and the buffer shortened.
}
/*
-=for apidoc Amx|void|lex_read_to|char *ptr
+=for apidoc lex_read_to
Consume text in the lexer buffer, from L</PL_parser-E<gt>bufptr> up
to C<ptr>. This advances L</PL_parser-E<gt>bufptr> to match C<ptr>,
}
/*
-=for apidoc Amx|void|lex_discard_to|char *ptr
+=for apidoc lex_discard_to
Discards the first part of the L</PL_parser-E<gt>linestr> buffer,
up to C<ptr>. The remaining content of the buffer will be moved, and
}
/*
-=for apidoc Amx|bool|lex_next_chunk|U32 flags
+=for apidoc lex_next_chunk
Reads in the next chunk of text to be lexed, appending it to
L</PL_parser-E<gt>linestr>. This should be called when lexing code has
PL_parser->last_lop = NULL;
last_uni_pos = last_lop_pos = 0;
*buf = 0;
- SvCUR(linestr) = 0;
+ SvCUR_set(linestr, 0);
} else {
old_bufend_pos = PL_parser->bufend - buf;
bufptr_pos = PL_parser->bufptr - buf;
}
/*
-=for apidoc Amx|I32|lex_peek_unichar|U32 flags
+=for apidoc lex_peek_unichar
Looks ahead one (Unicode) character in the text currently being lexed.
Returns the codepoint (unsigned integer value) of the next character,
}
/*
-=for apidoc Amx|I32|lex_read_unichar|U32 flags
+=for apidoc lex_read_unichar
Reads the next (Unicode) character in the text currently being lexed.
Returns the codepoint (unsigned integer value) of the character read,
}
/*
-=for apidoc Amx|void|lex_read_space|U32 flags
+=for apidoc lex_read_space
Reads optional spaces, in Perl style, in the text currently being
lexed. The spaces may include ordinary whitespace characters and
/*
-=for apidoc EXMp|bool|validate_proto|SV *name|SV *proto|bool warn
+=for apidoc validate_proto
This function performs syntax checking on a prototype, C<proto>.
If C<warn> is true, any illegal characters or mismatched brackets
#define skipspace(s) skipspace_flags(s, 0)
#define peekspace(s) skipspace_flags(s, LEX_NO_INCLINE)
-STATIC char *
-S_skipspace_flags(pTHX_ char *s, U32 flags)
+char *
+Perl_skipspace_flags(pTHX_ char *s, U32 flags)
{
PERL_ARGS_ASSERT_SKIPSPACE_FLAGS;
if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
PL_parser->lex_re_reparsing = cBOOL(PL_in_eval & EVAL_RE_REPARSING);
PL_in_eval &= ~EVAL_RE_REPARSING;
- return '(';
+ return SUBLEXSTART;
}
/*
PL_bufend = SvPVX(PL_linestr);
PL_bufend += SvCUR(PL_linestr);
PL_expect = XOPERATOR;
- return ')';
+ return SUBLEXEND;
}
}
&& (range_min > 255 || ! convert_unicode)
#endif
) {
+ const STRLEN off = d - SvPVX(sv);
+ const STRLEN extra = 1 + (send - s) + 1;
+ char *e;
+
/* Move the high character one byte to the right; then
* insert between it and the range begin, an illegal
* byte which serves to indicate this is a range (using
* a '-' would be ambiguous). */
- char *e = d++;
+
+ if (off + extra > SvLEN(sv)) {
+ d = off + SvGROW(sv, off + extra);
+ max_ptr = d - off + offset_to_max;
+ }
+
+ e = d++;
while (e-- > max_ptr) {
*(e + 1) = *e;
}
* friends */
else if (*s == '(' && PL_lex_inpat && s[1] == '?' && !in_charclass) {
if (s[2] == '#') {
- while (s+1 < send && *s != ')')
- *d++ = *s++;
+ if (s_is_utf8) {
+ PERL_UINT_FAST8_T len = UTF8SKIP(s);
+
+ while (s + len < send && *s != ')') {
+ Copy(s, d, len, U8);
+ d += len;
+ s += len;
+ len = UTF8_SAFE_SKIP(s, send);
+ }
+ }
+ else while (s+1 < send && *s != ')') {
+ *d++ = *s++;
+ }
}
else if (!PL_lex_casemods
&& ( s[2] == '{' /* This should match regcomp.c */
goto default_action; /* Redo, having upgraded so both are UTF-8 */
}
else { /* UTF8ness matters: convert this non-UTF8 source char to
- UTF-8 for output. It will occupy 2 bytes */
- if (d + 2 >= SvEND(sv)) {
- const STRLEN extra = 2 + (send - s - 1) + 1;
- const STRLEN off = d - SvPVX_const(sv);
+ UTF-8 for output. It will occupy 2 bytes, but don't include
+ the input byte since we haven't incremented 's' yet. See
+ Note on sizing above. */
+ const STRLEN off = d - SvPVX(sv);
+ const STRLEN extra = 2 + (send - s - 1) + 1;
+ if (off + extra > SvLEN(sv)) {
d = off + SvGROW(sv, off + extra);
}
*d++ = UTF8_EIGHT_BIT_HI(*s);
}
} /* while loop to process each character */
+ {
+ const STRLEN off = d - SvPVX(sv);
+
+ /* See if room for the terminating NUL */
+ if (UNLIKELY(off >= SvLEN(sv))) {
+
+#ifndef DEBUGGING
+
+ if (off > SvLEN(sv))
+#endif
+ Perl_croak(aTHX_ "panic: constant overflowed allocated space,"
+ " %" UVuf " >= %" UVuf, (UV)off, (UV)SvLEN(sv));
+
+ /* Whew! Here we don't have room for the terminating NUL, but
+ * everything else so far has fit. It's not too late to grow
+ * to fit the NUL and continue on. But it is a bug, as the code
+ * above was supposed to have made room for this, so under
+ * DEBUGGING builds, we panic anyway. */
+ d = off + SvGROW(sv, off + 1);
+ }
+ }
+
/* terminate the string and set up the sv */
*d = '\0';
SvCUR_set(sv, d - SvPVX_const(sv));
- if (SvCUR(sv) >= SvLEN(sv))
- Perl_croak(aTHX_ "panic: constant overflowed allocated space, %" UVuf
- " >= %" UVuf, (UV)SvCUR(sv), (UV)SvLEN(sv));
SvPOK_on(sv);
if (d_is_utf8) {
switch (PL_expect) {
case XOPERATOR:
- if (!PL_in_my || PL_lex_state != LEX_NORMAL)
+ if (!PL_in_my || (PL_lex_state != LEX_NORMAL && !PL_lex_brackets))
break;
PL_bufptr = s; /* update in case we back off */
if (*s == '=') {
}
PL_expect = XOPERATOR;
- if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
+ if ((PL_lex_state == LEX_NORMAL || PL_lex_brackets) && isSPACE((char)tmp)) {
const bool islop = (PL_last_lop == PL_oldoldbufptr);
if (!islop || PL_last_lop_op == OP_GREPSTART)
PL_expect = XOPERATOR;
if (!PL_tokenbuf[1]) {
PREREF('@');
}
- if (PL_lex_state == LEX_NORMAL)
+ if (PL_lex_state == LEX_NORMAL || PL_lex_brackets)
s = skipspace(s);
if ( (PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
&& intuit_more(s, PL_bufend))
/* Returns a NUL terminated string, with the length of the string written to
*slp
*/
-STATIC char *
-S_scan_word(pTHX_ char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
+char *
+Perl_scan_word(pTHX_ char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
{
char *d = dest;
char * const e = d + destlen - 3; /* two-character token, ending NUL */
PL_lex_state = LEX_INTERPEND;
PL_expect = XREF;
}
- if (PL_lex_state == LEX_NORMAL) {
+ if (PL_lex_state == LEX_NORMAL || PL_lex_brackets) {
if (ckWARN(WARN_AMBIGUOUS)
&& (keyword(dest, d - dest, 0)
|| get_cvn_flags(dest, d - dest, is_utf8
SvIVX of the SV.
*/
-STATIC char *
-S_scan_str(pTHX_ char *start, int keep_bracketed_quoted, int keep_delims, int re_reparse,
+char *
+Perl_scan_str(pTHX_ char *start, int keep_bracketed_quoted, int keep_delims, int re_reparse,
char **delimp
)
{
I32 shift;
bool overflowed = FALSE;
bool just_zero = TRUE; /* just plain 0 or binary number? */
+ bool has_digs = FALSE;
static const NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 };
static const char* const bases[5] =
{ "", "binary", "", "octal", "hexadecimal" };
digit:
just_zero = FALSE;
+ has_digs = TRUE;
if (!overflowed) {
assert(shift >= 0);
x = u << shift; /* make room for the digit */
}
}
+ if (shift != 3 && !has_digs) {
+ /* 0x or 0b with no digits, treat it as if the x or b is part of the
+ next token
+ */
+ s = start + 1;
+ }
+
if (overflowed) {
if (n > 4294967295.0)
Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
}
/*
-=for apidoc Amx|void|wrap_keyword_plugin|Perl_keyword_plugin_t new_plugin|Perl_keyword_plugin_t *old_plugin_p
+=for apidoc wrap_keyword_plugin
Puts a C function into the chain of keyword plugins. This is the
preferred way to manipulate the L</PL_keyword_plugin> variable.
}
/*
-=for apidoc Amx|OP *|parse_arithexpr|U32 flags
+=for apidoc parse_arithexpr
Parse a Perl arithmetic expression. This may contain operators of precedence
down to the bit shift operators. The expression must be followed (and thus
}
/*
-=for apidoc Amx|OP *|parse_termexpr|U32 flags
+=for apidoc parse_termexpr
Parse a Perl term expression. This may contain operators of precedence
down to the assignment operators. The expression must be followed (and thus
}
/*
-=for apidoc Amx|OP *|parse_listexpr|U32 flags
+=for apidoc parse_listexpr
Parse a Perl list expression. This may contain operators of precedence
down to the comma operator. The expression must be followed (and thus
}
/*
-=for apidoc Amx|OP *|parse_fullexpr|U32 flags
+=for apidoc parse_fullexpr
Parse a single complete Perl expression. This allows the full
expression grammar, including the lowest-precedence operators such
}
/*
-=for apidoc Amx|OP *|parse_block|U32 flags
+=for apidoc parse_block
Parse a single complete Perl code block. This consists of an opening
brace, a sequence of statements, and a closing brace. The block
}
/*
-=for apidoc Amx|OP *|parse_barestmt|U32 flags
+=for apidoc parse_barestmt
Parse a single unadorned Perl statement. This may be a normal imperative
statement or a declaration that has compile-time effect. It does not
}
/*
-=for apidoc Amx|SV *|parse_label|U32 flags
+=for apidoc parse_label
Parse a single label, possibly optional, of the type that may prefix a
Perl statement. It is up to the caller to ensure that the dynamic parser
}
/*
-=for apidoc Amx|OP *|parse_fullstmt|U32 flags
+=for apidoc parse_fullstmt
Parse a single complete Perl statement. This may be a normal imperative
statement or a declaration that has compile-time effect, and may include
}
/*
-=for apidoc Amx|OP *|parse_stmtseq|U32 flags
+=for apidoc parse_stmtseq
Parse a sequence of zero or more Perl statements. These may be normal
imperative statements, including optional labels, or declarations