=head1 Lexer interface
This is the lower layer of the Perl parser, managing characters and tokens.
-=for apidoc AmU|yy_parser *|PL_parser
+=for apidoc AmnU|yy_parser *|PL_parser
Pointer to a structure encapsulating the state of the parsing operation
currently in progress. The pointer can be locally changed to perform
#define PERL_IN_TOKE_C
#include "perl.h"
#include "dquote_inline.h"
+#include "invlist_inline.h"
-#define new_constant(a,b,c,d,e,f,g) \
- S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g)
+#define new_constant(a,b,c,d,e,f,g, h) \
+ S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g, h)
#define pl_yylval (PL_parser->yylval)
{ ANDAND, TOKENTYPE_NONE, "ANDAND" },
{ ANDOP, TOKENTYPE_NONE, "ANDOP" },
{ ANONSUB, TOKENTYPE_IVAL, "ANONSUB" },
+ { ANON_SIGSUB, TOKENTYPE_IVAL, "ANON_SIGSUB" },
{ ARROW, TOKENTYPE_NONE, "ARROW" },
{ ASSIGNOP, TOKENTYPE_OPNUM, "ASSIGNOP" },
{ BITANDOP, TOKENTYPE_OPNUM, "BITANDOP" },
{ GIVEN, TOKENTYPE_IVAL, "GIVEN" },
{ HASHBRACK, TOKENTYPE_NONE, "HASHBRACK" },
{ IF, TOKENTYPE_IVAL, "IF" },
- { LABEL, TOKENTYPE_PVAL, "LABEL" },
+ { LABEL, TOKENTYPE_OPVAL, "LABEL" },
{ LOCAL, TOKENTYPE_IVAL, "LOCAL" },
{ LOOPEX, TOKENTYPE_OPNUM, "LOOPEX" },
{ LSTOP, TOKENTYPE_OPNUM, "LSTOP" },
{ RELOP, TOKENTYPE_OPNUM, "RELOP" },
{ REQUIRE, TOKENTYPE_NONE, "REQUIRE" },
{ SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" },
+ { SIGSUB, TOKENTYPE_NONE, "SIGSUB" },
{ SUB, TOKENTYPE_NONE, "SUB" },
+ { SUBLEXEND, TOKENTYPE_NONE, "SUBLEXEND" },
+ { SUBLEXSTART, TOKENTYPE_NONE, "SUBLEXSTART" },
{ THING, TOKENTYPE_OPVAL, "THING" },
{ UMINUS, TOKENTYPE_NONE, "UMINUS" },
{ UNIOP, TOKENTYPE_OPNUM, "UNIOP" },
PERL_ARGS_ASSERT_PRINTBUF;
- GCC_DIAG_IGNORE(-Wformat-nonliteral); /* fmt checked by caller */
+ GCC_DIAG_IGNORE_STMT(-Wformat-nonliteral); /* fmt checked by caller */
PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
- GCC_DIAG_RESTORE;
+ GCC_DIAG_RESTORE_STMT;
SvREFCNT_dec(tmp);
}
#endif
/*
-=for apidoc Amx|void|lex_start|SV *line|PerlIO *rsfp|U32 flags
+=for apidoc lex_start
Creates and initialises a new lexer/parser state object, supplying
a context in which to lex and parse from a new source of Perl code.
parser->lex_state = LEX_NORMAL;
parser->expect = XSTATE;
parser->rsfp = rsfp;
- parser->recheck_utf8_validity = FALSE;
+ parser->recheck_utf8_validity = TRUE;
parser->rsfp_filters =
!(flags & LEX_START_SAME_FILTER) || !oparser
? NULL
/*
-=for apidoc AmxU|SV *|PL_parser-E<gt>linestr
+=for apidoc AmnxUN|SV *|PL_parser-E<gt>linestr
Buffer scalar containing the chunk currently under consideration of the
text currently being lexed. This is always a plain string scalar (for
of these pointers is usually preferable to examination of the scalar
through normal scalar means.
-=for apidoc AmxU|char *|PL_parser-E<gt>bufend
+=for apidoc AmnxUN|char *|PL_parser-E<gt>bufend
Direct pointer to the end of the chunk of text currently being lexed, the
end of the lexer buffer. This is equal to C<SvPVX(PL_parser-E<gt>linestr)
always located at the end of the buffer, and does not count as part of
the buffer's contents.
-=for apidoc AmxU|char *|PL_parser-E<gt>bufptr
+=for apidoc AmnxUN|char *|PL_parser-E<gt>bufptr
Points to the current position of lexing inside the lexer buffer.
Characters around this point may be freely examined, within
using the slightly higher-level functions L</lex_peek_unichar> and
L</lex_read_unichar>.
-=for apidoc AmxU|char *|PL_parser-E<gt>linestart
+=for apidoc AmnxUN|char *|PL_parser-E<gt>linestart
Points to the start of the current line inside the lexer buffer.
This is useful for indicating at which column an error occurred, and
*/
/*
-=for apidoc Amx|bool|lex_bufutf8
+=for apidoc lex_bufutf8
Indicates whether the octets in the lexer buffer
(L</PL_parser-E<gt>linestr>) should be interpreted as the UTF-8 encoding
}
/*
-=for apidoc Amx|char *|lex_grow_linestr|STRLEN len
+=for apidoc lex_grow_linestr
Reallocates the lexer buffer (L</PL_parser-E<gt>linestr>) to accommodate
at least C<len> octets (including terminating C<NUL>). Returns a
}
/*
-=for apidoc Amx|void|lex_stuff_pvn|const char *pv|STRLEN len|U32 flags
+=for apidoc lex_stuff_pvn
Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
if (flags & LEX_STUFF_UTF8) {
goto plain_copy;
} else {
- STRLEN highhalf = 0; /* Count of variants */
- const char *p, *e = pv+len;
- for (p = pv; p != e; p++) {
- if (! UTF8_IS_INVARIANT(*p)) {
- highhalf++;
- }
- }
+ STRLEN highhalf = variant_under_utf8_count((U8 *) pv,
+ (U8 *) pv + len);
+ const char *p, *e = pv+len;;
if (!highhalf)
goto plain_copy;
lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len+highhalf);
SvCUR(PL_parser->linestr) + len+highhalf);
PL_parser->bufend += len+highhalf;
for (p = pv; p != e; p++) {
- U8 c = (U8)*p;
- if (! UTF8_IS_INVARIANT(c)) {
- *bufptr++ = UTF8_TWO_BYTE_HI(c);
- *bufptr++ = UTF8_TWO_BYTE_LO(c);
- } else {
- *bufptr++ = (char)c;
- }
+ append_utf8_from_native_byte(*p, (U8 **) &bufptr);
}
}
} else {
}
/*
-=for apidoc Amx|void|lex_stuff_pv|const char *pv|U32 flags
+=for apidoc lex_stuff_pv
Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
}
/*
-=for apidoc Amx|void|lex_stuff_sv|SV *sv|U32 flags
+=for apidoc lex_stuff_sv
Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
}
/*
-=for apidoc Amx|void|lex_unstuff|char *ptr
+=for apidoc lex_unstuff
Discards text about to be lexed, from L</PL_parser-E<gt>bufptr> up to
C<ptr>. Text following C<ptr> will be moved, and the buffer shortened.
}
/*
-=for apidoc Amx|void|lex_read_to|char *ptr
+=for apidoc lex_read_to
Consume text in the lexer buffer, from L</PL_parser-E<gt>bufptr> up
to C<ptr>. This advances L</PL_parser-E<gt>bufptr> to match C<ptr>,
}
/*
-=for apidoc Amx|void|lex_discard_to|char *ptr
+=for apidoc lex_discard_to
Discards the first part of the L</PL_parser-E<gt>linestr> buffer,
up to C<ptr>. The remaining content of the buffer will be moved, and
}
/*
-=for apidoc Amx|bool|lex_next_chunk|U32 flags
+=for apidoc lex_next_chunk
Reads in the next chunk of text to be lexed, appending it to
L</PL_parser-E<gt>linestr>. This should be called when lexing code has
PL_parser->last_lop = NULL;
last_uni_pos = last_lop_pos = 0;
*buf = 0;
- SvCUR(linestr) = 0;
+ SvCUR_set(linestr, 0);
} else {
old_bufend_pos = PL_parser->bufend - buf;
bufptr_pos = PL_parser->bufptr - buf;
}
/*
-=for apidoc Amx|I32|lex_peek_unichar|U32 flags
+=for apidoc lex_peek_unichar
Looks ahead one (Unicode) character in the text currently being lexed.
Returns the codepoint (unsigned integer value) of the next character,
}
/*
-=for apidoc Amx|I32|lex_read_unichar|U32 flags
+=for apidoc lex_read_unichar
Reads the next (Unicode) character in the text currently being lexed.
Returns the codepoint (unsigned integer value) of the character read,
}
/*
-=for apidoc Amx|void|lex_read_space|U32 flags
+=for apidoc lex_read_space
Reads optional spaces, in Perl style, in the text currently being
lexed. The spaces may include ordinary whitespace characters and
/*
-=for apidoc EXMp|bool|validate_proto|SV *name|SV *proto|bool warn
+=for apidoc validate_proto
This function performs syntax checking on a prototype, C<proto>.
If C<warn> is true, any illegal characters or mismatched brackets
}
else if (GvAV(cfgv)) {
AV * const av = GvAV(cfgv);
- const I32 start = CopLINE(PL_curcop)+1;
- I32 items = AvFILLp(av) - start;
+ const line_t start = CopLINE(PL_curcop)+1;
+ SSize_t items = AvFILLp(av) - start;
if (items > 0) {
AV * const av2 = GvAVn(gv2);
SV **svp = AvARRAY(av) + start;
- I32 l = (I32)line_num+1;
- while (items--)
- av_store(av2, l++, SvREFCNT_inc(*svp++));
+ Size_t l = line_num+1;
+ while (items-- && l < SSize_t_MAX && l == (line_t)l)
+ av_store(av2, (SSize_t)l++, SvREFCNT_inc(*svp++));
}
}
}
#define skipspace(s) skipspace_flags(s, 0)
#define peekspace(s) skipspace_flags(s, LEX_NO_INCLINE)
-STATIC char *
-S_skipspace_flags(pTHX_ char *s, U32 flags)
+char *
+Perl_skipspace_flags(pTHX_ char *s, U32 flags)
{
PERL_ARGS_ASSERT_SKIPSPACE_FLAGS;
if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
s = PL_last_uni;
while (isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF) || *s == '-')
s += UTF ? UTF8SKIP(s) : 1;
- if (memchr(s, '(', PL_bufptr - s))
+ if (s < PL_bufptr && memchr(s, '(', PL_bufptr - s))
return;
Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
* S_postderef
*
* This subroutine handles postfix deref syntax after the arrow has already
- * been emitted. @* $* etc. are emitted as two separate token right here.
+ * been emitted. @* $* etc. are emitted as two separate tokens right here.
* @[ @{ %[ %{ *{ are emitted also as two tokens, but this function emits
* only the first, leaving yylex to find the next.
*/
S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len)
{
SV * const sv = newSVpvn_utf8(start, len,
- !IN_BYTES
- && UTF
- && !is_utf8_invariant_string((const U8*)start, len)
- && is_utf8_string((const U8*)start, len));
+ ! IN_BYTES
+ && UTF
+ && len != 0
+ && is_utf8_non_invariant_string((const U8*)start, len));
return sv;
}
SvCUR_set(sv, d - SvPVX_const(sv));
finish:
if ( PL_hints & HINT_NEW_STRING )
- return new_constant(NULL, 0, "q", sv, pv, "q", 1);
+ return new_constant(NULL, 0, "q", sv, pv, "q", 1, NULL);
return sv;
}
PL_parser->lex_super_state = PL_lex_state;
PL_parser->lex_sub_inwhat = (U16)op_type;
PL_parser->lex_sub_op = PL_lex_op;
+ PL_parser->sub_no_recover = FALSE;
+ PL_parser->sub_error_count = PL_error_count;
PL_lex_state = LEX_INTERPPUSH;
PL_expect = XTERM;
PL_parser->lex_re_reparsing = cBOOL(PL_in_eval & EVAL_RE_REPARSING);
PL_in_eval &= ~EVAL_RE_REPARSING;
- return '(';
+ return SUBLEXSTART;
}
/*
else {
const line_t l = CopLINE(PL_curcop);
LEAVE;
+ if (PL_parser->sub_error_count != PL_error_count) {
+ if (PL_parser->sub_no_recover) {
+ yyquit();
+ NOT_REACHED;
+ }
+ }
if (PL_multi_close == '<')
PL_parser->herelines += l - PL_multi_end;
PL_bufend = SvPVX(PL_linestr);
PL_bufend += SvCUR(PL_linestr);
PL_expect = XOPERATOR;
- return ')';
+ return SUBLEXEND;
}
}
STATIC SV*
-S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e)
+S_get_and_check_backslash_N_name_wrapper(pTHX_ const char* s, const char* const e)
+{
+ /* This justs wraps get_and_check_backslash_N_name() to output any error
+ * message it returns. */
+
+ const char * error_msg = NULL;
+ SV * result;
+
+ PERL_ARGS_ASSERT_GET_AND_CHECK_BACKSLASH_N_NAME_WRAPPER;
+
+ /* charnames doesn't work well if there have been errors found */
+ if (PL_error_count > 0) {
+ return NULL;
+ }
+
+ result = get_and_check_backslash_N_name(s, e, cBOOL(UTF), &error_msg);
+
+ if (error_msg) {
+ yyerror_pv(error_msg, UTF ? SVf_UTF8 : 0);
+ }
+
+ return result;
+}
+
+SV*
+Perl_get_and_check_backslash_N_name(pTHX_ const char* s,
+ const char* const e,
+ const bool is_utf8,
+ const char ** error_msg)
{
/* <s> points to first character of interior of \N{}, <e> to one beyond the
* interior, hence to the "}". Finds what the name resolves to, returning
- * an SV* containing it; NULL if no valid one found */
-
- SV* res = newSVpvn_flags(s, e - s, UTF ? SVf_UTF8 : 0);
+ * an SV* containing it; NULL if no valid one found.
+ *
+ * 'is_utf8' is TRUE if we know we want the result to be UTF-8 even if it
+ * doesn't have to be. */
+ SV* res;
HV * table;
SV **cvp;
SV *cv;
SV *rv;
HV *stash;
const char* backslash_ptr = s - 3; /* Points to the <\> of \N{... */
+ dVAR;
PERL_ARGS_ASSERT_GET_AND_CHECK_BACKSLASH_N_NAME;
+ assert(e >= s);
+ assert(s > (char *) 3);
+
+ res = newSVpvn_flags(s, e - s, (is_utf8) ? SVf_UTF8 : 0);
+
if (!SvCUR(res)) {
SvREFCNT_dec_NN(res);
/* diag_listed_as: Unknown charname '%s' */
- yyerror("Unknown charname ''");
+ *error_msg = Perl_form(aTHX_ "Unknown charname ''");
return NULL;
}
res = new_constant( NULL, 0, "charnames", res, NULL, backslash_ptr,
/* include the <}> */
- e - backslash_ptr + 1);
+ e - backslash_ptr + 1, error_msg);
if (! SvPOK(res)) {
SvREFCNT_dec_NN(res);
return NULL;
* characters that begin a character name alias are alphabetic, otherwise
* would have to create a isCHARNAME_BEGIN macro */
- if (! UTF) {
+ if (! is_utf8) {
if (! isALPHAU(*s)) {
goto bad_charname;
}
s += 2;
}
else {
- if (! PL_utf8_charname_begin) {
- U8 flags = _CORE_SWASH_INIT_ACCEPT_INVLIST;
- PL_utf8_charname_begin = _core_swash_init("utf8",
- "_Perl_Charname_Begin",
- &PL_sv_undef,
- 1, 0, NULL, &flags);
- }
- if (! swash_fetch(PL_utf8_charname_begin, (U8 *) s, TRUE)) {
+ if (! _invlist_contains_cp(PL_utf8_charname_begin,
+ utf8_to_uvchr_buf((U8 *) s,
+ (U8 *) e,
+ NULL)))
+ {
goto bad_charname;
}
s += UTF8SKIP(s);
s += 2;
}
else {
- if (! PL_utf8_charname_continue) {
- U8 flags = _CORE_SWASH_INIT_ACCEPT_INVLIST;
- PL_utf8_charname_continue = _core_swash_init("utf8",
- "_Perl_Charname_Continue",
- &PL_sv_undef,
- 1, 0, NULL, &flags);
- }
- if (! swash_fetch(PL_utf8_charname_continue, (U8 *) s, TRUE)) {
+ if (! _invlist_contains_cp(PL_utf8_charname_continue,
+ utf8_to_uvchr_buf((U8 *) s,
+ (U8 *) e,
+ NULL)))
+ {
goto bad_charname;
}
s += UTF8SKIP(s);
/* diag_listed_as: charnames alias definitions may not contain
trailing white-space; marked by <-- HERE in %s
*/
- yyerror_pv(
- Perl_form(aTHX_
+ *error_msg = Perl_form(aTHX_
"charnames alias definitions may not contain trailing "
"white-space; marked by <-- HERE in %.*s<-- HERE %.*s",
(int)(s - backslash_ptr + 1), backslash_ptr,
- (int)(e - s + 1), s + 1
- ),
- UTF ? SVf_UTF8 : 0);
+ (int)(e - s + 1), s + 1);
return NULL;
}
- if (SvUTF8(res)) { /* Don't accept malformed input */
+ if (SvUTF8(res)) { /* Don't accept malformed charname value */
const U8* first_bad_char_loc;
STRLEN len;
const char* const str = SvPV_const(res, len);
0 /* 0 means don't die */ );
/* diag_listed_as: Malformed UTF-8 returned by \N{%s}
immediately after '%s' */
- yyerror_pv(
- Perl_form(aTHX_
+ *error_msg = Perl_form(aTHX_
"Malformed UTF-8 returned by %.*s immediately after '%.*s'",
(int) (e - backslash_ptr + 1), backslash_ptr,
- (int) ((char *) first_bad_char_loc - str), str
- ),
- SVf_UTF8);
+ (int) ((char *) first_bad_char_loc - str), str);
return NULL;
}
}
* that this print won't run off the end of the string */
/* diag_listed_as: Invalid character in \N{...}; marked by <-- HERE
in \N{%s} */
- yyerror_pv(
- Perl_form(aTHX_
+ *error_msg = Perl_form(aTHX_
"Invalid character in \\N{...}; marked by <-- HERE in %.*s<-- HERE %.*s",
(int)(s - backslash_ptr + 1), backslash_ptr,
- (int)(e - s + 1), s + 1
- ),
- UTF ? SVf_UTF8 : 0);
+ (int)(e - s + 1), s + 1);
return NULL;
}
/* diag_listed_as: charnames alias definitions may not contain a
sequence of multiple spaces; marked by <-- HERE
in %s */
- yyerror_pv(
- Perl_form(aTHX_
+ *error_msg = Perl_form(aTHX_
"charnames alias definitions may not contain a sequence of "
"multiple spaces; marked by <-- HERE in %.*s<-- HERE %.*s",
(int)(s - backslash_ptr + 1), backslash_ptr,
- (int)(e - s + 1), s + 1
- ),
- UTF ? SVf_UTF8 : 0);
+ (int)(e - s + 1), s + 1);
return NULL;
}
bool dorange = FALSE; /* are we in a translit range? */
bool didrange = FALSE; /* did we just finish a range? */
bool in_charclass = FALSE; /* within /[...]/ */
- bool has_utf8 = FALSE; /* Output constant is UTF8 */
- bool this_utf8 = cBOOL(UTF); /* Is the source string assumed to be
+ bool d_is_utf8 = FALSE; /* Output constant is UTF8 */
+ bool s_is_utf8 = cBOOL(UTF); /* Is the source string assumed to be
UTF8? But, this can show as true
when the source isn't utf8, as for
example when it is entirely composed
should we have to convert to
UTF-8) */
SV *res; /* result from charnames */
- STRLEN offset_to_max; /* The offset in the output to where the range
- high-end character is temporarily placed */
+ STRLEN offset_to_max = 0; /* The offset in the output to where the range
+ high-end character is temporarily placed */
/* Does something require special handling in tr/// ? This avoids extra
* work in a less likely case. As such, khw didn't feel it was worth
assert(PL_lex_inwhat != OP_TRANSR);
if (PL_lex_inwhat == OP_TRANS && PL_parser->lex_sub_op) {
/* If we are doing a trans and we know we want UTF8 set expectation */
- has_utf8 = PL_parser->lex_sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
- this_utf8 = PL_parser->lex_sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
+ d_is_utf8 = PL_parser->lex_sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
+ s_is_utf8 = PL_parser->lex_sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
}
/* Protect sv from errors and fatal warnings. */
ENTER_with_name("scan_const");
SAVEFREESV(sv);
+ /* A bunch of code in the loop below assumes that if s[n] exists and is not
+ * NUL, then s[n+1] exists. This assertion makes sure that assumption is
+ * valid */
+ assert(*send == '\0');
+
while (s < send
|| dorange /* Handle tr/// range at right edge of input */
) {
* occurences in the constant, except those added by a
* backslash escape sequence, like \x{100}. Mostly, those
* set 'has_above_latin1' as appropriate */
- if (this_utf8 && UTF8_IS_ABOVE_LATIN1(*s)) {
+ if (s_is_utf8 && UTF8_IS_ABOVE_LATIN1(*s)) {
has_above_latin1 = TRUE;
}
* time through the loop */
offset_to_max = d - SvPVX_const(sv);
- if (this_utf8 && UTF8_IS_ABOVE_LATIN1(*s)) {
+ if (s_is_utf8 && UTF8_IS_ABOVE_LATIN1(*s)) {
has_above_latin1 = TRUE;
}
IV real_range_max = 0;
#endif
/* Get the code point values of the range ends. */
- if (has_utf8) {
+ if (d_is_utf8) {
/* We know the utf8 is valid, because we just constructed
* it ourselves in previous loop iterations */
min_ptr = (char*) utf8_hop( (U8*) max_ptr, -1);
* get it out of the way now.) */
if (UNLIKELY(range_max == range_min)) {
d = max_ptr;
- if (! has_utf8 && ! UVCHR_IS_INVARIANT(range_max)) {
+ if (! d_is_utf8 && ! UVCHR_IS_INVARIANT(range_max)) {
utf8_variant_count--;
}
goto range_done;
/* Here the range contains at least 3 code points */
- if (has_utf8) {
+ if (d_is_utf8) {
/* If everything in the transliteration is below 256, we
* can avoid special handling later. A translation table
&& (range_min > 255 || ! convert_unicode)
#endif
) {
+ const STRLEN off = d - SvPVX(sv);
+ const STRLEN extra = 1 + (send - s) + 1;
+ char *e;
+
/* Move the high character one byte to the right; then
* insert between it and the range begin, an illegal
* byte which serves to indicate this is a range (using
* a '-' would be ambiguous). */
- char *e = d++;
+
+ if (off + extra > SvLEN(sv)) {
+ d = off + SvGROW(sv, off + extra);
+ max_ptr = d - off + offset_to_max;
+ }
+
+ e = d++;
while (e-- > max_ptr) {
*(e + 1) = *e;
}
* */
grow = (range_max - 1) - (range_min + 1) + 1;
- if (has_utf8) {
+ if (d_is_utf8) {
#ifdef EBCDIC
/* In some cases in EBCDIC, we haven't yet calculated a
* precise amount needed for the UTF-8 variants. Just
/* Recall that the min and max are now in Unicode terms, so
* we have to convert each character to its native
* equivalent */
- if (has_utf8) {
+ if (d_is_utf8) {
for (i = range_min; i <= range_max; i++) {
append_utf8_from_native_byte(
LATIN1_TO_NATIVE((U8) i),
/* Here, no conversions are necessary, which means that the
* first character in the range is already in 'd' and
* valid, so we can skip overwriting it */
- if (has_utf8) {
+ if (d_is_utf8) {
SSize_t i;
d += UTF8SKIP(d);
for (i = range_min + 1; i <= range_max; i++) {
* friends */
else if (*s == '(' && PL_lex_inpat && s[1] == '?' && !in_charclass) {
if (s[2] == '#') {
- while (s+1 < send && *s != ')')
- *d++ = *s++;
+ if (s_is_utf8) {
+ PERL_UINT_FAST8_T len = UTF8SKIP(s);
+
+ while (s + len < send && *s != ')') {
+ Copy(s, d, len, U8);
+ d += len;
+ s += len;
+ len = UTF8_SAFE_SKIP(s, send);
+ }
+ }
+ else while (s+1 < send && *s != ')') {
+ *d++ = *s++;
+ }
}
else if (!PL_lex_casemods
&& ( s[2] == '{' /* This should match regcomp.c */
{
const char* error;
- bool valid = grok_bslash_o(&s, PL_bufend,
+ bool valid = grok_bslash_o(&s, send,
&uv, &error,
TRUE, /* Output warning */
FALSE, /* Not strict */
{
const char* error;
- bool valid = grok_bslash_x(&s, PL_bufend,
+ bool valid = grok_bslash_x(&s, send,
&uv, &error,
TRUE, /* Output warning */
FALSE, /* Not strict */
*d++ = (char) uv;
}
else {
- if (!has_utf8 && uv > 255) {
+ if (!d_is_utf8 && uv > 255) {
/* Here, 'uv' won't fit unless we convert to UTF-8.
* If we've only seen invariants so far, all we have to
}
has_above_latin1 = TRUE;
- has_utf8 = TRUE;
+ d_is_utf8 = TRUE;
}
- if (! has_utf8) {
+ if (! d_is_utf8) {
*d++ = (char)uv;
utf8_variant_count++;
}
* tr/// doesn't care about Unicode rules, so no need
* there to upgrade to UTF-8 for small enough code
* points */
- if (! has_utf8 && ( uv > 0xFF
+ if (! d_is_utf8 && ( uv > 0xFF
|| PL_lex_inwhat != OP_TRANS))
{
/* See Note on sizing above. */
d = SvPVX(sv) + SvCUR(sv);
}
- has_utf8 = TRUE;
+ d_is_utf8 = TRUE;
has_above_latin1 = TRUE;
}
/* Add the (Unicode) code point to the output. */
- if (! has_utf8 || OFFUNI_IS_INVARIANT(uv)) {
+ if (! d_is_utf8 || OFFUNI_IS_INVARIANT(uv)) {
*d++ = (char) LATIN1_TO_NATIVE(uv);
}
else {
}
}
else /* Here is \N{NAME} but not \N{U+...}. */
- if ((res = get_and_check_backslash_N_name(s, e)))
- {
+ if (! (res = get_and_check_backslash_N_name_wrapper(s, e)))
+ { /* Failed. We should die eventually, but for now use a NUL
+ to keep parsing */
+ *d++ = '\0';
+ }
+ else { /* Successfully evaluated the name */
STRLEN len;
const char *str = SvPV_const(res, len);
if (PL_lex_inpat) {
if (! len) { /* The name resolved to an empty string */
- Copy("\\N{}", d, 4, char);
- d += 4;
+ const char empty_N[] = "\\N{_}";
+ Copy(empty_N, d, sizeof(empty_N) - 1, char);
+ d += sizeof(empty_N) - 1;
}
else {
/* In order to not lose information for the regex
/* Upgrade destination to be utf8 if this new
* component is */
- if (! has_utf8 && SvUTF8(res)) {
+ if (! d_is_utf8 && SvUTF8(res)) {
/* See Note on sizing above. */
const STRLEN extra = len + (send - s) + 1;
extra);
d = SvPVX(sv) + SvCUR(sv);
}
- has_utf8 = TRUE;
+ d_is_utf8 = TRUE;
} else if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
/* See Note on sizing above. (NOTE: SvCUR() is not
if (NATIVE_BYTE_IS_INVARIANT((U8)(*s))) {
*d++ = *s++;
}
- else if (! this_utf8 && ! has_utf8) {
+ else if (! s_is_utf8 && ! d_is_utf8) {
/* If neither source nor output is UTF-8, is also a single byte,
* just copy it; but this byte counts should we later have to
* convert to UTF-8 */
*d++ = *s++;
utf8_variant_count++;
}
- else if (this_utf8 && has_utf8) { /* Both UTF-8, can just copy */
+ else if (s_is_utf8 && d_is_utf8) { /* Both UTF-8, can just copy */
const STRLEN len = UTF8SKIP(s);
/* We expect the source to have already been checked for
d += len;
s += len;
}
- else { /* UTF8ness matters and doesn't match, need to convert */
- STRLEN len = 1;
- const UV nextuv = (this_utf8)
- ? utf8n_to_uvchr((U8*)s, send - s, &len, 0)
- : (UV) ((U8) *s);
- STRLEN need = UVCHR_SKIP(nextuv);
-
- if (!has_utf8) {
- SvCUR_set(sv, d - SvPVX_const(sv));
- SvPOK_on(sv);
- *d = '\0';
+ else if (s_is_utf8) { /* UTF8ness matters: convert output to utf8 */
+ STRLEN need = send - s + 1; /* See Note on sizing above. */
- /* See Note on sizing above. */
- need += (STRLEN)(send - s) + 1;
+ SvCUR_set(sv, d - SvPVX_const(sv));
+ SvPOK_on(sv);
+ *d = '\0';
- if (utf8_variant_count == 0) {
- SvUTF8_on(sv);
- d = SvCUR(sv) + SvGROW(sv, SvCUR(sv) + need);
- }
- else {
- sv_utf8_upgrade_flags_grow(sv,
- SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
- need);
- d = SvPVX(sv) + SvCUR(sv);
- }
- has_utf8 = TRUE;
- } else if (need > len) {
- /* encoded value larger than old, may need extra space (NOTE:
- * SvCUR() is not set correctly here). See Note on sizing
- * above. */
- const STRLEN extra = need + (send - s) + 1;
- const STRLEN off = d - SvPVX_const(sv);
+ if (utf8_variant_count == 0) {
+ SvUTF8_on(sv);
+ d = SvCUR(sv) + SvGROW(sv, SvCUR(sv) + need);
+ }
+ else {
+ sv_utf8_upgrade_flags_grow(sv,
+ SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
+ need);
+ d = SvPVX(sv) + SvCUR(sv);
+ }
+ d_is_utf8 = TRUE;
+ goto default_action; /* Redo, having upgraded so both are UTF-8 */
+ }
+ else { /* UTF8ness matters: convert this non-UTF8 source char to
+ UTF-8 for output. It will occupy 2 bytes, but don't include
+ the input byte since we haven't incremented 's' yet. See
+ Note on sizing above. */
+ const STRLEN off = d - SvPVX(sv);
+ const STRLEN extra = 2 + (send - s - 1) + 1;
+ if (off + extra > SvLEN(sv)) {
d = off + SvGROW(sv, off + extra);
}
- s += len;
-
- d = (char*)uvchr_to_utf8((U8*)d, nextuv);
+ *d++ = UTF8_EIGHT_BIT_HI(*s);
+ *d++ = UTF8_EIGHT_BIT_LO(*s);
+ s++;
}
} /* while loop to process each character */
+ {
+ const STRLEN off = d - SvPVX(sv);
+
+ /* See if room for the terminating NUL */
+ if (UNLIKELY(off >= SvLEN(sv))) {
+
+#ifndef DEBUGGING
+
+ if (off > SvLEN(sv))
+#endif
+ Perl_croak(aTHX_ "panic: constant overflowed allocated space,"
+ " %" UVuf " >= %" UVuf, (UV)off, (UV)SvLEN(sv));
+
+ /* Whew! Here we don't have room for the terminating NUL, but
+ * everything else so far has fit. It's not too late to grow
+ * to fit the NUL and continue on. But it is a bug, as the code
+ * above was supposed to have made room for this, so under
+ * DEBUGGING builds, we panic anyway. */
+ d = off + SvGROW(sv, off + 1);
+ }
+ }
+
/* terminate the string and set up the sv */
*d = '\0';
SvCUR_set(sv, d - SvPVX_const(sv));
- if (SvCUR(sv) >= SvLEN(sv))
- Perl_croak(aTHX_ "panic: constant overflowed allocated space, %" UVuf
- " >= %" UVuf, (UV)SvCUR(sv), (UV)SvLEN(sv));
SvPOK_on(sv);
- if (has_utf8) {
+ if (d_is_utf8) {
SvUTF8_on(sv);
if (PL_lex_inwhat == OP_TRANS && PL_parser->lex_sub_op) {
PL_parser->lex_sub_op->op_private |=
}
sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL,
- type, typelen);
+ type, typelen, NULL);
}
pl_yylval.opval = newSVOP(OP_CONST, 0, sv);
}
return TRUE;
if (*s != '{' && *s != '[')
return FALSE;
+ PL_parser->sub_no_recover = TRUE;
if (!PL_lex_inpat)
return TRUE;
Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
{
filter_t funcp;
+ I32 ret;
SV *datasv = NULL;
/* This API is bad. It should have been using unsigned int for maxlen.
Not sure if we want to change the API, but if not we should sanity
/* Call function. The function is expected to */
/* call "FILTER_READ(idx+1, buf_sv)" first. */
/* Return: <0:error, =0:eof, >0:not eof */
- return (*funcp)(aTHX_ idx, buf_sv, correct_length);
+ ENTER;
+ save_scalar(PL_errgv);
+ ret = (*funcp)(aTHX_ idx, buf_sv, correct_length);
+ LEAVE;
+ return ret;
}
STATIC char *
return yylex();
case LEX_FORMLINE:
+ if (PL_parser->sub_error_count != PL_error_count) {
+ /* There was an error parsing a formline, which tends to
+ mess up the parser.
+ Unlike interpolated sub-parsing, we can't treat any of
+ these as recoverable, so no need to check sub_no_recover.
+ */
+ yyquit();
+ }
assert(PL_lex_formbrack);
s = scan_formline(PL_bufptr);
if (!PL_lex_formbrack)
/* read var name, including sigil, into PL_tokenbuf */
PL_tokenbuf[0] = sigil;
parse_ident(&s, &dest, dest + sizeof(PL_tokenbuf) - 1,
- 0, cBOOL(UTF), FALSE);
+ 0, cBOOL(UTF), FALSE, FALSE);
*dest = '\0';
assert(PL_tokenbuf[1]); /* we have a variable name */
}
switch (PL_expect) {
case XOPERATOR:
- if (!PL_in_my || PL_lex_state != LEX_NORMAL)
+ if (!PL_in_my || (PL_lex_state != LEX_NORMAL && !PL_lex_brackets))
break;
PL_bufptr = s; /* update in case we back off */
if (*s == '=') {
case XATTRTERM:
PL_expect = XTERMBLOCK;
grabattrs:
+ /* NB: as well as parsing normal attributes, we also end up
+ * here if there is something looking like attributes
+ * following a signature (which is illegal, but used to be
+ * legal in 5.20..5.26). If the latter, we still parse the
+ * attributes so that error messages(s) are less confusing,
+ * but ignore them (parser->sig_seen).
+ */
s = skipspace(s);
attrs = NULL;
while (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
+ bool sig = PL_parser->sig_seen;
I32 tmp;
SV *sv;
d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
the CVf_BUILTIN_ATTRS define in cv.h! */
if (!PL_in_my && memEQs(SvPVX(sv), len, "lvalue")) {
sv_free(sv);
- CvLVALUE_on(PL_compcv);
+ if (!sig)
+ CvLVALUE_on(PL_compcv);
}
else if (!PL_in_my && memEQs(SvPVX(sv), len, "method")) {
sv_free(sv);
- CvMETHOD_on(PL_compcv);
+ if (!sig)
+ CvMETHOD_on(PL_compcv);
}
else if (!PL_in_my && memEQs(SvPVX(sv), len, "const"))
{
sv_free(sv);
- Perl_ck_warner_d(aTHX_
- packWARN(WARN_EXPERIMENTAL__CONST_ATTR),
- ":const is experimental"
- );
- CvANONCONST_on(PL_compcv);
- if (!CvANON(PL_compcv))
- yyerror(":const is not permitted on named "
- "subroutines");
+ if (!sig) {
+ Perl_ck_warner_d(aTHX_
+ packWARN(WARN_EXPERIMENTAL__CONST_ATTR),
+ ":const is experimental"
+ );
+ CvANONCONST_on(PL_compcv);
+ if (!CvANON(PL_compcv))
+ yyerror(":const is not permitted on named "
+ "subroutines");
+ }
}
/* After we've set the flags, it could be argued that
we don't need to do the attributes.pm-based setting
}
}
got_attrs:
+ if (PL_parser->sig_seen) {
+ /* see comment about about sig_seen and parser error
+ * handling */
+ if (attrs)
+ op_free(attrs);
+ Perl_croak(aTHX_ "Subroutine attributes must come "
+ "before the signature");
+ }
if (attrs) {
NEXTVAL_NEXTTOKE.opval = attrs;
force_next(THING);
SAVEI32(PL_lex_formbrack);
PL_parser->form_lex_state = PL_lex_state;
PL_lex_formbrack = PL_lex_brackets + 1;
+ PL_parser->sub_error_count = PL_error_count;
goto leftbracket;
}
}
}
PL_expect = XOPERATOR;
- if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
+ if ((PL_lex_state == LEX_NORMAL || PL_lex_brackets) && isSPACE((char)tmp)) {
const bool islop = (PL_last_lop == PL_oldoldbufptr);
if (!islop || PL_last_lop_op == OP_GREPSTART)
PL_expect = XOPERATOR;
if (!PL_tokenbuf[1]) {
PREREF('@');
}
- if (PL_lex_state == LEX_NORMAL)
+ if (PL_lex_state == LEX_NORMAL || PL_lex_brackets)
s = skipspace(s);
if ( (PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
&& intuit_more(s, PL_bufend))
}
if (PL_expect == XSTATE && s[1] == '.' && s[2] == '.') {
s += 3;
- TERM(YADAYADA);
+ OPERATOR(YADAYADA);
}
if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
char tmp = *s++;
if (!anydelim && PL_expect == XSTATE
&& d < PL_bufend && *d == ':' && *(d + 1) != ':') {
s = d + 1;
- pl_yylval.pval = savepvn(PL_tokenbuf, len+1);
- pl_yylval.pval[len] = '\0';
- pl_yylval.pval[len+1] = UTF ? 1 : 0;
+ pl_yylval.opval =
+ newSVOP(OP_CONST, 0,
+ newSVpvn_flags(PL_tokenbuf, len, UTF ? SVf_UTF8 : 0));
CLINE;
TOKEN(LABEL);
}
else { /* no override */
tmp = -tmp;
if (tmp == KEY_dump) {
- Perl_ck_warner_d(aTHX_ packWARN2(WARN_MISC,WARN_DEPRECATED),
- "dump() better written as CORE::dump(). "
- "dump() will no longer be available "
- "in Perl 5.30");
+ Perl_croak(aTHX_ "dump() must be written as CORE::dump() as of Perl 5.30");
}
gv = NULL;
gvp = 0;
int pkgname = 0;
const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
bool safebw;
+ bool no_op_error = FALSE;
+ if (PL_expect == XOPERATOR) {
+ if (PL_bufptr == PL_linestart) {
+ CopLINE_dec(PL_curcop);
+ Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
+ CopLINE_inc(PL_curcop);
+ }
+ else
+ /* We want to call no_op with s pointing after the
+ bareword, so defer it. But we want it to come
+ before the Bad name croak. */
+ no_op_error = TRUE;
+ }
/* Get the rest if it looks like a package qualifier */
STRLEN morelen;
s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
TRUE, &morelen);
+ if (no_op_error) {
+ no_op("Bareword",s);
+ no_op_error = FALSE;
+ }
if (!morelen)
Perl_croak(aTHX_ "Bad name after %" UTF8f "%s",
UTF8fARG(UTF, len, PL_tokenbuf),
pkgname = 1;
}
- if (PL_expect == XOPERATOR) {
- if (PL_bufptr == PL_linestart) {
- CopLINE_dec(PL_curcop);
- Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
- CopLINE_inc(PL_curcop);
- }
- else
+ if (no_op_error)
no_op("Bareword",s);
- }
/* See if the name is "Foo::",
in which case Foo is a bareword
if (!*d && !gv_stashpv(PL_tokenbuf, UTF ? SVf_UTF8 : 0))
{
/* PL_warn_reserved is constant */
- GCC_DIAG_IGNORE(-Wformat-nonliteral);
+ GCC_DIAG_IGNORE_STMT(-Wformat-nonliteral);
Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved,
PL_tokenbuf);
- GCC_DIAG_RESTORE;
+ GCC_DIAG_RESTORE_STMT;
}
}
}
if (!GvIO(gv))
GvIOp(gv) = newIO();
IoIFP(GvIOp(gv)) = PL_rsfp;
-#if defined(HAS_FCNTL) && defined(F_SETFD) && defined(FD_CLOEXEC)
- {
- const int fd = PerlIO_fileno(PL_rsfp);
- if (fd >= 3) {
- fcntl(fd,F_SETFD, FD_CLOEXEC);
- }
- }
-#endif
/* Mark this internal pseudo-handle as clean */
IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
if ((PerlIO*)PL_rsfp == PerlIO_stdin())
really_sub:
{
char * const tmpbuf = PL_tokenbuf + 1;
- expectation attrful;
bool have_name, have_proto;
const int key = tmp;
SV *format_name = NULL;
+ bool is_sigsub = FEATURE_SIGNATURES_IS_ENABLED;
SSize_t off = s-SvPVX(PL_linestr);
s = skipspace(s);
d = SvPVX(PL_linestr)+off;
+ SAVEBOOL(PL_parser->sig_seen);
+ PL_parser->sig_seen = FALSE;
+
if ( isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)
|| *s == '\''
|| (*s == ':' && s[1] == ':'))
{
- PL_expect = XBLOCK;
- attrful = XATTRBLOCK;
+ PL_expect = XATTRBLOCK;
d = scan_word(s, tmpbuf, sizeof PL_tokenbuf - 1, TRUE,
&len);
if (key == KEY_format)
Perl_croak(aTHX_
"Missing name in \"%s\"", PL_bufptr);
}
- PL_expect = XTERMBLOCK;
- attrful = XATTRTERM;
+ PL_expect = XATTRTERM;
sv_setpvs(PL_subname,"?");
have_name = FALSE;
}
}
/* Look for a prototype */
- if (*s == '(' && !FEATURE_SIGNATURES_IS_ENABLED) {
+ if (*s == '(' && !is_sigsub) {
s = scan_str(s,FALSE,FALSE,FALSE,NULL);
- COPLINE_SET_FROM_MULTI_END;
if (!s)
Perl_croak(aTHX_ "Prototype not terminated");
+ COPLINE_SET_FROM_MULTI_END;
(void)validate_proto(PL_subname, PL_lex_stuff,
ckWARN(WARN_ILLEGALPROTO), 0);
have_proto = TRUE;
else
have_proto = FALSE;
- if (*s == ':' && s[1] != ':')
- PL_expect = attrful;
- else if ((*s != '{' && *s != '(') && key != KEY_format) {
+ if ( !(*s == ':' && s[1] != ':')
+ && (*s != '{' && *s != '(') && key != KEY_format)
+ {
assert(key == KEY_sub || key == KEY_AUTOLOAD ||
key == KEY_DESTROY || key == KEY_BEGIN ||
key == KEY_UNITCHECK || key == KEY_CHECK ||
sv_setpvs(PL_subname, "__ANON__");
else
sv_setpvs(PL_subname, "__ANON__::__ANON__");
- TOKEN(ANONSUB);
+ if (is_sigsub)
+ TOKEN(ANON_SIGSUB);
+ else
+ TOKEN(ANONSUB);
}
force_ident_maybe_lex('&');
- TOKEN(SUB);
+ if (is_sigsub)
+ TOKEN(SIGSUB);
+ else
+ TOKEN(SUB);
}
case KEY_system:
Looks up an identifier in the pad or in a package
- is_sig indicates that this is a subroutine signature variable
+ PL_in_my == KEY_sigvar indicates that this is a subroutine signature variable
rather than a plain pad var.
Returns:
DEBUG_T({ PerlIO_printf(Perl_debug_log,
"### Pending identifier '%s'\n", PL_tokenbuf); });
+ assert(tokenbuf_len >= 2);
/* if we're in a my(), we can't allow dynamics here.
$foo'bar has already been turned into $foo::bar, so
if (has_colon) {
/* "my" variable %s can't be in a package */
/* PL_no_myglob is constant */
- GCC_DIAG_IGNORE(-Wformat-nonliteral);
+ GCC_DIAG_IGNORE_STMT(-Wformat-nonliteral);
yyerror_pv(Perl_form(aTHX_ PL_no_myglob,
PL_in_my == KEY_my ? "my" : "state",
*PL_tokenbuf == '&' ? "subroutin" : "variabl",
PL_tokenbuf),
UTF ? SVf_UTF8 : 0);
- GCC_DIAG_RESTORE;
+ GCC_DIAG_RESTORE_STMT;
}
if (PL_in_my == KEY_sigvar) {
HEK * const stashname = HvNAME_HEK(stash);
SV * const sym = newSVhek(stashname);
sv_catpvs(sym, "::");
- sv_catpvn_flags(sym, PL_tokenbuf+1, tokenbuf_len - 1, (UTF ? SV_CATUTF8 : SV_CATBYTES ));
+ sv_catpvn_flags(sym, PL_tokenbuf+1, tokenbuf_len > 0 ? tokenbuf_len - 1 : 0, (UTF ? SV_CATUTF8 : SV_CATBYTES ));
pl_yylval.opval = newSVOP(OP_CONST, 0, sym);
pl_yylval.opval->op_private = OPpCONST_ENTERED;
if (pit != '&')
&& PL_lex_state != LEX_NORMAL
&& !PL_lex_brackets)
{
- GV *const gv = gv_fetchpvn_flags(PL_tokenbuf + 1, tokenbuf_len - 1,
+ GV *const gv = gv_fetchpvn_flags(PL_tokenbuf + 1, tokenbuf_len > 0 ? tokenbuf_len - 1 : 0,
( UTF ? SVf_UTF8 : 0 ) | GV_ADDMG,
SVt_PVAV);
if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
/* build ops for a bareword */
pl_yylval.opval = newSVOP(OP_CONST, 0,
newSVpvn_flags(PL_tokenbuf + 1,
- tokenbuf_len - 1,
+ tokenbuf_len > 0 ? tokenbuf_len - 1 : 0,
UTF ? SVf_UTF8 : 0 ));
pl_yylval.opval->op_private = OPpCONST_ENTERED;
if (pit != '&')
- gv_fetchpvn_flags(PL_tokenbuf+1, tokenbuf_len - 1,
+ gv_fetchpvn_flags(PL_tokenbuf+1, tokenbuf_len > 0 ? tokenbuf_len - 1 : 0,
(PL_in_eval ? GV_ADDMULTI : GV_ADD)
| ( UTF ? SVf_UTF8 : 0 ),
((PL_tokenbuf[0] == '$') ? SVt_PV
Best used as sv=new_constant(..., sv, ...).
If s, pv are NULL, calls subroutine with one argument,
and <type> is used with error messages only.
- <type> is assumed to be well formed UTF-8 */
+ <type> is assumed to be well formed UTF-8.
+
+ If error_msg is not NULL, *error_msg will be set to any error encountered.
+ Otherwise yyerror() will be used to output it */
STATIC SV *
S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen,
- SV *sv, SV *pv, const char *type, STRLEN typelen)
+ SV *sv, SV *pv, const char *type, STRLEN typelen,
+ const char ** error_msg)
{
dSP;
HV * table = GvHV(PL_hintgv); /* ^H */
if (*key == 'c') { assert (strEQ(key, "charnames")); }
assert(type || s);
- /* charnames doesn't work well if there have been errors found */
- if (PL_error_count > 0 && *key == 'c')
- {
- SvREFCNT_dec_NN(sv);
- return &PL_sv_undef;
- }
-
sv_2mortal(sv); /* Parent created it permanently */
if (!table
|| ! (PL_hints & HINT_LOCALIZE_HH)
(type ? type: s), why1, why2, why3);
}
}
- yyerror_pv(msg, UTF ? SVf_UTF8 : 0);
+ if (error_msg) {
+ *error_msg = msg;
+ }
+ else {
+ yyerror_pv(msg, UTF ? SVf_UTF8 : 0);
+ }
return SvREFCNT_inc_simple_NN(sv);
}
now_ok:
PERL_STATIC_INLINE void
S_parse_ident(pTHX_ char **s, char **d, char * const e, int allow_package,
- bool is_utf8, bool check_dollar)
+ bool is_utf8, bool check_dollar, bool tick_warn)
{
+ int saw_tick = 0;
+ const char *olds = *s;
PERL_ARGS_ASSERT_PARSE_IDENT;
while (*s < PL_bufend) {
*(*d)++ = ':';
*(*d)++ = ':';
(*s)++;
+ saw_tick++;
}
else if (allow_package && **s == ':' && (*s)[1] == ':'
/* Disallow things like Foo::$bar. For the curious, this is
else
break;
}
+ if (UNLIKELY(tick_warn && saw_tick && PL_lex_state == LEX_INTERPNORMAL
+ && !PL_lex_brackets && ckWARN(WARN_SYNTAX))) {
+ char *d;
+ char *d2;
+ Newx(d, *s - olds + saw_tick + 2, char); /* +2 for $# */
+ d2 = d;
+ SAVEFREEPV(d);
+ Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
+ "Old package separator used in string");
+ if (olds[-1] == '#')
+ *d2++ = olds[-2];
+ *d2++ = olds[-1];
+ while (olds < *s) {
+ if (*olds == '\'') {
+ *d2++ = '\\';
+ *d2++ = *olds++;
+ }
+ else
+ *d2++ = *olds++;
+ }
+ Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
+ "\t(Did you mean \"%" UTF8f "\" instead?)\n",
+ UTF8fARG(is_utf8, d2-d, d));
+ }
return;
}
/* Returns a NUL terminated string, with the length of the string written to
*slp
*/
-STATIC char *
-S_scan_word(pTHX_ char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
+char *
+Perl_scan_word(pTHX_ char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
{
char *d = dest;
char * const e = d + destlen - 3; /* two-character token, ending NUL */
PERL_ARGS_ASSERT_SCAN_WORD;
- parse_ident(&s, &d, e, allow_package, is_utf8, TRUE);
+ parse_ident(&s, &d, e, allow_package, is_utf8, TRUE, FALSE);
*d = '\0';
*slp = d - dest;
return s;
}
}
else { /* See if it is a "normal" identifier */
- parse_ident(&s, &d, e, 1, is_utf8, FALSE);
+ parse_ident(&s, &d, e, 1, is_utf8, FALSE, TRUE);
}
*d = '\0';
d = dest;
s = skipspace(s);
}
}
- if ((s <= PL_bufend - (is_utf8)
+ if ((s <= PL_bufend - ((is_utf8)
? UTF8SKIP(s)
- : 1)
+ : 1))
&& VALID_LEN_ONE_IDENT(s, PL_bufend, is_utf8))
{
if (is_utf8) {
(the later check for } being at the expected point will trap
cases where this doesn't pan out.) */
d += is_utf8 ? UTF8SKIP(d) : 1;
- parse_ident(&s, &d, e, 1, is_utf8, TRUE);
+ parse_ident(&s, &d, e, 1, is_utf8, TRUE, TRUE);
*d = '\0';
}
else { /* caret word: ${^Foo} ${^CAPTURE[0]} */
PL_lex_state = LEX_INTERPEND;
PL_expect = XREF;
}
- if (PL_lex_state == LEX_NORMAL) {
+ if (PL_lex_state == LEX_NORMAL || PL_lex_brackets) {
if (ckWARN(WARN_AMBIGUOUS)
&& (keyword(dest, d - dest, 0)
|| get_cvn_flags(dest, d - dest, is_utf8
CopLINE_set(PL_curcop, orig_copline);
PL_parser->herelines = herelines;
*dest = '\0';
+ PL_parser->sub_no_recover = TRUE;
}
}
else if ( PL_lex_state == LEX_INTERPNORMAL
* the NVX field indicates how many src code lines the replacement
* spreads over */
sv_upgrade(PL_parser->lex_sub_repl, SVt_PVNV);
- ((XPVNV*)SvANY(PL_parser->lex_sub_repl))->xnv_u.xnv_lines = 0;
+ ((XPVNV*)SvANY(PL_parser->lex_sub_repl))->xnv_u.xnv_lines = linediff;
((XPVIV*)SvANY(PL_parser->lex_sub_repl))->xiv_u.xivu_eval_seen =
cBOOL(es);
}
e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
*PL_tokenbuf = '\n';
peek = s;
+
if (*peek == '~') {
indented = TRUE;
peek++; s++;
}
+
while (SPACE_OR_TAB(*peek))
peek++;
+
if (*peek == '`' || *peek == '\'' || *peek =='"') {
s = peek;
term = *s++;
s++, term = '\'';
else
term = '"';
+
if (! isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF))
Perl_croak(aTHX_ "Use of bare << to mean <<\"\" is forbidden");
+
peek = s;
+
while (isWORDCHAR_lazy_if_safe(peek, PL_bufend, UTF)) {
peek += UTF ? UTF8SKIP(peek) : 1;
}
+
len = (peek - s >= e - d) ? (e - d) : (peek - s);
Copy(s, d, len, char);
s += len;
d += len;
}
+
if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
Perl_croak(aTHX_ "Delimiter for here document is too long");
+
*d++ = '\n';
*d = '\0';
len = d - PL_tokenbuf;
PL_multi_start = origline + 1 + PL_parser->herelines;
PL_multi_open = PL_multi_close = '<';
+
/* inside a string eval or quote-like operator */
if (!infile || PL_lex_inwhat) {
SV *linestr;
entered. But we need them set here. */
shared->ls_bufptr = s;
shared->ls_linestr = PL_linestr;
- if (PL_lex_inwhat)
- /* Look for a newline. If the current buffer does not have one,
- peek into the line buffer of the parent lexing scope, going
- up as many levels as necessary to find one with a newline
- after bufptr.
- */
- while (!(s = (char *)memchr(
- (void *)shared->ls_bufptr, '\n',
- SvEND(shared->ls_linestr)-shared->ls_bufptr
- ))) {
- shared = shared->ls_prev;
- /* shared is only null if we have gone beyond the outermost
- lexing scope. In a file, we will have broken out of the
- loop in the previous iteration. In an eval, the string buf-
- fer ends with "\n;", so the while condition above will have
- evaluated to false. So shared can never be null. Or so you
- might think. Odd syntax errors like s;@{<<; can gobble up
- the implicit semicolon at the end of a flie, causing the
- file handle to be closed even when we are not in a string
- eval. So shared may be null in that case.
- (Closing '}' here to balance the earlier open brace for
- editors that look for matched pairs.) */
- if (UNLIKELY(!shared))
- goto interminable;
- /* A LEXSHARED struct with a null ls_prev pointer is the outer-
- most lexing scope. In a file, shared->ls_linestr at that
- level is just one line, so there is no body to steal. */
- if (infile && !shared->ls_prev) {
- s = olds;
- goto streaming;
- }
- }
+
+ if (PL_lex_inwhat) {
+ /* Look for a newline. If the current buffer does not have one,
+ peek into the line buffer of the parent lexing scope, going
+ up as many levels as necessary to find one with a newline
+ after bufptr.
+ */
+ while (!(s = (char *)memchr(
+ (void *)shared->ls_bufptr, '\n',
+ SvEND(shared->ls_linestr)-shared->ls_bufptr
+ )))
+ {
+ shared = shared->ls_prev;
+ /* shared is only null if we have gone beyond the outermost
+ lexing scope. In a file, we will have broken out of the
+ loop in the previous iteration. In an eval, the string buf-
+ fer ends with "\n;", so the while condition above will have
+ evaluated to false. So shared can never be null. Or so you
+ might think. Odd syntax errors like s;@{<<; can gobble up
+ the implicit semicolon at the end of a flie, causing the
+ file handle to be closed even when we are not in a string
+ eval. So shared may be null in that case.
+ (Closing '>>}' here to balance the earlier open brace for
+ editors that look for matched pairs.) */
+ if (UNLIKELY(!shared))
+ goto interminable;
+ /* A LEXSHARED struct with a null ls_prev pointer is the outer-
+ most lexing scope. In a file, shared->ls_linestr at that
+ level is just one line, so there is no body to steal. */
+ if (infile && !shared->ls_prev) {
+ s = olds;
+ goto streaming;
+ }
+ }
+ }
else { /* eval or we've already hit EOF */
s = (char*)memchr((void*)s, '\n', PL_bufend - s);
if (!s)
goto interminable;
}
+
linestr = shared->ls_linestr;
bufend = SvEND(linestr);
d = s;
if (! SPACE_OR_TAB(*backup)) {
break;
}
-
indent_len++;
}
/* No whitespace or all! */
if (backup == s || *backup == '\n') {
- Newxz(indent, indent_len + 1, char);
+ Newx(indent, indent_len + 1, char);
memcpy(indent, backup + 1, indent_len);
+ indent[indent_len] = 0;
s--; /* before our delimiter */
PL_parser->herelines--; /* this line doesn't count */
break;
}
}
}
- } else {
+ }
+ else {
while (s < bufend - len + 1
&& memNE(s,PL_tokenbuf,len) )
{
if (s >= bufend - len + 1) {
goto interminable;
}
+
sv_setpvn(tmpstr,d+1,s-d);
s += len - 1;
/* the preceding stmt passes a newline */
bufend - shared->re_eval_start);
shared->re_eval_start -= s-d;
}
+
if (cxstack_ix >= 0
&& CxTYPE(cx) == CXt_EVAL
&& CxOLD_OP_TYPE(cx) == OP_ENTEREVAL
cx->blk_eval.cur_text = newSVsv(linestr);
cx->blk_u16 |= 0x40; /* indicate cur_text is ref counted */
}
+
/* Copy everything from s onwards back to d. */
Move(s,d,bufend-s + 1,char);
SvCUR_set(linestr, SvCUR(linestr) - (s-d));
/* Setting PL_bufend only applies when we have not dug deeper
into other scopes, because sublex_done sets PL_bufend to
SvEND(PL_linestr). */
- if (shared == PL_parser->lex_shared) PL_bufend = SvEND(linestr);
+ if (shared == PL_parser->lex_shared)
+ PL_bufend = SvEND(linestr);
s = olds;
}
- else
- {
- SV *linestr_save;
- char *oldbufptr_save;
- char *oldoldbufptr_save;
- streaming:
- SvPVCLEAR(tmpstr); /* avoid "uninitialized" warning */
- term = PL_tokenbuf[1];
- len--;
- linestr_save = PL_linestr; /* must restore this afterwards */
- d = s; /* and this */
- oldbufptr_save = PL_oldbufptr;
- oldoldbufptr_save = PL_oldoldbufptr;
- PL_linestr = newSVpvs("");
- PL_bufend = SvPVX(PL_linestr);
- while (1) {
- PL_bufptr = PL_bufend;
- CopLINE_set(PL_curcop,
- origline + 1 + PL_parser->herelines);
- if (!lex_next_chunk(LEX_NO_TERM)
- && (!SvCUR(tmpstr) || SvEND(tmpstr)[-1] != '\n')) {
- /* Simply freeing linestr_save might seem simpler here, as it
- does not matter what PL_linestr points to, since we are
- about to croak; but in a quote-like op, linestr_save
- will have been prospectively freed already, via
- SAVEFREESV(PL_linestr) in sublex_push, so it’s easier to
- restore PL_linestr. */
- SvREFCNT_dec_NN(PL_linestr);
- PL_linestr = linestr_save;
- PL_oldbufptr = oldbufptr_save;
- PL_oldoldbufptr = oldoldbufptr_save;
- goto interminable;
- }
- CopLINE_set(PL_curcop, origline);
- if (!SvCUR(PL_linestr) || PL_bufend[-1] != '\n') {
- s = lex_grow_linestr(SvLEN(PL_linestr) + 3);
- /* ^That should be enough to avoid this needing to grow: */
- sv_catpvs(PL_linestr, "\n\0");
- assert(s == SvPVX(PL_linestr));
- PL_bufend = SvEND(PL_linestr);
- }
- s = PL_bufptr;
- PL_parser->herelines++;
- PL_last_lop = PL_last_uni = NULL;
+ else {
+ SV *linestr_save;
+ char *oldbufptr_save;
+ char *oldoldbufptr_save;
+ streaming:
+ SvPVCLEAR(tmpstr); /* avoid "uninitialized" warning */
+ term = PL_tokenbuf[1];
+ len--;
+ linestr_save = PL_linestr; /* must restore this afterwards */
+ d = s; /* and this */
+ oldbufptr_save = PL_oldbufptr;
+ oldoldbufptr_save = PL_oldoldbufptr;
+ PL_linestr = newSVpvs("");
+ PL_bufend = SvPVX(PL_linestr);
+
+ while (1) {
+ PL_bufptr = PL_bufend;
+ CopLINE_set(PL_curcop,
+ origline + 1 + PL_parser->herelines);
+
+ if ( !lex_next_chunk(LEX_NO_TERM)
+ && (!SvCUR(tmpstr) || SvEND(tmpstr)[-1] != '\n'))
+ {
+ /* Simply freeing linestr_save might seem simpler here, as it
+ does not matter what PL_linestr points to, since we are
+ about to croak; but in a quote-like op, linestr_save
+ will have been prospectively freed already, via
+ SAVEFREESV(PL_linestr) in sublex_push, so it’s easier to
+ restore PL_linestr. */
+ SvREFCNT_dec_NN(PL_linestr);
+ PL_linestr = linestr_save;
+ PL_oldbufptr = oldbufptr_save;
+ PL_oldoldbufptr = oldoldbufptr_save;
+ goto interminable;
+ }
+
+ CopLINE_set(PL_curcop, origline);
+
+ if (!SvCUR(PL_linestr) || PL_bufend[-1] != '\n') {
+ s = lex_grow_linestr(SvLEN(PL_linestr) + 3);
+ /* ^That should be enough to avoid this needing to grow: */
+ sv_catpvs(PL_linestr, "\n\0");
+ assert(s == SvPVX(PL_linestr));
+ PL_bufend = SvEND(PL_linestr);
+ }
+
+ s = PL_bufptr;
+ PL_parser->herelines++;
+ PL_last_lop = PL_last_uni = NULL;
+
#ifndef PERL_STRICT_CR
- if (PL_bufend - PL_linestart >= 2) {
- if ( (PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n')
- || (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
- {
- PL_bufend[-2] = '\n';
- PL_bufend--;
- SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
- }
- else if (PL_bufend[-1] == '\r')
- PL_bufend[-1] = '\n';
- }
- else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
- PL_bufend[-1] = '\n';
+ if (PL_bufend - PL_linestart >= 2) {
+ if ( (PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n')
+ || (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
+ {
+ PL_bufend[-2] = '\n';
+ PL_bufend--;
+ SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
+ }
+ else if (PL_bufend[-1] == '\r')
+ PL_bufend[-1] = '\n';
+ }
+ else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
+ PL_bufend[-1] = '\n';
#endif
- if (indented && (PL_bufend-s) >= len) {
- char * found = ninstr(s, PL_bufend, (PL_tokenbuf + 1), (PL_tokenbuf +1 + len));
- if (found) {
- char *backup = found;
- indent_len = 0;
+ if (indented && (PL_bufend-s) >= len) {
+ char * found = ninstr(s, PL_bufend, (PL_tokenbuf + 1), (PL_tokenbuf +1 + len));
- /* Only valid if it's preceded by whitespace only */
- while (backup != s && --backup >= s) {
- if (! SPACE_OR_TAB(*backup)) {
- break;
- }
- indent_len++;
- }
+ if (found) {
+ char *backup = found;
+ indent_len = 0;
- /* All whitespace or none! */
- if (backup == found || SPACE_OR_TAB(*backup)) {
- Newxz(indent, indent_len + 1, char);
- memcpy(indent, backup, indent_len);
- SvREFCNT_dec(PL_linestr);
- PL_linestr = linestr_save;
- PL_linestart = SvPVX(linestr_save);
- PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
- PL_oldbufptr = oldbufptr_save;
- PL_oldoldbufptr = oldoldbufptr_save;
- s = d;
- break;
- }
- }
+ /* Only valid if it's preceded by whitespace only */
+ while (backup != s && --backup >= s) {
+ if (! SPACE_OR_TAB(*backup)) {
+ break;
+ }
+ indent_len++;
+ }
- /* Didn't find it */
- sv_catsv(tmpstr,PL_linestr);
- } else {
- if (*s == term && PL_bufend-s >= len
- && memEQ(s,PL_tokenbuf + 1,len))
- {
- SvREFCNT_dec(PL_linestr);
- PL_linestr = linestr_save;
- PL_linestart = SvPVX(linestr_save);
- PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
- PL_oldbufptr = oldbufptr_save;
- PL_oldoldbufptr = oldoldbufptr_save;
- s = d;
- break;
- } else {
- sv_catsv(tmpstr,PL_linestr);
- }
- }
- }
+ /* All whitespace or none! */
+ if (backup == found || SPACE_OR_TAB(*backup)) {
+ Newx(indent, indent_len + 1, char);
+ memcpy(indent, backup, indent_len);
+ indent[indent_len] = 0;
+ SvREFCNT_dec(PL_linestr);
+ PL_linestr = linestr_save;
+ PL_linestart = SvPVX(linestr_save);
+ PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
+ PL_oldbufptr = oldbufptr_save;
+ PL_oldoldbufptr = oldoldbufptr_save;
+ s = d;
+ break;
+ }
+ }
+
+ /* Didn't find it */
+ sv_catsv(tmpstr,PL_linestr);
+ }
+ else {
+ if (*s == term && PL_bufend-s >= len
+ && memEQ(s,PL_tokenbuf + 1,len))
+ {
+ SvREFCNT_dec(PL_linestr);
+ PL_linestr = linestr_save;
+ PL_linestart = SvPVX(linestr_save);
+ PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
+ PL_oldbufptr = oldbufptr_save;
+ PL_oldoldbufptr = oldoldbufptr_save;
+ s = d;
+ break;
+ }
+ else {
+ sv_catsv(tmpstr,PL_linestr);
+ }
+ }
+ } /* while (1) */
}
+
PL_multi_end = origline + PL_parser->herelines;
+
if (indented && indent) {
STRLEN linecount = 1;
STRLEN herelen = SvCUR(tmpstr);
while (ss < se) {
/* newline only? Copy and move on */
if (*ss == '\n') {
- sv_catpv(newstr,"\n");
+ sv_catpvs(newstr,"\n");
ss++;
linecount++;
/* Found our indentation? Strip it */
- } else if (se - ss >= indent_len
+ }
+ else if (se - ss >= indent_len
&& memEQ(ss, indent, indent_len))
{
STRLEN le = 0;
-
ss += indent_len;
while ((ss + le) < se && *(ss + le) != '\n')
le++;
sv_catpvn(newstr, ss, le);
-
ss += le;
/* Line doesn't begin with our indentation? Croak */
- } else {
+ }
+ else {
+ Safefree(indent);
Perl_croak(aTHX_
"Indentation on line %d of here-doc doesn't match delimiter",
(int)linecount
);
}
- }
+ } /* while */
+
/* avoid sv_setsv() as we dont wan't to COW here */
sv_setpvn(tmpstr,SvPVX(newstr),SvCUR(newstr));
Safefree(indent);
SvREFCNT_dec_NN(newstr);
}
+
if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
SvPV_shrink_to_cur(tmpstr);
}
+
if (!IN_BYTES) {
if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
SvUTF8_on(tmpstr);
}
+
PL_lex_stuff = tmpstr;
pl_yylval.ival = op_type;
return s;
interminable:
+ if (indent)
+ Safefree(indent);
SvREFCNT_dec(tmpstr);
CopLINE_set(PL_curcop, origline);
missingterm(PL_tokenbuf + 1, sizeof(PL_tokenbuf) - 1);
}
+
/* scan_inputsymbol
takes: position of first '<' in input buffer
returns: position of first char following the matching '>' in
SvIVX of the SV.
*/
-STATIC char *
-S_scan_str(pTHX_ char *start, int keep_bracketed_quoted, int keep_delims, int re_reparse,
+char *
+Perl_scan_str(pTHX_ char *start, int keep_bracketed_quoted, int keep_delims, int re_reparse,
char **delimp
)
{
char term; /* terminating character */
char *to; /* current position in the sv's data */
I32 brackets = 1; /* bracket nesting level */
- bool has_utf8 = FALSE; /* is there any utf8 content? */
+ bool d_is_utf8 = FALSE; /* is there any utf8 content? */
IV termcode; /* terminating char. code */
- U8 termstr[UTF8_MAXBYTES]; /* terminating string */
+ U8 termstr[UTF8_MAXBYTES+1]; /* terminating string */
STRLEN termlen; /* length of terminating string */
line_t herelines;
const char * opening_delims = "([{<";
const char * closing_delims = ")]}>";
+ /* The only non-UTF character that isn't a stand alone grapheme is
+ * white-space, hence can't be a delimiter. */
const char * non_grapheme_msg = "Use of unassigned code point or"
" non-standalone grapheme for a delimiter"
- " will be a fatal error starting in Perl"
- " 5.30";
- /* The only non-UTF character that isn't a stand alone grapheme is
- * white-space, hence can't be a delimiter. So can skip for non-UTF-8 */
- bool check_grapheme = UTF && ckWARN_d(WARN_DEPRECATED);
-
+ " is not allowed";
PERL_ARGS_ASSERT_SCAN_STR;
/* skip space before the delimiter */
}
else {
termcode = utf8_to_uvchr_buf((U8*)s, (U8*)PL_bufend, &termlen);
- if (check_grapheme) {
- if ( UNLIKELY(UNICODE_IS_SUPER(termcode))
- || UNLIKELY(UNICODE_IS_NONCHAR(termcode)))
- {
- /* These are considered graphemes, and since the ending
- * delimiter will be the same, we don't have to check the other
- * end */
- check_grapheme = FALSE;
- }
- else if (UNLIKELY(! _is_grapheme((U8 *) start,
- (U8 *) s,
- (U8 *) PL_bufend,
- termcode)))
- {
- Perl_warner(aTHX_ packWARN(WARN_DEPRECATED), "%s", non_grapheme_msg);
-
- /* Don't have to check the other end, as have already warned at
- * this one */
- check_grapheme = FALSE;
- }
+ if (UTF && UNLIKELY(! _is_grapheme((U8 *) start,
+ (U8 *) s,
+ (U8 *) PL_bufend,
+ termcode)))
+ {
+ yyerror(non_grapheme_msg);
}
Copy(s, termstr, termlen, U8);
if ( s + termlen <= PL_bufend
&& memEQ(s + 1, (char*)termstr + 1, termlen - 1))
{
- if ( check_grapheme
+ if ( UTF
&& UNLIKELY(! _is_grapheme((U8 *) start,
- (U8 *) s,
- (U8 *) PL_bufend,
+ (U8 *) s,
+ (U8 *) PL_bufend,
termcode)))
{
- Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
- "%s", non_grapheme_msg);
+ yyerror(non_grapheme_msg);
}
break;
}
}
- else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF) {
- has_utf8 = TRUE;
+ else if (!d_is_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF) {
+ d_is_utf8 = TRUE;
}
*to = *s;
break;
else if ((UV)*s == PL_multi_open)
brackets++;
- else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
- has_utf8 = TRUE;
+ else if (!d_is_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
+ d_is_utf8 = TRUE;
*to = *s;
}
}
sv_catpvn(sv, s, termlen);
s += termlen;
- if (has_utf8)
+ if (d_is_utf8)
SvUTF8_on(sv);
PL_multi_end = CopLINE(PL_curcop);
I32 shift;
bool overflowed = FALSE;
bool just_zero = TRUE; /* just plain 0 or binary number? */
+ bool has_digs = FALSE;
static const NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 };
static const char* const bases[5] =
{ "", "binary", "", "octal", "hexadecimal" };
digit:
just_zero = FALSE;
+ has_digs = TRUE;
if (!overflowed) {
+ assert(shift >= 0);
x = u << shift; /* make room for the digit */
total_bits += shift;
NV nv_mult = 1.0;
#endif
bool accumulate = TRUE;
- for (h++; (isXDIGIT(*h) || *h == '_'); h++) {
+ U8 b;
+ int lim = 1 << shift;
+ for (h++; ((isXDIGIT(*h) && (b = XDIGIT_VALUE(*h)) < lim) ||
+ *h == '_'); h++) {
if (isXDIGIT(*h)) {
- U8 b = XDIGIT_VALUE(*h);
significant_bits += shift;
#ifdef HEXFP_UQUAD
if (accumulate) {
if (significant_bits < NV_MANT_DIG) {
/* We are in the long "run" of xdigits,
* accumulate the full four bits. */
+ assert(shift >= 0);
hexfp_uquad <<= shift;
hexfp_uquad |= b;
hexfp_frac_bits += shift;
- } else {
+ } else if (significant_bits - shift < NV_MANT_DIG) {
/* We are at a hexdigit either at,
* or straddling, the edge of mantissa.
* We will try grabbing as many as
significant_bits - NV_MANT_DIG;
if (tail <= 0)
tail += shift;
+ assert(tail >= 0);
hexfp_uquad <<= tail;
+ assert((shift - tail) >= 0);
hexfp_uquad |= b >> (shift - tail);
hexfp_frac_bits += tail;
}
#else /* HEXFP_NV */
if (accumulate) {
- nv_mult /= 16.0;
+ nv_mult /= nvshift[shift];
if (nv_mult > 0.0)
hexfp_nv += b * nv_mult;
else
}
}
+ if (shift != 3 && !has_digs) {
+ /* 0x or 0b with no digits, treat it as an error.
+ Originally this backed up the parse before the b or
+ x, but that has the potential for silent changes in
+ behaviour, like for: "0x.3" and "0x+$foo".
+ */
+ const char *d = s;
+ char *oldbp = PL_bufptr;
+ if (*d) ++d; /* so the user sees the bad non-digit */
+ PL_bufptr = (char *)d; /* so yyerror reports the context */
+ yyerror(Perl_form(aTHX_ "No digits found for %s literal",
+ shift == 4 ? "hexadecimal" : "binary"));
+ PL_bufptr = oldbp;
+ }
+
if (overflowed) {
if (n > 4294967295.0)
Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
}
if (just_zero && (PL_hints & HINT_NEW_INTEGER))
sv = new_constant(start, s - start, "integer",
- sv, NULL, NULL, 0);
+ sv, NULL, NULL, 0, NULL);
else if (PL_hints & HINT_NEW_BINARY)
- sv = new_constant(start, s - start, "binary", sv, NULL, NULL, 0);
+ sv = new_constant(start, s - start, "binary",
+ sv, NULL, NULL, 0, NULL);
}
break;
floatit = TRUE;
}
if (floatit) {
- STORE_LC_NUMERIC_UNDERLYING_SET_STANDARD();
/* terminate the string */
*d = '\0';
if (UNLIKELY(hexfp)) {
} else {
nv = Atof(PL_tokenbuf);
}
- RESTORE_LC_NUMERIC_UNDERLYING();
sv = newSVnv(nv);
}
const char *const key = floatit ? "float" : "integer";
const STRLEN keylen = floatit ? 5 : 7;
sv = S_new_constant(aTHX_ PL_tokenbuf, d - PL_tokenbuf,
- key, keylen, sv, NULL, NULL, 0);
+ key, keylen, sv, NULL, NULL, 0, NULL);
}
break;
return oldsavestack_ix;
}
+
+/* Do extra initialisation of a CV (typically one just created by
+ * start_subparse()) if that CV is for a named sub
+ */
+
+void
+Perl_init_named_cv(pTHX_ CV *cv, OP *nameop)
+{
+ PERL_ARGS_ASSERT_INIT_NAMED_CV;
+
+ if (nameop->op_type == OP_CONST) {
+ const char *const name = SvPV_nolen_const(((SVOP*)nameop)->op_sv);
+ if ( strEQ(name, "BEGIN")
+ || strEQ(name, "END")
+ || strEQ(name, "INIT")
+ || strEQ(name, "CHECK")
+ || strEQ(name, "UNITCHECK")
+ )
+ CvSPECIAL_on(cv);
+ }
+ else
+ /* State subs inside anonymous subs need to be
+ clonable themselves. */
+ if ( CvANON(CvOUTSIDE(cv))
+ || CvCLONE(CvOUTSIDE(cv))
+ || !PadnameIsSTATE(PadlistNAMESARRAY(CvPADLIST(
+ CvOUTSIDE(cv)
+ ))[nameop->op_targ])
+ )
+ CvCLONE_on(cv);
+}
+
+
static int
S_yywarn(pTHX_ const char *const s, U32 flags)
{
}
}
+ /* 'chars' isn't quite the right name, as code points above 0xFFFF
+ * require 4 bytes per char */
chars = SvCUR(utf16_buffer) >> 1;
have = SvCUR(utf8_buffer);
- SvGROW(utf8_buffer, have + chars * 3 + 1);
+
+ /* Assume the worst case size as noted by the functions: twice the
+ * number of input bytes */
+ SvGROW(utf8_buffer, have + chars * 4 + 1);
if (reverse) {
end = utf16_to_utf8_reversed((U8*)SvPVX(utf16_buffer),
return KEYWORD_PLUGIN_DECLINE;
}
+/*
+=for apidoc wrap_keyword_plugin
+
+Puts a C function into the chain of keyword plugins. This is the
+preferred way to manipulate the L</PL_keyword_plugin> variable.
+C<new_plugin> is a pointer to the C function that is to be added to the
+keyword plugin chain, and C<old_plugin_p> points to the storage location
+where a pointer to the next function in the chain will be stored. The
+value of C<new_plugin> is written into the L</PL_keyword_plugin> variable,
+while the value previously stored there is written to C<*old_plugin_p>.
+
+L</PL_keyword_plugin> is global to an entire process, and a module wishing
+to hook keyword parsing may find itself invoked more than once per
+process, typically in different threads. To handle that situation, this
+function is idempotent. The location C<*old_plugin_p> must initially
+(once per process) contain a null pointer. A C variable of static
+duration (declared at file scope, typically also marked C<static> to give
+it internal linkage) will be implicitly initialised appropriately, if it
+does not have an explicit initialiser. This function will only actually
+modify the plugin chain if it finds C<*old_plugin_p> to be null. This
+function is also thread safe on the small scale. It uses appropriate
+locking to avoid race conditions in accessing L</PL_keyword_plugin>.
+
+When this function is called, the function referenced by C<new_plugin>
+must be ready to be called, except for C<*old_plugin_p> being unfilled.
+In a threading situation, C<new_plugin> may be called immediately, even
+before this function has returned. C<*old_plugin_p> will always be
+appropriately set before C<new_plugin> is called. If C<new_plugin>
+decides not to do anything special with the identifier that it is given
+(which is the usual case for most calls to a keyword plugin), it must
+chain the plugin function referenced by C<*old_plugin_p>.
+
+Taken all together, XS code to install a keyword plugin should typically
+look something like this:
+
+ static Perl_keyword_plugin_t next_keyword_plugin;
+ static OP *my_keyword_plugin(pTHX_
+ char *keyword_plugin, STRLEN keyword_len, OP **op_ptr)
+ {
+ if (memEQs(keyword_ptr, keyword_len,
+ "my_new_keyword")) {
+ ...
+ } else {
+ return next_keyword_plugin(aTHX_
+ keyword_ptr, keyword_len, op_ptr);
+ }
+ }
+ BOOT:
+ wrap_keyword_plugin(my_keyword_plugin,
+ &next_keyword_plugin);
+
+Direct access to L</PL_keyword_plugin> should be avoided.
+
+=cut
+*/
+
+void
+Perl_wrap_keyword_plugin(pTHX_
+ Perl_keyword_plugin_t new_plugin, Perl_keyword_plugin_t *old_plugin_p)
+{
+ dVAR;
+
+ PERL_UNUSED_CONTEXT;
+ PERL_ARGS_ASSERT_WRAP_KEYWORD_PLUGIN;
+ if (*old_plugin_p) return;
+ KEYWORD_PLUGIN_MUTEX_LOCK;
+ if (!*old_plugin_p) {
+ *old_plugin_p = PL_keyword_plugin;
+ PL_keyword_plugin = new_plugin;
+ }
+ KEYWORD_PLUGIN_MUTEX_UNLOCK;
+}
+
#define parse_recdescent(g,p) S_parse_recdescent(aTHX_ g,p)
static void
S_parse_recdescent(pTHX_ int gramtype, I32 fakeeof)
}
/*
-=for apidoc Amx|OP *|parse_arithexpr|U32 flags
+=for apidoc parse_arithexpr
Parse a Perl arithmetic expression. This may contain operators of precedence
down to the bit shift operators. The expression must be followed (and thus
}
/*
-=for apidoc Amx|OP *|parse_termexpr|U32 flags
+=for apidoc parse_termexpr
Parse a Perl term expression. This may contain operators of precedence
down to the assignment operators. The expression must be followed (and thus
}
/*
-=for apidoc Amx|OP *|parse_listexpr|U32 flags
+=for apidoc parse_listexpr
Parse a Perl list expression. This may contain operators of precedence
down to the comma operator. The expression must be followed (and thus
}
/*
-=for apidoc Amx|OP *|parse_fullexpr|U32 flags
+=for apidoc parse_fullexpr
Parse a single complete Perl expression. This allows the full
expression grammar, including the lowest-precedence operators such
}
/*
-=for apidoc Amx|OP *|parse_block|U32 flags
+=for apidoc parse_block
Parse a single complete Perl code block. This consists of an opening
brace, a sequence of statements, and a closing brace. The block
}
/*
-=for apidoc Amx|OP *|parse_barestmt|U32 flags
+=for apidoc parse_barestmt
Parse a single unadorned Perl statement. This may be a normal imperative
statement or a declaration that has compile-time effect. It does not
}
/*
-=for apidoc Amx|SV *|parse_label|U32 flags
+=for apidoc parse_label
Parse a single label, possibly optional, of the type that may prefix a
Perl statement. It is up to the caller to ensure that the dynamic parser
if (PL_nexttoke) {
PL_parser->yychar = yylex();
if (PL_parser->yychar == LABEL) {
- char * const lpv = pl_yylval.pval;
- STRLEN llen = strlen(lpv);
+ SV * const labelsv = cSVOPx(pl_yylval.opval)->op_sv;
PL_parser->yychar = YYEMPTY;
- return newSVpvn_flags(lpv, llen, lpv[llen+1] ? SVf_UTF8 : 0);
+ cSVOPx(pl_yylval.opval)->op_sv = NULL;
+ op_free(pl_yylval.opval);
+ return labelsv;
} else {
yyunlex();
goto no_label;
}
/*
-=for apidoc Amx|OP *|parse_fullstmt|U32 flags
+=for apidoc parse_fullstmt
Parse a single complete Perl statement. This may be a normal imperative
statement or a declaration that has compile-time effect, and may include
}
/*
-=for apidoc Amx|OP *|parse_stmtseq|U32 flags
+=for apidoc parse_stmtseq
Parse a sequence of zero or more Perl statements. These may be normal
imperative statements, including optional labels, or declarations
}
/*
+=for apidoc parse_subsignature
+
+Parse a subroutine signature declaration. This is the contents of the
+parentheses following a named or anonymous subroutine declaration when the
+C<signatures> feature is enabled. Note that this function neither expects
+nor consumes the opening and closing parentheses around the signature; it
+is the caller's job to handle these.
+
+This function must only be called during parsing of a subroutine; after
+L</start_subparse> has been called. It might allocate lexical variables on
+the pad for the current subroutine.
+
+The op tree to unpack the arguments from the stack at runtime is returned.
+This op tree should appear at the beginning of the compiled function. The
+caller may wish to use L</op_append_list> to build their function body
+after it, or splice it together with the body before calling L</newATTRSUB>.
+
+The C<flags> parameter is reserved for future use, and must always
+be zero.
+
+=cut
+*/
+
+OP *
+Perl_parse_subsignature(pTHX_ U32 flags)
+{
+ if (flags)
+ Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_subsignature");
+ return parse_recdescent_for_op(GRAMSUBSIGNATURE, LEX_FAKEEOF_NONEXPR);
+}
+
+/*
* ex: set ts=8 sts=4 sw=4 et:
*/