#define PERL_IN_TOKE_C
#include "perl.h"
#include "dquote_inline.h"
+#include "invlist_inline.h"
-#define new_constant(a,b,c,d,e,f,g) \
- S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g)
+#define new_constant(a,b,c,d,e,f,g, h) \
+ S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g, h)
#define pl_yylval (PL_parser->yylval)
{ ANDAND, TOKENTYPE_NONE, "ANDAND" },
{ ANDOP, TOKENTYPE_NONE, "ANDOP" },
{ ANONSUB, TOKENTYPE_IVAL, "ANONSUB" },
+ { ANON_SIGSUB, TOKENTYPE_IVAL, "ANON_SIGSUB" },
{ ARROW, TOKENTYPE_NONE, "ARROW" },
{ ASSIGNOP, TOKENTYPE_OPNUM, "ASSIGNOP" },
{ BITANDOP, TOKENTYPE_OPNUM, "BITANDOP" },
{ RELOP, TOKENTYPE_OPNUM, "RELOP" },
{ REQUIRE, TOKENTYPE_NONE, "REQUIRE" },
{ SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" },
+ { SIGSUB, TOKENTYPE_NONE, "SIGSUB" },
{ SUB, TOKENTYPE_NONE, "SUB" },
{ THING, TOKENTYPE_OPVAL, "THING" },
{ UMINUS, TOKENTYPE_NONE, "UMINUS" },
PERL_ARGS_ASSERT_PRINTBUF;
- GCC_DIAG_IGNORE(-Wformat-nonliteral); /* fmt checked by caller */
+ GCC_DIAG_IGNORE_STMT(-Wformat-nonliteral); /* fmt checked by caller */
PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
- GCC_DIAG_RESTORE;
+ GCC_DIAG_RESTORE_STMT;
SvREFCNT_dec(tmp);
}
*/
STATIC void
-S_missingterm(pTHX_ char *s, const STRLEN len)
+S_missingterm(pTHX_ char *s, STRLEN len)
{
char tmpbuf[UTF8_MAXBYTES + 1];
char q;
SV *sv;
if (s) {
char * const nl = (char *) my_memrchr(s, '\n', len);
- if (nl)
- *nl = '\0';
+ if (nl) {
+ *nl = '\0';
+ len = nl - s;
+ }
uni = UTF;
}
else if (PL_multi_close < 32) {
tmpbuf[1] = (char)toCTRL(PL_multi_close);
tmpbuf[2] = '\0';
s = tmpbuf;
+ len = 2;
}
else {
if (LIKELY(PL_multi_close < 256)) {
*tmpbuf = (char)PL_multi_close;
tmpbuf[1] = '\0';
+ len = 1;
}
else {
+ char *end = (char *)uvchr_to_utf8((U8 *)tmpbuf, PL_multi_close);
+ *end = '\0';
+ len = end - tmpbuf;
uni = TRUE;
- *uvchr_to_utf8((U8 *)tmpbuf, PL_multi_close) = 0;
}
s = tmpbuf;
}
q = memchr(s, '"', len) ? '\'' : '"';
- sv = sv_2mortal(newSVpv(s,0));
+ sv = sv_2mortal(newSVpvn(s, len));
if (uni)
SvUTF8_on(sv);
- Perl_croak(aTHX_ "Can't find string terminator %c%" SVf
- "%c anywhere before EOF",q,SVfARG(sv),q);
+ Perl_croak(aTHX_ "Can't find string terminator %c%" SVf "%c"
+ " anywhere before EOF", q, SVfARG(sv), q);
}
#include "feature.h"
SvCUR(PL_parser->linestr) + len+highhalf);
PL_parser->bufend += len+highhalf;
for (p = pv; p != e; p++) {
- U8 c = (U8)*p;
- if (! UTF8_IS_INVARIANT(c)) {
- *bufptr++ = UTF8_TWO_BYTE_HI(c);
- *bufptr++ = UTF8_TWO_BYTE_LO(c);
- } else {
- *bufptr++ = (char)c;
- }
+ append_utf8_from_native_byte(*p, (U8 **) &bufptr);
}
}
} else {
}
else if (GvAV(cfgv)) {
AV * const av = GvAV(cfgv);
- const I32 start = CopLINE(PL_curcop)+1;
- I32 items = AvFILLp(av) - start;
+ const line_t start = CopLINE(PL_curcop)+1;
+ SSize_t items = AvFILLp(av) - start;
if (items > 0) {
AV * const av2 = GvAVn(gv2);
SV **svp = AvARRAY(av) + start;
- I32 l = (I32)line_num+1;
- while (items--)
- av_store(av2, l++, SvREFCNT_inc(*svp++));
+ Size_t l = line_num+1;
+ while (items-- && l < SSize_t_MAX && l == (line_t)l)
+ av_store(av2, (SSize_t)l++, SvREFCNT_inc(*svp++));
}
}
}
s = PL_last_uni;
while (isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF) || *s == '-')
s += UTF ? UTF8SKIP(s) : 1;
- if (memchr(s, '(', PL_bufptr - s))
+ if (s < PL_bufptr && memchr(s, '(', PL_bufptr - s))
return;
Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
* S_postderef
*
* This subroutine handles postfix deref syntax after the arrow has already
- * been emitted. @* $* etc. are emitted as two separate token right here.
+ * been emitted. @* $* etc. are emitted as two separate tokens right here.
* @[ @{ %[ %{ *{ are emitted also as two tokens, but this function emits
* only the first, leaving yylex to find the next.
*/
S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len)
{
SV * const sv = newSVpvn_utf8(start, len,
- !IN_BYTES
- && UTF
- && !is_utf8_invariant_string((const U8*)start, len)
- && is_utf8_string((const U8*)start, len));
+ ! IN_BYTES
+ && UTF
+ && len != 0
+ && is_utf8_non_invariant_string((const U8*)start, len));
return sv;
}
SvCUR_set(sv, d - SvPVX_const(sv));
finish:
if ( PL_hints & HINT_NEW_STRING )
- return new_constant(NULL, 0, "q", sv, pv, "q", 1);
+ return new_constant(NULL, 0, "q", sv, pv, "q", 1, NULL);
return sv;
}
PL_parser->lex_super_state = PL_lex_state;
PL_parser->lex_sub_inwhat = (U16)op_type;
PL_parser->lex_sub_op = PL_lex_op;
+ PL_parser->sub_no_recover = FALSE;
+ PL_parser->sub_error_count = PL_error_count;
PL_lex_state = LEX_INTERPPUSH;
PL_expect = XTERM;
else {
const line_t l = CopLINE(PL_curcop);
LEAVE;
+ if (PL_parser->sub_error_count != PL_error_count) {
+ if (PL_parser->sub_no_recover) {
+ yyquit();
+ NOT_REACHED;
+ }
+ }
if (PL_multi_close == '<')
PL_parser->herelines += l - PL_multi_end;
PL_bufend = SvPVX(PL_linestr);
}
STATIC SV*
-S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e)
+S_get_and_check_backslash_N_name_wrapper(pTHX_ const char* s, const char* const e)
+{
+ /* This justs wraps get_and_check_backslash_N_name() to output any error
+ * message it returns. */
+
+ const char * error_msg = NULL;
+ SV * result;
+
+ PERL_ARGS_ASSERT_GET_AND_CHECK_BACKSLASH_N_NAME_WRAPPER;
+
+ /* charnames doesn't work well if there have been errors found */
+ if (PL_error_count > 0) {
+ return NULL;
+ }
+
+ result = get_and_check_backslash_N_name(s, e, cBOOL(UTF), &error_msg);
+
+ if (error_msg) {
+ yyerror_pv(error_msg, UTF ? SVf_UTF8 : 0);
+ }
+
+ return result;
+}
+
+SV*
+Perl_get_and_check_backslash_N_name(pTHX_ const char* s,
+ const char* const e,
+ const bool is_utf8,
+ const char ** error_msg)
{
/* <s> points to first character of interior of \N{}, <e> to one beyond the
* interior, hence to the "}". Finds what the name resolves to, returning
- * an SV* containing it; NULL if no valid one found */
-
- SV* res = newSVpvn_flags(s, e - s, UTF ? SVf_UTF8 : 0);
+ * an SV* containing it; NULL if no valid one found.
+ *
+ * 'is_utf8' is TRUE if we know we want the result to be UTF-8 even if it
+ * doesn't have to be. */
+ SV* res;
HV * table;
SV **cvp;
SV *cv;
SV *rv;
HV *stash;
const char* backslash_ptr = s - 3; /* Points to the <\> of \N{... */
+ dVAR;
PERL_ARGS_ASSERT_GET_AND_CHECK_BACKSLASH_N_NAME;
+ assert(e >= s);
+ assert(s > (char *) 3);
+
+ res = newSVpvn_flags(s, e - s, (is_utf8) ? SVf_UTF8 : 0);
+
if (!SvCUR(res)) {
SvREFCNT_dec_NN(res);
/* diag_listed_as: Unknown charname '%s' */
- yyerror("Unknown charname ''");
+ *error_msg = Perl_form(aTHX_ "Unknown charname ''");
return NULL;
}
res = new_constant( NULL, 0, "charnames", res, NULL, backslash_ptr,
/* include the <}> */
- e - backslash_ptr + 1);
+ e - backslash_ptr + 1, error_msg);
if (! SvPOK(res)) {
SvREFCNT_dec_NN(res);
return NULL;
* characters that begin a character name alias are alphabetic, otherwise
* would have to create a isCHARNAME_BEGIN macro */
- if (! UTF) {
+ if (! is_utf8) {
if (! isALPHAU(*s)) {
goto bad_charname;
}
s += 2;
}
else {
- if (! PL_utf8_charname_begin) {
- U8 flags = _CORE_SWASH_INIT_ACCEPT_INVLIST;
- PL_utf8_charname_begin = _core_swash_init("utf8",
- "_Perl_Charname_Begin",
- &PL_sv_undef,
- 1, 0, NULL, &flags);
- }
- if (! swash_fetch(PL_utf8_charname_begin, (U8 *) s, TRUE)) {
+ if (! _invlist_contains_cp(PL_utf8_charname_begin,
+ utf8_to_uvchr_buf((U8 *) s,
+ (U8 *) e,
+ NULL)))
+ {
goto bad_charname;
}
s += UTF8SKIP(s);
s += 2;
}
else {
- if (! PL_utf8_charname_continue) {
- U8 flags = _CORE_SWASH_INIT_ACCEPT_INVLIST;
- PL_utf8_charname_continue = _core_swash_init("utf8",
- "_Perl_Charname_Continue",
- &PL_sv_undef,
- 1, 0, NULL, &flags);
- }
- if (! swash_fetch(PL_utf8_charname_continue, (U8 *) s, TRUE)) {
+ if (! _invlist_contains_cp(PL_utf8_charname_continue,
+ utf8_to_uvchr_buf((U8 *) s,
+ (U8 *) e,
+ NULL)))
+ {
goto bad_charname;
}
s += UTF8SKIP(s);
/* diag_listed_as: charnames alias definitions may not contain
trailing white-space; marked by <-- HERE in %s
*/
- yyerror_pv(
- Perl_form(aTHX_
+ *error_msg = Perl_form(aTHX_
"charnames alias definitions may not contain trailing "
"white-space; marked by <-- HERE in %.*s<-- HERE %.*s",
(int)(s - backslash_ptr + 1), backslash_ptr,
- (int)(e - s + 1), s + 1
- ),
- UTF ? SVf_UTF8 : 0);
+ (int)(e - s + 1), s + 1);
return NULL;
}
- if (SvUTF8(res)) { /* Don't accept malformed input */
+ if (SvUTF8(res)) { /* Don't accept malformed charname value */
const U8* first_bad_char_loc;
STRLEN len;
const char* const str = SvPV_const(res, len);
0 /* 0 means don't die */ );
/* diag_listed_as: Malformed UTF-8 returned by \N{%s}
immediately after '%s' */
- yyerror_pv(
- Perl_form(aTHX_
+ *error_msg = Perl_form(aTHX_
"Malformed UTF-8 returned by %.*s immediately after '%.*s'",
(int) (e - backslash_ptr + 1), backslash_ptr,
- (int) ((char *) first_bad_char_loc - str), str
- ),
- SVf_UTF8);
+ (int) ((char *) first_bad_char_loc - str), str);
return NULL;
}
}
* that this print won't run off the end of the string */
/* diag_listed_as: Invalid character in \N{...}; marked by <-- HERE
in \N{%s} */
- yyerror_pv(
- Perl_form(aTHX_
+ *error_msg = Perl_form(aTHX_
"Invalid character in \\N{...}; marked by <-- HERE in %.*s<-- HERE %.*s",
(int)(s - backslash_ptr + 1), backslash_ptr,
- (int)(e - s + 1), s + 1
- ),
- UTF ? SVf_UTF8 : 0);
+ (int)(e - s + 1), s + 1);
return NULL;
}
/* diag_listed_as: charnames alias definitions may not contain a
sequence of multiple spaces; marked by <-- HERE
in %s */
- yyerror_pv(
- Perl_form(aTHX_
+ *error_msg = Perl_form(aTHX_
"charnames alias definitions may not contain a sequence of "
"multiple spaces; marked by <-- HERE in %.*s<-- HERE %.*s",
(int)(s - backslash_ptr + 1), backslash_ptr,
- (int)(e - s + 1), s + 1
- ),
- UTF ? SVf_UTF8 : 0);
+ (int)(e - s + 1), s + 1);
return NULL;
}
should we have to convert to
UTF-8) */
SV *res; /* result from charnames */
- STRLEN offset_to_max; /* The offset in the output to where the range
- high-end character is temporarily placed */
+ STRLEN offset_to_max = 0; /* The offset in the output to where the range
+ high-end character is temporarily placed */
/* Does something require special handling in tr/// ? This avoids extra
* work in a less likely case. As such, khw didn't feel it was worth
}
}
else /* Here is \N{NAME} but not \N{U+...}. */
- if ((res = get_and_check_backslash_N_name(s, e)))
+ if ((res = get_and_check_backslash_N_name_wrapper(s, e)))
{
STRLEN len;
const char *str = SvPV_const(res, len);
}
sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL,
- type, typelen);
+ type, typelen, NULL);
}
pl_yylval.opval = newSVOP(OP_CONST, 0, sv);
}
return TRUE;
if (*s != '{' && *s != '[')
return FALSE;
+ PL_parser->sub_no_recover = TRUE;
if (!PL_lex_inpat)
return TRUE;
Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
{
filter_t funcp;
+ I32 ret;
SV *datasv = NULL;
/* This API is bad. It should have been using unsigned int for maxlen.
Not sure if we want to change the API, but if not we should sanity
/* Call function. The function is expected to */
/* call "FILTER_READ(idx+1, buf_sv)" first. */
/* Return: <0:error, =0:eof, >0:not eof */
- return (*funcp)(aTHX_ idx, buf_sv, correct_length);
+ ENTER;
+ save_scalar(PL_errgv);
+ ret = (*funcp)(aTHX_ idx, buf_sv, correct_length);
+ LEAVE;
+ return ret;
}
STATIC char *
return yylex();
case LEX_FORMLINE:
+ if (PL_parser->sub_error_count != PL_error_count) {
+ /* There was an error parsing a formline, which tends to
+ mess up the parser.
+ Unlike interpolated sub-parsing, we can't treat any of
+ these as recoverable, so no need to check sub_no_recover.
+ */
+ yyquit();
+ }
assert(PL_lex_formbrack);
s = scan_formline(PL_bufptr);
if (!PL_lex_formbrack)
/* read var name, including sigil, into PL_tokenbuf */
PL_tokenbuf[0] = sigil;
parse_ident(&s, &dest, dest + sizeof(PL_tokenbuf) - 1,
- 0, cBOOL(UTF), FALSE);
+ 0, cBOOL(UTF), FALSE, FALSE);
*dest = '\0';
assert(PL_tokenbuf[1]); /* we have a variable name */
}
case XATTRTERM:
PL_expect = XTERMBLOCK;
grabattrs:
+ /* NB: as well as parsing normal attributes, we also end up
+ * here if there is something looking like attributes
+ * following a signature (which is illegal, but used to be
+ * legal in 5.20..5.26). If the latter, we still parse the
+ * attributes so that error messages(s) are less confusing,
+ * but ignore them (parser->sig_seen).
+ */
s = skipspace(s);
attrs = NULL;
while (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
+ bool sig = PL_parser->sig_seen;
I32 tmp;
SV *sv;
d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
the CVf_BUILTIN_ATTRS define in cv.h! */
if (!PL_in_my && memEQs(SvPVX(sv), len, "lvalue")) {
sv_free(sv);
- CvLVALUE_on(PL_compcv);
+ if (!sig)
+ CvLVALUE_on(PL_compcv);
}
else if (!PL_in_my && memEQs(SvPVX(sv), len, "method")) {
sv_free(sv);
- CvMETHOD_on(PL_compcv);
+ if (!sig)
+ CvMETHOD_on(PL_compcv);
}
else if (!PL_in_my && memEQs(SvPVX(sv), len, "const"))
{
sv_free(sv);
- Perl_ck_warner_d(aTHX_
- packWARN(WARN_EXPERIMENTAL__CONST_ATTR),
- ":const is experimental"
- );
- CvANONCONST_on(PL_compcv);
- if (!CvANON(PL_compcv))
- yyerror(":const is not permitted on named "
- "subroutines");
+ if (!sig) {
+ Perl_ck_warner_d(aTHX_
+ packWARN(WARN_EXPERIMENTAL__CONST_ATTR),
+ ":const is experimental"
+ );
+ CvANONCONST_on(PL_compcv);
+ if (!CvANON(PL_compcv))
+ yyerror(":const is not permitted on named "
+ "subroutines");
+ }
}
/* After we've set the flags, it could be argued that
we don't need to do the attributes.pm-based setting
}
}
got_attrs:
+ if (PL_parser->sig_seen) {
+ /* see comment about about sig_seen and parser error
+ * handling */
+ if (attrs)
+ op_free(attrs);
+ Perl_croak(aTHX_ "Subroutine attributes must come "
+ "before the signature");
+ }
if (attrs) {
NEXTVAL_NEXTTOKE.opval = attrs;
force_next(THING);
SAVEI32(PL_lex_formbrack);
PL_parser->form_lex_state = PL_lex_state;
PL_lex_formbrack = PL_lex_brackets + 1;
+ PL_parser->sub_error_count = PL_error_count;
goto leftbracket;
}
}
}
if (PL_expect == XSTATE && s[1] == '.' && s[2] == '.') {
s += 3;
- TERM(YADAYADA);
+ OPERATOR(YADAYADA);
}
if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
char tmp = *s++;
else { /* no override */
tmp = -tmp;
if (tmp == KEY_dump) {
- Perl_ck_warner_d(aTHX_ packWARN2(WARN_MISC,WARN_DEPRECATED),
- "dump() better written as CORE::dump(). "
- "dump() will no longer be available "
- "in Perl 5.30");
+ Perl_croak(aTHX_ "dump() must be written as CORE::dump() as of Perl 5.30");
}
gv = NULL;
gvp = 0;
int pkgname = 0;
const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
bool safebw;
+ bool no_op_error = FALSE;
+ if (PL_expect == XOPERATOR) {
+ if (PL_bufptr == PL_linestart) {
+ CopLINE_dec(PL_curcop);
+ Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
+ CopLINE_inc(PL_curcop);
+ }
+ else
+ /* We want to call no_op with s pointing after the
+ bareword, so defer it. But we want it to come
+ before the Bad name croak. */
+ no_op_error = TRUE;
+ }
/* Get the rest if it looks like a package qualifier */
STRLEN morelen;
s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
TRUE, &morelen);
+ if (no_op_error) {
+ no_op("Bareword",s);
+ no_op_error = FALSE;
+ }
if (!morelen)
Perl_croak(aTHX_ "Bad name after %" UTF8f "%s",
UTF8fARG(UTF, len, PL_tokenbuf),
pkgname = 1;
}
- if (PL_expect == XOPERATOR) {
- if (PL_bufptr == PL_linestart) {
- CopLINE_dec(PL_curcop);
- Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
- CopLINE_inc(PL_curcop);
- }
- else
+ if (no_op_error)
no_op("Bareword",s);
- }
/* See if the name is "Foo::",
in which case Foo is a bareword
if (!*d && !gv_stashpv(PL_tokenbuf, UTF ? SVf_UTF8 : 0))
{
/* PL_warn_reserved is constant */
- GCC_DIAG_IGNORE(-Wformat-nonliteral);
+ GCC_DIAG_IGNORE_STMT(-Wformat-nonliteral);
Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved,
PL_tokenbuf);
- GCC_DIAG_RESTORE;
+ GCC_DIAG_RESTORE_STMT;
}
}
}
if (!GvIO(gv))
GvIOp(gv) = newIO();
IoIFP(GvIOp(gv)) = PL_rsfp;
-#if defined(HAS_FCNTL) && defined(F_SETFD) && defined(FD_CLOEXEC)
- {
- const int fd = PerlIO_fileno(PL_rsfp);
- if (fd >= 3) {
- fcntl(fd,F_SETFD, FD_CLOEXEC);
- }
- }
-#endif
/* Mark this internal pseudo-handle as clean */
IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
if ((PerlIO*)PL_rsfp == PerlIO_stdin())
really_sub:
{
char * const tmpbuf = PL_tokenbuf + 1;
- expectation attrful;
bool have_name, have_proto;
const int key = tmp;
SV *format_name = NULL;
+ bool is_sigsub = FEATURE_SIGNATURES_IS_ENABLED;
SSize_t off = s-SvPVX(PL_linestr);
s = skipspace(s);
d = SvPVX(PL_linestr)+off;
+ SAVEBOOL(PL_parser->sig_seen);
+ PL_parser->sig_seen = FALSE;
+
if ( isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)
|| *s == '\''
|| (*s == ':' && s[1] == ':'))
{
- PL_expect = XBLOCK;
- attrful = XATTRBLOCK;
+ PL_expect = XATTRBLOCK;
d = scan_word(s, tmpbuf, sizeof PL_tokenbuf - 1, TRUE,
&len);
if (key == KEY_format)
Perl_croak(aTHX_
"Missing name in \"%s\"", PL_bufptr);
}
- PL_expect = XTERMBLOCK;
- attrful = XATTRTERM;
+ PL_expect = XATTRTERM;
sv_setpvs(PL_subname,"?");
have_name = FALSE;
}
}
/* Look for a prototype */
- if (*s == '(' && !FEATURE_SIGNATURES_IS_ENABLED) {
+ if (*s == '(' && !is_sigsub) {
s = scan_str(s,FALSE,FALSE,FALSE,NULL);
- COPLINE_SET_FROM_MULTI_END;
if (!s)
Perl_croak(aTHX_ "Prototype not terminated");
+ COPLINE_SET_FROM_MULTI_END;
(void)validate_proto(PL_subname, PL_lex_stuff,
ckWARN(WARN_ILLEGALPROTO), 0);
have_proto = TRUE;
else
have_proto = FALSE;
- if (*s == ':' && s[1] != ':')
- PL_expect = attrful;
- else if ((*s != '{' && *s != '(') && key != KEY_format) {
+ if ( !(*s == ':' && s[1] != ':')
+ && (*s != '{' && *s != '(') && key != KEY_format)
+ {
assert(key == KEY_sub || key == KEY_AUTOLOAD ||
key == KEY_DESTROY || key == KEY_BEGIN ||
key == KEY_UNITCHECK || key == KEY_CHECK ||
sv_setpvs(PL_subname, "__ANON__");
else
sv_setpvs(PL_subname, "__ANON__::__ANON__");
- TOKEN(ANONSUB);
+ if (is_sigsub)
+ TOKEN(ANON_SIGSUB);
+ else
+ TOKEN(ANONSUB);
}
force_ident_maybe_lex('&');
- TOKEN(SUB);
+ if (is_sigsub)
+ TOKEN(SIGSUB);
+ else
+ TOKEN(SUB);
}
case KEY_system:
DEBUG_T({ PerlIO_printf(Perl_debug_log,
"### Pending identifier '%s'\n", PL_tokenbuf); });
+ assert(tokenbuf_len >= 2);
/* if we're in a my(), we can't allow dynamics here.
$foo'bar has already been turned into $foo::bar, so
if (has_colon) {
/* "my" variable %s can't be in a package */
/* PL_no_myglob is constant */
- GCC_DIAG_IGNORE(-Wformat-nonliteral);
+ GCC_DIAG_IGNORE_STMT(-Wformat-nonliteral);
yyerror_pv(Perl_form(aTHX_ PL_no_myglob,
PL_in_my == KEY_my ? "my" : "state",
*PL_tokenbuf == '&' ? "subroutin" : "variabl",
PL_tokenbuf),
UTF ? SVf_UTF8 : 0);
- GCC_DIAG_RESTORE;
+ GCC_DIAG_RESTORE_STMT;
}
if (PL_in_my == KEY_sigvar) {
HEK * const stashname = HvNAME_HEK(stash);
SV * const sym = newSVhek(stashname);
sv_catpvs(sym, "::");
- sv_catpvn_flags(sym, PL_tokenbuf+1, tokenbuf_len - 1, (UTF ? SV_CATUTF8 : SV_CATBYTES ));
+ sv_catpvn_flags(sym, PL_tokenbuf+1, tokenbuf_len > 0 ? tokenbuf_len - 1 : 0, (UTF ? SV_CATUTF8 : SV_CATBYTES ));
pl_yylval.opval = newSVOP(OP_CONST, 0, sym);
pl_yylval.opval->op_private = OPpCONST_ENTERED;
if (pit != '&')
&& PL_lex_state != LEX_NORMAL
&& !PL_lex_brackets)
{
- GV *const gv = gv_fetchpvn_flags(PL_tokenbuf + 1, tokenbuf_len - 1,
+ GV *const gv = gv_fetchpvn_flags(PL_tokenbuf + 1, tokenbuf_len > 0 ? tokenbuf_len - 1 : 0,
( UTF ? SVf_UTF8 : 0 ) | GV_ADDMG,
SVt_PVAV);
if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
/* build ops for a bareword */
pl_yylval.opval = newSVOP(OP_CONST, 0,
newSVpvn_flags(PL_tokenbuf + 1,
- tokenbuf_len - 1,
+ tokenbuf_len > 0 ? tokenbuf_len - 1 : 0,
UTF ? SVf_UTF8 : 0 ));
pl_yylval.opval->op_private = OPpCONST_ENTERED;
if (pit != '&')
- gv_fetchpvn_flags(PL_tokenbuf+1, tokenbuf_len - 1,
+ gv_fetchpvn_flags(PL_tokenbuf+1, tokenbuf_len > 0 ? tokenbuf_len - 1 : 0,
(PL_in_eval ? GV_ADDMULTI : GV_ADD)
| ( UTF ? SVf_UTF8 : 0 ),
((PL_tokenbuf[0] == '$') ? SVt_PV
Best used as sv=new_constant(..., sv, ...).
If s, pv are NULL, calls subroutine with one argument,
and <type> is used with error messages only.
- <type> is assumed to be well formed UTF-8 */
+ <type> is assumed to be well formed UTF-8.
+
+ If error_msg is not NULL, *error_msg will be set to any error encountered.
+ Otherwise yyerror() will be used to output it */
STATIC SV *
S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen,
- SV *sv, SV *pv, const char *type, STRLEN typelen)
+ SV *sv, SV *pv, const char *type, STRLEN typelen,
+ const char ** error_msg)
{
dSP;
HV * table = GvHV(PL_hintgv); /* ^H */
if (*key == 'c') { assert (strEQ(key, "charnames")); }
assert(type || s);
- /* charnames doesn't work well if there have been errors found */
- if (PL_error_count > 0 && *key == 'c')
- {
- SvREFCNT_dec_NN(sv);
- return &PL_sv_undef;
- }
-
sv_2mortal(sv); /* Parent created it permanently */
if (!table
|| ! (PL_hints & HINT_LOCALIZE_HH)
(type ? type: s), why1, why2, why3);
}
}
- yyerror_pv(msg, UTF ? SVf_UTF8 : 0);
+ if (error_msg) {
+ *error_msg = msg;
+ }
+ else {
+ yyerror_pv(msg, UTF ? SVf_UTF8 : 0);
+ }
return SvREFCNT_inc_simple_NN(sv);
}
now_ok:
PERL_STATIC_INLINE void
S_parse_ident(pTHX_ char **s, char **d, char * const e, int allow_package,
- bool is_utf8, bool check_dollar)
+ bool is_utf8, bool check_dollar, bool tick_warn)
{
+ int saw_tick = 0;
+ const char *olds = *s;
PERL_ARGS_ASSERT_PARSE_IDENT;
while (*s < PL_bufend) {
*(*d)++ = ':';
*(*d)++ = ':';
(*s)++;
+ saw_tick++;
}
else if (allow_package && **s == ':' && (*s)[1] == ':'
/* Disallow things like Foo::$bar. For the curious, this is
else
break;
}
+ if (UNLIKELY(tick_warn && saw_tick && PL_lex_state == LEX_INTERPNORMAL
+ && !PL_lex_brackets && ckWARN(WARN_SYNTAX))) {
+ char *d;
+ char *d2;
+ Newx(d, *s - olds + saw_tick + 2, char); /* +2 for $# */
+ d2 = d;
+ SAVEFREEPV(d);
+ Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
+ "Old package separator used in string");
+ if (olds[-1] == '#')
+ *d2++ = olds[-2];
+ *d2++ = olds[-1];
+ while (olds < *s) {
+ if (*olds == '\'') {
+ *d2++ = '\\';
+ *d2++ = *olds++;
+ }
+ else
+ *d2++ = *olds++;
+ }
+ Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
+ "\t(Did you mean \"%" UTF8f "\" instead?)\n",
+ UTF8fARG(is_utf8, d2-d, d));
+ }
return;
}
PERL_ARGS_ASSERT_SCAN_WORD;
- parse_ident(&s, &d, e, allow_package, is_utf8, TRUE);
+ parse_ident(&s, &d, e, allow_package, is_utf8, TRUE, FALSE);
*d = '\0';
*slp = d - dest;
return s;
}
}
else { /* See if it is a "normal" identifier */
- parse_ident(&s, &d, e, 1, is_utf8, FALSE);
+ parse_ident(&s, &d, e, 1, is_utf8, FALSE, TRUE);
}
*d = '\0';
d = dest;
(the later check for } being at the expected point will trap
cases where this doesn't pan out.) */
d += is_utf8 ? UTF8SKIP(d) : 1;
- parse_ident(&s, &d, e, 1, is_utf8, TRUE);
+ parse_ident(&s, &d, e, 1, is_utf8, TRUE, TRUE);
*d = '\0';
}
else { /* caret word: ${^Foo} ${^CAPTURE[0]} */
CopLINE_set(PL_curcop, orig_copline);
PL_parser->herelines = herelines;
*dest = '\0';
+ PL_parser->sub_no_recover = TRUE;
}
}
else if ( PL_lex_state == LEX_INTERPNORMAL
* the NVX field indicates how many src code lines the replacement
* spreads over */
sv_upgrade(PL_parser->lex_sub_repl, SVt_PVNV);
- ((XPVNV*)SvANY(PL_parser->lex_sub_repl))->xnv_u.xnv_lines = 0;
+ ((XPVNV*)SvANY(PL_parser->lex_sub_repl))->xnv_u.xnv_lines = linediff;
((XPVIV*)SvANY(PL_parser->lex_sub_repl))->xiv_u.xivu_eval_seen =
cBOOL(es);
}
/* No whitespace or all! */
if (backup == s || *backup == '\n') {
- Newxz(indent, indent_len + 1, char);
+ Newx(indent, indent_len + 1, char);
memcpy(indent, backup + 1, indent_len);
+ indent[indent_len] = 0;
s--; /* before our delimiter */
PL_parser->herelines--; /* this line doesn't count */
break;
/* All whitespace or none! */
if (backup == found || SPACE_OR_TAB(*backup)) {
- Newxz(indent, indent_len + 1, char);
+ Newx(indent, indent_len + 1, char);
memcpy(indent, backup, indent_len);
+ indent[indent_len] = 0;
SvREFCNT_dec(PL_linestr);
PL_linestr = linestr_save;
PL_linestart = SvPVX(linestr_save);
while (ss < se) {
/* newline only? Copy and move on */
if (*ss == '\n') {
- sv_catpv(newstr,"\n");
+ sv_catpvs(newstr,"\n");
ss++;
linecount++;
I32 brackets = 1; /* bracket nesting level */
bool has_utf8 = FALSE; /* is there any utf8 content? */
IV termcode; /* terminating char. code */
- U8 termstr[UTF8_MAXBYTES]; /* terminating string */
+ U8 termstr[UTF8_MAXBYTES+1]; /* terminating string */
STRLEN termlen; /* length of terminating string */
line_t herelines;
const char * opening_delims = "([{<";
const char * closing_delims = ")]}>";
+ /* The only non-UTF character that isn't a stand alone grapheme is
+ * white-space, hence can't be a delimiter. */
const char * non_grapheme_msg = "Use of unassigned code point or"
" non-standalone grapheme for a delimiter"
- " will be a fatal error starting in Perl"
- " 5.30";
- /* The only non-UTF character that isn't a stand alone grapheme is
- * white-space, hence can't be a delimiter. So can skip for non-UTF-8 */
- bool check_grapheme = UTF && ckWARN_d(WARN_DEPRECATED);
-
+ " is not allowed";
PERL_ARGS_ASSERT_SCAN_STR;
/* skip space before the delimiter */
}
else {
termcode = utf8_to_uvchr_buf((U8*)s, (U8*)PL_bufend, &termlen);
- if (check_grapheme) {
- if ( UNLIKELY(UNICODE_IS_SUPER(termcode))
- || UNLIKELY(UNICODE_IS_NONCHAR(termcode)))
- {
- /* These are considered graphemes, and since the ending
- * delimiter will be the same, we don't have to check the other
- * end */
- check_grapheme = FALSE;
- }
- else if (UNLIKELY(! _is_grapheme((U8 *) start,
- (U8 *) s,
- (U8 *) PL_bufend,
- termcode)))
- {
- Perl_warner(aTHX_ packWARN(WARN_DEPRECATED), "%s", non_grapheme_msg);
-
- /* Don't have to check the other end, as have already warned at
- * this one */
- check_grapheme = FALSE;
- }
+ if (UTF && UNLIKELY(! _is_grapheme((U8 *) start,
+ (U8 *) s,
+ (U8 *) PL_bufend,
+ termcode)))
+ {
+ yyerror(non_grapheme_msg);
}
Copy(s, termstr, termlen, U8);
if ( s + termlen <= PL_bufend
&& memEQ(s + 1, (char*)termstr + 1, termlen - 1))
{
- if ( check_grapheme
+ if ( UTF
&& UNLIKELY(! _is_grapheme((U8 *) start,
- (U8 *) s,
- (U8 *) PL_bufend,
+ (U8 *) s,
+ (U8 *) PL_bufend,
termcode)))
{
- Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
- "%s", non_grapheme_msg);
+ yyerror(non_grapheme_msg);
}
break;
}
digit:
just_zero = FALSE;
if (!overflowed) {
+ assert(shift >= 0);
x = u << shift; /* make room for the digit */
total_bits += shift;
NV nv_mult = 1.0;
#endif
bool accumulate = TRUE;
- for (h++; (isXDIGIT(*h) || *h == '_'); h++) {
+ U8 b;
+ int lim = 1 << shift;
+ for (h++; ((isXDIGIT(*h) && (b = XDIGIT_VALUE(*h)) < lim) ||
+ *h == '_'); h++) {
if (isXDIGIT(*h)) {
- U8 b = XDIGIT_VALUE(*h);
significant_bits += shift;
#ifdef HEXFP_UQUAD
if (accumulate) {
if (significant_bits < NV_MANT_DIG) {
/* We are in the long "run" of xdigits,
* accumulate the full four bits. */
+ assert(shift >= 0);
hexfp_uquad <<= shift;
hexfp_uquad |= b;
hexfp_frac_bits += shift;
- } else {
+ } else if (significant_bits - shift < NV_MANT_DIG) {
/* We are at a hexdigit either at,
* or straddling, the edge of mantissa.
* We will try grabbing as many as
significant_bits - NV_MANT_DIG;
if (tail <= 0)
tail += shift;
+ assert(tail >= 0);
hexfp_uquad <<= tail;
+ assert((shift - tail) >= 0);
hexfp_uquad |= b >> (shift - tail);
hexfp_frac_bits += tail;
}
#else /* HEXFP_NV */
if (accumulate) {
- nv_mult /= 16.0;
+ nv_mult /= nvshift[shift];
if (nv_mult > 0.0)
hexfp_nv += b * nv_mult;
else
}
if (just_zero && (PL_hints & HINT_NEW_INTEGER))
sv = new_constant(start, s - start, "integer",
- sv, NULL, NULL, 0);
+ sv, NULL, NULL, 0, NULL);
else if (PL_hints & HINT_NEW_BINARY)
- sv = new_constant(start, s - start, "binary", sv, NULL, NULL, 0);
+ sv = new_constant(start, s - start, "binary",
+ sv, NULL, NULL, 0, NULL);
}
break;
floatit = TRUE;
}
if (floatit) {
- STORE_LC_NUMERIC_UNDERLYING_SET_STANDARD();
/* terminate the string */
*d = '\0';
if (UNLIKELY(hexfp)) {
} else {
nv = Atof(PL_tokenbuf);
}
- RESTORE_LC_NUMERIC_UNDERLYING();
sv = newSVnv(nv);
}
const char *const key = floatit ? "float" : "integer";
const STRLEN keylen = floatit ? 5 : 7;
sv = S_new_constant(aTHX_ PL_tokenbuf, d - PL_tokenbuf,
- key, keylen, sv, NULL, NULL, 0);
+ key, keylen, sv, NULL, NULL, 0, NULL);
}
break;
return oldsavestack_ix;
}
+
+/* Do extra initialisation of a CV (typically one just created by
+ * start_subparse()) if that CV is for a named sub
+ */
+
+void
+Perl_init_named_cv(pTHX_ CV *cv, OP *nameop)
+{
+ PERL_ARGS_ASSERT_INIT_NAMED_CV;
+
+ if (nameop->op_type == OP_CONST) {
+ const char *const name = SvPV_nolen_const(((SVOP*)nameop)->op_sv);
+ if ( strEQ(name, "BEGIN")
+ || strEQ(name, "END")
+ || strEQ(name, "INIT")
+ || strEQ(name, "CHECK")
+ || strEQ(name, "UNITCHECK")
+ )
+ CvSPECIAL_on(cv);
+ }
+ else
+ /* State subs inside anonymous subs need to be
+ clonable themselves. */
+ if ( CvANON(CvOUTSIDE(cv))
+ || CvCLONE(CvOUTSIDE(cv))
+ || !PadnameIsSTATE(PadlistNAMESARRAY(CvPADLIST(
+ CvOUTSIDE(cv)
+ ))[nameop->op_targ])
+ )
+ CvCLONE_on(cv);
+}
+
+
static int
S_yywarn(pTHX_ const char *const s, U32 flags)
{
}
}
+ /* 'chars' isn't quite the right name, as code points above 0xFFFF
+ * require 4 bytes per char */
chars = SvCUR(utf16_buffer) >> 1;
have = SvCUR(utf8_buffer);
- SvGROW(utf8_buffer, have + chars * 3 + 1);
+
+ /* Assume the worst case size as noted by the functions: twice the
+ * number of input bytes */
+ SvGROW(utf8_buffer, have + chars * 4 + 1);
if (reverse) {
end = utf16_to_utf8_reversed((U8*)SvPVX(utf16_buffer),
return KEYWORD_PLUGIN_DECLINE;
}
+/*
+=for apidoc Amx|void|wrap_keyword_plugin|Perl_keyword_plugin_t new_plugin|Perl_keyword_plugin_t *old_plugin_p
+
+Puts a C function into the chain of keyword plugins. This is the
+preferred way to manipulate the L</PL_keyword_plugin> variable.
+C<new_plugin> is a pointer to the C function that is to be added to the
+keyword plugin chain, and C<old_plugin_p> points to the storage location
+where a pointer to the next function in the chain will be stored. The
+value of C<new_plugin> is written into the L</PL_keyword_plugin> variable,
+while the value previously stored there is written to C<*old_plugin_p>.
+
+L</PL_keyword_plugin> is global to an entire process, and a module wishing
+to hook keyword parsing may find itself invoked more than once per
+process, typically in different threads. To handle that situation, this
+function is idempotent. The location C<*old_plugin_p> must initially
+(once per process) contain a null pointer. A C variable of static
+duration (declared at file scope, typically also marked C<static> to give
+it internal linkage) will be implicitly initialised appropriately, if it
+does not have an explicit initialiser. This function will only actually
+modify the plugin chain if it finds C<*old_plugin_p> to be null. This
+function is also thread safe on the small scale. It uses appropriate
+locking to avoid race conditions in accessing L</PL_keyword_plugin>.
+
+When this function is called, the function referenced by C<new_plugin>
+must be ready to be called, except for C<*old_plugin_p> being unfilled.
+In a threading situation, C<new_plugin> may be called immediately, even
+before this function has returned. C<*old_plugin_p> will always be
+appropriately set before C<new_plugin> is called. If C<new_plugin>
+decides not to do anything special with the identifier that it is given
+(which is the usual case for most calls to a keyword plugin), it must
+chain the plugin function referenced by C<*old_plugin_p>.
+
+Taken all together, XS code to install a keyword plugin should typically
+look something like this:
+
+ static Perl_keyword_plugin_t next_keyword_plugin;
+ static OP *my_keyword_plugin(pTHX_
+ char *keyword_plugin, STRLEN keyword_len, OP **op_ptr)
+ {
+ if (memEQs(keyword_ptr, keyword_len,
+ "my_new_keyword")) {
+ ...
+ } else {
+ return next_keyword_plugin(aTHX_
+ keyword_ptr, keyword_len, op_ptr);
+ }
+ }
+ BOOT:
+ wrap_keyword_plugin(my_keyword_plugin,
+ &next_keyword_plugin);
+
+Direct access to L</PL_keyword_plugin> should be avoided.
+
+=cut
+*/
+
+void
+Perl_wrap_keyword_plugin(pTHX_
+ Perl_keyword_plugin_t new_plugin, Perl_keyword_plugin_t *old_plugin_p)
+{
+ dVAR;
+
+ PERL_UNUSED_CONTEXT;
+ PERL_ARGS_ASSERT_WRAP_KEYWORD_PLUGIN;
+ if (*old_plugin_p) return;
+ KEYWORD_PLUGIN_MUTEX_LOCK;
+ if (!*old_plugin_p) {
+ *old_plugin_p = PL_keyword_plugin;
+ PL_keyword_plugin = new_plugin;
+ }
+ KEYWORD_PLUGIN_MUTEX_UNLOCK;
+}
+
#define parse_recdescent(g,p) S_parse_recdescent(aTHX_ g,p)
static void
S_parse_recdescent(pTHX_ int gramtype, I32 fakeeof)