#define PERL_IN_TOKE_C
#include "perl.h"
#include "dquote_inline.h"
+#include "invlist_inline.h"
-#define new_constant(a,b,c,d,e,f,g) \
- S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g)
+#define new_constant(a,b,c,d,e,f,g, h) \
+ S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g, h)
#define pl_yylval (PL_parser->yylval)
{ ANDAND, TOKENTYPE_NONE, "ANDAND" },
{ ANDOP, TOKENTYPE_NONE, "ANDOP" },
{ ANONSUB, TOKENTYPE_IVAL, "ANONSUB" },
+ { ANON_SIGSUB, TOKENTYPE_IVAL, "ANON_SIGSUB" },
{ ARROW, TOKENTYPE_NONE, "ARROW" },
{ ASSIGNOP, TOKENTYPE_OPNUM, "ASSIGNOP" },
{ BITANDOP, TOKENTYPE_OPNUM, "BITANDOP" },
{ GIVEN, TOKENTYPE_IVAL, "GIVEN" },
{ HASHBRACK, TOKENTYPE_NONE, "HASHBRACK" },
{ IF, TOKENTYPE_IVAL, "IF" },
- { LABEL, TOKENTYPE_PVAL, "LABEL" },
+ { LABEL, TOKENTYPE_OPVAL, "LABEL" },
{ LOCAL, TOKENTYPE_IVAL, "LOCAL" },
{ LOOPEX, TOKENTYPE_OPNUM, "LOOPEX" },
{ LSTOP, TOKENTYPE_OPNUM, "LSTOP" },
{ RELOP, TOKENTYPE_OPNUM, "RELOP" },
{ REQUIRE, TOKENTYPE_NONE, "REQUIRE" },
{ SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" },
+ { SIGSUB, TOKENTYPE_NONE, "SIGSUB" },
{ SUB, TOKENTYPE_NONE, "SUB" },
{ THING, TOKENTYPE_OPVAL, "THING" },
{ UMINUS, TOKENTYPE_NONE, "UMINUS" },
PERL_ARGS_ASSERT_PRINTBUF;
- GCC_DIAG_IGNORE(-Wformat-nonliteral); /* fmt checked by caller */
+ GCC_DIAG_IGNORE_STMT(-Wformat-nonliteral); /* fmt checked by caller */
PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
- GCC_DIAG_RESTORE;
+ GCC_DIAG_RESTORE_STMT;
SvREFCNT_dec(tmp);
}
*/
STATIC void
-S_missingterm(pTHX_ char *s)
+S_missingterm(pTHX_ char *s, STRLEN len)
{
char tmpbuf[UTF8_MAXBYTES + 1];
char q;
bool uni = FALSE;
SV *sv;
if (s) {
- char * const nl = strrchr(s,'\n');
- if (nl)
- *nl = '\0';
+ char * const nl = (char *) my_memrchr(s, '\n', len);
+ if (nl) {
+ *nl = '\0';
+ len = nl - s;
+ }
uni = UTF;
}
else if (PL_multi_close < 32) {
tmpbuf[1] = (char)toCTRL(PL_multi_close);
tmpbuf[2] = '\0';
s = tmpbuf;
+ len = 2;
}
else {
if (LIKELY(PL_multi_close < 256)) {
*tmpbuf = (char)PL_multi_close;
tmpbuf[1] = '\0';
+ len = 1;
}
else {
+ char *end = (char *)uvchr_to_utf8((U8 *)tmpbuf, PL_multi_close);
+ *end = '\0';
+ len = end - tmpbuf;
uni = TRUE;
- *uvchr_to_utf8((U8 *)tmpbuf, PL_multi_close) = 0;
}
s = tmpbuf;
}
- q = strchr(s,'"') ? '\'' : '"';
- sv = sv_2mortal(newSVpv(s,0));
+ q = memchr(s, '"', len) ? '\'' : '"';
+ sv = sv_2mortal(newSVpvn(s, len));
if (uni)
SvUTF8_on(sv);
- Perl_croak(aTHX_ "Can't find string terminator %c%" SVf
- "%c anywhere before EOF",q,SVfARG(sv),q);
+ Perl_croak(aTHX_ "Can't find string terminator %c%" SVf "%c"
+ " anywhere before EOF", q, SVfARG(sv), q);
}
#include "feature.h"
if (flags & LEX_STUFF_UTF8) {
goto plain_copy;
} else {
- STRLEN highhalf = 0; /* Count of variants */
- const char *p, *e = pv+len;
- for (p = pv; p != e; p++) {
- if (! UTF8_IS_INVARIANT(*p)) {
- highhalf++;
- }
- }
+ STRLEN highhalf = variant_under_utf8_count((U8 *) pv,
+ (U8 *) pv + len);
+ const char *p, *e = pv+len;;
if (!highhalf)
goto plain_copy;
lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len+highhalf);
SvCUR(PL_parser->linestr) + len+highhalf);
PL_parser->bufend += len+highhalf;
for (p = pv; p != e; p++) {
- U8 c = (U8)*p;
- if (! UTF8_IS_INVARIANT(c)) {
- *bufptr++ = UTF8_TWO_BYTE_HI(c);
- *bufptr++ = UTF8_TWO_BYTE_LO(c);
- } else {
- *bufptr++ = (char)c;
- }
+ append_utf8_from_native_byte(*p, (U8 **) &bufptr);
}
}
} else {
if (s == bufend)
need_incline = 1;
else
- incline(s);
+ incline(s, bufend);
}
} else if (isSPACE(c)) {
s++;
if (!got_more)
break;
if (can_incline && need_incline && PL_parser->rsfp) {
- incline(s);
+ incline(s, bufend);
need_incline = 0;
}
} else if (!c) {
*/
STATIC void
-S_incline(pTHX_ const char *s)
+S_incline(pTHX_ const char *s, const char *end)
{
const char *t;
const char *n;
PERL_ARGS_ASSERT_INCLINE;
+ assert(end >= s);
+
COPLINE_INC_WITH_HERELINES;
if (!PL_rsfp && !PL_parser->filtered && PL_lex_state == LEX_NORMAL
&& s+1 == PL_bufend && *s == ';') {
return;
while (SPACE_OR_TAB(*s))
s++;
- if (strBEGINs(s, "line"))
- s += 4;
+ if (memBEGINs(s, (STRLEN) (end - s), "line"))
+ s += sizeof("line") - 1;
else
return;
if (SPACE_OR_TAB(*s))
return;
while (SPACE_OR_TAB(*s))
s++;
- if (*s == '"' && (t = strchr(s+1, '"'))) {
+ if (*s == '"' && (t = (char *) memchr(s+1, '"', end - s))) {
s++;
e = t + 1;
}
}
else if (GvAV(cfgv)) {
AV * const av = GvAV(cfgv);
- const I32 start = CopLINE(PL_curcop)+1;
- I32 items = AvFILLp(av) - start;
+ const line_t start = CopLINE(PL_curcop)+1;
+ SSize_t items = AvFILLp(av) - start;
if (items > 0) {
AV * const av2 = GvAVn(gv2);
SV **svp = AvARRAY(av) + start;
- I32 l = (I32)line_num+1;
- while (items--)
- av_store(av2, l++, SvREFCNT_inc(*svp++));
+ Size_t l = line_num+1;
+ while (items-- && l < SSize_t_MAX && l == (line_t)l)
+ av_store(av2, (SSize_t)l++, SvREFCNT_inc(*svp++));
}
}
}
S_check_uni(pTHX)
{
const char *s;
- const char *t;
if (PL_oldoldbufptr != PL_last_uni)
return;
s = PL_last_uni;
while (isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF) || *s == '-')
s += UTF ? UTF8SKIP(s) : 1;
- if ((t = strchr(s, '(')) && t < PL_bufptr)
+ if (s < PL_bufptr && memchr(s, '(', PL_bufptr - s))
return;
Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
* S_postderef
*
* This subroutine handles postfix deref syntax after the arrow has already
- * been emitted. @* $* etc. are emitted as two separate token right here.
+ * been emitted. @* $* etc. are emitted as two separate tokens right here.
* @[ @{ %[ %{ *{ are emitted also as two tokens, but this function emits
* only the first, leaving yylex to find the next.
*/
S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len)
{
SV * const sv = newSVpvn_utf8(start, len,
- !IN_BYTES
- && UTF
- && !is_utf8_invariant_string((const U8*)start, len)
- && is_utf8_string((const U8*)start, len));
+ ! IN_BYTES
+ && UTF
+ && len != 0
+ && is_utf8_non_invariant_string((const U8*)start, len));
return sv;
}
if (check_keyword) {
char *s2 = PL_tokenbuf;
STRLEN len2 = len;
- if (allow_pack && len > 6 && strBEGINs(s2, "CORE::"))
- s2 += 6, len2 -= 6;
+ if (allow_pack && memBEGINPs(s2, len, "CORE::")) {
+ s2 += sizeof("CORE::") - 1;
+ len2 -= sizeof("CORE::") - 1;
+ }
if (keyword(s2, len2, 0))
return start;
}
SvCUR_set(sv, d - SvPVX_const(sv));
finish:
if ( PL_hints & HINT_NEW_STRING )
- return new_constant(NULL, 0, "q", sv, pv, "q", 1);
+ return new_constant(NULL, 0, "q", sv, pv, "q", 1, NULL);
return sv;
}
PL_parser->lex_super_state = PL_lex_state;
PL_parser->lex_sub_inwhat = (U16)op_type;
PL_parser->lex_sub_op = PL_lex_op;
+ PL_parser->sub_no_recover = FALSE;
+ PL_parser->sub_error_count = PL_error_count;
PL_lex_state = LEX_INTERPPUSH;
PL_expect = XTERM;
else {
const line_t l = CopLINE(PL_curcop);
LEAVE;
+ if (PL_parser->sub_error_count != PL_error_count) {
+ if (PL_parser->sub_no_recover) {
+ yyquit();
+ NOT_REACHED;
+ }
+ }
if (PL_multi_close == '<')
PL_parser->herelines += l - PL_multi_end;
PL_bufend = SvPVX(PL_linestr);
}
STATIC SV*
-S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e)
+S_get_and_check_backslash_N_name_wrapper(pTHX_ const char* s, const char* const e)
+{
+ /* This justs wraps get_and_check_backslash_N_name() to output any error
+ * message it returns. */
+
+ const char * error_msg = NULL;
+ SV * result;
+
+ PERL_ARGS_ASSERT_GET_AND_CHECK_BACKSLASH_N_NAME_WRAPPER;
+
+ /* charnames doesn't work well if there have been errors found */
+ if (PL_error_count > 0) {
+ return NULL;
+ }
+
+ result = get_and_check_backslash_N_name(s, e, cBOOL(UTF), &error_msg);
+
+ if (error_msg) {
+ yyerror_pv(error_msg, UTF ? SVf_UTF8 : 0);
+ }
+
+ return result;
+}
+
+SV*
+Perl_get_and_check_backslash_N_name(pTHX_ const char* s,
+ const char* const e,
+ const bool is_utf8,
+ const char ** error_msg)
{
/* <s> points to first character of interior of \N{}, <e> to one beyond the
* interior, hence to the "}". Finds what the name resolves to, returning
- * an SV* containing it; NULL if no valid one found */
-
- SV* res = newSVpvn_flags(s, e - s, UTF ? SVf_UTF8 : 0);
+ * an SV* containing it; NULL if no valid one found.
+ *
+ * 'is_utf8' is TRUE if we know we want the result to be UTF-8 even if it
+ * doesn't have to be. */
+ SV* res;
HV * table;
SV **cvp;
SV *cv;
SV *rv;
HV *stash;
const char* backslash_ptr = s - 3; /* Points to the <\> of \N{... */
+ dVAR;
PERL_ARGS_ASSERT_GET_AND_CHECK_BACKSLASH_N_NAME;
+ assert(e >= s);
+ assert(s > (char *) 3);
+
+ res = newSVpvn_flags(s, e - s, (is_utf8) ? SVf_UTF8 : 0);
+
if (!SvCUR(res)) {
SvREFCNT_dec_NN(res);
/* diag_listed_as: Unknown charname '%s' */
- yyerror("Unknown charname ''");
+ *error_msg = Perl_form(aTHX_ "Unknown charname ''");
return NULL;
}
res = new_constant( NULL, 0, "charnames", res, NULL, backslash_ptr,
/* include the <}> */
- e - backslash_ptr + 1);
+ e - backslash_ptr + 1, error_msg);
if (! SvPOK(res)) {
SvREFCNT_dec_NN(res);
return NULL;
SvTYPE(rv) == SVt_PVCV) && ((stash = CvSTASH(rv)) != NULL))
{
const char * const name = HvNAME(stash);
- if (HvNAMELEN(stash) == sizeof("_charnames")-1
- && strEQ(name, "_charnames")) {
+ if (memEQs(name, HvNAMELEN(stash), "_charnames")) {
return res;
}
}
* characters that begin a character name alias are alphabetic, otherwise
* would have to create a isCHARNAME_BEGIN macro */
- if (! UTF) {
+ if (! is_utf8) {
if (! isALPHAU(*s)) {
goto bad_charname;
}
s += 2;
}
else {
- if (! PL_utf8_charname_begin) {
- U8 flags = _CORE_SWASH_INIT_ACCEPT_INVLIST;
- PL_utf8_charname_begin = _core_swash_init("utf8",
- "_Perl_Charname_Begin",
- &PL_sv_undef,
- 1, 0, NULL, &flags);
- }
- if (! swash_fetch(PL_utf8_charname_begin, (U8 *) s, TRUE)) {
+ if (! _invlist_contains_cp(PL_utf8_charname_begin,
+ utf8_to_uvchr_buf((U8 *) s,
+ (U8 *) e,
+ NULL)))
+ {
goto bad_charname;
}
s += UTF8SKIP(s);
s += 2;
}
else {
- if (! PL_utf8_charname_continue) {
- U8 flags = _CORE_SWASH_INIT_ACCEPT_INVLIST;
- PL_utf8_charname_continue = _core_swash_init("utf8",
- "_Perl_Charname_Continue",
- &PL_sv_undef,
- 1, 0, NULL, &flags);
- }
- if (! swash_fetch(PL_utf8_charname_continue, (U8 *) s, TRUE)) {
+ if (! _invlist_contains_cp(PL_utf8_charname_continue,
+ utf8_to_uvchr_buf((U8 *) s,
+ (U8 *) e,
+ NULL)))
+ {
goto bad_charname;
}
s += UTF8SKIP(s);
/* diag_listed_as: charnames alias definitions may not contain
trailing white-space; marked by <-- HERE in %s
*/
- yyerror_pv(
- Perl_form(aTHX_
+ *error_msg = Perl_form(aTHX_
"charnames alias definitions may not contain trailing "
"white-space; marked by <-- HERE in %.*s<-- HERE %.*s",
(int)(s - backslash_ptr + 1), backslash_ptr,
- (int)(e - s + 1), s + 1
- ),
- UTF ? SVf_UTF8 : 0);
+ (int)(e - s + 1), s + 1);
return NULL;
}
- if (SvUTF8(res)) { /* Don't accept malformed input */
+ if (SvUTF8(res)) { /* Don't accept malformed charname value */
const U8* first_bad_char_loc;
STRLEN len;
const char* const str = SvPV_const(res, len);
0 /* 0 means don't die */ );
/* diag_listed_as: Malformed UTF-8 returned by \N{%s}
immediately after '%s' */
- yyerror_pv(
- Perl_form(aTHX_
+ *error_msg = Perl_form(aTHX_
"Malformed UTF-8 returned by %.*s immediately after '%.*s'",
(int) (e - backslash_ptr + 1), backslash_ptr,
- (int) ((char *) first_bad_char_loc - str), str
- ),
- SVf_UTF8);
+ (int) ((char *) first_bad_char_loc - str), str);
return NULL;
}
}
* that this print won't run off the end of the string */
/* diag_listed_as: Invalid character in \N{...}; marked by <-- HERE
in \N{%s} */
- yyerror_pv(
- Perl_form(aTHX_
+ *error_msg = Perl_form(aTHX_
"Invalid character in \\N{...}; marked by <-- HERE in %.*s<-- HERE %.*s",
(int)(s - backslash_ptr + 1), backslash_ptr,
- (int)(e - s + 1), s + 1
- ),
- UTF ? SVf_UTF8 : 0);
+ (int)(e - s + 1), s + 1);
return NULL;
}
/* diag_listed_as: charnames alias definitions may not contain a
sequence of multiple spaces; marked by <-- HERE
in %s */
- yyerror_pv(
- Perl_form(aTHX_
+ *error_msg = Perl_form(aTHX_
"charnames alias definitions may not contain a sequence of "
"multiple spaces; marked by <-- HERE in %.*s<-- HERE %.*s",
(int)(s - backslash_ptr + 1), backslash_ptr,
- (int)(e - s + 1), s + 1
- ),
- UTF ? SVf_UTF8 : 0);
+ (int)(e - s + 1), s + 1);
return NULL;
}
bool dorange = FALSE; /* are we in a translit range? */
bool didrange = FALSE; /* did we just finish a range? */
bool in_charclass = FALSE; /* within /[...]/ */
- bool has_utf8 = FALSE; /* Output constant is UTF8 */
- bool this_utf8 = cBOOL(UTF); /* Is the source string assumed to be
+ bool d_is_utf8 = FALSE; /* Output constant is UTF8 */
+ bool s_is_utf8 = cBOOL(UTF); /* Is the source string assumed to be
UTF8? But, this can show as true
when the source isn't utf8, as for
example when it is entirely composed
should we have to convert to
UTF-8) */
SV *res; /* result from charnames */
- STRLEN offset_to_max; /* The offset in the output to where the range
- high-end character is temporarily placed */
+ STRLEN offset_to_max = 0; /* The offset in the output to where the range
+ high-end character is temporarily placed */
/* Does something require special handling in tr/// ? This avoids extra
* work in a less likely case. As such, khw didn't feel it was worth
assert(PL_lex_inwhat != OP_TRANSR);
if (PL_lex_inwhat == OP_TRANS && PL_parser->lex_sub_op) {
/* If we are doing a trans and we know we want UTF8 set expectation */
- has_utf8 = PL_parser->lex_sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
- this_utf8 = PL_parser->lex_sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
+ d_is_utf8 = PL_parser->lex_sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
+ s_is_utf8 = PL_parser->lex_sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
}
/* Protect sv from errors and fatal warnings. */
* occurences in the constant, except those added by a
* backslash escape sequence, like \x{100}. Mostly, those
* set 'has_above_latin1' as appropriate */
- if (this_utf8 && UTF8_IS_ABOVE_LATIN1(*s)) {
+ if (s_is_utf8 && UTF8_IS_ABOVE_LATIN1(*s)) {
has_above_latin1 = TRUE;
}
* time through the loop */
offset_to_max = d - SvPVX_const(sv);
- if (this_utf8 && UTF8_IS_ABOVE_LATIN1(*s)) {
+ if (s_is_utf8 && UTF8_IS_ABOVE_LATIN1(*s)) {
has_above_latin1 = TRUE;
}
IV real_range_max = 0;
#endif
/* Get the code point values of the range ends. */
- if (has_utf8) {
+ if (d_is_utf8) {
/* We know the utf8 is valid, because we just constructed
* it ourselves in previous loop iterations */
min_ptr = (char*) utf8_hop( (U8*) max_ptr, -1);
* get it out of the way now.) */
if (UNLIKELY(range_max == range_min)) {
d = max_ptr;
- if (! has_utf8 && ! UVCHR_IS_INVARIANT(range_max)) {
+ if (! d_is_utf8 && ! UVCHR_IS_INVARIANT(range_max)) {
utf8_variant_count--;
}
goto range_done;
/* Here the range contains at least 3 code points */
- if (has_utf8) {
+ if (d_is_utf8) {
/* If everything in the transliteration is below 256, we
* can avoid special handling later. A translation table
* */
grow = (range_max - 1) - (range_min + 1) + 1;
- if (has_utf8) {
+ if (d_is_utf8) {
#ifdef EBCDIC
/* In some cases in EBCDIC, we haven't yet calculated a
* precise amount needed for the UTF-8 variants. Just
/* Recall that the min and max are now in Unicode terms, so
* we have to convert each character to its native
* equivalent */
- if (has_utf8) {
+ if (d_is_utf8) {
for (i = range_min; i <= range_max; i++) {
append_utf8_from_native_byte(
LATIN1_TO_NATIVE((U8) i),
/* Here, no conversions are necessary, which means that the
* first character in the range is already in 'd' and
* valid, so we can skip overwriting it */
- if (has_utf8) {
+ if (d_is_utf8) {
SSize_t i;
d += UTF8SKIP(d);
for (i = range_min + 1; i <= range_max; i++) {
{
const char* error;
- bool valid = grok_bslash_o(&s, &uv, &error,
+ bool valid = grok_bslash_o(&s, send,
+ &uv, &error,
TRUE, /* Output warning */
FALSE, /* Not strict */
TRUE, /* Output warnings for
{
const char* error;
- bool valid = grok_bslash_x(&s, &uv, &error,
+ bool valid = grok_bslash_x(&s, send,
+ &uv, &error,
TRUE, /* Output warning */
FALSE, /* Not strict */
TRUE, /* Output warnings for
*d++ = (char) uv;
}
else {
- if (!has_utf8 && uv > 255) {
+ if (!d_is_utf8 && uv > 255) {
/* Here, 'uv' won't fit unless we convert to UTF-8.
* If we've only seen invariants so far, all we have to
}
has_above_latin1 = TRUE;
- has_utf8 = TRUE;
+ d_is_utf8 = TRUE;
}
- if (! has_utf8) {
+ if (! d_is_utf8) {
*d++ = (char)uv;
utf8_variant_count++;
}
s++;
/* If there is no matching '}', it is an error. */
- if (! (e = strchr(s, '}'))) {
+ if (! (e = (char *) memchr(s, '}', send - s))) {
if (! PL_lex_inpat) {
yyerror("Missing right brace on \\N{}");
} else {
* tr/// doesn't care about Unicode rules, so no need
* there to upgrade to UTF-8 for small enough code
* points */
- if (! has_utf8 && ( uv > 0xFF
+ if (! d_is_utf8 && ( uv > 0xFF
|| PL_lex_inwhat != OP_TRANS))
{
/* See Note on sizing above. */
d = SvPVX(sv) + SvCUR(sv);
}
- has_utf8 = TRUE;
+ d_is_utf8 = TRUE;
has_above_latin1 = TRUE;
}
/* Add the (Unicode) code point to the output. */
- if (! has_utf8 || OFFUNI_IS_INVARIANT(uv)) {
+ if (! d_is_utf8 || OFFUNI_IS_INVARIANT(uv)) {
*d++ = (char) LATIN1_TO_NATIVE(uv);
}
else {
}
}
else /* Here is \N{NAME} but not \N{U+...}. */
- if ((res = get_and_check_backslash_N_name(s, e)))
- {
+ if (! (res = get_and_check_backslash_N_name_wrapper(s, e)))
+ { /* Failed. We should die eventually, but for now use a NUL
+ to keep parsing */
+ *d++ = '\0';
+ }
+ else { /* Successfully evaluated the name */
STRLEN len;
const char *str = SvPV_const(res, len);
if (PL_lex_inpat) {
if (! len) { /* The name resolved to an empty string */
- Copy("\\N{}", d, 4, char);
- d += 4;
+ const char empty_N[] = "\\N{_}";
+ Copy(empty_N, d, sizeof(empty_N) - 1, char);
+ d += sizeof(empty_N) - 1;
}
else {
/* In order to not lose information for the regex
/* Upgrade destination to be utf8 if this new
* component is */
- if (! has_utf8 && SvUTF8(res)) {
+ if (! d_is_utf8 && SvUTF8(res)) {
/* See Note on sizing above. */
const STRLEN extra = len + (send - s) + 1;
extra);
d = SvPVX(sv) + SvCUR(sv);
}
- has_utf8 = TRUE;
+ d_is_utf8 = TRUE;
} else if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
/* See Note on sizing above. (NOTE: SvCUR() is not
if (NATIVE_BYTE_IS_INVARIANT((U8)(*s))) {
*d++ = *s++;
}
- else if (! this_utf8 && ! has_utf8) {
+ else if (! s_is_utf8 && ! d_is_utf8) {
/* If neither source nor output is UTF-8, is also a single byte,
* just copy it; but this byte counts should we later have to
* convert to UTF-8 */
*d++ = *s++;
utf8_variant_count++;
}
- else if (this_utf8 && has_utf8) { /* Both UTF-8, can just copy */
+ else if (s_is_utf8 && d_is_utf8) { /* Both UTF-8, can just copy */
const STRLEN len = UTF8SKIP(s);
/* We expect the source to have already been checked for
}
else { /* UTF8ness matters and doesn't match, need to convert */
STRLEN len = 1;
- const UV nextuv = (this_utf8)
+ const UV nextuv = (s_is_utf8)
? utf8n_to_uvchr((U8*)s, send - s, &len, 0)
: (UV) ((U8) *s);
STRLEN need = UVCHR_SKIP(nextuv);
- if (!has_utf8) {
+ if (!d_is_utf8) {
SvCUR_set(sv, d - SvPVX_const(sv));
SvPOK_on(sv);
*d = '\0';
need);
d = SvPVX(sv) + SvCUR(sv);
}
- has_utf8 = TRUE;
+ d_is_utf8 = TRUE;
} else if (need > len) {
/* encoded value larger than old, may need extra space (NOTE:
* SvCUR() is not set correctly here). See Note on sizing
" >= %" UVuf, (UV)SvCUR(sv), (UV)SvLEN(sv));
SvPOK_on(sv);
- if (has_utf8) {
+ if (d_is_utf8) {
SvUTF8_on(sv);
if (PL_lex_inwhat == OP_TRANS && PL_parser->lex_sub_op) {
PL_parser->lex_sub_op->op_private |=
}
sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL,
- type, typelen);
+ type, typelen, NULL);
}
pl_yylval.opval = newSVOP(OP_CONST, 0, sv);
}
/* This is the one truly awful dwimmer necessary to conflate C and sed. */
STATIC int
-S_intuit_more(pTHX_ char *s)
+S_intuit_more(pTHX_ char *s, char *e)
{
PERL_ARGS_ASSERT_INTUIT_MORE;
return TRUE;
if (*s != '{' && *s != '[')
return FALSE;
+ PL_parser->sub_no_recover = TRUE;
if (!PL_lex_inpat)
return TRUE;
/* this is terrifying, and it works */
int weight;
char seen[256];
- const char * const send = strchr(s,']');
+ const char * const send = (char *) memchr(s, ']', e - s);
unsigned char un_char, last_un_char;
char tmpbuf[sizeof PL_tokenbuf * 4];
Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
{
filter_t funcp;
+ I32 ret;
SV *datasv = NULL;
/* This API is bad. It should have been using unsigned int for maxlen.
Not sure if we want to change the API, but if not we should sanity
/* Call function. The function is expected to */
/* call "FILTER_READ(idx+1, buf_sv)" first. */
/* Return: <0:error, =0:eof, >0:not eof */
- return (*funcp)(aTHX_ idx, buf_sv, correct_length);
+ ENTER;
+ save_scalar(PL_errgv);
+ ret = (*funcp)(aTHX_ idx, buf_sv, correct_length);
+ LEAVE;
+ return ret;
}
STATIC char *
PERL_ARGS_ASSERT_FIND_IN_MY_STASH;
- if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
+ if (memEQs(pkgname, len, "__PACKAGE__"))
return PL_curstash;
if (len > 2
}
else {
I32 tmp;
- if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
+ if ( memBEGINs(s, (STRLEN) (PL_bufend - s), "L\\u")
+ || memBEGINs(s, (STRLEN) (PL_bufend - s), "U\\l"))
+ {
tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */
+ }
if ((*s == 'L' || *s == 'U' || *s == 'F')
&& (strpbrk(PL_lex_casestack, "LUF")))
{
return yylex();
case LEX_INTERPENDMAYBE:
- if (intuit_more(PL_bufptr)) {
+ if (intuit_more(PL_bufptr, PL_bufend)) {
PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
break;
}
return yylex();
case LEX_FORMLINE:
+ if (PL_parser->sub_error_count != PL_error_count) {
+ /* There was an error parsing a formline, which tends to
+ mess up the parser.
+ Unlike interpolated sub-parsing, we can't treat any of
+ these as recoverable, so no need to check sub_no_recover.
+ */
+ yyquit();
+ }
assert(PL_lex_formbrack);
s = scan_formline(PL_bufptr);
if (!PL_lex_formbrack)
/* read var name, including sigil, into PL_tokenbuf */
PL_tokenbuf[0] = sigil;
parse_ident(&s, &dest, dest + sizeof(PL_tokenbuf) - 1,
- 0, cBOOL(UTF), FALSE);
+ 0, cBOOL(UTF), FALSE, FALSE);
*dest = '\0';
assert(PL_tokenbuf[1]); /* we have a variable name */
}
|| *PL_splitstr == '\''
|| *PL_splitstr == '"')
&& strchr(PL_splitstr + 1, *PL_splitstr))
+ {
+ /* strchr is ok, because -F pattern can't contain
+ * embeddded NULs */
Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
+ }
else {
/* "q\0${splitstr}\0" is legal perl. Yes, even NUL
bytes can be used as quoting characters. :-) */
}
if (PL_parser->in_pod) {
/* Incest with pod. */
- if (*s == '=' && strBEGINs(s, "=cut") && !isALPHA(s[4])) {
+ if ( memBEGINPs(s, (STRLEN) (PL_bufend - s), "=cut")
+ && !isALPHA(s[4]))
+ {
SvPVCLEAR(PL_linestr);
PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
}
}
if (PL_rsfp || PL_parser->filtered)
- incline(s);
+ incline(s, PL_bufend);
} while (PL_parser->in_pod);
PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
&& !PL_rsfp && !PL_parser->filtered) {
/* handle eval qq[#line 1 "foo"\n ...] */
CopLINE_dec(PL_curcop);
- incline(s);
+ incline(s, PL_bufend);
}
d = s;
while (d < PL_bufend && *d != '\n')
&& PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
&& SvEVALED(PL_lex_repl) && d[-1] == '}') s--;
else
- incline(s);
+ incline(s, PL_bufend);
if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
PL_lex_state = LEX_FORMLINE;
force_next(FORMRBRACK);
{
s++;
if (s < PL_bufend)
- incline(s);
+ incline(s, PL_bufend);
}
}
goto retry;
while (s < PL_bufend && SPACE_OR_TAB(*s))
s++;
- if (strBEGINs(s,"=>")) {
+ if (memBEGINs(s, (STRLEN) (PL_bufend - s), "=>")) {
s = force_word(PL_bufptr,BAREWORD,FALSE,FALSE);
DEBUG_T( { printbuf("### Saw unary minus before =>, forcing word %s\n", s); } );
OPERATOR('-'); /* unary minus */
if (!PL_tokenbuf[1]) {
PREREF('%');
}
- if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
+ if ( (PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
+ && intuit_more(s, PL_bufend)) {
if (*s == '[')
PL_tokenbuf[0] = '@';
}
case XATTRTERM:
PL_expect = XTERMBLOCK;
grabattrs:
+ /* NB: as well as parsing normal attributes, we also end up
+ * here if there is something looking like attributes
+ * following a signature (which is illegal, but used to be
+ * legal in 5.20..5.26). If the latter, we still parse the
+ * attributes so that error messages(s) are less confusing,
+ * but ignore them (parser->sig_seen).
+ */
s = skipspace(s);
attrs = NULL;
while (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
+ bool sig = PL_parser->sig_seen;
I32 tmp;
SV *sv;
d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
else {
/* NOTE: any CV attrs applied here need to be part of
the CVf_BUILTIN_ATTRS define in cv.h! */
- if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "lvalue", len)) {
+ if (!PL_in_my && memEQs(SvPVX(sv), len, "lvalue")) {
sv_free(sv);
- CvLVALUE_on(PL_compcv);
+ if (!sig)
+ CvLVALUE_on(PL_compcv);
}
- else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "method", len)) {
+ else if (!PL_in_my && memEQs(SvPVX(sv), len, "method")) {
sv_free(sv);
- CvMETHOD_on(PL_compcv);
+ if (!sig)
+ CvMETHOD_on(PL_compcv);
}
- else if (!PL_in_my && len == 5
- && strnEQ(SvPVX(sv), "const", len))
+ else if (!PL_in_my && memEQs(SvPVX(sv), len, "const"))
{
sv_free(sv);
- Perl_ck_warner_d(aTHX_
- packWARN(WARN_EXPERIMENTAL__CONST_ATTR),
- ":const is experimental"
- );
- CvANONCONST_on(PL_compcv);
- if (!CvANON(PL_compcv))
- yyerror(":const is not permitted on named "
- "subroutines");
+ if (!sig) {
+ Perl_ck_warner_d(aTHX_
+ packWARN(WARN_EXPERIMENTAL__CONST_ATTR),
+ ":const is experimental"
+ );
+ CvANONCONST_on(PL_compcv);
+ if (!CvANON(PL_compcv))
+ yyerror(":const is not permitted on named "
+ "subroutines");
+ }
}
/* After we've set the flags, it could be argued that
we don't need to do the attributes.pm-based setting
}
}
got_attrs:
+ if (PL_parser->sig_seen) {
+ /* see comment about about sig_seen and parser error
+ * handling */
+ if (attrs)
+ op_free(attrs);
+ Perl_croak(aTHX_ "Subroutine attributes must come "
+ "before the signature");
+ }
if (attrs) {
NEXTVAL_NEXTTOKE.opval = attrs;
force_next(THING);
PL_expect = XTERM;
break;
}
- if (strBEGINs(s, "sub")) {
+ if (memBEGINs(s, (STRLEN) (PL_bufend - s), "sub")) {
PL_bufptr = s;
d = s + 3;
d = skipspace(d);
{
const char tmp = *s++;
if (tmp == '=') {
- if ((s == PL_linestart+2 || s[-3] == '\n') && strBEGINs(s, "=====")) {
+ if ( (s == PL_linestart+2 || s[-3] == '\n')
+ && memBEGINs(s, (STRLEN) (PL_bufend - s), "====="))
+ {
s = vcs_conflict_marker(s + 5);
goto retry;
}
d = PL_bufend;
while (s < d) {
if (*s++ == '\n') {
- incline(s);
- if (strBEGINs(s,"=cut")) {
- s = strchr(s,'\n');
+ incline(s, PL_bufend);
+ if (memBEGINs(s, (STRLEN) (PL_bufend - s), "=cut"))
+ {
+ s = (char *) memchr(s,'\n', d - s);
if (s)
s++;
else
s = d;
- incline(s);
+ incline(s, PL_bufend);
goto retry;
}
}
SAVEI32(PL_lex_formbrack);
PL_parser->form_lex_state = PL_lex_state;
PL_lex_formbrack = PL_lex_brackets + 1;
+ PL_parser->sub_error_count = PL_error_count;
goto leftbracket;
}
}
OPERATOR('!');
case '<':
if (PL_expect != XOPERATOR) {
- if (s[1] != '<' && !strchr(s,'>'))
+ if (s[1] != '<' && !memchr(s,'>', PL_bufend - s))
check_uni();
if (s[1] == '<' && s[2] != '>') {
- if ((s == PL_linestart || s[-1] == '\n') && strBEGINs(s+2, "<<<<<")) {
+ if ( (s == PL_linestart || s[-1] == '\n')
+ && memBEGINs(s+2, (STRLEN) (PL_bufend - (s+2)), "<<<<<"))
+ {
s = vcs_conflict_marker(s + 7);
goto retry;
}
{
char tmp = *s++;
if (tmp == '<') {
- if ((s == PL_linestart+2 || s[-3] == '\n') && strBEGINs(s, "<<<<<")) {
+ if ( (s == PL_linestart+2 || s[-3] == '\n')
+ && memBEGINs(s, (STRLEN) (PL_bufend - s), "<<<<<"))
+ {
s = vcs_conflict_marker(s + 5);
goto retry;
}
{
const char tmp = *s++;
if (tmp == '>') {
- if ((s == PL_linestart+2 || s[-3] == '\n') && strBEGINs(s, ">>>>>")) {
+ if ( (s == PL_linestart+2 || s[-3] == '\n')
+ && memBEGINs(s, (STRLEN) (PL_bufend - s), ">>>>>"))
+ {
s = vcs_conflict_marker(s + 5);
goto retry;
}
if (PL_lex_state == LEX_NORMAL || PL_lex_brackets)
s = skipspace(s);
- if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
- && intuit_more(s)) {
+ if ( (PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
+ && intuit_more(s, PL_bufend)) {
if (*s == '[') {
PL_tokenbuf[0] = '@';
if (ckWARN(WARN_SYNTAX)) {
else if (*s == '{') {
char *t;
PL_tokenbuf[0] = '%';
- if (strEQ(PL_tokenbuf+1, "SIG") && ckWARN(WARN_SYNTAX)
- && (t = strchr(s, '}')) && (t = strchr(t, '=')))
+ if ( strEQ(PL_tokenbuf+1, "SIG")
+ && ckWARN(WARN_SYNTAX)
+ && (t = (char *) memchr(s, '}', PL_bufend - s))
+ && (t = (char *) memchr(t, '=', PL_bufend - t)))
{
char tmpbuf[sizeof PL_tokenbuf];
do {
}
if (PL_lex_state == LEX_NORMAL)
s = skipspace(s);
- if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
+ if ( (PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
+ && intuit_more(s, PL_bufend))
+ {
if (*s == '{')
PL_tokenbuf[0] = '%';
}
if (PL_expect == XSTATE && s[1] == '.' && s[2] == '.') {
s += 3;
- TERM(YADAYADA);
+ OPERATOR(YADAYADA);
}
if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
char tmp = *s++;
case '\'':
s = scan_str(s,FALSE,FALSE,FALSE,NULL);
if (!s)
- missingterm(NULL);
+ missingterm(NULL, 0);
COPLINE_SET_FROM_MULTI_END;
DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
if (PL_expect == XOPERATOR) {
no_op("String",s);
}
if (!s)
- missingterm(NULL);
+ missingterm(NULL, 0);
pl_yylval.ival = OP_CONST;
/* FIXME. I think that this can be const if char *d is replaced by
more localised variables. */
if (PL_expect == XOPERATOR)
no_op("Backticks",s);
if (!s)
- missingterm(NULL);
+ missingterm(NULL, 0);
pl_yylval.ival = OP_BACKTICK;
TERM(sublex_start());
/* x::* is just a word, unless x is "CORE" */
if (!anydelim && *s == ':' && s[1] == ':') {
- if (strEQ(PL_tokenbuf, "CORE")) goto case_KEY_CORE;
+ if (memEQs(PL_tokenbuf, len, "CORE")) goto case_KEY_CORE;
goto just_a_word;
}
if (!anydelim && PL_expect == XSTATE
&& d < PL_bufend && *d == ':' && *(d + 1) != ':') {
s = d + 1;
- pl_yylval.pval = savepvn(PL_tokenbuf, len+1);
- pl_yylval.pval[len] = '\0';
- pl_yylval.pval[len+1] = UTF ? 1 : 0;
+ pl_yylval.opval =
+ newSVOP(OP_CONST, 0,
+ newSVpvn_flags(PL_tokenbuf, len, UTF ? SVf_UTF8 : 0));
CLINE;
TOKEN(LABEL);
}
else { /* no override */
tmp = -tmp;
if (tmp == KEY_dump) {
- Perl_ck_warner_d(aTHX_ packWARN2(WARN_MISC,WARN_DEPRECATED),
- "dump() better written as CORE::dump(). "
- "dump() will no longer be available "
- "in Perl 5.30");
+ Perl_croak(aTHX_ "dump() must be written as CORE::dump() as of Perl 5.30");
}
gv = NULL;
gvp = 0;
int pkgname = 0;
const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
bool safebw;
+ bool no_op_error = FALSE;
+ if (PL_expect == XOPERATOR) {
+ if (PL_bufptr == PL_linestart) {
+ CopLINE_dec(PL_curcop);
+ Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
+ CopLINE_inc(PL_curcop);
+ }
+ else
+ /* We want to call no_op with s pointing after the
+ bareword, so defer it. But we want it to come
+ before the Bad name croak. */
+ no_op_error = TRUE;
+ }
/* Get the rest if it looks like a package qualifier */
STRLEN morelen;
s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
TRUE, &morelen);
+ if (no_op_error) {
+ no_op("Bareword",s);
+ no_op_error = FALSE;
+ }
if (!morelen)
Perl_croak(aTHX_ "Bad name after %" UTF8f "%s",
UTF8fARG(UTF, len, PL_tokenbuf),
pkgname = 1;
}
- if (PL_expect == XOPERATOR) {
- if (PL_bufptr == PL_linestart) {
- CopLINE_dec(PL_curcop);
- Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
- CopLINE_inc(PL_curcop);
- }
- else
+ if (no_op_error)
no_op("Bareword",s);
- }
/* See if the name is "Foo::",
in which case Foo is a bareword
if (!*d && !gv_stashpv(PL_tokenbuf, UTF ? SVf_UTF8 : 0))
{
/* PL_warn_reserved is constant */
- GCC_DIAG_IGNORE(-Wformat-nonliteral);
+ GCC_DIAG_IGNORE_STMT(-Wformat-nonliteral);
Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved,
PL_tokenbuf);
- GCC_DIAG_RESTORE;
+ GCC_DIAG_RESTORE_STMT;
}
}
}
if (!GvIO(gv))
GvIOp(gv) = newIO();
IoIFP(GvIOp(gv)) = PL_rsfp;
-#if defined(HAS_FCNTL) && defined(F_SETFD) && defined(FD_CLOEXEC)
- {
- const int fd = PerlIO_fileno(PL_rsfp);
- if (fd >= 3) {
- fcntl(fd,F_SETFD, FD_CLOEXEC);
- }
- }
-#endif
/* Mark this internal pseudo-handle as clean */
IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
if ((PerlIO*)PL_rsfp == PerlIO_stdin())
*PL_tokenbuf = '&';
d = scan_word(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
1, &len);
- if (len && (len != 4 || strNE(PL_tokenbuf+1, "CORE"))
+ if (len && memNEs(PL_tokenbuf+1, len, "CORE")
&& !keyword(PL_tokenbuf + 1, len, 0)) {
SSize_t off = s-SvPVX(PL_linestr);
d = skipspace(d);
char *p = s;
SSize_t s_off = s - SvPVX(PL_linestr);
- if ((PL_bufend - p) >= 3
- && strBEGINs(p, "my") && isSPACE(*(p + 2)))
+ if ( memBEGINPs(p, (STRLEN) (PL_bufend - p), "my")
+ && isSPACE(*(p + 2)))
{
- p += 2;
+ p += 2;
}
- else if ((PL_bufend - p) >= 4
- && strBEGINs(p, "our") && isSPACE(*(p + 3)))
- p += 3;
+ else if ( memBEGINPs(p, (STRLEN) (PL_bufend - p), "our")
+ && isSPACE(*(p + 3)))
+ {
+ p += 3;
+ }
+
p = skipspace(p);
/* skip optional package name, as in "for my abc $x (..)" */
if (isIDFIRST_lazy_if_safe(p, PL_bufend, UTF)) {
s = skipspace(s);
if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
- if (len == 3 && strBEGINs(PL_tokenbuf, "sub"))
+ if (memEQs(PL_tokenbuf, len, "sub"))
goto really_sub;
PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
if (!PL_in_my_stash) {
case KEY_q:
s = scan_str(s,FALSE,FALSE,FALSE,NULL);
if (!s)
- missingterm(NULL);
+ missingterm(NULL, 0);
COPLINE_SET_FROM_MULTI_END;
pl_yylval.ival = OP_CONST;
TERM(sublex_start());
OP *words = NULL;
s = scan_str(s,FALSE,FALSE,FALSE,NULL);
if (!s)
- missingterm(NULL);
+ missingterm(NULL, 0);
COPLINE_SET_FROM_MULTI_END;
PL_expect = XOPERATOR;
if (SvCUR(PL_lex_stuff)) {
case KEY_qq:
s = scan_str(s,FALSE,FALSE,FALSE,NULL);
if (!s)
- missingterm(NULL);
+ missingterm(NULL, 0);
pl_yylval.ival = OP_STRINGIFY;
if (SvIVX(PL_lex_stuff) == '\'')
SvIV_set(PL_lex_stuff, 0); /* qq'$foo' should interpolate */
case KEY_qx:
s = scan_str(s,FALSE,FALSE,FALSE,NULL);
if (!s)
- missingterm(NULL);
+ missingterm(NULL, 0);
pl_yylval.ival = OP_BACKTICK;
TERM(sublex_start());
really_sub:
{
char * const tmpbuf = PL_tokenbuf + 1;
- expectation attrful;
bool have_name, have_proto;
const int key = tmp;
SV *format_name = NULL;
+ bool is_sigsub = FEATURE_SIGNATURES_IS_ENABLED;
SSize_t off = s-SvPVX(PL_linestr);
s = skipspace(s);
d = SvPVX(PL_linestr)+off;
+ SAVEBOOL(PL_parser->sig_seen);
+ PL_parser->sig_seen = FALSE;
+
if ( isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)
|| *s == '\''
|| (*s == ':' && s[1] == ':'))
{
- PL_expect = XBLOCK;
- attrful = XATTRBLOCK;
+ PL_expect = XATTRBLOCK;
d = scan_word(s, tmpbuf, sizeof PL_tokenbuf - 1, TRUE,
&len);
if (key == KEY_format)
Perl_croak(aTHX_
"Missing name in \"%s\"", PL_bufptr);
}
- PL_expect = XTERMBLOCK;
- attrful = XATTRTERM;
+ PL_expect = XATTRTERM;
sv_setpvs(PL_subname,"?");
have_name = FALSE;
}
}
/* Look for a prototype */
- if (*s == '(' && !FEATURE_SIGNATURES_IS_ENABLED) {
+ if (*s == '(' && !is_sigsub) {
s = scan_str(s,FALSE,FALSE,FALSE,NULL);
- COPLINE_SET_FROM_MULTI_END;
if (!s)
Perl_croak(aTHX_ "Prototype not terminated");
+ COPLINE_SET_FROM_MULTI_END;
(void)validate_proto(PL_subname, PL_lex_stuff,
ckWARN(WARN_ILLEGALPROTO), 0);
have_proto = TRUE;
else
have_proto = FALSE;
- if (*s == ':' && s[1] != ':')
- PL_expect = attrful;
- else if ((*s != '{' && *s != '(') && key != KEY_format) {
+ if ( !(*s == ':' && s[1] != ':')
+ && (*s != '{' && *s != '(') && key != KEY_format)
+ {
assert(key == KEY_sub || key == KEY_AUTOLOAD ||
key == KEY_DESTROY || key == KEY_BEGIN ||
key == KEY_UNITCHECK || key == KEY_CHECK ||
sv_setpvs(PL_subname, "__ANON__");
else
sv_setpvs(PL_subname, "__ANON__::__ANON__");
- TOKEN(ANONSUB);
+ if (is_sigsub)
+ TOKEN(ANON_SIGSUB);
+ else
+ TOKEN(ANONSUB);
}
force_ident_maybe_lex('&');
- TOKEN(SUB);
+ if (is_sigsub)
+ TOKEN(SIGSUB);
+ else
+ TOKEN(SUB);
}
case KEY_system:
DEBUG_T({ PerlIO_printf(Perl_debug_log,
"### Pending identifier '%s'\n", PL_tokenbuf); });
+ assert(tokenbuf_len >= 2);
/* if we're in a my(), we can't allow dynamics here.
$foo'bar has already been turned into $foo::bar, so
if (has_colon) {
/* "my" variable %s can't be in a package */
/* PL_no_myglob is constant */
- GCC_DIAG_IGNORE(-Wformat-nonliteral);
+ GCC_DIAG_IGNORE_STMT(-Wformat-nonliteral);
yyerror_pv(Perl_form(aTHX_ PL_no_myglob,
PL_in_my == KEY_my ? "my" : "state",
*PL_tokenbuf == '&' ? "subroutin" : "variabl",
PL_tokenbuf),
UTF ? SVf_UTF8 : 0);
- GCC_DIAG_RESTORE;
+ GCC_DIAG_RESTORE_STMT;
}
if (PL_in_my == KEY_sigvar) {
HEK * const stashname = HvNAME_HEK(stash);
SV * const sym = newSVhek(stashname);
sv_catpvs(sym, "::");
- sv_catpvn_flags(sym, PL_tokenbuf+1, tokenbuf_len - 1, (UTF ? SV_CATUTF8 : SV_CATBYTES ));
+ sv_catpvn_flags(sym, PL_tokenbuf+1, tokenbuf_len > 0 ? tokenbuf_len - 1 : 0, (UTF ? SV_CATUTF8 : SV_CATBYTES ));
pl_yylval.opval = newSVOP(OP_CONST, 0, sym);
pl_yylval.opval->op_private = OPpCONST_ENTERED;
if (pit != '&')
&& PL_lex_state != LEX_NORMAL
&& !PL_lex_brackets)
{
- GV *const gv = gv_fetchpvn_flags(PL_tokenbuf + 1, tokenbuf_len - 1,
+ GV *const gv = gv_fetchpvn_flags(PL_tokenbuf + 1, tokenbuf_len > 0 ? tokenbuf_len - 1 : 0,
( UTF ? SVf_UTF8 : 0 ) | GV_ADDMG,
SVt_PVAV);
if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
/* build ops for a bareword */
pl_yylval.opval = newSVOP(OP_CONST, 0,
newSVpvn_flags(PL_tokenbuf + 1,
- tokenbuf_len - 1,
+ tokenbuf_len > 0 ? tokenbuf_len - 1 : 0,
UTF ? SVf_UTF8 : 0 ));
pl_yylval.opval->op_private = OPpCONST_ENTERED;
if (pit != '&')
- gv_fetchpvn_flags(PL_tokenbuf+1, tokenbuf_len - 1,
+ gv_fetchpvn_flags(PL_tokenbuf+1, tokenbuf_len > 0 ? tokenbuf_len - 1 : 0,
(PL_in_eval ? GV_ADDMULTI : GV_ADD)
| ( UTF ? SVf_UTF8 : 0 ),
((PL_tokenbuf[0] == '$') ? SVt_PV
Best used as sv=new_constant(..., sv, ...).
If s, pv are NULL, calls subroutine with one argument,
and <type> is used with error messages only.
- <type> is assumed to be well formed UTF-8 */
+ <type> is assumed to be well formed UTF-8.
+
+ If error_msg is not NULL, *error_msg will be set to any error encountered.
+ Otherwise yyerror() will be used to output it */
STATIC SV *
S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen,
- SV *sv, SV *pv, const char *type, STRLEN typelen)
+ SV *sv, SV *pv, const char *type, STRLEN typelen,
+ const char ** error_msg)
{
dSP;
HV * table = GvHV(PL_hintgv); /* ^H */
if (*key == 'c') { assert (strEQ(key, "charnames")); }
assert(type || s);
- /* charnames doesn't work well if there have been errors found */
- if (PL_error_count > 0 && *key == 'c')
- {
- SvREFCNT_dec_NN(sv);
- return &PL_sv_undef;
- }
-
sv_2mortal(sv); /* Parent created it permanently */
if (!table
|| ! (PL_hints & HINT_LOCALIZE_HH)
(type ? type: s), why1, why2, why3);
}
}
- yyerror_pv(msg, UTF ? SVf_UTF8 : 0);
+ if (error_msg) {
+ *error_msg = msg;
+ }
+ else {
+ yyerror_pv(msg, UTF ? SVf_UTF8 : 0);
+ }
return SvREFCNT_inc_simple_NN(sv);
}
now_ok:
PERL_STATIC_INLINE void
S_parse_ident(pTHX_ char **s, char **d, char * const e, int allow_package,
- bool is_utf8, bool check_dollar)
+ bool is_utf8, bool check_dollar, bool tick_warn)
{
+ int saw_tick = 0;
+ const char *olds = *s;
PERL_ARGS_ASSERT_PARSE_IDENT;
while (*s < PL_bufend) {
*(*d)++ = ':';
*(*d)++ = ':';
(*s)++;
+ saw_tick++;
}
else if (allow_package && **s == ':' && (*s)[1] == ':'
/* Disallow things like Foo::$bar. For the curious, this is
else
break;
}
+ if (UNLIKELY(tick_warn && saw_tick && PL_lex_state == LEX_INTERPNORMAL
+ && !PL_lex_brackets && ckWARN(WARN_SYNTAX))) {
+ char *d;
+ char *d2;
+ Newx(d, *s - olds + saw_tick + 2, char); /* +2 for $# */
+ d2 = d;
+ SAVEFREEPV(d);
+ Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
+ "Old package separator used in string");
+ if (olds[-1] == '#')
+ *d2++ = olds[-2];
+ *d2++ = olds[-1];
+ while (olds < *s) {
+ if (*olds == '\'') {
+ *d2++ = '\\';
+ *d2++ = *olds++;
+ }
+ else
+ *d2++ = *olds++;
+ }
+ Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
+ "\t(Did you mean \"%" UTF8f "\" instead?)\n",
+ UTF8fARG(is_utf8, d2-d, d));
+ }
return;
}
PERL_ARGS_ASSERT_SCAN_WORD;
- parse_ident(&s, &d, e, allow_package, is_utf8, TRUE);
+ parse_ident(&s, &d, e, allow_package, is_utf8, TRUE, FALSE);
*d = '\0';
*slp = d - dest;
return s;
}
}
else { /* See if it is a "normal" identifier */
- parse_ident(&s, &d, e, 1, is_utf8, FALSE);
+ parse_ident(&s, &d, e, 1, is_utf8, FALSE, TRUE);
}
*d = '\0';
d = dest;
|| isDIGIT_A((U8)s[1])
|| s[1] == '$'
|| s[1] == '{'
- || strBEGINs(s+1,"::")) )
+ || memBEGINs(s+1, (STRLEN) (PL_bufend - (s+1)), "::")) )
{
/* Dereferencing a value in a scalar variable.
The alternatives are different syntaxes for a scalar variable.
(the later check for } being at the expected point will trap
cases where this doesn't pan out.) */
d += is_utf8 ? UTF8SKIP(d) : 1;
- parse_ident(&s, &d, e, 1, is_utf8, TRUE);
+ parse_ident(&s, &d, e, 1, is_utf8, TRUE, TRUE);
*d = '\0';
}
else { /* caret word: ${^Foo} ${^CAPTURE[0]} */
CopLINE_set(PL_curcop, orig_copline);
PL_parser->herelines = herelines;
*dest = '\0';
+ PL_parser->sub_no_recover = TRUE;
}
}
- else if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets && !intuit_more(s))
+ else if ( PL_lex_state == LEX_INTERPNORMAL
+ && !PL_lex_brackets
+ && !intuit_more(s, PL_bufend))
PL_lex_state = LEX_INTERPEND;
return s;
}
* the NVX field indicates how many src code lines the replacement
* spreads over */
sv_upgrade(PL_parser->lex_sub_repl, SVt_PVNV);
- ((XPVNV*)SvANY(PL_parser->lex_sub_repl))->xnv_u.xnv_lines = 0;
+ ((XPVNV*)SvANY(PL_parser->lex_sub_repl))->xnv_u.xnv_lines = linediff;
((XPVIV*)SvANY(PL_parser->lex_sub_repl))->xiv_u.xivu_eval_seen =
cBOOL(es);
}
e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
*PL_tokenbuf = '\n';
peek = s;
+
if (*peek == '~') {
indented = TRUE;
peek++; s++;
}
+
while (SPACE_OR_TAB(*peek))
peek++;
+
if (*peek == '`' || *peek == '\'' || *peek =='"') {
s = peek;
term = *s++;
s++, term = '\'';
else
term = '"';
+
if (! isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF))
Perl_croak(aTHX_ "Use of bare << to mean <<\"\" is forbidden");
+
peek = s;
+
while (isWORDCHAR_lazy_if_safe(peek, PL_bufend, UTF)) {
peek += UTF ? UTF8SKIP(peek) : 1;
}
+
len = (peek - s >= e - d) ? (e - d) : (peek - s);
Copy(s, d, len, char);
s += len;
d += len;
}
+
if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
Perl_croak(aTHX_ "Delimiter for here document is too long");
+
*d++ = '\n';
*d = '\0';
len = d - PL_tokenbuf;
#ifndef PERL_STRICT_CR
- d = strchr(s, '\r');
+ d = (char *) memchr(s, '\r', PL_bufend - s);
if (d) {
char * const olds = s;
s = d;
PL_multi_start = origline + 1 + PL_parser->herelines;
PL_multi_open = PL_multi_close = '<';
+
/* inside a string eval or quote-like operator */
if (!infile || PL_lex_inwhat) {
SV *linestr;
entered. But we need them set here. */
shared->ls_bufptr = s;
shared->ls_linestr = PL_linestr;
- if (PL_lex_inwhat)
- /* Look for a newline. If the current buffer does not have one,
- peek into the line buffer of the parent lexing scope, going
- up as many levels as necessary to find one with a newline
- after bufptr.
- */
- while (!(s = (char *)memchr(
- (void *)shared->ls_bufptr, '\n',
- SvEND(shared->ls_linestr)-shared->ls_bufptr
- ))) {
- shared = shared->ls_prev;
- /* shared is only null if we have gone beyond the outermost
- lexing scope. In a file, we will have broken out of the
- loop in the previous iteration. In an eval, the string buf-
- fer ends with "\n;", so the while condition above will have
- evaluated to false. So shared can never be null. Or so you
- might think. Odd syntax errors like s;@{<<; can gobble up
- the implicit semicolon at the end of a flie, causing the
- file handle to be closed even when we are not in a string
- eval. So shared may be null in that case.
- (Closing '}' here to balance the earlier open brace for
- editors that look for matched pairs.) */
- if (UNLIKELY(!shared))
- goto interminable;
- /* A LEXSHARED struct with a null ls_prev pointer is the outer-
- most lexing scope. In a file, shared->ls_linestr at that
- level is just one line, so there is no body to steal. */
- if (infile && !shared->ls_prev) {
- s = olds;
- goto streaming;
- }
- }
+
+ if (PL_lex_inwhat) {
+ /* Look for a newline. If the current buffer does not have one,
+ peek into the line buffer of the parent lexing scope, going
+ up as many levels as necessary to find one with a newline
+ after bufptr.
+ */
+ while (!(s = (char *)memchr(
+ (void *)shared->ls_bufptr, '\n',
+ SvEND(shared->ls_linestr)-shared->ls_bufptr
+ )))
+ {
+ shared = shared->ls_prev;
+ /* shared is only null if we have gone beyond the outermost
+ lexing scope. In a file, we will have broken out of the
+ loop in the previous iteration. In an eval, the string buf-
+ fer ends with "\n;", so the while condition above will have
+ evaluated to false. So shared can never be null. Or so you
+ might think. Odd syntax errors like s;@{<<; can gobble up
+ the implicit semicolon at the end of a flie, causing the
+ file handle to be closed even when we are not in a string
+ eval. So shared may be null in that case.
+ (Closing '>>}' here to balance the earlier open brace for
+ editors that look for matched pairs.) */
+ if (UNLIKELY(!shared))
+ goto interminable;
+ /* A LEXSHARED struct with a null ls_prev pointer is the outer-
+ most lexing scope. In a file, shared->ls_linestr at that
+ level is just one line, so there is no body to steal. */
+ if (infile && !shared->ls_prev) {
+ s = olds;
+ goto streaming;
+ }
+ }
+ }
else { /* eval or we've already hit EOF */
s = (char*)memchr((void*)s, '\n', PL_bufend - s);
if (!s)
goto interminable;
}
+
linestr = shared->ls_linestr;
bufend = SvEND(linestr);
d = s;
if (! SPACE_OR_TAB(*backup)) {
break;
}
-
indent_len++;
}
/* No whitespace or all! */
if (backup == s || *backup == '\n') {
- Newxz(indent, indent_len + 1, char);
+ Newx(indent, indent_len + 1, char);
memcpy(indent, backup + 1, indent_len);
+ indent[indent_len] = 0;
s--; /* before our delimiter */
PL_parser->herelines--; /* this line doesn't count */
break;
}
}
}
- } else {
+ }
+ else {
while (s < bufend - len + 1
&& memNE(s,PL_tokenbuf,len) )
{
if (s >= bufend - len + 1) {
goto interminable;
}
+
sv_setpvn(tmpstr,d+1,s-d);
s += len - 1;
/* the preceding stmt passes a newline */
bufend - shared->re_eval_start);
shared->re_eval_start -= s-d;
}
+
if (cxstack_ix >= 0
&& CxTYPE(cx) == CXt_EVAL
&& CxOLD_OP_TYPE(cx) == OP_ENTEREVAL
cx->blk_eval.cur_text = newSVsv(linestr);
cx->blk_u16 |= 0x40; /* indicate cur_text is ref counted */
}
+
/* Copy everything from s onwards back to d. */
Move(s,d,bufend-s + 1,char);
SvCUR_set(linestr, SvCUR(linestr) - (s-d));
/* Setting PL_bufend only applies when we have not dug deeper
into other scopes, because sublex_done sets PL_bufend to
SvEND(PL_linestr). */
- if (shared == PL_parser->lex_shared) PL_bufend = SvEND(linestr);
+ if (shared == PL_parser->lex_shared)
+ PL_bufend = SvEND(linestr);
s = olds;
}
- else
- {
- SV *linestr_save;
- char *oldbufptr_save;
- char *oldoldbufptr_save;
- streaming:
- SvPVCLEAR(tmpstr); /* avoid "uninitialized" warning */
- term = PL_tokenbuf[1];
- len--;
- linestr_save = PL_linestr; /* must restore this afterwards */
- d = s; /* and this */
- oldbufptr_save = PL_oldbufptr;
- oldoldbufptr_save = PL_oldoldbufptr;
- PL_linestr = newSVpvs("");
- PL_bufend = SvPVX(PL_linestr);
- while (1) {
- PL_bufptr = PL_bufend;
- CopLINE_set(PL_curcop,
- origline + 1 + PL_parser->herelines);
- if (!lex_next_chunk(LEX_NO_TERM)
- && (!SvCUR(tmpstr) || SvEND(tmpstr)[-1] != '\n')) {
- /* Simply freeing linestr_save might seem simpler here, as it
- does not matter what PL_linestr points to, since we are
- about to croak; but in a quote-like op, linestr_save
- will have been prospectively freed already, via
- SAVEFREESV(PL_linestr) in sublex_push, so it’s easier to
- restore PL_linestr. */
- SvREFCNT_dec_NN(PL_linestr);
- PL_linestr = linestr_save;
- PL_oldbufptr = oldbufptr_save;
- PL_oldoldbufptr = oldoldbufptr_save;
- goto interminable;
- }
- CopLINE_set(PL_curcop, origline);
- if (!SvCUR(PL_linestr) || PL_bufend[-1] != '\n') {
- s = lex_grow_linestr(SvLEN(PL_linestr) + 3);
- /* ^That should be enough to avoid this needing to grow: */
- sv_catpvs(PL_linestr, "\n\0");
- assert(s == SvPVX(PL_linestr));
- PL_bufend = SvEND(PL_linestr);
- }
- s = PL_bufptr;
- PL_parser->herelines++;
- PL_last_lop = PL_last_uni = NULL;
+ else {
+ SV *linestr_save;
+ char *oldbufptr_save;
+ char *oldoldbufptr_save;
+ streaming:
+ SvPVCLEAR(tmpstr); /* avoid "uninitialized" warning */
+ term = PL_tokenbuf[1];
+ len--;
+ linestr_save = PL_linestr; /* must restore this afterwards */
+ d = s; /* and this */
+ oldbufptr_save = PL_oldbufptr;
+ oldoldbufptr_save = PL_oldoldbufptr;
+ PL_linestr = newSVpvs("");
+ PL_bufend = SvPVX(PL_linestr);
+
+ while (1) {
+ PL_bufptr = PL_bufend;
+ CopLINE_set(PL_curcop,
+ origline + 1 + PL_parser->herelines);
+
+ if ( !lex_next_chunk(LEX_NO_TERM)
+ && (!SvCUR(tmpstr) || SvEND(tmpstr)[-1] != '\n'))
+ {
+ /* Simply freeing linestr_save might seem simpler here, as it
+ does not matter what PL_linestr points to, since we are
+ about to croak; but in a quote-like op, linestr_save
+ will have been prospectively freed already, via
+ SAVEFREESV(PL_linestr) in sublex_push, so it’s easier to
+ restore PL_linestr. */
+ SvREFCNT_dec_NN(PL_linestr);
+ PL_linestr = linestr_save;
+ PL_oldbufptr = oldbufptr_save;
+ PL_oldoldbufptr = oldoldbufptr_save;
+ goto interminable;
+ }
+
+ CopLINE_set(PL_curcop, origline);
+
+ if (!SvCUR(PL_linestr) || PL_bufend[-1] != '\n') {
+ s = lex_grow_linestr(SvLEN(PL_linestr) + 3);
+ /* ^That should be enough to avoid this needing to grow: */
+ sv_catpvs(PL_linestr, "\n\0");
+ assert(s == SvPVX(PL_linestr));
+ PL_bufend = SvEND(PL_linestr);
+ }
+
+ s = PL_bufptr;
+ PL_parser->herelines++;
+ PL_last_lop = PL_last_uni = NULL;
+
#ifndef PERL_STRICT_CR
- if (PL_bufend - PL_linestart >= 2) {
- if ( (PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n')
- || (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
- {
- PL_bufend[-2] = '\n';
- PL_bufend--;
- SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
- }
- else if (PL_bufend[-1] == '\r')
- PL_bufend[-1] = '\n';
- }
- else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
- PL_bufend[-1] = '\n';
+ if (PL_bufend - PL_linestart >= 2) {
+ if ( (PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n')
+ || (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
+ {
+ PL_bufend[-2] = '\n';
+ PL_bufend--;
+ SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
+ }
+ else if (PL_bufend[-1] == '\r')
+ PL_bufend[-1] = '\n';
+ }
+ else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
+ PL_bufend[-1] = '\n';
#endif
- if (indented && (PL_bufend-s) >= len) {
- char * found = ninstr(s, PL_bufend, (PL_tokenbuf + 1), (PL_tokenbuf +1 + len));
- if (found) {
- char *backup = found;
- indent_len = 0;
+ if (indented && (PL_bufend-s) >= len) {
+ char * found = ninstr(s, PL_bufend, (PL_tokenbuf + 1), (PL_tokenbuf +1 + len));
- /* Only valid if it's preceded by whitespace only */
- while (backup != s && --backup >= s) {
- if (! SPACE_OR_TAB(*backup)) {
- break;
- }
- indent_len++;
- }
+ if (found) {
+ char *backup = found;
+ indent_len = 0;
- /* All whitespace or none! */
- if (backup == found || SPACE_OR_TAB(*backup)) {
- Newxz(indent, indent_len + 1, char);
- memcpy(indent, backup, indent_len);
- SvREFCNT_dec(PL_linestr);
- PL_linestr = linestr_save;
- PL_linestart = SvPVX(linestr_save);
- PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
- PL_oldbufptr = oldbufptr_save;
- PL_oldoldbufptr = oldoldbufptr_save;
- s = d;
- break;
- }
- }
+ /* Only valid if it's preceded by whitespace only */
+ while (backup != s && --backup >= s) {
+ if (! SPACE_OR_TAB(*backup)) {
+ break;
+ }
+ indent_len++;
+ }
- /* Didn't find it */
- sv_catsv(tmpstr,PL_linestr);
- } else {
- if (*s == term && PL_bufend-s >= len
- && memEQ(s,PL_tokenbuf + 1,len))
- {
- SvREFCNT_dec(PL_linestr);
- PL_linestr = linestr_save;
- PL_linestart = SvPVX(linestr_save);
- PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
- PL_oldbufptr = oldbufptr_save;
- PL_oldoldbufptr = oldoldbufptr_save;
- s = d;
- break;
- } else {
- sv_catsv(tmpstr,PL_linestr);
- }
- }
- }
+ /* All whitespace or none! */
+ if (backup == found || SPACE_OR_TAB(*backup)) {
+ Newx(indent, indent_len + 1, char);
+ memcpy(indent, backup, indent_len);
+ indent[indent_len] = 0;
+ SvREFCNT_dec(PL_linestr);
+ PL_linestr = linestr_save;
+ PL_linestart = SvPVX(linestr_save);
+ PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
+ PL_oldbufptr = oldbufptr_save;
+ PL_oldoldbufptr = oldoldbufptr_save;
+ s = d;
+ break;
+ }
+ }
+
+ /* Didn't find it */
+ sv_catsv(tmpstr,PL_linestr);
+ }
+ else {
+ if (*s == term && PL_bufend-s >= len
+ && memEQ(s,PL_tokenbuf + 1,len))
+ {
+ SvREFCNT_dec(PL_linestr);
+ PL_linestr = linestr_save;
+ PL_linestart = SvPVX(linestr_save);
+ PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
+ PL_oldbufptr = oldbufptr_save;
+ PL_oldoldbufptr = oldoldbufptr_save;
+ s = d;
+ break;
+ }
+ else {
+ sv_catsv(tmpstr,PL_linestr);
+ }
+ }
+ } /* while (1) */
}
+
PL_multi_end = origline + PL_parser->herelines;
+
if (indented && indent) {
STRLEN linecount = 1;
STRLEN herelen = SvCUR(tmpstr);
while (ss < se) {
/* newline only? Copy and move on */
if (*ss == '\n') {
- sv_catpv(newstr,"\n");
+ sv_catpvs(newstr,"\n");
ss++;
linecount++;
/* Found our indentation? Strip it */
- } else if (se - ss >= indent_len
+ }
+ else if (se - ss >= indent_len
&& memEQ(ss, indent, indent_len))
{
STRLEN le = 0;
-
ss += indent_len;
while ((ss + le) < se && *(ss + le) != '\n')
le++;
sv_catpvn(newstr, ss, le);
-
ss += le;
/* Line doesn't begin with our indentation? Croak */
- } else {
+ }
+ else {
+ Safefree(indent);
Perl_croak(aTHX_
"Indentation on line %d of here-doc doesn't match delimiter",
(int)linecount
);
}
- }
+ } /* while */
+
/* avoid sv_setsv() as we dont wan't to COW here */
sv_setpvn(tmpstr,SvPVX(newstr),SvCUR(newstr));
Safefree(indent);
SvREFCNT_dec_NN(newstr);
}
+
if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
SvPV_shrink_to_cur(tmpstr);
}
+
if (!IN_BYTES) {
if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
SvUTF8_on(tmpstr);
}
+
PL_lex_stuff = tmpstr;
pl_yylval.ival = op_type;
return s;
interminable:
+ if (indent)
+ Safefree(indent);
SvREFCNT_dec(tmpstr);
CopLINE_set(PL_curcop, origline);
- missingterm(PL_tokenbuf + 1);
+ missingterm(PL_tokenbuf + 1, sizeof(PL_tokenbuf) - 1);
}
+
/* scan_inputsymbol
takes: position of first '<' in input buffer
returns: position of first char following the matching '>' in
PERL_ARGS_ASSERT_SCAN_INPUTSYMBOL;
- end = strchr(s, '\n');
+ end = (char *) memchr(s, '\n', PL_bufend - s);
if (!end)
end = PL_bufend;
if (s[1] == '<' && s[2] == '>' && s[3] == '>') {
char term; /* terminating character */
char *to; /* current position in the sv's data */
I32 brackets = 1; /* bracket nesting level */
- bool has_utf8 = FALSE; /* is there any utf8 content? */
+ bool d_is_utf8 = FALSE; /* is there any utf8 content? */
IV termcode; /* terminating char. code */
- U8 termstr[UTF8_MAXBYTES]; /* terminating string */
+ U8 termstr[UTF8_MAXBYTES+1]; /* terminating string */
STRLEN termlen; /* length of terminating string */
line_t herelines;
const char * opening_delims = "([{<";
const char * closing_delims = ")]}>";
+ /* The only non-UTF character that isn't a stand alone grapheme is
+ * white-space, hence can't be a delimiter. */
const char * non_grapheme_msg = "Use of unassigned code point or"
" non-standalone grapheme for a delimiter"
- " will be a fatal error starting in Perl"
- " 5.30";
- /* The only non-UTF character that isn't a stand alone grapheme is
- * white-space, hence can't be a delimiter. So can skip for non-UTF-8 */
- bool check_grapheme = UTF && ckWARN_d(WARN_DEPRECATED);
-
+ " is not allowed";
PERL_ARGS_ASSERT_SCAN_STR;
/* skip space before the delimiter */
}
else {
termcode = utf8_to_uvchr_buf((U8*)s, (U8*)PL_bufend, &termlen);
- if (check_grapheme) {
- if ( UNLIKELY(UNICODE_IS_SUPER(termcode))
- || UNLIKELY(UNICODE_IS_NONCHAR(termcode)))
- {
- /* These are considered graphemes, and since the ending
- * delimiter will be the same, we don't have to check the other
- * end */
- check_grapheme = FALSE;
- }
- else if (UNLIKELY(! _is_grapheme((U8 *) start,
- (U8 *) s,
- (U8 *) PL_bufend,
- termcode)))
- {
- Perl_warner(aTHX_ packWARN(WARN_DEPRECATED), "%s", non_grapheme_msg);
-
- /* Don't have to check the other end, as have already warned at
- * this one */
- check_grapheme = FALSE;
- }
+ if (UTF && UNLIKELY(! _is_grapheme((U8 *) start,
+ (U8 *) s,
+ (U8 *) PL_bufend,
+ termcode)))
+ {
+ yyerror(non_grapheme_msg);
}
Copy(s, termstr, termlen, U8);
if ( s + termlen <= PL_bufend
&& memEQ(s + 1, (char*)termstr + 1, termlen - 1))
{
- if ( check_grapheme
+ if ( UTF
&& UNLIKELY(! _is_grapheme((U8 *) start,
- (U8 *) s,
- (U8 *) PL_bufend,
+ (U8 *) s,
+ (U8 *) PL_bufend,
termcode)))
{
- Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
- "%s", non_grapheme_msg);
+ yyerror(non_grapheme_msg);
}
break;
}
}
- else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF) {
- has_utf8 = TRUE;
+ else if (!d_is_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF) {
+ d_is_utf8 = TRUE;
}
*to = *s;
break;
else if ((UV)*s == PL_multi_open)
brackets++;
- else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
- has_utf8 = TRUE;
+ else if (!d_is_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
+ d_is_utf8 = TRUE;
*to = *s;
}
}
sv_catpvn(sv, s, termlen);
s += termlen;
- if (has_utf8)
+ if (d_is_utf8)
SvUTF8_on(sv);
PL_multi_end = CopLINE(PL_curcop);
digit:
just_zero = FALSE;
if (!overflowed) {
+ assert(shift >= 0);
x = u << shift; /* make room for the digit */
total_bits += shift;
NV nv_mult = 1.0;
#endif
bool accumulate = TRUE;
- for (h++; (isXDIGIT(*h) || *h == '_'); h++) {
+ U8 b;
+ int lim = 1 << shift;
+ for (h++; ((isXDIGIT(*h) && (b = XDIGIT_VALUE(*h)) < lim) ||
+ *h == '_'); h++) {
if (isXDIGIT(*h)) {
- U8 b = XDIGIT_VALUE(*h);
significant_bits += shift;
#ifdef HEXFP_UQUAD
if (accumulate) {
if (significant_bits < NV_MANT_DIG) {
/* We are in the long "run" of xdigits,
* accumulate the full four bits. */
+ assert(shift >= 0);
hexfp_uquad <<= shift;
hexfp_uquad |= b;
hexfp_frac_bits += shift;
- } else {
+ } else if (significant_bits - shift < NV_MANT_DIG) {
/* We are at a hexdigit either at,
* or straddling, the edge of mantissa.
* We will try grabbing as many as
significant_bits - NV_MANT_DIG;
if (tail <= 0)
tail += shift;
+ assert(tail >= 0);
hexfp_uquad <<= tail;
+ assert((shift - tail) >= 0);
hexfp_uquad |= b >> (shift - tail);
hexfp_frac_bits += tail;
}
#else /* HEXFP_NV */
if (accumulate) {
- nv_mult /= 16.0;
+ nv_mult /= nvshift[shift];
if (nv_mult > 0.0)
hexfp_nv += b * nv_mult;
else
}
if (just_zero && (PL_hints & HINT_NEW_INTEGER))
sv = new_constant(start, s - start, "integer",
- sv, NULL, NULL, 0);
+ sv, NULL, NULL, 0, NULL);
else if (PL_hints & HINT_NEW_BINARY)
- sv = new_constant(start, s - start, "binary", sv, NULL, NULL, 0);
+ sv = new_constant(start, s - start, "binary",
+ sv, NULL, NULL, 0, NULL);
}
break;
floatit = TRUE;
}
if (floatit) {
- STORE_LC_NUMERIC_UNDERLYING_SET_STANDARD();
/* terminate the string */
*d = '\0';
if (UNLIKELY(hexfp)) {
} else {
nv = Atof(PL_tokenbuf);
}
- RESTORE_LC_NUMERIC_UNDERLYING();
sv = newSVnv(nv);
}
const char *const key = floatit ? "float" : "integer";
const STRLEN keylen = floatit ? 5 : 7;
sv = S_new_constant(aTHX_ PL_tokenbuf, d - PL_tokenbuf,
- key, keylen, sv, NULL, NULL, 0);
+ key, keylen, sv, NULL, NULL, 0, NULL);
}
break;
if (!got_some)
break;
}
- incline(s);
+ incline(s, PL_bufend);
}
enough:
if (!SvCUR(stuff) || needargs)
return oldsavestack_ix;
}
+
+/* Do extra initialisation of a CV (typically one just created by
+ * start_subparse()) if that CV is for a named sub
+ */
+
+void
+Perl_init_named_cv(pTHX_ CV *cv, OP *nameop)
+{
+ PERL_ARGS_ASSERT_INIT_NAMED_CV;
+
+ if (nameop->op_type == OP_CONST) {
+ const char *const name = SvPV_nolen_const(((SVOP*)nameop)->op_sv);
+ if ( strEQ(name, "BEGIN")
+ || strEQ(name, "END")
+ || strEQ(name, "INIT")
+ || strEQ(name, "CHECK")
+ || strEQ(name, "UNITCHECK")
+ )
+ CvSPECIAL_on(cv);
+ }
+ else
+ /* State subs inside anonymous subs need to be
+ clonable themselves. */
+ if ( CvANON(CvOUTSIDE(cv))
+ || CvCLONE(CvOUTSIDE(cv))
+ || !PadnameIsSTATE(PadlistNAMESARRAY(CvPADLIST(
+ CvOUTSIDE(cv)
+ ))[nameop->op_targ])
+ )
+ CvCLONE_on(cv);
+}
+
+
static int
S_yywarn(pTHX_ const char *const s, U32 flags)
{
}
break;
case BOM_UTF8_FIRST_BYTE: {
- const STRLEN len = sizeof(BOM_UTF8_TAIL) - 1; /* Exclude trailing NUL */
- if (slen > len && memEQ(s+1, BOM_UTF8_TAIL, len)) {
+ if (memBEGINs(s+1, slen - 1, BOM_UTF8_TAIL)) {
#ifdef DEBUGGING
if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
#endif
- s += len + 1; /* UTF-8 */
+ s += sizeof(BOM_UTF8) - 1; /* UTF-8 */
}
break;
}
}
}
+ /* 'chars' isn't quite the right name, as code points above 0xFFFF
+ * require 4 bytes per char */
chars = SvCUR(utf16_buffer) >> 1;
have = SvCUR(utf8_buffer);
- SvGROW(utf8_buffer, have + chars * 3 + 1);
+
+ /* Assume the worst case size as noted by the functions: twice the
+ * number of input bytes */
+ SvGROW(utf8_buffer, have + chars * 4 + 1);
if (reverse) {
end = utf16_to_utf8_reversed((U8*)SvPVX(utf16_buffer),
return KEYWORD_PLUGIN_DECLINE;
}
+/*
+=for apidoc Amx|void|wrap_keyword_plugin|Perl_keyword_plugin_t new_plugin|Perl_keyword_plugin_t *old_plugin_p
+
+Puts a C function into the chain of keyword plugins. This is the
+preferred way to manipulate the L</PL_keyword_plugin> variable.
+C<new_plugin> is a pointer to the C function that is to be added to the
+keyword plugin chain, and C<old_plugin_p> points to the storage location
+where a pointer to the next function in the chain will be stored. The
+value of C<new_plugin> is written into the L</PL_keyword_plugin> variable,
+while the value previously stored there is written to C<*old_plugin_p>.
+
+L</PL_keyword_plugin> is global to an entire process, and a module wishing
+to hook keyword parsing may find itself invoked more than once per
+process, typically in different threads. To handle that situation, this
+function is idempotent. The location C<*old_plugin_p> must initially
+(once per process) contain a null pointer. A C variable of static
+duration (declared at file scope, typically also marked C<static> to give
+it internal linkage) will be implicitly initialised appropriately, if it
+does not have an explicit initialiser. This function will only actually
+modify the plugin chain if it finds C<*old_plugin_p> to be null. This
+function is also thread safe on the small scale. It uses appropriate
+locking to avoid race conditions in accessing L</PL_keyword_plugin>.
+
+When this function is called, the function referenced by C<new_plugin>
+must be ready to be called, except for C<*old_plugin_p> being unfilled.
+In a threading situation, C<new_plugin> may be called immediately, even
+before this function has returned. C<*old_plugin_p> will always be
+appropriately set before C<new_plugin> is called. If C<new_plugin>
+decides not to do anything special with the identifier that it is given
+(which is the usual case for most calls to a keyword plugin), it must
+chain the plugin function referenced by C<*old_plugin_p>.
+
+Taken all together, XS code to install a keyword plugin should typically
+look something like this:
+
+ static Perl_keyword_plugin_t next_keyword_plugin;
+ static OP *my_keyword_plugin(pTHX_
+ char *keyword_plugin, STRLEN keyword_len, OP **op_ptr)
+ {
+ if (memEQs(keyword_ptr, keyword_len,
+ "my_new_keyword")) {
+ ...
+ } else {
+ return next_keyword_plugin(aTHX_
+ keyword_ptr, keyword_len, op_ptr);
+ }
+ }
+ BOOT:
+ wrap_keyword_plugin(my_keyword_plugin,
+ &next_keyword_plugin);
+
+Direct access to L</PL_keyword_plugin> should be avoided.
+
+=cut
+*/
+
+void
+Perl_wrap_keyword_plugin(pTHX_
+ Perl_keyword_plugin_t new_plugin, Perl_keyword_plugin_t *old_plugin_p)
+{
+ dVAR;
+
+ PERL_UNUSED_CONTEXT;
+ PERL_ARGS_ASSERT_WRAP_KEYWORD_PLUGIN;
+ if (*old_plugin_p) return;
+ KEYWORD_PLUGIN_MUTEX_LOCK;
+ if (!*old_plugin_p) {
+ *old_plugin_p = PL_keyword_plugin;
+ PL_keyword_plugin = new_plugin;
+ }
+ KEYWORD_PLUGIN_MUTEX_UNLOCK;
+}
+
#define parse_recdescent(g,p) S_parse_recdescent(aTHX_ g,p)
static void
S_parse_recdescent(pTHX_ int gramtype, I32 fakeeof)
if (PL_nexttoke) {
PL_parser->yychar = yylex();
if (PL_parser->yychar == LABEL) {
- char * const lpv = pl_yylval.pval;
- STRLEN llen = strlen(lpv);
+ SV * const labelsv = cSVOPx(pl_yylval.opval)->op_sv;
PL_parser->yychar = YYEMPTY;
- return newSVpvn_flags(lpv, llen, lpv[llen+1] ? SVf_UTF8 : 0);
+ cSVOPx(pl_yylval.opval)->op_sv = NULL;
+ op_free(pl_yylval.opval);
+ return labelsv;
} else {
yyunlex();
goto no_label;