#define PL_multi_start (PL_parser->multi_start)
#define PL_multi_open (PL_parser->multi_open)
#define PL_multi_close (PL_parser->multi_close)
-#define PL_pending_ident (PL_parser->pending_ident)
#define PL_preambled (PL_parser->preambled)
#define PL_sublex_info (PL_parser->sublex_info)
#define PL_linestr (PL_parser->linestr)
# define PL_nextval (PL_parser->nextval)
#endif
-/* This can't be done with embed.fnc, because struct yy_parser contains a
- member named pending_ident, which clashes with the generated #define */
-static int
-S_pending_ident(pTHX);
-
-static const char ident_too_long[] = "Identifier too long";
+static const char* const ident_too_long = "Identifier too long";
#ifdef PERL_MAD
# define CURMAD(slot,sv) if (PL_madskills) { curmad(slot,sv); sv = 0; }
* 1999-02-27 mjd-perl-patch@plover.com */
#define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
-#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
+#define SPACE_OR_TAB(c) isBLANK_A(c)
/* LEX_* are values for PL_lex_state, the state of the lexer.
* They are arranged oddly so that the guard on the switch statement
#define COPLINE_INC_WITH_HERELINES \
STMT_START { \
CopLINE_inc(PL_curcop); \
- if (PL_parser->herelines) \
- CopLINE(PL_curcop) += PL_parser->herelines, \
- PL_parser->herelines = 0; \
+ if (PL_parser->lex_shared->herelines) \
+ CopLINE(PL_curcop) += PL_parser->lex_shared->herelines, \
+ PL_parser->lex_shared->herelines = 0; \
} STMT_END
{ GIVEN, TOKENTYPE_IVAL, "GIVEN" },
{ HASHBRACK, TOKENTYPE_NONE, "HASHBRACK" },
{ IF, TOKENTYPE_IVAL, "IF" },
- { LABEL, TOKENTYPE_OPVAL, "LABEL" },
+ { LABEL, TOKENTYPE_PVAL, "LABEL" },
{ LOCAL, TOKENTYPE_IVAL, "LOCAL" },
{ LOOPEX, TOKENTYPE_OPNUM, "LOOPEX" },
{ LSTOP, TOKENTYPE_OPNUM, "LSTOP" },
{ METHOD, TOKENTYPE_OPVAL, "METHOD" },
{ MULOP, TOKENTYPE_OPNUM, "MULOP" },
{ MY, TOKENTYPE_IVAL, "MY" },
- { MYSUB, TOKENTYPE_NONE, "MYSUB" },
{ NOAMP, TOKENTYPE_NONE, "NOAMP" },
{ NOTOP, TOKENTYPE_NONE, "NOTOP" },
{ OROP, TOKENTYPE_IVAL, "OROP" },
}
if (name)
Perl_sv_catpv(aTHX_ report, name);
- else if ((char)rv > ' ' && (char)rv < '~')
+ else if ((char)rv > ' ' && (char)rv <= '~')
+ {
Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
+ if ((char)rv == 'p')
+ sv_catpvs(report, " (pending identifier)");
+ }
else if (!rv)
sv_catpvs(report, "EOF");
else
"\t(Missing semicolon on previous line?)\n");
else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
const char *t;
- for (t = PL_oldoldbufptr; (isALNUM_lazy_if(t,UTF) || *t == ':');
+ for (t = PL_oldoldbufptr; (isWORDCHAR_lazy_if(t,UTF) || *t == ':');
t += UTF ? UTF8SKIP(t) : 1)
NOOP;
if (t < PL_bufptr && isSPACE(*t))
Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
- "\t(Do you need to predeclare %"SVf"?)\n",
- SVfARG(newSVpvn_flags(PL_oldoldbufptr, (STRLEN)(t - PL_oldoldbufptr),
- SVs_TEMP | (UTF ? SVf_UTF8 : 0))));
+ "\t(Do you need to predeclare %"UTF8f"?)\n",
+ UTF8fARG(UTF, t - PL_oldoldbufptr, PL_oldoldbufptr));
}
else {
assert(s >= oldbp);
Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
- "\t(Missing operator before %"SVf"?)\n",
- SVfARG(newSVpvn_flags(oldbp, (STRLEN)(s - oldbp),
- SVs_TEMP | (UTF ? SVf_UTF8 : 0))));
+ "\t(Missing operator before %"UTF8f"?)\n",
+ UTF8fARG(UTF, s - oldbp, oldbp));
}
}
PL_bufptr = oldbp;
Newx(parser->lex_brackstack, 120, char);
Newx(parser->lex_casestack, 12, char);
*parser->lex_casestack = '\0';
+ Newxz(parser->lex_shared, 1, LEXSHARED);
if (line) {
STRLEN len;
parser->linestr = flags & LEX_START_COPIED
? SvREFCNT_inc_simple_NN(line)
: newSVpvn_flags(s, len, SvUTF8(line));
- if (!len || s[len-1] != ';')
- sv_catpvs(parser->linestr, "\n;");
+ sv_catpvn(parser->linestr, "\n;", rsfp ? 1 : 2);
} else {
- parser->linestr = newSVpvs("\n;");
+ parser->linestr = newSVpvn("\n;", rsfp ? 1 : 2);
}
parser->oldoldbufptr =
parser->oldbufptr =
(parser->old_parser && parser->rsfp != parser->old_parser->rsfp)))
PerlIO_close(parser->rsfp);
SvREFCNT_dec(parser->rsfp_filters);
+ SvREFCNT_dec(parser->lex_stuff);
+ SvREFCNT_dec(parser->sublex_info.repl);
Safefree(parser->lex_brackstack);
Safefree(parser->lex_casestack);
+ Safefree(parser->lex_shared);
PL_parser = parser->old_parser;
Safefree(parser);
}
+void
+Perl_parser_free_nexttoke_ops(pTHX_ yy_parser *parser, OPSLAB *slab)
+{
+#ifdef PERL_MAD
+ I32 nexttoke = parser->lasttoke;
+#else
+ I32 nexttoke = parser->nexttoke;
+#endif
+ PERL_ARGS_ASSERT_PARSER_FREE_NEXTTOKE_OPS;
+ while (nexttoke--) {
+#ifdef PERL_MAD
+ if (S_is_opval_token(parser->nexttoke[nexttoke].next_type
+ & 0xffff)
+ && parser->nexttoke[nexttoke].next_val.opval
+ && parser->nexttoke[nexttoke].next_val.opval->op_slabbed
+ && OpSLAB(parser->nexttoke[nexttoke].next_val.opval) == slab) {
+ op_free(parser->nexttoke[nexttoke].next_val.opval);
+ parser->nexttoke[nexttoke].next_val.opval = NULL;
+ }
+#else
+ if (S_is_opval_token(parser->nexttype[nexttoke] & 0xffff)
+ && parser->nextval[nexttoke].opval
+ && parser->nextval[nexttoke].opval->op_slabbed
+ && OpSLAB(parser->nextval[nexttoke].opval) == slab) {
+ op_free(parser->nextval[nexttoke].opval);
+ parser->nextval[nexttoke].opval = NULL;
+ }
+#endif
+ }
+}
+
/*
=for apidoc AmxU|SV *|PL_parser-E<gt>linestr
linestart_pos = PL_parser->linestart - buf;
last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
- re_eval_start_pos = PL_sublex_info.re_eval_start ?
- PL_sublex_info.re_eval_start - buf : 0;
+ re_eval_start_pos = PL_parser->lex_shared->re_eval_start ?
+ PL_parser->lex_shared->re_eval_start - buf : 0;
buf = sv_grow(linestr, len);
PL_parser->last_uni = buf + last_uni_pos;
if (PL_parser->last_lop)
PL_parser->last_lop = buf + last_lop_pos;
- if (PL_sublex_info.re_eval_start)
- PL_sublex_info.re_eval_start = buf + re_eval_start_pos;
+ if (PL_parser->lex_shared->re_eval_start)
+ PL_parser->lex_shared->re_eval_start = buf + re_eval_start_pos;
return buf;
}
if (flags & LEX_STUFF_UTF8) {
goto plain_copy;
} else {
- STRLEN highhalf = 0;
+ STRLEN highhalf = 0; /* Count of variants */
const char *p, *e = pv+len;
- for (p = pv; p != e; p++)
- highhalf += !!(((U8)*p) & 0x80);
+ for (p = pv; p != e; p++) {
+ if (! UTF8_IS_INVARIANT(*p)) {
+ highhalf++;
+ }
+ }
if (!highhalf)
goto plain_copy;
lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len+highhalf);
PL_parser->bufend += len+highhalf;
for (p = pv; p != e; p++) {
U8 c = (U8)*p;
- if (c & 0x80) {
- *bufptr++ = (char)(0xc0 | (c >> 6));
- *bufptr++ = (char)(0x80 | (c & 0x3f));
+ if (! UTF8_IS_INVARIANT(c)) {
+ *bufptr++ = UTF8_TWO_BYTE_HI(c);
+ *bufptr++ = UTF8_TWO_BYTE_LO(c);
} else {
*bufptr++ = (char)c;
}
const char *p, *e = pv+len;
for (p = pv; p != e; p++) {
U8 c = (U8)*p;
- if (c >= 0xc4) {
+ if (UTF8_IS_ABOVE_LATIN1(c)) {
Perl_croak(aTHX_ "Lexing code attempted to stuff "
"non-Latin-1 character into Latin-1 input");
- } else if (c >= 0xc2 && p+1 != e &&
- (((U8)p[1]) & 0xc0) == 0x80) {
+ } else if (UTF8_IS_NEXT_CHAR_DOWNGRADEABLE(p, e)) {
p++;
highhalf++;
- } else if (c >= 0x80) {
+ } else if (! UTF8_IS_INVARIANT(c)) {
/* malformed UTF-8 */
ENTER;
SAVESPTR(PL_warnhook);
PL_warnhook = PERL_WARNHOOK_FATAL;
- utf8n_to_uvuni((U8*)p, e-p, NULL, 0);
+ utf8n_to_uvchr((U8*)p, e-p, NULL, 0);
LEAVE;
}
}
SvCUR_set(PL_parser->linestr,
SvCUR(PL_parser->linestr) + len-highhalf);
PL_parser->bufend += len-highhalf;
- for (p = pv; p != e; p++) {
- U8 c = (U8)*p;
- if (c & 0x80) {
- *bufptr++ = (char)(((c & 0x3) << 6) | (p[1] & 0x3f));
- p++;
- } else {
- *bufptr++ = (char)c;
+ p = pv;
+ while (p < e) {
+ if (UTF8_IS_INVARIANT(*p)) {
+ *bufptr++ = *p;
+ p++;
}
+ else {
+ assert(p < e -1 );
+ *bufptr++ = TWO_BYTE_UTF8_TO_NATIVE(*p, *(p+1));
+ p += 2;
+ }
}
} else {
- plain_copy:
+ plain_copy:
lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len);
bufptr = PL_parser->bufptr;
Move(bufptr, bufptr+len, PL_parser->bufend+1-bufptr, char);
bufend = PL_parser->bufend;
}
head = (U8)*s;
- if (!(head & 0x80))
+ if (UTF8_IS_INVARIANT(head))
return head;
- if (head & 0x40) {
- len = PL_utf8skip[head];
+ if (UTF8_IS_START(head)) {
+ len = UTF8SKIP(&head);
while ((STRLEN)(bufend-s) < len) {
if (!lex_next_chunk(flags | LEX_KEEP_PREVIOUS))
break;
bufend = PL_parser->bufend;
}
}
- unichar = utf8n_to_uvuni((U8*)s, bufend-s, &retlen, UTF8_CHECK_ONLY);
+ unichar = utf8n_to_uvchr((U8*)s, bufend-s, &retlen, UTF8_CHECK_ONLY);
if (retlen == (STRLEN)-1) {
/* malformed UTF-8 */
ENTER;
SAVESPTR(PL_warnhook);
PL_warnhook = PERL_WARNHOOK_FATAL;
- utf8n_to_uvuni((U8*)s, bufend-s, NULL, 0);
+ utf8n_to_uvchr((U8*)s, bufend-s, NULL, 0);
LEAVE;
}
return unichar;
=cut
*/
+#define LEX_NO_INCLINE 0x40000000
#define LEX_NO_NEXT_CHUNK 0x80000000
void
Perl_lex_read_space(pTHX_ U32 flags)
{
char *s, *bufend;
+ const bool can_incline = !(flags & LEX_NO_INCLINE);
bool need_incline = 0;
- if (flags & ~(LEX_KEEP_PREVIOUS|LEX_NO_NEXT_CHUNK))
+ if (flags & ~(LEX_KEEP_PREVIOUS|LEX_NO_NEXT_CHUNK|LEX_NO_INCLINE))
Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_space");
#ifdef PERL_MAD
if (PL_skipwhite) {
} while (!(c == '\n' || (c == 0 && s == bufend)));
} else if (c == '\n') {
s++;
- PL_parser->linestart = s;
- if (s == bufend)
- need_incline = 1;
- else
- incline(s);
+ if (can_incline) {
+ PL_parser->linestart = s;
+ if (s == bufend)
+ need_incline = 1;
+ else
+ incline(s);
+ }
} else if (isSPACE(c)) {
s++;
} else if (c == 0 && s == bufend) {
if (flags & LEX_NO_NEXT_CHUNK)
break;
PL_parser->bufptr = s;
- COPLINE_INC_WITH_HERELINES;
+ if (can_incline) COPLINE_INC_WITH_HERELINES;
got_more = lex_next_chunk(flags);
- CopLINE_dec(PL_curcop);
+ if (can_incline) CopLINE_dec(PL_curcop);
s = PL_parser->bufptr;
bufend = PL_parser->bufend;
if (!got_more)
break;
- if (need_incline && PL_parser->rsfp) {
+ if (can_incline && need_incline && PL_parser->rsfp) {
incline(s);
need_incline = 0;
}
}
/*
+
+=for apidoc EXMp|bool|validate_proto|SV *name|SV *proto|bool warn
+
+This function performs syntax checking on a prototype, C<proto>.
+If C<warn> is true, any illegal characters or mismatched brackets
+will trigger illegalproto warnings, declaring that they were
+detected in the prototype for C<name>.
+
+The return value is C<true> if this is a valid prototype, and
+C<false> if it is not, regardless of whether C<warn> was C<true> or
+C<false>.
+
+Note that C<NULL> is a valid C<proto> and will always return C<true>.
+
+=cut
+
+ */
+
+bool
+Perl_validate_proto(pTHX_ SV *name, SV *proto, bool warn)
+{
+ STRLEN len, origlen;
+ char *p = proto ? SvPV(proto, len) : NULL;
+ bool bad_proto = FALSE;
+ bool in_brackets = FALSE;
+ bool after_slash = FALSE;
+ char greedy_proto = ' ';
+ bool proto_after_greedy_proto = FALSE;
+ bool must_be_last = FALSE;
+ bool underscore = FALSE;
+ bool bad_proto_after_underscore = FALSE;
+
+ PERL_ARGS_ASSERT_VALIDATE_PROTO;
+
+ if (!proto)
+ return TRUE;
+
+ origlen = len;
+ for (; len--; p++) {
+ if (!isSPACE(*p)) {
+ if (must_be_last)
+ proto_after_greedy_proto = TRUE;
+ if (underscore) {
+ if (!strchr(";@%", *p))
+ bad_proto_after_underscore = TRUE;
+ underscore = FALSE;
+ }
+ if (!strchr("$@%*;[]&\\_+", *p) || *p == '\0') {
+ bad_proto = TRUE;
+ }
+ else {
+ if (*p == '[')
+ in_brackets = TRUE;
+ else if (*p == ']')
+ in_brackets = FALSE;
+ else if ((*p == '@' || *p == '%') &&
+ !after_slash &&
+ !in_brackets ) {
+ must_be_last = TRUE;
+ greedy_proto = *p;
+ }
+ else if (*p == '_')
+ underscore = TRUE;
+ }
+ if (*p == '\\')
+ after_slash = TRUE;
+ else
+ after_slash = FALSE;
+ }
+ }
+
+ if (warn) {
+ SV *tmpsv = newSVpvs_flags("", SVs_TEMP);
+ p -= origlen;
+ p = SvUTF8(proto)
+ ? sv_uni_display(tmpsv, newSVpvn_flags(p, origlen, SVs_TEMP | SVf_UTF8),
+ origlen, UNI_DISPLAY_ISPRINT)
+ : pv_pretty(tmpsv, p, origlen, 60, NULL, NULL, PERL_PV_ESCAPE_NONASCII);
+
+ if (proto_after_greedy_proto)
+ Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
+ "Prototype after '%c' for %"SVf" : %s",
+ greedy_proto, SVfARG(name), p);
+ if (in_brackets)
+ Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
+ "Missing ']' in prototype for %"SVf" : %s",
+ SVfARG(name), p);
+ if (bad_proto)
+ Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
+ "Illegal character in prototype for %"SVf" : %s",
+ SVfARG(name), p);
+ if (bad_proto_after_underscore)
+ Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
+ "Illegal character after '_' in prototype for %"SVf" : %s",
+ SVfARG(name), p);
+ }
+
+ return (! (proto_after_greedy_proto || bad_proto) );
+}
+
+/*
* S_incline
* This subroutine has nothing to do with tilting, whether at windmills
* or pinball tables. Its name is short for "increment line". It
PERL_ARGS_ASSERT_INCLINE;
COPLINE_INC_WITH_HERELINES;
+ if (!PL_rsfp && !PL_parser->filtered && PL_lex_state == LEX_NORMAL
+ && s+1 == PL_bufend && *s == ';') {
+ /* fake newline in string eval */
+ CopLINE_dec(PL_curcop);
+ return;
+ }
if (*s++ != '#')
return;
while (SPACE_OR_TAB(*s))
if (t - s > 0) {
const STRLEN len = t - s;
- SV *const temp_sv = CopFILESV(PL_curcop);
- const char *cf;
- STRLEN tmplen;
-
- if (temp_sv) {
- cf = SvPVX(temp_sv);
- tmplen = SvCUR(temp_sv);
- } else {
- cf = NULL;
- tmplen = 0;
- }
if (!PL_rsfp && !PL_parser->filtered) {
/* must copy *{"::_<(eval N)[oldfilename:L]"}
* to *{"::_<newfilename"} */
/* However, the long form of evals is only turned on by the
debugger - usually they're "(eval %lu)" */
- char smallbuf[128];
- char *tmpbuf;
- GV **gvp;
- STRLEN tmplen2 = len;
- if (tmplen + 2 <= sizeof smallbuf)
- tmpbuf = smallbuf;
- else
- Newx(tmpbuf, tmplen + 2, char);
- tmpbuf[0] = '_';
- tmpbuf[1] = '<';
- memcpy(tmpbuf + 2, cf, tmplen);
- tmplen += 2;
- gvp = (GV**)hv_fetch(PL_defstash, tmpbuf, tmplen, FALSE);
- if (gvp) {
+ GV * const cfgv = CopFILEGV(PL_curcop);
+ if (cfgv) {
+ char smallbuf[128];
+ STRLEN tmplen2 = len;
char *tmpbuf2;
GV *gv2;
else
Newx(tmpbuf2, tmplen2 + 2, char);
- if (tmpbuf2 != smallbuf || tmpbuf != smallbuf) {
- /* Either they malloc'd it, or we malloc'd it,
- so no prefix is present in ours. */
- tmpbuf2[0] = '_';
- tmpbuf2[1] = '<';
- }
+ tmpbuf2[0] = '_';
+ tmpbuf2[1] = '<';
memcpy(tmpbuf2 + 2, s, tmplen2);
tmplen2 += 2;
alias the saved lines that are in the array.
Otherwise alias the whole array. */
if (CopLINE(PL_curcop) == line_num) {
- GvHV(gv2) = MUTABLE_HV(SvREFCNT_inc(GvHV(*gvp)));
- GvAV(gv2) = MUTABLE_AV(SvREFCNT_inc(GvAV(*gvp)));
+ GvHV(gv2) = MUTABLE_HV(SvREFCNT_inc(GvHV(cfgv)));
+ GvAV(gv2) = MUTABLE_AV(SvREFCNT_inc(GvAV(cfgv)));
}
- else if (GvAV(*gvp)) {
- AV * const av = GvAV(*gvp);
+ else if (GvAV(cfgv)) {
+ AV * const av = GvAV(cfgv);
const I32 start = CopLINE(PL_curcop)+1;
I32 items = AvFILLp(av) - start;
if (items > 0) {
if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
}
- if (tmpbuf != smallbuf) Safefree(tmpbuf);
}
CopFILE_free(PL_curcop);
CopFILE_setn(PL_curcop, s, len);
CopLINE_set(PL_curcop, line_num);
}
+#define skipspace(s) skipspace_flags(s, 0)
+
#ifdef PERL_MAD
/* skip space before PL_thistoken */
STATIC char *
-S_skipspace0(pTHX_ register char *s)
+S_skipspace0(pTHX_ char *s)
{
PERL_ARGS_ASSERT_SKIPSPACE0;
/* skip space after PL_thistoken */
STATIC char *
-S_skipspace1(pTHX_ register char *s)
+S_skipspace1(pTHX_ char *s)
{
const char *start = s;
I32 startoff = start - SvPVX(PL_linestr);
}
STATIC char *
-S_skipspace2(pTHX_ register char *s, SV **svp)
+S_skipspace2(pTHX_ char *s, SV **svp)
{
char *start;
- const I32 bufptroff = PL_bufptr - SvPVX(PL_linestr);
const I32 startoff = s - SvPVX(PL_linestr);
PERL_ARGS_ASSERT_SKIPSPACE2;
s = skipspace(s);
- PL_bufptr = SvPVX(PL_linestr) + bufptroff;
if (!PL_madskills || !svp)
return s;
start = SvPVX(PL_linestr) + startoff;
if (av) {
SV * const sv = newSV_type(SVt_PVMG);
if (orig_sv)
- sv_setsv(sv, orig_sv);
+ sv_setsv_flags(sv, orig_sv, 0); /* no cow */
else
sv_setpvn(sv, buf, len);
(void)SvIOK_on(sv);
SvIV_set(sv, 0);
- av_store(av, (I32)CopLINE(PL_curcop), sv);
+ av_store(av, CopLINE(PL_curcop), sv);
}
}
*/
STATIC char *
-S_skipspace(pTHX_ register char *s)
+S_skipspace_flags(pTHX_ char *s, U32 flags)
{
#ifdef PERL_MAD
char *start = s;
#endif /* PERL_MAD */
- PERL_ARGS_ASSERT_SKIPSPACE;
+ PERL_ARGS_ASSERT_SKIPSPACE_FLAGS;
#ifdef PERL_MAD
if (PL_skipwhite) {
sv_free(PL_skipwhite);
} else {
STRLEN bufptr_pos = PL_bufptr - SvPVX(PL_linestr);
PL_bufptr = s;
- lex_read_space(LEX_KEEP_PREVIOUS |
+ lex_read_space(flags | LEX_KEEP_PREVIOUS |
(PL_sublex_info.sub_inwhat || PL_lex_state == LEX_FORMLINE ?
LEX_NO_NEXT_CHUNK : 0));
s = PL_bufptr;
while (isSPACE(*PL_last_uni))
PL_last_uni++;
s = PL_last_uni;
- while (isALNUM_lazy_if(s,UTF) || *s == '-')
+ while (isWORDCHAR_lazy_if(s,UTF) || *s == '-')
s++;
if ((t = strchr(s, '(')) && t < PL_bufptr)
return;
tokereport(type, &NEXTVAL_NEXTTOKE);
}
#endif
- /* Don’t let opslab_force_free snatch it */
- if (S_is_opval_token(type & 0xffff) && NEXTVAL_NEXTTOKE.opval) {
- assert(!NEXTVAL_NEXTTOKE.opval->op_savefree);
- NEXTVAL_NEXTTOKE.opval->op_savefree = 1;
- }
#ifdef PERL_MAD
if (PL_curforce < 0)
start_force(PL_lasttoke);
*/
STATIC char *
-S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
+S_force_word(pTHX_ char *start, int token, int check_keyword, int allow_pack)
{
dVAR;
char *s;
start = SKIPSPACE1(start);
s = start;
if (isIDFIRST_lazy_if(s,UTF) ||
- (allow_pack && *s == ':') ||
- (allow_initial_tick && *s == '\'') )
+ (allow_pack && *s == ':') )
{
s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
- if (check_keyword && keyword(PL_tokenbuf, len, 0))
+ if (check_keyword) {
+ char *s2 = PL_tokenbuf;
+ if (allow_pack && len > 6 && strnEQ(s2, "CORE::", 6))
+ s2 += 6, len -= 6;
+ if (keyword(s2, len, 0))
return start;
+ }
start_force(PL_curforce);
if (PL_madskills)
curmad('X', newSVpvn(start,s-start));
*/
STATIC void
-S_force_ident(pTHX_ register const char *s, int kind)
+S_force_ident(pTHX_ const char *s, int kind)
{
dVAR;
PERL_ARGS_ASSERT_FORCE_IDENT;
- if (*s) {
- const STRLEN len = strlen(s);
+ if (s[0]) {
+ const STRLEN len = s[1] ? strlen(s) : 1; /* s = "\"" see yylex */
OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpvn_flags(s, len,
UTF ? SVf_UTF8 : 0));
start_force(PL_curforce);
}
}
+static void
+S_force_ident_maybe_lex(pTHX_ char pit)
+{
+ start_force(PL_curforce);
+ NEXTVAL_NEXTTOKE.ival = pit;
+ force_next('p');
+}
+
NV
Perl_str_to_version(pTHX_ SV *sv)
{
return THING;
}
else if (op_type == OP_BACKTICK && PL_lex_op) {
- /* readpipe() vas overriden */
+ /* readpipe() was overridden */
cSVOPx(cLISTOPx(cUNOPx(PL_lex_op)->op_first)->op_first->op_sibling)->op_sv = tokeq(PL_lex_stuff);
pl_yylval.opval = PL_lex_op;
PL_lex_op = NULL;
S_sublex_push(pTHX)
{
dVAR;
+ LEXSHARED *shared;
ENTER;
PL_lex_state = PL_sublex_info.super_state;
SAVEI32(PL_lex_starts);
SAVEI8(PL_lex_state);
SAVESPTR(PL_lex_repl);
- SAVEPPTR(PL_sublex_info.re_eval_start);
- SAVESPTR(PL_sublex_info.re_eval_str);
- SAVEPPTR(PL_sublex_info.super_bufptr);
SAVEVPTR(PL_lex_inpat);
SAVEI16(PL_lex_inwhat);
SAVECOPLINE(PL_curcop);
SAVESPTR(PL_linestr);
SAVEGENERICPV(PL_lex_brackstack);
SAVEGENERICPV(PL_lex_casestack);
+ SAVEGENERICPV(PL_parser->lex_shared);
+ SAVEBOOL(PL_parser->lex_re_reparsing);
/* The here-doc parser needs to be able to peek into outer lexing
- scopes to find the body of the here-doc. We use SvIVX(PL_linestr)
- to store the outer PL_bufptr and SvNVX to store the outer
- PL_linestr. Since SvIVX already means something else, we use
- PL_sublex_info.super_bufptr for the innermost scope (the one we are
- now entering), and a localised SvIVX for outer scopes.
+ scopes to find the body of the here-doc. So we put PL_linestr and
+ PL_bufptr into lex_shared, to ‘share’ those values.
*/
- SvUPGRADE(PL_linestr, SVt_PVIV);
- /* A null super_bufptr means the outer lexing scope is not peekable,
- because it is a single line from an input stream. */
- SAVEIV(SvIVX(PL_linestr));
- SvIVX(PL_linestr) = PTR2IV(PL_sublex_info.super_bufptr);
- PL_sublex_info.super_bufptr =
- (SvTYPE(PL_linestr) < SVt_PVNV || !SvNVX(PL_linestr))
- && (PL_rsfp || PL_parser->filtered)
- ? NULL
- : PL_bufptr;
- SvUPGRADE(PL_lex_stuff, SVt_PVNV);
- SvNVX(PL_lex_stuff) = PTR2NV(PL_linestr);
+ PL_parser->lex_shared->ls_linestr = PL_linestr;
+ PL_parser->lex_shared->ls_bufptr = PL_bufptr;
PL_linestr = PL_lex_stuff;
PL_lex_repl = PL_sublex_info.repl;
PL_lex_stuff = NULL;
PL_sublex_info.repl = NULL;
- PL_sublex_info.re_eval_start = NULL;
- PL_sublex_info.re_eval_str = NULL;
PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
= SvPVX(PL_linestr);
PL_bufend += SvCUR(PL_linestr);
PL_last_lop = PL_last_uni = NULL;
SAVEFREESV(PL_linestr);
+ if (PL_lex_repl) SAVEFREESV(PL_lex_repl);
PL_lex_dojoin = FALSE;
PL_lex_brackets = PL_lex_formbrack = 0;
PL_lex_starts = 0;
PL_lex_state = LEX_INTERPCONCAT;
CopLINE_set(PL_curcop, (line_t)PL_multi_start);
+
+ Newxz(shared, 1, LEXSHARED);
+ shared->ls_prev = PL_parser->lex_shared;
+ PL_parser->lex_shared = shared;
PL_lex_inwhat = PL_sublex_info.sub_inwhat;
if (PL_lex_inwhat == OP_TRANSR) PL_lex_inwhat = OP_TRANS;
else
PL_lex_inpat = NULL;
+ PL_parser->lex_re_reparsing = cBOOL(PL_in_eval & EVAL_RE_REPARSING);
+ PL_in_eval &= ~EVAL_RE_REPARSING;
+
return '(';
}
/* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
assert(PL_lex_inwhat != OP_TRANSR);
if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
- SvUPGRADE(PL_lex_repl, SVt_PVNV);
- SvNVX(PL_lex_repl) = SvNVX(PL_linestr);
PL_linestr = PL_lex_repl;
PL_lex_inpat = 0;
PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
PL_bufend += SvCUR(PL_linestr);
PL_last_lop = PL_last_uni = NULL;
- SAVEFREESV(PL_linestr);
PL_lex_dojoin = FALSE;
PL_lex_brackets = 0;
PL_lex_allbrackets = 0;
}
}
+PERL_STATIC_INLINE SV*
+S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e)
+{
+ /* <s> points to first character of interior of \N{}, <e> to one beyond the
+ * interior, hence to the "}". Finds what the name resolves to, returning
+ * an SV* containing it; NULL if no valid one found */
+
+ SV* res = newSVpvn_flags(s, e - s, UTF ? SVf_UTF8 : 0);
+
+ HV * table;
+ SV **cvp;
+ SV *cv;
+ SV *rv;
+ HV *stash;
+ const U8* first_bad_char_loc;
+ const char* backslash_ptr = s - 3; /* Points to the <\> of \N{... */
+
+ PERL_ARGS_ASSERT_GET_AND_CHECK_BACKSLASH_N_NAME;
+
+ if (UTF && ! is_utf8_string_loc((U8 *) backslash_ptr,
+ e - backslash_ptr,
+ &first_bad_char_loc))
+ {
+ /* If warnings are on, this will print a more detailed analysis of what
+ * is wrong than the error message below */
+ utf8n_to_uvchr(first_bad_char_loc,
+ e - ((char *) first_bad_char_loc),
+ NULL, 0);
+
+ /* We deliberately don't try to print the malformed character, which
+ * might not print very well; it also may be just the first of many
+ * malformations, so don't print what comes after it */
+ yyerror(Perl_form(aTHX_
+ "Malformed UTF-8 character immediately after '%.*s'",
+ (int) (first_bad_char_loc - (U8 *) backslash_ptr), backslash_ptr));
+ return NULL;
+ }
+
+ res = new_constant( NULL, 0, "charnames", res, NULL, backslash_ptr,
+ /* include the <}> */
+ e - backslash_ptr + 1);
+ if (! SvPOK(res)) {
+ SvREFCNT_dec_NN(res);
+ return NULL;
+ }
+
+ /* See if the charnames handler is the Perl core's, and if so, we can skip
+ * the validation needed for a user-supplied one, as Perl's does its own
+ * validation. */
+ table = GvHV(PL_hintgv); /* ^H */
+ cvp = hv_fetchs(table, "charnames", FALSE);
+ if (cvp && (cv = *cvp) && SvROK(cv) && ((rv = SvRV(cv)) != NULL)
+ && SvTYPE(rv) == SVt_PVCV && ((stash = CvSTASH(rv)) != NULL))
+ {
+ const char * const name = HvNAME(stash);
+ if strEQ(name, "_charnames") {
+ return res;
+ }
+ }
+
+ /* Here, it isn't Perl's charname handler. We can't rely on a
+ * user-supplied handler to validate the input name. For non-ut8 input,
+ * look to see that the first character is legal. Then loop through the
+ * rest checking that each is a continuation */
+
+ /* This code needs to be sync'ed with a regex in _charnames.pm which does
+ * the same thing */
+
+ if (! UTF) {
+ if (! isALPHAU(*s)) {
+ goto bad_charname;
+ }
+ s++;
+ while (s < e) {
+ if (! isCHARNAME_CONT(*s)) {
+ goto bad_charname;
+ }
+ if (*s == ' ' && *(s-1) == ' ' && ckWARN_d(WARN_DEPRECATED)) {
+ Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
+ "A sequence of multiple spaces in a charnames "
+ "alias definition is deprecated");
+ }
+ s++;
+ }
+ if (*(s-1) == ' ' && ckWARN_d(WARN_DEPRECATED)) {
+ Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
+ "Trailing white-space in a charnames alias "
+ "definition is deprecated");
+ }
+ }
+ else {
+ /* Similarly for utf8. For invariants can check directly; for other
+ * Latin1, can calculate their code point and check; otherwise use a
+ * swash */
+ if (UTF8_IS_INVARIANT(*s)) {
+ if (! isALPHAU(*s)) {
+ goto bad_charname;
+ }
+ s++;
+ } else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
+ if (! isALPHAU(TWO_BYTE_UTF8_TO_NATIVE(*s, *(s+1)))) {
+ goto bad_charname;
+ }
+ s += 2;
+ }
+ else {
+ if (! PL_utf8_charname_begin) {
+ U8 flags = _CORE_SWASH_INIT_ACCEPT_INVLIST;
+ PL_utf8_charname_begin = _core_swash_init("utf8",
+ "_Perl_Charname_Begin",
+ &PL_sv_undef,
+ 1, 0, NULL, &flags);
+ }
+ if (! swash_fetch(PL_utf8_charname_begin, (U8 *) s, TRUE)) {
+ goto bad_charname;
+ }
+ s += UTF8SKIP(s);
+ }
+
+ while (s < e) {
+ if (UTF8_IS_INVARIANT(*s)) {
+ if (! isCHARNAME_CONT(*s)) {
+ goto bad_charname;
+ }
+ if (*s == ' ' && *(s-1) == ' '
+ && ckWARN_d(WARN_DEPRECATED)) {
+ Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
+ "A sequence of multiple spaces in a charnam"
+ "es alias definition is deprecated");
+ }
+ s++;
+ }
+ else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
+ if (! isCHARNAME_CONT(TWO_BYTE_UTF8_TO_NATIVE(*s, *(s+1))))
+ {
+ goto bad_charname;
+ }
+ s += 2;
+ }
+ else {
+ if (! PL_utf8_charname_continue) {
+ U8 flags = _CORE_SWASH_INIT_ACCEPT_INVLIST;
+ PL_utf8_charname_continue = _core_swash_init("utf8",
+ "_Perl_Charname_Continue",
+ &PL_sv_undef,
+ 1, 0, NULL, &flags);
+ }
+ if (! swash_fetch(PL_utf8_charname_continue, (U8 *) s, TRUE)) {
+ goto bad_charname;
+ }
+ s += UTF8SKIP(s);
+ }
+ }
+ if (*(s-1) == ' ' && ckWARN_d(WARN_DEPRECATED)) {
+ Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
+ "Trailing white-space in a charnames alias "
+ "definition is deprecated");
+ }
+ }
+
+ if (SvUTF8(res)) { /* Don't accept malformed input */
+ const U8* first_bad_char_loc;
+ STRLEN len;
+ const char* const str = SvPV_const(res, len);
+ if (! is_utf8_string_loc((U8 *) str, len, &first_bad_char_loc)) {
+ /* If warnings are on, this will print a more detailed analysis of
+ * what is wrong than the error message below */
+ utf8n_to_uvchr(first_bad_char_loc,
+ (char *) first_bad_char_loc - str,
+ NULL, 0);
+
+ /* We deliberately don't try to print the malformed character,
+ * which might not print very well; it also may be just the first
+ * of many malformations, so don't print what comes after it */
+ yyerror_pv(
+ Perl_form(aTHX_
+ "Malformed UTF-8 returned by %.*s immediately after '%.*s'",
+ (int) (e - backslash_ptr + 1), backslash_ptr,
+ (int) ((char *) first_bad_char_loc - str), str
+ ),
+ SVf_UTF8);
+ return NULL;
+ }
+ }
+
+ return res;
+
+ bad_charname: {
+ int bad_char_size = ((UTF) ? UTF8SKIP(s) : 1);
+
+ /* The final %.*s makes sure that should the trailing NUL be missing
+ * that this print won't run off the end of the string */
+ yyerror_pv(
+ Perl_form(aTHX_
+ "Invalid character in \\N{...}; marked by <-- HERE in %.*s<-- HERE %.*s",
+ (int)(s - backslash_ptr + bad_char_size), backslash_ptr,
+ (int)(e - s + bad_char_size), s + bad_char_size
+ ),
+ UTF ? SVf_UTF8 : 0);
+ return NULL;
+ }
+}
+
/*
scan_const
In patterns:
expand:
- \N{ABC} => \N{U+41.42.43}
+ \N{FOO} => \N{U+hex_for_character_FOO}
+ (if FOO expands to multiple characters, expands to \N{U+xx.XX.yy ...})
pass through:
all other \-char, including \N and \N{ apart from \N{ABC}
isn't utf8, as for example
when it is entirely composed
of hex constants */
+ SV *res; /* result from charnames */
/* Note on sizing: The scanned constant is placed into sv, which is
* initialized by newSV() assuming one byte of output for every byte of
* far, plus the length the current construct will occupy, plus room for
* the trailing NUL, plus one byte for every input byte still unscanned */
- UV uv;
+ UV uv = UV_MAX; /* Initialize to weird value to try to catch any uses
+ before set */
#ifdef EBCDIC
UV literal_endpoint = 0;
bool native_range = TRUE; /* turned to FALSE if the first endpoint is Unicode. */
this_utf8 = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
}
+ /* Protect sv from errors and fatal warnings. */
+ ENTER_with_name("scan_const");
+ SAVEFREESV(sv);
while (s < send || dorange) {
#ifdef EBCDIC
&& !native_range
#endif
- ) {
+ ) {
char * const c = (char*)utf8_hop((U8*)d, -1);
char *e = d++;
while (e-- > c)
*(e + 1) = *e;
- *c = (char)UTF_TO_NATIVE(0xff);
+ *c = (char) ILLEGAL_UTF8_BYTE;
/* mark the range as done, and continue */
dorange = FALSE;
didrange = TRUE;
#ifdef EBCDIC
if (literal_endpoint == 2 &&
- ((isLOWER(min) && isLOWER(max)) ||
- (isUPPER(min) && isUPPER(max)))) {
- if (isLOWER(min)) {
+ ((isLOWER_A(min) && isLOWER_A(max)) ||
+ (isUPPER_A(min) && isUPPER_A(max)))) {
+ if (isLOWER_A(min)) {
for (i = min; i <= max; i++)
- if (isLOWER(i))
- *d++ = NATIVE_TO_NEED(has_utf8,i);
+ if (isLOWER_A(i))
+ *d++ = i;
} else {
for (i = min; i <= max; i++)
- if (isUPPER(i))
- *d++ = NATIVE_TO_NEED(has_utf8,i);
+ if (isUPPER_A(i))
+ *d++ = i;
}
}
else
for (i = min; i <= max; i++)
#ifdef EBCDIC
if (has_utf8) {
- const U8 ch = (U8)NATIVE_TO_UTF(i);
- if (UNI_IS_INVARIANT(ch))
- *d++ = (U8)i;
- else {
- *d++ = (U8)UTF8_EIGHT_BIT_HI(ch);
- *d++ = (U8)UTF8_EIGHT_BIT_LO(ch);
- }
+ append_utf8_from_native_byte(i, &d);
}
else
#endif
if (uvmax) {
d = (char*)uvchr_to_utf8((U8*)d, 0x100);
if (uvmax > 0x101)
- *d++ = (char)UTF_TO_NATIVE(0xff);
+ *d++ = (char) ILLEGAL_UTF8_BYTE;
if (uvmax > 0x100)
d = (char*)uvchr_to_utf8((U8*)d, uvmax);
}
&& !native_range
#endif
) {
- *d++ = (char)UTF_TO_NATIVE(0xff); /* use illegal utf8 byte--see pmtrans */
+ *d++ = (char) ILLEGAL_UTF8_BYTE; /* use illegal utf8 byte--see pmtrans */
s++;
continue;
}
* char, which will be done separately.
* Stop on (?{..}) and friends */
- else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
+ else if (*s == '(' && PL_lex_inpat && s[1] == '?' && !in_charclass) {
if (s[2] == '#') {
while (s+1 < send && *s != ')')
- *d++ = NATIVE_TO_NEED(has_utf8,*s++);
+ *d++ = *s++;
}
- else if (!PL_lex_casemods && !in_charclass &&
+ else if (!PL_lex_casemods &&
( s[2] == '{' /* This should match regcomp.c */
|| (s[2] == '?' && s[3] == '{')))
{
}
/* likewise skip #-initiated comments in //x patterns */
- else if (*s == '#' && PL_lex_inpat &&
+ else if (*s == '#' && PL_lex_inpat && !in_charclass &&
((PMOP*)PL_lex_inpat)->op_pmflags & RXf_PMf_EXTENDED) {
while (s+1 < send && *s != '\n')
- *d++ = NATIVE_TO_NEED(has_utf8,*s++);
+ *d++ = *s++;
}
/* no further processing of single-quoted regex */
(@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
*/
else if (*s == '@' && s[1]) {
- if (isALNUM_lazy_if(s+1,UTF))
+ if (isWORDCHAR_lazy_if(s+1,UTF))
break;
if (strchr(":'{$", s[1]))
break;
else if (PL_lex_inpat
&& (*s != 'N'
|| s[1] != '{'
- || regcurly(s + 1)))
+ || regcurly(s + 1, FALSE)))
{
- *d++ = NATIVE_TO_NEED(has_utf8,'\\');
+ *d++ = '\\';
goto default_action;
}
/* FALL THROUGH */
default:
{
- if ((isALNUMC(*s)))
+ if ((isALPHANUMERIC(*s)))
Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
"Unrecognized escape \\%c passed through",
*s);
case '0': case '1': case '2': case '3':
case '4': case '5': case '6': case '7':
{
- I32 flags = 0;
+ I32 flags = PERL_SCAN_SILENT_ILLDIGIT;
STRLEN len = 3;
- uv = NATIVE_TO_UNI(grok_oct(s, &len, &flags, NULL));
+ uv = grok_oct(s, &len, &flags, NULL);
s += len;
+ if (len < 3 && s < send && isDIGIT(*s)
+ && ckWARN(WARN_MISC))
+ {
+ Perl_warner(aTHX_ packWARN(WARN_MISC),
+ "%s", form_short_octal_warning(s, len));
+ }
}
goto NUM_ESCAPE_INSERT;
/* eg. \o{24} indicates the octal constant \024 */
case 'o':
{
- STRLEN len;
const char* error;
- bool valid = grok_bslash_o(s, &uv, &len, &error, 1);
- s += len;
+ bool valid = grok_bslash_o(&s, &uv, &error,
+ TRUE, /* Output warning */
+ FALSE, /* Not strict */
+ TRUE, /* Output warnings for
+ non-portables */
+ UTF);
if (! valid) {
yyerror(error);
continue;
/* eg. \x24 indicates the hex constant 0x24 */
case 'x':
{
- STRLEN len;
const char* error;
- bool valid = grok_bslash_x(s, &uv, &len, &error, 1);
- s += len;
+ bool valid = grok_bslash_x(&s, &uv, &error,
+ TRUE, /* Output warning */
+ FALSE, /* Not strict */
+ TRUE, /* Output warnings for
+ non-portables */
+ UTF);
if (! valid) {
yyerror(error);
continue;
* UTF-8 sequence they can end up as, except if they force us
* to recode the rest of the string into utf8 */
- /* Here uv is the ordinal of the next character being added in
- * unicode (converted from native). */
- if (!UNI_IS_INVARIANT(uv)) {
+ /* Here uv is the ordinal of the next character being added */
+ if (!NATIVE_IS_INVARIANT(uv)) {
if (!has_utf8 && uv > 255) {
/* Might need to recode whatever we have accumulated so
* far if it contains any chars variant in utf8 or
}
if (has_utf8) {
- d = (char*)uvuni_to_utf8((U8*)d, uv);
+ d = (char*)uvchr_to_utf8((U8*)d, uv);
if (PL_lex_inwhat == OP_TRANS &&
PL_sublex_info.sub_op) {
PL_sublex_info.sub_op->op_private |=
* now, while preserving the fact that it was a named character
* so that the regex compiler knows this */
- /* This section of code doesn't generally use the
- * NATIVE_TO_NEED() macro to transform the input. I (khw) did
- * a close examination of this macro and determined it is a
- * no-op except on utfebcdic variant characters. Every
- * character generated by this that would normally need to be
- * enclosed by this macro is invariant, so the macro is not
- * needed, and would complicate use of copy(). XXX There are
- * other parts of this file where the macro is used
- * inconsistently, but are saved by it being a no-op */
-
/* The structure of this section of code (besides checking for
* errors and upgrading to utf8) is:
* Further disambiguate between the two meanings of \N, and if
/* Here it looks like a named character */
- if (PL_lex_inpat) {
-
- /* XXX This block is temporary code. \N{} implies that the
- * pattern is to have Unicode semantics, and therefore
- * currently has to be encoded in utf8. By putting it in
- * utf8 now, we save a whole pass in the regular expression
- * compiler. Once that code is changed so Unicode
- * semantics doesn't necessarily have to be in utf8, this
- * block should be removed. However, the code that parses
- * the output of this would have to be changed to not
- * necessarily expect utf8 */
- if (!has_utf8) {
- SvCUR_set(sv, d - SvPVX_const(sv));
- SvPOK_on(sv);
- *d = '\0';
- /* See Note on sizing above. */
- sv_utf8_upgrade_flags_grow(sv,
- SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
- /* 5 = '\N{' + cur char + NUL */
- (STRLEN)(send - s) + 5);
- d = SvPVX(sv) + SvCUR(sv);
- has_utf8 = TRUE;
- }
- }
-
if (*s == 'U' && s[1] == '+') { /* \N{U+...} */
I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
| PERL_SCAN_DISALLOW_PREFIX;
has_utf8 = TRUE;
}
- /* Add the string to the output */
+ /* Add the (Unicode) code point to the output. */
if (UNI_IS_INVARIANT(uv)) {
- *d++ = (char) uv;
+ *d++ = (char) LATIN1_TO_NATIVE(uv);
}
- else d = (char*)uvuni_to_utf8((U8*)d, uv);
+ else {
+ d = (char*) uvoffuni_to_utf8_flags((U8*)d, uv, 0);
+ }
}
}
- else { /* Here is \N{NAME} but not \N{U+...}. */
-
- SV *res; /* result from charnames */
- const char *str; /* the string in 'res' */
- STRLEN len; /* its length */
-
- /* Get the value for NAME */
- res = newSVpvn(s, e - s);
- res = new_constant( NULL, 0, "charnames",
- /* includes all of: \N{...} */
- res, NULL, s - 3, e - s + 4 );
-
- /* Most likely res will be in utf8 already since the
- * standard charnames uses pack U, but a custom translator
- * can leave it otherwise, so make sure. XXX This can be
- * revisited to not have charnames use utf8 for characters
- * that don't need it when regexes don't have to be in utf8
- * for Unicode semantics. If doing so, remember EBCDIC */
- sv_utf8_upgrade(res);
- str = SvPV_const(res, len);
-
- /* Don't accept malformed input */
- if (! is_utf8_string((U8 *) str, len)) {
- yyerror("Malformed UTF-8 returned by \\N");
- }
- else if (PL_lex_inpat) {
+ else /* Here is \N{NAME} but not \N{U+...}. */
+ if ((res = get_and_check_backslash_N_name(s, e)))
+ {
+ STRLEN len;
+ const char *str = SvPV_const(res, len);
+ if (PL_lex_inpat) {
if (! len) { /* The name resolved to an empty string */
Copy("\\N{}", d, 4, char);
* returned by charnames */
const char *str_end = str + len;
- STRLEN char_length; /* cur char's byte length */
- STRLEN output_length; /* and the number of bytes
- after this is translated
- into hex digits */
const STRLEN off = d - SvPVX_const(sv);
- /* 2 hex per byte; 2 chars for '\N'; 2 chars for
- * max('U+', '.'); and 1 for NUL */
- char hex_string[2 * UTF8_MAXBYTES + 5];
-
- /* Get the first character of the result. */
- U32 uv = utf8n_to_uvuni((U8 *) str,
- len,
- &char_length,
- UTF8_ALLOW_ANYUV);
-
- /* The call to is_utf8_string() above hopefully
- * guarantees that there won't be an error. But
- * it's easy here to make sure. The function just
- * above warns and returns 0 if invalid utf8, but
- * it can also return 0 if the input is validly a
- * NUL. Disambiguate */
- if (uv == 0 && NATIVE_TO_ASCII(*str) != '\0') {
- uv = UNICODE_REPLACEMENT;
- }
-
- /* Convert first code point to hex, including the
- * boiler plate before it. For all these, we
- * convert to native format so that downstream code
- * can continue to assume the input is native */
- output_length =
- my_snprintf(hex_string, sizeof(hex_string),
- "\\N{U+%X",
- (unsigned int) UNI_TO_NATIVE(uv));
-
- /* Make sure there is enough space to hold it */
- d = off + SvGROW(sv, off
- + output_length
- + (STRLEN)(send - e)
- + 2); /* '}' + NUL */
- /* And output it */
- Copy(hex_string, d, output_length, char);
- d += output_length;
-
- /* For each subsequent character, append dot and
- * its ordinal in hex */
- while ((str += char_length) < str_end) {
- const STRLEN off = d - SvPVX_const(sv);
- U32 uv = utf8n_to_uvuni((U8 *) str,
- str_end - str,
- &char_length,
- UTF8_ALLOW_ANYUV);
- if (uv == 0 && NATIVE_TO_ASCII(*str) != '\0') {
- uv = UNICODE_REPLACEMENT;
- }
-
- output_length =
- my_snprintf(hex_string, sizeof(hex_string),
- ".%X",
- (unsigned int) UNI_TO_NATIVE(uv));
-
- d = off + SvGROW(sv, off
- + output_length
- + (STRLEN)(send - e)
- + 2); /* '}' + NUL */
- Copy(hex_string, d, output_length, char);
- d += output_length;
+ if (! SvUTF8(res)) {
+ /* For the non-UTF-8 case, we can determine the
+ * exact length needed without having to parse
+ * through the string. Each character takes up
+ * 2 hex digits plus either a trailing dot or
+ * the "}" */
+ d = off + SvGROW(sv, off
+ + 3 * len
+ + 6 /* For the "\N{U+", and
+ trailing NUL */
+ + (STRLEN)(send - e));
+ Copy("\\N{U+", d, 5, char);
+ d += 5;
+ while (str < str_end) {
+ char hex_string[4];
+ my_snprintf(hex_string, sizeof(hex_string),
+ "%02X.", (U8) *str);
+ Copy(hex_string, d, 3, char);
+ d += 3;
+ str++;
+ }
+ d--; /* We will overwrite below the final
+ dot with a right brace */
+ }
+ else {
+ STRLEN char_length; /* cur char's byte length */
+
+ /* and the number of bytes after this is
+ * translated into hex digits */
+ STRLEN output_length;
+
+ /* 2 hex per byte; 2 chars for '\N'; 2 chars
+ * for max('U+', '.'); and 1 for NUL */
+ char hex_string[2 * UTF8_MAXBYTES + 5];
+
+ /* Get the first character of the result. */
+ U32 uv = utf8n_to_uvchr((U8 *) str,
+ len,
+ &char_length,
+ UTF8_ALLOW_ANYUV);
+ /* Convert first code point to hex, including
+ * the boiler plate before it. */
+ output_length =
+ my_snprintf(hex_string, sizeof(hex_string),
+ "\\N{U+%X",
+ (unsigned int) uv);
+
+ /* Make sure there is enough space to hold it */
+ d = off + SvGROW(sv, off
+ + output_length
+ + (STRLEN)(send - e)
+ + 2); /* '}' + NUL */
+ /* And output it */
+ Copy(hex_string, d, output_length, char);
+ d += output_length;
+
+ /* For each subsequent character, append dot and
+ * its ordinal in hex */
+ while ((str += char_length) < str_end) {
+ const STRLEN off = d - SvPVX_const(sv);
+ U32 uv = utf8n_to_uvchr((U8 *) str,
+ str_end - str,
+ &char_length,
+ UTF8_ALLOW_ANYUV);
+ output_length =
+ my_snprintf(hex_string,
+ sizeof(hex_string),
+ ".%X",
+ (unsigned int) uv);
+
+ d = off + SvGROW(sv, off
+ + output_length
+ + (STRLEN)(send - e)
+ + 2); /* '}' + NUL */
+ Copy(hex_string, d, output_length, char);
+ d += output_length;
+ }
}
*d++ = '}'; /* Done. Add the trailing brace */
Copy(str, d, len, char);
d += len;
}
+
SvREFCNT_dec(res);
- /* Deprecate non-approved name syntax */
- if (ckWARN_d(WARN_DEPRECATED)) {
- bool problematic = FALSE;
- char* i = s;
-
- /* For non-ut8 input, look to see that the first
- * character is an alpha, then loop through the rest
- * checking that each is a continuation */
- if (! this_utf8) {
- if (! isALPHAU(*i)) problematic = TRUE;
- else for (i = s + 1; i < e; i++) {
- if (isCHARNAME_CONT(*i)) continue;
- problematic = TRUE;
- break;
- }
- }
- else {
- /* Similarly for utf8. For invariants can check
- * directly. We accept anything above the latin1
- * range because it is immaterial to Perl if it is
- * correct or not, and is expensive to check. But
- * it is fairly easy in the latin1 range to convert
- * the variants into a single character and check
- * those */
- if (UTF8_IS_INVARIANT(*i)) {
- if (! isALPHAU(*i)) problematic = TRUE;
- } else if (UTF8_IS_DOWNGRADEABLE_START(*i)) {
- if (! isALPHAU(UNI_TO_NATIVE(TWO_BYTE_UTF8_TO_UNI(*i,
- *(i+1)))))
- {
- problematic = TRUE;
- }
- }
- if (! problematic) for (i = s + UTF8SKIP(s);
- i < e;
- i+= UTF8SKIP(i))
- {
- if (UTF8_IS_INVARIANT(*i)) {
- if (isCHARNAME_CONT(*i)) continue;
- } else if (! UTF8_IS_DOWNGRADEABLE_START(*i)) {
- continue;
- } else if (isCHARNAME_CONT(
- UNI_TO_NATIVE(
- TWO_BYTE_UTF8_TO_UNI(*i, *(i+1)))))
- {
- continue;
- }
- problematic = TRUE;
- break;
- }
- }
- if (problematic) {
- /* The e-i passed to the final %.*s makes sure that
- * should the trailing NUL be missing that this
- * print won't run off the end of the string */
- Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
- "Deprecated character in \\N{...}; marked by <-- HERE in \\N{%.*s<-- HERE %.*s",
- (int)(i - s + 1), s, (int)(e - i), i + 1);
- }
- }
} /* End \N{NAME} */
#ifdef EBCDIC
if (!dorange)
/* printf-style backslashes, formfeeds, newlines, etc */
case 'b':
- *d++ = NATIVE_TO_NEED(has_utf8,'\b');
+ *d++ = '\b';
break;
case 'n':
- *d++ = NATIVE_TO_NEED(has_utf8,'\n');
+ *d++ = '\n';
break;
case 'r':
- *d++ = NATIVE_TO_NEED(has_utf8,'\r');
+ *d++ = '\r';
break;
case 'f':
- *d++ = NATIVE_TO_NEED(has_utf8,'\f');
+ *d++ = '\f';
break;
case 't':
- *d++ = NATIVE_TO_NEED(has_utf8,'\t');
+ *d++ = '\t';
break;
case 'e':
- *d++ = ASCII_TO_NEED(has_utf8,'\033');
+ *d++ = ASCII_TO_NATIVE('\033');
break;
case 'a':
- *d++ = ASCII_TO_NEED(has_utf8,'\007');
+ *d++ = '\a';
break;
} /* end switch */
#endif
}
else {
- *d++ = NATIVE_TO_NEED(has_utf8,*s++);
+ *d++ = *s++;
}
} /* while loop to process each character */
/* return the substring (via pl_yylval) only if we parsed anything */
if (s > PL_bufptr) {
- if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) ) {
+ SvREFCNT_inc_simple_void_NN(sv);
+ if ( (PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ))
+ && ! PL_parser->lex_re_reparsing)
+ {
const char *const key = PL_lex_inpat ? "qr" : "q";
const STRLEN keylen = PL_lex_inpat ? 2 : 1;
const char *type;
type, typelen);
}
pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
- } else
- SvREFCNT_dec(sv);
+ }
+ LEAVE_with_name("scan_const");
return s;
}
/* This is the one truly awful dwimmer necessary to conflate C and sed. */
STATIC int
-S_intuit_more(pTHX_ register char *s)
+S_intuit_more(pTHX_ char *s)
{
dVAR;
/* In a pattern, so maybe we have {n,m}. */
if (*s == '{') {
- if (regcurly(s)) {
+ if (regcurly(s, FALSE)) {
return FALSE;
}
return TRUE;
return FALSE;
else {
/* this is terrifying, and it works */
- int weight = 2; /* let's weigh the evidence */
+ int weight;
char seen[256];
- unsigned char un_char = 255, last_un_char;
const char * const send = strchr(s,']');
+ unsigned char un_char, last_un_char;
char tmpbuf[sizeof PL_tokenbuf * 4];
if (!send) /* has to be an expression */
return TRUE;
+ weight = 2; /* let's weigh the evidence */
- Zero(seen,256,char);
if (*s == '$')
weight -= 3;
else if (isDIGIT(*s)) {
else
weight -= 100;
}
+ Zero(seen,256,char);
+ un_char = 255;
for (; s < send; s++) {
last_un_char = un_char;
un_char = (unsigned char)*s;
case '&':
case '$':
weight -= seen[un_char] * 10;
- if (isALNUM_lazy_if(s+1,UTF)) {
+ if (isWORDCHAR_lazy_if(s+1,UTF)) {
int len;
scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
len = (int)strlen(tmpbuf);
weight -= 5; /* cope with negative subscript */
break;
default:
- if (!isALNUM(last_un_char)
+ if (!isWORDCHAR(last_un_char)
&& !(last_un_char == '$' || last_un_char == '@'
|| last_un_char == '&')
&& isALPHA(*s) && s[1] && isALPHA(s[1])) {
if (gv && SvTYPE(gv) == SVt_PVGV && GvIO(gv))
return 0;
if (cv && SvPOK(cv)) {
- const char *proto = CvPROTO(cv);
- if (proto) {
- if (*proto == ';')
- proto++;
- if (*proto == '*')
- return 0;
- }
+ const char *proto = CvPROTO(cv);
+ if (proto) {
+ while (*proto && (isSPACE(*proto) || *proto == ';'))
+ proto++;
+ if (*proto == '*')
+ return 0;
+ }
}
- s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
- /* start is the beginning of the possible filehandle/object,
- * and s is the end of it
- * tmpbuf is a copy of it
- */
if (*start == '$') {
if (cv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY ||
PL_expect = XREF;
return *s == '(' ? FUNCMETH : METHOD;
}
+
+ s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
+ /* start is the beginning of the possible filehandle/object,
+ * and s is the end of it
+ * tmpbuf is a copy of it (but with single quotes as double colons)
+ */
+
if (!keyword(tmpbuf, len, 0)) {
if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
len -= 2;
}
STATIC char *
-S_filter_gets(pTHX_ register SV *sv, STRLEN append)
+S_filter_gets(pTHX_ SV *sv, STRLEN append)
{
dVAR;
PL_thiswhite = 0;
PL_thismad = 0;
- /* just do what yylex would do on pending identifier; leave PL_thiswhite alone */
- if (PL_lex_state != LEX_KNOWNEXT && PL_pending_ident)
- return S_pending_ident(aTHX);
-
/* previous token ate up our whitespace? */
if (!PL_lasttoke && PL_nextwhite) {
PL_thiswhite = PL_nextwhite;
case FUNC0SUB:
case UNIOPSUB:
case LSTOPSUB:
- case LABEL:
if (pl_yylval.opval)
append_madprops(PL_thismad, pl_yylval.opval, 0);
PL_thismad = 0;
}
break;
+ /* pval */
+ case LABEL:
+ break;
+
case ']':
case '}':
if (PL_faketokens)
force_next(WORD);
}
else if (*s == 'v') {
- s = force_word(s,WORD,FALSE,TRUE,FALSE);
+ s = force_word(s,WORD,FALSE,TRUE);
s = force_version(s, FALSE);
}
}
else {
- s = force_word(s,WORD,FALSE,TRUE,FALSE);
+ s = force_word(s,WORD,FALSE,TRUE);
s = force_version(s, FALSE);
}
pl_yylval.ival = is_use;
stitching them into a tree.
Returns:
- PRIVATEREF
+ The type of the next token
Structure:
- if read an identifier
- if we're in a my declaration
- croak if they tried to say my($foo::bar)
- build the ops for a my() declaration
- if it's an access to a my() variable
- are we in a sort block?
- croak if my($a); $a <=> $b
- build ops for access to a my() variable
- if in a dq string, and they've said @foo and we can't find @foo
- croak
- build ops for a bareword
- if we already built the token before, use it.
+ Switch based on the current state:
+ - if we already built the token before, use it
+ - if we have a case modifier in a string, deal with that
+ - handle other cases of interpolation inside a string
+ - scan the next line if we are inside a format
+ In the normal state switch on the next character:
+ - default:
+ if alphabetic, go to key lookup
+ unrecoginized character - croak
+ - 0/4/26: handle end-of-line or EOF
+ - cases for whitespace
+ - \n and #: handle comments and line numbers
+ - various operators, brackets and sigils
+ - numbers
+ - quotes
+ - 'v': vstrings (or go to key lookup)
+ - 'x' repetition operator (or go to key lookup)
+ - other ASCII alphanumerics (key lookup begins here):
+ word before => ?
+ keyword plugin
+ scan built-in keyword (but do nothing with it yet)
+ check for statement label
+ check for lexical subs
+ goto just_a_word if there is one
+ see whether built-in keyword is overridden
+ switch on keyword number:
+ - default: just_a_word:
+ not a built-in keyword; handle bareword lookup
+ disambiguate between method and sub call
+ fall back to bareword
+ - cases for built-in keywords
*/
char *d;
STRLEN len;
bool bof = FALSE;
+ const bool saw_infix_sigil = PL_parser->saw_infix_sigil;
U8 formbrack = 0;
U32 fake_eof = 0;
pv_display(tmp, s, strlen(s), 0, 60));
SvREFCNT_dec(tmp);
} );
- /* check if there's an identifier for us to look at */
- if (PL_lex_state != LEX_KNOWNEXT && PL_pending_ident)
- return REPORT(S_pending_ident(aTHX));
-
- /* no identifier pending identification */
switch (PL_lex_state) {
#ifdef COMMENTARY
PL_lex_allbrackets--;
next_type &= 0xffff;
}
- if (S_is_opval_token(next_type) && pl_yylval.opval)
- pl_yylval.opval->op_savefree = 0; /* release */
- return REPORT(next_type);
+ return REPORT(next_type == 'p' ? pending_ident() : next_type);
}
/* interpolated case modifiers like \L \U, including \Q and \E.
#ifdef PERL_MAD
while (PL_bufptr != PL_bufend &&
PL_bufptr[0] == '\\' && PL_bufptr[1] == 'E') {
- if (!PL_thiswhite)
+ if (PL_madskills) {
+ if (!PL_thiswhite)
PL_thiswhite = newSVpvs("");
- sv_catpvn(PL_thiswhite, PL_bufptr, 2);
+ sv_catpvn(PL_thiswhite, PL_bufptr, 2);
+ }
PL_bufptr += 2;
}
#else
s = PL_bufptr + 1;
if (s[1] == '\\' && s[2] == 'E') {
#ifdef PERL_MAD
- if (!PL_thiswhite)
+ if (PL_madskills) {
+ if (!PL_thiswhite)
PL_thiswhite = newSVpvs("");
- sv_catpvn(PL_thiswhite, PL_bufptr, 4);
+ sv_catpvn(PL_thiswhite, PL_bufptr, 4);
+ }
#endif
PL_bufptr = s + 3;
PL_lex_state = LEX_INTERPCONCAT;
DEBUG_T({ if(*PL_bufptr != '(') PerlIO_printf(Perl_debug_log,
"### Interpolated variable\n"); });
PL_expect = XTERM;
- PL_lex_dojoin = (*PL_bufptr == '@');
+ /* for /@a/, we leave the joining for the regex engine to do
+ * (unless we're within \Q etc) */
+ PL_lex_dojoin = (*PL_bufptr == '@'
+ && (!PL_lex_inpat || PL_lex_casemods));
PL_lex_state = LEX_INTERPNORMAL;
if (PL_lex_dojoin) {
start_force(PL_curforce);
}
/* Convert (?{...}) and friends to 'do {...}' */
if (PL_lex_inpat && *PL_bufptr == '(') {
- PL_sublex_info.re_eval_start = PL_bufptr;
+ PL_parser->lex_shared->re_eval_start = PL_bufptr;
PL_bufptr += 2;
if (*PL_bufptr != '{')
PL_bufptr++;
re_eval_str. If the here-doc body’s length equals the previous
value of re_eval_start, re_eval_start will now be null. So
check re_eval_str as well. */
- if (PL_sublex_info.re_eval_start || PL_sublex_info.re_eval_str) {
+ if (PL_parser->lex_shared->re_eval_start
+ || PL_parser->lex_shared->re_eval_str) {
SV *sv;
if (*PL_bufptr != ')')
Perl_croak(aTHX_ "Sequence (?{...}) not terminated with ')'");
PL_bufptr++;
/* having compiled a (?{..}) expression, return the original
* text too, as a const */
- if (PL_sublex_info.re_eval_str) {
- sv = PL_sublex_info.re_eval_str;
- PL_sublex_info.re_eval_str = NULL;
- SvCUR_set(sv, PL_bufptr - PL_sublex_info.re_eval_start);
+ if (PL_parser->lex_shared->re_eval_str) {
+ sv = PL_parser->lex_shared->re_eval_str;
+ PL_parser->lex_shared->re_eval_str = NULL;
+ SvCUR_set(sv,
+ PL_bufptr - PL_parser->lex_shared->re_eval_start);
SvPV_shrink_to_cur(sv);
}
- else sv = newSVpvn(PL_sublex_info.re_eval_start,
- PL_bufptr - PL_sublex_info.re_eval_start);
+ else sv = newSVpvn(PL_parser->lex_shared->re_eval_start,
+ PL_bufptr - PL_parser->lex_shared->re_eval_start);
start_force(PL_curforce);
/* XXX probably need a CURMAD(something) here */
NEXTVAL_NEXTTOKE.opval =
(OP*)newSVOP(OP_CONST, 0,
sv);
force_next(THING);
- PL_sublex_info.re_eval_start = NULL;
+ PL_parser->lex_shared->re_eval_start = NULL;
PL_expect = XTERM;
return REPORT(',');
}
return yylex();
}
+ /* We really do *not* want PL_linestr ever becoming a COW. */
+ assert (!SvIsCOW(PL_linestr));
s = PL_bufptr;
PL_oldoldbufptr = PL_oldbufptr;
PL_oldbufptr = s;
+ PL_parser->saw_infix_sigil = 0;
retry:
#ifdef PERL_MAD
#endif
switch (*s) {
default:
- if (isIDFIRST_lazy_if(s,UTF))
+ if (UTF ? isIDFIRST_utf8((U8*)s) : isALNUMC(*s))
goto keylookup;
{
SV *dsv = newSVpvs_flags("", SVs_TEMP);
case ' ': case '\t': case '\f': case 013:
#ifdef PERL_MAD
PL_realtokenstart = -1;
- if (!PL_thiswhite)
+ if (PL_madskills) {
+ if (!PL_thiswhite)
PL_thiswhite = newSVpvs("");
- sv_catpvn(PL_thiswhite, s, 1);
+ sv_catpvn(PL_thiswhite, s, 1);
+ }
#endif
s++;
goto retry;
incline(s);
}
else {
+ const bool in_comment = *s == '#';
d = s;
while (d < PL_bufend && *d != '\n')
d++;
PL_thiswhite = newSVpvn(s, d - s);
#endif
s = d;
- incline(s);
+ if (in_comment && d == PL_bufend
+ && PL_lex_state == LEX_INTERPNORMAL
+ && PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
+ && SvEVALED(PL_lex_repl) && d[-1] == '}') s--;
+ else incline(s);
}
if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
PL_lex_state = LEX_FORMLINE;
s = SKIPSPACE0(s);
}
else {
-/* if (PL_madskills && PL_lex_formbrack) { */
- d = s;
- while (d < PL_bufend && *d != '\n')
- d++;
- if (d < PL_bufend)
- d++;
- else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
+#endif
+ if (PL_madskills) d = s;
+ while (s < PL_bufend && *s != '\n')
+ s++;
+ if (s < PL_bufend)
+ {
+ s++;
+ if (s < PL_bufend)
+ incline(s);
+ }
+ else if (s > PL_bufend) /* Found by Ilya: feed random input to Perl. */
Perl_croak(aTHX_ "panic: input overflow");
+#ifdef PERL_MAD
if (PL_madskills && CopLINE(PL_curcop) >= 1) {
if (!PL_thiswhite)
PL_thiswhite = newSVpvs("");
sv_setpvs(PL_thiswhite, "");
PL_faketokens = 0;
}
- sv_catpvn(PL_thiswhite, s, d - s);
+ sv_catpvn(PL_thiswhite, d, s - d);
}
- s = d;
-/* }
- *s = '\0';
- PL_bufend = s; */
}
-#else
- *s = '\0';
- PL_bufend = s;
#endif
}
goto retry;
case '-':
- if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {
+ if (s[1] && isALPHA(s[1]) && !isWORDCHAR(s[2])) {
I32 ftst = 0;
char tmp;
s++;
if (strnEQ(s,"=>",2)) {
- s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
+ s = force_word(PL_bufptr,WORD,FALSE,FALSE);
DEBUG_T( { printbuf("### Saw unary minus before =>, forcing word %s\n", s); } );
OPERATOR('-'); /* unary minus */
}
s++;
s = SKIPSPACE1(s);
if (isIDFIRST_lazy_if(s,UTF)) {
- s = force_word(s,METHOD,FALSE,TRUE,FALSE);
+ s = force_word(s,METHOD,FALSE,TRUE);
TOKEN(ARROW);
}
else if (*s == '$')
s--;
TOKEN(0);
}
+ PL_parser->saw_infix_sigil = 1;
Mop(OP_MULTIPLY);
case '%':
PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
TOKEN(0);
++s;
+ PL_parser->saw_infix_sigil = 1;
Mop(OP_MODULO);
}
PL_tokenbuf[0] = '%';
if (!PL_tokenbuf[1]) {
PREREF('%');
}
- PL_pending_ident = '%';
+ PL_expect = XOPERATOR;
+ force_ident_maybe_lex('%');
TERM('%');
case '^':
if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
TOKEN(0);
s += 2;
+ Perl_ck_warner_d(aTHX_
+ packWARN(WARN_EXPERIMENTAL__SMARTMATCH),
+ "Smartmatch is experimental");
Eop(OP_SMARTMATCH);
}
s++;
}
sv = newSVpvn_flags(s, len, UTF ? SVf_UTF8 : 0);
if (*d == '(') {
- d = scan_str(d,TRUE,TRUE,FALSE);
+ d = scan_str(d,TRUE,TRUE,FALSE, FALSE);
if (!d) {
/* MUST advance bufptr here to avoid bogus
"at end of line" context messages from yyerror().
}
switch (PL_expect) {
case XTERM:
- if (PL_oldoldbufptr == PL_last_lop)
- PL_lex_brackstack[PL_lex_brackets++] = XTERM;
- else
- PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
+ PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
PL_lex_allbrackets++;
OPERATOR(HASHBRACK);
case XOPERATOR:
d++;
if (*d == '}') {
const char minus = (PL_tokenbuf[0] == '-');
- s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
+ s = force_word(s + minus, WORD, FALSE, TRUE);
if (minus)
force_next('-');
}
}
else if (*s == 'q') {
if (++t < PL_bufend
- && (!isALNUM(*t)
+ && (!isWORDCHAR(*t)
|| ((*t == 'q' || *t == 'x') && ++t < PL_bufend
- && !isALNUM(*t))))
+ && !isWORDCHAR(*t))))
{
/* skip q//-like construct */
const char *tmps;
}
else
/* skip plain q word */
- while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
+ while (t < PL_bufend && isWORDCHAR_lazy_if(t,UTF))
t += UTF8SKIP(t);
}
- else if (isALNUM_lazy_if(t,UTF)) {
+ else if (isWORDCHAR_lazy_if(t,UTF)) {
t += UTF8SKIP(t);
- while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
+ while (t < PL_bufend && isWORDCHAR_lazy_if(t,UTF))
t += UTF8SKIP(t);
}
while (t < PL_bufend && isSPACE(*t))
#endif
return yylex(); /* ignore fake brackets */
}
- if (*s == '-' && s[1] == '>')
+ if (PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
+ && SvEVALED(PL_lex_repl))
+ PL_lex_state = LEX_INTERPEND;
+ else if (*s == '-' && s[1] == '>')
PL_lex_state = LEX_INTERPENDMAYBE;
else if (*s != '[' && *s != '{')
PL_lex_state = LEX_INTERPEND;
force_next(formbrack ? '.' : '}');
if (formbrack) LEAVE;
#ifdef PERL_MAD
- if (!PL_thistoken)
+ if (PL_madskills && !PL_thistoken)
PL_thistoken = newSVpvs("");
#endif
if (formbrack == 2) { /* means . where arguments were expected */
s--;
TOKEN(0);
}
+ PL_parser->saw_infix_sigil = 1;
BAop(OP_BIT_AND);
}
- s = scan_ident(s - 1, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
- if (*PL_tokenbuf) {
+ PL_tokenbuf[0] = '&';
+ s = scan_ident(s - 1, PL_bufend, PL_tokenbuf + 1,
+ sizeof PL_tokenbuf - 1, TRUE);
+ if (PL_tokenbuf[1]) {
PL_expect = XOPERATOR;
- force_ident(PL_tokenbuf, '&');
+ force_ident_maybe_lex('&');
}
else
PREREF('&');
if (*t == '/' || *t == '?' ||
((*t == 'm' || *t == 's' || *t == 'y')
- && !isALNUM(t[1])) ||
- (*t == 't' && t[1] == 'r' && !isALNUM(t[2])))
+ && !isWORDCHAR(t[1])) ||
+ (*t == 't' && t[1] == 'r' && !isWORDCHAR(t[2])))
Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
"!=~ should be !~");
}
s = scan_heredoc(s);
else
s = scan_inputsymbol(s);
- TERM(sublex_start());
+ PL_expect = XOPERATOR;
+ TOKEN(sublex_start());
}
s++;
{
if (!PL_tokenbuf[1])
PREREF(DOLSHARP);
PL_expect = XOPERATOR;
- PL_pending_ident = '#';
+ force_ident_maybe_lex('#');
TOKEN(DOLSHARP);
}
if (ckWARN(WARN_SYNTAX)) {
char *t = s+1;
- while (isSPACE(*t) || isALNUM_lazy_if(t,UTF) || *t == '$')
+ while (isSPACE(*t) || isWORDCHAR_lazy_if(t,UTF) || *t == '$')
t++;
if (*t++ == ',') {
PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
if (*t == ';'
&& get_cvn_flags(tmpbuf, len, UTF ? SVf_UTF8 : 0))
Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
- "You need to quote \"%"SVf"\"",
- SVfARG(newSVpvn_flags(tmpbuf, len,
- SVs_TEMP | (UTF ? SVf_UTF8 : 0))));
+ "You need to quote \"%"UTF8f"\"",
+ UTF8fARG(UTF, len, tmpbuf));
}
}
}
PL_expect = XTERM; /* print $fh <<"EOF" */
}
}
- PL_pending_ident = '$';
+ force_ident_maybe_lex('$');
TOKEN('$');
case '@':
if (*s == '[' || *s == '{') {
if (ckWARN(WARN_SYNTAX)) {
const char *t = s + 1;
- while (*t && (isALNUM_lazy_if(t,UTF) || strchr(" \t$#+-'\"", *t)))
+ while (*t && (isWORDCHAR_lazy_if(t,UTF) || strchr(" \t$#+-'\"", *t)))
t += UTF ? UTF8SKIP(t) : 1;
if (*t == '}' || *t == ']') {
t++;
PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
/* diag_listed_as: Scalar value @%s[%s] better written as $%s[%s] */
Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
- "Scalar value %"SVf" better written as $%"SVf,
- SVfARG(newSVpvn_flags(PL_bufptr, (STRLEN)(t-PL_bufptr),
- SVs_TEMP | (UTF ? SVf_UTF8 : 0 ))),
- SVfARG(newSVpvn_flags(PL_bufptr+1, (STRLEN)(t-PL_bufptr-1),
- SVs_TEMP | (UTF ? SVf_UTF8 : 0 ))));
+ "Scalar value %"UTF8f" better written as $%"UTF8f,
+ UTF8fARG(UTF, t-PL_bufptr, PL_bufptr),
+ UTF8fARG(UTF, t-PL_bufptr-1, PL_bufptr+1));
}
}
}
}
- PL_pending_ident = '@';
+ PL_expect = XOPERATOR;
+ force_ident_maybe_lex('@');
TERM('@');
case '/': /* may be division, defined-or, or pattern */
if (PL_oldoldbufptr == PL_last_uni
&& (*PL_last_uni != 's' || s - PL_last_uni < 5
|| memNE(PL_last_uni, "study", 5)
- || isALNUM_lazy_if(PL_last_uni+5,UTF)
+ || isWORDCHAR_lazy_if(PL_last_uni+5,UTF)
))
check_uni();
if (*s == '?')
TERM(THING);
case '\'':
- s = scan_str(s,!!PL_madskills,FALSE,FALSE);
+ s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE);
DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
if (PL_expect == XOPERATOR) {
if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
TERM(sublex_start());
case '"':
- s = scan_str(s,!!PL_madskills,FALSE,FALSE);
+ s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE);
DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
if (PL_expect == XOPERATOR) {
if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
TERM(sublex_start());
case '`':
- s = scan_str(s,!!PL_madskills,FALSE,FALSE);
+ s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE);
DEBUG_T( { printbuf("### Saw backtick string before %s\n", s); } );
if (PL_expect == XOPERATOR)
no_op("Backticks",s);
s = scan_num(s, &pl_yylval);
TERM(THING);
}
+ else if ((*start == ':' && start[1] == ':')
+ || (PL_expect == XSTATE && *start == ':'))
+ goto keylookup;
+ else if (PL_expect == XSTATE) {
+ d = start;
+ while (d < PL_bufend && isSPACE(*d)) d++;
+ if (*d == ':') goto keylookup;
+ }
/* avoid v123abc() or $h{v1}, allow C<print v10;> */
- else if (!isALPHA(*start) && (PL_expect == XTERM
+ if (!isALPHA(*start) && (PL_expect == XTERM
|| PL_expect == XREF || PL_expect == XSTATE
|| PL_expect == XTERMORDORDOR)) {
GV *const gv = gv_fetchpvn_flags(s, start - s,
keylookup: {
bool anydelim;
+ bool lex;
I32 tmp;
+ SV *sv;
+ CV *cv;
+ PADOFFSET off;
+ OP *rv2cv_op;
+ lex = FALSE;
orig_keyword = 0;
+ off = 0;
+ sv = NULL;
+ cv = NULL;
gv = NULL;
gvp = NULL;
+ rv2cv_op = NULL;
PL_bufptr = s;
s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
/* Is this a word before a => operator? */
if (*d == '=' && d[1] == '>') {
+ fat_arrow:
CLINE;
pl_yylval.opval
= (OP*)newSVOP(OP_CONST, 0,
if (!anydelim && PL_expect == XSTATE
&& d < PL_bufend && *d == ':' && *(d + 1) != ':') {
s = d + 1;
- pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
- newSVpvn_flags(PL_tokenbuf,
- len, UTF ? SVf_UTF8 : 0));
+ pl_yylval.pval = savepvn(PL_tokenbuf, len+1);
+ pl_yylval.pval[len] = '\0';
+ pl_yylval.pval[len+1] = UTF ? 1 : 0;
CLINE;
TOKEN(LABEL);
}
+ /* Check for lexical sub */
+ if (PL_expect != XOPERATOR) {
+ char tmpbuf[sizeof PL_tokenbuf + 1];
+ *tmpbuf = '&';
+ Copy(PL_tokenbuf, tmpbuf+1, len, char);
+ off = pad_findmy_pvn(tmpbuf, len+1, UTF ? SVf_UTF8 : 0);
+ if (off != NOT_IN_PAD) {
+ assert(off); /* we assume this is boolean-true below */
+ if (PAD_COMPNAME_FLAGS_isOUR(off)) {
+ HV * const stash = PAD_COMPNAME_OURSTASH(off);
+ HEK * const stashname = HvNAME_HEK(stash);
+ sv = newSVhek(stashname);
+ sv_catpvs(sv, "::");
+ sv_catpvn_flags(sv, PL_tokenbuf, len,
+ (UTF ? SV_CATUTF8 : SV_CATBYTES));
+ gv = gv_fetchsv(sv, GV_NOADD_NOINIT | SvUTF8(sv),
+ SVt_PVCV);
+ off = 0;
+ if (!gv) {
+ sv_free(sv);
+ sv = NULL;
+ goto just_a_word;
+ }
+ }
+ else {
+ rv2cv_op = newOP(OP_PADANY, 0);
+ rv2cv_op->op_targ = off;
+ cv = find_lexical_cv(off);
+ }
+ lex = TRUE;
+ goto just_a_word;
+ }
+ off = 0;
+ }
+
if (tmp < 0) { /* second-class keyword? */
GV *ogv = NULL; /* override (winner) */
GV *hgv = NULL; /* hidden (loser) */
}
}
+ if (tmp && tmp != KEY___DATA__ && tmp != KEY___END__
+ && (!anydelim || *s != '#')) {
+ /* no override, and not s### either; skipspace is safe here
+ * check for => on following line */
+ STRLEN bufoff = PL_bufptr - SvPVX(PL_linestr);
+ STRLEN soff = s - SvPVX(PL_linestr);
+ s = skipspace_flags(s, LEX_NO_INCLINE);
+ if (*s == '=' && s[1] == '>') goto fat_arrow;
+ PL_bufptr = SvPVX(PL_linestr) + bufoff;
+ s = SvPVX(PL_linestr) + soff;
+ }
+
reserved_word:
switch (tmp) {
earlier ':' case doesn't bypass the initialisation. */
if (0) {
just_a_word_zero_gv:
+ sv = NULL;
+ cv = NULL;
gv = NULL;
gvp = NULL;
+ rv2cv_op = NULL;
orig_keyword = 0;
+ lex = 0;
+ off = 0;
}
just_a_word: {
- SV *sv;
int pkgname = 0;
const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
- OP *rv2cv_op;
- CV *cv;
+ const char penultchar =
+ lastchar && PL_bufptr - 2 >= PL_linestart
+ ? PL_bufptr[-2]
+ : 0;
#ifdef PERL_MAD
SV *nextPL_nextwhite = 0;
#endif
s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
TRUE, &morelen);
if (!morelen)
- Perl_croak(aTHX_ "Bad name after %"SVf"%s",
- SVfARG(newSVpvn_flags(PL_tokenbuf, len,
- (UTF ? SVf_UTF8 : 0) | SVs_TEMP )),
+ Perl_croak(aTHX_ "Bad name after %"UTF8f"%s",
+ UTF8fARG(UTF, len, PL_tokenbuf),
*s == '\'' ? "'" : "::");
len += morelen;
pkgname = 1;
}
/* Look for a subroutine with this name in current package,
- unless name is "Foo::", in which case Foo is a bareword
+ unless this is a lexical sub, or name is "Foo::",
+ in which case Foo is a bareword
(and a package name). */
if (len > 2 && !PL_madskills &&
if (ckWARN(WARN_BAREWORD)
&& ! gv_fetchpvn_flags(PL_tokenbuf, len, UTF ? SVf_UTF8 : 0, SVt_PVHV))
Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
- "Bareword \"%"SVf"\" refers to nonexistent package",
- SVfARG(newSVpvn_flags(PL_tokenbuf, len,
- (UTF ? SVf_UTF8 : 0) | SVs_TEMP)));
+ "Bareword \"%"UTF8f"\" refers to nonexistent package",
+ UTF8fARG(UTF, len, PL_tokenbuf));
len -= 2;
PL_tokenbuf[len] = '\0';
gv = NULL;
gvp = 0;
}
else {
- if (!gv) {
+ if (!lex && !gv) {
/* Mustn't actually add anything to a symbol table.
But also don't want to "initialise" any placeholder
constants that might already be there into full
/* if we saw a global override before, get the right name */
- sv = S_newSV_maybe_utf8(aTHX_ PL_tokenbuf,
+ if (!sv)
+ sv = S_newSV_maybe_utf8(aTHX_ PL_tokenbuf,
len ? len : strlen(PL_tokenbuf));
if (gvp) {
SV * const tmp_sv = sv;
if (len)
goto safe_bareword;
+ if (!off)
{
OP *const_op = newSVOP(OP_CONST, 0, SvREFCNT_inc_NN(sv));
const_op->op_private = OPpCONST_BARE;
rv2cv_op = newCVREF(0, const_op);
+ cv = lex ? GvCV(gv) : rv2cv_op_cv(rv2cv_op, 0);
}
- cv = rv2cv_op_cv(rv2cv_op, 0);
/* See if it's the indirect object for a list operator. */
if (*s == '=' && s[1] == '>' && !pkgname) {
op_free(rv2cv_op);
CLINE;
+ /* This is our own scalar, created a few lines above,
+ so this is safe. */
+ SvREADONLY_off(cSVOPx(pl_yylval.opval)->op_sv);
sv_setpv(((SVOP*)pl_yylval.opval)->op_sv, PL_tokenbuf);
if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len))
SvUTF8_on(((SVOP*)pl_yylval.opval)->op_sv);
+ SvREADONLY_on(cSVOPx(pl_yylval.opval)->op_sv);
TERM(WORD);
}
d = s + 1;
while (SPACE_OR_TAB(*d))
d++;
- if (*d == ')' && (sv = cv_const_sv(cv))) {
+ if (*d == ')' && (sv = cv_const_sv_or_av(cv))) {
s = d + 1;
goto its_constant;
}
}
start_force(PL_curforce);
#endif
- NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
+ NEXTVAL_NEXTTOKE.opval =
+ off ? rv2cv_op : pl_yylval.opval;
PL_expect = XOPERATOR;
#ifdef PERL_MAD
if (PL_madskills) {
PL_thistoken = newSVpvs("");
}
#endif
- op_free(rv2cv_op);
- force_next(WORD);
+ if (off)
+ op_free(pl_yylval.opval), force_next(PRIVATEREF);
+ else op_free(rv2cv_op), force_next(WORD);
pl_yylval.ival = 0;
TOKEN('&');
}
/* Not a method, so call it a subroutine (if defined) */
if (cv) {
- if (lastchar == '-') {
- const SV *tmpsv = newSVpvn_flags( PL_tokenbuf, len ? len : strlen(PL_tokenbuf), (UTF ? SVf_UTF8 : 0) | SVs_TEMP );
+ if (lastchar == '-' && penultchar != '-') {
+ const STRLEN l = len ? len : strlen(PL_tokenbuf);
Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
- "Ambiguous use of -%"SVf" resolved as -&%"SVf"()",
- SVfARG(tmpsv), SVfARG(tmpsv));
+ "Ambiguous use of -%"UTF8f" resolved as -&%"UTF8f"()",
+ UTF8fARG(UTF, l, PL_tokenbuf),
+ UTF8fARG(UTF, l, PL_tokenbuf));
}
/* Check for a constant sub */
- if ((sv = cv_const_sv(cv))) {
+ if ((sv = cv_const_sv_or_av(cv))) {
its_constant:
op_free(rv2cv_op);
SvREFCNT_dec(((SVOP*)pl_yylval.opval)->op_sv);
((SVOP*)pl_yylval.opval)->op_sv = SvREFCNT_inc_simple(sv);
- pl_yylval.opval->op_private = OPpCONST_FOLDED;
- pl_yylval.opval->op_flags |= OPf_SPECIAL;
+ if (SvTYPE(sv) == SVt_PVAV)
+ pl_yylval.opval = newUNOP(OP_RV2AV, OPf_PARENS,
+ pl_yylval.opval);
+ else {
+ pl_yylval.opval->op_private = OPpCONST_FOLDED;
+ pl_yylval.opval->op_folded = 1;
+ pl_yylval.opval->op_flags |= OPf_SPECIAL;
+ }
TOKEN(WORD);
}
op_free(pl_yylval.opval);
- pl_yylval.opval = rv2cv_op;
+ pl_yylval.opval =
+ off ? (OP *)newCVREF(0, rv2cv_op) : rv2cv_op;
pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
PL_last_lop = PL_oldbufptr;
PL_last_lop_op = OP_ENTERSUB;
STRLEN protolen = CvPROTOLEN(cv);
const char *proto = CvPROTO(cv);
bool optional;
+ proto = S_strip_spaces(aTHX_ proto, &protolen);
if (!protolen)
TERM(FUNC0SUB);
if ((optional = *proto == ';'))
curmad('X', PL_thistoken);
PL_thistoken = newSVpvs("");
}
- force_next(WORD);
+ force_next(off ? PRIVATEREF : WORD);
if (!PL_lex_allbrackets &&
PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
gv = gv_fetchpv(PL_tokenbuf, GV_ADD | ( UTF ? SVf_UTF8 : 0 ),
SVt_PVCV);
op_free(pl_yylval.opval);
- pl_yylval.opval = rv2cv_op;
+ pl_yylval.opval =
+ off ? (OP *)newCVREF(0, rv2cv_op) : rv2cv_op;
pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
PL_last_lop = PL_oldbufptr;
PL_last_lop_op = OP_ENTERSUB;
PL_nextwhite = nextPL_nextwhite;
curmad('X', PL_thistoken);
PL_thistoken = newSVpvs("");
- force_next(WORD);
+ force_next(off ? PRIVATEREF : WORD);
if (!PL_lex_allbrackets &&
PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
#else
NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
PL_expect = XTERM;
- force_next(WORD);
+ force_next(off ? PRIVATEREF : WORD);
if (!PL_lex_allbrackets &&
PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
op_free(rv2cv_op);
safe_bareword:
- if ((lastchar == '*' || lastchar == '%' || lastchar == '&')) {
+ if ((lastchar == '*' || lastchar == '%' || lastchar == '&')
+ && saw_infix_sigil) {
Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
- "Operator or semicolon missing before %c%"SVf,
- lastchar, SVfARG(newSVpvn_flags(PL_tokenbuf,
- strlen(PL_tokenbuf),
- SVs_TEMP | (UTF ? SVf_UTF8 : 0))));
+ "Operator or semicolon missing before %c%"UTF8f,
+ lastchar,
+ UTF8fARG(UTF, strlen(PL_tokenbuf),
+ PL_tokenbuf));
Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
"Ambiguous use of %c resolved as operator %c",
lastchar, lastchar);
case KEY___END__: {
GV *gv;
if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) {
- const char *pname = "main";
- STRLEN plen = 4;
- U32 putf8 = 0;
- if (PL_tokenbuf[2] == 'D')
- {
- HV * const stash =
- PL_curstash ? PL_curstash : PL_defstash;
- pname = HvNAME_get(stash);
- plen = HvNAMELEN (stash);
- if(HvNAMEUTF8(stash)) putf8 = SVf_UTF8;
- }
- gv = gv_fetchpvn_flags(
- Perl_form(aTHX_ "%*s::DATA", (int)plen, pname),
- plen+6, GV_ADD|putf8, SVt_PVIO
- );
+ HV * const stash = PL_tokenbuf[2] == 'D' && PL_curstash
+ ? PL_curstash
+ : PL_defstash;
+ gv = (GV *)*hv_fetchs(stash, "DATA", 1);
+ if (!isGV(gv))
+ gv_init(gv,stash,"DATA",4,0);
GvMULTI_on(gv);
if (!GvIO(gv))
GvIOp(gv) = newIO();
goto just_a_word;
}
if (!tmp)
- Perl_croak(aTHX_ "CORE::%"SVf" is not a keyword",
- SVfARG(newSVpvn_flags(PL_tokenbuf, len,
- (UTF ? SVf_UTF8 : 0) | SVs_TEMP)));
+ Perl_croak(aTHX_ "CORE::%"UTF8f" is not a keyword",
+ UTF8fARG(UTF, len, PL_tokenbuf));
if (tmp < 0)
tmp = -tmp;
else if (tmp == KEY_require || tmp == KEY_do
if (*s == '{')
PRETERMBLOCK(DO);
if (*s != '\'') {
- d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, 1, &len);
- if (len) {
+ *PL_tokenbuf = '&';
+ d = scan_word(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
+ 1, &len);
+ if (len && !keyword(PL_tokenbuf + 1, len, 0)) {
d = SKIPSPACE1(d);
- if (*d == '(') s = force_word(s,WORD,TRUE,TRUE,FALSE);
+ if (*d == '(') {
+ force_ident_maybe_lex('&');
+ s = d;
+ }
}
}
if (orig_keyword == KEY_do) {
UNI(OP_DBMCLOSE);
case KEY_dump:
- s = force_word(s,WORD,TRUE,FALSE,FALSE);
+ PL_expect = XOPERATOR;
+ s = force_word(s,WORD,TRUE,FALSE);
LOOPX(OP_DUMP);
case KEY_else:
LOP(OP_GREPSTART, XREF);
case KEY_goto:
- s = force_word(s,WORD,TRUE,FALSE,FALSE);
+ PL_expect = XOPERATOR;
+ s = force_word(s,WORD,TRUE,FALSE);
LOOPX(OP_GOTO);
case KEY_gmtime:
case KEY_given:
pl_yylval.ival = CopLINE(PL_curcop);
+ Perl_ck_warner_d(aTHX_
+ packWARN(WARN_EXPERIMENTAL__SMARTMATCH),
+ "given is experimental");
OPERATOR(GIVEN);
case KEY_glob:
LOP(OP_KILL,XTERM);
case KEY_last:
- s = force_word(s,WORD,TRUE,FALSE,FALSE);
+ PL_expect = XOPERATOR;
+ s = force_word(s,WORD,TRUE,FALSE);
LOOPX(OP_LAST);
case KEY_lc:
#endif
s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
if (len == 3 && strnEQ(PL_tokenbuf, "sub", 3))
+ {
+ if (!FEATURE_LEXSUBS_IS_ENABLED)
+ Perl_croak(aTHX_
+ "Experimental \"%s\" subs not enabled",
+ tmp == KEY_my ? "my" :
+ tmp == KEY_state ? "state" : "our");
+ Perl_ck_warner_d(aTHX_
+ packWARN(WARN_EXPERIMENTAL__LEXICAL_SUBS),
+ "The lexical_subs feature is experimental");
goto really_sub;
+ }
PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
if (!PL_in_my_stash) {
char tmpbuf[1024];
OPERATOR(MY);
case KEY_next:
- s = force_word(s,WORD,TRUE,FALSE,FALSE);
+ PL_expect = XOPERATOR;
+ s = force_word(s,WORD,TRUE,FALSE);
LOOPX(OP_NEXT);
case KEY_ne:
case KEY_open:
s = SKIPSPACE1(s);
if (isIDFIRST_lazy_if(s,UTF)) {
- const char *t;
- for (d = s; isALNUM_lazy_if(d,UTF);) {
- d += UTF ? UTF8SKIP(d) : 1;
- if (UTF) {
- while (UTF8_IS_CONTINUED(*d) && is_utf8_mark((U8*)d)) {
- d += UTF ? UTF8SKIP(d) : 1;
- }
- }
- }
+ const char *t;
+ d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE,
+ &len);
for (t=d; isSPACE(*t);)
t++;
if ( *t && strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
&& !(t[0] == ':' && t[1] == ':')
&& !keyword(s, d-s, 0)
) {
- SV *tmpsv = newSVpvn_flags(s, (STRLEN)(d-s),
- SVs_TEMP | (UTF ? SVf_UTF8 : 0));
Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
- "Precedence problem: open %"SVf" should be open(%"SVf")",
- SVfARG(tmpsv), SVfARG(tmpsv));
+ "Precedence problem: open %"UTF8f" should be open(%"UTF8f")",
+ UTF8fARG(UTF, d-s, s), UTF8fARG(UTF, d-s, s));
}
}
LOP(OP_OPEN,XTERM);
LOP(OP_PACK,XTERM);
case KEY_package:
- s = force_word(s,WORD,FALSE,TRUE,FALSE);
+ s = force_word(s,WORD,FALSE,TRUE);
s = SKIPSPACE1(s);
s = force_strict_version(s);
PL_lex_expect = XBLOCK;
LOP(OP_PIPE_OP,XTERM);
case KEY_q:
- s = scan_str(s,!!PL_madskills,FALSE,FALSE);
+ s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE);
if (!s)
missingterm(NULL);
pl_yylval.ival = OP_CONST;
case KEY_qw: {
OP *words = NULL;
- s = scan_str(s,!!PL_madskills,FALSE,FALSE);
+ s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE);
if (!s)
missingterm(NULL);
PL_expect = XOPERATOR;
}
case KEY_qq:
- s = scan_str(s,!!PL_madskills,FALSE,FALSE);
+ s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE);
if (!s)
missingterm(NULL);
pl_yylval.ival = OP_STRINGIFY;
TERM(sublex_start());
case KEY_qx:
- s = scan_str(s,!!PL_madskills,FALSE,FALSE);
+ s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE);
if (!s)
missingterm(NULL);
readpipe_override();
case KEY_require:
s = SKIPSPACE1(s);
+ PL_expect = XOPERATOR;
if (isDIGIT(*s)) {
s = force_version(s, FALSE);
}
|| (s = force_version(s, TRUE), *s == 'v'))
{
*PL_tokenbuf = '\0';
- s = force_word(s,WORD,TRUE,TRUE,FALSE);
+ s = force_word(s,WORD,TRUE,TRUE);
if (isIDFIRST_lazy_if(PL_tokenbuf,UTF))
gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf),
GV_ADD | (UTF ? SVf_UTF8 : 0));
UNI(OP_RESET);
case KEY_redo:
- s = force_word(s,WORD,TRUE,FALSE,FALSE);
+ PL_expect = XOPERATOR;
+ s = force_word(s,WORD,TRUE,FALSE);
LOOPX(OP_REDO);
case KEY_rename:
checkcomma(s,PL_tokenbuf,"subroutine name");
s = SKIPSPACE1(s);
PL_expect = XTERM;
- s = force_word(s,WORD,TRUE,TRUE,FALSE);
+ s = force_word(s,WORD,TRUE,TRUE);
LOP(OP_SORT,XREF);
case KEY_split:
case KEY_sub:
really_sub:
{
- char tmpbuf[sizeof PL_tokenbuf];
- SSize_t tboffset = 0;
+ char * const tmpbuf = PL_tokenbuf + 1;
expectation attrful;
bool have_name, have_proto;
const int key = tmp;
+#ifndef PERL_MAD
+ SV *format_name = NULL;
+#endif
#ifdef PERL_MAD
SV *tmpwhite = 0;
char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
- SV *subtoken = newSVpvn_flags(tstart, s - tstart, SvUTF8(PL_linestr));
+ SV *subtoken = PL_madskills
+ ? newSVpvn_flags(tstart, s - tstart, SvUTF8(PL_linestr))
+ : NULL;
PL_thistoken = 0;
d = s;
s = SKIPSPACE2(s,tmpwhite);
#else
+ d = s;
s = skipspace(s);
#endif
PL_expect = XBLOCK;
attrful = XATTRBLOCK;
- /* remember buffer pos'n for later force_word */
- tboffset = s - PL_oldbufptr;
- d = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
+ d = scan_word(s, tmpbuf, sizeof PL_tokenbuf - 1, TRUE,
+ &len);
#ifdef PERL_MAD
if (PL_madskills)
nametoke = newSVpvn_flags(s, d - s, SvUTF8(PL_linestr));
-#endif
- if (memchr(tmpbuf, ':', len))
+#else
+ if (key == KEY_format)
+ format_name = S_newSV_maybe_utf8(aTHX_ s, d - s);
+#endif
+ *PL_tokenbuf = '&';
+ if (memchr(tmpbuf, ':', len) || key != KEY_sub
+ || pad_findmy_pvn(
+ PL_tokenbuf, len + 1, UTF ? SVf_UTF8 : 0
+ ) != NOT_IN_PAD)
sv_setpvn(PL_subname, tmpbuf, len);
else {
sv_setsv(PL_subname,PL_curstname);
SvUTF8_on(PL_subname);
have_name = TRUE;
-#ifdef PERL_MAD
+#ifdef PERL_MAD
start_force(0);
CURMAD('X', nametoke);
CURMAD('_', tmpwhite);
- (void) force_word(PL_oldbufptr + tboffset, WORD,
- FALSE, TRUE, TRUE);
+ force_ident_maybe_lex('&');
s = SKIPSPACE2(d,tmpwhite);
#else
#endif
}
else {
- if (key == KEY_my)
- Perl_croak(aTHX_ "Missing name in \"my sub\"");
+ if (key == KEY_my || key == KEY_our || key==KEY_state)
+ {
+ *d = '\0';
+ /* diag_listed_as: Missing name in "%s sub" */
+ Perl_croak(aTHX_
+ "Missing name in \"%s\"", PL_bufptr);
+ }
PL_expect = XTERMBLOCK;
attrful = XATTRTERM;
sv_setpvs(PL_subname,"?");
PL_thistoken = subtoken;
s = d;
#else
- if (have_name)
- (void) force_word(PL_oldbufptr + tboffset, WORD,
- FALSE, TRUE, TRUE);
+ if (format_name) {
+ start_force(PL_curforce);
+ NEXTVAL_NEXTTOKE.opval
+ = (OP*)newSVOP(OP_CONST,0, format_name);
+ NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
+ force_next(WORD);
+ }
#endif
PREBLOCK(FORMAT);
}
/* Look for a prototype */
if (*s == '(') {
- char *p;
- bool bad_proto = FALSE;
- bool in_brackets = FALSE;
- char greedy_proto = ' ';
- bool proto_after_greedy_proto = FALSE;
- bool must_be_last = FALSE;
- bool underscore = FALSE;
- bool seen_underscore = FALSE;
- const bool warnillegalproto = ckWARN(WARN_ILLEGALPROTO);
- STRLEN tmplen;
-
- s = scan_str(s,!!PL_madskills,FALSE,FALSE);
+ s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE);
if (!s)
Perl_croak(aTHX_ "Prototype not terminated");
- /* strip spaces and check for bad characters */
- d = SvPV(PL_lex_stuff, tmplen);
- tmp = 0;
- for (p = d; tmplen; tmplen--, ++p) {
- if (!isSPACE(*p)) {
- d[tmp++] = *p;
-
- if (warnillegalproto) {
- if (must_be_last)
- proto_after_greedy_proto = TRUE;
- if (!strchr("$@%*;[]&\\_+", *p) || *p == '\0') {
- bad_proto = TRUE;
- }
- else {
- if ( underscore ) {
- if ( !strchr(";@%", *p) )
- bad_proto = TRUE;
- underscore = FALSE;
- }
- if ( *p == '[' ) {
- in_brackets = TRUE;
- }
- else if ( *p == ']' ) {
- in_brackets = FALSE;
- }
- else if ( (*p == '@' || *p == '%') &&
- ( tmp < 2 || d[tmp-2] != '\\' ) &&
- !in_brackets ) {
- must_be_last = TRUE;
- greedy_proto = *p;
- }
- else if ( *p == '_' ) {
- underscore = seen_underscore = TRUE;
- }
- }
- }
- }
- }
- d[tmp] = '\0';
- if (proto_after_greedy_proto)
- Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
- "Prototype after '%c' for %"SVf" : %s",
- greedy_proto, SVfARG(PL_subname), d);
- if (bad_proto) {
- SV *dsv = newSVpvs_flags("", SVs_TEMP);
- Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
- "Illegal character %sin prototype for %"SVf" : %s",
- seen_underscore ? "after '_' " : "",
- SVfARG(PL_subname),
- SvUTF8(PL_lex_stuff)
- ? sv_uni_display(dsv,
- newSVpvn_flags(d, tmp, SVs_TEMP | SVf_UTF8),
- tmp,
- UNI_DISPLAY_ISPRINT)
- : pv_pretty(dsv, d, tmp, 60, NULL, NULL,
- PERL_PV_ESCAPE_NONASCII));
- }
- SvCUR_set(PL_lex_stuff, tmp);
+ (void)validate_proto(PL_subname, PL_lex_stuff, ckWARN(WARN_ILLEGALPROTO));
have_proto = TRUE;
#ifdef PERL_MAD
force_next(0);
PL_thistoken = subtoken;
+ PERL_UNUSED_VAR(have_proto);
#else
if (have_proto) {
NEXTVAL_NEXTTOKE.opval =
TOKEN(ANONSUB);
}
#ifndef PERL_MAD
- (void) force_word(PL_oldbufptr + tboffset, WORD,
- FALSE, TRUE, TRUE);
+ force_ident_maybe_lex('&');
#endif
- if (key == KEY_my)
- TOKEN(MYSUB);
TOKEN(SUB);
}
LOP(OP_SYSWRITE,XTERM);
case KEY_tr:
+ case KEY_y:
s = scan_trans(s);
TERM(sublex_start());
if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
return REPORT(0);
pl_yylval.ival = CopLINE(PL_curcop);
+ Perl_ck_warner_d(aTHX_
+ packWARN(WARN_EXPERIMENTAL__SMARTMATCH),
+ "when is experimental");
OPERATOR(WHEN);
case KEY_while:
return REPORT(0);
pl_yylval.ival = OP_XOR;
OPERATOR(OROP);
-
- case KEY_y:
- s = scan_trans(s);
- TERM(sublex_start());
}
}}
}
#pragma segment Main
#endif
+/*
+ S_pending_ident
+
+ Looks up an identifier in the pad or in a package
+
+ Returns:
+ PRIVATEREF if this is a lexical name.
+ WORD if this belongs to a package.
+
+ Structure:
+ if we're in a my declaration
+ croak if they tried to say my($foo::bar)
+ build the ops for a my() declaration
+ if it's an access to a my() variable
+ build ops for access to a my() variable
+ if in a dq string, and they've said @foo and we can't find @foo
+ warn
+ build ops for a bareword
+*/
+
static int
S_pending_ident(pTHX)
{
dVAR;
PADOFFSET tmp = 0;
- /* pit holds the identifier we read and pending_ident is reset */
- char pit = PL_pending_ident;
+ const char pit = (char)pl_yylval.ival;
const STRLEN tokenbuf_len = strlen(PL_tokenbuf);
/* All routes through this function want to know if there is a colon. */
const char *const has_colon = (const char*) memchr (PL_tokenbuf, ':', tokenbuf_len);
- PL_pending_ident = 0;
- /* PL_realtokenstart = realtokenend = PL_bufptr - SvPVX(PL_linestr); */
DEBUG_T({ PerlIO_printf(Perl_debug_log,
"### Pending identifier '%s'\n", PL_tokenbuf); });
pl_yylval.opval = newOP(OP_PADANY, 0);
pl_yylval.opval->op_targ = allocmy(PL_tokenbuf, tokenbuf_len,
UTF ? SVf_UTF8 : 0);
- return PRIVATEREF;
+ return PRIVATEREF;
}
}
sv_catpvn_flags(sym, PL_tokenbuf+1, tokenbuf_len - 1, (UTF ? SV_CATUTF8 : SV_CATBYTES ));
pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
pl_yylval.opval->op_private = OPpCONST_ENTERED;
- gv_fetchsv(sym,
+ if (pit != '&')
+ gv_fetchsv(sym,
(PL_in_eval
? (GV_ADDMULTI | GV_ADDINEVAL)
: GV_ADDMULTI
{
/* Downgraded from fatal to warning 20000522 mjd */
Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
- "Possible unintended interpolation of %"SVf" in string",
- SVfARG(newSVpvn_flags(PL_tokenbuf, tokenbuf_len,
- SVs_TEMP | ( UTF ? SVf_UTF8 : 0 ))));
+ "Possible unintended interpolation of %"UTF8f
+ " in string",
+ UTF8fARG(UTF, tokenbuf_len, PL_tokenbuf));
}
}
/* build ops for a bareword */
- pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpvn_flags(PL_tokenbuf + 1,
+ pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
+ newSVpvn_flags(PL_tokenbuf + 1,
tokenbuf_len - 1,
UTF ? SVf_UTF8 : 0 ));
pl_yylval.opval->op_private = OPpCONST_ENTERED;
- gv_fetchpvn_flags(PL_tokenbuf+1, tokenbuf_len - 1,
+ if (pit != '&')
+ gv_fetchpvn_flags(PL_tokenbuf+1, tokenbuf_len - 1,
(PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : GV_ADD)
| ( UTF ? SVf_UTF8 : 0 ),
((PL_tokenbuf[0] == '$') ? SVt_PV
if (isIDFIRST_lazy_if(s,UTF)) {
const char * const w = s;
s += UTF ? UTF8SKIP(s) : 1;
- while (isALNUM_lazy_if(s,UTF))
+ while (isWORDCHAR_lazy_if(s,UTF))
s += UTF ? UTF8SKIP(s) : 1;
while (s < PL_bufend && isSPACE(*s))
s++;
}
}
-/* Either returns sv, or mortalizes sv and returns a new SV*.
+/* S_new_constant(): do any overload::constant lookup.
+
+ Either returns sv, or mortalizes/frees sv and returns a new SV*.
Best used as sv=new_constant(..., sv, ...).
If s, pv are NULL, calls subroutine with one argument,
- and type is used with error messages only. */
+ and <type> is used with error messages only.
+ <type> is assumed to be well formed UTF-8 */
STATIC SV *
S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen,
dVAR; dSP;
HV * table = GvHV(PL_hintgv); /* ^H */
SV *res;
+ SV *errsv = NULL;
SV **cvp;
SV *cv, *typesv;
const char *why1 = "", *why2 = "", *why3 = "";
PERL_ARGS_ASSERT_NEW_CONSTANT;
+ /* We assume that this is true: */
+ if (*key == 'c') { assert (strEQ(key, "charnames")); }
+ assert(type || s);
/* charnames doesn't work well if there have been errors found */
- if (PL_error_count > 0 && strEQ(key,"charnames"))
+ if (PL_error_count > 0 && *key == 'c')
+ {
+ SvREFCNT_dec_NN(sv);
return &PL_sv_undef;
+ }
+ sv_2mortal(sv); /* Parent created it permanently */
if (!table
|| ! (PL_hints & HINT_LOCALIZE_HH)
|| ! (cvp = hv_fetch(table, key, keylen, FALSE))
|| ! SvOK(*cvp))
{
- SV *msg;
+ char *msg;
/* Here haven't found what we're looking for. If it is charnames,
* perhaps it needs to be loaded. Try doing that before giving up */
- if (strEQ(key,"charnames")) {
+ if (*key == 'c') {
Perl_load_module(aTHX_
0,
newSVpvs("_charnames"),
}
}
if (!table || !(PL_hints & HINT_LOCALIZE_HH)) {
- msg = Perl_newSVpvf(aTHX_
- "Constant(%s) unknown", (type ? type: "undef"));
+ msg = Perl_form(aTHX_
+ "Constant(%.*s) unknown",
+ (int)(type ? typelen : len),
+ (type ? type: s));
}
else {
- why1 = "$^H{";
- why2 = key;
- why3 = "} is not defined";
- report:
- msg = Perl_newSVpvf(aTHX_ "Constant(%s): %s%s%s",
- (type ? type: "undef"), why1, why2, why3);
- }
- yyerror(SvPVX_const(msg));
- SvREFCNT_dec(msg);
- return sv;
+ why1 = "$^H{";
+ why2 = key;
+ why3 = "} is not defined";
+ report:
+ if (*key == 'c') {
+ msg = Perl_form(aTHX_
+ /* The +3 is for '\N{'; -4 for that, plus '}' */
+ "Unknown charname '%.*s'", (int)typelen - 4, type + 3
+ );
+ }
+ else {
+ msg = Perl_form(aTHX_ "Constant(%.*s): %s%s%s",
+ (int)(type ? typelen : len),
+ (type ? type: s), why1, why2, why3);
+ }
+ }
+ yyerror_pv(msg, UTF ? SVf_UTF8 : 0);
+ return SvREFCNT_inc_simple_NN(sv);
}
now_ok:
- sv_2mortal(sv); /* Parent created it permanently */
cv = *cvp;
if (!pv && s)
pv = newSVpvn_flags(s, len, SVs_TEMP);
SPAGAIN ;
/* Check the eval first */
- if (!PL_in_eval && SvTRUE(ERRSV)) {
- sv_catpvs(ERRSV, "Propagated");
- yyerror(SvPV_nolen_const(ERRSV)); /* Duplicates the message inside eval */
+ if (!PL_in_eval && ((errsv = ERRSV), SvTRUE_NN(errsv))) {
+ STRLEN errlen;
+ const char * errstr;
+ sv_catpvs(errsv, "Propagated");
+ errstr = SvPV_const(errsv, errlen);
+ yyerror_pvn(errstr, errlen, 0); /* Duplicates the message inside eval */
(void)POPs;
- res = SvREFCNT_inc_simple(sv);
+ res = SvREFCNT_inc_simple_NN(sv);
}
else {
res = POPs;
- SvREFCNT_inc_simple_void(res);
+ SvREFCNT_inc_simple_void_NN(res);
}
PUTBACK ;
why2 = key;
why3 = "}} did not return a defined value";
sv = res;
+ (void)sv_2mortal(sv);
goto report;
}
return res;
}
+PERL_STATIC_INLINE void
+S_parse_ident(pTHX_ char **s, char **d, char * const e, int allow_package, bool is_utf8) {
+ dVAR;
+ PERL_ARGS_ASSERT_PARSE_IDENT;
+
+ for (;;) {
+ if (*d >= e)
+ Perl_croak(aTHX_ "%s", ident_too_long);
+ if (is_utf8 && isIDFIRST_utf8((U8*)*s)) {
+ /* The UTF-8 case must come first, otherwise things
+ * like c\N{COMBINING TILDE} would start failing, as the
+ * isWORDCHAR_A case below would gobble the 'c' up.
+ */
+
+ char *t = *s + UTF8SKIP(*s);
+ while (isIDCONT_utf8((U8*)t))
+ t += UTF8SKIP(t);
+ if (*d + (t - *s) > e)
+ Perl_croak(aTHX_ "%s", ident_too_long);
+ Copy(*s, *d, t - *s, char);
+ *d += t - *s;
+ *s = t;
+ }
+ else if ( isWORDCHAR_A(**s) ) {
+ do {
+ *(*d)++ = *(*s)++;
+ } while isWORDCHAR_A(**s);
+ }
+ else if (allow_package && **s == '\'' && isIDFIRST_lazy_if(*s+1,is_utf8)) {
+ *(*d)++ = ':';
+ *(*d)++ = ':';
+ (*s)++;
+ }
+ else if (allow_package && **s == ':' && (*s)[1] == ':'
+ /* Disallow things like Foo::$bar. For the curious, this is
+ * the code path that triggers the "Bad name after" warning
+ * when looking for barewords.
+ */
+ && (*s)[2] != '$') {
+ *(*d)++ = *(*s)++;
+ *(*d)++ = *(*s)++;
+ }
+ else
+ break;
+ }
+ return;
+}
+
/* Returns a NUL terminated string, with the length of the string written to
*slp
*/
STATIC char *
-S_scan_word(pTHX_ register char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
+S_scan_word(pTHX_ char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
{
dVAR;
char *d = dest;
char * const e = d + destlen - 3; /* two-character token, ending NUL */
+ bool is_utf8 = cBOOL(UTF);
PERL_ARGS_ASSERT_SCAN_WORD;
- for (;;) {
- if (d >= e)
- Perl_croak(aTHX_ ident_too_long);
- if (isALNUM(*s) || (!UTF && isALNUMC_L1(*s))) /* UTF handled below */
- *d++ = *s++;
- else if (allow_package && (*s == '\'') && isIDFIRST_lazy_if(s+1,UTF)) {
- *d++ = ':';
- *d++ = ':';
- s++;
- }
- else if (allow_package && (s[0] == ':') && (s[1] == ':') && (s[2] != '$')) {
- *d++ = *s++;
- *d++ = *s++;
- }
- else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
- char *t = s + UTF8SKIP(s);
- size_t len;
- while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
- t += UTF8SKIP(t);
- len = t - s;
- if (d + len > e)
- Perl_croak(aTHX_ ident_too_long);
- Copy(s, d, len, char);
- d += len;
- s = t;
- }
- else {
- *d = '\0';
- *slp = d - dest;
- return s;
- }
- }
+ parse_ident(&s, &d, e, allow_package, is_utf8);
+ *d = '\0';
+ *slp = d - dest;
+ return s;
}
STATIC char *
-S_scan_ident(pTHX_ register char *s, register const char *send, char *dest, STRLEN destlen, I32 ck_uni)
+S_scan_ident(pTHX_ char *s, const char *send, char *dest, STRLEN destlen, I32 ck_uni)
{
dVAR;
char *bracket = NULL;
char funny = *s++;
char *d = dest;
char * const e = d + destlen - 3; /* two-character token, ending NUL */
+ bool is_utf8 = cBOOL(UTF);
PERL_ARGS_ASSERT_SCAN_IDENT;
if (isDIGIT(*s)) {
while (isDIGIT(*s)) {
if (d >= e)
- Perl_croak(aTHX_ ident_too_long);
+ Perl_croak(aTHX_ "%s", ident_too_long);
*d++ = *s++;
}
}
else {
- for (;;) {
- if (d >= e)
- Perl_croak(aTHX_ ident_too_long);
- if (isALNUM(*s)) /* UTF handled below */
- *d++ = *s++;
- else if (*s == '\'' && isIDFIRST_lazy_if(s+1,UTF)) {
- *d++ = ':';
- *d++ = ':';
- s++;
- }
- else if (*s == ':' && s[1] == ':') {
- *d++ = *s++;
- *d++ = *s++;
- }
- else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
- char *t = s + UTF8SKIP(s);
- while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
- t += UTF8SKIP(t);
- if (d + (t - s) > e)
- Perl_croak(aTHX_ ident_too_long);
- Copy(s, d, t - s, char);
- d += t - s;
- s = t;
- }
- else
- break;
- }
+ parse_ident(&s, &d, e, 1, is_utf8);
}
*d = '\0';
d = dest;
if (*d) {
+ /* Either a digit variable, or parse_ident() found an identifier
+ (anything valid as a bareword), so job done and return. */
if (PL_lex_state != LEX_NORMAL)
PL_lex_state = LEX_INTERPENDMAYBE;
return s;
}
if (*s == '$' && s[1] &&
- (isALNUM_lazy_if(s+1,UTF) || s[1] == '$' || s[1] == '{' || strnEQ(s+1,"::",2)) )
+ (isIDFIRST_lazy_if(s+1,is_utf8)
+ || isDIGIT_A((U8)s[1])
+ || s[1] == '$'
+ || s[1] == '{'
+ || strnEQ(s+1,"::",2)) )
{
+ /* Dereferencing a value in a scalar variable.
+ The alternatives are different syntaxes for a scalar variable.
+ Using ' as a leading package separator isn't allowed. :: is. */
return s;
}
+ /* Handle the opening { of @{...}, &{...}, *{...}, %{...}, ${...} */
if (*s == '{') {
bracket = s;
s++;
+ while (s < send && SPACE_OR_TAB(*s))
+ s++;
}
- if (s < send) {
- if (UTF) {
+
+#define VALID_LEN_ONE_IDENT(d, u) (isPUNCT_A((U8)(d)) \
+ || isCNTRL_A((U8)(d)) \
+ || isDIGIT_A((U8)(d)) \
+ || (!(u) && !UTF8_IS_INVARIANT((U8)(d))))
+ if (s < send
+ && (isIDFIRST_lazy_if(s, is_utf8) || VALID_LEN_ONE_IDENT(*s, is_utf8)))
+ {
+ if (is_utf8) {
const STRLEN skip = UTF8SKIP(s);
STRLEN i;
d[skip] = '\0';
d[1] = '\0';
}
}
+ /* Convert $^F, ${^F} and the ^F of ${^FOO} to control characters */
if (*d == '^' && *s && isCONTROLVAR(*s)) {
*d = toCTRL(*s);
s++;
}
+ /* Warn about ambiguous code after unary operators if {...} notation isn't
+ used. There's no difference in ambiguity; it's merely a heuristic
+ about when not to warn. */
else if (ck_uni && !bracket)
check_uni();
if (bracket) {
- if (isSPACE(s[-1])) {
- while (s < send) {
- const char ch = *s++;
- if (!SPACE_OR_TAB(ch)) {
- *d = ch;
- break;
- }
- }
- }
- if (isIDFIRST_lazy_if(d,UTF)) {
- d += UTF8SKIP(d);
- if (UTF) {
- char *end = s;
- while ((end < send && isALNUM_lazy_if(end,UTF)) || *end == ':') {
- end += UTF8SKIP(end);
- while (end < send && UTF8_IS_CONTINUED(*end) && is_utf8_mark((U8*)end))
- end += UTF8SKIP(end);
- }
- Copy(s, d, end - s, char);
- d += end - s;
- s = end;
- }
- else {
- while ((isALNUM(*s) || *s == ':') && d < e)
- *d++ = *s++;
- if (d >= e)
- Perl_croak(aTHX_ ident_too_long);
- }
+ /* If we were processing {...} notation then... */
+ if (isIDFIRST_lazy_if(d,is_utf8)) {
+ /* if it starts as a valid identifier, assume that it is one.
+ (the later check for } being at the expected point will trap
+ cases where this doesn't pan out.) */
+ d += is_utf8 ? UTF8SKIP(d) : 1;
+ parse_ident(&s, &d, e, 1, is_utf8);
*d = '\0';
while (s < send && SPACE_OR_TAB(*s))
s++;
if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
+ /* ${foo[0]} and ${foo{bar}} notation. */
if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest, 0)) {
const char * const brack =
(const char *)
}
/* Handle extended ${^Foo} variables
* 1999-02-27 mjd-perl-patch@plover.com */
- else if (!isALNUM(*d) && !isPRINT(*d) /* isCTRL(d) */
- && isALNUM(*s))
+ else if (! isPRINT(*d) /* isCNTRL(d), plus all non-ASCII */
+ && isWORDCHAR(*s))
{
d++;
- while (isALNUM(*s) && d < e) {
+ while (isWORDCHAR(*s) && d < e) {
*d++ = *s++;
}
if (d >= e)
- Perl_croak(aTHX_ ident_too_long);
+ Perl_croak(aTHX_ "%s", ident_too_long);
*d = '\0';
}
+
+ while (s < send && SPACE_OR_TAB(*s))
+ s++;
+
+ /* Expect to find a closing } after consuming any trailing whitespace.
+ */
if (*s == '}') {
s++;
if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
if (PL_lex_state == LEX_NORMAL) {
if (ckWARN(WARN_AMBIGUOUS) &&
(keyword(dest, d - dest, 0)
- || get_cvn_flags(dest, d - dest, UTF ? SVf_UTF8 : 0)))
+ || get_cvn_flags(dest, d - dest, is_utf8 ? SVf_UTF8 : 0)))
{
SV *tmp = newSVpvn_flags( dest, d - dest,
- SVs_TEMP | (UTF ? SVf_UTF8 : 0) );
+ SVs_TEMP | (is_utf8 ? SVf_UTF8 : 0) );
if (funny == '#')
funny = '@';
Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
}
}
else {
+ /* Didn't find the closing } at the point we expected, so restore
+ state such that the next thing to process is the opening { and */
s = bracket; /* let the parser handle it */
*dest = '\0';
}
STRLEN charlen = UTF ? UTF8SKIP(*s) : 1;
if ( charlen != 1 || ! strchr(valid_flags, c) ) {
- if (isALNUM_lazy_if(*s, UTF)) {
+ if (isWORDCHAR_lazy_if(*s, UTF)) {
yyerror_pv(Perl_form(aTHX_ "Unknown regexp modifier \"/%.*s\"", (int)charlen, *s),
UTF ? SVf_UTF8 : 0);
(*s) += charlen;
{
dVAR;
PMOP *pm;
- char *s = scan_str(start,!!PL_madskills,FALSE, PL_reg_state.re_reparsing);
+ char *s;
const char * const valid_flags =
(const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS);
char charset = '\0'; /* character set modifier */
PERL_ARGS_ASSERT_SCAN_PAT;
- /* this was only needed for the initial scan_str; set it to false
- * so that any (?{}) code blocks etc are parsed normally */
- PL_reg_state.re_reparsing = FALSE;
+ s = scan_str(start,!!PL_madskills,FALSE, (PL_in_eval & EVAL_RE_REPARSING),
+ TRUE /* look for escaped bracketed metas */ );
+
if (!s) {
const char * const delimiter = skipspace(start);
Perl_croak(aTHX_
pl_yylval.ival = OP_NULL;
- s = scan_str(start,!!PL_madskills,FALSE,FALSE);
+ s = scan_str(start,!!PL_madskills,FALSE,FALSE,
+ TRUE /* look for escaped bracketed metas */ );
if (!s)
Perl_croak(aTHX_ "Substitution pattern not terminated");
#endif
first_start = PL_multi_start;
- s = scan_str(s,!!PL_madskills,FALSE,FALSE);
+ s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE);
if (!s) {
if (PL_lex_stuff) {
SvREFCNT_dec(PL_lex_stuff);
}
sv_catpvs(repl, "{");
sv_catsv(repl, PL_sublex_info.repl);
- if (strchr(SvPVX(PL_sublex_info.repl), '#'))
- sv_catpvs(repl, "\n");
sv_catpvs(repl, "}");
SvEVALED_on(repl);
SvREFCNT_dec(PL_sublex_info.repl);
pl_yylval.ival = OP_NULL;
- s = scan_str(start,!!PL_madskills,FALSE,FALSE);
+ s = scan_str(start,!!PL_madskills,FALSE,FALSE, FALSE);
if (!s)
Perl_croak(aTHX_ "Transliteration pattern not terminated");
}
#endif
- s = scan_str(s,!!PL_madskills,FALSE,FALSE);
+ s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE);
if (!s) {
if (PL_lex_stuff) {
SvREFCNT_dec(PL_lex_stuff);
a whole string being evalled, or the contents of the current quote-
like operator.
- The three methods are:
- - Steal lines from the input stream (stream)
- - Scan the heredoc in PL_linestr and remove it therefrom (linestr)
- - Peek at the PL_linestr of outer lexing scopes (peek)
-
- They are used in these cases:
- file scope or filtered eval stream
- string eval linestr
- multiline quoted construct linestr
- single-line quoted construct in file stream
- single-line quoted construct in eval or quote peek
+ The two basic methods are:
+ - Steal lines from the input stream
+ - Scan the heredoc in PL_linestr and remove it therefrom
- Single-line also applies to heredocs that begin on the last line of a
- quote-like operator.
+ In a file scope or filtered eval, the first method is used; in a
+ string eval, the second.
- Peeking within a quote also involves falling back to the stream method,
- if the outer quote-like operators are all on one line (or the heredoc
- marker is on the last line).
+ In a quote-like operator, we have to choose between the two,
+ depending on where we can find a newline. We peek into outer lex-
+ ing scopes until we find one with a newline in it. If we reach the
+ outermost lexing scope and it is a file, we use the stream method.
+ Otherwise it is treated as an eval.
*/
STATIC char *
-S_scan_heredoc(pTHX_ register char *s)
+S_scan_heredoc(pTHX_ char *s)
{
dVAR;
- SV *herewas;
I32 op_type = OP_SCALAR;
I32 len;
SV *tmpstr;
char term;
- const char *found_newline = 0;
char *d;
char *e;
char *peek;
const bool infile = PL_rsfp || PL_parser->filtered;
+ LEXSHARED *shared = PL_parser->lex_shared;
#ifdef PERL_MAD
I32 stuffstart = s - SvPVX(PL_linestr);
char *tstart;
s++, term = '\'';
else
term = '"';
- if (!isALNUM_lazy_if(s,UTF))
+ if (!isWORDCHAR_lazy_if(s,UTF))
deprecate("bare << to mean <<\"\"");
- for (; isALNUM_lazy_if(s,UTF); s++) {
+ for (; isWORDCHAR_lazy_if(s,UTF); s++) {
if (d < e)
*d++ = *s;
}
s = olds;
}
#endif
- if ((infile && !PL_lex_inwhat)
- || !(found_newline = (char*)memchr((void*)s, '\n', PL_bufend - s))) {
- herewas = newSVpvn(s,PL_bufend-s);
- }
- else {
-#ifdef PERL_MAD
- herewas = newSVpvn(s-1,found_newline-s+1);
-#else
- s--;
- herewas = newSVpvn(s,found_newline-s);
-#endif
- }
#ifdef PERL_MAD
if (PL_madskills) {
tstart = SvPVX(PL_linestr) + stuffstart;
else
PL_thisstuff = newSVpvn(tstart, s - tstart);
}
-#endif
- s += SvCUR(herewas);
-#ifdef PERL_MAD
stuffstart = s - SvPVX(PL_linestr);
-
- if (found_newline)
- s--;
#endif
tmpstr = newSV_type(SVt_PVIV);
SvIV_set(tmpstr, '\\');
}
- CLINE;
- PL_multi_start = CopLINE(PL_curcop);
+ PL_multi_start = CopLINE(PL_curcop) + 1;
PL_multi_open = PL_multi_close = '<';
- if (PL_lex_inwhat && !found_newline) {
- /* Peek into the line buffer of the parent lexing scope, going up
- as many levels as necessary to find one with a newline after
- bufptr. See the comments in sublex_push for how IVX and NVX
- are abused.
- */
- SV *linestr = NUM2PTR(SV *, SvNVX(PL_linestr));
- char *bufptr = PL_sublex_info.super_bufptr;
- char *bufend = SvEND(linestr);
- char * const olds = s - SvCUR(herewas);
- char * const real_olds = s;
- if (!bufptr) {
- s = real_olds;
- goto streaming;
- }
- while (!(s = (char *)memchr((void *)bufptr, '\n', bufend-bufptr))){
- if (SvIVX(linestr)) {
- bufptr = INT2PTR(char *, SvIVX(linestr));
- linestr = NUM2PTR(SV *, SvNVX(linestr));
- bufend = SvEND(linestr);
- }
- else if (infile) {
- s = real_olds;
+ /* inside a string eval or quote-like operator */
+ if (!infile || PL_lex_inwhat) {
+ SV *linestr;
+ char *bufend;
+ char * const olds = s;
+ PERL_CONTEXT * const cx = &cxstack[cxstack_ix];
+ /* These two fields are not set until an inner lexing scope is
+ entered. But we need them set here. */
+ shared->ls_bufptr = s;
+ shared->ls_linestr = PL_linestr;
+ if (PL_lex_inwhat)
+ /* Look for a newline. If the current buffer does not have one,
+ peek into the line buffer of the parent lexing scope, going
+ up as many levels as necessary to find one with a newline
+ after bufptr.
+ */
+ while (!(s = (char *)memchr(
+ (void *)shared->ls_bufptr, '\n',
+ SvEND(shared->ls_linestr)-shared->ls_bufptr
+ ))) {
+ shared = shared->ls_prev;
+ /* shared is only null if we have gone beyond the outermost
+ lexing scope. In a file, we will have broken out of the
+ loop in the previous iteration. In an eval, the string buf-
+ fer ends with "\n;", so the while condition above will have
+ evaluated to false. So shared can never be null. */
+ assert(shared);
+ /* A LEXSHARED struct with a null ls_prev pointer is the outer-
+ most lexing scope. In a file, shared->ls_linestr at that
+ level is just one line, so there is no body to steal. */
+ if (infile && !shared->ls_prev) {
+ s = olds;
goto streaming;
}
- else {
- s = bufend;
- break;
- }
- }
- d = s;
- while (s < bufend &&
- (*s != '\n' || memNE(s,PL_tokenbuf,len)) ) {
- if (*s++ == '\n')
- ++PL_parser->herelines;
- }
- if (s >= bufend) {
- CopLINE_set(PL_curcop, (line_t)PL_multi_start);
- missingterm(PL_tokenbuf + 1);
+ }
+ else { /* eval */
+ s = (char*)memchr((void*)s, '\n', PL_bufend - s);
+ assert(s);
}
- sv_setpvn(herewas,bufptr,d-bufptr+1);
- sv_setpvn(tmpstr,d+1,s-d);
- s += len - 1;
- sv_catpvn(herewas,s,bufend-s);
- Copy(SvPVX_const(herewas),bufptr,SvCUR(herewas) + 1,char);
- SvCUR_set(linestr,
- bufptr-SvPVX_const(linestr)
- + SvCUR(herewas));
-
- s = olds;
- goto retval;
- }
- else if (!infile || found_newline) {
- char * const olds = s - SvCUR(herewas);
+ linestr = shared->ls_linestr;
+ bufend = SvEND(linestr);
d = s;
- while (s < PL_bufend &&
- (*s != '\n' || memNE(s,PL_tokenbuf,len)) ) {
+ while (s < bufend - len + 1 &&
+ memNE(s,PL_tokenbuf,len) ) {
if (*s++ == '\n')
- ++PL_parser->herelines;
+ ++shared->herelines;
}
- if (s >= PL_bufend) {
- CopLINE_set(PL_curcop, (line_t)PL_multi_start);
- missingterm(PL_tokenbuf + 1);
+ if (s >= bufend - len + 1) {
+ goto interminable;
}
sv_setpvn(tmpstr,d+1,s-d);
#ifdef PERL_MAD
}
#endif
s += len - 1;
- PL_parser->herelines++; /* the preceding stmt passes a newline */
+ /* the preceding stmt passes a newline */
+ shared->herelines++;
/* s now points to the newline after the heredoc terminator.
d points to the newline before the body of the heredoc.
*/
+
+ /* We are going to modify linestr in place here, so set
+ aside copies of the string if necessary for re-evals or
+ (caller $n)[6]. */
/* See the Paranoia note in case LEX_INTERPEND in yylex, for why we
- check PL_sublex_info.re_eval_str. */
- if (PL_sublex_info.re_eval_start || PL_sublex_info.re_eval_str) {
+ check shared->re_eval_str. */
+ if (shared->re_eval_start || shared->re_eval_str) {
/* Set aside the rest of the regexp */
- if (!PL_sublex_info.re_eval_str)
- PL_sublex_info.re_eval_str =
- newSVpvn(PL_sublex_info.re_eval_start,
- PL_bufend - PL_sublex_info.re_eval_start);
- PL_sublex_info.re_eval_start -= s-d;
+ if (!shared->re_eval_str)
+ shared->re_eval_str =
+ newSVpvn(shared->re_eval_start,
+ bufend - shared->re_eval_start);
+ shared->re_eval_start -= s-d;
+ }
+ if (cxstack_ix >= 0 && CxTYPE(cx) == CXt_EVAL &&
+ CxOLD_OP_TYPE(cx) == OP_ENTEREVAL &&
+ cx->blk_eval.cur_text == linestr)
+ {
+ cx->blk_eval.cur_text = newSVsv(linestr);
+ SvSCREAM_on(cx->blk_eval.cur_text);
}
/* Copy everything from s onwards back to d. */
- Move(s,d,PL_bufend-s + 1,char);
- SvCUR_set(PL_linestr, SvCUR(PL_linestr) - (s-d));
- PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
+ Move(s,d,bufend-s + 1,char);
+ SvCUR_set(linestr, SvCUR(linestr) - (s-d));
+ /* Setting PL_bufend only applies when we have not dug deeper
+ into other scopes, because sublex_done sets PL_bufend to
+ SvEND(PL_linestr). */
+ if (shared == PL_parser->lex_shared) PL_bufend = SvEND(linestr);
s = olds;
}
else
- sv_setpvs(tmpstr,""); /* avoid "uninitialized" warning */
- streaming:
- term = PL_tokenbuf[1];
- len--;
- while (s >= PL_bufend) { /* multiple line string? */
+ {
+ SV *linestr_save;
+ streaming:
+ sv_setpvs(tmpstr,""); /* avoid "uninitialized" warning */
+ term = PL_tokenbuf[1];
+ len--;
+ linestr_save = PL_linestr; /* must restore this afterwards */
+ d = s; /* and this */
+ PL_linestr = newSVpvs("");
+ PL_bufend = SvPVX(PL_linestr);
+ while (1) {
#ifdef PERL_MAD
if (PL_madskills) {
tstart = SvPVX(PL_linestr) + stuffstart;
PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
}
#endif
- PL_bufptr = s;
- CopLINE_set(PL_curcop, PL_multi_start + PL_parser->herelines + 1);
+ PL_bufptr = PL_bufend;
+ CopLINE_set(PL_curcop,
+ PL_multi_start + shared->herelines);
if (!lex_next_chunk(LEX_NO_TERM)
&& (!SvCUR(tmpstr) || SvEND(tmpstr)[-1] != '\n')) {
- CopLINE_set(PL_curcop, (line_t)PL_multi_start);
- missingterm(PL_tokenbuf + 1);
+ SvREFCNT_dec(linestr_save);
+ goto interminable;
}
- CopLINE_set(PL_curcop, (line_t)PL_multi_start);
+ CopLINE_set(PL_curcop, (line_t)PL_multi_start - 1);
if (!SvCUR(PL_linestr) || PL_bufend[-1] != '\n') {
- lex_grow_linestr(SvCUR(PL_linestr) + 2);
+ s = lex_grow_linestr(SvLEN(PL_linestr) + 3);
+ /* ^That should be enough to avoid this needing to grow: */
sv_catpvs(PL_linestr, "\n\0");
+ assert(s == SvPVX(PL_linestr));
+ PL_bufend = SvEND(PL_linestr);
}
s = PL_bufptr;
#ifdef PERL_MAD
stuffstart = s - SvPVX(PL_linestr);
#endif
- PL_parser->herelines++;
- PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
+ shared->herelines++;
PL_last_lop = PL_last_uni = NULL;
#ifndef PERL_STRICT_CR
if (PL_bufend - PL_linestart >= 2) {
PL_bufend[-1] = '\n';
#endif
if (*s == term && memEQ(s,PL_tokenbuf + 1,len)) {
- STRLEN off = PL_bufend - 1 - SvPVX_const(PL_linestr);
- *(SvPVX(PL_linestr) + off ) = ' ';
- lex_grow_linestr(SvCUR(PL_linestr) + SvCUR(herewas) + 1);
- sv_catsv(PL_linestr,herewas);
+ SvREFCNT_dec(PL_linestr);
+ PL_linestr = linestr_save;
+ PL_linestart = SvPVX(linestr_save);
PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
- s = SvPVX(PL_linestr) + off; /* In case PV of PL_linestr moved. */
+ s = d;
+ break;
}
else {
- s = PL_bufend;
sv_catsv(tmpstr,PL_linestr);
}
+ }
}
- s++;
-retval:
PL_multi_end = CopLINE(PL_curcop);
if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
SvPV_shrink_to_cur(tmpstr);
}
- SvREFCNT_dec(herewas);
if (!IN_BYTES) {
if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
SvUTF8_on(tmpstr);
PL_lex_stuff = tmpstr;
pl_yylval.ival = op_type;
return s;
+
+ interminable:
+ SvREFCNT_dec(tmpstr);
+ CopLINE_set(PL_curcop, (line_t)PL_multi_start - 1);
+ missingterm(PL_tokenbuf + 1);
}
/* scan_inputsymbol
if (*d == '$' && d[1]) d++;
/* allow <Pkg'VALUE> or <Pkg::VALUE> */
- while (*d && (isALNUM_lazy_if(d,UTF) || *d == '\'' || *d == ':'))
+ while (*d && (isWORDCHAR_lazy_if(d,UTF) || *d == '\'' || *d == ':'))
d += UTF ? UTF8SKIP(d) : 1;
/* If we've tried to read what we allow filehandles to look like, and
if (d - PL_tokenbuf != len) {
pl_yylval.ival = OP_GLOB;
- s = scan_str(start,!!PL_madskills,FALSE,FALSE);
+ s = scan_str(start,!!PL_madskills,FALSE,FALSE, FALSE);
if (!s)
Perl_croak(aTHX_ "Glob not terminated");
return s;
/* scan_str
- takes: start position in buffer
- keep_quoted preserve \ on the embedded delimiter(s)
- keep_delims preserve the delimiters around the string
- re_reparse compiling a run-time /(?{})/:
- collapse // to /, and skip encoding src
+ takes:
+ start position in buffer
+ keep_quoted preserve \ on the embedded delimiter(s)
+ keep_delims preserve the delimiters around the string
+ re_reparse compiling a run-time /(?{})/:
+ collapse // to /, and skip encoding src
+ deprecate_escaped_meta issue a deprecation warning for cer-
+ tain paired metacharacters that appear
+ escaped within it
returns: position to continue reading from buffer
side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
updates the read buffer.
*/
STATIC char *
-S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims, int re_reparse)
+S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims, int re_reparse,
+ bool deprecate_escaped_meta
+ )
{
dVAR;
- SV *sv; /* scalar value: string */
- const char *tmps; /* temp string, used for delimiter matching */
+ SV *sv; /* scalar value: string */
+ const char *tmps; /* temp string, used for delimiter matching */
char *s = start; /* current position in the buffer */
char term; /* terminating character */
char *to; /* current position in the sv's data */
- I32 brackets = 1; /* bracket nesting level */
- bool has_utf8 = FALSE; /* is there any utf8 content? */
- I32 termcode; /* terminating char. code */
- U8 termstr[UTF8_MAXBYTES]; /* terminating string */
- STRLEN termlen; /* length of terminating string */
- int last_off = 0; /* last position for nesting bracket */
+ I32 brackets = 1; /* bracket nesting level */
+ bool has_utf8 = FALSE; /* is there any utf8 content? */
+ I32 termcode; /* terminating char. code */
+ U8 termstr[UTF8_MAXBYTES]; /* terminating string */
+ STRLEN termlen; /* length of terminating string */
+ int last_off = 0; /* last position for nesting bracket */
+ char *escaped_open = NULL;
#ifdef PERL_MAD
int stuffstart;
char *tstart;
PL_multi_close = term;
+ /* A warning is raised if the input parameter requires it for escaped (by a
+ * backslash) paired metacharacters {} [] and () when the delimiters are
+ * those same characters, and the backslash is ineffective. This doesn't
+ * happen for <>, as they aren't metas. */
+ if (deprecate_escaped_meta
+ && (PL_multi_open == PL_multi_close
+ || ! ckWARN_d(WARN_DEPRECATED)
+ || PL_multi_open == '<'))
+ {
+ deprecate_escaped_meta = FALSE;
+ }
+
/* create a new SV to hold the contents. 79 is the SV's initial length.
What a random number. */
sv = newSV_type(SVt_PVIV);
s += termlen;
#ifdef PERL_MAD
tstart = SvPVX(PL_linestr) + stuffstart;
- if (!PL_thisopen && !keep_delims) {
+ if (PL_madskills && !PL_thisopen && !keep_delims) {
PL_thisopen = newSVpvn(tstart, s - tstart);
stuffstart = s - SvPVX(PL_linestr);
}
int offset = s - SvPVX_const(PL_linestr);
const bool found = sv_cat_decode(sv, PL_encoding, PL_linestr,
&offset, (char*)termstr, termlen);
- const char * const ns = SvPVX_const(PL_linestr) + offset;
- char * const svlast = SvEND(sv) - 1;
+ const char *ns;
+ char *svlast;
+
+ if (SvIsCOW(PL_linestr)) {
+ STRLEN bufend_pos, bufptr_pos, oldbufptr_pos;
+ STRLEN oldoldbufptr_pos, linestart_pos, last_uni_pos;
+ STRLEN last_lop_pos, re_eval_start_pos, s_pos;
+ char *buf = SvPVX(PL_linestr);
+ bufend_pos = PL_parser->bufend - buf;
+ bufptr_pos = PL_parser->bufptr - buf;
+ oldbufptr_pos = PL_parser->oldbufptr - buf;
+ oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
+ linestart_pos = PL_parser->linestart - buf;
+ last_uni_pos = PL_parser->last_uni
+ ? PL_parser->last_uni - buf
+ : 0;
+ last_lop_pos = PL_parser->last_lop
+ ? PL_parser->last_lop - buf
+ : 0;
+ re_eval_start_pos =
+ PL_parser->lex_shared->re_eval_start ?
+ PL_parser->lex_shared->re_eval_start - buf : 0;
+ s_pos = s - buf;
+
+ sv_force_normal(PL_linestr);
+
+ buf = SvPVX(PL_linestr);
+ PL_parser->bufend = buf + bufend_pos;
+ PL_parser->bufptr = buf + bufptr_pos;
+ PL_parser->oldbufptr = buf + oldbufptr_pos;
+ PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
+ PL_parser->linestart = buf + linestart_pos;
+ if (PL_parser->last_uni)
+ PL_parser->last_uni = buf + last_uni_pos;
+ if (PL_parser->last_lop)
+ PL_parser->last_lop = buf + last_lop_pos;
+ if (PL_parser->lex_shared->re_eval_start)
+ PL_parser->lex_shared->re_eval_start =
+ buf + re_eval_start_pos;
+ s = buf + s_pos;
+ }
+ ns = SvPVX_const(PL_linestr) + offset;
+ svlast = SvEND(sv) - 1;
for (; s < ns; s++) {
if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
if (*s == '\\' && s+1 < PL_bufend) {
if (!keep_quoted &&
((s[1] == PL_multi_open) || (s[1] == PL_multi_close)))
+ {
s++;
+
+ /* Here, 'deprecate_escaped_meta' is true iff the
+ * delimiters are paired metacharacters, and 's' points
+ * to an occurrence of one of them within the string,
+ * which was preceded by a backslash. If this is a
+ * context where the delimiter is also a metacharacter,
+ * the backslash is useless, and deprecated. () and []
+ * are meta in any context. {} are meta only when
+ * appearing in a quantifier or in things like '\p{'
+ * (but '\\p{' isn't meta). They also aren't meta
+ * unless there is a matching closed, escaped char
+ * later on within the string. If 's' points to an
+ * open, set a flag; if to a close, test that flag, and
+ * raise a warning if it was set */
+
+ if (deprecate_escaped_meta) {
+ if (*s == PL_multi_open) {
+ if (*s != '{') {
+ escaped_open = s;
+ }
+ /* Look for a closing '\}' */
+ else if (regcurly(s, TRUE)) {
+ escaped_open = s;
+ }
+ /* Look for e.g. '\x{' */
+ else if (s - start > 2
+ && _generic_isCC(*(s-2),
+ _CC_BACKSLASH_FOO_LBRACE_IS_META))
+ { /* Exclude '\\x', '\\\\x', etc. */
+ char *lookbehind = s - 4;
+ bool is_meta = TRUE;
+ while (lookbehind >= start
+ && *lookbehind == '\\')
+ {
+ is_meta = ! is_meta;
+ lookbehind--;
+ }
+ if (is_meta) {
+ escaped_open = s;
+ }
+ }
+ }
+ else if (escaped_open) {
+ Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
+ "Useless use of '\\'; doesn't escape metacharacter '%c'", PL_multi_open);
+ escaped_open = NULL;
+ }
+ }
+ }
else
*to++ = *s++;
}
SV *sv = NULL; /* place to put the converted number */
bool floatit; /* boolean: int or float? */
const char *lastub = NULL; /* position of last underbar */
- static char const number_too_long[] = "Number too long";
+ static const char* const number_too_long = "Number too long";
PERL_ARGS_ASSERT_SCAN_NUM;
else {
/* check for end of fixed-length buffer */
if (d >= e)
- Perl_croak(aTHX_ number_too_long);
+ Perl_croak(aTHX_ "%s", number_too_long);
/* if we're ok, copy the character */
*d++ = *s++;
}
for (; isDIGIT(*s) || *s == '_'; s++) {
/* fixed length buffer check */
if (d >= e)
- Perl_croak(aTHX_ number_too_long);
+ Perl_croak(aTHX_ "%s", number_too_long);
if (*s == '_') {
if (lastub && s == lastub + 1)
Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
while (isDIGIT(*s) || *s == '_') {
if (isDIGIT(*s)) {
if (d >= e)
- Perl_croak(aTHX_ number_too_long);
+ Perl_croak(aTHX_ "%s", number_too_long);
*d++ = *s++;
}
else {
case 'v':
vstring:
sv = newSV(5); /* preallocate storage space */
+ ENTER_with_name("scan_vstring");
+ SAVEFREESV(sv);
s = scan_vstring(s, PL_bufend, sv);
+ SvREFCNT_inc_simple_void_NN(sv);
+ LEAVE_with_name("scan_vstring");
break;
}
}
STATIC char *
-S_scan_formline(pTHX_ register char *s)
+S_scan_formline(pTHX_ char *s)
{
dVAR;
char *eol;
CvOUTSIDE(PL_compcv) = MUTABLE_CV(SvREFCNT_inc_simple(outsidecv));
CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
if (outsidecv && CvPADLIST(outsidecv))
- CvPADLIST(PL_compcv)->xpadl_outid = CvPADLIST(outsidecv)->xpadl_id;
+ CvPADLIST(PL_compcv)->xpadl_outid =
+ PadlistNAMES(CvPADLIST(outsidecv));
return oldsavestack_ix;
}
SV *msg;
SV * const where_sv = newSVpvs_flags("", SVs_TEMP);
int yychar = PL_parser->yychar;
- U32 is_utf8 = flags & SVf_UTF8;
PERL_ARGS_ASSERT_YYERROR_PVN;
else
Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
}
- msg = sv_2mortal(newSVpvn_flags(s, len, is_utf8));
+ msg = newSVpvn_flags(s, len, (flags & SVf_UTF8) | SVs_TEMP);
Perl_sv_catpvf(aTHX_ msg, " at %s line %"IVdf", ",
OutCopFILE(PL_curcop), (IV)CopLINE(PL_curcop));
if (context)
- Perl_sv_catpvf(aTHX_ msg, "near \"%"SVf"\"\n",
- SVfARG(newSVpvn_flags(context, contlen,
- SVs_TEMP | (UTF ? SVf_UTF8 : 0))));
+ Perl_sv_catpvf(aTHX_ msg, "near \"%"UTF8f"\"\n",
+ UTF8fARG(UTF, contlen, context));
else
Perl_sv_catpvf(aTHX_ msg, "%"SVf"\n", SVfARG(where_sv));
if (PL_multi_start < PL_multi_end && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1) {
else
qerror(msg);
if (PL_error_count >= 10) {
- if (PL_in_eval && SvCUR(ERRSV))
+ SV * errsv;
+ if (PL_in_eval && ((errsv = ERRSV), SvCUR(errsv)))
Perl_croak(aTHX_ "%"SVf"%s has too many errors.\n",
- SVfARG(ERRSV), OutCopFILE(PL_curcop));
+ SVfARG(errsv), OutCopFILE(PL_curcop));
else
Perl_croak(aTHX_ "%s has too many errors.\n",
OutCopFILE(PL_curcop));
Function must be called like
- sv = newSV(5);
+ sv = sv_2mortal(newSV(5));
s = scan_vstring(s,e,sv);
where s and e are the start and end of the string.
The sv should already be large enough to store the vstring
passed in, for performance reasons.
+This function may croak if fatal warnings are enabled in the
+calling scope, hence the sv_2mortal in the example (to prevent
+a leak). Make sure to do SvREFCNT_inc afterwards if you use
+sv_2mortal.
+
*/
char *
/* Append native character for the rev point */
tmpend = uvchr_to_utf8(tmpbuf, rev);
sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
- if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(rev)))
+ if (!NATIVE_IS_INVARIANT(rev))
SvUTF8_on(sv);
if (pos + 1 < e && *pos == '.' && isDIGIT(pos[1]))
s = ++pos;
if (PL_lex_state == LEX_KNOWNEXT) {
PL_parser->yychar = yylex();
if (PL_parser->yychar == LABEL) {
- SV *lsv;
+ char * const lpv = pl_yylval.pval;
+ STRLEN llen = strlen(lpv);
PL_parser->yychar = YYEMPTY;
- lsv = newSV_type(SVt_PV);
- sv_copypv(lsv, cSVOPx(pl_yylval.opval)->op_sv);
- return lsv;
+ return newSVpvn_flags(lpv, llen, lpv[llen+1] ? SVf_UTF8 : 0);
} else {
yyunlex();
goto no_label;