#define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
#define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
-#define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
+#define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, retval)) /* REPORT() is no longer applied to ao()'s argument; presumably ao() reports the final token type itself (cf. the "return REPORT(toketype)" hunk) -- confirm */
#define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
#define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
#define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
#define FUN0(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
#define FUN0OP(f) return (pl_yylval.opval=f, CLINE, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0OP))
#define FUN1(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
-#define BOop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
-#define BAop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
-#define SHop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
-#define PWop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
+#define BOop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)BITOROP)) /* ao()-wrapped macros pass the raw token type, without REPORT() */
+#define BAop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)BITANDOP))
+#define SHop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)SHIFTOP))
+#define PWop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)POWOP))
#define PMop(f) return(pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
-#define Aop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
+#define Aop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)ADDOP)) /* unlike AopNOASSIGN below, which bypasses ao() and still calls REPORT() directly */
#define AopNOASSIGN(f) return (pl_yylval.ival=f, PL_bufptr=s, REPORT((int)ADDOP))
-#define Mop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
+#define Mop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)MULOP))
#define Eop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
#define Rop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
}
if (name)
Perl_sv_catpv(aTHX_ report, name);
- else if ((char)rv > ' ' && (char)rv <= '~')
+ else if (isGRAPH(rv))
{
Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
if ((char)rv == 'p')
pl_yylval.ival = OP_DORASSIGN;
toketype = ASSIGNOP;
}
- return toketype;
+ return REPORT(toketype);
}
/*
parser->bufend = parser->bufptr + SvCUR(parser->linestr);
parser->last_lop = parser->last_uni = NULL;
- assert(FITS_IN_8_BITS(LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES
+ STATIC_ASSERT_STMT(FITS_IN_8_BITS(LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES
|LEX_DONT_CLOSE_RSFP));
parser->lex_flags = (U8) (flags & (LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES
|LEX_DONT_CLOSE_RSFP));
SV * const sv = newSVpvn_utf8(start, len,
!IN_BYTES
&& UTF
- && !is_ascii_string((const U8*)start, len)
+ && !is_invariant_string((const U8*)start, len)
&& is_utf8_string((const U8*)start, len));
return sv;
}
s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
if (check_keyword) {
char *s2 = PL_tokenbuf;
+ STRLEN len2 = len;
if (allow_pack && len > 6 && strnEQ(s2, "CORE::", 6))
- s2 += 6, len -= 6;
- if (keyword(s2, len, 0))
+ s2 += 6, len2 -= 6;
+ if (keyword(s2, len2, 0))
return start;
}
if (token == METHOD) {
SvPOK_on(sv);
*d = '\0';
/* See Note on sizing above. */
- sv_utf8_upgrade_flags_grow(sv,
- SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
- UNISKIP(uv) + (STRLEN)(send - s) + 1);
+ sv_utf8_upgrade_flags_grow(
+ sv,
+ SV_GMAGIC|SV_FORCE_UTF8_UPGRADE
+ /* Above-latin1 in string
+ * implies no encoding */
+ |SV_UTF8_NO_ENCODING,
+ UNISKIP(uv) + (STRLEN)(send - s) + 1);
d = SvPVX(sv) + SvCUR(sv);
has_utf8 = TRUE;
}
const STRLEN off = d - SvPVX_const(sv);
d = off + SvGROW(sv, off + len + (STRLEN)(send - s) + 1);
}
- if (! SvUTF8(res)) { /* Make sure is \N{} return is UTF-8 */
- sv_utf8_upgrade(res);
+ if (! SvUTF8(res)) { /* Make sure \N{} return is UTF-8 */
+ sv_utf8_upgrade_flags(res, SV_UTF8_NO_ENCODING);
str = SvPV_const(res, len);
}
Copy(str, d, len, char);
" >= %"UVuf, (UV)SvCUR(sv), (UV)SvLEN(sv));
SvPOK_on(sv);
- if (PL_encoding && !has_utf8) {
- sv_recode_to_utf8(sv, PL_encoding);
+ if (IN_ENCODING && !has_utf8) {
+ sv_recode_to_utf8(sv, _get_encoding());
if (SvUTF8(sv))
has_utf8 = TRUE;
}
Perl_croak(aTHX_
"\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
#endif
- case ' ': case '\t': case '\f': case 013:
+ case ' ': case '\t': case '\f': case '\v':
s++;
goto retry;
case '#':
OPERATOR(HASHBRACK);
}
if (PL_expect == XREF && PL_oldoldbufptr != PL_last_lop) {
- /* ${...} or @{...} etc., but not print {...} */
- PL_expect = XTERM;
- break;
+ /* ${...} or @{...} etc., but not print {...}
+ * Skip the disambiguation and treat this as a block.
+ */
+ goto block_expectation;
}
/* This hack serves to disambiguate a pair of curlies
* as being a block or an anon hash. Normally, expectation
|| (*t == '=' && t[1] == '>')))
OPERATOR(HASHBRACK);
if (PL_expect == XREF)
- PL_expect = XTERM;
+ {
+ block_expectation:
+ /* If there is an opening brace or 'sub:', treat it
+ as a term to make ${{...}}{k} and &{sub:attr...}
+ dwim. Otherwise, treat it as a statement, so
+ map {no strict; ...} works.
+ */
+ s = skipspace(s);
+ if (*s == '{') {
+ PL_expect = XTERM;
+ break;
+ }
+ if (strnEQ(s, "sub", 3)) {
+ d = s + 3;
+ d = skipspace(d);
+ if (*d == ':') {
+ PL_expect = XTERM;
+ break;
+ }
+ }
+ PL_expect = XSTATE;
+ }
else {
PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
PL_expect = XSTATE;
break;
}
pl_yylval.ival = CopLINE(PL_curcop);
- if (isSPACE(*s) || *s == '#')
- PL_copline = NOLINE; /* invalidate current command line number */
+ PL_copline = NOLINE; /* invalidate current command line number */
TOKEN(formbrack ? '=' : '{');
case '}':
if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
}
/* avoid v123abc() or $h{v1}, allow C<print v10;> */
if (!isALPHA(*start) && (PL_expect == XTERM
- || PL_expect == XSTATE
+ || PL_expect == XREF || PL_expect == XSTATE
|| PL_expect == XTERMORDORDOR)) {
GV *const gv = gv_fetchpvn_flags(s, start - s,
UTF ? SVf_UTF8 : 0, SVt_PVCV);
char tmpbuf[sizeof PL_tokenbuf + 1];
*tmpbuf = '&';
Copy(PL_tokenbuf, tmpbuf+1, len, char);
- off = pad_findmy_pvn(tmpbuf, len+1, UTF ? SVf_UTF8 : 0);
+ off = pad_findmy_pvn(tmpbuf, len+1, 0);
if (off != NOT_IN_PAD) {
assert(off); /* we assume this is boolean-true below */
if (PAD_COMPNAME_FLAGS_isOUR(off)) {
if (!IN_BYTES) {
if (UTF)
PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
- else if (PL_encoding) {
+ else if (IN_ENCODING) {
SV *name;
dSP;
ENTER;
SAVETMPS;
PUSHMARK(sp);
- XPUSHs(PL_encoding);
+ XPUSHs(_get_encoding());
PUTBACK;
call_method("name", G_SCALAR);
SPAGAIN;
}
case KEY___SUB__:
- FUN0OP(newPVOP(OP_RUNCV,0,NULL));
+ FUN0OP(CvCLONE(PL_compcv)
+ ? newOP(OP_RUNCV, 0)
+ : newPVOP(OP_RUNCV,0,NULL));
case KEY_AUTOLOAD:
case KEY_DESTROY:
*PL_tokenbuf = '&';
if (memchr(tmpbuf, ':', len) || key != KEY_sub
|| pad_findmy_pvn(
- PL_tokenbuf, len + 1, UTF ? SVf_UTF8 : 0
+ PL_tokenbuf, len + 1, 0
) != NOT_IN_PAD)
sv_setpvn(PL_subname, tmpbuf, len);
else {
if (!has_colon) {
if (!PL_in_my)
tmp = pad_findmy_pvn(PL_tokenbuf, tokenbuf_len,
- UTF ? SVf_UTF8 : 0);
+ 0);
if (tmp != NOT_IN_PAD) {
/* might be an "our" variable" */
if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
char tmpbuf[256];
Copy(w, tmpbuf+1, s - w, char);
*tmpbuf = '&';
- off = pad_findmy_pvn(tmpbuf, s-w+1, UTF ? SVf_UTF8 : 0);
+ off = pad_findmy_pvn(tmpbuf, s-w+1, 0);
if (off != NOT_IN_PAD) return;
}
Perl_croak(aTHX_ "No comma allowed after %s", what);
/* Is the byte 'd' a legal single character identifier name? 'u' is true
* iff Unicode semantics are to be used. The legal ones are any of:
- * a) ASCII digits
- * b) ASCII punctuation
+ * a) all ASCII characters except:
+ * 1) space-type ones, like \t and SPACE;
+ * 2) NUL;
+ * 3) '{'
+ * The final case currently doesn't get this far in the program, so we
+ * don't test for it. If that were to change, it would be ok to allow it.
* c) When not under Unicode rules, any upper Latin1 character
- * d) \c?, \c\, \c^, \c_, and \cA..\cZ, minus the ones that have traditionally
- * been matched by \s on ASCII platforms. That is: \c?, plus 1-32, minus
- * the \s ones. */
-#define VALID_LEN_ONE_IDENT(d, u) (isPUNCT_A((U8)(d)) \
- || isDIGIT_A((U8)(d)) \
- || (!(u) && !isASCII((U8)(d))) \
- || ((((U8)(d)) < 32) \
- && (((((U8)(d)) >= 14) \
- || (((U8)(d)) <= 8 && (d) != 0) \
- || (((U8)(d)) == 13)))) \
- || (((U8)(d)) == toCTRL('?')))
- if (s < PL_bufend
- && (isIDFIRST_lazy_if(s, is_utf8) || VALID_LEN_ONE_IDENT(*s, is_utf8)))
+ * d) Otherwise, when Unicode rules are used, all XIDS characters.
+ *
+ * Because all ASCII characters have the same representation whether
+ * encoded in UTF-8 or not, we can use the foo_A macros below and '\0' and
+ * '{' without knowing whether the string is UTF-8 or not.
+ * EBCDIC already uses the rules that ASCII platforms will use after the
+ * deprecation cycle; see comment below about the deprecation. */
+/* VALID_LEN_ONE_IDENT(s, is_utf8): true iff the character that 's' points to
+ * may be used as a single-character identifier name.  'is_utf8' is true when
+ * 's' points into UTF-8 encoded text.
+ *
+ *  - EBCDIC: any ASCII-range graphic character; otherwise an IDFIRST
+ *    character under UTF-8, or a Latin-1 graphic other than U+00AD when not
+ *    UTF-8.
+ *  - elsewhere: anything that is neither an ASCII space character nor NUL,
+ *    and which, under UTF-8, is either ASCII or an IDFIRST character.
+ *
+ * Both parameters are expanded more than once, so callers must pass
+ * expressions without side effects.  Every use of a parameter is
+ * parenthesized so that compound arguments expand as intended. */
+#ifdef EBCDIC
+#  define VALID_LEN_ONE_IDENT(s, is_utf8)                                    \
+    (isGRAPH_A(*(s)) || ((is_utf8)                                           \
+                         ? isIDFIRST_utf8((U8*) (s))                         \
+                         : (isGRAPH_L1(*(s))                                 \
+                            && LIKELY((U8) *(s) != LATIN1_TO_NATIVE(0xAD)))))
+#else
+#  define VALID_LEN_ONE_IDENT(s, is_utf8) (! isSPACE_A(*(s))                 \
+                                        && LIKELY(*(s) != '\0')              \
+                                        && (! (is_utf8)                      \
+                                            || isASCII_utf8((U8*) (s))       \
+                                            || isIDFIRST_utf8((U8*) (s))))
+#endif
+    /* Only look at the character if all of it (UTF8SKIP(s) bytes under
+     * UTF-8, otherwise a single byte) fits at or before PL_bufend.  The
+     * conditional has to be parenthesized as a unit: '?:' binds more loosely
+     * than both '-' and '<=', so without the parentheses the comparison
+     * would become the condition of the '?:' and the whole test would
+     * always be true. */
+    if ((s <= PL_bufend - ((is_utf8)
+                          ? UTF8SKIP(s)
+                          : 1))
+        && VALID_LEN_ONE_IDENT(s, is_utf8))
{
- if ( isCNTRL_A((U8)*s) ) {
- deprecate("literal control characters in variable names");
+ /* Deprecate all non-graphic characters. Include SHY as a non-graphic,
+ * because often it has no graphic representation. (We can't get to
+ * here with SHY when 'is_utf8' is true, so no need to include a UTF-8
+ * test for it.) */
+ if ((is_utf8)
+ ? ! isGRAPH_utf8( (U8*) s)
+ : (! isGRAPH_L1( (U8) *s)
+ || UNLIKELY((U8) *(s) == LATIN1_TO_NATIVE(0xAD))))
+ {
+ /* Split messages for back compat */
+ if (isCNTRL_A( (U8) *s)) {
+ deprecate("literal control characters in variable names");
+ }
+ else {
+ deprecate("literal non-graphic characters in variable names");
+ }
}
if (is_utf8) {
origline + 1 + PL_parser->herelines);
if (!lex_next_chunk(LEX_NO_TERM)
&& (!SvCUR(tmpstr) || SvEND(tmpstr)[-1] != '\n')) {
- SvREFCNT_dec(linestr_save);
+ /* Simply freeing linestr_save might seem simpler here, as it
+ does not matter what PL_linestr points to, since we are
+ about to croak; but in a quote-like op, linestr_save
+ will have been prospectively freed already, via
+ SAVEFREESV(PL_linestr) in sublex_push, so it’s easier to
+ restore PL_linestr. */
+ SvREFCNT_dec_NN(PL_linestr);
+ PL_linestr = linestr_save;
goto interminable;
}
CopLINE_set(PL_curcop, origline);
if (!IN_BYTES) {
if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
SvUTF8_on(tmpstr);
- else if (PL_encoding)
- sv_recode_to_utf8(tmpstr, PL_encoding);
+ else if (IN_ENCODING)
+ sv_recode_to_utf8(tmpstr, _get_encoding());
}
PL_lex_stuff = tmpstr;
pl_yylval.ival = op_type;
/* try to find it in the pad for this block, otherwise find
add symbol table ops
*/
- const PADOFFSET tmp = pad_findmy_pvn(d, len, UTF ? SVf_UTF8 : 0);
+ const PADOFFSET tmp = pad_findmy_pvn(d, len, 0);
if (tmp != NOT_IN_PAD) {
if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
sv_catpvn(sv, s, termlen);
s += termlen;
for (;;) {
- if (PL_encoding && !UTF && !re_reparse) {
+ if (IN_ENCODING && !UTF && !re_reparse) {
bool cont = TRUE;
while (cont) {
int offset = s - SvPVX_const(PL_linestr);
- const bool found = sv_cat_decode(sv, PL_encoding, PL_linestr,
+ const bool found = sv_cat_decode(sv, _get_encoding(), PL_linestr,
&offset, (char*)termstr, termlen);
const char *ns;
char *svlast;
/* at this point, we have successfully read the delimited string */
- if (!PL_encoding || UTF || re_reparse) {
+ if (!IN_ENCODING || UTF || re_reparse) {
if (keep_delims)
sv_catpvn(sv, s, termlen);
s += termlen;
}
- if (has_utf8 || (PL_encoding && !re_reparse))
+ if (has_utf8 || (IN_ENCODING && !re_reparse))
SvUTF8_on(sv);
PL_multi_end = CopLINE(PL_curcop);
if (needargs) {
const char *s2 = s;
while (*s2 == '\r' || *s2 == ' ' || *s2 == '\t' || *s2 == '\f'
- || *s2 == 013)
+ || *s2 == '\v')
s2++;
if (*s2 == '{') {
PL_expect = XTERMBLOCK;
if (!IN_BYTES) {
if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff)))
SvUTF8_on(stuff);
- else if (PL_encoding)
- sv_recode_to_utf8(stuff, PL_encoding);
+ else if (IN_ENCODING)
+ sv_recode_to_utf8(stuff, _get_encoding());
}
NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0, stuff);
force_next(THING);
CvFLAGS(PL_compcv) |= flags;
PL_subline = CopLINE(PL_curcop);
- CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB);
+ CvPADLIST_set(PL_compcv, pad_new(padnew_SAVE|padnew_SAVESUB));
CvOUTSIDE(PL_compcv) = MUTABLE_CV(SvREFCNT_inc_simple(outsidecv));
CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
if (outsidecv && CvPADLIST(outsidecv))
- CvPADLIST(PL_compcv)->xpadl_outid =
- PadlistNAMES(CvPADLIST(outsidecv));
+ CvPADLIST(PL_compcv)->xpadl_outid = CvPADLIST(outsidecv)->xpadl_id;
return oldsavestack_ix;
}
PL_in_eval |= EVAL_WARNONLY;
yyerror_pv(s, flags);
- PL_in_eval &= ~EVAL_WARNONLY;
return 0;
}
}
else if (yychar > 255)
sv_catpvs(where_sv, "next token ???");
- else if (yychar == -2) { /* YYEMPTY */
+ else if (yychar == YYEMPTY) {
if (PL_lex_state == LEX_NORMAL ||
(PL_lex_state == LEX_KNOWNEXT && PL_lex_defer == LEX_NORMAL))
sv_catpvs(where_sv, "at end of line");
PL_multi_end = 0;
}
if (PL_in_eval & EVAL_WARNONLY) {
+ PL_in_eval &= ~EVAL_WARNONLY;
Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "%"SVf, SVfARG(msg));
}
else
PL_bufptr = s;
if (d == PL_tokenbuf+1)
return NULL;
- *d = 0;
var = newOP(sigil == '$' ? OP_PADSV : sigil == '@' ? OP_PADAV : OP_PADHV,
OPf_MOD | (OPpLVAL_INTRO<<8));
var->op_targ = allocmy(PL_tokenbuf, d - PL_tokenbuf, UTF ? SVf_UTF8 : 0);
scalar(newUNOP(OP_RV2AV, 0,
newGVOP(OP_GV, 0, PL_defgv))),
newSVOP(OP_CONST, 0, newSViv(1))),
- newLISTOP(OP_DIE, 0, newOP(OP_PUSHMARK, 0),
- newSVOP(OP_CONST, 0,
- newSVpvs("Odd name/value argument "
- "for subroutine"))));
+ op_convert_list(OP_DIE, 0,
+ op_convert_list(OP_SPRINTF, 0,
+ op_append_list(OP_LIST,
+ newSVOP(OP_CONST, 0,
+ newSVpvs("Odd name/value argument for subroutine at %s line %d.\n")),
+ newSLICEOP(0,
+ op_append_list(OP_LIST,
+ newSVOP(OP_CONST, 0, newSViv(1)),
+ newSVOP(OP_CONST, 0, newSViv(2))),
+ newOP(OP_CALLER, 0))))));
if (pos != min_arity)
chkop = newLOGOP(OP_AND, 0,
newBINOP(OP_GT, 0,
scalar(newUNOP(OP_RV2AV, 0,
newGVOP(OP_GV, 0, PL_defgv))),
newSVOP(OP_CONST, 0, newSViv(min_arity))),
- newLISTOP(OP_DIE, 0, newOP(OP_PUSHMARK, 0),
- newSVOP(OP_CONST, 0,
- newSVpvs("Too few arguments for subroutine"))))),
+ op_convert_list(OP_DIE, 0,
+ op_convert_list(OP_SPRINTF, 0,
+ op_append_list(OP_LIST,
+ newSVOP(OP_CONST, 0,
+ newSVpvs("Too few arguments for subroutine at %s line %d.\n")),
+ newSLICEOP(0,
+ op_append_list(OP_LIST,
+ newSVOP(OP_CONST, 0, newSViv(1)),
+ newSVOP(OP_CONST, 0, newSViv(2))),
+ newOP(OP_CALLER, 0))))))),
initops);
}
if (max_arity != -1) {
scalar(newUNOP(OP_RV2AV, 0,
newGVOP(OP_GV, 0, PL_defgv))),
newSVOP(OP_CONST, 0, newSViv(max_arity))),
- newLISTOP(OP_DIE, 0, newOP(OP_PUSHMARK, 0),
- newSVOP(OP_CONST, 0,
- newSVpvs("Too many arguments for subroutine"))))),
+ op_convert_list(OP_DIE, 0,
+ op_convert_list(OP_SPRINTF, 0,
+ op_append_list(OP_LIST,
+ newSVOP(OP_CONST, 0,
+ newSVpvs("Too many arguments for subroutine at %s line %d.\n")),
+ newSLICEOP(0,
+ op_append_list(OP_LIST,
+ newSVOP(OP_CONST, 0, newSViv(1)),
+ newSVOP(OP_CONST, 0, newSViv(2))),
+ newOP(OP_CALLER, 0))))))),
initops);
}
return initops;