# define PL_nextval (PL_parser->nextval)
#endif
-static const char ident_too_long[] = "Identifier too long";
+static const char* const ident_too_long = "Identifier too long";
#ifdef PERL_MAD
# define CURMAD(slot,sv) if (PL_madskills) { curmad(slot,sv); sv = 0; }
* 1999-02-27 mjd-perl-patch@plover.com */
#define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
-#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
+#define SPACE_OR_TAB(c) isBLANK_A(c)
/* LEX_* are values for PL_lex_state, the state of the lexer.
* They are arranged oddly so that the guard on the switch statement
if (name)
Perl_sv_catpv(aTHX_ report, name);
else if ((char)rv > ' ' && (char)rv <= '~')
+ {
Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
+ if ((char)rv == 'p')
+ sv_catpvs(report, " (pending identifier)");
+ }
else if (!rv)
sv_catpvs(report, "EOF");
else
"\t(Missing semicolon on previous line?)\n");
else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
const char *t;
- for (t = PL_oldoldbufptr; (isALNUM_lazy_if(t,UTF) || *t == ':');
+ for (t = PL_oldoldbufptr; (isWORDCHAR_lazy_if(t,UTF) || *t == ':');
t += UTF ? UTF8SKIP(t) : 1)
NOOP;
if (t < PL_bufptr && isSPACE(*t))
Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
- "\t(Do you need to predeclare %"SVf"?)\n",
- SVfARG(newSVpvn_flags(PL_oldoldbufptr, (STRLEN)(t - PL_oldoldbufptr),
- SVs_TEMP | (UTF ? SVf_UTF8 : 0))));
+ "\t(Do you need to predeclare %"UTF8f"?)\n",
+ UTF8fARG(UTF, t - PL_oldoldbufptr, PL_oldoldbufptr));
}
else {
assert(s >= oldbp);
Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
- "\t(Missing operator before %"SVf"?)\n",
- SVfARG(newSVpvn_flags(oldbp, (STRLEN)(s - oldbp),
- SVs_TEMP | (UTF ? SVf_UTF8 : 0))));
+ "\t(Missing operator before %"UTF8f"?)\n",
+ UTF8fARG(UTF, s - oldbp, oldbp));
}
}
PL_bufptr = oldbp;
/* skip space before PL_thistoken */
STATIC char *
-S_skipspace0(pTHX_ register char *s)
+S_skipspace0(pTHX_ char *s)
{
PERL_ARGS_ASSERT_SKIPSPACE0;
/* skip space after PL_thistoken */
STATIC char *
-S_skipspace1(pTHX_ register char *s)
+S_skipspace1(pTHX_ char *s)
{
const char *start = s;
I32 startoff = start - SvPVX(PL_linestr);
}
STATIC char *
-S_skipspace2(pTHX_ register char *s, SV **svp)
+S_skipspace2(pTHX_ char *s, SV **svp)
{
char *start;
const I32 bufptroff = PL_bufptr - SvPVX(PL_linestr);
*/
STATIC char *
-S_skipspace(pTHX_ register char *s)
+S_skipspace(pTHX_ char *s)
{
#ifdef PERL_MAD
char *start = s;
while (isSPACE(*PL_last_uni))
PL_last_uni++;
s = PL_last_uni;
- while (isALNUM_lazy_if(s,UTF) || *s == '-')
+ while (isWORDCHAR_lazy_if(s,UTF) || *s == '-')
s++;
if ((t = strchr(s, '(')) && t < PL_bufptr)
return;
*/
STATIC char *
-S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
+S_force_word(pTHX_ char *start, int token, int check_keyword, int allow_pack)
{
dVAR;
char *s;
start = SKIPSPACE1(start);
s = start;
if (isIDFIRST_lazy_if(s,UTF) ||
- (allow_pack && *s == ':') ||
- (allow_initial_tick && *s == '\'') )
+ (allow_pack && *s == ':') )
{
s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
- if (check_keyword && keyword(PL_tokenbuf, len, 0))
+ if (check_keyword) {
+ char *s2 = PL_tokenbuf;
+ if (allow_pack && len > 6 && strnEQ(s2, "CORE::", 6))
+ s2 += 6, len -= 6;
+ if (keyword(s2, len, 0))
return start;
+ }
start_force(PL_curforce);
if (PL_madskills)
curmad('X', newSVpvn(start,s-start));
*/
STATIC void
-S_force_ident(pTHX_ register const char *s, int kind)
+S_force_ident(pTHX_ const char *s, int kind)
{
dVAR;
SAVEGENERICPV(PL_lex_brackstack);
SAVEGENERICPV(PL_lex_casestack);
SAVEGENERICPV(PL_parser->lex_shared);
+ SAVEBOOL(PL_parser->lex_re_reparsing);
/* The here-doc parser needs to be able to peek into outer lexing
scopes to find the body of the here-doc. So we put PL_linestr and
else
PL_lex_inpat = NULL;
+ PL_parser->lex_re_reparsing = cBOOL(PL_in_eval & EVAL_RE_REPARSING);
+ PL_in_eval &= ~EVAL_RE_REPARSING;
+
return '(';
}
/* include the <}> */
e - backslash_ptr + 1);
if (! SvPOK(res)) {
+ SvREFCNT_dec_NN(res);
return NULL;
}
* validation. */
table = GvHV(PL_hintgv); /* ^H */
cvp = hv_fetchs(table, "charnames", FALSE);
- cv = *cvp;
- if (((rv = SvRV(cv)) != NULL)
- && ((stash = CvSTASH(rv)) != NULL))
+ if (cvp && (cv = *cvp) && SvROK(cv) && ((rv = SvRV(cv)) != NULL)
+ && SvTYPE(rv) == SVt_PVCV && ((stash = CvSTASH(rv)) != NULL))
{
const char * const name = HvNAME(stash);
if strEQ(name, "_charnames") {
if (! isCHARNAME_CONT(*s)) {
goto bad_charname;
}
+ if (*s == ' ' && *(s-1) == ' ' && ckWARN_d(WARN_DEPRECATED)) {
+ Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
+ "A sequence of multiple spaces in a charnames "
+ "alias definition is deprecated");
+ }
s++;
}
+ if (*(s-1) == ' ' && ckWARN_d(WARN_DEPRECATED)) {
+ Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
+ "Trailing white-space in a charnames alias "
+ "definition is deprecated");
+ }
}
else {
/* Similarly for utf8. For invariants can check directly; for other
if (! isCHARNAME_CONT(*s)) {
goto bad_charname;
}
+ if (*s == ' ' && *(s-1) == ' '
+ && ckWARN_d(WARN_DEPRECATED)) {
+ Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
+ "A sequence of multiple spaces in a charnam"
+ "es alias definition is deprecated");
+ }
s++;
}
else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
s += UTF8SKIP(s);
}
}
+ if (*(s-1) == ' ' && ckWARN_d(WARN_DEPRECATED)) {
+ Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
+ "Trailing white-space in a charnames alias "
+ "definition is deprecated");
+ }
}
if (SvUTF8(res)) { /* Don't accept malformed input */
In patterns:
expand:
- \N{ABC} => \N{U+41.42.43}
+ \N{FOO} => \N{U+hex_for_character_FOO}
+ (if FOO expands to multiple characters, expands to \N{U+xx.XX.yy ...})
pass through:
all other \-char, including \N and \N{ apart from \N{ABC}
* far, plus the length the current construct will occupy, plus room for
* the trailing NUL, plus one byte for every input byte still unscanned */
- UV uv;
+ UV uv = UV_MAX; /* Initialize to weird value to try to catch any uses
+ before set */
#ifdef EBCDIC
UV literal_endpoint = 0;
bool native_range = TRUE; /* turned to FALSE if the first endpoint is Unicode. */
this_utf8 = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
}
+ /* Protect sv from errors and fatal warnings. */
+ ENTER_with_name("scan_const");
+ SAVEFREESV(sv);
while (s < send || dorange) {
#endif
if (min > max) {
- SvREFCNT_dec(sv);
Perl_croak(aTHX_
"Invalid range \"%c-%c\" in transliteration operator",
(char)min, (char)max);
/* range begins (ignore - as first or last char) */
else if (*s == '-' && s+1 < send && s != start) {
if (didrange) {
- SvREFCNT_dec(sv);
Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
}
if (has_utf8
(@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
*/
else if (*s == '@' && s[1]) {
- if (isALNUM_lazy_if(s+1,UTF))
+ if (isWORDCHAR_lazy_if(s+1,UTF))
break;
if (strchr(":'{$", s[1]))
break;
else if (PL_lex_inpat
&& (*s != 'N'
|| s[1] != '{'
- || regcurly(s + 1)))
+ || regcurly(s + 1, FALSE)))
{
*d++ = NATIVE_TO_NEED(has_utf8,'\\');
goto default_action;
/* FALL THROUGH */
default:
{
- if ((isALNUMC(*s)))
+ if ((isALPHANUMERIC(*s)))
Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
"Unrecognized escape \\%c passed through",
*s);
case '0': case '1': case '2': case '3':
case '4': case '5': case '6': case '7':
{
- I32 flags = 0;
+ I32 flags = PERL_SCAN_SILENT_ILLDIGIT;
STRLEN len = 3;
uv = NATIVE_TO_UNI(grok_oct(s, &len, &flags, NULL));
s += len;
+ if (len < 3 && s < send && isDIGIT(*s)
+ && ckWARN(WARN_MISC))
+ {
+ Perl_warner(aTHX_ packWARN(WARN_MISC),
+ "%s", form_short_octal_warning(s, len));
+ }
}
goto NUM_ESCAPE_INSERT;
/* eg. \o{24} indicates the octal constant \024 */
case 'o':
{
- STRLEN len;
const char* error;
- bool valid = grok_bslash_o(s, &uv, &len, &error, 1);
- s += len;
+ bool valid = grok_bslash_o(&s, &uv, &error,
+ TRUE, /* Output warning */
+ FALSE, /* Not strict */
+ TRUE, /* Output warnings for
+ non-portables */
+ UTF);
if (! valid) {
yyerror(error);
continue;
/* eg. \x24 indicates the hex constant 0x24 */
case 'x':
{
- STRLEN len;
const char* error;
- bool valid = grok_bslash_x(s, &uv, &len, &error, 1);
- s += len;
+ bool valid = grok_bslash_x(&s, &uv, &error,
+ TRUE, /* Output warning */
+ FALSE, /* Not strict */
+ TRUE, /* Output warnings for
+ non-portables */
+ UTF);
if (! valid) {
yyerror(error);
continue;
/* return the substring (via pl_yylval) only if we parsed anything */
if (s > PL_bufptr) {
- if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) ) {
+ SvREFCNT_inc_simple_void_NN(sv);
+ if ( (PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ))
+ && ! PL_parser->lex_re_reparsing)
+ {
const char *const key = PL_lex_inpat ? "qr" : "q";
const STRLEN keylen = PL_lex_inpat ? 2 : 1;
const char *type;
type, typelen);
}
pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
- } else
- SvREFCNT_dec(sv);
+ }
+ LEAVE_with_name("scan_const");
return s;
}
/* This is the one truly awful dwimmer necessary to conflate C and sed. */
STATIC int
-S_intuit_more(pTHX_ register char *s)
+S_intuit_more(pTHX_ char *s)
{
dVAR;
/* In a pattern, so maybe we have {n,m}. */
if (*s == '{') {
- if (regcurly(s)) {
+ if (regcurly(s, FALSE)) {
return FALSE;
}
return TRUE;
return FALSE;
else {
/* this is terrifying, and it works */
- int weight = 2; /* let's weigh the evidence */
+ int weight;
char seen[256];
- unsigned char un_char = 255, last_un_char;
const char * const send = strchr(s,']');
+ unsigned char un_char, last_un_char;
char tmpbuf[sizeof PL_tokenbuf * 4];
if (!send) /* has to be an expression */
return TRUE;
+ weight = 2; /* let's weigh the evidence */
- Zero(seen,256,char);
if (*s == '$')
weight -= 3;
else if (isDIGIT(*s)) {
else
weight -= 100;
}
+ Zero(seen,256,char);
+ un_char = 255;
for (; s < send; s++) {
last_un_char = un_char;
un_char = (unsigned char)*s;
case '&':
case '$':
weight -= seen[un_char] * 10;
- if (isALNUM_lazy_if(s+1,UTF)) {
+ if (isWORDCHAR_lazy_if(s+1,UTF)) {
int len;
scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
len = (int)strlen(tmpbuf);
weight -= 5; /* cope with negative subscript */
break;
default:
- if (!isALNUM(last_un_char)
+ if (!isWORDCHAR(last_un_char)
&& !(last_un_char == '$' || last_un_char == '@'
|| last_un_char == '&')
&& isALPHA(*s) && s[1] && isALPHA(s[1])) {
if (gv && SvTYPE(gv) == SVt_PVGV && GvIO(gv))
return 0;
if (cv && SvPOK(cv)) {
- const char *proto = CvPROTO(cv);
- if (proto) {
- if (*proto == ';')
- proto++;
- if (*proto == '*')
- return 0;
- }
+ const char *proto = CvPROTO(cv);
+ if (proto) {
+ while (*proto && (isSPACE(*proto) || *proto == ';'))
+ proto++;
+ if (*proto == '*')
+ return 0;
+ }
}
- s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
- /* start is the beginning of the possible filehandle/object,
- * and s is the end of it
- * tmpbuf is a copy of it
- */
if (*start == '$') {
if (cv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY ||
PL_expect = XREF;
return *s == '(' ? FUNCMETH : METHOD;
}
+
+ s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
+ /* start is the beginning of the possible filehandle/object,
+ * and s is the end of it
+ * tmpbuf is a copy of it (but with single quotes as double colons)
+ */
+
if (!keyword(tmpbuf, len, 0)) {
if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
len -= 2;
}
STATIC char *
-S_filter_gets(pTHX_ register SV *sv, STRLEN append)
+S_filter_gets(pTHX_ SV *sv, STRLEN append)
{
dVAR;
force_next(WORD);
}
else if (*s == 'v') {
- s = force_word(s,WORD,FALSE,TRUE,FALSE);
+ s = force_word(s,WORD,FALSE,TRUE);
s = force_version(s, FALSE);
}
}
else {
- s = force_word(s,WORD,FALSE,TRUE,FALSE);
+ s = force_word(s,WORD,FALSE,TRUE);
s = force_version(s, FALSE);
}
pl_yylval.ival = is_use;
char *d;
STRLEN len;
bool bof = FALSE;
+ const bool saw_infix_sigil = PL_parser->saw_infix_sigil;
U8 formbrack = 0;
U32 fake_eof = 0;
DEBUG_T({ if(*PL_bufptr != '(') PerlIO_printf(Perl_debug_log,
"### Interpolated variable\n"); });
PL_expect = XTERM;
- PL_lex_dojoin = (*PL_bufptr == '@');
+ /* for /@a/, we leave the joining for the regex engine to do
+ * (unless we're within \Q etc) */
+ PL_lex_dojoin = (*PL_bufptr == '@'
+ && (!PL_lex_inpat || PL_lex_casemods));
PL_lex_state = LEX_INTERPNORMAL;
if (PL_lex_dojoin) {
start_force(PL_curforce);
s = PL_bufptr;
PL_oldoldbufptr = PL_oldbufptr;
PL_oldbufptr = s;
+ PL_parser->saw_infix_sigil = 0;
retry:
#ifdef PERL_MAD
#endif
switch (*s) {
default:
- if (isIDFIRST_lazy_if(s,UTF))
+ if (UTF ? isIDFIRST_utf8((U8*)s) : isALNUMC(*s))
goto keylookup;
{
SV *dsv = newSVpvs_flags("", SVs_TEMP);
}
goto retry;
case '-':
- if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {
+ if (s[1] && isALPHA(s[1]) && !isWORDCHAR(s[2])) {
I32 ftst = 0;
char tmp;
s++;
if (strnEQ(s,"=>",2)) {
- s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
+ s = force_word(PL_bufptr,WORD,FALSE,FALSE);
DEBUG_T( { printbuf("### Saw unary minus before =>, forcing word %s\n", s); } );
OPERATOR('-'); /* unary minus */
}
s++;
s = SKIPSPACE1(s);
if (isIDFIRST_lazy_if(s,UTF)) {
- s = force_word(s,METHOD,FALSE,TRUE,FALSE);
+ s = force_word(s,METHOD,FALSE,TRUE);
TOKEN(ARROW);
}
else if (*s == '$')
s--;
TOKEN(0);
}
+ PL_parser->saw_infix_sigil = 1;
Mop(OP_MULTIPLY);
case '%':
PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
TOKEN(0);
++s;
+ PL_parser->saw_infix_sigil = 1;
Mop(OP_MODULO);
}
PL_tokenbuf[0] = '%';
if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
TOKEN(0);
s += 2;
+ Perl_ck_warner_d(aTHX_
+ packWARN(WARN_EXPERIMENTAL__SMARTMATCH),
+ "Smartmatch is experimental");
Eop(OP_SMARTMATCH);
}
s++;
}
sv = newSVpvn_flags(s, len, UTF ? SVf_UTF8 : 0);
if (*d == '(') {
- d = scan_str(d,TRUE,TRUE,FALSE);
+ d = scan_str(d,TRUE,TRUE,FALSE, FALSE);
if (!d) {
/* MUST advance bufptr here to avoid bogus
"at end of line" context messages from yyerror().
d++;
if (*d == '}') {
const char minus = (PL_tokenbuf[0] == '-');
- s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
+ s = force_word(s + minus, WORD, FALSE, TRUE);
if (minus)
force_next('-');
}
}
else if (*s == 'q') {
if (++t < PL_bufend
- && (!isALNUM(*t)
+ && (!isWORDCHAR(*t)
|| ((*t == 'q' || *t == 'x') && ++t < PL_bufend
- && !isALNUM(*t))))
+ && !isWORDCHAR(*t))))
{
/* skip q//-like construct */
const char *tmps;
}
else
/* skip plain q word */
- while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
+ while (t < PL_bufend && isWORDCHAR_lazy_if(t,UTF))
t += UTF8SKIP(t);
}
- else if (isALNUM_lazy_if(t,UTF)) {
+ else if (isWORDCHAR_lazy_if(t,UTF)) {
t += UTF8SKIP(t);
- while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
+ while (t < PL_bufend && isWORDCHAR_lazy_if(t,UTF))
t += UTF8SKIP(t);
}
while (t < PL_bufend && isSPACE(*t))
s--;
TOKEN(0);
}
+ PL_parser->saw_infix_sigil = 1;
BAop(OP_BIT_AND);
}
if (*t == '/' || *t == '?' ||
((*t == 'm' || *t == 's' || *t == 'y')
- && !isALNUM(t[1])) ||
- (*t == 't' && t[1] == 'r' && !isALNUM(t[2])))
+ && !isWORDCHAR(t[1])) ||
+ (*t == 't' && t[1] == 'r' && !isWORDCHAR(t[2])))
Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
"!=~ should be !~");
}
if (ckWARN(WARN_SYNTAX)) {
char *t = s+1;
- while (isSPACE(*t) || isALNUM_lazy_if(t,UTF) || *t == '$')
+ while (isSPACE(*t) || isWORDCHAR_lazy_if(t,UTF) || *t == '$')
t++;
if (*t++ == ',') {
PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
if (*t == ';'
&& get_cvn_flags(tmpbuf, len, UTF ? SVf_UTF8 : 0))
Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
- "You need to quote \"%"SVf"\"",
- SVfARG(newSVpvn_flags(tmpbuf, len,
- SVs_TEMP | (UTF ? SVf_UTF8 : 0))));
+ "You need to quote \"%"UTF8f"\"",
+ UTF8fARG(UTF, len, tmpbuf));
}
}
}
if (*s == '[' || *s == '{') {
if (ckWARN(WARN_SYNTAX)) {
const char *t = s + 1;
- while (*t && (isALNUM_lazy_if(t,UTF) || strchr(" \t$#+-'\"", *t)))
+ while (*t && (isWORDCHAR_lazy_if(t,UTF) || strchr(" \t$#+-'\"", *t)))
t += UTF ? UTF8SKIP(t) : 1;
if (*t == '}' || *t == ']') {
t++;
PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
/* diag_listed_as: Scalar value @%s[%s] better written as $%s[%s] */
Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
- "Scalar value %"SVf" better written as $%"SVf,
- SVfARG(newSVpvn_flags(PL_bufptr, (STRLEN)(t-PL_bufptr),
- SVs_TEMP | (UTF ? SVf_UTF8 : 0 ))),
- SVfARG(newSVpvn_flags(PL_bufptr+1, (STRLEN)(t-PL_bufptr-1),
- SVs_TEMP | (UTF ? SVf_UTF8 : 0 ))));
+ "Scalar value %"UTF8f" better written as $%"UTF8f,
+ UTF8fARG(UTF, t-PL_bufptr, PL_bufptr),
+ UTF8fARG(UTF, t-PL_bufptr-1, PL_bufptr+1));
}
}
}
if (PL_oldoldbufptr == PL_last_uni
&& (*PL_last_uni != 's' || s - PL_last_uni < 5
|| memNE(PL_last_uni, "study", 5)
- || isALNUM_lazy_if(PL_last_uni+5,UTF)
+ || isWORDCHAR_lazy_if(PL_last_uni+5,UTF)
))
check_uni();
if (*s == '?')
TERM(THING);
case '\'':
- s = scan_str(s,!!PL_madskills,FALSE,FALSE);
+ s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE);
DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
if (PL_expect == XOPERATOR) {
if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
TERM(sublex_start());
case '"':
- s = scan_str(s,!!PL_madskills,FALSE,FALSE);
+ s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE);
DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
if (PL_expect == XOPERATOR) {
if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
TERM(sublex_start());
case '`':
- s = scan_str(s,!!PL_madskills,FALSE,FALSE);
+ s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE);
DEBUG_T( { printbuf("### Saw backtick string before %s\n", s); } );
if (PL_expect == XOPERATOR)
no_op("Backticks",s);
gv = gv_fetchsv(sv, GV_NOADD_NOINIT | SvUTF8(sv),
SVt_PVCV);
off = 0;
+ if (!gv) {
+ sv_free(sv);
+ sv = NULL;
+ goto just_a_word;
+ }
}
else {
rv2cv_op = newOP(OP_PADANY, 0);
rv2cv_op->op_targ = off;
- rv2cv_op = (OP*)newCVREF(0, rv2cv_op);
- cv = (CV *)PAD_SV(off);
+ cv = find_lexical_cv(off);
}
lex = TRUE;
goto just_a_word;
s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
TRUE, &morelen);
if (!morelen)
- Perl_croak(aTHX_ "Bad name after %"SVf"%s",
- SVfARG(newSVpvn_flags(PL_tokenbuf, len,
- (UTF ? SVf_UTF8 : 0) | SVs_TEMP )),
+ Perl_croak(aTHX_ "Bad name after %"UTF8f"%s",
+ UTF8fARG(UTF, len, PL_tokenbuf),
*s == '\'' ? "'" : "::");
len += morelen;
pkgname = 1;
if (ckWARN(WARN_BAREWORD)
&& ! gv_fetchpvn_flags(PL_tokenbuf, len, UTF ? SVf_UTF8 : 0, SVt_PVHV))
Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
- "Bareword \"%"SVf"\" refers to nonexistent package",
- SVfARG(newSVpvn_flags(PL_tokenbuf, len,
- (UTF ? SVf_UTF8 : 0) | SVs_TEMP)));
+ "Bareword \"%"UTF8f"\" refers to nonexistent package",
+ UTF8fARG(UTF, len, PL_tokenbuf));
len -= 2;
PL_tokenbuf[len] = '\0';
gv = NULL;
if (*s == '=' && s[1] == '>' && !pkgname) {
op_free(rv2cv_op);
CLINE;
+ /* This is our own scalar, created a few lines above,
+ so this is safe. */
+ SvREADONLY_off(cSVOPx(pl_yylval.opval)->op_sv);
sv_setpv(((SVOP*)pl_yylval.opval)->op_sv, PL_tokenbuf);
if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len))
SvUTF8_on(((SVOP*)pl_yylval.opval)->op_sv);
+ SvREADONLY_on(cSVOPx(pl_yylval.opval)->op_sv);
TERM(WORD);
}
if (cv) {
if (lastchar == '-' && penultchar != '-') {
- const SV *tmpsv = newSVpvn_flags( PL_tokenbuf, len ? len : strlen(PL_tokenbuf), (UTF ? SVf_UTF8 : 0) | SVs_TEMP );
+ const STRLEN l = len ? len : strlen(PL_tokenbuf);
Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
- "Ambiguous use of -%"SVf" resolved as -&%"SVf"()",
- SVfARG(tmpsv), SVfARG(tmpsv));
+ "Ambiguous use of -%"UTF8f" resolved as -&%"UTF8f"()",
+ UTF8fARG(UTF, l, PL_tokenbuf),
+ UTF8fARG(UTF, l, PL_tokenbuf));
}
/* Check for a constant sub */
if ((sv = cv_const_sv(cv))) {
}
op_free(pl_yylval.opval);
- pl_yylval.opval = rv2cv_op;
+ pl_yylval.opval =
+ off ? (OP *)newCVREF(0, rv2cv_op) : rv2cv_op;
pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
PL_last_lop = PL_oldbufptr;
PL_last_lop_op = OP_ENTERSUB;
STRLEN protolen = CvPROTOLEN(cv);
const char *proto = CvPROTO(cv);
bool optional;
+ proto = S_strip_spaces(aTHX_ proto, &protolen);
if (!protolen)
TERM(FUNC0SUB);
if ((optional = *proto == ';'))
gv = gv_fetchpv(PL_tokenbuf, GV_ADD | ( UTF ? SVf_UTF8 : 0 ),
SVt_PVCV);
op_free(pl_yylval.opval);
- pl_yylval.opval = rv2cv_op;
+ pl_yylval.opval =
+ off ? (OP *)newCVREF(0, rv2cv_op) : rv2cv_op;
pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
PL_last_lop = PL_oldbufptr;
PL_last_lop_op = OP_ENTERSUB;
op_free(rv2cv_op);
safe_bareword:
- if ((lastchar == '*' || lastchar == '%' || lastchar == '&')) {
+ if ((lastchar == '*' || lastchar == '%' || lastchar == '&')
+ && saw_infix_sigil) {
Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
- "Operator or semicolon missing before %c%"SVf,
- lastchar, SVfARG(newSVpvn_flags(PL_tokenbuf,
- strlen(PL_tokenbuf),
- SVs_TEMP | (UTF ? SVf_UTF8 : 0))));
+ "Operator or semicolon missing before %c%"UTF8f,
+ lastchar,
+ UTF8fARG(UTF, strlen(PL_tokenbuf),
+ PL_tokenbuf));
Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
"Ambiguous use of %c resolved as operator %c",
lastchar, lastchar);
goto just_a_word;
}
if (!tmp)
- Perl_croak(aTHX_ "CORE::%"SVf" is not a keyword",
- SVfARG(newSVpvn_flags(PL_tokenbuf, len,
- (UTF ? SVf_UTF8 : 0) | SVs_TEMP)));
+ Perl_croak(aTHX_ "CORE::%"UTF8f" is not a keyword",
+ UTF8fARG(UTF, len, PL_tokenbuf));
if (tmp < 0)
tmp = -tmp;
else if (tmp == KEY_require || tmp == KEY_do
case KEY_dump:
PL_expect = XOPERATOR;
- s = force_word(s,WORD,TRUE,FALSE,FALSE);
+ s = force_word(s,WORD,TRUE,FALSE);
LOOPX(OP_DUMP);
case KEY_else:
case KEY_goto:
PL_expect = XOPERATOR;
- s = force_word(s,WORD,TRUE,FALSE,FALSE);
+ s = force_word(s,WORD,TRUE,FALSE);
LOOPX(OP_GOTO);
case KEY_gmtime:
case KEY_given:
pl_yylval.ival = CopLINE(PL_curcop);
+ Perl_ck_warner_d(aTHX_
+ packWARN(WARN_EXPERIMENTAL__SMARTMATCH),
+ "given is experimental");
OPERATOR(GIVEN);
case KEY_glob:
case KEY_last:
PL_expect = XOPERATOR;
- s = force_word(s,WORD,TRUE,FALSE,FALSE);
+ s = force_word(s,WORD,TRUE,FALSE);
LOOPX(OP_LAST);
case KEY_lc:
"Experimental \"%s\" subs not enabled",
tmp == KEY_my ? "my" :
tmp == KEY_state ? "state" : "our");
+ Perl_ck_warner_d(aTHX_
+ packWARN(WARN_EXPERIMENTAL__LEXICAL_SUBS),
+ "The lexical_subs feature is experimental");
goto really_sub;
}
PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
case KEY_next:
PL_expect = XOPERATOR;
- s = force_word(s,WORD,TRUE,FALSE,FALSE);
+ s = force_word(s,WORD,TRUE,FALSE);
LOOPX(OP_NEXT);
case KEY_ne:
case KEY_open:
s = SKIPSPACE1(s);
if (isIDFIRST_lazy_if(s,UTF)) {
- const char *t;
- for (d = s; isALNUM_lazy_if(d,UTF);) {
- d += UTF ? UTF8SKIP(d) : 1;
- if (UTF) {
- while (UTF8_IS_CONTINUED(*d) && is_utf8_mark((U8*)d)) {
- d += UTF ? UTF8SKIP(d) : 1;
- }
- }
- }
+ const char *t;
+ d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE,
+ &len);
for (t=d; isSPACE(*t);)
t++;
if ( *t && strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
&& !(t[0] == ':' && t[1] == ':')
&& !keyword(s, d-s, 0)
) {
- SV *tmpsv = newSVpvn_flags(s, (STRLEN)(d-s),
- SVs_TEMP | (UTF ? SVf_UTF8 : 0));
Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
- "Precedence problem: open %"SVf" should be open(%"SVf")",
- SVfARG(tmpsv), SVfARG(tmpsv));
+ "Precedence problem: open %"UTF8f" should be open(%"UTF8f")",
+ UTF8fARG(UTF, d-s, s), UTF8fARG(UTF, d-s, s));
}
}
LOP(OP_OPEN,XTERM);
LOP(OP_PACK,XTERM);
case KEY_package:
- s = force_word(s,WORD,FALSE,TRUE,FALSE);
+ s = force_word(s,WORD,FALSE,TRUE);
s = SKIPSPACE1(s);
s = force_strict_version(s);
PL_lex_expect = XBLOCK;
LOP(OP_PIPE_OP,XTERM);
case KEY_q:
- s = scan_str(s,!!PL_madskills,FALSE,FALSE);
+ s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE);
if (!s)
missingterm(NULL);
pl_yylval.ival = OP_CONST;
case KEY_qw: {
OP *words = NULL;
- s = scan_str(s,!!PL_madskills,FALSE,FALSE);
+ s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE);
if (!s)
missingterm(NULL);
PL_expect = XOPERATOR;
}
case KEY_qq:
- s = scan_str(s,!!PL_madskills,FALSE,FALSE);
+ s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE);
if (!s)
missingterm(NULL);
pl_yylval.ival = OP_STRINGIFY;
TERM(sublex_start());
case KEY_qx:
- s = scan_str(s,!!PL_madskills,FALSE,FALSE);
+ s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE);
if (!s)
missingterm(NULL);
readpipe_override();
|| (s = force_version(s, TRUE), *s == 'v'))
{
*PL_tokenbuf = '\0';
- s = force_word(s,WORD,TRUE,TRUE,FALSE);
+ s = force_word(s,WORD,TRUE,TRUE);
if (isIDFIRST_lazy_if(PL_tokenbuf,UTF))
gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf),
GV_ADD | (UTF ? SVf_UTF8 : 0));
case KEY_redo:
PL_expect = XOPERATOR;
- s = force_word(s,WORD,TRUE,FALSE,FALSE);
+ s = force_word(s,WORD,TRUE,FALSE);
LOOPX(OP_REDO);
case KEY_rename:
checkcomma(s,PL_tokenbuf,"subroutine name");
s = SKIPSPACE1(s);
PL_expect = XTERM;
- s = force_word(s,WORD,TRUE,TRUE,FALSE);
+ s = force_word(s,WORD,TRUE,TRUE);
LOP(OP_SORT,XREF);
case KEY_split:
really_sub:
{
char * const tmpbuf = PL_tokenbuf + 1;
- SSize_t tboffset = 0;
expectation attrful;
bool have_name, have_proto;
const int key = tmp;
+#ifndef PERL_MAD
+ SV *format_name = NULL;
+#endif
#ifdef PERL_MAD
SV *tmpwhite = 0;
PL_expect = XBLOCK;
attrful = XATTRBLOCK;
- /* remember buffer pos'n for later force_word */
- tboffset = s - PL_oldbufptr;
d = scan_word(s, tmpbuf, sizeof PL_tokenbuf - 1, TRUE,
&len);
#ifdef PERL_MAD
if (PL_madskills)
nametoke = newSVpvn_flags(s, d - s, SvUTF8(PL_linestr));
+#else
+ if (key == KEY_format)
+ format_name = S_newSV_maybe_utf8(aTHX_ s, d - s);
#endif
*PL_tokenbuf = '&';
if (memchr(tmpbuf, ':', len) || key != KEY_sub
PL_thistoken = subtoken;
s = d;
#else
- if (have_name)
- (void) force_word(PL_oldbufptr + tboffset, WORD,
- FALSE, TRUE, TRUE);
+ if (format_name) {
+ start_force(PL_curforce);
+ NEXTVAL_NEXTTOKE.opval
+ = (OP*)newSVOP(OP_CONST,0, format_name);
+ NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
+ force_next(WORD);
+ }
#endif
PREBLOCK(FORMAT);
}
const bool warnillegalproto = ckWARN(WARN_ILLEGALPROTO);
STRLEN tmplen;
- s = scan_str(s,!!PL_madskills,FALSE,FALSE);
+ s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE);
if (!s)
Perl_croak(aTHX_ "Prototype not terminated");
/* strip spaces and check for bad characters */
force_next(0);
PL_thistoken = subtoken;
+ PERL_UNUSED_VAR(have_proto);
#else
if (have_proto) {
NEXTVAL_NEXTTOKE.opval =
if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
return REPORT(0);
pl_yylval.ival = CopLINE(PL_curcop);
+ Perl_ck_warner_d(aTHX_
+ packWARN(WARN_EXPERIMENTAL__SMARTMATCH),
+ "when is experimental");
OPERATOR(WHEN);
case KEY_while:
{
/* Downgraded from fatal to warning 20000522 mjd */
Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
- "Possible unintended interpolation of %"SVf" in string",
- SVfARG(newSVpvn_flags(PL_tokenbuf, tokenbuf_len,
- SVs_TEMP | ( UTF ? SVf_UTF8 : 0 ))));
+ "Possible unintended interpolation of %"UTF8f
+ " in string",
+ UTF8fARG(UTF, tokenbuf_len, PL_tokenbuf));
}
}
if (isIDFIRST_lazy_if(s,UTF)) {
const char * const w = s;
s += UTF ? UTF8SKIP(s) : 1;
- while (isALNUM_lazy_if(s,UTF))
+ while (isWORDCHAR_lazy_if(s,UTF))
s += UTF ? UTF8SKIP(s) : 1;
while (s < PL_bufend && isSPACE(*s))
s++;
}
}
-/* Either returns sv, or mortalizes sv and returns a new SV*.
+/* S_new_constant(): do any overload::constant lookup.
+
+ Either returns sv, or mortalizes/frees sv and returns a new SV*.
Best used as sv=new_constant(..., sv, ...).
If s, pv are NULL, calls subroutine with one argument,
and <type> is used with error messages only.
dVAR; dSP;
HV * table = GvHV(PL_hintgv); /* ^H */
SV *res;
+ SV *errsv = NULL;
SV **cvp;
SV *cv, *typesv;
const char *why1 = "", *why2 = "", *why3 = "";
PERL_ARGS_ASSERT_NEW_CONSTANT;
+ /* We assume that this is true: */
+ if (*key == 'c') { assert (strEQ(key, "charnames")); }
+ assert(type || s);
/* charnames doesn't work well if there have been errors found */
- if (PL_error_count > 0 && strEQ(key,"charnames"))
+ if (PL_error_count > 0 && *key == 'c')
+ {
+ SvREFCNT_dec_NN(sv);
return &PL_sv_undef;
+ }
+ sv_2mortal(sv); /* Parent created it permanently */
if (!table
|| ! (PL_hints & HINT_LOCALIZE_HH)
|| ! (cvp = hv_fetch(table, key, keylen, FALSE))
|| ! SvOK(*cvp))
{
- SV *msg;
+ char *msg;
/* Here haven't found what we're looking for. If it is charnames,
* perhaps it needs to be loaded. Try doing that before giving up */
- if (strEQ(key,"charnames")) {
+ if (*key == 'c') {
Perl_load_module(aTHX_
0,
newSVpvs("_charnames"),
}
}
if (!table || !(PL_hints & HINT_LOCALIZE_HH)) {
- msg = Perl_newSVpvf(aTHX_
- "Constant(%s) unknown", (type ? type: "undef"));
+ msg = Perl_form(aTHX_
+ "Constant(%.*s) unknown",
+ (int)(type ? typelen : len),
+ (type ? type: s));
}
else {
why1 = "$^H{";
why2 = key;
why3 = "} is not defined";
report:
- if (strEQ(key,"charnames")) {
- yyerror_pv(Perl_form(aTHX_
+ if (*key == 'c') {
+ msg = Perl_form(aTHX_
/* The +3 is for '\N{'; -4 for that, plus '}' */
"Unknown charname '%.*s'", (int)typelen - 4, type + 3
- ),
- UTF ? SVf_UTF8 : 0);
- return sv;
+ );
}
else {
- msg = Perl_newSVpvf(aTHX_ "Constant(%s): %s%s%s",
- (type ? type: "undef"), why1, why2, why3);
+ msg = Perl_form(aTHX_ "Constant(%.*s): %s%s%s",
+ (int)(type ? typelen : len),
+ (type ? type: s), why1, why2, why3);
}
}
- yyerror(SvPVX_const(msg));
- SvREFCNT_dec(msg);
- return sv;
+ yyerror_pv(msg, UTF ? SVf_UTF8 : 0);
+ return SvREFCNT_inc_simple_NN(sv);
}
now_ok:
- sv_2mortal(sv); /* Parent created it permanently */
cv = *cvp;
if (!pv && s)
pv = newSVpvn_flags(s, len, SVs_TEMP);
SPAGAIN ;
/* Check the eval first */
- if (!PL_in_eval && SvTRUE(ERRSV)) {
+ if (!PL_in_eval && ((errsv = ERRSV), SvTRUE_NN(errsv))) {
STRLEN errlen;
const char * errstr;
- sv_catpvs(ERRSV, "Propagated");
- errstr = SvPV_const(ERRSV, errlen);
+ sv_catpvs(errsv, "Propagated");
+ errstr = SvPV_const(errsv, errlen);
yyerror_pvn(errstr, errlen, 0); /* Duplicates the message inside eval */
(void)POPs;
- res = SvREFCNT_inc_simple(sv);
+ res = SvREFCNT_inc_simple_NN(sv);
}
else {
res = POPs;
- SvREFCNT_inc_simple_void(res);
+ SvREFCNT_inc_simple_void_NN(res);
}
PUTBACK ;
why2 = key;
why3 = "}} did not return a defined value";
sv = res;
+ (void)sv_2mortal(sv);
goto report;
}
return res;
}
+PERL_STATIC_INLINE void
+S_parse_ident(pTHX_ char **s, char **d, char * const e, int allow_package, bool is_utf8) {
+ dVAR;
+ PERL_ARGS_ASSERT_PARSE_IDENT;
+
+ for (;;) {
+ if (*d >= e)
+ Perl_croak(aTHX_ "%s", ident_too_long);
+ if (is_utf8 && isIDFIRST_utf8((U8*)*s)) {
+ /* The UTF-8 case must come first, otherwise things
+ * like c\N{COMBINING TILDE} would start failing, as the
+ * isWORDCHAR_A case below would gobble the 'c' up.
+ */
+
+ char *t = *s + UTF8SKIP(*s);
+ while (isIDCONT_utf8((U8*)t))
+ t += UTF8SKIP(t);
+ if (*d + (t - *s) > e)
+ Perl_croak(aTHX_ "%s", ident_too_long);
+ Copy(*s, *d, t - *s, char);
+ *d += t - *s;
+ *s = t;
+ }
+ else if ( isWORDCHAR_A(**s) ) {
+ do {
+ *(*d)++ = *(*s)++;
+ } while isWORDCHAR_A(**s);
+ }
+ else if (allow_package && **s == '\'' && isIDFIRST_lazy_if(*s+1,is_utf8)) {
+ *(*d)++ = ':';
+ *(*d)++ = ':';
+ (*s)++;
+ }
+ else if (allow_package && **s == ':' && (*s)[1] == ':'
+ /* Disallow things like Foo::$bar. For the curious, this is
+ * the code path that triggers the "Bad name after" warning
+ * when looking for barewords.
+ */
+ && (*s)[2] != '$') {
+ *(*d)++ = *(*s)++;
+ *(*d)++ = *(*s)++;
+ }
+ else
+ break;
+ }
+ return;
+}
+
/* Returns a NUL terminated string, with the length of the string written to
*slp
*/
STATIC char *
-S_scan_word(pTHX_ register char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
+S_scan_word(pTHX_ char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
{
dVAR;
char *d = dest;
char * const e = d + destlen - 3; /* two-character token, ending NUL */
+ bool is_utf8 = cBOOL(UTF);
PERL_ARGS_ASSERT_SCAN_WORD;
- for (;;) {
- if (d >= e)
- Perl_croak(aTHX_ ident_too_long);
- if (isALNUM(*s) || (!UTF && isALNUMC_L1(*s))) /* UTF handled below */
- *d++ = *s++;
- else if (allow_package && (*s == '\'') && isIDFIRST_lazy_if(s+1,UTF)) {
- *d++ = ':';
- *d++ = ':';
- s++;
- }
- else if (allow_package && (s[0] == ':') && (s[1] == ':') && (s[2] != '$')) {
- *d++ = *s++;
- *d++ = *s++;
- }
- else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
- char *t = s + UTF8SKIP(s);
- size_t len;
- while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
- t += UTF8SKIP(t);
- len = t - s;
- if (d + len > e)
- Perl_croak(aTHX_ ident_too_long);
- Copy(s, d, len, char);
- d += len;
- s = t;
- }
- else {
- *d = '\0';
- *slp = d - dest;
- return s;
- }
- }
+ parse_ident(&s, &d, e, allow_package, is_utf8);
+ *d = '\0';
+ *slp = d - dest;
+ return s;
}
STATIC char *
-S_scan_ident(pTHX_ register char *s, register const char *send, char *dest, STRLEN destlen, I32 ck_uni)
+S_scan_ident(pTHX_ char *s, const char *send, char *dest, STRLEN destlen, I32 ck_uni)
{
dVAR;
char *bracket = NULL;
char funny = *s++;
char *d = dest;
char * const e = d + destlen - 3; /* two-character token, ending NUL */
+ bool is_utf8 = cBOOL(UTF);
PERL_ARGS_ASSERT_SCAN_IDENT;
if (isDIGIT(*s)) {
while (isDIGIT(*s)) {
if (d >= e)
- Perl_croak(aTHX_ ident_too_long);
+ Perl_croak(aTHX_ "%s", ident_too_long);
*d++ = *s++;
}
}
else {
- for (;;) {
- if (d >= e)
- Perl_croak(aTHX_ ident_too_long);
- if (isALNUM(*s)) /* UTF handled below */
- *d++ = *s++;
- else if (*s == '\'' && isIDFIRST_lazy_if(s+1,UTF)) {
- *d++ = ':';
- *d++ = ':';
- s++;
- }
- else if (*s == ':' && s[1] == ':') {
- *d++ = *s++;
- *d++ = *s++;
- }
- else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
- char *t = s + UTF8SKIP(s);
- while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
- t += UTF8SKIP(t);
- if (d + (t - s) > e)
- Perl_croak(aTHX_ ident_too_long);
- Copy(s, d, t - s, char);
- d += t - s;
- s = t;
- }
- else
- break;
- }
+ parse_ident(&s, &d, e, 1, is_utf8);
}
*d = '\0';
d = dest;
if (*d) {
+ /* Either a digit variable, or parse_ident() found an identifier
+ (anything valid as a bareword), so job done and return. */
if (PL_lex_state != LEX_NORMAL)
PL_lex_state = LEX_INTERPENDMAYBE;
return s;
}
if (*s == '$' && s[1] &&
- (isALNUM_lazy_if(s+1,UTF) || s[1] == '$' || s[1] == '{' || strnEQ(s+1,"::",2)) )
+ (isIDFIRST_lazy_if(s+1,is_utf8)
+ || isDIGIT_A((U8)s[1])
+ || s[1] == '$'
+ || s[1] == '{'
+ || strnEQ(s+1,"::",2)) )
{
+ /* Dereferencing a value in a scalar variable.
+ The alternatives are different syntaxes for a scalar variable.
+ Using ' as a leading package separator isn't allowed. :: is. */
return s;
}
+ /* Handle the opening { of @{...}, &{...}, *{...}, %{...}, ${...} */
if (*s == '{') {
bracket = s;
s++;
+ while (s < send && SPACE_OR_TAB(*s))
+ s++;
}
- if (s < send) {
- if (UTF) {
+
+#define VALID_LEN_ONE_IDENT(d, u) (isPUNCT_A((U8)(d)) \
+ || isCNTRL_A((U8)(d)) \
+ || isDIGIT_A((U8)(d)) \
+ || (!(u) && !UTF8_IS_INVARIANT((U8)(d))))
+ if (s < send
+ && (isIDFIRST_lazy_if(s, is_utf8) || VALID_LEN_ONE_IDENT(*s, is_utf8)))
+ {
+ if (is_utf8) {
const STRLEN skip = UTF8SKIP(s);
STRLEN i;
d[skip] = '\0';
d[1] = '\0';
}
}
+ /* Convert $^F, ${^F} and the ^F of ${^FOO} to control characters */
if (*d == '^' && *s && isCONTROLVAR(*s)) {
*d = toCTRL(*s);
s++;
}
+ /* Warn about ambiguous code after unary operators if {...} notation isn't
+ used. There's no difference in ambiguity; it's merely a heuristic
+ about when not to warn. */
else if (ck_uni && !bracket)
check_uni();
if (bracket) {
- if (isSPACE(s[-1])) {
- while (s < send) {
- const char ch = *s++;
- if (!SPACE_OR_TAB(ch)) {
- *d = ch;
- break;
- }
- }
- }
- if (isIDFIRST_lazy_if(d,UTF)) {
- d += UTF8SKIP(d);
- if (UTF) {
- char *end = s;
- while ((end < send && isALNUM_lazy_if(end,UTF)) || *end == ':') {
- end += UTF8SKIP(end);
- while (end < send && UTF8_IS_CONTINUED(*end) && is_utf8_mark((U8*)end))
- end += UTF8SKIP(end);
- }
- Copy(s, d, end - s, char);
- d += end - s;
- s = end;
- }
- else {
- while ((isALNUM(*s) || *s == ':') && d < e)
- *d++ = *s++;
- if (d >= e)
- Perl_croak(aTHX_ ident_too_long);
- }
+ /* If we were processing {...} notation then... */
+ if (isIDFIRST_lazy_if(d,is_utf8)) {
+ /* if it starts as a valid identifier, assume that it is one.
+ (the later check for } being at the expected point will trap
+ cases where this doesn't pan out.) */
+ d += is_utf8 ? UTF8SKIP(d) : 1;
+ parse_ident(&s, &d, e, 1, is_utf8);
*d = '\0';
while (s < send && SPACE_OR_TAB(*s))
s++;
if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
+ /* ${foo[0]} and ${foo{bar}} notation. */
if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest, 0)) {
const char * const brack =
(const char *)
}
/* Handle extended ${^Foo} variables
* 1999-02-27 mjd-perl-patch@plover.com */
- else if (!isALNUM(*d) && !isPRINT(*d) /* isCTRL(d) */
- && isALNUM(*s))
+ else if (! isPRINT(*d) /* isCNTRL(d), plus all non-ASCII */
+ && isWORDCHAR(*s))
{
d++;
- while (isALNUM(*s) && d < e) {
+ while (isWORDCHAR(*s) && d < e) {
*d++ = *s++;
}
if (d >= e)
- Perl_croak(aTHX_ ident_too_long);
+ Perl_croak(aTHX_ "%s", ident_too_long);
*d = '\0';
}
+
+ while (s < send && SPACE_OR_TAB(*s))
+ s++;
+
+ /* Expect to find a closing } after consuming any trailing whitespace.
+ */
if (*s == '}') {
s++;
if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
if (PL_lex_state == LEX_NORMAL) {
if (ckWARN(WARN_AMBIGUOUS) &&
(keyword(dest, d - dest, 0)
- || get_cvn_flags(dest, d - dest, UTF ? SVf_UTF8 : 0)))
+ || get_cvn_flags(dest, d - dest, is_utf8 ? SVf_UTF8 : 0)))
{
SV *tmp = newSVpvn_flags( dest, d - dest,
- SVs_TEMP | (UTF ? SVf_UTF8 : 0) );
+ SVs_TEMP | (is_utf8 ? SVf_UTF8 : 0) );
if (funny == '#')
funny = '@';
Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
}
}
else {
+ /* Didn't find the closing } at the point we expected, so restore
+ state such that the next thing to process is the opening { and */
s = bracket; /* let the parser handle it */
*dest = '\0';
}
STRLEN charlen = UTF ? UTF8SKIP(*s) : 1;
if ( charlen != 1 || ! strchr(valid_flags, c) ) {
- if (isALNUM_lazy_if(*s, UTF)) {
+ if (isWORDCHAR_lazy_if(*s, UTF)) {
yyerror_pv(Perl_form(aTHX_ "Unknown regexp modifier \"/%.*s\"", (int)charlen, *s),
UTF ? SVf_UTF8 : 0);
(*s) += charlen;
{
dVAR;
PMOP *pm;
- char *s = scan_str(start,!!PL_madskills,FALSE, PL_reg_state.re_reparsing);
+ char *s;
const char * const valid_flags =
(const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS);
char charset = '\0'; /* character set modifier */
PERL_ARGS_ASSERT_SCAN_PAT;
- /* this was only needed for the initial scan_str; set it to false
- * so that any (?{}) code blocks etc are parsed normally */
- PL_reg_state.re_reparsing = FALSE;
+ s = scan_str(start,!!PL_madskills,FALSE, (PL_in_eval & EVAL_RE_REPARSING),
+ TRUE /* look for escaped bracketed metas */ );
+
if (!s) {
const char * const delimiter = skipspace(start);
Perl_croak(aTHX_
pl_yylval.ival = OP_NULL;
- s = scan_str(start,!!PL_madskills,FALSE,FALSE);
+ s = scan_str(start,!!PL_madskills,FALSE,FALSE,
+ TRUE /* look for escaped bracketed metas */ );
if (!s)
Perl_croak(aTHX_ "Substitution pattern not terminated");
#endif
first_start = PL_multi_start;
- s = scan_str(s,!!PL_madskills,FALSE,FALSE);
+ s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE);
if (!s) {
if (PL_lex_stuff) {
SvREFCNT_dec(PL_lex_stuff);
pl_yylval.ival = OP_NULL;
- s = scan_str(start,!!PL_madskills,FALSE,FALSE);
+ s = scan_str(start,!!PL_madskills,FALSE,FALSE, FALSE);
if (!s)
Perl_croak(aTHX_ "Transliteration pattern not terminated");
}
#endif
- s = scan_str(s,!!PL_madskills,FALSE,FALSE);
+ s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE);
if (!s) {
if (PL_lex_stuff) {
SvREFCNT_dec(PL_lex_stuff);
*/
STATIC char *
-S_scan_heredoc(pTHX_ register char *s)
+S_scan_heredoc(pTHX_ char *s)
{
dVAR;
I32 op_type = OP_SCALAR;
s++, term = '\'';
else
term = '"';
- if (!isALNUM_lazy_if(s,UTF))
+ if (!isWORDCHAR_lazy_if(s,UTF))
deprecate("bare << to mean <<\"\"");
- for (; isALNUM_lazy_if(s,UTF); s++) {
+ for (; isWORDCHAR_lazy_if(s,UTF); s++) {
if (d < e)
*d++ = *s;
}
linestr = shared->ls_linestr;
bufend = SvEND(linestr);
d = s;
- while (s < bufend &&
- (*s != '\n' || memNE(s,PL_tokenbuf,len)) ) {
+ while (s < bufend - len + 1 &&
+ memNE(s,PL_tokenbuf,len) ) {
if (*s++ == '\n')
++shared->herelines;
}
- if (s >= bufend) {
+ if (s >= bufend - len + 1) {
goto interminable;
}
sv_setpvn(tmpstr,d+1,s-d);
if (*d == '$' && d[1]) d++;
/* allow <Pkg'VALUE> or <Pkg::VALUE> */
- while (*d && (isALNUM_lazy_if(d,UTF) || *d == '\'' || *d == ':'))
+ while (*d && (isWORDCHAR_lazy_if(d,UTF) || *d == '\'' || *d == ':'))
d += UTF ? UTF8SKIP(d) : 1;
/* If we've tried to read what we allow filehandles to look like, and
if (d - PL_tokenbuf != len) {
pl_yylval.ival = OP_GLOB;
- s = scan_str(start,!!PL_madskills,FALSE,FALSE);
+ s = scan_str(start,!!PL_madskills,FALSE,FALSE, FALSE);
if (!s)
Perl_croak(aTHX_ "Glob not terminated");
return s;
*/
STATIC char *
-S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims, int re_reparse)
+S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims, int re_reparse,
+ bool deprecate_escaped_meta /* Should we issue a deprecation warning
+ for certain paired metacharacters that
+ appear escaped within it */
+ )
{
dVAR;
- SV *sv; /* scalar value: string */
- const char *tmps; /* temp string, used for delimiter matching */
+ SV *sv; /* scalar value: string */
+ const char *tmps; /* temp string, used for delimiter matching */
char *s = start; /* current position in the buffer */
char term; /* terminating character */
char *to; /* current position in the sv's data */
- I32 brackets = 1; /* bracket nesting level */
- bool has_utf8 = FALSE; /* is there any utf8 content? */
- I32 termcode; /* terminating char. code */
- U8 termstr[UTF8_MAXBYTES]; /* terminating string */
- STRLEN termlen; /* length of terminating string */
- int last_off = 0; /* last position for nesting bracket */
+ I32 brackets = 1; /* bracket nesting level */
+ bool has_utf8 = FALSE; /* is there any utf8 content? */
+ I32 termcode; /* terminating char. code */
+ U8 termstr[UTF8_MAXBYTES]; /* terminating string */
+ STRLEN termlen; /* length of terminating string */
+ int last_off = 0; /* last position for nesting bracket */
+ char *escaped_open = NULL;
#ifdef PERL_MAD
int stuffstart;
char *tstart;
PL_multi_close = term;
+ /* A warning is raised if the input parameter requires it for escaped (by a
+ * backslash) paired metacharacters {} [] and () when the delimiters are
+ * those same characters, and the backslash is ineffective. This doesn't
+ * happen for <>, as they aren't metas. */
+ if (deprecate_escaped_meta
+ && (PL_multi_open == PL_multi_close
+ || ! ckWARN_d(WARN_DEPRECATED)
+ || PL_multi_open == '<'))
+ {
+ deprecate_escaped_meta = FALSE;
+ }
+
/* create a new SV to hold the contents. 79 is the SV's initial length.
What a random number. */
sv = newSV_type(SVt_PVIV);
if (*s == '\\' && s+1 < PL_bufend) {
if (!keep_quoted &&
((s[1] == PL_multi_open) || (s[1] == PL_multi_close)))
+ {
s++;
+
+ /* Here, 'deprecate_escaped_meta' is true iff the
+ * delimiters are paired metacharacters, and 's' points
+ * to an occurrence of one of them within the string,
+ * which was preceded by a backslash. If this is a
+ * context where the delimiter is also a metacharacter,
+ * the backslash is useless, and deprecated. () and []
+ * are meta in any context. {} are meta only when
+ * appearing in a quantifier or in things like '\p{'.
+ * They also aren't meta unless there is a matching
+ * closed, escaped char later on within the string.
+ * If 's' points to an open, set a flag; if to a close,
+ * test that flag, and raise a warning if it was set */
+
+ if (deprecate_escaped_meta) {
+ if (*s == PL_multi_open) {
+ if (*s != '{') {
+ escaped_open = s;
+ }
+ else if (regcurly(s,
+ TRUE /* Look for a closing
+ '\}' */)
+ || (s - start > 2 /* Look for e.g.
+ '\x{' */
+ && _generic_isCC(*(s-2), _CC_BACKSLASH_FOO_LBRACE_IS_META)))
+ {
+ escaped_open = s;
+ }
+ }
+ else if (escaped_open) {
+ Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
+ "Useless use of '\\'; doesn't escape metacharacter '%c'", PL_multi_open);
+ escaped_open = NULL;
+ }
+ }
+ }
else
*to++ = *s++;
}
SV *sv = NULL; /* place to put the converted number */
bool floatit; /* boolean: int or float? */
const char *lastub = NULL; /* position of last underbar */
- static char const number_too_long[] = "Number too long";
+ static const char* const number_too_long = "Number too long";
PERL_ARGS_ASSERT_SCAN_NUM;
else {
/* check for end of fixed-length buffer */
if (d >= e)
- Perl_croak(aTHX_ number_too_long);
+ Perl_croak(aTHX_ "%s", number_too_long);
/* if we're ok, copy the character */
*d++ = *s++;
}
for (; isDIGIT(*s) || *s == '_'; s++) {
/* fixed length buffer check */
if (d >= e)
- Perl_croak(aTHX_ number_too_long);
+ Perl_croak(aTHX_ "%s", number_too_long);
if (*s == '_') {
if (lastub && s == lastub + 1)
Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
while (isDIGIT(*s) || *s == '_') {
if (isDIGIT(*s)) {
if (d >= e)
- Perl_croak(aTHX_ number_too_long);
+ Perl_croak(aTHX_ "%s", number_too_long);
*d++ = *s++;
}
else {
case 'v':
vstring:
sv = newSV(5); /* preallocate storage space */
+ ENTER_with_name("scan_vstring");
+ SAVEFREESV(sv);
s = scan_vstring(s, PL_bufend, sv);
+ SvREFCNT_inc_simple_void_NN(sv);
+ LEAVE_with_name("scan_vstring");
break;
}
}
STATIC char *
-S_scan_formline(pTHX_ register char *s)
+S_scan_formline(pTHX_ char *s)
{
dVAR;
char *eol;
Perl_sv_catpvf(aTHX_ msg, " at %s line %"IVdf", ",
OutCopFILE(PL_curcop), (IV)CopLINE(PL_curcop));
if (context)
- Perl_sv_catpvf(aTHX_ msg, "near \"%"SVf"\"\n",
- SVfARG(newSVpvn_flags(context, contlen,
- SVs_TEMP | (UTF ? SVf_UTF8 : 0))));
+ Perl_sv_catpvf(aTHX_ msg, "near \"%"UTF8f"\"\n",
+ UTF8fARG(UTF, contlen, context));
else
Perl_sv_catpvf(aTHX_ msg, "%"SVf"\n", SVfARG(where_sv));
if (PL_multi_start < PL_multi_end && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1) {
else
qerror(msg);
if (PL_error_count >= 10) {
- if (PL_in_eval && SvCUR(ERRSV))
+ SV * errsv;
+ if (PL_in_eval && ((errsv = ERRSV), SvCUR(errsv)))
Perl_croak(aTHX_ "%"SVf"%s has too many errors.\n",
- SVfARG(ERRSV), OutCopFILE(PL_curcop));
+ SVfARG(errsv), OutCopFILE(PL_curcop));
else
Perl_croak(aTHX_ "%s has too many errors.\n",
OutCopFILE(PL_curcop));
Function must be called like
- sv = newSV(5);
+ sv = sv_2mortal(newSV(5));
s = scan_vstring(s,e,sv);
where s and e are the start and end of the string.
The sv should already be large enough to store the vstring
passed in, for performance reasons.
+This function may croak if fatal warnings are enabled in the
+calling scope, hence the sv_2mortal in the example (to prevent
+a leak). Make sure to do SvREFCNT_inc afterwards if you use
+sv_2mortal.
+
*/
char *