if (is_first)
Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
"\t(Missing semicolon on previous line?)\n");
- else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
+ else if (PL_oldoldbufptr && isIDFIRST_lazy_if_safe(PL_oldoldbufptr,
+ PL_bufend,
+ UTF))
+ {
const char *t;
- for (t = PL_oldoldbufptr; (isWORDCHAR_lazy_if(t,UTF) || *t == ':');
- t += UTF ? UTF8SKIP(t) : 1)
+ for (t = PL_oldoldbufptr;
+ (isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF) || *t == ':');
+ t += UTF ? UTF8SKIP(t) : 1)
+ {
NOOP;
+ }
if (t < PL_bufptr && isSPACE(*t))
Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
"\t(Do you need to predeclare %" UTF8f "?)\n",
PL_parser->bufend - PL_parser->bufptr,
&first_bad_char_loc))
{
-
_force_out_malformed_utf8_message(first_bad_char_loc,
(U8 *) PL_parser->bufend,
0,
while (isSPACE(*PL_last_uni))
PL_last_uni++;
s = PL_last_uni;
- while (isWORDCHAR_lazy_if(s,UTF) || *s == '-')
+ while (isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF) || *s == '-')
s += UTF ? UTF8SKIP(s) : 1;
if ((t = strchr(s, '(')) && t < PL_bufptr)
return;
start = skipspace(start);
s = start;
- if (isIDFIRST_lazy_if(s,UTF)
+ if ( isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)
|| (allow_pack && *s == ':' && s[1] == ':') )
{
s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
if (is_heredoc)
CopLINE_set(PL_curcop, (line_t)PL_multi_start);
PL_copline = NOLINE;
-
+
Newxz(shared, 1, LEXSHARED);
shared->ls_prev = PL_parser->lex_shared;
PL_parser->lex_shared = shared;
* the needed size, SvGROW() is called. Its size parameter each time is
* based on the best guess estimate at the time, namely the length used so
* far, plus the length the current construct will occupy, plus room for
- * the trailing NUL, plus one byte for every input byte still unscanned */
+ * the trailing NUL, plus one byte for every input byte still unscanned */
UV uv = UV_MAX; /* Initialize to weird value to try to catch any uses
before set */
(@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
*/
else if (*s == '@' && s[1]) {
- if (UTF ? isIDFIRST_utf8((U8*)s+1) : isWORDCHAR_A(s[1]))
+ if (UTF
+ ? isIDFIRST_utf8_safe(s+1, send)
+ : isWORDCHAR_A(s[1]))
+ {
break;
+ }
if (strchr(":'{$", s[1]))
break;
if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
/* Might need to recode whatever we have accumulated so
* far if it contains any chars variant in utf8 or
* utf-ebcdic. */
-
+
SvCUR_set(sv, d - SvPVX_const(sv));
SvPOK_on(sv);
*d = '\0';
* braces */
s++;
if (*s != '{') {
- yyerror("Missing braces on \\N{}");
+ yyerror("Missing braces on \\N{}");
continue;
}
s++;
case '&':
case '$':
weight -= seen[un_char] * 10;
- if (isWORDCHAR_lazy_if(s+1,UTF)) {
+ if (isWORDCHAR_lazy_if_safe(s+1, PL_bufend, UTF)) {
int len;
char *tmp = PL_bufend;
PL_bufend = (char*)send;
STRLEN const last_lop_pos =
PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
av_push(PL_rsfp_filters, linestr);
- PL_parser->linestr =
+ PL_parser->linestr =
newSVpvn(SvPVX(linestr), ++s-SvPVX(linestr));
buf = SvPVX(PL_parser->linestr);
PL_parser->bufend = buf + SvCUR(PL_parser->linestr);
{
s++;
while (*s == ' ' || *s == '\t') s++;
- if (*s == 'q' && s[1] == 'w'
- && !isWORDCHAR_lazy_if(s+2,UTF))
+ if (*s == 'q' && s[1] == 'w' && !isWORDCHAR_lazy_if_safe(s+2,
+ PL_bufend,
+ UTF))
+ {
return;
- while (*s && (isWORDCHAR_lazy_if(s,UTF) || strchr(" \t$#+-'\"", *s)))
- s += UTF ? UTF8SKIP(s) : 1;
+ }
+ while ( isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF)
+ || (*s && strchr(" \t$#+-'\"", *s)))
+ {
+ s += UTF ? UTF8SKIP(s) : 1;
+ }
if (*s == '}' || *s == ']')
pl_yylval.ival = OPpSLICEWARNING;
}
break;
}
s = skipspace(s);
- if (isIDFIRST_lazy_if(s, UTF)) {
+ if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
char *dest = PL_tokenbuf + 1;
/* read var name, including sigil, into PL_tokenbuf */
PL_tokenbuf[0] = sigil;
1 /* 1 means die */ );
NOT_REACHED; /* NOTREACHED */
}
- if (isIDFIRST_utf8((U8*)s)) {
+ if (isIDFIRST_utf8_safe(s, PL_bufend)) {
goto keylookup;
}
}
PL_expect = XPOSTDEREF;
TOKEN(ARROW);
}
- if (isIDFIRST_lazy_if(s,UTF)) {
+ if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
s = force_word(s,METHOD,FALSE,TRUE);
TOKEN(ARROW);
}
grabattrs:
s = skipspace(s);
attrs = NULL;
- while (isIDFIRST_lazy_if(s,UTF)) {
+ while (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
I32 tmp;
SV *sv;
d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
while (d < PL_bufend && SPACE_OR_TAB(*d))
d++;
}
- if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
+ if (d < PL_bufend && isIDFIRST_lazy_if_safe(d, PL_bufend, UTF)) {
d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
FALSE, &len);
while (d < PL_bufend && SPACE_OR_TAB(*d))
}
else
/* skip plain q word */
- while (t < PL_bufend && isWORDCHAR_lazy_if(t,UTF))
+ while ( t < PL_bufend
+ && isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF))
+ {
t += UTF ? UTF8SKIP(t) : 1;
+ }
}
- else if (isWORDCHAR_lazy_if(t,UTF)) {
+ else if (isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF)) {
t += UTF ? UTF8SKIP(t) : 1;
- while (t < PL_bufend && isWORDCHAR_lazy_if(t,UTF))
+ while ( t < PL_bufend
+ && isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF))
+ {
t += UTF ? UTF8SKIP(t) : 1;
+ }
}
while (t < PL_bufend && isSPACE(*t))
t++;
}
s--;
if (PL_expect == XOPERATOR) {
- if (PL_bufptr == PL_linestart && ckWARN(WARN_SEMICOLON)
- && isIDFIRST_lazy_if(s,UTF))
+ if ( PL_bufptr == PL_linestart
+ && ckWARN(WARN_SEMICOLON)
+ && isIDFIRST_lazy_if_safe(s, PL_bufend, UTF))
{
CopLINE_dec(PL_curcop);
Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
POSTDEREF('$');
}
- if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-@", s[2]))) {
+ if ( s[1] == '#'
+ && ( isIDFIRST_lazy_if_safe(s+2, PL_bufend, UTF)
+ || strchr("{$:+-@", s[2])))
+ {
PL_tokenbuf[0] = '@';
s = scan_ident(s + 1, PL_tokenbuf + 1,
sizeof PL_tokenbuf - 1, FALSE);
if (ckWARN(WARN_SYNTAX)) {
char *t = s+1;
- while (isSPACE(*t) || isWORDCHAR_lazy_if(t,UTF) || *t == '$')
+ while ( isSPACE(*t)
+ || isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF)
+ || *t == '$')
+ {
t += UTF ? UTF8SKIP(t) : 1;
+ }
if (*t++ == ',') {
PL_bufptr = skipspace(PL_bufptr); /* XXX can realloc */
while (t < PL_bufend && *t != ']')
do {
t++;
} while (isSPACE(*t));
- if (isIDFIRST_lazy_if(t,UTF)) {
+ if (isIDFIRST_lazy_if_safe(t, PL_bufend, UTF)) {
STRLEN len;
t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE,
&len);
while (isSPACE(*t))
t++;
- if (*t == ';'
- && get_cvn_flags(tmpbuf, len, UTF ? SVf_UTF8 : 0))
+ if ( *t == ';'
+ && get_cvn_flags(tmpbuf, len, UTF
+ ? SVf_UTF8
+ : 0))
+ {
Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
"You need to quote \"%" UTF8f "\"",
UTF8fARG(UTF, len, tmpbuf));
+ }
}
}
}
PL_expect = XOPERATOR;
else if (strchr("$@\"'`q", *s))
PL_expect = XTERM; /* e.g. print $fh "foo" */
- else if (strchr("&*<%", *s) && isIDFIRST_lazy_if(s+1,UTF))
+ else if ( strchr("&*<%", *s)
+ && isIDFIRST_lazy_if_safe(s+1, PL_bufend, UTF))
+ {
PL_expect = XTERM; /* e.g. print $fh &sub */
- else if (isIDFIRST_lazy_if(s,UTF)) {
+ }
+ else if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
char tmpbuf[sizeof PL_tokenbuf];
int t2;
scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
}
else {
/* Disable warning on "study /blah/" */
- if (PL_oldoldbufptr == PL_last_uni
- && (*PL_last_uni != 's' || s - PL_last_uni < 5
- || memNE(PL_last_uni, "study", 5)
- || isWORDCHAR_lazy_if(PL_last_uni+5,UTF)
+ if ( PL_oldoldbufptr == PL_last_uni
+ && ( *PL_last_uni != 's' || s - PL_last_uni < 5
+ || memNE(PL_last_uni, "study", 5)
+ || isWORDCHAR_lazy_if_safe(PL_last_uni+5, PL_bufend, UTF)
))
check_uni();
s = scan_pat(s,OP_MATCH);
s = skipspace(s);
/* Two barewords in a row may indicate method call. */
-
- if ((isIDFIRST_lazy_if(s,UTF) || *s == '$')
+ if ( ( isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)
+ || *s == '$')
&& (tmp = intuit_method(s, lex ? NULL : sv, cv)))
{
goto method;
/* If followed by a bareword, see if it looks like indir obj. */
- if (tmp == 1 && !orig_keyword
- && (isIDFIRST_lazy_if(s,UTF) || *s == '$')
- && (tmp = intuit_method(s, lex ? NULL : sv, cv))) {
+ if ( tmp == 1
+ && !orig_keyword
+ && (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF) || *s == '$')
+ && (tmp = intuit_method(s, lex ? NULL : sv, cv)))
+ {
method:
if (lex && !off) {
assert(cSVOPx(pl_yylval.opval)->op_sv == sv);
return REPORT(0);
pl_yylval.ival = CopLINE(PL_curcop);
s = skipspace(s);
- if (PL_expect == XSTATE && isIDFIRST_lazy_if(s,UTF)) {
+ if ( PL_expect == XSTATE
+ && isIDFIRST_lazy_if_safe(s, PL_bufend, UTF))
+ {
char *p = s;
if ((PL_bufend - p) >= 3
p += 3;
p = skipspace(p);
/* skip optional package name, as in "for my abc $x (..)" */
- if (isIDFIRST_lazy_if(p,UTF)) {
+ if (isIDFIRST_lazy_if_safe(p, PL_bufend, UTF)) {
p = scan_word(p, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
p = skipspace(p);
}
}
PL_in_my = (U16)tmp;
s = skipspace(s);
- if (isIDFIRST_lazy_if(s,UTF)) {
+ if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
if (len == 3 && strEQs(PL_tokenbuf, "sub"))
goto really_sub;
case KEY_open:
s = skipspace(s);
- if (isIDFIRST_lazy_if(s,UTF)) {
- const char *t;
- d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE,
- &len);
+ if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
+ const char *t;
+ d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE,
+ &len);
for (t=d; isSPACE(*t);)
t++;
if ( *t && strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
{
*PL_tokenbuf = '\0';
s = force_word(s,BAREWORD,TRUE,TRUE);
- if (isIDFIRST_lazy_if(PL_tokenbuf,UTF))
+ if (isIDFIRST_lazy_if_safe(PL_tokenbuf,
+ PL_tokenbuf + sizeof(PL_tokenbuf),
+ UTF))
+ {
gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf),
GV_ADD | (UTF ? SVf_UTF8 : 0));
+ }
else if (*s == '<')
yyerror("<> at require-statement should be quotes");
}
orig_keyword = 0;
pl_yylval.ival = 1;
}
- else
+ else
pl_yylval.ival = 0;
PL_expect = PL_nexttoke ? XOPERATOR : XTERM;
PL_bufptr = s;
s = skipspace(s);
d = SvPVX(PL_linestr)+off;
- if (isIDFIRST_lazy_if(s,UTF)
+ if ( isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)
|| *s == '\''
|| (*s == ':' && s[1] == ':'))
{
s++;
while (s < PL_bufend && isSPACE(*s))
s++;
- if (isIDFIRST_lazy_if(s,UTF)) {
+ if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
const char * const w = s;
s += UTF ? UTF8SKIP(s) : 1;
- while (isWORDCHAR_lazy_if(s,UTF))
+ while (isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF))
s += UTF ? UTF8SKIP(s) : 1;
while (s < PL_bufend && isSPACE(*s))
s++;
PERL_STATIC_INLINE void
S_parse_ident(pTHX_ char **s, char **d, char * const e, int allow_package,
- bool is_utf8, bool check_dollar) {
+ bool is_utf8, bool check_dollar)
+{
PERL_ARGS_ASSERT_PARSE_IDENT;
for (;;) {
if (*d >= e)
Perl_croak(aTHX_ "%s", ident_too_long);
- if (is_utf8 && isIDFIRST_utf8((U8*)*s)) {
+ if (is_utf8 && isIDFIRST_utf8_safe(*s, PL_bufend)) {
/* The UTF-8 case must come first, otherwise things
* like c\N{COMBINING TILDE} would start failing, as the
* isWORDCHAR_A case below would gobble the 'c' up.
*/
char *t = *s + UTF8SKIP(*s);
- while (isIDCONT_utf8((U8*)t))
+ while (isIDCONT_utf8_safe((const U8*) t, (const U8*) PL_bufend)) {
t += UTF8SKIP(t);
+ }
if (*d + (t - *s) > e)
Perl_croak(aTHX_ "%s", ident_too_long);
Copy(*s, *d, t - *s, char);
*(*d)++ = *(*s)++;
} while (isWORDCHAR_A(**s) && *d < e);
}
- else if (allow_package && **s == '\'' && isIDFIRST_lazy_if(*s+1,is_utf8)) {
+ else if ( allow_package
+ && **s == '\''
+ && isIDFIRST_lazy_if_safe((*s)+1, PL_bufend, is_utf8))
+ {
*(*d)++ = ':';
*(*d)++ = ':';
(*s)++;
* Because all ASCII characters have the same representation whether
* encoded in UTF-8 or not, we can use the foo_A macros below and '\0' and
* '{' without knowing if is UTF-8 or not. */
-#define VALID_LEN_ONE_IDENT(s, is_utf8) \
- (isGRAPH_A(*(s)) || ((is_utf8) \
- ? isIDFIRST_utf8((U8*) (s)) \
- : (isGRAPH_L1(*s) \
+#define VALID_LEN_ONE_IDENT(s, e, is_utf8) \
+ (isGRAPH_A(*(s)) || ((is_utf8) \
+ ? isIDFIRST_utf8_safe(s, e) \
+ : (isGRAPH_L1(*s) \
&& LIKELY((U8) *(s) != LATIN1_TO_NATIVE(0xAD)))))
STATIC char *
/* Here, it is not a run-of-the-mill identifier name */
if (*s == '$' && s[1]
- && (isIDFIRST_lazy_if(s+1,is_utf8)
+ && ( isIDFIRST_lazy_if_safe(s+1, PL_bufend, is_utf8)
|| isDIGIT_A((U8)s[1])
|| s[1] == '$'
|| s[1] == '{'
if ((s <= PL_bufend - (is_utf8)
? UTF8SKIP(s)
: 1)
- && VALID_LEN_ONE_IDENT(s, is_utf8))
+ && VALID_LEN_ONE_IDENT(s, PL_bufend, is_utf8))
{
if (is_utf8) {
const STRLEN skip = UTF8SKIP(s);
bool skip;
char *s2;
/* If we were processing {...} notation then... */
- if (isIDFIRST_lazy_if(d,is_utf8)) {
+ if (isIDFIRST_lazy_if_safe(d, e, is_utf8)) {
/* if it starts as a valid identifier, assume that it is one.
(the later check for } being at the expected point will trap
cases where this doesn't pan out.) */
s2 = peekspace(s);
else
s2 = s;
-
+
/* Expect to find a closing } after consuming any trailing whitespace.
*/
if (*s2 == '}') {
STRLEN charlen = UTF ? UTF8SKIP(*s) : 1;
if ( charlen != 1 || ! strchr(valid_flags, c) ) {
- if (isWORDCHAR_lazy_if(*s, UTF)) {
+ if (isWORDCHAR_lazy_if_safe( *s, PL_bufend, UTF)) {
yyerror_pv(Perl_form(aTHX_ "Unknown regexp modifier \"/%.*s\"", (int)charlen, *s),
UTF ? SVf_UTF8 : 0);
(*s) += charlen;
/* issue a warning if /c is specified,but /g is not */
if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL))
{
- Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP),
+ Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP),
"Use of /c modifier is meaningless without /g" );
}
s++, term = '\'';
else
term = '"';
- if (!isWORDCHAR_lazy_if(s,UTF))
+ if (! isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF))
deprecate("bare << to mean <<\"\"");
peek = s;
- while (isWORDCHAR_lazy_if(peek,UTF)) {
+ while (
+ isWORDCHAR_lazy_if_safe(peek, PL_bufend, UTF))
+ {
peek += UTF ? UTF8SKIP(peek) : 1;
}
len = (peek - s >= e - d) ? (e - d) : (peek - s);
if (*d == '$' && d[1]) d++;
/* allow <Pkg'VALUE> or <Pkg::VALUE> */
- while (*d && (isWORDCHAR_lazy_if(d,UTF) || *d == '\'' || *d == ':'))
+ while (isWORDCHAR_lazy_if_safe(d, e, UTF) || *d == '\'' || *d == ':') {
d += UTF ? UTF8SKIP(d) : 1;
+ }
/* If we've tried to read what we allow filehandles to look like, and
there's still text left, then it must be a glob() and not a getline.
STRLEN wlen, bufptr_pos;
lex_read_space(0);
t = s = PL_bufptr;
- if (!isIDFIRST_lazy_if(s, UTF))
+ if (!isIDFIRST_lazy_if_safe(s, PL_bufend, UTF))
goto no_label;
t = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &wlen);
if (word_takes_any_delimiter(s, wlen))