#define PL_lex_brackstack (PL_parser->lex_brackstack)
#define PL_lex_casemods (PL_parser->lex_casemods)
#define PL_lex_casestack (PL_parser->lex_casestack)
-#define PL_lex_defer (PL_parser->lex_defer)
#define PL_lex_dojoin (PL_parser->lex_dojoin)
#define PL_lex_formbrack (PL_parser->lex_formbrack)
#define PL_lex_inpat (PL_parser->lex_inpat)
string or after \E, $foo, etc */
#define LEX_INTERPCONST 2 /* NOT USED */
#define LEX_FORMLINE 1 /* expecting a format line */
-#define LEX_KNOWNEXT 0 /* next token known; just return it */
#ifdef DEBUGGING
}
else {
t = s;
- while (!isSPACE(*t))
+ while (*t && !isSPACE(*t))
t++;
e = t;
}
assert(PL_nexttoke < C_ARRAY_LENGTH(PL_nexttype));
PL_nexttype[PL_nexttoke] = type;
PL_nexttoke++;
- if (PL_lex_state != LEX_KNOWNEXT) {
- PL_lex_defer = PL_lex_state;
- PL_lex_state = LEX_KNOWNEXT;
- }
}
/*
S_postderef(pTHX_ int const funny, char const next)
{
assert(funny == DOLSHARP || strchr("$@%&*", funny));
- assert(strchr("*[{", next));
if (next == '*') {
PL_expect = XOPERATOR;
if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
assert('@' == funny || '$' == funny || DOLSHARP == funny);
PL_lex_state = LEX_INTERPEND;
- force_next(POSTJOIN);
+ if ('@' == funny)
+ force_next(POSTJOIN);
}
force_next(next);
PL_bufptr+=2;
SAVEI32(PL_lex_casemods);
SAVEI32(PL_lex_starts);
SAVEI8(PL_lex_state);
- SAVEI8(PL_lex_defer);
SAVESPTR(PL_lex_repl);
SAVEVPTR(PL_lex_inpat);
SAVEI16(PL_lex_inwhat);
PERL_ARGS_ASSERT_GET_AND_CHECK_BACKSLASH_N_NAME;
- if (!SvCUR(res))
+ if (!SvCUR(res)) {
+ Perl_ck_warner_d(aTHX_ packWARN(WARN_DEPRECATED),
+ "Unknown charname '' is deprecated");
return res;
+ }
if (UTF && ! is_utf8_string_loc((U8 *) backslash_ptr,
e - backslash_ptr,
if (*s == ' ' && *(s-1) == ' ') {
goto multi_spaces;
}
- if ((U8) *s == NBSP_NATIVE && ckWARN_d(WARN_DEPRECATED)) {
- Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
- "NO-BREAK SPACE in a charnames "
- "alias definition is deprecated");
- }
s++;
}
}
{
goto bad_charname;
}
- if (*s == *NBSP_UTF8
- && *(s+1) == *(NBSP_UTF8+1)
- && ckWARN_d(WARN_DEPRECATED))
- {
- Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
- "NO-BREAK SPACE in a charnames "
- "alias definition is deprecated");
- }
s += 2;
}
else {
The type of the next token
Structure:
+ Check if we have already built the token; if so, use it.
Switch based on the current state:
- - if we already built the token before, use it
- if we have a case modifier in a string, deal with that
- handle other cases of interpolation inside a string
- scan the next line if we are inside a format
- In the normal state switch on the next character:
+ In the normal state, switch on the next character:
- default:
if alphabetic, go to key lookup
- unrecoginized character - croak
+ unrecognized character - croak
- 0/4/26: handle end-of-line or EOF
- cases for whitespace
- \n and #: handle comments and line numbers
if (PL_nexttoke) {
PL_nexttoke--;
pl_yylval = PL_nextval[PL_nexttoke];
- if (!PL_nexttoke) {
- PL_lex_state = PL_lex_defer;
- PL_lex_defer = LEX_NORMAL;
- }
{
I32 next_type;
next_type = PL_nexttype[PL_nexttoke];
/* FALLTHROUGH */
case LEX_INTERPEND:
- /* Treat state as LEX_NORMAL if we have no inner lexing scope.
- XXX This hack can be removed if we stop setting PL_lex_state to
- LEX_KNOWNEXT, as can the hack under LEX_INTREPCONCAT below. */
- if (UNLIKELY(!PL_lex_inwhat)) {
- PL_lex_state = LEX_NORMAL;
- break;
- }
-
if (PL_lex_dojoin) {
const U8 dojoin_was = PL_lex_dojoin;
PL_lex_dojoin = FALSE;
Perl_croak(aTHX_ "panic: INTERPCONCAT, lex_brackets=%ld",
(long) PL_lex_brackets);
#endif
- /* Treat state as LEX_NORMAL when not in an inner lexing scope.
- XXX This hack can be removed if we stop setting PL_lex_state to
- LEX_KNOWNEXT. */
- if (UNLIKELY(!PL_lex_inwhat)) {
- PL_lex_state = LEX_NORMAL;
- break;
- }
-
if (PL_bufptr == PL_bufend)
return REPORT(sublex_done());
retry:
switch (*s) {
default:
- if (UTF ? isIDFIRST_utf8((U8*)s) : isALNUMC(*s))
+ if (UTF) {
+ if (! isUTF8_CHAR((U8 *) s, (U8 *) PL_bufend)) {
+ ENTER;
+ SAVESPTR(PL_warnhook);
+ PL_warnhook = PERL_WARNHOOK_FATAL;
+ utf8n_to_uvchr((U8*)s, PL_bufend-s, NULL, 0);
+ LEAVE;
+ }
+ if (isIDFIRST_utf8((U8*)s)) {
+ goto keylookup;
+ }
+ }
+ else if (isALNUMC(*s)) {
goto keylookup;
- {
+ }
+ {
SV *dsv = newSVpvs_flags("", SVs_TEMP);
const char *c = UTF ? sv_uni_display(dsv, newSVpvn_flags(s,
UTF8SKIP(s),
else
/* skip plain q word */
while (t < PL_bufend && isWORDCHAR_lazy_if(t,UTF))
- t += UTF8SKIP(t);
+ t += UTF ? UTF8SKIP(t) : 1;
}
else if (isWORDCHAR_lazy_if(t,UTF)) {
- t += UTF8SKIP(t);
+ t += UTF ? UTF8SKIP(t) : 1;
while (t < PL_bufend && isWORDCHAR_lazy_if(t,UTF))
- t += UTF8SKIP(t);
+ t += UTF ? UTF8SKIP(t) : 1;
}
while (t < PL_bufend && isSPACE(*t))
t++;
{
const char tmp = *s++;
if (tmp == '=') {
+ if ((s == PL_linestart+2 || s[-3] == '\n') && strnEQ(s, "=====", 5))
+ Perl_croak(aTHX_ "Version control conflict marker '%.*s'", 7, s - 2);
if (!PL_lex_allbrackets
&& PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
{
if (PL_expect != XOPERATOR) {
if (s[1] != '<' && !strchr(s,'>'))
check_uni();
- if (s[1] == '<' && s[2] != '>')
+ if (s[1] == '<' && s[2] != '>') {
+ if ((s == PL_linestart || s[-1] == '\n') && strnEQ(s+2, "<<<<<", 5))
+ Perl_croak(aTHX_ "Version control conflict marker '%.*s'", 7, s);
s = scan_heredoc(s);
+ }
else
s = scan_inputsymbol(s);
PL_expect = XOPERATOR;
{
char tmp = *s++;
if (tmp == '<') {
+ if ((s == PL_linestart+2 || s[-3] == '\n') && strnEQ(s, "<<<<<", 5))
+ Perl_croak(aTHX_ "Version control conflict marker '%.*s'", 7, s - 2);
if (*s == '=' && !PL_lex_allbrackets
&& PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
{
{
const char tmp = *s++;
if (tmp == '>') {
+ if ((s == PL_linestart+2 || s[-3] == '\n') && strnEQ(s, ">>>>>", 5))
+ Perl_croak(aTHX_ "Version control conflict marker '%.*s'", 7, s - 2);
if (*s == '=' && !PL_lex_allbrackets
&& PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
{
UNI(OP_LCFIRST);
case KEY_local:
- pl_yylval.ival = 0;
OPERATOR(LOCAL);
case KEY_length:
if (isIDFIRST_lazy_if(s,UTF)) {
s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
if (len == 3 && strnEQ(PL_tokenbuf, "sub", 3))
- {
- if (!FEATURE_LEXSUBS_IS_ENABLED)
- Perl_croak(aTHX_
- "Experimental \"%s\" subs not enabled",
- tmp == KEY_my ? "my" :
- tmp == KEY_state ? "state" : "our");
- Perl_ck_warner_d(aTHX_
- packWARN(WARN_EXPERIMENTAL__LEXICAL_SUBS),
- "The lexical_subs feature is experimental");
goto really_sub;
- }
PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
if (!PL_in_my_stash) {
char tmpbuf[1024];
yyerror_pv(tmpbuf, UTF ? SVf_UTF8 : 0);
}
}
- pl_yylval.ival = 1;
OPERATOR(MY);
case KEY_next:
* 2) '{'
* The final case currently doesn't get this far in the program, so we
* don't test for it. If that were to change, it would be ok to allow it.
- * c) When not under Unicode rules, any upper Latin1 character
- * d) Otherwise, when unicode rules are used, all XIDS characters.
+ * b) When not under Unicode rules, any upper Latin1 character
+ * c) Otherwise, when unicode rules are used, all XIDS characters.
*
* Because all ASCII characters have the same representation whether
* encoded in UTF-8 or not, we can use the foo_A macros below and '\0' and
- * '{' without knowing if is UTF-8 or not.
- * EBCDIC already uses the rules that ASCII platforms will use after the
- * deprecation cycle; see comment below about the deprecation. */
-#ifdef EBCDIC
-# define VALID_LEN_ONE_IDENT(s, is_utf8) \
+ * '{' without knowing if it is UTF-8 or not. */
+#define VALID_LEN_ONE_IDENT(s, is_utf8) \
(isGRAPH_A(*(s)) || ((is_utf8) \
? isIDFIRST_utf8((U8*) (s)) \
: (isGRAPH_L1(*s) \
&& LIKELY((U8) *(s) != LATIN1_TO_NATIVE(0xAD)))))
-#else
-# define VALID_LEN_ONE_IDENT(s, is_utf8) \
- (isGRAPH_A(*(s)) || ((is_utf8) \
- ? isIDFIRST_utf8((U8*) (s)) \
- : ! isASCII_utf8((U8*) (s))))
-#endif
STATIC char *
S_scan_ident(pTHX_ char *s, char *dest, STRLEN destlen, I32 ck_uni)
: 1)
&& VALID_LEN_ONE_IDENT(s, is_utf8))
{
- /* Deprecate all non-graphic characters. Include SHY as a non-graphic,
- * because often it has no graphic representation. (We can't get to
- * here with SHY when 'is_utf8' is true, so no need to include a UTF-8
- * test for it.) */
- if ((is_utf8)
- ? ! isGRAPH_utf8( (U8*) s)
- : (! isGRAPH_L1( (U8) *s)
- || UNLIKELY((U8) *(s) == LATIN1_TO_NATIVE(0xAD))))
- {
- deprecate("literal non-graphic characters in variable names");
- }
-
if (is_utf8) {
const STRLEN skip = UTF8SKIP(s);
STRLEN i;
"Use of /c modifier is meaningless without /g" );
}
- STD_PMMOD_FLAGS_PARSE_X_WARN(x_mod_count);
+ if (UNLIKELY((x_mod_count) > 1)) {
+ yyerror("Only one /x regex modifier is allowed");
+ }
PL_lex_op = (OP*)pm;
pl_yylval.ival = OP_MATCH;
}
}
- STD_PMMOD_FLAGS_PARSE_X_WARN(x_mod_count);
+ if (UNLIKELY((x_mod_count) > 1)) {
+ yyerror("Only one /x regex modifier is allowed");
+ }
if ((pm->op_pmflags & PMf_CONTINUE)) {
Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" );
SV *linestr;
char *bufend;
char * const olds = s;
- PERL_CONTEXT * const cx = &cxstack[cxstack_ix];
+ PERL_CONTEXT * const cx = CX_CUR();
/* These two fields are not set until an inner lexing scope is
entered. But we need them set here. */
shared->ls_bufptr = s;
else if (yychar > 255)
sv_catpvs(where_sv, "next token ???");
else if (yychar == YYEMPTY) {
- if ( PL_lex_state == LEX_NORMAL
- || (PL_lex_state == LEX_KNOWNEXT && PL_lex_defer == LEX_NORMAL))
+ if (PL_lex_state == LEX_NORMAL)
sv_catpvs(where_sv, "at end of line");
else if (PL_lex_inpat)
sv_catpvs(where_sv, "within pattern");
{
if (flags & ~PARSE_OPTIONAL)
Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_label");
- if (PL_lex_state == LEX_KNOWNEXT) {
+ if (PL_nexttoke) {
PL_parser->yychar = yylex();
if (PL_parser->yychar == LABEL) {
char * const lpv = pl_yylval.pval;