#define PL_lex_brackstack (PL_parser->lex_brackstack)
#define PL_lex_casemods (PL_parser->lex_casemods)
#define PL_lex_casestack (PL_parser->lex_casestack)
-#define PL_lex_defer (PL_parser->lex_defer)
#define PL_lex_dojoin (PL_parser->lex_dojoin)
#define PL_lex_formbrack (PL_parser->lex_formbrack)
#define PL_lex_inpat (PL_parser->lex_inpat)
string or after \E, $foo, etc */
#define LEX_INTERPCONST 2 /* NOT USED */
#define LEX_FORMLINE 1 /* expecting a format line */
-#define LEX_KNOWNEXT 0 /* next token known; just return it */
#ifdef DEBUGGING
}
else {
t = s;
- while (!isSPACE(*t))
+ while (*t && !isSPACE(*t))
t++;
e = t;
}
assert(PL_nexttoke < C_ARRAY_LENGTH(PL_nexttype));
PL_nexttype[PL_nexttoke] = type;
PL_nexttoke++;
- if (PL_lex_state != LEX_KNOWNEXT) {
- PL_lex_defer = PL_lex_state;
- PL_lex_state = LEX_KNOWNEXT;
- }
}
/*
S_postderef(pTHX_ int const funny, char const next)
{
assert(funny == DOLSHARP || strchr("$@%&*", funny));
- assert(strchr("*[{", next));
if (next == '*') {
PL_expect = XOPERATOR;
if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
assert('@' == funny || '$' == funny || DOLSHARP == funny);
PL_lex_state = LEX_INTERPEND;
- force_next(POSTJOIN);
+ if ('@' == funny)
+ force_next(POSTJOIN);
}
force_next(next);
PL_bufptr+=2;
start = skipspace(start);
s = start;
if (isIDFIRST_lazy_if(s,UTF)
- || (allow_pack && *s == ':') )
+ || (allow_pack && *s == ':' && s[1] == ':') )
{
s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
if (check_keyword) {
SAVEI32(PL_lex_casemods);
SAVEI32(PL_lex_starts);
SAVEI8(PL_lex_state);
- SAVEI8(PL_lex_defer);
SAVESPTR(PL_lex_repl);
SAVEVPTR(PL_lex_inpat);
SAVEI16(PL_lex_inwhat);
PERL_ARGS_ASSERT_GET_AND_CHECK_BACKSLASH_N_NAME;
- if (!SvCUR(res))
+ if (!SvCUR(res)) {
+ Perl_ck_warner_d(aTHX_ packWARN(WARN_DEPRECATED),
+ "Unknown charname '' is deprecated");
return res;
+ }
if (UTF && ! is_utf8_string_loc((U8 *) backslash_ptr,
e - backslash_ptr,
if (*s == ' ' && *(s-1) == ' ') {
goto multi_spaces;
}
- if ((U8) *s == NBSP_NATIVE && ckWARN_d(WARN_DEPRECATED)) {
- Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
- "NO-BREAK SPACE in a charnames "
- "alias definition is deprecated");
- }
s++;
}
}
{
goto bad_charname;
}
- if (*s == *NBSP_UTF8
- && *(s+1) == *(NBSP_UTF8+1)
- && ckWARN_d(WARN_DEPRECATED))
- {
- Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
- "NO-BREAK SPACE in a charnames "
- "alias definition is deprecated");
- }
s += 2;
}
else {
};
#endif
-#define word_takes_any_delimeter(p,l) S_word_takes_any_delimeter(p,l)
+#define word_takes_any_delimiter(p,l) S_word_takes_any_delimiter(p,l)
STATIC bool
-S_word_takes_any_delimeter(char *p, STRLEN len)
+S_word_takes_any_delimiter(char *p, STRLEN len)
{
return (len == 1 && strchr("msyq", p[0]))
|| (len == 2
The type of the next token
Structure:
+ Check if we have already built the token; if so, use it.
Switch based on the current state:
- - if we already built the token before, use it
- if we have a case modifier in a string, deal with that
- handle other cases of interpolation inside a string
- scan the next line if we are inside a format
- In the normal state switch on the next character:
+ In the normal state, switch on the next character:
- default:
if alphabetic, go to key lookup
- unrecoginized character - croak
+ unrecognized character - croak
- 0/4/26: handle end-of-line or EOF
- cases for whitespace
- \n and #: handle comments and line numbers
if (PL_nexttoke) {
PL_nexttoke--;
pl_yylval = PL_nextval[PL_nexttoke];
- if (!PL_nexttoke) {
- PL_lex_state = PL_lex_defer;
- PL_lex_defer = LEX_NORMAL;
- }
{
I32 next_type;
next_type = PL_nexttype[PL_nexttoke];
/* FALLTHROUGH */
case LEX_INTERPEND:
- /* Treat state as LEX_NORMAL if we have no inner lexing scope.
- XXX This hack can be removed if we stop setting PL_lex_state to
- LEX_KNOWNEXT, as can the hack under LEX_INTREPCONCAT below. */
- if (UNLIKELY(!PL_lex_inwhat)) {
- PL_lex_state = LEX_NORMAL;
- break;
- }
-
if (PL_lex_dojoin) {
const U8 dojoin_was = PL_lex_dojoin;
PL_lex_dojoin = FALSE;
Perl_croak(aTHX_ "panic: INTERPCONCAT, lex_brackets=%ld",
(long) PL_lex_brackets);
#endif
- /* Treat state as LEX_NORMAL when not in an inner lexing scope.
- XXX This hack can be removed if we stop setting PL_lex_state to
- LEX_KNOWNEXT. */
- if (UNLIKELY(!PL_lex_inwhat)) {
- PL_lex_state = LEX_NORMAL;
- break;
- }
-
if (PL_bufptr == PL_bufend)
return REPORT(sublex_done());
retry:
switch (*s) {
default:
- if (UTF ? isIDFIRST_utf8((U8*)s) : isALNUMC(*s))
+ if (UTF) {
+ if (! isUTF8_CHAR((U8 *) s, (U8 *) PL_bufend)) {
+ ENTER;
+ SAVESPTR(PL_warnhook);
+ PL_warnhook = PERL_WARNHOOK_FATAL;
+ utf8n_to_uvchr((U8*)s, PL_bufend-s, NULL, 0);
+ LEAVE;
+ }
+ if (isIDFIRST_utf8((U8*)s)) {
+ goto keylookup;
+ }
+ }
+ else if (isALNUMC(*s)) {
goto keylookup;
- {
+ }
+ {
SV *dsv = newSVpvs_flags("", SVs_TEMP);
const char *c = UTF ? sv_uni_display(dsv, newSVpvn_flags(s,
UTF8SKIP(s),
else
/* skip plain q word */
while (t < PL_bufend && isWORDCHAR_lazy_if(t,UTF))
- t += UTF8SKIP(t);
+ t += UTF ? UTF8SKIP(t) : 1;
}
else if (isWORDCHAR_lazy_if(t,UTF)) {
- t += UTF8SKIP(t);
+ t += UTF ? UTF8SKIP(t) : 1;
while (t < PL_bufend && isWORDCHAR_lazy_if(t,UTF))
- t += UTF8SKIP(t);
+ t += UTF ? UTF8SKIP(t) : 1;
}
while (t < PL_bufend && isSPACE(*t))
t++;
{
const char tmp = *s++;
if (tmp == '=') {
+ if ((s == PL_linestart+2 || s[-3] == '\n') && strnEQ(s, "=====", 5))
+ Perl_croak(aTHX_ "Version control conflict marker '%.*s'", 7, s - 2);
if (!PL_lex_allbrackets
&& PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
{
if (PL_expect != XOPERATOR) {
if (s[1] != '<' && !strchr(s,'>'))
check_uni();
- if (s[1] == '<' && s[2] != '>')
+ if (s[1] == '<' && s[2] != '>') {
+ if ((s == PL_linestart || s[-1] == '\n') && strnEQ(s+2, "<<<<<", 5))
+ Perl_croak(aTHX_ "Version control conflict marker '%.*s'", 7, s);
s = scan_heredoc(s);
+ }
else
s = scan_inputsymbol(s);
PL_expect = XOPERATOR;
{
char tmp = *s++;
if (tmp == '<') {
+ if ((s == PL_linestart+2 || s[-3] == '\n') && strnEQ(s, "<<<<<", 5))
+ Perl_croak(aTHX_ "Version control conflict marker '%.*s'", 7, s - 2);
if (*s == '=' && !PL_lex_allbrackets
&& PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
{
{
const char tmp = *s++;
if (tmp == '>') {
+ if ((s == PL_linestart+2 || s[-3] == '\n') && strnEQ(s, ">>>>>", 5))
+ Perl_croak(aTHX_ "Version control conflict marker '%.*s'", 7, s - 2);
if (*s == '=' && !PL_lex_allbrackets
&& PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
{
TOKEN('$');
case '@':
- if (PL_expect == XOPERATOR)
- no_op("Array", s);
- else if (PL_expect == XPOSTDEREF) POSTDEREF('@');
+ if (PL_expect == XPOSTDEREF)
+ POSTDEREF('@');
PL_tokenbuf[0] = '@';
s = scan_ident(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
+ if (PL_expect == XOPERATOR) {
+ d = s;
+ if (PL_bufptr > s) {
+ d = PL_bufptr-1;
+ PL_bufptr = PL_oldbufptr;
+ }
+ no_op("Array", d);
+ }
pl_yylval.ival = 0;
if (!PL_tokenbuf[1]) {
PREREF('@');
s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
/* Some keywords can be followed by any delimiter, including ':' */
- anydelim = word_takes_any_delimeter(PL_tokenbuf, len);
+ anydelim = word_takes_any_delimiter(PL_tokenbuf, len);
/* x::* is just a word, unless x is "CORE" */
if (!anydelim && *s == ':' && s[1] == ':') {
UNI(OP_LCFIRST);
case KEY_local:
- pl_yylval.ival = 0;
OPERATOR(LOCAL);
case KEY_length:
case KEY_my:
case KEY_state:
if (PL_in_my) {
+ PL_bufptr = s;
yyerror(Perl_form(aTHX_
"Can't redeclare \"%s\" in \"%s\"",
tmp == KEY_my ? "my" :
if (isIDFIRST_lazy_if(s,UTF)) {
s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
if (len == 3 && strnEQ(PL_tokenbuf, "sub", 3))
- {
- if (!FEATURE_LEXSUBS_IS_ENABLED)
- Perl_croak(aTHX_
- "Experimental \"%s\" subs not enabled",
- tmp == KEY_my ? "my" :
- tmp == KEY_state ? "state" : "our");
- Perl_ck_warner_d(aTHX_
- packWARN(WARN_EXPERIMENTAL__LEXICAL_SUBS),
- "The lexical_subs feature is experimental");
goto really_sub;
- }
PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
if (!PL_in_my_stash) {
char tmpbuf[1024];
yyerror_pv(tmpbuf, UTF ? SVf_UTF8 : 0);
}
}
- pl_yylval.ival = 1;
OPERATOR(MY);
case KEY_next:
* 2) '{'
* The final case currently doesn't get this far in the program, so we
* don't test for it. If that were to change, it would be ok to allow it.
- * c) When not under Unicode rules, any upper Latin1 character
- * d) Otherwise, when unicode rules are used, all XIDS characters.
+ * b) When not under Unicode rules, any upper Latin1 character
+ * c) Otherwise, when unicode rules are used, all XIDS characters.
*
* Because all ASCII characters have the same representation whether
* encoded in UTF-8 or not, we can use the foo_A macros below and '\0' and
- * '{' without knowing if is UTF-8 or not.
- * EBCDIC already uses the rules that ASCII platforms will use after the
- * deprecation cycle; see comment below about the deprecation. */
-#ifdef EBCDIC
-# define VALID_LEN_ONE_IDENT(s, is_utf8) \
+ * '{' without knowing if is UTF-8 or not. */
+#define VALID_LEN_ONE_IDENT(s, is_utf8) \
(isGRAPH_A(*(s)) || ((is_utf8) \
? isIDFIRST_utf8((U8*) (s)) \
: (isGRAPH_L1(*s) \
&& LIKELY((U8) *(s) != LATIN1_TO_NATIVE(0xAD)))))
-#else
-# define VALID_LEN_ONE_IDENT(s, is_utf8) \
- (isGRAPH_A(*(s)) || ((is_utf8) \
- ? isIDFIRST_utf8((U8*) (s)) \
- : ! isASCII_utf8((U8*) (s))))
-#endif
STATIC char *
S_scan_ident(pTHX_ char *s, char *dest, STRLEN destlen, I32 ck_uni)
: 1)
&& VALID_LEN_ONE_IDENT(s, is_utf8))
{
- /* Deprecate all non-graphic characters. Include SHY as a non-graphic,
- * because often it has no graphic representation. (We can't get to
- * here with SHY when 'is_utf8' is true, so no need to include a UTF-8
- * test for it.) */
- if ((is_utf8)
- ? ! isGRAPH_utf8( (U8*) s)
- : (! isGRAPH_L1( (U8) *s)
- || UNLIKELY((U8) *(s) == LATIN1_TO_NATIVE(0xAD))))
- {
- deprecate("literal non-graphic characters in variable names");
- }
-
if (is_utf8) {
const STRLEN skip = UTF8SKIP(s);
STRLEN i;
"Use of /c modifier is meaningless without /g" );
}
- STD_PMMOD_FLAGS_PARSE_X_WARN(x_mod_count);
+ if (UNLIKELY((x_mod_count) > 1)) {
+ yyerror("Only one /x regex modifier is allowed");
+ }
PL_lex_op = (OP*)pm;
pl_yylval.ival = OP_MATCH;
}
}
- STD_PMMOD_FLAGS_PARSE_X_WARN(x_mod_count);
+ if (UNLIKELY((x_mod_count) > 1)) {
+ yyerror("Only one /x regex modifier is allowed");
+ }
if ((pm->op_pmflags & PMf_CONTINUE)) {
Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" );
SV *linestr;
char *bufend;
char * const olds = s;
- PERL_CONTEXT * const cx = &cxstack[cxstack_ix];
+ PERL_CONTEXT * const cx = CX_CUR();
/* These two fields are not set until an inner lexing scope is
entered. But we need them set here. */
shared->ls_bufptr = s;
goto streaming;
}
}
- else { /* eval */
+ else { /* eval or we've already hit EOF */
s = (char*)memchr((void*)s, '\n', PL_bufend - s);
- assert(s);
+ if (!s)
+ goto interminable;
}
linestr = shared->ls_linestr;
bufend = SvEND(linestr);
* multiple fp operations. */
bool hexfp = FALSE;
int total_bits = 0;
+ int significant_bits = 0;
#if NVSIZE == 8 && defined(HAS_QUAD) && defined(Uquad_t)
# define HEXFP_UQUAD
Uquad_t hexfp_uquad = 0;
#endif
NV hexfp_mult = 1.0;
UV high_non_zero = 0; /* highest digit */
+ int non_zero_integer_digits = 0;
PERL_ARGS_ASSERT_SCAN_NUM;
if (high_non_zero == 0 && b > 0)
high_non_zero = b;
+ if (high_non_zero)
+ non_zero_integer_digits++;
+
/* this could be hexfp, but peek ahead
* to avoid matching ".." */
if (UNLIKELY(HEXFP_PEEK(s))) {
* detection will shortly be more thorough with the
* underbar checks. */
const char* h = s;
+ significant_bits = non_zero_integer_digits * shift;
#ifdef HEXFP_UQUAD
hexfp_uquad = u;
#else /* HEXFP_NV */
hexfp_nv = u;
#endif
+ /* Ignore the leading zero bits of
+ * the high (first) non-zero digit. */
+ if (high_non_zero) {
+ if (high_non_zero < 0x8)
+ significant_bits--;
+ if (high_non_zero < 0x4)
+ significant_bits--;
+ if (high_non_zero < 0x2)
+ significant_bits--;
+ }
+
if (*h == '.') {
#ifdef HEXFP_NV
- NV mult = 1 / 16.0;
+ NV nv_mult = 1.0;
#endif
+ bool accumulate = TRUE;
for (h++; (isXDIGIT(*h) || *h == '_'); h++) {
if (isXDIGIT(*h)) {
U8 b = XDIGIT_VALUE(*h);
- total_bits += shift;
- if (total_bits < NV_MANT_DIG) {
+ significant_bits += shift;
#ifdef HEXFP_UQUAD
- hexfp_uquad <<= shift;
- hexfp_uquad |= b;
- hexfp_frac_bits += shift;
-#else /* HEXFP_NV */
- hexfp_nv += b * mult;
- mult /= 16.0;
-#endif
- } else if (total_bits - shift < NV_MANT_DIG) {
- /* A hexdigit straddling the edge of
- * mantissa. We can try grabbing as
- * many as possible bits. */
- int shift2 = 0;
- if (b & 1) {
- shift2 = 4;
- } else if (b & 2) {
- shift2 = 3;
- total_bits--;
- } else if (b & 4) {
- shift2 = 2;
- total_bits -= 2;
- } else if (b & 8) {
- shift2 = 1;
- total_bits -= 3;
+ if (accumulate) {
+ if (significant_bits < NV_MANT_DIG) {
+ /* We are in the long "run" of xdigits,
+ * accumulate the full four bits. */
+ hexfp_uquad <<= shift;
+ hexfp_uquad |= b;
+ hexfp_frac_bits += shift;
+ } else {
+ /* We are at a hexdigit either at,
+ * or straddling, the edge of mantissa.
+ * We will try grabbing as many as
+ * possible bits. */
+ int tail =
+ significant_bits - NV_MANT_DIG;
+ if (tail <= 0)
+ tail += shift;
+ hexfp_uquad <<= tail;
+ hexfp_uquad |= b >> (shift - tail);
+ hexfp_frac_bits += tail;
+
+ /* Ignore the trailing zero bits
+ * of the last non-zero xdigit.
+ *
+ * The assumption here is that if
+ * one has input of e.g. the xdigit
+ * eight (0x8), there is only one
+ * bit being input, not the full
+ * four bits. Conversely, if one
+ * specifies a zero xdigit, the
+ * assumption is that one really
+ * wants all those bits to be zero. */
+ if (b) {
+ if ((b & 0x1) == 0x0) {
+ significant_bits--;
+ if ((b & 0x2) == 0x0) {
+ significant_bits--;
+ if ((b & 0x4) == 0x0) {
+ significant_bits--;
+ }
+ }
+ }
+ }
+
+ accumulate = FALSE;
}
-#ifdef HEXFP_UQUAD
- hexfp_uquad <<= shift2;
- hexfp_uquad |= b;
- hexfp_frac_bits += shift2;
+ } else {
+ /* Keep skipping the xdigits, and
+ * accumulating the significant bits,
+ * but do not shift the uquad
+ * (which would catastrophically drop
+ * high-order bits) or accumulate the
+ * xdigits anymore. */
+ }
#else /* HEXFP_NV */
- PERL_UNUSED_VAR(shift2);
- hexfp_nv += b * mult;
- mult /= 16.0;
-#endif
+ if (accumulate) {
+ nv_mult /= 16.0;
+ if (nv_mult > 0.0)
+ hexfp_nv += b * nv_mult;
+ else
+ accumulate = FALSE;
}
+#endif
}
+ if (significant_bits >= NV_MANT_DIG)
+ accumulate = FALSE;
}
}
- if (total_bits >= 4) {
- if (high_non_zero < 0x8)
- total_bits--;
- if (high_non_zero < 0x4)
- total_bits--;
- if (high_non_zero < 0x2)
- total_bits--;
- }
-
- if (total_bits > 0 && (isALPHA_FOLD_EQ(*h, 'p'))) {
+ if ((total_bits > 0 || significant_bits > 0) &&
+ isALPHA_FOLD_EQ(*h, 'p')) {
bool negexp = FALSE;
h++;
if (*h == '+')
*d = '\0';
if (UNLIKELY(hexfp)) {
# ifdef NV_MANT_DIG
- if (total_bits > NV_MANT_DIG)
+ if (significant_bits > NV_MANT_DIG)
Perl_ck_warner(aTHX_ packWARN(WARN_OVERFLOW),
"Hexadecimal float: mantissa overflow");
# endif
else if (yychar > 255)
sv_catpvs(where_sv, "next token ???");
else if (yychar == YYEMPTY) {
- if ( PL_lex_state == LEX_NORMAL
- || (PL_lex_state == LEX_KNOWNEXT && PL_lex_defer == LEX_NORMAL))
+ if (PL_lex_state == LEX_NORMAL)
sv_catpvs(where_sv, "at end of line");
else if (PL_lex_inpat)
sv_catpvs(where_sv, "within pattern");
{
if (flags & ~PARSE_OPTIONAL)
Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_label");
- if (PL_lex_state == LEX_KNOWNEXT) {
+ if (PL_nexttoke) {
PL_parser->yychar = yylex();
if (PL_parser->yychar == LABEL) {
char * const lpv = pl_yylval.pval;
if (!isIDFIRST_lazy_if(s, UTF))
goto no_label;
t = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &wlen);
- if (word_takes_any_delimeter(s, wlen))
+ if (word_takes_any_delimiter(s, wlen))
goto no_label;
bufptr_pos = s - SvPVX(PL_linestr);
PL_bufptr = t;