X-Git-Url: https://perl5.git.perl.org/perl5.git/blobdiff_plain/d6e6386c7743857a2277042f4fefddb4055fe274..88cb850087cc0ad53c82068a153d89273c31675e:/toke.c diff --git a/toke.c b/toke.c index 877bf40..98fd125 100644 --- a/toke.c +++ b/toke.c @@ -54,7 +54,6 @@ Individual members of C have their own documentation. #define PL_lex_casestack (PL_parser->lex_casestack) #define PL_lex_defer (PL_parser->lex_defer) #define PL_lex_dojoin (PL_parser->lex_dojoin) -#define PL_lex_expect (PL_parser->lex_expect) #define PL_lex_formbrack (PL_parser->lex_formbrack) #define PL_lex_inpat (PL_parser->lex_inpat) #define PL_lex_inwhat (PL_parser->lex_inwhat) @@ -114,6 +113,11 @@ static const char* const ident_too_long = "Identifier too long"; #define SPACE_OR_TAB(c) isBLANK_A(c) +#define HEXFP_PEEK(s) \ + (((s[0] == '.') && \ + (isXDIGIT(s[1]) || isALPHA_FOLD_EQ(s[1], 'p'))) || \ + isALPHA_FOLD_EQ(s[0], 'p')) + /* LEX_* are values for PL_lex_state, the state of the lexer. * They are arranged oddly so that the guard on the switch statement * can get by with a single comparison (if the compiler is smart enough). @@ -191,6 +195,7 @@ static const char* const lex_state_names[] = { * PWop : power operator * PMop : pattern-matching operator * Aop : addition-level operator + * AopNOASSIGN : addition-level operator that is never part of .= * Mop : multiplication-level operator * Eop : equality-testing operator * Rop : relational operator <= != gt @@ -212,7 +217,10 @@ static const char* const lex_state_names[] = { #define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval)) #define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval)) #define POSTDEREF(f) return (PL_bufptr = s, S_postderef(aTHX_ REPORT(f),s[1])) -#define LOOPX(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX)) +#define LOOPX(f) return (PL_bufptr = force_word(s,WORD,TRUE,FALSE), \ + pl_yylval.ival=f, \ + PL_expect = PL_nexttoke ? XOPERATOR : XTERM, \ + REPORT((int)LOOPEX)) #define FTST(f) return (pl_yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP)) #define FUN0(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0)) #define FUN0OP(f) return (pl_yylval.opval=f, CLINE, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0OP)) @@ -223,6 +231,7 @@ static const char* const lex_state_names[] = { #define PWop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP))) #define PMop(f) return(pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP)) #define Aop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP))) +#define AopNOASSIGN(f) return (pl_yylval.ival=f, PL_bufptr=s, REPORT((int)ADDOP)) #define Mop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP))) #define Eop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP)) #define Rop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP)) @@ -378,8 +387,6 @@ static struct debug_tokens { STATIC int S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp) { - dVAR; - PERL_ARGS_ASSERT_TOKEREPORT; if (DEBUG_T_TEST) { @@ -467,14 +474,13 @@ S_deprecate_commaless_var_list(pTHX) { /* * S_ao * - * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR - * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN + * This subroutine looks for an '=' next to the operator that has just been + * parsed and turns it into an ASSIGNOP if it finds one. */ STATIC int S_ao(pTHX_ int toketype) { - dVAR; if (*PL_bufptr == '=') { PL_bufptr++; if (toketype == ANDAND) @@ -504,7 +510,6 @@ S_ao(pTHX_ int toketype) STATIC void S_no_op(pTHX_ const char *const what, char *s) { - dVAR; char * const oldbp = PL_bufptr; const bool is_first = (PL_oldbufptr == PL_linestart); @@ -551,7 +556,6 @@ S_no_op(pTHX_ const char *const what, char *s) STATIC void S_missingterm(pTHX_ char *s) { - dVAR; char tmpbuf[3]; char q; if (s) { @@ -582,7 +586,6 @@ S_missingterm(pTHX_ char *s) bool Perl_feature_is_enabled(pTHX_ const char *const name, STRLEN namelen) { - dVAR; char he_name[8 + MAX_FEATURE_LEN] = "feature_"; PERL_ARGS_ASSERT_FEATURE_IS_ENABLED; @@ -674,7 +677,6 @@ used by perl internally, so extensions should always pass zero. void Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, U32 flags) { - dVAR; const char *s = NULL; yy_parser *parser, *oparser; if (flags && flags & ~LEX_START_FLAGS) @@ -1638,7 +1640,6 @@ Perl_validate_proto(pTHX_ SV *name, SV *proto, bool warn) STATIC void S_incline(pTHX_ const char *s) { - dVAR; const char *t; const char *n; const char *e; @@ -1692,7 +1693,7 @@ S_incline(pTHX_ const char *s) if (*e != '\n' && *e != '\0') return; /* false alarm */ - line_num = atoi(n)-1; + line_num = grok_atou(n, &e) - 1; if (t - s > 0) { const STRLEN len = t - s; @@ -1823,7 +1824,6 @@ S_skipspace_flags(pTHX_ char *s, U32 flags) STATIC void S_check_uni(pTHX) { - dVAR; const char *s; const char *t; @@ -1852,7 +1852,10 @@ S_check_uni(pTHX) /* * S_lop * Build a list operator (or something that might be one). The rules: - * - if we have a next token, then it's a list operator [why?] + * - if we have a next token, then it's a list operator (no parens) for + * which the next token has already been parsed; e.g., + * sort foo @args + * sort foo (@args) * - if the next thing is an opening paren, then it's a function * - else it's a list operator */ @@ -1860,18 +1863,16 @@ S_check_uni(pTHX) STATIC I32 S_lop(pTHX_ I32 f, int x, char *s) { - dVAR; - PERL_ARGS_ASSERT_LOP; pl_yylval.ival = f; CLINE; - PL_expect = x; PL_bufptr = s; PL_last_lop = PL_oldbufptr; PL_last_lop_op = (OPCODE)f; if (PL_nexttoke) goto lstop; + PL_expect = x; if (*s == '(') return REPORT(FUNC); s = PEEKSPACE(s); @@ -1897,7 +1898,6 @@ S_lop(pTHX_ I32 f, int x, char *s) STATIC void S_force_next(pTHX_ I32 type) { - dVAR; #ifdef DEBUGGING if (DEBUG_T_TEST) { PerlIO_printf(Perl_debug_log, "### forced token:\n"); @@ -1908,7 +1908,6 @@ S_force_next(pTHX_ I32 type) PL_nexttoke++; if (PL_lex_state != LEX_KNOWNEXT) { PL_lex_defer = PL_lex_state; - PL_lex_expect = PL_expect; PL_lex_state = LEX_KNOWNEXT; } } @@ -1925,7 +1924,6 @@ S_force_next(pTHX_ I32 type) static int S_postderef(pTHX_ int const funny, char const next) { - dVAR; assert(funny == DOLSHARP || strchr("$@%&*", funny)); assert(strchr("*[{", next)); if (next == '*') { @@ -1972,7 +1970,6 @@ Perl_yyunlex(pTHX) STATIC SV * S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len) { - dVAR; SV * const sv = newSVpvn_utf8(start, len, !IN_BYTES && UTF @@ -2001,7 +1998,6 @@ S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len) STATIC char * S_force_word(pTHX_ char *start, int token, int check_keyword, int allow_pack) { - dVAR; char *s; STRLEN len; @@ -2049,8 +2045,6 @@ S_force_word(pTHX_ char *start, int token, int check_keyword, int allow_pack) STATIC void S_force_ident(pTHX_ const char *s, int kind) { - dVAR; - PERL_ARGS_ASSERT_FORCE_IDENT; if (s[0]) { @@ -2122,7 +2116,6 @@ Perl_str_to_version(pTHX_ SV *sv) STATIC char * S_force_version(pTHX_ char *s, int guessing) { - dVAR; OP *version = NULL; char *d; @@ -2167,7 +2160,6 @@ S_force_version(pTHX_ char *s, int guessing) STATIC char * S_force_strict_version(pTHX_ char *s) { - dVAR; OP *version = NULL; const char *errstr = NULL; @@ -2208,7 +2200,6 @@ S_force_strict_version(pTHX_ char *s) STATIC SV * S_tokeq(pTHX_ SV *sv) { - dVAR; char *s; char *send; char *d; @@ -2279,7 +2270,6 @@ S_tokeq(pTHX_ SV *sv) STATIC I32 S_sublex_start(pTHX) { - dVAR; const I32 op_type = pl_yylval.ival; if (op_type == OP_NULL) { @@ -2329,7 +2319,6 @@ S_sublex_start(pTHX) STATIC I32 S_sublex_push(pTHX) { - dVAR; LEXSHARED *shared; const bool is_heredoc = PL_multi_close == '<'; ENTER; @@ -2426,7 +2415,6 @@ S_sublex_push(pTHX) STATIC I32 S_sublex_done(pTHX) { - dVAR; if (!PL_lex_starts++) { SV * const sv = newSVpvs(""); if (SvUTF8(PL_linestr)) @@ -2806,22 +2794,20 @@ S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e) STATIC char * S_scan_const(pTHX_ char *start) { - dVAR; char *send = PL_bufend; /* end of the constant */ - SV *sv = newSV(send - start); /* sv for the constant. See - note below on sizing. */ + SV *sv = newSV(send - start); /* sv for the constant. See note below + on sizing. */ char *s = start; /* start of the constant */ char *d = SvPVX(sv); /* destination for copies */ - bool dorange = FALSE; /* are we in a translit range? */ - bool didrange = FALSE; /* did we just finish a range? */ - bool in_charclass = FALSE; /* within /[...]/ */ - bool has_utf8 = FALSE; /* Output constant is UTF8 */ - bool this_utf8 = cBOOL(UTF); /* Is the source string assumed - to be UTF8? But, this can - show as true when the source - isn't utf8, as for example - when it is entirely composed - of hex constants */ + bool dorange = FALSE; /* are we in a translit range? */ + bool didrange = FALSE; /* did we just finish a range? */ + bool in_charclass = FALSE; /* within /[...]/ */ + bool has_utf8 = FALSE; /* Output constant is UTF8 */ + bool this_utf8 = cBOOL(UTF); /* Is the source string assumed to be + UTF8? But, this can show as true + when the source isn't utf8, as for + example when it is entirely composed + of hex constants */ SV *res; /* result from charnames */ /* Note on sizing: The scanned constant is placed into sv, which is @@ -2889,9 +2875,9 @@ S_scan_const(pTHX_ char *start) i = d - SvPVX_const(sv); /* remember current offset */ #ifdef EBCDIC SvGROW(sv, - SvLEN(sv) + (has_utf8 ? - (512 - UTF_CONTINUATION_MARK + - UNISKIP(0x100)) + SvLEN(sv) + ((has_utf8) + ? (512 - UTF_CONTINUATION_MARK + + UNISKIP(0x100)) : 256)); /* How many two-byte within 0..255: 128 in UTF-8, * 96 in UTF-8-mod. */ @@ -2932,6 +2918,8 @@ S_scan_const(pTHX_ char *start) } #ifdef EBCDIC + /* Because of the discontinuities in EBCDIC A-Z and a-z, expand + * any subsets of these ranges into individual characters */ if (literal_endpoint == 2 && ((isLOWER_A(min) && isLOWER_A(max)) || (isUPPER_A(min) && isUPPER_A(max)))) @@ -3379,8 +3367,11 @@ S_scan_const(pTHX_ char *start) d += 5; while (str < str_end) { char hex_string[4]; - my_snprintf(hex_string, sizeof(hex_string), - "%02X.", (U8) *str); + int len = + my_snprintf(hex_string, + sizeof(hex_string), + "%02X.", (U8) *str); + PERL_MY_SNPRINTF_POST_GUARD(len, sizeof(hex_string)); Copy(hex_string, d, 3, char); d += 3; str++; @@ -3516,7 +3507,7 @@ S_scan_const(pTHX_ char *start) *d++ = '\t'; break; case 'e': - *d++ = ASCII_TO_NATIVE('\033'); + *d++ = ESC_NATIVE; break; case 'a': *d++ = '\a'; @@ -3669,8 +3660,6 @@ S_scan_const(pTHX_ char *start) STATIC int S_intuit_more(pTHX_ char *s) { - dVAR; - PERL_ARGS_ASSERT_INTUIT_MORE; if (PL_lex_brackets) @@ -3831,7 +3820,6 @@ S_intuit_more(pTHX_ char *s) STATIC int S_intuit_method(pTHX_ char *start, GV *gv, CV *cv) { - dVAR; char *s = start + (*start == '$'); char tmpbuf[sizeof PL_tokenbuf]; STRLEN len; @@ -3914,7 +3902,6 @@ S_intuit_method(pTHX_ char *start, GV *gv, CV *cv) SV * Perl_filter_add(pTHX_ filter_t funcp, SV *datasv) { - dVAR; if (!funcp) return NULL; @@ -3983,7 +3970,6 @@ Perl_filter_add(pTHX_ filter_t funcp, SV *datasv) void Perl_filter_del(pTHX_ filter_t funcp) { - dVAR; SV *datasv; PERL_ARGS_ASSERT_FILTER_DEL; @@ -4011,7 +3997,6 @@ Perl_filter_del(pTHX_ filter_t funcp) I32 Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen) { - dVAR; filter_t funcp; SV *datasv = NULL; /* This API is bad. It should have been using unsigned int for maxlen. @@ -4101,8 +4086,6 @@ Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen) STATIC char * S_filter_gets(pTHX_ SV *sv, STRLEN append) { - dVAR; - PERL_ARGS_ASSERT_FILTER_GETS; #ifdef PERL_CR_FILTER @@ -4125,7 +4108,6 @@ S_filter_gets(pTHX_ SV *sv, STRLEN append) STATIC HV * S_find_in_my_stash(pTHX_ const char *pkgname, STRLEN len) { - dVAR; GV *gv; PERL_ARGS_ASSERT_FIND_IN_MY_STASH; @@ -4154,8 +4136,6 @@ S_find_in_my_stash(pTHX_ const char *pkgname, STRLEN len) STATIC char * S_tokenize_use(pTHX_ int is_use, char *s) { - dVAR; - PERL_ARGS_ASSERT_TOKENIZE_USE; if (PL_expect != XSTATE) @@ -4185,7 +4165,8 @@ S_tokenize_use(pTHX_ int is_use, char *s) { #ifdef DEBUGGING static const char* const exp_name[] = { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK", - "ATTRTERM", "TERMBLOCK", "POSTDEREF", "TERMORDORDOR" + "ATTRTERM", "TERMBLOCK", "XBLOCKTERM", "POSTDEREF", + "TERMORDORDOR" }; #endif @@ -4298,7 +4279,6 @@ Perl_yylex(pTHX) pl_yylval = PL_nextval[PL_nexttoke]; if (!PL_nexttoke) { PL_lex_state = PL_lex_defer; - PL_expect = PL_lex_expect; PL_lex_defer = LEX_NORMAL; } { @@ -4406,9 +4386,9 @@ Perl_yylex(pTHX) PL_lex_starts = 0; /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */ if (PL_lex_casemods == 1 && PL_lex_inpat) - OPERATOR(','); + TOKEN(','); else - Aop(OP_CONCAT); + AopNOASSIGN(OP_CONCAT); } else return yylex(); @@ -4453,9 +4433,9 @@ Perl_yylex(pTHX) s = PL_bufptr; /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */ if (!PL_lex_casemods && PL_lex_inpat) - OPERATOR(','); + TOKEN(','); else - Aop(OP_CONCAT); + AopNOASSIGN(OP_CONCAT); } return yylex(); @@ -4543,9 +4523,9 @@ Perl_yylex(pTHX) if (PL_lex_starts++) { /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */ if (!PL_lex_casemods && PL_lex_inpat) - OPERATOR(','); + TOKEN(','); else - Aop(OP_CONCAT); + AopNOASSIGN(OP_CONCAT); } else { PL_bufptr = s; @@ -4586,7 +4566,7 @@ Perl_yylex(pTHX) : Perl_form(aTHX_ "\\x%02X", (unsigned char)*s); len = UTF ? Perl_utf8_length(aTHX_ (U8 *) PL_linestart, (U8 *) s) : (STRLEN) (s - PL_linestart); if (len > UNRECOGNIZED_PRECEDE_COUNT) { - d = UTF ? (char *) Perl_utf8_hop(aTHX_ (U8 *) s, -UNRECOGNIZED_PRECEDE_COUNT) : s - UNRECOGNIZED_PRECEDE_COUNT; + d = UTF ? (char *) utf8_hop((U8 *) s, -UNRECOGNIZED_PRECEDE_COUNT) : s - UNRECOGNIZED_PRECEDE_COUNT; } else { d = PL_linestart; } @@ -4818,7 +4798,7 @@ Perl_yylex(pTHX) * line contains "Perl" rather than "perl" */ if (!d) { for (d = ipathend-4; d >= ipath; --d) { - if ((*d == 'p' || *d == 'P') + if (isALPHA_FOLD_EQ(*d, 'p') && !ibcmp(d, "perl", 4)) { break; @@ -4900,7 +4880,7 @@ Perl_yylex(pTHX) != PL_unicode) baduni = TRUE; } - if (baduni || *d1 == 'M' || *d1 == 'm') { + if (baduni || isALPHA_FOLD_EQ(*d1, 'M')) { const char * const m = d1; while (*d1 && !isSPACE(*d1)) d1++; @@ -5404,7 +5384,8 @@ Perl_yylex(pTHX) TOKEN(0); CLINE; s++; - OPERATOR(';'); + PL_expect = XSTATE; + TOKEN(';'); case ')': if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING) TOKEN(0); @@ -5468,15 +5449,20 @@ Perl_yylex(pTHX) } } /* FALLTHROUGH */ + case XATTRTERM: + case XTERMBLOCK: + PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR; + PL_lex_allbrackets++; + PL_expect = XSTATE; + break; case XATTRBLOCK: case XBLOCK: PL_lex_brackstack[PL_lex_brackets++] = XSTATE; PL_lex_allbrackets++; PL_expect = XSTATE; break; - case XATTRTERM: - case XTERMBLOCK: - PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR; + case XBLOCKTERM: + PL_lex_brackstack[PL_lex_brackets++] = XTERM; PL_lex_allbrackets++; PL_expect = XSTATE; break; @@ -5498,6 +5484,11 @@ Perl_yylex(pTHX) } OPERATOR(HASHBRACK); } + if (PL_expect == XREF && PL_oldoldbufptr != PL_last_lop) { + /* ${...} or @{...} etc., but not print {...} */ + PL_expect = XTERM; + break; + } /* This hack serves to disambiguate a pair of curlies * as being a block or an anon hash. Normally, expectation * determines that, but in cases where we're not in a @@ -5517,7 +5508,7 @@ Perl_yylex(pTHX) if (*s == '\'' || *s == '"' || *s == '`') { /* common case: get past first string, handling escapes */ for (t++; t < PL_bufend && *t != *s;) - if (*t++ == '\\' && (*t == '\\' || *t == *s)) + if (*t++ == '\\') t++; t++; } @@ -6049,62 +6040,43 @@ Perl_yylex(pTHX) TERM('@'); case '/': /* may be division, defined-or, or pattern */ - if (PL_expect == XTERMORDORDOR && s[1] == '/') { + if ((PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR) && s[1] == '/') { if (!PL_lex_allbrackets && PL_lex_fakeeof >= (s[2] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) TOKEN(0); s += 2; AOPERATOR(DORDOR); } - /* FALLTHROUGH */ - case '?': /* may either be conditional or pattern */ - if (PL_expect == XOPERATOR) { - char tmp = *s++; - if(tmp == '?') { - if (!PL_lex_allbrackets && - PL_lex_fakeeof >= LEX_FAKEEOF_IFELSE) { - s--; - TOKEN(0); - } - PL_lex_allbrackets++; - OPERATOR('?'); - } - else { - tmp = *s++; - if(tmp == '/') { - /* A // operator. */ - if (!PL_lex_allbrackets && PL_lex_fakeeof >= - (*s == '=' ? LEX_FAKEEOF_ASSIGN : - LEX_FAKEEOF_LOGIC)) { - s -= 2; - TOKEN(0); - } - AOPERATOR(DORDOR); - } - else { - s--; - if (*s == '=' && !PL_lex_allbrackets && - PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) { - s--; - TOKEN(0); - } - Mop(OP_DIVIDE); - } - } - } - else { - /* Disable warning on "study /blah/" */ - if (PL_oldoldbufptr == PL_last_uni - && (*PL_last_uni != 's' || s - PL_last_uni < 5 - || memNE(PL_last_uni, "study", 5) - || isWORDCHAR_lazy_if(PL_last_uni+5,UTF) - )) - check_uni(); - if (*s == '?') - deprecate("?PATTERN? without explicit operator"); - s = scan_pat(s,OP_MATCH); - TERM(sublex_start()); - } + else if (PL_expect == XOPERATOR) { + s++; + if (*s == '=' && !PL_lex_allbrackets && + PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) { + s--; + TOKEN(0); + } + Mop(OP_DIVIDE); + } + else { + /* Disable warning on "study /blah/" */ + if (PL_oldoldbufptr == PL_last_uni + && (*PL_last_uni != 's' || s - PL_last_uni < 5 + || memNE(PL_last_uni, "study", 5) + || isWORDCHAR_lazy_if(PL_last_uni+5,UTF) + )) + check_uni(); + s = scan_pat(s,OP_MATCH); + TERM(sublex_start()); + } + + case '?': /* conditional */ + s++; + if (!PL_lex_allbrackets && + PL_lex_fakeeof >= LEX_FAKEEOF_IFELSE) { + s--; + TOKEN(0); + } + PL_lex_allbrackets++; + OPERATOR('?'); case '.': if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack @@ -6347,12 +6319,12 @@ Perl_yylex(pTHX) } else if (result == KEYWORD_PLUGIN_STMT) { pl_yylval.opval = o; CLINE; - PL_expect = XSTATE; + if (!PL_nexttoke) PL_expect = XSTATE; return REPORT(PLUGSTMT); } else if (result == KEYWORD_PLUGIN_EXPR) { pl_yylval.opval = o; CLINE; - PL_expect = XOPERATOR; + if (!PL_nexttoke) PL_expect = XOPERATOR; return REPORT(PLUGEXPR); } else { Perl_croak(aTHX_ "Bad plugin affecting keyword '%s'", @@ -6667,7 +6639,6 @@ Perl_yylex(pTHX) } NEXTVAL_NEXTTOKE.opval = off ? rv2cv_op : pl_yylval.opval; - PL_expect = XOPERATOR; if (off) op_free(pl_yylval.opval), force_next(PRIVATEREF); else op_free(rv2cv_op), force_next(WORD); @@ -6684,7 +6655,9 @@ Perl_yylex(pTHX) if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; - PREBLOCK(METHOD); + PL_expect = XBLOCKTERM; + PL_bufptr = s; + return REPORT(METHOD); } /* If followed by a bareword, see if it looks like indir obj. */ @@ -7117,8 +7090,6 @@ Perl_yylex(pTHX) UNI(OP_DBMCLOSE); case KEY_dump: - PL_expect = XOPERATOR; - s = force_word(s,WORD,TRUE,FALSE); LOOPX(OP_DUMP); case KEY_else: @@ -7242,8 +7213,6 @@ Perl_yylex(pTHX) LOP(OP_GREPSTART, XREF); case KEY_goto: - PL_expect = XOPERATOR; - s = force_word(s,WORD,TRUE,FALSE); LOOPX(OP_GOTO); case KEY_gmtime: @@ -7368,8 +7337,6 @@ Perl_yylex(pTHX) LOP(OP_KILL,XTERM); case KEY_last: - PL_expect = XOPERATOR; - s = force_word(s,WORD,TRUE,FALSE); LOOPX(OP_LAST); case KEY_lc: @@ -7457,8 +7424,10 @@ Perl_yylex(pTHX) PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len); if (!PL_in_my_stash) { char tmpbuf[1024]; + int len; PL_bufptr = s; - my_snprintf(tmpbuf, sizeof(tmpbuf), "No such class %.1000s", PL_tokenbuf); + len = my_snprintf(tmpbuf, sizeof(tmpbuf), "No such class %.1000s", PL_tokenbuf); + PERL_MY_SNPRINTF_POST_GUARD(len, sizeof(tmpbuf)); yyerror_pv(tmpbuf, UTF ? SVf_UTF8 : 0); } } @@ -7466,8 +7435,6 @@ Perl_yylex(pTHX) OPERATOR(MY); case KEY_next: - PL_expect = XOPERATOR; - s = force_word(s,WORD,TRUE,FALSE); LOOPX(OP_NEXT); case KEY_ne: @@ -7477,7 +7444,7 @@ Perl_yylex(pTHX) case KEY_no: s = tokenize_use(0, s); - TERM(USE); + TOKEN(USE); case KEY_not: if (*s == '(' || (s = SKIPSPACE1(s), *s == '(')) @@ -7552,8 +7519,7 @@ Perl_yylex(pTHX) s = force_word(s,WORD,FALSE,TRUE); s = SKIPSPACE1(s); s = force_strict_version(s); - PL_lex_expect = XBLOCK; - OPERATOR(PACKAGE); + PREBLOCK(PACKAGE); case KEY_pipe: LOP(OP_PIPE_OP,XTERM); @@ -7646,7 +7612,6 @@ Perl_yylex(pTHX) case KEY_require: s = SKIPSPACE1(s); - PL_expect = XOPERATOR; if (isDIGIT(*s)) { s = force_version(s, FALSE); } @@ -7667,7 +7632,7 @@ Perl_yylex(pTHX) } else pl_yylval.ival = 0; - PL_expect = XTERM; + PL_expect = PL_nexttoke ? XOPERATOR : XTERM; PL_bufptr = s; PL_last_uni = PL_oldbufptr; PL_last_lop_op = OP_REQUIRE; @@ -7678,8 +7643,6 @@ Perl_yylex(pTHX) UNI(OP_RESET); case KEY_redo: - PL_expect = XOPERATOR; - s = force_word(s,WORD,TRUE,FALSE); LOOPX(OP_REDO); case KEY_rename: @@ -8040,7 +8003,7 @@ Perl_yylex(pTHX) case KEY_use: s = tokenize_use(1, s); - OPERATOR(USE); + TOKEN(USE); case KEY_values: UNI(OP_VALUES); @@ -8124,7 +8087,6 @@ Perl_yylex(pTHX) static int S_pending_ident(pTHX) { - dVAR; PADOFFSET tmp = 0; const char pit = (char)pl_yylval.ival; const STRLEN tokenbuf_len = strlen(PL_tokenbuf); @@ -8244,8 +8206,6 @@ S_pending_ident(pTHX) STATIC void S_checkcomma(pTHX_ const char *s, const char *name, const char *what) { - dVAR; - PERL_ARGS_ASSERT_CHECKCOMMA; if (*s == ' ' && s[1] == '(') { /* XXX gotta be a better way */ @@ -8307,7 +8267,7 @@ STATIC SV * S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen, SV *sv, SV *pv, const char *type, STRLEN typelen) { - dVAR; dSP; + dSP; HV * table = GvHV(PL_hintgv); /* ^H */ SV *res; SV *errsv = NULL; @@ -8443,7 +8403,6 @@ now_ok: PERL_STATIC_INLINE void S_parse_ident(pTHX_ char **s, char **d, char * const e, int allow_package, bool is_utf8) { - dVAR; PERL_ARGS_ASSERT_PARSE_IDENT; for (;;) { @@ -8495,7 +8454,6 @@ S_parse_ident(pTHX_ char **s, char **d, char * const e, int allow_package, bool STATIC char * S_scan_word(pTHX_ char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp) { - dVAR; char *d = dest; char * const e = d + destlen - 3; /* two-character token, ending NUL */ bool is_utf8 = cBOOL(UTF); @@ -8511,7 +8469,6 @@ S_scan_word(pTHX_ char *s, char *dest, STRLEN destlen, int allow_package, STRLEN STATIC char * S_scan_ident(pTHX_ char *s, char *dest, STRLEN destlen, I32 ck_uni) { - dVAR; I32 herelines = PL_parser->herelines; SSize_t bracket = -1; char funny = *s++; @@ -8803,7 +8760,6 @@ S_pmflag(pTHX_ const char* const valid_flags, U32 * pmfl, char** s, char* charse STATIC char * S_scan_pat(pTHX_ char *start, I32 type) { - dVAR; PMOP *pm; char *s; const char * const valid_flags = @@ -8813,14 +8769,8 @@ S_scan_pat(pTHX_ char *start, I32 type) PERL_ARGS_ASSERT_SCAN_PAT; s = scan_str(start,TRUE,FALSE, (PL_in_eval & EVAL_RE_REPARSING), NULL); - if (!s) { - const char * const delimiter = skipspace(start); - Perl_croak(aTHX_ - (const char *) - (*delimiter == '?' - ? "Search pattern not terminated or ternary operator parsed as search pattern" - : "Search pattern not terminated" )); - } + if (!s) + Perl_croak(aTHX_ "Search pattern not terminated"); pm = (PMOP*)newPMOP(type, 0); if (PL_multi_open == '?') { @@ -8880,7 +8830,6 @@ S_scan_pat(pTHX_ char *start, I32 type) STATIC char * S_scan_subst(pTHX_ char *start) { - dVAR; char *s; PMOP *pm; I32 first_start; @@ -8963,7 +8912,6 @@ S_scan_subst(pTHX_ char *start) STATIC char * S_scan_trans(pTHX_ char *start) { - dVAR; char* s; OP *o; U8 squash; @@ -9053,7 +9001,6 @@ S_scan_trans(pTHX_ char *start) STATIC char * S_scan_heredoc(pTHX_ char *s) { - dVAR; I32 op_type = OP_SCALAR; I32 len; SV *tmpstr; @@ -9326,7 +9273,6 @@ S_scan_heredoc(pTHX_ char *s) STATIC char * S_scan_inputsymbol(pTHX_ char *start) { - dVAR; char *s = start; /* current position in buffer */ char *end; I32 len; @@ -9437,8 +9383,6 @@ intro_sym: newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv))); } - if (!readline_overriden) - PL_lex_op->op_flags |= OPf_SPECIAL; /* we created the ops in PL_lex_op, so make pl_yylval.ival a null op */ pl_yylval.ival = OP_NULL; } @@ -9518,7 +9462,6 @@ S_scan_str(pTHX_ char *start, int keep_bracketed_quoted, int keep_delims, int re char **delimp ) { - dVAR; SV *sv; /* scalar value: string */ const char *tmps; /* temp string, used for delimiter matching */ char *s = start; /* current position in the buffer */ @@ -9841,9 +9784,10 @@ S_scan_str(pTHX_ char *start, int keep_bracketed_quoted, int keep_delims, int re \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*) 12 12.34 12. \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*) .34 - 0b[01](_?[01])* - 0[0-7](_?[0-7])* - 0x[0-9A-Fa-f](_?[0-9A-Fa-f])* + 0b[01](_?[01])* binary integers + 0[0-7](_?[0-7])* octal integers + 0x[0-9A-Fa-f](_?[0-9A-Fa-f])* hexadecimal integers + 0x[0-9A-Fa-f](_?[0-9A-Fa-f])*(?:\.\d*)?p[+-]?[0-9]+ hexadecimal floats Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the thing it reads. @@ -9856,7 +9800,6 @@ S_scan_str(pTHX_ char *start, int keep_bracketed_quoted, int keep_delims, int re char * Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp) { - dVAR; const char *s = start; /* current position in buffer */ char *d; /* destination in temp buffer */ char *e; /* end of temp buffer */ @@ -9865,6 +9808,27 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp) bool floatit; /* boolean: int or float? */ const char *lastub = NULL; /* position of last underbar */ static const char* const number_too_long = "Number too long"; + /* Hexadecimal floating point. + * + * In many places (where we have quads and NV is IEEE 754 double) + * we can fit the mantissa bits of a NV into an unsigned quad. + * (Note that UVs might not be quads even when we have quads.) + * This will not work everywhere, though (either no quads, or + * using long doubles), in which case we have to resort to NV, + * which will probably mean horrible loss of precision due to + * multiple fp operations. */ + bool hexfp = FALSE; + int total_bits = 0; +#if NVSIZE == 8 && defined(HAS_QUAD) && defined(Uquad_t) +# define HEXFP_UQUAD + Uquad_t hexfp_uquad = 0; + int hexfp_frac_bits = 0; +#else +# define HEXFP_NV + NV hexfp_nv = 0.0; +#endif + NV hexfp_mult = 1.0; + UV high_non_zero = 0; /* highest digit */ PERL_ARGS_ASSERT_SCAN_NUM; @@ -9907,17 +9871,17 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp) const char *base, *Base, *max; /* check for hex */ - if (s[1] == 'x' || s[1] == 'X') { + if (isALPHA_FOLD_EQ(s[1], 'x')) { shift = 4; s += 2; just_zero = FALSE; - } else if (s[1] == 'b' || s[1] == 'B') { + } else if (isALPHA_FOLD_EQ(s[1], 'b')) { shift = 1; s += 2; just_zero = FALSE; } /* check for a decimal in disguise */ - else if (s[1] == '.' || s[1] == 'e' || s[1] == 'E') + else if (s[1] == '.' || isALPHA_FOLD_EQ(s[1], 'e')) goto decimal; /* so it must be octal */ else { @@ -9989,6 +9953,8 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp) if (!overflowed) { x = u << shift; /* make room for the digit */ + total_bits += shift; + if ((x >> shift) != u && !(PL_hints & HINT_NEW_BINARY)) { overflowed = TRUE; @@ -10011,6 +9977,16 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp) * amount. */ n += (NV) b; } + + if (high_non_zero == 0 && b > 0) + high_non_zero = b; + + /* this could be hexfp, but peek ahead + * to avoid matching ".." */ + if (UNLIKELY(HEXFP_PEEK(s))) { + goto out; + } + break; } } @@ -10025,6 +10001,96 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp) Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number"); } + if (UNLIKELY(HEXFP_PEEK(s))) { + /* Do sloppy (on the underbars) but quick detection + * (and value construction) for hexfp, the decimal + * detection will shortly be more thorough with the + * underbar checks. */ + const char* h = s; +#ifdef HEXFP_UQUAD + hexfp_uquad = u; +#else /* HEXFP_NV */ + hexfp_nv = u; +#endif + if (*h == '.') { +#ifdef HEXFP_NV + NV mult = 1 / 16.0; +#endif + h++; + while (isXDIGIT(*h) || *h == '_') { + if (isXDIGIT(*h)) { + U8 b = XDIGIT_VALUE(*h); + total_bits += shift; +#ifdef HEXFP_UQUAD + hexfp_uquad <<= shift; + hexfp_uquad |= b; + hexfp_frac_bits += shift; +#else /* HEXFP_NV */ + hexfp_nv += b * mult; + mult /= 16.0; +#endif + } + h++; + } + } + + if (total_bits >= 4) { + if (high_non_zero < 0x8) + total_bits--; + if (high_non_zero < 0x4) + total_bits--; + if (high_non_zero < 0x2) + total_bits--; + } + + if (total_bits > 0 && (isALPHA_FOLD_EQ(*h, 'p'))) { + bool negexp = FALSE; + h++; + if (*h == '+') + h++; + else if (*h == '-') { + negexp = TRUE; + h++; + } + if (isDIGIT(*h)) { + I32 hexfp_exp = 0; + while (isDIGIT(*h) || *h == '_') { + if (isDIGIT(*h)) { + hexfp_exp *= 10; + hexfp_exp += *h - '0'; +#ifdef NV_MIN_EXP + if (negexp && + -hexfp_exp < NV_MIN_EXP - 1) { + Perl_ck_warner(aTHX_ packWARN(WARN_OVERFLOW), + "Hexadecimal float: exponent underflow"); +#endif + break; + } + else { +#ifdef NV_MAX_EXP + if (!negexp && + hexfp_exp > NV_MAX_EXP - 1) { + Perl_ck_warner(aTHX_ packWARN(WARN_OVERFLOW), + "Hexadecimal float: exponent overflow"); + break; + } +#endif + } + } + h++; + } + if (negexp) + hexfp_exp = -hexfp_exp; +#ifdef HEXFP_UQUAD + hexfp_exp -= hexfp_frac_bits; +#endif + hexfp_mult = pow(2.0, hexfp_exp); + hexfp = TRUE; + goto decimal; + } + } + } + if (overflowed) { if (n > 4294967295.0) Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE), @@ -10058,10 +10124,17 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp) decimal: d = PL_tokenbuf; e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */ - floatit = FALSE; + floatit = FALSE; + if (hexfp) { + floatit = TRUE; + *d++ = '0'; + *d++ = 'x'; + s = start + 2; + } /* read next group of digits and _ and copy into d */ - while (isDIGIT(*s) || *s == '_') { + while (isDIGIT(*s) || *s == '_' || + UNLIKELY(hexfp && isXDIGIT(*s))) { /* skip underscores, checking for misplaced ones if -w is on */ @@ -10101,7 +10174,9 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp) /* copy, ignoring underbars, until we run out of digits. */ - for (; isDIGIT(*s) || *s == '_'; s++) { + for (; isDIGIT(*s) || *s == '_' || + UNLIKELY(hexfp && isXDIGIT(*s)); + s++) { /* fixed length buffer check */ if (d >= e) Perl_croak(aTHX_ "%s", number_too_long); @@ -10127,12 +10202,24 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp) } /* read exponent part, if present */ - if ((*s == 'e' || *s == 'E') && strchr("+-0123456789_", s[1])) { - floatit = TRUE; + if ((isALPHA_FOLD_EQ(*s, 'e') + || UNLIKELY(hexfp && isALPHA_FOLD_EQ(*s, 'p'))) + && strchr("+-0123456789_", s[1])) + { + floatit = TRUE; + + /* regardless of whether user said 3E5 or 3e5, use lower 'e', + ditto for p (hexfloats) */ + if ((isALPHA_FOLD_EQ(*s, 'e'))) { + /* At least some Mach atof()s don't grok 'E' */ + *d++ = 'e'; + } + else if (UNLIKELY(hexfp && (isALPHA_FOLD_EQ(*s, 'p')))) { + *d++ = 'p'; + } + s++; - /* regardless of whether user said 3E5 or 3e5, use lower 'e' */ - *d++ = 'e'; /* At least some Mach atof()s don't grok 'E' */ /* stray preinitial _ */ if (*s == '_') { @@ -10196,9 +10283,22 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp) STORE_NUMERIC_LOCAL_SET_STANDARD(); /* terminate the string */ *d = '\0'; - nv = Atof(PL_tokenbuf); + if (UNLIKELY(hexfp)) { +# ifdef NV_MANT_DIG + if (total_bits > NV_MANT_DIG) + Perl_ck_warner(aTHX_ packWARN(WARN_OVERFLOW), + "Hexadecimal float: mantissa overflow"); +# endif +#ifdef HEXFP_UQUAD + nv = hexfp_uquad * hexfp_mult; +#else /* HEXFP_NV */ + nv = hexfp_nv * hexfp_mult; +#endif + } else { + nv = Atof(PL_tokenbuf); + } RESTORE_NUMERIC_LOCAL(); - sv = newSVnv(nv); + sv = newSVnv(nv); } if ( floatit @@ -10235,7 +10335,6 @@ vstring: STATIC char * S_scan_formline(pTHX_ char *s) { - dVAR; char *eol; char *t; SV * const stuff = newSVpvs(""); @@ -10337,7 +10436,6 @@ S_scan_formline(pTHX_ char *s) I32 Perl_start_subparse(pTHX_ I32 is_format, U32 flags) { - dVAR; const I32 oldsavestack_ix = PL_savestack_ix; CV* const outsidecv = PL_compcv; @@ -10362,8 +10460,6 @@ Perl_start_subparse(pTHX_ I32 is_format, U32 flags) static int S_yywarn(pTHX_ const char *const s, U32 flags) { - dVAR; - PERL_ARGS_ASSERT_YYWARN; PL_in_eval |= EVAL_WARNONLY; @@ -10389,7 +10485,6 @@ Perl_yyerror_pv(pTHX_ const char *const s, U32 flags) int Perl_yyerror_pvn(pTHX_ const char *const s, STRLEN len, U32 flags) { - dVAR; const char *context = NULL; int contlen = -1; SV *msg; @@ -10494,7 +10589,6 @@ Perl_yyerror_pvn(pTHX_ const char *const s, STRLEN len, U32 flags) STATIC char* S_swallow_bom(pTHX_ U8 *s) { - dVAR; const STRLEN slen = SvCUR(PL_linestr); PERL_ARGS_ASSERT_SWALLOW_BOM; @@ -10586,7 +10680,6 @@ S_swallow_bom(pTHX_ U8 *s) static I32 S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen) { - dVAR; SV *const filter = FILTER_DATA(idx); /* We re-use this each time round, throwing the contents away before we return. */ @@ -10754,7 +10847,6 @@ sv_2mortal. char * Perl_scan_vstring(pTHX_ const char *s, const char *const e, SV *sv) { - dVAR; const char *pos = s; const char *start = s;