X-Git-Url: https://perl5.git.perl.org/perl5.git/blobdiff_plain/96e440d2eb546f4493feffce002f2ec8886f13a3..5d288d736c2758c27a5943647f4a524f0e93a642:/toke.c diff --git a/toke.c b/toke.c index cb379ef..8585b7a 100644 --- a/toke.c +++ b/toke.c @@ -54,7 +54,6 @@ Individual members of C have their own documentation. #define PL_lex_casestack (PL_parser->lex_casestack) #define PL_lex_defer (PL_parser->lex_defer) #define PL_lex_dojoin (PL_parser->lex_dojoin) -#define PL_lex_expect (PL_parser->lex_expect) #define PL_lex_formbrack (PL_parser->lex_formbrack) #define PL_lex_inpat (PL_parser->lex_inpat) #define PL_lex_inwhat (PL_parser->lex_inwhat) @@ -114,6 +113,11 @@ static const char* const ident_too_long = "Identifier too long"; #define SPACE_OR_TAB(c) isBLANK_A(c) +#define HEXFP_PEEK(s) \ + (((s[0] == '.') && \ + (isXDIGIT(s[1]) || isALPHA_FOLD_EQ(s[1], 'p'))) || \ + isALPHA_FOLD_EQ(s[0], 'p')) + /* LEX_* are values for PL_lex_state, the state of the lexer. * They are arranged oddly so that the guard on the switch statement * can get by with a single comparison (if the compiler is smart enough). @@ -163,11 +167,6 @@ static const char* const lex_state_names[] = { #define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline)) -# define SKIPSPACE0(s) skipspace(s) -# define SKIPSPACE1(s) skipspace(s) -# define SKIPSPACE2(s,tsv) skipspace(s) -# define PEEKSPACE(s) skipspace(s) - /* * Convenience functions to return different tokens and prime the * lexer for the next token. They all take an argument. @@ -191,6 +190,7 @@ static const char* const lex_state_names[] = { * PWop : power operator * PMop : pattern-matching operator * Aop : addition-level operator + * AopNOASSIGN : addition-level operator that is never part of .= * Mop : multiplication-level operator * Eop : equality-testing operator * Rop : relational operator <= != gt @@ -212,7 +212,10 @@ static const char* const lex_state_names[] = { #define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval)) #define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval)) #define POSTDEREF(f) return (PL_bufptr = s, S_postderef(aTHX_ REPORT(f),s[1])) -#define LOOPX(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX)) +#define LOOPX(f) return (PL_bufptr = force_word(s,WORD,TRUE,FALSE), \ + pl_yylval.ival=f, \ + PL_expect = PL_nexttoke ? XOPERATOR : XTERM, \ + REPORT((int)LOOPEX)) #define FTST(f) return (pl_yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP)) #define FUN0(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0)) #define FUN0OP(f) return (pl_yylval.opval=f, CLINE, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0OP)) @@ -223,6 +226,7 @@ static const char* const lex_state_names[] = { #define PWop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP))) #define PMop(f) return(pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP)) #define Aop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP))) +#define AopNOASSIGN(f) return (pl_yylval.ival=f, PL_bufptr=s, REPORT((int)ADDOP)) #define Mop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP))) #define Eop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP)) #define Rop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP)) @@ -240,7 +244,7 @@ static const char* const lex_state_names[] = { PL_last_lop_op = f; \ if (*s == '(') \ return REPORT( (int)FUNC1 ); \ - s = PEEKSPACE(s); \ + s = skipspace(s); \ return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \ } #define UNI(f) UNI3(f,XTERM,1) @@ -378,8 +382,6 @@ static struct debug_tokens { STATIC int S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp) { - dVAR; - PERL_ARGS_ASSERT_TOKEREPORT; if (DEBUG_T_TEST) { @@ -467,8 +469,8 @@ S_deprecate_commaless_var_list(pTHX) { /* * S_ao * - * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR - * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN + * This subroutine looks for an '=' next to the operator that has just been + * parsed and turns it into an ASSIGNOP if it finds one. */ STATIC int @@ -1845,7 +1847,10 @@ S_check_uni(pTHX) /* * S_lop * Build a list operator (or something that might be one). The rules: - * - if we have a next token, then it's a list operator [why?] + * - if we have a next token, then it's a list operator (no parens) for + * which the next token has already been parsed; e.g., + * sort foo @args + * sort foo (@args) * - if the next thing is an opening paren, then it's a function * - else it's a list operator */ @@ -1857,15 +1862,15 @@ S_lop(pTHX_ I32 f, int x, char *s) pl_yylval.ival = f; CLINE; - PL_expect = x; PL_bufptr = s; PL_last_lop = PL_oldbufptr; PL_last_lop_op = (OPCODE)f; if (PL_nexttoke) goto lstop; + PL_expect = x; if (*s == '(') return REPORT(FUNC); - s = PEEKSPACE(s); + s = skipspace(s); if (*s == '(') return REPORT(FUNC); else { @@ -1898,7 +1903,6 @@ S_force_next(pTHX_ I32 type) PL_nexttoke++; if (PL_lex_state != LEX_KNOWNEXT) { PL_lex_defer = PL_lex_state; - PL_lex_expect = PL_expect; PL_lex_state = LEX_KNOWNEXT; } } @@ -1983,7 +1987,6 @@ S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len) * a keyword (do this if the word is a label, e.g. goto FOO) * int allow_pack : if true, : characters will also be allowed (require, * use, etc. do this) - * int allow_initial_tick : used by the "sub" lexer only. */ STATIC char * @@ -1994,7 +1997,7 @@ S_force_word(pTHX_ char *start, int token, int check_keyword, int allow_pack) PERL_ARGS_ASSERT_FORCE_WORD; - start = SKIPSPACE1(start); + start = skipspace(start); s = start; if (isIDFIRST_lazy_if(s,UTF) || (allow_pack && *s == ':') ) @@ -2008,7 +2011,7 @@ S_force_word(pTHX_ char *start, int token, int check_keyword, int allow_pack) return start; } if (token == METHOD) { - s = SKIPSPACE1(s); + s = skipspace(s); if (*s == '(') PL_expect = XTERM; else { @@ -2050,7 +2053,7 @@ S_force_ident(pTHX_ const char *s, int kind) warnings if the symbol must be introduced in an eval. GSAR 96-10-12 */ gv_fetchpvn_flags(s, len, - (PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) + (PL_in_eval ? GV_ADDMULTI : GV_ADD) | ( UTF ? SVf_UTF8 : 0 ), kind == '$' ? SVt_PV : kind == '@' ? SVt_PVAV : @@ -2112,7 +2115,7 @@ S_force_version(pTHX_ char *s, int guessing) PERL_ARGS_ASSERT_FORCE_VERSION; - s = SKIPSPACE1(s); + s = skipspace(s); d = s; if (*d == 'v') @@ -2165,7 +2168,7 @@ S_force_strict_version(pTHX_ char *s) version = newSVOP(OP_CONST, 0, ver); } else if ( (*s != ';' && *s != '{' && *s != '}' ) && - (s = SKIPSPACE1(s), (*s != ';' && *s != '{' && *s != '}' ))) + (s = skipspace(s), (*s != ';' && *s != '{' && *s != '}' ))) { PL_bufptr = s; if (errstr) @@ -3498,7 +3501,7 @@ S_scan_const(pTHX_ char *start) *d++ = '\t'; break; case 'e': - *d++ = ASCII_TO_NATIVE('\033'); + *d++ = ESC_NATIVE; break; case 'a': *d++ = '\a'; @@ -3809,12 +3812,18 @@ S_intuit_more(pTHX_ char *s) */ STATIC int -S_intuit_method(pTHX_ char *start, GV *gv, CV *cv) +S_intuit_method(pTHX_ char *start, SV *ioname, CV *cv) { char *s = start + (*start == '$'); char tmpbuf[sizeof PL_tokenbuf]; STRLEN len; GV* indirgv; + /* Mustn't actually add anything to a symbol table. + But also don't want to "initialise" any placeholder + constants that might already be there into full + blown PVGVs with attached PVCV. */ + GV * const gv = + ioname ? gv_fetchsv(ioname, GV_NOADD_NOINIT, SVt_PVCV) : NULL; PERL_ARGS_ASSERT_INTUIT_METHOD; @@ -3834,7 +3843,7 @@ S_intuit_method(pTHX_ char *start, GV *gv, CV *cv) if (cv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY || isUPPER(*PL_tokenbuf)) return 0; - s = PEEKSPACE(s); + s = skipspace(s); PL_bufptr = start; PL_expect = XREF; return *s == '(' ? FUNCMETH : METHOD; @@ -3857,7 +3866,7 @@ S_intuit_method(pTHX_ char *start, GV *gv, CV *cv) return 0; /* filehandle or package name makes it a method */ if (!cv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, UTF ? SVf_UTF8 : 0)) { - s = PEEKSPACE(s); + s = skipspace(s); if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>') return 0; /* no assumptions -- "=>" quotes bareword */ bare_package: @@ -4118,7 +4127,7 @@ S_find_in_my_stash(pTHX_ const char *pkgname, STRLEN len) if (gv && GvCV(gv)) { SV * const sv = cv_const_sv(GvCV(gv)); if (sv) - pkgname = SvPV_const(sv, len); + return gv_stashsv(sv, 0); } return gv_stashpvn(pkgname, len, UTF ? SVf_UTF8 : 0); @@ -4133,11 +4142,11 @@ S_tokenize_use(pTHX_ int is_use, char *s) { yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression", is_use ? "use" : "no")); PL_expect = XTERM; - s = SKIPSPACE1(s); + s = skipspace(s); if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) { s = force_version(s, TRUE); if (*s == ';' || *s == '}' - || (s = SKIPSPACE1(s), (*s == ';' || *s == '}'))) { + || (s = skipspace(s), (*s == ';' || *s == '}'))) { NEXTVAL_NEXTTOKE.opval = NULL; force_next(WORD); } @@ -4156,7 +4165,8 @@ S_tokenize_use(pTHX_ int is_use, char *s) { #ifdef DEBUGGING static const char* const exp_name[] = { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK", - "ATTRTERM", "TERMBLOCK", "POSTDEREF", "TERMORDORDOR" + "ATTRTERM", "TERMBLOCK", "XBLOCKTERM", "POSTDEREF", + "TERMORDORDOR" }; #endif @@ -4269,7 +4279,6 @@ Perl_yylex(pTHX) pl_yylval = PL_nextval[PL_nexttoke]; if (!PL_nexttoke) { PL_lex_state = PL_lex_defer; - PL_expect = PL_lex_expect; PL_lex_defer = LEX_NORMAL; } { @@ -4377,9 +4386,9 @@ Perl_yylex(pTHX) PL_lex_starts = 0; /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */ if (PL_lex_casemods == 1 && PL_lex_inpat) - OPERATOR(','); + TOKEN(','); else - Aop(OP_CONCAT); + AopNOASSIGN(OP_CONCAT); } else return yylex(); @@ -4424,9 +4433,9 @@ Perl_yylex(pTHX) s = PL_bufptr; /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */ if (!PL_lex_casemods && PL_lex_inpat) - OPERATOR(','); + TOKEN(','); else - Aop(OP_CONCAT); + AopNOASSIGN(OP_CONCAT); } return yylex(); @@ -4514,9 +4523,9 @@ Perl_yylex(pTHX) if (PL_lex_starts++) { /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */ if (!PL_lex_casemods && PL_lex_inpat) - OPERATOR(','); + TOKEN(','); else - Aop(OP_CONCAT); + AopNOASSIGN(OP_CONCAT); } else { PL_bufptr = s; @@ -4789,7 +4798,7 @@ Perl_yylex(pTHX) * line contains "Perl" rather than "perl" */ if (!d) { for (d = ipathend-4; d >= ipath; --d) { - if ((*d == 'p' || *d == 'P') + if (isALPHA_FOLD_EQ(*d, 'p') && !ibcmp(d, "perl", 4)) { break; @@ -4871,7 +4880,7 @@ Perl_yylex(pTHX) != PL_unicode) baduni = TRUE; } - if (baduni || *d1 == 'M' || *d1 == 'm') { + if (baduni || isALPHA_FOLD_EQ(*d1, 'M')) { const char * const m = d1; while (*d1 && !isSPACE(*d1)) d1++; @@ -5053,7 +5062,7 @@ Perl_yylex(pTHX) } else if (*s == '>') { s++; - s = SKIPSPACE1(s); + s = skipspace(s); if (FEATURE_POSTDEREF_IS_ENABLED && ( ((*s == '$' || *s == '&') && s[1] == '*') ||(*s == '$' && s[1] == '#' && s[2] == '*') @@ -5230,7 +5239,7 @@ Perl_yylex(pTHX) case XATTRTERM: PL_expect = XTERMBLOCK; grabattrs: - s = PEEKSPACE(s); + s = skipspace(s); attrs = NULL; while (isIDFIRST_lazy_if(s,UTF)) { I32 tmp; @@ -5314,9 +5323,9 @@ Perl_yylex(pTHX) newSVOP(OP_CONST, 0, sv)); } - s = PEEKSPACE(d); + s = skipspace(d); if (*s == ':' && s[1] != ':') - s = PEEKSPACE(s+1); + s = skipspace(s+1); else if (s == d) break; /* require real whitespace or :'s */ /* XXX losing whitespace on sequential attributes here */ @@ -5367,7 +5376,7 @@ Perl_yylex(pTHX) PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */ else PL_expect = XTERM; - s = SKIPSPACE1(s); + s = skipspace(s); PL_lex_allbrackets++; TOKEN('('); case ';': @@ -5375,13 +5384,14 @@ Perl_yylex(pTHX) TOKEN(0); CLINE; s++; - OPERATOR(';'); + PL_expect = XSTATE; + TOKEN(';'); case ')': if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING) TOKEN(0); s++; PL_lex_allbrackets--; - s = SKIPSPACE1(s); + s = skipspace(s); if (*s == '{') PREBLOCK(')'); TERM(')'); @@ -5439,15 +5449,20 @@ Perl_yylex(pTHX) } } /* FALLTHROUGH */ + case XATTRTERM: + case XTERMBLOCK: + PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR; + PL_lex_allbrackets++; + PL_expect = XSTATE; + break; case XATTRBLOCK: case XBLOCK: PL_lex_brackstack[PL_lex_brackets++] = XSTATE; PL_lex_allbrackets++; PL_expect = XSTATE; break; - case XATTRTERM: - case XTERMBLOCK: - PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR; + case XBLOCKTERM: + PL_lex_brackstack[PL_lex_brackets++] = XTERM; PL_lex_allbrackets++; PL_expect = XSTATE; break; @@ -5458,7 +5473,7 @@ Perl_yylex(pTHX) else PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR; PL_lex_allbrackets++; - s = SKIPSPACE1(s); + s = skipspace(s); if (*s == '}') { if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) { PL_expect = XTERM; @@ -5469,6 +5484,11 @@ Perl_yylex(pTHX) } OPERATOR(HASHBRACK); } + if (PL_expect == XREF && PL_oldoldbufptr != PL_last_lop) { + /* ${...} or @{...} etc., but not print {...} */ + PL_expect = XTERM; + break; + } /* This hack serves to disambiguate a pair of curlies * as being a block or an anon hash. Normally, expectation * determines that, but in cases where we're not in a @@ -5488,7 +5508,7 @@ Perl_yylex(pTHX) if (*s == '\'' || *s == '"' || *s == '`') { /* common case: get past first string, handling escapes */ for (t++; t < PL_bufend && *t != *s;) - if (*t++ == '\\' && (*t == '\\' || *t == *s)) + if (*t++ == '\\') t++; t++; } @@ -5891,7 +5911,7 @@ Perl_yylex(pTHX) { const char tmp = *s; if (PL_lex_state == LEX_NORMAL || PL_lex_brackets) - s = SKIPSPACE1(s); + s = skipspace(s); if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) { @@ -5903,7 +5923,7 @@ Perl_yylex(pTHX) while (isSPACE(*t) || isWORDCHAR_lazy_if(t,UTF) || *t == '$') t++; if (*t++ == ',') { - PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */ + PL_bufptr = skipspace(PL_bufptr); /* XXX can realloc */ while (t < PL_bufend && *t != ']') t++; Perl_warner(aTHX_ packWARN(WARN_SYNTAX), @@ -6003,7 +6023,7 @@ Perl_yylex(pTHX) PREREF('@'); } if (PL_lex_state == LEX_NORMAL) - s = SKIPSPACE1(s); + s = skipspace(s); if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) { if (*s == '{') PL_tokenbuf[0] = '%'; @@ -6194,7 +6214,7 @@ Perl_yylex(pTHX) } /* avoid v123abc() or $h{v1}, allow C */ if (!isALPHA(*start) && (PL_expect == XTERM - || PL_expect == XREF || PL_expect == XSTATE + || PL_expect == XSTATE || PL_expect == XTERMORDORDOR)) { GV *const gv = gv_fetchpvn_flags(s, start - s, UTF ? SVf_UTF8 : 0, SVt_PVCV); @@ -6299,12 +6319,12 @@ Perl_yylex(pTHX) } else if (result == KEYWORD_PLUGIN_STMT) { pl_yylval.opval = o; CLINE; - PL_expect = XSTATE; + if (!PL_nexttoke) PL_expect = XSTATE; return REPORT(PLUGSTMT); } else if (result == KEYWORD_PLUGIN_EXPR) { pl_yylval.opval = o; CLINE; - PL_expect = XOPERATOR; + if (!PL_nexttoke) PL_expect = XOPERATOR; return REPORT(PLUGEXPR); } else { Perl_croak(aTHX_ "Bad plugin affecting keyword '%s'", @@ -6453,10 +6473,7 @@ Perl_yylex(pTHX) just_a_word: { int pkgname = 0; const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]); - const char penultchar = - lastchar && PL_bufptr - 2 >= PL_linestart - ? PL_bufptr[-2] - : 0; + bool safebw; /* Get the rest if it looks like a package qualifier */ @@ -6483,8 +6500,7 @@ Perl_yylex(pTHX) no_op("Bareword",s); } - /* Look for a subroutine with this name in current package, - unless this is a lexical sub, or name is "Foo::", + /* See if the name is "Foo::", in which case Foo is a bareword (and a package name). */ @@ -6500,25 +6516,17 @@ Perl_yylex(pTHX) PL_tokenbuf[len] = '\0'; gv = NULL; gvp = 0; + safebw = TRUE; } else { - if (!lex && !gv) { - /* Mustn't actually add anything to a symbol table. - But also don't want to "initialise" any placeholder - constants that might already be there into full - blown PVGVs with attached PVCV. */ - gv = gv_fetchpvn_flags(PL_tokenbuf, len, - GV_NOADD_NOINIT | ( UTF ? SVf_UTF8 : 0 ), - SVt_PVCV); - } - len = 0; + safebw = FALSE; } /* if we saw a global override before, get the right name */ if (!sv) sv = S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, - len ? len : strlen(PL_tokenbuf)); + len); if (gvp) { SV * const tmp_sv = sv; sv = newSVpvs("CORE::GLOBAL::"); @@ -6533,17 +6541,28 @@ Perl_yylex(pTHX) pl_yylval.opval->op_private = OPpCONST_BARE; /* And if "Foo::", then that's what it certainly is. */ - if (len) + if (safebw) goto safe_bareword; if (!off) { OP *const_op = newSVOP(OP_CONST, 0, SvREFCNT_inc_NN(sv)); const_op->op_private = OPpCONST_BARE; - rv2cv_op = newCVREF(0, const_op); - cv = lex ? GvCV(gv) : rv2cv_op_cv(rv2cv_op, 0); + rv2cv_op = + newCVREF(OPpMAY_RETURN_CONSTANT<<8, const_op); + cv = lex + ? isGV(gv) + ? GvCV(gv) + : SvROK(gv) && SvTYPE(SvRV(gv)) == SVt_PVCV + ? (CV *)SvRV(gv) + : (CV *)gv + : rv2cv_op_cv(rv2cv_op, RV2CVOPCV_RETURN_STUB); } + /* Use this var to track whether intuit_method has been + called. intuit_method returns 0 or > 255. */ + tmp = 1; + /* See if it's the indirect object for a list operator. */ if (PL_oldoldbufptr && @@ -6557,17 +6576,13 @@ Perl_yylex(pTHX) bool immediate_paren = *s == '('; /* (Now we can afford to cross potential line boundary.) */ - s = SKIPSPACE2(s,nextPL_nextwhite); + s = skipspace(s); /* Two barewords in a row may indicate method call. */ if ((isIDFIRST_lazy_if(s,UTF) || *s == '$') && - (tmp = intuit_method(s, gv, cv))) { - op_free(rv2cv_op); - if (tmp == METHOD && !PL_lex_allbrackets && - PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) - PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; - return REPORT(tmp); + (tmp = intuit_method(s, lex ? NULL : sv, cv))) { + goto method; } /* If not a declared subroutine, it's an indirect object. */ @@ -6595,13 +6610,17 @@ Perl_yylex(pTHX) if (*s == '=' && s[1] == '>' && !pkgname) { op_free(rv2cv_op); CLINE; - /* This is our own scalar, created a few lines above, - so this is safe. */ - SvREADONLY_off(cSVOPx(pl_yylval.opval)->op_sv); - sv_setpv(((SVOP*)pl_yylval.opval)->op_sv, PL_tokenbuf); - if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len)) - SvUTF8_on(((SVOP*)pl_yylval.opval)->op_sv); - SvREADONLY_on(cSVOPx(pl_yylval.opval)->op_sv); + if (gvp || (lex && !off)) { + assert (cSVOPx(pl_yylval.opval)->op_sv == sv); + /* This is our own scalar, created a few lines + above, so this is safe. */ + SvREADONLY_off(sv); + sv_setpv(sv, PL_tokenbuf); + if (UTF && !IN_BYTES + && is_utf8_string((U8*)PL_tokenbuf, len)) + SvUTF8_on(sv); + SvREADONLY_on(sv); + } TERM(WORD); } @@ -6619,7 +6638,6 @@ Perl_yylex(pTHX) } NEXTVAL_NEXTTOKE.opval = off ? rv2cv_op : pl_yylval.opval; - PL_expect = XOPERATOR; if (off) op_free(pl_yylval.opval), force_next(PRIVATEREF); else op_free(rv2cv_op), force_next(WORD); @@ -6636,14 +6654,26 @@ Perl_yylex(pTHX) if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; - PREBLOCK(METHOD); + PL_expect = XBLOCKTERM; + PL_bufptr = s; + return REPORT(METHOD); } /* If followed by a bareword, see if it looks like indir obj. */ - if (!orig_keyword + if (tmp == 1 && !orig_keyword && (isIDFIRST_lazy_if(s,UTF) || *s == '$') - && (tmp = intuit_method(s, gv, cv))) { + && (tmp = intuit_method(s, lex ? NULL : sv, cv))) { + method: + if (lex && !off) { + assert(cSVOPx(pl_yylval.opval)->op_sv == sv); + SvREADONLY_off(sv); + sv_setpvn(sv, PL_tokenbuf, len); + if (UTF && !IN_BYTES + && is_utf8_string((U8*)PL_tokenbuf, len)) + SvUTF8_on (sv); + else SvUTF8_off(sv); + } op_free(rv2cv_op); if (tmp == METHOD && !PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) @@ -6654,13 +6684,6 @@ Perl_yylex(pTHX) /* Not a method, so call it a subroutine (if defined) */ if (cv) { - if (lastchar == '-' && penultchar != '-') { - const STRLEN l = len ? len : strlen(PL_tokenbuf); - Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS), - "Ambiguous use of -%"UTF8f" resolved as -&%"UTF8f"()", - UTF8fARG(UTF, l, PL_tokenbuf), - UTF8fARG(UTF, l, PL_tokenbuf)); - } /* Check for a constant sub */ if ((sv = cv_const_sv_or_av(cv))) { its_constant: @@ -7021,7 +7044,7 @@ Perl_yylex(pTHX) PREBLOCK(DEFAULT); case KEY_do: - s = SKIPSPACE1(s); + s = skipspace(s); if (*s == '{') PRETERMBLOCK(DO); if (*s != '\'') { @@ -7030,7 +7053,7 @@ Perl_yylex(pTHX) 1, &len); if (len && (len != 4 || strNE(PL_tokenbuf+1, "CORE")) && !keyword(PL_tokenbuf + 1, len, 0)) { - d = SKIPSPACE1(d); + d = skipspace(d); if (*d == '(') { force_ident_maybe_lex('&'); s = d; @@ -7069,8 +7092,6 @@ Perl_yylex(pTHX) UNI(OP_DBMCLOSE); case KEY_dump: - PL_expect = XOPERATOR; - s = force_word(s,WORD,TRUE,FALSE); LOOPX(OP_DUMP); case KEY_else: @@ -7092,7 +7113,7 @@ Perl_yylex(pTHX) UNI(OP_EXIT); case KEY_eval: - s = SKIPSPACE1(s); + s = skipspace(s); if (*s == '{') { /* block eval */ PL_expect = XTERMBLOCK; UNIBRACK(OP_ENTERTRY); @@ -7141,7 +7162,7 @@ Perl_yylex(pTHX) if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR) return REPORT(0); pl_yylval.ival = CopLINE(PL_curcop); - s = SKIPSPACE1(s); + s = skipspace(s); if (PL_expect == XSTATE && isIDFIRST_lazy_if(s,UTF)) { char *p = s; @@ -7151,11 +7172,11 @@ Perl_yylex(pTHX) else if ((PL_bufend - p) >= 4 && strnEQ(p, "our", 3) && isSPACE(*(p + 3))) p += 3; - p = PEEKSPACE(p); + p = skipspace(p); /* skip optional package name, as in "for my abc $x (..)" */ if (isIDFIRST_lazy_if(p,UTF)) { p = scan_word(p, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len); - p = PEEKSPACE(p); + p = skipspace(p); } if (*p != '$') Perl_croak(aTHX_ "Missing $ on loop variable"); @@ -7194,8 +7215,6 @@ Perl_yylex(pTHX) LOP(OP_GREPSTART, XREF); case KEY_goto: - PL_expect = XOPERATOR; - s = force_word(s,WORD,TRUE,FALSE); LOOPX(OP_GOTO); case KEY_gmtime: @@ -7320,8 +7339,6 @@ Perl_yylex(pTHX) LOP(OP_KILL,XTERM); case KEY_last: - PL_expect = XOPERATOR; - s = force_word(s,WORD,TRUE,FALSE); LOOPX(OP_LAST); case KEY_lc: @@ -7391,7 +7408,7 @@ Perl_yylex(pTHX) case KEY_my: case KEY_state: PL_in_my = (U16)tmp; - s = SKIPSPACE1(s); + s = skipspace(s); if (isIDFIRST_lazy_if(s,UTF)) { s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len); if (len == 3 && strnEQ(PL_tokenbuf, "sub", 3)) @@ -7420,8 +7437,6 @@ Perl_yylex(pTHX) OPERATOR(MY); case KEY_next: - PL_expect = XOPERATOR; - s = force_word(s,WORD,TRUE,FALSE); LOOPX(OP_NEXT); case KEY_ne: @@ -7431,10 +7446,10 @@ Perl_yylex(pTHX) case KEY_no: s = tokenize_use(0, s); - TERM(USE); + TOKEN(USE); case KEY_not: - if (*s == '(' || (s = SKIPSPACE1(s), *s == '(')) + if (*s == '(' || (s = skipspace(s), *s == '(')) FUN1(OP_NOT); else { if (!PL_lex_allbrackets && @@ -7444,7 +7459,7 @@ Perl_yylex(pTHX) } case KEY_open: - s = SKIPSPACE1(s); + s = skipspace(s); if (isIDFIRST_lazy_if(s,UTF)) { const char *t; d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, @@ -7504,10 +7519,9 @@ Perl_yylex(pTHX) case KEY_package: s = force_word(s,WORD,FALSE,TRUE); - s = SKIPSPACE1(s); + s = skipspace(s); s = force_strict_version(s); - PL_lex_expect = XBLOCK; - OPERATOR(PACKAGE); + PREBLOCK(PACKAGE); case KEY_pipe: LOP(OP_PIPE_OP,XTERM); @@ -7599,8 +7613,7 @@ Perl_yylex(pTHX) OLDLOP(OP_RETURN); case KEY_require: - s = SKIPSPACE1(s); - PL_expect = XOPERATOR; + s = skipspace(s); if (isDIGIT(*s)) { s = force_version(s, FALSE); } @@ -7621,7 +7634,7 @@ Perl_yylex(pTHX) } else pl_yylval.ival = 0; - PL_expect = XTERM; + PL_expect = PL_nexttoke ? XOPERATOR : XTERM; PL_bufptr = s; PL_last_uni = PL_oldbufptr; PL_last_lop_op = OP_REQUIRE; @@ -7632,8 +7645,6 @@ Perl_yylex(pTHX) UNI(OP_RESET); case KEY_redo: - PL_expect = XOPERATOR; - s = force_word(s,WORD,TRUE,FALSE); LOOPX(OP_REDO); case KEY_rename: @@ -7772,7 +7783,7 @@ Perl_yylex(pTHX) case KEY_sort: checkcomma(s,PL_tokenbuf,"subroutine name"); - s = SKIPSPACE1(s); + s = skipspace(s); PL_expect = XTERM; s = force_word(s,WORD,TRUE,TRUE); LOP(OP_SORT,XREF); @@ -7994,7 +8005,7 @@ Perl_yylex(pTHX) case KEY_use: s = tokenize_use(1, s); - OPERATOR(USE); + TOKEN(USE); case KEY_values: UNI(OP_VALUES); @@ -8139,10 +8150,7 @@ S_pending_ident(pTHX) pl_yylval.opval->op_private = OPpCONST_ENTERED; if (pit != '&') gv_fetchsv(sym, - (PL_in_eval - ? (GV_ADDMULTI | GV_ADDINEVAL) - : GV_ADDMULTI - ), + GV_ADDMULTI, ((PL_tokenbuf[0] == '$') ? SVt_PV : (PL_tokenbuf[0] == '@') ? SVt_PVAV : SVt_PVHV)); @@ -8186,7 +8194,7 @@ S_pending_ident(pTHX) pl_yylval.opval->op_private = OPpCONST_ENTERED; if (pit != '&') gv_fetchpvn_flags(PL_tokenbuf+1, tokenbuf_len - 1, - (PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : GV_ADD) + (PL_in_eval ? GV_ADDMULTI : GV_ADD) | ( UTF ? SVf_UTF8 : 0 ), ((PL_tokenbuf[0] == '$') ? SVt_PV : (PL_tokenbuf[0] == '@') ? SVt_PVAV @@ -8235,12 +8243,20 @@ S_checkcomma(pTHX_ const char *s, const char *name, const char *what) s++; if (*s == ',') { GV* gv; + PADOFFSET off; if (keyword(w, s - w, 0)) return; gv = gv_fetchpvn_flags(w, s - w, ( UTF ? SVf_UTF8 : 0 ), SVt_PVCV); if (gv && GvCVu(gv)) return; + if (s - w <= 254) { + char tmpbuf[256]; + Copy(w, tmpbuf+1, s - w, char); + *tmpbuf = '&'; + off = pad_findmy_pvn(tmpbuf, s-w+1, UTF ? SVf_UTF8 : 0); + if (off != NOT_IN_PAD) return; + } Perl_croak(aTHX_ "No comma allowed after %s", what); } } @@ -8471,7 +8487,7 @@ S_scan_ident(pTHX_ char *s, char *dest, STRLEN destlen, I32 ck_uni) PERL_ARGS_ASSERT_SCAN_IDENT; if (isSPACE(*s)) - s = PEEKSPACE(s); + s = skipspace(s); if (isDIGIT(*s)) { while (isDIGIT(*s)) { if (d >= e) @@ -8509,7 +8525,7 @@ S_scan_ident(pTHX_ char *s, char *dest, STRLEN destlen, I32 ck_uni) s++; orig_copline = CopLINE(PL_curcop); if (s < PL_bufend && isSPACE(*s)) { - s = PEEKSPACE(s); + s = skipspace(s); } } @@ -8569,7 +8585,7 @@ S_scan_ident(pTHX_ char *s, char *dest, STRLEN destlen, I32 ck_uni) *d = '\0'; tmp_copline = CopLINE(PL_curcop); if (s < PL_bufend && isSPACE(*s)) { - s = PEEKSPACE(s); + s = skipspace(s); } if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) { /* ${foo[0]} and ${foo{bar}} notation. */ @@ -8608,7 +8624,7 @@ S_scan_ident(pTHX_ char *s, char *dest, STRLEN destlen, I32 ck_uni) if ( !tmp_copline ) tmp_copline = CopLINE(PL_curcop); if (s < PL_bufend && isSPACE(*s)) { - s = PEEKSPACE(s); + s = skipspace(s); } /* Expect to find a closing } after consuming any trailing whitespace. @@ -9212,7 +9228,8 @@ S_scan_heredoc(pTHX_ char *s) else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r') PL_bufend[-1] = '\n'; #endif - if (*s == term && memEQ(s,PL_tokenbuf + 1,len)) { + if (*s == term && PL_bufend-s >= len + && memEQ(s,PL_tokenbuf + 1,len)) { SvREFCNT_dec(PL_linestr); PL_linestr = linestr_save; PL_linestart = SvPVX(linestr_save); @@ -9361,9 +9378,7 @@ S_scan_inputsymbol(pTHX_ char *start) ++d; intro_sym: gv = gv_fetchpv(d, - (PL_in_eval - ? (GV_ADDMULTI | GV_ADDINEVAL) - : GV_ADDMULTI) | ( UTF ? SVf_UTF8 : 0 ), + GV_ADDMULTI | ( UTF ? SVf_UTF8 : 0 ), SVt_PV); PL_lex_op = readline_overriden ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED, @@ -9374,8 +9389,6 @@ intro_sym: newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv))); } - if (!readline_overriden) - PL_lex_op->op_flags |= OPf_SPECIAL; /* we created the ops in PL_lex_op, so make pl_yylval.ival a null op */ pl_yylval.ival = OP_NULL; } @@ -9472,7 +9485,7 @@ S_scan_str(pTHX_ char *start, int keep_bracketed_quoted, int keep_delims, int re /* skip space before the delimiter */ if (isSPACE(*s)) { - s = PEEKSPACE(s); + s = skipspace(s); } /* mark where we are, in case we need to report errors */ @@ -9777,9 +9790,10 @@ S_scan_str(pTHX_ char *start, int keep_bracketed_quoted, int keep_delims, int re \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*) 12 12.34 12. \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*) .34 - 0b[01](_?[01])* - 0[0-7](_?[0-7])* - 0x[0-9A-Fa-f](_?[0-9A-Fa-f])* + 0b[01](_?[01])* binary integers + 0[0-7](_?[0-7])* octal integers + 0x[0-9A-Fa-f](_?[0-9A-Fa-f])* hexadecimal integers + 0x[0-9A-Fa-f](_?[0-9A-Fa-f])*(?:\.\d*)?p[+-]?[0-9]+ hexadecimal floats Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the thing it reads. @@ -9800,6 +9814,27 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp) bool floatit; /* boolean: int or float? */ const char *lastub = NULL; /* position of last underbar */ static const char* const number_too_long = "Number too long"; + /* Hexadecimal floating point. + * + * In many places (where we have quads and NV is IEEE 754 double) + * we can fit the mantissa bits of a NV into an unsigned quad. + * (Note that UVs might not be quads even when we have quads.) + * This will not work everywhere, though (either no quads, or + * using long doubles), in which case we have to resort to NV, + * which will probably mean horrible loss of precision due to + * multiple fp operations. */ + bool hexfp = FALSE; + int total_bits = 0; +#if NVSIZE == 8 && defined(HAS_QUAD) && defined(Uquad_t) +# define HEXFP_UQUAD + Uquad_t hexfp_uquad = 0; + int hexfp_frac_bits = 0; +#else +# define HEXFP_NV + NV hexfp_nv = 0.0; +#endif + NV hexfp_mult = 1.0; + UV high_non_zero = 0; /* highest digit */ PERL_ARGS_ASSERT_SCAN_NUM; @@ -9842,17 +9877,17 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp) const char *base, *Base, *max; /* check for hex */ - if (s[1] == 'x' || s[1] == 'X') { + if (isALPHA_FOLD_EQ(s[1], 'x')) { shift = 4; s += 2; just_zero = FALSE; - } else if (s[1] == 'b' || s[1] == 'B') { + } else if (isALPHA_FOLD_EQ(s[1], 'b')) { shift = 1; s += 2; just_zero = FALSE; } /* check for a decimal in disguise */ - else if (s[1] == '.' || s[1] == 'e' || s[1] == 'E') + else if (s[1] == '.' || isALPHA_FOLD_EQ(s[1], 'e')) goto decimal; /* so it must be octal */ else { @@ -9924,6 +9959,8 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp) if (!overflowed) { x = u << shift; /* make room for the digit */ + total_bits += shift; + if ((x >> shift) != u && !(PL_hints & HINT_NEW_BINARY)) { overflowed = TRUE; @@ -9946,6 +9983,16 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp) * amount. */ n += (NV) b; } + + if (high_non_zero == 0 && b > 0) + high_non_zero = b; + + /* this could be hexfp, but peek ahead + * to avoid matching ".." */ + if (UNLIKELY(HEXFP_PEEK(s))) { + goto out; + } + break; } } @@ -9960,6 +10007,96 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp) Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number"); } + if (UNLIKELY(HEXFP_PEEK(s))) { + /* Do sloppy (on the underbars) but quick detection + * (and value construction) for hexfp, the decimal + * detection will shortly be more thorough with the + * underbar checks. */ + const char* h = s; +#ifdef HEXFP_UQUAD + hexfp_uquad = u; +#else /* HEXFP_NV */ + hexfp_nv = u; +#endif + if (*h == '.') { +#ifdef HEXFP_NV + NV mult = 1 / 16.0; +#endif + h++; + while (isXDIGIT(*h) || *h == '_') { + if (isXDIGIT(*h)) { + U8 b = XDIGIT_VALUE(*h); + total_bits += shift; +#ifdef HEXFP_UQUAD + hexfp_uquad <<= shift; + hexfp_uquad |= b; + hexfp_frac_bits += shift; +#else /* HEXFP_NV */ + hexfp_nv += b * mult; + mult /= 16.0; +#endif + } + h++; + } + } + + if (total_bits >= 4) { + if (high_non_zero < 0x8) + total_bits--; + if (high_non_zero < 0x4) + total_bits--; + if (high_non_zero < 0x2) + total_bits--; + } + + if (total_bits > 0 && (isALPHA_FOLD_EQ(*h, 'p'))) { + bool negexp = FALSE; + h++; + if (*h == '+') + h++; + else if (*h == '-') { + negexp = TRUE; + h++; + } + if (isDIGIT(*h)) { + I32 hexfp_exp = 0; + while (isDIGIT(*h) || *h == '_') { + if (isDIGIT(*h)) { + hexfp_exp *= 10; + hexfp_exp += *h - '0'; +#ifdef NV_MIN_EXP + if (negexp && + -hexfp_exp < NV_MIN_EXP - 1) { + Perl_ck_warner(aTHX_ packWARN(WARN_OVERFLOW), + "Hexadecimal float: exponent underflow"); +#endif + break; + } + else { +#ifdef NV_MAX_EXP + if (!negexp && + hexfp_exp > NV_MAX_EXP - 1) { + Perl_ck_warner(aTHX_ packWARN(WARN_OVERFLOW), + "Hexadecimal float: exponent overflow"); + break; + } +#endif + } + } + h++; + } + if (negexp) + hexfp_exp = -hexfp_exp; +#ifdef HEXFP_UQUAD + hexfp_exp -= hexfp_frac_bits; +#endif + hexfp_mult = pow(2.0, hexfp_exp); + hexfp = TRUE; + goto decimal; + } + } + } + if (overflowed) { if (n > 4294967295.0) Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE), @@ -9993,10 +10130,17 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp) decimal: d = PL_tokenbuf; e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */ - floatit = FALSE; + floatit = FALSE; + if (hexfp) { + floatit = TRUE; + *d++ = '0'; + *d++ = 'x'; + s = start + 2; + } /* read next group of digits and _ and copy into d */ - while (isDIGIT(*s) || *s == '_') { + while (isDIGIT(*s) || *s == '_' || + UNLIKELY(hexfp && isXDIGIT(*s))) { /* skip underscores, checking for misplaced ones if -w is on */ @@ -10036,7 +10180,9 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp) /* copy, ignoring underbars, until we run out of digits. */ - for (; isDIGIT(*s) || *s == '_'; s++) { + for (; isDIGIT(*s) || *s == '_' || + UNLIKELY(hexfp && isXDIGIT(*s)); + s++) { /* fixed length buffer check */ if (d >= e) Perl_croak(aTHX_ "%s", number_too_long); @@ -10062,12 +10208,24 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp) } /* read exponent part, if present */ - if ((*s == 'e' || *s == 'E') && strchr("+-0123456789_", s[1])) { - floatit = TRUE; + if ((isALPHA_FOLD_EQ(*s, 'e') + || UNLIKELY(hexfp && isALPHA_FOLD_EQ(*s, 'p'))) + && strchr("+-0123456789_", s[1])) + { + floatit = TRUE; + + /* regardless of whether user said 3E5 or 3e5, use lower 'e', + ditto for p (hexfloats) */ + if ((isALPHA_FOLD_EQ(*s, 'e'))) { + /* At least some Mach atof()s don't grok 'E' */ + *d++ = 'e'; + } + else if (UNLIKELY(hexfp && (isALPHA_FOLD_EQ(*s, 'p')))) { + *d++ = 'p'; + } + s++; - /* regardless of whether user said 3E5 or 3e5, use lower 'e' */ - *d++ = 'e'; /* At least some Mach atof()s don't grok 'E' */ /* stray preinitial _ */ if (*s == '_') { @@ -10131,9 +10289,22 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp) STORE_NUMERIC_LOCAL_SET_STANDARD(); /* terminate the string */ *d = '\0'; - nv = Atof(PL_tokenbuf); + if (UNLIKELY(hexfp)) { +# ifdef NV_MANT_DIG + if (total_bits > NV_MANT_DIG) + Perl_ck_warner(aTHX_ packWARN(WARN_OVERFLOW), + "Hexadecimal float: mantissa overflow"); +# endif +#ifdef HEXFP_UQUAD + nv = hexfp_uquad * hexfp_mult; +#else /* HEXFP_NV */ + nv = hexfp_nv * hexfp_mult; +#endif + } else { + nv = Atof(PL_tokenbuf); + } RESTORE_NUMERIC_LOCAL(); - sv = newSVnv(nv); + sv = newSVnv(nv); } if ( floatit