X-Git-Url: https://perl5.git.perl.org/perl5.git/blobdiff_plain/20b7effb9761caf5aee8475b6a6d731b40c80cd7..4e96da834c8a37737d5de382697fd3646ba68673:/toke.c diff --git a/toke.c b/toke.c index c9f7e92..ae832c0 100644 --- a/toke.c +++ b/toke.c @@ -54,7 +54,6 @@ Individual members of C have their own documentation. #define PL_lex_casestack (PL_parser->lex_casestack) #define PL_lex_defer (PL_parser->lex_defer) #define PL_lex_dojoin (PL_parser->lex_dojoin) -#define PL_lex_expect (PL_parser->lex_expect) #define PL_lex_formbrack (PL_parser->lex_formbrack) #define PL_lex_inpat (PL_parser->lex_inpat) #define PL_lex_inwhat (PL_parser->lex_inwhat) @@ -100,9 +99,9 @@ static const char* const ident_too_long = "Identifier too long"; #define XFAKEBRACK 0x80 #ifdef USE_UTF8_SCRIPTS -# define UTF (!IN_BYTES) +# define UTF cBOOL(!IN_BYTES) #else -# define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || ( !(PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS) && (PL_hints & HINT_UTF8))) +# define UTF cBOOL((PL_linestr && DO_UTF8(PL_linestr)) || ( !(PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS) && (PL_hints & HINT_UTF8))) #endif /* The maximum number of characters preceding the unrecognized one to display */ @@ -114,6 +113,11 @@ static const char* const ident_too_long = "Identifier too long"; #define SPACE_OR_TAB(c) isBLANK_A(c) +#define HEXFP_PEEK(s) \ + (((s[0] == '.') && \ + (isXDIGIT(s[1]) || isALPHA_FOLD_EQ(s[1], 'p'))) || \ + isALPHA_FOLD_EQ(s[0], 'p')) + /* LEX_* are values for PL_lex_state, the state of the lexer. * They are arranged oddly so that the guard on the switch statement * can get by with a single comparison (if the compiler is smart enough). @@ -163,11 +167,6 @@ static const char* const lex_state_names[] = { #define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline)) -# define SKIPSPACE0(s) skipspace(s) -# define SKIPSPACE1(s) skipspace(s) -# define SKIPSPACE2(s,tsv) skipspace(s) -# define PEEKSPACE(s) skipspace(s) - /* * Convenience functions to return different tokens and prime the * lexer for the next token. They all take an argument. @@ -191,6 +190,7 @@ static const char* const lex_state_names[] = { * PWop : power operator * PMop : pattern-matching operator * Aop : addition-level operator + * AopNOASSIGN : addition-level operator that is never part of .= * Mop : multiplication-level operator * Eop : equality-testing operator * Rop : relational operator <= != gt @@ -206,24 +206,28 @@ static const char* const lex_state_names[] = { #define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval)) #define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval)) -#define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval))) +#define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, retval)) #define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval)) #define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval)) #define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval)) #define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval)) #define POSTDEREF(f) return (PL_bufptr = s, S_postderef(aTHX_ REPORT(f),s[1])) -#define LOOPX(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX)) +#define LOOPX(f) return (PL_bufptr = force_word(s,WORD,TRUE,FALSE), \ + pl_yylval.ival=f, \ + PL_expect = PL_nexttoke ? XOPERATOR : XTERM, \ + REPORT((int)LOOPEX)) #define FTST(f) return (pl_yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP)) #define FUN0(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0)) #define FUN0OP(f) return (pl_yylval.opval=f, CLINE, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0OP)) #define FUN1(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1)) -#define BOop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP))) -#define BAop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP))) -#define SHop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP))) -#define PWop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP))) +#define BOop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)BITOROP)) +#define BAop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)BITANDOP)) +#define SHop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)SHIFTOP)) +#define PWop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)POWOP)) #define PMop(f) return(pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP)) -#define Aop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP))) -#define Mop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP))) +#define Aop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)ADDOP)) +#define AopNOASSIGN(f) return (pl_yylval.ival=f, PL_bufptr=s, REPORT((int)ADDOP)) +#define Mop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)MULOP)) #define Eop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP)) #define Rop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP)) @@ -240,7 +244,7 @@ static const char* const lex_state_names[] = { PL_last_lop_op = f; \ if (*s == '(') \ return REPORT( (int)FUNC1 ); \ - s = PEEKSPACE(s); \ + s = skipspace(s); \ return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \ } #define UNI(f) UNI3(f,XTERM,1) @@ -378,8 +382,6 @@ static struct debug_tokens { STATIC int S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp) { - dVAR; - PERL_ARGS_ASSERT_TOKEREPORT; if (DEBUG_T_TEST) { @@ -467,8 +469,8 @@ S_deprecate_commaless_var_list(pTHX) { /* * S_ao * - * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR - * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN + * This subroutine looks for an '=' next to the operator that has just been + * parsed and turns it into an ASSIGNOP if it finds one. */ STATIC int @@ -484,7 +486,7 @@ S_ao(pTHX_ int toketype) pl_yylval.ival = OP_DORASSIGN; toketype = ASSIGNOP; } - return toketype; + return REPORT(toketype); } /* @@ -728,7 +730,7 @@ Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, U32 flags) parser->bufend = parser->bufptr + SvCUR(parser->linestr); parser->last_lop = parser->last_uni = NULL; - assert(FITS_IN_8_BITS(LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES + STATIC_ASSERT_STMT(FITS_IN_8_BITS(LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES |LEX_DONT_CLOSE_RSFP)); parser->lex_flags = (U8) (flags & (LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES |LEX_DONT_CLOSE_RSFP)); @@ -1686,7 +1688,7 @@ S_incline(pTHX_ const char *s) if (*e != '\n' && *e != '\0') return; /* false alarm */ - line_num = atoi(n)-1; + line_num = grok_atou(n, &e) - 1; if (t - s > 0) { const STRLEN len = t - s; @@ -1845,7 +1847,10 @@ S_check_uni(pTHX) /* * S_lop * Build a list operator (or something that might be one). The rules: - * - if we have a next token, then it's a list operator [why?] + * - if we have a next token, then it's a list operator (no parens) for + * which the next token has already been parsed; e.g., + * sort foo @args + * sort foo (@args) * - if the next thing is an opening paren, then it's a function * - else it's a list operator */ @@ -1857,15 +1862,15 @@ S_lop(pTHX_ I32 f, int x, char *s) pl_yylval.ival = f; CLINE; - PL_expect = x; PL_bufptr = s; PL_last_lop = PL_oldbufptr; PL_last_lop_op = (OPCODE)f; if (PL_nexttoke) goto lstop; + PL_expect = x; if (*s == '(') return REPORT(FUNC); - s = PEEKSPACE(s); + s = skipspace(s); if (*s == '(') return REPORT(FUNC); else { @@ -1898,7 +1903,6 @@ S_force_next(pTHX_ I32 type) PL_nexttoke++; if (PL_lex_state != LEX_KNOWNEXT) { PL_lex_defer = PL_lex_state; - PL_lex_expect = PL_expect; PL_lex_state = LEX_KNOWNEXT; } } @@ -1964,7 +1968,7 @@ S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len) SV * const sv = newSVpvn_utf8(start, len, !IN_BYTES && UTF - && !is_ascii_string((const U8*)start, len) + && !is_invariant_string((const U8*)start, len) && is_utf8_string((const U8*)start, len)); return sv; } @@ -1983,7 +1987,6 @@ S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len) * a keyword (do this if the word is a label, e.g. goto FOO) * int allow_pack : if true, : characters will also be allowed (require, * use, etc. do this) - * int allow_initial_tick : used by the "sub" lexer only. */ STATIC char * @@ -1994,7 +1997,7 @@ S_force_word(pTHX_ char *start, int token, int check_keyword, int allow_pack) PERL_ARGS_ASSERT_FORCE_WORD; - start = SKIPSPACE1(start); + start = skipspace(start); s = start; if (isIDFIRST_lazy_if(s,UTF) || (allow_pack && *s == ':') ) @@ -2008,7 +2011,7 @@ S_force_word(pTHX_ char *start, int token, int check_keyword, int allow_pack) return start; } if (token == METHOD) { - s = SKIPSPACE1(s); + s = skipspace(s); if (*s == '(') PL_expect = XTERM; else { @@ -2050,7 +2053,7 @@ S_force_ident(pTHX_ const char *s, int kind) warnings if the symbol must be introduced in an eval. GSAR 96-10-12 */ gv_fetchpvn_flags(s, len, - (PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) + (PL_in_eval ? GV_ADDMULTI : GV_ADD) | ( UTF ? SVf_UTF8 : 0 ), kind == '$' ? SVt_PV : kind == '@' ? SVt_PVAV : @@ -2112,7 +2115,7 @@ S_force_version(pTHX_ char *s, int guessing) PERL_ARGS_ASSERT_FORCE_VERSION; - s = SKIPSPACE1(s); + s = skipspace(s); d = s; if (*d == 'v') @@ -2165,7 +2168,7 @@ S_force_strict_version(pTHX_ char *s) version = newSVOP(OP_CONST, 0, ver); } else if ( (*s != ';' && *s != '{' && *s != '}' ) && - (s = SKIPSPACE1(s), (*s != ';' && *s != '{' && *s != '}' ))) + (s = skipspace(s), (*s != ';' && *s != '{' && *s != '}' ))) { PL_bufptr = s; if (errstr) @@ -2786,20 +2789,19 @@ STATIC char * S_scan_const(pTHX_ char *start) { char *send = PL_bufend; /* end of the constant */ - SV *sv = newSV(send - start); /* sv for the constant. See - note below on sizing. */ + SV *sv = newSV(send - start); /* sv for the constant. See note below + on sizing. */ char *s = start; /* start of the constant */ char *d = SvPVX(sv); /* destination for copies */ - bool dorange = FALSE; /* are we in a translit range? */ - bool didrange = FALSE; /* did we just finish a range? */ - bool in_charclass = FALSE; /* within /[...]/ */ - bool has_utf8 = FALSE; /* Output constant is UTF8 */ - bool this_utf8 = cBOOL(UTF); /* Is the source string assumed - to be UTF8? But, this can - show as true when the source - isn't utf8, as for example - when it is entirely composed - of hex constants */ + bool dorange = FALSE; /* are we in a translit range? */ + bool didrange = FALSE; /* did we just finish a range? */ + bool in_charclass = FALSE; /* within /[...]/ */ + bool has_utf8 = FALSE; /* Output constant is UTF8 */ + bool this_utf8 = cBOOL(UTF); /* Is the source string assumed to be + UTF8? But, this can show as true + when the source isn't utf8, as for + example when it is entirely composed + of hex constants */ SV *res; /* result from charnames */ /* Note on sizing: The scanned constant is placed into sv, which is @@ -2867,9 +2869,9 @@ S_scan_const(pTHX_ char *start) i = d - SvPVX_const(sv); /* remember current offset */ #ifdef EBCDIC SvGROW(sv, - SvLEN(sv) + (has_utf8 ? - (512 - UTF_CONTINUATION_MARK + - UNISKIP(0x100)) + SvLEN(sv) + ((has_utf8) + ? (512 - UTF_CONTINUATION_MARK + + UNISKIP(0x100)) : 256)); /* How many two-byte within 0..255: 128 in UTF-8, * 96 in UTF-8-mod. */ @@ -2910,6 +2912,8 @@ S_scan_const(pTHX_ char *start) } #ifdef EBCDIC + /* Because of the discontinuities in EBCDIC A-Z and a-z, expand + * any subsets of these ranges into individual characters */ if (literal_endpoint == 2 && ((isLOWER_A(min) && isLOWER_A(max)) || (isUPPER_A(min) && isUPPER_A(max)))) @@ -3080,6 +3084,7 @@ S_scan_const(pTHX_ char *start) * symbol meaning, e.g. \x{2E} would be confused with a dot. But * in spite of this, we do have to process \N here while the proper * charnames handler is in scope. See bugs #56444 and #62056. + * * There is a complication because \N in a pattern may also stand * for 'match a non-nl', and not mean a charname, in which case its * processing should be deferred to the regex compiler. To be a @@ -3184,9 +3189,13 @@ S_scan_const(pTHX_ char *start) SvPOK_on(sv); *d = '\0'; /* See Note on sizing above. */ - sv_utf8_upgrade_flags_grow(sv, - SV_GMAGIC|SV_FORCE_UTF8_UPGRADE, - UNISKIP(uv) + (STRLEN)(send - s) + 1); + sv_utf8_upgrade_flags_grow( + sv, + SV_GMAGIC|SV_FORCE_UTF8_UPGRADE + /* Above-latin1 in string + * implies no encoding */ + |SV_UTF8_NO_ENCODING, + UNISKIP(uv) + (STRLEN)(send - s) + 1); d = SvPVX(sv) + SvCUR(sv); has_utf8 = TRUE; } @@ -3214,31 +3223,44 @@ S_scan_const(pTHX_ char *start) continue; case 'N': - /* In a non-pattern \N must be a named character, like \N{LATIN - * SMALL LETTER A} or \N{U+0041}. For patterns, it also can - * mean to match a non-newline. For non-patterns, named - * characters are converted to their string equivalents. In - * patterns, named characters are not converted to their - * ultimate forms for the same reasons that other escapes - * aren't. Instead, they are converted to the \N{U+...} form - * to get the value from the charnames that is in effect right - * now, while preserving the fact that it was a named character - * so that the regex compiler knows this */ - - /* The structure of this section of code (besides checking for + /* In a non-pattern \N must be like \N{U+0041}, or it can be a + * named character, like \N{LATIN SMALL LETTER A}, or a named + * sequence, like \N{LATIN CAPITAL LETTER A WITH MACRON AND + * GRAVE}. For convenience all three forms are referred to as + * "named characters" below. + * + * For patterns, \N also can mean to match a non-newline. Code + * before this 'switch' statement should already have handled + * this situation, and hence this code only has to deal with + * the named character cases. + * + * For non-patterns, the named characters are converted to + * their string equivalents. In patterns, named characters are + * not converted to their ultimate forms for the same reasons + * that other escapes aren't. Instead, they are converted to + * the \N{U+...} form to get the value from the charnames that + * is in effect right now, while preserving the fact that it + * was a named character, so that the regex compiler knows + * this. + * + * The structure of this section of code (besides checking for * errors and upgrading to utf8) is: - * Further disambiguate between the two meanings of \N, and if - * not a charname, go process it elsewhere - * If of form \N{U+...}, pass it through if a pattern; - * otherwise convert to utf8 - * Otherwise must be \N{NAME}: convert to \N{U+c1.c2...} if a - * pattern; otherwise convert to utf8 */ - - /* Here, s points to the 'N'; the test below is guaranteed to - * succeed if we are being called on a pattern as we already - * know from a test above that the next character is a '{'. - * On a non-pattern \N must mean 'named sequence, which - * requires braces */ + * If the named character is of the form \N{U+...}, pass it + * through if a pattern; otherwise convert the code point + * to utf8 + * Otherwise must be some \N{NAME}: convert to \N{U+c1.c2...} + * if a pattern; otherwise convert to utf8 + * + * If the regex compiler should ever need to differentiate + * between the \N{U+...} and \N{name} forms, that could easily + * be done here by stripping any leading zeros from the + * \N{U+...} case, and adding them to the other one. */ + + /* Here, 's' points to the 'N'; the test below is guaranteed to + * succeed if we are being called on a pattern, as we already + * know from a test above that the next character is a '{'. A + * non-pattern \N must mean 'named character', which requires + * braces */ s++; if (*s != '{') { yyerror("Missing braces on \\N{}"); @@ -3263,8 +3285,6 @@ S_scan_const(pTHX_ char *start) | PERL_SCAN_DISALLOW_PREFIX; STRLEN len; - /* For \N{U+...}, the '...' is a unicode value even on - * EBCDIC machines */ s += 2; /* Skip to next char after the 'U+' */ len = e - s; uv = grok_hex(s, &len, &flags, NULL); @@ -3275,27 +3295,26 @@ S_scan_const(pTHX_ char *start) } if (PL_lex_inpat) { - - /* On non-EBCDIC platforms, pass through to the regex - * compiler unchanged. The reason we evaluated the - * number above is to make sure there wasn't a syntax - * error. But on EBCDIC we convert to native so - * downstream code can continue to assume it's native - */ s -= 5; /* Include the '\N{U+' */ #ifdef EBCDIC - d += my_snprintf(d, e - s + 1 + 1, /* includes the } + /* On EBCDIC platforms, in \N{U+...}, the '...' is a + * Unicode value, so convert to native so downstream + * code can continue to assume it's native */ + d += my_snprintf(d, e - s + 1 + 1, /* includes the '}' and the \0 */ - "\\N{U+%X}", - (unsigned int) UNI_TO_NATIVE(uv)); + "\\N{U+%X}", + (unsigned int) UNI_TO_NATIVE(uv)); #else - Copy(s, d, e - s + 1, char); /* 1 = include the } */ + /* On non-EBCDIC platforms, pass it through unchanged. + * The reason we evaluated the number above is to make + * sure there wasn't a syntax error. */ + Copy(s, d, e - s + 1, char); /* +1 is for the '}' */ d += e - s + 1; #endif } else { /* Not a pattern: convert the hex to string */ - /* If destination is not in utf8, unconditionally + /* If the destination is not in utf8, unconditionally * recode it to be so. This is because \N{} implies * Unicode semantics, and scalars have to be in utf8 * to guarantee those semantics */ @@ -3348,24 +3367,30 @@ S_scan_const(pTHX_ char *start) * through the string. Each character takes up * 2 hex digits plus either a trailing dot or * the "}" */ + const char initial_text[] = "\\N{U+"; + const STRLEN initial_len = sizeof(initial_text) + - 1; d = off + SvGROW(sv, off + 3 * len - + 6 /* For the "\N{U+", and - trailing NUL */ + + /* +1 for trailing NUL */ + + initial_len + 1 + + (STRLEN)(send - e)); - Copy("\\N{U+", d, 5, char); - d += 5; + Copy(initial_text, d, initial_len, char); + d += initial_len; while (str < str_end) { char hex_string[4]; - PERL_UNUSED_RESULT( + int len = my_snprintf(hex_string, sizeof(hex_string), - "%02X.", (U8) *str)); + "%02X.", (U8) *str); + PERL_MY_SNPRINTF_POST_GUARD(len, sizeof(hex_string)); Copy(hex_string, d, 3, char); d += 3; str++; } - d--; /* We will overwrite below the final + d--; /* Below, we will overwrite the final dot with a right brace */ } else { @@ -3450,8 +3475,8 @@ S_scan_const(pTHX_ char *start) const STRLEN off = d - SvPVX_const(sv); d = off + SvGROW(sv, off + len + (STRLEN)(send - s) + 1); } - if (! SvUTF8(res)) { /* Make sure is \N{} return is UTF-8 */ - sv_utf8_upgrade(res); + if (! SvUTF8(res)) { /* Make sure \N{} return is UTF-8 */ + sv_utf8_upgrade_flags(res, SV_UTF8_NO_ENCODING); str = SvPV_const(res, len); } Copy(str, d, len, char); @@ -3496,7 +3521,7 @@ S_scan_const(pTHX_ char *start) *d++ = '\t'; break; case 'e': - *d++ = ASCII_TO_NATIVE('\033'); + *d++ = ESC_NATIVE; break; case 'a': *d++ = '\a'; @@ -3567,8 +3592,8 @@ S_scan_const(pTHX_ char *start) " >= %"UVuf, (UV)SvCUR(sv), (UV)SvLEN(sv)); SvPOK_on(sv); - if (PL_encoding && !has_utf8) { - sv_recode_to_utf8(sv, PL_encoding); + if (IN_ENCODING && !has_utf8) { + sv_recode_to_utf8(sv, _get_encoding()); if (SvUTF8(sv)) has_utf8 = TRUE; } @@ -3807,12 +3832,18 @@ S_intuit_more(pTHX_ char *s) */ STATIC int -S_intuit_method(pTHX_ char *start, GV *gv, CV *cv) +S_intuit_method(pTHX_ char *start, SV *ioname, CV *cv) { char *s = start + (*start == '$'); char tmpbuf[sizeof PL_tokenbuf]; STRLEN len; GV* indirgv; + /* Mustn't actually add anything to a symbol table. + But also don't want to "initialise" any placeholder + constants that might already be there into full + blown PVGVs with attached PVCV. */ + GV * const gv = + ioname ? gv_fetchsv(ioname, GV_NOADD_NOINIT, SVt_PVCV) : NULL; PERL_ARGS_ASSERT_INTUIT_METHOD; @@ -3832,7 +3863,7 @@ S_intuit_method(pTHX_ char *start, GV *gv, CV *cv) if (cv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY || isUPPER(*PL_tokenbuf)) return 0; - s = PEEKSPACE(s); + s = skipspace(s); PL_bufptr = start; PL_expect = XREF; return *s == '(' ? FUNCMETH : METHOD; @@ -3855,7 +3886,7 @@ S_intuit_method(pTHX_ char *start, GV *gv, CV *cv) return 0; /* filehandle or package name makes it a method */ if (!cv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, UTF ? SVf_UTF8 : 0)) { - s = PEEKSPACE(s); + s = skipspace(s); if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>') return 0; /* no assumptions -- "=>" quotes bareword */ bare_package: @@ -4116,7 +4147,7 @@ S_find_in_my_stash(pTHX_ const char *pkgname, STRLEN len) if (gv && GvCV(gv)) { SV * const sv = cv_const_sv(GvCV(gv)); if (sv) - pkgname = SvPV_const(sv, len); + return gv_stashsv(sv, 0); } return gv_stashpvn(pkgname, len, UTF ? SVf_UTF8 : 0); @@ -4131,11 +4162,11 @@ S_tokenize_use(pTHX_ int is_use, char *s) { yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression", is_use ? "use" : "no")); PL_expect = XTERM; - s = SKIPSPACE1(s); + s = skipspace(s); if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) { s = force_version(s, TRUE); if (*s == ';' || *s == '}' - || (s = SKIPSPACE1(s), (*s == ';' || *s == '}'))) { + || (s = skipspace(s), (*s == ';' || *s == '}'))) { NEXTVAL_NEXTTOKE.opval = NULL; force_next(WORD); } @@ -4154,7 +4185,8 @@ S_tokenize_use(pTHX_ int is_use, char *s) { #ifdef DEBUGGING static const char* const exp_name[] = { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK", - "ATTRTERM", "TERMBLOCK", "POSTDEREF", "TERMORDORDOR" + "ATTRTERM", "TERMBLOCK", "XBLOCKTERM", "POSTDEREF", + "TERMORDORDOR" }; #endif @@ -4267,7 +4299,6 @@ Perl_yylex(pTHX) pl_yylval = PL_nextval[PL_nexttoke]; if (!PL_nexttoke) { PL_lex_state = PL_lex_defer; - PL_expect = PL_lex_expect; PL_lex_defer = LEX_NORMAL; } { @@ -4375,9 +4406,9 @@ Perl_yylex(pTHX) PL_lex_starts = 0; /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */ if (PL_lex_casemods == 1 && PL_lex_inpat) - OPERATOR(','); + TOKEN(','); else - Aop(OP_CONCAT); + AopNOASSIGN(OP_CONCAT); } else return yylex(); @@ -4422,9 +4453,9 @@ Perl_yylex(pTHX) s = PL_bufptr; /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */ if (!PL_lex_casemods && PL_lex_inpat) - OPERATOR(','); + TOKEN(','); else - Aop(OP_CONCAT); + AopNOASSIGN(OP_CONCAT); } return yylex(); @@ -4512,9 +4543,9 @@ Perl_yylex(pTHX) if (PL_lex_starts++) { /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */ if (!PL_lex_casemods && PL_lex_inpat) - OPERATOR(','); + TOKEN(','); else - Aop(OP_CONCAT); + AopNOASSIGN(OP_CONCAT); } else { PL_bufptr = s; @@ -4787,7 +4818,7 @@ Perl_yylex(pTHX) * line contains "Perl" rather than "perl" */ if (!d) { for (d = ipathend-4; d >= ipath; --d) { - if ((*d == 'p' || *d == 'P') + if (isALPHA_FOLD_EQ(*d, 'p') && !ibcmp(d, "perl", 4)) { break; @@ -4869,7 +4900,7 @@ Perl_yylex(pTHX) != PL_unicode) baduni = TRUE; } - if (baduni || *d1 == 'M' || *d1 == 'm') { + if (baduni || isALPHA_FOLD_EQ(*d1, 'M')) { const char * const m = d1; while (*d1 && !isSPACE(*d1)) d1++; @@ -5051,7 +5082,7 @@ Perl_yylex(pTHX) } else if (*s == '>') { s++; - s = SKIPSPACE1(s); + s = skipspace(s); if (FEATURE_POSTDEREF_IS_ENABLED && ( ((*s == '$' || *s == '&') && s[1] == '*') ||(*s == '$' && s[1] == '#' && s[2] == '*') @@ -5228,7 +5259,7 @@ Perl_yylex(pTHX) case XATTRTERM: PL_expect = XTERMBLOCK; grabattrs: - s = PEEKSPACE(s); + s = skipspace(s); attrs = NULL; while (isIDFIRST_lazy_if(s,UTF)) { I32 tmp; @@ -5312,9 +5343,9 @@ Perl_yylex(pTHX) newSVOP(OP_CONST, 0, sv)); } - s = PEEKSPACE(d); + s = skipspace(d); if (*s == ':' && s[1] != ':') - s = PEEKSPACE(s+1); + s = skipspace(s+1); else if (s == d) break; /* require real whitespace or :'s */ /* XXX losing whitespace on sequential attributes here */ @@ -5365,7 +5396,7 @@ Perl_yylex(pTHX) PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */ else PL_expect = XTERM; - s = SKIPSPACE1(s); + s = skipspace(s); PL_lex_allbrackets++; TOKEN('('); case ';': @@ -5373,13 +5404,14 @@ Perl_yylex(pTHX) TOKEN(0); CLINE; s++; - OPERATOR(';'); + PL_expect = XSTATE; + TOKEN(';'); case ')': if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING) TOKEN(0); s++; PL_lex_allbrackets--; - s = SKIPSPACE1(s); + s = skipspace(s); if (*s == '{') PREBLOCK(')'); TERM(')'); @@ -5437,15 +5469,20 @@ Perl_yylex(pTHX) } } /* FALLTHROUGH */ + case XATTRTERM: + case XTERMBLOCK: + PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR; + PL_lex_allbrackets++; + PL_expect = XSTATE; + break; case XATTRBLOCK: case XBLOCK: PL_lex_brackstack[PL_lex_brackets++] = XSTATE; PL_lex_allbrackets++; PL_expect = XSTATE; break; - case XATTRTERM: - case XTERMBLOCK: - PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR; + case XBLOCKTERM: + PL_lex_brackstack[PL_lex_brackets++] = XTERM; PL_lex_allbrackets++; PL_expect = XSTATE; break; @@ -5456,7 +5493,7 @@ Perl_yylex(pTHX) else PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR; PL_lex_allbrackets++; - s = SKIPSPACE1(s); + s = skipspace(s); if (*s == '}') { if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) { PL_expect = XTERM; @@ -5467,6 +5504,12 @@ Perl_yylex(pTHX) } OPERATOR(HASHBRACK); } + if (PL_expect == XREF && PL_oldoldbufptr != PL_last_lop) { + /* ${...} or @{...} etc., but not print {...} + * Skip the disambiguation and treat this as a block. + */ + goto block_expectation; + } /* This hack serves to disambiguate a pair of curlies * as being a block or an anon hash. Normally, expectation * determines that, but in cases where we're not in a @@ -5486,7 +5529,7 @@ Perl_yylex(pTHX) if (*s == '\'' || *s == '"' || *s == '`') { /* common case: get past first string, handling escapes */ for (t++; t < PL_bufend && *t != *s;) - if (*t++ == '\\' && (*t == '\\' || *t == *s)) + if (*t++ == '\\') t++; t++; } @@ -5549,7 +5592,28 @@ Perl_yylex(pTHX) || (*t == '=' && t[1] == '>'))) OPERATOR(HASHBRACK); if (PL_expect == XREF) - PL_expect = XTERM; + { + block_expectation: + /* If there is an opening brace or 'sub:', treat it + as a term to make ${{...}}{k} and &{sub:attr...} + dwim. Otherwise, treat it as a statement, so + map {no strict; ...} works. + */ + s = skipspace(s); + if (*s == '{') { + PL_expect = XTERM; + break; + } + if (strnEQ(s, "sub", 3)) { + d = s + 3; + d = skipspace(d); + if (*d == ':') { + PL_expect = XTERM; + break; + } + } + PL_expect = XSTATE; + } else { PL_lex_brackstack[PL_lex_brackets-1] = XSTATE; PL_expect = XSTATE; @@ -5558,8 +5622,7 @@ Perl_yylex(pTHX) break; } pl_yylval.ival = CopLINE(PL_curcop); - if (isSPACE(*s) || *s == '#') - PL_copline = NOLINE; /* invalidate current command line number */ + PL_copline = NOLINE; /* invalidate current command line number */ TOKEN(formbrack ? '=' : '{'); case '}': if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF) @@ -5774,7 +5837,7 @@ Perl_yylex(pTHX) if (PL_expect != XOPERATOR) { if (s[1] != '<' && !strchr(s,'>')) check_uni(); - if (s[1] == '<') + if (s[1] == '<' && s[2] != '>') s = scan_heredoc(s); else s = scan_inputsymbol(s); @@ -5889,7 +5952,7 @@ Perl_yylex(pTHX) { const char tmp = *s; if (PL_lex_state == LEX_NORMAL || PL_lex_brackets) - s = SKIPSPACE1(s); + s = skipspace(s); if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) { @@ -5901,7 +5964,7 @@ Perl_yylex(pTHX) while (isSPACE(*t) || isWORDCHAR_lazy_if(t,UTF) || *t == '$') t++; if (*t++ == ',') { - PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */ + PL_bufptr = skipspace(PL_bufptr); /* XXX can realloc */ while (t < PL_bufend && *t != ']') t++; Perl_warner(aTHX_ packWARN(WARN_SYNTAX), @@ -6001,7 +6064,7 @@ Perl_yylex(pTHX) PREREF('@'); } if (PL_lex_state == LEX_NORMAL) - s = SKIPSPACE1(s); + s = skipspace(s); if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) { if (*s == '{') PL_tokenbuf[0] = '%'; @@ -6297,12 +6360,12 @@ Perl_yylex(pTHX) } else if (result == KEYWORD_PLUGIN_STMT) { pl_yylval.opval = o; CLINE; - PL_expect = XSTATE; + if (!PL_nexttoke) PL_expect = XSTATE; return REPORT(PLUGSTMT); } else if (result == KEYWORD_PLUGIN_EXPR) { pl_yylval.opval = o; CLINE; - PL_expect = XOPERATOR; + if (!PL_nexttoke) PL_expect = XOPERATOR; return REPORT(PLUGEXPR); } else { Perl_croak(aTHX_ "Bad plugin affecting keyword '%s'", @@ -6329,7 +6392,7 @@ Perl_yylex(pTHX) char tmpbuf[sizeof PL_tokenbuf + 1]; *tmpbuf = '&'; Copy(PL_tokenbuf, tmpbuf+1, len, char); - off = pad_findmy_pvn(tmpbuf, len+1, UTF ? SVf_UTF8 : 0); + off = pad_findmy_pvn(tmpbuf, len+1, 0); if (off != NOT_IN_PAD) { assert(off); /* we assume this is boolean-true below */ if (PAD_COMPNAME_FLAGS_isOUR(off)) { @@ -6451,10 +6514,7 @@ Perl_yylex(pTHX) just_a_word: { int pkgname = 0; const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]); - const char penultchar = - lastchar && PL_bufptr - 2 >= PL_linestart - ? PL_bufptr[-2] - : 0; + bool safebw; /* Get the rest if it looks like a package qualifier */ @@ -6481,8 +6541,7 @@ Perl_yylex(pTHX) no_op("Bareword",s); } - /* Look for a subroutine with this name in current package, - unless this is a lexical sub, or name is "Foo::", + /* See if the name is "Foo::", in which case Foo is a bareword (and a package name). */ @@ -6498,25 +6557,17 @@ Perl_yylex(pTHX) PL_tokenbuf[len] = '\0'; gv = NULL; gvp = 0; + safebw = TRUE; } else { - if (!lex && !gv) { - /* Mustn't actually add anything to a symbol table. - But also don't want to "initialise" any placeholder - constants that might already be there into full - blown PVGVs with attached PVCV. */ - gv = gv_fetchpvn_flags(PL_tokenbuf, len, - GV_NOADD_NOINIT | ( UTF ? SVf_UTF8 : 0 ), - SVt_PVCV); - } - len = 0; + safebw = FALSE; } /* if we saw a global override before, get the right name */ if (!sv) sv = S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, - len ? len : strlen(PL_tokenbuf)); + len); if (gvp) { SV * const tmp_sv = sv; sv = newSVpvs("CORE::GLOBAL::"); @@ -6531,17 +6582,28 @@ Perl_yylex(pTHX) pl_yylval.opval->op_private = OPpCONST_BARE; /* And if "Foo::", then that's what it certainly is. */ - if (len) + if (safebw) goto safe_bareword; if (!off) { OP *const_op = newSVOP(OP_CONST, 0, SvREFCNT_inc_NN(sv)); const_op->op_private = OPpCONST_BARE; - rv2cv_op = newCVREF(0, const_op); - cv = lex ? GvCV(gv) : rv2cv_op_cv(rv2cv_op, 0); + rv2cv_op = + newCVREF(OPpMAY_RETURN_CONSTANT<<8, const_op); + cv = lex + ? isGV(gv) + ? GvCV(gv) + : SvROK(gv) && SvTYPE(SvRV(gv)) == SVt_PVCV + ? (CV *)SvRV(gv) + : ((CV *)gv) + : rv2cv_op_cv(rv2cv_op, RV2CVOPCV_RETURN_STUB); } + /* Use this var to track whether intuit_method has been + called. intuit_method returns 0 or > 255. */ + tmp = 1; + /* See if it's the indirect object for a list operator. */ if (PL_oldoldbufptr && @@ -6555,17 +6617,13 @@ Perl_yylex(pTHX) bool immediate_paren = *s == '('; /* (Now we can afford to cross potential line boundary.) */ - s = SKIPSPACE2(s,nextPL_nextwhite); + s = skipspace(s); /* Two barewords in a row may indicate method call. */ if ((isIDFIRST_lazy_if(s,UTF) || *s == '$') && - (tmp = intuit_method(s, gv, cv))) { - op_free(rv2cv_op); - if (tmp == METHOD && !PL_lex_allbrackets && - PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) - PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; - return REPORT(tmp); + (tmp = intuit_method(s, lex ? NULL : sv, cv))) { + goto method; } /* If not a declared subroutine, it's an indirect object. */ @@ -6593,13 +6651,17 @@ Perl_yylex(pTHX) if (*s == '=' && s[1] == '>' && !pkgname) { op_free(rv2cv_op); CLINE; - /* This is our own scalar, created a few lines above, - so this is safe. */ - SvREADONLY_off(cSVOPx(pl_yylval.opval)->op_sv); - sv_setpv(((SVOP*)pl_yylval.opval)->op_sv, PL_tokenbuf); - if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len)) - SvUTF8_on(((SVOP*)pl_yylval.opval)->op_sv); - SvREADONLY_on(cSVOPx(pl_yylval.opval)->op_sv); + if (gvp || (lex && !off)) { + assert (cSVOPx(pl_yylval.opval)->op_sv == sv); + /* This is our own scalar, created a few lines + above, so this is safe. */ + SvREADONLY_off(sv); + sv_setpv(sv, PL_tokenbuf); + if (UTF && !IN_BYTES + && is_utf8_string((U8*)PL_tokenbuf, len)) + SvUTF8_on(sv); + SvREADONLY_on(sv); + } TERM(WORD); } @@ -6617,7 +6679,6 @@ Perl_yylex(pTHX) } NEXTVAL_NEXTTOKE.opval = off ? rv2cv_op : pl_yylval.opval; - PL_expect = XOPERATOR; if (off) op_free(pl_yylval.opval), force_next(PRIVATEREF); else op_free(rv2cv_op), force_next(WORD); @@ -6634,14 +6695,26 @@ Perl_yylex(pTHX) if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; - PREBLOCK(METHOD); + PL_expect = XBLOCKTERM; + PL_bufptr = s; + return REPORT(METHOD); } /* If followed by a bareword, see if it looks like indir obj. */ - if (!orig_keyword + if (tmp == 1 && !orig_keyword && (isIDFIRST_lazy_if(s,UTF) || *s == '$') - && (tmp = intuit_method(s, gv, cv))) { + && (tmp = intuit_method(s, lex ? NULL : sv, cv))) { + method: + if (lex && !off) { + assert(cSVOPx(pl_yylval.opval)->op_sv == sv); + SvREADONLY_off(sv); + sv_setpvn(sv, PL_tokenbuf, len); + if (UTF && !IN_BYTES + && is_utf8_string((U8*)PL_tokenbuf, len)) + SvUTF8_on (sv); + else SvUTF8_off(sv); + } op_free(rv2cv_op); if (tmp == METHOD && !PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) @@ -6652,13 +6725,6 @@ Perl_yylex(pTHX) /* Not a method, so call it a subroutine (if defined) */ if (cv) { - if (lastchar == '-' && penultchar != '-') { - const STRLEN l = len ? len : strlen(PL_tokenbuf); - Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS), - "Ambiguous use of -%"UTF8f" resolved as -&%"UTF8f"()", - UTF8fARG(UTF, l, PL_tokenbuf), - UTF8fARG(UTF, l, PL_tokenbuf)); - } /* Check for a constant sub */ if ((sv = cv_const_sv_or_av(cv))) { its_constant: @@ -6858,13 +6924,13 @@ Perl_yylex(pTHX) if (!IN_BYTES) { if (UTF) PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8"); - else if (PL_encoding) { + else if (IN_ENCODING) { SV *name; dSP; ENTER; SAVETMPS; PUSHMARK(sp); - XPUSHs(PL_encoding); + XPUSHs(_get_encoding()); PUTBACK; call_method("name", G_SCALAR); SPAGAIN; @@ -6884,7 +6950,9 @@ Perl_yylex(pTHX) } case KEY___SUB__: - FUN0OP(newPVOP(OP_RUNCV,0,NULL)); + FUN0OP(CvCLONE(PL_compcv) + ? newOP(OP_RUNCV, 0) + : newPVOP(OP_RUNCV,0,NULL)); case KEY_AUTOLOAD: case KEY_DESTROY: @@ -7019,7 +7087,7 @@ Perl_yylex(pTHX) PREBLOCK(DEFAULT); case KEY_do: - s = SKIPSPACE1(s); + s = skipspace(s); if (*s == '{') PRETERMBLOCK(DO); if (*s != '\'') { @@ -7028,7 +7096,7 @@ Perl_yylex(pTHX) 1, &len); if (len && (len != 4 || strNE(PL_tokenbuf+1, "CORE")) && !keyword(PL_tokenbuf + 1, len, 0)) { - d = SKIPSPACE1(d); + d = skipspace(d); if (*d == '(') { force_ident_maybe_lex('&'); s = d; @@ -7067,8 +7135,6 @@ Perl_yylex(pTHX) UNI(OP_DBMCLOSE); case KEY_dump: - PL_expect = XOPERATOR; - s = force_word(s,WORD,TRUE,FALSE); LOOPX(OP_DUMP); case KEY_else: @@ -7090,7 +7156,7 @@ Perl_yylex(pTHX) UNI(OP_EXIT); case KEY_eval: - s = SKIPSPACE1(s); + s = skipspace(s); if (*s == '{') { /* block eval */ PL_expect = XTERMBLOCK; UNIBRACK(OP_ENTERTRY); @@ -7139,7 +7205,7 @@ Perl_yylex(pTHX) if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR) return REPORT(0); pl_yylval.ival = CopLINE(PL_curcop); - s = SKIPSPACE1(s); + s = skipspace(s); if (PL_expect == XSTATE && isIDFIRST_lazy_if(s,UTF)) { char *p = s; @@ -7149,11 +7215,11 @@ Perl_yylex(pTHX) else if ((PL_bufend - p) >= 4 && strnEQ(p, "our", 3) && isSPACE(*(p + 3))) p += 3; - p = PEEKSPACE(p); + p = skipspace(p); /* skip optional package name, as in "for my abc $x (..)" */ if (isIDFIRST_lazy_if(p,UTF)) { p = scan_word(p, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len); - p = PEEKSPACE(p); + p = skipspace(p); } if (*p != '$') Perl_croak(aTHX_ "Missing $ on loop variable"); @@ -7192,8 +7258,6 @@ Perl_yylex(pTHX) LOP(OP_GREPSTART, XREF); case KEY_goto: - PL_expect = XOPERATOR; - s = force_word(s,WORD,TRUE,FALSE); LOOPX(OP_GOTO); case KEY_gmtime: @@ -7318,8 +7382,6 @@ Perl_yylex(pTHX) LOP(OP_KILL,XTERM); case KEY_last: - PL_expect = XOPERATOR; - s = force_word(s,WORD,TRUE,FALSE); LOOPX(OP_LAST); case KEY_lc: @@ -7389,7 +7451,7 @@ Perl_yylex(pTHX) case KEY_my: case KEY_state: PL_in_my = (U16)tmp; - s = SKIPSPACE1(s); + s = skipspace(s); if (isIDFIRST_lazy_if(s,UTF)) { s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len); if (len == 3 && strnEQ(PL_tokenbuf, "sub", 3)) @@ -7407,8 +7469,10 @@ Perl_yylex(pTHX) PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len); if (!PL_in_my_stash) { char tmpbuf[1024]; + int len; PL_bufptr = s; - PERL_UNUSED_RESULT(my_snprintf(tmpbuf, sizeof(tmpbuf), "No such class %.1000s", PL_tokenbuf)); + len = my_snprintf(tmpbuf, sizeof(tmpbuf), "No such class %.1000s", PL_tokenbuf); + PERL_MY_SNPRINTF_POST_GUARD(len, sizeof(tmpbuf)); yyerror_pv(tmpbuf, UTF ? SVf_UTF8 : 0); } } @@ -7416,8 +7480,6 @@ Perl_yylex(pTHX) OPERATOR(MY); case KEY_next: - PL_expect = XOPERATOR; - s = force_word(s,WORD,TRUE,FALSE); LOOPX(OP_NEXT); case KEY_ne: @@ -7427,10 +7489,10 @@ Perl_yylex(pTHX) case KEY_no: s = tokenize_use(0, s); - TERM(USE); + TOKEN(USE); case KEY_not: - if (*s == '(' || (s = SKIPSPACE1(s), *s == '(')) + if (*s == '(' || (s = skipspace(s), *s == '(')) FUN1(OP_NOT); else { if (!PL_lex_allbrackets && @@ -7440,7 +7502,7 @@ Perl_yylex(pTHX) } case KEY_open: - s = SKIPSPACE1(s); + s = skipspace(s); if (isIDFIRST_lazy_if(s,UTF)) { const char *t; d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, @@ -7500,10 +7562,9 @@ Perl_yylex(pTHX) case KEY_package: s = force_word(s,WORD,FALSE,TRUE); - s = SKIPSPACE1(s); + s = skipspace(s); s = force_strict_version(s); - PL_lex_expect = XBLOCK; - OPERATOR(PACKAGE); + PREBLOCK(PACKAGE); case KEY_pipe: LOP(OP_PIPE_OP,XTERM); @@ -7595,8 +7656,7 @@ Perl_yylex(pTHX) OLDLOP(OP_RETURN); case KEY_require: - s = SKIPSPACE1(s); - PL_expect = XOPERATOR; + s = skipspace(s); if (isDIGIT(*s)) { s = force_version(s, FALSE); } @@ -7617,7 +7677,7 @@ Perl_yylex(pTHX) } else pl_yylval.ival = 0; - PL_expect = XTERM; + PL_expect = PL_nexttoke ? XOPERATOR : XTERM; PL_bufptr = s; PL_last_uni = PL_oldbufptr; PL_last_lop_op = OP_REQUIRE; @@ -7628,8 +7688,6 @@ Perl_yylex(pTHX) UNI(OP_RESET); case KEY_redo: - PL_expect = XOPERATOR; - s = force_word(s,WORD,TRUE,FALSE); LOOPX(OP_REDO); case KEY_rename: @@ -7768,7 +7826,7 @@ Perl_yylex(pTHX) case KEY_sort: checkcomma(s,PL_tokenbuf,"subroutine name"); - s = SKIPSPACE1(s); + s = skipspace(s); PL_expect = XTERM; s = force_word(s,WORD,TRUE,TRUE); LOP(OP_SORT,XREF); @@ -7823,7 +7881,7 @@ Perl_yylex(pTHX) *PL_tokenbuf = '&'; if (memchr(tmpbuf, ':', len) || key != KEY_sub || pad_findmy_pvn( - PL_tokenbuf, len + 1, UTF ? SVf_UTF8 : 0 + PL_tokenbuf, len + 1, 0 ) != NOT_IN_PAD) sv_setpvn(PL_subname, tmpbuf, len); else { @@ -7990,7 +8048,7 @@ Perl_yylex(pTHX) case KEY_use: s = tokenize_use(1, s); - OPERATOR(USE); + TOKEN(USE); case KEY_values: UNI(OP_VALUES); @@ -8099,10 +8157,13 @@ S_pending_ident(pTHX) } else { if (has_colon) { + /* "my" variable %s can't be in a package */ /* PL_no_myglob is constant */ GCC_DIAG_IGNORE(-Wformat-nonliteral); yyerror_pv(Perl_form(aTHX_ PL_no_myglob, - PL_in_my == KEY_my ? "my" : "state", PL_tokenbuf), + PL_in_my == KEY_my ? "my" : "state", + *PL_tokenbuf == '&' ? "subroutin" : "variabl", + PL_tokenbuf), UTF ? SVf_UTF8 : 0); GCC_DIAG_RESTORE; } @@ -8121,7 +8182,7 @@ S_pending_ident(pTHX) if (!has_colon) { if (!PL_in_my) tmp = pad_findmy_pvn(PL_tokenbuf, tokenbuf_len, - UTF ? SVf_UTF8 : 0); + 0); if (tmp != NOT_IN_PAD) { /* might be an "our" variable" */ if (PAD_COMPNAME_FLAGS_isOUR(tmp)) { @@ -8135,10 +8196,7 @@ S_pending_ident(pTHX) pl_yylval.opval->op_private = OPpCONST_ENTERED; if (pit != '&') gv_fetchsv(sym, - (PL_in_eval - ? (GV_ADDMULTI | GV_ADDINEVAL) - : GV_ADDMULTI - ), + GV_ADDMULTI, ((PL_tokenbuf[0] == '$') ? SVt_PV : (PL_tokenbuf[0] == '@') ? SVt_PVAV : SVt_PVHV)); @@ -8182,7 +8240,7 @@ S_pending_ident(pTHX) pl_yylval.opval->op_private = OPpCONST_ENTERED; if (pit != '&') gv_fetchpvn_flags(PL_tokenbuf+1, tokenbuf_len - 1, - (PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : GV_ADD) + (PL_in_eval ? GV_ADDMULTI : GV_ADD) | ( UTF ? SVf_UTF8 : 0 ), ((PL_tokenbuf[0] == '$') ? SVt_PV : (PL_tokenbuf[0] == '@') ? SVt_PVAV @@ -8231,12 +8289,20 @@ S_checkcomma(pTHX_ const char *s, const char *name, const char *what) s++; if (*s == ',') { GV* gv; + PADOFFSET off; if (keyword(w, s - w, 0)) return; gv = gv_fetchpvn_flags(w, s - w, ( UTF ? SVf_UTF8 : 0 ), SVt_PVCV); if (gv && GvCVu(gv)) return; + if (s - w <= 254) { + char tmpbuf[256]; + Copy(w, tmpbuf+1, s - w, char); + *tmpbuf = '&'; + off = pad_findmy_pvn(tmpbuf, s-w+1, 0); + if (off != NOT_IN_PAD) return; + } Perl_croak(aTHX_ "No comma allowed after %s", what); } } @@ -8467,7 +8533,7 @@ S_scan_ident(pTHX_ char *s, char *dest, STRLEN destlen, I32 ck_uni) PERL_ARGS_ASSERT_SCAN_IDENT; if (isSPACE(*s)) - s = PEEKSPACE(s); + s = skipspace(s); if (isDIGIT(*s)) { while (isDIGIT(*s)) { if (d >= e) @@ -8505,31 +8571,60 @@ S_scan_ident(pTHX_ char *s, char *dest, STRLEN destlen, I32 ck_uni) s++; orig_copline = CopLINE(PL_curcop); if (s < PL_bufend && isSPACE(*s)) { - s = PEEKSPACE(s); + s = skipspace(s); } } /* Is the byte 'd' a legal single character identifier name? 'u' is true * iff Unicode semantics are to be used. The legal ones are any of: - * a) ASCII digits - * b) ASCII punctuation + * a) all ASCII characters except: + * 1) space-type ones, like \t and SPACE; + 2) NUL; + * 3) '{' + * The final case currently doesn't get this far in the program, so we + * don't test for it. If that were to change, it would be ok to allow it. * c) When not under Unicode rules, any upper Latin1 character - * d) \c?, \c\, \c^, \c_, and \cA..\cZ, minus the ones that have traditionally - * been matched by \s on ASCII platforms. That is: \c?, plus 1-32, minus - * the \s ones. */ -#define VALID_LEN_ONE_IDENT(d, u) (isPUNCT_A((U8)(d)) \ - || isDIGIT_A((U8)(d)) \ - || (!(u) && !isASCII((U8)(d))) \ - || ((((U8)(d)) < 32) \ - && (((((U8)(d)) >= 14) \ - || (((U8)(d)) <= 8 && (d) != 0) \ - || (((U8)(d)) == 13)))) \ - || (((U8)(d)) == toCTRL('?'))) - if (s < PL_bufend - && (isIDFIRST_lazy_if(s, is_utf8) || VALID_LEN_ONE_IDENT(*s, is_utf8))) + * d) Otherwise, when unicode rules are used, all XIDS characters. + * + * Because all ASCII characters have the same representation whether + * encoded in UTF-8 or not, we can use the foo_A macros below and '\0' and + * '{' without knowing if is UTF-8 or not. + * EBCDIC already uses the rules that ASCII platforms will use after the + * deprecation cycle; see comment below about the deprecation. */ +#ifdef EBCDIC +# define VALID_LEN_ONE_IDENT(s, is_utf8) \ + (isGRAPH_A(*(s)) || ((is_utf8) \ + ? isIDFIRST_utf8((U8*) (s)) \ + : (isGRAPH_L1(*s) \ + && LIKELY((U8) *(s) != LATIN1_TO_NATIVE(0xAD))))) +#else +# define VALID_LEN_ONE_IDENT(s, is_utf8) (! isSPACE_A(*(s)) \ + && LIKELY(*(s) != '\0') \ + && (! is_utf8 \ + || isASCII_utf8((U8*) (s)) \ + || isIDFIRST_utf8((U8*) (s)))) +#endif + if ((s <= PL_bufend - (is_utf8) + ? UTF8SKIP(s) + : 1) + && VALID_LEN_ONE_IDENT(s, is_utf8)) { - if ( isCNTRL_A((U8)*s) ) { - deprecate("literal control characters in variable names"); + /* Deprecate all non-graphic characters. Include SHY as a non-graphic, + * because often it has no graphic representation. (We can't get to + * here with SHY when 'is_utf8' is true, so no need to include a UTF-8 + * test for it.) */ + if ((is_utf8) + ? ! isGRAPH_utf8( (U8*) s) + : (! isGRAPH_L1( (U8) *s) + || UNLIKELY((U8) *(s) == LATIN1_TO_NATIVE(0xAD)))) + { + /* Split messages for back compat */ + if (isCNTRL_A( (U8) *s)) { + deprecate("literal control characters in variable names"); + } + else { + deprecate("literal non-graphic characters in variable names"); + } } if (is_utf8) { @@ -8565,7 +8660,7 @@ S_scan_ident(pTHX_ char *s, char *dest, STRLEN destlen, I32 ck_uni) *d = '\0'; tmp_copline = CopLINE(PL_curcop); if (s < PL_bufend && isSPACE(*s)) { - s = PEEKSPACE(s); + s = skipspace(s); } if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) { /* ${foo[0]} and ${foo{bar}} notation. */ @@ -8604,7 +8699,7 @@ S_scan_ident(pTHX_ char *s, char *dest, STRLEN destlen, I32 ck_uni) if ( !tmp_copline ) tmp_copline = CopLINE(PL_curcop); if (s < PL_bufend && isSPACE(*s)) { - s = PEEKSPACE(s); + s = skipspace(s); } /* Expect to find a closing } after consuming any trailing whitespace. @@ -8648,14 +8743,14 @@ S_scan_ident(pTHX_ char *s, char *dest, STRLEN destlen, I32 ck_uni) } static bool -S_pmflag(pTHX_ const char* const valid_flags, U32 * pmfl, char** s, char* charset) { - - /* Adds, subtracts to/from 'pmfl' based on regex modifier flags found in - * the parse starting at 's', based on the subset that are valid in this - * context input to this routine in 'valid_flags'. Advances s. Returns - * TRUE if the input should be treated as a valid flag, so the next char - * may be as well; otherwise FALSE. 'charset' should point to a NUL upon - * first call on the current regex. This routine will set it to any +S_pmflag(pTHX_ const char* const valid_flags, U32 * pmfl, char** s, char* charset, unsigned int * x_mod_count) { + + /* Adds, subtracts to/from 'pmfl' based on the next regex modifier flag + * found in the parse starting at 's', based on the subset that are valid + * in this context input to this routine in 'valid_flags'. Advances s. + * Returns TRUE if the input should be treated as a valid flag, so the next + * char may be as well; otherwise FALSE. 'charset' should point to a NUL + * upon first call on the current regex. This routine will set it to any * charset modifier found. The caller shouldn't change it. This way, * another charset modifier encountered in the parse can be detected as an * error, as we have decided to allow only one */ @@ -8677,7 +8772,7 @@ S_pmflag(pTHX_ const char* const valid_flags, U32 * pmfl, char** s, char* charse switch (c) { - CASE_STD_PMMOD_FLAGS_PARSE_SET(pmfl); + CASE_STD_PMMOD_FLAGS_PARSE_SET(pmfl, *x_mod_count); case GLOBAL_PAT_MOD: *pmfl |= PMf_GLOBAL; break; case CONTINUE_PAT_MOD: *pmfl |= PMf_CONTINUE; break; case ONCE_PAT_MOD: *pmfl |= PMf_KEEP; break; @@ -8752,6 +8847,7 @@ S_scan_pat(pTHX_ char *start, I32 type) const char * const valid_flags = (const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS); char charset = '\0'; /* character set modifier */ + unsigned int x_mod_count = 0; PERL_ARGS_ASSERT_SCAN_PAT; @@ -8801,7 +8897,9 @@ S_scan_pat(pTHX_ char *start, I32 type) pm->op_pmflags |= PMf_IS_QR; } - while (*s && S_pmflag(aTHX_ valid_flags, &(pm->op_pmflags), &s, &charset)) {}; + while (*s && S_pmflag(aTHX_ valid_flags, &(pm->op_pmflags), + &s, &charset, &x_mod_count)) + {}; /* issue a warning if /c is specified,but /g is not */ if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL)) { @@ -8809,6 +8907,8 @@ S_scan_pat(pTHX_ char *start, I32 type) "Use of /c modifier is meaningless without /g" ); } + STD_PMMOD_FLAGS_PARSE_X_WARN(x_mod_count); + PL_lex_op = (OP*)pm; pl_yylval.ival = OP_MATCH; return s; @@ -8823,6 +8923,7 @@ S_scan_subst(pTHX_ char *start) line_t first_line; I32 es = 0; char charset = '\0'; /* character set modifier */ + unsigned int x_mod_count = 0; char *t; PERL_ARGS_ASSERT_SCAN_SUBST; @@ -8856,12 +8957,15 @@ S_scan_subst(pTHX_ char *start) s++; es++; } - else if (! S_pmflag(aTHX_ S_PAT_MODS, &(pm->op_pmflags), &s, &charset)) + else if (! S_pmflag(aTHX_ S_PAT_MODS, &(pm->op_pmflags), + &s, &charset, &x_mod_count)) { break; } } + STD_PMMOD_FLAGS_PARSE_X_WARN(x_mod_count); + if ((pm->op_pmflags & PMf_CONTINUE)) { Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" ); } @@ -9179,7 +9283,14 @@ S_scan_heredoc(pTHX_ char *s) origline + 1 + PL_parser->herelines); if (!lex_next_chunk(LEX_NO_TERM) && (!SvCUR(tmpstr) || SvEND(tmpstr)[-1] != '\n')) { - SvREFCNT_dec(linestr_save); + /* Simply freeing linestr_save might seem simpler here, as it + does not matter what PL_linestr points to, since we are + about to croak; but in a quote-like op, linestr_save + will have been prospectively freed already, via + SAVEFREESV(PL_linestr) in sublex_push, so it’s easier to + restore PL_linestr. */ + SvREFCNT_dec_NN(PL_linestr); + PL_linestr = linestr_save; goto interminable; } CopLINE_set(PL_curcop, origline); @@ -9208,7 +9319,8 @@ S_scan_heredoc(pTHX_ char *s) else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r') PL_bufend[-1] = '\n'; #endif - if (*s == term && memEQ(s,PL_tokenbuf + 1,len)) { + if (*s == term && PL_bufend-s >= len + && memEQ(s,PL_tokenbuf + 1,len)) { SvREFCNT_dec(PL_linestr); PL_linestr = linestr_save; PL_linestart = SvPVX(linestr_save); @@ -9228,8 +9340,8 @@ S_scan_heredoc(pTHX_ char *s) if (!IN_BYTES) { if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr))) SvUTF8_on(tmpstr); - else if (PL_encoding) - sv_recode_to_utf8(tmpstr, PL_encoding); + else if (IN_ENCODING) + sv_recode_to_utf8(tmpstr, _get_encoding()); } PL_lex_stuff = tmpstr; pl_yylval.ival = op_type; @@ -9249,6 +9361,7 @@ S_scan_heredoc(pTHX_ char *s) This code handles: <> read from ARGV + <<>> read from ARGV without magic open read from filehandle read from package qualified filehandle read from package qualified filehandle @@ -9263,6 +9376,7 @@ S_scan_inputsymbol(pTHX_ char *start) char *s = start; /* current position in buffer */ char *end; I32 len; + bool nomagicopen = FALSE; char *d = PL_tokenbuf; /* start of temp holding space */ const char * const e = PL_tokenbuf + sizeof PL_tokenbuf; /* end of temp holding space */ @@ -9271,7 +9385,14 @@ S_scan_inputsymbol(pTHX_ char *start) end = strchr(s, '\n'); if (!end) end = PL_bufend; - s = delimcpy(d, e, s + 1, end, '>', &len); /* extract until > */ + if (s[1] == '<' && s[2] == '>' && s[3] == '>') { + nomagicopen = TRUE; + *d = '\0'; + len = 0; + s += 3; + } + else + s = delimcpy(d, e, s + 1, end, '>', &len); /* extract until > */ /* die if we didn't have space for the contents of the <>, or if it didn't end, or if we see a newline @@ -9331,7 +9452,7 @@ S_scan_inputsymbol(pTHX_ char *start) /* try to find it in the pad for this block, otherwise find add symbol table ops */ - const PADOFFSET tmp = pad_findmy_pvn(d, len, UTF ? SVf_UTF8 : 0); + const PADOFFSET tmp = pad_findmy_pvn(d, len, 0); if (tmp != NOT_IN_PAD) { if (PAD_COMPNAME_FLAGS_isOUR(tmp)) { HV * const stash = PAD_COMPNAME_OURSTASH(tmp); @@ -9357,9 +9478,7 @@ S_scan_inputsymbol(pTHX_ char *start) ++d; intro_sym: gv = gv_fetchpv(d, - (PL_in_eval - ? (GV_ADDMULTI | GV_ADDINEVAL) - : GV_ADDMULTI) | ( UTF ? SVf_UTF8 : 0 ), + GV_ADDMULTI | ( UTF ? SVf_UTF8 : 0 ), SVt_PV); PL_lex_op = readline_overriden ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED, @@ -9370,8 +9489,6 @@ intro_sym: newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv))); } - if (!readline_overriden) - PL_lex_op->op_flags |= OPf_SPECIAL; /* we created the ops in PL_lex_op, so make pl_yylval.ival a null op */ pl_yylval.ival = OP_NULL; } @@ -9385,7 +9502,7 @@ intro_sym: op_append_elem(OP_LIST, newGVOP(OP_GV, 0, gv), newCVREF(0, newGVOP(OP_GV, 0, gv_readline)))) - : (OP*)newUNOP(OP_READLINE, 0, newGVOP(OP_GV, 0, gv)); + : (OP*)newUNOP(OP_READLINE, nomagicopen ? OPf_SPECIAL : 0, newGVOP(OP_GV, 0, gv)); pl_yylval.ival = OP_NULL; } } @@ -9468,7 +9585,7 @@ S_scan_str(pTHX_ char *start, int keep_bracketed_quoted, int keep_delims, int re /* skip space before the delimiter */ if (isSPACE(*s)) { - s = PEEKSPACE(s); + s = skipspace(s); } /* mark where we are, in case we need to report errors */ @@ -9514,12 +9631,12 @@ S_scan_str(pTHX_ char *start, int keep_bracketed_quoted, int keep_delims, int re sv_catpvn(sv, s, termlen); s += termlen; for (;;) { - if (PL_encoding && !UTF && !re_reparse) { + if (IN_ENCODING && !UTF && !re_reparse) { bool cont = TRUE; while (cont) { int offset = s - SvPVX_const(PL_linestr); - const bool found = sv_cat_decode(sv, PL_encoding, PL_linestr, + const bool found = sv_cat_decode(sv, _get_encoding(), PL_linestr, &offset, (char*)termstr, termlen); const char *ns; char *svlast; @@ -9732,13 +9849,13 @@ S_scan_str(pTHX_ char *start, int keep_bracketed_quoted, int keep_delims, int re /* at this point, we have successfully read the delimited string */ - if (!PL_encoding || UTF || re_reparse) { + if (!IN_ENCODING || UTF || re_reparse) { if (keep_delims) sv_catpvn(sv, s, termlen); s += termlen; } - if (has_utf8 || (PL_encoding && !re_reparse)) + if (has_utf8 || (IN_ENCODING && !re_reparse)) SvUTF8_on(sv); PL_multi_end = CopLINE(PL_curcop); @@ -9773,9 +9890,10 @@ S_scan_str(pTHX_ char *start, int keep_bracketed_quoted, int keep_delims, int re \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*) 12 12.34 12. \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*) .34 - 0b[01](_?[01])* - 0[0-7](_?[0-7])* - 0x[0-9A-Fa-f](_?[0-9A-Fa-f])* + 0b[01](_?[01])* binary integers + 0[0-7](_?[0-7])* octal integers + 0x[0-9A-Fa-f](_?[0-9A-Fa-f])* hexadecimal integers + 0x[0-9A-Fa-f](_?[0-9A-Fa-f])*(?:\.\d*)?p[+-]?[0-9]+ hexadecimal floats Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the thing it reads. @@ -9796,6 +9914,27 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp) bool floatit; /* boolean: int or float? */ const char *lastub = NULL; /* position of last underbar */ static const char* const number_too_long = "Number too long"; + /* Hexadecimal floating point. + * + * In many places (where we have quads and NV is IEEE 754 double) + * we can fit the mantissa bits of a NV into an unsigned quad. + * (Note that UVs might not be quads even when we have quads.) + * This will not work everywhere, though (either no quads, or + * using long doubles), in which case we have to resort to NV, + * which will probably mean horrible loss of precision due to + * multiple fp operations. */ + bool hexfp = FALSE; + int total_bits = 0; +#if NVSIZE == 8 && defined(HAS_QUAD) && defined(Uquad_t) +# define HEXFP_UQUAD + Uquad_t hexfp_uquad = 0; + int hexfp_frac_bits = 0; +#else +# define HEXFP_NV + NV hexfp_nv = 0.0; +#endif + NV hexfp_mult = 1.0; + UV high_non_zero = 0; /* highest digit */ PERL_ARGS_ASSERT_SCAN_NUM; @@ -9838,17 +9977,17 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp) const char *base, *Base, *max; /* check for hex */ - if (s[1] == 'x' || s[1] == 'X') { + if (isALPHA_FOLD_EQ(s[1], 'x')) { shift = 4; s += 2; just_zero = FALSE; - } else if (s[1] == 'b' || s[1] == 'B') { + } else if (isALPHA_FOLD_EQ(s[1], 'b')) { shift = 1; s += 2; just_zero = FALSE; } /* check for a decimal in disguise */ - else if (s[1] == '.' || s[1] == 'e' || s[1] == 'E') + else if (s[1] == '.' || isALPHA_FOLD_EQ(s[1], 'e')) goto decimal; /* so it must be octal */ else { @@ -9920,6 +10059,8 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp) if (!overflowed) { x = u << shift; /* make room for the digit */ + total_bits += shift; + if ((x >> shift) != u && !(PL_hints & HINT_NEW_BINARY)) { overflowed = TRUE; @@ -9942,6 +10083,16 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp) * amount. */ n += (NV) b; } + + if (high_non_zero == 0 && b > 0) + high_non_zero = b; + + /* this could be hexfp, but peek ahead + * to avoid matching ".." */ + if (UNLIKELY(HEXFP_PEEK(s))) { + goto out; + } + break; } } @@ -9956,6 +10107,96 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp) Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number"); } + if (UNLIKELY(HEXFP_PEEK(s))) { + /* Do sloppy (on the underbars) but quick detection + * (and value construction) for hexfp, the decimal + * detection will shortly be more thorough with the + * underbar checks. */ + const char* h = s; +#ifdef HEXFP_UQUAD + hexfp_uquad = u; +#else /* HEXFP_NV */ + hexfp_nv = u; +#endif + if (*h == '.') { +#ifdef HEXFP_NV + NV mult = 1 / 16.0; +#endif + h++; + while (isXDIGIT(*h) || *h == '_') { + if (isXDIGIT(*h)) { + U8 b = XDIGIT_VALUE(*h); + total_bits += shift; +#ifdef HEXFP_UQUAD + hexfp_uquad <<= shift; + hexfp_uquad |= b; + hexfp_frac_bits += shift; +#else /* HEXFP_NV */ + hexfp_nv += b * mult; + mult /= 16.0; +#endif + } + h++; + } + } + + if (total_bits >= 4) { + if (high_non_zero < 0x8) + total_bits--; + if (high_non_zero < 0x4) + total_bits--; + if (high_non_zero < 0x2) + total_bits--; + } + + if (total_bits > 0 && (isALPHA_FOLD_EQ(*h, 'p'))) { + bool negexp = FALSE; + h++; + if (*h == '+') + h++; + else if (*h == '-') { + negexp = TRUE; + h++; + } + if (isDIGIT(*h)) { + I32 hexfp_exp = 0; + while (isDIGIT(*h) || *h == '_') { + if (isDIGIT(*h)) { + hexfp_exp *= 10; + hexfp_exp += *h - '0'; +#ifdef NV_MIN_EXP + if (negexp && + -hexfp_exp < NV_MIN_EXP - 1) { + Perl_ck_warner(aTHX_ packWARN(WARN_OVERFLOW), + "Hexadecimal float: exponent underflow"); +#endif + break; + } + else { +#ifdef NV_MAX_EXP + if (!negexp && + hexfp_exp > NV_MAX_EXP - 1) { + Perl_ck_warner(aTHX_ packWARN(WARN_OVERFLOW), + "Hexadecimal float: exponent overflow"); + break; + } +#endif + } + } + h++; + } + if (negexp) + hexfp_exp = -hexfp_exp; +#ifdef HEXFP_UQUAD + hexfp_exp -= hexfp_frac_bits; +#endif + hexfp_mult = pow(2.0, hexfp_exp); + hexfp = TRUE; + goto decimal; + } + } + } + if (overflowed) { if (n > 4294967295.0) Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE), @@ -9989,10 +10230,17 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp) decimal: d = PL_tokenbuf; e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */ - floatit = FALSE; + floatit = FALSE; + if (hexfp) { + floatit = TRUE; + *d++ = '0'; + *d++ = 'x'; + s = start + 2; + } /* read next group of digits and _ and copy into d */ - while (isDIGIT(*s) || *s == '_') { + while (isDIGIT(*s) || *s == '_' || + UNLIKELY(hexfp && isXDIGIT(*s))) { /* skip underscores, checking for misplaced ones if -w is on */ @@ -10032,7 +10280,9 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp) /* copy, ignoring underbars, until we run out of digits. */ - for (; isDIGIT(*s) || *s == '_'; s++) { + for (; isDIGIT(*s) || *s == '_' || + UNLIKELY(hexfp && isXDIGIT(*s)); + s++) { /* fixed length buffer check */ if (d >= e) Perl_croak(aTHX_ "%s", number_too_long); @@ -10058,12 +10308,24 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp) } /* read exponent part, if present */ - if ((*s == 'e' || *s == 'E') && strchr("+-0123456789_", s[1])) { - floatit = TRUE; + if ((isALPHA_FOLD_EQ(*s, 'e') + || UNLIKELY(hexfp && isALPHA_FOLD_EQ(*s, 'p'))) + && strchr("+-0123456789_", s[1])) + { + floatit = TRUE; + + /* regardless of whether user said 3E5 or 3e5, use lower 'e', + ditto for p (hexfloats) */ + if ((isALPHA_FOLD_EQ(*s, 'e'))) { + /* At least some Mach atof()s don't grok 'E' */ + *d++ = 'e'; + } + else if (UNLIKELY(hexfp && (isALPHA_FOLD_EQ(*s, 'p')))) { + *d++ = 'p'; + } + s++; - /* regardless of whether user said 3E5 or 3e5, use lower 'e' */ - *d++ = 'e'; /* At least some Mach atof()s don't grok 'E' */ /* stray preinitial _ */ if (*s == '_') { @@ -10127,9 +10389,22 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp) STORE_NUMERIC_LOCAL_SET_STANDARD(); /* terminate the string */ *d = '\0'; - nv = Atof(PL_tokenbuf); + if (UNLIKELY(hexfp)) { +# ifdef NV_MANT_DIG + if (total_bits > NV_MANT_DIG) + Perl_ck_warner(aTHX_ packWARN(WARN_OVERFLOW), + "Hexadecimal float: mantissa overflow"); +# endif +#ifdef HEXFP_UQUAD + nv = hexfp_uquad * hexfp_mult; +#else /* HEXFP_NV */ + nv = hexfp_nv * hexfp_mult; +#endif + } else { + nv = Atof(PL_tokenbuf); + } RESTORE_NUMERIC_LOCAL(); - sv = newSVnv(nv); + sv = newSVnv(nv); } if ( floatit @@ -10250,8 +10525,8 @@ S_scan_formline(pTHX_ char *s) if (!IN_BYTES) { if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff))) SvUTF8_on(stuff); - else if (PL_encoding) - sv_recode_to_utf8(stuff, PL_encoding); + else if (IN_ENCODING) + sv_recode_to_utf8(stuff, _get_encoding()); } NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0, stuff); force_next(THING); @@ -10278,12 +10553,11 @@ Perl_start_subparse(pTHX_ I32 is_format, U32 flags) CvFLAGS(PL_compcv) |= flags; PL_subline = CopLINE(PL_curcop); - CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB); + CvPADLIST_set(PL_compcv, pad_new(padnew_SAVE|padnew_SAVESUB)); CvOUTSIDE(PL_compcv) = MUTABLE_CV(SvREFCNT_inc_simple(outsidecv)); CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax; if (outsidecv && CvPADLIST(outsidecv)) - CvPADLIST(PL_compcv)->xpadl_outid = - PadlistNAMES(CvPADLIST(outsidecv)); + CvPADLIST(PL_compcv)->xpadl_outid = CvPADLIST(outsidecv)->xpadl_id; return oldsavestack_ix; } @@ -10295,7 +10569,6 @@ S_yywarn(pTHX_ const char *const s, U32 flags) PL_in_eval |= EVAL_WARNONLY; yyerror_pv(s, flags); - PL_in_eval &= ~EVAL_WARNONLY; return 0; } @@ -10399,6 +10672,7 @@ Perl_yyerror_pvn(pTHX_ const char *const s, STRLEN len, U32 flags) PL_multi_end = 0; } if (PL_in_eval & EVAL_WARNONLY) { + PL_in_eval &= ~EVAL_WARNONLY; Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "%"SVf, SVfARG(msg)); } else @@ -11189,7 +11463,6 @@ S_parse_opt_lexvar(pTHX) PL_bufptr = s; if (d == PL_tokenbuf+1) return NULL; - *d = 0; var = newOP(sigil == '$' ? OP_PADSV : sigil == '@' ? OP_PADAV : OP_PADHV, OPf_MOD | (OPpLVAL_INTRO<<8)); var->op_targ = allocmy(PL_tokenbuf, d - PL_tokenbuf, UTF ? SVf_UTF8 : 0); @@ -11270,10 +11543,16 @@ Perl_parse_subsignature(pTHX) scalar(newUNOP(OP_RV2AV, 0, newGVOP(OP_GV, 0, PL_defgv))), newSVOP(OP_CONST, 0, newSViv(1))), - newLISTOP(OP_DIE, 0, newOP(OP_PUSHMARK, 0), - newSVOP(OP_CONST, 0, - newSVpvs("Odd name/value argument " - "for subroutine")))); + op_convert_list(OP_DIE, 0, + op_convert_list(OP_SPRINTF, 0, + op_append_list(OP_LIST, + newSVOP(OP_CONST, 0, + newSVpvs("Odd name/value argument for subroutine at %s line %d.\n")), + newSLICEOP(0, + op_append_list(OP_LIST, + newSVOP(OP_CONST, 0, newSViv(1)), + newSVOP(OP_CONST, 0, newSViv(2))), + newOP(OP_CALLER, 0)))))); if (pos != min_arity) chkop = newLOGOP(OP_AND, 0, newBINOP(OP_GT, 0, @@ -11336,9 +11615,16 @@ Perl_parse_subsignature(pTHX) scalar(newUNOP(OP_RV2AV, 0, newGVOP(OP_GV, 0, PL_defgv))), newSVOP(OP_CONST, 0, newSViv(min_arity))), - newLISTOP(OP_DIE, 0, newOP(OP_PUSHMARK, 0), - newSVOP(OP_CONST, 0, - newSVpvs("Too few arguments for subroutine"))))), + op_convert_list(OP_DIE, 0, + op_convert_list(OP_SPRINTF, 0, + op_append_list(OP_LIST, + newSVOP(OP_CONST, 0, + newSVpvs("Too few arguments for subroutine at %s line %d.\n")), + newSLICEOP(0, + op_append_list(OP_LIST, + newSVOP(OP_CONST, 0, newSViv(1)), + newSVOP(OP_CONST, 0, newSViv(2))), + newOP(OP_CALLER, 0))))))), initops); } if (max_arity != -1) { @@ -11349,9 +11635,16 @@ Perl_parse_subsignature(pTHX) scalar(newUNOP(OP_RV2AV, 0, newGVOP(OP_GV, 0, PL_defgv))), newSVOP(OP_CONST, 0, newSViv(max_arity))), - newLISTOP(OP_DIE, 0, newOP(OP_PUSHMARK, 0), - newSVOP(OP_CONST, 0, - newSVpvs("Too many arguments for subroutine"))))), + op_convert_list(OP_DIE, 0, + op_convert_list(OP_SPRINTF, 0, + op_append_list(OP_LIST, + newSVOP(OP_CONST, 0, + newSVpvs("Too many arguments for subroutine at %s line %d.\n")), + newSLICEOP(0, + op_append_list(OP_LIST, + newSVOP(OP_CONST, 0, newSViv(1)), + newSVOP(OP_CONST, 0, newSViv(2))), + newOP(OP_CALLER, 0))))))), initops); } return initops;