X-Git-Url: https://perl5.git.perl.org/perl5.git/blobdiff_plain/1f9a1c0bdb88fe8f4b38dadcebb4988215612692..c3492809fd2c796c3cdab4de49e6c47560ce7f23:/toke.c diff --git a/toke.c b/toke.c index 1b7860a..70f00f3 100644 --- a/toke.c +++ b/toke.c @@ -1694,7 +1694,7 @@ S_incline(pTHX_ const char *s) } else { t = s; - while (!isSPACE(*t)) + while (*t && !isSPACE(*t)) t++; e = t; } @@ -1944,7 +1944,8 @@ S_postderef(pTHX_ int const funny, char const next) if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) { assert('@' == funny || '$' == funny || DOLSHARP == funny); PL_lex_state = LEX_INTERPEND; - force_next(POSTJOIN); + if ('@' == funny) + force_next(POSTJOIN); } force_next(next); PL_bufptr+=2; @@ -2518,8 +2519,11 @@ S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e) PERL_ARGS_ASSERT_GET_AND_CHECK_BACKSLASH_N_NAME; - if (!SvCUR(res)) + if (!SvCUR(res)) { + Perl_ck_warner_d(aTHX_ packWARN(WARN_DEPRECATED), + "Unknown charname '' is deprecated"); return res; + } if (UTF && ! is_utf8_string_loc((U8 *) backslash_ptr, e - backslash_ptr, @@ -2585,11 +2589,6 @@ S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e) if (*s == ' ' && *(s-1) == ' ') { goto multi_spaces; } - if ((U8) *s == NBSP_NATIVE && ckWARN_d(WARN_DEPRECATED)) { - Perl_warner(aTHX_ packWARN(WARN_DEPRECATED), - "NO-BREAK SPACE in a charnames " - "alias definition is deprecated"); - } s++; } } @@ -2637,14 +2636,6 @@ S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e) { goto bad_charname; } - if (*s == *NBSP_UTF8 - && *(s+1) == *(NBSP_UTF8+1) - && ckWARN_d(WARN_DEPRECATED)) - { - Perl_warner(aTHX_ packWARN(WARN_DEPRECATED), - "NO-BREAK SPACE in a charnames " - "alias definition is deprecated"); - } s += 2; } else { @@ -3357,10 +3348,7 @@ S_scan_const(pTHX_ char *start) } NUM_ESCAPE_INSERT: - /* Insert oct or hex escaped character. There will always be - * enough room in sv since such escapes will be longer than any - * UTF-8 sequence they can end up as, except if they force us - * to recode the rest of the string into utf8 */ + /* Insert oct or hex escaped character. */ /* Here uv is the ordinal of the next character being added */ if (UVCHR_IS_INVARIANT(uv)) { @@ -3388,6 +3376,20 @@ S_scan_const(pTHX_ char *start) } if (has_utf8) { + /* Usually, there will already be enough room in 'sv' + * since such escapes are likely longer than any UTF-8 + * sequence they can end up as. This isn't the case on + * EBCDIC where \x{40000000} contains 12 bytes, and the + * UTF-8 for it contains 14. And, we have to allow for + * a trailing NUL. It probably can't happen on ASCII + * platforms, but be safe */ + const STRLEN needed = d - SvPVX(sv) + UVCHR_SKIP(uv) + + 1; + if (UNLIKELY(needed > SvLEN(sv))) { + SvCUR_set(sv, d - SvPVX_const(sv)); + d = sv_grow(sv, needed) + SvCUR(sv); + } + d = (char*)uvchr_to_utf8((U8*)d, uv); if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) @@ -3525,7 +3527,7 @@ S_scan_const(pTHX_ char *start) } /* Add the (Unicode) code point to the output. */ - if (UNI_IS_INVARIANT(uv)) { + if (OFFUNI_IS_INVARIANT(uv)) { *d++ = (char) LATIN1_TO_NATIVE(uv); } else { @@ -3582,7 +3584,7 @@ S_scan_const(pTHX_ char *start) /* The regex compiler is * expecting Unicode, not * native */ - (U8) NATIVE_TO_LATIN1(*str)); + NATIVE_TO_LATIN1(*str)); PERL_MY_SNPRINTF_POST_GUARD(len, sizeof(hex_string)); Copy(hex_string, d, 3, char); @@ -4807,9 +4809,22 @@ Perl_yylex(pTHX) retry: switch (*s) { default: - if (UTF ? isIDFIRST_utf8((U8*)s) : isALNUMC(*s)) + if (UTF) { + if (! isUTF8_CHAR((U8 *) s, (U8 *) PL_bufend)) { + ENTER; + SAVESPTR(PL_warnhook); + PL_warnhook = PERL_WARNHOOK_FATAL; + utf8n_to_uvchr((U8*)s, PL_bufend-s, NULL, 0); + LEAVE; + } + if (isIDFIRST_utf8((U8*)s)) { + goto keylookup; + } + } + else if (isALNUMC(*s)) { goto keylookup; - { + } + { SV *dsv = newSVpvs_flags("", SVs_TEMP); const char *c = UTF ? sv_uni_display(dsv, newSVpvn_flags(s, UTF8SKIP(s), @@ -5845,12 +5860,12 @@ Perl_yylex(pTHX) else /* skip plain q word */ while (t < PL_bufend && isWORDCHAR_lazy_if(t,UTF)) - t += UTF8SKIP(t); + t += UTF ? UTF8SKIP(t) : 1; } else if (isWORDCHAR_lazy_if(t,UTF)) { - t += UTF8SKIP(t); + t += UTF ? UTF8SKIP(t) : 1; while (t < PL_bufend && isWORDCHAR_lazy_if(t,UTF)) - t += UTF8SKIP(t); + t += UTF ? UTF8SKIP(t) : 1; } while (t < PL_bufend && isSPACE(*t)) t++; @@ -6353,11 +6368,18 @@ Perl_yylex(pTHX) TOKEN('$'); case '@': - if (PL_expect == XOPERATOR) - no_op("Array", s); - else if (PL_expect == XPOSTDEREF) POSTDEREF('@'); + if (PL_expect == XPOSTDEREF) + POSTDEREF('@'); PL_tokenbuf[0] = '@'; s = scan_ident(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE); + if (PL_expect == XOPERATOR) { + d = s; + if (PL_bufptr > s) { + d = PL_bufptr-1; + PL_bufptr = PL_oldbufptr; + } + no_op("Array", d); + } pl_yylval.ival = 0; if (!PL_tokenbuf[1]) { PREREF('@'); @@ -8870,26 +8892,17 @@ S_scan_word(pTHX_ char *s, char *dest, STRLEN destlen, int allow_package, STRLEN * 2) '{' * The final case currently doesn't get this far in the program, so we * don't test for it. If that were to change, it would be ok to allow it. - * c) When not under Unicode rules, any upper Latin1 character - * d) Otherwise, when unicode rules are used, all XIDS characters. + * b) When not under Unicode rules, any upper Latin1 character + * c) Otherwise, when unicode rules are used, all XIDS characters. * * Because all ASCII characters have the same representation whether * encoded in UTF-8 or not, we can use the foo_A macros below and '\0' and - * '{' without knowing if is UTF-8 or not. - * EBCDIC already uses the rules that ASCII platforms will use after the - * deprecation cycle; see comment below about the deprecation. */ -#ifdef EBCDIC -# define VALID_LEN_ONE_IDENT(s, is_utf8) \ + * '{' without knowing if is UTF-8 or not. */ +#define VALID_LEN_ONE_IDENT(s, is_utf8) \ (isGRAPH_A(*(s)) || ((is_utf8) \ ? isIDFIRST_utf8((U8*) (s)) \ : (isGRAPH_L1(*s) \ && LIKELY((U8) *(s) != LATIN1_TO_NATIVE(0xAD))))) -#else -# define VALID_LEN_ONE_IDENT(s, is_utf8) \ - (isGRAPH_A(*(s)) || ((is_utf8) \ - ? isIDFIRST_utf8((U8*) (s)) \ - : ! isASCII_utf8((U8*) (s)))) -#endif STATIC char * S_scan_ident(pTHX_ char *s, char *dest, STRLEN destlen, I32 ck_uni) @@ -8954,18 +8967,6 @@ S_scan_ident(pTHX_ char *s, char *dest, STRLEN destlen, I32 ck_uni) : 1) && VALID_LEN_ONE_IDENT(s, is_utf8)) { - /* Deprecate all non-graphic characters. Include SHY as a non-graphic, - * because often it has no graphic representation. (We can't get to - * here with SHY when 'is_utf8' is true, so no need to include a UTF-8 - * test for it.) */ - if ((is_utf8) - ? ! isGRAPH_utf8( (U8*) s) - : (! isGRAPH_L1( (U8) *s) - || UNLIKELY((U8) *(s) == LATIN1_TO_NATIVE(0xAD)))) - { - deprecate("literal non-graphic characters in variable names"); - } - if (is_utf8) { const STRLEN skip = UTF8SKIP(s); STRLEN i; @@ -9248,7 +9249,9 @@ S_scan_pat(pTHX_ char *start, I32 type) "Use of /c modifier is meaningless without /g" ); } - STD_PMMOD_FLAGS_PARSE_X_WARN(x_mod_count); + if (UNLIKELY((x_mod_count) > 1)) { + yyerror("Only one /x regex modifier is allowed"); + } PL_lex_op = (OP*)pm; pl_yylval.ival = OP_MATCH; @@ -9303,7 +9306,9 @@ S_scan_subst(pTHX_ char *start) } } - STD_PMMOD_FLAGS_PARSE_X_WARN(x_mod_count); + if (UNLIKELY((x_mod_count) > 1)) { + yyerror("Only one /x regex modifier is allowed"); + } if ((pm->op_pmflags & PMf_CONTINUE)) { Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" ); @@ -9524,7 +9529,7 @@ S_scan_heredoc(pTHX_ char *s) SV *linestr; char *bufend; char * const olds = s; - PERL_CONTEXT * const cx = &cxstack[cxstack_ix]; + PERL_CONTEXT * const cx = CX_CUR(); /* These two fields are not set until an inner lexing scope is entered. But we need them set here. */ shared->ls_bufptr = s; @@ -9559,9 +9564,10 @@ S_scan_heredoc(pTHX_ char *s) goto streaming; } } - else { /* eval */ + else { /* eval or we've already hit EOF */ s = (char*)memchr((void*)s, '\n', PL_bufend - s); - assert(s); + if (!s) + goto interminable; } linestr = shared->ls_linestr; bufend = SvEND(linestr); @@ -9617,12 +9623,14 @@ S_scan_heredoc(pTHX_ char *s) else { SV *linestr_save; + char *oldbufptr_save; streaming: sv_setpvs(tmpstr,""); /* avoid "uninitialized" warning */ term = PL_tokenbuf[1]; len--; linestr_save = PL_linestr; /* must restore this afterwards */ d = s; /* and this */ + oldbufptr_save = PL_oldbufptr; PL_linestr = newSVpvs(""); PL_bufend = SvPVX(PL_linestr); while (1) { @@ -9639,6 +9647,7 @@ S_scan_heredoc(pTHX_ char *s) restore PL_linestr. */ SvREFCNT_dec_NN(PL_linestr); PL_linestr = linestr_save; + PL_oldbufptr = oldbufptr_save; goto interminable; } CopLINE_set(PL_curcop, origline); @@ -9673,6 +9682,7 @@ S_scan_heredoc(pTHX_ char *s) PL_linestr = linestr_save; PL_linestart = SvPVX(linestr_save); PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr); + PL_oldbufptr = oldbufptr_save; s = d; break; } @@ -10273,6 +10283,7 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp) * multiple fp operations. */ bool hexfp = FALSE; int total_bits = 0; + int significant_bits = 0; #if NVSIZE == 8 && defined(HAS_QUAD) && defined(Uquad_t) # define HEXFP_UQUAD Uquad_t hexfp_uquad = 0; @@ -10283,6 +10294,7 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp) #endif NV hexfp_mult = 1.0; UV high_non_zero = 0; /* highest digit */ + int non_zero_integer_digits = 0; PERL_ARGS_ASSERT_SCAN_NUM; @@ -10435,6 +10447,9 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp) if (high_non_zero == 0 && b > 0) high_non_zero = b; + if (high_non_zero) + non_zero_integer_digits++; + /* this could be hexfp, but peek ahead * to avoid matching ".." */ if (UNLIKELY(HEXFP_PEEK(s))) { @@ -10461,43 +10476,103 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp) * detection will shortly be more thorough with the * underbar checks. */ const char* h = s; + significant_bits = non_zero_integer_digits * shift; #ifdef HEXFP_UQUAD hexfp_uquad = u; #else /* HEXFP_NV */ hexfp_nv = u; #endif + /* Ignore the leading zero bits of + * the high (first) non-zero digit. */ + if (high_non_zero) { + if (high_non_zero < 0x8) + significant_bits--; + if (high_non_zero < 0x4) + significant_bits--; + if (high_non_zero < 0x2) + significant_bits--; + } + if (*h == '.') { #ifdef HEXFP_NV - NV mult = 1 / 16.0; + NV nv_mult = 1.0; #endif - h++; - while (isXDIGIT(*h) || *h == '_') { + bool accumulate = TRUE; + for (h++; (isXDIGIT(*h) || *h == '_'); h++) { if (isXDIGIT(*h)) { U8 b = XDIGIT_VALUE(*h); - total_bits += shift; + significant_bits += shift; #ifdef HEXFP_UQUAD - hexfp_uquad <<= shift; - hexfp_uquad |= b; - hexfp_frac_bits += shift; + if (accumulate) { + if (significant_bits < NV_MANT_DIG) { + /* We are in the long "run" of xdigits, + * accumulate the full four bits. */ + hexfp_uquad <<= shift; + hexfp_uquad |= b; + hexfp_frac_bits += shift; + } else { + /* We are at a hexdigit either at, + * or straddling, the edge of mantissa. + * We will try grabbing as many as + * possible bits. */ + int tail = + significant_bits - NV_MANT_DIG; + if (tail <= 0) + tail += shift; + hexfp_uquad <<= tail; + hexfp_uquad |= b >> (shift - tail); + hexfp_frac_bits += tail; + + /* Ignore the trailing zero bits + * of the last non-zero xdigit. + * + * The assumption here is that if + * one has input of e.g. the xdigit + * eight (0x8), there is only one + * bit being input, not the full + * four bits. Conversely, if one + * specifies a zero xdigit, the + * assumption is that one really + * wants all those bits to be zero. */ + if (b) { + if ((b & 0x1) == 0x0) { + significant_bits--; + if ((b & 0x2) == 0x0) { + significant_bits--; + if ((b & 0x4) == 0x0) { + significant_bits--; + } + } + } + } + + accumulate = FALSE; + } + } else { + /* Keep skipping the xdigits, and + * accumulating the significant bits, + * but do not shift the uquad + * (which would catastrophically drop + * high-order bits) or accumulate the + * xdigits anymore. */ + } #else /* HEXFP_NV */ - hexfp_nv += b * mult; - mult /= 16.0; + if (accumulate) { + nv_mult /= 16.0; + if (nv_mult > 0.0) + hexfp_nv += b * nv_mult; + else + accumulate = FALSE; + } #endif } - h++; + if (significant_bits >= NV_MANT_DIG) + accumulate = FALSE; } } - if (total_bits >= 4) { - if (high_non_zero < 0x8) - total_bits--; - if (high_non_zero < 0x4) - total_bits--; - if (high_non_zero < 0x2) - total_bits--; - } - - if (total_bits > 0 && (isALPHA_FOLD_EQ(*h, 'p'))) { + if ((total_bits > 0 || significant_bits > 0) && + isALPHA_FOLD_EQ(*h, 'p')) { bool negexp = FALSE; h++; if (*h == '+') @@ -10741,7 +10816,7 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp) *d = '\0'; if (UNLIKELY(hexfp)) { # ifdef NV_MANT_DIG - if (total_bits > NV_MANT_DIG) + if (significant_bits > NV_MANT_DIG) Perl_ck_warner(aTHX_ packWARN(WARN_OVERFLOW), "Hexadecimal float: mantissa overflow"); # endif @@ -11352,10 +11427,7 @@ Perl_scan_vstring(pTHX_ const char *s, const char *const e, SV *sv) "Integer overflow in decimal number"); } } -#ifdef EBCDIC - if (rev > 0x7FFFFFFF) - Perl_croak(aTHX_ "In EBCDIC the v-string components cannot exceed 2147483647"); -#endif + /* Append native character for the rev point */ tmpend = uvchr_to_utf8(tmpbuf, rev); sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);