X-Git-Url: https://perl5.git.perl.org/perl5.git/blobdiff_plain/89e006ae4e39db68ad35c878eb6e6de83ebd8ec9..085b2bf84ccfcb326dbdec9439d6c36861c3c5f7:/toke.c diff --git a/toke.c b/toke.c index 006f885..43adb3e 100644 --- a/toke.c +++ b/toke.c @@ -2525,6 +2525,7 @@ S_sublex_push(pTHX) SAVEGENERICPV(PL_lex_brackstack); SAVEGENERICPV(PL_lex_casestack); SAVEGENERICPV(PL_parser->lex_shared); + SAVEBOOL(PL_parser->lex_re_reparsing); /* The here-doc parser needs to be able to peek into outer lexing scopes to find the body of the here-doc. So we put PL_linestr and @@ -2568,6 +2569,9 @@ S_sublex_push(pTHX) else PL_lex_inpat = NULL; + PL_parser->lex_re_reparsing = cBOOL(PL_in_eval & EVAL_RE_REPARSING); + PL_in_eval &= ~EVAL_RE_REPARSING; + return '('; } @@ -3751,7 +3755,9 @@ S_scan_const(pTHX_ char *start) /* return the substring (via pl_yylval) only if we parsed anything */ if (s > PL_bufptr) { SvREFCNT_inc_simple_void_NN(sv); - if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) ) { + if ( (PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING )) + && ! PL_parser->lex_re_reparsing) + { const char *const key = PL_lex_inpat ? "qr" : "q"; const STRLEN keylen = PL_lex_inpat ? 2 : 1; const char *type; @@ -5025,7 +5031,7 @@ Perl_yylex(pTHX) #endif switch (*s) { default: - if (isIDFIRST_lazy_if(s,UTF)) + if (UTF ? isIDFIRST_utf8((U8*)s) : isALNUMC(*s)) goto keylookup; { SV *dsv = newSVpvs_flags("", SVs_TEMP); @@ -5711,6 +5717,9 @@ Perl_yylex(pTHX) if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) TOKEN(0); s += 2; + Perl_ck_warner_d(aTHX_ + packWARN(WARN_EXPERIMENTAL__SMARTMATCH), + "Smartmatch is experimental"); Eop(OP_SMARTMATCH); } s++; @@ -7935,6 +7944,9 @@ Perl_yylex(pTHX) case KEY_given: pl_yylval.ival = CopLINE(PL_curcop); + Perl_ck_warner_d(aTHX_ + packWARN(WARN_EXPERIMENTAL__SMARTMATCH), + "given is experimental"); OPERATOR(GIVEN); case KEY_glob: @@ -8105,15 +8117,9 @@ Perl_yylex(pTHX) case KEY_open: s = SKIPSPACE1(s); if (isIDFIRST_lazy_if(s,UTF)) { - const char *t; - for (d = s; isWORDCHAR_lazy_if(d,UTF);) { - d += UTF ? UTF8SKIP(d) : 1; - if (UTF) { - while (UTF8_IS_CONTINUED(*d) && _is_utf8_mark((U8*)d)) { - d += UTF ? UTF8SKIP(d) : 1; - } - } - } + const char *t; + d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, + &len); for (t=d; isSPACE(*t);) t++; if ( *t && strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE) @@ -8797,6 +8803,9 @@ Perl_yylex(pTHX) if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR) return REPORT(0); pl_yylval.ival = CopLINE(PL_curcop); + Perl_ck_warner_d(aTHX_ + packWARN(WARN_EXPERIMENTAL__SMARTMATCH), + "when is experimental"); OPERATOR(WHEN); case KEY_while: @@ -9044,7 +9053,9 @@ S_checkcomma(pTHX_ const char *s, const char *name, const char *what) } } -/* Either returns sv, or mortalizes/frees sv and returns a new SV*. +/* S_new_constant(): do any overload::constant lookup. + + Either returns sv, or mortalizes/frees sv and returns a new SV*. Best used as sv=new_constant(..., sv, ...). If s, pv are NULL, calls subroutine with one argument, and is used with error messages only. @@ -9188,6 +9199,54 @@ now_ok: return res; } +PERL_STATIC_INLINE void +S_parse_ident(pTHX_ char **s, char **d, char * const e, int allow_package, bool is_utf8) { + dVAR; + PERL_ARGS_ASSERT_PARSE_IDENT; + + for (;;) { + if (*d >= e) + Perl_croak(aTHX_ "%s", ident_too_long); + if (is_utf8 && isIDFIRST_utf8((U8*)*s)) { + /* The UTF-8 case must come first, otherwise things + * like c\N{COMBINING TILDE} would start failing, as the + * isWORDCHAR_A case below would gobble the 'c' up. + */ + + char *t = *s + UTF8SKIP(*s); + while (isIDCONT_utf8((U8*)t)) + t += UTF8SKIP(t); + if (*d + (t - *s) > e) + Perl_croak(aTHX_ "%s", ident_too_long); + Copy(*s, *d, t - *s, char); + *d += t - *s; + *s = t; + } + else if ( isWORDCHAR_A(**s) ) { + do { + *(*d)++ = *(*s)++; + } while isWORDCHAR_A(**s); + } + else if (allow_package && **s == '\'' && isIDFIRST_lazy_if(*s+1,is_utf8)) { + *(*d)++ = ':'; + *(*d)++ = ':'; + (*s)++; + } + else if (allow_package && **s == ':' && (*s)[1] == ':' + /* Disallow things like Foo::$bar. For the curious, this is + * the code path that triggers the "Bad name after" warning + * when looking for barewords. + */ + && (*s)[2] != '$') { + *(*d)++ = *(*s)++; + *(*d)++ = *(*s)++; + } + else + break; + } + return; +} + /* Returns a NUL terminated string, with the length of the string written to *slp */ @@ -9197,44 +9256,14 @@ S_scan_word(pTHX_ char *s, char *dest, STRLEN destlen, int allow_package, STRLEN dVAR; char *d = dest; char * const e = d + destlen - 3; /* two-character token, ending NUL */ + bool is_utf8 = cBOOL(UTF); PERL_ARGS_ASSERT_SCAN_WORD; - for (;;) { - if (d >= e) - Perl_croak(aTHX_ "%s", ident_too_long); - if (isWORDCHAR(*s) - || (!UTF && isALPHANUMERIC_L1(*s))) /* UTF handled below */ - { - *d++ = *s++; - } - else if (allow_package && (*s == '\'') && isIDFIRST_lazy_if(s+1,UTF)) { - *d++ = ':'; - *d++ = ':'; - s++; - } - else if (allow_package && (s[0] == ':') && (s[1] == ':') && (s[2] != '$')) { - *d++ = *s++; - *d++ = *s++; - } - else if (UTF && UTF8_IS_START(*s) && isWORDCHAR_utf8((U8*)s)) { - char *t = s + UTF8SKIP(s); - size_t len; - while (UTF8_IS_CONTINUED(*t) && _is_utf8_mark((U8*)t)) - t += UTF8SKIP(t); - len = t - s; - if (d + len > e) - Perl_croak(aTHX_ "%s", ident_too_long); - Copy(s, d, len, char); - d += len; - s = t; - } - else { - *d = '\0'; - *slp = d - dest; - return s; - } - } + parse_ident(&s, &d, e, allow_package, is_utf8); + *d = '\0'; + *slp = d - dest; + return s; } STATIC char * @@ -9245,6 +9274,7 @@ S_scan_ident(pTHX_ char *s, const char *send, char *dest, STRLEN destlen, I32 ck char funny = *s++; char *d = dest; char * const e = d + destlen - 3; /* two-character token, ending NUL */ + bool is_utf8 = cBOOL(UTF); PERL_ARGS_ASSERT_SCAN_IDENT; @@ -9258,33 +9288,7 @@ S_scan_ident(pTHX_ char *s, const char *send, char *dest, STRLEN destlen, I32 ck } } else { - for (;;) { - if (d >= e) - Perl_croak(aTHX_ "%s", ident_too_long); - if (isWORDCHAR(*s)) /* UTF handled below */ - *d++ = *s++; - else if (*s == '\'' && isIDFIRST_lazy_if(s+1,UTF)) { - *d++ = ':'; - *d++ = ':'; - s++; - } - else if (*s == ':' && s[1] == ':') { - *d++ = *s++; - *d++ = *s++; - } - else if (UTF && UTF8_IS_START(*s) && isWORDCHAR_utf8((U8*)s)) { - char *t = s + UTF8SKIP(s); - while (UTF8_IS_CONTINUED(*t) && _is_utf8_mark((U8*)t)) - t += UTF8SKIP(t); - if (d + (t - s) > e) - Perl_croak(aTHX_ "%s", ident_too_long); - Copy(s, d, t - s, char); - d += t - s; - s = t; - } - else - break; - } + parse_ident(&s, &d, e, 1, is_utf8); } *d = '\0'; d = dest; @@ -9294,16 +9298,29 @@ S_scan_ident(pTHX_ char *s, const char *send, char *dest, STRLEN destlen, I32 ck return s; } if (*s == '$' && s[1] && - (isWORDCHAR_lazy_if(s+1,UTF) || s[1] == '$' || s[1] == '{' || strnEQ(s+1,"::",2)) ) + (isIDFIRST_lazy_if(s+1,is_utf8) + || isDIGIT_A((U8)s[1]) + || s[1] == '$' + || s[1] == '{' + || strnEQ(s+1,"::",2)) ) { return s; } if (*s == '{') { bracket = s; s++; + while (s < send && SPACE_OR_TAB(*s)) + s++; } - if (s < send) { - if (UTF) { + +#define VALID_LEN_ONE_IDENT(d, u) (isPUNCT_A((U8)*(d)) \ + || isCNTRL_A((U8)*(d)) \ + || isDIGIT_A((U8)*(d)) \ + || (!(u) && !UTF8_IS_INVARIANT((U8)*(d)))) + if (s < send + && (isIDFIRST_lazy_if(s, is_utf8) || VALID_LEN_ONE_IDENT(s, is_utf8))) + { + if (is_utf8) { const STRLEN skip = UTF8SKIP(s); STRLEN i; d[skip] = '\0'; @@ -9322,34 +9339,9 @@ S_scan_ident(pTHX_ char *s, const char *send, char *dest, STRLEN destlen, I32 ck else if (ck_uni && !bracket) check_uni(); if (bracket) { - if (isSPACE(s[-1])) { - while (s < send) { - const char ch = *s++; - if (!SPACE_OR_TAB(ch)) { - *d = ch; - break; - } - } - } - if (isIDFIRST_lazy_if(d,UTF)) { - d += UTF8SKIP(d); - if (UTF) { - char *end = s; - while ((end < send && isWORDCHAR_lazy_if(end,UTF)) || *end == ':') { - end += UTF8SKIP(end); - while (end < send && UTF8_IS_CONTINUED(*end) && _is_utf8_mark((U8*)end)) - end += UTF8SKIP(end); - } - Copy(s, d, end - s, char); - d += end - s; - s = end; - } - else { - while ((isWORDCHAR(*s) || *s == ':') && d < e) - *d++ = *s++; - if (d >= e) - Perl_croak(aTHX_ "%s", ident_too_long); - } + if (isIDFIRST_lazy_if(d,is_utf8)) { + d += is_utf8 ? UTF8SKIP(d) : 1; + parse_ident(&s, &d, e, 1, is_utf8); *d = '\0'; while (s < send && SPACE_OR_TAB(*s)) s++; @@ -9382,6 +9374,10 @@ S_scan_ident(pTHX_ char *s, const char *send, char *dest, STRLEN destlen, I32 ck Perl_croak(aTHX_ "%s", ident_too_long); *d = '\0'; } + + while (s < send && SPACE_OR_TAB(*s)) + s++; + if (*s == '}') { s++; if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) { @@ -9391,10 +9387,10 @@ S_scan_ident(pTHX_ char *s, const char *send, char *dest, STRLEN destlen, I32 ck if (PL_lex_state == LEX_NORMAL) { if (ckWARN(WARN_AMBIGUOUS) && (keyword(dest, d - dest, 0) - || get_cvn_flags(dest, d - dest, UTF ? SVf_UTF8 : 0))) + || get_cvn_flags(dest, d - dest, is_utf8 ? SVf_UTF8 : 0))) { SV *tmp = newSVpvn_flags( dest, d - dest, - SVs_TEMP | (UTF ? SVf_UTF8 : 0) ); + SVs_TEMP | (is_utf8 ? SVf_UTF8 : 0) ); if (funny == '#') funny = '@'; Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS), @@ -9514,8 +9510,7 @@ S_scan_pat(pTHX_ char *start, I32 type) { dVAR; PMOP *pm; - char *s = scan_str(start,!!PL_madskills,FALSE, PL_reg_state.re_reparsing, - TRUE /* look for escaped bracketed metas */ ); + char *s; const char * const valid_flags = (const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS); char charset = '\0'; /* character set modifier */ @@ -9525,9 +9520,9 @@ S_scan_pat(pTHX_ char *start, I32 type) PERL_ARGS_ASSERT_SCAN_PAT; - /* this was only needed for the initial scan_str; set it to false - * so that any (?{}) code blocks etc are parsed normally */ - PL_reg_state.re_reparsing = FALSE; + s = scan_str(start,!!PL_madskills,FALSE, (PL_in_eval & EVAL_RE_REPARSING), + TRUE /* look for escaped bracketed metas */ ); + if (!s) { const char * const delimiter = skipspace(start); Perl_croak(aTHX_ @@ -9980,12 +9975,12 @@ S_scan_heredoc(pTHX_ char *s) linestr = shared->ls_linestr; bufend = SvEND(linestr); d = s; - while (s < bufend && - (*s != '\n' || memNE(s,PL_tokenbuf,len)) ) { + while (s < bufend - len + 1 && + memNE(s,PL_tokenbuf,len) ) { if (*s++ == '\n') ++shared->herelines; } - if (s >= bufend) { + if (s >= bufend - len + 1) { goto interminable; } sv_setpvn(tmpstr,d+1,s-d);