X-Git-Url: https://perl5.git.perl.org/perl5.git/blobdiff_plain/2cc6fe62efccaf47e15982ddbe988a976469d887..a446b943f7a5b38f67cca69a513d873bc8335552:/toke.c diff --git a/toke.c b/toke.c index a8ce485..bf9d160 100644 --- a/toke.c +++ b/toke.c @@ -586,7 +586,7 @@ S_missingterm(pTHX_ char *s) if (nl) *nl = '\0'; } - else if (isCNTRL(PL_multi_close)) { + else if ((U8) PL_multi_close < 32) { *tmpbuf = '^'; tmpbuf[1] = (char)toCTRL(PL_multi_close); tmpbuf[2] = '\0'; @@ -753,9 +753,9 @@ Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, U32 flags) parser->linestr = flags & LEX_START_COPIED ? SvREFCNT_inc_simple_NN(line) : newSVpvn_flags(s, len, SvUTF8(line)); - sv_catpvs(parser->linestr, "\n;"); + sv_catpvn(parser->linestr, "\n;", rsfp ? 1 : 2); } else { - parser->linestr = newSVpvs("\n;"); + parser->linestr = newSVpvn("\n;", rsfp ? 1 : 2); } parser->oldoldbufptr = parser->oldbufptr = @@ -1053,7 +1053,7 @@ Perl_lex_stuff_pvn(pTHX_ const char *pv, STRLEN len, U32 flags) ENTER; SAVESPTR(PL_warnhook); PL_warnhook = PERL_WARNHOOK_FATAL; - utf8n_to_uvuni((U8*)p, e-p, NULL, 0); + utf8n_to_uvchr((U8*)p, e-p, NULL, 0); LEAVE; } } @@ -1073,7 +1073,7 @@ Perl_lex_stuff_pvn(pTHX_ const char *pv, STRLEN len, U32 flags) } else { assert(p < e -1 ); - *bufptr++ = TWO_BYTE_UTF8_TO_UNI(*p, *(p+1)); + *bufptr++ = TWO_BYTE_UTF8_TO_NATIVE(*p, *(p+1)); p += 2; } } @@ -1437,13 +1437,13 @@ Perl_lex_peek_unichar(pTHX_ U32 flags) bufend = PL_parser->bufend; } } - unichar = utf8n_to_uvuni((U8*)s, bufend-s, &retlen, UTF8_CHECK_ONLY); + unichar = utf8n_to_uvchr((U8*)s, bufend-s, &retlen, UTF8_CHECK_ONLY); if (retlen == (STRLEN)-1) { /* malformed UTF-8 */ ENTER; SAVESPTR(PL_warnhook); PL_warnhook = PERL_WARNHOOK_FATAL; - utf8n_to_uvuni((U8*)s, bufend-s, NULL, 0); + utf8n_to_uvchr((U8*)s, bufend-s, NULL, 0); LEAVE; } return unichar; @@ -1753,37 +1753,16 @@ S_incline(pTHX_ const char *s) if (t - s > 0) { const STRLEN len = t - s; - SV *const temp_sv = CopFILESV(PL_curcop); - const char *cf; - STRLEN tmplen; - - if (temp_sv) { - cf = SvPVX(temp_sv); - tmplen = SvCUR(temp_sv); - } else { - cf = NULL; - tmplen = 0; - } if (!PL_rsfp && !PL_parser->filtered) { /* must copy *{"::_<(eval N)[oldfilename:L]"} * to *{"::_ 0) { @@ -1830,7 +1805,6 @@ S_incline(pTHX_ const char *s) if (tmpbuf2 != smallbuf) Safefree(tmpbuf2); } - if (tmpbuf != smallbuf) Safefree(tmpbuf); } CopFILE_free(PL_curcop); CopFILE_setn(PL_curcop, s, len); @@ -1895,13 +1869,11 @@ STATIC char * S_skipspace2(pTHX_ char *s, SV **svp) { char *start; - const I32 bufptroff = PL_bufptr - SvPVX(PL_linestr); const I32 startoff = s - SvPVX(PL_linestr); PERL_ARGS_ASSERT_SKIPSPACE2; s = skipspace(s); - PL_bufptr = SvPVX(PL_linestr) + bufptroff; if (!PL_madskills || !svp) return s; start = SvPVX(PL_linestr) + startoff; @@ -1934,7 +1906,7 @@ S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len) sv_setpvn(sv, buf, len); (void)SvIOK_on(sv); SvIV_set(sv, 0); - av_store(av, (I32)CopLINE(PL_curcop), sv); + av_store(av, CopLINE(PL_curcop), sv); } } @@ -2576,7 +2548,7 @@ S_sublex_start(pTHX) return THING; } else if (op_type == OP_BACKTICK && PL_lex_op) { - /* readpipe() vas overriden */ + /* readpipe() was overridden */ cSVOPx(cLISTOPx(cUNOPx(PL_lex_op)->op_first)->op_first->op_sibling)->op_sv = tokeq(PL_lex_stuff); pl_yylval.opval = PL_lex_op; PL_lex_op = NULL; @@ -2789,7 +2761,7 @@ S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e) { /* If warnings are on, this will print a more detailed analysis of what * is wrong than the error message below */ - utf8n_to_uvuni(first_bad_char_loc, + utf8n_to_uvchr(first_bad_char_loc, e - ((char *) first_bad_char_loc), NULL, 0); @@ -2864,7 +2836,7 @@ S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e) } s++; } else if (UTF8_IS_DOWNGRADEABLE_START(*s)) { - if (! isALPHAU(UNI_TO_NATIVE(TWO_BYTE_UTF8_TO_UNI(*s, *(s+1))))) { + if (! isALPHAU(TWO_BYTE_UTF8_TO_NATIVE(*s, *(s+1)))) { goto bad_charname; } s += 2; @@ -2897,8 +2869,7 @@ S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e) s++; } else if (UTF8_IS_DOWNGRADEABLE_START(*s)) { - if (! isCHARNAME_CONT(UNI_TO_NATIVE(TWO_BYTE_UTF8_TO_UNI(*s, - *(s+1))))) + if (! isCHARNAME_CONT(TWO_BYTE_UTF8_TO_NATIVE(*s, *(s+1)))) { goto bad_charname; } @@ -2932,7 +2903,7 @@ S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e) if (! is_utf8_string_loc((U8 *) str, len, &first_bad_char_loc)) { /* If warnings are on, this will print a more detailed analysis of * what is wrong than the error message below */ - utf8n_to_uvuni(first_bad_char_loc, + utf8n_to_uvchr(first_bad_char_loc, (char *) first_bad_char_loc - str, NULL, 0); @@ -3133,7 +3104,7 @@ S_scan_const(pTHX_ char *start) char *e = d++; while (e-- > c) *(e + 1) = *e; - *c = (char)UTF_TO_NATIVE(0xff); + *c = (char) ILLEGAL_UTF8_BYTE; /* mark the range as done, and continue */ dorange = FALSE; didrange = TRUE; @@ -3187,16 +3158,12 @@ S_scan_const(pTHX_ char *start) #ifdef EBCDIC if (literal_endpoint == 2 && - ((isLOWER(min) && isLOWER(max)) || - (isUPPER(min) && isUPPER(max)))) { - if (isLOWER(min)) { - for (i = min; i <= max; i++) - if (isLOWER(i)) - *d++ = NATIVE_TO_NEED(has_utf8,i); - } else { - for (i = min; i <= max; i++) - if (isUPPER(i)) - *d++ = NATIVE_TO_NEED(has_utf8,i); + ((isLOWER_A(min) && isLOWER_A(max)) || + (isUPPER_A(min) && isUPPER_A(max)))) + { + for (i = min; i <= max; i++) { + if (isALPHA_A(i)) + *d++ = i; } } else @@ -3204,13 +3171,7 @@ S_scan_const(pTHX_ char *start) for (i = min; i <= max; i++) #ifdef EBCDIC if (has_utf8) { - const U8 ch = (U8)NATIVE_TO_UTF(i); - if (UNI_IS_INVARIANT(ch)) - *d++ = (U8)i; - else { - *d++ = (U8)UTF8_EIGHT_BIT_HI(ch); - *d++ = (U8)UTF8_EIGHT_BIT_LO(ch); - } + append_utf8_from_native_byte(i, &d); } else #endif @@ -3220,7 +3181,7 @@ S_scan_const(pTHX_ char *start) if (uvmax) { d = (char*)uvchr_to_utf8((U8*)d, 0x100); if (uvmax > 0x101) - *d++ = (char)UTF_TO_NATIVE(0xff); + *d++ = (char) ILLEGAL_UTF8_BYTE; if (uvmax > 0x100) d = (char*)uvchr_to_utf8((U8*)d, uvmax); } @@ -3245,7 +3206,7 @@ S_scan_const(pTHX_ char *start) && !native_range #endif ) { - *d++ = (char)UTF_TO_NATIVE(0xff); /* use illegal utf8 byte--see pmtrans */ + *d++ = (char) ILLEGAL_UTF8_BYTE; /* use illegal utf8 byte--see pmtrans */ s++; continue; } @@ -3285,12 +3246,12 @@ S_scan_const(pTHX_ char *start) * char, which will be done separately. * Stop on (?{..}) and friends */ - else if (*s == '(' && PL_lex_inpat && s[1] == '?') { + else if (*s == '(' && PL_lex_inpat && s[1] == '?' && !in_charclass) { if (s[2] == '#') { while (s+1 < send && *s != ')') - *d++ = NATIVE_TO_NEED(has_utf8,*s++); + *d++ = *s++; } - else if (!PL_lex_casemods && !in_charclass && + else if (!PL_lex_casemods && ( s[2] == '{' /* This should match regcomp.c */ || (s[2] == '?' && s[3] == '{'))) { @@ -3299,10 +3260,10 @@ S_scan_const(pTHX_ char *start) } /* likewise skip #-initiated comments in //x patterns */ - else if (*s == '#' && PL_lex_inpat && + else if (*s == '#' && PL_lex_inpat && !in_charclass && ((PMOP*)PL_lex_inpat)->op_pmflags & RXf_PMf_EXTENDED) { while (s+1 < send && *s != '\n') - *d++ = NATIVE_TO_NEED(has_utf8,*s++); + *d++ = *s++; } /* no further processing of single-quoted regex */ @@ -3377,7 +3338,7 @@ S_scan_const(pTHX_ char *start) || s[1] != '{' || regcurly(s + 1, FALSE))) { - *d++ = NATIVE_TO_NEED(has_utf8,'\\'); + *d++ = '\\'; goto default_action; } @@ -3406,7 +3367,7 @@ S_scan_const(pTHX_ char *start) { I32 flags = PERL_SCAN_SILENT_ILLDIGIT; STRLEN len = 3; - uv = NATIVE_TO_UNI(grok_oct(s, &len, &flags, NULL)); + uv = grok_oct(s, &len, &flags, NULL); s += len; if (len < 3 && s < send && isDIGIT(*s) && ckWARN(WARN_MISC)) @@ -3458,9 +3419,8 @@ S_scan_const(pTHX_ char *start) * UTF-8 sequence they can end up as, except if they force us * to recode the rest of the string into utf8 */ - /* Here uv is the ordinal of the next character being added in - * unicode (converted from native). */ - if (!UNI_IS_INVARIANT(uv)) { + /* Here uv is the ordinal of the next character being added */ + if (!NATIVE_IS_INVARIANT(uv)) { if (!has_utf8 && uv > 255) { /* Might need to recode whatever we have accumulated so * far if it contains any chars variant in utf8 or @@ -3478,7 +3438,7 @@ S_scan_const(pTHX_ char *start) } if (has_utf8) { - d = (char*)uvuni_to_utf8((U8*)d, uv); + d = (char*)uvchr_to_utf8((U8*)d, uv); if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) { PL_sublex_info.sub_op->op_private |= @@ -3511,16 +3471,6 @@ S_scan_const(pTHX_ char *start) * now, while preserving the fact that it was a named character * so that the regex compiler knows this */ - /* This section of code doesn't generally use the - * NATIVE_TO_NEED() macro to transform the input. I (khw) did - * a close examination of this macro and determined it is a - * no-op except on utfebcdic variant characters. Every - * character generated by this that would normally need to be - * enclosed by this macro is invariant, so the macro is not - * needed, and would complicate use of copy(). XXX There are - * other parts of this file where the macro is used - * inconsistently, but are saved by it being a no-op */ - /* The structure of this section of code (besides checking for * errors and upgrading to utf8) is: * Further disambiguate between the two meanings of \N, and if @@ -3608,11 +3558,13 @@ S_scan_const(pTHX_ char *start) has_utf8 = TRUE; } - /* Add the string to the output */ + /* Add the (Unicode) code point to the output. */ if (UNI_IS_INVARIANT(uv)) { - *d++ = (char) uv; + *d++ = (char) LATIN1_TO_NATIVE(uv); } - else d = (char*)uvuni_to_utf8((U8*)d, uv); + else { + d = (char*) uvoffuni_to_utf8_flags((U8*)d, uv, 0); + } } } else /* Here is \N{NAME} but not \N{U+...}. */ @@ -3672,19 +3624,16 @@ S_scan_const(pTHX_ char *start) char hex_string[2 * UTF8_MAXBYTES + 5]; /* Get the first character of the result. */ - U32 uv = utf8n_to_uvuni((U8 *) str, + U32 uv = utf8n_to_uvchr((U8 *) str, len, &char_length, UTF8_ALLOW_ANYUV); /* Convert first code point to hex, including - * the boiler plate before it. For all these, - * we convert to native format so that - * downstream code can continue to assume the - * input is native */ + * the boiler plate before it. */ output_length = my_snprintf(hex_string, sizeof(hex_string), - "\\N{U+%X", - (unsigned int) UNI_TO_NATIVE(uv)); + "\\N{U+%X", + (unsigned int) uv); /* Make sure there is enough space to hold it */ d = off + SvGROW(sv, off @@ -3699,15 +3648,15 @@ S_scan_const(pTHX_ char *start) * its ordinal in hex */ while ((str += char_length) < str_end) { const STRLEN off = d - SvPVX_const(sv); - U32 uv = utf8n_to_uvuni((U8 *) str, + U32 uv = utf8n_to_uvchr((U8 *) str, str_end - str, &char_length, UTF8_ALLOW_ANYUV); output_length = my_snprintf(hex_string, - sizeof(hex_string), - ".%X", - (unsigned int) UNI_TO_NATIVE(uv)); + sizeof(hex_string), + ".%X", + (unsigned int) uv); d = off + SvGROW(sv, off + output_length @@ -3772,25 +3721,25 @@ S_scan_const(pTHX_ char *start) /* printf-style backslashes, formfeeds, newlines, etc */ case 'b': - *d++ = NATIVE_TO_NEED(has_utf8,'\b'); + *d++ = '\b'; break; case 'n': - *d++ = NATIVE_TO_NEED(has_utf8,'\n'); + *d++ = '\n'; break; case 'r': - *d++ = NATIVE_TO_NEED(has_utf8,'\r'); + *d++ = '\r'; break; case 'f': - *d++ = NATIVE_TO_NEED(has_utf8,'\f'); + *d++ = '\f'; break; case 't': - *d++ = NATIVE_TO_NEED(has_utf8,'\t'); + *d++ = '\t'; break; case 'e': - *d++ = ASCII_TO_NEED(has_utf8,'\033'); + *d++ = ASCII_TO_NATIVE('\033'); break; case 'a': - *d++ = ASCII_TO_NEED(has_utf8,'\007'); + *d++ = '\a'; break; } /* end switch */ @@ -3816,8 +3765,10 @@ S_scan_const(pTHX_ char *start) * routine that does the conversion checks for errors like * malformed utf8 */ - const UV nextuv = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s); - const STRLEN need = UNISKIP(NATIVE_TO_UNI(nextuv)); + const UV nextuv = (this_utf8) + ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) + : (UV) ((U8) *s); + const STRLEN need = UNISKIP(nextuv); if (!has_utf8) { SvCUR_set(sv, d - SvPVX_const(sv)); SvPOK_on(sv); @@ -3844,7 +3795,7 @@ S_scan_const(pTHX_ char *start) #endif } else { - *d++ = NATIVE_TO_NEED(has_utf8,*s++); + *d++ = *s++; } } /* while loop to process each character */ @@ -5309,7 +5260,7 @@ Perl_yylex(pTHX) * check if it in fact is. */ if (bof && PL_rsfp && (*s == 0 || - *(U8*)s == 0xEF || + *(U8*)s == BOM_UTF8_FIRST_BYTE || *(U8*)s >= 0xFE || s[1] == 0)) { Off_t offset = (IV)PerlIO_tell(PL_rsfp); @@ -5621,14 +5572,19 @@ Perl_yylex(pTHX) s = SKIPSPACE0(s); } else { -/* if (PL_madskills && PL_lex_formbrack) { */ - d = s; - while (d < PL_bufend && *d != '\n') - d++; - if (d < PL_bufend) - d++; - else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */ +#endif + if (PL_madskills) d = s; + while (s < PL_bufend && *s != '\n') + s++; + if (s < PL_bufend) + { + s++; + if (s < PL_bufend) + incline(s); + } + else if (s > PL_bufend) /* Found by Ilya: feed random input to Perl. */ Perl_croak(aTHX_ "panic: input overflow"); +#ifdef PERL_MAD if (PL_madskills && CopLINE(PL_curcop) >= 1) { if (!PL_thiswhite) PL_thiswhite = newSVpvs(""); @@ -5636,20 +5592,9 @@ Perl_yylex(pTHX) sv_setpvs(PL_thiswhite, ""); PL_faketokens = 0; } - sv_catpvn(PL_thiswhite, s, d - s); + sv_catpvn(PL_thiswhite, d, s - d); } - s = d; -/* } - *s = '\0'; - PL_bufend = s; */ } -#else - while (s < PL_bufend && *s != '\n') - s++; - if (s < PL_bufend) - s++; - else if (s > PL_bufend) /* Found by Ilya: feed random input to Perl. */ - Perl_croak(aTHX_ "panic: input overflow"); #endif } goto retry; @@ -7323,7 +7268,7 @@ Perl_yylex(pTHX) d = s + 1; while (SPACE_OR_TAB(*d)) d++; - if (*d == ')' && (sv = cv_const_sv(cv))) { + if (*d == ')' && (sv = cv_const_sv_or_av(cv))) { s = d + 1; goto its_constant; } @@ -7387,13 +7332,19 @@ Perl_yylex(pTHX) UTF8fARG(UTF, l, PL_tokenbuf)); } /* Check for a constant sub */ - if ((sv = cv_const_sv(cv))) { + if ((sv = cv_const_sv_or_av(cv))) { its_constant: op_free(rv2cv_op); SvREFCNT_dec(((SVOP*)pl_yylval.opval)->op_sv); ((SVOP*)pl_yylval.opval)->op_sv = SvREFCNT_inc_simple(sv); - pl_yylval.opval->op_private = OPpCONST_FOLDED; - pl_yylval.opval->op_flags |= OPf_SPECIAL; + if (SvTYPE(sv) == SVt_PVAV) + pl_yylval.opval = newUNOP(OP_RV2AV, OPf_PARENS, + pl_yylval.opval); + else { + pl_yylval.opval->op_private = OPpCONST_FOLDED; + pl_yylval.opval->op_folded = 1; + pl_yylval.opval->op_flags |= OPf_SPECIAL; + } TOKEN(WORD); } @@ -7595,21 +7546,12 @@ Perl_yylex(pTHX) case KEY___END__: { GV *gv; if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) { - const char *pname = "main"; - STRLEN plen = 4; - U32 putf8 = 0; - if (PL_tokenbuf[2] == 'D') - { - HV * const stash = - PL_curstash ? PL_curstash : PL_defstash; - pname = HvNAME_get(stash); - plen = HvNAMELEN (stash); - if(HvNAMEUTF8(stash)) putf8 = SVf_UTF8; - } - gv = gv_fetchpvn_flags( - Perl_form(aTHX_ "%*s::DATA", (int)plen, pname), - plen+6, GV_ADD|putf8, SVt_PVIO - ); + HV * const stash = PL_tokenbuf[2] == 'D' && PL_curstash + ? PL_curstash + : PL_defstash; + gv = (GV *)*hv_fetchs(stash, "DATA", 1); + if (!isGV(gv)) + gv_init(gv,stash,"DATA",4,0); GvMULTI_on(gv); if (!GvIO(gv)) GvIOp(gv) = newIO(); @@ -8919,17 +8861,9 @@ Perl_yylex(pTHX) FUN0(OP_WANTARRAY); case KEY_write: -#ifdef EBCDIC - { - char ctl_l[2]; - ctl_l[0] = toCTRL('L'); - ctl_l[1] = '\0'; - gv_fetchpvn_flags(ctl_l, 1, GV_ADD|GV_NOTQUAL, SVt_PV); - } -#else - /* Make sure $^L is defined */ - gv_fetchpvs("\f", GV_ADD|GV_NOTQUAL, SVt_PV); -#endif + /* Make sure $^L is defined. 0x0C is CTRL-L on ASCII platforms, and + * we use the same number on EBCDIC */ + gv_fetchpvs("\x0C", GV_ADD|GV_NOTQUAL, SVt_PV); UNI(OP_ENTERWRITE); case KEY_x: @@ -9410,10 +9344,17 @@ S_scan_ident(pTHX_ char *s, const char *send, char *dest, STRLEN destlen, I32 ck s++; } -#define VALID_LEN_ONE_IDENT(d, u) (isPUNCT_A((U8)(d)) \ - || isCNTRL_A((U8)(d)) \ - || isDIGIT_A((U8)(d)) \ - || (!(u) && !UTF8_IS_INVARIANT((U8)(d)))) +/* \c?, \c\, \c^, \c_, and \cA..\cZ minus the ones that have traditionally + * been matched by \s on ASCII platforms, are the legal control char names + * here, that is \c? plus 1-32 minus the \s ones. */ +#define VALID_LEN_ONE_IDENT(d, u) (isPUNCT_A((U8)(d)) \ + || isDIGIT_A((U8)(d)) \ + || (!(u) && !isASCII((U8)(d))) \ + || ((((U8)(d)) < 32) \ + && (((((U8)(d)) >= 14) \ + || (((U8)(d)) <= 8 && (d) != 0) \ + || (((U8)(d)) == 13)))) \ + || (((U8)(d)) == toCTRL('?'))) if (s < send && (isIDFIRST_lazy_if(s, is_utf8) || VALID_LEN_ONE_IDENT(*s, is_utf8))) { @@ -10067,7 +10008,7 @@ S_scan_heredoc(pTHX_ char *s) /* shared is only null if we have gone beyond the outermost lexing scope. In a file, we will have broken out of the loop in the previous iteration. In an eval, the string buf- - fer ends with "\n;", so the while condition below will have + fer ends with "\n;", so the while condition above will have evaluated to false. So shared can never be null. */ assert(shared); /* A LEXSHARED struct with a null ls_prev pointer is the outer- @@ -10171,8 +10112,11 @@ S_scan_heredoc(pTHX_ char *s) } CopLINE_set(PL_curcop, (line_t)PL_multi_start - 1); if (!SvCUR(PL_linestr) || PL_bufend[-1] != '\n') { - lex_grow_linestr(SvCUR(PL_linestr) + 2); + s = lex_grow_linestr(SvLEN(PL_linestr) + 3); + /* ^That should be enough to avoid this needing to grow: */ sv_catpvs(PL_linestr, "\n\0"); + assert(s == SvPVX(PL_linestr)); + PL_bufend = SvEND(PL_linestr); } s = PL_bufptr; #ifdef PERL_MAD @@ -10390,11 +10334,15 @@ intro_sym: /* scan_str - takes: start position in buffer - keep_quoted preserve \ on the embedded delimiter(s) - keep_delims preserve the delimiters around the string - re_reparse compiling a run-time /(?{})/: - collapse // to /, and skip encoding src + takes: + start position in buffer + keep_quoted preserve \ on the embedded delimiter(s) + keep_delims preserve the delimiters around the string + re_reparse compiling a run-time /(?{})/: + collapse // to /, and skip encoding src + deprecate_escaped_meta issue a deprecation warning for cer- + tain paired metacharacters that appear + escaped within it returns: position to continue reading from buffer side-effects: multi_start, multi_close, lex_repl or lex_stuff, and updates the read buffer. @@ -10436,9 +10384,7 @@ intro_sym: STATIC char * S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims, int re_reparse, - bool deprecate_escaped_meta /* Should we issue a deprecation warning - for certain paired metacharacters that - appear escaped within it */ + bool deprecate_escaped_meta ) { dVAR; @@ -10506,8 +10452,8 @@ S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims, int re_reparse, * happen for <>, as they aren't metas. */ if (deprecate_escaped_meta && (PL_multi_open == PL_multi_close - || ! ckWARN_d(WARN_DEPRECATED) - || PL_multi_open == '<')) + || PL_multi_open == '<' + || ! ckWARN_d(WARN_DEPRECATED))) { deprecate_escaped_meta = FALSE; } @@ -10701,26 +10647,39 @@ S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims, int re_reparse, * context where the delimiter is also a metacharacter, * the backslash is useless, and deprecated. () and [] * are meta in any context. {} are meta only when - * appearing in a quantifier or in things like '\p{'. - * They also aren't meta unless there is a matching - * closed, escaped char later on within the string. - * If 's' points to an open, set a flag; if to a close, - * test that flag, and raise a warning if it was set */ + * appearing in a quantifier or in things like '\p{' + * (but '\\p{' isn't meta). They also aren't meta + * unless there is a matching closed, escaped char + * later on within the string. If 's' points to an + * open, set a flag; if to a close, test that flag, and + * raise a warning if it was set */ if (deprecate_escaped_meta) { if (*s == PL_multi_open) { if (*s != '{') { escaped_open = s; } - else if (regcurly(s, - TRUE /* Look for a closing - '\}' */) - || (s - start > 2 /* Look for e.g. - '\x{' */ - && _generic_isCC(*(s-2), _CC_BACKSLASH_FOO_LBRACE_IS_META))) - { + /* Look for a closing '\}' */ + else if (regcurly(s, TRUE)) { escaped_open = s; } + /* Look for e.g. '\x{' */ + else if (s - start > 2 + && _generic_isCC(*(s-2), + _CC_BACKSLASH_FOO_LBRACE_IS_META)) + { /* Exclude '\\x', '\\\\x', etc. */ + char *lookbehind = s - 4; + bool is_meta = TRUE; + while (lookbehind >= start + && *lookbehind == '\\') + { + is_meta = ! is_meta; + lookbehind--; + } + if (is_meta) { + escaped_open = s; + } + } } else if (escaped_open) { Perl_warner(aTHX_ packWARN(WARN_DEPRECATED), @@ -11575,12 +11534,14 @@ S_swallow_bom(pTHX_ U8 *s) #endif } break; - case 0xEF: - if (slen > 2 && s[1] == 0xBB && s[2] == 0xBF) { - if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n"); - s += 3; /* UTF-8 */ - } - break; + case BOM_UTF8_FIRST_BYTE: { + const STRLEN len = sizeof(BOM_UTF8_TAIL) - 1; /* Exclude trailing NUL */ + if (slen > len && memEQ(s+1, BOM_UTF8_TAIL, len)) { + if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n"); + s += len + 1; /* UTF-8 */ + } + break; + } case 0: if (slen > 3) { if (s[1] == 0) { @@ -11603,14 +11564,6 @@ S_swallow_bom(pTHX_ U8 *s) #endif } } -#ifdef EBCDIC - case 0xDD: - if (slen > 3 && s[1] == 0x73 && s[2] == 0x66 && s[3] == 0x73) { - if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n"); - s += 4; /* UTF-8 */ - } - break; -#endif default: if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) { @@ -11855,7 +11808,7 @@ Perl_scan_vstring(pTHX_ const char *s, const char *const e, SV *sv) /* Append native character for the rev point */ tmpend = uvchr_to_utf8(tmpbuf, rev); sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf); - if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(rev))) + if (!NATIVE_IS_INVARIANT(rev)) SvUTF8_on(sv); if (pos + 1 < e && *pos == '.' && isDIGIT(pos[1])) s = ++pos;