X-Git-Url: https://perl5.git.perl.org/perl5.git/blobdiff_plain/4c32f29ba9da41e214e9e03a44a530ee0427f19a..246699ca5e3af04fcf52cf11c1535421dd986e59:/toke.c diff --git a/toke.c b/toke.c index 90b5ad5..fa2b2bd 100644 --- a/toke.c +++ b/toke.c @@ -1,6 +1,6 @@ /* toke.c * - * Copyright (c) 1991-2000, Larry Wall + * Copyright (c) 1991-2001, Larry Wall * * You may distribute under the terms of either the GNU General Public * License or the Artistic License, as specified in the README file. @@ -13,7 +13,7 @@ /* * This file is the lexer for Perl. It's closely linked to the - * parser, perly.y. + * parser, perly.y. * * The main routine is yylex(), which returns the next token. */ @@ -36,10 +36,14 @@ static I32 utf16rev_textfilter(pTHXo_ int idx, SV *sv, int maxlen); #define XFAKEBRACK 128 #define XENUMMASK 127 -/*#define UTF (SvUTF8(PL_linestr) && !(PL_hints & HINT_BYTE))*/ -#define UTF (PL_hints & HINT_UTF8) +#ifdef EBCDIC +/* For now 'use utf8' does not affect tokenizer on EBCDIC */ +#define UTF (PL_linestr && DO_UTF8(PL_linestr)) +#else +#define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8)) +#endif -/* In variables name $^X, these are the legal values for X. +/* In variables named $^X, these are the legal values for X. * 1999-02-27 mjd-perl-patch@plover.com */ #define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x))) @@ -79,13 +83,13 @@ static I32 utf16rev_textfilter(pTHXo_ int idx, SV *sv, int maxlen); # endif YYSTYPE* yylval_pointer[YYMAXLEVEL]; int* yychar_pointer[YYMAXLEVEL]; -int yyactlevel = 0; +int yyactlevel = -1; # undef yylval # undef yychar # define yylval (*yylval_pointer[yyactlevel]) # define yychar (*yychar_pointer[yyactlevel]) # define PERL_YYLEX_PARAM yylval_pointer[yyactlevel],yychar_pointer[yyactlevel] -# undef yylex +# undef yylex # define yylex() Perl_yylex_r(aTHX_ yylval_pointer[yyactlevel],yychar_pointer[yyactlevel]) #endif @@ -121,36 +125,47 @@ int yyactlevel = 0; * Aop : addition-level operator * Mop : multiplication-level operator * Eop : equality-testing operator - * Rop : relational operator <= != gt + * Rop : relational operator <= != gt * * Also see LOP and lop() below. */ -#define TOKEN(retval) return (PL_bufptr = s,(int)retval) -#define OPERATOR(retval) return (PL_expect = XTERM,PL_bufptr = s,(int)retval) -#define AOPERATOR(retval) return ao((PL_expect = XTERM,PL_bufptr = s,(int)retval)) -#define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s,(int)retval) -#define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s,(int)retval) -#define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s,(int)retval) -#define TERM(retval) return (CLINE, PL_expect = XOPERATOR,PL_bufptr = s,(int)retval) -#define LOOPX(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LOOPEX) -#define FTST(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)UNIOP) -#define FUN0(f) return(yylval.ival = f,PL_expect = XOPERATOR,PL_bufptr = s,(int)FUNC0) -#define FUN1(f) return(yylval.ival = f,PL_expect = XOPERATOR,PL_bufptr = s,(int)FUNC1) -#define BOop(f) return ao((yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)BITOROP)) -#define BAop(f) return ao((yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)BITANDOP)) -#define SHop(f) return ao((yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)SHIFTOP)) -#define PWop(f) return ao((yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)POWOP)) -#define PMop(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)MATCHOP) -#define Aop(f) return ao((yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)ADDOP)) -#define Mop(f) return ao((yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)MULOP)) -#define Eop(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)EQOP) -#define Rop(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)RELOP) +/* Note that REPORT() and REPORT2() will be expressions that supply + * their own trailing comma, not suitable for statements as such. */ +#ifdef DEBUGGING /* Serve -DT. */ +# define REPORT(x,retval) tokereport(x,s,(int)retval), +# define REPORT2(x,retval) tokereport(x,s, yylval.ival), +#else +# define REPORT(x,retval) +# define REPORT2(x,retval) +#endif + +#define TOKEN(retval) return (REPORT2("token",retval) PL_bufptr = s,(int)retval) +#define OPERATOR(retval) return (REPORT2("operator",retval) PL_expect = XTERM, PL_bufptr = s,(int)retval) +#define AOPERATOR(retval) return ao((REPORT2("aop",retval) PL_expect = XTERM, PL_bufptr = s,(int)retval)) +#define PREBLOCK(retval) return (REPORT2("preblock",retval) PL_expect = XBLOCK,PL_bufptr = s,(int)retval) +#define PRETERMBLOCK(retval) return (REPORT2("pretermblock",retval) PL_expect = XTERMBLOCK,PL_bufptr = s,(int)retval) +#define PREREF(retval) return (REPORT2("preref",retval) PL_expect = XREF,PL_bufptr = s,(int)retval) +#define TERM(retval) return (CLINE, REPORT2("term",retval) PL_expect = XOPERATOR, PL_bufptr = s,(int)retval) +#define LOOPX(f) return(yylval.ival=f, REPORT("loopx",f) PL_expect = XTERM,PL_bufptr = s,(int)LOOPEX) +#define FTST(f) return(yylval.ival=f, REPORT("ftst",f) PL_expect = XTERM,PL_bufptr = s,(int)UNIOP) +#define FUN0(f) return(yylval.ival = f, REPORT("fun0",f) PL_expect = XOPERATOR,PL_bufptr = s,(int)FUNC0) +#define FUN1(f) return(yylval.ival = f, REPORT("fun1",f) PL_expect = XOPERATOR,PL_bufptr = s,(int)FUNC1) +#define BOop(f) return ao((yylval.ival=f, REPORT("bitorop",f) PL_expect = XTERM,PL_bufptr = s,(int)BITOROP)) +#define BAop(f) return ao((yylval.ival=f, REPORT("bitandop",f) PL_expect = XTERM,PL_bufptr = s,(int)BITANDOP)) +#define SHop(f) return ao((yylval.ival=f, REPORT("shiftop",f) PL_expect = XTERM,PL_bufptr = s,(int)SHIFTOP)) +#define PWop(f) return ao((yylval.ival=f, REPORT("powop",f) PL_expect = XTERM,PL_bufptr = s,(int)POWOP)) +#define PMop(f) return(yylval.ival=f, REPORT("matchop",f) PL_expect = XTERM,PL_bufptr = s,(int)MATCHOP) +#define Aop(f) return ao((yylval.ival=f, REPORT("add",f) PL_expect = XTERM,PL_bufptr = s,(int)ADDOP)) +#define Mop(f) return ao((yylval.ival=f, REPORT("mul",f) PL_expect = XTERM,PL_bufptr = s,(int)MULOP)) +#define Eop(f) return(yylval.ival=f, REPORT("eq",f) PL_expect = XTERM,PL_bufptr = s,(int)EQOP) +#define Rop(f) return(yylval.ival=f, REPORT("rel",f) PL_expect = XTERM,PL_bufptr = s,(int)RELOP) /* This bit of chicanery makes a unary function followed by * a parenthesis into a function with one argument, highest precedence. */ #define UNI(f) return(yylval.ival = f, \ + REPORT("uni",f) \ PL_expect = XTERM, \ PL_bufptr = s, \ PL_last_uni = PL_oldbufptr, \ @@ -158,6 +173,7 @@ int yyactlevel = 0; (*s == '(' || (s = skipspace(s), *s == '(') ? (int)FUNC1 : (int)UNIOP) ) #define UNIBRACK(f) return(yylval.ival = f, \ + REPORT("uni",f) \ PL_bufptr = s, \ PL_last_uni = PL_oldbufptr, \ (*s == '(' || (s = skipspace(s), *s == '(') ? (int)FUNC1 : (int)UNIOP) ) @@ -165,6 +181,28 @@ int yyactlevel = 0; /* grandfather return to old style */ #define OLDLOP(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP) +#ifdef DEBUGGING + +STATIC void +S_tokereport(pTHX_ char *thing, char* s, I32 rv) +{ + DEBUG_T({ + SV* report = newSVpv(thing, 0); + Perl_sv_catpvf(aTHX_ report, ":line %d:%"IVdf":", CopLINE(PL_curcop), + (IV)rv); + + if (s - PL_bufptr > 0) + sv_catpvn(report, PL_bufptr, s - PL_bufptr); + else { + if (PL_oldbufptr && *PL_oldbufptr) + sv_catpv(report, PL_tokenbuf); + } + PerlIO_printf(Perl_debug_log, "### %s\n", SvPV_nolen(report)); + }); +} + +#endif + /* * S_ao * @@ -274,7 +312,6 @@ S_missingterm(pTHX_ char *s) void Perl_deprecate(pTHX_ char *s) { - dTHR; if (ckWARN(WARN_DEPRECATED)) Perl_warner(aTHX_ WARN_DEPRECATED, "Use of %s is deprecated", s); } @@ -337,7 +374,6 @@ S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen) void Perl_lex_start(pTHX_ SV *line) { - dTHR; char *s; STRLEN len; @@ -361,6 +397,8 @@ Perl_lex_start(pTHX_ SV *line) SAVEPPTR(PL_bufend); SAVEPPTR(PL_oldbufptr); SAVEPPTR(PL_oldoldbufptr); + SAVEPPTR(PL_last_lop); + SAVEPPTR(PL_last_uni); SAVEPPTR(PL_linestart); SAVESPTR(PL_linestr); SAVEPPTR(PL_lex_brackstack); @@ -403,6 +441,7 @@ Perl_lex_start(pTHX_ SV *line) SvTEMP_off(PL_linestr); PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr); PL_bufend = PL_bufptr + SvCUR(PL_linestr); + PL_last_lop = PL_last_uni = Nullch; SvREFCNT_dec(PL_rs); PL_rs = newSVpvn("\n", 1); PL_rsfp = 0; @@ -433,7 +472,6 @@ Perl_lex_end(pTHX) STATIC void S_incline(pTHX_ char *s) { - dTHR; char *t; char *n; char *e; @@ -447,9 +485,9 @@ S_incline(pTHX_ char *s) s += 4; else return; - if (*s == ' ' || *s == '\t') + if (SPACE_OR_TAB(*s)) s++; - else + else return; while (SPACE_OR_TAB(*s)) s++; if (!isDIGIT(*s)) @@ -495,7 +533,6 @@ S_incline(pTHX_ char *s) STATIC char * S_skipspace(pTHX_ register char *s) { - dTHR; if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) { while (s < PL_bufend && SPACE_OR_TAB(*s)) s++; @@ -504,7 +541,7 @@ S_skipspace(pTHX_ register char *s) for (;;) { STRLEN prevlen; SSize_t oldprevlen, oldoldprevlen; - SSize_t oldloplen, oldunilen; + SSize_t oldloplen = 0, oldunilen = 0; while (s < PL_bufend && isSPACE(*s)) { if (*s++ == '\n' && PL_in_eval && !PL_rsfp) incline(s); @@ -550,6 +587,7 @@ S_skipspace(pTHX_ register char *s) PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart = SvPVX(PL_linestr); PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr); + PL_last_lop = PL_last_uni = Nullch; /* Close the filehandle. Could be from -P preprocessor, * STDIN, or a regular file. If we were reading code from @@ -614,7 +652,6 @@ S_check_uni(pTHX) { char *s; char *t; - dTHR; if (PL_oldoldbufptr != PL_last_uni) return; @@ -626,8 +663,8 @@ S_check_uni(pTHX) if (ckWARN_d(WARN_AMBIGUOUS)){ char ch = *s; *s = '\0'; - Perl_warner(aTHX_ WARN_AMBIGUOUS, - "Warning: Use of \"%s\" without parens is ambiguous", + Perl_warner(aTHX_ WARN_AMBIGUOUS, + "Warning: Use of \"%s\" without parens is ambiguous", PL_last_uni); *s = ch; } @@ -680,9 +717,9 @@ S_uni(pTHX_ I32 f, char *s) STATIC I32 S_lop(pTHX_ I32 f, int x, char *s) { - dTHR; yylval.ival = f; CLINE; + REPORT("lop", f) PL_expect = x; PL_bufptr = s; PL_last_lop = PL_oldbufptr; @@ -707,7 +744,7 @@ S_lop(pTHX_ I32 f, int x, char *s) * handles the token correctly. */ -STATIC void +STATIC void S_force_next(pTHX_ I32 type) { PL_nexttype[PL_nexttoke] = type; @@ -740,7 +777,7 @@ S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow { register char *s; STRLEN len; - + start = skipspace(start); s = start; if (isIDFIRST_lazy_if(s,UTF) || @@ -782,7 +819,6 @@ S_force_ident(pTHX_ register char *s, int kind) PL_nextval[PL_nexttoke].opval = o; force_next(WORD); if (kind) { - dTHR; /* just for in_eval */ o->op_private = OPpCONST_ENTERED; /* XXX see note in pp_entereval() for why we forgo typo warnings if the symbol must be introduced in an eval. @@ -810,7 +846,7 @@ Perl_str_to_version(pTHX_ SV *sv) STRLEN skip; UV n; if (utf) - n = utf8_to_uv((U8*)start, len, &skip, 0); + n = utf8n_to_uvchr((U8*)start, len, &skip, 0); else { n = *(U8*)start; skip = 1; @@ -822,7 +858,7 @@ Perl_str_to_version(pTHX_ SV *sv) return retval; } -/* +/* * S_force_version * Forces the next token to be a version number. */ @@ -855,7 +891,7 @@ S_force_version(pTHX_ char *s) /* NOTE: The parser sees the package name and the VERSION swapped */ PL_nextval[PL_nexttoke].opval = version; - force_next(WORD); + force_next(WORD); return (s); } @@ -889,8 +925,11 @@ S_tokeq(pTHX_ SV *sv) if (s == send) goto finish; d = s; - if ( PL_hints & HINT_NEW_STRING ) + if ( PL_hints & HINT_NEW_STRING ) { pv = sv_2mortal(newSVpvn(SvPVX(pv), len)); + if (SvUTF8(sv)) + SvUTF8_on(pv); + } while (s < send) { if (*s == '\\') { if (s + 1 < send && (s[1] == '\\')) @@ -963,7 +1002,7 @@ S_sublex_start(pTHX) SvUTF8_on(nsv); SvREFCNT_dec(sv); sv = nsv; - } + } yylval.opval = (OP*)newSVOP(op_type, 0, sv); PL_lex_stuff = Nullsv; return THING; @@ -995,7 +1034,6 @@ S_sublex_start(pTHX) STATIC I32 S_sublex_push(pTHX) { - dTHR; ENTER; PL_lex_state = PL_sublex_info.super_state; @@ -1008,8 +1046,11 @@ S_sublex_push(pTHX) SAVEI32(PL_lex_inwhat); SAVECOPLINE(PL_curcop); SAVEPPTR(PL_bufptr); + SAVEPPTR(PL_bufend); SAVEPPTR(PL_oldbufptr); SAVEPPTR(PL_oldoldbufptr); + SAVEPPTR(PL_last_lop); + SAVEPPTR(PL_last_uni); SAVEPPTR(PL_linestart); SAVESPTR(PL_linestr); SAVEPPTR(PL_lex_brackstack); @@ -1021,6 +1062,7 @@ S_sublex_push(pTHX) PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr); PL_bufend += SvCUR(PL_linestr); + PL_last_lop = PL_last_uni = Nullch; SAVEFREESV(PL_linestr); PL_lex_dojoin = FALSE; @@ -1053,8 +1095,11 @@ STATIC I32 S_sublex_done(pTHX) { if (!PL_lex_starts++) { + SV *sv = newSVpvn("",0); + if (SvUTF8(PL_linestr)) + SvUTF8_on(sv); PL_expect = XOPERATOR; - yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpvn("",0)); + yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv); return THING; } @@ -1069,6 +1114,7 @@ S_sublex_done(pTHX) PL_lex_inpat = 0; PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr); PL_bufend += SvCUR(PL_linestr); + PL_last_lop = PL_last_uni = Nullch; SAVEFREESV(PL_linestr); PL_lex_dojoin = FALSE; PL_lex_brackets = 0; @@ -1169,7 +1215,7 @@ S_sublex_done(pTHX) } (end switch) } (end if backslash) } (end while character to read) - + */ STATIC char * @@ -1181,21 +1227,22 @@ S_scan_const(pTHX_ char *start) register char *d = SvPVX(sv); /* destination for copies */ bool dorange = FALSE; /* are we in a translit range? */ bool didrange = FALSE; /* did we just finish a range? */ - bool has_utf8 = FALSE; /* embedded \x{} */ + I32 has_utf8 = FALSE; /* Output constant is UTF8 */ + I32 this_utf8 = UTF; /* The source string is assumed to be UTF8 */ UV uv; - I32 utf = (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) - ? (PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF)) - : UTF; - I32 this_utf8 = (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) - ? (PL_sublex_info.sub_op->op_private & (PL_lex_repl ? - OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF)) - : UTF; const char *leaveit = /* set of acceptably-backslashed characters */ PL_lex_inpat ? "\\.^$@AGZdDwWsSbBpPXC+*?|()-nrtfeaxcz0123456789[{]} \t\n\r\f\v#" : ""; + if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) { + /* If we are doing a trans and we know we want UTF8 set expectation */ + has_utf8 = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF); + this_utf8 = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF); + } + + while (s < send || dorange) { /* get transliterations out of the way (they're most literal) */ if (PL_lex_inwhat == OP_TRANS) { @@ -1205,6 +1252,18 @@ S_scan_const(pTHX_ char *start) I32 min; /* first character in range */ I32 max; /* last character in range */ + if (has_utf8) { + char *c = (char*)utf8_hop((U8*)d, -1); + char *e = d++; + while (e-- > c) + *(e + 1) = *e; + *c = (char)UTF_TO_NATIVE(0xff); + /* mark the range as done, and continue */ + dorange = FALSE; + didrange = TRUE; + continue; + } + i = d - SvPVX(sv); /* remember current offset */ SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */ d = SvPVX(sv) + i; /* refresh d after realloc */ @@ -1219,17 +1278,17 @@ S_scan_const(pTHX_ char *start) (char)min, (char)max); } -#ifndef ASCIIish +#ifdef EBCDIC if ((isLOWER(min) && isLOWER(max)) || (isUPPER(min) && isUPPER(max))) { if (isLOWER(min)) { for (i = min; i <= max; i++) if (isLOWER(i)) - *d++ = i; + *d++ = NATIVE_TO_NEED(has_utf8,i); } else { for (i = min; i <= max; i++) if (isUPPER(i)) - *d++ = i; + *d++ = NATIVE_TO_NEED(has_utf8,i); } } else @@ -1241,15 +1300,15 @@ S_scan_const(pTHX_ char *start) dorange = FALSE; didrange = TRUE; continue; - } + } /* range begins (ignore - as first or last char) */ else if (*s == '-' && s+1 < send && s != start) { - if (didrange) { + if (didrange) { Perl_croak(aTHX_ "Ambiguous range in transliteration operator"); } - if (utf) { - *d++ = (char)0xff; /* use illegal utf8 byte--see pmtrans */ + if (has_utf8) { + *d++ = (char)UTF_TO_NATIVE(0xff); /* use illegal utf8 byte--see pmtrans */ s++; continue; } @@ -1268,7 +1327,7 @@ S_scan_const(pTHX_ char *start) else if (*s == '(' && PL_lex_inpat && s[1] == '?') { if (s[2] == '#') { while (s < send && *s != ')') - *d++ = *s++; + *d++ = NATIVE_TO_NEED(has_utf8,*s++); } else if (s[2] == '{' /* This should match regcomp.c */ || ((s[2] == 'p' || s[2] == '?') && s[3] == '{')) @@ -1280,9 +1339,9 @@ S_scan_const(pTHX_ char *start) while (count && (c = *regparse)) { if (c == '\\' && regparse[1]) regparse++; - else if (c == '{') + else if (c == '{') count++; - else if (c == '}') + else if (c == '}') count--; regparse++; } @@ -1291,7 +1350,7 @@ S_scan_const(pTHX_ char *start) yyerror("Sequence (?{...}) not terminated or not {}-balanced"); } while (s < regparse) - *d++ = *s++; + *d++ = NATIVE_TO_NEED(has_utf8,*s++); } } @@ -1299,7 +1358,7 @@ S_scan_const(pTHX_ char *start) else if (*s == '#' && PL_lex_inpat && ((PMOP*)PL_lex_inpat)->op_pmflags & PMf_EXTENDED) { while (s+1 < send && *s != '\n') - *d++ = *s++; + *d++ = NATIVE_TO_NEED(has_utf8,*s++); } /* check for embedded arrays @@ -1315,31 +1374,11 @@ S_scan_const(pTHX_ char *start) else if (*s == '$') { if (!PL_lex_inpat) /* not a regexp, so $ must be var */ break; - if (s + 1 < send && !strchr("()| \n\t", s[1])) + if (s + 1 < send && !strchr("()| \r\n\t", s[1])) break; /* in regexp, $ might be tail anchor */ } - /* (now in tr/// code again) */ - - if (*s & 0x80 && this_utf8) { - STRLEN len; - UV uv; - - uv = utf8_to_uv((U8*)s, send - s, &len, UTF8_CHECK_ONLY); - if (len == (STRLEN)-1) { - /* Illegal UTF8 (a high-bit byte), make it valid. */ - char *old_pvx = SvPVX(sv); - /* need space for one extra char (NOTE: SvCUR() not set here) */ - d = SvGROW(sv, SvLEN(sv) + 1) + (d - old_pvx); - d = (char*)uv_to_utf8((U8*)d, (U8)*s++); - } - else { - while (len--) - *d++ = *s++; - } - has_utf8 = TRUE; - continue; - } + /* End of else if chain - OP_TRANS rejoin rest */ /* backslashes */ if (*s == '\\' && s+1 < send) { @@ -1347,8 +1386,8 @@ S_scan_const(pTHX_ char *start) /* some backslashes we leave behind */ if (*leaveit && *s && strchr(leaveit, *s)) { - *d++ = '\\'; - *d++ = *s++; + *d++ = NATIVE_TO_NEED(has_utf8,'\\'); + *d++ = NATIVE_TO_NEED(has_utf8,*s++); continue; } @@ -1356,7 +1395,6 @@ S_scan_const(pTHX_ char *start) if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat && isDIGIT(*s) && *s != '0' && !isDIGIT(s[1])) { - dTHR; /* only for ckWARN */ if (ckWARN(WARN_SYNTAX)) Perl_warner(aTHX_ WARN_SYNTAX, "\\%c better written as $%c", *s, *s); *--s = '$'; @@ -1381,14 +1419,12 @@ S_scan_const(pTHX_ char *start) /* FALL THROUGH */ default: { - dTHR; if (ckWARN(WARN_MISC) && isALNUM(*s)) - Perl_warner(aTHX_ WARN_MISC, + Perl_warner(aTHX_ WARN_MISC, "Unrecognized escape \\%c passed through", *s); /* default action is to copy the quoted character */ - *d++ = *s++; - continue; + goto default_action; } /* \132 indicates an octal constant */ @@ -1406,15 +1442,14 @@ S_scan_const(pTHX_ char *start) ++s; if (*s == '{') { char* e = strchr(s, '}'); + STRLEN len = 1; /* allow underscores */ + if (!e) { yyerror("Missing right brace on \\x{}"); - e = s; - } - else { - STRLEN len = 1; /* allow underscores */ - uv = (UV)scan_hex(s + 1, e - s - 1, &len); - has_utf8 = TRUE; + ++s; + continue; } + uv = (UV)scan_hex(s + 1, e - s - 1, &len); s = e + 1; } else { @@ -1427,52 +1462,66 @@ S_scan_const(pTHX_ char *start) NUM_ESCAPE_INSERT: /* Insert oct or hex escaped character. - * There will always enough room in sv since such escapes will - * be longer than any utf8 sequence they can end up as - */ - if (uv > 127 || has_utf8) { - if (!this_utf8 && !has_utf8 && uv > 255) { - /* might need to recode whatever we have accumulated so far - * if it contains any hibit chars + * There will always enough room in sv since such + * escapes will be longer than any UTF-8 sequence + * they can end up as. */ + + /* We need to map to chars to ASCII before doing the tests + to cover EBCDIC + */ + if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(uv))) { + if (!has_utf8 && uv > 255) { + /* Might need to recode whatever we have + * accumulated so far if it contains any + * hibit chars. + * + * (Can't we keep track of that and avoid + * this rescan? --jhi) */ - int hicount = 0; - char *c; - for (c = SvPVX(sv); c < d; c++) { - if (*c & 0x80) + int hicount = 0; + U8 *c; + for (c = (U8 *) SvPVX(sv); c < (U8 *)d; c++) { + if (!NATIVE_IS_INVARIANT(*c)) { hicount++; + } } if (hicount) { - char *old_pvx = SvPVX(sv); - char *src, *dst; - d = SvGROW(sv, SvCUR(sv) + hicount + 1) + (d - old_pvx); - - src = d - 1; - d += hicount; - dst = d - 1; - - while (src < dst) { - if (*src & 0x80) { - dst--; - uv_to_utf8((U8*)dst, (U8)*src--); - dst--; + STRLEN offset = d - SvPVX(sv); + U8 *src, *dst; + d = SvGROW(sv, SvLEN(sv) + hicount + 1) + offset; + src = (U8 *)d - 1; + dst = src+hicount; + d += hicount; + while (src >= (U8 *)SvPVX(sv)) { + if (!NATIVE_IS_INVARIANT(*src)) { + U8 ch = NATIVE_TO_ASCII(*src); + *dst-- = UTF8_EIGHT_BIT_LO(ch); + *dst-- = UTF8_EIGHT_BIT_HI(ch); } else { - *dst-- = *src--; + *dst-- = *src; } + src--; } } } if (has_utf8 || uv > 255) { - d = (char*)uv_to_utf8((U8*)d, uv); - this_utf8 = TRUE; + d = (char*)uvchr_to_utf8((U8*)d, uv); + has_utf8 = TRUE; + if (PL_lex_inwhat == OP_TRANS && + PL_sublex_info.sub_op) { + PL_sublex_info.sub_op->op_private |= + (PL_lex_repl ? OPpTRANS_FROM_UTF + : OPpTRANS_TO_UTF); + } } else { *d++ = (char)uv; } } else { - *d++ = (char)uv; + *d++ = (char) uv; } continue; @@ -1484,15 +1533,17 @@ S_scan_const(pTHX_ char *start) SV *res; STRLEN len; char *str; - + if (!e) { yyerror("Missing right brace on \\N{}"); e = s - 1; goto cont_scan; } res = newSVpvn(s + 1, e - s - 1); - res = new_constant( Nullch, 0, "charnames", + res = new_constant( Nullch, 0, "charnames", res, Nullsv, "\\N{...}" ); + if (has_utf8) + sv_utf8_upgrade(res); str = SvPV(res,len); if (!has_utf8 && SvUTF8(res)) { char *ostart = SvPVX(sv); @@ -1508,7 +1559,7 @@ S_scan_const(pTHX_ char *start) if (len > e - s + 4) { char *odest = SvPVX(sv); - SvGROW(sv, (SvCUR(sv) + len - (e - s + 4))); + SvGROW(sv, (SvLEN(sv) + len - (e - s + 4))); d = SvPVX(sv) + (d - odest); } Copy(str, d, len, char); @@ -1524,66 +1575,79 @@ S_scan_const(pTHX_ char *start) /* \c is a control character */ case 'c': s++; -#ifdef EBCDIC - *d = *s++; - if (isLOWER(*d)) - *d = toUPPER(*d); - *d = toCTRL(*d); - d++; -#else { U8 c = *s++; - *d++ = toCTRL(c); - } +#ifdef EBCDIC + if (isLOWER(c)) + c = toUPPER(c); #endif + *d++ = NATIVE_TO_NEED(has_utf8,toCTRL(c)); + } continue; /* printf-style backslashes, formfeeds, newlines, etc */ case 'b': - *d++ = '\b'; + *d++ = NATIVE_TO_NEED(has_utf8,'\b'); break; case 'n': - *d++ = '\n'; + *d++ = NATIVE_TO_NEED(has_utf8,'\n'); break; case 'r': - *d++ = '\r'; + *d++ = NATIVE_TO_NEED(has_utf8,'\r'); break; case 'f': - *d++ = '\f'; + *d++ = NATIVE_TO_NEED(has_utf8,'\f'); break; case 't': - *d++ = '\t'; - break; -#ifdef EBCDIC - case 'e': - *d++ = '\047'; /* CP 1047 */ - break; - case 'a': - *d++ = '\057'; /* CP 1047 */ + *d++ = NATIVE_TO_NEED(has_utf8,'\t'); break; -#else case 'e': - *d++ = '\033'; + *d++ = ASCII_TO_NEED(has_utf8,'\033'); break; case 'a': - *d++ = '\007'; + *d++ = ASCII_TO_NEED(has_utf8,'\007'); break; -#endif } /* end switch */ s++; continue; } /* end if (backslash) */ - *d++ = *s++; + default_action: + /* If we started with encoded form, or already know we want it + and then encode the next character */ + if ((has_utf8 || this_utf8) && !NATIVE_IS_INVARIANT((U8)(*s))) { + STRLEN len = 1; + UV uv = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s); + STRLEN need = UNISKIP(NATIVE_TO_UNI(uv)); + s += len; + if (need > len) { + /* encoded value larger than old, need extra space (NOTE: SvCUR() not set here) */ + STRLEN off = d - SvPVX(sv); + d = SvGROW(sv, SvLEN(sv) + (need-len)) + off; + } + d = (char*)uvchr_to_utf8((U8*)d, uv); + has_utf8 = TRUE; + } + else { + *d++ = NATIVE_TO_NEED(has_utf8,*s++); + } } /* while loop to process each character */ /* terminate the string and set up the sv */ *d = '\0'; SvCUR_set(sv, d - SvPVX(sv)); + if (SvCUR(sv) >= SvLEN(sv)) + Perl_croak(aTHX_ "panic: constant overflowed allocated space"); + SvPOK_on(sv); - if (has_utf8) + if (has_utf8) { SvUTF8_on(sv); + if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) { + PL_sublex_info.sub_op->op_private |= + (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF); + } + } /* shrink the sv if we allocated more than we used */ if (SvCUR(sv) + 5 < SvLEN(sv)) { @@ -1594,9 +1658,9 @@ S_scan_const(pTHX_ char *start) /* return the substring (via yylval) only if we parsed anything */ if (s > PL_bufptr) { if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) ) - sv = new_constant(start, s - start, (PL_lex_inpat ? "qr" : "q"), + sv = new_constant(start, s - start, (PL_lex_inpat ? "qr" : "q"), sv, Nullsv, - ( PL_lex_inwhat == OP_TRANS + ( PL_lex_inwhat == OP_TRANS ? "tr" : ( (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) ? "s" @@ -1867,7 +1931,7 @@ S_incl_perldb(pTHX) /* Encoded script support. filter_add() effectively inserts a - * 'pre-processing' function into the current source input stream. + * 'pre-processing' function into the current source input stream. * Note that the filter function only applies to the current source file * (e.g., it will not affect files 'require'd or 'use'd by this one). * @@ -1903,7 +1967,7 @@ Perl_filter_add(pTHX_ filter_t funcp, SV *datasv) av_store(PL_rsfp_filters, 0, datasv) ; return(datasv); } - + /* Delete most recently added instance of this filter function. */ void @@ -1930,8 +1994,8 @@ Perl_filter_del(pTHX_ filter_t funcp) /* Invoke the n'th filter function for the current rsfp. */ I32 Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen) - - + + /* 0 = read one text line */ { filter_t funcp; @@ -1944,7 +2008,7 @@ Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen) /* Note that we append to the line. This is handy. */ DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_read %d: from rsfp\n", idx)); - if (maxlen) { + if (maxlen) { /* Want a block */ int len ; int old_len = SvCUR(buf_sv) ; @@ -2067,24 +2131,21 @@ S_find_in_my_stash(pTHX_ char *pkgname, I32 len) */ #ifdef USE_PURE_BISON -#ifdef __SC__ -#pragma segment Perl_yylex_r -#endif int Perl_yylex_r(pTHX_ YYSTYPE *lvalp, int *lcharp) { - dTHR; int r; + yyactlevel++; yylval_pointer[yyactlevel] = lvalp; yychar_pointer[yyactlevel] = lcharp; - yyactlevel++; if (yyactlevel >= YYMAXLEVEL) Perl_croak(aTHX_ "panic: YYMAXLEVEL"); r = Perl_yylex(aTHX); - yyactlevel--; + if (yyactlevel > 0) + yyactlevel--; return r; } @@ -2093,21 +2154,16 @@ Perl_yylex_r(pTHX_ YYSTYPE *lvalp, int *lcharp) #ifdef __SC__ #pragma segment Perl_yylex #endif - int -#ifdef USE_PURE_BISON -Perl_yylex(pTHX_ YYSTYPE *lvalp, int *lcharp) -#else Perl_yylex(pTHX) -#endif { - dTHR; register char *s; register char *d; register I32 tmp; STRLEN len; GV *gv = Nullgv; GV **gvp = 0; + bool bof = FALSE; /* check if there's an identifier for us to look at */ if (PL_pending_ident) { @@ -2116,7 +2172,7 @@ Perl_yylex(pTHX) PL_pending_ident = 0; DEBUG_T({ PerlIO_printf(Perl_debug_log, - "### Tokener saw identifier '%s'\n", PL_tokenbuf); }) + "### Tokener saw identifier '%s'\n", PL_tokenbuf); }); /* if we're in a my(), we can't allow dynamics here. $foo'bar has already been turned into $foo::bar, so @@ -2142,7 +2198,7 @@ Perl_yylex(pTHX) } } - /* + /* build the ops for accesses to a my() variable. Deny my($a) or my($b) in a sort block, *if* $a or $b is @@ -2257,7 +2313,7 @@ Perl_yylex(pTHX) } DEBUG_T({ PerlIO_printf(Perl_debug_log, "### Next token after '%s' was known, type %"IVdf"\n", PL_bufptr, - (IV)PL_nexttype[PL_nexttoke]); }) + (IV)PL_nexttype[PL_nexttoke]); }); return(PL_nexttype[PL_nexttoke]); @@ -2291,7 +2347,7 @@ Perl_yylex(pTHX) } else { DEBUG_T({ PerlIO_printf(Perl_debug_log, - "### Saw case modifier at '%s'\n", PL_bufptr); }) + "### Saw case modifier at '%s'\n", PL_bufptr); }); s = PL_bufptr + 1; if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3)) tmp = *s, *s = s[2], s[2] = tmp; /* misordered... */ @@ -2343,7 +2399,7 @@ Perl_yylex(pTHX) if (PL_bufptr == PL_bufend) return sublex_done(); DEBUG_T({ PerlIO_printf(Perl_debug_log, - "### Interpolated variable at '%s'\n", PL_bufptr); }) + "### Interpolated variable at '%s'\n", PL_bufptr); }); PL_expect = XTERM; PL_lex_dojoin = (*PL_bufptr == '@'); PL_lex_state = LEX_INTERPNORMAL; @@ -2443,7 +2499,7 @@ Perl_yylex(pTHX) DEBUG_T( { PerlIO_printf(Perl_debug_log, "### Tokener expecting %s at %s\n", exp_name[PL_expect], s); - } ) + } ); retry: switch (*s) { @@ -2460,9 +2516,9 @@ Perl_yylex(pTHX) PL_last_lop = 0; if (PL_lex_brackets) yyerror("Missing right curly or square bracket"); - DEBUG_T( { PerlIO_printf(Perl_debug_log, + DEBUG_T( { PerlIO_printf(Perl_debug_log, "### Tokener got EOF\n"); - } ) + } ); TOKEN(0); } if (s++ < PL_bufend) @@ -2489,9 +2545,6 @@ Perl_yylex(pTHX) if (PL_minus_l) sv_catpv(PL_linestr,"chomp;"); if (PL_minus_a) { - GV* gv = gv_fetchpv("::F", TRUE, SVt_PVAV); - if (gv) - GvIMPORTED_AV_on(gv); if (PL_minus_F) { if (strchr("/'\"", *PL_splitstr) && strchr(PL_splitstr + 1, *PL_splitstr)) @@ -2501,7 +2554,7 @@ Perl_yylex(pTHX) s = "'~#\200\1'"; /* surely one char is unused...*/ while (s[1] && strchr(PL_splitstr, *s)) s++; delim = *s; - Perl_sv_catpvf(aTHX_ PL_linestr, "@F=split(%s%c", + Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s%c", "q" + (delim == '\''), delim); for (s = PL_splitstr; *s; s++) { if (*s == '\\') @@ -2512,12 +2565,13 @@ Perl_yylex(pTHX) } } else - sv_catpv(PL_linestr,"@F=split(' ');"); + sv_catpv(PL_linestr,"our @F=split(' ');"); } } sv_catpv(PL_linestr, "\n"); PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr); PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr); + PL_last_lop = PL_last_uni = Nullch; if (PERLDB_LINE && PL_curstash != PL_debstash) { SV *sv = NEWSV(85,0); @@ -2528,8 +2582,35 @@ Perl_yylex(pTHX) goto retry; } do { - bool bof = PL_rsfp ? TRUE : FALSE; - if (bof) { + bof = PL_rsfp ? TRUE : FALSE; + if ((s = filter_gets(PL_linestr, PL_rsfp, 0)) == Nullch) { + fake_eof: + if (PL_rsfp) { + if (PL_preprocess && !PL_in_eval) + (void)PerlProc_pclose(PL_rsfp); + else if ((PerlIO *)PL_rsfp == PerlIO_stdin()) + PerlIO_clearerr(PL_rsfp); + else + (void)PerlIO_close(PL_rsfp); + PL_rsfp = Nullfp; + PL_doextract = FALSE; + } + if (!PL_in_eval && (PL_minus_n || PL_minus_p)) { + sv_setpv(PL_linestr,PL_minus_p ? ";}continue{print" : ""); + sv_catpv(PL_linestr,";}"); + PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr); + PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr); + PL_last_lop = PL_last_uni = Nullch; + PL_minus_n = PL_minus_p = 0; + goto retry; + } + PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr); + PL_last_lop = PL_last_uni = Nullch; + sv_setpv(PL_linestr,""); + TOKEN(';'); /* not infinite loop because rsfp is NULL now */ + } + /* if it looks like the start of a BOM, check if it in fact is */ + else if (bof && (!*s || *(U8*)s == 0xEF || *(U8*)s >= 0xFE)) { #ifdef PERLIO_IS_STDIO # ifdef __GNU_LIBRARY__ # if __GNU_LIBRARY__ == 1 /* Linux glibc5 */ @@ -2549,38 +2630,14 @@ Perl_yylex(pTHX) * Workaround? Maybe attach some extra state to PL_rsfp? */ if (!PL_preprocess) - bof = PerlIO_tell(PL_rsfp) == 0; + bof = PerlIO_tell(PL_rsfp) == SvCUR(PL_linestr); #else - bof = PerlIO_tell(PL_rsfp) == 0; + bof = PerlIO_tell(PL_rsfp) == SvCUR(PL_linestr); #endif - } - s = filter_gets(PL_linestr, PL_rsfp, 0); - if (s == Nullch) { - fake_eof: - if (PL_rsfp) { - if (PL_preprocess && !PL_in_eval) - (void)PerlProc_pclose(PL_rsfp); - else if ((PerlIO *)PL_rsfp == PerlIO_stdin()) - PerlIO_clearerr(PL_rsfp); - else - (void)PerlIO_close(PL_rsfp); - PL_rsfp = Nullfp; - PL_doextract = FALSE; - } - if (!PL_in_eval && (PL_minus_n || PL_minus_p)) { - sv_setpv(PL_linestr,PL_minus_p ? ";}continue{print" : ""); - sv_catpv(PL_linestr,";}"); - PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr); + if (bof) { PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr); - PL_minus_n = PL_minus_p = 0; - goto retry; + s = swallow_bom((U8*)s); } - PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr); - sv_setpv(PL_linestr,""); - TOKEN(';'); /* not infinite loop because rsfp is NULL now */ - } else if (bof) { - PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr); - s = swallow_bom((U8*)s); } if (PL_doextract) { if (*s == '#' && s[1] == '!' && instr(s,"perl")) @@ -2591,9 +2648,10 @@ Perl_yylex(pTHX) sv_setpv(PL_linestr, ""); PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr); PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr); + PL_last_lop = PL_last_uni = Nullch; PL_doextract = FALSE; } - } + } incline(s); } while (PL_doextract); PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s; @@ -2605,6 +2663,7 @@ Perl_yylex(pTHX) av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv); } PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr); + PL_last_lop = PL_last_uni = Nullch; if (CopLINE(PL_curcop) == 1) { while (s < PL_bufend && isSPACE(*s)) s++; @@ -2718,7 +2777,7 @@ Perl_yylex(pTHX) else newargv = PL_origargv; newargv[0] = ipath; - PerlProc_execv(ipath, newargv); + PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv)); Perl_croak(aTHX_ "Can't exec %s", ipath); } #endif @@ -2748,6 +2807,7 @@ Perl_yylex(pTHX) sv_setpv(PL_linestr, ""); PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr); PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr); + PL_last_lop = PL_last_uni = Nullch; PL_preambled = FALSE; if (PERLDB_LINE) (void)gv_fetchfile(PL_origfilename); @@ -2766,7 +2826,7 @@ Perl_yylex(pTHX) case '\r': #ifdef PERL_STRICT_CR Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r'); - Perl_croak(aTHX_ + Perl_croak(aTHX_ "\t(Maybe you didn't strip carriage returns after a network transfer?)\n"); #endif case ' ': case '\t': case '\f': case 013: @@ -2788,6 +2848,8 @@ Perl_yylex(pTHX) s++; if (s < d) s++; + else if (s > d) /* Found by Ilya: feed random input to Perl. */ + Perl_croak(aTHX_ "panic: input overflow"); incline(s); if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) { PL_bufptr = s; @@ -2802,6 +2864,8 @@ Perl_yylex(pTHX) goto retry; case '-': if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) { + I32 ftst = 0; + s++; PL_bufptr = s; tmp = *s++; @@ -2811,48 +2875,65 @@ Perl_yylex(pTHX) if (strnEQ(s,"=>",2)) { s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE); - DEBUG_T( { PerlIO_printf(Perl_debug_log, + DEBUG_T( { PerlIO_printf(Perl_debug_log, "### Saw unary minus before =>, forcing word '%s'\n", s); - } ) + } ); OPERATOR('-'); /* unary minus */ } PL_last_uni = PL_oldbufptr; - PL_last_lop_op = OP_FTEREAD; /* good enough */ - DEBUG_T( { PerlIO_printf(Perl_debug_log, - "### Saw file test %c\n", (int)tmp); - } ) switch (tmp) { - case 'r': FTST(OP_FTEREAD); - case 'w': FTST(OP_FTEWRITE); - case 'x': FTST(OP_FTEEXEC); - case 'o': FTST(OP_FTEOWNED); - case 'R': FTST(OP_FTRREAD); - case 'W': FTST(OP_FTRWRITE); - case 'X': FTST(OP_FTREXEC); - case 'O': FTST(OP_FTROWNED); - case 'e': FTST(OP_FTIS); - case 'z': FTST(OP_FTZERO); - case 's': FTST(OP_FTSIZE); - case 'f': FTST(OP_FTFILE); - case 'd': FTST(OP_FTDIR); - case 'l': FTST(OP_FTLINK); - case 'p': FTST(OP_FTPIPE); - case 'S': FTST(OP_FTSOCK); - case 'u': FTST(OP_FTSUID); - case 'g': FTST(OP_FTSGID); - case 'k': FTST(OP_FTSVTX); - case 'b': FTST(OP_FTBLK); - case 'c': FTST(OP_FTCHR); - case 't': FTST(OP_FTTTY); - case 'T': FTST(OP_FTTEXT); - case 'B': FTST(OP_FTBINARY); - case 'M': gv_fetchpv("\024",TRUE, SVt_PV); FTST(OP_FTMTIME); - case 'A': gv_fetchpv("\024",TRUE, SVt_PV); FTST(OP_FTATIME); - case 'C': gv_fetchpv("\024",TRUE, SVt_PV); FTST(OP_FTCTIME); + case 'r': ftst = OP_FTEREAD; break; + case 'w': ftst = OP_FTEWRITE; break; + case 'x': ftst = OP_FTEEXEC; break; + case 'o': ftst = OP_FTEOWNED; break; + case 'R': ftst = OP_FTRREAD; break; + case 'W': ftst = OP_FTRWRITE; break; + case 'X': ftst = OP_FTREXEC; break; + case 'O': ftst = OP_FTROWNED; break; + case 'e': ftst = OP_FTIS; break; + case 'z': ftst = OP_FTZERO; break; + case 's': ftst = OP_FTSIZE; break; + case 'f': ftst = OP_FTFILE; break; + case 'd': ftst = OP_FTDIR; break; + case 'l': ftst = OP_FTLINK; break; + case 'p': ftst = OP_FTPIPE; break; + case 'S': ftst = OP_FTSOCK; break; + case 'u': ftst = OP_FTSUID; break; + case 'g': ftst = OP_FTSGID; break; + case 'k': ftst = OP_FTSVTX; break; + case 'b': ftst = OP_FTBLK; break; + case 'c': ftst = OP_FTCHR; break; + case 't': ftst = OP_FTTTY; break; + case 'T': ftst = OP_FTTEXT; break; + case 'B': ftst = OP_FTBINARY; break; + case 'M': case 'A': case 'C': + gv_fetchpv("\024",TRUE, SVt_PV); + switch (tmp) { + case 'M': ftst = OP_FTMTIME; break; + case 'A': ftst = OP_FTATIME; break; + case 'C': ftst = OP_FTCTIME; break; + default: break; + } + break; default: - Perl_croak(aTHX_ "Unrecognized file test: -%c", (int)tmp); break; } + if (ftst) { + PL_last_lop_op = ftst; + DEBUG_T( { PerlIO_printf(Perl_debug_log, + "### Saw file test %c\n", (int)ftst); + } ); + FTST(ftst); + } + else { + /* Assume it was a minus followed by a one-letter named + * subroutine call (or a -bareword), then. */ + DEBUG_T( { PerlIO_printf(Perl_debug_log, + "### %c looked like a file test but was not\n", + (int)ftst); + } ); + s -= 2; + } } tmp = *s++; if (*s == tmp) { @@ -2981,10 +3062,6 @@ Perl_yylex(pTHX) if (*d == '(') { d = scan_str(d,TRUE,TRUE); if (!d) { - if (PL_lex_stuff) { - SvREFCNT_dec(PL_lex_stuff); - PL_lex_stuff = Nullsv; - } /* MUST advance bufptr here to avoid bogus "at end of line" context messages from yyerror(). */ @@ -3004,9 +3081,25 @@ Perl_yylex(pTHX) PL_lex_stuff = Nullsv; } else { - attrs = append_elem(OP_LIST, attrs, - newSVOP(OP_CONST, 0, - newSVpvn(s, len))); + if (!PL_in_my && len == 6 && strnEQ(s, "lvalue", len)) + CvLVALUE_on(PL_compcv); + else if (!PL_in_my && len == 6 && strnEQ(s, "locked", len)) + CvLOCKED_on(PL_compcv); + else if (!PL_in_my && len == 6 && strnEQ(s, "method", len)) + CvMETHOD_on(PL_compcv); +#ifdef USE_ITHREADS + else if (PL_in_my == KEY_our && len == 6 && strnEQ(s, "unique", len)) + GvUNIQUE_on(cGVOPx_gv(yylval.opval)); +#endif + /* After we've set the flags, it could be argued that + we don't need to do the attributes.pm-based setting + process, and shouldn't bother appending recognized + flags. To experiment with that, uncomment the + following "else": */ + else + attrs = append_elem(OP_LIST, attrs, + newSVOP(OP_CONST, 0, + newSVpvn(s, len))); } s = skipspace(d); if (*s == ':' && s[1] != ':') @@ -3135,8 +3228,16 @@ Perl_yylex(pTHX) else PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR; s = skipspace(s); - if (*s == '}') + if (*s == '}') { + if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) { + PL_expect = XTERM; + /* This hack is to get the ${} in the message. */ + PL_bufptr = s+1; + yyerror("syntax error"); + break; + } OPERATOR(HASHBRACK); + } /* This hack serves to disambiguate a pair of curlies * as being a block or an anon hash. Normally, expectation * determines that, but in cases where we're not in a @@ -3550,8 +3651,8 @@ Perl_yylex(pTHX) case '?': /* may either be conditional or pattern */ if (PL_expect != XOPERATOR) { /* Disable warning on "study /blah/" */ - if (PL_oldoldbufptr == PL_last_uni - && (*PL_last_uni != 's' || s - PL_last_uni < 5 + if (PL_oldoldbufptr == PL_last_uni + && (*PL_last_uni != 's' || s - PL_last_uni < 5 || memNE(PL_last_uni, "study", 5) || isALNUM_lazy_if(PL_last_uni+5,UTF))) check_uni(); @@ -3596,18 +3697,18 @@ Perl_yylex(pTHX) case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': s = scan_num(s, &yylval); - DEBUG_T( { PerlIO_printf(Perl_debug_log, + DEBUG_T( { PerlIO_printf(Perl_debug_log, "### Saw number in '%s'\n", s); - } ) + } ); if (PL_expect == XOPERATOR) no_op("Number",s); TERM(THING); case '\'': s = scan_str(s,FALSE,FALSE); - DEBUG_T( { PerlIO_printf(Perl_debug_log, - "### Saw string in '%s'\n", s); - } ) + DEBUG_T( { PerlIO_printf(Perl_debug_log, + "### Saw string before '%s'\n", s); + } ); if (PL_expect == XOPERATOR) { if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) { PL_expect = XTERM; @@ -3624,9 +3725,9 @@ Perl_yylex(pTHX) case '"': s = scan_str(s,FALSE,FALSE); - DEBUG_T( { PerlIO_printf(Perl_debug_log, - "### Saw string in '%s'\n", s); - } ) + DEBUG_T( { PerlIO_printf(Perl_debug_log, + "### Saw string before '%s'\n", s); + } ); if (PL_expect == XOPERATOR) { if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) { PL_expect = XTERM; @@ -3640,7 +3741,7 @@ Perl_yylex(pTHX) missingterm((char*)0); yylval.ival = OP_CONST; for (d = SvPV(PL_lex_stuff, len); len; len--, d++) { - if (*d == '$' || *d == '@' || *d == '\\' || *d & 0x80) { + if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) { yylval.ival = OP_STRINGIFY; break; } @@ -3649,9 +3750,9 @@ Perl_yylex(pTHX) case '`': s = scan_str(s,FALSE,FALSE); - DEBUG_T( { PerlIO_printf(Perl_debug_log, - "### Saw backtick string in '%s'\n", s); - } ) + DEBUG_T( { PerlIO_printf(Perl_debug_log, + "### Saw backtick string before '%s'\n", s); + } ); if (PL_expect == XOPERATOR) no_op("Backticks",s); if (!s) @@ -3681,7 +3782,7 @@ Perl_yylex(pTHX) TERM(THING); } /* avoid v123abc() or $h{v1}, allow C */ - else if (!isALPHA(*start) && (PL_expect == XTERM || PL_expect == XREF)) { + else if (!isALPHA(*start) && (PL_expect == XTERM || PL_expect == XREF || PL_expect == XSTATE)) { char c = *start; GV *gv; *start = '\0'; @@ -3767,6 +3868,8 @@ Perl_yylex(pTHX) CLINE; yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpv(PL_tokenbuf,0)); yylval.opval->op_private = OPpCONST_BARE; + if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len)) + SvUTF8_on(((SVOP*)yylval.opval)->op_sv); TERM(WORD); } @@ -3819,6 +3922,7 @@ Perl_yylex(pTHX) default: /* not a keyword */ just_a_word: { SV *sv; + int pkgname = 0; char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]); /* Get the rest if it looks like a package qualifier */ @@ -3831,6 +3935,7 @@ Perl_yylex(pTHX) Perl_croak(aTHX_ "Bad name after %s%s", PL_tokenbuf, *s == '\'' ? "'" : "::"); len += morelen; + pkgname = 1; } if (PL_expect == XOPERATOR) { @@ -3851,7 +3956,7 @@ Perl_yylex(pTHX) PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':') { if (ckWARN(WARN_BAREWORD) && ! gv_fetchpv(PL_tokenbuf, FALSE, SVt_PVHV)) - Perl_warner(aTHX_ WARN_BAREWORD, + Perl_warner(aTHX_ WARN_BAREWORD, "Bareword \"%s\" refers to nonexistent package", PL_tokenbuf); len -= 2; @@ -3908,24 +4013,25 @@ Perl_yylex(pTHX) /* If not a declared subroutine, it's an indirect object. */ /* (But it's an indir obj regardless for sort.) */ - if ((PL_last_lop_op == OP_SORT || - (!immediate_paren && (!gv || !GvCVu(gv)))) && + if ( !immediate_paren && (PL_last_lop_op == OP_SORT || + ((!gv || !GvCVu(gv)) && (PL_last_lop_op != OP_MAPSTART && - PL_last_lop_op != OP_GREPSTART)) + PL_last_lop_op != OP_GREPSTART)))) { PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR; goto bareword; } } - PL_expect = XOPERATOR; s = skipspace(s); /* Is this a word before a => operator? */ - if (*s == '=' && s[1] == '>') { + if (*s == '=' && s[1] == '>' && !pkgname) { CLINE; sv_setpv(((SVOP*)yylval.opval)->op_sv, PL_tokenbuf); + if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len)) + SvUTF8_on(((SVOP*)yylval.opval)->op_sv); TERM(WORD); } @@ -4087,16 +4193,26 @@ Perl_yylex(pTHX) loc = PerlIO_tell(PL_rsfp); (void)PerlIO_seek(PL_rsfp, 0L, 0); } +#ifdef NETWARE + if (PerlLIO_setmode(PL_rsfp, O_TEXT) != -1) { +#else if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) { -#if defined(__BORLANDC__) +#endif /* NETWARE */ +#ifdef PERLIO_IS_STDIO /* really? */ +# if defined(__BORLANDC__) /* XXX see note in do_binmode() */ - ((FILE*)PL_rsfp)->flags |= _F_BIN; + ((FILE*)PL_rsfp)->flags &= ~_F_BIN; +# endif #endif if (loc > 0) PerlIO_seek(PL_rsfp, loc, 0); } } #endif +#ifdef PERLIO_LAYERS + if (UTF && !IN_BYTES) + PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8"); +#endif PL_rsfp = Nullfp; } goto fake_eof; @@ -4183,12 +4299,6 @@ Perl_yylex(pTHX) LOP(OP_CRYPT,XTERM); case KEY_chmod: - if (ckWARN(WARN_CHMOD)) { - for (d = s; d < PL_bufend && (isSPACE(*d) || *d == '('); d++) ; - if (*d != '0' && isDIGIT(*d)) - Perl_warner(aTHX_ WARN_CHMOD, - "chmod() mode argument is missing initial 0"); - } LOP(OP_CHMOD,XTERM); case KEY_chown: @@ -4247,7 +4357,7 @@ Perl_yylex(pTHX) case KEY_exists: UNI(OP_EXISTS); - + case KEY_exit: UNI(OP_EXIT); @@ -4451,7 +4561,7 @@ Perl_yylex(pTHX) case KEY_last: s = force_word(s,WORD,TRUE,FALSE,FALSE); LOOPX(OP_LAST); - + case KEY_lc: UNI(OP_LC); @@ -4596,7 +4706,7 @@ Perl_yylex(pTHX) case KEY_pos: UNI(OP_POS); - + case KEY_pack: LOP(OP_PACK,XTERM); @@ -4627,6 +4737,7 @@ Perl_yylex(pTHX) int warned = 0; d = SvPV_force(PL_lex_stuff, len); while (len) { + SV *sv; for (; isSPACE(*d) && len; --len, ++d) ; if (len) { char *b = d; @@ -4647,8 +4758,11 @@ Perl_yylex(pTHX) else { for (; !isSPACE(*d) && len; --len, ++d) ; } + sv = newSVpvn(b, d-b); + if (DO_UTF8(PL_lex_stuff)) + SvUTF8_on(sv); words = append_elem(OP_LIST, words, - newSVOP(OP_CONST, 0, tokeq(newSVpvn(b, d-b)))); + newSVOP(OP_CONST, 0, tokeq(sv))); } } if (words) { @@ -4656,9 +4770,10 @@ Perl_yylex(pTHX) force_next(THING); } } - if (PL_lex_stuff) + if (PL_lex_stuff) { SvREFCNT_dec(PL_lex_stuff); - PL_lex_stuff = Nullsv; + PL_lex_stuff = Nullsv; + } PL_expect = XTERM; TOKEN('('); @@ -4758,7 +4873,7 @@ Perl_yylex(pTHX) case KEY_chomp: UNI(OP_CHOMP); - + case KEY_scalar: UNI(OP_SCALAR); @@ -4878,7 +4993,7 @@ Perl_yylex(pTHX) really_sub: { char tmpbuf[sizeof PL_tokenbuf]; - SSize_t tboffset; + SSize_t tboffset = 0; expectation attrful; bool have_name, have_proto; int key = tmp; @@ -4926,12 +5041,8 @@ Perl_yylex(pTHX) char *p; s = scan_str(s,FALSE,FALSE); - if (!s) { - if (PL_lex_stuff) - SvREFCNT_dec(PL_lex_stuff); - PL_lex_stuff = Nullsv; + if (!s) Perl_croak(aTHX_ "Prototype not terminated"); - } /* strip spaces */ d = SvPVX(PL_lex_stuff); tmp = 0; @@ -5045,12 +5156,6 @@ Perl_yylex(pTHX) LOP(OP_UTIME,XTERM); case KEY_umask: - if (ckWARN(WARN_UMASK)) { - for (d = s; d < PL_bufend && (isSPACE(*d) || *d == '('); d++) ; - if (*d != '0' && isDIGIT(*d)) - Perl_warner(aTHX_ WARN_UMASK, - "umask: argument is missing initial 0"); - } UNI(OP_UMASK); case KEY_unshift: @@ -5102,7 +5207,7 @@ Perl_yylex(pTHX) { static char ctl_l[2]; - if (ctl_l[0] == '\0') + if (ctl_l[0] == '\0') ctl_l[0] = toCTRL('L'); gv_fetchpv(ctl_l,TRUE, SVt_PV); } @@ -5474,7 +5579,7 @@ Perl_keyword(pTHX_ register char *d, I32 len) case 'p': switch (len) { case 3: - if (strEQ(d,"pop")) return -KEY_pop; + if (strEQ(d,"pop")) return -KEY_pop; if (strEQ(d,"pos")) return KEY_pos; break; case 4: @@ -5526,7 +5631,7 @@ Perl_keyword(pTHX_ register char *d, I32 len) if (strEQ(d,"rindex")) return -KEY_rindex; break; case 7: - if (strEQ(d,"require")) return -KEY_require; + if (strEQ(d,"require")) return KEY_require; if (strEQ(d,"reverse")) return -KEY_reverse; if (strEQ(d,"readdir")) return -KEY_readdir; break; @@ -5740,7 +5845,6 @@ S_checkcomma(pTHX_ register char *s, char *name, char *what) char *w; if (*s == ' ' && s[1] == '(') { /* XXX gotta be a better way */ - dTHR; /* only for ckWARN */ if (ckWARN(WARN_SYNTAX)) { int level = 1; for (w = s+2; *w && level; w++) { @@ -5795,14 +5899,14 @@ S_new_constant(pTHX_ char *s, STRLEN len, const char *key, SV *sv, SV *pv, SV **cvp; SV *cv, *typesv; const char *why1, *why2, *why3; - + if (!table || !(PL_hints & HINT_LOCALIZE_HH)) { SV *msg; why2 = strEQ(key,"charnames") ? "(possibly a missing \"use charnames ...\")" : ""; - msg = Perl_newSVpvf(aTHX_ "Constant(%s) unknown: %s", + msg = Perl_newSVpvf(aTHX_ "Constant(%s) unknown: %s", (type ? type: "undef"), why2); /* This is convoluted and evil ("goto considered harmful") @@ -5813,7 +5917,7 @@ S_new_constant(pTHX_ char *s, STRLEN len, const char *key, SV *sv, SV *pv, goto msgdone; report: - msg = Perl_newSVpvf(aTHX_ "Constant(%s): %s%s%s", + msg = Perl_newSVpvf(aTHX_ "Constant(%s): %s%s%s", (type ? type: "undef"), why1, why2, why3); msgdone: yyerror(SvPVX(msg)); @@ -5835,11 +5939,11 @@ S_new_constant(pTHX_ char *s, STRLEN len, const char *key, SV *sv, SV *pv, typesv = sv_2mortal(newSVpv(type, 0)); else typesv = &PL_sv_undef; - + PUSHSTACKi(PERLSI_OVERLOAD); ENTER ; SAVETMPS; - + PUSHMARK(SP) ; EXTEND(sp, 3); if (pv) @@ -5849,9 +5953,9 @@ S_new_constant(pTHX_ char *s, STRLEN len, const char *key, SV *sv, SV *pv, PUSHs(typesv); PUTBACK; call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL)); - + SPAGAIN ; - + /* Check the eval first */ if (!PL_in_eval && SvTRUE(ERRSV)) { STRLEN n_a; @@ -5864,12 +5968,12 @@ S_new_constant(pTHX_ char *s, STRLEN len, const char *key, SV *sv, SV *pv, res = POPs; (void)SvREFCNT_inc(res); } - + PUTBACK ; FREETMPS ; LEAVE ; POPSTACK; - + if (!SvOK(res)) { why1 = "Call to &{$^H{"; why2 = key; @@ -5880,7 +5984,7 @@ S_new_constant(pTHX_ char *s, STRLEN len, const char *key, SV *sv, SV *pv, return res; } - + STATIC char * S_scan_word(pTHX_ register char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp) { @@ -5900,9 +6004,9 @@ S_scan_word(pTHX_ register char *s, char *dest, STRLEN destlen, int allow_packag *d++ = *s++; *d++ = *s++; } - else if (UTF && *(U8*)s >= 0xc0 && isALNUM_utf8((U8*)s)) { + else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) { char *t = s + UTF8SKIP(s); - while (*t & 0x80 && is_utf8_mark((U8*)t)) + while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t)) t += UTF8SKIP(t); if (d + (t - s) > e) Perl_croak(aTHX_ ident_too_long); @@ -5952,9 +6056,9 @@ S_scan_ident(pTHX_ register char *s, register char *send, char *dest, STRLEN des *d++ = *s++; *d++ = *s++; } - else if (UTF && *(U8*)s >= 0xc0 && isALNUM_utf8((U8*)s)) { + else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) { char *t = s + UTF8SKIP(s); - while (*t & 0x80 && is_utf8_mark((U8*)t)) + while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t)) t += UTF8SKIP(t); if (d + (t - s) > e) Perl_croak(aTHX_ ident_too_long); @@ -6007,7 +6111,7 @@ S_scan_ident(pTHX_ register char *s, register char *send, char *dest, STRLEN des e = s; while ((e < send && isALNUM_lazy_if(e,UTF)) || *e == ':') { e += UTF8SKIP(e); - while (e < send && *e & 0x80 && is_utf8_mark((U8*)e)) + while (e < send && UTF8_IS_CONTINUED(*e) && is_utf8_mark((U8*)e)) e += UTF8SKIP(e); } Copy(s, d, e - s, char); @@ -6023,7 +6127,6 @@ S_scan_ident(pTHX_ register char *s, register char *send, char *dest, STRLEN des *d = '\0'; while (s < send && SPACE_OR_TAB(*s)) s++; if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) { - dTHR; /* only for ckWARN */ if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest)) { const char *brack = *s == '[' ? "[...]" : "{...}"; Perl_warner(aTHX_ WARN_AMBIGUOUS, @@ -6034,8 +6137,8 @@ S_scan_ident(pTHX_ register char *s, register char *send, char *dest, STRLEN des PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK); return s; } - } - /* Handle extended ${^Foo} variables + } + /* Handle extended ${^Foo} variables * 1999-02-27 mjd-perl-patch@plover.com */ else if (!isALNUM(*d) && !isPRINT(*d) /* isCTRL(d) */ && isALNUM(*s)) @@ -6055,7 +6158,6 @@ S_scan_ident(pTHX_ register char *s, register char *send, char *dest, STRLEN des if (funny == '#') funny = '@'; if (PL_lex_state == LEX_NORMAL) { - dTHR; /* only for ckWARN */ if (ckWARN(WARN_AMBIGUOUS) && (keyword(dest, d - dest) || get_cv(dest, FALSE))) { @@ -6101,12 +6203,8 @@ S_scan_pat(pTHX_ char *start, I32 type) char *s; s = scan_str(start,FALSE,FALSE); - if (!s) { - if (PL_lex_stuff) - SvREFCNT_dec(PL_lex_stuff); - PL_lex_stuff = Nullsv; + if (!s) Perl_croak(aTHX_ "Search pattern not terminated"); - } pm = (PMOP*)newPMOP(type, 0); if (PL_multi_open == '?') @@ -6138,12 +6236,8 @@ S_scan_subst(pTHX_ char *start) s = scan_str(start,FALSE,FALSE); - if (!s) { - if (PL_lex_stuff) - SvREFCNT_dec(PL_lex_stuff); - PL_lex_stuff = Nullsv; + if (!s) Perl_croak(aTHX_ "Substitution pattern not terminated"); - } if (s[-1] == PL_multi_open) s--; @@ -6151,12 +6245,10 @@ S_scan_subst(pTHX_ char *start) first_start = PL_multi_start; s = scan_str(s,FALSE,FALSE); if (!s) { - if (PL_lex_stuff) + if (PL_lex_stuff) { SvREFCNT_dec(PL_lex_stuff); - PL_lex_stuff = Nullsv; - if (PL_lex_repl) - SvREFCNT_dec(PL_lex_repl); - PL_lex_repl = Nullsv; + PL_lex_stuff = Nullsv; + } Perl_croak(aTHX_ "Substitution replacement not terminated"); } PL_multi_start = first_start; /* so whole substitution is taken together */ @@ -6205,35 +6297,24 @@ S_scan_trans(pTHX_ char *start) I32 squash; I32 del; I32 complement; - I32 utf8; - I32 count = 0; yylval.ival = OP_NULL; s = scan_str(start,FALSE,FALSE); - if (!s) { - if (PL_lex_stuff) - SvREFCNT_dec(PL_lex_stuff); - PL_lex_stuff = Nullsv; + if (!s) Perl_croak(aTHX_ "Transliteration pattern not terminated"); - } if (s[-1] == PL_multi_open) s--; s = scan_str(s,FALSE,FALSE); if (!s) { - if (PL_lex_stuff) + if (PL_lex_stuff) { SvREFCNT_dec(PL_lex_stuff); - PL_lex_stuff = Nullsv; - if (PL_lex_repl) - SvREFCNT_dec(PL_lex_repl); - PL_lex_repl = Nullsv; + PL_lex_stuff = Nullsv; + } Perl_croak(aTHX_ "Transliteration replacement not terminated"); } - New(803,tbl,256,short); - o = newPVOP(OP_TRANS, 0, (char*)tbl); - complement = del = squash = 0; while (strchr("cds", *s)) { if (*s == 'c') @@ -6244,7 +6325,12 @@ S_scan_trans(pTHX_ char *start) squash = OPpTRANS_SQUASH; s++; } - o->op_private = del|squash|complement; + + New(803, tbl, complement&&!del?258:256, short); + o = newPVOP(OP_TRANS, 0, (char*)tbl); + o->op_private = del|squash|complement| + (DO_UTF8(PL_lex_stuff)? OPpTRANS_FROM_UTF : 0)| + (DO_UTF8(PL_lex_repl) ? OPpTRANS_TO_UTF : 0); PL_lex_op = o; yylval.ival = OP_TRANS; @@ -6254,7 +6340,6 @@ S_scan_trans(pTHX_ char *start) STATIC char * S_scan_heredoc(pTHX_ register char *s) { - dTHR; SV *herewas; I32 op_type = OP_SCALAR; I32 len; @@ -6387,6 +6472,7 @@ S_scan_heredoc(pTHX_ register char *s) sv_setsv(PL_linestr,herewas); PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart = SvPVX(PL_linestr); PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr); + PL_last_lop = PL_last_uni = Nullch; } else sv_setpvn(tmpstr,"",0); /* avoid "uninitialized" warning */ @@ -6398,6 +6484,7 @@ S_scan_heredoc(pTHX_ register char *s) } CopLINE_inc(PL_curcop); PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr); + PL_last_lop = PL_last_uni = Nullch; #ifndef PERL_STRICT_CR if (PL_bufend - PL_linestart >= 2) { if ((PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n') || @@ -6439,6 +6526,8 @@ retval: Renew(SvPVX(tmpstr), SvLEN(tmpstr), char); } SvREFCNT_dec(herewas); + if (UTF && !IN_BYTES && is_utf8_string((U8*)SvPVX(tmpstr), SvCUR(tmpstr))) + SvUTF8_on(tmpstr); PL_lex_stuff = tmpstr; yylval.ival = op_type; return s; @@ -6589,24 +6678,23 @@ S_scan_inputsymbol(pTHX_ char *start) calls scan_str(). s/// makes yylex() call scan_subst() which calls scan_str(). tr/// and y/// make yylex() call scan_trans() which calls scan_str(). - + It skips whitespace before the string starts, and treats the first character as the delimiter. If the delimiter is one of ([{< then the corresponding "close" character )]}> is used as the closing delimiter. It allows quoting of delimiters, and if the string has balanced delimiters ([{<>}]) it allows nesting. - The lexer always reads these strings into lex_stuff, except in the - case of the operators which take *two* arguments (s/// and tr///) - when it checks to see if lex_stuff is full (presumably with the 1st - arg to s or tr) and if so puts the string into lex_repl. - + On success, the SV with the resulting string is put into lex_stuff or, + if that is already non-NULL, into lex_repl. The second case occurs only + when parsing the RHS of the special constructs s/// and tr/// (y///). + For convenience, the terminating delimiter character is stuffed into + SvIVX of the SV. */ STATIC char * S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims) { - dTHR; SV *sv; /* scalar value: string */ char *tmps; /* temp string, used for delimiter matching */ register char *s = start; /* current position in the buffer */ @@ -6624,7 +6712,7 @@ S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims) /* after skipping whitespace, the next character is the terminator */ term = *s; - if ((term & 0x80) && UTF) + if (!UTF8_IS_INVARIANT((U8)term) && UTF) has_utf8 = TRUE; /* mark where we are */ @@ -6671,7 +6759,7 @@ S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims) have found the terminator */ else if (*s == term) break; - else if (!has_utf8 && (*s & 0x80) && UTF) + else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF) has_utf8 = TRUE; *to = *s; } @@ -6700,7 +6788,7 @@ S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims) break; else if (*s == PL_multi_open) brackets++; - else if (!has_utf8 && (*s & 0x80) && UTF) + else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF) has_utf8 = TRUE; *to = *s; } @@ -6755,8 +6843,9 @@ S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims) /* having changed the buffer, we must update PL_bufend */ PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr); + PL_last_lop = PL_last_uni = Nullch; } - + /* at this point, we have successfully read the delimited string */ if (keep_delims) @@ -6775,7 +6864,7 @@ S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims) /* decide whether this is the first or second quoted string we've read for this op */ - + if (PL_lex_stuff) PL_lex_repl = sv; else @@ -6791,11 +6880,11 @@ S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims) Read a number in any of the formats that Perl accepts: - 0(x[0-7A-F]+)|([0-7]+)|(b[01]) - [\d_]+(\.[\d_]*)?[Ee](\d+) - - Underbars (_) are allowed in decimal numbers. If -w is on, - underbars before a decimal point must be at three digit intervals. + \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*) 12 12.34 12. + \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*) .34 + 0b[01](_?[01])* + 0[0-7](_?[0-7])* + 0x[0-9A-Fa-f](_?[0-9A-Fa-f])* Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the thing it reads. @@ -6804,7 +6893,7 @@ S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims) try converting the number to an integer and see if it can do so without loss of precision. */ - + char * Perl_scan_num(pTHX_ char *start, YYSTYPE* lvalp) { @@ -6813,7 +6902,8 @@ Perl_scan_num(pTHX_ char *start, YYSTYPE* lvalp) register char *e; /* end of temp buffer */ NV nv; /* number read, as a double */ SV *sv = Nullsv; /* place to put the converted number */ - bool floatit; /* boolean: int or float? */ + bool floatit, /* boolean: int or float? */ + octal = 0; /* Is this an octal number? */ char *lastub = 0; /* position of last underbar */ static char number_too_long[] = "Number too long"; @@ -6822,7 +6912,7 @@ Perl_scan_num(pTHX_ char *start, YYSTYPE* lvalp) switch (*s) { default: Perl_croak(aTHX_ "panic: scan_num"); - + /* if it starts with a 0, it could be an octal number, a decimal in 0.13 disguise, or a hexadecimal number, or a binary number. */ case '0': @@ -6837,7 +6927,6 @@ Perl_scan_num(pTHX_ char *start, YYSTYPE* lvalp) we in octal/hex/binary?" indicator to disallow hex characters when in octal mode. */ - dTHR; NV n = 0.0; UV u = 0; I32 shift; @@ -6866,8 +6955,18 @@ Perl_scan_num(pTHX_ char *start, YYSTYPE* lvalp) else if (s[1] == '.' || s[1] == 'e' || s[1] == 'E') goto decimal; /* so it must be octal */ - else + else { shift = 3; + octal = 1; + s++; + } + + if (*s == '_') { + if (ckWARN(WARN_SYNTAX)) + Perl_warner(aTHX_ WARN_SYNTAX, + "Misplaced _ in number"); + lastub = s++; + } base = bases[shift]; Base = Bases[shift]; @@ -6885,9 +6984,12 @@ Perl_scan_num(pTHX_ char *start, YYSTYPE* lvalp) default: goto out; - /* _ are ignored */ + /* _ are ignored -- but warned about if consecutive */ case '_': - s++; + if (ckWARN(WARN_SYNTAX) && lastub && s == lastub + 1) + Perl_warner(aTHX_ WARN_SYNTAX, + "Misplaced _ in number"); + lastub = s++; break; /* 8 and 9 are not octal */ @@ -6925,7 +7027,6 @@ Perl_scan_num(pTHX_ char *start, YYSTYPE* lvalp) if ((x >> shift) != u && !(PL_hints & HINT_NEW_BINARY)) { - dTHR; overflowed = TRUE; n = (NV) u; if (ckWARN_d(WARN_OVERFLOW)) @@ -6955,9 +7056,15 @@ Perl_scan_num(pTHX_ char *start, YYSTYPE* lvalp) the number. */ out: + + /* final misplaced underbar check */ + if (s[-1] == '_') { + if (ckWARN(WARN_SYNTAX)) + Perl_warner(aTHX_ WARN_SYNTAX, "Misplaced _ in number"); + } + sv = NEWSV(92,0); if (overflowed) { - dTHR; if (ckWARN(WARN_PORTABLE) && n > 4294967295.0) Perl_warner(aTHX_ WARN_PORTABLE, "%s number > %s non-portable", @@ -6966,7 +7073,6 @@ Perl_scan_num(pTHX_ char *start, YYSTYPE* lvalp) } else { #if UVSIZE > 4 - dTHR; if (ckWARN(WARN_PORTABLE) && u > 0xffffffff) Perl_warner(aTHX_ WARN_PORTABLE, "%s number > %s non-portable", @@ -6992,14 +7098,14 @@ Perl_scan_num(pTHX_ char *start, YYSTYPE* lvalp) /* read next group of digits and _ and copy into d */ while (isDIGIT(*s) || *s == '_') { - /* skip underscores, checking for misplaced ones + /* skip underscores, checking for misplaced ones if -w is on */ if (*s == '_') { - dTHR; /* only for ckWARN */ - if (ckWARN(WARN_SYNTAX) && lastub && s - lastub != 3) - Perl_warner(aTHX_ WARN_SYNTAX, "Misplaced _ in number"); - lastub = ++s; + if (ckWARN(WARN_SYNTAX) && lastub && s == lastub + 1) + Perl_warner(aTHX_ WARN_SYNTAX, + "Misplaced _ in number"); + lastub = s++; } else { /* check for end of fixed-length buffer */ @@ -7011,8 +7117,7 @@ Perl_scan_num(pTHX_ char *start, YYSTYPE* lvalp) } /* final misplaced underbar check */ - if (lastub && s - lastub != 3) { - dTHR; + if (lastub && s == lastub + 1) { if (ckWARN(WARN_SYNTAX)) Perl_warner(aTHX_ WARN_SYNTAX, "Misplaced _ in number"); } @@ -7025,16 +7130,34 @@ Perl_scan_num(pTHX_ char *start, YYSTYPE* lvalp) floatit = TRUE; *d++ = *s++; - /* copy, ignoring underbars, until we run out of - digits. Note: no misplaced underbar checks! + if (*s == '_') { + if (ckWARN(WARN_SYNTAX)) + Perl_warner(aTHX_ WARN_SYNTAX, + "Misplaced _ in number"); + lastub = s; + } + + /* copy, ignoring underbars, until we run out of digits. */ for (; isDIGIT(*s) || *s == '_'; s++) { /* fixed length buffer check */ if (d >= e) Perl_croak(aTHX_ number_too_long); - if (*s != '_') + if (*s == '_') { + if (ckWARN(WARN_SYNTAX) && lastub && s == lastub + 1) + Perl_warner(aTHX_ WARN_SYNTAX, + "Misplaced _ in number"); + lastub = s; + } + else *d++ = *s; } + /* fractional part ending in underbar? */ + if (s[-1] == '_') { + if (ckWARN(WARN_SYNTAX)) + Perl_warner(aTHX_ WARN_SYNTAX, + "Misplaced _ in number"); + } if (*s == '.' && isDIGIT(s[1])) { /* oops, it's really a v-string, but without the "v" */ s = start - 1; @@ -7043,113 +7166,87 @@ Perl_scan_num(pTHX_ char *start, YYSTYPE* lvalp) } /* read exponent part, if present */ - if (*s && strchr("eE",*s) && strchr("+-0123456789",s[1])) { + if (*s && strchr("eE",*s) && strchr("+-0123456789_", s[1])) { floatit = TRUE; s++; /* regardless of whether user said 3E5 or 3e5, use lower 'e' */ *d++ = 'e'; /* At least some Mach atof()s don't grok 'E' */ + /* stray preinitial _ */ + if (*s == '_') { + if (ckWARN(WARN_SYNTAX)) + Perl_warner(aTHX_ WARN_SYNTAX, + "Misplaced _ in number"); + lastub = s++; + } + /* allow positive or negative exponent */ if (*s == '+' || *s == '-') *d++ = *s++; - /* read digits of exponent (no underbars :-) */ - while (isDIGIT(*s)) { - if (d >= e) - Perl_croak(aTHX_ number_too_long); - *d++ = *s++; + /* stray initial _ */ + if (*s == '_') { + if (ckWARN(WARN_SYNTAX)) + Perl_warner(aTHX_ WARN_SYNTAX, + "Misplaced _ in number"); + lastub = s++; + } + + /* read digits of exponent */ + while (isDIGIT(*s) || *s == '_') { + if (isDIGIT(*s)) { + if (d >= e) + Perl_croak(aTHX_ number_too_long); + *d++ = *s++; + } + else { + if (ckWARN(WARN_SYNTAX) && + ((lastub && s == lastub + 1) || + (!isDIGIT(s[1]) && s[1] != '_'))) + Perl_warner(aTHX_ WARN_SYNTAX, + "Misplaced _ in number"); + lastub = s++; + } } } - /* terminate the string */ - *d = '\0'; /* make an sv from the string */ sv = NEWSV(92,0); -#if defined(Strtol) && defined(Strtoul) - /* - strtol/strtoll sets errno to ERANGE if the number is too big - for an integer. We try to do an integer conversion first - if no characters indicating "float" have been found. + We try to do an integer conversion first if no characters + indicating "float" have been found. */ if (!floatit) { - IV iv; UV uv; - errno = 0; - if (*PL_tokenbuf == '-') - iv = Strtol(PL_tokenbuf, (char**)NULL, 10); - else - uv = Strtoul(PL_tokenbuf, (char**)NULL, 10); - if (errno) - floatit = TRUE; /* Probably just too large. */ - else if (*PL_tokenbuf == '-') - sv_setiv(sv, iv); - else if (uv <= IV_MAX) + int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv); + + if (flags == IS_NUMBER_IN_UV) { + if (uv <= IV_MAX) sv_setiv(sv, uv); /* Prefer IVs over UVs. */ - else + else sv_setuv(sv, uv); - } + } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) { + if (uv <= (UV) IV_MIN) + sv_setiv(sv, -(IV)uv); + else + floatit = TRUE; + } else + floatit = TRUE; + } if (floatit) { + /* terminate the string */ + *d = '\0'; nv = Atof(PL_tokenbuf); sv_setnv(sv, nv); } -#else - /* - No working strtou?ll?. - - Unfortunately atol() doesn't do range checks (returning - LONG_MIN/LONG_MAX, and setting errno to ERANGE on overflows) - everywhere [1], so we cannot use use atol() (or atoll()). - If we could, they would be used, as Atol(), very much like - Strtol() and Strtoul() are used above. - - [1] XXX Configure test needed to check for atol() - (and atoll()) overflow behaviour XXX - - --jhi - - We need to do this the hard way. */ - - nv = Atof(PL_tokenbuf); - - /* See if we can make do with an integer value without loss of - precision. We use U_V to cast to a UV, because some - compilers have issues. Then we try casting it back and see - if it was the same [1]. We only do this if we know we - specifically read an integer. If floatit is true, then we - don't need to do the conversion at all. - - [1] Note that this is lossy if our NVs cannot preserve our - UVs. There are metaconfig defines NV_PRESERVES_UV (a boolean) - and NV_PRESERVES_UV_BITS (a number), but in general we really - do hope all such potentially lossy platforms have strtou?ll? - to do a lossless IV/UV conversion. - Maybe could do some tricks with DBL_DIG, LDBL_DIG and - DBL_MANT_DIG and LDBL_MANT_DIG (these are already available - as NV_DIG and NV_MANT_DIG)? - - --jhi - */ - { - UV uv = U_V(nv); - if (!floatit && (NV)uv == nv) { - if (uv <= IV_MAX) - sv_setiv(sv, uv); /* Prefer IVs over UVs. */ - else - sv_setuv(sv, uv); - } - else - sv_setnv(sv, nv); - } -#endif if ( floatit ? (PL_hints & HINT_NEW_FLOAT) : (PL_hints & HINT_NEW_INTEGER) ) - sv = new_constant(PL_tokenbuf, d - PL_tokenbuf, + sv = new_constant(PL_tokenbuf, d - PL_tokenbuf, (floatit ? "float" : "integer"), sv, Nullsv, NULL); break; @@ -7164,9 +7261,8 @@ vstring: pos++; if (!isALPHA(*pos)) { UV rev; - U8 tmpbuf[UTF8_MAXLEN]; + U8 tmpbuf[UTF8_MAXLEN+1]; U8 *tmpend; - bool utf8 = FALSE; s++; /* get past 'v' */ sv = NEWSV(92,5); @@ -7192,9 +7288,11 @@ vstring: "Integer overflow in decimal number"); } } - tmpend = uv_to_utf8(tmpbuf, rev); - utf8 = utf8 || rev > 127; + /* Append native character for the rev point */ + tmpend = uvchr_to_utf8(tmpbuf, rev); sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf); + if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(rev))) + SvUTF8_on(sv); if (*pos == '.' && isDIGIT(pos[1])) s = ++pos; else { @@ -7204,13 +7302,8 @@ vstring: while (isDIGIT(*pos) || *pos == '_') pos++; } - SvPOK_on(sv); SvREADONLY_on(sv); - if (utf8) { - SvUTF8_on(sv); - sv_utf8_downgrade(sv, TRUE); - } } } break; @@ -7218,8 +7311,11 @@ vstring: /* make the op for the constant and return */ - if (sv) + if (sv) { lvalp->opval = newSVOP(OP_CONST, 0, sv); + if (octal) + ((SVOP *)lvalp->opval)->op_private |= OPpCONST_OCTAL; + } else lvalp->opval = Nullop; @@ -7229,7 +7325,6 @@ vstring: STATIC char * S_scan_formline(pTHX_ register char *s) { - dTHR; register char *eol; register char *t; SV *stuff = newSVpvn("",0); @@ -7277,6 +7372,7 @@ S_scan_formline(pTHX_ register char *s) s = filter_gets(PL_linestr, PL_rsfp, 0); PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr); PL_bufend = PL_bufptr + SvCUR(PL_linestr); + PL_last_lop = PL_last_uni = Nullch; if (!s) { s = PL_bufptr; yyerror("Format not terminated"); @@ -7320,7 +7416,6 @@ S_set_csh(pTHX) I32 Perl_start_subparse(pTHX_ I32 is_format, U32 flags) { - dTHR; I32 oldsavestack_ix = PL_savestack_ix; CV* outsidecv = PL_compcv; AV* comppadlist; @@ -7342,6 +7437,7 @@ Perl_start_subparse(pTHX_ I32 is_format, U32 flags) PL_compcv = (CV*)NEWSV(1104,0); sv_upgrade((SV *)PL_compcv, is_format ? SVt_PVFM : SVt_PVCV); CvFLAGS(PL_compcv) |= flags; + CvDEFSTASH(PL_compcv) = PL_defstash; PL_comppad = newAV(); av_push(PL_comppad, Nullsv); @@ -7373,10 +7469,12 @@ Perl_start_subparse(pTHX_ I32 is_format, U32 flags) return oldsavestack_ix; } +#ifdef __SC__ +#pragma segment Perl_yylex +#endif int Perl_yywarn(pTHX_ char *s) { - dTHR; PL_in_eval |= EVAL_WARNONLY; yyerror(s); PL_in_eval &= ~EVAL_WARNONLY; @@ -7386,7 +7484,6 @@ Perl_yywarn(pTHX_ char *s) int Perl_yyerror(pTHX_ char *s) { - dTHR; char *where = NULL; char *context = NULL; int contlen = -1; @@ -7463,6 +7560,9 @@ Perl_yyerror(pTHX_ char *s) PL_in_my_stash = Nullhv; return 0; } +#ifdef __SC__ +#pragma segment Main +#endif STATIC char* S_swallow_bom(pTHX_ U8 *s) @@ -7470,8 +7570,8 @@ S_swallow_bom(pTHX_ U8 *s) STRLEN slen; slen = SvCUR(PL_linestr); switch (*s) { - case 0xFF: - if (s[1] == 0xFE) { + case 0xFF: + if (s[1] == 0xFE) { /* UTF-16 little-endian */ if (s[2] == 0 && s[3] == 0) /* UTF-32 little-endian */ Perl_croak(aTHX_ "Unsupported script encoding"); @@ -7573,7 +7673,7 @@ utf16_textfilter(pTHXo_ int idx, SV *sv, int maxlen) if (!*SvPV_nolen(sv)) /* Game over, but don't feed an odd-length string to utf16_to_utf8 */ return count; - + tend = utf16_to_utf8((U8*)SvPVX(sv), tmps, SvCUR(sv), &newlen); sv_usepvn(sv, (char*)tmps, tend - tmps); }