X-Git-Url: https://perl5.git.perl.org/perl5.git/blobdiff_plain/137443ea0a858c43f5a720730cac6209a7d41948..4ebdfc9900a36429be5fe85719cf00fc3ef0ebf9:/toke.c diff --git a/toke.c b/toke.c index d96d9ad..24bf27d 100644 --- a/toke.c +++ b/toke.c @@ -14,21 +14,24 @@ #include "EXTERN.h" #include "perl.h" +#ifndef PERL_OBJECT static void check_uni _((void)); static void force_next _((I32 type)); static char *force_version _((char *start)); static char *force_word _((char *start, int token, int check_keyword, int allow_pack, int allow_tick)); -static SV *q _((SV *sv)); +static SV *tokeq _((SV *sv)); static char *scan_const _((char *start)); static char *scan_formline _((char *s)); static char *scan_heredoc _((char *s)); -static char *scan_ident _((char *s, char *send, char *dest, I32 ck_uni)); +static char *scan_ident _((char *s, char *send, char *dest, STRLEN destlen, + I32 ck_uni)); static char *scan_inputsymbol _((char *start)); static char *scan_pat _((char *start)); static char *scan_str _((char *start)); static char *scan_subst _((char *start)); static char *scan_trans _((char *start)); -static char *scan_word _((char *s, char *dest, int allow_package, STRLEN *slp)); +static char *scan_word _((char *s, char *dest, STRLEN destlen, + int allow_package, STRLEN *slp)); static char *skipspace _((char *s)); static void checkcomma _((char *s, char *name, char *what)); static void force_ident _((char *s, int kind)); @@ -47,21 +50,19 @@ static int uni _((I32 f, char *s)); #endif static char * filter_gets _((SV *sv, PerlIO *fp, STRLEN append)); static void restore_rsfp _((void *f)); +static SV *new_constant _((char *s, STRLEN len, char *key, SV *sv, SV *pv, char *type)); +static void restore_expect _((void *e)); +static void restore_lex_expect _((void *e)); +#endif /* PERL_OBJECT */ -static char *linestart; /* beg. of most recently read line */ - -static char pending_ident; /* pending identifier lookup */ - -static struct { - I32 super_state; /* lexer state to save */ - I32 sub_inwhat; /* "lex_inwhat" to use */ - OP *sub_op; /* "lex_op" to use */ -} sublex_info; +static char ident_too_long[] = "Identifier too long"; /* The following are arranged oddly so that the guard on the switch statement * can get by with a single comparison (if the compiler is smart enough). */ +/* #define LEX_NOTPARSING 11 is done in perl.h. */ + #define LEX_NORMAL 10 #define LEX_INTERPNORMAL 9 #define LEX_INTERPCASEMOD 8 @@ -137,9 +138,8 @@ static struct { /* grandfather return to old style */ #define OLDLOP(f) return(yylval.ival=f,expect = XTERM,bufptr = s,(int)LSTOP) -static int -ao(toketype) -int toketype; +STATIC int +ao(int toketype) { if (*bufptr == '=') { bufptr++; @@ -152,20 +152,14 @@ int toketype; return toketype; } -static void -no_op(what, s) -char *what; -char *s; +STATIC void +no_op(char *what, char *s) { char *oldbp = bufptr; bool is_first = (oldbufptr == linestart); - char *msg; bufptr = s; - New(890, msg, strlen(what) + 40, char); - sprintf(msg, "%s found where operator expected", what); - yywarn(msg); - Safefree(msg); + yywarn(form("%s found where operator expected", what)); if (is_first) warn("\t(Missing semicolon on previous line?)\n"); else if (oldoldbufptr && isIDFIRST(*oldoldbufptr)) { @@ -181,9 +175,8 @@ char *s; bufptr = oldbp; } -static void -missingterm(s) -char *s; +STATIC void +missingterm(char *s) { char tmpbuf[3]; char q; @@ -209,23 +202,35 @@ char *s; } void -deprecate(s) -char *s; +deprecate(char *s) { if (dowarn) warn("Use of %s is deprecated", s); } -static void -depcom() +STATIC void +depcom(void) { deprecate("comma-less variable list"); } +#ifdef WIN32 + +STATIC I32 +win32_textfilter(int idx, SV *sv, int maxlen) +{ + I32 count = FILTER_READ(idx+1, sv, maxlen); + if (count > 0 && !maxlen) + win32_strip_return(sv); + return count; +} +#endif + + void -lex_start(line) -SV *line; +lex_start(SV *line) { + dTHR; char *s; STRLEN len; @@ -247,6 +252,11 @@ SV *line; SAVEPPTR(lex_brackstack); SAVEPPTR(lex_casestack); SAVEDESTRUCTOR(restore_rsfp, rsfp); + SAVESPTR(lex_stuff); + SAVEI32(lex_defer); + SAVESPTR(lex_repl); + SAVEDESTRUCTOR(restore_expect, tokenbuf + expect); /* encode as pointer */ + SAVEDESTRUCTOR(restore_lex_expect, tokenbuf + expect); lex_state = LEX_NORMAL; lex_defer = 0; @@ -261,11 +271,7 @@ SV *line; *lex_casestack = '\0'; lex_dojoin = 0; lex_starts = 0; - if (lex_stuff) - SvREFCNT_dec(lex_stuff); lex_stuff = Nullsv; - if (lex_repl) - SvREFCNT_dec(lex_repl); lex_repl = Nullsv; lex_inpat = 0; lex_inwhat = 0; @@ -287,14 +293,13 @@ SV *line; } void -lex_end() +lex_end(void) { doextract = FALSE; } -static void -restore_rsfp(f) -void *f; +STATIC void +restore_rsfp(void *f) { PerlIO *fp = (PerlIO*)f; @@ -305,10 +310,24 @@ void *f; rsfp = fp; } -static void -incline(s) -char *s; +STATIC void +restore_expect(void *e) +{ + /* a safe way to store a small integer in a pointer */ + expect = (expectation)((char *)e - tokenbuf); +} + +STATIC void +restore_lex_expect(void *e) +{ + /* a safe way to store a small integer in a pointer */ + lex_expect = (expectation)((char *)e - tokenbuf); +} + +STATIC void +incline(char *s) { + dTHR; char *t; char *n; char ch; @@ -346,10 +365,10 @@ char *s; curcop->cop_line = atoi(n)-1; } -static char * -skipspace(s) -register char *s; +STATIC char * +skipspace(register char *s) { + dTHR; if (lex_formbrack && lex_brackets <= lex_formbrack) { while (s < bufend && (*s == ' ' || *s == '\t')) s++; @@ -369,7 +388,9 @@ register char *s; return s; if ((s = filter_gets(linestr, rsfp, (prevlen = SvCUR(linestr)))) == Nullch) { if (minus_n || minus_p) { - sv_setpv(linestr,minus_p ? ";}continue{print" : ""); + sv_setpv(linestr,minus_p ? + ";}continue{print or die qq(-p destination: $!\\n)" : + ""); sv_catpv(linestr,";}"); minus_n = minus_p = 0; } @@ -378,7 +399,7 @@ register char *s; oldoldbufptr = oldbufptr = bufptr = s = linestart = SvPVX(linestr); bufend = SvPVX(linestr) + SvCUR(linestr); if (preprocess && !in_eval) - (void)my_pclose(rsfp); + (void)PerlProc_pclose(rsfp); else if ((PerlIO*)rsfp == PerlIO_stdin()) PerlIO_clearerr(rsfp); else @@ -390,7 +411,7 @@ register char *s; bufend = s + SvCUR(linestr); s = bufptr; incline(s); - if (perldb && curstash != debstash) { + if (PERLDB_LINE && curstash != debstash) { SV *sv = NEWSV(85,0); sv_upgrade(sv, SVt_PVMG); @@ -400,8 +421,8 @@ register char *s; } } -static void -check_uni() { +STATIC void +check_uni(void) { char *s; char ch; char *t; @@ -424,10 +445,8 @@ check_uni() { #undef UNI #define UNI(f) return uni(f,s) -static int -uni(f,s) -I32 f; -char *s; +STATIC int +uni(I32 f, char *s) { yylval.ival = f; expect = XTERM; @@ -447,17 +466,10 @@ char *s; #define LOP(f,x) return lop(f,x,s) -static I32 -lop -#ifdef CAN_PROTOTYPE - (I32 f, expectation x, char *s) -#else - (f,x,s) -I32 f; -expectation x; -char *s; -#endif /* CAN_PROTOTYPE */ +STATIC I32 +lop(I32 f, expectation x, char *s) { + dTHR; yylval.ival = f; CLINE; expect = x; @@ -475,9 +487,8 @@ char *s; return LSTOP; } -static void -force_next(type) -I32 type; +STATIC void +force_next(I32 type) { nexttype[nexttoke] = type; nexttoke++; @@ -488,13 +499,8 @@ I32 type; } } -static char * -force_word(start,token,check_keyword,allow_pack,allow_tick) -register char *start; -int token; -int check_keyword; -int allow_pack; -int allow_tick; +STATIC char * +force_word(register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick) { register char *s; STRLEN len; @@ -503,9 +509,9 @@ int allow_tick; s = start; if (isIDFIRST(*s) || (allow_pack && *s == ':') || - (allow_tick && *s == '\'') ) + (allow_initial_tick && *s == '\'') ) { - s = scan_word(s, tokenbuf, allow_pack, &len); + s = scan_word(s, tokenbuf, sizeof tokenbuf, allow_pack, &len); if (check_keyword && keyword(tokenbuf, len)) return start; if (token == METHOD) { @@ -525,21 +531,20 @@ int allow_tick; return s; } -static void -force_ident(s, kind) -register char *s; -int kind; +STATIC void +force_ident(register char *s, int kind) { if (s && *s) { - OP* op = (OP*)newSVOP(OP_CONST, 0, newSVpv(s,0)); - nextval[nexttoke].opval = op; + OP* o = (OP*)newSVOP(OP_CONST, 0, newSVpv(s,0)); + nextval[nexttoke].opval = o; force_next(WORD); if (kind) { - op->op_private = OPpCONST_ENTERED; + dTHR; /* just for in_eval */ + o->op_private = OPpCONST_ENTERED; /* XXX see note in pp_entereval() for why we forgo typo warnings if the symbol must be introduced in an eval. GSAR 96-10-12 */ - gv_fetchpv(s, in_eval ? GV_ADDMULTI : TRUE, + gv_fetchpv(s, in_eval ? (GV_ADDMULTI | 8) : TRUE, kind == '$' ? SVt_PV : kind == '@' ? SVt_PVAV : kind == '%' ? SVt_PVHV : @@ -549,9 +554,8 @@ int kind; } } -static char * -force_version(s) -char *s; +STATIC char * +force_version(char *s) { OP *version = Nullop; @@ -577,27 +581,29 @@ char *s; return (s); } -static SV * -q(sv) -SV *sv; +STATIC SV * +tokeq(SV *sv) { register char *s; register char *send; register char *d; - STRLEN len; + STRLEN len = 0; + SV *pv = sv; if (!SvLEN(sv)) - return sv; + goto finish; s = SvPV_force(sv, len); if (SvIVX(sv) == -1) - return sv; + goto finish; send = s + len; while (s < send && *s != '\\') s++; if (s == send) - return sv; + goto finish; d = s; + if ( hints & HINT_NEW_STRING ) + pv = sv_2mortal(newSVpv(SvPVX(pv), len)); while (s < send) { if (*s == '\\') { if (s + 1 < send && (s[1] == '\\')) @@ -607,12 +613,14 @@ SV *sv; } *d = '\0'; SvCUR_set(sv, d - SvPVX(sv)); - + finish: + if ( hints & HINT_NEW_STRING ) + return new_constant(NULL, 0, "q", sv, pv, "q"); return sv; } -static I32 -sublex_start() +STATIC I32 +sublex_start(void) { register I32 op_type = yylval.ival; @@ -622,11 +630,20 @@ sublex_start() return THING; } if (op_type == OP_CONST || op_type == OP_READLINE) { - SV *sv = q(lex_stuff); - STRLEN len; - char *p = SvPV(sv, len); - yylval.opval = (OP*)newSVOP(op_type, 0, newSVpv(p, len)); - SvREFCNT_dec(sv); + SV *sv = tokeq(lex_stuff); + + if (SvTYPE(sv) == SVt_PVIV) { + /* Overloaded constants, nothing fancy: Convert to SVt_PV: */ + STRLEN len; + char *p; + SV *nsv; + + p = SvPV(sv, len); + nsv = newSVpv(p, len); + SvREFCNT_dec(sv); + sv = nsv; + } + yylval.opval = (OP*)newSVOP(op_type, 0, sv); lex_stuff = Nullsv; return THING; } @@ -646,10 +663,11 @@ sublex_start() return FUNC; } -static I32 -sublex_push() +STATIC I32 +sublex_push(void) { - push_scope(); + dTHR; + ENTER; lex_state = sublex_info.super_state; SAVEI32(lex_dojoin); @@ -698,8 +716,8 @@ sublex_push() return '('; } -static I32 -sublex_done() +STATIC I32 +sublex_done(void) { if (!lex_starts++) { expect = XOPERATOR; @@ -735,7 +753,7 @@ sublex_done() return ','; } else { - pop_scope(); + LEAVE; bufend = SvPVX(linestr); bufend += SvCUR(linestr); expect = XOPERATOR; @@ -743,67 +761,188 @@ sublex_done() } } -static char * -scan_const(start) -char *start; +/* + scan_const + + Extracts a pattern, double-quoted string, or transliteration. This + is terrifying code. + + It looks at lex_inwhat and lex_inpat to find out whether it's + processing a pattern (lex_inpat is true), a transliteration + (lex_inwhat & OP_TRANS is true), or a double-quoted string. + + Returns a pointer to the character scanned up to. Iff this is + advanced from the start pointer supplied (ie if anything was + successfully parsed), will leave an OP for the substring scanned + in yylval. Caller must intuit reason for not parsing further + by looking at the next characters herself. + + In patterns: + backslashes: + double-quoted style: \r and \n + regexp special ones: \D \s + constants: \x3 + backrefs: \1 (deprecated in substitution replacements) + case and quoting: \U \Q \E + stops on @ and $, but not for $ as tail anchor + + In transliterations: + characters are VERY literal, except for - not at the start or end + of the string, which indicates a range. scan_const expands the + range to the full set of intermediate characters. + + In double-quoted strings: + backslashes: + double-quoted style: \r and \n + constants: \x3 + backrefs: \1 (deprecated) + case and quoting: \U \Q \E + stops on @ and $ + + scan_const does *not* construct ops to handle interpolated strings. + It stops processing as soon as it finds an embedded $ or @ variable + and leaves it to the caller to work out what's going on. + + @ in pattern could be: @foo, @{foo}, @$foo, @'foo, @:foo. + + $ in pattern could be $foo or could be tail anchor. Assumption: + it's a tail anchor if $ is the last thing in the string, or if it's + followed by one of ")| \n\t" + + \1 (backreferences) are turned into $1 + + The structure of the code is + while (there's a character to process) { + handle transliteration ranges + skip regexp comments + skip # initiated comments in //x patterns + check for embedded @foo + check for embedded scalars + if (backslash) { + leave intact backslashes from leave (below) + deprecate \1 in strings and sub replacements + handle string-changing backslashes \l \U \Q \E, etc. + switch (what was escaped) { + handle - in a transliteration (becomes a literal -) + handle \132 octal characters + handle 0x15 hex characters + handle \cV (control V) + handle printf backslashes (\f, \r, \n, etc) + } (end switch) + } (end if backslash) + } (end while character to read) + +*/ + +STATIC char * +scan_const(char *start) { - register char *send = bufend; - SV *sv = NEWSV(93, send - start); - register char *s = start; - register char *d = SvPVX(sv); - bool dorange = FALSE; - I32 len; - char *leave = + register char *send = bufend; /* end of the constant */ + SV *sv = NEWSV(93, send - start); /* sv for the constant */ + register char *s = start; /* start of the constant */ + register char *d = SvPVX(sv); /* destination for copies */ + bool dorange = FALSE; /* are we in a translit range? */ + I32 len; /* ? */ + + /* leaveit is the set of acceptably-backslashed characters */ + char *leaveit = lex_inpat - ? "\\.^$@AGZdDwWsSbB+*?|()-nrtfeaxc0123456789[{]} \t\n\r\f\v#" - : (lex_inwhat & OP_TRANS) - ? "" - : ""; + ? "\\.^$@AGZdDwWsSbB+*?|()-nrtfeaxcz0123456789[{]} \t\n\r\f\v#" + : ""; while (s < send || dorange) { + /* get transliterations out of the way (they're most literal) */ if (lex_inwhat == OP_TRANS) { + /* expand a range A-Z to the full set of characters. AIE! */ if (dorange) { - I32 i; - I32 max; - i = d - SvPVX(sv); - SvGROW(sv, SvLEN(sv) + 256); - d = SvPVX(sv) + i; - d -= 2; - max = (U8)d[1]; + I32 i; /* current expanded character */ + I32 max; /* last character in range */ + + i = d - SvPVX(sv); /* remember current offset */ + SvGROW(sv, SvLEN(sv) + 256); /* expand the sv -- there'll never be more'n 256 chars in a range for it to grow by */ + d = SvPVX(sv) + i; /* restore d after the grow potentially has changed the ptr */ + d -= 2; /* eat the first char and the - */ + + max = (U8)d[1]; /* last char in range */ + for (i = (U8)*d; i <= max; i++) *d++ = i; + + /* mark the range as done, and continue */ dorange = FALSE; continue; } + + /* range begins (ignore - as first or last char) */ else if (*s == '-' && s+1 < send && s != start) { dorange = TRUE; s++; } } - else if (*s == '(' && lex_inpat && s[1] == '?' && s[2] == '#') { - while (s < send && *s != ')') - *d++ = *s++; + + /* if we get here, we're not doing a transliteration */ + + /* skip for regexp comments /(?#comment)/ */ + else if (*s == '(' && lex_inpat && s[1] == '?') { + if (s[2] == '#') { + while (s < send && *s != ')') + *d++ = *s++; + } else if (s[2] == '{') { /* This should march regcomp.c */ + I32 count = 1; + char *regparse = s + 3; + char c; + + while (count && (c = *regparse)) { + if (c == '\\' && regparse[1]) + regparse++; + else if (c == '{') + count++; + else if (c == '}') + count--; + regparse++; + } + if (*regparse == ')') + regparse++; + else + yyerror("Sequence (?{...}) not terminated or not {}-balanced"); + while (s < regparse && *s != ')') + *d++ = *s++; + } } + + /* likewise skip #-initiated comments in //x patterns */ else if (*s == '#' && lex_inpat && ((PMOP*)lex_inpat)->op_pmflags & PMf_EXTENDED) { while (s+1 < send && *s != '\n') *d++ = *s++; } + + /* check for embedded arrays (@foo, @:foo, @'foo, @{foo}, @$foo) */ else if (*s == '@' && s[1] && (isALNUM(s[1]) || strchr(":'{$", s[1]))) break; + + /* check for embedded scalars. only stop if we're sure it's a + variable. + */ else if (*s == '$') { if (!lex_inpat) /* not a regexp, so $ must be var */ break; - if (s + 1 < send && !strchr(")| \n\t", s[1])) + if (s + 1 < send && !strchr("()| \n\t", s[1])) break; /* in regexp, $ might be tail anchor */ } + + /* backslashes */ if (*s == '\\' && s+1 < send) { s++; - if (*s && strchr(leave, *s)) { + + /* some backslashes we leave behind */ + if (*s && strchr(leaveit, *s)) { *d++ = '\\'; *d++ = *s++; continue; } + + /* deprecate \1 in strings and substitution replacements */ if (lex_inwhat == OP_SUBST && !lex_inpat && isDIGIT(*s) && *s != '0' && !isDIGIT(s[1])) { @@ -812,34 +951,49 @@ char *start; *--s = '$'; break; } + + /* string-change backslash escapes */ if (lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) { --s; break; } + + /* if we get here, it's either a quoted -, or a digit */ switch (*s) { + + /* quoted - in transliterations */ case '-': if (lex_inwhat == OP_TRANS) { *d++ = *s++; continue; } /* FALL THROUGH */ + /* default action is to copy the quoted character */ default: *d++ = *s++; continue; + + /* \132 indicates an octal constant */ case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': *d++ = scan_oct(s, 3, &len); s += len; continue; + + /* \x24 indicates a hex constant */ case 'x': *d++ = scan_hex(++s, 2, &len); s += len; continue; + + /* \c is a control character */ case 'c': s++; len = *s++; *d++ = toCTRL(len); continue; + + /* printf-style backslashes, formfeeds, newlines, etc */ case 'b': *d++ = '\b'; break; @@ -861,31 +1015,45 @@ char *start; case 'a': *d++ = '\007'; break; - } + } /* end switch */ + s++; continue; - } + } /* end if (backslash) */ + *d++ = *s++; - } + } /* while loop to process each character */ + + /* terminate the string and set up the sv */ *d = '\0'; SvCUR_set(sv, d - SvPVX(sv)); SvPOK_on(sv); + /* shrink the sv if we allocated more than we used */ if (SvCUR(sv) + 5 < SvLEN(sv)) { SvLEN_set(sv, SvCUR(sv) + 1); Renew(SvPVX(sv), SvLEN(sv), char); } - if (s > bufptr) + + /* return the substring (via yylval) only if we parsed anything */ + if (s > bufptr) { + if ( hints & ( lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) ) + sv = new_constant(start, s - start, (lex_inpat ? "qr" : "q"), + sv, Nullsv, + ( lex_inwhat == OP_TRANS + ? "tr" + : ( (lex_inwhat == OP_SUBST && !lex_inpat) + ? "s" + : "qq"))); yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv); - else + } else SvREFCNT_dec(sv); return s; } /* This is the one truly awful dwimmer necessary to conflate C and sed. */ -static int -intuit_more(s) -register char *s; +STATIC int +intuit_more(register char *s) { if (lex_brackets) return TRUE; @@ -921,9 +1089,9 @@ register char *s; else { int weight = 2; /* let's weigh the evidence */ char seen[256]; - unsigned char un_char = 0, last_un_char; + unsigned char un_char = 255, last_un_char; char *send = strchr(s,']'); - char tmpbuf[512]; + char tmpbuf[sizeof tokenbuf * 4]; if (!send) /* has to be an expression */ return TRUE; @@ -948,7 +1116,7 @@ register char *s; case '$': weight -= seen[un_char] * 10; if (isALNUM(s[1])) { - scan_ident(s,send,tmpbuf,FALSE); + scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE); if ((int)strlen(tmpbuf) > 1 && gv_fetchpv(tmpbuf,FALSE, SVt_PV)) weight -= 100; else @@ -987,6 +1155,8 @@ register char *s; weight += 30; if (strchr("zZ79~",s[1])) weight += 30; + if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$')) + weight -= 5; /* cope with negative subscript */ break; default: if (!isALNUM(last_un_char) && !strchr("$@&",last_un_char) && @@ -1012,23 +1182,30 @@ register char *s; return TRUE; } -static int -intuit_method(start,gv) -char *start; -GV *gv; +STATIC int +intuit_method(char *start, GV *gv) { char *s = start + (*start == '$'); - char tmpbuf[1024]; + char tmpbuf[sizeof tokenbuf]; STRLEN len; GV* indirgv; if (gv) { + CV *cv; if (GvIO(gv)) return 0; - if (!GvCVu(gv)) + if ((cv = GvCVu(gv))) { + char *proto = SvPVX(cv); + if (proto) { + if (*proto == ';') + proto++; + if (*proto == '*') + return 0; + } + } else gv = 0; } - s = scan_word(s, tmpbuf, TRUE, &len); + s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len); if (*start == '$') { if (gv || last_lop_op == OP_PRINT || isUPPER(*tokenbuf)) return 0; @@ -1038,7 +1215,12 @@ GV *gv; return *s == '(' ? FUNCMETH : METHOD; } if (!keyword(tmpbuf, len)) { - indirgv = gv_fetchpv(tmpbuf,FALSE, SVt_PVCV); + if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') { + len -= 2; + tmpbuf[len] = '\0'; + goto bare_package; + } + indirgv = gv_fetchpv(tmpbuf, FALSE, SVt_PVCV); if (indirgv && GvCVu(indirgv)) return 0; /* filehandle or package name makes it a method */ @@ -1046,11 +1228,10 @@ GV *gv; s = skipspace(s); if ((bufend - s) >= 2 && *s == '=' && *(s+1) == '>') return 0; /* no assumptions -- "=>" quotes bearword */ - nextval[nexttoke].opval = - (OP*)newSVOP(OP_CONST, 0, - newSVpv(tmpbuf,0)); - nextval[nexttoke].opval->op_private = - OPpCONST_BARE; + bare_package: + nextval[nexttoke].opval = (OP*)newSVOP(OP_CONST, 0, + newSVpv(tmpbuf,0)); + nextval[nexttoke].opval->op_private = OPpCONST_BARE; expect = XTERM; force_next(WORD); bufptr = s; @@ -1060,14 +1241,15 @@ GV *gv; return 0; } -static char* -incl_perldb() +STATIC char* +incl_perldb(void) { if (perldb) { - char *pdb = getenv("PERL5DB"); + char *pdb = PerlEnv_getenv("PERL5DB"); if (pdb) return pdb; + SETERRNO(0,SS$_NORMAL); return "BEGIN { require 'perl5db.pl' }"; } return ""; @@ -1092,9 +1274,7 @@ incl_perldb() static int filter_debug = 0; SV * -filter_add(funcp, datasv) - filter_t funcp; - SV *datasv; +filter_add(filter_t funcp, SV *datasv) { if (!funcp){ /* temporary handy debugging hack to be deleted */ filter_debug = atoi((char*)datasv); @@ -1103,7 +1283,7 @@ filter_add(funcp, datasv) if (!rsfp_filters) rsfp_filters = newAV(); if (!datasv) - datasv = newSV(0); + datasv = NEWSV(255,0); if (!SvUPGRADE(datasv, SVt_PVIO)) die("Can't upgrade filter_add data to SVt_PVIO"); IoDIRP(datasv) = (DIR*)funcp; /* stash funcp into spare field */ @@ -1117,17 +1297,15 @@ filter_add(funcp, datasv) /* Delete most recently added instance of this filter function. */ void -filter_del(funcp) - filter_t funcp; +filter_del(filter_t funcp) { if (filter_debug) warn("filter_del func %p", funcp); - if (!rsfp_filters || AvFILL(rsfp_filters)<0) + if (!rsfp_filters || AvFILLp(rsfp_filters)<0) return; /* if filter is on top of stack (usual case) just pop it off */ - if (IoDIRP(FILTER_DATA(0)) == (void*)funcp){ - /* sv_free(av_pop(rsfp_filters)); */ - sv_free(av_shift(rsfp_filters)); + if (IoDIRP(FILTER_DATA(AvFILLp(rsfp_filters))) == (void*)funcp){ + sv_free(av_pop(rsfp_filters)); return; } @@ -1138,17 +1316,17 @@ filter_del(funcp) /* Invoke the n'th filter function for the current rsfp. */ I32 -filter_read(idx, buf_sv, maxlen) - int idx; - SV *buf_sv; - int maxlen; /* 0 = read one text line */ +filter_read(int idx, SV *buf_sv, int maxlen) + + + /* 0 = read one text line */ { filter_t funcp; SV *datasv = NULL; if (!rsfp_filters) return -1; - if (idx > AvFILL(rsfp_filters)){ /* Any more filters? */ + if (idx > AvFILLp(rsfp_filters)){ /* Any more filters? */ /* Provide a default input filter to make life easy. */ /* Note that we append to the line. This is handy. */ if (filter_debug) @@ -1192,15 +1370,17 @@ filter_read(idx, buf_sv, maxlen) /* Call function. The function is expected to */ /* call "FILTER_READ(idx+1, buf_sv)" first. */ /* Return: <0:error, =0:eof, >0:not eof */ - return (*funcp)(idx, buf_sv, maxlen); + return (*funcp)(PERL_OBJECT_THIS_ idx, buf_sv, maxlen); } -static char * -filter_gets(sv,fp, append) -register SV *sv; -register PerlIO *fp; -STRLEN append; +STATIC char * +filter_gets(register SV *sv, register PerlIO *fp, STRLEN append) { +#ifdef WIN32FILTER + if (!rsfp_filters) { + filter_add(win32_textfilter,NULL); + } +#endif if (rsfp_filters) { if (!append) @@ -1212,7 +1392,6 @@ STRLEN append; } else return (sv_gets(sv, fp, append)); - } @@ -1223,68 +1402,135 @@ STRLEN append; EXT int yychar; /* last token */ +/* + yylex + + Works out what to call the token just pulled out of the input + stream. The yacc parser takes care of taking the ops we return and + stitching them into a tree. + + Returns: + PRIVATEREF + + Structure: + if read an identifier + if we're in a my declaration + croak if they tried to say my($foo::bar) + build the ops for a my() declaration + if it's an access to a my() variable + are we in a sort block? + croak if my($a); $a <=> $b + build ops for access to a my() variable + if in a dq string, and they've said @foo and we can't find @foo + croak + build ops for a bareword + if we already built the token before, use it. +*/ + int -yylex() +yylex(void) { + dTHR; register char *s; register char *d; register I32 tmp; STRLEN len; + GV *gv = Nullgv; + GV **gvp = 0; + /* check if there's an identifier for us to look at */ if (pending_ident) { + /* pit holds the identifier we read and pending_ident is reset */ char pit = pending_ident; pending_ident = 0; + /* if we're in a my(), we can't allow dynamics here. + $foo'bar has already been turned into $foo::bar, so + just check for colons. + + if it's a legal name, the OP is a PADANY. + */ if (in_my) { if (strchr(tokenbuf,':')) croak(no_myglob,tokenbuf); + yylval.opval = newOP(OP_PADANY, 0); yylval.opval->op_targ = pad_allocmy(tokenbuf); return PRIVATEREF; } - if (!strchr(tokenbuf,':') && (tmp = pad_findmy(tokenbuf))) { - if (last_lop_op == OP_SORT && - tokenbuf[0] == '$' && - (tokenbuf[1] == 'a' || tokenbuf[1] == 'b') - && !tokenbuf[2]) + /* + build the ops for accesses to a my() variable. + + Deny my($a) or my($b) in a sort block, *if* $a or $b is + then used in a comparison. This catches most, but not + all cases. For instance, it catches + sort { my($a); $a <=> $b } + but not + sort { my($a); $a < $b ? -1 : $a == $b ? 0 : 1; } + (although why you'd do that is anyone's guess). + */ + + if (!strchr(tokenbuf,':')) { +#ifdef USE_THREADS + /* Check for single character per-thread SVs */ + if (tokenbuf[0] == '$' && tokenbuf[2] == '\0' + && !isALPHA(tokenbuf[1]) /* Rule out obvious non-threadsvs */ + && (tmp = find_threadsv(&tokenbuf[1])) != NOT_IN_PAD) { - for (d = in_eval ? oldoldbufptr : linestart; - d < bufend && *d != '\n'; - d++) + yylval.opval = newOP(OP_THREADSV, 0); + yylval.opval->op_targ = tmp; + return PRIVATEREF; + } +#endif /* USE_THREADS */ + if ((tmp = pad_findmy(tokenbuf)) != NOT_IN_PAD) { + /* if it's a sort block and they're naming $a or $b */ + if (last_lop_op == OP_SORT && + tokenbuf[0] == '$' && + (tokenbuf[1] == 'a' || tokenbuf[1] == 'b') + && !tokenbuf[2]) { - if (strnEQ(d,"<=>",3) || strnEQ(d,"cmp",3)) { - croak("Can't use \"my %s\" in sort comparison", - tokenbuf); + for (d = in_eval ? oldoldbufptr : linestart; + d < bufend && *d != '\n'; + d++) + { + if (strnEQ(d,"<=>",3) || strnEQ(d,"cmp",3)) { + croak("Can't use \"my %s\" in sort comparison", + tokenbuf); + } } } - } - yylval.opval = newOP(OP_PADANY, 0); - yylval.opval->op_targ = tmp; - return PRIVATEREF; + yylval.opval = newOP(OP_PADANY, 0); + yylval.opval->op_targ = tmp; + return PRIVATEREF; + } } - /* Force them to make up their mind on "@foo". */ + /* + Whine if they've said @foo in a doublequoted string, + and @foo isn't a variable we can find in the symbol + table. + */ if (pit == '@' && lex_state != LEX_NORMAL && !lex_brackets) { GV *gv = gv_fetchpv(tokenbuf+1, FALSE, SVt_PVAV); - if (!gv || ((tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv))) { - char tmpbuf[1024]; - sprintf(tmpbuf, "In string, %s now must be written as \\%s", - tokenbuf, tokenbuf); - yyerror(tmpbuf); - } + if (!gv || ((tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv))) + yyerror(form("In string, %s now must be written as \\%s", + tokenbuf, tokenbuf)); } + /* build ops for a bareword */ yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpv(tokenbuf+1, 0)); yylval.opval->op_private = OPpCONST_ENTERED; - gv_fetchpv(tokenbuf+1, in_eval ? GV_ADDMULTI : TRUE, + gv_fetchpv(tokenbuf+1, in_eval ? (GV_ADDMULTI | 8) : TRUE, ((tokenbuf[0] == '$') ? SVt_PV : (tokenbuf[0] == '@') ? SVt_PVAV : SVt_PVHV)); return WORD; } + /* no identifier pending identification */ + switch (lex_state) { #ifdef COMMENTARY case LEX_NORMAL: /* Some compilers will produce faster */ @@ -1292,6 +1538,7 @@ yylex() break; #endif + /* when we're already built the next token, just pull it out the queue */ case LEX_KNOWNEXT: nexttoke--; yylval = nextval[nexttoke]; @@ -1302,16 +1549,23 @@ yylex() } return(nexttype[nexttoke]); + /* interpolated case modifiers like \L \U, including \Q and \E. + when we get here, bufptr is at the \ + */ case LEX_INTERPCASEMOD: #ifdef DEBUGGING if (bufptr != bufend && *bufptr != '\\') croak("panic: INTERPCASEMOD"); #endif - if (bufptr == bufend || bufptr[1] == 'E') { + /* handle \E or end of string */ + if (bufptr == bufend || bufptr[1] == 'E') { char oldmod; + + /* if at a \E */ if (lex_casemods) { oldmod = lex_casestack[--lex_casemods]; lex_casestack[lex_casemods] = '\0'; + if (bufptr != bufend && strchr("LUQ", oldmod)) { bufptr += 2; lex_state = LEX_INTERPCONCAT; @@ -1380,7 +1634,13 @@ yylex() if (lex_dojoin) { nextval[nexttoke].ival = 0; force_next(','); +#ifdef USE_THREADS + nextval[nexttoke].opval = newOP(OP_THREADSV, 0); + nextval[nexttoke].opval->op_targ = find_threadsv("\""); + force_next(PRIVATEREF); +#else force_ident("\"", '$'); +#endif /* USE_THREADS */ nextval[nexttoke].ival = 0; force_next('$'); nextval[nexttoke].ival = 0; @@ -1419,7 +1679,9 @@ yylex() if (SvIVX(linestr) == '\'') { SV *sv = newSVsv(linestr); if (!lex_inpat) - sv = q(sv); + sv = tokeq(sv); + else if ( hints & HINT_NEW_RE ) + sv = new_constant(NULL, 0, "qr", sv, sv, "q"); yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv); s = bufend; } @@ -1484,7 +1746,7 @@ yylex() if (SvCUR(linestr)) sv_catpv(linestr,";"); if (preambleav){ - while(AvFILL(preambleav) >= 0) { + while(AvFILLp(preambleav) >= 0) { SV *tmpsv = av_shift(preambleav); sv_catsv(linestr, tmpsv); sv_catpv(linestr, ";"); @@ -1502,28 +1764,23 @@ yylex() if (gv) GvIMPORTED_AV_on(gv); if (minus_F) { - char *tmpbuf1; - New(201, tmpbuf1, strlen(splitstr) * 2 + 20, char); if (strchr("/'\"", *splitstr) && strchr(splitstr + 1, *splitstr)) - sprintf(tmpbuf1, "@F=split(%s);", splitstr); + sv_catpvf(linestr, "@F=split(%s);", splitstr); else { char delim; s = "'~#\200\1'"; /* surely one char is unused...*/ while (s[1] && strchr(splitstr, *s)) s++; delim = *s; - sprintf(tmpbuf1, "@F=split(%s%c", - "q" + (delim == '\''), delim); - d = tmpbuf1 + strlen(tmpbuf1); - for (s = splitstr; *s; ) { + sv_catpvf(linestr, "@F=split(%s%c", + "q" + (delim == '\''), delim); + for (s = splitstr; *s; s++) { if (*s == '\\') - *d++ = '\\'; - *d++ = *s++; + sv_catpvn(linestr, "\\", 1); + sv_catpvn(linestr, s, 1); } - sprintf(d, "%c);", delim); + sv_catpvf(linestr, "%c);", delim); } - sv_catpv(linestr,tmpbuf1); - Safefree(tmpbuf1); } else sv_catpv(linestr,"@F=split(' ');"); @@ -1532,7 +1789,7 @@ yylex() sv_catpv(linestr, "\n"); oldoldbufptr = oldbufptr = s = linestart = SvPVX(linestr); bufend = SvPVX(linestr) + SvCUR(linestr); - if (perldb && curstash != debstash) { + if (PERLDB_LINE && curstash != debstash) { SV *sv = NEWSV(85,0); sv_upgrade(sv, SVt_PVMG); @@ -1546,7 +1803,7 @@ yylex() fake_eof: if (rsfp) { if (preprocess && !in_eval) - (void)my_pclose(rsfp); + (void)PerlProc_pclose(rsfp); else if ((PerlIO *)rsfp == PerlIO_stdin()) PerlIO_clearerr(rsfp); else @@ -1580,7 +1837,7 @@ yylex() incline(s); } while (doextract); oldoldbufptr = oldbufptr = bufptr = linestart = s; - if (perldb && curstash != debstash) { + if (PERLDB_LINE && curstash != debstash) { SV *sv = NEWSV(85,0); sv_upgrade(sv, SVt_PVMG); @@ -1705,7 +1962,7 @@ yylex() } d = moreswitches(d); } while (d); - if (perldb && !oldpdb || + if (PERLDB_LINE && !oldpdb || ( minus_n || minus_p ) && !(oldn || oldp) ) /* if we have already added "LINE: while (<>) {", we must not do it again */ @@ -1714,7 +1971,7 @@ yylex() oldoldbufptr = oldbufptr = s = linestart = SvPVX(linestr); bufend = SvPVX(linestr) + SvCUR(linestr); preambled = FALSE; - if (perldb) + if (PERLDB_LINE) (void)gv_fetchfile(origfilename); goto retry; } @@ -1729,9 +1986,11 @@ yylex() } goto retry; case '\r': +#ifndef WIN32CHEAT warn("Illegal character \\%03o (carriage return)", '\r'); croak( "(Maybe you didn't strip carriage returns after a network transfer?)\n"); +#endif case ' ': case '\t': case '\f': case 013: s++; goto retry; @@ -1765,9 +2024,6 @@ yylex() s++; if (strnEQ(s,"=>",2)) { - if (dowarn) - warn("Ambiguous use of -%c => resolved to \"-%c\" =>", - (int)tmp, (int)tmp); s = force_word(bufptr,WORD,FALSE,FALSE,FALSE); OPERATOR('-'); /* unary minus */ } @@ -1853,7 +2109,7 @@ yylex() case '*': if (expect != XOPERATOR) { - s = scan_ident(s, bufend, tokenbuf, TRUE); + s = scan_ident(s, bufend, tokenbuf, sizeof tokenbuf, TRUE); expect = XOPERATOR; force_ident(tokenbuf, '*'); if (!*tokenbuf) @@ -1873,7 +2129,7 @@ yylex() Mop(OP_MODULO); } tokenbuf[0] = '%'; - s = scan_ident(s, bufend, tokenbuf+1, TRUE); + s = scan_ident(s, bufend, tokenbuf + 1, sizeof tokenbuf - 1, TRUE); if (!tokenbuf[1]) { if (s == bufend) yyerror("Final % should be \\% or %name"); @@ -1963,17 +2219,12 @@ yylex() d++; } if (d < bufend && isIDFIRST(*d)) { - d = scan_word(d, tokenbuf + 1, FALSE, &len); + d = scan_word(d, tokenbuf + 1, sizeof tokenbuf - 1, + FALSE, &len); while (d < bufend && (*d == ' ' || *d == '\t')) d++; if (*d == '}') { char minus = (tokenbuf[0] == '-'); - if (dowarn && - (keyword(tokenbuf + 1, len) || - (minus && len == 1 && isALPHA(tokenbuf[1])) || - perl_get_cv(tokenbuf + 1, FALSE) )) - warn("Ambiguous use of {%s} resolved to {\"%s\"}", - tokenbuf + !minus, tokenbuf + !minus); s = force_word(s + minus, WORD, FALSE, TRUE, FALSE); if (minus) force_next('-'); @@ -1997,19 +2248,73 @@ yylex() s = skipspace(s); if (*s == '}') OPERATOR(HASHBRACK); - if (isALPHA(*s)) { - for (t = s; t < bufend && isALNUM(*t); t++) ; + /* This hack serves to disambiguate a pair of curlies + * as being a block or an anon hash. Normally, expectation + * determines that, but in cases where we're not in a + * position to expect anything in particular (like inside + * eval"") we have to resolve the ambiguity. This code + * covers the case where the first term in the curlies is a + * quoted string. Most other cases need to be explicitly + * disambiguated by prepending a `+' before the opening + * curly in order to force resolution as an anon hash. + * + * XXX should probably propagate the outer expectation + * into eval"" to rely less on this hack, but that could + * potentially break current behavior of eval"". + * GSAR 97-07-21 + */ + t = s; + if (*s == '\'' || *s == '"' || *s == '`') { + /* common case: get past first string, handling escapes */ + for (t++; t < bufend && *t != *s;) + if (*t++ == '\\' && (*t == '\\' || *t == *s)) + t++; + t++; } - else if (*s == '\'' || *s == '"') { - t = strchr(s+1,*s); - if (!t++) - t = s; + else if (*s == 'q') { + if (++t < bufend + && (!isALNUM(*t) + || ((*t == 'q' || *t == 'x') && ++t < bufend + && !isALNUM(*t)))) { + char *tmps; + char open, close, term; + I32 brackets = 1; + + while (t < bufend && isSPACE(*t)) + t++; + term = *t; + open = term; + if (term && (tmps = strchr("([{< )]}> )]}>",term))) + term = tmps[5]; + close = term; + if (open == close) + for (t++; t < bufend; t++) { + if (*t == '\\' && t+1 < bufend && open != '\\') + t++; + else if (*t == open) + break; + } + else + for (t++; t < bufend; t++) { + if (*t == '\\' && t+1 < bufend) + t++; + else if (*t == close && --brackets <= 0) + break; + else if (*t == open) + brackets++; + } + } + t++; + } + else if (isALPHA(*s)) { + for (t++; t < bufend && isALNUM(*t); t++) ; } - else - t = s; while (t < bufend && isSPACE(*t)) t++; - if ((*t == ',' && !isLOWER(*s)) || (*t == '=' && t[1] == '>')) + /* if comma follows first term, call it an anon hash */ + /* XXX it could be a comma expression with loop modifiers */ + if (t < bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s))) + || (*t == '=' && t[1] == '>'))) OPERATOR(HASHBRACK); if (expect == XREF) expect = XTERM; @@ -2068,7 +2373,7 @@ yylex() BAop(OP_BIT_AND); } - s = scan_ident(s-1, bufend, tokenbuf, TRUE); + s = scan_ident(s - 1, bufend, tokenbuf, sizeof tokenbuf, TRUE); if (*tokenbuf) { expect = XOPERATOR; force_ident(tokenbuf, '&'); @@ -2190,7 +2495,8 @@ yylex() if (expect == XOPERATOR) no_op("Array length", bufptr); tokenbuf[0] = '@'; - s = scan_ident(s+1, bufend, tokenbuf+1, FALSE); + s = scan_ident(s + 1, bufend, tokenbuf + 1, sizeof tokenbuf - 1, + FALSE); if (!tokenbuf[1]) PREREF(DOLSHARP); expect = XOPERATOR; @@ -2201,7 +2507,7 @@ yylex() if (expect == XOPERATOR) no_op("Scalar", bufptr); tokenbuf[0] = '$'; - s = scan_ident(s, bufend, tokenbuf+1, FALSE); + s = scan_ident(s, bufend, tokenbuf + 1, sizeof tokenbuf - 1, FALSE); if (!tokenbuf[1]) { if (s == bufend) yyerror("Final $ should be \\$ or $name"); @@ -2242,11 +2548,11 @@ yylex() if (dowarn && strEQ(tokenbuf+1, "SIG") && (t = strchr(s, '}')) && (t = strchr(t, '='))) { - char tmpbuf[1024]; + char tmpbuf[sizeof tokenbuf]; STRLEN len; for (t++; isSPACE(*t); t++) ; if (isIDFIRST(*t)) { - t = scan_word(t, tmpbuf, TRUE, &len); + t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE, &len); if (*t != '(' && perl_get_cv(tmpbuf, FALSE)) warn("You need to quote \"%s\"", tmpbuf); } @@ -2264,10 +2570,25 @@ yylex() else if (strchr("&*<%", *s) && isIDFIRST(s[1])) expect = XTERM; /* e.g. print $fh &sub */ else if (isIDFIRST(*s)) { - char tmpbuf[1024]; - scan_word(s, tmpbuf, TRUE, &len); - if (keyword(tmpbuf, len)) - expect = XTERM; /* e.g. print $fh length() */ + char tmpbuf[sizeof tokenbuf]; + scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len); + if (tmp = keyword(tmpbuf, len)) { + /* binary operators exclude handle interpretations */ + switch (tmp) { + case -KEY_x: + case -KEY_eq: + case -KEY_ne: + case -KEY_gt: + case -KEY_lt: + case -KEY_ge: + case -KEY_le: + case -KEY_cmp: + break; + default: + expect = XTERM; /* e.g. print $fh length() */ + break; + } + } else { GV *gv = gv_fetchpv(tmpbuf, FALSE, SVt_PVCV); if (gv && GvCVu(gv)) @@ -2290,7 +2611,7 @@ yylex() if (expect == XOPERATOR) no_op("Array", s); tokenbuf[0] = '@'; - s = scan_ident(s, bufend, tokenbuf+1, FALSE); + s = scan_ident(s, bufend, tokenbuf + 1, sizeof tokenbuf - 1, FALSE); if (!tokenbuf[1]) { if (s == bufend) yyerror("Final @ should be \\@ or @name"); @@ -2323,7 +2644,11 @@ yylex() case '/': /* may either be division or pattern */ case '?': /* may either be conditional or pattern */ if (expect != XOPERATOR) { - check_uni(); + /* Disable warning on "study /blah/" */ + if (oldoldbufptr == last_uni + && (*last_uni != 's' || s - last_uni < 5 + || memNE(last_uni, "study", 5) || isALNUM(last_uni[5]))) + check_uni(); s = scan_pat(s); TERM(sublex_start()); } @@ -2454,9 +2779,12 @@ yylex() case 'y': case 'Y': case 'z': case 'Z': - keylookup: + keylookup: { + gv = Nullgv; + gvp = 0; + bufptr = s; - s = scan_word(s, tokenbuf, FALSE, &len); + s = scan_word(s, tokenbuf, sizeof tokenbuf, FALSE, &len); /* Some keywords can be followed by any delimiter, including ':' */ tmp = (len == 1 && strchr("msyq", tokenbuf[0]) || @@ -2487,25 +2815,30 @@ yylex() /* Is this a word before a => operator? */ if (strnEQ(d,"=>",2)) { CLINE; - if (dowarn && (tmp || perl_get_cv(tokenbuf, FALSE))) - warn("Ambiguous use of %s => resolved to \"%s\" =>", - tokenbuf, tokenbuf); yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpv(tokenbuf,0)); yylval.opval->op_private = OPpCONST_BARE; TERM(WORD); } if (tmp < 0) { /* second-class keyword? */ - GV* gv; - if (expect != XOPERATOR && - (*s != ':' || s[1] != ':') && - (gv = gv_fetchpv(tokenbuf, FALSE, SVt_PVCV)) && - GvIMPORTED_CV(gv)) + if (expect != XOPERATOR && (*s != ':' || s[1] != ':') && + (((gv = gv_fetchpv(tokenbuf, FALSE, SVt_PVCV)) && + GvCVu(gv) && GvIMPORTED_CV(gv)) || + ((gvp = (GV**)hv_fetch(globalstash,tokenbuf,len,FALSE)) && + (gv = *gvp) != (GV*)&sv_undef && + GvCVu(gv) && GvIMPORTED_CV(gv)))) { - tmp = 0; + tmp = 0; /* overridden by importation */ + } + else if (gv && !gvp + && -tmp==KEY_lock /* XXX generalizable kludge */ + && !hv_fetch(GvHVn(incgv), "Thread.pm", 9, FALSE)) + { + tmp = 0; /* any sub overrides "weak" keyword */ + } + else { + tmp = -tmp; gv = Nullgv; gvp = 0; } - else - tmp = -tmp; } reserved_word: @@ -2513,15 +2846,19 @@ yylex() default: /* not a keyword */ just_a_word: { - GV *gv; + SV *sv; char lastchar = (bufptr == oldoldbufptr ? 0 : bufptr[-1]); /* Get the rest if it looks like a package qualifier */ if (*s == '\'' || *s == ':' && s[1] == ':') { - s = scan_word(s, tokenbuf + len, TRUE, &len); - if (!len) - croak("Bad name after %s::", tokenbuf); + STRLEN morelen; + s = scan_word(s, tokenbuf + len, sizeof tokenbuf - len, + TRUE, &morelen); + if (!morelen) + croak("Bad name after %s%s", tokenbuf, + *s == '\'' ? "'" : "::"); + len += morelen; } if (expect == XOPERATOR) { @@ -2534,24 +2871,58 @@ yylex() no_op("Bareword",s); } - /* Look for a subroutine with this name in current package. */ + /* Look for a subroutine with this name in current package, + unless name is "Foo::", in which case Foo is a bearword + (and a package name). */ - gv = gv_fetchpv(tokenbuf,FALSE, SVt_PVCV); + if (len > 2 && + tokenbuf[len - 2] == ':' && tokenbuf[len - 1] == ':') + { + if (dowarn && ! gv_fetchpv(tokenbuf, FALSE, SVt_PVHV)) + warn("Bareword \"%s\" refers to nonexistent package", + tokenbuf); + len -= 2; + tokenbuf[len] = '\0'; + gv = Nullgv; + gvp = 0; + } + else { + len = 0; + if (!gv) + gv = gv_fetchpv(tokenbuf, FALSE, SVt_PVCV); + } + + /* if we saw a global override before, get the right name */ + + if (gvp) { + sv = newSVpv("CORE::GLOBAL::",14); + sv_catpv(sv,tokenbuf); + } + else + sv = newSVpv(tokenbuf,0); /* Presume this is going to be a bareword of some sort. */ CLINE; - yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpv(tokenbuf,0)); + yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv); yylval.opval->op_private = OPpCONST_BARE; + /* And if "Foo::", then that's what it certainly is. */ + + if (len) + goto safe_bareword; + /* See if it's the indirect object for a list operator. */ if (oldoldbufptr && oldoldbufptr < bufptr && (oldoldbufptr == last_lop || oldoldbufptr == last_uni) && /* NO SKIPSPACE BEFORE HERE! */ - (expect == XREF || - (opargs[last_lop_op] >> OASHIFT & 7) == OA_FILEREF) ) + (expect == XREF + || ((opargs[last_lop_op] >> OASHIFT)& 7) == OA_FILEREF + || (last_lop_op == OP_ENTERSUB + && last_proto + && last_proto[last_proto[0] == ';' ? 1 : 0] == '*')) ) { bool immediate_paren = *s == '('; @@ -2580,6 +2951,13 @@ yylex() s = skipspace(s); if (*s == '(') { CLINE; + if (gv && GvCVu(gv)) { + for (d = s + 1; *d == ' ' || *d == '\t'; d++) ; + if (*d == ')' && (sv = cv_const_sv(GvCV(gv)))) { + s = d + 1; + goto its_constant; + } + } nextval[nexttoke].opval = yylval.opval; expect = XOPERATOR; force_next(WORD); @@ -2603,28 +2981,20 @@ yylex() /* Not a method, so call it a subroutine (if defined) */ if (gv && GvCVu(gv)) { - CV* cv = GvCV(gv); - if (*s == '(') { - nextval[nexttoke].opval = yylval.opval; - expect = XTERM; - force_next(WORD); - yylval.ival = 0; - TOKEN('&'); - } + CV* cv; if (lastchar == '-') warn("Ambiguous use of -%s resolved as -&%s()", tokenbuf, tokenbuf); last_lop = oldbufptr; last_lop_op = OP_ENTERSUB; /* Check for a constant sub */ - { - SV *sv = cv_const_sv(cv); - if (sv) { - SvREFCNT_dec(((SVOP*)yylval.opval)->op_sv); - ((SVOP*)yylval.opval)->op_sv = SvREFCNT_inc(sv); - yylval.opval->op_private = 0; - TOKEN(WORD); - } + cv = GvCV(gv); + if ((sv = cv_const_sv(cv))) { + its_constant: + SvREFCNT_dec(((SVOP*)yylval.opval)->op_sv); + ((SVOP*)yylval.opval)->op_sv = SvREFCNT_inc(sv); + yylval.opval->op_private = 0; + TOKEN(WORD); } /* Resolve to GV now. */ @@ -2633,16 +3003,17 @@ yylex() /* Is there a prototype? */ if (SvPOK(cv)) { STRLEN len; - char *proto = SvPV((SV*)cv, len); + last_proto = SvPV((SV*)cv, len); if (!len) TERM(FUNC0SUB); - if (strEQ(proto, "$")) + if (strEQ(last_proto, "$")) OPERATOR(UNIOPSUB); - if (*proto == '&' && *s == '{') { + if (*last_proto == '&' && *s == '{') { sv_setpv(subname,"__ANON__"); PREBLOCK(LSTOPSUB); } - } + } else + last_proto = NULL; nextval[nexttoke].opval = yylval.opval; expect = XTERM; force_next(WORD); @@ -2673,6 +3044,8 @@ yylex() warn(warn_reserved, tokenbuf); } } + + safe_bareword: if (lastchar && strchr("*%&", lastchar)) { warn("Operator or semicolon missing before %c%s", lastchar, tokenbuf); @@ -2683,12 +3056,13 @@ yylex() } case KEY___FILE__: + yylval.opval = (OP*)newSVOP(OP_CONST, 0, + newSVsv(GvSV(curcop->cop_filegv))); + TERM(THING); + case KEY___LINE__: - if (tokenbuf[2] == 'L') - (void)sprintf(tokenbuf,"%ld",(long)curcop->cop_line); - else - strcpy(tokenbuf, SvPVX(GvSV(curcop->cop_filegv))); - yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpv(tokenbuf,0)); + yylval.opval = (OP*)newSVOP(OP_CONST, 0, + newSVpvf("%ld", (long)curcop->cop_line)); TERM(THING); case KEY___PACKAGE__: @@ -2704,12 +3078,10 @@ yylex() /*SUPPRESS 560*/ if (rsfp && (!in_eval || tokenbuf[2] == 'D')) { - char dname[256]; char *pname = "main"; if (tokenbuf[2] == 'D') pname = HvNAME(curstash ? curstash : defstash); - sprintf(dname,"%s::DATA", pname); - gv = gv_fetchpv(dname,TRUE, SVt_PVIO); + gv = gv_fetchpv(form("%s::DATA", pname), TRUE, SVt_PVIO); GvMULTI_on(gv); if (!GvIO(gv)) GvIOp(gv) = newIO(); @@ -2737,6 +3109,7 @@ yylex() case KEY_DESTROY: case KEY_BEGIN: case KEY_END: + case KEY_INIT: if (expect == XSTATE) { s = bufptr; goto really_sub; @@ -2747,7 +3120,7 @@ yylex() if (*s == ':' && s[1] == ':') { s += 2; d = s; - s = scan_word(s, tokenbuf, FALSE, &len); + s = scan_word(s, tokenbuf, sizeof tokenbuf, FALSE, &len); tmp = keyword(tokenbuf, len); if (tmp < 0) tmp = -tmp; @@ -2916,7 +3289,7 @@ yylex() case KEY_foreach: yylval.ival = curcop->cop_line; s = skipspace(s); - if (isIDFIRST(*s)) { + if (expect == XSTATE && isIDFIRST(*s)) { char *p = s; if ((bufend - p) >= 3 && strnEQ(p, "my", 2) && isSPACE(*(p + 2))) @@ -3099,6 +3472,9 @@ yylex() case KEY_listen: LOP(OP_LISTEN,XTERM); + case KEY_lock: + UNI(OP_LOCK); + case KEY_lstat: UNI(OP_LSTAT); @@ -3126,6 +3502,17 @@ yylex() case KEY_my: in_my = TRUE; + s = skipspace(s); + if (isIDFIRST(*s)) { + s = scan_word(s, tokenbuf, sizeof tokenbuf, TRUE, &len); + in_my_stash = gv_stashpv(tokenbuf, FALSE); + if (!in_my_stash) { + char tmpbuf[1024]; + bufptr = s; + sprintf(tmpbuf, "No such class %.1000s", tokenbuf); + yyerror(tmpbuf); + } + } OPERATOR(MY); case KEY_next: @@ -3229,7 +3616,7 @@ yylex() } } force_next(')'); - nextval[nexttoke].opval = (OP*)newSVOP(OP_CONST, 0, q(lex_stuff)); + nextval[nexttoke].opval = (OP*)newSVOP(OP_CONST, 0, tokeq(lex_stuff)); lex_stuff = Nullsv; force_next(THING); force_next(','); @@ -3418,7 +3805,7 @@ yylex() if (*s == ';' || *s == ')') /* probably a close */ croak("sort is now a reserved word"); expect = XTERM; - s = force_word(s,WORD,TRUE,TRUE,TRUE); + s = force_word(s,WORD,TRUE,TRUE,FALSE); LOP(OP_SORT,XREF); case KEY_split: @@ -3452,9 +3839,9 @@ yylex() s = skipspace(s); if (isIDFIRST(*s) || *s == '\'' || *s == ':') { - char tmpbuf[128]; + char tmpbuf[sizeof tokenbuf]; expect = XBLOCK; - d = scan_word(s, tmpbuf, TRUE, &len); + d = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len); if (strchr(tmpbuf, ':')) sv_setpv(subname, tmpbuf); else { @@ -3530,12 +3917,12 @@ yylex() case KEY_sysopen: LOP(OP_SYSOPEN,XTERM); - case KEY_sysread: - LOP(OP_SYSREAD,XTERM); - case KEY_sysseek: LOP(OP_SYSSEEK,XTERM); + case KEY_sysread: + LOP(OP_SYSREAD,XTERM); + case KEY_syswrite: LOP(OP_SYSWRITE,XTERM); @@ -3664,13 +4051,11 @@ yylex() s = scan_trans(s); TERM(sublex_start()); } - } + }} } I32 -keyword(d, len) -register char *d; -I32 len; +keyword(register char *d, I32 len) { switch (*d) { case '_': @@ -3904,7 +4289,7 @@ I32 len; case 4: if (strEQ(d,"grep")) return KEY_grep; if (strEQ(d,"goto")) return KEY_goto; - if (strEQ(d,"glob")) return -KEY_glob; + if (strEQ(d,"glob")) return KEY_glob; break; case 6: if (strEQ(d,"gmtime")) return -KEY_gmtime; @@ -3914,6 +4299,9 @@ I32 len; case 'h': if (strEQ(d,"hex")) return -KEY_hex; break; + case 'I': + if (strEQ(d,"INIT")) return KEY_INIT; + break; case 'i': switch (len) { case 2: @@ -3956,6 +4344,7 @@ I32 len; case 4: if (strEQ(d,"last")) return KEY_last; if (strEQ(d,"link")) return -KEY_link; + if (strEQ(d,"lock")) return -KEY_lock; break; case 5: if (strEQ(d,"local")) return KEY_local; @@ -4281,11 +4670,8 @@ I32 len; return 0; } -static void -checkcomma(s,name,what) -register char *s; -char *name; -char *what; +STATIC void +checkcomma(register char *s, char *name, char *what) { char *w; @@ -4299,7 +4685,7 @@ char *what; } if (*w) for (; *w && isSPACE(*w); w++) ; - if (!*w || !strchr(";|})]oa!=", *w)) /* an advisory hack only... */ + if (!*w || !strchr(";|})]oaiuw!=", *w)) /* an advisory hack only... */ warn("%s (...) interpreted as function",name); } while (s < bufend && isSPACE(*s)) @@ -4326,15 +4712,83 @@ char *what; } } -static char * -scan_word(s, dest, allow_package, slp) -register char *s; -char *dest; -int allow_package; -STRLEN *slp; +STATIC SV * +new_constant(char *s, STRLEN len, char *key, SV *sv, SV *pv, char *type) +{ + dSP; + HV *table = perl_get_hv("\10", FALSE); /* ^H */ + BINOP myop; + SV *res; + bool oldcatch = CATCH_GET; + SV **cvp; + SV *cv, *typesv; + char buf[128]; + + if (!table) { + yyerror("%^H is not defined"); + return sv; + } + cvp = hv_fetch(table, key, strlen(key), FALSE); + if (!cvp || !SvOK(*cvp)) { + sprintf(buf,"$^H{%s} is not defined", key); + yyerror(buf); + return sv; + } + sv_2mortal(sv); /* Parent created it permanently */ + cv = *cvp; + if (!pv) + pv = sv_2mortal(newSVpv(s, len)); + if (type) + typesv = sv_2mortal(newSVpv(type, 0)); + else + typesv = &sv_undef; + CATCH_SET(TRUE); + Zero(&myop, 1, BINOP); + myop.op_last = (OP *) &myop; + myop.op_next = Nullop; + myop.op_flags = OPf_WANT_SCALAR | OPf_STACKED; + + PUSHSTACKi(SI_OVERLOAD); + ENTER; + SAVEOP(); + op = (OP *) &myop; + if (PERLDB_SUB && curstash != debstash) + op->op_private |= OPpENTERSUB_DB; + PUTBACK; + pp_pushmark(ARGS); + + EXTEND(sp, 3); + PUSHs(pv); + PUSHs(sv); + PUSHs(typesv); + PUSHs(cv); + PUTBACK; + + if (op = pp_entersub(ARGS)) + CALLRUNOPS(); + LEAVE; + SPAGAIN; + + res = POPs; + PUTBACK; + CATCH_SET(oldcatch); + POPSTACK; + + if (!SvOK(res)) { + sprintf(buf,"Call to &{$^H{%s}} did not return a defined value", key); + yyerror(buf); + } + return SvREFCNT_inc(res); +} + +STATIC char * +scan_word(register char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp) { register char *d = dest; + register char *e = d + destlen - 3; /* two-character token, ending NUL */ for (;;) { + if (d >= e) + croak(ident_too_long); if (isALNUM(*s)) *d++ = *s++; else if (*s == '\'' && allow_package && isIDFIRST(s[1])) { @@ -4342,7 +4796,7 @@ STRLEN *slp; *d++ = ':'; s++; } - else if (*s == ':' && s[1] == ':' && allow_package && isIDFIRST(s[2])) { + else if (*s == ':' && s[1] == ':' && allow_package && s[2] != '$') { *d++ = *s++; *d++ = *s++; } @@ -4354,14 +4808,11 @@ STRLEN *slp; } } -static char * -scan_ident(s,send,dest,ck_uni) -register char *s; -register char *send; -char *dest; -I32 ck_uni; +STATIC char * +scan_ident(register char *s, register char *send, char *dest, STRLEN destlen, I32 ck_uni) { register char *d; + register char *e; char *bracket = 0; char funny = *s++; @@ -4370,12 +4821,18 @@ I32 ck_uni; if (isSPACE(*s)) s = skipspace(s); d = dest; + e = d + destlen - 3; /* two-character token, ending NUL */ if (isDIGIT(*s)) { - while (isDIGIT(*s)) + while (isDIGIT(*s)) { + if (d >= e) + croak(ident_too_long); *d++ = *s++; + } } else { for (;;) { + if (d >= e) + croak(ident_too_long); if (isALNUM(*s)) *d++ = *s++; else if (*s == '\'' && isIDFIRST(s[1])) { @@ -4453,7 +4910,7 @@ I32 ck_uni; lex_state = LEX_INTERPEND; if (funny == '#') funny = '@'; - if (dowarn && + if (dowarn && lex_state == LEX_NORMAL && (keyword(dest, d - dest) || perl_get_cv(dest, FALSE))) warn("Ambiguous use of %c{%s} resolved to %c%s", funny, dest, funny, dest); @@ -4468,27 +4925,28 @@ I32 ck_uni; return s; } -void pmflag(pmfl,ch) -U16* pmfl; -int ch; +void pmflag(U16 *pmfl, int ch) { if (ch == 'i') *pmfl |= PMf_FOLD; else if (ch == 'g') *pmfl |= PMf_GLOBAL; + else if (ch == 'c') + *pmfl |= PMf_CONTINUE; else if (ch == 'o') *pmfl |= PMf_KEEP; else if (ch == 'm') *pmfl |= PMf_MULTILINE; else if (ch == 's') *pmfl |= PMf_SINGLELINE; + else if (ch == 't') + *pmfl |= PMf_TAINTMEM; else if (ch == 'x') *pmfl |= PMf_EXTENDED; } -static char * -scan_pat(start) -char *start; +STATIC char * +scan_pat(char *start) { PMOP *pm; char *s; @@ -4504,7 +4962,7 @@ char *start; pm = (PMOP*)newPMOP(OP_MATCH, 0); if (multi_open == '?') pm->op_pmflags |= PMf_ONCE; - while (*s && strchr("iogmsx", *s)) + while (*s && strchr("iogcmstx", *s)) pmflag(&pm->op_pmflags,*s++); pm->op_pmpermflags = pm->op_pmflags; @@ -4513,9 +4971,8 @@ char *start; return s; } -static char * -scan_subst(start) -char *start; +STATIC char * +scan_subst(char *start) { register char *s; register PMOP *pm; @@ -4550,13 +5007,15 @@ char *start; multi_start = first_start; /* so whole substitution is taken together */ pm = (PMOP*)newPMOP(OP_SUBST, 0); - while (*s && strchr("iogmsex", *s)) { + while (*s) { if (*s == 'e') { s++; es++; } - else + else if (strchr("iogcmstx", *s)) pmflag(&pm->op_pmflags,*s++); + else + break; } if (es) { @@ -4579,55 +5038,14 @@ char *start; return s; } -void -hoistmust(pm) -register PMOP *pm; -{ - if (!pm->op_pmshort && pm->op_pmregexp->regstart && - (!pm->op_pmregexp->regmust || pm->op_pmregexp->reganch & ROPT_ANCH) - ) { - if (!(pm->op_pmregexp->reganch & ROPT_ANCH)) - pm->op_pmflags |= PMf_SCANFIRST; - pm->op_pmshort = SvREFCNT_inc(pm->op_pmregexp->regstart); - pm->op_pmslen = SvCUR(pm->op_pmshort); - } - else if (pm->op_pmregexp->regmust) {/* is there a better short-circuit? */ - if (pm->op_pmshort && - sv_eq(pm->op_pmshort,pm->op_pmregexp->regmust)) - { - if (pm->op_pmflags & PMf_SCANFIRST) { - SvREFCNT_dec(pm->op_pmshort); - pm->op_pmshort = Nullsv; - } - else { - SvREFCNT_dec(pm->op_pmregexp->regmust); - pm->op_pmregexp->regmust = Nullsv; - return; - } - } - /* promote the better string */ - if ((!pm->op_pmshort && - !(pm->op_pmregexp->reganch & ROPT_ANCH_GPOS)) || - ((pm->op_pmflags & PMf_SCANFIRST) && - (SvCUR(pm->op_pmshort) < SvCUR(pm->op_pmregexp->regmust)))) { - SvREFCNT_dec(pm->op_pmshort); /* ok if null */ - pm->op_pmshort = pm->op_pmregexp->regmust; - pm->op_pmslen = SvCUR(pm->op_pmshort); - pm->op_pmregexp->regmust = Nullsv; - pm->op_pmflags |= PMf_SCANFIRST; - } - } -} - -static char * -scan_trans(start) -char *start; +STATIC char * +scan_trans(char *start) { register char* s; - OP *op; + OP *o; short *tbl; I32 squash; - I32 delete; + I32 Delete; I32 complement; yylval.ival = OP_NULL; @@ -4637,7 +5055,7 @@ char *start; if (lex_stuff) SvREFCNT_dec(lex_stuff); lex_stuff = Nullsv; - croak("Translation pattern not terminated"); + croak("Transliteration pattern not terminated"); } if (s[-1] == multi_open) s--; @@ -4650,54 +5068,56 @@ char *start; if (lex_repl) SvREFCNT_dec(lex_repl); lex_repl = Nullsv; - croak("Translation replacement not terminated"); + croak("Transliteration replacement not terminated"); } New(803,tbl,256,short); - op = newPVOP(OP_TRANS, 0, (char*)tbl); + o = newPVOP(OP_TRANS, 0, (char*)tbl); - complement = delete = squash = 0; + complement = Delete = squash = 0; while (*s == 'c' || *s == 'd' || *s == 's') { if (*s == 'c') complement = OPpTRANS_COMPLEMENT; else if (*s == 'd') - delete = OPpTRANS_DELETE; + Delete = OPpTRANS_DELETE; else squash = OPpTRANS_SQUASH; s++; } - op->op_private = delete|squash|complement; + o->op_private = Delete|squash|complement; - lex_op = op; + lex_op = o; yylval.ival = OP_TRANS; return s; } -static char * -scan_heredoc(s) -register char *s; +STATIC char * +scan_heredoc(register char *s) { + dTHR; SV *herewas; I32 op_type = OP_SCALAR; I32 len; SV *tmpstr; char term; register char *d; + register char *e; char *peek; - int outer = (rsfp && !lex_inwhat); + int outer = (rsfp && !(lex_inwhat == OP_SCALAR)); s += 2; d = tokenbuf; + e = tokenbuf + sizeof tokenbuf - 1; if (!outer) *d++ = '\n'; for (peek = s; *peek == ' ' || *peek == '\t'; peek++) ; if (*peek && strchr("`'\"",*peek)) { s = peek; term = *s++; - s = cpytill(d,s,bufend,term,&len); + s = delimcpy(d, e, s, bufend, term, &len); + d += len; if (s < bufend) s++; - d += len; } else { if (*s == '\\') @@ -4706,9 +5126,13 @@ register char *s; term = '"'; if (!isALNUM(*s)) deprecate("bare << to mean <<\"\""); - while (isALNUM(*s)) - *d++ = *s++; - } /* assuming tokenbuf won't clobber */ + for (; isALNUM(*s); s++) { + if (d < e) + *d++ = *s; + } + } + if (d >= tokenbuf + sizeof tokenbuf - 1) + croak("Delimiter for here document is too long"); *d++ = '\n'; *d = '\0'; len = d - tokenbuf; @@ -4719,7 +5143,7 @@ register char *s; s--, herewas = newSVpv(s,d-s); s += SvCUR(herewas); - tmpstr = NEWSV(87,80); + tmpstr = NEWSV(87,79); sv_upgrade(tmpstr, SVt_PVIV); if (term == '\'') { op_type = OP_CONST; @@ -4747,6 +5171,8 @@ register char *s; } sv_setpvn(tmpstr,d+1,s-d); s += len - 1; + curcop->cop_line++; /* the preceding stmt passes a newline */ + sv_catpvn(herewas,s,bufend-s); sv_setsv(linestr,herewas); oldoldbufptr = oldbufptr = bufptr = s = linestart = SvPVX(linestr); @@ -4761,7 +5187,7 @@ register char *s; missingterm(tokenbuf); } curcop->cop_line++; - if (perldb && curstash != debstash) { + if (PERLDB_LINE && curstash != debstash) { SV *sv = NEWSV(88,0); sv_upgrade(sv, SVt_PVMG); @@ -4793,42 +5219,93 @@ register char *s; return s; } -static char * -scan_inputsymbol(start) -char *start; +/* scan_inputsymbol + takes: current position in input buffer + returns: new position in input buffer + side-effects: yylval and lex_op are set. + + This code handles: + + <> read from ARGV + read from filehandle + read from package qualified filehandle + read from package qualified filehandle + <$fh> read from filehandle in $fh + <*.h> filename glob + +*/ + +STATIC char * +scan_inputsymbol(char *start) { - register char *s = start; + register char *s = start; /* current position in buffer */ register char *d; + register char *e; I32 len; - d = tokenbuf; - s = cpytill(d, s+1, bufend, '>', &len); - if (s < bufend) - s++; - else + d = tokenbuf; /* start of temp holding space */ + e = tokenbuf + sizeof tokenbuf; /* end of temp holding space */ + s = delimcpy(d, e, s + 1, bufend, '>', &len); /* extract until > */ + + /* die if we didn't have space for the contents of the <>, + or if it didn't end + */ + + if (len >= sizeof tokenbuf) + croak("Excessively long <> operator"); + if (s >= bufend) croak("Unterminated <> operator"); + s++; + + /* check for <$fh> + Remember, only scalar variables are interpreted as filehandles by + this code. Anything more complex (e.g., <$fh{$num}>) will be + treated as a glob() call. + This code makes use of the fact that except for the $ at the front, + a scalar variable and a filehandle look the same. + */ if (*d == '$' && d[1]) d++; + + /* allow or */ while (*d && (isALNUM(*d) || *d == '\'' || *d == ':')) d++; + + /* If we've tried to read what we allow filehandles to look like, and + there's still text left, then it must be a glob() and not a getline. + Use scan_str to pull out the stuff between the <> and treat it + as nothing more than a string. + */ + if (d - tokenbuf != len) { yylval.ival = OP_GLOB; set_csh(); s = scan_str(start); if (!s) - croak("Glob not terminated"); + croak("Glob not terminated"); return s; } else { + /* we're in a filehandle read situation */ d = tokenbuf; + + /* turn <> into */ if (!len) (void)strcpy(d,"ARGV"); + + /* if <$fh>, create the ops to turn the variable into a + filehandle + */ if (*d == '$') { I32 tmp; - if (tmp = pad_findmy(d)) { - OP *op = newOP(OP_PADSV, 0); - op->op_targ = tmp; - lex_op = (OP*)newUNOP(OP_READLINE, 0, newUNOP(OP_RV2GV, 0, op)); + + /* try to find it in the pad for this block, otherwise find + add symbol table ops + */ + if ((tmp = pad_findmy(d)) != NOT_IN_PAD) { + OP *o = newOP(OP_PADSV, 0); + o->op_targ = tmp; + lex_op = (OP*)newUNOP(OP_READLINE, 0, newUNOP(OP_RV2GV, 0, o)); } else { GV *gv = gv_fetchpv(d+1,TRUE, SVt_PV); @@ -4837,91 +5314,178 @@ char *start; newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)))); } + /* we created the ops in lex_op, so make yylval.ival a null op */ yylval.ival = OP_NULL; } + + /* If it's none of the above, it must be a literal filehandle + ( or ) so build a simple readline OP */ else { GV *gv = gv_fetchpv(d,TRUE, SVt_PVIO); lex_op = (OP*)newUNOP(OP_READLINE, 0, newGVOP(OP_GV, 0, gv)); yylval.ival = OP_NULL; } } + return s; } -static char * -scan_str(start) -char *start; -{ - SV *sv; - char *tmps; - register char *s = start; - register char term; - register char *to; - I32 brackets = 1; +/* scan_str + takes: start position in buffer + returns: position to continue reading from buffer + side-effects: multi_start, multi_close, lex_repl or lex_stuff, and + updates the read buffer. + + This subroutine pulls a string out of the input. It is called for: + q single quotes q(literal text) + ' single quotes 'literal text' + qq double quotes qq(interpolate $here please) + " double quotes "interpolate $here please" + qx backticks qx(/bin/ls -l) + ` backticks `/bin/ls -l` + qw quote words @EXPORT_OK = qw( func() $spam ) + m// regexp match m/this/ + s/// regexp substitute s/this/that/ + tr/// string transliterate tr/this/that/ + y/// string transliterate y/this/that/ + ($*@) sub prototypes sub foo ($) + <> readline or globs , <>, <$fh>, or <*.c> + + In most of these cases (all but <>, patterns and transliterate) + yylex() calls scan_str(). m// makes yylex() call scan_pat() which + calls scan_str(). s/// makes yylex() call scan_subst() which calls + scan_str(). tr/// and y/// make yylex() call scan_trans() which + calls scan_str(). + + It skips whitespace before the string starts, and treats the first + character as the delimiter. If the delimiter is one of ([{< then + the corresponding "close" character )]}> is used as the closing + delimiter. It allows quoting of delimiters, and if the string has + balanced delimiters ([{<>}]) it allows nesting. + + The lexer always reads these strings into lex_stuff, except in the + case of the operators which take *two* arguments (s/// and tr///) + when it checks to see if lex_stuff is full (presumably with the 1st + arg to s or tr) and if so puts the string into lex_repl. + +*/ + +STATIC char * +scan_str(char *start) +{ + dTHR; + SV *sv; /* scalar value: string */ + char *tmps; /* temp string, used for delimiter matching */ + register char *s = start; /* current position in the buffer */ + register char term; /* terminating character */ + register char *to; /* current position in the sv's data */ + I32 brackets = 1; /* bracket nesting level */ + + /* skip space before the delimiter */ if (isSPACE(*s)) s = skipspace(s); + + /* mark where we are, in case we need to report errors */ CLINE; + + /* after skipping whitespace, the next character is the terminator */ term = *s; + /* mark where we are */ multi_start = curcop->cop_line; multi_open = term; + + /* find corresponding closing delimiter */ if (term && (tmps = strchr("([{< )]}> )]}>",term))) term = tmps[5]; multi_close = term; - sv = NEWSV(87,80); + /* create a new SV to hold the contents. 87 is leak category, I'm + assuming. 79 is the SV's initial length. What a random number. */ + sv = NEWSV(87,79); sv_upgrade(sv, SVt_PVIV); SvIVX(sv) = term; (void)SvPOK_only(sv); /* validate pointer */ + + /* move past delimiter and try to read a complete string */ s++; for (;;) { + /* extend sv if need be */ SvGROW(sv, SvCUR(sv) + (bufend - s) + 1); + /* set 'to' to the next character in the sv's string */ to = SvPVX(sv)+SvCUR(sv); + + /* if open delimiter is the close delimiter read unbridle */ if (multi_open == multi_close) { for (; s < bufend; s++,to++) { + /* embedded newlines increment the current line number */ if (*s == '\n' && !rsfp) curcop->cop_line++; + /* handle quoted delimiters */ if (*s == '\\' && s+1 < bufend && term != '\\') { if (s[1] == term) s++; + /* any other quotes are simply copied straight through */ else *to++ = *s++; } + /* terminate when run out of buffer (the for() condition), or + have found the terminator */ else if (*s == term) break; *to = *s; } } + + /* if the terminator isn't the same as the start character (e.g., + matched brackets), we have to allow more in the quoting, and + be prepared for nested brackets. + */ else { + /* read until we run out of string, or we find the terminator */ for (; s < bufend; s++,to++) { + /* embedded newlines increment the line count */ if (*s == '\n' && !rsfp) curcop->cop_line++; - if (*s == '\\' && s+1 < bufend && term != '\\') { - if (s[1] == term) + /* backslashes can escape the open or closing characters */ + if (*s == '\\' && s+1 < bufend) { + if ((s[1] == multi_open) || (s[1] == multi_close)) s++; else *to++ = *s++; } - else if (*s == term && --brackets <= 0) + /* allow nested opens and closes */ + else if (*s == multi_close && --brackets <= 0) break; else if (*s == multi_open) brackets++; *to = *s; } } + /* terminate the copied string and update the sv's end-of-string */ *to = '\0'; SvCUR_set(sv, to - SvPVX(sv)); - if (s < bufend) break; /* string ends on this line? */ + /* + * this next chunk reads more into the buffer if we're not done yet + */ + + if (s < bufend) break; /* handle case where we are done yet :-) */ + /* if we're out of file, or a read fails, bail and reset the current + line marker so we can report where the unterminated string began + */ if (!rsfp || !(oldoldbufptr = oldbufptr = s = linestart = filter_gets(linestr, rsfp, 0))) { sv_free(sv); curcop->cop_line = multi_start; return Nullch; } + /* we read a line, so increment our line counter */ curcop->cop_line++; - if (perldb && curstash != debstash) { + + /* update debugger info */ + if (PERLDB_LINE && curstash != debstash) { SV *sv = NEWSV(88,0); sv_upgrade(sv, SVt_PVMG); @@ -4929,14 +5493,26 @@ char *start; av_store(GvAV(curcop->cop_filegv), (I32)curcop->cop_line, sv); } + + /* having changed the buffer, we must update bufend */ bufend = SvPVX(linestr) + SvCUR(linestr); } + + /* at this point, we have successfully read the delimited string */ + multi_end = curcop->cop_line; s++; + + /* if we allocated too much space, give some back */ if (SvCUR(sv) + 5 < SvLEN(sv)) { SvLEN_set(sv, SvCUR(sv) + 1); Renew(SvPVX(sv), SvLEN(sv), char); } + + /* decide whether this is the first or second quoted string we've read + for this op + */ + if (lex_stuff) lex_repl = sv; else @@ -4944,130 +5520,256 @@ char *start; return s; } +/* + scan_num + takes: pointer to position in buffer + returns: pointer to new position in buffer + side-effects: builds ops for the constant in yylval.op + + Read a number in any of the formats that Perl accepts: + + 0(x[0-7A-F]+)|([0-7]+) + [\d_]+(\.[\d_]*)?[Ee](\d+) + + Underbars (_) are allowed in decimal numbers. If -w is on, + underbars before a decimal point must be at three digit intervals. + + Like most scan_ routines, it uses the tokenbuf buffer to hold the + thing it reads. + + If it reads a number without a decimal point or an exponent, it will + try converting the number to an integer and see if it can do so + without loss of precision. +*/ + char * -scan_num(start) -char *start; +scan_num(char *start) { - register char *s = start; - register char *d; - I32 tryiv; - double value; - SV *sv; - I32 floatit; - char *lastub = 0; + register char *s = start; /* current position in buffer */ + register char *d; /* destination in temp buffer */ + register char *e; /* end of temp buffer */ + I32 tryiv; /* used to see if it can be an int */ + double value; /* number read, as a double */ + SV *sv; /* place to put the converted number */ + I32 floatit; /* boolean: int or float? */ + char *lastub = 0; /* position of last underbar */ + static char number_too_long[] = "Number too long"; + + /* We use the first character to decide what type of number this is */ switch (*s) { default: - croak("panic: scan_num"); + croak("panic: scan_num"); + + /* if it starts with a 0, it could be an octal number, a decimal in + 0.13 disguise, or a hexadecimal number. + */ case '0': { + /* variables: + u holds the "number so far" + shift the power of 2 of the base (hex == 4, octal == 3) + overflowed was the number more than we can hold? + + Shift is used when we add a digit. It also serves as an "are + we in octal or hex?" indicator to disallow hex characters when + in octal mode. + */ UV u; I32 shift; bool overflowed = FALSE; + /* check for hex */ if (s[1] == 'x') { shift = 4; s += 2; } + /* check for a decimal in disguise */ else if (s[1] == '.') goto decimal; + /* so it must be octal */ else shift = 3; u = 0; + + /* read the rest of the octal number */ for (;;) { - UV n, b; + UV n, b; /* n is used in the overflow test, b is the digit we're adding on */ switch (*s) { + + /* if we don't mention it, we're done */ default: goto out; + + /* _ are ignored */ case '_': s++; break; + + /* 8 and 9 are not octal */ case '8': case '9': if (shift != 4) yyerror("Illegal octal digit"); /* FALL THROUGH */ + + /* octal digits */ case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': - b = *s++ & 15; + b = *s++ & 15; /* ASCII digit -> value of digit */ goto digit; + + /* hex digits */ case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': + /* make sure they said 0x */ if (shift != 4) goto out; b = (*s++ & 7) + 9; + + /* Prepare to put the digit we have onto the end + of the number so far. We check for overflows. + */ + digit: - n = u << shift; - if (!overflowed && (n >> shift) != u) { + n = u << shift; /* make room for the digit */ + if (!overflowed && (n >> shift) != u + && !(hints & HINT_NEW_BINARY)) { warn("Integer overflow in %s number", (shift == 4) ? "hex" : "octal"); overflowed = TRUE; } - u = n | b; + u = n | b; /* add the digit to the end */ break; } } + + /* if we get here, we had success: make a scalar value from + the number. + */ out: sv = NEWSV(92,0); sv_setuv(sv, u); + if ( hints & HINT_NEW_BINARY) + sv = new_constant(start, s - start, "binary", sv, Nullsv, NULL); } break; + + /* + handle decimal numbers. + we're also sent here when we read a 0 as the first digit + */ case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': case '.': decimal: d = tokenbuf; + e = tokenbuf + sizeof tokenbuf - 6; /* room for various punctuation */ floatit = FALSE; + + /* read next group of digits and _ and copy into d */ while (isDIGIT(*s) || *s == '_') { + /* skip underscores, checking for misplaced ones + if -w is on + */ if (*s == '_') { if (dowarn && lastub && s - lastub != 3) warn("Misplaced _ in number"); lastub = ++s; } - else + else { + /* check for end of fixed-length buffer */ + if (d >= e) + croak(number_too_long); + /* if we're ok, copy the character */ *d++ = *s++; + } } + + /* final misplaced underbar check */ if (dowarn && lastub && s - lastub != 3) warn("Misplaced _ in number"); + + /* read a decimal portion if there is one. avoid + 3..5 being interpreted as the number 3. followed + by .5 + */ if (*s == '.' && s[1] != '.') { floatit = TRUE; *d++ = *s++; - while (isDIGIT(*s) || *s == '_') { - if (*s == '_') - s++; - else - *d++ = *s++; + + /* copy, ignoring underbars, until we run out of + digits. Note: no misplaced underbar checks! + */ + for (; isDIGIT(*s) || *s == '_'; s++) { + /* fixed length buffer check */ + if (d >= e) + croak(number_too_long); + if (*s != '_') + *d++ = *s; } } + + /* read exponent part, if present */ if (*s && strchr("eE",*s) && strchr("+-0123456789",s[1])) { floatit = TRUE; s++; + + /* regardless of whether user said 3E5 or 3e5, use lower 'e' */ *d++ = 'e'; /* At least some Mach atof()s don't grok 'E' */ + + /* allow positive or negative exponent */ if (*s == '+' || *s == '-') *d++ = *s++; - while (isDIGIT(*s)) + + /* read digits of exponent (no underbars :-) */ + while (isDIGIT(*s)) { + if (d >= e) + croak(number_too_long); *d++ = *s++; + } } + + /* terminate the string */ *d = '\0'; + + /* make an sv from the string */ sv = NEWSV(92,0); + /* reset numeric locale in case we were earlier left in Swaziland */ SET_NUMERIC_STANDARD(); value = atof(tokenbuf); + + /* + See if we can make do with an integer value without loss of + precision. We use I_V to cast to an int, because some + compilers have issues. Then we try casting it back and see + if it was the same. We only do this if we know we + specifically read an integer. + + Note: if floatit is true, then we don't need to do the + conversion at all. + */ tryiv = I_V(value); if (!floatit && (double)tryiv == value) sv_setiv(sv, tryiv); else sv_setnv(sv, value); + if ( floatit ? (hints & HINT_NEW_FLOAT) : (hints & HINT_NEW_INTEGER) ) + sv = new_constant(tokenbuf, d - tokenbuf, + (floatit ? "float" : "integer"), sv, Nullsv, NULL); break; } + /* make the op for the constant and return */ + yylval.opval = newSVOP(OP_CONST, 0, sv); return s; } -static char * -scan_formline(s) -register char *s; +STATIC char * +scan_formline(register char *s) { + dTHR; register char *eol; register char *t; SV *stuff = newSVpv("",0); @@ -5134,8 +5836,8 @@ register char *s; return s; } -static void -set_csh() +STATIC void +set_csh(void) { #ifdef CSH if (!cshlen) @@ -5144,10 +5846,9 @@ set_csh() } I32 -start_subparse(is_format, flags) -I32 is_format; -U32 flags; +start_subparse(I32 is_format, U32 flags) { + dTHR; I32 oldsavestack_ix = savestack_ix; CV* outsidecv = compcv; AV* comppadlist; @@ -5172,13 +5873,21 @@ U32 flags; CvFLAGS(compcv) |= flags; comppad = newAV(); + av_push(comppad, Nullsv); + curpad = AvARRAY(comppad); comppad_name = newAV(); comppad_name_fill = 0; min_intro_pending = 0; - av_push(comppad, Nullsv); - curpad = AvARRAY(comppad); padix = 0; subline = curcop->cop_line; +#ifdef USE_THREADS + av_store(comppad_name, 0, newSVpv("@_", 2)); + curpad[0] = (SV*)newAV(); + SvPADMY_on(curpad[0]); /* XXX Needed? */ + CvOWNER(compcv) = 0; + New(666, CvMUTEXP(compcv), 1, perl_mutex); + MUTEX_INIT(CvMUTEXP(compcv)); +#endif /* USE_THREADS */ comppadlist = newAV(); AvREAL_off(comppadlist); @@ -5186,15 +5895,20 @@ U32 flags; av_store(comppadlist, 1, (SV*)comppad); CvPADLIST(compcv) = comppadlist; - CvOUTSIDE(compcv) = (CV*)SvREFCNT_inc((SV*)outsidecv); + CvOUTSIDE(compcv) = (CV*)SvREFCNT_inc(outsidecv); +#ifdef USE_THREADS + CvOWNER(compcv) = 0; + New(666, CvMUTEXP(compcv), 1, perl_mutex); + MUTEX_INIT(CvMUTEXP(compcv)); +#endif /* USE_THREADS */ return oldsavestack_ix; } int -yywarn(s) -char *s; +yywarn(char *s) { + dTHR; --error_count; in_eval |= 2; yyerror(s); @@ -5203,13 +5917,13 @@ char *s; } int -yyerror(s) -char *s; +yyerror(char *s) { - char wbuf[40]; + dTHR; char *where = NULL; char *context = NULL; int contlen = -1; + SV *msg; if (!yychar || (yychar == ';' && !rsfp)) where = "at EOF"; @@ -5238,35 +5952,40 @@ char *s; else where = "within string"; } - else if (yychar < 32) - (void)sprintf(where = wbuf, "next char ^%c", toCTRL(yychar)); - else if (isPRINT_LC(yychar)) - (void)sprintf(where = wbuf, "next char %c", yychar); - else - (void)sprintf(where = wbuf, "next char \\%03o", yychar & 255); - if (contlen == -1) - contlen = strlen(where); - (void)sprintf(buf, "%s at %s line %d, ", - s, SvPVX(GvSV(curcop->cop_filegv)), curcop->cop_line); + else { + SV *where_sv = sv_2mortal(newSVpv("next char ", 0)); + if (yychar < 32) + sv_catpvf(where_sv, "^%c", toCTRL(yychar)); + else if (isPRINT_LC(yychar)) + sv_catpvf(where_sv, "%c", yychar); + else + sv_catpvf(where_sv, "\\%03o", yychar & 255); + where = SvPVX(where_sv); + } + msg = sv_2mortal(newSVpv(s, 0)); + sv_catpvf(msg, " at %_ line %ld, ", + GvSV(curcop->cop_filegv), (long)curcop->cop_line); if (context) - (void)sprintf(buf+strlen(buf), "near \"%.*s\"\n", contlen, context); + sv_catpvf(msg, "near \"%.*s\"\n", contlen, context); else - (void)sprintf(buf+strlen(buf), "%s\n", where); + sv_catpvf(msg, "%s\n", where); if (multi_start < multi_end && (U32)(curcop->cop_line - multi_end) <= 1) { - sprintf(buf+strlen(buf), + sv_catpvf(msg, " (Might be a runaway multi-line %c%c string starting on line %ld)\n", (int)multi_open,(int)multi_close,(long)multi_start); multi_end = 0; } if (in_eval & 2) - warn("%s",buf); + warn("%_", msg); else if (in_eval) - sv_catpv(GvSV(errgv),buf); + sv_catsv(ERRSV, msg); else - PerlIO_printf(PerlIO_stderr(), "%s",buf); + PerlIO_write(PerlIO_stderr(), SvPVX(msg), SvCUR(msg)); if (++error_count >= 10) - croak("%s has too many errors.\n", - SvPVX(GvSV(curcop->cop_filegv))); + croak("%_ has too many errors.\n", GvSV(curcop->cop_filegv)); in_my = 0; + in_my_stash = Nullhv; return 0; } + +