3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
4 * 2000, 2001, 2002, 2003, 2004, 2005, 2006, by Larry Wall and others
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
12 * "It all comes from here, the stench and the peril." --Frodo
16 * This file is the lexer for Perl. It's closely linked to the
19 * The main routine is yylex(), which returns the next token.
23 #define PERL_IN_TOKE_C
26 #define yychar (*PL_yycharp)
27 #define yylval (*PL_yylvalp)
29 static const char ident_too_long[] = "Identifier too long";
30 static const char commaless_variable_list[] = "comma-less variable list";
32 static void restore_rsfp(pTHX_ void *f);
33 #ifndef PERL_NO_UTF16_FILTER
34 static I32 utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen);
35 static I32 utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen);
38 #define XFAKEBRACK 128
41 #ifdef USE_UTF8_SCRIPTS
42 # define UTF (!IN_BYTES)
44 # define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8))
47 /* In variables named $^X, these are the legal values for X.
48 * 1999-02-27 mjd-perl-patch@plover.com */
49 #define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
51 /* On MacOS, respect nonbreaking spaces */
52 #ifdef MACOS_TRADITIONAL
53 #define SPACE_OR_TAB(c) ((c)==' '||(c)=='\312'||(c)=='\t')
55 #define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
58 /* LEX_* are values for PL_lex_state, the state of the lexer.
59 * They are arranged oddly so that the guard on the switch statement
60 * can get by with a single comparison (if the compiler is smart enough).
63 /* #define LEX_NOTPARSING 11 is done in perl.h. */
65 #define LEX_NORMAL 10 /* normal code (ie not within "...") */
66 #define LEX_INTERPNORMAL 9 /* code within a string, eg "$foo[$x+1]" */
67 #define LEX_INTERPCASEMOD 8 /* expecting a \U, \Q or \E etc */
68 #define LEX_INTERPPUSH 7 /* starting a new sublex parse level */
69 #define LEX_INTERPSTART 6 /* expecting the start of a $var */
71 /* at end of code, eg "$x" followed by: */
72 #define LEX_INTERPEND 5 /* ... eg not one of [, { or -> */
73 #define LEX_INTERPENDMAYBE 4 /* ... eg one of [, { or -> */
75 #define LEX_INTERPCONCAT 3 /* expecting anything, eg at start of
76 string or after \E, $foo, etc */
77 #define LEX_INTERPCONST 2 /* NOT USED */
78 #define LEX_FORMLINE 1 /* expecting a format line */
79 #define LEX_KNOWNEXT 0 /* next token known; just return it */
83 static const char* const lex_state_names[] = {
102 #include "keywords.h"
104 /* CLINE is a macro that ensures PL_copline has a sane value */
109 #define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
112 * Convenience functions to return different tokens and prime the
113 * lexer for the next token. They all take an argument.
115 * TOKEN : generic token (used for '(', DOLSHARP, etc)
116 * OPERATOR : generic operator
117 * AOPERATOR : assignment operator
118 * PREBLOCK : beginning the block after an if, while, foreach, ...
119 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
120 * PREREF : *EXPR where EXPR is not a simple identifier
121 * TERM : expression term
122 * LOOPX : loop exiting command (goto, last, dump, etc)
123 * FTST : file test operator
124 * FUN0 : zero-argument function
125 * FUN1 : not used, except for not, which isn't a UNIOP
126 * BOop : bitwise or or xor
128 * SHop : shift operator
129 * PWop : power operator
130 * PMop : pattern-matching operator
131 * Aop : addition-level operator
132 * Mop : multiplication-level operator
133 * Eop : equality-testing operator
134 * Rop : relational operator <= != gt
136 * Also see LOP and lop() below.
139 #ifdef DEBUGGING /* Serve -DT. */
140 # define REPORT(retval) tokereport((I32)retval)
142 # define REPORT(retval) (retval)
145 #define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
146 #define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
147 #define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
148 #define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
149 #define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
150 #define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
151 #define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
152 #define LOOPX(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
153 #define FTST(f) return (yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
154 #define FUN0(f) return (yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
155 #define FUN1(f) return (yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
156 #define BOop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
157 #define BAop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
158 #define SHop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
159 #define PWop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
160 #define PMop(f) return(yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
161 #define Aop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
162 #define Mop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
163 #define Eop(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
164 #define Rop(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
166 /* This bit of chicanery makes a unary function followed by
167 * a parenthesis into a function with one argument, highest precedence.
168 * The UNIDOR macro is for unary functions that can be followed by the //
169 * operator (such as C<shift // 0>).
171 #define UNI2(f,x) { \
175 PL_last_uni = PL_oldbufptr; \
176 PL_last_lop_op = f; \
178 return REPORT( (int)FUNC1 ); \
180 return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
182 #define UNI(f) UNI2(f,XTERM)
183 #define UNIDOR(f) UNI2(f,XTERMORDORDOR)
185 #define UNIBRACK(f) { \
188 PL_last_uni = PL_oldbufptr; \
190 return REPORT( (int)FUNC1 ); \
192 return REPORT( (*s == '(') ? (int)FUNC1 : (int)UNIOP ); \
195 /* grandfather return to old style */
196 #define OLDLOP(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
200 /* how to interpret the yylval associated with the token */
204 TOKENTYPE_OPNUM, /* yylval.ival contains an opcode number */
210 static struct debug_tokens { const int token, type; const char *name; }
211 const debug_tokens[] =
213 { ADDOP, TOKENTYPE_OPNUM, "ADDOP" },
214 { ANDAND, TOKENTYPE_NONE, "ANDAND" },
215 { ANDOP, TOKENTYPE_NONE, "ANDOP" },
216 { ANONSUB, TOKENTYPE_IVAL, "ANONSUB" },
217 { ARROW, TOKENTYPE_NONE, "ARROW" },
218 { ASSIGNOP, TOKENTYPE_OPNUM, "ASSIGNOP" },
219 { BITANDOP, TOKENTYPE_OPNUM, "BITANDOP" },
220 { BITOROP, TOKENTYPE_OPNUM, "BITOROP" },
221 { COLONATTR, TOKENTYPE_NONE, "COLONATTR" },
222 { CONTINUE, TOKENTYPE_NONE, "CONTINUE" },
223 { DEFAULT, TOKENTYPE_NONE, "DEFAULT" },
224 { DO, TOKENTYPE_NONE, "DO" },
225 { DOLSHARP, TOKENTYPE_NONE, "DOLSHARP" },
226 { DORDOR, TOKENTYPE_NONE, "DORDOR" },
227 { DOROP, TOKENTYPE_OPNUM, "DOROP" },
228 { DOTDOT, TOKENTYPE_IVAL, "DOTDOT" },
229 { ELSE, TOKENTYPE_NONE, "ELSE" },
230 { ELSIF, TOKENTYPE_IVAL, "ELSIF" },
231 { EQOP, TOKENTYPE_OPNUM, "EQOP" },
232 { FOR, TOKENTYPE_IVAL, "FOR" },
233 { FORMAT, TOKENTYPE_NONE, "FORMAT" },
234 { FUNC, TOKENTYPE_OPNUM, "FUNC" },
235 { FUNC0, TOKENTYPE_OPNUM, "FUNC0" },
236 { FUNC0SUB, TOKENTYPE_OPVAL, "FUNC0SUB" },
237 { FUNC1, TOKENTYPE_OPNUM, "FUNC1" },
238 { FUNCMETH, TOKENTYPE_OPVAL, "FUNCMETH" },
239 { GIVEN, TOKENTYPE_IVAL, "GIVEN" },
240 { HASHBRACK, TOKENTYPE_NONE, "HASHBRACK" },
241 { IF, TOKENTYPE_IVAL, "IF" },
242 { LABEL, TOKENTYPE_PVAL, "LABEL" },
243 { LOCAL, TOKENTYPE_IVAL, "LOCAL" },
244 { LOOPEX, TOKENTYPE_OPNUM, "LOOPEX" },
245 { LSTOP, TOKENTYPE_OPNUM, "LSTOP" },
246 { LSTOPSUB, TOKENTYPE_OPVAL, "LSTOPSUB" },
247 { MATCHOP, TOKENTYPE_OPNUM, "MATCHOP" },
248 { METHOD, TOKENTYPE_OPVAL, "METHOD" },
249 { MULOP, TOKENTYPE_OPNUM, "MULOP" },
250 { MY, TOKENTYPE_IVAL, "MY" },
251 { MYSUB, TOKENTYPE_NONE, "MYSUB" },
252 { NOAMP, TOKENTYPE_NONE, "NOAMP" },
253 { NOTOP, TOKENTYPE_NONE, "NOTOP" },
254 { OROP, TOKENTYPE_IVAL, "OROP" },
255 { OROR, TOKENTYPE_NONE, "OROR" },
256 { PACKAGE, TOKENTYPE_NONE, "PACKAGE" },
257 { PMFUNC, TOKENTYPE_OPVAL, "PMFUNC" },
258 { POSTDEC, TOKENTYPE_NONE, "POSTDEC" },
259 { POSTINC, TOKENTYPE_NONE, "POSTINC" },
260 { POWOP, TOKENTYPE_OPNUM, "POWOP" },
261 { PREDEC, TOKENTYPE_NONE, "PREDEC" },
262 { PREINC, TOKENTYPE_NONE, "PREINC" },
263 { PRIVATEREF, TOKENTYPE_OPVAL, "PRIVATEREF" },
264 { REFGEN, TOKENTYPE_NONE, "REFGEN" },
265 { RELOP, TOKENTYPE_OPNUM, "RELOP" },
266 { SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" },
267 { SUB, TOKENTYPE_NONE, "SUB" },
268 { THING, TOKENTYPE_OPVAL, "THING" },
269 { UMINUS, TOKENTYPE_NONE, "UMINUS" },
270 { UNIOP, TOKENTYPE_OPNUM, "UNIOP" },
271 { UNIOPSUB, TOKENTYPE_OPVAL, "UNIOPSUB" },
272 { UNLESS, TOKENTYPE_IVAL, "UNLESS" },
273 { UNTIL, TOKENTYPE_IVAL, "UNTIL" },
274 { USE, TOKENTYPE_IVAL, "USE" },
275 { WHEN, TOKENTYPE_IVAL, "WHEN" },
276 { WHILE, TOKENTYPE_IVAL, "WHILE" },
277 { WORD, TOKENTYPE_OPVAL, "WORD" },
278 { 0, TOKENTYPE_NONE, 0 }
281 /* dump the returned token in rv, plus any optional arg in yylval */
284 S_tokereport(pTHX_ I32 rv)
288 const char *name = Nullch;
289 enum token_type type = TOKENTYPE_NONE;
290 const struct debug_tokens *p;
291 SV* const report = newSVpvs("<== ");
293 for (p = debug_tokens; p->token; p++) {
294 if (p->token == (int)rv) {
301 Perl_sv_catpv(aTHX_ report, name);
302 else if ((char)rv > ' ' && (char)rv < '~')
303 Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
305 sv_catpvs(report, "EOF");
307 Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
310 case TOKENTYPE_GVVAL: /* doesn't appear to be used */
313 Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)yylval.ival);
315 case TOKENTYPE_OPNUM:
316 Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
317 PL_op_name[yylval.ival]);
320 Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", yylval.pval);
322 case TOKENTYPE_OPVAL:
324 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
325 PL_op_name[yylval.opval->op_type]);
326 if (yylval.opval->op_type == OP_CONST) {
327 Perl_sv_catpvf(aTHX_ report, " %s",
328 SvPEEK(cSVOPx_sv(yylval.opval)));
333 sv_catpvs(report, "(opval=null)");
336 PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
342 /* print the buffer with suitable escapes */
345 S_printbuf(pTHX_ const char* fmt, const char* s)
347 SV* const tmp = newSVpvs("");
348 PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
357 * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
358 * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
362 S_ao(pTHX_ int toketype)
365 if (*PL_bufptr == '=') {
367 if (toketype == ANDAND)
368 yylval.ival = OP_ANDASSIGN;
369 else if (toketype == OROR)
370 yylval.ival = OP_ORASSIGN;
371 else if (toketype == DORDOR)
372 yylval.ival = OP_DORASSIGN;
380 * When Perl expects an operator and finds something else, no_op
381 * prints the warning. It always prints "<something> found where
382 * operator expected. It prints "Missing semicolon on previous line?"
383 * if the surprise occurs at the start of the line. "do you need to
384 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
385 * where the compiler doesn't know if foo is a method call or a function.
386 * It prints "Missing operator before end of line" if there's nothing
387 * after the missing operator, or "... before <...>" if there is something
388 * after the missing operator.
392 S_no_op(pTHX_ const char *what, char *s)
395 char * const oldbp = PL_bufptr;
396 const bool is_first = (PL_oldbufptr == PL_linestart);
402 yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
403 if (ckWARN_d(WARN_SYNTAX)) {
405 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
406 "\t(Missing semicolon on previous line?)\n");
407 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
409 for (t = PL_oldoldbufptr; *t && (isALNUM_lazy_if(t,UTF) || *t == ':'); t++) ;
410 if (t < PL_bufptr && isSPACE(*t))
411 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
412 "\t(Do you need to predeclare %.*s?)\n",
413 (int)(t - PL_oldoldbufptr), PL_oldoldbufptr);
417 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
418 "\t(Missing operator before %.*s?)\n", (int)(s - oldbp), oldbp);
426 * Complain about missing quote/regexp/heredoc terminator.
427 * If it's called with (char *)NULL then it cauterizes the line buffer.
428 * If we're in a delimited string and the delimiter is a control
429 * character, it's reformatted into a two-char sequence like ^C.
434 S_missingterm(pTHX_ char *s)
440 char * const nl = strrchr(s,'\n');
446 iscntrl(PL_multi_close)
448 PL_multi_close < 32 || PL_multi_close == 127
452 tmpbuf[1] = (char)toCTRL(PL_multi_close);
457 *tmpbuf = (char)PL_multi_close;
461 q = strchr(s,'"') ? '\'' : '"';
462 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
465 #define FEATURE_IS_ENABLED(name) \
466 ((0 != (PL_hints & HINT_LOCALIZE_HH)) \
467 && S_feature_is_enabled(aTHX_ STR_WITH_LEN(name)))
469 * S_feature_is_enabled
470 * Check whether the named feature is enabled.
473 S_feature_is_enabled(pTHX_ char *name, STRLEN namelen)
476 HV * const hinthv = GvHV(PL_hintgv);
477 char he_name[32] = "feature_";
478 (void) strncpy(&he_name[8], name, 24);
480 return (hinthv && hv_exists(hinthv, he_name, 8 + namelen));
488 Perl_deprecate(pTHX_ const char *s)
490 if (ckWARN(WARN_DEPRECATED))
491 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED), "Use of %s is deprecated", s);
495 Perl_deprecate_old(pTHX_ const char *s)
497 /* This function should NOT be called for any new deprecated warnings */
498 /* Use Perl_deprecate instead */
500 /* It is here to maintain backward compatibility with the pre-5.8 */
501 /* warnings category hierarchy. The "deprecated" category used to */
502 /* live under the "syntax" category. It is now a top-level category */
503 /* in its own right. */
505 if (ckWARN2(WARN_DEPRECATED, WARN_SYNTAX))
506 Perl_warner(aTHX_ packWARN2(WARN_DEPRECATED, WARN_SYNTAX),
507 "Use of %s is deprecated", s);
511 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
512 * utf16-to-utf8-reversed.
515 #ifdef PERL_CR_FILTER
519 register const char *s = SvPVX_const(sv);
520 register const char * const e = s + SvCUR(sv);
521 /* outer loop optimized to do nothing if there are no CR-LFs */
523 if (*s++ == '\r' && *s == '\n') {
524 /* hit a CR-LF, need to copy the rest */
525 register char *d = s - 1;
528 if (*s == '\r' && s[1] == '\n')
539 S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
541 const I32 count = FILTER_READ(idx+1, sv, maxlen);
542 if (count > 0 && !maxlen)
550 * Initialize variables. Uses the Perl save_stack to save its state (for
551 * recursive calls to the parser).
555 Perl_lex_start(pTHX_ SV *line)
561 SAVEI32(PL_lex_dojoin);
562 SAVEI32(PL_lex_brackets);
563 SAVEI32(PL_lex_casemods);
564 SAVEI32(PL_lex_starts);
565 SAVEI32(PL_lex_state);
566 SAVEVPTR(PL_lex_inpat);
567 SAVEI32(PL_lex_inwhat);
568 if (PL_lex_state == LEX_KNOWNEXT) {
569 I32 toke = PL_nexttoke;
570 while (--toke >= 0) {
571 SAVEI32(PL_nexttype[toke]);
572 SAVEVPTR(PL_nextval[toke]);
574 SAVEI32(PL_nexttoke);
576 SAVECOPLINE(PL_curcop);
579 SAVEPPTR(PL_oldbufptr);
580 SAVEPPTR(PL_oldoldbufptr);
581 SAVEPPTR(PL_last_lop);
582 SAVEPPTR(PL_last_uni);
583 SAVEPPTR(PL_linestart);
584 SAVESPTR(PL_linestr);
585 SAVEGENERICPV(PL_lex_brackstack);
586 SAVEGENERICPV(PL_lex_casestack);
587 SAVEDESTRUCTOR_X(restore_rsfp, PL_rsfp);
588 SAVESPTR(PL_lex_stuff);
589 SAVEI32(PL_lex_defer);
590 SAVEI32(PL_sublex_info.sub_inwhat);
591 SAVESPTR(PL_lex_repl);
593 SAVEINT(PL_lex_expect);
595 PL_lex_state = LEX_NORMAL;
599 Newx(PL_lex_brackstack, 120, char);
600 Newx(PL_lex_casestack, 12, char);
602 *PL_lex_casestack = '\0';
605 PL_lex_stuff = Nullsv;
606 PL_lex_repl = Nullsv;
610 PL_sublex_info.sub_inwhat = 0;
612 if (SvREADONLY(PL_linestr))
613 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
614 s = SvPV_const(PL_linestr, len);
615 if (!len || s[len-1] != ';') {
616 if (!(SvFLAGS(PL_linestr) & SVs_TEMP))
617 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
618 sv_catpvs(PL_linestr, "\n;");
620 SvTEMP_off(PL_linestr);
621 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
622 PL_bufend = PL_bufptr + SvCUR(PL_linestr);
623 PL_last_lop = PL_last_uni = Nullch;
629 * Finalizer for lexing operations. Must be called when the parser is
630 * done with the lexer.
637 PL_doextract = FALSE;
642 * This subroutine has nothing to do with tilting, whether at windmills
643 * or pinball tables. Its name is short for "increment line". It
644 * increments the current line number in CopLINE(PL_curcop) and checks
645 * to see whether the line starts with a comment of the form
646 * # line 500 "foo.pm"
647 * If so, it sets the current line number and file to the values in the comment.
651 S_incline(pTHX_ char *s)
659 CopLINE_inc(PL_curcop);
662 while (SPACE_OR_TAB(*s)) s++;
663 if (strnEQ(s, "line", 4))
667 if (SPACE_OR_TAB(*s))
671 while (SPACE_OR_TAB(*s)) s++;
677 while (SPACE_OR_TAB(*s))
679 if (*s == '"' && (t = strchr(s+1, '"'))) {
684 for (t = s; !isSPACE(*t); t++) ;
687 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
689 if (*e != '\n' && *e != '\0')
690 return; /* false alarm */
696 const char * const cf = CopFILE(PL_curcop);
697 STRLEN tmplen = cf ? strlen(cf) : 0;
698 if (tmplen > 7 && strnEQ(cf, "(eval ", 6)) {
699 /* must copy *{"::_<(eval N)[oldfilename:L]"}
700 * to *{"::_<newfilename"} */
701 char smallbuf[256], smallbuf2[256];
702 char *tmpbuf, *tmpbuf2;
704 STRLEN tmplen2 = strlen(s);
705 if (tmplen + 3 < sizeof smallbuf)
708 Newx(tmpbuf, tmplen + 3, char);
709 if (tmplen2 + 3 < sizeof smallbuf2)
712 Newx(tmpbuf2, tmplen2 + 3, char);
713 tmpbuf[0] = tmpbuf2[0] = '_';
714 tmpbuf[1] = tmpbuf2[1] = '<';
715 memcpy(tmpbuf + 2, cf, ++tmplen);
716 memcpy(tmpbuf2 + 2, s, ++tmplen2);
718 gvp = (GV**)hv_fetch(PL_defstash, tmpbuf, tmplen, FALSE);
720 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
722 gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
723 /* adjust ${"::_<newfilename"} to store the new file name */
724 GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
725 GvHV(gv2) = (HV*)SvREFCNT_inc(GvHV(*gvp));
726 GvAV(gv2) = (AV*)SvREFCNT_inc(GvAV(*gvp));
728 if (tmpbuf != smallbuf) Safefree(tmpbuf);
729 if (tmpbuf2 != smallbuf2) Safefree(tmpbuf2);
732 CopFILE_free(PL_curcop);
733 CopFILE_set(PL_curcop, s);
736 CopLINE_set(PL_curcop, atoi(n)-1);
741 * Called to gobble the appropriate amount and type of whitespace.
742 * Skips comments as well.
746 S_skipspace(pTHX_ register char *s)
749 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
750 while (s < PL_bufend && SPACE_OR_TAB(*s))
756 SSize_t oldprevlen, oldoldprevlen;
757 SSize_t oldloplen = 0, oldunilen = 0;
758 while (s < PL_bufend && isSPACE(*s)) {
759 if (*s++ == '\n' && PL_in_eval && !PL_rsfp)
764 if (s < PL_bufend && *s == '#') {
765 while (s < PL_bufend && *s != '\n')
769 if (PL_in_eval && !PL_rsfp) {
776 /* only continue to recharge the buffer if we're at the end
777 * of the buffer, we're not reading from a source filter, and
778 * we're in normal lexing mode
780 if (s < PL_bufend || !PL_rsfp || PL_sublex_info.sub_inwhat ||
781 PL_lex_state == LEX_FORMLINE)
784 /* try to recharge the buffer */
785 if ((s = filter_gets(PL_linestr, PL_rsfp,
786 (prevlen = SvCUR(PL_linestr)))) == Nullch)
788 /* end of file. Add on the -p or -n magic */
791 ";}continue{print or die qq(-p destination: $!\\n);}");
792 PL_minus_n = PL_minus_p = 0;
794 else if (PL_minus_n) {
795 sv_setpvn(PL_linestr, ";}", 2);
799 sv_setpvn(PL_linestr,";", 1);
801 /* reset variables for next time we lex */
802 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart
804 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
805 PL_last_lop = PL_last_uni = Nullch;
807 /* Close the filehandle. Could be from -P preprocessor,
808 * STDIN, or a regular file. If we were reading code from
809 * STDIN (because the commandline held no -e or filename)
810 * then we don't close it, we reset it so the code can
811 * read from STDIN too.
814 if (PL_preprocess && !PL_in_eval)
815 (void)PerlProc_pclose(PL_rsfp);
816 else if ((PerlIO*)PL_rsfp == PerlIO_stdin())
817 PerlIO_clearerr(PL_rsfp);
819 (void)PerlIO_close(PL_rsfp);
824 /* not at end of file, so we only read another line */
825 /* make corresponding updates to old pointers, for yyerror() */
826 oldprevlen = PL_oldbufptr - PL_bufend;
827 oldoldprevlen = PL_oldoldbufptr - PL_bufend;
829 oldunilen = PL_last_uni - PL_bufend;
831 oldloplen = PL_last_lop - PL_bufend;
832 PL_linestart = PL_bufptr = s + prevlen;
833 PL_bufend = s + SvCUR(PL_linestr);
835 PL_oldbufptr = s + oldprevlen;
836 PL_oldoldbufptr = s + oldoldprevlen;
838 PL_last_uni = s + oldunilen;
840 PL_last_lop = s + oldloplen;
843 /* debugger active and we're not compiling the debugger code,
844 * so store the line into the debugger's array of lines
846 if (PERLDB_LINE && PL_curstash != PL_debstash) {
847 SV * const sv = NEWSV(85,0);
849 sv_upgrade(sv, SVt_PVMG);
850 sv_setpvn(sv,PL_bufptr,PL_bufend-PL_bufptr);
853 av_store(CopFILEAVx(PL_curcop),(I32)CopLINE(PL_curcop),sv);
860 * Check the unary operators to ensure there's no ambiguity in how they're
861 * used. An ambiguous piece of code would be:
863 * This doesn't mean rand() + 5. Because rand() is a unary operator,
864 * the +5 is its argument.
874 if (PL_oldoldbufptr != PL_last_uni)
876 while (isSPACE(*PL_last_uni))
878 for (s = PL_last_uni; isALNUM_lazy_if(s,UTF) || *s == '-'; s++) ;
879 if ((t = strchr(s, '(')) && t < PL_bufptr)
882 /* XXX Things like this are just so nasty. We shouldn't be modifying
883 source code, even if we realquick set it back. */
884 if (ckWARN_d(WARN_AMBIGUOUS)){
887 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
888 "Warning: Use of \"%s\" without parentheses is ambiguous",
895 * LOP : macro to build a list operator. Its behaviour has been replaced
896 * with a subroutine, S_lop() for which LOP is just another name.
899 #define LOP(f,x) return lop(f,x,s)
903 * Build a list operator (or something that might be one). The rules:
904 * - if we have a next token, then it's a list operator [why?]
905 * - if the next thing is an opening paren, then it's a function
906 * - else it's a list operator
910 S_lop(pTHX_ I32 f, int x, char *s)
917 PL_last_lop = PL_oldbufptr;
918 PL_last_lop_op = (OPCODE)f;
920 return REPORT(LSTOP);
927 return REPORT(LSTOP);
932 * When the lexer realizes it knows the next token (for instance,
933 * it is reordering tokens for the parser) then it can call S_force_next
934 * to know what token to return the next time the lexer is called. Caller
935 * will need to set PL_nextval[], and possibly PL_expect to ensure the lexer
936 * handles the token correctly.
940 S_force_next(pTHX_ I32 type)
943 PL_nexttype[PL_nexttoke] = type;
945 if (PL_lex_state != LEX_KNOWNEXT) {
946 PL_lex_defer = PL_lex_state;
947 PL_lex_expect = PL_expect;
948 PL_lex_state = LEX_KNOWNEXT;
953 S_newSV_maybe_utf8(pTHX_ const char *start, STRLEN len)
956 SV * const sv = newSVpvn(start,len);
957 if (UTF && !IN_BYTES && is_utf8_string((const U8*)start, len))
964 * When the lexer knows the next thing is a word (for instance, it has
965 * just seen -> and it knows that the next char is a word char, then
966 * it calls S_force_word to stick the next word into the PL_next lookahead.
969 * char *start : buffer position (must be within PL_linestr)
970 * int token : PL_next will be this type of bare word (e.g., METHOD,WORD)
971 * int check_keyword : if true, Perl checks to make sure the word isn't
972 * a keyword (do this if the word is a label, e.g. goto FOO)
973 * int allow_pack : if true, : characters will also be allowed (require,
975 * int allow_initial_tick : used by the "sub" lexer only.
979 S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
985 start = skipspace(start);
987 if (isIDFIRST_lazy_if(s,UTF) ||
988 (allow_pack && *s == ':') ||
989 (allow_initial_tick && *s == '\'') )
991 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
992 if (check_keyword && keyword(PL_tokenbuf, len))
994 if (token == METHOD) {
999 PL_expect = XOPERATOR;
1002 PL_nextval[PL_nexttoke].opval
1003 = (OP*)newSVOP(OP_CONST,0,
1004 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
1005 PL_nextval[PL_nexttoke].opval->op_private |= OPpCONST_BARE;
1013 * Called when the lexer wants $foo *foo &foo etc, but the program
1014 * text only contains the "foo" portion. The first argument is a pointer
1015 * to the "foo", and the second argument is the type symbol to prefix.
1016 * Forces the next token to be a "WORD".
1017 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
1021 S_force_ident(pTHX_ register const char *s, int kind)
1025 OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpv(s,0));
1026 PL_nextval[PL_nexttoke].opval = o;
1029 o->op_private = OPpCONST_ENTERED;
1030 /* XXX see note in pp_entereval() for why we forgo typo
1031 warnings if the symbol must be introduced in an eval.
1033 gv_fetchpv(s, PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : GV_ADD,
1034 kind == '$' ? SVt_PV :
1035 kind == '@' ? SVt_PVAV :
1036 kind == '%' ? SVt_PVHV :
1044 Perl_str_to_version(pTHX_ SV *sv)
1049 const char *start = SvPV_const(sv,len);
1050 const char * const end = start + len;
1051 const bool utf = SvUTF8(sv) ? TRUE : FALSE;
1052 while (start < end) {
1056 n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
1061 retval += ((NV)n)/nshift;
1070 * Forces the next token to be a version number.
1071 * If the next token appears to be an invalid version number, (e.g. "v2b"),
1072 * and if "guessing" is TRUE, then no new token is created (and the caller
1073 * must use an alternative parsing method).
1077 S_force_version(pTHX_ char *s, int guessing)
1080 OP *version = Nullop;
1089 while (isDIGIT(*d) || *d == '_' || *d == '.')
1091 if (*d == ';' || isSPACE(*d) || *d == '}' || !*d) {
1093 s = scan_num(s, &yylval);
1094 version = yylval.opval;
1095 ver = cSVOPx(version)->op_sv;
1096 if (SvPOK(ver) && !SvNIOK(ver)) {
1097 SvUPGRADE(ver, SVt_PVNV);
1098 SvNV_set(ver, str_to_version(ver));
1099 SvNOK_on(ver); /* hint that it is a version */
1106 /* NOTE: The parser sees the package name and the VERSION swapped */
1107 PL_nextval[PL_nexttoke].opval = version;
1115 * Tokenize a quoted string passed in as an SV. It finds the next
1116 * chunk, up to end of string or a backslash. It may make a new
1117 * SV containing that chunk (if HINT_NEW_STRING is on). It also
1122 S_tokeq(pTHX_ SV *sv)
1126 register char *send;
1134 s = SvPV_force(sv, len);
1135 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
1138 while (s < send && *s != '\\')
1143 if ( PL_hints & HINT_NEW_STRING ) {
1144 pv = sv_2mortal(newSVpvn(SvPVX_const(pv), len));
1150 if (s + 1 < send && (s[1] == '\\'))
1151 s++; /* all that, just for this */
1156 SvCUR_set(sv, d - SvPVX_const(sv));
1158 if ( PL_hints & HINT_NEW_STRING )
1159 return new_constant(NULL, 0, "q", sv, pv, "q");
1164 * Now come three functions related to double-quote context,
1165 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
1166 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
1167 * interact with PL_lex_state, and create fake ( ... ) argument lists
1168 * to handle functions and concatenation.
1169 * They assume that whoever calls them will be setting up a fake
1170 * join call, because each subthing puts a ',' after it. This lets
1173 * join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
1175 * (I'm not sure whether the spurious commas at the end of lcfirst's
1176 * arguments and join's arguments are created or not).
1181 * Assumes that yylval.ival is the op we're creating (e.g. OP_LCFIRST).
1183 * Pattern matching will set PL_lex_op to the pattern-matching op to
1184 * make (we return THING if yylval.ival is OP_NULL, PMFUNC otherwise).
1186 * OP_CONST and OP_READLINE are easy--just make the new op and return.
1188 * Everything else becomes a FUNC.
1190 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
1191 * had an OP_CONST or OP_READLINE). This just sets us up for a
1192 * call to S_sublex_push().
1196 S_sublex_start(pTHX)
1199 register const I32 op_type = yylval.ival;
1201 if (op_type == OP_NULL) {
1202 yylval.opval = PL_lex_op;
1206 if (op_type == OP_CONST || op_type == OP_READLINE) {
1207 SV *sv = tokeq(PL_lex_stuff);
1209 if (SvTYPE(sv) == SVt_PVIV) {
1210 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
1212 const char *p = SvPV_const(sv, len);
1213 SV * const nsv = newSVpvn(p, len);
1219 yylval.opval = (OP*)newSVOP(op_type, 0, sv);
1220 PL_lex_stuff = Nullsv;
1221 /* Allow <FH> // "foo" */
1222 if (op_type == OP_READLINE)
1223 PL_expect = XTERMORDORDOR;
1227 PL_sublex_info.super_state = PL_lex_state;
1228 PL_sublex_info.sub_inwhat = op_type;
1229 PL_sublex_info.sub_op = PL_lex_op;
1230 PL_lex_state = LEX_INTERPPUSH;
1234 yylval.opval = PL_lex_op;
1244 * Create a new scope to save the lexing state. The scope will be
1245 * ended in S_sublex_done. Returns a '(', starting the function arguments
1246 * to the uc, lc, etc. found before.
1247 * Sets PL_lex_state to LEX_INTERPCONCAT.
1256 PL_lex_state = PL_sublex_info.super_state;
1257 SAVEI32(PL_lex_dojoin);
1258 SAVEI32(PL_lex_brackets);
1259 SAVEI32(PL_lex_casemods);
1260 SAVEI32(PL_lex_starts);
1261 SAVEI32(PL_lex_state);
1262 SAVEVPTR(PL_lex_inpat);
1263 SAVEI32(PL_lex_inwhat);
1264 SAVECOPLINE(PL_curcop);
1265 SAVEPPTR(PL_bufptr);
1266 SAVEPPTR(PL_bufend);
1267 SAVEPPTR(PL_oldbufptr);
1268 SAVEPPTR(PL_oldoldbufptr);
1269 SAVEPPTR(PL_last_lop);
1270 SAVEPPTR(PL_last_uni);
1271 SAVEPPTR(PL_linestart);
1272 SAVESPTR(PL_linestr);
1273 SAVEGENERICPV(PL_lex_brackstack);
1274 SAVEGENERICPV(PL_lex_casestack);
1276 PL_linestr = PL_lex_stuff;
1277 PL_lex_stuff = Nullsv;
1279 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
1280 = SvPVX(PL_linestr);
1281 PL_bufend += SvCUR(PL_linestr);
1282 PL_last_lop = PL_last_uni = Nullch;
1283 SAVEFREESV(PL_linestr);
1285 PL_lex_dojoin = FALSE;
1286 PL_lex_brackets = 0;
1287 Newx(PL_lex_brackstack, 120, char);
1288 Newx(PL_lex_casestack, 12, char);
1289 PL_lex_casemods = 0;
1290 *PL_lex_casestack = '\0';
1292 PL_lex_state = LEX_INTERPCONCAT;
1293 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
1295 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
1296 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
1297 PL_lex_inpat = PL_sublex_info.sub_op;
1299 PL_lex_inpat = Nullop;
1306 * Restores lexer state after a S_sublex_push.
1313 if (!PL_lex_starts++) {
1314 SV * const sv = newSVpvs("");
1315 if (SvUTF8(PL_linestr))
1317 PL_expect = XOPERATOR;
1318 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
1322 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
1323 PL_lex_state = LEX_INTERPCASEMOD;
1327 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
1328 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
1329 PL_linestr = PL_lex_repl;
1331 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
1332 PL_bufend += SvCUR(PL_linestr);
1333 PL_last_lop = PL_last_uni = Nullch;
1334 SAVEFREESV(PL_linestr);
1335 PL_lex_dojoin = FALSE;
1336 PL_lex_brackets = 0;
1337 PL_lex_casemods = 0;
1338 *PL_lex_casestack = '\0';
1340 if (SvEVALED(PL_lex_repl)) {
1341 PL_lex_state = LEX_INTERPNORMAL;
1343 /* we don't clear PL_lex_repl here, so that we can check later
1344 whether this is an evalled subst; that means we rely on the
1345 logic to ensure sublex_done() is called again only via the
1346 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
1349 PL_lex_state = LEX_INTERPCONCAT;
1350 PL_lex_repl = Nullsv;
1356 PL_bufend = SvPVX(PL_linestr);
1357 PL_bufend += SvCUR(PL_linestr);
1358 PL_expect = XOPERATOR;
1359 PL_sublex_info.sub_inwhat = 0;
1367 Extracts a pattern, double-quoted string, or transliteration. This
1370 It looks at lex_inwhat and PL_lex_inpat to find out whether it's
1371 processing a pattern (PL_lex_inpat is true), a transliteration
1372 (lex_inwhat & OP_TRANS is true), or a double-quoted string.
1374 Returns a pointer to the character scanned up to. Iff this is
1375 advanced from the start pointer supplied (ie if anything was
1376 successfully parsed), will leave an OP for the substring scanned
1377 in yylval. Caller must intuit reason for not parsing further
1378 by looking at the next characters herself.
1382 double-quoted style: \r and \n
1383 regexp special ones: \D \s
1385 backrefs: \1 (deprecated in substitution replacements)
1386 case and quoting: \U \Q \E
1387 stops on @ and $, but not for $ as tail anchor
1389 In transliterations:
1390 characters are VERY literal, except for - not at the start or end
1391 of the string, which indicates a range. scan_const expands the
1392 range to the full set of intermediate characters.
1394 In double-quoted strings:
1396 double-quoted style: \r and \n
1398 backrefs: \1 (deprecated)
1399 case and quoting: \U \Q \E
1402 scan_const does *not* construct ops to handle interpolated strings.
1403 It stops processing as soon as it finds an embedded $ or @ variable
1404 and leaves it to the caller to work out what's going on.
1406 @ in pattern could be: @foo, @{foo}, @$foo, @'foo, @::foo.
1408 $ in pattern could be $foo or could be tail anchor. Assumption:
1409 it's a tail anchor if $ is the last thing in the string, or if it's
1410 followed by one of ")| \n\t"
1412 \1 (backreferences) are turned into $1
1414 The structure of the code is
1415 while (there's a character to process) {
1416 handle transliteration ranges
1417 skip regexp comments
1418 skip # initiated comments in //x patterns
1419 check for embedded @foo
1420 check for embedded scalars
1422 leave intact backslashes from leave (below)
1423 deprecate \1 in strings and sub replacements
1424 handle string-changing backslashes \l \U \Q \E, etc.
1425 switch (what was escaped) {
1426 handle - in a transliteration (becomes a literal -)
1427 handle \132 octal characters
1428 handle 0x15 hex characters
1429 handle \cV (control V)
1430 handle printf backslashes (\f, \r, \n, etc)
1432 } (end if backslash)
1433 } (end while character to read)
1438 S_scan_const(pTHX_ char *start)
1441 register char *send = PL_bufend; /* end of the constant */
1442 SV *sv = NEWSV(93, send - start); /* sv for the constant */
1443 register char *s = start; /* start of the constant */
1444 register char *d = SvPVX(sv); /* destination for copies */
1445 bool dorange = FALSE; /* are we in a translit range? */
1446 bool didrange = FALSE; /* did we just finish a range? */
1447 I32 has_utf8 = FALSE; /* Output constant is UTF8 */
1448 I32 this_utf8 = UTF; /* The source string is assumed to be UTF8 */
1451 UV literal_endpoint = 0;
1454 const char *leaveit = /* set of acceptably-backslashed characters */
1456 ? "\\.^$@AGZdDwWsSbBpPXC+*?|()-nrtfeaxz0123456789[{]} \t\n\r\f\v#"
1459 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
1460 /* If we are doing a trans and we know we want UTF8 set expectation */
1461 has_utf8 = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
1462 this_utf8 = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
1466 while (s < send || dorange) {
1467 /* get transliterations out of the way (they're most literal) */
1468 if (PL_lex_inwhat == OP_TRANS) {
1469 /* expand a range A-Z to the full set of characters. AIE! */
1471 I32 i; /* current expanded character */
1472 I32 min; /* first character in range */
1473 I32 max; /* last character in range */
1476 char * const c = (char*)utf8_hop((U8*)d, -1);
1480 *c = (char)UTF_TO_NATIVE(0xff);
1481 /* mark the range as done, and continue */
1487 i = d - SvPVX_const(sv); /* remember current offset */
1488 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
1489 d = SvPVX(sv) + i; /* refresh d after realloc */
1490 d -= 2; /* eat the first char and the - */
1492 min = (U8)*d; /* first char in range */
1493 max = (U8)d[1]; /* last char in range */
1497 "Invalid range \"%c-%c\" in transliteration operator",
1498 (char)min, (char)max);
1502 if (literal_endpoint == 2 &&
1503 ((isLOWER(min) && isLOWER(max)) ||
1504 (isUPPER(min) && isUPPER(max)))) {
1506 for (i = min; i <= max; i++)
1508 *d++ = NATIVE_TO_NEED(has_utf8,i);
1510 for (i = min; i <= max; i++)
1512 *d++ = NATIVE_TO_NEED(has_utf8,i);
1517 for (i = min; i <= max; i++)
1520 /* mark the range as done, and continue */
1524 literal_endpoint = 0;
1529 /* range begins (ignore - as first or last char) */
1530 else if (*s == '-' && s+1 < send && s != start) {
1532 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
1535 *d++ = (char)UTF_TO_NATIVE(0xff); /* use illegal utf8 byte--see pmtrans */
1545 literal_endpoint = 0;
1550 /* if we get here, we're not doing a transliteration */
1552 /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
1553 except for the last char, which will be done separately. */
1554 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
1556 while (s+1 < send && *s != ')')
1557 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
1559 else if (s[2] == '{' /* This should match regcomp.c */
1560 || ((s[2] == 'p' || s[2] == '?') && s[3] == '{'))
1563 char *regparse = s + (s[2] == '{' ? 3 : 4);
1566 while (count && (c = *regparse)) {
1567 if (c == '\\' && regparse[1])
1575 if (*regparse != ')')
1576 regparse--; /* Leave one char for continuation. */
1577 while (s < regparse)
1578 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
1582 /* likewise skip #-initiated comments in //x patterns */
1583 else if (*s == '#' && PL_lex_inpat &&
1584 ((PMOP*)PL_lex_inpat)->op_pmflags & PMf_EXTENDED) {
1585 while (s+1 < send && *s != '\n')
1586 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
1589 /* check for embedded arrays
1590 (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
1592 else if (*s == '@' && s[1]
1593 && (isALNUM_lazy_if(s+1,UTF) || strchr(":'{$+-", s[1])))
1596 /* check for embedded scalars. only stop if we're sure it's a
1599 else if (*s == '$') {
1600 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
1602 if (s + 1 < send && !strchr("()| \r\n\t", s[1]))
1603 break; /* in regexp, $ might be tail anchor */
1606 /* End of else if chain - OP_TRANS rejoin rest */
1609 if (*s == '\\' && s+1 < send) {
1612 /* some backslashes we leave behind */
1613 if (*leaveit && *s && strchr(leaveit, *s)) {
1614 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
1615 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
1619 /* deprecate \1 in strings and substitution replacements */
1620 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
1621 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
1623 if (ckWARN(WARN_SYNTAX))
1624 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
1629 /* string-change backslash escapes */
1630 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
1635 /* if we get here, it's either a quoted -, or a digit */
1638 /* quoted - in transliterations */
1640 if (PL_lex_inwhat == OP_TRANS) {
1650 Perl_warner(aTHX_ packWARN(WARN_MISC),
1651 "Unrecognized escape \\%c passed through",
1653 /* default action is to copy the quoted character */
1654 goto default_action;
1657 /* \132 indicates an octal constant */
1658 case '0': case '1': case '2': case '3':
1659 case '4': case '5': case '6': case '7':
1663 uv = grok_oct(s, &len, &flags, NULL);
1666 goto NUM_ESCAPE_INSERT;
1668 /* \x24 indicates a hex constant */
1672 char* const e = strchr(s, '}');
1673 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
1674 PERL_SCAN_DISALLOW_PREFIX;
1679 yyerror("Missing right brace on \\x{}");
1683 uv = grok_hex(s, &len, &flags, NULL);
1689 I32 flags = PERL_SCAN_DISALLOW_PREFIX;
1690 uv = grok_hex(s, &len, &flags, NULL);
1696 /* Insert oct or hex escaped character.
1697 * There will always enough room in sv since such
1698 * escapes will be longer than any UTF-8 sequence
1699 * they can end up as. */
1701 /* We need to map to chars to ASCII before doing the tests
1704 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(uv))) {
1705 if (!has_utf8 && uv > 255) {
1706 /* Might need to recode whatever we have
1707 * accumulated so far if it contains any
1710 * (Can't we keep track of that and avoid
1711 * this rescan? --jhi)
1715 for (c = (U8 *) SvPVX(sv); c < (U8 *)d; c++) {
1716 if (!NATIVE_IS_INVARIANT(*c)) {
1721 const STRLEN offset = d - SvPVX_const(sv);
1723 d = SvGROW(sv, SvLEN(sv) + hicount + 1) + offset;
1727 while (src >= (const U8 *)SvPVX_const(sv)) {
1728 if (!NATIVE_IS_INVARIANT(*src)) {
1729 const U8 ch = NATIVE_TO_ASCII(*src);
1730 *dst-- = (U8)UTF8_EIGHT_BIT_LO(ch);
1731 *dst-- = (U8)UTF8_EIGHT_BIT_HI(ch);
1741 if (has_utf8 || uv > 255) {
1742 d = (char*)uvchr_to_utf8((U8*)d, uv);
1744 if (PL_lex_inwhat == OP_TRANS &&
1745 PL_sublex_info.sub_op) {
1746 PL_sublex_info.sub_op->op_private |=
1747 (PL_lex_repl ? OPpTRANS_FROM_UTF
1760 /* \N{LATIN SMALL LETTER A} is a named character */
1764 char* e = strchr(s, '}');
1770 yyerror("Missing right brace on \\N{}");
1774 if (e > s + 2 && s[1] == 'U' && s[2] == '+') {
1776 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
1777 PERL_SCAN_DISALLOW_PREFIX;
1780 uv = grok_hex(s, &len, &flags, NULL);
1782 goto NUM_ESCAPE_INSERT;
1784 res = newSVpvn(s + 1, e - s - 1);
1785 res = new_constant( Nullch, 0, "charnames",
1786 res, Nullsv, "\\N{...}" );
1788 sv_utf8_upgrade(res);
1789 str = SvPV_const(res,len);
1790 #ifdef EBCDIC_NEVER_MIND
1791 /* charnames uses pack U and that has been
1792 * recently changed to do the below uni->native
1793 * mapping, so this would be redundant (and wrong,
1794 * the code point would be doubly converted).
1795 * But leave this in just in case the pack U change
1796 * gets revoked, but the semantics is still
1797 * desireable for charnames. --jhi */
1799 UV uv = utf8_to_uvchr((const U8*)str, 0);
1802 U8 tmpbuf[UTF8_MAXBYTES+1], *d;
1804 d = uvchr_to_utf8(tmpbuf, UNI_TO_NATIVE(uv));
1805 sv_setpvn(res, (char *)tmpbuf, d - tmpbuf);
1806 str = SvPV_const(res, len);
1810 if (!has_utf8 && SvUTF8(res)) {
1811 const char * const ostart = SvPVX_const(sv);
1812 SvCUR_set(sv, d - ostart);
1815 sv_utf8_upgrade(sv);
1816 /* this just broke our allocation above... */
1817 SvGROW(sv, (STRLEN)(send - start));
1818 d = SvPVX(sv) + SvCUR(sv);
1821 if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
1822 const char * const odest = SvPVX_const(sv);
1824 SvGROW(sv, (SvLEN(sv) + len - (e - s + 4)));
1825 d = SvPVX(sv) + (d - odest);
1827 Copy(str, d, len, char);
1834 yyerror("Missing braces on \\N{}");
1837 /* \c is a control character */
1846 *d++ = NATIVE_TO_NEED(has_utf8,toCTRL(c));
1849 yyerror("Missing control char name in \\c");
1853 /* printf-style backslashes, formfeeds, newlines, etc */
1855 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
1858 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
1861 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
1864 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
1867 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
1870 *d++ = ASCII_TO_NEED(has_utf8,'\033');
1873 *d++ = ASCII_TO_NEED(has_utf8,'\007');
1879 } /* end if (backslash) */
1886 /* If we started with encoded form, or already know we want it
1887 and then encode the next character */
1888 if ((has_utf8 || this_utf8) && !NATIVE_IS_INVARIANT((U8)(*s))) {
1890 const UV uv = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
1891 const STRLEN need = UNISKIP(NATIVE_TO_UNI(uv));
1894 /* encoded value larger than old, need extra space (NOTE: SvCUR() not set here) */
1895 const STRLEN off = d - SvPVX_const(sv);
1896 d = SvGROW(sv, SvLEN(sv) + (need-len)) + off;
1898 d = (char*)uvchr_to_utf8((U8*)d, uv);
1902 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
1904 } /* while loop to process each character */
1906 /* terminate the string and set up the sv */
1908 SvCUR_set(sv, d - SvPVX_const(sv));
1909 if (SvCUR(sv) >= SvLEN(sv))
1910 Perl_croak(aTHX_ "panic: constant overflowed allocated space");
1913 if (PL_encoding && !has_utf8) {
1914 sv_recode_to_utf8(sv, PL_encoding);
1920 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
1921 PL_sublex_info.sub_op->op_private |=
1922 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
1926 /* shrink the sv if we allocated more than we used */
1927 if (SvCUR(sv) + 5 < SvLEN(sv)) {
1928 SvPV_shrink_to_cur(sv);
1931 /* return the substring (via yylval) only if we parsed anything */
1932 if (s > PL_bufptr) {
1933 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) )
1934 sv = new_constant(start, s - start, (PL_lex_inpat ? "qr" : "q"),
1936 ( PL_lex_inwhat == OP_TRANS
1938 : ( (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat)
1941 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
1948 * Returns TRUE if there's more to the expression (e.g., a subscript),
1951 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
1953 * ->[ and ->{ return TRUE
1954 * { and [ outside a pattern are always subscripts, so return TRUE
1955 * if we're outside a pattern and it's not { or [, then return FALSE
1956 * if we're in a pattern and the first char is a {
1957 * {4,5} (any digits around the comma) returns FALSE
1958 * if we're in a pattern and the first char is a [
1960 * [SOMETHING] has a funky algorithm to decide whether it's a
1961 * character class or not. It has to deal with things like
1962 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
1963 * anything else returns TRUE
1966 /* This is the one truly awful dwimmer necessary to conflate C and sed. */
1969 S_intuit_more(pTHX_ register char *s)
1972 if (PL_lex_brackets)
1974 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
1976 if (*s != '{' && *s != '[')
1981 /* In a pattern, so maybe we have {n,m}. */
1998 /* On the other hand, maybe we have a character class */
2001 if (*s == ']' || *s == '^')
2004 /* this is terrifying, and it works */
2005 int weight = 2; /* let's weigh the evidence */
2007 unsigned char un_char = 255, last_un_char;
2008 const char * const send = strchr(s,']');
2009 char tmpbuf[sizeof PL_tokenbuf * 4];
2011 if (!send) /* has to be an expression */
2014 Zero(seen,256,char);
2017 else if (isDIGIT(*s)) {
2019 if (isDIGIT(s[1]) && s[2] == ']')
2025 for (; s < send; s++) {
2026 last_un_char = un_char;
2027 un_char = (unsigned char)*s;
2032 weight -= seen[un_char] * 10;
2033 if (isALNUM_lazy_if(s+1,UTF)) {
2034 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
2035 if ((int)strlen(tmpbuf) > 1
2036 && gv_fetchpv(tmpbuf, 0, SVt_PV))
2041 else if (*s == '$' && s[1] &&
2042 strchr("[#!%*<>()-=",s[1])) {
2043 if (/*{*/ strchr("])} =",s[2]))
2052 if (strchr("wds]",s[1]))
2054 else if (seen['\''] || seen['"'])
2056 else if (strchr("rnftbxcav",s[1]))
2058 else if (isDIGIT(s[1])) {
2060 while (s[1] && isDIGIT(s[1]))
2070 if (strchr("aA01! ",last_un_char))
2072 if (strchr("zZ79~",s[1]))
2074 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
2075 weight -= 5; /* cope with negative subscript */
2078 if (!isALNUM(last_un_char)
2079 && !(last_un_char == '$' || last_un_char == '@'
2080 || last_un_char == '&')
2081 && isALPHA(*s) && s[1] && isALPHA(s[1])) {
2086 if (keyword(tmpbuf, d - tmpbuf))
2089 if (un_char == last_un_char + 1)
2091 weight -= seen[un_char];
2096 if (weight >= 0) /* probably a character class */
2106 * Does all the checking to disambiguate
2108 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
2109 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
2111 * First argument is the stuff after the first token, e.g. "bar".
2113 * Not a method if bar is a filehandle.
2114 * Not a method if foo is a subroutine prototyped to take a filehandle.
2115 * Not a method if it's really "Foo $bar"
2116 * Method if it's "foo $bar"
2117 * Not a method if it's really "print foo $bar"
2118 * Method if it's really "foo package::" (interpreted as package->foo)
2119 * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
2120 * Not a method if bar is a filehandle or package, but is quoted with
2125 S_intuit_method(pTHX_ char *start, GV *gv, CV *cv)
2128 char *s = start + (*start == '$');
2129 char tmpbuf[sizeof PL_tokenbuf];
2134 if (SvTYPE(gv) == SVt_PVGV && GvIO(gv))
2138 const char *proto = SvPVX_const(cv);
2149 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
2150 /* start is the beginning of the possible filehandle/object,
2151 * and s is the end of it
2152 * tmpbuf is a copy of it
2155 if (*start == '$') {
2156 if (gv || PL_last_lop_op == OP_PRINT || isUPPER(*PL_tokenbuf))
2161 return *s == '(' ? FUNCMETH : METHOD;
2163 if (!keyword(tmpbuf, len)) {
2164 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
2169 indirgv = gv_fetchpv(tmpbuf, 0, SVt_PVCV);
2170 if (indirgv && GvCVu(indirgv))
2172 /* filehandle or package name makes it a method */
2173 if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, FALSE)) {
2175 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
2176 return 0; /* no assumptions -- "=>" quotes bearword */
2178 PL_nextval[PL_nexttoke].opval = (OP*)newSVOP(OP_CONST, 0,
2179 newSVpvn(tmpbuf,len));
2180 PL_nextval[PL_nexttoke].opval->op_private = OPpCONST_BARE;
2184 return *s == '(' ? FUNCMETH : METHOD;
2192 * Return a string of Perl code to load the debugger. If PERL5DB
2193 * is set, it will return the contents of that, otherwise a
2194 * compile-time require of perl5db.pl.
2202 const char * const pdb = PerlEnv_getenv("PERL5DB");
2206 SETERRNO(0,SS_NORMAL);
2207 return "BEGIN { require 'perl5db.pl' }";
2213 /* Encoded script support. filter_add() effectively inserts a
2214 * 'pre-processing' function into the current source input stream.
2215 * Note that the filter function only applies to the current source file
2216 * (e.g., it will not affect files 'require'd or 'use'd by this one).
2218 * The datasv parameter (which may be NULL) can be used to pass
2219 * private data to this instance of the filter. The filter function
2220 * can recover the SV using the FILTER_DATA macro and use it to
2221 * store private buffers and state information.
2223 * The supplied datasv parameter is upgraded to a PVIO type
2224 * and the IoDIRP/IoANY field is used to store the function pointer,
2225 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
2226 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
2227 * private use must be set using malloc'd pointers.
2231 Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
2237 if (!PL_rsfp_filters)
2238 PL_rsfp_filters = newAV();
2240 datasv = NEWSV(255,0);
2241 SvUPGRADE(datasv, SVt_PVIO);
2242 IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
2243 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
2244 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
2245 IoANY(datasv), SvPV_nolen(datasv)));
2246 av_unshift(PL_rsfp_filters, 1);
2247 av_store(PL_rsfp_filters, 0, datasv) ;
2252 /* Delete most recently added instance of this filter function. */
2254 Perl_filter_del(pTHX_ filter_t funcp)
2260 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p", FPTR2DPTR(XPVIO *, funcp)));
2262 if (!PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
2264 /* if filter is on top of stack (usual case) just pop it off */
2265 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
2266 if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
2267 IoFLAGS(datasv) &= ~IOf_FAKE_DIRP;
2268 IoANY(datasv) = (void *)NULL;
2269 sv_free(av_pop(PL_rsfp_filters));
2273 /* we need to search for the correct entry and clear it */
2274 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
2278 /* Invoke the idxth filter function for the current rsfp. */
2279 /* maxlen 0 = read one text line */
2281 Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
2287 if (!PL_rsfp_filters)
2289 if (idx > AvFILLp(PL_rsfp_filters)) { /* Any more filters? */
2290 /* Provide a default input filter to make life easy. */
2291 /* Note that we append to the line. This is handy. */
2292 DEBUG_P(PerlIO_printf(Perl_debug_log,
2293 "filter_read %d: from rsfp\n", idx));
2297 const int old_len = SvCUR(buf_sv);
2299 /* ensure buf_sv is large enough */
2300 SvGROW(buf_sv, (STRLEN)(old_len + maxlen)) ;
2301 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len, maxlen)) <= 0){
2302 if (PerlIO_error(PL_rsfp))
2303 return -1; /* error */
2305 return 0 ; /* end of file */
2307 SvCUR_set(buf_sv, old_len + len) ;
2310 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
2311 if (PerlIO_error(PL_rsfp))
2312 return -1; /* error */
2314 return 0 ; /* end of file */
2317 return SvCUR(buf_sv);
2319 /* Skip this filter slot if filter has been deleted */
2320 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
2321 DEBUG_P(PerlIO_printf(Perl_debug_log,
2322 "filter_read %d: skipped (filter deleted)\n",
2324 return FILTER_READ(idx+1, buf_sv, maxlen); /* recurse */
2326 /* Get function pointer hidden within datasv */
2327 funcp = DPTR2FPTR(filter_t, IoANY(datasv));
2328 DEBUG_P(PerlIO_printf(Perl_debug_log,
2329 "filter_read %d: via function %p (%s)\n",
2330 idx, datasv, SvPV_nolen_const(datasv)));
2331 /* Call function. The function is expected to */
2332 /* call "FILTER_READ(idx+1, buf_sv)" first. */
2333 /* Return: <0:error, =0:eof, >0:not eof */
2334 return (*funcp)(aTHX_ idx, buf_sv, maxlen);
2338 S_filter_gets(pTHX_ register SV *sv, register PerlIO *fp, STRLEN append)
2341 #ifdef PERL_CR_FILTER
2342 if (!PL_rsfp_filters) {
2343 filter_add(S_cr_textfilter,NULL);
2346 if (PL_rsfp_filters) {
2348 SvCUR_set(sv, 0); /* start with empty line */
2349 if (FILTER_READ(0, sv, 0) > 0)
2350 return ( SvPVX(sv) ) ;
2355 return (sv_gets(sv, fp, append));
2359 S_find_in_my_stash(pTHX_ const char *pkgname, I32 len)
2364 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
2368 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
2369 (gv = gv_fetchpv(pkgname, 0, SVt_PVHV)))
2371 return GvHV(gv); /* Foo:: */
2374 /* use constant CLASS => 'MyClass' */
2375 if ((gv = gv_fetchpv(pkgname, 0, SVt_PVCV))) {
2377 if (GvCV(gv) && (sv = cv_const_sv(GvCV(gv)))) {
2378 pkgname = SvPV_nolen_const(sv);
2382 return gv_stashpv(pkgname, FALSE);
2386 S_tokenize_use(pTHX_ int is_use, char *s) {
2388 if (PL_expect != XSTATE)
2389 yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
2390 is_use ? "use" : "no"));
2392 if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
2393 s = force_version(s, TRUE);
2394 if (*s == ';' || (s = skipspace(s), *s == ';')) {
2395 PL_nextval[PL_nexttoke].opval = Nullop;
2398 else if (*s == 'v') {
2399 s = force_word(s,WORD,FALSE,TRUE,FALSE);
2400 s = force_version(s, FALSE);
2404 s = force_word(s,WORD,FALSE,TRUE,FALSE);
2405 s = force_version(s, FALSE);
2407 yylval.ival = is_use;
2411 static const char* const exp_name[] =
2412 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
2413 "ATTRTERM", "TERMBLOCK", "TERMORDORDOR"
2420 Works out what to call the token just pulled out of the input
2421 stream. The yacc parser takes care of taking the ops we return and
2422 stitching them into a tree.
2428 if read an identifier
2429 if we're in a my declaration
2430 croak if they tried to say my($foo::bar)
2431 build the ops for a my() declaration
2432 if it's an access to a my() variable
2433 are we in a sort block?
2434 croak if my($a); $a <=> $b
2435 build ops for access to a my() variable
2436 if in a dq string, and they've said @foo and we can't find @foo
2438 build ops for a bareword
2439 if we already built the token before, use it.
2444 #pragma segment Perl_yylex
2450 register char *s = PL_bufptr;
2456 SV* tmp = newSVpvs("");
2457 PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n",
2458 (IV)CopLINE(PL_curcop),
2459 lex_state_names[PL_lex_state],
2460 exp_name[PL_expect],
2461 pv_display(tmp, s, strlen(s), 0, 60));
2464 /* check if there's an identifier for us to look at */
2465 if (PL_pending_ident)
2466 return REPORT(S_pending_ident(aTHX));
2468 /* no identifier pending identification */
2470 switch (PL_lex_state) {
2472 case LEX_NORMAL: /* Some compilers will produce faster */
2473 case LEX_INTERPNORMAL: /* code if we comment these out. */
2477 /* when we've already built the next token, just pull it out of the queue */
2480 yylval = PL_nextval[PL_nexttoke];
2482 PL_lex_state = PL_lex_defer;
2483 PL_expect = PL_lex_expect;
2484 PL_lex_defer = LEX_NORMAL;
2486 return REPORT(PL_nexttype[PL_nexttoke]);
2488 /* interpolated case modifiers like \L \U, including \Q and \E.
2489 when we get here, PL_bufptr is at the \
2491 case LEX_INTERPCASEMOD:
2493 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
2494 Perl_croak(aTHX_ "panic: INTERPCASEMOD");
2496 /* handle \E or end of string */
2497 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
2499 if (PL_lex_casemods) {
2500 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
2501 PL_lex_casestack[PL_lex_casemods] = '\0';
2503 if (PL_bufptr != PL_bufend
2504 && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q')) {
2506 PL_lex_state = LEX_INTERPCONCAT;
2510 if (PL_bufptr != PL_bufend)
2512 PL_lex_state = LEX_INTERPCONCAT;
2516 DEBUG_T({ PerlIO_printf(Perl_debug_log,
2517 "### Saw case modifier\n"); });
2519 if (s[1] == '\\' && s[2] == 'E') {
2521 PL_lex_state = LEX_INTERPCONCAT;
2526 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
2527 tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */
2528 if ((*s == 'L' || *s == 'U') &&
2529 (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U'))) {
2530 PL_lex_casestack[--PL_lex_casemods] = '\0';
2533 if (PL_lex_casemods > 10)
2534 Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
2535 PL_lex_casestack[PL_lex_casemods++] = *s;
2536 PL_lex_casestack[PL_lex_casemods] = '\0';
2537 PL_lex_state = LEX_INTERPCONCAT;
2538 PL_nextval[PL_nexttoke].ival = 0;
2541 PL_nextval[PL_nexttoke].ival = OP_LCFIRST;
2543 PL_nextval[PL_nexttoke].ival = OP_UCFIRST;
2545 PL_nextval[PL_nexttoke].ival = OP_LC;
2547 PL_nextval[PL_nexttoke].ival = OP_UC;
2549 PL_nextval[PL_nexttoke].ival = OP_QUOTEMETA;
2551 Perl_croak(aTHX_ "panic: yylex");
2555 if (PL_lex_starts) {
2558 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
2559 if (PL_lex_casemods == 1 && PL_lex_inpat)
2568 case LEX_INTERPPUSH:
2569 return REPORT(sublex_push());
2571 case LEX_INTERPSTART:
2572 if (PL_bufptr == PL_bufend)
2573 return REPORT(sublex_done());
2574 DEBUG_T({ PerlIO_printf(Perl_debug_log,
2575 "### Interpolated variable\n"); });
2577 PL_lex_dojoin = (*PL_bufptr == '@');
2578 PL_lex_state = LEX_INTERPNORMAL;
2579 if (PL_lex_dojoin) {
2580 PL_nextval[PL_nexttoke].ival = 0;
2582 force_ident("\"", '$');
2583 PL_nextval[PL_nexttoke].ival = 0;
2585 PL_nextval[PL_nexttoke].ival = 0;
2587 PL_nextval[PL_nexttoke].ival = OP_JOIN; /* emulate join($", ...) */
2590 if (PL_lex_starts++) {
2592 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
2593 if (!PL_lex_casemods && PL_lex_inpat)
2600 case LEX_INTERPENDMAYBE:
2601 if (intuit_more(PL_bufptr)) {
2602 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
2608 if (PL_lex_dojoin) {
2609 PL_lex_dojoin = FALSE;
2610 PL_lex_state = LEX_INTERPCONCAT;
2613 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
2614 && SvEVALED(PL_lex_repl))
2616 if (PL_bufptr != PL_bufend)
2617 Perl_croak(aTHX_ "Bad evalled substitution pattern");
2618 PL_lex_repl = Nullsv;
2621 case LEX_INTERPCONCAT:
2623 if (PL_lex_brackets)
2624 Perl_croak(aTHX_ "panic: INTERPCONCAT");
2626 if (PL_bufptr == PL_bufend)
2627 return REPORT(sublex_done());
2629 if (SvIVX(PL_linestr) == '\'') {
2630 SV *sv = newSVsv(PL_linestr);
2633 else if ( PL_hints & HINT_NEW_RE )
2634 sv = new_constant(NULL, 0, "qr", sv, sv, "q");
2635 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
2639 s = scan_const(PL_bufptr);
2641 PL_lex_state = LEX_INTERPCASEMOD;
2643 PL_lex_state = LEX_INTERPSTART;
2646 if (s != PL_bufptr) {
2647 PL_nextval[PL_nexttoke] = yylval;
2650 if (PL_lex_starts++) {
2651 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
2652 if (!PL_lex_casemods && PL_lex_inpat)
2665 PL_lex_state = LEX_NORMAL;
2666 s = scan_formline(PL_bufptr);
2667 if (!PL_lex_formbrack)
2673 PL_oldoldbufptr = PL_oldbufptr;
2679 if (isIDFIRST_lazy_if(s,UTF))
2681 Perl_croak(aTHX_ "Unrecognized character \\x%02X", *s & 255);
2684 goto fake_eof; /* emulate EOF on ^D or ^Z */
2689 if (PL_lex_brackets) {
2690 yyerror(PL_lex_formbrack
2691 ? "Format not terminated"
2692 : "Missing right curly or square bracket");
2694 DEBUG_T( { PerlIO_printf(Perl_debug_log,
2695 "### Tokener got EOF\n");
2699 if (s++ < PL_bufend)
2700 goto retry; /* ignore stray nulls */
2703 if (!PL_in_eval && !PL_preambled) {
2704 PL_preambled = TRUE;
2705 sv_setpv(PL_linestr,incl_perldb());
2706 if (SvCUR(PL_linestr))
2707 sv_catpvs(PL_linestr,";");
2709 while(AvFILLp(PL_preambleav) >= 0) {
2710 SV *tmpsv = av_shift(PL_preambleav);
2711 sv_catsv(PL_linestr, tmpsv);
2712 sv_catpvs(PL_linestr, ";");
2715 sv_free((SV*)PL_preambleav);
2716 PL_preambleav = NULL;
2718 if (PL_minus_n || PL_minus_p) {
2719 sv_catpvs(PL_linestr, "LINE: while (<>) {");
2721 sv_catpvs(PL_linestr,"chomp;");
2724 if ((*PL_splitstr == '/' || *PL_splitstr == '\''
2725 || *PL_splitstr == '"')
2726 && strchr(PL_splitstr + 1, *PL_splitstr))
2727 Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
2729 /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
2730 bytes can be used as quoting characters. :-) */
2731 const char *splits = PL_splitstr;
2732 sv_catpvs(PL_linestr, "our @F=split(q\0");
2735 if (*splits == '\\')
2736 sv_catpvn(PL_linestr, splits, 1);
2737 sv_catpvn(PL_linestr, splits, 1);
2738 } while (*splits++);
2739 /* This loop will embed the trailing NUL of
2740 PL_linestr as the last thing it does before
2742 sv_catpvs(PL_linestr, ");");
2746 sv_catpvs(PL_linestr,"our @F=split(' ');");
2750 sv_catpvs(PL_linestr,"use feature ':5.10';");
2751 sv_catpvs(PL_linestr, "\n");
2752 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2753 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2754 PL_last_lop = PL_last_uni = Nullch;
2755 if (PERLDB_LINE && PL_curstash != PL_debstash) {
2756 SV * const sv = NEWSV(85,0);
2758 sv_upgrade(sv, SVt_PVMG);
2759 sv_setsv(sv,PL_linestr);
2762 av_store(CopFILEAVx(PL_curcop),(I32)CopLINE(PL_curcop),sv);
2767 bof = PL_rsfp ? TRUE : FALSE;
2768 if ((s = filter_gets(PL_linestr, PL_rsfp, 0)) == Nullch) {
2771 if (PL_preprocess && !PL_in_eval)
2772 (void)PerlProc_pclose(PL_rsfp);
2773 else if ((PerlIO *)PL_rsfp == PerlIO_stdin())
2774 PerlIO_clearerr(PL_rsfp);
2776 (void)PerlIO_close(PL_rsfp);
2778 PL_doextract = FALSE;
2780 if (!PL_in_eval && (PL_minus_n || PL_minus_p)) {
2781 sv_setpv(PL_linestr,PL_minus_p
2782 ? ";}continue{print;}" : ";}");
2783 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2784 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2785 PL_last_lop = PL_last_uni = Nullch;
2786 PL_minus_n = PL_minus_p = 0;
2789 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2790 PL_last_lop = PL_last_uni = Nullch;
2791 sv_setpvn(PL_linestr,"",0);
2792 TOKEN(';'); /* not infinite loop because rsfp is NULL now */
2794 /* If it looks like the start of a BOM or raw UTF-16,
2795 * check if it in fact is. */
2801 #ifdef PERLIO_IS_STDIO
2802 # ifdef __GNU_LIBRARY__
2803 # if __GNU_LIBRARY__ == 1 /* Linux glibc5 */
2804 # define FTELL_FOR_PIPE_IS_BROKEN
2808 # if __GLIBC__ == 1 /* maybe some glibc5 release had it like this? */
2809 # define FTELL_FOR_PIPE_IS_BROKEN
2814 #ifdef FTELL_FOR_PIPE_IS_BROKEN
2815 /* This loses the possibility to detect the bof
2816 * situation on perl -P when the libc5 is being used.
2817 * Workaround? Maybe attach some extra state to PL_rsfp?
2820 bof = PerlIO_tell(PL_rsfp) == SvCUR(PL_linestr);
2822 bof = PerlIO_tell(PL_rsfp) == (Off_t)SvCUR(PL_linestr);
2825 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2826 s = swallow_bom((U8*)s);
2830 /* Incest with pod. */
2831 if (*s == '=' && strnEQ(s, "=cut", 4)) {
2832 sv_setpvn(PL_linestr, "", 0);
2833 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2834 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2835 PL_last_lop = PL_last_uni = Nullch;
2836 PL_doextract = FALSE;
2840 } while (PL_doextract);
2841 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
2842 if (PERLDB_LINE && PL_curstash != PL_debstash) {
2843 SV * const sv = NEWSV(85,0);
2845 sv_upgrade(sv, SVt_PVMG);
2846 sv_setsv(sv,PL_linestr);
2849 av_store(CopFILEAVx(PL_curcop),(I32)CopLINE(PL_curcop),sv);
2851 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2852 PL_last_lop = PL_last_uni = Nullch;
2853 if (CopLINE(PL_curcop) == 1) {
2854 while (s < PL_bufend && isSPACE(*s))
2856 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
2860 if (*s == '#' && *(s+1) == '!')
2862 #ifdef ALTERNATE_SHEBANG
2864 static char const as[] = ALTERNATE_SHEBANG;
2865 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
2866 d = s + (sizeof(as) - 1);
2868 #endif /* ALTERNATE_SHEBANG */
2877 while (*d && !isSPACE(*d))
2881 #ifdef ARG_ZERO_IS_SCRIPT
2882 if (ipathend > ipath) {
2884 * HP-UX (at least) sets argv[0] to the script name,
2885 * which makes $^X incorrect. And Digital UNIX and Linux,
2886 * at least, set argv[0] to the basename of the Perl
2887 * interpreter. So, having found "#!", we'll set it right.
2890 = GvSV(gv_fetchpv("\030", GV_ADD, SVt_PV)); /* $^X */
2891 assert(SvPOK(x) || SvGMAGICAL(x));
2892 if (sv_eq(x, CopFILESV(PL_curcop))) {
2893 sv_setpvn(x, ipath, ipathend - ipath);
2899 const char *bstart = SvPV_const(CopFILESV(PL_curcop),blen);
2900 const char * const lstart = SvPV_const(x,llen);
2902 bstart += blen - llen;
2903 if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
2904 sv_setpvn(x, ipath, ipathend - ipath);
2909 TAINT_NOT; /* $^X is always tainted, but that's OK */
2911 #endif /* ARG_ZERO_IS_SCRIPT */
2916 d = instr(s,"perl -");
2918 d = instr(s,"perl");
2920 /* avoid getting into infinite loops when shebang
2921 * line contains "Perl" rather than "perl" */
2923 for (d = ipathend-4; d >= ipath; --d) {
2924 if ((*d == 'p' || *d == 'P')
2925 && !ibcmp(d, "perl", 4))
2935 #ifdef ALTERNATE_SHEBANG
2937 * If the ALTERNATE_SHEBANG on this system starts with a
2938 * character that can be part of a Perl expression, then if
2939 * we see it but not "perl", we're probably looking at the
2940 * start of Perl code, not a request to hand off to some
2941 * other interpreter. Similarly, if "perl" is there, but
2942 * not in the first 'word' of the line, we assume the line
2943 * contains the start of the Perl program.
2945 if (d && *s != '#') {
2946 const char *c = ipath;
2947 while (*c && !strchr("; \t\r\n\f\v#", *c))
2950 d = Nullch; /* "perl" not in first word; ignore */
2952 *s = '#'; /* Don't try to parse shebang line */
2954 #endif /* ALTERNATE_SHEBANG */
2955 #ifndef MACOS_TRADITIONAL
2960 !instr(s,"indir") &&
2961 instr(PL_origargv[0],"perl"))
2968 while (s < PL_bufend && isSPACE(*s))
2970 if (s < PL_bufend) {
2971 Newxz(newargv,PL_origargc+3,char*);
2973 while (s < PL_bufend && !isSPACE(*s))
2976 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
2979 newargv = PL_origargv;
2982 PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
2984 Perl_croak(aTHX_ "Can't exec %s", ipath);
2988 while (*d && !isSPACE(*d)) d++;
2989 while (SPACE_OR_TAB(*d)) d++;
2992 const bool switches_done = PL_doswitches;
2993 const U32 oldpdb = PL_perldb;
2994 const bool oldn = PL_minus_n;
2995 const bool oldp = PL_minus_p;
2998 if (*d == 'M' || *d == 'm' || *d == 'C') {
2999 const char * const m = d;
3000 while (*d && !isSPACE(*d)) d++;
3001 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
3004 d = moreswitches(d);
3006 if (PL_doswitches && !switches_done) {
3007 int argc = PL_origargc;
3008 char **argv = PL_origargv;
3011 } while (argc && argv[0][0] == '-' && argv[0][1]);
3012 init_argv_symbols(argc,argv);
3014 if ((PERLDB_LINE && !oldpdb) ||
3015 ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
3016 /* if we have already added "LINE: while (<>) {",
3017 we must not do it again */
3019 sv_setpvn(PL_linestr, "", 0);
3020 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
3021 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
3022 PL_last_lop = PL_last_uni = Nullch;
3023 PL_preambled = FALSE;
3025 (void)gv_fetchfile(PL_origfilename);
3032 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
3034 PL_lex_state = LEX_FORMLINE;
3039 #ifdef PERL_STRICT_CR
3040 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
3042 "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
3044 case ' ': case '\t': case '\f': case 013:
3045 #ifdef MACOS_TRADITIONAL
3052 if (PL_lex_state != LEX_NORMAL || (PL_in_eval && !PL_rsfp)) {
3053 if (*s == '#' && s == PL_linestart && PL_in_eval && !PL_rsfp) {
3054 /* handle eval qq[#line 1 "foo"\n ...] */
3055 CopLINE_dec(PL_curcop);
3059 while (s < d && *s != '\n')
3063 else if (s > d) /* Found by Ilya: feed random input to Perl. */
3064 Perl_croak(aTHX_ "panic: input overflow");
3066 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
3068 PL_lex_state = LEX_FORMLINE;
3078 if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {
3086 while (s < PL_bufend && SPACE_OR_TAB(*s))
3089 if (strnEQ(s,"=>",2)) {
3090 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
3091 DEBUG_T( { S_printbuf(aTHX_
3092 "### Saw unary minus before =>, forcing word %s\n", s);
3094 OPERATOR('-'); /* unary minus */
3096 PL_last_uni = PL_oldbufptr;
3098 case 'r': ftst = OP_FTEREAD; break;
3099 case 'w': ftst = OP_FTEWRITE; break;
3100 case 'x': ftst = OP_FTEEXEC; break;
3101 case 'o': ftst = OP_FTEOWNED; break;
3102 case 'R': ftst = OP_FTRREAD; break;
3103 case 'W': ftst = OP_FTRWRITE; break;
3104 case 'X': ftst = OP_FTREXEC; break;
3105 case 'O': ftst = OP_FTROWNED; break;
3106 case 'e': ftst = OP_FTIS; break;
3107 case 'z': ftst = OP_FTZERO; break;
3108 case 's': ftst = OP_FTSIZE; break;
3109 case 'f': ftst = OP_FTFILE; break;
3110 case 'd': ftst = OP_FTDIR; break;
3111 case 'l': ftst = OP_FTLINK; break;
3112 case 'p': ftst = OP_FTPIPE; break;
3113 case 'S': ftst = OP_FTSOCK; break;
3114 case 'u': ftst = OP_FTSUID; break;
3115 case 'g': ftst = OP_FTSGID; break;
3116 case 'k': ftst = OP_FTSVTX; break;
3117 case 'b': ftst = OP_FTBLK; break;
3118 case 'c': ftst = OP_FTCHR; break;
3119 case 't': ftst = OP_FTTTY; break;
3120 case 'T': ftst = OP_FTTEXT; break;
3121 case 'B': ftst = OP_FTBINARY; break;
3122 case 'M': case 'A': case 'C':
3123 gv_fetchpv("\024",GV_ADD, SVt_PV);
3125 case 'M': ftst = OP_FTMTIME; break;
3126 case 'A': ftst = OP_FTATIME; break;
3127 case 'C': ftst = OP_FTCTIME; break;
3135 PL_last_lop_op = (OPCODE)ftst;
3136 DEBUG_T( { PerlIO_printf(Perl_debug_log,
3137 "### Saw file test %c\n", (int)tmp);
3142 /* Assume it was a minus followed by a one-letter named
3143 * subroutine call (or a -bareword), then. */
3144 DEBUG_T( { PerlIO_printf(Perl_debug_log,
3145 "### '-%c' looked like a file test but was not\n",
3152 const char tmp = *s++;
3155 if (PL_expect == XOPERATOR)
3160 else if (*s == '>') {
3163 if (isIDFIRST_lazy_if(s,UTF)) {
3164 s = force_word(s,METHOD,FALSE,TRUE,FALSE);
3172 if (PL_expect == XOPERATOR)
3175 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
3177 OPERATOR('-'); /* unary minus */
3183 const char tmp = *s++;
3186 if (PL_expect == XOPERATOR)
3191 if (PL_expect == XOPERATOR)
3194 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
3201 if (PL_expect != XOPERATOR) {
3202 s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
3203 PL_expect = XOPERATOR;
3204 force_ident(PL_tokenbuf, '*');
3217 if (PL_expect == XOPERATOR) {
3221 PL_tokenbuf[0] = '%';
3222 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, TRUE);
3223 if (!PL_tokenbuf[1]) {
3226 PL_pending_ident = '%';
3237 && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR)
3238 && FEATURE_IS_ENABLED("~~"))
3245 const char tmp = *s++;
3251 goto just_a_word_zero_gv;
3254 switch (PL_expect) {
3257 if (!PL_in_my || PL_lex_state != LEX_NORMAL)
3259 PL_bufptr = s; /* update in case we back off */
3265 PL_expect = XTERMBLOCK;
3269 while (isIDFIRST_lazy_if(s,UTF)) {
3271 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
3272 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len))) {
3273 if (tmp < 0) tmp = -tmp;
3289 d = scan_str(d,TRUE,TRUE);
3291 /* MUST advance bufptr here to avoid bogus
3292 "at end of line" context messages from yyerror().
3294 PL_bufptr = s + len;
3295 yyerror("Unterminated attribute parameter in attribute list");
3298 return REPORT(0); /* EOF indicator */
3302 SV *sv = newSVpvn(s, len);
3303 sv_catsv(sv, PL_lex_stuff);
3304 attrs = append_elem(OP_LIST, attrs,
3305 newSVOP(OP_CONST, 0, sv));
3306 SvREFCNT_dec(PL_lex_stuff);
3307 PL_lex_stuff = Nullsv;
3310 if (len == 6 && strnEQ(s, "unique", len)) {
3311 if (PL_in_my == KEY_our)
3313 GvUNIQUE_on(cGVOPx_gv(yylval.opval));
3315 ; /* skip to avoid loading attributes.pm */
3318 Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables");
3321 /* NOTE: any CV attrs applied here need to be part of
3322 the CVf_BUILTIN_ATTRS define in cv.h! */
3323 else if (!PL_in_my && len == 6 && strnEQ(s, "lvalue", len))
3324 CvLVALUE_on(PL_compcv);
3325 else if (!PL_in_my && len == 6 && strnEQ(s, "locked", len))
3326 CvLOCKED_on(PL_compcv);
3327 else if (!PL_in_my && len == 6 && strnEQ(s, "method", len))
3328 CvMETHOD_on(PL_compcv);
3329 else if (!PL_in_my && len == 9 && strnEQ(s, "assertion", len))
3330 CvASSERTION_on(PL_compcv);
3331 /* After we've set the flags, it could be argued that
3332 we don't need to do the attributes.pm-based setting
3333 process, and shouldn't bother appending recognized
3334 flags. To experiment with that, uncomment the
3335 following "else". (Note that's already been
3336 uncommented. That keeps the above-applied built-in
3337 attributes from being intercepted (and possibly
3338 rejected) by a package's attribute routines, but is
3339 justified by the performance win for the common case
3340 of applying only built-in attributes.) */
3342 attrs = append_elem(OP_LIST, attrs,
3343 newSVOP(OP_CONST, 0,
3347 if (*s == ':' && s[1] != ':')
3350 break; /* require real whitespace or :'s */
3354 = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
3355 if (*s != ';' && *s != '}' && *s != tmp
3356 && (tmp != '=' || *s != ')')) {
3357 const char q = ((*s == '\'') ? '"' : '\'');
3358 /* If here for an expression, and parsed no attrs, back
3360 if (tmp == '=' && !attrs) {
3364 /* MUST advance bufptr here to avoid bogus "at end of line"
3365 context messages from yyerror().
3369 ? Perl_form(aTHX_ "Invalid separator character "
3370 "%c%c%c in attribute list", q, *s, q)
3371 : "Unterminated attribute list" );
3379 PL_nextval[PL_nexttoke].opval = attrs;
3387 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
3388 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */
3396 const char tmp = *s++;
3401 const char tmp = *s++;
3409 if (PL_lex_brackets <= 0)
3410 yyerror("Unmatched right square bracket");
3413 if (PL_lex_state == LEX_INTERPNORMAL) {
3414 if (PL_lex_brackets == 0) {
3415 if (*s != '[' && *s != '{' && (*s != '-' || s[1] != '>'))
3416 PL_lex_state = LEX_INTERPEND;
3423 if (PL_lex_brackets > 100) {
3424 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
3426 switch (PL_expect) {
3428 if (PL_lex_formbrack) {
3432 if (PL_oldoldbufptr == PL_last_lop)
3433 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
3435 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
3436 OPERATOR(HASHBRACK);
3438 while (s < PL_bufend && SPACE_OR_TAB(*s))
3441 PL_tokenbuf[0] = '\0';
3442 if (d < PL_bufend && *d == '-') {
3443 PL_tokenbuf[0] = '-';
3445 while (d < PL_bufend && SPACE_OR_TAB(*d))
3448 if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
3449 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
3451 while (d < PL_bufend && SPACE_OR_TAB(*d))
3454 const char minus = (PL_tokenbuf[0] == '-');
3455 s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
3463 PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
3468 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
3473 if (PL_oldoldbufptr == PL_last_lop)
3474 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
3476 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
3479 if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
3481 /* This hack is to get the ${} in the message. */
3483 yyerror("syntax error");
3486 OPERATOR(HASHBRACK);
3488 /* This hack serves to disambiguate a pair of curlies
3489 * as being a block or an anon hash. Normally, expectation
3490 * determines that, but in cases where we're not in a
3491 * position to expect anything in particular (like inside
3492 * eval"") we have to resolve the ambiguity. This code
3493 * covers the case where the first term in the curlies is a
3494 * quoted string. Most other cases need to be explicitly
3495 * disambiguated by prepending a "+" before the opening
3496 * curly in order to force resolution as an anon hash.
3498 * XXX should probably propagate the outer expectation
3499 * into eval"" to rely less on this hack, but that could
3500 * potentially break current behavior of eval"".
3504 if (*s == '\'' || *s == '"' || *s == '`') {
3505 /* common case: get past first string, handling escapes */
3506 for (t++; t < PL_bufend && *t != *s;)
3507 if (*t++ == '\\' && (*t == '\\' || *t == *s))
3511 else if (*s == 'q') {
3514 || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
3517 /* skip q//-like construct */
3519 char open, close, term;
3522 while (t < PL_bufend && isSPACE(*t))
3524 /* check for q => */
3525 if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
3526 OPERATOR(HASHBRACK);
3530 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
3534 for (t++; t < PL_bufend; t++) {
3535 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
3537 else if (*t == open)
3541 for (t++; t < PL_bufend; t++) {
3542 if (*t == '\\' && t+1 < PL_bufend)
3544 else if (*t == close && --brackets <= 0)
3546 else if (*t == open)
3553 /* skip plain q word */
3554 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
3557 else if (isALNUM_lazy_if(t,UTF)) {
3559 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
3562 while (t < PL_bufend && isSPACE(*t))
3564 /* if comma follows first term, call it an anon hash */
3565 /* XXX it could be a comma expression with loop modifiers */
3566 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
3567 || (*t == '=' && t[1] == '>')))
3568 OPERATOR(HASHBRACK);
3569 if (PL_expect == XREF)
3572 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
3578 yylval.ival = CopLINE(PL_curcop);
3579 if (isSPACE(*s) || *s == '#')
3580 PL_copline = NOLINE; /* invalidate current command line number */
3585 if (PL_lex_brackets <= 0)
3586 yyerror("Unmatched right curly bracket");
3588 PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
3589 if (PL_lex_brackets < PL_lex_formbrack && PL_lex_state != LEX_INTERPNORMAL)
3590 PL_lex_formbrack = 0;
3591 if (PL_lex_state == LEX_INTERPNORMAL) {
3592 if (PL_lex_brackets == 0) {
3593 if (PL_expect & XFAKEBRACK) {
3594 PL_expect &= XENUMMASK;
3595 PL_lex_state = LEX_INTERPEND;
3597 return yylex(); /* ignore fake brackets */
3599 if (*s == '-' && s[1] == '>')
3600 PL_lex_state = LEX_INTERPENDMAYBE;
3601 else if (*s != '[' && *s != '{')
3602 PL_lex_state = LEX_INTERPEND;
3605 if (PL_expect & XFAKEBRACK) {
3606 PL_expect &= XENUMMASK;
3608 return yylex(); /* ignore fake brackets */
3617 if (PL_expect == XOPERATOR) {
3618 if (PL_bufptr == PL_linestart && ckWARN(WARN_SEMICOLON)
3619 && isIDFIRST_lazy_if(s,UTF))
3621 CopLINE_dec(PL_curcop);
3622 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), PL_warn_nosemi);
3623 CopLINE_inc(PL_curcop);
3628 s = scan_ident(s - 1, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
3630 PL_expect = XOPERATOR;
3631 force_ident(PL_tokenbuf, '&');
3635 yylval.ival = (OPpENTERSUB_AMPER<<8);
3647 const char tmp = *s++;
3654 if (tmp && isSPACE(*s) && ckWARN(WARN_SYNTAX)
3655 && strchr("+-*/%.^&|<",tmp))
3656 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
3657 "Reversed %c= operator",(int)tmp);
3659 if (PL_expect == XSTATE && isALPHA(tmp) &&
3660 (s == PL_linestart+1 || s[-2] == '\n') )
3662 if (PL_in_eval && !PL_rsfp) {
3667 if (strnEQ(s,"=cut",4)) {
3681 PL_doextract = TRUE;
3685 if (PL_lex_brackets < PL_lex_formbrack) {
3687 #ifdef PERL_STRICT_CR
3688 for (t = s; SPACE_OR_TAB(*t); t++) ;
3690 for (t = s; SPACE_OR_TAB(*t) || *t == '\r'; t++) ;
3692 if (*t == '\n' || *t == '#') {
3703 const char tmp = *s++;
3705 /* was this !=~ where !~ was meant?
3706 * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */
3708 if (*s == '~' && ckWARN(WARN_SYNTAX)) {
3709 const char *t = s+1;
3711 while (t < PL_bufend && isSPACE(*t))
3714 if (*t == '/' || *t == '?' ||
3715 ((*t == 'm' || *t == 's' || *t == 'y')
3716 && !isALNUM(t[1])) ||
3717 (*t == 't' && t[1] == 'r' && !isALNUM(t[2])))
3718 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
3719 "!=~ should be !~");
3729 if (PL_expect != XOPERATOR) {
3730 if (s[1] != '<' && !strchr(s,'>'))
3733 s = scan_heredoc(s);
3735 s = scan_inputsymbol(s);
3736 TERM(sublex_start());
3742 SHop(OP_LEFT_SHIFT);
3756 const char tmp = *s++;
3758 SHop(OP_RIGHT_SHIFT);
3768 if (PL_expect == XOPERATOR) {
3769 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
3771 deprecate_old(commaless_variable_list);
3772 return REPORT(','); /* grandfather non-comma-format format */
3776 if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-", s[2]))) {
3777 PL_tokenbuf[0] = '@';
3778 s = scan_ident(s + 1, PL_bufend, PL_tokenbuf + 1,
3779 sizeof PL_tokenbuf - 1, FALSE);
3780 if (PL_expect == XOPERATOR)
3781 no_op("Array length", s);
3782 if (!PL_tokenbuf[1])
3784 PL_expect = XOPERATOR;
3785 PL_pending_ident = '#';
3789 PL_tokenbuf[0] = '$';
3790 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
3791 sizeof PL_tokenbuf - 1, FALSE);
3792 if (PL_expect == XOPERATOR)
3794 if (!PL_tokenbuf[1]) {
3796 yyerror("Final $ should be \\$ or $name");
3800 /* This kludge not intended to be bulletproof. */
3801 if (PL_tokenbuf[1] == '[' && !PL_tokenbuf[2]) {
3802 yylval.opval = newSVOP(OP_CONST, 0,
3803 newSViv(PL_compiling.cop_arybase));
3804 yylval.opval->op_private = OPpCONST_ARYBASE;
3810 const char tmp = *s;
3811 if (PL_lex_state == LEX_NORMAL)
3814 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
3815 && intuit_more(s)) {
3817 PL_tokenbuf[0] = '@';
3818 if (ckWARN(WARN_SYNTAX)) {
3821 isSPACE(*t) || isALNUM_lazy_if(t,UTF) || *t == '$';
3824 PL_bufptr = skipspace(PL_bufptr);
3825 while (t < PL_bufend && *t != ']')
3827 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
3828 "Multidimensional syntax %.*s not supported",
3829 (int)((t - PL_bufptr) + 1), PL_bufptr);
3833 else if (*s == '{') {
3835 PL_tokenbuf[0] = '%';
3836 if (strEQ(PL_tokenbuf+1, "SIG") && ckWARN(WARN_SYNTAX)
3837 && (t = strchr(s, '}')) && (t = strchr(t, '=')))
3839 char tmpbuf[sizeof PL_tokenbuf];
3840 for (t++; isSPACE(*t); t++) ;
3841 if (isIDFIRST_lazy_if(t,UTF)) {
3843 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE,
3845 for (; isSPACE(*t); t++) ;
3846 if (*t == ';' && get_cv(tmpbuf, FALSE))
3847 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
3848 "You need to quote \"%s\"",
3855 PL_expect = XOPERATOR;
3856 if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
3857 const bool islop = (PL_last_lop == PL_oldoldbufptr);
3858 if (!islop || PL_last_lop_op == OP_GREPSTART)
3859 PL_expect = XOPERATOR;
3860 else if (strchr("$@\"'`q", *s))
3861 PL_expect = XTERM; /* e.g. print $fh "foo" */
3862 else if (strchr("&*<%", *s) && isIDFIRST_lazy_if(s+1,UTF))
3863 PL_expect = XTERM; /* e.g. print $fh &sub */
3864 else if (isIDFIRST_lazy_if(s,UTF)) {
3865 char tmpbuf[sizeof PL_tokenbuf];
3867 scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
3868 if ((t2 = keyword(tmpbuf, len))) {
3869 /* binary operators exclude handle interpretations */
3881 PL_expect = XTERM; /* e.g. print $fh length() */
3886 PL_expect = XTERM; /* e.g. print $fh subr() */
3889 else if (isDIGIT(*s))
3890 PL_expect = XTERM; /* e.g. print $fh 3 */
3891 else if (*s == '.' && isDIGIT(s[1]))
3892 PL_expect = XTERM; /* e.g. print $fh .3 */
3893 else if ((*s == '?' || *s == '-' || *s == '+')
3894 && !isSPACE(s[1]) && s[1] != '=')
3895 PL_expect = XTERM; /* e.g. print $fh -1 */
3896 else if (*s == '/' && !isSPACE(s[1]) && s[1] != '='
3898 PL_expect = XTERM; /* e.g. print $fh /.../
3899 XXX except DORDOR operator
3901 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2])
3903 PL_expect = XTERM; /* print $fh <<"EOF" */
3906 PL_pending_ident = '$';
3910 if (PL_expect == XOPERATOR)
3912 PL_tokenbuf[0] = '@';
3913 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
3914 if (!PL_tokenbuf[1]) {
3917 if (PL_lex_state == LEX_NORMAL)
3919 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
3921 PL_tokenbuf[0] = '%';
3923 /* Warn about @ where they meant $. */
3924 if (*s == '[' || *s == '{') {
3925 if (ckWARN(WARN_SYNTAX)) {
3926 const char *t = s + 1;
3927 while (*t && (isALNUM_lazy_if(t,UTF) || strchr(" \t$#+-'\"", *t)))
3929 if (*t == '}' || *t == ']') {
3931 PL_bufptr = skipspace(PL_bufptr);
3932 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
3933 "Scalar value %.*s better written as $%.*s",
3934 (int)(t-PL_bufptr), PL_bufptr,
3935 (int)(t-PL_bufptr-1), PL_bufptr+1);
3940 PL_pending_ident = '@';
3943 case '/': /* may be division, defined-or, or pattern */
3944 if (PL_expect == XTERMORDORDOR && s[1] == '/') {
3948 case '?': /* may either be conditional or pattern */
3949 if(PL_expect == XOPERATOR) {
3957 /* A // operator. */
3967 /* Disable warning on "study /blah/" */
3968 if (PL_oldoldbufptr == PL_last_uni
3969 && (*PL_last_uni != 's' || s - PL_last_uni < 5
3970 || memNE(PL_last_uni, "study", 5)
3971 || isALNUM_lazy_if(PL_last_uni+5,UTF)
3974 s = scan_pat(s,OP_MATCH);
3975 TERM(sublex_start());
3979 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
3980 #ifdef PERL_STRICT_CR
3983 && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
3985 && (s == PL_linestart || s[-1] == '\n') )
3987 PL_lex_formbrack = 0;
3991 if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
3997 yylval.ival = OPf_SPECIAL;
4003 if (PL_expect != XOPERATOR)
4008 case '0': case '1': case '2': case '3': case '4':
4009 case '5': case '6': case '7': case '8': case '9':
4010 s = scan_num(s, &yylval);
4011 DEBUG_T( { S_printbuf(aTHX_ "### Saw number in %s\n", s); } );
4012 if (PL_expect == XOPERATOR)
4017 s = scan_str(s,FALSE,FALSE);
4018 DEBUG_T( { S_printbuf(aTHX_ "### Saw string before %s\n", s); } );
4019 if (PL_expect == XOPERATOR) {
4020 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
4022 deprecate_old(commaless_variable_list);
4023 return REPORT(','); /* grandfather non-comma-format format */
4029 missingterm((char*)0);
4030 yylval.ival = OP_CONST;
4031 TERM(sublex_start());
4034 s = scan_str(s,FALSE,FALSE);
4035 DEBUG_T( { S_printbuf(aTHX_ "### Saw string before %s\n", s); } );
4036 if (PL_expect == XOPERATOR) {
4037 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
4039 deprecate_old(commaless_variable_list);
4040 return REPORT(','); /* grandfather non-comma-format format */
4046 missingterm((char*)0);
4047 yylval.ival = OP_CONST;
4048 /* FIXME. I think that this can be const if char *d is replaced by
4049 more localised variables. */
4050 for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
4051 if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) {
4052 yylval.ival = OP_STRINGIFY;
4056 TERM(sublex_start());
4059 s = scan_str(s,FALSE,FALSE);
4060 DEBUG_T( { S_printbuf(aTHX_ "### Saw backtick string before %s\n", s); } );
4061 if (PL_expect == XOPERATOR)
4062 no_op("Backticks",s);
4064 missingterm((char*)0);
4065 yylval.ival = OP_BACKTICK;
4067 TERM(sublex_start());
4071 if (PL_lex_inwhat && isDIGIT(*s) && ckWARN(WARN_SYNTAX))
4072 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression",
4074 if (PL_expect == XOPERATOR)
4075 no_op("Backslash",s);
4079 if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
4080 char *start = s + 2;
4081 while (isDIGIT(*start) || *start == '_')
4083 if (*start == '.' && isDIGIT(start[1])) {
4084 s = scan_num(s, &yylval);
4087 /* avoid v123abc() or $h{v1}, allow C<print v10;> */
4088 else if (!isALPHA(*start) && (PL_expect == XTERM
4089 || PL_expect == XREF || PL_expect == XSTATE
4090 || PL_expect == XTERMORDORDOR)) {
4091 const char c = *start;
4094 gv = gv_fetchpv(s, 0, SVt_PVCV);
4097 s = scan_num(s, &yylval);
4104 if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
4140 I32 orig_keyword = 0;
4145 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
4147 /* Some keywords can be followed by any delimiter, including ':' */
4148 tmp = ((len == 1 && strchr("msyq", PL_tokenbuf[0])) ||
4149 (len == 2 && ((PL_tokenbuf[0] == 't' && PL_tokenbuf[1] == 'r') ||
4150 (PL_tokenbuf[0] == 'q' &&
4151 strchr("qwxr", PL_tokenbuf[1])))));
4153 /* x::* is just a word, unless x is "CORE" */
4154 if (!tmp && *s == ':' && s[1] == ':' && strNE(PL_tokenbuf, "CORE"))
4158 while (d < PL_bufend && isSPACE(*d))
4159 d++; /* no comments skipped here, or s### is misparsed */
4161 /* Is this a label? */
4162 if (!tmp && PL_expect == XSTATE
4163 && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
4165 yylval.pval = savepv(PL_tokenbuf);
4170 /* Check for keywords */
4171 tmp = keyword(PL_tokenbuf, len);
4173 /* Is this a word before a => operator? */
4174 if (*d == '=' && d[1] == '>') {
4177 = (OP*)newSVOP(OP_CONST, 0,
4178 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
4179 yylval.opval->op_private = OPpCONST_BARE;
4183 if (tmp < 0) { /* second-class keyword? */
4184 GV *ogv = NULL; /* override (winner) */
4185 GV *hgv = NULL; /* hidden (loser) */
4186 if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
4188 if ((gv = gv_fetchpv(PL_tokenbuf, 0, SVt_PVCV)) &&
4191 if (GvIMPORTED_CV(gv))
4193 else if (! CvMETHOD(cv))
4197 (gvp = (GV**)hv_fetch(PL_globalstash,PL_tokenbuf,len,FALSE)) &&
4198 (gv = *gvp) != (GV*)&PL_sv_undef &&
4199 GvCVu(gv) && GvIMPORTED_CV(gv))
4206 tmp = 0; /* overridden by import or by GLOBAL */
4209 && -tmp==KEY_lock /* XXX generalizable kludge */
4211 && !hv_fetchs(GvHVn(PL_incgv), "Thread.pm", FALSE))
4213 tmp = 0; /* any sub overrides "weak" keyword */
4215 else { /* no override */
4217 if (tmp == KEY_dump && ckWARN(WARN_MISC)) {
4218 Perl_warner(aTHX_ packWARN(WARN_MISC),
4219 "dump() better written as CORE::dump()");
4223 if (hgv && tmp != KEY_x && tmp != KEY_CORE
4224 && ckWARN(WARN_AMBIGUOUS)) /* never ambiguous */
4225 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
4226 "Ambiguous call resolved as CORE::%s(), %s",
4227 GvENAME(hgv), "qualify as such or use &");
4234 default: /* not a keyword */
4235 /* Trade off - by using this evil construction we can pull the
4236 variable gv into the block labelled keylookup. If not, then
4237 we have to give it function scope so that the goto from the
4238 earlier ':' case doesn't bypass the initialisation. */
4240 just_a_word_zero_gv:
4248 const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
4251 /* Get the rest if it looks like a package qualifier */
4253 if (*s == '\'' || (*s == ':' && s[1] == ':')) {
4255 s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
4258 Perl_croak(aTHX_ "Bad name after %s%s", PL_tokenbuf,
4259 *s == '\'' ? "'" : "::");
4264 if (PL_expect == XOPERATOR) {