3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
4 * 2000, 2001, 2002, 2003, 2004, 2005, 2006, by Larry Wall and others
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
12 * "It all comes from here, the stench and the peril." --Frodo
16 * This file is the lexer for Perl. It's closely linked to the
19 * The main routine is yylex(), which returns the next token.
23 #define PERL_IN_TOKE_C
26 #define yylval (PL_parser->yylval)
28 static const char ident_too_long[] = "Identifier too long";
29 static const char commaless_variable_list[] = "comma-less variable list";
31 static void restore_rsfp(pTHX_ void *f);
32 #ifndef PERL_NO_UTF16_FILTER
33 static I32 utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen);
34 static I32 utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen);
38 # define CURMAD(slot,sv) if (PL_madskills) { curmad(slot,sv); sv = 0; }
39 # define NEXTVAL_NEXTTOKE PL_nexttoke[PL_curforce].next_val
41 # define CURMAD(slot,sv)
42 # define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
45 #define XFAKEBRACK 128
48 #ifdef USE_UTF8_SCRIPTS
49 # define UTF (!IN_BYTES)
51 # define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8))
54 /* In variables named $^X, these are the legal values for X.
55 * 1999-02-27 mjd-perl-patch@plover.com */
56 #define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
58 /* On MacOS, respect nonbreaking spaces */
59 #ifdef MACOS_TRADITIONAL
60 #define SPACE_OR_TAB(c) ((c)==' '||(c)=='\312'||(c)=='\t')
62 #define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
65 /* LEX_* are values for PL_lex_state, the state of the lexer.
66 * They are arranged oddly so that the guard on the switch statement
67 * can get by with a single comparison (if the compiler is smart enough).
70 /* #define LEX_NOTPARSING 11 is done in perl.h. */
72 #define LEX_NORMAL 10 /* normal code (ie not within "...") */
73 #define LEX_INTERPNORMAL 9 /* code within a string, eg "$foo[$x+1]" */
74 #define LEX_INTERPCASEMOD 8 /* expecting a \U, \Q or \E etc */
75 #define LEX_INTERPPUSH 7 /* starting a new sublex parse level */
76 #define LEX_INTERPSTART 6 /* expecting the start of a $var */
78 /* at end of code, eg "$x" followed by: */
79 #define LEX_INTERPEND 5 /* ... eg not one of [, { or -> */
80 #define LEX_INTERPENDMAYBE 4 /* ... eg one of [, { or -> */
82 #define LEX_INTERPCONCAT 3 /* expecting anything, eg at start of
83 string or after \E, $foo, etc */
84 #define LEX_INTERPCONST 2 /* NOT USED */
85 #define LEX_FORMLINE 1 /* expecting a format line */
86 #define LEX_KNOWNEXT 0 /* next token known; just return it */
90 static const char* const lex_state_names[] = {
109 #include "keywords.h"
111 /* CLINE is a macro that ensures PL_copline has a sane value */
116 #define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
119 # define SKIPSPACE0(s) skipspace0(s)
120 # define SKIPSPACE1(s) skipspace1(s)
121 # define SKIPSPACE2(s,tsv) skipspace2(s,&tsv)
122 # define PEEKSPACE(s) skipspace2(s,0)
124 # define SKIPSPACE0(s) skipspace(s)
125 # define SKIPSPACE1(s) skipspace(s)
126 # define SKIPSPACE2(s,tsv) skipspace(s)
127 # define PEEKSPACE(s) skipspace(s)
131 * Convenience functions to return different tokens and prime the
132 * lexer for the next token. They all take an argument.
134 * TOKEN : generic token (used for '(', DOLSHARP, etc)
135 * OPERATOR : generic operator
136 * AOPERATOR : assignment operator
137 * PREBLOCK : beginning the block after an if, while, foreach, ...
138 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
139 * PREREF : *EXPR where EXPR is not a simple identifier
140 * TERM : expression term
141 * LOOPX : loop exiting command (goto, last, dump, etc)
142 * FTST : file test operator
143 * FUN0 : zero-argument function
144 * FUN1 : not used, except for not, which isn't a UNIOP
145 * BOop : bitwise or or xor
147 * SHop : shift operator
148 * PWop : power operator
149 * PMop : pattern-matching operator
150 * Aop : addition-level operator
151 * Mop : multiplication-level operator
152 * Eop : equality-testing operator
153 * Rop : relational operator <= != gt
155 * Also see LOP and lop() below.
158 #ifdef DEBUGGING /* Serve -DT. */
159 # define REPORT(retval) tokereport((I32)retval)
161 # define REPORT(retval) (retval)
164 #define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
165 #define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
166 #define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
167 #define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
168 #define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
169 #define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
170 #define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
171 #define LOOPX(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
172 #define FTST(f) return (yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
173 #define FUN0(f) return (yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
174 #define FUN1(f) return (yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
175 #define BOop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
176 #define BAop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
177 #define SHop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
178 #define PWop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
179 #define PMop(f) return(yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
180 #define Aop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
181 #define Mop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
182 #define Eop(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
183 #define Rop(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
185 /* This bit of chicanery makes a unary function followed by
186 * a parenthesis into a function with one argument, highest precedence.
187 * The UNIDOR macro is for unary functions that can be followed by the //
188 * operator (such as C<shift // 0>).
190 #define UNI2(f,x) { \
194 PL_last_uni = PL_oldbufptr; \
195 PL_last_lop_op = f; \
197 return REPORT( (int)FUNC1 ); \
199 return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
201 #define UNI(f) UNI2(f,XTERM)
202 #define UNIDOR(f) UNI2(f,XTERMORDORDOR)
204 #define UNIBRACK(f) { \
207 PL_last_uni = PL_oldbufptr; \
209 return REPORT( (int)FUNC1 ); \
211 return REPORT( (*s == '(') ? (int)FUNC1 : (int)UNIOP ); \
214 /* grandfather return to old style */
215 #define OLDLOP(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
219 /* how to interpret the yylval associated with the token */
223 TOKENTYPE_OPNUM, /* yylval.ival contains an opcode number */
229 static struct debug_tokens {
231 enum token_type type;
233 } const debug_tokens[] =
235 { ADDOP, TOKENTYPE_OPNUM, "ADDOP" },
236 { ANDAND, TOKENTYPE_NONE, "ANDAND" },
237 { ANDOP, TOKENTYPE_NONE, "ANDOP" },
238 { ANONSUB, TOKENTYPE_IVAL, "ANONSUB" },
239 { ARROW, TOKENTYPE_NONE, "ARROW" },
240 { ASSIGNOP, TOKENTYPE_OPNUM, "ASSIGNOP" },
241 { BITANDOP, TOKENTYPE_OPNUM, "BITANDOP" },
242 { BITOROP, TOKENTYPE_OPNUM, "BITOROP" },
243 { COLONATTR, TOKENTYPE_NONE, "COLONATTR" },
244 { CONTINUE, TOKENTYPE_NONE, "CONTINUE" },
245 { DEFAULT, TOKENTYPE_NONE, "DEFAULT" },
246 { DO, TOKENTYPE_NONE, "DO" },
247 { DOLSHARP, TOKENTYPE_NONE, "DOLSHARP" },
248 { DORDOR, TOKENTYPE_NONE, "DORDOR" },
249 { DOROP, TOKENTYPE_OPNUM, "DOROP" },
250 { DOTDOT, TOKENTYPE_IVAL, "DOTDOT" },
251 { ELSE, TOKENTYPE_NONE, "ELSE" },
252 { ELSIF, TOKENTYPE_IVAL, "ELSIF" },
253 { EQOP, TOKENTYPE_OPNUM, "EQOP" },
254 { FOR, TOKENTYPE_IVAL, "FOR" },
255 { FORMAT, TOKENTYPE_NONE, "FORMAT" },
256 { FUNC, TOKENTYPE_OPNUM, "FUNC" },
257 { FUNC0, TOKENTYPE_OPNUM, "FUNC0" },
258 { FUNC0SUB, TOKENTYPE_OPVAL, "FUNC0SUB" },
259 { FUNC1, TOKENTYPE_OPNUM, "FUNC1" },
260 { FUNCMETH, TOKENTYPE_OPVAL, "FUNCMETH" },
261 { GIVEN, TOKENTYPE_IVAL, "GIVEN" },
262 { HASHBRACK, TOKENTYPE_NONE, "HASHBRACK" },
263 { IF, TOKENTYPE_IVAL, "IF" },
264 { LABEL, TOKENTYPE_PVAL, "LABEL" },
265 { LOCAL, TOKENTYPE_IVAL, "LOCAL" },
266 { LOOPEX, TOKENTYPE_OPNUM, "LOOPEX" },
267 { LSTOP, TOKENTYPE_OPNUM, "LSTOP" },
268 { LSTOPSUB, TOKENTYPE_OPVAL, "LSTOPSUB" },
269 { MATCHOP, TOKENTYPE_OPNUM, "MATCHOP" },
270 { METHOD, TOKENTYPE_OPVAL, "METHOD" },
271 { MULOP, TOKENTYPE_OPNUM, "MULOP" },
272 { MY, TOKENTYPE_IVAL, "MY" },
273 { MYSUB, TOKENTYPE_NONE, "MYSUB" },
274 { NOAMP, TOKENTYPE_NONE, "NOAMP" },
275 { NOTOP, TOKENTYPE_NONE, "NOTOP" },
276 { OROP, TOKENTYPE_IVAL, "OROP" },
277 { OROR, TOKENTYPE_NONE, "OROR" },
278 { PACKAGE, TOKENTYPE_NONE, "PACKAGE" },
279 { PMFUNC, TOKENTYPE_OPVAL, "PMFUNC" },
280 { POSTDEC, TOKENTYPE_NONE, "POSTDEC" },
281 { POSTINC, TOKENTYPE_NONE, "POSTINC" },
282 { POWOP, TOKENTYPE_OPNUM, "POWOP" },
283 { PREDEC, TOKENTYPE_NONE, "PREDEC" },
284 { PREINC, TOKENTYPE_NONE, "PREINC" },
285 { PRIVATEREF, TOKENTYPE_OPVAL, "PRIVATEREF" },
286 { REFGEN, TOKENTYPE_NONE, "REFGEN" },
287 { RELOP, TOKENTYPE_OPNUM, "RELOP" },
288 { SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" },
289 { SUB, TOKENTYPE_NONE, "SUB" },
290 { THING, TOKENTYPE_OPVAL, "THING" },
291 { UMINUS, TOKENTYPE_NONE, "UMINUS" },
292 { UNIOP, TOKENTYPE_OPNUM, "UNIOP" },
293 { UNIOPSUB, TOKENTYPE_OPVAL, "UNIOPSUB" },
294 { UNLESS, TOKENTYPE_IVAL, "UNLESS" },
295 { UNTIL, TOKENTYPE_IVAL, "UNTIL" },
296 { USE, TOKENTYPE_IVAL, "USE" },
297 { WHEN, TOKENTYPE_IVAL, "WHEN" },
298 { WHILE, TOKENTYPE_IVAL, "WHILE" },
299 { WORD, TOKENTYPE_OPVAL, "WORD" },
300 { 0, TOKENTYPE_NONE, NULL }
303 /* dump the returned token in rv, plus any optional arg in yylval */
306 S_tokereport(pTHX_ I32 rv)
310 const char *name = NULL;
311 enum token_type type = TOKENTYPE_NONE;
312 const struct debug_tokens *p;
313 SV* const report = newSVpvs("<== ");
315 for (p = debug_tokens; p->token; p++) {
316 if (p->token == (int)rv) {
323 Perl_sv_catpv(aTHX_ report, name);
324 else if ((char)rv > ' ' && (char)rv < '~')
325 Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
327 sv_catpvs(report, "EOF");
329 Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
332 case TOKENTYPE_GVVAL: /* doesn't appear to be used */
335 Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)yylval.ival);
337 case TOKENTYPE_OPNUM:
338 Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
339 PL_op_name[yylval.ival]);
342 Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", yylval.pval);
344 case TOKENTYPE_OPVAL:
346 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
347 PL_op_name[yylval.opval->op_type]);
348 if (yylval.opval->op_type == OP_CONST) {
349 Perl_sv_catpvf(aTHX_ report, " %s",
350 SvPEEK(cSVOPx_sv(yylval.opval)));
355 sv_catpvs(report, "(opval=null)");
358 PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
364 /* print the buffer with suitable escapes */
367 S_printbuf(pTHX_ const char* fmt, const char* s)
369 SV* const tmp = newSVpvs("");
370 PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
379 * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
380 * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
384 S_ao(pTHX_ int toketype)
387 if (*PL_bufptr == '=') {
389 if (toketype == ANDAND)
390 yylval.ival = OP_ANDASSIGN;
391 else if (toketype == OROR)
392 yylval.ival = OP_ORASSIGN;
393 else if (toketype == DORDOR)
394 yylval.ival = OP_DORASSIGN;
402 * When Perl expects an operator and finds something else, no_op
403 * prints the warning. It always prints "<something> found where
404 * operator expected. It prints "Missing semicolon on previous line?"
405 * if the surprise occurs at the start of the line. "do you need to
406 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
407 * where the compiler doesn't know if foo is a method call or a function.
408 * It prints "Missing operator before end of line" if there's nothing
409 * after the missing operator, or "... before <...>" if there is something
410 * after the missing operator.
414 S_no_op(pTHX_ const char *what, char *s)
417 char * const oldbp = PL_bufptr;
418 const bool is_first = (PL_oldbufptr == PL_linestart);
424 yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
425 if (ckWARN_d(WARN_SYNTAX)) {
427 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
428 "\t(Missing semicolon on previous line?)\n");
429 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
431 for (t = PL_oldoldbufptr; (isALNUM_lazy_if(t,UTF) || *t == ':'); t++)
433 if (t < PL_bufptr && isSPACE(*t))
434 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
435 "\t(Do you need to predeclare %.*s?)\n",
436 (int)(t - PL_oldoldbufptr), PL_oldoldbufptr);
440 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
441 "\t(Missing operator before %.*s?)\n", (int)(s - oldbp), oldbp);
449 * Complain about missing quote/regexp/heredoc terminator.
450 * If it's called with NULL then it cauterizes the line buffer.
451 * If we're in a delimited string and the delimiter is a control
452 * character, it's reformatted into a two-char sequence like ^C.
457 S_missingterm(pTHX_ char *s)
463 char * const nl = strrchr(s,'\n');
469 iscntrl(PL_multi_close)
471 PL_multi_close < 32 || PL_multi_close == 127
475 tmpbuf[1] = (char)toCTRL(PL_multi_close);
480 *tmpbuf = (char)PL_multi_close;
484 q = strchr(s,'"') ? '\'' : '"';
485 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
488 #define FEATURE_IS_ENABLED(name) \
489 ((0 != (PL_hints & HINT_LOCALIZE_HH)) \
490 && S_feature_is_enabled(aTHX_ STR_WITH_LEN(name)))
492 * S_feature_is_enabled
493 * Check whether the named feature is enabled.
496 S_feature_is_enabled(pTHX_ const char *name, STRLEN namelen)
499 HV * const hinthv = GvHV(PL_hintgv);
500 char he_name[32] = "feature_";
501 (void) my_strlcpy(&he_name[8], name, 24);
503 return (hinthv && hv_exists(hinthv, he_name, 8 + namelen));
511 Perl_deprecate(pTHX_ const char *s)
513 if (ckWARN(WARN_DEPRECATED))
514 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED), "Use of %s is deprecated", s);
518 Perl_deprecate_old(pTHX_ const char *s)
520 /* This function should NOT be called for any new deprecated warnings */
521 /* Use Perl_deprecate instead */
523 /* It is here to maintain backward compatibility with the pre-5.8 */
524 /* warnings category hierarchy. The "deprecated" category used to */
525 /* live under the "syntax" category. It is now a top-level category */
526 /* in its own right. */
528 if (ckWARN2(WARN_DEPRECATED, WARN_SYNTAX))
529 Perl_warner(aTHX_ packWARN2(WARN_DEPRECATED, WARN_SYNTAX),
530 "Use of %s is deprecated", s);
534 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
535 * utf16-to-utf8-reversed.
538 #ifdef PERL_CR_FILTER
542 register const char *s = SvPVX_const(sv);
543 register const char * const e = s + SvCUR(sv);
544 /* outer loop optimized to do nothing if there are no CR-LFs */
546 if (*s++ == '\r' && *s == '\n') {
547 /* hit a CR-LF, need to copy the rest */
548 register char *d = s - 1;
551 if (*s == '\r' && s[1] == '\n')
562 S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
564 const I32 count = FILTER_READ(idx+1, sv, maxlen);
565 if (count > 0 && !maxlen)
573 * Initialize variables. Uses the Perl save_stack to save its state (for
574 * recursive calls to the parser).
578 Perl_lex_start(pTHX_ SV *line)
584 SAVEI32(PL_lex_dojoin);
585 SAVEI32(PL_lex_brackets);
586 SAVEI32(PL_lex_casemods);
587 SAVEI32(PL_lex_starts);
588 SAVEI32(PL_lex_state);
589 SAVEVPTR(PL_lex_inpat);
590 SAVEI32(PL_lex_inwhat);
592 if (PL_lex_state == LEX_KNOWNEXT) {
593 I32 toke = PL_lasttoke;
594 while (--toke >= 0) {
595 SAVEI32(PL_nexttoke[toke].next_type);
596 SAVEVPTR(PL_nexttoke[toke].next_val);
598 SAVEVPTR(PL_nexttoke[toke].next_mad);
600 SAVEI32(PL_lasttoke);
602 SAVESPTR(PL_endwhite);
603 SAVESPTR(PL_thistoken);
604 SAVESPTR(PL_thiswhite);
605 SAVESPTR(PL_nextwhite);
606 SAVESPTR(PL_thisopen);
607 SAVESPTR(PL_thisclose);
608 SAVESPTR(PL_thisstuff);
609 SAVEVPTR(PL_thismad);
610 SAVEI32(PL_realtokenstart);
611 SAVEI32(PL_faketokens);
612 SAVESPTR(PL_skipwhite);
613 SAVEI32(PL_curforce);
615 if (PL_lex_state == LEX_KNOWNEXT) {
616 I32 toke = PL_nexttoke;
617 while (--toke >= 0) {
618 SAVEI32(PL_nexttype[toke]);
619 SAVEVPTR(PL_nextval[toke]);
621 SAVEI32(PL_nexttoke);
624 SAVECOPLINE(PL_curcop);
627 SAVEPPTR(PL_oldbufptr);
628 SAVEPPTR(PL_oldoldbufptr);
629 SAVEPPTR(PL_last_lop);
630 SAVEPPTR(PL_last_uni);
631 SAVEPPTR(PL_linestart);
632 SAVESPTR(PL_linestr);
633 SAVEGENERICPV(PL_lex_brackstack);
634 SAVEGENERICPV(PL_lex_casestack);
635 SAVEDESTRUCTOR_X(restore_rsfp, PL_rsfp);
636 SAVESPTR(PL_lex_stuff);
637 SAVEI32(PL_lex_defer);
638 SAVEI32(PL_sublex_info.sub_inwhat);
639 SAVEI32(PL_sublex_info.super_state);
640 SAVEVPTR(PL_sublex_info.sub_op);
641 SAVEPPTR(PL_sublex_info.super_bufptr);
642 SAVEPPTR(PL_sublex_info.super_bufend);
643 SAVESPTR(PL_lex_repl);
645 SAVEINT(PL_lex_expect);
646 SAVEI32(PL_lex_formbrack);
648 SAVEI32(PL_multi_close);
649 SAVEI32(PL_multi_open);
650 SAVEI32(PL_multi_start);
651 SAVEI8(PL_pending_ident);
652 SAVEBOOL(PL_preambled);
654 PL_lex_state = LEX_NORMAL;
658 Newx(PL_lex_brackstack, 120, char);
659 Newx(PL_lex_casestack, 12, char);
661 *PL_lex_casestack = '\0';
672 PL_realtokenstart = 0;
684 PL_sublex_info.sub_inwhat = 0;
685 PL_sublex_info.super_state = 0;
686 PL_sublex_info.sub_op = NULL;
687 PL_sublex_info.super_bufptr = NULL;
688 PL_sublex_info.super_bufend = NULL;
690 PL_lex_formbrack = 0;
695 PL_pending_ident = '\0';
696 PL_preambled = FALSE;
699 s = SvPV_const(line, len);
704 PL_linestr = newSVpvs("\n;");
705 } else if (SvREADONLY(line) || s[len-1] != ';') {
706 PL_linestr = newSVsv(line);
708 sv_catpvs(PL_linestr, "\n;");
711 SvREFCNT_inc_simple_void_NN(line);
714 /* PL_linestr needs to survive until end of scope, not just the next
715 FREETMPS. See changes 17505 and 17546 which fixed the symptoms only. */
716 SAVEFREESV(PL_linestr);
717 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
718 PL_bufend = PL_bufptr + SvCUR(PL_linestr);
719 PL_last_lop = PL_last_uni = NULL;
725 * Finalizer for lexing operations. Must be called when the parser is
726 * done with the lexer.
733 PL_doextract = FALSE;
738 * This subroutine has nothing to do with tilting, whether at windmills
739 * or pinball tables. Its name is short for "increment line". It
740 * increments the current line number in CopLINE(PL_curcop) and checks
741 * to see whether the line starts with a comment of the form
742 * # line 500 "foo.pm"
743 * If so, it sets the current line number and file to the values in the comment.
747 S_incline(pTHX_ char *s)
755 CopLINE_inc(PL_curcop);
758 while (SPACE_OR_TAB(*s))
760 if (strnEQ(s, "line", 4))
764 if (SPACE_OR_TAB(*s))
768 while (SPACE_OR_TAB(*s))
776 while (SPACE_OR_TAB(*s))
778 if (*s == '"' && (t = strchr(s+1, '"'))) {
788 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
790 if (*e != '\n' && *e != '\0')
791 return; /* false alarm */
797 const char * const cf = CopFILE(PL_curcop);
798 STRLEN tmplen = cf ? strlen(cf) : 0;
799 if (tmplen > 7 && strnEQ(cf, "(eval ", 6)) {
800 /* must copy *{"::_<(eval N)[oldfilename:L]"}
801 * to *{"::_<newfilename"} */
802 char smallbuf[256], smallbuf2[256];
803 char *tmpbuf, *tmpbuf2;
805 STRLEN tmplen2 = strlen(s);
806 if (tmplen + 3 < sizeof smallbuf)
809 Newx(tmpbuf, tmplen + 3, char);
810 if (tmplen2 + 3 < sizeof smallbuf2)
813 Newx(tmpbuf2, tmplen2 + 3, char);
814 tmpbuf[0] = tmpbuf2[0] = '_';
815 tmpbuf[1] = tmpbuf2[1] = '<';
816 memcpy(tmpbuf + 2, cf, ++tmplen);
817 memcpy(tmpbuf2 + 2, s, ++tmplen2);
819 gvp = (GV**)hv_fetch(PL_defstash, tmpbuf, tmplen, FALSE);
821 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
823 gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
824 /* adjust ${"::_<newfilename"} to store the new file name */
825 GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
826 GvHV(gv2) = (HV*)SvREFCNT_inc(GvHV(*gvp));
827 GvAV(gv2) = (AV*)SvREFCNT_inc(GvAV(*gvp));
830 if (tmpbuf != smallbuf) Safefree(tmpbuf);
831 if (tmpbuf2 != smallbuf2) Safefree(tmpbuf2);
834 CopFILE_free(PL_curcop);
835 CopFILE_set(PL_curcop, s);
838 CopLINE_set(PL_curcop, atoi(n)-1);
842 /* skip space before PL_thistoken */
845 S_skipspace0(pTHX_ register char *s)
852 PL_thiswhite = newSVpvs("");
853 sv_catsv(PL_thiswhite, PL_skipwhite);
854 sv_free(PL_skipwhite);
857 PL_realtokenstart = s - SvPVX(PL_linestr);
861 /* skip space after PL_thistoken */
864 S_skipspace1(pTHX_ register char *s)
866 const char *start = s;
867 I32 startoff = start - SvPVX(PL_linestr);
872 start = SvPVX(PL_linestr) + startoff;
873 if (!PL_thistoken && PL_realtokenstart >= 0) {
874 const char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
875 PL_thistoken = newSVpvn(tstart, start - tstart);
877 PL_realtokenstart = -1;
880 PL_nextwhite = newSVpvs("");
881 sv_catsv(PL_nextwhite, PL_skipwhite);
882 sv_free(PL_skipwhite);
889 S_skipspace2(pTHX_ register char *s, SV **svp)
892 const I32 bufptroff = PL_bufptr - SvPVX(PL_linestr);
893 const I32 startoff = s - SvPVX(PL_linestr);
896 PL_bufptr = SvPVX(PL_linestr) + bufptroff;
897 if (!PL_madskills || !svp)
899 start = SvPVX(PL_linestr) + startoff;
900 if (!PL_thistoken && PL_realtokenstart >= 0) {
901 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
902 PL_thistoken = newSVpvn(tstart, start - tstart);
903 PL_realtokenstart = -1;
908 sv_setsv(*svp, PL_skipwhite);
909 sv_free(PL_skipwhite);
918 S_update_debugger_info_pv(pTHX_ const char *buf, STRLEN len)
920 AV *av = CopFILEAVx(PL_curcop);
922 SV * const sv = newSV(0);
923 sv_upgrade(sv, SVt_PVMG);
924 sv_setpvn(sv, buf, len);
927 av_store(av, (I32)CopLINE(PL_curcop), sv);
932 S_update_debugger_info_sv(pTHX_ SV *orig_sv)
934 AV *av = CopFILEAVx(PL_curcop);
936 SV * const sv = newSV(0);
937 sv_upgrade(sv, SVt_PVMG);
938 sv_setsv(sv, orig_sv);
941 av_store(av, (I32)CopLINE(PL_curcop), sv);
947 * Called to gobble the appropriate amount and type of whitespace.
948 * Skips comments as well.
952 S_skipspace(pTHX_ register char *s)
957 int startoff = s - SvPVX(PL_linestr);
960 sv_free(PL_skipwhite);
965 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
966 while (s < PL_bufend && SPACE_OR_TAB(*s))
976 SSize_t oldprevlen, oldoldprevlen;
977 SSize_t oldloplen = 0, oldunilen = 0;
978 while (s < PL_bufend && isSPACE(*s)) {
979 if (*s++ == '\n' && PL_in_eval && !PL_rsfp)
984 if (s < PL_bufend && *s == '#') {
985 while (s < PL_bufend && *s != '\n')
989 if (PL_in_eval && !PL_rsfp) {
996 /* only continue to recharge the buffer if we're at the end
997 * of the buffer, we're not reading from a source filter, and
998 * we're in normal lexing mode
1000 if (s < PL_bufend || !PL_rsfp || PL_sublex_info.sub_inwhat ||
1001 PL_lex_state == LEX_FORMLINE)
1008 /* try to recharge the buffer */
1010 curoff = s - SvPVX(PL_linestr);
1013 if ((s = filter_gets(PL_linestr, PL_rsfp,
1014 (prevlen = SvCUR(PL_linestr)))) == NULL)
1017 if (PL_madskills && curoff != startoff) {
1019 PL_skipwhite = newSVpvs("");
1020 sv_catpvn(PL_skipwhite, SvPVX(PL_linestr) + startoff,
1024 /* mustn't throw out old stuff yet if madpropping */
1025 SvCUR(PL_linestr) = curoff;
1026 s = SvPVX(PL_linestr) + curoff;
1028 if (curoff && s[-1] == '\n')
1032 /* end of file. Add on the -p or -n magic */
1033 /* XXX these shouldn't really be added here, can't set PL_faketokens */
1036 sv_catpv(PL_linestr,
1037 ";}continue{print or die qq(-p destination: $!\\n);}");
1039 sv_setpv(PL_linestr,
1040 ";}continue{print or die qq(-p destination: $!\\n);}");
1042 PL_minus_n = PL_minus_p = 0;
1044 else if (PL_minus_n) {
1046 sv_catpvn(PL_linestr, ";}", 2);
1048 sv_setpvn(PL_linestr, ";}", 2);
1054 sv_catpvn(PL_linestr,";", 1);
1056 sv_setpvn(PL_linestr,";", 1);
1059 /* reset variables for next time we lex */
1060 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart
1066 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
1067 PL_last_lop = PL_last_uni = NULL;
1069 /* Close the filehandle. Could be from -P preprocessor,
1070 * STDIN, or a regular file. If we were reading code from
1071 * STDIN (because the commandline held no -e or filename)
1072 * then we don't close it, we reset it so the code can
1073 * read from STDIN too.
1076 if (PL_preprocess && !PL_in_eval)
1077 (void)PerlProc_pclose(PL_rsfp);
1078 else if ((PerlIO*)PL_rsfp == PerlIO_stdin())
1079 PerlIO_clearerr(PL_rsfp);
1081 (void)PerlIO_close(PL_rsfp);
1086 /* not at end of file, so we only read another line */
1087 /* make corresponding updates to old pointers, for yyerror() */
1088 oldprevlen = PL_oldbufptr - PL_bufend;
1089 oldoldprevlen = PL_oldoldbufptr - PL_bufend;
1091 oldunilen = PL_last_uni - PL_bufend;
1093 oldloplen = PL_last_lop - PL_bufend;
1094 PL_linestart = PL_bufptr = s + prevlen;
1095 PL_bufend = s + SvCUR(PL_linestr);
1097 PL_oldbufptr = s + oldprevlen;
1098 PL_oldoldbufptr = s + oldoldprevlen;
1100 PL_last_uni = s + oldunilen;
1102 PL_last_lop = s + oldloplen;
1105 /* debugger active and we're not compiling the debugger code,
1106 * so store the line into the debugger's array of lines
1108 if (PERLDB_LINE && PL_curstash != PL_debstash)
1109 update_debugger_info_pv(PL_bufptr, PL_bufend - PL_bufptr);
1116 PL_skipwhite = newSVpvs("");
1117 curoff = s - SvPVX(PL_linestr);
1118 if (curoff - startoff)
1119 sv_catpvn(PL_skipwhite, SvPVX(PL_linestr) + startoff,
1128 * Check the unary operators to ensure there's no ambiguity in how they're
1129 * used. An ambiguous piece of code would be:
1131 * This doesn't mean rand() + 5. Because rand() is a unary operator,
1132 * the +5 is its argument.
1142 if (PL_oldoldbufptr != PL_last_uni)
1144 while (isSPACE(*PL_last_uni))
1147 while (isALNUM_lazy_if(s,UTF) || *s == '-')
1149 if ((t = strchr(s, '(')) && t < PL_bufptr)
1152 if (ckWARN_d(WARN_AMBIGUOUS)){
1153 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
1154 "Warning: Use of \"%.*s\" without parentheses is ambiguous",
1155 (int)(s - PL_last_uni), PL_last_uni);
1160 * LOP : macro to build a list operator. Its behaviour has been replaced
1161 * with a subroutine, S_lop() for which LOP is just another name.
1164 #define LOP(f,x) return lop(f,x,s)
1168 * Build a list operator (or something that might be one). The rules:
1169 * - if we have a next token, then it's a list operator [why?]
1170 * - if the next thing is an opening paren, then it's a function
1171 * - else it's a list operator
1175 S_lop(pTHX_ I32 f, int x, char *s)
1182 PL_last_lop = PL_oldbufptr;
1183 PL_last_lop_op = (OPCODE)f;
1186 return REPORT(LSTOP);
1189 return REPORT(LSTOP);
1192 return REPORT(FUNC);
1195 return REPORT(FUNC);
1197 return REPORT(LSTOP);
1203 * Sets up for an eventual force_next(). start_force(0) basically does
1204 * an unshift, while start_force(-1) does a push. yylex removes items
1209 S_start_force(pTHX_ int where)
1213 if (where < 0) /* so people can duplicate start_force(PL_curforce) */
1214 where = PL_lasttoke;
1215 assert(PL_curforce < 0 || PL_curforce == where);
1216 if (PL_curforce != where) {
1217 for (i = PL_lasttoke; i > where; --i) {
1218 PL_nexttoke[i] = PL_nexttoke[i-1];
1222 if (PL_curforce < 0) /* in case of duplicate start_force() */
1223 Zero(&PL_nexttoke[where], 1, NEXTTOKE);
1224 PL_curforce = where;
1227 curmad('^', newSVpvs(""));
1228 CURMAD('_', PL_nextwhite);
1233 S_curmad(pTHX_ char slot, SV *sv)
1239 if (PL_curforce < 0)
1240 where = &PL_thismad;
1242 where = &PL_nexttoke[PL_curforce].next_mad;
1245 sv_setpvn(sv, "", 0);
1248 if (UTF && is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
1250 else if (PL_encoding) {
1251 sv_recode_to_utf8(sv, PL_encoding);
1256 /* keep a slot open for the head of the list? */
1257 if (slot != '_' && *where && (*where)->mad_key == '^') {
1258 (*where)->mad_key = slot;
1259 sv_free((*where)->mad_val);
1260 (*where)->mad_val = (void*)sv;
1263 addmad(newMADsv(slot, sv), where, 0);
1266 # define start_force(where) NOOP
1267 # define curmad(slot, sv) NOOP
1272 * When the lexer realizes it knows the next token (for instance,
1273 * it is reordering tokens for the parser) then it can call S_force_next
1274 * to know what token to return the next time the lexer is called. Caller
1275 * will need to set PL_nextval[] (or PL_nexttoke[].next_val with PERL_MAD),
1276 * and possibly PL_expect to ensure the lexer handles the token correctly.
1280 S_force_next(pTHX_ I32 type)
1284 if (PL_curforce < 0)
1285 start_force(PL_lasttoke);
1286 PL_nexttoke[PL_curforce].next_type = type;
1287 if (PL_lex_state != LEX_KNOWNEXT)
1288 PL_lex_defer = PL_lex_state;
1289 PL_lex_state = LEX_KNOWNEXT;
1290 PL_lex_expect = PL_expect;
1293 PL_nexttype[PL_nexttoke] = type;
1295 if (PL_lex_state != LEX_KNOWNEXT) {
1296 PL_lex_defer = PL_lex_state;
1297 PL_lex_expect = PL_expect;
1298 PL_lex_state = LEX_KNOWNEXT;
1304 S_newSV_maybe_utf8(pTHX_ const char *start, STRLEN len)
1307 SV * const sv = newSVpvn(start,len);
1308 if (UTF && !IN_BYTES && is_utf8_string((const U8*)start, len))
1315 * When the lexer knows the next thing is a word (for instance, it has
1316 * just seen -> and it knows that the next char is a word char, then
1317 * it calls S_force_word to stick the next word into the PL_nexttoke/val
1321 * char *start : buffer position (must be within PL_linestr)
1322 * int token : PL_next* will be this type of bare word (e.g., METHOD,WORD)
1323 * int check_keyword : if true, Perl checks to make sure the word isn't
1324 * a keyword (do this if the word is a label, e.g. goto FOO)
1325 * int allow_pack : if true, : characters will also be allowed (require,
1326 * use, etc. do this)
1327 * int allow_initial_tick : used by the "sub" lexer only.
1331 S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
1337 start = SKIPSPACE1(start);
1339 if (isIDFIRST_lazy_if(s,UTF) ||
1340 (allow_pack && *s == ':') ||
1341 (allow_initial_tick && *s == '\'') )
1343 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
1344 if (check_keyword && keyword(PL_tokenbuf, len, 0))
1346 start_force(PL_curforce);
1348 curmad('X', newSVpvn(start,s-start));
1349 if (token == METHOD) {
1354 PL_expect = XOPERATOR;
1357 NEXTVAL_NEXTTOKE.opval
1358 = (OP*)newSVOP(OP_CONST,0,
1359 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
1360 NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
1368 * Called when the lexer wants $foo *foo &foo etc, but the program
1369 * text only contains the "foo" portion. The first argument is a pointer
1370 * to the "foo", and the second argument is the type symbol to prefix.
1371 * Forces the next token to be a "WORD".
1372 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
1376 S_force_ident(pTHX_ register const char *s, int kind)
1380 const STRLEN len = strlen(s);
1381 OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpvn(s, len));
1382 start_force(PL_curforce);
1383 NEXTVAL_NEXTTOKE.opval = o;
1386 o->op_private = OPpCONST_ENTERED;
1387 /* XXX see note in pp_entereval() for why we forgo typo
1388 warnings if the symbol must be introduced in an eval.
1390 gv_fetchpvn_flags(s, len,
1391 PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL)
1393 kind == '$' ? SVt_PV :
1394 kind == '@' ? SVt_PVAV :
1395 kind == '%' ? SVt_PVHV :
1403 Perl_str_to_version(pTHX_ SV *sv)
1408 const char *start = SvPV_const(sv,len);
1409 const char * const end = start + len;
1410 const bool utf = SvUTF8(sv) ? TRUE : FALSE;
1411 while (start < end) {
1415 n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
1420 retval += ((NV)n)/nshift;
1429 * Forces the next token to be a version number.
1430 * If the next token appears to be an invalid version number, (e.g. "v2b"),
1431 * and if "guessing" is TRUE, then no new token is created (and the caller
1432 * must use an alternative parsing method).
1436 S_force_version(pTHX_ char *s, int guessing)
1442 I32 startoff = s - SvPVX(PL_linestr);
1451 while (isDIGIT(*d) || *d == '_' || *d == '.')
1455 start_force(PL_curforce);
1456 curmad('X', newSVpvn(s,d-s));
1459 if (*d == ';' || isSPACE(*d) || *d == '}' || !*d) {
1461 s = scan_num(s, &yylval);
1462 version = yylval.opval;
1463 ver = cSVOPx(version)->op_sv;
1464 if (SvPOK(ver) && !SvNIOK(ver)) {
1465 SvUPGRADE(ver, SVt_PVNV);
1466 SvNV_set(ver, str_to_version(ver));
1467 SvNOK_on(ver); /* hint that it is a version */
1470 else if (guessing) {
1473 sv_free(PL_nextwhite); /* let next token collect whitespace */
1475 s = SvPVX(PL_linestr) + startoff;
1483 if (PL_madskills && !version) {
1484 sv_free(PL_nextwhite); /* let next token collect whitespace */
1486 s = SvPVX(PL_linestr) + startoff;
1489 /* NOTE: The parser sees the package name and the VERSION swapped */
1490 start_force(PL_curforce);
1491 NEXTVAL_NEXTTOKE.opval = version;
1499 * Tokenize a quoted string passed in as an SV. It finds the next
1500 * chunk, up to end of string or a backslash. It may make a new
1501 * SV containing that chunk (if HINT_NEW_STRING is on). It also
1506 S_tokeq(pTHX_ SV *sv)
1510 register char *send;
1518 s = SvPV_force(sv, len);
1519 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
1522 while (s < send && *s != '\\')
1527 if ( PL_hints & HINT_NEW_STRING ) {
1528 pv = sv_2mortal(newSVpvn(SvPVX_const(pv), len));
1534 if (s + 1 < send && (s[1] == '\\'))
1535 s++; /* all that, just for this */
1540 SvCUR_set(sv, d - SvPVX_const(sv));
1542 if ( PL_hints & HINT_NEW_STRING )
1543 return new_constant(NULL, 0, "q", sv, pv, "q");
1548 * Now come three functions related to double-quote context,
1549 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
1550 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
1551 * interact with PL_lex_state, and create fake ( ... ) argument lists
1552 * to handle functions and concatenation.
1553 * They assume that whoever calls them will be setting up a fake
1554 * join call, because each subthing puts a ',' after it. This lets
1557 * join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
1559 * (I'm not sure whether the spurious commas at the end of lcfirst's
1560 * arguments and join's arguments are created or not).
1565 * Assumes that yylval.ival is the op we're creating (e.g. OP_LCFIRST).
1567 * Pattern matching will set PL_lex_op to the pattern-matching op to
1568 * make (we return THING if yylval.ival is OP_NULL, PMFUNC otherwise).
1570 * OP_CONST and OP_READLINE are easy--just make the new op and return.
1572 * Everything else becomes a FUNC.
1574 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
1575 * had an OP_CONST or OP_READLINE). This just sets us up for a
1576 * call to S_sublex_push().
1580 S_sublex_start(pTHX)
1583 register const I32 op_type = yylval.ival;
1585 if (op_type == OP_NULL) {
1586 yylval.opval = PL_lex_op;
1590 if (op_type == OP_CONST || op_type == OP_READLINE) {
1591 SV *sv = tokeq(PL_lex_stuff);
1593 if (SvTYPE(sv) == SVt_PVIV) {
1594 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
1596 const char * const p = SvPV_const(sv, len);
1597 SV * const nsv = newSVpvn(p, len);
1603 yylval.opval = (OP*)newSVOP(op_type, 0, sv);
1604 PL_lex_stuff = NULL;
1605 /* Allow <FH> // "foo" */
1606 if (op_type == OP_READLINE)
1607 PL_expect = XTERMORDORDOR;
1610 else if (op_type == OP_BACKTICK && PL_lex_op) {
1611 /* readpipe() vas overriden */
1612 cSVOPx(cLISTOPx(cUNOPx(PL_lex_op)->op_first)->op_first->op_sibling)->op_sv = tokeq(PL_lex_stuff);
1613 yylval.opval = PL_lex_op;
1615 PL_lex_stuff = NULL;
1619 PL_sublex_info.super_state = PL_lex_state;
1620 PL_sublex_info.sub_inwhat = op_type;
1621 PL_sublex_info.sub_op = PL_lex_op;
1622 PL_lex_state = LEX_INTERPPUSH;
1626 yylval.opval = PL_lex_op;
1636 * Create a new scope to save the lexing state. The scope will be
1637 * ended in S_sublex_done. Returns a '(', starting the function arguments
1638 * to the uc, lc, etc. found before.
1639 * Sets PL_lex_state to LEX_INTERPCONCAT.
1648 PL_lex_state = PL_sublex_info.super_state;
1649 SAVEI32(PL_lex_dojoin);
1650 SAVEI32(PL_lex_brackets);
1651 SAVEI32(PL_lex_casemods);
1652 SAVEI32(PL_lex_starts);
1653 SAVEI32(PL_lex_state);
1654 SAVEVPTR(PL_lex_inpat);
1655 SAVEI32(PL_lex_inwhat);
1656 SAVECOPLINE(PL_curcop);
1657 SAVEPPTR(PL_bufptr);
1658 SAVEPPTR(PL_bufend);
1659 SAVEPPTR(PL_oldbufptr);
1660 SAVEPPTR(PL_oldoldbufptr);
1661 SAVEPPTR(PL_last_lop);
1662 SAVEPPTR(PL_last_uni);
1663 SAVEPPTR(PL_linestart);
1664 SAVESPTR(PL_linestr);
1665 SAVEGENERICPV(PL_lex_brackstack);
1666 SAVEGENERICPV(PL_lex_casestack);
1668 PL_linestr = PL_lex_stuff;
1669 PL_lex_stuff = NULL;
1671 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
1672 = SvPVX(PL_linestr);
1673 PL_bufend += SvCUR(PL_linestr);
1674 PL_last_lop = PL_last_uni = NULL;
1675 SAVEFREESV(PL_linestr);
1677 PL_lex_dojoin = FALSE;
1678 PL_lex_brackets = 0;
1679 Newx(PL_lex_brackstack, 120, char);
1680 Newx(PL_lex_casestack, 12, char);
1681 PL_lex_casemods = 0;
1682 *PL_lex_casestack = '\0';
1684 PL_lex_state = LEX_INTERPCONCAT;
1685 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
1687 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
1688 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
1689 PL_lex_inpat = PL_sublex_info.sub_op;
1691 PL_lex_inpat = NULL;
1698 * Restores lexer state after a S_sublex_push.
1705 if (!PL_lex_starts++) {
1706 SV * const sv = newSVpvs("");
1707 if (SvUTF8(PL_linestr))
1709 PL_expect = XOPERATOR;
1710 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
1714 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
1715 PL_lex_state = LEX_INTERPCASEMOD;
1719 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
1720 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
1721 PL_linestr = PL_lex_repl;
1723 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
1724 PL_bufend += SvCUR(PL_linestr);
1725 PL_last_lop = PL_last_uni = NULL;
1726 SAVEFREESV(PL_linestr);
1727 PL_lex_dojoin = FALSE;
1728 PL_lex_brackets = 0;
1729 PL_lex_casemods = 0;
1730 *PL_lex_casestack = '\0';
1732 if (SvEVALED(PL_lex_repl)) {
1733 PL_lex_state = LEX_INTERPNORMAL;
1735 /* we don't clear PL_lex_repl here, so that we can check later
1736 whether this is an evalled subst; that means we rely on the
1737 logic to ensure sublex_done() is called again only via the
1738 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
1741 PL_lex_state = LEX_INTERPCONCAT;
1751 PL_endwhite = newSVpvs("");
1752 sv_catsv(PL_endwhite, PL_thiswhite);
1756 sv_setpvn(PL_thistoken,"",0);
1758 PL_realtokenstart = -1;
1762 PL_bufend = SvPVX(PL_linestr);
1763 PL_bufend += SvCUR(PL_linestr);
1764 PL_expect = XOPERATOR;
1765 PL_sublex_info.sub_inwhat = 0;
1773 Extracts a pattern, double-quoted string, or transliteration. This
1776 It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
1777 processing a pattern (PL_lex_inpat is true), a transliteration
1778 (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
1780 Returns a pointer to the character scanned up to. If this is
1781 advanced from the start pointer supplied (i.e. if anything was
1782 successfully parsed), will leave an OP for the substring scanned
1783 in yylval. Caller must intuit reason for not parsing further
1784 by looking at the next characters herself.
1788 double-quoted style: \r and \n
1789 regexp special ones: \D \s
1792 case and quoting: \U \Q \E
1793 stops on @ and $, but not for $ as tail anchor
1795 In transliterations:
1796 characters are VERY literal, except for - not at the start or end
1797 of the string, which indicates a range. If the range is in bytes,
1798 scan_const expands the range to the full set of intermediate
1799 characters. If the range is in utf8, the hyphen is replaced with
1800 a certain range mark which will be handled by pmtrans() in op.c.
1802 In double-quoted strings:
1804 double-quoted style: \r and \n
1806 deprecated backrefs: \1 (in substitution replacements)
1807 case and quoting: \U \Q \E
1810 scan_const does *not* construct ops to handle interpolated strings.
1811 It stops processing as soon as it finds an embedded $ or @ variable
1812 and leaves it to the caller to work out what's going on.
1814 embedded arrays (whether in pattern or not) could be:
1815 @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
1817 $ in double-quoted strings must be the symbol of an embedded scalar.
1819 $ in pattern could be $foo or could be tail anchor. Assumption:
1820 it's a tail anchor if $ is the last thing in the string, or if it's
1821 followed by one of "()| \r\n\t"
1823 \1 (backreferences) are turned into $1
1825 The structure of the code is
1826 while (there's a character to process) {
1827 handle transliteration ranges
1828 skip regexp comments /(?#comment)/ and codes /(?{code})/
1829 skip #-initiated comments in //x patterns
1830 check for embedded arrays
1831 check for embedded scalars
1833 leave intact backslashes from leaveit (below)
1834 deprecate \1 in substitution replacements
1835 handle string-changing backslashes \l \U \Q \E, etc.
1836 switch (what was escaped) {
1837 handle \- in a transliteration (becomes a literal -)
1838 handle \132 (octal characters)
1839 handle \x15 and \x{1234} (hex characters)
1840 handle \N{name} (named characters)
1841 handle \cV (control characters)
1842 handle printf-style backslashes (\f, \r, \n, etc)
1844 } (end if backslash)
1845 } (end while character to read)
1850 S_scan_const(pTHX_ char *start)
1853 register char *send = PL_bufend; /* end of the constant */
1854 SV *sv = newSV(send - start); /* sv for the constant */
1855 register char *s = start; /* start of the constant */
1856 register char *d = SvPVX(sv); /* destination for copies */
1857 bool dorange = FALSE; /* are we in a translit range? */
1858 bool didrange = FALSE; /* did we just finish a range? */
1859 I32 has_utf8 = FALSE; /* Output constant is UTF8 */
1860 I32 this_utf8 = UTF; /* The source string is assumed to be UTF8 */
1863 UV literal_endpoint = 0;
1864 bool native_range = TRUE; /* turned to FALSE if the first endpoint is Unicode. */
1867 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
1868 /* If we are doing a trans and we know we want UTF8 set expectation */
1869 has_utf8 = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
1870 this_utf8 = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
1874 while (s < send || dorange) {
1875 /* get transliterations out of the way (they're most literal) */
1876 if (PL_lex_inwhat == OP_TRANS) {
1877 /* expand a range A-Z to the full set of characters. AIE! */
1879 I32 i; /* current expanded character */
1880 I32 min; /* first character in range */
1881 I32 max; /* last character in range */
1892 char * const c = (char*)utf8_hop((U8*)d, -1);
1896 *c = (char)UTF_TO_NATIVE(0xff);
1897 /* mark the range as done, and continue */
1903 i = d - SvPVX_const(sv); /* remember current offset */
1906 SvLEN(sv) + (has_utf8 ?
1907 (512 - UTF_CONTINUATION_MARK +
1910 /* How many two-byte within 0..255: 128 in UTF-8,
1911 * 96 in UTF-8-mod. */
1913 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
1915 d = SvPVX(sv) + i; /* refresh d after realloc */
1919 for (j = 0; j <= 1; j++) {
1920 char * const c = (char*)utf8_hop((U8*)d, -1);
1921 const UV uv = utf8n_to_uvchr((U8*)c, d - c, NULL, 0);
1927 max = (U8)0xff; /* only to \xff */
1928 uvmax = uv; /* \x{100} to uvmax */
1930 d = c; /* eat endpoint chars */
1935 d -= 2; /* eat the first char and the - */
1936 min = (U8)*d; /* first char in range */
1937 max = (U8)d[1]; /* last char in range */
1944 "Invalid range \"%c-%c\" in transliteration operator",
1945 (char)min, (char)max);
1949 if (literal_endpoint == 2 &&
1950 ((isLOWER(min) && isLOWER(max)) ||
1951 (isUPPER(min) && isUPPER(max)))) {
1953 for (i = min; i <= max; i++)
1955 *d++ = NATIVE_TO_NEED(has_utf8,i);
1957 for (i = min; i <= max; i++)
1959 *d++ = NATIVE_TO_NEED(has_utf8,i);
1964 for (i = min; i <= max; i++)
1967 const U8 ch = (U8)NATIVE_TO_UTF(i);
1968 if (UNI_IS_INVARIANT(ch))
1971 *d++ = (U8)UTF8_EIGHT_BIT_HI(ch);
1972 *d++ = (U8)UTF8_EIGHT_BIT_LO(ch);
1981 d = (char*)uvchr_to_utf8((U8*)d, 0x100);
1983 *d++ = (char)UTF_TO_NATIVE(0xff);
1985 d = (char*)uvchr_to_utf8((U8*)d, uvmax);
1989 /* mark the range as done, and continue */
1993 literal_endpoint = 0;
1998 /* range begins (ignore - as first or last char) */
1999 else if (*s == '-' && s+1 < send && s != start) {
2001 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
2008 *d++ = (char)UTF_TO_NATIVE(0xff); /* use illegal utf8 byte--see pmtrans */
2018 literal_endpoint = 0;
2019 native_range = TRUE;
2024 /* if we get here, we're not doing a transliteration */
2026 /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
2027 except for the last char, which will be done separately. */
2028 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
2030 while (s+1 < send && *s != ')')
2031 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2033 else if (s[2] == '{' /* This should match regcomp.c */
2034 || ((s[2] == 'p' || s[2] == '?') && s[3] == '{'))
2037 char *regparse = s + (s[2] == '{' ? 3 : 4);
2040 while (count && (c = *regparse)) {
2041 if (c == '\\' && regparse[1])
2049 if (*regparse != ')')
2050 regparse--; /* Leave one char for continuation. */
2051 while (s < regparse)
2052 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2056 /* likewise skip #-initiated comments in //x patterns */
2057 else if (*s == '#' && PL_lex_inpat &&
2058 ((PMOP*)PL_lex_inpat)->op_pmflags & PMf_EXTENDED) {
2059 while (s+1 < send && *s != '\n')
2060 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2063 /* check for embedded arrays
2064 (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
2066 else if (*s == '@' && s[1]) {
2067 if (isALNUM_lazy_if(s+1,UTF))
2069 if (strchr(":'{$", s[1]))
2071 if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
2072 break; /* in regexp, neither @+ nor @- are interpolated */
2075 /* check for embedded scalars. only stop if we're sure it's a
2078 else if (*s == '$') {
2079 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
2081 if (s + 1 < send && !strchr("()| \r\n\t", s[1]))
2082 break; /* in regexp, $ might be tail anchor */
2085 /* End of else if chain - OP_TRANS rejoin rest */
2088 if (*s == '\\' && s+1 < send) {
2091 /* deprecate \1 in strings and substitution replacements */
2092 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
2093 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
2095 if (ckWARN(WARN_SYNTAX))
2096 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
2101 /* string-change backslash escapes */
2102 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
2106 /* skip any other backslash escapes in a pattern */
2107 else if (PL_lex_inpat) {
2108 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
2109 goto default_action;
2112 /* if we get here, it's either a quoted -, or a digit */
2115 /* quoted - in transliterations */
2117 if (PL_lex_inwhat == OP_TRANS) {
2124 if ((isALPHA(*s) || isDIGIT(*s)) &&
2126 Perl_warner(aTHX_ packWARN(WARN_MISC),
2127 "Unrecognized escape \\%c passed through",
2129 /* default action is to copy the quoted character */
2130 goto default_action;
2133 /* \132 indicates an octal constant */
2134 case '0': case '1': case '2': case '3':
2135 case '4': case '5': case '6': case '7':
2139 uv = grok_oct(s, &len, &flags, NULL);
2142 goto NUM_ESCAPE_INSERT;
2144 /* \x24 indicates a hex constant */
2148 char* const e = strchr(s, '}');
2149 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
2150 PERL_SCAN_DISALLOW_PREFIX;
2155 yyerror("Missing right brace on \\x{}");
2159 uv = grok_hex(s, &len, &flags, NULL);
2165 I32 flags = PERL_SCAN_DISALLOW_PREFIX;
2166 uv = grok_hex(s, &len, &flags, NULL);
2172 /* Insert oct or hex escaped character.
2173 * There will always enough room in sv since such
2174 * escapes will be longer than any UTF-8 sequence
2175 * they can end up as. */
2177 /* We need to map to chars to ASCII before doing the tests
2180 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(uv))) {
2181 if (!has_utf8 && uv > 255) {
2182 /* Might need to recode whatever we have
2183 * accumulated so far if it contains any
2186 * (Can't we keep track of that and avoid
2187 * this rescan? --jhi)
2191 for (c = (U8 *) SvPVX(sv); c < (U8 *)d; c++) {
2192 if (!NATIVE_IS_INVARIANT(*c)) {
2197 const STRLEN offset = d - SvPVX_const(sv);
2199 d = SvGROW(sv, SvLEN(sv) + hicount + 1) + offset;
2203 while (src >= (const U8 *)SvPVX_const(sv)) {
2204 if (!NATIVE_IS_INVARIANT(*src)) {
2205 const U8 ch = NATIVE_TO_ASCII(*src);
2206 *dst-- = (U8)UTF8_EIGHT_BIT_LO(ch);
2207 *dst-- = (U8)UTF8_EIGHT_BIT_HI(ch);
2217 if (has_utf8 || uv > 255) {
2218 d = (char*)uvchr_to_utf8((U8*)d, uv);
2220 if (PL_lex_inwhat == OP_TRANS &&
2221 PL_sublex_info.sub_op) {
2222 PL_sublex_info.sub_op->op_private |=
2223 (PL_lex_repl ? OPpTRANS_FROM_UTF
2227 if (uv > 255 && !dorange)
2228 native_range = FALSE;
2240 /* \N{LATIN SMALL LETTER A} is a named character */
2244 char* e = strchr(s, '}');
2251 yyerror("Missing right brace on \\N{}");
2255 if (e > s + 2 && s[1] == 'U' && s[2] == '+') {
2257 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
2258 PERL_SCAN_DISALLOW_PREFIX;
2261 uv = grok_hex(s, &len, &flags, NULL);
2262 if ( e > s && len != (STRLEN)(e - s) ) {
2266 goto NUM_ESCAPE_INSERT;
2268 res = newSVpvn(s + 1, e - s - 1);
2269 type = newSVpvn(s - 2,e - s + 3);
2270 res = new_constant( NULL, 0, "charnames",
2271 res, NULL, SvPVX(type) );
2274 sv_utf8_upgrade(res);
2275 str = SvPV_const(res,len);
2276 #ifdef EBCDIC_NEVER_MIND
2277 /* charnames uses pack U and that has been
2278 * recently changed to do the below uni->native
2279 * mapping, so this would be redundant (and wrong,
2280 * the code point would be doubly converted).
2281 * But leave this in just in case the pack U change
2282 * gets revoked, but the semantics is still
2283 * desireable for charnames. --jhi */
2285 UV uv = utf8_to_uvchr((const U8*)str, 0);
2288 U8 tmpbuf[UTF8_MAXBYTES+1], *d;
2290 d = uvchr_to_utf8(tmpbuf, UNI_TO_NATIVE(uv));
2291 sv_setpvn(res, (char *)tmpbuf, d - tmpbuf);
2292 str = SvPV_const(res, len);
2296 if (!has_utf8 && SvUTF8(res)) {
2297 const char * const ostart = SvPVX_const(sv);
2298 SvCUR_set(sv, d - ostart);
2301 sv_utf8_upgrade(sv);
2302 /* this just broke our allocation above... */
2303 SvGROW(sv, (STRLEN)(send - start));
2304 d = SvPVX(sv) + SvCUR(sv);
2307 if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
2308 const char * const odest = SvPVX_const(sv);
2310 SvGROW(sv, (SvLEN(sv) + len - (e - s + 4)));
2311 d = SvPVX(sv) + (d - odest);
2315 native_range = FALSE; /* \N{} is guessed to be Unicode */
2317 Copy(str, d, len, char);
2324 yyerror("Missing braces on \\N{}");
2327 /* \c is a control character */
2336 *d++ = NATIVE_TO_NEED(has_utf8,toCTRL(c));
2339 yyerror("Missing control char name in \\c");
2343 /* printf-style backslashes, formfeeds, newlines, etc */
2345 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
2348 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
2351 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
2354 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
2357 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
2360 *d++ = ASCII_TO_NEED(has_utf8,'\033');
2363 *d++ = ASCII_TO_NEED(has_utf8,'\007');
2369 } /* end if (backslash) */
2376 /* If we started with encoded form, or already know we want it
2377 and then encode the next character */
2378 if ((has_utf8 || this_utf8) && !NATIVE_IS_INVARIANT((U8)(*s))) {
2380 const UV nextuv = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
2381 const STRLEN need = UNISKIP(NATIVE_TO_UNI(nextuv));
2384 /* encoded value larger than old, need extra space (NOTE: SvCUR() not set here) */
2385 const STRLEN off = d - SvPVX_const(sv);
2386 d = SvGROW(sv, SvLEN(sv) + (need-len)) + off;
2388 d = (char*)uvchr_to_utf8((U8*)d, nextuv);
2391 if (uv > 255 && !dorange)
2392 native_range = FALSE;
2396 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2398 } /* while loop to process each character */
2400 /* terminate the string and set up the sv */
2402 SvCUR_set(sv, d - SvPVX_const(sv));
2403 if (SvCUR(sv) >= SvLEN(sv))
2404 Perl_croak(aTHX_ "panic: constant overflowed allocated space");
2407 if (PL_encoding && !has_utf8) {
2408 sv_recode_to_utf8(sv, PL_encoding);
2414 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
2415 PL_sublex_info.sub_op->op_private |=
2416 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
2420 /* shrink the sv if we allocated more than we used */
2421 if (SvCUR(sv) + 5 < SvLEN(sv)) {
2422 SvPV_shrink_to_cur(sv);
2425 /* return the substring (via yylval) only if we parsed anything */
2426 if (s > PL_bufptr) {
2427 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) )
2428 sv = new_constant(start, s - start,
2429 (const char *)(PL_lex_inpat ? "qr" : "q"),
2432 (( PL_lex_inwhat == OP_TRANS
2434 : ( (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat)
2437 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
2444 * Returns TRUE if there's more to the expression (e.g., a subscript),
2447 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
2449 * ->[ and ->{ return TRUE
2450 * { and [ outside a pattern are always subscripts, so return TRUE
2451 * if we're outside a pattern and it's not { or [, then return FALSE
2452 * if we're in a pattern and the first char is a {
2453 * {4,5} (any digits around the comma) returns FALSE
2454 * if we're in a pattern and the first char is a [
2456 * [SOMETHING] has a funky algorithm to decide whether it's a
2457 * character class or not. It has to deal with things like
2458 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
2459 * anything else returns TRUE
2462 /* This is the one truly awful dwimmer necessary to conflate C and sed. */
2465 S_intuit_more(pTHX_ register char *s)
2468 if (PL_lex_brackets)
2470 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
2472 if (*s != '{' && *s != '[')
2477 /* In a pattern, so maybe we have {n,m}. */
2494 /* On the other hand, maybe we have a character class */
2497 if (*s == ']' || *s == '^')
2500 /* this is terrifying, and it works */
2501 int weight = 2; /* let's weigh the evidence */
2503 unsigned char un_char = 255, last_un_char;
2504 const char * const send = strchr(s,']');
2505 char tmpbuf[sizeof PL_tokenbuf * 4];
2507 if (!send) /* has to be an expression */
2510 Zero(seen,256,char);
2513 else if (isDIGIT(*s)) {
2515 if (isDIGIT(s[1]) && s[2] == ']')
2521 for (; s < send; s++) {
2522 last_un_char = un_char;
2523 un_char = (unsigned char)*s;
2528 weight -= seen[un_char] * 10;
2529 if (isALNUM_lazy_if(s+1,UTF)) {
2531 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
2532 len = (int)strlen(tmpbuf);
2533 if (len > 1 && gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PV))
2538 else if (*s == '$' && s[1] &&
2539 strchr("[#!%*<>()-=",s[1])) {
2540 if (/*{*/ strchr("])} =",s[2]))
2549 if (strchr("wds]",s[1]))
2551 else if (seen[(U8)'\''] || seen[(U8)'"'])
2553 else if (strchr("rnftbxcav",s[1]))
2555 else if (isDIGIT(s[1])) {
2557 while (s[1] && isDIGIT(s[1]))
2567 if (strchr("aA01! ",last_un_char))
2569 if (strchr("zZ79~",s[1]))
2571 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
2572 weight -= 5; /* cope with negative subscript */
2575 if (!isALNUM(last_un_char)
2576 && !(last_un_char == '$' || last_un_char == '@'
2577 || last_un_char == '&')
2578 && isALPHA(*s) && s[1] && isALPHA(s[1])) {
2583 if (keyword(tmpbuf, d - tmpbuf, 0))
2586 if (un_char == last_un_char + 1)
2588 weight -= seen[un_char];
2593 if (weight >= 0) /* probably a character class */
2603 * Does all the checking to disambiguate
2605 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
2606 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
2608 * First argument is the stuff after the first token, e.g. "bar".
2610 * Not a method if bar is a filehandle.
2611 * Not a method if foo is a subroutine prototyped to take a filehandle.
2612 * Not a method if it's really "Foo $bar"
2613 * Method if it's "foo $bar"
2614 * Not a method if it's really "print foo $bar"
2615 * Method if it's really "foo package::" (interpreted as package->foo)
2616 * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
2617 * Not a method if bar is a filehandle or package, but is quoted with
2622 S_intuit_method(pTHX_ char *start, GV *gv, CV *cv)
2625 char *s = start + (*start == '$');
2626 char tmpbuf[sizeof PL_tokenbuf];
2634 if (SvTYPE(gv) == SVt_PVGV && GvIO(gv))
2638 const char *proto = SvPVX_const(cv);
2649 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
2650 /* start is the beginning of the possible filehandle/object,
2651 * and s is the end of it
2652 * tmpbuf is a copy of it
2655 if (*start == '$') {
2656 if (gv || PL_last_lop_op == OP_PRINT || isUPPER(*PL_tokenbuf))
2659 len = start - SvPVX(PL_linestr);
2663 start = SvPVX(PL_linestr) + len;
2667 return *s == '(' ? FUNCMETH : METHOD;
2669 if (!keyword(tmpbuf, len, 0)) {
2670 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
2674 soff = s - SvPVX(PL_linestr);
2678 indirgv = gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PVCV);
2679 if (indirgv && GvCVu(indirgv))
2681 /* filehandle or package name makes it a method */
2682 if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, FALSE)) {
2684 soff = s - SvPVX(PL_linestr);
2687 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
2688 return 0; /* no assumptions -- "=>" quotes bearword */
2690 start_force(PL_curforce);
2691 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0,
2692 newSVpvn(tmpbuf,len));
2693 NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
2695 curmad('X', newSVpvn(start,SvPVX(PL_linestr) + soff - start));
2700 PL_bufptr = SvPVX(PL_linestr) + soff; /* restart before space */
2702 return *s == '(' ? FUNCMETH : METHOD;
2710 * Return a string of Perl code to load the debugger. If PERL5DB
2711 * is set, it will return the contents of that, otherwise a
2712 * compile-time require of perl5db.pl.
2720 const char * const pdb = PerlEnv_getenv("PERL5DB");
2724 SETERRNO(0,SS_NORMAL);
2725 return "BEGIN { require 'perl5db.pl' }";
2731 /* Encoded script support. filter_add() effectively inserts a
2732 * 'pre-processing' function into the current source input stream.
2733 * Note that the filter function only applies to the current source file
2734 * (e.g., it will not affect files 'require'd or 'use'd by this one).
2736 * The datasv parameter (which may be NULL) can be used to pass
2737 * private data to this instance of the filter. The filter function
2738 * can recover the SV using the FILTER_DATA macro and use it to
2739 * store private buffers and state information.
2741 * The supplied datasv parameter is upgraded to a PVIO type
2742 * and the IoDIRP/IoANY field is used to store the function pointer,
2743 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
2744 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
2745 * private use must be set using malloc'd pointers.
2749 Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
2755 if (!PL_rsfp_filters)
2756 PL_rsfp_filters = newAV();
2759 SvUPGRADE(datasv, SVt_PVIO);
2760 IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
2761 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
2762 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
2763 FPTR2DPTR(void *, IoANY(datasv)),
2764 SvPV_nolen(datasv)));
2765 av_unshift(PL_rsfp_filters, 1);
2766 av_store(PL_rsfp_filters, 0, datasv) ;
2771 /* Delete most recently added instance of this filter function. */
2773 Perl_filter_del(pTHX_ filter_t funcp)
2779 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
2780 FPTR2DPTR(void*, funcp)));
2782 if (!PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
2784 /* if filter is on top of stack (usual case) just pop it off */
2785 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
2786 if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
2787 IoFLAGS(datasv) &= ~IOf_FAKE_DIRP;
2788 IoANY(datasv) = (void *)NULL;
2789 sv_free(av_pop(PL_rsfp_filters));
2793 /* we need to search for the correct entry and clear it */
2794 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
2798 /* Invoke the idxth filter function for the current rsfp. */
2799 /* maxlen 0 = read one text line */
2801 Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
2806 /* This API is bad. It should have been using unsigned int for maxlen.
2807 Not sure if we want to change the API, but if not we should sanity
2808 check the value here. */
2809 const unsigned int correct_length
2818 if (!PL_rsfp_filters)
2820 if (idx > AvFILLp(PL_rsfp_filters)) { /* Any more filters? */
2821 /* Provide a default input filter to make life easy. */
2822 /* Note that we append to the line. This is handy. */
2823 DEBUG_P(PerlIO_printf(Perl_debug_log,
2824 "filter_read %d: from rsfp\n", idx));
2825 if (correct_length) {
2828 const int old_len = SvCUR(buf_sv);
2830 /* ensure buf_sv is large enough */
2831 SvGROW(buf_sv, (STRLEN)(old_len + correct_length)) ;
2832 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
2833 correct_length)) <= 0) {
2834 if (PerlIO_error(PL_rsfp))
2835 return -1; /* error */
2837 return 0 ; /* end of file */
2839 SvCUR_set(buf_sv, old_len + len) ;
2842 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
2843 if (PerlIO_error(PL_rsfp))
2844 return -1; /* error */
2846 return 0 ; /* end of file */
2849 return SvCUR(buf_sv);
2851 /* Skip this filter slot if filter has been deleted */
2852 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
2853 DEBUG_P(PerlIO_printf(Perl_debug_log,
2854 "filter_read %d: skipped (filter deleted)\n",
2856 return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
2858 /* Get function pointer hidden within datasv */
2859 funcp = DPTR2FPTR(filter_t, IoANY(datasv));
2860 DEBUG_P(PerlIO_printf(Perl_debug_log,
2861 "filter_read %d: via function %p (%s)\n",
2862 idx, (void*)datasv, SvPV_nolen_const(datasv)));
2863 /* Call function. The function is expected to */
2864 /* call "FILTER_READ(idx+1, buf_sv)" first. */
2865 /* Return: <0:error, =0:eof, >0:not eof */
2866 return (*funcp)(aTHX_ idx, buf_sv, correct_length);
2870 S_filter_gets(pTHX_ register SV *sv, register PerlIO *fp, STRLEN append)
2873 #ifdef PERL_CR_FILTER
2874 if (!PL_rsfp_filters) {
2875 filter_add(S_cr_textfilter,NULL);
2878 if (PL_rsfp_filters) {
2880 SvCUR_set(sv, 0); /* start with empty line */
2881 if (FILTER_READ(0, sv, 0) > 0)
2882 return ( SvPVX(sv) ) ;
2887 return (sv_gets(sv, fp, append));
2891 S_find_in_my_stash(pTHX_ const char *pkgname, I32 len)
2896 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
2900 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
2901 (gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVHV)))
2903 return GvHV(gv); /* Foo:: */
2906 /* use constant CLASS => 'MyClass' */
2907 gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVCV);
2908 if (gv && GvCV(gv)) {
2909 SV * const sv = cv_const_sv(GvCV(gv));
2911 pkgname = SvPV_nolen_const(sv);
2914 return gv_stashpv(pkgname, FALSE);
2918 * S_readpipe_override
2919 * Check whether readpipe() is overriden, and generates the appropriate
2920 * optree, provided sublex_start() is called afterwards.
2923 S_readpipe_override(pTHX)
2926 GV *gv_readpipe = gv_fetchpvs("readpipe", GV_NOTQUAL, SVt_PVCV);
2927 yylval.ival = OP_BACKTICK;
2929 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe))
2931 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readpipe", FALSE))
2932 && (gv_readpipe = *gvp) != (GV*)&PL_sv_undef
2933 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe)))
2935 PL_lex_op = (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
2936 append_elem(OP_LIST,
2937 newSVOP(OP_CONST, 0, &PL_sv_undef), /* value will be read later */
2938 newCVREF(0, newGVOP(OP_GV, 0, gv_readpipe))));
2948 * The intent of this yylex wrapper is to minimize the changes to the
2949 * tokener when we aren't interested in collecting madprops. It remains
2950 * to be seen how successful this strategy will be...
2957 char *s = PL_bufptr;
2959 /* make sure PL_thiswhite is initialized */
2963 /* just do what yylex would do on pending identifier; leave PL_thiswhite alone */
2964 if (PL_pending_ident)
2965 return S_pending_ident(aTHX);
2967 /* previous token ate up our whitespace? */
2968 if (!PL_lasttoke && PL_nextwhite) {
2969 PL_thiswhite = PL_nextwhite;
2973 /* isolate the token, and figure out where it is without whitespace */
2974 PL_realtokenstart = -1;
2978 assert(PL_curforce < 0);
2980 if (!PL_thismad || PL_thismad->mad_key == '^') { /* not forced already? */
2981 if (!PL_thistoken) {
2982 if (PL_realtokenstart < 0 || !CopLINE(PL_curcop))
2983 PL_thistoken = newSVpvs("");
2985 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
2986 PL_thistoken = newSVpvn(tstart, s - tstart);
2989 if (PL_thismad) /* install head */
2990 CURMAD('X', PL_thistoken);
2993 /* last whitespace of a sublex? */
2994 if (optype == ')' && PL_endwhite) {
2995 CURMAD('X', PL_endwhite);
3000 /* if no whitespace and we're at EOF, bail. Otherwise fake EOF below. */
3001 if (!PL_thiswhite && !PL_endwhite && !optype) {
3002 sv_free(PL_thistoken);
3007 /* put off final whitespace till peg */
3008 if (optype == ';' && !PL_rsfp) {
3009 PL_nextwhite = PL_thiswhite;
3012 else if (PL_thisopen) {
3013 CURMAD('q', PL_thisopen);
3015 sv_free(PL_thistoken);
3019 /* Store actual token text as madprop X */
3020 CURMAD('X', PL_thistoken);
3024 /* add preceding whitespace as madprop _ */
3025 CURMAD('_', PL_thiswhite);
3029 /* add quoted material as madprop = */
3030 CURMAD('=', PL_thisstuff);
3034 /* add terminating quote as madprop Q */
3035 CURMAD('Q', PL_thisclose);
3039 /* special processing based on optype */
3043 /* opval doesn't need a TOKEN since it can already store mp */
3054 append_madprops(PL_thismad, yylval.opval, 0);
3062 addmad(newMADsv('p', PL_endwhite), &PL_thismad, 0);
3071 /* remember any fake bracket that lexer is about to discard */
3072 if (PL_lex_brackets == 1 &&
3073 ((expectation)PL_lex_brackstack[0] & XFAKEBRACK))
3076 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
3079 PL_thiswhite = newSVpvn(PL_bufptr, ++s - PL_bufptr);
3080 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
3083 break; /* don't bother looking for trailing comment */
3092 /* attach a trailing comment to its statement instead of next token */
3096 if (PL_bufptr > PL_oldbufptr && PL_bufptr[-1] == optype) {
3098 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
3100 if (*s == '\n' || *s == '#') {
3101 while (s < PL_bufend && *s != '\n')
3105 PL_thiswhite = newSVpvn(PL_bufptr, s - PL_bufptr);
3106 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
3123 /* Create new token struct. Note: opvals return early above. */
3124 yylval.tkval = newTOKEN(optype, yylval, PL_thismad);
3131 S_tokenize_use(pTHX_ int is_use, char *s) {
3133 if (PL_expect != XSTATE)
3134 yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
3135 is_use ? "use" : "no"));
3137 if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
3138 s = force_version(s, TRUE);
3139 if (*s == ';' || (s = SKIPSPACE1(s), *s == ';')) {
3140 start_force(PL_curforce);
3141 NEXTVAL_NEXTTOKE.opval = NULL;
3144 else if (*s == 'v') {
3145 s = force_word(s,WORD,FALSE,TRUE,FALSE);
3146 s = force_version(s, FALSE);
3150 s = force_word(s,WORD,FALSE,TRUE,FALSE);
3151 s = force_version(s, FALSE);
3153 yylval.ival = is_use;
3157 static const char* const exp_name[] =
3158 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
3159 "ATTRTERM", "TERMBLOCK", "TERMORDORDOR"
3166 Works out what to call the token just pulled out of the input
3167 stream. The yacc parser takes care of taking the ops we return and
3168 stitching them into a tree.
3174 if read an identifier
3175 if we're in a my declaration
3176 croak if they tried to say my($foo::bar)
3177 build the ops for a my() declaration
3178 if it's an access to a my() variable
3179 are we in a sort block?
3180 croak if my($a); $a <=> $b
3181 build ops for access to a my() variable
3182 if in a dq string, and they've said @foo and we can't find @foo
3184 build ops for a bareword
3185 if we already built the token before, use it.
3190 #pragma segment Perl_yylex
3196 register char *s = PL_bufptr;
3201 /* orig_keyword, gvp, and gv are initialized here because
3202 * jump to the label just_a_word_zero can bypass their
3203 * initialization later. */
3204 I32 orig_keyword = 0;
3209 SV* tmp = newSVpvs("");
3210 PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n",
3211 (IV)CopLINE(PL_curcop),
3212 lex_state_names[PL_lex_state],
3213 exp_name[PL_expect],
3214 pv_display(tmp, s, strlen(s), 0, 60));
3217 /* check if there's an identifier for us to look at */
3218 if (PL_pending_ident)
3219 return REPORT(S_pending_ident(aTHX));
3221 /* no identifier pending identification */
3223 switch (PL_lex_state) {
3225 case LEX_NORMAL: /* Some compilers will produce faster */
3226 case LEX_INTERPNORMAL: /* code if we comment these out. */
3230 /* when we've already built the next token, just pull it out of the queue */
3234 yylval = PL_nexttoke[PL_lasttoke].next_val;
3236 PL_thismad = PL_nexttoke[PL_lasttoke].next_mad;
3237 PL_nexttoke[PL_lasttoke].next_mad = 0;
3238 if (PL_thismad && PL_thismad->mad_key == '_') {
3239 PL_thiswhite = (SV*)PL_thismad->mad_val;
3240 PL_thismad->mad_val = 0;
3241 mad_free(PL_thismad);
3246 PL_lex_state = PL_lex_defer;
3247 PL_expect = PL_lex_expect;
3248 PL_lex_defer = LEX_NORMAL;
3249 if (!PL_nexttoke[PL_lasttoke].next_type)
3254 yylval = PL_nextval[PL_nexttoke];
3256 PL_lex_state = PL_lex_defer;
3257 PL_expect = PL_lex_expect;
3258 PL_lex_defer = LEX_NORMAL;
3262 /* FIXME - can these be merged? */
3263 return(PL_nexttoke[PL_lasttoke].next_type);
3265 return REPORT(PL_nexttype[PL_nexttoke]);
3268 /* interpolated case modifiers like \L \U, including \Q and \E.
3269 when we get here, PL_bufptr is at the \
3271 case LEX_INTERPCASEMOD:
3273 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
3274 Perl_croak(aTHX_ "panic: INTERPCASEMOD");
3276 /* handle \E or end of string */
3277 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
3279 if (PL_lex_casemods) {
3280 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
3281 PL_lex_casestack[PL_lex_casemods] = '\0';
3283 if (PL_bufptr != PL_bufend
3284 && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q')) {
3286 PL_lex_state = LEX_INTERPCONCAT;
3289 PL_thistoken = newSVpvs("\\E");
3295 while (PL_bufptr != PL_bufend &&
3296 PL_bufptr[0] == '\\' && PL_bufptr[1] == 'E') {
3298 PL_thiswhite = newSVpvs("");
3299 sv_catpvn(PL_thiswhite, PL_bufptr, 2);
3303 if (PL_bufptr != PL_bufend)
3306 PL_lex_state = LEX_INTERPCONCAT;
3310 DEBUG_T({ PerlIO_printf(Perl_debug_log,
3311 "### Saw case modifier\n"); });
3313 if (s[1] == '\\' && s[2] == 'E') {
3316 PL_thiswhite = newSVpvs("");
3317 sv_catpvn(PL_thiswhite, PL_bufptr, 4);
3320 PL_lex_state = LEX_INTERPCONCAT;
3325 if (!PL_madskills) /* when just compiling don't need correct */
3326 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
3327 tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */
3328 if ((*s == 'L' || *s == 'U') &&
3329 (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U'))) {
3330 PL_lex_casestack[--PL_lex_casemods] = '\0';
3333 if (PL_lex_casemods > 10)
3334 Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
3335 PL_lex_casestack[PL_lex_casemods++] = *s;
3336 PL_lex_casestack[PL_lex_casemods] = '\0';
3337 PL_lex_state = LEX_INTERPCONCAT;
3338 start_force(PL_curforce);
3339 NEXTVAL_NEXTTOKE.ival = 0;
3341 start_force(PL_curforce);
3343 NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
3345 NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
3347 NEXTVAL_NEXTTOKE.ival = OP_LC;
3349 NEXTVAL_NEXTTOKE.ival = OP_UC;
3351 NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
3353 Perl_croak(aTHX_ "panic: yylex");
3355 SV* const tmpsv = newSVpvs("");
3356 Perl_sv_catpvf(aTHX_ tmpsv, "\\%c", *s);
3362 if (PL_lex_starts) {
3368 sv_free(PL_thistoken);
3369 PL_thistoken = newSVpvs("");
3372 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
3373 if (PL_lex_casemods == 1 && PL_lex_inpat)
3382 case LEX_INTERPPUSH:
3383 return REPORT(sublex_push());
3385 case LEX_INTERPSTART:
3386 if (PL_bufptr == PL_bufend)
3387 return REPORT(sublex_done());
3388 DEBUG_T({ PerlIO_printf(Perl_debug_log,
3389 "### Interpolated variable\n"); });
3391 PL_lex_dojoin = (*PL_bufptr == '@');
3392 PL_lex_state = LEX_INTERPNORMAL;
3393 if (PL_lex_dojoin) {
3394 start_force(PL_curforce);
3395 NEXTVAL_NEXTTOKE.ival = 0;
3397 start_force(PL_curforce);
3398 force_ident("\"", '$');
3399 start_force(PL_curforce);
3400 NEXTVAL_NEXTTOKE.ival = 0;
3402 start_force(PL_curforce);
3403 NEXTVAL_NEXTTOKE.ival = 0;
3405 start_force(PL_curforce);
3406 NEXTVAL_NEXTTOKE.ival = OP_JOIN; /* emulate join($", ...) */
3409 if (PL_lex_starts++) {
3414 sv_free(PL_thistoken);
3415 PL_thistoken = newSVpvs("");
3418 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
3419 if (!PL_lex_casemods && PL_lex_inpat)
3426 case LEX_INTERPENDMAYBE:
3427 if (intuit_more(PL_bufptr)) {
3428 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
3434 if (PL_lex_dojoin) {
3435 PL_lex_dojoin = FALSE;
3436 PL_lex_state = LEX_INTERPCONCAT;
3440 sv_free(PL_thistoken);
3441 PL_thistoken = newSVpvs("");
3446 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
3447 && SvEVALED(PL_lex_repl))
3449 if (PL_bufptr != PL_bufend)
3450 Perl_croak(aTHX_ "Bad evalled substitution pattern");
3454 case LEX_INTERPCONCAT:
3456 if (PL_lex_brackets)
3457 Perl_croak(aTHX_ "panic: INTERPCONCAT");
3459 if (PL_bufptr == PL_bufend)
3460 return REPORT(sublex_done());
3462 if (SvIVX(PL_linestr) == '\'') {
3463 SV *sv = newSVsv(PL_linestr);
3466 else if ( PL_hints & HINT_NEW_RE )
3467 sv = new_constant(NULL, 0, "qr", sv, sv, "q");
3468 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3472 s = scan_const(PL_bufptr);
3474 PL_lex_state = LEX_INTERPCASEMOD;
3476 PL_lex_state = LEX_INTERPSTART;
3479 if (s != PL_bufptr) {
3480 start_force(PL_curforce);
3482 curmad('X', newSVpvn(PL_bufptr,s-PL_bufptr));
3484 NEXTVAL_NEXTTOKE = yylval;
3487 if (PL_lex_starts++) {
3491 sv_free(PL_thistoken);
3492 PL_thistoken = newSVpvs("");
3495 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
3496 if (!PL_lex_casemods && PL_lex_inpat)
3509 PL_lex_state = LEX_NORMAL;
3510 s = scan_formline(PL_bufptr);
3511 if (!PL_lex_formbrack)
3517 PL_oldoldbufptr = PL_oldbufptr;
3523 sv_free(PL_thistoken);
3526 PL_realtokenstart = s - SvPVX(PL_linestr); /* assume but undo on ws */
3530 if (isIDFIRST_lazy_if(s,UTF))
3532 Perl_croak(aTHX_ "Unrecognized character \\x%02X", *s & 255);
3535 goto fake_eof; /* emulate EOF on ^D or ^Z */
3544 if (PL_lex_brackets) {
3545 yyerror((const char *)
3547 ? "Format not terminated"
3548 : "Missing right curly or square bracket"));
3550 DEBUG_T( { PerlIO_printf(Perl_debug_log,
3551 "### Tokener got EOF\n");
3555 if (s++ < PL_bufend)
3556 goto retry; /* ignore stray nulls */
3559 if (!PL_in_eval && !PL_preambled) {
3560 PL_preambled = TRUE;
3565 sv_setpv(PL_linestr,incl_perldb());
3566 if (SvCUR(PL_linestr))
3567 sv_catpvs(PL_linestr,";");
3569 while(AvFILLp(PL_preambleav) >= 0) {
3570 SV *tmpsv = av_shift(PL_preambleav);
3571 sv_catsv(PL_linestr, tmpsv);
3572 sv_catpvs(PL_linestr, ";");
3575 sv_free((SV*)PL_preambleav);
3576 PL_preambleav = NULL;
3578 if (PL_minus_n || PL_minus_p) {
3579 sv_catpvs(PL_linestr, "LINE: while (<>) {");
3581 sv_catpvs(PL_linestr,"chomp;");
3584 if ((*PL_splitstr == '/' || *PL_splitstr == '\''
3585 || *PL_splitstr == '"')
3586 && strchr(PL_splitstr + 1, *PL_splitstr))
3587 Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
3589 /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
3590 bytes can be used as quoting characters. :-) */
3591 const char *splits = PL_splitstr;
3592 sv_catpvs(PL_linestr, "our @F=split(q\0");
3595 if (*splits == '\\')
3596 sv_catpvn(PL_linestr, splits, 1);
3597 sv_catpvn(PL_linestr, splits, 1);
3598 } while (*splits++);
3599 /* This loop will embed the trailing NUL of
3600 PL_linestr as the last thing it does before
3602 sv_catpvs(PL_linestr, ");");
3606 sv_catpvs(PL_linestr,"our @F=split(' ');");
3610 sv_catpvs(PL_linestr,"use feature ':5.10';");
3611 sv_catpvs(PL_linestr, "\n");
3612 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
3613 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
3614 PL_last_lop = PL_last_uni = NULL;
3615 if (PERLDB_LINE && PL_curstash != PL_debstash)
3616 update_debugger_info_sv(PL_linestr);
3620 bof = PL_rsfp ? TRUE : FALSE;
3621 if ((s = filter_gets(PL_linestr, PL_rsfp, 0)) == NULL) {
3624 PL_realtokenstart = -1;
3627 if (PL_preprocess && !PL_in_eval)
3628 (void)PerlProc_pclose(PL_rsfp);
3629 else if ((PerlIO *)PL_rsfp == PerlIO_stdin())
3630 PerlIO_clearerr(PL_rsfp);
3632 (void)PerlIO_close(PL_rsfp);
3634 PL_doextract = FALSE;
3636 if (!PL_in_eval && (PL_minus_n || PL_minus_p)) {
3641 sv_setpv(PL_linestr,
3644 ? ";}continue{print;}" : ";}"));
3645 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
3646 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
3647 PL_last_lop = PL_last_uni = NULL;
3648 PL_minus_n = PL_minus_p = 0;
3651 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
3652 PL_last_lop = PL_last_uni = NULL;
3653 sv_setpvn(PL_linestr,"",0);
3654 TOKEN(';'); /* not infinite loop because rsfp is NULL now */
3656 /* If it looks like the start of a BOM or raw UTF-16,
3657 * check if it in fact is. */
3663 #ifdef PERLIO_IS_STDIO
3664 # ifdef __GNU_LIBRARY__
3665 # if __GNU_LIBRARY__ == 1 /* Linux glibc5 */
3666 # define FTELL_FOR_PIPE_IS_BROKEN
3670 # if __GLIBC__ == 1 /* maybe some glibc5 release had it like this? */
3671 # define FTELL_FOR_PIPE_IS_BROKEN
3676 #ifdef FTELL_FOR_PIPE_IS_BROKEN
3677 /* This loses the possibility to detect the bof
3678 * situation on perl -P when the libc5 is being used.
3679 * Workaround? Maybe attach some extra state to PL_rsfp?
3682 bof = PerlIO_tell(PL_rsfp) == SvCUR(PL_linestr);
3684 bof = PerlIO_tell(PL_rsfp) == (Off_t)SvCUR(PL_linestr);
3687 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
3688 s = swallow_bom((U8*)s);
3692 /* Incest with pod. */
3695 sv_catsv(PL_thiswhite, PL_linestr);
3697 if (*s == '=' && strnEQ(s, "=cut", 4)) {
3698 sv_setpvn(PL_linestr, "", 0);
3699 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
3700 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
3701 PL_last_lop = PL_last_uni = NULL;
3702 PL_doextract = FALSE;
3706 } while (PL_doextract);
3707 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
3708 if (PERLDB_LINE && PL_curstash != PL_debstash)
3709 update_debugger_info_sv(PL_linestr);
3710 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
3711 PL_last_lop = PL_last_uni = NULL;
3712 if (CopLINE(PL_curcop) == 1) {
3713 while (s < PL_bufend && isSPACE(*s))
3715 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
3719 PL_thiswhite = newSVpvn(PL_linestart, s - PL_linestart);
3723 if (*s == '#' && *(s+1) == '!')
3725 #ifdef ALTERNATE_SHEBANG
3727 static char const as[] = ALTERNATE_SHEBANG;
3728 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
3729 d = s + (sizeof(as) - 1);
3731 #endif /* ALTERNATE_SHEBANG */
3740 while (*d && !isSPACE(*d))
3744 #ifdef ARG_ZERO_IS_SCRIPT
3745 if (ipathend > ipath) {
3747 * HP-UX (at least) sets argv[0] to the script name,
3748 * which makes $^X incorrect. And Digital UNIX and Linux,
3749 * at least, set argv[0] to the basename of the Perl
3750 * interpreter. So, having found "#!", we'll set it right.
3752 SV * const x = GvSV(gv_fetchpvs("\030", GV_ADD|GV_NOTQUAL,
3754 assert(SvPOK(x) || SvGMAGICAL(x));
3755 if (sv_eq(x, CopFILESV(PL_curcop))) {
3756 sv_setpvn(x, ipath, ipathend - ipath);
3762 const char *bstart = SvPV_const(CopFILESV(PL_curcop),blen);
3763 const char * const lstart = SvPV_const(x,llen);
3765 bstart += blen - llen;
3766 if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
3767 sv_setpvn(x, ipath, ipathend - ipath);
3772 TAINT_NOT; /* $^X is always tainted, but that's OK */
3774 #endif /* ARG_ZERO_IS_SCRIPT */
3779 d = instr(s,"perl -");
3781 d = instr(s,"perl");
3783 /* avoid getting into infinite loops when shebang
3784 * line contains "Perl" rather than "perl" */
3786 for (d = ipathend-4; d >= ipath; --d) {
3787 if ((*d == 'p' || *d == 'P')
3788 && !ibcmp(d, "perl", 4))
3798 #ifdef ALTERNATE_SHEBANG
3800 * If the ALTERNATE_SHEBANG on this system starts with a
3801 * character that can be part of a Perl expression, then if
3802 * we see it but not "perl", we're probably looking at the
3803 * start of Perl code, not a request to hand off to some
3804 * other interpreter. Similarly, if "perl" is there, but
3805 * not in the first 'word' of the line, we assume the line
3806 * contains the start of the Perl program.
3808 if (d && *s != '#') {
3809 const char *c = ipath;
3810 while (*c && !strchr("; \t\r\n\f\v#", *c))
3813 d = NULL; /* "perl" not in first word; ignore */
3815 *s = '#'; /* Don't try to parse shebang line */
3817 #endif /* ALTERNATE_SHEBANG */
3818 #ifndef MACOS_TRADITIONAL
3823 !instr(s,"indir") &&
3824 instr(PL_origargv[0],"perl"))
3831 while (s < PL_bufend && isSPACE(*s))
3833 if (s < PL_bufend) {
3834 Newxz(newargv,PL_origargc+3,char*);
3836 while (s < PL_bufend && !isSPACE(*s))
3839 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
3842 newargv = PL_origargv;
3845 PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
3847 Perl_croak(aTHX_ "Can't exec %s", ipath);
3851 while (*d && !isSPACE(*d))
3853 while (SPACE_OR_TAB(*d))
3857 const bool switches_done = PL_doswitches;
3858 const U32 oldpdb = PL_perldb;
3859 const bool oldn = PL_minus_n;
3860 const bool oldp = PL_minus_p;
3863 if (*d == 'M' || *d == 'm' || *d == 'C') {
3864 const char * const m = d;
3865 while (*d && !isSPACE(*d))
3867 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
3870 d = moreswitches(d);
3872 if (PL_doswitches && !switches_done) {
3873 int argc = PL_origargc;
3874 char **argv = PL_origargv;
3877 } while (argc && argv[0][0] == '-' && argv[0][1]);
3878 init_argv_symbols(argc,argv);
3880 if ((PERLDB_LINE && !oldpdb) ||
3881 ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
3882 /* if we have already added "LINE: while (<>) {",
3883 we must not do it again */
3885 sv_setpvn(PL_linestr, "", 0);
3886 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
3887 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
3888 PL_last_lop = PL_last_uni = NULL;
3889 PL_preambled = FALSE;
3891 (void)gv_fetchfile(PL_origfilename);
3898 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
3900 PL_lex_state = LEX_FORMLINE;
3905 #ifdef PERL_STRICT_CR
3906 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
3908 "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
3910 case ' ': case '\t': case '\f': case 013:
3911 #ifdef MACOS_TRADITIONAL
3915 PL_realtokenstart = -1;
3924 PL_realtokenstart = -1;
3928 if (PL_lex_state != LEX_NORMAL || (PL_in_eval && !PL_rsfp)) {
3929 if (*s == '#' && s == PL_linestart && PL_in_eval && !PL_rsfp) {
3930 /* handle eval qq[#line 1 "foo"\n ...] */
3931 CopLINE_dec(PL_curcop);
3934 if (PL_madskills && !PL_lex_formbrack && !PL_in_eval) {
3936 if (!PL_in_eval || PL_rsfp)
3941 while (d < PL_bufend && *d != '\n')
3945 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
3946 Perl_croak(aTHX_ "panic: input overflow");
3949 PL_thiswhite = newSVpvn(s, d - s);
3954 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
3956 PL_lex_state = LEX_FORMLINE;
3962 if (PL_madskills && CopLINE(PL_curcop) >= 1 && !PL_lex_formbrack) {
3963 if (CopLINE(PL_curcop) == 1 && s[0] == '#' && s[1] == '!') {
3966 TOKEN(PEG); /* make sure any #! line is accessible */
3971 /* if (PL_madskills && PL_lex_formbrack) { */
3973 while (d < PL_bufend && *d != '\n')
3977 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
3978 Perl_croak(aTHX_ "panic: input overflow");
3979 if (PL_madskills && CopLINE(PL_curcop) >= 1) {
3981 PL_thiswhite = newSVpvs("");
3982 if (CopLINE(PL_curcop) == 1) {
3983 sv_setpvn(PL_thiswhite, "", 0);
3986 sv_catpvn(PL_thiswhite, s, d - s);
4000 if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {
4008 while (s < PL_bufend && SPACE_OR_TAB(*s))
4011 if (strnEQ(s,"=>",2)) {
4012 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
4013 DEBUG_T( { printbuf("### Saw unary minus before =>, forcing word %s\n", s); } );
4014 OPERATOR('-'); /* unary minus */
4016 PL_last_uni = PL_oldbufptr;
4018 case 'r': ftst = OP_FTEREAD; break;
4019 case 'w': ftst = OP_FTEWRITE; break;
4020 case 'x': ftst = OP_FTEEXEC; break;
4021 case 'o': ftst = OP_FTEOWNED; break;
4022 case 'R': ftst = OP_FTRREAD; break;
4023 case 'W': ftst = OP_FTRWRITE; break;
4024 case 'X': ftst = OP_FTREXEC; break;
4025 case 'O': ftst = OP_FTROWNED; break;
4026 case 'e': ftst = OP_FTIS; break;
4027 case 'z': ftst = OP_FTZERO; break;
4028 case 's': ftst = OP_FTSIZE; break;
4029 case 'f': ftst = OP_FTFILE; break;
4030 case 'd': ftst = OP_FTDIR; break;
4031 case 'l': ftst = OP_FTLINK; break;
4032 case 'p': ftst = OP_FTPIPE; break;
4033 case 'S': ftst = OP_FTSOCK; break;
4034 case 'u': ftst = OP_FTSUID; break;
4035 case 'g': ftst = OP_FTSGID; break;
4036 case 'k': ftst = OP_FTSVTX; break;
4037 case 'b': ftst = OP_FTBLK; break;
4038 case 'c': ftst = OP_FTCHR; break;
4039 case 't': ftst = OP_FTTTY; break;
4040 case 'T': ftst = OP_FTTEXT; break;
4041 case 'B': ftst = OP_FTBINARY; break;
4042 case 'M': case 'A': case 'C':
4043 gv_fetchpvs("\024", GV_ADD|GV_NOTQUAL, SVt_PV);
4045 case 'M': ftst = OP_FTMTIME; break;
4046 case 'A': ftst = OP_FTATIME; break;
4047 case 'C': ftst = OP_FTCTIME; break;
4055 PL_last_lop_op = (OPCODE)ftst;
4056 DEBUG_T( { PerlIO_printf(Perl_debug_log,
4057 "### Saw file test %c\n", (int)tmp);
4062 /* Assume it was a minus followed by a one-letter named
4063 * subroutine call (or a -bareword), then. */
4064 DEBUG_T( { PerlIO_printf(Perl_debug_log,
4065 "### '-%c' looked like a file test but was not\n",
4072 const char tmp = *s++;
4075 if (PL_expect == XOPERATOR)
4080 else if (*s == '>') {
4083 if (isIDFIRST_lazy_if(s,UTF)) {
4084 s = force_word(s,METHOD,FALSE,TRUE,FALSE);
4092 if (PL_expect == XOPERATOR)
4095 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
4097 OPERATOR('-'); /* unary minus */
4103 const char tmp = *s++;
4106 if (PL_expect == XOPERATOR)
4111 if (PL_expect == XOPERATOR)
4114 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
4121 if (PL_expect != XOPERATOR) {
4122 s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
4123 PL_expect = XOPERATOR;
4124 force_ident(PL_tokenbuf, '*');
4137 if (PL_expect == XOPERATOR) {
4141 PL_tokenbuf[0] = '%';
4142 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, TRUE);
4143 if (!PL_tokenbuf[1]) {
4146 PL_pending_ident = '%';
4157 && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR)
4158 && FEATURE_IS_ENABLED("~~"))
4165 const char tmp = *s++;
4171 goto just_a_word_zero_gv;
4174 switch (PL_expect) {
4180 if (!PL_in_my || PL_lex_state != LEX_NORMAL)
4182 PL_bufptr = s; /* update in case we back off */
4188 PL_expect = XTERMBLOCK;
4191 stuffstart = s - SvPVX(PL_linestr) - 1;
4195 while (isIDFIRST_lazy_if(s,UTF)) {
4198 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
4199 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len, 0))) {
4200 if (tmp < 0) tmp = -tmp;
4215 sv = newSVpvn(s, len);
4217 d = scan_str(d,TRUE,TRUE);
4219 /* MUST advance bufptr here to avoid bogus
4220 "at end of line" context messages from yyerror().
4222 PL_bufptr = s + len;
4223 yyerror("Unterminated attribute parameter in attribute list");
4227 return REPORT(0); /* EOF indicator */
4231 sv_catsv(sv, PL_lex_stuff);
4232 attrs = append_elem(OP_LIST, attrs,
4233 newSVOP(OP_CONST, 0, sv));
4234 SvREFCNT_dec(PL_lex_stuff);
4235 PL_lex_stuff = NULL;
4238 if (len == 6 && strnEQ(SvPVX(sv), "unique", len)) {
4240 if (PL_in_my == KEY_our) {
4242 GvUNIQUE_on(cGVOPx_gv(yylval.opval));
4244 /* skip to avoid loading attributes.pm */
4246 deprecate(":unique");
4249 Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables");
4252 /* NOTE: any CV attrs applied here need to be part of
4253 the CVf_BUILTIN_ATTRS define in cv.h! */
4254 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "lvalue", len)) {
4256 CvLVALUE_on(PL_compcv);
4258 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "locked", len)) {
4260 CvLOCKED_on(PL_compcv);
4262 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "method", len)) {
4264 CvMETHOD_on(PL_compcv);
4266 else if (!PL_in_my && len == 9 && strnEQ(SvPVX(sv), "assertion", len)) {
4268 CvASSERTION_on(PL_compcv);
4270 /* After we've set the flags, it could be argued that
4271 we don't need to do the attributes.pm-based setting
4272 process, and shouldn't bother appending recognized
4273 flags. To experiment with that, uncomment the
4274 following "else". (Note that's already been
4275 uncommented. That keeps the above-applied built-in
4276 attributes from being intercepted (and possibly
4277 rejected) by a package's attribute routines, but is
4278 justified by the performance win for the common case
4279 of applying only built-in attributes.) */
4281 attrs = append_elem(OP_LIST, attrs,
4282 newSVOP(OP_CONST, 0,
4286 if (*s == ':' && s[1] != ':')
4289 break; /* require real whitespace or :'s */
4290 /* XXX losing whitespace on sequential attributes here */
4294 = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
4295 if (*s != ';' && *s != '}' && *s != tmp
4296 && (tmp != '=' || *s != ')')) {
4297 const char q = ((*s == '\'') ? '"' : '\'');
4298 /* If here for an expression, and parsed no attrs, back
4300 if (tmp == '=' && !attrs) {
4304 /* MUST advance bufptr here to avoid bogus "at end of line"
4305 context messages from yyerror().
4308 yyerror( (const char *)
4310 ? Perl_form(aTHX_ "Invalid separator character "
4311 "%c%c%c in attribute list", q, *s, q)
4312 : "Unterminated attribute list" ) );
4320 start_force(PL_curforce);
4321 NEXTVAL_NEXTTOKE.opval = attrs;
4322 CURMAD('_', PL_nextwhite);
4327 PL_thistoken = newSVpvn(SvPVX(PL_linestr) + stuffstart,
4328 (s - SvPVX(PL_linestr)) - stuffstart);
4336 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
4337 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */
4345 const char tmp = *s++;
4350 const char tmp = *s++;
4358 if (PL_lex_brackets <= 0)
4359 yyerror("Unmatched right square bracket");
4362 if (PL_lex_state == LEX_INTERPNORMAL) {
4363 if (PL_lex_brackets == 0) {
4364 if (*s != '[' && *s != '{' && (*s != '-' || s[1] != '>'))
4365 PL_lex_state = LEX_INTERPEND;
4372 if (PL_lex_brackets > 100) {
4373 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
4375 switch (PL_expect) {
4377 if (PL_lex_formbrack) {
4381 if (PL_oldoldbufptr == PL_last_lop)
4382 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
4384 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
4385 OPERATOR(HASHBRACK);
4387 while (s < PL_bufend && SPACE_OR_TAB(*s))
4390 PL_tokenbuf[0] = '\0';
4391 if (d < PL_bufend && *d == '-') {
4392 PL_tokenbuf[0] = '-';
4394 while (d < PL_bufend && SPACE_OR_TAB(*d))
4397 if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
4398 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
4400 while (d < PL_bufend && SPACE_OR_TAB(*d))
4403 const char minus = (PL_tokenbuf[0] == '-');
4404 s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
4412 PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
4417 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
4422 if (PL_oldoldbufptr == PL_last_lop)
4423 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
4425 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
4428 if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
4430 /* This hack is to get the ${} in the message. */