3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
4 * 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, by Larry Wall and others
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
12 * "It all comes from here, the stench and the peril." --Frodo
16 * This file is the lexer for Perl. It's closely linked to the
19 * The main routine is yylex(), which returns the next token.
23 #define PERL_IN_TOKE_C
26 #define new_constant(a,b,c,d,e,f,g) \
27 S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g)
29 #define pl_yylval (PL_parser->yylval)
31 /* YYINITDEPTH -- initial size of the parser's stacks. */
32 #define YYINITDEPTH 200
34 /* XXX temporary backwards compatibility */
35 #define PL_lex_brackets (PL_parser->lex_brackets)
36 #define PL_lex_brackstack (PL_parser->lex_brackstack)
37 #define PL_lex_casemods (PL_parser->lex_casemods)
38 #define PL_lex_casestack (PL_parser->lex_casestack)
39 #define PL_lex_defer (PL_parser->lex_defer)
40 #define PL_lex_dojoin (PL_parser->lex_dojoin)
41 #define PL_lex_expect (PL_parser->lex_expect)
42 #define PL_lex_formbrack (PL_parser->lex_formbrack)
43 #define PL_lex_inpat (PL_parser->lex_inpat)
44 #define PL_lex_inwhat (PL_parser->lex_inwhat)
45 #define PL_lex_op (PL_parser->lex_op)
46 #define PL_lex_repl (PL_parser->lex_repl)
47 #define PL_lex_starts (PL_parser->lex_starts)
48 #define PL_lex_stuff (PL_parser->lex_stuff)
49 #define PL_multi_start (PL_parser->multi_start)
50 #define PL_multi_open (PL_parser->multi_open)
51 #define PL_multi_close (PL_parser->multi_close)
52 #define PL_pending_ident (PL_parser->pending_ident)
53 #define PL_preambled (PL_parser->preambled)
54 #define PL_sublex_info (PL_parser->sublex_info)
55 #define PL_linestr (PL_parser->linestr)
56 #define PL_expect (PL_parser->expect)
57 #define PL_copline (PL_parser->copline)
58 #define PL_bufptr (PL_parser->bufptr)
59 #define PL_oldbufptr (PL_parser->oldbufptr)
60 #define PL_oldoldbufptr (PL_parser->oldoldbufptr)
61 #define PL_linestart (PL_parser->linestart)
62 #define PL_bufend (PL_parser->bufend)
63 #define PL_last_uni (PL_parser->last_uni)
64 #define PL_last_lop (PL_parser->last_lop)
65 #define PL_last_lop_op (PL_parser->last_lop_op)
66 #define PL_lex_state (PL_parser->lex_state)
67 #define PL_rsfp (PL_parser->rsfp)
68 #define PL_rsfp_filters (PL_parser->rsfp_filters)
69 #define PL_in_my (PL_parser->in_my)
70 #define PL_in_my_stash (PL_parser->in_my_stash)
71 #define PL_tokenbuf (PL_parser->tokenbuf)
72 #define PL_multi_end (PL_parser->multi_end)
73 #define PL_error_count (PL_parser->error_count)
76 # define PL_endwhite (PL_parser->endwhite)
77 # define PL_faketokens (PL_parser->faketokens)
78 # define PL_lasttoke (PL_parser->lasttoke)
79 # define PL_nextwhite (PL_parser->nextwhite)
80 # define PL_realtokenstart (PL_parser->realtokenstart)
81 # define PL_skipwhite (PL_parser->skipwhite)
82 # define PL_thisclose (PL_parser->thisclose)
83 # define PL_thismad (PL_parser->thismad)
84 # define PL_thisopen (PL_parser->thisopen)
85 # define PL_thisstuff (PL_parser->thisstuff)
86 # define PL_thistoken (PL_parser->thistoken)
87 # define PL_thiswhite (PL_parser->thiswhite)
88 # define PL_thiswhite (PL_parser->thiswhite)
89 # define PL_nexttoke (PL_parser->nexttoke)
90 # define PL_curforce (PL_parser->curforce)
92 # define PL_nexttoke (PL_parser->nexttoke)
93 # define PL_nexttype (PL_parser->nexttype)
94 # define PL_nextval (PL_parser->nextval)
98 S_pending_ident(pTHX);
100 static const char ident_too_long[] = "Identifier too long";
101 static const char commaless_variable_list[] = "comma-less variable list";
103 #ifndef PERL_NO_UTF16_FILTER
104 static I32 utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen);
105 static I32 utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen);
109 # define CURMAD(slot,sv) if (PL_madskills) { curmad(slot,sv); sv = 0; }
110 # define NEXTVAL_NEXTTOKE PL_nexttoke[PL_curforce].next_val
112 # define CURMAD(slot,sv)
113 # define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
116 #define XFAKEBRACK 128
117 #define XENUMMASK 127
119 #ifdef USE_UTF8_SCRIPTS
120 # define UTF (!IN_BYTES)
122 # define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8))
125 /* In variables named $^X, these are the legal values for X.
126 * 1999-02-27 mjd-perl-patch@plover.com */
127 #define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
129 /* On MacOS, respect nonbreaking spaces */
130 #ifdef MACOS_TRADITIONAL
131 #define SPACE_OR_TAB(c) ((c)==' '||(c)=='\312'||(c)=='\t')
133 #define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
136 /* LEX_* are values for PL_lex_state, the state of the lexer.
137 * They are arranged oddly so that the guard on the switch statement
138 * can get by with a single comparison (if the compiler is smart enough).
141 /* #define LEX_NOTPARSING 11 is done in perl.h. */
143 #define LEX_NORMAL 10 /* normal code (ie not within "...") */
144 #define LEX_INTERPNORMAL 9 /* code within a string, eg "$foo[$x+1]" */
145 #define LEX_INTERPCASEMOD 8 /* expecting a \U, \Q or \E etc */
146 #define LEX_INTERPPUSH 7 /* starting a new sublex parse level */
147 #define LEX_INTERPSTART 6 /* expecting the start of a $var */
149 /* at end of code, eg "$x" followed by: */
150 #define LEX_INTERPEND 5 /* ... eg not one of [, { or -> */
151 #define LEX_INTERPENDMAYBE 4 /* ... eg one of [, { or -> */
153 #define LEX_INTERPCONCAT 3 /* expecting anything, eg at start of
154 string or after \E, $foo, etc */
155 #define LEX_INTERPCONST 2 /* NOT USED */
156 #define LEX_FORMLINE 1 /* expecting a format line */
157 #define LEX_KNOWNEXT 0 /* next token known; just return it */
161 static const char* const lex_state_names[] = {
180 #include "keywords.h"
182 /* CLINE is a macro that ensures PL_copline has a sane value */
187 #define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
190 # define SKIPSPACE0(s) skipspace0(s)
191 # define SKIPSPACE1(s) skipspace1(s)
192 # define SKIPSPACE2(s,tsv) skipspace2(s,&tsv)
193 # define PEEKSPACE(s) skipspace2(s,0)
195 # define SKIPSPACE0(s) skipspace(s)
196 # define SKIPSPACE1(s) skipspace(s)
197 # define SKIPSPACE2(s,tsv) skipspace(s)
198 # define PEEKSPACE(s) skipspace(s)
202 * Convenience functions to return different tokens and prime the
203 * lexer for the next token. They all take an argument.
205 * TOKEN : generic token (used for '(', DOLSHARP, etc)
206 * OPERATOR : generic operator
207 * AOPERATOR : assignment operator
208 * PREBLOCK : beginning the block after an if, while, foreach, ...
209 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
210 * PREREF : *EXPR where EXPR is not a simple identifier
211 * TERM : expression term
212 * LOOPX : loop exiting command (goto, last, dump, etc)
213 * FTST : file test operator
214 * FUN0 : zero-argument function
215 * FUN1 : not used, except for not, which isn't a UNIOP
216 * BOop : bitwise or or xor
218 * SHop : shift operator
219 * PWop : power operator
220 * PMop : pattern-matching operator
221 * Aop : addition-level operator
222 * Mop : multiplication-level operator
223 * Eop : equality-testing operator
224 * Rop : relational operator <= != gt
226 * Also see LOP and lop() below.
229 #ifdef DEBUGGING /* Serve -DT. */
230 # define REPORT(retval) tokereport((I32)retval)
232 # define REPORT(retval) (retval)
235 #define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
236 #define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
237 #define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
238 #define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
239 #define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
240 #define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
241 #define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
242 #define LOOPX(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
243 #define FTST(f) return (pl_yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
244 #define FUN0(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
245 #define FUN1(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
246 #define BOop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
247 #define BAop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
248 #define SHop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
249 #define PWop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
250 #define PMop(f) return(pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
251 #define Aop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
252 #define Mop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
253 #define Eop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
254 #define Rop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
256 /* This bit of chicanery makes a unary function followed by
257 * a parenthesis into a function with one argument, highest precedence.
258 * The UNIDOR macro is for unary functions that can be followed by the //
259 * operator (such as C<shift // 0>).
261 #define UNI2(f,x) { \
262 pl_yylval.ival = f; \
265 PL_last_uni = PL_oldbufptr; \
266 PL_last_lop_op = f; \
268 return REPORT( (int)FUNC1 ); \
270 return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
272 #define UNI(f) UNI2(f,XTERM)
273 #define UNIDOR(f) UNI2(f,XTERMORDORDOR)
275 #define UNIBRACK(f) { \
276 pl_yylval.ival = f; \
278 PL_last_uni = PL_oldbufptr; \
280 return REPORT( (int)FUNC1 ); \
282 return REPORT( (*s == '(') ? (int)FUNC1 : (int)UNIOP ); \
285 /* grandfather return to old style */
286 #define OLDLOP(f) return(pl_yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
290 /* how to interpret the pl_yylval associated with the token */
294 TOKENTYPE_OPNUM, /* pl_yylval.ival contains an opcode number */
300 static struct debug_tokens {
302 enum token_type type;
304 } const debug_tokens[] =
306 { ADDOP, TOKENTYPE_OPNUM, "ADDOP" },
307 { ANDAND, TOKENTYPE_NONE, "ANDAND" },
308 { ANDOP, TOKENTYPE_NONE, "ANDOP" },
309 { ANONSUB, TOKENTYPE_IVAL, "ANONSUB" },
310 { ARROW, TOKENTYPE_NONE, "ARROW" },
311 { ASSIGNOP, TOKENTYPE_OPNUM, "ASSIGNOP" },
312 { BITANDOP, TOKENTYPE_OPNUM, "BITANDOP" },
313 { BITOROP, TOKENTYPE_OPNUM, "BITOROP" },
314 { COLONATTR, TOKENTYPE_NONE, "COLONATTR" },
315 { CONTINUE, TOKENTYPE_NONE, "CONTINUE" },
316 { DEFAULT, TOKENTYPE_NONE, "DEFAULT" },
317 { DO, TOKENTYPE_NONE, "DO" },
318 { DOLSHARP, TOKENTYPE_NONE, "DOLSHARP" },
319 { DORDOR, TOKENTYPE_NONE, "DORDOR" },
320 { DOROP, TOKENTYPE_OPNUM, "DOROP" },
321 { DOTDOT, TOKENTYPE_IVAL, "DOTDOT" },
322 { ELSE, TOKENTYPE_NONE, "ELSE" },
323 { ELSIF, TOKENTYPE_IVAL, "ELSIF" },
324 { EQOP, TOKENTYPE_OPNUM, "EQOP" },
325 { FOR, TOKENTYPE_IVAL, "FOR" },
326 { FORMAT, TOKENTYPE_NONE, "FORMAT" },
327 { FUNC, TOKENTYPE_OPNUM, "FUNC" },
328 { FUNC0, TOKENTYPE_OPNUM, "FUNC0" },
329 { FUNC0SUB, TOKENTYPE_OPVAL, "FUNC0SUB" },
330 { FUNC1, TOKENTYPE_OPNUM, "FUNC1" },
331 { FUNCMETH, TOKENTYPE_OPVAL, "FUNCMETH" },
332 { GIVEN, TOKENTYPE_IVAL, "GIVEN" },
333 { HASHBRACK, TOKENTYPE_NONE, "HASHBRACK" },
334 { IF, TOKENTYPE_IVAL, "IF" },
335 { LABEL, TOKENTYPE_PVAL, "LABEL" },
336 { LOCAL, TOKENTYPE_IVAL, "LOCAL" },
337 { LOOPEX, TOKENTYPE_OPNUM, "LOOPEX" },
338 { LSTOP, TOKENTYPE_OPNUM, "LSTOP" },
339 { LSTOPSUB, TOKENTYPE_OPVAL, "LSTOPSUB" },
340 { MATCHOP, TOKENTYPE_OPNUM, "MATCHOP" },
341 { METHOD, TOKENTYPE_OPVAL, "METHOD" },
342 { MULOP, TOKENTYPE_OPNUM, "MULOP" },
343 { MY, TOKENTYPE_IVAL, "MY" },
344 { MYSUB, TOKENTYPE_NONE, "MYSUB" },
345 { NOAMP, TOKENTYPE_NONE, "NOAMP" },
346 { NOTOP, TOKENTYPE_NONE, "NOTOP" },
347 { OROP, TOKENTYPE_IVAL, "OROP" },
348 { OROR, TOKENTYPE_NONE, "OROR" },
349 { PACKAGE, TOKENTYPE_NONE, "PACKAGE" },
350 { PMFUNC, TOKENTYPE_OPVAL, "PMFUNC" },
351 { POSTDEC, TOKENTYPE_NONE, "POSTDEC" },
352 { POSTINC, TOKENTYPE_NONE, "POSTINC" },
353 { POWOP, TOKENTYPE_OPNUM, "POWOP" },
354 { PREDEC, TOKENTYPE_NONE, "PREDEC" },
355 { PREINC, TOKENTYPE_NONE, "PREINC" },
356 { PRIVATEREF, TOKENTYPE_OPVAL, "PRIVATEREF" },
357 { REFGEN, TOKENTYPE_NONE, "REFGEN" },
358 { RELOP, TOKENTYPE_OPNUM, "RELOP" },
359 { SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" },
360 { SUB, TOKENTYPE_NONE, "SUB" },
361 { THING, TOKENTYPE_OPVAL, "THING" },
362 { UMINUS, TOKENTYPE_NONE, "UMINUS" },
363 { UNIOP, TOKENTYPE_OPNUM, "UNIOP" },
364 { UNIOPSUB, TOKENTYPE_OPVAL, "UNIOPSUB" },
365 { UNLESS, TOKENTYPE_IVAL, "UNLESS" },
366 { UNTIL, TOKENTYPE_IVAL, "UNTIL" },
367 { USE, TOKENTYPE_IVAL, "USE" },
368 { WHEN, TOKENTYPE_IVAL, "WHEN" },
369 { WHILE, TOKENTYPE_IVAL, "WHILE" },
370 { WORD, TOKENTYPE_OPVAL, "WORD" },
371 { 0, TOKENTYPE_NONE, NULL }
374 /* dump the returned token in rv, plus any optional arg in pl_yylval */
377 S_tokereport(pTHX_ I32 rv)
381 const char *name = NULL;
382 enum token_type type = TOKENTYPE_NONE;
383 const struct debug_tokens *p;
384 SV* const report = newSVpvs("<== ");
386 for (p = debug_tokens; p->token; p++) {
387 if (p->token == (int)rv) {
394 Perl_sv_catpv(aTHX_ report, name);
395 else if ((char)rv > ' ' && (char)rv < '~')
396 Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
398 sv_catpvs(report, "EOF");
400 Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
403 case TOKENTYPE_GVVAL: /* doesn't appear to be used */
406 Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)pl_yylval.ival);
408 case TOKENTYPE_OPNUM:
409 Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
410 PL_op_name[pl_yylval.ival]);
413 Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", pl_yylval.pval);
415 case TOKENTYPE_OPVAL:
416 if (pl_yylval.opval) {
417 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
418 PL_op_name[pl_yylval.opval->op_type]);
419 if (pl_yylval.opval->op_type == OP_CONST) {
420 Perl_sv_catpvf(aTHX_ report, " %s",
421 SvPEEK(cSVOPx_sv(pl_yylval.opval)));
426 sv_catpvs(report, "(opval=null)");
429 PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
435 /* print the buffer with suitable escapes */
438 S_printbuf(pTHX_ const char* fmt, const char* s)
440 SV* const tmp = newSVpvs("");
441 PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
450 * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
451 * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
455 S_ao(pTHX_ int toketype)
458 if (*PL_bufptr == '=') {
460 if (toketype == ANDAND)
461 pl_yylval.ival = OP_ANDASSIGN;
462 else if (toketype == OROR)
463 pl_yylval.ival = OP_ORASSIGN;
464 else if (toketype == DORDOR)
465 pl_yylval.ival = OP_DORASSIGN;
473 * When Perl expects an operator and finds something else, no_op
474 * prints the warning. It always prints "<something> found where
475 * operator expected. It prints "Missing semicolon on previous line?"
476 * if the surprise occurs at the start of the line. "do you need to
477 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
478 * where the compiler doesn't know if foo is a method call or a function.
479 * It prints "Missing operator before end of line" if there's nothing
480 * after the missing operator, or "... before <...>" if there is something
481 * after the missing operator.
485 S_no_op(pTHX_ const char *what, char *s)
488 char * const oldbp = PL_bufptr;
489 const bool is_first = (PL_oldbufptr == PL_linestart);
495 yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
496 if (ckWARN_d(WARN_SYNTAX)) {
498 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
499 "\t(Missing semicolon on previous line?)\n");
500 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
502 for (t = PL_oldoldbufptr; (isALNUM_lazy_if(t,UTF) || *t == ':'); t++)
504 if (t < PL_bufptr && isSPACE(*t))
505 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
506 "\t(Do you need to predeclare %.*s?)\n",
507 (int)(t - PL_oldoldbufptr), PL_oldoldbufptr);
511 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
512 "\t(Missing operator before %.*s?)\n", (int)(s - oldbp), oldbp);
520 * Complain about missing quote/regexp/heredoc terminator.
521 * If it's called with NULL then it cauterizes the line buffer.
522 * If we're in a delimited string and the delimiter is a control
523 * character, it's reformatted into a two-char sequence like ^C.
528 S_missingterm(pTHX_ char *s)
534 char * const nl = strrchr(s,'\n');
540 iscntrl(PL_multi_close)
542 PL_multi_close < 32 || PL_multi_close == 127
546 tmpbuf[1] = (char)toCTRL(PL_multi_close);
551 *tmpbuf = (char)PL_multi_close;
555 q = strchr(s,'"') ? '\'' : '"';
556 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
559 #define FEATURE_IS_ENABLED(name) \
560 ((0 != (PL_hints & HINT_LOCALIZE_HH)) \
561 && S_feature_is_enabled(aTHX_ STR_WITH_LEN(name)))
562 /* The longest string we pass in. */
563 #define MAX_FEATURE_LEN (sizeof("switch")-1)
566 * S_feature_is_enabled
567 * Check whether the named feature is enabled.
570 S_feature_is_enabled(pTHX_ const char *name, STRLEN namelen)
573 HV * const hinthv = GvHV(PL_hintgv);
574 char he_name[8 + MAX_FEATURE_LEN] = "feature_";
575 assert(namelen <= MAX_FEATURE_LEN);
576 memcpy(&he_name[8], name, namelen);
578 return (hinthv && hv_exists(hinthv, he_name, 8 + namelen));
586 Perl_deprecate(pTHX_ const char *s)
588 if (ckWARN(WARN_DEPRECATED))
589 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED), "Use of %s is deprecated", s);
593 Perl_deprecate_old(pTHX_ const char *s)
595 /* This function should NOT be called for any new deprecated warnings */
596 /* Use Perl_deprecate instead */
598 /* It is here to maintain backward compatibility with the pre-5.8 */
599 /* warnings category hierarchy. The "deprecated" category used to */
600 /* live under the "syntax" category. It is now a top-level category */
601 /* in its own right. */
603 if (ckWARN2(WARN_DEPRECATED, WARN_SYNTAX))
604 Perl_warner(aTHX_ packWARN2(WARN_DEPRECATED, WARN_SYNTAX),
605 "Use of %s is deprecated", s);
609 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
610 * utf16-to-utf8-reversed.
613 #ifdef PERL_CR_FILTER
617 register const char *s = SvPVX_const(sv);
618 register const char * const e = s + SvCUR(sv);
619 /* outer loop optimized to do nothing if there are no CR-LFs */
621 if (*s++ == '\r' && *s == '\n') {
622 /* hit a CR-LF, need to copy the rest */
623 register char *d = s - 1;
626 if (*s == '\r' && s[1] == '\n')
637 S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
639 const I32 count = FILTER_READ(idx+1, sv, maxlen);
640 if (count > 0 && !maxlen)
651 * Create a parser object and initialise its parser and lexer fields
653 * rsfp is the opened file handle to read from (if any),
655 * line holds any initial content already read from the file (or in
656 * the case of no file, such as an eval, the whole contents);
658 * new_filter indicates that this is a new file and it shouldn't inherit
659 * the filters from the current parser (ie require).
663 Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, bool new_filter)
666 const char *s = NULL;
668 yy_parser *parser, *oparser;
670 /* create and initialise a parser */
672 Newxz(parser, 1, yy_parser);
673 parser->old_parser = oparser = PL_parser;
676 Newx(parser->stack, YYINITDEPTH, yy_stack_frame);
677 parser->ps = parser->stack;
678 parser->stack_size = YYINITDEPTH;
680 parser->stack->state = 0;
681 parser->yyerrstatus = 0;
682 parser->yychar = YYEMPTY; /* Cause a token to be read. */
684 /* on scope exit, free this parser and restore any outer one */
686 parser->saved_curcop = PL_curcop;
688 /* initialise lexer state */
691 parser->curforce = -1;
693 parser->nexttoke = 0;
695 parser->copline = NOLINE;
696 parser->lex_state = LEX_NORMAL;
697 parser->expect = XSTATE;
699 parser->rsfp_filters = (new_filter || !oparser) ? newAV()
700 : (AV*)SvREFCNT_inc(oparser->rsfp_filters);
702 Newx(parser->lex_brackstack, 120, char);
703 Newx(parser->lex_casestack, 12, char);
704 *parser->lex_casestack = '\0';
707 s = SvPV_const(line, len);
713 parser->linestr = newSVpvs("\n;");
714 } else if (SvREADONLY(line) || s[len-1] != ';') {
715 parser->linestr = newSVsv(line);
717 sv_catpvs(parser->linestr, "\n;");
720 SvREFCNT_inc_simple_void_NN(line);
721 parser->linestr = line;
723 parser->oldoldbufptr =
726 parser->linestart = SvPVX(parser->linestr);
727 parser->bufend = parser->bufptr + SvCUR(parser->linestr);
728 parser->last_lop = parser->last_uni = NULL;
732 /* delete a parser object */
735 Perl_parser_free(pTHX_ const yy_parser *parser)
737 PL_curcop = parser->saved_curcop;
738 SvREFCNT_dec(parser->linestr);
740 if (parser->rsfp == PerlIO_stdin())
741 PerlIO_clearerr(parser->rsfp);
742 else if (parser->rsfp && parser->old_parser
743 && parser->rsfp != parser->old_parser->rsfp)
744 PerlIO_close(parser->rsfp);
745 SvREFCNT_dec(parser->rsfp_filters);
747 Safefree(parser->stack);
748 Safefree(parser->lex_brackstack);
749 Safefree(parser->lex_casestack);
750 PL_parser = parser->old_parser;
757 * Finalizer for lexing operations. Must be called when the parser is
758 * done with the lexer.
765 PL_doextract = FALSE;
770 * This subroutine has nothing to do with tilting, whether at windmills
771 * or pinball tables. Its name is short for "increment line". It
772 * increments the current line number in CopLINE(PL_curcop) and checks
773 * to see whether the line starts with a comment of the form
774 * # line 500 "foo.pm"
775 * If so, it sets the current line number and file to the values in the comment.
779 S_incline(pTHX_ const char *s)
786 CopLINE_inc(PL_curcop);
789 while (SPACE_OR_TAB(*s))
791 if (strnEQ(s, "line", 4))
795 if (SPACE_OR_TAB(*s))
799 while (SPACE_OR_TAB(*s))
807 while (SPACE_OR_TAB(*s))
809 if (*s == '"' && (t = strchr(s+1, '"'))) {
819 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
821 if (*e != '\n' && *e != '\0')
822 return; /* false alarm */
825 const STRLEN len = t - s;
827 SV *const temp_sv = CopFILESV(PL_curcop);
833 tmplen = SvCUR(temp_sv);
839 if (tmplen > 7 && strnEQ(cf, "(eval ", 6)) {
840 /* must copy *{"::_<(eval N)[oldfilename:L]"}
841 * to *{"::_<newfilename"} */
842 /* However, the long form of evals is only turned on by the
843 debugger - usually they're "(eval %lu)" */
847 STRLEN tmplen2 = len;
848 if (tmplen + 2 <= sizeof smallbuf)
851 Newx(tmpbuf, tmplen + 2, char);
854 memcpy(tmpbuf + 2, cf, tmplen);
856 gvp = (GV**)hv_fetch(PL_defstash, tmpbuf, tmplen, FALSE);
861 if (tmplen2 + 2 <= sizeof smallbuf)
864 Newx(tmpbuf2, tmplen2 + 2, char);
866 if (tmpbuf2 != smallbuf || tmpbuf != smallbuf) {
867 /* Either they malloc'd it, or we malloc'd it,
868 so no prefix is present in ours. */
873 memcpy(tmpbuf2 + 2, s, tmplen2);
876 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
878 gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
879 /* adjust ${"::_<newfilename"} to store the new file name */
880 GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
881 GvHV(gv2) = (HV*)SvREFCNT_inc(GvHV(*gvp));
882 GvAV(gv2) = (AV*)SvREFCNT_inc(GvAV(*gvp));
885 if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
887 if (tmpbuf != smallbuf) Safefree(tmpbuf);
890 CopFILE_free(PL_curcop);
891 CopFILE_setn(PL_curcop, s, len);
893 CopLINE_set(PL_curcop, atoi(n)-1);
897 /* skip space before PL_thistoken */
900 S_skipspace0(pTHX_ register char *s)
907 PL_thiswhite = newSVpvs("");
908 sv_catsv(PL_thiswhite, PL_skipwhite);
909 sv_free(PL_skipwhite);
912 PL_realtokenstart = s - SvPVX(PL_linestr);
916 /* skip space after PL_thistoken */
919 S_skipspace1(pTHX_ register char *s)
921 const char *start = s;
922 I32 startoff = start - SvPVX(PL_linestr);
927 start = SvPVX(PL_linestr) + startoff;
928 if (!PL_thistoken && PL_realtokenstart >= 0) {
929 const char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
930 PL_thistoken = newSVpvn(tstart, start - tstart);
932 PL_realtokenstart = -1;
935 PL_nextwhite = newSVpvs("");
936 sv_catsv(PL_nextwhite, PL_skipwhite);
937 sv_free(PL_skipwhite);
944 S_skipspace2(pTHX_ register char *s, SV **svp)
947 const I32 bufptroff = PL_bufptr - SvPVX(PL_linestr);
948 const I32 startoff = s - SvPVX(PL_linestr);
951 PL_bufptr = SvPVX(PL_linestr) + bufptroff;
952 if (!PL_madskills || !svp)
954 start = SvPVX(PL_linestr) + startoff;
955 if (!PL_thistoken && PL_realtokenstart >= 0) {
956 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
957 PL_thistoken = newSVpvn(tstart, start - tstart);
958 PL_realtokenstart = -1;
963 sv_setsv(*svp, PL_skipwhite);
964 sv_free(PL_skipwhite);
973 S_update_debugger_info(pTHX_ SV *orig_sv, const char *buf, STRLEN len)
975 AV *av = CopFILEAVx(PL_curcop);
977 SV * const sv = newSV_type(SVt_PVMG);
979 sv_setsv(sv, orig_sv);
981 sv_setpvn(sv, buf, len);
984 av_store(av, (I32)CopLINE(PL_curcop), sv);
990 * Called to gobble the appropriate amount and type of whitespace.
991 * Skips comments as well.
995 S_skipspace(pTHX_ register char *s)
1000 int startoff = s - SvPVX(PL_linestr);
1003 sv_free(PL_skipwhite);
1008 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
1009 while (s < PL_bufend && SPACE_OR_TAB(*s))
1019 SSize_t oldprevlen, oldoldprevlen;
1020 SSize_t oldloplen = 0, oldunilen = 0;
1021 while (s < PL_bufend && isSPACE(*s)) {
1022 if (*s++ == '\n' && PL_in_eval && !PL_rsfp)
1027 if (s < PL_bufend && *s == '#') {
1028 while (s < PL_bufend && *s != '\n')
1030 if (s < PL_bufend) {
1032 if (PL_in_eval && !PL_rsfp) {
1039 /* only continue to recharge the buffer if we're at the end
1040 * of the buffer, we're not reading from a source filter, and
1041 * we're in normal lexing mode
1043 if (s < PL_bufend || !PL_rsfp || PL_sublex_info.sub_inwhat ||
1044 PL_lex_state == LEX_FORMLINE)
1051 /* try to recharge the buffer */
1053 curoff = s - SvPVX(PL_linestr);
1056 if ((s = filter_gets(PL_linestr, PL_rsfp,
1057 (prevlen = SvCUR(PL_linestr)))) == NULL)
1060 if (PL_madskills && curoff != startoff) {
1062 PL_skipwhite = newSVpvs("");
1063 sv_catpvn(PL_skipwhite, SvPVX(PL_linestr) + startoff,
1067 /* mustn't throw out old stuff yet if madpropping */
1068 SvCUR(PL_linestr) = curoff;
1069 s = SvPVX(PL_linestr) + curoff;
1071 if (curoff && s[-1] == '\n')
1075 /* end of file. Add on the -p or -n magic */
1076 /* XXX these shouldn't really be added here, can't set PL_faketokens */
1079 sv_catpvs(PL_linestr,
1080 ";}continue{print or die qq(-p destination: $!\\n);}");
1082 sv_setpvs(PL_linestr,
1083 ";}continue{print or die qq(-p destination: $!\\n);}");
1085 PL_minus_n = PL_minus_p = 0;
1087 else if (PL_minus_n) {
1089 sv_catpvn(PL_linestr, ";}", 2);
1091 sv_setpvn(PL_linestr, ";}", 2);
1097 sv_catpvn(PL_linestr,";", 1);
1099 sv_setpvn(PL_linestr,";", 1);
1102 /* reset variables for next time we lex */
1103 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart
1109 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
1110 PL_last_lop = PL_last_uni = NULL;
1112 /* Close the filehandle. Could be from
1113 * STDIN, or a regular file. If we were reading code from
1114 * STDIN (because the commandline held no -e or filename)
1115 * then we don't close it, we reset it so the code can
1116 * read from STDIN too.
1119 if ((PerlIO*)PL_rsfp == PerlIO_stdin())
1120 PerlIO_clearerr(PL_rsfp);
1122 (void)PerlIO_close(PL_rsfp);
1127 /* not at end of file, so we only read another line */
1128 /* make corresponding updates to old pointers, for yyerror() */
1129 oldprevlen = PL_oldbufptr - PL_bufend;
1130 oldoldprevlen = PL_oldoldbufptr - PL_bufend;
1132 oldunilen = PL_last_uni - PL_bufend;
1134 oldloplen = PL_last_lop - PL_bufend;
1135 PL_linestart = PL_bufptr = s + prevlen;
1136 PL_bufend = s + SvCUR(PL_linestr);
1138 PL_oldbufptr = s + oldprevlen;
1139 PL_oldoldbufptr = s + oldoldprevlen;
1141 PL_last_uni = s + oldunilen;
1143 PL_last_lop = s + oldloplen;
1146 /* debugger active and we're not compiling the debugger code,
1147 * so store the line into the debugger's array of lines
1149 if (PERLDB_LINE && PL_curstash != PL_debstash)
1150 update_debugger_info(NULL, PL_bufptr, PL_bufend - PL_bufptr);
1157 PL_skipwhite = newSVpvs("");
1158 curoff = s - SvPVX(PL_linestr);
1159 if (curoff - startoff)
1160 sv_catpvn(PL_skipwhite, SvPVX(PL_linestr) + startoff,
1169 * Check the unary operators to ensure there's no ambiguity in how they're
1170 * used. An ambiguous piece of code would be:
1172 * This doesn't mean rand() + 5. Because rand() is a unary operator,
1173 * the +5 is its argument.
1183 if (PL_oldoldbufptr != PL_last_uni)
1185 while (isSPACE(*PL_last_uni))
1188 while (isALNUM_lazy_if(s,UTF) || *s == '-')
1190 if ((t = strchr(s, '(')) && t < PL_bufptr)
1193 if (ckWARN_d(WARN_AMBIGUOUS)){
1194 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
1195 "Warning: Use of \"%.*s\" without parentheses is ambiguous",
1196 (int)(s - PL_last_uni), PL_last_uni);
1201 * LOP : macro to build a list operator. Its behaviour has been replaced
1202 * with a subroutine, S_lop() for which LOP is just another name.
1205 #define LOP(f,x) return lop(f,x,s)
1209 * Build a list operator (or something that might be one). The rules:
1210 * - if we have a next token, then it's a list operator [why?]
1211 * - if the next thing is an opening paren, then it's a function
1212 * - else it's a list operator
1216 S_lop(pTHX_ I32 f, int x, char *s)
1223 PL_last_lop = PL_oldbufptr;
1224 PL_last_lop_op = (OPCODE)f;
1227 return REPORT(LSTOP);
1230 return REPORT(LSTOP);
1233 return REPORT(FUNC);
1236 return REPORT(FUNC);
1238 return REPORT(LSTOP);
1244 * Sets up for an eventual force_next(). start_force(0) basically does
1245 * an unshift, while start_force(-1) does a push. yylex removes items
1250 S_start_force(pTHX_ int where)
1254 if (where < 0) /* so people can duplicate start_force(PL_curforce) */
1255 where = PL_lasttoke;
1256 assert(PL_curforce < 0 || PL_curforce == where);
1257 if (PL_curforce != where) {
1258 for (i = PL_lasttoke; i > where; --i) {
1259 PL_nexttoke[i] = PL_nexttoke[i-1];
1263 if (PL_curforce < 0) /* in case of duplicate start_force() */
1264 Zero(&PL_nexttoke[where], 1, NEXTTOKE);
1265 PL_curforce = where;
1268 curmad('^', newSVpvs(""));
1269 CURMAD('_', PL_nextwhite);
1274 S_curmad(pTHX_ char slot, SV *sv)
1280 if (PL_curforce < 0)
1281 where = &PL_thismad;
1283 where = &PL_nexttoke[PL_curforce].next_mad;
1286 sv_setpvn(sv, "", 0);
1289 if (UTF && is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
1291 else if (PL_encoding) {
1292 sv_recode_to_utf8(sv, PL_encoding);
1297 /* keep a slot open for the head of the list? */
1298 if (slot != '_' && *where && (*where)->mad_key == '^') {
1299 (*where)->mad_key = slot;
1300 sv_free((SV*)((*where)->mad_val));
1301 (*where)->mad_val = (void*)sv;
1304 addmad(newMADsv(slot, sv), where, 0);
1307 # define start_force(where) NOOP
1308 # define curmad(slot, sv) NOOP
1313 * When the lexer realizes it knows the next token (for instance,
1314 * it is reordering tokens for the parser) then it can call S_force_next
1315 * to know what token to return the next time the lexer is called. Caller
1316 * will need to set PL_nextval[] (or PL_nexttoke[].next_val with PERL_MAD),
1317 * and possibly PL_expect to ensure the lexer handles the token correctly.
1321 S_force_next(pTHX_ I32 type)
1325 if (PL_curforce < 0)
1326 start_force(PL_lasttoke);
1327 PL_nexttoke[PL_curforce].next_type = type;
1328 if (PL_lex_state != LEX_KNOWNEXT)
1329 PL_lex_defer = PL_lex_state;
1330 PL_lex_state = LEX_KNOWNEXT;
1331 PL_lex_expect = PL_expect;
1334 PL_nexttype[PL_nexttoke] = type;
1336 if (PL_lex_state != LEX_KNOWNEXT) {
1337 PL_lex_defer = PL_lex_state;
1338 PL_lex_expect = PL_expect;
1339 PL_lex_state = LEX_KNOWNEXT;
1345 S_newSV_maybe_utf8(pTHX_ const char *start, STRLEN len)
1348 SV * const sv = newSVpvn_utf8(start, len,
1350 && is_utf8_string((const U8*)start, len));
1356 * When the lexer knows the next thing is a word (for instance, it has
1357 * just seen -> and it knows that the next char is a word char, then
1358 * it calls S_force_word to stick the next word into the PL_nexttoke/val
1362 * char *start : buffer position (must be within PL_linestr)
1363 * int token : PL_next* will be this type of bare word (e.g., METHOD,WORD)
1364 * int check_keyword : if true, Perl checks to make sure the word isn't
1365 * a keyword (do this if the word is a label, e.g. goto FOO)
1366 * int allow_pack : if true, : characters will also be allowed (require,
1367 * use, etc. do this)
1368 * int allow_initial_tick : used by the "sub" lexer only.
1372 S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
1378 start = SKIPSPACE1(start);
1380 if (isIDFIRST_lazy_if(s,UTF) ||
1381 (allow_pack && *s == ':') ||
1382 (allow_initial_tick && *s == '\'') )
1384 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
1385 if (check_keyword && keyword(PL_tokenbuf, len, 0))
1387 start_force(PL_curforce);
1389 curmad('X', newSVpvn(start,s-start));
1390 if (token == METHOD) {
1395 PL_expect = XOPERATOR;
1399 curmad('g', newSVpvs( "forced" ));
1400 NEXTVAL_NEXTTOKE.opval
1401 = (OP*)newSVOP(OP_CONST,0,
1402 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
1403 NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
1411 * Called when the lexer wants $foo *foo &foo etc, but the program
1412 * text only contains the "foo" portion. The first argument is a pointer
1413 * to the "foo", and the second argument is the type symbol to prefix.
1414 * Forces the next token to be a "WORD".
1415 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
1419 S_force_ident(pTHX_ register const char *s, int kind)
1423 const STRLEN len = strlen(s);
1424 OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpvn(s, len));
1425 start_force(PL_curforce);
1426 NEXTVAL_NEXTTOKE.opval = o;
1429 o->op_private = OPpCONST_ENTERED;
1430 /* XXX see note in pp_entereval() for why we forgo typo
1431 warnings if the symbol must be introduced in an eval.
1433 gv_fetchpvn_flags(s, len,
1434 PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL)
1436 kind == '$' ? SVt_PV :
1437 kind == '@' ? SVt_PVAV :
1438 kind == '%' ? SVt_PVHV :
1446 Perl_str_to_version(pTHX_ SV *sv)
1451 const char *start = SvPV_const(sv,len);
1452 const char * const end = start + len;
1453 const bool utf = SvUTF8(sv) ? TRUE : FALSE;
1454 while (start < end) {
1458 n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
1463 retval += ((NV)n)/nshift;
1472 * Forces the next token to be a version number.
1473 * If the next token appears to be an invalid version number, (e.g. "v2b"),
1474 * and if "guessing" is TRUE, then no new token is created (and the caller
1475 * must use an alternative parsing method).
1479 S_force_version(pTHX_ char *s, int guessing)
1485 I32 startoff = s - SvPVX(PL_linestr);
1494 while (isDIGIT(*d) || *d == '_' || *d == '.')
1498 start_force(PL_curforce);
1499 curmad('X', newSVpvn(s,d-s));
1502 if (*d == ';' || isSPACE(*d) || *d == '}' || !*d) {
1504 s = scan_num(s, &pl_yylval);
1505 version = pl_yylval.opval;
1506 ver = cSVOPx(version)->op_sv;
1507 if (SvPOK(ver) && !SvNIOK(ver)) {
1508 SvUPGRADE(ver, SVt_PVNV);
1509 SvNV_set(ver, str_to_version(ver));
1510 SvNOK_on(ver); /* hint that it is a version */
1513 else if (guessing) {
1516 sv_free(PL_nextwhite); /* let next token collect whitespace */
1518 s = SvPVX(PL_linestr) + startoff;
1526 if (PL_madskills && !version) {
1527 sv_free(PL_nextwhite); /* let next token collect whitespace */
1529 s = SvPVX(PL_linestr) + startoff;
1532 /* NOTE: The parser sees the package name and the VERSION swapped */
1533 start_force(PL_curforce);
1534 NEXTVAL_NEXTTOKE.opval = version;
1542 * Tokenize a quoted string passed in as an SV. It finds the next
1543 * chunk, up to end of string or a backslash. It may make a new
1544 * SV containing that chunk (if HINT_NEW_STRING is on). It also
1549 S_tokeq(pTHX_ SV *sv)
1553 register char *send;
1561 s = SvPV_force(sv, len);
1562 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
1565 while (s < send && *s != '\\')
1570 if ( PL_hints & HINT_NEW_STRING ) {
1571 pv = newSVpvn_flags(SvPVX_const(pv), len, SVs_TEMP | SvUTF8(sv));
1575 if (s + 1 < send && (s[1] == '\\'))
1576 s++; /* all that, just for this */
1581 SvCUR_set(sv, d - SvPVX_const(sv));
1583 if ( PL_hints & HINT_NEW_STRING )
1584 return new_constant(NULL, 0, "q", sv, pv, "q", 1);
1589 * Now come three functions related to double-quote context,
1590 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
1591 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
1592 * interact with PL_lex_state, and create fake ( ... ) argument lists
1593 * to handle functions and concatenation.
1594 * They assume that whoever calls them will be setting up a fake
1595 * join call, because each subthing puts a ',' after it. This lets
1598 * join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
1600 * (I'm not sure whether the spurious commas at the end of lcfirst's
1601 * arguments and join's arguments are created or not).
1606 * Assumes that pl_yylval.ival is the op we're creating (e.g. OP_LCFIRST).
1608 * Pattern matching will set PL_lex_op to the pattern-matching op to
1609 * make (we return THING if pl_yylval.ival is OP_NULL, PMFUNC otherwise).
1611 * OP_CONST and OP_READLINE are easy--just make the new op and return.
1613 * Everything else becomes a FUNC.
1615 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
1616 * had an OP_CONST or OP_READLINE). This just sets us up for a
1617 * call to S_sublex_push().
1621 S_sublex_start(pTHX)
1624 register const I32 op_type = pl_yylval.ival;
1626 if (op_type == OP_NULL) {
1627 pl_yylval.opval = PL_lex_op;
1631 if (op_type == OP_CONST || op_type == OP_READLINE) {
1632 SV *sv = tokeq(PL_lex_stuff);
1634 if (SvTYPE(sv) == SVt_PVIV) {
1635 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
1637 const char * const p = SvPV_const(sv, len);
1638 SV * const nsv = newSVpvn_flags(p, len, SvUTF8(sv));
1642 pl_yylval.opval = (OP*)newSVOP(op_type, 0, sv);
1643 PL_lex_stuff = NULL;
1644 /* Allow <FH> // "foo" */
1645 if (op_type == OP_READLINE)
1646 PL_expect = XTERMORDORDOR;
1649 else if (op_type == OP_BACKTICK && PL_lex_op) {
1650 /* readpipe() vas overriden */
1651 cSVOPx(cLISTOPx(cUNOPx(PL_lex_op)->op_first)->op_first->op_sibling)->op_sv = tokeq(PL_lex_stuff);
1652 pl_yylval.opval = PL_lex_op;
1654 PL_lex_stuff = NULL;
1658 PL_sublex_info.super_state = PL_lex_state;
1659 PL_sublex_info.sub_inwhat = (U16)op_type;
1660 PL_sublex_info.sub_op = PL_lex_op;
1661 PL_lex_state = LEX_INTERPPUSH;
1665 pl_yylval.opval = PL_lex_op;
1675 * Create a new scope to save the lexing state. The scope will be
1676 * ended in S_sublex_done. Returns a '(', starting the function arguments
1677 * to the uc, lc, etc. found before.
1678 * Sets PL_lex_state to LEX_INTERPCONCAT.
1687 PL_lex_state = PL_sublex_info.super_state;
1688 SAVEBOOL(PL_lex_dojoin);
1689 SAVEI32(PL_lex_brackets);
1690 SAVEI32(PL_lex_casemods);
1691 SAVEI32(PL_lex_starts);
1692 SAVEI8(PL_lex_state);
1693 SAVEVPTR(PL_lex_inpat);
1694 SAVEI16(PL_lex_inwhat);
1695 SAVECOPLINE(PL_curcop);
1696 SAVEPPTR(PL_bufptr);
1697 SAVEPPTR(PL_bufend);
1698 SAVEPPTR(PL_oldbufptr);
1699 SAVEPPTR(PL_oldoldbufptr);
1700 SAVEPPTR(PL_last_lop);
1701 SAVEPPTR(PL_last_uni);
1702 SAVEPPTR(PL_linestart);
1703 SAVESPTR(PL_linestr);
1704 SAVEGENERICPV(PL_lex_brackstack);
1705 SAVEGENERICPV(PL_lex_casestack);
1707 PL_linestr = PL_lex_stuff;
1708 PL_lex_stuff = NULL;
1710 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
1711 = SvPVX(PL_linestr);
1712 PL_bufend += SvCUR(PL_linestr);
1713 PL_last_lop = PL_last_uni = NULL;
1714 SAVEFREESV(PL_linestr);
1716 PL_lex_dojoin = FALSE;
1717 PL_lex_brackets = 0;
1718 Newx(PL_lex_brackstack, 120, char);
1719 Newx(PL_lex_casestack, 12, char);
1720 PL_lex_casemods = 0;
1721 *PL_lex_casestack = '\0';
1723 PL_lex_state = LEX_INTERPCONCAT;
1724 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
1726 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
1727 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
1728 PL_lex_inpat = PL_sublex_info.sub_op;
1730 PL_lex_inpat = NULL;
1737 * Restores lexer state after a S_sublex_push.
1744 if (!PL_lex_starts++) {
1745 SV * const sv = newSVpvs("");
1746 if (SvUTF8(PL_linestr))
1748 PL_expect = XOPERATOR;
1749 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
1753 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
1754 PL_lex_state = LEX_INTERPCASEMOD;
1758 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
1759 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
1760 PL_linestr = PL_lex_repl;
1762 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
1763 PL_bufend += SvCUR(PL_linestr);
1764 PL_last_lop = PL_last_uni = NULL;
1765 SAVEFREESV(PL_linestr);
1766 PL_lex_dojoin = FALSE;
1767 PL_lex_brackets = 0;
1768 PL_lex_casemods = 0;
1769 *PL_lex_casestack = '\0';
1771 if (SvEVALED(PL_lex_repl)) {
1772 PL_lex_state = LEX_INTERPNORMAL;
1774 /* we don't clear PL_lex_repl here, so that we can check later
1775 whether this is an evalled subst; that means we rely on the
1776 logic to ensure sublex_done() is called again only via the
1777 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
1780 PL_lex_state = LEX_INTERPCONCAT;
1790 PL_endwhite = newSVpvs("");
1791 sv_catsv(PL_endwhite, PL_thiswhite);
1795 sv_setpvn(PL_thistoken,"",0);
1797 PL_realtokenstart = -1;
1801 PL_bufend = SvPVX(PL_linestr);
1802 PL_bufend += SvCUR(PL_linestr);
1803 PL_expect = XOPERATOR;
1804 PL_sublex_info.sub_inwhat = 0;
1812 Extracts a pattern, double-quoted string, or transliteration. This
1815 It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
1816 processing a pattern (PL_lex_inpat is true), a transliteration
1817 (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
1819 Returns a pointer to the character scanned up to. If this is
1820 advanced from the start pointer supplied (i.e. if anything was
1821 successfully parsed), will leave an OP for the substring scanned
1822 in pl_yylval. Caller must intuit reason for not parsing further
1823 by looking at the next characters herself.
1827 double-quoted style: \r and \n
1828 regexp special ones: \D \s
1831 case and quoting: \U \Q \E
1832 stops on @ and $, but not for $ as tail anchor
1834 In transliterations:
1835 characters are VERY literal, except for - not at the start or end
1836 of the string, which indicates a range. If the range is in bytes,
1837 scan_const expands the range to the full set of intermediate
1838 characters. If the range is in utf8, the hyphen is replaced with
1839 a certain range mark which will be handled by pmtrans() in op.c.
1841 In double-quoted strings:
1843 double-quoted style: \r and \n
1845 deprecated backrefs: \1 (in substitution replacements)
1846 case and quoting: \U \Q \E
1849 scan_const does *not* construct ops to handle interpolated strings.
1850 It stops processing as soon as it finds an embedded $ or @ variable
1851 and leaves it to the caller to work out what's going on.
1853 embedded arrays (whether in pattern or not) could be:
1854 @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
1856 $ in double-quoted strings must be the symbol of an embedded scalar.
1858 $ in pattern could be $foo or could be tail anchor. Assumption:
1859 it's a tail anchor if $ is the last thing in the string, or if it's
1860 followed by one of "()| \r\n\t"
1862 \1 (backreferences) are turned into $1
1864 The structure of the code is
1865 while (there's a character to process) {
1866 handle transliteration ranges
1867 skip regexp comments /(?#comment)/ and codes /(?{code})/
1868 skip #-initiated comments in //x patterns
1869 check for embedded arrays
1870 check for embedded scalars
1872 leave intact backslashes from leaveit (below)
1873 deprecate \1 in substitution replacements
1874 handle string-changing backslashes \l \U \Q \E, etc.
1875 switch (what was escaped) {
1876 handle \- in a transliteration (becomes a literal -)
1877 handle \132 (octal characters)
1878 handle \x15 and \x{1234} (hex characters)
1879 handle \N{name} (named characters)
1880 handle \cV (control characters)
1881 handle printf-style backslashes (\f, \r, \n, etc)
1883 } (end if backslash)
1884 } (end while character to read)
1889 S_scan_const(pTHX_ char *start)
1892 register char *send = PL_bufend; /* end of the constant */
1893 SV *sv = newSV(send - start); /* sv for the constant */
1894 register char *s = start; /* start of the constant */
1895 register char *d = SvPVX(sv); /* destination for copies */
1896 bool dorange = FALSE; /* are we in a translit range? */
1897 bool didrange = FALSE; /* did we just finish a range? */
1898 I32 has_utf8 = FALSE; /* Output constant is UTF8 */
1899 I32 this_utf8 = UTF; /* The source string is assumed to be UTF8 */
1902 UV literal_endpoint = 0;
1903 bool native_range = TRUE; /* turned to FALSE if the first endpoint is Unicode. */
1906 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
1907 /* If we are doing a trans and we know we want UTF8 set expectation */
1908 has_utf8 = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
1909 this_utf8 = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
1913 while (s < send || dorange) {
1914 /* get transliterations out of the way (they're most literal) */
1915 if (PL_lex_inwhat == OP_TRANS) {
1916 /* expand a range A-Z to the full set of characters. AIE! */
1918 I32 i; /* current expanded character */
1919 I32 min; /* first character in range */
1920 I32 max; /* last character in range */
1931 char * const c = (char*)utf8_hop((U8*)d, -1);
1935 *c = (char)UTF_TO_NATIVE(0xff);
1936 /* mark the range as done, and continue */
1942 i = d - SvPVX_const(sv); /* remember current offset */
1945 SvLEN(sv) + (has_utf8 ?
1946 (512 - UTF_CONTINUATION_MARK +
1949 /* How many two-byte within 0..255: 128 in UTF-8,
1950 * 96 in UTF-8-mod. */
1952 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
1954 d = SvPVX(sv) + i; /* refresh d after realloc */
1958 for (j = 0; j <= 1; j++) {
1959 char * const c = (char*)utf8_hop((U8*)d, -1);
1960 const UV uv = utf8n_to_uvchr((U8*)c, d - c, NULL, 0);
1966 max = (U8)0xff; /* only to \xff */
1967 uvmax = uv; /* \x{100} to uvmax */
1969 d = c; /* eat endpoint chars */
1974 d -= 2; /* eat the first char and the - */
1975 min = (U8)*d; /* first char in range */
1976 max = (U8)d[1]; /* last char in range */
1983 "Invalid range \"%c-%c\" in transliteration operator",
1984 (char)min, (char)max);
1988 if (literal_endpoint == 2 &&
1989 ((isLOWER(min) && isLOWER(max)) ||
1990 (isUPPER(min) && isUPPER(max)))) {
1992 for (i = min; i <= max; i++)
1994 *d++ = NATIVE_TO_NEED(has_utf8,i);
1996 for (i = min; i <= max; i++)
1998 *d++ = NATIVE_TO_NEED(has_utf8,i);
2003 for (i = min; i <= max; i++)
2006 const U8 ch = (U8)NATIVE_TO_UTF(i);
2007 if (UNI_IS_INVARIANT(ch))
2010 *d++ = (U8)UTF8_EIGHT_BIT_HI(ch);
2011 *d++ = (U8)UTF8_EIGHT_BIT_LO(ch);
2020 d = (char*)uvchr_to_utf8((U8*)d, 0x100);
2022 *d++ = (char)UTF_TO_NATIVE(0xff);
2024 d = (char*)uvchr_to_utf8((U8*)d, uvmax);
2028 /* mark the range as done, and continue */
2032 literal_endpoint = 0;
2037 /* range begins (ignore - as first or last char) */
2038 else if (*s == '-' && s+1 < send && s != start) {
2040 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
2047 *d++ = (char)UTF_TO_NATIVE(0xff); /* use illegal utf8 byte--see pmtrans */
2057 literal_endpoint = 0;
2058 native_range = TRUE;
2063 /* if we get here, we're not doing a transliteration */
2065 /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
2066 except for the last char, which will be done separately. */
2067 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
2069 while (s+1 < send && *s != ')')
2070 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2072 else if (s[2] == '{' /* This should match regcomp.c */
2073 || (s[2] == '?' && s[3] == '{'))
2076 char *regparse = s + (s[2] == '{' ? 3 : 4);
2079 while (count && (c = *regparse)) {
2080 if (c == '\\' && regparse[1])
2088 if (*regparse != ')')
2089 regparse--; /* Leave one char for continuation. */
2090 while (s < regparse)
2091 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2095 /* likewise skip #-initiated comments in //x patterns */
2096 else if (*s == '#' && PL_lex_inpat &&
2097 ((PMOP*)PL_lex_inpat)->op_pmflags & PMf_EXTENDED) {
2098 while (s+1 < send && *s != '\n')
2099 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2102 /* check for embedded arrays
2103 (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
2105 else if (*s == '@' && s[1]) {
2106 if (isALNUM_lazy_if(s+1,UTF))
2108 if (strchr(":'{$", s[1]))
2110 if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
2111 break; /* in regexp, neither @+ nor @- are interpolated */
2114 /* check for embedded scalars. only stop if we're sure it's a
2117 else if (*s == '$') {
2118 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
2120 if (s + 1 < send && !strchr("()| \r\n\t", s[1]))
2121 break; /* in regexp, $ might be tail anchor */
2124 /* End of else if chain - OP_TRANS rejoin rest */
2127 if (*s == '\\' && s+1 < send) {
2130 /* deprecate \1 in strings and substitution replacements */
2131 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
2132 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
2134 if (ckWARN(WARN_SYNTAX))
2135 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
2140 /* string-change backslash escapes */
2141 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
2145 /* skip any other backslash escapes in a pattern */
2146 else if (PL_lex_inpat) {
2147 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
2148 goto default_action;
2151 /* if we get here, it's either a quoted -, or a digit */
2154 /* quoted - in transliterations */
2156 if (PL_lex_inwhat == OP_TRANS) {
2163 if ((isALPHA(*s) || isDIGIT(*s)) &&
2165 Perl_warner(aTHX_ packWARN(WARN_MISC),
2166 "Unrecognized escape \\%c passed through",
2168 /* default action is to copy the quoted character */
2169 goto default_action;
2172 /* \132 indicates an octal constant */
2173 case '0': case '1': case '2': case '3':
2174 case '4': case '5': case '6': case '7':
2178 uv = grok_oct(s, &len, &flags, NULL);
2181 goto NUM_ESCAPE_INSERT;
2183 /* \x24 indicates a hex constant */
2187 char* const e = strchr(s, '}');
2188 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
2189 PERL_SCAN_DISALLOW_PREFIX;
2194 yyerror("Missing right brace on \\x{}");
2198 uv = grok_hex(s, &len, &flags, NULL);
2204 I32 flags = PERL_SCAN_DISALLOW_PREFIX;
2205 uv = grok_hex(s, &len, &flags, NULL);
2211 /* Insert oct or hex escaped character.
2212 * There will always enough room in sv since such
2213 * escapes will be longer than any UTF-8 sequence
2214 * they can end up as. */
2216 /* We need to map to chars to ASCII before doing the tests
2219 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(uv))) {
2220 if (!has_utf8 && uv > 255) {
2221 /* Might need to recode whatever we have
2222 * accumulated so far if it contains any
2225 * (Can't we keep track of that and avoid
2226 * this rescan? --jhi)
2230 for (c = (U8 *) SvPVX(sv); c < (U8 *)d; c++) {
2231 if (!NATIVE_IS_INVARIANT(*c)) {
2236 const STRLEN offset = d - SvPVX_const(sv);
2238 d = SvGROW(sv, SvLEN(sv) + hicount + 1) + offset;
2242 while (src >= (const U8 *)SvPVX_const(sv)) {
2243 if (!NATIVE_IS_INVARIANT(*src)) {
2244 const U8 ch = NATIVE_TO_ASCII(*src);
2245 *dst-- = (U8)UTF8_EIGHT_BIT_LO(ch);
2246 *dst-- = (U8)UTF8_EIGHT_BIT_HI(ch);
2256 if (has_utf8 || uv > 255) {
2257 d = (char*)uvchr_to_utf8((U8*)d, uv);
2259 if (PL_lex_inwhat == OP_TRANS &&
2260 PL_sublex_info.sub_op) {
2261 PL_sublex_info.sub_op->op_private |=
2262 (PL_lex_repl ? OPpTRANS_FROM_UTF
2266 if (uv > 255 && !dorange)
2267 native_range = FALSE;
2279 /* \N{LATIN SMALL LETTER A} is a named character */
2283 char* e = strchr(s, '}');
2289 yyerror("Missing right brace on \\N{}");
2293 if (e > s + 2 && s[1] == 'U' && s[2] == '+') {
2295 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
2296 PERL_SCAN_DISALLOW_PREFIX;
2299 uv = grok_hex(s, &len, &flags, NULL);
2300 if ( e > s && len != (STRLEN)(e - s) ) {
2304 goto NUM_ESCAPE_INSERT;
2306 res = newSVpvn(s + 1, e - s - 1);
2307 res = new_constant( NULL, 0, "charnames",
2308 res, NULL, s - 2, e - s + 3 );
2310 sv_utf8_upgrade(res);
2311 str = SvPV_const(res,len);
2312 #ifdef EBCDIC_NEVER_MIND
2313 /* charnames uses pack U and that has been
2314 * recently changed to do the below uni->native
2315 * mapping, so this would be redundant (and wrong,
2316 * the code point would be doubly converted).
2317 * But leave this in just in case the pack U change
2318 * gets revoked, but the semantics is still
2319 * desireable for charnames. --jhi */
2321 UV uv = utf8_to_uvchr((const U8*)str, 0);
2324 U8 tmpbuf[UTF8_MAXBYTES+1], *d;
2326 d = uvchr_to_utf8(tmpbuf, UNI_TO_NATIVE(uv));
2327 sv_setpvn(res, (char *)tmpbuf, d - tmpbuf);
2328 str = SvPV_const(res, len);
2332 if (!has_utf8 && SvUTF8(res)) {
2333 const char * const ostart = SvPVX_const(sv);
2334 SvCUR_set(sv, d - ostart);
2337 sv_utf8_upgrade(sv);
2338 /* this just broke our allocation above... */
2339 SvGROW(sv, (STRLEN)(send - start));
2340 d = SvPVX(sv) + SvCUR(sv);
2343 if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
2344 const char * const odest = SvPVX_const(sv);
2346 SvGROW(sv, (SvLEN(sv) + len - (e - s + 4)));
2347 d = SvPVX(sv) + (d - odest);
2351 native_range = FALSE; /* \N{} is guessed to be Unicode */
2353 Copy(str, d, len, char);
2360 yyerror("Missing braces on \\N{}");
2363 /* \c is a control character */
2372 *d++ = NATIVE_TO_NEED(has_utf8,toCTRL(c));
2375 yyerror("Missing control char name in \\c");
2379 /* printf-style backslashes, formfeeds, newlines, etc */
2381 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
2384 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
2387 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
2390 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
2393 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
2396 *d++ = ASCII_TO_NEED(has_utf8,'\033');
2399 *d++ = ASCII_TO_NEED(has_utf8,'\007');
2405 } /* end if (backslash) */
2412 /* If we started with encoded form, or already know we want it
2413 and then encode the next character */
2414 if ((has_utf8 || this_utf8) && !NATIVE_IS_INVARIANT((U8)(*s))) {
2416 const UV nextuv = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
2417 const STRLEN need = UNISKIP(NATIVE_TO_UNI(nextuv));
2420 /* encoded value larger than old, need extra space (NOTE: SvCUR() not set here) */
2421 const STRLEN off = d - SvPVX_const(sv);
2422 d = SvGROW(sv, SvLEN(sv) + (need-len)) + off;
2424 d = (char*)uvchr_to_utf8((U8*)d, nextuv);
2427 if (uv > 255 && !dorange)
2428 native_range = FALSE;
2432 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2434 } /* while loop to process each character */
2436 /* terminate the string and set up the sv */
2438 SvCUR_set(sv, d - SvPVX_const(sv));
2439 if (SvCUR(sv) >= SvLEN(sv))
2440 Perl_croak(aTHX_ "panic: constant overflowed allocated space");
2443 if (PL_encoding && !has_utf8) {
2444 sv_recode_to_utf8(sv, PL_encoding);
2450 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
2451 PL_sublex_info.sub_op->op_private |=
2452 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
2456 /* shrink the sv if we allocated more than we used */
2457 if (SvCUR(sv) + 5 < SvLEN(sv)) {
2458 SvPV_shrink_to_cur(sv);
2461 /* return the substring (via pl_yylval) only if we parsed anything */
2462 if (s > PL_bufptr) {
2463 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) ) {
2464 const char *const key = PL_lex_inpat ? "qr" : "q";
2465 const STRLEN keylen = PL_lex_inpat ? 2 : 1;
2469 if (PL_lex_inwhat == OP_TRANS) {
2472 } else if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) {
2480 sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL,
2483 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
2490 * Returns TRUE if there's more to the expression (e.g., a subscript),
2493 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
2495 * ->[ and ->{ return TRUE
2496 * { and [ outside a pattern are always subscripts, so return TRUE
2497 * if we're outside a pattern and it's not { or [, then return FALSE
2498 * if we're in a pattern and the first char is a {
2499 * {4,5} (any digits around the comma) returns FALSE
2500 * if we're in a pattern and the first char is a [
2502 * [SOMETHING] has a funky algorithm to decide whether it's a
2503 * character class or not. It has to deal with things like
2504 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
2505 * anything else returns TRUE
2508 /* This is the one truly awful dwimmer necessary to conflate C and sed. */
2511 S_intuit_more(pTHX_ register char *s)
2514 if (PL_lex_brackets)
2516 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
2518 if (*s != '{' && *s != '[')
2523 /* In a pattern, so maybe we have {n,m}. */
2540 /* On the other hand, maybe we have a character class */
2543 if (*s == ']' || *s == '^')
2546 /* this is terrifying, and it works */
2547 int weight = 2; /* let's weigh the evidence */
2549 unsigned char un_char = 255, last_un_char;
2550 const char * const send = strchr(s,']');
2551 char tmpbuf[sizeof PL_tokenbuf * 4];
2553 if (!send) /* has to be an expression */
2556 Zero(seen,256,char);
2559 else if (isDIGIT(*s)) {
2561 if (isDIGIT(s[1]) && s[2] == ']')
2567 for (; s < send; s++) {
2568 last_un_char = un_char;
2569 un_char = (unsigned char)*s;
2574 weight -= seen[un_char] * 10;
2575 if (isALNUM_lazy_if(s+1,UTF)) {
2577 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
2578 len = (int)strlen(tmpbuf);
2579 if (len > 1 && gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PV))
2584 else if (*s == '$' && s[1] &&
2585 strchr("[#!%*<>()-=",s[1])) {
2586 if (/*{*/ strchr("])} =",s[2]))
2595 if (strchr("wds]",s[1]))
2597 else if (seen[(U8)'\''] || seen[(U8)'"'])
2599 else if (strchr("rnftbxcav",s[1]))
2601 else if (isDIGIT(s[1])) {
2603 while (s[1] && isDIGIT(s[1]))
2613 if (strchr("aA01! ",last_un_char))
2615 if (strchr("zZ79~",s[1]))
2617 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
2618 weight -= 5; /* cope with negative subscript */
2621 if (!isALNUM(last_un_char)
2622 && !(last_un_char == '$' || last_un_char == '@'
2623 || last_un_char == '&')
2624 && isALPHA(*s) && s[1] && isALPHA(s[1])) {
2629 if (keyword(tmpbuf, d - tmpbuf, 0))
2632 if (un_char == last_un_char + 1)
2634 weight -= seen[un_char];
2639 if (weight >= 0) /* probably a character class */
2649 * Does all the checking to disambiguate
2651 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
2652 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
2654 * First argument is the stuff after the first token, e.g. "bar".
2656 * Not a method if bar is a filehandle.
2657 * Not a method if foo is a subroutine prototyped to take a filehandle.
2658 * Not a method if it's really "Foo $bar"
2659 * Method if it's "foo $bar"
2660 * Not a method if it's really "print foo $bar"
2661 * Method if it's really "foo package::" (interpreted as package->foo)
2662 * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
2663 * Not a method if bar is a filehandle or package, but is quoted with
2668 S_intuit_method(pTHX_ char *start, GV *gv, CV *cv)
2671 char *s = start + (*start == '$');
2672 char tmpbuf[sizeof PL_tokenbuf];
2680 if (SvTYPE(gv) == SVt_PVGV && GvIO(gv))
2684 const char *proto = SvPVX_const(cv);
2695 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
2696 /* start is the beginning of the possible filehandle/object,
2697 * and s is the end of it
2698 * tmpbuf is a copy of it
2701 if (*start == '$') {
2702 if (gv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY ||
2703 isUPPER(*PL_tokenbuf))
2706 len = start - SvPVX(PL_linestr);
2710 start = SvPVX(PL_linestr) + len;
2714 return *s == '(' ? FUNCMETH : METHOD;
2716 if (!keyword(tmpbuf, len, 0)) {
2717 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
2721 soff = s - SvPVX(PL_linestr);
2725 indirgv = gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PVCV);
2726 if (indirgv && GvCVu(indirgv))
2728 /* filehandle or package name makes it a method */
2729 if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, 0)) {
2731 soff = s - SvPVX(PL_linestr);
2734 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
2735 return 0; /* no assumptions -- "=>" quotes bearword */
2737 start_force(PL_curforce);
2738 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0,
2739 newSVpvn(tmpbuf,len));
2740 NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
2742 curmad('X', newSVpvn(start,SvPVX(PL_linestr) + soff - start));
2747 PL_bufptr = SvPVX(PL_linestr) + soff; /* restart before space */
2749 return *s == '(' ? FUNCMETH : METHOD;
2755 /* Encoded script support. filter_add() effectively inserts a
2756 * 'pre-processing' function into the current source input stream.
2757 * Note that the filter function only applies to the current source file
2758 * (e.g., it will not affect files 'require'd or 'use'd by this one).
2760 * The datasv parameter (which may be NULL) can be used to pass
2761 * private data to this instance of the filter. The filter function
2762 * can recover the SV using the FILTER_DATA macro and use it to
2763 * store private buffers and state information.
2765 * The supplied datasv parameter is upgraded to a PVIO type
2766 * and the IoDIRP/IoANY field is used to store the function pointer,
2767 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
2768 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
2769 * private use must be set using malloc'd pointers.
2773 Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
2782 if (!PL_rsfp_filters)
2783 PL_rsfp_filters = newAV();
2786 SvUPGRADE(datasv, SVt_PVIO);
2787 IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
2788 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
2789 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
2790 FPTR2DPTR(void *, IoANY(datasv)),
2791 SvPV_nolen(datasv)));
2792 av_unshift(PL_rsfp_filters, 1);
2793 av_store(PL_rsfp_filters, 0, datasv) ;
2798 /* Delete most recently added instance of this filter function. */
2800 Perl_filter_del(pTHX_ filter_t funcp)
2806 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
2807 FPTR2DPTR(void*, funcp)));
2809 if (!PL_parser || !PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
2811 /* if filter is on top of stack (usual case) just pop it off */
2812 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
2813 if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
2814 IoFLAGS(datasv) &= ~IOf_FAKE_DIRP;
2815 IoANY(datasv) = (void *)NULL;
2816 sv_free(av_pop(PL_rsfp_filters));
2820 /* we need to search for the correct entry and clear it */
2821 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
2825 /* Invoke the idxth filter function for the current rsfp. */
2826 /* maxlen 0 = read one text line */
2828 Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
2833 /* This API is bad. It should have been using unsigned int for maxlen.
2834 Not sure if we want to change the API, but if not we should sanity
2835 check the value here. */
2836 const unsigned int correct_length
2845 if (!PL_parser || !PL_rsfp_filters)
2847 if (idx > AvFILLp(PL_rsfp_filters)) { /* Any more filters? */
2848 /* Provide a default input filter to make life easy. */
2849 /* Note that we append to the line. This is handy. */
2850 DEBUG_P(PerlIO_printf(Perl_debug_log,
2851 "filter_read %d: from rsfp\n", idx));
2852 if (correct_length) {
2855 const int old_len = SvCUR(buf_sv);
2857 /* ensure buf_sv is large enough */
2858 SvGROW(buf_sv, (STRLEN)(old_len + correct_length)) ;
2859 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
2860 correct_length)) <= 0) {
2861 if (PerlIO_error(PL_rsfp))
2862 return -1; /* error */
2864 return 0 ; /* end of file */
2866 SvCUR_set(buf_sv, old_len + len) ;
2869 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
2870 if (PerlIO_error(PL_rsfp))
2871 return -1; /* error */
2873 return 0 ; /* end of file */
2876 return SvCUR(buf_sv);
2878 /* Skip this filter slot if filter has been deleted */
2879 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
2880 DEBUG_P(PerlIO_printf(Perl_debug_log,
2881 "filter_read %d: skipped (filter deleted)\n",
2883 return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
2885 /* Get function pointer hidden within datasv */
2886 funcp = DPTR2FPTR(filter_t, IoANY(datasv));
2887 DEBUG_P(PerlIO_printf(Perl_debug_log,
2888 "filter_read %d: via function %p (%s)\n",
2889 idx, (void*)datasv, SvPV_nolen_const(datasv)));
2890 /* Call function. The function is expected to */
2891 /* call "FILTER_READ(idx+1, buf_sv)" first. */
2892 /* Return: <0:error, =0:eof, >0:not eof */
2893 return (*funcp)(aTHX_ idx, buf_sv, correct_length);
2897 S_filter_gets(pTHX_ register SV *sv, register PerlIO *fp, STRLEN append)
2900 #ifdef PERL_CR_FILTER
2901 if (!PL_rsfp_filters) {
2902 filter_add(S_cr_textfilter,NULL);
2905 if (PL_rsfp_filters) {
2907 SvCUR_set(sv, 0); /* start with empty line */
2908 if (FILTER_READ(0, sv, 0) > 0)
2909 return ( SvPVX(sv) ) ;
2914 return (sv_gets(sv, fp, append));
2918 S_find_in_my_stash(pTHX_ const char *pkgname, STRLEN len)
2923 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
2927 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
2928 (gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVHV)))
2930 return GvHV(gv); /* Foo:: */
2933 /* use constant CLASS => 'MyClass' */
2934 gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVCV);
2935 if (gv && GvCV(gv)) {
2936 SV * const sv = cv_const_sv(GvCV(gv));
2938 pkgname = SvPV_const(sv, len);
2941 return gv_stashpvn(pkgname, len, 0);
2945 * S_readpipe_override
2946 * Check whether readpipe() is overriden, and generates the appropriate
2947 * optree, provided sublex_start() is called afterwards.
2950 S_readpipe_override(pTHX)
2953 GV *gv_readpipe = gv_fetchpvs("readpipe", GV_NOTQUAL, SVt_PVCV);
2954 pl_yylval.ival = OP_BACKTICK;
2956 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe))
2958 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readpipe", FALSE))
2959 && (gv_readpipe = *gvp) && isGV_with_GP(gv_readpipe)
2960 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe)))
2962 PL_lex_op = (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
2963 append_elem(OP_LIST,
2964 newSVOP(OP_CONST, 0, &PL_sv_undef), /* value will be read later */
2965 newCVREF(0, newGVOP(OP_GV, 0, gv_readpipe))));
2972 * The intent of this yylex wrapper is to minimize the changes to the
2973 * tokener when we aren't interested in collecting madprops. It remains
2974 * to be seen how successful this strategy will be...
2981 char *s = PL_bufptr;
2983 /* make sure PL_thiswhite is initialized */
2987 /* just do what yylex would do on pending identifier; leave PL_thiswhite alone */
2988 if (PL_pending_ident)
2989 return S_pending_ident(aTHX);
2991 /* previous token ate up our whitespace? */
2992 if (!PL_lasttoke && PL_nextwhite) {
2993 PL_thiswhite = PL_nextwhite;
2997 /* isolate the token, and figure out where it is without whitespace */
2998 PL_realtokenstart = -1;
3002 assert(PL_curforce < 0);
3004 if (!PL_thismad || PL_thismad->mad_key == '^') { /* not forced already? */
3005 if (!PL_thistoken) {
3006 if (PL_realtokenstart < 0 || !CopLINE(PL_curcop))
3007 PL_thistoken = newSVpvs("");
3009 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
3010 PL_thistoken = newSVpvn(tstart, s - tstart);
3013 if (PL_thismad) /* install head */
3014 CURMAD('X', PL_thistoken);
3017 /* last whitespace of a sublex? */
3018 if (optype == ')' && PL_endwhite) {
3019 CURMAD('X', PL_endwhite);
3024 /* if no whitespace and we're at EOF, bail. Otherwise fake EOF below. */
3025 if (!PL_thiswhite && !PL_endwhite && !optype) {
3026 sv_free(PL_thistoken);
3031 /* put off final whitespace till peg */
3032 if (optype == ';' && !PL_rsfp) {
3033 PL_nextwhite = PL_thiswhite;
3036 else if (PL_thisopen) {
3037 CURMAD('q', PL_thisopen);
3039 sv_free(PL_thistoken);
3043 /* Store actual token text as madprop X */
3044 CURMAD('X', PL_thistoken);
3048 /* add preceding whitespace as madprop _ */
3049 CURMAD('_', PL_thiswhite);
3053 /* add quoted material as madprop = */
3054 CURMAD('=', PL_thisstuff);
3058 /* add terminating quote as madprop Q */
3059 CURMAD('Q', PL_thisclose);
3063 /* special processing based on optype */
3067 /* opval doesn't need a TOKEN since it can already store mp */
3077 if (pl_yylval.opval)
3078 append_madprops(PL_thismad, pl_yylval.opval, 0);
3086 addmad(newMADsv('p', PL_endwhite), &PL_thismad, 0);
3095 /* remember any fake bracket that lexer is about to discard */
3096 if (PL_lex_brackets == 1 &&
3097 ((expectation)PL_lex_brackstack[0] & XFAKEBRACK))
3100 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
3103 PL_thiswhite = newSVpvn(PL_bufptr, ++s - PL_bufptr);
3104 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
3107 break; /* don't bother looking for trailing comment */
3116 /* attach a trailing comment to its statement instead of next token */
3120 if (PL_bufptr > PL_oldbufptr && PL_bufptr[-1] == optype) {
3122 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
3124 if (*s == '\n' || *s == '#') {
3125 while (s < PL_bufend && *s != '\n')
3129 PL_thiswhite = newSVpvn(PL_bufptr, s - PL_bufptr);
3130 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
3147 /* Create new token struct. Note: opvals return early above. */
3148 pl_yylval.tkval = newTOKEN(optype, pl_yylval, PL_thismad);
3155 S_tokenize_use(pTHX_ int is_use, char *s) {
3157 if (PL_expect != XSTATE)
3158 yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
3159 is_use ? "use" : "no"));
3161 if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
3162 s = force_version(s, TRUE);
3163 if (*s == ';' || (s = SKIPSPACE1(s), *s == ';')) {
3164 start_force(PL_curforce);
3165 NEXTVAL_NEXTTOKE.opval = NULL;
3168 else if (*s == 'v') {
3169 s = force_word(s,WORD,FALSE,TRUE,FALSE);
3170 s = force_version(s, FALSE);
3174 s = force_word(s,WORD,FALSE,TRUE,FALSE);
3175 s = force_version(s, FALSE);
3177 pl_yylval.ival = is_use;
3181 static const char* const exp_name[] =
3182 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
3183 "ATTRTERM", "TERMBLOCK", "TERMORDORDOR"
3190 Works out what to call the token just pulled out of the input
3191 stream. The yacc parser takes care of taking the ops we return and
3192 stitching them into a tree.
3198 if read an identifier
3199 if we're in a my declaration
3200 croak if they tried to say my($foo::bar)
3201 build the ops for a my() declaration
3202 if it's an access to a my() variable
3203 are we in a sort block?
3204 croak if my($a); $a <=> $b
3205 build ops for access to a my() variable
3206 if in a dq string, and they've said @foo and we can't find @foo
3208 build ops for a bareword
3209 if we already built the token before, use it.
3214 #pragma segment Perl_yylex
3220 register char *s = PL_bufptr;
3225 /* orig_keyword, gvp, and gv are initialized here because
3226 * jump to the label just_a_word_zero can bypass their
3227 * initialization later. */
3228 I32 orig_keyword = 0;
3233 SV* tmp = newSVpvs("");
3234 PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n",
3235 (IV)CopLINE(PL_curcop),
3236 lex_state_names[PL_lex_state],
3237 exp_name[PL_expect],
3238 pv_display(tmp, s, strlen(s), 0, 60));
3241 /* check if there's an identifier for us to look at */
3242 if (PL_pending_ident)
3243 return REPORT(S_pending_ident(aTHX));
3245 /* no identifier pending identification */
3247 switch (PL_lex_state) {
3249 case LEX_NORMAL: /* Some compilers will produce faster */
3250 case LEX_INTERPNORMAL: /* code if we comment these out. */
3254 /* when we've already built the next token, just pull it out of the queue */
3258 pl_yylval = PL_nexttoke[PL_lasttoke].next_val;
3260 PL_thismad = PL_nexttoke[PL_lasttoke].next_mad;
3261 PL_nexttoke[PL_lasttoke].next_mad = 0;
3262 if (PL_thismad && PL_thismad->mad_key == '_') {
3263 PL_thiswhite = (SV*)PL_thismad->mad_val;
3264 PL_thismad->mad_val = 0;
3265 mad_free(PL_thismad);
3270 PL_lex_state = PL_lex_defer;
3271 PL_expect = PL_lex_expect;
3272 PL_lex_defer = LEX_NORMAL;
3273 if (!PL_nexttoke[PL_lasttoke].next_type)
3278 pl_yylval = PL_nextval[PL_nexttoke];
3280 PL_lex_state = PL_lex_defer;
3281 PL_expect = PL_lex_expect;
3282 PL_lex_defer = LEX_NORMAL;
3286 /* FIXME - can these be merged? */
3287 return(PL_nexttoke[PL_lasttoke].next_type);
3289 return REPORT(PL_nexttype[PL_nexttoke]);
3292 /* interpolated case modifiers like \L \U, including \Q and \E.
3293 when we get here, PL_bufptr is at the \
3295 case LEX_INTERPCASEMOD:
3297 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
3298 Perl_croak(aTHX_ "panic: INTERPCASEMOD");
3300 /* handle \E or end of string */
3301 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
3303 if (PL_lex_casemods) {
3304 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
3305 PL_lex_casestack[PL_lex_casemods] = '\0';
3307 if (PL_bufptr != PL_bufend
3308 && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q')) {
3310 PL_lex_state = LEX_INTERPCONCAT;
3313 PL_thistoken = newSVpvs("\\E");
3319 while (PL_bufptr != PL_bufend &&
3320 PL_bufptr[0] == '\\' && PL_bufptr[1] == 'E') {
3322 PL_thiswhite = newSVpvs("");
3323 sv_catpvn(PL_thiswhite, PL_bufptr, 2);
3327 if (PL_bufptr != PL_bufend)
3330 PL_lex_state = LEX_INTERPCONCAT;
3334 DEBUG_T({ PerlIO_printf(Perl_debug_log,
3335 "### Saw case modifier\n"); });
3337 if (s[1] == '\\' && s[2] == 'E') {
3340 PL_thiswhite = newSVpvs("");
3341 sv_catpvn(PL_thiswhite, PL_bufptr, 4);
3344 PL_lex_state = LEX_INTERPCONCAT;
3349 if (!PL_madskills) /* when just compiling don't need correct */
3350 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
3351 tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */
3352 if ((*s == 'L' || *s == 'U') &&
3353 (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U'))) {
3354 PL_lex_casestack[--PL_lex_casemods] = '\0';
3357 if (PL_lex_casemods > 10)
3358 Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
3359 PL_lex_casestack[PL_lex_casemods++] = *s;
3360 PL_lex_casestack[PL_lex_casemods] = '\0';
3361 PL_lex_state = LEX_INTERPCONCAT;
3362 start_force(PL_curforce);
3363 NEXTVAL_NEXTTOKE.ival = 0;
3365 start_force(PL_curforce);
3367 NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
3369 NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
3371 NEXTVAL_NEXTTOKE.ival = OP_LC;
3373 NEXTVAL_NEXTTOKE.ival = OP_UC;
3375 NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
3377 Perl_croak(aTHX_ "panic: yylex");
3379 SV* const tmpsv = newSVpvs("\\ ");
3380 /* replace the space with the character we want to escape
3382 SvPVX(tmpsv)[1] = *s;
3388 if (PL_lex_starts) {
3394 sv_free(PL_thistoken);
3395 PL_thistoken = newSVpvs("");
3398 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
3399 if (PL_lex_casemods == 1 && PL_lex_inpat)
3408 case LEX_INTERPPUSH:
3409 return REPORT(sublex_push());
3411 case LEX_INTERPSTART:
3412 if (PL_bufptr == PL_bufend)
3413 return REPORT(sublex_done());
3414 DEBUG_T({ PerlIO_printf(Perl_debug_log,
3415 "### Interpolated variable\n"); });
3417 PL_lex_dojoin = (*PL_bufptr == '@');
3418 PL_lex_state = LEX_INTERPNORMAL;
3419 if (PL_lex_dojoin) {
3420 start_force(PL_curforce);
3421 NEXTVAL_NEXTTOKE.ival = 0;
3423 start_force(PL_curforce);
3424 force_ident("\"", '$');
3425 start_force(PL_curforce);
3426 NEXTVAL_NEXTTOKE.ival = 0;
3428 start_force(PL_curforce);
3429 NEXTVAL_NEXTTOKE.ival = 0;
3431 start_force(PL_curforce);
3432 NEXTVAL_NEXTTOKE.ival = OP_JOIN; /* emulate join($", ...) */
3435 if (PL_lex_starts++) {
3440 sv_free(PL_thistoken);
3441 PL_thistoken = newSVpvs("");
3444 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
3445 if (!PL_lex_casemods && PL_lex_inpat)
3452 case LEX_INTERPENDMAYBE:
3453 if (intuit_more(PL_bufptr)) {
3454 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
3460 if (PL_lex_dojoin) {
3461 PL_lex_dojoin = FALSE;
3462 PL_lex_state = LEX_INTERPCONCAT;
3466 sv_free(PL_thistoken);
3467 PL_thistoken = newSVpvs("");
3472 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
3473 && SvEVALED(PL_lex_repl))
3475 if (PL_bufptr != PL_bufend)
3476 Perl_croak(aTHX_ "Bad evalled substitution pattern");
3480 case LEX_INTERPCONCAT:
3482 if (PL_lex_brackets)
3483 Perl_croak(aTHX_ "panic: INTERPCONCAT");
3485 if (PL_bufptr == PL_bufend)
3486 return REPORT(sublex_done());
3488 if (SvIVX(PL_linestr) == '\'') {
3489 SV *sv = newSVsv(PL_linestr);
3492 else if ( PL_hints & HINT_NEW_RE )
3493 sv = new_constant(NULL, 0, "qr", sv, sv, "q", 1);
3494 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3498 s = scan_const(PL_bufptr);
3500 PL_lex_state = LEX_INTERPCASEMOD;
3502 PL_lex_state = LEX_INTERPSTART;
3505 if (s != PL_bufptr) {
3506 start_force(PL_curforce);
3508 curmad('X', newSVpvn(PL_bufptr,s-PL_bufptr));
3510 NEXTVAL_NEXTTOKE = pl_yylval;
3513 if (PL_lex_starts++) {
3517 sv_free(PL_thistoken);
3518 PL_thistoken = newSVpvs("");
3521 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
3522 if (!PL_lex_casemods && PL_lex_inpat)
3535 PL_lex_state = LEX_NORMAL;
3536 s = scan_formline(PL_bufptr);
3537 if (!PL_lex_formbrack)
3543 PL_oldoldbufptr = PL_oldbufptr;
3549 sv_free(PL_thistoken);
3552 PL_realtokenstart = s - SvPVX(PL_linestr); /* assume but undo on ws */
3556 if (isIDFIRST_lazy_if(s,UTF))
3558 len = UTF ? Perl_utf8_length(aTHX_ (U8 *) PL_linestart, (U8 *) s) : (STRLEN) (s - PL_linestart);
3559 Perl_croak(aTHX_ "Unrecognized character \\x%02X in column %d", *s & 255, (int) len + 1);
3562 goto fake_eof; /* emulate EOF on ^D or ^Z */
3571 if (PL_lex_brackets) {
3572 yyerror((const char *)
3574 ? "Format not terminated"
3575 : "Missing right curly or square bracket"));
3577 DEBUG_T( { PerlIO_printf(Perl_debug_log,
3578 "### Tokener got EOF\n");
3582 if (s++ < PL_bufend)
3583 goto retry; /* ignore stray nulls */
3586 if (!PL_in_eval && !PL_preambled) {
3587 PL_preambled = TRUE;
3593 /* Generate a string of Perl code to load the debugger.
3594 * If PERL5DB is set, it will return the contents of that,
3595 * otherwise a compile-time require of perl5db.pl. */
3597 const char * const pdb = PerlEnv_getenv("PERL5DB");
3600 sv_setpv(PL_linestr, pdb);
3601 sv_catpvs(PL_linestr,";");
3603 SETERRNO(0,SS_NORMAL);
3604 sv_setpvs(PL_linestr, "BEGIN { require 'perl5db.pl' };");
3607 sv_setpvs(PL_linestr,"");
3608 if (PL_preambleav) {
3609 SV **svp = AvARRAY(PL_preambleav);
3610 SV **const end = svp + AvFILLp(PL_preambleav);
3612 sv_catsv(PL_linestr, *svp);
3614 sv_catpvs(PL_linestr, ";");
3616 sv_free((SV*)PL_preambleav);
3617 PL_preambleav = NULL;
3619 if (PL_minus_n || PL_minus_p) {
3620 sv_catpvs(PL_linestr, "LINE: while (<>) {");
3622 sv_catpvs(PL_linestr,"chomp;");
3625 if ((*PL_splitstr == '/' || *PL_splitstr == '\''
3626 || *PL_splitstr == '"')
3627 && strchr(PL_splitstr + 1, *PL_splitstr))
3628 Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
3630 /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
3631 bytes can be used as quoting characters. :-) */
3632 const char *splits = PL_splitstr;
3633 sv_catpvs(PL_linestr, "our @F=split(q\0");
3636 if (*splits == '\\')
3637 sv_catpvn(PL_linestr, splits, 1);
3638 sv_catpvn(PL_linestr, splits, 1);
3639 } while (*splits++);
3640 /* This loop will embed the trailing NUL of
3641 PL_linestr as the last thing it does before
3643 sv_catpvs(PL_linestr, ");");
3647 sv_catpvs(PL_linestr,"our @F=split(' ');");
3651 sv_catpvs(PL_linestr,
3652 "use feature ':5." STRINGIFY(PERL_VERSION) "';");
3653 sv_catpvs(PL_linestr, "\n");
3654 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
3655 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
3656 PL_last_lop = PL_last_uni = NULL;
3657 if (PERLDB_LINE && PL_curstash != PL_debstash)
3658 update_debugger_info(PL_linestr, NULL, 0);
3662 bof = PL_rsfp ? TRUE : FALSE;
3663 if ((s = filter_gets(PL_linestr, PL_rsfp, 0)) == NULL) {
3666 PL_realtokenstart = -1;
3669 if ((PerlIO *)PL_rsfp == PerlIO_stdin())
3670 PerlIO_clearerr(PL_rsfp);
3672 (void)PerlIO_close(PL_rsfp);
3674 PL_doextract = FALSE;
3676 if (!PL_in_eval && (PL_minus_n || PL_minus_p)) {
3682 sv_setpvs(PL_linestr, ";}continue{print;}");
3684 sv_setpvs(PL_linestr, ";}");
3685 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
3686 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
3687 PL_last_lop = PL_last_uni = NULL;
3688 PL_minus_n = PL_minus_p = 0;
3691 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
3692 PL_last_lop = PL_last_uni = NULL;
3693 sv_setpvn(PL_linestr,"",0);
3694 TOKEN(';'); /* not infinite loop because rsfp is NULL now */
3696 /* If it looks like the start of a BOM or raw UTF-16,
3697 * check if it in fact is. */
3703 #ifdef PERLIO_IS_STDIO
3704 # ifdef __GNU_LIBRARY__
3705 # if __GNU_LIBRARY__ == 1 /* Linux glibc5 */
3706 # define FTELL_FOR_PIPE_IS_BROKEN
3710 # if __GLIBC__ == 1 /* maybe some glibc5 release had it like this? */
3711 # define FTELL_FOR_PIPE_IS_BROKEN
3716 bof = PerlIO_tell(PL_rsfp) == (Off_t)SvCUR(PL_linestr);
3718 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
3719 s = swallow_bom((U8*)s);
3723 /* Incest with pod. */
3726 sv_catsv(PL_thiswhite, PL_linestr);
3728 if (*s == '=' && strnEQ(s, "=cut", 4) && !isALPHA(s[4])) {
3729 sv_setpvn(PL_linestr, "", 0);
3730 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
3731 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
3732 PL_last_lop = PL_last_uni = NULL;
3733 PL_doextract = FALSE;
3737 } while (PL_doextract);
3738 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
3739 if (PERLDB_LINE && PL_curstash != PL_debstash)
3740 update_debugger_info(PL_linestr, NULL, 0);
3741 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
3742 PL_last_lop = PL_last_uni = NULL;
3743 if (CopLINE(PL_curcop) == 1) {
3744 while (s < PL_bufend && isSPACE(*s))
3746 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
3750 PL_thiswhite = newSVpvn(PL_linestart, s - PL_linestart);
3754 if (*s == '#' && *(s+1) == '!')
3756 #ifdef ALTERNATE_SHEBANG
3758 static char const as[] = ALTERNATE_SHEBANG;
3759 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
3760 d = s + (sizeof(as) - 1);
3762 #endif /* ALTERNATE_SHEBANG */
3771 while (*d && !isSPACE(*d))
3775 #ifdef ARG_ZERO_IS_SCRIPT
3776 if (ipathend > ipath) {
3778 * HP-UX (at least) sets argv[0] to the script name,
3779 * which makes $^X incorrect. And Digital UNIX and Linux,
3780 * at least, set argv[0] to the basename of the Perl
3781 * interpreter. So, having found "#!", we'll set it right.
3783 SV * const x = GvSV(gv_fetchpvs("\030", GV_ADD|GV_NOTQUAL,
3785 assert(SvPOK(x) || SvGMAGICAL(x));
3786 if (sv_eq(x, CopFILESV(PL_curcop))) {
3787 sv_setpvn(x, ipath, ipathend - ipath);
3793 const char *bstart = SvPV_const(CopFILESV(PL_curcop),blen);
3794 const char * const lstart = SvPV_const(x,llen);
3796 bstart += blen - llen;
3797 if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
3798 sv_setpvn(x, ipath, ipathend - ipath);
3803 TAINT_NOT; /* $^X is always tainted, but that's OK */
3805 #endif /* ARG_ZERO_IS_SCRIPT */
3810 d = instr(s,"perl -");
3812 d = instr(s,"perl");
3814 /* avoid getting into infinite loops when shebang
3815 * line contains "Perl" rather than "perl" */
3817 for (d = ipathend-4; d >= ipath; --d) {
3818 if ((*d == 'p' || *d == 'P')
3819 && !ibcmp(d, "perl", 4))
3829 #ifdef ALTERNATE_SHEBANG
3831 * If the ALTERNATE_SHEBANG on this system starts with a
3832 * character that can be part of a Perl expression, then if
3833 * we see it but not "perl", we're probably looking at the
3834 * start of Perl code, not a request to hand off to some
3835 * other interpreter. Similarly, if "perl" is there, but
3836 * not in the first 'word' of the line, we assume the line
3837 * contains the start of the Perl program.
3839 if (d && *s != '#') {
3840 const char *c = ipath;
3841 while (*c && !strchr("; \t\r\n\f\v#", *c))
3844 d = NULL; /* "perl" not in first word; ignore */
3846 *s = '#'; /* Don't try to parse shebang line */
3848 #endif /* ALTERNATE_SHEBANG */
3849 #ifndef MACOS_TRADITIONAL
3854 !instr(s,"indir") &&
3855 instr(PL_origargv[0],"perl"))
3862 while (s < PL_bufend && isSPACE(*s))
3864 if (s < PL_bufend) {
3865 Newxz(newargv,PL_origargc+3,char*);
3867 while (s < PL_bufend && !isSPACE(*s))
3870 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
3873 newargv = PL_origargv;
3876 PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
3878 Perl_croak(aTHX_ "Can't exec %s", ipath);
3882 while (*d && !isSPACE(*d))
3884 while (SPACE_OR_TAB(*d))
3888 const bool switches_done = PL_doswitches;
3889 const U32 oldpdb = PL_perldb;
3890 const bool oldn = PL_minus_n;
3891 const bool oldp = PL_minus_p;
3895 if (*d1 == 'M' || *d1 == 'm' || *d1 == 'C') {
3896 const char * const m = d1;
3897 while (*d1 && !isSPACE(*d1))
3899 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
3902 d1 = moreswitches(d1);
3904 if (PL_doswitches && !switches_done) {
3905 int argc = PL_origargc;
3906 char **argv = PL_origargv;
3909 } while (argc && argv[0][0] == '-' && argv[0][1]);
3910 init_argv_symbols(argc,argv);
3912 if ((PERLDB_LINE && !oldpdb) ||
3913 ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
3914 /* if we have already added "LINE: while (<>) {",
3915 we must not do it again */
3917 sv_setpvn(PL_linestr, "", 0);
3918 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
3919 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
3920 PL_last_lop = PL_last_uni = NULL;
3921 PL_preambled = FALSE;
3923 (void)gv_fetchfile(PL_origfilename);
3930 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
3932 PL_lex_state = LEX_FORMLINE;
3937 #ifdef PERL_STRICT_CR
3938 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
3940 "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
3942 case ' ': case '\t': case '\f': case 013:
3943 #ifdef MACOS_TRADITIONAL
3947 PL_realtokenstart = -1;
3949 PL_thiswhite = newSVpvs("");
3950 sv_catpvn(PL_thiswhite, s, 1);
3957 PL_realtokenstart = -1;
3961 if (PL_lex_state != LEX_NORMAL || (PL_in_eval && !PL_rsfp)) {
3962 if (*s == '#' && s == PL_linestart && PL_in_eval && !PL_rsfp) {
3963 /* handle eval qq[#line 1 "foo"\n ...] */
3964 CopLINE_dec(PL_curcop);
3967 if (PL_madskills && !PL_lex_formbrack && !PL_in_eval) {
3969 if (!PL_in_eval || PL_rsfp)
3974 while (d < PL_bufend && *d != '\n')
3978 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
3979 Perl_croak(aTHX_ "panic: input overflow");
3982 PL_thiswhite = newSVpvn(s, d - s);
3987 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
3989 PL_lex_state = LEX_FORMLINE;
3995 if (PL_madskills && CopLINE(PL_curcop) >= 1 && !PL_lex_formbrack) {
3996 if (CopLINE(PL_curcop) == 1 && s[0] == '#' && s[1] == '!') {
3999 TOKEN(PEG); /* make sure any #! line is accessible */
4004 /* if (PL_madskills && PL_lex_formbrack) { */
4006 while (d < PL_bufend && *d != '\n')
4010 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
4011 Perl_croak(aTHX_ "panic: input overflow");
4012 if (PL_madskills && CopLINE(PL_curcop) >= 1) {
4014 PL_thiswhite = newSVpvs("");
4015 if (CopLINE(PL_curcop) == 1) {
4016 sv_setpvn(PL_thiswhite, "", 0);
4019 sv_catpvn(PL_thiswhite, s, d - s);
4033 if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {
4041 while (s < PL_bufend && SPACE_OR_TAB(*s))
4044 if (strnEQ(s,"=>",2)) {
4045 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
4046 DEBUG_T( { printbuf("### Saw unary minus before =>, forcing word %s\n", s); } );
4047 OPERATOR('-'); /* unary minus */
4049 PL_last_uni = PL_oldbufptr;
4051 case 'r': ftst = OP_FTEREAD; break;
4052 case 'w': ftst = OP_FTEWRITE; break;
4053 case 'x': ftst = OP_FTEEXEC; break;
4054 case 'o': ftst = OP_FTEOWNED; break;
4055 case 'R': ftst = OP_FTRREAD; break;
4056 case 'W': ftst = OP_FTRWRITE; break;
4057 case 'X': ftst = OP_FTREXEC; break;
4058 case 'O': ftst = OP_FTROWNED; break;
4059 case 'e': ftst = OP_FTIS; break;
4060 case 'z': ftst = OP_FTZERO; break;
4061 case 's': ftst = OP_FTSIZE; break;
4062 case 'f': ftst = OP_FTFILE; break;
4063 case 'd': ftst = OP_FTDIR; break;
4064 case 'l': ftst = OP_FTLINK; break;
4065 case 'p': ftst = OP_FTPIPE; break;
4066 case 'S': ftst = OP_FTSOCK; break;
4067 case 'u': ftst = OP_FTSUID; break;
4068 case 'g': ftst = OP_FTSGID; break;
4069 case 'k': ftst = OP_FTSVTX; break;
4070 case 'b': ftst = OP_FTBLK; break;
4071 case 'c': ftst = OP_FTCHR; break;
4072 case 't': ftst = OP_FTTTY; break;
4073 case 'T': ftst = OP_FTTEXT; break;
4074 case 'B': ftst = OP_FTBINARY; break;
4075 case 'M': case 'A': case 'C':
4076 gv_fetchpvs("\024", GV_ADD|GV_NOTQUAL, SVt_PV);
4078 case 'M': ftst = OP_FTMTIME; break;
4079 case 'A': ftst = OP_FTATIME; break;
4080 case 'C': ftst = OP_FTCTIME; break;
4088 PL_last_lop_op = (OPCODE)ftst;
4089 DEBUG_T( { PerlIO_printf(Perl_debug_log,
4090 "### Saw file test %c\n", (int)tmp);
4095 /* Assume it was a minus followed by a one-letter named
4096 * subroutine call (or a -bareword), then. */
4097 DEBUG_T( { PerlIO_printf(Perl_debug_log,
4098 "### '-%c' looked like a file test but was not\n",
4105 const char tmp = *s++;
4108 if (PL_expect == XOPERATOR)
4113 else if (*s == '>') {
4116 if (isIDFIRST_lazy_if(s,UTF)) {
4117 s = force_word(s,METHOD,FALSE,TRUE,FALSE);
4125 if (PL_expect == XOPERATOR)
4128 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
4130 OPERATOR('-'); /* unary minus */
4136 const char tmp = *s++;
4139 if (PL_expect == XOPERATOR)
4144 if (PL_expect == XOPERATOR)
4147 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
4154 if (PL_expect != XOPERATOR) {
4155 s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
4156 PL_expect = XOPERATOR;
4157 force_ident(PL_tokenbuf, '*');
4170 if (PL_expect == XOPERATOR) {
4174 PL_tokenbuf[0] = '%';
4175 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
4176 sizeof PL_tokenbuf - 1, FALSE);
4177 if (!PL_tokenbuf[1]) {
4180 PL_pending_ident = '%';
4191 && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR))
4198 const char tmp = *s++;
4204 goto just_a_word_zero_gv;
4207 switch (PL_expect) {
4213 if (!PL_in_my || PL_lex_state != LEX_NORMAL)
4215 PL_bufptr = s; /* update in case we back off */
4221 PL_expect = XTERMBLOCK;
4224 stuffstart = s - SvPVX(PL_linestr) - 1;
4228 while (isIDFIRST_lazy_if(s,UTF)) {
4231 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
4232 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len, 0))) {
4233 if (tmp < 0) tmp = -tmp;
4247 sv = newSVpvn(s, len);
4249 d = scan_str(d,TRUE,TRUE);
4251 /* MUST advance bufptr here to avoid bogus
4252 "at end of line" context messages from yyerror().
4254 PL_bufptr = s + len;
4255 yyerror("Unterminated attribute parameter in attribute list");
4259 return REPORT(0); /* EOF indicator */
4263 sv_catsv(sv, PL_lex_stuff);
4264 attrs = append_elem(OP_LIST, attrs,
4265 newSVOP(OP_CONST, 0, sv));
4266 SvREFCNT_dec(PL_lex_stuff);
4267 PL_lex_stuff = NULL;
4270 if (len == 6 && strnEQ(SvPVX(sv), "unique", len)) {
4272 if (PL_in_my == KEY_our) {
4274 GvUNIQUE_on(cGVOPx_gv(pl_yylval.opval));
4276 /* skip to avoid loading attributes.pm */
4278 deprecate(":unique");
4281 Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables");
4284 /* NOTE: any CV attrs applied here need to be part of
4285 the CVf_BUILTIN_ATTRS define in cv.h! */
4286 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "lvalue", len)) {
4288 CvLVALUE_on(PL_compcv);
4290 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "locked", len)) {
4292 CvLOCKED_on(PL_compcv);
4294 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "method", len)) {
4296 CvMETHOD_on(PL_compcv);
4298 /* After we've set the flags, it could be argued that
4299 we don't need to do the attributes.pm-based setting
4300 process, and shouldn't bother appending recognized
4301 flags. To experiment with that, uncomment the
4302 following "else". (Note that's already been
4303 uncommented. That keeps the above-applied built-in
4304 attributes from being intercepted (and possibly
4305 rejected) by a package's attribute routines, but is
4306 justified by the performance win for the common case
4307 of applying only built-in attributes.) */
4309 attrs = append_elem(OP_LIST, attrs,
4310 newSVOP(OP_CONST, 0,
4314 if (*s == ':' && s[1] != ':')
4317 break; /* require real whitespace or :'s */
4318 /* XXX losing whitespace on sequential attributes here */
4322 = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
4323 if (*s != ';' && *s != '}' && *s != tmp
4324 && (tmp != '=' || *s != ')')) {
4325 const char q = ((*s == '\'') ? '"' : '\'');
4326 /* If here for an expression, and parsed no attrs, back
4328 if (tmp == '=' && !attrs) {
4332 /* MUST advance bufptr here to avoid bogus "at end of line"
4333 context messages from yyerror().
4336 yyerror( (const char *)
4338 ? Perl_form(aTHX_ "Invalid separator character "
4339 "%c%c%c in attribute list", q, *s, q)
4340 : "Unterminated attribute list" ) );
4348 start_force(PL_curforce);
4349 NEXTVAL_NEXTTOKE.opval = attrs;
4350 CURMAD('_', PL_nextwhite);
4355 PL_thistoken = newSVpvn(SvPVX(PL_linestr) + stuffstart,
4356 (s - SvPVX(PL_linestr)) - stuffstart);
4364 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
4365 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */
4373 const char tmp = *s++;
4378 const char tmp = *s++;
4386 if (PL_lex_brackets <= 0)
4387 yyerror("Unmatched right square bracket");
4390 if (PL_lex_state == LEX_INTERPNORMAL) {
4391 if (PL_lex_brackets == 0) {
4392 if (*s == '-' && s[1] == '>')
4393 PL_lex_state = LEX_INTERPENDMAYBE;
4394 else if (*s != '[' && *s != '{')
4395 PL_lex_state = LEX_INTERPEND;
4402 if (PL_lex_brackets > 100) {
4403 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
4405 switch (PL_expect) {
4407 if (PL_lex_formbrack) {
4411 if (PL_oldoldbufptr == PL_last_lop)
4412 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
4414 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
4415 OPERATOR(HASHBRACK);
4417 while (s < PL_bufend && SPACE_OR_TAB(*s))