3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
4 * 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, by Larry Wall and others
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
12 * "It all comes from here, the stench and the peril." --Frodo
16 * This file is the lexer for Perl. It's closely linked to the
19 * The main routine is yylex(), which returns the next token.
23 #define PERL_IN_TOKE_C
26 #define new_constant(a,b,c,d,e,f,g) \
27 S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g)
29 #define pl_yylval (PL_parser->yylval)
31 /* YYINITDEPTH -- initial size of the parser's stacks. */
32 #define YYINITDEPTH 200
34 /* XXX temporary backwards compatibility */
35 #define PL_lex_brackets (PL_parser->lex_brackets)
36 #define PL_lex_brackstack (PL_parser->lex_brackstack)
37 #define PL_lex_casemods (PL_parser->lex_casemods)
38 #define PL_lex_casestack (PL_parser->lex_casestack)
39 #define PL_lex_defer (PL_parser->lex_defer)
40 #define PL_lex_dojoin (PL_parser->lex_dojoin)
41 #define PL_lex_expect (PL_parser->lex_expect)
42 #define PL_lex_formbrack (PL_parser->lex_formbrack)
43 #define PL_lex_inpat (PL_parser->lex_inpat)
44 #define PL_lex_inwhat (PL_parser->lex_inwhat)
45 #define PL_lex_op (PL_parser->lex_op)
46 #define PL_lex_repl (PL_parser->lex_repl)
47 #define PL_lex_starts (PL_parser->lex_starts)
48 #define PL_lex_stuff (PL_parser->lex_stuff)
49 #define PL_multi_start (PL_parser->multi_start)
50 #define PL_multi_open (PL_parser->multi_open)
51 #define PL_multi_close (PL_parser->multi_close)
52 #define PL_pending_ident (PL_parser->pending_ident)
53 #define PL_preambled (PL_parser->preambled)
54 #define PL_sublex_info (PL_parser->sublex_info)
55 #define PL_linestr (PL_parser->linestr)
56 #define PL_expect (PL_parser->expect)
57 #define PL_copline (PL_parser->copline)
58 #define PL_bufptr (PL_parser->bufptr)
59 #define PL_oldbufptr (PL_parser->oldbufptr)
60 #define PL_oldoldbufptr (PL_parser->oldoldbufptr)
61 #define PL_linestart (PL_parser->linestart)
62 #define PL_bufend (PL_parser->bufend)
63 #define PL_last_uni (PL_parser->last_uni)
64 #define PL_last_lop (PL_parser->last_lop)
65 #define PL_last_lop_op (PL_parser->last_lop_op)
66 #define PL_lex_state (PL_parser->lex_state)
67 #define PL_rsfp (PL_parser->rsfp)
68 #define PL_rsfp_filters (PL_parser->rsfp_filters)
69 #define PL_in_my (PL_parser->in_my)
70 #define PL_in_my_stash (PL_parser->in_my_stash)
71 #define PL_tokenbuf (PL_parser->tokenbuf)
72 #define PL_multi_end (PL_parser->multi_end)
73 #define PL_error_count (PL_parser->error_count)
76 # define PL_endwhite (PL_parser->endwhite)
77 # define PL_faketokens (PL_parser->faketokens)
78 # define PL_lasttoke (PL_parser->lasttoke)
79 # define PL_nextwhite (PL_parser->nextwhite)
80 # define PL_realtokenstart (PL_parser->realtokenstart)
81 # define PL_skipwhite (PL_parser->skipwhite)
82 # define PL_thisclose (PL_parser->thisclose)
83 # define PL_thismad (PL_parser->thismad)
84 # define PL_thisopen (PL_parser->thisopen)
85 # define PL_thisstuff (PL_parser->thisstuff)
86 # define PL_thistoken (PL_parser->thistoken)
87 # define PL_thiswhite (PL_parser->thiswhite)
88 # define PL_thiswhite (PL_parser->thiswhite)
89 # define PL_nexttoke (PL_parser->nexttoke)
90 # define PL_curforce (PL_parser->curforce)
92 # define PL_nexttoke (PL_parser->nexttoke)
93 # define PL_nexttype (PL_parser->nexttype)
94 # define PL_nextval (PL_parser->nextval)
98 S_pending_ident(pTHX);
100 static const char ident_too_long[] = "Identifier too long";
101 static const char commaless_variable_list[] = "comma-less variable list";
103 #ifndef PERL_NO_UTF16_FILTER
104 static I32 utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen);
105 static I32 utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen);
109 # define CURMAD(slot,sv) if (PL_madskills) { curmad(slot,sv); sv = 0; }
110 # define NEXTVAL_NEXTTOKE PL_nexttoke[PL_curforce].next_val
112 # define CURMAD(slot,sv)
113 # define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
116 #define XFAKEBRACK 128
117 #define XENUMMASK 127
119 #ifdef USE_UTF8_SCRIPTS
120 # define UTF (!IN_BYTES)
122 # define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8))
125 /* In variables named $^X, these are the legal values for X.
126 * 1999-02-27 mjd-perl-patch@plover.com */
127 #define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
129 /* On MacOS, respect nonbreaking spaces */
130 #ifdef MACOS_TRADITIONAL
131 #define SPACE_OR_TAB(c) ((c)==' '||(c)=='\312'||(c)=='\t')
133 #define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
136 /* LEX_* are values for PL_lex_state, the state of the lexer.
137 * They are arranged oddly so that the guard on the switch statement
138 * can get by with a single comparison (if the compiler is smart enough).
141 /* #define LEX_NOTPARSING 11 is done in perl.h. */
143 #define LEX_NORMAL 10 /* normal code (ie not within "...") */
144 #define LEX_INTERPNORMAL 9 /* code within a string, eg "$foo[$x+1]" */
145 #define LEX_INTERPCASEMOD 8 /* expecting a \U, \Q or \E etc */
146 #define LEX_INTERPPUSH 7 /* starting a new sublex parse level */
147 #define LEX_INTERPSTART 6 /* expecting the start of a $var */
149 /* at end of code, eg "$x" followed by: */
150 #define LEX_INTERPEND 5 /* ... eg not one of [, { or -> */
151 #define LEX_INTERPENDMAYBE 4 /* ... eg one of [, { or -> */
153 #define LEX_INTERPCONCAT 3 /* expecting anything, eg at start of
154 string or after \E, $foo, etc */
155 #define LEX_INTERPCONST 2 /* NOT USED */
156 #define LEX_FORMLINE 1 /* expecting a format line */
157 #define LEX_KNOWNEXT 0 /* next token known; just return it */
161 static const char* const lex_state_names[] = {
180 #include "keywords.h"
182 /* CLINE is a macro that ensures PL_copline has a sane value */
187 #define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
190 # define SKIPSPACE0(s) skipspace0(s)
191 # define SKIPSPACE1(s) skipspace1(s)
192 # define SKIPSPACE2(s,tsv) skipspace2(s,&tsv)
193 # define PEEKSPACE(s) skipspace2(s,0)
195 # define SKIPSPACE0(s) skipspace(s)
196 # define SKIPSPACE1(s) skipspace(s)
197 # define SKIPSPACE2(s,tsv) skipspace(s)
198 # define PEEKSPACE(s) skipspace(s)
202 * Convenience functions to return different tokens and prime the
203 * lexer for the next token. They all take an argument.
205 * TOKEN : generic token (used for '(', DOLSHARP, etc)
206 * OPERATOR : generic operator
207 * AOPERATOR : assignment operator
208 * PREBLOCK : beginning the block after an if, while, foreach, ...
209 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
210 * PREREF : *EXPR where EXPR is not a simple identifier
211 * TERM : expression term
212 * LOOPX : loop exiting command (goto, last, dump, etc)
213 * FTST : file test operator
214 * FUN0 : zero-argument function
215 * FUN1 : not used, except for not, which isn't a UNIOP
216 * BOop : bitwise or or xor
218 * SHop : shift operator
219 * PWop : power operator
220 * PMop : pattern-matching operator
221 * Aop : addition-level operator
222 * Mop : multiplication-level operator
223 * Eop : equality-testing operator
224 * Rop : relational operator <= != gt
226 * Also see LOP and lop() below.
229 #ifdef DEBUGGING /* Serve -DT. */
230 # define REPORT(retval) tokereport((I32)retval)
232 # define REPORT(retval) (retval)
235 #define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
236 #define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
237 #define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
238 #define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
239 #define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
240 #define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
241 #define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
242 #define LOOPX(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
243 #define FTST(f) return (pl_yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
244 #define FUN0(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
245 #define FUN1(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
246 #define BOop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
247 #define BAop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
248 #define SHop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
249 #define PWop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
250 #define PMop(f) return(pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
251 #define Aop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
252 #define Mop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
253 #define Eop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
254 #define Rop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
256 /* This bit of chicanery makes a unary function followed by
257 * a parenthesis into a function with one argument, highest precedence.
258 * The UNIDOR macro is for unary functions that can be followed by the //
259 * operator (such as C<shift // 0>).
261 #define UNI2(f,x) { \
262 pl_yylval.ival = f; \
265 PL_last_uni = PL_oldbufptr; \
266 PL_last_lop_op = f; \
268 return REPORT( (int)FUNC1 ); \
270 return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
272 #define UNI(f) UNI2(f,XTERM)
273 #define UNIDOR(f) UNI2(f,XTERMORDORDOR)
275 #define UNIBRACK(f) { \
276 pl_yylval.ival = f; \
278 PL_last_uni = PL_oldbufptr; \
280 return REPORT( (int)FUNC1 ); \
282 return REPORT( (*s == '(') ? (int)FUNC1 : (int)UNIOP ); \
285 /* grandfather return to old style */
286 #define OLDLOP(f) return(pl_yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
290 /* how to interpret the pl_yylval associated with the token */
294 TOKENTYPE_OPNUM, /* pl_yylval.ival contains an opcode number */
300 static struct debug_tokens {
302 enum token_type type;
304 } const debug_tokens[] =
306 { ADDOP, TOKENTYPE_OPNUM, "ADDOP" },
307 { ANDAND, TOKENTYPE_NONE, "ANDAND" },
308 { ANDOP, TOKENTYPE_NONE, "ANDOP" },
309 { ANONSUB, TOKENTYPE_IVAL, "ANONSUB" },
310 { ARROW, TOKENTYPE_NONE, "ARROW" },
311 { ASSIGNOP, TOKENTYPE_OPNUM, "ASSIGNOP" },
312 { BITANDOP, TOKENTYPE_OPNUM, "BITANDOP" },
313 { BITOROP, TOKENTYPE_OPNUM, "BITOROP" },
314 { COLONATTR, TOKENTYPE_NONE, "COLONATTR" },
315 { CONTINUE, TOKENTYPE_NONE, "CONTINUE" },
316 { DEFAULT, TOKENTYPE_NONE, "DEFAULT" },
317 { DO, TOKENTYPE_NONE, "DO" },
318 { DOLSHARP, TOKENTYPE_NONE, "DOLSHARP" },
319 { DORDOR, TOKENTYPE_NONE, "DORDOR" },
320 { DOROP, TOKENTYPE_OPNUM, "DOROP" },
321 { DOTDOT, TOKENTYPE_IVAL, "DOTDOT" },
322 { ELSE, TOKENTYPE_NONE, "ELSE" },
323 { ELSIF, TOKENTYPE_IVAL, "ELSIF" },
324 { EQOP, TOKENTYPE_OPNUM, "EQOP" },
325 { FOR, TOKENTYPE_IVAL, "FOR" },
326 { FORMAT, TOKENTYPE_NONE, "FORMAT" },
327 { FUNC, TOKENTYPE_OPNUM, "FUNC" },
328 { FUNC0, TOKENTYPE_OPNUM, "FUNC0" },
329 { FUNC0SUB, TOKENTYPE_OPVAL, "FUNC0SUB" },
330 { FUNC1, TOKENTYPE_OPNUM, "FUNC1" },
331 { FUNCMETH, TOKENTYPE_OPVAL, "FUNCMETH" },
332 { GIVEN, TOKENTYPE_IVAL, "GIVEN" },
333 { HASHBRACK, TOKENTYPE_NONE, "HASHBRACK" },
334 { IF, TOKENTYPE_IVAL, "IF" },
335 { LABEL, TOKENTYPE_PVAL, "LABEL" },
336 { LOCAL, TOKENTYPE_IVAL, "LOCAL" },
337 { LOOPEX, TOKENTYPE_OPNUM, "LOOPEX" },
338 { LSTOP, TOKENTYPE_OPNUM, "LSTOP" },
339 { LSTOPSUB, TOKENTYPE_OPVAL, "LSTOPSUB" },
340 { MATCHOP, TOKENTYPE_OPNUM, "MATCHOP" },
341 { METHOD, TOKENTYPE_OPVAL, "METHOD" },
342 { MULOP, TOKENTYPE_OPNUM, "MULOP" },
343 { MY, TOKENTYPE_IVAL, "MY" },
344 { MYSUB, TOKENTYPE_NONE, "MYSUB" },
345 { NOAMP, TOKENTYPE_NONE, "NOAMP" },
346 { NOTOP, TOKENTYPE_NONE, "NOTOP" },
347 { OROP, TOKENTYPE_IVAL, "OROP" },
348 { OROR, TOKENTYPE_NONE, "OROR" },
349 { PACKAGE, TOKENTYPE_NONE, "PACKAGE" },
350 { PMFUNC, TOKENTYPE_OPVAL, "PMFUNC" },
351 { POSTDEC, TOKENTYPE_NONE, "POSTDEC" },
352 { POSTINC, TOKENTYPE_NONE, "POSTINC" },
353 { POWOP, TOKENTYPE_OPNUM, "POWOP" },
354 { PREDEC, TOKENTYPE_NONE, "PREDEC" },
355 { PREINC, TOKENTYPE_NONE, "PREINC" },
356 { PRIVATEREF, TOKENTYPE_OPVAL, "PRIVATEREF" },
357 { REFGEN, TOKENTYPE_NONE, "REFGEN" },
358 { RELOP, TOKENTYPE_OPNUM, "RELOP" },
359 { SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" },
360 { SUB, TOKENTYPE_NONE, "SUB" },
361 { THING, TOKENTYPE_OPVAL, "THING" },
362 { UMINUS, TOKENTYPE_NONE, "UMINUS" },
363 { UNIOP, TOKENTYPE_OPNUM, "UNIOP" },
364 { UNIOPSUB, TOKENTYPE_OPVAL, "UNIOPSUB" },
365 { UNLESS, TOKENTYPE_IVAL, "UNLESS" },
366 { UNTIL, TOKENTYPE_IVAL, "UNTIL" },
367 { USE, TOKENTYPE_IVAL, "USE" },
368 { WHEN, TOKENTYPE_IVAL, "WHEN" },
369 { WHILE, TOKENTYPE_IVAL, "WHILE" },
370 { WORD, TOKENTYPE_OPVAL, "WORD" },
371 { 0, TOKENTYPE_NONE, NULL }
374 /* dump the returned token in rv, plus any optional arg in pl_yylval */
377 S_tokereport(pTHX_ I32 rv)
381 const char *name = NULL;
382 enum token_type type = TOKENTYPE_NONE;
383 const struct debug_tokens *p;
384 SV* const report = newSVpvs("<== ");
386 for (p = debug_tokens; p->token; p++) {
387 if (p->token == (int)rv) {
394 Perl_sv_catpv(aTHX_ report, name);
395 else if ((char)rv > ' ' && (char)rv < '~')
396 Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
398 sv_catpvs(report, "EOF");
400 Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
403 case TOKENTYPE_GVVAL: /* doesn't appear to be used */
406 Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)pl_yylval.ival);
408 case TOKENTYPE_OPNUM:
409 Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
410 PL_op_name[pl_yylval.ival]);
413 Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", pl_yylval.pval);
415 case TOKENTYPE_OPVAL:
416 if (pl_yylval.opval) {
417 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
418 PL_op_name[pl_yylval.opval->op_type]);
419 if (pl_yylval.opval->op_type == OP_CONST) {
420 Perl_sv_catpvf(aTHX_ report, " %s",
421 SvPEEK(cSVOPx_sv(pl_yylval.opval)));
426 sv_catpvs(report, "(opval=null)");
429 PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
435 /* print the buffer with suitable escapes */
438 S_printbuf(pTHX_ const char* fmt, const char* s)
440 SV* const tmp = newSVpvs("");
441 PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
450 * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
451 * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
455 S_ao(pTHX_ int toketype)
458 if (*PL_bufptr == '=') {
460 if (toketype == ANDAND)
461 pl_yylval.ival = OP_ANDASSIGN;
462 else if (toketype == OROR)
463 pl_yylval.ival = OP_ORASSIGN;
464 else if (toketype == DORDOR)
465 pl_yylval.ival = OP_DORASSIGN;
473 * When Perl expects an operator and finds something else, no_op
474 * prints the warning. It always prints "<something> found where
475 * operator expected. It prints "Missing semicolon on previous line?"
476 * if the surprise occurs at the start of the line. "do you need to
477 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
478 * where the compiler doesn't know if foo is a method call or a function.
479 * It prints "Missing operator before end of line" if there's nothing
480 * after the missing operator, or "... before <...>" if there is something
481 * after the missing operator.
485 S_no_op(pTHX_ const char *what, char *s)
488 char * const oldbp = PL_bufptr;
489 const bool is_first = (PL_oldbufptr == PL_linestart);
495 yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
496 if (ckWARN_d(WARN_SYNTAX)) {
498 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
499 "\t(Missing semicolon on previous line?)\n");
500 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
502 for (t = PL_oldoldbufptr; (isALNUM_lazy_if(t,UTF) || *t == ':'); t++)
504 if (t < PL_bufptr && isSPACE(*t))
505 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
506 "\t(Do you need to predeclare %.*s?)\n",
507 (int)(t - PL_oldoldbufptr), PL_oldoldbufptr);
511 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
512 "\t(Missing operator before %.*s?)\n", (int)(s - oldbp), oldbp);
520 * Complain about missing quote/regexp/heredoc terminator.
521 * If it's called with NULL then it cauterizes the line buffer.
522 * If we're in a delimited string and the delimiter is a control
523 * character, it's reformatted into a two-char sequence like ^C.
528 S_missingterm(pTHX_ char *s)
534 char * const nl = strrchr(s,'\n');
540 iscntrl(PL_multi_close)
542 PL_multi_close < 32 || PL_multi_close == 127
546 tmpbuf[1] = (char)toCTRL(PL_multi_close);
551 *tmpbuf = (char)PL_multi_close;
555 q = strchr(s,'"') ? '\'' : '"';
556 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
559 #define FEATURE_IS_ENABLED(name) \
560 ((0 != (PL_hints & HINT_LOCALIZE_HH)) \
561 && S_feature_is_enabled(aTHX_ STR_WITH_LEN(name)))
562 /* The longest string we pass in. */
563 #define MAX_FEATURE_LEN (sizeof("switch")-1)
566 * S_feature_is_enabled
567 * Check whether the named feature is enabled.
570 S_feature_is_enabled(pTHX_ const char *name, STRLEN namelen)
573 HV * const hinthv = GvHV(PL_hintgv);
574 char he_name[8 + MAX_FEATURE_LEN] = "feature_";
575 assert(namelen <= MAX_FEATURE_LEN);
576 memcpy(&he_name[8], name, namelen);
578 return (hinthv && hv_exists(hinthv, he_name, 8 + namelen));
586 Perl_deprecate(pTHX_ const char *s)
588 if (ckWARN(WARN_DEPRECATED))
589 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED), "Use of %s is deprecated", s);
593 Perl_deprecate_old(pTHX_ const char *s)
595 /* This function should NOT be called for any new deprecated warnings */
596 /* Use Perl_deprecate instead */
598 /* It is here to maintain backward compatibility with the pre-5.8 */
599 /* warnings category hierarchy. The "deprecated" category used to */
600 /* live under the "syntax" category. It is now a top-level category */
601 /* in its own right. */
603 if (ckWARN2(WARN_DEPRECATED, WARN_SYNTAX))
604 Perl_warner(aTHX_ packWARN2(WARN_DEPRECATED, WARN_SYNTAX),
605 "Use of %s is deprecated", s);
609 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
610 * utf16-to-utf8-reversed.
613 #ifdef PERL_CR_FILTER
617 register const char *s = SvPVX_const(sv);
618 register const char * const e = s + SvCUR(sv);
619 /* outer loop optimized to do nothing if there are no CR-LFs */
621 if (*s++ == '\r' && *s == '\n') {
622 /* hit a CR-LF, need to copy the rest */
623 register char *d = s - 1;
626 if (*s == '\r' && s[1] == '\n')
637 S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
639 const I32 count = FILTER_READ(idx+1, sv, maxlen);
640 if (count > 0 && !maxlen)
651 * Create a parser object and initialise its parser and lexer fields
653 * rsfp is the opened file handle to read from (if any),
655 * line holds any initial content already read from the file (or in
656 * the case of no file, such as an eval, the whole contents);
658 * new_filter indicates that this is a new file and it shouldn't inherit
659 * the filters from the current parser (ie require).
663 Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, bool new_filter)
666 const char *s = NULL;
668 yy_parser *parser, *oparser;
670 /* create and initialise a parser */
672 Newxz(parser, 1, yy_parser);
673 parser->old_parser = oparser = PL_parser;
676 Newx(parser->stack, YYINITDEPTH, yy_stack_frame);
677 parser->ps = parser->stack;
678 parser->stack_size = YYINITDEPTH;
680 parser->stack->state = 0;
681 parser->yyerrstatus = 0;
682 parser->yychar = YYEMPTY; /* Cause a token to be read. */
684 /* on scope exit, free this parser and restore any outer one */
686 parser->saved_curcop = PL_curcop;
688 /* initialise lexer state */
691 parser->curforce = -1;
693 parser->nexttoke = 0;
695 parser->copline = NOLINE;
696 parser->lex_state = LEX_NORMAL;
697 parser->expect = XSTATE;
699 parser->rsfp_filters = (new_filter || !oparser) ? newAV()
700 : (AV*)SvREFCNT_inc(oparser->rsfp_filters);
702 Newx(parser->lex_brackstack, 120, char);
703 Newx(parser->lex_casestack, 12, char);
704 *parser->lex_casestack = '\0';
707 s = SvPV_const(line, len);
713 parser->linestr = newSVpvs("\n;");
714 } else if (SvREADONLY(line) || s[len-1] != ';') {
715 parser->linestr = newSVsv(line);
717 sv_catpvs(parser->linestr, "\n;");
720 SvREFCNT_inc_simple_void_NN(line);
721 parser->linestr = line;
723 parser->oldoldbufptr =
726 parser->linestart = SvPVX(parser->linestr);
727 parser->bufend = parser->bufptr + SvCUR(parser->linestr);
728 parser->last_lop = parser->last_uni = NULL;
732 /* delete a parser object */
735 Perl_parser_free(pTHX_ const yy_parser *parser)
737 PL_curcop = parser->saved_curcop;
738 SvREFCNT_dec(parser->linestr);
740 if (parser->rsfp == PerlIO_stdin())
741 PerlIO_clearerr(parser->rsfp);
742 else if (parser->rsfp && parser->old_parser
743 && parser->rsfp != parser->old_parser->rsfp)
744 PerlIO_close(parser->rsfp);
745 SvREFCNT_dec(parser->rsfp_filters);
747 Safefree(parser->stack);
748 Safefree(parser->lex_brackstack);
749 Safefree(parser->lex_casestack);
750 PL_parser = parser->old_parser;
757 * Finalizer for lexing operations. Must be called when the parser is
758 * done with the lexer.
765 PL_doextract = FALSE;
770 * This subroutine has nothing to do with tilting, whether at windmills
771 * or pinball tables. Its name is short for "increment line". It
772 * increments the current line number in CopLINE(PL_curcop) and checks
773 * to see whether the line starts with a comment of the form
774 * # line 500 "foo.pm"
775 * If so, it sets the current line number and file to the values in the comment.
779 S_incline(pTHX_ const char *s)
786 CopLINE_inc(PL_curcop);
789 while (SPACE_OR_TAB(*s))
791 if (strnEQ(s, "line", 4))
795 if (SPACE_OR_TAB(*s))
799 while (SPACE_OR_TAB(*s))
807 while (SPACE_OR_TAB(*s))
809 if (*s == '"' && (t = strchr(s+1, '"'))) {
819 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
821 if (*e != '\n' && *e != '\0')
822 return; /* false alarm */
825 const STRLEN len = t - s;
827 SV *const temp_sv = CopFILESV(PL_curcop);
833 tmplen = SvCUR(temp_sv);
839 if (tmplen > 7 && strnEQ(cf, "(eval ", 6)) {
840 /* must copy *{"::_<(eval N)[oldfilename:L]"}
841 * to *{"::_<newfilename"} */
842 /* However, the long form of evals is only turned on by the
843 debugger - usually they're "(eval %lu)" */
847 STRLEN tmplen2 = len;
848 if (tmplen + 2 <= sizeof smallbuf)
851 Newx(tmpbuf, tmplen + 2, char);
854 memcpy(tmpbuf + 2, cf, tmplen);
856 gvp = (GV**)hv_fetch(PL_defstash, tmpbuf, tmplen, FALSE);
861 if (tmplen2 + 2 <= sizeof smallbuf)
864 Newx(tmpbuf2, tmplen2 + 2, char);
866 if (tmpbuf2 != smallbuf || tmpbuf != smallbuf) {
867 /* Either they malloc'd it, or we malloc'd it,
868 so no prefix is present in ours. */
873 memcpy(tmpbuf2 + 2, s, tmplen2);
876 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
878 gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
879 /* adjust ${"::_<newfilename"} to store the new file name */
880 GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
881 GvHV(gv2) = (HV*)SvREFCNT_inc(GvHV(*gvp));
882 GvAV(gv2) = (AV*)SvREFCNT_inc(GvAV(*gvp));
885 if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
887 if (tmpbuf != smallbuf) Safefree(tmpbuf);
890 CopFILE_free(PL_curcop);
891 CopFILE_setn(PL_curcop, s, len);
893 CopLINE_set(PL_curcop, atoi(n)-1);
897 /* skip space before PL_thistoken */
900 S_skipspace0(pTHX_ register char *s)
907 PL_thiswhite = newSVpvs("");
908 sv_catsv(PL_thiswhite, PL_skipwhite);
909 sv_free(PL_skipwhite);
912 PL_realtokenstart = s - SvPVX(PL_linestr);
916 /* skip space after PL_thistoken */
919 S_skipspace1(pTHX_ register char *s)
921 const char *start = s;
922 I32 startoff = start - SvPVX(PL_linestr);
927 start = SvPVX(PL_linestr) + startoff;
928 if (!PL_thistoken && PL_realtokenstart >= 0) {
929 const char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
930 PL_thistoken = newSVpvn(tstart, start - tstart);
932 PL_realtokenstart = -1;
935 PL_nextwhite = newSVpvs("");
936 sv_catsv(PL_nextwhite, PL_skipwhite);
937 sv_free(PL_skipwhite);
944 S_skipspace2(pTHX_ register char *s, SV **svp)
947 const I32 bufptroff = PL_bufptr - SvPVX(PL_linestr);
948 const I32 startoff = s - SvPVX(PL_linestr);
951 PL_bufptr = SvPVX(PL_linestr) + bufptroff;
952 if (!PL_madskills || !svp)
954 start = SvPVX(PL_linestr) + startoff;
955 if (!PL_thistoken && PL_realtokenstart >= 0) {
956 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
957 PL_thistoken = newSVpvn(tstart, start - tstart);
958 PL_realtokenstart = -1;
963 sv_setsv(*svp, PL_skipwhite);
964 sv_free(PL_skipwhite);
973 S_update_debugger_info(pTHX_ SV *orig_sv, const char *buf, STRLEN len)
975 AV *av = CopFILEAVx(PL_curcop);
977 SV * const sv = newSV_type(SVt_PVMG);
979 sv_setsv(sv, orig_sv);
981 sv_setpvn(sv, buf, len);
984 av_store(av, (I32)CopLINE(PL_curcop), sv);
990 * Called to gobble the appropriate amount and type of whitespace.
991 * Skips comments as well.
995 S_skipspace(pTHX_ register char *s)
1000 int startoff = s - SvPVX(PL_linestr);
1003 sv_free(PL_skipwhite);
1008 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
1009 while (s < PL_bufend && SPACE_OR_TAB(*s))
1019 SSize_t oldprevlen, oldoldprevlen;
1020 SSize_t oldloplen = 0, oldunilen = 0;
1021 while (s < PL_bufend && isSPACE(*s)) {
1022 if (*s++ == '\n' && PL_in_eval && !PL_rsfp)
1027 if (s < PL_bufend && *s == '#') {
1028 while (s < PL_bufend && *s != '\n')
1030 if (s < PL_bufend) {
1032 if (PL_in_eval && !PL_rsfp) {
1039 /* only continue to recharge the buffer if we're at the end
1040 * of the buffer, we're not reading from a source filter, and
1041 * we're in normal lexing mode
1043 if (s < PL_bufend || !PL_rsfp || PL_sublex_info.sub_inwhat ||
1044 PL_lex_state == LEX_FORMLINE)
1051 /* try to recharge the buffer */
1053 curoff = s - SvPVX(PL_linestr);
1056 if ((s = filter_gets(PL_linestr, PL_rsfp,
1057 (prevlen = SvCUR(PL_linestr)))) == NULL)
1060 if (PL_madskills && curoff != startoff) {
1062 PL_skipwhite = newSVpvs("");
1063 sv_catpvn(PL_skipwhite, SvPVX(PL_linestr) + startoff,
1067 /* mustn't throw out old stuff yet if madpropping */
1068 SvCUR(PL_linestr) = curoff;
1069 s = SvPVX(PL_linestr) + curoff;
1071 if (curoff && s[-1] == '\n')
1075 /* end of file. Add on the -p or -n magic */
1076 /* XXX these shouldn't really be added here, can't set PL_faketokens */
1079 sv_catpvs(PL_linestr,
1080 ";}continue{print or die qq(-p destination: $!\\n);}");
1082 sv_setpvs(PL_linestr,
1083 ";}continue{print or die qq(-p destination: $!\\n);}");
1085 PL_minus_n = PL_minus_p = 0;
1087 else if (PL_minus_n) {
1089 sv_catpvn(PL_linestr, ";}", 2);
1091 sv_setpvn(PL_linestr, ";}", 2);
1097 sv_catpvn(PL_linestr,";", 1);
1099 sv_setpvn(PL_linestr,";", 1);
1102 /* reset variables for next time we lex */
1103 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart
1109 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
1110 PL_last_lop = PL_last_uni = NULL;
1112 /* Close the filehandle. Could be from -P preprocessor,
1113 * STDIN, or a regular file. If we were reading code from
1114 * STDIN (because the commandline held no -e or filename)
1115 * then we don't close it, we reset it so the code can
1116 * read from STDIN too.
1119 if (PL_preprocess && !PL_in_eval)
1120 (void)PerlProc_pclose(PL_rsfp);
1121 else if ((PerlIO*)PL_rsfp == PerlIO_stdin())
1122 PerlIO_clearerr(PL_rsfp);
1124 (void)PerlIO_close(PL_rsfp);
1129 /* not at end of file, so we only read another line */
1130 /* make corresponding updates to old pointers, for yyerror() */
1131 oldprevlen = PL_oldbufptr - PL_bufend;
1132 oldoldprevlen = PL_oldoldbufptr - PL_bufend;
1134 oldunilen = PL_last_uni - PL_bufend;
1136 oldloplen = PL_last_lop - PL_bufend;
1137 PL_linestart = PL_bufptr = s + prevlen;
1138 PL_bufend = s + SvCUR(PL_linestr);
1140 PL_oldbufptr = s + oldprevlen;
1141 PL_oldoldbufptr = s + oldoldprevlen;
1143 PL_last_uni = s + oldunilen;
1145 PL_last_lop = s + oldloplen;
1148 /* debugger active and we're not compiling the debugger code,
1149 * so store the line into the debugger's array of lines
1151 if (PERLDB_LINE && PL_curstash != PL_debstash)
1152 update_debugger_info(NULL, PL_bufptr, PL_bufend - PL_bufptr);
1159 PL_skipwhite = newSVpvs("");
1160 curoff = s - SvPVX(PL_linestr);
1161 if (curoff - startoff)
1162 sv_catpvn(PL_skipwhite, SvPVX(PL_linestr) + startoff,
1171 * Check the unary operators to ensure there's no ambiguity in how they're
1172 * used. An ambiguous piece of code would be:
1174 * This doesn't mean rand() + 5. Because rand() is a unary operator,
1175 * the +5 is its argument.
1185 if (PL_oldoldbufptr != PL_last_uni)
1187 while (isSPACE(*PL_last_uni))
1190 while (isALNUM_lazy_if(s,UTF) || *s == '-')
1192 if ((t = strchr(s, '(')) && t < PL_bufptr)
1195 if (ckWARN_d(WARN_AMBIGUOUS)){
1196 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
1197 "Warning: Use of \"%.*s\" without parentheses is ambiguous",
1198 (int)(s - PL_last_uni), PL_last_uni);
1203 * LOP : macro to build a list operator. Its behaviour has been replaced
1204 * with a subroutine, S_lop() for which LOP is just another name.
1207 #define LOP(f,x) return lop(f,x,s)
1211 * Build a list operator (or something that might be one). The rules:
1212 * - if we have a next token, then it's a list operator [why?]
1213 * - if the next thing is an opening paren, then it's a function
1214 * - else it's a list operator
1218 S_lop(pTHX_ I32 f, int x, char *s)
1225 PL_last_lop = PL_oldbufptr;
1226 PL_last_lop_op = (OPCODE)f;
1229 return REPORT(LSTOP);
1232 return REPORT(LSTOP);
1235 return REPORT(FUNC);
1238 return REPORT(FUNC);
1240 return REPORT(LSTOP);
1246 * Sets up for an eventual force_next(). start_force(0) basically does
1247 * an unshift, while start_force(-1) does a push. yylex removes items
1252 S_start_force(pTHX_ int where)
1256 if (where < 0) /* so people can duplicate start_force(PL_curforce) */
1257 where = PL_lasttoke;
1258 assert(PL_curforce < 0 || PL_curforce == where);
1259 if (PL_curforce != where) {
1260 for (i = PL_lasttoke; i > where; --i) {
1261 PL_nexttoke[i] = PL_nexttoke[i-1];
1265 if (PL_curforce < 0) /* in case of duplicate start_force() */
1266 Zero(&PL_nexttoke[where], 1, NEXTTOKE);
1267 PL_curforce = where;
1270 curmad('^', newSVpvs(""));
1271 CURMAD('_', PL_nextwhite);
1276 S_curmad(pTHX_ char slot, SV *sv)
1282 if (PL_curforce < 0)
1283 where = &PL_thismad;
1285 where = &PL_nexttoke[PL_curforce].next_mad;
1288 sv_setpvn(sv, "", 0);
1291 if (UTF && is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
1293 else if (PL_encoding) {
1294 sv_recode_to_utf8(sv, PL_encoding);
1299 /* keep a slot open for the head of the list? */
1300 if (slot != '_' && *where && (*where)->mad_key == '^') {
1301 (*where)->mad_key = slot;
1302 sv_free((SV*)((*where)->mad_val));
1303 (*where)->mad_val = (void*)sv;
1306 addmad(newMADsv(slot, sv), where, 0);
1309 # define start_force(where) NOOP
1310 # define curmad(slot, sv) NOOP
1315 * When the lexer realizes it knows the next token (for instance,
1316 * it is reordering tokens for the parser) then it can call S_force_next
1317 * to know what token to return the next time the lexer is called. Caller
1318 * will need to set PL_nextval[] (or PL_nexttoke[].next_val with PERL_MAD),
1319 * and possibly PL_expect to ensure the lexer handles the token correctly.
1323 S_force_next(pTHX_ I32 type)
1327 if (PL_curforce < 0)
1328 start_force(PL_lasttoke);
1329 PL_nexttoke[PL_curforce].next_type = type;
1330 if (PL_lex_state != LEX_KNOWNEXT)
1331 PL_lex_defer = PL_lex_state;
1332 PL_lex_state = LEX_KNOWNEXT;
1333 PL_lex_expect = PL_expect;
1336 PL_nexttype[PL_nexttoke] = type;
1338 if (PL_lex_state != LEX_KNOWNEXT) {
1339 PL_lex_defer = PL_lex_state;
1340 PL_lex_expect = PL_expect;
1341 PL_lex_state = LEX_KNOWNEXT;
1347 S_newSV_maybe_utf8(pTHX_ const char *start, STRLEN len)
1350 SV * const sv = newSVpvn_utf8(start, len,
1352 && is_utf8_string((const U8*)start, len));
1358 * When the lexer knows the next thing is a word (for instance, it has
1359 * just seen -> and it knows that the next char is a word char, then
1360 * it calls S_force_word to stick the next word into the PL_nexttoke/val
1364 * char *start : buffer position (must be within PL_linestr)
1365 * int token : PL_next* will be this type of bare word (e.g., METHOD,WORD)
1366 * int check_keyword : if true, Perl checks to make sure the word isn't
1367 * a keyword (do this if the word is a label, e.g. goto FOO)
1368 * int allow_pack : if true, : characters will also be allowed (require,
1369 * use, etc. do this)
1370 * int allow_initial_tick : used by the "sub" lexer only.
1374 S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
1380 start = SKIPSPACE1(start);
1382 if (isIDFIRST_lazy_if(s,UTF) ||
1383 (allow_pack && *s == ':') ||
1384 (allow_initial_tick && *s == '\'') )
1386 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
1387 if (check_keyword && keyword(PL_tokenbuf, len, 0))
1389 start_force(PL_curforce);
1391 curmad('X', newSVpvn(start,s-start));
1392 if (token == METHOD) {
1397 PL_expect = XOPERATOR;
1401 curmad('g', newSVpvs( "forced" ));
1402 NEXTVAL_NEXTTOKE.opval
1403 = (OP*)newSVOP(OP_CONST,0,
1404 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
1405 NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
1413 * Called when the lexer wants $foo *foo &foo etc, but the program
1414 * text only contains the "foo" portion. The first argument is a pointer
1415 * to the "foo", and the second argument is the type symbol to prefix.
1416 * Forces the next token to be a "WORD".
1417 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
1421 S_force_ident(pTHX_ register const char *s, int kind)
1425 const STRLEN len = strlen(s);
1426 OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpvn(s, len));
1427 start_force(PL_curforce);
1428 NEXTVAL_NEXTTOKE.opval = o;
1431 o->op_private = OPpCONST_ENTERED;
1432 /* XXX see note in pp_entereval() for why we forgo typo
1433 warnings if the symbol must be introduced in an eval.
1435 gv_fetchpvn_flags(s, len,
1436 PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL)
1438 kind == '$' ? SVt_PV :
1439 kind == '@' ? SVt_PVAV :
1440 kind == '%' ? SVt_PVHV :
1448 Perl_str_to_version(pTHX_ SV *sv)
1453 const char *start = SvPV_const(sv,len);
1454 const char * const end = start + len;
1455 const bool utf = SvUTF8(sv) ? TRUE : FALSE;
1456 while (start < end) {
1460 n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
1465 retval += ((NV)n)/nshift;
1474 * Forces the next token to be a version number.
1475 * If the next token appears to be an invalid version number, (e.g. "v2b"),
1476 * and if "guessing" is TRUE, then no new token is created (and the caller
1477 * must use an alternative parsing method).
1481 S_force_version(pTHX_ char *s, int guessing)
1487 I32 startoff = s - SvPVX(PL_linestr);
1496 while (isDIGIT(*d) || *d == '_' || *d == '.')
1500 start_force(PL_curforce);
1501 curmad('X', newSVpvn(s,d-s));
1504 if (*d == ';' || isSPACE(*d) || *d == '}' || !*d) {
1506 s = scan_num(s, &pl_yylval);
1507 version = pl_yylval.opval;
1508 ver = cSVOPx(version)->op_sv;
1509 if (SvPOK(ver) && !SvNIOK(ver)) {
1510 SvUPGRADE(ver, SVt_PVNV);
1511 SvNV_set(ver, str_to_version(ver));
1512 SvNOK_on(ver); /* hint that it is a version */
1515 else if (guessing) {
1518 sv_free(PL_nextwhite); /* let next token collect whitespace */
1520 s = SvPVX(PL_linestr) + startoff;
1528 if (PL_madskills && !version) {
1529 sv_free(PL_nextwhite); /* let next token collect whitespace */
1531 s = SvPVX(PL_linestr) + startoff;
1534 /* NOTE: The parser sees the package name and the VERSION swapped */
1535 start_force(PL_curforce);
1536 NEXTVAL_NEXTTOKE.opval = version;
1544 * Tokenize a quoted string passed in as an SV. It finds the next
1545 * chunk, up to end of string or a backslash. It may make a new
1546 * SV containing that chunk (if HINT_NEW_STRING is on). It also
1551 S_tokeq(pTHX_ SV *sv)
1555 register char *send;
1563 s = SvPV_force(sv, len);
1564 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
1567 while (s < send && *s != '\\')
1572 if ( PL_hints & HINT_NEW_STRING ) {
1573 pv = newSVpvn_flags(SvPVX_const(pv), len, SVs_TEMP | SvUTF8(sv));
1577 if (s + 1 < send && (s[1] == '\\'))
1578 s++; /* all that, just for this */
1583 SvCUR_set(sv, d - SvPVX_const(sv));
1585 if ( PL_hints & HINT_NEW_STRING )
1586 return new_constant(NULL, 0, "q", sv, pv, "q", 1);
1591 * Now come three functions related to double-quote context,
1592 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
1593 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
1594 * interact with PL_lex_state, and create fake ( ... ) argument lists
1595 * to handle functions and concatenation.
1596 * They assume that whoever calls them will be setting up a fake
1597 * join call, because each subthing puts a ',' after it. This lets
1600 * join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
1602 * (I'm not sure whether the spurious commas at the end of lcfirst's
1603 * arguments and join's arguments are created or not).
1608 * Assumes that pl_yylval.ival is the op we're creating (e.g. OP_LCFIRST).
1610 * Pattern matching will set PL_lex_op to the pattern-matching op to
1611 * make (we return THING if pl_yylval.ival is OP_NULL, PMFUNC otherwise).
1613 * OP_CONST and OP_READLINE are easy--just make the new op and return.
1615 * Everything else becomes a FUNC.
1617 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
1618 * had an OP_CONST or OP_READLINE). This just sets us up for a
1619 * call to S_sublex_push().
1623 S_sublex_start(pTHX)
1626 register const I32 op_type = pl_yylval.ival;
1628 if (op_type == OP_NULL) {
1629 pl_yylval.opval = PL_lex_op;
1633 if (op_type == OP_CONST || op_type == OP_READLINE) {
1634 SV *sv = tokeq(PL_lex_stuff);
1636 if (SvTYPE(sv) == SVt_PVIV) {
1637 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
1639 const char * const p = SvPV_const(sv, len);
1640 SV * const nsv = newSVpvn_flags(p, len, SvUTF8(sv));
1644 pl_yylval.opval = (OP*)newSVOP(op_type, 0, sv);
1645 PL_lex_stuff = NULL;
1646 /* Allow <FH> // "foo" */
1647 if (op_type == OP_READLINE)
1648 PL_expect = XTERMORDORDOR;
1651 else if (op_type == OP_BACKTICK && PL_lex_op) {
1652 /* readpipe() vas overriden */
1653 cSVOPx(cLISTOPx(cUNOPx(PL_lex_op)->op_first)->op_first->op_sibling)->op_sv = tokeq(PL_lex_stuff);
1654 pl_yylval.opval = PL_lex_op;
1656 PL_lex_stuff = NULL;
1660 PL_sublex_info.super_state = PL_lex_state;
1661 PL_sublex_info.sub_inwhat = (U16)op_type;
1662 PL_sublex_info.sub_op = PL_lex_op;
1663 PL_lex_state = LEX_INTERPPUSH;
1667 pl_yylval.opval = PL_lex_op;
1677 * Create a new scope to save the lexing state. The scope will be
1678 * ended in S_sublex_done. Returns a '(', starting the function arguments
1679 * to the uc, lc, etc. found before.
1680 * Sets PL_lex_state to LEX_INTERPCONCAT.
1689 PL_lex_state = PL_sublex_info.super_state;
1690 SAVEBOOL(PL_lex_dojoin);
1691 SAVEI32(PL_lex_brackets);
1692 SAVEI32(PL_lex_casemods);
1693 SAVEI32(PL_lex_starts);
1694 SAVEI8(PL_lex_state);
1695 SAVEVPTR(PL_lex_inpat);
1696 SAVEI16(PL_lex_inwhat);
1697 SAVECOPLINE(PL_curcop);
1698 SAVEPPTR(PL_bufptr);
1699 SAVEPPTR(PL_bufend);
1700 SAVEPPTR(PL_oldbufptr);
1701 SAVEPPTR(PL_oldoldbufptr);
1702 SAVEPPTR(PL_last_lop);
1703 SAVEPPTR(PL_last_uni);
1704 SAVEPPTR(PL_linestart);
1705 SAVESPTR(PL_linestr);
1706 SAVEGENERICPV(PL_lex_brackstack);
1707 SAVEGENERICPV(PL_lex_casestack);
1709 PL_linestr = PL_lex_stuff;
1710 PL_lex_stuff = NULL;
1712 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
1713 = SvPVX(PL_linestr);
1714 PL_bufend += SvCUR(PL_linestr);
1715 PL_last_lop = PL_last_uni = NULL;
1716 SAVEFREESV(PL_linestr);
1718 PL_lex_dojoin = FALSE;
1719 PL_lex_brackets = 0;
1720 Newx(PL_lex_brackstack, 120, char);
1721 Newx(PL_lex_casestack, 12, char);
1722 PL_lex_casemods = 0;
1723 *PL_lex_casestack = '\0';
1725 PL_lex_state = LEX_INTERPCONCAT;
1726 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
1728 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
1729 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
1730 PL_lex_inpat = PL_sublex_info.sub_op;
1732 PL_lex_inpat = NULL;
1739 * Restores lexer state after a S_sublex_push.
1746 if (!PL_lex_starts++) {
1747 SV * const sv = newSVpvs("");
1748 if (SvUTF8(PL_linestr))
1750 PL_expect = XOPERATOR;
1751 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
1755 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
1756 PL_lex_state = LEX_INTERPCASEMOD;
1760 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
1761 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
1762 PL_linestr = PL_lex_repl;
1764 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
1765 PL_bufend += SvCUR(PL_linestr);
1766 PL_last_lop = PL_last_uni = NULL;
1767 SAVEFREESV(PL_linestr);
1768 PL_lex_dojoin = FALSE;
1769 PL_lex_brackets = 0;
1770 PL_lex_casemods = 0;
1771 *PL_lex_casestack = '\0';
1773 if (SvEVALED(PL_lex_repl)) {
1774 PL_lex_state = LEX_INTERPNORMAL;
1776 /* we don't clear PL_lex_repl here, so that we can check later
1777 whether this is an evalled subst; that means we rely on the
1778 logic to ensure sublex_done() is called again only via the
1779 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
1782 PL_lex_state = LEX_INTERPCONCAT;
1792 PL_endwhite = newSVpvs("");
1793 sv_catsv(PL_endwhite, PL_thiswhite);
1797 sv_setpvn(PL_thistoken,"",0);
1799 PL_realtokenstart = -1;
1803 PL_bufend = SvPVX(PL_linestr);
1804 PL_bufend += SvCUR(PL_linestr);
1805 PL_expect = XOPERATOR;
1806 PL_sublex_info.sub_inwhat = 0;
1814 Extracts a pattern, double-quoted string, or transliteration. This
1817 It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
1818 processing a pattern (PL_lex_inpat is true), a transliteration
1819 (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
1821 Returns a pointer to the character scanned up to. If this is
1822 advanced from the start pointer supplied (i.e. if anything was
1823 successfully parsed), will leave an OP for the substring scanned
1824 in pl_yylval. Caller must intuit reason for not parsing further
1825 by looking at the next characters herself.
1829 double-quoted style: \r and \n
1830 regexp special ones: \D \s
1833 case and quoting: \U \Q \E
1834 stops on @ and $, but not for $ as tail anchor
1836 In transliterations:
1837 characters are VERY literal, except for - not at the start or end
1838 of the string, which indicates a range. If the range is in bytes,
1839 scan_const expands the range to the full set of intermediate
1840 characters. If the range is in utf8, the hyphen is replaced with
1841 a certain range mark which will be handled by pmtrans() in op.c.
1843 In double-quoted strings:
1845 double-quoted style: \r and \n
1847 deprecated backrefs: \1 (in substitution replacements)
1848 case and quoting: \U \Q \E
1851 scan_const does *not* construct ops to handle interpolated strings.
1852 It stops processing as soon as it finds an embedded $ or @ variable
1853 and leaves it to the caller to work out what's going on.
1855 embedded arrays (whether in pattern or not) could be:
1856 @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
1858 $ in double-quoted strings must be the symbol of an embedded scalar.
1860 $ in pattern could be $foo or could be tail anchor. Assumption:
1861 it's a tail anchor if $ is the last thing in the string, or if it's
1862 followed by one of "()| \r\n\t"
1864 \1 (backreferences) are turned into $1
1866 The structure of the code is
1867 while (there's a character to process) {
1868 handle transliteration ranges
1869 skip regexp comments /(?#comment)/ and codes /(?{code})/
1870 skip #-initiated comments in //x patterns
1871 check for embedded arrays
1872 check for embedded scalars
1874 leave intact backslashes from leaveit (below)
1875 deprecate \1 in substitution replacements
1876 handle string-changing backslashes \l \U \Q \E, etc.
1877 switch (what was escaped) {
1878 handle \- in a transliteration (becomes a literal -)
1879 handle \132 (octal characters)
1880 handle \x15 and \x{1234} (hex characters)
1881 handle \N{name} (named characters)
1882 handle \cV (control characters)
1883 handle printf-style backslashes (\f, \r, \n, etc)
1885 } (end if backslash)
1886 } (end while character to read)
1891 S_scan_const(pTHX_ char *start)
1894 register char *send = PL_bufend; /* end of the constant */
1895 SV *sv = newSV(send - start); /* sv for the constant */
1896 register char *s = start; /* start of the constant */
1897 register char *d = SvPVX(sv); /* destination for copies */
1898 bool dorange = FALSE; /* are we in a translit range? */
1899 bool didrange = FALSE; /* did we just finish a range? */
1900 I32 has_utf8 = FALSE; /* Output constant is UTF8 */
1901 I32 this_utf8 = UTF; /* The source string is assumed to be UTF8 */
1904 UV literal_endpoint = 0;
1905 bool native_range = TRUE; /* turned to FALSE if the first endpoint is Unicode. */
1908 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
1909 /* If we are doing a trans and we know we want UTF8 set expectation */
1910 has_utf8 = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
1911 this_utf8 = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
1915 while (s < send || dorange) {
1916 /* get transliterations out of the way (they're most literal) */
1917 if (PL_lex_inwhat == OP_TRANS) {
1918 /* expand a range A-Z to the full set of characters. AIE! */
1920 I32 i; /* current expanded character */
1921 I32 min; /* first character in range */
1922 I32 max; /* last character in range */
1933 char * const c = (char*)utf8_hop((U8*)d, -1);
1937 *c = (char)UTF_TO_NATIVE(0xff);
1938 /* mark the range as done, and continue */
1944 i = d - SvPVX_const(sv); /* remember current offset */
1947 SvLEN(sv) + (has_utf8 ?
1948 (512 - UTF_CONTINUATION_MARK +
1951 /* How many two-byte within 0..255: 128 in UTF-8,
1952 * 96 in UTF-8-mod. */
1954 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
1956 d = SvPVX(sv) + i; /* refresh d after realloc */
1960 for (j = 0; j <= 1; j++) {
1961 char * const c = (char*)utf8_hop((U8*)d, -1);
1962 const UV uv = utf8n_to_uvchr((U8*)c, d - c, NULL, 0);
1968 max = (U8)0xff; /* only to \xff */
1969 uvmax = uv; /* \x{100} to uvmax */
1971 d = c; /* eat endpoint chars */
1976 d -= 2; /* eat the first char and the - */
1977 min = (U8)*d; /* first char in range */
1978 max = (U8)d[1]; /* last char in range */
1985 "Invalid range \"%c-%c\" in transliteration operator",
1986 (char)min, (char)max);
1990 if (literal_endpoint == 2 &&
1991 ((isLOWER(min) && isLOWER(max)) ||
1992 (isUPPER(min) && isUPPER(max)))) {
1994 for (i = min; i <= max; i++)
1996 *d++ = NATIVE_TO_NEED(has_utf8,i);
1998 for (i = min; i <= max; i++)
2000 *d++ = NATIVE_TO_NEED(has_utf8,i);
2005 for (i = min; i <= max; i++)
2008 const U8 ch = (U8)NATIVE_TO_UTF(i);
2009 if (UNI_IS_INVARIANT(ch))
2012 *d++ = (U8)UTF8_EIGHT_BIT_HI(ch);
2013 *d++ = (U8)UTF8_EIGHT_BIT_LO(ch);
2022 d = (char*)uvchr_to_utf8((U8*)d, 0x100);
2024 *d++ = (char)UTF_TO_NATIVE(0xff);
2026 d = (char*)uvchr_to_utf8((U8*)d, uvmax);
2030 /* mark the range as done, and continue */
2034 literal_endpoint = 0;
2039 /* range begins (ignore - as first or last char) */
2040 else if (*s == '-' && s+1 < send && s != start) {
2042 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
2049 *d++ = (char)UTF_TO_NATIVE(0xff); /* use illegal utf8 byte--see pmtrans */
2059 literal_endpoint = 0;
2060 native_range = TRUE;
2065 /* if we get here, we're not doing a transliteration */
2067 /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
2068 except for the last char, which will be done separately. */
2069 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
2071 while (s+1 < send && *s != ')')
2072 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2074 else if (s[2] == '{' /* This should match regcomp.c */
2075 || (s[2] == '?' && s[3] == '{'))
2078 char *regparse = s + (s[2] == '{' ? 3 : 4);
2081 while (count && (c = *regparse)) {
2082 if (c == '\\' && regparse[1])
2090 if (*regparse != ')')
2091 regparse--; /* Leave one char for continuation. */
2092 while (s < regparse)
2093 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2097 /* likewise skip #-initiated comments in //x patterns */
2098 else if (*s == '#' && PL_lex_inpat &&
2099 ((PMOP*)PL_lex_inpat)->op_pmflags & PMf_EXTENDED) {
2100 while (s+1 < send && *s != '\n')
2101 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2104 /* check for embedded arrays
2105 (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
2107 else if (*s == '@' && s[1]) {
2108 if (isALNUM_lazy_if(s+1,UTF))
2110 if (strchr(":'{$", s[1]))
2112 if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
2113 break; /* in regexp, neither @+ nor @- are interpolated */
2116 /* check for embedded scalars. only stop if we're sure it's a
2119 else if (*s == '$') {
2120 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
2122 if (s + 1 < send && !strchr("()| \r\n\t", s[1]))
2123 break; /* in regexp, $ might be tail anchor */
2126 /* End of else if chain - OP_TRANS rejoin rest */
2129 if (*s == '\\' && s+1 < send) {
2132 /* deprecate \1 in strings and substitution replacements */
2133 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
2134 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
2136 if (ckWARN(WARN_SYNTAX))
2137 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
2142 /* string-change backslash escapes */
2143 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
2147 /* skip any other backslash escapes in a pattern */
2148 else if (PL_lex_inpat) {
2149 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
2150 goto default_action;
2153 /* if we get here, it's either a quoted -, or a digit */
2156 /* quoted - in transliterations */
2158 if (PL_lex_inwhat == OP_TRANS) {
2165 if ((isALPHA(*s) || isDIGIT(*s)) &&
2167 Perl_warner(aTHX_ packWARN(WARN_MISC),
2168 "Unrecognized escape \\%c passed through",
2170 /* default action is to copy the quoted character */
2171 goto default_action;
2174 /* \132 indicates an octal constant */
2175 case '0': case '1': case '2': case '3':
2176 case '4': case '5': case '6': case '7':
2180 uv = grok_oct(s, &len, &flags, NULL);
2183 goto NUM_ESCAPE_INSERT;
2185 /* \x24 indicates a hex constant */
2189 char* const e = strchr(s, '}');
2190 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
2191 PERL_SCAN_DISALLOW_PREFIX;
2196 yyerror("Missing right brace on \\x{}");
2200 uv = grok_hex(s, &len, &flags, NULL);
2206 I32 flags = PERL_SCAN_DISALLOW_PREFIX;
2207 uv = grok_hex(s, &len, &flags, NULL);
2213 /* Insert oct or hex escaped character.
2214 * There will always enough room in sv since such
2215 * escapes will be longer than any UTF-8 sequence
2216 * they can end up as. */
2218 /* We need to map to chars to ASCII before doing the tests
2221 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(uv))) {
2222 if (!has_utf8 && uv > 255) {
2223 /* Might need to recode whatever we have
2224 * accumulated so far if it contains any
2227 * (Can't we keep track of that and avoid
2228 * this rescan? --jhi)
2232 for (c = (U8 *) SvPVX(sv); c < (U8 *)d; c++) {
2233 if (!NATIVE_IS_INVARIANT(*c)) {
2238 const STRLEN offset = d - SvPVX_const(sv);
2240 d = SvGROW(sv, SvLEN(sv) + hicount + 1) + offset;
2244 while (src >= (const U8 *)SvPVX_const(sv)) {
2245 if (!NATIVE_IS_INVARIANT(*src)) {
2246 const U8 ch = NATIVE_TO_ASCII(*src);
2247 *dst-- = (U8)UTF8_EIGHT_BIT_LO(ch);
2248 *dst-- = (U8)UTF8_EIGHT_BIT_HI(ch);
2258 if (has_utf8 || uv > 255) {
2259 d = (char*)uvchr_to_utf8((U8*)d, uv);
2261 if (PL_lex_inwhat == OP_TRANS &&
2262 PL_sublex_info.sub_op) {
2263 PL_sublex_info.sub_op->op_private |=
2264 (PL_lex_repl ? OPpTRANS_FROM_UTF
2268 if (uv > 255 && !dorange)
2269 native_range = FALSE;
2281 /* \N{LATIN SMALL LETTER A} is a named character */
2285 char* e = strchr(s, '}');
2291 yyerror("Missing right brace on \\N{}");
2295 if (e > s + 2 && s[1] == 'U' && s[2] == '+') {
2297 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
2298 PERL_SCAN_DISALLOW_PREFIX;
2301 uv = grok_hex(s, &len, &flags, NULL);
2302 if ( e > s && len != (STRLEN)(e - s) ) {
2306 goto NUM_ESCAPE_INSERT;
2308 res = newSVpvn(s + 1, e - s - 1);
2309 res = new_constant( NULL, 0, "charnames",
2310 res, NULL, s - 2, e - s + 3 );
2312 sv_utf8_upgrade(res);
2313 str = SvPV_const(res,len);
2314 #ifdef EBCDIC_NEVER_MIND
2315 /* charnames uses pack U and that has been
2316 * recently changed to do the below uni->native
2317 * mapping, so this would be redundant (and wrong,
2318 * the code point would be doubly converted).
2319 * But leave this in just in case the pack U change
2320 * gets revoked, but the semantics is still
2321 * desireable for charnames. --jhi */
2323 UV uv = utf8_to_uvchr((const U8*)str, 0);
2326 U8 tmpbuf[UTF8_MAXBYTES+1], *d;
2328 d = uvchr_to_utf8(tmpbuf, UNI_TO_NATIVE(uv));
2329 sv_setpvn(res, (char *)tmpbuf, d - tmpbuf);
2330 str = SvPV_const(res, len);
2334 if (!has_utf8 && SvUTF8(res)) {
2335 const char * const ostart = SvPVX_const(sv);
2336 SvCUR_set(sv, d - ostart);
2339 sv_utf8_upgrade(sv);
2340 /* this just broke our allocation above... */
2341 SvGROW(sv, (STRLEN)(send - start));
2342 d = SvPVX(sv) + SvCUR(sv);
2345 if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
2346 const char * const odest = SvPVX_const(sv);
2348 SvGROW(sv, (SvLEN(sv) + len - (e - s + 4)));
2349 d = SvPVX(sv) + (d - odest);
2353 native_range = FALSE; /* \N{} is guessed to be Unicode */
2355 Copy(str, d, len, char);
2362 yyerror("Missing braces on \\N{}");
2365 /* \c is a control character */
2374 *d++ = NATIVE_TO_NEED(has_utf8,toCTRL(c));
2377 yyerror("Missing control char name in \\c");
2381 /* printf-style backslashes, formfeeds, newlines, etc */
2383 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
2386 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
2389 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
2392 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
2395 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
2398 *d++ = ASCII_TO_NEED(has_utf8,'\033');
2401 *d++ = ASCII_TO_NEED(has_utf8,'\007');
2407 } /* end if (backslash) */
2414 /* If we started with encoded form, or already know we want it
2415 and then encode the next character */
2416 if ((has_utf8 || this_utf8) && !NATIVE_IS_INVARIANT((U8)(*s))) {
2418 const UV nextuv = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
2419 const STRLEN need = UNISKIP(NATIVE_TO_UNI(nextuv));
2422 /* encoded value larger than old, need extra space (NOTE: SvCUR() not set here) */
2423 const STRLEN off = d - SvPVX_const(sv);
2424 d = SvGROW(sv, SvLEN(sv) + (need-len)) + off;
2426 d = (char*)uvchr_to_utf8((U8*)d, nextuv);
2429 if (uv > 255 && !dorange)
2430 native_range = FALSE;
2434 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2436 } /* while loop to process each character */
2438 /* terminate the string and set up the sv */
2440 SvCUR_set(sv, d - SvPVX_const(sv));
2441 if (SvCUR(sv) >= SvLEN(sv))
2442 Perl_croak(aTHX_ "panic: constant overflowed allocated space");
2445 if (PL_encoding && !has_utf8) {
2446 sv_recode_to_utf8(sv, PL_encoding);
2452 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
2453 PL_sublex_info.sub_op->op_private |=
2454 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
2458 /* shrink the sv if we allocated more than we used */
2459 if (SvCUR(sv) + 5 < SvLEN(sv)) {
2460 SvPV_shrink_to_cur(sv);
2463 /* return the substring (via pl_yylval) only if we parsed anything */
2464 if (s > PL_bufptr) {
2465 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) ) {
2466 const char *const key = PL_lex_inpat ? "qr" : "q";
2467 const STRLEN keylen = PL_lex_inpat ? 2 : 1;
2471 if (PL_lex_inwhat == OP_TRANS) {
2474 } else if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) {
2482 sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL,
2485 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
2492 * Returns TRUE if there's more to the expression (e.g., a subscript),
2495 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
2497 * ->[ and ->{ return TRUE
2498 * { and [ outside a pattern are always subscripts, so return TRUE
2499 * if we're outside a pattern and it's not { or [, then return FALSE
2500 * if we're in a pattern and the first char is a {
2501 * {4,5} (any digits around the comma) returns FALSE
2502 * if we're in a pattern and the first char is a [
2504 * [SOMETHING] has a funky algorithm to decide whether it's a
2505 * character class or not. It has to deal with things like
2506 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
2507 * anything else returns TRUE
2510 /* This is the one truly awful dwimmer necessary to conflate C and sed. */
2513 S_intuit_more(pTHX_ register char *s)
2516 if (PL_lex_brackets)
2518 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
2520 if (*s != '{' && *s != '[')
2525 /* In a pattern, so maybe we have {n,m}. */
2542 /* On the other hand, maybe we have a character class */
2545 if (*s == ']' || *s == '^')
2548 /* this is terrifying, and it works */
2549 int weight = 2; /* let's weigh the evidence */
2551 unsigned char un_char = 255, last_un_char;
2552 const char * const send = strchr(s,']');
2553 char tmpbuf[sizeof PL_tokenbuf * 4];
2555 if (!send) /* has to be an expression */
2558 Zero(seen,256,char);
2561 else if (isDIGIT(*s)) {
2563 if (isDIGIT(s[1]) && s[2] == ']')
2569 for (; s < send; s++) {
2570 last_un_char = un_char;
2571 un_char = (unsigned char)*s;
2576 weight -= seen[un_char] * 10;
2577 if (isALNUM_lazy_if(s+1,UTF)) {
2579 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
2580 len = (int)strlen(tmpbuf);
2581 if (len > 1 && gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PV))
2586 else if (*s == '$' && s[1] &&
2587 strchr("[#!%*<>()-=",s[1])) {
2588 if (/*{*/ strchr("])} =",s[2]))
2597 if (strchr("wds]",s[1]))
2599 else if (seen[(U8)'\''] || seen[(U8)'"'])
2601 else if (strchr("rnftbxcav",s[1]))
2603 else if (isDIGIT(s[1])) {
2605 while (s[1] && isDIGIT(s[1]))
2615 if (strchr("aA01! ",last_un_char))
2617 if (strchr("zZ79~",s[1]))
2619 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
2620 weight -= 5; /* cope with negative subscript */
2623 if (!isALNUM(last_un_char)
2624 && !(last_un_char == '$' || last_un_char == '@'
2625 || last_un_char == '&')
2626 && isALPHA(*s) && s[1] && isALPHA(s[1])) {
2631 if (keyword(tmpbuf, d - tmpbuf, 0))
2634 if (un_char == last_un_char + 1)
2636 weight -= seen[un_char];
2641 if (weight >= 0) /* probably a character class */
2651 * Does all the checking to disambiguate
2653 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
2654 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
2656 * First argument is the stuff after the first token, e.g. "bar".
2658 * Not a method if bar is a filehandle.
2659 * Not a method if foo is a subroutine prototyped to take a filehandle.
2660 * Not a method if it's really "Foo $bar"
2661 * Method if it's "foo $bar"
2662 * Not a method if it's really "print foo $bar"
2663 * Method if it's really "foo package::" (interpreted as package->foo)
2664 * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
2665 * Not a method if bar is a filehandle or package, but is quoted with
2670 S_intuit_method(pTHX_ char *start, GV *gv, CV *cv)
2673 char *s = start + (*start == '$');
2674 char tmpbuf[sizeof PL_tokenbuf];
2682 if (SvTYPE(gv) == SVt_PVGV && GvIO(gv))
2686 const char *proto = SvPVX_const(cv);
2697 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
2698 /* start is the beginning of the possible filehandle/object,
2699 * and s is the end of it
2700 * tmpbuf is a copy of it
2703 if (*start == '$') {
2704 if (gv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY ||
2705 isUPPER(*PL_tokenbuf))
2708 len = start - SvPVX(PL_linestr);
2712 start = SvPVX(PL_linestr) + len;
2716 return *s == '(' ? FUNCMETH : METHOD;
2718 if (!keyword(tmpbuf, len, 0)) {
2719 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
2723 soff = s - SvPVX(PL_linestr);
2727 indirgv = gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PVCV);
2728 if (indirgv && GvCVu(indirgv))
2730 /* filehandle or package name makes it a method */
2731 if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, 0)) {
2733 soff = s - SvPVX(PL_linestr);
2736 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
2737 return 0; /* no assumptions -- "=>" quotes bearword */
2739 start_force(PL_curforce);
2740 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0,
2741 newSVpvn(tmpbuf,len));
2742 NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
2744 curmad('X', newSVpvn(start,SvPVX(PL_linestr) + soff - start));
2749 PL_bufptr = SvPVX(PL_linestr) + soff; /* restart before space */
2751 return *s == '(' ? FUNCMETH : METHOD;
2757 /* Encoded script support. filter_add() effectively inserts a
2758 * 'pre-processing' function into the current source input stream.
2759 * Note that the filter function only applies to the current source file
2760 * (e.g., it will not affect files 'require'd or 'use'd by this one).
2762 * The datasv parameter (which may be NULL) can be used to pass
2763 * private data to this instance of the filter. The filter function
2764 * can recover the SV using the FILTER_DATA macro and use it to
2765 * store private buffers and state information.
2767 * The supplied datasv parameter is upgraded to a PVIO type
2768 * and the IoDIRP/IoANY field is used to store the function pointer,
2769 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
2770 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
2771 * private use must be set using malloc'd pointers.
2775 Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
2784 if (!PL_rsfp_filters)
2785 PL_rsfp_filters = newAV();
2788 SvUPGRADE(datasv, SVt_PVIO);
2789 IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
2790 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
2791 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
2792 FPTR2DPTR(void *, IoANY(datasv)),
2793 SvPV_nolen(datasv)));
2794 av_unshift(PL_rsfp_filters, 1);
2795 av_store(PL_rsfp_filters, 0, datasv) ;
2800 /* Delete most recently added instance of this filter function. */
2802 Perl_filter_del(pTHX_ filter_t funcp)
2808 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
2809 FPTR2DPTR(void*, funcp)));
2811 if (!PL_parser || !PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
2813 /* if filter is on top of stack (usual case) just pop it off */
2814 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
2815 if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
2816 IoFLAGS(datasv) &= ~IOf_FAKE_DIRP;
2817 IoANY(datasv) = (void *)NULL;
2818 sv_free(av_pop(PL_rsfp_filters));
2822 /* we need to search for the correct entry and clear it */
2823 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
2827 /* Invoke the idxth filter function for the current rsfp. */
2828 /* maxlen 0 = read one text line */
2830 Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
2835 /* This API is bad. It should have been using unsigned int for maxlen.
2836 Not sure if we want to change the API, but if not we should sanity
2837 check the value here. */
2838 const unsigned int correct_length
2847 if (!PL_parser || !PL_rsfp_filters)
2849 if (idx > AvFILLp(PL_rsfp_filters)) { /* Any more filters? */
2850 /* Provide a default input filter to make life easy. */
2851 /* Note that we append to the line. This is handy. */
2852 DEBUG_P(PerlIO_printf(Perl_debug_log,
2853 "filter_read %d: from rsfp\n", idx));
2854 if (correct_length) {
2857 const int old_len = SvCUR(buf_sv);
2859 /* ensure buf_sv is large enough */
2860 SvGROW(buf_sv, (STRLEN)(old_len + correct_length)) ;
2861 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
2862 correct_length)) <= 0) {
2863 if (PerlIO_error(PL_rsfp))
2864 return -1; /* error */
2866 return 0 ; /* end of file */
2868 SvCUR_set(buf_sv, old_len + len) ;
2871 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
2872 if (PerlIO_error(PL_rsfp))
2873 return -1; /* error */
2875 return 0 ; /* end of file */
2878 return SvCUR(buf_sv);
2880 /* Skip this filter slot if filter has been deleted */
2881 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
2882 DEBUG_P(PerlIO_printf(Perl_debug_log,
2883 "filter_read %d: skipped (filter deleted)\n",
2885 return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
2887 /* Get function pointer hidden within datasv */
2888 funcp = DPTR2FPTR(filter_t, IoANY(datasv));
2889 DEBUG_P(PerlIO_printf(Perl_debug_log,
2890 "filter_read %d: via function %p (%s)\n",
2891 idx, (void*)datasv, SvPV_nolen_const(datasv)));
2892 /* Call function. The function is expected to */
2893 /* call "FILTER_READ(idx+1, buf_sv)" first. */
2894 /* Return: <0:error, =0:eof, >0:not eof */
2895 return (*funcp)(aTHX_ idx, buf_sv, correct_length);
2899 S_filter_gets(pTHX_ register SV *sv, register PerlIO *fp, STRLEN append)
2902 #ifdef PERL_CR_FILTER
2903 if (!PL_rsfp_filters) {
2904 filter_add(S_cr_textfilter,NULL);
2907 if (PL_rsfp_filters) {
2909 SvCUR_set(sv, 0); /* start with empty line */
2910 if (FILTER_READ(0, sv, 0) > 0)
2911 return ( SvPVX(sv) ) ;
2916 return (sv_gets(sv, fp, append));
2920 S_find_in_my_stash(pTHX_ const char *pkgname, STRLEN len)
2925 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
2929 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
2930 (gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVHV)))
2932 return GvHV(gv); /* Foo:: */
2935 /* use constant CLASS => 'MyClass' */
2936 gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVCV);
2937 if (gv && GvCV(gv)) {
2938 SV * const sv = cv_const_sv(GvCV(gv));
2940 pkgname = SvPV_const(sv, len);
2943 return gv_stashpvn(pkgname, len, 0);
2947 * S_readpipe_override
2948 * Check whether readpipe() is overriden, and generates the appropriate
2949 * optree, provided sublex_start() is called afterwards.
2952 S_readpipe_override(pTHX)
2955 GV *gv_readpipe = gv_fetchpvs("readpipe", GV_NOTQUAL, SVt_PVCV);
2956 pl_yylval.ival = OP_BACKTICK;
2958 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe))
2960 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readpipe", FALSE))
2961 && (gv_readpipe = *gvp) && isGV_with_GP(gv_readpipe)
2962 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe)))
2964 PL_lex_op = (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
2965 append_elem(OP_LIST,
2966 newSVOP(OP_CONST, 0, &PL_sv_undef), /* value will be read later */
2967 newCVREF(0, newGVOP(OP_GV, 0, gv_readpipe))));
2974 * The intent of this yylex wrapper is to minimize the changes to the
2975 * tokener when we aren't interested in collecting madprops. It remains
2976 * to be seen how successful this strategy will be...
2983 char *s = PL_bufptr;
2985 /* make sure PL_thiswhite is initialized */
2989 /* just do what yylex would do on pending identifier; leave PL_thiswhite alone */
2990 if (PL_pending_ident)
2991 return S_pending_ident(aTHX);
2993 /* previous token ate up our whitespace? */
2994 if (!PL_lasttoke && PL_nextwhite) {
2995 PL_thiswhite = PL_nextwhite;
2999 /* isolate the token, and figure out where it is without whitespace */
3000 PL_realtokenstart = -1;
3004 assert(PL_curforce < 0);
3006 if (!PL_thismad || PL_thismad->mad_key == '^') { /* not forced already? */
3007 if (!PL_thistoken) {
3008 if (PL_realtokenstart < 0 || !CopLINE(PL_curcop))
3009 PL_thistoken = newSVpvs("");
3011 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
3012 PL_thistoken = newSVpvn(tstart, s - tstart);
3015 if (PL_thismad) /* install head */
3016 CURMAD('X', PL_thistoken);
3019 /* last whitespace of a sublex? */
3020 if (optype == ')' && PL_endwhite) {
3021 CURMAD('X', PL_endwhite);
3026 /* if no whitespace and we're at EOF, bail. Otherwise fake EOF below. */
3027 if (!PL_thiswhite && !PL_endwhite && !optype) {
3028 sv_free(PL_thistoken);
3033 /* put off final whitespace till peg */
3034 if (optype == ';' && !PL_rsfp) {
3035 PL_nextwhite = PL_thiswhite;
3038 else if (PL_thisopen) {
3039 CURMAD('q', PL_thisopen);
3041 sv_free(PL_thistoken);
3045 /* Store actual token text as madprop X */
3046 CURMAD('X', PL_thistoken);
3050 /* add preceding whitespace as madprop _ */
3051 CURMAD('_', PL_thiswhite);
3055 /* add quoted material as madprop = */
3056 CURMAD('=', PL_thisstuff);
3060 /* add terminating quote as madprop Q */
3061 CURMAD('Q', PL_thisclose);
3065 /* special processing based on optype */
3069 /* opval doesn't need a TOKEN since it can already store mp */
3079 if (pl_yylval.opval)
3080 append_madprops(PL_thismad, pl_yylval.opval, 0);
3088 addmad(newMADsv('p', PL_endwhite), &PL_thismad, 0);
3097 /* remember any fake bracket that lexer is about to discard */
3098 if (PL_lex_brackets == 1 &&
3099 ((expectation)PL_lex_brackstack[0] & XFAKEBRACK))
3102 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
3105 PL_thiswhite = newSVpvn(PL_bufptr, ++s - PL_bufptr);
3106 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
3109 break; /* don't bother looking for trailing comment */
3118 /* attach a trailing comment to its statement instead of next token */
3122 if (PL_bufptr > PL_oldbufptr && PL_bufptr[-1] == optype) {
3124 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
3126 if (*s == '\n' || *s == '#') {
3127 while (s < PL_bufend && *s != '\n')
3131 PL_thiswhite = newSVpvn(PL_bufptr, s - PL_bufptr);
3132 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
3149 /* Create new token struct. Note: opvals return early above. */
3150 pl_yylval.tkval = newTOKEN(optype, pl_yylval, PL_thismad);
3157 S_tokenize_use(pTHX_ int is_use, char *s) {
3159 if (PL_expect != XSTATE)
3160 yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
3161 is_use ? "use" : "no"));
3163 if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
3164 s = force_version(s, TRUE);
3165 if (*s == ';' || (s = SKIPSPACE1(s), *s == ';')) {
3166 start_force(PL_curforce);
3167 NEXTVAL_NEXTTOKE.opval = NULL;
3170 else if (*s == 'v') {
3171 s = force_word(s,WORD,FALSE,TRUE,FALSE);
3172 s = force_version(s, FALSE);
3176 s = force_word(s,WORD,FALSE,TRUE,FALSE);
3177 s = force_version(s, FALSE);
3179 pl_yylval.ival = is_use;
3183 static const char* const exp_name[] =
3184 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
3185 "ATTRTERM", "TERMBLOCK", "TERMORDORDOR"
3192 Works out what to call the token just pulled out of the input
3193 stream. The yacc parser takes care of taking the ops we return and
3194 stitching them into a tree.
3200 if read an identifier
3201 if we're in a my declaration
3202 croak if they tried to say my($foo::bar)
3203 build the ops for a my() declaration
3204 if it's an access to a my() variable
3205 are we in a sort block?
3206 croak if my($a); $a <=> $b
3207 build ops for access to a my() variable
3208 if in a dq string, and they've said @foo and we can't find @foo
3210 build ops for a bareword
3211 if we already built the token before, use it.
3216 #pragma segment Perl_yylex
3222 register char *s = PL_bufptr;
3227 /* orig_keyword, gvp, and gv are initialized here because
3228 * jump to the label just_a_word_zero can bypass their
3229 * initialization later. */
3230 I32 orig_keyword = 0;
3235 SV* tmp = newSVpvs("");
3236 PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n",
3237 (IV)CopLINE(PL_curcop),
3238 lex_state_names[PL_lex_state],
3239 exp_name[PL_expect],
3240 pv_display(tmp, s, strlen(s), 0, 60));
3243 /* check if there's an identifier for us to look at */
3244 if (PL_pending_ident)
3245 return REPORT(S_pending_ident(aTHX));
3247 /* no identifier pending identification */
3249 switch (PL_lex_state) {
3251 case LEX_NORMAL: /* Some compilers will produce faster */
3252 case LEX_INTERPNORMAL: /* code if we comment these out. */
3256 /* when we've already built the next token, just pull it out of the queue */
3260 pl_yylval = PL_nexttoke[PL_lasttoke].next_val;
3262 PL_thismad = PL_nexttoke[PL_lasttoke].next_mad;
3263 PL_nexttoke[PL_lasttoke].next_mad = 0;
3264 if (PL_thismad && PL_thismad->mad_key == '_') {
3265 PL_thiswhite = (SV*)PL_thismad->mad_val;
3266 PL_thismad->mad_val = 0;
3267 mad_free(PL_thismad);
3272 PL_lex_state = PL_lex_defer;
3273 PL_expect = PL_lex_expect;
3274 PL_lex_defer = LEX_NORMAL;
3275 if (!PL_nexttoke[PL_lasttoke].next_type)
3280 pl_yylval = PL_nextval[PL_nexttoke];
3282 PL_lex_state = PL_lex_defer;
3283 PL_expect = PL_lex_expect;
3284 PL_lex_defer = LEX_NORMAL;
3288 /* FIXME - can these be merged? */
3289 return(PL_nexttoke[PL_lasttoke].next_type);
3291 return REPORT(PL_nexttype[PL_nexttoke]);
3294 /* interpolated case modifiers like \L \U, including \Q and \E.
3295 when we get here, PL_bufptr is at the \
3297 case LEX_INTERPCASEMOD:
3299 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
3300 Perl_croak(aTHX_ "panic: INTERPCASEMOD");
3302 /* handle \E or end of string */
3303 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
3305 if (PL_lex_casemods) {
3306 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
3307 PL_lex_casestack[PL_lex_casemods] = '\0';
3309 if (PL_bufptr != PL_bufend
3310 && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q')) {
3312 PL_lex_state = LEX_INTERPCONCAT;
3315 PL_thistoken = newSVpvs("\\E");
3321 while (PL_bufptr != PL_bufend &&
3322 PL_bufptr[0] == '\\' && PL_bufptr[1] == 'E') {
3324 PL_thiswhite = newSVpvs("");
3325 sv_catpvn(PL_thiswhite, PL_bufptr, 2);
3329 if (PL_bufptr != PL_bufend)
3332 PL_lex_state = LEX_INTERPCONCAT;
3336 DEBUG_T({ PerlIO_printf(Perl_debug_log,
3337 "### Saw case modifier\n"); });
3339 if (s[1] == '\\' && s[2] == 'E') {
3342 PL_thiswhite = newSVpvs("");
3343 sv_catpvn(PL_thiswhite, PL_bufptr, 4);
3346 PL_lex_state = LEX_INTERPCONCAT;
3351 if (!PL_madskills) /* when just compiling don't need correct */
3352 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
3353 tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */
3354 if ((*s == 'L' || *s == 'U') &&
3355 (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U'))) {
3356 PL_lex_casestack[--PL_lex_casemods] = '\0';
3359 if (PL_lex_casemods > 10)
3360 Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
3361 PL_lex_casestack[PL_lex_casemods++] = *s;
3362 PL_lex_casestack[PL_lex_casemods] = '\0';
3363 PL_lex_state = LEX_INTERPCONCAT;
3364 start_force(PL_curforce);
3365 NEXTVAL_NEXTTOKE.ival = 0;
3367 start_force(PL_curforce);
3369 NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
3371 NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
3373 NEXTVAL_NEXTTOKE.ival = OP_LC;
3375 NEXTVAL_NEXTTOKE.ival = OP_UC;
3377 NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
3379 Perl_croak(aTHX_ "panic: yylex");
3381 SV* const tmpsv = newSVpvs("\\ ");
3382 /* replace the space with the character we want to escape
3384 SvPVX(tmpsv)[1] = *s;
3390 if (PL_lex_starts) {
3396 sv_free(PL_thistoken);
3397 PL_thistoken = newSVpvs("");
3400 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
3401 if (PL_lex_casemods == 1 && PL_lex_inpat)
3410 case LEX_INTERPPUSH:
3411 return REPORT(sublex_push());
3413 case LEX_INTERPSTART:
3414 if (PL_bufptr == PL_bufend)
3415 return REPORT(sublex_done());
3416 DEBUG_T({ PerlIO_printf(Perl_debug_log,
3417 "### Interpolated variable\n"); });
3419 PL_lex_dojoin = (*PL_bufptr == '@');
3420 PL_lex_state = LEX_INTERPNORMAL;
3421 if (PL_lex_dojoin) {
3422 start_force(PL_curforce);
3423 NEXTVAL_NEXTTOKE.ival = 0;
3425 start_force(PL_curforce);
3426 force_ident("\"", '$');
3427 start_force(PL_curforce);
3428 NEXTVAL_NEXTTOKE.ival = 0;
3430 start_force(PL_curforce);
3431 NEXTVAL_NEXTTOKE.ival = 0;
3433 start_force(PL_curforce);
3434 NEXTVAL_NEXTTOKE.ival = OP_JOIN; /* emulate join($", ...) */
3437 if (PL_lex_starts++) {
3442 sv_free(PL_thistoken);
3443 PL_thistoken = newSVpvs("");
3446 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
3447 if (!PL_lex_casemods && PL_lex_inpat)
3454 case LEX_INTERPENDMAYBE:
3455 if (intuit_more(PL_bufptr)) {
3456 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
3462 if (PL_lex_dojoin) {
3463 PL_lex_dojoin = FALSE;
3464 PL_lex_state = LEX_INTERPCONCAT;
3468 sv_free(PL_thistoken);
3469 PL_thistoken = newSVpvs("");
3474 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
3475 && SvEVALED(PL_lex_repl))
3477 if (PL_bufptr != PL_bufend)
3478 Perl_croak(aTHX_ "Bad evalled substitution pattern");
3482 case LEX_INTERPCONCAT:
3484 if (PL_lex_brackets)
3485 Perl_croak(aTHX_ "panic: INTERPCONCAT");
3487 if (PL_bufptr == PL_bufend)
3488 return REPORT(sublex_done());
3490 if (SvIVX(PL_linestr) == '\'') {
3491 SV *sv = newSVsv(PL_linestr);
3494 else if ( PL_hints & HINT_NEW_RE )
3495 sv = new_constant(NULL, 0, "qr", sv, sv, "q", 1);
3496 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3500 s = scan_const(PL_bufptr);
3502 PL_lex_state = LEX_INTERPCASEMOD;
3504 PL_lex_state = LEX_INTERPSTART;
3507 if (s != PL_bufptr) {
3508 start_force(PL_curforce);
3510 curmad('X', newSVpvn(PL_bufptr,s-PL_bufptr));
3512 NEXTVAL_NEXTTOKE = pl_yylval;
3515 if (PL_lex_starts++) {
3519 sv_free(PL_thistoken);
3520 PL_thistoken = newSVpvs("");
3523 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
3524 if (!PL_lex_casemods && PL_lex_inpat)
3537 PL_lex_state = LEX_NORMAL;
3538 s = scan_formline(PL_bufptr);
3539 if (!PL_lex_formbrack)
3545 PL_oldoldbufptr = PL_oldbufptr;
3551 sv_free(PL_thistoken);
3554 PL_realtokenstart = s - SvPVX(PL_linestr); /* assume but undo on ws */
3558 if (isIDFIRST_lazy_if(s,UTF))
3560 len = UTF ? Perl_utf8_length(aTHX_ (U8 *) PL_linestart, (U8 *) s) : (STRLEN) (s - PL_linestart);
3561 Perl_croak(aTHX_ "Unrecognized character \\x%02X in column %d", *s & 255, (int) len + 1);
3564 goto fake_eof; /* emulate EOF on ^D or ^Z */
3573 if (PL_lex_brackets) {
3574 yyerror((const char *)
3576 ? "Format not terminated"
3577 : "Missing right curly or square bracket"));
3579 DEBUG_T( { PerlIO_printf(Perl_debug_log,
3580 "### Tokener got EOF\n");
3584 if (s++ < PL_bufend)
3585 goto retry; /* ignore stray nulls */
3588 if (!PL_in_eval && !PL_preambled) {
3589 PL_preambled = TRUE;
3595 /* Generate a string of Perl code to load the debugger.
3596 * If PERL5DB is set, it will return the contents of that,
3597 * otherwise a compile-time require of perl5db.pl. */
3599 const char * const pdb = PerlEnv_getenv("PERL5DB");
3602 sv_setpv(PL_linestr, pdb);
3603 sv_catpvs(PL_linestr,";");
3605 SETERRNO(0,SS_NORMAL);
3606 sv_setpvs(PL_linestr, "BEGIN { require 'perl5db.pl' };");
3609 sv_setpvs(PL_linestr,"");
3610 if (PL_preambleav) {
3611 SV **svp = AvARRAY(PL_preambleav);
3612 SV **const end = svp + AvFILLp(PL_preambleav);
3614 sv_catsv(PL_linestr, *svp);
3616 sv_catpvs(PL_linestr, ";");
3618 sv_free((SV*)PL_preambleav);
3619 PL_preambleav = NULL;
3621 if (PL_minus_n || PL_minus_p) {
3622 sv_catpvs(PL_linestr, "LINE: while (<>) {");
3624 sv_catpvs(PL_linestr,"chomp;");
3627 if ((*PL_splitstr == '/' || *PL_splitstr == '\''
3628 || *PL_splitstr == '"')
3629 && strchr(PL_splitstr + 1, *PL_splitstr))
3630 Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
3632 /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
3633 bytes can be used as quoting characters. :-) */
3634 const char *splits = PL_splitstr;
3635 sv_catpvs(PL_linestr, "our @F=split(q\0");
3638 if (*splits == '\\')
3639 sv_catpvn(PL_linestr, splits, 1);
3640 sv_catpvn(PL_linestr, splits, 1);
3641 } while (*splits++);
3642 /* This loop will embed the trailing NUL of
3643 PL_linestr as the last thing it does before
3645 sv_catpvs(PL_linestr, ");");
3649 sv_catpvs(PL_linestr,"our @F=split(' ');");
3653 sv_catpvs(PL_linestr,
3654 "use feature ':5." STRINGIFY(PERL_VERSION) "';");
3655 sv_catpvs(PL_linestr, "\n");
3656 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
3657 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
3658 PL_last_lop = PL_last_uni = NULL;
3659 if (PERLDB_LINE && PL_curstash != PL_debstash)
3660 update_debugger_info(PL_linestr, NULL, 0);
3664 bof = PL_rsfp ? TRUE : FALSE;
3665 if ((s = filter_gets(PL_linestr, PL_rsfp, 0)) == NULL) {
3668 PL_realtokenstart = -1;
3671 if (PL_preprocess && !PL_in_eval)
3672 (void)PerlProc_pclose(PL_rsfp);
3673 else if ((PerlIO *)PL_rsfp == PerlIO_stdin())
3674 PerlIO_clearerr(PL_rsfp);
3676 (void)PerlIO_close(PL_rsfp);
3678 PL_doextract = FALSE;
3680 if (!PL_in_eval && (PL_minus_n || PL_minus_p)) {
3686 sv_setpvs(PL_linestr, ";}continue{print;}");
3688 sv_setpvs(PL_linestr, ";}");
3689 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
3690 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
3691 PL_last_lop = PL_last_uni = NULL;
3692 PL_minus_n = PL_minus_p = 0;
3695 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
3696 PL_last_lop = PL_last_uni = NULL;
3697 sv_setpvn(PL_linestr,"",0);
3698 TOKEN(';'); /* not infinite loop because rsfp is NULL now */
3700 /* If it looks like the start of a BOM or raw UTF-16,
3701 * check if it in fact is. */
3707 #ifdef PERLIO_IS_STDIO
3708 # ifdef __GNU_LIBRARY__
3709 # if __GNU_LIBRARY__ == 1 /* Linux glibc5 */
3710 # define FTELL_FOR_PIPE_IS_BROKEN
3714 # if __GLIBC__ == 1 /* maybe some glibc5 release had it like this? */
3715 # define FTELL_FOR_PIPE_IS_BROKEN
3720 #ifdef FTELL_FOR_PIPE_IS_BROKEN
3721 /* This loses the possibility to detect the bof
3722 * situation on perl -P when the libc5 is being used.
3723 * Workaround? Maybe attach some extra state to PL_rsfp?
3726 bof = PerlIO_tell(PL_rsfp) == SvCUR(PL_linestr);
3728 bof = PerlIO_tell(PL_rsfp) == (Off_t)SvCUR(PL_linestr);
3731 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
3732 s = swallow_bom((U8*)s);
3736 /* Incest with pod. */
3739 sv_catsv(PL_thiswhite, PL_linestr);
3741 if (*s == '=' && strnEQ(s, "=cut", 4) && !isALPHA(s[4])) {
3742 sv_setpvn(PL_linestr, "", 0);
3743 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
3744 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
3745 PL_last_lop = PL_last_uni = NULL;
3746 PL_doextract = FALSE;
3750 } while (PL_doextract);
3751 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
3752 if (PERLDB_LINE && PL_curstash != PL_debstash)
3753 update_debugger_info(PL_linestr, NULL, 0);
3754 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
3755 PL_last_lop = PL_last_uni = NULL;
3756 if (CopLINE(PL_curcop) == 1) {
3757 while (s < PL_bufend && isSPACE(*s))
3759 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
3763 PL_thiswhite = newSVpvn(PL_linestart, s - PL_linestart);
3767 if (*s == '#' && *(s+1) == '!')
3769 #ifdef ALTERNATE_SHEBANG
3771 static char const as[] = ALTERNATE_SHEBANG;
3772 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
3773 d = s + (sizeof(as) - 1);
3775 #endif /* ALTERNATE_SHEBANG */
3784 while (*d && !isSPACE(*d))
3788 #ifdef ARG_ZERO_IS_SCRIPT
3789 if (ipathend > ipath) {
3791 * HP-UX (at least) sets argv[0] to the script name,
3792 * which makes $^X incorrect. And Digital UNIX and Linux,
3793 * at least, set argv[0] to the basename of the Perl
3794 * interpreter. So, having found "#!", we'll set it right.
3796 SV * const x = GvSV(gv_fetchpvs("\030", GV_ADD|GV_NOTQUAL,
3798 assert(SvPOK(x) || SvGMAGICAL(x));
3799 if (sv_eq(x, CopFILESV(PL_curcop))) {
3800 sv_setpvn(x, ipath, ipathend - ipath);
3806 const char *bstart = SvPV_const(CopFILESV(PL_curcop),blen);
3807 const char * const lstart = SvPV_const(x,llen);
3809 bstart += blen - llen;
3810 if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
3811 sv_setpvn(x, ipath, ipathend - ipath);
3816 TAINT_NOT; /* $^X is always tainted, but that's OK */
3818 #endif /* ARG_ZERO_IS_SCRIPT */
3823 d = instr(s,"perl -");
3825 d = instr(s,"perl");
3827 /* avoid getting into infinite loops when shebang
3828 * line contains "Perl" rather than "perl" */
3830 for (d = ipathend-4; d >= ipath; --d) {
3831 if ((*d == 'p' || *d == 'P')
3832 && !ibcmp(d, "perl", 4))
3842 #ifdef ALTERNATE_SHEBANG
3844 * If the ALTERNATE_SHEBANG on this system starts with a
3845 * character that can be part of a Perl expression, then if
3846 * we see it but not "perl", we're probably looking at the
3847 * start of Perl code, not a request to hand off to some
3848 * other interpreter. Similarly, if "perl" is there, but
3849 * not in the first 'word' of the line, we assume the line
3850 * contains the start of the Perl program.
3852 if (d && *s != '#') {
3853 const char *c = ipath;
3854 while (*c && !strchr("; \t\r\n\f\v#", *c))
3857 d = NULL; /* "perl" not in first word; ignore */
3859 *s = '#'; /* Don't try to parse shebang line */
3861 #endif /* ALTERNATE_SHEBANG */
3862 #ifndef MACOS_TRADITIONAL
3867 !instr(s,"indir") &&
3868 instr(PL_origargv[0],"perl"))
3875 while (s < PL_bufend && isSPACE(*s))
3877 if (s < PL_bufend) {
3878 Newxz(newargv,PL_origargc+3,char*);
3880 while (s < PL_bufend && !isSPACE(*s))
3883 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
3886 newargv = PL_origargv;
3889 PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
3891 Perl_croak(aTHX_ "Can't exec %s", ipath);
3895 while (*d && !isSPACE(*d))
3897 while (SPACE_OR_TAB(*d))
3901 const bool switches_done = PL_doswitches;
3902 const U32 oldpdb = PL_perldb;
3903 const bool oldn = PL_minus_n;
3904 const bool oldp = PL_minus_p;
3908 if (*d1 == 'M' || *d1 == 'm' || *d1 == 'C') {
3909 const char * const m = d1;
3910 while (*d1 && !isSPACE(*d1))
3912 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
3915 d1 = moreswitches(d1);
3917 if (PL_doswitches && !switches_done) {
3918 int argc = PL_origargc;
3919 char **argv = PL_origargv;
3922 } while (argc && argv[0][0] == '-' && argv[0][1]);
3923 init_argv_symbols(argc,argv);
3925 if ((PERLDB_LINE && !oldpdb) ||
3926 ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
3927 /* if we have already added "LINE: while (<>) {",
3928 we must not do it again */
3930 sv_setpvn(PL_linestr, "", 0);
3931 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
3932 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
3933 PL_last_lop = PL_last_uni = NULL;
3934 PL_preambled = FALSE;
3936 (void)gv_fetchfile(PL_origfilename);
3943 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
3945 PL_lex_state = LEX_FORMLINE;
3950 #ifdef PERL_STRICT_CR
3951 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
3953 "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
3955 case ' ': case '\t': case '\f': case 013:
3956 #ifdef MACOS_TRADITIONAL
3960 PL_realtokenstart = -1;
3962 PL_thiswhite = newSVpvs("");
3963 sv_catpvn(PL_thiswhite, s, 1);
3970 PL_realtokenstart = -1;
3974 if (PL_lex_state != LEX_NORMAL || (PL_in_eval && !PL_rsfp)) {
3975 if (*s == '#' && s == PL_linestart && PL_in_eval && !PL_rsfp) {
3976 /* handle eval qq[#line 1 "foo"\n ...] */
3977 CopLINE_dec(PL_curcop);
3980 if (PL_madskills && !PL_lex_formbrack && !PL_in_eval) {
3982 if (!PL_in_eval || PL_rsfp)
3987 while (d < PL_bufend && *d != '\n')
3991 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
3992 Perl_croak(aTHX_ "panic: input overflow");
3995 PL_thiswhite = newSVpvn(s, d - s);
4000 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
4002 PL_lex_state = LEX_FORMLINE;
4008 if (PL_madskills && CopLINE(PL_curcop) >= 1 && !PL_lex_formbrack) {
4009 if (CopLINE(PL_curcop) == 1 && s[0] == '#' && s[1] == '!') {
4012 TOKEN(PEG); /* make sure any #! line is accessible */
4017 /* if (PL_madskills && PL_lex_formbrack) { */
4019 while (d < PL_bufend && *d != '\n')
4023 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
4024 Perl_croak(aTHX_ "panic: input overflow");
4025 if (PL_madskills && CopLINE(PL_curcop) >= 1) {
4027 PL_thiswhite = newSVpvs("");
4028 if (CopLINE(PL_curcop) == 1) {
4029 sv_setpvn(PL_thiswhite, "", 0);
4032 sv_catpvn(PL_thiswhite, s, d - s);
4046 if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {
4054 while (s < PL_bufend && SPACE_OR_TAB(*s))
4057 if (strnEQ(s,"=>",2)) {
4058 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
4059 DEBUG_T( { printbuf("### Saw unary minus before =>, forcing word %s\n", s); } );
4060 OPERATOR('-'); /* unary minus */
4062 PL_last_uni = PL_oldbufptr;
4064 case 'r': ftst = OP_FTEREAD; break;
4065 case 'w': ftst = OP_FTEWRITE; break;
4066 case 'x': ftst = OP_FTEEXEC; break;
4067 case 'o': ftst = OP_FTEOWNED; break;
4068 case 'R': ftst = OP_FTRREAD; break;
4069 case 'W': ftst = OP_FTRWRITE; break;
4070 case 'X': ftst = OP_FTREXEC; break;
4071 case 'O': ftst = OP_FTROWNED; break;
4072 case 'e': ftst = OP_FTIS; break;
4073 case 'z': ftst = OP_FTZERO; break;
4074 case 's': ftst = OP_FTSIZE; break;
4075 case 'f': ftst = OP_FTFILE; break;
4076 case 'd': ftst = OP_FTDIR; break;
4077 case 'l': ftst = OP_FTLINK; break;
4078 case 'p': ftst = OP_FTPIPE; break;
4079 case 'S': ftst = OP_FTSOCK; break;
4080 case 'u': ftst = OP_FTSUID; break;
4081 case 'g': ftst = OP_FTSGID; break;
4082 case 'k': ftst = OP_FTSVTX; break;
4083 case 'b': ftst = OP_FTBLK; break;
4084 case 'c': ftst = OP_FTCHR; break;
4085 case 't': ftst = OP_FTTTY; break;
4086 case 'T': ftst = OP_FTTEXT; break;
4087 case 'B': ftst = OP_FTBINARY; break;
4088 case 'M': case 'A': case 'C':
4089 gv_fetchpvs("\024", GV_ADD|GV_NOTQUAL, SVt_PV);
4091 case 'M': ftst = OP_FTMTIME; break;
4092 case 'A': ftst = OP_FTATIME; break;
4093 case 'C': ftst = OP_FTCTIME; break;
4101 PL_last_lop_op = (OPCODE)ftst;
4102 DEBUG_T( { PerlIO_printf(Perl_debug_log,
4103 "### Saw file test %c\n", (int)tmp);
4108 /* Assume it was a minus followed by a one-letter named
4109 * subroutine call (or a -bareword), then. */
4110 DEBUG_T( { PerlIO_printf(Perl_debug_log,
4111 "### '-%c' looked like a file test but was not\n",
4118 const char tmp = *s++;
4121 if (PL_expect == XOPERATOR)
4126 else if (*s == '>') {
4129 if (isIDFIRST_lazy_if(s,UTF)) {
4130 s = force_word(s,METHOD,FALSE,TRUE,FALSE);
4138 if (PL_expect == XOPERATOR)
4141 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
4143 OPERATOR('-'); /* unary minus */
4149 const char tmp = *s++;
4152 if (PL_expect == XOPERATOR)
4157 if (PL_expect == XOPERATOR)
4160 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
4167 if (PL_expect != XOPERATOR) {
4168 s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
4169 PL_expect = XOPERATOR;
4170 force_ident(PL_tokenbuf, '*');
4183 if (PL_expect == XOPERATOR) {
4187 PL_tokenbuf[0] = '%';
4188 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
4189 sizeof PL_tokenbuf - 1, FALSE);
4190 if (!PL_tokenbuf[1]) {
4193 PL_pending_ident = '%';
4204 && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR))
4211 const char tmp = *s++;
4217 goto just_a_word_zero_gv;
4220 switch (PL_expect) {
4226 if (!PL_in_my || PL_lex_state != LEX_NORMAL)
4228 PL_bufptr = s; /* update in case we back off */
4234 PL_expect = XTERMBLOCK;
4237 stuffstart = s - SvPVX(PL_linestr) - 1;
4241 while (isIDFIRST_lazy_if(s,UTF)) {
4244 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
4245 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len, 0))) {
4246 if (tmp < 0) tmp = -tmp;
4260 sv = newSVpvn(s, len);
4262 d = scan_str(d,TRUE,TRUE);
4264 /* MUST advance bufptr here to avoid bogus
4265 "at end of line" context messages from yyerror().
4267 PL_bufptr = s + len;
4268 yyerror("Unterminated attribute parameter in attribute list");
4272 return REPORT(0); /* EOF indicator */
4276 sv_catsv(sv, PL_lex_stuff);
4277 attrs = append_elem(OP_LIST, attrs,
4278 newSVOP(OP_CONST, 0, sv));
4279 SvREFCNT_dec(PL_lex_stuff);
4280 PL_lex_stuff = NULL;
4283 if (len == 6 && strnEQ(SvPVX(sv), "unique", len)) {
4285 if (PL_in_my == KEY_our) {
4287 GvUNIQUE_on(cGVOPx_gv(pl_yylval.opval));
4289 /* skip to avoid loading attributes.pm */
4291 deprecate(":unique");
4294 Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables");
4297 /* NOTE: any CV attrs applied here need to be part of
4298 the CVf_BUILTIN_ATTRS define in cv.h! */
4299 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "lvalue", len)) {
4301 CvLVALUE_on(PL_compcv);
4303 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "locked", len)) {
4305 CvLOCKED_on(PL_compcv);
4307 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "method", len)) {
4309 CvMETHOD_on(PL_compcv);
4311 /* After we've set the flags, it could be argued that
4312 we don't need to do the attributes.pm-based setting
4313 process, and shouldn't bother appending recognized
4314 flags. To experiment with that, uncomment the
4315 following "else". (Note that's already been
4316 uncommented. That keeps the above-applied built-in
4317 attributes from being intercepted (and possibly
4318 rejected) by a package's attribute routines, but is
4319 justified by the performance win for the common case
4320 of applying only built-in attributes.) */
4322 attrs = append_elem(OP_LIST, attrs,
4323 newSVOP(OP_CONST, 0,
4327 if (*s == ':' && s[1] != ':')
4330 break; /* require real whitespace or :'s */
4331 /* XXX losing whitespace on sequential attributes here */
4335 = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
4336 if (*s != ';' && *s != '}' && *s != tmp
4337 && (tmp != '=' || *s != ')')) {
4338 const char q = ((*s == '\'') ? '"' : '\'');
4339 /* If here for an expression, and parsed no attrs, back
4341 if (tmp == '=' && !attrs) {
4345 /* MUST advance bufptr here to avoid bogus "at end of line"
4346 context messages from yyerror().
4349 yyerror( (const char *)
4351 ? Perl_form(aTHX_ "Invalid separator character "
4352 "%c%c%c in attribute list", q, *s, q)
4353 : "Unterminated attribute list" ) );
4361 start_force(PL_curforce);
4362 NEXTVAL_NEXTTOKE.opval = attrs;
4363 CURMAD('_', PL_nextwhite);
4368 PL_thistoken = newSVpvn(SvPVX(PL_linestr) + stuffstart,
4369 (s - SvPVX(PL_linestr)) - stuffstart);
4377 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
4378 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */
4386 const char tmp = *s++;
4391 const char tmp = *s++;
4399 if (PL_lex_brackets <= 0)
4400 yyerror("Unmatched right square bracket");