3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
4 * 2000, 2001, 2002, 2003, 2004, 2005, 2006, by Larry Wall and others
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
12 * "It all comes from here, the stench and the peril." --Frodo
16 * This file is the lexer for Perl. It's closely linked to the
19 * The main routine is yylex(), which returns the next token.
23 #define PERL_IN_TOKE_C
26 #define yylval (PL_parser->yylval)
28 /* YYINITDEPTH -- initial size of the parser's stacks. */
29 #define YYINITDEPTH 200
31 static const char ident_too_long[] = "Identifier too long";
32 static const char commaless_variable_list[] = "comma-less variable list";
34 static void restore_rsfp(pTHX_ void *f);
35 #ifndef PERL_NO_UTF16_FILTER
36 static I32 utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen);
37 static I32 utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen);
41 # define CURMAD(slot,sv) if (PL_madskills) { curmad(slot,sv); sv = 0; }
42 # define NEXTVAL_NEXTTOKE PL_nexttoke[PL_curforce].next_val
44 # define CURMAD(slot,sv)
45 # define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
48 #define XFAKEBRACK 128
51 #ifdef USE_UTF8_SCRIPTS
52 # define UTF (!IN_BYTES)
54 # define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8))
57 /* In variables named $^X, these are the legal values for X.
58 * 1999-02-27 mjd-perl-patch@plover.com */
59 #define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
61 /* On MacOS, respect nonbreaking spaces */
62 #ifdef MACOS_TRADITIONAL
63 #define SPACE_OR_TAB(c) ((c)==' '||(c)=='\312'||(c)=='\t')
65 #define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
68 /* LEX_* are values for PL_lex_state, the state of the lexer.
69 * They are arranged oddly so that the guard on the switch statement
70 * can get by with a single comparison (if the compiler is smart enough).
73 /* #define LEX_NOTPARSING 11 is done in perl.h. */
75 #define LEX_NORMAL 10 /* normal code (ie not within "...") */
76 #define LEX_INTERPNORMAL 9 /* code within a string, eg "$foo[$x+1]" */
77 #define LEX_INTERPCASEMOD 8 /* expecting a \U, \Q or \E etc */
78 #define LEX_INTERPPUSH 7 /* starting a new sublex parse level */
79 #define LEX_INTERPSTART 6 /* expecting the start of a $var */
81 /* at end of code, eg "$x" followed by: */
82 #define LEX_INTERPEND 5 /* ... eg not one of [, { or -> */
83 #define LEX_INTERPENDMAYBE 4 /* ... eg one of [, { or -> */
85 #define LEX_INTERPCONCAT 3 /* expecting anything, eg at start of
86 string or after \E, $foo, etc */
87 #define LEX_INTERPCONST 2 /* NOT USED */
88 #define LEX_FORMLINE 1 /* expecting a format line */
89 #define LEX_KNOWNEXT 0 /* next token known; just return it */
93 static const char* const lex_state_names[] = {
112 #include "keywords.h"
114 /* CLINE is a macro that ensures PL_copline has a sane value */
119 #define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
122 # define SKIPSPACE0(s) skipspace0(s)
123 # define SKIPSPACE1(s) skipspace1(s)
124 # define SKIPSPACE2(s,tsv) skipspace2(s,&tsv)
125 # define PEEKSPACE(s) skipspace2(s,0)
127 # define SKIPSPACE0(s) skipspace(s)
128 # define SKIPSPACE1(s) skipspace(s)
129 # define SKIPSPACE2(s,tsv) skipspace(s)
130 # define PEEKSPACE(s) skipspace(s)
134 * Convenience functions to return different tokens and prime the
135 * lexer for the next token. They all take an argument.
137 * TOKEN : generic token (used for '(', DOLSHARP, etc)
138 * OPERATOR : generic operator
139 * AOPERATOR : assignment operator
140 * PREBLOCK : beginning the block after an if, while, foreach, ...
141 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
142 * PREREF : *EXPR where EXPR is not a simple identifier
143 * TERM : expression term
144 * LOOPX : loop exiting command (goto, last, dump, etc)
145 * FTST : file test operator
146 * FUN0 : zero-argument function
147 * FUN1 : not used, except for not, which isn't a UNIOP
148 * BOop : bitwise or or xor
150 * SHop : shift operator
151 * PWop : power operator
152 * PMop : pattern-matching operator
153 * Aop : addition-level operator
154 * Mop : multiplication-level operator
155 * Eop : equality-testing operator
156 * Rop : relational operator <= != gt
158 * Also see LOP and lop() below.
161 #ifdef DEBUGGING /* Serve -DT. */
162 # define REPORT(retval) tokereport((I32)retval)
164 # define REPORT(retval) (retval)
167 #define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
168 #define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
169 #define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
170 #define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
171 #define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
172 #define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
173 #define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
174 #define LOOPX(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
175 #define FTST(f) return (yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
176 #define FUN0(f) return (yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
177 #define FUN1(f) return (yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
178 #define BOop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
179 #define BAop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
180 #define SHop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
181 #define PWop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
182 #define PMop(f) return(yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
183 #define Aop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
184 #define Mop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
185 #define Eop(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
186 #define Rop(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
188 /* This bit of chicanery makes a unary function followed by
189 * a parenthesis into a function with one argument, highest precedence.
190 * The UNIDOR macro is for unary functions that can be followed by the //
191 * operator (such as C<shift // 0>).
193 #define UNI2(f,x) { \
197 PL_last_uni = PL_oldbufptr; \
198 PL_last_lop_op = f; \
200 return REPORT( (int)FUNC1 ); \
202 return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
204 #define UNI(f) UNI2(f,XTERM)
205 #define UNIDOR(f) UNI2(f,XTERMORDORDOR)
207 #define UNIBRACK(f) { \
210 PL_last_uni = PL_oldbufptr; \
212 return REPORT( (int)FUNC1 ); \
214 return REPORT( (*s == '(') ? (int)FUNC1 : (int)UNIOP ); \
217 /* grandfather return to old style */
218 #define OLDLOP(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
222 /* how to interpret the yylval associated with the token */
226 TOKENTYPE_OPNUM, /* yylval.ival contains an opcode number */
232 static struct debug_tokens {
234 enum token_type type;
236 } const debug_tokens[] =
238 { ADDOP, TOKENTYPE_OPNUM, "ADDOP" },
239 { ANDAND, TOKENTYPE_NONE, "ANDAND" },
240 { ANDOP, TOKENTYPE_NONE, "ANDOP" },
241 { ANONSUB, TOKENTYPE_IVAL, "ANONSUB" },
242 { ARROW, TOKENTYPE_NONE, "ARROW" },
243 { ASSIGNOP, TOKENTYPE_OPNUM, "ASSIGNOP" },
244 { BITANDOP, TOKENTYPE_OPNUM, "BITANDOP" },
245 { BITOROP, TOKENTYPE_OPNUM, "BITOROP" },
246 { COLONATTR, TOKENTYPE_NONE, "COLONATTR" },
247 { CONTINUE, TOKENTYPE_NONE, "CONTINUE" },
248 { DEFAULT, TOKENTYPE_NONE, "DEFAULT" },
249 { DO, TOKENTYPE_NONE, "DO" },
250 { DOLSHARP, TOKENTYPE_NONE, "DOLSHARP" },
251 { DORDOR, TOKENTYPE_NONE, "DORDOR" },
252 { DOROP, TOKENTYPE_OPNUM, "DOROP" },
253 { DOTDOT, TOKENTYPE_IVAL, "DOTDOT" },
254 { ELSE, TOKENTYPE_NONE, "ELSE" },
255 { ELSIF, TOKENTYPE_IVAL, "ELSIF" },
256 { EQOP, TOKENTYPE_OPNUM, "EQOP" },
257 { FOR, TOKENTYPE_IVAL, "FOR" },
258 { FORMAT, TOKENTYPE_NONE, "FORMAT" },
259 { FUNC, TOKENTYPE_OPNUM, "FUNC" },
260 { FUNC0, TOKENTYPE_OPNUM, "FUNC0" },
261 { FUNC0SUB, TOKENTYPE_OPVAL, "FUNC0SUB" },
262 { FUNC1, TOKENTYPE_OPNUM, "FUNC1" },
263 { FUNCMETH, TOKENTYPE_OPVAL, "FUNCMETH" },
264 { GIVEN, TOKENTYPE_IVAL, "GIVEN" },
265 { HASHBRACK, TOKENTYPE_NONE, "HASHBRACK" },
266 { IF, TOKENTYPE_IVAL, "IF" },
267 { LABEL, TOKENTYPE_PVAL, "LABEL" },
268 { LOCAL, TOKENTYPE_IVAL, "LOCAL" },
269 { LOOPEX, TOKENTYPE_OPNUM, "LOOPEX" },
270 { LSTOP, TOKENTYPE_OPNUM, "LSTOP" },
271 { LSTOPSUB, TOKENTYPE_OPVAL, "LSTOPSUB" },
272 { MATCHOP, TOKENTYPE_OPNUM, "MATCHOP" },
273 { METHOD, TOKENTYPE_OPVAL, "METHOD" },
274 { MULOP, TOKENTYPE_OPNUM, "MULOP" },
275 { MY, TOKENTYPE_IVAL, "MY" },
276 { MYSUB, TOKENTYPE_NONE, "MYSUB" },
277 { NOAMP, TOKENTYPE_NONE, "NOAMP" },
278 { NOTOP, TOKENTYPE_NONE, "NOTOP" },
279 { OROP, TOKENTYPE_IVAL, "OROP" },
280 { OROR, TOKENTYPE_NONE, "OROR" },
281 { PACKAGE, TOKENTYPE_NONE, "PACKAGE" },
282 { PMFUNC, TOKENTYPE_OPVAL, "PMFUNC" },
283 { POSTDEC, TOKENTYPE_NONE, "POSTDEC" },
284 { POSTINC, TOKENTYPE_NONE, "POSTINC" },
285 { POWOP, TOKENTYPE_OPNUM, "POWOP" },
286 { PREDEC, TOKENTYPE_NONE, "PREDEC" },
287 { PREINC, TOKENTYPE_NONE, "PREINC" },
288 { PRIVATEREF, TOKENTYPE_OPVAL, "PRIVATEREF" },
289 { REFGEN, TOKENTYPE_NONE, "REFGEN" },
290 { RELOP, TOKENTYPE_OPNUM, "RELOP" },
291 { SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" },
292 { SUB, TOKENTYPE_NONE, "SUB" },
293 { THING, TOKENTYPE_OPVAL, "THING" },
294 { UMINUS, TOKENTYPE_NONE, "UMINUS" },
295 { UNIOP, TOKENTYPE_OPNUM, "UNIOP" },
296 { UNIOPSUB, TOKENTYPE_OPVAL, "UNIOPSUB" },
297 { UNLESS, TOKENTYPE_IVAL, "UNLESS" },
298 { UNTIL, TOKENTYPE_IVAL, "UNTIL" },
299 { USE, TOKENTYPE_IVAL, "USE" },
300 { WHEN, TOKENTYPE_IVAL, "WHEN" },
301 { WHILE, TOKENTYPE_IVAL, "WHILE" },
302 { WORD, TOKENTYPE_OPVAL, "WORD" },
303 { 0, TOKENTYPE_NONE, NULL }
306 /* dump the returned token in rv, plus any optional arg in yylval */
309 S_tokereport(pTHX_ I32 rv)
313 const char *name = NULL;
314 enum token_type type = TOKENTYPE_NONE;
315 const struct debug_tokens *p;
316 SV* const report = newSVpvs("<== ");
318 for (p = debug_tokens; p->token; p++) {
319 if (p->token == (int)rv) {
326 Perl_sv_catpv(aTHX_ report, name);
327 else if ((char)rv > ' ' && (char)rv < '~')
328 Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
330 sv_catpvs(report, "EOF");
332 Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
335 case TOKENTYPE_GVVAL: /* doesn't appear to be used */
338 Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)yylval.ival);
340 case TOKENTYPE_OPNUM:
341 Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
342 PL_op_name[yylval.ival]);
345 Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", yylval.pval);
347 case TOKENTYPE_OPVAL:
349 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
350 PL_op_name[yylval.opval->op_type]);
351 if (yylval.opval->op_type == OP_CONST) {
352 Perl_sv_catpvf(aTHX_ report, " %s",
353 SvPEEK(cSVOPx_sv(yylval.opval)));
358 sv_catpvs(report, "(opval=null)");
361 PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
367 /* print the buffer with suitable escapes */
370 S_printbuf(pTHX_ const char* fmt, const char* s)
372 SV* const tmp = newSVpvs("");
373 PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
382 * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
383 * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
387 S_ao(pTHX_ int toketype)
390 if (*PL_bufptr == '=') {
392 if (toketype == ANDAND)
393 yylval.ival = OP_ANDASSIGN;
394 else if (toketype == OROR)
395 yylval.ival = OP_ORASSIGN;
396 else if (toketype == DORDOR)
397 yylval.ival = OP_DORASSIGN;
405 * When Perl expects an operator and finds something else, no_op
406 * prints the warning. It always prints "<something> found where
407 * operator expected. It prints "Missing semicolon on previous line?"
408 * if the surprise occurs at the start of the line. "do you need to
409 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
410 * where the compiler doesn't know if foo is a method call or a function.
411 * It prints "Missing operator before end of line" if there's nothing
412 * after the missing operator, or "... before <...>" if there is something
413 * after the missing operator.
417 S_no_op(pTHX_ const char *what, char *s)
420 char * const oldbp = PL_bufptr;
421 const bool is_first = (PL_oldbufptr == PL_linestart);
427 yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
428 if (ckWARN_d(WARN_SYNTAX)) {
430 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
431 "\t(Missing semicolon on previous line?)\n");
432 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
434 for (t = PL_oldoldbufptr; (isALNUM_lazy_if(t,UTF) || *t == ':'); t++)
436 if (t < PL_bufptr && isSPACE(*t))
437 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
438 "\t(Do you need to predeclare %.*s?)\n",
439 (int)(t - PL_oldoldbufptr), PL_oldoldbufptr);
443 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
444 "\t(Missing operator before %.*s?)\n", (int)(s - oldbp), oldbp);
452 * Complain about missing quote/regexp/heredoc terminator.
453 * If it's called with NULL then it cauterizes the line buffer.
454 * If we're in a delimited string and the delimiter is a control
455 * character, it's reformatted into a two-char sequence like ^C.
460 S_missingterm(pTHX_ char *s)
466 char * const nl = strrchr(s,'\n');
472 iscntrl(PL_multi_close)
474 PL_multi_close < 32 || PL_multi_close == 127
478 tmpbuf[1] = (char)toCTRL(PL_multi_close);
483 *tmpbuf = (char)PL_multi_close;
487 q = strchr(s,'"') ? '\'' : '"';
488 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
491 #define FEATURE_IS_ENABLED(name) \
492 ((0 != (PL_hints & HINT_LOCALIZE_HH)) \
493 && S_feature_is_enabled(aTHX_ STR_WITH_LEN(name)))
495 * S_feature_is_enabled
496 * Check whether the named feature is enabled.
499 S_feature_is_enabled(pTHX_ const char *name, STRLEN namelen)
502 HV * const hinthv = GvHV(PL_hintgv);
503 char he_name[32] = "feature_";
504 (void) my_strlcpy(&he_name[8], name, 24);
506 return (hinthv && hv_exists(hinthv, he_name, 8 + namelen));
514 Perl_deprecate(pTHX_ const char *s)
516 if (ckWARN(WARN_DEPRECATED))
517 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED), "Use of %s is deprecated", s);
521 Perl_deprecate_old(pTHX_ const char *s)
523 /* This function should NOT be called for any new deprecated warnings */
524 /* Use Perl_deprecate instead */
526 /* It is here to maintain backward compatibility with the pre-5.8 */
527 /* warnings category hierarchy. The "deprecated" category used to */
528 /* live under the "syntax" category. It is now a top-level category */
529 /* in its own right. */
531 if (ckWARN2(WARN_DEPRECATED, WARN_SYNTAX))
532 Perl_warner(aTHX_ packWARN2(WARN_DEPRECATED, WARN_SYNTAX),
533 "Use of %s is deprecated", s);
537 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
538 * utf16-to-utf8-reversed.
541 #ifdef PERL_CR_FILTER
545 register const char *s = SvPVX_const(sv);
546 register const char * const e = s + SvCUR(sv);
547 /* outer loop optimized to do nothing if there are no CR-LFs */
549 if (*s++ == '\r' && *s == '\n') {
550 /* hit a CR-LF, need to copy the rest */
551 register char *d = s - 1;
554 if (*s == '\r' && s[1] == '\n')
565 S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
567 const I32 count = FILTER_READ(idx+1, sv, maxlen);
568 if (count > 0 && !maxlen)
576 * Initialize variables. Uses the Perl save_stack to save its state (for
577 * recursive calls to the parser).
581 Perl_lex_start(pTHX_ SV *line)
588 /* create and initialise a parser */
590 Newx(parser, 1, yy_parser);
591 parser->old_parser = PL_parser;
594 Newx(parser->stack, YYINITDEPTH, yy_stack_frame);
595 parser->ps = parser->stack;
596 parser->stack_size = YYINITDEPTH;
598 parser->stack->state = 0;
599 parser->yyerrstatus = 0;
600 parser->yychar = YYEMPTY; /* Cause a token to be read. */
602 /* initialise lexer state */
604 SAVEI32(PL_lex_dojoin);
605 SAVEI32(PL_lex_brackets);
606 SAVEI32(PL_lex_casemods);
607 SAVEI32(PL_lex_starts);
608 SAVEI32(PL_lex_state);
609 SAVEVPTR(PL_lex_inpat);
610 SAVEI32(PL_lex_inwhat);
612 if (PL_lex_state == LEX_KNOWNEXT) {
613 I32 toke = PL_lasttoke;
614 while (--toke >= 0) {
615 SAVEI32(PL_nexttoke[toke].next_type);
616 SAVEVPTR(PL_nexttoke[toke].next_val);
618 SAVEVPTR(PL_nexttoke[toke].next_mad);
620 SAVEI32(PL_lasttoke);
622 SAVESPTR(PL_endwhite);
623 SAVESPTR(PL_thistoken);
624 SAVESPTR(PL_thiswhite);
625 SAVESPTR(PL_nextwhite);
626 SAVESPTR(PL_thisopen);
627 SAVESPTR(PL_thisclose);
628 SAVESPTR(PL_thisstuff);
629 SAVEVPTR(PL_thismad);
630 SAVEI32(PL_realtokenstart);
631 SAVEI32(PL_faketokens);
632 SAVESPTR(PL_skipwhite);
633 SAVEI32(PL_curforce);
635 if (PL_lex_state == LEX_KNOWNEXT) {
636 I32 toke = PL_nexttoke;
637 while (--toke >= 0) {
638 SAVEI32(PL_nexttype[toke]);
639 SAVEVPTR(PL_nextval[toke]);
641 SAVEI32(PL_nexttoke);
644 SAVECOPLINE(PL_curcop);
647 SAVEPPTR(PL_oldbufptr);
648 SAVEPPTR(PL_oldoldbufptr);
649 SAVEPPTR(PL_last_lop);
650 SAVEPPTR(PL_last_uni);
651 SAVEPPTR(PL_linestart);
652 SAVESPTR(PL_linestr);
653 SAVEGENERICPV(PL_lex_brackstack);
654 SAVEGENERICPV(PL_lex_casestack);
655 SAVEDESTRUCTOR_X(restore_rsfp, PL_rsfp);
656 SAVESPTR(PL_lex_stuff);
657 SAVEI32(PL_lex_defer);
658 SAVEI32(PL_sublex_info.sub_inwhat);
659 SAVEI32(PL_sublex_info.super_state);
660 SAVEVPTR(PL_sublex_info.sub_op);
661 SAVEPPTR(PL_sublex_info.super_bufptr);
662 SAVEPPTR(PL_sublex_info.super_bufend);
663 SAVESPTR(PL_lex_repl);
665 SAVEINT(PL_lex_expect);
666 SAVEI32(PL_lex_formbrack);
668 SAVEI32(PL_multi_close);
669 SAVEI32(PL_multi_open);
670 SAVEI32(PL_multi_start);
671 SAVEI8(PL_pending_ident);
672 SAVEBOOL(PL_preambled);
674 PL_lex_state = LEX_NORMAL;
678 Newx(PL_lex_brackstack, 120, char);
679 Newx(PL_lex_casestack, 12, char);
681 *PL_lex_casestack = '\0';
692 PL_realtokenstart = 0;
704 PL_sublex_info.sub_inwhat = 0;
705 PL_sublex_info.super_state = 0;
706 PL_sublex_info.sub_op = NULL;
707 PL_sublex_info.super_bufptr = NULL;
708 PL_sublex_info.super_bufend = NULL;
710 PL_lex_formbrack = 0;
715 PL_pending_ident = '\0';
716 PL_preambled = FALSE;
719 s = SvPV_const(line, len);
724 PL_linestr = newSVpvs("\n;");
725 } else if (SvREADONLY(line) || s[len-1] != ';') {
726 PL_linestr = newSVsv(line);
728 sv_catpvs(PL_linestr, "\n;");
731 SvREFCNT_inc_simple_void_NN(line);
734 /* PL_linestr needs to survive until end of scope, not just the next
735 FREETMPS. See changes 17505 and 17546 which fixed the symptoms only. */
736 SAVEFREESV(PL_linestr);
737 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
738 PL_bufend = PL_bufptr + SvCUR(PL_linestr);
739 PL_last_lop = PL_last_uni = NULL;
745 * Finalizer for lexing operations. Must be called when the parser is
746 * done with the lexer.
753 PL_doextract = FALSE;
758 * This subroutine has nothing to do with tilting, whether at windmills
759 * or pinball tables. Its name is short for "increment line". It
760 * increments the current line number in CopLINE(PL_curcop) and checks
761 * to see whether the line starts with a comment of the form
762 * # line 500 "foo.pm"
763 * If so, it sets the current line number and file to the values in the comment.
767 S_incline(pTHX_ char *s)
775 CopLINE_inc(PL_curcop);
778 while (SPACE_OR_TAB(*s))
780 if (strnEQ(s, "line", 4))
784 if (SPACE_OR_TAB(*s))
788 while (SPACE_OR_TAB(*s))
796 while (SPACE_OR_TAB(*s))
798 if (*s == '"' && (t = strchr(s+1, '"'))) {
808 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
810 if (*e != '\n' && *e != '\0')
811 return; /* false alarm */
817 const char * const cf = CopFILE(PL_curcop);
818 STRLEN tmplen = cf ? strlen(cf) : 0;
819 if (tmplen > 7 && strnEQ(cf, "(eval ", 6)) {
820 /* must copy *{"::_<(eval N)[oldfilename:L]"}
821 * to *{"::_<newfilename"} */
822 char smallbuf[256], smallbuf2[256];
823 char *tmpbuf, *tmpbuf2;
825 STRLEN tmplen2 = strlen(s);
826 if (tmplen + 3 < sizeof smallbuf)
829 Newx(tmpbuf, tmplen + 3, char);
830 if (tmplen2 + 3 < sizeof smallbuf2)
833 Newx(tmpbuf2, tmplen2 + 3, char);
834 tmpbuf[0] = tmpbuf2[0] = '_';
835 tmpbuf[1] = tmpbuf2[1] = '<';
836 memcpy(tmpbuf + 2, cf, ++tmplen);
837 memcpy(tmpbuf2 + 2, s, ++tmplen2);
839 gvp = (GV**)hv_fetch(PL_defstash, tmpbuf, tmplen, FALSE);
841 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
843 gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
844 /* adjust ${"::_<newfilename"} to store the new file name */
845 GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
846 GvHV(gv2) = (HV*)SvREFCNT_inc(GvHV(*gvp));
847 GvAV(gv2) = (AV*)SvREFCNT_inc(GvAV(*gvp));
850 if (tmpbuf != smallbuf) Safefree(tmpbuf);
851 if (tmpbuf2 != smallbuf2) Safefree(tmpbuf2);
854 CopFILE_free(PL_curcop);
855 CopFILE_set(PL_curcop, s);
858 CopLINE_set(PL_curcop, atoi(n)-1);
862 /* skip space before PL_thistoken */
865 S_skipspace0(pTHX_ register char *s)
872 PL_thiswhite = newSVpvs("");
873 sv_catsv(PL_thiswhite, PL_skipwhite);
874 sv_free(PL_skipwhite);
877 PL_realtokenstart = s - SvPVX(PL_linestr);
881 /* skip space after PL_thistoken */
884 S_skipspace1(pTHX_ register char *s)
886 const char *start = s;
887 I32 startoff = start - SvPVX(PL_linestr);
892 start = SvPVX(PL_linestr) + startoff;
893 if (!PL_thistoken && PL_realtokenstart >= 0) {
894 const char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
895 PL_thistoken = newSVpvn(tstart, start - tstart);
897 PL_realtokenstart = -1;
900 PL_nextwhite = newSVpvs("");
901 sv_catsv(PL_nextwhite, PL_skipwhite);
902 sv_free(PL_skipwhite);
909 S_skipspace2(pTHX_ register char *s, SV **svp)
912 const I32 bufptroff = PL_bufptr - SvPVX(PL_linestr);
913 const I32 startoff = s - SvPVX(PL_linestr);
916 PL_bufptr = SvPVX(PL_linestr) + bufptroff;
917 if (!PL_madskills || !svp)
919 start = SvPVX(PL_linestr) + startoff;
920 if (!PL_thistoken && PL_realtokenstart >= 0) {
921 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
922 PL_thistoken = newSVpvn(tstart, start - tstart);
923 PL_realtokenstart = -1;
928 sv_setsv(*svp, PL_skipwhite);
929 sv_free(PL_skipwhite);
938 S_update_debugger_info_pv(pTHX_ const char *buf, STRLEN len)
940 AV *av = CopFILEAVx(PL_curcop);
942 SV * const sv = newSV(0);
943 sv_upgrade(sv, SVt_PVMG);
944 sv_setpvn(sv, buf, len);
947 av_store(av, (I32)CopLINE(PL_curcop), sv);
952 S_update_debugger_info_sv(pTHX_ SV *orig_sv)
954 AV *av = CopFILEAVx(PL_curcop);
956 SV * const sv = newSV(0);
957 sv_upgrade(sv, SVt_PVMG);
958 sv_setsv(sv, orig_sv);
961 av_store(av, (I32)CopLINE(PL_curcop), sv);
967 * Called to gobble the appropriate amount and type of whitespace.
968 * Skips comments as well.
972 S_skipspace(pTHX_ register char *s)
977 int startoff = s - SvPVX(PL_linestr);
980 sv_free(PL_skipwhite);
985 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
986 while (s < PL_bufend && SPACE_OR_TAB(*s))
996 SSize_t oldprevlen, oldoldprevlen;
997 SSize_t oldloplen = 0, oldunilen = 0;
998 while (s < PL_bufend && isSPACE(*s)) {
999 if (*s++ == '\n' && PL_in_eval && !PL_rsfp)
1004 if (s < PL_bufend && *s == '#') {
1005 while (s < PL_bufend && *s != '\n')
1007 if (s < PL_bufend) {
1009 if (PL_in_eval && !PL_rsfp) {
1016 /* only continue to recharge the buffer if we're at the end
1017 * of the buffer, we're not reading from a source filter, and
1018 * we're in normal lexing mode
1020 if (s < PL_bufend || !PL_rsfp || PL_sublex_info.sub_inwhat ||
1021 PL_lex_state == LEX_FORMLINE)
1028 /* try to recharge the buffer */
1030 curoff = s - SvPVX(PL_linestr);
1033 if ((s = filter_gets(PL_linestr, PL_rsfp,
1034 (prevlen = SvCUR(PL_linestr)))) == NULL)
1037 if (PL_madskills && curoff != startoff) {
1039 PL_skipwhite = newSVpvs("");
1040 sv_catpvn(PL_skipwhite, SvPVX(PL_linestr) + startoff,
1044 /* mustn't throw out old stuff yet if madpropping */
1045 SvCUR(PL_linestr) = curoff;
1046 s = SvPVX(PL_linestr) + curoff;
1048 if (curoff && s[-1] == '\n')
1052 /* end of file. Add on the -p or -n magic */
1053 /* XXX these shouldn't really be added here, can't set PL_faketokens */
1056 sv_catpv(PL_linestr,
1057 ";}continue{print or die qq(-p destination: $!\\n);}");
1059 sv_setpv(PL_linestr,
1060 ";}continue{print or die qq(-p destination: $!\\n);}");
1062 PL_minus_n = PL_minus_p = 0;
1064 else if (PL_minus_n) {
1066 sv_catpvn(PL_linestr, ";}", 2);
1068 sv_setpvn(PL_linestr, ";}", 2);
1074 sv_catpvn(PL_linestr,";", 1);
1076 sv_setpvn(PL_linestr,";", 1);
1079 /* reset variables for next time we lex */
1080 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart
1086 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
1087 PL_last_lop = PL_last_uni = NULL;
1089 /* Close the filehandle. Could be from -P preprocessor,
1090 * STDIN, or a regular file. If we were reading code from
1091 * STDIN (because the commandline held no -e or filename)
1092 * then we don't close it, we reset it so the code can
1093 * read from STDIN too.
1096 if (PL_preprocess && !PL_in_eval)
1097 (void)PerlProc_pclose(PL_rsfp);
1098 else if ((PerlIO*)PL_rsfp == PerlIO_stdin())
1099 PerlIO_clearerr(PL_rsfp);
1101 (void)PerlIO_close(PL_rsfp);
1106 /* not at end of file, so we only read another line */
1107 /* make corresponding updates to old pointers, for yyerror() */
1108 oldprevlen = PL_oldbufptr - PL_bufend;
1109 oldoldprevlen = PL_oldoldbufptr - PL_bufend;
1111 oldunilen = PL_last_uni - PL_bufend;
1113 oldloplen = PL_last_lop - PL_bufend;
1114 PL_linestart = PL_bufptr = s + prevlen;
1115 PL_bufend = s + SvCUR(PL_linestr);
1117 PL_oldbufptr = s + oldprevlen;
1118 PL_oldoldbufptr = s + oldoldprevlen;
1120 PL_last_uni = s + oldunilen;
1122 PL_last_lop = s + oldloplen;
1125 /* debugger active and we're not compiling the debugger code,
1126 * so store the line into the debugger's array of lines
1128 if (PERLDB_LINE && PL_curstash != PL_debstash)
1129 update_debugger_info_pv(PL_bufptr, PL_bufend - PL_bufptr);
1136 PL_skipwhite = newSVpvs("");
1137 curoff = s - SvPVX(PL_linestr);
1138 if (curoff - startoff)
1139 sv_catpvn(PL_skipwhite, SvPVX(PL_linestr) + startoff,
1148 * Check the unary operators to ensure there's no ambiguity in how they're
1149 * used. An ambiguous piece of code would be:
1151 * This doesn't mean rand() + 5. Because rand() is a unary operator,
1152 * the +5 is its argument.
1162 if (PL_oldoldbufptr != PL_last_uni)
1164 while (isSPACE(*PL_last_uni))
1167 while (isALNUM_lazy_if(s,UTF) || *s == '-')
1169 if ((t = strchr(s, '(')) && t < PL_bufptr)
1172 if (ckWARN_d(WARN_AMBIGUOUS)){
1173 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
1174 "Warning: Use of \"%.*s\" without parentheses is ambiguous",
1175 (int)(s - PL_last_uni), PL_last_uni);
1180 * LOP : macro to build a list operator. Its behaviour has been replaced
1181 * with a subroutine, S_lop() for which LOP is just another name.
1184 #define LOP(f,x) return lop(f,x,s)
1188 * Build a list operator (or something that might be one). The rules:
1189 * - if we have a next token, then it's a list operator [why?]
1190 * - if the next thing is an opening paren, then it's a function
1191 * - else it's a list operator
1195 S_lop(pTHX_ I32 f, int x, char *s)
1202 PL_last_lop = PL_oldbufptr;
1203 PL_last_lop_op = (OPCODE)f;
1206 return REPORT(LSTOP);
1209 return REPORT(LSTOP);
1212 return REPORT(FUNC);
1215 return REPORT(FUNC);
1217 return REPORT(LSTOP);
1223 * Sets up for an eventual force_next(). start_force(0) basically does
1224 * an unshift, while start_force(-1) does a push. yylex removes items
1229 S_start_force(pTHX_ int where)
1233 if (where < 0) /* so people can duplicate start_force(PL_curforce) */
1234 where = PL_lasttoke;
1235 assert(PL_curforce < 0 || PL_curforce == where);
1236 if (PL_curforce != where) {
1237 for (i = PL_lasttoke; i > where; --i) {
1238 PL_nexttoke[i] = PL_nexttoke[i-1];
1242 if (PL_curforce < 0) /* in case of duplicate start_force() */
1243 Zero(&PL_nexttoke[where], 1, NEXTTOKE);
1244 PL_curforce = where;
1247 curmad('^', newSVpvs(""));
1248 CURMAD('_', PL_nextwhite);
1253 S_curmad(pTHX_ char slot, SV *sv)
1259 if (PL_curforce < 0)
1260 where = &PL_thismad;
1262 where = &PL_nexttoke[PL_curforce].next_mad;
1265 sv_setpvn(sv, "", 0);
1268 if (UTF && is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
1270 else if (PL_encoding) {
1271 sv_recode_to_utf8(sv, PL_encoding);
1276 /* keep a slot open for the head of the list? */
1277 if (slot != '_' && *where && (*where)->mad_key == '^') {
1278 (*where)->mad_key = slot;
1279 sv_free((*where)->mad_val);
1280 (*where)->mad_val = (void*)sv;
1283 addmad(newMADsv(slot, sv), where, 0);
1286 # define start_force(where) NOOP
1287 # define curmad(slot, sv) NOOP
1292 * When the lexer realizes it knows the next token (for instance,
1293 * it is reordering tokens for the parser) then it can call S_force_next
1294 * to know what token to return the next time the lexer is called. Caller
1295 * will need to set PL_nextval[] (or PL_nexttoke[].next_val with PERL_MAD),
1296 * and possibly PL_expect to ensure the lexer handles the token correctly.
1300 S_force_next(pTHX_ I32 type)
1304 if (PL_curforce < 0)
1305 start_force(PL_lasttoke);
1306 PL_nexttoke[PL_curforce].next_type = type;
1307 if (PL_lex_state != LEX_KNOWNEXT)
1308 PL_lex_defer = PL_lex_state;
1309 PL_lex_state = LEX_KNOWNEXT;
1310 PL_lex_expect = PL_expect;
1313 PL_nexttype[PL_nexttoke] = type;
1315 if (PL_lex_state != LEX_KNOWNEXT) {
1316 PL_lex_defer = PL_lex_state;
1317 PL_lex_expect = PL_expect;
1318 PL_lex_state = LEX_KNOWNEXT;
1324 S_newSV_maybe_utf8(pTHX_ const char *start, STRLEN len)
1327 SV * const sv = newSVpvn(start,len);
1328 if (UTF && !IN_BYTES && is_utf8_string((const U8*)start, len))
1335 * When the lexer knows the next thing is a word (for instance, it has
1336 * just seen -> and it knows that the next char is a word char, then
1337 * it calls S_force_word to stick the next word into the PL_nexttoke/val
1341 * char *start : buffer position (must be within PL_linestr)
1342 * int token : PL_next* will be this type of bare word (e.g., METHOD,WORD)
1343 * int check_keyword : if true, Perl checks to make sure the word isn't
1344 * a keyword (do this if the word is a label, e.g. goto FOO)
1345 * int allow_pack : if true, : characters will also be allowed (require,
1346 * use, etc. do this)
1347 * int allow_initial_tick : used by the "sub" lexer only.
1351 S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
1357 start = SKIPSPACE1(start);
1359 if (isIDFIRST_lazy_if(s,UTF) ||
1360 (allow_pack && *s == ':') ||
1361 (allow_initial_tick && *s == '\'') )
1363 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
1364 if (check_keyword && keyword(PL_tokenbuf, len, 0))
1366 start_force(PL_curforce);
1368 curmad('X', newSVpvn(start,s-start));
1369 if (token == METHOD) {
1374 PL_expect = XOPERATOR;
1377 NEXTVAL_NEXTTOKE.opval
1378 = (OP*)newSVOP(OP_CONST,0,
1379 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
1380 NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
1388 * Called when the lexer wants $foo *foo &foo etc, but the program
1389 * text only contains the "foo" portion. The first argument is a pointer
1390 * to the "foo", and the second argument is the type symbol to prefix.
1391 * Forces the next token to be a "WORD".
1392 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
1396 S_force_ident(pTHX_ register const char *s, int kind)
1400 const STRLEN len = strlen(s);
1401 OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpvn(s, len));
1402 start_force(PL_curforce);
1403 NEXTVAL_NEXTTOKE.opval = o;
1406 o->op_private = OPpCONST_ENTERED;
1407 /* XXX see note in pp_entereval() for why we forgo typo
1408 warnings if the symbol must be introduced in an eval.
1410 gv_fetchpvn_flags(s, len,
1411 PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL)
1413 kind == '$' ? SVt_PV :
1414 kind == '@' ? SVt_PVAV :
1415 kind == '%' ? SVt_PVHV :
1423 Perl_str_to_version(pTHX_ SV *sv)
1428 const char *start = SvPV_const(sv,len);
1429 const char * const end = start + len;
1430 const bool utf = SvUTF8(sv) ? TRUE : FALSE;
1431 while (start < end) {
1435 n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
1440 retval += ((NV)n)/nshift;
1449 * Forces the next token to be a version number.
1450 * If the next token appears to be an invalid version number, (e.g. "v2b"),
1451 * and if "guessing" is TRUE, then no new token is created (and the caller
1452 * must use an alternative parsing method).
1456 S_force_version(pTHX_ char *s, int guessing)
1462 I32 startoff = s - SvPVX(PL_linestr);
1471 while (isDIGIT(*d) || *d == '_' || *d == '.')
1475 start_force(PL_curforce);
1476 curmad('X', newSVpvn(s,d-s));
1479 if (*d == ';' || isSPACE(*d) || *d == '}' || !*d) {
1481 s = scan_num(s, &yylval);
1482 version = yylval.opval;
1483 ver = cSVOPx(version)->op_sv;
1484 if (SvPOK(ver) && !SvNIOK(ver)) {
1485 SvUPGRADE(ver, SVt_PVNV);
1486 SvNV_set(ver, str_to_version(ver));
1487 SvNOK_on(ver); /* hint that it is a version */
1490 else if (guessing) {
1493 sv_free(PL_nextwhite); /* let next token collect whitespace */
1495 s = SvPVX(PL_linestr) + startoff;
1503 if (PL_madskills && !version) {
1504 sv_free(PL_nextwhite); /* let next token collect whitespace */
1506 s = SvPVX(PL_linestr) + startoff;
1509 /* NOTE: The parser sees the package name and the VERSION swapped */
1510 start_force(PL_curforce);
1511 NEXTVAL_NEXTTOKE.opval = version;
1519 * Tokenize a quoted string passed in as an SV. It finds the next
1520 * chunk, up to end of string or a backslash. It may make a new
1521 * SV containing that chunk (if HINT_NEW_STRING is on). It also
1526 S_tokeq(pTHX_ SV *sv)
1530 register char *send;
1538 s = SvPV_force(sv, len);
1539 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
1542 while (s < send && *s != '\\')
1547 if ( PL_hints & HINT_NEW_STRING ) {
1548 pv = sv_2mortal(newSVpvn(SvPVX_const(pv), len));
1554 if (s + 1 < send && (s[1] == '\\'))
1555 s++; /* all that, just for this */
1560 SvCUR_set(sv, d - SvPVX_const(sv));
1562 if ( PL_hints & HINT_NEW_STRING )
1563 return new_constant(NULL, 0, "q", sv, pv, "q");
1568 * Now come three functions related to double-quote context,
1569 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
1570 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
1571 * interact with PL_lex_state, and create fake ( ... ) argument lists
1572 * to handle functions and concatenation.
1573 * They assume that whoever calls them will be setting up a fake
1574 * join call, because each subthing puts a ',' after it. This lets
1577 * join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
1579 * (I'm not sure whether the spurious commas at the end of lcfirst's
1580 * arguments and join's arguments are created or not).
1585 * Assumes that yylval.ival is the op we're creating (e.g. OP_LCFIRST).
1587 * Pattern matching will set PL_lex_op to the pattern-matching op to
1588 * make (we return THING if yylval.ival is OP_NULL, PMFUNC otherwise).
1590 * OP_CONST and OP_READLINE are easy--just make the new op and return.
1592 * Everything else becomes a FUNC.
1594 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
1595 * had an OP_CONST or OP_READLINE). This just sets us up for a
1596 * call to S_sublex_push().
1600 S_sublex_start(pTHX)
1603 register const I32 op_type = yylval.ival;
1605 if (op_type == OP_NULL) {
1606 yylval.opval = PL_lex_op;
1610 if (op_type == OP_CONST || op_type == OP_READLINE) {
1611 SV *sv = tokeq(PL_lex_stuff);
1613 if (SvTYPE(sv) == SVt_PVIV) {
1614 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
1616 const char * const p = SvPV_const(sv, len);
1617 SV * const nsv = newSVpvn(p, len);
1623 yylval.opval = (OP*)newSVOP(op_type, 0, sv);
1624 PL_lex_stuff = NULL;
1625 /* Allow <FH> // "foo" */
1626 if (op_type == OP_READLINE)
1627 PL_expect = XTERMORDORDOR;
1630 else if (op_type == OP_BACKTICK && PL_lex_op) {
1631 /* readpipe() vas overriden */
1632 cSVOPx(cLISTOPx(cUNOPx(PL_lex_op)->op_first)->op_first->op_sibling)->op_sv = tokeq(PL_lex_stuff);
1633 yylval.opval = PL_lex_op;
1635 PL_lex_stuff = NULL;
1639 PL_sublex_info.super_state = PL_lex_state;
1640 PL_sublex_info.sub_inwhat = op_type;
1641 PL_sublex_info.sub_op = PL_lex_op;
1642 PL_lex_state = LEX_INTERPPUSH;
1646 yylval.opval = PL_lex_op;
1656 * Create a new scope to save the lexing state. The scope will be
1657 * ended in S_sublex_done. Returns a '(', starting the function arguments
1658 * to the uc, lc, etc. found before.
1659 * Sets PL_lex_state to LEX_INTERPCONCAT.
1668 PL_lex_state = PL_sublex_info.super_state;
1669 SAVEI32(PL_lex_dojoin);
1670 SAVEI32(PL_lex_brackets);
1671 SAVEI32(PL_lex_casemods);
1672 SAVEI32(PL_lex_starts);
1673 SAVEI32(PL_lex_state);
1674 SAVEVPTR(PL_lex_inpat);
1675 SAVEI32(PL_lex_inwhat);
1676 SAVECOPLINE(PL_curcop);
1677 SAVEPPTR(PL_bufptr);
1678 SAVEPPTR(PL_bufend);
1679 SAVEPPTR(PL_oldbufptr);
1680 SAVEPPTR(PL_oldoldbufptr);
1681 SAVEPPTR(PL_last_lop);
1682 SAVEPPTR(PL_last_uni);
1683 SAVEPPTR(PL_linestart);
1684 SAVESPTR(PL_linestr);
1685 SAVEGENERICPV(PL_lex_brackstack);
1686 SAVEGENERICPV(PL_lex_casestack);
1688 PL_linestr = PL_lex_stuff;
1689 PL_lex_stuff = NULL;
1691 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
1692 = SvPVX(PL_linestr);
1693 PL_bufend += SvCUR(PL_linestr);
1694 PL_last_lop = PL_last_uni = NULL;
1695 SAVEFREESV(PL_linestr);
1697 PL_lex_dojoin = FALSE;
1698 PL_lex_brackets = 0;
1699 Newx(PL_lex_brackstack, 120, char);
1700 Newx(PL_lex_casestack, 12, char);
1701 PL_lex_casemods = 0;
1702 *PL_lex_casestack = '\0';
1704 PL_lex_state = LEX_INTERPCONCAT;
1705 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
1707 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
1708 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
1709 PL_lex_inpat = PL_sublex_info.sub_op;
1711 PL_lex_inpat = NULL;
1718 * Restores lexer state after a S_sublex_push.
1725 if (!PL_lex_starts++) {
1726 SV * const sv = newSVpvs("");
1727 if (SvUTF8(PL_linestr))
1729 PL_expect = XOPERATOR;
1730 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
1734 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
1735 PL_lex_state = LEX_INTERPCASEMOD;
1739 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
1740 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
1741 PL_linestr = PL_lex_repl;
1743 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
1744 PL_bufend += SvCUR(PL_linestr);
1745 PL_last_lop = PL_last_uni = NULL;
1746 SAVEFREESV(PL_linestr);
1747 PL_lex_dojoin = FALSE;
1748 PL_lex_brackets = 0;
1749 PL_lex_casemods = 0;
1750 *PL_lex_casestack = '\0';
1752 if (SvEVALED(PL_lex_repl)) {
1753 PL_lex_state = LEX_INTERPNORMAL;
1755 /* we don't clear PL_lex_repl here, so that we can check later
1756 whether this is an evalled subst; that means we rely on the
1757 logic to ensure sublex_done() is called again only via the
1758 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
1761 PL_lex_state = LEX_INTERPCONCAT;
1771 PL_endwhite = newSVpvs("");
1772 sv_catsv(PL_endwhite, PL_thiswhite);
1776 sv_setpvn(PL_thistoken,"",0);
1778 PL_realtokenstart = -1;
1782 PL_bufend = SvPVX(PL_linestr);
1783 PL_bufend += SvCUR(PL_linestr);
1784 PL_expect = XOPERATOR;
1785 PL_sublex_info.sub_inwhat = 0;
1793 Extracts a pattern, double-quoted string, or transliteration. This
1796 It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
1797 processing a pattern (PL_lex_inpat is true), a transliteration
1798 (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
1800 Returns a pointer to the character scanned up to. If this is
1801 advanced from the start pointer supplied (i.e. if anything was
1802 successfully parsed), will leave an OP for the substring scanned
1803 in yylval. Caller must intuit reason for not parsing further
1804 by looking at the next characters herself.
1808 double-quoted style: \r and \n
1809 regexp special ones: \D \s
1812 case and quoting: \U \Q \E
1813 stops on @ and $, but not for $ as tail anchor
1815 In transliterations:
1816 characters are VERY literal, except for - not at the start or end
1817 of the string, which indicates a range. If the range is in bytes,
1818 scan_const expands the range to the full set of intermediate
1819 characters. If the range is in utf8, the hyphen is replaced with
1820 a certain range mark which will be handled by pmtrans() in op.c.
1822 In double-quoted strings:
1824 double-quoted style: \r and \n
1826 deprecated backrefs: \1 (in substitution replacements)
1827 case and quoting: \U \Q \E
1830 scan_const does *not* construct ops to handle interpolated strings.
1831 It stops processing as soon as it finds an embedded $ or @ variable
1832 and leaves it to the caller to work out what's going on.
1834 embedded arrays (whether in pattern or not) could be:
1835 @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
1837 $ in double-quoted strings must be the symbol of an embedded scalar.
1839 $ in pattern could be $foo or could be tail anchor. Assumption:
1840 it's a tail anchor if $ is the last thing in the string, or if it's
1841 followed by one of "()| \r\n\t"
1843 \1 (backreferences) are turned into $1
1845 The structure of the code is
1846 while (there's a character to process) {
1847 handle transliteration ranges
1848 skip regexp comments /(?#comment)/ and codes /(?{code})/
1849 skip #-initiated comments in //x patterns
1850 check for embedded arrays
1851 check for embedded scalars
1853 leave intact backslashes from leaveit (below)
1854 deprecate \1 in substitution replacements
1855 handle string-changing backslashes \l \U \Q \E, etc.
1856 switch (what was escaped) {
1857 handle \- in a transliteration (becomes a literal -)
1858 handle \132 (octal characters)
1859 handle \x15 and \x{1234} (hex characters)
1860 handle \N{name} (named characters)
1861 handle \cV (control characters)
1862 handle printf-style backslashes (\f, \r, \n, etc)
1864 } (end if backslash)
1865 } (end while character to read)
1870 S_scan_const(pTHX_ char *start)
1873 register char *send = PL_bufend; /* end of the constant */
1874 SV *sv = newSV(send - start); /* sv for the constant */
1875 register char *s = start; /* start of the constant */
1876 register char *d = SvPVX(sv); /* destination for copies */
1877 bool dorange = FALSE; /* are we in a translit range? */
1878 bool didrange = FALSE; /* did we just finish a range? */
1879 I32 has_utf8 = FALSE; /* Output constant is UTF8 */
1880 I32 this_utf8 = UTF; /* The source string is assumed to be UTF8 */
1883 UV literal_endpoint = 0;
1884 bool native_range = TRUE; /* turned to FALSE if the first endpoint is Unicode. */
1887 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
1888 /* If we are doing a trans and we know we want UTF8 set expectation */
1889 has_utf8 = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
1890 this_utf8 = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
1894 while (s < send || dorange) {
1895 /* get transliterations out of the way (they're most literal) */
1896 if (PL_lex_inwhat == OP_TRANS) {
1897 /* expand a range A-Z to the full set of characters. AIE! */
1899 I32 i; /* current expanded character */
1900 I32 min; /* first character in range */
1901 I32 max; /* last character in range */
1912 char * const c = (char*)utf8_hop((U8*)d, -1);
1916 *c = (char)UTF_TO_NATIVE(0xff);
1917 /* mark the range as done, and continue */
1923 i = d - SvPVX_const(sv); /* remember current offset */
1926 SvLEN(sv) + (has_utf8 ?
1927 (512 - UTF_CONTINUATION_MARK +
1930 /* How many two-byte within 0..255: 128 in UTF-8,
1931 * 96 in UTF-8-mod. */
1933 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
1935 d = SvPVX(sv) + i; /* refresh d after realloc */
1939 for (j = 0; j <= 1; j++) {
1940 char * const c = (char*)utf8_hop((U8*)d, -1);
1941 const UV uv = utf8n_to_uvchr((U8*)c, d - c, NULL, 0);
1947 max = (U8)0xff; /* only to \xff */
1948 uvmax = uv; /* \x{100} to uvmax */
1950 d = c; /* eat endpoint chars */
1955 d -= 2; /* eat the first char and the - */
1956 min = (U8)*d; /* first char in range */
1957 max = (U8)d[1]; /* last char in range */
1964 "Invalid range \"%c-%c\" in transliteration operator",
1965 (char)min, (char)max);
1969 if (literal_endpoint == 2 &&
1970 ((isLOWER(min) && isLOWER(max)) ||
1971 (isUPPER(min) && isUPPER(max)))) {
1973 for (i = min; i <= max; i++)
1975 *d++ = NATIVE_TO_NEED(has_utf8,i);
1977 for (i = min; i <= max; i++)
1979 *d++ = NATIVE_TO_NEED(has_utf8,i);
1984 for (i = min; i <= max; i++)
1987 const U8 ch = (U8)NATIVE_TO_UTF(i);
1988 if (UNI_IS_INVARIANT(ch))
1991 *d++ = (U8)UTF8_EIGHT_BIT_HI(ch);
1992 *d++ = (U8)UTF8_EIGHT_BIT_LO(ch);
2001 d = (char*)uvchr_to_utf8((U8*)d, 0x100);
2003 *d++ = (char)UTF_TO_NATIVE(0xff);
2005 d = (char*)uvchr_to_utf8((U8*)d, uvmax);
2009 /* mark the range as done, and continue */
2013 literal_endpoint = 0;
2018 /* range begins (ignore - as first or last char) */
2019 else if (*s == '-' && s+1 < send && s != start) {
2021 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
2028 *d++ = (char)UTF_TO_NATIVE(0xff); /* use illegal utf8 byte--see pmtrans */
2038 literal_endpoint = 0;
2039 native_range = TRUE;
2044 /* if we get here, we're not doing a transliteration */
2046 /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
2047 except for the last char, which will be done separately. */
2048 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
2050 while (s+1 < send && *s != ')')
2051 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2053 else if (s[2] == '{' /* This should match regcomp.c */
2054 || ((s[2] == 'p' || s[2] == '?') && s[3] == '{'))
2057 char *regparse = s + (s[2] == '{' ? 3 : 4);
2060 while (count && (c = *regparse)) {
2061 if (c == '\\' && regparse[1])
2069 if (*regparse != ')')
2070 regparse--; /* Leave one char for continuation. */
2071 while (s < regparse)
2072 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2076 /* likewise skip #-initiated comments in //x patterns */
2077 else if (*s == '#' && PL_lex_inpat &&
2078 ((PMOP*)PL_lex_inpat)->op_pmflags & PMf_EXTENDED) {
2079 while (s+1 < send && *s != '\n')
2080 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2083 /* check for embedded arrays
2084 (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
2086 else if (*s == '@' && s[1]) {
2087 if (isALNUM_lazy_if(s+1,UTF))
2089 if (strchr(":'{$", s[1]))
2091 if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
2092 break; /* in regexp, neither @+ nor @- are interpolated */
2095 /* check for embedded scalars. only stop if we're sure it's a
2098 else if (*s == '$') {
2099 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
2101 if (s + 1 < send && !strchr("()| \r\n\t", s[1]))
2102 break; /* in regexp, $ might be tail anchor */
2105 /* End of else if chain - OP_TRANS rejoin rest */
2108 if (*s == '\\' && s+1 < send) {
2111 /* deprecate \1 in strings and substitution replacements */
2112 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
2113 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
2115 if (ckWARN(WARN_SYNTAX))
2116 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
2121 /* string-change backslash escapes */
2122 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
2126 /* skip any other backslash escapes in a pattern */
2127 else if (PL_lex_inpat) {
2128 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
2129 goto default_action;
2132 /* if we get here, it's either a quoted -, or a digit */
2135 /* quoted - in transliterations */
2137 if (PL_lex_inwhat == OP_TRANS) {
2144 if ((isALPHA(*s) || isDIGIT(*s)) &&
2146 Perl_warner(aTHX_ packWARN(WARN_MISC),
2147 "Unrecognized escape \\%c passed through",
2149 /* default action is to copy the quoted character */
2150 goto default_action;
2153 /* \132 indicates an octal constant */
2154 case '0': case '1': case '2': case '3':
2155 case '4': case '5': case '6': case '7':
2159 uv = grok_oct(s, &len, &flags, NULL);
2162 goto NUM_ESCAPE_INSERT;
2164 /* \x24 indicates a hex constant */
2168 char* const e = strchr(s, '}');
2169 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
2170 PERL_SCAN_DISALLOW_PREFIX;
2175 yyerror("Missing right brace on \\x{}");
2179 uv = grok_hex(s, &len, &flags, NULL);
2185 I32 flags = PERL_SCAN_DISALLOW_PREFIX;
2186 uv = grok_hex(s, &len, &flags, NULL);
2192 /* Insert oct or hex escaped character.
2193 * There will always enough room in sv since such
2194 * escapes will be longer than any UTF-8 sequence
2195 * they can end up as. */
2197 /* We need to map to chars to ASCII before doing the tests
2200 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(uv))) {
2201 if (!has_utf8 && uv > 255) {
2202 /* Might need to recode whatever we have
2203 * accumulated so far if it contains any
2206 * (Can't we keep track of that and avoid
2207 * this rescan? --jhi)
2211 for (c = (U8 *) SvPVX(sv); c < (U8 *)d; c++) {
2212 if (!NATIVE_IS_INVARIANT(*c)) {
2217 const STRLEN offset = d - SvPVX_const(sv);
2219 d = SvGROW(sv, SvLEN(sv) + hicount + 1) + offset;
2223 while (src >= (const U8 *)SvPVX_const(sv)) {
2224 if (!NATIVE_IS_INVARIANT(*src)) {
2225 const U8 ch = NATIVE_TO_ASCII(*src);
2226 *dst-- = (U8)UTF8_EIGHT_BIT_LO(ch);
2227 *dst-- = (U8)UTF8_EIGHT_BIT_HI(ch);
2237 if (has_utf8 || uv > 255) {
2238 d = (char*)uvchr_to_utf8((U8*)d, uv);
2240 if (PL_lex_inwhat == OP_TRANS &&
2241 PL_sublex_info.sub_op) {
2242 PL_sublex_info.sub_op->op_private |=
2243 (PL_lex_repl ? OPpTRANS_FROM_UTF
2247 if (uv > 255 && !dorange)
2248 native_range = FALSE;
2260 /* \N{LATIN SMALL LETTER A} is a named character */
2264 char* e = strchr(s, '}');
2271 yyerror("Missing right brace on \\N{}");
2275 if (e > s + 2 && s[1] == 'U' && s[2] == '+') {
2277 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
2278 PERL_SCAN_DISALLOW_PREFIX;
2281 uv = grok_hex(s, &len, &flags, NULL);
2282 if ( e > s && len != (STRLEN)(e - s) ) {
2286 goto NUM_ESCAPE_INSERT;
2288 res = newSVpvn(s + 1, e - s - 1);
2289 type = newSVpvn(s - 2,e - s + 3);
2290 res = new_constant( NULL, 0, "charnames",
2291 res, NULL, SvPVX(type) );
2294 sv_utf8_upgrade(res);
2295 str = SvPV_const(res,len);
2296 #ifdef EBCDIC_NEVER_MIND
2297 /* charnames uses pack U and that has been
2298 * recently changed to do the below uni->native
2299 * mapping, so this would be redundant (and wrong,
2300 * the code point would be doubly converted).
2301 * But leave this in just in case the pack U change
2302 * gets revoked, but the semantics is still
2303 * desireable for charnames. --jhi */
2305 UV uv = utf8_to_uvchr((const U8*)str, 0);
2308 U8 tmpbuf[UTF8_MAXBYTES+1], *d;
2310 d = uvchr_to_utf8(tmpbuf, UNI_TO_NATIVE(uv));
2311 sv_setpvn(res, (char *)tmpbuf, d - tmpbuf);
2312 str = SvPV_const(res, len);
2316 if (!has_utf8 && SvUTF8(res)) {
2317 const char * const ostart = SvPVX_const(sv);
2318 SvCUR_set(sv, d - ostart);
2321 sv_utf8_upgrade(sv);
2322 /* this just broke our allocation above... */
2323 SvGROW(sv, (STRLEN)(send - start));
2324 d = SvPVX(sv) + SvCUR(sv);
2327 if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
2328 const char * const odest = SvPVX_const(sv);
2330 SvGROW(sv, (SvLEN(sv) + len - (e - s + 4)));
2331 d = SvPVX(sv) + (d - odest);
2335 native_range = FALSE; /* \N{} is guessed to be Unicode */
2337 Copy(str, d, len, char);
2344 yyerror("Missing braces on \\N{}");
2347 /* \c is a control character */
2356 *d++ = NATIVE_TO_NEED(has_utf8,toCTRL(c));
2359 yyerror("Missing control char name in \\c");
2363 /* printf-style backslashes, formfeeds, newlines, etc */
2365 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
2368 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
2371 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
2374 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
2377 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
2380 *d++ = ASCII_TO_NEED(has_utf8,'\033');
2383 *d++ = ASCII_TO_NEED(has_utf8,'\007');
2389 } /* end if (backslash) */
2396 /* If we started with encoded form, or already know we want it
2397 and then encode the next character */
2398 if ((has_utf8 || this_utf8) && !NATIVE_IS_INVARIANT((U8)(*s))) {
2400 const UV nextuv = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
2401 const STRLEN need = UNISKIP(NATIVE_TO_UNI(nextuv));
2404 /* encoded value larger than old, need extra space (NOTE: SvCUR() not set here) */
2405 const STRLEN off = d - SvPVX_const(sv);
2406 d = SvGROW(sv, SvLEN(sv) + (need-len)) + off;
2408 d = (char*)uvchr_to_utf8((U8*)d, nextuv);
2411 if (uv > 255 && !dorange)
2412 native_range = FALSE;
2416 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2418 } /* while loop to process each character */
2420 /* terminate the string and set up the sv */
2422 SvCUR_set(sv, d - SvPVX_const(sv));
2423 if (SvCUR(sv) >= SvLEN(sv))
2424 Perl_croak(aTHX_ "panic: constant overflowed allocated space");
2427 if (PL_encoding && !has_utf8) {
2428 sv_recode_to_utf8(sv, PL_encoding);
2434 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
2435 PL_sublex_info.sub_op->op_private |=
2436 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
2440 /* shrink the sv if we allocated more than we used */
2441 if (SvCUR(sv) + 5 < SvLEN(sv)) {
2442 SvPV_shrink_to_cur(sv);
2445 /* return the substring (via yylval) only if we parsed anything */
2446 if (s > PL_bufptr) {
2447 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) )
2448 sv = new_constant(start, s - start,
2449 (const char *)(PL_lex_inpat ? "qr" : "q"),
2452 (( PL_lex_inwhat == OP_TRANS
2454 : ( (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat)
2457 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
2464 * Returns TRUE if there's more to the expression (e.g., a subscript),
2467 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
2469 * ->[ and ->{ return TRUE
2470 * { and [ outside a pattern are always subscripts, so return TRUE
2471 * if we're outside a pattern and it's not { or [, then return FALSE
2472 * if we're in a pattern and the first char is a {
2473 * {4,5} (any digits around the comma) returns FALSE
2474 * if we're in a pattern and the first char is a [
2476 * [SOMETHING] has a funky algorithm to decide whether it's a
2477 * character class or not. It has to deal with things like
2478 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
2479 * anything else returns TRUE
2482 /* This is the one truly awful dwimmer necessary to conflate C and sed. */
2485 S_intuit_more(pTHX_ register char *s)
2488 if (PL_lex_brackets)
2490 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
2492 if (*s != '{' && *s != '[')
2497 /* In a pattern, so maybe we have {n,m}. */
2514 /* On the other hand, maybe we have a character class */
2517 if (*s == ']' || *s == '^')
2520 /* this is terrifying, and it works */
2521 int weight = 2; /* let's weigh the evidence */
2523 unsigned char un_char = 255, last_un_char;
2524 const char * const send = strchr(s,']');
2525 char tmpbuf[sizeof PL_tokenbuf * 4];
2527 if (!send) /* has to be an expression */
2530 Zero(seen,256,char);
2533 else if (isDIGIT(*s)) {
2535 if (isDIGIT(s[1]) && s[2] == ']')
2541 for (; s < send; s++) {
2542 last_un_char = un_char;
2543 un_char = (unsigned char)*s;
2548 weight -= seen[un_char] * 10;
2549 if (isALNUM_lazy_if(s+1,UTF)) {
2551 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
2552 len = (int)strlen(tmpbuf);
2553 if (len > 1 && gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PV))
2558 else if (*s == '$' && s[1] &&
2559 strchr("[#!%*<>()-=",s[1])) {
2560 if (/*{*/ strchr("])} =",s[2]))
2569 if (strchr("wds]",s[1]))
2571 else if (seen[(U8)'\''] || seen[(U8)'"'])
2573 else if (strchr("rnftbxcav",s[1]))
2575 else if (isDIGIT(s[1])) {
2577 while (s[1] && isDIGIT(s[1]))
2587 if (strchr("aA01! ",last_un_char))
2589 if (strchr("zZ79~",s[1]))
2591 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
2592 weight -= 5; /* cope with negative subscript */
2595 if (!isALNUM(last_un_char)
2596 && !(last_un_char == '$' || last_un_char == '@'
2597 || last_un_char == '&')
2598 && isALPHA(*s) && s[1] && isALPHA(s[1])) {
2603 if (keyword(tmpbuf, d - tmpbuf, 0))
2606 if (un_char == last_un_char + 1)
2608 weight -= seen[un_char];
2613 if (weight >= 0) /* probably a character class */
2623 * Does all the checking to disambiguate
2625 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
2626 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
2628 * First argument is the stuff after the first token, e.g. "bar".
2630 * Not a method if bar is a filehandle.
2631 * Not a method if foo is a subroutine prototyped to take a filehandle.
2632 * Not a method if it's really "Foo $bar"
2633 * Method if it's "foo $bar"
2634 * Not a method if it's really "print foo $bar"
2635 * Method if it's really "foo package::" (interpreted as package->foo)
2636 * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
2637 * Not a method if bar is a filehandle or package, but is quoted with
2642 S_intuit_method(pTHX_ char *start, GV *gv, CV *cv)
2645 char *s = start + (*start == '$');
2646 char tmpbuf[sizeof PL_tokenbuf];
2654 if (SvTYPE(gv) == SVt_PVGV && GvIO(gv))
2658 const char *proto = SvPVX_const(cv);
2669 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
2670 /* start is the beginning of the possible filehandle/object,
2671 * and s is the end of it
2672 * tmpbuf is a copy of it
2675 if (*start == '$') {
2676 if (gv || PL_last_lop_op == OP_PRINT || isUPPER(*PL_tokenbuf))
2679 len = start - SvPVX(PL_linestr);
2683 start = SvPVX(PL_linestr) + len;
2687 return *s == '(' ? FUNCMETH : METHOD;
2689 if (!keyword(tmpbuf, len, 0)) {
2690 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
2694 soff = s - SvPVX(PL_linestr);
2698 indirgv = gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PVCV);
2699 if (indirgv && GvCVu(indirgv))
2701 /* filehandle or package name makes it a method */
2702 if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, FALSE)) {
2704 soff = s - SvPVX(PL_linestr);
2707 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
2708 return 0; /* no assumptions -- "=>" quotes bearword */
2710 start_force(PL_curforce);
2711 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0,
2712 newSVpvn(tmpbuf,len));
2713 NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
2715 curmad('X', newSVpvn(start,SvPVX(PL_linestr) + soff - start));
2720 PL_bufptr = SvPVX(PL_linestr) + soff; /* restart before space */
2722 return *s == '(' ? FUNCMETH : METHOD;
2730 * Return a string of Perl code to load the debugger. If PERL5DB
2731 * is set, it will return the contents of that, otherwise a
2732 * compile-time require of perl5db.pl.
2740 const char * const pdb = PerlEnv_getenv("PERL5DB");
2744 SETERRNO(0,SS_NORMAL);
2745 return "BEGIN { require 'perl5db.pl' }";
2751 /* Encoded script support. filter_add() effectively inserts a
2752 * 'pre-processing' function into the current source input stream.
2753 * Note that the filter function only applies to the current source file
2754 * (e.g., it will not affect files 'require'd or 'use'd by this one).
2756 * The datasv parameter (which may be NULL) can be used to pass
2757 * private data to this instance of the filter. The filter function
2758 * can recover the SV using the FILTER_DATA macro and use it to
2759 * store private buffers and state information.
2761 * The supplied datasv parameter is upgraded to a PVIO type
2762 * and the IoDIRP/IoANY field is used to store the function pointer,
2763 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
2764 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
2765 * private use must be set using malloc'd pointers.
2769 Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
2775 if (!PL_rsfp_filters)
2776 PL_rsfp_filters = newAV();
2779 SvUPGRADE(datasv, SVt_PVIO);
2780 IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
2781 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
2782 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
2783 FPTR2DPTR(void *, IoANY(datasv)),
2784 SvPV_nolen(datasv)));
2785 av_unshift(PL_rsfp_filters, 1);
2786 av_store(PL_rsfp_filters, 0, datasv) ;
2791 /* Delete most recently added instance of this filter function. */
2793 Perl_filter_del(pTHX_ filter_t funcp)
2799 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
2800 FPTR2DPTR(void*, funcp)));
2802 if (!PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
2804 /* if filter is on top of stack (usual case) just pop it off */
2805 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
2806 if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
2807 IoFLAGS(datasv) &= ~IOf_FAKE_DIRP;
2808 IoANY(datasv) = (void *)NULL;
2809 sv_free(av_pop(PL_rsfp_filters));
2813 /* we need to search for the correct entry and clear it */
2814 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
2818 /* Invoke the idxth filter function for the current rsfp. */
2819 /* maxlen 0 = read one text line */
2821 Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
2826 /* This API is bad. It should have been using unsigned int for maxlen.
2827 Not sure if we want to change the API, but if not we should sanity
2828 check the value here. */
2829 const unsigned int correct_length
2838 if (!PL_rsfp_filters)
2840 if (idx > AvFILLp(PL_rsfp_filters)) { /* Any more filters? */
2841 /* Provide a default input filter to make life easy. */
2842 /* Note that we append to the line. This is handy. */
2843 DEBUG_P(PerlIO_printf(Perl_debug_log,
2844 "filter_read %d: from rsfp\n", idx));
2845 if (correct_length) {
2848 const int old_len = SvCUR(buf_sv);
2850 /* ensure buf_sv is large enough */
2851 SvGROW(buf_sv, (STRLEN)(old_len + correct_length)) ;
2852 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
2853 correct_length)) <= 0) {
2854 if (PerlIO_error(PL_rsfp))
2855 return -1; /* error */
2857 return 0 ; /* end of file */
2859 SvCUR_set(buf_sv, old_len + len) ;
2862 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
2863 if (PerlIO_error(PL_rsfp))
2864 return -1; /* error */
2866 return 0 ; /* end of file */
2869 return SvCUR(buf_sv);
2871 /* Skip this filter slot if filter has been deleted */
2872 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
2873 DEBUG_P(PerlIO_printf(Perl_debug_log,
2874 "filter_read %d: skipped (filter deleted)\n",
2876 return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
2878 /* Get function pointer hidden within datasv */
2879 funcp = DPTR2FPTR(filter_t, IoANY(datasv));
2880 DEBUG_P(PerlIO_printf(Perl_debug_log,
2881 "filter_read %d: via function %p (%s)\n",
2882 idx, (void*)datasv, SvPV_nolen_const(datasv)));
2883 /* Call function. The function is expected to */
2884 /* call "FILTER_READ(idx+1, buf_sv)" first. */
2885 /* Return: <0:error, =0:eof, >0:not eof */
2886 return (*funcp)(aTHX_ idx, buf_sv, correct_length);
2890 S_filter_gets(pTHX_ register SV *sv, register PerlIO *fp, STRLEN append)
2893 #ifdef PERL_CR_FILTER
2894 if (!PL_rsfp_filters) {
2895 filter_add(S_cr_textfilter,NULL);
2898 if (PL_rsfp_filters) {
2900 SvCUR_set(sv, 0); /* start with empty line */
2901 if (FILTER_READ(0, sv, 0) > 0)
2902 return ( SvPVX(sv) ) ;
2907 return (sv_gets(sv, fp, append));
2911 S_find_in_my_stash(pTHX_ const char *pkgname, I32 len)
2916 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
2920 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
2921 (gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVHV)))
2923 return GvHV(gv); /* Foo:: */
2926 /* use constant CLASS => 'MyClass' */
2927 gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVCV);
2928 if (gv && GvCV(gv)) {
2929 SV * const sv = cv_const_sv(GvCV(gv));
2931 pkgname = SvPV_nolen_const(sv);
2934 return gv_stashpv(pkgname, FALSE);
2938 * S_readpipe_override
2939 * Check whether readpipe() is overriden, and generates the appropriate
2940 * optree, provided sublex_start() is called afterwards.
2943 S_readpipe_override(pTHX)
2946 GV *gv_readpipe = gv_fetchpvs("readpipe", GV_NOTQUAL, SVt_PVCV);
2947 yylval.ival = OP_BACKTICK;
2949 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe))
2951 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readpipe", FALSE))
2952 && (gv_readpipe = *gvp) != (GV*)&PL_sv_undef
2953 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe)))
2955 PL_lex_op = (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
2956 append_elem(OP_LIST,
2957 newSVOP(OP_CONST, 0, &PL_sv_undef), /* value will be read later */
2958 newCVREF(0, newGVOP(OP_GV, 0, gv_readpipe))));
2968 * The intent of this yylex wrapper is to minimize the changes to the
2969 * tokener when we aren't interested in collecting madprops. It remains
2970 * to be seen how successful this strategy will be...
2977 char *s = PL_bufptr;
2979 /* make sure PL_thiswhite is initialized */
2983 /* just do what yylex would do on pending identifier; leave PL_thiswhite alone */
2984 if (PL_pending_ident)
2985 return S_pending_ident(aTHX);
2987 /* previous token ate up our whitespace? */
2988 if (!PL_lasttoke && PL_nextwhite) {
2989 PL_thiswhite = PL_nextwhite;
2993 /* isolate the token, and figure out where it is without whitespace */
2994 PL_realtokenstart = -1;
2998 assert(PL_curforce < 0);
3000 if (!PL_thismad || PL_thismad->mad_key == '^') { /* not forced already? */
3001 if (!PL_thistoken) {
3002 if (PL_realtokenstart < 0 || !CopLINE(PL_curcop))
3003 PL_thistoken = newSVpvs("");
3005 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
3006 PL_thistoken = newSVpvn(tstart, s - tstart);
3009 if (PL_thismad) /* install head */
3010 CURMAD('X', PL_thistoken);
3013 /* last whitespace of a sublex? */
3014 if (optype == ')' && PL_endwhite) {
3015 CURMAD('X', PL_endwhite);
3020 /* if no whitespace and we're at EOF, bail. Otherwise fake EOF below. */
3021 if (!PL_thiswhite && !PL_endwhite && !optype) {
3022 sv_free(PL_thistoken);
3027 /* put off final whitespace till peg */
3028 if (optype == ';' && !PL_rsfp) {
3029 PL_nextwhite = PL_thiswhite;
3032 else if (PL_thisopen) {
3033 CURMAD('q', PL_thisopen);
3035 sv_free(PL_thistoken);
3039 /* Store actual token text as madprop X */
3040 CURMAD('X', PL_thistoken);
3044 /* add preceding whitespace as madprop _ */
3045 CURMAD('_', PL_thiswhite);
3049 /* add quoted material as madprop = */
3050 CURMAD('=', PL_thisstuff);
3054 /* add terminating quote as madprop Q */
3055 CURMAD('Q', PL_thisclose);
3059 /* special processing based on optype */
3063 /* opval doesn't need a TOKEN since it can already store mp */
3074 append_madprops(PL_thismad, yylval.opval, 0);
3082 addmad(newMADsv('p', PL_endwhite), &PL_thismad, 0);
3091 /* remember any fake bracket that lexer is about to discard */
3092 if (PL_lex_brackets == 1 &&
3093 ((expectation)PL_lex_brackstack[0] & XFAKEBRACK))
3096 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
3099 PL_thiswhite = newSVpvn(PL_bufptr, ++s - PL_bufptr);
3100 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
3103 break; /* don't bother looking for trailing comment */
3112 /* attach a trailing comment to its statement instead of next token */
3116 if (PL_bufptr > PL_oldbufptr && PL_bufptr[-1] == optype) {
3118 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
3120 if (*s == '\n' || *s == '#') {
3121 while (s < PL_bufend && *s != '\n')
3125 PL_thiswhite = newSVpvn(PL_bufptr, s - PL_bufptr);
3126 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
3143 /* Create new token struct. Note: opvals return early above. */
3144 yylval.tkval = newTOKEN(optype, yylval, PL_thismad);
3151 S_tokenize_use(pTHX_ int is_use, char *s) {
3153 if (PL_expect != XSTATE)
3154 yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
3155 is_use ? "use" : "no"));
3157 if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
3158 s = force_version(s, TRUE);
3159 if (*s == ';' || (s = SKIPSPACE1(s), *s == ';')) {
3160 start_force(PL_curforce);
3161 NEXTVAL_NEXTTOKE.opval = NULL;
3164 else if (*s == 'v') {
3165 s = force_word(s,WORD,FALSE,TRUE,FALSE);
3166 s = force_version(s, FALSE);
3170 s = force_word(s,WORD,FALSE,TRUE,FALSE);
3171 s = force_version(s, FALSE);
3173 yylval.ival = is_use;
3177 static const char* const exp_name[] =
3178 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
3179 "ATTRTERM", "TERMBLOCK", "TERMORDORDOR"
3186 Works out what to call the token just pulled out of the input
3187 stream. The yacc parser takes care of taking the ops we return and
3188 stitching them into a tree.
3194 if read an identifier
3195 if we're in a my declaration
3196 croak if they tried to say my($foo::bar)
3197 build the ops for a my() declaration
3198 if it's an access to a my() variable
3199 are we in a sort block?
3200 croak if my($a); $a <=> $b
3201 build ops for access to a my() variable
3202 if in a dq string, and they've said @foo and we can't find @foo
3204 build ops for a bareword
3205 if we already built the token before, use it.
3210 #pragma segment Perl_yylex
3216 register char *s = PL_bufptr;
3221 /* orig_keyword, gvp, and gv are initialized here because
3222 * jump to the label just_a_word_zero can bypass their
3223 * initialization later. */
3224 I32 orig_keyword = 0;
3229 SV* tmp = newSVpvs("");
3230 PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n",
3231 (IV)CopLINE(PL_curcop),
3232 lex_state_names[PL_lex_state],
3233 exp_name[PL_expect],
3234 pv_display(tmp, s, strlen(s), 0, 60));
3237 /* check if there's an identifier for us to look at */
3238 if (PL_pending_ident)
3239 return REPORT(S_pending_ident(aTHX));
3241 /* no identifier pending identification */
3243 switch (PL_lex_state) {
3245 case LEX_NORMAL: /* Some compilers will produce faster */
3246 case LEX_INTERPNORMAL: /* code if we comment these out. */
3250 /* when we've already built the next token, just pull it out of the queue */
3254 yylval = PL_nexttoke[PL_lasttoke].next_val;
3256 PL_thismad = PL_nexttoke[PL_lasttoke].next_mad;
3257 PL_nexttoke[PL_lasttoke].next_mad = 0;
3258 if (PL_thismad && PL_thismad->mad_key == '_') {
3259 PL_thiswhite = (SV*)PL_thismad->mad_val;
3260 PL_thismad->mad_val = 0;
3261 mad_free(PL_thismad);
3266 PL_lex_state = PL_lex_defer;
3267 PL_expect = PL_lex_expect;
3268 PL_lex_defer = LEX_NORMAL;
3269 if (!PL_nexttoke[PL_lasttoke].next_type)
3274 yylval = PL_nextval[PL_nexttoke];
3276 PL_lex_state = PL_lex_defer;
3277 PL_expect = PL_lex_expect;
3278 PL_lex_defer = LEX_NORMAL;
3282 /* FIXME - can these be merged? */
3283 return(PL_nexttoke[PL_lasttoke].next_type);
3285 return REPORT(PL_nexttype[PL_nexttoke]);
3288 /* interpolated case modifiers like \L \U, including \Q and \E.
3289 when we get here, PL_bufptr is at the \
3291 case LEX_INTERPCASEMOD:
3293 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
3294 Perl_croak(aTHX_ "panic: INTERPCASEMOD");
3296 /* handle \E or end of string */
3297 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
3299 if (PL_lex_casemods) {
3300 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
3301 PL_lex_casestack[PL_lex_casemods] = '\0';
3303 if (PL_bufptr != PL_bufend
3304 && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q')) {
3306 PL_lex_state = LEX_INTERPCONCAT;
3309 PL_thistoken = newSVpvs("\\E");
3315 while (PL_bufptr != PL_bufend &&
3316 PL_bufptr[0] == '\\' && PL_bufptr[1] == 'E') {
3318 PL_thiswhite = newSVpvs("");
3319 sv_catpvn(PL_thiswhite, PL_bufptr, 2);
3323 if (PL_bufptr != PL_bufend)
3326 PL_lex_state = LEX_INTERPCONCAT;
3330 DEBUG_T({ PerlIO_printf(Perl_debug_log,
3331 "### Saw case modifier\n"); });
3333 if (s[1] == '\\' && s[2] == 'E') {
3336 PL_thiswhite = newSVpvs("");
3337 sv_catpvn(PL_thiswhite, PL_bufptr, 4);
3340 PL_lex_state = LEX_INTERPCONCAT;
3345 if (!PL_madskills) /* when just compiling don't need correct */
3346 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
3347 tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */
3348 if ((*s == 'L' || *s == 'U') &&
3349 (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U'))) {
3350 PL_lex_casestack[--PL_lex_casemods] = '\0';
3353 if (PL_lex_casemods > 10)
3354 Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
3355 PL_lex_casestack[PL_lex_casemods++] = *s;
3356 PL_lex_casestack[PL_lex_casemods] = '\0';
3357 PL_lex_state = LEX_INTERPCONCAT;
3358 start_force(PL_curforce);
3359 NEXTVAL_NEXTTOKE.ival = 0;
3361 start_force(PL_curforce);
3363 NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
3365 NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
3367 NEXTVAL_NEXTTOKE.ival = OP_LC;
3369 NEXTVAL_NEXTTOKE.ival = OP_UC;
3371 NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
3373 Perl_croak(aTHX_ "panic: yylex");
3375 SV* const tmpsv = newSVpvs("");
3376 Perl_sv_catpvf(aTHX_ tmpsv, "\\%c", *s);
3382 if (PL_lex_starts) {
3388 sv_free(PL_thistoken);
3389 PL_thistoken = newSVpvs("");
3392 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
3393 if (PL_lex_casemods == 1 && PL_lex_inpat)
3402 case LEX_INTERPPUSH:
3403 return REPORT(sublex_push());
3405 case LEX_INTERPSTART:
3406 if (PL_bufptr == PL_bufend)
3407 return REPORT(sublex_done());
3408 DEBUG_T({ PerlIO_printf(Perl_debug_log,
3409 "### Interpolated variable\n"); });
3411 PL_lex_dojoin = (*PL_bufptr == '@');
3412 PL_lex_state = LEX_INTERPNORMAL;
3413 if (PL_lex_dojoin) {
3414 start_force(PL_curforce);
3415 NEXTVAL_NEXTTOKE.ival = 0;
3417 start_force(PL_curforce);
3418 force_ident("\"", '$');
3419 start_force(PL_curforce);
3420 NEXTVAL_NEXTTOKE.ival = 0;
3422 start_force(PL_curforce);
3423 NEXTVAL_NEXTTOKE.ival = 0;
3425 start_force(PL_curforce);
3426 NEXTVAL_NEXTTOKE.ival = OP_JOIN; /* emulate join($", ...) */
3429 if (PL_lex_starts++) {
3434 sv_free(PL_thistoken);
3435 PL_thistoken = newSVpvs("");
3438 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
3439 if (!PL_lex_casemods && PL_lex_inpat)
3446 case LEX_INTERPENDMAYBE:
3447 if (intuit_more(PL_bufptr)) {
3448 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
3454 if (PL_lex_dojoin) {
3455 PL_lex_dojoin = FALSE;
3456 PL_lex_state = LEX_INTERPCONCAT;
3460 sv_free(PL_thistoken);
3461 PL_thistoken = newSVpvs("");
3466 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
3467 && SvEVALED(PL_lex_repl))
3469 if (PL_bufptr != PL_bufend)
3470 Perl_croak(aTHX_ "Bad evalled substitution pattern");
3474 case LEX_INTERPCONCAT:
3476 if (PL_lex_brackets)
3477 Perl_croak(aTHX_ "panic: INTERPCONCAT");
3479 if (PL_bufptr == PL_bufend)
3480 return REPORT(sublex_done());
3482 if (SvIVX(PL_linestr) == '\'') {
3483 SV *sv = newSVsv(PL_linestr);
3486 else if ( PL_hints & HINT_NEW_RE )
3487 sv = new_constant(NULL, 0, "qr", sv, sv, "q");
3488 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3492 s = scan_const(PL_bufptr);
3494 PL_lex_state = LEX_INTERPCASEMOD;
3496 PL_lex_state = LEX_INTERPSTART;
3499 if (s != PL_bufptr) {
3500 start_force(PL_curforce);
3502 curmad('X', newSVpvn(PL_bufptr,s-PL_bufptr));
3504 NEXTVAL_NEXTTOKE = yylval;
3507 if (PL_lex_starts++) {
3511 sv_free(PL_thistoken);
3512 PL_thistoken = newSVpvs("");
3515 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
3516 if (!PL_lex_casemods && PL_lex_inpat)
3529 PL_lex_state = LEX_NORMAL;
3530 s = scan_formline(PL_bufptr);
3531 if (!PL_lex_formbrack)
3537 PL_oldoldbufptr = PL_oldbufptr;
3543 sv_free(PL_thistoken);
3546 PL_realtokenstart = s - SvPVX(PL_linestr); /* assume but undo on ws */
3550 if (isIDFIRST_lazy_if(s,UTF))
3552 Perl_croak(aTHX_ "Unrecognized character \\x%02X", *s & 255);
3555 goto fake_eof; /* emulate EOF on ^D or ^Z */
3564 if (PL_lex_brackets) {
3565 yyerror((const char *)
3567 ? "Format not terminated"
3568 : "Missing right curly or square bracket"));
3570 DEBUG_T( { PerlIO_printf(Perl_debug_log,
3571 "### Tokener got EOF\n");
3575 if (s++ < PL_bufend)
3576 goto retry; /* ignore stray nulls */
3579 if (!PL_in_eval && !PL_preambled) {
3580 PL_preambled = TRUE;
3585 sv_setpv(PL_linestr,incl_perldb());
3586 if (SvCUR(PL_linestr))
3587 sv_catpvs(PL_linestr,";");
3589 while(AvFILLp(PL_preambleav) >= 0) {
3590 SV *tmpsv = av_shift(PL_preambleav);
3591 sv_catsv(PL_linestr, tmpsv);
3592 sv_catpvs(PL_linestr, ";");
3595 sv_free((SV*)PL_preambleav);
3596 PL_preambleav = NULL;
3598 if (PL_minus_n || PL_minus_p) {
3599 sv_catpvs(PL_linestr, "LINE: while (<>) {");
3601 sv_catpvs(PL_linestr,"chomp;");
3604 if ((*PL_splitstr == '/' || *PL_splitstr == '\''
3605 || *PL_splitstr == '"')
3606 && strchr(PL_splitstr + 1, *PL_splitstr))
3607 Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
3609 /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
3610 bytes can be used as quoting characters. :-) */
3611 const char *splits = PL_splitstr;
3612 sv_catpvs(PL_linestr, "our @F=split(q\0");
3615 if (*splits == '\\')
3616 sv_catpvn(PL_linestr, splits, 1);
3617 sv_catpvn(PL_linestr, splits, 1);
3618 } while (*splits++);
3619 /* This loop will embed the trailing NUL of
3620 PL_linestr as the last thing it does before
3622 sv_catpvs(PL_linestr, ");");
3626 sv_catpvs(PL_linestr,"our @F=split(' ');");
3630 sv_catpvs(PL_linestr,"use feature ':5.10';");
3631 sv_catpvs(PL_linestr, "\n");
3632 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
3633 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
3634 PL_last_lop = PL_last_uni = NULL;
3635 if (PERLDB_LINE && PL_curstash != PL_debstash)
3636 update_debugger_info_sv(PL_linestr);
3640 bof = PL_rsfp ? TRUE : FALSE;
3641 if ((s = filter_gets(PL_linestr, PL_rsfp, 0)) == NULL) {
3644 PL_realtokenstart = -1;
3647 if (PL_preprocess && !PL_in_eval)
3648 (void)PerlProc_pclose(PL_rsfp);
3649 else if ((PerlIO *)PL_rsfp == PerlIO_stdin())
3650 PerlIO_clearerr(PL_rsfp);
3652 (void)PerlIO_close(PL_rsfp);
3654 PL_doextract = FALSE;
3656 if (!PL_in_eval && (PL_minus_n || PL_minus_p)) {
3661 sv_setpv(PL_linestr,
3664 ? ";}continue{print;}" : ";}"));
3665 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
3666 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
3667 PL_last_lop = PL_last_uni = NULL;
3668 PL_minus_n = PL_minus_p = 0;
3671 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
3672 PL_last_lop = PL_last_uni = NULL;
3673 sv_setpvn(PL_linestr,"",0);
3674 TOKEN(';'); /* not infinite loop because rsfp is NULL now */
3676 /* If it looks like the start of a BOM or raw UTF-16,
3677 * check if it in fact is. */
3683 #ifdef PERLIO_IS_STDIO
3684 # ifdef __GNU_LIBRARY__
3685 # if __GNU_LIBRARY__ == 1 /* Linux glibc5 */
3686 # define FTELL_FOR_PIPE_IS_BROKEN
3690 # if __GLIBC__ == 1 /* maybe some glibc5 release had it like this? */
3691 # define FTELL_FOR_PIPE_IS_BROKEN
3696 #ifdef FTELL_FOR_PIPE_IS_BROKEN
3697 /* This loses the possibility to detect the bof
3698 * situation on perl -P when the libc5 is being used.
3699 * Workaround? Maybe attach some extra state to PL_rsfp?
3702 bof = PerlIO_tell(PL_rsfp) == SvCUR(PL_linestr);
3704 bof = PerlIO_tell(PL_rsfp) == (Off_t)SvCUR(PL_linestr);
3707 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
3708 s = swallow_bom((U8*)s);
3712 /* Incest with pod. */
3715 sv_catsv(PL_thiswhite, PL_linestr);
3717 if (*s == '=' && strnEQ(s, "=cut", 4)) {
3718 sv_setpvn(PL_linestr, "", 0);
3719 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
3720 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
3721 PL_last_lop = PL_last_uni = NULL;
3722 PL_doextract = FALSE;
3726 } while (PL_doextract);
3727 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
3728 if (PERLDB_LINE && PL_curstash != PL_debstash)
3729 update_debugger_info_sv(PL_linestr);
3730 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
3731 PL_last_lop = PL_last_uni = NULL;
3732 if (CopLINE(PL_curcop) == 1) {
3733 while (s < PL_bufend && isSPACE(*s))
3735 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
3739 PL_thiswhite = newSVpvn(PL_linestart, s - PL_linestart);
3743 if (*s == '#' && *(s+1) == '!')
3745 #ifdef ALTERNATE_SHEBANG
3747 static char const as[] = ALTERNATE_SHEBANG;
3748 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
3749 d = s + (sizeof(as) - 1);
3751 #endif /* ALTERNATE_SHEBANG */
3760 while (*d && !isSPACE(*d))
3764 #ifdef ARG_ZERO_IS_SCRIPT
3765 if (ipathend > ipath) {
3767 * HP-UX (at least) sets argv[0] to the script name,
3768 * which makes $^X incorrect. And Digital UNIX and Linux,
3769 * at least, set argv[0] to the basename of the Perl
3770 * interpreter. So, having found "#!", we'll set it right.
3772 SV * const x = GvSV(gv_fetchpvs("\030", GV_ADD|GV_NOTQUAL,
3774 assert(SvPOK(x) || SvGMAGICAL(x));
3775 if (sv_eq(x, CopFILESV(PL_curcop))) {
3776 sv_setpvn(x, ipath, ipathend - ipath);
3782 const char *bstart = SvPV_const(CopFILESV(PL_curcop),blen);
3783 const char * const lstart = SvPV_const(x,llen);
3785 bstart += blen - llen;
3786 if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
3787 sv_setpvn(x, ipath, ipathend - ipath);
3792 TAINT_NOT; /* $^X is always tainted, but that's OK */
3794 #endif /* ARG_ZERO_IS_SCRIPT */
3799 d = instr(s,"perl -");
3801 d = instr(s,"perl");
3803 /* avoid getting into infinite loops when shebang
3804 * line contains "Perl" rather than "perl" */
3806 for (d = ipathend-4; d >= ipath; --d) {
3807 if ((*d == 'p' || *d == 'P')
3808 && !ibcmp(d, "perl", 4))
3818 #ifdef ALTERNATE_SHEBANG
3820 * If the ALTERNATE_SHEBANG on this system starts with a
3821 * character that can be part of a Perl expression, then if
3822 * we see it but not "perl", we're probably looking at the
3823 * start of Perl code, not a request to hand off to some
3824 * other interpreter. Similarly, if "perl" is there, but
3825 * not in the first 'word' of the line, we assume the line
3826 * contains the start of the Perl program.
3828 if (d && *s != '#') {
3829 const char *c = ipath;
3830 while (*c && !strchr("; \t\r\n\f\v#", *c))
3833 d = NULL; /* "perl" not in first word; ignore */
3835 *s = '#'; /* Don't try to parse shebang line */
3837 #endif /* ALTERNATE_SHEBANG */
3838 #ifndef MACOS_TRADITIONAL
3843 !instr(s,"indir") &&
3844 instr(PL_origargv[0],"perl"))
3851 while (s < PL_bufend && isSPACE(*s))
3853 if (s < PL_bufend) {
3854 Newxz(newargv,PL_origargc+3,char*);
3856 while (s < PL_bufend && !isSPACE(*s))
3859 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
3862 newargv = PL_origargv;
3865 PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
3867 Perl_croak(aTHX_ "Can't exec %s", ipath);
3871 while (*d && !isSPACE(*d))
3873 while (SPACE_OR_TAB(*d))
3877 const bool switches_done = PL_doswitches;
3878 const U32 oldpdb = PL_perldb;
3879 const bool oldn = PL_minus_n;
3880 const bool oldp = PL_minus_p;
3883 if (*d == 'M' || *d == 'm' || *d == 'C') {
3884 const char * const m = d;
3885 while (*d && !isSPACE(*d))
3887 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
3890 d = moreswitches(d);
3892 if (PL_doswitches && !switches_done) {
3893 int argc = PL_origargc;
3894 char **argv = PL_origargv;
3897 } while (argc && argv[0][0] == '-' && argv[0][1]);
3898 init_argv_symbols(argc,argv);
3900 if ((PERLDB_LINE && !oldpdb) ||
3901 ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
3902 /* if we have already added "LINE: while (<>) {",
3903 we must not do it again */
3905 sv_setpvn(PL_linestr, "", 0);
3906 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
3907 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
3908 PL_last_lop = PL_last_uni = NULL;
3909 PL_preambled = FALSE;
3911 (void)gv_fetchfile(PL_origfilename);
3918 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
3920 PL_lex_state = LEX_FORMLINE;
3925 #ifdef PERL_STRICT_CR
3926 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
3928 "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
3930 case ' ': case '\t': case '\f': case 013:
3931 #ifdef MACOS_TRADITIONAL
3935 PL_realtokenstart = -1;
3944 PL_realtokenstart = -1;
3948 if (PL_lex_state != LEX_NORMAL || (PL_in_eval && !PL_rsfp)) {
3949 if (*s == '#' && s == PL_linestart && PL_in_eval && !PL_rsfp) {
3950 /* handle eval qq[#line 1 "foo"\n ...] */
3951 CopLINE_dec(PL_curcop);
3954 if (PL_madskills && !PL_lex_formbrack && !PL_in_eval) {
3956 if (!PL_in_eval || PL_rsfp)
3961 while (d < PL_bufend && *d != '\n')
3965 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
3966 Perl_croak(aTHX_ "panic: input overflow");
3969 PL_thiswhite = newSVpvn(s, d - s);
3974 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
3976 PL_lex_state = LEX_FORMLINE;
3982 if (PL_madskills && CopLINE(PL_curcop) >= 1 && !PL_lex_formbrack) {
3983 if (CopLINE(PL_curcop) == 1 && s[0] == '#' && s[1] == '!') {
3986 TOKEN(PEG); /* make sure any #! line is accessible */
3991 /* if (PL_madskills && PL_lex_formbrack) { */
3993 while (d < PL_bufend && *d != '\n')
3997 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
3998 Perl_croak(aTHX_ "panic: input overflow");
3999 if (PL_madskills && CopLINE(PL_curcop) >= 1) {
4001 PL_thiswhite = newSVpvs("");
4002 if (CopLINE(PL_curcop) == 1) {
4003 sv_setpvn(PL_thiswhite, "", 0);
4006 sv_catpvn(PL_thiswhite, s, d - s);
4020 if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {
4028 while (s < PL_bufend && SPACE_OR_TAB(*s))
4031 if (strnEQ(s,"=>",2)) {
4032 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
4033 DEBUG_T( { printbuf("### Saw unary minus before =>, forcing word %s\n", s); } );
4034 OPERATOR('-'); /* unary minus */
4036 PL_last_uni = PL_oldbufptr;
4038 case 'r': ftst = OP_FTEREAD; break;
4039 case 'w': ftst = OP_FTEWRITE; break;
4040 case 'x': ftst = OP_FTEEXEC; break;
4041 case 'o': ftst = OP_FTEOWNED; break;
4042 case 'R': ftst = OP_FTRREAD; break;
4043 case 'W': ftst = OP_FTRWRITE; break;
4044 case 'X': ftst = OP_FTREXEC; break;
4045 case 'O': ftst = OP_FTROWNED; break;
4046 case 'e': ftst = OP_FTIS; break;
4047 case 'z': ftst = OP_FTZERO; break;
4048 case 's': ftst = OP_FTSIZE; break;
4049 case 'f': ftst = OP_FTFILE; break;
4050 case 'd': ftst = OP_FTDIR; break;
4051 case 'l': ftst = OP_FTLINK; break;
4052 case 'p': ftst = OP_FTPIPE; break;
4053 case 'S': ftst = OP_FTSOCK; break;
4054 case 'u': ftst = OP_FTSUID; break;
4055 case 'g': ftst = OP_FTSGID; break;
4056 case 'k': ftst = OP_FTSVTX; break;
4057 case 'b': ftst = OP_FTBLK; break;
4058 case 'c': ftst = OP_FTCHR; break;
4059 case 't': ftst = OP_FTTTY; break;
4060 case 'T': ftst = OP_FTTEXT; break;
4061 case 'B': ftst = OP_FTBINARY; break;
4062 case 'M': case 'A': case 'C':
4063 gv_fetchpvs("\024", GV_ADD|GV_NOTQUAL, SVt_PV);
4065 case 'M': ftst = OP_FTMTIME; break;
4066 case 'A': ftst = OP_FTATIME; break;
4067 case 'C': ftst = OP_FTCTIME; break;
4075 PL_last_lop_op = (OPCODE)ftst;
4076 DEBUG_T( { PerlIO_printf(Perl_debug_log,
4077 "### Saw file test %c\n", (int)tmp);
4082 /* Assume it was a minus followed by a one-letter named
4083 * subroutine call (or a -bareword), then. */
4084 DEBUG_T( { PerlIO_printf(Perl_debug_log,
4085 "### '-%c' looked like a file test but was not\n",
4092 const char tmp = *s++;
4095 if (PL_expect == XOPERATOR)
4100 else if (*s == '>') {
4103 if (isIDFIRST_lazy_if(s,UTF)) {
4104 s = force_word(s,METHOD,FALSE,TRUE,FALSE);
4112 if (PL_expect == XOPERATOR)
4115 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
4117 OPERATOR('-'); /* unary minus */
4123 const char tmp = *s++;
4126 if (PL_expect == XOPERATOR)
4131 if (PL_expect == XOPERATOR)
4134 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
4141 if (PL_expect != XOPERATOR) {
4142 s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
4143 PL_expect = XOPERATOR;
4144 force_ident(PL_tokenbuf, '*');
4157 if (PL_expect == XOPERATOR) {
4161 PL_tokenbuf[0] = '%';
4162 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, TRUE);
4163 if (!PL_tokenbuf[1]) {
4166 PL_pending_ident = '%';
4177 && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR)
4178 && FEATURE_IS_ENABLED("~~"))
4185 const char tmp = *s++;
4191 goto just_a_word_zero_gv;
4194 switch (PL_expect) {
4200 if (!PL_in_my || PL_lex_state != LEX_NORMAL)
4202 PL_bufptr = s; /* update in case we back off */
4208 PL_expect = XTERMBLOCK;
4211 stuffstart = s - SvPVX(PL_linestr) - 1;
4215 while (isIDFIRST_lazy_if(s,UTF)) {
4218 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
4219 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len, 0))) {
4220 if (tmp < 0) tmp = -tmp;
4235 sv = newSVpvn(s, len);
4237 d = scan_str(d,TRUE,TRUE);
4239 /* MUST advance bufptr here to avoid bogus
4240 "at end of line" context messages from yyerror().
4242 PL_bufptr = s + len;
4243 yyerror("Unterminated attribute parameter in attribute list");
4247 return REPORT(0); /* EOF indicator */
4251 sv_catsv(sv, PL_lex_stuff);
4252 attrs = append_elem(OP_LIST, attrs,
4253 newSVOP(OP_CONST, 0, sv));
4254 SvREFCNT_dec(PL_lex_stuff);
4255 PL_lex_stuff = NULL;
4258 if (len == 6 && strnEQ(SvPVX(sv), "unique", len)) {
4260 if (PL_in_my == KEY_our) {
4262 GvUNIQUE_on(cGVOPx_gv(yylval.opval));
4264 /* skip to avoid loading attributes.pm */
4266 deprecate(":unique");
4269 Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables");
4272 /* NOTE: any CV attrs applied here need to be part of
4273 the CVf_BUILTIN_ATTRS define in cv.h! */
4274 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "lvalue", len)) {
4276 CvLVALUE_on(PL_compcv);
4278 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "locked", len)) {
4280 CvLOCKED_on(PL_compcv);
4282 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "method", len)) {
4284 CvMETHOD_on(PL_compcv);
4286 else if (!PL_in_my && len == 9 && strnEQ(SvPVX(sv), "assertion", len)) {
4288 CvASSERTION_on(PL_compcv);
4290 /* After we've set the flags, it could be argued that
4291 we don't need to do the attributes.pm-based setting
4292 process, and shouldn't bother appending recognized
4293 flags. To experiment with that, uncomment the
4294 following "else". (Note that's already been
4295 uncommented. That keeps the above-applied built-in
4296 attributes from being intercepted (and possibly
4297 rejected) by a package's attribute routines, but is
4298 justified by the performance win for the common case
4299 of applying only built-in attributes.) */
4301 attrs = append_elem(OP_LIST, attrs,
4302 newSVOP(OP_CONST, 0,
4306 if (*s == ':' && s[1] != ':')
4309 break; /* require real whitespace or :'s */
4310 /* XXX losing whitespace on sequential attributes here */
4314 = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
4315 if (*s != ';' && *s != '}' && *s != tmp
4316 && (tmp != '=' || *s != ')')) {
4317 const char q = ((*s == '\'') ? '"' : '\'');
4318 /* If here for an expression, and parsed no attrs, back
4320 if (tmp == '=' && !attrs) {
4324 /* MUST advance bufptr here to avoid bogus "at end of line"
4325 context messages from yyerror().
4328 yyerror( (const char *)
4330 ? Perl_form(aTHX_ "Invalid separator character "
4331 "%c%c%c in attribute list", q, *s, q)
4332 : "Unterminated attribute list" ) );
4340 start_force(PL_curforce);
4341 NEXTVAL_NEXTTOKE.opval = attrs;
4342 CURMAD('_', PL_nextwhite);
4347 PL_thistoken = newSVpvn(SvPVX(PL_linestr) + stuffstart,
4348 (s - SvPVX(PL_linestr)) - stuffstart);
4356 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
4357 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */
4365 const char tmp = *s++;
4370 const char tmp = *s++;
4378 if (PL_lex_brackets <= 0)
4379 yyerror("Unmatched right square bracket");
4382 if (PL_lex_state == LEX_INTERPNORMAL) {
4383 if (PL_lex_brackets == 0) {
4384 if (*s != '[' && *s != '{' && (*s != '-' || s[1] != '>'))
4385 PL_lex_state = LEX_INTERPEND;
4392 if (PL_lex_brackets > 100) {
4393 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
4395 switch (PL_expect) {
4397 if (PL_lex_formbrack) {
4401 if (PL_oldoldbufptr == PL_last_lop)
4402 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
4404 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
4405 OPERATOR(HASHBRACK);
4407 while (s < PL_bufend && SPACE_OR_TAB(*s))
4410 PL_tokenbuf[0] = '\0';
4411 if (d < PL_bufend && *d == '-') {
4412 PL_tokenbuf[0] = '-';
4414 while (d < PL_bufend && SPACE_OR_TAB(*d))
4417 if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
4418 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
4420 while (d < PL_bufend && SPACE_OR_TAB(*d))
4423 const char minus = (PL_tokenbuf[0] == '-');
4424 s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
4432 PL_lex_brackstack[PL_lex_brackets++] = XSTATE;