This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
remove unused variables and assignments
[perl5.git] / toke.c
... / ...
CommitLineData
1/* toke.c
2 *
3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
5 *
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
8 *
9 */
10
11/*
12 * 'It all comes from here, the stench and the peril.' --Frodo
13 *
14 * [p.719 of _The Lord of the Rings_, IV/ix: "Shelob's Lair"]
15 */
16
17/*
18 * This file is the lexer for Perl. It's closely linked to the
19 * parser, perly.y.
20 *
21 * The main routine is yylex(), which returns the next token.
22 */
23
24/*
25=head1 Lexer interface
26
27This is the lower layer of the Perl parser, managing characters and tokens.
28
29=for apidoc AmU|yy_parser *|PL_parser
30
31Pointer to a structure encapsulating the state of the parsing operation
32currently in progress. The pointer can be locally changed to perform
33a nested parse without interfering with the state of an outer parse.
34Individual members of C<PL_parser> have their own documentation.
35
36=cut
37*/
38
39#include "EXTERN.h"
40#define PERL_IN_TOKE_C
41#include "perl.h"
42#include "dquote_static.c"
43
44#define new_constant(a,b,c,d,e,f,g) \
45 S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g)
46
47#define pl_yylval (PL_parser->yylval)
48
49/* XXX temporary backwards compatibility */
50#define PL_lex_brackets (PL_parser->lex_brackets)
51#define PL_lex_allbrackets (PL_parser->lex_allbrackets)
52#define PL_lex_fakeeof (PL_parser->lex_fakeeof)
53#define PL_lex_brackstack (PL_parser->lex_brackstack)
54#define PL_lex_casemods (PL_parser->lex_casemods)
55#define PL_lex_casestack (PL_parser->lex_casestack)
56#define PL_lex_defer (PL_parser->lex_defer)
57#define PL_lex_dojoin (PL_parser->lex_dojoin)
58#define PL_lex_expect (PL_parser->lex_expect)
59#define PL_lex_formbrack (PL_parser->lex_formbrack)
60#define PL_lex_inpat (PL_parser->lex_inpat)
61#define PL_lex_inwhat (PL_parser->lex_inwhat)
62#define PL_lex_op (PL_parser->lex_op)
63#define PL_lex_repl (PL_parser->lex_repl)
64#define PL_lex_starts (PL_parser->lex_starts)
65#define PL_lex_stuff (PL_parser->lex_stuff)
66#define PL_multi_start (PL_parser->multi_start)
67#define PL_multi_open (PL_parser->multi_open)
68#define PL_multi_close (PL_parser->multi_close)
69#define PL_pending_ident (PL_parser->pending_ident)
70#define PL_preambled (PL_parser->preambled)
71#define PL_sublex_info (PL_parser->sublex_info)
72#define PL_linestr (PL_parser->linestr)
73#define PL_expect (PL_parser->expect)
74#define PL_copline (PL_parser->copline)
75#define PL_bufptr (PL_parser->bufptr)
76#define PL_oldbufptr (PL_parser->oldbufptr)
77#define PL_oldoldbufptr (PL_parser->oldoldbufptr)
78#define PL_linestart (PL_parser->linestart)
79#define PL_bufend (PL_parser->bufend)
80#define PL_last_uni (PL_parser->last_uni)
81#define PL_last_lop (PL_parser->last_lop)
82#define PL_last_lop_op (PL_parser->last_lop_op)
83#define PL_lex_state (PL_parser->lex_state)
84#define PL_rsfp (PL_parser->rsfp)
85#define PL_rsfp_filters (PL_parser->rsfp_filters)
86#define PL_in_my (PL_parser->in_my)
87#define PL_in_my_stash (PL_parser->in_my_stash)
88#define PL_tokenbuf (PL_parser->tokenbuf)
89#define PL_multi_end (PL_parser->multi_end)
90#define PL_error_count (PL_parser->error_count)
91
92#ifdef PERL_MAD
93# define PL_endwhite (PL_parser->endwhite)
94# define PL_faketokens (PL_parser->faketokens)
95# define PL_lasttoke (PL_parser->lasttoke)
96# define PL_nextwhite (PL_parser->nextwhite)
97# define PL_realtokenstart (PL_parser->realtokenstart)
98# define PL_skipwhite (PL_parser->skipwhite)
99# define PL_thisclose (PL_parser->thisclose)
100# define PL_thismad (PL_parser->thismad)
101# define PL_thisopen (PL_parser->thisopen)
102# define PL_thisstuff (PL_parser->thisstuff)
103# define PL_thistoken (PL_parser->thistoken)
104# define PL_thiswhite (PL_parser->thiswhite)
105# define PL_thiswhite (PL_parser->thiswhite)
106# define PL_nexttoke (PL_parser->nexttoke)
107# define PL_curforce (PL_parser->curforce)
108#else
109# define PL_nexttoke (PL_parser->nexttoke)
110# define PL_nexttype (PL_parser->nexttype)
111# define PL_nextval (PL_parser->nextval)
112#endif
113
114/* This can't be done with embed.fnc, because struct yy_parser contains a
115 member named pending_ident, which clashes with the generated #define */
116static int
117S_pending_ident(pTHX);
118
119static const char ident_too_long[] = "Identifier too long";
120
121#ifdef PERL_MAD
122# define CURMAD(slot,sv) if (PL_madskills) { curmad(slot,sv); sv = 0; }
123# define NEXTVAL_NEXTTOKE PL_nexttoke[PL_curforce].next_val
124#else
125# define CURMAD(slot,sv)
126# define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
127#endif
128
129#define XENUMMASK 0x3f
130#define XFAKEEOF 0x40
131#define XFAKEBRACK 0x80
132
133#ifdef USE_UTF8_SCRIPTS
134# define UTF (!IN_BYTES)
135#else
136# define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8))
137#endif
138
139/* The maximum number of characters preceding the unrecognized one to display */
140#define UNRECOGNIZED_PRECEDE_COUNT 10
141
142/* In variables named $^X, these are the legal values for X.
143 * 1999-02-27 mjd-perl-patch@plover.com */
144#define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
145
146#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
147
148/* LEX_* are values for PL_lex_state, the state of the lexer.
149 * They are arranged oddly so that the guard on the switch statement
150 * can get by with a single comparison (if the compiler is smart enough).
151 */
152
153/* #define LEX_NOTPARSING 11 is done in perl.h. */
154
155#define LEX_NORMAL 10 /* normal code (ie not within "...") */
156#define LEX_INTERPNORMAL 9 /* code within a string, eg "$foo[$x+1]" */
157#define LEX_INTERPCASEMOD 8 /* expecting a \U, \Q or \E etc */
158#define LEX_INTERPPUSH 7 /* starting a new sublex parse level */
159#define LEX_INTERPSTART 6 /* expecting the start of a $var */
160
161 /* at end of code, eg "$x" followed by: */
162#define LEX_INTERPEND 5 /* ... eg not one of [, { or -> */
163#define LEX_INTERPENDMAYBE 4 /* ... eg one of [, { or -> */
164
165#define LEX_INTERPCONCAT 3 /* expecting anything, eg at start of
166 string or after \E, $foo, etc */
167#define LEX_INTERPCONST 2 /* NOT USED */
168#define LEX_FORMLINE 1 /* expecting a format line */
169#define LEX_KNOWNEXT 0 /* next token known; just return it */
170
171
172#ifdef DEBUGGING
173static const char* const lex_state_names[] = {
174 "KNOWNEXT",
175 "FORMLINE",
176 "INTERPCONST",
177 "INTERPCONCAT",
178 "INTERPENDMAYBE",
179 "INTERPEND",
180 "INTERPSTART",
181 "INTERPPUSH",
182 "INTERPCASEMOD",
183 "INTERPNORMAL",
184 "NORMAL"
185};
186#endif
187
188#ifdef ff_next
189#undef ff_next
190#endif
191
192#include "keywords.h"
193
194/* CLINE is a macro that ensures PL_copline has a sane value */
195
196#ifdef CLINE
197#undef CLINE
198#endif
199#define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
200
201#ifdef PERL_MAD
202# define SKIPSPACE0(s) skipspace0(s)
203# define SKIPSPACE1(s) skipspace1(s)
204# define SKIPSPACE2(s,tsv) skipspace2(s,&tsv)
205# define PEEKSPACE(s) skipspace2(s,0)
206#else
207# define SKIPSPACE0(s) skipspace(s)
208# define SKIPSPACE1(s) skipspace(s)
209# define SKIPSPACE2(s,tsv) skipspace(s)
210# define PEEKSPACE(s) skipspace(s)
211#endif
212
213/*
214 * Convenience functions to return different tokens and prime the
215 * lexer for the next token. They all take an argument.
216 *
217 * TOKEN : generic token (used for '(', DOLSHARP, etc)
218 * OPERATOR : generic operator
219 * AOPERATOR : assignment operator
220 * PREBLOCK : beginning the block after an if, while, foreach, ...
221 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
222 * PREREF : *EXPR where EXPR is not a simple identifier
223 * TERM : expression term
224 * LOOPX : loop exiting command (goto, last, dump, etc)
225 * FTST : file test operator
226 * FUN0 : zero-argument function
227 * FUN0OP : zero-argument function, with its op created in this file
228 * FUN1 : not used, except for not, which isn't a UNIOP
229 * BOop : bitwise or or xor
230 * BAop : bitwise and
231 * SHop : shift operator
232 * PWop : power operator
233 * PMop : pattern-matching operator
234 * Aop : addition-level operator
235 * Mop : multiplication-level operator
236 * Eop : equality-testing operator
237 * Rop : relational operator <= != gt
238 *
239 * Also see LOP and lop() below.
240 */
241
242#ifdef DEBUGGING /* Serve -DT. */
243# define REPORT(retval) tokereport((I32)retval, &pl_yylval)
244#else
245# define REPORT(retval) (retval)
246#endif
247
248#define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
249#define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
250#define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
251#define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
252#define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
253#define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
254#define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
255#define LOOPX(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
256#define FTST(f) return (pl_yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
257#define FUN0(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
258#define FUN0OP(f) return (pl_yylval.opval=f, CLINE, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0OP))
259#define FUN1(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
260#define BOop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
261#define BAop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
262#define SHop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
263#define PWop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
264#define PMop(f) return(pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
265#define Aop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
266#define Mop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
267#define Eop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
268#define Rop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
269
270/* This bit of chicanery makes a unary function followed by
271 * a parenthesis into a function with one argument, highest precedence.
272 * The UNIDOR macro is for unary functions that can be followed by the //
273 * operator (such as C<shift // 0>).
274 */
275#define UNI2(f,x) { \
276 pl_yylval.ival = f; \
277 PL_expect = x; \
278 PL_bufptr = s; \
279 PL_last_uni = PL_oldbufptr; \
280 PL_last_lop_op = f; \
281 if (*s == '(') \
282 return REPORT( (int)FUNC1 ); \
283 s = PEEKSPACE(s); \
284 return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
285 }
286#define UNI(f) UNI2(f,XTERM)
287#define UNIDOR(f) UNI2(f,XTERMORDORDOR)
288
289#define UNIBRACK(f) { \
290 pl_yylval.ival = f; \
291 PL_bufptr = s; \
292 PL_last_uni = PL_oldbufptr; \
293 if (*s == '(') \
294 return REPORT( (int)FUNC1 ); \
295 s = PEEKSPACE(s); \
296 return REPORT( (*s == '(') ? (int)FUNC1 : (int)UNIOP ); \
297 }
298
299/* grandfather return to old style */
300#define OLDLOP(f) \
301 do { \
302 if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) \
303 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; \
304 pl_yylval.ival = (f); \
305 PL_expect = XTERM; \
306 PL_bufptr = s; \
307 return (int)LSTOP; \
308 } while(0)
309
310#ifdef DEBUGGING
311
312/* how to interpret the pl_yylval associated with the token */
313enum token_type {
314 TOKENTYPE_NONE,
315 TOKENTYPE_IVAL,
316 TOKENTYPE_OPNUM, /* pl_yylval.ival contains an opcode number */
317 TOKENTYPE_PVAL,
318 TOKENTYPE_OPVAL,
319 TOKENTYPE_GVVAL
320};
321
322static struct debug_tokens {
323 const int token;
324 enum token_type type;
325 const char *name;
326} const debug_tokens[] =
327{
328 { ADDOP, TOKENTYPE_OPNUM, "ADDOP" },
329 { ANDAND, TOKENTYPE_NONE, "ANDAND" },
330 { ANDOP, TOKENTYPE_NONE, "ANDOP" },
331 { ANONSUB, TOKENTYPE_IVAL, "ANONSUB" },
332 { ARROW, TOKENTYPE_NONE, "ARROW" },
333 { ASSIGNOP, TOKENTYPE_OPNUM, "ASSIGNOP" },
334 { BITANDOP, TOKENTYPE_OPNUM, "BITANDOP" },
335 { BITOROP, TOKENTYPE_OPNUM, "BITOROP" },
336 { COLONATTR, TOKENTYPE_NONE, "COLONATTR" },
337 { CONTINUE, TOKENTYPE_NONE, "CONTINUE" },
338 { DEFAULT, TOKENTYPE_NONE, "DEFAULT" },
339 { DO, TOKENTYPE_NONE, "DO" },
340 { DOLSHARP, TOKENTYPE_NONE, "DOLSHARP" },
341 { DORDOR, TOKENTYPE_NONE, "DORDOR" },
342 { DOROP, TOKENTYPE_OPNUM, "DOROP" },
343 { DOTDOT, TOKENTYPE_IVAL, "DOTDOT" },
344 { ELSE, TOKENTYPE_NONE, "ELSE" },
345 { ELSIF, TOKENTYPE_IVAL, "ELSIF" },
346 { EQOP, TOKENTYPE_OPNUM, "EQOP" },
347 { FOR, TOKENTYPE_IVAL, "FOR" },
348 { FORMAT, TOKENTYPE_NONE, "FORMAT" },
349 { FUNC, TOKENTYPE_OPNUM, "FUNC" },
350 { FUNC0, TOKENTYPE_OPNUM, "FUNC0" },
351 { FUNC0OP, TOKENTYPE_OPVAL, "FUNC0OP" },
352 { FUNC0SUB, TOKENTYPE_OPVAL, "FUNC0SUB" },
353 { FUNC1, TOKENTYPE_OPNUM, "FUNC1" },
354 { FUNCMETH, TOKENTYPE_OPVAL, "FUNCMETH" },
355 { GIVEN, TOKENTYPE_IVAL, "GIVEN" },
356 { HASHBRACK, TOKENTYPE_NONE, "HASHBRACK" },
357 { IF, TOKENTYPE_IVAL, "IF" },
358 { LABEL, TOKENTYPE_PVAL, "LABEL" },
359 { LOCAL, TOKENTYPE_IVAL, "LOCAL" },
360 { LOOPEX, TOKENTYPE_OPNUM, "LOOPEX" },
361 { LSTOP, TOKENTYPE_OPNUM, "LSTOP" },
362 { LSTOPSUB, TOKENTYPE_OPVAL, "LSTOPSUB" },
363 { MATCHOP, TOKENTYPE_OPNUM, "MATCHOP" },
364 { METHOD, TOKENTYPE_OPVAL, "METHOD" },
365 { MULOP, TOKENTYPE_OPNUM, "MULOP" },
366 { MY, TOKENTYPE_IVAL, "MY" },
367 { MYSUB, TOKENTYPE_NONE, "MYSUB" },
368 { NOAMP, TOKENTYPE_NONE, "NOAMP" },
369 { NOTOP, TOKENTYPE_NONE, "NOTOP" },
370 { OROP, TOKENTYPE_IVAL, "OROP" },
371 { OROR, TOKENTYPE_NONE, "OROR" },
372 { PACKAGE, TOKENTYPE_NONE, "PACKAGE" },
373 { PLUGEXPR, TOKENTYPE_OPVAL, "PLUGEXPR" },
374 { PLUGSTMT, TOKENTYPE_OPVAL, "PLUGSTMT" },
375 { PMFUNC, TOKENTYPE_OPVAL, "PMFUNC" },
376 { POSTDEC, TOKENTYPE_NONE, "POSTDEC" },
377 { POSTINC, TOKENTYPE_NONE, "POSTINC" },
378 { POWOP, TOKENTYPE_OPNUM, "POWOP" },
379 { PREDEC, TOKENTYPE_NONE, "PREDEC" },
380 { PREINC, TOKENTYPE_NONE, "PREINC" },
381 { PRIVATEREF, TOKENTYPE_OPVAL, "PRIVATEREF" },
382 { REFGEN, TOKENTYPE_NONE, "REFGEN" },
383 { RELOP, TOKENTYPE_OPNUM, "RELOP" },
384 { SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" },
385 { SUB, TOKENTYPE_NONE, "SUB" },
386 { THING, TOKENTYPE_OPVAL, "THING" },
387 { UMINUS, TOKENTYPE_NONE, "UMINUS" },
388 { UNIOP, TOKENTYPE_OPNUM, "UNIOP" },
389 { UNIOPSUB, TOKENTYPE_OPVAL, "UNIOPSUB" },
390 { UNLESS, TOKENTYPE_IVAL, "UNLESS" },
391 { UNTIL, TOKENTYPE_IVAL, "UNTIL" },
392 { USE, TOKENTYPE_IVAL, "USE" },
393 { WHEN, TOKENTYPE_IVAL, "WHEN" },
394 { WHILE, TOKENTYPE_IVAL, "WHILE" },
395 { WORD, TOKENTYPE_OPVAL, "WORD" },
396 { YADAYADA, TOKENTYPE_IVAL, "YADAYADA" },
397 { 0, TOKENTYPE_NONE, NULL }
398};
399
400/* dump the returned token in rv, plus any optional arg in pl_yylval */
401
402STATIC int
403S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp)
404{
405 dVAR;
406
407 PERL_ARGS_ASSERT_TOKEREPORT;
408
409 if (DEBUG_T_TEST) {
410 const char *name = NULL;
411 enum token_type type = TOKENTYPE_NONE;
412 const struct debug_tokens *p;
413 SV* const report = newSVpvs("<== ");
414
415 for (p = debug_tokens; p->token; p++) {
416 if (p->token == (int)rv) {
417 name = p->name;
418 type = p->type;
419 break;
420 }
421 }
422 if (name)
423 Perl_sv_catpv(aTHX_ report, name);
424 else if ((char)rv > ' ' && (char)rv < '~')
425 Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
426 else if (!rv)
427 sv_catpvs(report, "EOF");
428 else
429 Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
430 switch (type) {
431 case TOKENTYPE_NONE:
432 case TOKENTYPE_GVVAL: /* doesn't appear to be used */
433 break;
434 case TOKENTYPE_IVAL:
435 Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)lvalp->ival);
436 break;
437 case TOKENTYPE_OPNUM:
438 Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
439 PL_op_name[lvalp->ival]);
440 break;
441 case TOKENTYPE_PVAL:
442 Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", lvalp->pval);
443 break;
444 case TOKENTYPE_OPVAL:
445 if (lvalp->opval) {
446 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
447 PL_op_name[lvalp->opval->op_type]);
448 if (lvalp->opval->op_type == OP_CONST) {
449 Perl_sv_catpvf(aTHX_ report, " %s",
450 SvPEEK(cSVOPx_sv(lvalp->opval)));
451 }
452
453 }
454 else
455 sv_catpvs(report, "(opval=null)");
456 break;
457 }
458 PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
459 };
460 return (int)rv;
461}
462
463
464/* print the buffer with suitable escapes */
465
466STATIC void
467S_printbuf(pTHX_ const char *const fmt, const char *const s)
468{
469 SV* const tmp = newSVpvs("");
470
471 PERL_ARGS_ASSERT_PRINTBUF;
472
473 PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
474 SvREFCNT_dec(tmp);
475}
476
477#endif
478
479static int
480S_deprecate_commaless_var_list(pTHX) {
481 PL_expect = XTERM;
482 deprecate("comma-less variable list");
483 return REPORT(','); /* grandfather non-comma-format format */
484}
485
486/*
487 * S_ao
488 *
489 * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
490 * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
491 */
492
493STATIC int
494S_ao(pTHX_ int toketype)
495{
496 dVAR;
497 if (*PL_bufptr == '=') {
498 PL_bufptr++;
499 if (toketype == ANDAND)
500 pl_yylval.ival = OP_ANDASSIGN;
501 else if (toketype == OROR)
502 pl_yylval.ival = OP_ORASSIGN;
503 else if (toketype == DORDOR)
504 pl_yylval.ival = OP_DORASSIGN;
505 toketype = ASSIGNOP;
506 }
507 return toketype;
508}
509
510/*
511 * S_no_op
512 * When Perl expects an operator and finds something else, no_op
513 * prints the warning. It always prints "<something> found where
514 * operator expected. It prints "Missing semicolon on previous line?"
515 * if the surprise occurs at the start of the line. "do you need to
516 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
517 * where the compiler doesn't know if foo is a method call or a function.
518 * It prints "Missing operator before end of line" if there's nothing
519 * after the missing operator, or "... before <...>" if there is something
520 * after the missing operator.
521 */
522
523STATIC void
524S_no_op(pTHX_ const char *const what, char *s)
525{
526 dVAR;
527 char * const oldbp = PL_bufptr;
528 const bool is_first = (PL_oldbufptr == PL_linestart);
529
530 PERL_ARGS_ASSERT_NO_OP;
531
532 if (!s)
533 s = oldbp;
534 else
535 PL_bufptr = s;
536 yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
537 if (ckWARN_d(WARN_SYNTAX)) {
538 if (is_first)
539 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
540 "\t(Missing semicolon on previous line?)\n");
541 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
542 const char *t;
543 for (t = PL_oldoldbufptr; (isALNUM_lazy_if(t,UTF) || *t == ':'); t++)
544 NOOP;
545 if (t < PL_bufptr && isSPACE(*t))
546 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
547 "\t(Do you need to predeclare %.*s?)\n",
548 (int)(t - PL_oldoldbufptr), PL_oldoldbufptr);
549 }
550 else {
551 assert(s >= oldbp);
552 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
553 "\t(Missing operator before %.*s?)\n", (int)(s - oldbp), oldbp);
554 }
555 }
556 PL_bufptr = oldbp;
557}
558
559/*
560 * S_missingterm
561 * Complain about missing quote/regexp/heredoc terminator.
562 * If it's called with NULL then it cauterizes the line buffer.
563 * If we're in a delimited string and the delimiter is a control
564 * character, it's reformatted into a two-char sequence like ^C.
565 * This is fatal.
566 */
567
568STATIC void
569S_missingterm(pTHX_ char *s)
570{
571 dVAR;
572 char tmpbuf[3];
573 char q;
574 if (s) {
575 char * const nl = strrchr(s,'\n');
576 if (nl)
577 *nl = '\0';
578 }
579 else if (isCNTRL(PL_multi_close)) {
580 *tmpbuf = '^';
581 tmpbuf[1] = (char)toCTRL(PL_multi_close);
582 tmpbuf[2] = '\0';
583 s = tmpbuf;
584 }
585 else {
586 *tmpbuf = (char)PL_multi_close;
587 tmpbuf[1] = '\0';
588 s = tmpbuf;
589 }
590 q = strchr(s,'"') ? '\'' : '"';
591 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
592}
593
594/*
595 * Check whether the named feature is enabled.
596 */
597bool
598Perl_feature_is_enabled(pTHX_ const char *const name, STRLEN namelen)
599{
600 dVAR;
601 HV * const hinthv = GvHV(PL_hintgv);
602 char he_name[8 + MAX_FEATURE_LEN] = "feature_";
603
604 PERL_ARGS_ASSERT_FEATURE_IS_ENABLED;
605
606 if (namelen > MAX_FEATURE_LEN)
607 return FALSE;
608 memcpy(&he_name[8], name, namelen);
609
610 return (hinthv && hv_exists(hinthv, he_name, 8 + namelen));
611}
612
613/*
614 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
615 * utf16-to-utf8-reversed.
616 */
617
618#ifdef PERL_CR_FILTER
619static void
620strip_return(SV *sv)
621{
622 register const char *s = SvPVX_const(sv);
623 register const char * const e = s + SvCUR(sv);
624
625 PERL_ARGS_ASSERT_STRIP_RETURN;
626
627 /* outer loop optimized to do nothing if there are no CR-LFs */
628 while (s < e) {
629 if (*s++ == '\r' && *s == '\n') {
630 /* hit a CR-LF, need to copy the rest */
631 register char *d = s - 1;
632 *d++ = *s++;
633 while (s < e) {
634 if (*s == '\r' && s[1] == '\n')
635 s++;
636 *d++ = *s++;
637 }
638 SvCUR(sv) -= s - d;
639 return;
640 }
641 }
642}
643
644STATIC I32
645S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
646{
647 const I32 count = FILTER_READ(idx+1, sv, maxlen);
648 if (count > 0 && !maxlen)
649 strip_return(sv);
650 return count;
651}
652#endif
653
654/*
655=for apidoc Amx|void|lex_start|SV *line|PerlIO *rsfp|U32 flags
656
657Creates and initialises a new lexer/parser state object, supplying
658a context in which to lex and parse from a new source of Perl code.
659A pointer to the new state object is placed in L</PL_parser>. An entry
660is made on the save stack so that upon unwinding the new state object
661will be destroyed and the former value of L</PL_parser> will be restored.
662Nothing else need be done to clean up the parsing context.
663
664The code to be parsed comes from I<line> and I<rsfp>. I<line>, if
665non-null, provides a string (in SV form) containing code to be parsed.
666A copy of the string is made, so subsequent modification of I<line>
667does not affect parsing. I<rsfp>, if non-null, provides an input stream
668from which code will be read to be parsed. If both are non-null, the
669code in I<line> comes first and must consist of complete lines of input,
670and I<rsfp> supplies the remainder of the source.
671
672The I<flags> parameter is reserved for future use, and must always
673be zero, except for one flag that is currently reserved for perl's internal
674use.
675
676=cut
677*/
678
679/* LEX_START_SAME_FILTER indicates that this is not a new file, so it
680 can share filters with the current parser. */
681
682void
683Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, U32 flags)
684{
685 dVAR;
686 const char *s = NULL;
687 STRLEN len;
688 yy_parser *parser, *oparser;
689 if (flags && flags != LEX_START_SAME_FILTER)
690 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_start");
691
692 /* create and initialise a parser */
693
694 Newxz(parser, 1, yy_parser);
695 parser->old_parser = oparser = PL_parser;
696 PL_parser = parser;
697
698 parser->stack = NULL;
699 parser->ps = NULL;
700 parser->stack_size = 0;
701
702 /* on scope exit, free this parser and restore any outer one */
703 SAVEPARSER(parser);
704 parser->saved_curcop = PL_curcop;
705
706 /* initialise lexer state */
707
708#ifdef PERL_MAD
709 parser->curforce = -1;
710#else
711 parser->nexttoke = 0;
712#endif
713 parser->error_count = oparser ? oparser->error_count : 0;
714 parser->copline = NOLINE;
715 parser->lex_state = LEX_NORMAL;
716 parser->expect = XSTATE;
717 parser->rsfp = rsfp;
718 parser->rsfp_filters =
719 !(flags & LEX_START_SAME_FILTER) || !oparser
720 ? newAV()
721 : MUTABLE_AV(SvREFCNT_inc(oparser->rsfp_filters));
722
723 Newx(parser->lex_brackstack, 120, char);
724 Newx(parser->lex_casestack, 12, char);
725 *parser->lex_casestack = '\0';
726
727 if (line) {
728 s = SvPV_const(line, len);
729 } else {
730 len = 0;
731 }
732
733 if (!len) {
734 parser->linestr = newSVpvs("\n;");
735 } else {
736 parser->linestr = newSVpvn_flags(s, len, SvUTF8(line));
737 if (s[len-1] != ';')
738 sv_catpvs(parser->linestr, "\n;");
739 }
740 parser->oldoldbufptr =
741 parser->oldbufptr =
742 parser->bufptr =
743 parser->linestart = SvPVX(parser->linestr);
744 parser->bufend = parser->bufptr + SvCUR(parser->linestr);
745 parser->last_lop = parser->last_uni = NULL;
746
747 parser->in_pod = 0;
748}
749
750
751/* delete a parser object */
752
753void
754Perl_parser_free(pTHX_ const yy_parser *parser)
755{
756 PERL_ARGS_ASSERT_PARSER_FREE;
757
758 PL_curcop = parser->saved_curcop;
759 SvREFCNT_dec(parser->linestr);
760
761 if (parser->rsfp == PerlIO_stdin())
762 PerlIO_clearerr(parser->rsfp);
763 else if (parser->rsfp && (!parser->old_parser ||
764 (parser->old_parser && parser->rsfp != parser->old_parser->rsfp)))
765 PerlIO_close(parser->rsfp);
766 SvREFCNT_dec(parser->rsfp_filters);
767
768 Safefree(parser->lex_brackstack);
769 Safefree(parser->lex_casestack);
770 PL_parser = parser->old_parser;
771 Safefree(parser);
772}
773
774
775/*
776=for apidoc AmxU|SV *|PL_parser-E<gt>linestr
777
778Buffer scalar containing the chunk currently under consideration of the
779text currently being lexed. This is always a plain string scalar (for
780which C<SvPOK> is true). It is not intended to be used as a scalar by
781normal scalar means; instead refer to the buffer directly by the pointer
782variables described below.
783
784The lexer maintains various C<char*> pointers to things in the
785C<PL_parser-E<gt>linestr> buffer. If C<PL_parser-E<gt>linestr> is ever
786reallocated, all of these pointers must be updated. Don't attempt to
787do this manually, but rather use L</lex_grow_linestr> if you need to
788reallocate the buffer.
789
790The content of the text chunk in the buffer is commonly exactly one
791complete line of input, up to and including a newline terminator,
792but there are situations where it is otherwise. The octets of the
793buffer may be intended to be interpreted as either UTF-8 or Latin-1.
794The function L</lex_bufutf8> tells you which. Do not use the C<SvUTF8>
795flag on this scalar, which may disagree with it.
796
797For direct examination of the buffer, the variable
798L</PL_parser-E<gt>bufend> points to the end of the buffer. The current
799lexing position is pointed to by L</PL_parser-E<gt>bufptr>. Direct use
800of these pointers is usually preferable to examination of the scalar
801through normal scalar means.
802
803=for apidoc AmxU|char *|PL_parser-E<gt>bufend
804
805Direct pointer to the end of the chunk of text currently being lexed, the
806end of the lexer buffer. This is equal to C<SvPVX(PL_parser-E<gt>linestr)
807+ SvCUR(PL_parser-E<gt>linestr)>. A NUL character (zero octet) is
808always located at the end of the buffer, and does not count as part of
809the buffer's contents.
810
811=for apidoc AmxU|char *|PL_parser-E<gt>bufptr
812
813Points to the current position of lexing inside the lexer buffer.
814Characters around this point may be freely examined, within
815the range delimited by C<SvPVX(L</PL_parser-E<gt>linestr>)> and
816L</PL_parser-E<gt>bufend>. The octets of the buffer may be intended to be
817interpreted as either UTF-8 or Latin-1, as indicated by L</lex_bufutf8>.
818
819Lexing code (whether in the Perl core or not) moves this pointer past
820the characters that it consumes. It is also expected to perform some
821bookkeeping whenever a newline character is consumed. This movement
822can be more conveniently performed by the function L</lex_read_to>,
823which handles newlines appropriately.
824
825Interpretation of the buffer's octets can be abstracted out by
826using the slightly higher-level functions L</lex_peek_unichar> and
827L</lex_read_unichar>.
828
829=for apidoc AmxU|char *|PL_parser-E<gt>linestart
830
831Points to the start of the current line inside the lexer buffer.
832This is useful for indicating at which column an error occurred, and
833not much else. This must be updated by any lexing code that consumes
834a newline; the function L</lex_read_to> handles this detail.
835
836=cut
837*/
838
839/*
840=for apidoc Amx|bool|lex_bufutf8
841
842Indicates whether the octets in the lexer buffer
843(L</PL_parser-E<gt>linestr>) should be interpreted as the UTF-8 encoding
844of Unicode characters. If not, they should be interpreted as Latin-1
845characters. This is analogous to the C<SvUTF8> flag for scalars.
846
847In UTF-8 mode, it is not guaranteed that the lexer buffer actually
848contains valid UTF-8. Lexing code must be robust in the face of invalid
849encoding.
850
851The actual C<SvUTF8> flag of the L</PL_parser-E<gt>linestr> scalar
852is significant, but not the whole story regarding the input character
853encoding. Normally, when a file is being read, the scalar contains octets
854and its C<SvUTF8> flag is off, but the octets should be interpreted as
855UTF-8 if the C<use utf8> pragma is in effect. During a string eval,
856however, the scalar may have the C<SvUTF8> flag on, and in this case its
857octets should be interpreted as UTF-8 unless the C<use bytes> pragma
858is in effect. This logic may change in the future; use this function
859instead of implementing the logic yourself.
860
861=cut
862*/
863
864bool
865Perl_lex_bufutf8(pTHX)
866{
867 return UTF;
868}
869
870/*
871=for apidoc Amx|char *|lex_grow_linestr|STRLEN len
872
873Reallocates the lexer buffer (L</PL_parser-E<gt>linestr>) to accommodate
874at least I<len> octets (including terminating NUL). Returns a
875pointer to the reallocated buffer. This is necessary before making
876any direct modification of the buffer that would increase its length.
877L</lex_stuff_pvn> provides a more convenient way to insert text into
878the buffer.
879
880Do not use C<SvGROW> or C<sv_grow> directly on C<PL_parser-E<gt>linestr>;
881this function updates all of the lexer's variables that point directly
882into the buffer.
883
884=cut
885*/
886
887char *
888Perl_lex_grow_linestr(pTHX_ STRLEN len)
889{
890 SV *linestr;
891 char *buf;
892 STRLEN bufend_pos, bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
893 STRLEN linestart_pos, last_uni_pos, last_lop_pos;
894 linestr = PL_parser->linestr;
895 buf = SvPVX(linestr);
896 if (len <= SvLEN(linestr))
897 return buf;
898 bufend_pos = PL_parser->bufend - buf;
899 bufptr_pos = PL_parser->bufptr - buf;
900 oldbufptr_pos = PL_parser->oldbufptr - buf;
901 oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
902 linestart_pos = PL_parser->linestart - buf;
903 last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
904 last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
905 buf = sv_grow(linestr, len);
906 PL_parser->bufend = buf + bufend_pos;
907 PL_parser->bufptr = buf + bufptr_pos;
908 PL_parser->oldbufptr = buf + oldbufptr_pos;
909 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
910 PL_parser->linestart = buf + linestart_pos;
911 if (PL_parser->last_uni)
912 PL_parser->last_uni = buf + last_uni_pos;
913 if (PL_parser->last_lop)
914 PL_parser->last_lop = buf + last_lop_pos;
915 return buf;
916}
917
918/*
919=for apidoc Amx|void|lex_stuff_pvn|const char *pv|STRLEN len|U32 flags
920
921Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
922immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
923reallocating the buffer if necessary. This means that lexing code that
924runs later will see the characters as if they had appeared in the input.
925It is not recommended to do this as part of normal parsing, and most
926uses of this facility run the risk of the inserted characters being
927interpreted in an unintended manner.
928
929The string to be inserted is represented by I<len> octets starting
930at I<pv>. These octets are interpreted as either UTF-8 or Latin-1,
931according to whether the C<LEX_STUFF_UTF8> flag is set in I<flags>.
932The characters are recoded for the lexer buffer, according to how the
933buffer is currently being interpreted (L</lex_bufutf8>). If a string
934to be inserted is available as a Perl scalar, the L</lex_stuff_sv>
935function is more convenient.
936
937=cut
938*/
939
940void
941Perl_lex_stuff_pvn(pTHX_ const char *pv, STRLEN len, U32 flags)
942{
943 dVAR;
944 char *bufptr;
945 PERL_ARGS_ASSERT_LEX_STUFF_PVN;
946 if (flags & ~(LEX_STUFF_UTF8))
947 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_pvn");
948 if (UTF) {
949 if (flags & LEX_STUFF_UTF8) {
950 goto plain_copy;
951 } else {
952 STRLEN highhalf = 0;
953 const char *p, *e = pv+len;
954 for (p = pv; p != e; p++)
955 highhalf += !!(((U8)*p) & 0x80);
956 if (!highhalf)
957 goto plain_copy;
958 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len+highhalf);
959 bufptr = PL_parser->bufptr;
960 Move(bufptr, bufptr+len+highhalf, PL_parser->bufend+1-bufptr, char);
961 SvCUR_set(PL_parser->linestr,
962 SvCUR(PL_parser->linestr) + len+highhalf);
963 PL_parser->bufend += len+highhalf;
964 for (p = pv; p != e; p++) {
965 U8 c = (U8)*p;
966 if (c & 0x80) {
967 *bufptr++ = (char)(0xc0 | (c >> 6));
968 *bufptr++ = (char)(0x80 | (c & 0x3f));
969 } else {
970 *bufptr++ = (char)c;
971 }
972 }
973 }
974 } else {
975 if (flags & LEX_STUFF_UTF8) {
976 STRLEN highhalf = 0;
977 const char *p, *e = pv+len;
978 for (p = pv; p != e; p++) {
979 U8 c = (U8)*p;
980 if (c >= 0xc4) {
981 Perl_croak(aTHX_ "Lexing code attempted to stuff "
982 "non-Latin-1 character into Latin-1 input");
983 } else if (c >= 0xc2 && p+1 != e &&
984 (((U8)p[1]) & 0xc0) == 0x80) {
985 p++;
986 highhalf++;
987 } else if (c >= 0x80) {
988 /* malformed UTF-8 */
989 ENTER;
990 SAVESPTR(PL_warnhook);
991 PL_warnhook = PERL_WARNHOOK_FATAL;
992 utf8n_to_uvuni((U8*)p, e-p, NULL, 0);
993 LEAVE;
994 }
995 }
996 if (!highhalf)
997 goto plain_copy;
998 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len-highhalf);
999 bufptr = PL_parser->bufptr;
1000 Move(bufptr, bufptr+len-highhalf, PL_parser->bufend+1-bufptr, char);
1001 SvCUR_set(PL_parser->linestr,
1002 SvCUR(PL_parser->linestr) + len-highhalf);
1003 PL_parser->bufend += len-highhalf;
1004 for (p = pv; p != e; p++) {
1005 U8 c = (U8)*p;
1006 if (c & 0x80) {
1007 *bufptr++ = (char)(((c & 0x3) << 6) | (p[1] & 0x3f));
1008 p++;
1009 } else {
1010 *bufptr++ = (char)c;
1011 }
1012 }
1013 } else {
1014 plain_copy:
1015 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len);
1016 bufptr = PL_parser->bufptr;
1017 Move(bufptr, bufptr+len, PL_parser->bufend+1-bufptr, char);
1018 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) + len);
1019 PL_parser->bufend += len;
1020 Copy(pv, bufptr, len, char);
1021 }
1022 }
1023}
1024
1025/*
1026=for apidoc Amx|void|lex_stuff_pv|const char *pv|U32 flags
1027
1028Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1029immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1030reallocating the buffer if necessary. This means that lexing code that
1031runs later will see the characters as if they had appeared in the input.
1032It is not recommended to do this as part of normal parsing, and most
1033uses of this facility run the risk of the inserted characters being
1034interpreted in an unintended manner.
1035
1036The string to be inserted is represented by octets starting at I<pv>
1037and continuing to the first nul. These octets are interpreted as either
1038UTF-8 or Latin-1, according to whether the C<LEX_STUFF_UTF8> flag is set
1039in I<flags>. The characters are recoded for the lexer buffer, according
1040to how the buffer is currently being interpreted (L</lex_bufutf8>).
1041If it is not convenient to nul-terminate a string to be inserted, the
1042L</lex_stuff_pvn> function is more appropriate.
1043
1044=cut
1045*/
1046
1047void
1048Perl_lex_stuff_pv(pTHX_ const char *pv, U32 flags)
1049{
1050 PERL_ARGS_ASSERT_LEX_STUFF_PV;
1051 lex_stuff_pvn(pv, strlen(pv), flags);
1052}
1053
1054/*
1055=for apidoc Amx|void|lex_stuff_sv|SV *sv|U32 flags
1056
1057Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1058immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1059reallocating the buffer if necessary. This means that lexing code that
1060runs later will see the characters as if they had appeared in the input.
1061It is not recommended to do this as part of normal parsing, and most
1062uses of this facility run the risk of the inserted characters being
1063interpreted in an unintended manner.
1064
1065The string to be inserted is the string value of I<sv>. The characters
1066are recoded for the lexer buffer, according to how the buffer is currently
1067being interpreted (L</lex_bufutf8>). If a string to be inserted is
1068not already a Perl scalar, the L</lex_stuff_pvn> function avoids the
1069need to construct a scalar.
1070
1071=cut
1072*/
1073
1074void
1075Perl_lex_stuff_sv(pTHX_ SV *sv, U32 flags)
1076{
1077 char *pv;
1078 STRLEN len;
1079 PERL_ARGS_ASSERT_LEX_STUFF_SV;
1080 if (flags)
1081 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_sv");
1082 pv = SvPV(sv, len);
1083 lex_stuff_pvn(pv, len, flags | (SvUTF8(sv) ? LEX_STUFF_UTF8 : 0));
1084}
1085
1086/*
1087=for apidoc Amx|void|lex_unstuff|char *ptr
1088
1089Discards text about to be lexed, from L</PL_parser-E<gt>bufptr> up to
1090I<ptr>. Text following I<ptr> will be moved, and the buffer shortened.
1091This hides the discarded text from any lexing code that runs later,
1092as if the text had never appeared.
1093
1094This is not the normal way to consume lexed text. For that, use
1095L</lex_read_to>.
1096
1097=cut
1098*/
1099
1100void
1101Perl_lex_unstuff(pTHX_ char *ptr)
1102{
1103 char *buf, *bufend;
1104 STRLEN unstuff_len;
1105 PERL_ARGS_ASSERT_LEX_UNSTUFF;
1106 buf = PL_parser->bufptr;
1107 if (ptr < buf)
1108 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1109 if (ptr == buf)
1110 return;
1111 bufend = PL_parser->bufend;
1112 if (ptr > bufend)
1113 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1114 unstuff_len = ptr - buf;
1115 Move(ptr, buf, bufend+1-ptr, char);
1116 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - unstuff_len);
1117 PL_parser->bufend = bufend - unstuff_len;
1118}
1119
1120/*
1121=for apidoc Amx|void|lex_read_to|char *ptr
1122
1123Consume text in the lexer buffer, from L</PL_parser-E<gt>bufptr> up
1124to I<ptr>. This advances L</PL_parser-E<gt>bufptr> to match I<ptr>,
1125performing the correct bookkeeping whenever a newline character is passed.
1126This is the normal way to consume lexed text.
1127
1128Interpretation of the buffer's octets can be abstracted out by
1129using the slightly higher-level functions L</lex_peek_unichar> and
1130L</lex_read_unichar>.
1131
1132=cut
1133*/
1134
1135void
1136Perl_lex_read_to(pTHX_ char *ptr)
1137{
1138 char *s;
1139 PERL_ARGS_ASSERT_LEX_READ_TO;
1140 s = PL_parser->bufptr;
1141 if (ptr < s || ptr > PL_parser->bufend)
1142 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_to");
1143 for (; s != ptr; s++)
1144 if (*s == '\n') {
1145 CopLINE_inc(PL_curcop);
1146 PL_parser->linestart = s+1;
1147 }
1148 PL_parser->bufptr = ptr;
1149}
1150
1151/*
1152=for apidoc Amx|void|lex_discard_to|char *ptr
1153
1154Discards the first part of the L</PL_parser-E<gt>linestr> buffer,
1155up to I<ptr>. The remaining content of the buffer will be moved, and
1156all pointers into the buffer updated appropriately. I<ptr> must not
1157be later in the buffer than the position of L</PL_parser-E<gt>bufptr>:
1158it is not permitted to discard text that has yet to be lexed.
1159
1160Normally it is not necessarily to do this directly, because it suffices to
1161use the implicit discarding behaviour of L</lex_next_chunk> and things
1162based on it. However, if a token stretches across multiple lines,
1163and the lexing code has kept multiple lines of text in the buffer for
1164that purpose, then after completion of the token it would be wise to
1165explicitly discard the now-unneeded earlier lines, to avoid future
1166multi-line tokens growing the buffer without bound.
1167
1168=cut
1169*/
1170
1171void
1172Perl_lex_discard_to(pTHX_ char *ptr)
1173{
1174 char *buf;
1175 STRLEN discard_len;
1176 PERL_ARGS_ASSERT_LEX_DISCARD_TO;
1177 buf = SvPVX(PL_parser->linestr);
1178 if (ptr < buf)
1179 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1180 if (ptr == buf)
1181 return;
1182 if (ptr > PL_parser->bufptr)
1183 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1184 discard_len = ptr - buf;
1185 if (PL_parser->oldbufptr < ptr)
1186 PL_parser->oldbufptr = ptr;
1187 if (PL_parser->oldoldbufptr < ptr)
1188 PL_parser->oldoldbufptr = ptr;
1189 if (PL_parser->last_uni && PL_parser->last_uni < ptr)
1190 PL_parser->last_uni = NULL;
1191 if (PL_parser->last_lop && PL_parser->last_lop < ptr)
1192 PL_parser->last_lop = NULL;
1193 Move(ptr, buf, PL_parser->bufend+1-ptr, char);
1194 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - discard_len);
1195 PL_parser->bufend -= discard_len;
1196 PL_parser->bufptr -= discard_len;
1197 PL_parser->oldbufptr -= discard_len;
1198 PL_parser->oldoldbufptr -= discard_len;
1199 if (PL_parser->last_uni)
1200 PL_parser->last_uni -= discard_len;
1201 if (PL_parser->last_lop)
1202 PL_parser->last_lop -= discard_len;
1203}
1204
1205/*
1206=for apidoc Amx|bool|lex_next_chunk|U32 flags
1207
1208Reads in the next chunk of text to be lexed, appending it to
1209L</PL_parser-E<gt>linestr>. This should be called when lexing code has
1210looked to the end of the current chunk and wants to know more. It is
1211usual, but not necessary, for lexing to have consumed the entirety of
1212the current chunk at this time.
1213
1214If L</PL_parser-E<gt>bufptr> is pointing to the very end of the current
1215chunk (i.e., the current chunk has been entirely consumed), normally the
1216current chunk will be discarded at the same time that the new chunk is
1217read in. If I<flags> includes C<LEX_KEEP_PREVIOUS>, the current chunk
1218will not be discarded. If the current chunk has not been entirely
1219consumed, then it will not be discarded regardless of the flag.
1220
1221Returns true if some new text was added to the buffer, or false if the
1222buffer has reached the end of the input text.
1223
1224=cut
1225*/
1226
1227#define LEX_FAKE_EOF 0x80000000
1228
1229bool
1230Perl_lex_next_chunk(pTHX_ U32 flags)
1231{
1232 SV *linestr;
1233 char *buf;
1234 STRLEN old_bufend_pos, new_bufend_pos;
1235 STRLEN bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
1236 STRLEN linestart_pos, last_uni_pos, last_lop_pos;
1237 bool got_some_for_debugger = 0;
1238 bool got_some;
1239 if (flags & ~(LEX_KEEP_PREVIOUS|LEX_FAKE_EOF))
1240 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_next_chunk");
1241 linestr = PL_parser->linestr;
1242 buf = SvPVX(linestr);
1243 if (!(flags & LEX_KEEP_PREVIOUS) &&
1244 PL_parser->bufptr == PL_parser->bufend) {
1245 old_bufend_pos = bufptr_pos = oldbufptr_pos = oldoldbufptr_pos = 0;
1246 linestart_pos = 0;
1247 if (PL_parser->last_uni != PL_parser->bufend)
1248 PL_parser->last_uni = NULL;
1249 if (PL_parser->last_lop != PL_parser->bufend)
1250 PL_parser->last_lop = NULL;
1251 last_uni_pos = last_lop_pos = 0;
1252 *buf = 0;
1253 SvCUR(linestr) = 0;
1254 } else {
1255 old_bufend_pos = PL_parser->bufend - buf;
1256 bufptr_pos = PL_parser->bufptr - buf;
1257 oldbufptr_pos = PL_parser->oldbufptr - buf;
1258 oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
1259 linestart_pos = PL_parser->linestart - buf;
1260 last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
1261 last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
1262 }
1263 if (flags & LEX_FAKE_EOF) {
1264 goto eof;
1265 } else if (!PL_parser->rsfp) {
1266 got_some = 0;
1267 } else if (filter_gets(linestr, old_bufend_pos)) {
1268 got_some = 1;
1269 got_some_for_debugger = 1;
1270 } else {
1271 if (!SvPOK(linestr)) /* can get undefined by filter_gets */
1272 sv_setpvs(linestr, "");
1273 eof:
1274 /* End of real input. Close filehandle (unless it was STDIN),
1275 * then add implicit termination.
1276 */
1277 if ((PerlIO*)PL_parser->rsfp == PerlIO_stdin())
1278 PerlIO_clearerr(PL_parser->rsfp);
1279 else if (PL_parser->rsfp)
1280 (void)PerlIO_close(PL_parser->rsfp);
1281 PL_parser->rsfp = NULL;
1282 PL_parser->in_pod = 0;
1283#ifdef PERL_MAD
1284 if (PL_madskills && !PL_in_eval && (PL_minus_p || PL_minus_n))
1285 PL_faketokens = 1;
1286#endif
1287 if (!PL_in_eval && PL_minus_p) {
1288 sv_catpvs(linestr,
1289 /*{*/";}continue{print or die qq(-p destination: $!\\n);}");
1290 PL_minus_n = PL_minus_p = 0;
1291 } else if (!PL_in_eval && PL_minus_n) {
1292 sv_catpvs(linestr, /*{*/";}");
1293 PL_minus_n = 0;
1294 } else
1295 sv_catpvs(linestr, ";");
1296 got_some = 1;
1297 }
1298 buf = SvPVX(linestr);
1299 new_bufend_pos = SvCUR(linestr);
1300 PL_parser->bufend = buf + new_bufend_pos;
1301 PL_parser->bufptr = buf + bufptr_pos;
1302 PL_parser->oldbufptr = buf + oldbufptr_pos;
1303 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
1304 PL_parser->linestart = buf + linestart_pos;
1305 if (PL_parser->last_uni)
1306 PL_parser->last_uni = buf + last_uni_pos;
1307 if (PL_parser->last_lop)
1308 PL_parser->last_lop = buf + last_lop_pos;
1309 if (got_some_for_debugger && (PERLDB_LINE || PERLDB_SAVESRC) &&
1310 PL_curstash != PL_debstash) {
1311 /* debugger active and we're not compiling the debugger code,
1312 * so store the line into the debugger's array of lines
1313 */
1314 update_debugger_info(NULL, buf+old_bufend_pos,
1315 new_bufend_pos-old_bufend_pos);
1316 }
1317 return got_some;
1318}
1319
1320/*
1321=for apidoc Amx|I32|lex_peek_unichar|U32 flags
1322
1323Looks ahead one (Unicode) character in the text currently being lexed.
1324Returns the codepoint (unsigned integer value) of the next character,
1325or -1 if lexing has reached the end of the input text. To consume the
1326peeked character, use L</lex_read_unichar>.
1327
1328If the next character is in (or extends into) the next chunk of input
1329text, the next chunk will be read in. Normally the current chunk will be
1330discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1331then the current chunk will not be discarded.
1332
1333If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1334is encountered, an exception is generated.
1335
1336=cut
1337*/
1338
1339I32
1340Perl_lex_peek_unichar(pTHX_ U32 flags)
1341{
1342 dVAR;
1343 char *s, *bufend;
1344 if (flags & ~(LEX_KEEP_PREVIOUS))
1345 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_peek_unichar");
1346 s = PL_parser->bufptr;
1347 bufend = PL_parser->bufend;
1348 if (UTF) {
1349 U8 head;
1350 I32 unichar;
1351 STRLEN len, retlen;
1352 if (s == bufend) {
1353 if (!lex_next_chunk(flags))
1354 return -1;
1355 s = PL_parser->bufptr;
1356 bufend = PL_parser->bufend;
1357 }
1358 head = (U8)*s;
1359 if (!(head & 0x80))
1360 return head;
1361 if (head & 0x40) {
1362 len = PL_utf8skip[head];
1363 while ((STRLEN)(bufend-s) < len) {
1364 if (!lex_next_chunk(flags | LEX_KEEP_PREVIOUS))
1365 break;
1366 s = PL_parser->bufptr;
1367 bufend = PL_parser->bufend;
1368 }
1369 }
1370 unichar = utf8n_to_uvuni((U8*)s, bufend-s, &retlen, UTF8_CHECK_ONLY);
1371 if (retlen == (STRLEN)-1) {
1372 /* malformed UTF-8 */
1373 ENTER;
1374 SAVESPTR(PL_warnhook);
1375 PL_warnhook = PERL_WARNHOOK_FATAL;
1376 utf8n_to_uvuni((U8*)s, bufend-s, NULL, 0);
1377 LEAVE;
1378 }
1379 return unichar;
1380 } else {
1381 if (s == bufend) {
1382 if (!lex_next_chunk(flags))
1383 return -1;
1384 s = PL_parser->bufptr;
1385 }
1386 return (U8)*s;
1387 }
1388}
1389
1390/*
1391=for apidoc Amx|I32|lex_read_unichar|U32 flags
1392
1393Reads the next (Unicode) character in the text currently being lexed.
1394Returns the codepoint (unsigned integer value) of the character read,
1395and moves L</PL_parser-E<gt>bufptr> past the character, or returns -1
1396if lexing has reached the end of the input text. To non-destructively
1397examine the next character, use L</lex_peek_unichar> instead.
1398
1399If the next character is in (or extends into) the next chunk of input
1400text, the next chunk will be read in. Normally the current chunk will be
1401discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1402then the current chunk will not be discarded.
1403
1404If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1405is encountered, an exception is generated.
1406
1407=cut
1408*/
1409
1410I32
1411Perl_lex_read_unichar(pTHX_ U32 flags)
1412{
1413 I32 c;
1414 if (flags & ~(LEX_KEEP_PREVIOUS))
1415 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_unichar");
1416 c = lex_peek_unichar(flags);
1417 if (c != -1) {
1418 if (c == '\n')
1419 CopLINE_inc(PL_curcop);
1420 PL_parser->bufptr += UTF8SKIP(PL_parser->bufptr);
1421 }
1422 return c;
1423}
1424
1425/*
1426=for apidoc Amx|void|lex_read_space|U32 flags
1427
1428Reads optional spaces, in Perl style, in the text currently being
1429lexed. The spaces may include ordinary whitespace characters and
1430Perl-style comments. C<#line> directives are processed if encountered.
1431L</PL_parser-E<gt>bufptr> is moved past the spaces, so that it points
1432at a non-space character (or the end of the input text).
1433
1434If spaces extend into the next chunk of input text, the next chunk will
1435be read in. Normally the current chunk will be discarded at the same
1436time, but if I<flags> includes C<LEX_KEEP_PREVIOUS> then the current
1437chunk will not be discarded.
1438
1439=cut
1440*/
1441
1442#define LEX_NO_NEXT_CHUNK 0x80000000
1443
1444void
1445Perl_lex_read_space(pTHX_ U32 flags)
1446{
1447 char *s, *bufend;
1448 bool need_incline = 0;
1449 if (flags & ~(LEX_KEEP_PREVIOUS|LEX_NO_NEXT_CHUNK))
1450 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_space");
1451#ifdef PERL_MAD
1452 if (PL_skipwhite) {
1453 sv_free(PL_skipwhite);
1454 PL_skipwhite = NULL;
1455 }
1456 if (PL_madskills)
1457 PL_skipwhite = newSVpvs("");
1458#endif /* PERL_MAD */
1459 s = PL_parser->bufptr;
1460 bufend = PL_parser->bufend;
1461 while (1) {
1462 char c = *s;
1463 if (c == '#') {
1464 do {
1465 c = *++s;
1466 } while (!(c == '\n' || (c == 0 && s == bufend)));
1467 } else if (c == '\n') {
1468 s++;
1469 PL_parser->linestart = s;
1470 if (s == bufend)
1471 need_incline = 1;
1472 else
1473 incline(s);
1474 } else if (isSPACE(c)) {
1475 s++;
1476 } else if (c == 0 && s == bufend) {
1477 bool got_more;
1478#ifdef PERL_MAD
1479 if (PL_madskills)
1480 sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
1481#endif /* PERL_MAD */
1482 if (flags & LEX_NO_NEXT_CHUNK)
1483 break;
1484 PL_parser->bufptr = s;
1485 CopLINE_inc(PL_curcop);
1486 got_more = lex_next_chunk(flags);
1487 CopLINE_dec(PL_curcop);
1488 s = PL_parser->bufptr;
1489 bufend = PL_parser->bufend;
1490 if (!got_more)
1491 break;
1492 if (need_incline && PL_parser->rsfp) {
1493 incline(s);
1494 need_incline = 0;
1495 }
1496 } else {
1497 break;
1498 }
1499 }
1500#ifdef PERL_MAD
1501 if (PL_madskills)
1502 sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
1503#endif /* PERL_MAD */
1504 PL_parser->bufptr = s;
1505}
1506
1507/*
1508 * S_incline
1509 * This subroutine has nothing to do with tilting, whether at windmills
1510 * or pinball tables. Its name is short for "increment line". It
1511 * increments the current line number in CopLINE(PL_curcop) and checks
1512 * to see whether the line starts with a comment of the form
1513 * # line 500 "foo.pm"
1514 * If so, it sets the current line number and file to the values in the comment.
1515 */
1516
1517STATIC void
1518S_incline(pTHX_ const char *s)
1519{
1520 dVAR;
1521 const char *t;
1522 const char *n;
1523 const char *e;
1524 line_t line_num;
1525
1526 PERL_ARGS_ASSERT_INCLINE;
1527
1528 CopLINE_inc(PL_curcop);
1529 if (*s++ != '#')
1530 return;
1531 while (SPACE_OR_TAB(*s))
1532 s++;
1533 if (strnEQ(s, "line", 4))
1534 s += 4;
1535 else
1536 return;
1537 if (SPACE_OR_TAB(*s))
1538 s++;
1539 else
1540 return;
1541 while (SPACE_OR_TAB(*s))
1542 s++;
1543 if (!isDIGIT(*s))
1544 return;
1545
1546 n = s;
1547 while (isDIGIT(*s))
1548 s++;
1549 if (!SPACE_OR_TAB(*s) && *s != '\r' && *s != '\n' && *s != '\0')
1550 return;
1551 while (SPACE_OR_TAB(*s))
1552 s++;
1553 if (*s == '"' && (t = strchr(s+1, '"'))) {
1554 s++;
1555 e = t + 1;
1556 }
1557 else {
1558 t = s;
1559 while (!isSPACE(*t))
1560 t++;
1561 e = t;
1562 }
1563 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
1564 e++;
1565 if (*e != '\n' && *e != '\0')
1566 return; /* false alarm */
1567
1568 line_num = atoi(n)-1;
1569
1570 if (t - s > 0) {
1571 const STRLEN len = t - s;
1572 SV *const temp_sv = CopFILESV(PL_curcop);
1573 const char *cf;
1574 STRLEN tmplen;
1575
1576 if (temp_sv) {
1577 cf = SvPVX(temp_sv);
1578 tmplen = SvCUR(temp_sv);
1579 } else {
1580 cf = NULL;
1581 tmplen = 0;
1582 }
1583
1584 if (tmplen > 7 && strnEQ(cf, "(eval ", 6)) {
1585 /* must copy *{"::_<(eval N)[oldfilename:L]"}
1586 * to *{"::_<newfilename"} */
1587 /* However, the long form of evals is only turned on by the
1588 debugger - usually they're "(eval %lu)" */
1589 char smallbuf[128];
1590 char *tmpbuf;
1591 GV **gvp;
1592 STRLEN tmplen2 = len;
1593 if (tmplen + 2 <= sizeof smallbuf)
1594 tmpbuf = smallbuf;
1595 else
1596 Newx(tmpbuf, tmplen + 2, char);
1597 tmpbuf[0] = '_';
1598 tmpbuf[1] = '<';
1599 memcpy(tmpbuf + 2, cf, tmplen);
1600 tmplen += 2;
1601 gvp = (GV**)hv_fetch(PL_defstash, tmpbuf, tmplen, FALSE);
1602 if (gvp) {
1603 char *tmpbuf2;
1604 GV *gv2;
1605
1606 if (tmplen2 + 2 <= sizeof smallbuf)
1607 tmpbuf2 = smallbuf;
1608 else
1609 Newx(tmpbuf2, tmplen2 + 2, char);
1610
1611 if (tmpbuf2 != smallbuf || tmpbuf != smallbuf) {
1612 /* Either they malloc'd it, or we malloc'd it,
1613 so no prefix is present in ours. */
1614 tmpbuf2[0] = '_';
1615 tmpbuf2[1] = '<';
1616 }
1617
1618 memcpy(tmpbuf2 + 2, s, tmplen2);
1619 tmplen2 += 2;
1620
1621 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
1622 if (!isGV(gv2)) {
1623 gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
1624 /* adjust ${"::_<newfilename"} to store the new file name */
1625 GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
1626 /* The line number may differ. If that is the case,
1627 alias the saved lines that are in the array.
1628 Otherwise alias the whole array. */
1629 if (CopLINE(PL_curcop) == line_num) {
1630 GvHV(gv2) = MUTABLE_HV(SvREFCNT_inc(GvHV(*gvp)));
1631 GvAV(gv2) = MUTABLE_AV(SvREFCNT_inc(GvAV(*gvp)));
1632 }
1633 else if (GvAV(*gvp)) {
1634 AV * const av = GvAV(*gvp);
1635 const I32 start = CopLINE(PL_curcop)+1;
1636 I32 items = AvFILLp(av) - start;
1637 if (items > 0) {
1638 AV * const av2 = GvAVn(gv2);
1639 SV **svp = AvARRAY(av) + start;
1640 I32 l = (I32)line_num+1;
1641 while (items--)
1642 av_store(av2, l++, SvREFCNT_inc(*svp++));
1643 }
1644 }
1645 }
1646
1647 if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
1648 }
1649 if (tmpbuf != smallbuf) Safefree(tmpbuf);
1650 }
1651 CopFILE_free(PL_curcop);
1652 CopFILE_setn(PL_curcop, s, len);
1653 }
1654 CopLINE_set(PL_curcop, line_num);
1655}
1656
1657#ifdef PERL_MAD
1658/* skip space before PL_thistoken */
1659
1660STATIC char *
1661S_skipspace0(pTHX_ register char *s)
1662{
1663 PERL_ARGS_ASSERT_SKIPSPACE0;
1664
1665 s = skipspace(s);
1666 if (!PL_madskills)
1667 return s;
1668 if (PL_skipwhite) {
1669 if (!PL_thiswhite)
1670 PL_thiswhite = newSVpvs("");
1671 sv_catsv(PL_thiswhite, PL_skipwhite);
1672 sv_free(PL_skipwhite);
1673 PL_skipwhite = 0;
1674 }
1675 PL_realtokenstart = s - SvPVX(PL_linestr);
1676 return s;
1677}
1678
1679/* skip space after PL_thistoken */
1680
1681STATIC char *
1682S_skipspace1(pTHX_ register char *s)
1683{
1684 const char *start = s;
1685 I32 startoff = start - SvPVX(PL_linestr);
1686
1687 PERL_ARGS_ASSERT_SKIPSPACE1;
1688
1689 s = skipspace(s);
1690 if (!PL_madskills)
1691 return s;
1692 start = SvPVX(PL_linestr) + startoff;
1693 if (!PL_thistoken && PL_realtokenstart >= 0) {
1694 const char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
1695 PL_thistoken = newSVpvn(tstart, start - tstart);
1696 }
1697 PL_realtokenstart = -1;
1698 if (PL_skipwhite) {
1699 if (!PL_nextwhite)
1700 PL_nextwhite = newSVpvs("");
1701 sv_catsv(PL_nextwhite, PL_skipwhite);
1702 sv_free(PL_skipwhite);
1703 PL_skipwhite = 0;
1704 }
1705 return s;
1706}
1707
1708STATIC char *
1709S_skipspace2(pTHX_ register char *s, SV **svp)
1710{
1711 char *start;
1712 const I32 bufptroff = PL_bufptr - SvPVX(PL_linestr);
1713 const I32 startoff = s - SvPVX(PL_linestr);
1714
1715 PERL_ARGS_ASSERT_SKIPSPACE2;
1716
1717 s = skipspace(s);
1718 PL_bufptr = SvPVX(PL_linestr) + bufptroff;
1719 if (!PL_madskills || !svp)
1720 return s;
1721 start = SvPVX(PL_linestr) + startoff;
1722 if (!PL_thistoken && PL_realtokenstart >= 0) {
1723 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
1724 PL_thistoken = newSVpvn(tstart, start - tstart);
1725 PL_realtokenstart = -1;
1726 }
1727 if (PL_skipwhite) {
1728 if (!*svp)
1729 *svp = newSVpvs("");
1730 sv_setsv(*svp, PL_skipwhite);
1731 sv_free(PL_skipwhite);
1732 PL_skipwhite = 0;
1733 }
1734
1735 return s;
1736}
1737#endif
1738
1739STATIC void
1740S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len)
1741{
1742 AV *av = CopFILEAVx(PL_curcop);
1743 if (av) {
1744 SV * const sv = newSV_type(SVt_PVMG);
1745 if (orig_sv)
1746 sv_setsv(sv, orig_sv);
1747 else
1748 sv_setpvn(sv, buf, len);
1749 (void)SvIOK_on(sv);
1750 SvIV_set(sv, 0);
1751 av_store(av, (I32)CopLINE(PL_curcop), sv);
1752 }
1753}
1754
1755/*
1756 * S_skipspace
1757 * Called to gobble the appropriate amount and type of whitespace.
1758 * Skips comments as well.
1759 */
1760
1761STATIC char *
1762S_skipspace(pTHX_ register char *s)
1763{
1764#ifdef PERL_MAD
1765 char *start = s;
1766#endif /* PERL_MAD */
1767 PERL_ARGS_ASSERT_SKIPSPACE;
1768#ifdef PERL_MAD
1769 if (PL_skipwhite) {
1770 sv_free(PL_skipwhite);
1771 PL_skipwhite = NULL;
1772 }
1773#endif /* PERL_MAD */
1774 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
1775 while (s < PL_bufend && SPACE_OR_TAB(*s))
1776 s++;
1777 } else {
1778 STRLEN bufptr_pos = PL_bufptr - SvPVX(PL_linestr);
1779 PL_bufptr = s;
1780 lex_read_space(LEX_KEEP_PREVIOUS |
1781 (PL_sublex_info.sub_inwhat || PL_lex_state == LEX_FORMLINE ?
1782 LEX_NO_NEXT_CHUNK : 0));
1783 s = PL_bufptr;
1784 PL_bufptr = SvPVX(PL_linestr) + bufptr_pos;
1785 if (PL_linestart > PL_bufptr)
1786 PL_bufptr = PL_linestart;
1787 return s;
1788 }
1789#ifdef PERL_MAD
1790 if (PL_madskills)
1791 PL_skipwhite = newSVpvn(start, s-start);
1792#endif /* PERL_MAD */
1793 return s;
1794}
1795
1796/*
1797 * S_check_uni
1798 * Check the unary operators to ensure there's no ambiguity in how they're
1799 * used. An ambiguous piece of code would be:
1800 * rand + 5
1801 * This doesn't mean rand() + 5. Because rand() is a unary operator,
1802 * the +5 is its argument.
1803 */
1804
1805STATIC void
1806S_check_uni(pTHX)
1807{
1808 dVAR;
1809 const char *s;
1810 const char *t;
1811
1812 if (PL_oldoldbufptr != PL_last_uni)
1813 return;
1814 while (isSPACE(*PL_last_uni))
1815 PL_last_uni++;
1816 s = PL_last_uni;
1817 while (isALNUM_lazy_if(s,UTF) || *s == '-')
1818 s++;
1819 if ((t = strchr(s, '(')) && t < PL_bufptr)
1820 return;
1821
1822 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
1823 "Warning: Use of \"%.*s\" without parentheses is ambiguous",
1824 (int)(s - PL_last_uni), PL_last_uni);
1825}
1826
1827/*
1828 * LOP : macro to build a list operator. Its behaviour has been replaced
1829 * with a subroutine, S_lop() for which LOP is just another name.
1830 */
1831
1832#define LOP(f,x) return lop(f,x,s)
1833
1834/*
1835 * S_lop
1836 * Build a list operator (or something that might be one). The rules:
1837 * - if we have a next token, then it's a list operator [why?]
1838 * - if the next thing is an opening paren, then it's a function
1839 * - else it's a list operator
1840 */
1841
1842STATIC I32
1843S_lop(pTHX_ I32 f, int x, char *s)
1844{
1845 dVAR;
1846
1847 PERL_ARGS_ASSERT_LOP;
1848
1849 pl_yylval.ival = f;
1850 CLINE;
1851 PL_expect = x;
1852 PL_bufptr = s;
1853 PL_last_lop = PL_oldbufptr;
1854 PL_last_lop_op = (OPCODE)f;
1855#ifdef PERL_MAD
1856 if (PL_lasttoke)
1857 goto lstop;
1858#else
1859 if (PL_nexttoke)
1860 goto lstop;
1861#endif
1862 if (*s == '(')
1863 return REPORT(FUNC);
1864 s = PEEKSPACE(s);
1865 if (*s == '(')
1866 return REPORT(FUNC);
1867 else {
1868 lstop:
1869 if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
1870 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
1871 return REPORT(LSTOP);
1872 }
1873}
1874
1875#ifdef PERL_MAD
1876 /*
1877 * S_start_force
1878 * Sets up for an eventual force_next(). start_force(0) basically does
1879 * an unshift, while start_force(-1) does a push. yylex removes items
1880 * on the "pop" end.
1881 */
1882
1883STATIC void
1884S_start_force(pTHX_ int where)
1885{
1886 int i;
1887
1888 if (where < 0) /* so people can duplicate start_force(PL_curforce) */
1889 where = PL_lasttoke;
1890 assert(PL_curforce < 0 || PL_curforce == where);
1891 if (PL_curforce != where) {
1892 for (i = PL_lasttoke; i > where; --i) {
1893 PL_nexttoke[i] = PL_nexttoke[i-1];
1894 }
1895 PL_lasttoke++;
1896 }
1897 if (PL_curforce < 0) /* in case of duplicate start_force() */
1898 Zero(&PL_nexttoke[where], 1, NEXTTOKE);
1899 PL_curforce = where;
1900 if (PL_nextwhite) {
1901 if (PL_madskills)
1902 curmad('^', newSVpvs(""));
1903 CURMAD('_', PL_nextwhite);
1904 }
1905}
1906
1907STATIC void
1908S_curmad(pTHX_ char slot, SV *sv)
1909{
1910 MADPROP **where;
1911
1912 if (!sv)
1913 return;
1914 if (PL_curforce < 0)
1915 where = &PL_thismad;
1916 else
1917 where = &PL_nexttoke[PL_curforce].next_mad;
1918
1919 if (PL_faketokens)
1920 sv_setpvs(sv, "");
1921 else {
1922 if (!IN_BYTES) {
1923 if (UTF && is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
1924 SvUTF8_on(sv);
1925 else if (PL_encoding) {
1926 sv_recode_to_utf8(sv, PL_encoding);
1927 }
1928 }
1929 }
1930
1931 /* keep a slot open for the head of the list? */
1932 if (slot != '_' && *where && (*where)->mad_key == '^') {
1933 (*where)->mad_key = slot;
1934 sv_free(MUTABLE_SV(((*where)->mad_val)));
1935 (*where)->mad_val = (void*)sv;
1936 }
1937 else
1938 addmad(newMADsv(slot, sv), where, 0);
1939}
1940#else
1941# define start_force(where) NOOP
1942# define curmad(slot, sv) NOOP
1943#endif
1944
1945/*
1946 * S_force_next
1947 * When the lexer realizes it knows the next token (for instance,
1948 * it is reordering tokens for the parser) then it can call S_force_next
1949 * to know what token to return the next time the lexer is called. Caller
1950 * will need to set PL_nextval[] (or PL_nexttoke[].next_val with PERL_MAD),
1951 * and possibly PL_expect to ensure the lexer handles the token correctly.
1952 */
1953
1954STATIC void
1955S_force_next(pTHX_ I32 type)
1956{
1957 dVAR;
1958#ifdef DEBUGGING
1959 if (DEBUG_T_TEST) {
1960 PerlIO_printf(Perl_debug_log, "### forced token:\n");
1961 tokereport(type, &NEXTVAL_NEXTTOKE);
1962 }
1963#endif
1964#ifdef PERL_MAD
1965 if (PL_curforce < 0)
1966 start_force(PL_lasttoke);
1967 PL_nexttoke[PL_curforce].next_type = type;
1968 if (PL_lex_state != LEX_KNOWNEXT)
1969 PL_lex_defer = PL_lex_state;
1970 PL_lex_state = LEX_KNOWNEXT;
1971 PL_lex_expect = PL_expect;
1972 PL_curforce = -1;
1973#else
1974 PL_nexttype[PL_nexttoke] = type;
1975 PL_nexttoke++;
1976 if (PL_lex_state != LEX_KNOWNEXT) {
1977 PL_lex_defer = PL_lex_state;
1978 PL_lex_expect = PL_expect;
1979 PL_lex_state = LEX_KNOWNEXT;
1980 }
1981#endif
1982}
1983
1984void
1985Perl_yyunlex(pTHX)
1986{
1987 int yyc = PL_parser->yychar;
1988 if (yyc != YYEMPTY) {
1989 if (yyc) {
1990 start_force(-1);
1991 NEXTVAL_NEXTTOKE = PL_parser->yylval;
1992 if (yyc == '{'/*}*/ || yyc == HASHBRACK || yyc == '['/*]*/) {
1993 PL_lex_allbrackets--;
1994 PL_lex_brackets--;
1995 yyc |= (3<<24) | (PL_lex_brackstack[PL_lex_brackets] << 16);
1996 } else if (yyc == '('/*)*/) {
1997 PL_lex_allbrackets--;
1998 yyc |= (2<<24);
1999 }
2000 force_next(yyc);
2001 }
2002 PL_parser->yychar = YYEMPTY;
2003 }
2004}
2005
2006STATIC SV *
2007S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len)
2008{
2009 dVAR;
2010 SV * const sv = newSVpvn_utf8(start, len,
2011 !IN_BYTES
2012 && UTF
2013 && !is_ascii_string((const U8*)start, len)
2014 && is_utf8_string((const U8*)start, len));
2015 return sv;
2016}
2017
2018/*
2019 * S_force_word
2020 * When the lexer knows the next thing is a word (for instance, it has
2021 * just seen -> and it knows that the next char is a word char, then
2022 * it calls S_force_word to stick the next word into the PL_nexttoke/val
2023 * lookahead.
2024 *
2025 * Arguments:
2026 * char *start : buffer position (must be within PL_linestr)
2027 * int token : PL_next* will be this type of bare word (e.g., METHOD,WORD)
2028 * int check_keyword : if true, Perl checks to make sure the word isn't
2029 * a keyword (do this if the word is a label, e.g. goto FOO)
2030 * int allow_pack : if true, : characters will also be allowed (require,
2031 * use, etc. do this)
2032 * int allow_initial_tick : used by the "sub" lexer only.
2033 */
2034
2035STATIC char *
2036S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
2037{
2038 dVAR;
2039 register char *s;
2040 STRLEN len;
2041
2042 PERL_ARGS_ASSERT_FORCE_WORD;
2043
2044 start = SKIPSPACE1(start);
2045 s = start;
2046 if (isIDFIRST_lazy_if(s,UTF) ||
2047 (allow_pack && *s == ':') ||
2048 (allow_initial_tick && *s == '\'') )
2049 {
2050 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
2051 if (check_keyword && keyword(PL_tokenbuf, len, 0))
2052 return start;
2053 start_force(PL_curforce);
2054 if (PL_madskills)
2055 curmad('X', newSVpvn(start,s-start));
2056 if (token == METHOD) {
2057 s = SKIPSPACE1(s);
2058 if (*s == '(')
2059 PL_expect = XTERM;
2060 else {
2061 PL_expect = XOPERATOR;
2062 }
2063 }
2064 if (PL_madskills)
2065 curmad('g', newSVpvs( "forced" ));
2066 NEXTVAL_NEXTTOKE.opval
2067 = (OP*)newSVOP(OP_CONST,0,
2068 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
2069 NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
2070 force_next(token);
2071 }
2072 return s;
2073}
2074
2075/*
2076 * S_force_ident
2077 * Called when the lexer wants $foo *foo &foo etc, but the program
2078 * text only contains the "foo" portion. The first argument is a pointer
2079 * to the "foo", and the second argument is the type symbol to prefix.
2080 * Forces the next token to be a "WORD".
2081 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
2082 */
2083
2084STATIC void
2085S_force_ident(pTHX_ register const char *s, int kind)
2086{
2087 dVAR;
2088
2089 PERL_ARGS_ASSERT_FORCE_IDENT;
2090
2091 if (*s) {
2092 const STRLEN len = strlen(s);
2093 OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpvn(s, len));
2094 start_force(PL_curforce);
2095 NEXTVAL_NEXTTOKE.opval = o;
2096 force_next(WORD);
2097 if (kind) {
2098 o->op_private = OPpCONST_ENTERED;
2099 /* XXX see note in pp_entereval() for why we forgo typo
2100 warnings if the symbol must be introduced in an eval.
2101 GSAR 96-10-12 */
2102 gv_fetchpvn_flags(s, len,
2103 PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL)
2104 : GV_ADD,
2105 kind == '$' ? SVt_PV :
2106 kind == '@' ? SVt_PVAV :
2107 kind == '%' ? SVt_PVHV :
2108 SVt_PVGV
2109 );
2110 }
2111 }
2112}
2113
2114NV
2115Perl_str_to_version(pTHX_ SV *sv)
2116{
2117 NV retval = 0.0;
2118 NV nshift = 1.0;
2119 STRLEN len;
2120 const char *start = SvPV_const(sv,len);
2121 const char * const end = start + len;
2122 const bool utf = SvUTF8(sv) ? TRUE : FALSE;
2123
2124 PERL_ARGS_ASSERT_STR_TO_VERSION;
2125
2126 while (start < end) {
2127 STRLEN skip;
2128 UV n;
2129 if (utf)
2130 n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
2131 else {
2132 n = *(U8*)start;
2133 skip = 1;
2134 }
2135 retval += ((NV)n)/nshift;
2136 start += skip;
2137 nshift *= 1000;
2138 }
2139 return retval;
2140}
2141
2142/*
2143 * S_force_version
2144 * Forces the next token to be a version number.
2145 * If the next token appears to be an invalid version number, (e.g. "v2b"),
2146 * and if "guessing" is TRUE, then no new token is created (and the caller
2147 * must use an alternative parsing method).
2148 */
2149
2150STATIC char *
2151S_force_version(pTHX_ char *s, int guessing)
2152{
2153 dVAR;
2154 OP *version = NULL;
2155 char *d;
2156#ifdef PERL_MAD
2157 I32 startoff = s - SvPVX(PL_linestr);
2158#endif
2159
2160 PERL_ARGS_ASSERT_FORCE_VERSION;
2161
2162 s = SKIPSPACE1(s);
2163
2164 d = s;
2165 if (*d == 'v')
2166 d++;
2167 if (isDIGIT(*d)) {
2168 while (isDIGIT(*d) || *d == '_' || *d == '.')
2169 d++;
2170#ifdef PERL_MAD
2171 if (PL_madskills) {
2172 start_force(PL_curforce);
2173 curmad('X', newSVpvn(s,d-s));
2174 }
2175#endif
2176 if (*d == ';' || isSPACE(*d) || *d == '{' || *d == '}' || !*d) {
2177 SV *ver;
2178#ifdef USE_LOCALE_NUMERIC
2179 char *loc = setlocale(LC_NUMERIC, "C");
2180#endif
2181 s = scan_num(s, &pl_yylval);
2182#ifdef USE_LOCALE_NUMERIC
2183 setlocale(LC_NUMERIC, loc);
2184#endif
2185 version = pl_yylval.opval;
2186 ver = cSVOPx(version)->op_sv;
2187 if (SvPOK(ver) && !SvNIOK(ver)) {
2188 SvUPGRADE(ver, SVt_PVNV);
2189 SvNV_set(ver, str_to_version(ver));
2190 SvNOK_on(ver); /* hint that it is a version */
2191 }
2192 }
2193 else if (guessing) {
2194#ifdef PERL_MAD
2195 if (PL_madskills) {
2196 sv_free(PL_nextwhite); /* let next token collect whitespace */
2197 PL_nextwhite = 0;
2198 s = SvPVX(PL_linestr) + startoff;
2199 }
2200#endif
2201 return s;
2202 }
2203 }
2204
2205#ifdef PERL_MAD
2206 if (PL_madskills && !version) {
2207 sv_free(PL_nextwhite); /* let next token collect whitespace */
2208 PL_nextwhite = 0;
2209 s = SvPVX(PL_linestr) + startoff;
2210 }
2211#endif
2212 /* NOTE: The parser sees the package name and the VERSION swapped */
2213 start_force(PL_curforce);
2214 NEXTVAL_NEXTTOKE.opval = version;
2215 force_next(WORD);
2216
2217 return s;
2218}
2219
2220/*
2221 * S_force_strict_version
2222 * Forces the next token to be a version number using strict syntax rules.
2223 */
2224
2225STATIC char *
2226S_force_strict_version(pTHX_ char *s)
2227{
2228 dVAR;
2229 OP *version = NULL;
2230#ifdef PERL_MAD
2231 I32 startoff = s - SvPVX(PL_linestr);
2232#endif
2233 const char *errstr = NULL;
2234
2235 PERL_ARGS_ASSERT_FORCE_STRICT_VERSION;
2236
2237 while (isSPACE(*s)) /* leading whitespace */
2238 s++;
2239
2240 if (is_STRICT_VERSION(s,&errstr)) {
2241 SV *ver = newSV(0);
2242 s = (char *)scan_version(s, ver, 0);
2243 version = newSVOP(OP_CONST, 0, ver);
2244 }
2245 else if ( (*s != ';' && *s != '{' && *s != '}' ) &&
2246 (s = SKIPSPACE1(s), (*s != ';' && *s != '{' && *s != '}' )))
2247 {
2248 PL_bufptr = s;
2249 if (errstr)
2250 yyerror(errstr); /* version required */
2251 return s;
2252 }
2253
2254#ifdef PERL_MAD
2255 if (PL_madskills && !version) {
2256 sv_free(PL_nextwhite); /* let next token collect whitespace */
2257 PL_nextwhite = 0;
2258 s = SvPVX(PL_linestr) + startoff;
2259 }
2260#endif
2261 /* NOTE: The parser sees the package name and the VERSION swapped */
2262 start_force(PL_curforce);
2263 NEXTVAL_NEXTTOKE.opval = version;
2264 force_next(WORD);
2265
2266 return s;
2267}
2268
2269/*
2270 * S_tokeq
2271 * Tokenize a quoted string passed in as an SV. It finds the next
2272 * chunk, up to end of string or a backslash. It may make a new
2273 * SV containing that chunk (if HINT_NEW_STRING is on). It also
2274 * turns \\ into \.
2275 */
2276
2277STATIC SV *
2278S_tokeq(pTHX_ SV *sv)
2279{
2280 dVAR;
2281 register char *s;
2282 register char *send;
2283 register char *d;
2284 STRLEN len = 0;
2285 SV *pv = sv;
2286
2287 PERL_ARGS_ASSERT_TOKEQ;
2288
2289 if (!SvLEN(sv))
2290 goto finish;
2291
2292 s = SvPV_force(sv, len);
2293 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
2294 goto finish;
2295 send = s + len;
2296 /* This is relying on the SV being "well formed" with a trailing '\0' */
2297 while (s < send && !(*s == '\\' && s[1] == '\\'))
2298 s++;
2299 if (s == send)
2300 goto finish;
2301 d = s;
2302 if ( PL_hints & HINT_NEW_STRING ) {
2303 pv = newSVpvn_flags(SvPVX_const(pv), len, SVs_TEMP | SvUTF8(sv));
2304 }
2305 while (s < send) {
2306 if (*s == '\\') {
2307 if (s + 1 < send && (s[1] == '\\'))
2308 s++; /* all that, just for this */
2309 }
2310 *d++ = *s++;
2311 }
2312 *d = '\0';
2313 SvCUR_set(sv, d - SvPVX_const(sv));
2314 finish:
2315 if ( PL_hints & HINT_NEW_STRING )
2316 return new_constant(NULL, 0, "q", sv, pv, "q", 1);
2317 return sv;
2318}
2319
2320/*
2321 * Now come three functions related to double-quote context,
2322 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
2323 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
2324 * interact with PL_lex_state, and create fake ( ... ) argument lists
2325 * to handle functions and concatenation.
2326 * They assume that whoever calls them will be setting up a fake
2327 * join call, because each subthing puts a ',' after it. This lets
2328 * "lower \luPpEr"
2329 * become
2330 * join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
2331 *
2332 * (I'm not sure whether the spurious commas at the end of lcfirst's
2333 * arguments and join's arguments are created or not).
2334 */
2335
2336/*
2337 * S_sublex_start
2338 * Assumes that pl_yylval.ival is the op we're creating (e.g. OP_LCFIRST).
2339 *
2340 * Pattern matching will set PL_lex_op to the pattern-matching op to
2341 * make (we return THING if pl_yylval.ival is OP_NULL, PMFUNC otherwise).
2342 *
2343 * OP_CONST and OP_READLINE are easy--just make the new op and return.
2344 *
2345 * Everything else becomes a FUNC.
2346 *
2347 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
2348 * had an OP_CONST or OP_READLINE). This just sets us up for a
2349 * call to S_sublex_push().
2350 */
2351
2352STATIC I32
2353S_sublex_start(pTHX)
2354{
2355 dVAR;
2356 register const I32 op_type = pl_yylval.ival;
2357
2358 if (op_type == OP_NULL) {
2359 pl_yylval.opval = PL_lex_op;
2360 PL_lex_op = NULL;
2361 return THING;
2362 }
2363 if (op_type == OP_CONST || op_type == OP_READLINE) {
2364 SV *sv = tokeq(PL_lex_stuff);
2365
2366 if (SvTYPE(sv) == SVt_PVIV) {
2367 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
2368 STRLEN len;
2369 const char * const p = SvPV_const(sv, len);
2370 SV * const nsv = newSVpvn_flags(p, len, SvUTF8(sv));
2371 SvREFCNT_dec(sv);
2372 sv = nsv;
2373 }
2374 pl_yylval.opval = (OP*)newSVOP(op_type, 0, sv);
2375 PL_lex_stuff = NULL;
2376 /* Allow <FH> // "foo" */
2377 if (op_type == OP_READLINE)
2378 PL_expect = XTERMORDORDOR;
2379 return THING;
2380 }
2381 else if (op_type == OP_BACKTICK && PL_lex_op) {
2382 /* readpipe() vas overriden */
2383 cSVOPx(cLISTOPx(cUNOPx(PL_lex_op)->op_first)->op_first->op_sibling)->op_sv = tokeq(PL_lex_stuff);
2384 pl_yylval.opval = PL_lex_op;
2385 PL_lex_op = NULL;
2386 PL_lex_stuff = NULL;
2387 return THING;
2388 }
2389
2390 PL_sublex_info.super_state = PL_lex_state;
2391 PL_sublex_info.sub_inwhat = (U16)op_type;
2392 PL_sublex_info.sub_op = PL_lex_op;
2393 PL_lex_state = LEX_INTERPPUSH;
2394
2395 PL_expect = XTERM;
2396 if (PL_lex_op) {
2397 pl_yylval.opval = PL_lex_op;
2398 PL_lex_op = NULL;
2399 return PMFUNC;
2400 }
2401 else
2402 return FUNC;
2403}
2404
2405/*
2406 * S_sublex_push
2407 * Create a new scope to save the lexing state. The scope will be
2408 * ended in S_sublex_done. Returns a '(', starting the function arguments
2409 * to the uc, lc, etc. found before.
2410 * Sets PL_lex_state to LEX_INTERPCONCAT.
2411 */
2412
2413STATIC I32
2414S_sublex_push(pTHX)
2415{
2416 dVAR;
2417 ENTER;
2418
2419 PL_lex_state = PL_sublex_info.super_state;
2420 SAVEBOOL(PL_lex_dojoin);
2421 SAVEI32(PL_lex_brackets);
2422 SAVEI32(PL_lex_allbrackets);
2423 SAVEI8(PL_lex_fakeeof);
2424 SAVEI32(PL_lex_casemods);
2425 SAVEI32(PL_lex_starts);
2426 SAVEI8(PL_lex_state);
2427 SAVEVPTR(PL_lex_inpat);
2428 SAVEI16(PL_lex_inwhat);
2429 SAVECOPLINE(PL_curcop);
2430 SAVEPPTR(PL_bufptr);
2431 SAVEPPTR(PL_bufend);
2432 SAVEPPTR(PL_oldbufptr);
2433 SAVEPPTR(PL_oldoldbufptr);
2434 SAVEPPTR(PL_last_lop);
2435 SAVEPPTR(PL_last_uni);
2436 SAVEPPTR(PL_linestart);
2437 SAVESPTR(PL_linestr);
2438 SAVEGENERICPV(PL_lex_brackstack);
2439 SAVEGENERICPV(PL_lex_casestack);
2440
2441 PL_linestr = PL_lex_stuff;
2442 PL_lex_stuff = NULL;
2443
2444 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
2445 = SvPVX(PL_linestr);
2446 PL_bufend += SvCUR(PL_linestr);
2447 PL_last_lop = PL_last_uni = NULL;
2448 SAVEFREESV(PL_linestr);
2449
2450 PL_lex_dojoin = FALSE;
2451 PL_lex_brackets = 0;
2452 PL_lex_allbrackets = 0;
2453 PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
2454 Newx(PL_lex_brackstack, 120, char);
2455 Newx(PL_lex_casestack, 12, char);
2456 PL_lex_casemods = 0;
2457 *PL_lex_casestack = '\0';
2458 PL_lex_starts = 0;
2459 PL_lex_state = LEX_INTERPCONCAT;
2460 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
2461
2462 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
2463 if (PL_lex_inwhat == OP_TRANSR) PL_lex_inwhat = OP_TRANS;
2464 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
2465 PL_lex_inpat = PL_sublex_info.sub_op;
2466 else
2467 PL_lex_inpat = NULL;
2468
2469 return '(';
2470}
2471
2472/*
2473 * S_sublex_done
2474 * Restores lexer state after a S_sublex_push.
2475 */
2476
2477STATIC I32
2478S_sublex_done(pTHX)
2479{
2480 dVAR;
2481 if (!PL_lex_starts++) {
2482 SV * const sv = newSVpvs("");
2483 if (SvUTF8(PL_linestr))
2484 SvUTF8_on(sv);
2485 PL_expect = XOPERATOR;
2486 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
2487 return THING;
2488 }
2489
2490 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
2491 PL_lex_state = LEX_INTERPCASEMOD;
2492 return yylex();
2493 }
2494
2495 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
2496 assert(PL_lex_inwhat != OP_TRANSR);
2497 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
2498 PL_linestr = PL_lex_repl;
2499 PL_lex_inpat = 0;
2500 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
2501 PL_bufend += SvCUR(PL_linestr);
2502 PL_last_lop = PL_last_uni = NULL;
2503 SAVEFREESV(PL_linestr);
2504 PL_lex_dojoin = FALSE;
2505 PL_lex_brackets = 0;
2506 PL_lex_allbrackets = 0;
2507 PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
2508 PL_lex_casemods = 0;
2509 *PL_lex_casestack = '\0';
2510 PL_lex_starts = 0;
2511 if (SvEVALED(PL_lex_repl)) {
2512 PL_lex_state = LEX_INTERPNORMAL;
2513 PL_lex_starts++;
2514 /* we don't clear PL_lex_repl here, so that we can check later
2515 whether this is an evalled subst; that means we rely on the
2516 logic to ensure sublex_done() is called again only via the
2517 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
2518 }
2519 else {
2520 PL_lex_state = LEX_INTERPCONCAT;
2521 PL_lex_repl = NULL;
2522 }
2523 return ',';
2524 }
2525 else {
2526#ifdef PERL_MAD
2527 if (PL_madskills) {
2528 if (PL_thiswhite) {
2529 if (!PL_endwhite)
2530 PL_endwhite = newSVpvs("");
2531 sv_catsv(PL_endwhite, PL_thiswhite);
2532 PL_thiswhite = 0;
2533 }
2534 if (PL_thistoken)
2535 sv_setpvs(PL_thistoken,"");
2536 else
2537 PL_realtokenstart = -1;
2538 }
2539#endif
2540 LEAVE;
2541 PL_bufend = SvPVX(PL_linestr);
2542 PL_bufend += SvCUR(PL_linestr);
2543 PL_expect = XOPERATOR;
2544 PL_sublex_info.sub_inwhat = 0;
2545 return ')';
2546 }
2547}
2548
2549/*
2550 scan_const
2551
2552 Extracts a pattern, double-quoted string, or transliteration. This
2553 is terrifying code.
2554
2555 It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
2556 processing a pattern (PL_lex_inpat is true), a transliteration
2557 (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
2558
2559 Returns a pointer to the character scanned up to. If this is
2560 advanced from the start pointer supplied (i.e. if anything was
2561 successfully parsed), will leave an OP for the substring scanned
2562 in pl_yylval. Caller must intuit reason for not parsing further
2563 by looking at the next characters herself.
2564
2565 In patterns:
2566 backslashes:
2567 constants: \N{NAME} only
2568 case and quoting: \U \Q \E
2569 stops on @ and $, but not for $ as tail anchor
2570
2571 In transliterations:
2572 characters are VERY literal, except for - not at the start or end
2573 of the string, which indicates a range. If the range is in bytes,
2574 scan_const expands the range to the full set of intermediate
2575 characters. If the range is in utf8, the hyphen is replaced with
2576 a certain range mark which will be handled by pmtrans() in op.c.
2577
2578 In double-quoted strings:
2579 backslashes:
2580 double-quoted style: \r and \n
2581 constants: \x31, etc.
2582 deprecated backrefs: \1 (in substitution replacements)
2583 case and quoting: \U \Q \E
2584 stops on @ and $
2585
2586 scan_const does *not* construct ops to handle interpolated strings.
2587 It stops processing as soon as it finds an embedded $ or @ variable
2588 and leaves it to the caller to work out what's going on.
2589
2590 embedded arrays (whether in pattern or not) could be:
2591 @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
2592
2593 $ in double-quoted strings must be the symbol of an embedded scalar.
2594
2595 $ in pattern could be $foo or could be tail anchor. Assumption:
2596 it's a tail anchor if $ is the last thing in the string, or if it's
2597 followed by one of "()| \r\n\t"
2598
2599 \1 (backreferences) are turned into $1
2600
2601 The structure of the code is
2602 while (there's a character to process) {
2603 handle transliteration ranges
2604 skip regexp comments /(?#comment)/ and codes /(?{code})/
2605 skip #-initiated comments in //x patterns
2606 check for embedded arrays
2607 check for embedded scalars
2608 if (backslash) {
2609 deprecate \1 in substitution replacements
2610 handle string-changing backslashes \l \U \Q \E, etc.
2611 switch (what was escaped) {
2612 handle \- in a transliteration (becomes a literal -)
2613 if a pattern and not \N{, go treat as regular character
2614 handle \132 (octal characters)
2615 handle \x15 and \x{1234} (hex characters)
2616 handle \N{name} (named characters, also \N{3,5} in a pattern)
2617 handle \cV (control characters)
2618 handle printf-style backslashes (\f, \r, \n, etc)
2619 } (end switch)
2620 continue
2621 } (end if backslash)
2622 handle regular character
2623 } (end while character to read)
2624
2625*/
2626
2627STATIC char *
2628S_scan_const(pTHX_ char *start)
2629{
2630 dVAR;
2631 register char *send = PL_bufend; /* end of the constant */
2632 SV *sv = newSV(send - start); /* sv for the constant. See
2633 note below on sizing. */
2634 register char *s = start; /* start of the constant */
2635 register char *d = SvPVX(sv); /* destination for copies */
2636 bool dorange = FALSE; /* are we in a translit range? */
2637 bool didrange = FALSE; /* did we just finish a range? */
2638 bool has_utf8 = FALSE; /* Output constant is UTF8 */
2639 bool this_utf8 = cBOOL(UTF); /* Is the source string assumed
2640 to be UTF8? But, this can
2641 show as true when the source
2642 isn't utf8, as for example
2643 when it is entirely composed
2644 of hex constants */
2645
2646 /* Note on sizing: The scanned constant is placed into sv, which is
2647 * initialized by newSV() assuming one byte of output for every byte of
2648 * input. This routine expects newSV() to allocate an extra byte for a
2649 * trailing NUL, which this routine will append if it gets to the end of
2650 * the input. There may be more bytes of input than output (eg., \N{LATIN
2651 * CAPITAL LETTER A}), or more output than input if the constant ends up
2652 * recoded to utf8, but each time a construct is found that might increase
2653 * the needed size, SvGROW() is called. Its size parameter each time is
2654 * based on the best guess estimate at the time, namely the length used so
2655 * far, plus the length the current construct will occupy, plus room for
2656 * the trailing NUL, plus one byte for every input byte still unscanned */
2657
2658 UV uv;
2659#ifdef EBCDIC
2660 UV literal_endpoint = 0;
2661 bool native_range = TRUE; /* turned to FALSE if the first endpoint is Unicode. */
2662#endif
2663
2664 PERL_ARGS_ASSERT_SCAN_CONST;
2665
2666 assert(PL_lex_inwhat != OP_TRANSR);
2667 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
2668 /* If we are doing a trans and we know we want UTF8 set expectation */
2669 has_utf8 = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
2670 this_utf8 = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
2671 }
2672
2673
2674 while (s < send || dorange) {
2675
2676 /* get transliterations out of the way (they're most literal) */
2677 if (PL_lex_inwhat == OP_TRANS) {
2678 /* expand a range A-Z to the full set of characters. AIE! */
2679 if (dorange) {
2680 I32 i; /* current expanded character */
2681 I32 min; /* first character in range */
2682 I32 max; /* last character in range */
2683
2684#ifdef EBCDIC
2685 UV uvmax = 0;
2686#endif
2687
2688 if (has_utf8
2689#ifdef EBCDIC
2690 && !native_range
2691#endif
2692 ) {
2693 char * const c = (char*)utf8_hop((U8*)d, -1);
2694 char *e = d++;
2695 while (e-- > c)
2696 *(e + 1) = *e;
2697 *c = (char)UTF_TO_NATIVE(0xff);
2698 /* mark the range as done, and continue */
2699 dorange = FALSE;
2700 didrange = TRUE;
2701 continue;
2702 }
2703
2704 i = d - SvPVX_const(sv); /* remember current offset */
2705#ifdef EBCDIC
2706 SvGROW(sv,
2707 SvLEN(sv) + (has_utf8 ?
2708 (512 - UTF_CONTINUATION_MARK +
2709 UNISKIP(0x100))
2710 : 256));
2711 /* How many two-byte within 0..255: 128 in UTF-8,
2712 * 96 in UTF-8-mod. */
2713#else
2714 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
2715#endif
2716 d = SvPVX(sv) + i; /* refresh d after realloc */
2717#ifdef EBCDIC
2718 if (has_utf8) {
2719 int j;
2720 for (j = 0; j <= 1; j++) {
2721 char * const c = (char*)utf8_hop((U8*)d, -1);
2722 const UV uv = utf8n_to_uvchr((U8*)c, d - c, NULL, 0);
2723 if (j)
2724 min = (U8)uv;
2725 else if (uv < 256)
2726 max = (U8)uv;
2727 else {
2728 max = (U8)0xff; /* only to \xff */
2729 uvmax = uv; /* \x{100} to uvmax */
2730 }
2731 d = c; /* eat endpoint chars */
2732 }
2733 }
2734 else {
2735#endif
2736 d -= 2; /* eat the first char and the - */
2737 min = (U8)*d; /* first char in range */
2738 max = (U8)d[1]; /* last char in range */
2739#ifdef EBCDIC
2740 }
2741#endif
2742
2743 if (min > max) {
2744 Perl_croak(aTHX_
2745 "Invalid range \"%c-%c\" in transliteration operator",
2746 (char)min, (char)max);
2747 }
2748
2749#ifdef EBCDIC
2750 if (literal_endpoint == 2 &&
2751 ((isLOWER(min) && isLOWER(max)) ||
2752 (isUPPER(min) && isUPPER(max)))) {
2753 if (isLOWER(min)) {
2754 for (i = min; i <= max; i++)
2755 if (isLOWER(i))
2756 *d++ = NATIVE_TO_NEED(has_utf8,i);
2757 } else {
2758 for (i = min; i <= max; i++)
2759 if (isUPPER(i))
2760 *d++ = NATIVE_TO_NEED(has_utf8,i);
2761 }
2762 }
2763 else
2764#endif
2765 for (i = min; i <= max; i++)
2766#ifdef EBCDIC
2767 if (has_utf8) {
2768 const U8 ch = (U8)NATIVE_TO_UTF(i);
2769 if (UNI_IS_INVARIANT(ch))
2770 *d++ = (U8)i;
2771 else {
2772 *d++ = (U8)UTF8_EIGHT_BIT_HI(ch);
2773 *d++ = (U8)UTF8_EIGHT_BIT_LO(ch);
2774 }
2775 }
2776 else
2777#endif
2778 *d++ = (char)i;
2779
2780#ifdef EBCDIC
2781 if (uvmax) {
2782 d = (char*)uvchr_to_utf8((U8*)d, 0x100);
2783 if (uvmax > 0x101)
2784 *d++ = (char)UTF_TO_NATIVE(0xff);
2785 if (uvmax > 0x100)
2786 d = (char*)uvchr_to_utf8((U8*)d, uvmax);
2787 }
2788#endif
2789
2790 /* mark the range as done, and continue */
2791 dorange = FALSE;
2792 didrange = TRUE;
2793#ifdef EBCDIC
2794 literal_endpoint = 0;
2795#endif
2796 continue;
2797 }
2798
2799 /* range begins (ignore - as first or last char) */
2800 else if (*s == '-' && s+1 < send && s != start) {
2801 if (didrange) {
2802 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
2803 }
2804 if (has_utf8
2805#ifdef EBCDIC
2806 && !native_range
2807#endif
2808 ) {
2809 *d++ = (char)UTF_TO_NATIVE(0xff); /* use illegal utf8 byte--see pmtrans */
2810 s++;
2811 continue;
2812 }
2813 dorange = TRUE;
2814 s++;
2815 }
2816 else {
2817 didrange = FALSE;
2818#ifdef EBCDIC
2819 literal_endpoint = 0;
2820 native_range = TRUE;
2821#endif
2822 }
2823 }
2824
2825 /* if we get here, we're not doing a transliteration */
2826
2827 /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
2828 except for the last char, which will be done separately. */
2829 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
2830 if (s[2] == '#') {
2831 while (s+1 < send && *s != ')')
2832 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2833 }
2834 else if (s[2] == '{' /* This should match regcomp.c */
2835 || (s[2] == '?' && s[3] == '{'))
2836 {
2837 I32 count = 1;
2838 char *regparse = s + (s[2] == '{' ? 3 : 4);
2839 char c;
2840
2841 while (count && (c = *regparse)) {
2842 if (c == '\\' && regparse[1])
2843 regparse++;
2844 else if (c == '{')
2845 count++;
2846 else if (c == '}')
2847 count--;
2848 regparse++;
2849 }
2850 if (*regparse != ')')
2851 regparse--; /* Leave one char for continuation. */
2852 while (s < regparse)
2853 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2854 }
2855 }
2856
2857 /* likewise skip #-initiated comments in //x patterns */
2858 else if (*s == '#' && PL_lex_inpat &&
2859 ((PMOP*)PL_lex_inpat)->op_pmflags & RXf_PMf_EXTENDED) {
2860 while (s+1 < send && *s != '\n')
2861 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2862 }
2863
2864 /* check for embedded arrays
2865 (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
2866 */
2867 else if (*s == '@' && s[1]) {
2868 if (isALNUM_lazy_if(s+1,UTF))
2869 break;
2870 if (strchr(":'{$", s[1]))
2871 break;
2872 if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
2873 break; /* in regexp, neither @+ nor @- are interpolated */
2874 }
2875
2876 /* check for embedded scalars. only stop if we're sure it's a
2877 variable.
2878 */
2879 else if (*s == '$') {
2880 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
2881 break;
2882 if (s + 1 < send && !strchr("()| \r\n\t", s[1])) {
2883 if (s[1] == '\\') {
2884 Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
2885 "Possible unintended interpolation of $\\ in regex");
2886 }
2887 break; /* in regexp, $ might be tail anchor */
2888 }
2889 }
2890
2891 /* End of else if chain - OP_TRANS rejoin rest */
2892
2893 /* backslashes */
2894 if (*s == '\\' && s+1 < send) {
2895 char* e; /* Can be used for ending '}', etc. */
2896
2897 s++;
2898
2899 /* warn on \1 - \9 in substitution replacements, but note that \11
2900 * is an octal; and \19 is \1 followed by '9' */
2901 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
2902 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
2903 {
2904 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
2905 *--s = '$';
2906 break;
2907 }
2908
2909 /* string-change backslash escapes */
2910 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
2911 --s;
2912 break;
2913 }
2914 /* In a pattern, process \N, but skip any other backslash escapes.
2915 * This is because we don't want to translate an escape sequence
2916 * into a meta symbol and have the regex compiler use the meta
2917 * symbol meaning, e.g. \x{2E} would be confused with a dot. But
2918 * in spite of this, we do have to process \N here while the proper
2919 * charnames handler is in scope. See bugs #56444 and #62056.
2920 * There is a complication because \N in a pattern may also stand
2921 * for 'match a non-nl', and not mean a charname, in which case its
2922 * processing should be deferred to the regex compiler. To be a
2923 * charname it must be followed immediately by a '{', and not look
2924 * like \N followed by a curly quantifier, i.e., not something like
2925 * \N{3,}. regcurly returns a boolean indicating if it is a legal
2926 * quantifier */
2927 else if (PL_lex_inpat
2928 && (*s != 'N'
2929 || s[1] != '{'
2930 || regcurly(s + 1)))
2931 {
2932 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
2933 goto default_action;
2934 }
2935
2936 switch (*s) {
2937
2938 /* quoted - in transliterations */
2939 case '-':
2940 if (PL_lex_inwhat == OP_TRANS) {
2941 *d++ = *s++;
2942 continue;
2943 }
2944 /* FALL THROUGH */
2945 default:
2946 {
2947 if ((isALPHA(*s) || isDIGIT(*s)))
2948 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
2949 "Unrecognized escape \\%c passed through",
2950 *s);
2951 /* default action is to copy the quoted character */
2952 goto default_action;
2953 }
2954
2955 /* eg. \132 indicates the octal constant 0132 */
2956 case '0': case '1': case '2': case '3':
2957 case '4': case '5': case '6': case '7':
2958 {
2959 I32 flags = 0;
2960 STRLEN len = 3;
2961 uv = NATIVE_TO_UNI(grok_oct(s, &len, &flags, NULL));
2962 s += len;
2963 }
2964 goto NUM_ESCAPE_INSERT;
2965
2966 /* eg. \o{24} indicates the octal constant \024 */
2967 case 'o':
2968 {
2969 STRLEN len;
2970 const char* error;
2971
2972 bool valid = grok_bslash_o(s, &uv, &len, &error, 1);
2973 s += len;
2974 if (! valid) {
2975 yyerror(error);
2976 continue;
2977 }
2978 goto NUM_ESCAPE_INSERT;
2979 }
2980
2981 /* eg. \x24 indicates the hex constant 0x24 */
2982 case 'x':
2983 ++s;
2984 if (*s == '{') {
2985 char* const e = strchr(s, '}');
2986 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
2987 PERL_SCAN_DISALLOW_PREFIX;
2988 STRLEN len;
2989
2990 ++s;
2991 if (!e) {
2992 yyerror("Missing right brace on \\x{}");
2993 continue;
2994 }
2995 len = e - s;
2996 uv = NATIVE_TO_UNI(grok_hex(s, &len, &flags, NULL));
2997 s = e + 1;
2998 }
2999 else {
3000 {
3001 STRLEN len = 2;
3002 I32 flags = PERL_SCAN_DISALLOW_PREFIX;
3003 uv = NATIVE_TO_UNI(grok_hex(s, &len, &flags, NULL));
3004 s += len;
3005 }
3006 }
3007
3008 NUM_ESCAPE_INSERT:
3009 /* Insert oct or hex escaped character. There will always be
3010 * enough room in sv since such escapes will be longer than any
3011 * UTF-8 sequence they can end up as, except if they force us
3012 * to recode the rest of the string into utf8 */
3013
3014 /* Here uv is the ordinal of the next character being added in
3015 * unicode (converted from native). */
3016 if (!UNI_IS_INVARIANT(uv)) {
3017 if (!has_utf8 && uv > 255) {
3018 /* Might need to recode whatever we have accumulated so
3019 * far if it contains any chars variant in utf8 or
3020 * utf-ebcdic. */
3021
3022 SvCUR_set(sv, d - SvPVX_const(sv));
3023 SvPOK_on(sv);
3024 *d = '\0';
3025 /* See Note on sizing above. */
3026 sv_utf8_upgrade_flags_grow(sv,
3027 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3028 UNISKIP(uv) + (STRLEN)(send - s) + 1);
3029 d = SvPVX(sv) + SvCUR(sv);
3030 has_utf8 = TRUE;
3031 }
3032
3033 if (has_utf8) {
3034 d = (char*)uvuni_to_utf8((U8*)d, uv);
3035 if (PL_lex_inwhat == OP_TRANS &&
3036 PL_sublex_info.sub_op) {
3037 PL_sublex_info.sub_op->op_private |=
3038 (PL_lex_repl ? OPpTRANS_FROM_UTF
3039 : OPpTRANS_TO_UTF);
3040 }
3041#ifdef EBCDIC
3042 if (uv > 255 && !dorange)
3043 native_range = FALSE;
3044#endif
3045 }
3046 else {
3047 *d++ = (char)uv;
3048 }
3049 }
3050 else {
3051 *d++ = (char) uv;
3052 }
3053 continue;
3054
3055 case 'N':
3056 /* In a non-pattern \N must be a named character, like \N{LATIN
3057 * SMALL LETTER A} or \N{U+0041}. For patterns, it also can
3058 * mean to match a non-newline. For non-patterns, named
3059 * characters are converted to their string equivalents. In
3060 * patterns, named characters are not converted to their
3061 * ultimate forms for the same reasons that other escapes
3062 * aren't. Instead, they are converted to the \N{U+...} form
3063 * to get the value from the charnames that is in effect right
3064 * now, while preserving the fact that it was a named character
3065 * so that the regex compiler knows this */
3066
3067 /* This section of code doesn't generally use the
3068 * NATIVE_TO_NEED() macro to transform the input. I (khw) did
3069 * a close examination of this macro and determined it is a
3070 * no-op except on utfebcdic variant characters. Every
3071 * character generated by this that would normally need to be
3072 * enclosed by this macro is invariant, so the macro is not
3073 * needed, and would complicate use of copy(). XXX There are
3074 * other parts of this file where the macro is used
3075 * inconsistently, but are saved by it being a no-op */
3076
3077 /* The structure of this section of code (besides checking for
3078 * errors and upgrading to utf8) is:
3079 * Further disambiguate between the two meanings of \N, and if
3080 * not a charname, go process it elsewhere
3081 * If of form \N{U+...}, pass it through if a pattern;
3082 * otherwise convert to utf8
3083 * Otherwise must be \N{NAME}: convert to \N{U+c1.c2...} if a
3084 * pattern; otherwise convert to utf8 */
3085
3086 /* Here, s points to the 'N'; the test below is guaranteed to
3087 * succeed if we are being called on a pattern as we already
3088 * know from a test above that the next character is a '{'.
3089 * On a non-pattern \N must mean 'named sequence, which
3090 * requires braces */
3091 s++;
3092 if (*s != '{') {
3093 yyerror("Missing braces on \\N{}");
3094 continue;
3095 }
3096 s++;
3097
3098 /* If there is no matching '}', it is an error. */
3099 if (! (e = strchr(s, '}'))) {
3100 if (! PL_lex_inpat) {
3101 yyerror("Missing right brace on \\N{}");
3102 } else {
3103 yyerror("Missing right brace on \\N{} or unescaped left brace after \\N.");
3104 }
3105 continue;
3106 }
3107
3108 /* Here it looks like a named character */
3109
3110 if (PL_lex_inpat) {
3111
3112 /* XXX This block is temporary code. \N{} implies that the
3113 * pattern is to have Unicode semantics, and therefore
3114 * currently has to be encoded in utf8. By putting it in
3115 * utf8 now, we save a whole pass in the regular expression
3116 * compiler. Once that code is changed so Unicode
3117 * semantics doesn't necessarily have to be in utf8, this
3118 * block should be removed. However, the code that parses
3119 * the output of this would have to be changed to not
3120 * necessarily expect utf8 */
3121 if (!has_utf8) {
3122 SvCUR_set(sv, d - SvPVX_const(sv));
3123 SvPOK_on(sv);
3124 *d = '\0';
3125 /* See Note on sizing above. */
3126 sv_utf8_upgrade_flags_grow(sv,
3127 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3128 /* 5 = '\N{' + cur char + NUL */
3129 (STRLEN)(send - s) + 5);
3130 d = SvPVX(sv) + SvCUR(sv);
3131 has_utf8 = TRUE;
3132 }
3133 }
3134
3135 if (*s == 'U' && s[1] == '+') { /* \N{U+...} */
3136 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
3137 | PERL_SCAN_DISALLOW_PREFIX;
3138 STRLEN len;
3139
3140 /* For \N{U+...}, the '...' is a unicode value even on
3141 * EBCDIC machines */
3142 s += 2; /* Skip to next char after the 'U+' */
3143 len = e - s;
3144 uv = grok_hex(s, &len, &flags, NULL);
3145 if (len == 0 || len != (STRLEN)(e - s)) {
3146 yyerror("Invalid hexadecimal number in \\N{U+...}");
3147 s = e + 1;
3148 continue;
3149 }
3150
3151 if (PL_lex_inpat) {
3152
3153 /* On non-EBCDIC platforms, pass through to the regex
3154 * compiler unchanged. The reason we evaluated the
3155 * number above is to make sure there wasn't a syntax
3156 * error. But on EBCDIC we convert to native so
3157 * downstream code can continue to assume it's native
3158 */
3159 s -= 5; /* Include the '\N{U+' */
3160#ifdef EBCDIC
3161 d += my_snprintf(d, e - s + 1 + 1, /* includes the }
3162 and the \0 */
3163 "\\N{U+%X}",
3164 (unsigned int) UNI_TO_NATIVE(uv));
3165#else
3166 Copy(s, d, e - s + 1, char); /* 1 = include the } */
3167 d += e - s + 1;
3168#endif
3169 }
3170 else { /* Not a pattern: convert the hex to string */
3171
3172 /* If destination is not in utf8, unconditionally
3173 * recode it to be so. This is because \N{} implies
3174 * Unicode semantics, and scalars have to be in utf8
3175 * to guarantee those semantics */
3176 if (! has_utf8) {
3177 SvCUR_set(sv, d - SvPVX_const(sv));
3178 SvPOK_on(sv);
3179 *d = '\0';
3180 /* See Note on sizing above. */
3181 sv_utf8_upgrade_flags_grow(
3182 sv,
3183 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3184 UNISKIP(uv) + (STRLEN)(send - e) + 1);
3185 d = SvPVX(sv) + SvCUR(sv);
3186 has_utf8 = TRUE;
3187 }
3188
3189 /* Add the string to the output */
3190 if (UNI_IS_INVARIANT(uv)) {
3191 *d++ = (char) uv;
3192 }
3193 else d = (char*)uvuni_to_utf8((U8*)d, uv);
3194 }
3195 }
3196 else { /* Here is \N{NAME} but not \N{U+...}. */
3197
3198 SV *res; /* result from charnames */
3199 const char *str; /* the string in 'res' */
3200 STRLEN len; /* its length */
3201
3202 /* Get the value for NAME */
3203 res = newSVpvn(s, e - s);
3204 res = new_constant( NULL, 0, "charnames",
3205 /* includes all of: \N{...} */
3206 res, NULL, s - 3, e - s + 4 );
3207
3208 /* Most likely res will be in utf8 already since the
3209 * standard charnames uses pack U, but a custom translator
3210 * can leave it otherwise, so make sure. XXX This can be
3211 * revisited to not have charnames use utf8 for characters
3212 * that don't need it when regexes don't have to be in utf8
3213 * for Unicode semantics. If doing so, remember EBCDIC */
3214 sv_utf8_upgrade(res);
3215 str = SvPV_const(res, len);
3216
3217 /* Don't accept malformed input */
3218 if (! is_utf8_string((U8 *) str, len)) {
3219 yyerror("Malformed UTF-8 returned by \\N");
3220 }
3221 else if (PL_lex_inpat) {
3222
3223 if (! len) { /* The name resolved to an empty string */
3224 Copy("\\N{}", d, 4, char);
3225 d += 4;
3226 }
3227 else {
3228 /* In order to not lose information for the regex
3229 * compiler, pass the result in the specially made
3230 * syntax: \N{U+c1.c2.c3...}, where c1 etc. are
3231 * the code points in hex of each character
3232 * returned by charnames */
3233
3234 const char *str_end = str + len;
3235 STRLEN char_length; /* cur char's byte length */
3236 STRLEN output_length; /* and the number of bytes
3237 after this is translated
3238 into hex digits */
3239 const STRLEN off = d - SvPVX_const(sv);
3240
3241 /* 2 hex per byte; 2 chars for '\N'; 2 chars for
3242 * max('U+', '.'); and 1 for NUL */
3243 char hex_string[2 * UTF8_MAXBYTES + 5];
3244
3245 /* Get the first character of the result. */
3246 U32 uv = utf8n_to_uvuni((U8 *) str,
3247 len,
3248 &char_length,
3249 UTF8_ALLOW_ANYUV);
3250
3251 /* The call to is_utf8_string() above hopefully
3252 * guarantees that there won't be an error. But
3253 * it's easy here to make sure. The function just
3254 * above warns and returns 0 if invalid utf8, but
3255 * it can also return 0 if the input is validly a
3256 * NUL. Disambiguate */
3257 if (uv == 0 && NATIVE_TO_ASCII(*str) != '\0') {
3258 uv = UNICODE_REPLACEMENT;
3259 }
3260
3261 /* Convert first code point to hex, including the
3262 * boiler plate before it. For all these, we
3263 * convert to native format so that downstream code
3264 * can continue to assume the input is native */
3265 output_length =
3266 my_snprintf(hex_string, sizeof(hex_string),
3267 "\\N{U+%X",
3268 (unsigned int) UNI_TO_NATIVE(uv));
3269
3270 /* Make sure there is enough space to hold it */
3271 d = off + SvGROW(sv, off
3272 + output_length
3273 + (STRLEN)(send - e)
3274 + 2); /* '}' + NUL */
3275 /* And output it */
3276 Copy(hex_string, d, output_length, char);
3277 d += output_length;
3278
3279 /* For each subsequent character, append dot and
3280 * its ordinal in hex */
3281 while ((str += char_length) < str_end) {
3282 const STRLEN off = d - SvPVX_const(sv);
3283 U32 uv = utf8n_to_uvuni((U8 *) str,
3284 str_end - str,
3285 &char_length,
3286 UTF8_ALLOW_ANYUV);
3287 if (uv == 0 && NATIVE_TO_ASCII(*str) != '\0') {
3288 uv = UNICODE_REPLACEMENT;
3289 }
3290
3291 output_length =
3292 my_snprintf(hex_string, sizeof(hex_string),
3293 ".%X",
3294 (unsigned int) UNI_TO_NATIVE(uv));
3295
3296 d = off + SvGROW(sv, off
3297 + output_length
3298 + (STRLEN)(send - e)
3299 + 2); /* '}' + NUL */
3300 Copy(hex_string, d, output_length, char);
3301 d += output_length;
3302 }
3303
3304 *d++ = '}'; /* Done. Add the trailing brace */
3305 }
3306 }
3307 else { /* Here, not in a pattern. Convert the name to a
3308 * string. */
3309
3310 /* If destination is not in utf8, unconditionally
3311 * recode it to be so. This is because \N{} implies
3312 * Unicode semantics, and scalars have to be in utf8
3313 * to guarantee those semantics */
3314 if (! has_utf8) {
3315 SvCUR_set(sv, d - SvPVX_const(sv));
3316 SvPOK_on(sv);
3317 *d = '\0';
3318 /* See Note on sizing above. */
3319 sv_utf8_upgrade_flags_grow(sv,
3320 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3321 len + (STRLEN)(send - s) + 1);
3322 d = SvPVX(sv) + SvCUR(sv);
3323 has_utf8 = TRUE;
3324 } else if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
3325
3326 /* See Note on sizing above. (NOTE: SvCUR() is not
3327 * set correctly here). */
3328 const STRLEN off = d - SvPVX_const(sv);
3329 d = off + SvGROW(sv, off + len + (STRLEN)(send - s) + 1);
3330 }
3331 Copy(str, d, len, char);
3332 d += len;
3333 }
3334 SvREFCNT_dec(res);
3335
3336 /* Deprecate non-approved name syntax */
3337 if (ckWARN_d(WARN_DEPRECATED)) {
3338 bool problematic = FALSE;
3339 char* i = s;
3340
3341 /* For non-ut8 input, look to see that the first
3342 * character is an alpha, then loop through the rest
3343 * checking that each is a continuation */
3344 if (! this_utf8) {
3345 if (! isALPHAU(*i)) problematic = TRUE;
3346 else for (i = s + 1; i < e; i++) {
3347 if (isCHARNAME_CONT(*i)) continue;
3348 problematic = TRUE;
3349 break;
3350 }
3351 }
3352 else {
3353 /* Similarly for utf8. For invariants can check
3354 * directly. We accept anything above the latin1
3355 * range because it is immaterial to Perl if it is
3356 * correct or not, and is expensive to check. But
3357 * it is fairly easy in the latin1 range to convert
3358 * the variants into a single character and check
3359 * those */
3360 if (UTF8_IS_INVARIANT(*i)) {
3361 if (! isALPHAU(*i)) problematic = TRUE;
3362 } else if (UTF8_IS_DOWNGRADEABLE_START(*i)) {
3363 if (! isALPHAU(UNI_TO_NATIVE(TWO_BYTE_UTF8_TO_UNI(*i,
3364 *(i+1)))))
3365 {
3366 problematic = TRUE;
3367 }
3368 }
3369 if (! problematic) for (i = s + UTF8SKIP(s);
3370 i < e;
3371 i+= UTF8SKIP(i))
3372 {
3373 if (UTF8_IS_INVARIANT(*i)) {
3374 if (isCHARNAME_CONT(*i)) continue;
3375 } else if (! UTF8_IS_DOWNGRADEABLE_START(*i)) {
3376 continue;
3377 } else if (isCHARNAME_CONT(
3378 UNI_TO_NATIVE(
3379 TWO_BYTE_UTF8_TO_UNI(*i, *(i+1)))))
3380 {
3381 continue;
3382 }
3383 problematic = TRUE;
3384 break;
3385 }
3386 }
3387 if (problematic) {
3388 /* The e-i passed to the final %.*s makes sure that
3389 * should the trailing NUL be missing that this
3390 * print won't run off the end of the string */
3391 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
3392 "Deprecated character in \\N{...}; marked by <-- HERE in \\N{%.*s<-- HERE %.*s",
3393 (int)(i - s + 1), s, (int)(e - i), i + 1);
3394 }
3395 }
3396 } /* End \N{NAME} */
3397#ifdef EBCDIC
3398 if (!dorange)
3399 native_range = FALSE; /* \N{} is defined to be Unicode */
3400#endif
3401 s = e + 1; /* Point to just after the '}' */
3402 continue;
3403
3404 /* \c is a control character */
3405 case 'c':
3406 s++;
3407 if (s < send) {
3408 *d++ = grok_bslash_c(*s++, has_utf8, 1);
3409 }
3410 else {
3411 yyerror("Missing control char name in \\c");
3412 }
3413 continue;
3414
3415 /* printf-style backslashes, formfeeds, newlines, etc */
3416 case 'b':
3417 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
3418 break;
3419 case 'n':
3420 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
3421 break;
3422 case 'r':
3423 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
3424 break;
3425 case 'f':
3426 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
3427 break;
3428 case 't':
3429 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
3430 break;
3431 case 'e':
3432 *d++ = ASCII_TO_NEED(has_utf8,'\033');
3433 break;
3434 case 'a':
3435 *d++ = ASCII_TO_NEED(has_utf8,'\007');
3436 break;
3437 } /* end switch */
3438
3439 s++;
3440 continue;
3441 } /* end if (backslash) */
3442#ifdef EBCDIC
3443 else
3444 literal_endpoint++;
3445#endif
3446
3447 default_action:
3448 /* If we started with encoded form, or already know we want it,
3449 then encode the next character */
3450 if (! NATIVE_IS_INVARIANT((U8)(*s)) && (this_utf8 || has_utf8)) {
3451 STRLEN len = 1;
3452
3453
3454 /* One might think that it is wasted effort in the case of the
3455 * source being utf8 (this_utf8 == TRUE) to take the next character
3456 * in the source, convert it to an unsigned value, and then convert
3457 * it back again. But the source has not been validated here. The
3458 * routine that does the conversion checks for errors like
3459 * malformed utf8 */
3460
3461 const UV nextuv = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
3462 const STRLEN need = UNISKIP(NATIVE_TO_UNI(nextuv));
3463 if (!has_utf8) {
3464 SvCUR_set(sv, d - SvPVX_const(sv));
3465 SvPOK_on(sv);
3466 *d = '\0';
3467 /* See Note on sizing above. */
3468 sv_utf8_upgrade_flags_grow(sv,
3469 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3470 need + (STRLEN)(send - s) + 1);
3471 d = SvPVX(sv) + SvCUR(sv);
3472 has_utf8 = TRUE;
3473 } else if (need > len) {
3474 /* encoded value larger than old, may need extra space (NOTE:
3475 * SvCUR() is not set correctly here). See Note on sizing
3476 * above. */
3477 const STRLEN off = d - SvPVX_const(sv);
3478 d = SvGROW(sv, off + need + (STRLEN)(send - s) + 1) + off;
3479 }
3480 s += len;
3481
3482 d = (char*)uvchr_to_utf8((U8*)d, nextuv);
3483#ifdef EBCDIC
3484 if (uv > 255 && !dorange)
3485 native_range = FALSE;
3486#endif
3487 }
3488 else {
3489 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
3490 }
3491 } /* while loop to process each character */
3492
3493 /* terminate the string and set up the sv */
3494 *d = '\0';
3495 SvCUR_set(sv, d - SvPVX_const(sv));
3496 if (SvCUR(sv) >= SvLEN(sv))
3497 Perl_croak(aTHX_ "panic: constant overflowed allocated space");
3498
3499 SvPOK_on(sv);
3500 if (PL_encoding && !has_utf8) {
3501 sv_recode_to_utf8(sv, PL_encoding);
3502 if (SvUTF8(sv))
3503 has_utf8 = TRUE;
3504 }
3505 if (has_utf8) {
3506 SvUTF8_on(sv);
3507 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
3508 PL_sublex_info.sub_op->op_private |=
3509 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
3510 }
3511 }
3512
3513 /* shrink the sv if we allocated more than we used */
3514 if (SvCUR(sv) + 5 < SvLEN(sv)) {
3515 SvPV_shrink_to_cur(sv);
3516 }
3517
3518 /* return the substring (via pl_yylval) only if we parsed anything */
3519 if (s > PL_bufptr) {
3520 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) ) {
3521 const char *const key = PL_lex_inpat ? "qr" : "q";
3522 const STRLEN keylen = PL_lex_inpat ? 2 : 1;
3523 const char *type;
3524 STRLEN typelen;
3525
3526 if (PL_lex_inwhat == OP_TRANS) {
3527 type = "tr";
3528 typelen = 2;
3529 } else if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) {
3530 type = "s";
3531 typelen = 1;
3532 } else {
3533 type = "qq";
3534 typelen = 2;
3535 }
3536
3537 sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL,
3538 type, typelen);
3539 }
3540 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3541 } else
3542 SvREFCNT_dec(sv);
3543 return s;
3544}
3545
3546/* S_intuit_more
3547 * Returns TRUE if there's more to the expression (e.g., a subscript),
3548 * FALSE otherwise.
3549 *
3550 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
3551 *
3552 * ->[ and ->{ return TRUE
3553 * { and [ outside a pattern are always subscripts, so return TRUE
3554 * if we're outside a pattern and it's not { or [, then return FALSE
3555 * if we're in a pattern and the first char is a {
3556 * {4,5} (any digits around the comma) returns FALSE
3557 * if we're in a pattern and the first char is a [
3558 * [] returns FALSE
3559 * [SOMETHING] has a funky algorithm to decide whether it's a
3560 * character class or not. It has to deal with things like
3561 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
3562 * anything else returns TRUE
3563 */
3564
3565/* This is the one truly awful dwimmer necessary to conflate C and sed. */
3566
3567STATIC int
3568S_intuit_more(pTHX_ register char *s)
3569{
3570 dVAR;
3571
3572 PERL_ARGS_ASSERT_INTUIT_MORE;
3573
3574 if (PL_lex_brackets)
3575 return TRUE;
3576 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
3577 return TRUE;
3578 if (*s != '{' && *s != '[')
3579 return FALSE;
3580 if (!PL_lex_inpat)
3581 return TRUE;
3582
3583 /* In a pattern, so maybe we have {n,m}. */
3584 if (*s == '{') {
3585 if (regcurly(s)) {
3586 return FALSE;
3587 }
3588 return TRUE;
3589 }
3590
3591 /* On the other hand, maybe we have a character class */
3592
3593 s++;
3594 if (*s == ']' || *s == '^')
3595 return FALSE;
3596 else {
3597 /* this is terrifying, and it works */
3598 int weight = 2; /* let's weigh the evidence */
3599 char seen[256];
3600 unsigned char un_char = 255, last_un_char;
3601 const char * const send = strchr(s,']');
3602 char tmpbuf[sizeof PL_tokenbuf * 4];
3603
3604 if (!send) /* has to be an expression */
3605 return TRUE;
3606
3607 Zero(seen,256,char);
3608 if (*s == '$')
3609 weight -= 3;
3610 else if (isDIGIT(*s)) {
3611 if (s[1] != ']') {
3612 if (isDIGIT(s[1]) && s[2] == ']')
3613 weight -= 10;
3614 }
3615 else
3616 weight -= 100;
3617 }
3618 for (; s < send; s++) {
3619 last_un_char = un_char;
3620 un_char = (unsigned char)*s;
3621 switch (*s) {
3622 case '@':
3623 case '&':
3624 case '$':
3625 weight -= seen[un_char] * 10;
3626 if (isALNUM_lazy_if(s+1,UTF)) {
3627 int len;
3628 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
3629 len = (int)strlen(tmpbuf);
3630 if (len > 1 && gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PV))
3631 weight -= 100;
3632 else
3633 weight -= 10;
3634 }
3635 else if (*s == '$' && s[1] &&
3636 strchr("[#!%*<>()-=",s[1])) {
3637 if (/*{*/ strchr("])} =",s[2]))
3638 weight -= 10;
3639 else
3640 weight -= 1;
3641 }
3642 break;
3643 case '\\':
3644 un_char = 254;
3645 if (s[1]) {
3646 if (strchr("wds]",s[1]))
3647 weight += 100;
3648 else if (seen[(U8)'\''] || seen[(U8)'"'])
3649 weight += 1;
3650 else if (strchr("rnftbxcav",s[1]))
3651 weight += 40;
3652 else if (isDIGIT(s[1])) {
3653 weight += 40;
3654 while (s[1] && isDIGIT(s[1]))
3655 s++;
3656 }
3657 }
3658 else
3659 weight += 100;
3660 break;
3661 case '-':
3662 if (s[1] == '\\')
3663 weight += 50;
3664 if (strchr("aA01! ",last_un_char))
3665 weight += 30;
3666 if (strchr("zZ79~",s[1]))
3667 weight += 30;
3668 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
3669 weight -= 5; /* cope with negative subscript */
3670 break;
3671 default:
3672 if (!isALNUM(last_un_char)
3673 && !(last_un_char == '$' || last_un_char == '@'
3674 || last_un_char == '&')
3675 && isALPHA(*s) && s[1] && isALPHA(s[1])) {
3676 char *d = tmpbuf;
3677 while (isALPHA(*s))
3678 *d++ = *s++;
3679 *d = '\0';
3680 if (keyword(tmpbuf, d - tmpbuf, 0))
3681 weight -= 150;
3682 }
3683 if (un_char == last_un_char + 1)
3684 weight += 5;
3685 weight -= seen[un_char];
3686 break;
3687 }
3688 seen[un_char]++;
3689 }
3690 if (weight >= 0) /* probably a character class */
3691 return FALSE;
3692 }
3693
3694 return TRUE;
3695}
3696
3697/*
3698 * S_intuit_method
3699 *
3700 * Does all the checking to disambiguate
3701 * foo bar
3702 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
3703 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
3704 *
3705 * First argument is the stuff after the first token, e.g. "bar".
3706 *
3707 * Not a method if bar is a filehandle.
3708 * Not a method if foo is a subroutine prototyped to take a filehandle.
3709 * Not a method if it's really "Foo $bar"
3710 * Method if it's "foo $bar"
3711 * Not a method if it's really "print foo $bar"
3712 * Method if it's really "foo package::" (interpreted as package->foo)
3713 * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
3714 * Not a method if bar is a filehandle or package, but is quoted with
3715 * =>
3716 */
3717
3718STATIC int
3719S_intuit_method(pTHX_ char *start, GV *gv, CV *cv)
3720{
3721 dVAR;
3722 char *s = start + (*start == '$');
3723 char tmpbuf[sizeof PL_tokenbuf];
3724 STRLEN len;
3725 GV* indirgv;
3726#ifdef PERL_MAD
3727 int soff;
3728#endif
3729
3730 PERL_ARGS_ASSERT_INTUIT_METHOD;
3731
3732 if (gv) {
3733 if (SvTYPE(gv) == SVt_PVGV && GvIO(gv))
3734 return 0;
3735 if (cv) {
3736 if (SvPOK(cv)) {
3737 const char *proto = SvPVX_const(cv);
3738 if (proto) {
3739 if (*proto == ';')
3740 proto++;
3741 if (*proto == '*')
3742 return 0;
3743 }
3744 }
3745 } else
3746 gv = NULL;
3747 }
3748 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
3749 /* start is the beginning of the possible filehandle/object,
3750 * and s is the end of it
3751 * tmpbuf is a copy of it
3752 */
3753
3754 if (*start == '$') {
3755 if (gv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY ||
3756 isUPPER(*PL_tokenbuf))
3757 return 0;
3758#ifdef PERL_MAD
3759 len = start - SvPVX(PL_linestr);
3760#endif
3761 s = PEEKSPACE(s);
3762#ifdef PERL_MAD
3763 start = SvPVX(PL_linestr) + len;
3764#endif
3765 PL_bufptr = start;
3766 PL_expect = XREF;
3767 return *s == '(' ? FUNCMETH : METHOD;
3768 }
3769 if (!keyword(tmpbuf, len, 0)) {
3770 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
3771 len -= 2;
3772 tmpbuf[len] = '\0';
3773#ifdef PERL_MAD
3774 soff = s - SvPVX(PL_linestr);
3775#endif
3776 goto bare_package;
3777 }
3778 indirgv = gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PVCV);
3779 if (indirgv && GvCVu(indirgv))
3780 return 0;
3781 /* filehandle or package name makes it a method */
3782 if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, 0)) {
3783#ifdef PERL_MAD
3784 soff = s - SvPVX(PL_linestr);
3785#endif
3786 s = PEEKSPACE(s);
3787 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
3788 return 0; /* no assumptions -- "=>" quotes bareword */
3789 bare_package:
3790 start_force(PL_curforce);
3791 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0,
3792 S_newSV_maybe_utf8(aTHX_ tmpbuf, len));
3793 NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
3794 if (PL_madskills)
3795 curmad('X', newSVpvn(start,SvPVX(PL_linestr) + soff - start));
3796 PL_expect = XTERM;
3797 force_next(WORD);
3798 PL_bufptr = s;
3799#ifdef PERL_MAD
3800 PL_bufptr = SvPVX(PL_linestr) + soff; /* restart before space */
3801#endif
3802 return *s == '(' ? FUNCMETH : METHOD;
3803 }
3804 }
3805 return 0;
3806}
3807
3808/* Encoded script support. filter_add() effectively inserts a
3809 * 'pre-processing' function into the current source input stream.
3810 * Note that the filter function only applies to the current source file
3811 * (e.g., it will not affect files 'require'd or 'use'd by this one).
3812 *
3813 * The datasv parameter (which may be NULL) can be used to pass
3814 * private data to this instance of the filter. The filter function
3815 * can recover the SV using the FILTER_DATA macro and use it to
3816 * store private buffers and state information.
3817 *
3818 * The supplied datasv parameter is upgraded to a PVIO type
3819 * and the IoDIRP/IoANY field is used to store the function pointer,
3820 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
3821 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
3822 * private use must be set using malloc'd pointers.
3823 */
3824
3825SV *
3826Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
3827{
3828 dVAR;
3829 if (!funcp)
3830 return NULL;
3831
3832 if (!PL_parser)
3833 return NULL;
3834
3835 if (!PL_rsfp_filters)
3836 PL_rsfp_filters = newAV();
3837 if (!datasv)
3838 datasv = newSV(0);
3839 SvUPGRADE(datasv, SVt_PVIO);
3840 IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
3841 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
3842 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
3843 FPTR2DPTR(void *, IoANY(datasv)),
3844 SvPV_nolen(datasv)));
3845 av_unshift(PL_rsfp_filters, 1);
3846 av_store(PL_rsfp_filters, 0, datasv) ;
3847 return(datasv);
3848}
3849
3850
3851/* Delete most recently added instance of this filter function. */
3852void
3853Perl_filter_del(pTHX_ filter_t funcp)
3854{
3855 dVAR;
3856 SV *datasv;
3857
3858 PERL_ARGS_ASSERT_FILTER_DEL;
3859
3860#ifdef DEBUGGING
3861 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
3862 FPTR2DPTR(void*, funcp)));
3863#endif
3864 if (!PL_parser || !PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
3865 return;
3866 /* if filter is on top of stack (usual case) just pop it off */
3867 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
3868 if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
3869 sv_free(av_pop(PL_rsfp_filters));
3870
3871 return;
3872 }
3873 /* we need to search for the correct entry and clear it */
3874 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
3875}
3876
3877
3878/* Invoke the idxth filter function for the current rsfp. */
3879/* maxlen 0 = read one text line */
3880I32
3881Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
3882{
3883 dVAR;
3884 filter_t funcp;
3885 SV *datasv = NULL;
3886 /* This API is bad. It should have been using unsigned int for maxlen.
3887 Not sure if we want to change the API, but if not we should sanity
3888 check the value here. */
3889 const unsigned int correct_length
3890 = maxlen < 0 ?
3891#ifdef PERL_MICRO
3892 0x7FFFFFFF
3893#else
3894 INT_MAX
3895#endif
3896 : maxlen;
3897
3898 PERL_ARGS_ASSERT_FILTER_READ;
3899
3900 if (!PL_parser || !PL_rsfp_filters)
3901 return -1;
3902 if (idx > AvFILLp(PL_rsfp_filters)) { /* Any more filters? */
3903 /* Provide a default input filter to make life easy. */
3904 /* Note that we append to the line. This is handy. */
3905 DEBUG_P(PerlIO_printf(Perl_debug_log,
3906 "filter_read %d: from rsfp\n", idx));
3907 if (correct_length) {
3908 /* Want a block */
3909 int len ;
3910 const int old_len = SvCUR(buf_sv);
3911
3912 /* ensure buf_sv is large enough */
3913 SvGROW(buf_sv, (STRLEN)(old_len + correct_length + 1)) ;
3914 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
3915 correct_length)) <= 0) {
3916 if (PerlIO_error(PL_rsfp))
3917 return -1; /* error */
3918 else
3919 return 0 ; /* end of file */
3920 }
3921 SvCUR_set(buf_sv, old_len + len) ;
3922 SvPVX(buf_sv)[old_len + len] = '\0';
3923 } else {
3924 /* Want a line */
3925 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
3926 if (PerlIO_error(PL_rsfp))
3927 return -1; /* error */
3928 else
3929 return 0 ; /* end of file */
3930 }
3931 }
3932 return SvCUR(buf_sv);
3933 }
3934 /* Skip this filter slot if filter has been deleted */
3935 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
3936 DEBUG_P(PerlIO_printf(Perl_debug_log,
3937 "filter_read %d: skipped (filter deleted)\n",
3938 idx));
3939 return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
3940 }
3941 /* Get function pointer hidden within datasv */
3942 funcp = DPTR2FPTR(filter_t, IoANY(datasv));
3943 DEBUG_P(PerlIO_printf(Perl_debug_log,
3944 "filter_read %d: via function %p (%s)\n",
3945 idx, (void*)datasv, SvPV_nolen_const(datasv)));
3946 /* Call function. The function is expected to */
3947 /* call "FILTER_READ(idx+1, buf_sv)" first. */
3948 /* Return: <0:error, =0:eof, >0:not eof */
3949 return (*funcp)(aTHX_ idx, buf_sv, correct_length);
3950}
3951
3952STATIC char *
3953S_filter_gets(pTHX_ register SV *sv, STRLEN append)
3954{
3955 dVAR;
3956
3957 PERL_ARGS_ASSERT_FILTER_GETS;
3958
3959#ifdef PERL_CR_FILTER
3960 if (!PL_rsfp_filters) {
3961 filter_add(S_cr_textfilter,NULL);
3962 }
3963#endif
3964 if (PL_rsfp_filters) {
3965 if (!append)
3966 SvCUR_set(sv, 0); /* start with empty line */
3967 if (FILTER_READ(0, sv, 0) > 0)
3968 return ( SvPVX(sv) ) ;
3969 else
3970 return NULL ;
3971 }
3972 else
3973 return (sv_gets(sv, PL_rsfp, append));
3974}
3975
3976STATIC HV *
3977S_find_in_my_stash(pTHX_ const char *pkgname, STRLEN len)
3978{
3979 dVAR;
3980 GV *gv;
3981
3982 PERL_ARGS_ASSERT_FIND_IN_MY_STASH;
3983
3984 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
3985 return PL_curstash;
3986
3987 if (len > 2 &&
3988 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
3989 (gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVHV)))
3990 {
3991 return GvHV(gv); /* Foo:: */
3992 }
3993
3994 /* use constant CLASS => 'MyClass' */
3995 gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVCV);
3996 if (gv && GvCV(gv)) {
3997 SV * const sv = cv_const_sv(GvCV(gv));
3998 if (sv)
3999 pkgname = SvPV_const(sv, len);
4000 }
4001
4002 return gv_stashpvn(pkgname, len, 0);
4003}
4004
4005/*
4006 * S_readpipe_override
4007 * Check whether readpipe() is overridden, and generates the appropriate
4008 * optree, provided sublex_start() is called afterwards.
4009 */
4010STATIC void
4011S_readpipe_override(pTHX)
4012{
4013 GV **gvp;
4014 GV *gv_readpipe = gv_fetchpvs("readpipe", GV_NOTQUAL, SVt_PVCV);
4015 pl_yylval.ival = OP_BACKTICK;
4016 if ((gv_readpipe
4017 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe))
4018 ||
4019 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readpipe", FALSE))
4020 && (gv_readpipe = *gvp) && isGV_with_GP(gv_readpipe)
4021 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe)))
4022 {
4023 PL_lex_op = (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
4024 op_append_elem(OP_LIST,
4025 newSVOP(OP_CONST, 0, &PL_sv_undef), /* value will be read later */
4026 newCVREF(0, newGVOP(OP_GV, 0, gv_readpipe))));
4027 }
4028}
4029
4030#ifdef PERL_MAD
4031 /*
4032 * Perl_madlex
4033 * The intent of this yylex wrapper is to minimize the changes to the
4034 * tokener when we aren't interested in collecting madprops. It remains
4035 * to be seen how successful this strategy will be...
4036 */
4037
4038int
4039Perl_madlex(pTHX)
4040{
4041 int optype;
4042 char *s = PL_bufptr;
4043
4044 /* make sure PL_thiswhite is initialized */
4045 PL_thiswhite = 0;
4046 PL_thismad = 0;
4047
4048 /* just do what yylex would do on pending identifier; leave PL_thiswhite alone */
4049 if (PL_lex_state != LEX_KNOWNEXT && PL_pending_ident)
4050 return S_pending_ident(aTHX);
4051
4052 /* previous token ate up our whitespace? */
4053 if (!PL_lasttoke && PL_nextwhite) {
4054 PL_thiswhite = PL_nextwhite;
4055 PL_nextwhite = 0;
4056 }
4057
4058 /* isolate the token, and figure out where it is without whitespace */
4059 PL_realtokenstart = -1;
4060 PL_thistoken = 0;
4061 optype = yylex();
4062 s = PL_bufptr;
4063 assert(PL_curforce < 0);
4064
4065 if (!PL_thismad || PL_thismad->mad_key == '^') { /* not forced already? */
4066 if (!PL_thistoken) {
4067 if (PL_realtokenstart < 0 || !CopLINE(PL_curcop))
4068 PL_thistoken = newSVpvs("");
4069 else {
4070 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
4071 PL_thistoken = newSVpvn(tstart, s - tstart);
4072 }
4073 }
4074 if (PL_thismad) /* install head */
4075 CURMAD('X', PL_thistoken);
4076 }
4077
4078 /* last whitespace of a sublex? */
4079 if (optype == ')' && PL_endwhite) {
4080 CURMAD('X', PL_endwhite);
4081 }
4082
4083 if (!PL_thismad) {
4084
4085 /* if no whitespace and we're at EOF, bail. Otherwise fake EOF below. */
4086 if (!PL_thiswhite && !PL_endwhite && !optype) {
4087 sv_free(PL_thistoken);
4088 PL_thistoken = 0;
4089 return 0;
4090 }
4091
4092 /* put off final whitespace till peg */
4093 if (optype == ';' && !PL_rsfp) {
4094 PL_nextwhite = PL_thiswhite;
4095 PL_thiswhite = 0;
4096 }
4097 else if (PL_thisopen) {
4098 CURMAD('q', PL_thisopen);
4099 if (PL_thistoken)
4100 sv_free(PL_thistoken);
4101 PL_thistoken = 0;
4102 }
4103 else {
4104 /* Store actual token text as madprop X */
4105 CURMAD('X', PL_thistoken);
4106 }
4107
4108 if (PL_thiswhite) {
4109 /* add preceding whitespace as madprop _ */
4110 CURMAD('_', PL_thiswhite);
4111 }
4112
4113 if (PL_thisstuff) {
4114 /* add quoted material as madprop = */
4115 CURMAD('=', PL_thisstuff);
4116 }
4117
4118 if (PL_thisclose) {
4119 /* add terminating quote as madprop Q */
4120 CURMAD('Q', PL_thisclose);
4121 }
4122 }
4123
4124 /* special processing based on optype */
4125
4126 switch (optype) {
4127
4128 /* opval doesn't need a TOKEN since it can already store mp */
4129 case WORD:
4130 case METHOD:
4131 case FUNCMETH:
4132 case THING:
4133 case PMFUNC:
4134 case PRIVATEREF:
4135 case FUNC0SUB:
4136 case UNIOPSUB:
4137 case LSTOPSUB:
4138 if (pl_yylval.opval)
4139 append_madprops(PL_thismad, pl_yylval.opval, 0);
4140 PL_thismad = 0;
4141 return optype;
4142
4143 /* fake EOF */
4144 case 0:
4145 optype = PEG;
4146 if (PL_endwhite) {
4147 addmad(newMADsv('p', PL_endwhite), &PL_thismad, 0);
4148 PL_endwhite = 0;
4149 }
4150 break;
4151
4152 case ']':
4153 case '}':
4154 if (PL_faketokens)
4155 break;
4156 /* remember any fake bracket that lexer is about to discard */
4157 if (PL_lex_brackets == 1 &&
4158 ((expectation)PL_lex_brackstack[0] & XFAKEBRACK))
4159 {
4160 s = PL_bufptr;
4161 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
4162 s++;
4163 if (*s == '}') {
4164 PL_thiswhite = newSVpvn(PL_bufptr, ++s - PL_bufptr);
4165 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
4166 PL_thiswhite = 0;
4167 PL_bufptr = s - 1;
4168 break; /* don't bother looking for trailing comment */
4169 }
4170 else
4171 s = PL_bufptr;
4172 }
4173 if (optype == ']')
4174 break;
4175 /* FALLTHROUGH */
4176
4177 /* attach a trailing comment to its statement instead of next token */
4178 case ';':
4179 if (PL_faketokens)
4180 break;
4181 if (PL_bufptr > PL_oldbufptr && PL_bufptr[-1] == optype) {
4182 s = PL_bufptr;
4183 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
4184 s++;
4185 if (*s == '\n' || *s == '#') {
4186 while (s < PL_bufend && *s != '\n')
4187 s++;
4188 if (s < PL_bufend)
4189 s++;
4190 PL_thiswhite = newSVpvn(PL_bufptr, s - PL_bufptr);
4191 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
4192 PL_thiswhite = 0;
4193 PL_bufptr = s;
4194 }
4195 }
4196 break;
4197
4198 /* pval */
4199 case LABEL:
4200 break;
4201
4202 /* ival */
4203 default:
4204 break;
4205
4206 }
4207
4208 /* Create new token struct. Note: opvals return early above. */
4209 pl_yylval.tkval = newTOKEN(optype, pl_yylval, PL_thismad);
4210 PL_thismad = 0;
4211 return optype;
4212}
4213#endif
4214
4215STATIC char *
4216S_tokenize_use(pTHX_ int is_use, char *s) {
4217 dVAR;
4218
4219 PERL_ARGS_ASSERT_TOKENIZE_USE;
4220
4221 if (PL_expect != XSTATE)
4222 yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
4223 is_use ? "use" : "no"));
4224 s = SKIPSPACE1(s);
4225 if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
4226 s = force_version(s, TRUE);
4227 if (*s == ';' || *s == '}'
4228 || (s = SKIPSPACE1(s), (*s == ';' || *s == '}'))) {
4229 start_force(PL_curforce);
4230 NEXTVAL_NEXTTOKE.opval = NULL;
4231 force_next(WORD);
4232 }
4233 else if (*s == 'v') {
4234 s = force_word(s,WORD,FALSE,TRUE,FALSE);
4235 s = force_version(s, FALSE);
4236 }
4237 }
4238 else {
4239 s = force_word(s,WORD,FALSE,TRUE,FALSE);
4240 s = force_version(s, FALSE);
4241 }
4242 pl_yylval.ival = is_use;
4243 return s;
4244}
4245#ifdef DEBUGGING
4246 static const char* const exp_name[] =
4247 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
4248 "ATTRTERM", "TERMBLOCK", "TERMORDORDOR"
4249 };
4250#endif
4251
4252#define word_takes_any_delimeter(p,l) S_word_takes_any_delimeter(p,l)
4253STATIC bool
4254S_word_takes_any_delimeter(char *p, STRLEN len)
4255{
4256 return (len == 1 && strchr("msyq", p[0])) ||
4257 (len == 2 && (
4258 (p[0] == 't' && p[1] == 'r') ||
4259 (p[0] == 'q' && strchr("qwxr", p[1]))));
4260}
4261
4262/*
4263 yylex
4264
4265 Works out what to call the token just pulled out of the input
4266 stream. The yacc parser takes care of taking the ops we return and
4267 stitching them into a tree.
4268
4269 Returns:
4270 PRIVATEREF
4271
4272 Structure:
4273 if read an identifier
4274 if we're in a my declaration
4275 croak if they tried to say my($foo::bar)
4276 build the ops for a my() declaration
4277 if it's an access to a my() variable
4278 are we in a sort block?
4279 croak if my($a); $a <=> $b
4280 build ops for access to a my() variable
4281 if in a dq string, and they've said @foo and we can't find @foo
4282 croak
4283 build ops for a bareword
4284 if we already built the token before, use it.
4285*/
4286
4287
4288#ifdef __SC__
4289#pragma segment Perl_yylex
4290#endif
4291int
4292Perl_yylex(pTHX)
4293{
4294 dVAR;
4295 register char *s = PL_bufptr;
4296 register char *d;
4297 STRLEN len;
4298 bool bof = FALSE;
4299 U32 fake_eof = 0;
4300
4301 /* orig_keyword, gvp, and gv are initialized here because
4302 * jump to the label just_a_word_zero can bypass their
4303 * initialization later. */
4304 I32 orig_keyword = 0;
4305 GV *gv = NULL;
4306 GV **gvp = NULL;
4307
4308 DEBUG_T( {
4309 SV* tmp = newSVpvs("");
4310 PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n",
4311 (IV)CopLINE(PL_curcop),
4312 lex_state_names[PL_lex_state],
4313 exp_name[PL_expect],
4314 pv_display(tmp, s, strlen(s), 0, 60));
4315 SvREFCNT_dec(tmp);
4316 } );
4317 /* check if there's an identifier for us to look at */
4318 if (PL_lex_state != LEX_KNOWNEXT && PL_pending_ident)
4319 return REPORT(S_pending_ident(aTHX));
4320
4321 /* no identifier pending identification */
4322
4323 switch (PL_lex_state) {
4324#ifdef COMMENTARY
4325 case LEX_NORMAL: /* Some compilers will produce faster */
4326 case LEX_INTERPNORMAL: /* code if we comment these out. */
4327 break;
4328#endif
4329
4330 /* when we've already built the next token, just pull it out of the queue */
4331 case LEX_KNOWNEXT:
4332#ifdef PERL_MAD
4333 PL_lasttoke--;
4334 pl_yylval = PL_nexttoke[PL_lasttoke].next_val;
4335 if (PL_madskills) {
4336 PL_thismad = PL_nexttoke[PL_lasttoke].next_mad;
4337 PL_nexttoke[PL_lasttoke].next_mad = 0;
4338 if (PL_thismad && PL_thismad->mad_key == '_') {
4339 PL_thiswhite = MUTABLE_SV(PL_thismad->mad_val);
4340 PL_thismad->mad_val = 0;
4341 mad_free(PL_thismad);
4342 PL_thismad = 0;
4343 }
4344 }
4345 if (!PL_lasttoke) {
4346 PL_lex_state = PL_lex_defer;
4347 PL_expect = PL_lex_expect;
4348 PL_lex_defer = LEX_NORMAL;
4349 if (!PL_nexttoke[PL_lasttoke].next_type)
4350 return yylex();
4351 }
4352#else
4353 PL_nexttoke--;
4354 pl_yylval = PL_nextval[PL_nexttoke];
4355 if (!PL_nexttoke) {
4356 PL_lex_state = PL_lex_defer;
4357 PL_expect = PL_lex_expect;
4358 PL_lex_defer = LEX_NORMAL;
4359 }
4360#endif
4361 {
4362 I32 next_type;
4363#ifdef PERL_MAD
4364 next_type = PL_nexttoke[PL_lasttoke].next_type;
4365#else
4366 next_type = PL_nexttype[PL_nexttoke];
4367#endif
4368 if (next_type & (7<<24)) {
4369 if (next_type & (1<<24)) {
4370 if (PL_lex_brackets > 100)
4371 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
4372 PL_lex_brackstack[PL_lex_brackets++] =
4373 (char) ((next_type >> 16) & 0xff);
4374 }
4375 if (next_type & (2<<24))
4376 PL_lex_allbrackets++;
4377 if (next_type & (4<<24))
4378 PL_lex_allbrackets--;
4379 next_type &= 0xffff;
4380 }
4381#ifdef PERL_MAD
4382 /* FIXME - can these be merged? */
4383 return next_type;
4384#else
4385 return REPORT(next_type);
4386#endif
4387 }
4388
4389 /* interpolated case modifiers like \L \U, including \Q and \E.
4390 when we get here, PL_bufptr is at the \
4391 */
4392 case LEX_INTERPCASEMOD:
4393#ifdef DEBUGGING
4394 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
4395 Perl_croak(aTHX_ "panic: INTERPCASEMOD");
4396#endif
4397 /* handle \E or end of string */
4398 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
4399 /* if at a \E */
4400 if (PL_lex_casemods) {
4401 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
4402 PL_lex_casestack[PL_lex_casemods] = '\0';
4403
4404 if (PL_bufptr != PL_bufend
4405 && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q')) {
4406 PL_bufptr += 2;
4407 PL_lex_state = LEX_INTERPCONCAT;
4408#ifdef PERL_MAD
4409 if (PL_madskills)
4410 PL_thistoken = newSVpvs("\\E");
4411#endif
4412 }
4413 PL_lex_allbrackets--;
4414 return REPORT(')');
4415 }
4416#ifdef PERL_MAD
4417 while (PL_bufptr != PL_bufend &&
4418 PL_bufptr[0] == '\\' && PL_bufptr[1] == 'E') {
4419 if (!PL_thiswhite)
4420 PL_thiswhite = newSVpvs("");
4421 sv_catpvn(PL_thiswhite, PL_bufptr, 2);
4422 PL_bufptr += 2;
4423 }
4424#else
4425 if (PL_bufptr != PL_bufend)
4426 PL_bufptr += 2;
4427#endif
4428 PL_lex_state = LEX_INTERPCONCAT;
4429 return yylex();
4430 }
4431 else {
4432 DEBUG_T({ PerlIO_printf(Perl_debug_log,
4433 "### Saw case modifier\n"); });
4434 s = PL_bufptr + 1;
4435 if (s[1] == '\\' && s[2] == 'E') {
4436#ifdef PERL_MAD
4437 if (!PL_thiswhite)
4438 PL_thiswhite = newSVpvs("");
4439 sv_catpvn(PL_thiswhite, PL_bufptr, 4);
4440#endif
4441 PL_bufptr = s + 3;
4442 PL_lex_state = LEX_INTERPCONCAT;
4443 return yylex();
4444 }
4445 else {
4446 I32 tmp;
4447 if (!PL_madskills) /* when just compiling don't need correct */
4448 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
4449 tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */
4450 if ((*s == 'L' || *s == 'U') &&
4451 (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U'))) {
4452 PL_lex_casestack[--PL_lex_casemods] = '\0';
4453 PL_lex_allbrackets--;
4454 return REPORT(')');
4455 }
4456 if (PL_lex_casemods > 10)
4457 Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
4458 PL_lex_casestack[PL_lex_casemods++] = *s;
4459 PL_lex_casestack[PL_lex_casemods] = '\0';
4460 PL_lex_state = LEX_INTERPCONCAT;
4461 start_force(PL_curforce);
4462 NEXTVAL_NEXTTOKE.ival = 0;
4463 force_next((2<<24)|'(');
4464 start_force(PL_curforce);
4465 if (*s == 'l')
4466 NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
4467 else if (*s == 'u')
4468 NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
4469 else if (*s == 'L')
4470 NEXTVAL_NEXTTOKE.ival = OP_LC;
4471 else if (*s == 'U')
4472 NEXTVAL_NEXTTOKE.ival = OP_UC;
4473 else if (*s == 'Q')
4474 NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
4475 else
4476 Perl_croak(aTHX_ "panic: yylex");
4477 if (PL_madskills) {
4478 SV* const tmpsv = newSVpvs("\\ ");
4479 /* replace the space with the character we want to escape
4480 */
4481 SvPVX(tmpsv)[1] = *s;
4482 curmad('_', tmpsv);
4483 }
4484 PL_bufptr = s + 1;
4485 }
4486 force_next(FUNC);
4487 if (PL_lex_starts) {
4488 s = PL_bufptr;
4489 PL_lex_starts = 0;
4490#ifdef PERL_MAD
4491 if (PL_madskills) {
4492 if (PL_thistoken)
4493 sv_free(PL_thistoken);
4494 PL_thistoken = newSVpvs("");
4495 }
4496#endif
4497 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4498 if (PL_lex_casemods == 1 && PL_lex_inpat)
4499 OPERATOR(',');
4500 else
4501 Aop(OP_CONCAT);
4502 }
4503 else
4504 return yylex();
4505 }
4506
4507 case LEX_INTERPPUSH:
4508 return REPORT(sublex_push());
4509
4510 case LEX_INTERPSTART:
4511 if (PL_bufptr == PL_bufend)
4512 return REPORT(sublex_done());
4513 DEBUG_T({ PerlIO_printf(Perl_debug_log,
4514 "### Interpolated variable\n"); });
4515 PL_expect = XTERM;
4516 PL_lex_dojoin = (*PL_bufptr == '@');
4517 PL_lex_state = LEX_INTERPNORMAL;
4518 if (PL_lex_dojoin) {
4519 start_force(PL_curforce);
4520 NEXTVAL_NEXTTOKE.ival = 0;
4521 force_next(',');
4522 start_force(PL_curforce);
4523 force_ident("\"", '$');
4524 start_force(PL_curforce);
4525 NEXTVAL_NEXTTOKE.ival = 0;
4526 force_next('$');
4527 start_force(PL_curforce);
4528 NEXTVAL_NEXTTOKE.ival = 0;
4529 force_next((2<<24)|'(');
4530 start_force(PL_curforce);
4531 NEXTVAL_NEXTTOKE.ival = OP_JOIN; /* emulate join($", ...) */
4532 force_next(FUNC);
4533 }
4534 if (PL_lex_starts++) {
4535 s = PL_bufptr;
4536#ifdef PERL_MAD
4537 if (PL_madskills) {
4538 if (PL_thistoken)
4539 sv_free(PL_thistoken);
4540 PL_thistoken = newSVpvs("");
4541 }
4542#endif
4543 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4544 if (!PL_lex_casemods && PL_lex_inpat)
4545 OPERATOR(',');
4546 else
4547 Aop(OP_CONCAT);
4548 }
4549 return yylex();
4550
4551 case LEX_INTERPENDMAYBE:
4552 if (intuit_more(PL_bufptr)) {
4553 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
4554 break;
4555 }
4556 /* FALL THROUGH */
4557
4558 case LEX_INTERPEND:
4559 if (PL_lex_dojoin) {
4560 PL_lex_dojoin = FALSE;
4561 PL_lex_state = LEX_INTERPCONCAT;
4562#ifdef PERL_MAD
4563 if (PL_madskills) {
4564 if (PL_thistoken)
4565 sv_free(PL_thistoken);
4566 PL_thistoken = newSVpvs("");
4567 }
4568#endif
4569 PL_lex_allbrackets--;
4570 return REPORT(')');
4571 }
4572 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
4573 && SvEVALED(PL_lex_repl))
4574 {
4575 if (PL_bufptr != PL_bufend)
4576 Perl_croak(aTHX_ "Bad evalled substitution pattern");
4577 PL_lex_repl = NULL;
4578 }
4579 /* FALLTHROUGH */
4580 case LEX_INTERPCONCAT:
4581#ifdef DEBUGGING
4582 if (PL_lex_brackets)
4583 Perl_croak(aTHX_ "panic: INTERPCONCAT");
4584#endif
4585 if (PL_bufptr == PL_bufend)
4586 return REPORT(sublex_done());
4587
4588 if (SvIVX(PL_linestr) == '\'') {
4589 SV *sv = newSVsv(PL_linestr);
4590 if (!PL_lex_inpat)
4591 sv = tokeq(sv);
4592 else if ( PL_hints & HINT_NEW_RE )
4593 sv = new_constant(NULL, 0, "qr", sv, sv, "q", 1);
4594 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
4595 s = PL_bufend;
4596 }
4597 else {
4598 s = scan_const(PL_bufptr);
4599 if (*s == '\\')
4600 PL_lex_state = LEX_INTERPCASEMOD;
4601 else
4602 PL_lex_state = LEX_INTERPSTART;
4603 }
4604
4605 if (s != PL_bufptr) {
4606 start_force(PL_curforce);
4607 if (PL_madskills) {
4608 curmad('X', newSVpvn(PL_bufptr,s-PL_bufptr));
4609 }
4610 NEXTVAL_NEXTTOKE = pl_yylval;
4611 PL_expect = XTERM;
4612 force_next(THING);
4613 if (PL_lex_starts++) {
4614#ifdef PERL_MAD
4615 if (PL_madskills) {
4616 if (PL_thistoken)
4617 sv_free(PL_thistoken);
4618 PL_thistoken = newSVpvs("");
4619 }
4620#endif
4621 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4622 if (!PL_lex_casemods && PL_lex_inpat)
4623 OPERATOR(',');
4624 else
4625 Aop(OP_CONCAT);
4626 }
4627 else {
4628 PL_bufptr = s;
4629 return yylex();
4630 }
4631 }
4632
4633 return yylex();
4634 case LEX_FORMLINE:
4635 PL_lex_state = LEX_NORMAL;
4636 s = scan_formline(PL_bufptr);
4637 if (!PL_lex_formbrack)
4638 goto rightbracket;
4639 OPERATOR(';');
4640 }
4641
4642 s = PL_bufptr;
4643 PL_oldoldbufptr = PL_oldbufptr;
4644 PL_oldbufptr = s;
4645
4646 retry:
4647#ifdef PERL_MAD
4648 if (PL_thistoken) {
4649 sv_free(PL_thistoken);
4650 PL_thistoken = 0;
4651 }
4652 PL_realtokenstart = s - SvPVX(PL_linestr); /* assume but undo on ws */
4653#endif
4654 switch (*s) {
4655 default:
4656 if (isIDFIRST_lazy_if(s,UTF))
4657 goto keylookup;
4658 {
4659 unsigned char c = *s;
4660 len = UTF ? Perl_utf8_length(aTHX_ (U8 *) PL_linestart, (U8 *) s) : (STRLEN) (s - PL_linestart);
4661 if (len > UNRECOGNIZED_PRECEDE_COUNT) {
4662 d = UTF ? (char *) Perl_utf8_hop(aTHX_ (U8 *) s, -UNRECOGNIZED_PRECEDE_COUNT) : s - UNRECOGNIZED_PRECEDE_COUNT;
4663 } else {
4664 d = PL_linestart;
4665 }
4666 *s = '\0';
4667 Perl_croak(aTHX_ "Unrecognized character \\x%02X; marked by <-- HERE after %s<-- HERE near column %d", c, d, (int) len + 1);
4668 }
4669 case 4:
4670 case 26:
4671 goto fake_eof; /* emulate EOF on ^D or ^Z */
4672 case 0:
4673#ifdef PERL_MAD
4674 if (PL_madskills)
4675 PL_faketokens = 0;
4676#endif
4677 if (!PL_rsfp) {
4678 PL_last_uni = 0;
4679 PL_last_lop = 0;
4680 if (PL_lex_brackets &&
4681 PL_lex_brackstack[PL_lex_brackets-1] != XFAKEEOF) {
4682 yyerror((const char *)
4683 (PL_lex_formbrack
4684 ? "Format not terminated"
4685 : "Missing right curly or square bracket"));
4686 }
4687 DEBUG_T( { PerlIO_printf(Perl_debug_log,
4688 "### Tokener got EOF\n");
4689 } );
4690 TOKEN(0);
4691 }
4692 if (s++ < PL_bufend)
4693 goto retry; /* ignore stray nulls */
4694 PL_last_uni = 0;
4695 PL_last_lop = 0;
4696 if (!PL_in_eval && !PL_preambled) {
4697 PL_preambled = TRUE;
4698#ifdef PERL_MAD
4699 if (PL_madskills)
4700 PL_faketokens = 1;
4701#endif
4702 if (PL_perldb) {
4703 /* Generate a string of Perl code to load the debugger.
4704 * If PERL5DB is set, it will return the contents of that,
4705 * otherwise a compile-time require of perl5db.pl. */
4706
4707 const char * const pdb = PerlEnv_getenv("PERL5DB");
4708
4709 if (pdb) {
4710 sv_setpv(PL_linestr, pdb);
4711 sv_catpvs(PL_linestr,";");
4712 } else {
4713 SETERRNO(0,SS_NORMAL);
4714 sv_setpvs(PL_linestr, "BEGIN { require 'perl5db.pl' };");
4715 }
4716 } else
4717 sv_setpvs(PL_linestr,"");
4718 if (PL_preambleav) {
4719 SV **svp = AvARRAY(PL_preambleav);
4720 SV **const end = svp + AvFILLp(PL_preambleav);
4721 while(svp <= end) {
4722 sv_catsv(PL_linestr, *svp);
4723 ++svp;
4724 sv_catpvs(PL_linestr, ";");
4725 }
4726 sv_free(MUTABLE_SV(PL_preambleav));
4727 PL_preambleav = NULL;
4728 }
4729 if (PL_minus_E)
4730 sv_catpvs(PL_linestr,
4731 "use feature ':5." STRINGIFY(PERL_VERSION) "';");
4732 if (PL_minus_n || PL_minus_p) {
4733 sv_catpvs(PL_linestr, "LINE: while (<>) {"/*}*/);
4734 if (PL_minus_l)
4735 sv_catpvs(PL_linestr,"chomp;");
4736 if (PL_minus_a) {
4737 if (PL_minus_F) {
4738 if ((*PL_splitstr == '/' || *PL_splitstr == '\''
4739 || *PL_splitstr == '"')
4740 && strchr(PL_splitstr + 1, *PL_splitstr))
4741 Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
4742 else {
4743 /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
4744 bytes can be used as quoting characters. :-) */
4745 const char *splits = PL_splitstr;
4746 sv_catpvs(PL_linestr, "our @F=split(q\0");
4747 do {
4748 /* Need to \ \s */
4749 if (*splits == '\\')
4750 sv_catpvn(PL_linestr, splits, 1);
4751 sv_catpvn(PL_linestr, splits, 1);
4752 } while (*splits++);
4753 /* This loop will embed the trailing NUL of
4754 PL_linestr as the last thing it does before
4755 terminating. */
4756 sv_catpvs(PL_linestr, ");");
4757 }
4758 }
4759 else
4760 sv_catpvs(PL_linestr,"our @F=split(' ');");
4761 }
4762 }
4763 sv_catpvs(PL_linestr, "\n");
4764 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4765 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4766 PL_last_lop = PL_last_uni = NULL;
4767 if ((PERLDB_LINE || PERLDB_SAVESRC) && PL_curstash != PL_debstash)
4768 update_debugger_info(PL_linestr, NULL, 0);
4769 goto retry;
4770 }
4771 do {
4772 fake_eof = 0;
4773 bof = PL_rsfp ? TRUE : FALSE;
4774 if (0) {
4775 fake_eof:
4776 fake_eof = LEX_FAKE_EOF;
4777 }
4778 PL_bufptr = PL_bufend;
4779 CopLINE_inc(PL_curcop);
4780 if (!lex_next_chunk(fake_eof)) {
4781 CopLINE_dec(PL_curcop);
4782 s = PL_bufptr;
4783 TOKEN(';'); /* not infinite loop because rsfp is NULL now */
4784 }
4785 CopLINE_dec(PL_curcop);
4786#ifdef PERL_MAD
4787 if (!PL_rsfp)
4788 PL_realtokenstart = -1;
4789#endif
4790 s = PL_bufptr;
4791 /* If it looks like the start of a BOM or raw UTF-16,
4792 * check if it in fact is. */
4793 if (bof && PL_rsfp &&
4794 (*s == 0 ||
4795 *(U8*)s == 0xEF ||
4796 *(U8*)s >= 0xFE ||
4797 s[1] == 0)) {
4798 Off_t offset = (IV)PerlIO_tell(PL_rsfp);
4799 bof = (offset == (Off_t)SvCUR(PL_linestr));
4800#if defined(PERLIO_USING_CRLF) && defined(PERL_TEXTMODE_SCRIPTS)
4801 /* offset may include swallowed CR */
4802 if (!bof)
4803 bof = (offset == (Off_t)SvCUR(PL_linestr)+1);
4804#endif
4805 if (bof) {
4806 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4807 s = swallow_bom((U8*)s);
4808 }
4809 }
4810 if (PL_parser->in_pod) {
4811 /* Incest with pod. */
4812#ifdef PERL_MAD
4813 if (PL_madskills)
4814 sv_catsv(PL_thiswhite, PL_linestr);
4815#endif
4816 if (*s == '=' && strnEQ(s, "=cut", 4) && !isALPHA(s[4])) {
4817 sv_setpvs(PL_linestr, "");
4818 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4819 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4820 PL_last_lop = PL_last_uni = NULL;
4821 PL_parser->in_pod = 0;
4822 }
4823 }
4824 if (PL_rsfp)
4825 incline(s);
4826 } while (PL_parser->in_pod);
4827 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
4828 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4829 PL_last_lop = PL_last_uni = NULL;
4830 if (CopLINE(PL_curcop) == 1) {
4831 while (s < PL_bufend && isSPACE(*s))
4832 s++;
4833 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
4834 s++;
4835#ifdef PERL_MAD
4836 if (PL_madskills)
4837 PL_thiswhite = newSVpvn(PL_linestart, s - PL_linestart);
4838#endif
4839 d = NULL;
4840 if (!PL_in_eval) {
4841 if (*s == '#' && *(s+1) == '!')
4842 d = s + 2;
4843#ifdef ALTERNATE_SHEBANG
4844 else {
4845 static char const as[] = ALTERNATE_SHEBANG;
4846 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
4847 d = s + (sizeof(as) - 1);
4848 }
4849#endif /* ALTERNATE_SHEBANG */
4850 }
4851 if (d) {
4852 char *ipath;
4853 char *ipathend;
4854
4855 while (isSPACE(*d))
4856 d++;
4857 ipath = d;
4858 while (*d && !isSPACE(*d))
4859 d++;
4860 ipathend = d;
4861
4862#ifdef ARG_ZERO_IS_SCRIPT
4863 if (ipathend > ipath) {
4864 /*
4865 * HP-UX (at least) sets argv[0] to the script name,
4866 * which makes $^X incorrect. And Digital UNIX and Linux,
4867 * at least, set argv[0] to the basename of the Perl
4868 * interpreter. So, having found "#!", we'll set it right.
4869 */
4870 SV * const x = GvSV(gv_fetchpvs("\030", GV_ADD|GV_NOTQUAL,
4871 SVt_PV)); /* $^X */
4872 assert(SvPOK(x) || SvGMAGICAL(x));
4873 if (sv_eq(x, CopFILESV(PL_curcop))) {
4874 sv_setpvn(x, ipath, ipathend - ipath);
4875 SvSETMAGIC(x);
4876 }
4877 else {
4878 STRLEN blen;
4879 STRLEN llen;
4880 const char *bstart = SvPV_const(CopFILESV(PL_curcop),blen);
4881 const char * const lstart = SvPV_const(x,llen);
4882 if (llen < blen) {
4883 bstart += blen - llen;
4884 if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
4885 sv_setpvn(x, ipath, ipathend - ipath);
4886 SvSETMAGIC(x);
4887 }
4888 }
4889 }
4890 TAINT_NOT; /* $^X is always tainted, but that's OK */
4891 }
4892#endif /* ARG_ZERO_IS_SCRIPT */
4893
4894 /*
4895 * Look for options.
4896 */
4897 d = instr(s,"perl -");
4898 if (!d) {
4899 d = instr(s,"perl");
4900#if defined(DOSISH)
4901 /* avoid getting into infinite loops when shebang
4902 * line contains "Perl" rather than "perl" */
4903 if (!d) {
4904 for (d = ipathend-4; d >= ipath; --d) {
4905 if ((*d == 'p' || *d == 'P')
4906 && !ibcmp(d, "perl", 4))
4907 {
4908 break;
4909 }
4910 }
4911 if (d < ipath)
4912 d = NULL;
4913 }
4914#endif
4915 }
4916#ifdef ALTERNATE_SHEBANG
4917 /*
4918 * If the ALTERNATE_SHEBANG on this system starts with a
4919 * character that can be part of a Perl expression, then if
4920 * we see it but not "perl", we're probably looking at the
4921 * start of Perl code, not a request to hand off to some
4922 * other interpreter. Similarly, if "perl" is there, but
4923 * not in the first 'word' of the line, we assume the line
4924 * contains the start of the Perl program.
4925 */
4926 if (d && *s != '#') {
4927 const char *c = ipath;
4928 while (*c && !strchr("; \t\r\n\f\v#", *c))
4929 c++;
4930 if (c < d)
4931 d = NULL; /* "perl" not in first word; ignore */
4932 else
4933 *s = '#'; /* Don't try to parse shebang line */
4934 }
4935#endif /* ALTERNATE_SHEBANG */
4936 if (!d &&
4937 *s == '#' &&
4938 ipathend > ipath &&
4939 !PL_minus_c &&
4940 !instr(s,"indir") &&
4941 instr(PL_origargv[0],"perl"))
4942 {
4943 dVAR;
4944 char **newargv;
4945
4946 *ipathend = '\0';
4947 s = ipathend + 1;
4948 while (s < PL_bufend && isSPACE(*s))
4949 s++;
4950 if (s < PL_bufend) {
4951 Newx(newargv,PL_origargc+3,char*);
4952 newargv[1] = s;
4953 while (s < PL_bufend && !isSPACE(*s))
4954 s++;
4955 *s = '\0';
4956 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
4957 }
4958 else
4959 newargv = PL_origargv;
4960 newargv[0] = ipath;
4961 PERL_FPU_PRE_EXEC
4962 PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
4963 PERL_FPU_POST_EXEC
4964 Perl_croak(aTHX_ "Can't exec %s", ipath);
4965 }
4966 if (d) {
4967 while (*d && !isSPACE(*d))
4968 d++;
4969 while (SPACE_OR_TAB(*d))
4970 d++;
4971
4972 if (*d++ == '-') {
4973 const bool switches_done = PL_doswitches;
4974 const U32 oldpdb = PL_perldb;
4975 const bool oldn = PL_minus_n;
4976 const bool oldp = PL_minus_p;
4977 const char *d1 = d;
4978
4979 do {
4980 bool baduni = FALSE;
4981 if (*d1 == 'C') {
4982 const char *d2 = d1 + 1;
4983 if (parse_unicode_opts((const char **)&d2)
4984 != PL_unicode)
4985 baduni = TRUE;
4986 }
4987 if (baduni || *d1 == 'M' || *d1 == 'm') {
4988 const char * const m = d1;
4989 while (*d1 && !isSPACE(*d1))
4990 d1++;
4991 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
4992 (int)(d1 - m), m);
4993 }
4994 d1 = moreswitches(d1);
4995 } while (d1);
4996 if (PL_doswitches && !switches_done) {
4997 int argc = PL_origargc;
4998 char **argv = PL_origargv;
4999 do {
5000 argc--,argv++;
5001 } while (argc && argv[0][0] == '-' && argv[0][1]);
5002 init_argv_symbols(argc,argv);
5003 }
5004 if (((PERLDB_LINE || PERLDB_SAVESRC) && !oldpdb) ||
5005 ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
5006 /* if we have already added "LINE: while (<>) {",
5007 we must not do it again */
5008 {
5009 sv_setpvs(PL_linestr, "");
5010 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
5011 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5012 PL_last_lop = PL_last_uni = NULL;
5013 PL_preambled = FALSE;
5014 if (PERLDB_LINE || PERLDB_SAVESRC)
5015 (void)gv_fetchfile(PL_origfilename);
5016 goto retry;
5017 }
5018 }
5019 }
5020 }
5021 }
5022 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
5023 PL_bufptr = s;
5024 PL_lex_state = LEX_FORMLINE;
5025 return yylex();
5026 }
5027 goto retry;
5028 case '\r':
5029#ifdef PERL_STRICT_CR
5030 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
5031 Perl_croak(aTHX_
5032 "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
5033#endif
5034 case ' ': case '\t': case '\f': case 013:
5035#ifdef PERL_MAD
5036 PL_realtokenstart = -1;
5037 if (!PL_thiswhite)
5038 PL_thiswhite = newSVpvs("");
5039 sv_catpvn(PL_thiswhite, s, 1);
5040#endif
5041 s++;
5042 goto retry;
5043 case '#':
5044 case '\n':
5045#ifdef PERL_MAD
5046 PL_realtokenstart = -1;
5047 if (PL_madskills)
5048 PL_faketokens = 0;
5049#endif
5050 if (PL_lex_state != LEX_NORMAL || (PL_in_eval && !PL_rsfp)) {
5051 if (*s == '#' && s == PL_linestart && PL_in_eval && !PL_rsfp) {
5052 /* handle eval qq[#line 1 "foo"\n ...] */
5053 CopLINE_dec(PL_curcop);
5054 incline(s);
5055 }
5056 if (PL_madskills && !PL_lex_formbrack && !PL_in_eval) {
5057 s = SKIPSPACE0(s);
5058 if (!PL_in_eval || PL_rsfp)
5059 incline(s);
5060 }
5061 else {
5062 d = s;
5063 while (d < PL_bufend && *d != '\n')
5064 d++;
5065 if (d < PL_bufend)
5066 d++;
5067 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
5068 Perl_croak(aTHX_ "panic: input overflow");
5069#ifdef PERL_MAD
5070 if (PL_madskills)
5071 PL_thiswhite = newSVpvn(s, d - s);
5072#endif
5073 s = d;
5074 incline(s);
5075 }
5076 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
5077 PL_bufptr = s;
5078 PL_lex_state = LEX_FORMLINE;
5079 return yylex();
5080 }
5081 }
5082 else {
5083#ifdef PERL_MAD
5084 if (PL_madskills && CopLINE(PL_curcop) >= 1 && !PL_lex_formbrack) {
5085 if (CopLINE(PL_curcop) == 1 && s[0] == '#' && s[1] == '!') {
5086 PL_faketokens = 0;
5087 s = SKIPSPACE0(s);
5088 TOKEN(PEG); /* make sure any #! line is accessible */
5089 }
5090 s = SKIPSPACE0(s);
5091 }
5092 else {
5093/* if (PL_madskills && PL_lex_formbrack) { */
5094 d = s;
5095 while (d < PL_bufend && *d != '\n')
5096 d++;
5097 if (d < PL_bufend)
5098 d++;
5099 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
5100 Perl_croak(aTHX_ "panic: input overflow");
5101 if (PL_madskills && CopLINE(PL_curcop) >= 1) {
5102 if (!PL_thiswhite)
5103 PL_thiswhite = newSVpvs("");
5104 if (CopLINE(PL_curcop) == 1) {
5105 sv_setpvs(PL_thiswhite, "");
5106 PL_faketokens = 0;
5107 }
5108 sv_catpvn(PL_thiswhite, s, d - s);
5109 }
5110 s = d;
5111/* }
5112 *s = '\0';
5113 PL_bufend = s; */
5114 }
5115#else
5116 *s = '\0';
5117 PL_bufend = s;
5118#endif
5119 }
5120 goto retry;
5121 case '-':
5122 if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {
5123 I32 ftst = 0;
5124 char tmp;
5125
5126 s++;
5127 PL_bufptr = s;
5128 tmp = *s++;
5129
5130 while (s < PL_bufend && SPACE_OR_TAB(*s))
5131 s++;
5132
5133 if (strnEQ(s,"=>",2)) {
5134 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
5135 DEBUG_T( { printbuf("### Saw unary minus before =>, forcing word %s\n", s); } );
5136 OPERATOR('-'); /* unary minus */
5137 }
5138 PL_last_uni = PL_oldbufptr;
5139 switch (tmp) {
5140 case 'r': ftst = OP_FTEREAD; break;
5141 case 'w': ftst = OP_FTEWRITE; break;
5142 case 'x': ftst = OP_FTEEXEC; break;
5143 case 'o': ftst = OP_FTEOWNED; break;
5144 case 'R': ftst = OP_FTRREAD; break;
5145 case 'W': ftst = OP_FTRWRITE; break;
5146 case 'X': ftst = OP_FTREXEC; break;
5147 case 'O': ftst = OP_FTROWNED; break;
5148 case 'e': ftst = OP_FTIS; break;
5149 case 'z': ftst = OP_FTZERO; break;
5150 case 's': ftst = OP_FTSIZE; break;
5151 case 'f': ftst = OP_FTFILE; break;
5152 case 'd': ftst = OP_FTDIR; break;
5153 case 'l': ftst = OP_FTLINK; break;
5154 case 'p': ftst = OP_FTPIPE; break;
5155 case 'S': ftst = OP_FTSOCK; break;
5156 case 'u': ftst = OP_FTSUID; break;
5157 case 'g': ftst = OP_FTSGID; break;
5158 case 'k': ftst = OP_FTSVTX; break;
5159 case 'b': ftst = OP_FTBLK; break;
5160 case 'c': ftst = OP_FTCHR; break;
5161 case 't': ftst = OP_FTTTY; break;
5162 case 'T': ftst = OP_FTTEXT; break;
5163 case 'B': ftst = OP_FTBINARY; break;
5164 case 'M': case 'A': case 'C':
5165 gv_fetchpvs("\024", GV_ADD|GV_NOTQUAL, SVt_PV);
5166 switch (tmp) {
5167 case 'M': ftst = OP_FTMTIME; break;
5168 case 'A': ftst = OP_FTATIME; break;
5169 case 'C': ftst = OP_FTCTIME; break;
5170 default: break;
5171 }
5172 break;
5173 default:
5174 break;
5175 }
5176 if (ftst) {
5177 PL_last_lop_op = (OPCODE)ftst;
5178 DEBUG_T( { PerlIO_printf(Perl_debug_log,
5179 "### Saw file test %c\n", (int)tmp);
5180 } );
5181 FTST(ftst);
5182 }
5183 else {
5184 /* Assume it was a minus followed by a one-letter named
5185 * subroutine call (or a -bareword), then. */
5186 DEBUG_T( { PerlIO_printf(Perl_debug_log,
5187 "### '-%c' looked like a file test but was not\n",
5188 (int) tmp);
5189 } );
5190 s = --PL_bufptr;
5191 }
5192 }
5193 {
5194 const char tmp = *s++;
5195 if (*s == tmp) {
5196 s++;
5197 if (PL_expect == XOPERATOR)
5198 TERM(POSTDEC);
5199 else
5200 OPERATOR(PREDEC);
5201 }
5202 else if (*s == '>') {
5203 s++;
5204 s = SKIPSPACE1(s);
5205 if (isIDFIRST_lazy_if(s,UTF)) {
5206 s = force_word(s,METHOD,FALSE,TRUE,FALSE);
5207 TOKEN(ARROW);
5208 }
5209 else if (*s == '$')
5210 OPERATOR(ARROW);
5211 else
5212 TERM(ARROW);
5213 }
5214 if (PL_expect == XOPERATOR) {
5215 if (*s == '=' && !PL_lex_allbrackets &&
5216 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5217 s--;
5218 TOKEN(0);
5219 }
5220 Aop(OP_SUBTRACT);
5221 }
5222 else {
5223 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5224 check_uni();
5225 OPERATOR('-'); /* unary minus */
5226 }
5227 }
5228
5229 case '+':
5230 {
5231 const char tmp = *s++;
5232 if (*s == tmp) {
5233 s++;
5234 if (PL_expect == XOPERATOR)
5235 TERM(POSTINC);
5236 else
5237 OPERATOR(PREINC);
5238 }
5239 if (PL_expect == XOPERATOR) {
5240 if (*s == '=' && !PL_lex_allbrackets &&
5241 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5242 s--;
5243 TOKEN(0);
5244 }
5245 Aop(OP_ADD);
5246 }
5247 else {
5248 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5249 check_uni();
5250 OPERATOR('+');
5251 }
5252 }
5253
5254 case '*':
5255 if (PL_expect != XOPERATOR) {
5256 s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5257 PL_expect = XOPERATOR;
5258 force_ident(PL_tokenbuf, '*');
5259 if (!*PL_tokenbuf)
5260 PREREF('*');
5261 TERM('*');
5262 }
5263 s++;
5264 if (*s == '*') {
5265 s++;
5266 if (*s == '=' && !PL_lex_allbrackets &&
5267 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5268 s -= 2;
5269 TOKEN(0);
5270 }
5271 PWop(OP_POW);
5272 }
5273 if (*s == '=' && !PL_lex_allbrackets &&
5274 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5275 s--;
5276 TOKEN(0);
5277 }
5278 Mop(OP_MULTIPLY);
5279
5280 case '%':
5281 if (PL_expect == XOPERATOR) {
5282 if (s[1] == '=' && !PL_lex_allbrackets &&
5283 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5284 TOKEN(0);
5285 ++s;
5286 Mop(OP_MODULO);
5287 }
5288 PL_tokenbuf[0] = '%';
5289 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
5290 sizeof PL_tokenbuf - 1, FALSE);
5291 if (!PL_tokenbuf[1]) {
5292 PREREF('%');
5293 }
5294 PL_pending_ident = '%';
5295 TERM('%');
5296
5297 case '^':
5298 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5299 (s[1] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE))
5300 TOKEN(0);
5301 s++;
5302 BOop(OP_BIT_XOR);
5303 case '[':
5304 if (PL_lex_brackets > 100)
5305 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
5306 PL_lex_brackstack[PL_lex_brackets++] = 0;
5307 PL_lex_allbrackets++;
5308 {
5309 const char tmp = *s++;
5310 OPERATOR(tmp);
5311 }
5312 case '~':
5313 if (s[1] == '~'
5314 && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR))
5315 {
5316 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
5317 TOKEN(0);
5318 s += 2;
5319 Eop(OP_SMARTMATCH);
5320 }
5321 s++;
5322 OPERATOR('~');
5323 case ',':
5324 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMMA)
5325 TOKEN(0);
5326 s++;
5327 OPERATOR(',');
5328 case ':':
5329 if (s[1] == ':') {
5330 len = 0;
5331 goto just_a_word_zero_gv;
5332 }
5333 s++;
5334 switch (PL_expect) {
5335 OP *attrs;
5336#ifdef PERL_MAD
5337 I32 stuffstart;
5338#endif
5339 case XOPERATOR:
5340 if (!PL_in_my || PL_lex_state != LEX_NORMAL)
5341 break;
5342 PL_bufptr = s; /* update in case we back off */
5343 if (*s == '=') {
5344 Perl_croak(aTHX_
5345 "Use of := for an empty attribute list is not allowed");
5346 }
5347 goto grabattrs;
5348 case XATTRBLOCK:
5349 PL_expect = XBLOCK;
5350 goto grabattrs;
5351 case XATTRTERM:
5352 PL_expect = XTERMBLOCK;
5353 grabattrs:
5354#ifdef PERL_MAD
5355 stuffstart = s - SvPVX(PL_linestr) - 1;
5356#endif
5357 s = PEEKSPACE(s);
5358 attrs = NULL;
5359 while (isIDFIRST_lazy_if(s,UTF)) {
5360 I32 tmp;
5361 SV *sv;
5362 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
5363 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len, 0))) {
5364 if (tmp < 0) tmp = -tmp;
5365 switch (tmp) {
5366 case KEY_or:
5367 case KEY_and:
5368 case KEY_for:
5369 case KEY_foreach:
5370 case KEY_unless:
5371 case KEY_if:
5372 case KEY_while:
5373 case KEY_until:
5374 goto got_attrs;
5375 default:
5376 break;
5377 }
5378 }
5379 sv = newSVpvn(s, len);
5380 if (*d == '(') {
5381 d = scan_str(d,TRUE,TRUE);
5382 if (!d) {
5383 /* MUST advance bufptr here to avoid bogus
5384 "at end of line" context messages from yyerror().
5385 */
5386 PL_bufptr = s + len;
5387 yyerror("Unterminated attribute parameter in attribute list");
5388 if (attrs)
5389 op_free(attrs);
5390 sv_free(sv);
5391 return REPORT(0); /* EOF indicator */
5392 }
5393 }
5394 if (PL_lex_stuff) {
5395 sv_catsv(sv, PL_lex_stuff);
5396 attrs = op_append_elem(OP_LIST, attrs,
5397 newSVOP(OP_CONST, 0, sv));
5398 SvREFCNT_dec(PL_lex_stuff);
5399 PL_lex_stuff = NULL;
5400 }
5401 else {
5402 if (len == 6 && strnEQ(SvPVX(sv), "unique", len)) {
5403 sv_free(sv);
5404 if (PL_in_my == KEY_our) {
5405 deprecate(":unique");
5406 }
5407 else
5408 Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables");
5409 }
5410
5411 /* NOTE: any CV attrs applied here need to be part of
5412 the CVf_BUILTIN_ATTRS define in cv.h! */
5413 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "lvalue", len)) {
5414 sv_free(sv);
5415 CvLVALUE_on(PL_compcv);
5416 }
5417 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "locked", len)) {
5418 sv_free(sv);
5419 deprecate(":locked");
5420 }
5421 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "method", len)) {
5422 sv_free(sv);
5423 CvMETHOD_on(PL_compcv);
5424 }
5425 /* After we've set the flags, it could be argued that
5426 we don't need to do the attributes.pm-based setting
5427 process, and shouldn't bother appending recognized
5428 flags. To experiment with that, uncomment the
5429 following "else". (Note that's already been
5430 uncommented. That keeps the above-applied built-in
5431 attributes from being intercepted (and possibly
5432 rejected) by a package's attribute routines, but is
5433 justified by the performance win for the common case
5434 of applying only built-in attributes.) */
5435 else
5436 attrs = op_append_elem(OP_LIST, attrs,
5437 newSVOP(OP_CONST, 0,
5438 sv));
5439 }
5440 s = PEEKSPACE(d);
5441 if (*s == ':' && s[1] != ':')
5442 s = PEEKSPACE(s+1);
5443 else if (s == d)
5444 break; /* require real whitespace or :'s */
5445 /* XXX losing whitespace on sequential attributes here */
5446 }
5447 {
5448 const char tmp
5449 = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
5450 if (*s != ';' && *s != '}' && *s != tmp
5451 && (tmp != '=' || *s != ')')) {
5452 const char q = ((*s == '\'') ? '"' : '\'');
5453 /* If here for an expression, and parsed no attrs, back
5454 off. */
5455 if (tmp == '=' && !attrs) {
5456 s = PL_bufptr;
5457 break;
5458 }
5459 /* MUST advance bufptr here to avoid bogus "at end of line"
5460 context messages from yyerror().
5461 */
5462 PL_bufptr = s;
5463 yyerror( (const char *)
5464 (*s
5465 ? Perl_form(aTHX_ "Invalid separator character "
5466 "%c%c%c in attribute list", q, *s, q)
5467 : "Unterminated attribute list" ) );
5468 if (attrs)
5469 op_free(attrs);
5470 OPERATOR(':');
5471 }
5472 }
5473 got_attrs:
5474 if (attrs) {
5475 start_force(PL_curforce);
5476 NEXTVAL_NEXTTOKE.opval = attrs;
5477 CURMAD('_', PL_nextwhite);
5478 force_next(THING);
5479 }
5480#ifdef PERL_MAD
5481 if (PL_madskills) {
5482 PL_thistoken = newSVpvn(SvPVX(PL_linestr) + stuffstart,
5483 (s - SvPVX(PL_linestr)) - stuffstart);
5484 }
5485#endif
5486 TOKEN(COLONATTR);
5487 }
5488 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING) {
5489 s--;
5490 TOKEN(0);
5491 }
5492 PL_lex_allbrackets--;
5493 OPERATOR(':');
5494 case '(':
5495 s++;
5496 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
5497 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */
5498 else
5499 PL_expect = XTERM;
5500 s = SKIPSPACE1(s);
5501 PL_lex_allbrackets++;
5502 TOKEN('(');
5503 case ';':
5504 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
5505 TOKEN(0);
5506 CLINE;
5507 s++;
5508 OPERATOR(';');
5509 case ')':
5510 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING)
5511 TOKEN(0);
5512 s++;
5513 PL_lex_allbrackets--;
5514 s = SKIPSPACE1(s);
5515 if (*s == '{')
5516 PREBLOCK(')');
5517 TERM(')');
5518 case ']':
5519 if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
5520 TOKEN(0);
5521 s++;
5522 if (PL_lex_brackets <= 0)
5523 yyerror("Unmatched right square bracket");
5524 else
5525 --PL_lex_brackets;
5526 PL_lex_allbrackets--;
5527 if (PL_lex_state == LEX_INTERPNORMAL) {
5528 if (PL_lex_brackets == 0) {
5529 if (*s == '-' && s[1] == '>')
5530 PL_lex_state = LEX_INTERPENDMAYBE;
5531 else if (*s != '[' && *s != '{')
5532 PL_lex_state = LEX_INTERPEND;
5533 }
5534 }
5535 TERM(']');
5536 case '{':
5537 leftbracket:
5538 s++;
5539 if (PL_lex_brackets > 100) {
5540 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
5541 }
5542 switch (PL_expect) {
5543 case XTERM:
5544 if (PL_lex_formbrack) {
5545 s--;
5546 PRETERMBLOCK(DO);
5547 }
5548 if (PL_oldoldbufptr == PL_last_lop)
5549 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
5550 else
5551 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5552 PL_lex_allbrackets++;
5553 OPERATOR(HASHBRACK);
5554 case XOPERATOR:
5555 while (s < PL_bufend && SPACE_OR_TAB(*s))
5556 s++;
5557 d = s;
5558 PL_tokenbuf[0] = '\0';
5559 if (d < PL_bufend && *d == '-') {
5560 PL_tokenbuf[0] = '-';
5561 d++;
5562 while (d < PL_bufend && SPACE_OR_TAB(*d))
5563 d++;
5564 }
5565 if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
5566 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
5567 FALSE, &len);
5568 while (d < PL_bufend && SPACE_OR_TAB(*d))
5569 d++;
5570 if (*d == '}') {
5571 const char minus = (PL_tokenbuf[0] == '-');
5572 s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
5573 if (minus)
5574 force_next('-');
5575 }
5576 }
5577 /* FALL THROUGH */
5578 case XATTRBLOCK:
5579 case XBLOCK:
5580 PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
5581 PL_lex_allbrackets++;
5582 PL_expect = XSTATE;
5583 break;
5584 case XATTRTERM:
5585 case XTERMBLOCK:
5586 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5587 PL_lex_allbrackets++;
5588 PL_expect = XSTATE;
5589 break;
5590 default: {
5591 const char *t;
5592 if (PL_oldoldbufptr == PL_last_lop)
5593 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
5594 else
5595 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5596 PL_lex_allbrackets++;
5597 s = SKIPSPACE1(s);
5598 if (*s == '}') {
5599 if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
5600 PL_expect = XTERM;
5601 /* This hack is to get the ${} in the message. */
5602 PL_bufptr = s+1;
5603 yyerror("syntax error");
5604 break;
5605 }
5606 OPERATOR(HASHBRACK);
5607 }
5608 /* This hack serves to disambiguate a pair of curlies
5609 * as being a block or an anon hash. Normally, expectation
5610 * determines that, but in cases where we're not in a
5611 * position to expect anything in particular (like inside
5612 * eval"") we have to resolve the ambiguity. This code
5613 * covers the case where the first term in the curlies is a
5614 * quoted string. Most other cases need to be explicitly
5615 * disambiguated by prepending a "+" before the opening
5616 * curly in order to force resolution as an anon hash.
5617 *
5618 * XXX should probably propagate the outer expectation
5619 * into eval"" to rely less on this hack, but that could
5620 * potentially break current behavior of eval"".
5621 * GSAR 97-07-21
5622 */
5623 t = s;
5624 if (*s == '\'' || *s == '"' || *s == '`') {
5625 /* common case: get past first string, handling escapes */
5626 for (t++; t < PL_bufend && *t != *s;)
5627 if (*t++ == '\\' && (*t == '\\' || *t == *s))
5628 t++;
5629 t++;
5630 }
5631 else if (*s == 'q') {
5632 if (++t < PL_bufend
5633 && (!isALNUM(*t)
5634 || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
5635 && !isALNUM(*t))))
5636 {
5637 /* skip q//-like construct */
5638 const char *tmps;
5639 char open, close, term;
5640 I32 brackets = 1;
5641
5642 while (t < PL_bufend && isSPACE(*t))
5643 t++;
5644 /* check for q => */
5645 if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
5646 OPERATOR(HASHBRACK);
5647 }
5648 term = *t;
5649 open = term;
5650 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
5651 term = tmps[5];
5652 close = term;
5653 if (open == close)
5654 for (t++; t < PL_bufend; t++) {
5655 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
5656 t++;
5657 else if (*t == open)
5658 break;
5659 }
5660 else {
5661 for (t++; t < PL_bufend; t++) {
5662 if (*t == '\\' && t+1 < PL_bufend)
5663 t++;
5664 else if (*t == close && --brackets <= 0)
5665 break;
5666 else if (*t == open)
5667 brackets++;
5668 }
5669 }
5670 t++;
5671 }
5672 else
5673 /* skip plain q word */
5674 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
5675 t += UTF8SKIP(t);
5676 }
5677 else if (isALNUM_lazy_if(t,UTF)) {
5678 t += UTF8SKIP(t);
5679 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
5680 t += UTF8SKIP(t);
5681 }
5682 while (t < PL_bufend && isSPACE(*t))
5683 t++;
5684 /* if comma follows first term, call it an anon hash */
5685 /* XXX it could be a comma expression with loop modifiers */
5686 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
5687 || (*t == '=' && t[1] == '>')))
5688 OPERATOR(HASHBRACK);
5689 if (PL_expect == XREF)
5690 PL_expect = XTERM;
5691 else {
5692 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
5693 PL_expect = XSTATE;
5694 }
5695 }
5696 break;
5697 }
5698 pl_yylval.ival = CopLINE(PL_curcop);
5699 if (isSPACE(*s) || *s == '#')
5700 PL_copline = NOLINE; /* invalidate current command line number */
5701 TOKEN('{');
5702 case '}':
5703 if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
5704 TOKEN(0);
5705 rightbracket:
5706 s++;
5707 if (PL_lex_brackets <= 0)
5708 yyerror("Unmatched right curly bracket");
5709 else
5710 PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
5711 PL_lex_allbrackets--;
5712 if (PL_lex_brackets < PL_lex_formbrack && PL_lex_state != LEX_INTERPNORMAL)
5713 PL_lex_formbrack = 0;
5714 if (PL_lex_state == LEX_INTERPNORMAL) {
5715 if (PL_lex_brackets == 0) {
5716 if (PL_expect & XFAKEBRACK) {
5717 PL_expect &= XENUMMASK;
5718 PL_lex_state = LEX_INTERPEND;
5719 PL_bufptr = s;
5720#if 0
5721 if (PL_madskills) {
5722 if (!PL_thiswhite)
5723 PL_thiswhite = newSVpvs("");
5724 sv_catpvs(PL_thiswhite,"}");
5725 }
5726#endif
5727 return yylex(); /* ignore fake brackets */
5728 }
5729 if (*s == '-' && s[1] == '>')
5730 PL_lex_state = LEX_INTERPENDMAYBE;
5731 else if (*s != '[' && *s != '{')
5732 PL_lex_state = LEX_INTERPEND;
5733 }
5734 }
5735 if (PL_expect & XFAKEBRACK) {
5736 PL_expect &= XENUMMASK;
5737 PL_bufptr = s;
5738 return yylex(); /* ignore fake brackets */
5739 }
5740 start_force(PL_curforce);
5741 if (PL_madskills) {
5742 curmad('X', newSVpvn(s-1,1));
5743 CURMAD('_', PL_thiswhite);
5744 }
5745 force_next('}');
5746#ifdef PERL_MAD
5747 if (!PL_thistoken)
5748 PL_thistoken = newSVpvs("");
5749#endif
5750 TOKEN(';');
5751 case '&':
5752 s++;
5753 if (*s++ == '&') {
5754 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5755 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
5756 s -= 2;
5757 TOKEN(0);
5758 }
5759 AOPERATOR(ANDAND);
5760 }
5761 s--;
5762 if (PL_expect == XOPERATOR) {
5763 if (PL_bufptr == PL_linestart && ckWARN(WARN_SEMICOLON)
5764 && isIDFIRST_lazy_if(s,UTF))
5765 {
5766 CopLINE_dec(PL_curcop);
5767 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
5768 CopLINE_inc(PL_curcop);
5769 }
5770 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5771 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
5772 s--;
5773 TOKEN(0);
5774 }
5775 BAop(OP_BIT_AND);
5776 }
5777
5778 s = scan_ident(s - 1, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5779 if (*PL_tokenbuf) {
5780 PL_expect = XOPERATOR;
5781 force_ident(PL_tokenbuf, '&');
5782 }
5783 else
5784 PREREF('&');
5785 pl_yylval.ival = (OPpENTERSUB_AMPER<<8);
5786 TERM('&');
5787
5788 case '|':
5789 s++;
5790 if (*s++ == '|') {
5791 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5792 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
5793 s -= 2;
5794 TOKEN(0);
5795 }
5796 AOPERATOR(OROR);
5797 }
5798 s--;
5799 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5800 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
5801 s--;
5802 TOKEN(0);
5803 }
5804 BOop(OP_BIT_OR);
5805 case '=':
5806 s++;
5807 {
5808 const char tmp = *s++;
5809 if (tmp == '=') {
5810 if (!PL_lex_allbrackets &&
5811 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5812 s -= 2;
5813 TOKEN(0);
5814 }
5815 Eop(OP_EQ);
5816 }
5817 if (tmp == '>') {
5818 if (!PL_lex_allbrackets &&
5819 PL_lex_fakeeof >= LEX_FAKEEOF_COMMA) {
5820 s -= 2;
5821 TOKEN(0);
5822 }
5823 OPERATOR(',');
5824 }
5825 if (tmp == '~')
5826 PMop(OP_MATCH);
5827 if (tmp && isSPACE(*s) && ckWARN(WARN_SYNTAX)
5828 && strchr("+-*/%.^&|<",tmp))
5829 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5830 "Reversed %c= operator",(int)tmp);
5831 s--;
5832 if (PL_expect == XSTATE && isALPHA(tmp) &&
5833 (s == PL_linestart+1 || s[-2] == '\n') )
5834 {
5835 if (PL_in_eval && !PL_rsfp) {
5836 d = PL_bufend;
5837 while (s < d) {
5838 if (*s++ == '\n') {
5839 incline(s);
5840 if (strnEQ(s,"=cut",4)) {
5841 s = strchr(s,'\n');
5842 if (s)
5843 s++;
5844 else
5845 s = d;
5846 incline(s);
5847 goto retry;
5848 }
5849 }
5850 }
5851 goto retry;
5852 }
5853#ifdef PERL_MAD
5854 if (PL_madskills) {
5855 if (!PL_thiswhite)
5856 PL_thiswhite = newSVpvs("");
5857 sv_catpvn(PL_thiswhite, PL_linestart,
5858 PL_bufend - PL_linestart);
5859 }
5860#endif
5861 s = PL_bufend;
5862 PL_parser->in_pod = 1;
5863 goto retry;
5864 }
5865 }
5866 if (PL_lex_brackets < PL_lex_formbrack) {
5867 const char *t = s;
5868#ifdef PERL_STRICT_CR
5869 while (SPACE_OR_TAB(*t))
5870#else
5871 while (SPACE_OR_TAB(*t) || *t == '\r')
5872#endif
5873 t++;
5874 if (*t == '\n' || *t == '#') {
5875 s--;
5876 PL_expect = XBLOCK;
5877 goto leftbracket;
5878 }
5879 }
5880 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5881 s--;
5882 TOKEN(0);
5883 }
5884 pl_yylval.ival = 0;
5885 OPERATOR(ASSIGNOP);
5886 case '!':
5887 s++;
5888 {
5889 const char tmp = *s++;
5890 if (tmp == '=') {
5891 /* was this !=~ where !~ was meant?
5892 * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */
5893
5894 if (*s == '~' && ckWARN(WARN_SYNTAX)) {
5895 const char *t = s+1;
5896
5897 while (t < PL_bufend && isSPACE(*t))
5898 ++t;
5899
5900 if (*t == '/' || *t == '?' ||
5901 ((*t == 'm' || *t == 's' || *t == 'y')
5902 && !isALNUM(t[1])) ||
5903 (*t == 't' && t[1] == 'r' && !isALNUM(t[2])))
5904 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5905 "!=~ should be !~");
5906 }
5907 if (!PL_lex_allbrackets &&
5908 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5909 s -= 2;
5910 TOKEN(0);
5911 }
5912 Eop(OP_NE);
5913 }
5914 if (tmp == '~')
5915 PMop(OP_NOT);
5916 }
5917 s--;
5918 OPERATOR('!');
5919 case '<':
5920 if (PL_expect != XOPERATOR) {
5921 if (s[1] != '<' && !strchr(s,'>'))
5922 check_uni();
5923 if (s[1] == '<')
5924 s = scan_heredoc(s);
5925 else
5926 s = scan_inputsymbol(s);
5927 TERM(sublex_start());
5928 }
5929 s++;
5930 {
5931 char tmp = *s++;
5932 if (tmp == '<') {
5933 if (*s == '=' && !PL_lex_allbrackets &&
5934 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5935 s -= 2;
5936 TOKEN(0);
5937 }
5938 SHop(OP_LEFT_SHIFT);
5939 }
5940 if (tmp == '=') {
5941 tmp = *s++;
5942 if (tmp == '>') {
5943 if (!PL_lex_allbrackets &&
5944 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5945 s -= 3;
5946 TOKEN(0);
5947 }
5948 Eop(OP_NCMP);
5949 }
5950 s--;
5951 if (!PL_lex_allbrackets &&
5952 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5953 s -= 2;
5954 TOKEN(0);
5955 }
5956 Rop(OP_LE);
5957 }
5958 }
5959 s--;
5960 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5961 s--;
5962 TOKEN(0);
5963 }
5964 Rop(OP_LT);
5965 case '>':
5966 s++;
5967 {
5968 const char tmp = *s++;
5969 if (tmp == '>') {
5970 if (*s == '=' && !PL_lex_allbrackets &&
5971 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5972 s -= 2;
5973 TOKEN(0);
5974 }
5975 SHop(OP_RIGHT_SHIFT);
5976 }
5977 else if (tmp == '=') {
5978 if (!PL_lex_allbrackets &&
5979 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5980 s -= 2;
5981 TOKEN(0);
5982 }
5983 Rop(OP_GE);
5984 }
5985 }
5986 s--;
5987 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5988 s--;
5989 TOKEN(0);
5990 }
5991 Rop(OP_GT);
5992
5993 case '$':
5994 CLINE;
5995
5996 if (PL_expect == XOPERATOR) {
5997 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
5998 return deprecate_commaless_var_list();
5999 }
6000 }
6001
6002 if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-@", s[2]))) {
6003 PL_tokenbuf[0] = '@';
6004 s = scan_ident(s + 1, PL_bufend, PL_tokenbuf + 1,
6005 sizeof PL_tokenbuf - 1, FALSE);
6006 if (PL_expect == XOPERATOR)
6007 no_op("Array length", s);
6008 if (!PL_tokenbuf[1])
6009 PREREF(DOLSHARP);
6010 PL_expect = XOPERATOR;
6011 PL_pending_ident = '#';
6012 TOKEN(DOLSHARP);
6013 }
6014
6015 PL_tokenbuf[0] = '$';
6016 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
6017 sizeof PL_tokenbuf - 1, FALSE);
6018 if (PL_expect == XOPERATOR)
6019 no_op("Scalar", s);
6020 if (!PL_tokenbuf[1]) {
6021 if (s == PL_bufend)
6022 yyerror("Final $ should be \\$ or $name");
6023 PREREF('$');
6024 }
6025
6026 /* This kludge not intended to be bulletproof. */
6027 if (PL_tokenbuf[1] == '[' && !PL_tokenbuf[2]) {
6028 pl_yylval.opval = newSVOP(OP_CONST, 0,
6029 newSViv(CopARYBASE_get(&PL_compiling)));
6030 pl_yylval.opval->op_private = OPpCONST_ARYBASE;
6031 TERM(THING);
6032 }
6033
6034 d = s;
6035 {
6036 const char tmp = *s;
6037 if (PL_lex_state == LEX_NORMAL || PL_lex_brackets)
6038 s = SKIPSPACE1(s);
6039
6040 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
6041 && intuit_more(s)) {
6042 if (*s == '[') {
6043 PL_tokenbuf[0] = '@';
6044 if (ckWARN(WARN_SYNTAX)) {
6045 char *t = s+1;
6046
6047 while (isSPACE(*t) || isALNUM_lazy_if(t,UTF) || *t == '$')
6048 t++;
6049 if (*t++ == ',') {
6050 PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
6051 while (t < PL_bufend && *t != ']')
6052 t++;
6053 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6054 "Multidimensional syntax %.*s not supported",
6055 (int)((t - PL_bufptr) + 1), PL_bufptr);
6056 }
6057 }
6058 }
6059 else if (*s == '{') {
6060 char *t;
6061 PL_tokenbuf[0] = '%';
6062 if (strEQ(PL_tokenbuf+1, "SIG") && ckWARN(WARN_SYNTAX)
6063 && (t = strchr(s, '}')) && (t = strchr(t, '=')))
6064 {
6065 char tmpbuf[sizeof PL_tokenbuf];
6066 do {
6067 t++;
6068 } while (isSPACE(*t));
6069 if (isIDFIRST_lazy_if(t,UTF)) {
6070 STRLEN len;
6071 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE,
6072 &len);
6073 while (isSPACE(*t))
6074 t++;
6075 if (*t == ';' && get_cvn_flags(tmpbuf, len, 0))
6076 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6077 "You need to quote \"%s\"",
6078 tmpbuf);
6079 }
6080 }
6081 }
6082 }
6083
6084 PL_expect = XOPERATOR;
6085 if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
6086 const bool islop = (PL_last_lop == PL_oldoldbufptr);
6087 if (!islop || PL_last_lop_op == OP_GREPSTART)
6088 PL_expect = XOPERATOR;
6089 else if (strchr("$@\"'`q", *s))
6090 PL_expect = XTERM; /* e.g. print $fh "foo" */
6091 else if (strchr("&*<%", *s) && isIDFIRST_lazy_if(s+1,UTF))
6092 PL_expect = XTERM; /* e.g. print $fh &sub */
6093 else if (isIDFIRST_lazy_if(s,UTF)) {
6094 char tmpbuf[sizeof PL_tokenbuf];
6095 int t2;
6096 scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
6097 if ((t2 = keyword(tmpbuf, len, 0))) {
6098 /* binary operators exclude handle interpretations */
6099 switch (t2) {
6100 case -KEY_x:
6101 case -KEY_eq:
6102 case -KEY_ne:
6103 case -KEY_gt:
6104 case -KEY_lt:
6105 case -KEY_ge:
6106 case -KEY_le:
6107 case -KEY_cmp:
6108 break;
6109 default:
6110 PL_expect = XTERM; /* e.g. print $fh length() */
6111 break;
6112 }
6113 }
6114 else {
6115 PL_expect = XTERM; /* e.g. print $fh subr() */
6116 }
6117 }
6118 else if (isDIGIT(*s))
6119 PL_expect = XTERM; /* e.g. print $fh 3 */
6120 else if (*s == '.' && isDIGIT(s[1]))
6121 PL_expect = XTERM; /* e.g. print $fh .3 */
6122 else if ((*s == '?' || *s == '-' || *s == '+')
6123 && !isSPACE(s[1]) && s[1] != '=')
6124 PL_expect = XTERM; /* e.g. print $fh -1 */
6125 else if (*s == '/' && !isSPACE(s[1]) && s[1] != '='
6126 && s[1] != '/')
6127 PL_expect = XTERM; /* e.g. print $fh /.../
6128 XXX except DORDOR operator
6129 */
6130 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2])
6131 && s[2] != '=')
6132 PL_expect = XTERM; /* print $fh <<"EOF" */
6133 }
6134 }
6135 PL_pending_ident = '$';
6136 TOKEN('$');
6137
6138 case '@':
6139 if (PL_expect == XOPERATOR)
6140 no_op("Array", s);
6141 PL_tokenbuf[0] = '@';
6142 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
6143 if (!PL_tokenbuf[1]) {
6144 PREREF('@');
6145 }
6146 if (PL_lex_state == LEX_NORMAL)
6147 s = SKIPSPACE1(s);
6148 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
6149 if (*s == '{')
6150 PL_tokenbuf[0] = '%';
6151
6152 /* Warn about @ where they meant $. */
6153 if (*s == '[' || *s == '{') {
6154 if (ckWARN(WARN_SYNTAX)) {
6155 const char *t = s + 1;
6156 while (*t && (isALNUM_lazy_if(t,UTF) || strchr(" \t$#+-'\"", *t)))
6157 t++;
6158 if (*t == '}' || *t == ']') {
6159 t++;
6160 PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
6161 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6162 "Scalar value %.*s better written as $%.*s",
6163 (int)(t-PL_bufptr), PL_bufptr,
6164 (int)(t-PL_bufptr-1), PL_bufptr+1);
6165 }
6166 }
6167 }
6168 }
6169 PL_pending_ident = '@';
6170 TERM('@');
6171
6172 case '/': /* may be division, defined-or, or pattern */
6173 if (PL_expect == XTERMORDORDOR && s[1] == '/') {
6174 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6175 (s[2] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC))
6176 TOKEN(0);
6177 s += 2;
6178 AOPERATOR(DORDOR);
6179 }
6180 case '?': /* may either be conditional or pattern */
6181 if (PL_expect == XOPERATOR) {
6182 char tmp = *s++;
6183 if(tmp == '?') {
6184 if (!PL_lex_allbrackets &&
6185 PL_lex_fakeeof >= LEX_FAKEEOF_IFELSE) {
6186 s--;
6187 TOKEN(0);
6188 }
6189 PL_lex_allbrackets++;
6190 OPERATOR('?');
6191 }
6192 else {
6193 tmp = *s++;
6194 if(tmp == '/') {
6195 /* A // operator. */
6196 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6197 (*s == '=' ? LEX_FAKEEOF_ASSIGN :
6198 LEX_FAKEEOF_LOGIC)) {
6199 s -= 2;
6200 TOKEN(0);
6201 }
6202 AOPERATOR(DORDOR);
6203 }
6204 else {
6205 s--;
6206 if (*s == '=' && !PL_lex_allbrackets &&
6207 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6208 s--;
6209 TOKEN(0);
6210 }
6211 Mop(OP_DIVIDE);
6212 }
6213 }
6214 }
6215 else {
6216 /* Disable warning on "study /blah/" */
6217 if (PL_oldoldbufptr == PL_last_uni
6218 && (*PL_last_uni != 's' || s - PL_last_uni < 5
6219 || memNE(PL_last_uni, "study", 5)
6220 || isALNUM_lazy_if(PL_last_uni+5,UTF)
6221 ))
6222 check_uni();
6223 if (*s == '?')
6224 deprecate("?PATTERN? without explicit operator");
6225 s = scan_pat(s,OP_MATCH);
6226 TERM(sublex_start());
6227 }
6228
6229 case '.':
6230 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
6231#ifdef PERL_STRICT_CR
6232 && s[1] == '\n'
6233#else
6234 && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
6235#endif
6236 && (s == PL_linestart || s[-1] == '\n') )
6237 {
6238 PL_lex_formbrack = 0;
6239 PL_expect = XSTATE;
6240 goto rightbracket;
6241 }
6242 if (PL_expect == XSTATE && s[1] == '.' && s[2] == '.') {
6243 s += 3;
6244 OPERATOR(YADAYADA);
6245 }
6246 if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
6247 char tmp = *s++;
6248 if (*s == tmp) {
6249 if (!PL_lex_allbrackets &&
6250 PL_lex_fakeeof >= LEX_FAKEEOF_RANGE) {
6251 s--;
6252 TOKEN(0);
6253 }
6254 s++;
6255 if (*s == tmp) {
6256 s++;
6257 pl_yylval.ival = OPf_SPECIAL;
6258 }
6259 else
6260 pl_yylval.ival = 0;
6261 OPERATOR(DOTDOT);
6262 }
6263 if (*s == '=' && !PL_lex_allbrackets &&
6264 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6265 s--;
6266 TOKEN(0);
6267 }
6268 Aop(OP_CONCAT);
6269 }
6270 /* FALL THROUGH */
6271 case '0': case '1': case '2': case '3': case '4':
6272 case '5': case '6': case '7': case '8': case '9':
6273 s = scan_num(s, &pl_yylval);
6274 DEBUG_T( { printbuf("### Saw number in %s\n", s); } );
6275 if (PL_expect == XOPERATOR)
6276 no_op("Number",s);
6277 TERM(THING);
6278
6279 case '\'':
6280 s = scan_str(s,!!PL_madskills,FALSE);
6281 DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
6282 if (PL_expect == XOPERATOR) {
6283 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
6284 return deprecate_commaless_var_list();
6285 }
6286 else
6287 no_op("String",s);
6288 }
6289 if (!s)
6290 missingterm(NULL);
6291 pl_yylval.ival = OP_CONST;
6292 TERM(sublex_start());
6293
6294 case '"':
6295 s = scan_str(s,!!PL_madskills,FALSE);
6296 DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
6297 if (PL_expect == XOPERATOR) {
6298 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
6299 return deprecate_commaless_var_list();
6300 }
6301 else
6302 no_op("String",s);
6303 }
6304 if (!s)
6305 missingterm(NULL);
6306 pl_yylval.ival = OP_CONST;
6307 /* FIXME. I think that this can be const if char *d is replaced by
6308 more localised variables. */
6309 for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
6310 if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) {
6311 pl_yylval.ival = OP_STRINGIFY;
6312 break;
6313 }
6314 }
6315 TERM(sublex_start());
6316
6317 case '`':
6318 s = scan_str(s,!!PL_madskills,FALSE);
6319 DEBUG_T( { printbuf("### Saw backtick string before %s\n", s); } );
6320 if (PL_expect == XOPERATOR)
6321 no_op("Backticks",s);
6322 if (!s)
6323 missingterm(NULL);
6324 readpipe_override();
6325 TERM(sublex_start());
6326
6327 case '\\':
6328 s++;
6329 if (PL_lex_inwhat && isDIGIT(*s))
6330 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression",
6331 *s, *s);
6332 if (PL_expect == XOPERATOR)
6333 no_op("Backslash",s);
6334 OPERATOR(REFGEN);
6335
6336 case 'v':
6337 if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
6338 char *start = s + 2;
6339 while (isDIGIT(*start) || *start == '_')
6340 start++;
6341 if (*start == '.' && isDIGIT(start[1])) {
6342 s = scan_num(s, &pl_yylval);
6343 TERM(THING);
6344 }
6345 /* avoid v123abc() or $h{v1}, allow C<print v10;> */
6346 else if (!isALPHA(*start) && (PL_expect == XTERM
6347 || PL_expect == XREF || PL_expect == XSTATE
6348 || PL_expect == XTERMORDORDOR)) {
6349 GV *const gv = gv_fetchpvn_flags(s, start - s, 0, SVt_PVCV);
6350 if (!gv) {
6351 s = scan_num(s, &pl_yylval);
6352 TERM(THING);
6353 }
6354 }
6355 }
6356 goto keylookup;
6357 case 'x':
6358 if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
6359 s++;
6360 Mop(OP_REPEAT);
6361 }
6362 goto keylookup;
6363
6364 case '_':
6365 case 'a': case 'A':
6366 case 'b': case 'B':
6367 case 'c': case 'C':
6368 case 'd': case 'D':
6369 case 'e': case 'E':
6370 case 'f': case 'F':
6371 case 'g': case 'G':
6372 case 'h': case 'H':
6373 case 'i': case 'I':
6374 case 'j': case 'J':
6375 case 'k': case 'K':
6376 case 'l': case 'L':
6377 case 'm': case 'M':
6378 case 'n': case 'N':
6379 case 'o': case 'O':
6380 case 'p': case 'P':
6381 case 'q': case 'Q':
6382 case 'r': case 'R':
6383 case 's': case 'S':
6384 case 't': case 'T':
6385 case 'u': case 'U':
6386 case 'V':
6387 case 'w': case 'W':
6388 case 'X':
6389 case 'y': case 'Y':
6390 case 'z': case 'Z':
6391
6392 keylookup: {
6393 bool anydelim;
6394 I32 tmp;
6395
6396 orig_keyword = 0;
6397 gv = NULL;
6398 gvp = NULL;
6399
6400 PL_bufptr = s;
6401 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
6402
6403 /* Some keywords can be followed by any delimiter, including ':' */
6404 anydelim = word_takes_any_delimeter(PL_tokenbuf, len);
6405
6406 /* x::* is just a word, unless x is "CORE" */
6407 if (!anydelim && *s == ':' && s[1] == ':' && strNE(PL_tokenbuf, "CORE"))
6408 goto just_a_word;
6409
6410 d = s;
6411 while (d < PL_bufend && isSPACE(*d))
6412 d++; /* no comments skipped here, or s### is misparsed */
6413
6414 /* Is this a word before a => operator? */
6415 if (*d == '=' && d[1] == '>') {
6416 CLINE;
6417 pl_yylval.opval
6418 = (OP*)newSVOP(OP_CONST, 0,
6419 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
6420 pl_yylval.opval->op_private = OPpCONST_BARE;
6421 TERM(WORD);
6422 }
6423
6424 /* Check for plugged-in keyword */
6425 {
6426 OP *o;
6427 int result;
6428 char *saved_bufptr = PL_bufptr;
6429 PL_bufptr = s;
6430 result = PL_keyword_plugin(aTHX_ PL_tokenbuf, len, &o);
6431 s = PL_bufptr;
6432 if (result == KEYWORD_PLUGIN_DECLINE) {
6433 /* not a plugged-in keyword */
6434 PL_bufptr = saved_bufptr;
6435 } else if (result == KEYWORD_PLUGIN_STMT) {
6436 pl_yylval.opval = o;
6437 CLINE;
6438 PL_expect = XSTATE;
6439 return REPORT(PLUGSTMT);
6440 } else if (result == KEYWORD_PLUGIN_EXPR) {
6441 pl_yylval.opval = o;
6442 CLINE;
6443 PL_expect = XOPERATOR;
6444 return REPORT(PLUGEXPR);
6445 } else {
6446 Perl_croak(aTHX_ "Bad plugin affecting keyword '%s'",
6447 PL_tokenbuf);
6448 }
6449 }
6450
6451 /* Check for built-in keyword */
6452 tmp = keyword(PL_tokenbuf, len, 0);
6453
6454 /* Is this a label? */
6455 if (!anydelim && PL_expect == XSTATE
6456 && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
6457 s = d + 1;
6458 pl_yylval.pval = CopLABEL_alloc(PL_tokenbuf);
6459 CLINE;
6460 TOKEN(LABEL);
6461 }
6462
6463 if (tmp < 0) { /* second-class keyword? */
6464 GV *ogv = NULL; /* override (winner) */
6465 GV *hgv = NULL; /* hidden (loser) */
6466 if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
6467 CV *cv;
6468 if ((gv = gv_fetchpvn_flags(PL_tokenbuf, len, 0, SVt_PVCV)) &&
6469 (cv = GvCVu(gv)))
6470 {
6471 if (GvIMPORTED_CV(gv))
6472 ogv = gv;
6473 else if (! CvMETHOD(cv))
6474 hgv = gv;
6475 }
6476 if (!ogv &&
6477 (gvp = (GV**)hv_fetch(PL_globalstash,PL_tokenbuf,len,FALSE)) &&
6478 (gv = *gvp) && isGV_with_GP(gv) &&
6479 GvCVu(gv) && GvIMPORTED_CV(gv))
6480 {
6481 ogv = gv;
6482 }
6483 }
6484 if (ogv) {
6485 orig_keyword = tmp;
6486 tmp = 0; /* overridden by import or by GLOBAL */
6487 }
6488 else if (gv && !gvp
6489 && -tmp==KEY_lock /* XXX generalizable kludge */
6490 && GvCVu(gv))
6491 {
6492 tmp = 0; /* any sub overrides "weak" keyword */
6493 }
6494 else { /* no override */
6495 tmp = -tmp;
6496 if (tmp == KEY_dump) {
6497 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
6498 "dump() better written as CORE::dump()");
6499 }
6500 gv = NULL;
6501 gvp = 0;
6502 if (hgv && tmp != KEY_x && tmp != KEY_CORE) /* never ambiguous */
6503 Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
6504 "Ambiguous call resolved as CORE::%s(), "
6505 "qualify as such or use &",
6506 GvENAME(hgv));
6507 }
6508 }
6509
6510 reserved_word:
6511 switch (tmp) {
6512
6513 default: /* not a keyword */
6514 /* Trade off - by using this evil construction we can pull the
6515 variable gv into the block labelled keylookup. If not, then
6516 we have to give it function scope so that the goto from the
6517 earlier ':' case doesn't bypass the initialisation. */
6518 if (0) {
6519 just_a_word_zero_gv:
6520 gv = NULL;
6521 gvp = NULL;
6522 orig_keyword = 0;
6523 }
6524 just_a_word: {
6525 SV *sv;
6526 int pkgname = 0;
6527 const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
6528 OP *rv2cv_op;
6529 CV *cv;
6530#ifdef PERL_MAD
6531 SV *nextPL_nextwhite = 0;
6532#endif
6533
6534
6535 /* Get the rest if it looks like a package qualifier */
6536
6537 if (*s == '\'' || (*s == ':' && s[1] == ':')) {
6538 STRLEN morelen;
6539 s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
6540 TRUE, &morelen);
6541 if (!morelen)
6542 Perl_croak(aTHX_ "Bad name after %s%s", PL_tokenbuf,
6543 *s == '\'' ? "'" : "::");
6544 len += morelen;
6545 pkgname = 1;
6546 }
6547
6548 if (PL_expect == XOPERATOR) {
6549 if (PL_bufptr == PL_linestart) {
6550 CopLINE_dec(PL_curcop);
6551 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
6552 CopLINE_inc(PL_curcop);
6553 }
6554 else
6555 no_op("Bareword",s);
6556 }
6557
6558 /* Look for a subroutine with this name in current package,
6559 unless name is "Foo::", in which case Foo is a bareword
6560 (and a package name). */
6561
6562 if (len > 2 && !PL_madskills &&
6563 PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':')
6564 {
6565 if (ckWARN(WARN_BAREWORD)
6566 && ! gv_fetchpvn_flags(PL_tokenbuf, len, 0, SVt_PVHV))
6567 Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
6568 "Bareword \"%s\" refers to nonexistent package",
6569 PL_tokenbuf);
6570 len -= 2;
6571 PL_tokenbuf[len] = '\0';
6572 gv = NULL;
6573 gvp = 0;
6574 }
6575 else {
6576 if (!gv) {
6577 /* Mustn't actually add anything to a symbol table.
6578 But also don't want to "initialise" any placeholder
6579 constants that might already be there into full
6580 blown PVGVs with attached PVCV. */
6581 gv = gv_fetchpvn_flags(PL_tokenbuf, len,
6582 GV_NOADD_NOINIT, SVt_PVCV);
6583 }
6584 len = 0;
6585 }
6586
6587 /* if we saw a global override before, get the right name */
6588
6589 sv = S_newSV_maybe_utf8(aTHX_ PL_tokenbuf,
6590 len ? len : strlen(PL_tokenbuf));
6591 if (gvp) {
6592 SV * const tmp_sv = sv;
6593 sv = newSVpvs("CORE::GLOBAL::");
6594 sv_catsv(sv, tmp_sv);
6595 SvREFCNT_dec(tmp_sv);
6596 }
6597
6598#ifdef PERL_MAD
6599 if (PL_madskills && !PL_thistoken) {
6600 char *start = SvPVX(PL_linestr) + PL_realtokenstart;
6601 PL_thistoken = newSVpvn(start,s - start);
6602 PL_realtokenstart = s - SvPVX(PL_linestr);
6603 }
6604#endif
6605
6606 /* Presume this is going to be a bareword of some sort. */
6607 CLINE;
6608 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
6609 pl_yylval.opval->op_private = OPpCONST_BARE;
6610
6611 /* And if "Foo::", then that's what it certainly is. */
6612 if (len)
6613 goto safe_bareword;
6614
6615 {
6616 OP *const_op = newSVOP(OP_CONST, 0, SvREFCNT_inc_NN(sv));
6617 const_op->op_private = OPpCONST_BARE;
6618 rv2cv_op = newCVREF(0, const_op);
6619 }
6620 cv = rv2cv_op_cv(rv2cv_op, 0);
6621
6622 /* See if it's the indirect object for a list operator. */
6623
6624 if (PL_oldoldbufptr &&
6625 PL_oldoldbufptr < PL_bufptr &&
6626 (PL_oldoldbufptr == PL_last_lop
6627 || PL_oldoldbufptr == PL_last_uni) &&
6628 /* NO SKIPSPACE BEFORE HERE! */
6629 (PL_expect == XREF ||
6630 ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7) == OA_FILEREF))
6631 {
6632 bool immediate_paren = *s == '(';
6633
6634 /* (Now we can afford to cross potential line boundary.) */
6635 s = SKIPSPACE2(s,nextPL_nextwhite);
6636#ifdef PERL_MAD
6637 PL_nextwhite = nextPL_nextwhite; /* assume no & deception */
6638#endif
6639
6640 /* Two barewords in a row may indicate method call. */
6641
6642 if ((isIDFIRST_lazy_if(s,UTF) || *s == '$') &&
6643 (tmp = intuit_method(s, gv, cv))) {
6644 op_free(rv2cv_op);
6645 if (tmp == METHOD && !PL_lex_allbrackets &&
6646 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6647 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
6648 return REPORT(tmp);
6649 }
6650
6651 /* If not a declared subroutine, it's an indirect object. */
6652 /* (But it's an indir obj regardless for sort.) */
6653 /* Also, if "_" follows a filetest operator, it's a bareword */
6654
6655 if (
6656 ( !immediate_paren && (PL_last_lop_op == OP_SORT ||
6657 (!cv &&
6658 (PL_last_lop_op != OP_MAPSTART &&
6659 PL_last_lop_op != OP_GREPSTART))))
6660 || (PL_tokenbuf[0] == '_' && PL_tokenbuf[1] == '\0'
6661 && ((PL_opargs[PL_last_lop_op] & OA_CLASS_MASK) == OA_FILESTATOP))
6662 )
6663 {
6664 PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR;
6665 goto bareword;
6666 }
6667 }
6668
6669 PL_expect = XOPERATOR;
6670#ifdef PERL_MAD
6671 if (isSPACE(*s))
6672 s = SKIPSPACE2(s,nextPL_nextwhite);
6673 PL_nextwhite = nextPL_nextwhite;
6674#else
6675 s = skipspace(s);
6676#endif
6677
6678 /* Is this a word before a => operator? */
6679 if (*s == '=' && s[1] == '>' && !pkgname) {
6680 op_free(rv2cv_op);
6681 CLINE;
6682 sv_setpv(((SVOP*)pl_yylval.opval)->op_sv, PL_tokenbuf);
6683 if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len))
6684 SvUTF8_on(((SVOP*)pl_yylval.opval)->op_sv);
6685 TERM(WORD);
6686 }
6687
6688 /* If followed by a paren, it's certainly a subroutine. */
6689 if (*s == '(') {
6690 CLINE;
6691 if (cv) {
6692 d = s + 1;
6693 while (SPACE_OR_TAB(*d))
6694 d++;
6695 if (*d == ')' && (sv = cv_const_sv(cv))) {
6696 s = d + 1;
6697 goto its_constant;
6698 }
6699 }
6700#ifdef PERL_MAD
6701 if (PL_madskills) {
6702 PL_nextwhite = PL_thiswhite;
6703 PL_thiswhite = 0;
6704 }
6705 start_force(PL_curforce);
6706#endif
6707 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
6708 PL_expect = XOPERATOR;
6709#ifdef PERL_MAD
6710 if (PL_madskills) {
6711 PL_nextwhite = nextPL_nextwhite;
6712 curmad('X', PL_thistoken);
6713 PL_thistoken = newSVpvs("");
6714 }
6715#endif
6716 op_free(rv2cv_op);
6717 force_next(WORD);
6718 pl_yylval.ival = 0;
6719 TOKEN('&');
6720 }
6721
6722 /* If followed by var or block, call it a method (unless sub) */
6723
6724 if ((*s == '$' || *s == '{') && !cv) {
6725 op_free(rv2cv_op);
6726 PL_last_lop = PL_oldbufptr;
6727 PL_last_lop_op = OP_METHOD;
6728 if (!PL_lex_allbrackets &&
6729 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6730 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
6731 PREBLOCK(METHOD);
6732 }
6733
6734 /* If followed by a bareword, see if it looks like indir obj. */
6735
6736 if (!orig_keyword
6737 && (isIDFIRST_lazy_if(s,UTF) || *s == '$')
6738 && (tmp = intuit_method(s, gv, cv))) {
6739 op_free(rv2cv_op);
6740 if (tmp == METHOD && !PL_lex_allbrackets &&
6741 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6742 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
6743 return REPORT(tmp);
6744 }
6745
6746 /* Not a method, so call it a subroutine (if defined) */
6747
6748 if (cv) {
6749 if (lastchar == '-')
6750 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
6751 "Ambiguous use of -%s resolved as -&%s()",
6752 PL_tokenbuf, PL_tokenbuf);
6753 /* Check for a constant sub */
6754 if ((sv = cv_const_sv(cv))) {
6755 its_constant:
6756 op_free(rv2cv_op);
6757 SvREFCNT_dec(((SVOP*)pl_yylval.opval)->op_sv);
6758 ((SVOP*)pl_yylval.opval)->op_sv = SvREFCNT_inc_simple(sv);
6759 pl_yylval.opval->op_private = 0;
6760 pl_yylval.opval->op_flags |= OPf_SPECIAL;
6761 TOKEN(WORD);
6762 }
6763
6764 op_free(pl_yylval.opval);
6765 pl_yylval.opval = rv2cv_op;
6766 pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
6767 PL_last_lop = PL_oldbufptr;
6768 PL_last_lop_op = OP_ENTERSUB;
6769 /* Is there a prototype? */
6770 if (
6771#ifdef PERL_MAD
6772 cv &&
6773#endif
6774 SvPOK(cv))
6775 {
6776 STRLEN protolen;
6777 const char *proto = SvPV_const(MUTABLE_SV(cv), protolen);
6778 if (!protolen)
6779 TERM(FUNC0SUB);
6780 while (*proto == ';')
6781 proto++;
6782 if (
6783 (
6784 (
6785 *proto == '$' || *proto == '_'
6786 || *proto == '*' || *proto == '+'
6787 )
6788 && proto[1] == '\0'
6789 )
6790 || (
6791 *proto == '\\' && proto[1] && proto[2] == '\0'
6792 )
6793 )
6794 OPERATOR(UNIOPSUB);
6795 if (*proto == '\\' && proto[1] == '[') {
6796 const char *p = proto + 2;
6797 while(*p && *p != ']')
6798 ++p;
6799 if(*p == ']' && !p[1]) OPERATOR(UNIOPSUB);
6800 }
6801 if (*proto == '&' && *s == '{') {
6802 if (PL_curstash)
6803 sv_setpvs(PL_subname, "__ANON__");
6804 else
6805 sv_setpvs(PL_subname, "__ANON__::__ANON__");
6806 if (!PL_lex_allbrackets &&
6807 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6808 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
6809 PREBLOCK(LSTOPSUB);
6810 }
6811 }
6812#ifdef PERL_MAD
6813 {
6814 if (PL_madskills) {
6815 PL_nextwhite = PL_thiswhite;
6816 PL_thiswhite = 0;
6817 }
6818 start_force(PL_curforce);
6819 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
6820 PL_expect = XTERM;
6821 if (PL_madskills) {
6822 PL_nextwhite = nextPL_nextwhite;
6823 curmad('X', PL_thistoken);
6824 PL_thistoken = newSVpvs("");
6825 }
6826 force_next(WORD);
6827 if (!PL_lex_allbrackets &&
6828 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6829 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
6830 TOKEN(NOAMP);
6831 }
6832 }
6833
6834 /* Guess harder when madskills require "best effort". */
6835 if (PL_madskills && (!gv || !GvCVu(gv))) {
6836 int probable_sub = 0;
6837 if (strchr("\"'`$@%0123456789!*+{[<", *s))
6838 probable_sub = 1;
6839 else if (isALPHA(*s)) {
6840 char tmpbuf[1024];
6841 STRLEN tmplen;
6842 d = s;
6843 d = scan_word(d, tmpbuf, sizeof tmpbuf, TRUE, &tmplen);
6844 if (!keyword(tmpbuf, tmplen, 0))
6845 probable_sub = 1;
6846 else {
6847 while (d < PL_bufend && isSPACE(*d))
6848 d++;
6849 if (*d == '=' && d[1] == '>')
6850 probable_sub = 1;
6851 }
6852 }
6853 if (probable_sub) {
6854 gv = gv_fetchpv(PL_tokenbuf, GV_ADD, SVt_PVCV);
6855 op_free(pl_yylval.opval);
6856 pl_yylval.opval = rv2cv_op;
6857 pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
6858 PL_last_lop = PL_oldbufptr;
6859 PL_last_lop_op = OP_ENTERSUB;
6860 PL_nextwhite = PL_thiswhite;
6861 PL_thiswhite = 0;
6862 start_force(PL_curforce);
6863 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
6864 PL_expect = XTERM;
6865 PL_nextwhite = nextPL_nextwhite;
6866 curmad('X', PL_thistoken);
6867 PL_thistoken = newSVpvs("");
6868 force_next(WORD);
6869 if (!PL_lex_allbrackets &&
6870 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6871 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
6872 TOKEN(NOAMP);
6873 }
6874#else
6875 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
6876 PL_expect = XTERM;
6877 force_next(WORD);
6878 if (!PL_lex_allbrackets &&
6879 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6880 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
6881 TOKEN(NOAMP);
6882#endif
6883 }
6884
6885 /* Call it a bare word */
6886
6887 if (PL_hints & HINT_STRICT_SUBS)
6888 pl_yylval.opval->op_private |= OPpCONST_STRICT;
6889 else {
6890 bareword:
6891 /* after "print" and similar functions (corresponding to
6892 * "F? L" in opcode.pl), whatever wasn't already parsed as
6893 * a filehandle should be subject to "strict subs".
6894 * Likewise for the optional indirect-object argument to system
6895 * or exec, which can't be a bareword */
6896 if ((PL_last_lop_op == OP_PRINT
6897 || PL_last_lop_op == OP_PRTF
6898 || PL_last_lop_op == OP_SAY
6899 || PL_last_lop_op == OP_SYSTEM
6900 || PL_last_lop_op == OP_EXEC)
6901 && (PL_hints & HINT_STRICT_SUBS))
6902 pl_yylval.opval->op_private |= OPpCONST_STRICT;
6903 if (lastchar != '-') {
6904 if (ckWARN(WARN_RESERVED)) {
6905 d = PL_tokenbuf;
6906 while (isLOWER(*d))
6907 d++;
6908 if (!*d && !gv_stashpv(PL_tokenbuf, 0))
6909 Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved,
6910 PL_tokenbuf);
6911 }
6912 }
6913 }
6914 op_free(rv2cv_op);
6915
6916 safe_bareword:
6917 if ((lastchar == '*' || lastchar == '%' || lastchar == '&')) {
6918 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
6919 "Operator or semicolon missing before %c%s",
6920 lastchar, PL_tokenbuf);
6921 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
6922 "Ambiguous use of %c resolved as operator %c",
6923 lastchar, lastchar);
6924 }
6925 TOKEN(WORD);
6926 }
6927
6928 case KEY___FILE__:
6929 FUN0OP(
6930 (OP*)newSVOP(OP_CONST, 0, newSVpv(CopFILE(PL_curcop),0))
6931 );
6932
6933 case KEY___LINE__:
6934 FUN0OP(
6935 (OP*)newSVOP(OP_CONST, 0,
6936 Perl_newSVpvf(aTHX_ "%"IVdf, (IV)CopLINE(PL_curcop)))
6937 );
6938
6939 case KEY___PACKAGE__:
6940 FUN0OP(
6941 (OP*)newSVOP(OP_CONST, 0,
6942 (PL_curstash
6943 ? newSVhek(HvNAME_HEK(PL_curstash))
6944 : &PL_sv_undef))
6945 );
6946
6947 case KEY___DATA__:
6948 case KEY___END__: {
6949 GV *gv;
6950 if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) {
6951 const char *pname = "main";
6952 if (PL_tokenbuf[2] == 'D')
6953 pname = HvNAME_get(PL_curstash ? PL_curstash : PL_defstash);
6954 gv = gv_fetchpv(Perl_form(aTHX_ "%s::DATA", pname), GV_ADD,
6955 SVt_PVIO);
6956 GvMULTI_on(gv);
6957 if (!GvIO(gv))
6958 GvIOp(gv) = newIO();
6959 IoIFP(GvIOp(gv)) = PL_rsfp;
6960#if defined(HAS_FCNTL) && defined(F_SETFD)
6961 {
6962 const int fd = PerlIO_fileno(PL_rsfp);
6963 fcntl(fd,F_SETFD,fd >= 3);
6964 }
6965#endif
6966 /* Mark this internal pseudo-handle as clean */
6967 IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
6968 if ((PerlIO*)PL_rsfp == PerlIO_stdin())
6969 IoTYPE(GvIOp(gv)) = IoTYPE_STD;
6970 else
6971 IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY;
6972#if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS)
6973 /* if the script was opened in binmode, we need to revert
6974 * it to text mode for compatibility; but only iff it has CRs
6975 * XXX this is a questionable hack at best. */
6976 if (PL_bufend-PL_bufptr > 2
6977 && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r')
6978 {
6979 Off_t loc = 0;
6980 if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) {
6981 loc = PerlIO_tell(PL_rsfp);
6982 (void)PerlIO_seek(PL_rsfp, 0L, 0);
6983 }
6984#ifdef NETWARE
6985 if (PerlLIO_setmode(PL_rsfp, O_TEXT) != -1) {
6986#else
6987 if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) {
6988#endif /* NETWARE */
6989#ifdef PERLIO_IS_STDIO /* really? */
6990# if defined(__BORLANDC__)
6991 /* XXX see note in do_binmode() */
6992 ((FILE*)PL_rsfp)->flags &= ~_F_BIN;
6993# endif
6994#endif
6995 if (loc > 0)
6996 PerlIO_seek(PL_rsfp, loc, 0);
6997 }
6998 }
6999#endif
7000#ifdef PERLIO_LAYERS
7001 if (!IN_BYTES) {
7002 if (UTF)
7003 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
7004 else if (PL_encoding) {
7005 SV *name;
7006 dSP;
7007 ENTER;
7008 SAVETMPS;
7009 PUSHMARK(sp);
7010 EXTEND(SP, 1);
7011 XPUSHs(PL_encoding);
7012 PUTBACK;
7013 call_method("name", G_SCALAR);
7014 SPAGAIN;
7015 name = POPs;
7016 PUTBACK;
7017 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL,
7018 Perl_form(aTHX_ ":encoding(%"SVf")",
7019 SVfARG(name)));
7020 FREETMPS;
7021 LEAVE;
7022 }
7023 }
7024#endif
7025#ifdef PERL_MAD
7026 if (PL_madskills) {
7027 if (PL_realtokenstart >= 0) {
7028 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
7029 if (!PL_endwhite)
7030 PL_endwhite = newSVpvs("");
7031 sv_catsv(PL_endwhite, PL_thiswhite);
7032 PL_thiswhite = 0;
7033 sv_catpvn(PL_endwhite, tstart, PL_bufend - tstart);
7034 PL_realtokenstart = -1;
7035 }
7036 while ((s = filter_gets(PL_endwhite, SvCUR(PL_endwhite)))
7037 != NULL) ;
7038 }
7039#endif
7040 PL_rsfp = NULL;
7041 }
7042 goto fake_eof;
7043 }
7044
7045 case KEY_AUTOLOAD:
7046 case KEY_DESTROY:
7047 case KEY_BEGIN:
7048 case KEY_UNITCHECK:
7049 case KEY_CHECK:
7050 case KEY_INIT:
7051 case KEY_END:
7052 if (PL_expect == XSTATE) {
7053 s = PL_bufptr;
7054 goto really_sub;
7055 }
7056 goto just_a_word;
7057
7058 case KEY_CORE:
7059 if (*s == ':' && s[1] == ':') {
7060 s += 2;
7061 d = s;
7062 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
7063 if (!(tmp = keyword(PL_tokenbuf, len, 1)))
7064 Perl_croak(aTHX_ "CORE::%s is not a keyword", PL_tokenbuf);
7065 if (tmp < 0)
7066 tmp = -tmp;
7067 else if (tmp == KEY_require || tmp == KEY_do)
7068 /* that's a way to remember we saw "CORE::" */
7069 orig_keyword = tmp;
7070 goto reserved_word;
7071 }
7072 goto just_a_word;
7073
7074 case KEY_abs:
7075 UNI(OP_ABS);
7076
7077 case KEY_alarm:
7078 UNI(OP_ALARM);
7079
7080 case KEY_accept:
7081 LOP(OP_ACCEPT,XTERM);
7082
7083 case KEY_and:
7084 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
7085 return REPORT(0);
7086 OPERATOR(ANDOP);
7087
7088 case KEY_atan2:
7089 LOP(OP_ATAN2,XTERM);
7090
7091 case KEY_bind:
7092 LOP(OP_BIND,XTERM);
7093
7094 case KEY_binmode:
7095 LOP(OP_BINMODE,XTERM);
7096
7097 case KEY_bless:
7098 LOP(OP_BLESS,XTERM);
7099
7100 case KEY_break:
7101 FUN0(OP_BREAK);
7102
7103 case KEY_chop:
7104 UNI(OP_CHOP);
7105
7106 case KEY_continue:
7107 /* We have to disambiguate the two senses of
7108 "continue". If the next token is a '{' then
7109 treat it as the start of a continue block;
7110 otherwise treat it as a control operator.
7111 */
7112 s = skipspace(s);
7113 if (*s == '{')
7114 PREBLOCK(CONTINUE);
7115 else
7116 FUN0(OP_CONTINUE);
7117
7118 case KEY_chdir:
7119 /* may use HOME */
7120 (void)gv_fetchpvs("ENV", GV_ADD|GV_NOTQUAL, SVt_PVHV);
7121 UNI(OP_CHDIR);
7122
7123 case KEY_close:
7124 UNI(OP_CLOSE);
7125
7126 case KEY_closedir:
7127 UNI(OP_CLOSEDIR);
7128
7129 case KEY_cmp:
7130 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7131 return REPORT(0);
7132 Eop(OP_SCMP);
7133
7134 case KEY_caller:
7135 UNI(OP_CALLER);
7136
7137 case KEY_crypt:
7138#ifdef FCRYPT
7139 if (!PL_cryptseen) {
7140 PL_cryptseen = TRUE;
7141 init_des();
7142 }
7143#endif
7144 LOP(OP_CRYPT,XTERM);
7145
7146 case KEY_chmod:
7147 LOP(OP_CHMOD,XTERM);
7148
7149 case KEY_chown:
7150 LOP(OP_CHOWN,XTERM);
7151
7152 case KEY_connect:
7153 LOP(OP_CONNECT,XTERM);
7154
7155 case KEY_chr:
7156 UNI(OP_CHR);
7157
7158 case KEY_cos:
7159 UNI(OP_COS);
7160
7161 case KEY_chroot:
7162 UNI(OP_CHROOT);
7163
7164 case KEY_default:
7165 PREBLOCK(DEFAULT);
7166
7167 case KEY_do:
7168 s = SKIPSPACE1(s);
7169 if (*s == '{')
7170 PRETERMBLOCK(DO);
7171 if (*s != '\'')
7172 s = force_word(s,WORD,TRUE,TRUE,FALSE);
7173 if (orig_keyword == KEY_do) {
7174 orig_keyword = 0;
7175 pl_yylval.ival = 1;
7176 }
7177 else
7178 pl_yylval.ival = 0;
7179 OPERATOR(DO);
7180
7181 case KEY_die:
7182 PL_hints |= HINT_BLOCK_SCOPE;
7183 LOP(OP_DIE,XTERM);
7184
7185 case KEY_defined:
7186 UNI(OP_DEFINED);
7187
7188 case KEY_delete:
7189 UNI(OP_DELETE);
7190
7191 case KEY_dbmopen:
7192 Perl_populate_isa(aTHX_ STR_WITH_LEN("AnyDBM_File::ISA"),
7193 STR_WITH_LEN("NDBM_File::"),
7194 STR_WITH_LEN("DB_File::"),
7195 STR_WITH_LEN("GDBM_File::"),
7196 STR_WITH_LEN("SDBM_File::"),
7197 STR_WITH_LEN("ODBM_File::"),
7198 NULL);
7199 LOP(OP_DBMOPEN,XTERM);
7200
7201 case KEY_dbmclose:
7202 UNI(OP_DBMCLOSE);
7203
7204 case KEY_dump:
7205 s = force_word(s,WORD,TRUE,FALSE,FALSE);
7206 LOOPX(OP_DUMP);
7207
7208 case KEY_else:
7209 PREBLOCK(ELSE);
7210
7211 case KEY_elsif:
7212 pl_yylval.ival = CopLINE(PL_curcop);
7213 OPERATOR(ELSIF);
7214
7215 case KEY_eq:
7216 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7217 return REPORT(0);
7218 Eop(OP_SEQ);
7219
7220 case KEY_exists:
7221 UNI(OP_EXISTS);
7222
7223 case KEY_exit:
7224 if (PL_madskills)
7225 UNI(OP_INT);
7226 UNI(OP_EXIT);
7227
7228 case KEY_eval:
7229 s = SKIPSPACE1(s);
7230 if (*s == '{') { /* block eval */
7231 PL_expect = XTERMBLOCK;
7232 UNIBRACK(OP_ENTERTRY);
7233 }
7234 else { /* string eval */
7235 PL_expect = XTERM;
7236 UNIBRACK(OP_ENTEREVAL);
7237 }
7238
7239 case KEY_eof:
7240 UNI(OP_EOF);
7241
7242 case KEY_exp:
7243 UNI(OP_EXP);
7244
7245 case KEY_each:
7246 UNI(OP_EACH);
7247
7248 case KEY_exec:
7249 LOP(OP_EXEC,XREF);
7250
7251 case KEY_endhostent:
7252 FUN0(OP_EHOSTENT);
7253
7254 case KEY_endnetent:
7255 FUN0(OP_ENETENT);
7256
7257 case KEY_endservent:
7258 FUN0(OP_ESERVENT);
7259
7260 case KEY_endprotoent:
7261 FUN0(OP_EPROTOENT);
7262
7263 case KEY_endpwent:
7264 FUN0(OP_EPWENT);
7265
7266 case KEY_endgrent:
7267 FUN0(OP_EGRENT);
7268
7269 case KEY_for:
7270 case KEY_foreach:
7271 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
7272 return REPORT(0);
7273 pl_yylval.ival = CopLINE(PL_curcop);
7274 s = SKIPSPACE1(s);
7275 if (PL_expect == XSTATE && isIDFIRST_lazy_if(s,UTF)) {
7276 char *p = s;
7277#ifdef PERL_MAD
7278 int soff = s - SvPVX(PL_linestr); /* for skipspace realloc */
7279#endif
7280
7281 if ((PL_bufend - p) >= 3 &&
7282 strnEQ(p, "my", 2) && isSPACE(*(p + 2)))
7283 p += 2;
7284 else if ((PL_bufend - p) >= 4 &&
7285 strnEQ(p, "our", 3) && isSPACE(*(p + 3)))
7286 p += 3;
7287 p = PEEKSPACE(p);
7288 if (isIDFIRST_lazy_if(p,UTF)) {
7289 p = scan_ident(p, PL_bufend,
7290 PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
7291 p = PEEKSPACE(p);
7292 }
7293 if (*p != '$')
7294 Perl_croak(aTHX_ "Missing $ on loop variable");
7295#ifdef PERL_MAD
7296 s = SvPVX(PL_linestr) + soff;
7297#endif
7298 }
7299 OPERATOR(FOR);
7300
7301 case KEY_formline:
7302 LOP(OP_FORMLINE,XTERM);
7303
7304 case KEY_fork:
7305 FUN0(OP_FORK);
7306
7307 case KEY_fcntl:
7308 LOP(OP_FCNTL,XTERM);
7309
7310 case KEY_fileno:
7311 UNI(OP_FILENO);
7312
7313 case KEY_flock:
7314 LOP(OP_FLOCK,XTERM);
7315
7316 case KEY_gt:
7317 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7318 return REPORT(0);
7319 Rop(OP_SGT);
7320
7321 case KEY_ge:
7322 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7323 return REPORT(0);
7324 Rop(OP_SGE);
7325
7326 case KEY_grep:
7327 LOP(OP_GREPSTART, XREF);
7328
7329 case KEY_goto:
7330 s = force_word(s,WORD,TRUE,FALSE,FALSE);
7331 LOOPX(OP_GOTO);
7332
7333 case KEY_gmtime:
7334 UNI(OP_GMTIME);
7335
7336 case KEY_getc:
7337 UNIDOR(OP_GETC);
7338
7339 case KEY_getppid:
7340 FUN0(OP_GETPPID);
7341
7342 case KEY_getpgrp:
7343 UNI(OP_GETPGRP);
7344
7345 case KEY_getpriority:
7346 LOP(OP_GETPRIORITY,XTERM);
7347
7348 case KEY_getprotobyname:
7349 UNI(OP_GPBYNAME);
7350
7351 case KEY_getprotobynumber:
7352 LOP(OP_GPBYNUMBER,XTERM);
7353
7354 case KEY_getprotoent:
7355 FUN0(OP_GPROTOENT);
7356
7357 case KEY_getpwent:
7358 FUN0(OP_GPWENT);
7359
7360 case KEY_getpwnam:
7361 UNI(OP_GPWNAM);
7362
7363 case KEY_getpwuid:
7364 UNI(OP_GPWUID);
7365
7366 case KEY_getpeername:
7367 UNI(OP_GETPEERNAME);
7368
7369 case KEY_gethostbyname:
7370 UNI(OP_GHBYNAME);
7371
7372 case KEY_gethostbyaddr:
7373 LOP(OP_GHBYADDR,XTERM);
7374
7375 case KEY_gethostent:
7376 FUN0(OP_GHOSTENT);
7377
7378 case KEY_getnetbyname:
7379 UNI(OP_GNBYNAME);
7380
7381 case KEY_getnetbyaddr:
7382 LOP(OP_GNBYADDR,XTERM);
7383
7384 case KEY_getnetent:
7385 FUN0(OP_GNETENT);
7386
7387 case KEY_getservbyname:
7388 LOP(OP_GSBYNAME,XTERM);
7389
7390 case KEY_getservbyport:
7391 LOP(OP_GSBYPORT,XTERM);
7392
7393 case KEY_getservent:
7394 FUN0(OP_GSERVENT);
7395
7396 case KEY_getsockname:
7397 UNI(OP_GETSOCKNAME);
7398
7399 case KEY_getsockopt:
7400 LOP(OP_GSOCKOPT,XTERM);
7401
7402 case KEY_getgrent:
7403 FUN0(OP_GGRENT);
7404
7405 case KEY_getgrnam:
7406 UNI(OP_GGRNAM);
7407
7408 case KEY_getgrgid:
7409 UNI(OP_GGRGID);
7410
7411 case KEY_getlogin:
7412 FUN0(OP_GETLOGIN);
7413
7414 case KEY_given:
7415 pl_yylval.ival = CopLINE(PL_curcop);
7416 OPERATOR(GIVEN);
7417
7418 case KEY_glob:
7419 LOP(OP_GLOB,XTERM);
7420
7421 case KEY_hex:
7422 UNI(OP_HEX);
7423
7424 case KEY_if:
7425 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
7426 return REPORT(0);
7427 pl_yylval.ival = CopLINE(PL_curcop);
7428 OPERATOR(IF);
7429
7430 case KEY_index:
7431 LOP(OP_INDEX,XTERM);
7432
7433 case KEY_int:
7434 UNI(OP_INT);
7435
7436 case KEY_ioctl:
7437 LOP(OP_IOCTL,XTERM);
7438
7439 case KEY_join:
7440 LOP(OP_JOIN,XTERM);
7441
7442 case KEY_keys:
7443 UNI(OP_KEYS);
7444
7445 case KEY_kill:
7446 LOP(OP_KILL,XTERM);
7447
7448 case KEY_last:
7449 s = force_word(s,WORD,TRUE,FALSE,FALSE);
7450 LOOPX(OP_LAST);
7451
7452 case KEY_lc:
7453 UNI(OP_LC);
7454
7455 case KEY_lcfirst:
7456 UNI(OP_LCFIRST);
7457
7458 case KEY_local:
7459 pl_yylval.ival = 0;
7460 OPERATOR(LOCAL);
7461
7462 case KEY_length:
7463 UNI(OP_LENGTH);
7464
7465 case KEY_lt:
7466 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7467 return REPORT(0);
7468 Rop(OP_SLT);
7469
7470 case KEY_le:
7471 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7472 return REPORT(0);
7473 Rop(OP_SLE);
7474
7475 case KEY_localtime:
7476 UNI(OP_LOCALTIME);
7477
7478 case KEY_log:
7479 UNI(OP_LOG);
7480
7481 case KEY_link:
7482 LOP(OP_LINK,XTERM);
7483
7484 case KEY_listen:
7485 LOP(OP_LISTEN,XTERM);
7486
7487 case KEY_lock:
7488 UNI(OP_LOCK);
7489
7490 case KEY_lstat:
7491 UNI(OP_LSTAT);
7492
7493 case KEY_m:
7494 s = scan_pat(s,OP_MATCH);
7495 TERM(sublex_start());
7496
7497 case KEY_map:
7498 LOP(OP_MAPSTART, XREF);
7499
7500 case KEY_mkdir:
7501 LOP(OP_MKDIR,XTERM);
7502
7503 case KEY_msgctl:
7504 LOP(OP_MSGCTL,XTERM);
7505
7506 case KEY_msgget:
7507 LOP(OP_MSGGET,XTERM);
7508
7509 case KEY_msgrcv:
7510 LOP(OP_MSGRCV,XTERM);
7511
7512 case KEY_msgsnd:
7513 LOP(OP_MSGSND,XTERM);
7514
7515 case KEY_our:
7516 case KEY_my:
7517 case KEY_state:
7518 PL_in_my = (U16)tmp;
7519 s = SKIPSPACE1(s);
7520 if (isIDFIRST_lazy_if(s,UTF)) {
7521#ifdef PERL_MAD
7522 char* start = s;
7523#endif
7524 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
7525 if (len == 3 && strnEQ(PL_tokenbuf, "sub", 3))
7526 goto really_sub;
7527 PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
7528 if (!PL_in_my_stash) {
7529 char tmpbuf[1024];
7530 PL_bufptr = s;
7531 my_snprintf(tmpbuf, sizeof(tmpbuf), "No such class %.1000s", PL_tokenbuf);
7532 yyerror(tmpbuf);
7533 }
7534#ifdef PERL_MAD
7535 if (PL_madskills) { /* just add type to declarator token */
7536 sv_catsv(PL_thistoken, PL_nextwhite);
7537 PL_nextwhite = 0;
7538 sv_catpvn(PL_thistoken, start, s - start);
7539 }
7540#endif
7541 }
7542 pl_yylval.ival = 1;
7543 OPERATOR(MY);
7544
7545 case KEY_next:
7546 s = force_word(s,WORD,TRUE,FALSE,FALSE);
7547 LOOPX(OP_NEXT);
7548
7549 case KEY_ne:
7550 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7551 return REPORT(0);
7552 Eop(OP_SNE);
7553
7554 case KEY_no:
7555 s = tokenize_use(0, s);
7556 OPERATOR(USE);
7557
7558 case KEY_not:
7559 if (*s == '(' || (s = SKIPSPACE1(s), *s == '('))
7560 FUN1(OP_NOT);
7561 else {
7562 if (!PL_lex_allbrackets &&
7563 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7564 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7565 OPERATOR(NOTOP);
7566 }
7567
7568 case KEY_open:
7569 s = SKIPSPACE1(s);
7570 if (isIDFIRST_lazy_if(s,UTF)) {
7571 const char *t;
7572 for (d = s; isALNUM_lazy_if(d,UTF);)
7573 d++;
7574 for (t=d; isSPACE(*t);)
7575 t++;
7576 if ( *t && strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
7577 /* [perl #16184] */
7578 && !(t[0] == '=' && t[1] == '>')
7579 ) {
7580 int parms_len = (int)(d-s);
7581 Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
7582 "Precedence problem: open %.*s should be open(%.*s)",
7583 parms_len, s, parms_len, s);
7584 }
7585 }
7586 LOP(OP_OPEN,XTERM);
7587
7588 case KEY_or:
7589 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
7590 return REPORT(0);
7591 pl_yylval.ival = OP_OR;
7592 OPERATOR(OROP);
7593
7594 case KEY_ord:
7595 UNI(OP_ORD);
7596
7597 case KEY_oct:
7598 UNI(OP_OCT);
7599
7600 case KEY_opendir:
7601 LOP(OP_OPEN_DIR,XTERM);
7602
7603 case KEY_print:
7604 checkcomma(s,PL_tokenbuf,"filehandle");
7605 LOP(OP_PRINT,XREF);
7606
7607 case KEY_printf:
7608 checkcomma(s,PL_tokenbuf,"filehandle");
7609 LOP(OP_PRTF,XREF);
7610
7611 case KEY_prototype:
7612 UNI(OP_PROTOTYPE);
7613
7614 case KEY_push:
7615 LOP(OP_PUSH,XTERM);
7616
7617 case KEY_pop:
7618 UNIDOR(OP_POP);
7619
7620 case KEY_pos:
7621 UNIDOR(OP_POS);
7622
7623 case KEY_pack:
7624 LOP(OP_PACK,XTERM);
7625
7626 case KEY_package:
7627 s = force_word(s,WORD,FALSE,TRUE,FALSE);
7628 s = SKIPSPACE1(s);
7629 s = force_strict_version(s);
7630 PL_lex_expect = XBLOCK;
7631 OPERATOR(PACKAGE);
7632
7633 case KEY_pipe:
7634 LOP(OP_PIPE_OP,XTERM);
7635
7636 case KEY_q:
7637 s = scan_str(s,!!PL_madskills,FALSE);
7638 if (!s)
7639 missingterm(NULL);
7640 pl_yylval.ival = OP_CONST;
7641 TERM(sublex_start());
7642
7643 case KEY_quotemeta:
7644 UNI(OP_QUOTEMETA);
7645
7646 case KEY_qw: {
7647 OP *words = NULL;
7648 s = scan_str(s,!!PL_madskills,FALSE);
7649 if (!s)
7650 missingterm(NULL);
7651 PL_expect = XOPERATOR;
7652 if (SvCUR(PL_lex_stuff)) {
7653 int warned_comma = !ckWARN(WARN_QW);
7654 int warned_comment = warned_comma;
7655 d = SvPV_force(PL_lex_stuff, len);
7656 while (len) {
7657 for (; isSPACE(*d) && len; --len, ++d)
7658 /**/;
7659 if (len) {
7660 SV *sv;
7661 const char *b = d;
7662 if (!warned_comma || !warned_comment) {
7663 for (; !isSPACE(*d) && len; --len, ++d) {
7664 if (!warned_comma && *d == ',') {
7665 Perl_warner(aTHX_ packWARN(WARN_QW),
7666 "Possible attempt to separate words with commas");
7667 ++warned_comma;
7668 }
7669 else if (!warned_comment && *d == '#') {
7670 Perl_warner(aTHX_ packWARN(WARN_QW),
7671 "Possible attempt to put comments in qw() list");
7672 ++warned_comment;
7673 }
7674 }
7675 }
7676 else {
7677 for (; !isSPACE(*d) && len; --len, ++d)
7678 /**/;
7679 }
7680 sv = newSVpvn_utf8(b, d-b, DO_UTF8(PL_lex_stuff));
7681 words = op_append_elem(OP_LIST, words,
7682 newSVOP(OP_CONST, 0, tokeq(sv)));
7683 }
7684 }
7685 }
7686 if (!words)
7687 words = newNULLLIST();
7688 if (PL_lex_stuff) {
7689 SvREFCNT_dec(PL_lex_stuff);
7690 PL_lex_stuff = NULL;
7691 }
7692 PL_expect = XOPERATOR;
7693 pl_yylval.opval = sawparens(words);
7694 TOKEN(QWLIST);
7695 }
7696
7697 case KEY_qq:
7698 s = scan_str(s,!!PL_madskills,FALSE);
7699 if (!s)
7700 missingterm(NULL);
7701 pl_yylval.ival = OP_STRINGIFY;
7702 if (SvIVX(PL_lex_stuff) == '\'')
7703 SvIV_set(PL_lex_stuff, 0); /* qq'$foo' should interpolate */
7704 TERM(sublex_start());
7705
7706 case KEY_qr:
7707 s = scan_pat(s,OP_QR);
7708 TERM(sublex_start());
7709
7710 case KEY_qx:
7711 s = scan_str(s,!!PL_madskills,FALSE);
7712 if (!s)
7713 missingterm(NULL);
7714 readpipe_override();
7715 TERM(sublex_start());
7716
7717 case KEY_return:
7718 OLDLOP(OP_RETURN);
7719
7720 case KEY_require:
7721 s = SKIPSPACE1(s);
7722 if (isDIGIT(*s)) {
7723 s = force_version(s, FALSE);
7724 }
7725 else if (*s != 'v' || !isDIGIT(s[1])
7726 || (s = force_version(s, TRUE), *s == 'v'))
7727 {
7728 *PL_tokenbuf = '\0';
7729 s = force_word(s,WORD,TRUE,TRUE,FALSE);
7730 if (isIDFIRST_lazy_if(PL_tokenbuf,UTF))
7731 gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf), GV_ADD);
7732 else if (*s == '<')
7733 yyerror("<> should be quotes");
7734 }
7735 if (orig_keyword == KEY_require) {
7736 orig_keyword = 0;
7737 pl_yylval.ival = 1;
7738 }
7739 else
7740 pl_yylval.ival = 0;
7741 PL_expect = XTERM;
7742 PL_bufptr = s;
7743 PL_last_uni = PL_oldbufptr;
7744 PL_last_lop_op = OP_REQUIRE;
7745 s = skipspace(s);
7746 return REPORT( (int)REQUIRE );
7747
7748 case KEY_reset:
7749 UNI(OP_RESET);
7750
7751 case KEY_redo:
7752 s = force_word(s,WORD,TRUE,FALSE,FALSE);
7753 LOOPX(OP_REDO);
7754
7755 case KEY_rename:
7756 LOP(OP_RENAME,XTERM);
7757
7758 case KEY_rand:
7759 UNI(OP_RAND);
7760
7761 case KEY_rmdir:
7762 UNI(OP_RMDIR);
7763
7764 case KEY_rindex:
7765 LOP(OP_RINDEX,XTERM);
7766
7767 case KEY_read:
7768 LOP(OP_READ,XTERM);
7769
7770 case KEY_readdir:
7771 UNI(OP_READDIR);
7772
7773 case KEY_readline:
7774 UNIDOR(OP_READLINE);
7775
7776 case KEY_readpipe:
7777 UNIDOR(OP_BACKTICK);
7778
7779 case KEY_rewinddir:
7780 UNI(OP_REWINDDIR);
7781
7782 case KEY_recv:
7783 LOP(OP_RECV,XTERM);
7784
7785 case KEY_reverse:
7786 LOP(OP_REVERSE,XTERM);
7787
7788 case KEY_readlink:
7789 UNIDOR(OP_READLINK);
7790
7791 case KEY_ref:
7792 UNI(OP_REF);
7793
7794 case KEY_s:
7795 s = scan_subst(s);
7796 if (pl_yylval.opval)
7797 TERM(sublex_start());
7798 else
7799 TOKEN(1); /* force error */
7800
7801 case KEY_say:
7802 checkcomma(s,PL_tokenbuf,"filehandle");
7803 LOP(OP_SAY,XREF);
7804
7805 case KEY_chomp:
7806 UNI(OP_CHOMP);
7807
7808 case KEY_scalar:
7809 UNI(OP_SCALAR);
7810
7811 case KEY_select:
7812 LOP(OP_SELECT,XTERM);
7813
7814 case KEY_seek:
7815 LOP(OP_SEEK,XTERM);
7816
7817 case KEY_semctl:
7818 LOP(OP_SEMCTL,XTERM);
7819
7820 case KEY_semget:
7821 LOP(OP_SEMGET,XTERM);
7822
7823 case KEY_semop:
7824 LOP(OP_SEMOP,XTERM);
7825
7826 case KEY_send:
7827 LOP(OP_SEND,XTERM);
7828
7829 case KEY_setpgrp:
7830 LOP(OP_SETPGRP,XTERM);
7831
7832 case KEY_setpriority:
7833 LOP(OP_SETPRIORITY,XTERM);
7834
7835 case KEY_sethostent:
7836 UNI(OP_SHOSTENT);
7837
7838 case KEY_setnetent:
7839 UNI(OP_SNETENT);
7840
7841 case KEY_setservent:
7842 UNI(OP_SSERVENT);
7843
7844 case KEY_setprotoent:
7845 UNI(OP_SPROTOENT);
7846
7847 case KEY_setpwent:
7848 FUN0(OP_SPWENT);
7849
7850 case KEY_setgrent:
7851 FUN0(OP_SGRENT);
7852
7853 case KEY_seekdir:
7854 LOP(OP_SEEKDIR,XTERM);
7855
7856 case KEY_setsockopt:
7857 LOP(OP_SSOCKOPT,XTERM);
7858
7859 case KEY_shift:
7860 UNIDOR(OP_SHIFT);
7861
7862 case KEY_shmctl:
7863 LOP(OP_SHMCTL,XTERM);
7864
7865 case KEY_shmget:
7866 LOP(OP_SHMGET,XTERM);
7867
7868 case KEY_shmread:
7869 LOP(OP_SHMREAD,XTERM);
7870
7871 case KEY_shmwrite:
7872 LOP(OP_SHMWRITE,XTERM);
7873
7874 case KEY_shutdown:
7875 LOP(OP_SHUTDOWN,XTERM);
7876
7877 case KEY_sin:
7878 UNI(OP_SIN);
7879
7880 case KEY_sleep:
7881 UNI(OP_SLEEP);
7882
7883 case KEY_socket:
7884 LOP(OP_SOCKET,XTERM);
7885
7886 case KEY_socketpair:
7887 LOP(OP_SOCKPAIR,XTERM);
7888
7889 case KEY_sort:
7890 checkcomma(s,PL_tokenbuf,"subroutine name");
7891 s = SKIPSPACE1(s);
7892 if (*s == ';' || *s == ')') /* probably a close */
7893 Perl_croak(aTHX_ "sort is now a reserved word");
7894 PL_expect = XTERM;
7895 s = force_word(s,WORD,TRUE,TRUE,FALSE);
7896 LOP(OP_SORT,XREF);
7897
7898 case KEY_split:
7899 LOP(OP_SPLIT,XTERM);
7900
7901 case KEY_sprintf:
7902 LOP(OP_SPRINTF,XTERM);
7903
7904 case KEY_splice:
7905 LOP(OP_SPLICE,XTERM);
7906
7907 case KEY_sqrt:
7908 UNI(OP_SQRT);
7909
7910 case KEY_srand:
7911 UNI(OP_SRAND);
7912
7913 case KEY_stat:
7914 UNI(OP_STAT);
7915
7916 case KEY_study:
7917 UNI(OP_STUDY);
7918
7919 case KEY_substr:
7920 LOP(OP_SUBSTR,XTERM);
7921
7922 case KEY_format:
7923 case KEY_sub:
7924 really_sub:
7925 {
7926 char tmpbuf[sizeof PL_tokenbuf];
7927 SSize_t tboffset = 0;
7928 expectation attrful;
7929 bool have_name, have_proto;
7930 const int key = tmp;
7931
7932#ifdef PERL_MAD
7933 SV *tmpwhite = 0;
7934
7935 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
7936 SV *subtoken = newSVpvn(tstart, s - tstart);
7937 PL_thistoken = 0;
7938
7939 d = s;
7940 s = SKIPSPACE2(s,tmpwhite);
7941#else
7942 s = skipspace(s);
7943#endif
7944
7945 if (isIDFIRST_lazy_if(s,UTF) || *s == '\'' ||
7946 (*s == ':' && s[1] == ':'))
7947 {
7948#ifdef PERL_MAD
7949 SV *nametoke = NULL;
7950#endif
7951
7952 PL_expect = XBLOCK;
7953 attrful = XATTRBLOCK;
7954 /* remember buffer pos'n for later force_word */
7955 tboffset = s - PL_oldbufptr;
7956 d = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
7957#ifdef PERL_MAD
7958 if (PL_madskills)
7959 nametoke = newSVpvn(s, d - s);
7960#endif
7961 if (memchr(tmpbuf, ':', len))
7962 sv_setpvn(PL_subname, tmpbuf, len);
7963 else {
7964 sv_setsv(PL_subname,PL_curstname);
7965 sv_catpvs(PL_subname,"::");
7966 sv_catpvn(PL_subname,tmpbuf,len);
7967 }
7968 have_name = TRUE;
7969
7970#ifdef PERL_MAD
7971
7972 start_force(0);
7973 CURMAD('X', nametoke);
7974 CURMAD('_', tmpwhite);
7975 (void) force_word(PL_oldbufptr + tboffset, WORD,
7976 FALSE, TRUE, TRUE);
7977
7978 s = SKIPSPACE2(d,tmpwhite);
7979#else
7980 s = skipspace(d);
7981#endif
7982 }
7983 else {
7984 if (key == KEY_my)
7985 Perl_croak(aTHX_ "Missing name in \"my sub\"");
7986 PL_expect = XTERMBLOCK;
7987 attrful = XATTRTERM;
7988 sv_setpvs(PL_subname,"?");
7989 have_name = FALSE;
7990 }
7991
7992 if (key == KEY_format) {
7993 if (*s == '=')
7994 PL_lex_formbrack = PL_lex_brackets + 1;
7995#ifdef PERL_MAD
7996 PL_thistoken = subtoken;
7997 s = d;
7998#else
7999 if (have_name)
8000 (void) force_word(PL_oldbufptr + tboffset, WORD,
8001 FALSE, TRUE, TRUE);
8002#endif
8003 OPERATOR(FORMAT);
8004 }
8005
8006 /* Look for a prototype */
8007 if (*s == '(') {
8008 char *p;
8009 bool bad_proto = FALSE;
8010 bool in_brackets = FALSE;
8011 char greedy_proto = ' ';
8012 bool proto_after_greedy_proto = FALSE;
8013 bool must_be_last = FALSE;
8014 bool underscore = FALSE;
8015 bool seen_underscore = FALSE;
8016 const bool warnillegalproto = ckWARN(WARN_ILLEGALPROTO);
8017
8018 s = scan_str(s,!!PL_madskills,FALSE);
8019 if (!s)
8020 Perl_croak(aTHX_ "Prototype not terminated");
8021 /* strip spaces and check for bad characters */
8022 d = SvPVX(PL_lex_stuff);
8023 tmp = 0;
8024 for (p = d; *p; ++p) {
8025 if (!isSPACE(*p)) {
8026 d[tmp++] = *p;
8027
8028 if (warnillegalproto) {
8029 if (must_be_last)
8030 proto_after_greedy_proto = TRUE;
8031 if (!strchr("$@%*;[]&\\_+", *p)) {
8032 bad_proto = TRUE;
8033 }
8034 else {
8035 if ( underscore ) {
8036 if ( *p != ';' )
8037 bad_proto = TRUE;
8038 underscore = FALSE;
8039 }
8040 if ( *p == '[' ) {
8041 in_brackets = TRUE;
8042 }
8043 else if ( *p == ']' ) {
8044 in_brackets = FALSE;
8045 }
8046 else if ( (*p == '@' || *p == '%') &&
8047 ( tmp < 2 || d[tmp-2] != '\\' ) &&
8048 !in_brackets ) {
8049 must_be_last = TRUE;
8050 greedy_proto = *p;
8051 }
8052 else if ( *p == '_' ) {
8053 underscore = seen_underscore = TRUE;
8054 }
8055 }
8056 }
8057 }
8058 }
8059 d[tmp] = '\0';
8060 if (proto_after_greedy_proto)
8061 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
8062 "Prototype after '%c' for %"SVf" : %s",
8063 greedy_proto, SVfARG(PL_subname), d);
8064 if (bad_proto)
8065 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
8066 "Illegal character %sin prototype for %"SVf" : %s",
8067 seen_underscore ? "after '_' " : "",
8068 SVfARG(PL_subname), d);
8069 SvCUR_set(PL_lex_stuff, tmp);
8070 have_proto = TRUE;
8071
8072#ifdef PERL_MAD
8073 start_force(0);
8074 CURMAD('q', PL_thisopen);
8075 CURMAD('_', tmpwhite);
8076 CURMAD('=', PL_thisstuff);
8077 CURMAD('Q', PL_thisclose);
8078 NEXTVAL_NEXTTOKE.opval =
8079 (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
8080 PL_lex_stuff = NULL;
8081 force_next(THING);
8082
8083 s = SKIPSPACE2(s,tmpwhite);
8084#else
8085 s = skipspace(s);
8086#endif
8087 }
8088 else
8089 have_proto = FALSE;
8090
8091 if (*s == ':' && s[1] != ':')
8092 PL_expect = attrful;
8093 else if (*s != '{' && key == KEY_sub) {
8094 if (!have_name)
8095 Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
8096 else if (*s != ';' && *s != '}')
8097 Perl_croak(aTHX_ "Illegal declaration of subroutine %"SVf, SVfARG(PL_subname));
8098 }
8099
8100#ifdef PERL_MAD
8101 start_force(0);
8102 if (tmpwhite) {
8103 if (PL_madskills)
8104 curmad('^', newSVpvs(""));
8105 CURMAD('_', tmpwhite);
8106 }
8107 force_next(0);
8108
8109 PL_thistoken = subtoken;
8110#else
8111 if (have_proto) {
8112 NEXTVAL_NEXTTOKE.opval =
8113 (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
8114 PL_lex_stuff = NULL;
8115 force_next(THING);
8116 }
8117#endif
8118 if (!have_name) {
8119 if (PL_curstash)
8120 sv_setpvs(PL_subname, "__ANON__");
8121 else
8122 sv_setpvs(PL_subname, "__ANON__::__ANON__");
8123 TOKEN(ANONSUB);
8124 }
8125#ifndef PERL_MAD
8126 (void) force_word(PL_oldbufptr + tboffset, WORD,
8127 FALSE, TRUE, TRUE);
8128#endif
8129 if (key == KEY_my)
8130 TOKEN(MYSUB);
8131 TOKEN(SUB);
8132 }
8133
8134 case KEY_system:
8135 LOP(OP_SYSTEM,XREF);
8136
8137 case KEY_symlink:
8138 LOP(OP_SYMLINK,XTERM);
8139
8140 case KEY_syscall:
8141 LOP(OP_SYSCALL,XTERM);
8142
8143 case KEY_sysopen:
8144 LOP(OP_SYSOPEN,XTERM);
8145
8146 case KEY_sysseek:
8147 LOP(OP_SYSSEEK,XTERM);
8148
8149 case KEY_sysread:
8150 LOP(OP_SYSREAD,XTERM);
8151
8152 case KEY_syswrite:
8153 LOP(OP_SYSWRITE,XTERM);
8154
8155 case KEY_tr:
8156 s = scan_trans(s);
8157 TERM(sublex_start());
8158
8159 case KEY_tell:
8160 UNI(OP_TELL);
8161
8162 case KEY_telldir:
8163 UNI(OP_TELLDIR);
8164
8165 case KEY_tie:
8166 LOP(OP_TIE,XTERM);
8167
8168 case KEY_tied:
8169 UNI(OP_TIED);
8170
8171 case KEY_time:
8172 FUN0(OP_TIME);
8173
8174 case KEY_times:
8175 FUN0(OP_TMS);
8176
8177 case KEY_truncate:
8178 LOP(OP_TRUNCATE,XTERM);
8179
8180 case KEY_uc:
8181 UNI(OP_UC);
8182
8183 case KEY_ucfirst:
8184 UNI(OP_UCFIRST);
8185
8186 case KEY_untie:
8187 UNI(OP_UNTIE);
8188
8189 case KEY_until:
8190 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8191 return REPORT(0);
8192 pl_yylval.ival = CopLINE(PL_curcop);
8193 OPERATOR(UNTIL);
8194
8195 case KEY_unless:
8196 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8197 return REPORT(0);
8198 pl_yylval.ival = CopLINE(PL_curcop);
8199 OPERATOR(UNLESS);
8200
8201 case KEY_unlink:
8202 LOP(OP_UNLINK,XTERM);
8203
8204 case KEY_undef:
8205 UNIDOR(OP_UNDEF);
8206
8207 case KEY_unpack:
8208 LOP(OP_UNPACK,XTERM);
8209
8210 case KEY_utime:
8211 LOP(OP_UTIME,XTERM);
8212
8213 case KEY_umask:
8214 UNIDOR(OP_UMASK);
8215
8216 case KEY_unshift:
8217 LOP(OP_UNSHIFT,XTERM);
8218
8219 case KEY_use:
8220 s = tokenize_use(1, s);
8221 OPERATOR(USE);
8222
8223 case KEY_values:
8224 UNI(OP_VALUES);
8225
8226 case KEY_vec:
8227 LOP(OP_VEC,XTERM);
8228
8229 case KEY_when:
8230 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8231 return REPORT(0);
8232 pl_yylval.ival = CopLINE(PL_curcop);
8233 OPERATOR(WHEN);
8234
8235 case KEY_while:
8236 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8237 return REPORT(0);
8238 pl_yylval.ival = CopLINE(PL_curcop);
8239 OPERATOR(WHILE);
8240
8241 case KEY_warn:
8242 PL_hints |= HINT_BLOCK_SCOPE;
8243 LOP(OP_WARN,XTERM);
8244
8245 case KEY_wait:
8246 FUN0(OP_WAIT);
8247
8248 case KEY_waitpid:
8249 LOP(OP_WAITPID,XTERM);
8250
8251 case KEY_wantarray:
8252 FUN0(OP_WANTARRAY);
8253
8254 case KEY_write:
8255#ifdef EBCDIC
8256 {
8257 char ctl_l[2];
8258 ctl_l[0] = toCTRL('L');
8259 ctl_l[1] = '\0';
8260 gv_fetchpvn_flags(ctl_l, 1, GV_ADD|GV_NOTQUAL, SVt_PV);
8261 }
8262#else
8263 /* Make sure $^L is defined */
8264 gv_fetchpvs("\f", GV_ADD|GV_NOTQUAL, SVt_PV);
8265#endif
8266 UNI(OP_ENTERWRITE);
8267
8268 case KEY_x:
8269 if (PL_expect == XOPERATOR) {
8270 if (*s == '=' && !PL_lex_allbrackets &&
8271 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
8272 return REPORT(0);
8273 Mop(OP_REPEAT);
8274 }
8275 check_uni();
8276 goto just_a_word;
8277
8278 case KEY_xor:
8279 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
8280 return REPORT(0);
8281 pl_yylval.ival = OP_XOR;
8282 OPERATOR(OROP);
8283
8284 case KEY_y:
8285 s = scan_trans(s);
8286 TERM(sublex_start());
8287 }
8288 }}
8289}
8290#ifdef __SC__
8291#pragma segment Main
8292#endif
8293
8294static int
8295S_pending_ident(pTHX)
8296{
8297 dVAR;
8298 register char *d;
8299 PADOFFSET tmp = 0;
8300 /* pit holds the identifier we read and pending_ident is reset */
8301 char pit = PL_pending_ident;
8302 const STRLEN tokenbuf_len = strlen(PL_tokenbuf);
8303 /* All routes through this function want to know if there is a colon. */
8304 const char *const has_colon = (const char*) memchr (PL_tokenbuf, ':', tokenbuf_len);
8305 PL_pending_ident = 0;
8306
8307 /* PL_realtokenstart = realtokenend = PL_bufptr - SvPVX(PL_linestr); */
8308 DEBUG_T({ PerlIO_printf(Perl_debug_log,
8309 "### Pending identifier '%s'\n", PL_tokenbuf); });
8310
8311 /* if we're in a my(), we can't allow dynamics here.
8312 $foo'bar has already been turned into $foo::bar, so
8313 just check for colons.
8314
8315 if it's a legal name, the OP is a PADANY.
8316 */
8317 if (PL_in_my) {
8318 if (PL_in_my == KEY_our) { /* "our" is merely analogous to "my" */
8319 if (has_colon)
8320 yyerror(Perl_form(aTHX_ "No package name allowed for "
8321 "variable %s in \"our\"",
8322 PL_tokenbuf));
8323 tmp = allocmy(PL_tokenbuf, tokenbuf_len, UTF ? SVf_UTF8 : 0);
8324 }
8325 else {
8326 if (has_colon)
8327 yyerror(Perl_form(aTHX_ PL_no_myglob,
8328 PL_in_my == KEY_my ? "my" : "state", PL_tokenbuf));
8329
8330 pl_yylval.opval = newOP(OP_PADANY, 0);
8331 pl_yylval.opval->op_targ = allocmy(PL_tokenbuf, tokenbuf_len,
8332 UTF ? SVf_UTF8 : 0);
8333 return PRIVATEREF;
8334 }
8335 }
8336
8337 /*
8338 build the ops for accesses to a my() variable.
8339
8340 Deny my($a) or my($b) in a sort block, *if* $a or $b is
8341 then used in a comparison. This catches most, but not
8342 all cases. For instance, it catches
8343 sort { my($a); $a <=> $b }
8344 but not
8345 sort { my($a); $a < $b ? -1 : $a == $b ? 0 : 1; }
8346 (although why you'd do that is anyone's guess).
8347 */
8348
8349 if (!has_colon) {
8350 if (!PL_in_my)
8351 tmp = pad_findmy_pvn(PL_tokenbuf, tokenbuf_len,
8352 UTF ? SVf_UTF8 : 0);
8353 if (tmp != NOT_IN_PAD) {
8354 /* might be an "our" variable" */
8355 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
8356 /* build ops for a bareword */
8357 HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
8358 HEK * const stashname = HvNAME_HEK(stash);
8359 SV * const sym = newSVhek(stashname);
8360 sv_catpvs(sym, "::");
8361 sv_catpvn(sym, PL_tokenbuf+1, tokenbuf_len - 1);
8362 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
8363 pl_yylval.opval->op_private = OPpCONST_ENTERED;
8364 gv_fetchsv(sym,
8365 (PL_in_eval
8366 ? (GV_ADDMULTI | GV_ADDINEVAL)
8367 : GV_ADDMULTI
8368 ),
8369 ((PL_tokenbuf[0] == '$') ? SVt_PV
8370 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
8371 : SVt_PVHV));
8372 return WORD;
8373 }
8374
8375 /* if it's a sort block and they're naming $a or $b */
8376 if (PL_last_lop_op == OP_SORT &&
8377 PL_tokenbuf[0] == '$' &&
8378 (PL_tokenbuf[1] == 'a' || PL_tokenbuf[1] == 'b')
8379 && !PL_tokenbuf[2])
8380 {
8381 for (d = PL_in_eval ? PL_oldoldbufptr : PL_linestart;
8382 d < PL_bufend && *d != '\n';
8383 d++)
8384 {
8385 if (strnEQ(d,"<=>",3) || strnEQ(d,"cmp",3)) {
8386 Perl_croak(aTHX_ "Can't use \"my %s\" in sort comparison",
8387 PL_tokenbuf);
8388 }
8389 }
8390 }
8391
8392 pl_yylval.opval = newOP(OP_PADANY, 0);
8393 pl_yylval.opval->op_targ = tmp;
8394 return PRIVATEREF;
8395 }
8396 }
8397
8398 /*
8399 Whine if they've said @foo in a doublequoted string,
8400 and @foo isn't a variable we can find in the symbol
8401 table.
8402 */
8403 if (ckWARN(WARN_AMBIGUOUS) &&
8404 pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
8405 GV *const gv = gv_fetchpvn_flags(PL_tokenbuf + 1, tokenbuf_len - 1, 0,
8406 SVt_PVAV);
8407 if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
8408 /* DO NOT warn for @- and @+ */
8409 && !( PL_tokenbuf[2] == '\0' &&
8410 ( PL_tokenbuf[1] == '-' || PL_tokenbuf[1] == '+' ))
8411 )
8412 {
8413 /* Downgraded from fatal to warning 20000522 mjd */
8414 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
8415 "Possible unintended interpolation of %s in string",
8416 PL_tokenbuf);
8417 }
8418 }
8419
8420 /* build ops for a bareword */
8421 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpvn(PL_tokenbuf + 1,
8422 tokenbuf_len - 1));
8423 pl_yylval.opval->op_private = OPpCONST_ENTERED;
8424 gv_fetchpvn_flags(PL_tokenbuf+1, tokenbuf_len - 1,
8425 PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : GV_ADD,
8426 ((PL_tokenbuf[0] == '$') ? SVt_PV
8427 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
8428 : SVt_PVHV));
8429 return WORD;
8430}
8431
8432STATIC void
8433S_checkcomma(pTHX_ const char *s, const char *name, const char *what)
8434{
8435 dVAR;
8436
8437 PERL_ARGS_ASSERT_CHECKCOMMA;
8438
8439 if (*s == ' ' && s[1] == '(') { /* XXX gotta be a better way */
8440 if (ckWARN(WARN_SYNTAX)) {
8441 int level = 1;
8442 const char *w;
8443 for (w = s+2; *w && level; w++) {
8444 if (*w == '(')
8445 ++level;
8446 else if (*w == ')')
8447 --level;
8448 }
8449 while (isSPACE(*w))
8450 ++w;
8451 /* the list of chars below is for end of statements or
8452 * block / parens, boolean operators (&&, ||, //) and branch
8453 * constructs (or, and, if, until, unless, while, err, for).
8454 * Not a very solid hack... */
8455 if (!*w || !strchr(";&/|})]oaiuwef!=", *w))
8456 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
8457 "%s (...) interpreted as function",name);
8458 }
8459 }
8460 while (s < PL_bufend && isSPACE(*s))
8461 s++;
8462 if (*s == '(')
8463 s++;
8464 while (s < PL_bufend && isSPACE(*s))
8465 s++;
8466 if (isIDFIRST_lazy_if(s,UTF)) {
8467 const char * const w = s++;
8468 while (isALNUM_lazy_if(s,UTF))
8469 s++;
8470 while (s < PL_bufend && isSPACE(*s))
8471 s++;
8472 if (*s == ',') {
8473 GV* gv;
8474 if (keyword(w, s - w, 0))
8475 return;
8476
8477 gv = gv_fetchpvn_flags(w, s - w, 0, SVt_PVCV);
8478 if (gv && GvCVu(gv))
8479 return;
8480 Perl_croak(aTHX_ "No comma allowed after %s", what);
8481 }
8482 }
8483}
8484
8485/* Either returns sv, or mortalizes sv and returns a new SV*.
8486 Best used as sv=new_constant(..., sv, ...).
8487 If s, pv are NULL, calls subroutine with one argument,
8488 and type is used with error messages only. */
8489
8490STATIC SV *
8491S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen,
8492 SV *sv, SV *pv, const char *type, STRLEN typelen)
8493{
8494 dVAR; dSP;
8495 HV * const table = GvHV(PL_hintgv); /* ^H */
8496 SV *res;
8497 SV **cvp;
8498 SV *cv, *typesv;
8499 const char *why1 = "", *why2 = "", *why3 = "";
8500
8501 PERL_ARGS_ASSERT_NEW_CONSTANT;
8502
8503 if (!table || !(PL_hints & HINT_LOCALIZE_HH)) {
8504 SV *msg;
8505
8506 why2 = (const char *)
8507 (strEQ(key,"charnames")
8508 ? "(possibly a missing \"use charnames ...\")"
8509 : "");
8510 msg = Perl_newSVpvf(aTHX_ "Constant(%s) unknown: %s",
8511 (type ? type: "undef"), why2);
8512
8513 /* This is convoluted and evil ("goto considered harmful")
8514 * but I do not understand the intricacies of all the different
8515 * failure modes of %^H in here. The goal here is to make
8516 * the most probable error message user-friendly. --jhi */
8517
8518 goto msgdone;
8519
8520 report:
8521 msg = Perl_newSVpvf(aTHX_ "Constant(%s): %s%s%s",
8522 (type ? type: "undef"), why1, why2, why3);
8523 msgdone:
8524 yyerror(SvPVX_const(msg));
8525 SvREFCNT_dec(msg);
8526 return sv;
8527 }
8528
8529 /* charnames doesn't work well if there have been errors found */
8530 if (PL_error_count > 0 && strEQ(key,"charnames"))
8531 return &PL_sv_undef;
8532
8533 cvp = hv_fetch(table, key, keylen, FALSE);
8534 if (!cvp || !SvOK(*cvp)) {
8535 why1 = "$^H{";
8536 why2 = key;
8537 why3 = "} is not defined";
8538 goto report;
8539 }
8540 sv_2mortal(sv); /* Parent created it permanently */
8541 cv = *cvp;
8542 if (!pv && s)
8543 pv = newSVpvn_flags(s, len, SVs_TEMP);
8544 if (type && pv)
8545 typesv = newSVpvn_flags(type, typelen, SVs_TEMP);
8546 else
8547 typesv = &PL_sv_undef;
8548
8549 PUSHSTACKi(PERLSI_OVERLOAD);
8550 ENTER ;
8551 SAVETMPS;
8552
8553 PUSHMARK(SP) ;
8554 EXTEND(sp, 3);
8555 if (pv)
8556 PUSHs(pv);
8557 PUSHs(sv);
8558 if (pv)
8559 PUSHs(typesv);
8560 PUTBACK;
8561 call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL));
8562
8563 SPAGAIN ;
8564
8565 /* Check the eval first */
8566 if (!PL_in_eval && SvTRUE(ERRSV)) {
8567 sv_catpvs(ERRSV, "Propagated");
8568 yyerror(SvPV_nolen_const(ERRSV)); /* Duplicates the message inside eval */
8569 (void)POPs;
8570 res = SvREFCNT_inc_simple(sv);
8571 }
8572 else {
8573 res = POPs;
8574 SvREFCNT_inc_simple_void(res);
8575 }
8576
8577 PUTBACK ;
8578 FREETMPS ;
8579 LEAVE ;
8580 POPSTACK;
8581
8582 if (!SvOK(res)) {
8583 why1 = "Call to &{$^H{";
8584 why2 = key;
8585 why3 = "}} did not return a defined value";
8586 sv = res;
8587 goto report;
8588 }
8589
8590 return res;
8591}
8592
8593/* Returns a NUL terminated string, with the length of the string written to
8594 *slp
8595 */
8596STATIC char *
8597S_scan_word(pTHX_ register char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
8598{
8599 dVAR;
8600 register char *d = dest;
8601 register char * const e = d + destlen - 3; /* two-character token, ending NUL */
8602
8603 PERL_ARGS_ASSERT_SCAN_WORD;
8604
8605 for (;;) {
8606 if (d >= e)
8607 Perl_croak(aTHX_ ident_too_long);
8608 if (isALNUM(*s)) /* UTF handled below */
8609 *d++ = *s++;
8610 else if (allow_package && (*s == '\'') && isIDFIRST_lazy_if(s+1,UTF)) {
8611 *d++ = ':';
8612 *d++ = ':';
8613 s++;
8614 }
8615 else if (allow_package && (s[0] == ':') && (s[1] == ':') && (s[2] != '$')) {
8616 *d++ = *s++;
8617 *d++ = *s++;
8618 }
8619 else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
8620 char *t = s + UTF8SKIP(s);
8621 size_t len;
8622 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
8623 t += UTF8SKIP(t);
8624 len = t - s;
8625 if (d + len > e)
8626 Perl_croak(aTHX_ ident_too_long);
8627 Copy(s, d, len, char);
8628 d += len;
8629 s = t;
8630 }
8631 else {
8632 *d = '\0';
8633 *slp = d - dest;
8634 return s;
8635 }
8636 }
8637}
8638
8639STATIC char *
8640S_scan_ident(pTHX_ register char *s, register const char *send, char *dest, STRLEN destlen, I32 ck_uni)
8641{
8642 dVAR;
8643 char *bracket = NULL;
8644 char funny = *s++;
8645 register char *d = dest;
8646 register char * const e = d + destlen - 3; /* two-character token, ending NUL */
8647
8648 PERL_ARGS_ASSERT_SCAN_IDENT;
8649
8650 if (isSPACE(*s))
8651 s = PEEKSPACE(s);
8652 if (isDIGIT(*s)) {
8653 while (isDIGIT(*s)) {
8654 if (d >= e)
8655 Perl_croak(aTHX_ ident_too_long);
8656 *d++ = *s++;
8657 }
8658 }
8659 else {
8660 for (;;) {
8661 if (d >= e)
8662 Perl_croak(aTHX_ ident_too_long);
8663 if (isALNUM(*s)) /* UTF handled below */
8664 *d++ = *s++;
8665 else if (*s == '\'' && isIDFIRST_lazy_if(s+1,UTF)) {
8666 *d++ = ':';
8667 *d++ = ':';
8668 s++;
8669 }
8670 else if (*s == ':' && s[1] == ':') {
8671 *d++ = *s++;
8672 *d++ = *s++;
8673 }
8674 else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
8675 char *t = s + UTF8SKIP(s);
8676 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
8677 t += UTF8SKIP(t);
8678 if (d + (t - s) > e)
8679 Perl_croak(aTHX_ ident_too_long);
8680 Copy(s, d, t - s, char);
8681 d += t - s;
8682 s = t;
8683 }
8684 else
8685 break;
8686 }
8687 }
8688 *d = '\0';
8689 d = dest;
8690 if (*d) {
8691 if (PL_lex_state != LEX_NORMAL)
8692 PL_lex_state = LEX_INTERPENDMAYBE;
8693 return s;
8694 }
8695 if (*s == '$' && s[1] &&
8696 (isALNUM_lazy_if(s+1,UTF) || s[1] == '$' || s[1] == '{' || strnEQ(s+1,"::",2)) )
8697 {
8698 return s;
8699 }
8700 if (*s == '{') {
8701 bracket = s;
8702 s++;
8703 }
8704 else if (ck_uni)
8705 check_uni();
8706 if (s < send)
8707 *d = *s++;
8708 d[1] = '\0';
8709 if (*d == '^' && *s && isCONTROLVAR(*s)) {
8710 *d = toCTRL(*s);
8711 s++;
8712 }
8713 if (bracket) {
8714 if (isSPACE(s[-1])) {
8715 while (s < send) {
8716 const char ch = *s++;
8717 if (!SPACE_OR_TAB(ch)) {
8718 *d = ch;
8719 break;
8720 }
8721 }
8722 }
8723 if (isIDFIRST_lazy_if(d,UTF)) {
8724 d++;
8725 if (UTF) {
8726 char *end = s;
8727 while ((end < send && isALNUM_lazy_if(end,UTF)) || *end == ':') {
8728 end += UTF8SKIP(end);
8729 while (end < send && UTF8_IS_CONTINUED(*end) && is_utf8_mark((U8*)end))
8730 end += UTF8SKIP(end);
8731 }
8732 Copy(s, d, end - s, char);
8733 d += end - s;
8734 s = end;
8735 }
8736 else {
8737 while ((isALNUM(*s) || *s == ':') && d < e)
8738 *d++ = *s++;
8739 if (d >= e)
8740 Perl_croak(aTHX_ ident_too_long);
8741 }
8742 *d = '\0';
8743 while (s < send && SPACE_OR_TAB(*s))
8744 s++;
8745 if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
8746 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest, 0)) {
8747 const char * const brack =
8748 (const char *)
8749 ((*s == '[') ? "[...]" : "{...}");
8750 /* diag_listed_as: Ambiguous use of %c{%s[...]} resolved to %c%s[...] */
8751 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
8752 "Ambiguous use of %c{%s%s} resolved to %c%s%s",
8753 funny, dest, brack, funny, dest, brack);
8754 }
8755 bracket++;
8756 PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK);
8757 PL_lex_allbrackets++;
8758 return s;
8759 }
8760 }
8761 /* Handle extended ${^Foo} variables
8762 * 1999-02-27 mjd-perl-patch@plover.com */
8763 else if (!isALNUM(*d) && !isPRINT(*d) /* isCTRL(d) */
8764 && isALNUM(*s))
8765 {
8766 d++;
8767 while (isALNUM(*s) && d < e) {
8768 *d++ = *s++;
8769 }
8770 if (d >= e)
8771 Perl_croak(aTHX_ ident_too_long);
8772 *d = '\0';
8773 }
8774 if (*s == '}') {
8775 s++;
8776 if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
8777 PL_lex_state = LEX_INTERPEND;
8778 PL_expect = XREF;
8779 }
8780 if (PL_lex_state == LEX_NORMAL) {
8781 if (ckWARN(WARN_AMBIGUOUS) &&
8782 (keyword(dest, d - dest, 0)
8783 || get_cvn_flags(dest, d - dest, 0)))
8784 {
8785 if (funny == '#')
8786 funny = '@';
8787 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
8788 "Ambiguous use of %c{%s} resolved to %c%s",
8789 funny, dest, funny, dest);
8790 }
8791 }
8792 }
8793 else {
8794 s = bracket; /* let the parser handle it */
8795 *dest = '\0';
8796 }
8797 }
8798 else if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets && !intuit_more(s))
8799 PL_lex_state = LEX_INTERPEND;
8800 return s;
8801}
8802
8803static bool
8804S_pmflag(pTHX_ const char* const valid_flags, U32 * pmfl, char** s, char* charset) {
8805
8806 /* Adds, subtracts to/from 'pmfl' based on regex modifier flags found in
8807 * the parse starting at 's', based on the subset that are valid in this
8808 * context input to this routine in 'valid_flags'. Advances s. Returns
8809 * TRUE if the input was a valid flag, so the next char may be as well;
8810 * otherwise FALSE. 'charset' should point to a NUL upon first call on the
8811 * current regex. This routine will set it to any charset modifier found.
8812 * The caller shouldn't change it. This way, another charset modifier
8813 * encountered in the parse can be detected as an error, as we have decided
8814 * allow only one */
8815
8816 const char c = **s;
8817
8818 if (! strchr(valid_flags, c)) {
8819 if (isALNUM(c)) {
8820 goto deprecate;
8821 }
8822 return FALSE;
8823 }
8824
8825 switch (c) {
8826
8827 CASE_STD_PMMOD_FLAGS_PARSE_SET(pmfl);
8828 case GLOBAL_PAT_MOD: *pmfl |= PMf_GLOBAL; break;
8829 case CONTINUE_PAT_MOD: *pmfl |= PMf_CONTINUE; break;
8830 case ONCE_PAT_MOD: *pmfl |= PMf_KEEP; break;
8831 case KEEPCOPY_PAT_MOD: *pmfl |= RXf_PMf_KEEPCOPY; break;
8832 case NONDESTRUCT_PAT_MOD: *pmfl |= PMf_NONDESTRUCT; break;
8833 case LOCALE_PAT_MOD:
8834
8835 /* In 5.14, qr//lt is legal but deprecated; the 't' means they
8836 * can't be regex modifiers.
8837 * In 5.14, s///le is legal and ambiguous. Try to disambiguate as
8838 * much as easily done. s///lei, for example, has to mean regex
8839 * modifiers if it's not an error (as does any word character
8840 * following the 'e'). Otherwise, we resolve to the backwards-
8841 * compatible, but less likely 's/// le ...', i.e. as meaning
8842 * less-than-or-equal. The reason it's not likely is that s//
8843 * returns a number for code in the field (/r returns a string, but
8844 * that wasn't added until the 5.13 series), and so '<=' should be
8845 * used for comparing, not 'le'. */
8846 if (*((*s) + 1) == 't') {
8847 goto deprecate;
8848 }
8849 else if (*((*s) + 1) == 'e' && ! isALNUM(*((*s) + 2))) {
8850
8851 /* 'e' is valid only for substitutes, s///e. If it is not
8852 * valid in the current context, then 'm//le' must mean the
8853 * comparison operator, so use the regular deprecation message.
8854 */
8855 if (! strchr(valid_flags, 'e')) {
8856 goto deprecate;
8857 }
8858 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
8859 "Ambiguous use of 's//le...' resolved as 's// le...'; Rewrite as 's//el' if you meant 'use locale rules and evaluate rhs as an expression'. In Perl 5.16, it will be resolved the other way");
8860 return FALSE;
8861 }
8862 if (*charset) {
8863 goto multiple_charsets;
8864 }
8865 set_regex_charset(pmfl, REGEX_LOCALE_CHARSET);
8866 *charset = c;
8867 break;
8868 case UNICODE_PAT_MOD:
8869 /* In 5.14, qr//unless and qr//until are legal but deprecated; the
8870 * 'n' means they can't be regex modifiers */
8871 if (*((*s) + 1) == 'n') {
8872 goto deprecate;
8873 }
8874 if (*charset) {
8875 goto multiple_charsets;
8876 }
8877 set_regex_charset(pmfl, REGEX_UNICODE_CHARSET);
8878 *charset = c;
8879 break;
8880 case ASCII_RESTRICT_PAT_MOD:
8881 /* In 5.14, qr//and is legal but deprecated; the 'n' means they
8882 * can't be regex modifiers */
8883 if (*((*s) + 1) == 'n') {
8884 goto deprecate;
8885 }
8886
8887 if (! *charset) {
8888 set_regex_charset(pmfl, REGEX_ASCII_RESTRICTED_CHARSET);
8889 }
8890 else {
8891
8892 /* Error if previous modifier wasn't an 'a', but if it was, see
8893 * if, and accept, a second occurrence (only) */
8894 if (*charset != 'a'
8895 || get_regex_charset(*pmfl)
8896 != REGEX_ASCII_RESTRICTED_CHARSET)
8897 {
8898 goto multiple_charsets;
8899 }
8900 set_regex_charset(pmfl, REGEX_ASCII_MORE_RESTRICTED_CHARSET);
8901 }
8902 *charset = c;
8903 break;
8904 case DEPENDS_PAT_MOD:
8905 if (*charset) {
8906 goto multiple_charsets;
8907 }
8908 set_regex_charset(pmfl, REGEX_DEPENDS_CHARSET);
8909 *charset = c;
8910 break;
8911 }
8912
8913 (*s)++;
8914 return TRUE;
8915
8916 deprecate:
8917 Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX),
8918 "Having no space between pattern and following word is deprecated");
8919 return FALSE;
8920
8921 multiple_charsets:
8922 if (*charset != c) {
8923 yyerror(Perl_form(aTHX_ "Regexp modifiers \"/%c\" and \"/%c\" are mutually exclusive", *charset, c));
8924 }
8925 else if (c == 'a') {
8926 yyerror("Regexp modifier \"/a\" may appear a maximum of twice");
8927 }
8928 else {
8929 yyerror(Perl_form(aTHX_ "Regexp modifier \"/%c\" may not appear twice", c));
8930 }
8931
8932 /* Pretend that it worked, so will continue processing before dieing */
8933 (*s)++;
8934 return TRUE;
8935}
8936
8937STATIC char *
8938S_scan_pat(pTHX_ char *start, I32 type)
8939{
8940 dVAR;
8941 PMOP *pm;
8942 char *s = scan_str(start,!!PL_madskills,FALSE);
8943 const char * const valid_flags =
8944 (const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS);
8945 char charset = '\0'; /* character set modifier */
8946#ifdef PERL_MAD
8947 char *modstart;
8948#endif
8949
8950 PERL_ARGS_ASSERT_SCAN_PAT;
8951
8952 if (!s) {
8953 const char * const delimiter = skipspace(start);
8954 Perl_croak(aTHX_
8955 (const char *)
8956 (*delimiter == '?'
8957 ? "Search pattern not terminated or ternary operator parsed as search pattern"
8958 : "Search pattern not terminated" ));
8959 }
8960
8961 pm = (PMOP*)newPMOP(type, 0);
8962 if (PL_multi_open == '?') {
8963 /* This is the only point in the code that sets PMf_ONCE: */
8964 pm->op_pmflags |= PMf_ONCE;
8965
8966 /* Hence it's safe to do this bit of PMOP book-keeping here, which
8967 allows us to restrict the list needed by reset to just the ??
8968 matches. */
8969 assert(type != OP_TRANS);
8970 if (PL_curstash) {
8971 MAGIC *mg = mg_find((const SV *)PL_curstash, PERL_MAGIC_symtab);
8972 U32 elements;
8973 if (!mg) {
8974 mg = sv_magicext(MUTABLE_SV(PL_curstash), 0, PERL_MAGIC_symtab, 0, 0,
8975 0);
8976 }
8977 elements = mg->mg_len / sizeof(PMOP**);
8978 Renewc(mg->mg_ptr, elements + 1, PMOP*, char);
8979 ((PMOP**)mg->mg_ptr) [elements++] = pm;
8980 mg->mg_len = elements * sizeof(PMOP**);
8981 PmopSTASH_set(pm,PL_curstash);
8982 }
8983 }
8984#ifdef PERL_MAD
8985 modstart = s;
8986#endif
8987 while (*s && S_pmflag(aTHX_ valid_flags, &(pm->op_pmflags), &s, &charset)) {};
8988#ifdef PERL_MAD
8989 if (PL_madskills && modstart != s) {
8990 SV* tmptoken = newSVpvn(modstart, s - modstart);
8991 append_madprops(newMADPROP('m', MAD_SV, tmptoken, 0), (OP*)pm, 0);
8992 }
8993#endif
8994 /* issue a warning if /c is specified,but /g is not */
8995 if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL))
8996 {
8997 Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP),
8998 "Use of /c modifier is meaningless without /g" );
8999 }
9000
9001 PL_lex_op = (OP*)pm;
9002 pl_yylval.ival = OP_MATCH;
9003 return s;
9004}
9005
9006STATIC char *
9007S_scan_subst(pTHX_ char *start)
9008{
9009 dVAR;
9010 char *s;
9011 register PMOP *pm;
9012 I32 first_start;
9013 I32 es = 0;
9014 char charset = '\0'; /* character set modifier */
9015#ifdef PERL_MAD
9016 char *modstart;
9017#endif
9018
9019 PERL_ARGS_ASSERT_SCAN_SUBST;
9020
9021 pl_yylval.ival = OP_NULL;
9022
9023 s = scan_str(start,!!PL_madskills,FALSE);
9024
9025 if (!s)
9026 Perl_croak(aTHX_ "Substitution pattern not terminated");
9027
9028 if (s[-1] == PL_multi_open)
9029 s--;
9030#ifdef PERL_MAD
9031 if (PL_madskills) {
9032 CURMAD('q', PL_thisopen);
9033 CURMAD('_', PL_thiswhite);
9034 CURMAD('E', PL_thisstuff);
9035 CURMAD('Q', PL_thisclose);
9036 PL_realtokenstart = s - SvPVX(PL_linestr);
9037 }
9038#endif
9039
9040 first_start = PL_multi_start;
9041 s = scan_str(s,!!PL_madskills,FALSE);
9042 if (!s) {
9043 if (PL_lex_stuff) {
9044 SvREFCNT_dec(PL_lex_stuff);
9045 PL_lex_stuff = NULL;
9046 }
9047 Perl_croak(aTHX_ "Substitution replacement not terminated");
9048 }
9049 PL_multi_start = first_start; /* so whole substitution is taken together */
9050
9051 pm = (PMOP*)newPMOP(OP_SUBST, 0);
9052
9053#ifdef PERL_MAD
9054 if (PL_madskills) {
9055 CURMAD('z', PL_thisopen);
9056 CURMAD('R', PL_thisstuff);
9057 CURMAD('Z', PL_thisclose);
9058 }
9059 modstart = s;
9060#endif
9061
9062 while (*s) {
9063 if (*s == EXEC_PAT_MOD) {
9064 s++;
9065 es++;
9066 }
9067 else if (! S_pmflag(aTHX_ S_PAT_MODS, &(pm->op_pmflags), &s, &charset))
9068 {
9069 break;
9070 }
9071 }
9072
9073#ifdef PERL_MAD
9074 if (PL_madskills) {
9075 if (modstart != s)
9076 curmad('m', newSVpvn(modstart, s - modstart));
9077 append_madprops(PL_thismad, (OP*)pm, 0);
9078 PL_thismad = 0;
9079 }
9080#endif
9081 if ((pm->op_pmflags & PMf_CONTINUE)) {
9082 Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" );
9083 }
9084
9085 if (es) {
9086 SV * const repl = newSVpvs("");
9087
9088 PL_sublex_info.super_bufptr = s;
9089 PL_sublex_info.super_bufend = PL_bufend;
9090 PL_multi_end = 0;
9091 pm->op_pmflags |= PMf_EVAL;
9092 while (es-- > 0) {
9093 if (es)
9094 sv_catpvs(repl, "eval ");
9095 else
9096 sv_catpvs(repl, "do ");
9097 }
9098 sv_catpvs(repl, "{");
9099 sv_catsv(repl, PL_lex_repl);
9100 if (strchr(SvPVX(PL_lex_repl), '#'))
9101 sv_catpvs(repl, "\n");
9102 sv_catpvs(repl, "}");
9103 SvEVALED_on(repl);
9104 SvREFCNT_dec(PL_lex_repl);
9105 PL_lex_repl = repl;
9106 }
9107
9108 PL_lex_op = (OP*)pm;
9109 pl_yylval.ival = OP_SUBST;
9110 return s;
9111}
9112
9113STATIC char *
9114S_scan_trans(pTHX_ char *start)
9115{
9116 dVAR;
9117 register char* s;
9118 OP *o;
9119 short *tbl;
9120 U8 squash;
9121 U8 del;
9122 U8 complement;
9123 bool nondestruct = 0;
9124#ifdef PERL_MAD
9125 char *modstart;
9126#endif
9127
9128 PERL_ARGS_ASSERT_SCAN_TRANS;
9129
9130 pl_yylval.ival = OP_NULL;
9131
9132 s = scan_str(start,!!PL_madskills,FALSE);
9133 if (!s)
9134 Perl_croak(aTHX_ "Transliteration pattern not terminated");
9135
9136 if (s[-1] == PL_multi_open)
9137 s--;
9138#ifdef PERL_MAD
9139 if (PL_madskills) {
9140 CURMAD('q', PL_thisopen);
9141 CURMAD('_', PL_thiswhite);
9142 CURMAD('E', PL_thisstuff);
9143 CURMAD('Q', PL_thisclose);
9144 PL_realtokenstart = s - SvPVX(PL_linestr);
9145 }
9146#endif
9147
9148 s = scan_str(s,!!PL_madskills,FALSE);
9149 if (!s) {
9150 if (PL_lex_stuff) {
9151 SvREFCNT_dec(PL_lex_stuff);
9152 PL_lex_stuff = NULL;
9153 }
9154 Perl_croak(aTHX_ "Transliteration replacement not terminated");
9155 }
9156 if (PL_madskills) {
9157 CURMAD('z', PL_thisopen);
9158 CURMAD('R', PL_thisstuff);
9159 CURMAD('Z', PL_thisclose);
9160 }
9161
9162 complement = del = squash = 0;
9163#ifdef PERL_MAD
9164 modstart = s;
9165#endif
9166 while (1) {
9167 switch (*s) {
9168 case 'c':
9169 complement = OPpTRANS_COMPLEMENT;
9170 break;
9171 case 'd':
9172 del = OPpTRANS_DELETE;
9173 break;
9174 case 's':
9175 squash = OPpTRANS_SQUASH;
9176 break;
9177 case 'r':
9178 nondestruct = 1;
9179 break;
9180 default:
9181 goto no_more;
9182 }
9183 s++;
9184 }
9185 no_more:
9186
9187 tbl = (short *)PerlMemShared_calloc(complement&&!del?258:256, sizeof(short));
9188 o = newPVOP(nondestruct ? OP_TRANSR : OP_TRANS, 0, (char*)tbl);
9189 o->op_private &= ~OPpTRANS_ALL;
9190 o->op_private |= del|squash|complement|
9191 (DO_UTF8(PL_lex_stuff)? OPpTRANS_FROM_UTF : 0)|
9192 (DO_UTF8(PL_lex_repl) ? OPpTRANS_TO_UTF : 0);
9193
9194 PL_lex_op = o;
9195 pl_yylval.ival = nondestruct ? OP_TRANSR : OP_TRANS;
9196
9197#ifdef PERL_MAD
9198 if (PL_madskills) {
9199 if (modstart != s)
9200 curmad('m', newSVpvn(modstart, s - modstart));
9201 append_madprops(PL_thismad, o, 0);
9202 PL_thismad = 0;
9203 }
9204#endif
9205
9206 return s;
9207}
9208
9209STATIC char *
9210S_scan_heredoc(pTHX_ register char *s)
9211{
9212 dVAR;
9213 SV *herewas;
9214 I32 op_type = OP_SCALAR;
9215 I32 len;
9216 SV *tmpstr;
9217 char term;
9218 const char *found_newline;
9219 register char *d;
9220 register char *e;
9221 char *peek;
9222 const int outer = (PL_rsfp && !(PL_lex_inwhat == OP_SCALAR));
9223#ifdef PERL_MAD
9224 I32 stuffstart = s - SvPVX(PL_linestr);
9225 char *tstart;
9226
9227 PL_realtokenstart = -1;
9228#endif
9229
9230 PERL_ARGS_ASSERT_SCAN_HEREDOC;
9231
9232 s += 2;
9233 d = PL_tokenbuf;
9234 e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
9235 if (!outer)
9236 *d++ = '\n';
9237 peek = s;
9238 while (SPACE_OR_TAB(*peek))
9239 peek++;
9240 if (*peek == '`' || *peek == '\'' || *peek =='"') {
9241 s = peek;
9242 term = *s++;
9243 s = delimcpy(d, e, s, PL_bufend, term, &len);
9244 d += len;
9245 if (s < PL_bufend)
9246 s++;
9247 }
9248 else {
9249 if (*s == '\\')
9250 s++, term = '\'';
9251 else
9252 term = '"';
9253 if (!isALNUM_lazy_if(s,UTF))
9254 deprecate("bare << to mean <<\"\"");
9255 for (; isALNUM_lazy_if(s,UTF); s++) {
9256 if (d < e)
9257 *d++ = *s;
9258 }
9259 }
9260 if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
9261 Perl_croak(aTHX_ "Delimiter for here document is too long");
9262 *d++ = '\n';
9263 *d = '\0';
9264 len = d - PL_tokenbuf;
9265
9266#ifdef PERL_MAD
9267 if (PL_madskills) {
9268 tstart = PL_tokenbuf + !outer;
9269 PL_thisclose = newSVpvn(tstart, len - !outer);
9270 tstart = SvPVX(PL_linestr) + stuffstart;
9271 PL_thisopen = newSVpvn(tstart, s - tstart);
9272 stuffstart = s - SvPVX(PL_linestr);
9273 }
9274#endif
9275#ifndef PERL_STRICT_CR
9276 d = strchr(s, '\r');
9277 if (d) {
9278 char * const olds = s;
9279 s = d;
9280 while (s < PL_bufend) {
9281 if (*s == '\r') {
9282 *d++ = '\n';
9283 if (*++s == '\n')
9284 s++;
9285 }
9286 else if (*s == '\n' && s[1] == '\r') { /* \015\013 on a mac? */
9287 *d++ = *s++;
9288 s++;
9289 }
9290 else
9291 *d++ = *s++;
9292 }
9293 *d = '\0';
9294 PL_bufend = d;
9295 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
9296 s = olds;
9297 }
9298#endif
9299#ifdef PERL_MAD
9300 found_newline = 0;
9301#endif
9302 if ( outer || !(found_newline = (char*)memchr((void*)s, '\n', PL_bufend - s)) ) {
9303 herewas = newSVpvn(s,PL_bufend-s);
9304 }
9305 else {
9306#ifdef PERL_MAD
9307 herewas = newSVpvn(s-1,found_newline-s+1);
9308#else
9309 s--;
9310 herewas = newSVpvn(s,found_newline-s);
9311#endif
9312 }
9313#ifdef PERL_MAD
9314 if (PL_madskills) {
9315 tstart = SvPVX(PL_linestr) + stuffstart;
9316 if (PL_thisstuff)
9317 sv_catpvn(PL_thisstuff, tstart, s - tstart);
9318 else
9319 PL_thisstuff = newSVpvn(tstart, s - tstart);
9320 }
9321#endif
9322 s += SvCUR(herewas);
9323
9324#ifdef PERL_MAD
9325 stuffstart = s - SvPVX(PL_linestr);
9326
9327 if (found_newline)
9328 s--;
9329#endif
9330
9331 tmpstr = newSV_type(SVt_PVIV);
9332 SvGROW(tmpstr, 80);
9333 if (term == '\'') {
9334 op_type = OP_CONST;
9335 SvIV_set(tmpstr, -1);
9336 }
9337 else if (term == '`') {
9338 op_type = OP_BACKTICK;
9339 SvIV_set(tmpstr, '\\');
9340 }
9341
9342 CLINE;
9343 PL_multi_start = CopLINE(PL_curcop);
9344 PL_multi_open = PL_multi_close = '<';
9345 term = *PL_tokenbuf;
9346 if (PL_lex_inwhat == OP_SUBST && PL_in_eval && !PL_rsfp) {
9347 char * const bufptr = PL_sublex_info.super_bufptr;
9348 char * const bufend = PL_sublex_info.super_bufend;
9349 char * const olds = s - SvCUR(herewas);
9350 s = strchr(bufptr, '\n');
9351 if (!s)
9352 s = bufend;
9353 d = s;
9354 while (s < bufend &&
9355 (*s != term || memNE(s,PL_tokenbuf,len)) ) {
9356 if (*s++ == '\n')
9357 CopLINE_inc(PL_curcop);
9358 }
9359 if (s >= bufend) {
9360 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
9361 missingterm(PL_tokenbuf);
9362 }
9363 sv_setpvn(herewas,bufptr,d-bufptr+1);
9364 sv_setpvn(tmpstr,d+1,s-d);
9365 s += len - 1;
9366 sv_catpvn(herewas,s,bufend-s);
9367 Copy(SvPVX_const(herewas),bufptr,SvCUR(herewas) + 1,char);
9368
9369 s = olds;
9370 goto retval;
9371 }
9372 else if (!outer) {
9373 d = s;
9374 while (s < PL_bufend &&
9375 (*s != term || memNE(s,PL_tokenbuf,len)) ) {
9376 if (*s++ == '\n')
9377 CopLINE_inc(PL_curcop);
9378 }
9379 if (s >= PL_bufend) {
9380 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
9381 missingterm(PL_tokenbuf);
9382 }
9383 sv_setpvn(tmpstr,d+1,s-d);
9384#ifdef PERL_MAD
9385 if (PL_madskills) {
9386 if (PL_thisstuff)
9387 sv_catpvn(PL_thisstuff, d + 1, s - d);
9388 else
9389 PL_thisstuff = newSVpvn(d + 1, s - d);
9390 stuffstart = s - SvPVX(PL_linestr);
9391 }
9392#endif
9393 s += len - 1;
9394 CopLINE_inc(PL_curcop); /* the preceding stmt passes a newline */
9395
9396 sv_catpvn(herewas,s,PL_bufend-s);
9397 sv_setsv(PL_linestr,herewas);
9398 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart = SvPVX(PL_linestr);
9399 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
9400 PL_last_lop = PL_last_uni = NULL;
9401 }
9402 else
9403 sv_setpvs(tmpstr,""); /* avoid "uninitialized" warning */
9404 while (s >= PL_bufend) { /* multiple line string? */
9405#ifdef PERL_MAD
9406 if (PL_madskills) {
9407 tstart = SvPVX(PL_linestr) + stuffstart;
9408 if (PL_thisstuff)
9409 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
9410 else
9411 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
9412 }
9413#endif
9414 PL_bufptr = s;
9415 CopLINE_inc(PL_curcop);
9416 if (!outer || !lex_next_chunk(0)) {
9417 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
9418 missingterm(PL_tokenbuf);
9419 }
9420 CopLINE_dec(PL_curcop);
9421 s = PL_bufptr;
9422#ifdef PERL_MAD
9423 stuffstart = s - SvPVX(PL_linestr);
9424#endif
9425 CopLINE_inc(PL_curcop);
9426 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
9427 PL_last_lop = PL_last_uni = NULL;
9428#ifndef PERL_STRICT_CR
9429 if (PL_bufend - PL_linestart >= 2) {
9430 if ((PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n') ||
9431 (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
9432 {
9433 PL_bufend[-2] = '\n';
9434 PL_bufend--;
9435 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
9436 }
9437 else if (PL_bufend[-1] == '\r')
9438 PL_bufend[-1] = '\n';
9439 }
9440 else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
9441 PL_bufend[-1] = '\n';
9442#endif
9443 if (*s == term && memEQ(s,PL_tokenbuf,len)) {
9444 STRLEN off = PL_bufend - 1 - SvPVX_const(PL_linestr);
9445 *(SvPVX(PL_linestr) + off ) = ' ';
9446 lex_grow_linestr(SvCUR(PL_linestr) + SvCUR(herewas) + 1);
9447 sv_catsv(PL_linestr,herewas);
9448 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
9449 s = SvPVX(PL_linestr) + off; /* In case PV of PL_linestr moved. */
9450 }
9451 else {
9452 s = PL_bufend;
9453 sv_catsv(tmpstr,PL_linestr);
9454 }
9455 }
9456 s++;
9457retval:
9458 PL_multi_end = CopLINE(PL_curcop);
9459 if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
9460 SvPV_shrink_to_cur(tmpstr);
9461 }
9462 SvREFCNT_dec(herewas);
9463 if (!IN_BYTES) {
9464 if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
9465 SvUTF8_on(tmpstr);
9466 else if (PL_encoding)
9467 sv_recode_to_utf8(tmpstr, PL_encoding);
9468 }
9469 PL_lex_stuff = tmpstr;
9470 pl_yylval.ival = op_type;
9471 return s;
9472}
9473
9474/* scan_inputsymbol
9475 takes: current position in input buffer
9476 returns: new position in input buffer
9477 side-effects: pl_yylval and lex_op are set.
9478
9479 This code handles:
9480
9481 <> read from ARGV
9482 <FH> read from filehandle
9483 <pkg::FH> read from package qualified filehandle
9484 <pkg'FH> read from package qualified filehandle
9485 <$fh> read from filehandle in $fh
9486 <*.h> filename glob
9487
9488*/
9489
9490STATIC char *
9491S_scan_inputsymbol(pTHX_ char *start)
9492{
9493 dVAR;
9494 register char *s = start; /* current position in buffer */
9495 char *end;
9496 I32 len;
9497 char *d = PL_tokenbuf; /* start of temp holding space */
9498 const char * const e = PL_tokenbuf + sizeof PL_tokenbuf; /* end of temp holding space */
9499
9500 PERL_ARGS_ASSERT_SCAN_INPUTSYMBOL;
9501
9502 end = strchr(s, '\n');
9503 if (!end)
9504 end = PL_bufend;
9505 s = delimcpy(d, e, s + 1, end, '>', &len); /* extract until > */
9506
9507 /* die if we didn't have space for the contents of the <>,
9508 or if it didn't end, or if we see a newline
9509 */
9510
9511 if (len >= (I32)sizeof PL_tokenbuf)
9512 Perl_croak(aTHX_ "Excessively long <> operator");
9513 if (s >= end)
9514 Perl_croak(aTHX_ "Unterminated <> operator");
9515
9516 s++;
9517
9518 /* check for <$fh>
9519 Remember, only scalar variables are interpreted as filehandles by
9520 this code. Anything more complex (e.g., <$fh{$num}>) will be
9521 treated as a glob() call.
9522 This code makes use of the fact that except for the $ at the front,
9523 a scalar variable and a filehandle look the same.
9524 */
9525 if (*d == '$' && d[1]) d++;
9526
9527 /* allow <Pkg'VALUE> or <Pkg::VALUE> */
9528 while (*d && (isALNUM_lazy_if(d,UTF) || *d == '\'' || *d == ':'))
9529 d++;
9530
9531 /* If we've tried to read what we allow filehandles to look like, and
9532 there's still text left, then it must be a glob() and not a getline.
9533 Use scan_str to pull out the stuff between the <> and treat it
9534 as nothing more than a string.
9535 */
9536
9537 if (d - PL_tokenbuf != len) {
9538 pl_yylval.ival = OP_GLOB;
9539 s = scan_str(start,!!PL_madskills,FALSE);
9540 if (!s)
9541 Perl_croak(aTHX_ "Glob not terminated");
9542 return s;
9543 }
9544 else {
9545 bool readline_overriden = FALSE;
9546 GV *gv_readline;
9547 GV **gvp;
9548 /* we're in a filehandle read situation */
9549 d = PL_tokenbuf;
9550
9551 /* turn <> into <ARGV> */
9552 if (!len)
9553 Copy("ARGV",d,5,char);
9554
9555 /* Check whether readline() is overriden */
9556 gv_readline = gv_fetchpvs("readline", GV_NOTQUAL, SVt_PVCV);
9557 if ((gv_readline
9558 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline))
9559 ||
9560 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readline", FALSE))
9561 && (gv_readline = *gvp) && isGV_with_GP(gv_readline)
9562 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline)))
9563 readline_overriden = TRUE;
9564
9565 /* if <$fh>, create the ops to turn the variable into a
9566 filehandle
9567 */
9568 if (*d == '$') {
9569 /* try to find it in the pad for this block, otherwise find
9570 add symbol table ops
9571 */
9572 const PADOFFSET tmp = pad_findmy_pvn(d, len, UTF ? SVf_UTF8 : 0);
9573 if (tmp != NOT_IN_PAD) {
9574 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
9575 HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
9576 HEK * const stashname = HvNAME_HEK(stash);
9577 SV * const sym = sv_2mortal(newSVhek(stashname));
9578 sv_catpvs(sym, "::");
9579 sv_catpv(sym, d+1);
9580 d = SvPVX(sym);
9581 goto intro_sym;
9582 }
9583 else {
9584 OP * const o = newOP(OP_PADSV, 0);
9585 o->op_targ = tmp;
9586 PL_lex_op = readline_overriden
9587 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
9588 op_append_elem(OP_LIST, o,
9589 newCVREF(0, newGVOP(OP_GV,0,gv_readline))))
9590 : (OP*)newUNOP(OP_READLINE, 0, o);
9591 }
9592 }
9593 else {
9594 GV *gv;
9595 ++d;
9596intro_sym:
9597 gv = gv_fetchpv(d,
9598 (PL_in_eval
9599 ? (GV_ADDMULTI | GV_ADDINEVAL)
9600 : GV_ADDMULTI),
9601 SVt_PV);
9602 PL_lex_op = readline_overriden
9603 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
9604 op_append_elem(OP_LIST,
9605 newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)),
9606 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
9607 : (OP*)newUNOP(OP_READLINE, 0,
9608 newUNOP(OP_RV2SV, 0,
9609 newGVOP(OP_GV, 0, gv)));
9610 }
9611 if (!readline_overriden)
9612 PL_lex_op->op_flags |= OPf_SPECIAL;
9613 /* we created the ops in PL_lex_op, so make pl_yylval.ival a null op */
9614 pl_yylval.ival = OP_NULL;
9615 }
9616
9617 /* If it's none of the above, it must be a literal filehandle
9618 (<Foo::BAR> or <FOO>) so build a simple readline OP */
9619 else {
9620 GV * const gv = gv_fetchpv(d, GV_ADD, SVt_PVIO);
9621 PL_lex_op = readline_overriden
9622 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
9623 op_append_elem(OP_LIST,
9624 newGVOP(OP_GV, 0, gv),
9625 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
9626 : (OP*)newUNOP(OP_READLINE, 0, newGVOP(OP_GV, 0, gv));
9627 pl_yylval.ival = OP_NULL;
9628 }
9629 }
9630
9631 return s;
9632}
9633
9634
9635/* scan_str
9636 takes: start position in buffer
9637 keep_quoted preserve \ on the embedded delimiter(s)
9638 keep_delims preserve the delimiters around the string
9639 returns: position to continue reading from buffer
9640 side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
9641 updates the read buffer.
9642
9643 This subroutine pulls a string out of the input. It is called for:
9644 q single quotes q(literal text)
9645 ' single quotes 'literal text'
9646 qq double quotes qq(interpolate $here please)
9647 " double quotes "interpolate $here please"
9648 qx backticks qx(/bin/ls -l)
9649 ` backticks `/bin/ls -l`
9650 qw quote words @EXPORT_OK = qw( func() $spam )
9651 m// regexp match m/this/
9652 s/// regexp substitute s/this/that/
9653 tr/// string transliterate tr/this/that/
9654 y/// string transliterate y/this/that/
9655 ($*@) sub prototypes sub foo ($)
9656 (stuff) sub attr parameters sub foo : attr(stuff)
9657 <> readline or globs <FOO>, <>, <$fh>, or <*.c>
9658
9659 In most of these cases (all but <>, patterns and transliterate)
9660 yylex() calls scan_str(). m// makes yylex() call scan_pat() which
9661 calls scan_str(). s/// makes yylex() call scan_subst() which calls
9662 scan_str(). tr/// and y/// make yylex() call scan_trans() which
9663 calls scan_str().
9664
9665 It skips whitespace before the string starts, and treats the first
9666 character as the delimiter. If the delimiter is one of ([{< then
9667 the corresponding "close" character )]}> is used as the closing
9668 delimiter. It allows quoting of delimiters, and if the string has
9669 balanced delimiters ([{<>}]) it allows nesting.
9670
9671 On success, the SV with the resulting string is put into lex_stuff or,
9672 if that is already non-NULL, into lex_repl. The second case occurs only
9673 when parsing the RHS of the special constructs s/// and tr/// (y///).
9674 For convenience, the terminating delimiter character is stuffed into
9675 SvIVX of the SV.
9676*/
9677
9678STATIC char *
9679S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims)
9680{
9681 dVAR;
9682 SV *sv; /* scalar value: string */
9683 const char *tmps; /* temp string, used for delimiter matching */
9684 register char *s = start; /* current position in the buffer */
9685 register char term; /* terminating character */
9686 register char *to; /* current position in the sv's data */
9687 I32 brackets = 1; /* bracket nesting level */
9688 bool has_utf8 = FALSE; /* is there any utf8 content? */
9689 I32 termcode; /* terminating char. code */
9690 U8 termstr[UTF8_MAXBYTES]; /* terminating string */
9691 STRLEN termlen; /* length of terminating string */
9692 int last_off = 0; /* last position for nesting bracket */
9693#ifdef PERL_MAD
9694 int stuffstart;
9695 char *tstart;
9696#endif
9697
9698 PERL_ARGS_ASSERT_SCAN_STR;
9699
9700 /* skip space before the delimiter */
9701 if (isSPACE(*s)) {
9702 s = PEEKSPACE(s);
9703 }
9704
9705#ifdef PERL_MAD
9706 if (PL_realtokenstart >= 0) {
9707 stuffstart = PL_realtokenstart;
9708 PL_realtokenstart = -1;
9709 }
9710 else
9711 stuffstart = start - SvPVX(PL_linestr);
9712#endif
9713 /* mark where we are, in case we need to report errors */
9714 CLINE;
9715
9716 /* after skipping whitespace, the next character is the terminator */
9717 term = *s;
9718 if (!UTF) {
9719 termcode = termstr[0] = term;
9720 termlen = 1;
9721 }
9722 else {
9723 termcode = utf8_to_uvchr((U8*)s, &termlen);
9724 Copy(s, termstr, termlen, U8);
9725 if (!UTF8_IS_INVARIANT(term))
9726 has_utf8 = TRUE;
9727 }
9728
9729 /* mark where we are */
9730 PL_multi_start = CopLINE(PL_curcop);
9731 PL_multi_open = term;
9732
9733 /* find corresponding closing delimiter */
9734 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
9735 termcode = termstr[0] = term = tmps[5];
9736
9737 PL_multi_close = term;
9738
9739 /* create a new SV to hold the contents. 79 is the SV's initial length.
9740 What a random number. */
9741 sv = newSV_type(SVt_PVIV);
9742 SvGROW(sv, 80);
9743 SvIV_set(sv, termcode);
9744 (void)SvPOK_only(sv); /* validate pointer */
9745
9746 /* move past delimiter and try to read a complete string */
9747 if (keep_delims)
9748 sv_catpvn(sv, s, termlen);
9749 s += termlen;
9750#ifdef PERL_MAD
9751 tstart = SvPVX(PL_linestr) + stuffstart;
9752 if (!PL_thisopen && !keep_delims) {
9753 PL_thisopen = newSVpvn(tstart, s - tstart);
9754 stuffstart = s - SvPVX(PL_linestr);
9755 }
9756#endif
9757 for (;;) {
9758 if (PL_encoding && !UTF) {
9759 bool cont = TRUE;
9760
9761 while (cont) {
9762 int offset = s - SvPVX_const(PL_linestr);
9763 const bool found = sv_cat_decode(sv, PL_encoding, PL_linestr,
9764 &offset, (char*)termstr, termlen);
9765 const char * const ns = SvPVX_const(PL_linestr) + offset;
9766 char * const svlast = SvEND(sv) - 1;
9767
9768 for (; s < ns; s++) {
9769 if (*s == '\n' && !PL_rsfp)
9770 CopLINE_inc(PL_curcop);
9771 }
9772 if (!found)
9773 goto read_more_line;
9774 else {
9775 /* handle quoted delimiters */
9776 if (SvCUR(sv) > 1 && *(svlast-1) == '\\') {
9777 const char *t;
9778 for (t = svlast-2; t >= SvPVX_const(sv) && *t == '\\';)
9779 t--;
9780 if ((svlast-1 - t) % 2) {
9781 if (!keep_quoted) {
9782 *(svlast-1) = term;
9783 *svlast = '\0';
9784 SvCUR_set(sv, SvCUR(sv) - 1);
9785 }
9786 continue;
9787 }
9788 }
9789 if (PL_multi_open == PL_multi_close) {
9790 cont = FALSE;
9791 }
9792 else {
9793 const char *t;
9794 char *w;
9795 for (t = w = SvPVX(sv)+last_off; t < svlast; w++, t++) {
9796 /* At here, all closes are "was quoted" one,
9797 so we don't check PL_multi_close. */
9798 if (*t == '\\') {
9799 if (!keep_quoted && *(t+1) == PL_multi_open)
9800 t++;
9801 else
9802 *w++ = *t++;
9803 }
9804 else if (*t == PL_multi_open)
9805 brackets++;
9806
9807 *w = *t;
9808 }
9809 if (w < t) {
9810 *w++ = term;
9811 *w = '\0';
9812 SvCUR_set(sv, w - SvPVX_const(sv));
9813 }
9814 last_off = w - SvPVX(sv);
9815 if (--brackets <= 0)
9816 cont = FALSE;
9817 }
9818 }
9819 }
9820 if (!keep_delims) {
9821 SvCUR_set(sv, SvCUR(sv) - 1);
9822 *SvEND(sv) = '\0';
9823 }
9824 break;
9825 }
9826
9827 /* extend sv if need be */
9828 SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
9829 /* set 'to' to the next character in the sv's string */
9830 to = SvPVX(sv)+SvCUR(sv);
9831
9832 /* if open delimiter is the close delimiter read unbridle */
9833 if (PL_multi_open == PL_multi_close) {
9834 for (; s < PL_bufend; s++,to++) {
9835 /* embedded newlines increment the current line number */
9836 if (*s == '\n' && !PL_rsfp)
9837 CopLINE_inc(PL_curcop);
9838 /* handle quoted delimiters */
9839 if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
9840 if (!keep_quoted && s[1] == term)
9841 s++;
9842 /* any other quotes are simply copied straight through */
9843 else
9844 *to++ = *s++;
9845 }
9846 /* terminate when run out of buffer (the for() condition), or
9847 have found the terminator */
9848 else if (*s == term) {
9849 if (termlen == 1)
9850 break;
9851 if (s+termlen <= PL_bufend && memEQ(s, (char*)termstr, termlen))
9852 break;
9853 }
9854 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
9855 has_utf8 = TRUE;
9856 *to = *s;
9857 }
9858 }
9859
9860 /* if the terminator isn't the same as the start character (e.g.,
9861 matched brackets), we have to allow more in the quoting, and
9862 be prepared for nested brackets.
9863 */
9864 else {
9865 /* read until we run out of string, or we find the terminator */
9866 for (; s < PL_bufend; s++,to++) {
9867 /* embedded newlines increment the line count */
9868 if (*s == '\n' && !PL_rsfp)
9869 CopLINE_inc(PL_curcop);
9870 /* backslashes can escape the open or closing characters */
9871 if (*s == '\\' && s+1 < PL_bufend) {
9872 if (!keep_quoted &&
9873 ((s[1] == PL_multi_open) || (s[1] == PL_multi_close)))
9874 s++;
9875 else
9876 *to++ = *s++;
9877 }
9878 /* allow nested opens and closes */
9879 else if (*s == PL_multi_close && --brackets <= 0)
9880 break;
9881 else if (*s == PL_multi_open)
9882 brackets++;
9883 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
9884 has_utf8 = TRUE;
9885 *to = *s;
9886 }
9887 }
9888 /* terminate the copied string and update the sv's end-of-string */
9889 *to = '\0';
9890 SvCUR_set(sv, to - SvPVX_const(sv));
9891
9892 /*
9893 * this next chunk reads more into the buffer if we're not done yet
9894 */
9895
9896 if (s < PL_bufend)
9897 break; /* handle case where we are done yet :-) */
9898
9899#ifndef PERL_STRICT_CR
9900 if (to - SvPVX_const(sv) >= 2) {
9901 if ((to[-2] == '\r' && to[-1] == '\n') ||
9902 (to[-2] == '\n' && to[-1] == '\r'))
9903 {
9904 to[-2] = '\n';
9905 to--;
9906 SvCUR_set(sv, to - SvPVX_const(sv));
9907 }
9908 else if (to[-1] == '\r')
9909 to[-1] = '\n';
9910 }
9911 else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r')
9912 to[-1] = '\n';
9913#endif
9914
9915 read_more_line:
9916 /* if we're out of file, or a read fails, bail and reset the current
9917 line marker so we can report where the unterminated string began
9918 */
9919#ifdef PERL_MAD
9920 if (PL_madskills) {
9921 char * const tstart = SvPVX(PL_linestr) + stuffstart;
9922 if (PL_thisstuff)
9923 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
9924 else
9925 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
9926 }
9927#endif
9928 CopLINE_inc(PL_curcop);
9929 PL_bufptr = PL_bufend;
9930 if (!lex_next_chunk(0)) {
9931 sv_free(sv);
9932 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
9933 return NULL;
9934 }
9935 s = PL_bufptr;
9936#ifdef PERL_MAD
9937 stuffstart = 0;
9938#endif
9939 }
9940
9941 /* at this point, we have successfully read the delimited string */
9942
9943 if (!PL_encoding || UTF) {
9944#ifdef PERL_MAD
9945 if (PL_madskills) {
9946 char * const tstart = SvPVX(PL_linestr) + stuffstart;
9947 const int len = s - tstart;
9948 if (PL_thisstuff)
9949 sv_catpvn(PL_thisstuff, tstart, len);
9950 else
9951 PL_thisstuff = newSVpvn(tstart, len);
9952 if (!PL_thisclose && !keep_delims)
9953 PL_thisclose = newSVpvn(s,termlen);
9954 }
9955#endif
9956
9957 if (keep_delims)
9958 sv_catpvn(sv, s, termlen);
9959 s += termlen;
9960 }
9961#ifdef PERL_MAD
9962 else {
9963 if (PL_madskills) {
9964 char * const tstart = SvPVX(PL_linestr) + stuffstart;
9965 const int len = s - tstart - termlen;
9966 if (PL_thisstuff)
9967 sv_catpvn(PL_thisstuff, tstart, len);
9968 else
9969 PL_thisstuff = newSVpvn(tstart, len);
9970 if (!PL_thisclose && !keep_delims)
9971 PL_thisclose = newSVpvn(s - termlen,termlen);
9972 }
9973 }
9974#endif
9975 if (has_utf8 || PL_encoding)
9976 SvUTF8_on(sv);
9977
9978 PL_multi_end = CopLINE(PL_curcop);
9979
9980 /* if we allocated too much space, give some back */
9981 if (SvCUR(sv) + 5 < SvLEN(sv)) {
9982 SvLEN_set(sv, SvCUR(sv) + 1);
9983 SvPV_renew(sv, SvLEN(sv));
9984 }
9985
9986 /* decide whether this is the first or second quoted string we've read
9987 for this op
9988 */
9989
9990 if (PL_lex_stuff)
9991 PL_lex_repl = sv;
9992 else
9993 PL_lex_stuff = sv;
9994 return s;
9995}
9996
9997/*
9998 scan_num
9999 takes: pointer to position in buffer
10000 returns: pointer to new position in buffer
10001 side-effects: builds ops for the constant in pl_yylval.op
10002
10003 Read a number in any of the formats that Perl accepts:
10004
10005 \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*) 12 12.34 12.
10006 \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*) .34
10007 0b[01](_?[01])*
10008 0[0-7](_?[0-7])*
10009 0x[0-9A-Fa-f](_?[0-9A-Fa-f])*
10010
10011 Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the
10012 thing it reads.
10013
10014 If it reads a number without a decimal point or an exponent, it will
10015 try converting the number to an integer and see if it can do so
10016 without loss of precision.
10017*/
10018
10019char *
10020Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
10021{
10022 dVAR;
10023 register const char *s = start; /* current position in buffer */
10024 register char *d; /* destination in temp buffer */
10025 register char *e; /* end of temp buffer */
10026 NV nv; /* number read, as a double */
10027 SV *sv = NULL; /* place to put the converted number */
10028 bool floatit; /* boolean: int or float? */
10029 const char *lastub = NULL; /* position of last underbar */
10030 static char const number_too_long[] = "Number too long";
10031
10032 PERL_ARGS_ASSERT_SCAN_NUM;
10033
10034 /* We use the first character to decide what type of number this is */
10035
10036 switch (*s) {
10037 default:
10038 Perl_croak(aTHX_ "panic: scan_num");
10039
10040 /* if it starts with a 0, it could be an octal number, a decimal in
10041 0.13 disguise, or a hexadecimal number, or a binary number. */
10042 case '0':
10043 {
10044 /* variables:
10045 u holds the "number so far"
10046 shift the power of 2 of the base
10047 (hex == 4, octal == 3, binary == 1)
10048 overflowed was the number more than we can hold?
10049
10050 Shift is used when we add a digit. It also serves as an "are
10051 we in octal/hex/binary?" indicator to disallow hex characters
10052 when in octal mode.
10053 */
10054 NV n = 0.0;
10055 UV u = 0;
10056 I32 shift;
10057 bool overflowed = FALSE;
10058 bool just_zero = TRUE; /* just plain 0 or binary number? */
10059 static const NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 };
10060 static const char* const bases[5] =
10061 { "", "binary", "", "octal", "hexadecimal" };
10062 static const char* const Bases[5] =
10063 { "", "Binary", "", "Octal", "Hexadecimal" };
10064 static const char* const maxima[5] =
10065 { "",
10066 "0b11111111111111111111111111111111",
10067 "",
10068 "037777777777",
10069 "0xffffffff" };
10070 const char *base, *Base, *max;
10071
10072 /* check for hex */
10073 if (s[1] == 'x' || s[1] == 'X') {
10074 shift = 4;
10075 s += 2;
10076 just_zero = FALSE;
10077 } else if (s[1] == 'b' || s[1] == 'B') {
10078 shift = 1;
10079 s += 2;
10080 just_zero = FALSE;
10081 }
10082 /* check for a decimal in disguise */
10083 else if (s[1] == '.' || s[1] == 'e' || s[1] == 'E')
10084 goto decimal;
10085 /* so it must be octal */
10086 else {
10087 shift = 3;
10088 s++;
10089 }
10090
10091 if (*s == '_') {
10092 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10093 "Misplaced _ in number");
10094 lastub = s++;
10095 }
10096
10097 base = bases[shift];
10098 Base = Bases[shift];
10099 max = maxima[shift];
10100
10101 /* read the rest of the number */
10102 for (;;) {
10103 /* x is used in the overflow test,
10104 b is the digit we're adding on. */
10105 UV x, b;
10106
10107 switch (*s) {
10108
10109 /* if we don't mention it, we're done */
10110 default:
10111 goto out;
10112
10113 /* _ are ignored -- but warned about if consecutive */
10114 case '_':
10115 if (lastub && s == lastub + 1)
10116 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10117 "Misplaced _ in number");
10118 lastub = s++;
10119 break;
10120
10121 /* 8 and 9 are not octal */
10122 case '8': case '9':
10123 if (shift == 3)
10124 yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s));
10125 /* FALL THROUGH */
10126
10127 /* octal digits */
10128 case '2': case '3': case '4':
10129 case '5': case '6': case '7':
10130 if (shift == 1)
10131 yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s));
10132 /* FALL THROUGH */
10133
10134 case '0': case '1':
10135 b = *s++ & 15; /* ASCII digit -> value of digit */
10136 goto digit;
10137
10138 /* hex digits */
10139 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
10140 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
10141 /* make sure they said 0x */
10142 if (shift != 4)
10143 goto out;
10144 b = (*s++ & 7) + 9;
10145
10146 /* Prepare to put the digit we have onto the end
10147 of the number so far. We check for overflows.
10148 */
10149
10150 digit:
10151 just_zero = FALSE;
10152 if (!overflowed) {
10153 x = u << shift; /* make room for the digit */
10154
10155 if ((x >> shift) != u
10156 && !(PL_hints & HINT_NEW_BINARY)) {
10157 overflowed = TRUE;
10158 n = (NV) u;
10159 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
10160 "Integer overflow in %s number",
10161 base);
10162 } else
10163 u = x | b; /* add the digit to the end */
10164 }
10165 if (overflowed) {
10166 n *= nvshift[shift];
10167 /* If an NV has not enough bits in its
10168 * mantissa to represent an UV this summing of
10169 * small low-order numbers is a waste of time
10170 * (because the NV cannot preserve the
10171 * low-order bits anyway): we could just
10172 * remember when did we overflow and in the
10173 * end just multiply n by the right
10174 * amount. */
10175 n += (NV) b;
10176 }
10177 break;
10178 }
10179 }
10180
10181 /* if we get here, we had success: make a scalar value from
10182 the number.
10183 */
10184 out:
10185
10186 /* final misplaced underbar check */
10187 if (s[-1] == '_') {
10188 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
10189 }
10190
10191 if (overflowed) {
10192 if (n > 4294967295.0)
10193 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
10194 "%s number > %s non-portable",
10195 Base, max);
10196 sv = newSVnv(n);
10197 }
10198 else {
10199#if UVSIZE > 4
10200 if (u > 0xffffffff)
10201 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
10202 "%s number > %s non-portable",
10203 Base, max);
10204#endif
10205 sv = newSVuv(u);
10206 }
10207 if (just_zero && (PL_hints & HINT_NEW_INTEGER))
10208 sv = new_constant(start, s - start, "integer",
10209 sv, NULL, NULL, 0);
10210 else if (PL_hints & HINT_NEW_BINARY)
10211 sv = new_constant(start, s - start, "binary", sv, NULL, NULL, 0);
10212 }
10213 break;
10214
10215 /*
10216 handle decimal numbers.
10217 we're also sent here when we read a 0 as the first digit
10218 */
10219 case '1': case '2': case '3': case '4': case '5':
10220 case '6': case '7': case '8': case '9': case '.':
10221 decimal:
10222 d = PL_tokenbuf;
10223 e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */
10224 floatit = FALSE;
10225
10226 /* read next group of digits and _ and copy into d */
10227 while (isDIGIT(*s) || *s == '_') {
10228 /* skip underscores, checking for misplaced ones
10229 if -w is on
10230 */
10231 if (*s == '_') {
10232 if (lastub && s == lastub + 1)
10233 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10234 "Misplaced _ in number");
10235 lastub = s++;
10236 }
10237 else {
10238 /* check for end of fixed-length buffer */
10239 if (d >= e)
10240 Perl_croak(aTHX_ number_too_long);
10241 /* if we're ok, copy the character */
10242 *d++ = *s++;
10243 }
10244 }
10245
10246 /* final misplaced underbar check */
10247 if (lastub && s == lastub + 1) {
10248 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
10249 }
10250
10251 /* read a decimal portion if there is one. avoid
10252 3..5 being interpreted as the number 3. followed
10253 by .5
10254 */
10255 if (*s == '.' && s[1] != '.') {
10256 floatit = TRUE;
10257 *d++ = *s++;
10258
10259 if (*s == '_') {
10260 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10261 "Misplaced _ in number");
10262 lastub = s;
10263 }
10264
10265 /* copy, ignoring underbars, until we run out of digits.
10266 */
10267 for (; isDIGIT(*s) || *s == '_'; s++) {
10268 /* fixed length buffer check */
10269 if (d >= e)
10270 Perl_croak(aTHX_ number_too_long);
10271 if (*s == '_') {
10272 if (lastub && s == lastub + 1)
10273 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10274 "Misplaced _ in number");
10275 lastub = s;
10276 }
10277 else
10278 *d++ = *s;
10279 }
10280 /* fractional part ending in underbar? */
10281 if (s[-1] == '_') {
10282 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10283 "Misplaced _ in number");
10284 }
10285 if (*s == '.' && isDIGIT(s[1])) {
10286 /* oops, it's really a v-string, but without the "v" */
10287 s = start;
10288 goto vstring;
10289 }
10290 }
10291
10292 /* read exponent part, if present */
10293 if ((*s == 'e' || *s == 'E') && strchr("+-0123456789_", s[1])) {
10294 floatit = TRUE;
10295 s++;
10296
10297 /* regardless of whether user said 3E5 or 3e5, use lower 'e' */
10298 *d++ = 'e'; /* At least some Mach atof()s don't grok 'E' */
10299
10300 /* stray preinitial _ */
10301 if (*s == '_') {
10302 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10303 "Misplaced _ in number");
10304 lastub = s++;
10305 }
10306
10307 /* allow positive or negative exponent */
10308 if (*s == '+' || *s == '-')
10309 *d++ = *s++;
10310
10311 /* stray initial _ */
10312 if (*s == '_') {
10313 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10314 "Misplaced _ in number");
10315 lastub = s++;
10316 }
10317
10318 /* read digits of exponent */
10319 while (isDIGIT(*s) || *s == '_') {
10320 if (isDIGIT(*s)) {
10321 if (d >= e)
10322 Perl_croak(aTHX_ number_too_long);
10323 *d++ = *s++;
10324 }
10325 else {
10326 if (((lastub && s == lastub + 1) ||
10327 (!isDIGIT(s[1]) && s[1] != '_')))
10328 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10329 "Misplaced _ in number");
10330 lastub = s++;
10331 }
10332 }
10333 }
10334
10335
10336 /*
10337 We try to do an integer conversion first if no characters
10338 indicating "float" have been found.
10339 */
10340
10341 if (!floatit) {
10342 UV uv;
10343 const int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv);
10344
10345 if (flags == IS_NUMBER_IN_UV) {
10346 if (uv <= IV_MAX)
10347 sv = newSViv(uv); /* Prefer IVs over UVs. */
10348 else
10349 sv = newSVuv(uv);
10350 } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) {
10351 if (uv <= (UV) IV_MIN)
10352 sv = newSViv(-(IV)uv);
10353 else
10354 floatit = TRUE;
10355 } else
10356 floatit = TRUE;
10357 }
10358 if (floatit) {
10359 /* terminate the string */
10360 *d = '\0';
10361 nv = Atof(PL_tokenbuf);
10362 sv = newSVnv(nv);
10363 }
10364
10365 if ( floatit
10366 ? (PL_hints & HINT_NEW_FLOAT) : (PL_hints & HINT_NEW_INTEGER) ) {
10367 const char *const key = floatit ? "float" : "integer";
10368 const STRLEN keylen = floatit ? 5 : 7;
10369 sv = S_new_constant(aTHX_ PL_tokenbuf, d - PL_tokenbuf,
10370 key, keylen, sv, NULL, NULL, 0);
10371 }
10372 break;
10373
10374 /* if it starts with a v, it could be a v-string */
10375 case 'v':
10376vstring:
10377 sv = newSV(5); /* preallocate storage space */
10378 s = scan_vstring(s, PL_bufend, sv);
10379 break;
10380 }
10381
10382 /* make the op for the constant and return */
10383
10384 if (sv)
10385 lvalp->opval = newSVOP(OP_CONST, 0, sv);
10386 else
10387 lvalp->opval = NULL;
10388
10389 return (char *)s;
10390}
10391
10392STATIC char *
10393S_scan_formline(pTHX_ register char *s)
10394{
10395 dVAR;
10396 register char *eol;
10397 register char *t;
10398 SV * const stuff = newSVpvs("");
10399 bool needargs = FALSE;
10400 bool eofmt = FALSE;
10401#ifdef PERL_MAD
10402 char *tokenstart = s;
10403 SV* savewhite = NULL;
10404
10405 if (PL_madskills) {
10406 savewhite = PL_thiswhite;
10407 PL_thiswhite = 0;
10408 }
10409#endif
10410
10411 PERL_ARGS_ASSERT_SCAN_FORMLINE;
10412
10413 while (!needargs) {
10414 if (*s == '.') {
10415 t = s+1;
10416#ifdef PERL_STRICT_CR
10417 while (SPACE_OR_TAB(*t))
10418 t++;
10419#else
10420 while (SPACE_OR_TAB(*t) || *t == '\r')
10421 t++;
10422#endif
10423 if (*t == '\n' || t == PL_bufend) {
10424 eofmt = TRUE;
10425 break;
10426 }
10427 }
10428 if (PL_in_eval && !PL_rsfp) {
10429 eol = (char *) memchr(s,'\n',PL_bufend-s);
10430 if (!eol++)
10431 eol = PL_bufend;
10432 }
10433 else
10434 eol = PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
10435 if (*s != '#') {
10436 for (t = s; t < eol; t++) {
10437 if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
10438 needargs = FALSE;
10439 goto enough; /* ~~ must be first line in formline */
10440 }
10441 if (*t == '@' || *t == '^')
10442 needargs = TRUE;
10443 }
10444 if (eol > s) {
10445 sv_catpvn(stuff, s, eol-s);
10446#ifndef PERL_STRICT_CR
10447 if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') {
10448 char *end = SvPVX(stuff) + SvCUR(stuff);
10449 end[-2] = '\n';
10450 end[-1] = '\0';
10451 SvCUR_set(stuff, SvCUR(stuff) - 1);
10452 }
10453#endif
10454 }
10455 else
10456 break;
10457 }
10458 s = (char*)eol;
10459 if (PL_rsfp) {
10460 bool got_some;
10461#ifdef PERL_MAD
10462 if (PL_madskills) {
10463 if (PL_thistoken)
10464 sv_catpvn(PL_thistoken, tokenstart, PL_bufend - tokenstart);
10465 else
10466 PL_thistoken = newSVpvn(tokenstart, PL_bufend - tokenstart);
10467 }
10468#endif
10469 PL_bufptr = PL_bufend;
10470 CopLINE_inc(PL_curcop);
10471 got_some = lex_next_chunk(0);
10472 CopLINE_dec(PL_curcop);
10473 s = PL_bufptr;
10474#ifdef PERL_MAD
10475 tokenstart = PL_bufptr;
10476#endif
10477 if (!got_some)
10478 break;
10479 }
10480 incline(s);
10481 }
10482 enough:
10483 if (SvCUR(stuff)) {
10484 PL_expect = XTERM;
10485 if (needargs) {
10486 PL_lex_state = LEX_NORMAL;
10487 start_force(PL_curforce);
10488 NEXTVAL_NEXTTOKE.ival = 0;
10489 force_next(',');
10490 }
10491 else
10492 PL_lex_state = LEX_FORMLINE;
10493 if (!IN_BYTES) {
10494 if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff)))
10495 SvUTF8_on(stuff);
10496 else if (PL_encoding)
10497 sv_recode_to_utf8(stuff, PL_encoding);
10498 }
10499 start_force(PL_curforce);
10500 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0, stuff);
10501 force_next(THING);
10502 start_force(PL_curforce);
10503 NEXTVAL_NEXTTOKE.ival = OP_FORMLINE;
10504 force_next(LSTOP);
10505 }
10506 else {
10507 SvREFCNT_dec(stuff);
10508 if (eofmt)
10509 PL_lex_formbrack = 0;
10510 PL_bufptr = s;
10511 }
10512#ifdef PERL_MAD
10513 if (PL_madskills) {
10514 if (PL_thistoken)
10515 sv_catpvn(PL_thistoken, tokenstart, s - tokenstart);
10516 else
10517 PL_thistoken = newSVpvn(tokenstart, s - tokenstart);
10518 PL_thiswhite = savewhite;
10519 }
10520#endif
10521 return s;
10522}
10523
10524I32
10525Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
10526{
10527 dVAR;
10528 const I32 oldsavestack_ix = PL_savestack_ix;
10529 CV* const outsidecv = PL_compcv;
10530
10531 if (PL_compcv) {
10532 assert(SvTYPE(PL_compcv) == SVt_PVCV);
10533 }
10534 SAVEI32(PL_subline);
10535 save_item(PL_subname);
10536 SAVESPTR(PL_compcv);
10537
10538 PL_compcv = MUTABLE_CV(newSV_type(is_format ? SVt_PVFM : SVt_PVCV));
10539 CvFLAGS(PL_compcv) |= flags;
10540
10541 PL_subline = CopLINE(PL_curcop);
10542 CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB);
10543 CvOUTSIDE(PL_compcv) = MUTABLE_CV(SvREFCNT_inc_simple(outsidecv));
10544 CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
10545
10546 return oldsavestack_ix;
10547}
10548
10549#ifdef __SC__
10550#pragma segment Perl_yylex
10551#endif
10552static int
10553S_yywarn(pTHX_ const char *const s)
10554{
10555 dVAR;
10556
10557 PERL_ARGS_ASSERT_YYWARN;
10558
10559 PL_in_eval |= EVAL_WARNONLY;
10560 yyerror(s);
10561 PL_in_eval &= ~EVAL_WARNONLY;
10562 return 0;
10563}
10564
10565int
10566Perl_yyerror(pTHX_ const char *const s)
10567{
10568 dVAR;
10569 const char *where = NULL;
10570 const char *context = NULL;
10571 int contlen = -1;
10572 SV *msg;
10573 int yychar = PL_parser->yychar;
10574
10575 PERL_ARGS_ASSERT_YYERROR;
10576
10577 if (!yychar || (yychar == ';' && !PL_rsfp))
10578 where = "at EOF";
10579 else if (PL_oldoldbufptr && PL_bufptr > PL_oldoldbufptr &&
10580 PL_bufptr - PL_oldoldbufptr < 200 && PL_oldoldbufptr != PL_oldbufptr &&
10581 PL_oldbufptr != PL_bufptr) {
10582 /*
10583 Only for NetWare:
10584 The code below is removed for NetWare because it abends/crashes on NetWare
10585 when the script has error such as not having the closing quotes like:
10586 if ($var eq "value)
10587 Checking of white spaces is anyway done in NetWare code.
10588 */
10589#ifndef NETWARE
10590 while (isSPACE(*PL_oldoldbufptr))
10591 PL_oldoldbufptr++;
10592#endif
10593 context = PL_oldoldbufptr;
10594 contlen = PL_bufptr - PL_oldoldbufptr;
10595 }
10596 else if (PL_oldbufptr && PL_bufptr > PL_oldbufptr &&
10597 PL_bufptr - PL_oldbufptr < 200 && PL_oldbufptr != PL_bufptr) {
10598 /*
10599 Only for NetWare:
10600 The code below is removed for NetWare because it abends/crashes on NetWare
10601 when the script has error such as not having the closing quotes like:
10602 if ($var eq "value)
10603 Checking of white spaces is anyway done in NetWare code.
10604 */
10605#ifndef NETWARE
10606 while (isSPACE(*PL_oldbufptr))
10607 PL_oldbufptr++;
10608#endif
10609 context = PL_oldbufptr;
10610 contlen = PL_bufptr - PL_oldbufptr;
10611 }
10612 else if (yychar > 255)
10613 where = "next token ???";
10614 else if (yychar == -2) { /* YYEMPTY */
10615 if (PL_lex_state == LEX_NORMAL ||
10616 (PL_lex_state == LEX_KNOWNEXT && PL_lex_defer == LEX_NORMAL))
10617 where = "at end of line";
10618 else if (PL_lex_inpat)
10619 where = "within pattern";
10620 else
10621 where = "within string";
10622 }
10623 else {
10624 SV * const where_sv = newSVpvs_flags("next char ", SVs_TEMP);
10625 if (yychar < 32)
10626 Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
10627 else if (isPRINT_LC(yychar)) {
10628 const char string = yychar;
10629 sv_catpvn(where_sv, &string, 1);
10630 }
10631 else
10632 Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
10633 where = SvPVX_const(where_sv);
10634 }
10635 msg = sv_2mortal(newSVpv(s, 0));
10636 Perl_sv_catpvf(aTHX_ msg, " at %s line %"IVdf", ",
10637 OutCopFILE(PL_curcop), (IV)CopLINE(PL_curcop));
10638 if (context)
10639 Perl_sv_catpvf(aTHX_ msg, "near \"%.*s\"\n", contlen, context);
10640 else
10641 Perl_sv_catpvf(aTHX_ msg, "%s\n", where);
10642 if (PL_multi_start < PL_multi_end && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1) {
10643 Perl_sv_catpvf(aTHX_ msg,
10644 " (Might be a runaway multi-line %c%c string starting on line %"IVdf")\n",
10645 (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start);
10646 PL_multi_end = 0;
10647 }
10648 if (PL_in_eval & EVAL_WARNONLY) {
10649 Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "%"SVf, SVfARG(msg));
10650 }
10651 else
10652 qerror(msg);
10653 if (PL_error_count >= 10) {
10654 if (PL_in_eval && SvCUR(ERRSV))
10655 Perl_croak(aTHX_ "%"SVf"%s has too many errors.\n",
10656 SVfARG(ERRSV), OutCopFILE(PL_curcop));
10657 else
10658 Perl_croak(aTHX_ "%s has too many errors.\n",
10659 OutCopFILE(PL_curcop));
10660 }
10661 PL_in_my = 0;
10662 PL_in_my_stash = NULL;
10663 return 0;
10664}
10665#ifdef __SC__
10666#pragma segment Main
10667#endif
10668
10669STATIC char*
10670S_swallow_bom(pTHX_ U8 *s)
10671{
10672 dVAR;
10673 const STRLEN slen = SvCUR(PL_linestr);
10674
10675 PERL_ARGS_ASSERT_SWALLOW_BOM;
10676
10677 switch (s[0]) {
10678 case 0xFF:
10679 if (s[1] == 0xFE) {
10680 /* UTF-16 little-endian? (or UTF-32LE?) */
10681 if (s[2] == 0 && s[3] == 0) /* UTF-32 little-endian */
10682 Perl_croak(aTHX_ "Unsupported script encoding UTF-32LE");
10683#ifndef PERL_NO_UTF16_FILTER
10684 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (BOM)\n");
10685 s += 2;
10686 if (PL_bufend > (char*)s) {
10687 s = add_utf16_textfilter(s, TRUE);
10688 }
10689#else
10690 Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
10691#endif
10692 }
10693 break;
10694 case 0xFE:
10695 if (s[1] == 0xFF) { /* UTF-16 big-endian? */
10696#ifndef PERL_NO_UTF16_FILTER
10697 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
10698 s += 2;
10699 if (PL_bufend > (char *)s) {
10700 s = add_utf16_textfilter(s, FALSE);
10701 }
10702#else
10703 Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
10704#endif
10705 }
10706 break;
10707 case 0xEF:
10708 if (slen > 2 && s[1] == 0xBB && s[2] == 0xBF) {
10709 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
10710 s += 3; /* UTF-8 */
10711 }
10712 break;
10713 case 0:
10714 if (slen > 3) {
10715 if (s[1] == 0) {
10716 if (s[2] == 0xFE && s[3] == 0xFF) {
10717 /* UTF-32 big-endian */
10718 Perl_croak(aTHX_ "Unsupported script encoding UTF-32BE");
10719 }
10720 }
10721 else if (s[2] == 0 && s[3] != 0) {
10722 /* Leading bytes
10723 * 00 xx 00 xx
10724 * are a good indicator of UTF-16BE. */
10725#ifndef PERL_NO_UTF16_FILTER
10726 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
10727 s = add_utf16_textfilter(s, FALSE);
10728#else
10729 Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
10730#endif
10731 }
10732 }
10733#ifdef EBCDIC
10734 case 0xDD:
10735 if (slen > 3 && s[1] == 0x73 && s[2] == 0x66 && s[3] == 0x73) {
10736 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
10737 s += 4; /* UTF-8 */
10738 }
10739 break;
10740#endif
10741
10742 default:
10743 if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
10744 /* Leading bytes
10745 * xx 00 xx 00
10746 * are a good indicator of UTF-16LE. */
10747#ifndef PERL_NO_UTF16_FILTER
10748 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
10749 s = add_utf16_textfilter(s, TRUE);
10750#else
10751 Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
10752#endif
10753 }
10754 }
10755 return (char*)s;
10756}
10757
10758
10759#ifndef PERL_NO_UTF16_FILTER
10760static I32
10761S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
10762{
10763 dVAR;
10764 SV *const filter = FILTER_DATA(idx);
10765 /* We re-use this each time round, throwing the contents away before we
10766 return. */
10767 SV *const utf16_buffer = MUTABLE_SV(IoTOP_GV(filter));
10768 SV *const utf8_buffer = filter;
10769 IV status = IoPAGE(filter);
10770 const bool reverse = cBOOL(IoLINES(filter));
10771 I32 retval;
10772
10773 PERL_ARGS_ASSERT_UTF16_TEXTFILTER;
10774
10775 /* As we're automatically added, at the lowest level, and hence only called
10776 from this file, we can be sure that we're not called in block mode. Hence
10777 don't bother writing code to deal with block mode. */
10778 if (maxlen) {
10779 Perl_croak(aTHX_ "panic: utf16_textfilter called in block mode (for %d characters)", maxlen);
10780 }
10781 if (status < 0) {
10782 Perl_croak(aTHX_ "panic: utf16_textfilter called after error (status=%"IVdf")", status);
10783 }
10784 DEBUG_P(PerlIO_printf(Perl_debug_log,
10785 "utf16_textfilter(%p,%ce): idx=%d maxlen=%d status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
10786 FPTR2DPTR(void *, S_utf16_textfilter),
10787 reverse ? 'l' : 'b', idx, maxlen, status,
10788 (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
10789
10790 while (1) {
10791 STRLEN chars;
10792 STRLEN have;
10793 I32 newlen;
10794 U8 *end;
10795 /* First, look in our buffer of existing UTF-8 data: */
10796 char *nl = (char *)memchr(SvPVX(utf8_buffer), '\n', SvCUR(utf8_buffer));
10797
10798 if (nl) {
10799 ++nl;
10800 } else if (status == 0) {
10801 /* EOF */
10802 IoPAGE(filter) = 0;
10803 nl = SvEND(utf8_buffer);
10804 }
10805 if (nl) {
10806 STRLEN got = nl - SvPVX(utf8_buffer);
10807 /* Did we have anything to append? */
10808 retval = got != 0;
10809 sv_catpvn(sv, SvPVX(utf8_buffer), got);
10810 /* Everything else in this code works just fine if SVp_POK isn't
10811 set. This, however, needs it, and we need it to work, else
10812 we loop infinitely because the buffer is never consumed. */
10813 sv_chop(utf8_buffer, nl);
10814 break;
10815 }
10816
10817 /* OK, not a complete line there, so need to read some more UTF-16.
10818 Read an extra octect if the buffer currently has an odd number. */
10819 while (1) {
10820 if (status <= 0)
10821 break;
10822 if (SvCUR(utf16_buffer) >= 2) {
10823 /* Location of the high octet of the last complete code point.
10824 Gosh, UTF-16 is a pain. All the benefits of variable length,
10825 *coupled* with all the benefits of partial reads and
10826 endianness. */
10827 const U8 *const last_hi = (U8*)SvPVX(utf16_buffer)
10828 + ((SvCUR(utf16_buffer) & ~1) - (reverse ? 1 : 2));
10829
10830 if (*last_hi < 0xd8 || *last_hi > 0xdb) {
10831 break;
10832 }
10833
10834 /* We have the first half of a surrogate. Read more. */
10835 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter partial surrogate detected at %p\n", last_hi));
10836 }
10837
10838 status = FILTER_READ(idx + 1, utf16_buffer,
10839 160 + (SvCUR(utf16_buffer) & 1));
10840 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter status=%"IVdf" SvCUR(sv)=%"UVuf"\n", status, (UV)SvCUR(utf16_buffer)));
10841 DEBUG_P({ sv_dump(utf16_buffer); sv_dump(utf8_buffer);});
10842 if (status < 0) {
10843 /* Error */
10844 IoPAGE(filter) = status;
10845 return status;
10846 }
10847 }
10848
10849 chars = SvCUR(utf16_buffer) >> 1;
10850 have = SvCUR(utf8_buffer);
10851 SvGROW(utf8_buffer, have + chars * 3 + 1);
10852
10853 if (reverse) {
10854 end = utf16_to_utf8_reversed((U8*)SvPVX(utf16_buffer),
10855 (U8*)SvPVX_const(utf8_buffer) + have,
10856 chars * 2, &newlen);
10857 } else {
10858 end = utf16_to_utf8((U8*)SvPVX(utf16_buffer),
10859 (U8*)SvPVX_const(utf8_buffer) + have,
10860 chars * 2, &newlen);
10861 }
10862 SvCUR_set(utf8_buffer, have + newlen);
10863 *end = '\0';
10864
10865 /* No need to keep this SV "well-formed" with a '\0' after the end, as
10866 it's private to us, and utf16_to_utf8{,reversed} take a
10867 (pointer,length) pair, rather than a NUL-terminated string. */
10868 if(SvCUR(utf16_buffer) & 1) {
10869 *SvPVX(utf16_buffer) = SvEND(utf16_buffer)[-1];
10870 SvCUR_set(utf16_buffer, 1);
10871 } else {
10872 SvCUR_set(utf16_buffer, 0);
10873 }
10874 }
10875 DEBUG_P(PerlIO_printf(Perl_debug_log,
10876 "utf16_textfilter: returns, status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
10877 status,
10878 (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
10879 DEBUG_P({ sv_dump(utf8_buffer); sv_dump(sv);});
10880 return retval;
10881}
10882
10883static U8 *
10884S_add_utf16_textfilter(pTHX_ U8 *const s, bool reversed)
10885{
10886 SV *filter = filter_add(S_utf16_textfilter, NULL);
10887
10888 PERL_ARGS_ASSERT_ADD_UTF16_TEXTFILTER;
10889
10890 IoTOP_GV(filter) = MUTABLE_GV(newSVpvn((char *)s, PL_bufend - (char*)s));
10891 sv_setpvs(filter, "");
10892 IoLINES(filter) = reversed;
10893 IoPAGE(filter) = 1; /* Not EOF */
10894
10895 /* Sadly, we have to return a valid pointer, come what may, so we have to
10896 ignore any error return from this. */
10897 SvCUR_set(PL_linestr, 0);
10898 if (FILTER_READ(0, PL_linestr, 0)) {
10899 SvUTF8_on(PL_linestr);
10900 } else {
10901 SvUTF8_on(PL_linestr);
10902 }
10903 PL_bufend = SvEND(PL_linestr);
10904 return (U8*)SvPVX(PL_linestr);
10905}
10906#endif
10907
10908/*
10909Returns a pointer to the next character after the parsed
10910vstring, as well as updating the passed in sv.
10911
10912Function must be called like
10913
10914 sv = newSV(5);
10915 s = scan_vstring(s,e,sv);
10916
10917where s and e are the start and end of the string.
10918The sv should already be large enough to store the vstring
10919passed in, for performance reasons.
10920
10921*/
10922
10923char *
10924Perl_scan_vstring(pTHX_ const char *s, const char *const e, SV *sv)
10925{
10926 dVAR;
10927 const char *pos = s;
10928 const char *start = s;
10929
10930 PERL_ARGS_ASSERT_SCAN_VSTRING;
10931
10932 if (*pos == 'v') pos++; /* get past 'v' */
10933 while (pos < e && (isDIGIT(*pos) || *pos == '_'))
10934 pos++;
10935 if ( *pos != '.') {
10936 /* this may not be a v-string if followed by => */
10937 const char *next = pos;
10938 while (next < e && isSPACE(*next))
10939 ++next;
10940 if ((e - next) >= 2 && *next == '=' && next[1] == '>' ) {
10941 /* return string not v-string */
10942 sv_setpvn(sv,(char *)s,pos-s);
10943 return (char *)pos;
10944 }
10945 }
10946
10947 if (!isALPHA(*pos)) {
10948 U8 tmpbuf[UTF8_MAXBYTES+1];
10949
10950 if (*s == 'v')
10951 s++; /* get past 'v' */
10952
10953 sv_setpvs(sv, "");
10954
10955 for (;;) {
10956 /* this is atoi() that tolerates underscores */
10957 U8 *tmpend;
10958 UV rev = 0;
10959 const char *end = pos;
10960 UV mult = 1;
10961 while (--end >= s) {
10962 if (*end != '_') {
10963 const UV orev = rev;
10964 rev += (*end - '0') * mult;
10965 mult *= 10;
10966 if (orev > rev)
10967 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
10968 "Integer overflow in decimal number");
10969 }
10970 }
10971#ifdef EBCDIC
10972 if (rev > 0x7FFFFFFF)
10973 Perl_croak(aTHX_ "In EBCDIC the v-string components cannot exceed 2147483647");
10974#endif
10975 /* Append native character for the rev point */
10976 tmpend = uvchr_to_utf8(tmpbuf, rev);
10977 sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
10978 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(rev)))
10979 SvUTF8_on(sv);
10980 if (pos + 1 < e && *pos == '.' && isDIGIT(pos[1]))
10981 s = ++pos;
10982 else {
10983 s = pos;
10984 break;
10985 }
10986 while (pos < e && (isDIGIT(*pos) || *pos == '_'))
10987 pos++;
10988 }
10989 SvPOK_on(sv);
10990 sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start);
10991 SvRMAGICAL_on(sv);
10992 }
10993 return (char *)s;
10994}
10995
10996int
10997Perl_keyword_plugin_standard(pTHX_
10998 char *keyword_ptr, STRLEN keyword_len, OP **op_ptr)
10999{
11000 PERL_ARGS_ASSERT_KEYWORD_PLUGIN_STANDARD;
11001 PERL_UNUSED_CONTEXT;
11002 PERL_UNUSED_ARG(keyword_ptr);
11003 PERL_UNUSED_ARG(keyword_len);
11004 PERL_UNUSED_ARG(op_ptr);
11005 return KEYWORD_PLUGIN_DECLINE;
11006}
11007
11008#define parse_recdescent(g,p) S_parse_recdescent(aTHX_ g,p)
11009static void
11010S_parse_recdescent(pTHX_ int gramtype, I32 fakeeof)
11011{
11012 SAVEI32(PL_lex_brackets);
11013 if (PL_lex_brackets > 100)
11014 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
11015 PL_lex_brackstack[PL_lex_brackets++] = XFAKEEOF;
11016 SAVEI32(PL_lex_allbrackets);
11017 PL_lex_allbrackets = 0;
11018 SAVEI8(PL_lex_fakeeof);
11019 PL_lex_fakeeof = (U8)fakeeof;
11020 if(yyparse(gramtype) && !PL_parser->error_count)
11021 qerror(Perl_mess(aTHX_ "Parse error"));
11022}
11023
11024#define parse_recdescent_for_op(g,p) S_parse_recdescent_for_op(aTHX_ g,p)
11025static OP *
11026S_parse_recdescent_for_op(pTHX_ int gramtype, I32 fakeeof)
11027{
11028 OP *o;
11029 ENTER;
11030 SAVEVPTR(PL_eval_root);
11031 PL_eval_root = NULL;
11032 parse_recdescent(gramtype, fakeeof);
11033 o = PL_eval_root;
11034 LEAVE;
11035 return o;
11036}
11037
11038#define parse_expr(p,f) S_parse_expr(aTHX_ p,f)
11039static OP *
11040S_parse_expr(pTHX_ I32 fakeeof, U32 flags)
11041{
11042 OP *exprop;
11043 if (flags & ~PARSE_OPTIONAL)
11044 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_expr");
11045 exprop = parse_recdescent_for_op(GRAMEXPR, fakeeof);
11046 if (!exprop && !(flags & PARSE_OPTIONAL)) {
11047 if (!PL_parser->error_count)
11048 qerror(Perl_mess(aTHX_ "Parse error"));
11049 exprop = newOP(OP_NULL, 0);
11050 }
11051 return exprop;
11052}
11053
11054/*
11055=for apidoc Amx|OP *|parse_arithexpr|U32 flags
11056
11057Parse a Perl arithmetic expression. This may contain operators of precedence
11058down to the bit shift operators. The expression must be followed (and thus
11059terminated) either by a comparison or lower-precedence operator or by
11060something that would normally terminate an expression such as semicolon.
11061If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
11062otherwise it is mandatory. It is up to the caller to ensure that the
11063dynamic parser state (L</PL_parser> et al) is correctly set to reflect
11064the source of the code to be parsed and the lexical context for the
11065expression.
11066
11067The op tree representing the expression is returned. If an optional
11068expression is absent, a null pointer is returned, otherwise the pointer
11069will be non-null.
11070
11071If an error occurs in parsing or compilation, in most cases a valid op
11072tree is returned anyway. The error is reflected in the parser state,
11073normally resulting in a single exception at the top level of parsing
11074which covers all the compilation errors that occurred. Some compilation
11075errors, however, will throw an exception immediately.
11076
11077=cut
11078*/
11079
11080OP *
11081Perl_parse_arithexpr(pTHX_ U32 flags)
11082{
11083 return parse_expr(LEX_FAKEEOF_COMPARE, flags);
11084}
11085
11086/*
11087=for apidoc Amx|OP *|parse_termexpr|U32 flags
11088
11089Parse a Perl term expression. This may contain operators of precedence
11090down to the assignment operators. The expression must be followed (and thus
11091terminated) either by a comma or lower-precedence operator or by
11092something that would normally terminate an expression such as semicolon.
11093If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
11094otherwise it is mandatory. It is up to the caller to ensure that the
11095dynamic parser state (L</PL_parser> et al) is correctly set to reflect
11096the source of the code to be parsed and the lexical context for the
11097expression.
11098
11099The op tree representing the expression is returned. If an optional
11100expression is absent, a null pointer is returned, otherwise the pointer
11101will be non-null.
11102
11103If an error occurs in parsing or compilation, in most cases a valid op
11104tree is returned anyway. The error is reflected in the parser state,
11105normally resulting in a single exception at the top level of parsing
11106which covers all the compilation errors that occurred. Some compilation
11107errors, however, will throw an exception immediately.
11108
11109=cut
11110*/
11111
11112OP *
11113Perl_parse_termexpr(pTHX_ U32 flags)
11114{
11115 return parse_expr(LEX_FAKEEOF_COMMA, flags);
11116}
11117
11118/*
11119=for apidoc Amx|OP *|parse_listexpr|U32 flags
11120
11121Parse a Perl list expression. This may contain operators of precedence
11122down to the comma operator. The expression must be followed (and thus
11123terminated) either by a low-precedence logic operator such as C<or> or by
11124something that would normally terminate an expression such as semicolon.
11125If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
11126otherwise it is mandatory. It is up to the caller to ensure that the
11127dynamic parser state (L</PL_parser> et al) is correctly set to reflect
11128the source of the code to be parsed and the lexical context for the
11129expression.
11130
11131The op tree representing the expression is returned. If an optional
11132expression is absent, a null pointer is returned, otherwise the pointer
11133will be non-null.
11134
11135If an error occurs in parsing or compilation, in most cases a valid op
11136tree is returned anyway. The error is reflected in the parser state,
11137normally resulting in a single exception at the top level of parsing
11138which covers all the compilation errors that occurred. Some compilation
11139errors, however, will throw an exception immediately.
11140
11141=cut
11142*/
11143
11144OP *
11145Perl_parse_listexpr(pTHX_ U32 flags)
11146{
11147 return parse_expr(LEX_FAKEEOF_LOWLOGIC, flags);
11148}
11149
11150/*
11151=for apidoc Amx|OP *|parse_fullexpr|U32 flags
11152
11153Parse a single complete Perl expression. This allows the full
11154expression grammar, including the lowest-precedence operators such
11155as C<or>. The expression must be followed (and thus terminated) by a
11156token that an expression would normally be terminated by: end-of-file,
11157closing bracketing punctuation, semicolon, or one of the keywords that
11158signals a postfix expression-statement modifier. If I<flags> includes
11159C<PARSE_OPTIONAL> then the expression is optional, otherwise it is
11160mandatory. It is up to the caller to ensure that the dynamic parser
11161state (L</PL_parser> et al) is correctly set to reflect the source of
11162the code to be parsed and the lexical context for the expression.
11163
11164The op tree representing the expression is returned. If an optional
11165expression is absent, a null pointer is returned, otherwise the pointer
11166will be non-null.
11167
11168If an error occurs in parsing or compilation, in most cases a valid op
11169tree is returned anyway. The error is reflected in the parser state,
11170normally resulting in a single exception at the top level of parsing
11171which covers all the compilation errors that occurred. Some compilation
11172errors, however, will throw an exception immediately.
11173
11174=cut
11175*/
11176
11177OP *
11178Perl_parse_fullexpr(pTHX_ U32 flags)
11179{
11180 return parse_expr(LEX_FAKEEOF_NONEXPR, flags);
11181}
11182
11183/*
11184=for apidoc Amx|OP *|parse_block|U32 flags
11185
11186Parse a single complete Perl code block. This consists of an opening
11187brace, a sequence of statements, and a closing brace. The block
11188constitutes a lexical scope, so C<my> variables and various compile-time
11189effects can be contained within it. It is up to the caller to ensure
11190that the dynamic parser state (L</PL_parser> et al) is correctly set to
11191reflect the source of the code to be parsed and the lexical context for
11192the statement.
11193
11194The op tree representing the code block is returned. This is always a
11195real op, never a null pointer. It will normally be a C<lineseq> list,
11196including C<nextstate> or equivalent ops. No ops to construct any kind
11197of runtime scope are included by virtue of it being a block.
11198
11199If an error occurs in parsing or compilation, in most cases a valid op
11200tree (most likely null) is returned anyway. The error is reflected in
11201the parser state, normally resulting in a single exception at the top
11202level of parsing which covers all the compilation errors that occurred.
11203Some compilation errors, however, will throw an exception immediately.
11204
11205The I<flags> parameter is reserved for future use, and must always
11206be zero.
11207
11208=cut
11209*/
11210
11211OP *
11212Perl_parse_block(pTHX_ U32 flags)
11213{
11214 if (flags)
11215 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_block");
11216 return parse_recdescent_for_op(GRAMBLOCK, LEX_FAKEEOF_NEVER);
11217}
11218
11219/*
11220=for apidoc Amx|OP *|parse_barestmt|U32 flags
11221
11222Parse a single unadorned Perl statement. This may be a normal imperative
11223statement or a declaration that has compile-time effect. It does not
11224include any label or other affixture. It is up to the caller to ensure
11225that the dynamic parser state (L</PL_parser> et al) is correctly set to
11226reflect the source of the code to be parsed and the lexical context for
11227the statement.
11228
11229The op tree representing the statement is returned. This may be a
11230null pointer if the statement is null, for example if it was actually
11231a subroutine definition (which has compile-time side effects). If not
11232null, it will be ops directly implementing the statement, suitable to
11233pass to L</newSTATEOP>. It will not normally include a C<nextstate> or
11234equivalent op (except for those embedded in a scope contained entirely
11235within the statement).
11236
11237If an error occurs in parsing or compilation, in most cases a valid op
11238tree (most likely null) is returned anyway. The error is reflected in
11239the parser state, normally resulting in a single exception at the top
11240level of parsing which covers all the compilation errors that occurred.
11241Some compilation errors, however, will throw an exception immediately.
11242
11243The I<flags> parameter is reserved for future use, and must always
11244be zero.
11245
11246=cut
11247*/
11248
11249OP *
11250Perl_parse_barestmt(pTHX_ U32 flags)
11251{
11252 if (flags)
11253 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_barestmt");
11254 return parse_recdescent_for_op(GRAMBARESTMT, LEX_FAKEEOF_NEVER);
11255}
11256
11257/*
11258=for apidoc Amx|SV *|parse_label|U32 flags
11259
11260Parse a single label, possibly optional, of the type that may prefix a
11261Perl statement. It is up to the caller to ensure that the dynamic parser
11262state (L</PL_parser> et al) is correctly set to reflect the source of
11263the code to be parsed. If I<flags> includes C<PARSE_OPTIONAL> then the
11264label is optional, otherwise it is mandatory.
11265
11266The name of the label is returned in the form of a fresh scalar. If an
11267optional label is absent, a null pointer is returned.
11268
11269If an error occurs in parsing, which can only occur if the label is
11270mandatory, a valid label is returned anyway. The error is reflected in
11271the parser state, normally resulting in a single exception at the top
11272level of parsing which covers all the compilation errors that occurred.
11273
11274=cut
11275*/
11276
11277SV *
11278Perl_parse_label(pTHX_ U32 flags)
11279{
11280 if (flags & ~PARSE_OPTIONAL)
11281 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_label");
11282 if (PL_lex_state == LEX_KNOWNEXT) {
11283 PL_parser->yychar = yylex();
11284 if (PL_parser->yychar == LABEL) {
11285 char *lpv = pl_yylval.pval;
11286 STRLEN llen = strlen(lpv);
11287 SV *lsv;
11288 PL_parser->yychar = YYEMPTY;
11289 lsv = newSV_type(SVt_PV);
11290 SvPV_set(lsv, lpv);
11291 SvCUR_set(lsv, llen);
11292 SvLEN_set(lsv, llen+1);
11293 SvPOK_on(lsv);
11294 return lsv;
11295 } else {
11296 yyunlex();
11297 goto no_label;
11298 }
11299 } else {
11300 char *s, *t;
11301 U8 c;
11302 STRLEN wlen, bufptr_pos;
11303 lex_read_space(0);
11304 t = s = PL_bufptr;
11305 c = (U8)*s;
11306 if (!isIDFIRST_A(c))
11307 goto no_label;
11308 do {
11309 c = (U8)*++t;
11310 } while(isWORDCHAR_A(c));
11311 wlen = t - s;
11312 if (word_takes_any_delimeter(s, wlen))
11313 goto no_label;
11314 bufptr_pos = s - SvPVX(PL_linestr);
11315 PL_bufptr = t;
11316 lex_read_space(LEX_KEEP_PREVIOUS);
11317 t = PL_bufptr;
11318 s = SvPVX(PL_linestr) + bufptr_pos;
11319 if (t[0] == ':' && t[1] != ':') {
11320 PL_oldoldbufptr = PL_oldbufptr;
11321 PL_oldbufptr = s;
11322 PL_bufptr = t+1;
11323 return newSVpvn(s, wlen);
11324 } else {
11325 PL_bufptr = s;
11326 no_label:
11327 if (flags & PARSE_OPTIONAL) {
11328 return NULL;
11329 } else {
11330 qerror(Perl_mess(aTHX_ "Parse error"));
11331 return newSVpvs("x");
11332 }
11333 }
11334 }
11335}
11336
11337/*
11338=for apidoc Amx|OP *|parse_fullstmt|U32 flags
11339
11340Parse a single complete Perl statement. This may be a normal imperative
11341statement or a declaration that has compile-time effect, and may include
11342optional labels. It is up to the caller to ensure that the dynamic
11343parser state (L</PL_parser> et al) is correctly set to reflect the source
11344of the code to be parsed and the lexical context for the statement.
11345
11346The op tree representing the statement is returned. This may be a
11347null pointer if the statement is null, for example if it was actually
11348a subroutine definition (which has compile-time side effects). If not
11349null, it will be the result of a L</newSTATEOP> call, normally including
11350a C<nextstate> or equivalent op.
11351
11352If an error occurs in parsing or compilation, in most cases a valid op
11353tree (most likely null) is returned anyway. The error is reflected in
11354the parser state, normally resulting in a single exception at the top
11355level of parsing which covers all the compilation errors that occurred.
11356Some compilation errors, however, will throw an exception immediately.
11357
11358The I<flags> parameter is reserved for future use, and must always
11359be zero.
11360
11361=cut
11362*/
11363
11364OP *
11365Perl_parse_fullstmt(pTHX_ U32 flags)
11366{
11367 if (flags)
11368 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_fullstmt");
11369 return parse_recdescent_for_op(GRAMFULLSTMT, LEX_FAKEEOF_NEVER);
11370}
11371
11372/*
11373=for apidoc Amx|OP *|parse_stmtseq|U32 flags
11374
11375Parse a sequence of zero or more Perl statements. These may be normal
11376imperative statements, including optional labels, or declarations
11377that have compile-time effect, or any mixture thereof. The statement
11378sequence ends when a closing brace or end-of-file is encountered in a
11379place where a new statement could have validly started. It is up to
11380the caller to ensure that the dynamic parser state (L</PL_parser> et al)
11381is correctly set to reflect the source of the code to be parsed and the
11382lexical context for the statements.
11383
11384The op tree representing the statement sequence is returned. This may
11385be a null pointer if the statements were all null, for example if there
11386were no statements or if there were only subroutine definitions (which
11387have compile-time side effects). If not null, it will be a C<lineseq>
11388list, normally including C<nextstate> or equivalent ops.
11389
11390If an error occurs in parsing or compilation, in most cases a valid op
11391tree is returned anyway. The error is reflected in the parser state,
11392normally resulting in a single exception at the top level of parsing
11393which covers all the compilation errors that occurred. Some compilation
11394errors, however, will throw an exception immediately.
11395
11396The I<flags> parameter is reserved for future use, and must always
11397be zero.
11398
11399=cut
11400*/
11401
11402OP *
11403Perl_parse_stmtseq(pTHX_ U32 flags)
11404{
11405 OP *stmtseqop;
11406 I32 c;
11407 if (flags)
11408 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_stmtseq");
11409 stmtseqop = parse_recdescent_for_op(GRAMSTMTSEQ, LEX_FAKEEOF_CLOSING);
11410 c = lex_peek_unichar(0);
11411 if (c != -1 && c != /*{*/'}')
11412 qerror(Perl_mess(aTHX_ "Parse error"));
11413 return stmtseqop;
11414}
11415
11416void
11417Perl_munge_qwlist_to_paren_list(pTHX_ OP *qwlist)
11418{
11419 PERL_ARGS_ASSERT_MUNGE_QWLIST_TO_PAREN_LIST;
11420 deprecate("qw(...) as parentheses");
11421 force_next((4<<24)|')');
11422 if (qwlist->op_type == OP_STUB) {
11423 op_free(qwlist);
11424 }
11425 else {
11426 start_force(PL_curforce);
11427 NEXTVAL_NEXTTOKE.opval = qwlist;
11428 force_next(THING);
11429 }
11430 force_next((2<<24)|'(');
11431}
11432
11433/*
11434 * Local variables:
11435 * c-indentation-style: bsd
11436 * c-basic-offset: 4
11437 * indent-tabs-mode: t
11438 * End:
11439 *
11440 * ex: set ts=8 sts=4 sw=4 noet:
11441 */