This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Upgrade ExtUtils::ParseXS to 3.05
[perl5.git] / toke.c
... / ...
CommitLineData
1/* toke.c
2 *
3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
5 *
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
8 *
9 */
10
11/*
12 * 'It all comes from here, the stench and the peril.' --Frodo
13 *
14 * [p.719 of _The Lord of the Rings_, IV/ix: "Shelob's Lair"]
15 */
16
17/*
18 * This file is the lexer for Perl. It's closely linked to the
19 * parser, perly.y.
20 *
21 * The main routine is yylex(), which returns the next token.
22 */
23
24/*
25=head1 Lexer interface
26
27This is the lower layer of the Perl parser, managing characters and tokens.
28
29=for apidoc AmU|yy_parser *|PL_parser
30
31Pointer to a structure encapsulating the state of the parsing operation
32currently in progress. The pointer can be locally changed to perform
33a nested parse without interfering with the state of an outer parse.
34Individual members of C<PL_parser> have their own documentation.
35
36=cut
37*/
38
39#include "EXTERN.h"
40#define PERL_IN_TOKE_C
41#include "perl.h"
42#include "dquote_static.c"
43
/* Front end for S_new_constant(): prepends aTHX_ and expands the third
 * argument through STR_WITH_LEN(); the remaining arguments are passed
 * along unchanged. */
#define new_constant(a, b, c, d, e, f, g)                       \
    S_new_constant(aTHX_ a, b, STR_WITH_LEN(c), d, e, f, g)

/* The yylval member of the parser currently installed in PL_parser. */
#define pl_yylval       (PL_parser->yylval)
48
/* XXX temporary backwards compatibility
 *
 * Each legacy PL_* name below is an alias for the identically named
 * member of the parser object that PL_parser currently points to.
 */

/* bracket / case-modifier tracking */
#define PL_lex_brackets         (PL_parser->lex_brackets)
#define PL_lex_allbrackets      (PL_parser->lex_allbrackets)
#define PL_lex_fakeeof          (PL_parser->lex_fakeeof)
#define PL_lex_brackstack       (PL_parser->lex_brackstack)
#define PL_lex_casemods         (PL_parser->lex_casemods)
#define PL_lex_casestack        (PL_parser->lex_casestack)

/* other lex_* members */
#define PL_lex_defer            (PL_parser->lex_defer)
#define PL_lex_dojoin           (PL_parser->lex_dojoin)
#define PL_lex_expect           (PL_parser->lex_expect)
#define PL_lex_formbrack        (PL_parser->lex_formbrack)
#define PL_lex_inpat            (PL_parser->lex_inpat)
#define PL_lex_inwhat           (PL_parser->lex_inwhat)
#define PL_lex_op               (PL_parser->lex_op)
#define PL_lex_repl             (PL_parser->lex_repl)
#define PL_lex_starts           (PL_parser->lex_starts)
#define PL_lex_stuff            (PL_parser->lex_stuff)

/* multi_* members and assorted parser fields */
#define PL_multi_start          (PL_parser->multi_start)
#define PL_multi_open           (PL_parser->multi_open)
#define PL_multi_close          (PL_parser->multi_close)
#define PL_pending_ident        (PL_parser->pending_ident)
#define PL_preambled            (PL_parser->preambled)
#define PL_sublex_info          (PL_parser->sublex_info)

/* the line buffer and the pointers into it */
#define PL_linestr              (PL_parser->linestr)
#define PL_expect               (PL_parser->expect)
#define PL_copline              (PL_parser->copline)
#define PL_bufptr               (PL_parser->bufptr)
#define PL_oldbufptr            (PL_parser->oldbufptr)
#define PL_oldoldbufptr         (PL_parser->oldoldbufptr)
#define PL_linestart            (PL_parser->linestart)
#define PL_bufend               (PL_parser->bufend)
#define PL_last_uni             (PL_parser->last_uni)
#define PL_last_lop             (PL_parser->last_lop)
#define PL_last_lop_op          (PL_parser->last_lop_op)

/* remaining members */
#define PL_lex_state            (PL_parser->lex_state)
#define PL_rsfp                 (PL_parser->rsfp)
#define PL_rsfp_filters         (PL_parser->rsfp_filters)
#define PL_in_my                (PL_parser->in_my)
#define PL_in_my_stash          (PL_parser->in_my_stash)
#define PL_tokenbuf             (PL_parser->tokenbuf)
#define PL_multi_end            (PL_parser->multi_end)
#define PL_error_count          (PL_parser->error_count)
91
/* Further PL_* aliases for members of the current parser object.  Which
 * members are aliased depends on PERL_MAD; PL_nexttoke is provided in
 * both configurations.  (PL_thiswhite used to be defined twice in the
 * PERL_MAD branch; the redundant copy has been dropped.) */
#ifdef PERL_MAD
#  define PL_endwhite           (PL_parser->endwhite)
#  define PL_faketokens         (PL_parser->faketokens)
#  define PL_lasttoke           (PL_parser->lasttoke)
#  define PL_nextwhite          (PL_parser->nextwhite)
#  define PL_realtokenstart     (PL_parser->realtokenstart)
#  define PL_skipwhite          (PL_parser->skipwhite)
#  define PL_thisclose          (PL_parser->thisclose)
#  define PL_thismad            (PL_parser->thismad)
#  define PL_thisopen           (PL_parser->thisopen)
#  define PL_thisstuff          (PL_parser->thisstuff)
#  define PL_thistoken          (PL_parser->thistoken)
#  define PL_thiswhite          (PL_parser->thiswhite)
#  define PL_nexttoke           (PL_parser->nexttoke)
#  define PL_curforce           (PL_parser->curforce)
#else
#  define PL_nexttoke           (PL_parser->nexttoke)
#  define PL_nexttype           (PL_parser->nexttype)
#  define PL_nextval            (PL_parser->nextval)
#endif
113
/* S_pending_ident() has to be declared by hand instead of through
 * embed.fnc: struct yy_parser has a member named pending_ident, and the
 * #define that embed.fnc would generate for the function clashes with
 * it. */
static int S_pending_ident(pTHX);

/* Text of the "Identifier too long" message. */
static const char ident_too_long[] = "Identifier too long";
120
/* CURMAD(slot,sv): without PERL_MAD it expands to nothing; with PERL_MAD,
 *                  and only when PL_madskills is true, it calls
 *                  curmad(slot,sv) and then zeroes sv.
 * NEXTVAL_NEXTTOKE: the "next value" slot for the pending token, whose
 *                  location differs between the two configurations.
 */
#ifndef PERL_MAD
#  define CURMAD(slot,sv)
#  define NEXTVAL_NEXTTOKE      PL_nextval[PL_nexttoke]
#else
#  define CURMAD(slot,sv)       if (PL_madskills) { curmad(slot,sv); sv = 0; }
#  define NEXTVAL_NEXTTOKE      PL_nexttoke[PL_curforce].next_val
#endif
128
/* XENUMMASK selects the low six bits of a value; XFAKEEOF and XFAKEBRACK
 * are the two single-bit flags immediately above that range. */
#define XENUMMASK       0x3f
#define XFAKEEOF        0x40
#define XFAKEBRACK      0x80

/* UTF: whether the text being lexed is to be interpreted as UTF-8. */
#ifndef USE_UTF8_SCRIPTS
#  define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8))
#else
#  define UTF (!IN_BYTES)
#endif

/* The maximum number of characters preceding the unrecognized one to display */
#define UNRECOGNIZED_PRECEDE_COUNT      10

/* In variables named $^X, these are the legal values for X.
 * 1999-02-27 mjd-perl-patch@plover.com
 * NOTE: strchr() also matches the string's terminating NUL, so the macro
 * is true for x == 0 as well. */
#define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))

/* True for a space or a horizontal tab, false for anything else. */
#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
147
/* LEX_* are values for PL_lex_state, the state of the lexer.
 * They are arranged oddly so that the guard on the switch statement
 * can get by with a single comparison (if the compiler is smart enough).
 *
 * LEX_NOTPARSING (11) is not defined here; perl.h does that.
 *
 * Listed below in ascending numeric order.
 */

#define LEX_KNOWNEXT            0  /* next token known; just return it */
#define LEX_FORMLINE            1  /* expecting a format line */
#define LEX_INTERPCONST         2  /* NOT USED */
#define LEX_INTERPCONCAT        3  /* expecting anything, eg at start of
                                      string or after \E, $foo, etc */

                                   /* at end of code, eg "$x" followed by: */
#define LEX_INTERPENDMAYBE      4  /* ... eg one of [, { or -> */
#define LEX_INTERPEND           5  /* ... eg not one of [, { or -> */

#define LEX_INTERPSTART         6  /* expecting the start of a $var */
#define LEX_INTERPPUSH          7  /* starting a new sublex parse level */
#define LEX_INTERPCASEMOD       8  /* expecting a \U, \Q or \E etc */
#define LEX_INTERPNORMAL        9  /* code within a string, eg "$foo[$x+1]" */
#define LEX_NORMAL             10  /* normal code (ie not within "...") */
170
171
172#ifdef DEBUGGING
/* Printable names of the lexer states, indexed by LEX_* value
 * (0 is KNOWNEXT ... 10 is NORMAL). */
static const char* const lex_state_names[] = {
    "KNOWNEXT",         /*  0 */
    "FORMLINE",         /*  1 */
    "INTERPCONST",      /*  2 */
    "INTERPCONCAT",     /*  3 */
    "INTERPENDMAYBE",   /*  4 */
    "INTERPEND",        /*  5 */
    "INTERPSTART",      /*  6 */
    "INTERPPUSH",       /*  7 */
    "INTERPCASEMOD",    /*  8 */
    "INTERPNORMAL",     /*  9 */
    "NORMAL"            /* 10 */
};
186#endif
187
188#ifdef ff_next
189#undef ff_next
190#endif
191
192#include "keywords.h"
193
/* CLINE is a macro that ensures PL_copline has a sane value: when the
 * current cop's line number is smaller than PL_copline, PL_copline is
 * lowered to it; otherwise PL_copline is left as it is.  The expression
 * evaluates to the resulting PL_copline. */

#undef CLINE    /* no-op when CLINE has not been defined yet */
#define CLINE                                                           \
    (PL_copline = (PL_copline > CopLINE(PL_curcop)                      \
                        ? CopLINE(PL_curcop)                            \
                        : PL_copline))
200
/* Whitespace-skipping entry points.  Without PERL_MAD all four collapse
 * to skipspace(s) and the tsv argument of SKIPSPACE2 is ignored; a
 * PERL_MAD build routes them to skipspace0/skipspace1/skipspace2. */
#ifndef PERL_MAD
#  define SKIPSPACE0(s)         skipspace(s)
#  define SKIPSPACE1(s)         skipspace(s)
#  define SKIPSPACE2(s,tsv)     skipspace(s)
#  define PEEKSPACE(s)          skipspace(s)
#else
#  define SKIPSPACE0(s)         skipspace0(s)
#  define SKIPSPACE1(s)         skipspace1(s)
#  define SKIPSPACE2(s,tsv)     skipspace2(s,&tsv)
#  define PEEKSPACE(s)          skipspace2(s,0)
#endif
212
213/*
214 * Convenience functions to return different tokens and prime the
215 * lexer for the next token. They all take an argument.
216 *
217 * TOKEN : generic token (used for '(', DOLSHARP, etc)
218 * OPERATOR : generic operator
219 * AOPERATOR : assignment operator
220 * PREBLOCK : beginning the block after an if, while, foreach, ...
221 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
222 * PREREF : *EXPR where EXPR is not a simple identifier
223 * TERM : expression term
224 * LOOPX : loop exiting command (goto, last, dump, etc)
225 * FTST : file test operator
226 * FUN0 : zero-argument function
227 * FUN0OP : zero-argument function, with its op created in this file
228 * FUN1 : not used, except for not, which isn't a UNIOP
229 * BOop : bitwise or or xor
230 * BAop : bitwise and
231 * SHop : shift operator
232 * PWop : power operator
233 * PMop : pattern-matching operator
234 * Aop : addition-level operator
235 * Mop : multiplication-level operator
236 * Eop : equality-testing operator
237 * Rop : relational operator <= != gt
238 *
239 * Also see LOP and lop() below.
240 */
241
/* REPORT(retval): in a DEBUGGING build (serving -DT) the token is passed
 * through tokereport() together with pl_yylval; otherwise it is simply
 * the value itself. */
#ifndef DEBUGGING
#  define REPORT(retval) (retval)
#else
#  define REPORT(retval) tokereport((I32)retval, &pl_yylval)
#endif

/* Each macro below is a complete return statement.  All of them store the
 * caller's variable s into PL_bufptr; most also set PL_expect, and the
 * (f) forms store f into pl_yylval first.  The ones wrapped in ao() pass
 * the token through ao() before returning it. */

/* forms that take the token to return */
#define TOKEN(retval) \
    return ( PL_bufptr = s, REPORT(retval))
#define OPERATOR(retval) \
    return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
#define AOPERATOR(retval) \
    return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
#define PREBLOCK(retval) \
    return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
#define PRETERMBLOCK(retval) \
    return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
#define PREREF(retval) \
    return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
#define TERM(retval) \
    return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))

/* forms that take the value for pl_yylval and return a fixed token */
#define LOOPX(f) \
    return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, \
            REPORT((int)LOOPEX))
#define FTST(f) \
    return (pl_yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, \
            REPORT((int)UNIOP))
#define FUN0(f) \
    return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, \
            REPORT((int)FUNC0))
#define FUN0OP(f) \
    return (pl_yylval.opval=f, CLINE, PL_expect=XOPERATOR, PL_bufptr=s, \
            REPORT((int)FUNC0OP))
#define FUN1(f) \
    return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, \
            REPORT((int)FUNC1))
#define BOop(f) \
    return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, \
               REPORT((int)BITOROP)))
#define BAop(f) \
    return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, \
               REPORT((int)BITANDOP)))
#define SHop(f) \
    return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, \
               REPORT((int)SHIFTOP)))
#define PWop(f) \
    return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, \
               REPORT((int)POWOP)))
#define PMop(f) \
    return(pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, \
           REPORT((int)MATCHOP))
#define Aop(f) \
    return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, \
               REPORT((int)ADDOP)))
#define Mop(f) \
    return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, \
               REPORT((int)MULOP)))
#define Eop(f) \
    return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, \
            REPORT((int)EQOP))
#define Rop(f) \
    return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, \
            REPORT((int)RELOP))
269
/* This bit of chicanery makes a unary function followed by
 * a parenthesis into a function with one argument, highest precedence:
 * FUNC1 is returned when the next character, before or after skipping
 * whitespace with PEEKSPACE(), is '('; otherwise UNIOP is returned.
 * The UNIDOR macro is for unary functions that can be followed by the //
 * operator (such as C<shift // 0>).
 *
 * UNI2 evaluates f twice (it goes into both pl_yylval.ival and
 * PL_last_lop_op).
 */
#define UNI2(f,x) {                                     \
        pl_yylval.ival = f;                             \
        PL_expect = x;                                  \
        PL_bufptr = s;                                  \
        PL_last_uni = PL_oldbufptr;                     \
        PL_last_lop_op = f;                             \
        if (*s != '(') {                                \
            s = PEEKSPACE(s);                           \
            if (*s != '(')                              \
                return REPORT( (int)UNIOP );            \
        }                                               \
        return REPORT( (int)FUNC1 );                    \
    }
#define UNI(f)    UNI2(f,XTERM)
#define UNIDOR(f) UNI2(f,XTERMORDORDOR)

/* Like UNI2, except that PL_expect and PL_last_lop_op are left alone. */
#define UNIBRACK(f) {                                   \
        pl_yylval.ival = f;                             \
        PL_bufptr = s;                                  \
        PL_last_uni = PL_oldbufptr;                     \
        if (*s != '(') {                                \
            s = PEEKSPACE(s);                           \
            if (*s != '(')                              \
                return REPORT( (int)UNIOP );            \
        }                                               \
        return REPORT( (int)FUNC1 );                    \
    }
298
/* grandfather return to old style
 *
 * Caps PL_lex_fakeeof at LEX_FAKEEOF_LOWLOGIC when PL_lex_allbrackets is
 * zero, stores f in pl_yylval.ival, sets PL_expect to XTERM, saves s in
 * PL_bufptr and returns LSTOP.  The token does not go through REPORT().
 */
#define OLDLOP(f)                                                       \
    do {                                                                \
        if (PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC && !PL_lex_allbrackets) \
            PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;                      \
        pl_yylval.ival = (f);                                           \
        PL_expect = XTERM;                                              \
        PL_bufptr = s;                                                  \
        return (int)LSTOP;                                              \
    } while(0)
309
310#ifdef DEBUGGING
311
/* how to interpret the pl_yylval associated with the token */
enum token_type {
    TOKENTYPE_NONE,     /* no value is reported for the token */
    TOKENTYPE_IVAL,     /* pl_yylval.ival, reported as a number */
    TOKENTYPE_OPNUM,    /* pl_yylval.ival contains an opcode number */
    TOKENTYPE_PVAL,     /* pl_yylval.pval, reported as a string */
    TOKENTYPE_OPVAL,    /* pl_yylval.opval, an op (may be null) */
    TOKENTYPE_GVVAL     /* doesn't appear to be used */
};
321
322static struct debug_tokens {
323 const int token;
324 enum token_type type;
325 const char *name;
326} const debug_tokens[] =
327{
328 { ADDOP, TOKENTYPE_OPNUM, "ADDOP" },
329 { ANDAND, TOKENTYPE_NONE, "ANDAND" },
330 { ANDOP, TOKENTYPE_NONE, "ANDOP" },
331 { ANONSUB, TOKENTYPE_IVAL, "ANONSUB" },
332 { ARROW, TOKENTYPE_NONE, "ARROW" },
333 { ASSIGNOP, TOKENTYPE_OPNUM, "ASSIGNOP" },
334 { BITANDOP, TOKENTYPE_OPNUM, "BITANDOP" },
335 { BITOROP, TOKENTYPE_OPNUM, "BITOROP" },
336 { COLONATTR, TOKENTYPE_NONE, "COLONATTR" },
337 { CONTINUE, TOKENTYPE_NONE, "CONTINUE" },
338 { DEFAULT, TOKENTYPE_NONE, "DEFAULT" },
339 { DO, TOKENTYPE_NONE, "DO" },
340 { DOLSHARP, TOKENTYPE_NONE, "DOLSHARP" },
341 { DORDOR, TOKENTYPE_NONE, "DORDOR" },
342 { DOROP, TOKENTYPE_OPNUM, "DOROP" },
343 { DOTDOT, TOKENTYPE_IVAL, "DOTDOT" },
344 { ELSE, TOKENTYPE_NONE, "ELSE" },
345 { ELSIF, TOKENTYPE_IVAL, "ELSIF" },
346 { EQOP, TOKENTYPE_OPNUM, "EQOP" },
347 { FOR, TOKENTYPE_IVAL, "FOR" },
348 { FORMAT, TOKENTYPE_NONE, "FORMAT" },
349 { FUNC, TOKENTYPE_OPNUM, "FUNC" },
350 { FUNC0, TOKENTYPE_OPNUM, "FUNC0" },
351 { FUNC0OP, TOKENTYPE_OPVAL, "FUNC0OP" },
352 { FUNC0SUB, TOKENTYPE_OPVAL, "FUNC0SUB" },
353 { FUNC1, TOKENTYPE_OPNUM, "FUNC1" },
354 { FUNCMETH, TOKENTYPE_OPVAL, "FUNCMETH" },
355 { GIVEN, TOKENTYPE_IVAL, "GIVEN" },
356 { HASHBRACK, TOKENTYPE_NONE, "HASHBRACK" },
357 { IF, TOKENTYPE_IVAL, "IF" },
358 { LABEL, TOKENTYPE_PVAL, "LABEL" },
359 { LOCAL, TOKENTYPE_IVAL, "LOCAL" },
360 { LOOPEX, TOKENTYPE_OPNUM, "LOOPEX" },
361 { LSTOP, TOKENTYPE_OPNUM, "LSTOP" },
362 { LSTOPSUB, TOKENTYPE_OPVAL, "LSTOPSUB" },
363 { MATCHOP, TOKENTYPE_OPNUM, "MATCHOP" },
364 { METHOD, TOKENTYPE_OPVAL, "METHOD" },
365 { MULOP, TOKENTYPE_OPNUM, "MULOP" },
366 { MY, TOKENTYPE_IVAL, "MY" },
367 { MYSUB, TOKENTYPE_NONE, "MYSUB" },
368 { NOAMP, TOKENTYPE_NONE, "NOAMP" },
369 { NOTOP, TOKENTYPE_NONE, "NOTOP" },
370 { OROP, TOKENTYPE_IVAL, "OROP" },
371 { OROR, TOKENTYPE_NONE, "OROR" },
372 { PACKAGE, TOKENTYPE_NONE, "PACKAGE" },
373 { PLUGEXPR, TOKENTYPE_OPVAL, "PLUGEXPR" },
374 { PLUGSTMT, TOKENTYPE_OPVAL, "PLUGSTMT" },
375 { PMFUNC, TOKENTYPE_OPVAL, "PMFUNC" },
376 { POSTDEC, TOKENTYPE_NONE, "POSTDEC" },
377 { POSTINC, TOKENTYPE_NONE, "POSTINC" },
378 { POWOP, TOKENTYPE_OPNUM, "POWOP" },
379 { PREDEC, TOKENTYPE_NONE, "PREDEC" },
380 { PREINC, TOKENTYPE_NONE, "PREINC" },
381 { PRIVATEREF, TOKENTYPE_OPVAL, "PRIVATEREF" },
382 { REFGEN, TOKENTYPE_NONE, "REFGEN" },
383 { RELOP, TOKENTYPE_OPNUM, "RELOP" },
384 { SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" },
385 { SUB, TOKENTYPE_NONE, "SUB" },
386 { THING, TOKENTYPE_OPVAL, "THING" },
387 { UMINUS, TOKENTYPE_NONE, "UMINUS" },
388 { UNIOP, TOKENTYPE_OPNUM, "UNIOP" },
389 { UNIOPSUB, TOKENTYPE_OPVAL, "UNIOPSUB" },
390 { UNLESS, TOKENTYPE_IVAL, "UNLESS" },
391 { UNTIL, TOKENTYPE_IVAL, "UNTIL" },
392 { USE, TOKENTYPE_IVAL, "USE" },
393 { WHEN, TOKENTYPE_IVAL, "WHEN" },
394 { WHILE, TOKENTYPE_IVAL, "WHILE" },
395 { WORD, TOKENTYPE_OPVAL, "WORD" },
396 { YADAYADA, TOKENTYPE_IVAL, "YADAYADA" },
397 { 0, TOKENTYPE_NONE, NULL }
398};
399
400/* dump the returned token in rv, plus any optional arg in pl_yylval */
401
402STATIC int
403S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp)
404{
405 dVAR;
406
407 PERL_ARGS_ASSERT_TOKEREPORT;
408
409 if (DEBUG_T_TEST) {
410 const char *name = NULL;
411 enum token_type type = TOKENTYPE_NONE;
412 const struct debug_tokens *p;
413 SV* const report = newSVpvs("<== ");
414
415 for (p = debug_tokens; p->token; p++) {
416 if (p->token == (int)rv) {
417 name = p->name;
418 type = p->type;
419 break;
420 }
421 }
422 if (name)
423 Perl_sv_catpv(aTHX_ report, name);
424 else if ((char)rv > ' ' && (char)rv < '~')
425 Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
426 else if (!rv)
427 sv_catpvs(report, "EOF");
428 else
429 Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
430 switch (type) {
431 case TOKENTYPE_NONE:
432 case TOKENTYPE_GVVAL: /* doesn't appear to be used */
433 break;
434 case TOKENTYPE_IVAL:
435 Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)lvalp->ival);
436 break;
437 case TOKENTYPE_OPNUM:
438 Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
439 PL_op_name[lvalp->ival]);
440 break;
441 case TOKENTYPE_PVAL:
442 Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", lvalp->pval);
443 break;
444 case TOKENTYPE_OPVAL:
445 if (lvalp->opval) {
446 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
447 PL_op_name[lvalp->opval->op_type]);
448 if (lvalp->opval->op_type == OP_CONST) {
449 Perl_sv_catpvf(aTHX_ report, " %s",
450 SvPEEK(cSVOPx_sv(lvalp->opval)));
451 }
452
453 }
454 else
455 sv_catpvs(report, "(opval=null)");
456 break;
457 }
458 PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
459 };
460 return (int)rv;
461}
462
463
464/* print the buffer with suitable escapes */
465
466STATIC void
467S_printbuf(pTHX_ const char *const fmt, const char *const s)
468{
469 SV* const tmp = newSVpvs("");
470
471 PERL_ARGS_ASSERT_PRINTBUF;
472
473 PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
474 SvREFCNT_dec(tmp);
475}
476
477#endif
478
479static int
480S_deprecate_commaless_var_list(pTHX) {
481 PL_expect = XTERM;
482 deprecate("comma-less variable list");
483 return REPORT(','); /* grandfather non-comma-format format */
484}
485
486/*
487 * S_ao
488 *
489 * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
490 * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
491 */
492
493STATIC int
494S_ao(pTHX_ int toketype)
495{
496 dVAR;
497 if (*PL_bufptr == '=') {
498 PL_bufptr++;
499 if (toketype == ANDAND)
500 pl_yylval.ival = OP_ANDASSIGN;
501 else if (toketype == OROR)
502 pl_yylval.ival = OP_ORASSIGN;
503 else if (toketype == DORDOR)
504 pl_yylval.ival = OP_DORASSIGN;
505 toketype = ASSIGNOP;
506 }
507 return toketype;
508}
509
510/*
511 * S_no_op
512 * When Perl expects an operator and finds something else, no_op
513 * prints the warning. It always prints "<something> found where
514 * operator expected. It prints "Missing semicolon on previous line?"
515 * if the surprise occurs at the start of the line. "do you need to
516 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
517 * where the compiler doesn't know if foo is a method call or a function.
518 * It prints "Missing operator before end of line" if there's nothing
519 * after the missing operator, or "... before <...>" if there is something
520 * after the missing operator.
521 */
522
523STATIC void
524S_no_op(pTHX_ const char *const what, char *s)
525{
526 dVAR;
527 char * const oldbp = PL_bufptr;
528 const bool is_first = (PL_oldbufptr == PL_linestart);
529
530 PERL_ARGS_ASSERT_NO_OP;
531
532 if (!s)
533 s = oldbp;
534 else
535 PL_bufptr = s;
536 yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
537 if (ckWARN_d(WARN_SYNTAX)) {
538 if (is_first)
539 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
540 "\t(Missing semicolon on previous line?)\n");
541 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
542 const char *t;
543 for (t = PL_oldoldbufptr; (isALNUM_lazy_if(t,UTF) || *t == ':'); t++)
544 NOOP;
545 if (t < PL_bufptr && isSPACE(*t))
546 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
547 "\t(Do you need to predeclare %.*s?)\n",
548 (int)(t - PL_oldoldbufptr), PL_oldoldbufptr);
549 }
550 else {
551 assert(s >= oldbp);
552 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
553 "\t(Missing operator before %.*s?)\n", (int)(s - oldbp), oldbp);
554 }
555 }
556 PL_bufptr = oldbp;
557}
558
559/*
560 * S_missingterm
561 * Complain about missing quote/regexp/heredoc terminator.
562 * If it's called with NULL then it cauterizes the line buffer.
563 * If we're in a delimited string and the delimiter is a control
564 * character, it's reformatted into a two-char sequence like ^C.
565 * This is fatal.
566 */
567
568STATIC void
569S_missingterm(pTHX_ char *s)
570{
571 dVAR;
572 char tmpbuf[3];
573 char q;
574 if (s) {
575 char * const nl = strrchr(s,'\n');
576 if (nl)
577 *nl = '\0';
578 }
579 else if (isCNTRL(PL_multi_close)) {
580 *tmpbuf = '^';
581 tmpbuf[1] = (char)toCTRL(PL_multi_close);
582 tmpbuf[2] = '\0';
583 s = tmpbuf;
584 }
585 else {
586 *tmpbuf = (char)PL_multi_close;
587 tmpbuf[1] = '\0';
588 s = tmpbuf;
589 }
590 q = strchr(s,'"') ? '\'' : '"';
591 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
592}
593
594/*
595 * Check whether the named feature is enabled.
596 */
597bool
598Perl_feature_is_enabled(pTHX_ const char *const name, STRLEN namelen)
599{
600 dVAR;
601 HV * const hinthv = GvHV(PL_hintgv);
602 char he_name[8 + MAX_FEATURE_LEN] = "feature_";
603
604 PERL_ARGS_ASSERT_FEATURE_IS_ENABLED;
605
606 if (namelen > MAX_FEATURE_LEN)
607 return FALSE;
608 memcpy(&he_name[8], name, namelen);
609
610 return (hinthv && hv_exists(hinthv, he_name, 8 + namelen));
611}
612
613/*
614 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
615 * utf16-to-utf8-reversed.
616 */
617
618#ifdef PERL_CR_FILTER
619static void
620strip_return(SV *sv)
621{
622 register const char *s = SvPVX_const(sv);
623 register const char * const e = s + SvCUR(sv);
624
625 PERL_ARGS_ASSERT_STRIP_RETURN;
626
627 /* outer loop optimized to do nothing if there are no CR-LFs */
628 while (s < e) {
629 if (*s++ == '\r' && *s == '\n') {
630 /* hit a CR-LF, need to copy the rest */
631 register char *d = s - 1;
632 *d++ = *s++;
633 while (s < e) {
634 if (*s == '\r' && s[1] == '\n')
635 s++;
636 *d++ = *s++;
637 }
638 SvCUR(sv) -= s - d;
639 return;
640 }
641 }
642}
643
644STATIC I32
645S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
646{
647 const I32 count = FILTER_READ(idx+1, sv, maxlen);
648 if (count > 0 && !maxlen)
649 strip_return(sv);
650 return count;
651}
652#endif
653
654/*
655=for apidoc Amx|void|lex_start|SV *line|PerlIO *rsfp|U32 flags
656
657Creates and initialises a new lexer/parser state object, supplying
658a context in which to lex and parse from a new source of Perl code.
659A pointer to the new state object is placed in L</PL_parser>. An entry
660is made on the save stack so that upon unwinding the new state object
661will be destroyed and the former value of L</PL_parser> will be restored.
662Nothing else need be done to clean up the parsing context.
663
664The code to be parsed comes from I<line> and I<rsfp>. I<line>, if
665non-null, provides a string (in SV form) containing code to be parsed.
666A copy of the string is made, so subsequent modification of I<line>
667does not affect parsing. I<rsfp>, if non-null, provides an input stream
668from which code will be read to be parsed. If both are non-null, the
669code in I<line> comes first and must consist of complete lines of input,
670and I<rsfp> supplies the remainder of the source.
671
672The I<flags> parameter is reserved for future use, and must always
673be zero, except for one flag that is currently reserved for perl's internal
674use.
675
676=cut
677*/
678
679/* LEX_START_SAME_FILTER indicates that this is not a new file, so it
680 can share filters with the current parser. */
681
682void
683Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, U32 flags)
684{
685 dVAR;
686 const char *s = NULL;
687 STRLEN len;
688 yy_parser *parser, *oparser;
689 if (flags && flags != LEX_START_SAME_FILTER)
690 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_start");
691
692 /* create and initialise a parser */
693
694 Newxz(parser, 1, yy_parser);
695 parser->old_parser = oparser = PL_parser;
696 PL_parser = parser;
697
698 parser->stack = NULL;
699 parser->ps = NULL;
700 parser->stack_size = 0;
701
702 /* on scope exit, free this parser and restore any outer one */
703 SAVEPARSER(parser);
704 parser->saved_curcop = PL_curcop;
705
706 /* initialise lexer state */
707
708#ifdef PERL_MAD
709 parser->curforce = -1;
710#else
711 parser->nexttoke = 0;
712#endif
713 parser->error_count = oparser ? oparser->error_count : 0;
714 parser->copline = NOLINE;
715 parser->lex_state = LEX_NORMAL;
716 parser->expect = XSTATE;
717 parser->rsfp = rsfp;
718 parser->rsfp_filters =
719 !(flags & LEX_START_SAME_FILTER) || !oparser
720 ? newAV()
721 : MUTABLE_AV(SvREFCNT_inc(oparser->rsfp_filters));
722
723 Newx(parser->lex_brackstack, 120, char);
724 Newx(parser->lex_casestack, 12, char);
725 *parser->lex_casestack = '\0';
726
727 if (line) {
728 s = SvPV_const(line, len);
729 } else {
730 len = 0;
731 }
732
733 if (!len) {
734 parser->linestr = newSVpvs("\n;");
735 } else {
736 parser->linestr = newSVpvn_flags(s, len, SvUTF8(line));
737 if (s[len-1] != ';')
738 sv_catpvs(parser->linestr, "\n;");
739 }
740 parser->oldoldbufptr =
741 parser->oldbufptr =
742 parser->bufptr =
743 parser->linestart = SvPVX(parser->linestr);
744 parser->bufend = parser->bufptr + SvCUR(parser->linestr);
745 parser->last_lop = parser->last_uni = NULL;
746
747 parser->in_pod = 0;
748}
749
750
751/* delete a parser object */
752
753void
754Perl_parser_free(pTHX_ const yy_parser *parser)
755{
756 PERL_ARGS_ASSERT_PARSER_FREE;
757
758 PL_curcop = parser->saved_curcop;
759 SvREFCNT_dec(parser->linestr);
760
761 if (parser->rsfp == PerlIO_stdin())
762 PerlIO_clearerr(parser->rsfp);
763 else if (parser->rsfp && (!parser->old_parser ||
764 (parser->old_parser && parser->rsfp != parser->old_parser->rsfp)))
765 PerlIO_close(parser->rsfp);
766 SvREFCNT_dec(parser->rsfp_filters);
767
768 Safefree(parser->lex_brackstack);
769 Safefree(parser->lex_casestack);
770 PL_parser = parser->old_parser;
771 Safefree(parser);
772}
773
774
775/*
776=for apidoc AmxU|SV *|PL_parser-E<gt>linestr
777
778Buffer scalar containing the chunk currently under consideration of the
779text currently being lexed. This is always a plain string scalar (for
780which C<SvPOK> is true). It is not intended to be used as a scalar by
781normal scalar means; instead refer to the buffer directly by the pointer
782variables described below.
783
784The lexer maintains various C<char*> pointers to things in the
785C<PL_parser-E<gt>linestr> buffer. If C<PL_parser-E<gt>linestr> is ever
786reallocated, all of these pointers must be updated. Don't attempt to
787do this manually, but rather use L</lex_grow_linestr> if you need to
788reallocate the buffer.
789
790The content of the text chunk in the buffer is commonly exactly one
791complete line of input, up to and including a newline terminator,
792but there are situations where it is otherwise. The octets of the
793buffer may be intended to be interpreted as either UTF-8 or Latin-1.
794The function L</lex_bufutf8> tells you which. Do not use the C<SvUTF8>
795flag on this scalar, which may disagree with it.
796
797For direct examination of the buffer, the variable
798L</PL_parser-E<gt>bufend> points to the end of the buffer. The current
799lexing position is pointed to by L</PL_parser-E<gt>bufptr>. Direct use
800of these pointers is usually preferable to examination of the scalar
801through normal scalar means.
802
803=for apidoc AmxU|char *|PL_parser-E<gt>bufend
804
805Direct pointer to the end of the chunk of text currently being lexed, the
806end of the lexer buffer. This is equal to C<SvPVX(PL_parser-E<gt>linestr)
807+ SvCUR(PL_parser-E<gt>linestr)>. A NUL character (zero octet) is
808always located at the end of the buffer, and does not count as part of
809the buffer's contents.
810
811=for apidoc AmxU|char *|PL_parser-E<gt>bufptr
812
813Points to the current position of lexing inside the lexer buffer.
814Characters around this point may be freely examined, within
815the range delimited by C<SvPVX(L</PL_parser-E<gt>linestr>)> and
816L</PL_parser-E<gt>bufend>. The octets of the buffer may be intended to be
817interpreted as either UTF-8 or Latin-1, as indicated by L</lex_bufutf8>.
818
819Lexing code (whether in the Perl core or not) moves this pointer past
820the characters that it consumes. It is also expected to perform some
821bookkeeping whenever a newline character is consumed. This movement
822can be more conveniently performed by the function L</lex_read_to>,
823which handles newlines appropriately.
824
825Interpretation of the buffer's octets can be abstracted out by
826using the slightly higher-level functions L</lex_peek_unichar> and
827L</lex_read_unichar>.
828
829=for apidoc AmxU|char *|PL_parser-E<gt>linestart
830
831Points to the start of the current line inside the lexer buffer.
832This is useful for indicating at which column an error occurred, and
833not much else. This must be updated by any lexing code that consumes
834a newline; the function L</lex_read_to> handles this detail.
835
836=cut
837*/
838
839/*
840=for apidoc Amx|bool|lex_bufutf8
841
842Indicates whether the octets in the lexer buffer
843(L</PL_parser-E<gt>linestr>) should be interpreted as the UTF-8 encoding
844of Unicode characters. If not, they should be interpreted as Latin-1
845characters. This is analogous to the C<SvUTF8> flag for scalars.
846
847In UTF-8 mode, it is not guaranteed that the lexer buffer actually
848contains valid UTF-8. Lexing code must be robust in the face of invalid
849encoding.
850
851The actual C<SvUTF8> flag of the L</PL_parser-E<gt>linestr> scalar
852is significant, but not the whole story regarding the input character
853encoding. Normally, when a file is being read, the scalar contains octets
854and its C<SvUTF8> flag is off, but the octets should be interpreted as
855UTF-8 if the C<use utf8> pragma is in effect. During a string eval,
856however, the scalar may have the C<SvUTF8> flag on, and in this case its
857octets should be interpreted as UTF-8 unless the C<use bytes> pragma
858is in effect. This logic may change in the future; use this function
859instead of implementing the logic yourself.
860
861=cut
862*/
863
864bool
865Perl_lex_bufutf8(pTHX)
866{
867 return UTF;
868}
869
870/*
871=for apidoc Amx|char *|lex_grow_linestr|STRLEN len
872
873Reallocates the lexer buffer (L</PL_parser-E<gt>linestr>) to accommodate
874at least I<len> octets (including terminating NUL). Returns a
875pointer to the reallocated buffer. This is necessary before making
876any direct modification of the buffer that would increase its length.
877L</lex_stuff_pvn> provides a more convenient way to insert text into
878the buffer.
879
880Do not use C<SvGROW> or C<sv_grow> directly on C<PL_parser-E<gt>linestr>;
881this function updates all of the lexer's variables that point directly
882into the buffer.
883
884=cut
885*/
886
887char *
888Perl_lex_grow_linestr(pTHX_ STRLEN len)
889{
890 SV *linestr;
891 char *buf;
892 STRLEN bufend_pos, bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
893 STRLEN linestart_pos, last_uni_pos, last_lop_pos;
894 linestr = PL_parser->linestr;
895 buf = SvPVX(linestr);
896 if (len <= SvLEN(linestr))
897 return buf;
898 bufend_pos = PL_parser->bufend - buf;
899 bufptr_pos = PL_parser->bufptr - buf;
900 oldbufptr_pos = PL_parser->oldbufptr - buf;
901 oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
902 linestart_pos = PL_parser->linestart - buf;
903 last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
904 last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
905 buf = sv_grow(linestr, len);
906 PL_parser->bufend = buf + bufend_pos;
907 PL_parser->bufptr = buf + bufptr_pos;
908 PL_parser->oldbufptr = buf + oldbufptr_pos;
909 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
910 PL_parser->linestart = buf + linestart_pos;
911 if (PL_parser->last_uni)
912 PL_parser->last_uni = buf + last_uni_pos;
913 if (PL_parser->last_lop)
914 PL_parser->last_lop = buf + last_lop_pos;
915 return buf;
916}
917
918/*
919=for apidoc Amx|void|lex_stuff_pvn|const char *pv|STRLEN len|U32 flags
920
921Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
922immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
923reallocating the buffer if necessary. This means that lexing code that
924runs later will see the characters as if they had appeared in the input.
925It is not recommended to do this as part of normal parsing, and most
926uses of this facility run the risk of the inserted characters being
927interpreted in an unintended manner.
928
929The string to be inserted is represented by I<len> octets starting
930at I<pv>. These octets are interpreted as either UTF-8 or Latin-1,
931according to whether the C<LEX_STUFF_UTF8> flag is set in I<flags>.
932The characters are recoded for the lexer buffer, according to how the
933buffer is currently being interpreted (L</lex_bufutf8>). If a string
934to be inserted is available as a Perl scalar, the L</lex_stuff_sv>
935function is more convenient.
936
937=cut
938*/
939
940void
941Perl_lex_stuff_pvn(pTHX_ const char *pv, STRLEN len, U32 flags)
942{
943 dVAR;
944 char *bufptr;
945 PERL_ARGS_ASSERT_LEX_STUFF_PVN;
946 if (flags & ~(LEX_STUFF_UTF8))
947 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_pvn");
948 if (UTF) {
949 if (flags & LEX_STUFF_UTF8) {
950 goto plain_copy;
951 } else {
952 STRLEN highhalf = 0;
953 const char *p, *e = pv+len;
954 for (p = pv; p != e; p++)
955 highhalf += !!(((U8)*p) & 0x80);
956 if (!highhalf)
957 goto plain_copy;
958 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len+highhalf);
959 bufptr = PL_parser->bufptr;
960 Move(bufptr, bufptr+len+highhalf, PL_parser->bufend+1-bufptr, char);
961 SvCUR_set(PL_parser->linestr,
962 SvCUR(PL_parser->linestr) + len+highhalf);
963 PL_parser->bufend += len+highhalf;
964 for (p = pv; p != e; p++) {
965 U8 c = (U8)*p;
966 if (c & 0x80) {
967 *bufptr++ = (char)(0xc0 | (c >> 6));
968 *bufptr++ = (char)(0x80 | (c & 0x3f));
969 } else {
970 *bufptr++ = (char)c;
971 }
972 }
973 }
974 } else {
975 if (flags & LEX_STUFF_UTF8) {
976 STRLEN highhalf = 0;
977 const char *p, *e = pv+len;
978 for (p = pv; p != e; p++) {
979 U8 c = (U8)*p;
980 if (c >= 0xc4) {
981 Perl_croak(aTHX_ "Lexing code attempted to stuff "
982 "non-Latin-1 character into Latin-1 input");
983 } else if (c >= 0xc2 && p+1 != e &&
984 (((U8)p[1]) & 0xc0) == 0x80) {
985 p++;
986 highhalf++;
987 } else if (c >= 0x80) {
988 /* malformed UTF-8 */
989 ENTER;
990 SAVESPTR(PL_warnhook);
991 PL_warnhook = PERL_WARNHOOK_FATAL;
992 utf8n_to_uvuni((U8*)p, e-p, NULL, 0);
993 LEAVE;
994 }
995 }
996 if (!highhalf)
997 goto plain_copy;
998 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len-highhalf);
999 bufptr = PL_parser->bufptr;
1000 Move(bufptr, bufptr+len-highhalf, PL_parser->bufend+1-bufptr, char);
1001 SvCUR_set(PL_parser->linestr,
1002 SvCUR(PL_parser->linestr) + len-highhalf);
1003 PL_parser->bufend += len-highhalf;
1004 for (p = pv; p != e; p++) {
1005 U8 c = (U8)*p;
1006 if (c & 0x80) {
1007 *bufptr++ = (char)(((c & 0x3) << 6) | (p[1] & 0x3f));
1008 p++;
1009 } else {
1010 *bufptr++ = (char)c;
1011 }
1012 }
1013 } else {
1014 plain_copy:
1015 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len);
1016 bufptr = PL_parser->bufptr;
1017 Move(bufptr, bufptr+len, PL_parser->bufend+1-bufptr, char);
1018 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) + len);
1019 PL_parser->bufend += len;
1020 Copy(pv, bufptr, len, char);
1021 }
1022 }
1023}
1024
1025/*
1026=for apidoc Amx|void|lex_stuff_pv|const char *pv|U32 flags
1027
1028Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1029immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1030reallocating the buffer if necessary. This means that lexing code that
1031runs later will see the characters as if they had appeared in the input.
1032It is not recommended to do this as part of normal parsing, and most
1033uses of this facility run the risk of the inserted characters being
1034interpreted in an unintended manner.
1035
1036The string to be inserted is represented by octets starting at I<pv>
1037and continuing to the first nul. These octets are interpreted as either
1038UTF-8 or Latin-1, according to whether the C<LEX_STUFF_UTF8> flag is set
1039in I<flags>. The characters are recoded for the lexer buffer, according
1040to how the buffer is currently being interpreted (L</lex_bufutf8>).
1041If it is not convenient to nul-terminate a string to be inserted, the
1042L</lex_stuff_pvn> function is more appropriate.
1043
1044=cut
1045*/
1046
1047void
1048Perl_lex_stuff_pv(pTHX_ const char *pv, U32 flags)
1049{
1050 PERL_ARGS_ASSERT_LEX_STUFF_PV;
1051 lex_stuff_pvn(pv, strlen(pv), flags);
1052}
1053
1054/*
1055=for apidoc Amx|void|lex_stuff_sv|SV *sv|U32 flags
1056
1057Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1058immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1059reallocating the buffer if necessary. This means that lexing code that
1060runs later will see the characters as if they had appeared in the input.
1061It is not recommended to do this as part of normal parsing, and most
1062uses of this facility run the risk of the inserted characters being
1063interpreted in an unintended manner.
1064
1065The string to be inserted is the string value of I<sv>. The characters
1066are recoded for the lexer buffer, according to how the buffer is currently
1067being interpreted (L</lex_bufutf8>). If a string to be inserted is
1068not already a Perl scalar, the L</lex_stuff_pvn> function avoids the
1069need to construct a scalar.
1070
1071=cut
1072*/
1073
1074void
1075Perl_lex_stuff_sv(pTHX_ SV *sv, U32 flags)
1076{
1077 char *pv;
1078 STRLEN len;
1079 PERL_ARGS_ASSERT_LEX_STUFF_SV;
1080 if (flags)
1081 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_sv");
1082 pv = SvPV(sv, len);
1083 lex_stuff_pvn(pv, len, flags | (SvUTF8(sv) ? LEX_STUFF_UTF8 : 0));
1084}
1085
1086/*
1087=for apidoc Amx|void|lex_unstuff|char *ptr
1088
1089Discards text about to be lexed, from L</PL_parser-E<gt>bufptr> up to
1090I<ptr>. Text following I<ptr> will be moved, and the buffer shortened.
1091This hides the discarded text from any lexing code that runs later,
1092as if the text had never appeared.
1093
1094This is not the normal way to consume lexed text. For that, use
1095L</lex_read_to>.
1096
1097=cut
1098*/
1099
1100void
1101Perl_lex_unstuff(pTHX_ char *ptr)
1102{
1103 char *buf, *bufend;
1104 STRLEN unstuff_len;
1105 PERL_ARGS_ASSERT_LEX_UNSTUFF;
1106 buf = PL_parser->bufptr;
1107 if (ptr < buf)
1108 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1109 if (ptr == buf)
1110 return;
1111 bufend = PL_parser->bufend;
1112 if (ptr > bufend)
1113 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1114 unstuff_len = ptr - buf;
1115 Move(ptr, buf, bufend+1-ptr, char);
1116 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - unstuff_len);
1117 PL_parser->bufend = bufend - unstuff_len;
1118}
1119
1120/*
1121=for apidoc Amx|void|lex_read_to|char *ptr
1122
1123Consume text in the lexer buffer, from L</PL_parser-E<gt>bufptr> up
1124to I<ptr>. This advances L</PL_parser-E<gt>bufptr> to match I<ptr>,
1125performing the correct bookkeeping whenever a newline character is passed.
1126This is the normal way to consume lexed text.
1127
1128Interpretation of the buffer's octets can be abstracted out by
1129using the slightly higher-level functions L</lex_peek_unichar> and
1130L</lex_read_unichar>.
1131
1132=cut
1133*/
1134
1135void
1136Perl_lex_read_to(pTHX_ char *ptr)
1137{
1138 char *s;
1139 PERL_ARGS_ASSERT_LEX_READ_TO;
1140 s = PL_parser->bufptr;
1141 if (ptr < s || ptr > PL_parser->bufend)
1142 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_to");
1143 for (; s != ptr; s++)
1144 if (*s == '\n') {
1145 CopLINE_inc(PL_curcop);
1146 PL_parser->linestart = s+1;
1147 }
1148 PL_parser->bufptr = ptr;
1149}
1150
1151/*
1152=for apidoc Amx|void|lex_discard_to|char *ptr
1153
1154Discards the first part of the L</PL_parser-E<gt>linestr> buffer,
1155up to I<ptr>. The remaining content of the buffer will be moved, and
1156all pointers into the buffer updated appropriately. I<ptr> must not
1157be later in the buffer than the position of L</PL_parser-E<gt>bufptr>:
1158it is not permitted to discard text that has yet to be lexed.
1159
1160Normally it is not necessary to do this directly, because it suffices to
1161use the implicit discarding behaviour of L</lex_next_chunk> and things
1162based on it. However, if a token stretches across multiple lines,
1163and the lexing code has kept multiple lines of text in the buffer for
1164that purpose, then after completion of the token it would be wise to
1165explicitly discard the now-unneeded earlier lines, to avoid future
1166multi-line tokens growing the buffer without bound.
1167
1168=cut
1169*/
1170
1171void
1172Perl_lex_discard_to(pTHX_ char *ptr)
1173{
1174 char *buf;
1175 STRLEN discard_len;
1176 PERL_ARGS_ASSERT_LEX_DISCARD_TO;
1177 buf = SvPVX(PL_parser->linestr);
1178 if (ptr < buf)
1179 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1180 if (ptr == buf)
1181 return;
1182 if (ptr > PL_parser->bufptr)
1183 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1184 discard_len = ptr - buf;
1185 if (PL_parser->oldbufptr < ptr)
1186 PL_parser->oldbufptr = ptr;
1187 if (PL_parser->oldoldbufptr < ptr)
1188 PL_parser->oldoldbufptr = ptr;
1189 if (PL_parser->last_uni && PL_parser->last_uni < ptr)
1190 PL_parser->last_uni = NULL;
1191 if (PL_parser->last_lop && PL_parser->last_lop < ptr)
1192 PL_parser->last_lop = NULL;
1193 Move(ptr, buf, PL_parser->bufend+1-ptr, char);
1194 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - discard_len);
1195 PL_parser->bufend -= discard_len;
1196 PL_parser->bufptr -= discard_len;
1197 PL_parser->oldbufptr -= discard_len;
1198 PL_parser->oldoldbufptr -= discard_len;
1199 if (PL_parser->last_uni)
1200 PL_parser->last_uni -= discard_len;
1201 if (PL_parser->last_lop)
1202 PL_parser->last_lop -= discard_len;
1203}
1204
1205/*
1206=for apidoc Amx|bool|lex_next_chunk|U32 flags
1207
1208Reads in the next chunk of text to be lexed, appending it to
1209L</PL_parser-E<gt>linestr>. This should be called when lexing code has
1210looked to the end of the current chunk and wants to know more. It is
1211usual, but not necessary, for lexing to have consumed the entirety of
1212the current chunk at this time.
1213
1214If L</PL_parser-E<gt>bufptr> is pointing to the very end of the current
1215chunk (i.e., the current chunk has been entirely consumed), normally the
1216current chunk will be discarded at the same time that the new chunk is
1217read in. If I<flags> includes C<LEX_KEEP_PREVIOUS>, the current chunk
1218will not be discarded. If the current chunk has not been entirely
1219consumed, then it will not be discarded regardless of the flag.
1220
1221Returns true if some new text was added to the buffer, or false if the
1222buffer has reached the end of the input text.
1223
1224=cut
1225*/
1226
1227#define LEX_FAKE_EOF 0x80000000
1228
1229bool
1230Perl_lex_next_chunk(pTHX_ U32 flags)
1231{
1232 SV *linestr;
1233 char *buf;
1234 STRLEN old_bufend_pos, new_bufend_pos;
1235 STRLEN bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
1236 STRLEN linestart_pos, last_uni_pos, last_lop_pos;
1237 bool got_some_for_debugger = 0;
1238 bool got_some;
1239 if (flags & ~(LEX_KEEP_PREVIOUS|LEX_FAKE_EOF))
1240 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_next_chunk");
1241 linestr = PL_parser->linestr;
1242 buf = SvPVX(linestr);
1243 if (!(flags & LEX_KEEP_PREVIOUS) &&
1244 PL_parser->bufptr == PL_parser->bufend) {
1245 old_bufend_pos = bufptr_pos = oldbufptr_pos = oldoldbufptr_pos = 0;
1246 linestart_pos = 0;
1247 if (PL_parser->last_uni != PL_parser->bufend)
1248 PL_parser->last_uni = NULL;
1249 if (PL_parser->last_lop != PL_parser->bufend)
1250 PL_parser->last_lop = NULL;
1251 last_uni_pos = last_lop_pos = 0;
1252 *buf = 0;
1253 SvCUR(linestr) = 0;
1254 } else {
1255 old_bufend_pos = PL_parser->bufend - buf;
1256 bufptr_pos = PL_parser->bufptr - buf;
1257 oldbufptr_pos = PL_parser->oldbufptr - buf;
1258 oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
1259 linestart_pos = PL_parser->linestart - buf;
1260 last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
1261 last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
1262 }
1263 if (flags & LEX_FAKE_EOF) {
1264 goto eof;
1265 } else if (!PL_parser->rsfp) {
1266 got_some = 0;
1267 } else if (filter_gets(linestr, old_bufend_pos)) {
1268 got_some = 1;
1269 got_some_for_debugger = 1;
1270 } else {
1271 if (!SvPOK(linestr)) /* can get undefined by filter_gets */
1272 sv_setpvs(linestr, "");
1273 eof:
1274 /* End of real input. Close filehandle (unless it was STDIN),
1275 * then add implicit termination.
1276 */
1277 if ((PerlIO*)PL_parser->rsfp == PerlIO_stdin())
1278 PerlIO_clearerr(PL_parser->rsfp);
1279 else if (PL_parser->rsfp)
1280 (void)PerlIO_close(PL_parser->rsfp);
1281 PL_parser->rsfp = NULL;
1282 PL_parser->in_pod = 0;
1283#ifdef PERL_MAD
1284 if (PL_madskills && !PL_in_eval && (PL_minus_p || PL_minus_n))
1285 PL_faketokens = 1;
1286#endif
1287 if (!PL_in_eval && PL_minus_p) {
1288 sv_catpvs(linestr,
1289 /*{*/";}continue{print or die qq(-p destination: $!\\n);}");
1290 PL_minus_n = PL_minus_p = 0;
1291 } else if (!PL_in_eval && PL_minus_n) {
1292 sv_catpvs(linestr, /*{*/";}");
1293 PL_minus_n = 0;
1294 } else
1295 sv_catpvs(linestr, ";");
1296 got_some = 1;
1297 }
1298 buf = SvPVX(linestr);
1299 new_bufend_pos = SvCUR(linestr);
1300 PL_parser->bufend = buf + new_bufend_pos;
1301 PL_parser->bufptr = buf + bufptr_pos;
1302 PL_parser->oldbufptr = buf + oldbufptr_pos;
1303 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
1304 PL_parser->linestart = buf + linestart_pos;
1305 if (PL_parser->last_uni)
1306 PL_parser->last_uni = buf + last_uni_pos;
1307 if (PL_parser->last_lop)
1308 PL_parser->last_lop = buf + last_lop_pos;
1309 if (got_some_for_debugger && (PERLDB_LINE || PERLDB_SAVESRC) &&
1310 PL_curstash != PL_debstash) {
1311 /* debugger active and we're not compiling the debugger code,
1312 * so store the line into the debugger's array of lines
1313 */
1314 update_debugger_info(NULL, buf+old_bufend_pos,
1315 new_bufend_pos-old_bufend_pos);
1316 }
1317 return got_some;
1318}
1319
1320/*
1321=for apidoc Amx|I32|lex_peek_unichar|U32 flags
1322
1323Looks ahead one (Unicode) character in the text currently being lexed.
1324Returns the codepoint (unsigned integer value) of the next character,
1325or -1 if lexing has reached the end of the input text. To consume the
1326peeked character, use L</lex_read_unichar>.
1327
1328If the next character is in (or extends into) the next chunk of input
1329text, the next chunk will be read in. Normally the current chunk will be
1330discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1331then the current chunk will not be discarded.
1332
1333If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1334is encountered, an exception is generated.
1335
1336=cut
1337*/
1338
1339I32
1340Perl_lex_peek_unichar(pTHX_ U32 flags)
1341{
1342 dVAR;
1343 char *s, *bufend;
1344 if (flags & ~(LEX_KEEP_PREVIOUS))
1345 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_peek_unichar");
1346 s = PL_parser->bufptr;
1347 bufend = PL_parser->bufend;
1348 if (UTF) {
1349 U8 head;
1350 I32 unichar;
1351 STRLEN len, retlen;
1352 if (s == bufend) {
1353 if (!lex_next_chunk(flags))
1354 return -1;
1355 s = PL_parser->bufptr;
1356 bufend = PL_parser->bufend;
1357 }
1358 head = (U8)*s;
1359 if (!(head & 0x80))
1360 return head;
1361 if (head & 0x40) {
1362 len = PL_utf8skip[head];
1363 while ((STRLEN)(bufend-s) < len) {
1364 if (!lex_next_chunk(flags | LEX_KEEP_PREVIOUS))
1365 break;
1366 s = PL_parser->bufptr;
1367 bufend = PL_parser->bufend;
1368 }
1369 }
1370 unichar = utf8n_to_uvuni((U8*)s, bufend-s, &retlen, UTF8_CHECK_ONLY);
1371 if (retlen == (STRLEN)-1) {
1372 /* malformed UTF-8 */
1373 ENTER;
1374 SAVESPTR(PL_warnhook);
1375 PL_warnhook = PERL_WARNHOOK_FATAL;
1376 utf8n_to_uvuni((U8*)s, bufend-s, NULL, 0);
1377 LEAVE;
1378 }
1379 return unichar;
1380 } else {
1381 if (s == bufend) {
1382 if (!lex_next_chunk(flags))
1383 return -1;
1384 s = PL_parser->bufptr;
1385 }
1386 return (U8)*s;
1387 }
1388}
1389
1390/*
1391=for apidoc Amx|I32|lex_read_unichar|U32 flags
1392
1393Reads the next (Unicode) character in the text currently being lexed.
1394Returns the codepoint (unsigned integer value) of the character read,
1395and moves L</PL_parser-E<gt>bufptr> past the character, or returns -1
1396if lexing has reached the end of the input text. To non-destructively
1397examine the next character, use L</lex_peek_unichar> instead.
1398
1399If the next character is in (or extends into) the next chunk of input
1400text, the next chunk will be read in. Normally the current chunk will be
1401discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1402then the current chunk will not be discarded.
1403
1404If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1405is encountered, an exception is generated.
1406
1407=cut
1408*/
1409
1410I32
1411Perl_lex_read_unichar(pTHX_ U32 flags)
1412{
1413 I32 c;
1414 if (flags & ~(LEX_KEEP_PREVIOUS))
1415 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_unichar");
1416 c = lex_peek_unichar(flags);
1417 if (c != -1) {
1418 if (c == '\n')
1419 CopLINE_inc(PL_curcop);
1420 if (UTF)
1421 PL_parser->bufptr += UTF8SKIP(PL_parser->bufptr);
1422 else
1423 ++(PL_parser->bufptr);
1424 }
1425 return c;
1426}
1427
1428/*
1429=for apidoc Amx|void|lex_read_space|U32 flags
1430
1431Reads optional spaces, in Perl style, in the text currently being
1432lexed. The spaces may include ordinary whitespace characters and
1433Perl-style comments. C<#line> directives are processed if encountered.
1434L</PL_parser-E<gt>bufptr> is moved past the spaces, so that it points
1435at a non-space character (or the end of the input text).
1436
1437If spaces extend into the next chunk of input text, the next chunk will
1438be read in. Normally the current chunk will be discarded at the same
1439time, but if I<flags> includes C<LEX_KEEP_PREVIOUS> then the current
1440chunk will not be discarded.
1441
1442=cut
1443*/
1444
1445#define LEX_NO_NEXT_CHUNK 0x80000000
1446
1447void
1448Perl_lex_read_space(pTHX_ U32 flags)
1449{
1450 char *s, *bufend;
1451 bool need_incline = 0;
1452 if (flags & ~(LEX_KEEP_PREVIOUS|LEX_NO_NEXT_CHUNK))
1453 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_space");
1454#ifdef PERL_MAD
1455 if (PL_skipwhite) {
1456 sv_free(PL_skipwhite);
1457 PL_skipwhite = NULL;
1458 }
1459 if (PL_madskills)
1460 PL_skipwhite = newSVpvs("");
1461#endif /* PERL_MAD */
1462 s = PL_parser->bufptr;
1463 bufend = PL_parser->bufend;
1464 while (1) {
1465 char c = *s;
1466 if (c == '#') {
1467 do {
1468 c = *++s;
1469 } while (!(c == '\n' || (c == 0 && s == bufend)));
1470 } else if (c == '\n') {
1471 s++;
1472 PL_parser->linestart = s;
1473 if (s == bufend)
1474 need_incline = 1;
1475 else
1476 incline(s);
1477 } else if (isSPACE(c)) {
1478 s++;
1479 } else if (c == 0 && s == bufend) {
1480 bool got_more;
1481#ifdef PERL_MAD
1482 if (PL_madskills)
1483 sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
1484#endif /* PERL_MAD */
1485 if (flags & LEX_NO_NEXT_CHUNK)
1486 break;
1487 PL_parser->bufptr = s;
1488 CopLINE_inc(PL_curcop);
1489 got_more = lex_next_chunk(flags);
1490 CopLINE_dec(PL_curcop);
1491 s = PL_parser->bufptr;
1492 bufend = PL_parser->bufend;
1493 if (!got_more)
1494 break;
1495 if (need_incline && PL_parser->rsfp) {
1496 incline(s);
1497 need_incline = 0;
1498 }
1499 } else {
1500 break;
1501 }
1502 }
1503#ifdef PERL_MAD
1504 if (PL_madskills)
1505 sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
1506#endif /* PERL_MAD */
1507 PL_parser->bufptr = s;
1508}
1509
1510/*
1511 * S_incline
1512 * This subroutine has nothing to do with tilting, whether at windmills
1513 * or pinball tables. Its name is short for "increment line". It
1514 * increments the current line number in CopLINE(PL_curcop) and checks
1515 * to see whether the line starts with a comment of the form
1516 * # line 500 "foo.pm"
1517 * If so, it sets the current line number and file to the values in the comment.
1518 */
1519
1520STATIC void
1521S_incline(pTHX_ const char *s)
1522{
1523 dVAR;
1524 const char *t;
1525 const char *n;
1526 const char *e;
1527 line_t line_num;
1528
1529 PERL_ARGS_ASSERT_INCLINE;
1530
1531 CopLINE_inc(PL_curcop);
1532 if (*s++ != '#')
1533 return;
1534 while (SPACE_OR_TAB(*s))
1535 s++;
1536 if (strnEQ(s, "line", 4))
1537 s += 4;
1538 else
1539 return;
1540 if (SPACE_OR_TAB(*s))
1541 s++;
1542 else
1543 return;
1544 while (SPACE_OR_TAB(*s))
1545 s++;
1546 if (!isDIGIT(*s))
1547 return;
1548
1549 n = s;
1550 while (isDIGIT(*s))
1551 s++;
1552 if (!SPACE_OR_TAB(*s) && *s != '\r' && *s != '\n' && *s != '\0')
1553 return;
1554 while (SPACE_OR_TAB(*s))
1555 s++;
1556 if (*s == '"' && (t = strchr(s+1, '"'))) {
1557 s++;
1558 e = t + 1;
1559 }
1560 else {
1561 t = s;
1562 while (!isSPACE(*t))
1563 t++;
1564 e = t;
1565 }
1566 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
1567 e++;
1568 if (*e != '\n' && *e != '\0')
1569 return; /* false alarm */
1570
1571 line_num = atoi(n)-1;
1572
1573 if (t - s > 0) {
1574 const STRLEN len = t - s;
1575 SV *const temp_sv = CopFILESV(PL_curcop);
1576 const char *cf;
1577 STRLEN tmplen;
1578
1579 if (temp_sv) {
1580 cf = SvPVX(temp_sv);
1581 tmplen = SvCUR(temp_sv);
1582 } else {
1583 cf = NULL;
1584 tmplen = 0;
1585 }
1586
1587 if (tmplen > 7 && strnEQ(cf, "(eval ", 6)) {
1588 /* must copy *{"::_<(eval N)[oldfilename:L]"}
1589 * to *{"::_<newfilename"} */
1590 /* However, the long form of evals is only turned on by the
1591 debugger - usually they're "(eval %lu)" */
1592 char smallbuf[128];
1593 char *tmpbuf;
1594 GV **gvp;
1595 STRLEN tmplen2 = len;
1596 if (tmplen + 2 <= sizeof smallbuf)
1597 tmpbuf = smallbuf;
1598 else
1599 Newx(tmpbuf, tmplen + 2, char);
1600 tmpbuf[0] = '_';
1601 tmpbuf[1] = '<';
1602 memcpy(tmpbuf + 2, cf, tmplen);
1603 tmplen += 2;
1604 gvp = (GV**)hv_fetch(PL_defstash, tmpbuf, tmplen, FALSE);
1605 if (gvp) {
1606 char *tmpbuf2;
1607 GV *gv2;
1608
1609 if (tmplen2 + 2 <= sizeof smallbuf)
1610 tmpbuf2 = smallbuf;
1611 else
1612 Newx(tmpbuf2, tmplen2 + 2, char);
1613
1614 if (tmpbuf2 != smallbuf || tmpbuf != smallbuf) {
1615 /* Either they malloc'd it, or we malloc'd it,
1616 so no prefix is present in ours. */
1617 tmpbuf2[0] = '_';
1618 tmpbuf2[1] = '<';
1619 }
1620
1621 memcpy(tmpbuf2 + 2, s, tmplen2);
1622 tmplen2 += 2;
1623
1624 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
1625 if (!isGV(gv2)) {
1626 gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
1627 /* adjust ${"::_<newfilename"} to store the new file name */
1628 GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
1629 /* The line number may differ. If that is the case,
1630 alias the saved lines that are in the array.
1631 Otherwise alias the whole array. */
1632 if (CopLINE(PL_curcop) == line_num) {
1633 GvHV(gv2) = MUTABLE_HV(SvREFCNT_inc(GvHV(*gvp)));
1634 GvAV(gv2) = MUTABLE_AV(SvREFCNT_inc(GvAV(*gvp)));
1635 }
1636 else if (GvAV(*gvp)) {
1637 AV * const av = GvAV(*gvp);
1638 const I32 start = CopLINE(PL_curcop)+1;
1639 I32 items = AvFILLp(av) - start;
1640 if (items > 0) {
1641 AV * const av2 = GvAVn(gv2);
1642 SV **svp = AvARRAY(av) + start;
1643 I32 l = (I32)line_num+1;
1644 while (items--)
1645 av_store(av2, l++, SvREFCNT_inc(*svp++));
1646 }
1647 }
1648 }
1649
1650 if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
1651 }
1652 if (tmpbuf != smallbuf) Safefree(tmpbuf);
1653 }
1654 CopFILE_free(PL_curcop);
1655 CopFILE_setn(PL_curcop, s, len);
1656 }
1657 CopLINE_set(PL_curcop, line_num);
1658}
1659
1660#ifdef PERL_MAD
1661/* skip space before PL_thistoken */
1662
1663STATIC char *
1664S_skipspace0(pTHX_ register char *s)
1665{
1666 PERL_ARGS_ASSERT_SKIPSPACE0;
1667
1668 s = skipspace(s);
1669 if (!PL_madskills)
1670 return s;
1671 if (PL_skipwhite) {
1672 if (!PL_thiswhite)
1673 PL_thiswhite = newSVpvs("");
1674 sv_catsv(PL_thiswhite, PL_skipwhite);
1675 sv_free(PL_skipwhite);
1676 PL_skipwhite = 0;
1677 }
1678 PL_realtokenstart = s - SvPVX(PL_linestr);
1679 return s;
1680}
1681
1682/* skip space after PL_thistoken */
1683
1684STATIC char *
1685S_skipspace1(pTHX_ register char *s)
1686{
1687 const char *start = s;
1688 I32 startoff = start - SvPVX(PL_linestr);
1689
1690 PERL_ARGS_ASSERT_SKIPSPACE1;
1691
1692 s = skipspace(s);
1693 if (!PL_madskills)
1694 return s;
1695 start = SvPVX(PL_linestr) + startoff;
1696 if (!PL_thistoken && PL_realtokenstart >= 0) {
1697 const char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
1698 PL_thistoken = newSVpvn(tstart, start - tstart);
1699 }
1700 PL_realtokenstart = -1;
1701 if (PL_skipwhite) {
1702 if (!PL_nextwhite)
1703 PL_nextwhite = newSVpvs("");
1704 sv_catsv(PL_nextwhite, PL_skipwhite);
1705 sv_free(PL_skipwhite);
1706 PL_skipwhite = 0;
1707 }
1708 return s;
1709}
1710
1711STATIC char *
1712S_skipspace2(pTHX_ register char *s, SV **svp)
1713{
1714 char *start;
1715 const I32 bufptroff = PL_bufptr - SvPVX(PL_linestr);
1716 const I32 startoff = s - SvPVX(PL_linestr);
1717
1718 PERL_ARGS_ASSERT_SKIPSPACE2;
1719
1720 s = skipspace(s);
1721 PL_bufptr = SvPVX(PL_linestr) + bufptroff;
1722 if (!PL_madskills || !svp)
1723 return s;
1724 start = SvPVX(PL_linestr) + startoff;
1725 if (!PL_thistoken && PL_realtokenstart >= 0) {
1726 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
1727 PL_thistoken = newSVpvn(tstart, start - tstart);
1728 PL_realtokenstart = -1;
1729 }
1730 if (PL_skipwhite) {
1731 if (!*svp)
1732 *svp = newSVpvs("");
1733 sv_setsv(*svp, PL_skipwhite);
1734 sv_free(PL_skipwhite);
1735 PL_skipwhite = 0;
1736 }
1737
1738 return s;
1739}
1740#endif
1741
1742STATIC void
1743S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len)
1744{
1745 AV *av = CopFILEAVx(PL_curcop);
1746 if (av) {
1747 SV * const sv = newSV_type(SVt_PVMG);
1748 if (orig_sv)
1749 sv_setsv(sv, orig_sv);
1750 else
1751 sv_setpvn(sv, buf, len);
1752 (void)SvIOK_on(sv);
1753 SvIV_set(sv, 0);
1754 av_store(av, (I32)CopLINE(PL_curcop), sv);
1755 }
1756}
1757
1758/*
1759 * S_skipspace
1760 * Called to gobble the appropriate amount and type of whitespace.
1761 * Skips comments as well.
1762 */
1763
1764STATIC char *
1765S_skipspace(pTHX_ register char *s)
1766{
1767#ifdef PERL_MAD
1768 char *start = s;
1769#endif /* PERL_MAD */
1770 PERL_ARGS_ASSERT_SKIPSPACE;
1771#ifdef PERL_MAD
1772 if (PL_skipwhite) {
1773 sv_free(PL_skipwhite);
1774 PL_skipwhite = NULL;
1775 }
1776#endif /* PERL_MAD */
1777 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
1778 while (s < PL_bufend && SPACE_OR_TAB(*s))
1779 s++;
1780 } else {
1781 STRLEN bufptr_pos = PL_bufptr - SvPVX(PL_linestr);
1782 PL_bufptr = s;
1783 lex_read_space(LEX_KEEP_PREVIOUS |
1784 (PL_sublex_info.sub_inwhat || PL_lex_state == LEX_FORMLINE ?
1785 LEX_NO_NEXT_CHUNK : 0));
1786 s = PL_bufptr;
1787 PL_bufptr = SvPVX(PL_linestr) + bufptr_pos;
1788 if (PL_linestart > PL_bufptr)
1789 PL_bufptr = PL_linestart;
1790 return s;
1791 }
1792#ifdef PERL_MAD
1793 if (PL_madskills)
1794 PL_skipwhite = newSVpvn(start, s-start);
1795#endif /* PERL_MAD */
1796 return s;
1797}
1798
1799/*
1800 * S_check_uni
1801 * Check the unary operators to ensure there's no ambiguity in how they're
1802 * used. An ambiguous piece of code would be:
1803 * rand + 5
1804 * This doesn't mean rand() + 5. Because rand() is a unary operator,
1805 * the +5 is its argument.
1806 */
1807
1808STATIC void
1809S_check_uni(pTHX)
1810{
1811 dVAR;
1812 const char *s;
1813 const char *t;
1814
1815 if (PL_oldoldbufptr != PL_last_uni)
1816 return;
1817 while (isSPACE(*PL_last_uni))
1818 PL_last_uni++;
1819 s = PL_last_uni;
1820 while (isALNUM_lazy_if(s,UTF) || *s == '-')
1821 s++;
1822 if ((t = strchr(s, '(')) && t < PL_bufptr)
1823 return;
1824
1825 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
1826 "Warning: Use of \"%.*s\" without parentheses is ambiguous",
1827 (int)(s - PL_last_uni), PL_last_uni);
1828}
1829
1830/*
1831 * LOP : macro to build a list operator. Its behaviour has been replaced
1832 * with a subroutine, S_lop() for which LOP is just another name.
1833 */
1834
#define LOP(f, x) return lop((f), (x), s)
1836
1837/*
1838 * S_lop
1839 * Build a list operator (or something that might be one). The rules:
1840 * - if we have a next token, then it's a list operator [why?]
1841 * - if the next thing is an opening paren, then it's a function
1842 * - else it's a list operator
1843 */
1844
1845STATIC I32
1846S_lop(pTHX_ I32 f, int x, char *s)
1847{
1848 dVAR;
1849
1850 PERL_ARGS_ASSERT_LOP;
1851
1852 pl_yylval.ival = f;
1853 CLINE;
1854 PL_expect = x;
1855 PL_bufptr = s;
1856 PL_last_lop = PL_oldbufptr;
1857 PL_last_lop_op = (OPCODE)f;
1858#ifdef PERL_MAD
1859 if (PL_lasttoke)
1860 goto lstop;
1861#else
1862 if (PL_nexttoke)
1863 goto lstop;
1864#endif
1865 if (*s == '(')
1866 return REPORT(FUNC);
1867 s = PEEKSPACE(s);
1868 if (*s == '(')
1869 return REPORT(FUNC);
1870 else {
1871 lstop:
1872 if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
1873 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
1874 return REPORT(LSTOP);
1875 }
1876}
1877
1878#ifdef PERL_MAD
1879 /*
1880 * S_start_force
1881 * Sets up for an eventual force_next(). start_force(0) basically does
1882 * an unshift, while start_force(-1) does a push. yylex removes items
1883 * on the "pop" end.
1884 */
1885
1886STATIC void
1887S_start_force(pTHX_ int where)
1888{
1889 int i;
1890
1891 if (where < 0) /* so people can duplicate start_force(PL_curforce) */
1892 where = PL_lasttoke;
1893 assert(PL_curforce < 0 || PL_curforce == where);
1894 if (PL_curforce != where) {
1895 for (i = PL_lasttoke; i > where; --i) {
1896 PL_nexttoke[i] = PL_nexttoke[i-1];
1897 }
1898 PL_lasttoke++;
1899 }
1900 if (PL_curforce < 0) /* in case of duplicate start_force() */
1901 Zero(&PL_nexttoke[where], 1, NEXTTOKE);
1902 PL_curforce = where;
1903 if (PL_nextwhite) {
1904 if (PL_madskills)
1905 curmad('^', newSVpvs(""));
1906 CURMAD('_', PL_nextwhite);
1907 }
1908}
1909
1910STATIC void
1911S_curmad(pTHX_ char slot, SV *sv)
1912{
1913 MADPROP **where;
1914
1915 if (!sv)
1916 return;
1917 if (PL_curforce < 0)
1918 where = &PL_thismad;
1919 else
1920 where = &PL_nexttoke[PL_curforce].next_mad;
1921
1922 if (PL_faketokens)
1923 sv_setpvs(sv, "");
1924 else {
1925 if (!IN_BYTES) {
1926 if (UTF && is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
1927 SvUTF8_on(sv);
1928 else if (PL_encoding) {
1929 sv_recode_to_utf8(sv, PL_encoding);
1930 }
1931 }
1932 }
1933
1934 /* keep a slot open for the head of the list? */
1935 if (slot != '_' && *where && (*where)->mad_key == '^') {
1936 (*where)->mad_key = slot;
1937 sv_free(MUTABLE_SV(((*where)->mad_val)));
1938 (*where)->mad_val = (void*)sv;
1939 }
1940 else
1941 addmad(newMADsv(slot, sv), where, 0);
1942}
1943#else
1944# define start_force(where) NOOP
1945# define curmad(slot, sv) NOOP
1946#endif
1947
1948/*
1949 * S_force_next
1950 * When the lexer realizes it knows the next token (for instance,
1951 * it is reordering tokens for the parser) then it can call S_force_next
1952 * to know what token to return the next time the lexer is called. Caller
1953 * will need to set PL_nextval[] (or PL_nexttoke[].next_val with PERL_MAD),
1954 * and possibly PL_expect to ensure the lexer handles the token correctly.
1955 */
1956
1957STATIC void
1958S_force_next(pTHX_ I32 type)
1959{
1960 dVAR;
1961#ifdef DEBUGGING
1962 if (DEBUG_T_TEST) {
1963 PerlIO_printf(Perl_debug_log, "### forced token:\n");
1964 tokereport(type, &NEXTVAL_NEXTTOKE);
1965 }
1966#endif
1967#ifdef PERL_MAD
1968 if (PL_curforce < 0)
1969 start_force(PL_lasttoke);
1970 PL_nexttoke[PL_curforce].next_type = type;
1971 if (PL_lex_state != LEX_KNOWNEXT)
1972 PL_lex_defer = PL_lex_state;
1973 PL_lex_state = LEX_KNOWNEXT;
1974 PL_lex_expect = PL_expect;
1975 PL_curforce = -1;
1976#else
1977 PL_nexttype[PL_nexttoke] = type;
1978 PL_nexttoke++;
1979 if (PL_lex_state != LEX_KNOWNEXT) {
1980 PL_lex_defer = PL_lex_state;
1981 PL_lex_expect = PL_expect;
1982 PL_lex_state = LEX_KNOWNEXT;
1983 }
1984#endif
1985}
1986
1987void
1988Perl_yyunlex(pTHX)
1989{
1990 int yyc = PL_parser->yychar;
1991 if (yyc != YYEMPTY) {
1992 if (yyc) {
1993 start_force(-1);
1994 NEXTVAL_NEXTTOKE = PL_parser->yylval;
1995 if (yyc == '{'/*}*/ || yyc == HASHBRACK || yyc == '['/*]*/) {
1996 PL_lex_allbrackets--;
1997 PL_lex_brackets--;
1998 yyc |= (3<<24) | (PL_lex_brackstack[PL_lex_brackets] << 16);
1999 } else if (yyc == '('/*)*/) {
2000 PL_lex_allbrackets--;
2001 yyc |= (2<<24);
2002 }
2003 force_next(yyc);
2004 }
2005 PL_parser->yychar = YYEMPTY;
2006 }
2007}
2008
2009STATIC SV *
2010S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len)
2011{
2012 dVAR;
2013 SV * const sv = newSVpvn_utf8(start, len,
2014 !IN_BYTES
2015 && UTF
2016 && !is_ascii_string((const U8*)start, len)
2017 && is_utf8_string((const U8*)start, len));
2018 return sv;
2019}
2020
2021/*
2022 * S_force_word
 * When the lexer knows the next thing is a word (for instance, it has
 * just seen -> and it knows that the next char is a word char), then
 * it calls S_force_word to stick the next word into the PL_nexttoke/val
 * lookahead.
2027 *
2028 * Arguments:
2029 * char *start : buffer position (must be within PL_linestr)
2030 * int token : PL_next* will be this type of bare word (e.g., METHOD,WORD)
2031 * int check_keyword : if true, Perl checks to make sure the word isn't
2032 * a keyword (do this if the word is a label, e.g. goto FOO)
2033 * int allow_pack : if true, : characters will also be allowed (require,
2034 * use, etc. do this)
2035 * int allow_initial_tick : used by the "sub" lexer only.
2036 */
2037
2038STATIC char *
2039S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
2040{
2041 dVAR;
2042 register char *s;
2043 STRLEN len;
2044
2045 PERL_ARGS_ASSERT_FORCE_WORD;
2046
2047 start = SKIPSPACE1(start);
2048 s = start;
2049 if (isIDFIRST_lazy_if(s,UTF) ||
2050 (allow_pack && *s == ':') ||
2051 (allow_initial_tick && *s == '\'') )
2052 {
2053 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
2054 if (check_keyword && keyword(PL_tokenbuf, len, 0))
2055 return start;
2056 start_force(PL_curforce);
2057 if (PL_madskills)
2058 curmad('X', newSVpvn(start,s-start));
2059 if (token == METHOD) {
2060 s = SKIPSPACE1(s);
2061 if (*s == '(')
2062 PL_expect = XTERM;
2063 else {
2064 PL_expect = XOPERATOR;
2065 }
2066 }
2067 if (PL_madskills)
2068 curmad('g', newSVpvs( "forced" ));
2069 NEXTVAL_NEXTTOKE.opval
2070 = (OP*)newSVOP(OP_CONST,0,
2071 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
2072 NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
2073 force_next(token);
2074 }
2075 return s;
2076}
2077
2078/*
2079 * S_force_ident
2080 * Called when the lexer wants $foo *foo &foo etc, but the program
2081 * text only contains the "foo" portion. The first argument is a pointer
2082 * to the "foo", and the second argument is the type symbol to prefix.
2083 * Forces the next token to be a "WORD".
2084 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
2085 */
2086
2087STATIC void
2088S_force_ident(pTHX_ register const char *s, int kind)
2089{
2090 dVAR;
2091
2092 PERL_ARGS_ASSERT_FORCE_IDENT;
2093
2094 if (*s) {
2095 const STRLEN len = strlen(s);
2096 OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpvn(s, len));
2097 start_force(PL_curforce);
2098 NEXTVAL_NEXTTOKE.opval = o;
2099 force_next(WORD);
2100 if (kind) {
2101 o->op_private = OPpCONST_ENTERED;
2102 /* XXX see note in pp_entereval() for why we forgo typo
2103 warnings if the symbol must be introduced in an eval.
2104 GSAR 96-10-12 */
2105 gv_fetchpvn_flags(s, len,
2106 PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL)
2107 : GV_ADD,
2108 kind == '$' ? SVt_PV :
2109 kind == '@' ? SVt_PVAV :
2110 kind == '%' ? SVt_PVHV :
2111 SVt_PVGV
2112 );
2113 }
2114 }
2115}
2116
2117NV
2118Perl_str_to_version(pTHX_ SV *sv)
2119{
2120 NV retval = 0.0;
2121 NV nshift = 1.0;
2122 STRLEN len;
2123 const char *start = SvPV_const(sv,len);
2124 const char * const end = start + len;
2125 const bool utf = SvUTF8(sv) ? TRUE : FALSE;
2126
2127 PERL_ARGS_ASSERT_STR_TO_VERSION;
2128
2129 while (start < end) {
2130 STRLEN skip;
2131 UV n;
2132 if (utf)
2133 n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
2134 else {
2135 n = *(U8*)start;
2136 skip = 1;
2137 }
2138 retval += ((NV)n)/nshift;
2139 start += skip;
2140 nshift *= 1000;
2141 }
2142 return retval;
2143}
2144
2145/*
2146 * S_force_version
2147 * Forces the next token to be a version number.
2148 * If the next token appears to be an invalid version number, (e.g. "v2b"),
2149 * and if "guessing" is TRUE, then no new token is created (and the caller
2150 * must use an alternative parsing method).
2151 */
2152
2153STATIC char *
2154S_force_version(pTHX_ char *s, int guessing)
2155{
2156 dVAR;
2157 OP *version = NULL;
2158 char *d;
2159#ifdef PERL_MAD
2160 I32 startoff = s - SvPVX(PL_linestr);
2161#endif
2162
2163 PERL_ARGS_ASSERT_FORCE_VERSION;
2164
2165 s = SKIPSPACE1(s);
2166
2167 d = s;
2168 if (*d == 'v')
2169 d++;
2170 if (isDIGIT(*d)) {
2171 while (isDIGIT(*d) || *d == '_' || *d == '.')
2172 d++;
2173#ifdef PERL_MAD
2174 if (PL_madskills) {
2175 start_force(PL_curforce);
2176 curmad('X', newSVpvn(s,d-s));
2177 }
2178#endif
2179 if (*d == ';' || isSPACE(*d) || *d == '{' || *d == '}' || !*d) {
2180 SV *ver;
2181#ifdef USE_LOCALE_NUMERIC
2182 char *loc = setlocale(LC_NUMERIC, "C");
2183#endif
2184 s = scan_num(s, &pl_yylval);
2185#ifdef USE_LOCALE_NUMERIC
2186 setlocale(LC_NUMERIC, loc);
2187#endif
2188 version = pl_yylval.opval;
2189 ver = cSVOPx(version)->op_sv;
2190 if (SvPOK(ver) && !SvNIOK(ver)) {
2191 SvUPGRADE(ver, SVt_PVNV);
2192 SvNV_set(ver, str_to_version(ver));
2193 SvNOK_on(ver); /* hint that it is a version */
2194 }
2195 }
2196 else if (guessing) {
2197#ifdef PERL_MAD
2198 if (PL_madskills) {
2199 sv_free(PL_nextwhite); /* let next token collect whitespace */
2200 PL_nextwhite = 0;
2201 s = SvPVX(PL_linestr) + startoff;
2202 }
2203#endif
2204 return s;
2205 }
2206 }
2207
2208#ifdef PERL_MAD
2209 if (PL_madskills && !version) {
2210 sv_free(PL_nextwhite); /* let next token collect whitespace */
2211 PL_nextwhite = 0;
2212 s = SvPVX(PL_linestr) + startoff;
2213 }
2214#endif
2215 /* NOTE: The parser sees the package name and the VERSION swapped */
2216 start_force(PL_curforce);
2217 NEXTVAL_NEXTTOKE.opval = version;
2218 force_next(WORD);
2219
2220 return s;
2221}
2222
2223/*
2224 * S_force_strict_version
2225 * Forces the next token to be a version number using strict syntax rules.
2226 */
2227
2228STATIC char *
2229S_force_strict_version(pTHX_ char *s)
2230{
2231 dVAR;
2232 OP *version = NULL;
2233#ifdef PERL_MAD
2234 I32 startoff = s - SvPVX(PL_linestr);
2235#endif
2236 const char *errstr = NULL;
2237
2238 PERL_ARGS_ASSERT_FORCE_STRICT_VERSION;
2239
2240 while (isSPACE(*s)) /* leading whitespace */
2241 s++;
2242
2243 if (is_STRICT_VERSION(s,&errstr)) {
2244 SV *ver = newSV(0);
2245 s = (char *)scan_version(s, ver, 0);
2246 version = newSVOP(OP_CONST, 0, ver);
2247 }
2248 else if ( (*s != ';' && *s != '{' && *s != '}' ) &&
2249 (s = SKIPSPACE1(s), (*s != ';' && *s != '{' && *s != '}' )))
2250 {
2251 PL_bufptr = s;
2252 if (errstr)
2253 yyerror(errstr); /* version required */
2254 return s;
2255 }
2256
2257#ifdef PERL_MAD
2258 if (PL_madskills && !version) {
2259 sv_free(PL_nextwhite); /* let next token collect whitespace */
2260 PL_nextwhite = 0;
2261 s = SvPVX(PL_linestr) + startoff;
2262 }
2263#endif
2264 /* NOTE: The parser sees the package name and the VERSION swapped */
2265 start_force(PL_curforce);
2266 NEXTVAL_NEXTTOKE.opval = version;
2267 force_next(WORD);
2268
2269 return s;
2270}
2271
2272/*
2273 * S_tokeq
2274 * Tokenize a quoted string passed in as an SV. It finds the next
2275 * chunk, up to end of string or a backslash. It may make a new
2276 * SV containing that chunk (if HINT_NEW_STRING is on). It also
2277 * turns \\ into \.
2278 */
2279
2280STATIC SV *
2281S_tokeq(pTHX_ SV *sv)
2282{
2283 dVAR;
2284 register char *s;
2285 register char *send;
2286 register char *d;
2287 STRLEN len = 0;
2288 SV *pv = sv;
2289
2290 PERL_ARGS_ASSERT_TOKEQ;
2291
2292 if (!SvLEN(sv))
2293 goto finish;
2294
2295 s = SvPV_force(sv, len);
2296 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
2297 goto finish;
2298 send = s + len;
2299 /* This is relying on the SV being "well formed" with a trailing '\0' */
2300 while (s < send && !(*s == '\\' && s[1] == '\\'))
2301 s++;
2302 if (s == send)
2303 goto finish;
2304 d = s;
2305 if ( PL_hints & HINT_NEW_STRING ) {
2306 pv = newSVpvn_flags(SvPVX_const(pv), len, SVs_TEMP | SvUTF8(sv));
2307 }
2308 while (s < send) {
2309 if (*s == '\\') {
2310 if (s + 1 < send && (s[1] == '\\'))
2311 s++; /* all that, just for this */
2312 }
2313 *d++ = *s++;
2314 }
2315 *d = '\0';
2316 SvCUR_set(sv, d - SvPVX_const(sv));
2317 finish:
2318 if ( PL_hints & HINT_NEW_STRING )
2319 return new_constant(NULL, 0, "q", sv, pv, "q", 1);
2320 return sv;
2321}
2322
2323/*
2324 * Now come three functions related to double-quote context,
2325 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
2326 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
2327 * interact with PL_lex_state, and create fake ( ... ) argument lists
2328 * to handle functions and concatenation.
2329 * They assume that whoever calls them will be setting up a fake
2330 * join call, because each subthing puts a ',' after it. This lets
2331 * "lower \luPpEr"
2332 * become
2333 * join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
2334 *
2335 * (I'm not sure whether the spurious commas at the end of lcfirst's
2336 * arguments and join's arguments are created or not).
2337 */
2338
2339/*
2340 * S_sublex_start
2341 * Assumes that pl_yylval.ival is the op we're creating (e.g. OP_LCFIRST).
2342 *
2343 * Pattern matching will set PL_lex_op to the pattern-matching op to
2344 * make (we return THING if pl_yylval.ival is OP_NULL, PMFUNC otherwise).
2345 *
2346 * OP_CONST and OP_READLINE are easy--just make the new op and return.
2347 *
2348 * Everything else becomes a FUNC.
2349 *
2350 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
2351 * had an OP_CONST or OP_READLINE). This just sets us up for a
2352 * call to S_sublex_push().
2353 */
2354
2355STATIC I32
2356S_sublex_start(pTHX)
2357{
2358 dVAR;
2359 register const I32 op_type = pl_yylval.ival;
2360
2361 if (op_type == OP_NULL) {
2362 pl_yylval.opval = PL_lex_op;
2363 PL_lex_op = NULL;
2364 return THING;
2365 }
2366 if (op_type == OP_CONST || op_type == OP_READLINE) {
2367 SV *sv = tokeq(PL_lex_stuff);
2368
2369 if (SvTYPE(sv) == SVt_PVIV) {
2370 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
2371 STRLEN len;
2372 const char * const p = SvPV_const(sv, len);
2373 SV * const nsv = newSVpvn_flags(p, len, SvUTF8(sv));
2374 SvREFCNT_dec(sv);
2375 sv = nsv;
2376 }
2377 pl_yylval.opval = (OP*)newSVOP(op_type, 0, sv);
2378 PL_lex_stuff = NULL;
2379 /* Allow <FH> // "foo" */
2380 if (op_type == OP_READLINE)
2381 PL_expect = XTERMORDORDOR;
2382 return THING;
2383 }
2384 else if (op_type == OP_BACKTICK && PL_lex_op) {
2385 /* readpipe() vas overriden */
2386 cSVOPx(cLISTOPx(cUNOPx(PL_lex_op)->op_first)->op_first->op_sibling)->op_sv = tokeq(PL_lex_stuff);
2387 pl_yylval.opval = PL_lex_op;
2388 PL_lex_op = NULL;
2389 PL_lex_stuff = NULL;
2390 return THING;
2391 }
2392
2393 PL_sublex_info.super_state = PL_lex_state;
2394 PL_sublex_info.sub_inwhat = (U16)op_type;
2395 PL_sublex_info.sub_op = PL_lex_op;
2396 PL_lex_state = LEX_INTERPPUSH;
2397
2398 PL_expect = XTERM;
2399 if (PL_lex_op) {
2400 pl_yylval.opval = PL_lex_op;
2401 PL_lex_op = NULL;
2402 return PMFUNC;
2403 }
2404 else
2405 return FUNC;
2406}
2407
2408/*
2409 * S_sublex_push
2410 * Create a new scope to save the lexing state. The scope will be
2411 * ended in S_sublex_done. Returns a '(', starting the function arguments
2412 * to the uc, lc, etc. found before.
2413 * Sets PL_lex_state to LEX_INTERPCONCAT.
2414 */
2415
2416STATIC I32
2417S_sublex_push(pTHX)
2418{
2419 dVAR;
2420 ENTER;
2421
2422 PL_lex_state = PL_sublex_info.super_state;
2423 SAVEBOOL(PL_lex_dojoin);
2424 SAVEI32(PL_lex_brackets);
2425 SAVEI32(PL_lex_allbrackets);
2426 SAVEI8(PL_lex_fakeeof);
2427 SAVEI32(PL_lex_casemods);
2428 SAVEI32(PL_lex_starts);
2429 SAVEI8(PL_lex_state);
2430 SAVEVPTR(PL_lex_inpat);
2431 SAVEI16(PL_lex_inwhat);
2432 SAVECOPLINE(PL_curcop);
2433 SAVEPPTR(PL_bufptr);
2434 SAVEPPTR(PL_bufend);
2435 SAVEPPTR(PL_oldbufptr);
2436 SAVEPPTR(PL_oldoldbufptr);
2437 SAVEPPTR(PL_last_lop);
2438 SAVEPPTR(PL_last_uni);
2439 SAVEPPTR(PL_linestart);
2440 SAVESPTR(PL_linestr);
2441 SAVEGENERICPV(PL_lex_brackstack);
2442 SAVEGENERICPV(PL_lex_casestack);
2443
2444 PL_linestr = PL_lex_stuff;
2445 PL_lex_stuff = NULL;
2446
2447 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
2448 = SvPVX(PL_linestr);
2449 PL_bufend += SvCUR(PL_linestr);
2450 PL_last_lop = PL_last_uni = NULL;
2451 SAVEFREESV(PL_linestr);
2452
2453 PL_lex_dojoin = FALSE;
2454 PL_lex_brackets = 0;
2455 PL_lex_allbrackets = 0;
2456 PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
2457 Newx(PL_lex_brackstack, 120, char);
2458 Newx(PL_lex_casestack, 12, char);
2459 PL_lex_casemods = 0;
2460 *PL_lex_casestack = '\0';
2461 PL_lex_starts = 0;
2462 PL_lex_state = LEX_INTERPCONCAT;
2463 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
2464
2465 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
2466 if (PL_lex_inwhat == OP_TRANSR) PL_lex_inwhat = OP_TRANS;
2467 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
2468 PL_lex_inpat = PL_sublex_info.sub_op;
2469 else
2470 PL_lex_inpat = NULL;
2471
2472 return '(';
2473}
2474
2475/*
2476 * S_sublex_done
2477 * Restores lexer state after a S_sublex_push.
2478 */
2479
2480STATIC I32
2481S_sublex_done(pTHX)
2482{
2483 dVAR;
2484 if (!PL_lex_starts++) {
2485 SV * const sv = newSVpvs("");
2486 if (SvUTF8(PL_linestr))
2487 SvUTF8_on(sv);
2488 PL_expect = XOPERATOR;
2489 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
2490 return THING;
2491 }
2492
2493 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
2494 PL_lex_state = LEX_INTERPCASEMOD;
2495 return yylex();
2496 }
2497
2498 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
2499 assert(PL_lex_inwhat != OP_TRANSR);
2500 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
2501 PL_linestr = PL_lex_repl;
2502 PL_lex_inpat = 0;
2503 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
2504 PL_bufend += SvCUR(PL_linestr);
2505 PL_last_lop = PL_last_uni = NULL;
2506 SAVEFREESV(PL_linestr);
2507 PL_lex_dojoin = FALSE;
2508 PL_lex_brackets = 0;
2509 PL_lex_allbrackets = 0;
2510 PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
2511 PL_lex_casemods = 0;
2512 *PL_lex_casestack = '\0';
2513 PL_lex_starts = 0;
2514 if (SvEVALED(PL_lex_repl)) {
2515 PL_lex_state = LEX_INTERPNORMAL;
2516 PL_lex_starts++;
2517 /* we don't clear PL_lex_repl here, so that we can check later
2518 whether this is an evalled subst; that means we rely on the
2519 logic to ensure sublex_done() is called again only via the
2520 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
2521 }
2522 else {
2523 PL_lex_state = LEX_INTERPCONCAT;
2524 PL_lex_repl = NULL;
2525 }
2526 return ',';
2527 }
2528 else {
2529#ifdef PERL_MAD
2530 if (PL_madskills) {
2531 if (PL_thiswhite) {
2532 if (!PL_endwhite)
2533 PL_endwhite = newSVpvs("");
2534 sv_catsv(PL_endwhite, PL_thiswhite);
2535 PL_thiswhite = 0;
2536 }
2537 if (PL_thistoken)
2538 sv_setpvs(PL_thistoken,"");
2539 else
2540 PL_realtokenstart = -1;
2541 }
2542#endif
2543 LEAVE;
2544 PL_bufend = SvPVX(PL_linestr);
2545 PL_bufend += SvCUR(PL_linestr);
2546 PL_expect = XOPERATOR;
2547 PL_sublex_info.sub_inwhat = 0;
2548 return ')';
2549 }
2550}
2551
2552/*
2553 scan_const
2554
2555 Extracts a pattern, double-quoted string, or transliteration. This
2556 is terrifying code.
2557
2558 It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
2559 processing a pattern (PL_lex_inpat is true), a transliteration
2560 (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
2561
2562 Returns a pointer to the character scanned up to. If this is
2563 advanced from the start pointer supplied (i.e. if anything was
2564 successfully parsed), will leave an OP for the substring scanned
2565 in pl_yylval. Caller must intuit reason for not parsing further
2566 by looking at the next characters herself.
2567
2568 In patterns:
2569 backslashes:
2570 constants: \N{NAME} only
2571 case and quoting: \U \Q \E
2572 stops on @ and $, but not for $ as tail anchor
2573
2574 In transliterations:
2575 characters are VERY literal, except for - not at the start or end
2576 of the string, which indicates a range. If the range is in bytes,
2577 scan_const expands the range to the full set of intermediate
2578 characters. If the range is in utf8, the hyphen is replaced with
2579 a certain range mark which will be handled by pmtrans() in op.c.
2580
2581 In double-quoted strings:
2582 backslashes:
2583 double-quoted style: \r and \n
2584 constants: \x31, etc.
2585 deprecated backrefs: \1 (in substitution replacements)
2586 case and quoting: \U \Q \E
2587 stops on @ and $
2588
2589 scan_const does *not* construct ops to handle interpolated strings.
2590 It stops processing as soon as it finds an embedded $ or @ variable
2591 and leaves it to the caller to work out what's going on.
2592
2593 embedded arrays (whether in pattern or not) could be:
2594 @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
2595
2596 $ in double-quoted strings must be the symbol of an embedded scalar.
2597
2598 $ in pattern could be $foo or could be tail anchor. Assumption:
2599 it's a tail anchor if $ is the last thing in the string, or if it's
2600 followed by one of "()| \r\n\t"
2601
2602 \1 (backreferences) are turned into $1
2603
2604 The structure of the code is
2605 while (there's a character to process) {
2606 handle transliteration ranges
2607 skip regexp comments /(?#comment)/ and codes /(?{code})/
2608 skip #-initiated comments in //x patterns
2609 check for embedded arrays
2610 check for embedded scalars
2611 if (backslash) {
2612 deprecate \1 in substitution replacements
2613 handle string-changing backslashes \l \U \Q \E, etc.
2614 switch (what was escaped) {
2615 handle \- in a transliteration (becomes a literal -)
2616 if a pattern and not \N{, go treat as regular character
2617 handle \132 (octal characters)
2618 handle \x15 and \x{1234} (hex characters)
2619 handle \N{name} (named characters, also \N{3,5} in a pattern)
2620 handle \cV (control characters)
2621 handle printf-style backslashes (\f, \r, \n, etc)
2622 } (end switch)
2623 continue
2624 } (end if backslash)
2625 handle regular character
2626 } (end while character to read)
2627
2628*/
2629
2630STATIC char *
2631S_scan_const(pTHX_ char *start)
2632{
2633 dVAR;
2634 register char *send = PL_bufend; /* end of the constant */
2635 SV *sv = newSV(send - start); /* sv for the constant. See
2636 note below on sizing. */
2637 register char *s = start; /* start of the constant */
2638 register char *d = SvPVX(sv); /* destination for copies */
2639 bool dorange = FALSE; /* are we in a translit range? */
2640 bool didrange = FALSE; /* did we just finish a range? */
2641 bool has_utf8 = FALSE; /* Output constant is UTF8 */
2642 bool this_utf8 = cBOOL(UTF); /* Is the source string assumed
2643 to be UTF8? But, this can
2644 show as true when the source
2645 isn't utf8, as for example
2646 when it is entirely composed
2647 of hex constants */
2648
2649 /* Note on sizing: The scanned constant is placed into sv, which is
2650 * initialized by newSV() assuming one byte of output for every byte of
2651 * input. This routine expects newSV() to allocate an extra byte for a
2652 * trailing NUL, which this routine will append if it gets to the end of
2653 * the input. There may be more bytes of input than output (eg., \N{LATIN
2654 * CAPITAL LETTER A}), or more output than input if the constant ends up
2655 * recoded to utf8, but each time a construct is found that might increase
2656 * the needed size, SvGROW() is called. Its size parameter each time is
2657 * based on the best guess estimate at the time, namely the length used so
2658 * far, plus the length the current construct will occupy, plus room for
2659 * the trailing NUL, plus one byte for every input byte still unscanned */
2660
2661 UV uv;
2662#ifdef EBCDIC
2663 UV literal_endpoint = 0;
2664 bool native_range = TRUE; /* turned to FALSE if the first endpoint is Unicode. */
2665#endif
2666
2667 PERL_ARGS_ASSERT_SCAN_CONST;
2668
2669 assert(PL_lex_inwhat != OP_TRANSR);
2670 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
2671 /* If we are doing a trans and we know we want UTF8 set expectation */
2672 has_utf8 = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
2673 this_utf8 = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
2674 }
2675
2676
2677 while (s < send || dorange) {
2678
2679 /* get transliterations out of the way (they're most literal) */
2680 if (PL_lex_inwhat == OP_TRANS) {
2681 /* expand a range A-Z to the full set of characters. AIE! */
2682 if (dorange) {
2683 I32 i; /* current expanded character */
2684 I32 min; /* first character in range */
2685 I32 max; /* last character in range */
2686
2687#ifdef EBCDIC
2688 UV uvmax = 0;
2689#endif
2690
2691 if (has_utf8
2692#ifdef EBCDIC
2693 && !native_range
2694#endif
2695 ) {
2696 char * const c = (char*)utf8_hop((U8*)d, -1);
2697 char *e = d++;
2698 while (e-- > c)
2699 *(e + 1) = *e;
2700 *c = (char)UTF_TO_NATIVE(0xff);
2701 /* mark the range as done, and continue */
2702 dorange = FALSE;
2703 didrange = TRUE;
2704 continue;
2705 }
2706
2707 i = d - SvPVX_const(sv); /* remember current offset */
2708#ifdef EBCDIC
2709 SvGROW(sv,
2710 SvLEN(sv) + (has_utf8 ?
2711 (512 - UTF_CONTINUATION_MARK +
2712 UNISKIP(0x100))
2713 : 256));
2714 /* How many two-byte within 0..255: 128 in UTF-8,
2715 * 96 in UTF-8-mod. */
2716#else
2717 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
2718#endif
2719 d = SvPVX(sv) + i; /* refresh d after realloc */
2720#ifdef EBCDIC
2721 if (has_utf8) {
2722 int j;
2723 for (j = 0; j <= 1; j++) {
2724 char * const c = (char*)utf8_hop((U8*)d, -1);
2725 const UV uv = utf8n_to_uvchr((U8*)c, d - c, NULL, 0);
2726 if (j)
2727 min = (U8)uv;
2728 else if (uv < 256)
2729 max = (U8)uv;
2730 else {
2731 max = (U8)0xff; /* only to \xff */
2732 uvmax = uv; /* \x{100} to uvmax */
2733 }
2734 d = c; /* eat endpoint chars */
2735 }
2736 }
2737 else {
2738#endif
2739 d -= 2; /* eat the first char and the - */
2740 min = (U8)*d; /* first char in range */
2741 max = (U8)d[1]; /* last char in range */
2742#ifdef EBCDIC
2743 }
2744#endif
2745
2746 if (min > max) {
2747 Perl_croak(aTHX_
2748 "Invalid range \"%c-%c\" in transliteration operator",
2749 (char)min, (char)max);
2750 }
2751
2752#ifdef EBCDIC
2753 if (literal_endpoint == 2 &&
2754 ((isLOWER(min) && isLOWER(max)) ||
2755 (isUPPER(min) && isUPPER(max)))) {
2756 if (isLOWER(min)) {
2757 for (i = min; i <= max; i++)
2758 if (isLOWER(i))
2759 *d++ = NATIVE_TO_NEED(has_utf8,i);
2760 } else {
2761 for (i = min; i <= max; i++)
2762 if (isUPPER(i))
2763 *d++ = NATIVE_TO_NEED(has_utf8,i);
2764 }
2765 }
2766 else
2767#endif
2768 for (i = min; i <= max; i++)
2769#ifdef EBCDIC
2770 if (has_utf8) {
2771 const U8 ch = (U8)NATIVE_TO_UTF(i);
2772 if (UNI_IS_INVARIANT(ch))
2773 *d++ = (U8)i;
2774 else {
2775 *d++ = (U8)UTF8_EIGHT_BIT_HI(ch);
2776 *d++ = (U8)UTF8_EIGHT_BIT_LO(ch);
2777 }
2778 }
2779 else
2780#endif
2781 *d++ = (char)i;
2782
2783#ifdef EBCDIC
2784 if (uvmax) {
2785 d = (char*)uvchr_to_utf8((U8*)d, 0x100);
2786 if (uvmax > 0x101)
2787 *d++ = (char)UTF_TO_NATIVE(0xff);
2788 if (uvmax > 0x100)
2789 d = (char*)uvchr_to_utf8((U8*)d, uvmax);
2790 }
2791#endif
2792
2793 /* mark the range as done, and continue */
2794 dorange = FALSE;
2795 didrange = TRUE;
2796#ifdef EBCDIC
2797 literal_endpoint = 0;
2798#endif
2799 continue;
2800 }
2801
2802 /* range begins (ignore - as first or last char) */
2803 else if (*s == '-' && s+1 < send && s != start) {
2804 if (didrange) {
2805 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
2806 }
2807 if (has_utf8
2808#ifdef EBCDIC
2809 && !native_range
2810#endif
2811 ) {
2812 *d++ = (char)UTF_TO_NATIVE(0xff); /* use illegal utf8 byte--see pmtrans */
2813 s++;
2814 continue;
2815 }
2816 dorange = TRUE;
2817 s++;
2818 }
2819 else {
2820 didrange = FALSE;
2821#ifdef EBCDIC
2822 literal_endpoint = 0;
2823 native_range = TRUE;
2824#endif
2825 }
2826 }
2827
2828 /* if we get here, we're not doing a transliteration */
2829
2830 /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
2831 except for the last char, which will be done separately. */
2832 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
2833 if (s[2] == '#') {
2834 while (s+1 < send && *s != ')')
2835 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2836 }
2837 else if (s[2] == '{' /* This should match regcomp.c */
2838 || (s[2] == '?' && s[3] == '{'))
2839 {
2840 I32 count = 1;
2841 char *regparse = s + (s[2] == '{' ? 3 : 4);
2842 char c;
2843
2844 while (count && (c = *regparse)) {
2845 if (c == '\\' && regparse[1])
2846 regparse++;
2847 else if (c == '{')
2848 count++;
2849 else if (c == '}')
2850 count--;
2851 regparse++;
2852 }
2853 if (*regparse != ')')
2854 regparse--; /* Leave one char for continuation. */
2855 while (s < regparse)
2856 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2857 }
2858 }
2859
2860 /* likewise skip #-initiated comments in //x patterns */
2861 else if (*s == '#' && PL_lex_inpat &&
2862 ((PMOP*)PL_lex_inpat)->op_pmflags & RXf_PMf_EXTENDED) {
2863 while (s+1 < send && *s != '\n')
2864 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2865 }
2866
2867 /* check for embedded arrays
2868 (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
2869 */
2870 else if (*s == '@' && s[1]) {
2871 if (isALNUM_lazy_if(s+1,UTF))
2872 break;
2873 if (strchr(":'{$", s[1]))
2874 break;
2875 if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
2876 break; /* in regexp, neither @+ nor @- are interpolated */
2877 }
2878
2879 /* check for embedded scalars. only stop if we're sure it's a
2880 variable.
2881 */
2882 else if (*s == '$') {
2883 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
2884 break;
2885 if (s + 1 < send && !strchr("()| \r\n\t", s[1])) {
2886 if (s[1] == '\\') {
2887 Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
2888 "Possible unintended interpolation of $\\ in regex");
2889 }
2890 break; /* in regexp, $ might be tail anchor */
2891 }
2892 }
2893
2894 /* End of else if chain - OP_TRANS rejoin rest */
2895
2896 /* backslashes */
2897 if (*s == '\\' && s+1 < send) {
2898 char* e; /* Can be used for ending '}', etc. */
2899
2900 s++;
2901
2902 /* warn on \1 - \9 in substitution replacements, but note that \11
2903 * is an octal; and \19 is \1 followed by '9' */
2904 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
2905 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
2906 {
2907 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
2908 *--s = '$';
2909 break;
2910 }
2911
2912 /* string-change backslash escapes */
2913 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
2914 --s;
2915 break;
2916 }
2917 /* In a pattern, process \N, but skip any other backslash escapes.
2918 * This is because we don't want to translate an escape sequence
2919 * into a meta symbol and have the regex compiler use the meta
2920 * symbol meaning, e.g. \x{2E} would be confused with a dot. But
2921 * in spite of this, we do have to process \N here while the proper
2922 * charnames handler is in scope. See bugs #56444 and #62056.
2923 * There is a complication because \N in a pattern may also stand
2924 * for 'match a non-nl', and not mean a charname, in which case its
2925 * processing should be deferred to the regex compiler. To be a
2926 * charname it must be followed immediately by a '{', and not look
2927 * like \N followed by a curly quantifier, i.e., not something like
2928 * \N{3,}. regcurly returns a boolean indicating if it is a legal
2929 * quantifier */
2930 else if (PL_lex_inpat
2931 && (*s != 'N'
2932 || s[1] != '{'
2933 || regcurly(s + 1)))
2934 {
2935 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
2936 goto default_action;
2937 }
2938
2939 switch (*s) {
2940
2941 /* quoted - in transliterations */
2942 case '-':
2943 if (PL_lex_inwhat == OP_TRANS) {
2944 *d++ = *s++;
2945 continue;
2946 }
2947 /* FALL THROUGH */
2948 default:
2949 {
2950 if ((isALPHA(*s) || isDIGIT(*s)))
2951 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
2952 "Unrecognized escape \\%c passed through",
2953 *s);
2954 /* default action is to copy the quoted character */
2955 goto default_action;
2956 }
2957
2958 /* eg. \132 indicates the octal constant 0132 */
2959 case '0': case '1': case '2': case '3':
2960 case '4': case '5': case '6': case '7':
2961 {
2962 I32 flags = 0;
2963 STRLEN len = 3;
2964 uv = NATIVE_TO_UNI(grok_oct(s, &len, &flags, NULL));
2965 s += len;
2966 }
2967 goto NUM_ESCAPE_INSERT;
2968
2969 /* eg. \o{24} indicates the octal constant \024 */
2970 case 'o':
2971 {
2972 STRLEN len;
2973 const char* error;
2974
2975 bool valid = grok_bslash_o(s, &uv, &len, &error, 1);
2976 s += len;
2977 if (! valid) {
2978 yyerror(error);
2979 continue;
2980 }
2981 goto NUM_ESCAPE_INSERT;
2982 }
2983
2984 /* eg. \x24 indicates the hex constant 0x24 */
2985 case 'x':
2986 ++s;
2987 if (*s == '{') {
2988 char* const e = strchr(s, '}');
2989 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
2990 PERL_SCAN_DISALLOW_PREFIX;
2991 STRLEN len;
2992
2993 ++s;
2994 if (!e) {
2995 yyerror("Missing right brace on \\x{}");
2996 continue;
2997 }
2998 len = e - s;
2999 uv = NATIVE_TO_UNI(grok_hex(s, &len, &flags, NULL));
3000 s = e + 1;
3001 }
3002 else {
3003 {
3004 STRLEN len = 2;
3005 I32 flags = PERL_SCAN_DISALLOW_PREFIX;
3006 uv = NATIVE_TO_UNI(grok_hex(s, &len, &flags, NULL));
3007 s += len;
3008 }
3009 }
3010
3011 NUM_ESCAPE_INSERT:
3012 /* Insert oct or hex escaped character. There will always be
3013 * enough room in sv since such escapes will be longer than any
3014 * UTF-8 sequence they can end up as, except if they force us
3015 * to recode the rest of the string into utf8 */
3016
3017 /* Here uv is the ordinal of the next character being added in
3018 * unicode (converted from native). */
3019 if (!UNI_IS_INVARIANT(uv)) {
3020 if (!has_utf8 && uv > 255) {
3021 /* Might need to recode whatever we have accumulated so
3022 * far if it contains any chars variant in utf8 or
3023 * utf-ebcdic. */
3024
3025 SvCUR_set(sv, d - SvPVX_const(sv));
3026 SvPOK_on(sv);
3027 *d = '\0';
3028 /* See Note on sizing above. */
3029 sv_utf8_upgrade_flags_grow(sv,
3030 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3031 UNISKIP(uv) + (STRLEN)(send - s) + 1);
3032 d = SvPVX(sv) + SvCUR(sv);
3033 has_utf8 = TRUE;
3034 }
3035
3036 if (has_utf8) {
3037 d = (char*)uvuni_to_utf8((U8*)d, uv);
3038 if (PL_lex_inwhat == OP_TRANS &&
3039 PL_sublex_info.sub_op) {
3040 PL_sublex_info.sub_op->op_private |=
3041 (PL_lex_repl ? OPpTRANS_FROM_UTF
3042 : OPpTRANS_TO_UTF);
3043 }
3044#ifdef EBCDIC
3045 if (uv > 255 && !dorange)
3046 native_range = FALSE;
3047#endif
3048 }
3049 else {
3050 *d++ = (char)uv;
3051 }
3052 }
3053 else {
3054 *d++ = (char) uv;
3055 }
3056 continue;
3057
3058 case 'N':
3059 /* In a non-pattern \N must be a named character, like \N{LATIN
3060 * SMALL LETTER A} or \N{U+0041}. For patterns, it also can
3061 * mean to match a non-newline. For non-patterns, named
3062 * characters are converted to their string equivalents. In
3063 * patterns, named characters are not converted to their
3064 * ultimate forms for the same reasons that other escapes
3065 * aren't. Instead, they are converted to the \N{U+...} form
3066 * to get the value from the charnames that is in effect right
3067 * now, while preserving the fact that it was a named character
3068 * so that the regex compiler knows this */
3069
3070 /* This section of code doesn't generally use the
3071 * NATIVE_TO_NEED() macro to transform the input. I (khw) did
3072 * a close examination of this macro and determined it is a
3073 * no-op except on utfebcdic variant characters. Every
3074 * character generated by this that would normally need to be
3075 * enclosed by this macro is invariant, so the macro is not
3076 * needed, and would complicate use of copy(). XXX There are
3077 * other parts of this file where the macro is used
3078 * inconsistently, but are saved by it being a no-op */
3079
3080 /* The structure of this section of code (besides checking for
3081 * errors and upgrading to utf8) is:
3082 * Further disambiguate between the two meanings of \N, and if
3083 * not a charname, go process it elsewhere
3084 * If of form \N{U+...}, pass it through if a pattern;
3085 * otherwise convert to utf8
3086 * Otherwise must be \N{NAME}: convert to \N{U+c1.c2...} if a
3087 * pattern; otherwise convert to utf8 */
3088
3089 /* Here, s points to the 'N'; the test below is guaranteed to
3090 * succeed if we are being called on a pattern as we already
3091 * know from a test above that the next character is a '{'.
3092 * On a non-pattern \N must mean 'named sequence, which
3093 * requires braces */
3094 s++;
3095 if (*s != '{') {
3096 yyerror("Missing braces on \\N{}");
3097 continue;
3098 }
3099 s++;
3100
3101 /* If there is no matching '}', it is an error. */
3102 if (! (e = strchr(s, '}'))) {
3103 if (! PL_lex_inpat) {
3104 yyerror("Missing right brace on \\N{}");
3105 } else {
3106 yyerror("Missing right brace on \\N{} or unescaped left brace after \\N.");
3107 }
3108 continue;
3109 }
3110
3111 /* Here it looks like a named character */
3112
3113 if (PL_lex_inpat) {
3114
3115 /* XXX This block is temporary code. \N{} implies that the
3116 * pattern is to have Unicode semantics, and therefore
3117 * currently has to be encoded in utf8. By putting it in
3118 * utf8 now, we save a whole pass in the regular expression
3119 * compiler. Once that code is changed so Unicode
3120 * semantics doesn't necessarily have to be in utf8, this
3121 * block should be removed. However, the code that parses
3122 * the output of this would have to be changed to not
3123 * necessarily expect utf8 */
3124 if (!has_utf8) {
3125 SvCUR_set(sv, d - SvPVX_const(sv));
3126 SvPOK_on(sv);
3127 *d = '\0';
3128 /* See Note on sizing above. */
3129 sv_utf8_upgrade_flags_grow(sv,
3130 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3131 /* 5 = '\N{' + cur char + NUL */
3132 (STRLEN)(send - s) + 5);
3133 d = SvPVX(sv) + SvCUR(sv);
3134 has_utf8 = TRUE;
3135 }
3136 }
3137
3138 if (*s == 'U' && s[1] == '+') { /* \N{U+...} */
3139 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
3140 | PERL_SCAN_DISALLOW_PREFIX;
3141 STRLEN len;
3142
3143 /* For \N{U+...}, the '...' is a unicode value even on
3144 * EBCDIC machines */
3145 s += 2; /* Skip to next char after the 'U+' */
3146 len = e - s;
3147 uv = grok_hex(s, &len, &flags, NULL);
3148 if (len == 0 || len != (STRLEN)(e - s)) {
3149 yyerror("Invalid hexadecimal number in \\N{U+...}");
3150 s = e + 1;
3151 continue;
3152 }
3153
3154 if (PL_lex_inpat) {
3155
3156 /* On non-EBCDIC platforms, pass through to the regex
3157 * compiler unchanged. The reason we evaluated the
3158 * number above is to make sure there wasn't a syntax
3159 * error. But on EBCDIC we convert to native so
3160 * downstream code can continue to assume it's native
3161 */
3162 s -= 5; /* Include the '\N{U+' */
3163#ifdef EBCDIC
3164 d += my_snprintf(d, e - s + 1 + 1, /* includes the }
3165 and the \0 */
3166 "\\N{U+%X}",
3167 (unsigned int) UNI_TO_NATIVE(uv));
3168#else
3169 Copy(s, d, e - s + 1, char); /* 1 = include the } */
3170 d += e - s + 1;
3171#endif
3172 }
3173 else { /* Not a pattern: convert the hex to string */
3174
3175 /* If destination is not in utf8, unconditionally
3176 * recode it to be so. This is because \N{} implies
3177 * Unicode semantics, and scalars have to be in utf8
3178 * to guarantee those semantics */
3179 if (! has_utf8) {
3180 SvCUR_set(sv, d - SvPVX_const(sv));
3181 SvPOK_on(sv);
3182 *d = '\0';
3183 /* See Note on sizing above. */
3184 sv_utf8_upgrade_flags_grow(
3185 sv,
3186 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3187 UNISKIP(uv) + (STRLEN)(send - e) + 1);
3188 d = SvPVX(sv) + SvCUR(sv);
3189 has_utf8 = TRUE;
3190 }
3191
3192 /* Add the string to the output */
3193 if (UNI_IS_INVARIANT(uv)) {
3194 *d++ = (char) uv;
3195 }
3196 else d = (char*)uvuni_to_utf8((U8*)d, uv);
3197 }
3198 }
3199 else { /* Here is \N{NAME} but not \N{U+...}. */
3200
3201 SV *res; /* result from charnames */
3202 const char *str; /* the string in 'res' */
3203 STRLEN len; /* its length */
3204
3205 /* Get the value for NAME */
3206 res = newSVpvn(s, e - s);
3207 res = new_constant( NULL, 0, "charnames",
3208 /* includes all of: \N{...} */
3209 res, NULL, s - 3, e - s + 4 );
3210
3211 /* Most likely res will be in utf8 already since the
3212 * standard charnames uses pack U, but a custom translator
3213 * can leave it otherwise, so make sure. XXX This can be
3214 * revisited to not have charnames use utf8 for characters
3215 * that don't need it when regexes don't have to be in utf8
3216 * for Unicode semantics. If doing so, remember EBCDIC */
3217 sv_utf8_upgrade(res);
3218 str = SvPV_const(res, len);
3219
3220 /* Don't accept malformed input */
3221 if (! is_utf8_string((U8 *) str, len)) {
3222 yyerror("Malformed UTF-8 returned by \\N");
3223 }
3224 else if (PL_lex_inpat) {
3225
3226 if (! len) { /* The name resolved to an empty string */
3227 Copy("\\N{}", d, 4, char);
3228 d += 4;
3229 }
3230 else {
3231 /* In order to not lose information for the regex
3232 * compiler, pass the result in the specially made
3233 * syntax: \N{U+c1.c2.c3...}, where c1 etc. are
3234 * the code points in hex of each character
3235 * returned by charnames */
3236
3237 const char *str_end = str + len;
3238 STRLEN char_length; /* cur char's byte length */
3239 STRLEN output_length; /* and the number of bytes
3240 after this is translated
3241 into hex digits */
3242 const STRLEN off = d - SvPVX_const(sv);
3243
3244 /* 2 hex per byte; 2 chars for '\N'; 2 chars for
3245 * max('U+', '.'); and 1 for NUL */
3246 char hex_string[2 * UTF8_MAXBYTES + 5];
3247
3248 /* Get the first character of the result. */
3249 U32 uv = utf8n_to_uvuni((U8 *) str,
3250 len,
3251 &char_length,
3252 UTF8_ALLOW_ANYUV);
3253
3254 /* The call to is_utf8_string() above hopefully
3255 * guarantees that there won't be an error. But
3256 * it's easy here to make sure. The function just
3257 * above warns and returns 0 if invalid utf8, but
3258 * it can also return 0 if the input is validly a
3259 * NUL. Disambiguate */
3260 if (uv == 0 && NATIVE_TO_ASCII(*str) != '\0') {
3261 uv = UNICODE_REPLACEMENT;
3262 }
3263
3264 /* Convert first code point to hex, including the
3265 * boiler plate before it. For all these, we
3266 * convert to native format so that downstream code
3267 * can continue to assume the input is native */
3268 output_length =
3269 my_snprintf(hex_string, sizeof(hex_string),
3270 "\\N{U+%X",
3271 (unsigned int) UNI_TO_NATIVE(uv));
3272
3273 /* Make sure there is enough space to hold it */
3274 d = off + SvGROW(sv, off
3275 + output_length
3276 + (STRLEN)(send - e)
3277 + 2); /* '}' + NUL */
3278 /* And output it */
3279 Copy(hex_string, d, output_length, char);
3280 d += output_length;
3281
3282 /* For each subsequent character, append dot and
3283 * its ordinal in hex */
3284 while ((str += char_length) < str_end) {
3285 const STRLEN off = d - SvPVX_const(sv);
3286 U32 uv = utf8n_to_uvuni((U8 *) str,
3287 str_end - str,
3288 &char_length,
3289 UTF8_ALLOW_ANYUV);
3290 if (uv == 0 && NATIVE_TO_ASCII(*str) != '\0') {
3291 uv = UNICODE_REPLACEMENT;
3292 }
3293
3294 output_length =
3295 my_snprintf(hex_string, sizeof(hex_string),
3296 ".%X",
3297 (unsigned int) UNI_TO_NATIVE(uv));
3298
3299 d = off + SvGROW(sv, off
3300 + output_length
3301 + (STRLEN)(send - e)
3302 + 2); /* '}' + NUL */
3303 Copy(hex_string, d, output_length, char);
3304 d += output_length;
3305 }
3306
3307 *d++ = '}'; /* Done. Add the trailing brace */
3308 }
3309 }
3310 else { /* Here, not in a pattern. Convert the name to a
3311 * string. */
3312
3313 /* If destination is not in utf8, unconditionally
3314 * recode it to be so. This is because \N{} implies
3315 * Unicode semantics, and scalars have to be in utf8
3316 * to guarantee those semantics */
3317 if (! has_utf8) {
3318 SvCUR_set(sv, d - SvPVX_const(sv));
3319 SvPOK_on(sv);
3320 *d = '\0';
3321 /* See Note on sizing above. */
3322 sv_utf8_upgrade_flags_grow(sv,
3323 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3324 len + (STRLEN)(send - s) + 1);
3325 d = SvPVX(sv) + SvCUR(sv);
3326 has_utf8 = TRUE;
3327 } else if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
3328
3329 /* See Note on sizing above. (NOTE: SvCUR() is not
3330 * set correctly here). */
3331 const STRLEN off = d - SvPVX_const(sv);
3332 d = off + SvGROW(sv, off + len + (STRLEN)(send - s) + 1);
3333 }
3334 Copy(str, d, len, char);
3335 d += len;
3336 }
3337 SvREFCNT_dec(res);
3338
3339 /* Deprecate non-approved name syntax */
3340 if (ckWARN_d(WARN_DEPRECATED)) {
3341 bool problematic = FALSE;
3342 char* i = s;
3343
3344 /* For non-ut8 input, look to see that the first
3345 * character is an alpha, then loop through the rest
3346 * checking that each is a continuation */
3347 if (! this_utf8) {
3348 if (! isALPHAU(*i)) problematic = TRUE;
3349 else for (i = s + 1; i < e; i++) {
3350 if (isCHARNAME_CONT(*i)) continue;
3351 problematic = TRUE;
3352 break;
3353 }
3354 }
3355 else {
3356 /* Similarly for utf8. For invariants can check
3357 * directly. We accept anything above the latin1
3358 * range because it is immaterial to Perl if it is
3359 * correct or not, and is expensive to check. But
3360 * it is fairly easy in the latin1 range to convert
3361 * the variants into a single character and check
3362 * those */
3363 if (UTF8_IS_INVARIANT(*i)) {
3364 if (! isALPHAU(*i)) problematic = TRUE;
3365 } else if (UTF8_IS_DOWNGRADEABLE_START(*i)) {
3366 if (! isALPHAU(UNI_TO_NATIVE(TWO_BYTE_UTF8_TO_UNI(*i,
3367 *(i+1)))))
3368 {
3369 problematic = TRUE;
3370 }
3371 }
3372 if (! problematic) for (i = s + UTF8SKIP(s);
3373 i < e;
3374 i+= UTF8SKIP(i))
3375 {
3376 if (UTF8_IS_INVARIANT(*i)) {
3377 if (isCHARNAME_CONT(*i)) continue;
3378 } else if (! UTF8_IS_DOWNGRADEABLE_START(*i)) {
3379 continue;
3380 } else if (isCHARNAME_CONT(
3381 UNI_TO_NATIVE(
3382 TWO_BYTE_UTF8_TO_UNI(*i, *(i+1)))))
3383 {
3384 continue;
3385 }
3386 problematic = TRUE;
3387 break;
3388 }
3389 }
3390 if (problematic) {
3391 /* The e-i passed to the final %.*s makes sure that
3392 * should the trailing NUL be missing that this
3393 * print won't run off the end of the string */
3394 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
3395 "Deprecated character in \\N{...}; marked by <-- HERE in \\N{%.*s<-- HERE %.*s",
3396 (int)(i - s + 1), s, (int)(e - i), i + 1);
3397 }
3398 }
3399 } /* End \N{NAME} */
3400#ifdef EBCDIC
3401 if (!dorange)
3402 native_range = FALSE; /* \N{} is defined to be Unicode */
3403#endif
3404 s = e + 1; /* Point to just after the '}' */
3405 continue;
3406
3407 /* \c is a control character */
3408 case 'c':
3409 s++;
3410 if (s < send) {
3411 *d++ = grok_bslash_c(*s++, has_utf8, 1);
3412 }
3413 else {
3414 yyerror("Missing control char name in \\c");
3415 }
3416 continue;
3417
3418 /* printf-style backslashes, formfeeds, newlines, etc */
3419 case 'b':
3420 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
3421 break;
3422 case 'n':
3423 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
3424 break;
3425 case 'r':
3426 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
3427 break;
3428 case 'f':
3429 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
3430 break;
3431 case 't':
3432 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
3433 break;
3434 case 'e':
3435 *d++ = ASCII_TO_NEED(has_utf8,'\033');
3436 break;
3437 case 'a':
3438 *d++ = ASCII_TO_NEED(has_utf8,'\007');
3439 break;
3440 } /* end switch */
3441
3442 s++;
3443 continue;
3444 } /* end if (backslash) */
3445#ifdef EBCDIC
3446 else
3447 literal_endpoint++;
3448#endif
3449
3450 default_action:
3451 /* If we started with encoded form, or already know we want it,
3452 then encode the next character */
3453 if (! NATIVE_IS_INVARIANT((U8)(*s)) && (this_utf8 || has_utf8)) {
3454 STRLEN len = 1;
3455
3456
3457 /* One might think that it is wasted effort in the case of the
3458 * source being utf8 (this_utf8 == TRUE) to take the next character
3459 * in the source, convert it to an unsigned value, and then convert
3460 * it back again. But the source has not been validated here. The
3461 * routine that does the conversion checks for errors like
3462 * malformed utf8 */
3463
3464 const UV nextuv = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
3465 const STRLEN need = UNISKIP(NATIVE_TO_UNI(nextuv));
3466 if (!has_utf8) {
3467 SvCUR_set(sv, d - SvPVX_const(sv));
3468 SvPOK_on(sv);
3469 *d = '\0';
3470 /* See Note on sizing above. */
3471 sv_utf8_upgrade_flags_grow(sv,
3472 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3473 need + (STRLEN)(send - s) + 1);
3474 d = SvPVX(sv) + SvCUR(sv);
3475 has_utf8 = TRUE;
3476 } else if (need > len) {
3477 /* encoded value larger than old, may need extra space (NOTE:
3478 * SvCUR() is not set correctly here). See Note on sizing
3479 * above. */
3480 const STRLEN off = d - SvPVX_const(sv);
3481 d = SvGROW(sv, off + need + (STRLEN)(send - s) + 1) + off;
3482 }
3483 s += len;
3484
3485 d = (char*)uvchr_to_utf8((U8*)d, nextuv);
3486#ifdef EBCDIC
3487 if (uv > 255 && !dorange)
3488 native_range = FALSE;
3489#endif
3490 }
3491 else {
3492 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
3493 }
3494 } /* while loop to process each character */
3495
3496 /* terminate the string and set up the sv */
3497 *d = '\0';
3498 SvCUR_set(sv, d - SvPVX_const(sv));
3499 if (SvCUR(sv) >= SvLEN(sv))
3500 Perl_croak(aTHX_ "panic: constant overflowed allocated space");
3501
3502 SvPOK_on(sv);
3503 if (PL_encoding && !has_utf8) {
3504 sv_recode_to_utf8(sv, PL_encoding);
3505 if (SvUTF8(sv))
3506 has_utf8 = TRUE;
3507 }
3508 if (has_utf8) {
3509 SvUTF8_on(sv);
3510 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
3511 PL_sublex_info.sub_op->op_private |=
3512 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
3513 }
3514 }
3515
3516 /* shrink the sv if we allocated more than we used */
3517 if (SvCUR(sv) + 5 < SvLEN(sv)) {
3518 SvPV_shrink_to_cur(sv);
3519 }
3520
3521 /* return the substring (via pl_yylval) only if we parsed anything */
3522 if (s > PL_bufptr) {
3523 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) ) {
3524 const char *const key = PL_lex_inpat ? "qr" : "q";
3525 const STRLEN keylen = PL_lex_inpat ? 2 : 1;
3526 const char *type;
3527 STRLEN typelen;
3528
3529 if (PL_lex_inwhat == OP_TRANS) {
3530 type = "tr";
3531 typelen = 2;
3532 } else if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) {
3533 type = "s";
3534 typelen = 1;
3535 } else {
3536 type = "qq";
3537 typelen = 2;
3538 }
3539
3540 sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL,
3541 type, typelen);
3542 }
3543 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3544 } else
3545 SvREFCNT_dec(sv);
3546 return s;
3547}
3548
3549/* S_intuit_more
3550 * Returns TRUE if there's more to the expression (e.g., a subscript),
3551 * FALSE otherwise.
3552 *
3553 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
3554 *
3555 * ->[ and ->{ return TRUE
3556 * { and [ outside a pattern are always subscripts, so return TRUE
3557 * if we're outside a pattern and it's not { or [, then return FALSE
3558 * if we're in a pattern and the first char is a {
3559 * {4,5} (any digits around the comma) returns FALSE
3560 * if we're in a pattern and the first char is a [
3561 * [] returns FALSE
3562 * [SOMETHING] has a funky algorithm to decide whether it's a
3563 * character class or not. It has to deal with things like
3564 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
3565 * anything else returns TRUE
3566 */
3567
3568/* This is the one truly awful dwimmer necessary to conflate C and sed. */
3569
3570STATIC int
3571S_intuit_more(pTHX_ register char *s)
3572{
3573 dVAR;
3574
3575 PERL_ARGS_ASSERT_INTUIT_MORE;
3576
3577 if (PL_lex_brackets)
3578 return TRUE;
3579 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
3580 return TRUE;
3581 if (*s != '{' && *s != '[')
3582 return FALSE;
3583 if (!PL_lex_inpat)
3584 return TRUE;
3585
3586 /* In a pattern, so maybe we have {n,m}. */
3587 if (*s == '{') {
3588 if (regcurly(s)) {
3589 return FALSE;
3590 }
3591 return TRUE;
3592 }
3593
3594 /* On the other hand, maybe we have a character class */
3595
3596 s++;
3597 if (*s == ']' || *s == '^')
3598 return FALSE;
3599 else {
3600 /* this is terrifying, and it works */
3601 int weight = 2; /* let's weigh the evidence */
3602 char seen[256];
3603 unsigned char un_char = 255, last_un_char;
3604 const char * const send = strchr(s,']');
3605 char tmpbuf[sizeof PL_tokenbuf * 4];
3606
3607 if (!send) /* has to be an expression */
3608 return TRUE;
3609
3610 Zero(seen,256,char);
3611 if (*s == '$')
3612 weight -= 3;
3613 else if (isDIGIT(*s)) {
3614 if (s[1] != ']') {
3615 if (isDIGIT(s[1]) && s[2] == ']')
3616 weight -= 10;
3617 }
3618 else
3619 weight -= 100;
3620 }
3621 for (; s < send; s++) {
3622 last_un_char = un_char;
3623 un_char = (unsigned char)*s;
3624 switch (*s) {
3625 case '@':
3626 case '&':
3627 case '$':
3628 weight -= seen[un_char] * 10;
3629 if (isALNUM_lazy_if(s+1,UTF)) {
3630 int len;
3631 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
3632 len = (int)strlen(tmpbuf);
3633 if (len > 1 && gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PV))
3634 weight -= 100;
3635 else
3636 weight -= 10;
3637 }
3638 else if (*s == '$' && s[1] &&
3639 strchr("[#!%*<>()-=",s[1])) {
3640 if (/*{*/ strchr("])} =",s[2]))
3641 weight -= 10;
3642 else
3643 weight -= 1;
3644 }
3645 break;
3646 case '\\':
3647 un_char = 254;
3648 if (s[1]) {
3649 if (strchr("wds]",s[1]))
3650 weight += 100;
3651 else if (seen[(U8)'\''] || seen[(U8)'"'])
3652 weight += 1;
3653 else if (strchr("rnftbxcav",s[1]))
3654 weight += 40;
3655 else if (isDIGIT(s[1])) {
3656 weight += 40;
3657 while (s[1] && isDIGIT(s[1]))
3658 s++;
3659 }
3660 }
3661 else
3662 weight += 100;
3663 break;
3664 case '-':
3665 if (s[1] == '\\')
3666 weight += 50;
3667 if (strchr("aA01! ",last_un_char))
3668 weight += 30;
3669 if (strchr("zZ79~",s[1]))
3670 weight += 30;
3671 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
3672 weight -= 5; /* cope with negative subscript */
3673 break;
3674 default:
3675 if (!isALNUM(last_un_char)
3676 && !(last_un_char == '$' || last_un_char == '@'
3677 || last_un_char == '&')
3678 && isALPHA(*s) && s[1] && isALPHA(s[1])) {
3679 char *d = tmpbuf;
3680 while (isALPHA(*s))
3681 *d++ = *s++;
3682 *d = '\0';
3683 if (keyword(tmpbuf, d - tmpbuf, 0))
3684 weight -= 150;
3685 }
3686 if (un_char == last_un_char + 1)
3687 weight += 5;
3688 weight -= seen[un_char];
3689 break;
3690 }
3691 seen[un_char]++;
3692 }
3693 if (weight >= 0) /* probably a character class */
3694 return FALSE;
3695 }
3696
3697 return TRUE;
3698}
3699
3700/*
3701 * S_intuit_method
3702 *
3703 * Does all the checking to disambiguate
3704 * foo bar
3705 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
3706 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
3707 *
3708 * First argument is the stuff after the first token, e.g. "bar".
3709 *
3710 * Not a method if bar is a filehandle.
3711 * Not a method if foo is a subroutine prototyped to take a filehandle.
3712 * Not a method if it's really "Foo $bar"
3713 * Method if it's "foo $bar"
3714 * Not a method if it's really "print foo $bar"
3715 * Method if it's really "foo package::" (interpreted as package->foo)
3716 * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
3717 * Not a method if bar is a filehandle or package, but is quoted with
3718 * =>
3719 */
3720
3721STATIC int
3722S_intuit_method(pTHX_ char *start, GV *gv, CV *cv)
3723{
3724 dVAR;
3725 char *s = start + (*start == '$');
3726 char tmpbuf[sizeof PL_tokenbuf];
3727 STRLEN len;
3728 GV* indirgv;
3729#ifdef PERL_MAD
3730 int soff;
3731#endif
3732
3733 PERL_ARGS_ASSERT_INTUIT_METHOD;
3734
3735 if (gv) {
3736 if (SvTYPE(gv) == SVt_PVGV && GvIO(gv))
3737 return 0;
3738 if (cv) {
3739 if (SvPOK(cv)) {
3740 const char *proto = SvPVX_const(cv);
3741 if (proto) {
3742 if (*proto == ';')
3743 proto++;
3744 if (*proto == '*')
3745 return 0;
3746 }
3747 }
3748 } else
3749 gv = NULL;
3750 }
3751 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
3752 /* start is the beginning of the possible filehandle/object,
3753 * and s is the end of it
3754 * tmpbuf is a copy of it
3755 */
3756
3757 if (*start == '$') {
3758 if (gv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY ||
3759 isUPPER(*PL_tokenbuf))
3760 return 0;
3761#ifdef PERL_MAD
3762 len = start - SvPVX(PL_linestr);
3763#endif
3764 s = PEEKSPACE(s);
3765#ifdef PERL_MAD
3766 start = SvPVX(PL_linestr) + len;
3767#endif
3768 PL_bufptr = start;
3769 PL_expect = XREF;
3770 return *s == '(' ? FUNCMETH : METHOD;
3771 }
3772 if (!keyword(tmpbuf, len, 0)) {
3773 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
3774 len -= 2;
3775 tmpbuf[len] = '\0';
3776#ifdef PERL_MAD
3777 soff = s - SvPVX(PL_linestr);
3778#endif
3779 goto bare_package;
3780 }
3781 indirgv = gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PVCV);
3782 if (indirgv && GvCVu(indirgv))
3783 return 0;
3784 /* filehandle or package name makes it a method */
3785 if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, 0)) {
3786#ifdef PERL_MAD
3787 soff = s - SvPVX(PL_linestr);
3788#endif
3789 s = PEEKSPACE(s);
3790 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
3791 return 0; /* no assumptions -- "=>" quotes bareword */
3792 bare_package:
3793 start_force(PL_curforce);
3794 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0,
3795 S_newSV_maybe_utf8(aTHX_ tmpbuf, len));
3796 NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
3797 if (PL_madskills)
3798 curmad('X', newSVpvn(start,SvPVX(PL_linestr) + soff - start));
3799 PL_expect = XTERM;
3800 force_next(WORD);
3801 PL_bufptr = s;
3802#ifdef PERL_MAD
3803 PL_bufptr = SvPVX(PL_linestr) + soff; /* restart before space */
3804#endif
3805 return *s == '(' ? FUNCMETH : METHOD;
3806 }
3807 }
3808 return 0;
3809}
3810
3811/* Encoded script support. filter_add() effectively inserts a
3812 * 'pre-processing' function into the current source input stream.
3813 * Note that the filter function only applies to the current source file
3814 * (e.g., it will not affect files 'require'd or 'use'd by this one).
3815 *
3816 * The datasv parameter (which may be NULL) can be used to pass
3817 * private data to this instance of the filter. The filter function
3818 * can recover the SV using the FILTER_DATA macro and use it to
3819 * store private buffers and state information.
3820 *
3821 * The supplied datasv parameter is upgraded to a PVIO type
3822 * and the IoDIRP/IoANY field is used to store the function pointer,
3823 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
3824 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
3825 * private use must be set using malloc'd pointers.
3826 */
3827
3828SV *
3829Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
3830{
3831 dVAR;
3832 if (!funcp)
3833 return NULL;
3834
3835 if (!PL_parser)
3836 return NULL;
3837
3838 if (!PL_rsfp_filters)
3839 PL_rsfp_filters = newAV();
3840 if (!datasv)
3841 datasv = newSV(0);
3842 SvUPGRADE(datasv, SVt_PVIO);
3843 IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
3844 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
3845 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
3846 FPTR2DPTR(void *, IoANY(datasv)),
3847 SvPV_nolen(datasv)));
3848 av_unshift(PL_rsfp_filters, 1);
3849 av_store(PL_rsfp_filters, 0, datasv) ;
3850 return(datasv);
3851}
3852
3853
3854/* Delete most recently added instance of this filter function. */
3855void
3856Perl_filter_del(pTHX_ filter_t funcp)
3857{
3858 dVAR;
3859 SV *datasv;
3860
3861 PERL_ARGS_ASSERT_FILTER_DEL;
3862
3863#ifdef DEBUGGING
3864 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
3865 FPTR2DPTR(void*, funcp)));
3866#endif
3867 if (!PL_parser || !PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
3868 return;
3869 /* if filter is on top of stack (usual case) just pop it off */
3870 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
3871 if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
3872 sv_free(av_pop(PL_rsfp_filters));
3873
3874 return;
3875 }
3876 /* we need to search for the correct entry and clear it */
3877 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
3878}
3879
3880
3881/* Invoke the idxth filter function for the current rsfp. */
3882/* maxlen 0 = read one text line */
3883I32
3884Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
3885{
3886 dVAR;
3887 filter_t funcp;
3888 SV *datasv = NULL;
3889 /* This API is bad. It should have been using unsigned int for maxlen.
3890 Not sure if we want to change the API, but if not we should sanity
3891 check the value here. */
3892 const unsigned int correct_length
3893 = maxlen < 0 ?
3894#ifdef PERL_MICRO
3895 0x7FFFFFFF
3896#else
3897 INT_MAX
3898#endif
3899 : maxlen;
3900
3901 PERL_ARGS_ASSERT_FILTER_READ;
3902
3903 if (!PL_parser || !PL_rsfp_filters)
3904 return -1;
3905 if (idx > AvFILLp(PL_rsfp_filters)) { /* Any more filters? */
3906 /* Provide a default input filter to make life easy. */
3907 /* Note that we append to the line. This is handy. */
3908 DEBUG_P(PerlIO_printf(Perl_debug_log,
3909 "filter_read %d: from rsfp\n", idx));
3910 if (correct_length) {
3911 /* Want a block */
3912 int len ;
3913 const int old_len = SvCUR(buf_sv);
3914
3915 /* ensure buf_sv is large enough */
3916 SvGROW(buf_sv, (STRLEN)(old_len + correct_length + 1)) ;
3917 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
3918 correct_length)) <= 0) {
3919 if (PerlIO_error(PL_rsfp))
3920 return -1; /* error */
3921 else
3922 return 0 ; /* end of file */
3923 }
3924 SvCUR_set(buf_sv, old_len + len) ;
3925 SvPVX(buf_sv)[old_len + len] = '\0';
3926 } else {
3927 /* Want a line */
3928 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
3929 if (PerlIO_error(PL_rsfp))
3930 return -1; /* error */
3931 else
3932 return 0 ; /* end of file */
3933 }
3934 }
3935 return SvCUR(buf_sv);
3936 }
3937 /* Skip this filter slot if filter has been deleted */
3938 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
3939 DEBUG_P(PerlIO_printf(Perl_debug_log,
3940 "filter_read %d: skipped (filter deleted)\n",
3941 idx));
3942 return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
3943 }
3944 /* Get function pointer hidden within datasv */
3945 funcp = DPTR2FPTR(filter_t, IoANY(datasv));
3946 DEBUG_P(PerlIO_printf(Perl_debug_log,
3947 "filter_read %d: via function %p (%s)\n",
3948 idx, (void*)datasv, SvPV_nolen_const(datasv)));
3949 /* Call function. The function is expected to */
3950 /* call "FILTER_READ(idx+1, buf_sv)" first. */
3951 /* Return: <0:error, =0:eof, >0:not eof */
3952 return (*funcp)(aTHX_ idx, buf_sv, correct_length);
3953}
3954
3955STATIC char *
3956S_filter_gets(pTHX_ register SV *sv, STRLEN append)
3957{
3958 dVAR;
3959
3960 PERL_ARGS_ASSERT_FILTER_GETS;
3961
3962#ifdef PERL_CR_FILTER
3963 if (!PL_rsfp_filters) {
3964 filter_add(S_cr_textfilter,NULL);
3965 }
3966#endif
3967 if (PL_rsfp_filters) {
3968 if (!append)
3969 SvCUR_set(sv, 0); /* start with empty line */
3970 if (FILTER_READ(0, sv, 0) > 0)
3971 return ( SvPVX(sv) ) ;
3972 else
3973 return NULL ;
3974 }
3975 else
3976 return (sv_gets(sv, PL_rsfp, append));
3977}
3978
3979STATIC HV *
3980S_find_in_my_stash(pTHX_ const char *pkgname, STRLEN len)
3981{
3982 dVAR;
3983 GV *gv;
3984
3985 PERL_ARGS_ASSERT_FIND_IN_MY_STASH;
3986
3987 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
3988 return PL_curstash;
3989
3990 if (len > 2 &&
3991 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
3992 (gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVHV)))
3993 {
3994 return GvHV(gv); /* Foo:: */
3995 }
3996
3997 /* use constant CLASS => 'MyClass' */
3998 gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVCV);
3999 if (gv && GvCV(gv)) {
4000 SV * const sv = cv_const_sv(GvCV(gv));
4001 if (sv)
4002 pkgname = SvPV_const(sv, len);
4003 }
4004
4005 return gv_stashpvn(pkgname, len, 0);
4006}
4007
4008/*
4009 * S_readpipe_override
4010 * Check whether readpipe() is overridden, and generates the appropriate
4011 * optree, provided sublex_start() is called afterwards.
4012 */
4013STATIC void
4014S_readpipe_override(pTHX)
4015{
4016 GV **gvp;
4017 GV *gv_readpipe = gv_fetchpvs("readpipe", GV_NOTQUAL, SVt_PVCV);
4018 pl_yylval.ival = OP_BACKTICK;
4019 if ((gv_readpipe
4020 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe))
4021 ||
4022 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readpipe", FALSE))
4023 && (gv_readpipe = *gvp) && isGV_with_GP(gv_readpipe)
4024 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe)))
4025 {
4026 PL_lex_op = (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
4027 op_append_elem(OP_LIST,
4028 newSVOP(OP_CONST, 0, &PL_sv_undef), /* value will be read later */
4029 newCVREF(0, newGVOP(OP_GV, 0, gv_readpipe))));
4030 }
4031}
4032
4033#ifdef PERL_MAD
4034 /*
4035 * Perl_madlex
4036 * The intent of this yylex wrapper is to minimize the changes to the
4037 * tokener when we aren't interested in collecting madprops. It remains
4038 * to be seen how successful this strategy will be...
4039 */
4040
/*
 * Perl_madlex
 *
 * Wrapper around yylex() that gathers the text surrounding each token
 * (leading whitespace, the token text itself, quoted material, trailing
 * comments) into the PL_thismad madprop list, then hands that list either
 * to the op already stored in pl_yylval.opval or to a freshly created
 * TOKEN stored in pl_yylval.tkval.
 *
 * Returns the token type:
 *   - whatever S_pending_ident() returns when an identifier is pending;
 *   - 0 when yylex() reported EOF and no whitespace is left to attach;
 *   - otherwise the type returned by yylex(), except that a 0 (EOF) which
 *     still has whitespace to carry is reported as PEG.
 *
 * NOTE(review): CURMAD is a macro defined outside this function; the code
 * below reads as if it attaches the SV to PL_thismad under the given key
 * -- confirm against its definition before relying on that.
 */
4041int
4042Perl_madlex(pTHX)
4043{
4044 int optype;
4045 char *s = PL_bufptr;
4046
4047 /* make sure PL_thiswhite is initialized; PL_thismad likewise starts empty */
4048 PL_thiswhite = 0;
4049 PL_thismad = 0;
4050
4051 /* just do what yylex would do on pending identifier; leave PL_thiswhite alone */
4052 if (PL_lex_state != LEX_KNOWNEXT && PL_pending_ident)
4053 return S_pending_ident(aTHX);
4054
4055 /* previous token ate up our whitespace? */
 /* (only when no forced tokens are queued: take over PL_nextwhite) */
4056 if (!PL_lasttoke && PL_nextwhite) {
4057 PL_thiswhite = PL_nextwhite;
4058 PL_nextwhite = 0;
4059 }
4060
4061 /* isolate the token, and figure out where it is without whitespace */
 /* PL_realtokenstart == -1 means "no token start recorded" (tested below) */
4062 PL_realtokenstart = -1;
4063 PL_thistoken = 0;
4064 optype = yylex();
4065 s = PL_bufptr;
4066 assert(PL_curforce < 0);
4067
4068 if (!PL_thismad || PL_thismad->mad_key == '^') { /* not forced already? */
4069 if (!PL_thistoken) {
 /* yylex() left no token text: use "" when no start offset was
  * recorded or the current line number is 0, otherwise copy the
  * span from the recorded offset in PL_linestr up to PL_bufptr */
4070 if (PL_realtokenstart < 0 || !CopLINE(PL_curcop))
4071 PL_thistoken = newSVpvs("");
4072 else {
4073 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
4074 PL_thistoken = newSVpvn(tstart, s - tstart);
4075 }
4076 }
4077 if (PL_thismad) /* install head */
4078 CURMAD('X', PL_thistoken);
4079 }
4080
4081 /* last whitespace of a sublex? */
4082 if (optype == ')' && PL_endwhite) {
4083 CURMAD('X', PL_endwhite);
4084 }
4085
 /* nothing collected so far: build the madprop list from the pieces
  * (PL_thisopen, PL_thistoken, PL_thiswhite, PL_thisstuff, PL_thisclose) */
4086 if (!PL_thismad) {
4087
4088 /* if no whitespace and we're at EOF, bail. Otherwise fake EOF below. */
4089 if (!PL_thiswhite && !PL_endwhite && !optype) {
4090 sv_free(PL_thistoken);
4091 PL_thistoken = 0;
4092 return 0;
4093 }
4094
4095 /* put off final whitespace till peg */
 /* (applies to a ';' token when there is no input file handle) */
4096 if (optype == ';' && !PL_rsfp) {
4097 PL_nextwhite = PL_thiswhite;
4098 PL_thiswhite = 0;
4099 }
4100 else if (PL_thisopen) {
 /* PL_thisopen is recorded under key 'q' and the plain token
  * text is discarded -- presumably the opening quote; verify */
4101 CURMAD('q', PL_thisopen);
4102 if (PL_thistoken)
4103 sv_free(PL_thistoken);
4104 PL_thistoken = 0;
4105 }
4106 else {
4107 /* Store actual token text as madprop X */
4108 CURMAD('X', PL_thistoken);
4109 }
4110
4111 if (PL_thiswhite) {
4112 /* add preceding whitespace as madprop _ */
4113 CURMAD('_', PL_thiswhite);
4114 }
4115
4116 if (PL_thisstuff) {
4117 /* add quoted material as madprop = */
4118 CURMAD('=', PL_thisstuff);
4119 }
4120
4121 if (PL_thisclose) {
4122 /* add terminating quote as madprop Q */
4123 CURMAD('Q', PL_thisclose);
4124 }
4125 }
4126
4127 /* special processing based on optype */
4128
4129 switch (optype) {
4130
4131 /* opval doesn't need a TOKEN since it can already store mp */
 /* these cases return directly, bypassing newTOKEN() at the bottom */
4132 case WORD:
4133 case METHOD:
4134 case FUNCMETH:
4135 case THING:
4136 case PMFUNC:
4137 case PRIVATEREF:
4138 case FUNC0SUB:
4139 case UNIOPSUB:
4140 case LSTOPSUB:
4141 if (pl_yylval.opval)
4142 append_madprops(PL_thismad, pl_yylval.opval, 0);
4143 PL_thismad = 0;
4144 return optype;
4145
4146 /* fake EOF */
 /* reached only when whitespace remains (the bare-EOF case returned
  * earlier); the token is reported as PEG and any PL_endwhite is
  * added to the list under key 'p' */
4147 case 0:
4148 optype = PEG;
4149 if (PL_endwhite) {
4150 addmad(newMADsv('p', PL_endwhite), &PL_thismad, 0);
4151 PL_endwhite = 0;
4152 }
4153 break;
4154
4155 case ']':
4156 case '}':
4157 if (PL_faketokens)
4158 break;
4159 /* remember any fake bracket that lexer is about to discard */
4160 if (PL_lex_brackets == 1 &&
4161 ((expectation)PL_lex_brackstack[0] & XFAKEBRACK))
4162 {
 /* skip blanks/tabs after the token and look for a '}' */
4163 s = PL_bufptr;
4164 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
4165 s++;
4166 if (*s == '}') {
 /* ++s makes the captured text run through the '}' itself;
  * PL_bufptr is then set to s - 1, i.e. onto that '}' */
4167 PL_thiswhite = newSVpvn(PL_bufptr, ++s - PL_bufptr);
4168 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
4169 PL_thiswhite = 0;
4170 PL_bufptr = s - 1;
4171 break; /* don't bother looking for trailing comment */
4172 }
4173 else
4174 s = PL_bufptr;
4175 }
 /* only '}' continues into the trailing-comment handling below */
4176 if (optype == ']')
4177 break;
4178 /* FALLTHROUGH */
4179
4180 /* attach a trailing comment to its statement instead of next token */
4181 case ';':
4182 if (PL_faketokens)
4183 break;
 /* only when the character just before PL_bufptr is the token itself */
4184 if (PL_bufptr > PL_oldbufptr && PL_bufptr[-1] == optype) {
4185 s = PL_bufptr;
4186 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
4187 s++;
4188 if (*s == '\n' || *s == '#') {
 /* take the rest of the line, including its newline when
  * one exists, store it under key '#', and advance
  * PL_bufptr past it */
4189 while (s < PL_bufend && *s != '\n')
4190 s++;
4191 if (s < PL_bufend)
4192 s++;
4193 PL_thiswhite = newSVpvn(PL_bufptr, s - PL_bufptr);
4194 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
4195 PL_thiswhite = 0;
4196 PL_bufptr = s;
4197 }
4198 }
4199 break;
4200
4201 /* pval */
4202 case LABEL:
4203 break;
4204
4205 /* ival */
4206 default:
4207 break;
4208
4209 }
4210
4211 /* Create new token struct. Note: opvals return early above. */
 /* the TOKEN is built from the current pl_yylval and the collected
  * madprops, then stored back into pl_yylval.tkval */
4212 pl_yylval.tkval = newTOKEN(optype, pl_yylval, PL_thismad);
4213 PL_thismad = 0;
4214 return optype;
4215}
4216#endif
4217
4218STATIC char *
4219S_tokenize_use(pTHX_ int is_use, char *s) {
4220 dVAR;
4221
4222 PERL_ARGS_ASSERT_TOKENIZE_USE;
4223
4224 if (PL_expect != XSTATE)
4225 yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
4226 is_use ? "use" : "no"));
4227 s = SKIPSPACE1(s);
4228 if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
4229 s = force_version(s, TRUE);
4230 if (*s == ';' || *s == '}'
4231 || (s = SKIPSPACE1(s), (*s == ';' || *s == '}'))) {
4232 start_force(PL_curforce);
4233 NEXTVAL_NEXTTOKE.opval = NULL;
4234 force_next(WORD);
4235 }
4236 else if (*s == 'v') {
4237 s = force_word(s,WORD,FALSE,TRUE,FALSE);
4238 s = force_version(s, FALSE);
4239 }
4240 }
4241 else {
4242 s = force_word(s,WORD,FALSE,TRUE,FALSE);
4243 s = force_version(s, FALSE);
4244 }
4245 pl_yylval.ival = is_use;
4246 return s;
4247}
4248#ifdef DEBUGGING
 /* Printable names for the lexer's expectation states, compiled in only
  * for DEBUGGING builds.  yylex() indexes this table with PL_expect when
  * it prints its DEBUG_T trace line, so the entries must stay in the same
  * order as the numeric values PL_expect can take (presumably the X*
  * expectation constants declared elsewhere -- verify when adding one). */
4249 static const char* const exp_name[] =
4250 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
4251 "ATTRTERM", "TERMBLOCK", "TERMORDORDOR"
4252 };
4253#endif
4254
4255#define word_takes_any_delimeter(p,l) S_word_takes_any_delimeter(p,l)
4256STATIC bool
4257S_word_takes_any_delimeter(char *p, STRLEN len)
4258{
4259 return (len == 1 && strchr("msyq", p[0])) ||
4260 (len == 2 && (
4261 (p[0] == 't' && p[1] == 'r') ||
4262 (p[0] == 'q' && strchr("qwxr", p[1]))));
4263}
4264
4265/*
4266 yylex
4267
4268 Works out what to call the token just pulled out of the input
4269 stream. The yacc parser takes care of taking the ops we return and
4270 stitching them into a tree.
4271
4272 Returns:
4273 PRIVATEREF
4274
4275 Structure:
4276 if read an identifier
4277 if we're in a my declaration
4278 croak if they tried to say my($foo::bar)
4279 build the ops for a my() declaration
4280 if it's an access to a my() variable
4281 are we in a sort block?
4282 croak if my($a); $a <=> $b
4283 build ops for access to a my() variable
4284 if in a dq string, and they've said @foo and we can't find @foo
4285 croak
4286 build ops for a bareword
4287 if we already built the token before, use it.
4288*/
4289
4290
4291#ifdef __SC__
4292#pragma segment Perl_yylex
4293#endif
4294int
4295Perl_yylex(pTHX)
4296{
4297 dVAR;
4298 register char *s = PL_bufptr;
4299 register char *d;
4300 STRLEN len;
4301 bool bof = FALSE;
4302 U32 fake_eof = 0;
4303
4304 /* orig_keyword, gvp, and gv are initialized here because
4305 * jump to the label just_a_word_zero can bypass their
4306 * initialization later. */
4307 I32 orig_keyword = 0;
4308 GV *gv = NULL;
4309 GV **gvp = NULL;
4310
4311 DEBUG_T( {
4312 SV* tmp = newSVpvs("");
4313 PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n",
4314 (IV)CopLINE(PL_curcop),
4315 lex_state_names[PL_lex_state],
4316 exp_name[PL_expect],
4317 pv_display(tmp, s, strlen(s), 0, 60));
4318 SvREFCNT_dec(tmp);
4319 } );
4320 /* check if there's an identifier for us to look at */
4321 if (PL_lex_state != LEX_KNOWNEXT && PL_pending_ident)
4322 return REPORT(S_pending_ident(aTHX));
4323
4324 /* no identifier pending identification */
4325
4326 switch (PL_lex_state) {
4327#ifdef COMMENTARY
4328 case LEX_NORMAL: /* Some compilers will produce faster */
4329 case LEX_INTERPNORMAL: /* code if we comment these out. */
4330 break;
4331#endif
4332
4333 /* when we've already built the next token, just pull it out of the queue */
4334 case LEX_KNOWNEXT:
4335#ifdef PERL_MAD
4336 PL_lasttoke--;
4337 pl_yylval = PL_nexttoke[PL_lasttoke].next_val;
4338 if (PL_madskills) {
4339 PL_thismad = PL_nexttoke[PL_lasttoke].next_mad;
4340 PL_nexttoke[PL_lasttoke].next_mad = 0;
4341 if (PL_thismad && PL_thismad->mad_key == '_') {
4342 PL_thiswhite = MUTABLE_SV(PL_thismad->mad_val);
4343 PL_thismad->mad_val = 0;
4344 mad_free(PL_thismad);
4345 PL_thismad = 0;
4346 }
4347 }
4348 if (!PL_lasttoke) {
4349 PL_lex_state = PL_lex_defer;
4350 PL_expect = PL_lex_expect;
4351 PL_lex_defer = LEX_NORMAL;
4352 if (!PL_nexttoke[PL_lasttoke].next_type)
4353 return yylex();
4354 }
4355#else
4356 PL_nexttoke--;
4357 pl_yylval = PL_nextval[PL_nexttoke];
4358 if (!PL_nexttoke) {
4359 PL_lex_state = PL_lex_defer;
4360 PL_expect = PL_lex_expect;
4361 PL_lex_defer = LEX_NORMAL;
4362 }
4363#endif
4364 {
4365 I32 next_type;
4366#ifdef PERL_MAD
4367 next_type = PL_nexttoke[PL_lasttoke].next_type;
4368#else
4369 next_type = PL_nexttype[PL_nexttoke];
4370#endif
4371 if (next_type & (7<<24)) {
4372 if (next_type & (1<<24)) {
4373 if (PL_lex_brackets > 100)
4374 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
4375 PL_lex_brackstack[PL_lex_brackets++] =
4376 (char) ((next_type >> 16) & 0xff);
4377 }
4378 if (next_type & (2<<24))
4379 PL_lex_allbrackets++;
4380 if (next_type & (4<<24))
4381 PL_lex_allbrackets--;
4382 next_type &= 0xffff;
4383 }
4384#ifdef PERL_MAD
4385 /* FIXME - can these be merged? */
4386 return next_type;
4387#else
4388 return REPORT(next_type);
4389#endif
4390 }
4391
4392 /* interpolated case modifiers like \L \U, including \Q and \E.
4393 when we get here, PL_bufptr is at the \
4394 */
4395 case LEX_INTERPCASEMOD:
4396#ifdef DEBUGGING
4397 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
4398 Perl_croak(aTHX_ "panic: INTERPCASEMOD");
4399#endif
4400 /* handle \E or end of string */
4401 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
4402 /* if at a \E */
4403 if (PL_lex_casemods) {
4404 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
4405 PL_lex_casestack[PL_lex_casemods] = '\0';
4406
4407 if (PL_bufptr != PL_bufend
4408 && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q')) {
4409 PL_bufptr += 2;
4410 PL_lex_state = LEX_INTERPCONCAT;
4411#ifdef PERL_MAD
4412 if (PL_madskills)
4413 PL_thistoken = newSVpvs("\\E");
4414#endif
4415 }
4416 PL_lex_allbrackets--;
4417 return REPORT(')');
4418 }
4419#ifdef PERL_MAD
4420 while (PL_bufptr != PL_bufend &&
4421 PL_bufptr[0] == '\\' && PL_bufptr[1] == 'E') {
4422 if (!PL_thiswhite)
4423 PL_thiswhite = newSVpvs("");
4424 sv_catpvn(PL_thiswhite, PL_bufptr, 2);
4425 PL_bufptr += 2;
4426 }
4427#else
4428 if (PL_bufptr != PL_bufend)
4429 PL_bufptr += 2;
4430#endif
4431 PL_lex_state = LEX_INTERPCONCAT;
4432 return yylex();
4433 }
4434 else {
4435 DEBUG_T({ PerlIO_printf(Perl_debug_log,
4436 "### Saw case modifier\n"); });
4437 s = PL_bufptr + 1;
4438 if (s[1] == '\\' && s[2] == 'E') {
4439#ifdef PERL_MAD
4440 if (!PL_thiswhite)
4441 PL_thiswhite = newSVpvs("");
4442 sv_catpvn(PL_thiswhite, PL_bufptr, 4);
4443#endif
4444 PL_bufptr = s + 3;
4445 PL_lex_state = LEX_INTERPCONCAT;
4446 return yylex();
4447 }
4448 else {
4449 I32 tmp;
4450 if (!PL_madskills) /* when just compiling don't need correct */
4451 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
4452 tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */
4453 if ((*s == 'L' || *s == 'U') &&
4454 (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U'))) {
4455 PL_lex_casestack[--PL_lex_casemods] = '\0';
4456 PL_lex_allbrackets--;
4457 return REPORT(')');
4458 }
4459 if (PL_lex_casemods > 10)
4460 Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
4461 PL_lex_casestack[PL_lex_casemods++] = *s;
4462 PL_lex_casestack[PL_lex_casemods] = '\0';
4463 PL_lex_state = LEX_INTERPCONCAT;
4464 start_force(PL_curforce);
4465 NEXTVAL_NEXTTOKE.ival = 0;
4466 force_next((2<<24)|'(');
4467 start_force(PL_curforce);
4468 if (*s == 'l')
4469 NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
4470 else if (*s == 'u')
4471 NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
4472 else if (*s == 'L')
4473 NEXTVAL_NEXTTOKE.ival = OP_LC;
4474 else if (*s == 'U')
4475 NEXTVAL_NEXTTOKE.ival = OP_UC;
4476 else if (*s == 'Q')
4477 NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
4478 else
4479 Perl_croak(aTHX_ "panic: yylex");
4480 if (PL_madskills) {
4481 SV* const tmpsv = newSVpvs("\\ ");
4482 /* replace the space with the character we want to escape
4483 */
4484 SvPVX(tmpsv)[1] = *s;
4485 curmad('_', tmpsv);
4486 }
4487 PL_bufptr = s + 1;
4488 }
4489 force_next(FUNC);
4490 if (PL_lex_starts) {
4491 s = PL_bufptr;
4492 PL_lex_starts = 0;
4493#ifdef PERL_MAD
4494 if (PL_madskills) {
4495 if (PL_thistoken)
4496 sv_free(PL_thistoken);
4497 PL_thistoken = newSVpvs("");
4498 }
4499#endif
4500 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4501 if (PL_lex_casemods == 1 && PL_lex_inpat)
4502 OPERATOR(',');
4503 else
4504 Aop(OP_CONCAT);
4505 }
4506 else
4507 return yylex();
4508 }
4509
4510 case LEX_INTERPPUSH:
4511 return REPORT(sublex_push());
4512
4513 case LEX_INTERPSTART:
4514 if (PL_bufptr == PL_bufend)
4515 return REPORT(sublex_done());
4516 DEBUG_T({ PerlIO_printf(Perl_debug_log,
4517 "### Interpolated variable\n"); });
4518 PL_expect = XTERM;
4519 PL_lex_dojoin = (*PL_bufptr == '@');
4520 PL_lex_state = LEX_INTERPNORMAL;
4521 if (PL_lex_dojoin) {
4522 start_force(PL_curforce);
4523 NEXTVAL_NEXTTOKE.ival = 0;
4524 force_next(',');
4525 start_force(PL_curforce);
4526 force_ident("\"", '$');
4527 start_force(PL_curforce);
4528 NEXTVAL_NEXTTOKE.ival = 0;
4529 force_next('$');
4530 start_force(PL_curforce);
4531 NEXTVAL_NEXTTOKE.ival = 0;
4532 force_next((2<<24)|'(');
4533 start_force(PL_curforce);
4534 NEXTVAL_NEXTTOKE.ival = OP_JOIN; /* emulate join($", ...) */
4535 force_next(FUNC);
4536 }
4537 if (PL_lex_starts++) {
4538 s = PL_bufptr;
4539#ifdef PERL_MAD
4540 if (PL_madskills) {
4541 if (PL_thistoken)
4542 sv_free(PL_thistoken);
4543 PL_thistoken = newSVpvs("");
4544 }
4545#endif
4546 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4547 if (!PL_lex_casemods && PL_lex_inpat)
4548 OPERATOR(',');
4549 else
4550 Aop(OP_CONCAT);
4551 }
4552 return yylex();
4553
4554 case LEX_INTERPENDMAYBE:
4555 if (intuit_more(PL_bufptr)) {
4556 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
4557 break;
4558 }
4559 /* FALL THROUGH */
4560
4561 case LEX_INTERPEND:
4562 if (PL_lex_dojoin) {
4563 PL_lex_dojoin = FALSE;
4564 PL_lex_state = LEX_INTERPCONCAT;
4565#ifdef PERL_MAD
4566 if (PL_madskills) {
4567 if (PL_thistoken)
4568 sv_free(PL_thistoken);
4569 PL_thistoken = newSVpvs("");
4570 }
4571#endif
4572 PL_lex_allbrackets--;
4573 return REPORT(')');
4574 }
4575 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
4576 && SvEVALED(PL_lex_repl))
4577 {
4578 if (PL_bufptr != PL_bufend)
4579 Perl_croak(aTHX_ "Bad evalled substitution pattern");
4580 PL_lex_repl = NULL;
4581 }
4582 /* FALLTHROUGH */
4583 case LEX_INTERPCONCAT:
4584#ifdef DEBUGGING
4585 if (PL_lex_brackets)
4586 Perl_croak(aTHX_ "panic: INTERPCONCAT");
4587#endif
4588 if (PL_bufptr == PL_bufend)
4589 return REPORT(sublex_done());
4590
4591 if (SvIVX(PL_linestr) == '\'') {
4592 SV *sv = newSVsv(PL_linestr);
4593 if (!PL_lex_inpat)
4594 sv = tokeq(sv);
4595 else if ( PL_hints & HINT_NEW_RE )
4596 sv = new_constant(NULL, 0, "qr", sv, sv, "q", 1);
4597 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
4598 s = PL_bufend;
4599 }
4600 else {
4601 s = scan_const(PL_bufptr);
4602 if (*s == '\\')
4603 PL_lex_state = LEX_INTERPCASEMOD;
4604 else
4605 PL_lex_state = LEX_INTERPSTART;
4606 }
4607
4608 if (s != PL_bufptr) {
4609 start_force(PL_curforce);
4610 if (PL_madskills) {
4611 curmad('X', newSVpvn(PL_bufptr,s-PL_bufptr));
4612 }
4613 NEXTVAL_NEXTTOKE = pl_yylval;
4614 PL_expect = XTERM;
4615 force_next(THING);
4616 if (PL_lex_starts++) {
4617#ifdef PERL_MAD
4618 if (PL_madskills) {
4619 if (PL_thistoken)
4620 sv_free(PL_thistoken);
4621 PL_thistoken = newSVpvs("");
4622 }
4623#endif
4624 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4625 if (!PL_lex_casemods && PL_lex_inpat)
4626 OPERATOR(',');
4627 else
4628 Aop(OP_CONCAT);
4629 }
4630 else {
4631 PL_bufptr = s;
4632 return yylex();
4633 }
4634 }
4635
4636 return yylex();
4637 case LEX_FORMLINE:
4638 PL_lex_state = LEX_NORMAL;
4639 s = scan_formline(PL_bufptr);
4640 if (!PL_lex_formbrack)
4641 goto rightbracket;
4642 OPERATOR(';');
4643 }
4644
4645 s = PL_bufptr;
4646 PL_oldoldbufptr = PL_oldbufptr;
4647 PL_oldbufptr = s;
4648
4649 retry:
4650#ifdef PERL_MAD
4651 if (PL_thistoken) {
4652 sv_free(PL_thistoken);
4653 PL_thistoken = 0;
4654 }
4655 PL_realtokenstart = s - SvPVX(PL_linestr); /* assume but undo on ws */
4656#endif
4657 switch (*s) {
4658 default:
4659 if (isIDFIRST_lazy_if(s,UTF))
4660 goto keylookup;
4661 {
4662 unsigned char c = *s;
4663 len = UTF ? Perl_utf8_length(aTHX_ (U8 *) PL_linestart, (U8 *) s) : (STRLEN) (s - PL_linestart);
4664 if (len > UNRECOGNIZED_PRECEDE_COUNT) {
4665 d = UTF ? (char *) Perl_utf8_hop(aTHX_ (U8 *) s, -UNRECOGNIZED_PRECEDE_COUNT) : s - UNRECOGNIZED_PRECEDE_COUNT;
4666 } else {
4667 d = PL_linestart;
4668 }
4669 *s = '\0';
4670 Perl_croak(aTHX_ "Unrecognized character \\x%02X; marked by <-- HERE after %s<-- HERE near column %d", c, d, (int) len + 1);
4671 }
4672 case 4:
4673 case 26:
4674 goto fake_eof; /* emulate EOF on ^D or ^Z */
4675 case 0:
4676#ifdef PERL_MAD
4677 if (PL_madskills)
4678 PL_faketokens = 0;
4679#endif
4680 if (!PL_rsfp) {
4681 PL_last_uni = 0;
4682 PL_last_lop = 0;
4683 if (PL_lex_brackets &&
4684 PL_lex_brackstack[PL_lex_brackets-1] != XFAKEEOF) {
4685 yyerror((const char *)
4686 (PL_lex_formbrack
4687 ? "Format not terminated"
4688 : "Missing right curly or square bracket"));
4689 }
4690 DEBUG_T( { PerlIO_printf(Perl_debug_log,
4691 "### Tokener got EOF\n");
4692 } );
4693 TOKEN(0);
4694 }
4695 if (s++ < PL_bufend)
4696 goto retry; /* ignore stray nulls */
4697 PL_last_uni = 0;
4698 PL_last_lop = 0;
4699 if (!PL_in_eval && !PL_preambled) {
4700 PL_preambled = TRUE;
4701#ifdef PERL_MAD
4702 if (PL_madskills)
4703 PL_faketokens = 1;
4704#endif
4705 if (PL_perldb) {
4706 /* Generate a string of Perl code to load the debugger.
4707 * If PERL5DB is set, it will return the contents of that,
4708 * otherwise a compile-time require of perl5db.pl. */
4709
4710 const char * const pdb = PerlEnv_getenv("PERL5DB");
4711
4712 if (pdb) {
4713 sv_setpv(PL_linestr, pdb);
4714 sv_catpvs(PL_linestr,";");
4715 } else {
4716 SETERRNO(0,SS_NORMAL);
4717 sv_setpvs(PL_linestr, "BEGIN { require 'perl5db.pl' };");
4718 }
4719 } else
4720 sv_setpvs(PL_linestr,"");
4721 if (PL_preambleav) {
4722 SV **svp = AvARRAY(PL_preambleav);
4723 SV **const end = svp + AvFILLp(PL_preambleav);
4724 while(svp <= end) {
4725 sv_catsv(PL_linestr, *svp);
4726 ++svp;
4727 sv_catpvs(PL_linestr, ";");
4728 }
4729 sv_free(MUTABLE_SV(PL_preambleav));
4730 PL_preambleav = NULL;
4731 }
4732 if (PL_minus_E)
4733 sv_catpvs(PL_linestr,
4734 "use feature ':5." STRINGIFY(PERL_VERSION) "';");
4735 if (PL_minus_n || PL_minus_p) {
4736 sv_catpvs(PL_linestr, "LINE: while (<>) {"/*}*/);
4737 if (PL_minus_l)
4738 sv_catpvs(PL_linestr,"chomp;");
4739 if (PL_minus_a) {
4740 if (PL_minus_F) {
4741 if ((*PL_splitstr == '/' || *PL_splitstr == '\''
4742 || *PL_splitstr == '"')
4743 && strchr(PL_splitstr + 1, *PL_splitstr))
4744 Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
4745 else {
4746 /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
4747 bytes can be used as quoting characters. :-) */
4748 const char *splits = PL_splitstr;
4749 sv_catpvs(PL_linestr, "our @F=split(q\0");
4750 do {
4751 /* Need to \ \s */
4752 if (*splits == '\\')
4753 sv_catpvn(PL_linestr, splits, 1);
4754 sv_catpvn(PL_linestr, splits, 1);
4755 } while (*splits++);
4756 /* This loop will embed the trailing NUL of
4757 PL_linestr as the last thing it does before
4758 terminating. */
4759 sv_catpvs(PL_linestr, ");");
4760 }
4761 }
4762 else
4763 sv_catpvs(PL_linestr,"our @F=split(' ');");
4764 }
4765 }
4766 sv_catpvs(PL_linestr, "\n");
4767 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4768 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4769 PL_last_lop = PL_last_uni = NULL;
4770 if ((PERLDB_LINE || PERLDB_SAVESRC) && PL_curstash != PL_debstash)
4771 update_debugger_info(PL_linestr, NULL, 0);
4772 goto retry;
4773 }
4774 do {
4775 fake_eof = 0;
4776 bof = PL_rsfp ? TRUE : FALSE;
4777 if (0) {
4778 fake_eof:
4779 fake_eof = LEX_FAKE_EOF;
4780 }
4781 PL_bufptr = PL_bufend;
4782 CopLINE_inc(PL_curcop);
4783 if (!lex_next_chunk(fake_eof)) {
4784 CopLINE_dec(PL_curcop);
4785 s = PL_bufptr;
4786 TOKEN(';'); /* not infinite loop because rsfp is NULL now */
4787 }
4788 CopLINE_dec(PL_curcop);
4789#ifdef PERL_MAD
4790 if (!PL_rsfp)
4791 PL_realtokenstart = -1;
4792#endif
4793 s = PL_bufptr;
4794 /* If it looks like the start of a BOM or raw UTF-16,
4795 * check if it in fact is. */
4796 if (bof && PL_rsfp &&
4797 (*s == 0 ||
4798 *(U8*)s == 0xEF ||
4799 *(U8*)s >= 0xFE ||
4800 s[1] == 0)) {
4801 Off_t offset = (IV)PerlIO_tell(PL_rsfp);
4802 bof = (offset == (Off_t)SvCUR(PL_linestr));
4803#if defined(PERLIO_USING_CRLF) && defined(PERL_TEXTMODE_SCRIPTS)
4804 /* offset may include swallowed CR */
4805 if (!bof)
4806 bof = (offset == (Off_t)SvCUR(PL_linestr)+1);
4807#endif
4808 if (bof) {
4809 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4810 s = swallow_bom((U8*)s);
4811 }
4812 }
4813 if (PL_parser->in_pod) {
4814 /* Incest with pod. */
4815#ifdef PERL_MAD
4816 if (PL_madskills)
4817 sv_catsv(PL_thiswhite, PL_linestr);
4818#endif
4819 if (*s == '=' && strnEQ(s, "=cut", 4) && !isALPHA(s[4])) {
4820 sv_setpvs(PL_linestr, "");
4821 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4822 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4823 PL_last_lop = PL_last_uni = NULL;
4824 PL_parser->in_pod = 0;
4825 }
4826 }
4827 if (PL_rsfp)
4828 incline(s);
4829 } while (PL_parser->in_pod);
4830 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
4831 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4832 PL_last_lop = PL_last_uni = NULL;
4833 if (CopLINE(PL_curcop) == 1) {
4834 while (s < PL_bufend && isSPACE(*s))
4835 s++;
4836 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
4837 s++;
4838#ifdef PERL_MAD
4839 if (PL_madskills)
4840 PL_thiswhite = newSVpvn(PL_linestart, s - PL_linestart);
4841#endif
4842 d = NULL;
4843 if (!PL_in_eval) {
4844 if (*s == '#' && *(s+1) == '!')
4845 d = s + 2;
4846#ifdef ALTERNATE_SHEBANG
4847 else {
4848 static char const as[] = ALTERNATE_SHEBANG;
4849 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
4850 d = s + (sizeof(as) - 1);
4851 }
4852#endif /* ALTERNATE_SHEBANG */
4853 }
4854 if (d) {
4855 char *ipath;
4856 char *ipathend;
4857
4858 while (isSPACE(*d))
4859 d++;
4860 ipath = d;
4861 while (*d && !isSPACE(*d))
4862 d++;
4863 ipathend = d;
4864
4865#ifdef ARG_ZERO_IS_SCRIPT
4866 if (ipathend > ipath) {
4867 /*
4868 * HP-UX (at least) sets argv[0] to the script name,
4869 * which makes $^X incorrect. And Digital UNIX and Linux,
4870 * at least, set argv[0] to the basename of the Perl
4871 * interpreter. So, having found "#!", we'll set it right.
4872 */
4873 SV * const x = GvSV(gv_fetchpvs("\030", GV_ADD|GV_NOTQUAL,
4874 SVt_PV)); /* $^X */
4875 assert(SvPOK(x) || SvGMAGICAL(x));
4876 if (sv_eq(x, CopFILESV(PL_curcop))) {
4877 sv_setpvn(x, ipath, ipathend - ipath);
4878 SvSETMAGIC(x);
4879 }
4880 else {
4881 STRLEN blen;
4882 STRLEN llen;
4883 const char *bstart = SvPV_const(CopFILESV(PL_curcop),blen);
4884 const char * const lstart = SvPV_const(x,llen);
4885 if (llen < blen) {
4886 bstart += blen - llen;
4887 if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
4888 sv_setpvn(x, ipath, ipathend - ipath);
4889 SvSETMAGIC(x);
4890 }
4891 }
4892 }
4893 TAINT_NOT; /* $^X is always tainted, but that's OK */
4894 }
4895#endif /* ARG_ZERO_IS_SCRIPT */
4896
4897 /*
4898 * Look for options.
4899 */
4900 d = instr(s,"perl -");
4901 if (!d) {
4902 d = instr(s,"perl");
4903#if defined(DOSISH)
4904 /* avoid getting into infinite loops when shebang
4905 * line contains "Perl" rather than "perl" */
4906 if (!d) {
4907 for (d = ipathend-4; d >= ipath; --d) {
4908 if ((*d == 'p' || *d == 'P')
4909 && !ibcmp(d, "perl", 4))
4910 {
4911 break;
4912 }
4913 }
4914 if (d < ipath)
4915 d = NULL;
4916 }
4917#endif
4918 }
4919#ifdef ALTERNATE_SHEBANG
4920 /*
4921 * If the ALTERNATE_SHEBANG on this system starts with a
4922 * character that can be part of a Perl expression, then if
4923 * we see it but not "perl", we're probably looking at the
4924 * start of Perl code, not a request to hand off to some
4925 * other interpreter. Similarly, if "perl" is there, but
4926 * not in the first 'word' of the line, we assume the line
4927 * contains the start of the Perl program.
4928 */
4929 if (d && *s != '#') {
4930 const char *c = ipath;
4931 while (*c && !strchr("; \t\r\n\f\v#", *c))
4932 c++;
4933 if (c < d)
4934 d = NULL; /* "perl" not in first word; ignore */
4935 else
4936 *s = '#'; /* Don't try to parse shebang line */
4937 }
4938#endif /* ALTERNATE_SHEBANG */
4939 if (!d &&
4940 *s == '#' &&
4941 ipathend > ipath &&
4942 !PL_minus_c &&
4943 !instr(s,"indir") &&
4944 instr(PL_origargv[0],"perl"))
4945 {
4946 dVAR;
4947 char **newargv;
4948
4949 *ipathend = '\0';
4950 s = ipathend + 1;
4951 while (s < PL_bufend && isSPACE(*s))
4952 s++;
4953 if (s < PL_bufend) {
4954 Newx(newargv,PL_origargc+3,char*);
4955 newargv[1] = s;
4956 while (s < PL_bufend && !isSPACE(*s))
4957 s++;
4958 *s = '\0';
4959 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
4960 }
4961 else
4962 newargv = PL_origargv;
4963 newargv[0] = ipath;
4964 PERL_FPU_PRE_EXEC
4965 PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
4966 PERL_FPU_POST_EXEC
4967 Perl_croak(aTHX_ "Can't exec %s", ipath);
4968 }
4969 if (d) {
4970 while (*d && !isSPACE(*d))
4971 d++;
4972 while (SPACE_OR_TAB(*d))
4973 d++;
4974
4975 if (*d++ == '-') {
4976 const bool switches_done = PL_doswitches;
4977 const U32 oldpdb = PL_perldb;
4978 const bool oldn = PL_minus_n;
4979 const bool oldp = PL_minus_p;
4980 const char *d1 = d;
4981
4982 do {
4983 bool baduni = FALSE;
4984 if (*d1 == 'C') {
4985 const char *d2 = d1 + 1;
4986 if (parse_unicode_opts((const char **)&d2)
4987 != PL_unicode)
4988 baduni = TRUE;
4989 }
4990 if (baduni || *d1 == 'M' || *d1 == 'm') {
4991 const char * const m = d1;
4992 while (*d1 && !isSPACE(*d1))
4993 d1++;
4994 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
4995 (int)(d1 - m), m);
4996 }
4997 d1 = moreswitches(d1);
4998 } while (d1);
4999 if (PL_doswitches && !switches_done) {
5000 int argc = PL_origargc;
5001 char **argv = PL_origargv;
5002 do {
5003 argc--,argv++;
5004 } while (argc && argv[0][0] == '-' && argv[0][1]);
5005 init_argv_symbols(argc,argv);
5006 }
5007 if (((PERLDB_LINE || PERLDB_SAVESRC) && !oldpdb) ||
5008 ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
5009 /* if we have already added "LINE: while (<>) {",
5010 we must not do it again */
5011 {
5012 sv_setpvs(PL_linestr, "");
5013 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
5014 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5015 PL_last_lop = PL_last_uni = NULL;
5016 PL_preambled = FALSE;
5017 if (PERLDB_LINE || PERLDB_SAVESRC)
5018 (void)gv_fetchfile(PL_origfilename);
5019 goto retry;
5020 }
5021 }
5022 }
5023 }
5024 }
5025 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
5026 PL_bufptr = s;
5027 PL_lex_state = LEX_FORMLINE;
5028 return yylex();
5029 }
5030 goto retry;
5031 case '\r':
5032#ifdef PERL_STRICT_CR
5033 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
5034 Perl_croak(aTHX_
5035 "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
5036#endif
5037 case ' ': case '\t': case '\f': case 013:
5038#ifdef PERL_MAD
5039 PL_realtokenstart = -1;
5040 if (!PL_thiswhite)
5041 PL_thiswhite = newSVpvs("");
5042 sv_catpvn(PL_thiswhite, s, 1);
5043#endif
5044 s++;
5045 goto retry;
5046 case '#':
5047 case '\n':
5048#ifdef PERL_MAD
5049 PL_realtokenstart = -1;
5050 if (PL_madskills)
5051 PL_faketokens = 0;
5052#endif
5053 if (PL_lex_state != LEX_NORMAL || (PL_in_eval && !PL_rsfp)) {
5054 if (*s == '#' && s == PL_linestart && PL_in_eval && !PL_rsfp) {
5055 /* handle eval qq[#line 1 "foo"\n ...] */
5056 CopLINE_dec(PL_curcop);
5057 incline(s);
5058 }
5059 if (PL_madskills && !PL_lex_formbrack && !PL_in_eval) {
5060 s = SKIPSPACE0(s);
5061 if (!PL_in_eval || PL_rsfp)
5062 incline(s);
5063 }
5064 else {
5065 d = s;
5066 while (d < PL_bufend && *d != '\n')
5067 d++;
5068 if (d < PL_bufend)
5069 d++;
5070 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
5071 Perl_croak(aTHX_ "panic: input overflow");
5072#ifdef PERL_MAD
5073 if (PL_madskills)
5074 PL_thiswhite = newSVpvn(s, d - s);
5075#endif
5076 s = d;
5077 incline(s);
5078 }
5079 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
5080 PL_bufptr = s;
5081 PL_lex_state = LEX_FORMLINE;
5082 return yylex();
5083 }
5084 }
5085 else {
5086#ifdef PERL_MAD
5087 if (PL_madskills && CopLINE(PL_curcop) >= 1 && !PL_lex_formbrack) {
5088 if (CopLINE(PL_curcop) == 1 && s[0] == '#' && s[1] == '!') {
5089 PL_faketokens = 0;
5090 s = SKIPSPACE0(s);
5091 TOKEN(PEG); /* make sure any #! line is accessible */
5092 }
5093 s = SKIPSPACE0(s);
5094 }
5095 else {
5096/* if (PL_madskills && PL_lex_formbrack) { */
5097 d = s;
5098 while (d < PL_bufend && *d != '\n')
5099 d++;
5100 if (d < PL_bufend)
5101 d++;
5102 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
5103 Perl_croak(aTHX_ "panic: input overflow");
5104 if (PL_madskills && CopLINE(PL_curcop) >= 1) {
5105 if (!PL_thiswhite)
5106 PL_thiswhite = newSVpvs("");
5107 if (CopLINE(PL_curcop) == 1) {
5108 sv_setpvs(PL_thiswhite, "");
5109 PL_faketokens = 0;
5110 }
5111 sv_catpvn(PL_thiswhite, s, d - s);
5112 }
5113 s = d;
5114/* }
5115 *s = '\0';
5116 PL_bufend = s; */
5117 }
5118#else
5119 *s = '\0';
5120 PL_bufend = s;
5121#endif
5122 }
5123 goto retry;
5124 case '-':
5125 if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {
5126 I32 ftst = 0;
5127 char tmp;
5128
5129 s++;
5130 PL_bufptr = s;
5131 tmp = *s++;
5132
5133 while (s < PL_bufend && SPACE_OR_TAB(*s))
5134 s++;
5135
5136 if (strnEQ(s,"=>",2)) {
5137 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
5138 DEBUG_T( { printbuf("### Saw unary minus before =>, forcing word %s\n", s); } );
5139 OPERATOR('-'); /* unary minus */
5140 }
5141 PL_last_uni = PL_oldbufptr;
5142 switch (tmp) {
5143 case 'r': ftst = OP_FTEREAD; break;
5144 case 'w': ftst = OP_FTEWRITE; break;
5145 case 'x': ftst = OP_FTEEXEC; break;
5146 case 'o': ftst = OP_FTEOWNED; break;
5147 case 'R': ftst = OP_FTRREAD; break;
5148 case 'W': ftst = OP_FTRWRITE; break;
5149 case 'X': ftst = OP_FTREXEC; break;
5150 case 'O': ftst = OP_FTROWNED; break;
5151 case 'e': ftst = OP_FTIS; break;
5152 case 'z': ftst = OP_FTZERO; break;
5153 case 's': ftst = OP_FTSIZE; break;
5154 case 'f': ftst = OP_FTFILE; break;
5155 case 'd': ftst = OP_FTDIR; break;
5156 case 'l': ftst = OP_FTLINK; break;
5157 case 'p': ftst = OP_FTPIPE; break;
5158 case 'S': ftst = OP_FTSOCK; break;
5159 case 'u': ftst = OP_FTSUID; break;
5160 case 'g': ftst = OP_FTSGID; break;
5161 case 'k': ftst = OP_FTSVTX; break;
5162 case 'b': ftst = OP_FTBLK; break;
5163 case 'c': ftst = OP_FTCHR; break;
5164 case 't': ftst = OP_FTTTY; break;
5165 case 'T': ftst = OP_FTTEXT; break;
5166 case 'B': ftst = OP_FTBINARY; break;
5167 case 'M': case 'A': case 'C':
5168 gv_fetchpvs("\024", GV_ADD|GV_NOTQUAL, SVt_PV);
5169 switch (tmp) {
5170 case 'M': ftst = OP_FTMTIME; break;
5171 case 'A': ftst = OP_FTATIME; break;
5172 case 'C': ftst = OP_FTCTIME; break;
5173 default: break;
5174 }
5175 break;
5176 default:
5177 break;
5178 }
5179 if (ftst) {
5180 PL_last_lop_op = (OPCODE)ftst;
5181 DEBUG_T( { PerlIO_printf(Perl_debug_log,
5182 "### Saw file test %c\n", (int)tmp);
5183 } );
5184 FTST(ftst);
5185 }
5186 else {
5187 /* Assume it was a minus followed by a one-letter named
5188 * subroutine call (or a -bareword), then. */
5189 DEBUG_T( { PerlIO_printf(Perl_debug_log,
5190 "### '-%c' looked like a file test but was not\n",
5191 (int) tmp);
5192 } );
5193 s = --PL_bufptr;
5194 }
5195 }
5196 {
5197 const char tmp = *s++;
5198 if (*s == tmp) {
5199 s++;
5200 if (PL_expect == XOPERATOR)
5201 TERM(POSTDEC);
5202 else
5203 OPERATOR(PREDEC);
5204 }
5205 else if (*s == '>') {
5206 s++;
5207 s = SKIPSPACE1(s);
5208 if (isIDFIRST_lazy_if(s,UTF)) {
5209 s = force_word(s,METHOD,FALSE,TRUE,FALSE);
5210 TOKEN(ARROW);
5211 }
5212 else if (*s == '$')
5213 OPERATOR(ARROW);
5214 else
5215 TERM(ARROW);
5216 }
5217 if (PL_expect == XOPERATOR) {
5218 if (*s == '=' && !PL_lex_allbrackets &&
5219 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5220 s--;
5221 TOKEN(0);
5222 }
5223 Aop(OP_SUBTRACT);
5224 }
5225 else {
5226 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5227 check_uni();
5228 OPERATOR('-'); /* unary minus */
5229 }
5230 }
5231
5232 case '+':
5233 {
5234 const char tmp = *s++;
5235 if (*s == tmp) {
5236 s++;
5237 if (PL_expect == XOPERATOR)
5238 TERM(POSTINC);
5239 else
5240 OPERATOR(PREINC);
5241 }
5242 if (PL_expect == XOPERATOR) {
5243 if (*s == '=' && !PL_lex_allbrackets &&
5244 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5245 s--;
5246 TOKEN(0);
5247 }
5248 Aop(OP_ADD);
5249 }
5250 else {
5251 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5252 check_uni();
5253 OPERATOR('+');
5254 }
5255 }
5256
5257 case '*':
5258 if (PL_expect != XOPERATOR) {
5259 s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5260 PL_expect = XOPERATOR;
5261 force_ident(PL_tokenbuf, '*');
5262 if (!*PL_tokenbuf)
5263 PREREF('*');
5264 TERM('*');
5265 }
5266 s++;
5267 if (*s == '*') {
5268 s++;
5269 if (*s == '=' && !PL_lex_allbrackets &&
5270 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5271 s -= 2;
5272 TOKEN(0);
5273 }
5274 PWop(OP_POW);
5275 }
5276 if (*s == '=' && !PL_lex_allbrackets &&
5277 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5278 s--;
5279 TOKEN(0);
5280 }
5281 Mop(OP_MULTIPLY);
5282
5283 case '%':
5284 if (PL_expect == XOPERATOR) {
5285 if (s[1] == '=' && !PL_lex_allbrackets &&
5286 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5287 TOKEN(0);
5288 ++s;
5289 Mop(OP_MODULO);
5290 }
5291 PL_tokenbuf[0] = '%';
5292 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
5293 sizeof PL_tokenbuf - 1, FALSE);
5294 if (!PL_tokenbuf[1]) {
5295 PREREF('%');
5296 }
5297 PL_pending_ident = '%';
5298 TERM('%');
5299
5300 case '^':
5301 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5302 (s[1] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE))
5303 TOKEN(0);
5304 s++;
5305 BOop(OP_BIT_XOR);
5306 case '[':
5307 if (PL_lex_brackets > 100)
5308 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
5309 PL_lex_brackstack[PL_lex_brackets++] = 0;
5310 PL_lex_allbrackets++;
5311 {
5312 const char tmp = *s++;
5313 OPERATOR(tmp);
5314 }
5315 case '~':
5316 if (s[1] == '~'
5317 && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR))
5318 {
5319 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
5320 TOKEN(0);
5321 s += 2;
5322 Eop(OP_SMARTMATCH);
5323 }
5324 s++;
5325 OPERATOR('~');
5326 case ',':
5327 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMMA)
5328 TOKEN(0);
5329 s++;
5330 OPERATOR(',');
5331 case ':':
5332 if (s[1] == ':') {
5333 len = 0;
5334 goto just_a_word_zero_gv;
5335 }
5336 s++;
5337 switch (PL_expect) {
5338 OP *attrs;
5339#ifdef PERL_MAD
5340 I32 stuffstart;
5341#endif
5342 case XOPERATOR:
5343 if (!PL_in_my || PL_lex_state != LEX_NORMAL)
5344 break;
5345 PL_bufptr = s; /* update in case we back off */
5346 if (*s == '=') {
5347 Perl_croak(aTHX_
5348 "Use of := for an empty attribute list is not allowed");
5349 }
5350 goto grabattrs;
5351 case XATTRBLOCK:
5352 PL_expect = XBLOCK;
5353 goto grabattrs;
5354 case XATTRTERM:
5355 PL_expect = XTERMBLOCK;
5356 grabattrs:
5357#ifdef PERL_MAD
5358 stuffstart = s - SvPVX(PL_linestr) - 1;
5359#endif
5360 s = PEEKSPACE(s);
5361 attrs = NULL;
5362 while (isIDFIRST_lazy_if(s,UTF)) {
5363 I32 tmp;
5364 SV *sv;
5365 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
5366 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len, 0))) {
5367 if (tmp < 0) tmp = -tmp;
5368 switch (tmp) {
5369 case KEY_or:
5370 case KEY_and:
5371 case KEY_for:
5372 case KEY_foreach:
5373 case KEY_unless:
5374 case KEY_if:
5375 case KEY_while:
5376 case KEY_until:
5377 goto got_attrs;
5378 default:
5379 break;
5380 }
5381 }
5382 sv = newSVpvn(s, len);
5383 if (*d == '(') {
5384 d = scan_str(d,TRUE,TRUE);
5385 if (!d) {
5386 /* MUST advance bufptr here to avoid bogus
5387 "at end of line" context messages from yyerror().
5388 */
5389 PL_bufptr = s + len;
5390 yyerror("Unterminated attribute parameter in attribute list");
5391 if (attrs)
5392 op_free(attrs);
5393 sv_free(sv);
5394 return REPORT(0); /* EOF indicator */
5395 }
5396 }
5397 if (PL_lex_stuff) {
5398 sv_catsv(sv, PL_lex_stuff);
5399 attrs = op_append_elem(OP_LIST, attrs,
5400 newSVOP(OP_CONST, 0, sv));
5401 SvREFCNT_dec(PL_lex_stuff);
5402 PL_lex_stuff = NULL;
5403 }
5404 else {
5405 if (len == 6 && strnEQ(SvPVX(sv), "unique", len)) {
5406 sv_free(sv);
5407 if (PL_in_my == KEY_our) {
5408 deprecate(":unique");
5409 }
5410 else
5411 Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables");
5412 }
5413
5414 /* NOTE: any CV attrs applied here need to be part of
5415 the CVf_BUILTIN_ATTRS define in cv.h! */
5416 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "lvalue", len)) {
5417 sv_free(sv);
5418 CvLVALUE_on(PL_compcv);
5419 }
5420 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "locked", len)) {
5421 sv_free(sv);
5422 deprecate(":locked");
5423 }
5424 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "method", len)) {
5425 sv_free(sv);
5426 CvMETHOD_on(PL_compcv);
5427 }
5428 /* After we've set the flags, it could be argued that
5429 we don't need to do the attributes.pm-based setting
5430 process, and shouldn't bother appending recognized
5431 flags. To experiment with that, uncomment the
5432 following "else". (Note that's already been
5433 uncommented. That keeps the above-applied built-in
5434 attributes from being intercepted (and possibly
5435 rejected) by a package's attribute routines, but is
5436 justified by the performance win for the common case
5437 of applying only built-in attributes.) */
5438 else
5439 attrs = op_append_elem(OP_LIST, attrs,
5440 newSVOP(OP_CONST, 0,
5441 sv));
5442 }
5443 s = PEEKSPACE(d);
5444 if (*s == ':' && s[1] != ':')
5445 s = PEEKSPACE(s+1);
5446 else if (s == d)
5447 break; /* require real whitespace or :'s */
5448 /* XXX losing whitespace on sequential attributes here */
5449 }
5450 {
5451 const char tmp
5452 = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
5453 if (*s != ';' && *s != '}' && *s != tmp
5454 && (tmp != '=' || *s != ')')) {
5455 const char q = ((*s == '\'') ? '"' : '\'');
5456 /* If here for an expression, and parsed no attrs, back
5457 off. */
5458 if (tmp == '=' && !attrs) {
5459 s = PL_bufptr;
5460 break;
5461 }
5462 /* MUST advance bufptr here to avoid bogus "at end of line"
5463 context messages from yyerror().
5464 */
5465 PL_bufptr = s;
5466 yyerror( (const char *)
5467 (*s
5468 ? Perl_form(aTHX_ "Invalid separator character "
5469 "%c%c%c in attribute list", q, *s, q)
5470 : "Unterminated attribute list" ) );
5471 if (attrs)
5472 op_free(attrs);
5473 OPERATOR(':');
5474 }
5475 }
5476 got_attrs:
5477 if (attrs) {
5478 start_force(PL_curforce);
5479 NEXTVAL_NEXTTOKE.opval = attrs;
5480 CURMAD('_', PL_nextwhite);
5481 force_next(THING);
5482 }
5483#ifdef PERL_MAD
5484 if (PL_madskills) {
5485 PL_thistoken = newSVpvn(SvPVX(PL_linestr) + stuffstart,
5486 (s - SvPVX(PL_linestr)) - stuffstart);
5487 }
5488#endif
5489 TOKEN(COLONATTR);
5490 }
5491 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING) {
5492 s--;
5493 TOKEN(0);
5494 }
5495 PL_lex_allbrackets--;
5496 OPERATOR(':');
5497 case '(':
5498 s++;
5499 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
5500 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */
5501 else
5502 PL_expect = XTERM;
5503 s = SKIPSPACE1(s);
5504 PL_lex_allbrackets++;
5505 TOKEN('(');
5506 case ';':
5507 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
5508 TOKEN(0);
5509 CLINE;
5510 s++;
5511 OPERATOR(';');
5512 case ')':
5513 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING)
5514 TOKEN(0);
5515 s++;
5516 PL_lex_allbrackets--;
5517 s = SKIPSPACE1(s);
5518 if (*s == '{')
5519 PREBLOCK(')');
5520 TERM(')');
5521 case ']':
5522 if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
5523 TOKEN(0);
5524 s++;
5525 if (PL_lex_brackets <= 0)
5526 yyerror("Unmatched right square bracket");
5527 else
5528 --PL_lex_brackets;
5529 PL_lex_allbrackets--;
5530 if (PL_lex_state == LEX_INTERPNORMAL) {
5531 if (PL_lex_brackets == 0) {
5532 if (*s == '-' && s[1] == '>')
5533 PL_lex_state = LEX_INTERPENDMAYBE;
5534 else if (*s != '[' && *s != '{')
5535 PL_lex_state = LEX_INTERPEND;
5536 }
5537 }
5538 TERM(']');
5539 case '{':
5540 leftbracket:
5541 s++;
5542 if (PL_lex_brackets > 100) {
5543 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
5544 }
5545 switch (PL_expect) {
5546 case XTERM:
5547 if (PL_lex_formbrack) {
5548 s--;
5549 PRETERMBLOCK(DO);
5550 }
5551 if (PL_oldoldbufptr == PL_last_lop)
5552 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
5553 else
5554 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5555 PL_lex_allbrackets++;
5556 OPERATOR(HASHBRACK);
5557 case XOPERATOR:
5558 while (s < PL_bufend && SPACE_OR_TAB(*s))
5559 s++;
5560 d = s;
5561 PL_tokenbuf[0] = '\0';
5562 if (d < PL_bufend && *d == '-') {
5563 PL_tokenbuf[0] = '-';
5564 d++;
5565 while (d < PL_bufend && SPACE_OR_TAB(*d))
5566 d++;
5567 }
5568 if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
5569 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
5570 FALSE, &len);
5571 while (d < PL_bufend && SPACE_OR_TAB(*d))
5572 d++;
5573 if (*d == '}') {
5574 const char minus = (PL_tokenbuf[0] == '-');
5575 s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
5576 if (minus)
5577 force_next('-');
5578 }
5579 }
5580 /* FALL THROUGH */
5581 case XATTRBLOCK:
5582 case XBLOCK:
5583 PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
5584 PL_lex_allbrackets++;
5585 PL_expect = XSTATE;
5586 break;
5587 case XATTRTERM:
5588 case XTERMBLOCK:
5589 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5590 PL_lex_allbrackets++;
5591 PL_expect = XSTATE;
5592 break;
5593 default: {
5594 const char *t;
5595 if (PL_oldoldbufptr == PL_last_lop)
5596 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
5597 else
5598 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5599 PL_lex_allbrackets++;
5600 s = SKIPSPACE1(s);
5601 if (*s == '}') {
5602 if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
5603 PL_expect = XTERM;
5604 /* This hack is to get the ${} in the message. */
5605 PL_bufptr = s+1;
5606 yyerror("syntax error");
5607 break;
5608 }
5609 OPERATOR(HASHBRACK);
5610 }
5611 /* This hack serves to disambiguate a pair of curlies
5612 * as being a block or an anon hash. Normally, expectation
5613 * determines that, but in cases where we're not in a
5614 * position to expect anything in particular (like inside
5615 * eval"") we have to resolve the ambiguity. This code
5616 * covers the case where the first term in the curlies is a
5617 * quoted string. Most other cases need to be explicitly
5618 * disambiguated by prepending a "+" before the opening
5619 * curly in order to force resolution as an anon hash.
5620 *
5621 * XXX should probably propagate the outer expectation
5622 * into eval"" to rely less on this hack, but that could
5623 * potentially break current behavior of eval"".
5624 * GSAR 97-07-21
5625 */
5626 t = s;
5627 if (*s == '\'' || *s == '"' || *s == '`') {
5628 /* common case: get past first string, handling escapes */
5629 for (t++; t < PL_bufend && *t != *s;)
5630 if (*t++ == '\\' && (*t == '\\' || *t == *s))
5631 t++;
5632 t++;
5633 }
5634 else if (*s == 'q') {
5635 if (++t < PL_bufend
5636 && (!isALNUM(*t)
5637 || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
5638 && !isALNUM(*t))))
5639 {
5640 /* skip q//-like construct */
5641 const char *tmps;
5642 char open, close, term;
5643 I32 brackets = 1;
5644
5645 while (t < PL_bufend && isSPACE(*t))
5646 t++;
5647 /* check for q => */
5648 if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
5649 OPERATOR(HASHBRACK);
5650 }
5651 term = *t;
5652 open = term;
5653 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
5654 term = tmps[5];
5655 close = term;
5656 if (open == close)
5657 for (t++; t < PL_bufend; t++) {
5658 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
5659 t++;
5660 else if (*t == open)
5661 break;
5662 }
5663 else {
5664 for (t++; t < PL_bufend; t++) {
5665 if (*t == '\\' && t+1 < PL_bufend)
5666 t++;
5667 else if (*t == close && --brackets <= 0)
5668 break;
5669 else if (*t == open)
5670 brackets++;
5671 }
5672 }
5673 t++;
5674 }
5675 else
5676 /* skip plain q word */
5677 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
5678 t += UTF8SKIP(t);
5679 }
5680 else if (isALNUM_lazy_if(t,UTF)) {
5681 t += UTF8SKIP(t);
5682 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
5683 t += UTF8SKIP(t);
5684 }
5685 while (t < PL_bufend && isSPACE(*t))
5686 t++;
5687 /* if comma follows first term, call it an anon hash */
5688 /* XXX it could be a comma expression with loop modifiers */
5689 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
5690 || (*t == '=' && t[1] == '>')))
5691 OPERATOR(HASHBRACK);
5692 if (PL_expect == XREF)
5693 PL_expect = XTERM;
5694 else {
5695 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
5696 PL_expect = XSTATE;
5697 }
5698 }
5699 break;
5700 }
5701 pl_yylval.ival = CopLINE(PL_curcop);
5702 if (isSPACE(*s) || *s == '#')
5703 PL_copline = NOLINE; /* invalidate current command line number */
5704 TOKEN('{');
5705 case '}':
5706 if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
5707 TOKEN(0);
5708 rightbracket:
5709 s++;
5710 if (PL_lex_brackets <= 0)
5711 yyerror("Unmatched right curly bracket");
5712 else
5713 PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
5714 PL_lex_allbrackets--;
5715 if (PL_lex_brackets < PL_lex_formbrack && PL_lex_state != LEX_INTERPNORMAL)
5716 PL_lex_formbrack = 0;
5717 if (PL_lex_state == LEX_INTERPNORMAL) {
5718 if (PL_lex_brackets == 0) {
5719 if (PL_expect & XFAKEBRACK) {
5720 PL_expect &= XENUMMASK;
5721 PL_lex_state = LEX_INTERPEND;
5722 PL_bufptr = s;
5723#if 0
5724 if (PL_madskills) {
5725 if (!PL_thiswhite)
5726 PL_thiswhite = newSVpvs("");
5727 sv_catpvs(PL_thiswhite,"}");
5728 }
5729#endif
5730 return yylex(); /* ignore fake brackets */
5731 }
5732 if (*s == '-' && s[1] == '>')
5733 PL_lex_state = LEX_INTERPENDMAYBE;
5734 else if (*s != '[' && *s != '{')
5735 PL_lex_state = LEX_INTERPEND;
5736 }
5737 }
5738 if (PL_expect & XFAKEBRACK) {
5739 PL_expect &= XENUMMASK;
5740 PL_bufptr = s;
5741 return yylex(); /* ignore fake brackets */
5742 }
5743 start_force(PL_curforce);
5744 if (PL_madskills) {
5745 curmad('X', newSVpvn(s-1,1));
5746 CURMAD('_', PL_thiswhite);
5747 }
5748 force_next('}');
5749#ifdef PERL_MAD
5750 if (!PL_thistoken)
5751 PL_thistoken = newSVpvs("");
5752#endif
5753 TOKEN(';');
5754 case '&':
5755 s++;
5756 if (*s++ == '&') {
5757 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5758 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
5759 s -= 2;
5760 TOKEN(0);
5761 }
5762 AOPERATOR(ANDAND);
5763 }
5764 s--;
5765 if (PL_expect == XOPERATOR) {
5766 if (PL_bufptr == PL_linestart && ckWARN(WARN_SEMICOLON)
5767 && isIDFIRST_lazy_if(s,UTF))
5768 {
5769 CopLINE_dec(PL_curcop);
5770 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
5771 CopLINE_inc(PL_curcop);
5772 }
5773 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5774 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
5775 s--;
5776 TOKEN(0);
5777 }
5778 BAop(OP_BIT_AND);
5779 }
5780
5781 s = scan_ident(s - 1, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5782 if (*PL_tokenbuf) {
5783 PL_expect = XOPERATOR;
5784 force_ident(PL_tokenbuf, '&');
5785 }
5786 else
5787 PREREF('&');
5788 pl_yylval.ival = (OPpENTERSUB_AMPER<<8);
5789 TERM('&');
5790
5791 case '|':
5792 s++;
5793 if (*s++ == '|') {
5794 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5795 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
5796 s -= 2;
5797 TOKEN(0);
5798 }
5799 AOPERATOR(OROR);
5800 }
5801 s--;
5802 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5803 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
5804 s--;
5805 TOKEN(0);
5806 }
5807 BOop(OP_BIT_OR);
5808 case '=':
5809 s++;
5810 {
5811 const char tmp = *s++;
5812 if (tmp == '=') {
5813 if (!PL_lex_allbrackets &&
5814 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5815 s -= 2;
5816 TOKEN(0);
5817 }
5818 Eop(OP_EQ);
5819 }
5820 if (tmp == '>') {
5821 if (!PL_lex_allbrackets &&
5822 PL_lex_fakeeof >= LEX_FAKEEOF_COMMA) {
5823 s -= 2;
5824 TOKEN(0);
5825 }
5826 OPERATOR(',');
5827 }
5828 if (tmp == '~')
5829 PMop(OP_MATCH);
5830 if (tmp && isSPACE(*s) && ckWARN(WARN_SYNTAX)
5831 && strchr("+-*/%.^&|<",tmp))
5832 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5833 "Reversed %c= operator",(int)tmp);
5834 s--;
5835 if (PL_expect == XSTATE && isALPHA(tmp) &&
5836 (s == PL_linestart+1 || s[-2] == '\n') )
5837 {
5838 if (PL_in_eval && !PL_rsfp) {
5839 d = PL_bufend;
5840 while (s < d) {
5841 if (*s++ == '\n') {
5842 incline(s);
5843 if (strnEQ(s,"=cut",4)) {
5844 s = strchr(s,'\n');
5845 if (s)
5846 s++;
5847 else
5848 s = d;
5849 incline(s);
5850 goto retry;
5851 }
5852 }
5853 }
5854 goto retry;
5855 }
5856#ifdef PERL_MAD
5857 if (PL_madskills) {
5858 if (!PL_thiswhite)
5859 PL_thiswhite = newSVpvs("");
5860 sv_catpvn(PL_thiswhite, PL_linestart,
5861 PL_bufend - PL_linestart);
5862 }
5863#endif
5864 s = PL_bufend;
5865 PL_parser->in_pod = 1;
5866 goto retry;
5867 }
5868 }
5869 if (PL_lex_brackets < PL_lex_formbrack) {
5870 const char *t = s;
5871#ifdef PERL_STRICT_CR
5872 while (SPACE_OR_TAB(*t))
5873#else
5874 while (SPACE_OR_TAB(*t) || *t == '\r')
5875#endif
5876 t++;
5877 if (*t == '\n' || *t == '#') {
5878 s--;
5879 PL_expect = XBLOCK;
5880 goto leftbracket;
5881 }
5882 }
5883 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5884 s--;
5885 TOKEN(0);
5886 }
5887 pl_yylval.ival = 0;
5888 OPERATOR(ASSIGNOP);
5889 case '!':
5890 s++;
5891 {
5892 const char tmp = *s++;
5893 if (tmp == '=') {
5894 /* was this !=~ where !~ was meant?
5895 * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */
5896
5897 if (*s == '~' && ckWARN(WARN_SYNTAX)) {
5898 const char *t = s+1;
5899
5900 while (t < PL_bufend && isSPACE(*t))
5901 ++t;
5902
5903 if (*t == '/' || *t == '?' ||
5904 ((*t == 'm' || *t == 's' || *t == 'y')
5905 && !isALNUM(t[1])) ||
5906 (*t == 't' && t[1] == 'r' && !isALNUM(t[2])))
5907 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5908 "!=~ should be !~");
5909 }
5910 if (!PL_lex_allbrackets &&
5911 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5912 s -= 2;
5913 TOKEN(0);
5914 }
5915 Eop(OP_NE);
5916 }
5917 if (tmp == '~')
5918 PMop(OP_NOT);
5919 }
5920 s--;
5921 OPERATOR('!');
5922 case '<':
5923 if (PL_expect != XOPERATOR) {
5924 if (s[1] != '<' && !strchr(s,'>'))
5925 check_uni();
5926 if (s[1] == '<')
5927 s = scan_heredoc(s);
5928 else
5929 s = scan_inputsymbol(s);
5930 TERM(sublex_start());
5931 }
5932 s++;
5933 {
5934 char tmp = *s++;
5935 if (tmp == '<') {
5936 if (*s == '=' && !PL_lex_allbrackets &&
5937 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5938 s -= 2;
5939 TOKEN(0);
5940 }
5941 SHop(OP_LEFT_SHIFT);
5942 }
5943 if (tmp == '=') {
5944 tmp = *s++;
5945 if (tmp == '>') {
5946 if (!PL_lex_allbrackets &&
5947 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5948 s -= 3;
5949 TOKEN(0);
5950 }
5951 Eop(OP_NCMP);
5952 }
5953 s--;
5954 if (!PL_lex_allbrackets &&
5955 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5956 s -= 2;
5957 TOKEN(0);
5958 }
5959 Rop(OP_LE);
5960 }
5961 }
5962 s--;
5963 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5964 s--;
5965 TOKEN(0);
5966 }
5967 Rop(OP_LT);
5968 case '>':
5969 s++;
5970 {
5971 const char tmp = *s++;
5972 if (tmp == '>') {
5973 if (*s == '=' && !PL_lex_allbrackets &&
5974 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5975 s -= 2;
5976 TOKEN(0);
5977 }
5978 SHop(OP_RIGHT_SHIFT);
5979 }
5980 else if (tmp == '=') {
5981 if (!PL_lex_allbrackets &&
5982 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5983 s -= 2;
5984 TOKEN(0);
5985 }
5986 Rop(OP_GE);
5987 }
5988 }
5989 s--;
5990 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5991 s--;
5992 TOKEN(0);
5993 }
5994 Rop(OP_GT);
5995
5996 case '$':
5997 CLINE;
5998
5999 if (PL_expect == XOPERATOR) {
6000 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
6001 return deprecate_commaless_var_list();
6002 }
6003 }
6004
6005 if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-@", s[2]))) {
6006 PL_tokenbuf[0] = '@';
6007 s = scan_ident(s + 1, PL_bufend, PL_tokenbuf + 1,
6008 sizeof PL_tokenbuf - 1, FALSE);
6009 if (PL_expect == XOPERATOR)
6010 no_op("Array length", s);
6011 if (!PL_tokenbuf[1])
6012 PREREF(DOLSHARP);
6013 PL_expect = XOPERATOR;
6014 PL_pending_ident = '#';
6015 TOKEN(DOLSHARP);
6016 }
6017
6018 PL_tokenbuf[0] = '$';
6019 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
6020 sizeof PL_tokenbuf - 1, FALSE);
6021 if (PL_expect == XOPERATOR)
6022 no_op("Scalar", s);
6023 if (!PL_tokenbuf[1]) {
6024 if (s == PL_bufend)
6025 yyerror("Final $ should be \\$ or $name");
6026 PREREF('$');
6027 }
6028
6029 d = s;
6030 {
6031 const char tmp = *s;
6032 if (PL_lex_state == LEX_NORMAL || PL_lex_brackets)
6033 s = SKIPSPACE1(s);
6034
6035 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
6036 && intuit_more(s)) {
6037 if (*s == '[') {
6038 PL_tokenbuf[0] = '@';
6039 if (ckWARN(WARN_SYNTAX)) {
6040 char *t = s+1;
6041
6042 while (isSPACE(*t) || isALNUM_lazy_if(t,UTF) || *t == '$')
6043 t++;
6044 if (*t++ == ',') {
6045 PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
6046 while (t < PL_bufend && *t != ']')
6047 t++;
6048 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6049 "Multidimensional syntax %.*s not supported",
6050 (int)((t - PL_bufptr) + 1), PL_bufptr);
6051 }
6052 }
6053 }
6054 else if (*s == '{') {
6055 char *t;
6056 PL_tokenbuf[0] = '%';
6057 if (strEQ(PL_tokenbuf+1, "SIG") && ckWARN(WARN_SYNTAX)
6058 && (t = strchr(s, '}')) && (t = strchr(t, '=')))
6059 {
6060 char tmpbuf[sizeof PL_tokenbuf];
6061 do {
6062 t++;
6063 } while (isSPACE(*t));
6064 if (isIDFIRST_lazy_if(t,UTF)) {
6065 STRLEN len;
6066 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE,
6067 &len);
6068 while (isSPACE(*t))
6069 t++;
6070 if (*t == ';' && get_cvn_flags(tmpbuf, len, 0))
6071 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6072 "You need to quote \"%s\"",
6073 tmpbuf);
6074 }
6075 }
6076 }
6077 }
6078
6079 PL_expect = XOPERATOR;
6080 if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
6081 const bool islop = (PL_last_lop == PL_oldoldbufptr);
6082 if (!islop || PL_last_lop_op == OP_GREPSTART)
6083 PL_expect = XOPERATOR;
6084 else if (strchr("$@\"'`q", *s))
6085 PL_expect = XTERM; /* e.g. print $fh "foo" */
6086 else if (strchr("&*<%", *s) && isIDFIRST_lazy_if(s+1,UTF))
6087 PL_expect = XTERM; /* e.g. print $fh &sub */
6088 else if (isIDFIRST_lazy_if(s,UTF)) {
6089 char tmpbuf[sizeof PL_tokenbuf];
6090 int t2;
6091 scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
6092 if ((t2 = keyword(tmpbuf, len, 0))) {
6093 /* binary operators exclude handle interpretations */
6094 switch (t2) {
6095 case -KEY_x:
6096 case -KEY_eq:
6097 case -KEY_ne:
6098 case -KEY_gt:
6099 case -KEY_lt:
6100 case -KEY_ge:
6101 case -KEY_le:
6102 case -KEY_cmp:
6103 break;
6104 default:
6105 PL_expect = XTERM; /* e.g. print $fh length() */
6106 break;
6107 }
6108 }
6109 else {
6110 PL_expect = XTERM; /* e.g. print $fh subr() */
6111 }
6112 }
6113 else if (isDIGIT(*s))
6114 PL_expect = XTERM; /* e.g. print $fh 3 */
6115 else if (*s == '.' && isDIGIT(s[1]))
6116 PL_expect = XTERM; /* e.g. print $fh .3 */
6117 else if ((*s == '?' || *s == '-' || *s == '+')
6118 && !isSPACE(s[1]) && s[1] != '=')
6119 PL_expect = XTERM; /* e.g. print $fh -1 */
6120 else if (*s == '/' && !isSPACE(s[1]) && s[1] != '='
6121 && s[1] != '/')
6122 PL_expect = XTERM; /* e.g. print $fh /.../
6123 XXX except DORDOR operator
6124 */
6125 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2])
6126 && s[2] != '=')
6127 PL_expect = XTERM; /* print $fh <<"EOF" */
6128 }
6129 }
6130 PL_pending_ident = '$';
6131 TOKEN('$');
6132
6133 case '@':
6134 if (PL_expect == XOPERATOR)
6135 no_op("Array", s);
6136 PL_tokenbuf[0] = '@';
6137 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
6138 if (!PL_tokenbuf[1]) {
6139 PREREF('@');
6140 }
6141 if (PL_lex_state == LEX_NORMAL)
6142 s = SKIPSPACE1(s);
6143 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
6144 if (*s == '{')
6145 PL_tokenbuf[0] = '%';
6146
6147 /* Warn about @ where they meant $. */
6148 if (*s == '[' || *s == '{') {
6149 if (ckWARN(WARN_SYNTAX)) {
6150 const char *t = s + 1;
6151 while (*t && (isALNUM_lazy_if(t,UTF) || strchr(" \t$#+-'\"", *t)))
6152 t++;
6153 if (*t == '}' || *t == ']') {
6154 t++;
6155 PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
6156 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6157 "Scalar value %.*s better written as $%.*s",
6158 (int)(t-PL_bufptr), PL_bufptr,
6159 (int)(t-PL_bufptr-1), PL_bufptr+1);
6160 }
6161 }
6162 }
6163 }
6164 PL_pending_ident = '@';
6165 TERM('@');
6166
6167 case '/': /* may be division, defined-or, or pattern */
6168 if (PL_expect == XTERMORDORDOR && s[1] == '/') {
6169 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6170 (s[2] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC))
6171 TOKEN(0);
6172 s += 2;
6173 AOPERATOR(DORDOR);
6174 }
6175 case '?': /* may either be conditional or pattern */
6176 if (PL_expect == XOPERATOR) {
6177 char tmp = *s++;
6178 if(tmp == '?') {
6179 if (!PL_lex_allbrackets &&
6180 PL_lex_fakeeof >= LEX_FAKEEOF_IFELSE) {
6181 s--;
6182 TOKEN(0);
6183 }
6184 PL_lex_allbrackets++;
6185 OPERATOR('?');
6186 }
6187 else {
6188 tmp = *s++;
6189 if(tmp == '/') {
6190 /* A // operator. */
6191 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6192 (*s == '=' ? LEX_FAKEEOF_ASSIGN :
6193 LEX_FAKEEOF_LOGIC)) {
6194 s -= 2;
6195 TOKEN(0);
6196 }
6197 AOPERATOR(DORDOR);
6198 }
6199 else {
6200 s--;
6201 if (*s == '=' && !PL_lex_allbrackets &&
6202 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6203 s--;
6204 TOKEN(0);
6205 }
6206 Mop(OP_DIVIDE);
6207 }
6208 }
6209 }
6210 else {
6211 /* Disable warning on "study /blah/" */
6212 if (PL_oldoldbufptr == PL_last_uni
6213 && (*PL_last_uni != 's' || s - PL_last_uni < 5
6214 || memNE(PL_last_uni, "study", 5)
6215 || isALNUM_lazy_if(PL_last_uni+5,UTF)
6216 ))
6217 check_uni();
6218 if (*s == '?')
6219 deprecate("?PATTERN? without explicit operator");
6220 s = scan_pat(s,OP_MATCH);
6221 TERM(sublex_start());
6222 }
6223
6224 case '.':
6225 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
6226#ifdef PERL_STRICT_CR
6227 && s[1] == '\n'
6228#else
6229 && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
6230#endif
6231 && (s == PL_linestart || s[-1] == '\n') )
6232 {
6233 PL_lex_formbrack = 0;
6234 PL_expect = XSTATE;
6235 goto rightbracket;
6236 }
6237 if (PL_expect == XSTATE && s[1] == '.' && s[2] == '.') {
6238 s += 3;
6239 OPERATOR(YADAYADA);
6240 }
6241 if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
6242 char tmp = *s++;
6243 if (*s == tmp) {
6244 if (!PL_lex_allbrackets &&
6245 PL_lex_fakeeof >= LEX_FAKEEOF_RANGE) {
6246 s--;
6247 TOKEN(0);
6248 }
6249 s++;
6250 if (*s == tmp) {
6251 s++;
6252 pl_yylval.ival = OPf_SPECIAL;
6253 }
6254 else
6255 pl_yylval.ival = 0;
6256 OPERATOR(DOTDOT);
6257 }
6258 if (*s == '=' && !PL_lex_allbrackets &&
6259 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6260 s--;
6261 TOKEN(0);
6262 }
6263 Aop(OP_CONCAT);
6264 }
6265 /* FALL THROUGH */
6266 case '0': case '1': case '2': case '3': case '4':
6267 case '5': case '6': case '7': case '8': case '9':
6268 s = scan_num(s, &pl_yylval);
6269 DEBUG_T( { printbuf("### Saw number in %s\n", s); } );
6270 if (PL_expect == XOPERATOR)
6271 no_op("Number",s);
6272 TERM(THING);
6273
6274 case '\'':
6275 s = scan_str(s,!!PL_madskills,FALSE);
6276 DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
6277 if (PL_expect == XOPERATOR) {
6278 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
6279 return deprecate_commaless_var_list();
6280 }
6281 else
6282 no_op("String",s);
6283 }
6284 if (!s)
6285 missingterm(NULL);
6286 pl_yylval.ival = OP_CONST;
6287 TERM(sublex_start());
6288
6289 case '"':
6290 s = scan_str(s,!!PL_madskills,FALSE);
6291 DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
6292 if (PL_expect == XOPERATOR) {
6293 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
6294 return deprecate_commaless_var_list();
6295 }
6296 else
6297 no_op("String",s);
6298 }
6299 if (!s)
6300 missingterm(NULL);
6301 pl_yylval.ival = OP_CONST;
6302 /* FIXME. I think that this can be const if char *d is replaced by
6303 more localised variables. */
6304 for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
6305 if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) {
6306 pl_yylval.ival = OP_STRINGIFY;
6307 break;
6308 }
6309 }
6310 TERM(sublex_start());
6311
6312 case '`':
6313 s = scan_str(s,!!PL_madskills,FALSE);
6314 DEBUG_T( { printbuf("### Saw backtick string before %s\n", s); } );
6315 if (PL_expect == XOPERATOR)
6316 no_op("Backticks",s);
6317 if (!s)
6318 missingterm(NULL);
6319 readpipe_override();
6320 TERM(sublex_start());
6321
6322 case '\\':
6323 s++;
6324 if (PL_lex_inwhat && isDIGIT(*s))
6325 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression",
6326 *s, *s);
6327 if (PL_expect == XOPERATOR)
6328 no_op("Backslash",s);
6329 OPERATOR(REFGEN);
6330
6331 case 'v':
6332 if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
6333 char *start = s + 2;
6334 while (isDIGIT(*start) || *start == '_')
6335 start++;
6336 if (*start == '.' && isDIGIT(start[1])) {
6337 s = scan_num(s, &pl_yylval);
6338 TERM(THING);
6339 }
6340 /* avoid v123abc() or $h{v1}, allow C<print v10;> */
6341 else if (!isALPHA(*start) && (PL_expect == XTERM
6342 || PL_expect == XREF || PL_expect == XSTATE
6343 || PL_expect == XTERMORDORDOR)) {
6344 GV *const gv = gv_fetchpvn_flags(s, start - s, 0, SVt_PVCV);
6345 if (!gv) {
6346 s = scan_num(s, &pl_yylval);
6347 TERM(THING);
6348 }
6349 }
6350 }
6351 goto keylookup;
6352 case 'x':
6353 if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
6354 s++;
6355 Mop(OP_REPEAT);
6356 }
6357 goto keylookup;
6358
6359 case '_':
6360 case 'a': case 'A':
6361 case 'b': case 'B':
6362 case 'c': case 'C':
6363 case 'd': case 'D':
6364 case 'e': case 'E':
6365 case 'f': case 'F':
6366 case 'g': case 'G':
6367 case 'h': case 'H':
6368 case 'i': case 'I':
6369 case 'j': case 'J':
6370 case 'k': case 'K':
6371 case 'l': case 'L':
6372 case 'm': case 'M':
6373 case 'n': case 'N':
6374 case 'o': case 'O':
6375 case 'p': case 'P':
6376 case 'q': case 'Q':
6377 case 'r': case 'R':
6378 case 's': case 'S':
6379 case 't': case 'T':
6380 case 'u': case 'U':
6381 case 'V':
6382 case 'w': case 'W':
6383 case 'X':
6384 case 'y': case 'Y':
6385 case 'z': case 'Z':
6386
6387 keylookup: {
6388 bool anydelim;
6389 I32 tmp;
6390
6391 orig_keyword = 0;
6392 gv = NULL;
6393 gvp = NULL;
6394
6395 PL_bufptr = s;
6396 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
6397
6398 /* Some keywords can be followed by any delimiter, including ':' */
6399 anydelim = word_takes_any_delimeter(PL_tokenbuf, len);
6400
6401 /* x::* is just a word, unless x is "CORE" */
6402 if (!anydelim && *s == ':' && s[1] == ':' && strNE(PL_tokenbuf, "CORE"))
6403 goto just_a_word;
6404
6405 d = s;
6406 while (d < PL_bufend && isSPACE(*d))
6407 d++; /* no comments skipped here, or s### is misparsed */
6408
6409 /* Is this a word before a => operator? */
6410 if (*d == '=' && d[1] == '>') {
6411 CLINE;
6412 pl_yylval.opval
6413 = (OP*)newSVOP(OP_CONST, 0,
6414 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
6415 pl_yylval.opval->op_private = OPpCONST_BARE;
6416 TERM(WORD);
6417 }
6418
6419 /* Check for plugged-in keyword */
6420 {
6421 OP *o;
6422 int result;
6423 char *saved_bufptr = PL_bufptr;
6424 PL_bufptr = s;
6425 result = PL_keyword_plugin(aTHX_ PL_tokenbuf, len, &o);
6426 s = PL_bufptr;
6427 if (result == KEYWORD_PLUGIN_DECLINE) {
6428 /* not a plugged-in keyword */
6429 PL_bufptr = saved_bufptr;
6430 } else if (result == KEYWORD_PLUGIN_STMT) {
6431 pl_yylval.opval = o;
6432 CLINE;
6433 PL_expect = XSTATE;
6434 return REPORT(PLUGSTMT);
6435 } else if (result == KEYWORD_PLUGIN_EXPR) {
6436 pl_yylval.opval = o;
6437 CLINE;
6438 PL_expect = XOPERATOR;
6439 return REPORT(PLUGEXPR);
6440 } else {
6441 Perl_croak(aTHX_ "Bad plugin affecting keyword '%s'",
6442 PL_tokenbuf);
6443 }
6444 }
6445
6446 /* Check for built-in keyword */
6447 tmp = keyword(PL_tokenbuf, len, 0);
6448
6449 /* Is this a label? */
6450 if (!anydelim && PL_expect == XSTATE
6451 && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
6452 s = d + 1;
6453 pl_yylval.pval = CopLABEL_alloc(PL_tokenbuf);
6454 CLINE;
6455 TOKEN(LABEL);
6456 }
6457
6458 if (tmp < 0) { /* second-class keyword? */
6459 GV *ogv = NULL; /* override (winner) */
6460 GV *hgv = NULL; /* hidden (loser) */
6461 if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
6462 CV *cv;
6463 if ((gv = gv_fetchpvn_flags(PL_tokenbuf, len, 0, SVt_PVCV)) &&
6464 (cv = GvCVu(gv)))
6465 {
6466 if (GvIMPORTED_CV(gv))
6467 ogv = gv;
6468 else if (! CvMETHOD(cv))
6469 hgv = gv;
6470 }
6471 if (!ogv &&
6472 (gvp = (GV**)hv_fetch(PL_globalstash,PL_tokenbuf,len,FALSE)) &&
6473 (gv = *gvp) && isGV_with_GP(gv) &&
6474 GvCVu(gv) && GvIMPORTED_CV(gv))
6475 {
6476 ogv = gv;
6477 }
6478 }
6479 if (ogv) {
6480 orig_keyword = tmp;
6481 tmp = 0; /* overridden by import or by GLOBAL */
6482 }
6483 else if (gv && !gvp
6484 && -tmp==KEY_lock /* XXX generalizable kludge */
6485 && GvCVu(gv))
6486 {
6487 tmp = 0; /* any sub overrides "weak" keyword */
6488 }
6489 else { /* no override */
6490 tmp = -tmp;
6491 if (tmp == KEY_dump) {
6492 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
6493 "dump() better written as CORE::dump()");
6494 }
6495 gv = NULL;
6496 gvp = 0;
6497 if (hgv && tmp != KEY_x && tmp != KEY_CORE) /* never ambiguous */
6498 Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
6499 "Ambiguous call resolved as CORE::%s(), "
6500 "qualify as such or use &",
6501 GvENAME(hgv));
6502 }
6503 }
6504
6505 reserved_word:
6506 switch (tmp) {
6507
6508 default: /* not a keyword */
6509 /* Trade off - by using this evil construction we can pull the
6510 variable gv into the block labelled keylookup. If not, then
6511 we have to give it function scope so that the goto from the
6512 earlier ':' case doesn't bypass the initialisation. */
6513 if (0) {
6514 just_a_word_zero_gv:
6515 gv = NULL;
6516 gvp = NULL;
6517 orig_keyword = 0;
6518 }
6519 just_a_word: {
6520 SV *sv;
6521 int pkgname = 0;
6522 const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
6523 OP *rv2cv_op;
6524 CV *cv;
6525#ifdef PERL_MAD
6526 SV *nextPL_nextwhite = 0;
6527#endif
6528
6529
6530 /* Get the rest if it looks like a package qualifier */
6531
6532 if (*s == '\'' || (*s == ':' && s[1] == ':')) {
6533 STRLEN morelen;
6534 s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
6535 TRUE, &morelen);
6536 if (!morelen)
6537 Perl_croak(aTHX_ "Bad name after %s%s", PL_tokenbuf,
6538 *s == '\'' ? "'" : "::");
6539 len += morelen;
6540 pkgname = 1;
6541 }
6542
6543 if (PL_expect == XOPERATOR) {
6544 if (PL_bufptr == PL_linestart) {
6545 CopLINE_dec(PL_curcop);
6546 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
6547 CopLINE_inc(PL_curcop);
6548 }
6549 else
6550 no_op("Bareword",s);
6551 }
6552
6553 /* Look for a subroutine with this name in current package,
6554 unless name is "Foo::", in which case Foo is a bareword
6555 (and a package name). */
6556
6557 if (len > 2 && !PL_madskills &&
6558 PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':')
6559 {
6560 if (ckWARN(WARN_BAREWORD)
6561 && ! gv_fetchpvn_flags(PL_tokenbuf, len, 0, SVt_PVHV))
6562 Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
6563 "Bareword \"%s\" refers to nonexistent package",
6564 PL_tokenbuf);
6565 len -= 2;
6566 PL_tokenbuf[len] = '\0';
6567 gv = NULL;
6568 gvp = 0;
6569 }
6570 else {
6571 if (!gv) {
6572 /* Mustn't actually add anything to a symbol table.
6573 But also don't want to "initialise" any placeholder
6574 constants that might already be there into full
6575 blown PVGVs with attached PVCV. */
6576 gv = gv_fetchpvn_flags(PL_tokenbuf, len,
6577 GV_NOADD_NOINIT, SVt_PVCV);
6578 }
6579 len = 0;
6580 }
6581
6582 /* if we saw a global override before, get the right name */
6583
6584 sv = S_newSV_maybe_utf8(aTHX_ PL_tokenbuf,
6585 len ? len : strlen(PL_tokenbuf));
6586 if (gvp) {
6587 SV * const tmp_sv = sv;
6588 sv = newSVpvs("CORE::GLOBAL::");
6589 sv_catsv(sv, tmp_sv);
6590 SvREFCNT_dec(tmp_sv);
6591 }
6592
6593#ifdef PERL_MAD
6594 if (PL_madskills && !PL_thistoken) {
6595 char *start = SvPVX(PL_linestr) + PL_realtokenstart;
6596 PL_thistoken = newSVpvn(start,s - start);
6597 PL_realtokenstart = s - SvPVX(PL_linestr);
6598 }
6599#endif
6600
6601 /* Presume this is going to be a bareword of some sort. */
6602 CLINE;
6603 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
6604 pl_yylval.opval->op_private = OPpCONST_BARE;
6605
6606 /* And if "Foo::", then that's what it certainly is. */
6607 if (len)
6608 goto safe_bareword;
6609
6610 {
6611 OP *const_op = newSVOP(OP_CONST, 0, SvREFCNT_inc_NN(sv));
6612 const_op->op_private = OPpCONST_BARE;
6613 rv2cv_op = newCVREF(0, const_op);
6614 }
6615 cv = rv2cv_op_cv(rv2cv_op, 0);
6616
6617 /* See if it's the indirect object for a list operator. */
6618
6619 if (PL_oldoldbufptr &&
6620 PL_oldoldbufptr < PL_bufptr &&
6621 (PL_oldoldbufptr == PL_last_lop
6622 || PL_oldoldbufptr == PL_last_uni) &&
6623 /* NO SKIPSPACE BEFORE HERE! */
6624 (PL_expect == XREF ||
6625 ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7) == OA_FILEREF))
6626 {
6627 bool immediate_paren = *s == '(';
6628
6629 /* (Now we can afford to cross potential line boundary.) */
6630 s = SKIPSPACE2(s,nextPL_nextwhite);
6631#ifdef PERL_MAD
6632 PL_nextwhite = nextPL_nextwhite; /* assume no & deception */
6633#endif
6634
6635 /* Two barewords in a row may indicate method call. */
6636
6637 if ((isIDFIRST_lazy_if(s,UTF) || *s == '$') &&
6638 (tmp = intuit_method(s, gv, cv))) {
6639 op_free(rv2cv_op);
6640 if (tmp == METHOD && !PL_lex_allbrackets &&
6641 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6642 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
6643 return REPORT(tmp);
6644 }
6645
6646 /* If not a declared subroutine, it's an indirect object. */
6647 /* (But it's an indir obj regardless for sort.) */
6648 /* Also, if "_" follows a filetest operator, it's a bareword */
6649
6650 if (
6651 ( !immediate_paren && (PL_last_lop_op == OP_SORT ||
6652 (!cv &&
6653 (PL_last_lop_op != OP_MAPSTART &&
6654 PL_last_lop_op != OP_GREPSTART))))
6655 || (PL_tokenbuf[0] == '_' && PL_tokenbuf[1] == '\0'
6656 && ((PL_opargs[PL_last_lop_op] & OA_CLASS_MASK) == OA_FILESTATOP))
6657 )
6658 {
6659 PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR;
6660 goto bareword;
6661 }
6662 }
6663
6664 PL_expect = XOPERATOR;
6665#ifdef PERL_MAD
6666 if (isSPACE(*s))
6667 s = SKIPSPACE2(s,nextPL_nextwhite);
6668 PL_nextwhite = nextPL_nextwhite;
6669#else
6670 s = skipspace(s);
6671#endif
6672
6673 /* Is this a word before a => operator? */
6674 if (*s == '=' && s[1] == '>' && !pkgname) {
6675 op_free(rv2cv_op);
6676 CLINE;
6677 sv_setpv(((SVOP*)pl_yylval.opval)->op_sv, PL_tokenbuf);
6678 if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len))
6679 SvUTF8_on(((SVOP*)pl_yylval.opval)->op_sv);
6680 TERM(WORD);
6681 }
6682
6683 /* If followed by a paren, it's certainly a subroutine. */
6684 if (*s == '(') {
6685 CLINE;
6686 if (cv) {
6687 d = s + 1;
6688 while (SPACE_OR_TAB(*d))
6689 d++;
6690 if (*d == ')' && (sv = cv_const_sv(cv))) {
6691 s = d + 1;
6692 goto its_constant;
6693 }
6694 }
6695#ifdef PERL_MAD
6696 if (PL_madskills) {
6697 PL_nextwhite = PL_thiswhite;
6698 PL_thiswhite = 0;
6699 }
6700 start_force(PL_curforce);
6701#endif
6702 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
6703 PL_expect = XOPERATOR;
6704#ifdef PERL_MAD
6705 if (PL_madskills) {
6706 PL_nextwhite = nextPL_nextwhite;
6707 curmad('X', PL_thistoken);
6708 PL_thistoken = newSVpvs("");
6709 }
6710#endif
6711 op_free(rv2cv_op);
6712 force_next(WORD);
6713 pl_yylval.ival = 0;
6714 TOKEN('&');
6715 }
6716
6717 /* If followed by var or block, call it a method (unless sub) */
6718
6719 if ((*s == '$' || *s == '{') && !cv) {
6720 op_free(rv2cv_op);
6721 PL_last_lop = PL_oldbufptr;
6722 PL_last_lop_op = OP_METHOD;
6723 if (!PL_lex_allbrackets &&
6724 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6725 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
6726 PREBLOCK(METHOD);
6727 }
6728
6729 /* If followed by a bareword, see if it looks like indir obj. */
6730
6731 if (!orig_keyword
6732 && (isIDFIRST_lazy_if(s,UTF) || *s == '$')
6733 && (tmp = intuit_method(s, gv, cv))) {
6734 op_free(rv2cv_op);
6735 if (tmp == METHOD && !PL_lex_allbrackets &&
6736 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6737 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
6738 return REPORT(tmp);
6739 }
6740
6741 /* Not a method, so call it a subroutine (if defined) */
6742
6743 if (cv) {
6744 if (lastchar == '-')
6745 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
6746 "Ambiguous use of -%s resolved as -&%s()",
6747 PL_tokenbuf, PL_tokenbuf);
6748 /* Check for a constant sub */
6749 if ((sv = cv_const_sv(cv))) {
6750 its_constant:
6751 op_free(rv2cv_op);
6752 SvREFCNT_dec(((SVOP*)pl_yylval.opval)->op_sv);
6753 ((SVOP*)pl_yylval.opval)->op_sv = SvREFCNT_inc_simple(sv);
6754 pl_yylval.opval->op_private = 0;
6755 pl_yylval.opval->op_flags |= OPf_SPECIAL;
6756 TOKEN(WORD);
6757 }
6758
6759 op_free(pl_yylval.opval);
6760 pl_yylval.opval = rv2cv_op;
6761 pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
6762 PL_last_lop = PL_oldbufptr;
6763 PL_last_lop_op = OP_ENTERSUB;
6764 /* Is there a prototype? */
6765 if (
6766#ifdef PERL_MAD
6767 cv &&
6768#endif
6769 SvPOK(cv))
6770 {
6771 STRLEN protolen;
6772 const char *proto = SvPV_const(MUTABLE_SV(cv), protolen);
6773 if (!protolen)
6774 TERM(FUNC0SUB);
6775 while (*proto == ';')
6776 proto++;
6777 if (
6778 (
6779 (
6780 *proto == '$' || *proto == '_'
6781 || *proto == '*' || *proto == '+'
6782 )
6783 && proto[1] == '\0'
6784 )
6785 || (
6786 *proto == '\\' && proto[1] && proto[2] == '\0'
6787 )
6788 )
6789 OPERATOR(UNIOPSUB);
6790 if (*proto == '\\' && proto[1] == '[') {
6791 const char *p = proto + 2;
6792 while(*p && *p != ']')
6793 ++p;
6794 if(*p == ']' && !p[1]) OPERATOR(UNIOPSUB);
6795 }
6796 if (*proto == '&' && *s == '{') {
6797 if (PL_curstash)
6798 sv_setpvs(PL_subname, "__ANON__");
6799 else
6800 sv_setpvs(PL_subname, "__ANON__::__ANON__");
6801 if (!PL_lex_allbrackets &&
6802 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6803 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
6804 PREBLOCK(LSTOPSUB);
6805 }
6806 }
6807#ifdef PERL_MAD
6808 {
6809 if (PL_madskills) {
6810 PL_nextwhite = PL_thiswhite;
6811 PL_thiswhite = 0;
6812 }
6813 start_force(PL_curforce);
6814 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
6815 PL_expect = XTERM;
6816 if (PL_madskills) {
6817 PL_nextwhite = nextPL_nextwhite;
6818 curmad('X', PL_thistoken);
6819 PL_thistoken = newSVpvs("");
6820 }
6821 force_next(WORD);
6822 if (!PL_lex_allbrackets &&
6823 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6824 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
6825 TOKEN(NOAMP);
6826 }
6827 }
6828
6829 /* Guess harder when madskills require "best effort". */
6830 if (PL_madskills && (!gv || !GvCVu(gv))) {
6831 int probable_sub = 0;
6832 if (strchr("\"'`$@%0123456789!*+{[<", *s))
6833 probable_sub = 1;
6834 else if (isALPHA(*s)) {
6835 char tmpbuf[1024];
6836 STRLEN tmplen;
6837 d = s;
6838 d = scan_word(d, tmpbuf, sizeof tmpbuf, TRUE, &tmplen);
6839 if (!keyword(tmpbuf, tmplen, 0))
6840 probable_sub = 1;
6841 else {
6842 while (d < PL_bufend && isSPACE(*d))
6843 d++;
6844 if (*d == '=' && d[1] == '>')
6845 probable_sub = 1;
6846 }
6847 }
6848 if (probable_sub) {
6849 gv = gv_fetchpv(PL_tokenbuf, GV_ADD, SVt_PVCV);
6850 op_free(pl_yylval.opval);
6851 pl_yylval.opval = rv2cv_op;
6852 pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
6853 PL_last_lop = PL_oldbufptr;
6854 PL_last_lop_op = OP_ENTERSUB;
6855 PL_nextwhite = PL_thiswhite;
6856 PL_thiswhite = 0;
6857 start_force(PL_curforce);
6858 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
6859 PL_expect = XTERM;
6860 PL_nextwhite = nextPL_nextwhite;
6861 curmad('X', PL_thistoken);
6862 PL_thistoken = newSVpvs("");
6863 force_next(WORD);
6864 if (!PL_lex_allbrackets &&
6865 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6866 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
6867 TOKEN(NOAMP);
6868 }
6869#else
6870 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
6871 PL_expect = XTERM;
6872 force_next(WORD);
6873 if (!PL_lex_allbrackets &&
6874 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6875 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
6876 TOKEN(NOAMP);
6877#endif
6878 }
6879
6880 /* Call it a bare word */
6881
6882 if (PL_hints & HINT_STRICT_SUBS)
6883 pl_yylval.opval->op_private |= OPpCONST_STRICT;
6884 else {
6885 bareword:
6886 /* after "print" and similar functions (corresponding to
6887 * "F? L" in opcode.pl), whatever wasn't already parsed as
6888 * a filehandle should be subject to "strict subs".
6889 * Likewise for the optional indirect-object argument to system
6890 * or exec, which can't be a bareword */
6891 if ((PL_last_lop_op == OP_PRINT
6892 || PL_last_lop_op == OP_PRTF
6893 || PL_last_lop_op == OP_SAY
6894 || PL_last_lop_op == OP_SYSTEM
6895 || PL_last_lop_op == OP_EXEC)
6896 && (PL_hints & HINT_STRICT_SUBS))
6897 pl_yylval.opval->op_private |= OPpCONST_STRICT;
6898 if (lastchar != '-') {
6899 if (ckWARN(WARN_RESERVED)) {
6900 d = PL_tokenbuf;
6901 while (isLOWER(*d))
6902 d++;
6903 if (!*d && !gv_stashpv(PL_tokenbuf, 0))
6904 Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved,
6905 PL_tokenbuf);
6906 }
6907 }
6908 }
6909 op_free(rv2cv_op);
6910
6911 safe_bareword:
6912 if ((lastchar == '*' || lastchar == '%' || lastchar == '&')) {
6913 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
6914 "Operator or semicolon missing before %c%s",
6915 lastchar, PL_tokenbuf);
6916 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
6917 "Ambiguous use of %c resolved as operator %c",
6918 lastchar, lastchar);
6919 }
6920 TOKEN(WORD);
6921 }
6922
6923 case KEY___FILE__:
6924 FUN0OP(
6925 (OP*)newSVOP(OP_CONST, 0, newSVpv(CopFILE(PL_curcop),0))
6926 );
6927
6928 case KEY___LINE__:
6929 FUN0OP(
6930 (OP*)newSVOP(OP_CONST, 0,
6931 Perl_newSVpvf(aTHX_ "%"IVdf, (IV)CopLINE(PL_curcop)))
6932 );
6933
6934 case KEY___PACKAGE__:
6935 FUN0OP(
6936 (OP*)newSVOP(OP_CONST, 0,
6937 (PL_curstash
6938 ? newSVhek(HvNAME_HEK(PL_curstash))
6939 : &PL_sv_undef))
6940 );
6941
6942 case KEY___DATA__:
6943 case KEY___END__: {
6944 GV *gv;
6945 if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) {
6946 const char *pname = "main";
6947 if (PL_tokenbuf[2] == 'D')
6948 pname = HvNAME_get(PL_curstash ? PL_curstash : PL_defstash);
6949 gv = gv_fetchpv(Perl_form(aTHX_ "%s::DATA", pname), GV_ADD,
6950 SVt_PVIO);
6951 GvMULTI_on(gv);
6952 if (!GvIO(gv))
6953 GvIOp(gv) = newIO();
6954 IoIFP(GvIOp(gv)) = PL_rsfp;
6955#if defined(HAS_FCNTL) && defined(F_SETFD)
6956 {
6957 const int fd = PerlIO_fileno(PL_rsfp);
6958 fcntl(fd,F_SETFD,fd >= 3);
6959 }
6960#endif
6961 /* Mark this internal pseudo-handle as clean */
6962 IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
6963 if ((PerlIO*)PL_rsfp == PerlIO_stdin())
6964 IoTYPE(GvIOp(gv)) = IoTYPE_STD;
6965 else
6966 IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY;
6967#if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS)
6968 /* if the script was opened in binmode, we need to revert
6969 * it to text mode for compatibility; but only iff it has CRs
6970 * XXX this is a questionable hack at best. */
6971 if (PL_bufend-PL_bufptr > 2
6972 && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r')
6973 {
6974 Off_t loc = 0;
6975 if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) {
6976 loc = PerlIO_tell(PL_rsfp);
6977 (void)PerlIO_seek(PL_rsfp, 0L, 0);
6978 }
6979#ifdef NETWARE
6980 if (PerlLIO_setmode(PL_rsfp, O_TEXT) != -1) {
6981#else
6982 if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) {
6983#endif /* NETWARE */
6984 if (loc > 0)
6985 PerlIO_seek(PL_rsfp, loc, 0);
6986 }
6987 }
6988#endif
6989#ifdef PERLIO_LAYERS
6990 if (!IN_BYTES) {
6991 if (UTF)
6992 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
6993 else if (PL_encoding) {
6994 SV *name;
6995 dSP;
6996 ENTER;
6997 SAVETMPS;
6998 PUSHMARK(sp);
6999 EXTEND(SP, 1);
7000 XPUSHs(PL_encoding);
7001 PUTBACK;
7002 call_method("name", G_SCALAR);
7003 SPAGAIN;
7004 name = POPs;
7005 PUTBACK;
7006 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL,
7007 Perl_form(aTHX_ ":encoding(%"SVf")",
7008 SVfARG(name)));
7009 FREETMPS;
7010 LEAVE;
7011 }
7012 }
7013#endif
7014#ifdef PERL_MAD
7015 if (PL_madskills) {
7016 if (PL_realtokenstart >= 0) {
7017 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
7018 if (!PL_endwhite)
7019 PL_endwhite = newSVpvs("");
7020 sv_catsv(PL_endwhite, PL_thiswhite);
7021 PL_thiswhite = 0;
7022 sv_catpvn(PL_endwhite, tstart, PL_bufend - tstart);
7023 PL_realtokenstart = -1;
7024 }
7025 while ((s = filter_gets(PL_endwhite, SvCUR(PL_endwhite)))
7026 != NULL) ;
7027 }
7028#endif
7029 PL_rsfp = NULL;
7030 }
7031 goto fake_eof;
7032 }
7033
7034 case KEY_AUTOLOAD:
7035 case KEY_DESTROY:
7036 case KEY_BEGIN:
7037 case KEY_UNITCHECK:
7038 case KEY_CHECK:
7039 case KEY_INIT:
7040 case KEY_END:
7041 if (PL_expect == XSTATE) {
7042 s = PL_bufptr;
7043 goto really_sub;
7044 }
7045 goto just_a_word;
7046
7047 case KEY_CORE:
7048 if (*s == ':' && s[1] == ':') {
7049 s += 2;
7050 d = s;
7051 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
7052 if (!(tmp = keyword(PL_tokenbuf, len, 1)))
7053 Perl_croak(aTHX_ "CORE::%s is not a keyword", PL_tokenbuf);
7054 if (tmp < 0)
7055 tmp = -tmp;
7056 else if (tmp == KEY_require || tmp == KEY_do)
7057 /* that's a way to remember we saw "CORE::" */
7058 orig_keyword = tmp;
7059 goto reserved_word;
7060 }
7061 goto just_a_word;
7062
7063 case KEY_abs:
7064 UNI(OP_ABS);
7065
7066 case KEY_alarm:
7067 UNI(OP_ALARM);
7068
7069 case KEY_accept:
7070 LOP(OP_ACCEPT,XTERM);
7071
7072 case KEY_and:
7073 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
7074 return REPORT(0);
7075 OPERATOR(ANDOP);
7076
7077 case KEY_atan2:
7078 LOP(OP_ATAN2,XTERM);
7079
7080 case KEY_bind:
7081 LOP(OP_BIND,XTERM);
7082
7083 case KEY_binmode:
7084 LOP(OP_BINMODE,XTERM);
7085
7086 case KEY_bless:
7087 LOP(OP_BLESS,XTERM);
7088
7089 case KEY_break:
7090 FUN0(OP_BREAK);
7091
7092 case KEY_chop:
7093 UNI(OP_CHOP);
7094
7095 case KEY_continue:
7096 /* We have to disambiguate the two senses of
7097 "continue". If the next token is a '{' then
7098 treat it as the start of a continue block;
7099 otherwise treat it as a control operator.
7100 */
7101 s = skipspace(s);
7102 if (*s == '{')
7103 PREBLOCK(CONTINUE);
7104 else
7105 FUN0(OP_CONTINUE);
7106
7107 case KEY_chdir:
7108 /* may use HOME */
7109 (void)gv_fetchpvs("ENV", GV_ADD|GV_NOTQUAL, SVt_PVHV);
7110 UNI(OP_CHDIR);
7111
7112 case KEY_close:
7113 UNI(OP_CLOSE);
7114
7115 case KEY_closedir:
7116 UNI(OP_CLOSEDIR);
7117
7118 case KEY_cmp:
7119 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7120 return REPORT(0);
7121 Eop(OP_SCMP);
7122
7123 case KEY_caller:
7124 UNI(OP_CALLER);
7125
7126 case KEY_crypt:
7127#ifdef FCRYPT
7128 if (!PL_cryptseen) {
7129 PL_cryptseen = TRUE;
7130 init_des();
7131 }
7132#endif
7133 LOP(OP_CRYPT,XTERM);
7134
7135 case KEY_chmod:
7136 LOP(OP_CHMOD,XTERM);
7137
7138 case KEY_chown:
7139 LOP(OP_CHOWN,XTERM);
7140
7141 case KEY_connect:
7142 LOP(OP_CONNECT,XTERM);
7143
7144 case KEY_chr:
7145 UNI(OP_CHR);
7146
7147 case KEY_cos:
7148 UNI(OP_COS);
7149
7150 case KEY_chroot:
7151 UNI(OP_CHROOT);
7152
7153 case KEY_default:
7154 PREBLOCK(DEFAULT);
7155
7156 case KEY_do:
7157 s = SKIPSPACE1(s);
7158 if (*s == '{')
7159 PRETERMBLOCK(DO);
7160 if (*s != '\'')
7161 s = force_word(s,WORD,TRUE,TRUE,FALSE);
7162 if (orig_keyword == KEY_do) {
7163 orig_keyword = 0;
7164 pl_yylval.ival = 1;
7165 }
7166 else
7167 pl_yylval.ival = 0;
7168 OPERATOR(DO);
7169
7170 case KEY_die:
7171 PL_hints |= HINT_BLOCK_SCOPE;
7172 LOP(OP_DIE,XTERM);
7173
7174 case KEY_defined:
7175 UNI(OP_DEFINED);
7176
7177 case KEY_delete:
7178 UNI(OP_DELETE);
7179
7180 case KEY_dbmopen:
7181 Perl_populate_isa(aTHX_ STR_WITH_LEN("AnyDBM_File::ISA"),
7182 STR_WITH_LEN("NDBM_File::"),
7183 STR_WITH_LEN("DB_File::"),
7184 STR_WITH_LEN("GDBM_File::"),
7185 STR_WITH_LEN("SDBM_File::"),
7186 STR_WITH_LEN("ODBM_File::"),
7187 NULL);
7188 LOP(OP_DBMOPEN,XTERM);
7189
7190 case KEY_dbmclose:
7191 UNI(OP_DBMCLOSE);
7192
7193 case KEY_dump:
7194 s = force_word(s,WORD,TRUE,FALSE,FALSE);
7195 LOOPX(OP_DUMP);
7196
7197 case KEY_else:
7198 PREBLOCK(ELSE);
7199
7200 case KEY_elsif:
7201 pl_yylval.ival = CopLINE(PL_curcop);
7202 OPERATOR(ELSIF);
7203
7204 case KEY_eq:
7205 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7206 return REPORT(0);
7207 Eop(OP_SEQ);
7208
7209 case KEY_exists:
7210 UNI(OP_EXISTS);
7211
7212 case KEY_exit:
7213 if (PL_madskills)
7214 UNI(OP_INT);
7215 UNI(OP_EXIT);
7216
7217 case KEY_eval:
7218 s = SKIPSPACE1(s);
7219 if (*s == '{') { /* block eval */
7220 PL_expect = XTERMBLOCK;
7221 UNIBRACK(OP_ENTERTRY);
7222 }
7223 else { /* string eval */
7224 PL_expect = XTERM;
7225 UNIBRACK(OP_ENTEREVAL);
7226 }
7227
7228 case KEY_eof:
7229 UNI(OP_EOF);
7230
7231 case KEY_exp:
7232 UNI(OP_EXP);
7233
7234 case KEY_each:
7235 UNI(OP_EACH);
7236
7237 case KEY_exec:
7238 LOP(OP_EXEC,XREF);
7239
7240 case KEY_endhostent:
7241 FUN0(OP_EHOSTENT);
7242
7243 case KEY_endnetent:
7244 FUN0(OP_ENETENT);
7245
7246 case KEY_endservent:
7247 FUN0(OP_ESERVENT);
7248
7249 case KEY_endprotoent:
7250 FUN0(OP_EPROTOENT);
7251
7252 case KEY_endpwent:
7253 FUN0(OP_EPWENT);
7254
7255 case KEY_endgrent:
7256 FUN0(OP_EGRENT);
7257
7258 case KEY_for:
7259 case KEY_foreach:
7260 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
7261 return REPORT(0);
7262 pl_yylval.ival = CopLINE(PL_curcop);
7263 s = SKIPSPACE1(s);
7264 if (PL_expect == XSTATE && isIDFIRST_lazy_if(s,UTF)) {
7265 char *p = s;
7266#ifdef PERL_MAD
7267 int soff = s - SvPVX(PL_linestr); /* for skipspace realloc */
7268#endif
7269
7270 if ((PL_bufend - p) >= 3 &&
7271 strnEQ(p, "my", 2) && isSPACE(*(p + 2)))
7272 p += 2;
7273 else if ((PL_bufend - p) >= 4 &&
7274 strnEQ(p, "our", 3) && isSPACE(*(p + 3)))
7275 p += 3;
7276 p = PEEKSPACE(p);
7277 if (isIDFIRST_lazy_if(p,UTF)) {
7278 p = scan_ident(p, PL_bufend,
7279 PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
7280 p = PEEKSPACE(p);
7281 }
7282 if (*p != '$')
7283 Perl_croak(aTHX_ "Missing $ on loop variable");
7284#ifdef PERL_MAD
7285 s = SvPVX(PL_linestr) + soff;
7286#endif
7287 }
7288 OPERATOR(FOR);
7289
7290 case KEY_formline:
7291 LOP(OP_FORMLINE,XTERM);
7292
7293 case KEY_fork:
7294 FUN0(OP_FORK);
7295
7296 case KEY_fcntl:
7297 LOP(OP_FCNTL,XTERM);
7298
7299 case KEY_fileno:
7300 UNI(OP_FILENO);
7301
7302 case KEY_flock:
7303 LOP(OP_FLOCK,XTERM);
7304
7305 case KEY_gt:
7306 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7307 return REPORT(0);
7308 Rop(OP_SGT);
7309
7310 case KEY_ge:
7311 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7312 return REPORT(0);
7313 Rop(OP_SGE);
7314
7315 case KEY_grep:
7316 LOP(OP_GREPSTART, XREF);
7317
7318 case KEY_goto:
7319 s = force_word(s,WORD,TRUE,FALSE,FALSE);
7320 LOOPX(OP_GOTO);
7321
7322 case KEY_gmtime:
7323 UNI(OP_GMTIME);
7324
7325 case KEY_getc:
7326 UNIDOR(OP_GETC);
7327
7328 case KEY_getppid:
7329 FUN0(OP_GETPPID);
7330
7331 case KEY_getpgrp:
7332 UNI(OP_GETPGRP);
7333
7334 case KEY_getpriority:
7335 LOP(OP_GETPRIORITY,XTERM);
7336
7337 case KEY_getprotobyname:
7338 UNI(OP_GPBYNAME);
7339
7340 case KEY_getprotobynumber:
7341 LOP(OP_GPBYNUMBER,XTERM);
7342
7343 case KEY_getprotoent:
7344 FUN0(OP_GPROTOENT);
7345
7346 case KEY_getpwent:
7347 FUN0(OP_GPWENT);
7348
7349 case KEY_getpwnam:
7350 UNI(OP_GPWNAM);
7351
7352 case KEY_getpwuid:
7353 UNI(OP_GPWUID);
7354
7355 case KEY_getpeername:
7356 UNI(OP_GETPEERNAME);
7357
7358 case KEY_gethostbyname:
7359 UNI(OP_GHBYNAME);
7360
7361 case KEY_gethostbyaddr:
7362 LOP(OP_GHBYADDR,XTERM);
7363
7364 case KEY_gethostent:
7365 FUN0(OP_GHOSTENT);
7366
7367 case KEY_getnetbyname:
7368 UNI(OP_GNBYNAME);
7369
7370 case KEY_getnetbyaddr:
7371 LOP(OP_GNBYADDR,XTERM);
7372
7373 case KEY_getnetent:
7374 FUN0(OP_GNETENT);
7375
7376 case KEY_getservbyname:
7377 LOP(OP_GSBYNAME,XTERM);
7378
7379 case KEY_getservbyport:
7380 LOP(OP_GSBYPORT,XTERM);
7381
7382 case KEY_getservent:
7383 FUN0(OP_GSERVENT);
7384
7385 case KEY_getsockname:
7386 UNI(OP_GETSOCKNAME);
7387
7388 case KEY_getsockopt:
7389 LOP(OP_GSOCKOPT,XTERM);
7390
7391 case KEY_getgrent:
7392 FUN0(OP_GGRENT);
7393
7394 case KEY_getgrnam:
7395 UNI(OP_GGRNAM);
7396
7397 case KEY_getgrgid:
7398 UNI(OP_GGRGID);
7399
7400 case KEY_getlogin:
7401 FUN0(OP_GETLOGIN);
7402
7403 case KEY_given:
7404 pl_yylval.ival = CopLINE(PL_curcop);
7405 OPERATOR(GIVEN);
7406
7407 case KEY_glob:
7408 LOP(OP_GLOB,XTERM);
7409
7410 case KEY_hex:
7411 UNI(OP_HEX);
7412
7413 case KEY_if:
7414 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
7415 return REPORT(0);
7416 pl_yylval.ival = CopLINE(PL_curcop);
7417 OPERATOR(IF);
7418
7419 case KEY_index:
7420 LOP(OP_INDEX,XTERM);
7421
7422 case KEY_int:
7423 UNI(OP_INT);
7424
7425 case KEY_ioctl:
7426 LOP(OP_IOCTL,XTERM);
7427
7428 case KEY_join:
7429 LOP(OP_JOIN,XTERM);
7430
7431 case KEY_keys:
7432 UNI(OP_KEYS);
7433
7434 case KEY_kill:
7435 LOP(OP_KILL,XTERM);
7436
7437 case KEY_last:
7438 s = force_word(s,WORD,TRUE,FALSE,FALSE);
7439 LOOPX(OP_LAST);
7440
7441 case KEY_lc:
7442 UNI(OP_LC);
7443
7444 case KEY_lcfirst:
7445 UNI(OP_LCFIRST);
7446
7447 case KEY_local:
7448 pl_yylval.ival = 0;
7449 OPERATOR(LOCAL);
7450
7451 case KEY_length:
7452 UNI(OP_LENGTH);
7453
7454 case KEY_lt:
7455 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7456 return REPORT(0);
7457 Rop(OP_SLT);
7458
7459 case KEY_le:
7460 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7461 return REPORT(0);
7462 Rop(OP_SLE);
7463
7464 case KEY_localtime:
7465 UNI(OP_LOCALTIME);
7466
7467 case KEY_log:
7468 UNI(OP_LOG);
7469
7470 case KEY_link:
7471 LOP(OP_LINK,XTERM);
7472
7473 case KEY_listen:
7474 LOP(OP_LISTEN,XTERM);
7475
7476 case KEY_lock:
7477 UNI(OP_LOCK);
7478
7479 case KEY_lstat:
7480 UNI(OP_LSTAT);
7481
7482 case KEY_m:
7483 s = scan_pat(s,OP_MATCH);
7484 TERM(sublex_start());
7485
7486 case KEY_map:
7487 LOP(OP_MAPSTART, XREF);
7488
7489 case KEY_mkdir:
7490 LOP(OP_MKDIR,XTERM);
7491
7492 case KEY_msgctl:
7493 LOP(OP_MSGCTL,XTERM);
7494
7495 case KEY_msgget:
7496 LOP(OP_MSGGET,XTERM);
7497
7498 case KEY_msgrcv:
7499 LOP(OP_MSGRCV,XTERM);
7500
7501 case KEY_msgsnd:
7502 LOP(OP_MSGSND,XTERM);
7503
7504 case KEY_our:
7505 case KEY_my:
7506 case KEY_state:
7507 PL_in_my = (U16)tmp;
7508 s = SKIPSPACE1(s);
7509 if (isIDFIRST_lazy_if(s,UTF)) {
7510#ifdef PERL_MAD
7511 char* start = s;
7512#endif
7513 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
7514 if (len == 3 && strnEQ(PL_tokenbuf, "sub", 3))
7515 goto really_sub;
7516 PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
7517 if (!PL_in_my_stash) {
7518 char tmpbuf[1024];
7519 PL_bufptr = s;
7520 my_snprintf(tmpbuf, sizeof(tmpbuf), "No such class %.1000s", PL_tokenbuf);
7521 yyerror(tmpbuf);
7522 }
7523#ifdef PERL_MAD
7524 if (PL_madskills) { /* just add type to declarator token */
7525 sv_catsv(PL_thistoken, PL_nextwhite);
7526 PL_nextwhite = 0;
7527 sv_catpvn(PL_thistoken, start, s - start);
7528 }
7529#endif
7530 }
7531 pl_yylval.ival = 1;
7532 OPERATOR(MY);
7533
7534 case KEY_next:
7535 s = force_word(s,WORD,TRUE,FALSE,FALSE);
7536 LOOPX(OP_NEXT);
7537
7538 case KEY_ne:
7539 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7540 return REPORT(0);
7541 Eop(OP_SNE);
7542
7543 case KEY_no:
7544 s = tokenize_use(0, s);
7545 OPERATOR(USE);
7546
7547 case KEY_not:
7548 if (*s == '(' || (s = SKIPSPACE1(s), *s == '('))
7549 FUN1(OP_NOT);
7550 else {
7551 if (!PL_lex_allbrackets &&
7552 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7553 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7554 OPERATOR(NOTOP);
7555 }
7556
7557 case KEY_open:
7558 s = SKIPSPACE1(s);
7559 if (isIDFIRST_lazy_if(s,UTF)) {
7560 const char *t;
7561 for (d = s; isALNUM_lazy_if(d,UTF);)
7562 d++;
7563 for (t=d; isSPACE(*t);)
7564 t++;
7565 if ( *t && strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
7566 /* [perl #16184] */
7567 && !(t[0] == '=' && t[1] == '>')
7568 ) {
7569 int parms_len = (int)(d-s);
7570 Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
7571 "Precedence problem: open %.*s should be open(%.*s)",
7572 parms_len, s, parms_len, s);
7573 }
7574 }
7575 LOP(OP_OPEN,XTERM);
7576
7577 case KEY_or:
7578 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
7579 return REPORT(0);
7580 pl_yylval.ival = OP_OR;
7581 OPERATOR(OROP);
7582
7583 case KEY_ord:
7584 UNI(OP_ORD);
7585
7586 case KEY_oct:
7587 UNI(OP_OCT);
7588
7589 case KEY_opendir:
7590 LOP(OP_OPEN_DIR,XTERM);
7591
7592 case KEY_print:
7593 checkcomma(s,PL_tokenbuf,"filehandle");
7594 LOP(OP_PRINT,XREF);
7595
7596 case KEY_printf:
7597 checkcomma(s,PL_tokenbuf,"filehandle");
7598 LOP(OP_PRTF,XREF);
7599
7600 case KEY_prototype:
7601 UNI(OP_PROTOTYPE);
7602
7603 case KEY_push:
7604 LOP(OP_PUSH,XTERM);
7605
7606 case KEY_pop:
7607 UNIDOR(OP_POP);
7608
7609 case KEY_pos:
7610 UNIDOR(OP_POS);
7611
7612 case KEY_pack:
7613 LOP(OP_PACK,XTERM);
7614
7615 case KEY_package:
7616 s = force_word(s,WORD,FALSE,TRUE,FALSE);
7617 s = SKIPSPACE1(s);
7618 s = force_strict_version(s);
7619 PL_lex_expect = XBLOCK;
7620 OPERATOR(PACKAGE);
7621
7622 case KEY_pipe:
7623 LOP(OP_PIPE_OP,XTERM);
7624
7625 case KEY_q:
7626 s = scan_str(s,!!PL_madskills,FALSE);
7627 if (!s)
7628 missingterm(NULL);
7629 pl_yylval.ival = OP_CONST;
7630 TERM(sublex_start());
7631
7632 case KEY_quotemeta:
7633 UNI(OP_QUOTEMETA);
7634
7635 case KEY_qw: {
7636 OP *words = NULL;
7637 s = scan_str(s,!!PL_madskills,FALSE);
7638 if (!s)
7639 missingterm(NULL);
7640 PL_expect = XOPERATOR;
7641 if (SvCUR(PL_lex_stuff)) {
7642 int warned_comma = !ckWARN(WARN_QW);
7643 int warned_comment = warned_comma;
7644 d = SvPV_force(PL_lex_stuff, len);
7645 while (len) {
7646 for (; isSPACE(*d) && len; --len, ++d)
7647 /**/;
7648 if (len) {
7649 SV *sv;
7650 const char *b = d;
7651 if (!warned_comma || !warned_comment) {
7652 for (; !isSPACE(*d) && len; --len, ++d) {
7653 if (!warned_comma && *d == ',') {
7654 Perl_warner(aTHX_ packWARN(WARN_QW),
7655 "Possible attempt to separate words with commas");
7656 ++warned_comma;
7657 }
7658 else if (!warned_comment && *d == '#') {
7659 Perl_warner(aTHX_ packWARN(WARN_QW),
7660 "Possible attempt to put comments in qw() list");
7661 ++warned_comment;
7662 }
7663 }
7664 }
7665 else {
7666 for (; !isSPACE(*d) && len; --len, ++d)
7667 /**/;
7668 }
7669 sv = newSVpvn_utf8(b, d-b, DO_UTF8(PL_lex_stuff));
7670 words = op_append_elem(OP_LIST, words,
7671 newSVOP(OP_CONST, 0, tokeq(sv)));
7672 }
7673 }
7674 }
7675 if (!words)
7676 words = newNULLLIST();
7677 if (PL_lex_stuff) {
7678 SvREFCNT_dec(PL_lex_stuff);
7679 PL_lex_stuff = NULL;
7680 }
7681 PL_expect = XOPERATOR;
7682 pl_yylval.opval = sawparens(words);
7683 TOKEN(QWLIST);
7684 }
7685
7686 case KEY_qq:
7687 s = scan_str(s,!!PL_madskills,FALSE);
7688 if (!s)
7689 missingterm(NULL);
7690 pl_yylval.ival = OP_STRINGIFY;
7691 if (SvIVX(PL_lex_stuff) == '\'')
7692 SvIV_set(PL_lex_stuff, 0); /* qq'$foo' should interpolate */
7693 TERM(sublex_start());
7694
7695 case KEY_qr:
7696 s = scan_pat(s,OP_QR);
7697 TERM(sublex_start());
7698
7699 case KEY_qx:
7700 s = scan_str(s,!!PL_madskills,FALSE);
7701 if (!s)
7702 missingterm(NULL);
7703 readpipe_override();
7704 TERM(sublex_start());
7705
7706 case KEY_return:
7707 OLDLOP(OP_RETURN);
7708
7709 case KEY_require:
7710 s = SKIPSPACE1(s);
7711 if (isDIGIT(*s)) {
7712 s = force_version(s, FALSE);
7713 }
7714 else if (*s != 'v' || !isDIGIT(s[1])
7715 || (s = force_version(s, TRUE), *s == 'v'))
7716 {
7717 *PL_tokenbuf = '\0';
7718 s = force_word(s,WORD,TRUE,TRUE,FALSE);
7719 if (isIDFIRST_lazy_if(PL_tokenbuf,UTF))
7720 gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf), GV_ADD);
7721 else if (*s == '<')
7722 yyerror("<> should be quotes");
7723 }
7724 if (orig_keyword == KEY_require) {
7725 orig_keyword = 0;
7726 pl_yylval.ival = 1;
7727 }
7728 else
7729 pl_yylval.ival = 0;
7730 PL_expect = XTERM;
7731 PL_bufptr = s;
7732 PL_last_uni = PL_oldbufptr;
7733 PL_last_lop_op = OP_REQUIRE;
7734 s = skipspace(s);
7735 return REPORT( (int)REQUIRE );
7736
7737 case KEY_reset:
7738 UNI(OP_RESET);
7739
7740 case KEY_redo:
7741 s = force_word(s,WORD,TRUE,FALSE,FALSE);
7742 LOOPX(OP_REDO);
7743
7744 case KEY_rename:
7745 LOP(OP_RENAME,XTERM);
7746
7747 case KEY_rand:
7748 UNI(OP_RAND);
7749
7750 case KEY_rmdir:
7751 UNI(OP_RMDIR);
7752
7753 case KEY_rindex:
7754 LOP(OP_RINDEX,XTERM);
7755
7756 case KEY_read:
7757 LOP(OP_READ,XTERM);
7758
7759 case KEY_readdir:
7760 UNI(OP_READDIR);
7761
7762 case KEY_readline:
7763 UNIDOR(OP_READLINE);
7764
7765 case KEY_readpipe:
7766 UNIDOR(OP_BACKTICK);
7767
7768 case KEY_rewinddir:
7769 UNI(OP_REWINDDIR);
7770
7771 case KEY_recv:
7772 LOP(OP_RECV,XTERM);
7773
7774 case KEY_reverse:
7775 LOP(OP_REVERSE,XTERM);
7776
7777 case KEY_readlink:
7778 UNIDOR(OP_READLINK);
7779
7780 case KEY_ref:
7781 UNI(OP_REF);
7782
7783 case KEY_s:
7784 s = scan_subst(s);
7785 if (pl_yylval.opval)
7786 TERM(sublex_start());
7787 else
7788 TOKEN(1); /* force error */
7789
7790 case KEY_say:
7791 checkcomma(s,PL_tokenbuf,"filehandle");
7792 LOP(OP_SAY,XREF);
7793
7794 case KEY_chomp:
7795 UNI(OP_CHOMP);
7796
7797 case KEY_scalar:
7798 UNI(OP_SCALAR);
7799
7800 case KEY_select:
7801 LOP(OP_SELECT,XTERM);
7802
7803 case KEY_seek:
7804 LOP(OP_SEEK,XTERM);
7805
7806 case KEY_semctl:
7807 LOP(OP_SEMCTL,XTERM);
7808
7809 case KEY_semget:
7810 LOP(OP_SEMGET,XTERM);
7811
7812 case KEY_semop:
7813 LOP(OP_SEMOP,XTERM);
7814
7815 case KEY_send:
7816 LOP(OP_SEND,XTERM);
7817
7818 case KEY_setpgrp:
7819 LOP(OP_SETPGRP,XTERM);
7820
7821 case KEY_setpriority:
7822 LOP(OP_SETPRIORITY,XTERM);
7823
7824 case KEY_sethostent:
7825 UNI(OP_SHOSTENT);
7826
7827 case KEY_setnetent:
7828 UNI(OP_SNETENT);
7829
7830 case KEY_setservent:
7831 UNI(OP_SSERVENT);
7832
7833 case KEY_setprotoent:
7834 UNI(OP_SPROTOENT);
7835
7836 case KEY_setpwent:
7837 FUN0(OP_SPWENT);
7838
7839 case KEY_setgrent:
7840 FUN0(OP_SGRENT);
7841
7842 case KEY_seekdir:
7843 LOP(OP_SEEKDIR,XTERM);
7844
7845 case KEY_setsockopt:
7846 LOP(OP_SSOCKOPT,XTERM);
7847
7848 case KEY_shift:
7849 UNIDOR(OP_SHIFT);
7850
7851 case KEY_shmctl:
7852 LOP(OP_SHMCTL,XTERM);
7853
7854 case KEY_shmget:
7855 LOP(OP_SHMGET,XTERM);
7856
7857 case KEY_shmread:
7858 LOP(OP_SHMREAD,XTERM);
7859
7860 case KEY_shmwrite:
7861 LOP(OP_SHMWRITE,XTERM);
7862
7863 case KEY_shutdown:
7864 LOP(OP_SHUTDOWN,XTERM);
7865
7866 case KEY_sin:
7867 UNI(OP_SIN);
7868
7869 case KEY_sleep:
7870 UNI(OP_SLEEP);
7871
7872 case KEY_socket:
7873 LOP(OP_SOCKET,XTERM);
7874
7875 case KEY_socketpair:
7876 LOP(OP_SOCKPAIR,XTERM);
7877
7878 case KEY_sort:
7879 checkcomma(s,PL_tokenbuf,"subroutine name");
7880 s = SKIPSPACE1(s);
7881 if (*s == ';' || *s == ')') /* probably a close */
7882 Perl_croak(aTHX_ "sort is now a reserved word");
7883 PL_expect = XTERM;
7884 s = force_word(s,WORD,TRUE,TRUE,FALSE);
7885 LOP(OP_SORT,XREF);
7886
7887 case KEY_split:
7888 LOP(OP_SPLIT,XTERM);
7889
7890 case KEY_sprintf:
7891 LOP(OP_SPRINTF,XTERM);
7892
7893 case KEY_splice:
7894 LOP(OP_SPLICE,XTERM);
7895
7896 case KEY_sqrt:
7897 UNI(OP_SQRT);
7898
7899 case KEY_srand:
7900 UNI(OP_SRAND);
7901
7902 case KEY_stat:
7903 UNI(OP_STAT);
7904
7905 case KEY_study:
7906 UNI(OP_STUDY);
7907
7908 case KEY_substr:
7909 LOP(OP_SUBSTR,XTERM);
7910
7911 case KEY_format:
7912 case KEY_sub:
7913 really_sub:
7914 {
7915 char tmpbuf[sizeof PL_tokenbuf];
7916 SSize_t tboffset = 0;
7917 expectation attrful;
7918 bool have_name, have_proto;
7919 const int key = tmp;
7920
7921#ifdef PERL_MAD
7922 SV *tmpwhite = 0;
7923
7924 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
7925 SV *subtoken = newSVpvn(tstart, s - tstart);
7926 PL_thistoken = 0;
7927
7928 d = s;
7929 s = SKIPSPACE2(s,tmpwhite);
7930#else
7931 s = skipspace(s);
7932#endif
7933
7934 if (isIDFIRST_lazy_if(s,UTF) || *s == '\'' ||
7935 (*s == ':' && s[1] == ':'))
7936 {
7937#ifdef PERL_MAD
7938 SV *nametoke = NULL;
7939#endif
7940
7941 PL_expect = XBLOCK;
7942 attrful = XATTRBLOCK;
7943 /* remember buffer pos'n for later force_word */
7944 tboffset = s - PL_oldbufptr;
7945 d = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
7946#ifdef PERL_MAD
7947 if (PL_madskills)
7948 nametoke = newSVpvn(s, d - s);
7949#endif
7950 if (memchr(tmpbuf, ':', len))
7951 sv_setpvn(PL_subname, tmpbuf, len);
7952 else {
7953 sv_setsv(PL_subname,PL_curstname);
7954 sv_catpvs(PL_subname,"::");
7955 sv_catpvn(PL_subname,tmpbuf,len);
7956 }
7957 have_name = TRUE;
7958
7959#ifdef PERL_MAD
7960
7961 start_force(0);
7962 CURMAD('X', nametoke);
7963 CURMAD('_', tmpwhite);
7964 (void) force_word(PL_oldbufptr + tboffset, WORD,
7965 FALSE, TRUE, TRUE);
7966
7967 s = SKIPSPACE2(d,tmpwhite);
7968#else
7969 s = skipspace(d);
7970#endif
7971 }
7972 else {
7973 if (key == KEY_my)
7974 Perl_croak(aTHX_ "Missing name in \"my sub\"");
7975 PL_expect = XTERMBLOCK;
7976 attrful = XATTRTERM;
7977 sv_setpvs(PL_subname,"?");
7978 have_name = FALSE;
7979 }
7980
7981 if (key == KEY_format) {
7982 if (*s == '=')
7983 PL_lex_formbrack = PL_lex_brackets + 1;
7984#ifdef PERL_MAD
7985 PL_thistoken = subtoken;
7986 s = d;
7987#else
7988 if (have_name)
7989 (void) force_word(PL_oldbufptr + tboffset, WORD,
7990 FALSE, TRUE, TRUE);
7991#endif
7992 OPERATOR(FORMAT);
7993 }
7994
7995 /* Look for a prototype */
7996 if (*s == '(') {
7997 char *p;
7998 bool bad_proto = FALSE;
7999 bool in_brackets = FALSE;
8000 char greedy_proto = ' ';
8001 bool proto_after_greedy_proto = FALSE;
8002 bool must_be_last = FALSE;
8003 bool underscore = FALSE;
8004 bool seen_underscore = FALSE;
8005 const bool warnillegalproto = ckWARN(WARN_ILLEGALPROTO);
8006
8007 s = scan_str(s,!!PL_madskills,FALSE);
8008 if (!s)
8009 Perl_croak(aTHX_ "Prototype not terminated");
8010 /* strip spaces and check for bad characters */
8011 d = SvPVX(PL_lex_stuff);
8012 tmp = 0;
8013 for (p = d; *p; ++p) {
8014 if (!isSPACE(*p)) {
8015 d[tmp++] = *p;
8016
8017 if (warnillegalproto) {
8018 if (must_be_last)
8019 proto_after_greedy_proto = TRUE;
8020 if (!strchr("$@%*;[]&\\_+", *p)) {
8021 bad_proto = TRUE;
8022 }
8023 else {
8024 if ( underscore ) {
8025 if ( *p != ';' )
8026 bad_proto = TRUE;
8027 underscore = FALSE;
8028 }
8029 if ( *p == '[' ) {
8030 in_brackets = TRUE;
8031 }
8032 else if ( *p == ']' ) {
8033 in_brackets = FALSE;
8034 }
8035 else if ( (*p == '@' || *p == '%') &&
8036 ( tmp < 2 || d[tmp-2] != '\\' ) &&
8037 !in_brackets ) {
8038 must_be_last = TRUE;
8039 greedy_proto = *p;
8040 }
8041 else if ( *p == '_' ) {
8042 underscore = seen_underscore = TRUE;
8043 }
8044 }
8045 }
8046 }
8047 }
8048 d[tmp] = '\0';
8049 if (proto_after_greedy_proto)
8050 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
8051 "Prototype after '%c' for %"SVf" : %s",
8052 greedy_proto, SVfARG(PL_subname), d);
8053 if (bad_proto)
8054 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
8055 "Illegal character %sin prototype for %"SVf" : %s",
8056 seen_underscore ? "after '_' " : "",
8057 SVfARG(PL_subname), d);
8058 SvCUR_set(PL_lex_stuff, tmp);
8059 have_proto = TRUE;
8060
8061#ifdef PERL_MAD
8062 start_force(0);
8063 CURMAD('q', PL_thisopen);
8064 CURMAD('_', tmpwhite);
8065 CURMAD('=', PL_thisstuff);
8066 CURMAD('Q', PL_thisclose);
8067 NEXTVAL_NEXTTOKE.opval =
8068 (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
8069 PL_lex_stuff = NULL;
8070 force_next(THING);
8071
8072 s = SKIPSPACE2(s,tmpwhite);
8073#else
8074 s = skipspace(s);
8075#endif
8076 }
8077 else
8078 have_proto = FALSE;
8079
8080 if (*s == ':' && s[1] != ':')
8081 PL_expect = attrful;
8082 else if (*s != '{' && key == KEY_sub) {
8083 if (!have_name)
8084 Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
8085 else if (*s != ';' && *s != '}')
8086 Perl_croak(aTHX_ "Illegal declaration of subroutine %"SVf, SVfARG(PL_subname));
8087 }
8088
8089#ifdef PERL_MAD
8090 start_force(0);
8091 if (tmpwhite) {
8092 if (PL_madskills)
8093 curmad('^', newSVpvs(""));
8094 CURMAD('_', tmpwhite);
8095 }
8096 force_next(0);
8097
8098 PL_thistoken = subtoken;
8099#else
8100 if (have_proto) {
8101 NEXTVAL_NEXTTOKE.opval =
8102 (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
8103 PL_lex_stuff = NULL;
8104 force_next(THING);
8105 }
8106#endif
8107 if (!have_name) {
8108 if (PL_curstash)
8109 sv_setpvs(PL_subname, "__ANON__");
8110 else
8111 sv_setpvs(PL_subname, "__ANON__::__ANON__");
8112 TOKEN(ANONSUB);
8113 }
8114#ifndef PERL_MAD
8115 (void) force_word(PL_oldbufptr + tboffset, WORD,
8116 FALSE, TRUE, TRUE);
8117#endif
8118 if (key == KEY_my)
8119 TOKEN(MYSUB);
8120 TOKEN(SUB);
8121 }
8122
8123 case KEY_system:
8124 LOP(OP_SYSTEM,XREF);
8125
8126 case KEY_symlink:
8127 LOP(OP_SYMLINK,XTERM);
8128
8129 case KEY_syscall:
8130 LOP(OP_SYSCALL,XTERM);
8131
8132 case KEY_sysopen:
8133 LOP(OP_SYSOPEN,XTERM);
8134
8135 case KEY_sysseek:
8136 LOP(OP_SYSSEEK,XTERM);
8137
8138 case KEY_sysread:
8139 LOP(OP_SYSREAD,XTERM);
8140
8141 case KEY_syswrite:
8142 LOP(OP_SYSWRITE,XTERM);
8143
8144 case KEY_tr:
8145 s = scan_trans(s);
8146 TERM(sublex_start());
8147
8148 case KEY_tell:
8149 UNI(OP_TELL);
8150
8151 case KEY_telldir:
8152 UNI(OP_TELLDIR);
8153
8154 case KEY_tie:
8155 LOP(OP_TIE,XTERM);
8156
8157 case KEY_tied:
8158 UNI(OP_TIED);
8159
8160 case KEY_time:
8161 FUN0(OP_TIME);
8162
8163 case KEY_times:
8164 FUN0(OP_TMS);
8165
8166 case KEY_truncate:
8167 LOP(OP_TRUNCATE,XTERM);
8168
8169 case KEY_uc:
8170 UNI(OP_UC);
8171
8172 case KEY_ucfirst:
8173 UNI(OP_UCFIRST);
8174
8175 case KEY_untie:
8176 UNI(OP_UNTIE);
8177
8178 case KEY_until:
8179 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8180 return REPORT(0);
8181 pl_yylval.ival = CopLINE(PL_curcop);
8182 OPERATOR(UNTIL);
8183
8184 case KEY_unless:
8185 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8186 return REPORT(0);
8187 pl_yylval.ival = CopLINE(PL_curcop);
8188 OPERATOR(UNLESS);
8189
8190 case KEY_unlink:
8191 LOP(OP_UNLINK,XTERM);
8192
8193 case KEY_undef:
8194 UNIDOR(OP_UNDEF);
8195
8196 case KEY_unpack:
8197 LOP(OP_UNPACK,XTERM);
8198
8199 case KEY_utime:
8200 LOP(OP_UTIME,XTERM);
8201
8202 case KEY_umask:
8203 UNIDOR(OP_UMASK);
8204
8205 case KEY_unshift:
8206 LOP(OP_UNSHIFT,XTERM);
8207
8208 case KEY_use:
8209 s = tokenize_use(1, s);
8210 OPERATOR(USE);
8211
8212 case KEY_values:
8213 UNI(OP_VALUES);
8214
8215 case KEY_vec:
8216 LOP(OP_VEC,XTERM);
8217
8218 case KEY_when:
8219 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8220 return REPORT(0);
8221 pl_yylval.ival = CopLINE(PL_curcop);
8222 OPERATOR(WHEN);
8223
8224 case KEY_while:
8225 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8226 return REPORT(0);
8227 pl_yylval.ival = CopLINE(PL_curcop);
8228 OPERATOR(WHILE);
8229
8230 case KEY_warn:
8231 PL_hints |= HINT_BLOCK_SCOPE;
8232 LOP(OP_WARN,XTERM);
8233
8234 case KEY_wait:
8235 FUN0(OP_WAIT);
8236
8237 case KEY_waitpid:
8238 LOP(OP_WAITPID,XTERM);
8239
8240 case KEY_wantarray:
8241 FUN0(OP_WANTARRAY);
8242
8243 case KEY_write:
8244#ifdef EBCDIC
8245 {
8246 char ctl_l[2];
8247 ctl_l[0] = toCTRL('L');
8248 ctl_l[1] = '\0';
8249 gv_fetchpvn_flags(ctl_l, 1, GV_ADD|GV_NOTQUAL, SVt_PV);
8250 }
8251#else
8252 /* Make sure $^L is defined */
8253 gv_fetchpvs("\f", GV_ADD|GV_NOTQUAL, SVt_PV);
8254#endif
8255 UNI(OP_ENTERWRITE);
8256
8257 case KEY_x:
8258 if (PL_expect == XOPERATOR) {
8259 if (*s == '=' && !PL_lex_allbrackets &&
8260 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
8261 return REPORT(0);
8262 Mop(OP_REPEAT);
8263 }
8264 check_uni();
8265 goto just_a_word;
8266
8267 case KEY_xor:
8268 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
8269 return REPORT(0);
8270 pl_yylval.ival = OP_XOR;
8271 OPERATOR(OROP);
8272
8273 case KEY_y:
8274 s = scan_trans(s);
8275 TERM(sublex_start());
8276 }
8277 }}
8278}
8279#ifdef __SC__
8280#pragma segment Main
8281#endif
8282
8283static int
8284S_pending_ident(pTHX)
8285{
8286 dVAR;
8287 register char *d;
8288 PADOFFSET tmp = 0;
8289 /* pit holds the identifier we read and pending_ident is reset */
8290 char pit = PL_pending_ident;
8291 const STRLEN tokenbuf_len = strlen(PL_tokenbuf);
8292 /* All routes through this function want to know if there is a colon. */
8293 const char *const has_colon = (const char*) memchr (PL_tokenbuf, ':', tokenbuf_len);
8294 PL_pending_ident = 0;
8295
8296 /* PL_realtokenstart = realtokenend = PL_bufptr - SvPVX(PL_linestr); */
8297 DEBUG_T({ PerlIO_printf(Perl_debug_log,
8298 "### Pending identifier '%s'\n", PL_tokenbuf); });
8299
8300 /* if we're in a my(), we can't allow dynamics here.
8301 $foo'bar has already been turned into $foo::bar, so
8302 just check for colons.
8303
8304 if it's a legal name, the OP is a PADANY.
8305 */
8306 if (PL_in_my) {
8307 if (PL_in_my == KEY_our) { /* "our" is merely analogous to "my" */
8308 if (has_colon)
8309 yyerror(Perl_form(aTHX_ "No package name allowed for "
8310 "variable %s in \"our\"",
8311 PL_tokenbuf));
8312 tmp = allocmy(PL_tokenbuf, tokenbuf_len, UTF ? SVf_UTF8 : 0);
8313 }
8314 else {
8315 if (has_colon)
8316 yyerror(Perl_form(aTHX_ PL_no_myglob,
8317 PL_in_my == KEY_my ? "my" : "state", PL_tokenbuf));
8318
8319 pl_yylval.opval = newOP(OP_PADANY, 0);
8320 pl_yylval.opval->op_targ = allocmy(PL_tokenbuf, tokenbuf_len,
8321 UTF ? SVf_UTF8 : 0);
8322 return PRIVATEREF;
8323 }
8324 }
8325
8326 /*
8327 build the ops for accesses to a my() variable.
8328
8329 Deny my($a) or my($b) in a sort block, *if* $a or $b is
8330 then used in a comparison. This catches most, but not
8331 all cases. For instance, it catches
8332 sort { my($a); $a <=> $b }
8333 but not
8334 sort { my($a); $a < $b ? -1 : $a == $b ? 0 : 1; }
8335 (although why you'd do that is anyone's guess).
8336 */
8337
8338 if (!has_colon) {
8339 if (!PL_in_my)
8340 tmp = pad_findmy_pvn(PL_tokenbuf, tokenbuf_len,
8341 UTF ? SVf_UTF8 : 0);
8342 if (tmp != NOT_IN_PAD) {
8343 /* might be an "our" variable" */
8344 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
8345 /* build ops for a bareword */
8346 HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
8347 HEK * const stashname = HvNAME_HEK(stash);
8348 SV * const sym = newSVhek(stashname);
8349 sv_catpvs(sym, "::");
8350 sv_catpvn(sym, PL_tokenbuf+1, tokenbuf_len - 1);
8351 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
8352 pl_yylval.opval->op_private = OPpCONST_ENTERED;
8353 gv_fetchsv(sym,
8354 (PL_in_eval
8355 ? (GV_ADDMULTI | GV_ADDINEVAL)
8356 : GV_ADDMULTI
8357 ),
8358 ((PL_tokenbuf[0] == '$') ? SVt_PV
8359 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
8360 : SVt_PVHV));
8361 return WORD;
8362 }
8363
8364 /* if it's a sort block and they're naming $a or $b */
8365 if (PL_last_lop_op == OP_SORT &&
8366 PL_tokenbuf[0] == '$' &&
8367 (PL_tokenbuf[1] == 'a' || PL_tokenbuf[1] == 'b')
8368 && !PL_tokenbuf[2])
8369 {
8370 for (d = PL_in_eval ? PL_oldoldbufptr : PL_linestart;
8371 d < PL_bufend && *d != '\n';
8372 d++)
8373 {
8374 if (strnEQ(d,"<=>",3) || strnEQ(d,"cmp",3)) {
8375 Perl_croak(aTHX_ "Can't use \"my %s\" in sort comparison",
8376 PL_tokenbuf);
8377 }
8378 }
8379 }
8380
8381 pl_yylval.opval = newOP(OP_PADANY, 0);
8382 pl_yylval.opval->op_targ = tmp;
8383 return PRIVATEREF;
8384 }
8385 }
8386
8387 /*
8388 Whine if they've said @foo in a doublequoted string,
8389 and @foo isn't a variable we can find in the symbol
8390 table.
8391 */
8392 if (ckWARN(WARN_AMBIGUOUS) &&
8393 pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
8394 GV *const gv = gv_fetchpvn_flags(PL_tokenbuf + 1, tokenbuf_len - 1, 0,
8395 SVt_PVAV);
8396 if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
8397 /* DO NOT warn for @- and @+ */
8398 && !( PL_tokenbuf[2] == '\0' &&
8399 ( PL_tokenbuf[1] == '-' || PL_tokenbuf[1] == '+' ))
8400 )
8401 {
8402 /* Downgraded from fatal to warning 20000522 mjd */
8403 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
8404 "Possible unintended interpolation of %s in string",
8405 PL_tokenbuf);
8406 }
8407 }
8408
8409 /* build ops for a bareword */
8410 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpvn(PL_tokenbuf + 1,
8411 tokenbuf_len - 1));
8412 pl_yylval.opval->op_private = OPpCONST_ENTERED;
8413 gv_fetchpvn_flags(PL_tokenbuf+1, tokenbuf_len - 1,
8414 PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : GV_ADD,
8415 ((PL_tokenbuf[0] == '$') ? SVt_PV
8416 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
8417 : SVt_PVHV));
8418 return WORD;
8419}
8420
8421STATIC void
8422S_checkcomma(pTHX_ const char *s, const char *name, const char *what)
8423{
8424 dVAR;
8425
8426 PERL_ARGS_ASSERT_CHECKCOMMA;
8427
8428 if (*s == ' ' && s[1] == '(') { /* XXX gotta be a better way */
8429 if (ckWARN(WARN_SYNTAX)) {
8430 int level = 1;
8431 const char *w;
8432 for (w = s+2; *w && level; w++) {
8433 if (*w == '(')
8434 ++level;
8435 else if (*w == ')')
8436 --level;
8437 }
8438 while (isSPACE(*w))
8439 ++w;
8440 /* the list of chars below is for end of statements or
8441 * block / parens, boolean operators (&&, ||, //) and branch
8442 * constructs (or, and, if, until, unless, while, err, for).
8443 * Not a very solid hack... */
8444 if (!*w || !strchr(";&/|})]oaiuwef!=", *w))
8445 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
8446 "%s (...) interpreted as function",name);
8447 }
8448 }
8449 while (s < PL_bufend && isSPACE(*s))
8450 s++;
8451 if (*s == '(')
8452 s++;
8453 while (s < PL_bufend && isSPACE(*s))
8454 s++;
8455 if (isIDFIRST_lazy_if(s,UTF)) {
8456 const char * const w = s++;
8457 while (isALNUM_lazy_if(s,UTF))
8458 s++;
8459 while (s < PL_bufend && isSPACE(*s))
8460 s++;
8461 if (*s == ',') {
8462 GV* gv;
8463 if (keyword(w, s - w, 0))
8464 return;
8465
8466 gv = gv_fetchpvn_flags(w, s - w, 0, SVt_PVCV);
8467 if (gv && GvCVu(gv))
8468 return;
8469 Perl_croak(aTHX_ "No comma allowed after %s", what);
8470 }
8471 }
8472}
8473
8474/* Either returns sv, or mortalizes sv and returns a new SV*.
8475 Best used as sv=new_constant(..., sv, ...).
8476 If s, pv are NULL, calls subroutine with one argument,
8477 and type is used with error messages only. */
8478
8479STATIC SV *
8480S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen,
8481 SV *sv, SV *pv, const char *type, STRLEN typelen)
8482{
8483 dVAR; dSP;
8484 HV * const table = GvHV(PL_hintgv); /* ^H */
8485 SV *res;
8486 SV **cvp;
8487 SV *cv, *typesv;
8488 const char *why1 = "", *why2 = "", *why3 = "";
8489
8490 PERL_ARGS_ASSERT_NEW_CONSTANT;
8491
8492 if (!table || !(PL_hints & HINT_LOCALIZE_HH)) {
8493 SV *msg;
8494
8495 why2 = (const char *)
8496 (strEQ(key,"charnames")
8497 ? "(possibly a missing \"use charnames ...\")"
8498 : "");
8499 msg = Perl_newSVpvf(aTHX_ "Constant(%s) unknown: %s",
8500 (type ? type: "undef"), why2);
8501
8502 /* This is convoluted and evil ("goto considered harmful")
8503 * but I do not understand the intricacies of all the different
8504 * failure modes of %^H in here. The goal here is to make
8505 * the most probable error message user-friendly. --jhi */
8506
8507 goto msgdone;
8508
8509 report:
8510 msg = Perl_newSVpvf(aTHX_ "Constant(%s): %s%s%s",
8511 (type ? type: "undef"), why1, why2, why3);
8512 msgdone:
8513 yyerror(SvPVX_const(msg));
8514 SvREFCNT_dec(msg);
8515 return sv;
8516 }
8517
8518 /* charnames doesn't work well if there have been errors found */
8519 if (PL_error_count > 0 && strEQ(key,"charnames"))
8520 return &PL_sv_undef;
8521
8522 cvp = hv_fetch(table, key, keylen, FALSE);
8523 if (!cvp || !SvOK(*cvp)) {
8524 why1 = "$^H{";
8525 why2 = key;
8526 why3 = "} is not defined";
8527 goto report;
8528 }
8529 sv_2mortal(sv); /* Parent created it permanently */
8530 cv = *cvp;
8531 if (!pv && s)
8532 pv = newSVpvn_flags(s, len, SVs_TEMP);
8533 if (type && pv)
8534 typesv = newSVpvn_flags(type, typelen, SVs_TEMP);
8535 else
8536 typesv = &PL_sv_undef;
8537
8538 PUSHSTACKi(PERLSI_OVERLOAD);
8539 ENTER ;
8540 SAVETMPS;
8541
8542 PUSHMARK(SP) ;
8543 EXTEND(sp, 3);
8544 if (pv)
8545 PUSHs(pv);
8546 PUSHs(sv);
8547 if (pv)
8548 PUSHs(typesv);
8549 PUTBACK;
8550 call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL));
8551
8552 SPAGAIN ;
8553
8554 /* Check the eval first */
8555 if (!PL_in_eval && SvTRUE(ERRSV)) {
8556 sv_catpvs(ERRSV, "Propagated");
8557 yyerror(SvPV_nolen_const(ERRSV)); /* Duplicates the message inside eval */
8558 (void)POPs;
8559 res = SvREFCNT_inc_simple(sv);
8560 }
8561 else {
8562 res = POPs;
8563 SvREFCNT_inc_simple_void(res);
8564 }
8565
8566 PUTBACK ;
8567 FREETMPS ;
8568 LEAVE ;
8569 POPSTACK;
8570
8571 if (!SvOK(res)) {
8572 why1 = "Call to &{$^H{";
8573 why2 = key;
8574 why3 = "}} did not return a defined value";
8575 sv = res;
8576 goto report;
8577 }
8578
8579 return res;
8580}
8581
8582/* Returns a NUL terminated string, with the length of the string written to
8583 *slp
8584 */
8585STATIC char *
8586S_scan_word(pTHX_ register char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
8587{
8588 dVAR;
8589 register char *d = dest;
8590 register char * const e = d + destlen - 3; /* two-character token, ending NUL */
8591
8592 PERL_ARGS_ASSERT_SCAN_WORD;
8593
8594 for (;;) {
8595 if (d >= e)
8596 Perl_croak(aTHX_ ident_too_long);
8597 if (isALNUM(*s)) /* UTF handled below */
8598 *d++ = *s++;
8599 else if (allow_package && (*s == '\'') && isIDFIRST_lazy_if(s+1,UTF)) {
8600 *d++ = ':';
8601 *d++ = ':';
8602 s++;
8603 }
8604 else if (allow_package && (s[0] == ':') && (s[1] == ':') && (s[2] != '$')) {
8605 *d++ = *s++;
8606 *d++ = *s++;
8607 }
8608 else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
8609 char *t = s + UTF8SKIP(s);
8610 size_t len;
8611 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
8612 t += UTF8SKIP(t);
8613 len = t - s;
8614 if (d + len > e)
8615 Perl_croak(aTHX_ ident_too_long);
8616 Copy(s, d, len, char);
8617 d += len;
8618 s = t;
8619 }
8620 else {
8621 *d = '\0';
8622 *slp = d - dest;
8623 return s;
8624 }
8625 }
8626}
8627
8628STATIC char *
8629S_scan_ident(pTHX_ register char *s, register const char *send, char *dest, STRLEN destlen, I32 ck_uni)
8630{
8631 dVAR;
8632 char *bracket = NULL;
8633 char funny = *s++;
8634 register char *d = dest;
8635 register char * const e = d + destlen - 3; /* two-character token, ending NUL */
8636
8637 PERL_ARGS_ASSERT_SCAN_IDENT;
8638
8639 if (isSPACE(*s))
8640 s = PEEKSPACE(s);
8641 if (isDIGIT(*s)) {
8642 while (isDIGIT(*s)) {
8643 if (d >= e)
8644 Perl_croak(aTHX_ ident_too_long);
8645 *d++ = *s++;
8646 }
8647 }
8648 else {
8649 for (;;) {
8650 if (d >= e)
8651 Perl_croak(aTHX_ ident_too_long);
8652 if (isALNUM(*s)) /* UTF handled below */
8653 *d++ = *s++;
8654 else if (*s == '\'' && isIDFIRST_lazy_if(s+1,UTF)) {
8655 *d++ = ':';
8656 *d++ = ':';
8657 s++;
8658 }
8659 else if (*s == ':' && s[1] == ':') {
8660 *d++ = *s++;
8661 *d++ = *s++;
8662 }
8663 else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
8664 char *t = s + UTF8SKIP(s);
8665 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
8666 t += UTF8SKIP(t);
8667 if (d + (t - s) > e)
8668 Perl_croak(aTHX_ ident_too_long);
8669 Copy(s, d, t - s, char);
8670 d += t - s;
8671 s = t;
8672 }
8673 else
8674 break;
8675 }
8676 }
8677 *d = '\0';
8678 d = dest;
8679 if (*d) {
8680 if (PL_lex_state != LEX_NORMAL)
8681 PL_lex_state = LEX_INTERPENDMAYBE;
8682 return s;
8683 }
8684 if (*s == '$' && s[1] &&
8685 (isALNUM_lazy_if(s+1,UTF) || s[1] == '$' || s[1] == '{' || strnEQ(s+1,"::",2)) )
8686 {
8687 return s;
8688 }
8689 if (*s == '{') {
8690 bracket = s;
8691 s++;
8692 }
8693 else if (ck_uni)
8694 check_uni();
8695 if (s < send)
8696 *d = *s++;
8697 d[1] = '\0';
8698 if (*d == '^' && *s && isCONTROLVAR(*s)) {
8699 *d = toCTRL(*s);
8700 s++;
8701 }
8702 if (bracket) {
8703 if (isSPACE(s[-1])) {
8704 while (s < send) {
8705 const char ch = *s++;
8706 if (!SPACE_OR_TAB(ch)) {
8707 *d = ch;
8708 break;
8709 }
8710 }
8711 }
8712 if (isIDFIRST_lazy_if(d,UTF)) {
8713 d++;
8714 if (UTF) {
8715 char *end = s;
8716 while ((end < send && isALNUM_lazy_if(end,UTF)) || *end == ':') {
8717 end += UTF8SKIP(end);
8718 while (end < send && UTF8_IS_CONTINUED(*end) && is_utf8_mark((U8*)end))
8719 end += UTF8SKIP(end);
8720 }
8721 Copy(s, d, end - s, char);
8722 d += end - s;
8723 s = end;
8724 }
8725 else {
8726 while ((isALNUM(*s) || *s == ':') && d < e)
8727 *d++ = *s++;
8728 if (d >= e)
8729 Perl_croak(aTHX_ ident_too_long);
8730 }
8731 *d = '\0';
8732 while (s < send && SPACE_OR_TAB(*s))
8733 s++;
8734 if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
8735 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest, 0)) {
8736 const char * const brack =
8737 (const char *)
8738 ((*s == '[') ? "[...]" : "{...}");
8739 /* diag_listed_as: Ambiguous use of %c{%s[...]} resolved to %c%s[...] */
8740 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
8741 "Ambiguous use of %c{%s%s} resolved to %c%s%s",
8742 funny, dest, brack, funny, dest, brack);
8743 }
8744 bracket++;
8745 PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK);
8746 PL_lex_allbrackets++;
8747 return s;
8748 }
8749 }
8750 /* Handle extended ${^Foo} variables
8751 * 1999-02-27 mjd-perl-patch@plover.com */
8752 else if (!isALNUM(*d) && !isPRINT(*d) /* isCTRL(d) */
8753 && isALNUM(*s))
8754 {
8755 d++;
8756 while (isALNUM(*s) && d < e) {
8757 *d++ = *s++;
8758 }
8759 if (d >= e)
8760 Perl_croak(aTHX_ ident_too_long);
8761 *d = '\0';
8762 }
8763 if (*s == '}') {
8764 s++;
8765 if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
8766 PL_lex_state = LEX_INTERPEND;
8767 PL_expect = XREF;
8768 }
8769 if (PL_lex_state == LEX_NORMAL) {
8770 if (ckWARN(WARN_AMBIGUOUS) &&
8771 (keyword(dest, d - dest, 0)
8772 || get_cvn_flags(dest, d - dest, 0)))
8773 {
8774 if (funny == '#')
8775 funny = '@';
8776 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
8777 "Ambiguous use of %c{%s} resolved to %c%s",
8778 funny, dest, funny, dest);
8779 }
8780 }
8781 }
8782 else {
8783 s = bracket; /* let the parser handle it */
8784 *dest = '\0';
8785 }
8786 }
8787 else if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets && !intuit_more(s))
8788 PL_lex_state = LEX_INTERPEND;
8789 return s;
8790}
8791
8792static bool
8793S_pmflag(pTHX_ const char* const valid_flags, U32 * pmfl, char** s, char* charset) {
8794
8795 /* Adds, subtracts to/from 'pmfl' based on regex modifier flags found in
8796 * the parse starting at 's', based on the subset that are valid in this
8797 * context input to this routine in 'valid_flags'. Advances s. Returns
8798 * TRUE if the input was a valid flag, so the next char may be as well;
8799 * otherwise FALSE. 'charset' should point to a NUL upon first call on the
8800 * current regex. This routine will set it to any charset modifier found.
8801 * The caller shouldn't change it. This way, another charset modifier
8802 * encountered in the parse can be detected as an error, as we have decided
8803 * allow only one */
8804
8805 const char c = **s;
8806
8807 if (! strchr(valid_flags, c)) {
8808 if (isALNUM(c)) {
8809 goto deprecate;
8810 }
8811 return FALSE;
8812 }
8813
8814 switch (c) {
8815
8816 CASE_STD_PMMOD_FLAGS_PARSE_SET(pmfl);
8817 case GLOBAL_PAT_MOD: *pmfl |= PMf_GLOBAL; break;
8818 case CONTINUE_PAT_MOD: *pmfl |= PMf_CONTINUE; break;
8819 case ONCE_PAT_MOD: *pmfl |= PMf_KEEP; break;
8820 case KEEPCOPY_PAT_MOD: *pmfl |= RXf_PMf_KEEPCOPY; break;
8821 case NONDESTRUCT_PAT_MOD: *pmfl |= PMf_NONDESTRUCT; break;
8822 case LOCALE_PAT_MOD:
8823
8824 /* In 5.14, qr//lt is legal but deprecated; the 't' means they
8825 * can't be regex modifiers.
8826 * In 5.14, s///le is legal and ambiguous. Try to disambiguate as
8827 * much as easily done. s///lei, for example, has to mean regex
8828 * modifiers if it's not an error (as does any word character
8829 * following the 'e'). Otherwise, we resolve to the backwards-
8830 * compatible, but less likely 's/// le ...', i.e. as meaning
8831 * less-than-or-equal. The reason it's not likely is that s//
8832 * returns a number for code in the field (/r returns a string, but
8833 * that wasn't added until the 5.13 series), and so '<=' should be
8834 * used for comparing, not 'le'. */
8835 if (*((*s) + 1) == 't') {
8836 goto deprecate;
8837 }
8838 else if (*((*s) + 1) == 'e' && ! isALNUM(*((*s) + 2))) {
8839
8840 /* 'e' is valid only for substitutes, s///e. If it is not
8841 * valid in the current context, then 'm//le' must mean the
8842 * comparison operator, so use the regular deprecation message.
8843 */
8844 if (! strchr(valid_flags, 'e')) {
8845 goto deprecate;
8846 }
8847 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
8848 "Ambiguous use of 's//le...' resolved as 's// le...'; Rewrite as 's//el' if you meant 'use locale rules and evaluate rhs as an expression'. In Perl 5.16, it will be resolved the other way");
8849 return FALSE;
8850 }
8851 if (*charset) {
8852 goto multiple_charsets;
8853 }
8854 set_regex_charset(pmfl, REGEX_LOCALE_CHARSET);
8855 *charset = c;
8856 break;
8857 case UNICODE_PAT_MOD:
8858 /* In 5.14, qr//unless and qr//until are legal but deprecated; the
8859 * 'n' means they can't be regex modifiers */
8860 if (*((*s) + 1) == 'n') {
8861 goto deprecate;
8862 }
8863 if (*charset) {
8864 goto multiple_charsets;
8865 }
8866 set_regex_charset(pmfl, REGEX_UNICODE_CHARSET);
8867 *charset = c;
8868 break;
8869 case ASCII_RESTRICT_PAT_MOD:
8870 /* In 5.14, qr//and is legal but deprecated; the 'n' means they
8871 * can't be regex modifiers */
8872 if (*((*s) + 1) == 'n') {
8873 goto deprecate;
8874 }
8875
8876 if (! *charset) {
8877 set_regex_charset(pmfl, REGEX_ASCII_RESTRICTED_CHARSET);
8878 }
8879 else {
8880
8881 /* Error if previous modifier wasn't an 'a', but if it was, see
8882 * if, and accept, a second occurrence (only) */
8883 if (*charset != 'a'
8884 || get_regex_charset(*pmfl)
8885 != REGEX_ASCII_RESTRICTED_CHARSET)
8886 {
8887 goto multiple_charsets;
8888 }
8889 set_regex_charset(pmfl, REGEX_ASCII_MORE_RESTRICTED_CHARSET);
8890 }
8891 *charset = c;
8892 break;
8893 case DEPENDS_PAT_MOD:
8894 if (*charset) {
8895 goto multiple_charsets;
8896 }
8897 set_regex_charset(pmfl, REGEX_DEPENDS_CHARSET);
8898 *charset = c;
8899 break;
8900 }
8901
8902 (*s)++;
8903 return TRUE;
8904
8905 deprecate:
8906 Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX),
8907 "Having no space between pattern and following word is deprecated");
8908 return FALSE;
8909
8910 multiple_charsets:
8911 if (*charset != c) {
8912 yyerror(Perl_form(aTHX_ "Regexp modifiers \"/%c\" and \"/%c\" are mutually exclusive", *charset, c));
8913 }
8914 else if (c == 'a') {
8915 yyerror("Regexp modifier \"/a\" may appear a maximum of twice");
8916 }
8917 else {
8918 yyerror(Perl_form(aTHX_ "Regexp modifier \"/%c\" may not appear twice", c));
8919 }
8920
8921 /* Pretend that it worked, so will continue processing before dieing */
8922 (*s)++;
8923 return TRUE;
8924}
8925
8926STATIC char *
8927S_scan_pat(pTHX_ char *start, I32 type)
8928{
8929 dVAR;
8930 PMOP *pm;
8931 char *s = scan_str(start,!!PL_madskills,FALSE);
8932 const char * const valid_flags =
8933 (const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS);
8934 char charset = '\0'; /* character set modifier */
8935#ifdef PERL_MAD
8936 char *modstart;
8937#endif
8938
8939 PERL_ARGS_ASSERT_SCAN_PAT;
8940
8941 if (!s) {
8942 const char * const delimiter = skipspace(start);
8943 Perl_croak(aTHX_
8944 (const char *)
8945 (*delimiter == '?'
8946 ? "Search pattern not terminated or ternary operator parsed as search pattern"
8947 : "Search pattern not terminated" ));
8948 }
8949
8950 pm = (PMOP*)newPMOP(type, 0);
8951 if (PL_multi_open == '?') {
8952 /* This is the only point in the code that sets PMf_ONCE: */
8953 pm->op_pmflags |= PMf_ONCE;
8954
8955 /* Hence it's safe to do this bit of PMOP book-keeping here, which
8956 allows us to restrict the list needed by reset to just the ??
8957 matches. */
8958 assert(type != OP_TRANS);
8959 if (PL_curstash) {
8960 MAGIC *mg = mg_find((const SV *)PL_curstash, PERL_MAGIC_symtab);
8961 U32 elements;
8962 if (!mg) {
8963 mg = sv_magicext(MUTABLE_SV(PL_curstash), 0, PERL_MAGIC_symtab, 0, 0,
8964 0);
8965 }
8966 elements = mg->mg_len / sizeof(PMOP**);
8967 Renewc(mg->mg_ptr, elements + 1, PMOP*, char);
8968 ((PMOP**)mg->mg_ptr) [elements++] = pm;
8969 mg->mg_len = elements * sizeof(PMOP**);
8970 PmopSTASH_set(pm,PL_curstash);
8971 }
8972 }
8973#ifdef PERL_MAD
8974 modstart = s;
8975#endif
8976 while (*s && S_pmflag(aTHX_ valid_flags, &(pm->op_pmflags), &s, &charset)) {};
8977#ifdef PERL_MAD
8978 if (PL_madskills && modstart != s) {
8979 SV* tmptoken = newSVpvn(modstart, s - modstart);
8980 append_madprops(newMADPROP('m', MAD_SV, tmptoken, 0), (OP*)pm, 0);
8981 }
8982#endif
8983 /* issue a warning if /c is specified,but /g is not */
8984 if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL))
8985 {
8986 Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP),
8987 "Use of /c modifier is meaningless without /g" );
8988 }
8989
8990 PL_lex_op = (OP*)pm;
8991 pl_yylval.ival = OP_MATCH;
8992 return s;
8993}
8994
8995STATIC char *
8996S_scan_subst(pTHX_ char *start)
8997{
8998 dVAR;
8999 char *s;
9000 register PMOP *pm;
9001 I32 first_start;
9002 I32 es = 0;
9003 char charset = '\0'; /* character set modifier */
9004#ifdef PERL_MAD
9005 char *modstart;
9006#endif
9007
9008 PERL_ARGS_ASSERT_SCAN_SUBST;
9009
9010 pl_yylval.ival = OP_NULL;
9011
9012 s = scan_str(start,!!PL_madskills,FALSE);
9013
9014 if (!s)
9015 Perl_croak(aTHX_ "Substitution pattern not terminated");
9016
9017 if (s[-1] == PL_multi_open)
9018 s--;
9019#ifdef PERL_MAD
9020 if (PL_madskills) {
9021 CURMAD('q', PL_thisopen);
9022 CURMAD('_', PL_thiswhite);
9023 CURMAD('E', PL_thisstuff);
9024 CURMAD('Q', PL_thisclose);
9025 PL_realtokenstart = s - SvPVX(PL_linestr);
9026 }
9027#endif
9028
9029 first_start = PL_multi_start;
9030 s = scan_str(s,!!PL_madskills,FALSE);
9031 if (!s) {
9032 if (PL_lex_stuff) {
9033 SvREFCNT_dec(PL_lex_stuff);
9034 PL_lex_stuff = NULL;
9035 }
9036 Perl_croak(aTHX_ "Substitution replacement not terminated");
9037 }
9038 PL_multi_start = first_start; /* so whole substitution is taken together */
9039
9040 pm = (PMOP*)newPMOP(OP_SUBST, 0);
9041
9042#ifdef PERL_MAD
9043 if (PL_madskills) {
9044 CURMAD('z', PL_thisopen);
9045 CURMAD('R', PL_thisstuff);
9046 CURMAD('Z', PL_thisclose);
9047 }
9048 modstart = s;
9049#endif
9050
9051 while (*s) {
9052 if (*s == EXEC_PAT_MOD) {
9053 s++;
9054 es++;
9055 }
9056 else if (! S_pmflag(aTHX_ S_PAT_MODS, &(pm->op_pmflags), &s, &charset))
9057 {
9058 break;
9059 }
9060 }
9061
9062#ifdef PERL_MAD
9063 if (PL_madskills) {
9064 if (modstart != s)
9065 curmad('m', newSVpvn(modstart, s - modstart));
9066 append_madprops(PL_thismad, (OP*)pm, 0);
9067 PL_thismad = 0;
9068 }
9069#endif
9070 if ((pm->op_pmflags & PMf_CONTINUE)) {
9071 Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" );
9072 }
9073
9074 if (es) {
9075 SV * const repl = newSVpvs("");
9076
9077 PL_sublex_info.super_bufptr = s;
9078 PL_sublex_info.super_bufend = PL_bufend;
9079 PL_multi_end = 0;
9080 pm->op_pmflags |= PMf_EVAL;
9081 while (es-- > 0) {
9082 if (es)
9083 sv_catpvs(repl, "eval ");
9084 else
9085 sv_catpvs(repl, "do ");
9086 }
9087 sv_catpvs(repl, "{");
9088 sv_catsv(repl, PL_lex_repl);
9089 if (strchr(SvPVX(PL_lex_repl), '#'))
9090 sv_catpvs(repl, "\n");
9091 sv_catpvs(repl, "}");
9092 SvEVALED_on(repl);
9093 SvREFCNT_dec(PL_lex_repl);
9094 PL_lex_repl = repl;
9095 }
9096
9097 PL_lex_op = (OP*)pm;
9098 pl_yylval.ival = OP_SUBST;
9099 return s;
9100}
9101
9102STATIC char *
9103S_scan_trans(pTHX_ char *start)
9104{
9105 dVAR;
9106 register char* s;
9107 OP *o;
9108 short *tbl;
9109 U8 squash;
9110 U8 del;
9111 U8 complement;
9112 bool nondestruct = 0;
9113#ifdef PERL_MAD
9114 char *modstart;
9115#endif
9116
9117 PERL_ARGS_ASSERT_SCAN_TRANS;
9118
9119 pl_yylval.ival = OP_NULL;
9120
9121 s = scan_str(start,!!PL_madskills,FALSE);
9122 if (!s)
9123 Perl_croak(aTHX_ "Transliteration pattern not terminated");
9124
9125 if (s[-1] == PL_multi_open)
9126 s--;
9127#ifdef PERL_MAD
9128 if (PL_madskills) {
9129 CURMAD('q', PL_thisopen);
9130 CURMAD('_', PL_thiswhite);
9131 CURMAD('E', PL_thisstuff);
9132 CURMAD('Q', PL_thisclose);
9133 PL_realtokenstart = s - SvPVX(PL_linestr);
9134 }
9135#endif
9136
9137 s = scan_str(s,!!PL_madskills,FALSE);
9138 if (!s) {
9139 if (PL_lex_stuff) {
9140 SvREFCNT_dec(PL_lex_stuff);
9141 PL_lex_stuff = NULL;
9142 }
9143 Perl_croak(aTHX_ "Transliteration replacement not terminated");
9144 }
9145 if (PL_madskills) {
9146 CURMAD('z', PL_thisopen);
9147 CURMAD('R', PL_thisstuff);
9148 CURMAD('Z', PL_thisclose);
9149 }
9150
9151 complement = del = squash = 0;
9152#ifdef PERL_MAD
9153 modstart = s;
9154#endif
9155 while (1) {
9156 switch (*s) {
9157 case 'c':
9158 complement = OPpTRANS_COMPLEMENT;
9159 break;
9160 case 'd':
9161 del = OPpTRANS_DELETE;
9162 break;
9163 case 's':
9164 squash = OPpTRANS_SQUASH;
9165 break;
9166 case 'r':
9167 nondestruct = 1;
9168 break;
9169 default:
9170 goto no_more;
9171 }
9172 s++;
9173 }
9174 no_more:
9175
9176 tbl = (short *)PerlMemShared_calloc(complement&&!del?258:256, sizeof(short));
9177 o = newPVOP(nondestruct ? OP_TRANSR : OP_TRANS, 0, (char*)tbl);
9178 o->op_private &= ~OPpTRANS_ALL;
9179 o->op_private |= del|squash|complement|
9180 (DO_UTF8(PL_lex_stuff)? OPpTRANS_FROM_UTF : 0)|
9181 (DO_UTF8(PL_lex_repl) ? OPpTRANS_TO_UTF : 0);
9182
9183 PL_lex_op = o;
9184 pl_yylval.ival = nondestruct ? OP_TRANSR : OP_TRANS;
9185
9186#ifdef PERL_MAD
9187 if (PL_madskills) {
9188 if (modstart != s)
9189 curmad('m', newSVpvn(modstart, s - modstart));
9190 append_madprops(PL_thismad, o, 0);
9191 PL_thismad = 0;
9192 }
9193#endif
9194
9195 return s;
9196}
9197
9198STATIC char *
9199S_scan_heredoc(pTHX_ register char *s)
9200{
9201 dVAR;
9202 SV *herewas;
9203 I32 op_type = OP_SCALAR;
9204 I32 len;
9205 SV *tmpstr;
9206 char term;
9207 const char *found_newline;
9208 register char *d;
9209 register char *e;
9210 char *peek;
9211 const int outer = (PL_rsfp && !(PL_lex_inwhat == OP_SCALAR));
9212#ifdef PERL_MAD
9213 I32 stuffstart = s - SvPVX(PL_linestr);
9214 char *tstart;
9215
9216 PL_realtokenstart = -1;
9217#endif
9218
9219 PERL_ARGS_ASSERT_SCAN_HEREDOC;
9220
9221 s += 2;
9222 d = PL_tokenbuf;
9223 e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
9224 if (!outer)
9225 *d++ = '\n';
9226 peek = s;
9227 while (SPACE_OR_TAB(*peek))
9228 peek++;
9229 if (*peek == '`' || *peek == '\'' || *peek =='"') {
9230 s = peek;
9231 term = *s++;
9232 s = delimcpy(d, e, s, PL_bufend, term, &len);
9233 d += len;
9234 if (s < PL_bufend)
9235 s++;
9236 }
9237 else {
9238 if (*s == '\\')
9239 s++, term = '\'';
9240 else
9241 term = '"';
9242 if (!isALNUM_lazy_if(s,UTF))
9243 deprecate("bare << to mean <<\"\"");
9244 for (; isALNUM_lazy_if(s,UTF); s++) {
9245 if (d < e)
9246 *d++ = *s;
9247 }
9248 }
9249 if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
9250 Perl_croak(aTHX_ "Delimiter for here document is too long");
9251 *d++ = '\n';
9252 *d = '\0';
9253 len = d - PL_tokenbuf;
9254
9255#ifdef PERL_MAD
9256 if (PL_madskills) {
9257 tstart = PL_tokenbuf + !outer;
9258 PL_thisclose = newSVpvn(tstart, len - !outer);
9259 tstart = SvPVX(PL_linestr) + stuffstart;
9260 PL_thisopen = newSVpvn(tstart, s - tstart);
9261 stuffstart = s - SvPVX(PL_linestr);
9262 }
9263#endif
9264#ifndef PERL_STRICT_CR
9265 d = strchr(s, '\r');
9266 if (d) {
9267 char * const olds = s;
9268 s = d;
9269 while (s < PL_bufend) {
9270 if (*s == '\r') {
9271 *d++ = '\n';
9272 if (*++s == '\n')
9273 s++;
9274 }
9275 else if (*s == '\n' && s[1] == '\r') { /* \015\013 on a mac? */
9276 *d++ = *s++;
9277 s++;
9278 }
9279 else
9280 *d++ = *s++;
9281 }
9282 *d = '\0';
9283 PL_bufend = d;
9284 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
9285 s = olds;
9286 }
9287#endif
9288#ifdef PERL_MAD
9289 found_newline = 0;
9290#endif
9291 if ( outer || !(found_newline = (char*)memchr((void*)s, '\n', PL_bufend - s)) ) {
9292 herewas = newSVpvn(s,PL_bufend-s);
9293 }
9294 else {
9295#ifdef PERL_MAD
9296 herewas = newSVpvn(s-1,found_newline-s+1);
9297#else
9298 s--;
9299 herewas = newSVpvn(s,found_newline-s);
9300#endif
9301 }
9302#ifdef PERL_MAD
9303 if (PL_madskills) {
9304 tstart = SvPVX(PL_linestr) + stuffstart;
9305 if (PL_thisstuff)
9306 sv_catpvn(PL_thisstuff, tstart, s - tstart);
9307 else
9308 PL_thisstuff = newSVpvn(tstart, s - tstart);
9309 }
9310#endif
9311 s += SvCUR(herewas);
9312
9313#ifdef PERL_MAD
9314 stuffstart = s - SvPVX(PL_linestr);
9315
9316 if (found_newline)
9317 s--;
9318#endif
9319
9320 tmpstr = newSV_type(SVt_PVIV);
9321 SvGROW(tmpstr, 80);
9322 if (term == '\'') {
9323 op_type = OP_CONST;
9324 SvIV_set(tmpstr, -1);
9325 }
9326 else if (term == '`') {
9327 op_type = OP_BACKTICK;
9328 SvIV_set(tmpstr, '\\');
9329 }
9330
9331 CLINE;
9332 PL_multi_start = CopLINE(PL_curcop);
9333 PL_multi_open = PL_multi_close = '<';
9334 term = *PL_tokenbuf;
9335 if (PL_lex_inwhat == OP_SUBST && PL_in_eval && !PL_rsfp) {
9336 char * const bufptr = PL_sublex_info.super_bufptr;
9337 char * const bufend = PL_sublex_info.super_bufend;
9338 char * const olds = s - SvCUR(herewas);
9339 s = strchr(bufptr, '\n');
9340 if (!s)
9341 s = bufend;
9342 d = s;
9343 while (s < bufend &&
9344 (*s != term || memNE(s,PL_tokenbuf,len)) ) {
9345 if (*s++ == '\n')
9346 CopLINE_inc(PL_curcop);
9347 }
9348 if (s >= bufend) {
9349 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
9350 missingterm(PL_tokenbuf);
9351 }
9352 sv_setpvn(herewas,bufptr,d-bufptr+1);
9353 sv_setpvn(tmpstr,d+1,s-d);
9354 s += len - 1;
9355 sv_catpvn(herewas,s,bufend-s);
9356 Copy(SvPVX_const(herewas),bufptr,SvCUR(herewas) + 1,char);
9357
9358 s = olds;
9359 goto retval;
9360 }
9361 else if (!outer) {
9362 d = s;
9363 while (s < PL_bufend &&
9364 (*s != term || memNE(s,PL_tokenbuf,len)) ) {
9365 if (*s++ == '\n')
9366 CopLINE_inc(PL_curcop);
9367 }
9368 if (s >= PL_bufend) {
9369 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
9370 missingterm(PL_tokenbuf);
9371 }
9372 sv_setpvn(tmpstr,d+1,s-d);
9373#ifdef PERL_MAD
9374 if (PL_madskills) {
9375 if (PL_thisstuff)
9376 sv_catpvn(PL_thisstuff, d + 1, s - d);
9377 else
9378 PL_thisstuff = newSVpvn(d + 1, s - d);
9379 stuffstart = s - SvPVX(PL_linestr);
9380 }
9381#endif
9382 s += len - 1;
9383 CopLINE_inc(PL_curcop); /* the preceding stmt passes a newline */
9384
9385 sv_catpvn(herewas,s,PL_bufend-s);
9386 sv_setsv(PL_linestr,herewas);
9387 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart = SvPVX(PL_linestr);
9388 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
9389 PL_last_lop = PL_last_uni = NULL;
9390 }
9391 else
9392 sv_setpvs(tmpstr,""); /* avoid "uninitialized" warning */
9393 while (s >= PL_bufend) { /* multiple line string? */
9394#ifdef PERL_MAD
9395 if (PL_madskills) {
9396 tstart = SvPVX(PL_linestr) + stuffstart;
9397 if (PL_thisstuff)
9398 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
9399 else
9400 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
9401 }
9402#endif
9403 PL_bufptr = s;
9404 CopLINE_inc(PL_curcop);
9405 if (!outer || !lex_next_chunk(0)) {
9406 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
9407 missingterm(PL_tokenbuf);
9408 }
9409 CopLINE_dec(PL_curcop);
9410 s = PL_bufptr;
9411#ifdef PERL_MAD
9412 stuffstart = s - SvPVX(PL_linestr);
9413#endif
9414 CopLINE_inc(PL_curcop);
9415 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
9416 PL_last_lop = PL_last_uni = NULL;
9417#ifndef PERL_STRICT_CR
9418 if (PL_bufend - PL_linestart >= 2) {
9419 if ((PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n') ||
9420 (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
9421 {
9422 PL_bufend[-2] = '\n';
9423 PL_bufend--;
9424 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
9425 }
9426 else if (PL_bufend[-1] == '\r')
9427 PL_bufend[-1] = '\n';
9428 }
9429 else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
9430 PL_bufend[-1] = '\n';
9431#endif
9432 if (*s == term && memEQ(s,PL_tokenbuf,len)) {
9433 STRLEN off = PL_bufend - 1 - SvPVX_const(PL_linestr);
9434 *(SvPVX(PL_linestr) + off ) = ' ';
9435 lex_grow_linestr(SvCUR(PL_linestr) + SvCUR(herewas) + 1);
9436 sv_catsv(PL_linestr,herewas);
9437 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
9438 s = SvPVX(PL_linestr) + off; /* In case PV of PL_linestr moved. */
9439 }
9440 else {
9441 s = PL_bufend;
9442 sv_catsv(tmpstr,PL_linestr);
9443 }
9444 }
9445 s++;
9446retval:
9447 PL_multi_end = CopLINE(PL_curcop);
9448 if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
9449 SvPV_shrink_to_cur(tmpstr);
9450 }
9451 SvREFCNT_dec(herewas);
9452 if (!IN_BYTES) {
9453 if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
9454 SvUTF8_on(tmpstr);
9455 else if (PL_encoding)
9456 sv_recode_to_utf8(tmpstr, PL_encoding);
9457 }
9458 PL_lex_stuff = tmpstr;
9459 pl_yylval.ival = op_type;
9460 return s;
9461}
9462
9463/* scan_inputsymbol
9464 takes: current position in input buffer
9465 returns: new position in input buffer
9466 side-effects: pl_yylval and lex_op are set.
9467
9468 This code handles:
9469
9470 <> read from ARGV
9471 <FH> read from filehandle
9472 <pkg::FH> read from package qualified filehandle
9473 <pkg'FH> read from package qualified filehandle
9474 <$fh> read from filehandle in $fh
9475 <*.h> filename glob
9476
9477*/
9478
9479STATIC char *
9480S_scan_inputsymbol(pTHX_ char *start)
9481{
9482 dVAR;
9483 register char *s = start; /* current position in buffer */
9484 char *end;
9485 I32 len;
9486 char *d = PL_tokenbuf; /* start of temp holding space */
9487 const char * const e = PL_tokenbuf + sizeof PL_tokenbuf; /* end of temp holding space */
9488
9489 PERL_ARGS_ASSERT_SCAN_INPUTSYMBOL;
9490
9491 end = strchr(s, '\n');
9492 if (!end)
9493 end = PL_bufend;
9494 s = delimcpy(d, e, s + 1, end, '>', &len); /* extract until > */
9495
9496 /* die if we didn't have space for the contents of the <>,
9497 or if it didn't end, or if we see a newline
9498 */
9499
9500 if (len >= (I32)sizeof PL_tokenbuf)
9501 Perl_croak(aTHX_ "Excessively long <> operator");
9502 if (s >= end)
9503 Perl_croak(aTHX_ "Unterminated <> operator");
9504
9505 s++;
9506
9507 /* check for <$fh>
9508 Remember, only scalar variables are interpreted as filehandles by
9509 this code. Anything more complex (e.g., <$fh{$num}>) will be
9510 treated as a glob() call.
9511 This code makes use of the fact that except for the $ at the front,
9512 a scalar variable and a filehandle look the same.
9513 */
9514 if (*d == '$' && d[1]) d++;
9515
9516 /* allow <Pkg'VALUE> or <Pkg::VALUE> */
9517 while (*d && (isALNUM_lazy_if(d,UTF) || *d == '\'' || *d == ':'))
9518 d++;
9519
9520 /* If we've tried to read what we allow filehandles to look like, and
9521 there's still text left, then it must be a glob() and not a getline.
9522 Use scan_str to pull out the stuff between the <> and treat it
9523 as nothing more than a string.
9524 */
9525
9526 if (d - PL_tokenbuf != len) {
9527 pl_yylval.ival = OP_GLOB;
9528 s = scan_str(start,!!PL_madskills,FALSE);
9529 if (!s)
9530 Perl_croak(aTHX_ "Glob not terminated");
9531 return s;
9532 }
9533 else {
9534 bool readline_overriden = FALSE;
9535 GV *gv_readline;
9536 GV **gvp;
9537 /* we're in a filehandle read situation */
9538 d = PL_tokenbuf;
9539
9540 /* turn <> into <ARGV> */
9541 if (!len)
9542 Copy("ARGV",d,5,char);
9543
9544 /* Check whether readline() is overriden */
9545 gv_readline = gv_fetchpvs("readline", GV_NOTQUAL, SVt_PVCV);
9546 if ((gv_readline
9547 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline))
9548 ||
9549 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readline", FALSE))
9550 && (gv_readline = *gvp) && isGV_with_GP(gv_readline)
9551 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline)))
9552 readline_overriden = TRUE;
9553
9554 /* if <$fh>, create the ops to turn the variable into a
9555 filehandle
9556 */
9557 if (*d == '$') {
9558 /* try to find it in the pad for this block, otherwise find
9559 add symbol table ops
9560 */
9561 const PADOFFSET tmp = pad_findmy_pvn(d, len, UTF ? SVf_UTF8 : 0);
9562 if (tmp != NOT_IN_PAD) {
9563 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
9564 HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
9565 HEK * const stashname = HvNAME_HEK(stash);
9566 SV * const sym = sv_2mortal(newSVhek(stashname));
9567 sv_catpvs(sym, "::");
9568 sv_catpv(sym, d+1);
9569 d = SvPVX(sym);
9570 goto intro_sym;
9571 }
9572 else {
9573 OP * const o = newOP(OP_PADSV, 0);
9574 o->op_targ = tmp;
9575 PL_lex_op = readline_overriden
9576 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
9577 op_append_elem(OP_LIST, o,
9578 newCVREF(0, newGVOP(OP_GV,0,gv_readline))))
9579 : (OP*)newUNOP(OP_READLINE, 0, o);
9580 }
9581 }
9582 else {
9583 GV *gv;
9584 ++d;
9585intro_sym:
9586 gv = gv_fetchpv(d,
9587 (PL_in_eval
9588 ? (GV_ADDMULTI | GV_ADDINEVAL)
9589 : GV_ADDMULTI),
9590 SVt_PV);
9591 PL_lex_op = readline_overriden
9592 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
9593 op_append_elem(OP_LIST,
9594 newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)),
9595 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
9596 : (OP*)newUNOP(OP_READLINE, 0,
9597 newUNOP(OP_RV2SV, 0,
9598 newGVOP(OP_GV, 0, gv)));
9599 }
9600 if (!readline_overriden)
9601 PL_lex_op->op_flags |= OPf_SPECIAL;
9602 /* we created the ops in PL_lex_op, so make pl_yylval.ival a null op */
9603 pl_yylval.ival = OP_NULL;
9604 }
9605
9606 /* If it's none of the above, it must be a literal filehandle
9607 (<Foo::BAR> or <FOO>) so build a simple readline OP */
9608 else {
9609 GV * const gv = gv_fetchpv(d, GV_ADD, SVt_PVIO);
9610 PL_lex_op = readline_overriden
9611 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
9612 op_append_elem(OP_LIST,
9613 newGVOP(OP_GV, 0, gv),
9614 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
9615 : (OP*)newUNOP(OP_READLINE, 0, newGVOP(OP_GV, 0, gv));
9616 pl_yylval.ival = OP_NULL;
9617 }
9618 }
9619
9620 return s;
9621}
9622
9623
9624/* scan_str
9625 takes: start position in buffer
9626 keep_quoted preserve \ on the embedded delimiter(s)
9627 keep_delims preserve the delimiters around the string
9628 returns: position to continue reading from buffer
9629 side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
9630 updates the read buffer.
9631
9632 This subroutine pulls a string out of the input. It is called for:
9633 q single quotes q(literal text)
9634 ' single quotes 'literal text'
9635 qq double quotes qq(interpolate $here please)
9636 " double quotes "interpolate $here please"
9637 qx backticks qx(/bin/ls -l)
9638 ` backticks `/bin/ls -l`
9639 qw quote words @EXPORT_OK = qw( func() $spam )
9640 m// regexp match m/this/
9641 s/// regexp substitute s/this/that/
9642 tr/// string transliterate tr/this/that/
9643 y/// string transliterate y/this/that/
9644 ($*@) sub prototypes sub foo ($)
9645 (stuff) sub attr parameters sub foo : attr(stuff)
9646 <> readline or globs <FOO>, <>, <$fh>, or <*.c>
9647
9648 In most of these cases (all but <>, patterns and transliterate)
9649 yylex() calls scan_str(). m// makes yylex() call scan_pat() which
9650 calls scan_str(). s/// makes yylex() call scan_subst() which calls
9651 scan_str(). tr/// and y/// make yylex() call scan_trans() which
9652 calls scan_str().
9653
9654 It skips whitespace before the string starts, and treats the first
9655 character as the delimiter. If the delimiter is one of ([{< then
9656 the corresponding "close" character )]}> is used as the closing
9657 delimiter. It allows quoting of delimiters, and if the string has
9658 balanced delimiters ([{<>}]) it allows nesting.
9659
9660 On success, the SV with the resulting string is put into lex_stuff or,
9661 if that is already non-NULL, into lex_repl. The second case occurs only
9662 when parsing the RHS of the special constructs s/// and tr/// (y///).
9663 For convenience, the terminating delimiter character is stuffed into
9664 SvIVX of the SV.
9665*/
9666
9667STATIC char *
9668S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims)
9669{
9670 dVAR;
9671 SV *sv; /* scalar value: string */
9672 const char *tmps; /* temp string, used for delimiter matching */
9673 register char *s = start; /* current position in the buffer */
9674 register char term; /* terminating character */
9675 register char *to; /* current position in the sv's data */
9676 I32 brackets = 1; /* bracket nesting level */
9677 bool has_utf8 = FALSE; /* is there any utf8 content? */
9678 I32 termcode; /* terminating char. code */
9679 U8 termstr[UTF8_MAXBYTES]; /* terminating string */
9680 STRLEN termlen; /* length of terminating string */
9681 int last_off = 0; /* last position for nesting bracket */
9682#ifdef PERL_MAD
9683 int stuffstart;
9684 char *tstart;
9685#endif
9686
9687 PERL_ARGS_ASSERT_SCAN_STR;
9688
9689 /* skip space before the delimiter */
9690 if (isSPACE(*s)) {
9691 s = PEEKSPACE(s);
9692 }
9693
9694#ifdef PERL_MAD
9695 if (PL_realtokenstart >= 0) {
9696 stuffstart = PL_realtokenstart;
9697 PL_realtokenstart = -1;
9698 }
9699 else
9700 stuffstart = start - SvPVX(PL_linestr);
9701#endif
9702 /* mark where we are, in case we need to report errors */
9703 CLINE;
9704
9705 /* after skipping whitespace, the next character is the terminator */
9706 term = *s;
9707 if (!UTF) {
9708 termcode = termstr[0] = term;
9709 termlen = 1;
9710 }
9711 else {
9712 termcode = utf8_to_uvchr((U8*)s, &termlen);
9713 Copy(s, termstr, termlen, U8);
9714 if (!UTF8_IS_INVARIANT(term))
9715 has_utf8 = TRUE;
9716 }
9717
9718 /* mark where we are */
9719 PL_multi_start = CopLINE(PL_curcop);
9720 PL_multi_open = term;
9721
9722 /* find corresponding closing delimiter */
9723 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
9724 termcode = termstr[0] = term = tmps[5];
9725
9726 PL_multi_close = term;
9727
9728 /* create a new SV to hold the contents. 79 is the SV's initial length.
9729 What a random number. */
9730 sv = newSV_type(SVt_PVIV);
9731 SvGROW(sv, 80);
9732 SvIV_set(sv, termcode);
9733 (void)SvPOK_only(sv); /* validate pointer */
9734
9735 /* move past delimiter and try to read a complete string */
9736 if (keep_delims)
9737 sv_catpvn(sv, s, termlen);
9738 s += termlen;
9739#ifdef PERL_MAD
9740 tstart = SvPVX(PL_linestr) + stuffstart;
9741 if (!PL_thisopen && !keep_delims) {
9742 PL_thisopen = newSVpvn(tstart, s - tstart);
9743 stuffstart = s - SvPVX(PL_linestr);
9744 }
9745#endif
9746 for (;;) {
9747 if (PL_encoding && !UTF) {
9748 bool cont = TRUE;
9749
9750 while (cont) {
9751 int offset = s - SvPVX_const(PL_linestr);
9752 const bool found = sv_cat_decode(sv, PL_encoding, PL_linestr,
9753 &offset, (char*)termstr, termlen);
9754 const char * const ns = SvPVX_const(PL_linestr) + offset;
9755 char * const svlast = SvEND(sv) - 1;
9756
9757 for (; s < ns; s++) {
9758 if (*s == '\n' && !PL_rsfp)
9759 CopLINE_inc(PL_curcop);
9760 }
9761 if (!found)
9762 goto read_more_line;
9763 else {
9764 /* handle quoted delimiters */
9765 if (SvCUR(sv) > 1 && *(svlast-1) == '\\') {
9766 const char *t;
9767 for (t = svlast-2; t >= SvPVX_const(sv) && *t == '\\';)
9768 t--;
9769 if ((svlast-1 - t) % 2) {
9770 if (!keep_quoted) {
9771 *(svlast-1) = term;
9772 *svlast = '\0';
9773 SvCUR_set(sv, SvCUR(sv) - 1);
9774 }
9775 continue;
9776 }
9777 }
9778 if (PL_multi_open == PL_multi_close) {
9779 cont = FALSE;
9780 }
9781 else {
9782 const char *t;
9783 char *w;
9784 for (t = w = SvPVX(sv)+last_off; t < svlast; w++, t++) {
9785 /* At here, all closes are "was quoted" one,
9786 so we don't check PL_multi_close. */
9787 if (*t == '\\') {
9788 if (!keep_quoted && *(t+1) == PL_multi_open)
9789 t++;
9790 else
9791 *w++ = *t++;
9792 }
9793 else if (*t == PL_multi_open)
9794 brackets++;
9795
9796 *w = *t;
9797 }
9798 if (w < t) {
9799 *w++ = term;
9800 *w = '\0';
9801 SvCUR_set(sv, w - SvPVX_const(sv));
9802 }
9803 last_off = w - SvPVX(sv);
9804 if (--brackets <= 0)
9805 cont = FALSE;
9806 }
9807 }
9808 }
9809 if (!keep_delims) {
9810 SvCUR_set(sv, SvCUR(sv) - 1);
9811 *SvEND(sv) = '\0';
9812 }
9813 break;
9814 }
9815
9816 /* extend sv if need be */
9817 SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
9818 /* set 'to' to the next character in the sv's string */
9819 to = SvPVX(sv)+SvCUR(sv);
9820
9821 /* if open delimiter is the close delimiter read unbridle */
9822 if (PL_multi_open == PL_multi_close) {
9823 for (; s < PL_bufend; s++,to++) {
9824 /* embedded newlines increment the current line number */
9825 if (*s == '\n' && !PL_rsfp)
9826 CopLINE_inc(PL_curcop);
9827 /* handle quoted delimiters */
9828 if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
9829 if (!keep_quoted && s[1] == term)
9830 s++;
9831 /* any other quotes are simply copied straight through */
9832 else
9833 *to++ = *s++;
9834 }
9835 /* terminate when run out of buffer (the for() condition), or
9836 have found the terminator */
9837 else if (*s == term) {
9838 if (termlen == 1)
9839 break;
9840 if (s+termlen <= PL_bufend && memEQ(s, (char*)termstr, termlen))
9841 break;
9842 }
9843 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
9844 has_utf8 = TRUE;
9845 *to = *s;
9846 }
9847 }
9848
9849 /* if the terminator isn't the same as the start character (e.g.,
9850 matched brackets), we have to allow more in the quoting, and
9851 be prepared for nested brackets.
9852 */
9853 else {
9854 /* read until we run out of string, or we find the terminator */
9855 for (; s < PL_bufend; s++,to++) {
9856 /* embedded newlines increment the line count */
9857 if (*s == '\n' && !PL_rsfp)
9858 CopLINE_inc(PL_curcop);
9859 /* backslashes can escape the open or closing characters */
9860 if (*s == '\\' && s+1 < PL_bufend) {
9861 if (!keep_quoted &&
9862 ((s[1] == PL_multi_open) || (s[1] == PL_multi_close)))
9863 s++;
9864 else
9865 *to++ = *s++;
9866 }
9867 /* allow nested opens and closes */
9868 else if (*s == PL_multi_close && --brackets <= 0)
9869 break;
9870 else if (*s == PL_multi_open)
9871 brackets++;
9872 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
9873 has_utf8 = TRUE;
9874 *to = *s;
9875 }
9876 }
9877 /* terminate the copied string and update the sv's end-of-string */
9878 *to = '\0';
9879 SvCUR_set(sv, to - SvPVX_const(sv));
9880
9881 /*
9882 * this next chunk reads more into the buffer if we're not done yet
9883 */
9884
9885 if (s < PL_bufend)
9886 break; /* handle case where we are done yet :-) */
9887
9888#ifndef PERL_STRICT_CR
9889 if (to - SvPVX_const(sv) >= 2) {
9890 if ((to[-2] == '\r' && to[-1] == '\n') ||
9891 (to[-2] == '\n' && to[-1] == '\r'))
9892 {
9893 to[-2] = '\n';
9894 to--;
9895 SvCUR_set(sv, to - SvPVX_const(sv));
9896 }
9897 else if (to[-1] == '\r')
9898 to[-1] = '\n';
9899 }
9900 else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r')
9901 to[-1] = '\n';
9902#endif
9903
9904 read_more_line:
9905 /* if we're out of file, or a read fails, bail and reset the current
9906 line marker so we can report where the unterminated string began
9907 */
9908#ifdef PERL_MAD
9909 if (PL_madskills) {
9910 char * const tstart = SvPVX(PL_linestr) + stuffstart;
9911 if (PL_thisstuff)
9912 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
9913 else
9914 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
9915 }
9916#endif
9917 CopLINE_inc(PL_curcop);
9918 PL_bufptr = PL_bufend;
9919 if (!lex_next_chunk(0)) {
9920 sv_free(sv);
9921 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
9922 return NULL;
9923 }
9924 s = PL_bufptr;
9925#ifdef PERL_MAD
9926 stuffstart = 0;
9927#endif
9928 }
9929
9930 /* at this point, we have successfully read the delimited string */
9931
9932 if (!PL_encoding || UTF) {
9933#ifdef PERL_MAD
9934 if (PL_madskills) {
9935 char * const tstart = SvPVX(PL_linestr) + stuffstart;
9936 const int len = s - tstart;
9937 if (PL_thisstuff)
9938 sv_catpvn(PL_thisstuff, tstart, len);
9939 else
9940 PL_thisstuff = newSVpvn(tstart, len);
9941 if (!PL_thisclose && !keep_delims)
9942 PL_thisclose = newSVpvn(s,termlen);
9943 }
9944#endif
9945
9946 if (keep_delims)
9947 sv_catpvn(sv, s, termlen);
9948 s += termlen;
9949 }
9950#ifdef PERL_MAD
9951 else {
9952 if (PL_madskills) {
9953 char * const tstart = SvPVX(PL_linestr) + stuffstart;
9954 const int len = s - tstart - termlen;
9955 if (PL_thisstuff)
9956 sv_catpvn(PL_thisstuff, tstart, len);
9957 else
9958 PL_thisstuff = newSVpvn(tstart, len);
9959 if (!PL_thisclose && !keep_delims)
9960 PL_thisclose = newSVpvn(s - termlen,termlen);
9961 }
9962 }
9963#endif
9964 if (has_utf8 || PL_encoding)
9965 SvUTF8_on(sv);
9966
9967 PL_multi_end = CopLINE(PL_curcop);
9968
9969 /* if we allocated too much space, give some back */
9970 if (SvCUR(sv) + 5 < SvLEN(sv)) {
9971 SvLEN_set(sv, SvCUR(sv) + 1);
9972 SvPV_renew(sv, SvLEN(sv));
9973 }
9974
9975 /* decide whether this is the first or second quoted string we've read
9976 for this op
9977 */
9978
9979 if (PL_lex_stuff)
9980 PL_lex_repl = sv;
9981 else
9982 PL_lex_stuff = sv;
9983 return s;
9984}
9985
9986/*
9987 scan_num
9988 takes: pointer to position in buffer
9989 returns: pointer to new position in buffer
9990 side-effects: builds ops for the constant in pl_yylval.op
9991
9992 Read a number in any of the formats that Perl accepts:
9993
9994 \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*) 12 12.34 12.
9995 \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*) .34
9996 0b[01](_?[01])*
9997 0[0-7](_?[0-7])*
9998 0x[0-9A-Fa-f](_?[0-9A-Fa-f])*
9999
10000 Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the
10001 thing it reads.
10002
10003 If it reads a number without a decimal point or an exponent, it will
10004 try converting the number to an integer and see if it can do so
10005 without loss of precision.
10006*/
10007
10008char *
10009Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
10010{
10011 dVAR;
10012 register const char *s = start; /* current position in buffer */
10013 register char *d; /* destination in temp buffer */
10014 register char *e; /* end of temp buffer */
10015 NV nv; /* number read, as a double */
10016 SV *sv = NULL; /* place to put the converted number */
10017 bool floatit; /* boolean: int or float? */
10018 const char *lastub = NULL; /* position of last underbar */
10019 static char const number_too_long[] = "Number too long";
10020
10021 PERL_ARGS_ASSERT_SCAN_NUM;
10022
10023 /* We use the first character to decide what type of number this is */
10024
10025 switch (*s) {
10026 default:
10027 Perl_croak(aTHX_ "panic: scan_num");
10028
10029 /* if it starts with a 0, it could be an octal number, a decimal in
10030 0.13 disguise, or a hexadecimal number, or a binary number. */
10031 case '0':
10032 {
10033 /* variables:
10034 u holds the "number so far"
10035 shift the power of 2 of the base
10036 (hex == 4, octal == 3, binary == 1)
10037 overflowed was the number more than we can hold?
10038
10039 Shift is used when we add a digit. It also serves as an "are
10040 we in octal/hex/binary?" indicator to disallow hex characters
10041 when in octal mode.
10042 */
10043 NV n = 0.0;
10044 UV u = 0;
10045 I32 shift;
10046 bool overflowed = FALSE;
10047 bool just_zero = TRUE; /* just plain 0 or binary number? */
10048 static const NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 };
10049 static const char* const bases[5] =
10050 { "", "binary", "", "octal", "hexadecimal" };
10051 static const char* const Bases[5] =
10052 { "", "Binary", "", "Octal", "Hexadecimal" };
10053 static const char* const maxima[5] =
10054 { "",
10055 "0b11111111111111111111111111111111",
10056 "",
10057 "037777777777",
10058 "0xffffffff" };
10059 const char *base, *Base, *max;
10060
10061 /* check for hex */
10062 if (s[1] == 'x' || s[1] == 'X') {
10063 shift = 4;
10064 s += 2;
10065 just_zero = FALSE;
10066 } else if (s[1] == 'b' || s[1] == 'B') {
10067 shift = 1;
10068 s += 2;
10069 just_zero = FALSE;
10070 }
10071 /* check for a decimal in disguise */
10072 else if (s[1] == '.' || s[1] == 'e' || s[1] == 'E')
10073 goto decimal;
10074 /* so it must be octal */
10075 else {
10076 shift = 3;
10077 s++;
10078 }
10079
10080 if (*s == '_') {
10081 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10082 "Misplaced _ in number");
10083 lastub = s++;
10084 }
10085
10086 base = bases[shift];
10087 Base = Bases[shift];
10088 max = maxima[shift];
10089
10090 /* read the rest of the number */
10091 for (;;) {
10092 /* x is used in the overflow test,
10093 b is the digit we're adding on. */
10094 UV x, b;
10095
10096 switch (*s) {
10097
10098 /* if we don't mention it, we're done */
10099 default:
10100 goto out;
10101
10102 /* _ are ignored -- but warned about if consecutive */
10103 case '_':
10104 if (lastub && s == lastub + 1)
10105 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10106 "Misplaced _ in number");
10107 lastub = s++;
10108 break;
10109
10110 /* 8 and 9 are not octal */
10111 case '8': case '9':
10112 if (shift == 3)
10113 yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s));
10114 /* FALL THROUGH */
10115
10116 /* octal digits */
10117 case '2': case '3': case '4':
10118 case '5': case '6': case '7':
10119 if (shift == 1)
10120 yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s));
10121 /* FALL THROUGH */
10122
10123 case '0': case '1':
10124 b = *s++ & 15; /* ASCII digit -> value of digit */
10125 goto digit;
10126
10127 /* hex digits */
10128 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
10129 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
10130 /* make sure they said 0x */
10131 if (shift != 4)
10132 goto out;
10133 b = (*s++ & 7) + 9;
10134
10135 /* Prepare to put the digit we have onto the end
10136 of the number so far. We check for overflows.
10137 */
10138
10139 digit:
10140 just_zero = FALSE;
10141 if (!overflowed) {
10142 x = u << shift; /* make room for the digit */
10143
10144 if ((x >> shift) != u
10145 && !(PL_hints & HINT_NEW_BINARY)) {
10146 overflowed = TRUE;
10147 n = (NV) u;
10148 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
10149 "Integer overflow in %s number",
10150 base);
10151 } else
10152 u = x | b; /* add the digit to the end */
10153 }
10154 if (overflowed) {
10155 n *= nvshift[shift];
10156 /* If an NV has not enough bits in its
10157 * mantissa to represent an UV this summing of
10158 * small low-order numbers is a waste of time
10159 * (because the NV cannot preserve the
10160 * low-order bits anyway): we could just
10161 * remember when did we overflow and in the
10162 * end just multiply n by the right
10163 * amount. */
10164 n += (NV) b;
10165 }
10166 break;
10167 }
10168 }
10169
10170 /* if we get here, we had success: make a scalar value from
10171 the number.
10172 */
10173 out:
10174
10175 /* final misplaced underbar check */
10176 if (s[-1] == '_') {
10177 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
10178 }
10179
10180 if (overflowed) {
10181 if (n > 4294967295.0)
10182 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
10183 "%s number > %s non-portable",
10184 Base, max);
10185 sv = newSVnv(n);
10186 }
10187 else {
10188#if UVSIZE > 4
10189 if (u > 0xffffffff)
10190 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
10191 "%s number > %s non-portable",
10192 Base, max);
10193#endif
10194 sv = newSVuv(u);
10195 }
10196 if (just_zero && (PL_hints & HINT_NEW_INTEGER))
10197 sv = new_constant(start, s - start, "integer",
10198 sv, NULL, NULL, 0);
10199 else if (PL_hints & HINT_NEW_BINARY)
10200 sv = new_constant(start, s - start, "binary", sv, NULL, NULL, 0);
10201 }
10202 break;
10203
10204 /*
10205 handle decimal numbers.
10206 we're also sent here when we read a 0 as the first digit
10207 */
10208 case '1': case '2': case '3': case '4': case '5':
10209 case '6': case '7': case '8': case '9': case '.':
10210 decimal:
10211 d = PL_tokenbuf;
10212 e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */
10213 floatit = FALSE;
10214
10215 /* read next group of digits and _ and copy into d */
10216 while (isDIGIT(*s) || *s == '_') {
10217 /* skip underscores, checking for misplaced ones
10218 if -w is on
10219 */
10220 if (*s == '_') {
10221 if (lastub && s == lastub + 1)
10222 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10223 "Misplaced _ in number");
10224 lastub = s++;
10225 }
10226 else {
10227 /* check for end of fixed-length buffer */
10228 if (d >= e)
10229 Perl_croak(aTHX_ number_too_long);
10230 /* if we're ok, copy the character */
10231 *d++ = *s++;
10232 }
10233 }
10234
10235 /* final misplaced underbar check */
10236 if (lastub && s == lastub + 1) {
10237 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
10238 }
10239
10240 /* read a decimal portion if there is one. avoid
10241 3..5 being interpreted as the number 3. followed
10242 by .5
10243 */
10244 if (*s == '.' && s[1] != '.') {
10245 floatit = TRUE;
10246 *d++ = *s++;
10247
10248 if (*s == '_') {
10249 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10250 "Misplaced _ in number");
10251 lastub = s;
10252 }
10253
10254 /* copy, ignoring underbars, until we run out of digits.
10255 */
10256 for (; isDIGIT(*s) || *s == '_'; s++) {
10257 /* fixed length buffer check */
10258 if (d >= e)
10259 Perl_croak(aTHX_ number_too_long);
10260 if (*s == '_') {
10261 if (lastub && s == lastub + 1)
10262 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10263 "Misplaced _ in number");
10264 lastub = s;
10265 }
10266 else
10267 *d++ = *s;
10268 }
10269 /* fractional part ending in underbar? */
10270 if (s[-1] == '_') {
10271 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10272 "Misplaced _ in number");
10273 }
10274 if (*s == '.' && isDIGIT(s[1])) {
10275 /* oops, it's really a v-string, but without the "v" */
10276 s = start;
10277 goto vstring;
10278 }
10279 }
10280
10281 /* read exponent part, if present */
10282 if ((*s == 'e' || *s == 'E') && strchr("+-0123456789_", s[1])) {
10283 floatit = TRUE;
10284 s++;
10285
10286 /* regardless of whether user said 3E5 or 3e5, use lower 'e' */
10287 *d++ = 'e'; /* At least some Mach atof()s don't grok 'E' */
10288
10289 /* stray preinitial _ */
10290 if (*s == '_') {
10291 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10292 "Misplaced _ in number");
10293 lastub = s++;
10294 }
10295
10296 /* allow positive or negative exponent */
10297 if (*s == '+' || *s == '-')
10298 *d++ = *s++;
10299
10300 /* stray initial _ */
10301 if (*s == '_') {
10302 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10303 "Misplaced _ in number");
10304 lastub = s++;
10305 }
10306
10307 /* read digits of exponent */
10308 while (isDIGIT(*s) || *s == '_') {
10309 if (isDIGIT(*s)) {
10310 if (d >= e)
10311 Perl_croak(aTHX_ number_too_long);
10312 *d++ = *s++;
10313 }
10314 else {
10315 if (((lastub && s == lastub + 1) ||
10316 (!isDIGIT(s[1]) && s[1] != '_')))
10317 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10318 "Misplaced _ in number");
10319 lastub = s++;
10320 }
10321 }
10322 }
10323
10324
10325 /*
10326 We try to do an integer conversion first if no characters
10327 indicating "float" have been found.
10328 */
10329
10330 if (!floatit) {
10331 UV uv;
10332 const int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv);
10333
10334 if (flags == IS_NUMBER_IN_UV) {
10335 if (uv <= IV_MAX)
10336 sv = newSViv(uv); /* Prefer IVs over UVs. */
10337 else
10338 sv = newSVuv(uv);
10339 } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) {
10340 if (uv <= (UV) IV_MIN)
10341 sv = newSViv(-(IV)uv);
10342 else
10343 floatit = TRUE;
10344 } else
10345 floatit = TRUE;
10346 }
10347 if (floatit) {
10348 /* terminate the string */
10349 *d = '\0';
10350 nv = Atof(PL_tokenbuf);
10351 sv = newSVnv(nv);
10352 }
10353
10354 if ( floatit
10355 ? (PL_hints & HINT_NEW_FLOAT) : (PL_hints & HINT_NEW_INTEGER) ) {
10356 const char *const key = floatit ? "float" : "integer";
10357 const STRLEN keylen = floatit ? 5 : 7;
10358 sv = S_new_constant(aTHX_ PL_tokenbuf, d - PL_tokenbuf,
10359 key, keylen, sv, NULL, NULL, 0);
10360 }
10361 break;
10362
10363 /* if it starts with a v, it could be a v-string */
10364 case 'v':
10365vstring:
10366 sv = newSV(5); /* preallocate storage space */
10367 s = scan_vstring(s, PL_bufend, sv);
10368 break;
10369 }
10370
10371 /* make the op for the constant and return */
10372
10373 if (sv)
10374 lvalp->opval = newSVOP(OP_CONST, 0, sv);
10375 else
10376 lvalp->opval = NULL;
10377
10378 return (char *)s;
10379}
10380
10381STATIC char *
10382S_scan_formline(pTHX_ register char *s)
10383{
10384 dVAR;
10385 register char *eol;
10386 register char *t;
10387 SV * const stuff = newSVpvs("");
10388 bool needargs = FALSE;
10389 bool eofmt = FALSE;
10390#ifdef PERL_MAD
10391 char *tokenstart = s;
10392 SV* savewhite = NULL;
10393
10394 if (PL_madskills) {
10395 savewhite = PL_thiswhite;
10396 PL_thiswhite = 0;
10397 }
10398#endif
10399
10400 PERL_ARGS_ASSERT_SCAN_FORMLINE;
10401
10402 while (!needargs) {
10403 if (*s == '.') {
10404 t = s+1;
10405#ifdef PERL_STRICT_CR
10406 while (SPACE_OR_TAB(*t))
10407 t++;
10408#else
10409 while (SPACE_OR_TAB(*t) || *t == '\r')
10410 t++;
10411#endif
10412 if (*t == '\n' || t == PL_bufend) {
10413 eofmt = TRUE;
10414 break;
10415 }
10416 }
10417 if (PL_in_eval && !PL_rsfp) {
10418 eol = (char *) memchr(s,'\n',PL_bufend-s);
10419 if (!eol++)
10420 eol = PL_bufend;
10421 }
10422 else
10423 eol = PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
10424 if (*s != '#') {
10425 for (t = s; t < eol; t++) {
10426 if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
10427 needargs = FALSE;
10428 goto enough; /* ~~ must be first line in formline */
10429 }
10430 if (*t == '@' || *t == '^')
10431 needargs = TRUE;
10432 }
10433 if (eol > s) {
10434 sv_catpvn(stuff, s, eol-s);
10435#ifndef PERL_STRICT_CR
10436 if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') {
10437 char *end = SvPVX(stuff) + SvCUR(stuff);
10438 end[-2] = '\n';
10439 end[-1] = '\0';
10440 SvCUR_set(stuff, SvCUR(stuff) - 1);
10441 }
10442#endif
10443 }
10444 else
10445 break;
10446 }
10447 s = (char*)eol;
10448 if (PL_rsfp) {
10449 bool got_some;
10450#ifdef PERL_MAD
10451 if (PL_madskills) {
10452 if (PL_thistoken)
10453 sv_catpvn(PL_thistoken, tokenstart, PL_bufend - tokenstart);
10454 else
10455 PL_thistoken = newSVpvn(tokenstart, PL_bufend - tokenstart);
10456 }
10457#endif
10458 PL_bufptr = PL_bufend;
10459 CopLINE_inc(PL_curcop);
10460 got_some = lex_next_chunk(0);
10461 CopLINE_dec(PL_curcop);
10462 s = PL_bufptr;
10463#ifdef PERL_MAD
10464 tokenstart = PL_bufptr;
10465#endif
10466 if (!got_some)
10467 break;
10468 }
10469 incline(s);
10470 }
10471 enough:
10472 if (SvCUR(stuff)) {
10473 PL_expect = XTERM;
10474 if (needargs) {
10475 PL_lex_state = LEX_NORMAL;
10476 start_force(PL_curforce);
10477 NEXTVAL_NEXTTOKE.ival = 0;
10478 force_next(',');
10479 }
10480 else
10481 PL_lex_state = LEX_FORMLINE;
10482 if (!IN_BYTES) {
10483 if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff)))
10484 SvUTF8_on(stuff);
10485 else if (PL_encoding)
10486 sv_recode_to_utf8(stuff, PL_encoding);
10487 }
10488 start_force(PL_curforce);
10489 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0, stuff);
10490 force_next(THING);
10491 start_force(PL_curforce);
10492 NEXTVAL_NEXTTOKE.ival = OP_FORMLINE;
10493 force_next(LSTOP);
10494 }
10495 else {
10496 SvREFCNT_dec(stuff);
10497 if (eofmt)
10498 PL_lex_formbrack = 0;
10499 PL_bufptr = s;
10500 }
10501#ifdef PERL_MAD
10502 if (PL_madskills) {
10503 if (PL_thistoken)
10504 sv_catpvn(PL_thistoken, tokenstart, s - tokenstart);
10505 else
10506 PL_thistoken = newSVpvn(tokenstart, s - tokenstart);
10507 PL_thiswhite = savewhite;
10508 }
10509#endif
10510 return s;
10511}
10512
10513I32
10514Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
10515{
10516 dVAR;
10517 const I32 oldsavestack_ix = PL_savestack_ix;
10518 CV* const outsidecv = PL_compcv;
10519
10520 if (PL_compcv) {
10521 assert(SvTYPE(PL_compcv) == SVt_PVCV);
10522 }
10523 SAVEI32(PL_subline);
10524 save_item(PL_subname);
10525 SAVESPTR(PL_compcv);
10526
10527 PL_compcv = MUTABLE_CV(newSV_type(is_format ? SVt_PVFM : SVt_PVCV));
10528 CvFLAGS(PL_compcv) |= flags;
10529
10530 PL_subline = CopLINE(PL_curcop);
10531 CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB);
10532 CvOUTSIDE(PL_compcv) = MUTABLE_CV(SvREFCNT_inc_simple(outsidecv));
10533 CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
10534
10535 return oldsavestack_ix;
10536}
10537
10538#ifdef __SC__
10539#pragma segment Perl_yylex
10540#endif
10541static int
10542S_yywarn(pTHX_ const char *const s)
10543{
10544 dVAR;
10545
10546 PERL_ARGS_ASSERT_YYWARN;
10547
10548 PL_in_eval |= EVAL_WARNONLY;
10549 yyerror(s);
10550 PL_in_eval &= ~EVAL_WARNONLY;
10551 return 0;
10552}
10553
10554int
10555Perl_yyerror(pTHX_ const char *const s)
10556{
10557 dVAR;
10558 const char *where = NULL;
10559 const char *context = NULL;
10560 int contlen = -1;
10561 SV *msg;
10562 int yychar = PL_parser->yychar;
10563
10564 PERL_ARGS_ASSERT_YYERROR;
10565
10566 if (!yychar || (yychar == ';' && !PL_rsfp))
10567 where = "at EOF";
10568 else if (PL_oldoldbufptr && PL_bufptr > PL_oldoldbufptr &&
10569 PL_bufptr - PL_oldoldbufptr < 200 && PL_oldoldbufptr != PL_oldbufptr &&
10570 PL_oldbufptr != PL_bufptr) {
10571 /*
10572 Only for NetWare:
10573 The code below is removed for NetWare because it abends/crashes on NetWare
10574 when the script has error such as not having the closing quotes like:
10575 if ($var eq "value)
10576 Checking of white spaces is anyway done in NetWare code.
10577 */
10578#ifndef NETWARE
10579 while (isSPACE(*PL_oldoldbufptr))
10580 PL_oldoldbufptr++;
10581#endif
10582 context = PL_oldoldbufptr;
10583 contlen = PL_bufptr - PL_oldoldbufptr;
10584 }
10585 else if (PL_oldbufptr && PL_bufptr > PL_oldbufptr &&
10586 PL_bufptr - PL_oldbufptr < 200 && PL_oldbufptr != PL_bufptr) {
10587 /*
10588 Only for NetWare:
10589 The code below is removed for NetWare because it abends/crashes on NetWare
10590 when the script has error such as not having the closing quotes like:
10591 if ($var eq "value)
10592 Checking of white spaces is anyway done in NetWare code.
10593 */
10594#ifndef NETWARE
10595 while (isSPACE(*PL_oldbufptr))
10596 PL_oldbufptr++;
10597#endif
10598 context = PL_oldbufptr;
10599 contlen = PL_bufptr - PL_oldbufptr;
10600 }
10601 else if (yychar > 255)
10602 where = "next token ???";
10603 else if (yychar == -2) { /* YYEMPTY */
10604 if (PL_lex_state == LEX_NORMAL ||
10605 (PL_lex_state == LEX_KNOWNEXT && PL_lex_defer == LEX_NORMAL))
10606 where = "at end of line";
10607 else if (PL_lex_inpat)
10608 where = "within pattern";
10609 else
10610 where = "within string";
10611 }
10612 else {
10613 SV * const where_sv = newSVpvs_flags("next char ", SVs_TEMP);
10614 if (yychar < 32)
10615 Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
10616 else if (isPRINT_LC(yychar)) {
10617 const char string = yychar;
10618 sv_catpvn(where_sv, &string, 1);
10619 }
10620 else
10621 Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
10622 where = SvPVX_const(where_sv);
10623 }
10624 msg = sv_2mortal(newSVpv(s, 0));
10625 Perl_sv_catpvf(aTHX_ msg, " at %s line %"IVdf", ",
10626 OutCopFILE(PL_curcop), (IV)CopLINE(PL_curcop));
10627 if (context)
10628 Perl_sv_catpvf(aTHX_ msg, "near \"%.*s\"\n", contlen, context);
10629 else
10630 Perl_sv_catpvf(aTHX_ msg, "%s\n", where);
10631 if (PL_multi_start < PL_multi_end && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1) {
10632 Perl_sv_catpvf(aTHX_ msg,
10633 " (Might be a runaway multi-line %c%c string starting on line %"IVdf")\n",
10634 (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start);
10635 PL_multi_end = 0;
10636 }
10637 if (PL_in_eval & EVAL_WARNONLY) {
10638 Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "%"SVf, SVfARG(msg));
10639 }
10640 else
10641 qerror(msg);
10642 if (PL_error_count >= 10) {
10643 if (PL_in_eval && SvCUR(ERRSV))
10644 Perl_croak(aTHX_ "%"SVf"%s has too many errors.\n",
10645 SVfARG(ERRSV), OutCopFILE(PL_curcop));
10646 else
10647 Perl_croak(aTHX_ "%s has too many errors.\n",
10648 OutCopFILE(PL_curcop));
10649 }
10650 PL_in_my = 0;
10651 PL_in_my_stash = NULL;
10652 return 0;
10653}
10654#ifdef __SC__
10655#pragma segment Main
10656#endif
10657
10658STATIC char*
10659S_swallow_bom(pTHX_ U8 *s)
10660{
10661 dVAR;
10662 const STRLEN slen = SvCUR(PL_linestr);
10663
10664 PERL_ARGS_ASSERT_SWALLOW_BOM;
10665
10666 switch (s[0]) {
10667 case 0xFF:
10668 if (s[1] == 0xFE) {
10669 /* UTF-16 little-endian? (or UTF-32LE?) */
10670 if (s[2] == 0 && s[3] == 0) /* UTF-32 little-endian */
10671 Perl_croak(aTHX_ "Unsupported script encoding UTF-32LE");
10672#ifndef PERL_NO_UTF16_FILTER
10673 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (BOM)\n");
10674 s += 2;
10675 if (PL_bufend > (char*)s) {
10676 s = add_utf16_textfilter(s, TRUE);
10677 }
10678#else
10679 Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
10680#endif
10681 }
10682 break;
10683 case 0xFE:
10684 if (s[1] == 0xFF) { /* UTF-16 big-endian? */
10685#ifndef PERL_NO_UTF16_FILTER
10686 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
10687 s += 2;
10688 if (PL_bufend > (char *)s) {
10689 s = add_utf16_textfilter(s, FALSE);
10690 }
10691#else
10692 Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
10693#endif
10694 }
10695 break;
10696 case 0xEF:
10697 if (slen > 2 && s[1] == 0xBB && s[2] == 0xBF) {
10698 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
10699 s += 3; /* UTF-8 */
10700 }
10701 break;
10702 case 0:
10703 if (slen > 3) {
10704 if (s[1] == 0) {
10705 if (s[2] == 0xFE && s[3] == 0xFF) {
10706 /* UTF-32 big-endian */
10707 Perl_croak(aTHX_ "Unsupported script encoding UTF-32BE");
10708 }
10709 }
10710 else if (s[2] == 0 && s[3] != 0) {
10711 /* Leading bytes
10712 * 00 xx 00 xx
10713 * are a good indicator of UTF-16BE. */
10714#ifndef PERL_NO_UTF16_FILTER
10715 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
10716 s = add_utf16_textfilter(s, FALSE);
10717#else
10718 Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
10719#endif
10720 }
10721 }
10722#ifdef EBCDIC
10723 case 0xDD:
10724 if (slen > 3 && s[1] == 0x73 && s[2] == 0x66 && s[3] == 0x73) {
10725 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
10726 s += 4; /* UTF-8 */
10727 }
10728 break;
10729#endif
10730
10731 default:
10732 if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
10733 /* Leading bytes
10734 * xx 00 xx 00
10735 * are a good indicator of UTF-16LE. */
10736#ifndef PERL_NO_UTF16_FILTER
10737 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
10738 s = add_utf16_textfilter(s, TRUE);
10739#else
10740 Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
10741#endif
10742 }
10743 }
10744 return (char*)s;
10745}
10746
10747
10748#ifndef PERL_NO_UTF16_FILTER
10749static I32
10750S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
10751{
10752 dVAR;
10753 SV *const filter = FILTER_DATA(idx);
10754 /* We re-use this each time round, throwing the contents away before we
10755 return. */
10756 SV *const utf16_buffer = MUTABLE_SV(IoTOP_GV(filter));
10757 SV *const utf8_buffer = filter;
10758 IV status = IoPAGE(filter);
10759 const bool reverse = cBOOL(IoLINES(filter));
10760 I32 retval;
10761
10762 PERL_ARGS_ASSERT_UTF16_TEXTFILTER;
10763
10764 /* As we're automatically added, at the lowest level, and hence only called
10765 from this file, we can be sure that we're not called in block mode. Hence
10766 don't bother writing code to deal with block mode. */
10767 if (maxlen) {
10768 Perl_croak(aTHX_ "panic: utf16_textfilter called in block mode (for %d characters)", maxlen);
10769 }
10770 if (status < 0) {
10771 Perl_croak(aTHX_ "panic: utf16_textfilter called after error (status=%"IVdf")", status);
10772 }
10773 DEBUG_P(PerlIO_printf(Perl_debug_log,
10774 "utf16_textfilter(%p,%ce): idx=%d maxlen=%d status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
10775 FPTR2DPTR(void *, S_utf16_textfilter),
10776 reverse ? 'l' : 'b', idx, maxlen, status,
10777 (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
10778
10779 while (1) {
10780 STRLEN chars;
10781 STRLEN have;
10782 I32 newlen;
10783 U8 *end;
10784 /* First, look in our buffer of existing UTF-8 data: */
10785 char *nl = (char *)memchr(SvPVX(utf8_buffer), '\n', SvCUR(utf8_buffer));
10786
10787 if (nl) {
10788 ++nl;
10789 } else if (status == 0) {
10790 /* EOF */
10791 IoPAGE(filter) = 0;
10792 nl = SvEND(utf8_buffer);
10793 }
10794 if (nl) {
10795 STRLEN got = nl - SvPVX(utf8_buffer);
10796 /* Did we have anything to append? */
10797 retval = got != 0;
10798 sv_catpvn(sv, SvPVX(utf8_buffer), got);
10799 /* Everything else in this code works just fine if SVp_POK isn't
10800 set. This, however, needs it, and we need it to work, else
10801 we loop infinitely because the buffer is never consumed. */
10802 sv_chop(utf8_buffer, nl);
10803 break;
10804 }
10805
10806 /* OK, not a complete line there, so need to read some more UTF-16.
10807 Read an extra octect if the buffer currently has an odd number. */
10808 while (1) {
10809 if (status <= 0)
10810 break;
10811 if (SvCUR(utf16_buffer) >= 2) {
10812 /* Location of the high octet of the last complete code point.
10813 Gosh, UTF-16 is a pain. All the benefits of variable length,
10814 *coupled* with all the benefits of partial reads and
10815 endianness. */
10816 const U8 *const last_hi = (U8*)SvPVX(utf16_buffer)
10817 + ((SvCUR(utf16_buffer) & ~1) - (reverse ? 1 : 2));
10818
10819 if (*last_hi < 0xd8 || *last_hi > 0xdb) {
10820 break;
10821 }
10822
10823 /* We have the first half of a surrogate. Read more. */
10824 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter partial surrogate detected at %p\n", last_hi));
10825 }
10826
10827 status = FILTER_READ(idx + 1, utf16_buffer,
10828 160 + (SvCUR(utf16_buffer) & 1));
10829 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter status=%"IVdf" SvCUR(sv)=%"UVuf"\n", status, (UV)SvCUR(utf16_buffer)));
10830 DEBUG_P({ sv_dump(utf16_buffer); sv_dump(utf8_buffer);});
10831 if (status < 0) {
10832 /* Error */
10833 IoPAGE(filter) = status;
10834 return status;
10835 }
10836 }
10837
10838 chars = SvCUR(utf16_buffer) >> 1;
10839 have = SvCUR(utf8_buffer);
10840 SvGROW(utf8_buffer, have + chars * 3 + 1);
10841
10842 if (reverse) {
10843 end = utf16_to_utf8_reversed((U8*)SvPVX(utf16_buffer),
10844 (U8*)SvPVX_const(utf8_buffer) + have,
10845 chars * 2, &newlen);
10846 } else {
10847 end = utf16_to_utf8((U8*)SvPVX(utf16_buffer),
10848 (U8*)SvPVX_const(utf8_buffer) + have,
10849 chars * 2, &newlen);
10850 }
10851 SvCUR_set(utf8_buffer, have + newlen);
10852 *end = '\0';
10853
10854 /* No need to keep this SV "well-formed" with a '\0' after the end, as
10855 it's private to us, and utf16_to_utf8{,reversed} take a
10856 (pointer,length) pair, rather than a NUL-terminated string. */
10857 if(SvCUR(utf16_buffer) & 1) {
10858 *SvPVX(utf16_buffer) = SvEND(utf16_buffer)[-1];
10859 SvCUR_set(utf16_buffer, 1);
10860 } else {
10861 SvCUR_set(utf16_buffer, 0);
10862 }
10863 }
10864 DEBUG_P(PerlIO_printf(Perl_debug_log,
10865 "utf16_textfilter: returns, status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
10866 status,
10867 (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
10868 DEBUG_P({ sv_dump(utf8_buffer); sv_dump(sv);});
10869 return retval;
10870}
10871
10872static U8 *
10873S_add_utf16_textfilter(pTHX_ U8 *const s, bool reversed)
10874{
10875 SV *filter = filter_add(S_utf16_textfilter, NULL);
10876
10877 PERL_ARGS_ASSERT_ADD_UTF16_TEXTFILTER;
10878
10879 IoTOP_GV(filter) = MUTABLE_GV(newSVpvn((char *)s, PL_bufend - (char*)s));
10880 sv_setpvs(filter, "");
10881 IoLINES(filter) = reversed;
10882 IoPAGE(filter) = 1; /* Not EOF */
10883
10884 /* Sadly, we have to return a valid pointer, come what may, so we have to
10885 ignore any error return from this. */
10886 SvCUR_set(PL_linestr, 0);
10887 if (FILTER_READ(0, PL_linestr, 0)) {
10888 SvUTF8_on(PL_linestr);
10889 } else {
10890 SvUTF8_on(PL_linestr);
10891 }
10892 PL_bufend = SvEND(PL_linestr);
10893 return (U8*)SvPVX(PL_linestr);
10894}
10895#endif
10896
10897/*
10898Returns a pointer to the next character after the parsed
10899vstring, as well as updating the passed in sv.
10900
10901Function must be called like
10902
10903 sv = newSV(5);
10904 s = scan_vstring(s,e,sv);
10905
10906where s and e are the start and end of the string.
10907The sv should already be large enough to store the vstring
10908passed in, for performance reasons.
10909
10910*/
10911
10912char *
10913Perl_scan_vstring(pTHX_ const char *s, const char *const e, SV *sv)
10914{
10915 dVAR;
10916 const char *pos = s;
10917 const char *start = s;
10918
10919 PERL_ARGS_ASSERT_SCAN_VSTRING;
10920
10921 if (*pos == 'v') pos++; /* get past 'v' */
10922 while (pos < e && (isDIGIT(*pos) || *pos == '_'))
10923 pos++;
10924 if ( *pos != '.') {
10925 /* this may not be a v-string if followed by => */
10926 const char *next = pos;
10927 while (next < e && isSPACE(*next))
10928 ++next;
10929 if ((e - next) >= 2 && *next == '=' && next[1] == '>' ) {
10930 /* return string not v-string */
10931 sv_setpvn(sv,(char *)s,pos-s);
10932 return (char *)pos;
10933 }
10934 }
10935
10936 if (!isALPHA(*pos)) {
10937 U8 tmpbuf[UTF8_MAXBYTES+1];
10938
10939 if (*s == 'v')
10940 s++; /* get past 'v' */
10941
10942 sv_setpvs(sv, "");
10943
10944 for (;;) {
10945 /* this is atoi() that tolerates underscores */
10946 U8 *tmpend;
10947 UV rev = 0;
10948 const char *end = pos;
10949 UV mult = 1;
10950 while (--end >= s) {
10951 if (*end != '_') {
10952 const UV orev = rev;
10953 rev += (*end - '0') * mult;
10954 mult *= 10;
10955 if (orev > rev)
10956 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
10957 "Integer overflow in decimal number");
10958 }
10959 }
10960#ifdef EBCDIC
10961 if (rev > 0x7FFFFFFF)
10962 Perl_croak(aTHX_ "In EBCDIC the v-string components cannot exceed 2147483647");
10963#endif
10964 /* Append native character for the rev point */
10965 tmpend = uvchr_to_utf8(tmpbuf, rev);
10966 sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
10967 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(rev)))
10968 SvUTF8_on(sv);
10969 if (pos + 1 < e && *pos == '.' && isDIGIT(pos[1]))
10970 s = ++pos;
10971 else {
10972 s = pos;
10973 break;
10974 }
10975 while (pos < e && (isDIGIT(*pos) || *pos == '_'))
10976 pos++;
10977 }
10978 SvPOK_on(sv);
10979 sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start);
10980 SvRMAGICAL_on(sv);
10981 }
10982 return (char *)s;
10983}
10984
10985int
10986Perl_keyword_plugin_standard(pTHX_
10987 char *keyword_ptr, STRLEN keyword_len, OP **op_ptr)
10988{
10989 PERL_ARGS_ASSERT_KEYWORD_PLUGIN_STANDARD;
10990 PERL_UNUSED_CONTEXT;
10991 PERL_UNUSED_ARG(keyword_ptr);
10992 PERL_UNUSED_ARG(keyword_len);
10993 PERL_UNUSED_ARG(op_ptr);
10994 return KEYWORD_PLUGIN_DECLINE;
10995}
10996
10997#define parse_recdescent(g,p) S_parse_recdescent(aTHX_ g,p)
10998static void
10999S_parse_recdescent(pTHX_ int gramtype, I32 fakeeof)
11000{
11001 SAVEI32(PL_lex_brackets);
11002 if (PL_lex_brackets > 100)
11003 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
11004 PL_lex_brackstack[PL_lex_brackets++] = XFAKEEOF;
11005 SAVEI32(PL_lex_allbrackets);
11006 PL_lex_allbrackets = 0;
11007 SAVEI8(PL_lex_fakeeof);
11008 PL_lex_fakeeof = (U8)fakeeof;
11009 if(yyparse(gramtype) && !PL_parser->error_count)
11010 qerror(Perl_mess(aTHX_ "Parse error"));
11011}
11012
11013#define parse_recdescent_for_op(g,p) S_parse_recdescent_for_op(aTHX_ g,p)
11014static OP *
11015S_parse_recdescent_for_op(pTHX_ int gramtype, I32 fakeeof)
11016{
11017 OP *o;
11018 ENTER;
11019 SAVEVPTR(PL_eval_root);
11020 PL_eval_root = NULL;
11021 parse_recdescent(gramtype, fakeeof);
11022 o = PL_eval_root;
11023 LEAVE;
11024 return o;
11025}
11026
11027#define parse_expr(p,f) S_parse_expr(aTHX_ p,f)
11028static OP *
11029S_parse_expr(pTHX_ I32 fakeeof, U32 flags)
11030{
11031 OP *exprop;
11032 if (flags & ~PARSE_OPTIONAL)
11033 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_expr");
11034 exprop = parse_recdescent_for_op(GRAMEXPR, fakeeof);
11035 if (!exprop && !(flags & PARSE_OPTIONAL)) {
11036 if (!PL_parser->error_count)
11037 qerror(Perl_mess(aTHX_ "Parse error"));
11038 exprop = newOP(OP_NULL, 0);
11039 }
11040 return exprop;
11041}
11042
11043/*
11044=for apidoc Amx|OP *|parse_arithexpr|U32 flags
11045
11046Parse a Perl arithmetic expression. This may contain operators of precedence
11047down to the bit shift operators. The expression must be followed (and thus
11048terminated) either by a comparison or lower-precedence operator or by
11049something that would normally terminate an expression such as semicolon.
11050If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
11051otherwise it is mandatory. It is up to the caller to ensure that the
11052dynamic parser state (L</PL_parser> et al) is correctly set to reflect
11053the source of the code to be parsed and the lexical context for the
11054expression.
11055
11056The op tree representing the expression is returned. If an optional
11057expression is absent, a null pointer is returned, otherwise the pointer
11058will be non-null.
11059
11060If an error occurs in parsing or compilation, in most cases a valid op
11061tree is returned anyway. The error is reflected in the parser state,
11062normally resulting in a single exception at the top level of parsing
11063which covers all the compilation errors that occurred. Some compilation
11064errors, however, will throw an exception immediately.
11065
11066=cut
11067*/
11068
11069OP *
11070Perl_parse_arithexpr(pTHX_ U32 flags)
11071{
11072 return parse_expr(LEX_FAKEEOF_COMPARE, flags);
11073}
11074
11075/*
11076=for apidoc Amx|OP *|parse_termexpr|U32 flags
11077
11078Parse a Perl term expression. This may contain operators of precedence
11079down to the assignment operators. The expression must be followed (and thus
11080terminated) either by a comma or lower-precedence operator or by
11081something that would normally terminate an expression such as semicolon.
11082If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
11083otherwise it is mandatory. It is up to the caller to ensure that the
11084dynamic parser state (L</PL_parser> et al) is correctly set to reflect
11085the source of the code to be parsed and the lexical context for the
11086expression.
11087
11088The op tree representing the expression is returned. If an optional
11089expression is absent, a null pointer is returned, otherwise the pointer
11090will be non-null.
11091
11092If an error occurs in parsing or compilation, in most cases a valid op
11093tree is returned anyway. The error is reflected in the parser state,
11094normally resulting in a single exception at the top level of parsing
11095which covers all the compilation errors that occurred. Some compilation
11096errors, however, will throw an exception immediately.
11097
11098=cut
11099*/
11100
11101OP *
11102Perl_parse_termexpr(pTHX_ U32 flags)
11103{
11104 return parse_expr(LEX_FAKEEOF_COMMA, flags);
11105}
11106
11107/*
11108=for apidoc Amx|OP *|parse_listexpr|U32 flags
11109
11110Parse a Perl list expression. This may contain operators of precedence
11111down to the comma operator. The expression must be followed (and thus
11112terminated) either by a low-precedence logic operator such as C<or> or by
11113something that would normally terminate an expression such as semicolon.
11114If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
11115otherwise it is mandatory. It is up to the caller to ensure that the
11116dynamic parser state (L</PL_parser> et al) is correctly set to reflect
11117the source of the code to be parsed and the lexical context for the
11118expression.
11119
11120The op tree representing the expression is returned. If an optional
11121expression is absent, a null pointer is returned, otherwise the pointer
11122will be non-null.
11123
11124If an error occurs in parsing or compilation, in most cases a valid op
11125tree is returned anyway. The error is reflected in the parser state,
11126normally resulting in a single exception at the top level of parsing
11127which covers all the compilation errors that occurred. Some compilation
11128errors, however, will throw an exception immediately.
11129
11130=cut
11131*/
11132
11133OP *
11134Perl_parse_listexpr(pTHX_ U32 flags)
11135{
11136 return parse_expr(LEX_FAKEEOF_LOWLOGIC, flags);
11137}
11138
11139/*
11140=for apidoc Amx|OP *|parse_fullexpr|U32 flags
11141
11142Parse a single complete Perl expression. This allows the full
11143expression grammar, including the lowest-precedence operators such
11144as C<or>. The expression must be followed (and thus terminated) by a
11145token that an expression would normally be terminated by: end-of-file,
11146closing bracketing punctuation, semicolon, or one of the keywords that
11147signals a postfix expression-statement modifier. If I<flags> includes
11148C<PARSE_OPTIONAL> then the expression is optional, otherwise it is
11149mandatory. It is up to the caller to ensure that the dynamic parser
11150state (L</PL_parser> et al) is correctly set to reflect the source of
11151the code to be parsed and the lexical context for the expression.
11152
11153The op tree representing the expression is returned. If an optional
11154expression is absent, a null pointer is returned, otherwise the pointer
11155will be non-null.
11156
11157If an error occurs in parsing or compilation, in most cases a valid op
11158tree is returned anyway. The error is reflected in the parser state,
11159normally resulting in a single exception at the top level of parsing
11160which covers all the compilation errors that occurred. Some compilation
11161errors, however, will throw an exception immediately.
11162
11163=cut
11164*/
11165
11166OP *
11167Perl_parse_fullexpr(pTHX_ U32 flags)
11168{
11169 return parse_expr(LEX_FAKEEOF_NONEXPR, flags);
11170}
11171
11172/*
11173=for apidoc Amx|OP *|parse_block|U32 flags
11174
11175Parse a single complete Perl code block. This consists of an opening
11176brace, a sequence of statements, and a closing brace. The block
11177constitutes a lexical scope, so C<my> variables and various compile-time
11178effects can be contained within it. It is up to the caller to ensure
11179that the dynamic parser state (L</PL_parser> et al) is correctly set to
reflect the source of the code to be parsed and the lexical context for
the block.
11182
11183The op tree representing the code block is returned. This is always a
11184real op, never a null pointer. It will normally be a C<lineseq> list,
11185including C<nextstate> or equivalent ops. No ops to construct any kind
11186of runtime scope are included by virtue of it being a block.
11187
11188If an error occurs in parsing or compilation, in most cases a valid op
11189tree (most likely null) is returned anyway. The error is reflected in
11190the parser state, normally resulting in a single exception at the top
11191level of parsing which covers all the compilation errors that occurred.
11192Some compilation errors, however, will throw an exception immediately.
11193
11194The I<flags> parameter is reserved for future use, and must always
11195be zero.
11196
11197=cut
11198*/
11199
11200OP *
11201Perl_parse_block(pTHX_ U32 flags)
11202{
11203 if (flags)
11204 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_block");
11205 return parse_recdescent_for_op(GRAMBLOCK, LEX_FAKEEOF_NEVER);
11206}
11207
11208/*
11209=for apidoc Amx|OP *|parse_barestmt|U32 flags
11210
11211Parse a single unadorned Perl statement. This may be a normal imperative
11212statement or a declaration that has compile-time effect. It does not
11213include any label or other affixture. It is up to the caller to ensure
11214that the dynamic parser state (L</PL_parser> et al) is correctly set to
11215reflect the source of the code to be parsed and the lexical context for
11216the statement.
11217
11218The op tree representing the statement is returned. This may be a
11219null pointer if the statement is null, for example if it was actually
11220a subroutine definition (which has compile-time side effects). If not
11221null, it will be ops directly implementing the statement, suitable to
11222pass to L</newSTATEOP>. It will not normally include a C<nextstate> or
11223equivalent op (except for those embedded in a scope contained entirely
11224within the statement).
11225
11226If an error occurs in parsing or compilation, in most cases a valid op
11227tree (most likely null) is returned anyway. The error is reflected in
11228the parser state, normally resulting in a single exception at the top
11229level of parsing which covers all the compilation errors that occurred.
11230Some compilation errors, however, will throw an exception immediately.
11231
11232The I<flags> parameter is reserved for future use, and must always
11233be zero.
11234
11235=cut
11236*/
11237
11238OP *
11239Perl_parse_barestmt(pTHX_ U32 flags)
11240{
11241 if (flags)
11242 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_barestmt");
11243 return parse_recdescent_for_op(GRAMBARESTMT, LEX_FAKEEOF_NEVER);
11244}
11245
11246/*
11247=for apidoc Amx|SV *|parse_label|U32 flags
11248
11249Parse a single label, possibly optional, of the type that may prefix a
11250Perl statement. It is up to the caller to ensure that the dynamic parser
11251state (L</PL_parser> et al) is correctly set to reflect the source of
11252the code to be parsed. If I<flags> includes C<PARSE_OPTIONAL> then the
11253label is optional, otherwise it is mandatory.
11254
11255The name of the label is returned in the form of a fresh scalar. If an
11256optional label is absent, a null pointer is returned.
11257
11258If an error occurs in parsing, which can only occur if the label is
11259mandatory, a valid label is returned anyway. The error is reflected in
11260the parser state, normally resulting in a single exception at the top
11261level of parsing which covers all the compilation errors that occurred.
11262
11263=cut
11264*/
11265
11266SV *
11267Perl_parse_label(pTHX_ U32 flags)
11268{
11269 if (flags & ~PARSE_OPTIONAL)
11270 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_label");
11271 if (PL_lex_state == LEX_KNOWNEXT) {
11272 PL_parser->yychar = yylex();
11273 if (PL_parser->yychar == LABEL) {
11274 char *lpv = pl_yylval.pval;
11275 STRLEN llen = strlen(lpv);
11276 SV *lsv;
11277 PL_parser->yychar = YYEMPTY;
11278 lsv = newSV_type(SVt_PV);
11279 SvPV_set(lsv, lpv);
11280 SvCUR_set(lsv, llen);
11281 SvLEN_set(lsv, llen+1);
11282 SvPOK_on(lsv);
11283 return lsv;
11284 } else {
11285 yyunlex();
11286 goto no_label;
11287 }
11288 } else {
11289 char *s, *t;
11290 U8 c;
11291 STRLEN wlen, bufptr_pos;
11292 lex_read_space(0);
11293 t = s = PL_bufptr;
11294 c = (U8)*s;
11295 if (!isIDFIRST_A(c))
11296 goto no_label;
11297 do {
11298 c = (U8)*++t;
11299 } while(isWORDCHAR_A(c));
11300 wlen = t - s;
11301 if (word_takes_any_delimeter(s, wlen))
11302 goto no_label;
11303 bufptr_pos = s - SvPVX(PL_linestr);
11304 PL_bufptr = t;
11305 lex_read_space(LEX_KEEP_PREVIOUS);
11306 t = PL_bufptr;
11307 s = SvPVX(PL_linestr) + bufptr_pos;
11308 if (t[0] == ':' && t[1] != ':') {
11309 PL_oldoldbufptr = PL_oldbufptr;
11310 PL_oldbufptr = s;
11311 PL_bufptr = t+1;
11312 return newSVpvn(s, wlen);
11313 } else {
11314 PL_bufptr = s;
11315 no_label:
11316 if (flags & PARSE_OPTIONAL) {
11317 return NULL;
11318 } else {
11319 qerror(Perl_mess(aTHX_ "Parse error"));
11320 return newSVpvs("x");
11321 }
11322 }
11323 }
11324}
11325
11326/*
11327=for apidoc Amx|OP *|parse_fullstmt|U32 flags
11328
11329Parse a single complete Perl statement. This may be a normal imperative
11330statement or a declaration that has compile-time effect, and may include
11331optional labels. It is up to the caller to ensure that the dynamic
11332parser state (L</PL_parser> et al) is correctly set to reflect the source
11333of the code to be parsed and the lexical context for the statement.
11334
11335The op tree representing the statement is returned. This may be a
11336null pointer if the statement is null, for example if it was actually
11337a subroutine definition (which has compile-time side effects). If not
11338null, it will be the result of a L</newSTATEOP> call, normally including
11339a C<nextstate> or equivalent op.
11340
11341If an error occurs in parsing or compilation, in most cases a valid op
11342tree (most likely null) is returned anyway. The error is reflected in
11343the parser state, normally resulting in a single exception at the top
11344level of parsing which covers all the compilation errors that occurred.
11345Some compilation errors, however, will throw an exception immediately.
11346
11347The I<flags> parameter is reserved for future use, and must always
11348be zero.
11349
11350=cut
11351*/
11352
11353OP *
11354Perl_parse_fullstmt(pTHX_ U32 flags)
11355{
11356 if (flags)
11357 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_fullstmt");
11358 return parse_recdescent_for_op(GRAMFULLSTMT, LEX_FAKEEOF_NEVER);
11359}
11360
11361/*
11362=for apidoc Amx|OP *|parse_stmtseq|U32 flags
11363
11364Parse a sequence of zero or more Perl statements. These may be normal
11365imperative statements, including optional labels, or declarations
11366that have compile-time effect, or any mixture thereof. The statement
11367sequence ends when a closing brace or end-of-file is encountered in a
11368place where a new statement could have validly started. It is up to
11369the caller to ensure that the dynamic parser state (L</PL_parser> et al)
11370is correctly set to reflect the source of the code to be parsed and the
11371lexical context for the statements.
11372
11373The op tree representing the statement sequence is returned. This may
11374be a null pointer if the statements were all null, for example if there
11375were no statements or if there were only subroutine definitions (which
11376have compile-time side effects). If not null, it will be a C<lineseq>
11377list, normally including C<nextstate> or equivalent ops.
11378
11379If an error occurs in parsing or compilation, in most cases a valid op
11380tree is returned anyway. The error is reflected in the parser state,
11381normally resulting in a single exception at the top level of parsing
11382which covers all the compilation errors that occurred. Some compilation
11383errors, however, will throw an exception immediately.
11384
11385The I<flags> parameter is reserved for future use, and must always
11386be zero.
11387
11388=cut
11389*/
11390
11391OP *
11392Perl_parse_stmtseq(pTHX_ U32 flags)
11393{
11394 OP *stmtseqop;
11395 I32 c;
11396 if (flags)
11397 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_stmtseq");
11398 stmtseqop = parse_recdescent_for_op(GRAMSTMTSEQ, LEX_FAKEEOF_CLOSING);
11399 c = lex_peek_unichar(0);
11400 if (c != -1 && c != /*{*/'}')
11401 qerror(Perl_mess(aTHX_ "Parse error"));
11402 return stmtseqop;
11403}
11404
11405void
11406Perl_munge_qwlist_to_paren_list(pTHX_ OP *qwlist)
11407{
11408 PERL_ARGS_ASSERT_MUNGE_QWLIST_TO_PAREN_LIST;
11409 deprecate("qw(...) as parentheses");
11410 force_next((4<<24)|')');
11411 if (qwlist->op_type == OP_STUB) {
11412 op_free(qwlist);
11413 }
11414 else {
11415 start_force(PL_curforce);
11416 NEXTVAL_NEXTTOKE.opval = qwlist;
11417 force_next(THING);
11418 }
11419 force_next((2<<24)|'(');
11420}
11421
11422/*
11423 * Local variables:
11424 * c-indentation-style: bsd
11425 * c-basic-offset: 4
11426 * indent-tabs-mode: t
11427 * End:
11428 *
11429 * ex: set ts=8 sts=4 sw=4 noet:
11430 */