This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Treat a consecutive semicolons in a prototype as 1
[perl5.git] / toke.c
... / ...
CommitLineData
1/* toke.c
2 *
3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
5 *
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
8 *
9 */
10
11/*
12 * 'It all comes from here, the stench and the peril.' --Frodo
13 *
14 * [p.719 of _The Lord of the Rings_, IV/ix: "Shelob's Lair"]
15 */
16
17/*
18 * This file is the lexer for Perl. It's closely linked to the
19 * parser, perly.y.
20 *
21 * The main routine is yylex(), which returns the next token.
22 */
23
24/*
25=head1 Lexer interface
26
27This is the lower layer of the Perl parser, managing characters and tokens.
28
29=for apidoc AmU|yy_parser *|PL_parser
30
31Pointer to a structure encapsulating the state of the parsing operation
32currently in progress. The pointer can be locally changed to perform
33a nested parse without interfering with the state of an outer parse.
34Individual members of C<PL_parser> have their own documentation.
35
36=cut
37*/
38
39#include "EXTERN.h"
40#define PERL_IN_TOKE_C
41#include "perl.h"
42#include "dquote_static.c"
43
44#define new_constant(a,b,c,d,e,f,g) \
45 S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g)
46
47#define pl_yylval (PL_parser->yylval)
48
49/* XXX temporary backwards compatibility */
50#define PL_lex_brackets (PL_parser->lex_brackets)
51#define PL_lex_allbrackets (PL_parser->lex_allbrackets)
52#define PL_lex_fakeeof (PL_parser->lex_fakeeof)
53#define PL_lex_brackstack (PL_parser->lex_brackstack)
54#define PL_lex_casemods (PL_parser->lex_casemods)
55#define PL_lex_casestack (PL_parser->lex_casestack)
56#define PL_lex_defer (PL_parser->lex_defer)
57#define PL_lex_dojoin (PL_parser->lex_dojoin)
58#define PL_lex_expect (PL_parser->lex_expect)
59#define PL_lex_formbrack (PL_parser->lex_formbrack)
60#define PL_lex_inpat (PL_parser->lex_inpat)
61#define PL_lex_inwhat (PL_parser->lex_inwhat)
62#define PL_lex_op (PL_parser->lex_op)
63#define PL_lex_repl (PL_parser->lex_repl)
64#define PL_lex_starts (PL_parser->lex_starts)
65#define PL_lex_stuff (PL_parser->lex_stuff)
66#define PL_multi_start (PL_parser->multi_start)
67#define PL_multi_open (PL_parser->multi_open)
68#define PL_multi_close (PL_parser->multi_close)
69#define PL_preambled (PL_parser->preambled)
70#define PL_sublex_info (PL_parser->sublex_info)
71#define PL_linestr (PL_parser->linestr)
72#define PL_expect (PL_parser->expect)
73#define PL_copline (PL_parser->copline)
74#define PL_bufptr (PL_parser->bufptr)
75#define PL_oldbufptr (PL_parser->oldbufptr)
76#define PL_oldoldbufptr (PL_parser->oldoldbufptr)
77#define PL_linestart (PL_parser->linestart)
78#define PL_bufend (PL_parser->bufend)
79#define PL_last_uni (PL_parser->last_uni)
80#define PL_last_lop (PL_parser->last_lop)
81#define PL_last_lop_op (PL_parser->last_lop_op)
82#define PL_lex_state (PL_parser->lex_state)
83#define PL_rsfp (PL_parser->rsfp)
84#define PL_rsfp_filters (PL_parser->rsfp_filters)
85#define PL_in_my (PL_parser->in_my)
86#define PL_in_my_stash (PL_parser->in_my_stash)
87#define PL_tokenbuf (PL_parser->tokenbuf)
88#define PL_multi_end (PL_parser->multi_end)
89#define PL_error_count (PL_parser->error_count)
90
91#ifdef PERL_MAD
92# define PL_endwhite (PL_parser->endwhite)
93# define PL_faketokens (PL_parser->faketokens)
94# define PL_lasttoke (PL_parser->lasttoke)
95# define PL_nextwhite (PL_parser->nextwhite)
96# define PL_realtokenstart (PL_parser->realtokenstart)
97# define PL_skipwhite (PL_parser->skipwhite)
98# define PL_thisclose (PL_parser->thisclose)
99# define PL_thismad (PL_parser->thismad)
100# define PL_thisopen (PL_parser->thisopen)
101# define PL_thisstuff (PL_parser->thisstuff)
102# define PL_thistoken (PL_parser->thistoken)
103# define PL_thiswhite (PL_parser->thiswhite)
104# define PL_thiswhite (PL_parser->thiswhite)
105# define PL_nexttoke (PL_parser->nexttoke)
106# define PL_curforce (PL_parser->curforce)
107#else
108# define PL_nexttoke (PL_parser->nexttoke)
109# define PL_nexttype (PL_parser->nexttype)
110# define PL_nextval (PL_parser->nextval)
111#endif
112
113static const char* const ident_too_long = "Identifier too long";
114
115#ifdef PERL_MAD
116# define CURMAD(slot,sv) if (PL_madskills) { curmad(slot,sv); sv = 0; }
117# define NEXTVAL_NEXTTOKE PL_nexttoke[PL_curforce].next_val
118#else
119# define CURMAD(slot,sv)
120# define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
121#endif
122
123#define XENUMMASK 0x3f
124#define XFAKEEOF 0x40
125#define XFAKEBRACK 0x80
126
127#ifdef USE_UTF8_SCRIPTS
128# define UTF (!IN_BYTES)
129#else
130# define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || ( !(PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS) && (PL_hints & HINT_UTF8)))
131#endif
132
133/* The maximum number of characters preceding the unrecognized one to display */
134#define UNRECOGNIZED_PRECEDE_COUNT 10
135
136/* In variables named $^X, these are the legal values for X.
137 * 1999-02-27 mjd-perl-patch@plover.com */
138#define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
139
140#define SPACE_OR_TAB(c) isBLANK_A(c)
141
142/* LEX_* are values for PL_lex_state, the state of the lexer.
143 * They are arranged oddly so that the guard on the switch statement
144 * can get by with a single comparison (if the compiler is smart enough).
145 *
146 * These values refer to the various states within a sublex parse,
147 * i.e. within a double quotish string
148 */
149
150/* #define LEX_NOTPARSING 11 is done in perl.h. */
151
152#define LEX_NORMAL 10 /* normal code (ie not within "...") */
153#define LEX_INTERPNORMAL 9 /* code within a string, eg "$foo[$x+1]" */
154#define LEX_INTERPCASEMOD 8 /* expecting a \U, \Q or \E etc */
155#define LEX_INTERPPUSH 7 /* starting a new sublex parse level */
156#define LEX_INTERPSTART 6 /* expecting the start of a $var */
157
158 /* at end of code, eg "$x" followed by: */
159#define LEX_INTERPEND 5 /* ... eg not one of [, { or -> */
160#define LEX_INTERPENDMAYBE 4 /* ... eg one of [, { or -> */
161
162#define LEX_INTERPCONCAT 3 /* expecting anything, eg at start of
163 string or after \E, $foo, etc */
164#define LEX_INTERPCONST 2 /* NOT USED */
165#define LEX_FORMLINE 1 /* expecting a format line */
166#define LEX_KNOWNEXT 0 /* next token known; just return it */
167
168
169#ifdef DEBUGGING
170static const char* const lex_state_names[] = {
171 "KNOWNEXT",
172 "FORMLINE",
173 "INTERPCONST",
174 "INTERPCONCAT",
175 "INTERPENDMAYBE",
176 "INTERPEND",
177 "INTERPSTART",
178 "INTERPPUSH",
179 "INTERPCASEMOD",
180 "INTERPNORMAL",
181 "NORMAL"
182};
183#endif
184
185#ifdef ff_next
186#undef ff_next
187#endif
188
189#include "keywords.h"
190
191/* CLINE is a macro that ensures PL_copline has a sane value */
192
193#ifdef CLINE
194#undef CLINE
195#endif
196#define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
197
198#ifdef PERL_MAD
199# define SKIPSPACE0(s) skipspace0(s)
200# define SKIPSPACE1(s) skipspace1(s)
201# define SKIPSPACE2(s,tsv) skipspace2(s,&tsv)
202# define PEEKSPACE(s) skipspace2(s,0)
203#else
204# define SKIPSPACE0(s) skipspace(s)
205# define SKIPSPACE1(s) skipspace(s)
206# define SKIPSPACE2(s,tsv) skipspace(s)
207# define PEEKSPACE(s) skipspace(s)
208#endif
209
210/*
211 * Convenience functions to return different tokens and prime the
212 * lexer for the next token. They all take an argument.
213 *
214 * TOKEN : generic token (used for '(', DOLSHARP, etc)
215 * OPERATOR : generic operator
216 * AOPERATOR : assignment operator
217 * PREBLOCK : beginning the block after an if, while, foreach, ...
218 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
219 * PREREF : *EXPR where EXPR is not a simple identifier
220 * TERM : expression term
221 * LOOPX : loop exiting command (goto, last, dump, etc)
222 * FTST : file test operator
223 * FUN0 : zero-argument function
224 * FUN0OP : zero-argument function, with its op created in this file
225 * FUN1 : not used, except for not, which isn't a UNIOP
226 * BOop : bitwise or or xor
227 * BAop : bitwise and
228 * SHop : shift operator
229 * PWop : power operator
230 * PMop : pattern-matching operator
231 * Aop : addition-level operator
232 * Mop : multiplication-level operator
233 * Eop : equality-testing operator
234 * Rop : relational operator <= != gt
235 *
236 * Also see LOP and lop() below.
237 */
238
239#ifdef DEBUGGING /* Serve -DT. */
240# define REPORT(retval) tokereport((I32)retval, &pl_yylval)
241#else
242# define REPORT(retval) (retval)
243#endif
244
245#define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
246#define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
247#define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
248#define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
249#define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
250#define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
251#define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
252#define LOOPX(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
253#define FTST(f) return (pl_yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
254#define FUN0(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
255#define FUN0OP(f) return (pl_yylval.opval=f, CLINE, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0OP))
256#define FUN1(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
257#define BOop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
258#define BAop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
259#define SHop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
260#define PWop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
261#define PMop(f) return(pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
262#define Aop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
263#define Mop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
264#define Eop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
265#define Rop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
266
267/* This bit of chicanery makes a unary function followed by
268 * a parenthesis into a function with one argument, highest precedence.
269 * The UNIDOR macro is for unary functions that can be followed by the //
270 * operator (such as C<shift // 0>).
271 */
272#define UNI3(f,x,have_x) { \
273 pl_yylval.ival = f; \
274 if (have_x) PL_expect = x; \
275 PL_bufptr = s; \
276 PL_last_uni = PL_oldbufptr; \
277 PL_last_lop_op = f; \
278 if (*s == '(') \
279 return REPORT( (int)FUNC1 ); \
280 s = PEEKSPACE(s); \
281 return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
282 }
283#define UNI(f) UNI3(f,XTERM,1)
284#define UNIDOR(f) UNI3(f,XTERMORDORDOR,1)
285#define UNIPROTO(f,optional) { \
286 if (optional) PL_last_uni = PL_oldbufptr; \
287 OPERATOR(f); \
288 }
289
290#define UNIBRACK(f) UNI3(f,0,0)
291
292/* grandfather return to old style */
293#define OLDLOP(f) \
294 do { \
295 if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) \
296 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; \
297 pl_yylval.ival = (f); \
298 PL_expect = XTERM; \
299 PL_bufptr = s; \
300 return (int)LSTOP; \
301 } while(0)
302
303#define COPLINE_INC_WITH_HERELINES \
304 STMT_START { \
305 CopLINE_inc(PL_curcop); \
306 if (PL_parser->lex_shared->herelines) \
307 CopLINE(PL_curcop) += PL_parser->lex_shared->herelines, \
308 PL_parser->lex_shared->herelines = 0; \
309 } STMT_END
310
311
312#ifdef DEBUGGING
313
314/* how to interpret the pl_yylval associated with the token */
315enum token_type {
316 TOKENTYPE_NONE,
317 TOKENTYPE_IVAL,
318 TOKENTYPE_OPNUM, /* pl_yylval.ival contains an opcode number */
319 TOKENTYPE_PVAL,
320 TOKENTYPE_OPVAL
321};
322
323static struct debug_tokens {
324 const int token;
325 enum token_type type;
326 const char *name;
327} const debug_tokens[] =
328{
329 { ADDOP, TOKENTYPE_OPNUM, "ADDOP" },
330 { ANDAND, TOKENTYPE_NONE, "ANDAND" },
331 { ANDOP, TOKENTYPE_NONE, "ANDOP" },
332 { ANONSUB, TOKENTYPE_IVAL, "ANONSUB" },
333 { ARROW, TOKENTYPE_NONE, "ARROW" },
334 { ASSIGNOP, TOKENTYPE_OPNUM, "ASSIGNOP" },
335 { BITANDOP, TOKENTYPE_OPNUM, "BITANDOP" },
336 { BITOROP, TOKENTYPE_OPNUM, "BITOROP" },
337 { COLONATTR, TOKENTYPE_NONE, "COLONATTR" },
338 { CONTINUE, TOKENTYPE_NONE, "CONTINUE" },
339 { DEFAULT, TOKENTYPE_NONE, "DEFAULT" },
340 { DO, TOKENTYPE_NONE, "DO" },
341 { DOLSHARP, TOKENTYPE_NONE, "DOLSHARP" },
342 { DORDOR, TOKENTYPE_NONE, "DORDOR" },
343 { DOROP, TOKENTYPE_OPNUM, "DOROP" },
344 { DOTDOT, TOKENTYPE_IVAL, "DOTDOT" },
345 { ELSE, TOKENTYPE_NONE, "ELSE" },
346 { ELSIF, TOKENTYPE_IVAL, "ELSIF" },
347 { EQOP, TOKENTYPE_OPNUM, "EQOP" },
348 { FOR, TOKENTYPE_IVAL, "FOR" },
349 { FORMAT, TOKENTYPE_NONE, "FORMAT" },
350 { FORMLBRACK, TOKENTYPE_NONE, "FORMLBRACK" },
351 { FORMRBRACK, TOKENTYPE_NONE, "FORMRBRACK" },
352 { FUNC, TOKENTYPE_OPNUM, "FUNC" },
353 { FUNC0, TOKENTYPE_OPNUM, "FUNC0" },
354 { FUNC0OP, TOKENTYPE_OPVAL, "FUNC0OP" },
355 { FUNC0SUB, TOKENTYPE_OPVAL, "FUNC0SUB" },
356 { FUNC1, TOKENTYPE_OPNUM, "FUNC1" },
357 { FUNCMETH, TOKENTYPE_OPVAL, "FUNCMETH" },
358 { GIVEN, TOKENTYPE_IVAL, "GIVEN" },
359 { HASHBRACK, TOKENTYPE_NONE, "HASHBRACK" },
360 { IF, TOKENTYPE_IVAL, "IF" },
361 { LABEL, TOKENTYPE_PVAL, "LABEL" },
362 { LOCAL, TOKENTYPE_IVAL, "LOCAL" },
363 { LOOPEX, TOKENTYPE_OPNUM, "LOOPEX" },
364 { LSTOP, TOKENTYPE_OPNUM, "LSTOP" },
365 { LSTOPSUB, TOKENTYPE_OPVAL, "LSTOPSUB" },
366 { MATCHOP, TOKENTYPE_OPNUM, "MATCHOP" },
367 { METHOD, TOKENTYPE_OPVAL, "METHOD" },
368 { MULOP, TOKENTYPE_OPNUM, "MULOP" },
369 { MY, TOKENTYPE_IVAL, "MY" },
370 { NOAMP, TOKENTYPE_NONE, "NOAMP" },
371 { NOTOP, TOKENTYPE_NONE, "NOTOP" },
372 { OROP, TOKENTYPE_IVAL, "OROP" },
373 { OROR, TOKENTYPE_NONE, "OROR" },
374 { PACKAGE, TOKENTYPE_NONE, "PACKAGE" },
375 { PEG, TOKENTYPE_NONE, "PEG" },
376 { PLUGEXPR, TOKENTYPE_OPVAL, "PLUGEXPR" },
377 { PLUGSTMT, TOKENTYPE_OPVAL, "PLUGSTMT" },
378 { PMFUNC, TOKENTYPE_OPVAL, "PMFUNC" },
379 { POSTDEC, TOKENTYPE_NONE, "POSTDEC" },
380 { POSTINC, TOKENTYPE_NONE, "POSTINC" },
381 { POWOP, TOKENTYPE_OPNUM, "POWOP" },
382 { PREDEC, TOKENTYPE_NONE, "PREDEC" },
383 { PREINC, TOKENTYPE_NONE, "PREINC" },
384 { PRIVATEREF, TOKENTYPE_OPVAL, "PRIVATEREF" },
385 { QWLIST, TOKENTYPE_OPVAL, "QWLIST" },
386 { REFGEN, TOKENTYPE_NONE, "REFGEN" },
387 { RELOP, TOKENTYPE_OPNUM, "RELOP" },
388 { REQUIRE, TOKENTYPE_NONE, "REQUIRE" },
389 { SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" },
390 { SUB, TOKENTYPE_NONE, "SUB" },
391 { THING, TOKENTYPE_OPVAL, "THING" },
392 { UMINUS, TOKENTYPE_NONE, "UMINUS" },
393 { UNIOP, TOKENTYPE_OPNUM, "UNIOP" },
394 { UNIOPSUB, TOKENTYPE_OPVAL, "UNIOPSUB" },
395 { UNLESS, TOKENTYPE_IVAL, "UNLESS" },
396 { UNTIL, TOKENTYPE_IVAL, "UNTIL" },
397 { USE, TOKENTYPE_IVAL, "USE" },
398 { WHEN, TOKENTYPE_IVAL, "WHEN" },
399 { WHILE, TOKENTYPE_IVAL, "WHILE" },
400 { WORD, TOKENTYPE_OPVAL, "WORD" },
401 { YADAYADA, TOKENTYPE_IVAL, "YADAYADA" },
402 { 0, TOKENTYPE_NONE, NULL }
403};
404
405/* dump the returned token in rv, plus any optional arg in pl_yylval */
406
407STATIC int
408S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp)
409{
410 dVAR;
411
412 PERL_ARGS_ASSERT_TOKEREPORT;
413
414 if (DEBUG_T_TEST) {
415 const char *name = NULL;
416 enum token_type type = TOKENTYPE_NONE;
417 const struct debug_tokens *p;
418 SV* const report = newSVpvs("<== ");
419
420 for (p = debug_tokens; p->token; p++) {
421 if (p->token == (int)rv) {
422 name = p->name;
423 type = p->type;
424 break;
425 }
426 }
427 if (name)
428 Perl_sv_catpv(aTHX_ report, name);
429 else if ((char)rv > ' ' && (char)rv <= '~')
430 {
431 Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
432 if ((char)rv == 'p')
433 sv_catpvs(report, " (pending identifier)");
434 }
435 else if (!rv)
436 sv_catpvs(report, "EOF");
437 else
438 Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
439 switch (type) {
440 case TOKENTYPE_NONE:
441 break;
442 case TOKENTYPE_IVAL:
443 Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)lvalp->ival);
444 break;
445 case TOKENTYPE_OPNUM:
446 Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
447 PL_op_name[lvalp->ival]);
448 break;
449 case TOKENTYPE_PVAL:
450 Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", lvalp->pval);
451 break;
452 case TOKENTYPE_OPVAL:
453 if (lvalp->opval) {
454 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
455 PL_op_name[lvalp->opval->op_type]);
456 if (lvalp->opval->op_type == OP_CONST) {
457 Perl_sv_catpvf(aTHX_ report, " %s",
458 SvPEEK(cSVOPx_sv(lvalp->opval)));
459 }
460
461 }
462 else
463 sv_catpvs(report, "(opval=null)");
464 break;
465 }
466 PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
467 };
468 return (int)rv;
469}
470
471
472/* print the buffer with suitable escapes */
473
474STATIC void
475S_printbuf(pTHX_ const char *const fmt, const char *const s)
476{
477 SV* const tmp = newSVpvs("");
478
479 PERL_ARGS_ASSERT_PRINTBUF;
480
481 PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
482 SvREFCNT_dec(tmp);
483}
484
485#endif
486
487static int
488S_deprecate_commaless_var_list(pTHX) {
489 PL_expect = XTERM;
490 deprecate("comma-less variable list");
491 return REPORT(','); /* grandfather non-comma-format format */
492}
493
494/*
495 * S_ao
496 *
497 * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
498 * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
499 */
500
501STATIC int
502S_ao(pTHX_ int toketype)
503{
504 dVAR;
505 if (*PL_bufptr == '=') {
506 PL_bufptr++;
507 if (toketype == ANDAND)
508 pl_yylval.ival = OP_ANDASSIGN;
509 else if (toketype == OROR)
510 pl_yylval.ival = OP_ORASSIGN;
511 else if (toketype == DORDOR)
512 pl_yylval.ival = OP_DORASSIGN;
513 toketype = ASSIGNOP;
514 }
515 return toketype;
516}
517
518/*
519 * S_no_op
520 * When Perl expects an operator and finds something else, no_op
521 * prints the warning. It always prints "<something> found where
522 * operator expected. It prints "Missing semicolon on previous line?"
523 * if the surprise occurs at the start of the line. "do you need to
524 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
525 * where the compiler doesn't know if foo is a method call or a function.
526 * It prints "Missing operator before end of line" if there's nothing
527 * after the missing operator, or "... before <...>" if there is something
528 * after the missing operator.
529 */
530
531STATIC void
532S_no_op(pTHX_ const char *const what, char *s)
533{
534 dVAR;
535 char * const oldbp = PL_bufptr;
536 const bool is_first = (PL_oldbufptr == PL_linestart);
537
538 PERL_ARGS_ASSERT_NO_OP;
539
540 if (!s)
541 s = oldbp;
542 else
543 PL_bufptr = s;
544 yywarn(Perl_form(aTHX_ "%s found where operator expected", what), UTF ? SVf_UTF8 : 0);
545 if (ckWARN_d(WARN_SYNTAX)) {
546 if (is_first)
547 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
548 "\t(Missing semicolon on previous line?)\n");
549 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
550 const char *t;
551 for (t = PL_oldoldbufptr; (isWORDCHAR_lazy_if(t,UTF) || *t == ':');
552 t += UTF ? UTF8SKIP(t) : 1)
553 NOOP;
554 if (t < PL_bufptr && isSPACE(*t))
555 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
556 "\t(Do you need to predeclare %"SVf"?)\n",
557 SVfARG(newSVpvn_flags(PL_oldoldbufptr, (STRLEN)(t - PL_oldoldbufptr),
558 SVs_TEMP | (UTF ? SVf_UTF8 : 0))));
559 }
560 else {
561 assert(s >= oldbp);
562 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
563 "\t(Missing operator before %"SVf"?)\n",
564 SVfARG(newSVpvn_flags(oldbp, (STRLEN)(s - oldbp),
565 SVs_TEMP | (UTF ? SVf_UTF8 : 0))));
566 }
567 }
568 PL_bufptr = oldbp;
569}
570
571/*
572 * S_missingterm
573 * Complain about missing quote/regexp/heredoc terminator.
574 * If it's called with NULL then it cauterizes the line buffer.
575 * If we're in a delimited string and the delimiter is a control
576 * character, it's reformatted into a two-char sequence like ^C.
577 * This is fatal.
578 */
579
580STATIC void
581S_missingterm(pTHX_ char *s)
582{
583 dVAR;
584 char tmpbuf[3];
585 char q;
586 if (s) {
587 char * const nl = strrchr(s,'\n');
588 if (nl)
589 *nl = '\0';
590 }
591 else if (isCNTRL(PL_multi_close)) {
592 *tmpbuf = '^';
593 tmpbuf[1] = (char)toCTRL(PL_multi_close);
594 tmpbuf[2] = '\0';
595 s = tmpbuf;
596 }
597 else {
598 *tmpbuf = (char)PL_multi_close;
599 tmpbuf[1] = '\0';
600 s = tmpbuf;
601 }
602 q = strchr(s,'"') ? '\'' : '"';
603 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
604}
605
606#include "feature.h"
607
608/*
609 * Check whether the named feature is enabled.
610 */
611bool
612Perl_feature_is_enabled(pTHX_ const char *const name, STRLEN namelen)
613{
614 dVAR;
615 char he_name[8 + MAX_FEATURE_LEN] = "feature_";
616
617 PERL_ARGS_ASSERT_FEATURE_IS_ENABLED;
618
619 assert(CURRENT_FEATURE_BUNDLE == FEATURE_BUNDLE_CUSTOM);
620
621 if (namelen > MAX_FEATURE_LEN)
622 return FALSE;
623 memcpy(&he_name[8], name, namelen);
624
625 return cBOOL(cop_hints_fetch_pvn(PL_curcop, he_name, 8 + namelen, 0,
626 REFCOUNTED_HE_EXISTS));
627}
628
629/*
630 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
631 * utf16-to-utf8-reversed.
632 */
633
634#ifdef PERL_CR_FILTER
635static void
636strip_return(SV *sv)
637{
638 const char *s = SvPVX_const(sv);
639 const char * const e = s + SvCUR(sv);
640
641 PERL_ARGS_ASSERT_STRIP_RETURN;
642
643 /* outer loop optimized to do nothing if there are no CR-LFs */
644 while (s < e) {
645 if (*s++ == '\r' && *s == '\n') {
646 /* hit a CR-LF, need to copy the rest */
647 char *d = s - 1;
648 *d++ = *s++;
649 while (s < e) {
650 if (*s == '\r' && s[1] == '\n')
651 s++;
652 *d++ = *s++;
653 }
654 SvCUR(sv) -= s - d;
655 return;
656 }
657 }
658}
659
660STATIC I32
661S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
662{
663 const I32 count = FILTER_READ(idx+1, sv, maxlen);
664 if (count > 0 && !maxlen)
665 strip_return(sv);
666 return count;
667}
668#endif
669
670/*
671=for apidoc Amx|void|lex_start|SV *line|PerlIO *rsfp|U32 flags
672
673Creates and initialises a new lexer/parser state object, supplying
674a context in which to lex and parse from a new source of Perl code.
675A pointer to the new state object is placed in L</PL_parser>. An entry
676is made on the save stack so that upon unwinding the new state object
677will be destroyed and the former value of L</PL_parser> will be restored.
678Nothing else need be done to clean up the parsing context.
679
680The code to be parsed comes from I<line> and I<rsfp>. I<line>, if
681non-null, provides a string (in SV form) containing code to be parsed.
682A copy of the string is made, so subsequent modification of I<line>
683does not affect parsing. I<rsfp>, if non-null, provides an input stream
684from which code will be read to be parsed. If both are non-null, the
685code in I<line> comes first and must consist of complete lines of input,
686and I<rsfp> supplies the remainder of the source.
687
688The I<flags> parameter is reserved for future use. Currently it is only
689used by perl internally, so extensions should always pass zero.
690
691=cut
692*/
693
694/* LEX_START_SAME_FILTER indicates that this is not a new file, so it
695 can share filters with the current parser.
696 LEX_START_DONT_CLOSE indicates that the file handle wasn't opened by the
697 caller, hence isn't owned by the parser, so shouldn't be closed on parser
698 destruction. This is used to handle the case of defaulting to reading the
699 script from the standard input because no filename was given on the command
700 line (without getting confused by situation where STDIN has been closed, so
701 the script handle is opened on fd 0) */
702
703void
704Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, U32 flags)
705{
706 dVAR;
707 const char *s = NULL;
708 yy_parser *parser, *oparser;
709 if (flags && flags & ~LEX_START_FLAGS)
710 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_start");
711
712 /* create and initialise a parser */
713
714 Newxz(parser, 1, yy_parser);
715 parser->old_parser = oparser = PL_parser;
716 PL_parser = parser;
717
718 parser->stack = NULL;
719 parser->ps = NULL;
720 parser->stack_size = 0;
721
722 /* on scope exit, free this parser and restore any outer one */
723 SAVEPARSER(parser);
724 parser->saved_curcop = PL_curcop;
725
726 /* initialise lexer state */
727
728#ifdef PERL_MAD
729 parser->curforce = -1;
730#else
731 parser->nexttoke = 0;
732#endif
733 parser->error_count = oparser ? oparser->error_count : 0;
734 parser->copline = NOLINE;
735 parser->lex_state = LEX_NORMAL;
736 parser->expect = XSTATE;
737 parser->rsfp = rsfp;
738 parser->rsfp_filters =
739 !(flags & LEX_START_SAME_FILTER) || !oparser
740 ? NULL
741 : MUTABLE_AV(SvREFCNT_inc(
742 oparser->rsfp_filters
743 ? oparser->rsfp_filters
744 : (oparser->rsfp_filters = newAV())
745 ));
746
747 Newx(parser->lex_brackstack, 120, char);
748 Newx(parser->lex_casestack, 12, char);
749 *parser->lex_casestack = '\0';
750 Newxz(parser->lex_shared, 1, LEXSHARED);
751
752 if (line) {
753 STRLEN len;
754 s = SvPV_const(line, len);
755 parser->linestr = flags & LEX_START_COPIED
756 ? SvREFCNT_inc_simple_NN(line)
757 : newSVpvn_flags(s, len, SvUTF8(line));
758 sv_catpvs(parser->linestr, "\n;");
759 } else {
760 parser->linestr = newSVpvs("\n;");
761 }
762 parser->oldoldbufptr =
763 parser->oldbufptr =
764 parser->bufptr =
765 parser->linestart = SvPVX(parser->linestr);
766 parser->bufend = parser->bufptr + SvCUR(parser->linestr);
767 parser->last_lop = parser->last_uni = NULL;
768 parser->lex_flags = flags & (LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES
769 |LEX_DONT_CLOSE_RSFP);
770
771 parser->in_pod = parser->filtered = 0;
772}
773
774
775/* delete a parser object */
776
777void
778Perl_parser_free(pTHX_ const yy_parser *parser)
779{
780 PERL_ARGS_ASSERT_PARSER_FREE;
781
782 PL_curcop = parser->saved_curcop;
783 SvREFCNT_dec(parser->linestr);
784
785 if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP)
786 PerlIO_clearerr(parser->rsfp);
787 else if (parser->rsfp && (!parser->old_parser ||
788 (parser->old_parser && parser->rsfp != parser->old_parser->rsfp)))
789 PerlIO_close(parser->rsfp);
790 SvREFCNT_dec(parser->rsfp_filters);
791 SvREFCNT_dec(parser->lex_stuff);
792 SvREFCNT_dec(parser->sublex_info.repl);
793
794 Safefree(parser->lex_brackstack);
795 Safefree(parser->lex_casestack);
796 Safefree(parser->lex_shared);
797 PL_parser = parser->old_parser;
798 Safefree(parser);
799}
800
801void
802Perl_parser_free_nexttoke_ops(pTHX_ yy_parser *parser, OPSLAB *slab)
803{
804#ifdef PERL_MAD
805 I32 nexttoke = parser->lasttoke;
806#else
807 I32 nexttoke = parser->nexttoke;
808#endif
809 PERL_ARGS_ASSERT_PARSER_FREE_NEXTTOKE_OPS;
810 while (nexttoke--) {
811#ifdef PERL_MAD
812 if (S_is_opval_token(parser->nexttoke[nexttoke].next_type
813 & 0xffff)
814 && parser->nexttoke[nexttoke].next_val.opval
815 && parser->nexttoke[nexttoke].next_val.opval->op_slabbed
816 && OpSLAB(parser->nexttoke[nexttoke].next_val.opval) == slab) {
817 op_free(parser->nexttoke[nexttoke].next_val.opval);
818 parser->nexttoke[nexttoke].next_val.opval = NULL;
819 }
820#else
821 if (S_is_opval_token(parser->nexttype[nexttoke] & 0xffff)
822 && parser->nextval[nexttoke].opval
823 && parser->nextval[nexttoke].opval->op_slabbed
824 && OpSLAB(parser->nextval[nexttoke].opval) == slab) {
825 op_free(parser->nextval[nexttoke].opval);
826 parser->nextval[nexttoke].opval = NULL;
827 }
828#endif
829 }
830}
831
832
833/*
834=for apidoc AmxU|SV *|PL_parser-E<gt>linestr
835
836Buffer scalar containing the chunk currently under consideration of the
837text currently being lexed. This is always a plain string scalar (for
838which C<SvPOK> is true). It is not intended to be used as a scalar by
839normal scalar means; instead refer to the buffer directly by the pointer
840variables described below.
841
842The lexer maintains various C<char*> pointers to things in the
843C<PL_parser-E<gt>linestr> buffer. If C<PL_parser-E<gt>linestr> is ever
844reallocated, all of these pointers must be updated. Don't attempt to
845do this manually, but rather use L</lex_grow_linestr> if you need to
846reallocate the buffer.
847
848The content of the text chunk in the buffer is commonly exactly one
849complete line of input, up to and including a newline terminator,
850but there are situations where it is otherwise. The octets of the
851buffer may be intended to be interpreted as either UTF-8 or Latin-1.
852The function L</lex_bufutf8> tells you which. Do not use the C<SvUTF8>
853flag on this scalar, which may disagree with it.
854
855For direct examination of the buffer, the variable
856L</PL_parser-E<gt>bufend> points to the end of the buffer. The current
857lexing position is pointed to by L</PL_parser-E<gt>bufptr>. Direct use
858of these pointers is usually preferable to examination of the scalar
859through normal scalar means.
860
861=for apidoc AmxU|char *|PL_parser-E<gt>bufend
862
863Direct pointer to the end of the chunk of text currently being lexed, the
864end of the lexer buffer. This is equal to C<SvPVX(PL_parser-E<gt>linestr)
865+ SvCUR(PL_parser-E<gt>linestr)>. A NUL character (zero octet) is
866always located at the end of the buffer, and does not count as part of
867the buffer's contents.
868
869=for apidoc AmxU|char *|PL_parser-E<gt>bufptr
870
871Points to the current position of lexing inside the lexer buffer.
872Characters around this point may be freely examined, within
873the range delimited by C<SvPVX(L</PL_parser-E<gt>linestr>)> and
874L</PL_parser-E<gt>bufend>. The octets of the buffer may be intended to be
875interpreted as either UTF-8 or Latin-1, as indicated by L</lex_bufutf8>.
876
877Lexing code (whether in the Perl core or not) moves this pointer past
878the characters that it consumes. It is also expected to perform some
879bookkeeping whenever a newline character is consumed. This movement
880can be more conveniently performed by the function L</lex_read_to>,
881which handles newlines appropriately.
882
883Interpretation of the buffer's octets can be abstracted out by
884using the slightly higher-level functions L</lex_peek_unichar> and
885L</lex_read_unichar>.
886
887=for apidoc AmxU|char *|PL_parser-E<gt>linestart
888
889Points to the start of the current line inside the lexer buffer.
890This is useful for indicating at which column an error occurred, and
891not much else. This must be updated by any lexing code that consumes
892a newline; the function L</lex_read_to> handles this detail.
893
894=cut
895*/
896
897/*
898=for apidoc Amx|bool|lex_bufutf8
899
900Indicates whether the octets in the lexer buffer
901(L</PL_parser-E<gt>linestr>) should be interpreted as the UTF-8 encoding
902of Unicode characters. If not, they should be interpreted as Latin-1
903characters. This is analogous to the C<SvUTF8> flag for scalars.
904
905In UTF-8 mode, it is not guaranteed that the lexer buffer actually
906contains valid UTF-8. Lexing code must be robust in the face of invalid
907encoding.
908
909The actual C<SvUTF8> flag of the L</PL_parser-E<gt>linestr> scalar
910is significant, but not the whole story regarding the input character
911encoding. Normally, when a file is being read, the scalar contains octets
912and its C<SvUTF8> flag is off, but the octets should be interpreted as
913UTF-8 if the C<use utf8> pragma is in effect. During a string eval,
914however, the scalar may have the C<SvUTF8> flag on, and in this case its
915octets should be interpreted as UTF-8 unless the C<use bytes> pragma
916is in effect. This logic may change in the future; use this function
917instead of implementing the logic yourself.
918
919=cut
920*/
921
922bool
923Perl_lex_bufutf8(pTHX)
924{
925 return UTF;
926}
927
928/*
929=for apidoc Amx|char *|lex_grow_linestr|STRLEN len
930
931Reallocates the lexer buffer (L</PL_parser-E<gt>linestr>) to accommodate
932at least I<len> octets (including terminating NUL). Returns a
933pointer to the reallocated buffer. This is necessary before making
934any direct modification of the buffer that would increase its length.
935L</lex_stuff_pvn> provides a more convenient way to insert text into
936the buffer.
937
938Do not use C<SvGROW> or C<sv_grow> directly on C<PL_parser-E<gt>linestr>;
939this function updates all of the lexer's variables that point directly
940into the buffer.
941
942=cut
943*/
944
945char *
946Perl_lex_grow_linestr(pTHX_ STRLEN len)
947{
948 SV *linestr;
949 char *buf;
950 STRLEN bufend_pos, bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
951 STRLEN linestart_pos, last_uni_pos, last_lop_pos, re_eval_start_pos;
952 linestr = PL_parser->linestr;
953 buf = SvPVX(linestr);
954 if (len <= SvLEN(linestr))
955 return buf;
956 bufend_pos = PL_parser->bufend - buf;
957 bufptr_pos = PL_parser->bufptr - buf;
958 oldbufptr_pos = PL_parser->oldbufptr - buf;
959 oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
960 linestart_pos = PL_parser->linestart - buf;
961 last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
962 last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
963 re_eval_start_pos = PL_parser->lex_shared->re_eval_start ?
964 PL_parser->lex_shared->re_eval_start - buf : 0;
965
966 buf = sv_grow(linestr, len);
967
968 PL_parser->bufend = buf + bufend_pos;
969 PL_parser->bufptr = buf + bufptr_pos;
970 PL_parser->oldbufptr = buf + oldbufptr_pos;
971 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
972 PL_parser->linestart = buf + linestart_pos;
973 if (PL_parser->last_uni)
974 PL_parser->last_uni = buf + last_uni_pos;
975 if (PL_parser->last_lop)
976 PL_parser->last_lop = buf + last_lop_pos;
977 if (PL_parser->lex_shared->re_eval_start)
978 PL_parser->lex_shared->re_eval_start = buf + re_eval_start_pos;
979 return buf;
980}
981
982/*
983=for apidoc Amx|void|lex_stuff_pvn|const char *pv|STRLEN len|U32 flags
984
985Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
986immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
987reallocating the buffer if necessary. This means that lexing code that
988runs later will see the characters as if they had appeared in the input.
989It is not recommended to do this as part of normal parsing, and most
990uses of this facility run the risk of the inserted characters being
991interpreted in an unintended manner.
992
993The string to be inserted is represented by I<len> octets starting
994at I<pv>. These octets are interpreted as either UTF-8 or Latin-1,
995according to whether the C<LEX_STUFF_UTF8> flag is set in I<flags>.
996The characters are recoded for the lexer buffer, according to how the
997buffer is currently being interpreted (L</lex_bufutf8>). If a string
998to be inserted is available as a Perl scalar, the L</lex_stuff_sv>
999function is more convenient.
1000
1001=cut
1002*/
1003
1004void
1005Perl_lex_stuff_pvn(pTHX_ const char *pv, STRLEN len, U32 flags)
1006{
1007 dVAR;
1008 char *bufptr;
1009 PERL_ARGS_ASSERT_LEX_STUFF_PVN;
1010 if (flags & ~(LEX_STUFF_UTF8))
1011 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_pvn");
1012 if (UTF) {
1013 if (flags & LEX_STUFF_UTF8) {
1014 goto plain_copy;
1015 } else {
1016 STRLEN highhalf = 0; /* Count of variants */
1017 const char *p, *e = pv+len;
1018 for (p = pv; p != e; p++) {
1019 if (! UTF8_IS_INVARIANT(*p)) {
1020 highhalf++;
1021 }
1022 }
1023 if (!highhalf)
1024 goto plain_copy;
1025 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len+highhalf);
1026 bufptr = PL_parser->bufptr;
1027 Move(bufptr, bufptr+len+highhalf, PL_parser->bufend+1-bufptr, char);
1028 SvCUR_set(PL_parser->linestr,
1029 SvCUR(PL_parser->linestr) + len+highhalf);
1030 PL_parser->bufend += len+highhalf;
1031 for (p = pv; p != e; p++) {
1032 U8 c = (U8)*p;
1033 if (! UTF8_IS_INVARIANT(c)) {
1034 *bufptr++ = UTF8_TWO_BYTE_HI(c);
1035 *bufptr++ = UTF8_TWO_BYTE_LO(c);
1036 } else {
1037 *bufptr++ = (char)c;
1038 }
1039 }
1040 }
1041 } else {
1042 if (flags & LEX_STUFF_UTF8) {
1043 STRLEN highhalf = 0;
1044 const char *p, *e = pv+len;
1045 for (p = pv; p != e; p++) {
1046 U8 c = (U8)*p;
1047 if (UTF8_IS_ABOVE_LATIN1(c)) {
1048 Perl_croak(aTHX_ "Lexing code attempted to stuff "
1049 "non-Latin-1 character into Latin-1 input");
1050 } else if (UTF8_IS_NEXT_CHAR_DOWNGRADEABLE(p, e)) {
1051 p++;
1052 highhalf++;
1053 } else if (! UTF8_IS_INVARIANT(c)) {
1054 /* malformed UTF-8 */
1055 ENTER;
1056 SAVESPTR(PL_warnhook);
1057 PL_warnhook = PERL_WARNHOOK_FATAL;
1058 utf8n_to_uvuni((U8*)p, e-p, NULL, 0);
1059 LEAVE;
1060 }
1061 }
1062 if (!highhalf)
1063 goto plain_copy;
1064 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len-highhalf);
1065 bufptr = PL_parser->bufptr;
1066 Move(bufptr, bufptr+len-highhalf, PL_parser->bufend+1-bufptr, char);
1067 SvCUR_set(PL_parser->linestr,
1068 SvCUR(PL_parser->linestr) + len-highhalf);
1069 PL_parser->bufend += len-highhalf;
1070 p = pv;
1071 while (p < e) {
1072 if (UTF8_IS_INVARIANT(*p)) {
1073 *bufptr++ = *p;
1074 p++;
1075 }
1076 else {
1077 assert(p < e -1 );
1078 *bufptr++ = TWO_BYTE_UTF8_TO_UNI(*p, *(p+1));
1079 p += 2;
1080 }
1081 }
1082 } else {
1083 plain_copy:
1084 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len);
1085 bufptr = PL_parser->bufptr;
1086 Move(bufptr, bufptr+len, PL_parser->bufend+1-bufptr, char);
1087 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) + len);
1088 PL_parser->bufend += len;
1089 Copy(pv, bufptr, len, char);
1090 }
1091 }
1092}
1093
1094/*
1095=for apidoc Amx|void|lex_stuff_pv|const char *pv|U32 flags
1096
1097Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1098immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1099reallocating the buffer if necessary. This means that lexing code that
1100runs later will see the characters as if they had appeared in the input.
1101It is not recommended to do this as part of normal parsing, and most
1102uses of this facility run the risk of the inserted characters being
1103interpreted in an unintended manner.
1104
1105The string to be inserted is represented by octets starting at I<pv>
1106and continuing to the first nul. These octets are interpreted as either
1107UTF-8 or Latin-1, according to whether the C<LEX_STUFF_UTF8> flag is set
1108in I<flags>. The characters are recoded for the lexer buffer, according
1109to how the buffer is currently being interpreted (L</lex_bufutf8>).
1110If it is not convenient to nul-terminate a string to be inserted, the
1111L</lex_stuff_pvn> function is more appropriate.
1112
1113=cut
1114*/
1115
1116void
1117Perl_lex_stuff_pv(pTHX_ const char *pv, U32 flags)
1118{
1119 PERL_ARGS_ASSERT_LEX_STUFF_PV;
1120 lex_stuff_pvn(pv, strlen(pv), flags);
1121}
1122
1123/*
1124=for apidoc Amx|void|lex_stuff_sv|SV *sv|U32 flags
1125
1126Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1127immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1128reallocating the buffer if necessary. This means that lexing code that
1129runs later will see the characters as if they had appeared in the input.
1130It is not recommended to do this as part of normal parsing, and most
1131uses of this facility run the risk of the inserted characters being
1132interpreted in an unintended manner.
1133
1134The string to be inserted is the string value of I<sv>. The characters
1135are recoded for the lexer buffer, according to how the buffer is currently
1136being interpreted (L</lex_bufutf8>). If a string to be inserted is
1137not already a Perl scalar, the L</lex_stuff_pvn> function avoids the
1138need to construct a scalar.
1139
1140=cut
1141*/
1142
1143void
1144Perl_lex_stuff_sv(pTHX_ SV *sv, U32 flags)
1145{
1146 char *pv;
1147 STRLEN len;
1148 PERL_ARGS_ASSERT_LEX_STUFF_SV;
1149 if (flags)
1150 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_sv");
1151 pv = SvPV(sv, len);
1152 lex_stuff_pvn(pv, len, flags | (SvUTF8(sv) ? LEX_STUFF_UTF8 : 0));
1153}
1154
1155/*
1156=for apidoc Amx|void|lex_unstuff|char *ptr
1157
1158Discards text about to be lexed, from L</PL_parser-E<gt>bufptr> up to
1159I<ptr>. Text following I<ptr> will be moved, and the buffer shortened.
1160This hides the discarded text from any lexing code that runs later,
1161as if the text had never appeared.
1162
1163This is not the normal way to consume lexed text. For that, use
1164L</lex_read_to>.
1165
1166=cut
1167*/
1168
1169void
1170Perl_lex_unstuff(pTHX_ char *ptr)
1171{
1172 char *buf, *bufend;
1173 STRLEN unstuff_len;
1174 PERL_ARGS_ASSERT_LEX_UNSTUFF;
1175 buf = PL_parser->bufptr;
1176 if (ptr < buf)
1177 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1178 if (ptr == buf)
1179 return;
1180 bufend = PL_parser->bufend;
1181 if (ptr > bufend)
1182 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1183 unstuff_len = ptr - buf;
1184 Move(ptr, buf, bufend+1-ptr, char);
1185 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - unstuff_len);
1186 PL_parser->bufend = bufend - unstuff_len;
1187}
1188
1189/*
1190=for apidoc Amx|void|lex_read_to|char *ptr
1191
1192Consume text in the lexer buffer, from L</PL_parser-E<gt>bufptr> up
1193to I<ptr>. This advances L</PL_parser-E<gt>bufptr> to match I<ptr>,
1194performing the correct bookkeeping whenever a newline character is passed.
1195This is the normal way to consume lexed text.
1196
1197Interpretation of the buffer's octets can be abstracted out by
1198using the slightly higher-level functions L</lex_peek_unichar> and
1199L</lex_read_unichar>.
1200
1201=cut
1202*/
1203
1204void
1205Perl_lex_read_to(pTHX_ char *ptr)
1206{
1207 char *s;
1208 PERL_ARGS_ASSERT_LEX_READ_TO;
1209 s = PL_parser->bufptr;
1210 if (ptr < s || ptr > PL_parser->bufend)
1211 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_to");
1212 for (; s != ptr; s++)
1213 if (*s == '\n') {
1214 COPLINE_INC_WITH_HERELINES;
1215 PL_parser->linestart = s+1;
1216 }
1217 PL_parser->bufptr = ptr;
1218}
1219
1220/*
1221=for apidoc Amx|void|lex_discard_to|char *ptr
1222
1223Discards the first part of the L</PL_parser-E<gt>linestr> buffer,
1224up to I<ptr>. The remaining content of the buffer will be moved, and
1225all pointers into the buffer updated appropriately. I<ptr> must not
1226be later in the buffer than the position of L</PL_parser-E<gt>bufptr>:
1227it is not permitted to discard text that has yet to be lexed.
1228
1229Normally it is not necessarily to do this directly, because it suffices to
1230use the implicit discarding behaviour of L</lex_next_chunk> and things
1231based on it. However, if a token stretches across multiple lines,
1232and the lexing code has kept multiple lines of text in the buffer for
1233that purpose, then after completion of the token it would be wise to
1234explicitly discard the now-unneeded earlier lines, to avoid future
1235multi-line tokens growing the buffer without bound.
1236
1237=cut
1238*/
1239
1240void
1241Perl_lex_discard_to(pTHX_ char *ptr)
1242{
1243 char *buf;
1244 STRLEN discard_len;
1245 PERL_ARGS_ASSERT_LEX_DISCARD_TO;
1246 buf = SvPVX(PL_parser->linestr);
1247 if (ptr < buf)
1248 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1249 if (ptr == buf)
1250 return;
1251 if (ptr > PL_parser->bufptr)
1252 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1253 discard_len = ptr - buf;
1254 if (PL_parser->oldbufptr < ptr)
1255 PL_parser->oldbufptr = ptr;
1256 if (PL_parser->oldoldbufptr < ptr)
1257 PL_parser->oldoldbufptr = ptr;
1258 if (PL_parser->last_uni && PL_parser->last_uni < ptr)
1259 PL_parser->last_uni = NULL;
1260 if (PL_parser->last_lop && PL_parser->last_lop < ptr)
1261 PL_parser->last_lop = NULL;
1262 Move(ptr, buf, PL_parser->bufend+1-ptr, char);
1263 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - discard_len);
1264 PL_parser->bufend -= discard_len;
1265 PL_parser->bufptr -= discard_len;
1266 PL_parser->oldbufptr -= discard_len;
1267 PL_parser->oldoldbufptr -= discard_len;
1268 if (PL_parser->last_uni)
1269 PL_parser->last_uni -= discard_len;
1270 if (PL_parser->last_lop)
1271 PL_parser->last_lop -= discard_len;
1272}
1273
1274/*
1275=for apidoc Amx|bool|lex_next_chunk|U32 flags
1276
1277Reads in the next chunk of text to be lexed, appending it to
1278L</PL_parser-E<gt>linestr>. This should be called when lexing code has
1279looked to the end of the current chunk and wants to know more. It is
1280usual, but not necessary, for lexing to have consumed the entirety of
1281the current chunk at this time.
1282
1283If L</PL_parser-E<gt>bufptr> is pointing to the very end of the current
1284chunk (i.e., the current chunk has been entirely consumed), normally the
1285current chunk will be discarded at the same time that the new chunk is
1286read in. If I<flags> includes C<LEX_KEEP_PREVIOUS>, the current chunk
1287will not be discarded. If the current chunk has not been entirely
1288consumed, then it will not be discarded regardless of the flag.
1289
1290Returns true if some new text was added to the buffer, or false if the
1291buffer has reached the end of the input text.
1292
1293=cut
1294*/
1295
1296#define LEX_FAKE_EOF 0x80000000
1297#define LEX_NO_TERM 0x40000000
1298
1299bool
1300Perl_lex_next_chunk(pTHX_ U32 flags)
1301{
1302 SV *linestr;
1303 char *buf;
1304 STRLEN old_bufend_pos, new_bufend_pos;
1305 STRLEN bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
1306 STRLEN linestart_pos, last_uni_pos, last_lop_pos;
1307 bool got_some_for_debugger = 0;
1308 bool got_some;
1309 if (flags & ~(LEX_KEEP_PREVIOUS|LEX_FAKE_EOF|LEX_NO_TERM))
1310 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_next_chunk");
1311 linestr = PL_parser->linestr;
1312 buf = SvPVX(linestr);
1313 if (!(flags & LEX_KEEP_PREVIOUS) &&
1314 PL_parser->bufptr == PL_parser->bufend) {
1315 old_bufend_pos = bufptr_pos = oldbufptr_pos = oldoldbufptr_pos = 0;
1316 linestart_pos = 0;
1317 if (PL_parser->last_uni != PL_parser->bufend)
1318 PL_parser->last_uni = NULL;
1319 if (PL_parser->last_lop != PL_parser->bufend)
1320 PL_parser->last_lop = NULL;
1321 last_uni_pos = last_lop_pos = 0;
1322 *buf = 0;
1323 SvCUR(linestr) = 0;
1324 } else {
1325 old_bufend_pos = PL_parser->bufend - buf;
1326 bufptr_pos = PL_parser->bufptr - buf;
1327 oldbufptr_pos = PL_parser->oldbufptr - buf;
1328 oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
1329 linestart_pos = PL_parser->linestart - buf;
1330 last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
1331 last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
1332 }
1333 if (flags & LEX_FAKE_EOF) {
1334 goto eof;
1335 } else if (!PL_parser->rsfp && !PL_parser->filtered) {
1336 got_some = 0;
1337 } else if (filter_gets(linestr, old_bufend_pos)) {
1338 got_some = 1;
1339 got_some_for_debugger = 1;
1340 } else if (flags & LEX_NO_TERM) {
1341 got_some = 0;
1342 } else {
1343 if (!SvPOK(linestr)) /* can get undefined by filter_gets */
1344 sv_setpvs(linestr, "");
1345 eof:
1346 /* End of real input. Close filehandle (unless it was STDIN),
1347 * then add implicit termination.
1348 */
1349 if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP)
1350 PerlIO_clearerr(PL_parser->rsfp);
1351 else if (PL_parser->rsfp)
1352 (void)PerlIO_close(PL_parser->rsfp);
1353 PL_parser->rsfp = NULL;
1354 PL_parser->in_pod = PL_parser->filtered = 0;
1355#ifdef PERL_MAD
1356 if (PL_madskills && !PL_in_eval && (PL_minus_p || PL_minus_n))
1357 PL_faketokens = 1;
1358#endif
1359 if (!PL_in_eval && PL_minus_p) {
1360 sv_catpvs(linestr,
1361 /*{*/";}continue{print or die qq(-p destination: $!\\n);}");
1362 PL_minus_n = PL_minus_p = 0;
1363 } else if (!PL_in_eval && PL_minus_n) {
1364 sv_catpvs(linestr, /*{*/";}");
1365 PL_minus_n = 0;
1366 } else
1367 sv_catpvs(linestr, ";");
1368 got_some = 1;
1369 }
1370 buf = SvPVX(linestr);
1371 new_bufend_pos = SvCUR(linestr);
1372 PL_parser->bufend = buf + new_bufend_pos;
1373 PL_parser->bufptr = buf + bufptr_pos;
1374 PL_parser->oldbufptr = buf + oldbufptr_pos;
1375 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
1376 PL_parser->linestart = buf + linestart_pos;
1377 if (PL_parser->last_uni)
1378 PL_parser->last_uni = buf + last_uni_pos;
1379 if (PL_parser->last_lop)
1380 PL_parser->last_lop = buf + last_lop_pos;
1381 if (got_some_for_debugger && (PERLDB_LINE || PERLDB_SAVESRC) &&
1382 PL_curstash != PL_debstash) {
1383 /* debugger active and we're not compiling the debugger code,
1384 * so store the line into the debugger's array of lines
1385 */
1386 update_debugger_info(NULL, buf+old_bufend_pos,
1387 new_bufend_pos-old_bufend_pos);
1388 }
1389 return got_some;
1390}
1391
1392/*
1393=for apidoc Amx|I32|lex_peek_unichar|U32 flags
1394
1395Looks ahead one (Unicode) character in the text currently being lexed.
1396Returns the codepoint (unsigned integer value) of the next character,
1397or -1 if lexing has reached the end of the input text. To consume the
1398peeked character, use L</lex_read_unichar>.
1399
1400If the next character is in (or extends into) the next chunk of input
1401text, the next chunk will be read in. Normally the current chunk will be
1402discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1403then the current chunk will not be discarded.
1404
1405If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1406is encountered, an exception is generated.
1407
1408=cut
1409*/
1410
1411I32
1412Perl_lex_peek_unichar(pTHX_ U32 flags)
1413{
1414 dVAR;
1415 char *s, *bufend;
1416 if (flags & ~(LEX_KEEP_PREVIOUS))
1417 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_peek_unichar");
1418 s = PL_parser->bufptr;
1419 bufend = PL_parser->bufend;
1420 if (UTF) {
1421 U8 head;
1422 I32 unichar;
1423 STRLEN len, retlen;
1424 if (s == bufend) {
1425 if (!lex_next_chunk(flags))
1426 return -1;
1427 s = PL_parser->bufptr;
1428 bufend = PL_parser->bufend;
1429 }
1430 head = (U8)*s;
1431 if (UTF8_IS_INVARIANT(head))
1432 return head;
1433 if (UTF8_IS_START(head)) {
1434 len = UTF8SKIP(&head);
1435 while ((STRLEN)(bufend-s) < len) {
1436 if (!lex_next_chunk(flags | LEX_KEEP_PREVIOUS))
1437 break;
1438 s = PL_parser->bufptr;
1439 bufend = PL_parser->bufend;
1440 }
1441 }
1442 unichar = utf8n_to_uvuni((U8*)s, bufend-s, &retlen, UTF8_CHECK_ONLY);
1443 if (retlen == (STRLEN)-1) {
1444 /* malformed UTF-8 */
1445 ENTER;
1446 SAVESPTR(PL_warnhook);
1447 PL_warnhook = PERL_WARNHOOK_FATAL;
1448 utf8n_to_uvuni((U8*)s, bufend-s, NULL, 0);
1449 LEAVE;
1450 }
1451 return unichar;
1452 } else {
1453 if (s == bufend) {
1454 if (!lex_next_chunk(flags))
1455 return -1;
1456 s = PL_parser->bufptr;
1457 }
1458 return (U8)*s;
1459 }
1460}
1461
1462/*
1463=for apidoc Amx|I32|lex_read_unichar|U32 flags
1464
1465Reads the next (Unicode) character in the text currently being lexed.
1466Returns the codepoint (unsigned integer value) of the character read,
1467and moves L</PL_parser-E<gt>bufptr> past the character, or returns -1
1468if lexing has reached the end of the input text. To non-destructively
1469examine the next character, use L</lex_peek_unichar> instead.
1470
1471If the next character is in (or extends into) the next chunk of input
1472text, the next chunk will be read in. Normally the current chunk will be
1473discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1474then the current chunk will not be discarded.
1475
1476If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1477is encountered, an exception is generated.
1478
1479=cut
1480*/
1481
1482I32
1483Perl_lex_read_unichar(pTHX_ U32 flags)
1484{
1485 I32 c;
1486 if (flags & ~(LEX_KEEP_PREVIOUS))
1487 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_unichar");
1488 c = lex_peek_unichar(flags);
1489 if (c != -1) {
1490 if (c == '\n')
1491 COPLINE_INC_WITH_HERELINES;
1492 if (UTF)
1493 PL_parser->bufptr += UTF8SKIP(PL_parser->bufptr);
1494 else
1495 ++(PL_parser->bufptr);
1496 }
1497 return c;
1498}
1499
1500/*
1501=for apidoc Amx|void|lex_read_space|U32 flags
1502
1503Reads optional spaces, in Perl style, in the text currently being
1504lexed. The spaces may include ordinary whitespace characters and
1505Perl-style comments. C<#line> directives are processed if encountered.
1506L</PL_parser-E<gt>bufptr> is moved past the spaces, so that it points
1507at a non-space character (or the end of the input text).
1508
1509If spaces extend into the next chunk of input text, the next chunk will
1510be read in. Normally the current chunk will be discarded at the same
1511time, but if I<flags> includes C<LEX_KEEP_PREVIOUS> then the current
1512chunk will not be discarded.
1513
1514=cut
1515*/
1516
1517#define LEX_NO_NEXT_CHUNK 0x80000000
1518
1519void
1520Perl_lex_read_space(pTHX_ U32 flags)
1521{
1522 char *s, *bufend;
1523 bool need_incline = 0;
1524 if (flags & ~(LEX_KEEP_PREVIOUS|LEX_NO_NEXT_CHUNK))
1525 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_space");
1526#ifdef PERL_MAD
1527 if (PL_skipwhite) {
1528 sv_free(PL_skipwhite);
1529 PL_skipwhite = NULL;
1530 }
1531 if (PL_madskills)
1532 PL_skipwhite = newSVpvs("");
1533#endif /* PERL_MAD */
1534 s = PL_parser->bufptr;
1535 bufend = PL_parser->bufend;
1536 while (1) {
1537 char c = *s;
1538 if (c == '#') {
1539 do {
1540 c = *++s;
1541 } while (!(c == '\n' || (c == 0 && s == bufend)));
1542 } else if (c == '\n') {
1543 s++;
1544 PL_parser->linestart = s;
1545 if (s == bufend)
1546 need_incline = 1;
1547 else
1548 incline(s);
1549 } else if (isSPACE(c)) {
1550 s++;
1551 } else if (c == 0 && s == bufend) {
1552 bool got_more;
1553#ifdef PERL_MAD
1554 if (PL_madskills)
1555 sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
1556#endif /* PERL_MAD */
1557 if (flags & LEX_NO_NEXT_CHUNK)
1558 break;
1559 PL_parser->bufptr = s;
1560 COPLINE_INC_WITH_HERELINES;
1561 got_more = lex_next_chunk(flags);
1562 CopLINE_dec(PL_curcop);
1563 s = PL_parser->bufptr;
1564 bufend = PL_parser->bufend;
1565 if (!got_more)
1566 break;
1567 if (need_incline && PL_parser->rsfp) {
1568 incline(s);
1569 need_incline = 0;
1570 }
1571 } else {
1572 break;
1573 }
1574 }
1575#ifdef PERL_MAD
1576 if (PL_madskills)
1577 sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
1578#endif /* PERL_MAD */
1579 PL_parser->bufptr = s;
1580}
1581
1582/*
1583 * S_incline
1584 * This subroutine has nothing to do with tilting, whether at windmills
1585 * or pinball tables. Its name is short for "increment line". It
1586 * increments the current line number in CopLINE(PL_curcop) and checks
1587 * to see whether the line starts with a comment of the form
1588 * # line 500 "foo.pm"
1589 * If so, it sets the current line number and file to the values in the comment.
1590 */
1591
1592STATIC void
1593S_incline(pTHX_ const char *s)
1594{
1595 dVAR;
1596 const char *t;
1597 const char *n;
1598 const char *e;
1599 line_t line_num;
1600
1601 PERL_ARGS_ASSERT_INCLINE;
1602
1603 COPLINE_INC_WITH_HERELINES;
1604 if (!PL_rsfp && !PL_parser->filtered && PL_lex_state == LEX_NORMAL
1605 && s+1 == PL_bufend && *s == ';') {
1606 /* fake newline in string eval */
1607 CopLINE_dec(PL_curcop);
1608 return;
1609 }
1610 if (*s++ != '#')
1611 return;
1612 while (SPACE_OR_TAB(*s))
1613 s++;
1614 if (strnEQ(s, "line", 4))
1615 s += 4;
1616 else
1617 return;
1618 if (SPACE_OR_TAB(*s))
1619 s++;
1620 else
1621 return;
1622 while (SPACE_OR_TAB(*s))
1623 s++;
1624 if (!isDIGIT(*s))
1625 return;
1626
1627 n = s;
1628 while (isDIGIT(*s))
1629 s++;
1630 if (!SPACE_OR_TAB(*s) && *s != '\r' && *s != '\n' && *s != '\0')
1631 return;
1632 while (SPACE_OR_TAB(*s))
1633 s++;
1634 if (*s == '"' && (t = strchr(s+1, '"'))) {
1635 s++;
1636 e = t + 1;
1637 }
1638 else {
1639 t = s;
1640 while (!isSPACE(*t))
1641 t++;
1642 e = t;
1643 }
1644 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
1645 e++;
1646 if (*e != '\n' && *e != '\0')
1647 return; /* false alarm */
1648
1649 line_num = atoi(n)-1;
1650
1651 if (t - s > 0) {
1652 const STRLEN len = t - s;
1653 SV *const temp_sv = CopFILESV(PL_curcop);
1654 const char *cf;
1655 STRLEN tmplen;
1656
1657 if (temp_sv) {
1658 cf = SvPVX(temp_sv);
1659 tmplen = SvCUR(temp_sv);
1660 } else {
1661 cf = NULL;
1662 tmplen = 0;
1663 }
1664
1665 if (!PL_rsfp && !PL_parser->filtered) {
1666 /* must copy *{"::_<(eval N)[oldfilename:L]"}
1667 * to *{"::_<newfilename"} */
1668 /* However, the long form of evals is only turned on by the
1669 debugger - usually they're "(eval %lu)" */
1670 char smallbuf[128];
1671 char *tmpbuf;
1672 GV **gvp;
1673 STRLEN tmplen2 = len;
1674 if (tmplen + 2 <= sizeof smallbuf)
1675 tmpbuf = smallbuf;
1676 else
1677 Newx(tmpbuf, tmplen + 2, char);
1678 tmpbuf[0] = '_';
1679 tmpbuf[1] = '<';
1680 memcpy(tmpbuf + 2, cf, tmplen);
1681 tmplen += 2;
1682 gvp = (GV**)hv_fetch(PL_defstash, tmpbuf, tmplen, FALSE);
1683 if (gvp) {
1684 char *tmpbuf2;
1685 GV *gv2;
1686
1687 if (tmplen2 + 2 <= sizeof smallbuf)
1688 tmpbuf2 = smallbuf;
1689 else
1690 Newx(tmpbuf2, tmplen2 + 2, char);
1691
1692 if (tmpbuf2 != smallbuf || tmpbuf != smallbuf) {
1693 /* Either they malloc'd it, or we malloc'd it,
1694 so no prefix is present in ours. */
1695 tmpbuf2[0] = '_';
1696 tmpbuf2[1] = '<';
1697 }
1698
1699 memcpy(tmpbuf2 + 2, s, tmplen2);
1700 tmplen2 += 2;
1701
1702 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
1703 if (!isGV(gv2)) {
1704 gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
1705 /* adjust ${"::_<newfilename"} to store the new file name */
1706 GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
1707 /* The line number may differ. If that is the case,
1708 alias the saved lines that are in the array.
1709 Otherwise alias the whole array. */
1710 if (CopLINE(PL_curcop) == line_num) {
1711 GvHV(gv2) = MUTABLE_HV(SvREFCNT_inc(GvHV(*gvp)));
1712 GvAV(gv2) = MUTABLE_AV(SvREFCNT_inc(GvAV(*gvp)));
1713 }
1714 else if (GvAV(*gvp)) {
1715 AV * const av = GvAV(*gvp);
1716 const I32 start = CopLINE(PL_curcop)+1;
1717 I32 items = AvFILLp(av) - start;
1718 if (items > 0) {
1719 AV * const av2 = GvAVn(gv2);
1720 SV **svp = AvARRAY(av) + start;
1721 I32 l = (I32)line_num+1;
1722 while (items--)
1723 av_store(av2, l++, SvREFCNT_inc(*svp++));
1724 }
1725 }
1726 }
1727
1728 if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
1729 }
1730 if (tmpbuf != smallbuf) Safefree(tmpbuf);
1731 }
1732 CopFILE_free(PL_curcop);
1733 CopFILE_setn(PL_curcop, s, len);
1734 }
1735 CopLINE_set(PL_curcop, line_num);
1736}
1737
1738#ifdef PERL_MAD
1739/* skip space before PL_thistoken */
1740
1741STATIC char *
1742S_skipspace0(pTHX_ char *s)
1743{
1744 PERL_ARGS_ASSERT_SKIPSPACE0;
1745
1746 s = skipspace(s);
1747 if (!PL_madskills)
1748 return s;
1749 if (PL_skipwhite) {
1750 if (!PL_thiswhite)
1751 PL_thiswhite = newSVpvs("");
1752 sv_catsv(PL_thiswhite, PL_skipwhite);
1753 sv_free(PL_skipwhite);
1754 PL_skipwhite = 0;
1755 }
1756 PL_realtokenstart = s - SvPVX(PL_linestr);
1757 return s;
1758}
1759
1760/* skip space after PL_thistoken */
1761
1762STATIC char *
1763S_skipspace1(pTHX_ char *s)
1764{
1765 const char *start = s;
1766 I32 startoff = start - SvPVX(PL_linestr);
1767
1768 PERL_ARGS_ASSERT_SKIPSPACE1;
1769
1770 s = skipspace(s);
1771 if (!PL_madskills)
1772 return s;
1773 start = SvPVX(PL_linestr) + startoff;
1774 if (!PL_thistoken && PL_realtokenstart >= 0) {
1775 const char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
1776 PL_thistoken = newSVpvn(tstart, start - tstart);
1777 }
1778 PL_realtokenstart = -1;
1779 if (PL_skipwhite) {
1780 if (!PL_nextwhite)
1781 PL_nextwhite = newSVpvs("");
1782 sv_catsv(PL_nextwhite, PL_skipwhite);
1783 sv_free(PL_skipwhite);
1784 PL_skipwhite = 0;
1785 }
1786 return s;
1787}
1788
1789STATIC char *
1790S_skipspace2(pTHX_ char *s, SV **svp)
1791{
1792 char *start;
1793 const I32 bufptroff = PL_bufptr - SvPVX(PL_linestr);
1794 const I32 startoff = s - SvPVX(PL_linestr);
1795
1796 PERL_ARGS_ASSERT_SKIPSPACE2;
1797
1798 s = skipspace(s);
1799 PL_bufptr = SvPVX(PL_linestr) + bufptroff;
1800 if (!PL_madskills || !svp)
1801 return s;
1802 start = SvPVX(PL_linestr) + startoff;
1803 if (!PL_thistoken && PL_realtokenstart >= 0) {
1804 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
1805 PL_thistoken = newSVpvn(tstart, start - tstart);
1806 PL_realtokenstart = -1;
1807 }
1808 if (PL_skipwhite) {
1809 if (!*svp)
1810 *svp = newSVpvs("");
1811 sv_setsv(*svp, PL_skipwhite);
1812 sv_free(PL_skipwhite);
1813 PL_skipwhite = 0;
1814 }
1815
1816 return s;
1817}
1818#endif
1819
1820STATIC void
1821S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len)
1822{
1823 AV *av = CopFILEAVx(PL_curcop);
1824 if (av) {
1825 SV * const sv = newSV_type(SVt_PVMG);
1826 if (orig_sv)
1827 sv_setsv(sv, orig_sv);
1828 else
1829 sv_setpvn(sv, buf, len);
1830 (void)SvIOK_on(sv);
1831 SvIV_set(sv, 0);
1832 av_store(av, (I32)CopLINE(PL_curcop), sv);
1833 }
1834}
1835
1836/*
1837 * S_skipspace
1838 * Called to gobble the appropriate amount and type of whitespace.
1839 * Skips comments as well.
1840 */
1841
1842STATIC char *
1843S_skipspace(pTHX_ char *s)
1844{
1845#ifdef PERL_MAD
1846 char *start = s;
1847#endif /* PERL_MAD */
1848 PERL_ARGS_ASSERT_SKIPSPACE;
1849#ifdef PERL_MAD
1850 if (PL_skipwhite) {
1851 sv_free(PL_skipwhite);
1852 PL_skipwhite = NULL;
1853 }
1854#endif /* PERL_MAD */
1855 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
1856 while (s < PL_bufend && SPACE_OR_TAB(*s))
1857 s++;
1858 } else {
1859 STRLEN bufptr_pos = PL_bufptr - SvPVX(PL_linestr);
1860 PL_bufptr = s;
1861 lex_read_space(LEX_KEEP_PREVIOUS |
1862 (PL_sublex_info.sub_inwhat || PL_lex_state == LEX_FORMLINE ?
1863 LEX_NO_NEXT_CHUNK : 0));
1864 s = PL_bufptr;
1865 PL_bufptr = SvPVX(PL_linestr) + bufptr_pos;
1866 if (PL_linestart > PL_bufptr)
1867 PL_bufptr = PL_linestart;
1868 return s;
1869 }
1870#ifdef PERL_MAD
1871 if (PL_madskills)
1872 PL_skipwhite = newSVpvn(start, s-start);
1873#endif /* PERL_MAD */
1874 return s;
1875}
1876
1877/*
1878 * S_check_uni
1879 * Check the unary operators to ensure there's no ambiguity in how they're
1880 * used. An ambiguous piece of code would be:
1881 * rand + 5
1882 * This doesn't mean rand() + 5. Because rand() is a unary operator,
1883 * the +5 is its argument.
1884 */
1885
1886STATIC void
1887S_check_uni(pTHX)
1888{
1889 dVAR;
1890 const char *s;
1891 const char *t;
1892
1893 if (PL_oldoldbufptr != PL_last_uni)
1894 return;
1895 while (isSPACE(*PL_last_uni))
1896 PL_last_uni++;
1897 s = PL_last_uni;
1898 while (isWORDCHAR_lazy_if(s,UTF) || *s == '-')
1899 s++;
1900 if ((t = strchr(s, '(')) && t < PL_bufptr)
1901 return;
1902
1903 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
1904 "Warning: Use of \"%.*s\" without parentheses is ambiguous",
1905 (int)(s - PL_last_uni), PL_last_uni);
1906}
1907
1908/*
1909 * LOP : macro to build a list operator. Its behaviour has been replaced
1910 * with a subroutine, S_lop() for which LOP is just another name.
1911 */
1912
1913#define LOP(f,x) return lop(f,x,s)
1914
1915/*
1916 * S_lop
1917 * Build a list operator (or something that might be one). The rules:
1918 * - if we have a next token, then it's a list operator [why?]
1919 * - if the next thing is an opening paren, then it's a function
1920 * - else it's a list operator
1921 */
1922
1923STATIC I32
1924S_lop(pTHX_ I32 f, int x, char *s)
1925{
1926 dVAR;
1927
1928 PERL_ARGS_ASSERT_LOP;
1929
1930 pl_yylval.ival = f;
1931 CLINE;
1932 PL_expect = x;
1933 PL_bufptr = s;
1934 PL_last_lop = PL_oldbufptr;
1935 PL_last_lop_op = (OPCODE)f;
1936#ifdef PERL_MAD
1937 if (PL_lasttoke)
1938 goto lstop;
1939#else
1940 if (PL_nexttoke)
1941 goto lstop;
1942#endif
1943 if (*s == '(')
1944 return REPORT(FUNC);
1945 s = PEEKSPACE(s);
1946 if (*s == '(')
1947 return REPORT(FUNC);
1948 else {
1949 lstop:
1950 if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
1951 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
1952 return REPORT(LSTOP);
1953 }
1954}
1955
1956#ifdef PERL_MAD
1957 /*
1958 * S_start_force
1959 * Sets up for an eventual force_next(). start_force(0) basically does
1960 * an unshift, while start_force(-1) does a push. yylex removes items
1961 * on the "pop" end.
1962 */
1963
1964STATIC void
1965S_start_force(pTHX_ int where)
1966{
1967 int i;
1968
1969 if (where < 0) /* so people can duplicate start_force(PL_curforce) */
1970 where = PL_lasttoke;
1971 assert(PL_curforce < 0 || PL_curforce == where);
1972 if (PL_curforce != where) {
1973 for (i = PL_lasttoke; i > where; --i) {
1974 PL_nexttoke[i] = PL_nexttoke[i-1];
1975 }
1976 PL_lasttoke++;
1977 }
1978 if (PL_curforce < 0) /* in case of duplicate start_force() */
1979 Zero(&PL_nexttoke[where], 1, NEXTTOKE);
1980 PL_curforce = where;
1981 if (PL_nextwhite) {
1982 if (PL_madskills)
1983 curmad('^', newSVpvs(""));
1984 CURMAD('_', PL_nextwhite);
1985 }
1986}
1987
1988STATIC void
1989S_curmad(pTHX_ char slot, SV *sv)
1990{
1991 MADPROP **where;
1992
1993 if (!sv)
1994 return;
1995 if (PL_curforce < 0)
1996 where = &PL_thismad;
1997 else
1998 where = &PL_nexttoke[PL_curforce].next_mad;
1999
2000 if (PL_faketokens)
2001 sv_setpvs(sv, "");
2002 else {
2003 if (!IN_BYTES) {
2004 if (UTF && is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
2005 SvUTF8_on(sv);
2006 else if (PL_encoding) {
2007 sv_recode_to_utf8(sv, PL_encoding);
2008 }
2009 }
2010 }
2011
2012 /* keep a slot open for the head of the list? */
2013 if (slot != '_' && *where && (*where)->mad_key == '^') {
2014 (*where)->mad_key = slot;
2015 sv_free(MUTABLE_SV(((*where)->mad_val)));
2016 (*where)->mad_val = (void*)sv;
2017 }
2018 else
2019 addmad(newMADsv(slot, sv), where, 0);
2020}
2021#else
2022# define start_force(where) NOOP
2023# define curmad(slot, sv) NOOP
2024#endif
2025
2026/*
2027 * S_force_next
2028 * When the lexer realizes it knows the next token (for instance,
2029 * it is reordering tokens for the parser) then it can call S_force_next
2030 * to know what token to return the next time the lexer is called. Caller
2031 * will need to set PL_nextval[] (or PL_nexttoke[].next_val with PERL_MAD),
2032 * and possibly PL_expect to ensure the lexer handles the token correctly.
2033 */
2034
2035STATIC void
2036S_force_next(pTHX_ I32 type)
2037{
2038 dVAR;
2039#ifdef DEBUGGING
2040 if (DEBUG_T_TEST) {
2041 PerlIO_printf(Perl_debug_log, "### forced token:\n");
2042 tokereport(type, &NEXTVAL_NEXTTOKE);
2043 }
2044#endif
2045#ifdef PERL_MAD
2046 if (PL_curforce < 0)
2047 start_force(PL_lasttoke);
2048 PL_nexttoke[PL_curforce].next_type = type;
2049 if (PL_lex_state != LEX_KNOWNEXT)
2050 PL_lex_defer = PL_lex_state;
2051 PL_lex_state = LEX_KNOWNEXT;
2052 PL_lex_expect = PL_expect;
2053 PL_curforce = -1;
2054#else
2055 PL_nexttype[PL_nexttoke] = type;
2056 PL_nexttoke++;
2057 if (PL_lex_state != LEX_KNOWNEXT) {
2058 PL_lex_defer = PL_lex_state;
2059 PL_lex_expect = PL_expect;
2060 PL_lex_state = LEX_KNOWNEXT;
2061 }
2062#endif
2063}
2064
2065void
2066Perl_yyunlex(pTHX)
2067{
2068 int yyc = PL_parser->yychar;
2069 if (yyc != YYEMPTY) {
2070 if (yyc) {
2071 start_force(-1);
2072 NEXTVAL_NEXTTOKE = PL_parser->yylval;
2073 if (yyc == '{'/*}*/ || yyc == HASHBRACK || yyc == '['/*]*/) {
2074 PL_lex_allbrackets--;
2075 PL_lex_brackets--;
2076 yyc |= (3<<24) | (PL_lex_brackstack[PL_lex_brackets] << 16);
2077 } else if (yyc == '('/*)*/) {
2078 PL_lex_allbrackets--;
2079 yyc |= (2<<24);
2080 }
2081 force_next(yyc);
2082 }
2083 PL_parser->yychar = YYEMPTY;
2084 }
2085}
2086
2087STATIC SV *
2088S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len)
2089{
2090 dVAR;
2091 SV * const sv = newSVpvn_utf8(start, len,
2092 !IN_BYTES
2093 && UTF
2094 && !is_ascii_string((const U8*)start, len)
2095 && is_utf8_string((const U8*)start, len));
2096 return sv;
2097}
2098
2099/*
2100 * S_force_word
2101 * When the lexer knows the next thing is a word (for instance, it has
2102 * just seen -> and it knows that the next char is a word char, then
2103 * it calls S_force_word to stick the next word into the PL_nexttoke/val
2104 * lookahead.
2105 *
2106 * Arguments:
2107 * char *start : buffer position (must be within PL_linestr)
2108 * int token : PL_next* will be this type of bare word (e.g., METHOD,WORD)
2109 * int check_keyword : if true, Perl checks to make sure the word isn't
2110 * a keyword (do this if the word is a label, e.g. goto FOO)
2111 * int allow_pack : if true, : characters will also be allowed (require,
2112 * use, etc. do this)
2113 * int allow_initial_tick : used by the "sub" lexer only.
2114 */
2115
2116STATIC char *
2117S_force_word(pTHX_ char *start, int token, int check_keyword, int allow_pack)
2118{
2119 dVAR;
2120 char *s;
2121 STRLEN len;
2122
2123 PERL_ARGS_ASSERT_FORCE_WORD;
2124
2125 start = SKIPSPACE1(start);
2126 s = start;
2127 if (isIDFIRST_lazy_if(s,UTF) ||
2128 (allow_pack && *s == ':') )
2129 {
2130 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
2131 if (check_keyword) {
2132 char *s2 = PL_tokenbuf;
2133 if (allow_pack && len > 6 && strnEQ(s2, "CORE::", 6))
2134 s2 += 6, len -= 6;
2135 if (keyword(s2, len, 0))
2136 return start;
2137 }
2138 start_force(PL_curforce);
2139 if (PL_madskills)
2140 curmad('X', newSVpvn(start,s-start));
2141 if (token == METHOD) {
2142 s = SKIPSPACE1(s);
2143 if (*s == '(')
2144 PL_expect = XTERM;
2145 else {
2146 PL_expect = XOPERATOR;
2147 }
2148 }
2149 if (PL_madskills)
2150 curmad('g', newSVpvs( "forced" ));
2151 NEXTVAL_NEXTTOKE.opval
2152 = (OP*)newSVOP(OP_CONST,0,
2153 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
2154 NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
2155 force_next(token);
2156 }
2157 return s;
2158}
2159
2160/*
2161 * S_force_ident
2162 * Called when the lexer wants $foo *foo &foo etc, but the program
2163 * text only contains the "foo" portion. The first argument is a pointer
2164 * to the "foo", and the second argument is the type symbol to prefix.
2165 * Forces the next token to be a "WORD".
2166 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
2167 */
2168
2169STATIC void
2170S_force_ident(pTHX_ const char *s, int kind)
2171{
2172 dVAR;
2173
2174 PERL_ARGS_ASSERT_FORCE_IDENT;
2175
2176 if (s[0]) {
2177 const STRLEN len = s[1] ? strlen(s) : 1; /* s = "\"" see yylex */
2178 OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpvn_flags(s, len,
2179 UTF ? SVf_UTF8 : 0));
2180 start_force(PL_curforce);
2181 NEXTVAL_NEXTTOKE.opval = o;
2182 force_next(WORD);
2183 if (kind) {
2184 o->op_private = OPpCONST_ENTERED;
2185 /* XXX see note in pp_entereval() for why we forgo typo
2186 warnings if the symbol must be introduced in an eval.
2187 GSAR 96-10-12 */
2188 gv_fetchpvn_flags(s, len,
2189 (PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL)
2190 : GV_ADD) | ( UTF ? SVf_UTF8 : 0 ),
2191 kind == '$' ? SVt_PV :
2192 kind == '@' ? SVt_PVAV :
2193 kind == '%' ? SVt_PVHV :
2194 SVt_PVGV
2195 );
2196 }
2197 }
2198}
2199
2200static void
2201S_force_ident_maybe_lex(pTHX_ char pit)
2202{
2203 start_force(PL_curforce);
2204 NEXTVAL_NEXTTOKE.ival = pit;
2205 force_next('p');
2206}
2207
2208NV
2209Perl_str_to_version(pTHX_ SV *sv)
2210{
2211 NV retval = 0.0;
2212 NV nshift = 1.0;
2213 STRLEN len;
2214 const char *start = SvPV_const(sv,len);
2215 const char * const end = start + len;
2216 const bool utf = SvUTF8(sv) ? TRUE : FALSE;
2217
2218 PERL_ARGS_ASSERT_STR_TO_VERSION;
2219
2220 while (start < end) {
2221 STRLEN skip;
2222 UV n;
2223 if (utf)
2224 n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
2225 else {
2226 n = *(U8*)start;
2227 skip = 1;
2228 }
2229 retval += ((NV)n)/nshift;
2230 start += skip;
2231 nshift *= 1000;
2232 }
2233 return retval;
2234}
2235
2236/*
2237 * S_force_version
2238 * Forces the next token to be a version number.
2239 * If the next token appears to be an invalid version number, (e.g. "v2b"),
2240 * and if "guessing" is TRUE, then no new token is created (and the caller
2241 * must use an alternative parsing method).
2242 */
2243
2244STATIC char *
2245S_force_version(pTHX_ char *s, int guessing)
2246{
2247 dVAR;
2248 OP *version = NULL;
2249 char *d;
2250#ifdef PERL_MAD
2251 I32 startoff = s - SvPVX(PL_linestr);
2252#endif
2253
2254 PERL_ARGS_ASSERT_FORCE_VERSION;
2255
2256 s = SKIPSPACE1(s);
2257
2258 d = s;
2259 if (*d == 'v')
2260 d++;
2261 if (isDIGIT(*d)) {
2262 while (isDIGIT(*d) || *d == '_' || *d == '.')
2263 d++;
2264#ifdef PERL_MAD
2265 if (PL_madskills) {
2266 start_force(PL_curforce);
2267 curmad('X', newSVpvn(s,d-s));
2268 }
2269#endif
2270 if (*d == ';' || isSPACE(*d) || *d == '{' || *d == '}' || !*d) {
2271 SV *ver;
2272#ifdef USE_LOCALE_NUMERIC
2273 char *loc = savepv(setlocale(LC_NUMERIC, NULL));
2274 setlocale(LC_NUMERIC, "C");
2275#endif
2276 s = scan_num(s, &pl_yylval);
2277#ifdef USE_LOCALE_NUMERIC
2278 setlocale(LC_NUMERIC, loc);
2279 Safefree(loc);
2280#endif
2281 version = pl_yylval.opval;
2282 ver = cSVOPx(version)->op_sv;
2283 if (SvPOK(ver) && !SvNIOK(ver)) {
2284 SvUPGRADE(ver, SVt_PVNV);
2285 SvNV_set(ver, str_to_version(ver));
2286 SvNOK_on(ver); /* hint that it is a version */
2287 }
2288 }
2289 else if (guessing) {
2290#ifdef PERL_MAD
2291 if (PL_madskills) {
2292 sv_free(PL_nextwhite); /* let next token collect whitespace */
2293 PL_nextwhite = 0;
2294 s = SvPVX(PL_linestr) + startoff;
2295 }
2296#endif
2297 return s;
2298 }
2299 }
2300
2301#ifdef PERL_MAD
2302 if (PL_madskills && !version) {
2303 sv_free(PL_nextwhite); /* let next token collect whitespace */
2304 PL_nextwhite = 0;
2305 s = SvPVX(PL_linestr) + startoff;
2306 }
2307#endif
2308 /* NOTE: The parser sees the package name and the VERSION swapped */
2309 start_force(PL_curforce);
2310 NEXTVAL_NEXTTOKE.opval = version;
2311 force_next(WORD);
2312
2313 return s;
2314}
2315
2316/*
2317 * S_force_strict_version
2318 * Forces the next token to be a version number using strict syntax rules.
2319 */
2320
2321STATIC char *
2322S_force_strict_version(pTHX_ char *s)
2323{
2324 dVAR;
2325 OP *version = NULL;
2326#ifdef PERL_MAD
2327 I32 startoff = s - SvPVX(PL_linestr);
2328#endif
2329 const char *errstr = NULL;
2330
2331 PERL_ARGS_ASSERT_FORCE_STRICT_VERSION;
2332
2333 while (isSPACE(*s)) /* leading whitespace */
2334 s++;
2335
2336 if (is_STRICT_VERSION(s,&errstr)) {
2337 SV *ver = newSV(0);
2338 s = (char *)scan_version(s, ver, 0);
2339 version = newSVOP(OP_CONST, 0, ver);
2340 }
2341 else if ( (*s != ';' && *s != '{' && *s != '}' ) &&
2342 (s = SKIPSPACE1(s), (*s != ';' && *s != '{' && *s != '}' )))
2343 {
2344 PL_bufptr = s;
2345 if (errstr)
2346 yyerror(errstr); /* version required */
2347 return s;
2348 }
2349
2350#ifdef PERL_MAD
2351 if (PL_madskills && !version) {
2352 sv_free(PL_nextwhite); /* let next token collect whitespace */
2353 PL_nextwhite = 0;
2354 s = SvPVX(PL_linestr) + startoff;
2355 }
2356#endif
2357 /* NOTE: The parser sees the package name and the VERSION swapped */
2358 start_force(PL_curforce);
2359 NEXTVAL_NEXTTOKE.opval = version;
2360 force_next(WORD);
2361
2362 return s;
2363}
2364
2365/*
2366 * S_tokeq
2367 * Tokenize a quoted string passed in as an SV. It finds the next
2368 * chunk, up to end of string or a backslash. It may make a new
2369 * SV containing that chunk (if HINT_NEW_STRING is on). It also
2370 * turns \\ into \.
2371 */
2372
2373STATIC SV *
2374S_tokeq(pTHX_ SV *sv)
2375{
2376 dVAR;
2377 char *s;
2378 char *send;
2379 char *d;
2380 STRLEN len = 0;
2381 SV *pv = sv;
2382
2383 PERL_ARGS_ASSERT_TOKEQ;
2384
2385 if (!SvLEN(sv))
2386 goto finish;
2387
2388 s = SvPV_force(sv, len);
2389 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
2390 goto finish;
2391 send = s + len;
2392 /* This is relying on the SV being "well formed" with a trailing '\0' */
2393 while (s < send && !(*s == '\\' && s[1] == '\\'))
2394 s++;
2395 if (s == send)
2396 goto finish;
2397 d = s;
2398 if ( PL_hints & HINT_NEW_STRING ) {
2399 pv = newSVpvn_flags(SvPVX_const(pv), len, SVs_TEMP | SvUTF8(sv));
2400 }
2401 while (s < send) {
2402 if (*s == '\\') {
2403 if (s + 1 < send && (s[1] == '\\'))
2404 s++; /* all that, just for this */
2405 }
2406 *d++ = *s++;
2407 }
2408 *d = '\0';
2409 SvCUR_set(sv, d - SvPVX_const(sv));
2410 finish:
2411 if ( PL_hints & HINT_NEW_STRING )
2412 return new_constant(NULL, 0, "q", sv, pv, "q", 1);
2413 return sv;
2414}
2415
2416/*
2417 * Now come three functions related to double-quote context,
2418 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
2419 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
2420 * interact with PL_lex_state, and create fake ( ... ) argument lists
2421 * to handle functions and concatenation.
2422 * For example,
2423 * "foo\lbar"
2424 * is tokenised as
2425 * stringify ( const[foo] concat lcfirst ( const[bar] ) )
2426 */
2427
2428/*
2429 * S_sublex_start
2430 * Assumes that pl_yylval.ival is the op we're creating (e.g. OP_LCFIRST).
2431 *
2432 * Pattern matching will set PL_lex_op to the pattern-matching op to
2433 * make (we return THING if pl_yylval.ival is OP_NULL, PMFUNC otherwise).
2434 *
2435 * OP_CONST and OP_READLINE are easy--just make the new op and return.
2436 *
2437 * Everything else becomes a FUNC.
2438 *
2439 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
2440 * had an OP_CONST or OP_READLINE). This just sets us up for a
2441 * call to S_sublex_push().
2442 */
2443
2444STATIC I32
2445S_sublex_start(pTHX)
2446{
2447 dVAR;
2448 const I32 op_type = pl_yylval.ival;
2449
2450 if (op_type == OP_NULL) {
2451 pl_yylval.opval = PL_lex_op;
2452 PL_lex_op = NULL;
2453 return THING;
2454 }
2455 if (op_type == OP_CONST || op_type == OP_READLINE) {
2456 SV *sv = tokeq(PL_lex_stuff);
2457
2458 if (SvTYPE(sv) == SVt_PVIV) {
2459 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
2460 STRLEN len;
2461 const char * const p = SvPV_const(sv, len);
2462 SV * const nsv = newSVpvn_flags(p, len, SvUTF8(sv));
2463 SvREFCNT_dec(sv);
2464 sv = nsv;
2465 }
2466 pl_yylval.opval = (OP*)newSVOP(op_type, 0, sv);
2467 PL_lex_stuff = NULL;
2468 /* Allow <FH> // "foo" */
2469 if (op_type == OP_READLINE)
2470 PL_expect = XTERMORDORDOR;
2471 return THING;
2472 }
2473 else if (op_type == OP_BACKTICK && PL_lex_op) {
2474 /* readpipe() vas overriden */
2475 cSVOPx(cLISTOPx(cUNOPx(PL_lex_op)->op_first)->op_first->op_sibling)->op_sv = tokeq(PL_lex_stuff);
2476 pl_yylval.opval = PL_lex_op;
2477 PL_lex_op = NULL;
2478 PL_lex_stuff = NULL;
2479 return THING;
2480 }
2481
2482 PL_sublex_info.super_state = PL_lex_state;
2483 PL_sublex_info.sub_inwhat = (U16)op_type;
2484 PL_sublex_info.sub_op = PL_lex_op;
2485 PL_lex_state = LEX_INTERPPUSH;
2486
2487 PL_expect = XTERM;
2488 if (PL_lex_op) {
2489 pl_yylval.opval = PL_lex_op;
2490 PL_lex_op = NULL;
2491 return PMFUNC;
2492 }
2493 else
2494 return FUNC;
2495}
2496
2497/*
2498 * S_sublex_push
2499 * Create a new scope to save the lexing state. The scope will be
2500 * ended in S_sublex_done. Returns a '(', starting the function arguments
2501 * to the uc, lc, etc. found before.
2502 * Sets PL_lex_state to LEX_INTERPCONCAT.
2503 */
2504
2505STATIC I32
2506S_sublex_push(pTHX)
2507{
2508 dVAR;
2509 LEXSHARED *shared;
2510 ENTER;
2511
2512 PL_lex_state = PL_sublex_info.super_state;
2513 SAVEBOOL(PL_lex_dojoin);
2514 SAVEI32(PL_lex_brackets);
2515 SAVEI32(PL_lex_allbrackets);
2516 SAVEI32(PL_lex_formbrack);
2517 SAVEI8(PL_lex_fakeeof);
2518 SAVEI32(PL_lex_casemods);
2519 SAVEI32(PL_lex_starts);
2520 SAVEI8(PL_lex_state);
2521 SAVESPTR(PL_lex_repl);
2522 SAVEVPTR(PL_lex_inpat);
2523 SAVEI16(PL_lex_inwhat);
2524 SAVECOPLINE(PL_curcop);
2525 SAVEPPTR(PL_bufptr);
2526 SAVEPPTR(PL_bufend);
2527 SAVEPPTR(PL_oldbufptr);
2528 SAVEPPTR(PL_oldoldbufptr);
2529 SAVEPPTR(PL_last_lop);
2530 SAVEPPTR(PL_last_uni);
2531 SAVEPPTR(PL_linestart);
2532 SAVESPTR(PL_linestr);
2533 SAVEGENERICPV(PL_lex_brackstack);
2534 SAVEGENERICPV(PL_lex_casestack);
2535 SAVEGENERICPV(PL_parser->lex_shared);
2536 SAVEBOOL(PL_parser->lex_re_reparsing);
2537
2538 /* The here-doc parser needs to be able to peek into outer lexing
2539 scopes to find the body of the here-doc. So we put PL_linestr and
2540 PL_bufptr into lex_shared, to ‘share’ those values.
2541 */
2542 PL_parser->lex_shared->ls_linestr = PL_linestr;
2543 PL_parser->lex_shared->ls_bufptr = PL_bufptr;
2544
2545 PL_linestr = PL_lex_stuff;
2546 PL_lex_repl = PL_sublex_info.repl;
2547 PL_lex_stuff = NULL;
2548 PL_sublex_info.repl = NULL;
2549
2550 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
2551 = SvPVX(PL_linestr);
2552 PL_bufend += SvCUR(PL_linestr);
2553 PL_last_lop = PL_last_uni = NULL;
2554 SAVEFREESV(PL_linestr);
2555 if (PL_lex_repl) SAVEFREESV(PL_lex_repl);
2556
2557 PL_lex_dojoin = FALSE;
2558 PL_lex_brackets = PL_lex_formbrack = 0;
2559 PL_lex_allbrackets = 0;
2560 PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
2561 Newx(PL_lex_brackstack, 120, char);
2562 Newx(PL_lex_casestack, 12, char);
2563 PL_lex_casemods = 0;
2564 *PL_lex_casestack = '\0';
2565 PL_lex_starts = 0;
2566 PL_lex_state = LEX_INTERPCONCAT;
2567 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
2568
2569 Newxz(shared, 1, LEXSHARED);
2570 shared->ls_prev = PL_parser->lex_shared;
2571 PL_parser->lex_shared = shared;
2572
2573 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
2574 if (PL_lex_inwhat == OP_TRANSR) PL_lex_inwhat = OP_TRANS;
2575 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
2576 PL_lex_inpat = PL_sublex_info.sub_op;
2577 else
2578 PL_lex_inpat = NULL;
2579
2580 PL_parser->lex_re_reparsing = cBOOL(PL_in_eval & EVAL_RE_REPARSING);
2581 PL_in_eval &= ~EVAL_RE_REPARSING;
2582
2583 return '(';
2584}
2585
2586/*
2587 * S_sublex_done
2588 * Restores lexer state after a S_sublex_push.
2589 */
2590
2591STATIC I32
2592S_sublex_done(pTHX)
2593{
2594 dVAR;
2595 if (!PL_lex_starts++) {
2596 SV * const sv = newSVpvs("");
2597 if (SvUTF8(PL_linestr))
2598 SvUTF8_on(sv);
2599 PL_expect = XOPERATOR;
2600 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
2601 return THING;
2602 }
2603
2604 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
2605 PL_lex_state = LEX_INTERPCASEMOD;
2606 return yylex();
2607 }
2608
2609 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
2610 assert(PL_lex_inwhat != OP_TRANSR);
2611 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
2612 PL_linestr = PL_lex_repl;
2613 PL_lex_inpat = 0;
2614 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
2615 PL_bufend += SvCUR(PL_linestr);
2616 PL_last_lop = PL_last_uni = NULL;
2617 PL_lex_dojoin = FALSE;
2618 PL_lex_brackets = 0;
2619 PL_lex_allbrackets = 0;
2620 PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
2621 PL_lex_casemods = 0;
2622 *PL_lex_casestack = '\0';
2623 PL_lex_starts = 0;
2624 if (SvEVALED(PL_lex_repl)) {
2625 PL_lex_state = LEX_INTERPNORMAL;
2626 PL_lex_starts++;
2627 /* we don't clear PL_lex_repl here, so that we can check later
2628 whether this is an evalled subst; that means we rely on the
2629 logic to ensure sublex_done() is called again only via the
2630 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
2631 }
2632 else {
2633 PL_lex_state = LEX_INTERPCONCAT;
2634 PL_lex_repl = NULL;
2635 }
2636 return ',';
2637 }
2638 else {
2639#ifdef PERL_MAD
2640 if (PL_madskills) {
2641 if (PL_thiswhite) {
2642 if (!PL_endwhite)
2643 PL_endwhite = newSVpvs("");
2644 sv_catsv(PL_endwhite, PL_thiswhite);
2645 PL_thiswhite = 0;
2646 }
2647 if (PL_thistoken)
2648 sv_setpvs(PL_thistoken,"");
2649 else
2650 PL_realtokenstart = -1;
2651 }
2652#endif
2653 LEAVE;
2654 PL_bufend = SvPVX(PL_linestr);
2655 PL_bufend += SvCUR(PL_linestr);
2656 PL_expect = XOPERATOR;
2657 PL_sublex_info.sub_inwhat = 0;
2658 return ')';
2659 }
2660}
2661
2662PERL_STATIC_INLINE SV*
2663S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e)
2664{
2665 /* <s> points to first character of interior of \N{}, <e> to one beyond the
2666 * interior, hence to the "}". Finds what the name resolves to, returning
2667 * an SV* containing it; NULL if no valid one found */
2668
2669 SV* res = newSVpvn_flags(s, e - s, UTF ? SVf_UTF8 : 0);
2670
2671 HV * table;
2672 SV **cvp;
2673 SV *cv;
2674 SV *rv;
2675 HV *stash;
2676 const U8* first_bad_char_loc;
2677 const char* backslash_ptr = s - 3; /* Points to the <\> of \N{... */
2678
2679 PERL_ARGS_ASSERT_GET_AND_CHECK_BACKSLASH_N_NAME;
2680
2681 if (UTF && ! is_utf8_string_loc((U8 *) backslash_ptr,
2682 e - backslash_ptr,
2683 &first_bad_char_loc))
2684 {
2685 /* If warnings are on, this will print a more detailed analysis of what
2686 * is wrong than the error message below */
2687 utf8n_to_uvuni(first_bad_char_loc,
2688 e - ((char *) first_bad_char_loc),
2689 NULL, 0);
2690
2691 /* We deliberately don't try to print the malformed character, which
2692 * might not print very well; it also may be just the first of many
2693 * malformations, so don't print what comes after it */
2694 yyerror(Perl_form(aTHX_
2695 "Malformed UTF-8 character immediately after '%.*s'",
2696 (int) (first_bad_char_loc - (U8 *) backslash_ptr), backslash_ptr));
2697 return NULL;
2698 }
2699
2700 res = new_constant( NULL, 0, "charnames", res, NULL, backslash_ptr,
2701 /* include the <}> */
2702 e - backslash_ptr + 1);
2703 if (! SvPOK(res)) {
2704 SvREFCNT_dec_NN(res);
2705 return NULL;
2706 }
2707
2708 /* See if the charnames handler is the Perl core's, and if so, we can skip
2709 * the validation needed for a user-supplied one, as Perl's does its own
2710 * validation. */
2711 table = GvHV(PL_hintgv); /* ^H */
2712 cvp = hv_fetchs(table, "charnames", FALSE);
2713 if (cvp && (cv = *cvp) && SvROK(cv) && ((rv = SvRV(cv)) != NULL)
2714 && SvTYPE(rv) == SVt_PVCV && ((stash = CvSTASH(rv)) != NULL))
2715 {
2716 const char * const name = HvNAME(stash);
2717 if strEQ(name, "_charnames") {
2718 return res;
2719 }
2720 }
2721
2722 /* Here, it isn't Perl's charname handler. We can't rely on a
2723 * user-supplied handler to validate the input name. For non-ut8 input,
2724 * look to see that the first character is legal. Then loop through the
2725 * rest checking that each is a continuation */
2726
2727 /* This code needs to be sync'ed with a regex in _charnames.pm which does
2728 * the same thing */
2729
2730 if (! UTF) {
2731 if (! isALPHAU(*s)) {
2732 goto bad_charname;
2733 }
2734 s++;
2735 while (s < e) {
2736 if (! isCHARNAME_CONT(*s)) {
2737 goto bad_charname;
2738 }
2739 if (*s == ' ' && *(s-1) == ' ' && ckWARN_d(WARN_DEPRECATED)) {
2740 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
2741 "A sequence of multiple spaces in a charnames "
2742 "alias definition is deprecated");
2743 }
2744 s++;
2745 }
2746 if (*(s-1) == ' ' && ckWARN_d(WARN_DEPRECATED)) {
2747 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
2748 "Trailing white-space in a charnames alias "
2749 "definition is deprecated");
2750 }
2751 }
2752 else {
2753 /* Similarly for utf8. For invariants can check directly; for other
2754 * Latin1, can calculate their code point and check; otherwise use a
2755 * swash */
2756 if (UTF8_IS_INVARIANT(*s)) {
2757 if (! isALPHAU(*s)) {
2758 goto bad_charname;
2759 }
2760 s++;
2761 } else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
2762 if (! isALPHAU(UNI_TO_NATIVE(TWO_BYTE_UTF8_TO_UNI(*s, *(s+1))))) {
2763 goto bad_charname;
2764 }
2765 s += 2;
2766 }
2767 else {
2768 if (! PL_utf8_charname_begin) {
2769 U8 flags = _CORE_SWASH_INIT_ACCEPT_INVLIST;
2770 PL_utf8_charname_begin = _core_swash_init("utf8",
2771 "_Perl_Charname_Begin",
2772 &PL_sv_undef,
2773 1, 0, NULL, &flags);
2774 }
2775 if (! swash_fetch(PL_utf8_charname_begin, (U8 *) s, TRUE)) {
2776 goto bad_charname;
2777 }
2778 s += UTF8SKIP(s);
2779 }
2780
2781 while (s < e) {
2782 if (UTF8_IS_INVARIANT(*s)) {
2783 if (! isCHARNAME_CONT(*s)) {
2784 goto bad_charname;
2785 }
2786 if (*s == ' ' && *(s-1) == ' '
2787 && ckWARN_d(WARN_DEPRECATED)) {
2788 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
2789 "A sequence of multiple spaces in a charnam"
2790 "es alias definition is deprecated");
2791 }
2792 s++;
2793 }
2794 else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
2795 if (! isCHARNAME_CONT(UNI_TO_NATIVE(TWO_BYTE_UTF8_TO_UNI(*s,
2796 *(s+1)))))
2797 {
2798 goto bad_charname;
2799 }
2800 s += 2;
2801 }
2802 else {
2803 if (! PL_utf8_charname_continue) {
2804 U8 flags = _CORE_SWASH_INIT_ACCEPT_INVLIST;
2805 PL_utf8_charname_continue = _core_swash_init("utf8",
2806 "_Perl_Charname_Continue",
2807 &PL_sv_undef,
2808 1, 0, NULL, &flags);
2809 }
2810 if (! swash_fetch(PL_utf8_charname_continue, (U8 *) s, TRUE)) {
2811 goto bad_charname;
2812 }
2813 s += UTF8SKIP(s);
2814 }
2815 }
2816 if (*(s-1) == ' ' && ckWARN_d(WARN_DEPRECATED)) {
2817 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
2818 "Trailing white-space in a charnames alias "
2819 "definition is deprecated");
2820 }
2821 }
2822
2823 if (SvUTF8(res)) { /* Don't accept malformed input */
2824 const U8* first_bad_char_loc;
2825 STRLEN len;
2826 const char* const str = SvPV_const(res, len);
2827 if (! is_utf8_string_loc((U8 *) str, len, &first_bad_char_loc)) {
2828 /* If warnings are on, this will print a more detailed analysis of
2829 * what is wrong than the error message below */
2830 utf8n_to_uvuni(first_bad_char_loc,
2831 (char *) first_bad_char_loc - str,
2832 NULL, 0);
2833
2834 /* We deliberately don't try to print the malformed character,
2835 * which might not print very well; it also may be just the first
2836 * of many malformations, so don't print what comes after it */
2837 yyerror_pv(
2838 Perl_form(aTHX_
2839 "Malformed UTF-8 returned by %.*s immediately after '%.*s'",
2840 (int) (e - backslash_ptr + 1), backslash_ptr,
2841 (int) ((char *) first_bad_char_loc - str), str
2842 ),
2843 SVf_UTF8);
2844 return NULL;
2845 }
2846 }
2847
2848 return res;
2849
2850 bad_charname: {
2851 int bad_char_size = ((UTF) ? UTF8SKIP(s) : 1);
2852
2853 /* The final %.*s makes sure that should the trailing NUL be missing
2854 * that this print won't run off the end of the string */
2855 yyerror_pv(
2856 Perl_form(aTHX_
2857 "Invalid character in \\N{...}; marked by <-- HERE in %.*s<-- HERE %.*s",
2858 (int)(s - backslash_ptr + bad_char_size), backslash_ptr,
2859 (int)(e - s + bad_char_size), s + bad_char_size
2860 ),
2861 UTF ? SVf_UTF8 : 0);
2862 return NULL;
2863 }
2864}
2865
2866/*
2867 scan_const
2868
2869 Extracts the next constant part of a pattern, double-quoted string,
2870 or transliteration. This is terrifying code.
2871
2872 For example, in parsing the double-quoted string "ab\x63$d", it would
2873 stop at the '$' and return an OP_CONST containing 'abc'.
2874
2875 It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
2876 processing a pattern (PL_lex_inpat is true), a transliteration
2877 (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
2878
2879 Returns a pointer to the character scanned up to. If this is
2880 advanced from the start pointer supplied (i.e. if anything was
2881 successfully parsed), will leave an OP_CONST for the substring scanned
2882 in pl_yylval. Caller must intuit reason for not parsing further
2883 by looking at the next characters herself.
2884
2885 In patterns:
2886 expand:
2887 \N{FOO} => \N{U+hex_for_character_FOO}
2888 (if FOO expands to multiple characters, expands to \N{U+xx.XX.yy ...})
2889
2890 pass through:
2891 all other \-char, including \N and \N{ apart from \N{ABC}
2892
2893 stops on:
2894 @ and $ where it appears to be a var, but not for $ as tail anchor
2895 \l \L \u \U \Q \E
2896 (?{ or (??{
2897
2898
2899 In transliterations:
2900 characters are VERY literal, except for - not at the start or end
2901 of the string, which indicates a range. If the range is in bytes,
2902 scan_const expands the range to the full set of intermediate
2903 characters. If the range is in utf8, the hyphen is replaced with
2904 a certain range mark which will be handled by pmtrans() in op.c.
2905
2906 In double-quoted strings:
2907 backslashes:
2908 double-quoted style: \r and \n
2909 constants: \x31, etc.
2910 deprecated backrefs: \1 (in substitution replacements)
2911 case and quoting: \U \Q \E
2912 stops on @ and $
2913
2914 scan_const does *not* construct ops to handle interpolated strings.
2915 It stops processing as soon as it finds an embedded $ or @ variable
2916 and leaves it to the caller to work out what's going on.
2917
2918 embedded arrays (whether in pattern or not) could be:
2919 @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
2920
2921 $ in double-quoted strings must be the symbol of an embedded scalar.
2922
2923 $ in pattern could be $foo or could be tail anchor. Assumption:
2924 it's a tail anchor if $ is the last thing in the string, or if it's
2925 followed by one of "()| \r\n\t"
2926
2927 \1 (backreferences) are turned into $1 in substitutions
2928
2929 The structure of the code is
2930 while (there's a character to process) {
2931 handle transliteration ranges
2932 skip regexp comments /(?#comment)/ and codes /(?{code})/
2933 skip #-initiated comments in //x patterns
2934 check for embedded arrays
2935 check for embedded scalars
2936 if (backslash) {
2937 deprecate \1 in substitution replacements
2938 handle string-changing backslashes \l \U \Q \E, etc.
2939 switch (what was escaped) {
2940 handle \- in a transliteration (becomes a literal -)
2941 if a pattern and not \N{, go treat as regular character
2942 handle \132 (octal characters)
2943 handle \x15 and \x{1234} (hex characters)
2944 handle \N{name} (named characters, also \N{3,5} in a pattern)
2945 handle \cV (control characters)
2946 handle printf-style backslashes (\f, \r, \n, etc)
2947 } (end switch)
2948 continue
2949 } (end if backslash)
2950 handle regular character
2951 } (end while character to read)
2952
2953*/
2954
2955STATIC char *
2956S_scan_const(pTHX_ char *start)
2957{
2958 dVAR;
2959 char *send = PL_bufend; /* end of the constant */
2960 SV *sv = newSV(send - start); /* sv for the constant. See
2961 note below on sizing. */
2962 char *s = start; /* start of the constant */
2963 char *d = SvPVX(sv); /* destination for copies */
2964 bool dorange = FALSE; /* are we in a translit range? */
2965 bool didrange = FALSE; /* did we just finish a range? */
2966 bool in_charclass = FALSE; /* within /[...]/ */
2967 bool has_utf8 = FALSE; /* Output constant is UTF8 */
2968 bool this_utf8 = cBOOL(UTF); /* Is the source string assumed
2969 to be UTF8? But, this can
2970 show as true when the source
2971 isn't utf8, as for example
2972 when it is entirely composed
2973 of hex constants */
2974 SV *res; /* result from charnames */
2975
2976 /* Note on sizing: The scanned constant is placed into sv, which is
2977 * initialized by newSV() assuming one byte of output for every byte of
2978 * input. This routine expects newSV() to allocate an extra byte for a
2979 * trailing NUL, which this routine will append if it gets to the end of
2980 * the input. There may be more bytes of input than output (eg., \N{LATIN
2981 * CAPITAL LETTER A}), or more output than input if the constant ends up
2982 * recoded to utf8, but each time a construct is found that might increase
2983 * the needed size, SvGROW() is called. Its size parameter each time is
2984 * based on the best guess estimate at the time, namely the length used so
2985 * far, plus the length the current construct will occupy, plus room for
2986 * the trailing NUL, plus one byte for every input byte still unscanned */
2987
2988 UV uv = UV_MAX; /* Initialize to weird value to try to catch any uses
2989 before set */
2990#ifdef EBCDIC
2991 UV literal_endpoint = 0;
2992 bool native_range = TRUE; /* turned to FALSE if the first endpoint is Unicode. */
2993#endif
2994
2995 PERL_ARGS_ASSERT_SCAN_CONST;
2996
2997 assert(PL_lex_inwhat != OP_TRANSR);
2998 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
2999 /* If we are doing a trans and we know we want UTF8 set expectation */
3000 has_utf8 = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
3001 this_utf8 = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
3002 }
3003
3004 /* Protect sv from errors and fatal warnings. */
3005 ENTER_with_name("scan_const");
3006 SAVEFREESV(sv);
3007
3008 while (s < send || dorange) {
3009
3010 /* get transliterations out of the way (they're most literal) */
3011 if (PL_lex_inwhat == OP_TRANS) {
3012 /* expand a range A-Z to the full set of characters. AIE! */
3013 if (dorange) {
3014 I32 i; /* current expanded character */
3015 I32 min; /* first character in range */
3016 I32 max; /* last character in range */
3017
3018#ifdef EBCDIC
3019 UV uvmax = 0;
3020#endif
3021
3022 if (has_utf8
3023#ifdef EBCDIC
3024 && !native_range
3025#endif
3026 ) {
3027 char * const c = (char*)utf8_hop((U8*)d, -1);
3028 char *e = d++;
3029 while (e-- > c)
3030 *(e + 1) = *e;
3031 *c = (char)UTF_TO_NATIVE(0xff);
3032 /* mark the range as done, and continue */
3033 dorange = FALSE;
3034 didrange = TRUE;
3035 continue;
3036 }
3037
3038 i = d - SvPVX_const(sv); /* remember current offset */
3039#ifdef EBCDIC
3040 SvGROW(sv,
3041 SvLEN(sv) + (has_utf8 ?
3042 (512 - UTF_CONTINUATION_MARK +
3043 UNISKIP(0x100))
3044 : 256));
3045 /* How many two-byte within 0..255: 128 in UTF-8,
3046 * 96 in UTF-8-mod. */
3047#else
3048 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
3049#endif
3050 d = SvPVX(sv) + i; /* refresh d after realloc */
3051#ifdef EBCDIC
3052 if (has_utf8) {
3053 int j;
3054 for (j = 0; j <= 1; j++) {
3055 char * const c = (char*)utf8_hop((U8*)d, -1);
3056 const UV uv = utf8n_to_uvchr((U8*)c, d - c, NULL, 0);
3057 if (j)
3058 min = (U8)uv;
3059 else if (uv < 256)
3060 max = (U8)uv;
3061 else {
3062 max = (U8)0xff; /* only to \xff */
3063 uvmax = uv; /* \x{100} to uvmax */
3064 }
3065 d = c; /* eat endpoint chars */
3066 }
3067 }
3068 else {
3069#endif
3070 d -= 2; /* eat the first char and the - */
3071 min = (U8)*d; /* first char in range */
3072 max = (U8)d[1]; /* last char in range */
3073#ifdef EBCDIC
3074 }
3075#endif
3076
3077 if (min > max) {
3078 Perl_croak(aTHX_
3079 "Invalid range \"%c-%c\" in transliteration operator",
3080 (char)min, (char)max);
3081 }
3082
3083#ifdef EBCDIC
3084 if (literal_endpoint == 2 &&
3085 ((isLOWER(min) && isLOWER(max)) ||
3086 (isUPPER(min) && isUPPER(max)))) {
3087 if (isLOWER(min)) {
3088 for (i = min; i <= max; i++)
3089 if (isLOWER(i))
3090 *d++ = NATIVE_TO_NEED(has_utf8,i);
3091 } else {
3092 for (i = min; i <= max; i++)
3093 if (isUPPER(i))
3094 *d++ = NATIVE_TO_NEED(has_utf8,i);
3095 }
3096 }
3097 else
3098#endif
3099 for (i = min; i <= max; i++)
3100#ifdef EBCDIC
3101 if (has_utf8) {
3102 const U8 ch = (U8)NATIVE_TO_UTF(i);
3103 if (UNI_IS_INVARIANT(ch))
3104 *d++ = (U8)i;
3105 else {
3106 *d++ = (U8)UTF8_EIGHT_BIT_HI(ch);
3107 *d++ = (U8)UTF8_EIGHT_BIT_LO(ch);
3108 }
3109 }
3110 else
3111#endif
3112 *d++ = (char)i;
3113
3114#ifdef EBCDIC
3115 if (uvmax) {
3116 d = (char*)uvchr_to_utf8((U8*)d, 0x100);
3117 if (uvmax > 0x101)
3118 *d++ = (char)UTF_TO_NATIVE(0xff);
3119 if (uvmax > 0x100)
3120 d = (char*)uvchr_to_utf8((U8*)d, uvmax);
3121 }
3122#endif
3123
3124 /* mark the range as done, and continue */
3125 dorange = FALSE;
3126 didrange = TRUE;
3127#ifdef EBCDIC
3128 literal_endpoint = 0;
3129#endif
3130 continue;
3131 }
3132
3133 /* range begins (ignore - as first or last char) */
3134 else if (*s == '-' && s+1 < send && s != start) {
3135 if (didrange) {
3136 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
3137 }
3138 if (has_utf8
3139#ifdef EBCDIC
3140 && !native_range
3141#endif
3142 ) {
3143 *d++ = (char)UTF_TO_NATIVE(0xff); /* use illegal utf8 byte--see pmtrans */
3144 s++;
3145 continue;
3146 }
3147 dorange = TRUE;
3148 s++;
3149 }
3150 else {
3151 didrange = FALSE;
3152#ifdef EBCDIC
3153 literal_endpoint = 0;
3154 native_range = TRUE;
3155#endif
3156 }
3157 }
3158
3159 /* if we get here, we're not doing a transliteration */
3160
3161 else if (*s == '[' && PL_lex_inpat && !in_charclass) {
3162 char *s1 = s-1;
3163 int esc = 0;
3164 while (s1 >= start && *s1-- == '\\')
3165 esc = !esc;
3166 if (!esc)
3167 in_charclass = TRUE;
3168 }
3169
3170 else if (*s == ']' && PL_lex_inpat && in_charclass) {
3171 char *s1 = s-1;
3172 int esc = 0;
3173 while (s1 >= start && *s1-- == '\\')
3174 esc = !esc;
3175 if (!esc)
3176 in_charclass = FALSE;
3177 }
3178
3179 /* skip for regexp comments /(?#comment)/, except for the last
3180 * char, which will be done separately.
3181 * Stop on (?{..}) and friends */
3182
3183 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
3184 if (s[2] == '#') {
3185 while (s+1 < send && *s != ')')
3186 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
3187 }
3188 else if (!PL_lex_casemods && !in_charclass &&
3189 ( s[2] == '{' /* This should match regcomp.c */
3190 || (s[2] == '?' && s[3] == '{')))
3191 {
3192 break;
3193 }
3194 }
3195
3196 /* likewise skip #-initiated comments in //x patterns */
3197 else if (*s == '#' && PL_lex_inpat &&
3198 ((PMOP*)PL_lex_inpat)->op_pmflags & RXf_PMf_EXTENDED) {
3199 while (s+1 < send && *s != '\n')
3200 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
3201 }
3202
3203 /* no further processing of single-quoted regex */
3204 else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'')
3205 goto default_action;
3206
3207 /* check for embedded arrays
3208 (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
3209 */
3210 else if (*s == '@' && s[1]) {
3211 if (isWORDCHAR_lazy_if(s+1,UTF))
3212 break;
3213 if (strchr(":'{$", s[1]))
3214 break;
3215 if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
3216 break; /* in regexp, neither @+ nor @- are interpolated */
3217 }
3218
3219 /* check for embedded scalars. only stop if we're sure it's a
3220 variable.
3221 */
3222 else if (*s == '$') {
3223 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
3224 break;
3225 if (s + 1 < send && !strchr("()| \r\n\t", s[1])) {
3226 if (s[1] == '\\') {
3227 Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
3228 "Possible unintended interpolation of $\\ in regex");
3229 }
3230 break; /* in regexp, $ might be tail anchor */
3231 }
3232 }
3233
3234 /* End of else if chain - OP_TRANS rejoin rest */
3235
3236 /* backslashes */
3237 if (*s == '\\' && s+1 < send) {
3238 char* e; /* Can be used for ending '}', etc. */
3239
3240 s++;
3241
3242 /* warn on \1 - \9 in substitution replacements, but note that \11
3243 * is an octal; and \19 is \1 followed by '9' */
3244 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
3245 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
3246 {
3247 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
3248 *--s = '$';
3249 break;
3250 }
3251
3252 /* string-change backslash escapes */
3253 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQF", *s)) {
3254 --s;
3255 break;
3256 }
3257 /* In a pattern, process \N, but skip any other backslash escapes.
3258 * This is because we don't want to translate an escape sequence
3259 * into a meta symbol and have the regex compiler use the meta
3260 * symbol meaning, e.g. \x{2E} would be confused with a dot. But
3261 * in spite of this, we do have to process \N here while the proper
3262 * charnames handler is in scope. See bugs #56444 and #62056.
3263 * There is a complication because \N in a pattern may also stand
3264 * for 'match a non-nl', and not mean a charname, in which case its
3265 * processing should be deferred to the regex compiler. To be a
3266 * charname it must be followed immediately by a '{', and not look
3267 * like \N followed by a curly quantifier, i.e., not something like
3268 * \N{3,}. regcurly returns a boolean indicating if it is a legal
3269 * quantifier */
3270 else if (PL_lex_inpat
3271 && (*s != 'N'
3272 || s[1] != '{'
3273 || regcurly(s + 1, FALSE)))
3274 {
3275 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
3276 goto default_action;
3277 }
3278
3279 switch (*s) {
3280
3281 /* quoted - in transliterations */
3282 case '-':
3283 if (PL_lex_inwhat == OP_TRANS) {
3284 *d++ = *s++;
3285 continue;
3286 }
3287 /* FALL THROUGH */
3288 default:
3289 {
3290 if ((isALPHANUMERIC(*s)))
3291 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
3292 "Unrecognized escape \\%c passed through",
3293 *s);
3294 /* default action is to copy the quoted character */
3295 goto default_action;
3296 }
3297
3298 /* eg. \132 indicates the octal constant 0132 */
3299 case '0': case '1': case '2': case '3':
3300 case '4': case '5': case '6': case '7':
3301 {
3302 I32 flags = PERL_SCAN_SILENT_ILLDIGIT;
3303 STRLEN len = 3;
3304 uv = NATIVE_TO_UNI(grok_oct(s, &len, &flags, NULL));
3305 s += len;
3306 if (len < 3 && s < send && isDIGIT(*s)
3307 && ckWARN(WARN_MISC))
3308 {
3309 Perl_warner(aTHX_ packWARN(WARN_MISC),
3310 "%s", form_short_octal_warning(s, len));
3311 }
3312 }
3313 goto NUM_ESCAPE_INSERT;
3314
3315 /* eg. \o{24} indicates the octal constant \024 */
3316 case 'o':
3317 {
3318 const char* error;
3319
3320 bool valid = grok_bslash_o(&s, &uv, &error,
3321 TRUE, /* Output warning */
3322 FALSE, /* Not strict */
3323 TRUE, /* Output warnings for
3324 non-portables */
3325 UTF);
3326 if (! valid) {
3327 yyerror(error);
3328 continue;
3329 }
3330 goto NUM_ESCAPE_INSERT;
3331 }
3332
3333 /* eg. \x24 indicates the hex constant 0x24 */
3334 case 'x':
3335 {
3336 const char* error;
3337
3338 bool valid = grok_bslash_x(&s, &uv, &error,
3339 TRUE, /* Output warning */
3340 FALSE, /* Not strict */
3341 TRUE, /* Output warnings for
3342 non-portables */
3343 UTF);
3344 if (! valid) {
3345 yyerror(error);
3346 continue;
3347 }
3348 }
3349
3350 NUM_ESCAPE_INSERT:
3351 /* Insert oct or hex escaped character. There will always be
3352 * enough room in sv since such escapes will be longer than any
3353 * UTF-8 sequence they can end up as, except if they force us
3354 * to recode the rest of the string into utf8 */
3355
3356 /* Here uv is the ordinal of the next character being added in
3357 * unicode (converted from native). */
3358 if (!UNI_IS_INVARIANT(uv)) {
3359 if (!has_utf8 && uv > 255) {
3360 /* Might need to recode whatever we have accumulated so
3361 * far if it contains any chars variant in utf8 or
3362 * utf-ebcdic. */
3363
3364 SvCUR_set(sv, d - SvPVX_const(sv));
3365 SvPOK_on(sv);
3366 *d = '\0';
3367 /* See Note on sizing above. */
3368 sv_utf8_upgrade_flags_grow(sv,
3369 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3370 UNISKIP(uv) + (STRLEN)(send - s) + 1);
3371 d = SvPVX(sv) + SvCUR(sv);
3372 has_utf8 = TRUE;
3373 }
3374
3375 if (has_utf8) {
3376 d = (char*)uvuni_to_utf8((U8*)d, uv);
3377 if (PL_lex_inwhat == OP_TRANS &&
3378 PL_sublex_info.sub_op) {
3379 PL_sublex_info.sub_op->op_private |=
3380 (PL_lex_repl ? OPpTRANS_FROM_UTF
3381 : OPpTRANS_TO_UTF);
3382 }
3383#ifdef EBCDIC
3384 if (uv > 255 && !dorange)
3385 native_range = FALSE;
3386#endif
3387 }
3388 else {
3389 *d++ = (char)uv;
3390 }
3391 }
3392 else {
3393 *d++ = (char) uv;
3394 }
3395 continue;
3396
3397 case 'N':
3398 /* In a non-pattern \N must be a named character, like \N{LATIN
3399 * SMALL LETTER A} or \N{U+0041}. For patterns, it also can
3400 * mean to match a non-newline. For non-patterns, named
3401 * characters are converted to their string equivalents. In
3402 * patterns, named characters are not converted to their
3403 * ultimate forms for the same reasons that other escapes
3404 * aren't. Instead, they are converted to the \N{U+...} form
3405 * to get the value from the charnames that is in effect right
3406 * now, while preserving the fact that it was a named character
3407 * so that the regex compiler knows this */
3408
3409 /* This section of code doesn't generally use the
3410 * NATIVE_TO_NEED() macro to transform the input. I (khw) did
3411 * a close examination of this macro and determined it is a
3412 * no-op except on utfebcdic variant characters. Every
3413 * character generated by this that would normally need to be
3414 * enclosed by this macro is invariant, so the macro is not
3415 * needed, and would complicate use of copy(). XXX There are
3416 * other parts of this file where the macro is used
3417 * inconsistently, but are saved by it being a no-op */
3418
3419 /* The structure of this section of code (besides checking for
3420 * errors and upgrading to utf8) is:
3421 * Further disambiguate between the two meanings of \N, and if
3422 * not a charname, go process it elsewhere
3423 * If of form \N{U+...}, pass it through if a pattern;
3424 * otherwise convert to utf8
3425 * Otherwise must be \N{NAME}: convert to \N{U+c1.c2...} if a
3426 * pattern; otherwise convert to utf8 */
3427
3428 /* Here, s points to the 'N'; the test below is guaranteed to
3429 * succeed if we are being called on a pattern as we already
3430 * know from a test above that the next character is a '{'.
3431 * On a non-pattern \N must mean 'named sequence, which
3432 * requires braces */
3433 s++;
3434 if (*s != '{') {
3435 yyerror("Missing braces on \\N{}");
3436 continue;
3437 }
3438 s++;
3439
3440 /* If there is no matching '}', it is an error. */
3441 if (! (e = strchr(s, '}'))) {
3442 if (! PL_lex_inpat) {
3443 yyerror("Missing right brace on \\N{}");
3444 } else {
3445 yyerror("Missing right brace on \\N{} or unescaped left brace after \\N.");
3446 }
3447 continue;
3448 }
3449
3450 /* Here it looks like a named character */
3451
3452 if (*s == 'U' && s[1] == '+') { /* \N{U+...} */
3453 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
3454 | PERL_SCAN_DISALLOW_PREFIX;
3455 STRLEN len;
3456
3457 /* For \N{U+...}, the '...' is a unicode value even on
3458 * EBCDIC machines */
3459 s += 2; /* Skip to next char after the 'U+' */
3460 len = e - s;
3461 uv = grok_hex(s, &len, &flags, NULL);
3462 if (len == 0 || len != (STRLEN)(e - s)) {
3463 yyerror("Invalid hexadecimal number in \\N{U+...}");
3464 s = e + 1;
3465 continue;
3466 }
3467
3468 if (PL_lex_inpat) {
3469
3470 /* On non-EBCDIC platforms, pass through to the regex
3471 * compiler unchanged. The reason we evaluated the
3472 * number above is to make sure there wasn't a syntax
3473 * error. But on EBCDIC we convert to native so
3474 * downstream code can continue to assume it's native
3475 */
3476 s -= 5; /* Include the '\N{U+' */
3477#ifdef EBCDIC
3478 d += my_snprintf(d, e - s + 1 + 1, /* includes the }
3479 and the \0 */
3480 "\\N{U+%X}",
3481 (unsigned int) UNI_TO_NATIVE(uv));
3482#else
3483 Copy(s, d, e - s + 1, char); /* 1 = include the } */
3484 d += e - s + 1;
3485#endif
3486 }
3487 else { /* Not a pattern: convert the hex to string */
3488
3489 /* If destination is not in utf8, unconditionally
3490 * recode it to be so. This is because \N{} implies
3491 * Unicode semantics, and scalars have to be in utf8
3492 * to guarantee those semantics */
3493 if (! has_utf8) {
3494 SvCUR_set(sv, d - SvPVX_const(sv));
3495 SvPOK_on(sv);
3496 *d = '\0';
3497 /* See Note on sizing above. */
3498 sv_utf8_upgrade_flags_grow(
3499 sv,
3500 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3501 UNISKIP(uv) + (STRLEN)(send - e) + 1);
3502 d = SvPVX(sv) + SvCUR(sv);
3503 has_utf8 = TRUE;
3504 }
3505
3506 /* Add the string to the output */
3507 if (UNI_IS_INVARIANT(uv)) {
3508 *d++ = (char) uv;
3509 }
3510 else d = (char*)uvuni_to_utf8((U8*)d, uv);
3511 }
3512 }
3513 else /* Here is \N{NAME} but not \N{U+...}. */
3514 if ((res = get_and_check_backslash_N_name(s, e)))
3515 {
3516 STRLEN len;
3517 const char *str = SvPV_const(res, len);
3518 if (PL_lex_inpat) {
3519
3520 if (! len) { /* The name resolved to an empty string */
3521 Copy("\\N{}", d, 4, char);
3522 d += 4;
3523 }
3524 else {
3525 /* In order to not lose information for the regex
3526 * compiler, pass the result in the specially made
3527 * syntax: \N{U+c1.c2.c3...}, where c1 etc. are
3528 * the code points in hex of each character
3529 * returned by charnames */
3530
3531 const char *str_end = str + len;
3532 const STRLEN off = d - SvPVX_const(sv);
3533
3534 if (! SvUTF8(res)) {
3535 /* For the non-UTF-8 case, we can determine the
3536 * exact length needed without having to parse
3537 * through the string. Each character takes up
3538 * 2 hex digits plus either a trailing dot or
3539 * the "}" */
3540 d = off + SvGROW(sv, off
3541 + 3 * len
3542 + 6 /* For the "\N{U+", and
3543 trailing NUL */
3544 + (STRLEN)(send - e));
3545 Copy("\\N{U+", d, 5, char);
3546 d += 5;
3547 while (str < str_end) {
3548 char hex_string[4];
3549 my_snprintf(hex_string, sizeof(hex_string),
3550 "%02X.", (U8) *str);
3551 Copy(hex_string, d, 3, char);
3552 d += 3;
3553 str++;
3554 }
3555 d--; /* We will overwrite below the final
3556 dot with a right brace */
3557 }
3558 else {
3559 STRLEN char_length; /* cur char's byte length */
3560
3561 /* and the number of bytes after this is
3562 * translated into hex digits */
3563 STRLEN output_length;
3564
3565 /* 2 hex per byte; 2 chars for '\N'; 2 chars
3566 * for max('U+', '.'); and 1 for NUL */
3567 char hex_string[2 * UTF8_MAXBYTES + 5];
3568
3569 /* Get the first character of the result. */
3570 U32 uv = utf8n_to_uvuni((U8 *) str,
3571 len,
3572 &char_length,
3573 UTF8_ALLOW_ANYUV);
3574 /* Convert first code point to hex, including
3575 * the boiler plate before it. For all these,
3576 * we convert to native format so that
3577 * downstream code can continue to assume the
3578 * input is native */
3579 output_length =
3580 my_snprintf(hex_string, sizeof(hex_string),
3581 "\\N{U+%X",
3582 (unsigned int) UNI_TO_NATIVE(uv));
3583
3584 /* Make sure there is enough space to hold it */
3585 d = off + SvGROW(sv, off
3586 + output_length
3587 + (STRLEN)(send - e)
3588 + 2); /* '}' + NUL */
3589 /* And output it */
3590 Copy(hex_string, d, output_length, char);
3591 d += output_length;
3592
3593 /* For each subsequent character, append dot and
3594 * its ordinal in hex */
3595 while ((str += char_length) < str_end) {
3596 const STRLEN off = d - SvPVX_const(sv);
3597 U32 uv = utf8n_to_uvuni((U8 *) str,
3598 str_end - str,
3599 &char_length,
3600 UTF8_ALLOW_ANYUV);
3601 output_length =
3602 my_snprintf(hex_string,
3603 sizeof(hex_string),
3604 ".%X",
3605 (unsigned int) UNI_TO_NATIVE(uv));
3606
3607 d = off + SvGROW(sv, off
3608 + output_length
3609 + (STRLEN)(send - e)
3610 + 2); /* '}' + NUL */
3611 Copy(hex_string, d, output_length, char);
3612 d += output_length;
3613 }
3614 }
3615
3616 *d++ = '}'; /* Done. Add the trailing brace */
3617 }
3618 }
3619 else { /* Here, not in a pattern. Convert the name to a
3620 * string. */
3621
3622 /* If destination is not in utf8, unconditionally
3623 * recode it to be so. This is because \N{} implies
3624 * Unicode semantics, and scalars have to be in utf8
3625 * to guarantee those semantics */
3626 if (! has_utf8) {
3627 SvCUR_set(sv, d - SvPVX_const(sv));
3628 SvPOK_on(sv);
3629 *d = '\0';
3630 /* See Note on sizing above. */
3631 sv_utf8_upgrade_flags_grow(sv,
3632 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3633 len + (STRLEN)(send - s) + 1);
3634 d = SvPVX(sv) + SvCUR(sv);
3635 has_utf8 = TRUE;
3636 } else if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
3637
3638 /* See Note on sizing above. (NOTE: SvCUR() is not
3639 * set correctly here). */
3640 const STRLEN off = d - SvPVX_const(sv);
3641 d = off + SvGROW(sv, off + len + (STRLEN)(send - s) + 1);
3642 }
3643 Copy(str, d, len, char);
3644 d += len;
3645 }
3646
3647 SvREFCNT_dec(res);
3648
3649 } /* End \N{NAME} */
3650#ifdef EBCDIC
3651 if (!dorange)
3652 native_range = FALSE; /* \N{} is defined to be Unicode */
3653#endif
3654 s = e + 1; /* Point to just after the '}' */
3655 continue;
3656
3657 /* \c is a control character */
3658 case 'c':
3659 s++;
3660 if (s < send) {
3661 *d++ = grok_bslash_c(*s++, has_utf8, 1);
3662 }
3663 else {
3664 yyerror("Missing control char name in \\c");
3665 }
3666 continue;
3667
3668 /* printf-style backslashes, formfeeds, newlines, etc */
3669 case 'b':
3670 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
3671 break;
3672 case 'n':
3673 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
3674 break;
3675 case 'r':
3676 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
3677 break;
3678 case 'f':
3679 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
3680 break;
3681 case 't':
3682 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
3683 break;
3684 case 'e':
3685 *d++ = ASCII_TO_NEED(has_utf8,'\033');
3686 break;
3687 case 'a':
3688 *d++ = ASCII_TO_NEED(has_utf8,'\007');
3689 break;
3690 } /* end switch */
3691
3692 s++;
3693 continue;
3694 } /* end if (backslash) */
3695#ifdef EBCDIC
3696 else
3697 literal_endpoint++;
3698#endif
3699
3700 default_action:
3701 /* If we started with encoded form, or already know we want it,
3702 then encode the next character */
3703 if (! NATIVE_IS_INVARIANT((U8)(*s)) && (this_utf8 || has_utf8)) {
3704 STRLEN len = 1;
3705
3706
3707 /* One might think that it is wasted effort in the case of the
3708 * source being utf8 (this_utf8 == TRUE) to take the next character
3709 * in the source, convert it to an unsigned value, and then convert
3710 * it back again. But the source has not been validated here. The
3711 * routine that does the conversion checks for errors like
3712 * malformed utf8 */
3713
3714 const UV nextuv = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
3715 const STRLEN need = UNISKIP(NATIVE_TO_UNI(nextuv));
3716 if (!has_utf8) {
3717 SvCUR_set(sv, d - SvPVX_const(sv));
3718 SvPOK_on(sv);
3719 *d = '\0';
3720 /* See Note on sizing above. */
3721 sv_utf8_upgrade_flags_grow(sv,
3722 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3723 need + (STRLEN)(send - s) + 1);
3724 d = SvPVX(sv) + SvCUR(sv);
3725 has_utf8 = TRUE;
3726 } else if (need > len) {
3727 /* encoded value larger than old, may need extra space (NOTE:
3728 * SvCUR() is not set correctly here). See Note on sizing
3729 * above. */
3730 const STRLEN off = d - SvPVX_const(sv);
3731 d = SvGROW(sv, off + need + (STRLEN)(send - s) + 1) + off;
3732 }
3733 s += len;
3734
3735 d = (char*)uvchr_to_utf8((U8*)d, nextuv);
3736#ifdef EBCDIC
3737 if (uv > 255 && !dorange)
3738 native_range = FALSE;
3739#endif
3740 }
3741 else {
3742 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
3743 }
3744 } /* while loop to process each character */
3745
3746 /* terminate the string and set up the sv */
3747 *d = '\0';
3748 SvCUR_set(sv, d - SvPVX_const(sv));
3749 if (SvCUR(sv) >= SvLEN(sv))
3750 Perl_croak(aTHX_ "panic: constant overflowed allocated space, %"UVuf
3751 " >= %"UVuf, (UV)SvCUR(sv), (UV)SvLEN(sv));
3752
3753 SvPOK_on(sv);
3754 if (PL_encoding && !has_utf8) {
3755 sv_recode_to_utf8(sv, PL_encoding);
3756 if (SvUTF8(sv))
3757 has_utf8 = TRUE;
3758 }
3759 if (has_utf8) {
3760 SvUTF8_on(sv);
3761 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
3762 PL_sublex_info.sub_op->op_private |=
3763 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
3764 }
3765 }
3766
3767 /* shrink the sv if we allocated more than we used */
3768 if (SvCUR(sv) + 5 < SvLEN(sv)) {
3769 SvPV_shrink_to_cur(sv);
3770 }
3771
3772 /* return the substring (via pl_yylval) only if we parsed anything */
3773 if (s > PL_bufptr) {
3774 SvREFCNT_inc_simple_void_NN(sv);
3775 if ( (PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ))
3776 && ! PL_parser->lex_re_reparsing)
3777 {
3778 const char *const key = PL_lex_inpat ? "qr" : "q";
3779 const STRLEN keylen = PL_lex_inpat ? 2 : 1;
3780 const char *type;
3781 STRLEN typelen;
3782
3783 if (PL_lex_inwhat == OP_TRANS) {
3784 type = "tr";
3785 typelen = 2;
3786 } else if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) {
3787 type = "s";
3788 typelen = 1;
3789 } else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'') {
3790 type = "q";
3791 typelen = 1;
3792 } else {
3793 type = "qq";
3794 typelen = 2;
3795 }
3796
3797 sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL,
3798 type, typelen);
3799 }
3800 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3801 }
3802 LEAVE_with_name("scan_const");
3803 return s;
3804}
3805
3806/* S_intuit_more
3807 * Returns TRUE if there's more to the expression (e.g., a subscript),
3808 * FALSE otherwise.
3809 *
3810 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
3811 *
3812 * ->[ and ->{ return TRUE
3813 * { and [ outside a pattern are always subscripts, so return TRUE
3814 * if we're outside a pattern and it's not { or [, then return FALSE
3815 * if we're in a pattern and the first char is a {
3816 * {4,5} (any digits around the comma) returns FALSE
3817 * if we're in a pattern and the first char is a [
3818 * [] returns FALSE
3819 * [SOMETHING] has a funky algorithm to decide whether it's a
3820 * character class or not. It has to deal with things like
3821 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
3822 * anything else returns TRUE
3823 */
3824
3825/* This is the one truly awful dwimmer necessary to conflate C and sed. */
3826
3827STATIC int
3828S_intuit_more(pTHX_ char *s)
3829{
3830 dVAR;
3831
3832 PERL_ARGS_ASSERT_INTUIT_MORE;
3833
3834 if (PL_lex_brackets)
3835 return TRUE;
3836 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
3837 return TRUE;
3838 if (*s != '{' && *s != '[')
3839 return FALSE;
3840 if (!PL_lex_inpat)
3841 return TRUE;
3842
3843 /* In a pattern, so maybe we have {n,m}. */
3844 if (*s == '{') {
3845 if (regcurly(s, FALSE)) {
3846 return FALSE;
3847 }
3848 return TRUE;
3849 }
3850
3851 /* On the other hand, maybe we have a character class */
3852
3853 s++;
3854 if (*s == ']' || *s == '^')
3855 return FALSE;
3856 else {
3857 /* this is terrifying, and it works */
3858 int weight;
3859 char seen[256];
3860 const char * const send = strchr(s,']');
3861 unsigned char un_char, last_un_char;
3862 char tmpbuf[sizeof PL_tokenbuf * 4];
3863
3864 if (!send) /* has to be an expression */
3865 return TRUE;
3866 weight = 2; /* let's weigh the evidence */
3867
3868 if (*s == '$')
3869 weight -= 3;
3870 else if (isDIGIT(*s)) {
3871 if (s[1] != ']') {
3872 if (isDIGIT(s[1]) && s[2] == ']')
3873 weight -= 10;
3874 }
3875 else
3876 weight -= 100;
3877 }
3878 Zero(seen,256,char);
3879 un_char = 255;
3880 for (; s < send; s++) {
3881 last_un_char = un_char;
3882 un_char = (unsigned char)*s;
3883 switch (*s) {
3884 case '@':
3885 case '&':
3886 case '$':
3887 weight -= seen[un_char] * 10;
3888 if (isWORDCHAR_lazy_if(s+1,UTF)) {
3889 int len;
3890 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
3891 len = (int)strlen(tmpbuf);
3892 if (len > 1 && gv_fetchpvn_flags(tmpbuf, len,
3893 UTF ? SVf_UTF8 : 0, SVt_PV))
3894 weight -= 100;
3895 else
3896 weight -= 10;
3897 }
3898 else if (*s == '$' && s[1] &&
3899 strchr("[#!%*<>()-=",s[1])) {
3900 if (/*{*/ strchr("])} =",s[2]))
3901 weight -= 10;
3902 else
3903 weight -= 1;
3904 }
3905 break;
3906 case '\\':
3907 un_char = 254;
3908 if (s[1]) {
3909 if (strchr("wds]",s[1]))
3910 weight += 100;
3911 else if (seen[(U8)'\''] || seen[(U8)'"'])
3912 weight += 1;
3913 else if (strchr("rnftbxcav",s[1]))
3914 weight += 40;
3915 else if (isDIGIT(s[1])) {
3916 weight += 40;
3917 while (s[1] && isDIGIT(s[1]))
3918 s++;
3919 }
3920 }
3921 else
3922 weight += 100;
3923 break;
3924 case '-':
3925 if (s[1] == '\\')
3926 weight += 50;
3927 if (strchr("aA01! ",last_un_char))
3928 weight += 30;
3929 if (strchr("zZ79~",s[1]))
3930 weight += 30;
3931 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
3932 weight -= 5; /* cope with negative subscript */
3933 break;
3934 default:
3935 if (!isWORDCHAR(last_un_char)
3936 && !(last_un_char == '$' || last_un_char == '@'
3937 || last_un_char == '&')
3938 && isALPHA(*s) && s[1] && isALPHA(s[1])) {
3939 char *d = tmpbuf;
3940 while (isALPHA(*s))
3941 *d++ = *s++;
3942 *d = '\0';
3943 if (keyword(tmpbuf, d - tmpbuf, 0))
3944 weight -= 150;
3945 }
3946 if (un_char == last_un_char + 1)
3947 weight += 5;
3948 weight -= seen[un_char];
3949 break;
3950 }
3951 seen[un_char]++;
3952 }
3953 if (weight >= 0) /* probably a character class */
3954 return FALSE;
3955 }
3956
3957 return TRUE;
3958}
3959
3960/*
3961 * S_intuit_method
3962 *
3963 * Does all the checking to disambiguate
3964 * foo bar
3965 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
3966 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
3967 *
3968 * First argument is the stuff after the first token, e.g. "bar".
3969 *
3970 * Not a method if foo is a filehandle.
3971 * Not a method if foo is a subroutine prototyped to take a filehandle.
3972 * Not a method if it's really "Foo $bar"
3973 * Method if it's "foo $bar"
3974 * Not a method if it's really "print foo $bar"
3975 * Method if it's really "foo package::" (interpreted as package->foo)
3976 * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
3977 * Not a method if bar is a filehandle or package, but is quoted with
3978 * =>
3979 */
3980
3981STATIC int
3982S_intuit_method(pTHX_ char *start, GV *gv, CV *cv)
3983{
3984 dVAR;
3985 char *s = start + (*start == '$');
3986 char tmpbuf[sizeof PL_tokenbuf];
3987 STRLEN len;
3988 GV* indirgv;
3989#ifdef PERL_MAD
3990 int soff;
3991#endif
3992
3993 PERL_ARGS_ASSERT_INTUIT_METHOD;
3994
3995 if (gv && SvTYPE(gv) == SVt_PVGV && GvIO(gv))
3996 return 0;
3997 if (cv && SvPOK(cv)) {
3998 const char *proto = CvPROTO(cv);
3999 if (proto) {
4000 while (*proto && (isSPACE(*proto) || *proto == ';'))
4001 proto++;
4002 if (*proto == '*')
4003 return 0;
4004 }
4005 }
4006
4007 if (*start == '$') {
4008 if (cv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY ||
4009 isUPPER(*PL_tokenbuf))
4010 return 0;
4011#ifdef PERL_MAD
4012 len = start - SvPVX(PL_linestr);
4013#endif
4014 s = PEEKSPACE(s);
4015#ifdef PERL_MAD
4016 start = SvPVX(PL_linestr) + len;
4017#endif
4018 PL_bufptr = start;
4019 PL_expect = XREF;
4020 return *s == '(' ? FUNCMETH : METHOD;
4021 }
4022
4023 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
4024 /* start is the beginning of the possible filehandle/object,
4025 * and s is the end of it
4026 * tmpbuf is a copy of it (but with single quotes as double colons)
4027 */
4028
4029 if (!keyword(tmpbuf, len, 0)) {
4030 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
4031 len -= 2;
4032 tmpbuf[len] = '\0';
4033#ifdef PERL_MAD
4034 soff = s - SvPVX(PL_linestr);
4035#endif
4036 goto bare_package;
4037 }
4038 indirgv = gv_fetchpvn_flags(tmpbuf, len, ( UTF ? SVf_UTF8 : 0 ), SVt_PVCV);
4039 if (indirgv && GvCVu(indirgv))
4040 return 0;
4041 /* filehandle or package name makes it a method */
4042 if (!cv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, UTF ? SVf_UTF8 : 0)) {
4043#ifdef PERL_MAD
4044 soff = s - SvPVX(PL_linestr);
4045#endif
4046 s = PEEKSPACE(s);
4047 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
4048 return 0; /* no assumptions -- "=>" quotes bareword */
4049 bare_package:
4050 start_force(PL_curforce);
4051 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0,
4052 S_newSV_maybe_utf8(aTHX_ tmpbuf, len));
4053 NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
4054 if (PL_madskills)
4055 curmad('X', newSVpvn_flags(start,SvPVX(PL_linestr) + soff - start,
4056 ( UTF ? SVf_UTF8 : 0 )));
4057 PL_expect = XTERM;
4058 force_next(WORD);
4059 PL_bufptr = s;
4060#ifdef PERL_MAD
4061 PL_bufptr = SvPVX(PL_linestr) + soff; /* restart before space */
4062#endif
4063 return *s == '(' ? FUNCMETH : METHOD;
4064 }
4065 }
4066 return 0;
4067}
4068
4069/* Encoded script support. filter_add() effectively inserts a
4070 * 'pre-processing' function into the current source input stream.
4071 * Note that the filter function only applies to the current source file
4072 * (e.g., it will not affect files 'require'd or 'use'd by this one).
4073 *
4074 * The datasv parameter (which may be NULL) can be used to pass
4075 * private data to this instance of the filter. The filter function
4076 * can recover the SV using the FILTER_DATA macro and use it to
4077 * store private buffers and state information.
4078 *
4079 * The supplied datasv parameter is upgraded to a PVIO type
4080 * and the IoDIRP/IoANY field is used to store the function pointer,
4081 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
4082 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
4083 * private use must be set using malloc'd pointers.
4084 */
4085
4086SV *
4087Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
4088{
4089 dVAR;
4090 if (!funcp)
4091 return NULL;
4092
4093 if (!PL_parser)
4094 return NULL;
4095
4096 if (PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS)
4097 Perl_croak(aTHX_ "Source filters apply only to byte streams");
4098
4099 if (!PL_rsfp_filters)
4100 PL_rsfp_filters = newAV();
4101 if (!datasv)
4102 datasv = newSV(0);
4103 SvUPGRADE(datasv, SVt_PVIO);
4104 IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
4105 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
4106 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
4107 FPTR2DPTR(void *, IoANY(datasv)),
4108 SvPV_nolen(datasv)));
4109 av_unshift(PL_rsfp_filters, 1);
4110 av_store(PL_rsfp_filters, 0, datasv) ;
4111 if (
4112 !PL_parser->filtered
4113 && PL_parser->lex_flags & LEX_EVALBYTES
4114 && PL_bufptr < PL_bufend
4115 ) {
4116 const char *s = PL_bufptr;
4117 while (s < PL_bufend) {
4118 if (*s == '\n') {
4119 SV *linestr = PL_parser->linestr;
4120 char *buf = SvPVX(linestr);
4121 STRLEN const bufptr_pos = PL_parser->bufptr - buf;
4122 STRLEN const oldbufptr_pos = PL_parser->oldbufptr - buf;
4123 STRLEN const oldoldbufptr_pos=PL_parser->oldoldbufptr-buf;
4124 STRLEN const linestart_pos = PL_parser->linestart - buf;
4125 STRLEN const last_uni_pos =
4126 PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
4127 STRLEN const last_lop_pos =
4128 PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
4129 av_push(PL_rsfp_filters, linestr);
4130 PL_parser->linestr =
4131 newSVpvn(SvPVX(linestr), ++s-SvPVX(linestr));
4132 buf = SvPVX(PL_parser->linestr);
4133 PL_parser->bufend = buf + SvCUR(PL_parser->linestr);
4134 PL_parser->bufptr = buf + bufptr_pos;
4135 PL_parser->oldbufptr = buf + oldbufptr_pos;
4136 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
4137 PL_parser->linestart = buf + linestart_pos;
4138 if (PL_parser->last_uni)
4139 PL_parser->last_uni = buf + last_uni_pos;
4140 if (PL_parser->last_lop)
4141 PL_parser->last_lop = buf + last_lop_pos;
4142 SvLEN(linestr) = SvCUR(linestr);
4143 SvCUR(linestr) = s-SvPVX(linestr);
4144 PL_parser->filtered = 1;
4145 break;
4146 }
4147 s++;
4148 }
4149 }
4150 return(datasv);
4151}
4152
4153
4154/* Delete most recently added instance of this filter function. */
4155void
4156Perl_filter_del(pTHX_ filter_t funcp)
4157{
4158 dVAR;
4159 SV *datasv;
4160
4161 PERL_ARGS_ASSERT_FILTER_DEL;
4162
4163#ifdef DEBUGGING
4164 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
4165 FPTR2DPTR(void*, funcp)));
4166#endif
4167 if (!PL_parser || !PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
4168 return;
4169 /* if filter is on top of stack (usual case) just pop it off */
4170 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
4171 if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
4172 sv_free(av_pop(PL_rsfp_filters));
4173
4174 return;
4175 }
4176 /* we need to search for the correct entry and clear it */
4177 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
4178}
4179
4180
4181/* Invoke the idxth filter function for the current rsfp. */
4182/* maxlen 0 = read one text line */
4183I32
4184Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
4185{
4186 dVAR;
4187 filter_t funcp;
4188 SV *datasv = NULL;
4189 /* This API is bad. It should have been using unsigned int for maxlen.
4190 Not sure if we want to change the API, but if not we should sanity
4191 check the value here. */
4192 unsigned int correct_length
4193 = maxlen < 0 ?
4194#ifdef PERL_MICRO
4195 0x7FFFFFFF
4196#else
4197 INT_MAX
4198#endif
4199 : maxlen;
4200
4201 PERL_ARGS_ASSERT_FILTER_READ;
4202
4203 if (!PL_parser || !PL_rsfp_filters)
4204 return -1;
4205 if (idx > AvFILLp(PL_rsfp_filters)) { /* Any more filters? */
4206 /* Provide a default input filter to make life easy. */
4207 /* Note that we append to the line. This is handy. */
4208 DEBUG_P(PerlIO_printf(Perl_debug_log,
4209 "filter_read %d: from rsfp\n", idx));
4210 if (correct_length) {
4211 /* Want a block */
4212 int len ;
4213 const int old_len = SvCUR(buf_sv);
4214
4215 /* ensure buf_sv is large enough */
4216 SvGROW(buf_sv, (STRLEN)(old_len + correct_length + 1)) ;
4217 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
4218 correct_length)) <= 0) {
4219 if (PerlIO_error(PL_rsfp))
4220 return -1; /* error */
4221 else
4222 return 0 ; /* end of file */
4223 }
4224 SvCUR_set(buf_sv, old_len + len) ;
4225 SvPVX(buf_sv)[old_len + len] = '\0';
4226 } else {
4227 /* Want a line */
4228 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
4229 if (PerlIO_error(PL_rsfp))
4230 return -1; /* error */
4231 else
4232 return 0 ; /* end of file */
4233 }
4234 }
4235 return SvCUR(buf_sv);
4236 }
4237 /* Skip this filter slot if filter has been deleted */
4238 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
4239 DEBUG_P(PerlIO_printf(Perl_debug_log,
4240 "filter_read %d: skipped (filter deleted)\n",
4241 idx));
4242 return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
4243 }
4244 if (SvTYPE(datasv) != SVt_PVIO) {
4245 if (correct_length) {
4246 /* Want a block */
4247 const STRLEN remainder = SvLEN(datasv) - SvCUR(datasv);
4248 if (!remainder) return 0; /* eof */
4249 if (correct_length > remainder) correct_length = remainder;
4250 sv_catpvn(buf_sv, SvEND(datasv), correct_length);
4251 SvCUR_set(datasv, SvCUR(datasv) + correct_length);
4252 } else {
4253 /* Want a line */
4254 const char *s = SvEND(datasv);
4255 const char *send = SvPVX(datasv) + SvLEN(datasv);
4256 while (s < send) {
4257 if (*s == '\n') {
4258 s++;
4259 break;
4260 }
4261 s++;
4262 }
4263 if (s == send) return 0; /* eof */
4264 sv_catpvn(buf_sv, SvEND(datasv), s-SvEND(datasv));
4265 SvCUR_set(datasv, s-SvPVX(datasv));
4266 }
4267 return SvCUR(buf_sv);
4268 }
4269 /* Get function pointer hidden within datasv */
4270 funcp = DPTR2FPTR(filter_t, IoANY(datasv));
4271 DEBUG_P(PerlIO_printf(Perl_debug_log,
4272 "filter_read %d: via function %p (%s)\n",
4273 idx, (void*)datasv, SvPV_nolen_const(datasv)));
4274 /* Call function. The function is expected to */
4275 /* call "FILTER_READ(idx+1, buf_sv)" first. */
4276 /* Return: <0:error, =0:eof, >0:not eof */
4277 return (*funcp)(aTHX_ idx, buf_sv, correct_length);
4278}
4279
4280STATIC char *
4281S_filter_gets(pTHX_ SV *sv, STRLEN append)
4282{
4283 dVAR;
4284
4285 PERL_ARGS_ASSERT_FILTER_GETS;
4286
4287#ifdef PERL_CR_FILTER
4288 if (!PL_rsfp_filters) {
4289 filter_add(S_cr_textfilter,NULL);
4290 }
4291#endif
4292 if (PL_rsfp_filters) {
4293 if (!append)
4294 SvCUR_set(sv, 0); /* start with empty line */
4295 if (FILTER_READ(0, sv, 0) > 0)
4296 return ( SvPVX(sv) ) ;
4297 else
4298 return NULL ;
4299 }
4300 else
4301 return (sv_gets(sv, PL_rsfp, append));
4302}
4303
4304STATIC HV *
4305S_find_in_my_stash(pTHX_ const char *pkgname, STRLEN len)
4306{
4307 dVAR;
4308 GV *gv;
4309
4310 PERL_ARGS_ASSERT_FIND_IN_MY_STASH;
4311
4312 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
4313 return PL_curstash;
4314
4315 if (len > 2 &&
4316 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
4317 (gv = gv_fetchpvn_flags(pkgname, len, ( UTF ? SVf_UTF8 : 0 ), SVt_PVHV)))
4318 {
4319 return GvHV(gv); /* Foo:: */
4320 }
4321
4322 /* use constant CLASS => 'MyClass' */
4323 gv = gv_fetchpvn_flags(pkgname, len, UTF ? SVf_UTF8 : 0, SVt_PVCV);
4324 if (gv && GvCV(gv)) {
4325 SV * const sv = cv_const_sv(GvCV(gv));
4326 if (sv)
4327 pkgname = SvPV_const(sv, len);
4328 }
4329
4330 return gv_stashpvn(pkgname, len, UTF ? SVf_UTF8 : 0);
4331}
4332
4333/*
4334 * S_readpipe_override
4335 * Check whether readpipe() is overridden, and generates the appropriate
4336 * optree, provided sublex_start() is called afterwards.
4337 */
4338STATIC void
4339S_readpipe_override(pTHX)
4340{
4341 GV **gvp;
4342 GV *gv_readpipe = gv_fetchpvs("readpipe", GV_NOTQUAL, SVt_PVCV);
4343 pl_yylval.ival = OP_BACKTICK;
4344 if ((gv_readpipe
4345 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe))
4346 ||
4347 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readpipe", FALSE))
4348 && (gv_readpipe = *gvp) && isGV_with_GP(gv_readpipe)
4349 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe)))
4350 {
4351 PL_lex_op = (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
4352 op_append_elem(OP_LIST,
4353 newSVOP(OP_CONST, 0, &PL_sv_undef), /* value will be read later */
4354 newCVREF(0, newGVOP(OP_GV, 0, gv_readpipe))));
4355 }
4356}
4357
4358#ifdef PERL_MAD
4359 /*
4360 * Perl_madlex
4361 * The intent of this yylex wrapper is to minimize the changes to the
4362 * tokener when we aren't interested in collecting madprops. It remains
4363 * to be seen how successful this strategy will be...
4364 */
4365
4366int
4367Perl_madlex(pTHX)
4368{
4369 int optype;
4370 char *s = PL_bufptr;
4371
4372 /* make sure PL_thiswhite is initialized */
4373 PL_thiswhite = 0;
4374 PL_thismad = 0;
4375
4376 /* previous token ate up our whitespace? */
4377 if (!PL_lasttoke && PL_nextwhite) {
4378 PL_thiswhite = PL_nextwhite;
4379 PL_nextwhite = 0;
4380 }
4381
4382 /* isolate the token, and figure out where it is without whitespace */
4383 PL_realtokenstart = -1;
4384 PL_thistoken = 0;
4385 optype = yylex();
4386 s = PL_bufptr;
4387 assert(PL_curforce < 0);
4388
4389 if (!PL_thismad || PL_thismad->mad_key == '^') { /* not forced already? */
4390 if (!PL_thistoken) {
4391 if (PL_realtokenstart < 0 || !CopLINE(PL_curcop))
4392 PL_thistoken = newSVpvs("");
4393 else {
4394 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
4395 PL_thistoken = newSVpvn(tstart, s - tstart);
4396 }
4397 }
4398 if (PL_thismad) /* install head */
4399 CURMAD('X', PL_thistoken);
4400 }
4401
4402 /* last whitespace of a sublex? */
4403 if (optype == ')' && PL_endwhite) {
4404 CURMAD('X', PL_endwhite);
4405 }
4406
4407 if (!PL_thismad) {
4408
4409 /* if no whitespace and we're at EOF, bail. Otherwise fake EOF below. */
4410 if (!PL_thiswhite && !PL_endwhite && !optype) {
4411 sv_free(PL_thistoken);
4412 PL_thistoken = 0;
4413 return 0;
4414 }
4415
4416 /* put off final whitespace till peg */
4417 if (optype == ';' && !PL_rsfp && !PL_parser->filtered) {
4418 PL_nextwhite = PL_thiswhite;
4419 PL_thiswhite = 0;
4420 }
4421 else if (PL_thisopen) {
4422 CURMAD('q', PL_thisopen);
4423 if (PL_thistoken)
4424 sv_free(PL_thistoken);
4425 PL_thistoken = 0;
4426 }
4427 else {
4428 /* Store actual token text as madprop X */
4429 CURMAD('X', PL_thistoken);
4430 }
4431
4432 if (PL_thiswhite) {
4433 /* add preceding whitespace as madprop _ */
4434 CURMAD('_', PL_thiswhite);
4435 }
4436
4437 if (PL_thisstuff) {
4438 /* add quoted material as madprop = */
4439 CURMAD('=', PL_thisstuff);
4440 }
4441
4442 if (PL_thisclose) {
4443 /* add terminating quote as madprop Q */
4444 CURMAD('Q', PL_thisclose);
4445 }
4446 }
4447
4448 /* special processing based on optype */
4449
4450 switch (optype) {
4451
4452 /* opval doesn't need a TOKEN since it can already store mp */
4453 case WORD:
4454 case METHOD:
4455 case FUNCMETH:
4456 case THING:
4457 case PMFUNC:
4458 case PRIVATEREF:
4459 case FUNC0SUB:
4460 case UNIOPSUB:
4461 case LSTOPSUB:
4462 if (pl_yylval.opval)
4463 append_madprops(PL_thismad, pl_yylval.opval, 0);
4464 PL_thismad = 0;
4465 return optype;
4466
4467 /* fake EOF */
4468 case 0:
4469 optype = PEG;
4470 if (PL_endwhite) {
4471 addmad(newMADsv('p', PL_endwhite), &PL_thismad, 0);
4472 PL_endwhite = 0;
4473 }
4474 break;
4475
4476 /* pval */
4477 case LABEL:
4478 break;
4479
4480 case ']':
4481 case '}':
4482 if (PL_faketokens)
4483 break;
4484 /* remember any fake bracket that lexer is about to discard */
4485 if (PL_lex_brackets == 1 &&
4486 ((expectation)PL_lex_brackstack[0] & XFAKEBRACK))
4487 {
4488 s = PL_bufptr;
4489 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
4490 s++;
4491 if (*s == '}') {
4492 PL_thiswhite = newSVpvn(PL_bufptr, ++s - PL_bufptr);
4493 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
4494 PL_thiswhite = 0;
4495 PL_bufptr = s - 1;
4496 break; /* don't bother looking for trailing comment */
4497 }
4498 else
4499 s = PL_bufptr;
4500 }
4501 if (optype == ']')
4502 break;
4503 /* FALLTHROUGH */
4504
4505 /* attach a trailing comment to its statement instead of next token */
4506 case ';':
4507 if (PL_faketokens)
4508 break;
4509 if (PL_bufptr > PL_oldbufptr && PL_bufptr[-1] == optype) {
4510 s = PL_bufptr;
4511 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
4512 s++;
4513 if (*s == '\n' || *s == '#') {
4514 while (s < PL_bufend && *s != '\n')
4515 s++;
4516 if (s < PL_bufend)
4517 s++;
4518 PL_thiswhite = newSVpvn(PL_bufptr, s - PL_bufptr);
4519 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
4520 PL_thiswhite = 0;
4521 PL_bufptr = s;
4522 }
4523 }
4524 break;
4525
4526 /* ival */
4527 default:
4528 break;
4529
4530 }
4531
4532 /* Create new token struct. Note: opvals return early above. */
4533 pl_yylval.tkval = newTOKEN(optype, pl_yylval, PL_thismad);
4534 PL_thismad = 0;
4535 return optype;
4536}
4537#endif
4538
4539STATIC char *
4540S_tokenize_use(pTHX_ int is_use, char *s) {
4541 dVAR;
4542
4543 PERL_ARGS_ASSERT_TOKENIZE_USE;
4544
4545 if (PL_expect != XSTATE)
4546 yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
4547 is_use ? "use" : "no"));
4548 PL_expect = XTERM;
4549 s = SKIPSPACE1(s);
4550 if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
4551 s = force_version(s, TRUE);
4552 if (*s == ';' || *s == '}'
4553 || (s = SKIPSPACE1(s), (*s == ';' || *s == '}'))) {
4554 start_force(PL_curforce);
4555 NEXTVAL_NEXTTOKE.opval = NULL;
4556 force_next(WORD);
4557 }
4558 else if (*s == 'v') {
4559 s = force_word(s,WORD,FALSE,TRUE);
4560 s = force_version(s, FALSE);
4561 }
4562 }
4563 else {
4564 s = force_word(s,WORD,FALSE,TRUE);
4565 s = force_version(s, FALSE);
4566 }
4567 pl_yylval.ival = is_use;
4568 return s;
4569}
4570#ifdef DEBUGGING
4571 static const char* const exp_name[] =
4572 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
4573 "ATTRTERM", "TERMBLOCK", "TERMORDORDOR"
4574 };
4575#endif
4576
4577#define word_takes_any_delimeter(p,l) S_word_takes_any_delimeter(p,l)
4578STATIC bool
4579S_word_takes_any_delimeter(char *p, STRLEN len)
4580{
4581 return (len == 1 && strchr("msyq", p[0])) ||
4582 (len == 2 && (
4583 (p[0] == 't' && p[1] == 'r') ||
4584 (p[0] == 'q' && strchr("qwxr", p[1]))));
4585}
4586
4587/*
4588 yylex
4589
4590 Works out what to call the token just pulled out of the input
4591 stream. The yacc parser takes care of taking the ops we return and
4592 stitching them into a tree.
4593
4594 Returns:
4595 The type of the next token
4596
4597 Structure:
4598 Switch based on the current state:
4599 - if we already built the token before, use it
4600 - if we have a case modifier in a string, deal with that
4601 - handle other cases of interpolation inside a string
4602 - scan the next line if we are inside a format
4603 In the normal state switch on the next character:
4604 - default:
4605 if alphabetic, go to key lookup
4606 unrecoginized character - croak
4607 - 0/4/26: handle end-of-line or EOF
4608 - cases for whitespace
4609 - \n and #: handle comments and line numbers
4610 - various operators, brackets and sigils
4611 - numbers
4612 - quotes
4613 - 'v': vstrings (or go to key lookup)
4614 - 'x' repetition operator (or go to key lookup)
4615 - other ASCII alphanumerics (key lookup begins here):
4616 word before => ?
4617 keyword plugin
4618 scan built-in keyword (but do nothing with it yet)
4619 check for statement label
4620 check for lexical subs
4621 goto just_a_word if there is one
4622 see whether built-in keyword is overridden
4623 switch on keyword number:
4624 - default: just_a_word:
4625 not a built-in keyword; handle bareword lookup
4626 disambiguate between method and sub call
4627 fall back to bareword
4628 - cases for built-in keywords
4629*/
4630
4631
4632#ifdef __SC__
4633#pragma segment Perl_yylex
4634#endif
4635int
4636Perl_yylex(pTHX)
4637{
4638 dVAR;
4639 char *s = PL_bufptr;
4640 char *d;
4641 STRLEN len;
4642 bool bof = FALSE;
4643 U8 formbrack = 0;
4644 U32 fake_eof = 0;
4645
4646 /* orig_keyword, gvp, and gv are initialized here because
4647 * jump to the label just_a_word_zero can bypass their
4648 * initialization later. */
4649 I32 orig_keyword = 0;
4650 GV *gv = NULL;
4651 GV **gvp = NULL;
4652
4653 DEBUG_T( {
4654 SV* tmp = newSVpvs("");
4655 PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n",
4656 (IV)CopLINE(PL_curcop),
4657 lex_state_names[PL_lex_state],
4658 exp_name[PL_expect],
4659 pv_display(tmp, s, strlen(s), 0, 60));
4660 SvREFCNT_dec(tmp);
4661 } );
4662
4663 switch (PL_lex_state) {
4664#ifdef COMMENTARY
4665 case LEX_NORMAL: /* Some compilers will produce faster */
4666 case LEX_INTERPNORMAL: /* code if we comment these out. */
4667 break;
4668#endif
4669
4670 /* when we've already built the next token, just pull it out of the queue */
4671 case LEX_KNOWNEXT:
4672#ifdef PERL_MAD
4673 PL_lasttoke--;
4674 pl_yylval = PL_nexttoke[PL_lasttoke].next_val;
4675 if (PL_madskills) {
4676 PL_thismad = PL_nexttoke[PL_lasttoke].next_mad;
4677 PL_nexttoke[PL_lasttoke].next_mad = 0;
4678 if (PL_thismad && PL_thismad->mad_key == '_') {
4679 PL_thiswhite = MUTABLE_SV(PL_thismad->mad_val);
4680 PL_thismad->mad_val = 0;
4681 mad_free(PL_thismad);
4682 PL_thismad = 0;
4683 }
4684 }
4685 if (!PL_lasttoke) {
4686 PL_lex_state = PL_lex_defer;
4687 PL_expect = PL_lex_expect;
4688 PL_lex_defer = LEX_NORMAL;
4689 if (!PL_nexttoke[PL_lasttoke].next_type)
4690 return yylex();
4691 }
4692#else
4693 PL_nexttoke--;
4694 pl_yylval = PL_nextval[PL_nexttoke];
4695 if (!PL_nexttoke) {
4696 PL_lex_state = PL_lex_defer;
4697 PL_expect = PL_lex_expect;
4698 PL_lex_defer = LEX_NORMAL;
4699 }
4700#endif
4701 {
4702 I32 next_type;
4703#ifdef PERL_MAD
4704 next_type = PL_nexttoke[PL_lasttoke].next_type;
4705#else
4706 next_type = PL_nexttype[PL_nexttoke];
4707#endif
4708 if (next_type & (7<<24)) {
4709 if (next_type & (1<<24)) {
4710 if (PL_lex_brackets > 100)
4711 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
4712 PL_lex_brackstack[PL_lex_brackets++] =
4713 (char) ((next_type >> 16) & 0xff);
4714 }
4715 if (next_type & (2<<24))
4716 PL_lex_allbrackets++;
4717 if (next_type & (4<<24))
4718 PL_lex_allbrackets--;
4719 next_type &= 0xffff;
4720 }
4721 return REPORT(next_type == 'p' ? pending_ident() : next_type);
4722 }
4723
4724 /* interpolated case modifiers like \L \U, including \Q and \E.
4725 when we get here, PL_bufptr is at the \
4726 */
4727 case LEX_INTERPCASEMOD:
4728#ifdef DEBUGGING
4729 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
4730 Perl_croak(aTHX_
4731 "panic: INTERPCASEMOD bufptr=%p, bufend=%p, *bufptr=%u",
4732 PL_bufptr, PL_bufend, *PL_bufptr);
4733#endif
4734 /* handle \E or end of string */
4735 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
4736 /* if at a \E */
4737 if (PL_lex_casemods) {
4738 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
4739 PL_lex_casestack[PL_lex_casemods] = '\0';
4740
4741 if (PL_bufptr != PL_bufend
4742 && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q'
4743 || oldmod == 'F')) {
4744 PL_bufptr += 2;
4745 PL_lex_state = LEX_INTERPCONCAT;
4746#ifdef PERL_MAD
4747 if (PL_madskills)
4748 PL_thistoken = newSVpvs("\\E");
4749#endif
4750 }
4751 PL_lex_allbrackets--;
4752 return REPORT(')');
4753 }
4754 else if ( PL_bufptr != PL_bufend && PL_bufptr[1] == 'E' ) {
4755 /* Got an unpaired \E */
4756 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
4757 "Useless use of \\E");
4758 }
4759#ifdef PERL_MAD
4760 while (PL_bufptr != PL_bufend &&
4761 PL_bufptr[0] == '\\' && PL_bufptr[1] == 'E') {
4762 if (PL_madskills) {
4763 if (!PL_thiswhite)
4764 PL_thiswhite = newSVpvs("");
4765 sv_catpvn(PL_thiswhite, PL_bufptr, 2);
4766 }
4767 PL_bufptr += 2;
4768 }
4769#else
4770 if (PL_bufptr != PL_bufend)
4771 PL_bufptr += 2;
4772#endif
4773 PL_lex_state = LEX_INTERPCONCAT;
4774 return yylex();
4775 }
4776 else {
4777 DEBUG_T({ PerlIO_printf(Perl_debug_log,
4778 "### Saw case modifier\n"); });
4779 s = PL_bufptr + 1;
4780 if (s[1] == '\\' && s[2] == 'E') {
4781#ifdef PERL_MAD
4782 if (PL_madskills) {
4783 if (!PL_thiswhite)
4784 PL_thiswhite = newSVpvs("");
4785 sv_catpvn(PL_thiswhite, PL_bufptr, 4);
4786 }
4787#endif
4788 PL_bufptr = s + 3;
4789 PL_lex_state = LEX_INTERPCONCAT;
4790 return yylex();
4791 }
4792 else {
4793 I32 tmp;
4794 if (!PL_madskills) /* when just compiling don't need correct */
4795 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
4796 tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */
4797 if ((*s == 'L' || *s == 'U' || *s == 'F') &&
4798 (strchr(PL_lex_casestack, 'L')
4799 || strchr(PL_lex_casestack, 'U')
4800 || strchr(PL_lex_casestack, 'F'))) {
4801 PL_lex_casestack[--PL_lex_casemods] = '\0';
4802 PL_lex_allbrackets--;
4803 return REPORT(')');
4804 }
4805 if (PL_lex_casemods > 10)
4806 Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
4807 PL_lex_casestack[PL_lex_casemods++] = *s;
4808 PL_lex_casestack[PL_lex_casemods] = '\0';
4809 PL_lex_state = LEX_INTERPCONCAT;
4810 start_force(PL_curforce);
4811 NEXTVAL_NEXTTOKE.ival = 0;
4812 force_next((2<<24)|'(');
4813 start_force(PL_curforce);
4814 if (*s == 'l')
4815 NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
4816 else if (*s == 'u')
4817 NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
4818 else if (*s == 'L')
4819 NEXTVAL_NEXTTOKE.ival = OP_LC;
4820 else if (*s == 'U')
4821 NEXTVAL_NEXTTOKE.ival = OP_UC;
4822 else if (*s == 'Q')
4823 NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
4824 else if (*s == 'F')
4825 NEXTVAL_NEXTTOKE.ival = OP_FC;
4826 else
4827 Perl_croak(aTHX_ "panic: yylex, *s=%u", *s);
4828 if (PL_madskills) {
4829 SV* const tmpsv = newSVpvs("\\ ");
4830 /* replace the space with the character we want to escape
4831 */
4832 SvPVX(tmpsv)[1] = *s;
4833 curmad('_', tmpsv);
4834 }
4835 PL_bufptr = s + 1;
4836 }
4837 force_next(FUNC);
4838 if (PL_lex_starts) {
4839 s = PL_bufptr;
4840 PL_lex_starts = 0;
4841#ifdef PERL_MAD
4842 if (PL_madskills) {
4843 if (PL_thistoken)
4844 sv_free(PL_thistoken);
4845 PL_thistoken = newSVpvs("");
4846 }
4847#endif
4848 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4849 if (PL_lex_casemods == 1 && PL_lex_inpat)
4850 OPERATOR(',');
4851 else
4852 Aop(OP_CONCAT);
4853 }
4854 else
4855 return yylex();
4856 }
4857
4858 case LEX_INTERPPUSH:
4859 return REPORT(sublex_push());
4860
4861 case LEX_INTERPSTART:
4862 if (PL_bufptr == PL_bufend)
4863 return REPORT(sublex_done());
4864 DEBUG_T({ if(*PL_bufptr != '(') PerlIO_printf(Perl_debug_log,
4865 "### Interpolated variable\n"); });
4866 PL_expect = XTERM;
4867 /* for /@a/, we leave the joining for the regex engine to do
4868 * (unless we're within \Q etc) */
4869 PL_lex_dojoin = (*PL_bufptr == '@'
4870 && (!PL_lex_inpat || PL_lex_casemods));
4871 PL_lex_state = LEX_INTERPNORMAL;
4872 if (PL_lex_dojoin) {
4873 start_force(PL_curforce);
4874 NEXTVAL_NEXTTOKE.ival = 0;
4875 force_next(',');
4876 start_force(PL_curforce);
4877 force_ident("\"", '$');
4878 start_force(PL_curforce);
4879 NEXTVAL_NEXTTOKE.ival = 0;
4880 force_next('$');
4881 start_force(PL_curforce);
4882 NEXTVAL_NEXTTOKE.ival = 0;
4883 force_next((2<<24)|'(');
4884 start_force(PL_curforce);
4885 NEXTVAL_NEXTTOKE.ival = OP_JOIN; /* emulate join($", ...) */
4886 force_next(FUNC);
4887 }
4888 /* Convert (?{...}) and friends to 'do {...}' */
4889 if (PL_lex_inpat && *PL_bufptr == '(') {
4890 PL_parser->lex_shared->re_eval_start = PL_bufptr;
4891 PL_bufptr += 2;
4892 if (*PL_bufptr != '{')
4893 PL_bufptr++;
4894 start_force(PL_curforce);
4895 /* XXX probably need a CURMAD(something) here */
4896 PL_expect = XTERMBLOCK;
4897 force_next(DO);
4898 }
4899
4900 if (PL_lex_starts++) {
4901 s = PL_bufptr;
4902#ifdef PERL_MAD
4903 if (PL_madskills) {
4904 if (PL_thistoken)
4905 sv_free(PL_thistoken);
4906 PL_thistoken = newSVpvs("");
4907 }
4908#endif
4909 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4910 if (!PL_lex_casemods && PL_lex_inpat)
4911 OPERATOR(',');
4912 else
4913 Aop(OP_CONCAT);
4914 }
4915 return yylex();
4916
4917 case LEX_INTERPENDMAYBE:
4918 if (intuit_more(PL_bufptr)) {
4919 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
4920 break;
4921 }
4922 /* FALL THROUGH */
4923
4924 case LEX_INTERPEND:
4925 if (PL_lex_dojoin) {
4926 PL_lex_dojoin = FALSE;
4927 PL_lex_state = LEX_INTERPCONCAT;
4928#ifdef PERL_MAD
4929 if (PL_madskills) {
4930 if (PL_thistoken)
4931 sv_free(PL_thistoken);
4932 PL_thistoken = newSVpvs("");
4933 }
4934#endif
4935 PL_lex_allbrackets--;
4936 return REPORT(')');
4937 }
4938 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
4939 && SvEVALED(PL_lex_repl))
4940 {
4941 if (PL_bufptr != PL_bufend)
4942 Perl_croak(aTHX_ "Bad evalled substitution pattern");
4943 PL_lex_repl = NULL;
4944 }
4945 /* Paranoia. re_eval_start is adjusted when S_scan_heredoc sets
4946 re_eval_str. If the here-doc body’s length equals the previous
4947 value of re_eval_start, re_eval_start will now be null. So
4948 check re_eval_str as well. */
4949 if (PL_parser->lex_shared->re_eval_start
4950 || PL_parser->lex_shared->re_eval_str) {
4951 SV *sv;
4952 if (*PL_bufptr != ')')
4953 Perl_croak(aTHX_ "Sequence (?{...}) not terminated with ')'");
4954 PL_bufptr++;
4955 /* having compiled a (?{..}) expression, return the original
4956 * text too, as a const */
4957 if (PL_parser->lex_shared->re_eval_str) {
4958 sv = PL_parser->lex_shared->re_eval_str;
4959 PL_parser->lex_shared->re_eval_str = NULL;
4960 SvCUR_set(sv,
4961 PL_bufptr - PL_parser->lex_shared->re_eval_start);
4962 SvPV_shrink_to_cur(sv);
4963 }
4964 else sv = newSVpvn(PL_parser->lex_shared->re_eval_start,
4965 PL_bufptr - PL_parser->lex_shared->re_eval_start);
4966 start_force(PL_curforce);
4967 /* XXX probably need a CURMAD(something) here */
4968 NEXTVAL_NEXTTOKE.opval =
4969 (OP*)newSVOP(OP_CONST, 0,
4970 sv);
4971 force_next(THING);
4972 PL_parser->lex_shared->re_eval_start = NULL;
4973 PL_expect = XTERM;
4974 return REPORT(',');
4975 }
4976
4977 /* FALLTHROUGH */
4978 case LEX_INTERPCONCAT:
4979#ifdef DEBUGGING
4980 if (PL_lex_brackets)
4981 Perl_croak(aTHX_ "panic: INTERPCONCAT, lex_brackets=%ld",
4982 (long) PL_lex_brackets);
4983#endif
4984 if (PL_bufptr == PL_bufend)
4985 return REPORT(sublex_done());
4986
4987 /* m'foo' still needs to be parsed for possible (?{...}) */
4988 if (SvIVX(PL_linestr) == '\'' && !PL_lex_inpat) {
4989 SV *sv = newSVsv(PL_linestr);
4990 sv = tokeq(sv);
4991 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
4992 s = PL_bufend;
4993 }
4994 else {
4995 s = scan_const(PL_bufptr);
4996 if (*s == '\\')
4997 PL_lex_state = LEX_INTERPCASEMOD;
4998 else
4999 PL_lex_state = LEX_INTERPSTART;
5000 }
5001
5002 if (s != PL_bufptr) {
5003 start_force(PL_curforce);
5004 if (PL_madskills) {
5005 curmad('X', newSVpvn(PL_bufptr,s-PL_bufptr));
5006 }
5007 NEXTVAL_NEXTTOKE = pl_yylval;
5008 PL_expect = XTERM;
5009 force_next(THING);
5010 if (PL_lex_starts++) {
5011#ifdef PERL_MAD
5012 if (PL_madskills) {
5013 if (PL_thistoken)
5014 sv_free(PL_thistoken);
5015 PL_thistoken = newSVpvs("");
5016 }
5017#endif
5018 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
5019 if (!PL_lex_casemods && PL_lex_inpat)
5020 OPERATOR(',');
5021 else
5022 Aop(OP_CONCAT);
5023 }
5024 else {
5025 PL_bufptr = s;
5026 return yylex();
5027 }
5028 }
5029
5030 return yylex();
5031 case LEX_FORMLINE:
5032 s = scan_formline(PL_bufptr);
5033 if (!PL_lex_formbrack)
5034 {
5035 formbrack = 1;
5036 goto rightbracket;
5037 }
5038 PL_bufptr = s;
5039 return yylex();
5040 }
5041
5042 s = PL_bufptr;
5043 PL_oldoldbufptr = PL_oldbufptr;
5044 PL_oldbufptr = s;
5045
5046 retry:
5047#ifdef PERL_MAD
5048 if (PL_thistoken) {
5049 sv_free(PL_thistoken);
5050 PL_thistoken = 0;
5051 }
5052 PL_realtokenstart = s - SvPVX(PL_linestr); /* assume but undo on ws */
5053#endif
5054 switch (*s) {
5055 default:
5056 if (UTF ? isIDFIRST_utf8((U8*)s) : isALNUMC(*s))
5057 goto keylookup;
5058 {
5059 SV *dsv = newSVpvs_flags("", SVs_TEMP);
5060 const char *c = UTF ? savepv(sv_uni_display(dsv, newSVpvn_flags(s,
5061 UTF8SKIP(s),
5062 SVs_TEMP | SVf_UTF8),
5063 10, UNI_DISPLAY_ISPRINT))
5064 : Perl_form(aTHX_ "\\x%02X", (unsigned char)*s);
5065 len = UTF ? Perl_utf8_length(aTHX_ (U8 *) PL_linestart, (U8 *) s) : (STRLEN) (s - PL_linestart);
5066 if (len > UNRECOGNIZED_PRECEDE_COUNT) {
5067 d = UTF ? (char *) Perl_utf8_hop(aTHX_ (U8 *) s, -UNRECOGNIZED_PRECEDE_COUNT) : s - UNRECOGNIZED_PRECEDE_COUNT;
5068 } else {
5069 d = PL_linestart;
5070 }
5071 *s = '\0';
5072 sv_setpv(dsv, d);
5073 if (UTF)
5074 SvUTF8_on(dsv);
5075 Perl_croak(aTHX_ "Unrecognized character %s; marked by <-- HERE after %"SVf"<-- HERE near column %d", c, SVfARG(dsv), (int) len + 1);
5076 }
5077 case 4:
5078 case 26:
5079 goto fake_eof; /* emulate EOF on ^D or ^Z */
5080 case 0:
5081#ifdef PERL_MAD
5082 if (PL_madskills)
5083 PL_faketokens = 0;
5084#endif
5085 if (!PL_rsfp && (!PL_parser->filtered || s+1 < PL_bufend)) {
5086 PL_last_uni = 0;
5087 PL_last_lop = 0;
5088 if (PL_lex_brackets &&
5089 PL_lex_brackstack[PL_lex_brackets-1] != XFAKEEOF) {
5090 yyerror((const char *)
5091 (PL_lex_formbrack
5092 ? "Format not terminated"
5093 : "Missing right curly or square bracket"));
5094 }
5095 DEBUG_T( { PerlIO_printf(Perl_debug_log,
5096 "### Tokener got EOF\n");
5097 } );
5098 TOKEN(0);
5099 }
5100 if (s++ < PL_bufend)
5101 goto retry; /* ignore stray nulls */
5102 PL_last_uni = 0;
5103 PL_last_lop = 0;
5104 if (!PL_in_eval && !PL_preambled) {
5105 PL_preambled = TRUE;
5106#ifdef PERL_MAD
5107 if (PL_madskills)
5108 PL_faketokens = 1;
5109#endif
5110 if (PL_perldb) {
5111 /* Generate a string of Perl code to load the debugger.
5112 * If PERL5DB is set, it will return the contents of that,
5113 * otherwise a compile-time require of perl5db.pl. */
5114
5115 const char * const pdb = PerlEnv_getenv("PERL5DB");
5116
5117 if (pdb) {
5118 sv_setpv(PL_linestr, pdb);
5119 sv_catpvs(PL_linestr,";");
5120 } else {
5121 SETERRNO(0,SS_NORMAL);
5122 sv_setpvs(PL_linestr, "BEGIN { require 'perl5db.pl' };");
5123 }
5124 } else
5125 sv_setpvs(PL_linestr,"");
5126 if (PL_preambleav) {
5127 SV **svp = AvARRAY(PL_preambleav);
5128 SV **const end = svp + AvFILLp(PL_preambleav);
5129 while(svp <= end) {
5130 sv_catsv(PL_linestr, *svp);
5131 ++svp;
5132 sv_catpvs(PL_linestr, ";");
5133 }
5134 sv_free(MUTABLE_SV(PL_preambleav));
5135 PL_preambleav = NULL;
5136 }
5137 if (PL_minus_E)
5138 sv_catpvs(PL_linestr,
5139 "use feature ':5." STRINGIFY(PERL_VERSION) "';");
5140 if (PL_minus_n || PL_minus_p) {
5141 sv_catpvs(PL_linestr, "LINE: while (<>) {"/*}*/);
5142 if (PL_minus_l)
5143 sv_catpvs(PL_linestr,"chomp;");
5144 if (PL_minus_a) {
5145 if (PL_minus_F) {
5146 if ((*PL_splitstr == '/' || *PL_splitstr == '\''
5147 || *PL_splitstr == '"')
5148 && strchr(PL_splitstr + 1, *PL_splitstr))
5149 Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
5150 else {
5151 /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
5152 bytes can be used as quoting characters. :-) */
5153 const char *splits = PL_splitstr;
5154 sv_catpvs(PL_linestr, "our @F=split(q\0");
5155 do {
5156 /* Need to \ \s */
5157 if (*splits == '\\')
5158 sv_catpvn(PL_linestr, splits, 1);
5159 sv_catpvn(PL_linestr, splits, 1);
5160 } while (*splits++);
5161 /* This loop will embed the trailing NUL of
5162 PL_linestr as the last thing it does before
5163 terminating. */
5164 sv_catpvs(PL_linestr, ");");
5165 }
5166 }
5167 else
5168 sv_catpvs(PL_linestr,"our @F=split(' ');");
5169 }
5170 }
5171 sv_catpvs(PL_linestr, "\n");
5172 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
5173 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5174 PL_last_lop = PL_last_uni = NULL;
5175 if ((PERLDB_LINE || PERLDB_SAVESRC) && PL_curstash != PL_debstash)
5176 update_debugger_info(PL_linestr, NULL, 0);
5177 goto retry;
5178 }
5179 do {
5180 fake_eof = 0;
5181 bof = PL_rsfp ? TRUE : FALSE;
5182 if (0) {
5183 fake_eof:
5184 fake_eof = LEX_FAKE_EOF;
5185 }
5186 PL_bufptr = PL_bufend;
5187 COPLINE_INC_WITH_HERELINES;
5188 if (!lex_next_chunk(fake_eof)) {
5189 CopLINE_dec(PL_curcop);
5190 s = PL_bufptr;
5191 TOKEN(';'); /* not infinite loop because rsfp is NULL now */
5192 }
5193 CopLINE_dec(PL_curcop);
5194#ifdef PERL_MAD
5195 if (!PL_rsfp)
5196 PL_realtokenstart = -1;
5197#endif
5198 s = PL_bufptr;
5199 /* If it looks like the start of a BOM or raw UTF-16,
5200 * check if it in fact is. */
5201 if (bof && PL_rsfp &&
5202 (*s == 0 ||
5203 *(U8*)s == 0xEF ||
5204 *(U8*)s >= 0xFE ||
5205 s[1] == 0)) {
5206 Off_t offset = (IV)PerlIO_tell(PL_rsfp);
5207 bof = (offset == (Off_t)SvCUR(PL_linestr));
5208#if defined(PERLIO_USING_CRLF) && defined(PERL_TEXTMODE_SCRIPTS)
5209 /* offset may include swallowed CR */
5210 if (!bof)
5211 bof = (offset == (Off_t)SvCUR(PL_linestr)+1);
5212#endif
5213 if (bof) {
5214 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5215 s = swallow_bom((U8*)s);
5216 }
5217 }
5218 if (PL_parser->in_pod) {
5219 /* Incest with pod. */
5220#ifdef PERL_MAD
5221 if (PL_madskills)
5222 sv_catsv(PL_thiswhite, PL_linestr);
5223#endif
5224 if (*s == '=' && strnEQ(s, "=cut", 4) && !isALPHA(s[4])) {
5225 sv_setpvs(PL_linestr, "");
5226 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
5227 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5228 PL_last_lop = PL_last_uni = NULL;
5229 PL_parser->in_pod = 0;
5230 }
5231 }
5232 if (PL_rsfp || PL_parser->filtered)
5233 incline(s);
5234 } while (PL_parser->in_pod);
5235 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
5236 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5237 PL_last_lop = PL_last_uni = NULL;
5238 if (CopLINE(PL_curcop) == 1) {
5239 while (s < PL_bufend && isSPACE(*s))
5240 s++;
5241 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
5242 s++;
5243#ifdef PERL_MAD
5244 if (PL_madskills)
5245 PL_thiswhite = newSVpvn(PL_linestart, s - PL_linestart);
5246#endif
5247 d = NULL;
5248 if (!PL_in_eval) {
5249 if (*s == '#' && *(s+1) == '!')
5250 d = s + 2;
5251#ifdef ALTERNATE_SHEBANG
5252 else {
5253 static char const as[] = ALTERNATE_SHEBANG;
5254 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
5255 d = s + (sizeof(as) - 1);
5256 }
5257#endif /* ALTERNATE_SHEBANG */
5258 }
5259 if (d) {
5260 char *ipath;
5261 char *ipathend;
5262
5263 while (isSPACE(*d))
5264 d++;
5265 ipath = d;
5266 while (*d && !isSPACE(*d))
5267 d++;
5268 ipathend = d;
5269
5270#ifdef ARG_ZERO_IS_SCRIPT
5271 if (ipathend > ipath) {
5272 /*
5273 * HP-UX (at least) sets argv[0] to the script name,
5274 * which makes $^X incorrect. And Digital UNIX and Linux,
5275 * at least, set argv[0] to the basename of the Perl
5276 * interpreter. So, having found "#!", we'll set it right.
5277 */
5278 SV * const x = GvSV(gv_fetchpvs("\030", GV_ADD|GV_NOTQUAL,
5279 SVt_PV)); /* $^X */
5280 assert(SvPOK(x) || SvGMAGICAL(x));
5281 if (sv_eq(x, CopFILESV(PL_curcop))) {
5282 sv_setpvn(x, ipath, ipathend - ipath);
5283 SvSETMAGIC(x);
5284 }
5285 else {
5286 STRLEN blen;
5287 STRLEN llen;
5288 const char *bstart = SvPV_const(CopFILESV(PL_curcop),blen);
5289 const char * const lstart = SvPV_const(x,llen);
5290 if (llen < blen) {
5291 bstart += blen - llen;
5292 if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
5293 sv_setpvn(x, ipath, ipathend - ipath);
5294 SvSETMAGIC(x);
5295 }
5296 }
5297 }
5298 TAINT_NOT; /* $^X is always tainted, but that's OK */
5299 }
5300#endif /* ARG_ZERO_IS_SCRIPT */
5301
5302 /*
5303 * Look for options.
5304 */
5305 d = instr(s,"perl -");
5306 if (!d) {
5307 d = instr(s,"perl");
5308#if defined(DOSISH)
5309 /* avoid getting into infinite loops when shebang
5310 * line contains "Perl" rather than "perl" */
5311 if (!d) {
5312 for (d = ipathend-4; d >= ipath; --d) {
5313 if ((*d == 'p' || *d == 'P')
5314 && !ibcmp(d, "perl", 4))
5315 {
5316 break;
5317 }
5318 }
5319 if (d < ipath)
5320 d = NULL;
5321 }
5322#endif
5323 }
5324#ifdef ALTERNATE_SHEBANG
5325 /*
5326 * If the ALTERNATE_SHEBANG on this system starts with a
5327 * character that can be part of a Perl expression, then if
5328 * we see it but not "perl", we're probably looking at the
5329 * start of Perl code, not a request to hand off to some
5330 * other interpreter. Similarly, if "perl" is there, but
5331 * not in the first 'word' of the line, we assume the line
5332 * contains the start of the Perl program.
5333 */
5334 if (d && *s != '#') {
5335 const char *c = ipath;
5336 while (*c && !strchr("; \t\r\n\f\v#", *c))
5337 c++;
5338 if (c < d)
5339 d = NULL; /* "perl" not in first word; ignore */
5340 else
5341 *s = '#'; /* Don't try to parse shebang line */
5342 }
5343#endif /* ALTERNATE_SHEBANG */
5344 if (!d &&
5345 *s == '#' &&
5346 ipathend > ipath &&
5347 !PL_minus_c &&
5348 !instr(s,"indir") &&
5349 instr(PL_origargv[0],"perl"))
5350 {
5351 dVAR;
5352 char **newargv;
5353
5354 *ipathend = '\0';
5355 s = ipathend + 1;
5356 while (s < PL_bufend && isSPACE(*s))
5357 s++;
5358 if (s < PL_bufend) {
5359 Newx(newargv,PL_origargc+3,char*);
5360 newargv[1] = s;
5361 while (s < PL_bufend && !isSPACE(*s))
5362 s++;
5363 *s = '\0';
5364 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
5365 }
5366 else
5367 newargv = PL_origargv;
5368 newargv[0] = ipath;
5369 PERL_FPU_PRE_EXEC
5370 PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
5371 PERL_FPU_POST_EXEC
5372 Perl_croak(aTHX_ "Can't exec %s", ipath);
5373 }
5374 if (d) {
5375 while (*d && !isSPACE(*d))
5376 d++;
5377 while (SPACE_OR_TAB(*d))
5378 d++;
5379
5380 if (*d++ == '-') {
5381 const bool switches_done = PL_doswitches;
5382 const U32 oldpdb = PL_perldb;
5383 const bool oldn = PL_minus_n;
5384 const bool oldp = PL_minus_p;
5385 const char *d1 = d;
5386
5387 do {
5388 bool baduni = FALSE;
5389 if (*d1 == 'C') {
5390 const char *d2 = d1 + 1;
5391 if (parse_unicode_opts((const char **)&d2)
5392 != PL_unicode)
5393 baduni = TRUE;
5394 }
5395 if (baduni || *d1 == 'M' || *d1 == 'm') {
5396 const char * const m = d1;
5397 while (*d1 && !isSPACE(*d1))
5398 d1++;
5399 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
5400 (int)(d1 - m), m);
5401 }
5402 d1 = moreswitches(d1);
5403 } while (d1);
5404 if (PL_doswitches && !switches_done) {
5405 int argc = PL_origargc;
5406 char **argv = PL_origargv;
5407 do {
5408 argc--,argv++;
5409 } while (argc && argv[0][0] == '-' && argv[0][1]);
5410 init_argv_symbols(argc,argv);
5411 }
5412 if (((PERLDB_LINE || PERLDB_SAVESRC) && !oldpdb) ||
5413 ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
5414 /* if we have already added "LINE: while (<>) {",
5415 we must not do it again */
5416 {
5417 sv_setpvs(PL_linestr, "");
5418 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
5419 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5420 PL_last_lop = PL_last_uni = NULL;
5421 PL_preambled = FALSE;
5422 if (PERLDB_LINE || PERLDB_SAVESRC)
5423 (void)gv_fetchfile(PL_origfilename);
5424 goto retry;
5425 }
5426 }
5427 }
5428 }
5429 }
5430 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
5431 PL_lex_state = LEX_FORMLINE;
5432 start_force(PL_curforce);
5433 NEXTVAL_NEXTTOKE.ival = 0;
5434 force_next(FORMRBRACK);
5435 TOKEN(';');
5436 }
5437 goto retry;
5438 case '\r':
5439#ifdef PERL_STRICT_CR
5440 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
5441 Perl_croak(aTHX_
5442 "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
5443#endif
5444 case ' ': case '\t': case '\f': case 013:
5445#ifdef PERL_MAD
5446 PL_realtokenstart = -1;
5447 if (PL_madskills) {
5448 if (!PL_thiswhite)
5449 PL_thiswhite = newSVpvs("");
5450 sv_catpvn(PL_thiswhite, s, 1);
5451 }
5452#endif
5453 s++;
5454 goto retry;
5455 case '#':
5456 case '\n':
5457#ifdef PERL_MAD
5458 PL_realtokenstart = -1;
5459 if (PL_madskills)
5460 PL_faketokens = 0;
5461#endif
5462 if (PL_lex_state != LEX_NORMAL ||
5463 (PL_in_eval && !PL_rsfp && !PL_parser->filtered)) {
5464 if (*s == '#' && s == PL_linestart && PL_in_eval
5465 && !PL_rsfp && !PL_parser->filtered) {
5466 /* handle eval qq[#line 1 "foo"\n ...] */
5467 CopLINE_dec(PL_curcop);
5468 incline(s);
5469 }
5470 if (PL_madskills && !PL_lex_formbrack && !PL_in_eval) {
5471 s = SKIPSPACE0(s);
5472 if (!PL_in_eval || PL_rsfp || PL_parser->filtered)
5473 incline(s);
5474 }
5475 else {
5476 const bool in_comment = *s == '#';
5477 d = s;
5478 while (d < PL_bufend && *d != '\n')
5479 d++;
5480 if (d < PL_bufend)
5481 d++;
5482 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
5483 Perl_croak(aTHX_ "panic: input overflow, %p > %p",
5484 d, PL_bufend);
5485#ifdef PERL_MAD
5486 if (PL_madskills)
5487 PL_thiswhite = newSVpvn(s, d - s);
5488#endif
5489 s = d;
5490 if (in_comment && d == PL_bufend
5491 && PL_lex_state == LEX_INTERPNORMAL
5492 && PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
5493 && SvEVALED(PL_lex_repl) && d[-1] == '}') s--;
5494 else incline(s);
5495 }
5496 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
5497 PL_lex_state = LEX_FORMLINE;
5498 start_force(PL_curforce);
5499 NEXTVAL_NEXTTOKE.ival = 0;
5500 force_next(FORMRBRACK);
5501 TOKEN(';');
5502 }
5503 }
5504 else {
5505#ifdef PERL_MAD
5506 if (PL_madskills && CopLINE(PL_curcop) >= 1 && !PL_lex_formbrack) {
5507 if (CopLINE(PL_curcop) == 1 && s[0] == '#' && s[1] == '!') {
5508 PL_faketokens = 0;
5509 s = SKIPSPACE0(s);
5510 TOKEN(PEG); /* make sure any #! line is accessible */
5511 }
5512 s = SKIPSPACE0(s);
5513 }
5514 else {
5515/* if (PL_madskills && PL_lex_formbrack) { */
5516 d = s;
5517 while (d < PL_bufend && *d != '\n')
5518 d++;
5519 if (d < PL_bufend)
5520 d++;
5521 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
5522 Perl_croak(aTHX_ "panic: input overflow");
5523 if (PL_madskills && CopLINE(PL_curcop) >= 1) {
5524 if (!PL_thiswhite)
5525 PL_thiswhite = newSVpvs("");
5526 if (CopLINE(PL_curcop) == 1) {
5527 sv_setpvs(PL_thiswhite, "");
5528 PL_faketokens = 0;
5529 }
5530 sv_catpvn(PL_thiswhite, s, d - s);
5531 }
5532 s = d;
5533/* }
5534 *s = '\0';
5535 PL_bufend = s; */
5536 }
5537#else
5538 *s = '\0';
5539 PL_bufend = s;
5540#endif
5541 }
5542 goto retry;
5543 case '-':
5544 if (s[1] && isALPHA(s[1]) && !isWORDCHAR(s[2])) {
5545 I32 ftst = 0;
5546 char tmp;
5547
5548 s++;
5549 PL_bufptr = s;
5550 tmp = *s++;
5551
5552 while (s < PL_bufend && SPACE_OR_TAB(*s))
5553 s++;
5554
5555 if (strnEQ(s,"=>",2)) {
5556 s = force_word(PL_bufptr,WORD,FALSE,FALSE);
5557 DEBUG_T( { printbuf("### Saw unary minus before =>, forcing word %s\n", s); } );
5558 OPERATOR('-'); /* unary minus */
5559 }
5560 PL_last_uni = PL_oldbufptr;
5561 switch (tmp) {
5562 case 'r': ftst = OP_FTEREAD; break;
5563 case 'w': ftst = OP_FTEWRITE; break;
5564 case 'x': ftst = OP_FTEEXEC; break;
5565 case 'o': ftst = OP_FTEOWNED; break;
5566 case 'R': ftst = OP_FTRREAD; break;
5567 case 'W': ftst = OP_FTRWRITE; break;
5568 case 'X': ftst = OP_FTREXEC; break;
5569 case 'O': ftst = OP_FTROWNED; break;
5570 case 'e': ftst = OP_FTIS; break;
5571 case 'z': ftst = OP_FTZERO; break;
5572 case 's': ftst = OP_FTSIZE; break;
5573 case 'f': ftst = OP_FTFILE; break;
5574 case 'd': ftst = OP_FTDIR; break;
5575 case 'l': ftst = OP_FTLINK; break;
5576 case 'p': ftst = OP_FTPIPE; break;
5577 case 'S': ftst = OP_FTSOCK; break;
5578 case 'u': ftst = OP_FTSUID; break;
5579 case 'g': ftst = OP_FTSGID; break;
5580 case 'k': ftst = OP_FTSVTX; break;
5581 case 'b': ftst = OP_FTBLK; break;
5582 case 'c': ftst = OP_FTCHR; break;
5583 case 't': ftst = OP_FTTTY; break;
5584 case 'T': ftst = OP_FTTEXT; break;
5585 case 'B': ftst = OP_FTBINARY; break;
5586 case 'M': case 'A': case 'C':
5587 gv_fetchpvs("\024", GV_ADD|GV_NOTQUAL, SVt_PV);
5588 switch (tmp) {
5589 case 'M': ftst = OP_FTMTIME; break;
5590 case 'A': ftst = OP_FTATIME; break;
5591 case 'C': ftst = OP_FTCTIME; break;
5592 default: break;
5593 }
5594 break;
5595 default:
5596 break;
5597 }
5598 if (ftst) {
5599 PL_last_lop_op = (OPCODE)ftst;
5600 DEBUG_T( { PerlIO_printf(Perl_debug_log,
5601 "### Saw file test %c\n", (int)tmp);
5602 } );
5603 FTST(ftst);
5604 }
5605 else {
5606 /* Assume it was a minus followed by a one-letter named
5607 * subroutine call (or a -bareword), then. */
5608 DEBUG_T( { PerlIO_printf(Perl_debug_log,
5609 "### '-%c' looked like a file test but was not\n",
5610 (int) tmp);
5611 } );
5612 s = --PL_bufptr;
5613 }
5614 }
5615 {
5616 const char tmp = *s++;
5617 if (*s == tmp) {
5618 s++;
5619 if (PL_expect == XOPERATOR)
5620 TERM(POSTDEC);
5621 else
5622 OPERATOR(PREDEC);
5623 }
5624 else if (*s == '>') {
5625 s++;
5626 s = SKIPSPACE1(s);
5627 if (isIDFIRST_lazy_if(s,UTF)) {
5628 s = force_word(s,METHOD,FALSE,TRUE);
5629 TOKEN(ARROW);
5630 }
5631 else if (*s == '$')
5632 OPERATOR(ARROW);
5633 else
5634 TERM(ARROW);
5635 }
5636 if (PL_expect == XOPERATOR) {
5637 if (*s == '=' && !PL_lex_allbrackets &&
5638 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5639 s--;
5640 TOKEN(0);
5641 }
5642 Aop(OP_SUBTRACT);
5643 }
5644 else {
5645 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5646 check_uni();
5647 OPERATOR('-'); /* unary minus */
5648 }
5649 }
5650
5651 case '+':
5652 {
5653 const char tmp = *s++;
5654 if (*s == tmp) {
5655 s++;
5656 if (PL_expect == XOPERATOR)
5657 TERM(POSTINC);
5658 else
5659 OPERATOR(PREINC);
5660 }
5661 if (PL_expect == XOPERATOR) {
5662 if (*s == '=' && !PL_lex_allbrackets &&
5663 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5664 s--;
5665 TOKEN(0);
5666 }
5667 Aop(OP_ADD);
5668 }
5669 else {
5670 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5671 check_uni();
5672 OPERATOR('+');
5673 }
5674 }
5675
5676 case '*':
5677 if (PL_expect != XOPERATOR) {
5678 s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5679 PL_expect = XOPERATOR;
5680 force_ident(PL_tokenbuf, '*');
5681 if (!*PL_tokenbuf)
5682 PREREF('*');
5683 TERM('*');
5684 }
5685 s++;
5686 if (*s == '*') {
5687 s++;
5688 if (*s == '=' && !PL_lex_allbrackets &&
5689 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5690 s -= 2;
5691 TOKEN(0);
5692 }
5693 PWop(OP_POW);
5694 }
5695 if (*s == '=' && !PL_lex_allbrackets &&
5696 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5697 s--;
5698 TOKEN(0);
5699 }
5700 Mop(OP_MULTIPLY);
5701
5702 case '%':
5703 if (PL_expect == XOPERATOR) {
5704 if (s[1] == '=' && !PL_lex_allbrackets &&
5705 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5706 TOKEN(0);
5707 ++s;
5708 Mop(OP_MODULO);
5709 }
5710 PL_tokenbuf[0] = '%';
5711 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
5712 sizeof PL_tokenbuf - 1, FALSE);
5713 if (!PL_tokenbuf[1]) {
5714 PREREF('%');
5715 }
5716 PL_expect = XOPERATOR;
5717 force_ident_maybe_lex('%');
5718 TERM('%');
5719
5720 case '^':
5721 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5722 (s[1] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE))
5723 TOKEN(0);
5724 s++;
5725 BOop(OP_BIT_XOR);
5726 case '[':
5727 if (PL_lex_brackets > 100)
5728 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
5729 PL_lex_brackstack[PL_lex_brackets++] = 0;
5730 PL_lex_allbrackets++;
5731 {
5732 const char tmp = *s++;
5733 OPERATOR(tmp);
5734 }
5735 case '~':
5736 if (s[1] == '~'
5737 && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR))
5738 {
5739 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
5740 TOKEN(0);
5741 s += 2;
5742 Perl_ck_warner_d(aTHX_
5743 packWARN(WARN_EXPERIMENTAL__SMARTMATCH),
5744 "Smartmatch is experimental");
5745 Eop(OP_SMARTMATCH);
5746 }
5747 s++;
5748 OPERATOR('~');
5749 case ',':
5750 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMMA)
5751 TOKEN(0);
5752 s++;
5753 OPERATOR(',');
5754 case ':':
5755 if (s[1] == ':') {
5756 len = 0;
5757 goto just_a_word_zero_gv;
5758 }
5759 s++;
5760 switch (PL_expect) {
5761 OP *attrs;
5762#ifdef PERL_MAD
5763 I32 stuffstart;
5764#endif
5765 case XOPERATOR:
5766 if (!PL_in_my || PL_lex_state != LEX_NORMAL)
5767 break;
5768 PL_bufptr = s; /* update in case we back off */
5769 if (*s == '=') {
5770 Perl_croak(aTHX_
5771 "Use of := for an empty attribute list is not allowed");
5772 }
5773 goto grabattrs;
5774 case XATTRBLOCK:
5775 PL_expect = XBLOCK;
5776 goto grabattrs;
5777 case XATTRTERM:
5778 PL_expect = XTERMBLOCK;
5779 grabattrs:
5780#ifdef PERL_MAD
5781 stuffstart = s - SvPVX(PL_linestr) - 1;
5782#endif
5783 s = PEEKSPACE(s);
5784 attrs = NULL;
5785 while (isIDFIRST_lazy_if(s,UTF)) {
5786 I32 tmp;
5787 SV *sv;
5788 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
5789 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len, 0))) {
5790 if (tmp < 0) tmp = -tmp;
5791 switch (tmp) {
5792 case KEY_or:
5793 case KEY_and:
5794 case KEY_for:
5795 case KEY_foreach:
5796 case KEY_unless:
5797 case KEY_if:
5798 case KEY_while:
5799 case KEY_until:
5800 goto got_attrs;
5801 default:
5802 break;
5803 }
5804 }
5805 sv = newSVpvn_flags(s, len, UTF ? SVf_UTF8 : 0);
5806 if (*d == '(') {
5807 d = scan_str(d,TRUE,TRUE,FALSE, FALSE);
5808 if (!d) {
5809 /* MUST advance bufptr here to avoid bogus
5810 "at end of line" context messages from yyerror().
5811 */
5812 PL_bufptr = s + len;
5813 yyerror("Unterminated attribute parameter in attribute list");
5814 if (attrs)
5815 op_free(attrs);
5816 sv_free(sv);
5817 return REPORT(0); /* EOF indicator */
5818 }
5819 }
5820 if (PL_lex_stuff) {
5821 sv_catsv(sv, PL_lex_stuff);
5822 attrs = op_append_elem(OP_LIST, attrs,
5823 newSVOP(OP_CONST, 0, sv));
5824 SvREFCNT_dec(PL_lex_stuff);
5825 PL_lex_stuff = NULL;
5826 }
5827 else {
5828 if (len == 6 && strnEQ(SvPVX(sv), "unique", len)) {
5829 sv_free(sv);
5830 if (PL_in_my == KEY_our) {
5831 deprecate(":unique");
5832 }
5833 else
5834 Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables");
5835 }
5836
5837 /* NOTE: any CV attrs applied here need to be part of
5838 the CVf_BUILTIN_ATTRS define in cv.h! */
5839 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "lvalue", len)) {
5840 sv_free(sv);
5841 CvLVALUE_on(PL_compcv);
5842 }
5843 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "locked", len)) {
5844 sv_free(sv);
5845 deprecate(":locked");
5846 }
5847 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "method", len)) {
5848 sv_free(sv);
5849 CvMETHOD_on(PL_compcv);
5850 }
5851 /* After we've set the flags, it could be argued that
5852 we don't need to do the attributes.pm-based setting
5853 process, and shouldn't bother appending recognized
5854 flags. To experiment with that, uncomment the
5855 following "else". (Note that's already been
5856 uncommented. That keeps the above-applied built-in
5857 attributes from being intercepted (and possibly
5858 rejected) by a package's attribute routines, but is
5859 justified by the performance win for the common case
5860 of applying only built-in attributes.) */
5861 else
5862 attrs = op_append_elem(OP_LIST, attrs,
5863 newSVOP(OP_CONST, 0,
5864 sv));
5865 }
5866 s = PEEKSPACE(d);
5867 if (*s == ':' && s[1] != ':')
5868 s = PEEKSPACE(s+1);
5869 else if (s == d)
5870 break; /* require real whitespace or :'s */
5871 /* XXX losing whitespace on sequential attributes here */
5872 }
5873 {
5874 const char tmp
5875 = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
5876 if (*s != ';' && *s != '}' && *s != tmp
5877 && (tmp != '=' || *s != ')')) {
5878 const char q = ((*s == '\'') ? '"' : '\'');
5879 /* If here for an expression, and parsed no attrs, back
5880 off. */
5881 if (tmp == '=' && !attrs) {
5882 s = PL_bufptr;
5883 break;
5884 }
5885 /* MUST advance bufptr here to avoid bogus "at end of line"
5886 context messages from yyerror().
5887 */
5888 PL_bufptr = s;
5889 yyerror( (const char *)
5890 (*s
5891 ? Perl_form(aTHX_ "Invalid separator character "
5892 "%c%c%c in attribute list", q, *s, q)
5893 : "Unterminated attribute list" ) );
5894 if (attrs)
5895 op_free(attrs);
5896 OPERATOR(':');
5897 }
5898 }
5899 got_attrs:
5900 if (attrs) {
5901 start_force(PL_curforce);
5902 NEXTVAL_NEXTTOKE.opval = attrs;
5903 CURMAD('_', PL_nextwhite);
5904 force_next(THING);
5905 }
5906#ifdef PERL_MAD
5907 if (PL_madskills) {
5908 PL_thistoken = newSVpvn(SvPVX(PL_linestr) + stuffstart,
5909 (s - SvPVX(PL_linestr)) - stuffstart);
5910 }
5911#endif
5912 TOKEN(COLONATTR);
5913 }
5914 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING) {
5915 s--;
5916 TOKEN(0);
5917 }
5918 PL_lex_allbrackets--;
5919 OPERATOR(':');
5920 case '(':
5921 s++;
5922 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
5923 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */
5924 else
5925 PL_expect = XTERM;
5926 s = SKIPSPACE1(s);
5927 PL_lex_allbrackets++;
5928 TOKEN('(');
5929 case ';':
5930 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
5931 TOKEN(0);
5932 CLINE;
5933 s++;
5934 OPERATOR(';');
5935 case ')':
5936 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING)
5937 TOKEN(0);
5938 s++;
5939 PL_lex_allbrackets--;
5940 s = SKIPSPACE1(s);
5941 if (*s == '{')
5942 PREBLOCK(')');
5943 TERM(')');
5944 case ']':
5945 if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
5946 TOKEN(0);
5947 s++;
5948 if (PL_lex_brackets <= 0)
5949 yyerror("Unmatched right square bracket");
5950 else
5951 --PL_lex_brackets;
5952 PL_lex_allbrackets--;
5953 if (PL_lex_state == LEX_INTERPNORMAL) {
5954 if (PL_lex_brackets == 0) {
5955 if (*s == '-' && s[1] == '>')
5956 PL_lex_state = LEX_INTERPENDMAYBE;
5957 else if (*s != '[' && *s != '{')
5958 PL_lex_state = LEX_INTERPEND;
5959 }
5960 }
5961 TERM(']');
5962 case '{':
5963 s++;
5964 leftbracket:
5965 if (PL_lex_brackets > 100) {
5966 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
5967 }
5968 switch (PL_expect) {
5969 case XTERM:
5970 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5971 PL_lex_allbrackets++;
5972 OPERATOR(HASHBRACK);
5973 case XOPERATOR:
5974 while (s < PL_bufend && SPACE_OR_TAB(*s))
5975 s++;
5976 d = s;
5977 PL_tokenbuf[0] = '\0';
5978 if (d < PL_bufend && *d == '-') {
5979 PL_tokenbuf[0] = '-';
5980 d++;
5981 while (d < PL_bufend && SPACE_OR_TAB(*d))
5982 d++;
5983 }
5984 if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
5985 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
5986 FALSE, &len);
5987 while (d < PL_bufend && SPACE_OR_TAB(*d))
5988 d++;
5989 if (*d == '}') {
5990 const char minus = (PL_tokenbuf[0] == '-');
5991 s = force_word(s + minus, WORD, FALSE, TRUE);
5992 if (minus)
5993 force_next('-');
5994 }
5995 }
5996 /* FALL THROUGH */
5997 case XATTRBLOCK:
5998 case XBLOCK:
5999 PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
6000 PL_lex_allbrackets++;
6001 PL_expect = XSTATE;
6002 break;
6003 case XATTRTERM:
6004 case XTERMBLOCK:
6005 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
6006 PL_lex_allbrackets++;
6007 PL_expect = XSTATE;
6008 break;
6009 default: {
6010 const char *t;
6011 if (PL_oldoldbufptr == PL_last_lop)
6012 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
6013 else
6014 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
6015 PL_lex_allbrackets++;
6016 s = SKIPSPACE1(s);
6017 if (*s == '}') {
6018 if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
6019 PL_expect = XTERM;
6020 /* This hack is to get the ${} in the message. */
6021 PL_bufptr = s+1;
6022 yyerror("syntax error");
6023 break;
6024 }
6025 OPERATOR(HASHBRACK);
6026 }
6027 /* This hack serves to disambiguate a pair of curlies
6028 * as being a block or an anon hash. Normally, expectation
6029 * determines that, but in cases where we're not in a
6030 * position to expect anything in particular (like inside
6031 * eval"") we have to resolve the ambiguity. This code
6032 * covers the case where the first term in the curlies is a
6033 * quoted string. Most other cases need to be explicitly
6034 * disambiguated by prepending a "+" before the opening
6035 * curly in order to force resolution as an anon hash.
6036 *
6037 * XXX should probably propagate the outer expectation
6038 * into eval"" to rely less on this hack, but that could
6039 * potentially break current behavior of eval"".
6040 * GSAR 97-07-21
6041 */
6042 t = s;
6043 if (*s == '\'' || *s == '"' || *s == '`') {
6044 /* common case: get past first string, handling escapes */
6045 for (t++; t < PL_bufend && *t != *s;)
6046 if (*t++ == '\\' && (*t == '\\' || *t == *s))
6047 t++;
6048 t++;
6049 }
6050 else if (*s == 'q') {
6051 if (++t < PL_bufend
6052 && (!isWORDCHAR(*t)
6053 || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
6054 && !isWORDCHAR(*t))))
6055 {
6056 /* skip q//-like construct */
6057 const char *tmps;
6058 char open, close, term;
6059 I32 brackets = 1;
6060
6061 while (t < PL_bufend && isSPACE(*t))
6062 t++;
6063 /* check for q => */
6064 if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
6065 OPERATOR(HASHBRACK);
6066 }
6067 term = *t;
6068 open = term;
6069 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
6070 term = tmps[5];
6071 close = term;
6072 if (open == close)
6073 for (t++; t < PL_bufend; t++) {
6074 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
6075 t++;
6076 else if (*t == open)
6077 break;
6078 }
6079 else {
6080 for (t++; t < PL_bufend; t++) {
6081 if (*t == '\\' && t+1 < PL_bufend)
6082 t++;
6083 else if (*t == close && --brackets <= 0)
6084 break;
6085 else if (*t == open)
6086 brackets++;
6087 }
6088 }
6089 t++;
6090 }
6091 else
6092 /* skip plain q word */
6093 while (t < PL_bufend && isWORDCHAR_lazy_if(t,UTF))
6094 t += UTF8SKIP(t);
6095 }
6096 else if (isWORDCHAR_lazy_if(t,UTF)) {
6097 t += UTF8SKIP(t);
6098 while (t < PL_bufend && isWORDCHAR_lazy_if(t,UTF))
6099 t += UTF8SKIP(t);
6100 }
6101 while (t < PL_bufend && isSPACE(*t))
6102 t++;
6103 /* if comma follows first term, call it an anon hash */
6104 /* XXX it could be a comma expression with loop modifiers */
6105 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
6106 || (*t == '=' && t[1] == '>')))
6107 OPERATOR(HASHBRACK);
6108 if (PL_expect == XREF)
6109 PL_expect = XTERM;
6110 else {
6111 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
6112 PL_expect = XSTATE;
6113 }
6114 }
6115 break;
6116 }
6117 pl_yylval.ival = CopLINE(PL_curcop);
6118 if (isSPACE(*s) || *s == '#')
6119 PL_copline = NOLINE; /* invalidate current command line number */
6120 TOKEN(formbrack ? '=' : '{');
6121 case '}':
6122 if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
6123 TOKEN(0);
6124 rightbracket:
6125 s++;
6126 if (PL_lex_brackets <= 0)
6127 yyerror("Unmatched right curly bracket");
6128 else
6129 PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
6130 PL_lex_allbrackets--;
6131 if (PL_lex_state == LEX_INTERPNORMAL) {
6132 if (PL_lex_brackets == 0) {
6133 if (PL_expect & XFAKEBRACK) {
6134 PL_expect &= XENUMMASK;
6135 PL_lex_state = LEX_INTERPEND;
6136 PL_bufptr = s;
6137#if 0
6138 if (PL_madskills) {
6139 if (!PL_thiswhite)
6140 PL_thiswhite = newSVpvs("");
6141 sv_catpvs(PL_thiswhite,"}");
6142 }
6143#endif
6144 return yylex(); /* ignore fake brackets */
6145 }
6146 if (PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
6147 && SvEVALED(PL_lex_repl))
6148 PL_lex_state = LEX_INTERPEND;
6149 else if (*s == '-' && s[1] == '>')
6150 PL_lex_state = LEX_INTERPENDMAYBE;
6151 else if (*s != '[' && *s != '{')
6152 PL_lex_state = LEX_INTERPEND;
6153 }
6154 }
6155 if (PL_expect & XFAKEBRACK) {
6156 PL_expect &= XENUMMASK;
6157 PL_bufptr = s;
6158 return yylex(); /* ignore fake brackets */
6159 }
6160 start_force(PL_curforce);
6161 if (PL_madskills) {
6162 curmad('X', newSVpvn(s-1,1));
6163 CURMAD('_', PL_thiswhite);
6164 }
6165 force_next(formbrack ? '.' : '}');
6166 if (formbrack) LEAVE;
6167#ifdef PERL_MAD
6168 if (PL_madskills && !PL_thistoken)
6169 PL_thistoken = newSVpvs("");
6170#endif
6171 if (formbrack == 2) { /* means . where arguments were expected */
6172 start_force(PL_curforce);
6173 force_next(';');
6174 TOKEN(FORMRBRACK);
6175 }
6176 TOKEN(';');
6177 case '&':
6178 s++;
6179 if (*s++ == '&') {
6180 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6181 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
6182 s -= 2;
6183 TOKEN(0);
6184 }
6185 AOPERATOR(ANDAND);
6186 }
6187 s--;
6188 if (PL_expect == XOPERATOR) {
6189 if (PL_bufptr == PL_linestart && ckWARN(WARN_SEMICOLON)
6190 && isIDFIRST_lazy_if(s,UTF))
6191 {
6192 CopLINE_dec(PL_curcop);
6193 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
6194 CopLINE_inc(PL_curcop);
6195 }
6196 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6197 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
6198 s--;
6199 TOKEN(0);
6200 }
6201 BAop(OP_BIT_AND);
6202 }
6203
6204 PL_tokenbuf[0] = '&';
6205 s = scan_ident(s - 1, PL_bufend, PL_tokenbuf + 1,
6206 sizeof PL_tokenbuf - 1, TRUE);
6207 if (PL_tokenbuf[1]) {
6208 PL_expect = XOPERATOR;
6209 force_ident_maybe_lex('&');
6210 }
6211 else
6212 PREREF('&');
6213 pl_yylval.ival = (OPpENTERSUB_AMPER<<8);
6214 TERM('&');
6215
6216 case '|':
6217 s++;
6218 if (*s++ == '|') {
6219 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6220 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
6221 s -= 2;
6222 TOKEN(0);
6223 }
6224 AOPERATOR(OROR);
6225 }
6226 s--;
6227 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6228 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
6229 s--;
6230 TOKEN(0);
6231 }
6232 BOop(OP_BIT_OR);
6233 case '=':
6234 s++;
6235 {
6236 const char tmp = *s++;
6237 if (tmp == '=') {
6238 if (!PL_lex_allbrackets &&
6239 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6240 s -= 2;
6241 TOKEN(0);
6242 }
6243 Eop(OP_EQ);
6244 }
6245 if (tmp == '>') {
6246 if (!PL_lex_allbrackets &&
6247 PL_lex_fakeeof >= LEX_FAKEEOF_COMMA) {
6248 s -= 2;
6249 TOKEN(0);
6250 }
6251 OPERATOR(',');
6252 }
6253 if (tmp == '~')
6254 PMop(OP_MATCH);
6255 if (tmp && isSPACE(*s) && ckWARN(WARN_SYNTAX)
6256 && strchr("+-*/%.^&|<",tmp))
6257 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6258 "Reversed %c= operator",(int)tmp);
6259 s--;
6260 if (PL_expect == XSTATE && isALPHA(tmp) &&
6261 (s == PL_linestart+1 || s[-2] == '\n') )
6262 {
6263 if ((PL_in_eval && !PL_rsfp && !PL_parser->filtered)
6264 || PL_lex_state != LEX_NORMAL) {
6265 d = PL_bufend;
6266 while (s < d) {
6267 if (*s++ == '\n') {
6268 incline(s);
6269 if (strnEQ(s,"=cut",4)) {
6270 s = strchr(s,'\n');
6271 if (s)
6272 s++;
6273 else
6274 s = d;
6275 incline(s);
6276 goto retry;
6277 }
6278 }
6279 }
6280 goto retry;
6281 }
6282#ifdef PERL_MAD
6283 if (PL_madskills) {
6284 if (!PL_thiswhite)
6285 PL_thiswhite = newSVpvs("");
6286 sv_catpvn(PL_thiswhite, PL_linestart,
6287 PL_bufend - PL_linestart);
6288 }
6289#endif
6290 s = PL_bufend;
6291 PL_parser->in_pod = 1;
6292 goto retry;
6293 }
6294 }
6295 if (PL_expect == XBLOCK) {
6296 const char *t = s;
6297#ifdef PERL_STRICT_CR
6298 while (SPACE_OR_TAB(*t))
6299#else
6300 while (SPACE_OR_TAB(*t) || *t == '\r')
6301#endif
6302 t++;
6303 if (*t == '\n' || *t == '#') {
6304 formbrack = 1;
6305 ENTER;
6306 SAVEI8(PL_parser->form_lex_state);
6307 SAVEI32(PL_lex_formbrack);
6308 PL_parser->form_lex_state = PL_lex_state;
6309 PL_lex_formbrack = PL_lex_brackets + 1;
6310 goto leftbracket;
6311 }
6312 }
6313 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6314 s--;
6315 TOKEN(0);
6316 }
6317 pl_yylval.ival = 0;
6318 OPERATOR(ASSIGNOP);
6319 case '!':
6320 s++;
6321 {
6322 const char tmp = *s++;
6323 if (tmp == '=') {
6324 /* was this !=~ where !~ was meant?
6325 * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */
6326
6327 if (*s == '~' && ckWARN(WARN_SYNTAX)) {
6328 const char *t = s+1;
6329
6330 while (t < PL_bufend && isSPACE(*t))
6331 ++t;
6332
6333 if (*t == '/' || *t == '?' ||
6334 ((*t == 'm' || *t == 's' || *t == 'y')
6335 && !isWORDCHAR(t[1])) ||
6336 (*t == 't' && t[1] == 'r' && !isWORDCHAR(t[2])))
6337 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6338 "!=~ should be !~");
6339 }
6340 if (!PL_lex_allbrackets &&
6341 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6342 s -= 2;
6343 TOKEN(0);
6344 }
6345 Eop(OP_NE);
6346 }
6347 if (tmp == '~')
6348 PMop(OP_NOT);
6349 }
6350 s--;
6351 OPERATOR('!');
6352 case '<':
6353 if (PL_expect != XOPERATOR) {
6354 if (s[1] != '<' && !strchr(s,'>'))
6355 check_uni();
6356 if (s[1] == '<')
6357 s = scan_heredoc(s);
6358 else
6359 s = scan_inputsymbol(s);
6360 PL_expect = XOPERATOR;
6361 TOKEN(sublex_start());
6362 }
6363 s++;
6364 {
6365 char tmp = *s++;
6366 if (tmp == '<') {
6367 if (*s == '=' && !PL_lex_allbrackets &&
6368 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6369 s -= 2;
6370 TOKEN(0);
6371 }
6372 SHop(OP_LEFT_SHIFT);
6373 }
6374 if (tmp == '=') {
6375 tmp = *s++;
6376 if (tmp == '>') {
6377 if (!PL_lex_allbrackets &&
6378 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6379 s -= 3;
6380 TOKEN(0);
6381 }
6382 Eop(OP_NCMP);
6383 }
6384 s--;
6385 if (!PL_lex_allbrackets &&
6386 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6387 s -= 2;
6388 TOKEN(0);
6389 }
6390 Rop(OP_LE);
6391 }
6392 }
6393 s--;
6394 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6395 s--;
6396 TOKEN(0);
6397 }
6398 Rop(OP_LT);
6399 case '>':
6400 s++;
6401 {
6402 const char tmp = *s++;
6403 if (tmp == '>') {
6404 if (*s == '=' && !PL_lex_allbrackets &&
6405 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6406 s -= 2;
6407 TOKEN(0);
6408 }
6409 SHop(OP_RIGHT_SHIFT);
6410 }
6411 else if (tmp == '=') {
6412 if (!PL_lex_allbrackets &&
6413 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6414 s -= 2;
6415 TOKEN(0);
6416 }
6417 Rop(OP_GE);
6418 }
6419 }
6420 s--;
6421 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6422 s--;
6423 TOKEN(0);
6424 }
6425 Rop(OP_GT);
6426
6427 case '$':
6428 CLINE;
6429
6430 if (PL_expect == XOPERATOR) {
6431 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
6432 return deprecate_commaless_var_list();
6433 }
6434 }
6435
6436 if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-@", s[2]))) {
6437 PL_tokenbuf[0] = '@';
6438 s = scan_ident(s + 1, PL_bufend, PL_tokenbuf + 1,
6439 sizeof PL_tokenbuf - 1, FALSE);
6440 if (PL_expect == XOPERATOR)
6441 no_op("Array length", s);
6442 if (!PL_tokenbuf[1])
6443 PREREF(DOLSHARP);
6444 PL_expect = XOPERATOR;
6445 force_ident_maybe_lex('#');
6446 TOKEN(DOLSHARP);
6447 }
6448
6449 PL_tokenbuf[0] = '$';
6450 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
6451 sizeof PL_tokenbuf - 1, FALSE);
6452 if (PL_expect == XOPERATOR)
6453 no_op("Scalar", s);
6454 if (!PL_tokenbuf[1]) {
6455 if (s == PL_bufend)
6456 yyerror("Final $ should be \\$ or $name");
6457 PREREF('$');
6458 }
6459
6460 d = s;
6461 {
6462 const char tmp = *s;
6463 if (PL_lex_state == LEX_NORMAL || PL_lex_brackets)
6464 s = SKIPSPACE1(s);
6465
6466 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
6467 && intuit_more(s)) {
6468 if (*s == '[') {
6469 PL_tokenbuf[0] = '@';
6470 if (ckWARN(WARN_SYNTAX)) {
6471 char *t = s+1;
6472
6473 while (isSPACE(*t) || isWORDCHAR_lazy_if(t,UTF) || *t == '$')
6474 t++;
6475 if (*t++ == ',') {
6476 PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
6477 while (t < PL_bufend && *t != ']')
6478 t++;
6479 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6480 "Multidimensional syntax %.*s not supported",
6481 (int)((t - PL_bufptr) + 1), PL_bufptr);
6482 }
6483 }
6484 }
6485 else if (*s == '{') {
6486 char *t;
6487 PL_tokenbuf[0] = '%';
6488 if (strEQ(PL_tokenbuf+1, "SIG") && ckWARN(WARN_SYNTAX)
6489 && (t = strchr(s, '}')) && (t = strchr(t, '=')))
6490 {
6491 char tmpbuf[sizeof PL_tokenbuf];
6492 do {
6493 t++;
6494 } while (isSPACE(*t));
6495 if (isIDFIRST_lazy_if(t,UTF)) {
6496 STRLEN len;
6497 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE,
6498 &len);
6499 while (isSPACE(*t))
6500 t++;
6501 if (*t == ';'
6502 && get_cvn_flags(tmpbuf, len, UTF ? SVf_UTF8 : 0))
6503 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6504 "You need to quote \"%"SVf"\"",
6505 SVfARG(newSVpvn_flags(tmpbuf, len,
6506 SVs_TEMP | (UTF ? SVf_UTF8 : 0))));
6507 }
6508 }
6509 }
6510 }
6511
6512 PL_expect = XOPERATOR;
6513 if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
6514 const bool islop = (PL_last_lop == PL_oldoldbufptr);
6515 if (!islop || PL_last_lop_op == OP_GREPSTART)
6516 PL_expect = XOPERATOR;
6517 else if (strchr("$@\"'`q", *s))
6518 PL_expect = XTERM; /* e.g. print $fh "foo" */
6519 else if (strchr("&*<%", *s) && isIDFIRST_lazy_if(s+1,UTF))
6520 PL_expect = XTERM; /* e.g. print $fh &sub */
6521 else if (isIDFIRST_lazy_if(s,UTF)) {
6522 char tmpbuf[sizeof PL_tokenbuf];
6523 int t2;
6524 scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
6525 if ((t2 = keyword(tmpbuf, len, 0))) {
6526 /* binary operators exclude handle interpretations */
6527 switch (t2) {
6528 case -KEY_x:
6529 case -KEY_eq:
6530 case -KEY_ne:
6531 case -KEY_gt:
6532 case -KEY_lt:
6533 case -KEY_ge:
6534 case -KEY_le:
6535 case -KEY_cmp:
6536 break;
6537 default:
6538 PL_expect = XTERM; /* e.g. print $fh length() */
6539 break;
6540 }
6541 }
6542 else {
6543 PL_expect = XTERM; /* e.g. print $fh subr() */
6544 }
6545 }
6546 else if (isDIGIT(*s))
6547 PL_expect = XTERM; /* e.g. print $fh 3 */
6548 else if (*s == '.' && isDIGIT(s[1]))
6549 PL_expect = XTERM; /* e.g. print $fh .3 */
6550 else if ((*s == '?' || *s == '-' || *s == '+')
6551 && !isSPACE(s[1]) && s[1] != '=')
6552 PL_expect = XTERM; /* e.g. print $fh -1 */
6553 else if (*s == '/' && !isSPACE(s[1]) && s[1] != '='
6554 && s[1] != '/')
6555 PL_expect = XTERM; /* e.g. print $fh /.../
6556 XXX except DORDOR operator
6557 */
6558 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2])
6559 && s[2] != '=')
6560 PL_expect = XTERM; /* print $fh <<"EOF" */
6561 }
6562 }
6563 force_ident_maybe_lex('$');
6564 TOKEN('$');
6565
6566 case '@':
6567 if (PL_expect == XOPERATOR)
6568 no_op("Array", s);
6569 PL_tokenbuf[0] = '@';
6570 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
6571 if (!PL_tokenbuf[1]) {
6572 PREREF('@');
6573 }
6574 if (PL_lex_state == LEX_NORMAL)
6575 s = SKIPSPACE1(s);
6576 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
6577 if (*s == '{')
6578 PL_tokenbuf[0] = '%';
6579
6580 /* Warn about @ where they meant $. */
6581 if (*s == '[' || *s == '{') {
6582 if (ckWARN(WARN_SYNTAX)) {
6583 const char *t = s + 1;
6584 while (*t && (isWORDCHAR_lazy_if(t,UTF) || strchr(" \t$#+-'\"", *t)))
6585 t += UTF ? UTF8SKIP(t) : 1;
6586 if (*t == '}' || *t == ']') {
6587 t++;
6588 PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
6589 /* diag_listed_as: Scalar value @%s[%s] better written as $%s[%s] */
6590 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6591 "Scalar value %"SVf" better written as $%"SVf,
6592 SVfARG(newSVpvn_flags(PL_bufptr, (STRLEN)(t-PL_bufptr),
6593 SVs_TEMP | (UTF ? SVf_UTF8 : 0 ))),
6594 SVfARG(newSVpvn_flags(PL_bufptr+1, (STRLEN)(t-PL_bufptr-1),
6595 SVs_TEMP | (UTF ? SVf_UTF8 : 0 ))));
6596 }
6597 }
6598 }
6599 }
6600 PL_expect = XOPERATOR;
6601 force_ident_maybe_lex('@');
6602 TERM('@');
6603
6604 case '/': /* may be division, defined-or, or pattern */
6605 if (PL_expect == XTERMORDORDOR && s[1] == '/') {
6606 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6607 (s[2] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC))
6608 TOKEN(0);
6609 s += 2;
6610 AOPERATOR(DORDOR);
6611 }
6612 case '?': /* may either be conditional or pattern */
6613 if (PL_expect == XOPERATOR) {
6614 char tmp = *s++;
6615 if(tmp == '?') {
6616 if (!PL_lex_allbrackets &&
6617 PL_lex_fakeeof >= LEX_FAKEEOF_IFELSE) {
6618 s--;
6619 TOKEN(0);
6620 }
6621 PL_lex_allbrackets++;
6622 OPERATOR('?');
6623 }
6624 else {
6625 tmp = *s++;
6626 if(tmp == '/') {
6627 /* A // operator. */
6628 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6629 (*s == '=' ? LEX_FAKEEOF_ASSIGN :
6630 LEX_FAKEEOF_LOGIC)) {
6631 s -= 2;
6632 TOKEN(0);
6633 }
6634 AOPERATOR(DORDOR);
6635 }
6636 else {
6637 s--;
6638 if (*s == '=' && !PL_lex_allbrackets &&
6639 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6640 s--;
6641 TOKEN(0);
6642 }
6643 Mop(OP_DIVIDE);
6644 }
6645 }
6646 }
6647 else {
6648 /* Disable warning on "study /blah/" */
6649 if (PL_oldoldbufptr == PL_last_uni
6650 && (*PL_last_uni != 's' || s - PL_last_uni < 5
6651 || memNE(PL_last_uni, "study", 5)
6652 || isWORDCHAR_lazy_if(PL_last_uni+5,UTF)
6653 ))
6654 check_uni();
6655 if (*s == '?')
6656 deprecate("?PATTERN? without explicit operator");
6657 s = scan_pat(s,OP_MATCH);
6658 TERM(sublex_start());
6659 }
6660
6661 case '.':
6662 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
6663#ifdef PERL_STRICT_CR
6664 && s[1] == '\n'
6665#else
6666 && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
6667#endif
6668 && (s == PL_linestart || s[-1] == '\n') )
6669 {
6670 PL_expect = XSTATE;
6671 formbrack = 2; /* dot seen where arguments expected */
6672 goto rightbracket;
6673 }
6674 if (PL_expect == XSTATE && s[1] == '.' && s[2] == '.') {
6675 s += 3;
6676 OPERATOR(YADAYADA);
6677 }
6678 if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
6679 char tmp = *s++;
6680 if (*s == tmp) {
6681 if (!PL_lex_allbrackets &&
6682 PL_lex_fakeeof >= LEX_FAKEEOF_RANGE) {
6683 s--;
6684 TOKEN(0);
6685 }
6686 s++;
6687 if (*s == tmp) {
6688 s++;
6689 pl_yylval.ival = OPf_SPECIAL;
6690 }
6691 else
6692 pl_yylval.ival = 0;
6693 OPERATOR(DOTDOT);
6694 }
6695 if (*s == '=' && !PL_lex_allbrackets &&
6696 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6697 s--;
6698 TOKEN(0);
6699 }
6700 Aop(OP_CONCAT);
6701 }
6702 /* FALL THROUGH */
6703 case '0': case '1': case '2': case '3': case '4':
6704 case '5': case '6': case '7': case '8': case '9':
6705 s = scan_num(s, &pl_yylval);
6706 DEBUG_T( { printbuf("### Saw number in %s\n", s); } );
6707 if (PL_expect == XOPERATOR)
6708 no_op("Number",s);
6709 TERM(THING);
6710
6711 case '\'':
6712 s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE);
6713 DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
6714 if (PL_expect == XOPERATOR) {
6715 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
6716 return deprecate_commaless_var_list();
6717 }
6718 else
6719 no_op("String",s);
6720 }
6721 if (!s)
6722 missingterm(NULL);
6723 pl_yylval.ival = OP_CONST;
6724 TERM(sublex_start());
6725
6726 case '"':
6727 s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE);
6728 DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
6729 if (PL_expect == XOPERATOR) {
6730 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
6731 return deprecate_commaless_var_list();
6732 }
6733 else
6734 no_op("String",s);
6735 }
6736 if (!s)
6737 missingterm(NULL);
6738 pl_yylval.ival = OP_CONST;
6739 /* FIXME. I think that this can be const if char *d is replaced by
6740 more localised variables. */
6741 for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
6742 if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) {
6743 pl_yylval.ival = OP_STRINGIFY;
6744 break;
6745 }
6746 }
6747 TERM(sublex_start());
6748
6749 case '`':
6750 s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE);
6751 DEBUG_T( { printbuf("### Saw backtick string before %s\n", s); } );
6752 if (PL_expect == XOPERATOR)
6753 no_op("Backticks",s);
6754 if (!s)
6755 missingterm(NULL);
6756 readpipe_override();
6757 TERM(sublex_start());
6758
6759 case '\\':
6760 s++;
6761 if (PL_lex_inwhat && isDIGIT(*s))
6762 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression",
6763 *s, *s);
6764 if (PL_expect == XOPERATOR)
6765 no_op("Backslash",s);
6766 OPERATOR(REFGEN);
6767
6768 case 'v':
6769 if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
6770 char *start = s + 2;
6771 while (isDIGIT(*start) || *start == '_')
6772 start++;
6773 if (*start == '.' && isDIGIT(start[1])) {
6774 s = scan_num(s, &pl_yylval);
6775 TERM(THING);
6776 }
6777 else if ((*start == ':' && start[1] == ':')
6778 || (PL_expect == XSTATE && *start == ':'))
6779 goto keylookup;
6780 else if (PL_expect == XSTATE) {
6781 d = start;
6782 while (d < PL_bufend && isSPACE(*d)) d++;
6783 if (*d == ':') goto keylookup;
6784 }
6785 /* avoid v123abc() or $h{v1}, allow C<print v10;> */
6786 if (!isALPHA(*start) && (PL_expect == XTERM
6787 || PL_expect == XREF || PL_expect == XSTATE
6788 || PL_expect == XTERMORDORDOR)) {
6789 GV *const gv = gv_fetchpvn_flags(s, start - s,
6790 UTF ? SVf_UTF8 : 0, SVt_PVCV);
6791 if (!gv) {
6792 s = scan_num(s, &pl_yylval);
6793 TERM(THING);
6794 }
6795 }
6796 }
6797 goto keylookup;
6798 case 'x':
6799 if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
6800 s++;
6801 Mop(OP_REPEAT);
6802 }
6803 goto keylookup;
6804
6805 case '_':
6806 case 'a': case 'A':
6807 case 'b': case 'B':
6808 case 'c': case 'C':
6809 case 'd': case 'D':
6810 case 'e': case 'E':
6811 case 'f': case 'F':
6812 case 'g': case 'G':
6813 case 'h': case 'H':
6814 case 'i': case 'I':
6815 case 'j': case 'J':
6816 case 'k': case 'K':
6817 case 'l': case 'L':
6818 case 'm': case 'M':
6819 case 'n': case 'N':
6820 case 'o': case 'O':
6821 case 'p': case 'P':
6822 case 'q': case 'Q':
6823 case 'r': case 'R':
6824 case 's': case 'S':
6825 case 't': case 'T':
6826 case 'u': case 'U':
6827 case 'V':
6828 case 'w': case 'W':
6829 case 'X':
6830 case 'y': case 'Y':
6831 case 'z': case 'Z':
6832
6833 keylookup: {
6834 bool anydelim;
6835 bool lex;
6836 I32 tmp;
6837 SV *sv;
6838 CV *cv;
6839 PADOFFSET off;
6840 OP *rv2cv_op;
6841
6842 lex = FALSE;
6843 orig_keyword = 0;
6844 off = 0;
6845 sv = NULL;
6846 cv = NULL;
6847 gv = NULL;
6848 gvp = NULL;
6849 rv2cv_op = NULL;
6850
6851 PL_bufptr = s;
6852 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
6853
6854 /* Some keywords can be followed by any delimiter, including ':' */
6855 anydelim = word_takes_any_delimeter(PL_tokenbuf, len);
6856
6857 /* x::* is just a word, unless x is "CORE" */
6858 if (!anydelim && *s == ':' && s[1] == ':' && strNE(PL_tokenbuf, "CORE"))
6859 goto just_a_word;
6860
6861 d = s;
6862 while (d < PL_bufend && isSPACE(*d))
6863 d++; /* no comments skipped here, or s### is misparsed */
6864
6865 /* Is this a word before a => operator? */
6866 if (*d == '=' && d[1] == '>') {
6867 CLINE;
6868 pl_yylval.opval
6869 = (OP*)newSVOP(OP_CONST, 0,
6870 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
6871 pl_yylval.opval->op_private = OPpCONST_BARE;
6872 TERM(WORD);
6873 }
6874
6875 /* Check for plugged-in keyword */
6876 {
6877 OP *o;
6878 int result;
6879 char *saved_bufptr = PL_bufptr;
6880 PL_bufptr = s;
6881 result = PL_keyword_plugin(aTHX_ PL_tokenbuf, len, &o);
6882 s = PL_bufptr;
6883 if (result == KEYWORD_PLUGIN_DECLINE) {
6884 /* not a plugged-in keyword */
6885 PL_bufptr = saved_bufptr;
6886 } else if (result == KEYWORD_PLUGIN_STMT) {
6887 pl_yylval.opval = o;
6888 CLINE;
6889 PL_expect = XSTATE;
6890 return REPORT(PLUGSTMT);
6891 } else if (result == KEYWORD_PLUGIN_EXPR) {
6892 pl_yylval.opval = o;
6893 CLINE;
6894 PL_expect = XOPERATOR;
6895 return REPORT(PLUGEXPR);
6896 } else {
6897 Perl_croak(aTHX_ "Bad plugin affecting keyword '%s'",
6898 PL_tokenbuf);
6899 }
6900 }
6901
6902 /* Check for built-in keyword */
6903 tmp = keyword(PL_tokenbuf, len, 0);
6904
6905 /* Is this a label? */
6906 if (!anydelim && PL_expect == XSTATE
6907 && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
6908 s = d + 1;
6909 pl_yylval.pval = savepvn(PL_tokenbuf, len+1);
6910 pl_yylval.pval[len] = '\0';
6911 pl_yylval.pval[len+1] = UTF ? 1 : 0;
6912 CLINE;
6913 TOKEN(LABEL);
6914 }
6915
6916 /* Check for lexical sub */
6917 if (PL_expect != XOPERATOR) {
6918 char tmpbuf[sizeof PL_tokenbuf + 1];
6919 *tmpbuf = '&';
6920 Copy(PL_tokenbuf, tmpbuf+1, len, char);
6921 off = pad_findmy_pvn(tmpbuf, len+1, UTF ? SVf_UTF8 : 0);
6922 if (off != NOT_IN_PAD) {
6923 assert(off); /* we assume this is boolean-true below */
6924 if (PAD_COMPNAME_FLAGS_isOUR(off)) {
6925 HV * const stash = PAD_COMPNAME_OURSTASH(off);
6926 HEK * const stashname = HvNAME_HEK(stash);
6927 sv = newSVhek(stashname);
6928 sv_catpvs(sv, "::");
6929 sv_catpvn_flags(sv, PL_tokenbuf, len,
6930 (UTF ? SV_CATUTF8 : SV_CATBYTES));
6931 gv = gv_fetchsv(sv, GV_NOADD_NOINIT | SvUTF8(sv),
6932 SVt_PVCV);
6933 off = 0;
6934 if (!gv) {
6935 sv_free(sv);
6936 sv = NULL;
6937 goto just_a_word;
6938 }
6939 }
6940 else {
6941 rv2cv_op = newOP(OP_PADANY, 0);
6942 rv2cv_op->op_targ = off;
6943 cv = find_lexical_cv(off);
6944 }
6945 lex = TRUE;
6946 goto just_a_word;
6947 }
6948 off = 0;
6949 }
6950
6951 if (tmp < 0) { /* second-class keyword? */
6952 GV *ogv = NULL; /* override (winner) */
6953 GV *hgv = NULL; /* hidden (loser) */
6954 if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
6955 CV *cv;
6956 if ((gv = gv_fetchpvn_flags(PL_tokenbuf, len,
6957 UTF ? SVf_UTF8 : 0, SVt_PVCV)) &&
6958 (cv = GvCVu(gv)))
6959 {
6960 if (GvIMPORTED_CV(gv))
6961 ogv = gv;
6962 else if (! CvMETHOD(cv))
6963 hgv = gv;
6964 }
6965 if (!ogv &&
6966 (gvp = (GV**)hv_fetch(PL_globalstash, PL_tokenbuf,
6967 UTF ? -(I32)len : (I32)len, FALSE)) &&
6968 (gv = *gvp) && isGV_with_GP(gv) &&
6969 GvCVu(gv) && GvIMPORTED_CV(gv))
6970 {
6971 ogv = gv;
6972 }
6973 }
6974 if (ogv) {
6975 orig_keyword = tmp;
6976 tmp = 0; /* overridden by import or by GLOBAL */
6977 }
6978 else if (gv && !gvp
6979 && -tmp==KEY_lock /* XXX generalizable kludge */
6980 && GvCVu(gv))
6981 {
6982 tmp = 0; /* any sub overrides "weak" keyword */
6983 }
6984 else { /* no override */
6985 tmp = -tmp;
6986 if (tmp == KEY_dump) {
6987 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
6988 "dump() better written as CORE::dump()");
6989 }
6990 gv = NULL;
6991 gvp = 0;
6992 if (hgv && tmp != KEY_x && tmp != KEY_CORE) /* never ambiguous */
6993 Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
6994 "Ambiguous call resolved as CORE::%s(), "
6995 "qualify as such or use &",
6996 GvENAME(hgv));
6997 }
6998 }
6999
7000 reserved_word:
7001 switch (tmp) {
7002
7003 default: /* not a keyword */
7004 /* Trade off - by using this evil construction we can pull the
7005 variable gv into the block labelled keylookup. If not, then
7006 we have to give it function scope so that the goto from the
7007 earlier ':' case doesn't bypass the initialisation. */
7008 if (0) {
7009 just_a_word_zero_gv:
7010 sv = NULL;
7011 cv = NULL;
7012 gv = NULL;
7013 gvp = NULL;
7014 rv2cv_op = NULL;
7015 orig_keyword = 0;
7016 lex = 0;
7017 off = 0;
7018 }
7019 just_a_word: {
7020 int pkgname = 0;
7021 const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
7022 const char penultchar =
7023 lastchar && PL_bufptr - 2 >= PL_linestart
7024 ? PL_bufptr[-2]
7025 : 0;
7026#ifdef PERL_MAD
7027 SV *nextPL_nextwhite = 0;
7028#endif
7029
7030
7031 /* Get the rest if it looks like a package qualifier */
7032
7033 if (*s == '\'' || (*s == ':' && s[1] == ':')) {
7034 STRLEN morelen;
7035 s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
7036 TRUE, &morelen);
7037 if (!morelen)
7038 Perl_croak(aTHX_ "Bad name after %"SVf"%s",
7039 SVfARG(newSVpvn_flags(PL_tokenbuf, len,
7040 (UTF ? SVf_UTF8 : 0) | SVs_TEMP )),
7041 *s == '\'' ? "'" : "::");
7042 len += morelen;
7043 pkgname = 1;
7044 }
7045
7046 if (PL_expect == XOPERATOR) {
7047 if (PL_bufptr == PL_linestart) {
7048 CopLINE_dec(PL_curcop);
7049 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
7050 CopLINE_inc(PL_curcop);
7051 }
7052 else
7053 no_op("Bareword",s);
7054 }
7055
7056 /* Look for a subroutine with this name in current package,
7057 unless this is a lexical sub, or name is "Foo::",
7058 in which case Foo is a bareword
7059 (and a package name). */
7060
7061 if (len > 2 && !PL_madskills &&
7062 PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':')
7063 {
7064 if (ckWARN(WARN_BAREWORD)
7065 && ! gv_fetchpvn_flags(PL_tokenbuf, len, UTF ? SVf_UTF8 : 0, SVt_PVHV))
7066 Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
7067 "Bareword \"%"SVf"\" refers to nonexistent package",
7068 SVfARG(newSVpvn_flags(PL_tokenbuf, len,
7069 (UTF ? SVf_UTF8 : 0) | SVs_TEMP)));
7070 len -= 2;
7071 PL_tokenbuf[len] = '\0';
7072 gv = NULL;
7073 gvp = 0;
7074 }
7075 else {
7076 if (!lex && !gv) {
7077 /* Mustn't actually add anything to a symbol table.
7078 But also don't want to "initialise" any placeholder
7079 constants that might already be there into full
7080 blown PVGVs with attached PVCV. */
7081 gv = gv_fetchpvn_flags(PL_tokenbuf, len,
7082 GV_NOADD_NOINIT | ( UTF ? SVf_UTF8 : 0 ),
7083 SVt_PVCV);
7084 }
7085 len = 0;
7086 }
7087
7088 /* if we saw a global override before, get the right name */
7089
7090 if (!sv)
7091 sv = S_newSV_maybe_utf8(aTHX_ PL_tokenbuf,
7092 len ? len : strlen(PL_tokenbuf));
7093 if (gvp) {
7094 SV * const tmp_sv = sv;
7095 sv = newSVpvs("CORE::GLOBAL::");
7096 sv_catsv(sv, tmp_sv);
7097 SvREFCNT_dec(tmp_sv);
7098 }
7099
7100#ifdef PERL_MAD
7101 if (PL_madskills && !PL_thistoken) {
7102 char *start = SvPVX(PL_linestr) + PL_realtokenstart;
7103 PL_thistoken = newSVpvn(start,s - start);
7104 PL_realtokenstart = s - SvPVX(PL_linestr);
7105 }
7106#endif
7107
7108 /* Presume this is going to be a bareword of some sort. */
7109 CLINE;
7110 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
7111 pl_yylval.opval->op_private = OPpCONST_BARE;
7112
7113 /* And if "Foo::", then that's what it certainly is. */
7114 if (len)
7115 goto safe_bareword;
7116
7117 if (!off)
7118 {
7119 OP *const_op = newSVOP(OP_CONST, 0, SvREFCNT_inc_NN(sv));
7120 const_op->op_private = OPpCONST_BARE;
7121 rv2cv_op = newCVREF(0, const_op);
7122 cv = lex ? GvCV(gv) : rv2cv_op_cv(rv2cv_op, 0);
7123 }
7124
7125 /* See if it's the indirect object for a list operator. */
7126
7127 if (PL_oldoldbufptr &&
7128 PL_oldoldbufptr < PL_bufptr &&
7129 (PL_oldoldbufptr == PL_last_lop
7130 || PL_oldoldbufptr == PL_last_uni) &&
7131 /* NO SKIPSPACE BEFORE HERE! */
7132 (PL_expect == XREF ||
7133 ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7) == OA_FILEREF))
7134 {
7135 bool immediate_paren = *s == '(';
7136
7137 /* (Now we can afford to cross potential line boundary.) */
7138 s = SKIPSPACE2(s,nextPL_nextwhite);
7139#ifdef PERL_MAD
7140 PL_nextwhite = nextPL_nextwhite; /* assume no & deception */
7141#endif
7142
7143 /* Two barewords in a row may indicate method call. */
7144
7145 if ((isIDFIRST_lazy_if(s,UTF) || *s == '$') &&
7146 (tmp = intuit_method(s, gv, cv))) {
7147 op_free(rv2cv_op);
7148 if (tmp == METHOD && !PL_lex_allbrackets &&
7149 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7150 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7151 return REPORT(tmp);
7152 }
7153
7154 /* If not a declared subroutine, it's an indirect object. */
7155 /* (But it's an indir obj regardless for sort.) */
7156 /* Also, if "_" follows a filetest operator, it's a bareword */
7157
7158 if (
7159 ( !immediate_paren && (PL_last_lop_op == OP_SORT ||
7160 (!cv &&
7161 (PL_last_lop_op != OP_MAPSTART &&
7162 PL_last_lop_op != OP_GREPSTART))))
7163 || (PL_tokenbuf[0] == '_' && PL_tokenbuf[1] == '\0'
7164 && ((PL_opargs[PL_last_lop_op] & OA_CLASS_MASK) == OA_FILESTATOP))
7165 )
7166 {
7167 PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR;
7168 goto bareword;
7169 }
7170 }
7171
7172 PL_expect = XOPERATOR;
7173#ifdef PERL_MAD
7174 if (isSPACE(*s))
7175 s = SKIPSPACE2(s,nextPL_nextwhite);
7176 PL_nextwhite = nextPL_nextwhite;
7177#else
7178 s = skipspace(s);
7179#endif
7180
7181 /* Is this a word before a => operator? */
7182 if (*s == '=' && s[1] == '>' && !pkgname) {
7183 op_free(rv2cv_op);
7184 CLINE;
7185 sv_setpv(((SVOP*)pl_yylval.opval)->op_sv, PL_tokenbuf);
7186 if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len))
7187 SvUTF8_on(((SVOP*)pl_yylval.opval)->op_sv);
7188 TERM(WORD);
7189 }
7190
7191 /* If followed by a paren, it's certainly a subroutine. */
7192 if (*s == '(') {
7193 CLINE;
7194 if (cv) {
7195 d = s + 1;
7196 while (SPACE_OR_TAB(*d))
7197 d++;
7198 if (*d == ')' && (sv = cv_const_sv(cv))) {
7199 s = d + 1;
7200 goto its_constant;
7201 }
7202 }
7203#ifdef PERL_MAD
7204 if (PL_madskills) {
7205 PL_nextwhite = PL_thiswhite;
7206 PL_thiswhite = 0;
7207 }
7208 start_force(PL_curforce);
7209#endif
7210 NEXTVAL_NEXTTOKE.opval =
7211 off ? rv2cv_op : pl_yylval.opval;
7212 PL_expect = XOPERATOR;
7213#ifdef PERL_MAD
7214 if (PL_madskills) {
7215 PL_nextwhite = nextPL_nextwhite;
7216 curmad('X', PL_thistoken);
7217 PL_thistoken = newSVpvs("");
7218 }
7219#endif
7220 if (off)
7221 op_free(pl_yylval.opval), force_next(PRIVATEREF);
7222 else op_free(rv2cv_op), force_next(WORD);
7223 pl_yylval.ival = 0;
7224 TOKEN('&');
7225 }
7226
7227 /* If followed by var or block, call it a method (unless sub) */
7228
7229 if ((*s == '$' || *s == '{') && !cv) {
7230 op_free(rv2cv_op);
7231 PL_last_lop = PL_oldbufptr;
7232 PL_last_lop_op = OP_METHOD;
7233 if (!PL_lex_allbrackets &&
7234 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7235 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7236 PREBLOCK(METHOD);
7237 }
7238
7239 /* If followed by a bareword, see if it looks like indir obj. */
7240
7241 if (!orig_keyword
7242 && (isIDFIRST_lazy_if(s,UTF) || *s == '$')
7243 && (tmp = intuit_method(s, gv, cv))) {
7244 op_free(rv2cv_op);
7245 if (tmp == METHOD && !PL_lex_allbrackets &&
7246 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7247 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7248 return REPORT(tmp);
7249 }
7250
7251 /* Not a method, so call it a subroutine (if defined) */
7252
7253 if (cv) {
7254 if (lastchar == '-' && penultchar != '-') {
7255 const SV *tmpsv = newSVpvn_flags( PL_tokenbuf, len ? len : strlen(PL_tokenbuf), (UTF ? SVf_UTF8 : 0) | SVs_TEMP );
7256 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
7257 "Ambiguous use of -%"SVf" resolved as -&%"SVf"()",
7258 SVfARG(tmpsv), SVfARG(tmpsv));
7259 }
7260 /* Check for a constant sub */
7261 if ((sv = cv_const_sv(cv))) {
7262 its_constant:
7263 op_free(rv2cv_op);
7264 SvREFCNT_dec(((SVOP*)pl_yylval.opval)->op_sv);
7265 ((SVOP*)pl_yylval.opval)->op_sv = SvREFCNT_inc_simple(sv);
7266 pl_yylval.opval->op_private = OPpCONST_FOLDED;
7267 pl_yylval.opval->op_flags |= OPf_SPECIAL;
7268 TOKEN(WORD);
7269 }
7270
7271 op_free(pl_yylval.opval);
7272 pl_yylval.opval =
7273 off ? (OP *)newCVREF(0, rv2cv_op) : rv2cv_op;
7274 pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
7275 PL_last_lop = PL_oldbufptr;
7276 PL_last_lop_op = OP_ENTERSUB;
7277 /* Is there a prototype? */
7278 if (
7279#ifdef PERL_MAD
7280 cv &&
7281#endif
7282 SvPOK(cv))
7283 {
7284 STRLEN protolen = CvPROTOLEN(cv);
7285 const char *proto = CvPROTO(cv);
7286 bool optional;
7287 if (!protolen)
7288 TERM(FUNC0SUB);
7289 if ((optional = *proto == ';'))
7290 do
7291 proto++;
7292 while (*proto == ';');
7293 if (
7294 (
7295 (
7296 *proto == '$' || *proto == '_'
7297 || *proto == '*' || *proto == '+'
7298 )
7299 && proto[1] == '\0'
7300 )
7301 || (
7302 *proto == '\\' && proto[1] && proto[2] == '\0'
7303 )
7304 )
7305 UNIPROTO(UNIOPSUB,optional);
7306 if (*proto == '\\' && proto[1] == '[') {
7307 const char *p = proto + 2;
7308 while(*p && *p != ']')
7309 ++p;
7310 if(*p == ']' && !p[1])
7311 UNIPROTO(UNIOPSUB,optional);
7312 }
7313 if (*proto == '&' && *s == '{') {
7314 if (PL_curstash)
7315 sv_setpvs(PL_subname, "__ANON__");
7316 else
7317 sv_setpvs(PL_subname, "__ANON__::__ANON__");
7318 if (!PL_lex_allbrackets &&
7319 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7320 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7321 PREBLOCK(LSTOPSUB);
7322 }
7323 }
7324#ifdef PERL_MAD
7325 {
7326 if (PL_madskills) {
7327 PL_nextwhite = PL_thiswhite;
7328 PL_thiswhite = 0;
7329 }
7330 start_force(PL_curforce);
7331 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
7332 PL_expect = XTERM;
7333 if (PL_madskills) {
7334 PL_nextwhite = nextPL_nextwhite;
7335 curmad('X', PL_thistoken);
7336 PL_thistoken = newSVpvs("");
7337 }
7338 force_next(off ? PRIVATEREF : WORD);
7339 if (!PL_lex_allbrackets &&
7340 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7341 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7342 TOKEN(NOAMP);
7343 }
7344 }
7345
7346 /* Guess harder when madskills require "best effort". */
7347 if (PL_madskills && (!gv || !GvCVu(gv))) {
7348 int probable_sub = 0;
7349 if (strchr("\"'`$@%0123456789!*+{[<", *s))
7350 probable_sub = 1;
7351 else if (isALPHA(*s)) {
7352 char tmpbuf[1024];
7353 STRLEN tmplen;
7354 d = s;
7355 d = scan_word(d, tmpbuf, sizeof tmpbuf, TRUE, &tmplen);
7356 if (!keyword(tmpbuf, tmplen, 0))
7357 probable_sub = 1;
7358 else {
7359 while (d < PL_bufend && isSPACE(*d))
7360 d++;
7361 if (*d == '=' && d[1] == '>')
7362 probable_sub = 1;
7363 }
7364 }
7365 if (probable_sub) {
7366 gv = gv_fetchpv(PL_tokenbuf, GV_ADD | ( UTF ? SVf_UTF8 : 0 ),
7367 SVt_PVCV);
7368 op_free(pl_yylval.opval);
7369 pl_yylval.opval =
7370 off ? (OP *)newCVREF(0, rv2cv_op) : rv2cv_op;
7371 pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
7372 PL_last_lop = PL_oldbufptr;
7373 PL_last_lop_op = OP_ENTERSUB;
7374 PL_nextwhite = PL_thiswhite;
7375 PL_thiswhite = 0;
7376 start_force(PL_curforce);
7377 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
7378 PL_expect = XTERM;
7379 PL_nextwhite = nextPL_nextwhite;
7380 curmad('X', PL_thistoken);
7381 PL_thistoken = newSVpvs("");
7382 force_next(off ? PRIVATEREF : WORD);
7383 if (!PL_lex_allbrackets &&
7384 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7385 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7386 TOKEN(NOAMP);
7387 }
7388#else
7389 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
7390 PL_expect = XTERM;
7391 force_next(off ? PRIVATEREF : WORD);
7392 if (!PL_lex_allbrackets &&
7393 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7394 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7395 TOKEN(NOAMP);
7396#endif
7397 }
7398
7399 /* Call it a bare word */
7400
7401 if (PL_hints & HINT_STRICT_SUBS)
7402 pl_yylval.opval->op_private |= OPpCONST_STRICT;
7403 else {
7404 bareword:
7405 /* after "print" and similar functions (corresponding to
7406 * "F? L" in opcode.pl), whatever wasn't already parsed as
7407 * a filehandle should be subject to "strict subs".
7408 * Likewise for the optional indirect-object argument to system
7409 * or exec, which can't be a bareword */
7410 if ((PL_last_lop_op == OP_PRINT
7411 || PL_last_lop_op == OP_PRTF
7412 || PL_last_lop_op == OP_SAY
7413 || PL_last_lop_op == OP_SYSTEM
7414 || PL_last_lop_op == OP_EXEC)
7415 && (PL_hints & HINT_STRICT_SUBS))
7416 pl_yylval.opval->op_private |= OPpCONST_STRICT;
7417 if (lastchar != '-') {
7418 if (ckWARN(WARN_RESERVED)) {
7419 d = PL_tokenbuf;
7420 while (isLOWER(*d))
7421 d++;
7422 if (!*d && !gv_stashpv(PL_tokenbuf, UTF ? SVf_UTF8 : 0))
7423 Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved,
7424 PL_tokenbuf);
7425 }
7426 }
7427 }
7428 op_free(rv2cv_op);
7429
7430 safe_bareword:
7431 if ((lastchar == '*' || lastchar == '%' || lastchar == '&')) {
7432 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
7433 "Operator or semicolon missing before %c%"SVf,
7434 lastchar, SVfARG(newSVpvn_flags(PL_tokenbuf,
7435 strlen(PL_tokenbuf),
7436 SVs_TEMP | (UTF ? SVf_UTF8 : 0))));
7437 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
7438 "Ambiguous use of %c resolved as operator %c",
7439 lastchar, lastchar);
7440 }
7441 TOKEN(WORD);
7442 }
7443
7444 case KEY___FILE__:
7445 FUN0OP(
7446 (OP*)newSVOP(OP_CONST, 0, newSVpv(CopFILE(PL_curcop),0))
7447 );
7448
7449 case KEY___LINE__:
7450 FUN0OP(
7451 (OP*)newSVOP(OP_CONST, 0,
7452 Perl_newSVpvf(aTHX_ "%"IVdf, (IV)CopLINE(PL_curcop)))
7453 );
7454
7455 case KEY___PACKAGE__:
7456 FUN0OP(
7457 (OP*)newSVOP(OP_CONST, 0,
7458 (PL_curstash
7459 ? newSVhek(HvNAME_HEK(PL_curstash))
7460 : &PL_sv_undef))
7461 );
7462
7463 case KEY___DATA__:
7464 case KEY___END__: {
7465 GV *gv;
7466 if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) {
7467 const char *pname = "main";
7468 STRLEN plen = 4;
7469 U32 putf8 = 0;
7470 if (PL_tokenbuf[2] == 'D')
7471 {
7472 HV * const stash =
7473 PL_curstash ? PL_curstash : PL_defstash;
7474 pname = HvNAME_get(stash);
7475 plen = HvNAMELEN (stash);
7476 if(HvNAMEUTF8(stash)) putf8 = SVf_UTF8;
7477 }
7478 gv = gv_fetchpvn_flags(
7479 Perl_form(aTHX_ "%*s::DATA", (int)plen, pname),
7480 plen+6, GV_ADD|putf8, SVt_PVIO
7481 );
7482 GvMULTI_on(gv);
7483 if (!GvIO(gv))
7484 GvIOp(gv) = newIO();
7485 IoIFP(GvIOp(gv)) = PL_rsfp;
7486#if defined(HAS_FCNTL) && defined(F_SETFD)
7487 {
7488 const int fd = PerlIO_fileno(PL_rsfp);
7489 fcntl(fd,F_SETFD,fd >= 3);
7490 }
7491#endif
7492 /* Mark this internal pseudo-handle as clean */
7493 IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
7494 if ((PerlIO*)PL_rsfp == PerlIO_stdin())
7495 IoTYPE(GvIOp(gv)) = IoTYPE_STD;
7496 else
7497 IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY;
7498#if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS)
7499 /* if the script was opened in binmode, we need to revert
7500 * it to text mode for compatibility; but only iff it has CRs
7501 * XXX this is a questionable hack at best. */
7502 if (PL_bufend-PL_bufptr > 2
7503 && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r')
7504 {
7505 Off_t loc = 0;
7506 if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) {
7507 loc = PerlIO_tell(PL_rsfp);
7508 (void)PerlIO_seek(PL_rsfp, 0L, 0);
7509 }
7510#ifdef NETWARE
7511 if (PerlLIO_setmode(PL_rsfp, O_TEXT) != -1) {
7512#else
7513 if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) {
7514#endif /* NETWARE */
7515 if (loc > 0)
7516 PerlIO_seek(PL_rsfp, loc, 0);
7517 }
7518 }
7519#endif
7520#ifdef PERLIO_LAYERS
7521 if (!IN_BYTES) {
7522 if (UTF)
7523 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
7524 else if (PL_encoding) {
7525 SV *name;
7526 dSP;
7527 ENTER;
7528 SAVETMPS;
7529 PUSHMARK(sp);
7530 EXTEND(SP, 1);
7531 XPUSHs(PL_encoding);
7532 PUTBACK;
7533 call_method("name", G_SCALAR);
7534 SPAGAIN;
7535 name = POPs;
7536 PUTBACK;
7537 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL,
7538 Perl_form(aTHX_ ":encoding(%"SVf")",
7539 SVfARG(name)));
7540 FREETMPS;
7541 LEAVE;
7542 }
7543 }
7544#endif
7545#ifdef PERL_MAD
7546 if (PL_madskills) {
7547 if (PL_realtokenstart >= 0) {
7548 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
7549 if (!PL_endwhite)
7550 PL_endwhite = newSVpvs("");
7551 sv_catsv(PL_endwhite, PL_thiswhite);
7552 PL_thiswhite = 0;
7553 sv_catpvn(PL_endwhite, tstart, PL_bufend - tstart);
7554 PL_realtokenstart = -1;
7555 }
7556 while ((s = filter_gets(PL_endwhite, SvCUR(PL_endwhite)))
7557 != NULL) ;
7558 }
7559#endif
7560 PL_rsfp = NULL;
7561 }
7562 goto fake_eof;
7563 }
7564
7565 case KEY___SUB__:
7566 FUN0OP(newPVOP(OP_RUNCV,0,NULL));
7567
7568 case KEY_AUTOLOAD:
7569 case KEY_DESTROY:
7570 case KEY_BEGIN:
7571 case KEY_UNITCHECK:
7572 case KEY_CHECK:
7573 case KEY_INIT:
7574 case KEY_END:
7575 if (PL_expect == XSTATE) {
7576 s = PL_bufptr;
7577 goto really_sub;
7578 }
7579 goto just_a_word;
7580
7581 case KEY_CORE:
7582 if (*s == ':' && s[1] == ':') {
7583 STRLEN olen = len;
7584 d = s;
7585 s += 2;
7586 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
7587 if ((*s == ':' && s[1] == ':')
7588 || (!(tmp = keyword(PL_tokenbuf, len, 1)) && *s == '\''))
7589 {
7590 s = d;
7591 len = olen;
7592 Copy(PL_bufptr, PL_tokenbuf, olen, char);
7593 goto just_a_word;
7594 }
7595 if (!tmp)
7596 Perl_croak(aTHX_ "CORE::%"SVf" is not a keyword",
7597 SVfARG(newSVpvn_flags(PL_tokenbuf, len,
7598 (UTF ? SVf_UTF8 : 0) | SVs_TEMP)));
7599 if (tmp < 0)
7600 tmp = -tmp;
7601 else if (tmp == KEY_require || tmp == KEY_do
7602 || tmp == KEY_glob)
7603 /* that's a way to remember we saw "CORE::" */
7604 orig_keyword = tmp;
7605 goto reserved_word;
7606 }
7607 goto just_a_word;
7608
7609 case KEY_abs:
7610 UNI(OP_ABS);
7611
7612 case KEY_alarm:
7613 UNI(OP_ALARM);
7614
7615 case KEY_accept:
7616 LOP(OP_ACCEPT,XTERM);
7617
7618 case KEY_and:
7619 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
7620 return REPORT(0);
7621 OPERATOR(ANDOP);
7622
7623 case KEY_atan2:
7624 LOP(OP_ATAN2,XTERM);
7625
7626 case KEY_bind:
7627 LOP(OP_BIND,XTERM);
7628
7629 case KEY_binmode:
7630 LOP(OP_BINMODE,XTERM);
7631
7632 case KEY_bless:
7633 LOP(OP_BLESS,XTERM);
7634
7635 case KEY_break:
7636 FUN0(OP_BREAK);
7637
7638 case KEY_chop:
7639 UNI(OP_CHOP);
7640
7641 case KEY_continue:
7642 /* We have to disambiguate the two senses of
7643 "continue". If the next token is a '{' then
7644 treat it as the start of a continue block;
7645 otherwise treat it as a control operator.
7646 */
7647 s = skipspace(s);
7648 if (*s == '{')
7649 PREBLOCK(CONTINUE);
7650 else
7651 FUN0(OP_CONTINUE);
7652
7653 case KEY_chdir:
7654 /* may use HOME */
7655 (void)gv_fetchpvs("ENV", GV_ADD|GV_NOTQUAL, SVt_PVHV);
7656 UNI(OP_CHDIR);
7657
7658 case KEY_close:
7659 UNI(OP_CLOSE);
7660
7661 case KEY_closedir:
7662 UNI(OP_CLOSEDIR);
7663
7664 case KEY_cmp:
7665 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7666 return REPORT(0);
7667 Eop(OP_SCMP);
7668
7669 case KEY_caller:
7670 UNI(OP_CALLER);
7671
7672 case KEY_crypt:
7673#ifdef FCRYPT
7674 if (!PL_cryptseen) {
7675 PL_cryptseen = TRUE;
7676 init_des();
7677 }
7678#endif
7679 LOP(OP_CRYPT,XTERM);
7680
7681 case KEY_chmod:
7682 LOP(OP_CHMOD,XTERM);
7683
7684 case KEY_chown:
7685 LOP(OP_CHOWN,XTERM);
7686
7687 case KEY_connect:
7688 LOP(OP_CONNECT,XTERM);
7689
7690 case KEY_chr:
7691 UNI(OP_CHR);
7692
7693 case KEY_cos:
7694 UNI(OP_COS);
7695
7696 case KEY_chroot:
7697 UNI(OP_CHROOT);
7698
7699 case KEY_default:
7700 PREBLOCK(DEFAULT);
7701
7702 case KEY_do:
7703 s = SKIPSPACE1(s);
7704 if (*s == '{')
7705 PRETERMBLOCK(DO);
7706 if (*s != '\'') {
7707 *PL_tokenbuf = '&';
7708 d = scan_word(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
7709 1, &len);
7710 if (len && !keyword(PL_tokenbuf + 1, len, 0)) {
7711 d = SKIPSPACE1(d);
7712 if (*d == '(') {
7713 force_ident_maybe_lex('&');
7714 s = d;
7715 }
7716 }
7717 }
7718 if (orig_keyword == KEY_do) {
7719 orig_keyword = 0;
7720 pl_yylval.ival = 1;
7721 }
7722 else
7723 pl_yylval.ival = 0;
7724 OPERATOR(DO);
7725
7726 case KEY_die:
7727 PL_hints |= HINT_BLOCK_SCOPE;
7728 LOP(OP_DIE,XTERM);
7729
7730 case KEY_defined:
7731 UNI(OP_DEFINED);
7732
7733 case KEY_delete:
7734 UNI(OP_DELETE);
7735
7736 case KEY_dbmopen:
7737 Perl_populate_isa(aTHX_ STR_WITH_LEN("AnyDBM_File::ISA"),
7738 STR_WITH_LEN("NDBM_File::"),
7739 STR_WITH_LEN("DB_File::"),
7740 STR_WITH_LEN("GDBM_File::"),
7741 STR_WITH_LEN("SDBM_File::"),
7742 STR_WITH_LEN("ODBM_File::"),
7743 NULL);
7744 LOP(OP_DBMOPEN,XTERM);
7745
7746 case KEY_dbmclose:
7747 UNI(OP_DBMCLOSE);
7748
7749 case KEY_dump:
7750 PL_expect = XOPERATOR;
7751 s = force_word(s,WORD,TRUE,FALSE);
7752 LOOPX(OP_DUMP);
7753
7754 case KEY_else:
7755 PREBLOCK(ELSE);
7756
7757 case KEY_elsif:
7758 pl_yylval.ival = CopLINE(PL_curcop);
7759 OPERATOR(ELSIF);
7760
7761 case KEY_eq:
7762 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7763 return REPORT(0);
7764 Eop(OP_SEQ);
7765
7766 case KEY_exists:
7767 UNI(OP_EXISTS);
7768
7769 case KEY_exit:
7770 if (PL_madskills)
7771 UNI(OP_INT);
7772 UNI(OP_EXIT);
7773
7774 case KEY_eval:
7775 s = SKIPSPACE1(s);
7776 if (*s == '{') { /* block eval */
7777 PL_expect = XTERMBLOCK;
7778 UNIBRACK(OP_ENTERTRY);
7779 }
7780 else { /* string eval */
7781 PL_expect = XTERM;
7782 UNIBRACK(OP_ENTEREVAL);
7783 }
7784
7785 case KEY_evalbytes:
7786 PL_expect = XTERM;
7787 UNIBRACK(-OP_ENTEREVAL);
7788
7789 case KEY_eof:
7790 UNI(OP_EOF);
7791
7792 case KEY_exp:
7793 UNI(OP_EXP);
7794
7795 case KEY_each:
7796 UNI(OP_EACH);
7797
7798 case KEY_exec:
7799 LOP(OP_EXEC,XREF);
7800
7801 case KEY_endhostent:
7802 FUN0(OP_EHOSTENT);
7803
7804 case KEY_endnetent:
7805 FUN0(OP_ENETENT);
7806
7807 case KEY_endservent:
7808 FUN0(OP_ESERVENT);
7809
7810 case KEY_endprotoent:
7811 FUN0(OP_EPROTOENT);
7812
7813 case KEY_endpwent:
7814 FUN0(OP_EPWENT);
7815
7816 case KEY_endgrent:
7817 FUN0(OP_EGRENT);
7818
7819 case KEY_for:
7820 case KEY_foreach:
7821 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
7822 return REPORT(0);
7823 pl_yylval.ival = CopLINE(PL_curcop);
7824 s = SKIPSPACE1(s);
7825 if (PL_expect == XSTATE && isIDFIRST_lazy_if(s,UTF)) {
7826 char *p = s;
7827#ifdef PERL_MAD
7828 int soff = s - SvPVX(PL_linestr); /* for skipspace realloc */
7829#endif
7830
7831 if ((PL_bufend - p) >= 3 &&
7832 strnEQ(p, "my", 2) && isSPACE(*(p + 2)))
7833 p += 2;
7834 else if ((PL_bufend - p) >= 4 &&
7835 strnEQ(p, "our", 3) && isSPACE(*(p + 3)))
7836 p += 3;
7837 p = PEEKSPACE(p);
7838 if (isIDFIRST_lazy_if(p,UTF)) {
7839 p = scan_ident(p, PL_bufend,
7840 PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
7841 p = PEEKSPACE(p);
7842 }
7843 if (*p != '$')
7844 Perl_croak(aTHX_ "Missing $ on loop variable");
7845#ifdef PERL_MAD
7846 s = SvPVX(PL_linestr) + soff;
7847#endif
7848 }
7849 OPERATOR(FOR);
7850
7851 case KEY_formline:
7852 LOP(OP_FORMLINE,XTERM);
7853
7854 case KEY_fork:
7855 FUN0(OP_FORK);
7856
7857 case KEY_fc:
7858 UNI(OP_FC);
7859
7860 case KEY_fcntl:
7861 LOP(OP_FCNTL,XTERM);
7862
7863 case KEY_fileno:
7864 UNI(OP_FILENO);
7865
7866 case KEY_flock:
7867 LOP(OP_FLOCK,XTERM);
7868
7869 case KEY_gt:
7870 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7871 return REPORT(0);
7872 Rop(OP_SGT);
7873
7874 case KEY_ge:
7875 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7876 return REPORT(0);
7877 Rop(OP_SGE);
7878
7879 case KEY_grep:
7880 LOP(OP_GREPSTART, XREF);
7881
7882 case KEY_goto:
7883 PL_expect = XOPERATOR;
7884 s = force_word(s,WORD,TRUE,FALSE);
7885 LOOPX(OP_GOTO);
7886
7887 case KEY_gmtime:
7888 UNI(OP_GMTIME);
7889
7890 case KEY_getc:
7891 UNIDOR(OP_GETC);
7892
7893 case KEY_getppid:
7894 FUN0(OP_GETPPID);
7895
7896 case KEY_getpgrp:
7897 UNI(OP_GETPGRP);
7898
7899 case KEY_getpriority:
7900 LOP(OP_GETPRIORITY,XTERM);
7901
7902 case KEY_getprotobyname:
7903 UNI(OP_GPBYNAME);
7904
7905 case KEY_getprotobynumber:
7906 LOP(OP_GPBYNUMBER,XTERM);
7907
7908 case KEY_getprotoent:
7909 FUN0(OP_GPROTOENT);
7910
7911 case KEY_getpwent:
7912 FUN0(OP_GPWENT);
7913
7914 case KEY_getpwnam:
7915 UNI(OP_GPWNAM);
7916
7917 case KEY_getpwuid:
7918 UNI(OP_GPWUID);
7919
7920 case KEY_getpeername:
7921 UNI(OP_GETPEERNAME);
7922
7923 case KEY_gethostbyname:
7924 UNI(OP_GHBYNAME);
7925
7926 case KEY_gethostbyaddr:
7927 LOP(OP_GHBYADDR,XTERM);
7928
7929 case KEY_gethostent:
7930 FUN0(OP_GHOSTENT);
7931
7932 case KEY_getnetbyname:
7933 UNI(OP_GNBYNAME);
7934
7935 case KEY_getnetbyaddr:
7936 LOP(OP_GNBYADDR,XTERM);
7937
7938 case KEY_getnetent:
7939 FUN0(OP_GNETENT);
7940
7941 case KEY_getservbyname:
7942 LOP(OP_GSBYNAME,XTERM);
7943
7944 case KEY_getservbyport:
7945 LOP(OP_GSBYPORT,XTERM);
7946
7947 case KEY_getservent:
7948 FUN0(OP_GSERVENT);
7949
7950 case KEY_getsockname:
7951 UNI(OP_GETSOCKNAME);
7952
7953 case KEY_getsockopt:
7954 LOP(OP_GSOCKOPT,XTERM);
7955
7956 case KEY_getgrent:
7957 FUN0(OP_GGRENT);
7958
7959 case KEY_getgrnam:
7960 UNI(OP_GGRNAM);
7961
7962 case KEY_getgrgid:
7963 UNI(OP_GGRGID);
7964
7965 case KEY_getlogin:
7966 FUN0(OP_GETLOGIN);
7967
7968 case KEY_given:
7969 pl_yylval.ival = CopLINE(PL_curcop);
7970 Perl_ck_warner_d(aTHX_
7971 packWARN(WARN_EXPERIMENTAL__SMARTMATCH),
7972 "given is experimental");
7973 OPERATOR(GIVEN);
7974
7975 case KEY_glob:
7976 LOP(
7977 orig_keyword==KEY_glob ? (orig_keyword=0, -OP_GLOB) : OP_GLOB,
7978 XTERM
7979 );
7980
7981 case KEY_hex:
7982 UNI(OP_HEX);
7983
7984 case KEY_if:
7985 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
7986 return REPORT(0);
7987 pl_yylval.ival = CopLINE(PL_curcop);
7988 OPERATOR(IF);
7989
7990 case KEY_index:
7991 LOP(OP_INDEX,XTERM);
7992
7993 case KEY_int:
7994 UNI(OP_INT);
7995
7996 case KEY_ioctl:
7997 LOP(OP_IOCTL,XTERM);
7998
7999 case KEY_join:
8000 LOP(OP_JOIN,XTERM);
8001
8002 case KEY_keys:
8003 UNI(OP_KEYS);
8004
8005 case KEY_kill:
8006 LOP(OP_KILL,XTERM);
8007
8008 case KEY_last:
8009 PL_expect = XOPERATOR;
8010 s = force_word(s,WORD,TRUE,FALSE);
8011 LOOPX(OP_LAST);
8012
8013 case KEY_lc:
8014 UNI(OP_LC);
8015
8016 case KEY_lcfirst:
8017 UNI(OP_LCFIRST);
8018
8019 case KEY_local:
8020 pl_yylval.ival = 0;
8021 OPERATOR(LOCAL);
8022
8023 case KEY_length:
8024 UNI(OP_LENGTH);
8025
8026 case KEY_lt:
8027 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8028 return REPORT(0);
8029 Rop(OP_SLT);
8030
8031 case KEY_le:
8032 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8033 return REPORT(0);
8034 Rop(OP_SLE);
8035
8036 case KEY_localtime:
8037 UNI(OP_LOCALTIME);
8038
8039 case KEY_log:
8040 UNI(OP_LOG);
8041
8042 case KEY_link:
8043 LOP(OP_LINK,XTERM);
8044
8045 case KEY_listen:
8046 LOP(OP_LISTEN,XTERM);
8047
8048 case KEY_lock:
8049 UNI(OP_LOCK);
8050
8051 case KEY_lstat:
8052 UNI(OP_LSTAT);
8053
8054 case KEY_m:
8055 s = scan_pat(s,OP_MATCH);
8056 TERM(sublex_start());
8057
8058 case KEY_map:
8059 LOP(OP_MAPSTART, XREF);
8060
8061 case KEY_mkdir:
8062 LOP(OP_MKDIR,XTERM);
8063
8064 case KEY_msgctl:
8065 LOP(OP_MSGCTL,XTERM);
8066
8067 case KEY_msgget:
8068 LOP(OP_MSGGET,XTERM);
8069
8070 case KEY_msgrcv:
8071 LOP(OP_MSGRCV,XTERM);
8072
8073 case KEY_msgsnd:
8074 LOP(OP_MSGSND,XTERM);
8075
8076 case KEY_our:
8077 case KEY_my:
8078 case KEY_state:
8079 PL_in_my = (U16)tmp;
8080 s = SKIPSPACE1(s);
8081 if (isIDFIRST_lazy_if(s,UTF)) {
8082#ifdef PERL_MAD
8083 char* start = s;
8084#endif
8085 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
8086 if (len == 3 && strnEQ(PL_tokenbuf, "sub", 3))
8087 {
8088 if (!FEATURE_LEXSUBS_IS_ENABLED)
8089 Perl_croak(aTHX_
8090 "Experimental \"%s\" subs not enabled",
8091 tmp == KEY_my ? "my" :
8092 tmp == KEY_state ? "state" : "our");
8093 Perl_ck_warner_d(aTHX_
8094 packWARN(WARN_EXPERIMENTAL__LEXICAL_SUBS),
8095 "The lexical_subs feature is experimental");
8096 goto really_sub;
8097 }
8098 PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
8099 if (!PL_in_my_stash) {
8100 char tmpbuf[1024];
8101 PL_bufptr = s;
8102 my_snprintf(tmpbuf, sizeof(tmpbuf), "No such class %.1000s", PL_tokenbuf);
8103 yyerror_pv(tmpbuf, UTF ? SVf_UTF8 : 0);
8104 }
8105#ifdef PERL_MAD
8106 if (PL_madskills) { /* just add type to declarator token */
8107 sv_catsv(PL_thistoken, PL_nextwhite);
8108 PL_nextwhite = 0;
8109 sv_catpvn(PL_thistoken, start, s - start);
8110 }
8111#endif
8112 }
8113 pl_yylval.ival = 1;
8114 OPERATOR(MY);
8115
8116 case KEY_next:
8117 PL_expect = XOPERATOR;
8118 s = force_word(s,WORD,TRUE,FALSE);
8119 LOOPX(OP_NEXT);
8120
8121 case KEY_ne:
8122 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8123 return REPORT(0);
8124 Eop(OP_SNE);
8125
8126 case KEY_no:
8127 s = tokenize_use(0, s);
8128 TERM(USE);
8129
8130 case KEY_not:
8131 if (*s == '(' || (s = SKIPSPACE1(s), *s == '('))
8132 FUN1(OP_NOT);
8133 else {
8134 if (!PL_lex_allbrackets &&
8135 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
8136 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
8137 OPERATOR(NOTOP);
8138 }
8139
8140 case KEY_open:
8141 s = SKIPSPACE1(s);
8142 if (isIDFIRST_lazy_if(s,UTF)) {
8143 const char *t;
8144 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE,
8145 &len);
8146 for (t=d; isSPACE(*t);)
8147 t++;
8148 if ( *t && strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
8149 /* [perl #16184] */
8150 && !(t[0] == '=' && t[1] == '>')
8151 && !(t[0] == ':' && t[1] == ':')
8152 && !keyword(s, d-s, 0)
8153 ) {
8154 SV *tmpsv = newSVpvn_flags(s, (STRLEN)(d-s),
8155 SVs_TEMP | (UTF ? SVf_UTF8 : 0));
8156 Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
8157 "Precedence problem: open %"SVf" should be open(%"SVf")",
8158 SVfARG(tmpsv), SVfARG(tmpsv));
8159 }
8160 }
8161 LOP(OP_OPEN,XTERM);
8162
8163 case KEY_or:
8164 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
8165 return REPORT(0);
8166 pl_yylval.ival = OP_OR;
8167 OPERATOR(OROP);
8168
8169 case KEY_ord:
8170 UNI(OP_ORD);
8171
8172 case KEY_oct:
8173 UNI(OP_OCT);
8174
8175 case KEY_opendir:
8176 LOP(OP_OPEN_DIR,XTERM);
8177
8178 case KEY_print:
8179 checkcomma(s,PL_tokenbuf,"filehandle");
8180 LOP(OP_PRINT,XREF);
8181
8182 case KEY_printf:
8183 checkcomma(s,PL_tokenbuf,"filehandle");
8184 LOP(OP_PRTF,XREF);
8185
8186 case KEY_prototype:
8187 UNI(OP_PROTOTYPE);
8188
8189 case KEY_push:
8190 LOP(OP_PUSH,XTERM);
8191
8192 case KEY_pop:
8193 UNIDOR(OP_POP);
8194
8195 case KEY_pos:
8196 UNIDOR(OP_POS);
8197
8198 case KEY_pack:
8199 LOP(OP_PACK,XTERM);
8200
8201 case KEY_package:
8202 s = force_word(s,WORD,FALSE,TRUE);
8203 s = SKIPSPACE1(s);
8204 s = force_strict_version(s);
8205 PL_lex_expect = XBLOCK;
8206 OPERATOR(PACKAGE);
8207
8208 case KEY_pipe:
8209 LOP(OP_PIPE_OP,XTERM);
8210
8211 case KEY_q:
8212 s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE);
8213 if (!s)
8214 missingterm(NULL);
8215 pl_yylval.ival = OP_CONST;
8216 TERM(sublex_start());
8217
8218 case KEY_quotemeta:
8219 UNI(OP_QUOTEMETA);
8220
8221 case KEY_qw: {
8222 OP *words = NULL;
8223 s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE);
8224 if (!s)
8225 missingterm(NULL);
8226 PL_expect = XOPERATOR;
8227 if (SvCUR(PL_lex_stuff)) {
8228 int warned_comma = !ckWARN(WARN_QW);
8229 int warned_comment = warned_comma;
8230 d = SvPV_force(PL_lex_stuff, len);
8231 while (len) {
8232 for (; isSPACE(*d) && len; --len, ++d)
8233 /**/;
8234 if (len) {
8235 SV *sv;
8236 const char *b = d;
8237 if (!warned_comma || !warned_comment) {
8238 for (; !isSPACE(*d) && len; --len, ++d) {
8239 if (!warned_comma && *d == ',') {
8240 Perl_warner(aTHX_ packWARN(WARN_QW),
8241 "Possible attempt to separate words with commas");
8242 ++warned_comma;
8243 }
8244 else if (!warned_comment && *d == '#') {
8245 Perl_warner(aTHX_ packWARN(WARN_QW),
8246 "Possible attempt to put comments in qw() list");
8247 ++warned_comment;
8248 }
8249 }
8250 }
8251 else {
8252 for (; !isSPACE(*d) && len; --len, ++d)
8253 /**/;
8254 }
8255 sv = newSVpvn_utf8(b, d-b, DO_UTF8(PL_lex_stuff));
8256 words = op_append_elem(OP_LIST, words,
8257 newSVOP(OP_CONST, 0, tokeq(sv)));
8258 }
8259 }
8260 }
8261 if (!words)
8262 words = newNULLLIST();
8263 if (PL_lex_stuff) {
8264 SvREFCNT_dec(PL_lex_stuff);
8265 PL_lex_stuff = NULL;
8266 }
8267 PL_expect = XOPERATOR;
8268 pl_yylval.opval = sawparens(words);
8269 TOKEN(QWLIST);
8270 }
8271
8272 case KEY_qq:
8273 s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE);
8274 if (!s)
8275 missingterm(NULL);
8276 pl_yylval.ival = OP_STRINGIFY;
8277 if (SvIVX(PL_lex_stuff) == '\'')
8278 SvIV_set(PL_lex_stuff, 0); /* qq'$foo' should interpolate */
8279 TERM(sublex_start());
8280
8281 case KEY_qr:
8282 s = scan_pat(s,OP_QR);
8283 TERM(sublex_start());
8284
8285 case KEY_qx:
8286 s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE);
8287 if (!s)
8288 missingterm(NULL);
8289 readpipe_override();
8290 TERM(sublex_start());
8291
8292 case KEY_return:
8293 OLDLOP(OP_RETURN);
8294
8295 case KEY_require:
8296 s = SKIPSPACE1(s);
8297 PL_expect = XOPERATOR;
8298 if (isDIGIT(*s)) {
8299 s = force_version(s, FALSE);
8300 }
8301 else if (*s != 'v' || !isDIGIT(s[1])
8302 || (s = force_version(s, TRUE), *s == 'v'))
8303 {
8304 *PL_tokenbuf = '\0';
8305 s = force_word(s,WORD,TRUE,TRUE);
8306 if (isIDFIRST_lazy_if(PL_tokenbuf,UTF))
8307 gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf),
8308 GV_ADD | (UTF ? SVf_UTF8 : 0));
8309 else if (*s == '<')
8310 yyerror("<> should be quotes");
8311 }
8312 if (orig_keyword == KEY_require) {
8313 orig_keyword = 0;
8314 pl_yylval.ival = 1;
8315 }
8316 else
8317 pl_yylval.ival = 0;
8318 PL_expect = XTERM;
8319 PL_bufptr = s;
8320 PL_last_uni = PL_oldbufptr;
8321 PL_last_lop_op = OP_REQUIRE;
8322 s = skipspace(s);
8323 return REPORT( (int)REQUIRE );
8324
8325 case KEY_reset:
8326 UNI(OP_RESET);
8327
8328 case KEY_redo:
8329 PL_expect = XOPERATOR;
8330 s = force_word(s,WORD,TRUE,FALSE);
8331 LOOPX(OP_REDO);
8332
8333 case KEY_rename:
8334 LOP(OP_RENAME,XTERM);
8335
8336 case KEY_rand:
8337 UNI(OP_RAND);
8338
8339 case KEY_rmdir:
8340 UNI(OP_RMDIR);
8341
8342 case KEY_rindex:
8343 LOP(OP_RINDEX,XTERM);
8344
8345 case KEY_read:
8346 LOP(OP_READ,XTERM);
8347
8348 case KEY_readdir:
8349 UNI(OP_READDIR);
8350
8351 case KEY_readline:
8352 UNIDOR(OP_READLINE);
8353
8354 case KEY_readpipe:
8355 UNIDOR(OP_BACKTICK);
8356
8357 case KEY_rewinddir:
8358 UNI(OP_REWINDDIR);
8359
8360 case KEY_recv:
8361 LOP(OP_RECV,XTERM);
8362
8363 case KEY_reverse:
8364 LOP(OP_REVERSE,XTERM);
8365
8366 case KEY_readlink:
8367 UNIDOR(OP_READLINK);
8368
8369 case KEY_ref:
8370 UNI(OP_REF);
8371
8372 case KEY_s:
8373 s = scan_subst(s);
8374 if (pl_yylval.opval)
8375 TERM(sublex_start());
8376 else
8377 TOKEN(1); /* force error */
8378
8379 case KEY_say:
8380 checkcomma(s,PL_tokenbuf,"filehandle");
8381 LOP(OP_SAY,XREF);
8382
8383 case KEY_chomp:
8384 UNI(OP_CHOMP);
8385
8386 case KEY_scalar:
8387 UNI(OP_SCALAR);
8388
8389 case KEY_select:
8390 LOP(OP_SELECT,XTERM);
8391
8392 case KEY_seek:
8393 LOP(OP_SEEK,XTERM);
8394
8395 case KEY_semctl:
8396 LOP(OP_SEMCTL,XTERM);
8397
8398 case KEY_semget:
8399 LOP(OP_SEMGET,XTERM);
8400
8401 case KEY_semop:
8402 LOP(OP_SEMOP,XTERM);
8403
8404 case KEY_send:
8405 LOP(OP_SEND,XTERM);
8406
8407 case KEY_setpgrp:
8408 LOP(OP_SETPGRP,XTERM);
8409
8410 case KEY_setpriority:
8411 LOP(OP_SETPRIORITY,XTERM);
8412
8413 case KEY_sethostent:
8414 UNI(OP_SHOSTENT);
8415
8416 case KEY_setnetent:
8417 UNI(OP_SNETENT);
8418
8419 case KEY_setservent:
8420 UNI(OP_SSERVENT);
8421
8422 case KEY_setprotoent:
8423 UNI(OP_SPROTOENT);
8424
8425 case KEY_setpwent:
8426 FUN0(OP_SPWENT);
8427
8428 case KEY_setgrent:
8429 FUN0(OP_SGRENT);
8430
8431 case KEY_seekdir:
8432 LOP(OP_SEEKDIR,XTERM);
8433
8434 case KEY_setsockopt:
8435 LOP(OP_SSOCKOPT,XTERM);
8436
8437 case KEY_shift:
8438 UNIDOR(OP_SHIFT);
8439
8440 case KEY_shmctl:
8441 LOP(OP_SHMCTL,XTERM);
8442
8443 case KEY_shmget:
8444 LOP(OP_SHMGET,XTERM);
8445
8446 case KEY_shmread:
8447 LOP(OP_SHMREAD,XTERM);
8448
8449 case KEY_shmwrite:
8450 LOP(OP_SHMWRITE,XTERM);
8451
8452 case KEY_shutdown:
8453 LOP(OP_SHUTDOWN,XTERM);
8454
8455 case KEY_sin:
8456 UNI(OP_SIN);
8457
8458 case KEY_sleep:
8459 UNI(OP_SLEEP);
8460
8461 case KEY_socket:
8462 LOP(OP_SOCKET,XTERM);
8463
8464 case KEY_socketpair:
8465 LOP(OP_SOCKPAIR,XTERM);
8466
8467 case KEY_sort:
8468 checkcomma(s,PL_tokenbuf,"subroutine name");
8469 s = SKIPSPACE1(s);
8470 PL_expect = XTERM;
8471 s = force_word(s,WORD,TRUE,TRUE);
8472 LOP(OP_SORT,XREF);
8473
8474 case KEY_split:
8475 LOP(OP_SPLIT,XTERM);
8476
8477 case KEY_sprintf:
8478 LOP(OP_SPRINTF,XTERM);
8479
8480 case KEY_splice:
8481 LOP(OP_SPLICE,XTERM);
8482
8483 case KEY_sqrt:
8484 UNI(OP_SQRT);
8485
8486 case KEY_srand:
8487 UNI(OP_SRAND);
8488
8489 case KEY_stat:
8490 UNI(OP_STAT);
8491
8492 case KEY_study:
8493 UNI(OP_STUDY);
8494
8495 case KEY_substr:
8496 LOP(OP_SUBSTR,XTERM);
8497
8498 case KEY_format:
8499 case KEY_sub:
8500 really_sub:
8501 {
8502 char * const tmpbuf = PL_tokenbuf + 1;
8503 expectation attrful;
8504 bool have_name, have_proto;
8505 const int key = tmp;
8506 SV *format_name = NULL;
8507
8508#ifdef PERL_MAD
8509 SV *tmpwhite = 0;
8510
8511 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
8512 SV *subtoken = PL_madskills
8513 ? newSVpvn_flags(tstart, s - tstart, SvUTF8(PL_linestr))
8514 : NULL;
8515 PL_thistoken = 0;
8516
8517 d = s;
8518 s = SKIPSPACE2(s,tmpwhite);
8519#else
8520 d = s;
8521 s = skipspace(s);
8522#endif
8523
8524 if (isIDFIRST_lazy_if(s,UTF) || *s == '\'' ||
8525 (*s == ':' && s[1] == ':'))
8526 {
8527#ifdef PERL_MAD
8528 SV *nametoke = NULL;
8529#endif
8530
8531 PL_expect = XBLOCK;
8532 attrful = XATTRBLOCK;
8533 d = scan_word(s, tmpbuf, sizeof PL_tokenbuf - 1, TRUE,
8534 &len);
8535#ifdef PERL_MAD
8536 if (PL_madskills)
8537 nametoke = newSVpvn_flags(s, d - s, SvUTF8(PL_linestr));
8538#endif
8539 if (key == KEY_format)
8540 format_name = S_newSV_maybe_utf8(aTHX_ s, d - s);
8541 *PL_tokenbuf = '&';
8542 if (memchr(tmpbuf, ':', len) || key != KEY_sub
8543 || pad_findmy_pvn(
8544 PL_tokenbuf, len + 1, UTF ? SVf_UTF8 : 0
8545 ) != NOT_IN_PAD)
8546 sv_setpvn(PL_subname, tmpbuf, len);
8547 else {
8548 sv_setsv(PL_subname,PL_curstname);
8549 sv_catpvs(PL_subname,"::");
8550 sv_catpvn(PL_subname,tmpbuf,len);
8551 }
8552 if (SvUTF8(PL_linestr))
8553 SvUTF8_on(PL_subname);
8554 have_name = TRUE;
8555
8556
8557#ifdef PERL_MAD
8558 start_force(0);
8559 CURMAD('X', nametoke);
8560 CURMAD('_', tmpwhite);
8561 force_ident_maybe_lex('&');
8562
8563 s = SKIPSPACE2(d,tmpwhite);
8564#else
8565 s = skipspace(d);
8566#endif
8567 }
8568 else {
8569 if (key == KEY_my || key == KEY_our || key==KEY_state)
8570 {
8571 *d = '\0';
8572 /* diag_listed_as: Missing name in "%s sub" */
8573 Perl_croak(aTHX_
8574 "Missing name in \"%s\"", PL_bufptr);
8575 }
8576 PL_expect = XTERMBLOCK;
8577 attrful = XATTRTERM;
8578 sv_setpvs(PL_subname,"?");
8579 have_name = FALSE;
8580 }
8581
8582 if (key == KEY_format) {
8583#ifdef PERL_MAD
8584 PL_thistoken = subtoken;
8585 s = d;
8586#else
8587 if (format_name) {
8588 start_force(PL_curforce);
8589 if (PL_madskills)
8590 curmad('X', newSVpvn(start,s-start));
8591 NEXTVAL_NEXTTOKE.opval
8592 = (OP*)newSVOP(OP_CONST,0, format_name);
8593 NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
8594 force_next(WORD);
8595 }
8596#endif
8597 PREBLOCK(FORMAT);
8598 }
8599
8600 /* Look for a prototype */
8601 if (*s == '(') {
8602 char *p;
8603 bool bad_proto = FALSE;
8604 bool in_brackets = FALSE;
8605 char greedy_proto = ' ';
8606 bool proto_after_greedy_proto = FALSE;
8607 bool must_be_last = FALSE;
8608 bool underscore = FALSE;
8609 bool seen_underscore = FALSE;
8610 const bool warnillegalproto = ckWARN(WARN_ILLEGALPROTO);
8611 STRLEN tmplen;
8612
8613 s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE);
8614 if (!s)
8615 Perl_croak(aTHX_ "Prototype not terminated");
8616 /* strip spaces and check for bad characters */
8617 d = SvPV(PL_lex_stuff, tmplen);
8618 tmp = 0;
8619 for (p = d; tmplen; tmplen--, ++p) {
8620 if (!isSPACE(*p)) {
8621 d[tmp++] = *p;
8622
8623 if (warnillegalproto) {
8624 if (must_be_last)
8625 proto_after_greedy_proto = TRUE;
8626 if (!strchr("$@%*;[]&\\_+", *p) || *p == '\0') {
8627 bad_proto = TRUE;
8628 }
8629 else {
8630 if ( underscore ) {
8631 if ( !strchr(";@%", *p) )
8632 bad_proto = TRUE;
8633 underscore = FALSE;
8634 }
8635 if ( *p == '[' ) {
8636 in_brackets = TRUE;
8637 }
8638 else if ( *p == ']' ) {
8639 in_brackets = FALSE;
8640 }
8641 else if ( (*p == '@' || *p == '%') &&
8642 ( tmp < 2 || d[tmp-2] != '\\' ) &&
8643 !in_brackets ) {
8644 must_be_last = TRUE;
8645 greedy_proto = *p;
8646 }
8647 else if ( *p == '_' ) {
8648 underscore = seen_underscore = TRUE;
8649 }
8650 }
8651 }
8652 }
8653 }
8654 d[tmp] = '\0';
8655 if (proto_after_greedy_proto)
8656 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
8657 "Prototype after '%c' for %"SVf" : %s",
8658 greedy_proto, SVfARG(PL_subname), d);
8659 if (bad_proto) {
8660 SV *dsv = newSVpvs_flags("", SVs_TEMP);
8661 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
8662 "Illegal character %sin prototype for %"SVf" : %s",
8663 seen_underscore ? "after '_' " : "",
8664 SVfARG(PL_subname),
8665 SvUTF8(PL_lex_stuff)
8666 ? sv_uni_display(dsv,
8667 newSVpvn_flags(d, tmp, SVs_TEMP | SVf_UTF8),
8668 tmp,
8669 UNI_DISPLAY_ISPRINT)
8670 : pv_pretty(dsv, d, tmp, 60, NULL, NULL,
8671 PERL_PV_ESCAPE_NONASCII));
8672 }
8673 SvCUR_set(PL_lex_stuff, tmp);
8674 have_proto = TRUE;
8675
8676#ifdef PERL_MAD
8677 start_force(0);
8678 CURMAD('q', PL_thisopen);
8679 CURMAD('_', tmpwhite);
8680 CURMAD('=', PL_thisstuff);
8681 CURMAD('Q', PL_thisclose);
8682 NEXTVAL_NEXTTOKE.opval =
8683 (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
8684 PL_lex_stuff = NULL;
8685 force_next(THING);
8686
8687 s = SKIPSPACE2(s,tmpwhite);
8688#else
8689 s = skipspace(s);
8690#endif
8691 }
8692 else
8693 have_proto = FALSE;
8694
8695 if (*s == ':' && s[1] != ':')
8696 PL_expect = attrful;
8697 else if (*s != '{' && key == KEY_sub) {
8698 if (!have_name)
8699 Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
8700 else if (*s != ';' && *s != '}')
8701 Perl_croak(aTHX_ "Illegal declaration of subroutine %"SVf, SVfARG(PL_subname));
8702 }
8703
8704#ifdef PERL_MAD
8705 start_force(0);
8706 if (tmpwhite) {
8707 if (PL_madskills)
8708 curmad('^', newSVpvs(""));
8709 CURMAD('_', tmpwhite);
8710 }
8711 force_next(0);
8712
8713 PL_thistoken = subtoken;
8714 PERL_UNUSED_VAR(have_proto);
8715#else
8716 if (have_proto) {
8717 NEXTVAL_NEXTTOKE.opval =
8718 (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
8719 PL_lex_stuff = NULL;
8720 force_next(THING);
8721 }
8722#endif
8723 if (!have_name) {
8724 if (PL_curstash)
8725 sv_setpvs(PL_subname, "__ANON__");
8726 else
8727 sv_setpvs(PL_subname, "__ANON__::__ANON__");
8728 TOKEN(ANONSUB);
8729 }
8730#ifndef PERL_MAD
8731 force_ident_maybe_lex('&');
8732#endif
8733 TOKEN(SUB);
8734 }
8735
8736 case KEY_system:
8737 LOP(OP_SYSTEM,XREF);
8738
8739 case KEY_symlink:
8740 LOP(OP_SYMLINK,XTERM);
8741
8742 case KEY_syscall:
8743 LOP(OP_SYSCALL,XTERM);
8744
8745 case KEY_sysopen:
8746 LOP(OP_SYSOPEN,XTERM);
8747
8748 case KEY_sysseek:
8749 LOP(OP_SYSSEEK,XTERM);
8750
8751 case KEY_sysread:
8752 LOP(OP_SYSREAD,XTERM);
8753
8754 case KEY_syswrite:
8755 LOP(OP_SYSWRITE,XTERM);
8756
8757 case KEY_tr:
8758 case KEY_y:
8759 s = scan_trans(s);
8760 TERM(sublex_start());
8761
8762 case KEY_tell:
8763 UNI(OP_TELL);
8764
8765 case KEY_telldir:
8766 UNI(OP_TELLDIR);
8767
8768 case KEY_tie:
8769 LOP(OP_TIE,XTERM);
8770
8771 case KEY_tied:
8772 UNI(OP_TIED);
8773
8774 case KEY_time:
8775 FUN0(OP_TIME);
8776
8777 case KEY_times:
8778 FUN0(OP_TMS);
8779
8780 case KEY_truncate:
8781 LOP(OP_TRUNCATE,XTERM);
8782
8783 case KEY_uc:
8784 UNI(OP_UC);
8785
8786 case KEY_ucfirst:
8787 UNI(OP_UCFIRST);
8788
8789 case KEY_untie:
8790 UNI(OP_UNTIE);
8791
8792 case KEY_until:
8793 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8794 return REPORT(0);
8795 pl_yylval.ival = CopLINE(PL_curcop);
8796 OPERATOR(UNTIL);
8797
8798 case KEY_unless:
8799 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8800 return REPORT(0);
8801 pl_yylval.ival = CopLINE(PL_curcop);
8802 OPERATOR(UNLESS);
8803
8804 case KEY_unlink:
8805 LOP(OP_UNLINK,XTERM);
8806
8807 case KEY_undef:
8808 UNIDOR(OP_UNDEF);
8809
8810 case KEY_unpack:
8811 LOP(OP_UNPACK,XTERM);
8812
8813 case KEY_utime:
8814 LOP(OP_UTIME,XTERM);
8815
8816 case KEY_umask:
8817 UNIDOR(OP_UMASK);
8818
8819 case KEY_unshift:
8820 LOP(OP_UNSHIFT,XTERM);
8821
8822 case KEY_use:
8823 s = tokenize_use(1, s);
8824 OPERATOR(USE);
8825
8826 case KEY_values:
8827 UNI(OP_VALUES);
8828
8829 case KEY_vec:
8830 LOP(OP_VEC,XTERM);
8831
8832 case KEY_when:
8833 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8834 return REPORT(0);
8835 pl_yylval.ival = CopLINE(PL_curcop);
8836 Perl_ck_warner_d(aTHX_
8837 packWARN(WARN_EXPERIMENTAL__SMARTMATCH),
8838 "when is experimental");
8839 OPERATOR(WHEN);
8840
8841 case KEY_while:
8842 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8843 return REPORT(0);
8844 pl_yylval.ival = CopLINE(PL_curcop);
8845 OPERATOR(WHILE);
8846
8847 case KEY_warn:
8848 PL_hints |= HINT_BLOCK_SCOPE;
8849 LOP(OP_WARN,XTERM);
8850
8851 case KEY_wait:
8852 FUN0(OP_WAIT);
8853
8854 case KEY_waitpid:
8855 LOP(OP_WAITPID,XTERM);
8856
8857 case KEY_wantarray:
8858 FUN0(OP_WANTARRAY);
8859
8860 case KEY_write:
8861#ifdef EBCDIC
8862 {
8863 char ctl_l[2];
8864 ctl_l[0] = toCTRL('L');
8865 ctl_l[1] = '\0';
8866 gv_fetchpvn_flags(ctl_l, 1, GV_ADD|GV_NOTQUAL, SVt_PV);
8867 }
8868#else
8869 /* Make sure $^L is defined */
8870 gv_fetchpvs("\f", GV_ADD|GV_NOTQUAL, SVt_PV);
8871#endif
8872 UNI(OP_ENTERWRITE);
8873
8874 case KEY_x:
8875 if (PL_expect == XOPERATOR) {
8876 if (*s == '=' && !PL_lex_allbrackets &&
8877 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
8878 return REPORT(0);
8879 Mop(OP_REPEAT);
8880 }
8881 check_uni();
8882 goto just_a_word;
8883
8884 case KEY_xor:
8885 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
8886 return REPORT(0);
8887 pl_yylval.ival = OP_XOR;
8888 OPERATOR(OROP);
8889 }
8890 }}
8891}
8892#ifdef __SC__
8893#pragma segment Main
8894#endif
8895
8896/*
8897 S_pending_ident
8898
8899 Looks up an identifier in the pad or in a package
8900
8901 Returns:
8902 PRIVATEREF if this is a lexical name.
8903 WORD if this belongs to a package.
8904
8905 Structure:
8906 if we're in a my declaration
8907 croak if they tried to say my($foo::bar)
8908 build the ops for a my() declaration
8909 if it's an access to a my() variable
8910 build ops for access to a my() variable
8911 if in a dq string, and they've said @foo and we can't find @foo
8912 warn
8913 build ops for a bareword
8914*/
8915
8916static int
8917S_pending_ident(pTHX)
8918{
8919 dVAR;
8920 PADOFFSET tmp = 0;
8921 const char pit = (char)pl_yylval.ival;
8922 const STRLEN tokenbuf_len = strlen(PL_tokenbuf);
8923 /* All routes through this function want to know if there is a colon. */
8924 const char *const has_colon = (const char*) memchr (PL_tokenbuf, ':', tokenbuf_len);
8925
8926 DEBUG_T({ PerlIO_printf(Perl_debug_log,
8927 "### Pending identifier '%s'\n", PL_tokenbuf); });
8928
8929 /* if we're in a my(), we can't allow dynamics here.
8930 $foo'bar has already been turned into $foo::bar, so
8931 just check for colons.
8932
8933 if it's a legal name, the OP is a PADANY.
8934 */
8935 if (PL_in_my) {
8936 if (PL_in_my == KEY_our) { /* "our" is merely analogous to "my" */
8937 if (has_colon)
8938 yyerror_pv(Perl_form(aTHX_ "No package name allowed for "
8939 "variable %s in \"our\"",
8940 PL_tokenbuf), UTF ? SVf_UTF8 : 0);
8941 tmp = allocmy(PL_tokenbuf, tokenbuf_len, UTF ? SVf_UTF8 : 0);
8942 }
8943 else {
8944 if (has_colon)
8945 yyerror_pv(Perl_form(aTHX_ PL_no_myglob,
8946 PL_in_my == KEY_my ? "my" : "state", PL_tokenbuf),
8947 UTF ? SVf_UTF8 : 0);
8948
8949 pl_yylval.opval = newOP(OP_PADANY, 0);
8950 pl_yylval.opval->op_targ = allocmy(PL_tokenbuf, tokenbuf_len,
8951 UTF ? SVf_UTF8 : 0);
8952 return PRIVATEREF;
8953 }
8954 }
8955
8956 /*
8957 build the ops for accesses to a my() variable.
8958 */
8959
8960 if (!has_colon) {
8961 if (!PL_in_my)
8962 tmp = pad_findmy_pvn(PL_tokenbuf, tokenbuf_len,
8963 UTF ? SVf_UTF8 : 0);
8964 if (tmp != NOT_IN_PAD) {
8965 /* might be an "our" variable" */
8966 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
8967 /* build ops for a bareword */
8968 HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
8969 HEK * const stashname = HvNAME_HEK(stash);
8970 SV * const sym = newSVhek(stashname);
8971 sv_catpvs(sym, "::");
8972 sv_catpvn_flags(sym, PL_tokenbuf+1, tokenbuf_len - 1, (UTF ? SV_CATUTF8 : SV_CATBYTES ));
8973 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
8974 pl_yylval.opval->op_private = OPpCONST_ENTERED;
8975 if (pit != '&')
8976 gv_fetchsv(sym,
8977 (PL_in_eval
8978 ? (GV_ADDMULTI | GV_ADDINEVAL)
8979 : GV_ADDMULTI
8980 ),
8981 ((PL_tokenbuf[0] == '$') ? SVt_PV
8982 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
8983 : SVt_PVHV));
8984 return WORD;
8985 }
8986
8987 pl_yylval.opval = newOP(OP_PADANY, 0);
8988 pl_yylval.opval->op_targ = tmp;
8989 return PRIVATEREF;
8990 }
8991 }
8992
8993 /*
8994 Whine if they've said @foo in a doublequoted string,
8995 and @foo isn't a variable we can find in the symbol
8996 table.
8997 */
8998 if (ckWARN(WARN_AMBIGUOUS) &&
8999 pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
9000 GV *const gv = gv_fetchpvn_flags(PL_tokenbuf + 1, tokenbuf_len - 1,
9001 ( UTF ? SVf_UTF8 : 0 ), SVt_PVAV);
9002 if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
9003 /* DO NOT warn for @- and @+ */
9004 && !( PL_tokenbuf[2] == '\0' &&
9005 ( PL_tokenbuf[1] == '-' || PL_tokenbuf[1] == '+' ))
9006 )
9007 {
9008 /* Downgraded from fatal to warning 20000522 mjd */
9009 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
9010 "Possible unintended interpolation of %"SVf" in string",
9011 SVfARG(newSVpvn_flags(PL_tokenbuf, tokenbuf_len,
9012 SVs_TEMP | ( UTF ? SVf_UTF8 : 0 ))));
9013 }
9014 }
9015
9016 /* build ops for a bareword */
9017 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
9018 newSVpvn_flags(PL_tokenbuf + 1,
9019 tokenbuf_len - 1,
9020 UTF ? SVf_UTF8 : 0 ));
9021 pl_yylval.opval->op_private = OPpCONST_ENTERED;
9022 if (pit != '&')
9023 gv_fetchpvn_flags(PL_tokenbuf+1, tokenbuf_len - 1,
9024 (PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : GV_ADD)
9025 | ( UTF ? SVf_UTF8 : 0 ),
9026 ((PL_tokenbuf[0] == '$') ? SVt_PV
9027 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
9028 : SVt_PVHV));
9029 return WORD;
9030}
9031
9032STATIC void
9033S_checkcomma(pTHX_ const char *s, const char *name, const char *what)
9034{
9035 dVAR;
9036
9037 PERL_ARGS_ASSERT_CHECKCOMMA;
9038
9039 if (*s == ' ' && s[1] == '(') { /* XXX gotta be a better way */
9040 if (ckWARN(WARN_SYNTAX)) {
9041 int level = 1;
9042 const char *w;
9043 for (w = s+2; *w && level; w++) {
9044 if (*w == '(')
9045 ++level;
9046 else if (*w == ')')
9047 --level;
9048 }
9049 while (isSPACE(*w))
9050 ++w;
9051 /* the list of chars below is for end of statements or
9052 * block / parens, boolean operators (&&, ||, //) and branch
9053 * constructs (or, and, if, until, unless, while, err, for).
9054 * Not a very solid hack... */
9055 if (!*w || !strchr(";&/|})]oaiuwef!=", *w))
9056 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
9057 "%s (...) interpreted as function",name);
9058 }
9059 }
9060 while (s < PL_bufend && isSPACE(*s))
9061 s++;
9062 if (*s == '(')
9063 s++;
9064 while (s < PL_bufend && isSPACE(*s))
9065 s++;
9066 if (isIDFIRST_lazy_if(s,UTF)) {
9067 const char * const w = s;
9068 s += UTF ? UTF8SKIP(s) : 1;
9069 while (isWORDCHAR_lazy_if(s,UTF))
9070 s += UTF ? UTF8SKIP(s) : 1;
9071 while (s < PL_bufend && isSPACE(*s))
9072 s++;
9073 if (*s == ',') {
9074 GV* gv;
9075 if (keyword(w, s - w, 0))
9076 return;
9077
9078 gv = gv_fetchpvn_flags(w, s - w, ( UTF ? SVf_UTF8 : 0 ), SVt_PVCV);
9079 if (gv && GvCVu(gv))
9080 return;
9081 Perl_croak(aTHX_ "No comma allowed after %s", what);
9082 }
9083 }
9084}
9085
9086/* S_new_constant(): do any overload::constant lookup.
9087
9088 Either returns sv, or mortalizes/frees sv and returns a new SV*.
9089 Best used as sv=new_constant(..., sv, ...).
9090 If s, pv are NULL, calls subroutine with one argument,
9091 and <type> is used with error messages only.
9092 <type> is assumed to be well formed UTF-8 */
9093
9094STATIC SV *
9095S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen,
9096 SV *sv, SV *pv, const char *type, STRLEN typelen)
9097{
9098 dVAR; dSP;
9099 HV * table = GvHV(PL_hintgv); /* ^H */
9100 SV *res;
9101 SV *errsv = NULL;
9102 SV **cvp;
9103 SV *cv, *typesv;
9104 const char *why1 = "", *why2 = "", *why3 = "";
9105
9106 PERL_ARGS_ASSERT_NEW_CONSTANT;
9107 /* We assume that this is true: */
9108 if (*key == 'c') { assert (strEQ(key, "charnames")); }
9109 assert(type || s);
9110
9111 /* charnames doesn't work well if there have been errors found */
9112 if (PL_error_count > 0 && *key == 'c')
9113 {
9114 SvREFCNT_dec_NN(sv);
9115 return &PL_sv_undef;
9116 }
9117
9118 sv_2mortal(sv); /* Parent created it permanently */
9119 if (!table
9120 || ! (PL_hints & HINT_LOCALIZE_HH)
9121 || ! (cvp = hv_fetch(table, key, keylen, FALSE))
9122 || ! SvOK(*cvp))
9123 {
9124 char *msg;
9125
9126 /* Here haven't found what we're looking for. If it is charnames,
9127 * perhaps it needs to be loaded. Try doing that before giving up */
9128 if (*key == 'c') {
9129 Perl_load_module(aTHX_
9130 0,
9131 newSVpvs("_charnames"),
9132 /* version parameter; no need to specify it, as if
9133 * we get too early a version, will fail anyway,
9134 * not being able to find '_charnames' */
9135 NULL,
9136 newSVpvs(":full"),
9137 newSVpvs(":short"),
9138 NULL);
9139 SPAGAIN;
9140 table = GvHV(PL_hintgv);
9141 if (table
9142 && (PL_hints & HINT_LOCALIZE_HH)
9143 && (cvp = hv_fetch(table, key, keylen, FALSE))
9144 && SvOK(*cvp))
9145 {
9146 goto now_ok;
9147 }
9148 }
9149 if (!table || !(PL_hints & HINT_LOCALIZE_HH)) {
9150 msg = Perl_form(aTHX_
9151 "Constant(%.*s) unknown",
9152 (int)(type ? typelen : len),
9153 (type ? type: s));
9154 }
9155 else {
9156 why1 = "$^H{";
9157 why2 = key;
9158 why3 = "} is not defined";
9159 report:
9160 if (*key == 'c') {
9161 msg = Perl_form(aTHX_
9162 /* The +3 is for '\N{'; -4 for that, plus '}' */
9163 "Unknown charname '%.*s'", (int)typelen - 4, type + 3
9164 );
9165 }
9166 else {
9167 msg = Perl_form(aTHX_ "Constant(%.*s): %s%s%s",
9168 (int)(type ? typelen : len),
9169 (type ? type: s), why1, why2, why3);
9170 }
9171 }
9172 yyerror_pv(msg, UTF ? SVf_UTF8 : 0);
9173 return SvREFCNT_inc_simple_NN(sv);
9174 }
9175now_ok:
9176 cv = *cvp;
9177 if (!pv && s)
9178 pv = newSVpvn_flags(s, len, SVs_TEMP);
9179 if (type && pv)
9180 typesv = newSVpvn_flags(type, typelen, SVs_TEMP);
9181 else
9182 typesv = &PL_sv_undef;
9183
9184 PUSHSTACKi(PERLSI_OVERLOAD);
9185 ENTER ;
9186 SAVETMPS;
9187
9188 PUSHMARK(SP) ;
9189 EXTEND(sp, 3);
9190 if (pv)
9191 PUSHs(pv);
9192 PUSHs(sv);
9193 if (pv)
9194 PUSHs(typesv);
9195 PUTBACK;
9196 call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL));
9197
9198 SPAGAIN ;
9199
9200 /* Check the eval first */
9201 if (!PL_in_eval && ((errsv = ERRSV), SvTRUE_NN(errsv))) {
9202 STRLEN errlen;
9203 const char * errstr;
9204 sv_catpvs(errsv, "Propagated");
9205 errstr = SvPV_const(errsv, errlen);
9206 yyerror_pvn(errstr, errlen, 0); /* Duplicates the message inside eval */
9207 (void)POPs;
9208 res = SvREFCNT_inc_simple_NN(sv);
9209 }
9210 else {
9211 res = POPs;
9212 SvREFCNT_inc_simple_void_NN(res);
9213 }
9214
9215 PUTBACK ;
9216 FREETMPS ;
9217 LEAVE ;
9218 POPSTACK;
9219
9220 if (!SvOK(res)) {
9221 why1 = "Call to &{$^H{";
9222 why2 = key;
9223 why3 = "}} did not return a defined value";
9224 sv = res;
9225 (void)sv_2mortal(sv);
9226 goto report;
9227 }
9228
9229 return res;
9230}
9231
9232PERL_STATIC_INLINE void
9233S_parse_ident(pTHX_ char **s, char **d, char * const e, int allow_package, bool is_utf8) {
9234 dVAR;
9235 PERL_ARGS_ASSERT_PARSE_IDENT;
9236
9237 for (;;) {
9238 if (*d >= e)
9239 Perl_croak(aTHX_ "%s", ident_too_long);
9240 if (is_utf8 && isIDFIRST_utf8((U8*)*s)) {
9241 /* The UTF-8 case must come first, otherwise things
9242 * like c\N{COMBINING TILDE} would start failing, as the
9243 * isWORDCHAR_A case below would gobble the 'c' up.
9244 */
9245
9246 char *t = *s + UTF8SKIP(*s);
9247 while (isIDCONT_utf8((U8*)t))
9248 t += UTF8SKIP(t);
9249 if (*d + (t - *s) > e)
9250 Perl_croak(aTHX_ "%s", ident_too_long);
9251 Copy(*s, *d, t - *s, char);
9252 *d += t - *s;
9253 *s = t;
9254 }
9255 else if ( isWORDCHAR_A(**s) ) {
9256 do {
9257 *(*d)++ = *(*s)++;
9258 } while isWORDCHAR_A(**s);
9259 }
9260 else if (allow_package && **s == '\'' && isIDFIRST_lazy_if(*s+1,is_utf8)) {
9261 *(*d)++ = ':';
9262 *(*d)++ = ':';
9263 (*s)++;
9264 }
9265 else if (allow_package && **s == ':' && (*s)[1] == ':'
9266 /* Disallow things like Foo::$bar. For the curious, this is
9267 * the code path that triggers the "Bad name after" warning
9268 * when looking for barewords.
9269 */
9270 && (*s)[2] != '$') {
9271 *(*d)++ = *(*s)++;
9272 *(*d)++ = *(*s)++;
9273 }
9274 else
9275 break;
9276 }
9277 return;
9278}
9279
9280/* Returns a NUL terminated string, with the length of the string written to
9281 *slp
9282 */
9283STATIC char *
9284S_scan_word(pTHX_ char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
9285{
9286 dVAR;
9287 char *d = dest;
9288 char * const e = d + destlen - 3; /* two-character token, ending NUL */
9289 bool is_utf8 = cBOOL(UTF);
9290
9291 PERL_ARGS_ASSERT_SCAN_WORD;
9292
9293 parse_ident(&s, &d, e, allow_package, is_utf8);
9294 *d = '\0';
9295 *slp = d - dest;
9296 return s;
9297}
9298
9299STATIC char *
9300S_scan_ident(pTHX_ char *s, const char *send, char *dest, STRLEN destlen, I32 ck_uni)
9301{
9302 dVAR;
9303 char *bracket = NULL;
9304 char funny = *s++;
9305 char *d = dest;
9306 char * const e = d + destlen - 3; /* two-character token, ending NUL */
9307 bool is_utf8 = cBOOL(UTF);
9308
9309 PERL_ARGS_ASSERT_SCAN_IDENT;
9310
9311 if (isSPACE(*s))
9312 s = PEEKSPACE(s);
9313 if (isDIGIT(*s)) {
9314 while (isDIGIT(*s)) {
9315 if (d >= e)
9316 Perl_croak(aTHX_ "%s", ident_too_long);
9317 *d++ = *s++;
9318 }
9319 }
9320 else {
9321 parse_ident(&s, &d, e, 1, is_utf8);
9322 }
9323 *d = '\0';
9324 d = dest;
9325 if (*d) {
9326 /* Either a digit variable, or parse_ident() found an identifier
9327 (anything valid as a bareword), so job done and return. */
9328 if (PL_lex_state != LEX_NORMAL)
9329 PL_lex_state = LEX_INTERPENDMAYBE;
9330 return s;
9331 }
9332 if (*s == '$' && s[1] &&
9333 (isIDFIRST_lazy_if(s+1,is_utf8)
9334 || isDIGIT_A((U8)s[1])
9335 || s[1] == '$'
9336 || s[1] == '{'
9337 || strnEQ(s+1,"::",2)) )
9338 {
9339 /* Dereferencing a value in a scalar variable.
9340 The alternatives are different syntaxes for a scalar variable.
9341 Using ' as a leading package separator isn't allowed. :: is. */
9342 return s;
9343 }
9344 /* Handle the opening { of @{...}, &{...}, *{...}, %{...}, ${...} */
9345 if (*s == '{') {
9346 bracket = s;
9347 s++;
9348 while (s < send && SPACE_OR_TAB(*s))
9349 s++;
9350 }
9351
9352#define VALID_LEN_ONE_IDENT(d, u) (isPUNCT_A((U8)(d)) \
9353 || isCNTRL_A((U8)(d)) \
9354 || isDIGIT_A((U8)(d)) \
9355 || (!(u) && !UTF8_IS_INVARIANT((U8)(d))))
9356 if (s < send
9357 && (isIDFIRST_lazy_if(s, is_utf8) || VALID_LEN_ONE_IDENT(*s, is_utf8)))
9358 {
9359 if (is_utf8) {
9360 const STRLEN skip = UTF8SKIP(s);
9361 STRLEN i;
9362 d[skip] = '\0';
9363 for ( i = 0; i < skip; i++ )
9364 d[i] = *s++;
9365 }
9366 else {
9367 *d = *s++;
9368 d[1] = '\0';
9369 }
9370 }
9371 /* Convert $^F, ${^F} and the ^F of ${^FOO} to control characters */
9372 if (*d == '^' && *s && isCONTROLVAR(*s)) {
9373 *d = toCTRL(*s);
9374 s++;
9375 }
9376 /* Warn about ambiguous code after unary operators if {...} notation isn't
9377 used. There's no difference in ambiguity; it's merely a heuristic
9378 about when not to warn. */
9379 else if (ck_uni && !bracket)
9380 check_uni();
9381 if (bracket) {
9382 /* If we were processing {...} notation then... */
9383 if (isIDFIRST_lazy_if(d,is_utf8)) {
9384 /* if it starts as a valid identifier, assume that it is one.
9385 (the later check for } being at the expected point will trap
9386 cases where this doesn't pan out.) */
9387 d += is_utf8 ? UTF8SKIP(d) : 1;
9388 parse_ident(&s, &d, e, 1, is_utf8);
9389 *d = '\0';
9390 while (s < send && SPACE_OR_TAB(*s))
9391 s++;
9392 if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
9393 /* ${foo[0]} and ${foo{bar}} notation. */
9394 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest, 0)) {
9395 const char * const brack =
9396 (const char *)
9397 ((*s == '[') ? "[...]" : "{...}");
9398 /* diag_listed_as: Ambiguous use of %c{%s[...]} resolved to %c%s[...] */
9399 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
9400 "Ambiguous use of %c{%s%s} resolved to %c%s%s",
9401 funny, dest, brack, funny, dest, brack);
9402 }
9403 bracket++;
9404 PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK);
9405 PL_lex_allbrackets++;
9406 return s;
9407 }
9408 }
9409 /* Handle extended ${^Foo} variables
9410 * 1999-02-27 mjd-perl-patch@plover.com */
9411 else if (! isPRINT(*d) /* isCNTRL(d), plus all non-ASCII */
9412 && isWORDCHAR(*s))
9413 {
9414 d++;
9415 while (isWORDCHAR(*s) && d < e) {
9416 *d++ = *s++;
9417 }
9418 if (d >= e)
9419 Perl_croak(aTHX_ "%s", ident_too_long);
9420 *d = '\0';
9421 }
9422
9423 while (s < send && SPACE_OR_TAB(*s))
9424 s++;
9425
9426 /* Expect to find a closing } after consuming any trailing whitespace.
9427 */
9428 if (*s == '}') {
9429 s++;
9430 if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
9431 PL_lex_state = LEX_INTERPEND;
9432 PL_expect = XREF;
9433 }
9434 if (PL_lex_state == LEX_NORMAL) {
9435 if (ckWARN(WARN_AMBIGUOUS) &&
9436 (keyword(dest, d - dest, 0)
9437 || get_cvn_flags(dest, d - dest, is_utf8 ? SVf_UTF8 : 0)))
9438 {
9439 SV *tmp = newSVpvn_flags( dest, d - dest,
9440 SVs_TEMP | (is_utf8 ? SVf_UTF8 : 0) );
9441 if (funny == '#')
9442 funny = '@';
9443 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
9444 "Ambiguous use of %c{%"SVf"} resolved to %c%"SVf,
9445 funny, tmp, funny, tmp);
9446 }
9447 }
9448 }
9449 else {
9450 /* Didn't find the closing } at the point we expected, so restore
9451 state such that the next thing to process is the opening { and */
9452 s = bracket; /* let the parser handle it */
9453 *dest = '\0';
9454 }
9455 }
9456 else if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets && !intuit_more(s))
9457 PL_lex_state = LEX_INTERPEND;
9458 return s;
9459}
9460
9461static bool
9462S_pmflag(pTHX_ const char* const valid_flags, U32 * pmfl, char** s, char* charset) {
9463
9464 /* Adds, subtracts to/from 'pmfl' based on regex modifier flags found in
9465 * the parse starting at 's', based on the subset that are valid in this
9466 * context input to this routine in 'valid_flags'. Advances s. Returns
9467 * TRUE if the input should be treated as a valid flag, so the next char
9468 * may be as well; otherwise FALSE. 'charset' should point to a NUL upon
9469 * first call on the current regex. This routine will set it to any
9470 * charset modifier found. The caller shouldn't change it. This way,
9471 * another charset modifier encountered in the parse can be detected as an
9472 * error, as we have decided to allow only one */
9473
9474 const char c = **s;
9475 STRLEN charlen = UTF ? UTF8SKIP(*s) : 1;
9476
9477 if ( charlen != 1 || ! strchr(valid_flags, c) ) {
9478 if (isWORDCHAR_lazy_if(*s, UTF)) {
9479 yyerror_pv(Perl_form(aTHX_ "Unknown regexp modifier \"/%.*s\"", (int)charlen, *s),
9480 UTF ? SVf_UTF8 : 0);
9481 (*s) += charlen;
9482 /* Pretend that it worked, so will continue processing before
9483 * dieing */
9484 return TRUE;
9485 }
9486 return FALSE;
9487 }
9488
9489 switch (c) {
9490
9491 CASE_STD_PMMOD_FLAGS_PARSE_SET(pmfl);
9492 case GLOBAL_PAT_MOD: *pmfl |= PMf_GLOBAL; break;
9493 case CONTINUE_PAT_MOD: *pmfl |= PMf_CONTINUE; break;
9494 case ONCE_PAT_MOD: *pmfl |= PMf_KEEP; break;
9495 case KEEPCOPY_PAT_MOD: *pmfl |= RXf_PMf_KEEPCOPY; break;
9496 case NONDESTRUCT_PAT_MOD: *pmfl |= PMf_NONDESTRUCT; break;
9497 case LOCALE_PAT_MOD:
9498 if (*charset) {
9499 goto multiple_charsets;
9500 }
9501 set_regex_charset(pmfl, REGEX_LOCALE_CHARSET);
9502 *charset = c;
9503 break;
9504 case UNICODE_PAT_MOD:
9505 if (*charset) {
9506 goto multiple_charsets;
9507 }
9508 set_regex_charset(pmfl, REGEX_UNICODE_CHARSET);
9509 *charset = c;
9510 break;
9511 case ASCII_RESTRICT_PAT_MOD:
9512 if (! *charset) {
9513 set_regex_charset(pmfl, REGEX_ASCII_RESTRICTED_CHARSET);
9514 }
9515 else {
9516
9517 /* Error if previous modifier wasn't an 'a', but if it was, see
9518 * if, and accept, a second occurrence (only) */
9519 if (*charset != 'a'
9520 || get_regex_charset(*pmfl)
9521 != REGEX_ASCII_RESTRICTED_CHARSET)
9522 {
9523 goto multiple_charsets;
9524 }
9525 set_regex_charset(pmfl, REGEX_ASCII_MORE_RESTRICTED_CHARSET);
9526 }
9527 *charset = c;
9528 break;
9529 case DEPENDS_PAT_MOD:
9530 if (*charset) {
9531 goto multiple_charsets;
9532 }
9533 set_regex_charset(pmfl, REGEX_DEPENDS_CHARSET);
9534 *charset = c;
9535 break;
9536 }
9537
9538 (*s)++;
9539 return TRUE;
9540
9541 multiple_charsets:
9542 if (*charset != c) {
9543 yyerror(Perl_form(aTHX_ "Regexp modifiers \"/%c\" and \"/%c\" are mutually exclusive", *charset, c));
9544 }
9545 else if (c == 'a') {
9546 yyerror("Regexp modifier \"/a\" may appear a maximum of twice");
9547 }
9548 else {
9549 yyerror(Perl_form(aTHX_ "Regexp modifier \"/%c\" may not appear twice", c));
9550 }
9551
9552 /* Pretend that it worked, so will continue processing before dieing */
9553 (*s)++;
9554 return TRUE;
9555}
9556
9557STATIC char *
9558S_scan_pat(pTHX_ char *start, I32 type)
9559{
9560 dVAR;
9561 PMOP *pm;
9562 char *s;
9563 const char * const valid_flags =
9564 (const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS);
9565 char charset = '\0'; /* character set modifier */
9566#ifdef PERL_MAD
9567 char *modstart;
9568#endif
9569
9570 PERL_ARGS_ASSERT_SCAN_PAT;
9571
9572 s = scan_str(start,!!PL_madskills,FALSE, (PL_in_eval & EVAL_RE_REPARSING),
9573 TRUE /* look for escaped bracketed metas */ );
9574
9575 if (!s) {
9576 const char * const delimiter = skipspace(start);
9577 Perl_croak(aTHX_
9578 (const char *)
9579 (*delimiter == '?'
9580 ? "Search pattern not terminated or ternary operator parsed as search pattern"
9581 : "Search pattern not terminated" ));
9582 }
9583
9584 pm = (PMOP*)newPMOP(type, 0);
9585 if (PL_multi_open == '?') {
9586 /* This is the only point in the code that sets PMf_ONCE: */
9587 pm->op_pmflags |= PMf_ONCE;
9588
9589 /* Hence it's safe to do this bit of PMOP book-keeping here, which
9590 allows us to restrict the list needed by reset to just the ??
9591 matches. */
9592 assert(type != OP_TRANS);
9593 if (PL_curstash) {
9594 MAGIC *mg = mg_find((const SV *)PL_curstash, PERL_MAGIC_symtab);
9595 U32 elements;
9596 if (!mg) {
9597 mg = sv_magicext(MUTABLE_SV(PL_curstash), 0, PERL_MAGIC_symtab, 0, 0,
9598 0);
9599 }
9600 elements = mg->mg_len / sizeof(PMOP**);
9601 Renewc(mg->mg_ptr, elements + 1, PMOP*, char);
9602 ((PMOP**)mg->mg_ptr) [elements++] = pm;
9603 mg->mg_len = elements * sizeof(PMOP**);
9604 PmopSTASH_set(pm,PL_curstash);
9605 }
9606 }
9607#ifdef PERL_MAD
9608 modstart = s;
9609#endif
9610
9611 /* if qr/...(?{..}).../, then need to parse the pattern within a new
9612 * anon CV. False positives like qr/[(?{]/ are harmless */
9613
9614 if (type == OP_QR) {
9615 STRLEN len;
9616 char *e, *p = SvPV(PL_lex_stuff, len);
9617 e = p + len;
9618 for (; p < e; p++) {
9619 if (p[0] == '(' && p[1] == '?'
9620 && (p[2] == '{' || (p[2] == '?' && p[3] == '{')))
9621 {
9622 pm->op_pmflags |= PMf_HAS_CV;
9623 break;
9624 }
9625 }
9626 pm->op_pmflags |= PMf_IS_QR;
9627 }
9628
9629 while (*s && S_pmflag(aTHX_ valid_flags, &(pm->op_pmflags), &s, &charset)) {};
9630#ifdef PERL_MAD
9631 if (PL_madskills && modstart != s) {
9632 SV* tmptoken = newSVpvn(modstart, s - modstart);
9633 append_madprops(newMADPROP('m', MAD_SV, tmptoken, 0), (OP*)pm, 0);
9634 }
9635#endif
9636 /* issue a warning if /c is specified,but /g is not */
9637 if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL))
9638 {
9639 Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP),
9640 "Use of /c modifier is meaningless without /g" );
9641 }
9642
9643 PL_lex_op = (OP*)pm;
9644 pl_yylval.ival = OP_MATCH;
9645 return s;
9646}
9647
9648STATIC char *
9649S_scan_subst(pTHX_ char *start)
9650{
9651 dVAR;
9652 char *s;
9653 PMOP *pm;
9654 I32 first_start;
9655 I32 es = 0;
9656 char charset = '\0'; /* character set modifier */
9657#ifdef PERL_MAD
9658 char *modstart;
9659#endif
9660
9661 PERL_ARGS_ASSERT_SCAN_SUBST;
9662
9663 pl_yylval.ival = OP_NULL;
9664
9665 s = scan_str(start,!!PL_madskills,FALSE,FALSE,
9666 TRUE /* look for escaped bracketed metas */ );
9667
9668 if (!s)
9669 Perl_croak(aTHX_ "Substitution pattern not terminated");
9670
9671 if (s[-1] == PL_multi_open)
9672 s--;
9673#ifdef PERL_MAD
9674 if (PL_madskills) {
9675 CURMAD('q', PL_thisopen);
9676 CURMAD('_', PL_thiswhite);
9677 CURMAD('E', PL_thisstuff);
9678 CURMAD('Q', PL_thisclose);
9679 PL_realtokenstart = s - SvPVX(PL_linestr);
9680 }
9681#endif
9682
9683 first_start = PL_multi_start;
9684 s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE);
9685 if (!s) {
9686 if (PL_lex_stuff) {
9687 SvREFCNT_dec(PL_lex_stuff);
9688 PL_lex_stuff = NULL;
9689 }
9690 Perl_croak(aTHX_ "Substitution replacement not terminated");
9691 }
9692 PL_multi_start = first_start; /* so whole substitution is taken together */
9693
9694 pm = (PMOP*)newPMOP(OP_SUBST, 0);
9695
9696#ifdef PERL_MAD
9697 if (PL_madskills) {
9698 CURMAD('z', PL_thisopen);
9699 CURMAD('R', PL_thisstuff);
9700 CURMAD('Z', PL_thisclose);
9701 }
9702 modstart = s;
9703#endif
9704
9705 while (*s) {
9706 if (*s == EXEC_PAT_MOD) {
9707 s++;
9708 es++;
9709 }
9710 else if (! S_pmflag(aTHX_ S_PAT_MODS, &(pm->op_pmflags), &s, &charset))
9711 {
9712 break;
9713 }
9714 }
9715
9716#ifdef PERL_MAD
9717 if (PL_madskills) {
9718 if (modstart != s)
9719 curmad('m', newSVpvn(modstart, s - modstart));
9720 append_madprops(PL_thismad, (OP*)pm, 0);
9721 PL_thismad = 0;
9722 }
9723#endif
9724 if ((pm->op_pmflags & PMf_CONTINUE)) {
9725 Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" );
9726 }
9727
9728 if (es) {
9729 SV * const repl = newSVpvs("");
9730
9731 PL_multi_end = 0;
9732 pm->op_pmflags |= PMf_EVAL;
9733 while (es-- > 0) {
9734 if (es)
9735 sv_catpvs(repl, "eval ");
9736 else
9737 sv_catpvs(repl, "do ");
9738 }
9739 sv_catpvs(repl, "{");
9740 sv_catsv(repl, PL_sublex_info.repl);
9741 sv_catpvs(repl, "}");
9742 SvEVALED_on(repl);
9743 SvREFCNT_dec(PL_sublex_info.repl);
9744 PL_sublex_info.repl = repl;
9745 }
9746
9747 PL_lex_op = (OP*)pm;
9748 pl_yylval.ival = OP_SUBST;
9749 return s;
9750}
9751
9752STATIC char *
9753S_scan_trans(pTHX_ char *start)
9754{
9755 dVAR;
9756 char* s;
9757 OP *o;
9758 U8 squash;
9759 U8 del;
9760 U8 complement;
9761 bool nondestruct = 0;
9762#ifdef PERL_MAD
9763 char *modstart;
9764#endif
9765
9766 PERL_ARGS_ASSERT_SCAN_TRANS;
9767
9768 pl_yylval.ival = OP_NULL;
9769
9770 s = scan_str(start,!!PL_madskills,FALSE,FALSE, FALSE);
9771 if (!s)
9772 Perl_croak(aTHX_ "Transliteration pattern not terminated");
9773
9774 if (s[-1] == PL_multi_open)
9775 s--;
9776#ifdef PERL_MAD
9777 if (PL_madskills) {
9778 CURMAD('q', PL_thisopen);
9779 CURMAD('_', PL_thiswhite);
9780 CURMAD('E', PL_thisstuff);
9781 CURMAD('Q', PL_thisclose);
9782 PL_realtokenstart = s - SvPVX(PL_linestr);
9783 }
9784#endif
9785
9786 s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE);
9787 if (!s) {
9788 if (PL_lex_stuff) {
9789 SvREFCNT_dec(PL_lex_stuff);
9790 PL_lex_stuff = NULL;
9791 }
9792 Perl_croak(aTHX_ "Transliteration replacement not terminated");
9793 }
9794 if (PL_madskills) {
9795 CURMAD('z', PL_thisopen);
9796 CURMAD('R', PL_thisstuff);
9797 CURMAD('Z', PL_thisclose);
9798 }
9799
9800 complement = del = squash = 0;
9801#ifdef PERL_MAD
9802 modstart = s;
9803#endif
9804 while (1) {
9805 switch (*s) {
9806 case 'c':
9807 complement = OPpTRANS_COMPLEMENT;
9808 break;
9809 case 'd':
9810 del = OPpTRANS_DELETE;
9811 break;
9812 case 's':
9813 squash = OPpTRANS_SQUASH;
9814 break;
9815 case 'r':
9816 nondestruct = 1;
9817 break;
9818 default:
9819 goto no_more;
9820 }
9821 s++;
9822 }
9823 no_more:
9824
9825 o = newPVOP(nondestruct ? OP_TRANSR : OP_TRANS, 0, (char*)NULL);
9826 o->op_private &= ~OPpTRANS_ALL;
9827 o->op_private |= del|squash|complement|
9828 (DO_UTF8(PL_lex_stuff)? OPpTRANS_FROM_UTF : 0)|
9829 (DO_UTF8(PL_sublex_info.repl) ? OPpTRANS_TO_UTF : 0);
9830
9831 PL_lex_op = o;
9832 pl_yylval.ival = nondestruct ? OP_TRANSR : OP_TRANS;
9833
9834#ifdef PERL_MAD
9835 if (PL_madskills) {
9836 if (modstart != s)
9837 curmad('m', newSVpvn(modstart, s - modstart));
9838 append_madprops(PL_thismad, o, 0);
9839 PL_thismad = 0;
9840 }
9841#endif
9842
9843 return s;
9844}
9845
9846/* scan_heredoc
9847 Takes a pointer to the first < in <<FOO.
9848 Returns a pointer to the byte following <<FOO.
9849
9850 This function scans a heredoc, which involves different methods
9851 depending on whether we are in a string eval, quoted construct, etc.
9852 This is because PL_linestr could containing a single line of input, or
9853 a whole string being evalled, or the contents of the current quote-
9854 like operator.
9855
9856 The two basic methods are:
9857 - Steal lines from the input stream
9858 - Scan the heredoc in PL_linestr and remove it therefrom
9859
9860 In a file scope or filtered eval, the first method is used; in a
9861 string eval, the second.
9862
9863 In a quote-like operator, we have to choose between the two,
9864 depending on where we can find a newline. We peek into outer lex-
9865 ing scopes until we find one with a newline in it. If we reach the
9866 outermost lexing scope and it is a file, we use the stream method.
9867 Otherwise it is treated as an eval.
9868*/
9869
9870STATIC char *
9871S_scan_heredoc(pTHX_ char *s)
9872{
9873 dVAR;
9874 I32 op_type = OP_SCALAR;
9875 I32 len;
9876 SV *tmpstr;
9877 char term;
9878 char *d;
9879 char *e;
9880 char *peek;
9881 const bool infile = PL_rsfp || PL_parser->filtered;
9882 LEXSHARED *shared = PL_parser->lex_shared;
9883#ifdef PERL_MAD
9884 I32 stuffstart = s - SvPVX(PL_linestr);
9885 char *tstart;
9886
9887 PL_realtokenstart = -1;
9888#endif
9889
9890 PERL_ARGS_ASSERT_SCAN_HEREDOC;
9891
9892 s += 2;
9893 d = PL_tokenbuf + 1;
9894 e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
9895 *PL_tokenbuf = '\n';
9896 peek = s;
9897 while (SPACE_OR_TAB(*peek))
9898 peek++;
9899 if (*peek == '`' || *peek == '\'' || *peek =='"') {
9900 s = peek;
9901 term = *s++;
9902 s = delimcpy(d, e, s, PL_bufend, term, &len);
9903 if (s == PL_bufend)
9904 Perl_croak(aTHX_ "Unterminated delimiter for here document");
9905 d += len;
9906 s++;
9907 }
9908 else {
9909 if (*s == '\\')
9910 /* <<\FOO is equivalent to <<'FOO' */
9911 s++, term = '\'';
9912 else
9913 term = '"';
9914 if (!isWORDCHAR_lazy_if(s,UTF))
9915 deprecate("bare << to mean <<\"\"");
9916 for (; isWORDCHAR_lazy_if(s,UTF); s++) {
9917 if (d < e)
9918 *d++ = *s;
9919 }
9920 }
9921 if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
9922 Perl_croak(aTHX_ "Delimiter for here document is too long");
9923 *d++ = '\n';
9924 *d = '\0';
9925 len = d - PL_tokenbuf;
9926
9927#ifdef PERL_MAD
9928 if (PL_madskills) {
9929 tstart = PL_tokenbuf + 1;
9930 PL_thisclose = newSVpvn(tstart, len - 1);
9931 tstart = SvPVX(PL_linestr) + stuffstart;
9932 PL_thisopen = newSVpvn(tstart, s - tstart);
9933 stuffstart = s - SvPVX(PL_linestr);
9934 }
9935#endif
9936#ifndef PERL_STRICT_CR
9937 d = strchr(s, '\r');
9938 if (d) {
9939 char * const olds = s;
9940 s = d;
9941 while (s < PL_bufend) {
9942 if (*s == '\r') {
9943 *d++ = '\n';
9944 if (*++s == '\n')
9945 s++;
9946 }
9947 else if (*s == '\n' && s[1] == '\r') { /* \015\013 on a mac? */
9948 *d++ = *s++;
9949 s++;
9950 }
9951 else
9952 *d++ = *s++;
9953 }
9954 *d = '\0';
9955 PL_bufend = d;
9956 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
9957 s = olds;
9958 }
9959#endif
9960#ifdef PERL_MAD
9961 if (PL_madskills) {
9962 tstart = SvPVX(PL_linestr) + stuffstart;
9963 if (PL_thisstuff)
9964 sv_catpvn(PL_thisstuff, tstart, s - tstart);
9965 else
9966 PL_thisstuff = newSVpvn(tstart, s - tstart);
9967 }
9968
9969 stuffstart = s - SvPVX(PL_linestr);
9970#endif
9971
9972 tmpstr = newSV_type(SVt_PVIV);
9973 SvGROW(tmpstr, 80);
9974 if (term == '\'') {
9975 op_type = OP_CONST;
9976 SvIV_set(tmpstr, -1);
9977 }
9978 else if (term == '`') {
9979 op_type = OP_BACKTICK;
9980 SvIV_set(tmpstr, '\\');
9981 }
9982
9983 PL_multi_start = CopLINE(PL_curcop) + 1;
9984 PL_multi_open = PL_multi_close = '<';
9985 /* inside a string eval or quote-like operator */
9986 if (!infile || PL_lex_inwhat) {
9987 SV *linestr;
9988 char *bufend;
9989 char * const olds = s;
9990 PERL_CONTEXT * const cx = &cxstack[cxstack_ix];
9991 /* These two fields are not set until an inner lexing scope is
9992 entered. But we need them set here. */
9993 shared->ls_bufptr = s;
9994 shared->ls_linestr = PL_linestr;
9995 if (PL_lex_inwhat)
9996 /* Look for a newline. If the current buffer does not have one,
9997 peek into the line buffer of the parent lexing scope, going
9998 up as many levels as necessary to find one with a newline
9999 after bufptr.
10000 */
10001 while (!(s = (char *)memchr(
10002 (void *)shared->ls_bufptr, '\n',
10003 SvEND(shared->ls_linestr)-shared->ls_bufptr
10004 ))) {
10005 shared = shared->ls_prev;
10006 /* shared is only null if we have gone beyond the outermost
10007 lexing scope. In a file, we will have broken out of the
10008 loop in the previous iteration. In an eval, the string buf-
10009 fer ends with "\n;", so the while condition below will have
10010 evaluated to false. So shared can never be null. */
10011 assert(shared);
10012 /* A LEXSHARED struct with a null ls_prev pointer is the outer-
10013 most lexing scope. In a file, shared->ls_linestr at that
10014 level is just one line, so there is no body to steal. */
10015 if (infile && !shared->ls_prev) {
10016 s = olds;
10017 goto streaming;
10018 }
10019 }
10020 else { /* eval */
10021 s = (char*)memchr((void*)s, '\n', PL_bufend - s);
10022 assert(s);
10023 }
10024 linestr = shared->ls_linestr;
10025 bufend = SvEND(linestr);
10026 d = s;
10027 while (s < bufend - len + 1 &&
10028 memNE(s,PL_tokenbuf,len) ) {
10029 if (*s++ == '\n')
10030 ++shared->herelines;
10031 }
10032 if (s >= bufend - len + 1) {
10033 goto interminable;
10034 }
10035 sv_setpvn(tmpstr,d+1,s-d);
10036#ifdef PERL_MAD
10037 if (PL_madskills) {
10038 if (PL_thisstuff)
10039 sv_catpvn(PL_thisstuff, d + 1, s - d);
10040 else
10041 PL_thisstuff = newSVpvn(d + 1, s - d);
10042 stuffstart = s - SvPVX(PL_linestr);
10043 }
10044#endif
10045 s += len - 1;
10046 /* the preceding stmt passes a newline */
10047 shared->herelines++;
10048
10049 /* s now points to the newline after the heredoc terminator.
10050 d points to the newline before the body of the heredoc.
10051 */
10052
10053 /* We are going to modify linestr in place here, so set
10054 aside copies of the string if necessary for re-evals or
10055 (caller $n)[6]. */
10056 /* See the Paranoia note in case LEX_INTERPEND in yylex, for why we
10057 check shared->re_eval_str. */
10058 if (shared->re_eval_start || shared->re_eval_str) {
10059 /* Set aside the rest of the regexp */
10060 if (!shared->re_eval_str)
10061 shared->re_eval_str =
10062 newSVpvn(shared->re_eval_start,
10063 bufend - shared->re_eval_start);
10064 shared->re_eval_start -= s-d;
10065 }
10066 if (cxstack_ix >= 0 && CxTYPE(cx) == CXt_EVAL &&
10067 CxOLD_OP_TYPE(cx) == OP_ENTEREVAL &&
10068 cx->blk_eval.cur_text == linestr)
10069 {
10070 cx->blk_eval.cur_text = newSVsv(linestr);
10071 SvSCREAM_on(cx->blk_eval.cur_text);
10072 }
10073 /* Copy everything from s onwards back to d. */
10074 Move(s,d,bufend-s + 1,char);
10075 SvCUR_set(linestr, SvCUR(linestr) - (s-d));
10076 /* Setting PL_bufend only applies when we have not dug deeper
10077 into other scopes, because sublex_done sets PL_bufend to
10078 SvEND(PL_linestr). */
10079 if (shared == PL_parser->lex_shared) PL_bufend = SvEND(linestr);
10080 s = olds;
10081 }
10082 else
10083 {
10084 SV *linestr_save;
10085 streaming:
10086 sv_setpvs(tmpstr,""); /* avoid "uninitialized" warning */
10087 term = PL_tokenbuf[1];
10088 len--;
10089 linestr_save = PL_linestr; /* must restore this afterwards */
10090 d = s; /* and this */
10091 PL_linestr = newSVpvs("");
10092 PL_bufend = SvPVX(PL_linestr);
10093 while (1) {
10094#ifdef PERL_MAD
10095 if (PL_madskills) {
10096 tstart = SvPVX(PL_linestr) + stuffstart;
10097 if (PL_thisstuff)
10098 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
10099 else
10100 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
10101 }
10102#endif
10103 PL_bufptr = PL_bufend;
10104 CopLINE_set(PL_curcop,
10105 PL_multi_start + shared->herelines);
10106 if (!lex_next_chunk(LEX_NO_TERM)
10107 && (!SvCUR(tmpstr) || SvEND(tmpstr)[-1] != '\n')) {
10108 SvREFCNT_dec(linestr_save);
10109 goto interminable;
10110 }
10111 CopLINE_set(PL_curcop, (line_t)PL_multi_start - 1);
10112 if (!SvCUR(PL_linestr) || PL_bufend[-1] != '\n') {
10113 lex_grow_linestr(SvCUR(PL_linestr) + 2);
10114 sv_catpvs(PL_linestr, "\n\0");
10115 }
10116 s = PL_bufptr;
10117#ifdef PERL_MAD
10118 stuffstart = s - SvPVX(PL_linestr);
10119#endif
10120 shared->herelines++;
10121 PL_last_lop = PL_last_uni = NULL;
10122#ifndef PERL_STRICT_CR
10123 if (PL_bufend - PL_linestart >= 2) {
10124 if ((PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n') ||
10125 (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
10126 {
10127 PL_bufend[-2] = '\n';
10128 PL_bufend--;
10129 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
10130 }
10131 else if (PL_bufend[-1] == '\r')
10132 PL_bufend[-1] = '\n';
10133 }
10134 else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
10135 PL_bufend[-1] = '\n';
10136#endif
10137 if (*s == term && memEQ(s,PL_tokenbuf + 1,len)) {
10138 SvREFCNT_dec(PL_linestr);
10139 PL_linestr = linestr_save;
10140 PL_linestart = SvPVX(linestr_save);
10141 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
10142 s = d;
10143 break;
10144 }
10145 else {
10146 sv_catsv(tmpstr,PL_linestr);
10147 }
10148 }
10149 }
10150 PL_multi_end = CopLINE(PL_curcop);
10151 if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
10152 SvPV_shrink_to_cur(tmpstr);
10153 }
10154 if (!IN_BYTES) {
10155 if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
10156 SvUTF8_on(tmpstr);
10157 else if (PL_encoding)
10158 sv_recode_to_utf8(tmpstr, PL_encoding);
10159 }
10160 PL_lex_stuff = tmpstr;
10161 pl_yylval.ival = op_type;
10162 return s;
10163
10164 interminable:
10165 SvREFCNT_dec(tmpstr);
10166 CopLINE_set(PL_curcop, (line_t)PL_multi_start - 1);
10167 missingterm(PL_tokenbuf + 1);
10168}
10169
10170/* scan_inputsymbol
10171 takes: current position in input buffer
10172 returns: new position in input buffer
10173 side-effects: pl_yylval and lex_op are set.
10174
10175 This code handles:
10176
10177 <> read from ARGV
10178 <FH> read from filehandle
10179 <pkg::FH> read from package qualified filehandle
10180 <pkg'FH> read from package qualified filehandle
10181 <$fh> read from filehandle in $fh
10182 <*.h> filename glob
10183
10184*/
10185
10186STATIC char *
10187S_scan_inputsymbol(pTHX_ char *start)
10188{
10189 dVAR;
10190 char *s = start; /* current position in buffer */
10191 char *end;
10192 I32 len;
10193 char *d = PL_tokenbuf; /* start of temp holding space */
10194 const char * const e = PL_tokenbuf + sizeof PL_tokenbuf; /* end of temp holding space */
10195
10196 PERL_ARGS_ASSERT_SCAN_INPUTSYMBOL;
10197
10198 end = strchr(s, '\n');
10199 if (!end)
10200 end = PL_bufend;
10201 s = delimcpy(d, e, s + 1, end, '>', &len); /* extract until > */
10202
10203 /* die if we didn't have space for the contents of the <>,
10204 or if it didn't end, or if we see a newline
10205 */
10206
10207 if (len >= (I32)sizeof PL_tokenbuf)
10208 Perl_croak(aTHX_ "Excessively long <> operator");
10209 if (s >= end)
10210 Perl_croak(aTHX_ "Unterminated <> operator");
10211
10212 s++;
10213
10214 /* check for <$fh>
10215 Remember, only scalar variables are interpreted as filehandles by
10216 this code. Anything more complex (e.g., <$fh{$num}>) will be
10217 treated as a glob() call.
10218 This code makes use of the fact that except for the $ at the front,
10219 a scalar variable and a filehandle look the same.
10220 */
10221 if (*d == '$' && d[1]) d++;
10222
10223 /* allow <Pkg'VALUE> or <Pkg::VALUE> */
10224 while (*d && (isWORDCHAR_lazy_if(d,UTF) || *d == '\'' || *d == ':'))
10225 d += UTF ? UTF8SKIP(d) : 1;
10226
10227 /* If we've tried to read what we allow filehandles to look like, and
10228 there's still text left, then it must be a glob() and not a getline.
10229 Use scan_str to pull out the stuff between the <> and treat it
10230 as nothing more than a string.
10231 */
10232
10233 if (d - PL_tokenbuf != len) {
10234 pl_yylval.ival = OP_GLOB;
10235 s = scan_str(start,!!PL_madskills,FALSE,FALSE, FALSE);
10236 if (!s)
10237 Perl_croak(aTHX_ "Glob not terminated");
10238 return s;
10239 }
10240 else {
10241 bool readline_overriden = FALSE;
10242 GV *gv_readline;
10243 GV **gvp;
10244 /* we're in a filehandle read situation */
10245 d = PL_tokenbuf;
10246
10247 /* turn <> into <ARGV> */
10248 if (!len)
10249 Copy("ARGV",d,5,char);
10250
10251 /* Check whether readline() is overriden */
10252 gv_readline = gv_fetchpvs("readline", GV_NOTQUAL, SVt_PVCV);
10253 if ((gv_readline
10254 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline))
10255 ||
10256 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readline", FALSE))
10257 && (gv_readline = *gvp) && isGV_with_GP(gv_readline)
10258 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline)))
10259 readline_overriden = TRUE;
10260
10261 /* if <$fh>, create the ops to turn the variable into a
10262 filehandle
10263 */
10264 if (*d == '$') {
10265 /* try to find it in the pad for this block, otherwise find
10266 add symbol table ops
10267 */
10268 const PADOFFSET tmp = pad_findmy_pvn(d, len, UTF ? SVf_UTF8 : 0);
10269 if (tmp != NOT_IN_PAD) {
10270 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
10271 HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
10272 HEK * const stashname = HvNAME_HEK(stash);
10273 SV * const sym = sv_2mortal(newSVhek(stashname));
10274 sv_catpvs(sym, "::");
10275 sv_catpv(sym, d+1);
10276 d = SvPVX(sym);
10277 goto intro_sym;
10278 }
10279 else {
10280 OP * const o = newOP(OP_PADSV, 0);
10281 o->op_targ = tmp;
10282 PL_lex_op = readline_overriden
10283 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
10284 op_append_elem(OP_LIST, o,
10285 newCVREF(0, newGVOP(OP_GV,0,gv_readline))))
10286 : (OP*)newUNOP(OP_READLINE, 0, o);
10287 }
10288 }
10289 else {
10290 GV *gv;
10291 ++d;
10292intro_sym:
10293 gv = gv_fetchpv(d,
10294 (PL_in_eval
10295 ? (GV_ADDMULTI | GV_ADDINEVAL)
10296 : GV_ADDMULTI) | ( UTF ? SVf_UTF8 : 0 ),
10297 SVt_PV);
10298 PL_lex_op = readline_overriden
10299 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
10300 op_append_elem(OP_LIST,
10301 newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)),
10302 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
10303 : (OP*)newUNOP(OP_READLINE, 0,
10304 newUNOP(OP_RV2SV, 0,
10305 newGVOP(OP_GV, 0, gv)));
10306 }
10307 if (!readline_overriden)
10308 PL_lex_op->op_flags |= OPf_SPECIAL;
10309 /* we created the ops in PL_lex_op, so make pl_yylval.ival a null op */
10310 pl_yylval.ival = OP_NULL;
10311 }
10312
10313 /* If it's none of the above, it must be a literal filehandle
10314 (<Foo::BAR> or <FOO>) so build a simple readline OP */
10315 else {
10316 GV * const gv = gv_fetchpv(d, GV_ADD | ( UTF ? SVf_UTF8 : 0 ), SVt_PVIO);
10317 PL_lex_op = readline_overriden
10318 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
10319 op_append_elem(OP_LIST,
10320 newGVOP(OP_GV, 0, gv),
10321 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
10322 : (OP*)newUNOP(OP_READLINE, 0, newGVOP(OP_GV, 0, gv));
10323 pl_yylval.ival = OP_NULL;
10324 }
10325 }
10326
10327 return s;
10328}
10329
10330
10331/* scan_str
10332 takes: start position in buffer
10333 keep_quoted preserve \ on the embedded delimiter(s)
10334 keep_delims preserve the delimiters around the string
10335 re_reparse compiling a run-time /(?{})/:
10336 collapse // to /, and skip encoding src
10337 returns: position to continue reading from buffer
10338 side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
10339 updates the read buffer.
10340
10341 This subroutine pulls a string out of the input. It is called for:
10342 q single quotes q(literal text)
10343 ' single quotes 'literal text'
10344 qq double quotes qq(interpolate $here please)
10345 " double quotes "interpolate $here please"
10346 qx backticks qx(/bin/ls -l)
10347 ` backticks `/bin/ls -l`
10348 qw quote words @EXPORT_OK = qw( func() $spam )
10349 m// regexp match m/this/
10350 s/// regexp substitute s/this/that/
10351 tr/// string transliterate tr/this/that/
10352 y/// string transliterate y/this/that/
10353 ($*@) sub prototypes sub foo ($)
10354 (stuff) sub attr parameters sub foo : attr(stuff)
10355 <> readline or globs <FOO>, <>, <$fh>, or <*.c>
10356
10357 In most of these cases (all but <>, patterns and transliterate)
10358 yylex() calls scan_str(). m// makes yylex() call scan_pat() which
10359 calls scan_str(). s/// makes yylex() call scan_subst() which calls
10360 scan_str(). tr/// and y/// make yylex() call scan_trans() which
10361 calls scan_str().
10362
10363 It skips whitespace before the string starts, and treats the first
10364 character as the delimiter. If the delimiter is one of ([{< then
10365 the corresponding "close" character )]}> is used as the closing
10366 delimiter. It allows quoting of delimiters, and if the string has
10367 balanced delimiters ([{<>}]) it allows nesting.
10368
10369 On success, the SV with the resulting string is put into lex_stuff or,
10370 if that is already non-NULL, into lex_repl. The second case occurs only
10371 when parsing the RHS of the special constructs s/// and tr/// (y///).
10372 For convenience, the terminating delimiter character is stuffed into
10373 SvIVX of the SV.
10374*/
10375
10376STATIC char *
10377S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims, int re_reparse,
10378 bool deprecate_escaped_meta /* Should we issue a deprecation warning
10379 for certain paired metacharacters that
10380 appear escaped within it */
10381 )
10382{
10383 dVAR;
10384 SV *sv; /* scalar value: string */
10385 const char *tmps; /* temp string, used for delimiter matching */
10386 char *s = start; /* current position in the buffer */
10387 char term; /* terminating character */
10388 char *to; /* current position in the sv's data */
10389 I32 brackets = 1; /* bracket nesting level */
10390 bool has_utf8 = FALSE; /* is there any utf8 content? */
10391 I32 termcode; /* terminating char. code */
10392 U8 termstr[UTF8_MAXBYTES]; /* terminating string */
10393 STRLEN termlen; /* length of terminating string */
10394 int last_off = 0; /* last position for nesting bracket */
10395 char *escaped_open = NULL;
10396#ifdef PERL_MAD
10397 int stuffstart;
10398 char *tstart;
10399#endif
10400
10401 PERL_ARGS_ASSERT_SCAN_STR;
10402
10403 /* skip space before the delimiter */
10404 if (isSPACE(*s)) {
10405 s = PEEKSPACE(s);
10406 }
10407
10408#ifdef PERL_MAD
10409 if (PL_realtokenstart >= 0) {
10410 stuffstart = PL_realtokenstart;
10411 PL_realtokenstart = -1;
10412 }
10413 else
10414 stuffstart = start - SvPVX(PL_linestr);
10415#endif
10416 /* mark where we are, in case we need to report errors */
10417 CLINE;
10418
10419 /* after skipping whitespace, the next character is the terminator */
10420 term = *s;
10421 if (!UTF) {
10422 termcode = termstr[0] = term;
10423 termlen = 1;
10424 }
10425 else {
10426 termcode = utf8_to_uvchr_buf((U8*)s, (U8*)PL_bufend, &termlen);
10427 Copy(s, termstr, termlen, U8);
10428 if (!UTF8_IS_INVARIANT(term))
10429 has_utf8 = TRUE;
10430 }
10431
10432 /* mark where we are */
10433 PL_multi_start = CopLINE(PL_curcop);
10434 PL_multi_open = term;
10435
10436 /* find corresponding closing delimiter */
10437 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
10438 termcode = termstr[0] = term = tmps[5];
10439
10440 PL_multi_close = term;
10441
10442 /* A warning is raised if the input parameter requires it for escaped (by a
10443 * backslash) paired metacharacters {} [] and () when the delimiters are
10444 * those same characters, and the backslash is ineffective. This doesn't
10445 * happen for <>, as they aren't metas. */
10446 if (deprecate_escaped_meta
10447 && (PL_multi_open == PL_multi_close
10448 || ! ckWARN_d(WARN_DEPRECATED)
10449 || PL_multi_open == '<'))
10450 {
10451 deprecate_escaped_meta = FALSE;
10452 }
10453
10454 /* create a new SV to hold the contents. 79 is the SV's initial length.
10455 What a random number. */
10456 sv = newSV_type(SVt_PVIV);
10457 SvGROW(sv, 80);
10458 SvIV_set(sv, termcode);
10459 (void)SvPOK_only(sv); /* validate pointer */
10460
10461 /* move past delimiter and try to read a complete string */
10462 if (keep_delims)
10463 sv_catpvn(sv, s, termlen);
10464 s += termlen;
10465#ifdef PERL_MAD
10466 tstart = SvPVX(PL_linestr) + stuffstart;
10467 if (PL_madskills && !PL_thisopen && !keep_delims) {
10468 PL_thisopen = newSVpvn(tstart, s - tstart);
10469 stuffstart = s - SvPVX(PL_linestr);
10470 }
10471#endif
10472 for (;;) {
10473 if (PL_encoding && !UTF && !re_reparse) {
10474 bool cont = TRUE;
10475
10476 while (cont) {
10477 int offset = s - SvPVX_const(PL_linestr);
10478 const bool found = sv_cat_decode(sv, PL_encoding, PL_linestr,
10479 &offset, (char*)termstr, termlen);
10480 const char * const ns = SvPVX_const(PL_linestr) + offset;
10481 char * const svlast = SvEND(sv) - 1;
10482
10483 for (; s < ns; s++) {
10484 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
10485 COPLINE_INC_WITH_HERELINES;
10486 }
10487 if (!found)
10488 goto read_more_line;
10489 else {
10490 /* handle quoted delimiters */
10491 if (SvCUR(sv) > 1 && *(svlast-1) == '\\') {
10492 const char *t;
10493 for (t = svlast-2; t >= SvPVX_const(sv) && *t == '\\';)
10494 t--;
10495 if ((svlast-1 - t) % 2) {
10496 if (!keep_quoted) {
10497 *(svlast-1) = term;
10498 *svlast = '\0';
10499 SvCUR_set(sv, SvCUR(sv) - 1);
10500 }
10501 continue;
10502 }
10503 }
10504 if (PL_multi_open == PL_multi_close) {
10505 cont = FALSE;
10506 }
10507 else {
10508 const char *t;
10509 char *w;
10510 for (t = w = SvPVX(sv)+last_off; t < svlast; w++, t++) {
10511 /* At here, all closes are "was quoted" one,
10512 so we don't check PL_multi_close. */
10513 if (*t == '\\') {
10514 if (!keep_quoted && *(t+1) == PL_multi_open)
10515 t++;
10516 else
10517 *w++ = *t++;
10518 }
10519 else if (*t == PL_multi_open)
10520 brackets++;
10521
10522 *w = *t;
10523 }
10524 if (w < t) {
10525 *w++ = term;
10526 *w = '\0';
10527 SvCUR_set(sv, w - SvPVX_const(sv));
10528 }
10529 last_off = w - SvPVX(sv);
10530 if (--brackets <= 0)
10531 cont = FALSE;
10532 }
10533 }
10534 }
10535 if (!keep_delims) {
10536 SvCUR_set(sv, SvCUR(sv) - 1);
10537 *SvEND(sv) = '\0';
10538 }
10539 break;
10540 }
10541
10542 /* extend sv if need be */
10543 SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
10544 /* set 'to' to the next character in the sv's string */
10545 to = SvPVX(sv)+SvCUR(sv);
10546
10547 /* if open delimiter is the close delimiter read unbridle */
10548 if (PL_multi_open == PL_multi_close) {
10549 for (; s < PL_bufend; s++,to++) {
10550 /* embedded newlines increment the current line number */
10551 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
10552 COPLINE_INC_WITH_HERELINES;
10553 /* handle quoted delimiters */
10554 if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
10555 if (!keep_quoted
10556 && (s[1] == term
10557 || (re_reparse && s[1] == '\\'))
10558 )
10559 s++;
10560 /* any other quotes are simply copied straight through */
10561 else
10562 *to++ = *s++;
10563 }
10564 /* terminate when run out of buffer (the for() condition), or
10565 have found the terminator */
10566 else if (*s == term) {
10567 if (termlen == 1)
10568 break;
10569 if (s+termlen <= PL_bufend && memEQ(s, (char*)termstr, termlen))
10570 break;
10571 }
10572 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
10573 has_utf8 = TRUE;
10574 *to = *s;
10575 }
10576 }
10577
10578 /* if the terminator isn't the same as the start character (e.g.,
10579 matched brackets), we have to allow more in the quoting, and
10580 be prepared for nested brackets.
10581 */
10582 else {
10583 /* read until we run out of string, or we find the terminator */
10584 for (; s < PL_bufend; s++,to++) {
10585 /* embedded newlines increment the line count */
10586 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
10587 COPLINE_INC_WITH_HERELINES;
10588 /* backslashes can escape the open or closing characters */
10589 if (*s == '\\' && s+1 < PL_bufend) {
10590 if (!keep_quoted &&
10591 ((s[1] == PL_multi_open) || (s[1] == PL_multi_close)))
10592 {
10593 s++;
10594
10595 /* Here, 'deprecate_escaped_meta' is true iff the
10596 * delimiters are paired metacharacters, and 's' points
10597 * to an occurrence of one of them within the string,
10598 * which was preceded by a backslash. If this is a
10599 * context where the delimiter is also a metacharacter,
10600 * the backslash is useless, and deprecated. () and []
10601 * are meta in any context. {} are meta only when
10602 * appearing in a quantifier or in things like '\p{'.
10603 * They also aren't meta unless there is a matching
10604 * closed, escaped char later on within the string.
10605 * If 's' points to an open, set a flag; if to a close,
10606 * test that flag, and raise a warning if it was set */
10607
10608 if (deprecate_escaped_meta) {
10609 if (*s == PL_multi_open) {
10610 if (*s != '{') {
10611 escaped_open = s;
10612 }
10613 else if (regcurly(s,
10614 TRUE /* Look for a closing
10615 '\}' */)
10616 || (s - start > 2 /* Look for e.g.
10617 '\x{' */
10618 && _generic_isCC(*(s-2), _CC_BACKSLASH_FOO_LBRACE_IS_META)))
10619 {
10620 escaped_open = s;
10621 }
10622 }
10623 else if (escaped_open) {
10624 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
10625 "Useless use of '\\'; doesn't escape metacharacter '%c'", PL_multi_open);
10626 escaped_open = NULL;
10627 }
10628 }
10629 }
10630 else
10631 *to++ = *s++;
10632 }
10633 /* allow nested opens and closes */
10634 else if (*s == PL_multi_close && --brackets <= 0)
10635 break;
10636 else if (*s == PL_multi_open)
10637 brackets++;
10638 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
10639 has_utf8 = TRUE;
10640 *to = *s;
10641 }
10642 }
10643 /* terminate the copied string and update the sv's end-of-string */
10644 *to = '\0';
10645 SvCUR_set(sv, to - SvPVX_const(sv));
10646
10647 /*
10648 * this next chunk reads more into the buffer if we're not done yet
10649 */
10650
10651 if (s < PL_bufend)
10652 break; /* handle case where we are done yet :-) */
10653
10654#ifndef PERL_STRICT_CR
10655 if (to - SvPVX_const(sv) >= 2) {
10656 if ((to[-2] == '\r' && to[-1] == '\n') ||
10657 (to[-2] == '\n' && to[-1] == '\r'))
10658 {
10659 to[-2] = '\n';
10660 to--;
10661 SvCUR_set(sv, to - SvPVX_const(sv));
10662 }
10663 else if (to[-1] == '\r')
10664 to[-1] = '\n';
10665 }
10666 else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r')
10667 to[-1] = '\n';
10668#endif
10669
10670 read_more_line:
10671 /* if we're out of file, or a read fails, bail and reset the current
10672 line marker so we can report where the unterminated string began
10673 */
10674#ifdef PERL_MAD
10675 if (PL_madskills) {
10676 char * const tstart = SvPVX(PL_linestr) + stuffstart;
10677 if (PL_thisstuff)
10678 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
10679 else
10680 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
10681 }
10682#endif
10683 COPLINE_INC_WITH_HERELINES;
10684 PL_bufptr = PL_bufend;
10685 if (!lex_next_chunk(0)) {
10686 sv_free(sv);
10687 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
10688 return NULL;
10689 }
10690 s = PL_bufptr;
10691#ifdef PERL_MAD
10692 stuffstart = 0;
10693#endif
10694 }
10695
10696 /* at this point, we have successfully read the delimited string */
10697
10698 if (!PL_encoding || UTF || re_reparse) {
10699#ifdef PERL_MAD
10700 if (PL_madskills) {
10701 char * const tstart = SvPVX(PL_linestr) + stuffstart;
10702 const int len = s - tstart;
10703 if (PL_thisstuff)
10704 sv_catpvn(PL_thisstuff, tstart, len);
10705 else
10706 PL_thisstuff = newSVpvn(tstart, len);
10707 if (!PL_thisclose && !keep_delims)
10708 PL_thisclose = newSVpvn(s,termlen);
10709 }
10710#endif
10711
10712 if (keep_delims)
10713 sv_catpvn(sv, s, termlen);
10714 s += termlen;
10715 }
10716#ifdef PERL_MAD
10717 else {
10718 if (PL_madskills) {
10719 char * const tstart = SvPVX(PL_linestr) + stuffstart;
10720 const int len = s - tstart - termlen;
10721 if (PL_thisstuff)
10722 sv_catpvn(PL_thisstuff, tstart, len);
10723 else
10724 PL_thisstuff = newSVpvn(tstart, len);
10725 if (!PL_thisclose && !keep_delims)
10726 PL_thisclose = newSVpvn(s - termlen,termlen);
10727 }
10728 }
10729#endif
10730 if (has_utf8 || (PL_encoding && !re_reparse))
10731 SvUTF8_on(sv);
10732
10733 PL_multi_end = CopLINE(PL_curcop);
10734
10735 /* if we allocated too much space, give some back */
10736 if (SvCUR(sv) + 5 < SvLEN(sv)) {
10737 SvLEN_set(sv, SvCUR(sv) + 1);
10738 SvPV_renew(sv, SvLEN(sv));
10739 }
10740
10741 /* decide whether this is the first or second quoted string we've read
10742 for this op
10743 */
10744
10745 if (PL_lex_stuff)
10746 PL_sublex_info.repl = sv;
10747 else
10748 PL_lex_stuff = sv;
10749 return s;
10750}
10751
10752/*
10753 scan_num
10754 takes: pointer to position in buffer
10755 returns: pointer to new position in buffer
10756 side-effects: builds ops for the constant in pl_yylval.op
10757
10758 Read a number in any of the formats that Perl accepts:
10759
10760 \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*) 12 12.34 12.
10761 \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*) .34
10762 0b[01](_?[01])*
10763 0[0-7](_?[0-7])*
10764 0x[0-9A-Fa-f](_?[0-9A-Fa-f])*
10765
10766 Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the
10767 thing it reads.
10768
10769 If it reads a number without a decimal point or an exponent, it will
10770 try converting the number to an integer and see if it can do so
10771 without loss of precision.
10772*/
10773
10774char *
10775Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
10776{
10777 dVAR;
10778 const char *s = start; /* current position in buffer */
10779 char *d; /* destination in temp buffer */
10780 char *e; /* end of temp buffer */
10781 NV nv; /* number read, as a double */
10782 SV *sv = NULL; /* place to put the converted number */
10783 bool floatit; /* boolean: int or float? */
10784 const char *lastub = NULL; /* position of last underbar */
10785 static const char* const number_too_long = "Number too long";
10786
10787 PERL_ARGS_ASSERT_SCAN_NUM;
10788
10789 /* We use the first character to decide what type of number this is */
10790
10791 switch (*s) {
10792 default:
10793 Perl_croak(aTHX_ "panic: scan_num, *s=%d", *s);
10794
10795 /* if it starts with a 0, it could be an octal number, a decimal in
10796 0.13 disguise, or a hexadecimal number, or a binary number. */
10797 case '0':
10798 {
10799 /* variables:
10800 u holds the "number so far"
10801 shift the power of 2 of the base
10802 (hex == 4, octal == 3, binary == 1)
10803 overflowed was the number more than we can hold?
10804
10805 Shift is used when we add a digit. It also serves as an "are
10806 we in octal/hex/binary?" indicator to disallow hex characters
10807 when in octal mode.
10808 */
10809 NV n = 0.0;
10810 UV u = 0;
10811 I32 shift;
10812 bool overflowed = FALSE;
10813 bool just_zero = TRUE; /* just plain 0 or binary number? */
10814 static const NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 };
10815 static const char* const bases[5] =
10816 { "", "binary", "", "octal", "hexadecimal" };
10817 static const char* const Bases[5] =
10818 { "", "Binary", "", "Octal", "Hexadecimal" };
10819 static const char* const maxima[5] =
10820 { "",
10821 "0b11111111111111111111111111111111",
10822 "",
10823 "037777777777",
10824 "0xffffffff" };
10825 const char *base, *Base, *max;
10826
10827 /* check for hex */
10828 if (s[1] == 'x' || s[1] == 'X') {
10829 shift = 4;
10830 s += 2;
10831 just_zero = FALSE;
10832 } else if (s[1] == 'b' || s[1] == 'B') {
10833 shift = 1;
10834 s += 2;
10835 just_zero = FALSE;
10836 }
10837 /* check for a decimal in disguise */
10838 else if (s[1] == '.' || s[1] == 'e' || s[1] == 'E')
10839 goto decimal;
10840 /* so it must be octal */
10841 else {
10842 shift = 3;
10843 s++;
10844 }
10845
10846 if (*s == '_') {
10847 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10848 "Misplaced _ in number");
10849 lastub = s++;
10850 }
10851
10852 base = bases[shift];
10853 Base = Bases[shift];
10854 max = maxima[shift];
10855
10856 /* read the rest of the number */
10857 for (;;) {
10858 /* x is used in the overflow test,
10859 b is the digit we're adding on. */
10860 UV x, b;
10861
10862 switch (*s) {
10863
10864 /* if we don't mention it, we're done */
10865 default:
10866 goto out;
10867
10868 /* _ are ignored -- but warned about if consecutive */
10869 case '_':
10870 if (lastub && s == lastub + 1)
10871 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10872 "Misplaced _ in number");
10873 lastub = s++;
10874 break;
10875
10876 /* 8 and 9 are not octal */
10877 case '8': case '9':
10878 if (shift == 3)
10879 yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s));
10880 /* FALL THROUGH */
10881
10882 /* octal digits */
10883 case '2': case '3': case '4':
10884 case '5': case '6': case '7':
10885 if (shift == 1)
10886 yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s));
10887 /* FALL THROUGH */
10888
10889 case '0': case '1':
10890 b = *s++ & 15; /* ASCII digit -> value of digit */
10891 goto digit;
10892
10893 /* hex digits */
10894 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
10895 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
10896 /* make sure they said 0x */
10897 if (shift != 4)
10898 goto out;
10899 b = (*s++ & 7) + 9;
10900
10901 /* Prepare to put the digit we have onto the end
10902 of the number so far. We check for overflows.
10903 */
10904
10905 digit:
10906 just_zero = FALSE;
10907 if (!overflowed) {
10908 x = u << shift; /* make room for the digit */
10909
10910 if ((x >> shift) != u
10911 && !(PL_hints & HINT_NEW_BINARY)) {
10912 overflowed = TRUE;
10913 n = (NV) u;
10914 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
10915 "Integer overflow in %s number",
10916 base);
10917 } else
10918 u = x | b; /* add the digit to the end */
10919 }
10920 if (overflowed) {
10921 n *= nvshift[shift];
10922 /* If an NV has not enough bits in its
10923 * mantissa to represent an UV this summing of
10924 * small low-order numbers is a waste of time
10925 * (because the NV cannot preserve the
10926 * low-order bits anyway): we could just
10927 * remember when did we overflow and in the
10928 * end just multiply n by the right
10929 * amount. */
10930 n += (NV) b;
10931 }
10932 break;
10933 }
10934 }
10935
10936 /* if we get here, we had success: make a scalar value from
10937 the number.
10938 */
10939 out:
10940
10941 /* final misplaced underbar check */
10942 if (s[-1] == '_') {
10943 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
10944 }
10945
10946 if (overflowed) {
10947 if (n > 4294967295.0)
10948 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
10949 "%s number > %s non-portable",
10950 Base, max);
10951 sv = newSVnv(n);
10952 }
10953 else {
10954#if UVSIZE > 4
10955 if (u > 0xffffffff)
10956 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
10957 "%s number > %s non-portable",
10958 Base, max);
10959#endif
10960 sv = newSVuv(u);
10961 }
10962 if (just_zero && (PL_hints & HINT_NEW_INTEGER))
10963 sv = new_constant(start, s - start, "integer",
10964 sv, NULL, NULL, 0);
10965 else if (PL_hints & HINT_NEW_BINARY)
10966 sv = new_constant(start, s - start, "binary", sv, NULL, NULL, 0);
10967 }
10968 break;
10969
10970 /*
10971 handle decimal numbers.
10972 we're also sent here when we read a 0 as the first digit
10973 */
10974 case '1': case '2': case '3': case '4': case '5':
10975 case '6': case '7': case '8': case '9': case '.':
10976 decimal:
10977 d = PL_tokenbuf;
10978 e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */
10979 floatit = FALSE;
10980
10981 /* read next group of digits and _ and copy into d */
10982 while (isDIGIT(*s) || *s == '_') {
10983 /* skip underscores, checking for misplaced ones
10984 if -w is on
10985 */
10986 if (*s == '_') {
10987 if (lastub && s == lastub + 1)
10988 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10989 "Misplaced _ in number");
10990 lastub = s++;
10991 }
10992 else {
10993 /* check for end of fixed-length buffer */
10994 if (d >= e)
10995 Perl_croak(aTHX_ "%s", number_too_long);
10996 /* if we're ok, copy the character */
10997 *d++ = *s++;
10998 }
10999 }
11000
11001 /* final misplaced underbar check */
11002 if (lastub && s == lastub + 1) {
11003 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
11004 }
11005
11006 /* read a decimal portion if there is one. avoid
11007 3..5 being interpreted as the number 3. followed
11008 by .5
11009 */
11010 if (*s == '.' && s[1] != '.') {
11011 floatit = TRUE;
11012 *d++ = *s++;
11013
11014 if (*s == '_') {
11015 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
11016 "Misplaced _ in number");
11017 lastub = s;
11018 }
11019
11020 /* copy, ignoring underbars, until we run out of digits.
11021 */
11022 for (; isDIGIT(*s) || *s == '_'; s++) {
11023 /* fixed length buffer check */
11024 if (d >= e)
11025 Perl_croak(aTHX_ "%s", number_too_long);
11026 if (*s == '_') {
11027 if (lastub && s == lastub + 1)
11028 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
11029 "Misplaced _ in number");
11030 lastub = s;
11031 }
11032 else
11033 *d++ = *s;
11034 }
11035 /* fractional part ending in underbar? */
11036 if (s[-1] == '_') {
11037 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
11038 "Misplaced _ in number");
11039 }
11040 if (*s == '.' && isDIGIT(s[1])) {
11041 /* oops, it's really a v-string, but without the "v" */
11042 s = start;
11043 goto vstring;
11044 }
11045 }
11046
11047 /* read exponent part, if present */
11048 if ((*s == 'e' || *s == 'E') && strchr("+-0123456789_", s[1])) {
11049 floatit = TRUE;
11050 s++;
11051
11052 /* regardless of whether user said 3E5 or 3e5, use lower 'e' */
11053 *d++ = 'e'; /* At least some Mach atof()s don't grok 'E' */
11054
11055 /* stray preinitial _ */
11056 if (*s == '_') {
11057 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
11058 "Misplaced _ in number");
11059 lastub = s++;
11060 }
11061
11062 /* allow positive or negative exponent */
11063 if (*s == '+' || *s == '-')
11064 *d++ = *s++;
11065
11066 /* stray initial _ */
11067 if (*s == '_') {
11068 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
11069 "Misplaced _ in number");
11070 lastub = s++;
11071 }
11072
11073 /* read digits of exponent */
11074 while (isDIGIT(*s) || *s == '_') {
11075 if (isDIGIT(*s)) {
11076 if (d >= e)
11077 Perl_croak(aTHX_ "%s", number_too_long);
11078 *d++ = *s++;
11079 }
11080 else {
11081 if (((lastub && s == lastub + 1) ||
11082 (!isDIGIT(s[1]) && s[1] != '_')))
11083 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
11084 "Misplaced _ in number");
11085 lastub = s++;
11086 }
11087 }
11088 }
11089
11090
11091 /*
11092 We try to do an integer conversion first if no characters
11093 indicating "float" have been found.
11094 */
11095
11096 if (!floatit) {
11097 UV uv;
11098 const int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv);
11099
11100 if (flags == IS_NUMBER_IN_UV) {
11101 if (uv <= IV_MAX)
11102 sv = newSViv(uv); /* Prefer IVs over UVs. */
11103 else
11104 sv = newSVuv(uv);
11105 } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) {
11106 if (uv <= (UV) IV_MIN)
11107 sv = newSViv(-(IV)uv);
11108 else
11109 floatit = TRUE;
11110 } else
11111 floatit = TRUE;
11112 }
11113 if (floatit) {
11114 /* terminate the string */
11115 *d = '\0';
11116 nv = Atof(PL_tokenbuf);
11117 sv = newSVnv(nv);
11118 }
11119
11120 if ( floatit
11121 ? (PL_hints & HINT_NEW_FLOAT) : (PL_hints & HINT_NEW_INTEGER) ) {
11122 const char *const key = floatit ? "float" : "integer";
11123 const STRLEN keylen = floatit ? 5 : 7;
11124 sv = S_new_constant(aTHX_ PL_tokenbuf, d - PL_tokenbuf,
11125 key, keylen, sv, NULL, NULL, 0);
11126 }
11127 break;
11128
11129 /* if it starts with a v, it could be a v-string */
11130 case 'v':
11131vstring:
11132 sv = newSV(5); /* preallocate storage space */
11133 ENTER_with_name("scan_vstring");
11134 SAVEFREESV(sv);
11135 s = scan_vstring(s, PL_bufend, sv);
11136 SvREFCNT_inc_simple_void_NN(sv);
11137 LEAVE_with_name("scan_vstring");
11138 break;
11139 }
11140
11141 /* make the op for the constant and return */
11142
11143 if (sv)
11144 lvalp->opval = newSVOP(OP_CONST, 0, sv);
11145 else
11146 lvalp->opval = NULL;
11147
11148 return (char *)s;
11149}
11150
11151STATIC char *
11152S_scan_formline(pTHX_ char *s)
11153{
11154 dVAR;
11155 char *eol;
11156 char *t;
11157 SV * const stuff = newSVpvs("");
11158 bool needargs = FALSE;
11159 bool eofmt = FALSE;
11160#ifdef PERL_MAD
11161 char *tokenstart = s;
11162 SV* savewhite = NULL;
11163
11164 if (PL_madskills) {
11165 savewhite = PL_thiswhite;
11166 PL_thiswhite = 0;
11167 }
11168#endif
11169
11170 PERL_ARGS_ASSERT_SCAN_FORMLINE;
11171
11172 while (!needargs) {
11173 if (*s == '.') {
11174 t = s+1;
11175#ifdef PERL_STRICT_CR
11176 while (SPACE_OR_TAB(*t))
11177 t++;
11178#else
11179 while (SPACE_OR_TAB(*t) || *t == '\r')
11180 t++;
11181#endif
11182 if (*t == '\n' || t == PL_bufend) {
11183 eofmt = TRUE;
11184 break;
11185 }
11186 }
11187 eol = (char *) memchr(s,'\n',PL_bufend-s);
11188 if (!eol++)
11189 eol = PL_bufend;
11190 if (*s != '#') {
11191 for (t = s; t < eol; t++) {
11192 if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
11193 needargs = FALSE;
11194 goto enough; /* ~~ must be first line in formline */
11195 }
11196 if (*t == '@' || *t == '^')
11197 needargs = TRUE;
11198 }
11199 if (eol > s) {
11200 sv_catpvn(stuff, s, eol-s);
11201#ifndef PERL_STRICT_CR
11202 if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') {
11203 char *end = SvPVX(stuff) + SvCUR(stuff);
11204 end[-2] = '\n';
11205 end[-1] = '\0';
11206 SvCUR_set(stuff, SvCUR(stuff) - 1);
11207 }
11208#endif
11209 }
11210 else
11211 break;
11212 }
11213 s = (char*)eol;
11214 if ((PL_rsfp || PL_parser->filtered)
11215 && PL_parser->form_lex_state == LEX_NORMAL) {
11216 bool got_some;
11217#ifdef PERL_MAD
11218 if (PL_madskills) {
11219 if (PL_thistoken)
11220 sv_catpvn(PL_thistoken, tokenstart, PL_bufend - tokenstart);
11221 else
11222 PL_thistoken = newSVpvn(tokenstart, PL_bufend - tokenstart);
11223 }
11224#endif
11225 PL_bufptr = PL_bufend;
11226 COPLINE_INC_WITH_HERELINES;
11227 got_some = lex_next_chunk(0);
11228 CopLINE_dec(PL_curcop);
11229 s = PL_bufptr;
11230#ifdef PERL_MAD
11231 tokenstart = PL_bufptr;
11232#endif
11233 if (!got_some)
11234 break;
11235 }
11236 incline(s);
11237 }
11238 enough:
11239 if (!SvCUR(stuff) || needargs)
11240 PL_lex_state = PL_parser->form_lex_state;
11241 if (SvCUR(stuff)) {
11242 PL_expect = XSTATE;
11243 if (needargs) {
11244 start_force(PL_curforce);
11245 NEXTVAL_NEXTTOKE.ival = 0;
11246 force_next(FORMLBRACK);
11247 }
11248 if (!IN_BYTES) {
11249 if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff)))
11250 SvUTF8_on(stuff);
11251 else if (PL_encoding)
11252 sv_recode_to_utf8(stuff, PL_encoding);
11253 }
11254 start_force(PL_curforce);
11255 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0, stuff);
11256 force_next(THING);
11257 }
11258 else {
11259 SvREFCNT_dec(stuff);
11260 if (eofmt)
11261 PL_lex_formbrack = 0;
11262 }
11263#ifdef PERL_MAD
11264 if (PL_madskills) {
11265 if (PL_thistoken)
11266 sv_catpvn(PL_thistoken, tokenstart, s - tokenstart);
11267 else
11268 PL_thistoken = newSVpvn(tokenstart, s - tokenstart);
11269 PL_thiswhite = savewhite;
11270 }
11271#endif
11272 return s;
11273}
11274
11275I32
11276Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
11277{
11278 dVAR;
11279 const I32 oldsavestack_ix = PL_savestack_ix;
11280 CV* const outsidecv = PL_compcv;
11281
11282 SAVEI32(PL_subline);
11283 save_item(PL_subname);
11284 SAVESPTR(PL_compcv);
11285
11286 PL_compcv = MUTABLE_CV(newSV_type(is_format ? SVt_PVFM : SVt_PVCV));
11287 CvFLAGS(PL_compcv) |= flags;
11288
11289 PL_subline = CopLINE(PL_curcop);
11290 CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB);
11291 CvOUTSIDE(PL_compcv) = MUTABLE_CV(SvREFCNT_inc_simple(outsidecv));
11292 CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
11293 if (outsidecv && CvPADLIST(outsidecv))
11294 CvPADLIST(PL_compcv)->xpadl_outid =
11295 PadlistNAMES(CvPADLIST(outsidecv));
11296
11297 return oldsavestack_ix;
11298}
11299
11300#ifdef __SC__
11301#pragma segment Perl_yylex
11302#endif
11303static int
11304S_yywarn(pTHX_ const char *const s, U32 flags)
11305{
11306 dVAR;
11307
11308 PERL_ARGS_ASSERT_YYWARN;
11309
11310 PL_in_eval |= EVAL_WARNONLY;
11311 yyerror_pv(s, flags);
11312 PL_in_eval &= ~EVAL_WARNONLY;
11313 return 0;
11314}
11315
11316int
11317Perl_yyerror(pTHX_ const char *const s)
11318{
11319 PERL_ARGS_ASSERT_YYERROR;
11320 return yyerror_pvn(s, strlen(s), 0);
11321}
11322
11323int
11324Perl_yyerror_pv(pTHX_ const char *const s, U32 flags)
11325{
11326 PERL_ARGS_ASSERT_YYERROR_PV;
11327 return yyerror_pvn(s, strlen(s), flags);
11328}
11329
11330int
11331Perl_yyerror_pvn(pTHX_ const char *const s, STRLEN len, U32 flags)
11332{
11333 dVAR;
11334 const char *context = NULL;
11335 int contlen = -1;
11336 SV *msg;
11337 SV * const where_sv = newSVpvs_flags("", SVs_TEMP);
11338 int yychar = PL_parser->yychar;
11339
11340 PERL_ARGS_ASSERT_YYERROR_PVN;
11341
11342 if (!yychar || (yychar == ';' && !PL_rsfp))
11343 sv_catpvs(where_sv, "at EOF");
11344 else if (PL_oldoldbufptr && PL_bufptr > PL_oldoldbufptr &&
11345 PL_bufptr - PL_oldoldbufptr < 200 && PL_oldoldbufptr != PL_oldbufptr &&
11346 PL_oldbufptr != PL_bufptr) {
11347 /*
11348 Only for NetWare:
11349 The code below is removed for NetWare because it abends/crashes on NetWare
11350 when the script has error such as not having the closing quotes like:
11351 if ($var eq "value)
11352 Checking of white spaces is anyway done in NetWare code.
11353 */
11354#ifndef NETWARE
11355 while (isSPACE(*PL_oldoldbufptr))
11356 PL_oldoldbufptr++;
11357#endif
11358 context = PL_oldoldbufptr;
11359 contlen = PL_bufptr - PL_oldoldbufptr;
11360 }
11361 else if (PL_oldbufptr && PL_bufptr > PL_oldbufptr &&
11362 PL_bufptr - PL_oldbufptr < 200 && PL_oldbufptr != PL_bufptr) {
11363 /*
11364 Only for NetWare:
11365 The code below is removed for NetWare because it abends/crashes on NetWare
11366 when the script has error such as not having the closing quotes like:
11367 if ($var eq "value)
11368 Checking of white spaces is anyway done in NetWare code.
11369 */
11370#ifndef NETWARE
11371 while (isSPACE(*PL_oldbufptr))
11372 PL_oldbufptr++;
11373#endif
11374 context = PL_oldbufptr;
11375 contlen = PL_bufptr - PL_oldbufptr;
11376 }
11377 else if (yychar > 255)
11378 sv_catpvs(where_sv, "next token ???");
11379 else if (yychar == -2) { /* YYEMPTY */
11380 if (PL_lex_state == LEX_NORMAL ||
11381 (PL_lex_state == LEX_KNOWNEXT && PL_lex_defer == LEX_NORMAL))
11382 sv_catpvs(where_sv, "at end of line");
11383 else if (PL_lex_inpat)
11384 sv_catpvs(where_sv, "within pattern");
11385 else
11386 sv_catpvs(where_sv, "within string");
11387 }
11388 else {
11389 sv_catpvs(where_sv, "next char ");
11390 if (yychar < 32)
11391 Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
11392 else if (isPRINT_LC(yychar)) {
11393 const char string = yychar;
11394 sv_catpvn(where_sv, &string, 1);
11395 }
11396 else
11397 Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
11398 }
11399 msg = newSVpvn_flags(s, len, (flags & SVf_UTF8) | SVs_TEMP);
11400 Perl_sv_catpvf(aTHX_ msg, " at %s line %"IVdf", ",
11401 OutCopFILE(PL_curcop), (IV)CopLINE(PL_curcop));
11402 if (context)
11403 Perl_sv_catpvf(aTHX_ msg, "near \"%"SVf"\"\n",
11404 SVfARG(newSVpvn_flags(context, contlen,
11405 SVs_TEMP | (UTF ? SVf_UTF8 : 0))));
11406 else
11407 Perl_sv_catpvf(aTHX_ msg, "%"SVf"\n", SVfARG(where_sv));
11408 if (PL_multi_start < PL_multi_end && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1) {
11409 Perl_sv_catpvf(aTHX_ msg,
11410 " (Might be a runaway multi-line %c%c string starting on line %"IVdf")\n",
11411 (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start);
11412 PL_multi_end = 0;
11413 }
11414 if (PL_in_eval & EVAL_WARNONLY) {
11415 Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "%"SVf, SVfARG(msg));
11416 }
11417 else
11418 qerror(msg);
11419 if (PL_error_count >= 10) {
11420 SV * errsv;
11421 if (PL_in_eval && ((errsv = ERRSV), SvCUR(errsv)))
11422 Perl_croak(aTHX_ "%"SVf"%s has too many errors.\n",
11423 SVfARG(errsv), OutCopFILE(PL_curcop));
11424 else
11425 Perl_croak(aTHX_ "%s has too many errors.\n",
11426 OutCopFILE(PL_curcop));
11427 }
11428 PL_in_my = 0;
11429 PL_in_my_stash = NULL;
11430 return 0;
11431}
11432#ifdef __SC__
11433#pragma segment Main
11434#endif
11435
11436STATIC char*
11437S_swallow_bom(pTHX_ U8 *s)
11438{
11439 dVAR;
11440 const STRLEN slen = SvCUR(PL_linestr);
11441
11442 PERL_ARGS_ASSERT_SWALLOW_BOM;
11443
11444 switch (s[0]) {
11445 case 0xFF:
11446 if (s[1] == 0xFE) {
11447 /* UTF-16 little-endian? (or UTF-32LE?) */
11448 if (s[2] == 0 && s[3] == 0) /* UTF-32 little-endian */
11449 /* diag_listed_as: Unsupported script encoding %s */
11450 Perl_croak(aTHX_ "Unsupported script encoding UTF-32LE");
11451#ifndef PERL_NO_UTF16_FILTER
11452 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (BOM)\n");
11453 s += 2;
11454 if (PL_bufend > (char*)s) {
11455 s = add_utf16_textfilter(s, TRUE);
11456 }
11457#else
11458 /* diag_listed_as: Unsupported script encoding %s */
11459 Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
11460#endif
11461 }
11462 break;
11463 case 0xFE:
11464 if (s[1] == 0xFF) { /* UTF-16 big-endian? */
11465#ifndef PERL_NO_UTF16_FILTER
11466 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
11467 s += 2;
11468 if (PL_bufend > (char *)s) {
11469 s = add_utf16_textfilter(s, FALSE);
11470 }
11471#else
11472 /* diag_listed_as: Unsupported script encoding %s */
11473 Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
11474#endif
11475 }
11476 break;
11477 case 0xEF:
11478 if (slen > 2 && s[1] == 0xBB && s[2] == 0xBF) {
11479 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
11480 s += 3; /* UTF-8 */
11481 }
11482 break;
11483 case 0:
11484 if (slen > 3) {
11485 if (s[1] == 0) {
11486 if (s[2] == 0xFE && s[3] == 0xFF) {
11487 /* UTF-32 big-endian */
11488 /* diag_listed_as: Unsupported script encoding %s */
11489 Perl_croak(aTHX_ "Unsupported script encoding UTF-32BE");
11490 }
11491 }
11492 else if (s[2] == 0 && s[3] != 0) {
11493 /* Leading bytes
11494 * 00 xx 00 xx
11495 * are a good indicator of UTF-16BE. */
11496#ifndef PERL_NO_UTF16_FILTER
11497 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
11498 s = add_utf16_textfilter(s, FALSE);
11499#else
11500 /* diag_listed_as: Unsupported script encoding %s */
11501 Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
11502#endif
11503 }
11504 }
11505#ifdef EBCDIC
11506 case 0xDD:
11507 if (slen > 3 && s[1] == 0x73 && s[2] == 0x66 && s[3] == 0x73) {
11508 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
11509 s += 4; /* UTF-8 */
11510 }
11511 break;
11512#endif
11513
11514 default:
11515 if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
11516 /* Leading bytes
11517 * xx 00 xx 00
11518 * are a good indicator of UTF-16LE. */
11519#ifndef PERL_NO_UTF16_FILTER
11520 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
11521 s = add_utf16_textfilter(s, TRUE);
11522#else
11523 /* diag_listed_as: Unsupported script encoding %s */
11524 Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
11525#endif
11526 }
11527 }
11528 return (char*)s;
11529}
11530
11531
11532#ifndef PERL_NO_UTF16_FILTER
11533static I32
11534S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
11535{
11536 dVAR;
11537 SV *const filter = FILTER_DATA(idx);
11538 /* We re-use this each time round, throwing the contents away before we
11539 return. */
11540 SV *const utf16_buffer = MUTABLE_SV(IoTOP_GV(filter));
11541 SV *const utf8_buffer = filter;
11542 IV status = IoPAGE(filter);
11543 const bool reverse = cBOOL(IoLINES(filter));
11544 I32 retval;
11545
11546 PERL_ARGS_ASSERT_UTF16_TEXTFILTER;
11547
11548 /* As we're automatically added, at the lowest level, and hence only called
11549 from this file, we can be sure that we're not called in block mode. Hence
11550 don't bother writing code to deal with block mode. */
11551 if (maxlen) {
11552 Perl_croak(aTHX_ "panic: utf16_textfilter called in block mode (for %d characters)", maxlen);
11553 }
11554 if (status < 0) {
11555 Perl_croak(aTHX_ "panic: utf16_textfilter called after error (status=%"IVdf")", status);
11556 }
11557 DEBUG_P(PerlIO_printf(Perl_debug_log,
11558 "utf16_textfilter(%p,%ce): idx=%d maxlen=%d status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
11559 FPTR2DPTR(void *, S_utf16_textfilter),
11560 reverse ? 'l' : 'b', idx, maxlen, status,
11561 (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
11562
11563 while (1) {
11564 STRLEN chars;
11565 STRLEN have;
11566 I32 newlen;
11567 U8 *end;
11568 /* First, look in our buffer of existing UTF-8 data: */
11569 char *nl = (char *)memchr(SvPVX(utf8_buffer), '\n', SvCUR(utf8_buffer));
11570
11571 if (nl) {
11572 ++nl;
11573 } else if (status == 0) {
11574 /* EOF */
11575 IoPAGE(filter) = 0;
11576 nl = SvEND(utf8_buffer);
11577 }
11578 if (nl) {
11579 STRLEN got = nl - SvPVX(utf8_buffer);
11580 /* Did we have anything to append? */
11581 retval = got != 0;
11582 sv_catpvn(sv, SvPVX(utf8_buffer), got);
11583 /* Everything else in this code works just fine if SVp_POK isn't
11584 set. This, however, needs it, and we need it to work, else
11585 we loop infinitely because the buffer is never consumed. */
11586 sv_chop(utf8_buffer, nl);
11587 break;
11588 }
11589
11590 /* OK, not a complete line there, so need to read some more UTF-16.
11591 Read an extra octect if the buffer currently has an odd number. */
11592 while (1) {
11593 if (status <= 0)
11594 break;
11595 if (SvCUR(utf16_buffer) >= 2) {
11596 /* Location of the high octet of the last complete code point.
11597 Gosh, UTF-16 is a pain. All the benefits of variable length,
11598 *coupled* with all the benefits of partial reads and
11599 endianness. */
11600 const U8 *const last_hi = (U8*)SvPVX(utf16_buffer)
11601 + ((SvCUR(utf16_buffer) & ~1) - (reverse ? 1 : 2));
11602
11603 if (*last_hi < 0xd8 || *last_hi > 0xdb) {
11604 break;
11605 }
11606
11607 /* We have the first half of a surrogate. Read more. */
11608 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter partial surrogate detected at %p\n", last_hi));
11609 }
11610
11611 status = FILTER_READ(idx + 1, utf16_buffer,
11612 160 + (SvCUR(utf16_buffer) & 1));
11613 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter status=%"IVdf" SvCUR(sv)=%"UVuf"\n", status, (UV)SvCUR(utf16_buffer)));
11614 DEBUG_P({ sv_dump(utf16_buffer); sv_dump(utf8_buffer);});
11615 if (status < 0) {
11616 /* Error */
11617 IoPAGE(filter) = status;
11618 return status;
11619 }
11620 }
11621
11622 chars = SvCUR(utf16_buffer) >> 1;
11623 have = SvCUR(utf8_buffer);
11624 SvGROW(utf8_buffer, have + chars * 3 + 1);
11625
11626 if (reverse) {
11627 end = utf16_to_utf8_reversed((U8*)SvPVX(utf16_buffer),
11628 (U8*)SvPVX_const(utf8_buffer) + have,
11629 chars * 2, &newlen);
11630 } else {
11631 end = utf16_to_utf8((U8*)SvPVX(utf16_buffer),
11632 (U8*)SvPVX_const(utf8_buffer) + have,
11633 chars * 2, &newlen);
11634 }
11635 SvCUR_set(utf8_buffer, have + newlen);
11636 *end = '\0';
11637
11638 /* No need to keep this SV "well-formed" with a '\0' after the end, as
11639 it's private to us, and utf16_to_utf8{,reversed} take a
11640 (pointer,length) pair, rather than a NUL-terminated string. */
11641 if(SvCUR(utf16_buffer) & 1) {
11642 *SvPVX(utf16_buffer) = SvEND(utf16_buffer)[-1];
11643 SvCUR_set(utf16_buffer, 1);
11644 } else {
11645 SvCUR_set(utf16_buffer, 0);
11646 }
11647 }
11648 DEBUG_P(PerlIO_printf(Perl_debug_log,
11649 "utf16_textfilter: returns, status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
11650 status,
11651 (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
11652 DEBUG_P({ sv_dump(utf8_buffer); sv_dump(sv);});
11653 return retval;
11654}
11655
11656static U8 *
11657S_add_utf16_textfilter(pTHX_ U8 *const s, bool reversed)
11658{
11659 SV *filter = filter_add(S_utf16_textfilter, NULL);
11660
11661 PERL_ARGS_ASSERT_ADD_UTF16_TEXTFILTER;
11662
11663 IoTOP_GV(filter) = MUTABLE_GV(newSVpvn((char *)s, PL_bufend - (char*)s));
11664 sv_setpvs(filter, "");
11665 IoLINES(filter) = reversed;
11666 IoPAGE(filter) = 1; /* Not EOF */
11667
11668 /* Sadly, we have to return a valid pointer, come what may, so we have to
11669 ignore any error return from this. */
11670 SvCUR_set(PL_linestr, 0);
11671 if (FILTER_READ(0, PL_linestr, 0)) {
11672 SvUTF8_on(PL_linestr);
11673 } else {
11674 SvUTF8_on(PL_linestr);
11675 }
11676 PL_bufend = SvEND(PL_linestr);
11677 return (U8*)SvPVX(PL_linestr);
11678}
11679#endif
11680
11681/*
11682Returns a pointer to the next character after the parsed
11683vstring, as well as updating the passed in sv.
11684
11685Function must be called like
11686
11687 sv = sv_2mortal(newSV(5));
11688 s = scan_vstring(s,e,sv);
11689
11690where s and e are the start and end of the string.
11691The sv should already be large enough to store the vstring
11692passed in, for performance reasons.
11693
11694This function may croak if fatal warnings are enabled in the
11695calling scope, hence the sv_2mortal in the example (to prevent
11696a leak). Make sure to do SvREFCNT_inc afterwards if you use
11697sv_2mortal.
11698
11699*/
11700
11701char *
11702Perl_scan_vstring(pTHX_ const char *s, const char *const e, SV *sv)
11703{
11704 dVAR;
11705 const char *pos = s;
11706 const char *start = s;
11707
11708 PERL_ARGS_ASSERT_SCAN_VSTRING;
11709
11710 if (*pos == 'v') pos++; /* get past 'v' */
11711 while (pos < e && (isDIGIT(*pos) || *pos == '_'))
11712 pos++;
11713 if ( *pos != '.') {
11714 /* this may not be a v-string if followed by => */
11715 const char *next = pos;
11716 while (next < e && isSPACE(*next))
11717 ++next;
11718 if ((e - next) >= 2 && *next == '=' && next[1] == '>' ) {
11719 /* return string not v-string */
11720 sv_setpvn(sv,(char *)s,pos-s);
11721 return (char *)pos;
11722 }
11723 }
11724
11725 if (!isALPHA(*pos)) {
11726 U8 tmpbuf[UTF8_MAXBYTES+1];
11727
11728 if (*s == 'v')
11729 s++; /* get past 'v' */
11730
11731 sv_setpvs(sv, "");
11732
11733 for (;;) {
11734 /* this is atoi() that tolerates underscores */
11735 U8 *tmpend;
11736 UV rev = 0;
11737 const char *end = pos;
11738 UV mult = 1;
11739 while (--end >= s) {
11740 if (*end != '_') {
11741 const UV orev = rev;
11742 rev += (*end - '0') * mult;
11743 mult *= 10;
11744 if (orev > rev)
11745 /* diag_listed_as: Integer overflow in %s number */
11746 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
11747 "Integer overflow in decimal number");
11748 }
11749 }
11750#ifdef EBCDIC
11751 if (rev > 0x7FFFFFFF)
11752 Perl_croak(aTHX_ "In EBCDIC the v-string components cannot exceed 2147483647");
11753#endif
11754 /* Append native character for the rev point */
11755 tmpend = uvchr_to_utf8(tmpbuf, rev);
11756 sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
11757 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(rev)))
11758 SvUTF8_on(sv);
11759 if (pos + 1 < e && *pos == '.' && isDIGIT(pos[1]))
11760 s = ++pos;
11761 else {
11762 s = pos;
11763 break;
11764 }
11765 while (pos < e && (isDIGIT(*pos) || *pos == '_'))
11766 pos++;
11767 }
11768 SvPOK_on(sv);
11769 sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start);
11770 SvRMAGICAL_on(sv);
11771 }
11772 return (char *)s;
11773}
11774
11775int
11776Perl_keyword_plugin_standard(pTHX_
11777 char *keyword_ptr, STRLEN keyword_len, OP **op_ptr)
11778{
11779 PERL_ARGS_ASSERT_KEYWORD_PLUGIN_STANDARD;
11780 PERL_UNUSED_CONTEXT;
11781 PERL_UNUSED_ARG(keyword_ptr);
11782 PERL_UNUSED_ARG(keyword_len);
11783 PERL_UNUSED_ARG(op_ptr);
11784 return KEYWORD_PLUGIN_DECLINE;
11785}
11786
11787#define parse_recdescent(g,p) S_parse_recdescent(aTHX_ g,p)
11788static void
11789S_parse_recdescent(pTHX_ int gramtype, I32 fakeeof)
11790{
11791 SAVEI32(PL_lex_brackets);
11792 if (PL_lex_brackets > 100)
11793 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
11794 PL_lex_brackstack[PL_lex_brackets++] = XFAKEEOF;
11795 SAVEI32(PL_lex_allbrackets);
11796 PL_lex_allbrackets = 0;
11797 SAVEI8(PL_lex_fakeeof);
11798 PL_lex_fakeeof = (U8)fakeeof;
11799 if(yyparse(gramtype) && !PL_parser->error_count)
11800 qerror(Perl_mess(aTHX_ "Parse error"));
11801}
11802
11803#define parse_recdescent_for_op(g,p) S_parse_recdescent_for_op(aTHX_ g,p)
11804static OP *
11805S_parse_recdescent_for_op(pTHX_ int gramtype, I32 fakeeof)
11806{
11807 OP *o;
11808 ENTER;
11809 SAVEVPTR(PL_eval_root);
11810 PL_eval_root = NULL;
11811 parse_recdescent(gramtype, fakeeof);
11812 o = PL_eval_root;
11813 LEAVE;
11814 return o;
11815}
11816
11817#define parse_expr(p,f) S_parse_expr(aTHX_ p,f)
11818static OP *
11819S_parse_expr(pTHX_ I32 fakeeof, U32 flags)
11820{
11821 OP *exprop;
11822 if (flags & ~PARSE_OPTIONAL)
11823 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_expr");
11824 exprop = parse_recdescent_for_op(GRAMEXPR, fakeeof);
11825 if (!exprop && !(flags & PARSE_OPTIONAL)) {
11826 if (!PL_parser->error_count)
11827 qerror(Perl_mess(aTHX_ "Parse error"));
11828 exprop = newOP(OP_NULL, 0);
11829 }
11830 return exprop;
11831}
11832
11833/*
11834=for apidoc Amx|OP *|parse_arithexpr|U32 flags
11835
11836Parse a Perl arithmetic expression. This may contain operators of precedence
11837down to the bit shift operators. The expression must be followed (and thus
11838terminated) either by a comparison or lower-precedence operator or by
11839something that would normally terminate an expression such as semicolon.
11840If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
11841otherwise it is mandatory. It is up to the caller to ensure that the
11842dynamic parser state (L</PL_parser> et al) is correctly set to reflect
11843the source of the code to be parsed and the lexical context for the
11844expression.
11845
11846The op tree representing the expression is returned. If an optional
11847expression is absent, a null pointer is returned, otherwise the pointer
11848will be non-null.
11849
11850If an error occurs in parsing or compilation, in most cases a valid op
11851tree is returned anyway. The error is reflected in the parser state,
11852normally resulting in a single exception at the top level of parsing
11853which covers all the compilation errors that occurred. Some compilation
11854errors, however, will throw an exception immediately.
11855
11856=cut
11857*/
11858
11859OP *
11860Perl_parse_arithexpr(pTHX_ U32 flags)
11861{
11862 return parse_expr(LEX_FAKEEOF_COMPARE, flags);
11863}
11864
11865/*
11866=for apidoc Amx|OP *|parse_termexpr|U32 flags
11867
11868Parse a Perl term expression. This may contain operators of precedence
11869down to the assignment operators. The expression must be followed (and thus
11870terminated) either by a comma or lower-precedence operator or by
11871something that would normally terminate an expression such as semicolon.
11872If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
11873otherwise it is mandatory. It is up to the caller to ensure that the
11874dynamic parser state (L</PL_parser> et al) is correctly set to reflect
11875the source of the code to be parsed and the lexical context for the
11876expression.
11877
11878The op tree representing the expression is returned. If an optional
11879expression is absent, a null pointer is returned, otherwise the pointer
11880will be non-null.
11881
11882If an error occurs in parsing or compilation, in most cases a valid op
11883tree is returned anyway. The error is reflected in the parser state,
11884normally resulting in a single exception at the top level of parsing
11885which covers all the compilation errors that occurred. Some compilation
11886errors, however, will throw an exception immediately.
11887
11888=cut
11889*/
11890
11891OP *
11892Perl_parse_termexpr(pTHX_ U32 flags)
11893{
11894 return parse_expr(LEX_FAKEEOF_COMMA, flags);
11895}
11896
11897/*
11898=for apidoc Amx|OP *|parse_listexpr|U32 flags
11899
11900Parse a Perl list expression. This may contain operators of precedence
11901down to the comma operator. The expression must be followed (and thus
11902terminated) either by a low-precedence logic operator such as C<or> or by
11903something that would normally terminate an expression such as semicolon.
11904If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
11905otherwise it is mandatory. It is up to the caller to ensure that the
11906dynamic parser state (L</PL_parser> et al) is correctly set to reflect
11907the source of the code to be parsed and the lexical context for the
11908expression.
11909
11910The op tree representing the expression is returned. If an optional
11911expression is absent, a null pointer is returned, otherwise the pointer
11912will be non-null.
11913
11914If an error occurs in parsing or compilation, in most cases a valid op
11915tree is returned anyway. The error is reflected in the parser state,
11916normally resulting in a single exception at the top level of parsing
11917which covers all the compilation errors that occurred. Some compilation
11918errors, however, will throw an exception immediately.
11919
11920=cut
11921*/
11922
11923OP *
11924Perl_parse_listexpr(pTHX_ U32 flags)
11925{
11926 return parse_expr(LEX_FAKEEOF_LOWLOGIC, flags);
11927}
11928
11929/*
11930=for apidoc Amx|OP *|parse_fullexpr|U32 flags
11931
11932Parse a single complete Perl expression. This allows the full
11933expression grammar, including the lowest-precedence operators such
11934as C<or>. The expression must be followed (and thus terminated) by a
11935token that an expression would normally be terminated by: end-of-file,
11936closing bracketing punctuation, semicolon, or one of the keywords that
11937signals a postfix expression-statement modifier. If I<flags> includes
11938C<PARSE_OPTIONAL> then the expression is optional, otherwise it is
11939mandatory. It is up to the caller to ensure that the dynamic parser
11940state (L</PL_parser> et al) is correctly set to reflect the source of
11941the code to be parsed and the lexical context for the expression.
11942
11943The op tree representing the expression is returned. If an optional
11944expression is absent, a null pointer is returned, otherwise the pointer
11945will be non-null.
11946
11947If an error occurs in parsing or compilation, in most cases a valid op
11948tree is returned anyway. The error is reflected in the parser state,
11949normally resulting in a single exception at the top level of parsing
11950which covers all the compilation errors that occurred. Some compilation
11951errors, however, will throw an exception immediately.
11952
11953=cut
11954*/
11955
11956OP *
11957Perl_parse_fullexpr(pTHX_ U32 flags)
11958{
11959 return parse_expr(LEX_FAKEEOF_NONEXPR, flags);
11960}
11961
11962/*
11963=for apidoc Amx|OP *|parse_block|U32 flags
11964
11965Parse a single complete Perl code block. This consists of an opening
11966brace, a sequence of statements, and a closing brace. The block
11967constitutes a lexical scope, so C<my> variables and various compile-time
11968effects can be contained within it. It is up to the caller to ensure
11969that the dynamic parser state (L</PL_parser> et al) is correctly set to
11970reflect the source of the code to be parsed and the lexical context for
11971the statement.
11972
11973The op tree representing the code block is returned. This is always a
11974real op, never a null pointer. It will normally be a C<lineseq> list,
11975including C<nextstate> or equivalent ops. No ops to construct any kind
11976of runtime scope are included by virtue of it being a block.
11977
11978If an error occurs in parsing or compilation, in most cases a valid op
11979tree (most likely null) is returned anyway. The error is reflected in
11980the parser state, normally resulting in a single exception at the top
11981level of parsing which covers all the compilation errors that occurred.
11982Some compilation errors, however, will throw an exception immediately.
11983
11984The I<flags> parameter is reserved for future use, and must always
11985be zero.
11986
11987=cut
11988*/
11989
11990OP *
11991Perl_parse_block(pTHX_ U32 flags)
11992{
11993 if (flags)
11994 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_block");
11995 return parse_recdescent_for_op(GRAMBLOCK, LEX_FAKEEOF_NEVER);
11996}
11997
11998/*
11999=for apidoc Amx|OP *|parse_barestmt|U32 flags
12000
12001Parse a single unadorned Perl statement. This may be a normal imperative
12002statement or a declaration that has compile-time effect. It does not
12003include any label or other affixture. It is up to the caller to ensure
12004that the dynamic parser state (L</PL_parser> et al) is correctly set to
12005reflect the source of the code to be parsed and the lexical context for
12006the statement.
12007
12008The op tree representing the statement is returned. This may be a
12009null pointer if the statement is null, for example if it was actually
12010a subroutine definition (which has compile-time side effects). If not
12011null, it will be ops directly implementing the statement, suitable to
12012pass to L</newSTATEOP>. It will not normally include a C<nextstate> or
12013equivalent op (except for those embedded in a scope contained entirely
12014within the statement).
12015
12016If an error occurs in parsing or compilation, in most cases a valid op
12017tree (most likely null) is returned anyway. The error is reflected in
12018the parser state, normally resulting in a single exception at the top
12019level of parsing which covers all the compilation errors that occurred.
12020Some compilation errors, however, will throw an exception immediately.
12021
12022The I<flags> parameter is reserved for future use, and must always
12023be zero.
12024
12025=cut
12026*/
12027
12028OP *
12029Perl_parse_barestmt(pTHX_ U32 flags)
12030{
12031 if (flags)
12032 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_barestmt");
12033 return parse_recdescent_for_op(GRAMBARESTMT, LEX_FAKEEOF_NEVER);
12034}
12035
12036/*
12037=for apidoc Amx|SV *|parse_label|U32 flags
12038
12039Parse a single label, possibly optional, of the type that may prefix a
12040Perl statement. It is up to the caller to ensure that the dynamic parser
12041state (L</PL_parser> et al) is correctly set to reflect the source of
12042the code to be parsed. If I<flags> includes C<PARSE_OPTIONAL> then the
12043label is optional, otherwise it is mandatory.
12044
12045The name of the label is returned in the form of a fresh scalar. If an
12046optional label is absent, a null pointer is returned.
12047
12048If an error occurs in parsing, which can only occur if the label is
12049mandatory, a valid label is returned anyway. The error is reflected in
12050the parser state, normally resulting in a single exception at the top
12051level of parsing which covers all the compilation errors that occurred.
12052
12053=cut
12054*/
12055
12056SV *
12057Perl_parse_label(pTHX_ U32 flags)
12058{
12059 if (flags & ~PARSE_OPTIONAL)
12060 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_label");
12061 if (PL_lex_state == LEX_KNOWNEXT) {
12062 PL_parser->yychar = yylex();
12063 if (PL_parser->yychar == LABEL) {
12064 char * const lpv = pl_yylval.pval;
12065 STRLEN llen = strlen(lpv);
12066 PL_parser->yychar = YYEMPTY;
12067 return newSVpvn_flags(lpv, llen, lpv[llen+1] ? SVf_UTF8 : 0);
12068 } else {
12069 yyunlex();
12070 goto no_label;
12071 }
12072 } else {
12073 char *s, *t;
12074 STRLEN wlen, bufptr_pos;
12075 lex_read_space(0);
12076 t = s = PL_bufptr;
12077 if (!isIDFIRST_lazy_if(s, UTF))
12078 goto no_label;
12079 t = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &wlen);
12080 if (word_takes_any_delimeter(s, wlen))
12081 goto no_label;
12082 bufptr_pos = s - SvPVX(PL_linestr);
12083 PL_bufptr = t;
12084 lex_read_space(LEX_KEEP_PREVIOUS);
12085 t = PL_bufptr;
12086 s = SvPVX(PL_linestr) + bufptr_pos;
12087 if (t[0] == ':' && t[1] != ':') {
12088 PL_oldoldbufptr = PL_oldbufptr;
12089 PL_oldbufptr = s;
12090 PL_bufptr = t+1;
12091 return newSVpvn_flags(s, wlen, UTF ? SVf_UTF8 : 0);
12092 } else {
12093 PL_bufptr = s;
12094 no_label:
12095 if (flags & PARSE_OPTIONAL) {
12096 return NULL;
12097 } else {
12098 qerror(Perl_mess(aTHX_ "Parse error"));
12099 return newSVpvs("x");
12100 }
12101 }
12102 }
12103}
12104
12105/*
12106=for apidoc Amx|OP *|parse_fullstmt|U32 flags
12107
12108Parse a single complete Perl statement. This may be a normal imperative
12109statement or a declaration that has compile-time effect, and may include
12110optional labels. It is up to the caller to ensure that the dynamic
12111parser state (L</PL_parser> et al) is correctly set to reflect the source
12112of the code to be parsed and the lexical context for the statement.
12113
12114The op tree representing the statement is returned. This may be a
12115null pointer if the statement is null, for example if it was actually
12116a subroutine definition (which has compile-time side effects). If not
12117null, it will be the result of a L</newSTATEOP> call, normally including
12118a C<nextstate> or equivalent op.
12119
12120If an error occurs in parsing or compilation, in most cases a valid op
12121tree (most likely null) is returned anyway. The error is reflected in
12122the parser state, normally resulting in a single exception at the top
12123level of parsing which covers all the compilation errors that occurred.
12124Some compilation errors, however, will throw an exception immediately.
12125
12126The I<flags> parameter is reserved for future use, and must always
12127be zero.
12128
12129=cut
12130*/
12131
12132OP *
12133Perl_parse_fullstmt(pTHX_ U32 flags)
12134{
12135 if (flags)
12136 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_fullstmt");
12137 return parse_recdescent_for_op(GRAMFULLSTMT, LEX_FAKEEOF_NEVER);
12138}
12139
12140/*
12141=for apidoc Amx|OP *|parse_stmtseq|U32 flags
12142
12143Parse a sequence of zero or more Perl statements. These may be normal
12144imperative statements, including optional labels, or declarations
12145that have compile-time effect, or any mixture thereof. The statement
12146sequence ends when a closing brace or end-of-file is encountered in a
12147place where a new statement could have validly started. It is up to
12148the caller to ensure that the dynamic parser state (L</PL_parser> et al)
12149is correctly set to reflect the source of the code to be parsed and the
12150lexical context for the statements.
12151
12152The op tree representing the statement sequence is returned. This may
12153be a null pointer if the statements were all null, for example if there
12154were no statements or if there were only subroutine definitions (which
12155have compile-time side effects). If not null, it will be a C<lineseq>
12156list, normally including C<nextstate> or equivalent ops.
12157
12158If an error occurs in parsing or compilation, in most cases a valid op
12159tree is returned anyway. The error is reflected in the parser state,
12160normally resulting in a single exception at the top level of parsing
12161which covers all the compilation errors that occurred. Some compilation
12162errors, however, will throw an exception immediately.
12163
12164The I<flags> parameter is reserved for future use, and must always
12165be zero.
12166
12167=cut
12168*/
12169
12170OP *
12171Perl_parse_stmtseq(pTHX_ U32 flags)
12172{
12173 OP *stmtseqop;
12174 I32 c;
12175 if (flags)
12176 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_stmtseq");
12177 stmtseqop = parse_recdescent_for_op(GRAMSTMTSEQ, LEX_FAKEEOF_CLOSING);
12178 c = lex_peek_unichar(0);
12179 if (c != -1 && c != /*{*/'}')
12180 qerror(Perl_mess(aTHX_ "Parse error"));
12181 return stmtseqop;
12182}
12183
12184/*
12185 * Local variables:
12186 * c-indentation-style: bsd
12187 * c-basic-offset: 4
12188 * indent-tabs-mode: nil
12189 * End:
12190 *
12191 * ex: set ts=8 sts=4 sw=4 et:
12192 */